From fddaad962b5b35c39f3b8c26dc0a066b2f739438 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Szymon=20Stefa=C5=84ski?=
Date: Tue, 31 Mar 2026 17:44:39 +0200
Subject: [PATCH] Update database - add relational database

---
Review notes for this revision (text in this section is dropped by `git am`):

* Every `with get_db_connection() as conn:` left the connection open: a
  sqlite3 connection used as a context manager only commits or rolls back.
  Call sites now wrap the connection in contextlib.closing().
* vector_database.py: an assets.db created with the previous schema has no
  content_type column and stores content/embedding as JSON text, so
  /save-document, /search and /load-document failed against it.  init_db()
  now adds the column and the readers accept both storage formats.
* /search skips unusable or zero-length embeddings instead of raising or
  producing a NaN score, and clamps a negative top_k.
* relational_database.py: the bare `except:` is narrowed; database errors
  are caught as sqlite3.Error in both services.
* Not changed, worth a follow-up: both services bind 127.0.0.1:8000, so they
  cannot run side by side; the vector /load-document still answers 200 with
  an "error" key where the relational one answers 404; the vector
  /save-document now requires `content` to be a string.
* The mail this was rebuilt from had lost its line breaks and indentation.
  Pre-image lines were re-indented by hand and the `index` hashes are the
  original ones, so `git am --ignore-whitespace` may be needed.

 Database/relational_database.py | 117 ++++++++++++++++++++++
 Database/vector_database.py     | 186 ++++++++++++++++++++++++++-------
 2 files changed, 263 insertions(+), 40 deletions(-)

diff --git a/Database/relational_database.py b/Database/relational_database.py
index e69de29..7d4ca21 100644
--- a/Database/relational_database.py
+++ b/Database/relational_database.py
@@ -0,0 +1,117 @@
+"""SQLite-backed document archive service (FastAPI)."""
+import json
+import os
+import sqlite3
+from contextlib import closing
+from typing import Optional
+
+import uvicorn
+from fastapi import Body, FastAPI, HTTPException
+from fastapi.middleware.cors import CORSMiddleware
+
+app = FastAPI()
+
+app.add_middleware(
+    CORSMiddleware,
+    allow_origins=["*"],
+    allow_methods=["*"],
+    allow_headers=["*"],
+)
+
+BASE_DIR = os.path.dirname(os.path.abspath(__file__))
+DB_FILE = os.path.join(BASE_DIR, "archivium.db")
+
+
+def get_db_connection():
+    """Open a new SQLite connection (WAL journal, rows addressable by name).
+
+    The caller must close the connection: using it as a context manager
+    only commits or rolls back, so call sites wrap it in ``closing``.
+    """
+    conn = sqlite3.connect(DB_FILE)
+    conn.execute("PRAGMA journal_mode=WAL;")
+    conn.row_factory = sqlite3.Row
+    return conn
+
+
+def init_db():
+    """Create the ``archive`` table if it does not exist yet."""
+    with closing(get_db_connection()) as conn:
+        conn.execute("""
+            CREATE TABLE IF NOT EXISTS archive
+            (
+                id         INTEGER PRIMARY KEY AUTOINCREMENT,
+                filename   TEXT UNIQUE,
+                ocr_text   TEXT,
+                metadata   TEXT,
+                created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
+            )
+        """)
+        conn.commit()
+
+
+init_db()
+
+
+@app.post("/save-document")
+async def save_document(data: dict = Body(...)):
+    """Insert a document, or overwrite the text of one with the same title.
+
+    Expects a JSON object with ``title`` and ``content``; ``content`` is
+    stored JSON-encoded.  Responds 400 when either is missing and 500 when
+    the database write fails.
+    """
+    title = data.get("title")
+    content = data.get("content")
+
+    if not title or content is None:
+        raise HTTPException(status_code=400, detail="Missing title or content")
+
+    content_str = json.dumps(content)
+
+    try:
+        with closing(get_db_connection()) as conn:
+            conn.execute("""
+                INSERT INTO archive (filename, ocr_text)
+                VALUES (?, ?)
+                ON CONFLICT(filename) DO UPDATE SET
+                    ocr_text = excluded.ocr_text
+            """, (title, content_str))
+            conn.commit()
+    except sqlite3.Error as e:
+        raise HTTPException(status_code=500, detail=str(e)) from e
+
+    return {"status": "success"}
+
+
+@app.get("/load-document")
+async def load_document(title: Optional[str] = None):
+    """Return the document named *title*, or the one with the highest id.
+
+    Responds 404 when no matching row exists.
+    """
+    with closing(get_db_connection()) as conn:
+        if title:
+            row = conn.execute(
+                "SELECT filename, ocr_text FROM archive WHERE filename = ?",
+                (title,),
+            ).fetchone()
+        else:
+            row = conn.execute(
+                "SELECT filename, ocr_text FROM archive ORDER BY id DESC LIMIT 1"
+            ).fetchone()
+
+    if row is None:
+        raise HTTPException(status_code=404, detail="Document not found")
+
+    try:
+        content_val = json.loads(row['ocr_text'])
+    except (TypeError, ValueError):
+        # The column may hold NULL or text that is not JSON; return it as is.
+        content_val = row['ocr_text']
+
+    return {"title": row['filename'], "content": content_val}
+
+
+if __name__ == "__main__":
+    uvicorn.run(app, host="127.0.0.1", port=8000)
diff --git a/Database/vector_database.py b/Database/vector_database.py
index 1812e42..6f3731f 100644
--- a/Database/vector_database.py
+++ b/Database/vector_database.py
@@ -1,7 +1,10 @@
 import sqlite3
 import json
 import os
-from fastapi import FastAPI, Body
+from contextlib import closing
+
+import numpy as np
+from fastapi import FastAPI, Body, HTTPException
 from fastapi.middleware.cors import CORSMiddleware
 from sentence_transformers import SentenceTransformer
 import uvicorn
@@ -19,64 +22,167 @@ BASE_DIR = os.path.dirname(os.path.abspath(__file__))
 DB_FILE = os.path.join(BASE_DIR, "assets.db")
 MODEL_DIR = os.path.join(BASE_DIR, "local_model_miniLM")
 
 if not os.path.exists(MODEL_DIR):
     model = SentenceTransformer('all-MiniLM-L6-v2')
     model.save(MODEL_DIR)
 else:
     model = SentenceTransformer(MODEL_DIR)
 
-def init_db():
+
+def get_db_connection():
+    """Open a new SQLite connection (WAL journal, rows addressable by name).
+
+    The caller must close the connection: using it as a context manager
+    only commits or rolls back, so call sites wrap it in ``closing``.
+    """
     conn = sqlite3.connect(DB_FILE)
-    conn.execute("""
-    CREATE TABLE IF NOT EXISTS documents
-    (
-    id INTEGER PRIMARY KEY AUTOINCREMENT,
-    title TEXT UNIQUE,
-    content TEXT,
-    embedding TEXT
-    )
-    """)
-    conn.commit()
-    conn.close()
+    conn.execute("PRAGMA journal_mode=WAL;")
+    conn.row_factory = sqlite3.Row
+    return conn
+
+
+def init_db():
+    """Create the ``documents`` table and upgrade databases from the old schema."""
+    with closing(get_db_connection()) as conn:
+        conn.execute("""
+            CREATE TABLE IF NOT EXISTS documents
+            (
+                id           INTEGER PRIMARY KEY AUTOINCREMENT,
+                title        TEXT UNIQUE,
+                content      BLOB,
+                content_type TEXT,
+                embedding    BLOB
+            )
+        """)
+        # CREATE TABLE IF NOT EXISTS leaves a table made by the previous
+        # schema (title, content, embedding) untouched, so the new column
+        # has to be added explicitly or inserts fail on existing databases.
+        columns = {row["name"] for row in conn.execute("PRAGMA table_info(documents)")}
+        if "content_type" not in columns:
+            conn.execute("ALTER TABLE documents ADD COLUMN content_type TEXT")
+        conn.commit()
+
 
 init_db()
 
+
+def _decode_content(value):
+    """Convert a stored ``content`` cell into the value returned to clients.
+
+    Rows written by this version hold UTF-8 bytes.  Rows written with the
+    previous schema hold JSON text and are parsed the way the old
+    ``/load-document`` did.
+    """
+    if isinstance(value, bytes):
+        return value.decode('utf-8', errors='ignore')
+    if isinstance(value, str):
+        try:
+            return json.loads(value)
+        except ValueError:
+            return value
+    return value
+
+
+def _load_embedding(raw):
+    """Decode a stored embedding into a float32 vector, or ``None`` if unusable.
+
+    Rows written by this version hold raw float32 bytes; rows written with
+    the previous schema hold a JSON list of floats.
+    """
+    if isinstance(raw, bytes):
+        # frombuffer raises unless the buffer is a whole number of items.
+        if len(raw) % np.dtype(np.float32).itemsize:
+            return None
+        return np.frombuffer(raw, dtype=np.float32)
+    if isinstance(raw, str):
+        try:
+            return np.asarray(json.loads(raw), dtype=np.float32)
+        except (TypeError, ValueError):
+            return None
+    return None
+
+
 @app.post("/save-document")
-async def save_document(data: dict = Body(...)):
-    title = data.get("title")
-    content = data.get("content")
-
-    text_to_vector = f"{title} {str(content)}"
-    vector = model.encode(text_to_vector).tolist()
+async def save_document(
+        title: str = Body(...),
+        content: str = Body(...),
+        content_type: str = Body("text/plain")
+):
+    """Store a document and its embedding, replacing any row with the same title.
+
+    Responds 500 when the database write fails.
+    """
+    vector = model.encode(f"{title} {content}").astype(np.float32).tobytes()
 
-    conn = sqlite3.connect(DB_FILE)
     try:
-        conn.execute("""
-        INSERT INTO documents (title, content, embedding)
-        VALUES (?, ?, ?) ON CONFLICT(title) DO
-        UPDATE SET
-        content=excluded.content,
-        embedding=excluded.embedding
-        """, (title, json.dumps(content), json.dumps(vector)))
-        conn.commit()
-        return {"status": "success"}
-    except Exception as e:
-        return {"status": "error", "message": str(e)}
-    finally:
-        conn.close()
+        with closing(get_db_connection()) as conn:
+            conn.execute("""
+                INSERT INTO documents (title, content, content_type, embedding)
+                VALUES (?, ?, ?, ?)
+                ON CONFLICT(title) DO UPDATE SET
+                    content = excluded.content,
+                    content_type = excluded.content_type,
+                    embedding = excluded.embedding
+            """, (title, content.encode('utf-8'), content_type, vector))
+            conn.commit()
+    except sqlite3.Error as e:
+        raise HTTPException(status_code=500, detail=str(e)) from e
+
+    return {"status": "success", "message": f"Dokument '{title}' zapisany."}
+
+
+@app.post("/search")
+async def search_similar(query: str = Body(..., embed=True), top_k: int = 3):
+    """Semantic (vector) search: rank stored documents by cosine similarity.
+
+    Returns at most *top_k* results, best match first.  Rows whose stored
+    embedding cannot be compared with the query vector are left out.
+    """
+    query_vector = model.encode(query).astype(np.float32)
+    query_norm = np.linalg.norm(query_vector)
+
+    with closing(get_db_connection()) as conn:
+        rows = conn.execute("SELECT title, content, embedding FROM documents").fetchall()
+
+    results = []
+    for row in rows:
+        db_vector = _load_embedding(row['embedding'])
+        # Unreadable embedding, or one with a different dimension than the query.
+        if db_vector is None or db_vector.shape != query_vector.shape:
+            continue
+
+        denominator = query_norm * np.linalg.norm(db_vector)
+        # A zero-length vector would make the score NaN, which is not valid JSON.
+        if denominator == 0:
+            continue
+
+        results.append({
+            "title": row['title'],
+            "content": _decode_content(row['content']),
+            "score": float(np.dot(query_vector, db_vector) / denominator)
+        })
+
+    results.sort(key=lambda item: item['score'], reverse=True)
+    # Clamp so a negative top_k yields no results instead of slicing from the end.
+    return {"results": results[:max(top_k, 0)]}
+
 
 @app.get("/load-document")
 async def load_document(title: str = None):
-    conn = sqlite3.connect(DB_FILE)
-    if title:
-        row = conn.execute("SELECT title, content FROM documents WHERE title = ?", (title,)).fetchone()
-    else:
-        row = conn.execute("SELECT title, content FROM documents ORDER BY id DESC LIMIT 1").fetchone()
-    conn.close()
+    """Return the document named *title*, or the one with the highest id."""
+    with closing(get_db_connection()) as conn:
+        if title:
+            row = conn.execute("SELECT title, content FROM documents WHERE title = ?", (title,)).fetchone()
+        else:
+            row = conn.execute("SELECT title, content FROM documents ORDER BY id DESC LIMIT 1").fetchone()
 
     if row:
-        return {"title": row[0], "content": json.loads(row[1])}
+        return {
+            "title": row['title'],
+            "content": _decode_content(row['content'])
+        }
     return {"error": "Nie znaleziono dokumentu"}
 
+
 if __name__ == "__main__":
     uvicorn.run(app, host="127.0.0.1", port=8000)