"""FastAPI service that stores documents in SQLite and searches them by embedding similarity."""

import os
import sqlite3
from contextlib import closing
from typing import Optional

import numpy as np
import uvicorn
from fastapi import Body, FastAPI, HTTPException
from fastapi.middleware.cors import CORSMiddleware
from sentence_transformers import SentenceTransformer

app = FastAPI()
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_methods=["*"],
    allow_headers=["*"],
)

BASE_DIR = os.path.dirname(os.path.abspath(__file__))
DB_FILE = os.path.join(BASE_DIR, "assets.db")
MODEL_DIR = os.path.join(BASE_DIR, "local_model_miniLM")

# Size in bytes of one stored embedding component (embeddings are saved as float32).
_FLOAT32_SIZE = np.dtype(np.float32).itemsize

# Load the embedding model from the local copy when one exists; otherwise
# fetch it by name and save it next to this file so later starts reuse it.
if not os.path.exists(MODEL_DIR):
    model = SentenceTransformer('all-MiniLM-L6-v2')
    model.save(MODEL_DIR)
else:
    model = SentenceTransformer(MODEL_DIR)


def get_db_connection() -> sqlite3.Connection:
    """Open a new connection to the documents database.

    The connection uses WAL journaling and returns rows as ``sqlite3.Row`` so
    columns can be read by name.

    The caller owns the connection and must close it. Using the connection
    itself as a context manager only commits or rolls back; it does not close.
    """
    conn = sqlite3.connect(DB_FILE)
    conn.execute("PRAGMA journal_mode=WAL;")
    conn.row_factory = sqlite3.Row
    return conn


def init_db() -> None:
    """Create the ``documents`` table if it does not exist yet."""
    # `closing` releases the connection; the inner `conn` context commits on
    # success and rolls back on error.
    with closing(get_db_connection()) as conn, conn:
        conn.execute("""
            CREATE TABLE IF NOT EXISTS documents (
                id INTEGER PRIMARY KEY AUTOINCREMENT,
                title TEXT UNIQUE,
                content BLOB,
                content_type TEXT,
                embedding BLOB
            )
        """)


init_db()


def _decode_embedding(blob) -> Optional[np.ndarray]:
    """Return the stored float32 vector, or None when the blob is missing or malformed."""
    if not blob or len(blob) % _FLOAT32_SIZE:
        return None
    return np.frombuffer(blob, dtype=np.float32)


def _decode_content(blob) -> str:
    """Return stored document content as text, tolerating NULL or text-typed values."""
    if blob is None:
        return ""
    if isinstance(blob, str):
        return blob
    return bytes(blob).decode('utf-8', errors='ignore')


def _cosine_similarity(query_vector: np.ndarray, query_norm: float, candidate: np.ndarray) -> float:
    """Return the cosine similarity of two equally-shaped vectors.

    Falls back to 0.0 when either vector has zero length or the result is not
    finite, so the score can always be serialized as JSON.
    """
    denominator = query_norm * float(np.linalg.norm(candidate))
    if denominator == 0.0:
        return 0.0
    score = float(np.dot(query_vector, candidate)) / denominator
    return score if np.isfinite(score) else 0.0


# NOTE(review): the handlers below are `async def` but call the model and
# SQLite synchronously; confirm whether blocking the event loop is acceptable.
@app.post("/save-document")
async def save_document(
    title: str = Body(...),
    content: str = Body(...),
    content_type: str = Body("text/plain"),
):
    """Insert a document, or replace the one with the same title.

    The embedding is computed from the title and content together and stored
    alongside the UTF-8 encoded content.

    Raises:
        HTTPException: status 500 with the database error text when the write fails.
    """
    vector = model.encode(f"{title} {content}").astype(np.float32).tobytes()
    payload = (title, content.encode('utf-8'), content_type, vector)
    try:
        with closing(get_db_connection()) as conn, conn:
            conn.execute("""
                INSERT INTO documents (title, content, content_type, embedding)
                VALUES (?, ?, ?, ?)
                ON CONFLICT(title) DO UPDATE SET
                    content=excluded.content,
                    content_type=excluded.content_type,
                    embedding=excluded.embedding
            """, payload)
    except sqlite3.Error as e:
        raise HTTPException(status_code=500, detail=str(e)) from e
    return {"status": "success", "message": f"Dokument '{title}' zapisany."}


@app.post("/search")
async def search_similar(query: str = Body(..., embed=True), top_k: int = 3):
    """Semantic (vector) search over the stored documents.

    Every stored embedding is scored against the query embedding with cosine
    similarity, and the ``top_k`` best matches are returned in descending
    score order. Rows whose embedding is missing, malformed, or of a different
    size than the query embedding are skipped.

    Returns:
        ``{"results": [{"title": ..., "content": ..., "score": ...}, ...]}``
    """
    query_vector = model.encode(query).astype(np.float32)
    query_norm = float(np.linalg.norm(query_vector))  # loop-invariant

    with closing(get_db_connection()) as conn:
        rows = conn.execute("SELECT title, content, embedding FROM documents").fetchall()

    scored = []
    for row in rows:
        candidate = _decode_embedding(row['embedding'])
        if candidate is None or candidate.shape != query_vector.shape:
            continue
        scored.append((_cosine_similarity(query_vector, query_norm, candidate), row))

    # Stable sort: documents with equal scores keep their database order.
    scored.sort(key=lambda pair: pair[0], reverse=True)

    # A negative top_k would slice from the wrong end; treat it as "no results".
    limit = max(top_k, 0)
    # Decode content only for the rows that are actually returned.
    results = [
        {
            "title": row['title'],
            "content": _decode_content(row['content']),
            "score": score,
        }
        for score, row in scored[:limit]
    ]
    return {"results": results}


@app.get("/load-document")
async def load_document(title: Optional[str] = None):
    """Return one document by title, or the most recently inserted one.

    When ``title`` is empty or omitted, the row with the highest id is used.
    A missing document is reported as ``{"error": ...}`` in a normal response
    body rather than as an HTTP error.
    """
    with closing(get_db_connection()) as conn:
        if title:
            row = conn.execute(
                "SELECT title, content FROM documents WHERE title = ?",
                (title,),
            ).fetchone()
        else:
            row = conn.execute(
                "SELECT title, content FROM documents ORDER BY id DESC LIMIT 1"
            ).fetchone()

    if row:
        return {
            "title": row['title'],
            "content": _decode_content(row['content']),
        }
    return {"error": "Nie znaleziono dokumentu"}


if __name__ == "__main__":
    uvicorn.run(app, host="127.0.0.1", port=8000)