# Kod/Database/vector_database.py

import os
import sqlite3
from contextlib import closing

import numpy as np
import uvicorn
from fastapi import Body, FastAPI, HTTPException
from fastapi.middleware.cors import CORSMiddleware
from sentence_transformers import SentenceTransformer

# Application instance; the route decorators further down register on it.
app = FastAPI()

# CORS is left fully open: every origin, HTTP method and request header
# is accepted.
_CORS_OPTIONS = {
    "allow_origins": ["*"],
    "allow_methods": ["*"],
    "allow_headers": ["*"],
}
app.add_middleware(CORSMiddleware, **_CORS_OPTIONS)

# All on-disk artifacts are resolved relative to the directory that holds
# this file, independent of the process's working directory.
BASE_DIR = os.path.split(os.path.abspath(__file__))[0]
DB_FILE = os.path.join(BASE_DIR, "assets.db")  # SQLite database file
MODEL_DIR = os.path.join(BASE_DIR, "local_model_miniLM")  # saved model copy


# Load the embedding model from MODEL_DIR when that path exists; otherwise
# load it by name and save a copy to MODEL_DIR for later runs.
if os.path.exists(MODEL_DIR):
    model = SentenceTransformer(MODEL_DIR)
else:
    model = SentenceTransformer('all-MiniLM-L6-v2')
    model.save(MODEL_DIR)


def get_db_connection() -> sqlite3.Connection:
    """Open a new SQLite connection to ``DB_FILE``.

    The connection is switched to WAL journal mode and yields rows as
    ``sqlite3.Row``, so columns can be read by name.

    Returns:
        The open connection. This function never closes it; that is left
        to the caller.
    """
    connection = sqlite3.connect(DB_FILE)
    connection.row_factory = sqlite3.Row
    connection.execute("PRAGMA journal_mode=WAL;")
    return connection


def init_db():
    """Create the ``documents`` table if it does not exist yet.

    Columns: ``id`` (autoincrementing integer primary key), ``title``
    (unique text), ``content`` (blob), ``content_type`` (text) and
    ``embedding`` (blob).
    """
    # ``closing`` is required here: using a sqlite3 connection directly as a
    # context manager only commits/rolls back, it never closes the handle.
    with closing(get_db_connection()) as conn:
        conn.execute("""
                     CREATE TABLE IF NOT EXISTS documents
                     (
                         id
                         INTEGER
                         PRIMARY
                         KEY
                         AUTOINCREMENT,
                         title
                         TEXT
                         UNIQUE,
                         content
                         BLOB,
                         content_type
                         TEXT,
                         embedding
                         BLOB
                     )
                     """)
        conn.commit()


# Runs at import time so the table exists before any request handler
# touches the database.
init_db()


@app.post("/save-document")
async def save_document(
        title: str = Body(...),
        content: str = Body(...),
        content_type: str = Body("text/plain")
):
    """Store a document with its embedding, upserting on ``title``.

    If a row with the same title already exists, its content, content type
    and embedding are overwritten; otherwise a new row is inserted.

    Args:
        title: Document title; the unique key of the ``documents`` table.
        content: Document text, stored as UTF-8 bytes.
        content_type: Content type label, ``"text/plain"`` by default.

    Returns:
        A dict with ``status`` and ``message`` keys on success.

    Raises:
        HTTPException: Status 500 with the database error text as detail
            when the write fails.
    """
    # Title and content are embedded together as one string; the vector is
    # stored as raw float32 bytes.
    vector = model.encode(f"{title} {content}").astype(np.float32).tobytes()
    payload = content.encode('utf-8')

    try:
        # ``closing`` guarantees the connection is released; a sqlite3
        # connection used directly in ``with`` is only committed/rolled back.
        with closing(get_db_connection()) as conn:
            conn.execute("""
                         INSERT INTO documents (title, content, content_type, embedding)
                         VALUES (?, ?, ?, ?) ON CONFLICT(title) DO
                         UPDATE SET
                             content=excluded.content,
                             content_type=excluded.content_type,
                             embedding=excluded.embedding
                         """, (title, payload, content_type, vector))
            conn.commit()
    except sqlite3.Error as e:
        # Only database failures are translated here; anything else is a
        # programming error and is left to the framework's default handling.
        raise HTTPException(status_code=500, detail=str(e)) from e

    return {"status": "success", "message": f"Dokument '{title}' zapisany."}


@app.post("/search")
async def search_similar(query: str = Body(..., embed=True), top_k: int = 3):
    """Semantic (vector) search over the stored documents.

    The query is embedded with the module-level model and compared to every
    stored embedding by cosine similarity.

    Args:
        query: Search text, read from the ``query`` key of the JSON body.
        top_k: Maximum number of hits to return. Zero or a negative value
            yields an empty result list.

    Returns:
        ``{"results": [...]}`` where each hit has ``title``, ``content`` and
        ``score`` keys, ordered by descending score.
    """
    # Without this guard a negative top_k would act as a negative slice
    # bound and silently drop the *lowest* hits instead of limiting the list.
    if top_k <= 0:
        return {"results": []}

    query_vector = model.encode(query).astype(np.float32)
    # Loop-invariant, so computed once rather than per row.
    query_norm = np.linalg.norm(query_vector)

    # ``closing`` releases the connection; a sqlite3 connection used directly
    # in ``with`` is only committed/rolled back, never closed.
    with closing(get_db_connection()) as conn:
        rows = conn.execute("SELECT title, content, embedding FROM documents").fetchall()

    scored = []
    for row in rows:
        blob = row['embedding']
        # Skip rows whose embedding is missing or has a different byte size
        # than the query vector; they cannot be compared and would otherwise
        # fail the whole search.
        if blob is None or len(blob) != query_vector.nbytes:
            continue

        db_vector = np.frombuffer(blob, dtype=np.float32)
        denominator = query_norm * np.linalg.norm(db_vector)
        # A zero (or NaN) norm would produce a NaN score, which cannot be
        # ordered meaningfully nor serialised to JSON.
        if not denominator > 0:
            continue

        score = float(np.dot(query_vector, db_vector) / denominator)
        scored.append((score, row))

    # list.sort is stable, so equal scores keep their database order.
    scored.sort(key=lambda pair: pair[0], reverse=True)

    # Content is decoded only for the rows that are actually returned.
    results = [
        {
            "title": row['title'],
            "content": row['content'].decode('utf-8', errors='ignore'),
            "score": score,
        }
        for score, row in scored[:top_k]
    ]
    return {"results": results}


@app.get("/load-document")
async def load_document(title: str = None):
    """Return one stored document.

    Args:
        title: Title to look up. When omitted or empty, the row with the
            highest ``id`` is returned instead.

    Returns:
        A dict with ``title`` and ``content`` keys, or a dict with a single
        ``error`` key when no matching row exists.
    """
    if title:
        sql = "SELECT title, content FROM documents WHERE title = ?"
        params = (title,)
    else:
        sql = "SELECT title, content FROM documents ORDER BY id DESC LIMIT 1"
        params = ()

    # ``closing`` releases the connection; a sqlite3 connection used directly
    # in ``with`` is only committed/rolled back, never closed.
    with closing(get_db_connection()) as conn:
        row = conn.execute(sql, params).fetchone()

    if row is None:
        return {"error": "Nie znaleziono dokumentu"}

    return {
        "title": row['title'],
        "content": row['content'].decode('utf-8', errors='ignore')
    }


if __name__ == "__main__":
    # When executed as a script, serve the app on the loopback interface.
    server_options = {"host": "127.0.0.1", "port": 8000}
    uvicorn.run(app, **server_options)