Update database - add relational database

This commit is contained in:
Szymon Stefański
2026-03-31 17:44:39 +02:00
parent cf4fc4acfc
commit fddaad962b
2 changed files with 192 additions and 39 deletions

View File

@@ -0,0 +1,101 @@
import json
import os
import sqlite3
from contextlib import closing

import uvicorn
from fastapi import FastAPI, Body, HTTPException
from fastapi.middleware.cors import CORSMiddleware
# ASGI application object; the route decorators further down register
# their endpoints on it.
app = FastAPI()

# CORS is left fully open: any origin, method, and header is accepted.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_methods=["*"],
    allow_headers=["*"],
)

# The SQLite database file is resolved relative to this module's directory,
# not the process working directory.
BASE_DIR = os.path.dirname(os.path.abspath(__file__))
DB_FILE = os.path.join(BASE_DIR, "archivium.db")
def get_db_connection(db_file=None):
    """Open a SQLite connection configured for this service.

    The connection requests write-ahead logging and returns rows as
    ``sqlite3.Row`` objects, so columns can be read by name.

    Args:
        db_file: Path of the database to open. When omitted, the
            module-level ``DB_FILE`` is used.

    Returns:
        An open ``sqlite3.Connection``. The caller is responsible for
        closing it.

    Raises:
        sqlite3.Error: If the database cannot be opened or configured.
    """
    conn = sqlite3.connect(DB_FILE if db_file is None else db_file)
    try:
        conn.execute("PRAGMA journal_mode=WAL;")
    except Exception:
        # Do not leak the handle when the connection cannot be configured.
        conn.close()
        raise
    conn.row_factory = sqlite3.Row
    return conn
def init_db():
    """Create the ``archive`` table if it does not exist yet.

    The table holds one row per document: an auto-incremented ``id``, a
    unique ``filename``, the ``ocr_text`` payload, a ``metadata`` column and
    a ``created_at`` timestamp that defaults to the insertion time.

    Raises:
        sqlite3.Error: If the database cannot be opened or the statement
            fails.
    """
    # ``with conn`` only commits or rolls back; ``closing`` is what actually
    # releases the connection once the schema has been created.
    with closing(get_db_connection()) as conn:
        with conn:
            conn.execute("""
CREATE TABLE IF NOT EXISTS archive
(
id
INTEGER
PRIMARY
KEY
AUTOINCREMENT,
filename
TEXT
UNIQUE,
ocr_text
TEXT,
metadata
TEXT,
created_at
TIMESTAMP
DEFAULT
CURRENT_TIMESTAMP
)
""")
# Runs at import time, so the archive table exists before any request is
# handled.
init_db()
@app.post("/save-document")
async def save_document(data: dict = Body(...)):
    """Insert a document, or overwrite the text of an existing one.

    Args:
        data: JSON body. ``title`` is used as the unique filename and
            ``content`` is stored JSON-encoded in ``ocr_text``.

    Returns:
        ``{"status": "success"}`` once the row has been committed.

    Raises:
        HTTPException: 400 when ``title`` is missing/empty or ``content`` is
            absent; 500 when the database operation fails.
    """
    title = data.get("title")
    content = data.get("content")
    if not title or content is None:
        raise HTTPException(status_code=400, detail="Missing title or content")

    content_str = json.dumps(content)
    try:
        # ``closing`` releases the connection; the inner ``conn`` context
        # commits on success and rolls back if the statement raises.
        with closing(get_db_connection()) as conn, conn:
            conn.execute("""
INSERT INTO archive (filename, ocr_text)
VALUES (?, ?) ON CONFLICT(filename) DO
UPDATE SET
ocr_text=excluded.ocr_text
""", (title, content_str))
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e)) from e
    return {"status": "success"}
@app.get("/load-document")
async def load_document(title: str = None):
    """Return one stored document.

    Args:
        title: Filename of the document to load. When omitted or empty, the
            row with the highest ``id`` is returned instead.

    Returns:
        ``{"title": ..., "content": ...}``. ``content`` is the JSON-decoded
        ``ocr_text``; if the stored value is not valid JSON it is returned
        unchanged.

    Raises:
        HTTPException: 404 when no matching document exists.
    """
    if title:
        sql = "SELECT filename, ocr_text FROM archive WHERE filename = ?"
        params = (title,)
    else:
        sql = "SELECT filename, ocr_text FROM archive ORDER BY id DESC LIMIT 1"
        params = ()

    # ``closing`` guarantees the connection is released; a bare
    # ``with connection`` would only end the transaction.
    with closing(get_db_connection()) as conn:
        row = conn.execute(sql, params).fetchone()

    if row is None:
        raise HTTPException(status_code=404, detail="Document not found")

    raw_text = row['ocr_text']
    try:
        content_val = json.loads(raw_text)
    except (TypeError, ValueError):
        # NULL or non-JSON text: hand the stored value back untouched.
        content_val = raw_text
    return {"title": row['filename'], "content": content_val}
# Serve the app on 127.0.0.1:8000 when this module is run directly.
if __name__ == "__main__":
    uvicorn.run(app, host="127.0.0.1", port=8000)

View File

@@ -1,7 +1,7 @@
import sqlite3 import sqlite3
import json
import os import os
from fastapi import FastAPI, Body import numpy as np
from fastapi import FastAPI, Body, HTTPException
from fastapi.middleware.cors import CORSMiddleware from fastapi.middleware.cors import CORSMiddleware
from sentence_transformers import SentenceTransformer from sentence_transformers import SentenceTransformer
import uvicorn import uvicorn
@@ -19,64 +19,116 @@ BASE_DIR = os.path.dirname(os.path.abspath(__file__))
DB_FILE = os.path.join(BASE_DIR, "assets.db") DB_FILE = os.path.join(BASE_DIR, "assets.db")
MODEL_DIR = os.path.join(BASE_DIR, "local_model_miniLM") MODEL_DIR = os.path.join(BASE_DIR, "local_model_miniLM")
if not os.path.exists(MODEL_DIR): if not os.path.exists(MODEL_DIR):
model = SentenceTransformer('all-MiniLM-L6-v2') model = SentenceTransformer('all-MiniLM-L6-v2')
model.save(MODEL_DIR) model.save(MODEL_DIR)
else: else:
model = SentenceTransformer(MODEL_DIR) model = SentenceTransformer(MODEL_DIR)
def init_db():
def get_db_connection():
conn = sqlite3.connect(DB_FILE) conn = sqlite3.connect(DB_FILE)
conn.execute("PRAGMA journal_mode=WAL;")
conn.row_factory = sqlite3.Row
return conn
def init_db():
with get_db_connection() as conn:
conn.execute(""" conn.execute("""
CREATE TABLE IF NOT EXISTS documents CREATE TABLE IF NOT EXISTS documents
( (
id INTEGER PRIMARY KEY AUTOINCREMENT, id
title TEXT UNIQUE, INTEGER
content TEXT, PRIMARY
embedding TEXT KEY
AUTOINCREMENT,
title
TEXT
UNIQUE,
content
BLOB,
content_type
TEXT,
embedding
BLOB
) )
""") """)
conn.commit() conn.commit()
conn.close()
init_db() init_db()
@app.post("/save-document") @app.post("/save-document")
async def save_document(data: dict = Body(...)): async def save_document(
title = data.get("title") title: str = Body(...),
content = data.get("content") content: str = Body(...),
content_type: str = Body("text/plain")
):
text_to_vector = f"{title} {str(content)}" vector = model.encode(f"{title} {content}").astype(np.float32).tobytes()
vector = model.encode(text_to_vector).tolist()
conn = sqlite3.connect(DB_FILE)
try: try:
with get_db_connection() as conn:
conn.execute(""" conn.execute("""
INSERT INTO documents (title, content, embedding) INSERT INTO documents (title, content, content_type, embedding)
VALUES (?, ?, ?) ON CONFLICT(title) DO VALUES (?, ?, ?, ?) ON CONFLICT(title) DO
UPDATE SET UPDATE SET
content=excluded.content, content=excluded.content,
content_type=excluded.content_type,
embedding=excluded.embedding embedding=excluded.embedding
""", (title, json.dumps(content), json.dumps(vector))) """, (title, content.encode('utf-8'), content_type, vector))
conn.commit() conn.commit()
return {"status": "success"} return {"status": "success", "message": f"Dokument '{title}' zapisany."}
except Exception as e: except Exception as e:
return {"status": "error", "message": str(e)} raise HTTPException(status_code=500, detail=str(e))
finally:
conn.close()
@app.post("/search")
async def search_similar(query: str = Body(..., embed=True), top_k: int = 3):
    """Semantic (vector) search over the stored documents.

    Encodes ``query`` with the sentence-transformer model and ranks every
    stored document by cosine similarity between the query vector and the
    document's embedding.

    Args:
        query: Search text, read from the ``query`` key of the JSON body.
        top_k: Maximum number of results to return. Values below 1 yield an
            empty result list.

    Returns:
        ``{"results": [...]}`` where each entry has ``title``, ``content``
        (decoded as UTF-8, undecodable bytes dropped) and ``score``, ordered
        by descending score.
    """
    query_vector = model.encode(query).astype(np.float32)
    # Loop-invariant: compute the query norm once instead of once per row.
    query_norm = float(np.linalg.norm(query_vector))

    # Read-only access, so no transaction handling is needed; the finally
    # clause makes sure the connection is always released.
    conn = get_db_connection()
    try:
        rows = conn.execute("SELECT title, content, embedding FROM documents").fetchall()
    finally:
        conn.close()

    results = []
    for row in rows:
        try:
            db_vector = np.frombuffer(row['embedding'], dtype=np.float32)
        except (TypeError, ValueError):
            # Missing or malformed embedding: skip the row rather than
            # failing the whole search.
            continue
        if db_vector.shape != query_vector.shape:
            continue

        denominator = query_norm * float(np.linalg.norm(db_vector))
        # A zero-length vector has no direction; score it 0.0 instead of
        # producing NaN, which would break sorting and JSON serialization.
        score = float(np.dot(query_vector, db_vector)) / denominator if denominator else 0.0
        results.append({
            "title": row['title'],
            "content": row['content'].decode('utf-8', errors='ignore'),
            "score": score
        })

    results.sort(key=lambda item: item['score'], reverse=True)
    return {"results": results[:max(top_k, 0)]}
@app.get("/load-document") @app.get("/load-document")
async def load_document(title: str = None): async def load_document(title: str = None):
conn = sqlite3.connect(DB_FILE) with get_db_connection() as conn:
if title: if title:
row = conn.execute("SELECT title, content FROM documents WHERE title = ?", (title,)).fetchone() row = conn.execute("SELECT title, content FROM documents WHERE title = ?", (title,)).fetchone()
else: else:
row = conn.execute("SELECT title, content FROM documents ORDER BY id DESC LIMIT 1").fetchone() row = conn.execute("SELECT title, content FROM documents ORDER BY id DESC LIMIT 1").fetchone()
conn.close()
if row: if row:
return {"title": row[0], "content": json.loads(row[1])} return {
"title": row['title'],
"content": row['content'].decode('utf-8', errors='ignore')
}
return {"error": "Nie znaleziono dokumentu"} return {"error": "Nie znaleziono dokumentu"}
if __name__ == "__main__": if __name__ == "__main__":
uvicorn.run(app, host="127.0.0.1", port=8000) uvicorn.run(app, host="127.0.0.1", port=8000)