Compare commits

...

2 Commits

Author SHA1 Message Date
Szymon Stefański
fddaad962b Updata database - add relational database 2026-03-31 17:44:39 +02:00
Szymon Stefański
cf4fc4acfc Updata database - add relational database 2026-03-31 17:42:23 +02:00
4 changed files with 235 additions and 82 deletions

Binary file not shown.

View File

@@ -1,82 +0,0 @@
import sqlite3
import json
import os
from fastapi import FastAPI, Body
from fastapi.middleware.cors import CORSMiddleware
from sentence_transformers import SentenceTransformer
import uvicorn
# Application object; the route decorators further down register on it.
app = FastAPI()

# CORS is left fully open: every origin, method and header is accepted.
app.add_middleware(
    CORSMiddleware,
    allow_headers=["*"],
    allow_methods=["*"],
    allow_origins=["*"],
)
# The database file and the saved model both live next to this module.
BASE_DIR = os.path.dirname(os.path.abspath(__file__))
DB_FILE = os.path.join(BASE_DIR, "archivium.db")
MODEL_DIR = os.path.join(BASE_DIR, "local_model_miniLM")

if os.path.exists(MODEL_DIR):
    # A copy was saved by an earlier run; load it straight from disk.
    model = SentenceTransformer(MODEL_DIR)
else:
    # No local copy yet: load the model by name and save it to MODEL_DIR
    # so that later runs take the branch above.
    model = SentenceTransformer('all-MiniLM-L6-v2')
    model.save(MODEL_DIR)
def init_db():
    """Create the ``documents`` table in ``DB_FILE`` if it is missing.

    The table has an autoincrementing ``id``, a unique ``title`` and
    ``content`` / ``embedding`` columns declared as TEXT. The connection
    is closed even when the statement fails.
    """
    conn = sqlite3.connect(DB_FILE)
    try:
        # The SQL text is kept verbatim, hence the flush-left lines.
        conn.execute("""
CREATE TABLE IF NOT EXISTS documents
(
id INTEGER PRIMARY KEY AUTOINCREMENT,
title TEXT UNIQUE,
content TEXT,
embedding TEXT
)
""")
        conn.commit()
    finally:
        # Release the connection on the error path as well.
        conn.close()


# Ensure the schema exists as soon as the module is imported.
init_db()
@app.post("/save-document")
async def save_document(data: dict = Body(...)):
    """Insert a document, or overwrite the one with the same title.

    Args:
        data: JSON request body; the ``title`` and ``content`` keys are read.

    Returns:
        ``{"status": "success"}`` after the row is committed, otherwise
        ``{"status": "error", "message": ...}`` describing the failure.
    """
    title = data.get("title")
    content = data.get("content")
    # Reject incomplete payloads up front. Without this check a missing
    # title would be embedded as the text "None" and stored as a NULL title.
    if not title or content is None:
        return {"status": "error", "message": "Missing title or content"}

    # The embedding covers the title and the stringified content together.
    text_to_vector = f"{title} {content}"
    vector = model.encode(text_to_vector).tolist()

    conn = sqlite3.connect(DB_FILE)
    try:
        # Content and embedding are both stored as JSON text.
        conn.execute("""
INSERT INTO documents (title, content, embedding)
VALUES (?, ?, ?) ON CONFLICT(title) DO
UPDATE SET
content=excluded.content,
embedding=excluded.embedding
""", (title, json.dumps(content), json.dumps(vector)))
        conn.commit()
        return {"status": "success"}
    except Exception as e:
        # Failures are reported in the response body rather than raised.
        return {"status": "error", "message": str(e)}
    finally:
        conn.close()
@app.get("/load-document")
async def load_document(title: str = None):
    """Return one stored document.

    Args:
        title: Title to look up. When empty or omitted, the row with the
            highest ``id`` is returned instead.

    Returns:
        ``{"title": ..., "content": ...}`` with the content decoded from its
        stored JSON text, or ``{"error": ...}`` when no row matches.
    """
    conn = sqlite3.connect(DB_FILE)
    try:
        if title:
            row = conn.execute("SELECT title, content FROM documents WHERE title = ?", (title,)).fetchone()
        else:
            row = conn.execute("SELECT title, content FROM documents ORDER BY id DESC LIMIT 1").fetchone()
    finally:
        # Close even if the query raises, so the connection is not leaked.
        conn.close()
    if row:
        return {"title": row[0], "content": json.loads(row[1])}
    return {"error": "Nie znaleziono dokumentu"}
if __name__ == "__main__":
    # Direct execution: serve the app with uvicorn on the loopback address.
    uvicorn.run(app, port=8000, host="127.0.0.1")

View File

@@ -0,0 +1,101 @@
import sqlite3
import json
import os
from fastapi import FastAPI, Body, HTTPException
from fastapi.middleware.cors import CORSMiddleware
import uvicorn
# Application object; the route decorators further down register on it.
app = FastAPI()

# CORS is left fully open: every origin, method and header is accepted.
app.add_middleware(
    CORSMiddleware,
    allow_headers=["*"],
    allow_methods=["*"],
    allow_origins=["*"],
)

# The SQLite file is kept in the same directory as this module.
BASE_DIR = os.path.dirname(os.path.abspath(__file__))
DB_FILE = os.path.join(BASE_DIR, "archivium.db")
def get_db_connection():
    """Open a connection to ``DB_FILE`` set up for this module's queries.

    Returns:
        A ``sqlite3.Connection`` whose ``row_factory`` is ``sqlite3.Row``
        (so columns can be read by name) and on which
        ``PRAGMA journal_mode=WAL`` has been issued. The caller is
        responsible for closing it.
    """
    connection = sqlite3.connect(DB_FILE)
    connection.row_factory = sqlite3.Row
    connection.execute("PRAGMA journal_mode=WAL;")
    return connection
def init_db():
    """Create the ``archive`` table if it does not exist yet.

    Columns: autoincrementing ``id``, unique ``filename``, ``ocr_text``,
    ``metadata`` and a ``created_at`` timestamp defaulting to the current
    time. The connection is closed explicitly, because using a sqlite3
    connection as a context manager only commits or rolls back.
    """
    conn = get_db_connection()
    try:
        with conn:  # commits on success, rolls back on error
            # The SQL text is kept verbatim, hence the flush-left lines.
            conn.execute("""
CREATE TABLE IF NOT EXISTS archive
(
id
INTEGER
PRIMARY
KEY
AUTOINCREMENT,
filename
TEXT
UNIQUE,
ocr_text
TEXT,
metadata
TEXT,
created_at
TIMESTAMP
DEFAULT
CURRENT_TIMESTAMP
)
""")
    finally:
        conn.close()


# Ensure the schema exists as soon as the module is imported.
init_db()
@app.post("/save-document")
async def save_document(data: dict = Body(...)):
    """Insert an archive entry, or overwrite the one with the same filename.

    Args:
        data: JSON request body; the ``title`` and ``content`` keys are read.
            ``title`` is stored in the ``filename`` column and the
            JSON-encoded ``content`` in ``ocr_text``.

    Returns:
        ``{"status": "success"}`` once the row has been committed.

    Raises:
        HTTPException: 400 when ``title`` is empty or missing, or ``content``
            is missing; 500 when the database operation fails.
    """
    title = data.get("title")
    content = data.get("content")
    if not title or content is None:
        raise HTTPException(status_code=400, detail="Missing title or content")
    content_str = json.dumps(content)
    try:
        conn = get_db_connection()
        try:
            # `with conn` only commits or rolls back; closing is separate.
            with conn:
                conn.execute("""
INSERT INTO archive (filename, ocr_text)
VALUES (?, ?) ON CONFLICT(filename) DO
UPDATE SET
ocr_text=excluded.ocr_text
""", (title, content_str))
        finally:
            conn.close()
    except Exception as e:
        # Report any failure as a 500, keeping the original cause chained.
        raise HTTPException(status_code=500, detail=str(e)) from e
    return {"status": "success"}
@app.get("/load-document")
async def load_document(title: str = None):
    """Return one archive entry.

    Args:
        title: Filename to look up. When empty or omitted, the row with the
            highest ``id`` is returned instead.

    Returns:
        ``{"title": ..., "content": ...}``. The content is the stored
        ``ocr_text`` parsed as JSON, or the raw stored value when it is not
        valid JSON.

    Raises:
        HTTPException: 404 when no row matches.
    """
    conn = get_db_connection()
    try:
        if title:
            row = conn.execute("SELECT filename, ocr_text FROM archive WHERE filename = ?", (title,)).fetchone()
        else:
            row = conn.execute("SELECT filename, ocr_text FROM archive ORDER BY id DESC LIMIT 1").fetchone()
    finally:
        # The sqlite3 context manager would not close the connection.
        conn.close()
    if row is None:
        raise HTTPException(status_code=404, detail="Document not found")
    try:
        content_val = json.loads(row['ocr_text'])
    except (TypeError, ValueError):
        # TypeError: the column is NULL. ValueError: the text is not JSON.
        # Either way, hand back the stored value unchanged.
        content_val = row['ocr_text']
    return {"title": row['filename'], "content": content_val}
if __name__ == "__main__":
    # Direct execution: serve the app with uvicorn on the loopback address.
    uvicorn.run(app, port=8000, host="127.0.0.1")

134
Database/vector_database.py Normal file
View File

@@ -0,0 +1,134 @@
import sqlite3
import os
import numpy as np
from fastapi import FastAPI, Body, HTTPException
from fastapi.middleware.cors import CORSMiddleware
from sentence_transformers import SentenceTransformer
import uvicorn
# Application object; the route decorators further down register on it.
app = FastAPI()

# CORS is left fully open: every origin, method and header is accepted.
app.add_middleware(
    CORSMiddleware,
    allow_headers=["*"],
    allow_methods=["*"],
    allow_origins=["*"],
)
# The database file and the saved model both live next to this module.
BASE_DIR = os.path.dirname(os.path.abspath(__file__))
DB_FILE = os.path.join(BASE_DIR, "assets.db")
MODEL_DIR = os.path.join(BASE_DIR, "local_model_miniLM")

if os.path.exists(MODEL_DIR):
    # A copy was saved by an earlier run; load it straight from disk.
    model = SentenceTransformer(MODEL_DIR)
else:
    # No local copy yet: load the model by name and save it to MODEL_DIR
    # so that later runs take the branch above.
    model = SentenceTransformer('all-MiniLM-L6-v2')
    model.save(MODEL_DIR)
def get_db_connection():
    """Return a new connection to ``DB_FILE``.

    The connection yields ``sqlite3.Row`` rows, so columns can be read by
    name, and ``PRAGMA journal_mode=WAL`` is issued on it before it is
    returned. Closing it is left to the caller.
    """
    db = sqlite3.connect(DB_FILE)
    db.row_factory = sqlite3.Row
    db.execute("PRAGMA journal_mode=WAL;")
    return db
def init_db():
    """Create the ``documents`` table if it does not exist yet.

    Columns: autoincrementing ``id``, unique ``title``, ``content`` (BLOB),
    ``content_type`` (TEXT) and ``embedding`` (BLOB). The connection is
    closed explicitly, because using a sqlite3 connection as a context
    manager only commits or rolls back.
    """
    conn = get_db_connection()
    try:
        with conn:  # commits on success, rolls back on error
            # The SQL text is kept verbatim, hence the flush-left lines.
            conn.execute("""
CREATE TABLE IF NOT EXISTS documents
(
id
INTEGER
PRIMARY
KEY
AUTOINCREMENT,
title
TEXT
UNIQUE,
content
BLOB,
content_type
TEXT,
embedding
BLOB
)
""")
    finally:
        conn.close()


# Ensure the schema exists as soon as the module is imported.
init_db()
@app.post("/save-document")
async def save_document(
    title: str = Body(...),
    content: str = Body(...),
    content_type: str = Body("text/plain")
):
    """Insert a document with its embedding, or overwrite the same title.

    Args:
        title: Unique document title.
        content: Document text; stored as UTF-8 bytes.
        content_type: Stored alongside the content; defaults to
            ``"text/plain"``.

    Returns:
        ``{"status": "success", "message": ...}`` once the row is committed.

    Raises:
        HTTPException: 500 when the database operation fails.
    """
    # The embedding covers title and content, stored as raw float32 bytes.
    vector = model.encode(f"{title} {content}").astype(np.float32).tobytes()
    try:
        conn = get_db_connection()
        try:
            # `with conn` only commits or rolls back; closing is separate.
            with conn:
                conn.execute("""
INSERT INTO documents (title, content, content_type, embedding)
VALUES (?, ?, ?, ?) ON CONFLICT(title) DO
UPDATE SET
content=excluded.content,
content_type=excluded.content_type,
embedding=excluded.embedding
""", (title, content.encode('utf-8'), content_type, vector))
        finally:
            conn.close()
    except Exception as e:
        # Report any failure as a 500, keeping the original cause chained.
        raise HTTPException(status_code=500, detail=str(e)) from e
    return {"status": "success", "message": f"Dokument '{title}' zapisany."}
@app.post("/search")
async def search_similar(query: str = Body(..., embed=True), top_k: int = 3):
    """Semantic (vector) search over the stored documents.

    Every stored embedding is compared with the embedding of ``query`` by
    cosine similarity, and the best matches are returned.

    Args:
        query: Text to search for, read from the ``query`` key of the body.
        top_k: Maximum number of results to return.

    Returns:
        ``{"results": [...]}`` where each item has ``title``, ``content`` and
        ``score``, ordered from highest to lowest score.

    Raises:
        HTTPException: 400 when ``top_k`` is negative.
    """
    # A negative limit would make the final slice drop items from the end
    # instead of limiting the result count.
    if top_k < 0:
        raise HTTPException(status_code=400, detail="Parametr top_k nie może być ujemny.")

    query_vector = model.encode(query).astype(np.float32)
    # The query norm is the same for every row, so compute it once.
    query_norm = np.linalg.norm(query_vector)

    conn = get_db_connection()
    try:
        rows = conn.execute("SELECT title, content, embedding FROM documents").fetchall()
    finally:
        # The sqlite3 context manager would not close the connection.
        conn.close()

    results = []
    for row in rows:
        db_vector = np.frombuffer(row['embedding'], dtype=np.float32)
        denominator = query_norm * np.linalg.norm(db_vector)
        if denominator > 0:
            score = float(np.dot(query_vector, db_vector) / denominator)
        else:
            # Cosine similarity is undefined for a zero-length vector. The
            # division would give NaN, which is not valid JSON and does not
            # sort meaningfully, so rank such rows as unrelated.
            score = 0.0
        results.append({
            "title": row['title'],
            "content": row['content'].decode('utf-8', errors='ignore'),
            "score": score
        })
    results.sort(key=lambda x: x['score'], reverse=True)
    return {"results": results[:top_k]}
@app.get("/load-document")
async def load_document(title: str = None):
    """Return one stored document.

    Args:
        title: Title to look up. When empty or omitted, the row with the
            highest ``id`` is returned instead.

    Returns:
        ``{"title": ..., "content": ...}`` with the content decoded as UTF-8
        (undecodable bytes are dropped), or ``{"error": ...}`` when no row
        matches.
    """
    conn = get_db_connection()
    try:
        if title:
            row = conn.execute("SELECT title, content FROM documents WHERE title = ?", (title,)).fetchone()
        else:
            row = conn.execute("SELECT title, content FROM documents ORDER BY id DESC LIMIT 1").fetchone()
    finally:
        # The sqlite3 context manager would not close the connection.
        conn.close()
    if row:
        return {
            "title": row['title'],
            "content": row['content'].decode('utf-8', errors='ignore')
        }
    return {"error": "Nie znaleziono dokumentu"}
if __name__ == "__main__":
    # Direct execution: serve the app with uvicorn on the loopback address.
    uvicorn.run(app, port=8000, host="127.0.0.1")