Files
Ondřej Glaser 48cef99257 Initial portal commit: landing + 9 AI-powered apps
Apps:
- dwg-rooms: extract room numbers from DWG/DXF
- dwg-counting: count symbols in PDF drawings (OpenCV template matching)
- contract-check: review PDF contracts against a checklist (Claude vision + Tesseract OCR fallback)
- email-drafter: bullet notes → polished Czech/English business emails
- invoice-extractor: PDF/image invoice → structured data → Excel
- translator: Czech-first translator across 19 languages with tone control
- vv-check: find inconsistent unit prices across VV sheets in one workbook
- vv-compare: diff original vs new VV files (changes / added / removed)
- feature-request: portal users submit ideas + sample files

Infrastructure:
- LiteLLM gateway with per-app virtual keys + budgets
- Langfuse observability
- Geist font, shared theme, cross-subdomain back link + theme sync via cookie/URL
- Caddy reverse proxy on *.klas.chat
2026-05-13 15:25:04 +02:00

135 lines
4.2 KiB
Python

"""FastAPI app: upload contract PDF → analyze against checklist → annotated PDF."""
import asyncio
import logging
import os
import uuid
from pathlib import Path
from fastapi import FastAPI, File, HTTPException, UploadFile
from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import FileResponse
from fastapi.staticfiles import StaticFiles
from pydantic import BaseModel
from analyzer import analyze_contract
from checklist import DEFAULT_CHECKLIST
from pdf_annotator import annotate
# Emit INFO-level records so the per-job upload log lines are visible.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

app = FastAPI(title="Contract Terms Check")
# CORS is fully open: any origin, method and header is accepted.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_methods=["*"],
    allow_headers=["*"],
)

# Root directory for per-job working folders; overridable via $WORK_DIR.
WORK_DIR = Path(os.environ.get("WORK_DIR", "/tmp/contract-check"))
WORK_DIR.mkdir(parents=True, exist_ok=True)

# In-process job registry keyed by job id. Entries are never evicted here and
# are lost when the process restarts.
jobs: dict[str, dict] = {}
@app.get("/")
async def root():
    """Serve the front-end entry page, ``static/index.html``."""
    index_page = "static/index.html"
    return FileResponse(index_page)
@app.get("/api/checklist")
async def get_checklist():
    """Return the built-in checklist so the UI can pre-fill its item list."""
    payload = {"items": DEFAULT_CHECKLIST}
    return payload
@app.post("/api/upload")
async def upload(file: UploadFile = File(...)):
    """Store an uploaded contract PDF and register a new job for it.

    The upload is accepted based on its ``.pdf`` extension (case-insensitive)
    and must be non-empty. It is written to ``<WORK_DIR>/<job_id>/input.pdf``
    and a fresh entry is added to the in-process ``jobs`` registry.

    Args:
        file: The multipart file part sent by the client.

    Returns:
        ``{"job_id": <uuid4 string>}`` identifying the newly created job.

    Raises:
        HTTPException: 400 when the filename does not end in ``.pdf`` or the
            uploaded file contains no bytes.
    """
    suffix = Path(file.filename or "").suffix.lower()
    if suffix != ".pdf":
        raise HTTPException(400, "Podporovaný formát: .pdf")

    # Read and validate the payload before touching the filesystem so that a
    # rejected upload does not leave an orphaned job directory behind.
    raw = await file.read()
    if not raw:
        # A zero-byte file can never be analyzed; fail fast with a client
        # error instead of registering a job that will fail later.
        raise HTTPException(400, "Nahraný soubor je prázdný")

    job_id = str(uuid.uuid4())
    job_dir = WORK_DIR / job_id
    job_dir.mkdir()
    input_path = job_dir / "input.pdf"

    # Disk writes are blocking; run them in a worker thread so the event loop
    # keeps serving other requests while a large PDF is flushed.
    await asyncio.to_thread(input_path.write_bytes, raw)
    logger.info("Job %s: %s (%d bytes)", job_id, file.filename, len(raw))

    jobs[job_id] = {
        "filename": file.filename,
        "job_dir": str(job_dir),
        "input_path": str(input_path),
        "analysis": None,
        "checklist": None,
    }
    return {"job_id": job_id}
class AnalyzeRequest(BaseModel):
    # Request body of the analyze endpoint.
    #
    # `items` is the checklist to evaluate. Entries are free-form dicts; the
    # intended shape is {id, label, hint?, default?}, but nothing beyond
    # "list of dicts" is enforced by this model.
    items: list[dict]
@app.post("/api/analyze/{job_id}")
async def analyze(job_id: str, req: AnalyzeRequest):
    """Run the checklist analysis for a previously uploaded contract.

    The analysis mapping returned by ``analyze_contract`` is stored on the job
    together with the submitted checklist and an OCR flag (taken from the
    ``_used_ocr`` key), and is returned to the caller.

    Args:
        job_id: Identifier returned by the upload endpoint.
        req: Checklist items to evaluate; each item needs ``id`` and ``label``.

    Returns:
        The analysis mapping, with a ``label`` filled in on every returned
        item that lacked one and whose ``id`` matches a submitted item.

    Raises:
        HTTPException: 404 for an unknown job; 400 for an empty checklist or
            an item without a usable ``id``/``label``; 500 when the analysis
            itself fails.
    """
    if job_id not in jobs:
        raise HTTPException(404, "Úloha nenalezena")
    if not req.items:
        raise HTTPException(400, "Vyberte alespoň jednu položku ke kontrole")

    # Build the id -> label map BEFORE the (potentially slow and costly)
    # analysis call. A malformed item is a client error and should be reported
    # as such up front, not as an unhandled KeyError/TypeError after the
    # analysis has already run.
    try:
        labels_by_id = {it["id"]: it["label"] for it in req.items}
    except (KeyError, TypeError) as exc:
        raise HTTPException(
            400, "Každá položka musí obsahovat 'id' a 'label'"
        ) from exc

    job = jobs[job_id]
    input_path = Path(job["input_path"])
    try:
        analysis = await analyze_contract(input_path, req.items)
    except Exception as exc:
        logger.exception("Analysis failed")
        raise HTTPException(500, str(exc)) from exc

    # Merge LLM-returned items with the original checklist labels so the UI
    # can show the user-facing label even if the LLM was terse.
    for it in analysis.get("items", []):
        if "label" not in it and it.get("id") in labels_by_id:
            it["label"] = labels_by_id[it["id"]]

    job["analysis"] = analysis
    job["checklist"] = req.items
    job["used_ocr"] = bool(analysis.get("_used_ocr"))
    return analysis
@app.get("/api/annotated/{job_id}")
async def annotated_pdf(job_id: str):
    """Build and return the annotated PDF for an already-analyzed job.

    The annotation runs in a worker thread and writes ``annotated.pdf`` into
    the job directory; the result is sent back as ``kontrola_<stem>.pdf``.

    Raises:
        HTTPException: 404 for an unknown job, 400 when no analysis has been
            stored yet, 500 when annotation fails.
    """
    job = jobs.get(job_id)
    if job is None:
        raise HTTPException(404, "Úloha nenalezena")

    analysis = job.get("analysis")
    if not analysis:
        raise HTTPException(400, "Nejprve spusťte analýzu")

    source_pdf = Path(job["input_path"])
    target_pdf = Path(job["job_dir"]) / "annotated.pdf"

    # OCR-only contracts have no text layer in the original PDF, so searching
    # it for excerpts would only produce misses; tell the annotator to skip it.
    skip_highlights = bool(job.get("used_ocr"))

    try:
        # Arguments are gathered inside the try so that any failure while
        # reading the analysis is reported the same way as an annotate error.
        annotate_args = (
            source_pdf,
            target_pdf,
            analysis.get("items", []),
            analysis.get("overall_summary", ""),
            analysis.get("risk_level", ""),
            skip_highlights,
            job.get("filename") or "",
        )
        await asyncio.to_thread(annotate, *annotate_args)
    except Exception as exc:
        logger.exception("Annotation failed")
        raise HTTPException(500, f"Anotace selhala: {exc}")

    # Name the download after the uploaded file, with a generic fallback.
    original_name = job.get("filename")
    if original_name:
        stem = Path(original_name).stem
    else:
        stem = "smlouva"

    return FileResponse(
        str(target_pdf),
        media_type="application/pdf",
        filename=f"kontrola_{stem}.pdf",
    )
@app.get("/health")
async def health():
    """Health-check endpoint; unconditionally reports status ``ok``."""
    return dict(status="ok")
# Expose the contents of the local "static" directory under the /static URL
# prefix. This is registered last, after all route handlers in this file.
app.mount("/static", StaticFiles(directory="static"), name="static")