Initial portal commit: landing + 9 AI-powered apps

Apps:
- dwg-rooms: extract room numbers from DWG/DXF
- dwg-counting: count symbols in PDF drawings (OpenCV template matching)
- contract-check: review PDF contracts against a checklist (Claude vision + Tesseract OCR fallback)
- email-drafter: bullet notes → polished Czech/English business emails
- invoice-extractor: PDF/image invoice → structured data → Excel
- translator: Czech-first translator across 19 languages with tone control
- vv-check: find inconsistent unit prices across VV sheets in one workbook
- vv-compare: diff original vs new VV files (changes / added / removed)
- feature-request: portal users submit ideas + sample files

Infrastructure:
- LiteLLM gateway with per-app virtual keys + budgets
- Langfuse observability
- Geist font, shared theme, cross-subdomain back link + theme sync via cookie/URL
- Caddy reverse proxy on *.klas.chat
2026-05-13 15:25:04 +02:00
commit 48cef99257
139 changed files with 20171 additions and 0 deletions
--- a/contract-check/main.py
+++ b/contract-check/main.py
@@ -0,0 +1,134 @@
+"""FastAPI app: upload contract PDF → analyze against checklist → annotated PDF."""
+import asyncio
+import logging
+import os
+import uuid
+from pathlib import Path
+
+from fastapi import FastAPI, File, HTTPException, UploadFile
+from fastapi.middleware.cors import CORSMiddleware
+from fastapi.responses import FileResponse
+from fastapi.staticfiles import StaticFiles
+from pydantic import BaseModel
+
+from analyzer import analyze_contract
+from checklist import DEFAULT_CHECKLIST
+from pdf_annotator import annotate
+
+# Root logger at INFO so the per-job log lines emitted below are visible.
+logging.basicConfig(level=logging.INFO)
+logger = logging.getLogger(__name__)
+
+app = FastAPI(title="Contract Terms Check")
+# CORS is fully open: every origin, method and header is allowed.
+app.add_middleware(
+    CORSMiddleware,
+    allow_origins=["*"],
+    allow_methods=["*"],
+    allow_headers=["*"],
+)
+
+# Scratch space holding one sub-directory per job; the location can be
+# overridden through the WORK_DIR environment variable.
+WORK_DIR = Path(os.environ.get("WORK_DIR", "/tmp/contract-check"))
+WORK_DIR.mkdir(parents=True, exist_ok=True)
+
+# In-memory job registry keyed by job id. Each value carries "filename",
+# "job_dir", "input_path", "analysis", "checklist" and, once analysed,
+# "used_ocr". The code in this module never removes entries.
+jobs: dict[str, dict] = {}
+
+
+@app.get("/")
+async def root():
+    # Serve static/index.html; the relative path is resolved against the
+    # process working directory.
+    index_page = "static/index.html"
+    return FileResponse(index_page)
+
+
+@app.get("/api/checklist")
+async def get_checklist():
+    """Default checklist items the UI can pre-populate."""
+    # The defaults are returned under the "items" key of a JSON object.
+    payload = {"items": DEFAULT_CHECKLIST}
+    return payload
+
+
+@app.post("/api/upload")
+async def upload(file: UploadFile = File(...)):
+    """Store an uploaded contract PDF and register a new job for it.
+
+    Args:
+        file: The uploaded document. Only a ``.pdf`` file-name suffix
+            (case-insensitive) is accepted.
+
+    Returns:
+        ``{"job_id": <uuid4 string>}`` identifying the stored upload.
+
+    Raises:
+        HTTPException: 400 when the suffix is not ``.pdf`` or the upload
+            contains no bytes.
+    """
+    suffix = Path(file.filename or "").suffix.lower()
+    if suffix != ".pdf":
+        raise HTTPException(400, "Podporovaný formát: .pdf")
+
+    # Read the body before touching the disk so that a rejected or failed
+    # upload does not leave an orphan job directory behind.
+    raw = await file.read()
+    if not raw:
+        # An empty body cannot be a PDF; fail here instead of during analysis.
+        raise HTTPException(400, "Nahraný soubor je prázdný")
+
+    job_id = str(uuid.uuid4())
+    job_dir = WORK_DIR / job_id
+    job_dir.mkdir()
+
+    input_path = job_dir / "input.pdf"
+    # write_bytes blocks; run it in a worker thread so the event loop keeps
+    # serving other requests while the file is written.
+    await asyncio.to_thread(input_path.write_bytes, raw)
+    logger.info("Job %s: %s (%d bytes)", job_id, file.filename, len(raw))
+
+    jobs[job_id] = {
+        "filename": file.filename,
+        "job_dir": str(job_dir),
+        "input_path": str(input_path),
+        "analysis": None,
+        "checklist": None,
+    }
+    return {"job_id": job_id}
+
+
+class AnalyzeRequest(BaseModel):
+    # Request body of POST /api/analyze/{job_id}.
+    # `items` holds the checklist entries to evaluate. The analyze endpoint
+    # reads "id" and "label" from each entry; per the shape note below,
+    # "hint" and "default" are optional.
+    items: list[dict]  # [{id, label, hint?, default?}]
+
+
+@app.post("/api/analyze/{job_id}")
+async def analyze(job_id: str, req: AnalyzeRequest):
+    """Analyse an uploaded contract against a checklist and store the result.
+
+    Args:
+        job_id: Identifier returned by the upload endpoint.
+        req: Checklist entries the contract is checked against.
+
+    Returns:
+        The analysis produced by ``analyze_contract``, with missing item
+        labels filled in from the submitted checklist.
+
+    Raises:
+        HTTPException: 404 for an unknown job, 400 for an empty checklist,
+            500 when the analysis call fails.
+    """
+    job = jobs.get(job_id)
+    if job is None:
+        raise HTTPException(404, "Úloha nenalezena")
+    if not req.items:
+        raise HTTPException(400, "Vyberte alespoň jednu položku ke kontrole")
+    input_path = Path(job["input_path"])
+    try:
+        analysis = await analyze_contract(input_path, req.items)
+    except Exception as exc:
+        logger.exception("Analysis failed")
+        # Chain the cause so the original traceback stays attached.
+        raise HTTPException(500, str(exc)) from exc
+    # Merge LLM-returned items with the original checklist labels so the UI
+    # can show the user-facing label even if the LLM was terse.
+    # Checklist entries are unvalidated dicts: skip any that lack "id" or
+    # "label" rather than raising KeyError once the analysis has already run.
+    labels_by_id = {
+        entry["id"]: entry["label"]
+        for entry in req.items
+        if "id" in entry and "label" in entry
+    }
+    # `or []` also covers an "items" key that is present but null, which a
+    # plain .get default would let through.
+    for result in analysis.get("items") or []:
+        if "label" not in result and result.get("id") in labels_by_id:
+            result["label"] = labels_by_id[result["id"]]
+    job["analysis"] = analysis
+    job["checklist"] = req.items
+    job["used_ocr"] = bool(analysis.get("_used_ocr"))
+    return analysis
+
+
+@app.get("/api/annotated/{job_id}")
+async def annotated_pdf(job_id: str):
+    """Render and return the annotated PDF for an analysed job.
+
+    Args:
+        job_id: Identifier returned by the upload endpoint.
+
+    Returns:
+        A ``FileResponse`` serving ``annotated.pdf`` from the job directory
+        under the download name ``kontrola_<stem of the uploaded name>.pdf``
+        (``kontrola_smlouva.pdf`` when no file name was recorded).
+
+    Raises:
+        HTTPException: 404 for an unknown job, 400 when no analysis has been
+            stored yet, 500 when annotation fails.
+    """
+    job = jobs.get(job_id)
+    if job is None:
+        raise HTTPException(404, "Úloha nenalezena")
+    analysis = job.get("analysis")
+    if not analysis:
+        raise HTTPException(400, "Nejprve spusťte analýzu")
+    input_path = Path(job["input_path"])
+    out_path = Path(job["job_dir"]) / "annotated.pdf"
+    # For OCR-only contracts the original PDF has no text layer; skip the
+    # excerpt-search step so we don't waste time on guaranteed misses.
+    skip_highlights = bool(job.get("used_ocr"))
+    try:
+        # annotate runs in a worker thread so the event loop stays free.
+        # `or` is used instead of a .get default because a key that is
+        # present with a null value would otherwise pass None through.
+        await asyncio.to_thread(
+            annotate, input_path, out_path,
+            analysis.get("items") or [],
+            analysis.get("overall_summary") or "",
+            analysis.get("risk_level") or "",
+            skip_highlights,
+            job.get("filename") or "",
+        )
+    except Exception as exc:
+        logger.exception("Annotation failed")
+        # Chain the cause so the original traceback stays attached.
+        raise HTTPException(500, f"Anotace selhala: {exc}") from exc
+    stem = Path(job["filename"]).stem if job.get("filename") else "smlouva"
+    return FileResponse(
+        str(out_path),
+        media_type="application/pdf",
+        filename=f"kontrola_{stem}.pdf",
+    )
+
+
+@app.get("/health")
+async def health():
+    # Constant payload; no dependency is checked here.
+    status_payload = {"status": "ok"}
+    return status_payload
+
+
+# Expose the contents of the relative "static" directory under /static.
+app.mount(
+    "/static",
+    StaticFiles(directory="static"),
+    name="static",
+)