"""FastAPI app: upload contract PDF → analyze against checklist → annotated PDF.""" import asyncio import logging import os import uuid from pathlib import Path from fastapi import FastAPI, File, HTTPException, UploadFile from fastapi.middleware.cors import CORSMiddleware from fastapi.responses import FileResponse from fastapi.staticfiles import StaticFiles from pydantic import BaseModel from analyzer import analyze_contract from checklist import DEFAULT_CHECKLIST from pdf_annotator import annotate logging.basicConfig(level=logging.INFO) logger = logging.getLogger(__name__) app = FastAPI(title="Contract Terms Check") app.add_middleware(CORSMiddleware, allow_origins=["*"], allow_methods=["*"], allow_headers=["*"]) WORK_DIR = Path(os.getenv("WORK_DIR", "/tmp/contract-check")) WORK_DIR.mkdir(parents=True, exist_ok=True) jobs: dict[str, dict] = {} @app.get("/") async def root(): return FileResponse("static/index.html") @app.get("/api/checklist") async def get_checklist(): """Default checklist items the UI can pre-populate.""" return {"items": DEFAULT_CHECKLIST} @app.post("/api/upload") async def upload(file: UploadFile = File(...)): suffix = Path(file.filename or "").suffix.lower() if suffix != ".pdf": raise HTTPException(400, "Podporovaný formát: .pdf") job_id = str(uuid.uuid4()) job_dir = WORK_DIR / job_id job_dir.mkdir() input_path = job_dir / "input.pdf" raw = await file.read() input_path.write_bytes(raw) logger.info("Job %s: %s (%d bytes)", job_id, file.filename, len(raw)) jobs[job_id] = { "filename": file.filename, "job_dir": str(job_dir), "input_path": str(input_path), "analysis": None, "checklist": None, } return {"job_id": job_id} class AnalyzeRequest(BaseModel): items: list[dict] # [{id, label, hint?, default?}] @app.post("/api/analyze/{job_id}") async def analyze(job_id: str, req: AnalyzeRequest): if job_id not in jobs: raise HTTPException(404, "Úloha nenalezena") if not req.items: raise HTTPException(400, "Vyberte alespoň jednu položku ke kontrole") job = jobs[job_id] input_path = Path(job["input_path"]) try: analysis = await analyze_contract(input_path, req.items) except Exception as exc: logger.exception("Analysis failed") raise HTTPException(500, str(exc)) # Merge LLM-returned items with the original checklist labels so the UI # can show the user-facing label even if the LLM was terse. labels_by_id = {it["id"]: it["label"] for it in req.items} for it in analysis.get("items", []): if "label" not in it and it.get("id") in labels_by_id: it["label"] = labels_by_id[it["id"]] job["analysis"] = analysis job["checklist"] = req.items job["used_ocr"] = bool(analysis.get("_used_ocr")) return analysis @app.get("/api/annotated/{job_id}") async def annotated_pdf(job_id: str): if job_id not in jobs: raise HTTPException(404, "Úloha nenalezena") job = jobs[job_id] if not job.get("analysis"): raise HTTPException(400, "Nejprve spusťte analýzu") input_path = Path(job["input_path"]) out_path = Path(job["job_dir"]) / "annotated.pdf" analysis = job["analysis"] # For OCR-only contracts the original PDF has no text layer; skip the # excerpt-search step so we don't waste time on guaranteed misses. skip_highlights = bool(job.get("used_ocr")) try: await asyncio.to_thread( annotate, input_path, out_path, analysis.get("items", []), analysis.get("overall_summary", ""), analysis.get("risk_level", ""), skip_highlights, job.get("filename") or "", ) except Exception as exc: logger.exception("Annotation failed") raise HTTPException(500, f"Anotace selhala: {exc}") stem = Path(job["filename"]).stem if job.get("filename") else "smlouva" return FileResponse( str(out_path), media_type="application/pdf", filename=f"kontrola_{stem}.pdf", ) @app.get("/health") async def health(): return {"status": "ok"} app.mount("/static", StaticFiles(directory="static"), name="static")