Initial portal commit: landing + 9 AI-powered apps
Apps:
- dwg-rooms: extract room numbers from DWG/DXF
- dwg-counting: count symbols in PDF drawings (OpenCV template matching)
- contract-check: review PDF contracts against a checklist (Claude vision + Tesseract OCR fallback)
- email-drafter: bullet notes → polished Czech/English business emails
- invoice-extractor: PDF/image invoice → structured data → Excel
- translator: Czech-first translator across 19 languages with tone control
- vv-check: find inconsistent unit prices across VV sheets in one workbook
- vv-compare: diff original vs new VV files (changes / added / removed)
- feature-request: portal users submit ideas + sample files

Infrastructure:
- LiteLLM gateway with per-app virtual keys + budgets
- Langfuse observability
- Geist font, shared theme, cross-subdomain back link + theme sync via cookie/URL
- Caddy reverse proxy on *.klas.chat
This commit is contained in:
134
contract-check/main.py
Normal file
134
contract-check/main.py
Normal file
@@ -0,0 +1,134 @@
|
||||
"""FastAPI app: upload contract PDF → analyze against checklist → annotated PDF."""
|
||||
import asyncio
|
||||
import logging
|
||||
import os
|
||||
import uuid
|
||||
from pathlib import Path
|
||||
|
||||
from fastapi import FastAPI, File, HTTPException, UploadFile
|
||||
from fastapi.middleware.cors import CORSMiddleware
|
||||
from fastapi.responses import FileResponse
|
||||
from fastapi.staticfiles import StaticFiles
|
||||
from pydantic import BaseModel
|
||||
|
||||
from analyzer import analyze_contract
|
||||
from checklist import DEFAULT_CHECKLIST
|
||||
from pdf_annotator import annotate
|
||||
|
||||
# Emit INFO-level records and above through the root logger.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

app = FastAPI(title="Contract Terms Check")
# CORS is fully open here: every origin, method and header is allowed.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_methods=["*"],
    allow_headers=["*"],
)

# Root directory for per-job files; the WORK_DIR environment variable
# overrides the default location.
WORK_DIR = Path(os.environ.get("WORK_DIR", "/tmp/contract-check"))
WORK_DIR.mkdir(parents=True, exist_ok=True)

# In-process job registry keyed by job id. It is a plain module-level dict,
# so its contents live only as long as this process does.
jobs: dict[str, dict] = {}
|
||||
|
||||
|
||||
@app.get("/")
async def root():
    """Serve the front-end entry page from the ``static`` directory."""
    index_page = "static/index.html"
    return FileResponse(index_page)
|
||||
|
||||
|
||||
@app.get("/api/checklist")
async def get_checklist():
    """Return the default checklist so the UI can pre-populate its form.

    Returns:
        A mapping with a single ``"items"`` key holding ``DEFAULT_CHECKLIST``.
    """
    return dict(items=DEFAULT_CHECKLIST)
|
||||
|
||||
|
||||
@app.post("/api/upload")
async def upload(file: UploadFile = File(...)):
    """Store an uploaded contract PDF and register a new job for it.

    Args:
        file: The multipart upload. Its filename must carry a ``.pdf``
            extension (compared case-insensitively).

    Returns:
        ``{"job_id": <uuid4 string>}`` identifying the newly registered job.

    Raises:
        HTTPException: 400 when the extension is not ``.pdf`` or when the
            uploaded payload is empty.
    """
    suffix = Path(file.filename or "").suffix.lower()
    if suffix != ".pdf":
        raise HTTPException(400, "Podporovaný formát: .pdf")

    # Read the payload before touching the disk so that a failed or empty
    # upload never leaves an orphaned job directory behind.
    raw = await file.read()
    if not raw:
        raise HTTPException(400, "Nahraný soubor je prázdný")

    job_id = str(uuid.uuid4())
    job_dir = WORK_DIR / job_id
    job_dir.mkdir()

    input_path = job_dir / "input.pdf"
    # Writing to disk blocks; run it in a worker thread so the event loop
    # stays free to serve other requests.
    await asyncio.to_thread(input_path.write_bytes, raw)
    logger.info("Job %s: %s (%d bytes)", job_id, file.filename, len(raw))

    # "analysis" and "checklist" stay None until the analyze step fills them.
    jobs[job_id] = {
        "filename": file.filename,
        "job_dir": str(job_dir),
        "input_path": str(input_path),
        "analysis": None,
        "checklist": None,
    }
    return {"job_id": job_id}
|
||||
|
||||
|
||||
class AnalyzeRequest(BaseModel):
    """Request body carrying the checklist entries to check.

    Attributes:
        items: Checklist entries, each a dict shaped like
            ``{id, label, hint?, default?}``.
    """

    items: list[dict]
|
||||
|
||||
|
||||
@app.post("/api/analyze/{job_id}")
async def analyze(job_id: str, req: AnalyzeRequest):
    """Run the checklist analysis for an uploaded contract and store the result.

    Args:
        job_id: Identifier of a previously registered job.
        req: Request body with the checklist items to check.

    Returns:
        The analysis mapping produced by ``analyze_contract``, with a
        ``"label"`` filled in on each returned item that lacked one and whose
        ``"id"`` matches a requested checklist item.

    Raises:
        HTTPException: 404 for an unknown job; 400 when no items are given
            or an item lacks a usable ``"id"``/``"label"``; 500 when the
            analysis itself fails.
    """
    if job_id not in jobs:
        raise HTTPException(404, "Úloha nenalezena")
    if not req.items:
        raise HTTPException(400, "Vyberte alespoň jednu položku ke kontrole")

    # Build the id -> label lookup BEFORE the analysis call: a malformed item
    # is a client error, and detecting it only afterwards would both waste the
    # analysis run and surface as an unhandled server error.
    try:
        labels_by_id = {it["id"]: it["label"] for it in req.items}
    except (KeyError, TypeError) as exc:
        raise HTTPException(
            400, "Každá položka musí obsahovat 'id' a 'label'"
        ) from exc

    job = jobs[job_id]
    input_path = Path(job["input_path"])
    try:
        analysis = await analyze_contract(input_path, req.items)
    except Exception as exc:
        logger.exception("Analysis failed")
        raise HTTPException(500, str(exc)) from exc

    # Merge LLM-returned items with the original checklist labels so the UI
    # can show the user-facing label even if the LLM was terse.
    for it in analysis.get("items", []):
        if "label" not in it and it.get("id") in labels_by_id:
            it["label"] = labels_by_id[it["id"]]

    job["analysis"] = analysis
    job["checklist"] = req.items
    # Remembered on the job so the annotation step can read it later.
    job["used_ocr"] = bool(analysis.get("_used_ocr"))
    return analysis
|
||||
|
||||
|
||||
@app.get("/api/annotated/{job_id}")
async def annotated_pdf(job_id: str):
    """Build the annotated PDF for an analyzed job and return it as a download.

    Args:
        job_id: Identifier of a previously registered job.

    Returns:
        A PDF ``FileResponse`` named ``kontrola_<original stem>.pdf``
        (``kontrola_smlouva.pdf`` when no filename was recorded).

    Raises:
        HTTPException: 404 for an unknown job, 400 when no analysis is stored
            yet, 500 when annotation fails.
    """
    record = jobs.get(job_id)
    if record is None:
        raise HTTPException(404, "Úloha nenalezena")
    if not record.get("analysis"):
        raise HTTPException(400, "Nejprve spusťte analýzu")

    source_pdf = Path(record["input_path"])
    target_pdf = Path(record["job_dir"]) / "annotated.pdf"
    report = record["analysis"]

    # OCR-only contracts have no text layer in the original PDF, so searching
    # it for excerpts can only miss; tell the annotator to skip that step.
    no_highlights = bool(record.get("used_ocr"))

    try:
        # annotate is run in a worker thread with purely positional
        # arguments, in this exact order.
        annotate_args = (
            source_pdf,
            target_pdf,
            report.get("items", []),
            report.get("overall_summary", ""),
            report.get("risk_level", ""),
            no_highlights,
            record.get("filename") or "",
        )
        await asyncio.to_thread(annotate, *annotate_args)
    except Exception as exc:
        logger.exception("Annotation failed")
        raise HTTPException(500, f"Anotace selhala: {exc}")

    if record.get("filename"):
        stem = Path(record["filename"]).stem
    else:
        stem = "smlouva"

    download_name = f"kontrola_{stem}.pdf"
    return FileResponse(
        str(target_pdf),
        media_type="application/pdf",
        filename=download_name,
    )
|
||||
|
||||
|
||||
@app.get("/health")
async def health():
    """Health endpoint: always answers with a fixed ``ok`` status payload."""
    payload = {"status": "ok"}
    return payload
|
||||
|
||||
|
||||
# Expose the files in the local "static" directory under the /static prefix.
app.mount(
    "/static",
    StaticFiles(directory="static"),
    name="static",
)
|
||||
Reference in New Issue
Block a user