Apps: - dwg-rooms: extract room numbers from DWG/DXF - dwg-counting: count symbols in PDF drawings (OpenCV template matching) - contract-check: review PDF contracts against a checklist (Claude vision + Tesseract OCR fallback) - email-drafter: bullet notes → polished Czech/English business emails - invoice-extractor: PDF/image invoice → structured data → Excel - translator: Czech-first translator across 19 languages with tone control - vv-check: find inconsistent unit prices across VV sheets in one workbook - vv-compare: diff original vs new VV files (changes / added / removed) - feature-request: portal users submit ideas + sample files Infrastructure: - LiteLLM gateway with per-app virtual keys + budgets - Langfuse observability - Geist font, shared theme, cross-subdomain back link + theme sync via cookie/URL - Caddy reverse proxy on *.klas.chat
99 lines
2.9 KiB
Python
99 lines
2.9 KiB
Python
"""FastAPI: invoice PDF/image → structured data → editable form → XLSX export."""
|
|
import asyncio
|
|
import logging
|
|
import os
|
|
import uuid
|
|
from pathlib import Path
|
|
|
|
from fastapi import FastAPI, File, HTTPException, UploadFile
|
|
from fastapi.middleware.cors import CORSMiddleware
|
|
from fastapi.responses import FileResponse
|
|
from fastapi.staticfiles import StaticFiles
|
|
from pydantic import BaseModel
|
|
|
|
from excel_export import write_invoice_xlsx
|
|
from extractor import extract_invoice
|
|
|
|
# Root logger at INFO; this module logs under its own module name.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

app = FastAPI(title="Invoice Extractor")
# CORS is fully open: any origin, method and header is accepted.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_methods=["*"],
    allow_headers=["*"],
)

# Parent directory for per-job working directories; overridable through the
# WORK_DIR environment variable. Created at import time if it is missing.
WORK_DIR = Path(os.environ.get("WORK_DIR", "/tmp/invoice-extractor"))
WORK_DIR.mkdir(parents=True, exist_ok=True)

# Job registry keyed by job id. It is a plain module-level dict, so it is
# kept only in this process's memory.
jobs: dict[str, dict] = {}

# File extensions accepted by the upload endpoint (lower-case, leading dot).
ALLOWED_EXT = {".pdf", ".jpg", ".jpeg", ".png", ".webp"}
|
|
|
|
|
|
# Landing page: answers GET / with the file static/index.html.
@app.get("/")
async def root():
    index_page = "static/index.html"
    return FileResponse(index_page)
|
|
|
|
|
|
# Accept one uploaded invoice file, run extraction on it and register a job.
#
# Flow:
#   1. Reject the upload with HTTP 400 unless its (lower-cased) extension is
#      listed in ALLOWED_EXT.
#   2. Store the bytes as WORK_DIR/<job_id>/input<suffix>.
#   3. Await extract_invoice() on the stored file.
#   4. Remember filename, job directory and extracted data in `jobs`.
#
# Returns {"job_id": ..., "data": ...}.
# Raises HTTPException(400) for an unsupported extension and
# HTTPException(500) when extraction raises.
@app.post("/api/upload")
async def upload(file: UploadFile = File(...)):
    import shutil  # local import: only needed for failure cleanup

    suffix = Path(file.filename or "").suffix.lower()
    if suffix not in ALLOWED_EXT:
        raise HTTPException(400, f"Podporované formáty: {', '.join(sorted(ALLOWED_EXT))}")

    job_id = str(uuid.uuid4())
    job_dir = WORK_DIR / job_id
    job_dir.mkdir()
    input_path = job_dir / f"input{suffix}"

    payload = await file.read()
    input_path.write_bytes(payload)
    logger.info("Job %s: %s (%d bytes)", job_id, file.filename, len(payload))

    try:
        data = await extract_invoice(input_path)
    except Exception as exc:
        logger.exception("Extraction failed")
        # The job is never registered on failure, so nothing could reference
        # this directory again; remove it instead of leaving it orphaned.
        shutil.rmtree(job_dir, ignore_errors=True)
        # NOTE(review): the raw exception text is returned to the client;
        # kept as-is because the frontend may display it.
        raise HTTPException(500, str(exc)) from exc

    jobs[job_id] = {
        "filename": file.filename,
        "job_dir": str(job_dir),
        "data": data,
    }
    return {"job_id": job_id, "data": data}
|
|
|
|
|
|
# Request body of the save endpoint: the edited invoice data as a free-form
# dict. No schema is imposed on its keys here.
class SaveRequest(BaseModel):
    data: dict
|
|
|
|
|
|
# Replace the stored data of an existing job with the submitted payload.
# Responds with {"ok": True}; unknown job ids yield HTTP 404.
@app.post("/api/save/{job_id}")
async def save_data(job_id: str, req: SaveRequest):
    job = jobs.get(job_id)
    if job is None:
        raise HTTPException(404, "Nenalezeno")

    job["data"] = req.data
    return {"ok": True}
|
|
|
|
|
|
# Write the job's current data to <job_dir>/invoice.xlsx and return it as a
# download named faktura_<name>.xlsx.
#
# <name> is chosen in this order:
#   1. the "invoice_number" value of the job data, if truthy;
#   2. the stem of the originally uploaded filename, if one was recorded;
#   3. the literal "faktura".
# Characters other than alphanumerics, "-", "_" and "." are replaced by "_".
#
# Raises HTTPException(404) for an unknown job id.
@app.get("/api/export/{job_id}")
async def export(job_id: str):
    if job_id not in jobs:
        raise HTTPException(404, "Nenalezeno")

    job = jobs[job_id]
    out_path = Path(job["job_dir"]) / "invoice.xlsx"
    write_invoice_xlsx(job["data"], str(out_path))

    # The conditional must be parenthesised: `a or b if c else d` parses as
    # `(a or b) if c else d`, which threw away the invoice number whenever
    # the job had no filename.
    filename = job.get("filename")
    inv_no = job["data"].get("invoice_number") or (
        Path(filename).stem if filename else "faktura"
    )
    safe = "".join(c if c.isalnum() or c in "-_." else "_" for c in str(inv_no))

    return FileResponse(
        str(out_path),
        media_type="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
        filename=f"faktura_{safe}.xlsx",
    )
|
|
|
|
|
|
# Health endpoint: always answers with the constant payload {"status": "ok"}.
@app.get("/health")
async def health():
    payload = {"status": "ok"}
    return payload
|
|
|
|
|
|
# Serve the contents of the local "static" directory under the /static URL prefix.
app.mount("/static", StaticFiles(directory="static"), name="static")
|