Files
AI_portal/invoice-extractor/main.py
Ondřej Glaser 48cef99257 Initial portal commit: landing + 9 AI-powered apps
Apps:
- dwg-rooms: extract room numbers from DWG/DXF
- dwg-counting: count symbols in PDF drawings (OpenCV template matching)
- contract-check: review PDF contracts against a checklist (Claude vision + Tesseract OCR fallback)
- email-drafter: bullet notes → polished Czech/English business emails
- invoice-extractor: PDF/image invoice → structured data → Excel
- translator: Czech-first translator across 19 languages with tone control
- vv-check: find inconsistent unit prices across VV sheets in one workbook
- vv-compare: diff original vs new VV files (changes / added / removed)
- feature-request: portal users submit ideas + sample files

Infrastructure:
- LiteLLM gateway with per-app virtual keys + budgets
- Langfuse observability
- Geist font, shared theme, cross-subdomain back link + theme sync via cookie/URL
- Caddy reverse proxy on *.klas.chat
2026-05-13 15:25:04 +02:00

99 lines
2.9 KiB
Python

"""FastAPI: invoice PDF/image → structured data → editable form → XLSX export."""
import asyncio
import logging
import os
import uuid
from pathlib import Path
from fastapi import FastAPI, File, HTTPException, UploadFile
from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import FileResponse
from fastapi.staticfiles import StaticFiles
from pydantic import BaseModel
from excel_export import write_invoice_xlsx
from extractor import extract_invoice
# Configure the root logger at INFO so the per-job messages below are emitted.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

app = FastAPI(title="Invoice Extractor")
# CORS is fully open: every origin, method and header is accepted.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_methods=["*"],
    allow_headers=["*"],
)

# Root of the per-job working directories; overridable via the WORK_DIR
# environment variable. Created at import time if it does not exist yet.
WORK_DIR = Path(os.environ.get("WORK_DIR", "/tmp/invoice-extractor"))
WORK_DIR.mkdir(parents=True, exist_ok=True)

# In-process job registry keyed by job id (a plain dict, so not persisted).
jobs: dict[str, dict] = {}

# Lower-cased filename suffixes the upload endpoint accepts.
ALLOWED_EXT = {".pdf", ".jpg", ".jpeg", ".png", ".webp"}
@app.get("/")
async def root():
    """Return the ``static/index.html`` file for the site root."""
    index_page = "static/index.html"
    return FileResponse(index_page)
@app.post("/api/upload")
async def upload(file: UploadFile = File(...)):
    """Store an uploaded invoice, run extraction on it and register the job.

    The upload is accepted only when the (lower-cased) suffix of its filename
    is in ``ALLOWED_EXT``. Its bytes are written to
    ``WORK_DIR/<job_id>/input<suffix>`` and that path is handed to
    ``extract_invoice``. On success the job is recorded in ``jobs``.

    If anything fails after the job directory has been created, the directory
    is removed again so failed uploads do not accumulate under ``WORK_DIR``.

    Returns:
        ``{"job_id": <str>, "data": <result of extract_invoice>}``.

    Raises:
        HTTPException: 400 for an unsupported suffix; 500 (with the exception
            text as detail) when ``extract_invoice`` raises.
    """
    import shutil  # local import: only needed for failure cleanup

    suffix = Path(file.filename or "").suffix.lower()
    if suffix not in ALLOWED_EXT:
        raise HTTPException(400, f"Podporované formáty: {', '.join(sorted(ALLOWED_EXT))}")

    job_id = str(uuid.uuid4())
    job_dir = WORK_DIR / job_id
    job_dir.mkdir()

    try:
        input_path = job_dir / f"input{suffix}"
        payload = await file.read()
        input_path.write_bytes(payload)
        logger.info("Job %s: %s (%d bytes)", job_id, file.filename, len(payload))

        try:
            data = await extract_invoice(input_path)
        except Exception as exc:
            logger.exception("Extraction failed")
            # Chain the cause so the original traceback stays attached.
            raise HTTPException(500, str(exc)) from exc
    except BaseException:
        # Covers extraction errors, write errors and request cancellation:
        # the job was never registered, so nothing else will clean this up.
        shutil.rmtree(job_dir, ignore_errors=True)
        raise

    jobs[job_id] = {
        "filename": file.filename,
        "job_dir": str(job_dir),
        "data": data,
    }
    return {"job_id": job_id, "data": data}
# Request body accepted by POST /api/save/{job_id}.
# Documented with comments rather than a docstring so the generated schema
# for this model is left unchanged.
class SaveRequest(BaseModel):
    # Replacement payload; stored as the job's "data" without further checks.
    data: dict
@app.post("/api/save/{job_id}")
async def save_data(job_id: str, req: SaveRequest):
    """Overwrite the stored data of an existing job with the request payload.

    Returns:
        ``{"ok": True}`` once the job's ``"data"`` entry has been replaced.

    Raises:
        HTTPException: 404 when ``job_id`` is not present in ``jobs``.
    """
    try:
        job = jobs[job_id]
    except KeyError:
        raise HTTPException(404, "Nenalezeno") from None
    job["data"] = req.data
    return {"ok": True}
@app.get("/api/export/{job_id}")
async def export(job_id: str):
    """Write the job's current data to an XLSX file and return it as a download.

    The workbook is written to ``<job_dir>/invoice.xlsx`` via
    ``write_invoice_xlsx``. The download name is ``faktura_<label>.xlsx``
    where ``<label>`` is, in order of preference: the ``invoice_number``
    entry of the job data, the stem of the uploaded filename, or the literal
    ``"faktura"``. Every character of the label that is not alphanumeric or
    one of ``-``, ``_``, ``.`` is replaced by ``_``.

    Raises:
        HTTPException: 404 when ``job_id`` is not present in ``jobs``.
    """
    if job_id not in jobs:
        raise HTTPException(404, "Nenalezeno")
    job = jobs[job_id]

    out_path = Path(job["job_dir"]) / "invoice.xlsx"
    write_invoice_xlsx(job["data"], str(out_path))

    # Resolve the label step by step. A single `a or b if c else d` expression
    # binds as `(a or b) if c else d`, which would discard a valid invoice
    # number whenever the stored filename is missing.
    inv_no = job["data"].get("invoice_number")
    if not inv_no:
        filename = job.get("filename")
        inv_no = Path(filename).stem if filename else "faktura"

    safe = "".join(c if c.isalnum() or c in "-_." else "_" for c in str(inv_no))
    return FileResponse(
        str(out_path),
        media_type="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
        filename=f"faktura_{safe}.xlsx",
    )
@app.get("/health")
async def health():
    """Health-check endpoint; always returns a constant OK status payload."""
    return dict(status="ok")
# Expose the local ``static`` directory under the /static URL prefix.
app.mount(
    "/static",
    StaticFiles(directory="static"),
    name="static",
)