Initial portal commit: landing + 9 AI-powered apps
Apps:
- dwg-rooms: extract room numbers from DWG/DXF
- dwg-counting: count symbols in PDF drawings (OpenCV template matching)
- contract-check: review PDF contracts against a checklist (Claude vision + Tesseract OCR fallback)
- email-drafter: bullet notes → polished Czech/English business emails
- invoice-extractor: PDF/image invoice → structured data → Excel
- translator: Czech-first translator across 19 languages with tone control
- vv-check: find inconsistent unit prices across VV sheets in one workbook
- vv-compare: diff original vs new VV files (changes / added / removed)
- feature-request: portal users submit ideas + sample files

Infrastructure:
- LiteLLM gateway with per-app virtual keys + budgets
- Langfuse observability
- Geist font, shared theme, cross-subdomain back link + theme sync via cookie/URL
- Caddy reverse proxy on *.klas.chat
This commit is contained in:
98
invoice-extractor/main.py
Normal file
98
invoice-extractor/main.py
Normal file
@@ -0,0 +1,98 @@
|
||||
"""FastAPI: invoice PDF/image → structured data → editable form → XLSX export."""
|
||||
import asyncio
|
||||
import logging
|
||||
import os
|
||||
import uuid
|
||||
from pathlib import Path
|
||||
|
||||
from fastapi import FastAPI, File, HTTPException, UploadFile
|
||||
from fastapi.middleware.cors import CORSMiddleware
|
||||
from fastapi.responses import FileResponse
|
||||
from fastapi.staticfiles import StaticFiles
|
||||
from pydantic import BaseModel
|
||||
|
||||
from excel_export import write_invoice_xlsx
|
||||
from extractor import extract_invoice
|
||||
|
||||
# Configure the root logger at INFO level and grab this module's logger.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

app = FastAPI(title="Invoice Extractor")
# CORS is fully open: every origin, method and header is accepted.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_methods=["*"],
    allow_headers=["*"],
)

# Scratch directory holding one sub-directory per job; the location can be
# overridden through the WORK_DIR environment variable.
WORK_DIR = Path(os.getenv("WORK_DIR", "/tmp/invoice-extractor"))
WORK_DIR.mkdir(parents=True, exist_ok=True)

# In-memory job registry keyed by job id (not persisted across restarts).
jobs: dict[str, dict] = {}

# Lower-case file suffixes the upload endpoint accepts.
ALLOWED_EXT = {".pdf", ".jpg", ".jpeg", ".png", ".webp"}
|
||||
|
||||
|
||||
@app.get("/")
async def root():
    """Serve the single-page front end from the static directory."""
    index_page = "static/index.html"
    return FileResponse(index_page)
|
||||
|
||||
|
||||
@app.post("/api/upload")
async def upload(file: UploadFile = File(...)):
    """Accept an invoice upload, run extraction and register a new job.

    The uploaded file is stored as ``input<suffix>`` in a fresh per-job
    directory under ``WORK_DIR`` and handed to ``extract_invoice``. The
    extraction result is kept in the in-memory ``jobs`` registry so it can
    later be edited and exported.

    Args:
        file: The uploaded invoice; its suffix must be in ``ALLOWED_EXT``.

    Returns:
        A dict with the generated ``job_id`` and the extracted ``data``.

    Raises:
        HTTPException: 400 for an unsupported suffix or an empty upload,
            500 when extraction fails.
    """
    suffix = Path(file.filename or "").suffix.lower()
    if suffix not in ALLOWED_EXT:
        raise HTTPException(400, f"Podporované formáty: {', '.join(sorted(ALLOWED_EXT))}")

    # Read the payload before touching the disk so an empty upload is
    # rejected as a client error without leaving an orphaned job directory.
    payload = await file.read()
    if not payload:
        raise HTTPException(400, "Soubor je prázdný")

    job_id = str(uuid.uuid4())
    job_dir = WORK_DIR / job_id
    job_dir.mkdir()
    input_path = job_dir / f"input{suffix}"
    input_path.write_bytes(payload)
    logger.info("Job %s: %s (%d bytes)", job_id, file.filename, len(payload))

    try:
        data = await extract_invoice(input_path)
    except Exception as exc:
        logger.exception("Extraction failed")
        # Chain the cause so the original traceback stays attached.
        raise HTTPException(500, str(exc)) from exc

    jobs[job_id] = {
        "filename": file.filename,
        "job_dir": str(job_dir),
        "data": data,
    }
    return {"job_id": job_id, "data": data}
|
||||
|
||||
|
||||
class SaveRequest(BaseModel):
    """Request body for the save endpoint."""

    # Replacement invoice data; stored on the job as-is by save_data.
    data: dict
|
||||
|
||||
|
||||
@app.post("/api/save/{job_id}")
async def save_data(job_id: str, req: SaveRequest):
    """Overwrite the stored data of an existing job with the request payload.

    Raises:
        HTTPException: 404 when ``job_id`` is not a registered job.
    """
    job = jobs.get(job_id)
    if job is None:
        raise HTTPException(404, "Nenalezeno")

    job["data"] = req.data
    return {"ok": True}
|
||||
|
||||
|
||||
@app.get("/api/export/{job_id}")
async def export(job_id: str):
    """Write the job's current data to an XLSX file and return it as a download.

    The workbook is written to ``invoice.xlsx`` inside the job directory.
    The download name is ``faktura_<label>.xlsx``, where the label is the
    invoice number if present, otherwise the stem of the uploaded filename,
    otherwise the literal ``faktura``.

    Raises:
        HTTPException: 404 when ``job_id`` is not a registered job.
    """
    if job_id not in jobs:
        raise HTTPException(404, "Nenalezeno")

    job = jobs[job_id]
    out_path = Path(job["job_dir"]) / "invoice.xlsx"
    write_invoice_xlsx(job["data"], str(out_path))

    # Resolve the label step by step. The previous one-liner parsed as
    # ``(number or stem) if filename else "faktura"``, which threw away a
    # valid invoice number whenever the filename was missing.
    filename = job.get("filename")
    stem = Path(filename).stem if filename else ""
    inv_no = job["data"].get("invoice_number") or stem or "faktura"

    # Keep alphanumerics and "-_." only; everything else becomes "_".
    safe = "".join(c if c.isalnum() or c in "-_." else "_" for c in str(inv_no))
    return FileResponse(
        str(out_path),
        media_type="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
        filename=f"faktura_{safe}.xlsx",
    )
|
||||
|
||||
|
||||
@app.get("/health")
async def health():
    """Liveness probe: always reports an ``ok`` status."""
    return dict(status="ok")
|
||||
|
||||
|
||||
# Serve the front-end assets from the local "static" directory under /static.
app.mount(
    "/static",
    StaticFiles(directory="static"),
    name="static",
)
|
||||
Reference in New Issue
Block a user