Initial portal commit: landing + 9 AI-powered apps

Apps:
- dwg-rooms: extract room numbers from DWG/DXF
- dwg-counting: count symbols in PDF drawings (OpenCV template matching)
- contract-check: review PDF contracts against a checklist (Claude vision + Tesseract OCR fallback)
- email-drafter: bullet notes → polished Czech/English business emails
- invoice-extractor: PDF/image invoice → structured data → Excel
- translator: Czech-first translator across 19 languages with tone control
- vv-check: find inconsistent unit prices across VV sheets in one workbook
- vv-compare: diff original vs new VV files (changes / added / removed)
- feature-request: portal users submit ideas + sample files

Infrastructure:
- LiteLLM gateway with per-app virtual keys + budgets
- Langfuse observability
- Geist font, shared theme, cross-subdomain back link + theme sync via cookie/URL
- Caddy reverse proxy on *.klas.chat
2026-05-13 15:25:04 +02:00
commit 48cef99257
139 changed files with 20171 additions and 0 deletions
--- a/dwg-rooms/llm_helper.py
+++ b/dwg-rooms/llm_helper.py
@@ -0,0 +1,80 @@
+"""LLM fallback: classify unmatched DXF text entities as rooms via LiteLLM."""
+import json
+import logging
+import os
+
+from openai import AsyncOpenAI
+
+# Module-level logger, named after this module.
+logger = logging.getLogger(__name__)
+
+# Cached AsyncOpenAI client; stays None until _get_client() builds it.
+_client: AsyncOpenAI | None = None
+
+
+def _get_client() -> AsyncOpenAI:
+    """Return the shared AsyncOpenAI client, building it on first use.
+
+    The base URL is read from ``LITELLM_BASE_URL`` and the key from
+    ``LITELLM_API_KEY``; each falls back to a built-in default when unset.
+    The instance is stored in the module-level ``_client`` and reused.
+    """
+    global _client
+    if _client is not None:
+        return _client
+
+    gateway_url = os.getenv("LITELLM_BASE_URL", "http://host.docker.internal:4000")
+    gateway_key = os.getenv("LITELLM_API_KEY", "sk-dummy")
+    _client = AsyncOpenAI(base_url=gateway_url, api_key=gateway_key)
+    return _client
+
+
+# Prompt passed as the "system" message by enhance_with_llm(). It describes
+# the input (unmatched text entities) and the JSON array expected in reply.
+SYSTEM = """You are a specialist extracting room data from Czech architectural DXF floor plans.
+You receive text entities (text, x, y) that were not matched by rule-based parsing.
+
+Identify pairs of room number + Czech room name/description.
+Czech room numbers: 4-6 digit codes, sometimes prefixed with "č.m.".
+Czech room names: e.g. "Chodba", "Serverovna", "Sklep", "WC", "Kancelář", etc.
+
+Return ONLY a JSON array of objects:
+{"room": "XXXXX", "description": "Czech name", "confidence": 0.0-1.0}
+
+Skip: measurements (m2, m²), material names (beton, dlažba), dimensions, unrelated text.
+Only include entries with confidence > 0.5."""
+
+
+async def enhance_with_llm(unmatched: list[dict]) -> list[dict]:
+    """Ask the LLM to pair leftover DXF text entities into rooms.
+
+    Best-effort: any failure while calling the model or parsing its reply is
+    logged and yields an empty list instead of an exception.
+
+    Args:
+        unmatched: Text entities that rule-based parsing could not match.
+            Each dict must provide ``text`` plus numeric ``x`` and ``y``.
+            Only the first 200 entries are sent to the model.
+
+    Returns:
+        One dict per room reported by the model, with the keys ``room``,
+        ``description``, ``x``, ``y``, ``source`` and ``confidence``.
+        ``x`` and ``y`` are always 0.0 (the reply carries no coordinates)
+        and ``source`` is always ``"llm"``. The list is empty when there is
+        nothing to classify, when no real API key is configured, or when
+        the request or parsing fails.
+
+    Raises:
+        KeyError: If an entity lacks ``text``, ``x`` or ``y``. The prompt is
+            built before the guarded section, so a malformed entity (missing
+            key or non-numeric coordinate) propagates to the caller.
+    """
+    # Nothing to classify: do not spend a request on an empty prompt.
+    if not unmatched:
+        return []
+
+    api_key = os.getenv("LITELLM_API_KEY", "")
+    if not api_key or api_key == "sk-dummy":
+        logger.info("LITELLM_API_KEY not set — skipping LLM enhancement")
+        return []
+
+    # Cap the prompt size; entities beyond the first 200 are not sent.
+    sample = unmatched[:200]
+    text_block = "\n".join(
+        f'- "{t["text"]}" x={t["x"]:.0f} y={t["y"]:.0f}' for t in sample
+    )
+    model = os.getenv("LLM_MODEL", "gpt-4o-mini")
+
+    try:
+        resp = await _get_client().chat.completions.create(
+            model=model,
+            messages=[
+                {"role": "system", "content": SYSTEM},
+                {"role": "user", "content": f"Text entities:\n{text_block}"},
+            ],
+            temperature=0.1,
+            max_tokens=2000,
+        )
+        raw = resp.choices[0].message.content or "[]"
+        # Strip markdown code fences if present
+        raw = raw.strip().removeprefix("```json").removeprefix("```").removesuffix("```").strip()
+        data = json.loads(raw)
+        # Some replies wrap the array in an object; unwrap the known keys.
+        if isinstance(data, dict):
+            data = data.get("rooms", data.get("result", []))
+
+        rooms = []
+        for entry in data:
+            if not isinstance(entry, dict) or not entry.get("room"):
+                continue
+            # A null or non-numeric confidence on one entry must not discard
+            # the whole reply; use the same default as when it is absent.
+            try:
+                confidence = float(entry.get("confidence", 0.7))
+            except (TypeError, ValueError):
+                confidence = 0.7
+            rooms.append(
+                {
+                    "room": str(entry["room"]),
+                    # A null description is normalised to an empty string.
+                    "description": entry.get("description") or "",
+                    "x": 0.0,
+                    "y": 0.0,
+                    "source": "llm",
+                    "confidence": confidence,
+                }
+            )
+        return rooms
+    except Exception as exc:
+        # Deliberate catch-all: this step is optional, so a failure degrades
+        # to "no extra rooms". logger.exception keeps the traceback.
+        logger.exception("LLM enhancement failed: %s", exc)
+        return []