Initial portal commit: landing + 9 AI-powered apps

Apps:
- dwg-rooms: extract room numbers from DWG/DXF
- dwg-counting: count symbols in PDF drawings (OpenCV template matching)
- contract-check: review PDF contracts against a checklist (Claude vision + Tesseract OCR fallback)
- email-drafter: bullet notes → polished Czech/English business emails
- invoice-extractor: PDF/image invoice → structured data → Excel
- translator: Czech-first translator across 19 languages with tone control
- vv-check: find inconsistent unit prices across VV sheets in one workbook
- vv-compare: diff original vs new VV files (changes / added / removed)
- feature-request: portal users submit ideas + sample files

Infrastructure:
- LiteLLM gateway with per-app virtual keys + budgets
- Langfuse observability
- Geist font, shared theme, cross-subdomain back link + theme sync via cookie/URL
- Caddy reverse proxy on *.klas.chat
2026-05-13 15:25:04 +02:00
commit 48cef99257
139 changed files with 20171 additions and 0 deletions
--- a/dwg-rooms/extractor.py
+++ b/dwg-rooms/extractor.py
@@ -0,0 +1,168 @@
+"""Rule-based room extraction from DXF files."""
+import logging
+import math
+import re
+
+import ezdxf
+
+# Defaults shown to user; they can remove or replace.
+# compile_examples() falls back to this list only when it is called with None.
+DEFAULT_EXAMPLES = ["č.m. 0301", "01024"]
+
+# Substrings that flag a text as a measurement / finish note rather than a room
+# description. _is_measurement() lower-cases the text before the substring
+# test, so every entry here must stay lower-case to be able to match.
+MEASUREMENT_KW = (
+    "podlaha:", "stěny:", "strop:", "m2", "m²", "povrchová", "nátěr",
+    "penetrační", "vrstva", "odstín", "ral ", "chodníky:", "klenba:",
+    "portály:", "epoxidový", "beton", "dlažba", "omítka", "obklad",
+)
+
+
+def example_to_regex(example: str) -> re.Pattern | None:
+    """
+    Convert an example like '4-22408' or 'č.m. 0301' into a compiled regex.
+      - each run of N digits becomes \\d{N} (any digits, exactly N of them)
+      - everything else is matched literally (case-insensitively)
+      - an optional trailing ASCII letter is allowed (for variants like '0301a')
+
+    The whole match is exposed as capture group 1. Returns None when the
+    example is empty or whitespace-only.
+    """
+    if not example or not example.strip():
+        return None
+    s = example.strip()
+    # Split into digit / non-digit runs with the same \d class the generated
+    # pattern uses. str.isdigit() also accepts characters such as '²' that \d
+    # never matches; treating those as wildcards would yield a pattern that
+    # cannot even match the example it was built from.
+    parts = [
+        rf"\d{{{len(m.group())}}}" if m.group(1) else re.escape(m.group())
+        for m in re.finditer(r"(\d+)|\D+", s)
+    ]
+    pattern = "^(" + "".join(parts) + r"[a-zA-Z]?)$"
+    try:
+        return re.compile(pattern, re.IGNORECASE)
+    except re.error:
+        return None
+
+
+def compile_examples(examples: list[str] | None) -> list[re.Pattern]:
+    """
+    Compile room-number examples into regexes, dropping the ones that
+    example_to_regex() rejects. None selects DEFAULT_EXAMPLES; an empty list
+    is honoured as-is and yields no patterns.
+    """
+    source = DEFAULT_EXAMPLES if examples is None else examples
+    return [rx for rx in map(example_to_regex, source) if rx is not None]
+
+
+def _clean_mtext(text: str) -> str:
+    """
+    Reduce raw MTEXT content to a single-line plain string.
+
+    Removes inline format openers (an opening brace, a backslash code and
+    everything up to the next semicolon), replaces backslash-P / backslash-p
+    codes with a space, drops closing braces, cuts a trailing decimal area
+    such as '12,5 m2', and finally collapses all whitespace runs.
+    """
+    cleaned = text
+    for pattern, replacement in ((r"\{\\[^;]*;", ""), (r"\\[Pp]", " ")):
+        cleaned = re.sub(pattern, replacement, cleaned)
+    cleaned = cleaned.replace("}", "")
+    # Only values with a decimal separator are treated as a trailing area.
+    cleaned = re.sub(r"\s*\d+[,\.]\d*\s*m2\s*$", "", cleaned)
+    return " ".join(cleaned.split())
+
+
+def _is_measurement(text: str) -> bool:
+    """Return True when the lower-cased text contains any MEASUREMENT_KW entry."""
+    lowered = text.lower()
+    for keyword in MEASUREMENT_KW:
+        if keyword in lowered:
+            return True
+    return False
+
+
+def _is_room_marker(text: str, regexes: list[re.Pattern]) -> re.Match | None:
+    """
+    Return the match from the first pattern that fully matches the stripped
+    text, or None when no pattern does.
+    """
+    candidate = text.strip()
+    hits = (rx.fullmatch(candidate) for rx in regexes)
+    return next((hit for hit in hits if hit), None)
+
+
+def _is_dimension(text: str, regexes: list[re.Pattern]) -> bool:
+    """
+    Tell whether a text is a bare number that no room pattern claims.
+
+    Whitespace and the separators . , - x × + are ignored; if what remains is
+    non-empty and all digits, the text counts as a dimension/measurement
+    value. Anything matched by an active room pattern is never a dimension.
+    """
+    if _is_room_marker(text, regexes) is not None:
+        return False
+    digits_only = re.sub(r"[\s.,\-x×+]", "", text)
+    return bool(digits_only) and digits_only.isdigit()
+
+
+def _all_text_entities(dxf_path: str) -> list[dict]:
+    """
+    Read a DXF file and collect every non-empty TEXT / MTEXT entity found in
+    its modelspace.
+
+    Returns a list of {"text", "x", "y"} dicts, where x/y come from the
+    entity's insert point. TEXT content is stripped; MTEXT content is passed
+    through _clean_mtext(). Other entity types are ignored.
+
+    Reading is best effort per entity: one that raises while being inspected
+    is skipped, and the skip is logged at debug level so it can be diagnosed.
+    Errors raised by ezdxf.readfile() itself propagate to the caller.
+    """
+    log = logging.getLogger(__name__)
+    doc = ezdxf.readfile(dxf_path)
+    msp = doc.modelspace()
+    out = []
+    for ent in msp:
+        try:
+            kind = ent.dxftype()
+            if kind == "TEXT":
+                t = ent.dxf.text.strip()
+            elif kind == "MTEXT":
+                t = _clean_mtext(ent.text)
+            else:
+                continue
+            x, y = ent.dxf.insert.x, ent.dxf.insert.y
+        except Exception:
+            # Deliberately broad: a single malformed entity must not abort the
+            # whole extraction, but it should not vanish without a trace.
+            log.debug("Skipping unreadable text entity in %s", dxf_path, exc_info=True)
+            continue
+        if t:
+            out.append({"text": t, "x": x, "y": y})
+    return out
+
+
+def _nearest_description(rx_x: float, ry: float, candidates: list[dict],
+                          regexes: list[re.Pattern], max_dist: float = 8000) -> str | None:
+    """
+    Return the text of the candidate closest to the point (rx_x, ry).
+
+    Candidates that are measurements, dimensions, room markers, or shorter
+    than two characters after stripping are ignored. Only candidates strictly
+    closer than max_dist qualify; on equal distance the earlier one wins.
+    Returns None when nothing qualifies.
+    """
+    nearest_text = None
+    nearest_dist = max_dist
+    for cand in candidates:
+        label = cand["text"]
+        if len(label.strip()) < 2:
+            continue
+        if (_is_measurement(label)
+                or _is_dimension(label, regexes)
+                or _is_room_marker(label, regexes)):
+            continue
+        dist = math.hypot(cand["x"] - rx_x, cand["y"] - ry)
+        if dist < nearest_dist:
+            nearest_text, nearest_dist = label, dist
+    return nearest_text
+
+
+def extract_rooms(dxf_path: str, examples: list[str] | None = None) -> tuple[list[dict], list[dict]]:
+    """
+    Extract rooms from a DXF file. Returns (rooms, unmatched_texts).
+
+    examples: user-provided room-number examples; None → DEFAULT_EXAMPLES.
+
+    Each room dict carries "room", "description", "x", "y", "source" and
+    "confidence". A room number that appears several times is reported once,
+    at its first occurrence. unmatched_texts holds the remaining text
+    entities that were not used as a description, are not measurements or
+    dimensions, and are longer than three characters.
+    """
+    patterns = compile_examples(examples)
+
+    # Split every text entity into room markers and everything else.
+    markers: list[dict] = []
+    leftovers: list[dict] = []
+    for ent in _all_text_entities(dxf_path):
+        hit = _is_room_marker(ent["text"], patterns)
+        if hit is None:
+            leftovers.append(ent)
+        else:
+            markers.append({"room": hit.group(1), "x": ent["x"], "y": ent["y"]})
+
+    rooms: list[dict] = []
+    known_numbers: set[str] = set()
+    claimed_texts: set[str] = set()
+    for marker in markers:
+        number = marker["room"]
+        if number in known_numbers:
+            continue
+        known_numbers.add(number)
+
+        label = _nearest_description(marker["x"], marker["y"], leftovers, patterns)
+        if label:
+            claimed_texts.add(label)
+            description, confidence = label, 1.0
+        else:
+            # No usable description nearby: keep the room, lower the confidence.
+            description, confidence = "", 0.6
+        rooms.append({
+            "room": number,
+            "description": description,
+            "x": round(marker["x"], 1),
+            "y": round(marker["y"], 1),
+            "source": "rule",
+            "confidence": confidence,
+        })
+
+    unmatched: list[dict] = []
+    for ent in leftovers:
+        text = ent["text"]
+        if text in claimed_texts or len(text) <= 3:
+            continue
+        if _is_measurement(text) or _is_dimension(text, patterns):
+            continue
+        unmatched.append(ent)
+
+    return rooms, unmatched