Initial portal commit: landing + 9 AI-powered apps

Apps:
- dwg-rooms: extract room numbers from DWG/DXF
- dwg-counting: count symbols in PDF drawings (OpenCV template matching)
- contract-check: review PDF contracts against a checklist (Claude vision + Tesseract OCR fallback)
- email-drafter: bullet notes → polished Czech/English business emails
- invoice-extractor: PDF/image invoice → structured data → Excel
- translator: Czech-first translator across 19 languages with tone control
- vv-check: find inconsistent unit prices across VV sheets in one workbook
- vv-compare: diff original vs new VV files (changes / added / removed)
- feature-request: portal users submit ideas + sample files

Infrastructure:
- LiteLLM gateway with per-app virtual keys + budgets
- Langfuse observability
- Geist font, shared theme, cross-subdomain back link + theme sync via cookie/URL
- Caddy reverse proxy on *.klas.chat
This commit is contained in:
Ondřej Glaser
2026-05-13 15:25:04 +02:00
commit 48cef99257
139 changed files with 20171 additions and 0 deletions

168
dwg-rooms/extractor.py Normal file
View File

@@ -0,0 +1,168 @@
"""Rule-based room extraction from DXF files."""
import logging
import math
import re

import ezdxf
# Defaults shown to user; they can remove or replace.
DEFAULT_EXAMPLES = ["č.m. 0301", "01024"]

# Substrings that mark a text as a surface-finish / measurement note rather
# than a room description. They are compared against the lower-cased text, so
# every entry must itself be lower-case.
# Every entry must also be non-empty: "" is a substring of every string and
# would classify *all* texts as measurements. The former empty entry is
# replaced by "m²" (presumably the intended superscript variant of "m2").
MEASUREMENT_KW = (
    "podlaha:", "stěny:", "strop:", "m2", "m²", "povrchová", "nátěr",
    "penetrační", "vrstva", "odstín", "ral ", "chodníky:", "klenba:",
    "portály:", "epoxidový", "beton", "dlažba", "omítka", "obklad",
)
def example_to_regex(example: str) -> re.Pattern | None:
"""
Convert an example like '4-22408' or 'č.m. 0301' into a compiled regex.
- digits become wildcards (\\d, exactly N digits where the example had N digits)
- everything else is matched literally
- an optional trailing letter is allowed (for variants like '0301a')
"""
if not example or not example.strip():
return None
s = example.strip()
parts: list[str] = []
run = 0
for ch in s:
if ch.isdigit():
run += 1
continue
if run:
parts.append(rf"\d{{{run}}}")
run = 0
parts.append(re.escape(ch))
if run:
parts.append(rf"\d{{{run}}}")
pattern = "^(" + "".join(parts) + r"[a-zA-Z]?)$"
try:
return re.compile(pattern, re.IGNORECASE)
except re.error:
return None
def compile_examples(examples: list[str] | None) -> list[re.Pattern]:
    """
    Compile room-number examples into regexes, dropping unusable ones.

    ``None`` selects DEFAULT_EXAMPLES; an explicit (possibly empty) list is
    used as given. Examples for which example_to_regex returns None are skipped.
    """
    chosen = DEFAULT_EXAMPLES if examples is None else examples
    return [rx for ex in chosen if (rx := example_to_regex(ex)) is not None]
def _clean_mtext(text: str) -> str:
    r"""
    Reduce raw MTEXT content to plain, single-spaced text.

    Steps, in order: drop ``{\...;`` formatting-group openers, turn ``\P`` /
    ``\p`` codes into spaces, remove closing braces, cut a trailing decimal
    figure followed by 'm2' (e.g. ' 12,5 m2'), then collapse whitespace.
    """
    no_groups = re.sub(r"\{\\[^;]*;", "", text)
    flattened = re.sub(r"\\[Pp]", " ", no_groups).replace("}", "")
    without_area = re.sub(r"\s*\d+[,\.]\d*\s*m2\s*$", "", flattened)
    # split()/join already trims both ends and squeezes inner whitespace.
    return " ".join(without_area.split())
def _is_measurement(text: str) -> bool:
    """
    Return True if *text* contains any MEASUREMENT_KW keyword (case-insensitive).

    Empty keywords are ignored: "" is a substring of every string, so a single
    blank entry in MEASUREMENT_KW would otherwise flag every text as a
    measurement and suppress all room descriptions.
    """
    lowered = text.lower()
    return any(kw and kw in lowered for kw in MEASUREMENT_KW)
def _is_room_marker(text: str, regexes: list[re.Pattern]) -> re.Match | None:
s = text.strip()
for rx in regexes:
m = rx.fullmatch(s)
if m:
return m
return None
def _is_dimension(text: str, regexes: list[re.Pattern]) -> bool:
    """
    Tell whether *text* is a bare numeric value (dimension/measurement).

    A text that an active room pattern matches is never a dimension.
    Otherwise whitespace and the separators . , - x × + are removed and the
    text counts as a dimension when only digits (at least one) remain.
    """
    if _is_room_marker(text, regexes) is not None:
        return False
    digits_only = re.sub(r"[\s.,\-x×+]", "", text)
    # str.isdigit() is already False for the empty string.
    return digits_only.isdigit()
def _all_text_entities(dxf_path: str) -> list[dict]:
    """
    Collect the non-empty TEXT and MTEXT entities from a DXF file's modelspace.

    Returns a list of ``{"text", "x", "y"}`` dicts, where x/y come from the
    entity's insert point. TEXT content is stripped; MTEXT content is passed
    through _clean_mtext. Entities of other types are ignored.

    Reading is best-effort per entity: an entity that cannot be read is
    skipped (and logged at DEBUG level) instead of aborting the whole file.
    Errors from opening the file itself propagate to the caller.
    """
    log = logging.getLogger(__name__)
    doc = ezdxf.readfile(dxf_path)
    found: list[dict] = []
    for ent in doc.modelspace():
        try:
            kind = ent.dxftype()
            if kind == "TEXT":
                content = ent.dxf.text.strip()
            elif kind == "MTEXT":
                content = _clean_mtext(ent.text)
            else:
                continue
            insert = ent.dxf.insert
            x, y = insert.x, insert.y
        except Exception:
            # Deliberately broad: one malformed entity must not lose the rest
            # of the drawing, but leave a trace so the skip can be diagnosed.
            log.debug("Skipping unreadable text entity in %s", dxf_path, exc_info=True)
            continue
        if content:
            found.append({"text": content, "x": x, "y": y})
    return found
def _nearest_description(
    rx_x: float,
    ry: float,
    candidates: list[dict],
    regexes: list[re.Pattern],
    max_dist: float = 8000,
) -> str | None:
    """
    Return the text of the candidate closest to the point (rx_x, ry).

    Candidates that are measurements, dimensions, room markers, or shorter
    than two characters after stripping are ignored. Only candidates strictly
    closer than *max_dist* qualify; on equal distance the earlier candidate
    wins. Returns None when nothing qualifies.
    """

    def _usable(label: str) -> bool:
        # A description must be real text: not a note, number, or room label.
        if len(label.strip()) < 2:
            return False
        if _is_measurement(label) or _is_dimension(label, regexes):
            return False
        return not _is_room_marker(label, regexes)

    measured = (
        (math.hypot(c["x"] - rx_x, c["y"] - ry), c["text"])
        for c in candidates
        if _usable(c["text"])
    )
    in_range = [(dist, label) for dist, label in measured if dist < max_dist]
    if not in_range:
        return None
    # min() keeps the first of several equally-near candidates.
    return min(in_range, key=lambda pair: pair[0])[1]
def extract_rooms(dxf_path: str, examples: list[str] | None = None) -> tuple[list[dict], list[dict]]:
    """
    Extract rooms from a DXF file; returns ``(rooms, unmatched_texts)``.

    examples: user-provided room-number examples; None → DEFAULT_EXAMPLES.

    Each distinct room label (first occurrence wins) yields a dict with the
    label, its nearest description ("" if none), rounded coordinates,
    source "rule", and confidence 1.0 with a description or 0.6 without.
    *unmatched_texts* are the remaining text entities longer than three
    characters that were not used as a description and are neither
    measurements nor dimensions.
    """
    patterns = compile_examples(examples)

    # Phase 1: split text entities into room markers and everything else.
    markers: list[dict] = []
    leftovers: list[dict] = []
    for entity in _all_text_entities(dxf_path):
        hit = _is_room_marker(entity["text"], patterns)
        if hit is None:
            leftovers.append(entity)
        else:
            markers.append({"room": hit.group(1), "x": entity["x"], "y": entity["y"]})

    # Phase 2: one output row per distinct room label, with nearest description.
    rooms: list[dict] = []
    emitted: set[str] = set()
    claimed: set[str] = set()
    for marker in markers:
        label = marker["room"]
        if label in emitted:
            continue
        emitted.add(label)

        description = _nearest_description(marker["x"], marker["y"], leftovers, patterns)
        if description:
            claimed.add(description)
        rooms.append({
            "room": label,
            "description": description or "",
            "x": round(marker["x"], 1),
            "y": round(marker["y"], 1),
            "source": "rule",
            "confidence": 1.0 if description else 0.6,
        })

    # Phase 3: report meaningful texts that no room claimed.
    unmatched = []
    for entity in leftovers:
        content = entity["text"]
        if content in claimed:
            continue
        if _is_measurement(content) or _is_dimension(content, patterns):
            continue
        if len(content) > 3:
            unmatched.append(entity)
    return rooms, unmatched