Initial portal commit: landing + 9 AI-powered apps

Apps:
- dwg-rooms: extract room numbers from DWG/DXF
- dwg-counting: count symbols in PDF drawings (OpenCV template matching)
- contract-check: review PDF contracts against a checklist (Claude vision + Tesseract OCR fallback)
- email-drafter: bullet notes → polished Czech/English business emails
- invoice-extractor: PDF/image invoice → structured data → Excel
- translator: Czech-first translator across 19 languages with tone control
- vv-check: find inconsistent unit prices across VV sheets in one workbook
- vv-compare: diff original vs new VV files (changes / added / removed)
- feature-request: portal users submit ideas + sample files

Infrastructure:
- LiteLLM gateway with per-app virtual keys + budgets
- Langfuse observability
- Geist font, shared theme, cross-subdomain back link + theme sync via cookie/URL
- Caddy reverse proxy on *.klas.chat
This commit is contained in:
Ondřej Glaser
2026-05-13 15:25:04 +02:00
commit 48cef99257
139 changed files with 20171 additions and 0 deletions

286
dwg-counting/counting.py Normal file
View File

@@ -0,0 +1,286 @@
"""Shape-based symbol counting via contour matching.
Why not cv2.matchTemplate: CAD symbols are usually thin-line drawings on a
white background (~10-20% ink). Normalized cross-correlation gives spuriously
high scores for any sparse-ink region (e.g. wall edges), producing false
positives everywhere.
Approach used here:
1. Binarize template + drawing to "ink" maps.
2. Find external contours in both.
3. Use the template's main contour as a shape reference.
4. For every contour in the drawing, compare via cv2.matchShapes (Hu moments
— invariant to scale, rotation, translation).
5. Filter by area ratio (similar size to template, allowing ±factor).
6. Keep contours below a shape-distance threshold.
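This is the classic approach to CAD symbol takeoff.
NOTE: count_template in this module currently matches with multi-scale,
multi-rotation cv2.matchTemplate (TM_SQDIFF_NORMED combined with
TM_CCOEFF_NORMED) followed by non-max suppression; it does not run the
contour pipeline described above.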
"""
import logging
from pathlib import Path
from typing import Iterable
import cv2
import numpy as np
logger = logging.getLogger(__name__)
# Default similarity cut-off for template matching: higher = stricter.
# 0.65 is permissive, 0.85 strict. Used as the default `threshold` argument
# of count_template, where a location counts as a hit when its similarity
# score is >= this value.
DEFAULT_THRESHOLD = 0.7
def _prep(img_path: Path) -> np.ndarray:
    """Load an image as grayscale and turn it into a binary ink map.

    Pixels brighter than 245 become background (0); everything else becomes
    ink (255).

    The strokes are deliberately not dilated: contour-based matching needs
    thin lines, otherwise separate symbols fuse into one huge contour via
    the dense linework of a CAD drawing.

    Raises:
        RuntimeError: if the image at ``img_path`` cannot be read.
    """
    gray = cv2.imread(str(img_path), cv2.IMREAD_GRAYSCALE)
    if gray is None:
        raise RuntimeError(f"Could not load {img_path}")
    # THRESH_BINARY_INV: values above 245 map to 0, all others to 255.
    ink_map = cv2.threshold(gray, 245, 255, cv2.THRESH_BINARY_INV)[1]
    return ink_map
def _crop_to_content(template: np.ndarray, bg_threshold: int = 240) -> np.ndarray:
    """Crop a 2-D template to the bounding box of its content (plus 2 px pad).

    Two input flavours are supported:

    * Binary ink map (as produced by ``_prep``): ink is non-zero, background
      is 0. A map counts as binary when its maximum is <= 1, or when it spans
      0..255 and contains *no intermediate values*.
    * Grayscale image on a light background: content is every pixel darker
      than ``bg_threshold``.

    The "no intermediate values" requirement matters: an anti-aliased
    grayscale scan with pure-black strokes on a pure-white page also has
    min == 0 and max == 255, and must not be mistaken for an ink map
    (otherwise the white page itself is treated as content and nothing is
    cropped).

    Args:
        template: 2-D image array.
        bg_threshold: grayscale value at or above which a pixel is treated
            as background (grayscale inputs only).

    Returns:
        A view of ``template`` restricted to the padded content bounding
        box, or ``template`` itself when it is empty or has no content.
    """
    if template.size == 0:
        # Nothing to crop; min()/max() would raise on an empty array.
        return template

    lowest, highest = template.min(), template.max()
    two_level = (
        lowest == 0
        and highest == 255
        and not ((template > 0) & (template < 255)).any()
    )
    if highest <= 1 or two_level:
        # Binary map: any non-zero pixel is ink.
        mask = template > 0
    else:
        mask = template < bg_threshold

    if not mask.any():
        return template

    rows = np.flatnonzero(mask.any(axis=1))
    cols = np.flatnonzero(mask.any(axis=0))
    pad = 2
    y0 = max(0, int(rows[0]) - pad)
    x0 = max(0, int(cols[0]) - pad)
    y1 = min(template.shape[0], int(rows[-1]) + 1 + pad)
    x1 = min(template.shape[1], int(cols[-1]) + 1 + pad)
    return template[y0:y1, x0:x1]
def _nms(boxes: list[tuple], scores: list[float], overlap_thresh: float = 0.3) -> list[int]:
    """Greedy non-maximum suppression over axis-aligned boxes.

    Boxes are ``(x1, y1, x2, y2)``. Candidates are visited from highest to
    lowest score; each visited box is kept and every remaining box whose IoU
    with it exceeds ``overlap_thresh`` is discarded.

    Returns:
        Indices into ``boxes`` of the survivors, in descending score order.
    """
    if not boxes:
        return []

    coords = np.array(boxes, dtype=np.float32)
    left, top, right, bottom = (coords[:, col] for col in range(4))
    box_area = (right - left) * (bottom - top)

    remaining = np.argsort(scores)[::-1]
    kept = []
    while remaining.size > 0:
        best, rest = remaining[0], remaining[1:]
        kept.append(int(best))

        # Intersection rectangle between the winner and each other candidate.
        inter_left = np.maximum(left[best], left[rest])
        inter_top = np.maximum(top[best], top[rest])
        inter_right = np.minimum(right[best], right[rest])
        inter_bottom = np.minimum(bottom[best], bottom[rest])
        inter_w = np.maximum(0.0, inter_right - inter_left)
        inter_h = np.maximum(0.0, inter_bottom - inter_top)
        inter = inter_w * inter_h

        union = box_area[best] + box_area[rest] - inter
        # The floor on the denominator avoids dividing by zero for
        # degenerate (zero-area) boxes.
        iou = inter / np.maximum(union, 1e-6)
        remaining = rest[iou <= overlap_thresh]
    return kept
# Longest drawing side, in pixels, above which count_template downscales the
# drawing before matching. 9000 means effectively no downscale for typical
# A1/A0 sheets.
MAX_DRAWING_PX = 9000
def debug_template(template_path: Path, drawing_path: Path) -> dict:
    """Return diagnostics for tuning a symbol template against a drawing.

    Both images are binarized with ``_prep`` and the template is cropped to
    its content. The result always contains the size / ink statistics; when
    the cropped template is at least 8 px on both sides it also contains:

    * ``scale_scan``: per-scale best TM_CCOEFF_NORMED score and the number
      of positions scoring >= 0.7 / >= 0.6 (scales whose resized template
      would not fit inside the drawing are skipped),
    * ``max_score``: best score over the scan (-1.0 if every scale was
      skipped),
    * ``matches_at_threshold``: position counts at scale 1.0 for a range of
      thresholds (all zero when the unscaled template is larger than the
      drawing).

    If the cropped template is too small, ``error`` is set and the matching
    diagnostics are omitted.

    Raises:
        RuntimeError: propagated from ``_prep`` when an image cannot be read.
    """
    template = _prep(template_path)
    template_cropped = _crop_to_content(template)
    drawing = _prep(drawing_path)

    info = {
        "template_size": list(template.shape),
        "template_cropped_size": list(template_cropped.shape),
        # _prep yields a 0/255 ink map, so ink is simply every non-zero
        # pixel. (Guessing the encoding from min/max misreports blank and
        # fully-inked templates.)
        "template_ink_pixels": int(np.count_nonzero(template_cropped)),
        "template_total_pixels": int(template_cropped.size),
        "drawing_size": list(drawing.shape),
    }
    if min(template_cropped.shape) < 8:
        info["error"] = "template too small after content crop"
        return info

    draw_h, draw_w = drawing.shape[:2]
    tmpl_h, tmpl_w = template_cropped.shape[:2]

    # Scale sweep — finds the size at which the template matches best.
    scale_scan = []
    best_overall = -1.0
    for scale in (0.15, 0.25, 0.35, 0.5, 0.7, 0.85, 1.0, 1.2, 1.5, 2.0):
        nw = max(8, int(tmpl_w * scale))
        nh = max(8, int(tmpl_h * scale))
        if nh > draw_h or nw > draw_w:
            # matchTemplate requires the template to fit inside the image.
            continue
        tmpl = cv2.resize(template_cropped, (nw, nh), interpolation=cv2.INTER_AREA)
        res = cv2.matchTemplate(drawing, tmpl, cv2.TM_CCOEFF_NORMED)
        peak = float(res.max())
        scale_scan.append({
            "scale": scale,
            "template_px": [nh, nw],
            "max_score": round(peak, 3),
            "count_at_0.7": int((res >= 0.7).sum()),
            "count_at_0.6": int((res >= 0.6).sum()),
        })
        best_overall = max(best_overall, peak)
    info["max_score"] = best_overall
    info["scale_scan"] = scale_scan

    # Threshold counts at scale=1.0 for reference.
    labels = ("0.60", "0.70", "0.75", "0.80", "0.85", "0.90")
    if tmpl_h > draw_h or tmpl_w > draw_w:
        # Same fit rule as in the sweep: an oversized template cannot be
        # matched, so report zero hits instead of letting OpenCV raise.
        info["matches_at_threshold"] = {label: 0 for label in labels}
    else:
        result = cv2.matchTemplate(drawing, template_cropped, cv2.TM_CCOEFF_NORMED)
        info["matches_at_threshold"] = {
            label: int((result >= float(label)).sum()) for label in labels
        }
    return info
def _merge_template_contour(ink: np.ndarray) -> np.ndarray | None:
    """Fuse a template's strokes and return its largest contour.

    A 5x5 morphological closing bridges small gaps so that multi-stroke
    symbols (e.g. the line + arc of '-C') become one shape. The template is
    small, so this merge is safe and does not affect matching against the
    drawing.

    Returns:
        The contour with the greatest ``cv2.contourArea``, or ``None`` when
        no contour is found.
    """
    kernel = np.ones((5, 5), np.uint8)
    bridged = cv2.morphologyEx(ink, cv2.MORPH_CLOSE, kernel)
    contours, _hierarchy = cv2.findContours(
        bridged, cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE
    )
    return max(contours, key=cv2.contourArea, default=None)
def count_template(
    template_path: Path,
    drawing_path: Path,
    threshold: float = DEFAULT_THRESHOLD,
    rotations: Iterable[int] = (0, 90, 180, 270),
    scales: Iterable[float] = (0.6, 0.8, 1.0, 1.25, 1.5),
    exclude_box: tuple | None = None,
    area_tolerance: float = 200.0,
    mirror: bool = True,
) -> dict:
    """Count occurrences of a symbol template in a drawing.

    Multi-scale, multi-rotation template matching. For every variant the
    similarity at each position is ``max(1 - TM_SQDIFF_NORMED,
    TM_CCOEFF_NORMED)``; positions scoring >= ``threshold`` become candidate
    boxes, which are then de-duplicated with non-max suppression (IoU 0.3).

    With mirror=True the template is also tested flipped horizontally — at
    each rotation — so mirrored instances are caught (e.g. a socket facing
    left vs facing right).

    Args:
        template_path: image of the symbol to look for.
        drawing_path: image of the drawing to search.
        threshold: minimum similarity for a position to count as a hit.
        rotations: template rotations in degrees. 90/180/270 use exact
            quarter turns; any other non-zero angle is resampled.
        scales: template scale factors to try.
        exclude_box: optional ``(x, y, w, h)`` region in original drawing
            pixels (e.g. the legend) that is blanked before matching.
        area_tolerance: accepted for interface compatibility; not used by
            this implementation.
        mirror: also search with the horizontally flipped template.

    Returns:
        ``{"count", "matches", "threshold_used"}`` where each match is a
        dict with ``x``, ``y``, ``w``, ``h`` (original drawing pixels) and
        ``score`` (rounded to 3 decimals).

    Raises:
        RuntimeError: propagated from ``_prep`` when an image cannot be read.
    """
    import concurrent.futures

    # Both are typed as Iterable and are walked inside nested loops, so a
    # one-shot iterator would be exhausted after the first pass.
    rotations = tuple(rotations)
    scales = tuple(scales)

    template = _crop_to_content(_prep(template_path))
    drawing = _prep(drawing_path)

    # Downscale oversized drawings; coord_scale maps results back to the
    # original pixel grid.
    # NOTE(review): the template is not rescaled by coord_scale here;
    # presumably the `scales` sweep is expected to absorb the difference —
    # confirm.
    coord_scale = 1.0
    if max(drawing.shape) > MAX_DRAWING_PX:
        coord_scale = max(drawing.shape) / MAX_DRAWING_PX
        new_w = int(drawing.shape[1] / coord_scale)
        new_h = int(drawing.shape[0] / coord_scale)
        drawing = cv2.resize(drawing, (new_w, new_h), interpolation=cv2.INTER_AREA)

    if exclude_box is not None:
        ex_x, ex_y, ex_w, ex_h = exclude_box
        draw_h, draw_w = drawing.shape
        left = int(ex_x / coord_scale)
        top = int(ex_y / coord_scale)
        # Derive the far edges from the *unclamped* origin so a box that
        # starts off-canvas does not grow once its origin is clamped.
        right = left + int(ex_w / coord_scale)
        bottom = top + int(ex_h / coord_scale)
        left = max(0, min(draw_w, left))
        top = max(0, min(draw_h, top))
        right = max(0, min(draw_w, right))
        bottom = max(0, min(draw_h, bottom))
        # 0 is background in the ink map, so this erases the region.
        drawing[top:bottom, left:right] = 0
        logger.info(
            "Masked legend region (%d,%d,%d,%d)",
            left, top, max(0, right - left), max(0, bottom - top),
        )

    if min(template.shape) < 8 or int((template > 0).sum()) < 5:
        logger.warning("Template too small / empty after preprocessing")
        return {"count": 0, "matches": [], "threshold_used": threshold}

    def _rotate(img, angle_deg):
        if angle_deg == 0:
            return img
        if angle_deg in (90, 180, 270):
            return np.rot90(img, k=angle_deg // 90)
        # Arbitrary angle — rotate with bbox expansion; the exposed border
        # is filled with 0, i.e. background in the ink map.
        h, w = img.shape[:2]
        center = (w / 2, h / 2)
        M = cv2.getRotationMatrix2D(center, angle_deg, 1.0)
        cos = abs(M[0, 0])
        sin = abs(M[0, 1])
        out_w = int(h * sin + w * cos)
        out_h = int(h * cos + w * sin)
        M[0, 2] += out_w / 2 - center[0]
        M[1, 2] += out_h / 2 - center[1]
        return cv2.warpAffine(img, M, (out_w, out_h),
                              flags=cv2.INTER_NEAREST, borderValue=0)

    # Variants to search with: (original, mirrored) × rotations × scales.
    variants = [template]
    if mirror:
        variants.append(cv2.flip(template, 1))

    jobs = []
    for variant in variants:
        for rot in rotations:
            base = _rotate(variant, rot)
            for scale in scales:
                tmpl_w = max(8, int(base.shape[1] * scale))
                tmpl_h = max(8, int(base.shape[0] * scale))
                if tmpl_h > drawing.shape[0] or tmpl_w > drawing.shape[1]:
                    # matchTemplate needs the template to fit in the image.
                    continue
                tmpl = cv2.resize(base, (tmpl_w, tmpl_h),
                                  interpolation=cv2.INTER_AREA)
                jobs.append((tmpl, tmpl_w, tmpl_h))

    def _run(job):
        tmpl, tmpl_w, tmpl_h = job
        sq = cv2.matchTemplate(drawing, tmpl, cv2.TM_SQDIFF_NORMED)
        cc = cv2.matchTemplate(drawing, tmpl, cv2.TM_CCOEFF_NORMED)
        sim = np.maximum(1.0 - sq, cc)
        ys, xs = np.nonzero(sim >= threshold)
        # Build all hit boxes at once instead of one Python tuple per pixel.
        boxes = np.column_stack((xs, ys, xs + tmpl_w, ys + tmpl_h)).astype(np.float64)
        return boxes, sim[ys, xs].astype(np.float64)

    # Threading: cv2.matchTemplate releases the GIL, so threads give real
    # speedup. ex.map preserves job order, keeping results deterministic.
    box_chunks = []
    score_chunks = []
    with concurrent.futures.ThreadPoolExecutor(max_workers=4) as ex:
        for boxes, scores in ex.map(_run, jobs):
            if len(scores):
                box_chunks.append(boxes)
                score_chunks.append(scores)

    if not box_chunks:
        return {"count": 0, "matches": [], "threshold_used": threshold}

    all_boxes = np.concatenate(box_chunks).tolist()
    all_scores = np.concatenate(score_chunks).tolist()
    keep = _nms(all_boxes, all_scores, overlap_thresh=0.3)

    matches = []
    for i in keep:
        x0, y0, x1, y1 = all_boxes[i]
        matches.append({
            "x": int(x0 * coord_scale),
            "y": int(y0 * coord_scale),
            "w": int((x1 - x0) * coord_scale),
            "h": int((y1 - y0) * coord_scale),
            "score": round(all_scores[i], 3),
        })
    return {"count": len(matches), "matches": matches, "threshold_used": threshold}