Initial portal commit: landing + 9 AI-powered apps
Apps:
- dwg-rooms: extract room numbers from DWG/DXF
- dwg-counting: count symbols in PDF drawings (OpenCV template matching)
- contract-check: review PDF contracts against a checklist (Claude vision + Tesseract OCR fallback)
- email-drafter: bullet notes → polished Czech/English business emails
- invoice-extractor: PDF/image invoice → structured data → Excel
- translator: Czech-first translator across 19 languages with tone control
- vv-check: find inconsistent unit prices across VV sheets in one workbook
- vv-compare: diff original vs new VV files (changes / added / removed)
- feature-request: portal users submit ideas + sample files

Infrastructure:
- LiteLLM gateway with per-app virtual keys + budgets
- Langfuse observability
- Geist font, shared theme, cross-subdomain back link + theme sync via cookie/URL
- Caddy reverse proxy on *.klas.chat
This commit is contained in:
286
dwg-counting/counting.py
Normal file
286
dwg-counting/counting.py
Normal file
@@ -0,0 +1,286 @@
"""Shape-based symbol counting via contour matching.

NOTE(review): this header describes a contour-matching pipeline, but
``count_template`` in this file matches with ``cv2.matchTemplate``
(TM_SQDIFF_NORMED / TM_CCOEFF_NORMED) and no ``cv2.matchShapes`` call is
present; ``_merge_template_contour`` is the only contour helper defined
here. Reconcile this description with the implementation.

Why not cv2.matchTemplate: CAD symbols are usually thin-line drawings on a
white background (~10-20% ink). Normalized cross-correlation gives spuriously
high scores for any sparse-ink region (e.g. wall edges), producing false
positives everywhere.

Approach used here:
  1. Binarize template + drawing to "ink" maps.
  2. Find external contours in both.
  3. Use the template's main contour as a shape reference.
  4. For every contour in the drawing, compare via cv2.matchShapes (Hu moments
     — invariant to scale, rotation, translation).
  5. Filter by area ratio (similar size to template, allowing ±factor).
  6. Keep contours below a shape-distance threshold.

This is the classic approach to CAD symbol takeoff.
"""
|
||||
import logging
|
||||
from pathlib import Path
|
||||
from typing import Iterable
|
||||
|
||||
import cv2
|
||||
import numpy as np
|
||||
|
||||
logger = logging.getLogger(__name__)

# Default minimum similarity score for count_template; a position is a
# candidate match when its score is >= this value, so higher = stricter.
# 0.65 is permissive, 0.85 strict.
DEFAULT_THRESHOLD = 0.7
|
||||
|
||||
|
||||
def _prep(img_path: Path) -> np.ndarray:
    """Load an image as grayscale and turn it into a binary ink map.

    Pixels brighter than 245 become 0 (background); everything else becomes
    255 (ink). Lines are deliberately not dilated: contour-based matching
    needs strokes to stay thin, otherwise separate symbols fuse into one
    large contour through the drawing's dense linework.

    Raises:
        RuntimeError: if OpenCV cannot read the file.
    """
    gray = cv2.imread(str(img_path), cv2.IMREAD_GRAYSCALE)
    if gray is None:
        raise RuntimeError(f"Could not load {img_path}")
    # THRESH_BINARY_INV: above the threshold -> 0, otherwise -> maxval (255).
    _retval, ink_map = cv2.threshold(gray, 245, 255, cv2.THRESH_BINARY_INV)
    return ink_map
|
||||
|
||||
|
||||
def _crop_to_content(template: np.ndarray, bg_threshold: int = 240) -> np.ndarray:
|
||||
"""Crop a template to its non-background bounding box.
|
||||
|
||||
After _prep the template is BINARY (ink=255, bg=0). So 'content' is
|
||||
everything with value > 0. The bg_threshold param is kept for the
|
||||
grayscale case for backward compat.
|
||||
"""
|
||||
if template.max() <= 1 or template.min() == 0 and template.max() == 255:
|
||||
# Binary map: any non-zero pixel is ink
|
||||
mask = template > 0
|
||||
else:
|
||||
mask = template < bg_threshold
|
||||
if not mask.any():
|
||||
return template
|
||||
ys, xs = np.where(mask)
|
||||
y0, y1 = ys.min(), ys.max() + 1
|
||||
x0, x1 = xs.min(), xs.max() + 1
|
||||
pad = 2
|
||||
y0 = max(0, y0 - pad)
|
||||
x0 = max(0, x0 - pad)
|
||||
y1 = min(template.shape[0], y1 + pad)
|
||||
x1 = min(template.shape[1], x1 + pad)
|
||||
return template[y0:y1, x0:x1]
|
||||
|
||||
|
||||
def _nms(boxes: list[tuple], scores: list[float], overlap_thresh: float = 0.3) -> list[int]:
|
||||
"""Non-max suppression. Returns indices of kept boxes."""
|
||||
if not boxes:
|
||||
return []
|
||||
boxes_arr = np.array(boxes, dtype=np.float32)
|
||||
x1 = boxes_arr[:, 0]
|
||||
y1 = boxes_arr[:, 1]
|
||||
x2 = boxes_arr[:, 2]
|
||||
y2 = boxes_arr[:, 3]
|
||||
areas = (x2 - x1) * (y2 - y1)
|
||||
order = np.argsort(scores)[::-1]
|
||||
keep = []
|
||||
while order.size > 0:
|
||||
i = order[0]
|
||||
keep.append(int(i))
|
||||
xx1 = np.maximum(x1[i], x1[order[1:]])
|
||||
yy1 = np.maximum(y1[i], y1[order[1:]])
|
||||
xx2 = np.minimum(x2[i], x2[order[1:]])
|
||||
yy2 = np.minimum(y2[i], y2[order[1:]])
|
||||
w = np.maximum(0.0, xx2 - xx1)
|
||||
h = np.maximum(0.0, yy2 - yy1)
|
||||
inter = w * h
|
||||
union = areas[i] + areas[order[1:]] - inter
|
||||
iou = inter / np.maximum(union, 1e-6)
|
||||
order = order[1:][iou <= overlap_thresh]
|
||||
return keep
|
||||
|
||||
|
||||
# Longest drawing side, in pixels, that count_template searches as-is;
# drawings with a longer side are downscaled to this size before matching.
MAX_DRAWING_PX = 9000 # effectively no downscale for typical A1/A0
|
||||
|
||||
|
||||
def debug_template(template_path: Path, drawing_path: Path) -> dict:
    """Return diagnostics for tuning a symbol template.

    Both images are binarised with ``_prep`` and the template is cropped
    with ``_crop_to_content``. The result reports the template's size and
    ink coverage, a sweep of ``TM_CCOEFF_NORMED`` scores over a fixed set of
    template scales, and the number of result-map positions at or above
    several thresholds at the template's native size.

    Args:
        template_path: image of the symbol.
        drawing_path: image of the drawing.

    Returns:
        A dict with ``template_size``, ``template_cropped_size``,
        ``template_ink_pixels``, ``template_total_pixels`` and
        ``drawing_size``. If the cropped template is smaller than 8 px on a
        side, an ``error`` key is added and the dict is returned early.
        Otherwise ``max_score`` (``-1.0`` if no scale fit inside the
        drawing), ``scale_scan`` and ``matches_at_threshold`` are added.

    Raises:
        RuntimeError: if either image cannot be loaded (from ``_prep``).
    """
    template = _prep(template_path)
    template_cropped = _crop_to_content(template)
    drawing = _prep(drawing_path)

    info = {
        "template_size": list(template.shape),
        "template_cropped_size": list(template_cropped.shape),
        # _prep always yields a binary ink map (ink=255, background=0), so
        # ink is simply the non-zero pixels. This also reports all-blank and
        # all-ink templates correctly.
        "template_ink_pixels": int(np.count_nonzero(template_cropped)),
        "template_total_pixels": int(template_cropped.size),
        "drawing_size": list(drawing.shape),
    }
    if min(template_cropped.shape) < 8:
        info["error"] = "template too small after content crop"
        return info

    tmpl_h, tmpl_w = template_cropped.shape
    draw_h, draw_w = drawing.shape

    # Scale sweep — finds the size at which the template matches best.
    scale_scan = []
    best_overall = -1.0
    for scale in (0.15, 0.25, 0.35, 0.5, 0.7, 0.85, 1.0, 1.2, 1.5, 2.0):
        nw = max(8, int(tmpl_w * scale))
        nh = max(8, int(tmpl_h * scale))
        if nh > draw_h or nw > draw_w:
            # matchTemplate needs the template to fit inside the image.
            continue
        tmpl = cv2.resize(template_cropped, (nw, nh), interpolation=cv2.INTER_AREA)
        res = cv2.matchTemplate(drawing, tmpl, cv2.TM_CCOEFF_NORMED)
        m = float(res.max())
        scale_scan.append({
            "scale": scale,
            "template_px": [nh, nw],
            "max_score": round(m, 3),
            "count_at_0.7": int((res >= 0.7).sum()),
            "count_at_0.6": int((res >= 0.6).sum()),
        })
        best_overall = max(best_overall, m)
    info["max_score"] = best_overall
    info["scale_scan"] = scale_scan

    # Threshold counts at scale=1.0 for reference. If the template does not
    # fit inside the drawing there is nothing to match, so report zeros
    # instead of letting matchTemplate raise.
    cutoffs = (0.60, 0.70, 0.75, 0.80, 0.85, 0.90)
    if tmpl_h <= draw_h and tmpl_w <= draw_w:
        result = cv2.matchTemplate(drawing, template_cropped, cv2.TM_CCOEFF_NORMED)
        info["matches_at_threshold"] = {
            f"{cutoff:.2f}": int((result >= cutoff).sum()) for cutoff in cutoffs
        }
    else:
        info["matches_at_threshold"] = {f"{cutoff:.2f}": 0 for cutoff in cutoffs}
    return info
|
||||
|
||||
|
||||
def _merge_template_contour(ink: np.ndarray) -> np.ndarray | None:
    """Fuse a template's strokes with a morphological close and pick one contour.

    A 5x5 closing is applied so that separate strokes of one symbol (for
    example the line and the arc of '-C') join into a single shape; the
    template is small enough that this does not disturb matching against the
    drawing. Of all contours found afterwards, the one with the largest area
    is returned.

    Returns:
        The largest-area contour, or None when no contour is found.
    """
    kernel = np.ones((5, 5), np.uint8)
    merged = cv2.morphologyEx(ink, cv2.MORPH_CLOSE, kernel)
    contours, _hierarchy = cv2.findContours(
        merged, cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE
    )
    return max(contours, key=cv2.contourArea, default=None)
|
||||
|
||||
|
||||
def count_template(
    template_path: Path,
    drawing_path: Path,
    threshold: float = DEFAULT_THRESHOLD,
    rotations: Iterable[int] = (0, 90, 180, 270),
    scales: Iterable[float] = (0.6, 0.8, 1.0, 1.25, 1.5),
    exclude_box: tuple | None = None,
    area_tolerance: float = 200.0,
    mirror: bool = True,
) -> dict:
    """Count occurrences of a symbol in a drawing via template matching.

    The template is searched at every combination of rotation, scale and
    (with ``mirror=True``) horizontal flip, so mirrored instances such as a
    socket facing left vs. right are caught too. For each combination the
    similarity map is the element-wise maximum of ``1 - TM_SQDIFF_NORMED``
    and ``TM_CCOEFF_NORMED``. Every position scoring ``>= threshold``
    becomes a candidate box, and overlapping candidates are collapsed with
    ``_nms`` (IoU 0.3).

    Args:
        template_path: image of the symbol to look for.
        drawing_path: image of the drawing to search.
        threshold: minimum similarity for a candidate match.
        rotations: template rotations in degrees. 90/180/270 are rotated
            losslessly; any other non-zero angle is resampled
            (nearest-neighbour) into an enlarged canvas.
        scales: template scale factors to try.
        exclude_box: optional ``(x, y, w, h)`` region, in original drawing
            pixels, that is blanked before matching (e.g. the legend).
        area_tolerance: currently unused; kept so existing callers that
            pass it keep working.
        mirror: also search with the horizontally flipped template.

    Returns:
        ``{"count": int, "matches": [...], "threshold_used": threshold}``
        where each match is a dict with ``x``, ``y``, ``w``, ``h`` (original
        drawing pixels) and ``score``.

    Raises:
        RuntimeError: if either image cannot be loaded (from ``_prep``).
    """
    import concurrent.futures

    # Both are re-iterated inside nested loops below; a one-shot iterable
    # (e.g. a generator) would otherwise be exhausted after the first pass.
    rotations = tuple(rotations)
    scales = tuple(scales)

    template = _crop_to_content(_prep(template_path))
    drawing = _prep(drawing_path)

    # Downscale oversized drawings; coord_scale maps results back afterwards.
    # NOTE(review): the template is not rescaled by coord_scale; presumably
    # the `scales` sweep is expected to absorb the difference — confirm.
    coord_scale = 1.0
    longest_side = max(drawing.shape)
    if longest_side > MAX_DRAWING_PX:
        coord_scale = longest_side / MAX_DRAWING_PX
        new_w = max(1, int(drawing.shape[1] / coord_scale))
        new_h = max(1, int(drawing.shape[0] / coord_scale))
        drawing = cv2.resize(drawing, (new_w, new_h), interpolation=cv2.INTER_AREA)

    if exclude_box is not None:
        ex_x, ex_y, ex_w, ex_h = (int(v / coord_scale) for v in exclude_box)
        h, w = drawing.shape
        ex_x = max(0, min(w, ex_x))
        ex_y = max(0, min(h, ex_y))
        ex_x2 = max(0, min(w, ex_x + ex_w))
        ex_y2 = max(0, min(h, ex_y + ex_h))
        # 0 is "no ink" in the binarised map, so this erases the region.
        drawing[ex_y:ex_y2, ex_x:ex_x2] = 0
        logger.info("Masked legend region (%d,%d,%d,%d)", ex_x, ex_y, ex_w, ex_h)

    if min(template.shape) < 8 or int((template > 0).sum()) < 5:
        logger.warning("Template too small / empty after preprocessing")
        return {"count": 0, "matches": [], "threshold_used": threshold}

    def _rotate(img, angle_deg):
        if angle_deg == 0:
            return img
        if angle_deg in (90, 180, 270):
            # rot90 returns a strided view; hand OpenCV a contiguous copy.
            return np.ascontiguousarray(np.rot90(img, k=angle_deg // 90))
        # Arbitrary angle — rotate into an enlarged bounding box; the new
        # border is filled with 0, i.e. background in the ink map.
        h, w = img.shape[:2]
        center = (w / 2, h / 2)
        M = cv2.getRotationMatrix2D(center, angle_deg, 1.0)
        cos = abs(M[0, 0])
        sin = abs(M[0, 1])
        new_w = int(h * sin + w * cos)
        new_h = int(h * cos + w * sin)
        M[0, 2] += new_w / 2 - center[0]
        M[1, 2] += new_h / 2 - center[1]
        return cv2.warpAffine(img, M, (new_w, new_h),
                              flags=cv2.INTER_NEAREST, borderValue=0)

    # Variants to search with: (original, mirrored) × rotations × scales.
    variants = [template]
    if mirror:
        variants.append(cv2.flip(template, 1))

    jobs_list = []
    for variant in variants:
        for rot in rotations:
            base = _rotate(variant, rot)
            for scale in scales:
                new_w = max(8, int(base.shape[1] * scale))
                new_h = max(8, int(base.shape[0] * scale))
                if new_h > drawing.shape[0] or new_w > drawing.shape[1]:
                    # matchTemplate needs the template to fit in the image.
                    continue
                jobs_list.append(
                    cv2.resize(base, (new_w, new_h), interpolation=cv2.INTER_AREA)
                )

    def _run(tmpl):
        tmpl_h, tmpl_w = tmpl.shape[:2]
        sq = cv2.matchTemplate(drawing, tmpl, cv2.TM_SQDIFF_NORMED)
        cc = cv2.matchTemplate(drawing, tmpl, cv2.TM_CCOEFF_NORMED)
        sim = np.maximum(1.0 - sq, cc)
        ys, xs = np.nonzero(sim >= threshold)
        # Build all candidate boxes at once instead of one Python-level
        # iteration per hit pixel.
        boxes = np.column_stack((xs, ys, xs + tmpl_w, ys + tmpl_h))
        return (
            boxes.astype(np.float64).tolist(),
            sim[ys, xs].astype(np.float64).tolist(),
        )

    all_boxes: list[tuple] = []
    all_scores: list[float] = []
    # Threading: cv2.matchTemplate releases the GIL, so threads give real speedup
    with concurrent.futures.ThreadPoolExecutor(max_workers=4) as ex:
        for boxes, scores in ex.map(_run, jobs_list):
            all_boxes.extend(map(tuple, boxes))
            all_scores.extend(scores)

    if not all_boxes:
        return {"count": 0, "matches": [], "threshold_used": threshold}

    matches = []
    for i in _nms(all_boxes, all_scores, overlap_thresh=0.3):
        x0, y0, x1, y1 = all_boxes[i]
        matches.append({
            # Map back to the coordinates of the original drawing.
            "x": int(x0 * coord_scale),
            "y": int(y0 * coord_scale),
            "w": int((x1 - x0) * coord_scale),
            "h": int((y1 - y0) * coord_scale),
            "score": round(all_scores[i], 3),
        })
    return {"count": len(matches), "matches": matches, "threshold_used": threshold}
|
||||
Reference in New Issue
Block a user