Initial portal commit: landing + 9 AI-powered apps

Apps:
- dwg-rooms: extract room numbers from DWG/DXF
- dwg-counting: count symbols in PDF drawings (OpenCV template matching)
- contract-check: review PDF contracts against a checklist (Claude vision + Tesseract OCR fallback)
- email-drafter: bullet notes → polished Czech/English business emails
- invoice-extractor: PDF/image invoice → structured data → Excel
- translator: Czech-first translator across 19 languages with tone control
- vv-check: find inconsistent unit prices across VV sheets in one workbook
- vv-compare: diff original vs new VV files (changes / added / removed)
- feature-request: portal users submit ideas + sample files

Infrastructure:
- LiteLLM gateway with per-app virtual keys + budgets
- Langfuse observability
- Geist font, shared theme, cross-subdomain back link + theme sync via cookie/URL
- Caddy reverse proxy on *.klas.chat
2026-05-13 15:25:04 +02:00
commit 48cef99257
139 changed files with 20171 additions and 0 deletions
--- a/vv-check/vv_logic.py
+++ b/vv-check/vv_logic.py
@@ -0,0 +1,312 @@
+"""Detect VV sheets in an Excel workbook and find items with inconsistent
+unit prices across them.
+
+A "VV" sheet is identified by either:
+  - Its name contains "VV", "výkaz" or "vykaz" (case-insensitive), OR
+  - It has a typical VV header row with columns matching Poř./Kód/Popis/MJ/Výměra/cena
+    (detected when at least three known columns, including Popis and a price
+    column, are found in one of the first rows).
+
+Items are matched by their description text (normalised: trimmed, whitespace
+runs collapsed, lower-cased). The unit-price comparison is exact (rounded to
+2 decimals to absorb floating-point noise).
+"""
+import logging
+import re
+from collections import defaultdict
+from pathlib import Path
+from typing import Iterable
+
+import openpyxl
+from openpyxl.utils import get_column_letter
+from openpyxl.styles import Alignment, Border, Font, PatternFill, Side
+from openpyxl.workbook import Workbook
+
+logger = logging.getLogger(__name__)
+
+# Heuristic header keywords (Czech), grouped by column role. A header cell
+# matches a role when its normalised (trimmed, lower-cased) text equals or
+# starts with one of the role's hints. is_vv_sheet() accepts a row as the
+# header when at least three roles match, "popis" is among them, and at least
+# one of the price roles ("cena_jed" / "cena_tot") is present.
+#
+# Key order matters: each cell is assigned to the FIRST role whose hints match,
+# so "cena_jed" must stay ahead of "cena_tot" — otherwise the bare "cena" hint
+# would claim headers such as "cena/jed" as the total-price column.
+HEADER_HINTS = {
+    # Item description column.
+    "popis": ["popis", "název", "název položky", "naziv"],
+    # Unit of measure column.
+    # NOTE(review): "j.j." may have been meant as "m.j." — confirm with real files.
+    "mj":    ["mj", "j.j.", "jednotka", "měrná jednotka"],
+    # Quantity column.
+    "vymera": ["výměra", "vymera", "množství", "mnozstvi", "počet", "pocet"],
+    # Unit-price column. Common Czech spellings include "Jedn. cena",
+    # "J. cena", "Jednotková cena", "cena/jed", "kč/mj", ...
+    "cena_jed": ["jednotková cena", "jednotkova cena",
+                 "jedn. cena", "jedn.cena", "jedn cena",
+                 "j. cena", "j.cena", "j cena",
+                 "jed. cena", "jed.cena", "jed cena",
+                 "cena/jed", "cena za jednotku", "cena j.",
+                 "cena jed", "cena za mj", "kč/mj"],
+    # Total-price column. The bare "cena" hint is a catch-all for any price
+    # header that did not match a unit-price spelling above.
+    "cena_tot": ["cena celkem", "cena", "celkem"],
+}
+
+# Allowed VV-name patterns (case-insensitive substring match). Entries are
+# lower-case because they are compared against the lower-cased sheet title.
+VV_NAME_PATTERNS = ["vv", "výkaz", "vykaz"]
+
+
+def normalise(text) -> str:
+    """Return *text* as a canonical key for matching.
+
+    ``None`` becomes the empty string. Any other value is converted with
+    ``str()``, stripped of surrounding whitespace, has every run of
+    whitespace collapsed to a single space, and is lower-cased.
+    """
+    if text is None:
+        return ""
+    stripped = str(text).strip()
+    single_spaced = re.sub(r"\s+", " ", stripped)
+    return single_spaced.lower()
+
+
+def is_vv_sheet(ws) -> tuple[bool, dict | None]:
+    """Decide whether *ws* is a VV sheet and locate its header columns.
+
+    Returns ``(is_vv, header_columns)``. ``is_vv`` is true when the sheet
+    title contains one of ``VV_NAME_PATTERNS`` or a header row was found.
+    ``header_columns`` maps each recognised role to its 1-based column index
+    and additionally stores the header's row number under ``"_header_row"``;
+    it is ``None`` when no header row was found (which can happen even for a
+    sheet accepted by name).
+    """
+    title = ws.title.lower()
+    named_like_vv = any(pattern in title for pattern in VV_NAME_PATTERNS)
+
+    # Only the top-left corner of the sheet is inspected: rows 1-12, columns 1-14.
+    last_row = min(12, ws.max_row)
+    last_col = min(14, ws.max_column)
+
+    for row_no in range(1, last_row + 1):
+        roles: dict[str, int] = {}
+        for col_no in range(1, last_col + 1):
+            text = normalise(ws.cell(row=row_no, column=col_no).value)
+            for role, hints in HEADER_HINTS.items():
+                if role in roles:
+                    continue
+                # A prefix match also covers exact equality.
+                if text.startswith(tuple(hints)):
+                    # First matching role wins; a column serves one role only.
+                    roles[role] = col_no
+                    break
+
+        has_price = "cena_jed" in roles or "cena_tot" in roles
+        if len(roles) >= 3 and "popis" in roles and has_price:
+            roles["_header_row"] = row_no
+            return True, roles
+
+    return named_like_vv, None
+
+
+def extract_items(ws, header_cols: dict) -> list[dict]:
+    """Collect the item rows that follow the header of a VV sheet.
+
+    *header_cols* is the role → column mapping produced by ``is_vv_sheet``
+    (including ``"_header_row"``; row 1 is assumed when that key is absent).
+    Returns a list of dicts with the keys ``row``, ``description``,
+    ``description_norm``, ``mj``, ``quantity`` and ``unit_price``; the list
+    is empty when no description column is known.
+
+    Items are returned even when ``unit_price`` is missing (``None``) so the
+    UI can report "this sheet is a VV but has no prices" instead of silently
+    dropping everything.
+    """
+    popis_col = header_cols.get("popis")
+    if not popis_col:
+        return []
+
+    first_data_row = header_cols.get("_header_row", 1) + 1
+    mj_col = header_cols.get("mj")
+    vymera_col = header_cols.get("vymera")
+    cena_jed_col = header_cols.get("cena_jed")
+
+    def cell_value(row_no, col_no):
+        return ws.cell(row=row_no, column=col_no).value
+
+    items: list[dict] = []
+    for row_no in range(first_data_row, ws.max_row + 1):
+        raw_description = cell_value(row_no, popis_col)
+        if raw_description is None:
+            continue
+        description = str(raw_description).strip()
+        if not description:
+            continue
+
+        unit = cell_value(row_no, mj_col) if mj_col else None
+        # Section rows like "001: Rozvaděče" — empty MJ + colon in a short description
+        is_section_row = not unit and ":" in description and len(description) < 60
+        if is_section_row:
+            continue
+
+        unit_price: float | None = None
+        if cena_jed_col:
+            unit_price = _to_float(cell_value(row_no, cena_jed_col))
+            if unit_price is not None:
+                unit_price = round(unit_price, 2)
+
+        quantity = _to_float(cell_value(row_no, vymera_col)) if vymera_col else None
+
+        items.append({
+            "row": row_no,
+            "description": description,
+            "description_norm": normalise(description),
+            "mj": str(unit).strip() if unit else "",
+            "quantity": quantity,
+            "unit_price": unit_price,
+        })
+    return items
+
+
+def _to_float(v):
+    """Convert a cell value to ``float``, or return ``None`` if it is not a usable number.
+
+    Numeric values are passed to ``float()`` unchanged. Text values are also
+    accepted in Czech notation, because a price typed as text (``"1 234,50"``)
+    would otherwise be silently treated as missing:
+
+    * whitespace, including non-breaking spaces used as thousands
+      separators, is removed;
+    * a lone comma is read as the decimal separator;
+    * when both ``,`` and ``.`` occur, whichever comes last is the decimal
+      separator and the other one is dropped as a thousands separator.
+
+    ``None``, empty or blank strings, unparsable values and non-finite
+    results (NaN, ±inf) all yield ``None``.
+    """
+    import math  # kept local so this helper needs no change to the module imports
+
+    if v is None or v == "":
+        return None
+    if isinstance(v, str):
+        # \s also matches the non-breaking spaces Excel users paste as separators.
+        text = re.sub(r"\s+", "", v)
+        if "," in text:
+            if "." not in text:
+                text = text.replace(",", ".")
+            elif text.rfind(",") > text.rfind("."):
+                # "1.234,50": dots group thousands, the comma is the decimal mark.
+                text = text.replace(".", "").replace(",", ".")
+            else:
+                # "1,234.50": commas group thousands.
+                text = text.replace(",", "")
+        v = text
+    try:
+        number = float(v)
+    except (ValueError, TypeError):
+        return None
+    # NaN/inf cannot be rounded, ordered or compared meaningfully as prices.
+    return number if math.isfinite(number) else None
+
+
+def analyse(xlsx_path: Path) -> dict:
+    """Run the full price-check analysis on one workbook.
+
+    The workbook is loaded with ``data_only=True``, so formula cells are read
+    as their cached values. Returns a dict with:
+
+    * ``sheets`` — one entry per worksheet (``name``, ``is_vv``, ``items``,
+      ``priced_items``, ``has_unit_price_col``);
+    * ``vv_sheet_count`` / ``vv_sheets_with_prices`` — summary counters;
+    * ``inconsistencies`` — descriptions that occur in two or more sheets
+      with at least two distinct unit prices, sorted by description;
+    * ``total_inconsistencies`` — length of that list.
+    """
+    workbook = openpyxl.load_workbook(xlsx_path, data_only=True)
+
+    sheets_info = []
+    # description_norm → [(sheet name, item), ...]; only priced items take part.
+    by_description: dict[str, list[tuple[str, dict]]] = defaultdict(list)
+
+    for ws in workbook.worksheets:
+        is_vv, header_cols = is_vv_sheet(ws)
+        usable = bool(is_vv and header_cols)
+        items = extract_items(ws, header_cols) if usable else []
+        priced = [it for it in items if it["unit_price"] is not None]
+        sheets_info.append({
+            "name": ws.title,
+            "is_vv": bool(is_vv),
+            "items": len(items),            # total rows recognised as items
+            "priced_items": len(priced),    # items with a unit price filled in
+            "has_unit_price_col": bool(header_cols.get("cena_jed")) if usable else False,
+        })
+        for it in priced:
+            by_description[it["description_norm"]].append((ws.title, it))
+
+    inconsistencies = []
+    for entries in by_description.values():
+        # Repeats inside a single sheet are not a cross-sheet issue.
+        if len({sheet for sheet, _ in entries}) < 2:
+            continue
+        prices = {round(it["unit_price"], 2) for _, it in entries}
+        if len(prices) < 2:
+            continue
+        inconsistencies.append({
+            # The longest spelling seen is used as the readable label.
+            "description": max((it["description"] for _, it in entries), key=len),
+            "occurrences": len(entries),
+            "distinct_prices": sorted(prices),
+            "rows": [
+                {
+                    "sheet": sheet,
+                    "row": it["row"],
+                    "mj": it["mj"],
+                    "unit_price": it["unit_price"],
+                }
+                for sheet, it in entries
+            ],
+        })
+
+    # Stable, case-insensitive ordering of the output.
+    inconsistencies.sort(key=lambda inc: inc["description"].lower())
+
+    vv_sheets = [s for s in sheets_info if s["is_vv"]]
+    return {
+        "sheets": sheets_info,
+        "vv_sheet_count": len(vv_sheets),
+        "vv_sheets_with_prices": sum(1 for s in vv_sheets if s["priced_items"] > 0),
+        "total_inconsistencies": len(inconsistencies),
+        "inconsistencies": inconsistencies,
+    }
+
+
+# ── Excel report writer ─────────────────────────────────────────────
+
+# Colours are hex RGB strings handed to openpyxl (Font color / PatternFill fgColor).
+BLUE = "1F4E78"     # title text and header-row background
+WHITE = "FFFFFF"    # header-row text
+GRAY = "F2F2F2"     # not referenced in the visible part of this module
+RED_BG = "FCE4E4"   # background of the rows carrying the highest price
+# Thin light-grey line on all four sides, shared by header and body cells.
+THIN = Side(style="thin", color="BFBFBF")
+BORDER = Border(left=THIN, right=THIN, top=THIN, bottom=THIN)
+
+
+def _set_text(cell, value):
+    """Assign *value* to *cell*, keeping strings that start with "=" as plain text."""
+    cell.value = value
+    # openpyxl stores any string starting with "=" as a formula. The texts
+    # written by the report are copied from the analysed workbook, so they
+    # must stay literal instead of being evaluated when the report is opened.
+    if isinstance(value, str) and value.startswith("="):
+        cell.data_type = "s"
+    return cell
+
+
+def write_report(result: dict, source_filename: str, out_path: Path) -> Path:
+    """Write the analysis *result* as a two-sheet Excel report and return *out_path*.
+
+    Sheet "Nesoulady" lists one row per occurrence of every inconsistent
+    item (description, sheet, row, MJ, unit price, lowest/highest note) and
+    highlights the rows carrying the highest price. Sheet "Detekované listy"
+    lists every worksheet of the source workbook with its VV status and the
+    number of priced items.
+
+    *result* is the dict returned by ``analyse``; *source_filename* is only
+    shown in the report heading.
+    """
+    wb = Workbook()
+    ws = wb.active
+    ws.title = "Nesoulady"
+
+    # Style objects are built once and shared by all cells that use them.
+    title_font = Font(name="Arial", bold=True, size=14, color=BLUE)
+    header_font = Font(name="Arial", bold=True, size=11, color=WHITE)
+    header_fill = PatternFill("solid", fgColor=BLUE)
+    body_font = Font(name="Arial", size=10)
+    highlight_fill = PatternFill("solid", fgColor=RED_BG)
+
+    # Title rows. The source line always starts with "Zdroj: ", so the file
+    # name can never be read as a formula.
+    ws.cell(row=1, column=1, value="Kontrola jednotkových cen ve výkazech výměr").font = title_font
+    ws.merge_cells("A1:F1")
+    ws.cell(row=2, column=1, value=f"Zdroj: {source_filename}").font = \
+        Font(name="Arial", italic=True, size=10, color="595959")
+    ws.merge_cells("A2:F2")
+
+    # Header
+    headers = ["Název položky", "List VV", "Řádek", "MJ", "Jednotková cena", "Poznámka"]
+    for c, h in enumerate(headers, 1):
+        cell = ws.cell(row=4, column=c, value=h)
+        cell.font = header_font
+        cell.fill = header_fill
+        cell.alignment = Alignment(horizontal="center", vertical="center")
+        cell.border = BORDER
+
+    row = 5
+    if not result["inconsistencies"]:
+        ws.cell(row=row, column=1,
+                value="Žádné nesoulady — všechny položky se stejným názvem mají shodné jednotkové ceny.").font = \
+            Font(name="Arial", size=11, color="006100", italic=True)
+        ws.merge_cells(start_row=row, start_column=1, end_row=row, end_column=6)
+    else:
+        for inc in result["inconsistencies"]:
+            unit_prices = [r["unit_price"] for r in inc["rows"]]
+            min_price = min(unit_prices)
+            max_price = max(unit_prices)
+            for r_info in inc["rows"]:
+                price = r_info["unit_price"]
+                note_parts = []
+                if price == min_price:
+                    note_parts.append("nejnižší")
+                if price == max_price:
+                    note_parts.append("nejvyšší")
+                values = [
+                    inc["description"],
+                    r_info["sheet"],
+                    r_info["row"],
+                    r_info["mj"],
+                    price,
+                    ", ".join(note_parts),
+                ]
+                # Rows with the highest price are highlighted as a visual cue.
+                is_highest = price == max_price and min_price != max_price
+                for c, v in enumerate(values, 1):
+                    cell = _set_text(ws.cell(row=row, column=c), v)
+                    cell.font = body_font
+                    cell.border = BORDER
+                    if c == 5:
+                        cell.number_format = '#,##0.00 "Kč";[Red]-#,##0.00 "Kč";"-"'
+                        cell.alignment = Alignment(horizontal="right", vertical="top")
+                    else:
+                        # Only the description column wraps.
+                        cell.alignment = Alignment(vertical="top", wrap_text=(c == 1))
+                    if is_highest:
+                        cell.fill = highlight_fill
+                row += 1
+
+    # Column widths
+    widths = {1: 56, 2: 22, 3: 8, 4: 8, 5: 16, 6: 16}
+    for c, w in widths.items():
+        ws.column_dimensions[get_column_letter(c)].width = w
+    ws.freeze_panes = "A5"
+    # The filter always spans the header plus at least one row below it.
+    ws.auto_filter.ref = f"A4:F{max(5, row - 1)}"
+
+    # ── Second sheet: per-sheet breakdown ─────────────────────
+    s2 = wb.create_sheet("Detekované listy")
+    s2.cell(row=1, column=1, value="Přehled listů v sešitu").font = title_font
+    s2.merge_cells("A1:D1")
+    s2_headers = ["Název listu", "Je VV?", "Počet položek s cenou", "Poznámka"]
+    for c, h in enumerate(s2_headers, 1):
+        cell = s2.cell(row=3, column=c, value=h)
+        cell.font = header_font
+        cell.fill = header_fill
+        cell.border = BORDER
+
+    sheet_row = 4
+    for s in result["sheets"]:
+        priced = s.get("priced_items", 0)
+        if s["is_vv"] and priced == 0:
+            note = "list VV bez jednotkových cen — nelze kontrolovat"
+        elif not s["is_vv"]:
+            note = "neidentifikován jako VV"
+        else:
+            note = ""
+        values = [s["name"], "Ano" if s["is_vv"] else "Ne", priced, note]
+        for c, v in enumerate(values, 1):
+            cell = _set_text(s2.cell(row=sheet_row, column=c), v)
+            cell.border = BORDER
+            cell.font = body_font
+        sheet_row += 1
+    for c, w in {1: 30, 2: 10, 3: 22, 4: 40}.items():
+        s2.column_dimensions[get_column_letter(c)].width = w
+
+    wb.save(str(out_path))
+    return out_path