Files
AI_portal/vv-check/vv_logic.py
Ondřej Glaser 48cef99257 Initial portal commit: landing + 9 AI-powered apps
Apps:
- dwg-rooms: extract room numbers from DWG/DXF
- dwg-counting: count symbols in PDF drawings (OpenCV template matching)
- contract-check: review PDF contracts against a checklist (Claude vision + Tesseract OCR fallback)
- email-drafter: bullet notes → polished Czech/English business emails
- invoice-extractor: PDF/image invoice → structured data → Excel
- translator: Czech-first translator across 19 languages with tone control
- vv-check: find inconsistent unit prices across VV sheets in one workbook
- vv-compare: diff original vs new VV files (changes / added / removed)
- feature-request: portal users submit ideas + sample files

Infrastructure:
- LiteLLM gateway with per-app virtual keys + budgets
- Langfuse observability
- Geist font, shared theme, cross-subdomain back link + theme sync via cookie/URL
- Caddy reverse proxy on *.klas.chat
2026-05-13 15:25:04 +02:00

313 lines
12 KiB
Python

"""Detect VV sheets in an Excel workbook and find items with inconsistent
unit prices across them.
A "VV" sheet is identified by either:
- Its name contains "VV" (case-insensitive), OR
- It has a typical VV header row with columns matching Poř./Kód/Popis/MJ/Výměra/cena.
Items are matched by their description text (normalised: trimmed, multiple
spaces collapsed). The unit-price comparison is exact (rounded to 2 decimals
to absorb floating-point noise).
"""
import logging
import re
from collections import defaultdict
from pathlib import Path
from typing import Iterable
import openpyxl
from openpyxl.utils import get_column_letter
from openpyxl.styles import Alignment, Border, Font, PatternFill, Side
from openpyxl.workbook import Workbook
logger = logging.getLogger(__name__)

# Header keywords (Czech) per column role. A row counts as the VV header when
# enough of its cells begin with one of these hints (see is_vv_sheet). Cell
# text is compared after normalisation, so every hint is lower-case.
HEADER_HINTS = {
    # Item description.
    "popis": ["popis", "název", "název položky", "naziv"],
    # Unit of measure.
    "mj": ["mj", "j.j.", "jednotka", "měrná jednotka"],
    # Quantity.
    "vymera": [
        "výměra", "vymera",
        "množství", "mnozstvi",
        "počet", "pocet",
    ],
    # Unit price. Common Czech spellings include "Jedn. cena", "J. cena",
    # "Jednotková cena", "cena/jed", "kč/mj", ...
    "cena_jed": [
        "jednotková cena", "jednotkova cena",
        "jedn. cena", "jedn.cena", "jedn cena",
        "j. cena", "j.cena", "j cena",
        "jed. cena", "jed.cena", "jed cena",
        "cena/jed", "cena za jednotku", "cena j.",
        "cena jed", "cena za mj", "kč/mj",
    ],
    # Total price. Listed after "cena_jed" so the more specific unit-price
    # hints are tried before the bare "cena" prefix.
    "cena_tot": ["cena celkem", "cena", "celkem"],
}

# Sheet-name fragments that mark a sheet as a VV (case-insensitive substring
# match against the sheet title).
VV_NAME_PATTERNS = ["vv", "výkaz", "vykaz"]
def normalise(text) -> str:
    """Return *text* as a lower-case string with tidy whitespace.

    Leading and trailing whitespace is dropped and every internal run of
    whitespace becomes a single space. ``None`` yields an empty string; any
    other non-string value is converted with ``str()`` first.
    """
    if text is None:
        return ""
    # split() with no separator discards the outer whitespace and treats each
    # whitespace run as one delimiter, so re-joining collapses the runs.
    words = str(text).split()
    return " ".join(words).lower()
def is_vv_sheet(ws) -> tuple[bool, dict | None]:
    """Decide whether *ws* is a VV sheet and locate its header columns.

    The first 12 rows (and first 14 columns) are searched for a header row.
    A cell fills a role from ``HEADER_HINTS`` when its normalised text starts
    with one of that role's hints; each role keeps the first matching column,
    and each cell fills at most one role. A row is accepted as the header
    when at least three roles were found, including "popis" and one of the
    price roles ("cena_jed" / "cena_tot").

    Returns ``(is_vv, header_columns)``:
    - header found: ``(True, mapping)`` where the mapping goes role → 1-based
      column index and also carries ``"_header_row"`` (1-based row index);
    - no header found: ``(name_matches, None)`` where ``name_matches`` says
      whether the sheet title contains one of ``VV_NAME_PATTERNS``.
    """
    title = ws.title.lower()
    named_like_vv = any(pattern in title for pattern in VV_NAME_PATTERNS)

    last_row = min(12, ws.max_row)
    last_col = min(14, ws.max_column)

    for row_no in range(1, last_row + 1):
        roles: dict[str, int] = {}
        for col_no in range(1, last_col + 1):
            text = normalise(ws.cell(row=row_no, column=col_no).value)
            for role, hints in HEADER_HINTS.items():
                # Equality is a special case of a prefix match, so a single
                # startswith over all hints covers both.
                if role not in roles and text.startswith(tuple(hints)):
                    roles[role] = col_no
                    break  # a cell is assigned to one role only

        has_price_col = "cena_jed" in roles or "cena_tot" in roles
        if len(roles) < 3 or "popis" not in roles or not has_price_col:
            continue

        roles["_header_row"] = row_no
        return True, roles

    return named_like_vv, None
def extract_items(ws, header_cols: dict) -> list[dict]:
    """Collect the item rows that follow the header of a VV sheet.

    *header_cols* is the role → column mapping produced by ``is_vv_sheet``
    (including ``"_header_row"``, which defaults to 1 when absent). Without a
    "popis" column nothing can be extracted and an empty list is returned.

    Rows with an empty description are skipped, and so are section headings
    such as "001: Rozvaděče" (no unit, a colon in the description, fewer than
    60 characters).

    Items are returned even when their unit price is missing (``None``) so
    the UI can report "this sheet is a VV but has no prices" instead of
    silently dropping everything. Each item is a dict with the keys ``row``,
    ``description``, ``description_norm``, ``mj``, ``quantity`` and
    ``unit_price`` (rounded to 2 decimals when present).
    """
    desc_col = header_cols.get("popis")
    if not desc_col:
        return []

    unit_col = header_cols.get("mj")
    qty_col = header_cols.get("vymera")
    price_col = header_cols.get("cena_jed")
    first_data_row = header_cols.get("_header_row", 1) + 1

    def read(row_no, col_no):
        # A role without a detected column reads as an empty cell.
        return ws.cell(row=row_no, column=col_no).value if col_no else None

    collected: list[dict] = []
    for row_no in range(first_data_row, ws.max_row + 1):
        raw_desc = read(row_no, desc_col)
        if raw_desc is None:
            continue
        description = str(raw_desc).strip()
        if not description:
            continue

        unit = read(row_no, unit_col)
        looks_like_section = ":" in description and len(description) < 60
        if not unit and looks_like_section:
            continue

        price = _to_float(read(row_no, price_col))
        if price is not None:
            price = round(price, 2)

        collected.append({
            "row": row_no,
            "description": description,
            "description_norm": normalise(description),
            "mj": str(unit).strip() if unit else "",
            "quantity": _to_float(read(row_no, qty_col)),
            "unit_price": price,
        })
    return collected
def _to_float(v):
if v is None or v == "":
return None
try:
return float(v)
except (ValueError, TypeError):
return None
def analyse(xlsx_path: Path) -> dict:
    """Run the full price-check analysis on one workbook.

    The workbook is loaded with ``data_only=True``. Every worksheet is
    classified with ``is_vv_sheet``; items are extracted from the sheets for
    which a header row was located. Items that carry a unit price are grouped
    by normalised description, and a group is reported as an inconsistency
    when it spans at least two sheets and holds at least two distinct unit
    prices (compared after rounding to 2 decimals).

    Returns a dict with:
    - ``sheets``: one summary per worksheet (``name``, ``is_vv``, ``items``,
      ``priced_items``, ``has_unit_price_col``);
    - ``vv_sheet_count``: number of sheets classified as VV;
    - ``vv_sheets_with_prices``: VV sheets holding at least one priced item;
    - ``total_inconsistencies``: length of ``inconsistencies``;
    - ``inconsistencies``: entries with ``description``, ``occurrences``,
      ``distinct_prices`` (sorted) and ``rows`` (``sheet``, ``row``, ``mj``,
      ``unit_price``), ordered by lower-cased description.
    """
    workbook = openpyxl.load_workbook(xlsx_path, data_only=True)

    sheets_info = []
    items_by_sheet: dict[str, list[dict]] = {}
    for sheet in workbook.worksheets:
        is_vv, header_cols = is_vv_sheet(sheet)
        summary = {
            "name": sheet.title,
            "is_vv": bool(is_vv),
            "items": 0,  # total rows recognised as items
            "priced_items": 0,  # items with a unit price filled in
            "has_unit_price_col": False,
        }
        sheets_info.append(summary)
        # A sheet matched by name only has no header mapping to extract with.
        if not (is_vv and header_cols):
            continue

        sheet_items = extract_items(sheet, header_cols)
        priced = [it for it in sheet_items if it["unit_price"] is not None]
        summary.update(
            has_unit_price_col=bool(header_cols.get("cena_jed")),
            items=len(sheet_items),
            priced_items=len(priced),
        )
        items_by_sheet[sheet.title] = sheet_items

    # Only items WITH a unit price take part in the consistency check.
    by_description: dict[str, list[tuple[str, dict]]] = defaultdict(list)
    for sheet_name, sheet_items in items_by_sheet.items():
        for item in sheet_items:
            if item["unit_price"] is not None:
                by_description[item["description_norm"]].append((sheet_name, item))

    inconsistencies = []
    for occurrences in by_description.values():
        # Fewer than two distinct sheets also covers the single-occurrence
        # case; repeats inside one sheet are not a cross-sheet issue.
        if len({sheet_name for sheet_name, _ in occurrences}) < 2:
            continue
        distinct_prices = {round(item["unit_price"], 2) for _, item in occurrences}
        if len(distinct_prices) < 2:
            continue

        inconsistencies.append({
            # The longest spelling seen is the most readable one.
            "description": max((item["description"] for _, item in occurrences), key=len),
            "occurrences": len(occurrences),
            "distinct_prices": sorted(distinct_prices),
            "rows": [
                {
                    "sheet": sheet_name,
                    "row": item["row"],
                    "mj": item["mj"],
                    "unit_price": item["unit_price"],
                }
                for sheet_name, item in occurrences
            ],
        })

    # Sort by description for stable output.
    inconsistencies.sort(key=lambda found: found["description"].lower())

    vv_summaries = [summary for summary in sheets_info if summary["is_vv"]]
    return {
        "sheets": sheets_info,
        "vv_sheet_count": len(vv_summaries),
        "vv_sheets_with_prices": len(
            [summary for summary in vv_summaries if summary["priced_items"] > 0]
        ),
        "total_inconsistencies": len(inconsistencies),
        "inconsistencies": inconsistencies,
    }
# ── Excel report writer ─────────────────────────────────────────────
# Colour constants are hex RGB strings handed to the openpyxl style objects
# used by write_report.
BLUE = "1F4E78"  # header fill and title font colour
WHITE = "FFFFFF"  # header font colour
GRAY = "F2F2F2"  # NOTE(review): not referenced in the visible part of this module
RED_BG = "FCE4E4"  # fill for rows that carry the highest unit price
# Thin light-grey edge, applied on all four sides of a cell via BORDER.
THIN = Side(style="thin", color="BFBFBF")
BORDER = Border(left=THIN, right=THIN, top=THIN, bottom=THIN)
def write_report(result: dict, source_filename: str, out_path: Path) -> Path:
    """Write the analysis *result* as a two-sheet Excel report.

    Sheet "Nesoulady" lists every row of every inconsistency (description,
    sheet, row, unit, unit price, note). The note marks the lowest and
    highest price of each item, and rows carrying the highest price get a
    red background. When there are no inconsistencies a single merged
    message row is written instead. Sheet "Detekované listy" summarises each
    worksheet of the analysed workbook.

    *result* is the dict returned by ``analyse``; *source_filename* is shown
    in the report subtitle. The workbook is saved to *out_path*, which is
    also the return value.
    """
    book = Workbook()

    # Style objects shared by many cells are built once.
    title_font = Font(name="Arial", bold=True, size=14, color=BLUE)
    header_font = Font(name="Arial", bold=True, size=11, color=WHITE)
    header_fill = PatternFill("solid", fgColor=BLUE)
    body_font = Font(name="Arial", size=10)

    # ── First sheet: inconsistencies ──────────────────────────
    main = book.active
    main.title = "Nesoulady"

    title_cell = main.cell(row=1, column=1,
                           value="Kontrola jednotkových cen ve výkazech výměr")
    title_cell.font = title_font
    main.merge_cells("A1:F1")

    source_cell = main.cell(row=2, column=1, value=f"Zdroj: {source_filename}")
    source_cell.font = Font(name="Arial", italic=True, size=10, color="595959")
    main.merge_cells("A2:F2")

    main_headers = ["Název položky", "List VV", "Řádek", "MJ", "Jednotková cena", "Poznámka"]
    for col_no, label in enumerate(main_headers, 1):
        head = main.cell(row=4, column=col_no, value=label)
        head.font = header_font
        head.fill = header_fill
        head.alignment = Alignment(horizontal="center", vertical="center")
        head.border = BORDER

    next_row = 5
    findings = result["inconsistencies"]
    if findings:
        highlight = PatternFill("solid", fgColor=RED_BG)
        for finding in findings:
            prices = [entry["unit_price"] for entry in finding["rows"]]
            lowest = min(prices)
            highest = max(prices)
            for entry in finding["rows"]:
                price = entry["unit_price"]
                tags = []
                if price == lowest:
                    tags.append("nejnižší")
                if price == highest:
                    tags.append("nejvyšší")
                # Rows with the highest price are highlighted as a visual cue.
                is_top_price = price == highest and lowest != highest

                line = [
                    finding["description"],
                    entry["sheet"],
                    entry["row"],
                    entry["mj"],
                    price,
                    ", ".join(tags),
                ]
                for col_no, value in enumerate(line, 1):
                    cell = main.cell(row=next_row, column=col_no, value=value)
                    cell.font = body_font
                    cell.border = BORDER
                    if col_no == 5:
                        cell.number_format = '#,##0.00 "";[Red]-#,##0.00 "";"-"'
                        cell.alignment = Alignment(horizontal="right", vertical="top")
                    else:
                        # Only the description column wraps.
                        cell.alignment = Alignment(vertical="top",
                                                   wrap_text=(col_no == 1))
                    if is_top_price:
                        cell.fill = highlight
                next_row += 1
    else:
        notice = main.cell(
            row=next_row, column=1,
            value="Žádné nesoulady — všechny položky se stejným názvem mají shodné jednotkové ceny.")
        notice.font = Font(name="Arial", size=11, color="006100", italic=True)
        main.merge_cells(start_row=next_row, start_column=1,
                         end_row=next_row, end_column=6)

    # Column widths
    for col_no, width in {1: 56, 2: 22, 3: 8, 4: 8, 5: 16, 6: 16}.items():
        main.column_dimensions[get_column_letter(col_no)].width = width
    main.freeze_panes = "A5"
    # The filter range always spans at least one row below the header.
    main.auto_filter.ref = f"A4:F{max(5, next_row - 1)}"

    # ── Second sheet: per-sheet breakdown ─────────────────────
    overview = book.create_sheet("Detekované listy")
    overview_title = overview.cell(row=1, column=1, value="Přehled listů v sešitu")
    overview_title.font = title_font
    overview.merge_cells("A1:D1")

    overview_headers = ["Název listu", "Je VV?", "Počet položek s cenou", "Poznámka"]
    for col_no, label in enumerate(overview_headers, 1):
        head = overview.cell(row=3, column=col_no, value=label)
        head.font = header_font
        head.fill = header_fill
        head.border = BORDER

    for row_no, sheet_info in enumerate(result["sheets"], 4):
        priced = sheet_info.get("priced_items", 0)
        if sheet_info["is_vv"] and priced == 0:
            remark = "list VV bez jednotkových cen — nelze kontrolovat"
        elif not sheet_info["is_vv"]:
            remark = "neidentifikován jako VV"
        else:
            remark = ""
        line = [
            sheet_info["name"],
            "Ano" if sheet_info["is_vv"] else "Ne",
            priced,
            remark,
        ]
        for col_no, value in enumerate(line, 1):
            cell = overview.cell(row=row_no, column=col_no, value=value)
            cell.border = BORDER
            cell.font = body_font

    for col_no, width in {1: 30, 2: 10, 3: 22, 4: 40}.items():
        overview.column_dimensions[get_column_letter(col_no)].width = width

    book.save(str(out_path))
    return out_path