Initial portal commit: landing + 9 AI-powered apps
Apps: - dwg-rooms: extract room numbers from DWG/DXF - dwg-counting: count symbols in PDF drawings (OpenCV template matching) - contract-check: review PDF contracts against a checklist (Claude vision + Tesseract OCR fallback) - email-drafter: bullet notes → polished Czech/English business emails - invoice-extractor: PDF/image invoice → structured data → Excel - translator: Czech-first translator across 19 languages with tone control - vv-check: find inconsistent unit prices across VV sheets in one workbook - vv-compare: diff original vs new VV files (changes / added / removed) - feature-request: portal users submit ideas + sample files Infrastructure: - LiteLLM gateway with per-app virtual keys + budgets - Langfuse observability - Geist font, shared theme, cross-subdomain back link + theme sync via cookie/URL - Caddy reverse proxy on *.klas.chat
This commit is contained in:
312
vv-check/vv_logic.py
Normal file
312
vv-check/vv_logic.py
Normal file
@@ -0,0 +1,312 @@
|
||||
"""Detect VV sheets in an Excel workbook and find items with inconsistent
|
||||
unit prices across them.
|
||||
|
||||
A "VV" sheet is identified by either:
|
||||
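- Its name contains "VV", "výkaz" or "vykaz" (case-insensitive), OR
- One of its first 12 rows looks like a VV header: at least three of the
  Popis / MJ / Výměra / unit-price / total-price columns are recognised,
  including Popis and at least one price column.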
|
||||
|
||||
Items are matched by their description text (normalised: trimmed, runs of
whitespace collapsed to one space, lower-cased). The unit-price comparison is
exact after rounding to 2 decimals (to absorb floating-point noise).
|
||||
"""
|
||||
import logging
|
||||
import re
|
||||
from collections import defaultdict
|
||||
from pathlib import Path
|
||||
from typing import Iterable
|
||||
|
||||
import openpyxl
|
||||
from openpyxl.utils import get_column_letter
|
||||
from openpyxl.styles import Alignment, Border, Font, PatternFill, Side
|
||||
from openpyxl.workbook import Workbook
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# Heuristic header keywords (Czech), keyed by column role. We look for the
# row with at least three of these roles.
#
# A header cell is compared after normalisation (trimmed, lower-cased) and
# matches a role when it *starts with* any of the role's hints. Roles are
# tried in the order they are listed here, so "cena_jed" must stay ahead of
# "cena_tot": the bare "cena" hint would otherwise also claim captions such
# as "cena za mj".
#
# Each keyword is listed with and without diacritics so that headers typed
# on a keyboard without Czech accents are still recognised.
HEADER_HINTS = {
    "popis": ["popis", "název", "nazev", "název položky", "naziv"],
    "mj": ["mj", "j.j.", "jednotka", "měrná jednotka", "merna jednotka"],
    "vymera": ["výměra", "vymera", "množství", "mnozstvi", "počet", "pocet"],
    # Unit-price column. Common Czech spellings include "Jedn. cena",
    # "J. cena", "Jednotková cena", "cena/jed", "kč/mj", ...
    "cena_jed": ["jednotková cena", "jednotkova cena",
                 "jedn. cena", "jedn.cena", "jedn cena",
                 "j. cena", "j.cena", "j cena",
                 "jed. cena", "jed.cena", "jed cena",
                 "cena/jed", "cena za jednotku", "cena j.",
                 "cena jed", "cena za mj", "kč/mj"],
    "cena_tot": ["cena celkem", "cena", "celkem"],
}
|
||||
|
||||
# Allowed VV-name patterns: a worksheet counts as a VV sheet by name when its
# lower-cased title contains any of these substrings (hence the match is
# case-insensitive and the patterns themselves must be lower-case).
VV_NAME_PATTERNS = ["vv", "výkaz", "vykaz"]
|
||||
|
||||
|
||||
def normalise(text) -> str:
    """Return *text* as a canonical comparison key.

    ``None`` becomes the empty string. Any other value is converted with
    ``str()``, stripped of leading/trailing whitespace, has every run of
    whitespace replaced by a single space, and is lower-cased.
    """
    if text is None:
        return ""
    trimmed = str(text).strip()
    single_spaced = re.sub(r"\s+", " ", trimmed)
    return single_spaced.lower()
|
||||
|
||||
|
||||
def is_vv_sheet(ws) -> tuple[bool, dict | None]:
    """Decide whether *ws* is a VV sheet and locate its header columns.

    Returns ``(is_vv, header_columns)``. ``header_columns`` maps each
    recognised role to its 1-based column index and additionally carries
    the header's row number under ``"_header_row"``; it is ``None`` when no
    header row was found. ``is_vv`` is true when a header row was found or
    when the sheet title contains one of ``VV_NAME_PATTERNS``.

    Only the first 12 rows and the first 14 columns are inspected.
    """
    lowered_title = ws.title.lower()
    named_like_vv = any(pattern in lowered_title for pattern in VV_NAME_PATTERNS)

    last_row = min(12, ws.max_row)
    last_col = min(14, ws.max_column)

    for row_no in range(1, last_row + 1):
        roles: dict[str, int] = {}
        for col_no in range(1, last_col + 1):
            label = normalise(ws.cell(row=row_no, column=col_no).value)
            # First still-unassigned role whose hints prefix-match this cell;
            # each column is given to at most one role.
            role = next(
                (name for name, hints in HEADER_HINTS.items()
                 if name not in roles and label.startswith(tuple(hints))),
                None,
            )
            if role is not None:
                roles[role] = col_no

        has_price = "cena_jed" in roles or "cena_tot" in roles
        if len(roles) < 3 or "popis" not in roles or not has_price:
            continue

        # First qualifying row wins; remember where it is for the extractor.
        roles["_header_row"] = row_no
        return True, roles

    return named_like_vv, None
|
||||
|
||||
|
||||
def extract_items(ws, header_cols: dict) -> list[dict]:
    """Return the item rows of a VV sheet as a list of dicts.

    *header_cols* is the role → column mapping produced by ``is_vv_sheet``
    (including ``"_header_row"``; row 1 is assumed when it is absent).
    Rows below the header are read up to ``ws.max_row``. A row is skipped
    when its description is blank, or when it looks like a section heading
    (no unit, a colon in the description, fewer than 60 characters).

    Items are returned even when their unit price is missing (``None``) so
    the UI can report "this sheet is a VV but has no prices" instead of
    silently dropping everything. An empty list is returned when there is
    no description column.
    """
    desc_col = header_cols.get("popis")
    if not desc_col:
        return []

    unit_col = header_cols.get("mj")
    qty_col = header_cols.get("vymera")
    price_col = header_cols.get("cena_jed")
    first_data_row = header_cols.get("_header_row", 1) + 1

    def read(row_no, col_no):
        # A role without a mapped column simply reads as None.
        return ws.cell(row=row_no, column=col_no).value if col_no else None

    collected: list[dict] = []
    for row_no in range(first_data_row, ws.max_row + 1):
        raw_desc = read(row_no, desc_col)
        if raw_desc is None:
            continue
        text = str(raw_desc).strip()
        if not text:
            continue

        unit = read(row_no, unit_col)
        # Section rows like "001: Rozvaděče" — empty MJ + colon in popis
        looks_like_section = not unit and ":" in text and len(text) < 60
        if looks_like_section:
            continue

        price = _to_float(read(row_no, price_col)) if price_col else None
        if price is not None:
            price = round(price, 2)

        quantity = _to_float(read(row_no, qty_col)) if qty_col else None

        collected.append({
            "row": row_no,
            "description": text,
            "description_norm": normalise(text),
            "mj": str(unit).strip() if unit else "",
            "quantity": quantity,
            "unit_price": price,
        })
    return collected
|
||||
|
||||
|
||||
def _to_float(v):
|
||||
if v is None or v == "":
|
||||
return None
|
||||
try:
|
||||
return float(v)
|
||||
except (ValueError, TypeError):
|
||||
return None
|
||||
|
||||
|
||||
def analyse(xlsx_path: Path) -> dict:
    """Run the full price-check analysis on a workbook and return a report.

    The workbook is loaded with ``data_only=True``. The returned dict has
    these keys:

    - ``"sheets"``: one summary per worksheet (``name``, ``is_vv``,
      ``items``, ``priced_items``, ``has_unit_price_col``);
    - ``"vv_sheet_count"``: number of sheets flagged as VV;
    - ``"vv_sheets_with_prices"``: VV sheets with at least one priced item;
    - ``"total_inconsistencies"`` and ``"inconsistencies"``: descriptions
      that occur on two or more sheets with differing unit prices, sorted
      by description (case-insensitively).
    """
    workbook = openpyxl.load_workbook(xlsx_path, data_only=True)

    sheet_summaries = []
    items_by_sheet: dict[str, list[dict]] = {}
    for sheet in workbook.worksheets:
        looks_like_vv, columns = is_vv_sheet(sheet)
        summary = {
            "name": sheet.title,
            "is_vv": bool(looks_like_vv),
            "items": 0,                  # total rows recognised as items
            "priced_items": 0,           # items with a unit price filled in
            "has_unit_price_col": False,
        }
        # A sheet matched by name only has no header mapping and is not read.
        if looks_like_vv and columns:
            sheet_items = extract_items(sheet, columns)
            summary["has_unit_price_col"] = bool(columns.get("cena_jed"))
            summary["items"] = len(sheet_items)
            summary["priced_items"] = len(
                [item for item in sheet_items if item["unit_price"] is not None]
            )
            items_by_sheet[sheet.title] = sheet_items
        sheet_summaries.append(summary)

    # Only priced items take part in the consistency check.
    by_description: dict[str, list[tuple[str, dict]]] = defaultdict(list)
    for title, sheet_items in items_by_sheet.items():
        for item in sheet_items:
            if item["unit_price"] is not None:
                by_description[item["description_norm"]].append((title, item))

    findings = []
    for occurrences in by_description.values():
        # Repeats within a single sheet are not a cross-sheet issue.
        if len({title for title, _ in occurrences}) < 2:
            continue
        distinct = {round(item["unit_price"], 2) for _, item in occurrences}
        if len(distinct) < 2:
            continue
        findings.append({
            # The longest spelling seen is used as the readable label.
            "description": max(
                (item["description"] for _, item in occurrences), key=len
            ),
            "occurrences": len(occurrences),
            "distinct_prices": sorted(distinct),
            "rows": [
                {
                    "sheet": title,
                    "row": item["row"],
                    "mj": item["mj"],
                    "unit_price": item["unit_price"],
                }
                for title, item in occurrences
            ],
        })

    # Stable, human-friendly ordering of the output.
    findings.sort(key=lambda finding: finding["description"].lower())

    vv_summaries = [summary for summary in sheet_summaries if summary["is_vv"]]
    return {
        "sheets": sheet_summaries,
        "vv_sheet_count": len(vv_summaries),
        "vv_sheets_with_prices": len(
            [summary for summary in vv_summaries if summary["priced_items"] > 0]
        ),
        "total_inconsistencies": len(findings),
        "inconsistencies": findings,
    }
|
||||
|
||||
|
||||
# ── Excel report writer ─────────────────────────────────────────────

# Hex RGB colour codes shared by the report styling below.
BLUE = "1F4E78"      # title text and header-row fill
WHITE = "FFFFFF"     # header-row text
GRAY = "F2F2F2"      # not referenced by the report writer in this module
RED_BG = "FCE4E4"    # fill for rows carrying the highest unit price
# Thin light-grey line applied to all four edges of a bordered cell.
THIN = Side(style="thin", color="BFBFBF")
BORDER = Border(left=THIN, right=THIN, top=THIN, bottom=THIN)
|
||||
|
||||
|
||||
def write_report(result: dict, source_filename: str, out_path: Path) -> Path:
    """Write the analysis *result* as an Excel workbook and return *out_path*.

    The workbook has two sheets:

    - "Nesoulady": one line per occurrence of every inconsistent item
      (description, sheet, row, unit, unit price, lowest/highest note),
      with the highest-priced lines highlighted; or a single message line
      when there are no inconsistencies.
    - "Detekované listy": one line per source worksheet saying whether it
      was detected as a VV sheet and how many priced items it has.

    *source_filename* is only printed in the report's subtitle.
    """
    def arial(**options):
        # Every font in the report is Arial; only the options differ.
        return Font(name="Arial", **options)

    def header_cell(sheet, row_no, col_no, caption):
        # Shared look of the column-header cells on both sheets.
        cell = sheet.cell(row=row_no, column=col_no, value=caption)
        cell.font = arial(bold=True, size=11, color=WHITE)
        cell.fill = PatternFill("solid", fgColor=BLUE)
        cell.border = BORDER
        return cell

    book = Workbook()
    main = book.active
    main.title = "Nesoulady"

    # Title and source lines
    title_cell = main.cell(row=1, column=1,
                           value="Kontrola jednotkových cen ve výkazech výměr")
    title_cell.font = arial(bold=True, size=14, color=BLUE)
    main.merge_cells("A1:F1")
    source_cell = main.cell(row=2, column=1, value=f"Zdroj: {source_filename}")
    source_cell.font = arial(italic=True, size=10, color="595959")
    main.merge_cells("A2:F2")

    # Column headers
    captions = ["Název položky", "List VV", "Řádek", "MJ", "Jednotková cena", "Poznámka"]
    for col_no, caption in enumerate(captions, start=1):
        header_cell(main, 4, col_no, caption).alignment = \
            Alignment(horizontal="center", vertical="center")

    next_row = 5
    findings = result["inconsistencies"]
    if findings:
        price_format = '#,##0.00 "Kč";[Red]-#,##0.00 "Kč";"-"'
        for finding in findings:
            unit_prices = [entry["unit_price"] for entry in finding["rows"]]
            lowest, highest = min(unit_prices), max(unit_prices)
            for entry in finding["rows"]:
                price = entry["unit_price"]
                tags = [tag
                        for tag, bound in (("nejnižší", lowest), ("nejvyšší", highest))
                        if price == bound]
                record = (
                    finding["description"],
                    entry["sheet"],
                    entry["row"],
                    entry["mj"],
                    price,
                    ", ".join(tags),
                )
                for col_no, content in enumerate(record, start=1):
                    cell = main.cell(row=next_row, column=col_no, value=content)
                    cell.font = arial(size=10)
                    cell.border = BORDER
                    if col_no == 5:
                        # Price column: currency format, right-aligned.
                        cell.number_format = price_format
                        cell.alignment = Alignment(horizontal="right", vertical="top")
                    else:
                        # Only the description column wraps.
                        cell.alignment = Alignment(vertical="top",
                                                   wrap_text=(col_no == 1))
                # Highlight rows with the highest price as a visual cue
                if price == highest and lowest != highest:
                    for col_no in range(1, 7):
                        main.cell(row=next_row, column=col_no).fill = \
                            PatternFill("solid", fgColor=RED_BG)
                next_row += 1
    else:
        notice = main.cell(
            row=next_row, column=1,
            value="Žádné nesoulady — všechny položky se stejným názvem mají shodné jednotkové ceny.")
        notice.font = arial(size=11, color="006100", italic=True)
        main.merge_cells(start_row=next_row, start_column=1,
                         end_row=next_row, end_column=6)

    # Column widths, frozen header, filter over the data range
    for col_no, width in enumerate((56, 22, 8, 8, 16, 16), start=1):
        main.column_dimensions[get_column_letter(col_no)].width = width
    main.freeze_panes = "A5"
    main.auto_filter.ref = f"A4:F{max(5, next_row - 1)}"

    # ── Second sheet: per-sheet breakdown ─────────────────────
    overview = book.create_sheet("Detekované listy")
    heading = overview.cell(row=1, column=1, value="Přehled listů v sešitu")
    heading.font = arial(bold=True, size=14, color=BLUE)
    overview.merge_cells("A1:D1")
    overview_captions = ["Název listu", "Je VV?", "Počet položek s cenou", "Poznámka"]
    for col_no, caption in enumerate(overview_captions, start=1):
        header_cell(overview, 3, col_no, caption)

    for row_no, sheet_info in enumerate(result["sheets"], start=4):
        priced = sheet_info.get("priced_items", 0)
        if not sheet_info["is_vv"]:
            remark = "neidentifikován jako VV"
        elif priced == 0:
            remark = "list VV bez jednotkových cen — nelze kontrolovat"
        else:
            remark = ""
        line = (
            sheet_info["name"],
            "Ano" if sheet_info["is_vv"] else "Ne",
            priced,
            remark,
        )
        for col_no, content in enumerate(line, start=1):
            cell = overview.cell(row=row_no, column=col_no, value=content)
            cell.border = BORDER
            cell.font = arial(size=10)

    for col_no, width in enumerate((30, 10, 22, 40), start=1):
        overview.column_dimensions[get_column_letter(col_no)].width = width

    book.save(str(out_path))
    return out_path
|
||||
Reference in New Issue
Block a user