Files
AI_portal/vv-compare/vv_compare.py
Ondřej Glaser 48cef99257 Initial portal commit: landing + 9 AI-powered apps
Apps:
- dwg-rooms: extract room numbers from DWG/DXF
- dwg-counting: count symbols in PDF drawings (OpenCV template matching)
- contract-check: review PDF contracts against a checklist (Claude vision + Tesseract OCR fallback)
- email-drafter: bullet notes → polished Czech/English business emails
- invoice-extractor: PDF/image invoice → structured data → Excel
- translator: Czech-first translator across 19 languages with tone control
- vv-check: find inconsistent unit prices across VV sheets in one workbook
- vv-compare: diff original vs new VV files (changes / added / removed)
- feature-request: portal users submit ideas + sample files

Infrastructure:
- LiteLLM gateway with per-app virtual keys + budgets
- Langfuse observability
- Geist font, shared theme, cross-subdomain back link + theme sync via cookie/URL
- Caddy reverse proxy on *.klas.chat
2026-05-13 15:25:04 +02:00

508 lines
21 KiB
Python

"""Compare original vs new VV (Výkaz Výměr) Excel files.
Produces a 4-sheet report:
- Souhrn — overview table with per-sheet counts + grand totals
- Změny — items present in both but with different quantity / MJ
- Přidané — items in new but not in original
- Odebrané — items in original but not in new
Following Jirka's spec exactly: section rows are skipped, change = qty/MJ
diff only (NOT price), heavy use of colour-coding.
"""
import logging
import re
from collections import defaultdict
from pathlib import Path
import openpyxl
from openpyxl.styles import Alignment, Border, Font, PatternFill, Side
from openpyxl.utils import get_column_letter
from openpyxl.workbook import Workbook
logger = logging.getLogger(__name__)
HEADER_HINTS = {
"por": ["poř.", "por.", "pořadí", "č.", "č"],
"kod": ["kód", "kod"],
"popis": ["popis", "název", "název položky"],
"mj": ["mj", "j.j.", "jednotka"],
"vymera": ["výměra", "vymera", "množství", "mnozstvi"],
"cena_jed": ["jednotková cena", "jednotkova cena", "j. cena", "j.cena",
"jed. cena", "cena/jed", "cena j.", "cena za jednotku"],
"cena_tot": ["cena celkem", "celkem", "cena"],
}
# Colors per spec
BLUE = "1F4E78"
WHITE = "FFFFFF"
GRAY = "F2F2F2"
YELLOW = "FFF2CC" # changes
GREEN_BG = "D9EAD3" # added
RED_BG = "F4CCCC" # removed
GREEN_DIFF = "E4F0DC" # positive qty diff
RED_DIFF = "FCE4E4" # negative qty diff
DARK_GREEN = "006100"
DARK_RED = "C00000"
GRAY_TEXT = "595959"
BORDER_GRAY = "BFBFBF"
THIN = Side(style="thin", color=BORDER_GRAY)
BORDER = Border(left=THIN, right=THIN, top=THIN, bottom=THIN)
def normalise(text) -> str:
if text is None:
return ""
return re.sub(r"\s+", " ", str(text).strip())
def _to_float(v):
if v is None or v == "":
return None
try:
return float(v)
except (ValueError, TypeError):
return None
def find_header(ws) -> dict | None:
"""Return header column mapping or None if no VV-like header found."""
for row_idx in range(1, min(13, ws.max_row + 1)):
matched = {}
for col_idx in range(1, min(15, ws.max_column + 1)):
val = normalise(ws.cell(row=row_idx, column=col_idx).value).lower()
for role, hints in HEADER_HINTS.items():
if role in matched:
continue
if any(val == h or val.startswith(h) for h in hints):
matched[role] = col_idx
break
if "popis" in matched and "vymera" in matched and "mj" in matched:
matched["_header_row"] = row_idx
return matched
return None
def extract_items(ws, header: dict) -> tuple[list[dict], str | None]:
"""Return (items, hala_name)."""
header_row = header["_header_row"]
# Hall/object name: row 4 col C per spec, but be flexible
hala = None
for r in range(1, header_row):
for c in range(1, min(8, ws.max_column + 1)):
v = ws.cell(row=r, column=c).value
if v and isinstance(v, str) and 5 < len(v) < 200 \
and "vykaz" not in v.lower() and "výkaz" not in v.lower():
hala = v.strip()
break
if hala:
break
items = []
current_section = None
popis_col = header["popis"]
mj_col = header["mj"]
vymera_col = header["vymera"]
cena_jed_col = header.get("cena_jed")
cena_tot_col = header.get("cena_tot")
kod_col = header.get("kod")
for r in range(header_row + 1, ws.max_row + 1):
popis = ws.cell(row=r, column=popis_col).value
if popis is None or not str(popis).strip():
continue
popis_text = str(popis).strip()
mj_val = ws.cell(row=r, column=mj_col).value
# Detect section row: empty MJ + description contains ":"
if (not mj_val or not str(mj_val).strip()) and ":" in popis_text \
and len(popis_text) < 100:
current_section = popis_text
continue
vymera = _to_float(ws.cell(row=r, column=vymera_col).value)
# Skip rows without quantity (probably subtotals)
if vymera is None:
continue
items.append({
"row": r,
"section": current_section,
"kod": ws.cell(row=r, column=kod_col).value if kod_col else None,
"description": popis_text,
"description_norm": normalise(popis_text),
"mj": str(mj_val).strip() if mj_val else "",
"vymera": vymera,
"cena_jed": _to_float(ws.cell(row=r, column=cena_jed_col).value) if cena_jed_col else None,
"cena_tot": _to_float(ws.cell(row=r, column=cena_tot_col).value) if cena_tot_col else None,
})
return items, hala
def analyse_workbook(path: Path) -> dict:
"""Return {sheet_name: {items, hala, header_found}}."""
wb = openpyxl.load_workbook(path, data_only=True)
out = {}
for ws in wb.worksheets:
header = find_header(ws)
if not header:
continue
items, hala = extract_items(ws, header)
if not items:
continue
out[ws.title] = {"items": items, "hala": hala, "header": header}
wb.close()
return out
def compare(orig_path: Path, new_path: Path) -> dict:
orig_sheets = analyse_workbook(orig_path)
new_sheets = analyse_workbook(new_path)
# Match sheets by exact name first; fall back to position-based match for unmatched.
matched_pairs: list[tuple[str | None, str | None]] = []
orig_used: set[str] = set()
new_used: set[str] = set()
for name in orig_sheets:
if name in new_sheets:
matched_pairs.append((name, name))
orig_used.add(name)
new_used.add(name)
remaining_orig = [s for s in orig_sheets if s not in orig_used]
remaining_new = [s for s in new_sheets if s not in new_used]
# Position match by order of declaration
for o, n in zip(remaining_orig, remaining_new):
matched_pairs.append((o, n))
# Orphans (only orig)
for o in remaining_orig[len(remaining_new):]:
matched_pairs.append((o, None))
for n in remaining_new[len(remaining_orig):]:
matched_pairs.append((None, n))
per_sheet = []
all_changes = []
all_added = []
all_removed = []
for orig_name, new_name in matched_pairs:
orig_items = orig_sheets.get(orig_name, {}).get("items", []) if orig_name else []
new_items = new_sheets.get(new_name, {}).get("items", []) if new_name else []
hala = (new_sheets.get(new_name, {}).get("hala") if new_name else None) \
or (orig_sheets.get(orig_name, {}).get("hala") if orig_name else None) \
or ""
sheet_label = new_name or orig_name or ""
# Pair items by (section, description_norm)
orig_index = defaultdict(list)
for it in orig_items:
orig_index[(it["section"] or "", it["description_norm"])].append(it)
new_index = defaultdict(list)
for it in new_items:
new_index[(it["section"] or "", it["description_norm"])].append(it)
changes = []
added = []
removed = []
# Items present in both
for key, new_list in new_index.items():
orig_list = orig_index.get(key, [])
if not orig_list:
for it in new_list:
added.append({
"sheet": sheet_label, "hala": hala,
"section": key[0], "description": it["description"],
"mj": it["mj"], "vymera": it["vymera"],
"cena_jed": it["cena_jed"], "cena_tot": it["cena_tot"],
})
continue
# Compare in pairs (one-to-one by index)
for idx, new_it in enumerate(new_list):
if idx >= len(orig_list):
# Duplicate added entry
added.append({
"sheet": sheet_label, "hala": hala,
"section": key[0], "description": new_it["description"],
"mj": new_it["mj"], "vymera": new_it["vymera"],
"cena_jed": new_it["cena_jed"], "cena_tot": new_it["cena_tot"],
})
continue
orig_it = orig_list[idx]
qty_diff = (new_it["vymera"] or 0) - (orig_it["vymera"] or 0)
mj_diff = orig_it["mj"] != new_it["mj"]
if abs(qty_diff) > 1e-9 or mj_diff:
changes.append({
"sheet": sheet_label, "hala": hala,
"section": key[0], "description": new_it["description"],
"mj_orig": orig_it["mj"], "mj_new": new_it["mj"],
"vymera_orig": orig_it["vymera"], "vymera_new": new_it["vymera"],
"vymera_diff": qty_diff,
"cena_jed_orig": orig_it["cena_jed"],
"cena_orig": orig_it["cena_tot"],
"cena_new": new_it["cena_tot"],
})
# Items only in original
for key, orig_list in orig_index.items():
if key in new_index:
# Handle leftover originals (orig had more duplicates than new)
used = min(len(new_index[key]), len(orig_list))
for it in orig_list[used:]:
removed.append({
"sheet": sheet_label, "hala": hala,
"section": key[0], "description": it["description"],
"mj": it["mj"], "vymera": it["vymera"],
"cena_jed": it["cena_jed"], "cena_tot": it["cena_tot"],
})
continue
for it in orig_list:
removed.append({
"sheet": sheet_label, "hala": hala,
"section": key[0], "description": it["description"],
"mj": it["mj"], "vymera": it["vymera"],
"cena_jed": it["cena_jed"], "cena_tot": it["cena_tot"],
})
per_sheet.append({
"sheet": sheet_label,
"hala": hala,
"orig_count": len(orig_items),
"new_count": len(new_items),
"changes": len(changes),
"added": len(added),
"removed": len(removed),
})
all_changes.extend(changes)
all_added.extend(added)
all_removed.extend(removed)
return {
"per_sheet": per_sheet,
"changes": all_changes,
"added": all_added,
"removed": all_removed,
}
# ── Excel report writer (4 sheets per spec) ───────────────────
NUM_QTY = '#,##0.##;[Red]-#,##0.##;"-"'
NUM_CZK = '#,##0.00 "";[Red]-#,##0.00 "";"-"'
def _title(ws, text: str, subtitle: str = ""):
cell = ws.cell(row=1, column=1, value=text)
cell.font = Font(name="Arial", bold=True, size=14, color=BLUE)
ws.merge_cells(start_row=1, start_column=1, end_row=1, end_column=12)
if subtitle:
sub = ws.cell(row=2, column=1, value=subtitle)
sub.font = Font(name="Arial", italic=True, size=10, color=GRAY_TEXT)
ws.merge_cells(start_row=2, start_column=1, end_row=2, end_column=12)
def _hdr_cell(cell):
cell.font = Font(name="Arial", bold=True, size=11, color=WHITE)
cell.fill = PatternFill("solid", fgColor=BLUE)
cell.alignment = Alignment(horizontal="center", vertical="center", wrap_text=True)
cell.border = BORDER
def _body_cell(cell, num_format: str | None = None, fill: str | None = None,
bold: bool = False, color: str | None = None,
horizontal: str | None = None):
cell.font = Font(name="Arial", size=10, bold=bold,
color=color or "000000")
cell.border = BORDER
if fill:
cell.fill = PatternFill("solid", fgColor=fill)
if num_format:
cell.number_format = num_format
cell.alignment = Alignment(horizontal=horizontal or "left",
vertical="top",
wrap_text=True)
def write_compare_report(result: dict, orig_filename: str, new_filename: str,
out_path: Path) -> Path:
wb = Workbook()
# ── 1. Souhrn ─────────────────────────────────────────
ws = wb.active
ws.title = "Souhrn"
_title(ws, "Porovnání výkazu výměr",
f"Původní: {orig_filename} · Nový: {new_filename}")
headers = ["List", "Hala / objekt", "Počet pol. (původní)", "Počet pol. (nový)",
"Změněné položky", "Přidané položky", "Odebrané položky"]
for c, h in enumerate(headers, 1):
_hdr_cell(ws.cell(row=4, column=c, value=h))
r = 5
for ps in result["per_sheet"]:
ws.cell(row=r, column=1, value=ps["sheet"])
ws.cell(row=r, column=2, value=ps["hala"])
ws.cell(row=r, column=3, value=ps["orig_count"])
ws.cell(row=r, column=4, value=ps["new_count"])
ws.cell(row=r, column=5, value=ps["changes"])
ws.cell(row=r, column=6, value=ps["added"])
ws.cell(row=r, column=7, value=ps["removed"])
for c in range(1, 8):
_body_cell(ws.cell(row=r, column=c))
# Colored highlights when > 0
if ps["changes"]:
_body_cell(ws.cell(row=r, column=5), fill=YELLOW, bold=True,
horizontal="right")
if ps["added"]:
_body_cell(ws.cell(row=r, column=6), fill=GREEN_BG, bold=True,
color=DARK_GREEN, horizontal="right")
if ps["removed"]:
_body_cell(ws.cell(row=r, column=7), fill=RED_BG, bold=True,
color=DARK_RED, horizontal="right")
r += 1
# CELKEM row
if result["per_sheet"]:
first_data_row = 5
last_data_row = r - 1
ws.cell(row=r, column=1, value="CELKEM")
for c in range(3, 8):
col_letter = get_column_letter(c)
ws.cell(row=r, column=c,
value=f"=SUM({col_letter}{first_data_row}:{col_letter}{last_data_row})")
for c in range(1, 8):
_body_cell(ws.cell(row=r, column=c),
fill=GRAY, bold=True,
horizontal=("right" if c >= 3 else "left"))
r += 1
# Recap box
r += 1
ws.cell(row=r, column=1, value="Rekapitulace změn (celkem)").font = \
Font(name="Arial", bold=True, size=12, color=BLUE)
r += 1
for label, cnt, fill, color in [
("Změněné", len(result["changes"]), YELLOW, None),
("Přidané", len(result["added"]), GREEN_BG, DARK_GREEN),
("Odebrané", len(result["removed"]), RED_BG, DARK_RED),
]:
_body_cell(ws.cell(row=r, column=1, value=label), bold=True)
_body_cell(ws.cell(row=r, column=2, value=cnt),
fill=fill, bold=True, color=color, horizontal="right")
r += 1
r += 1
note = ws.cell(row=r, column=1,
value=("Za „změnu\" je považován pouze rozdíl ve výměře nebo MJ. "
"Cenové rozdíly se ignorují, protože nový VV obvykle ceny "
"neobsahuje."))
note.font = Font(name="Arial", italic=True, size=9, color=GRAY_TEXT)
note.alignment = Alignment(wrap_text=True)
ws.merge_cells(start_row=r, start_column=1, end_row=r, end_column=7)
for c, w in {1: 22, 2: 30, 3: 18, 4: 18, 5: 18, 6: 18, 7: 18}.items():
ws.column_dimensions[get_column_letter(c)].width = w
ws.row_dimensions[4].height = 32
# ── 2. Změny ─────────────────────────────────────────
ws = wb.create_sheet("Změny")
_title(ws, "Změněné položky (rozdíl ve výměře nebo MJ)")
cols = ["List", "Hala", "Sekce", "Popis položky",
"MJ orig.", "MJ nová", "Výměra orig.", "Výměra nová", "Rozdíl výměra",
"Jed. cena orig.", "Cena orig.", "Cena nová"]
for c, h in enumerate(cols, 1):
_hdr_cell(ws.cell(row=4, column=c, value=h))
r = 5
for ch in result["changes"]:
ws.cell(row=r, column=1, value=ch["sheet"])
ws.cell(row=r, column=2, value=ch["hala"])
ws.cell(row=r, column=3, value=ch["section"])
ws.cell(row=r, column=4, value=ch["description"])
ws.cell(row=r, column=5, value=ch["mj_orig"])
ws.cell(row=r, column=6, value=ch["mj_new"])
ws.cell(row=r, column=7, value=ch["vymera_orig"])
ws.cell(row=r, column=8, value=ch["vymera_new"])
ws.cell(row=r, column=9, value=ch["vymera_diff"])
ws.cell(row=r, column=10, value=ch["cena_jed_orig"])
ws.cell(row=r, column=11, value=ch["cena_orig"])
ws.cell(row=r, column=12, value=ch["cena_new"])
for c in range(1, 13):
_body_cell(ws.cell(row=r, column=c))
# Number formats
for c in (7, 8, 9):
ws.cell(row=r, column=c).number_format = NUM_QTY
ws.cell(row=r, column=c).alignment = Alignment(horizontal="right", vertical="top")
for c in (10, 11, 12):
ws.cell(row=r, column=c).number_format = NUM_CZK
ws.cell(row=r, column=c).alignment = Alignment(horizontal="right", vertical="top")
# Highlight diff cell
diff_fill = GREEN_DIFF if ch["vymera_diff"] > 0 else RED_DIFF
_body_cell(ws.cell(row=r, column=9), fill=diff_fill, bold=True,
num_format=NUM_QTY, horizontal="right")
r += 1
widths = {1: 14, 2: 22, 3: 22, 4: 50, 5: 10, 6: 10, 7: 14, 8: 14, 9: 14, 10: 14, 11: 14, 12: 14}
for c, w in widths.items():
ws.column_dimensions[get_column_letter(c)].width = w
ws.freeze_panes = "A5"
ws.auto_filter.ref = f"A4:L{max(5, r - 1)}"
ws.row_dimensions[4].height = 32
# ── 3. Přidané ─────────────────────────────────────────
ws = wb.create_sheet("Přidané")
_title(ws, "Přidané položky (jsou v novém VV, nebyly v původním)")
add_cols = ["List", "Hala", "Sekce", "Popis položky", "MJ", "Výměra",
"Jed. cena", "Cena"]
for c, h in enumerate(add_cols, 1):
_hdr_cell(ws.cell(row=4, column=c, value=h))
r = 5
for it in result["added"]:
vals = [it["sheet"], it["hala"], it["section"], it["description"],
it["mj"], it["vymera"], it["cena_jed"], it["cena_tot"]]
for c, v in enumerate(vals, 1):
_body_cell(ws.cell(row=r, column=c, value=v),
fill=GREEN_BG, color=DARK_GREEN)
ws.cell(row=r, column=6).number_format = NUM_QTY
ws.cell(row=r, column=6).alignment = Alignment(horizontal="right", vertical="top")
ws.cell(row=r, column=7).number_format = NUM_CZK
ws.cell(row=r, column=7).alignment = Alignment(horizontal="right", vertical="top")
ws.cell(row=r, column=8).number_format = NUM_CZK
ws.cell(row=r, column=8).alignment = Alignment(horizontal="right", vertical="top")
r += 1
for c, w in {1: 14, 2: 22, 3: 22, 4: 50, 5: 10, 6: 14, 7: 14, 8: 14}.items():
ws.column_dimensions[get_column_letter(c)].width = w
ws.freeze_panes = "A5"
ws.auto_filter.ref = f"A4:H{max(5, r - 1)}"
ws.row_dimensions[4].height = 32
# ── 4. Odebrané ─────────────────────────────────────────
ws = wb.create_sheet("Odebrané")
_title(ws, "Odebrané položky (byly v původním VV, v novém nejsou)")
for c, h in enumerate(add_cols, 1):
_hdr_cell(ws.cell(row=4, column=c, value=h))
r = 5
first_data_row = r
for it in result["removed"]:
vals = [it["sheet"], it["hala"], it["section"], it["description"],
it["mj"], it["vymera"], it["cena_jed"], it["cena_tot"]]
for c, v in enumerate(vals, 1):
_body_cell(ws.cell(row=r, column=c, value=v),
fill=RED_BG, color=DARK_RED)
ws.cell(row=r, column=6).number_format = NUM_QTY
ws.cell(row=r, column=6).alignment = Alignment(horizontal="right", vertical="top")
ws.cell(row=r, column=7).number_format = NUM_CZK
ws.cell(row=r, column=7).alignment = Alignment(horizontal="right", vertical="top")
ws.cell(row=r, column=8).number_format = NUM_CZK
ws.cell(row=r, column=8).alignment = Alignment(horizontal="right", vertical="top")
r += 1
last_data_row = r - 1
if last_data_row >= first_data_row:
# Sum cena_tot column
ws.cell(row=r, column=1, value="Součet")
ws.cell(row=r, column=8,
value=f"=SUM(H{first_data_row}:H{last_data_row})")
for c in range(1, 9):
_body_cell(ws.cell(row=r, column=c), fill=GRAY, bold=True,
horizontal=("right" if c == 8 else "left"))
ws.cell(row=r, column=8).number_format = NUM_CZK
for c, w in {1: 14, 2: 22, 3: 22, 4: 50, 5: 10, 6: 14, 7: 14, 8: 14}.items():
ws.column_dimensions[get_column_letter(c)].width = w
ws.freeze_panes = "A5"
ws.row_dimensions[4].height = 32
wb.save(str(out_path))
return out_path