"""Compare original vs new VV (Výkaz Výměr) Excel files. Produces a 4-sheet report: - Souhrn — overview table with per-sheet counts + grand totals - Změny — items present in both but with different quantity / MJ - Přidané — items in new but not in original - Odebrané — items in original but not in new Following Jirka's spec exactly: section rows are skipped, change = qty/MJ diff only (NOT price), heavy use of colour-coding. """ import logging import re from collections import defaultdict from pathlib import Path import openpyxl from openpyxl.styles import Alignment, Border, Font, PatternFill, Side from openpyxl.utils import get_column_letter from openpyxl.workbook import Workbook logger = logging.getLogger(__name__) HEADER_HINTS = { "por": ["poř.", "por.", "pořadí", "č.", "č"], "kod": ["kód", "kod"], "popis": ["popis", "název", "název položky"], "mj": ["mj", "j.j.", "jednotka"], "vymera": ["výměra", "vymera", "množství", "mnozstvi"], "cena_jed": ["jednotková cena", "jednotkova cena", "j. cena", "j.cena", "jed. cena", "cena/jed", "cena j.", "cena za jednotku"], "cena_tot": ["cena celkem", "celkem", "cena"], } # Colors per spec BLUE = "1F4E78" WHITE = "FFFFFF" GRAY = "F2F2F2" YELLOW = "FFF2CC" # changes GREEN_BG = "D9EAD3" # added RED_BG = "F4CCCC" # removed GREEN_DIFF = "E4F0DC" # positive qty diff RED_DIFF = "FCE4E4" # negative qty diff DARK_GREEN = "006100" DARK_RED = "C00000" GRAY_TEXT = "595959" BORDER_GRAY = "BFBFBF" THIN = Side(style="thin", color=BORDER_GRAY) BORDER = Border(left=THIN, right=THIN, top=THIN, bottom=THIN) def normalise(text) -> str: if text is None: return "" return re.sub(r"\s+", " ", str(text).strip()) def _to_float(v): if v is None or v == "": return None try: return float(v) except (ValueError, TypeError): return None def find_header(ws) -> dict | None: """Return header column mapping or None if no VV-like header found.""" for row_idx in range(1, min(13, ws.max_row + 1)): matched = {} for col_idx in range(1, min(15, ws.max_column + 1)): val = normalise(ws.cell(row=row_idx, column=col_idx).value).lower() for role, hints in HEADER_HINTS.items(): if role in matched: continue if any(val == h or val.startswith(h) for h in hints): matched[role] = col_idx break if "popis" in matched and "vymera" in matched and "mj" in matched: matched["_header_row"] = row_idx return matched return None def extract_items(ws, header: dict) -> tuple[list[dict], str | None]: """Return (items, hala_name).""" header_row = header["_header_row"] # Hall/object name: row 4 col C per spec, but be flexible hala = None for r in range(1, header_row): for c in range(1, min(8, ws.max_column + 1)): v = ws.cell(row=r, column=c).value if v and isinstance(v, str) and 5 < len(v) < 200 \ and "vykaz" not in v.lower() and "výkaz" not in v.lower(): hala = v.strip() break if hala: break items = [] current_section = None popis_col = header["popis"] mj_col = header["mj"] vymera_col = header["vymera"] cena_jed_col = header.get("cena_jed") cena_tot_col = header.get("cena_tot") kod_col = header.get("kod") for r in range(header_row + 1, ws.max_row + 1): popis = ws.cell(row=r, column=popis_col).value if popis is None or not str(popis).strip(): continue popis_text = str(popis).strip() mj_val = ws.cell(row=r, column=mj_col).value # Detect section row: empty MJ + description contains ":" if (not mj_val or not str(mj_val).strip()) and ":" in popis_text \ and len(popis_text) < 100: current_section = popis_text continue vymera = _to_float(ws.cell(row=r, column=vymera_col).value) # Skip rows without quantity (probably subtotals) if vymera is None: continue items.append({ "row": r, "section": current_section, "kod": ws.cell(row=r, column=kod_col).value if kod_col else None, "description": popis_text, "description_norm": normalise(popis_text), "mj": str(mj_val).strip() if mj_val else "", "vymera": vymera, "cena_jed": _to_float(ws.cell(row=r, column=cena_jed_col).value) if cena_jed_col else None, "cena_tot": _to_float(ws.cell(row=r, column=cena_tot_col).value) if cena_tot_col else None, }) return items, hala def analyse_workbook(path: Path) -> dict: """Return {sheet_name: {items, hala, header_found}}.""" wb = openpyxl.load_workbook(path, data_only=True) out = {} for ws in wb.worksheets: header = find_header(ws) if not header: continue items, hala = extract_items(ws, header) if not items: continue out[ws.title] = {"items": items, "hala": hala, "header": header} wb.close() return out def compare(orig_path: Path, new_path: Path) -> dict: orig_sheets = analyse_workbook(orig_path) new_sheets = analyse_workbook(new_path) # Match sheets by exact name first; fall back to position-based match for unmatched. matched_pairs: list[tuple[str | None, str | None]] = [] orig_used: set[str] = set() new_used: set[str] = set() for name in orig_sheets: if name in new_sheets: matched_pairs.append((name, name)) orig_used.add(name) new_used.add(name) remaining_orig = [s for s in orig_sheets if s not in orig_used] remaining_new = [s for s in new_sheets if s not in new_used] # Position match by order of declaration for o, n in zip(remaining_orig, remaining_new): matched_pairs.append((o, n)) # Orphans (only orig) for o in remaining_orig[len(remaining_new):]: matched_pairs.append((o, None)) for n in remaining_new[len(remaining_orig):]: matched_pairs.append((None, n)) per_sheet = [] all_changes = [] all_added = [] all_removed = [] for orig_name, new_name in matched_pairs: orig_items = orig_sheets.get(orig_name, {}).get("items", []) if orig_name else [] new_items = new_sheets.get(new_name, {}).get("items", []) if new_name else [] hala = (new_sheets.get(new_name, {}).get("hala") if new_name else None) \ or (orig_sheets.get(orig_name, {}).get("hala") if orig_name else None) \ or "" sheet_label = new_name or orig_name or "" # Pair items by (section, description_norm) orig_index = defaultdict(list) for it in orig_items: orig_index[(it["section"] or "", it["description_norm"])].append(it) new_index = defaultdict(list) for it in new_items: new_index[(it["section"] or "", it["description_norm"])].append(it) changes = [] added = [] removed = [] # Items present in both for key, new_list in new_index.items(): orig_list = orig_index.get(key, []) if not orig_list: for it in new_list: added.append({ "sheet": sheet_label, "hala": hala, "section": key[0], "description": it["description"], "mj": it["mj"], "vymera": it["vymera"], "cena_jed": it["cena_jed"], "cena_tot": it["cena_tot"], }) continue # Compare in pairs (one-to-one by index) for idx, new_it in enumerate(new_list): if idx >= len(orig_list): # Duplicate added entry added.append({ "sheet": sheet_label, "hala": hala, "section": key[0], "description": new_it["description"], "mj": new_it["mj"], "vymera": new_it["vymera"], "cena_jed": new_it["cena_jed"], "cena_tot": new_it["cena_tot"], }) continue orig_it = orig_list[idx] qty_diff = (new_it["vymera"] or 0) - (orig_it["vymera"] or 0) mj_diff = orig_it["mj"] != new_it["mj"] if abs(qty_diff) > 1e-9 or mj_diff: changes.append({ "sheet": sheet_label, "hala": hala, "section": key[0], "description": new_it["description"], "mj_orig": orig_it["mj"], "mj_new": new_it["mj"], "vymera_orig": orig_it["vymera"], "vymera_new": new_it["vymera"], "vymera_diff": qty_diff, "cena_jed_orig": orig_it["cena_jed"], "cena_orig": orig_it["cena_tot"], "cena_new": new_it["cena_tot"], }) # Items only in original for key, orig_list in orig_index.items(): if key in new_index: # Handle leftover originals (orig had more duplicates than new) used = min(len(new_index[key]), len(orig_list)) for it in orig_list[used:]: removed.append({ "sheet": sheet_label, "hala": hala, "section": key[0], "description": it["description"], "mj": it["mj"], "vymera": it["vymera"], "cena_jed": it["cena_jed"], "cena_tot": it["cena_tot"], }) continue for it in orig_list: removed.append({ "sheet": sheet_label, "hala": hala, "section": key[0], "description": it["description"], "mj": it["mj"], "vymera": it["vymera"], "cena_jed": it["cena_jed"], "cena_tot": it["cena_tot"], }) per_sheet.append({ "sheet": sheet_label, "hala": hala, "orig_count": len(orig_items), "new_count": len(new_items), "changes": len(changes), "added": len(added), "removed": len(removed), }) all_changes.extend(changes) all_added.extend(added) all_removed.extend(removed) return { "per_sheet": per_sheet, "changes": all_changes, "added": all_added, "removed": all_removed, } # ── Excel report writer (4 sheets per spec) ─────────────────── NUM_QTY = '#,##0.##;[Red]-#,##0.##;"-"' NUM_CZK = '#,##0.00 "Kč";[Red]-#,##0.00 "Kč";"-"' def _title(ws, text: str, subtitle: str = ""): cell = ws.cell(row=1, column=1, value=text) cell.font = Font(name="Arial", bold=True, size=14, color=BLUE) ws.merge_cells(start_row=1, start_column=1, end_row=1, end_column=12) if subtitle: sub = ws.cell(row=2, column=1, value=subtitle) sub.font = Font(name="Arial", italic=True, size=10, color=GRAY_TEXT) ws.merge_cells(start_row=2, start_column=1, end_row=2, end_column=12) def _hdr_cell(cell): cell.font = Font(name="Arial", bold=True, size=11, color=WHITE) cell.fill = PatternFill("solid", fgColor=BLUE) cell.alignment = Alignment(horizontal="center", vertical="center", wrap_text=True) cell.border = BORDER def _body_cell(cell, num_format: str | None = None, fill: str | None = None, bold: bool = False, color: str | None = None, horizontal: str | None = None): cell.font = Font(name="Arial", size=10, bold=bold, color=color or "000000") cell.border = BORDER if fill: cell.fill = PatternFill("solid", fgColor=fill) if num_format: cell.number_format = num_format cell.alignment = Alignment(horizontal=horizontal or "left", vertical="top", wrap_text=True) def write_compare_report(result: dict, orig_filename: str, new_filename: str, out_path: Path) -> Path: wb = Workbook() # ── 1. Souhrn ───────────────────────────────────────── ws = wb.active ws.title = "Souhrn" _title(ws, "Porovnání výkazu výměr", f"Původní: {orig_filename} · Nový: {new_filename}") headers = ["List", "Hala / objekt", "Počet pol. (původní)", "Počet pol. (nový)", "Změněné položky", "Přidané položky", "Odebrané položky"] for c, h in enumerate(headers, 1): _hdr_cell(ws.cell(row=4, column=c, value=h)) r = 5 for ps in result["per_sheet"]: ws.cell(row=r, column=1, value=ps["sheet"]) ws.cell(row=r, column=2, value=ps["hala"]) ws.cell(row=r, column=3, value=ps["orig_count"]) ws.cell(row=r, column=4, value=ps["new_count"]) ws.cell(row=r, column=5, value=ps["changes"]) ws.cell(row=r, column=6, value=ps["added"]) ws.cell(row=r, column=7, value=ps["removed"]) for c in range(1, 8): _body_cell(ws.cell(row=r, column=c)) # Colored highlights when > 0 if ps["changes"]: _body_cell(ws.cell(row=r, column=5), fill=YELLOW, bold=True, horizontal="right") if ps["added"]: _body_cell(ws.cell(row=r, column=6), fill=GREEN_BG, bold=True, color=DARK_GREEN, horizontal="right") if ps["removed"]: _body_cell(ws.cell(row=r, column=7), fill=RED_BG, bold=True, color=DARK_RED, horizontal="right") r += 1 # CELKEM row if result["per_sheet"]: first_data_row = 5 last_data_row = r - 1 ws.cell(row=r, column=1, value="CELKEM") for c in range(3, 8): col_letter = get_column_letter(c) ws.cell(row=r, column=c, value=f"=SUM({col_letter}{first_data_row}:{col_letter}{last_data_row})") for c in range(1, 8): _body_cell(ws.cell(row=r, column=c), fill=GRAY, bold=True, horizontal=("right" if c >= 3 else "left")) r += 1 # Recap box r += 1 ws.cell(row=r, column=1, value="Rekapitulace změn (celkem)").font = \ Font(name="Arial", bold=True, size=12, color=BLUE) r += 1 for label, cnt, fill, color in [ ("Změněné", len(result["changes"]), YELLOW, None), ("Přidané", len(result["added"]), GREEN_BG, DARK_GREEN), ("Odebrané", len(result["removed"]), RED_BG, DARK_RED), ]: _body_cell(ws.cell(row=r, column=1, value=label), bold=True) _body_cell(ws.cell(row=r, column=2, value=cnt), fill=fill, bold=True, color=color, horizontal="right") r += 1 r += 1 note = ws.cell(row=r, column=1, value=("Za „změnu\" je považován pouze rozdíl ve výměře nebo MJ. " "Cenové rozdíly se ignorují, protože nový VV obvykle ceny " "neobsahuje.")) note.font = Font(name="Arial", italic=True, size=9, color=GRAY_TEXT) note.alignment = Alignment(wrap_text=True) ws.merge_cells(start_row=r, start_column=1, end_row=r, end_column=7) for c, w in {1: 22, 2: 30, 3: 18, 4: 18, 5: 18, 6: 18, 7: 18}.items(): ws.column_dimensions[get_column_letter(c)].width = w ws.row_dimensions[4].height = 32 # ── 2. Změny ───────────────────────────────────────── ws = wb.create_sheet("Změny") _title(ws, "Změněné položky (rozdíl ve výměře nebo MJ)") cols = ["List", "Hala", "Sekce", "Popis položky", "MJ orig.", "MJ nová", "Výměra orig.", "Výměra nová", "Rozdíl výměra", "Jed. cena orig.", "Cena orig.", "Cena nová"] for c, h in enumerate(cols, 1): _hdr_cell(ws.cell(row=4, column=c, value=h)) r = 5 for ch in result["changes"]: ws.cell(row=r, column=1, value=ch["sheet"]) ws.cell(row=r, column=2, value=ch["hala"]) ws.cell(row=r, column=3, value=ch["section"]) ws.cell(row=r, column=4, value=ch["description"]) ws.cell(row=r, column=5, value=ch["mj_orig"]) ws.cell(row=r, column=6, value=ch["mj_new"]) ws.cell(row=r, column=7, value=ch["vymera_orig"]) ws.cell(row=r, column=8, value=ch["vymera_new"]) ws.cell(row=r, column=9, value=ch["vymera_diff"]) ws.cell(row=r, column=10, value=ch["cena_jed_orig"]) ws.cell(row=r, column=11, value=ch["cena_orig"]) ws.cell(row=r, column=12, value=ch["cena_new"]) for c in range(1, 13): _body_cell(ws.cell(row=r, column=c)) # Number formats for c in (7, 8, 9): ws.cell(row=r, column=c).number_format = NUM_QTY ws.cell(row=r, column=c).alignment = Alignment(horizontal="right", vertical="top") for c in (10, 11, 12): ws.cell(row=r, column=c).number_format = NUM_CZK ws.cell(row=r, column=c).alignment = Alignment(horizontal="right", vertical="top") # Highlight diff cell diff_fill = GREEN_DIFF if ch["vymera_diff"] > 0 else RED_DIFF _body_cell(ws.cell(row=r, column=9), fill=diff_fill, bold=True, num_format=NUM_QTY, horizontal="right") r += 1 widths = {1: 14, 2: 22, 3: 22, 4: 50, 5: 10, 6: 10, 7: 14, 8: 14, 9: 14, 10: 14, 11: 14, 12: 14} for c, w in widths.items(): ws.column_dimensions[get_column_letter(c)].width = w ws.freeze_panes = "A5" ws.auto_filter.ref = f"A4:L{max(5, r - 1)}" ws.row_dimensions[4].height = 32 # ── 3. Přidané ───────────────────────────────────────── ws = wb.create_sheet("Přidané") _title(ws, "Přidané položky (jsou v novém VV, nebyly v původním)") add_cols = ["List", "Hala", "Sekce", "Popis položky", "MJ", "Výměra", "Jed. cena", "Cena"] for c, h in enumerate(add_cols, 1): _hdr_cell(ws.cell(row=4, column=c, value=h)) r = 5 for it in result["added"]: vals = [it["sheet"], it["hala"], it["section"], it["description"], it["mj"], it["vymera"], it["cena_jed"], it["cena_tot"]] for c, v in enumerate(vals, 1): _body_cell(ws.cell(row=r, column=c, value=v), fill=GREEN_BG, color=DARK_GREEN) ws.cell(row=r, column=6).number_format = NUM_QTY ws.cell(row=r, column=6).alignment = Alignment(horizontal="right", vertical="top") ws.cell(row=r, column=7).number_format = NUM_CZK ws.cell(row=r, column=7).alignment = Alignment(horizontal="right", vertical="top") ws.cell(row=r, column=8).number_format = NUM_CZK ws.cell(row=r, column=8).alignment = Alignment(horizontal="right", vertical="top") r += 1 for c, w in {1: 14, 2: 22, 3: 22, 4: 50, 5: 10, 6: 14, 7: 14, 8: 14}.items(): ws.column_dimensions[get_column_letter(c)].width = w ws.freeze_panes = "A5" ws.auto_filter.ref = f"A4:H{max(5, r - 1)}" ws.row_dimensions[4].height = 32 # ── 4. Odebrané ───────────────────────────────────────── ws = wb.create_sheet("Odebrané") _title(ws, "Odebrané položky (byly v původním VV, v novém nejsou)") for c, h in enumerate(add_cols, 1): _hdr_cell(ws.cell(row=4, column=c, value=h)) r = 5 first_data_row = r for it in result["removed"]: vals = [it["sheet"], it["hala"], it["section"], it["description"], it["mj"], it["vymera"], it["cena_jed"], it["cena_tot"]] for c, v in enumerate(vals, 1): _body_cell(ws.cell(row=r, column=c, value=v), fill=RED_BG, color=DARK_RED) ws.cell(row=r, column=6).number_format = NUM_QTY ws.cell(row=r, column=6).alignment = Alignment(horizontal="right", vertical="top") ws.cell(row=r, column=7).number_format = NUM_CZK ws.cell(row=r, column=7).alignment = Alignment(horizontal="right", vertical="top") ws.cell(row=r, column=8).number_format = NUM_CZK ws.cell(row=r, column=8).alignment = Alignment(horizontal="right", vertical="top") r += 1 last_data_row = r - 1 if last_data_row >= first_data_row: # Sum cena_tot column ws.cell(row=r, column=1, value="Součet") ws.cell(row=r, column=8, value=f"=SUM(H{first_data_row}:H{last_data_row})") for c in range(1, 9): _body_cell(ws.cell(row=r, column=c), fill=GRAY, bold=True, horizontal=("right" if c == 8 else "left")) ws.cell(row=r, column=8).number_format = NUM_CZK for c, w in {1: 14, 2: 22, 3: 22, 4: 50, 5: 10, 6: 14, 7: 14, 8: 14}.items(): ws.column_dimensions[get_column_letter(c)].width = w ws.freeze_panes = "A5" ws.row_dimensions[4].height = 32 wb.save(str(out_path)) return out_path