#!/usr/bin/env python3
"""Parse Excel schedule file and output JSON to stdout.

Usage: pass the path of the workbook as the first command-line argument.

Cells read from the active worksheet (rows and columns are 1-based):

* row 9, columns 7..299   -- date-like cells; each one marks the column
  where a (year, month) starts.
* row 11, columns 7..299  -- day-of-month numbers; comments attached to
  these cells are emitted as ``dayComments``.
* rows 13..39             -- one station per row: code in column 1, name in
  column 2, server in column 3, per-day values from column 7 onwards;
  comments attached to the per-day cells are emitted as ``cellComments``.

A day's ``idx`` is its column number minus 7.  On fatal problems a JSON
object with an ``error`` key is written to stderr and the exit status is 1.
"""
import json
import sys
from bisect import bisect_right
from datetime import date

MONTH_ROW = 9
DAY_ROW = 11
STATION_ROWS = range(13, 40)
FIRST_DAY_COL = 7
DAY_COLUMNS = range(FIRST_DAY_COL, 300)

# Prefixes that precede the actual note inside a comment's text; checked in
# this order, the first one found wins.
COMMENT_MARKERS = ("Komentář:\n", "Comment:\n")


def load_sheets(filepath):
    """Return ``(values_ws, comments_ws)``: the active sheet loaded twice.

    The first sheet comes from a workbook opened with ``data_only=True`` and
    is used for cell values; the second comes from a default load and is
    used for reading cell comments.
    """
    # Imported here so the pure helpers in this module stay importable (and
    # testable) on machines where openpyxl is not installed.
    import openpyxl

    values_ws = openpyxl.load_workbook(filepath, data_only=True).active
    comments_ws = openpyxl.load_workbook(filepath).active
    return values_ws, comments_ws


def read_month_starts(ws):
    """Map column -> ``(year, month)`` for every date-like cell in row 9."""
    month_starts = {}
    for col in DAY_COLUMNS:
        val = ws.cell(row=MONTH_ROW, column=col).value
        # Anything exposing ``.month`` (date / datetime) counts as a marker.
        if val is not None and hasattr(val, "month"):
            month_starts[col] = (val.year, val.month)
    return month_starts


def describe_day(idx, year, month, day_num):
    """Build one ``dayIndex`` entry.

    If ``(year, month, day_num)`` is not a real calendar date the entry is
    still produced, with ``week`` 0 and ``weekend`` False.
    """
    try:
        d = date(year, month, day_num)
        is_weekend = d.weekday() >= 5
        week = d.isocalendar()[1]
    except (ValueError, OverflowError):
        is_weekend = False
        week = 0
    return {
        "idx": idx,
        "day": day_num,
        "month": month,
        "year": year,
        "week": week,
        "weekend": is_weekend,
    }


def build_day_index(ws, month_starts):
    """Return the list of day entries described by row 11.

    Each non-empty cell in row 11 is combined with the nearest month marker
    at or to the left of its column.  Columns before the first marker are
    ignored.  Cells that cannot be converted to an integer are skipped with
    a warning on stderr instead of aborting the whole run.
    """
    month_cols = sorted(month_starts)  # sorted once, not once per column
    day_index = []
    for col in DAY_COLUMNS:
        day_val = ws.cell(row=DAY_ROW, column=col).value
        if day_val is None:
            continue
        try:
            day_num = int(day_val)
        except (TypeError, ValueError, OverflowError):
            print(
                f"warning: skipping non-numeric day cell in column {col}: "
                f"{day_val!r}",
                file=sys.stderr,
            )
            continue
        pos = bisect_right(month_cols, col)
        if pos == 0:
            continue  # no month marker at or before this column
        year, month = month_starts[month_cols[pos - 1]]
        day_index.append(describe_day(col - FIRST_DAY_COL, year, month, day_num))
    return day_index


def dedupe_days(day_index):
    """Keep only the first entry for each ``(year, month, day)`` date."""
    seen_dates = set()
    unique = []
    for d in day_index:
        date_key = (d["year"], d["month"], d["day"])
        if date_key not in seen_dates:
            seen_dates.add(date_key)
            unique.append(d)
    return unique


def normalize_value(val):
    """Convert a raw cell value to the form stored under ``"v"``.

    Numbers with no fractional part become ``int``; other numbers are kept
    as they are.  Everything else is stringified and stripped, and a lone
    lowercase ``z`` is normalized to uppercase ``Z``.
    """
    if isinstance(val, (int, float)):
        # float.is_integer() is False for inf/nan, so those pass through
        # untouched instead of raising inside int().
        if isinstance(val, float) and not val.is_integer():
            return val
        return int(val)
    text = str(val).strip()
    return "Z" if text == "z" else text


def extract_stations(ws, valid_idx):
    """Return one dict per station row (rows 13..39) that has a code.

    ``data`` maps ``str(idx)`` to ``{"v": value}`` for every non-empty cell
    whose day index is in *valid_idx*.
    """
    stations = []
    for row in STATION_ROWS:
        code = ws.cell(row=row, column=1).value
        name = ws.cell(row=row, column=2).value or ""
        server = ws.cell(row=row, column=3).value or ""
        if not code:
            continue
        data = {}
        for col in DAY_COLUMNS:
            idx = col - FIRST_DAY_COL
            if idx not in valid_idx:
                continue
            val = ws.cell(row=row, column=col).value
            if val is None:
                continue
            if isinstance(val, str) and val.strip() == "":
                continue
            data[str(idx)] = {"v": normalize_value(val)}
        stations.append({
            "code": str(code).strip(),
            "name": str(name).strip(),
            "server": str(server).strip(),
            "duration": None,
            "data": data,
        })
    return stations


def extract_note(text):
    """Return the note part of a comment's text, stripped.

    If the text contains one of ``COMMENT_MARKERS`` (Czech or English),
    only what follows the last occurrence of that marker is returned;
    otherwise the whole text is used.
    """
    for marker in COMMENT_MARKERS:
        if marker in text:
            return text.rpartition(marker)[2].strip()
    return text.strip()


def row_notes(ws, row, valid_idx):
    """Yield ``(idx, note)`` for each commented cell of *row* with a note.

    Only columns whose day index is in *valid_idx* are considered, and
    comments whose extracted note is empty are dropped.
    """
    for col in DAY_COLUMNS:
        cell = ws.cell(row=row, column=col)
        if not cell.comment:
            continue
        idx = col - FIRST_DAY_COL
        if idx not in valid_idx:
            continue
        note = extract_note(cell.comment.text)
        if note:
            yield idx, note


def extract_day_comments(ws, valid_idx):
    """Return the comments attached to the day cells in row 11."""
    return [
        {"dayIdx": idx, "text": note}
        for idx, note in row_notes(ws, DAY_ROW, valid_idx)
    ]


def extract_cell_comments(ws, valid_idx):
    """Return the comments attached to per-day cells of station rows."""
    cell_comments = []
    for row in STATION_ROWS:
        code = ws.cell(row=row, column=1).value
        if not code:
            continue
        code = str(code).strip()
        for idx, note in row_notes(ws, row, valid_idx):
            cell_comments.append(
                {"stationCode": code, "dayIdx": idx, "text": note}
            )
    return cell_comments


def main(argv):
    """Parse the workbook named in ``argv[1]``; return the exit status."""
    if len(argv) < 2:
        print(
            json.dumps({"error": "Missing argument: path to the Excel file"}),
            file=sys.stderr,
        )
        return 1

    values_ws, comments_ws = load_sheets(argv[1])

    month_starts = read_month_starts(values_ws)
    if not month_starts:
        print(
            json.dumps({"error": "No month data found in row 9"}),
            file=sys.stderr,
        )
        return 1

    day_index = build_day_index(values_ws, month_starts)
    # NOTE(review): valid_idx is taken before de-duplication, so station data
    # and comments can reference the idx of a repeated date that is absent
    # from the emitted dayIndex -- confirm this is intended.
    valid_idx = {d["idx"] for d in day_index}

    result = {
        "dayIndex": dedupe_days(day_index),
        "stations": extract_stations(values_ws, valid_idx),
        "obstacles": [],
        "dayComments": extract_day_comments(comments_ws, valid_idx),
        "cellComments": extract_cell_comments(comments_ws, valid_idx),
    }
    print(json.dumps(result, ensure_ascii=False))
    return 0


if __name__ == "__main__":
    sys.exit(main(sys.argv))