import requests from bs4 import BeautifulSoup import pandas as pd import logging from typing import Dict, Any, List import time logger = logging.getLogger(__name__) class JourneybookFiller: def __init__(self, username: str, password: str, vehicle_registration: str = "4SH1148"): self.username = username self.password = password self.vehicle_registration = vehicle_registration self.base_url = "https://kj.colsys.cz/prehled_mesic.php" self.session = requests.Session() self.session.auth = (username, password) self.session.verify = False def fill_month(self, df: pd.DataFrame, month: str, dry_run: bool = True) -> Dict[str, Any]: """ Fill journeybook data for a given month. Args: df: DataFrame with calculated journey data month: Month in format YYYY-MM (e.g., "2025-01") dry_run: If True, only show what would be filled without actually submitting Returns: Dict with results including success/failure counts """ url = f"{self.base_url}?rz={self.vehicle_registration}&den={month}-01" logger.info(f"Fetching form for month {month}, vehicle {self.vehicle_registration}") response = self.session.get(url) response.raise_for_status() soup = BeautifulSoup(response.content, 'html.parser') table = soup.find('table', class_='table table-striped table-bordered table-condensed table-sm') if not table: raise ValueError("Journeybook table not found") # Extract forms with their parent rows to get dates import re table_rows = table.find('tbody').find_all('tr') logger.info(f"Found {len(table_rows)} table rows") logger.info(f"DataFrame has {len(df)} rows") updates = [] deletes = [] for i, row in enumerate(table_rows): form = row.find('form') if not form: continue # Skip rows without forms # Extract date from the row's first cell cells = row.find_all('td') if not cells: continue # Get text from first cell (including button text) date_text = cells[0].get_text(strip=True) # Extract date pattern "2. 1. 2025" from text like "Zapiš2. 1. 2025Zapiš" date_match = re.search(r'(\d{1,2}\.\s*\d{1,2}\.\s*\d{4})', date_text) if date_match: clean_date = date_match.group(1).replace(' ', '') # Determine if this is a journey form or refueling form form_data = self._extract_form_data(form) is_refueling_form = any(btn.get("name") == "f_ulozitkm" for btn in form_data["buttons"]) # Match with DataFrame matching_rows = df[df["Datum"] == clean_date] if len(matching_rows) > 0: # Row exists in our data - update it row_data = matching_rows.iloc[0] update = self._prepare_update(form_data, row_data, update_mode=True, is_refueling=is_refueling_form) updates.append(update) form_type = "refueling" if is_refueling_form else "journey" logger.info(f"Matched {form_type} row {i} with date {clean_date}") else: # Row exists on website but not in our data - delete it delete = self._prepare_update(form_data, None, update_mode=False, is_refueling=is_refueling_form) deletes.append(delete) logger.info(f"Will delete row {i} with date {clean_date}") else: logger.debug(f"Skipping row {i} (no date pattern)") if dry_run: logger.info("DRY RUN MODE - No data will be submitted") return { "dry_run": True, "month": month, "updates_prepared": len(updates), "deletes_prepared": len(deletes), "updates": updates[:5], # Show first 5 as sample "deletes": deletes[:5] } # Actually submit the data results = { "month": month, "updates_total": len(updates), "deletes_total": len(deletes), "updates_successful": 0, "updates_failed": 0, "deletes_successful": 0, "deletes_failed": 0, "errors": [] } # First, submit all updates for i, update in enumerate(updates): try: logger.info(f"Updating row {i+1}/{len(updates)}") self._submit_form(update) results["updates_successful"] += 1 time.sleep(0.5) # Rate limiting except Exception as e: logger.error(f"Failed to update row {i+1}: {e}") results["updates_failed"] += 1 results["errors"].append({"type": "update", "row": i+1, "error": str(e)}) # Then, submit all deletes for i, delete in enumerate(deletes): try: logger.info(f"Deleting row {i+1}/{len(deletes)}") self._submit_form(delete) results["deletes_successful"] += 1 time.sleep(0.5) # Rate limiting except Exception as e: logger.error(f"Failed to delete row {i+1}: {e}") results["deletes_failed"] += 1 results["errors"].append({"type": "delete", "row": i+1, "error": str(e)}) return results def _extract_form_data(self, form) -> Dict[str, Any]: """Extract all form fields and their current values""" form_data = { "action": form.get('action', ''), "method": form.get('method', 'post'), "fields": {}, "buttons": [] } # Get all input fields for input_field in form.find_all('input'): name = input_field.get('name', '') value = input_field.get('value', '') field_type = input_field.get('type', 'text') if name: form_data["fields"][name] = { "value": value, "type": field_type } # Track input buttons if field_type in ['submit', 'button']: form_data["buttons"].append({ "name": name, "value": value, "type": field_type }) # CRITICAL: Also get