Initial commit - Journey book (kniha jízd) automation system
Features: - FastAPI backend for scraping attendance and journey book data - Deterministic kilometer distribution with random variance - Refueling form filling with km values - Next.js frontend with date range selector - Docker deployment setup 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
281
backend/fillers/journeybook_filler.py
Normal file
281
backend/fillers/journeybook_filler.py
Normal file
@@ -0,0 +1,281 @@
|
||||
import logging
import re
import time
from typing import Dict, Any, List
from urllib.parse import urljoin

import pandas as pd
import requests
from bs4 import BeautifulSoup
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class JourneybookFiller:
    """Synchronise one vehicle's monthly journey book on kj.colsys.cz with a DataFrame.

    The monthly overview page is fetched, every table row that carries a
    ``<form>`` and a date is matched against the DataFrame's ``Datum`` column,
    and the form is re-submitted either with updated kilometre values (date
    present in the DataFrame) or with the delete button (date absent).
    """

    # Seconds to wait for the server before a request is abandoned. Without a
    # timeout ``requests`` may block indefinitely on a stalled connection.
    REQUEST_TIMEOUT = 30
    # Pause (seconds) after each successful submission, for rate limiting.
    SUBMIT_DELAY = 0.5

    _TABLE_CLASS = 'table table-striped table-bordered table-condensed table-sm'
    # Matches a date such as "2. 1. 2025" embedded in the first cell's text.
    _DATE_RE = re.compile(r'(\d{1,2}\.\s*\d{1,2}\.\s*\d{4})')
    # Input types a browser only submits when they are the activated control.
    _BUTTON_INPUT_TYPES = frozenset({"submit", "button", "image", "reset"})
    # Input types a browser only submits when they are checked.
    _TOGGLE_INPUT_TYPES = frozenset({"checkbox", "radio"})

    def __init__(self, username: str, password: str, vehicle_registration: str = "4SH1148",
                 *, verify_ssl: bool = False):
        """Create an authenticated HTTP session for the journey book site.

        Args:
            username: HTTP basic-auth user name.
            password: HTTP basic-auth password.
            vehicle_registration: Registration plate sent as the ``rz`` query
                parameter.
            verify_ssl: Whether TLS certificates are verified. Defaults to
                ``False`` to keep the previous behaviour.
        """
        self.username = username
        self.password = password
        self.vehicle_registration = vehicle_registration
        self.base_url = "https://kj.colsys.cz/prehled_mesic.php"
        self.session = requests.Session()
        self.session.auth = (username, password)
        # NOTE(review): with verification disabled the basic-auth credentials
        # are exposed to man-in-the-middle attacks; pass verify_ssl=True once
        # the server certificate can be validated.
        self.session.verify = verify_ssl

    def fill_month(self, df: pd.DataFrame, month: str, dry_run: bool = True) -> Dict[str, Any]:
        """Fill journeybook data for a given month.

        Args:
            df: DataFrame with calculated journey data. Must contain a
                ``Datum`` column whose values compare equal to dates written
                as ``D.M.YYYY`` without whitespace.
            month: Month in format YYYY-MM (e.g., "2025-01").
            dry_run: If True, only report what would be submitted.

        Returns:
            In dry-run mode a dict with the prepared counts and the first five
            prepared updates/deletes; otherwise a dict with totals,
            success/failure counters and a list of per-row errors.

        Raises:
            ValueError: If the journey book table is not present on the page.
            requests.RequestException: If fetching the overview page fails.
        """
        logger.info(f"Fetching form for month {month}, vehicle {self.vehicle_registration}")
        # Passing the query through ``params`` lets requests URL-encode it.
        response = self.session.get(
            self.base_url,
            params={"rz": self.vehicle_registration, "den": f"{month}-01"},
            timeout=self.REQUEST_TIMEOUT,
        )
        response.raise_for_status()

        soup = BeautifulSoup(response.content, 'html.parser')
        table = soup.find('table', class_=self._TABLE_CLASS)
        if table is None:
            raise ValueError("Journeybook table not found")

        # html.parser does not synthesise a <tbody>; fall back to the table
        # itself when the markup has none instead of failing on None.
        body = table.find('tbody')
        table_rows = (table if body is None else body).find_all('tr')

        logger.info(f"Found {len(table_rows)} table rows")
        logger.info(f"DataFrame has {len(df)} rows")
        if len(df) == 0:
            logger.warning("DataFrame is empty - every dated row on the website will be scheduled for deletion")

        updates, deletes = self._plan_changes(table_rows, df)

        if dry_run:
            logger.info("DRY RUN MODE - No data will be submitted")
            return {
                "dry_run": True,
                "month": month,
                "updates_prepared": len(updates),
                "deletes_prepared": len(deletes),
                "updates": updates[:5],  # Show first 5 as sample
                "deletes": deletes[:5],
            }

        results = {
            "month": month,
            "updates_total": len(updates),
            "deletes_total": len(deletes),
            "updates_successful": 0,
            "updates_failed": 0,
            "deletes_successful": 0,
            "deletes_failed": 0,
            "errors": [],
        }
        # Updates are sent before deletes, as in the original flow.
        self._submit_batch(updates, "update", "Updating", results)
        self._submit_batch(deletes, "delete", "Deleting", results)
        return results

    @classmethod
    def _normalize_date(cls, text: str):
        """Return the date found in *text* as ``D.M.YYYY`` without whitespace, or None."""
        match = cls._DATE_RE.search(text)
        if match is None:
            return None
        # ``\s`` in the pattern also matches non-breaking spaces, so all
        # whitespace (not just ASCII spaces) must be removed before matching
        # against the DataFrame; otherwise the row would be treated as unknown
        # and scheduled for deletion.
        return re.sub(r'\s+', '', match.group(1))

    def _plan_changes(self, table_rows, df: pd.DataFrame):
        """Build the lists of prepared update and delete submissions for the table rows."""
        updates = []
        deletes = []

        for i, row in enumerate(table_rows):
            form = row.find('form')
            if form is None:
                continue  # Skip rows without forms

            cells = row.find_all('td')
            if not cells:
                continue

            # The first cell holds the date mixed with button text,
            # e.g. "Zapiš2. 1. 2025Zapiš".
            clean_date = self._normalize_date(cells[0].get_text(strip=True))
            if clean_date is None:
                logger.debug(f"Skipping row {i} (no date pattern)")
                continue

            form_data = self._extract_form_data(form)
            # A form whose button is named f_ulozitkm is a refueling form.
            is_refueling_form = any(btn.get("name") == "f_ulozitkm" for btn in form_data["buttons"])

            matching_rows = df[df["Datum"] == clean_date]
            if len(matching_rows) > 0:
                # Date exists in our data - update the row on the website.
                row_data = matching_rows.iloc[0]
                updates.append(
                    self._prepare_update(form_data, row_data, update_mode=True, is_refueling=is_refueling_form)
                )
                form_type = "refueling" if is_refueling_form else "journey"
                logger.info(f"Matched {form_type} row {i} with date {clean_date}")
            else:
                # Row exists on the website but not in our data - delete it.
                deletes.append(
                    self._prepare_update(form_data, None, update_mode=False, is_refueling=is_refueling_form)
                )
                logger.info(f"Will delete row {i} with date {clean_date}")

        return updates, deletes

    def _submit_batch(self, items: List[Dict[str, Any]], kind: str, progress_label: str,
                      results: Dict[str, Any]) -> None:
        """Submit every prepared form in *items* and record the outcome in *results*.

        ``kind`` is "update" or "delete" and selects the counter keys
        (``<kind>s_successful`` / ``<kind>s_failed``) and the error type.
        """
        total = len(items)
        for position, item in enumerate(items, start=1):
            try:
                logger.info(f"{progress_label} row {position}/{total}")
                self._submit_form(item)
                results[f"{kind}s_successful"] += 1
                time.sleep(self.SUBMIT_DELAY)  # Rate limiting
            except Exception as e:
                # Deliberate best effort: one failing row must not abort the
                # rest of the month; the failure is logged and reported.
                logger.error(f"Failed to {kind} row {position}: {e}")
                results[f"{kind}s_failed"] += 1
                results["errors"].append({"type": kind, "row": position, "error": str(e)})

    def _extract_form_data(self, form) -> Dict[str, Any]:
        """Extract the form's action, method, input fields and buttons.

        Unchecked checkboxes and radio buttons are left out of ``fields``
        because a browser would not submit them.
        """
        form_data = {
            "action": form.get('action', ''),
            "method": form.get('method', 'post'),
            "fields": {},
            "buttons": [],
        }

        for input_field in form.find_all('input'):
            name = input_field.get('name', '')
            value = input_field.get('value', '')
            field_type = input_field.get('type', 'text')
            kind = field_type.lower()

            if not name:
                continue

            if kind in self._TOGGLE_INPUT_TYPES and not input_field.has_attr('checked'):
                # Posting an unchecked box would silently tick it server-side.
                continue

            form_data["fields"][name] = {"value": value, "type": field_type}

            # Track input buttons
            if kind in ('submit', 'button'):
                form_data["buttons"].append({"name": name, "value": value, "type": field_type})

        # <button> elements are buttons too, not only <input type="submit">.
        for button in form.find_all('button'):
            name = button.get('name', '')
            if not name:
                continue
            form_data["buttons"].append({
                "name": name,
                "value": button.get('value', button.get_text(strip=True)),
                "type": button.get('type', 'submit'),
            })

        return form_data

    def _prepare_update(self, form_data: Dict, row_data: pd.Series, update_mode: bool = True, is_refueling: bool = False) -> Dict[str, Any]:
        """Prepare POST data for updating or deleting one row.

        Args:
            form_data: Extracted form data (see ``_extract_form_data``).
            row_data: DataFrame row with journey data (None for delete).
            update_mode: True to update the row, False to delete it.
            is_refueling: True for a refueling form, False for a journey form.

        Returns:
            Dict with ``action``, ``method``, ``data`` (the POST payload) and
            ``buttons`` (all buttons found on the form).
        """
        update = {
            "action": form_data["action"],
            "method": form_data["method"],
            "data": {},
            "buttons": form_data.get("buttons", []),
        }
        data = update["data"]

        # Copy existing fields, except button-like inputs: a browser sends
        # only the button that was activated, and that one is added below.
        # Copying them all could post the delete button along with an update.
        for field_name, field_info in form_data["fields"].items():
            if str(field_info.get("type", "")).lower() in self._BUTTON_INPUT_TYPES:
                continue
            data[field_name] = field_info["value"]

        if not update_mode:
            # Delete mode - add the "Smazat" (delete) button to the payload.
            if not self._add_button(update, self._is_delete_button, "Adding DELETE button: {name}={value}"):
                logger.warning(f"No delete button found! Available buttons: {update['buttons']}")
            return update

        if is_refueling:
            self._fill_refuel_km(data, row_data)
        elif "f_ujeto" in data:
            # Journey form: only the travelled distance is written; the site
            # computes the end odometer value (f_cil_km) itself.
            distance = self._km_value(row_data, "Ujeto [km]")
            if distance is not None:
                data["f_ujeto"] = str(distance)

        # Add the "Uložit km" / "Přepočítat" button; "Uzavřít měsíc" never
        # matches the predicate and is therefore never sent.
        if not self._add_button(update, self._is_save_button, "Adding button to POST: {name}={value}"):
            logger.warning(f"No save button found! Available buttons: {update['buttons']}")

        return update

    @staticmethod
    def _is_delete_button(name: str, value: str) -> bool:
        """Tell whether a button deletes the row."""
        return "Smazat" in value or "smazat" in name.lower()

    @staticmethod
    def _is_save_button(name: str, value: str) -> bool:
        """Tell whether a button saves or recalculates the row."""
        return "Uložit" in value or "Přepočítat" in value or "ulozit" in name.lower()

    @staticmethod
    def _add_button(update: Dict[str, Any], predicate, message: str) -> bool:
        """Add the first named button accepted by *predicate* to the POST data."""
        for button in update["buttons"]:
            name = button.get("name", "")
            value = button.get("value", "")
            if predicate(name, value) and name:
                update["data"][name] = value
                logger.info(message.format(name=name, value=value))
                return True
        return False

    @staticmethod
    def _km_value(row_data, column: str):
        """Return ``row_data[column]`` as int, or None when missing or NaN."""
        if column in row_data and pd.notna(row_data[column]):
            return int(row_data[column])
        return None

    def _fill_refuel_km(self, data: Dict[str, Any], row_data) -> None:
        """Write the refueling odometer value into ``f_km`` when applicable.

        An empty or zero ``f_km`` receives the first refueling value. A form
        that already holds a value different from the first refueling is
        assumed to be the second refueling form.
        NOTE(review): two still-empty refueling forms on the same date both
        receive the first value - confirm against the live site.
        """
        if "f_km" not in data:
            return
        first_km = self._km_value(row_data, "Tankováno při 1 [km]")
        if first_km is None:
            return

        current = str(data.get("f_km", "0")).strip()
        if current in ("", "0"):
            data["f_km"] = str(first_km)
            logger.info(f"Filling refuel km: f_km={first_km}")
            return

        second_km = self._km_value(row_data, "Tankováno při 2 [km]")
        if second_km is None:
            return

        try:
            current_km = int(current)
        except ValueError:
            # A malformed value on the page must not abort the whole month.
            logger.warning(f"Unparsable f_km value {current!r}; leaving refueling form unchanged")
            return

        if current_km != first_km:
            data["f_km"] = str(second_km)
            logger.info(f"Filling refuel 2 km: f_km={second_km}")

    def _resolve_url(self, action: str) -> str:
        """Resolve a form ``action`` against the overview page URL.

        Handles absolute URLs, root-relative and relative paths, and empty or
        query-only actions (which refer to the page itself).
        """
        return urljoin(self.base_url, action or "")

    def _submit_form(self, update: Dict[str, Any]):
        """POST one prepared form.

        Raises:
            requests.RequestException: On connection errors, timeouts or a
                4xx/5xx response. Redirects are not followed and count as
                success.
        """
        response = self.session.post(
            self._resolve_url(update["action"]),
            data=update["data"],
            allow_redirects=False,
            timeout=self.REQUEST_TIMEOUT,
        )
        response.raise_for_status()
        logger.info(f"Form submitted successfully: {response.status_code}")
|
||||
Reference in New Issue
Block a user