Initial commit - Journey book (kniha jízd) automation system

Features:
- FastAPI backend for scraping attendance and journey book data
- Deterministic kilometer distribution with random variance
- Refueling form filling with km values
- Next.js frontend with date range selector
- Docker deployment setup

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
Docker Config Backup
2025-10-10 15:41:11 +02:00
commit 3b5d9fd940
40 changed files with 3777 additions and 0 deletions

View File

@@ -0,0 +1,281 @@
import logging
import re
import time
from typing import Any, Dict, List, Optional
from urllib.parse import urljoin

import pandas as pd
import requests
from bs4 import BeautifulSoup
logger = logging.getLogger(__name__)
class JourneybookFiller:
    """Fill the monthly journey book ("kniha jízd") web form for one vehicle.

    The monthly overview page is fetched with HTTP basic auth, every table row
    that contains a ``<form>`` is matched by date against a DataFrame, and the
    form is then re-posted either with new kilometre values (update) or with
    the delete button "pressed" (row present on the site but not in the data).

    NOTE: only ``<input>`` and ``<button>`` elements are collected from a form;
    ``<select>`` and ``<textarea>`` controls are not read and therefore never
    posted.
    """

    # Czech-style date such as "2. 1. 2025" (any whitespace after the dots).
    _DATE_RE = re.compile(r'(\d{1,2})\.\s*(\d{1,2})\.\s*(\d{4})')
    _TABLE_CLASS = 'table table-striped table-bordered table-condensed table-sm'
    # Input types that act as buttons: a browser submits at most the one that
    # was clicked, so they must never be copied blindly into the POST body.
    _BUTTON_INPUT_TYPES = frozenset({'submit', 'button', 'reset', 'image'})
    # Button-like inputs that are remembered as candidate buttons to "press".
    _CLICKABLE_INPUT_TYPES = frozenset({'submit', 'button'})
    # Inputs that are only submitted while they carry the "checked" attribute.
    _TOGGLE_INPUT_TYPES = frozenset({'checkbox', 'radio'})
    _REFUEL_SAVE_BUTTON = "f_ulozitkm"
    _REFUEL_1_COLUMN = "Tankováno při 1 [km]"
    _REFUEL_2_COLUMN = "Tankováno při 2 [km]"
    _DISTANCE_COLUMN = "Ujeto [km]"
    _SUBMIT_DELAY_SECONDS = 0.5  # pause after each successful POST (rate limiting)

    def __init__(self, username: str, password: str, vehicle_registration: str = "4SH1148",
                 *, verify_ssl: bool = False, timeout: float = 30.0):
        """Create a filler bound to one account and one vehicle.

        Args:
            username: Login used for HTTP basic authentication.
            password: Password used for HTTP basic authentication.
            vehicle_registration: Registration plate sent as the ``rz`` query
                parameter.
            verify_ssl: Whether the TLS certificate of the server is verified.
            timeout: Timeout in seconds applied to every HTTP request.
        """
        self.username = username
        self.password = password
        self.vehicle_registration = vehicle_registration
        self.base_url = "https://kj.colsys.cz/prehled_mesic.php"
        self.timeout = timeout
        # URL of the page the forms were scraped from; relative form actions
        # are resolved against it. Updated by fill_month().
        self._form_page_url = self.base_url
        self.session = requests.Session()
        self.session.auth = (username, password)
        # SECURITY: with verify_ssl=False (the historical default, kept for
        # backward compatibility) the basic-auth credentials are sent without
        # certificate validation. Pass verify_ssl=True wherever the server
        # certificate can be validated.
        self.session.verify = verify_ssl

    def fill_month(self, df: pd.DataFrame, month: str, dry_run: bool = True) -> Dict[str, Any]:
        """
        Fill journeybook data for a given month.

        Every form row on the page whose date is found in ``df["Datum"]`` is
        prepared as an update; every dated form row that is NOT found in the
        DataFrame is prepared as a delete.

        Args:
            df: DataFrame with calculated journey data (needs a "Datum" column)
            month: Month in format YYYY-MM (e.g., "2025-01")
            dry_run: If True, only show what would be filled without actually submitting

        Returns:
            Dict with results including success/failure counts

        Raises:
            ValueError: If the journey book table is not present on the page.
            KeyError: If ``df`` has no "Datum" column.
            requests.RequestException: If fetching the overview page fails.
        """
        logger.info(f"Fetching form for month {month}, vehicle {self.vehicle_registration}")
        # Let requests build (and URL-encode) the query string.
        response = self.session.get(
            self.base_url,
            params={"rz": self.vehicle_registration, "den": f"{month}-01"},
            timeout=self.timeout,
        )
        response.raise_for_status()
        self._form_page_url = response.url

        soup = BeautifulSoup(response.content, 'html.parser')
        table = soup.find('table', class_=self._TABLE_CLASS)
        if table is None:
            raise ValueError("Journeybook table not found")

        # html.parser does not add a missing <tbody>, so fall back to the table.
        tbody = table.find('tbody')
        table_rows = (tbody if tbody is not None else table).find_all('tr')
        logger.info(f"Found {len(table_rows)} table rows")
        logger.info(f"DataFrame has {len(df)} rows")

        rows_by_date = self._index_rows_by_date(df)
        updates: List[Dict[str, Any]] = []
        deletes: List[Dict[str, Any]] = []

        for i, row in enumerate(table_rows):
            form = row.find('form')
            if form is None:
                continue  # Skip rows without forms
            cells = row.find_all('td')
            if not cells:
                continue
            # The first cell holds the date mixed with button text,
            # e.g. "Zapiš2. 1. 2025Zapiš".
            clean_date = self._normalize_date(cells[0].get_text(strip=True))
            if clean_date is None:
                logger.debug(f"Skipping row {i} (no date pattern)")
                continue

            form_data = self._extract_form_data(form)
            # A form offering the refuelling save button is a refuelling form.
            is_refueling_form = any(
                btn.get("name") == self._REFUEL_SAVE_BUTTON for btn in form_data["buttons"]
            )

            row_data = rows_by_date.get(clean_date)
            if row_data is not None:
                # Row exists in our data - update it
                updates.append(self._prepare_update(
                    form_data, row_data, update_mode=True, is_refueling=is_refueling_form))
                form_type = "refueling" if is_refueling_form else "journey"
                logger.info(f"Matched {form_type} row {i} with date {clean_date}")
            else:
                # Row exists on website but not in our data - delete it
                deletes.append(self._prepare_update(
                    form_data, None, update_mode=False, is_refueling=is_refueling_form))
                logger.info(f"Will delete row {i} with date {clean_date}")

        if dry_run:
            logger.info("DRY RUN MODE - No data will be submitted")
            return {
                "dry_run": True,
                "month": month,
                "updates_prepared": len(updates),
                "deletes_prepared": len(deletes),
                "updates": updates[:5],  # Show first 5 as sample
                "deletes": deletes[:5],
            }

        results: Dict[str, Any] = {
            "month": month,
            "updates_total": len(updates),
            "deletes_total": len(deletes),
            "updates_successful": 0,
            "updates_failed": 0,
            "deletes_successful": 0,
            "deletes_failed": 0,
            "errors": [],
        }
        # Updates first, then deletes.
        self._submit_all(updates, "update", "Updating", results)
        # A delete without its delete button would post the form without
        # deleting anything, so such entries are reported as failures.
        self._submit_all(deletes, "delete", "Deleting", results, require_button=True)
        return results

    def _submit_all(self, items: List[Dict[str, Any]], kind: str, verb: str,
                    results: Dict[str, Any], require_button: bool = False) -> None:
        """Submit prepared forms one by one and tally the outcome in ``results``.

        Args:
            items: Prepared forms as returned by ``_prepare_update``.
            kind: "update" or "delete"; selects the ``<kind>s_successful`` /
                ``<kind>s_failed`` counters and the error ``type``.
            verb: Capitalised verb used in the progress log line.
            results: Result dict of ``fill_month``; modified in place.
            require_button: If True, an item without a matched submit button
                is recorded as failed instead of being posted.
        """
        total = len(items)
        for position, item in enumerate(items, start=1):
            logger.info(f"{verb} row {position}/{total}")
            try:
                if require_button and not item.get("submit_button"):
                    raise ValueError(f"No {kind} button found in form; not submitting")
                self._submit_form(item)
            except Exception as e:  # per-row boundary: record it and keep going
                logger.error(f"Failed to {kind} row {position}: {e}")
                results[f"{kind}s_failed"] += 1
                results["errors"].append({"type": kind, "row": position, "error": str(e)})
            else:
                results[f"{kind}s_successful"] += 1
                time.sleep(self._SUBMIT_DELAY_SECONDS)  # Rate limiting

    @classmethod
    def _normalize_date(cls, value: Any) -> Optional[str]:
        """Return ``value`` as canonical "D.M.YYYY" (no spaces, no leading zeros).

        Accepts text containing a "D. M. YYYY" date and objects exposing
        ``day`` / ``month`` / ``year`` (e.g. ``datetime.date``, ``pd.Timestamp``).
        Returns None when no date can be derived.
        """
        if value is None:
            return None
        if all(hasattr(value, part) for part in ("day", "month", "year")):
            try:
                return f"{int(value.day)}.{int(value.month)}.{int(value.year)}"
            except (TypeError, ValueError):
                return None  # e.g. pd.NaT, whose components are NaN
        match = cls._DATE_RE.search(str(value))
        if match is None:
            return None
        day, month, year = (int(part) for part in match.groups())
        return f"{day}.{month}.{year}"

    def _index_rows_by_date(self, df: pd.DataFrame) -> Dict[str, pd.Series]:
        """Map each normalized "Datum" value to the first DataFrame row carrying it."""
        index: Dict[str, pd.Series] = {}
        # df["Datum"] raises KeyError when the column is missing, also for an empty frame.
        for position, raw_date in enumerate(df["Datum"]):
            key = self._normalize_date(raw_date)
            if key is not None and key not in index:
                index[key] = df.iloc[position]
        return index

    def _extract_form_data(self, form) -> Dict[str, Any]:
        """Extract the submittable fields and the available buttons of a form.

        Returns:
            Dict with "action", "method", "fields" (name -> {"value", "type"})
            and "buttons" (list of {"name", "value", "type"}). Button-type
            inputs and unchecked checkboxes/radios are left out of "fields",
            mirroring what a browser would submit; nameless controls are ignored.
        """
        form_data: Dict[str, Any] = {
            "action": form.get('action', ''),
            "method": form.get('method', 'post'),
            "fields": {},
            "buttons": [],
        }
        for input_field in form.find_all('input'):
            name = input_field.get('name', '')
            if not name:
                continue
            value = input_field.get('value', '')
            field_type = str(input_field.get('type') or 'text').lower()
            if field_type in self._BUTTON_INPUT_TYPES:
                # Only offered as something to "press"; never an ordinary field.
                if field_type in self._CLICKABLE_INPUT_TYPES:
                    form_data["buttons"].append(
                        {"name": name, "value": value, "type": field_type})
                continue
            if field_type in self._TOGGLE_INPUT_TYPES and input_field.get('checked') is None:
                continue  # unchecked checkbox/radio is not part of a submission
            form_data["fields"][name] = {"value": value, "type": field_type}

        # CRITICAL: Also get <button> elements (not just <input type="submit">)
        for button in form.find_all('button'):
            name = button.get('name', '')
            if not name:
                continue
            form_data["buttons"].append({
                "name": name,
                # A <button> without a value attribute falls back to its label text.
                "value": button.get('value', button.get_text(strip=True)),
                "type": button.get('type', 'submit'),
            })
        return form_data

    def _prepare_update(self, form_data: Dict, row_data: Optional[pd.Series],
                        update_mode: bool = True, is_refueling: bool = False) -> Dict[str, Any]:
        """Prepare form data with updated values from DataFrame or for deletion

        Args:
            form_data: Extracted form data
            row_data: DataFrame row with journey data (None for delete)
            update_mode: True to update row, False to delete row
            is_refueling: True if this is a refueling form, False if journey form

        Returns:
            Dict with "action", "method", "data" (the POST body), "buttons"
            (all buttons found in the form) and "submit_button" (name of the
            button added to "data", or None when no suitable button exists).
        """
        update: Dict[str, Any] = {
            "action": form_data["action"],
            "method": form_data["method"],
            # Start from the values currently present in the form.
            "data": {name: info["value"] for name, info in form_data["fields"].items()},
            "buttons": form_data.get("buttons", []),
            "submit_button": None,
        }
        data = update["data"]

        if not update_mode:
            # Delete mode - find and add "Smazat" (Delete) button
            pressed = self._press_button(
                update, lambda name, value: "Smazat" in value or "smazat" in name.lower())
            if pressed is None:
                logger.warning(f"No delete button found! Available buttons: {update['buttons']}")
            else:
                logger.info(f"Adding DELETE button: {pressed}={data[pressed]}")
            return update

        if is_refueling:
            self._fill_refuel_km(data, row_data)
        else:
            # Journey form: ONLY update f_ujeto (distance traveled) and let
            # kj.colsys.cz calculate f_cil_km (end km) automatically.
            distance = row_data.get(self._DISTANCE_COLUMN)
            if pd.notna(distance) and "f_ujeto" in data:
                data["f_ujeto"] = str(int(distance))

        # Press "Uložit km" / "Přepočítat"; "Uzavřít měsíc" does not match.
        pressed = self._press_button(
            update,
            lambda name, value: "Uložit" in value or "Přepočítat" in value
            or "ulozit" in name.lower())
        if pressed is None:
            logger.warning(f"No save button found! Available buttons: {update['buttons']}")
        else:
            logger.info(f"Adding button to POST: {pressed}={data[pressed]}")
        return update

    @staticmethod
    def _press_button(update: Dict[str, Any], matches) -> Optional[str]:
        """Add the first named button accepted by ``matches(name, value)`` to the POST data.

        Returns the button name, or None if no button qualified.
        """
        for button in update["buttons"]:
            name = button.get("name", "")
            value = button.get("value", "")
            if name and matches(name, value):
                update["data"][name] = value
                update["submit_button"] = name
                return name
        return None

    def _fill_refuel_km(self, data: Dict[str, Any], row_data: pd.Series) -> None:
        """Write the odometer reading at refuelling into ``data["f_km"]``.

        An empty/zero ``f_km`` receives the first refuelling value; a form that
        already holds a different number receives the second refuelling value
        when the row has one. Anything else is left untouched.

        NOTE(review): two refuelling forms for one date that both start at 0
        will both receive the first value - telling them apart presumably
        needs their order on the page; confirm against the site.
        """
        first = row_data.get(self._REFUEL_1_COLUMN)
        if pd.isna(first) or "f_km" not in data:
            return
        refuel_1_km = int(first)
        current_text = str(data["f_km"]).strip()
        if current_text in ("", "0"):
            data["f_km"] = str(refuel_1_km)
            logger.info(f"Filling refuel km: f_km={refuel_1_km}")
            return

        second = row_data.get(self._REFUEL_2_COLUMN)
        if pd.isna(second):
            return
        try:
            current_km = int(current_text)
        except ValueError:
            # Unexpected form content must not abort the whole month.
            logger.warning(f"Unparseable f_km value {current_text!r}; leaving it unchanged")
            return
        if current_km != refuel_1_km:
            # This might be the second refueling form
            refuel_2_km = int(second)
            data["f_km"] = str(refuel_2_km)
            logger.info(f"Filling refuel 2 km: f_km={refuel_2_km}")

    def _submit_form(self, update: Dict[str, Any]):
        """Submit a form update

        The form action is resolved against the page the form was scraped
        from, so empty, relative, root-relative and absolute actions all work.
        The request is always a POST (the form's "method" is not consulted)
        and redirects are not followed.

        Raises:
            requests.RequestException: On connection problems, timeouts or a
                4xx/5xx response.
        """
        url = urljoin(self._form_page_url, update["action"])
        response = self.session.post(
            url,
            data=update["data"],
            allow_redirects=False,
            timeout=self.timeout,
        )
        response.raise_for_status()
        logger.info(f"Form submitted successfully: {response.status_code}")