Initial commit - Journey book (kniha jízd) automation system

Features:
- FastAPI backend for scraping attendance and journey book data
- Deterministic kilometer distribution with random variance
- Refueling form filling with km values
- Next.js frontend with date range selector
- Docker deployment setup

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
Docker Config Backup
2025-10-10 15:41:11 +02:00
commit 3b5d9fd940
40 changed files with 3777 additions and 0 deletions

View File

@@ -0,0 +1,73 @@
import re
import requests
from bs4 import BeautifulSoup
import pandas as pd
from typing import Dict, Any
class JourneybookScraper:
    """Scrape the monthly journey-book overview table for one vehicle.

    The page at ``base_url`` is fetched with HTTP basic authentication and
    its overview table is converted into a :class:`pandas.DataFrame`.
    """

    # Header names as they appear on the page once all whitespace is removed.
    _SOURCE_COLUMNS = ["Datum", "Počátečnístav", "Koncovýstav", "Ujeto[km]"]
    # Column names of the returned DataFrame; the last one is filled only for
    # refuelling rows.
    _OUTPUT_COLUMNS = [
        "Datum",
        "Počáteční stav",
        "Koncový stav",
        "Ujeto [km]",
        "Natankováno [l|kg]",
    ]
    # NOTE: BeautifulSoup matches a multi-word ``class_`` string against the
    # exact attribute value, so the class order here must match the page.
    _TABLE_CLASS = 'table table-striped table-bordered table-condensed table-sm'
    # Dates are rendered like "1. 10. 2025"; tolerate any amount of whitespace.
    _DATE_RE = re.compile(r'\d{1,2}\.\s*\d{1,2}\.\s*\d{4}')
    # Accept whole-number amounts ("40") as well as decimals ("35.50").
    _REFUEL_RE = re.compile(r'natankováno\s+(\d+(?:\.\d+)?)\s*\[l\|kg\]')

    def __init__(
        self,
        username: str,
        password: str,
        vehicle_registration: str = "4SH1148",
        *,
        timeout: float = 30.0,
        verify_ssl: bool = False,
    ):
        """Store credentials and request settings.

        Args:
            username: User name for HTTP basic authentication.
            password: Password for HTTP basic authentication.
            vehicle_registration: Registration plate sent as the ``rz``
                query parameter.
            timeout: Seconds to wait for the server before giving up.
            verify_ssl: Whether to verify the server's TLS certificate.
                Defaults to ``False`` to keep the previous behaviour.
        """
        self.username = username
        self.password = password
        self.vehicle_registration = vehicle_registration
        self.base_url = "https://kj.colsys.cz/prehled_mesic.php"
        self.timeout = timeout
        # SECURITY: with verification disabled the basic-auth credentials are
        # sent to a server whose identity is not checked. Pass
        # ``verify_ssl=True`` once the server certificate is trusted.
        self.verify_ssl = verify_ssl

    @staticmethod
    def normalize_date(date_str: str) -> str:
        """Return *date_str* with every whitespace character removed."""
        return re.sub(r'\s+', '', date_str)

    @classmethod
    def _parse_refuel_amount(cls, text: str) -> str:
        """Return the refuelled amount found in *text*, or ``""`` if none."""
        match = cls._REFUEL_RE.search(text)
        return match.group(1) if match else ""

    def _parse_journey_row(self, row, headers: list) -> list:
        """Return one value per ``<td>`` of *row* (a parsed ``<tr>`` element).

        A cell containing an ``<input>`` contributes that input's ``value``
        attribute; any other cell contributes its stripped text. In the
        "Datum" column the date is extracted and normalised when present.
        """
        cells = []
        for index, cell in enumerate(row.find_all('td')):
            input_field = cell.find('input')
            if input_field is not None:
                cells.append(input_field.get('value', ''))
                continue
            text = cell.text.strip()
            # Rows with more cells than headers are kept as plain text
            # instead of failing with an IndexError.
            if index < len(headers) and headers[index] == "Datum":
                date_match = self._DATE_RE.search(text)
                if date_match:
                    text = self.normalize_date(date_match.group())
            cells.append(text)
        return cells

    def scrape_month(self, month: str) -> pd.DataFrame:
        """Scrape journeybook data for a given month.

        Args:
            month: Month identifier; ``"-01"`` is appended to build the
                ``den`` query parameter (presumably ``"YYYY-MM"`` - confirm
                against the caller).

        Returns:
            DataFrame with columns: Datum, Počáteční stav, Koncový stav,
            Ujeto [km], Natankováno [l|kg]. Journey rows leave the last
            column empty; refuelling rows fill only the last column.

        Raises:
            ValueError: If the table, its header/body, a required column, or
                a required cell of a journey row is missing.
            requests.HTTPError: If the server answers with an error status.
        """
        # ``params`` lets requests URL-encode the values.
        response = requests.get(
            self.base_url,
            params={"rz": self.vehicle_registration, "den": f"{month}-01"},
            auth=(self.username, self.password),
            verify=self.verify_ssl,
            timeout=self.timeout,
        )
        response.raise_for_status()

        soup = BeautifulSoup(response.content, 'html.parser')
        table = soup.find('table', class_=self._TABLE_CLASS)
        if table is None:
            raise ValueError("Journeybook table not found")

        thead = table.find('thead')
        if thead is None:
            raise ValueError("Journeybook table header not found")
        tbody = table.find('tbody')
        if tbody is None:
            raise ValueError("Journeybook table body not found")

        headers = [re.sub(r'\s+', '', th.text) for th in thead.find_all('th')]
        for col in self._SOURCE_COLUMNS:
            if col not in headers:
                raise ValueError(f"Column '{col}' not found. Headers: {headers}")
        # Resolve the wanted column positions once rather than per row.
        wanted = [headers.index(col) for col in self._SOURCE_COLUMNS]
        min_cells = max(wanted) + 1

        rows = []
        for row in tbody.find_all('tr'):
            if "Tankováno" in row.text:
                # Refuelling rows carry only the amount.
                amount = self._parse_refuel_amount(row.text)
                rows.append([""] * len(self._SOURCE_COLUMNS) + [amount])
            elif row.find('form') is not None:
                cells = self._parse_journey_row(row, headers)
                if len(cells) < min_cells:
                    raise ValueError(
                        f"Journey row has {len(cells)} cells, "
                        f"expected at least {min_cells}: {cells}"
                    )
                rows.append([cells[i] for i in wanted] + [""])

        return pd.DataFrame(rows, columns=self._OUTPUT_COLUMNS)