Initial commit - Journey book (kniha jízd) automation system

Features:
- FastAPI backend for scraping attendance and journey book data
- Deterministic kilometer distribution with random variance
- Refueling form filling with km values
- Next.js frontend with date range selector
- Docker deployment setup

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
Docker Config Backup
2025-10-10 15:41:11 +02:00
commit 3b5d9fd940
40 changed files with 3777 additions and 0 deletions

View File

@@ -0,0 +1,147 @@
import pandas as pd
import numpy as np
from typing import Optional
import logging
logger = logging.getLogger(__name__)


class KilometerCalculator:
    """Rebuild the odometer figures of a scraped journey book (kniha jízd).

    The scraped table mixes two kinds of rows: journey rows, which carry a
    value in "Datum", and refuelling rows, which have an empty "Datum" and a
    value in "Natankováno [l|kg]". ``recalculate`` folds the refuelling rows
    into the preceding journey row and spreads the total distance over the
    journey rows in a reproducible pseudo-random way.
    """

    # Number of ("Natankováno N", "Tankováno při N") column pairs that are
    # always present in the result; further refuellings of a day are dropped.
    _MAX_REFUELINGS_PER_DAY = 2

    @staticmethod
    def recalculate(
        df: pd.DataFrame,
        start_km: Optional[int] = None,
        end_km: Optional[int] = None,
        variance: float = 0.1,
    ) -> pd.DataFrame:
        """Recalculate kilometers with a deterministic random distribution.

        Args:
            df: Scraped journey data. Must contain the columns "Datum" and
                "Natankováno [l|kg]"; "Počáteční stav" / "Koncový stav" are
                read only when ``start_km`` / ``end_km`` are not given.
                The input frame is not modified.
            start_km: Override starting kilometers (uses first row if None).
            end_km: Override ending kilometers (uses last row if None).
            variance: Random variance factor (default 0.1 = 10%); used as the
                standard deviation relative to the average km per day.

        Returns:
            A new DataFrame with one row per journey row, the columns
            "Ujeto [km]", "Počáteční stav" and "Koncový stav" recomputed so
            that the daily distances add up to ``end_km - start_km``, the
            original "Natankováno [l|kg]" column replaced by two
            "Natankováno N [l|kg]" / "Tankováno při N [km]" column pairs, and
            NaN replaced by None for JSON serialization.

        Raises:
            ValueError: If there is no journey row, or if ``end_km`` is
                smaller than ``start_km``.
        """
        if df.empty:
            # Without this guard the iloc lookups below raise IndexError
            # before the intended "no days" error can be reported.
            raise ValueError("No valid days found")

        # reset_index returns a new frame, so the caller's data is untouched
        # and row labels are guaranteed to be 0..n-1 from here on.
        df = df.reset_index(drop=True)

        if start_km is None:
            start_km = df.iloc[0]["Počáteční stav"]
        if end_km is None:
            end_km = df.iloc[-1]["Koncový stav"]
        # Normalise to plain Python ints: values taken from the frame may be
        # NumPy scalars, floats or numeric strings, which would break the
        # seed computation and the integer arithmetic below.
        start_km = int(start_km)
        end_km = int(end_km)
        logger.info("Start KM: %s, End KM: %s", start_km, end_km)

        if end_km < start_km:
            raise ValueError(
                f"end_km ({end_km}) must not be smaller than start_km ({start_km})"
            )

        # Seed derived from start/end km so the same input always produces
        # the same output. A private RandomState yields exactly the stream
        # that np.random.seed(seed) would, without reseeding the global
        # generator shared with the rest of the process.
        seed = (start_km * 1000 + end_km) % (2**31)
        rng = np.random.RandomState(seed)
        logger.info("Using deterministic seed: %s", seed)

        journey_rows, refuel_data = KilometerCalculator._consolidate_refuelings(df)
        logger.info(
            "Consolidated to %s journey days, %s days with refueling",
            len(journey_rows),
            len(refuel_data),
        )
        num_days = len(journey_rows)
        if num_days == 0:
            raise ValueError("No valid days found")

        # Keep only journey rows and swap the single refuelling column for a
        # fixed set of numbered column pairs.
        df = df.iloc[journey_rows].reset_index(drop=True)
        df = df.drop(columns=["Natankováno [l|kg]"])
        max_refuelings = KilometerCalculator._MAX_REFUELINGS_PER_DAY
        for refuel_num in range(1, max_refuelings + 1):
            df[f"Natankováno {refuel_num} [l|kg]"] = None
            df[f"Tankováno při {refuel_num} [km]"] = None

        total_kilometers = end_km - start_km
        logger.info(
            "Total km to distribute: %s across %s days", total_kilometers, num_days
        )
        km_per_day = KilometerCalculator._distribute_kilometers(
            total_kilometers, num_days, variance, rng
        )
        df["Ujeto [km]"] = km_per_day

        # Running odometer states. They are kept as plain Python ints rather
        # than read back from the frame: values read from an int64 column are
        # NumPy scalars, which fail an isinstance(x, (int, float)) check and
        # previously caused the refuel km to be skipped silently.
        end_states = (start_km + np.cumsum(km_per_day)).tolist()
        start_states = [start_km] + end_states[:-1]
        end_states[-1] = end_km  # explicit: the last day ends on end_km
        df["Počáteční stav"] = start_states
        df["Koncový stav"] = end_states

        # Attach refuelling amounts and pick an odometer reading for each one
        # inside the day's [start, end] range. Draw order (row by row, then
        # refuelling 1..N) is part of the deterministic contract.
        # NOTE(review): amounts are keyed by date, so two journey rows that
        # share a date both receive that date's refuellings - confirm the
        # scraper emits one journey row per date.
        for row, (datum, low, high) in enumerate(
            zip(df["Datum"], start_states, end_states)
        ):
            amounts = refuel_data.get(datum)
            if not amounts:
                continue
            if len(amounts) > max_refuelings:
                logger.warning(
                    "%s has %s refuelings; only the first %s are kept",
                    datum,
                    len(amounts),
                    max_refuelings,
                )
            for refuel_num, amount in enumerate(amounts[:max_refuelings], start=1):
                df.at[row, f"Natankováno {refuel_num} [l|kg]"] = amount
                if high > low:
                    refuel_km = int(rng.randint(low, high + 1))
                else:
                    # Nothing driven that day: refuelled at the start state.
                    refuel_km = low
                df.at[row, f"Tankováno při {refuel_num} [km]"] = refuel_km

        # Replace NaN values with None for JSON serialization
        df = df.replace({np.nan: None})
        return df

    @staticmethod
    def _consolidate_refuelings(df: pd.DataFrame):
        """Split rows into journey positions and refuel amounts grouped by date.

        A row with a non-empty "Datum" is a journey row. A row without a date
        but with a non-empty "Natankováno [l|kg]" is a refuelling that belongs
        to the most recent journey date seen before it. Rows that are neither
        (or refuellings that precede any journey row) are ignored.

        Returns:
            Tuple ``(journey_rows, refuel_data)``: the positional indices of
            the journey rows and a dict mapping date -> list of amounts.
        """
        journey_rows = []
        refuel_data = {}
        last_date = None
        columns = zip(df["Datum"], df["Natankováno [l|kg]"])
        for position, (datum, refuel_amount) in enumerate(columns):
            if pd.notna(datum) and datum != "":
                journey_rows.append(position)
                last_date = datum
            elif pd.notna(refuel_amount) and refuel_amount != "" and last_date:
                refuel_data.setdefault(last_date, []).append(refuel_amount)
        return journey_rows, refuel_data

    @staticmethod
    def _distribute_kilometers(total_kilometers, num_days, variance, rng):
        """Draw non-negative integer km per day that sum to ``total_kilometers``.

        Daily values are drawn from a normal distribution around the average
        (standard deviation ``average * variance``), rounded, and then
        corrected so the sum matches the requested total exactly.
        """
        avg_km_per_day = total_kilometers / num_days
        km_per_day = np.abs(
            rng.normal(avg_km_per_day, avg_km_per_day * variance, num_days)
        )
        km_per_day = np.round(km_per_day).astype(int)

        difference = int(total_kilometers - km_per_day.sum())
        logger.info("Difference to distribute: %s", difference)
        if difference:
            # Spread the rounding error evenly; the floor-division remainder
            # (always >= 0) goes to the first days, one km each.
            adjustment, remaining = divmod(difference, num_days)
            km_per_day += adjustment
            km_per_day[:remaining] += 1

        # A negative correction can push a 0 km day below zero, which would
        # make the odometer run backwards. Borrow the missing km from the
        # currently longest day; the total is >= 0, so a donor always exists.
        for day in np.flatnonzero(km_per_day < 0):
            while km_per_day[day] < 0:
                km_per_day[np.argmax(km_per_day)] -= 1
                km_per_day[day] += 1
        return km_per_day