Initial commit - Journey book (kniha jízd) automation system
Features: - FastAPI backend for scraping attendance and journey book data - Deterministic kilometer distribution with random variance - Refueling form filling with km values - Next.js frontend with date range selector - Docker deployment setup 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
147
backend/calculators/kilometer_calculator.py
Normal file
147
backend/calculators/kilometer_calculator.py
Normal file
@@ -0,0 +1,147 @@
|
||||
import pandas as pd
|
||||
import numpy as np
|
||||
from typing import Optional
|
||||
import logging
|
||||
|
||||
logger = logging.getLogger(__name__)


class KilometerCalculator:
    """Redistribute the driven distance of a scraped journey book over its days."""

    # Column names of the scraped journey-book table.
    _COL_DATE = "Datum"
    _COL_START = "Počáteční stav"
    _COL_END = "Koncový stav"
    _COL_DRIVEN = "Ujeto [km]"
    _COL_REFUEL = "Natankováno [l|kg]"

    # The result always carries exactly this many refueling column pairs.
    _MAX_REFUELINGS = 2

    @staticmethod
    def _as_whole_km(value, label: str) -> int:
        """Coerce an odometer reading to a plain ``int`` or raise ``ValueError``."""
        try:
            return int(value)
        except (TypeError, ValueError, OverflowError) as exc:
            raise ValueError(
                f"{label} is not a valid odometer reading: {value!r}"
            ) from exc

    @staticmethod
    def _refuel_columns(slot: int):
        """Return the (amount, odometer) column names of refueling slot *slot*."""
        return f"Natankováno {slot} [l|kg]", f"Tankováno při {slot} [km]"

    @staticmethod
    def recalculate(
        df: pd.DataFrame,
        start_km: Optional[int] = None,
        end_km: Optional[int] = None,
        variance: float = 0.1,
    ) -> pd.DataFrame:
        """
        Recalculate kilometers with a seeded random distribution.

        Rows with a non-empty "Datum" are journey rows. Rows without a date
        but with a refueling amount are folded into the journey row carrying
        the most recent date (at most two refuelings are kept per date). The
        total distance ``end_km - start_km`` is then spread over the journey
        rows and the start/end odometer columns are rebuilt as a running sum.

        The random generator is seeded from ``start_km`` and ``end_km``, so
        the same input always produces the same output. The process-wide
        NumPy random state is left untouched.

        Args:
            df: DataFrame with journey data. It is not modified.
            start_km: Override starting kilometers (uses first row if None)
            end_km: Override ending kilometers (uses last row if None)
            variance: Standard deviation of the daily distance, expressed as
                a fraction of the average daily distance (default 0.1 = 10%)

        Returns:
            A new DataFrame holding one row per journey row, without the
            original refueling column and with two "Natankováno N [l|kg]" /
            "Tankováno při N [km]" column pairs. NaN values are replaced by
            None for JSON serialization.

        Raises:
            ValueError: If there are no journey rows, if a start/end reading
                is missing or not numeric, or if ``end_km < start_km``.
        """
        calc = KilometerCalculator

        # Fail with the documented error instead of an IndexError from iloc.
        if df.empty:
            raise ValueError("No valid days found")

        # reset_index returns a new frame, so the caller's data stays intact
        # and positions below are continuous.
        df = df.reset_index(drop=True)

        if start_km is None:
            start_km = df.iloc[0][calc._COL_START]
        if end_km is None:
            end_km = df.iloc[-1][calc._COL_END]

        # Readings taken from the frame may be numpy floats (columns holding
        # NaN); a float seed would be rejected by NumPy, so normalise first.
        start_km = calc._as_whole_km(start_km, "start_km")
        end_km = calc._as_whole_km(end_km, "end_km")
        logger.info("Start KM: %s, End KM: %s", start_km, end_km)

        if end_km < start_km:
            raise ValueError(
                f"end_km ({end_km}) must not be lower than start_km ({start_km})"
            )

        # Deterministic seed derived from the inputs. A private RandomState
        # yields the same stream as seeding the global generator would, but
        # does not disturb other users of np.random.
        seed = (start_km * 1000 + end_km) % (2**31)
        rng = np.random.RandomState(seed)
        logger.info("Using deterministic seed: %s", seed)

        # Split journey rows from refueling rows. Scraped data structure:
        # journey rows have dates, refueling rows follow with an empty date.
        journey_rows = []
        refuel_data = {}  # date -> refueling amounts in scraped order
        last_date = None
        for position, (datum, refuel_amount) in enumerate(
            zip(df[calc._COL_DATE], df[calc._COL_REFUEL])
        ):
            if pd.notna(datum) and datum != "":
                journey_rows.append(position)
                last_date = datum
            elif (
                last_date is not None
                and pd.notna(refuel_amount)
                and refuel_amount != ""
            ):
                refuel_data.setdefault(last_date, []).append(refuel_amount)

        logger.info(
            "Consolidated to %s journey days, %s days with refueling",
            len(journey_rows),
            len(refuel_data),
        )

        num_days = len(journey_rows)
        if num_days == 0:
            raise ValueError("No valid days found")

        # Keep only journey rows and swap the single refueling column for
        # the fixed set of per-slot columns.
        df = df.iloc[journey_rows].copy().reset_index(drop=True)
        df = df.drop(columns=[calc._COL_REFUEL])
        slots = range(1, calc._MAX_REFUELINGS + 1)
        for slot in slots:
            amount_col, km_col = calc._refuel_columns(slot)
            df[amount_col] = None
            df[km_col] = None

        for row, datum in enumerate(df[calc._COL_DATE]):
            amounts = refuel_data.get(datum, ())[: calc._MAX_REFUELINGS]
            for slot, amount in enumerate(amounts, start=1):
                df.at[row, calc._refuel_columns(slot)[0]] = amount

        total_kilometers = end_km - start_km
        logger.info(
            "Total km to distribute: %s across %s days", total_kilometers, num_days
        )

        avg_km_per_day = total_kilometers / num_days
        km_per_day = np.abs(
            rng.normal(avg_km_per_day, avg_km_per_day * variance, num_days)
        )
        km_per_day = np.round(km_per_day).astype(np.int64)

        # Rounding makes the sum drift; spread the difference evenly and hand
        # the remainder to the first days so the total matches exactly.
        difference = total_kilometers - int(km_per_day.sum())
        logger.info("Difference to distribute: %s", difference)
        if difference != 0:
            adjustment, remaining = divmod(difference, num_days)
            km_per_day += adjustment
            km_per_day[:remaining] += 1

        # With a large variance the uniform correction can push single days
        # below zero. Move that deficit onto the longest day(s) so the
        # odometer never runs backwards while the total is preserved.
        negative = km_per_day < 0
        while negative.any():
            deficit = -int(km_per_day[negative].sum())
            km_per_day[negative] = 0
            km_per_day[int(np.argmax(km_per_day))] -= deficit
            negative = km_per_day < 0

        # Odometer states are a running sum; the last end value equals
        # end_km by construction because the daily values sum to the total.
        day_end = start_km + np.cumsum(km_per_day)
        day_start = day_end - km_per_day
        df[calc._COL_DRIVEN] = km_per_day
        df[calc._COL_START] = day_start
        df[calc._COL_END] = day_end

        # Place every recorded refueling at a random odometer value within
        # that day's range (inclusive on both ends).
        for row in range(num_days):
            low, high = int(day_start[row]), int(day_end[row])
            for slot in slots:
                amount_col, km_col = calc._refuel_columns(slot)
                amount = df.at[row, amount_col]
                if pd.notna(amount) and amount != "":
                    if high > low:
                        df.at[row, km_col] = rng.randint(low, high + 1)
                    else:
                        df.at[row, km_col] = low

        # Replace NaN values with None for JSON serialization
        df = df.replace({np.nan: None})

        return df
|
||||
Reference in New Issue
Block a user