Features: - FastAPI backend for scraping attendance and journey book data - Deterministic kilometer distribution with random variance - Refueling form filling with km values - Next.js frontend with date range selector - Docker deployment setup 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude <noreply@anthropic.com>
148 lines
5.8 KiB
Python
148 lines
5.8 KiB
Python
import pandas as pd
|
|
import numpy as np
|
|
from typing import Optional
|
|
import logging
|
|
|
|
logger = logging.getLogger(__name__)


class KilometerCalculator:
    """Redistribute the driven distance of a journey book across its days.

    The input frame is expected to contain the columns ``Datum``,
    ``Počáteční stav``, ``Koncový stav``, ``Ujeto [km]`` and
    ``Natankováno [l|kg]``. Rows with a non-empty ``Datum`` are journey rows;
    rows without a date but with a refueling amount are refueling rows that
    belong to the journey row preceding them.
    """

    # The output always carries exactly this many refueling column pairs.
    _MAX_REFUELINGS = 2

    @staticmethod
    def recalculate(
        df: pd.DataFrame,
        start_km: Optional[int] = None,
        end_km: Optional[int] = None,
        variance: float = 0.1,
    ) -> pd.DataFrame:
        """Recalculate kilometers with a deterministic pseudo-random distribution.

        The total distance ``end_km - start_km`` is split over all journey
        rows using normally distributed daily distances, corrected so that
        they add up exactly to the total and never go below zero. Odometer
        start/end values are then rebuilt row by row, refueling rows are
        folded into the journey row they follow, and every refueling gets an
        odometer reading inside that row's range.

        The random numbers come from a generator private to this call that is
        seeded from ``start_km`` and ``end_km``: identical input yields
        identical output and the global NumPy random state is left untouched.

        Args:
            df: DataFrame with journey data (not modified).
            start_km: Override starting kilometers (uses the first usable
                "Počáteční stav" value if None).
            end_km: Override ending kilometers (uses the last usable
                "Koncový stav" value if None).
            variance: Random variance factor (default 0.1 = 10%).

        Returns:
            A new DataFrame with one row per journey row, without the
            original "Natankováno [l|kg]" column and with the columns
            "Natankováno N [l|kg]" / "Tankováno při N [km]" for N = 1, 2.
            NaN values are replaced by None for JSON serialization.

        Raises:
            ValueError: If there is no journey row, if a boundary value is
                missing or not numeric, or if ``end_km`` is smaller than
                ``start_km``.
        """
        # Work on a copy with a continuous 0..n-1 index.
        frame = df.copy().reset_index(drop=True)
        if frame.empty:
            raise ValueError("No valid days found")

        if start_km is None:
            start_km = KilometerCalculator._edge_value(
                frame, "Počáteční stav", last=False
            )
        if end_km is None:
            end_km = KilometerCalculator._edge_value(
                frame, "Koncový stav", last=True
            )
        # Values read from the frame may be NumPy scalars or floats; the seed
        # and the odometer arithmetic below need plain integers.
        start_km = KilometerCalculator._as_int(start_km, "start_km")
        end_km = KilometerCalculator._as_int(end_km, "end_km")
        if end_km < start_km:
            raise ValueError(
                f"end_km ({end_km}) must not be smaller than start_km ({start_km})"
            )

        logger.info("Start KM: %s, End KM: %s", start_km, end_km)

        # Deterministic seed derived from the boundaries, so the same input
        # always produces the same output. A private RandomState produces the
        # same number stream as seeding the global generator would, without
        # disturbing other users of np.random.
        seed = (start_km * 1000 + end_km) % (2**31)
        rng = np.random.RandomState(seed)
        logger.info("Using deterministic seed: %s", seed)

        journey_rows, refuels = KilometerCalculator._split_rows(frame)
        logger.info(
            "Consolidated to %s journey days, %s days with refueling",
            len(journey_rows),
            len(refuels),
        )
        if not journey_rows:
            raise ValueError("No valid days found")

        # Keep only journey rows and swap the single refueling column for
        # the fixed set of numbered refueling columns.
        out = (
            frame.iloc[journey_rows]
            .drop(columns=["Natankováno [l|kg]"])
            .reset_index(drop=True)
        )
        limit = KilometerCalculator._MAX_REFUELINGS
        for slot in range(1, limit + 1):
            out[f"Natankováno {slot} [l|kg]"] = None
            out[f"Tankováno při {slot} [km]"] = None

        num_days = len(out)
        total_kilometers = end_km - start_km
        logger.info(
            "Total km to distribute: %s across %s days", total_kilometers, num_days
        )

        km_per_day = KilometerCalculator._distribute(
            total_kilometers, num_days, variance, rng
        )

        # Rebuild the odometer: each row starts where the previous one ended.
        # The daily values sum to the total, so the last row ends at end_km.
        ends = start_km + np.cumsum(km_per_day)
        starts = ends - km_per_day
        out["Ujeto [km]"] = km_per_day
        out["Počáteční stav"] = starts
        out["Koncový stav"] = ends

        # Attach refuelings to their journey row, in row order so the random
        # stream is consumed deterministically.
        for row, amounts in refuels.items():
            if len(amounts) > limit:
                logger.warning(
                    "Journey row %s has %s refuelings; only the first %s are kept",
                    row,
                    len(amounts),
                    limit,
                )
            kept = amounts[:limit]
            low, high = int(starts[row]), int(ends[row])
            # One odometer reading per refueling, inside the row's range.
            readings = sorted(
                int(rng.randint(low, high + 1)) if high > low else low
                for _ in kept
            )
            # Sorted so a later refueling never has a lower odometer reading.
            for slot, (amount, reading) in enumerate(zip(kept, readings), start=1):
                out.at[row, f"Natankováno {slot} [l|kg]"] = amount
                out.at[row, f"Tankováno při {slot} [km]"] = reading

        # Replace NaN values with None for JSON serialization.
        return out.replace({np.nan: None})

    @staticmethod
    def _edge_value(frame: pd.DataFrame, column: str, *, last: bool):
        """Return the first (or last) value of ``column`` that is not NaN/None/""."""
        values = frame[column].tolist()
        for value in reversed(values) if last else values:
            if pd.notna(value) and value != "":
                return value
        raise ValueError(f"No usable value found in column '{column}'")

    @staticmethod
    def _as_int(value, label: str) -> int:
        """Convert a boundary value to a plain int or raise ValueError."""
        try:
            return int(value)
        except (TypeError, ValueError, OverflowError) as err:
            raise ValueError(
                f"{label} must be a whole number, got {value!r}"
            ) from err

    @staticmethod
    def _split_rows(frame: pd.DataFrame) -> tuple:
        """Separate journey rows from refueling rows.

        Returns ``(journey_rows, refuels)``: the positions of all rows with a
        date, and a dict mapping a journey row's position in the consolidated
        output to the refueling amounts that followed it. Refuelings are tied
        to the row, not the date, so two journey rows sharing a date do not
        both receive the same refuelings.
        """
        journey_rows = []
        refuels = {}
        dates = frame["Datum"].tolist()
        amounts = frame["Natankováno [l|kg]"].tolist()
        for position, (datum, amount) in enumerate(zip(dates, amounts)):
            if pd.notna(datum) and datum != "":
                journey_rows.append(position)
            elif pd.notna(amount) and amount != "" and journey_rows:
                # Refueling row: belongs to the most recent journey row.
                refuels.setdefault(len(journey_rows) - 1, []).append(amount)
        return journey_rows, refuels

    @staticmethod
    def _distribute(total: int, num_days: int, variance: float, rng) -> np.ndarray:
        """Split ``total`` km into ``num_days`` non-negative integer distances."""
        mean = total / num_days
        km = np.abs(rng.normal(mean, mean * variance, num_days))
        km = np.round(km).astype(int)

        # Rounding and randomness leave a gap to the exact total; spread it
        # evenly and give the remainder to the first days.
        difference = total - int(km.sum())
        logger.info("Difference to distribute: %s", difference)
        if difference:
            step, extra = divmod(difference, num_days)
            km += step
            km[:extra] += 1

        # A negative correction can push small days below zero. Move distance
        # from the largest day to the most negative one until none is left;
        # the sum is unchanged and, being >= 0, guarantees a positive donor.
        while km.min() < 0:
            low, high = int(km.argmin()), int(km.argmax())
            moved = min(-km[low], km[high])
            km[low] += moved
            km[high] -= moved
        return km
|