Initial commit - Journey book (kniha jízd) automation system

Features: - FastAPI backend for scraping attendance and journey book data - Deterministic kilometer distribution with random variance - Refueling form filling with km values - Next.js frontend with date range selector - Docker deployment setup 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude <noreply@anthropic.com>
2025-10-10 15:41:11 +02:00
commit 3b5d9fd940
40 changed files with 3777 additions and 0 deletions
--- a/backend/calculators/kilometer_calculator.py
+++ b/backend/calculators/kilometer_calculator.py
@@ -0,0 +1,147 @@
+import pandas as pd
+import numpy as np
+from typing import Optional
+import logging
+
logger = logging.getLogger(__name__)


class KilometerCalculator:
    """Rebuild the odometer columns of a scraped journey book.

    Undated refueling rows are folded into the dated journey row they follow,
    and the distance between a start and an end odometer reading is spread
    over the journey rows with a seeded (reproducible) random variation.
    """

    # Number of "Natankováno N" / "Tankováno při N" column pairs that are
    # always emitted; refuelings beyond this count on one row are dropped.
    MAX_REFUELINGS = 2

    @staticmethod
    def recalculate(
        df: pd.DataFrame,
        start_km: Optional[int] = None,
        end_km: Optional[int] = None,
        variance: float = 0.1
    ) -> pd.DataFrame:
        """
        Recalculate kilometers with a reproducible random distribution.

        The input frame is not modified. Rows with a non-empty "Datum" are
        journey rows; rows without a date but with a refueling amount are
        attached to the closest preceding journey row. The result contains
        only journey rows, with "Ujeto [km]", "Počáteční stav" and
        "Koncový stav" rewritten so that they chain from ``start_km`` to
        exactly ``end_km``, plus the fixed refueling columns.

        Args:
            df: DataFrame with journey data. Must provide the columns
                "Datum" and "Natankováno [l|kg]"; "Počáteční stav" and
                "Koncový stav" are read only when the overrides are None.
            start_km: Override starting kilometers (uses first row if None)
            end_km: Override ending kilometers (uses last row if None)
            variance: Random variance factor (default 0.1 = 10%), used as
                the standard deviation relative to the daily average.

        Returns:
            A new DataFrame with one row per journey row and NaN replaced
            by None for JSON serialization.

        Raises:
            ValueError: If ``df`` is empty, contains no dated row, or
                ``end_km`` is smaller than ``start_km``.
        """
        if df.empty:
            # Fail the same way as a frame without dated rows instead of
            # letting ``iloc[0]`` below raise an IndexError.
            raise ValueError("No valid days found")

        # Continuous positional index; reset_index returns a new frame, so
        # the caller's DataFrame is never touched.
        df = df.reset_index(drop=True)

        if start_km is None:
            start_km = df.iloc[0]["Počáteční stav"]
        if end_km is None:
            end_km = df.iloc[-1]["Koncový stav"]

        # Values read from the frame may be NumPy scalars (possibly floats
        # when the column also holds NaN). Plain ints are needed for the
        # seed and keep all later arithmetic exact.
        start_km = int(start_km)
        end_km = int(end_km)
        if end_km < start_km:
            raise ValueError(
                f"end_km ({end_km}) must not be smaller than start_km ({start_km})"
            )

        logger.info("Start KM: %s, End KM: %s", start_km, end_km)

        # Deterministic seed derived from start/end km: the same input always
        # produces the same output. A private RandomState yields the same
        # draws as seeding the global legacy generator would, without
        # disturbing random state used elsewhere in the process.
        seed = (start_km * 1000 + end_km) % (2**31)
        rng = np.random.RandomState(seed)
        logger.info("Using deterministic seed: %s", seed)

        journey_rows, refuels_by_journey = KilometerCalculator._split_rows(df)
        logger.info(
            "Consolidated to %s journey days, %s days with refueling",
            len(journey_rows),
            len(refuels_by_journey),
        )

        num_days = len(journey_rows)
        if num_days == 0:
            raise ValueError("No valid days found")

        # Keep only journey rows and swap the single refueling column for the
        # fixed set of numbered refueling columns.
        df = df.iloc[journey_rows].copy().reset_index(drop=True)
        df = df.drop(columns=["Natankováno [l|kg]"])
        for slot in range(1, KilometerCalculator.MAX_REFUELINGS + 1):
            df[f"Natankováno {slot} [l|kg]"] = None
            df[f"Tankováno při {slot} [km]"] = None

        total_kilometers = end_km - start_km
        logger.info(
            "Total km to distribute: %s across %s days", total_kilometers, num_days
        )

        km_per_day = KilometerCalculator._distribute_kilometers(
            total_kilometers, num_days, variance, rng
        )
        # Every remaining row is a dated journey row, so the mask selects all
        # of them; assigning through .loc fills the existing column in place.
        df.loc[df["Datum"].notna(), "Ujeto [km]"] = km_per_day

        # Chain the odometer readings. The per-day values sum exactly to
        # total_kilometers, so the last end reading equals end_km.
        day_ranges = []
        current_km = start_km
        for row, driven in enumerate(km_per_day):
            day_end = current_km + int(driven)
            df.at[row, "Počáteční stav"] = current_km
            df.at[row, "Koncový stav"] = day_end
            day_ranges.append((current_km, day_end))
            current_km = day_end

        # Fill refueling amounts and pick an odometer reading for each one.
        # Rows are visited in ascending order, so random draws happen in a
        # stable sequence.
        for row, amounts in refuels_by_journey.items():
            day_start, day_end = day_ranges[row]
            kept = amounts[:KilometerCalculator.MAX_REFUELINGS]
            for slot, amount in enumerate(kept, start=1):
                df.at[row, f"Natankováno {slot} [l|kg]"] = amount
                if day_end > day_start:
                    # randint's upper bound is exclusive; +1 makes the end
                    # reading itself a possible refueling point.
                    refuel_km = rng.randint(day_start, day_end + 1)
                else:
                    refuel_km = day_start
                df.at[row, f"Tankováno při {slot} [km]"] = refuel_km

        # Replace NaN values with None for JSON serialization
        df = df.replace({np.nan: None})

        return df

    @staticmethod
    def _split_rows(df: pd.DataFrame) -> tuple:
        """Separate dated journey rows from the refueling rows that follow them.

        Returns a tuple ``(journey_rows, refuels_by_journey)``:
        ``journey_rows`` lists the positions of rows with a non-empty "Datum";
        ``refuels_by_journey`` maps a journey's position within that list to
        the refueling amounts found on undated rows after it.
        """
        journey_rows = []
        refuels_by_journey = {}

        rows = zip(df["Datum"], df["Natankováno [l|kg]"])
        for position, (datum, amount) in enumerate(rows):
            if pd.notna(datum) and datum != "":
                journey_rows.append(position)
            elif journey_rows and pd.notna(amount) and amount != "":
                # Undated row with an amount: belongs to the last journey row.
                # Undated rows before the first journey row are ignored.
                owner = len(journey_rows) - 1
                refuels_by_journey.setdefault(owner, []).append(amount)

        return journey_rows, refuels_by_journey

    @staticmethod
    def _distribute_kilometers(
        total_kilometers: int,
        num_days: int,
        variance: float,
        rng: np.random.RandomState,
    ) -> np.ndarray:
        """Split ``total_kilometers`` into ``num_days`` integers summing to it.

        Daily values are drawn from a normal distribution around the daily
        average (standard deviation ``average * variance``), rounded, and then
        corrected so that the sum matches the requested total exactly.
        """
        avg_km_per_day = total_kilometers / num_days
        km_per_day = np.abs(
            rng.normal(avg_km_per_day, avg_km_per_day * variance, num_days)
        )
        km_per_day = np.round(km_per_day).astype(int)

        difference = int(total_kilometers - km_per_day.sum())
        logger.info("Difference to distribute: %s", difference)

        if difference:
            # Floor division leaves a remainder in [0, num_days): shift every
            # day by the quotient, then give one extra km to the first days.
            adjustment, remaining = divmod(difference, num_days)
            km_per_day += adjustment
            km_per_day[:remaining] += 1

        return km_per_day