Initial commit - Journey book (kniha jízd) automation system

Features:
- FastAPI backend for scraping attendance and journey book data
- Deterministic kilometer distribution with random variance
- Refueling form filling with km values
- Next.js frontend with date range selector
- Docker deployment setup

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
2025-10-10 15:41:11 +02:00
commit 3b5d9fd940
40 changed files with 3777 additions and 0 deletions
--- a/backend/scrapers/journeybook_scraper.py
+++ b/backend/scrapers/journeybook_scraper.py
@@ -0,0 +1,73 @@
+import re
+import requests
+from bs4 import BeautifulSoup
+import pandas as pd
+from typing import Dict, Any
+
+
+class JourneybookScraper:
+    """Scrape the monthly journey-book overview table for one vehicle.
+
+    The page is fetched with HTTP basic auth and its HTML table is converted
+    into a pandas DataFrame (see ``scrape_month``).
+    """
+
+    # Header texts (with all whitespace removed) of the columns copied from
+    # every journey row, in output order.
+    _SOURCE_COLUMNS = ("Datum", "Počátečnístav", "Koncovýstav", "Ujeto[km]")
+    # Column names of the returned DataFrame; the last one is filled only by
+    # refuelling rows.
+    _OUTPUT_COLUMNS = ["Datum", "Počáteční stav", "Koncový stav", "Ujeto [km]", "Natankováno [l|kg]"]
+    # Exact value of the class attribute of the table that is looked up.
+    _TABLE_CLASS = 'table table-striped table-bordered table-condensed table-sm'
+    # Refuelled amount; the fractional part is optional so whole amounts such
+    # as "40" are captured too.
+    _REFUEL_RE = re.compile(r'natankováno\s+(\d+(?:\.\d+)?)\s*\[l\|kg\]')
+    # Date such as "1. 10. 2025"; whitespace after the dots is optional.
+    _DATE_RE = re.compile(r'\d{1,2}\.\s*\d{1,2}\.\s*\d{4}')
+
+    def __init__(
+        self,
+        username: str,
+        password: str,
+        vehicle_registration: str = "4SH1148",
+        *,
+        timeout: float = 30.0,
+        verify_ssl: bool = False,
+    ):
+        """Store credentials and request settings.
+
+        Args:
+            username: User name for HTTP basic auth.
+            password: Password for HTTP basic auth.
+            vehicle_registration: Value sent as the ``rz`` query parameter.
+            timeout: Seconds to wait for the server before giving up.
+            verify_ssl: Whether to verify the server's TLS certificate.
+                Defaults to ``False`` to keep the previous behaviour.
+        """
+        self.username = username
+        self.password = password
+        self.vehicle_registration = vehicle_registration
+        self.timeout = timeout
+        self.verify_ssl = verify_ssl
+        self.base_url = "https://kj.colsys.cz/prehled_mesic.php"
+
+    @staticmethod
+    def normalize_date(date_str: str) -> str:
+        """Return *date_str* with every whitespace character removed."""
+        return re.sub(r'\s+', '', date_str)
+
+    def scrape_month(self, month: str) -> pd.DataFrame:
+        """
+        Scrape journeybook data for a given month.
+
+        Args:
+            month: Month identifier; "-01" is appended to it to build the
+                ``den`` query parameter (presumably "YYYY-MM" - confirm
+                against callers).
+
+        Returns:
+            DataFrame with columns: Datum, Počáteční stav, Koncový stav,
+            Ujeto [km], Natankováno [l|kg]. Journey rows leave the last
+            column empty; refuelling rows fill only the last column.
+
+        Raises:
+            requests.HTTPError: If the server answers with an error status.
+            ValueError: If the table, its head/body or a required column is
+                missing, or a journey row has too few cells.
+        """
+        response = requests.get(
+            self.base_url,
+            # Let requests build and URL-encode the query string.
+            params={"rz": self.vehicle_registration, "den": f"{month}-01"},
+            auth=(self.username, self.password),
+            # SECURITY: with verify_ssl=False the basic-auth credentials are
+            # sent without checking the server certificate. Pass
+            # verify_ssl=True when the server has a trusted certificate.
+            verify=self.verify_ssl,
+            # Without a timeout a stalled server would block forever.
+            timeout=self.timeout,
+        )
+        response.raise_for_status()
+
+        soup = BeautifulSoup(response.content, 'html.parser')
+        table = soup.find('table', class_=self._TABLE_CLASS)
+        if table is None:
+            raise ValueError("Journeybook table not found")
+
+        return pd.DataFrame(self._parse_table(table), columns=self._OUTPUT_COLUMNS)
+
+    def _parse_table(self, table) -> list:
+        """Convert the journey-book ``<table>`` element into DataFrame rows."""
+        thead = table.find('thead')
+        tbody = table.find('tbody')
+        if thead is None or tbody is None:
+            raise ValueError("Journeybook table is missing its <thead> or <tbody>")
+
+        # Remove all whitespace (including non-breaking spaces and newlines)
+        # so header matching does not depend on page formatting.
+        headers = ["".join(th.text.split()) for th in thead.find_all('th')]
+        for col in self._SOURCE_COLUMNS:
+            if col not in headers:
+                raise ValueError(f"Column '{col}' not found. Headers: {headers}")
+        # Resolve the wanted column positions once instead of once per row.
+        keep_idx = [headers.index(col) for col in self._SOURCE_COLUMNS]
+
+        rows = []
+        for row in tbody.find_all('tr'):
+            if "Tankováno" in row.text:
+                # Refuelling row: only the amount column is filled.
+                amount_match = self._REFUEL_RE.search(row.text.strip())
+                amount = amount_match.group(1) if amount_match else ""
+                rows.append([""] * len(self._SOURCE_COLUMNS) + [amount])
+            elif row.find('form') is not None:
+                # Journey row: one value per <td>, then keep the wanted columns.
+                cells = [
+                    self._cell_value(cell, headers[idx] if idx < len(headers) else "")
+                    for idx, cell in enumerate(row.find_all('td'))
+                ]
+                if len(cells) <= max(keep_idx):
+                    raise ValueError(
+                        f"Journey row has {len(cells)} cells, expected at least "
+                        f"{max(keep_idx) + 1}. Headers: {headers}"
+                    )
+                rows.append([cells[idx] for idx in keep_idx] + [""])
+            # Any other row is ignored.
+        return rows
+
+    def _cell_value(self, cell, column_name: str) -> str:
+        """Return the value of one ``<td>``: its input's value or its text."""
+        input_field = cell.find('input')
+        if input_field is not None:
+            return input_field.get('value', '')
+
+        text = cell.text.strip()
+        if column_name == "Datum":
+            # Keep only the date itself and drop the spaces inside it.
+            date_match = self._DATE_RE.search(text)
+            if date_match:
+                return self.normalize_date(date_match.group())
+        return text