Initial commit - Journey book (kniha jízd) automation system
Features:
- FastAPI backend for scraping attendance and journey book data
- Deterministic kilometer distribution with random variance
- Refueling form filling with km values
- Next.js frontend with date range selector
- Docker deployment setup

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
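For orientation, here is a minimal sketch of exercising the main endpoint this commit adds. It assumes the backend container from backend/Dockerfile is running and listening on localhost:8002; the endpoint path, request fields, and response keys come from the CalculateRequest model and /api/calculate handler in backend/api/main.py below, while the credential values are placeholders.

# Illustrative sketch only; assumes the FastAPI service is reachable on localhost:8002.
import requests

payload = {
    "username": "your_username",        # placeholder, see backend/.env.example
    "password": "your_password",        # placeholder
    "start_date": "2025-01-01",         # YYYY-MM-DD, as CalculateRequest expects
    "end_date": "2025-01-31",
    "start_km": 12000,
    "end_km": 13500,
    "vehicle_registration": "4SH1148",
    "variance": 0.1,
}

resp = requests.post("http://localhost:8002/api/calculate", json=payload, timeout=120)
resp.raise_for_status()
result = resp.json()
print(result["total_entries"], "entries,", result["filtered_days"], "sick/vacation days filtered")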
4  backend/.env.example  Normal file
@@ -0,0 +1,4 @@
USERNAME=your_username
PASSWORD=your_password
VEHICLE_REGISTRATION=4SH1148
DATABASE_URL=sqlite:///./journeybook.db
14  backend/Dockerfile  Normal file
@@ -0,0 +1,14 @@
FROM python:3.11-slim

WORKDIR /app

RUN apt-get update && apt-get install -y \
    gcc \
    && rm -rf /var/lib/apt/lists/*

COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

COPY . .

CMD ["uvicorn", "api.main:app", "--host", "0.0.0.0", "--port", "8002", "--reload"]
243  backend/api/main.py  Normal file
@@ -0,0 +1,243 @@
from fastapi import FastAPI, HTTPException, Depends
from fastapi.middleware.cors import CORSMiddleware
from pydantic import BaseModel
from typing import Optional
import pandas as pd
from io import BytesIO
from fastapi.responses import StreamingResponse
import logging
from datetime import datetime
from dateutil.relativedelta import relativedelta

from scrapers import AttendanceScraper, JourneybookScraper
from calculators.kilometer_calculator import KilometerCalculator
from fillers import JourneybookFiller

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

app = FastAPI(title="Kniha Jízd API", version="1.0.0")

app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)


class ScrapeRequest(BaseModel):
    username: str
    password: str
    month: str
    vehicle_registration: str = "4SH1148"


class CalculateRequest(BaseModel):
    username: str
    password: str
    start_date: str  # Format: YYYY-MM-DD
    end_date: str  # Format: YYYY-MM-DD
    start_km: int
    end_km: int
    vehicle_registration: str = "4SH1148"
    variance: float = 0.1


class FillRequest(CalculateRequest):
    dry_run: bool = True


@app.get("/health")
async def health_check():
    return {"status": "healthy"}


@app.post("/api/scrape/attendance")
async def scrape_attendance(request: ScrapeRequest):
    """Scrape attendance data for a month"""
    try:
        scraper = AttendanceScraper(request.username, request.password)
        attendance_dates = scraper.scrape_month(request.month)
        return {
            "month": request.month,
            "sick_vacation_days": attendance_dates,
            "count": len(attendance_dates)
        }
    except Exception as e:
        logger.error(f"Attendance scraping failed: {e}")
        raise HTTPException(status_code=500, detail=str(e))


@app.post("/api/scrape/journeybook")
async def scrape_journeybook(request: ScrapeRequest):
    """Scrape journeybook data for a month"""
    try:
        scraper = JourneybookScraper(request.username, request.password, request.vehicle_registration)
        df = scraper.scrape_month(request.month)

        return {
            "month": request.month,
            "entries": df.to_dict(orient='records'),
            "count": len(df)
        }
    except Exception as e:
        logger.error(f"Journeybook scraping failed: {e}")
        raise HTTPException(status_code=500, detail=str(e))


@app.post("/api/calculate")
async def calculate_kilometers(request: CalculateRequest):
    """Scrape data, filter sick days, and recalculate kilometers"""
    try:
        attendance_scraper = AttendanceScraper(request.username, request.password)
        journeybook_scraper = JourneybookScraper(request.username, request.password, request.vehicle_registration)

        # Get all months in the date range
        start = datetime.strptime(request.start_date, "%Y-%m-%d")
        end = datetime.strptime(request.end_date, "%Y-%m-%d")

        # Collect data from all months
        all_attendance_dates = []
        all_dfs = []

        current = start
        while current <= end:
            month_str = current.strftime("%Y-%m")
            logger.info(f"Scraping attendance for {month_str}")
            attendance_dates = attendance_scraper.scrape_month(month_str)
            all_attendance_dates.extend(attendance_dates)

            logger.info(f"Scraping journeybook for {month_str}")
            df_month = journeybook_scraper.scrape_month(month_str)
            all_dfs.append(df_month)

            current = current + relativedelta(months=1)

        # Combine all months
        df = pd.concat(all_dfs, ignore_index=True) if all_dfs else pd.DataFrame()

        # Filter by actual date range, but preserve refueling rows (empty Datum)
        if not df.empty:
            date_parsed = pd.to_datetime(df["Datum"], format="%d.%m.%Y", errors='coerce')
            is_refuel = df["Datum"].isna() | (df["Datum"] == "")
            is_in_range = (date_parsed >= start) & (date_parsed <= end)
            df = df[is_in_range | is_refuel]

        logger.info(f"Filtering out {len(all_attendance_dates)} sick/vacation days")
        # Only filter journey rows, not refueling rows
        is_refuel = df["Datum"].isna() | (df["Datum"] == "")
        df = df[~df["Datum"].isin(all_attendance_dates) | is_refuel]

        logger.info("Recalculating kilometers")
        df = KilometerCalculator.recalculate(df, request.start_km, request.end_km, request.variance)

        return {
            "month": f"{request.start_date} - {request.end_date}",
            "start_km": request.start_km,
            "end_km": request.end_km,
            "filtered_days": len(all_attendance_dates),
            "entries": df.to_dict(orient='records'),
            "total_entries": len(df)
        }
    except Exception as e:
        import traceback
        logger.error(f"Calculate failed: {e}")
        logger.error(traceback.format_exc())
        raise HTTPException(status_code=500, detail=str(e))


@app.post("/api/export/excel")
async def export_to_excel(request: CalculateRequest):
    """Generate and download Excel file"""
    try:
        attendance_scraper = AttendanceScraper(request.username, request.password)
        journeybook_scraper = JourneybookScraper(request.username, request.password, request.vehicle_registration)

        # Get all months in the date range
        start = datetime.strptime(request.start_date, "%Y-%m-%d")
        end = datetime.strptime(request.end_date, "%Y-%m-%d")

        # Collect data from all months
        all_attendance_dates = []
        all_dfs = []

        current = start
        while current <= end:
            month_str = current.strftime("%Y-%m")
            attendance_dates = attendance_scraper.scrape_month(month_str)
            all_attendance_dates.extend(attendance_dates)
            df_month = journeybook_scraper.scrape_month(month_str)
            all_dfs.append(df_month)
            current = current + relativedelta(months=1)

        # Combine all months
        df = pd.concat(all_dfs, ignore_index=True) if all_dfs else pd.DataFrame()

        # Filter by actual date range, but preserve refueling rows (empty Datum)
        if not df.empty:
            date_parsed = pd.to_datetime(df["Datum"], format="%d.%m.%Y", errors='coerce')
            is_refuel = df["Datum"].isna() | (df["Datum"] == "")
            is_in_range = (date_parsed >= start) & (date_parsed <= end)
            df = df[is_in_range | is_refuel]

        # Only filter journey rows, not refueling rows
        is_refuel = df["Datum"].isna() | (df["Datum"] == "")
        df = df[~df["Datum"].isin(all_attendance_dates) | is_refuel]
        df = KilometerCalculator.recalculate(df, request.start_km, request.end_km, request.variance)

        output = BytesIO()
        df.to_excel(output, index=False, engine='openpyxl')
        output.seek(0)

        filename = f"journeybook_{request.start_date}_{request.end_date}.xlsx"
        return StreamingResponse(
            output,
            media_type="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
            headers={"Content-Disposition": f"attachment; filename={filename}"}
        )
    except Exception as e:
        logger.error(f"Export failed: {e}")
        raise HTTPException(status_code=500, detail=str(e))


@app.post("/api/fill/journeybook")
async def fill_journeybook(request: FillRequest):
    """Fill calculated data back to kj.colsys.cz (restricted to January 2025)"""
    try:
        logger.info(f"Fill request received with dry_run={request.dry_run}")

        # Only allow January 2025 for testing
        if request.start_date != "2025-01-01" or request.end_date < "2025-01-31":
            raise HTTPException(status_code=400, detail="Only January 2025 is allowed for testing")

        attendance_scraper = AttendanceScraper(request.username, request.password)
        journeybook_scraper = JourneybookScraper(request.username, request.password, request.vehicle_registration)

        # Get January data
        month_str = "2025-01"
        attendance_dates = attendance_scraper.scrape_month(month_str)
        df = journeybook_scraper.scrape_month(month_str)

        # Filter sick/vacation days
        df = df[~df["Datum"].isin(attendance_dates)]

        # Recalculate kilometers
        df = KilometerCalculator.recalculate(df, request.start_km, request.end_km, request.variance)

        # Fill data back (supports dry_run mode)
        filler = JourneybookFiller(request.username, request.password, request.vehicle_registration)
        result = filler.fill_month(df, month_str, dry_run=request.dry_run)

        return result
    except Exception as e:
        import traceback
        logger.error(f"Fill failed: {e}")
        logger.error(traceback.format_exc())
        raise HTTPException(status_code=500, detail=str(e))


if __name__ == "__main__":
    import uvicorn
    uvicorn.run(app, host="0.0.0.0", port=8000)
147  backend/calculators/kilometer_calculator.py  Normal file
@@ -0,0 +1,147 @@
import pandas as pd
import numpy as np
from typing import Optional
import logging

logger = logging.getLogger(__name__)


class KilometerCalculator:
    @staticmethod
    def recalculate(
        df: pd.DataFrame,
        start_km: Optional[int] = None,
        end_km: Optional[int] = None,
        variance: float = 0.1
    ) -> pd.DataFrame:
        """
        Recalculate kilometers with random distribution.

        Args:
            df: DataFrame with journey data
            start_km: Override starting kilometers (uses first row if None)
            end_km: Override ending kilometers (uses last row if None)
            variance: Random variance factor (default 0.1 = 10%)
        """
        df = df.copy()

        if start_km is None:
            start_km = df.iloc[0]["Počáteční stav"]
        if end_km is None:
            end_km = df.iloc[-1]["Koncový stav"]

        logger.info(f"Start KM: {start_km}, End KM: {end_km}")

        # Set deterministic random seed based on start/end km to ensure consistent results
        # This ensures the same input always produces the same output
        seed = (start_km * 1000 + end_km) % (2**31)
        np.random.seed(seed)
        logger.info(f"Using deterministic seed: {seed}")

        # Reset index FIRST to ensure continuous indices after filtering
        df = df.reset_index(drop=True)

        # Merge refueling rows into journey rows (consolidate by date)
        # Scraped data structure: journey rows have dates, refueling rows follow with empty date
        journey_rows = []
        refuel_data = {}  # Store refueling data by date
        last_date = None

        for i in range(len(df)):
            datum = df.at[i, "Datum"]
            refuel_amount = df.at[i, "Natankováno [l|kg]"]

            if pd.notna(datum) and datum != "":
                # This is a journey row with a date
                journey_rows.append(i)
                last_date = datum
            elif pd.notna(refuel_amount) and refuel_amount != "" and last_date:
                # This is a refueling row (no date, but has refueling amount)
                # Associate it with the last journey date
                if last_date not in refuel_data:
                    refuel_data[last_date] = []
                refuel_data[last_date].append(refuel_amount)

        # Maximum 2 refuelings per day
        max_refuelings = 2
        logger.info(f"Consolidated to {len(journey_rows)} journey days, {len(refuel_data)} days with refueling")

        # Create new dataframe with only journey rows
        df = df.iloc[journey_rows].copy()
        df = df.reset_index(drop=True)

        # Remove original refueling column
        df = df.drop(columns=["Natankováno [l|kg]"])

        # Create exactly 2 refueling columns (always)
        df["Natankováno 1 [l|kg]"] = None
        df["Tankováno při 1 [km]"] = None
        df["Natankováno 2 [l|kg]"] = None
        df["Tankováno při 2 [km]"] = None

        # Fill in refueling data (max 2 per day)
        for i in range(len(df)):
            datum = df.at[i, "Datum"]
            if datum in refuel_data:
                amounts = refuel_data[datum][:2]  # Take only first 2 refuelings
                for idx, amount in enumerate(amounts, start=1):
                    df.at[i, f"Natankováno {idx} [l|kg]"] = amount

        date_mask = df["Datum"].notna()
        num_days = date_mask.sum()

        if num_days == 0:
            raise ValueError("No valid days found")

        total_kilometers = end_km - start_km
        logger.info(f"Total km to distribute: {total_kilometers} across {num_days} days")

        avg_km_per_day = total_kilometers / num_days
        km_per_day = np.abs(np.random.normal(avg_km_per_day, avg_km_per_day * variance, num_days))
        km_per_day = np.round(km_per_day).astype(int)

        difference = total_kilometers - np.sum(km_per_day)
        logger.info(f"Difference to distribute: {difference}")

        if difference != 0:
            adjustment = int(difference // num_days)
            km_per_day += adjustment
            remaining = int(difference % num_days)
            km_per_day[:remaining] += 1

        df.loc[date_mask, "Ujeto [km]"] = km_per_day

        # Recalculate km states for journey rows
        current_km = start_km

        for i in range(len(df)):
            df.at[i, "Počáteční stav"] = current_km
            df.at[i, "Koncový stav"] = current_km + int(df.at[i, "Ujeto [km]"])
            current_km = df.at[i, "Koncový stav"]

        # Set final end km for last row
        df.at[len(df) - 1, "Koncový stav"] = end_km

        # Calculate "Tankováno při [km]" for rows with refueling data
        for i in range(len(df)):
            start_km_val = df.at[i, "Počáteční stav"]
            end_km_val = df.at[i, "Koncový stav"]

            if isinstance(start_km_val, (int, float)) and isinstance(end_km_val, (int, float)):
                # Check each refueling column
                for refuel_num in range(1, max_refuelings + 1):
                    refuel_col = f"Natankováno {refuel_num} [l|kg]"
                    km_col = f"Tankováno při {refuel_num} [km]"

                    if refuel_col in df.columns and pd.notna(df.at[i, refuel_col]) and df.at[i, refuel_col] != "":
                        # Generate random km within the journey range
                        if end_km_val > start_km_val:
                            refuel_km = np.random.randint(int(start_km_val), int(end_km_val) + 1)
                        else:
                            refuel_km = start_km_val
                        df.at[i, km_col] = refuel_km

        # Replace NaN values with None for JSON serialization
        df = df.replace({np.nan: None})

        return df
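A small illustrative sketch of driving KilometerCalculator.recalculate directly (assumed to be run from the backend/ directory). The DataFrame values are made up, but the column names match what JourneybookScraper produces; the repeated call demonstrates the deterministic seeding described in the code above.

# Illustrative only: a tiny DataFrame shaped like JourneybookScraper output
# (journey rows carry a date, refueling rows carry only an amount).
import pandas as pd
from calculators.kilometer_calculator import KilometerCalculator

df = pd.DataFrame(
    [
        ["2.1.2025", "", "", "", ""],
        ["", "", "", "", "45.3"],   # refueling row, attached to 2.1.2025
        ["3.1.2025", "", "", "", ""],
        ["6.1.2025", "", "", "", ""],
    ],
    columns=["Datum", "Počáteční stav", "Koncový stav", "Ujeto [km]", "Natankováno [l|kg]"],
)

out1 = KilometerCalculator.recalculate(df, start_km=12000, end_km=12300, variance=0.1)
out2 = KilometerCalculator.recalculate(df, start_km=12000, end_km=12300, variance=0.1)

# Deterministic seeding: identical inputs produce identical distributions.
assert out1["Ujeto [km]"].tolist() == out2["Ujeto [km]"].tolist()
print(out1[["Datum", "Počáteční stav", "Koncový stav", "Ujeto [km]"]])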
3  backend/fillers/__init__.py  Normal file
@@ -0,0 +1,3 @@
from .journeybook_filler import JourneybookFiller

__all__ = ['JourneybookFiller']
281  backend/fillers/journeybook_filler.py  Normal file
@@ -0,0 +1,281 @@
import requests
from bs4 import BeautifulSoup
import pandas as pd
import logging
from typing import Dict, Any, List
import time

logger = logging.getLogger(__name__)


class JourneybookFiller:
    def __init__(self, username: str, password: str, vehicle_registration: str = "4SH1148"):
        self.username = username
        self.password = password
        self.vehicle_registration = vehicle_registration
        self.base_url = "https://kj.colsys.cz/prehled_mesic.php"
        self.session = requests.Session()
        self.session.auth = (username, password)
        self.session.verify = False

    def fill_month(self, df: pd.DataFrame, month: str, dry_run: bool = True) -> Dict[str, Any]:
        """
        Fill journeybook data for a given month.

        Args:
            df: DataFrame with calculated journey data
            month: Month in format YYYY-MM (e.g., "2025-01")
            dry_run: If True, only show what would be filled without actually submitting

        Returns:
            Dict with results including success/failure counts
        """
        url = f"{self.base_url}?rz={self.vehicle_registration}&den={month}-01"

        logger.info(f"Fetching form for month {month}, vehicle {self.vehicle_registration}")
        response = self.session.get(url)
        response.raise_for_status()

        soup = BeautifulSoup(response.content, 'html.parser')
        table = soup.find('table', class_='table table-striped table-bordered table-condensed table-sm')

        if not table:
            raise ValueError("Journeybook table not found")

        # Extract forms with their parent rows to get dates
        import re
        table_rows = table.find('tbody').find_all('tr')

        logger.info(f"Found {len(table_rows)} table rows")
        logger.info(f"DataFrame has {len(df)} rows")

        updates = []
        deletes = []

        for i, row in enumerate(table_rows):
            form = row.find('form')
            if not form:
                continue  # Skip rows without forms

            # Extract date from the row's first cell
            cells = row.find_all('td')
            if not cells:
                continue

            # Get text from first cell (including button text)
            date_text = cells[0].get_text(strip=True)

            # Extract date pattern "2. 1. 2025" from text like "Zapiš2. 1. 2025Zapiš"
            date_match = re.search(r'(\d{1,2}\.\s*\d{1,2}\.\s*\d{4})', date_text)

            if date_match:
                clean_date = date_match.group(1).replace(' ', '')

                # Determine if this is a journey form or refueling form
                form_data = self._extract_form_data(form)
                is_refueling_form = any(btn.get("name") == "f_ulozitkm" for btn in form_data["buttons"])

                # Match with DataFrame
                matching_rows = df[df["Datum"] == clean_date]
                if len(matching_rows) > 0:
                    # Row exists in our data - update it
                    row_data = matching_rows.iloc[0]
                    update = self._prepare_update(form_data, row_data, update_mode=True, is_refueling=is_refueling_form)
                    updates.append(update)
                    form_type = "refueling" if is_refueling_form else "journey"
                    logger.info(f"Matched {form_type} row {i} with date {clean_date}")
                else:
                    # Row exists on website but not in our data - delete it
                    delete = self._prepare_update(form_data, None, update_mode=False, is_refueling=is_refueling_form)
                    deletes.append(delete)
                    logger.info(f"Will delete row {i} with date {clean_date}")
            else:
                logger.debug(f"Skipping row {i} (no date pattern)")

        if dry_run:
            logger.info("DRY RUN MODE - No data will be submitted")
            return {
                "dry_run": True,
                "month": month,
                "updates_prepared": len(updates),
                "deletes_prepared": len(deletes),
                "updates": updates[:5],  # Show first 5 as sample
                "deletes": deletes[:5]
            }

        # Actually submit the data
        results = {
            "month": month,
            "updates_total": len(updates),
            "deletes_total": len(deletes),
            "updates_successful": 0,
            "updates_failed": 0,
            "deletes_successful": 0,
            "deletes_failed": 0,
            "errors": []
        }

        # First, submit all updates
        for i, update in enumerate(updates):
            try:
                logger.info(f"Updating row {i+1}/{len(updates)}")
                self._submit_form(update)
                results["updates_successful"] += 1
                time.sleep(0.5)  # Rate limiting
            except Exception as e:
                logger.error(f"Failed to update row {i+1}: {e}")
                results["updates_failed"] += 1
                results["errors"].append({"type": "update", "row": i+1, "error": str(e)})

        # Then, submit all deletes
        for i, delete in enumerate(deletes):
            try:
                logger.info(f"Deleting row {i+1}/{len(deletes)}")
                self._submit_form(delete)
                results["deletes_successful"] += 1
                time.sleep(0.5)  # Rate limiting
            except Exception as e:
                logger.error(f"Failed to delete row {i+1}: {e}")
                results["deletes_failed"] += 1
                results["errors"].append({"type": "delete", "row": i+1, "error": str(e)})

        return results

    def _extract_form_data(self, form) -> Dict[str, Any]:
        """Extract all form fields and their current values"""
        form_data = {
            "action": form.get('action', ''),
            "method": form.get('method', 'post'),
            "fields": {},
            "buttons": []
        }

        # Get all input fields
        for input_field in form.find_all('input'):
            name = input_field.get('name', '')
            value = input_field.get('value', '')
            field_type = input_field.get('type', 'text')

            if name:
                form_data["fields"][name] = {
                    "value": value,
                    "type": field_type
                }

                # Track input buttons
                if field_type in ['submit', 'button']:
                    form_data["buttons"].append({
                        "name": name,
                        "value": value,
                        "type": field_type
                    })

        # CRITICAL: Also get <button> elements (not just <input type="submit">)
        for button in form.find_all('button'):
            name = button.get('name', '')
            value = button.get('value', button.get_text(strip=True))
            btn_type = button.get('type', 'submit')

            if name:
                form_data["buttons"].append({
                    "name": name,
                    "value": value,
                    "type": btn_type
                })

        return form_data

    def _prepare_update(self, form_data: Dict, row_data: pd.Series, update_mode: bool = True, is_refueling: bool = False) -> Dict[str, Any]:
        """Prepare form data with updated values from DataFrame or for deletion

        Args:
            form_data: Extracted form data
            row_data: DataFrame row with journey data (None for delete)
            update_mode: True to update row, False to delete row
            is_refueling: True if this is a refueling form, False if journey form
        """
        update = {
            "action": form_data["action"],
            "method": form_data["method"],
            "data": {},
            "buttons": form_data.get("buttons", [])
        }

        # Copy all existing fields
        for field_name, field_info in form_data["fields"].items():
            update["data"][field_name] = field_info["value"]

        if not update_mode:
            # Delete mode - find and add "Smazat" (Delete) button
            for button in update["buttons"]:
                button_value = button.get("value", "")
                if "Smazat" in button_value or "smazat" in button.get("name", "").lower():
                    if button.get("name"):
                        update["data"][button["name"]] = button["value"]
                        logger.info(f"Adding DELETE button: {button['name']}={button['value']}")
                    break
            return update

        # Update mode - handle refueling forms vs journey forms differently
        if is_refueling:
            # Refueling form - fill the km value from Tankováno při column
            # We need to determine if this is refueling 1 or 2 for this date
            # The form should have an f_km field that needs to be filled

            # Check if this date has refueling data in the DataFrame
            if "Tankováno při 1 [km]" in row_data and pd.notna(row_data["Tankováno při 1 [km]"]):
                if "f_km" in update["data"]:
                    # Check if this is the first or second refueling form
                    current_km = update["data"].get("f_km", "0")
                    refuel_1_km = int(row_data["Tankováno při 1 [km]"])

                    # If f_km is 0 or empty, fill with refuel 1
                    if current_km == "0" or current_km == "":
                        update["data"]["f_km"] = str(refuel_1_km)
                        logger.info(f"Filling refuel km: f_km={refuel_1_km}")
                    # Otherwise check if there's a second refueling
                    elif "Tankováno při 2 [km]" in row_data and pd.notna(row_data["Tankováno při 2 [km]"]):
                        refuel_2_km = int(row_data["Tankováno při 2 [km]"])
                        if int(current_km) != refuel_1_km:
                            # This might be the second refueling form
                            update["data"]["f_km"] = str(refuel_2_km)
                            logger.info(f"Filling refuel 2 km: f_km={refuel_2_km}")
        else:
            # Journey form - fill with data from DataFrame
            # ONLY update f_ujeto (distance traveled)
            # Let kj.colsys.cz calculate f_cil_km (end km) automatically
            if "Ujeto [km]" in row_data and pd.notna(row_data["Ujeto [km]"]):
                if "f_ujeto" in update["data"]:
                    update["data"]["f_ujeto"] = str(int(row_data["Ujeto [km]"]))

        # Add button click - look for "Uložit km" or "Přepočítat" buttons
        # Exclude "Uzavřít měsíc" button
        button_added = False
        for button in update["buttons"]:
            button_value = button.get("value", "")
            button_name = button.get("name", "")
            if "Uložit" in button_value or "Přepočítat" in button_value or "ulozit" in button_name.lower():
                # Include the button in the POST data to trigger its action
                if button_name:
                    update["data"][button_name] = button_value
                    logger.info(f"Adding button to POST: {button_name}={button_value}")
                button_added = True
                break

        if not button_added:
            logger.warning(f"No save button found! Available buttons: {update['buttons']}")

        return update

    def _submit_form(self, update: Dict[str, Any]):
        """Submit a form update"""
        url = update["action"] if update["action"].startswith('http') else f"https://kj.colsys.cz/{update['action']}"

        response = self.session.post(
            url,
            data=update["data"],
            allow_redirects=False
        )

        response.raise_for_status()
        logger.info(f"Form submitted successfully: {response.status_code}")
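A brief, hypothetical dry-run sketch of the filler above. Credentials and DataFrame values are placeholders (the DataFrame would normally come from the scrape-and-recalculate pipeline in backend/api/main.py), it still performs a GET against kj.colsys.cz to read the forms, and with dry_run=True nothing is posted back.

# Illustrative dry run; assumed to be run from the backend/ directory.
import pandas as pd
from fillers import JourneybookFiller

# Shape of a recalculated DataFrame; only the columns the filler reads are shown.
df = pd.DataFrame({
    "Datum": ["2.1.2025", "3.1.2025"],
    "Ujeto [km]": [98, 104],
    "Tankováno při 1 [km]": [12054, None],
})

filler = JourneybookFiller("your_username", "your_password", vehicle_registration="4SH1148")

# dry_run=True only reports what would be updated or deleted; no form is submitted.
result = filler.fill_month(df, "2025-01", dry_run=True)
print(result["updates_prepared"], "updates,", result["deletes_prepared"], "deletes prepared")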
3  backend/models/__init__.py  Normal file
@@ -0,0 +1,3 @@
from .journey import Journey, JourneyEntry, RefuelingEntry

__all__ = ["Journey", "JourneyEntry", "RefuelingEntry"]
37  backend/models/journey.py  Normal file
@@ -0,0 +1,37 @@
from datetime import datetime
from typing import Optional
from pydantic import BaseModel, Field


class JourneyEntry(BaseModel):
    date: str
    start_km: Optional[int] = None
    end_km: Optional[int] = None
    distance_km: Optional[int] = None
    is_sick_day: bool = False
    is_vacation: bool = False


class RefuelingEntry(BaseModel):
    date: str
    amount_liters: float
    km_at_refuel: Optional[int] = None


class Journey(BaseModel):
    month: str = Field(..., pattern=r"^\d{4}-\d{2}$")
    start_km: int = Field(..., gt=0)
    end_km: int = Field(..., gt=0)
    entries: list[JourneyEntry] = []
    refueling_entries: list[RefuelingEntry] = []

    class Config:
        json_schema_extra = {
            "example": {
                "month": "2024-03",
                "start_km": 12000,
                "end_km": 13500,
                "entries": [],
                "refueling_entries": []
            }
        }
15  backend/requirements.txt  Normal file
@@ -0,0 +1,15 @@
fastapi==0.109.0
uvicorn[standard]==0.27.0
pydantic==2.5.3
pydantic-settings==2.1.0
sqlalchemy==2.0.25
alembic==1.13.1
pandas==2.2.0
numpy==1.26.3
requests==2.31.0
beautifulsoup4==4.12.3
playwright==1.41.0
python-dotenv==1.0.0
python-multipart==0.0.6
openpyxl==3.1.2
python-dateutil==2.8.2
4  backend/scrapers/__init__.py  Normal file
@@ -0,0 +1,4 @@
from .attendance_scraper import AttendanceScraper
from .journeybook_scraper import JourneybookScraper

__all__ = ["AttendanceScraper", "JourneybookScraper"]
45  backend/scrapers/attendance_scraper.py  Normal file
@@ -0,0 +1,45 @@
import re
import requests
from bs4 import BeautifulSoup
from typing import List


class AttendanceScraper:
    def __init__(self, username: str, password: str):
        self.username = username
        self.password = password
        self.base_url = "https://agenda.colsys.cz/dochazka/index.php"

    @staticmethod
    def normalize_date(date_str: str) -> str:
        return re.sub(r'\s+', '', date_str)

    def scrape_month(self, month: str) -> List[str]:
        """
        Scrape attendance data for a given month.
        Returns list of dates with sick days, vacation, or unpaid leave.
        """
        url = f"{self.base_url}?kdy={month}-01"

        response = requests.get(url, auth=(self.username, self.password), verify=False)
        response.raise_for_status()

        soup = BeautifulSoup(response.content, 'html.parser')
        table = soup.find('table', class_='restrikce')

        if not table:
            raise ValueError("Attendance table not found")

        attendance_dates = []
        for row in table.find_all('tr')[1:]:
            cells = row.find_all('td')
            if len(cells) >= 3:
                date = cells[0].text.strip()
                presence = cells[2].text.strip()

                if ("sick day" in presence.lower() or
                        "dovolená" in presence.lower() or
                        "neplacené volno" in presence.lower()):
                    attendance_dates.append(self.normalize_date(date))

        return attendance_dates
73  backend/scrapers/journeybook_scraper.py  Normal file
@@ -0,0 +1,73 @@
import re
import requests
from bs4 import BeautifulSoup
import pandas as pd
from typing import Dict, Any


class JourneybookScraper:
    def __init__(self, username: str, password: str, vehicle_registration: str = "4SH1148"):
        self.username = username
        self.password = password
        self.vehicle_registration = vehicle_registration
        self.base_url = "https://kj.colsys.cz/prehled_mesic.php"

    @staticmethod
    def normalize_date(date_str: str) -> str:
        return re.sub(r'\s+', '', date_str)

    def scrape_month(self, month: str) -> pd.DataFrame:
        """
        Scrape journeybook data for a given month.
        Returns DataFrame with columns: Datum, Počáteční stav, Koncový stav, Ujeto [km], Natankováno [l|kg]
        """
        url = f"{self.base_url}?rz={self.vehicle_registration}&den={month}-01"

        response = requests.get(url, auth=(self.username, self.password), verify=False)
        response.raise_for_status()

        soup = BeautifulSoup(response.content, 'html.parser')
        table = soup.find('table', class_='table table-striped table-bordered table-condensed table-sm')

        if not table:
            raise ValueError("Journeybook table not found")

        headers = [th.text.strip() for th in table.find('thead').find_all('th')]
        headers = [header.replace(" ", "") for header in headers]

        columns_to_keep = ["Datum", "Počátečnístav", "Koncovýstav", "Ujeto[km]"]
        new_headers = ["Datum", "Počáteční stav", "Koncový stav", "Ujeto [km]", "Natankováno [l|kg]"]

        for col in columns_to_keep:
            if col not in headers:
                raise ValueError(f"Column '{col}' not found. Headers: {headers}")

        rows = []
        for row in table.find('tbody').find_all('tr'):
            if "Tankováno" in row.text:
                refuel_text = row.text.strip()
                amount_match = re.search(r'natankováno\s(\d+\.\d+)\s\[l\|kg\]', refuel_text)
                amount = amount_match.group(1) if amount_match else ""
                rows.append([""] * len(columns_to_keep) + [amount])
            elif row.find('form'):
                cells = []
                for cell in row.find_all('td'):
                    input_field = cell.find('input')
                    if input_field:
                        cells.append(input_field.get('value', ''))
                    else:
                        if headers[len(cells)] == "Datum":
                            date_match = re.search(r'\d{1,2}\.\s\d{1,2}\.\s\d{4}', cell.text.strip())
                            if date_match:
                                cells.append(self.normalize_date(date_match.group()))
                            else:
                                cells.append(cell.text.strip())
                        else:
                            cells.append(cell.text.strip())

                filtered_cells = [cells[headers.index(col)] for col in columns_to_keep]
                filtered_cells.append("")
                rows.append(filtered_cells)

        df = pd.DataFrame(rows, columns=new_headers)
        return df