Files
kniha_jizd_web/backend/scrapers/attendance_scraper.py
Docker Config Backup 3b5d9fd940 Initial commit - Journey book (kniha jízd) automation system
Features:
- FastAPI backend for scraping attendance and journey book data
- Deterministic kilometer distribution with random variance
- Refueling form filling with km values
- Next.js frontend with date range selector
- Docker deployment setup

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
2025-10-10 15:41:11 +02:00

46 lines
1.5 KiB
Python

import re
import requests
from bs4 import BeautifulSoup
from typing import List
class AttendanceScraper:
    """Scrape absence days from the attendance page at ``base_url``.

    The page is fetched with the supplied username/password passed to
    ``requests`` as an ``auth`` tuple, and the table with CSS class
    ``restrikce`` is parsed for rows whose presence column mentions one of
    :attr:`ABSENCE_KEYWORDS`.
    """

    # Lower-case substrings of the presence column that mark a day as an
    # absence (sick day, vacation, unpaid leave).
    ABSENCE_KEYWORDS = ("sick day", "dovolená", "neplacené volno")

    def __init__(self, username: str, password: str, *,
                 timeout: float = 30.0, verify: bool = False):
        """Store credentials and connection settings.

        Args:
            username: Login name sent with every request.
            password: Password sent with every request.
            timeout: Seconds to wait for the server before ``requests``
                raises a timeout error; without it a stalled server would
                block the caller forever.
            verify: Whether to verify the server's TLS certificate.
                Defaults to ``False`` to keep the previous behaviour.
        """
        self.username = username
        self.password = password
        self.base_url = "https://agenda.colsys.cz/dochazka/index.php"
        self.timeout = timeout
        # SECURITY NOTE(review): with verify=False the credentials travel
        # over a connection whose peer is not authenticated. Pass
        # verify=True once the server certificate can be validated.
        self.verify = verify

    @staticmethod
    def normalize_date(date_str: str) -> str:
        """Return *date_str* with every whitespace character removed."""
        return re.sub(r'\s+', '', date_str)

    @classmethod
    def _is_absence(cls, presence: str) -> bool:
        """Return True if *presence* contains any absence keyword (case-insensitive)."""
        lowered = presence.lower()
        return any(keyword in lowered for keyword in cls.ABSENCE_KEYWORDS)

    def scrape_month(self, month: str) -> List[str]:
        """Scrape attendance data for a given month.

        Args:
            month: Month prefix; ``-01`` is appended to form the ``kdy``
                query parameter (presumably ``YYYY-MM`` - confirm against
                callers).

        Returns:
            Dates (first table column, whitespace removed) of the rows whose
            presence column (third column) reports a sick day, vacation, or
            unpaid leave.

        Raises:
            requests.HTTPError: If the server answers with an error status.
            ValueError: If the attendance table is missing from the page.
        """
        response = requests.get(
            self.base_url,
            params={"kdy": f"{month}-01"},
            auth=(self.username, self.password),
            verify=self.verify,
            timeout=self.timeout,
        )
        response.raise_for_status()

        soup = BeautifulSoup(response.content, 'html.parser')
        table = soup.find('table', class_='restrikce')
        if table is None:
            raise ValueError("Attendance table not found")

        attendance_dates = []
        # The first row is skipped (presumably the table header).
        for row in table.find_all('tr')[1:]:
            cells = row.find_all('td')
            if len(cells) < 3:
                # Rows without a presence column carry no attendance info.
                continue
            date = cells[0].text.strip()
            presence = cells[2].text.strip()
            if self._is_absence(presence):
                attendance_dates.append(self.normalize_date(date))
        return attendance_dates