Initial portal commit: landing + 9 AI-powered apps

Apps:
- dwg-rooms: extract room numbers from DWG/DXF
- dwg-counting: count symbols in PDF drawings (OpenCV template matching)
- contract-check: review PDF contracts against a checklist (Claude vision + Tesseract OCR fallback)
- email-drafter: bullet notes → polished Czech/English business emails
- invoice-extractor: PDF/image invoice → structured data → Excel
- translator: Czech-first translator across 19 languages with tone control
- vv-check: find inconsistent unit prices across VV sheets in one workbook
- vv-compare: diff original vs new VV files (changes / added / removed)
- feature-request: portal users submit ideas + sample files

Infrastructure:
- LiteLLM gateway with per-app virtual keys + budgets
- Langfuse observability
- Geist font, shared theme, cross-subdomain back link + theme sync via cookie/URL
- Caddy reverse proxy on *.klas.chat
This commit is contained in:
Ondřej Glaser
2026-05-13 15:25:04 +02:00
commit 48cef99257
139 changed files with 20171 additions and 0 deletions

View File

@@ -0,0 +1,257 @@
"""Add color-coded highlights and a Czech-correct summary page to a PDF."""
import logging
from pathlib import Path
import fitz # PyMuPDF
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)

# Fill colours keyed by checklist-item status; used both for the highlight
# annotations and for the bullet squares on the summary page.
# RGB 0-1 for PyMuPDF
COLORS = {
    "ok": (0.69, 0.91, 0.69),  # green
    "warning": (1.00, 0.90, 0.45),  # yellow
    "problem": (1.00, 0.65, 0.65),  # red
    "missing": (0.85, 0.85, 0.85),  # grey
}

# Font files registered on every summary page under the aliases "sans"/"bold".
# DejaVu Sans is shipped via fonts-dejavu-core; supports full Czech glyph set.
FONT_PATH_SANS = "/usr/share/fonts/truetype/dejavu/DejaVuSans.ttf"
FONT_PATH_BOLD = "/usr/share/fonts/truetype/dejavu/DejaVuSans-Bold.ttf"
def annotate(input_pdf: Path, output_pdf: Path, items: list[dict],
             overall_summary: str = "", risk_level: str = "",
             skip_highlights: bool = False,
             contract_name: str = "") -> Path:
    """Open input_pdf, highlight excerpts, prepend a summary page.

    Args:
        input_pdf: Path of the PDF to annotate.
        output_pdf: Path the annotated copy is saved to.
        items: Checklist results. Each dict may carry ``status``, a display
            name (``title`` / ``label`` / ``id``) and ``excerpts``, a list of
            dicts with the quoted ``text`` and an optional ``comment``.
        overall_summary: Free text shown at the top of the summary page.
        risk_level: Overall risk key shown on the summary page.
        skip_highlights: When true no excerpt is searched or highlighted;
            the summary page is still generated from ``items``.
        contract_name: Shown on the summary page and, when non-empty, written
            to the PDF metadata title.

    Returns:
        ``output_pdf``, after the annotated document has been saved there.

    Any exception raised while annotating or saving propagates to the caller;
    the opened documents are closed first.
    """
    highlighted_count = 0
    not_found_count = 0
    if skip_highlights:
        logger.info("Skipping per-excerpt highlights (OCR'd PDF — no text layer)")
    items_to_annotate = [] if skip_highlights else items

    doc = fitz.open(str(input_pdf))
    try:
        for item in items_to_annotate:
            color = COLORS.get(item.get("status", "warning"), COLORS["warning"])
            # Annotation info fields must be strings; an ``id`` may be numeric.
            title = str(item.get("title") or item.get("label")
                        or item.get("id") or "")
            for ex in item.get("excerpts") or []:
                quote = (ex.get("text") or "").strip()
                comment = (ex.get("comment") or "").strip()
                if not quote:
                    continue
                found_any = False
                for page in doc:
                    rects = page.search_for(quote, quads=False)
                    if not rects and len(quote) > 20:
                        # The full quote did not match on this page; retry
                        # with just its first 20 characters.
                        rects = page.search_for(quote[:20], quads=False)
                    if not rects:
                        continue
                    for rect in rects:
                        annot = page.add_highlight_annot(rect)
                        annot.set_colors(stroke=color)
                        annot.set_info(
                            title=title,
                            content=f"{title}\n\n{comment}" if comment else title,
                        )
                        annot.update()
                    found_any = True
                # Counters are per excerpt, not per highlighted rectangle.
                if found_any:
                    highlighted_count += 1
                else:
                    not_found_count += 1
                    logger.info("Quote not found in PDF: %r", quote[:60])

        # Build & prepend summary
        summary = _build_summary_pdf(doc, items, overall_summary, risk_level,
                                     contract_name)
        if summary is not None:
            try:
                doc.insert_pdf(summary, start_at=0)
            finally:
                summary.close()

        # Set PDF metadata title for nice display in viewers
        if contract_name:
            meta = doc.metadata or {}
            meta["title"] = f"Kontrola: {contract_name}"
            doc.set_metadata(meta)

        doc.save(str(output_pdf), garbage=4, deflate=True)
    finally:
        # Release the source document even when annotating or saving fails.
        doc.close()

    logger.info("Annotated PDF: highlighted=%d not_found=%d", highlighted_count, not_found_count)
    return output_pdf
def _build_summary_pdf(orig_doc, items: list[dict],
                       overall_summary: str, risk_level: str,
                       contract_name: str):
    """Build a 1-N page summary PDF using a Czech-supporting font.

    The page size follows the first page of ``orig_doc`` but is never smaller
    than 595 x 842 pt (A4). The layout is: optional contract name, title,
    optional risk line, optional overall summary, a divider, then one entry
    per item with its status label, summary and excerpt snippets.

    Args:
        orig_doc: The opened source document; only its page count and first
            page rectangle are read.
        items: Checklist results (``status``, ``title``/``label``/``id``,
            ``summary``, ``excerpts`` with ``page``/``text``/``comment``).
        overall_summary: Free text placed under the risk line.
        risk_level: ``"low"``, ``"medium"`` or ``"high"``; any other non-empty
            value is printed upper-cased in a neutral grey.
        contract_name: Header line above the title; skipped when empty.

    Returns:
        A new, still-open document that the caller must close, or ``None``
        when ``orig_doc`` has no pages.
    """
    if not orig_doc.page_count:
        return None

    src_rect = orig_doc[0].rect
    width = max(float(src_rect.width), 595.0)  # ensure at least A4
    height = max(float(src_rect.height), 842.0)

    margin_x = 50.0
    top_y = 50.0
    bottom_limit = height - 80  # start a new page once y passes this
    title_size = 18
    body_size = 10.5
    line_h = body_size * 1.45
    indent_x = margin_x + 18
    indent_w = width - 2 * margin_x - 18

    status_labels = {"ok": "OK", "warning": "POZOR",
                     "problem": "PROBLÉM", "missing": "CHYBÍ"}

    new = fitz.open()

    def add_page():
        # Fonts are registered per page, so every new page needs them again.
        fresh = new.new_page(width=width, height=height)
        _register_fonts(fresh)
        return fresh

    try:
        page = add_page()
        y = top_y

        # Header line: filename
        if contract_name:
            _draw_text(page, contract_name, margin_x, y, font="sans",
                       size=11, color=(0.40, 0.45, 0.55))
            y += 18

        # Title
        _draw_text(page, "Kontrola smluvních podmínek", margin_x, y,
                   font="bold", size=title_size, color=(0.06, 0.10, 0.20))
        y += title_size * 1.6

        # Risk badge line
        if risk_level:
            labels = {"low": "NÍZKÉ", "medium": "STŘEDNÍ", "high": "VYSOKÉ"}
            colors = {
                "low": (0.20, 0.65, 0.32),
                "medium": (0.85, 0.55, 0.10),
                "high": (0.80, 0.20, 0.20),
            }
            _draw_text(page, f"Celková míra rizika: {labels.get(risk_level, risk_level.upper())}",
                       margin_x, y, font="bold", size=12,
                       color=colors.get(risk_level, (0.4, 0.4, 0.4)))
            y += 22

        # Overall summary
        if overall_summary:
            y = _wrap_text(page, overall_summary, margin_x, y,
                           width - 2 * margin_x, body_size, font="sans")
            y += 12

        # A long overall summary may have consumed the page; keep the divider
        # and the section heading on a visible page.
        if y > bottom_limit:
            page = add_page()
            y = top_y

        y += 6
        page.draw_line((margin_x, y), (width - margin_x, y),
                       color=(0.85, 0.85, 0.85))
        y += 16
        _draw_text(page, "Položky kontroly", margin_x, y,
                   font="bold", size=12, color=(0.15, 0.20, 0.30))
        y += 18

        for item in items:
            # Need new page?
            if y > bottom_limit:
                page = add_page()
                y = top_y

            # ``status`` may be present but None; normalise before .upper().
            status = item.get("status") or ""
            color = COLORS.get(status, (0.6, 0.6, 0.6))
            label = status_labels.get(status) or str(status).upper()
            title = item.get("title") or item.get("label") or item.get("id", "")

            # Colored bullet square
            page.draw_rect(
                fitz.Rect(margin_x, y, margin_x + 10, y + 10),
                color=color, fill=color,
            )
            _draw_text(page, f"[{label}] {title}",
                       indent_x, y, font="bold", size=11,
                       color=(0.06, 0.10, 0.20))
            y += line_h + 4

            summary = item.get("summary", "")
            if summary:
                y = _wrap_text(page, summary, indent_x, y,
                               indent_w, body_size,
                               font="sans", color=(0.30, 0.35, 0.45))

            # List page references for each excerpt
            for ex in item.get("excerpts") or []:
                text = (ex.get("text") or "").strip()
                if not text:
                    continue
                # Items with many excerpts would otherwise run off the page,
                # because the per-item check above runs only once per item.
                if y > bottom_limit:
                    page = add_page()
                    y = top_y
                pg = ex.get("page")
                pg_str = f"str. {pg}: " if pg else ""
                # Cap the snippet at 90 characters, marking the cut visibly.
                snippet = text if len(text) <= 90 else text[:87] + "..."
                y = _wrap_text(page, f'{pg_str}"{snippet}"',
                               indent_x, y,
                               indent_w, body_size - 0.5,
                               font="sans", color=(0.25, 0.30, 0.40))
                cmt = (ex.get("comment") or "").strip()
                if cmt:
                    y = _wrap_text(page, cmt,
                                   indent_x, y,
                                   indent_w, body_size - 0.5,
                                   font="sans", color=(0.45, 0.50, 0.60))

            # Vertical gap between items.
            y += 12
    except Exception:
        # Do not leak the half-built document when drawing fails.
        new.close()
        raise

    return new
# ── font helpers ─────────────────────────────────────────
def _register_fonts(page):
    """Insert DejaVu Sans (regular + bold) on the page if available.

    The regular face is registered under the alias ``"sans"`` and the bold
    face under ``"bold"``. When the bold file cannot be loaded, the regular
    file is registered under ``"bold"`` instead.

    Registration is best-effort: every failure is logged as a warning and
    never raised, so callers can still draw with whatever fonts remain.
    """
    # Broad ``except Exception`` is deliberate: a missing or unreadable font
    # file must not abort PDF generation.
    try:
        page.insert_font(fontname="sans", fontfile=FONT_PATH_SANS)
    except Exception as e:
        logger.warning("Could not register DejaVuSans: %s", e)

    try:
        page.insert_font(fontname="bold", fontfile=FONT_PATH_BOLD)
        return
    except Exception as e:
        logger.warning("Could not register DejaVuSans-Bold, "
                       "falling back to regular: %s", e)

    # Fall back to regular for bold
    try:
        page.insert_font(fontname="bold", fontfile=FONT_PATH_SANS)
    except Exception as e:
        logger.warning("Could not register fallback font for bold: %s", e)
def _draw_text(page, text: str, x: float, y: float,
font: str = "sans", size: float = 10.5,
color: tuple = (0.06, 0.10, 0.20)):
"""Render a single line at baseline y+size."""
try:
page.insert_text((x, y + size), text,
fontname=font, fontsize=size, color=color)
except Exception:
# Fallback to PyMuPDF built-in (may mangle diacritics but won't crash)
page.insert_text((x, y + size), text,
fontsize=size, color=color)
def _wrap_text(page, text: str, x: float, y: float, max_width: float,
font_size: float, font: str = "sans",
color: tuple = (0.06, 0.10, 0.20)) -> float:
"""Word-wrap `text` and return the new y position."""
line_h = font_size * 1.45
# PyMuPDF has Page.get_text_length() for width calculation
def measure(s: str) -> float:
try:
return fitz.get_text_length(s, fontname=font, fontsize=font_size)
except Exception:
return len(s) * font_size * 0.50
words = text.split()
if not words:
return y
line = ""
for word in words:
candidate = (line + " " + word).strip()
if measure(candidate) > max_width and line:
_draw_text(page, line, x, y, font=font,
size=font_size, color=color)
y += line_h
line = word
else:
line = candidate
if line:
_draw_text(page, line, x, y, font=font, size=font_size, color=color)
y += line_h
return y