Initial portal commit: landing + 9 AI-powered apps

Apps:
- dwg-rooms: extract room numbers from DWG/DXF
- dwg-counting: count symbols in PDF drawings (OpenCV template matching)
- contract-check: review PDF contracts against a checklist (Claude vision + Tesseract OCR fallback)
- email-drafter: bullet notes → polished Czech/English business emails
- invoice-extractor: PDF/image invoice → structured data → Excel
- translator: Czech-first translator across 19 languages with tone control
- vv-check: find inconsistent unit prices across VV sheets in one workbook
- vv-compare: diff original vs new VV files (changes / added / removed)
- feature-request: portal users submit ideas + sample files

Infrastructure:
- LiteLLM gateway with per-app virtual keys + budgets
- Langfuse observability
- Geist font, shared theme, cross-subdomain back link + theme sync via cookie/URL
- Caddy reverse proxy on *.klas.chat
2026-05-13 15:25:04 +02:00
commit 48cef99257
139 changed files with 20171 additions and 0 deletions
--- a/contract-check/pdf_annotator.py
+++ b/contract-check/pdf_annotator.py
@@ -0,0 +1,257 @@
+"""Add color-coded highlights and a Czech-correct summary page to a PDF."""
+import logging
+from pathlib import Path
+
+import fitz  # PyMuPDF
+
+logger = logging.getLogger(__name__)
+
+# Colour per checklist status, as RGB components in the 0-1 range for PyMuPDF.
+# Used as the highlight stroke colour in annotate() and as the bullet-square
+# fill in _build_summary_pdf(); both call sites supply their own fallback for
+# statuses that are not listed here.
+COLORS = {
+    "ok":      (0.69, 0.91, 0.69),  # green
+    "warning": (1.00, 0.90, 0.45),  # yellow
+    "problem": (1.00, 0.65, 0.65),  # red
+    "missing": (0.85, 0.85, 0.85),  # grey
+}
+
+# DejaVu Sans is shipped via fonts-dejavu-core; supports full Czech glyph set.
+# These paths are handed to page.insert_font() by _register_fonts().
+FONT_PATH_SANS = "/usr/share/fonts/truetype/dejavu/DejaVuSans.ttf"
+FONT_PATH_BOLD = "/usr/share/fonts/truetype/dejavu/DejaVuSans-Bold.ttf"
+
+
+def annotate(input_pdf: Path, output_pdf: Path, items: list[dict],
+             overall_summary: str = "", risk_level: str = "",
+             skip_highlights: bool = False,
+             contract_name: str = "") -> Path:
+    """Highlight checklist excerpts in a PDF and prepend a summary page.
+
+    Args:
+        input_pdf: PDF to open.
+        output_pdf: Where the annotated copy is saved.
+        items: Checklist results. Each dict may carry ``status`` (looked up
+            in ``COLORS``; unknown values use the "warning" colour),
+            ``title``/``label``/``id`` and ``excerpts`` (dicts with ``text``
+            and an optional ``comment``).
+        overall_summary: Free text passed on to the summary page.
+        risk_level: Risk label passed on to the summary page.
+        skip_highlights: When true no excerpt is searched or highlighted;
+            the summary page is still prepended.
+        contract_name: Passed on to the summary page and, when non-empty,
+            stored as the PDF metadata title.
+
+    Returns:
+        ``output_pdf``.
+    """
+    doc = fitz.open(str(input_pdf))
+    # try/finally so the document handle is released even when searching,
+    # building the summary or saving raises.
+    try:
+        highlighted_count = 0
+        not_found_count = 0
+        if skip_highlights:
+            logger.info("Skipping per-excerpt highlights (OCR'd PDF — no text layer)")
+        items_to_annotate = [] if skip_highlights else items
+        for item in items_to_annotate:
+            color = COLORS.get(item.get("status", "warning"), COLORS["warning"])
+            title = item.get("title") or item.get("label") or item.get("id", "")
+            for ex in item.get("excerpts") or []:
+                quote = (ex.get("text") or "").strip()
+                comment = (ex.get("comment") or "").strip()
+                if not quote:
+                    continue
+                if _highlight_quote(doc, quote, color, title, comment):
+                    highlighted_count += 1
+                else:
+                    not_found_count += 1
+                    logger.info("Quote not found in PDF: %r", quote[:60])
+
+        # Build & prepend summary (None when the source has no pages).
+        summary = _build_summary_pdf(doc, items, overall_summary, risk_level,
+                                     contract_name)
+        if summary is not None:
+            try:
+                doc.insert_pdf(summary, start_at=0)
+            finally:
+                summary.close()
+
+        # Set PDF metadata title for nice display in viewers
+        if contract_name:
+            meta = doc.metadata or {}
+            meta["title"] = f"Kontrola: {contract_name}"
+            doc.set_metadata(meta)
+
+        doc.save(str(output_pdf), garbage=4, deflate=True)
+    finally:
+        doc.close()
+    logger.info("Annotated PDF: highlighted=%d not_found=%d", highlighted_count, not_found_count)
+    return output_pdf
+
+
+def _highlight_quote(doc, quote: str, color: tuple, title: str,
+                     comment: str) -> bool:
+    """Highlight every occurrence of `quote` in `doc`; return True if any hit.
+
+    The exact quote is searched on all pages first. Only when it is found
+    nowhere is the search repeated with the first 20 characters (quotes
+    longer than that), so a loose prefix match never adds highlights next to
+    an exact match elsewhere in the document.
+    """
+    content = f"{title}\n\n{comment}" if comment else title
+    needles = [quote]
+    if len(quote) > 20:
+        needles.append(quote[:20])
+    for needle in needles:
+        found = False
+        for page in doc:
+            for rect in page.search_for(needle, quads=False):
+                annot = page.add_highlight_annot(rect)
+                annot.set_colors(stroke=color)
+                annot.set_info(title=title, content=content)
+                annot.update()
+                found = True
+        if found:
+            return True
+    return False
+
+
+def _build_summary_pdf(orig_doc, items: list[dict],
+                       overall_summary: str, risk_level: str,
+                       contract_name: str):
+    """Build a 1-N page summary PDF using a Czech-supporting font.
+
+    The page size follows the first page of `orig_doc` but is never smaller
+    than 595 x 842 points. The layout is: optional contract name, title,
+    optional risk line, optional overall summary, a rule, then one entry per
+    item (coloured square, "[LABEL]  title", item summary, excerpts with
+    their comments).
+
+    A new page is started before an item header and before each paragraph
+    inside an item when the current page is nearly full. A single paragraph
+    that wraps over many lines can still run past the bottom edge, because
+    the wrapping helper draws onto one page only.
+
+    Returns:
+        A new, open fitz document the caller must close, or None when
+        `orig_doc` has no pages.
+    """
+    if not orig_doc.page_count:
+        return None
+    src_rect = orig_doc[0].rect
+    width = max(float(src_rect.width), 595.0)  # ensure at least A4
+    height = max(float(src_rect.height), 842.0)
+
+    margin_x = 50.0
+    top_y = 50.0
+    title_size = 18
+    body_size = 10.5
+    line_h = body_size * 1.45
+    indent_x = margin_x + 18
+    indent_width = width - 2 * margin_x - 18
+    # Last y at which one more body line still fits above the bottom margin.
+    last_line_y = height - top_y - line_h
+
+    new = fitz.open()
+
+    def add_page():
+        """Append a page to the summary and register the fonts on it."""
+        fresh = new.new_page(width=width, height=height)
+        _register_fonts(fresh)
+        return fresh
+
+    def ensure_room(cur_page, cur_y, limit):
+        """Return (page, y) to draw at, starting a new page once y > limit."""
+        if cur_y > limit:
+            return add_page(), top_y
+        return cur_page, cur_y
+
+    page = add_page()
+    y = top_y
+
+    # Header line: filename
+    if contract_name:
+        _draw_text(page, contract_name, margin_x, y, font="sans",
+                   size=11, color=(0.40, 0.45, 0.55))
+        y += 18
+
+    # Title
+    _draw_text(page, "Kontrola smluvních podmínek", margin_x, y,
+               font="bold", size=title_size, color=(0.06, 0.10, 0.20))
+    y += title_size * 1.6
+
+    # Risk badge line
+    if risk_level:
+        labels = {"low": "NÍZKÉ", "medium": "STŘEDNÍ", "high": "VYSOKÉ"}
+        colors = {
+            "low": (0.20, 0.65, 0.32),
+            "medium": (0.85, 0.55, 0.10),
+            "high": (0.80, 0.20, 0.20),
+        }
+        _draw_text(page, f"Celková míra rizika: {labels.get(risk_level, risk_level.upper())}",
+                   margin_x, y, font="bold", size=12,
+                   color=colors.get(risk_level, (0.4, 0.4, 0.4)))
+        y += 22
+
+    # Overall summary
+    if overall_summary:
+        y = _wrap_text(page, overall_summary, margin_x, y,
+                       width - 2 * margin_x, body_size, font="sans")
+        y += 12
+
+    y += 6
+    page.draw_line((margin_x, y), (width - margin_x, y),
+                   color=(0.85, 0.85, 0.85))
+    y += 16
+
+    _draw_text(page, "Položky kontroly", margin_x, y,
+               font="bold", size=12, color=(0.15, 0.20, 0.30))
+    y += 18
+
+    status_labels = {"ok": "OK", "warning": "POZOR",
+                     "problem": "PROBLÉM", "missing": "CHYBÍ"}
+    for item in items:
+        # Break early before a header so it is not stranded at the bottom.
+        page, y = ensure_room(page, y, height - 80)
+
+        # `or ""` also covers an explicit null status, which would otherwise
+        # fail on .upper() below.
+        status = item.get("status") or ""
+        color = COLORS.get(status, (0.6, 0.6, 0.6))
+        label = status_labels.get(status, status.upper())
+        title = item.get("title") or item.get("label") or item.get("id", "")
+
+        # Colored bullet square
+        page.draw_rect(
+            fitz.Rect(margin_x, y, margin_x + 10, y + 10),
+            color=color, fill=color,
+        )
+        _draw_text(page, f"[{label}]  {title}",
+                   indent_x, y, font="bold", size=11,
+                   color=(0.06, 0.10, 0.20))
+        y += line_h + 4
+
+        summary = item.get("summary", "")
+        if summary:
+            page, y = ensure_room(page, y, last_line_y)
+            y = _wrap_text(page, summary, indent_x, y,
+                           indent_width, body_size,
+                           font="sans", color=(0.30, 0.35, 0.45))
+
+        # List page references for each excerpt
+        for ex in item.get("excerpts") or []:
+            pg = ex.get("page")
+            text = (ex.get("text") or "").strip()
+            if not text:
+                continue
+            pg_str = f"str. {pg}: " if pg else ""
+            snippet = text if len(text) <= 90 else text[:87] + "…"
+            page, y = ensure_room(page, y, last_line_y)
+            y = _wrap_text(page, f"• {pg_str}„{snippet}\"",
+                           indent_x, y,
+                           indent_width, body_size - 0.5,
+                           font="sans", color=(0.25, 0.30, 0.40))
+            cmt = (ex.get("comment") or "").strip()
+            if cmt:
+                page, y = ensure_room(page, y, last_line_y)
+                y = _wrap_text(page, f"   — {cmt}",
+                               indent_x, y,
+                               indent_width, body_size - 0.5,
+                               font="sans", color=(0.45, 0.50, 0.60))
+
+        y += 12
+
+    return new
+
+
+# ── font helpers ─────────────────────────────────────────
+
+def _register_fonts(page):
+    """Insert DejaVu Sans on `page` under the names "sans" and "bold".
+
+    Best effort: every failure is logged and none is raised. When the bold
+    file cannot be inserted, the regular file is registered under "bold"
+    instead. If nothing can be registered, _draw_text() falls back to the
+    PyMuPDF built-in font.
+    """
+    try:
+        page.insert_font(fontname="sans", fontfile=FONT_PATH_SANS)
+    except Exception as e:
+        logger.warning("Could not register DejaVuSans: %s", e)
+    try:
+        page.insert_font(fontname="bold", fontfile=FONT_PATH_BOLD)
+    except Exception as e:
+        logger.warning("Could not register DejaVuSans-Bold, using regular for bold: %s", e)
+        # Fall back to regular for bold
+        try:
+            page.insert_font(fontname="bold", fontfile=FONT_PATH_SANS)
+        except Exception as fallback_err:
+            logger.warning("Could not register fallback font for bold: %s", fallback_err)
+
+
+def _draw_text(page, text: str, x: float, y: float,
+               font: str = "sans", size: float = 10.5,
+               color: tuple = (0.06, 0.10, 0.20)):
+    """Draw `text` as one line on `page`, with its baseline `size` below `y`.
+
+    The named font is tried first. If that raises, the same text is drawn
+    without a font name, i.e. with the PyMuPDF built-in font (which may
+    mangle diacritics but won't crash).
+    """
+    baseline = (x, y + size)
+    style = {"fontsize": size, "color": color}
+    try:
+        page.insert_text(baseline, text, fontname=font, **style)
+    except Exception:
+        page.insert_text(baseline, text, **style)
+
+
+# Font files behind the names that _register_fonts() registers on a page.
+_MEASURE_FONT_FILES = {"sans": FONT_PATH_SANS, "bold": FONT_PATH_BOLD}
+# Loaded fitz.Font objects (or None after a failed load), keyed by font name.
+_MEASURE_FONTS: dict = {}
+
+
+def _measure_font(font: str):
+    """Return a cached fitz.Font for `font`, or None if it cannot be loaded."""
+    if font not in _MEASURE_FONTS:
+        loaded = None
+        fontfile = _MEASURE_FONT_FILES.get(font)
+        if fontfile:
+            try:
+                loaded = fitz.Font(fontfile=fontfile)
+            except Exception as e:
+                logger.warning("Could not load %s for text measurement: %s",
+                               fontfile, e)
+        _MEASURE_FONTS[font] = loaded
+    return _MEASURE_FONTS[font]
+
+
+def _wrap_text(page, text: str, x: float, y: float, max_width: float,
+               font_size: float, font: str = "sans",
+               color: tuple = (0.06, 0.10, 0.20)) -> float:
+    """Word-wrap `text` onto `page` and return the y position below it.
+
+    Text is split on whitespace and packed greedily into lines no wider than
+    `max_width`. A single word wider than `max_width` gets a line of its own
+    and is not broken. Lines are drawn with _draw_text(), 1.45 * `font_size`
+    apart. Empty or whitespace-only text draws nothing and returns `y`.
+    """
+    line_h = font_size * 1.45
+    metrics = _measure_font(font)
+
+    def measure(s: str) -> float:
+        # fitz.get_text_length() only knows the built-in font names, so for
+        # the page-registered "sans"/"bold" the width has to come from the
+        # font file itself. The older chain stays as a fallback.
+        if metrics is not None:
+            return metrics.text_length(s, fontsize=font_size)
+        try:
+            return fitz.get_text_length(s, fontname=font, fontsize=font_size)
+        except Exception:
+            return len(s) * font_size * 0.50
+
+    words = text.split()
+    if not words:
+        return y
+    line = ""
+    for word in words:
+        candidate = (line + " " + word).strip()
+        if measure(candidate) > max_width and line:
+            _draw_text(page, line, x, y, font=font,
+                       size=font_size, color=color)
+            y += line_h
+            line = word
+        else:
+            line = candidate
+    if line:
+        _draw_text(page, line, x, y, font=font, size=font_size, color=color)
+        y += line_h
+    return y