Files
AI_portal/translator/main.py
Ondřej Glaser 48cef99257 Initial portal commit: landing + 9 AI-powered apps
Apps:
- dwg-rooms: extract room numbers from DWG/DXF
- dwg-counting: count symbols in PDF drawings (OpenCV template matching)
- contract-check: review PDF contracts against a checklist (Claude vision + Tesseract OCR fallback)
- email-drafter: bullet notes → polished Czech/English business emails
- invoice-extractor: PDF/image invoice → structured data → Excel
- translator: Czech-first translator across 19 languages with tone control
- vv-check: find inconsistent unit prices across VV sheets in one workbook
- vv-compare: diff original vs new VV files (changes / added / removed)
- feature-request: portal users submit ideas + sample files

Infrastructure:
- LiteLLM gateway with per-app virtual keys + budgets
- Langfuse observability
- Geist font, shared theme, cross-subdomain back link + theme sync via cookie/URL
- Caddy reverse proxy on *.klas.chat
2026-05-13 15:25:04 +02:00

151 lines
5.4 KiB
Python

"""FastAPI: source text → translated text via Claude Sonnet 4.
Supports auto-detection of source language and a configurable tone.
"""
import json
import logging
import os
from typing import Literal
from fastapi import FastAPI, HTTPException
from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import FileResponse
from fastapi.staticfiles import StaticFiles
from openai import AsyncOpenAI
from pydantic import BaseModel, Field
# Root logging is configured at INFO; this module logs under its own name.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

app = FastAPI(title="Translator")

# CORS is fully open: every origin, method and header is allowed.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_methods=["*"],
    allow_headers=["*"],
)

# Model identifier sent with each completion request; LLM_MODEL overrides it.
MODEL = os.getenv("LLM_MODEL", "anthropic/claude-sonnet-4-20250514")

# Shared client instance; stays None until _get_client() first builds it.
_client: AsyncOpenAI | None = None
def _get_client() -> AsyncOpenAI:
    """Return the shared ``AsyncOpenAI`` client, building it on first use.

    The endpoint and key are read from ``LITELLM_BASE_URL`` and
    ``LITELLM_API_KEY`` at construction time, with the defaults below used
    when the variables are unset. The instance is cached in the module-level
    ``_client`` so later calls reuse it.
    """
    global _client
    if _client is not None:
        return _client

    base_url = os.getenv("LITELLM_BASE_URL", "http://host.docker.internal:4000/v1")
    api_key = os.getenv("LITELLM_API_KEY", "sk-dummy")
    _client = AsyncOpenAI(base_url=base_url, api_key=api_key)
    return _client
# Supported languages. Each row is (code, Czech display name, English label);
# the English label is what gets written into the LLM prompt.
LANGUAGES = [
    {"code": code, "cs": czech_name, "en": english_label}
    for code, czech_name, english_label in (
        ("auto", "Automaticky rozpoznat", "auto-detect"),
        ("cs", "Čeština", "Czech"),
        ("en", "Angličtina", "English"),
        ("sk", "Slovenština", "Slovak"),
        ("de", "Němčina", "German"),
        ("pl", "Polština", "Polish"),
        ("uk", "Ukrajinština", "Ukrainian"),
        ("ru", "Ruština", "Russian"),
        ("fr", "Francouzština", "French"),
        ("it", "Italština", "Italian"),
        ("es", "Španělština", "Spanish"),
        ("pt", "Portugalština", "Portuguese"),
        ("nl", "Nizozemština", "Dutch"),
        ("hu", "Maďarština", "Hungarian"),
        ("ro", "Rumunština", "Romanian"),
        ("bg", "Bulharština", "Bulgarian"),
        ("tr", "Turečtina", "Turkish"),
        ("zh", "Čínština", "Chinese (Simplified)"),
        ("ja", "Japonština", "Japanese"),
        ("ar", "Arabština", "Arabic"),
    )
]

# Lookup of the same entry dicts keyed by their language code.
LANG_BY_CODE = {entry["code"]: entry for entry in LANGUAGES}
# Tone key accepted in requests -> English description inserted into the
# "Tone:" line of the system prompt.
TONES: dict[str, str] = {
    "formal": "formal, professional, polished business register",
    "casual": "casual, friendly, conversational",
    "technical": "technical, precise, preserving exact technical terms",
    "marketing": "marketing copy — engaging, persuasive, brand-voice",
    "legal": (
        "legal / contractual — precise, neutral, preserving legal terms of art"
    ),
}
class TranslateRequest(BaseModel):
    # Request body of POST /api/translate.

    # Text to translate: required, 1 to 20000 characters.
    text: str = Field(..., min_length=1, max_length=20000)

    # Language codes; "auto" is the default for the source side.
    source_lang: str = "auto"
    target_lang: str = "en"

    # Must be one of the keys of TONES.
    tone: Literal[
        "formal",
        "casual",
        "technical",
        "marketing",
        "legal",
    ] = "formal"
# Landing page: answers "/" with the file static/index.html (relative path).
@app.get("/")
async def root():
    index_page = FileResponse("static/index.html")
    return index_page
# Returns the full LANGUAGES list (including the "auto" entry) under the
# "languages" key.
@app.get("/api/languages")
async def languages():
    payload = {"languages": LANGUAGES}
    return payload
def _source_sentence(source_lang: str) -> str:
    """Return the system-prompt sentence that describes the source language.

    Raises:
        HTTPException: 400 when ``source_lang`` is neither ``"auto"`` nor a
            code present in ``LANG_BY_CODE``.
    """
    if source_lang == "auto":
        return "Auto-detect the source language."
    source = LANG_BY_CODE.get(source_lang)
    if source is None:
        # Never paste an unrecognised, caller-supplied string into the system
        # prompt: that would let a request inject arbitrary instructions.
        raise HTTPException(400, "Neplatný zdrojový jazyk")
    # Built directly instead of via str.capitalize(), which would lowercase
    # the language name ("Czech" -> "czech").
    return f"The source language is {source['en']}."


def _strip_code_fence(translated: str, original: str) -> str:
    """Remove a Markdown fence that wraps the model's entire reply.

    The reply is returned untouched when the input itself starts with a
    fence, because in that case the fence is content to be preserved rather
    than an accident. An info string on the opening fence line (for example
    ``text`` or ``markdown``) is dropped together with the fence.
    """
    fence = "```"
    if original.lstrip().startswith(fence):
        return translated
    # Both fences must fit without overlapping.
    if len(translated) < 2 * len(fence):
        return translated
    if not (translated.startswith(fence) and translated.endswith(fence)):
        return translated

    inner = translated[len(fence):-len(fence)]
    first_line, newline, rest = inner.partition("\n")
    # An empty or single-token opening line is a fence info string, not text.
    if newline and len(first_line.split()) <= 1:
        inner = rest
    return inner.strip()


@app.post("/api/translate")
async def translate(req: TranslateRequest):
    """Translate ``req.text`` into the requested language and tone.

    Returns a dict with the translated text plus the echoed ``source_lang``,
    ``target_lang`` and ``tone`` from the request.

    Raises:
        HTTPException: 400 when the target language is unknown or ``"auto"``,
            or when the source language is unknown; 500 when the LLM call
            fails or returns no choices.
    """
    target = LANG_BY_CODE.get(req.target_lang)
    if not target or target["code"] == "auto":
        raise HTTPException(400, "Vyberte cílový jazyk")

    source_sentence = _source_sentence(req.source_lang)

    system = f"""You are a professional translator producing high-quality business translations.
Translate the user's input into **{target['en']}**.
{source_sentence}
Tone: **{TONES[req.tone]}**.
Strict rules:
- Output ONLY the translated text. No quotes, no preamble, no explanation, no language tag.
- Preserve formatting: line breaks, lists, paragraphs, code blocks, URLs.
- Keep proper names, brand names, product codes, and acronyms unchanged unless they have a well-established translation in the target language.
- Numbers, dates, and currencies: convert format conventions to the target language (e.g. decimal comma vs dot, date format) but do NOT convert values.
- If the input is already in the target language, return it unchanged.
- Never add information that isn't in the source."""

    try:
        resp = await _get_client().chat.completions.create(
            model=MODEL,
            messages=[
                {"role": "system", "content": system},
                {"role": "user", "content": req.text},
            ],
            temperature=0.2,
            max_tokens=8000,
        )
    except Exception as exc:
        logger.exception("LLM call failed")
        raise HTTPException(500, f"Překlad selhal: {exc}") from exc

    # An empty choices list would otherwise surface as a bare IndexError.
    if not resp.choices:
        logger.error("LLM returned no choices")
        raise HTTPException(500, "Překlad selhal: prázdná odpověď modelu")

    choice = resp.choices[0]
    if getattr(choice, "finish_reason", None) == "length":
        # The reply hit max_tokens, so the translation is probably cut short.
        logger.warning("Translation truncated at max_tokens")

    translated = (choice.message.content or "").strip()
    translated = _strip_code_fence(translated, req.text)

    return {
        "translated": translated,
        "source_lang": req.source_lang,
        "target_lang": req.target_lang,
        "tone": req.tone,
    }
# Health endpoint: always answers with a constant "ok" status payload.
@app.get("/health")
async def health():
    status_report = {"status": "ok"}
    return status_report
# Serve the contents of the relative "static" directory under /static.
app.mount(
    "/static",
    StaticFiles(directory="static"),
    name="static",
)