"""LLM fallback: classify unmatched DXF text entities as rooms via LiteLLM.""" import json import logging import os from openai import AsyncOpenAI logger = logging.getLogger(__name__) _client: AsyncOpenAI | None = None def _get_client() -> AsyncOpenAI: global _client if _client is None: _client = AsyncOpenAI( base_url=os.getenv("LITELLM_BASE_URL", "http://host.docker.internal:4000"), api_key=os.getenv("LITELLM_API_KEY", "sk-dummy"), ) return _client SYSTEM = """You are a specialist extracting room data from Czech architectural DXF floor plans. You receive text entities (text, x, y) that were not matched by rule-based parsing. Identify pairs of room number + Czech room name/description. Czech room numbers: 4-6 digit codes, sometimes prefixed with "č.m.". Czech room names: e.g. "Chodba", "Serverovna", "Sklep", "WC", "Kancelář", etc. Return ONLY a JSON array of objects: {"room": "XXXXX", "description": "Czech name", "confidence": 0.0-1.0} Skip: measurements (m2, m²), material names (beton, dlažba), dimensions, unrelated text. Only include entries with confidence > 0.5.""" async def enhance_with_llm(unmatched: list[dict]) -> list[dict]: api_key = os.getenv("LITELLM_API_KEY", "") if not api_key or api_key == "sk-dummy": logger.info("LITELLM_API_KEY not set — skipping LLM enhancement") return [] sample = unmatched[:200] text_block = "\n".join( f'- "{t["text"]}" x={t["x"]:.0f} y={t["y"]:.0f}' for t in sample ) model = os.getenv("LLM_MODEL", "gpt-4o-mini") try: resp = await _get_client().chat.completions.create( model=model, messages=[ {"role": "system", "content": SYSTEM}, {"role": "user", "content": f"Text entities:\n{text_block}"}, ], temperature=0.1, max_tokens=2000, ) raw = resp.choices[0].message.content or "[]" # Strip markdown code fences if present raw = raw.strip().removeprefix("```json").removeprefix("```").removesuffix("```").strip() data = json.loads(raw) if isinstance(data, dict): data = data.get("rooms", data.get("result", [])) return [ { "room": str(r["room"]), "description": r.get("description", ""), "x": 0.0, "y": 0.0, "source": "llm", "confidence": float(r.get("confidence", 0.7)), } for r in data if isinstance(r, dict) and r.get("room") ] except Exception as exc: logger.error("LLM enhancement failed: %s", exc) return []