🛡️ fix(orchestration): endurecer extracao estruturada e fallbacks de llm

chore/observability-latency-markers
parent ad6a966c00
commit b687f5e5c7

@ -11,6 +11,7 @@ class Settings(BaseSettings):
google_project_id: str
google_location: str = "us-central1"
vertex_model_name: str = "gemini-2.5-pro"
vertex_bundle_model_name: str = "gemini-2.5-pro"
# Tools database (MySQL)
db_host: str = "127.0.0.1"

@ -39,7 +39,11 @@ class LLMService:
configured = settings.vertex_model_name.strip()
fallback_models = ["gemini-2.5-pro", "gemini-2.5-flash", "gemini-2.0-flash-001"]
self.model_names = [configured] + [m for m in fallback_models if m != configured]
self.model_names = self._build_model_sequence(configured, *fallback_models)
self.bundle_model_names = self._build_model_sequence(
settings.vertex_bundle_model_name.strip(),
*self.model_names,
)
def _log_llm_event(self, event: str, **payload) -> None:
    """Emit a structured, info-level telemetry line for an LLM service event.

    `event` is a short machine-readable tag; any keyword arguments are
    rendered as the payload. Uses lazy %-formatting so the log line is only
    built when INFO is enabled.
    """
    logger.info("llm_service_event=%s payload=%s", event, payload)
@ -206,6 +210,14 @@ class LLMService:
LLMService._models[model_name] = model
return model
def _build_model_sequence(self, *model_names: str | None) -> list[str]:
sequence: list[str] = []
for item in model_names:
candidate = str(item or "").strip()
if candidate and candidate not in sequence:
sequence.append(candidate)
return sequence
def _extract_response_payload(self, response) -> Dict[str, Any]:
candidate = response.candidates[0] if getattr(response, "candidates", None) else None
content = getattr(candidate, "content", None)
@ -224,7 +236,22 @@ class LLMService:
if isinstance(text_value, str) and text_value.strip():
text_parts.append(text_value)
response_text = "\n".join(text_parts).strip() or None
response_text = "\n".join(text_parts).strip()
if not response_text:
fallback_text = None
for carrier in (response, candidate, content):
if carrier is None:
continue
try:
text_value = getattr(carrier, "text", None)
except (AttributeError, ValueError):
text_value = None
if isinstance(text_value, str) and text_value.strip():
fallback_text = text_value.strip()
break
response_text = fallback_text or None
else:
response_text = response_text or None
return {
"response": response_text,
"tool_call": tool_call,
@ -235,9 +262,12 @@ class LLMService:
message: str,
tools: List[ToolDefinition],
history: List[Dict[str, Any]] = None,
preferred_models: List[str] | None = None,
generation_config: Dict[str, Any] | None = None,
) -> Dict[str, Any]:
"""Gera resposta textual ou chamada de tool a partir da mensagem do usuario."""
vertex_tools = self.build_vertex_tools(tools)
candidate_models = self._build_model_sequence(*(preferred_models or []), *self.model_names)
response = None
last_error = None
@ -247,13 +277,18 @@ class LLMService:
# Tenta o modelo configurado e cai para nomes alternativos
# quando o principal nao estiver disponivel no projeto/regiao.
for model_name in self.model_names:
for model_name in candidate_models:
attempts += 1
try:
model = self._get_model(model_name)
chat = model.start_chat(history=history or [])
send_kwargs = {"tools": vertex_tools} if vertex_tools else {}
if generation_config:
send_kwargs["generation_config"] = generation_config
if history:
chat = model.start_chat(history=history)
response = await asyncio.to_thread(chat.send_message, message, **send_kwargs)
else:
response = await asyncio.to_thread(model.generate_content, message, **send_kwargs)
selected_model_name = model_name
break
except NotFound as err:
@ -297,3 +332,5 @@ class LLMService:
except Exception:
# Warmup e melhor esforco; falhas nao devem bloquear inicializacao.
return

@ -58,6 +58,7 @@ class MessagePlanner:
except Exception:
logger.exception("Falha ao extrair plano da mensagem com LLM. user_id=%s", user_id)
return default
async def extract_turn_bundle(self, message: str, user_id: int | None) -> dict:
user_context = f"user_id={user_id}" if user_id is not None else "user_id=anonimo"
default_turn_decision = self.normalizer.empty_turn_decision()
@ -100,32 +101,40 @@ class MessagePlanner:
)
prompt = (
"Analise a mensagem do usuario e retorne APENAS JSON valido com duas secoes: turn_decision e message_plan.\n"
"Nao use markdown. Nao escreva texto fora do JSON. Nao invente dados ausentes.\n\n"
"Formato obrigatorio:\n"
"Sem markdown, sem texto fora do JSON, sem inventar dados ausentes.\n\n"
"Contrato:\n"
f"{schema_example}\n\n"
"Regras gerais:\n"
"- turn_decision resume a intencao principal do turno completo.\n"
"- message_plan.orders separa pedidos operacionais em ordem de aparicao; se nao houver pedido operacional, use um unico item general com a mensagem inteira.\n"
"- Cada item de orders deve conter domain, message e entities. Mantenha message curta e fiel ao texto do usuario.\n"
"- domain deve ser review, sales ou general.\n"
"- Em pedidos de compra com faixa de preco ou orcamento (ex.: 70 mil, ate 50 mil, R$ 45000), preencha entities.generic_memory.orcamento_max nas secoes relevantes.\n"
"- Em pedidos com tipo de carro (ex.: suv, sedan, hatch, pickup), preencha entities.generic_memory.perfil_veiculo nas secoes relevantes.\n"
"- Se faltar dado para continuar um fluxo, use action=ask_missing_fields e preencha missing_fields e response_to_user.\n"
"- Se nao houver acao operacional, use action=answer_user.\n"
"- Se o usuario quiser efetivar a compra de um veiculo, use intent=order_create, domain=sales e prefira tool_name=realizar_pedido.\n"
"- Se o usuario quiser listar pedidos, use intent=order_list, domain=sales, action=call_tool e tool_name=listar_pedidos.\n"
"- Se o usuario quiser consultar estoque com filtros de compra, use intent=inventory_search e domain=sales.\n"
"- Se o usuario quiser listar revisoes, use intent=review_list, domain=review, action=call_tool e tool_name=listar_agendamentos_revisao.\n"
"- Se o usuario quiser cancelar revisao, use intent=review_cancel, domain=review e prefira tool_name=cancelar_agendamento_revisao.\n"
"- Se o usuario quiser remarcar revisao, use intent=review_reschedule, domain=review e prefira tool_name=editar_data_revisao.\n"
"- Se o usuario quiser avaliar um veiculo na troca e houver modelo, ano e km, use domain=sales, action=call_tool, tool_name=avaliar_veiculo_troca e informe esses campos em tool_arguments.\n\n"
"Regras:\n"
"- turn_decision resume a intencao principal do turno; domain deve ser review, sales ou general.\n"
"- message_plan.orders separa pedidos operacionais em ordem; se nao houver pedido operacional, use um unico item general com a mensagem inteira.\n"
"- Cada order deve ter domain, message e entities; mantenha message curta e fiel ao texto do usuario.\n"
"- Preencha apenas dados claros. Use entities.generic_memory.orcamento_max para teto/faixa de preco e perfil_veiculo para suv/sedan/hatch/pickup.\n"
"- Se faltar dado para continuar um fluxo, use action=ask_missing_fields e preencha missing_fields e response_to_user. Se nao houver acao operacional, use action=answer_user.\n"
"- Compra efetiva: intent=order_create, domain=sales, prefira tool_name=realizar_pedido.\n"
"- Listar pedidos: intent=order_list, domain=sales, action=call_tool, tool_name=listar_pedidos.\n"
"- Consultar/listar/buscar/ver estoque para compra: intent=inventory_search, domain=sales, action=call_tool, tool_name=consultar_estoque; tool_arguments so com filtros explicitamente pedidos, como preco_max, categoria e opcionalmente limite.\n"
"- Listar revisoes: intent=review_list, domain=review, action=call_tool, tool_name=listar_agendamentos_revisao.\n"
"- Cancelar revisao: intent=review_cancel, domain=review, prefira tool_name=cancelar_agendamento_revisao.\n"
"- Remarcar revisao: intent=review_reschedule, domain=review, prefira tool_name=editar_data_revisao.\n"
"- Avaliar troca com modelo, ano e km: domain=sales, action=call_tool, tool_name=avaliar_veiculo_troca e informe esses campos em tool_arguments.\n\n"
f"Contexto: {user_context}\n"
f"Mensagem do usuario: {message}"
)
preferred_models = getattr(self.llm, "bundle_model_names", None)
bundle_generation_config = {
"candidate_count": 1,
"temperature": 0,
"max_output_tokens": 768,
}
for attempt in range(2):
try:
result = await self.llm.generate_response(message=prompt, tools=[])
result = await self.llm.generate_response(
message=prompt,
tools=[],
preferred_models=preferred_models if attempt == 0 else None,
generation_config=bundle_generation_config,
)
text = (result.get("response") or "").strip()
payload = self.normalizer.parse_json_object(text)
if not isinstance(payload, dict):
@ -178,6 +187,7 @@ class MessagePlanner:
"has_turn_decision": False,
"has_message_plan": False,
}
async def extract_routing(self, message: str, user_id: int | None) -> dict:
plan = await self.extract_message_plan(message=message, user_id=user_id)
return {
@ -189,7 +199,6 @@ class MessagePlanner:
for item in plan.get("orders", [])
]
}
async def extract_entities(self, message: str, user_id: int | None) -> dict:
user_context = f"user_id={user_id}" if user_id is not None else "user_id=anonimo"
prompt = (
@ -312,6 +321,8 @@ class MessagePlanner:
"- Em pedidos com tipo de carro (ex.: suv, sedan, hatch, pickup), preencha entities.generic_memory.perfil_veiculo.\n"
"- Se o usuario quiser efetivar a compra de um veiculo, use intent='order_create', domain='sales' e prefira tool_name='realizar_pedido'.\n"
"- Se o usuario quiser listar os pedidos dele, use intent='order_list', domain='sales', action='call_tool' e tool_name='listar_pedidos'.\n"
"- Se o usuario quiser consultar, listar, buscar ou ver veiculos/estoque para compra, use intent='inventory_search', domain='sales', action='call_tool' e tool_name='consultar_estoque'.\n"
"- Em consultar_estoque, preencha tool_arguments apenas com filtros claramente expressos pelo usuario, como preco_max, categoria e opcionalmente limite.\n"
"- Se o usuario quiser listar agendamentos de revisao, use intent='review_list', domain='review', action='call_tool' e tool_name='listar_agendamentos_revisao'.\n"
"- Se o usuario quiser cancelar um agendamento de revisao, use intent='review_cancel', domain='review' e prefira tool_name='cancelar_agendamento_revisao'.\n"
"- Se o usuario quiser remarcar um agendamento de revisao, use intent='review_reschedule', domain='review' e prefira tool_name='editar_data_revisao'.\n"
@ -370,3 +381,5 @@ class MessagePlanner:
"cancel_order_fields": self.normalizer.normalize_cancel_order_fields(coerced.get("cancel_order_fields")),
"intents": self.normalizer.normalize_intents(coerced.get("intents")),
}

@ -92,7 +92,7 @@ def extract_budget_from_text(text: str) -> float | None:
normalized = normalize_text(candidate)
keyword_match = re.search(
r"(?:ate|até|de|por|orcamento|orçamento)\s+(\d[\d\.\,\s]{1,12})(?!\d)",
r"(?:ate|até|de|por|orcamento|orçamento)\s+(\d[\d\.\,\s]{1,12})(?!\d)",
normalized,
flags=re.IGNORECASE,
)
@ -128,8 +128,12 @@ def normalize_bool(value) -> bool | None:
def normalize_datetime_connector(text: str) -> str:
    """Collapse whitespace and strip connector words between a date and a time.

    Turns inputs like "25/12/2024 as 14:30" or "2024-12-25  às  09:05" into
    "25/12/2024 14:30" / "2024-12-25 09:05" so downstream ISO-style parsing
    can succeed. Up to two short connector tokens (1-6 non-digit, non-space
    characters each, e.g. "as", "às", "dia") between the date and the time
    are removed; text without a date+time pair is returned compacted but
    otherwise untouched.

    Note: the block previously carried two consecutive returns (the old
    single-connector "[aàáâã]s" rule plus this generalized regex) — merge
    residue; only the generalized rule is kept.
    """
    compact = " ".join(str(text or "").strip().split())
    # \1 = date (dd/mm/yyyy or yyyy-mm-dd, "/" or "-" separators);
    # \2 = time with optional seconds and optional timezone suffix.
    return re.sub(
        r"((?:\d{1,2}[/-]\d{1,2}[/-]\d{4})|(?:\d{4}[/-]\d{1,2}[/-]\d{1,2}))"
        r"\s+(?:[^\d\s]{1,6}\s+){1,2}"
        r"(\d{1,2}:\d{2}(?::\d{2})?(?:\s*(?:Z|[+-]\d{2}:\d{2}))?)",
        r"\1 \2",
        compact,
        flags=re.IGNORECASE,
    ).strip()
def try_parse_iso_datetime(text: str) -> datetime | None:
candidate = str(text or "").strip()

@ -62,6 +62,25 @@ class LLMServiceResponseParsingTests(unittest.TestCase):
self.assertEqual(payload, {"response": "Resposta simples", "tool_call": None})
def test_extract_response_payload_falls_back_to_response_text_accessor(self):
    """When no content part carries text, the payload falls back to `response.text`.

    The single part has only `function_call=None` (no usable text), so the
    extractor must recover the JSON string from the top-level `.text`
    accessor instead of returning an empty response.
    """
    # __new__ bypasses __init__ so no Vertex client is constructed.
    service = LLMService.__new__(LLMService)
    response = SimpleNamespace(
        text='{"ok": true}',
        candidates=[
            SimpleNamespace(
                content=SimpleNamespace(
                    parts=[
                        SimpleNamespace(function_call=None),
                    ]
                )
            )
        ]
    )
    payload = service._extract_response_payload(response)
    self.assertEqual(payload, {"response": '{"ok": true}', "tool_call": None})
class LLMServiceImageWorkflowPromptTests(unittest.TestCase):
def test_build_image_workflow_prompt_preserves_visible_payment_time(self):
@ -102,3 +121,87 @@ class LLMServiceImageWorkflowPromptTests(unittest.TestCase):
response,
"Registrar pagamento de aluguel: contrato LOC-20260319-33CD6567; valor R$ 379,80.",
)
class LLMServiceDispatchTests(unittest.IsolatedAsyncioTestCase):
    """Verifies which Vertex entry point `generate_response` dispatches to.

    With an empty history the service should call the stateless
    `generate_content`; with a non-empty history it should open a chat and
    call `send_message`. Collaborators are stubbed so no real model is used.
    """

    async def test_generate_response_uses_generate_content_when_history_is_empty(self):
        # Bare instance: __init__ is bypassed; logging/tool-building stubbed out.
        service = LLMService.__new__(LLMService)
        service.model_names = ["gemini-2.5-pro"]
        service._log_llm_event = lambda *args, **kwargs: None
        service.build_vertex_tools = lambda tools: None

        class DummyChat:
            def __init__(self):
                self.calls = []

            def send_message(self, message, **kwargs):
                self.calls.append((message, kwargs))
                return SimpleNamespace(candidates=[])

        class DummyModel:
            def __init__(self):
                self.generate_calls = []
                self.chat = DummyChat()

            def generate_content(self, message, **kwargs):
                self.generate_calls.append((message, kwargs))
                return SimpleNamespace(candidates=[])

            def start_chat(self, history):
                # Opening a chat here would mean the wrong code path was taken.
                raise AssertionError("nao deveria abrir chat quando nao ha historico")

        model = DummyModel()
        service._get_model = lambda model_name: model
        service._extract_response_payload = lambda response: {"response": "ok", "tool_call": None}
        generation_config = {"temperature": 0, "max_output_tokens": 128}
        payload = await service.generate_response(
            message="teste",
            tools=[],
            history=[],
            generation_config=generation_config,
        )
        self.assertEqual(payload, {"response": "ok", "tool_call": None})
        # generation_config must be forwarded on the stateless path.
        self.assertEqual(
            model.generate_calls,
            [("teste", {"generation_config": generation_config})],
        )

    async def test_generate_response_uses_chat_when_history_is_present(self):
        # Bare instance: __init__ is bypassed; logging/tool-building stubbed out.
        service = LLMService.__new__(LLMService)
        service.model_names = ["gemini-2.5-pro"]
        service._log_llm_event = lambda *args, **kwargs: None
        service.build_vertex_tools = lambda tools: None

        class DummyChat:
            def __init__(self):
                self.calls = []

            def send_message(self, message, **kwargs):
                self.calls.append((message, kwargs))
                return SimpleNamespace(candidates=[])

        class DummyModel:
            def __init__(self):
                self.chat = DummyChat()
                self.histories = []

            def generate_content(self, message, **kwargs):
                # The stateless path must not be used once a history exists.
                raise AssertionError("nao deveria usar generate_content quando ha historico")

            def start_chat(self, history):
                self.histories.append(history)
                return self.chat

        model = DummyModel()
        service._get_model = lambda model_name: model
        service._extract_response_payload = lambda response: {"response": "ok", "tool_call": None}
        history = [{"role": "user", "parts": ["oi"]}]
        payload = await service.generate_response(message="teste", tools=[], history=history)
        self.assertEqual(payload, {"response": "ok", "tool_call": None})
        # The original history must be handed to start_chat unmodified.
        self.assertEqual(model.histories, [history])
        self.assertEqual(model.chat.calls, [("teste", {})])

Loading…
Cancel
Save