diff --git a/app/core/settings.py b/app/core/settings.py index c9e880a..6c0f224 100644 --- a/app/core/settings.py +++ b/app/core/settings.py @@ -11,6 +11,7 @@ class Settings(BaseSettings): google_project_id: str google_location: str = "us-central1" vertex_model_name: str = "gemini-2.5-pro" + vertex_bundle_model_name: str = "gemini-2.5-pro" # Tools database (MySQL) db_host: str = "127.0.0.1" diff --git a/app/services/ai/llm_service.py b/app/services/ai/llm_service.py index 37c5f69..a2c96e2 100644 --- a/app/services/ai/llm_service.py +++ b/app/services/ai/llm_service.py @@ -39,7 +39,11 @@ class LLMService: configured = settings.vertex_model_name.strip() fallback_models = ["gemini-2.5-pro", "gemini-2.5-flash", "gemini-2.0-flash-001"] - self.model_names = [configured] + [m for m in fallback_models if m != configured] + self.model_names = self._build_model_sequence(configured, *fallback_models) + self.bundle_model_names = self._build_model_sequence( + settings.vertex_bundle_model_name.strip(), + *self.model_names, + ) def _log_llm_event(self, event: str, **payload) -> None: logger.info("llm_service_event=%s payload=%s", event, payload) @@ -206,6 +210,14 @@ class LLMService: LLMService._models[model_name] = model return model + def _build_model_sequence(self, *model_names: str | None) -> list[str]: + sequence: list[str] = [] + for item in model_names: + candidate = str(item or "").strip() + if candidate and candidate not in sequence: + sequence.append(candidate) + return sequence + def _extract_response_payload(self, response) -> Dict[str, Any]: candidate = response.candidates[0] if getattr(response, "candidates", None) else None content = getattr(candidate, "content", None) @@ -224,7 +236,22 @@ class LLMService: if isinstance(text_value, str) and text_value.strip(): text_parts.append(text_value) - response_text = "\n".join(text_parts).strip() or None + response_text = "\n".join(text_parts).strip() + if not response_text: + fallback_text = None + for carrier in (response, candidate, content): + if carrier is None: + continue + try: + text_value = getattr(carrier, "text", None) + except (AttributeError, ValueError): + text_value = None + if isinstance(text_value, str) and text_value.strip(): + fallback_text = text_value.strip() + break + response_text = fallback_text or None + else: + response_text = response_text or None return { "response": response_text, "tool_call": tool_call, @@ -235,9 +262,12 @@ class LLMService: message: str, tools: List[ToolDefinition], history: List[Dict[str, Any]] = None, + preferred_models: List[str] | None = None, + generation_config: Dict[str, Any] | None = None, ) -> Dict[str, Any]: """Gera resposta textual ou chamada de tool a partir da mensagem do usuario.""" vertex_tools = self.build_vertex_tools(tools) + candidate_models = self._build_model_sequence(*(preferred_models or []), *self.model_names) response = None last_error = None @@ -247,13 +277,18 @@ class LLMService: # Tenta o modelo configurado e cai para nomes alternativos # quando o principal nao estiver disponivel no projeto/regiao. - for model_name in self.model_names: + for model_name in candidate_models: attempts += 1 try: model = self._get_model(model_name) - chat = model.start_chat(history=history or []) send_kwargs = {"tools": vertex_tools} if vertex_tools else {} - response = await asyncio.to_thread(chat.send_message, message, **send_kwargs) + if generation_config: + send_kwargs["generation_config"] = generation_config + if history: + chat = model.start_chat(history=history) + response = await asyncio.to_thread(chat.send_message, message, **send_kwargs) + else: + response = await asyncio.to_thread(model.generate_content, message, **send_kwargs) selected_model_name = model_name break except NotFound as err: @@ -297,3 +332,5 @@ class LLMService: except Exception: # Warmup e melhor esforco; falhas nao devem bloquear inicializacao. return + + diff --git a/app/services/orchestration/message_planner.py b/app/services/orchestration/message_planner.py index d36b7e5..5afd494 100644 --- a/app/services/orchestration/message_planner.py +++ b/app/services/orchestration/message_planner.py @@ -58,6 +58,7 @@ class MessagePlanner: except Exception: logger.exception("Falha ao extrair plano da mensagem com LLM. user_id=%s", user_id) return default + async def extract_turn_bundle(self, message: str, user_id: int | None) -> dict: user_context = f"user_id={user_id}" if user_id is not None else "user_id=anonimo" default_turn_decision = self.normalizer.empty_turn_decision() @@ -100,32 +101,40 @@ class MessagePlanner: ) prompt = ( "Analise a mensagem do usuario e retorne APENAS JSON valido com duas secoes: turn_decision e message_plan.\n" - "Nao use markdown. Nao escreva texto fora do JSON. Nao invente dados ausentes.\n\n" - "Formato obrigatorio:\n" + "Sem markdown, sem texto fora do JSON, sem inventar dados ausentes.\n\n" + "Contrato:\n" f"{schema_example}\n\n" - "Regras gerais:\n" - "- turn_decision resume a intencao principal do turno completo.\n" - "- message_plan.orders separa pedidos operacionais em ordem de aparicao; se nao houver pedido operacional, use um unico item general com a mensagem inteira.\n" - "- Cada item de orders deve conter domain, message e entities. Mantenha message curta e fiel ao texto do usuario.\n" - "- domain deve ser review, sales ou general.\n" - "- Em pedidos de compra com faixa de preco ou orcamento (ex.: 70 mil, ate 50 mil, R$ 45000), preencha entities.generic_memory.orcamento_max nas secoes relevantes.\n" - "- Em pedidos com tipo de carro (ex.: suv, sedan, hatch, pickup), preencha entities.generic_memory.perfil_veiculo nas secoes relevantes.\n" - "- Se faltar dado para continuar um fluxo, use action=ask_missing_fields e preencha missing_fields e response_to_user.\n" - "- Se nao houver acao operacional, use action=answer_user.\n" - "- Se o usuario quiser efetivar a compra de um veiculo, use intent=order_create, domain=sales e prefira tool_name=realizar_pedido.\n" - "- Se o usuario quiser listar pedidos, use intent=order_list, domain=sales, action=call_tool e tool_name=listar_pedidos.\n" - "- Se o usuario quiser consultar estoque com filtros de compra, use intent=inventory_search e domain=sales.\n" - "- Se o usuario quiser listar revisoes, use intent=review_list, domain=review, action=call_tool e tool_name=listar_agendamentos_revisao.\n" - "- Se o usuario quiser cancelar revisao, use intent=review_cancel, domain=review e prefira tool_name=cancelar_agendamento_revisao.\n" - "- Se o usuario quiser remarcar revisao, use intent=review_reschedule, domain=review e prefira tool_name=editar_data_revisao.\n" - "- Se o usuario quiser avaliar um veiculo na troca e houver modelo, ano e km, use domain=sales, action=call_tool, tool_name=avaliar_veiculo_troca e informe esses campos em tool_arguments.\n\n" + "Regras:\n" + "- turn_decision resume a intencao principal do turno; domain deve ser review, sales ou general.\n" + "- message_plan.orders separa pedidos operacionais em ordem; se nao houver pedido operacional, use um unico item general com a mensagem inteira.\n" + "- Cada order deve ter domain, message e entities; mantenha message curta e fiel ao texto do usuario.\n" + "- Preencha apenas dados claros. Use entities.generic_memory.orcamento_max para teto/faixa de preco e perfil_veiculo para suv/sedan/hatch/pickup.\n" + "- Se faltar dado para continuar um fluxo, use action=ask_missing_fields e preencha missing_fields e response_to_user. Se nao houver acao operacional, use action=answer_user.\n" + "- Compra efetiva: intent=order_create, domain=sales, prefira tool_name=realizar_pedido.\n" + "- Listar pedidos: intent=order_list, domain=sales, action=call_tool, tool_name=listar_pedidos.\n" + "- Consultar/listar/buscar/ver estoque para compra: intent=inventory_search, domain=sales, action=call_tool, tool_name=consultar_estoque; tool_arguments so com filtros explicitamente pedidos, como preco_max, categoria e opcionalmente limite.\n" + "- Listar revisoes: intent=review_list, domain=review, action=call_tool, tool_name=listar_agendamentos_revisao.\n" + "- Cancelar revisao: intent=review_cancel, domain=review, prefira tool_name=cancelar_agendamento_revisao.\n" + "- Remarcar revisao: intent=review_reschedule, domain=review, prefira tool_name=editar_data_revisao.\n" + "- Avaliar troca com modelo, ano e km: domain=sales, action=call_tool, tool_name=avaliar_veiculo_troca e informe esses campos em tool_arguments.\n\n" f"Contexto: {user_context}\n" f"Mensagem do usuario: {message}" ) + preferred_models = getattr(self.llm, "bundle_model_names", None) + bundle_generation_config = { + "candidate_count": 1, + "temperature": 0, + "max_output_tokens": 768, + } for attempt in range(2): try: - result = await self.llm.generate_response(message=prompt, tools=[]) + result = await self.llm.generate_response( + message=prompt, + tools=[], + preferred_models=preferred_models if attempt == 0 else None, + generation_config=bundle_generation_config, + ) text = (result.get("response") or "").strip() payload = self.normalizer.parse_json_object(text) if not isinstance(payload, dict): @@ -178,6 +187,7 @@ class MessagePlanner: "has_turn_decision": False, "has_message_plan": False, } + async def extract_routing(self, message: str, user_id: int | None) -> dict: plan = await self.extract_message_plan(message=message, user_id=user_id) return { @@ -189,7 +199,6 @@ class MessagePlanner: for item in plan.get("orders", []) ] } - async def extract_entities(self, message: str, user_id: int | None) -> dict: user_context = f"user_id={user_id}" if user_id is not None else "user_id=anonimo" prompt = ( @@ -312,6 +321,8 @@ class MessagePlanner: "- Em pedidos com tipo de carro (ex.: suv, sedan, hatch, pickup), preencha entities.generic_memory.perfil_veiculo.\n" "- Se o usuario quiser efetivar a compra de um veiculo, use intent='order_create', domain='sales' e prefira tool_name='realizar_pedido'.\n" "- Se o usuario quiser listar os pedidos dele, use intent='order_list', domain='sales', action='call_tool' e tool_name='listar_pedidos'.\n" + "- Se o usuario quiser consultar, listar, buscar ou ver veiculos/estoque para compra, use intent='inventory_search', domain='sales', action='call_tool' e tool_name='consultar_estoque'.\n" + "- Em consultar_estoque, preencha tool_arguments apenas com filtros claramente expressos pelo usuario, como preco_max, categoria e opcionalmente limite.\n" "- Se o usuario quiser listar agendamentos de revisao, use intent='review_list', domain='review', action='call_tool' e tool_name='listar_agendamentos_revisao'.\n" "- Se o usuario quiser cancelar um agendamento de revisao, use intent='review_cancel', domain='review' e prefira tool_name='cancelar_agendamento_revisao'.\n" "- Se o usuario quiser remarcar um agendamento de revisao, use intent='review_reschedule', domain='review' e prefira tool_name='editar_data_revisao'.\n" @@ -370,3 +381,5 @@ class MessagePlanner: "cancel_order_fields": self.normalizer.normalize_cancel_order_fields(coerced.get("cancel_order_fields")), "intents": self.normalizer.normalize_intents(coerced.get("intents")), } + + diff --git a/app/services/orchestration/technical_normalizer.py b/app/services/orchestration/technical_normalizer.py index 62da10a..d558c15 100644 --- a/app/services/orchestration/technical_normalizer.py +++ b/app/services/orchestration/technical_normalizer.py @@ -92,7 +92,7 @@ def extract_budget_from_text(text: str) -> float | None: normalized = normalize_text(candidate) keyword_match = re.search( - r"(?:ate|até|de|por|orcamento|orçamento)\s+(\d[\d\.\,\s]{1,12})(?!\d)", + r"(?:ate|até|de|por|orcamento|orçamento)\s+(\d[\d\.\,\s]{1,12})(?!\d)", normalized, flags=re.IGNORECASE, ) @@ -128,8 +128,12 @@ def normalize_bool(value) -> bool | None: def normalize_datetime_connector(text: str) -> str: compact = " ".join(str(text or "").strip().split()) - return re.sub(r"\s+[aàáâã]s\s+", " ", compact, flags=re.IGNORECASE).strip() - + return re.sub( + r"((?:\d{1,2}[/-]\d{1,2}[/-]\d{4})|(?:\d{4}[/-]\d{1,2}[/-]\d{1,2}))\s+(?:[^\d\s]{1,6}\s+){1,2}(\d{1,2}:\d{2}(?::\d{2})?(?:\s*(?:Z|[+-]\d{2}:\d{2}))?)", + r"\1 \2", + compact, + flags=re.IGNORECASE, + ).strip() def try_parse_iso_datetime(text: str) -> datetime | None: candidate = str(text or "").strip() diff --git a/tests/test_llm_service.py b/tests/test_llm_service.py index c997b8e..178679b 100644 --- a/tests/test_llm_service.py +++ b/tests/test_llm_service.py @@ -62,6 +62,25 @@ class LLMServiceResponseParsingTests(unittest.TestCase): self.assertEqual(payload, {"response": "Resposta simples", "tool_call": None}) + def test_extract_response_payload_falls_back_to_response_text_accessor(self): + service = LLMService.__new__(LLMService) + response = SimpleNamespace( + text='{"ok": true}', + candidates=[ + SimpleNamespace( + content=SimpleNamespace( + parts=[ + SimpleNamespace(function_call=None), + ] + ) + ) + ] + ) + + payload = service._extract_response_payload(response) + + self.assertEqual(payload, {"response": '{"ok": true}', "tool_call": None}) + class LLMServiceImageWorkflowPromptTests(unittest.TestCase): def test_build_image_workflow_prompt_preserves_visible_payment_time(self): @@ -102,3 +121,87 @@ class LLMServiceImageWorkflowPromptTests(unittest.TestCase): response, "Registrar pagamento de aluguel: contrato LOC-20260319-33CD6567; valor R$ 379,80.", ) + +class LLMServiceDispatchTests(unittest.IsolatedAsyncioTestCase): + async def test_generate_response_uses_generate_content_when_history_is_empty(self): + service = LLMService.__new__(LLMService) + service.model_names = ["gemini-2.5-pro"] + service._log_llm_event = lambda *args, **kwargs: None + service.build_vertex_tools = lambda tools: None + + class DummyChat: + def __init__(self): + self.calls = [] + + def send_message(self, message, **kwargs): + self.calls.append((message, kwargs)) + return SimpleNamespace(candidates=[]) + + class DummyModel: + def __init__(self): + self.generate_calls = [] + self.chat = DummyChat() + + def generate_content(self, message, **kwargs): + self.generate_calls.append((message, kwargs)) + return SimpleNamespace(candidates=[]) + + def start_chat(self, history): + raise AssertionError("nao deveria abrir chat quando nao ha historico") + + model = DummyModel() + service._get_model = lambda model_name: model + service._extract_response_payload = lambda response: {"response": "ok", "tool_call": None} + + generation_config = {"temperature": 0, "max_output_tokens": 128} + + payload = await service.generate_response( + message="teste", + tools=[], + history=[], + generation_config=generation_config, + ) + + self.assertEqual(payload, {"response": "ok", "tool_call": None}) + self.assertEqual( + model.generate_calls, + [("teste", {"generation_config": generation_config})], + ) + + async def test_generate_response_uses_chat_when_history_is_present(self): + service = LLMService.__new__(LLMService) + service.model_names = ["gemini-2.5-pro"] + service._log_llm_event = lambda *args, **kwargs: None + service.build_vertex_tools = lambda tools: None + + class DummyChat: + def __init__(self): + self.calls = [] + + def send_message(self, message, **kwargs): + self.calls.append((message, kwargs)) + return SimpleNamespace(candidates=[]) + + class DummyModel: + def __init__(self): + self.chat = DummyChat() + self.histories = [] + + def generate_content(self, message, **kwargs): + raise AssertionError("nao deveria usar generate_content quando ha historico") + + def start_chat(self, history): + self.histories.append(history) + return self.chat + + model = DummyModel() + service._get_model = lambda model_name: model + service._extract_response_payload = lambda response: {"response": "ok", "tool_call": None} + history = [{"role": "user", "parts": ["oi"]}] + + payload = await service.generate_response(message="teste", tools=[], history=history) + + self.assertEqual(payload, {"response": "ok", "tool_call": None}) + self.assertEqual(model.histories, [history]) + self.assertEqual(model.chat.calls, [("teste", {})]) +