perf(orchestration): reduzir chamadas redundantes de llm

chore/observability-latency-markers
parent 146475a278
commit ad6a966c00

@ -62,15 +62,36 @@ class MessagePlanner:
user_context = f"user_id={user_id}" if user_id is not None else "user_id=anonimo"
default_turn_decision = self.normalizer.empty_turn_decision()
default_message_plan = self.normalizer.empty_message_plan(message=message)
compact_turn_entities = {
"generic_memory": {},
"review_fields": {},
"review_management_fields": {},
"order_fields": {},
"cancel_order_fields": {},
}
compact_order_entities = {
**compact_turn_entities,
"intents": {},
}
schema_example = json.dumps(
{
"turn_decision": TurnDecision().model_dump(),
"turn_decision": {
"intent": "general",
"domain": "general",
"action": "answer_user",
"entities": compact_turn_entities,
"missing_fields": [],
"selection_index": None,
"tool_name": None,
"tool_arguments": {},
"response_to_user": None,
},
"message_plan": {
"orders": [
{
"domain": "general",
"message": "trecho literal do pedido",
"entities": self.normalizer.empty_extraction_payload(),
"entities": compact_order_entities,
}
]
},
@ -82,28 +103,22 @@ class MessagePlanner:
"Nao use markdown. Nao escreva texto fora do JSON. Nao invente dados ausentes.\n\n"
"Formato obrigatorio:\n"
f"{schema_example}\n\n"
"Regras para turn_decision:\n"
"- 'turn_decision' deve seguir o contrato de decisao por turno.\n"
"- 'domain' deve ser review, sales ou general.\n"
"- 'intent' deve refletir a intencao principal do turno completo.\n"
"- 'action' deve ser uma das acoes do contrato.\n"
"- Se faltar dado para continuar um fluxo, use action='ask_missing_fields' e preencha 'missing_fields' e 'response_to_user'.\n"
"- Se nao houver acao operacional, use action='answer_user'.\n"
"- Em pedidos de compra com faixa de preco ou orcamento (ex.: '70 mil', 'ate 50 mil', 'R$ 45000'), preencha entities.generic_memory.orcamento_max.\n"
"- Em pedidos com tipo de carro (ex.: suv, sedan, hatch, pickup), preencha entities.generic_memory.perfil_veiculo.\n"
"- Se o usuario quiser efetivar a compra de um veiculo, use intent='order_create', domain='sales' e prefira tool_name='realizar_pedido'.\n"
"- Se o usuario quiser listar pedidos, use intent='order_list', domain='sales', action='call_tool' e tool_name='listar_pedidos'.\n"
"- Se o usuario quiser listar revisoes, use intent='review_list', domain='review', action='call_tool' e tool_name='listar_agendamentos_revisao'.\n"
"- Se o usuario quiser cancelar revisao, use intent='review_cancel', domain='review' e prefira tool_name='cancelar_agendamento_revisao'.\n"
"- Se o usuario quiser remarcar revisao, use intent='review_reschedule', domain='review' e prefira tool_name='editar_data_revisao'.\n\n"
"Regras para message_plan:\n"
"- 'message_plan.orders' deve listar os pedidos operacionais em ordem de aparicao.\n"
"- Se houver mais de um pedido operacional, separe em itens distintos.\n"
"- Se nao houver pedido operacional, use domain='general' com a mensagem inteira.\n"
"- Cada item deve conter 'domain', 'message' e 'entities'.\n"
"- Mantenha cada 'message' curta e fiel ao texto do usuario.\n"
"- Em pedidos de compra com faixa de preco ou orcamento (ex.: '70 mil', 'ate 50 mil', 'R$ 45000'), preencha entities.generic_memory.orcamento_max.\n"
"- Em pedidos com tipo de carro (ex.: suv, sedan, hatch, pickup), preencha entities.generic_memory.perfil_veiculo.\n\n"
"Regras gerais:\n"
"- turn_decision resume a intencao principal do turno completo.\n"
"- message_plan.orders separa pedidos operacionais em ordem de aparicao; se nao houver pedido operacional, use um unico item general com a mensagem inteira.\n"
"- Cada item de orders deve conter domain, message e entities. Mantenha message curta e fiel ao texto do usuario.\n"
"- domain deve ser review, sales ou general.\n"
"- Em pedidos de compra com faixa de preco ou orcamento (ex.: 70 mil, ate 50 mil, R$ 45000), preencha entities.generic_memory.orcamento_max nas secoes relevantes.\n"
"- Em pedidos com tipo de carro (ex.: suv, sedan, hatch, pickup), preencha entities.generic_memory.perfil_veiculo nas secoes relevantes.\n"
"- Se faltar dado para continuar um fluxo, use action=ask_missing_fields e preencha missing_fields e response_to_user.\n"
"- Se nao houver acao operacional, use action=answer_user.\n"
"- Se o usuario quiser efetivar a compra de um veiculo, use intent=order_create, domain=sales e prefira tool_name=realizar_pedido.\n"
"- Se o usuario quiser listar pedidos, use intent=order_list, domain=sales, action=call_tool e tool_name=listar_pedidos.\n"
"- Se o usuario quiser consultar estoque com filtros de compra, use intent=inventory_search e domain=sales.\n"
"- Se o usuario quiser listar revisoes, use intent=review_list, domain=review, action=call_tool e tool_name=listar_agendamentos_revisao.\n"
"- Se o usuario quiser cancelar revisao, use intent=review_cancel, domain=review e prefira tool_name=cancelar_agendamento_revisao.\n"
"- Se o usuario quiser remarcar revisao, use intent=review_reschedule, domain=review e prefira tool_name=editar_data_revisao.\n"
"- Se o usuario quiser avaliar um veiculo na troca e houver modelo, ano e km, use domain=sales, action=call_tool, tool_name=avaliar_veiculo_troca e informe esses campos em tool_arguments.\n\n"
f"Contexto: {user_context}\n"
f"Mensagem do usuario: {message}"
)
@ -144,6 +159,8 @@ class MessagePlanner:
}
if has_turn_decision and has_message_plan:
return bundle
if has_turn_decision or has_message_plan:
return bundle
if attempt == 0:
logger.warning(
"Bundle estruturado incompleto; repetindo uma vez. user_id=%s has_turn_decision=%s has_message_plan=%s",
@ -161,7 +178,6 @@ class MessagePlanner:
"has_turn_decision": False,
"has_message_plan": False,
}
async def extract_routing(self, message: str, user_id: int | None) -> dict:
plan = await self.extract_message_plan(message=message, user_id=user_id)
return {

@ -47,6 +47,7 @@ from app.services.flows.review_flow import ReviewFlowMixin
from app.services.orchestration.tool_executor import ToolExecutor
from app.services.tools.tool_registry import ToolRegistry
from app.services.orchestration.response_formatter import format_currency_br, format_datetime_for_chat
from app.services.orchestration.technical_normalizer import extract_budget_from_text, normalize_vehicle_profile
logger = logging.getLogger(__name__)
@ -233,16 +234,20 @@ class OrquestradorService(ReviewFlowMixin, OrderFlowMixin, RentalFlowMixin):
message=message,
user_id=user_id,
)
can_use_turn_bundle = (
bundle_has_useful_turn_decision = (
isinstance(turn_bundle, dict)
and bool(turn_bundle.get("has_turn_decision"))
and bool(turn_bundle.get("has_message_plan"))
and isinstance(turn_bundle.get("turn_decision"), dict)
and self._has_useful_turn_decision(turn_bundle.get("turn_decision"))
)
bundle_has_message_plan = (
isinstance(turn_bundle, dict)
and bool(turn_bundle.get("has_message_plan"))
and isinstance(turn_bundle.get("message_plan"), dict)
)
early_turn_decision = (
turn_bundle.get("turn_decision")
if can_use_turn_bundle
if bundle_has_useful_turn_decision
else await self._extract_turn_decision_with_llm(
message=message,
user_id=user_id,
@ -283,7 +288,7 @@ class OrquestradorService(ReviewFlowMixin, OrderFlowMixin, RentalFlowMixin):
message_plan = (
turn_bundle.get("message_plan")
if can_use_turn_bundle
if bundle_has_message_plan
else await self._extract_message_plan_with_llm(
message=message,
user_id=user_id,
@ -424,6 +429,7 @@ class OrquestradorService(ReviewFlowMixin, OrderFlowMixin, RentalFlowMixin):
self._update_active_domain(user_id=user_id, domain_hint=domain_hint)
reusable_router_result = None
orchestration_override = await self._try_execute_orchestration_control_tool(
message=routing_message,
user_id=user_id,
@ -432,7 +438,12 @@ class OrquestradorService(ReviewFlowMixin, OrderFlowMixin, RentalFlowMixin):
queue_notice=queue_notice,
finish=finish,
)
if orchestration_override:
if isinstance(orchestration_override, dict):
reusable_router_result = orchestration_override.get("llm_result")
handled_response = orchestration_override.get("handled_response")
if handled_response:
return handled_response
elif orchestration_override:
return orchestration_override
trade_in_response = await self._try_handle_trade_in_evaluation(
@ -546,11 +557,13 @@ class OrquestradorService(ReviewFlowMixin, OrderFlowMixin, RentalFlowMixin):
tools = self.registry.get_tools()
llm_result = await self._call_llm_with_trace(
operation="router",
message=self._build_router_prompt(user_message=routing_message, user_id=user_id),
tools=tools,
)
llm_result = reusable_router_result
if not isinstance(llm_result, dict):
llm_result = await self._call_llm_with_trace(
operation="router",
message=self._build_router_prompt(user_message=routing_message, user_id=user_id),
tools=tools,
)
first_pass_text = (llm_result.get("response") or "").strip()
should_force_tool = (
@ -661,7 +674,7 @@ class OrquestradorService(ReviewFlowMixin, OrderFlowMixin, RentalFlowMixin):
extracted_entities: dict,
queue_notice: str | None,
finish,
) -> str | None:
) -> str | dict | None:
decision = turn_decision or {}
decision_action = str(decision.get("action") or "").strip()
decision_tool_name = str(decision.get("tool_name") or "").strip()
@ -693,6 +706,9 @@ class OrquestradorService(ReviewFlowMixin, OrderFlowMixin, RentalFlowMixin):
queue_notice=queue_notice,
)
if self._should_skip_orchestration_control_router(turn_decision=decision):
return None
tools = self.registry.get_tools()
llm_result = await self._call_llm_with_trace(
operation="orchestration_router",
@ -733,8 +749,12 @@ class OrquestradorService(ReviewFlowMixin, OrderFlowMixin, RentalFlowMixin):
)
and self._is_low_value_response(first_pass_text)
)
reusable_first_pass = self._build_reusable_router_result_payload(
llm_result=llm_result,
source="orchestration_router",
)
if not should_force_tool:
return None
return reusable_first_pass
llm_result = await self._call_llm_with_trace(
operation="orchestration_force_tool",
@ -744,7 +764,10 @@ class OrquestradorService(ReviewFlowMixin, OrderFlowMixin, RentalFlowMixin):
forced_tool_call = llm_result.get("tool_call") or {}
forced_tool_name = forced_tool_call.get("name")
if forced_tool_name not in ORCHESTRATION_CONTROL_TOOLS:
return None
return self._build_reusable_router_result_payload(
llm_result=llm_result,
source="orchestration_force_tool",
)
if (
forced_tool_name == "cancelar_fluxo_atual"
and self.policy.should_defer_flow_cancellation_control(message=message, user_id=user_id)
@ -2113,6 +2136,45 @@ class OrquestradorService(ReviewFlowMixin, OrderFlowMixin, RentalFlowMixin):
)
return result
def _extract_sales_search_context_deterministically(self, message: str) -> dict:
started_at = perf_counter()
candidate = str(message or "").strip()
if not candidate:
return {}
extracted: dict[str, object] = {}
budget = extract_budget_from_text(candidate)
if budget:
extracted["orcamento_max"] = int(round(budget))
normalized_message = self._normalize_text(candidate)
raw_profiles: list[str] = []
for pattern, canonical in (
(r"\bsuv\b", "suv"),
(r"\bsedan\b", "sedan"),
(r"\bhatch\b", "hatch"),
(r"\bpickup\b", "pickup"),
(r"\bpicape\b", "pickup"),
):
if canonical in raw_profiles:
continue
if re.search(pattern, normalized_message):
raw_profiles.append(canonical)
profile = normalize_vehicle_profile(raw_profiles)
if profile:
extracted["perfil_veiculo"] = profile
if extracted:
self._emit_turn_stage_metric(
"extract_sales_search_context_short_circuit",
started_at,
source="technical",
has_budget=bool(extracted.get("orcamento_max")),
profile_count=len(extracted.get("perfil_veiculo") or []),
)
return extracted
async def _extract_missing_sales_search_context_with_llm(
self,
message: str,
@ -2123,7 +2185,9 @@ class OrquestradorService(ReviewFlowMixin, OrderFlowMixin, RentalFlowMixin):
decision = turn_decision or {}
decision_intent = str(decision.get("intent") or "").strip().lower()
decision_domain = str(decision.get("domain") or "").strip().lower()
if decision_domain != "sales" and decision_intent not in {"order_create", "order_list", "inventory_search"}:
if decision_intent not in {"order_create", "inventory_search"} and decision_domain != "sales":
return {}
if decision_intent not in {"order_create", "inventory_search"}:
return {}
generic_memory = (extracted_entities or {}).get("generic_memory")
@ -2131,7 +2195,8 @@ class OrquestradorService(ReviewFlowMixin, OrderFlowMixin, RentalFlowMixin):
generic_memory = {}
if generic_memory.get("orcamento_max") or generic_memory.get("perfil_veiculo"):
return {}
return await self._extract_sales_search_context_with_llm(message=message, user_id=user_id)
return self._extract_sales_search_context_deterministically(message)
async def _extract_turn_decision_with_llm(self, message: str, user_id: int | None) -> dict:
started_at = perf_counter()
@ -2200,6 +2265,14 @@ class OrquestradorService(ReviewFlowMixin, OrderFlowMixin, RentalFlowMixin):
def _has_useful_turn_decision(self, turn_decision: dict | None) -> bool:
if not isinstance(turn_decision, dict):
return False
if str(turn_decision.get("response_to_user") or "").strip():
return True
if turn_decision.get("selection_index") is not None:
return True
if str(turn_decision.get("tool_name") or "").strip():
return True
if turn_decision.get("missing_fields"):
return True
if (turn_decision.get("intent") or "general") != "general":
return True
if (turn_decision.get("action") or "answer_user") != "answer_user":
@ -2207,11 +2280,57 @@ class OrquestradorService(ReviewFlowMixin, OrderFlowMixin, RentalFlowMixin):
entities = turn_decision.get("entities")
return self._has_useful_extraction(self._extracted_entities_from_turn_decision(turn_decision)) if isinstance(entities, dict) else False
def _build_reusable_router_result_payload(self, llm_result: dict | None, source: str) -> dict | None:
if not isinstance(llm_result, dict):
return None
tool_call = llm_result.get("tool_call") or {}
tool_name = str(tool_call.get("name") or "").strip()
if tool_name and tool_name not in ORCHESTRATION_CONTROL_TOOLS:
return {"llm_result": llm_result, "source": source}
response_text = str(llm_result.get("response") or "").strip()
if response_text and not self._is_low_value_response(response_text):
return {"llm_result": llm_result, "source": source}
return None
def _should_skip_orchestration_control_router(self, turn_decision: dict | None) -> bool:
    """Decide whether the orchestration-control router call can be skipped.

    Args:
        turn_decision: Turn decision dict (``action``, ``intent``, ``domain``,
            ``tool_name``); ``None`` is treated as an empty decision.

    Returns:
        ``False`` when the decision names an orchestration control tool or one
        of the control actions. ``True`` when it already names a tool to call,
        carries one of the listed sales/review intents, or is a sales/review
        decision with one of the listed collection actions. ``False`` otherwise.
    """
    decision = turn_decision or {}
    action, intent, domain = (
        str(decision.get(key) or "").strip().lower()
        for key in ("action", "intent", "domain")
    )
    tool_name = self.normalizer.normalize_tool_name(decision.get("tool_name"))

    # Control tools and control actions are checked first and never skip the router.
    if tool_name in ORCHESTRATION_CONTROL_TOOLS:
        return False
    control_actions = {"clear_context", "continue_queue", "discard_queue", "cancel_active_flow"}
    if action in control_actions:
        return False

    if action == "call_tool" and tool_name:
        return True

    operational_intents = {
        "order_create",
        "inventory_search",
        "order_list",
        "order_cancel",
        "review_schedule",
        "review_list",
        "review_cancel",
        "review_reschedule",
    }
    if intent in operational_intents:
        return True

    collection_actions = {
        "ask_missing_fields",
        "collect_review_schedule",
        "collect_review_management",
        "collect_order_create",
        "collect_order_cancel",
    }
    return domain in {"sales", "review"} and action in collection_actions
def _extracted_entities_from_turn_decision(self, turn_decision: dict | None) -> dict:
entities = (turn_decision or {}).get("entities")
if not isinstance(entities, dict):
entities = {}
return {
extracted = {
"generic_memory": entities.get("generic_memory", {}),
"review_fields": entities.get("review_fields", {}),
"review_management_fields": entities.get("review_management_fields", {}),
@ -2220,6 +2339,25 @@ class OrquestradorService(ReviewFlowMixin, OrderFlowMixin, RentalFlowMixin):
"intents": {},
}
normalized_tool_name = self.normalizer.normalize_tool_name((turn_decision or {}).get("tool_name"))
raw_tool_arguments = (turn_decision or {}).get("tool_arguments")
if normalized_tool_name == "avaliar_veiculo_troca" and isinstance(raw_tool_arguments, dict):
normalized_arguments = self.normalizer.normalize_tool_arguments(
"avaliar_veiculo_troca",
raw_tool_arguments,
)
if normalized_arguments:
review_fields = extracted.get("review_fields")
if not isinstance(review_fields, dict):
review_fields = {}
for field in ("modelo", "ano", "km"):
value = normalized_arguments.get(field)
if value not in (None, "", [], {}):
review_fields[field] = value
extracted["review_fields"] = review_fields
return extracted
def _merge_extracted_entities(self, base: dict | None, override: dict | None) -> dict:
merged = self._empty_extraction_payload()
for section in ("generic_memory", "review_fields", "review_management_fields", "order_fields", "cancel_order_fields"):

@ -252,6 +252,45 @@ class TurnDecisionContractTests(unittest.IsolatedAsyncioTestCase):
self.assertEqual(bundle["turn_decision"]["entities"]["generic_memory"]["orcamento_max"], 70000)
self.assertEqual(bundle["message_plan"]["orders"][0]["domain"], "sales")
self.assertEqual(bundle["message_plan"]["orders"][0]["message"], "Quero comprar um carro até 70 mil")
async def test_extract_turn_bundle_returns_partial_payload_without_retry_when_first_response_is_useful(self):
    # The first scripted answer carries a turn_decision but an empty message_plan;
    # the second answer exists only to detect an unwanted retry.
    partial_bundle_json = """
{
"turn_decision": {
"intent": "order_create",
"domain": "sales",
"action": "ask_missing_fields",
"entities": {
"generic_memory": {"orcamento_max": 70000},
"review_fields": {},
"review_management_fields": {},
"order_fields": {},
"cancel_order_fields": {}
},
"missing_fields": ["modelo_veiculo"],
"tool_name": null,
"tool_arguments": {},
"response_to_user": "Qual veiculo voce quer comprar?"
},
"message_plan": {}
}
"""
    scripted_responses = [
        {"response": partial_bundle_json, "tool_call": None},
        {"response": "nao deveria chamar de novo", "tool_call": None},
    ]
    fake_llm = FakeLLM(scripted_responses)
    planner = MessagePlanner(llm=fake_llm, normalizer=EntityNormalizer())

    result = await planner.extract_turn_bundle("Quero comprar um carro ate 70 mil", user_id=7)

    # Exactly one LLM call: the partial bundle is returned as-is.
    self.assertEqual(fake_llm.calls, 1)
    self.assertTrue(result["has_turn_decision"])
    self.assertFalse(result["has_message_plan"])
    decision = result["turn_decision"]
    self.assertEqual(decision["intent"], "order_create")
    self.assertEqual(decision["entities"]["generic_memory"]["orcamento_max"], 70000)
def test_parse_json_object_accepts_python_style_dict_with_trailing_commas(self):
normalizer = EntityNormalizer()
@ -956,17 +995,18 @@ class TurnDecisionContractTests(unittest.IsolatedAsyncioTestCase):
self.assertEqual(merged["generic_memory"]["orcamento_max"], 70000)
self.assertEqual(merged["order_fields"]["cpf"], "12345678909")
async def test_missing_sales_search_context_triggers_focused_llm_enrichment(self):
async def test_missing_sales_search_context_uses_deterministic_enrichment_before_llm(self):
service = OrquestradorService.__new__(OrquestradorService)
service.normalizer = EntityNormalizer()
service._log_turn_event = lambda *args, **kwargs: None
async def fake_extract_sales_search_context_with_llm(message: str, user_id: int | None):
return {"orcamento_max": 70000}
async def should_not_run(message: str, user_id: int | None):
raise AssertionError("nao deveria consultar LLM extra para filtros de compra explicitos")
service._extract_sales_search_context_with_llm = fake_extract_sales_search_context_with_llm
service._extract_sales_search_context_with_llm = should_not_run
result = await service._extract_missing_sales_search_context_with_llm(
message="Quero comprar um carro de 70 mil, meu CPF e 12345678909",
message="Quero comprar um carro hatch de 70 mil, meu CPF e 12345678909",
user_id=7,
turn_decision={"domain": "sales", "intent": "order_create", "action": "collect_order_create"},
extracted_entities={
@ -980,6 +1020,72 @@ class TurnDecisionContractTests(unittest.IsolatedAsyncioTestCase):
)
self.assertEqual(result["orcamento_max"], 70000)
self.assertEqual(result["perfil_veiculo"], ["hatch"])
async def test_orchestration_control_router_is_skipped_when_turn_decision_is_already_operational(self):
    # Bare instance: __init__ is bypassed and only the collaborators used here are set.
    service = OrquestradorService.__new__(OrquestradorService)
    service.normalizer = EntityNormalizer()

    async def fail_if_llm_is_called(**kwargs):
        raise AssertionError("nao deveria consultar orchestration_router quando a decisao ja for operacional")

    async def finish(response: str, queue_notice: str | None = None) -> str:
        return response

    service._call_llm_with_trace = fail_if_llm_is_called

    operational_decision = {"intent": "order_create", "domain": "sales", "action": "ask_missing_fields"}
    outcome = await service._try_execute_orchestration_control_tool(
        message="Quero comprar um carro ate 70 mil",
        user_id=7,
        turn_decision=operational_decision,
        extracted_entities={},
        queue_notice=None,
        finish=finish,
    )

    self.assertIsNone(outcome)
async def test_try_execute_orchestration_control_tool_returns_business_tool_call_for_reuse(self):
    # Bare instance wired with just enough stubs for the control-tool path.
    service = OrquestradorService.__new__(OrquestradorService)
    service.normalizer = EntityNormalizer()
    service.policy = SimpleNamespace(should_defer_flow_cancellation_control=lambda **kwargs: False)

    class EmptyToolRegistry:
        def get_tools(self):
            return []

    def echo_prompt(user_message, user_id=None):
        return user_message

    def never_low_value(text):
        return False

    def no_open_flow(user_id, domain):
        return False

    def empty_user_context(user_id):
        return {}

    async def fake_call_llm_with_trace(**kwargs):
        # The router answers with a business (non-control) tool call and no text.
        stock_query = {
            "name": "consultar_estoque",
            "arguments": {"preco_max": 80000.0, "limite": 5},
        }
        return {"response": "", "tool_call": stock_query}

    async def finish(response: str, queue_notice: str | None = None) -> str:
        return response

    service.registry = EmptyToolRegistry()
    service._build_router_prompt = echo_prompt
    service._is_low_value_response = never_low_value
    service._has_open_flow = no_open_flow
    service._get_user_context = empty_user_context
    service._call_llm_with_trace = fake_call_llm_with_trace

    payload = await service._try_execute_orchestration_control_tool(
        message="Quero ver carros ate 80000 reais",
        user_id=7,
        turn_decision={},
        extracted_entities={},
        queue_notice=None,
        finish=finish,
    )

    # The first-pass result comes back wrapped for reuse instead of being dropped.
    self.assertIsInstance(payload, dict)
    self.assertEqual(payload["source"], "orchestration_router")
    self.assertEqual(payload["llm_result"]["tool_call"]["name"], "consultar_estoque")
async def test_turn_decision_call_tool_executes_without_router(self):
service = OrquestradorService.__new__(OrquestradorService)
@ -5153,6 +5259,56 @@ class OrquestradorLatencyOptimizationTests(unittest.IsolatedAsyncioTestCase):
self.assertEqual(len(planner_calls), 1)
self.assertEqual(response, "Fluxo de venda continuado.")
async def test_handle_message_skips_entity_extraction_when_trade_in_tool_arguments_are_already_present(self):
    service = self._build_service()
    message_plan_requests = []

    async def fake_extract_turn_decision(message: str, user_id: int | None):
        # The decision already carries every trade-in field in tool_arguments.
        trade_in_arguments = {"modelo": "Onix", "ano": 2020, "km": 45000}
        return {
            "intent": "general",
            "domain": "sales",
            "action": "call_tool",
            "entities": service.normalizer.empty_extraction_payload(),
            "missing_fields": [],
            "selection_index": None,
            "tool_name": "avaliar_veiculo_troca",
            "tool_arguments": trade_in_arguments,
            "response_to_user": None,
        }

    async def fake_extract_message_plan(message: str, user_id: int | None):
        message_plan_requests.append((message, user_id))
        single_order = {
            "domain": "sales",
            "message": message,
            "entities": service.normalizer.empty_extraction_payload(),
        }
        return {"orders": [single_order]}

    async def fail_if_entities_are_extracted(message: str, user_id: int | None):
        raise AssertionError("extracao dedicada nao deveria rodar quando a decisao de troca ja trouxe tool_arguments completos")

    async def fake_try_handle_trade_in_evaluation(**kwargs):
        # The tool arguments must have been copied into review_fields by now.
        received_entities = kwargs.get("extracted_entities") or {}
        trade_in_fields = received_entities.get("review_fields") or {}
        self.assertEqual(trade_in_fields.get("modelo"), "Onix")
        self.assertEqual(trade_in_fields.get("ano"), 2020)
        self.assertEqual(trade_in_fields.get("km"), 45000)
        return "Estimativa de troca concluida."

    service._extract_turn_decision_with_llm = fake_extract_turn_decision
    service._extract_message_plan_with_llm = fake_extract_message_plan
    service._extract_entities_with_llm = fail_if_entities_are_extracted
    service._try_handle_trade_in_evaluation = fake_try_handle_trade_in_evaluation

    reply = await service.handle_message("Quero avaliar meu carro para troca: Onix 2020, 45000 km", user_id=1)

    self.assertEqual(len(message_plan_requests), 1)
    self.assertEqual(reply, "Estimativa de troca concluida.")
async def test_handle_message_runs_entity_extraction_when_turn_decision_entities_are_empty(self):
service = self._build_service()
planner_calls = []
@ -5208,6 +5364,88 @@ class OrquestradorLatencyOptimizationTests(unittest.IsolatedAsyncioTestCase):
self.assertEqual(len(entity_calls), 1)
self.assertEqual(response, "Fluxo de venda continuado.")
async def test_handle_message_reuses_orchestration_router_tool_call_without_second_router(self):
    service = self._build_service()

    async def fake_extract_turn_bundle(message: str, user_id: int | None):
        # Complete bundle whose decision is generic: no tool, no operational intent.
        turn_decision = {
            "intent": "general",
            "domain": "general",
            "action": "answer_user",
            "entities": {
                "generic_memory": {"orcamento_max": 80000},
                "review_fields": {},
                "review_management_fields": {},
                "order_fields": {},
                "cancel_order_fields": {},
            },
            "missing_fields": [],
            "selection_index": None,
            "tool_name": None,
            "tool_arguments": {},
            "response_to_user": None,
        }
        general_order = {
            "domain": "general",
            "message": message,
            "entities": {
                "generic_memory": {"orcamento_max": 80000},
                "review_fields": {},
                "review_management_fields": {},
                "order_fields": {},
                "cancel_order_fields": {},
                "intents": {},
            },
        }
        return {
            "turn_decision": turn_decision,
            "message_plan": {"orders": [general_order]},
            "has_turn_decision": True,
            "has_message_plan": True,
        }

    async def fail_if_router_is_called(**kwargs):
        raise AssertionError("nao deveria consultar o router quando o orchestration_router ja trouxe tool_call reutilizavel")

    async def fake_try_execute_orchestration_control_tool(**kwargs):
        # The control step hands back a reusable first-pass router result.
        first_pass = {
            "response": "",
            "tool_call": {
                "name": "consultar_estoque",
                "arguments": {"preco_max": 80000.0, "limite": 5},
            },
        }
        return {"source": "orchestration_router", "llm_result": first_pass}

    async def fake_execute_tool_with_trace(tool_name, arguments, user_id=None):
        vehicle = {"id": 1, "modelo": "Toyota Corolla 2020", "categoria": "hatch", "preco": 39809.0}
        return [vehicle]

    async def fake_maybe_build_stock_suggestion_response(**kwargs):
        return "Estoque reutilizado do primeiro router."

    class EmptyToolRegistry:
        def get_tools(self):
            return []

    service._extract_turn_bundle_with_llm = fake_extract_turn_bundle
    service._try_execute_orchestration_control_tool = fake_try_execute_orchestration_control_tool
    service._call_llm_with_trace = fail_if_router_is_called
    service._execute_tool_with_trace = fake_execute_tool_with_trace
    service._maybe_build_stock_suggestion_response = fake_maybe_build_stock_suggestion_response
    service._capture_successful_tool_side_effects = lambda **kwargs: None
    service.registry = EmptyToolRegistry()

    reply = await service.handle_message("Quero ver carros ate 80000 reais", user_id=1)

    self.assertEqual(reply, "Estoque reutilizado do primeiro router.")
async def test_handle_message_uses_turn_bundle_when_available(self):
service = self._build_service()
bundle_calls = []
@ -5264,6 +5502,124 @@ class OrquestradorLatencyOptimizationTests(unittest.IsolatedAsyncioTestCase):
self.assertEqual(len(bundle_calls), 1)
self.assertEqual(response, "Fluxo de venda continuado.")
async def test_handle_message_reuses_partial_bundle_turn_decision_when_message_plan_is_missing(self):
    service = self._build_service()
    legacy_decision_requests = []
    legacy_plan_requests = []

    async def fake_extract_turn_bundle(message: str, user_id: int | None):
        # The bundle has a useful decision but flags the message plan as missing.
        useful_decision = {
            "intent": "order_create",
            "domain": "sales",
            "action": "ask_missing_fields",
            "entities": {
                "generic_memory": {"orcamento_max": 70000},
                "review_fields": {},
                "review_management_fields": {},
                "order_fields": {},
                "cancel_order_fields": {},
            },
            "missing_fields": ["modelo_veiculo"],
            "selection_index": None,
            "tool_name": None,
            "tool_arguments": {},
            "response_to_user": None,
        }
        return {
            "turn_decision": useful_decision,
            "message_plan": service.normalizer.empty_message_plan(message),
            "has_turn_decision": True,
            "has_message_plan": False,
        }

    async def fail_if_turn_decision_is_requested(message: str, user_id: int | None):
        legacy_decision_requests.append((message, user_id))
        raise AssertionError("nao deveria consultar turn_decision legado quando o bundle ja trouxe decisao util")

    async def fake_extract_message_plan(message: str, user_id: int | None):
        legacy_plan_requests.append((message, user_id))
        sales_order = {
            "domain": "sales",
            "message": message,
            "entities": service.normalizer.empty_extraction_payload(),
        }
        return {"orders": [sales_order]}

    async def fake_try_collect_and_create_order(**kwargs):
        return "Fluxo de venda continuado."

    service._extract_turn_bundle_with_llm = fake_extract_turn_bundle
    service._extract_turn_decision_with_llm = fail_if_turn_decision_is_requested
    service._extract_message_plan_with_llm = fake_extract_message_plan
    service._try_collect_and_create_order = fake_try_collect_and_create_order

    reply = await service.handle_message("quero comprar um carro ate 70 mil", user_id=1)

    # Only the missing half (the message plan) is fetched through the legacy path.
    self.assertEqual(len(legacy_decision_requests), 0)
    self.assertEqual(len(legacy_plan_requests), 1)
    self.assertEqual(reply, "Fluxo de venda continuado.")
async def test_handle_message_reuses_partial_bundle_message_plan_when_turn_decision_is_missing(self):
    service = self._build_service()
    legacy_decision_requests = []
    legacy_plan_requests = []

    async def fake_extract_turn_bundle(message: str, user_id: int | None):
        # The bundle has a message plan but flags the turn decision as missing.
        sales_order = {
            "domain": "sales",
            "message": message,
            "entities": service.normalizer.empty_extraction_payload(),
        }
        return {
            "turn_decision": service.normalizer.empty_turn_decision(),
            "message_plan": {"orders": [sales_order]},
            "has_turn_decision": False,
            "has_message_plan": True,
        }

    async def fake_extract_turn_decision(message: str, user_id: int | None):
        legacy_decision_requests.append((message, user_id))
        extracted = {
            "generic_memory": {"orcamento_max": 70000},
            "review_fields": {},
            "review_management_fields": {},
            "order_fields": {},
            "cancel_order_fields": {},
        }
        return {
            "intent": "order_create",
            "domain": "sales",
            "action": "ask_missing_fields",
            "entities": extracted,
            "missing_fields": ["modelo_veiculo"],
            "selection_index": None,
            "tool_name": None,
            "tool_arguments": {},
            "response_to_user": None,
        }

    async def fail_if_message_plan_is_requested(message: str, user_id: int | None):
        legacy_plan_requests.append((message, user_id))
        raise AssertionError("nao deveria consultar message_plan legado quando o bundle ja trouxe plano util")

    async def fake_try_collect_and_create_order(**kwargs):
        return "Fluxo de venda continuado."

    service._extract_turn_bundle_with_llm = fake_extract_turn_bundle
    service._extract_turn_decision_with_llm = fake_extract_turn_decision
    service._extract_message_plan_with_llm = fail_if_message_plan_is_requested
    service._try_collect_and_create_order = fake_try_collect_and_create_order

    reply = await service.handle_message("quero comprar um carro ate 70 mil", user_id=1)

    # Only the missing half (the turn decision) is fetched through the legacy path.
    self.assertEqual(len(legacy_decision_requests), 1)
    self.assertEqual(len(legacy_plan_requests), 0)
    self.assertEqual(reply, "Fluxo de venda continuado.")
async def test_handle_message_falls_back_to_legacy_turn_decision_and_message_plan_when_bundle_is_incomplete(self):
service = self._build_service()
bundle_calls = []

Loading…
Cancel
Save