import asyncio
import json
import logging
from typing import Any, Dict, List, Optional

import vertexai
from google.api_core.exceptions import NotFound
from vertexai.generative_models import FunctionDeclaration, GenerativeModel, Tool

from app.core.settings import settings
from app.models.tool_model import ToolDefinition

logger = logging.getLogger(__name__)


class LLMService:
    """Wrapper around Vertex AI generative models with model-name fallback.

    State kept at class level is shared by every instance: the one-time
    ``vertexai.init`` flag, the ``GenerativeModel`` objects keyed by model
    name, and the converted Vertex tool lists keyed by a JSON fingerprint of
    the tool definitions.
    """

    # Model names tried, in order, after the configured one.
    _FALLBACK_MODELS = ("gemini-2.5-flash", "gemini-2.0-flash-001", "gemini-1.5-pro")

    _vertex_initialized = False
    _models: dict[str, GenerativeModel] = {}
    _vertex_tools_cache: dict[str, Optional[List[Tool]]] = {}

    def __init__(self):
        """Initialize the Vertex AI client (once) and build the model list.

        ``self.model_names`` holds the configured model first, followed by
        the fallback models, without duplicates or empty names.
        """
        if not LLMService._vertex_initialized:
            vertexai.init(
                project=settings.google_project_id,
                location=settings.google_location,
            )
            LLMService._vertex_initialized = True

        # A missing or blank setting must not end up as an empty model name in
        # the list, so empty values are dropped before de-duplicating.
        configured = (settings.vertex_model_name or "").strip()
        candidates = (configured, *self._FALLBACK_MODELS)
        # dict.fromkeys removes duplicates while keeping first-seen order.
        self.model_names = list(dict.fromkeys(name for name in candidates if name))

    def build_vertex_tools(self, tools: List[ToolDefinition]) -> Optional[List[Tool]]:
        """Convert internal tool definitions to the format passed to Vertex AI.

        Args:
            tools: Internal tool definitions; each one's ``name``,
                ``description`` and ``parameters`` attributes are read.

        Returns:
            ``None`` when ``tools`` is empty; otherwise a one-element list
            holding a single ``Tool`` with one ``FunctionDeclaration`` per
            input tool. Results are cached at class level, so equal tool
            sets return the same list object.
        """
        if not tools:
            return None

        # Deterministic fingerprint of the tool set, used as the cache key.
        cache_key = json.dumps(
            [
                {
                    "name": tool.name,
                    "description": tool.description,
                    "parameters": tool.parameters,
                }
                for tool in tools
            ],
            sort_keys=True,
            ensure_ascii=True,
            separators=(",", ":"),
        )
        cached = LLMService._vertex_tools_cache.get(cache_key)
        if cached is not None:
            return cached

        # Vertex expects a list of Tool objects, with all function
        # declarations grouped into a single Tool.
        function_declarations = [
            FunctionDeclaration(
                name=tool.name,
                description=tool.description,
                parameters=tool.parameters,
            )
            for tool in tools
        ]
        vertex_tools = [Tool(function_declarations=function_declarations)]
        LLMService._vertex_tools_cache[cache_key] = vertex_tools
        return vertex_tools

    def _get_model(self, model_name: str) -> GenerativeModel:
        """Return the cached ``GenerativeModel`` for ``model_name``, creating it if needed."""
        model = LLMService._models.get(model_name)
        if model is None:
            model = GenerativeModel(model_name)
            LLMService._models[model_name] = model
        return model

    async def generate_response(
        self,
        message: str,
        tools: List[ToolDefinition],
        history: Optional[List[Dict[str, Any]]] = None,
    ) -> Dict[str, Any]:
        """Generate a text answer or a tool call from the user's message.

        Each name in ``self.model_names`` is tried in order; a model that
        raises ``NotFound`` is dropped from the model cache and the next
        one is tried. The blocking ``send_message`` call runs in a worker
        thread via ``asyncio.to_thread``.

        Args:
            message: User message sent to the chat session.
            tools: Internal tool definitions offered to the model.
            history: Prior chat history handed to ``start_chat``; ``None``
                is treated as an empty history.

        Returns:
            ``{"response": None, "tool_call": {"name", "arguments"}}`` when
            any part of the first candidate carries a function call,
            otherwise ``{"response": <text>, "tool_call": None}``.

        Raises:
            RuntimeError: If no model produced a response, or the response
                has no candidates or no content parts.
        """
        vertex_tools = self.build_vertex_tools(tools)
        # Loop-invariant: the same keyword arguments are used for every model.
        send_kwargs = {"tools": vertex_tools} if vertex_tools else {}

        response = None
        last_error = None
        for model_name in self.model_names:
            try:
                model = self._get_model(model_name)
                chat = model.start_chat(history=history or [])
                response = await asyncio.to_thread(chat.send_message, message, **send_kwargs)
            except NotFound as err:
                last_error = err
                # Drop the cached object for a model that was not found.
                LLMService._models.pop(model_name, None)
                continue
            break

        if response is None:
            if last_error:
                raise RuntimeError(
                    "Nenhum modelo Vertex disponivel. Verifique VERTEX_MODEL_NAME e acesso no projeto. "
                    f"Erro: {last_error}"
                ) from last_error
            raise RuntimeError("Falha ao gerar resposta no Vertex AI.")

        # Fail with a descriptive error instead of a bare IndexError when the
        # response carries no candidates or no content parts.
        candidates = response.candidates
        parts = candidates[0].content.parts if candidates else []
        if not parts:
            raise RuntimeError("Resposta do Vertex AI sem candidatos ou conteudo.")

        # Scan every part: a function call is not necessarily the first part.
        for part in parts:
            if part.function_call:
                return {
                    "response": None,
                    "tool_call": {
                        "name": part.function_call.name,
                        "arguments": dict(part.function_call.args),
                    },
                }

        return {
            "response": response.text,
            "tool_call": None,
        }

    async def warmup(self) -> None:
        """Send a trivial request to reduce the latency of the first real one.

        Warmup is best effort: any ``Exception`` is logged and swallowed so
        that a failure here never blocks startup.
        """
        try:
            await self.generate_response(
                message="Responda apenas: ok",
                tools=[],
            )
        except Exception:
            logger.warning("Warmup do Vertex AI falhou; seguindo sem preaquecimento.", exc_info=True)