import asyncio
import json
from typing import Dict, Any, List, Optional

import vertexai
from google.api_core.exceptions import NotFound
from vertexai.generative_models import FunctionDeclaration, GenerativeModel, Tool

from app.core.settings import settings
from app.models.tool_model import ToolDefinition


class LLMService:
    """Encapsulate the Vertex AI integration: initialization, model caching,
    and tool serialization."""

    _vertex_initialized = False
    _models: dict[str, GenerativeModel] = {}
    _vertex_tools_cache: dict[str, Optional[List[Tool]]] = {}

    def __init__(self):
        """Initialize the Vertex AI client and define fallback model names."""
        if not LLMService._vertex_initialized:
            vertexai.init(
                project=settings.google_project_id,
                location=settings.google_location,
            )
            LLMService._vertex_initialized = True

        configured = settings.vertex_model_name.strip()
        fallback_models = ["gemini-2.5-flash", "gemini-2.0-flash-001", "gemini-1.5-pro"]
        self.model_names = [configured] + [m for m in fallback_models if m != configured]

    def build_vertex_tools(self, tools: List[ToolDefinition]) -> Optional[List[Tool]]:
        """Convert internal tool definitions into the format expected by Vertex AI."""
        # Vertex expects a list of Tool objects, with all function_declarations
        # grouped into a single Tool.
        if not tools:
            return None

        cache_key = json.dumps(
            [
                {
                    "name": tool.name,
                    "description": tool.description,
                    "parameters": tool.parameters,
                }
                for tool in tools
            ],
            sort_keys=True,
            ensure_ascii=True,
            separators=(",", ":"),
        )
        cached = LLMService._vertex_tools_cache.get(cache_key)
        if cached is not None:
            return cached

        function_declarations = [
            FunctionDeclaration(
                name=tool.name,
                description=tool.description,
                parameters=tool.parameters,
            )
            for tool in tools
        ]

        vertex_tools = [Tool(function_declarations=function_declarations)]
        LLMService._vertex_tools_cache[cache_key] = vertex_tools
        return vertex_tools
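
    # A minimal sketch of the expected input, assuming ToolDefinition carries a
    # JSON-schema "parameters" dict. The tool name and schema below are
    # hypothetical, for illustration only:
    #
    #     ToolDefinition(
    #         name="get_weather",
    #         description="Return the current weather for a city.",
    #         parameters={
    #             "type": "object",
    #             "properties": {"city": {"type": "string"}},
    #             "required": ["city"],
    #         },
    #     )
    #
    # build_vertex_tools wraps every definition into a single Tool, so Vertex
    # receives one Tool with N FunctionDeclarations rather than N Tools.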

    def _get_model(self, model_name: str) -> GenerativeModel:
        """Return a cached GenerativeModel instance, creating it on first use."""
        model = LLMService._models.get(model_name)
        if model is None:
            model = GenerativeModel(model_name)
            LLMService._models[model_name] = model
        return model

    async def generate_response(
        self,
        message: str,
        tools: List[ToolDefinition],
        history: Optional[List[Dict[str, Any]]] = None,
    ) -> Dict[str, Any]:
        """Generate a text response or a tool call from the user's message."""
        vertex_tools = self.build_vertex_tools(tools)

        response = None
        last_error = None

        # Try the configured model first and fall back to alternative names
        # when the primary one is unavailable in the project/region.
        for model_name in self.model_names:
            try:
                model = self._get_model(model_name)
                chat = model.start_chat(history=history or [])
                send_kwargs = {"tools": vertex_tools} if vertex_tools else {}
                response = await asyncio.to_thread(chat.send_message, message, **send_kwargs)
                break
            except NotFound as err:
                last_error = err
                LLMService._models.pop(model_name, None)
                continue

        if response is None:
            if last_error:
                raise RuntimeError(
                    f"No Vertex model available. Check VERTEX_MODEL_NAME and project access. Error: {last_error}"
                ) from last_error
            raise RuntimeError("Failed to generate a response from Vertex AI.")

        part = response.candidates[0].content.parts[0]

        if part.function_call:
            return {
                "response": None,
                "tool_call": {
                    "name": part.function_call.name,
                    "arguments": dict(part.function_call.args),
                },
            }

        return {
            "response": response.text,
            "tool_call": None,
        }

    async def warmup(self) -> None:
        """Pre-warm the connection/model to reduce first-request latency."""
        try:
            await self.generate_response(
                message="Reply only: ok",
                tools=[],
            )
        except Exception:
            # Warmup is best-effort; failures must not block startup.
            return
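

# Minimal usage sketch, not part of the service API. It assumes that
# settings.google_project_id, settings.google_location, and
# settings.vertex_model_name are configured and that the environment has
# Vertex AI access; _demo is a hypothetical helper for illustration only.
async def _demo() -> None:
    service = LLMService()
    await service.warmup()
    result = await service.generate_response(message="Hello!", tools=[])
    if result["tool_call"]:
        # The model asked to invoke a tool; a real caller would dispatch it.
        print("tool requested:", result["tool_call"]["name"], result["tool_call"]["arguments"])
    else:
        print(result["response"])


if __name__ == "__main__":
    asyncio.run(_demo())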