feat(02-02): wire tool-call loop into agent runner and orchestrator pipeline
- runner.py: multi-turn tool-call loop (LLM -> tool -> observe -> respond)
- runner.py: max 5 iterations guard against runaway tool chains
- runner.py: confirmation gate — returns confirmation msg, stops loop
- runner.py: audit logging for every LLM call via audit_logger
- tasks.py: AuditLogger initialized at task start with session factory
- tasks.py: tool registry built from agent.tool_assignments
- tasks.py: pending tool confirmation flow via Redis (10 min TTL)
- tasks.py: memory persistence skipped for confirmation request responses
- llm-pool/router.py: LLMResponse model with content + tool_calls fields
- llm-pool/router.py: tools parameter forwarded to litellm.acompletion()
- llm-pool/main.py: CompleteRequest accepts optional tools list
- llm-pool/main.py: CompleteResponse includes tool_calls field
- Migration renamed to 004 (003 was already taken by escalation migration)
- [Rule 1 - Bug] Renamed 003_phase2_audit_kb.py -> 004 to fix duplicate revision ID
This commit is contained in:
@@ -9,6 +9,7 @@ Endpoints:
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
from typing import Any
|
||||
|
||||
from fastapi import FastAPI
|
||||
from pydantic import BaseModel
|
||||
@@ -41,6 +42,12 @@ class CompleteRequest(BaseModel):
|
||||
tenant_id: str
|
||||
"""Konstruct tenant UUID for cost tracking."""
|
||||
|
||||
tools: list[dict] | None = None
|
||||
"""
|
||||
Optional OpenAI function-calling tool definitions.
|
||||
When provided, the LLM may return tool_calls instead of text content.
|
||||
"""
|
||||
|
||||
|
||||
class UsageInfo(BaseModel):
|
||||
prompt_tokens: int = 0
|
||||
@@ -51,6 +58,11 @@ class CompleteResponse(BaseModel):
|
||||
content: str
|
||||
model: str
|
||||
usage: UsageInfo
|
||||
tool_calls: list[dict[str, Any]] = []
|
||||
"""
|
||||
Tool calls returned by the LLM, in OpenAI format.
|
||||
Non-empty when the LLM decided to use a tool instead of responding with text.
|
||||
"""
|
||||
|
||||
|
||||
class HealthResponse(BaseModel):
|
||||
@@ -77,23 +89,29 @@ async def complete_endpoint(request: CompleteRequest) -> CompleteResponse:
|
||||
LiteLLM handles provider selection, retries, and cross-group fallback
|
||||
automatically.
|
||||
|
||||
When `tools` are provided, the LLM may return tool_calls instead of text.
|
||||
The response includes both `content` and `tool_calls` fields — exactly one
|
||||
will be populated depending on whether the LLM chose to use a tool.
|
||||
|
||||
Returns 503 JSON if all providers (including fallbacks) are unavailable.
|
||||
"""
|
||||
from fastapi.responses import JSONResponse
|
||||
|
||||
try:
|
||||
content = await router_complete(
|
||||
llm_response = await router_complete(
|
||||
model_group=request.model,
|
||||
messages=request.messages,
|
||||
tenant_id=request.tenant_id,
|
||||
tools=request.tools,
|
||||
)
|
||||
# LiteLLM Router doesn't expose per-call usage easily via acompletion
|
||||
# on all provider paths; we return zeroed usage for now and will wire
|
||||
# real token counts in a follow-up plan when cost tracking is added.
|
||||
return CompleteResponse(
|
||||
content=content,
|
||||
content=llm_response.content,
|
||||
model=request.model,
|
||||
usage=UsageInfo(),
|
||||
tool_calls=llm_response.tool_calls,
|
||||
)
|
||||
except Exception:
|
||||
logger.exception(
|
||||
|
||||
@@ -16,6 +16,7 @@ NOTE: LiteLLM is pinned to ==1.82.5 in pyproject.toml.
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
from typing import Any
|
||||
|
||||
from litellm import Router
|
||||
|
||||
@@ -66,11 +67,26 @@ llm_router = Router(
|
||||
)
|
||||
|
||||
|
||||
class LLMResponse:
|
||||
"""
|
||||
Container for LLM completion response.
|
||||
|
||||
Attributes:
|
||||
content: Text content of the response (empty string if tool_calls present).
|
||||
tool_calls: List of tool call dicts in OpenAI format, or empty list.
|
||||
"""
|
||||
|
||||
def __init__(self, content: str, tool_calls: list[dict[str, Any]]) -> None:
|
||||
self.content = content
|
||||
self.tool_calls = tool_calls
|
||||
|
||||
|
||||
async def complete(
|
||||
model_group: str,
|
||||
messages: list[dict],
|
||||
tenant_id: str,
|
||||
) -> str:
|
||||
tools: list[dict] | None = None,
|
||||
) -> LLMResponse:
|
||||
"""
|
||||
Request a completion from the LiteLLM Router.
|
||||
|
||||
@@ -80,20 +96,50 @@ async def complete(
|
||||
[{"role": "system", "content": "..."}, {"role": "user", "content": "..."}]
|
||||
tenant_id: Konstruct tenant UUID, attached to LiteLLM metadata for
|
||||
per-tenant cost tracking.
|
||||
tools: Optional list of OpenAI function-calling tool dicts. When provided,
|
||||
the LLM may return tool_calls instead of text content.
|
||||
|
||||
Returns:
|
||||
The model's response content as a plain string.
|
||||
LLMResponse with content (text) and tool_calls (list of tool call dicts).
|
||||
- If LLM returns text: content is non-empty, tool_calls is empty.
|
||||
- If LLM returns tool calls: content is empty, tool_calls contains calls.
|
||||
|
||||
Raises:
|
||||
Exception: Propagated if all providers in the group (and fallbacks) fail.
|
||||
"""
|
||||
logger.info("LLM request", extra={"model_group": model_group, "tenant_id": tenant_id})
|
||||
|
||||
response = await llm_router.acompletion(
|
||||
model=model_group,
|
||||
messages=messages,
|
||||
metadata={"tenant_id": tenant_id},
|
||||
)
|
||||
kwargs: dict[str, Any] = {
|
||||
"model": model_group,
|
||||
"messages": messages,
|
||||
"metadata": {"tenant_id": tenant_id},
|
||||
}
|
||||
if tools:
|
||||
kwargs["tools"] = tools
|
||||
|
||||
content: str = response.choices[0].message.content or ""
|
||||
return content
|
||||
response = await llm_router.acompletion(**kwargs)
|
||||
|
||||
choice = response.choices[0]
|
||||
message = choice.message
|
||||
|
||||
# Extract tool_calls if present
|
||||
raw_tool_calls = getattr(message, "tool_calls", None) or []
|
||||
tool_calls: list[dict[str, Any]] = []
|
||||
for tc in raw_tool_calls:
|
||||
# LiteLLM returns tool calls as objects with .id, .function.name, .function.arguments
|
||||
try:
|
||||
tool_calls.append({
|
||||
"id": tc.id,
|
||||
"type": "function",
|
||||
"function": {
|
||||
"name": tc.function.name,
|
||||
"arguments": tc.function.arguments,
|
||||
},
|
||||
})
|
||||
except AttributeError:
|
||||
# Fallback: if it's already a dict (some providers)
|
||||
if isinstance(tc, dict):
|
||||
tool_calls.append(tc)
|
||||
|
||||
content: str = message.content or ""
|
||||
return LLMResponse(content=content, tool_calls=tool_calls)
|
||||
|
||||
Reference in New Issue
Block a user