feat(02-02): wire tool-call loop into agent runner and orchestrator pipeline

- runner.py: multi-turn tool-call loop (LLM -> tool -> observe -> respond)
- runner.py: max 5 iterations guard against runaway tool chains
- runner.py: confirmation gate — returns a confirmation message and stops the loop
- runner.py: audit logging for every LLM call via audit_logger
- tasks.py: AuditLogger initialized at task start with session factory
- tasks.py: tool registry built from agent.tool_assignments
- tasks.py: pending tool confirmation flow via Redis (10 min TTL)
- tasks.py: memory persistence skipped for confirmation request responses
- llm-pool/router.py: LLMResponse model with content + tool_calls fields
- llm-pool/router.py: tools parameter forwarded to litellm.acompletion()
- llm-pool/main.py: CompleteRequest accepts optional tools list
- llm-pool/main.py: CompleteResponse includes tool_calls field
- Migration renamed to 004 (003 was already taken by escalation migration)
- [Rule 1 - Bug] Renamed 003_phase2_audit_kb.py -> 004 to fix duplicate revision ID
This commit is contained in:
2026-03-23 15:00:17 -06:00
parent d1bcdef0f5
commit 44fa7e6845
5 changed files with 671 additions and 237 deletions

View File

@@ -9,6 +9,7 @@ Endpoints:
from __future__ import annotations
import logging
from typing import Any
from fastapi import FastAPI
from pydantic import BaseModel
@@ -41,6 +42,12 @@ class CompleteRequest(BaseModel):
tenant_id: str
"""Konstruct tenant UUID for cost tracking."""
tools: list[dict] | None = None
"""
Optional OpenAI function-calling tool definitions.
When provided, the LLM may return tool_calls instead of text content.
"""
class UsageInfo(BaseModel):
prompt_tokens: int = 0
@@ -51,6 +58,11 @@ class CompleteResponse(BaseModel):
content: str
model: str
usage: UsageInfo
tool_calls: list[dict[str, Any]] = []
"""
Tool calls returned by the LLM, in OpenAI format.
Non-empty when the LLM decided to use a tool instead of responding with text.
"""
class HealthResponse(BaseModel):
@@ -77,23 +89,29 @@ async def complete_endpoint(request: CompleteRequest) -> CompleteResponse:
LiteLLM handles provider selection, retries, and cross-group fallback
automatically.
When `tools` are provided, the LLM may return tool_calls instead of text.
The response includes both `content` and `tool_calls` fields — exactly one
will be populated depending on whether the LLM chose to use a tool.
Returns 503 JSON if all providers (including fallbacks) are unavailable.
"""
from fastapi.responses import JSONResponse
try:
content = await router_complete(
llm_response = await router_complete(
model_group=request.model,
messages=request.messages,
tenant_id=request.tenant_id,
tools=request.tools,
)
# LiteLLM Router doesn't expose per-call usage easily via acompletion
# on all provider paths; we return zeroed usage for now and will wire
# real token counts in a follow-up plan when cost tracking is added.
return CompleteResponse(
content=content,
content=llm_response.content,
model=request.model,
usage=UsageInfo(),
tool_calls=llm_response.tool_calls,
)
except Exception:
logger.exception(