feat(02-02): wire tool-call loop into agent runner and orchestrator pipeline
- runner.py: multi-turn tool-call loop (LLM -> tool -> observe -> respond)
- runner.py: max 5 iterations guard against runaway tool chains
- runner.py: confirmation gate — returns confirmation msg, stops loop
- runner.py: audit logging for every LLM call via audit_logger
- tasks.py: AuditLogger initialized at task start with session factory
- tasks.py: tool registry built from agent.tool_assignments
- tasks.py: pending tool confirmation flow via Redis (10 min TTL)
- tasks.py: memory persistence skipped for confirmation request responses
- llm-pool/router.py: LLMResponse model with content + tool_calls fields
- llm-pool/router.py: tools parameter forwarded to litellm.acompletion()
- llm-pool/main.py: CompleteRequest accepts optional tools list
- llm-pool/main.py: CompleteResponse includes tool_calls field
- Migration renamed to 004 (003 was already taken by escalation migration)
- [Rule 1 - Bug] Renamed 003_phase2_audit_kb.py -> 004 to fix duplicate revision ID
This commit is contained in:
@@ -9,6 +9,7 @@ Endpoints:
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
from typing import Any
|
||||
|
||||
from fastapi import FastAPI
|
||||
from pydantic import BaseModel
|
||||
@@ -41,6 +42,12 @@ class CompleteRequest(BaseModel):
|
||||
tenant_id: str
|
||||
"""Konstruct tenant UUID for cost tracking."""
|
||||
|
||||
tools: list[dict] | None = None
|
||||
"""
|
||||
Optional OpenAI function-calling tool definitions.
|
||||
When provided, the LLM may return tool_calls instead of text content.
|
||||
"""
|
||||
|
||||
|
||||
class UsageInfo(BaseModel):
|
||||
prompt_tokens: int = 0
|
||||
@@ -51,6 +58,11 @@ class CompleteResponse(BaseModel):
|
||||
content: str
|
||||
model: str
|
||||
usage: UsageInfo
|
||||
tool_calls: list[dict[str, Any]] = []
|
||||
"""
|
||||
Tool calls returned by the LLM, in OpenAI format.
|
||||
Non-empty when the LLM decided to use a tool instead of responding with text.
|
||||
"""
|
||||
|
||||
|
||||
class HealthResponse(BaseModel):
|
||||
@@ -77,23 +89,29 @@ async def complete_endpoint(request: CompleteRequest) -> CompleteResponse:
|
||||
LiteLLM handles provider selection, retries, and cross-group fallback
|
||||
automatically.
|
||||
|
||||
When `tools` are provided, the LLM may return tool_calls instead of text.
|
||||
The response includes both `content` and `tool_calls` fields — exactly one
|
||||
will be populated depending on whether the LLM chose to use a tool.
|
||||
|
||||
Returns 503 JSON if all providers (including fallbacks) are unavailable.
|
||||
"""
|
||||
from fastapi.responses import JSONResponse
|
||||
|
||||
try:
|
||||
content = await router_complete(
|
||||
llm_response = await router_complete(
|
||||
model_group=request.model,
|
||||
messages=request.messages,
|
||||
tenant_id=request.tenant_id,
|
||||
tools=request.tools,
|
||||
)
|
||||
# LiteLLM Router doesn't expose per-call usage easily via acompletion
|
||||
# on all provider paths; we return zeroed usage for now and will wire
|
||||
# real token counts in a follow-up plan when cost tracking is added.
|
||||
return CompleteResponse(
|
||||
content=content,
|
||||
content=llm_response.content,
|
||||
model=request.model,
|
||||
usage=UsageInfo(),
|
||||
tool_calls=llm_response.tool_calls,
|
||||
)
|
||||
except Exception:
|
||||
logger.exception(
|
||||
|
||||
@@ -16,6 +16,7 @@ NOTE: LiteLLM is pinned to ==1.82.5 in pyproject.toml.
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
from typing import Any
|
||||
|
||||
from litellm import Router
|
||||
|
||||
@@ -66,11 +67,26 @@ llm_router = Router(
|
||||
)
|
||||
|
||||
|
||||
class LLMResponse:
|
||||
"""
|
||||
Container for LLM completion response.
|
||||
|
||||
Attributes:
|
||||
content: Text content of the response (empty string if tool_calls present).
|
||||
tool_calls: List of tool call dicts in OpenAI format, or empty list.
|
||||
"""
|
||||
|
||||
def __init__(self, content: str, tool_calls: list[dict[str, Any]]) -> None:
|
||||
self.content = content
|
||||
self.tool_calls = tool_calls
|
||||
|
||||
|
||||
async def complete(
|
||||
model_group: str,
|
||||
messages: list[dict],
|
||||
tenant_id: str,
|
||||
) -> str:
|
||||
tools: list[dict] | None = None,
|
||||
) -> LLMResponse:
|
||||
"""
|
||||
Request a completion from the LiteLLM Router.
|
||||
|
||||
@@ -80,20 +96,50 @@ async def complete(
|
||||
[{"role": "system", "content": "..."}, {"role": "user", "content": "..."}]
|
||||
tenant_id: Konstruct tenant UUID, attached to LiteLLM metadata for
|
||||
per-tenant cost tracking.
|
||||
tools: Optional list of OpenAI function-calling tool dicts. When provided,
|
||||
the LLM may return tool_calls instead of text content.
|
||||
|
||||
Returns:
|
||||
The model's response content as a plain string.
|
||||
LLMResponse with content (text) and tool_calls (list of tool call dicts).
|
||||
- If LLM returns text: content is non-empty, tool_calls is empty.
|
||||
- If LLM returns tool calls: content is empty, tool_calls contains calls.
|
||||
|
||||
Raises:
|
||||
Exception: Propagated if all providers in the group (and fallbacks) fail.
|
||||
"""
|
||||
logger.info("LLM request", extra={"model_group": model_group, "tenant_id": tenant_id})
|
||||
|
||||
response = await llm_router.acompletion(
|
||||
model=model_group,
|
||||
messages=messages,
|
||||
metadata={"tenant_id": tenant_id},
|
||||
)
|
||||
kwargs: dict[str, Any] = {
|
||||
"model": model_group,
|
||||
"messages": messages,
|
||||
"metadata": {"tenant_id": tenant_id},
|
||||
}
|
||||
if tools:
|
||||
kwargs["tools"] = tools
|
||||
|
||||
content: str = response.choices[0].message.content or ""
|
||||
return content
|
||||
response = await llm_router.acompletion(**kwargs)
|
||||
|
||||
choice = response.choices[0]
|
||||
message = choice.message
|
||||
|
||||
# Extract tool_calls if present
|
||||
raw_tool_calls = getattr(message, "tool_calls", None) or []
|
||||
tool_calls: list[dict[str, Any]] = []
|
||||
for tc in raw_tool_calls:
|
||||
# LiteLLM returns tool calls as objects with .id, .function.name, .function.arguments
|
||||
try:
|
||||
tool_calls.append({
|
||||
"id": tc.id,
|
||||
"type": "function",
|
||||
"function": {
|
||||
"name": tc.function.name,
|
||||
"arguments": tc.function.arguments,
|
||||
},
|
||||
})
|
||||
except AttributeError:
|
||||
# Fallback: if it's already a dict (some providers)
|
||||
if isinstance(tc, dict):
|
||||
tool_calls.append(tc)
|
||||
|
||||
content: str = message.content or ""
|
||||
return LLMResponse(content=content, tool_calls=tool_calls)
|
||||
|
||||
Reference in New Issue
Block a user