feat(streaming): add run_agent_streaming() to orchestrator runner
- run_agent_streaming() calls POST /complete/stream and yields token strings - Reads NDJSON lines from the streaming response, yields on 'chunk' events - On 'error' or connection failure, yields the fallback response string - Tool calls are not supported in the streaming path - Existing run_agent() (non-streaming, tool-call loop) is unchanged
This commit is contained in:
@@ -16,13 +16,20 @@ Tool-call loop (Phase 2):
|
||||
- Otherwise: append tool result as 'tool' role message, re-call LLM
|
||||
Loop until LLM returns plain text (no tool_calls) or max_iterations reached.
|
||||
Max iterations = 5 (prevents runaway tool chains).
|
||||
|
||||
Streaming (web channel only):
|
||||
run_agent_streaming() calls POST /complete/stream and yields token strings.
|
||||
Tool calls are NOT supported in the streaming path — only used for the final
|
||||
text response after the tool-call loop has resolved (or when no tools needed).
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import logging
|
||||
import time
|
||||
import uuid
|
||||
from collections.abc import AsyncGenerator
|
||||
from typing import TYPE_CHECKING, Any
|
||||
|
||||
import httpx
|
||||
@@ -268,6 +275,99 @@ async def run_agent(
|
||||
return _FALLBACK_RESPONSE
|
||||
|
||||
|
||||
async def run_agent_streaming(
    msg: KonstructMessage,
    agent: Agent,
    messages: list[dict] | None = None,
) -> AsyncGenerator[str, None]:
    """
    Stream the final LLM response for an agent, yielding token strings.

    This is the web-channel-only streaming path. It does NOT support tool calls —
    tool resolution must be completed BEFORE calling this function (use run_agent()
    for the tool-call loop, then only call this for streaming plain text responses).

    The caller (tasks._process_message) is responsible for:
    - Running the tool-call loop via run_agent() first if tools are registered
    - Only falling into this path when the response will be plain text
    - Publishing each yielded token to Redis pub-sub

    For simple conversations (no tools), this streams the response directly.

    Args:
        msg: The inbound KonstructMessage.
        agent: The ORM Agent instance.
        messages: Memory-enriched messages array (from build_messages_with_memory).
            When None, falls back to simple [system, user] construction.

    Yields:
        Token strings as generated by the LLM.
        On error before any token has been streamed, yields the fallback
        response string as a single chunk. If the stream fails after partial
        output has been yielded, the error is logged but NO fallback is
        emitted, so the user-visible message is never corrupted by a
        fallback string appended to half a real answer.
    """
    if messages is None:
        # Deferred import avoids a circular dependency at module load time.
        from orchestrator.agents.builder import build_messages, build_system_prompt

        system_prompt = build_system_prompt(agent)
        user_text: str = msg.content.text or ""
        messages = build_messages(
            system_prompt=system_prompt,
            user_message=user_text,
        )

    llm_stream_url = f"{settings.llm_pool_url}/complete/stream"
    payload: dict[str, Any] = {
        "model": agent.model_preference,
        "messages": messages,
        "tenant_id": str(msg.tenant_id) if msg.tenant_id else "",
    }

    # Tracks whether at least one token reached the caller. Emitting the
    # fallback string after partial streamed output would append an error
    # apology onto a half-delivered answer, so fallback is only allowed
    # while this is still False.
    streamed_any = False

    try:
        async with httpx.AsyncClient(timeout=_LLM_TIMEOUT) as client:
            async with client.stream("POST", llm_stream_url, json=payload) as response:
                if response.status_code != 200:
                    logger.error(
                        "LLM pool stream returned %d for tenant=%s agent=%s",
                        response.status_code,
                        msg.tenant_id,
                        agent.id,
                    )
                    yield _FALLBACK_RESPONSE
                    return

                # The pool emits NDJSON: one JSON event object per line.
                async for line in response.aiter_lines():
                    if not line.strip():
                        # Blank lines are keep-alives / separators.
                        continue
                    try:
                        event = json.loads(line)
                    except json.JSONDecodeError:
                        # Skip malformed lines rather than aborting the stream.
                        continue

                    event_type = event.get("type")
                    if event_type == "chunk":
                        token = event.get("text", "")
                        if token:
                            streamed_any = True
                            yield token
                    elif event_type == "done":
                        return
                    elif event_type == "error":
                        logger.error(
                            "LLM pool stream error for tenant=%s agent=%s: %s",
                            msg.tenant_id,
                            agent.id,
                            event.get("message", "unknown"),
                        )
                        if not streamed_any:
                            yield _FALLBACK_RESPONSE
                        return

    except httpx.RequestError:
        # Covers connect failures, DNS errors, and read timeouts mid-stream.
        logger.exception(
            "LLM pool stream unreachable for tenant=%s agent=%s url=%s",
            msg.tenant_id,
            agent.id,
            llm_stream_url,
        )
        if not streamed_any:
            yield _FALLBACK_RESPONSE
|
||||
|
||||
|
||||
def _get_last_user_message(messages: list[dict[str, Any]]) -> str:
|
||||
"""Extract the content of the last user message for audit summary."""
|
||||
for msg in reversed(messages):
|
||||
|
||||
Reference in New Issue
Block a user