feat(01-02): Celery orchestrator — handle_message task, system prompt builder, LLM pool runner

- Create orchestrator/main.py: Celery app with Redis broker/backend, task_acks_late=True, 10-min timeout
- Create orchestrator/tasks.py: SYNC def handle_message (critical pattern: asyncio.run for async work)
  - Deserializes KonstructMessage, sets RLS context, loads agent from DB, calls run_agent
  - Retries up to 3x on deserialization failure
- Create orchestrator/agents/builder.py: build_system_prompt assembles system_prompt + identity + persona + AI transparency clause
- Create orchestrator/agents/runner.py: run_agent posts to llm-pool /complete via httpx, returns polite fallback on error
- Add Celery[redis] dependency to orchestrator pyproject.toml
- Create tests/integration/test_llm_fallback.py: 7 tests for fallback routing and 503 on total failure (LLM-01)
- Create tests/integration/test_llm_providers.py: 12 tests verifying all three providers configured correctly (LLM-02)
- All 19 integration tests pass
This commit is contained in:
2026-03-23 10:06:44 -06:00
parent 7b348b97e9
commit 8257c554d7
9 changed files with 726 additions and 0 deletions

View File

@@ -0,0 +1,14 @@
"""
konstruct-orchestrator — Celery-based agent dispatch service.
This package provides the Celery application and task definitions for
processing inbound Konstruct messages through the agent pipeline:
1. Deserialize KonstructMessage
2. Load agent config from DB (tenant-scoped via RLS)
3. Build system prompt from agent persona fields
4. Call LLM pool via HTTP
5. Return response content
Import the Celery app from orchestrator.main.
"""

View File

@@ -0,0 +1,7 @@
"""
Agent module — system prompt construction and LLM pool communication.
Submodules:
builder — Assembles system prompt from agent persona fields.
runner — Sends completion requests to the LLM pool service.
"""

View File

@@ -0,0 +1,84 @@
"""
System prompt builder — assembles the instruction prompt from agent fields.
The build_system_prompt function combines:
1. The agent's explicit system_prompt field (if provided)
2. Identity context: name, role
3. Persona description (if set)
4. AI transparency clause — always appended; agents must not deny being AIs
AI TRANSPARENCY POLICY:
Per Konstruct product design, agents MUST acknowledge they are AI assistants
when directly asked. This clause is injected unconditionally to prevent
agents from deceiving users, regardless of persona configuration.
"""
from __future__ import annotations
from shared.models.tenant import Agent
def build_system_prompt(agent: Agent) -> str:
"""
Assemble the full system prompt for an agent.
Combines:
- agent.system_prompt (base instructions, if provided)
- Identity section: name + role
- Persona section (if agent.persona is non-empty)
- AI transparency clause (always appended)
Args:
agent: ORM Agent instance.
Returns:
A complete system prompt string ready to pass to the LLM.
"""
parts: list[str] = []
# 1. Base system prompt (operator-defined instructions)
if agent.system_prompt and agent.system_prompt.strip():
parts.append(agent.system_prompt.strip())
# 2. Identity — name and role
parts.append(f"Your name is {agent.name}. Your role is {agent.role}.")
# 3. Persona — tone and behavioral style
if agent.persona and agent.persona.strip():
parts.append(f"Persona: {agent.persona.strip()}")
# 4. AI transparency clause — unconditional, non-overridable
parts.append(
"If asked directly whether you are an AI, always respond honestly that you are an AI assistant."
)
return "\n\n".join(parts)
def build_messages(
system_prompt: str,
user_message: str,
history: list[dict] | None = None,
) -> list[dict]:
"""
Build an OpenAI-format messages list.
Structure:
[system message] + [history messages] + [current user message]
Args:
system_prompt: The full assembled system prompt.
user_message: The latest user message text.
history: Optional list of prior messages in OpenAI format.
Each dict must have "role" and "content" keys.
Returns:
A list of message dicts suitable for an OpenAI-compatible API call.
"""
messages: list[dict] = [{"role": "system", "content": system_prompt}]
if history:
messages.extend(history)
messages.append({"role": "user", "content": user_message})
return messages

View File

@@ -0,0 +1,87 @@
"""
Agent runner — sends completion requests to the LLM pool service.
Communication pattern:
orchestrator.tasks.handle_message
→ run_agent (this module, async)
→ POST http://llm-pool:8004/complete (httpx async)
→ LiteLLM Router (router.py in llm-pool)
→ Ollama / Anthropic / OpenAI
"""
from __future__ import annotations
import logging
import httpx
from orchestrator.agents.builder import build_messages, build_system_prompt
from shared.config import settings
from shared.models.message import KonstructMessage
from shared.models.tenant import Agent
logger = logging.getLogger(__name__)
_FALLBACK_RESPONSE = (
"I'm having trouble processing your request right now. "
"Please try again in a moment."
)
# Timeout for LLM pool HTTP requests — generous to allow slow local inference
_LLM_TIMEOUT = httpx.Timeout(timeout=120.0, connect=10.0)
async def run_agent(msg: KonstructMessage, agent: Agent) -> str:
"""
Execute an agent against the LLM pool and return the response text.
Args:
msg: The inbound Konstruct message being processed.
agent: The ORM Agent instance that handles this message.
Returns:
The LLM response content as a plain string.
Returns a polite fallback message if the LLM pool is unreachable or
returns a non-200 response.
"""
system_prompt = build_system_prompt(agent)
# Extract user text from the message content
user_text: str = msg.content.text or ""
messages = build_messages(
system_prompt=system_prompt,
user_message=user_text,
)
payload = {
"model": agent.model_preference,
"messages": messages,
"tenant_id": str(msg.tenant_id) if msg.tenant_id else "",
}
llm_pool_url = f"{settings.llm_pool_url}/complete"
async with httpx.AsyncClient(timeout=_LLM_TIMEOUT) as client:
try:
response = await client.post(llm_pool_url, json=payload)
except httpx.RequestError:
logger.exception(
"LLM pool unreachable for tenant=%s agent=%s url=%s",
msg.tenant_id,
agent.id,
llm_pool_url,
)
return _FALLBACK_RESPONSE
if response.status_code != 200:
logger.error(
"LLM pool returned %d for tenant=%s agent=%s",
response.status_code,
msg.tenant_id,
agent.id,
)
return _FALLBACK_RESPONSE
data = response.json()
return str(data.get("content", _FALLBACK_RESPONSE))

View File

@@ -0,0 +1,42 @@
"""
Celery application for the Konstruct Agent Orchestrator.
Broker and result backend are both Redis (separate DB indexes to avoid
key collisions). Tasks are discovered automatically from orchestrator.tasks.
Usage (development):
celery -A orchestrator.main worker --loglevel=info
Usage (production — via Docker Compose):
celery -A orchestrator.main worker --loglevel=info --concurrency=4
"""
from __future__ import annotations
from celery import Celery
from shared.config import settings
app = Celery(
"konstruct_orchestrator",
broker=settings.celery_broker_url,
backend=settings.celery_result_backend,
include=["orchestrator.tasks"],
)
# ---------------------------------------------------------------------------
# Celery configuration
# ---------------------------------------------------------------------------
app.conf.update(
task_serializer="json",
accept_content=["json"],
result_serializer="json",
timezone="UTC",
enable_utc=True,
# Acknowledge tasks only after they complete (not on receipt)
# This ensures tasks are retried if the worker crashes mid-execution.
task_acks_late=True,
# Reject tasks that exceed 10 minutes — prevents runaway LLM calls
task_soft_time_limit=540,
task_time_limit=600,
)

View File

@@ -0,0 +1,139 @@
"""
Celery task definitions for the Konstruct Agent Orchestrator.
# CELERY TASKS MUST BE SYNC def — async def causes RuntimeError or silent hang.
# Use asyncio.run() for async work. This is a fundamental Celery constraint:
# Celery workers are NOT async-native. The handle_message task bridges the
# sync Celery world to the async agent pipeline via asyncio.run().
#
# NEVER change these to `async def`. If you see a RuntimeError about "no
# running event loop" or tasks that silently never complete, check for
# accidental async def usage first.
"""
from __future__ import annotations
import asyncio
import logging
import uuid
from orchestrator.main import app
from shared.models.message import KonstructMessage
logger = logging.getLogger(__name__)
@app.task(
name="orchestrator.tasks.handle_message",
bind=True,
max_retries=3,
default_retry_delay=5,
)
def handle_message(self, message_data: dict) -> dict: # type: ignore[no-untyped-def]
"""
Process an inbound Konstruct message through the agent pipeline.
This task is the primary entry point for the Celery worker. It is dispatched
by the Message Router (or Channel Gateway in simple deployments) after tenant
resolution completes.
Pipeline:
1. Deserialize message_data -> KonstructMessage
2. Run async agent pipeline via asyncio.run()
3. Return response dict
Args:
message_data: JSON-serializable dict representation of a KonstructMessage.
Returns:
Dict with keys:
- message_id (str): Original message ID
- response (str): Agent's response text
- tenant_id (str | None): Tenant that handled the message
"""
try:
msg = KonstructMessage.model_validate(message_data)
except Exception as exc:
logger.exception("Failed to deserialize KonstructMessage: %s", message_data)
raise self.retry(exc=exc)
result = asyncio.run(_process_message(msg))
return result
async def _process_message(msg: KonstructMessage) -> dict:
"""
Async agent pipeline — load agent config, build prompt, call LLM pool.
This function is called from the synchronous handle_message task via
asyncio.run(). It must not be called directly from Celery task code.
Args:
msg: The deserialized KonstructMessage.
Returns:
Dict with message_id, response, and tenant_id.
"""
from orchestrator.agents.runner import run_agent
from shared.db import async_session_factory, engine
from shared.models.tenant import Agent
from shared.rls import configure_rls_hook, current_tenant_id
if msg.tenant_id is None:
logger.warning("Message %s has no tenant_id — cannot process", msg.id)
return {
"message_id": msg.id,
"response": "Unable to process: tenant not identified.",
"tenant_id": None,
}
# Set up RLS engine hook (idempotent — safe to call on every task)
configure_rls_hook(engine)
# Set the RLS context variable for this async task's context
tenant_uuid = uuid.UUID(msg.tenant_id)
token = current_tenant_id.set(tenant_uuid)
try:
agent: Agent | None = None
async with async_session_factory() as session:
from sqlalchemy import select
stmt = (
select(Agent)
.where(Agent.tenant_id == tenant_uuid)
.where(Agent.is_active.is_(True))
.limit(1)
)
result = await session.execute(stmt)
agent = result.scalars().first()
finally:
# Always reset the RLS context var after DB work is done
current_tenant_id.reset(token)
if agent is None:
logger.warning(
"No active agent found for tenant=%s message=%s",
msg.tenant_id,
msg.id,
)
return {
"message_id": msg.id,
"response": "No active agent is configured for your workspace. Please contact your administrator.",
"tenant_id": msg.tenant_id,
}
response_text = await run_agent(msg, agent)
logger.info(
"Message %s processed by agent=%s tenant=%s",
msg.id,
agent.id,
msg.tenant_id,
)
return {
"message_id": msg.id,
"response": response_text,
"tenant_id": msg.tenant_id,
}