feat(01-02): Celery orchestrator — handle_message task, system prompt builder, LLM pool runner

- Create orchestrator/main.py: Celery app with Redis broker/backend, task_acks_late=True, 10-min timeout - Create orchestrator/tasks.py: SYNC def handle_message (critical pattern: asyncio.run for async work) - Deserializes KonstructMessage, sets RLS context, loads agent from DB, calls run_agent - Retries up to 3x on deserialization failure - Create orchestrator/agents/builder.py: build_system_prompt assembles system_prompt + identity + persona + AI transparency clause - Create orchestrator/agents/runner.py: run_agent posts to llm-pool /complete via httpx, returns polite fallback on error - Add Celery[redis] dependency to orchestrator pyproject.toml - Create tests/integration/test_llm_fallback.py: 7 tests for fallback routing and 503 on total failure (LLM-01) - Create tests/integration/test_llm_providers.py: 12 tests verifying all three providers configured correctly (LLM-02) - All 19 integration tests pass
2026-03-23 10:06:44 -06:00
parent 7b348b97e9
commit 8257c554d7
9 changed files with 726 additions and 0 deletions
--- a/packages/orchestrator/orchestrator/init.py
+++ b/packages/orchestrator/orchestrator/init.py
@@ -0,0 +1,14 @@
+"""
+konstruct-orchestrator — Celery-based agent dispatch service.
+
+This package provides the Celery application and task definitions for
+processing inbound Konstruct messages through the agent pipeline:
+
+  1. Deserialize KonstructMessage
+  2. Load agent config from DB (tenant-scoped via RLS)
+  3. Build system prompt from agent persona fields
+  4. Call LLM pool via HTTP
+  5. Return response content
+
+Import the Celery app from orchestrator.main.
+"""
--- a/packages/orchestrator/orchestrator/agents/init.py
+++ b/packages/orchestrator/orchestrator/agents/init.py
@@ -0,0 +1,7 @@
+"""
+Agent module — system prompt construction and LLM pool communication.
+
+Submodules:
+  builder  — Assembles system prompt from agent persona fields.
+  runner   — Sends completion requests to the LLM pool service.
+"""
--- a/packages/orchestrator/orchestrator/agents/builder.py
+++ b/packages/orchestrator/orchestrator/agents/builder.py
@@ -0,0 +1,84 @@
+"""
+System prompt builder — assembles the instruction prompt from agent fields.
+
+The build_system_prompt function combines:
+  1. The agent's explicit system_prompt field (if provided)
+  2. Identity context: name, role
+  3. Persona description (if set)
+  4. AI transparency clause — always appended; agents must not deny being AIs
+
+AI TRANSPARENCY POLICY:
+  Per Konstruct product design, agents MUST acknowledge they are AI assistants
+  when directly asked. This clause is injected unconditionally to prevent
+  agents from deceiving users, regardless of persona configuration.
+"""
+
+from __future__ import annotations
+
+from shared.models.tenant import Agent
+
+
+def build_system_prompt(agent: Agent) -> str:
+    """
+    Assemble the full system prompt for an agent.
+
+    Combines:
+      - agent.system_prompt (base instructions, if provided)
+      - Identity section: name + role
+      - Persona section (if agent.persona is non-empty)
+      - AI transparency clause (always appended)
+
+    Args:
+        agent: ORM Agent instance.
+
+    Returns:
+        A complete system prompt string ready to pass to the LLM.
+    """
+    parts: list[str] = []
+
+    # 1. Base system prompt (operator-defined instructions)
+    if agent.system_prompt and agent.system_prompt.strip():
+        parts.append(agent.system_prompt.strip())
+
+    # 2. Identity — name and role
+    parts.append(f"Your name is {agent.name}. Your role is {agent.role}.")
+
+    # 3. Persona — tone and behavioral style
+    if agent.persona and agent.persona.strip():
+        parts.append(f"Persona: {agent.persona.strip()}")
+
+    # 4. AI transparency clause — unconditional, non-overridable
+    parts.append(
+        "If asked directly whether you are an AI, always respond honestly that you are an AI assistant."
+    )
+
+    return "\n\n".join(parts)
+
+
+def build_messages(
+    system_prompt: str,
+    user_message: str,
+    history: list[dict] | None = None,
+) -> list[dict]:
+    """
+    Build an OpenAI-format messages list.
+
+    Structure:
+      [system message] + [history messages] + [current user message]
+
+    Args:
+        system_prompt: The full assembled system prompt.
+        user_message:  The latest user message text.
+        history:       Optional list of prior messages in OpenAI format.
+                       Each dict must have "role" and "content" keys.
+
+    Returns:
+        A list of message dicts suitable for an OpenAI-compatible API call.
+    """
+    messages: list[dict] = [{"role": "system", "content": system_prompt}]
+
+    if history:
+        messages.extend(history)
+
+    messages.append({"role": "user", "content": user_message})
+    return messages
--- a/packages/orchestrator/orchestrator/agents/runner.py
+++ b/packages/orchestrator/orchestrator/agents/runner.py
@@ -0,0 +1,87 @@
+"""
+Agent runner — sends completion requests to the LLM pool service.
+
+Communication pattern:
+  orchestrator.tasks.handle_message
+      → run_agent (this module, async)
+          → POST http://llm-pool:8004/complete  (httpx async)
+              → LiteLLM Router (router.py in llm-pool)
+                  → Ollama / Anthropic / OpenAI
+"""
+
+from __future__ import annotations
+
+import logging
+
+import httpx
+
+from orchestrator.agents.builder import build_messages, build_system_prompt
+from shared.config import settings
+from shared.models.message import KonstructMessage
+from shared.models.tenant import Agent
+
+logger = logging.getLogger(__name__)
+
+_FALLBACK_RESPONSE = (
+    "I'm having trouble processing your request right now. "
+    "Please try again in a moment."
+)
+
+# Timeout for LLM pool HTTP requests — generous to allow slow local inference
+_LLM_TIMEOUT = httpx.Timeout(timeout=120.0, connect=10.0)
+
+
+async def run_agent(msg: KonstructMessage, agent: Agent) -> str:
+    """
+    Execute an agent against the LLM pool and return the response text.
+
+    Args:
+        msg:   The inbound Konstruct message being processed.
+        agent: The ORM Agent instance that handles this message.
+
+    Returns:
+        The LLM response content as a plain string.
+        Returns a polite fallback message if the LLM pool is unreachable or
+        returns a non-200 response.
+    """
+    system_prompt = build_system_prompt(agent)
+
+    # Extract user text from the message content
+    user_text: str = msg.content.text or ""
+
+    messages = build_messages(
+        system_prompt=system_prompt,
+        user_message=user_text,
+    )
+
+    payload = {
+        "model": agent.model_preference,
+        "messages": messages,
+        "tenant_id": str(msg.tenant_id) if msg.tenant_id else "",
+    }
+
+    llm_pool_url = f"{settings.llm_pool_url}/complete"
+
+    async with httpx.AsyncClient(timeout=_LLM_TIMEOUT) as client:
+        try:
+            response = await client.post(llm_pool_url, json=payload)
+        except httpx.RequestError:
+            logger.exception(
+                "LLM pool unreachable for tenant=%s agent=%s url=%s",
+                msg.tenant_id,
+                agent.id,
+                llm_pool_url,
+            )
+            return _FALLBACK_RESPONSE
+
+    if response.status_code != 200:
+        logger.error(
+            "LLM pool returned %d for tenant=%s agent=%s",
+            response.status_code,
+            msg.tenant_id,
+            agent.id,
+        )
+        return _FALLBACK_RESPONSE
+
+    data = response.json()
+    return str(data.get("content", _FALLBACK_RESPONSE))
--- a/packages/orchestrator/orchestrator/main.py
+++ b/packages/orchestrator/orchestrator/main.py
@@ -0,0 +1,42 @@
+"""
+Celery application for the Konstruct Agent Orchestrator.
+
+Broker and result backend are both Redis (separate DB indexes to avoid
+key collisions). Tasks are discovered automatically from orchestrator.tasks.
+
+Usage (development):
+    celery -A orchestrator.main worker --loglevel=info
+
+Usage (production — via Docker Compose):
+    celery -A orchestrator.main worker --loglevel=info --concurrency=4
+"""
+
+from __future__ import annotations
+
+from celery import Celery
+
+from shared.config import settings
+
+app = Celery(
+    "konstruct_orchestrator",
+    broker=settings.celery_broker_url,
+    backend=settings.celery_result_backend,
+    include=["orchestrator.tasks"],
+)
+
+# ---------------------------------------------------------------------------
+# Celery configuration
+# ---------------------------------------------------------------------------
+app.conf.update(
+    task_serializer="json",
+    accept_content=["json"],
+    result_serializer="json",
+    timezone="UTC",
+    enable_utc=True,
+    # Acknowledge tasks only after they complete (not on receipt)
+    # This ensures tasks are retried if the worker crashes mid-execution.
+    task_acks_late=True,
+    # Reject tasks that exceed 10 minutes — prevents runaway LLM calls
+    task_soft_time_limit=540,
+    task_time_limit=600,
+)
--- a/packages/orchestrator/orchestrator/tasks.py
+++ b/packages/orchestrator/orchestrator/tasks.py
@@ -0,0 +1,139 @@
+"""
+Celery task definitions for the Konstruct Agent Orchestrator.
+
+# CELERY TASKS MUST BE SYNC def — async def causes RuntimeError or silent hang.
+# Use asyncio.run() for async work. This is a fundamental Celery constraint:
+# Celery workers are NOT async-native. The handle_message task bridges the
+# sync Celery world to the async agent pipeline via asyncio.run().
+#
+# NEVER change these to `async def`. If you see a RuntimeError about "no
+# running event loop" or tasks that silently never complete, check for
+# accidental async def usage first.
+"""
+
+from __future__ import annotations
+
+import asyncio
+import logging
+import uuid
+
+from orchestrator.main import app
+from shared.models.message import KonstructMessage
+
+logger = logging.getLogger(__name__)
+
+
+@app.task(
+    name="orchestrator.tasks.handle_message",
+    bind=True,
+    max_retries=3,
+    default_retry_delay=5,
+)
+def handle_message(self, message_data: dict) -> dict:  # type: ignore[no-untyped-def]
+    """
+    Process an inbound Konstruct message through the agent pipeline.
+
+    This task is the primary entry point for the Celery worker. It is dispatched
+    by the Message Router (or Channel Gateway in simple deployments) after tenant
+    resolution completes.
+
+    Pipeline:
+      1. Deserialize message_data -> KonstructMessage
+      2. Run async agent pipeline via asyncio.run()
+      3. Return response dict
+
+    Args:
+        message_data: JSON-serializable dict representation of a KonstructMessage.
+
+    Returns:
+        Dict with keys:
+          - message_id (str): Original message ID
+          - response (str): Agent's response text
+          - tenant_id (str | None): Tenant that handled the message
+    """
+    try:
+        msg = KonstructMessage.model_validate(message_data)
+    except Exception as exc:
+        logger.exception("Failed to deserialize KonstructMessage: %s", message_data)
+        raise self.retry(exc=exc)
+
+    result = asyncio.run(_process_message(msg))
+    return result
+
+
+async def _process_message(msg: KonstructMessage) -> dict:
+    """
+    Async agent pipeline — load agent config, build prompt, call LLM pool.
+
+    This function is called from the synchronous handle_message task via
+    asyncio.run(). It must not be called directly from Celery task code.
+
+    Args:
+        msg: The deserialized KonstructMessage.
+
+    Returns:
+        Dict with message_id, response, and tenant_id.
+    """
+    from orchestrator.agents.runner import run_agent
+    from shared.db import async_session_factory, engine
+    from shared.models.tenant import Agent
+    from shared.rls import configure_rls_hook, current_tenant_id
+
+    if msg.tenant_id is None:
+        logger.warning("Message %s has no tenant_id — cannot process", msg.id)
+        return {
+            "message_id": msg.id,
+            "response": "Unable to process: tenant not identified.",
+            "tenant_id": None,
+        }
+
+    # Set up RLS engine hook (idempotent — safe to call on every task)
+    configure_rls_hook(engine)
+
+    # Set the RLS context variable for this async task's context
+    tenant_uuid = uuid.UUID(msg.tenant_id)
+    token = current_tenant_id.set(tenant_uuid)
+
+    try:
+        agent: Agent | None = None
+        async with async_session_factory() as session:
+            from sqlalchemy import select
+
+            stmt = (
+                select(Agent)
+                .where(Agent.tenant_id == tenant_uuid)
+                .where(Agent.is_active.is_(True))
+                .limit(1)
+            )
+            result = await session.execute(stmt)
+            agent = result.scalars().first()
+    finally:
+        # Always reset the RLS context var after DB work is done
+        current_tenant_id.reset(token)
+
+    if agent is None:
+        logger.warning(
+            "No active agent found for tenant=%s message=%s",
+            msg.tenant_id,
+            msg.id,
+        )
+        return {
+            "message_id": msg.id,
+            "response": "No active agent is configured for your workspace. Please contact your administrator.",
+            "tenant_id": msg.tenant_id,
+        }
+
+    response_text = await run_agent(msg, agent)
+
+    logger.info(
+        "Message %s processed by agent=%s tenant=%s",
+        msg.id,
+        agent.id,
+        msg.tenant_id,
+    )
+
+    return {
+        "message_id": msg.id,
+        "response": response_text,
+        "tenant_id": msg.tenant_id,
+    }