feat(01-03): Channel Gateway (Slack adapter) and Message Router

- gateway/normalize.py: normalize_slack_event -> KonstructMessage (strips bot mention)
- gateway/channels/slack.py: register_slack_handlers for app_mention + DM events
  - rate limit check -> ephemeral rejection when the limit is exceeded
  - idempotency dedup (Slack retry protection)
  - placeholder 'Thinking...' message posted in-thread before Celery dispatch
  - auto-follow engaged threads with 30-minute TTL
  - HTTP 200 returned immediately; all LLM work dispatched to Celery
- gateway/main.py: FastAPI on port 8001, /slack/events + /health
- router/tenant.py: resolve_tenant workspace_id -> tenant_id (RLS-bypass query)
- router/ratelimit.py: check_rate_limit Redis token bucket, RateLimitExceeded exception
- router/idempotency.py: is_duplicate + mark_processed (SET NX, 24h TTL)
- router/context.py: load_agent_for_tenant with RLS ContextVar setup
- orchestrator/tasks.py: handle_message now extracts placeholder_ts/channel_id,
  calls _update_slack_placeholder via chat.update after LLM response
- docker-compose.yml: gateway service on port 8001
- pyproject.toml: added redis, konstruct-router, konstruct-orchestrator deps
This commit is contained in:
2026-03-23 10:27:59 -06:00
parent dcd89cc8fd
commit 6f30705e1a
17 changed files with 1166 additions and 10 deletions

View File

@@ -34,16 +34,26 @@ def handle_message(self, message_data: dict) -> dict: # type: ignore[no-untyped
Process an inbound Konstruct message through the agent pipeline.
This task is the primary entry point for the Celery worker. It is dispatched
by the Message Router (or Channel Gateway in simple deployments) after tenant
resolution completes.
by the Channel Gateway after tenant resolution completes.
The ``message_data`` dict MAY contain extra keys beyond KonstructMessage
fields. Specifically, the Slack handler injects:
- ``placeholder_ts``: Slack message timestamp of the "Thinking..." placeholder
- ``channel_id``: Slack channel ID where the response should be posted
These are extracted before KonstructMessage validation and used to update
the placeholder with the real LLM response via chat.update.
Pipeline:
1. Deserialize message_data -> KonstructMessage
2. Run async agent pipeline via asyncio.run()
3. Return response dict
1. Extract Slack reply metadata (placeholder_ts, channel_id) if present
2. Deserialize message_data -> KonstructMessage
3. Run async agent pipeline via asyncio.run()
4. If Slack metadata present: call chat.update to replace placeholder
5. Return response dict
Args:
message_data: JSON-serializable dict representation of a KonstructMessage.
message_data: JSON-serializable dict. Must contain KonstructMessage
fields plus optional ``placeholder_ts`` and ``channel_id``.
Returns:
Dict with keys:
@@ -51,25 +61,40 @@ def handle_message(self, message_data: dict) -> dict: # type: ignore[no-untyped
- response (str): Agent's response text
- tenant_id (str | None): Tenant that handled the message
"""
# Extract Slack-specific reply metadata before model validation
# (KonstructMessage doesn't know about these fields)
placeholder_ts: str = message_data.pop("placeholder_ts", "") or ""
channel_id: str = message_data.pop("channel_id", "") or ""
try:
msg = KonstructMessage.model_validate(message_data)
except Exception as exc:
logger.exception("Failed to deserialize KonstructMessage: %s", message_data)
raise self.retry(exc=exc)
result = asyncio.run(_process_message(msg))
result = asyncio.run(_process_message(msg, placeholder_ts=placeholder_ts, channel_id=channel_id))
return result
async def _process_message(msg: KonstructMessage) -> dict:
async def _process_message(
msg: KonstructMessage,
placeholder_ts: str = "",
channel_id: str = "",
) -> dict:
"""
Async agent pipeline — load agent config, build prompt, call LLM pool.
After getting the LLM response, if Slack placeholder metadata is present,
updates the "Thinking..." placeholder message with the real response using
Slack's chat.update API.
This function is called from the synchronous handle_message task via
asyncio.run(). It must not be called directly from Celery task code.
Args:
msg: The deserialized KonstructMessage.
msg: The deserialized KonstructMessage.
placeholder_ts: Slack message timestamp of the "Thinking..." placeholder.
channel_id: Slack channel ID for the chat.update call.
Returns:
Dict with message_id, response, and tenant_id.
@@ -94,6 +119,8 @@ async def _process_message(msg: KonstructMessage) -> dict:
tenant_uuid = uuid.UUID(msg.tenant_id)
token = current_tenant_id.set(tenant_uuid)
slack_bot_token: str = ""
try:
agent: Agent | None = None
async with async_session_factory() as session:
@@ -107,6 +134,21 @@ async def _process_message(msg: KonstructMessage) -> dict:
)
result = await session.execute(stmt)
agent = result.scalars().first()
# Load the bot token for this tenant from channel_connections config
if agent is not None and placeholder_ts and channel_id:
from shared.models.tenant import ChannelConnection, ChannelTypeEnum
conn_stmt = (
select(ChannelConnection)
.where(ChannelConnection.tenant_id == tenant_uuid)
.where(ChannelConnection.channel_type == ChannelTypeEnum.SLACK)
.limit(1)
)
conn_result = await session.execute(conn_stmt)
conn = conn_result.scalars().first()
if conn and conn.config:
slack_bot_token = conn.config.get("bot_token", "")
finally:
# Always reset the RLS context var after DB work is done
current_tenant_id.reset(token)
@@ -117,9 +159,17 @@ async def _process_message(msg: KonstructMessage) -> dict:
msg.tenant_id,
msg.id,
)
no_agent_response = "No active agent is configured for your workspace. Please contact your administrator."
if placeholder_ts and channel_id:
await _update_slack_placeholder(
bot_token=slack_bot_token,
channel_id=channel_id,
placeholder_ts=placeholder_ts,
text=no_agent_response,
)
return {
"message_id": msg.id,
"response": "No active agent is configured for your workspace. Please contact your administrator.",
"response": no_agent_response,
"tenant_id": msg.tenant_id,
}
@@ -132,8 +182,78 @@ async def _process_message(msg: KonstructMessage) -> dict:
msg.tenant_id,
)
# Replace the "Thinking..." placeholder with the real response
if placeholder_ts and channel_id:
await _update_slack_placeholder(
bot_token=slack_bot_token,
channel_id=channel_id,
placeholder_ts=placeholder_ts,
text=response_text,
)
return {
"message_id": msg.id,
"response": response_text,
"tenant_id": msg.tenant_id,
}
async def _update_slack_placeholder(
    bot_token: str,
    channel_id: str,
    placeholder_ts: str,
    text: str,
) -> None:
    """
    Swap the in-thread "Thinking..." placeholder for the agent's real reply.

    Talks to Slack's chat.update REST endpoint directly through httpx so the
    orchestrator carries no slack-bolt dependency (keeps the service boundary
    clean). Because the placeholder itself was posted inside the thread,
    editing it in place keeps the response threaded, per the product decision
    that responses are always posted in threads.

    Args:
        bot_token: Tenant-scoped Slack bot token (xoxb-...).
        channel_id: ID of the Slack channel holding the placeholder message.
        placeholder_ts: Slack message timestamp identifying the placeholder
            to edit.
        text: Final LLM response text that replaces "Thinking...".

    Returns:
        None. Failures are logged, never raised — a stale placeholder is
        preferable to crashing the worker task.
    """
    import httpx

    if not bot_token:
        # Without a token there is no way to call the Slack API. This occurs
        # when channel_connections config lacks a bot_token; leave the
        # placeholder as "Thinking..." and record the gap.
        logger.warning(
            "No Slack bot token for channel=%s placeholder_ts=%s — cannot update placeholder",
            channel_id,
            placeholder_ts,
        )
        return

    auth_headers = {"Authorization": f"Bearer {bot_token}"}
    body = {
        "channel": channel_id,
        "ts": placeholder_ts,
        "text": text,
    }
    try:
        async with httpx.AsyncClient(timeout=httpx.Timeout(30.0)) as http:
            resp = await http.post(
                "https://slack.com/api/chat.update",
                headers=auth_headers,
                json=body,
            )
            payload = resp.json()
        if not payload.get("ok"):
            # Slack signals API-level failure via ok=false in the JSON body.
            logger.error(
                "chat.update failed: channel=%s ts=%s error=%r",
                channel_id,
                placeholder_ts,
                payload.get("error"),
            )
    except Exception:
        # Best-effort update: network errors or malformed responses are
        # logged with traceback but never propagated to the caller.
        logger.exception(
            "Failed to update Slack placeholder: channel=%s ts=%s",
            channel_id,
            placeholder_ts,
        )