feat(01-03): Channel Gateway (Slack adapter) and Message Router

- gateway/normalize.py: normalize_slack_event -> KonstructMessage (strips bot mention)
- gateway/channels/slack.py: register_slack_handlers for app_mention + DM events
  - rate limit check -> ephemeral rejection on exceeded
  - idempotency dedup (Slack retry protection)
  - placeholder 'Thinking...' message posted in-thread before Celery dispatch
  - auto-follow engaged threads with 30-minute TTL
  - HTTP 200 returned immediately; all LLM work dispatched to Celery
- gateway/main.py: FastAPI on port 8001, /slack/events + /health
- router/tenant.py: resolve_tenant workspace_id -> tenant_id (RLS-bypass query)
- router/ratelimit.py: check_rate_limit Redis token bucket, RateLimitExceeded exception
- router/idempotency.py: is_duplicate + mark_processed (SET NX, 24h TTL)
- router/context.py: load_agent_for_tenant with RLS ContextVar setup
- orchestrator/tasks.py: handle_message now extracts placeholder_ts/channel_id,
  calls _update_slack_placeholder via chat.update after LLM response
- docker-compose.yml: gateway service on port 8001
- pyproject.toml: added redis, konstruct-router, konstruct-orchestrator deps
2026-03-23 10:27:59 -06:00
parent dcd89cc8fd
commit 6f30705e1a
17 changed files with 1166 additions and 10 deletions
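
The Slack handler itself (gateway/channels/slack.py) is not among the files shown below. As orientation, here is a condensed, hypothetical sketch of the flow the bullets above describe, in slack_bolt style. Only normalize_slack_event, resolve_tenant, check_rate_limit, RateLimitExceeded, is_duplicate, handle_message and the "Thinking..." placeholder come from this commit; the import paths, connection strings, payload shape (model_dump) and all other wiring are assumptions, not the actual implementation.

# Hypothetical, condensed app_mention handler illustrating the dispatch flow.
# The real wiring lives in gateway/channels/slack.py (not shown in this view).
from redis.asyncio import Redis
from slack_bolt.async_app import AsyncApp
from sqlalchemy.ext.asyncio import async_sessionmaker, create_async_engine

from gateway.normalize import normalize_slack_event          # import paths assumed
from orchestrator.tasks import handle_message
from router.idempotency import is_duplicate
from router.ratelimit import RateLimitExceeded, check_rate_limit
from router.tenant import resolve_tenant

app = AsyncApp(token="xoxb-...", signing_secret="...")        # illustrative values
redis = Redis.from_url("redis://localhost:6379/0")            # illustrative value
engine = create_async_engine("postgresql+asyncpg://localhost/konstruct")  # illustrative DSN
Session = async_sessionmaker(engine, expire_on_commit=False)


@app.event("app_mention")
async def on_app_mention(body: dict, event: dict, client) -> None:
    message = normalize_slack_event(event)   # -> KonstructMessage, bot mention stripped

    async with Session() as session:
        tenant_id = await resolve_tenant(body["team_id"], "slack", session)
    if tenant_id is None:
        return  # unknown workspace; ignore the event

    try:
        await check_rate_limit(tenant_id, "slack", redis)
    except RateLimitExceeded as exc:
        # Ephemeral rejection, visible only to the sender
        await client.chat_postEphemeral(
            channel=event["channel"],
            user=event["user"],
            text=f"Rate limit exceeded; try again in ~{exc.remaining_seconds}s.",
        )
        return

    if await is_duplicate(tenant_id, event["event_ts"], redis):
        return  # Slack retry; this event was already dispatched

    # Placeholder posted in-thread before the Celery dispatch, so the user sees
    # immediate feedback while the LLM work happens out of band.
    placeholder = await client.chat_postMessage(
        channel=event["channel"],
        thread_ts=event.get("thread_ts", event["ts"]),
        text="Thinking...",
    )
    handle_message.delay(
        message=message.model_dump(),          # payload shape assumed
        placeholder_ts=placeholder["ts"],
        channel_id=event["channel"],
    )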

router/__init__.py

@@ -0,0 +1,6 @@
"""
Konstruct Message Router.
Handles tenant resolution, rate limiting, idempotency deduplication,
and context loading before dispatching to the Agent Orchestrator.
"""

router/context.py

@@ -0,0 +1,76 @@
"""
Agent context loading.
Loads the active agent for a tenant before message processing. Phase 1 supports
a single agent per tenant. The RLS context variable must be set before calling
any function here so that PostgreSQL RLS filters correctly.
"""
from __future__ import annotations

import logging
import uuid

from sqlalchemy import select
from sqlalchemy.ext.asyncio import AsyncSession

from shared.models.tenant import Agent
from shared.rls import current_tenant_id

logger = logging.getLogger(__name__)


async def load_agent_for_tenant(
    tenant_id: str,
    session: AsyncSession,
) -> Agent | None:
    """
    Load the active agent for a tenant.

    Sets the ``current_tenant_id`` ContextVar so that PostgreSQL RLS policies
    correctly filter the agents table to only return rows belonging to this
    tenant.

    Phase 1: Returns the first active agent for the tenant (single-agent model).
    Phase 2+: Will support agent selection based on message content and routing
    rules.

    Args:
        tenant_id: Konstruct tenant ID as a UUID string.
        session: Async SQLAlchemy session.

    Returns:
        The active Agent ORM instance, or None if no active agent is configured.
    """
    try:
        tenant_uuid = uuid.UUID(tenant_id)
    except (ValueError, AttributeError):
        logger.error("load_agent_for_tenant: invalid tenant_id=%r", tenant_id)
        return None

    # Set RLS context so the DB query is correctly scoped to this tenant
    token = current_tenant_id.set(tenant_uuid)
    try:
        stmt = (
            select(Agent)
            .where(Agent.tenant_id == tenant_uuid)
            .where(Agent.is_active.is_(True))
            .limit(1)
        )
        result = await session.execute(stmt)
        agent = result.scalars().first()
    except Exception:
        logger.exception(
            "load_agent_for_tenant: DB error for tenant=%s", tenant_id
        )
        return None
    finally:
        # Always reset the RLS context var after DB work completes
        current_tenant_id.reset(token)

    if agent is None:
        logger.warning(
            "load_agent_for_tenant: no active agent for tenant=%s", tenant_id
        )
    return agent
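
shared/rls.py, which defines current_tenant_id, is not part of this view. The sketch below is one assumed shape of that module, consistent with how the ContextVar is used here and described in tenant.py: a session event hook that applies the tenant setting at transaction start and injects nothing when the ContextVar is None. The event choice and the set_config call are assumptions, not the actual implementation.

# Assumed sketch of shared/rls.py (not shown in this commit).
from __future__ import annotations

import uuid
from contextvars import ContextVar

from sqlalchemy import event, text
from sqlalchemy.orm import Session

# None means "no tenant scope": the hook below then injects nothing, which is
# what resolve_tenant relies on for its pre-RLS lookup.
current_tenant_id: ContextVar[uuid.UUID | None] = ContextVar(
    "current_tenant_id", default=None
)


@event.listens_for(Session, "after_begin")
def _apply_rls_tenant(session, transaction, connection) -> None:
    tenant = current_tenant_id.get()
    if tenant is None:
        return  # pre-resolution queries run without a tenant filter
    # Equivalent to SET LOCAL app.current_tenant = '<uuid>': scoped to the
    # current transaction only.
    connection.execute(
        text("SELECT set_config('app.current_tenant', :tid, true)"),
        {"tid": str(tenant)},
    )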

router/idempotency.py

@@ -0,0 +1,87 @@
"""
Message deduplication (idempotency).
Slack (and other channels) retry event delivery when the gateway does not
respond with HTTP 200 within 3 seconds. This module tracks which message
IDs have already been dispatched to Celery, preventing duplicate processing.
Design:
- Key: {tenant_id}:dedup:{message_id} (from shared.redis_keys)
- TTL: 24 hours (Slack retries stop after ~1 hour; 24h is conservative)
- Op: SET NX (atomic check-and-set)
"""
from __future__ import annotations

import logging

from redis.asyncio import Redis

from shared.redis_keys import idempotency_key

logger = logging.getLogger(__name__)

# How long to remember a message ID (seconds).
# Slack retries for up to ~1 hour; 24h gives plenty of buffer.
_DEDUP_TTL_SECONDS = 86400  # 24 hours


async def is_duplicate(
    tenant_id: str,
    message_id: str,
    redis: Redis,  # type: ignore[type-arg]
) -> bool:
    """
    Check if this message has already been dispatched for processing.

    Uses SET NX (set-if-not-exists) as an atomic check-and-mark operation.
    If the key did not exist, it is created with a 24-hour TTL and this
    function returns False (not a duplicate — process it).
    If the key already existed, this function returns True (duplicate — skip).

    Args:
        tenant_id: Konstruct tenant identifier.
        message_id: Unique message identifier (e.g. Slack event_ts or UUID).
        redis: Async Redis client.

    Returns:
        True if this message is a duplicate (already dispatched).
        False if this is the first time we've seen this message.
    """
    key = idempotency_key(tenant_id, message_id)
    # SET key "1" NX EX ttl — returns True if key was set (new), None if key existed
    was_set = await redis.set(key, "1", nx=True, ex=_DEDUP_TTL_SECONDS)
    if was_set:
        # Key was freshly created — this is NOT a duplicate
        return False
    # Key already existed — this IS a duplicate
    logger.info(
        "Duplicate message detected: tenant=%s message_id=%s — skipping",
        tenant_id,
        message_id,
    )
    return True


async def mark_processed(
    tenant_id: str,
    message_id: str,
    redis: Redis,  # type: ignore[type-arg]
) -> None:
    """
    Explicitly mark a message as processed (without the duplicate check).

    Use this when you want to mark a message as seen without the
    check-and-mark semantics of ``is_duplicate``. Typically you'll use
    ``is_duplicate`` instead (which does both).

    Args:
        tenant_id: Konstruct tenant identifier.
        message_id: Unique message identifier.
        redis: Async Redis client.
    """
    key = idempotency_key(tenant_id, message_id)
    await redis.set(key, "1", ex=_DEDUP_TTL_SECONDS)
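
A minimal illustration of the check-and-mark semantics, with made-up tenant and message IDs (a real handler would pass the Slack event_ts), e.g. inside an async test:

# First delivery creates the key (SET NX succeeds) and is processed;
# a Slack retry of the same event_ts then sees the key and is skipped.
assert await is_duplicate("tenant-123", "1718000000.000100", redis) is False
assert await is_duplicate("tenant-123", "1718000000.000100", redis) is True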

router/main.py

@@ -0,0 +1,24 @@
"""
Message Router — FastAPI application.
The router is an internal service. In the current architecture (Phase 1),
routing logic is embedded directly in the channel gateway handlers rather
than as a separate HTTP call. This FastAPI app provides a health endpoint
and is a placeholder for future standalone router deployments.
"""
from __future__ import annotations

from fastapi import FastAPI

app = FastAPI(
    title="Konstruct Message Router",
    description="Tenant resolution, rate limiting, context loading",
    version="0.1.0",
)


@app.get("/health")
async def health() -> dict[str, str]:
    """Health check endpoint."""
    return {"status": "ok", "service": "router"}

router/ratelimit.py

@@ -0,0 +1,121 @@
"""
Redis token bucket rate limiter.
Implements a sliding window token bucket using Redis atomic operations.
Design:
- Key: {tenant_id}:ratelimit:{channel} (from shared.redis_keys)
- Window: configurable (default 60s)
- Tokens: configurable (default 30 per window per tenant per channel)
- Storage: INCR + EXPIRE (atomic via pipeline)
The token bucket approach:
1. INCR the counter key
2. If count == 1, set EXPIRE (first request in window — starts the clock)
3. If count > limit: raise RateLimitExceeded
4. Otherwise: return True (request allowed)
This is NOT a sliding window (it's a fixed window with INCR/EXPIRE) — it's
simple, Redis-atomic, and correct enough for Phase 1. A true sliding window
can be implemented with ZADD/ZREMRANGEBYSCORE later if needed.
"""
from __future__ import annotations

import logging

from redis.asyncio import Redis

from shared.redis_keys import rate_limit_key

logger = logging.getLogger(__name__)

# Default rate limit configuration — override per-tenant in Phase 2
_DEFAULT_LIMIT = 30  # Max requests per window
_DEFAULT_WINDOW = 60  # Window duration in seconds


class RateLimitExceeded(Exception):
    """
    Raised when a tenant's per-channel rate limit is exceeded.

    Attributes:
        tenant_id: The tenant that exceeded the limit.
        channel: The channel that hit the limit.
        remaining_seconds: Approximate TTL on the rate limit key (how long
            until the window resets).
    """

    def __init__(
        self,
        tenant_id: str,
        channel: str,
        remaining_seconds: int = 60,
    ) -> None:
        self.tenant_id = tenant_id
        self.channel = channel
        self.remaining_seconds = remaining_seconds
        super().__init__(
            f"Rate limit exceeded for tenant={tenant_id} channel={channel}. "
            f"Resets in ~{remaining_seconds}s."
        )
async def check_rate_limit(
    tenant_id: str,
    channel: str,
    redis: Redis,  # type: ignore[type-arg]
    limit: int = _DEFAULT_LIMIT,
    window_seconds: int = _DEFAULT_WINDOW,
) -> bool:
    """
    Check whether the tenant-channel combination is within its rate limit.

    Uses a pipelined INCR + TTL read; EXPIRE is applied when a new window
    starts. On the first request in a new window the counter is created and
    the TTL clock starts. Subsequent requests increment the counter; once it
    exceeds ``limit``, RateLimitExceeded is raised with the remaining window TTL.

    Args:
        tenant_id: Konstruct tenant identifier.
        channel: Channel string (e.g. "slack").
        redis: Async Redis client.
        limit: Maximum requests per window (default 30).
        window_seconds: Window duration in seconds (default 60).

    Returns:
        True if the request is allowed.

    Raises:
        RateLimitExceeded: If the request exceeds the limit.
    """
    key = rate_limit_key(tenant_id, channel)

    # Pipeline: INCR the counter and read its TTL in one round trip.
    # EXPIRE is applied separately below when a new window starts.
    pipe = redis.pipeline(transaction=True)
    pipe.incr(key)
    pipe.ttl(key)
    results = await pipe.execute()

    count: int = results[0]
    ttl: int = results[1]

    # If TTL is -1, the key exists but has no expiry — set one now.
    # This handles the case where INCR created the key but EXPIRE wasn't set yet.
    if ttl == -1 or count == 1:
        await redis.expire(key, window_seconds)
        ttl = window_seconds

    if count > limit:
        remaining = max(ttl, 0)
        logger.warning(
            "Rate limit exceeded: tenant=%s channel=%s count=%d limit=%d ttl=%d",
            tenant_id,
            channel,
            count,
            limit,
            remaining,
        )
        raise RateLimitExceeded(tenant_id, channel, remaining_seconds=remaining)

    return True
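
The module docstring above leaves a true sliding window as future work. Here is a sketch of that ZADD/ZREMRANGEBYSCORE variant, reusing the same key helper and exception; the function name and the unique-member scheme are illustrative, not part of this commit:

import time
import uuid as _uuid


async def check_rate_limit_sliding(
    tenant_id: str,
    channel: str,
    redis: Redis,
    limit: int = _DEFAULT_LIMIT,
    window_seconds: int = _DEFAULT_WINDOW,
) -> bool:
    """Sliding-window variant: one sorted-set member per request, scored by time."""
    key = rate_limit_key(tenant_id, channel)
    now = time.time()

    pipe = redis.pipeline(transaction=True)
    pipe.zremrangebyscore(key, 0, now - window_seconds)   # drop entries older than the window
    pipe.zadd(key, {f"{now}:{_uuid.uuid4().hex}": now})   # record this request (unique member)
    pipe.zcard(key)                                       # count requests still inside the window
    pipe.expire(key, window_seconds)                      # let idle keys expire on their own
    _, _, count, _ = await pipe.execute()

    if count > limit:
        raise RateLimitExceeded(tenant_id, channel, remaining_seconds=window_seconds)
    return True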

router/tenant.py

@@ -0,0 +1,102 @@
"""
Tenant resolution — maps channel workspace IDs to Konstruct tenant IDs.
This is the ONE pre-RLS query in the system. Tenant resolution must work
across all tenants because we don't know which tenant owns a message until
after we resolve it. The query bypasses RLS by using the admin/superuser
connection for this specific lookup only.
Design:
- Query `channel_connections` for matching workspace_id + channel_type
- Returns the tenant_id UUID as a string, or None if not found
- Uses a raw SELECT without RLS context (intentional — pre-resolution)
"""
from __future__ import annotations

import logging

from sqlalchemy import select, text
from sqlalchemy.ext.asyncio import AsyncSession

from shared.models.message import ChannelType
from shared.models.tenant import ChannelConnection, ChannelTypeEnum

logger = logging.getLogger(__name__)

# Map ChannelType (StrEnum from message.py) to ChannelTypeEnum (ORM enum from tenant.py)
_CHANNEL_TYPE_MAP: dict[str, ChannelTypeEnum] = {
    "slack": ChannelTypeEnum.SLACK,
    "whatsapp": ChannelTypeEnum.WHATSAPP,
    "mattermost": ChannelTypeEnum.MATTERMOST,
    "rocketchat": ChannelTypeEnum.ROCKETCHAT,
    "teams": ChannelTypeEnum.TEAMS,
    "telegram": ChannelTypeEnum.TELEGRAM,
    "signal": ChannelTypeEnum.SIGNAL,
}


async def resolve_tenant(
    workspace_id: str,
    channel_type: ChannelType | str,
    session: AsyncSession,
) -> str | None:
    """
    Resolve a channel workspace ID to a Konstruct tenant ID.

    This is deliberately a RLS-bypass query — we cannot know which tenant to
    set in `app.current_tenant` until after we resolve the tenant. The session
    passed here should use the admin connection (postgres superuser) or the
    konstruct_app role with RLS disabled for this specific query.

    In practice, for this single lookup, we disable the RLS SET LOCAL by
    temporarily not setting `current_tenant_id` — the ContextVar defaults to
    None, so the RLS hook does not inject SET LOCAL, and the query sees all
    rows in `channel_connections`.

    Args:
        workspace_id: Channel-native workspace identifier (e.g. Slack T12345).
        channel_type: Channel type as ChannelType enum or string.
        session: Async SQLAlchemy session.

    Returns:
        Tenant ID as a string (UUID), or None if no matching connection found.
    """
    channel_str = str(channel_type).lower()
    orm_channel = _CHANNEL_TYPE_MAP.get(channel_str)
    if orm_channel is None:
        logger.warning("resolve_tenant: unknown channel_type=%r", channel_type)
        return None

    try:
        # Bypass RLS for this query — disable RLS row filtering at the session level
        # by setting app.current_tenant to empty (no policy match = all rows visible
        # to konstruct_app for SELECT on channel_connections).
        # We use a raw SET LOCAL here to ensure the tenant policy is not applied.
        await session.execute(text("SET LOCAL app.current_tenant = ''"))

        stmt = (
            select(ChannelConnection.tenant_id)
            .where(ChannelConnection.channel_type == orm_channel)
            .where(ChannelConnection.workspace_id == workspace_id)
            .limit(1)
        )
        result = await session.execute(stmt)
        row = result.scalar_one_or_none()
    except Exception:
        logger.exception(
            "resolve_tenant: DB error workspace_id=%r channel=%r",
            workspace_id,
            channel_type,
        )
        return None

    if row is None:
        logger.debug(
            "resolve_tenant: no match workspace_id=%r channel=%r",
            workspace_id,
            channel_type,
        )
        return None

    return str(row)
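
A brief, hypothetical caller showing where the pre-RLS lookup sits relative to tenant-scoped work; the session factory, workspace ID and control flow are assumptions, only resolve_tenant and load_agent_for_tenant come from this commit:

async def _route(event_workspace_id: str) -> None:
    """Hypothetical excerpt: pre-RLS tenant lookup, then tenant-scoped loading."""
    async with async_session_factory() as session:   # session factory name assumed
        tenant_id = await resolve_tenant(event_workspace_id, "slack", session)
        if tenant_id is None:
            return  # unknown workspace; drop the event

        # load_agent_for_tenant sets and resets current_tenant_id itself, so the
        # agent query (and anything else scoped the same way) is tenant-filtered.
        agent = await load_agent_for_tenant(tenant_id, session)
        if agent is None:
            return  # tenant exists but has no active agent configured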