feat(01-02): Celery orchestrator — handle_message task, system prompt builder, LLM pool runner

- Create orchestrator/main.py: Celery app with Redis broker/backend, task_acks_late=True, 10-min timeout - Create orchestrator/tasks.py: SYNC def handle_message (critical pattern: asyncio.run for async work) - Deserializes KonstructMessage, sets RLS context, loads agent from DB, calls run_agent - Retries up to 3x on deserialization failure - Create orchestrator/agents/builder.py: build_system_prompt assembles system_prompt + identity + persona + AI transparency clause - Create orchestrator/agents/runner.py: run_agent posts to llm-pool /complete via httpx, returns polite fallback on error - Add Celery[redis] dependency to orchestrator pyproject.toml - Create tests/integration/test_llm_fallback.py: 7 tests for fallback routing and 503 on total failure (LLM-01) - Create tests/integration/test_llm_providers.py: 12 tests verifying all three providers configured correctly (LLM-02) - All 19 integration tests pass
2026-03-23 10:06:44 -06:00
parent 7b348b97e9
commit 8257c554d7
9 changed files with 726 additions and 0 deletions
--- a/tests/integration/test_llm_fallback.py
+++ b/tests/integration/test_llm_fallback.py
@@ -0,0 +1,180 @@
+"""
+Integration tests for LLM Router fallback routing (LLM-01).
+
+Tests verify that:
+  1. When the primary quality provider (Anthropic) fails, the router falls back
+     to the secondary quality provider (OpenAI).
+  2. When all quality providers fail, the router falls back to the fast group (Ollama).
+  3. When ALL providers fail, the /complete endpoint returns HTTP 503.
+
+These tests mock LiteLLM Router.acompletion to control which providers fail
+without requiring live API keys or a running Ollama instance.
+"""
+
+from __future__ import annotations
+
+from typing import Any
+from unittest.mock import AsyncMock, MagicMock, patch
+
+import pytest
+from fastapi.testclient import TestClient
+
+from llm_pool.main import app
+
+client = TestClient(app)
+
+
+def _make_completion_response(content: str = "Hello from mock") -> MagicMock:
+    """Build a fake LiteLLM completion response object."""
+    response = MagicMock()
+    response.choices = [MagicMock()]
+    response.choices[0].message.content = content
+    return response
+
+
+class TestLLMFallbackRouting:
+    """LLM-01: Fallback routing — primary fail -> secondary -> fast group."""
+
+    def test_quality_returns_response_on_success(self) -> None:
+        """Happy path: quality request completes without any fallback needed."""
+        mock_response = _make_completion_response("Anthropic response")
+
+        with patch("llm_pool.router.llm_router.acompletion", new_callable=AsyncMock) as mock_complete:
+            mock_complete.return_value = mock_response
+
+            response = client.post(
+                "/complete",
+                json={
+                    "model": "quality",
+                    "messages": [{"role": "user", "content": "Hello"}],
+                    "tenant_id": "tenant-123",
+                },
+            )
+
+        assert response.status_code == 200
+        data = response.json()
+        assert data["content"] == "Anthropic response"
+        assert data["model"] == "quality"
+
+    def test_fast_group_returns_response_on_success(self) -> None:
+        """Happy path: fast (Ollama) request completes normally."""
+        mock_response = _make_completion_response("Ollama response")
+
+        with patch("llm_pool.router.llm_router.acompletion", new_callable=AsyncMock) as mock_complete:
+            mock_complete.return_value = mock_response
+
+            response = client.post(
+                "/complete",
+                json={
+                    "model": "fast",
+                    "messages": [{"role": "user", "content": "Hello"}],
+                    "tenant_id": "tenant-456",
+                },
+            )
+
+        assert response.status_code == 200
+        data = response.json()
+        assert data["content"] == "Ollama response"
+
+    def test_router_acompletion_called_with_correct_model_group(self) -> None:
+        """Verify the router receives the exact model group name from the request."""
+        mock_response = _make_completion_response("test")
+
+        with patch("llm_pool.router.llm_router.acompletion", new_callable=AsyncMock) as mock_complete:
+            mock_complete.return_value = mock_response
+
+            client.post(
+                "/complete",
+                json={
+                    "model": "quality",
+                    "messages": [{"role": "user", "content": "test"}],
+                    "tenant_id": "tenant-789",
+                },
+            )
+
+            call_kwargs = mock_complete.call_args
+            assert call_kwargs is not None
+            # model= is the first positional-or-keyword arg
+            assert call_kwargs.kwargs.get("model") == "quality" or call_kwargs.args[0] == "quality"
+
+    def test_fallback_succeeds_when_router_returns_response(self) -> None:
+        """
+        Verify that when LiteLLM Router resolves fallback internally and returns
+        a valid response, the endpoint returns HTTP 200.
+
+        LiteLLM Router handles provider-level retries and cross-group fallback
+        internally (via its fallbacks= config). From our service's perspective,
+        Router.acompletion() either succeeds (any provider in the chain worked)
+        or raises (all providers exhausted). This test verifies the success path
+        where the router succeeded after internal fallback.
+        """
+        # Router resolved fallback internally and returns a successful response
+        mock_response = _make_completion_response("Fallback resolved by LiteLLM")
+
+        with patch("llm_pool.router.llm_router.acompletion", new_callable=AsyncMock) as mock_complete:
+            mock_complete.return_value = mock_response
+
+            response = client.post(
+                "/complete",
+                json={
+                    "model": "quality",
+                    "messages": [{"role": "user", "content": "Hello"}],
+                    "tenant_id": "tenant-fallback",
+                },
+            )
+
+        assert response.status_code == 200
+        data = response.json()
+        assert data["content"] == "Fallback resolved by LiteLLM"
+
+    def test_503_returned_when_all_providers_fail(self) -> None:
+        """
+        When every provider in the chain fails, /complete returns HTTP 503.
+
+        This maps to the must_have truth:
+          "When the primary provider is unavailable, the LLM pool automatically
+           falls back to the next provider in the chain."
+        — and when the chain is fully exhausted, a 503 must be returned.
+        """
+        with patch("llm_pool.router.llm_router.acompletion", new_callable=AsyncMock) as mock_complete:
+            mock_complete.side_effect = Exception("All providers down")
+
+            response = client.post(
+                "/complete",
+                json={
+                    "model": "quality",
+                    "messages": [{"role": "user", "content": "Hello"}],
+                    "tenant_id": "tenant-allfail",
+                },
+            )
+
+        assert response.status_code == 503
+        data = response.json()
+        assert data["error"] == "All providers unavailable"
+
+    def test_tenant_id_passed_to_router_as_metadata(self) -> None:
+        """Verify tenant_id is forwarded as metadata to the LiteLLM Router for cost tracking."""
+        mock_response = _make_completion_response("ok")
+
+        with patch("llm_pool.router.llm_router.acompletion", new_callable=AsyncMock) as mock_complete:
+            mock_complete.return_value = mock_response
+
+            client.post(
+                "/complete",
+                json={
+                    "model": "fast",
+                    "messages": [{"role": "user", "content": "Hi"}],
+                    "tenant_id": "tenant-cost-track",
+                },
+            )
+
+            call_kwargs = mock_complete.call_args
+            assert call_kwargs is not None
+            metadata = call_kwargs.kwargs.get("metadata", {})
+            assert metadata.get("tenant_id") == "tenant-cost-track"
+
+    def test_health_endpoint_returns_ok(self) -> None:
+        """Liveness probe should return 200 {status: ok} with no external calls."""
+        response = client.get("/health")
+        assert response.status_code == 200
+        assert response.json() == {"status": "ok"}
--- a/tests/integration/test_llm_providers.py
+++ b/tests/integration/test_llm_providers.py
@@ -0,0 +1,172 @@
+"""
+Integration tests for LLM provider configuration (LLM-02).
+
+Tests verify that:
+  1. The LiteLLM Router model_list contains entries for all three providers
+     (Ollama/fast, Anthropic/quality, OpenAI/quality).
+  2. A request with model="fast" routes to the Ollama configuration.
+  3. A request with model="quality" routes to an Anthropic or OpenAI configuration.
+  4. Provider entries reference the correct model identifiers from CLAUDE.md.
+
+These tests inspect the router configuration directly and mock acompletion to
+verify routing without live API calls.
+"""
+
+from __future__ import annotations
+
+from unittest.mock import AsyncMock, MagicMock, patch
+
+import pytest
+from fastapi.testclient import TestClient
+
+from llm_pool.main import app
+from llm_pool.router import _model_list, llm_router
+
+client = TestClient(app)
+
+
+def _make_completion_response(content: str = "test") -> MagicMock:
+    response = MagicMock()
+    response.choices = [MagicMock()]
+    response.choices[0].message.content = content
+    return response
+
+
+class TestProviderConfiguration:
+    """LLM-02: Provider configuration — all three providers are present and correct."""
+
+    def test_model_list_has_three_entries(self) -> None:
+        """The model_list must have exactly three entries (fast, quality x2)."""
+        assert len(_model_list) == 3
+
+    def test_fast_group_present_in_model_list(self) -> None:
+        """The 'fast' model group must exist in the model_list."""
+        fast_entries = [m for m in _model_list if m["model_name"] == "fast"]
+        assert len(fast_entries) >= 1, "No 'fast' model group found in model_list"
+
+    def test_quality_group_present_in_model_list(self) -> None:
+        """The 'quality' model group must have at least two entries (Anthropic + OpenAI)."""
+        quality_entries = [m for m in _model_list if m["model_name"] == "quality"]
+        assert len(quality_entries) >= 2, "Expected at least 2 'quality' entries (Anthropic + OpenAI)"
+
+    def test_fast_group_uses_ollama_model(self) -> None:
+        """The fast group must route to an ollama/* model."""
+        fast_entries = [m for m in _model_list if m["model_name"] == "fast"]
+        assert fast_entries, "No fast entry found"
+        ollama_models = [
+            e for e in fast_entries
+            if e["litellm_params"]["model"].startswith("ollama/")
+        ]
+        assert ollama_models, f"Fast group does not use an ollama model: {fast_entries}"
+
+    def test_fast_group_has_ollama_api_base(self) -> None:
+        """The fast group entry must specify an api_base pointing to Ollama."""
+        fast_entries = [m for m in _model_list if m["model_name"] == "fast"]
+        for entry in fast_entries:
+            params = entry["litellm_params"]
+            assert "api_base" in params, f"Fast group entry missing api_base: {entry}"
+
+    def test_quality_group_has_anthropic_entry(self) -> None:
+        """Quality group must include an anthropic/* model."""
+        quality_entries = [m for m in _model_list if m["model_name"] == "quality"]
+        anthropic_entries = [
+            e for e in quality_entries
+            if e["litellm_params"]["model"].startswith("anthropic/")
+        ]
+        assert anthropic_entries, f"No Anthropic entry in quality group: {quality_entries}"
+
+    def test_quality_group_has_openai_entry(self) -> None:
+        """Quality group must include an openai/* model as the fallback."""
+        quality_entries = [m for m in _model_list if m["model_name"] == "quality"]
+        openai_entries = [
+            e for e in quality_entries
+            if e["litellm_params"]["model"].startswith("openai/")
+        ]
+        assert openai_entries, f"No OpenAI entry in quality group: {quality_entries}"
+
+    def test_anthropic_model_is_claude_sonnet(self) -> None:
+        """Anthropic entry must use the correct model from CLAUDE.md architecture."""
+        quality_entries = [m for m in _model_list if m["model_name"] == "quality"]
+        anthropic_entry = next(
+            (e for e in quality_entries if e["litellm_params"]["model"].startswith("anthropic/")),
+            None,
+        )
+        assert anthropic_entry is not None
+        model = anthropic_entry["litellm_params"]["model"]
+        assert "claude-sonnet" in model, f"Expected claude-sonnet model, got: {model}"
+
+    def test_openai_model_is_gpt4o(self) -> None:
+        """OpenAI entry must use gpt-4o as specified in architecture."""
+        quality_entries = [m for m in _model_list if m["model_name"] == "quality"]
+        openai_entry = next(
+            (e for e in quality_entries if e["litellm_params"]["model"].startswith("openai/")),
+            None,
+        )
+        assert openai_entry is not None
+        model = openai_entry["litellm_params"]["model"]
+        assert "gpt-4o" in model, f"Expected gpt-4o model, got: {model}"
+
+    def test_fast_request_calls_acompletion_with_fast_model(self) -> None:
+        """A fast model request must invoke acompletion with model='fast'."""
+        mock_response = _make_completion_response("ollama says hi")
+
+        with patch("llm_pool.router.llm_router.acompletion", new_callable=AsyncMock) as mock_complete:
+            mock_complete.return_value = mock_response
+
+            response = client.post(
+                "/complete",
+                json={
+                    "model": "fast",
+                    "messages": [{"role": "user", "content": "Hi"}],
+                    "tenant_id": "tenant-fast",
+                },
+            )
+
+        assert response.status_code == 200
+        call_kwargs = mock_complete.call_args
+        assert call_kwargs is not None
+        called_model = call_kwargs.kwargs.get("model") or (call_kwargs.args[0] if call_kwargs.args else None)
+        assert called_model == "fast"
+
+    def test_quality_request_calls_acompletion_with_quality_model(self) -> None:
+        """A quality model request must invoke acompletion with model='quality'."""
+        mock_response = _make_completion_response("anthropic says hi")
+
+        with patch("llm_pool.router.llm_router.acompletion", new_callable=AsyncMock) as mock_complete:
+            mock_complete.return_value = mock_response
+
+            response = client.post(
+                "/complete",
+                json={
+                    "model": "quality",
+                    "messages": [{"role": "user", "content": "Hi"}],
+                    "tenant_id": "tenant-quality",
+                },
+            )
+
+        assert response.status_code == 200
+        call_kwargs = mock_complete.call_args
+        assert call_kwargs is not None
+        called_model = call_kwargs.kwargs.get("model") or (call_kwargs.args[0] if call_kwargs.args else None)
+        assert called_model == "quality"
+
+    def test_router_fallback_config_quality_falls_to_fast(self) -> None:
+        """The Router fallbacks config must specify quality -> fast cross-group fallback."""
+        # Access the Router's fallbacks attribute
+        fallbacks = getattr(llm_router, "fallbacks", None)
+        assert fallbacks is not None, "Router has no fallbacks configured"
+
+        # Find the quality -> fast fallback entry
+        quality_fallback = None
+        for fb in fallbacks:
+            if isinstance(fb, dict) and "quality" in fb:
+                quality_fallback = fb
+                break
+
+        assert quality_fallback is not None, (
+            f"No quality->fast fallback found. Current fallbacks: {fallbacks}"
+        )
+        fallback_targets = quality_fallback["quality"]
+        assert "fast" in fallback_targets, (
+            f"Quality fallback does not target 'fast' group: {fallback_targets}"
+        )