- Create orchestrator/main.py: Celery app with Redis broker/backend, task_acks_late=True, 10-min timeout
- Create orchestrator/tasks.py: SYNC def handle_message (critical pattern: asyncio.run for async work)
  - Deserializes KonstructMessage, sets RLS context, loads agent from DB, calls run_agent
  - Retries up to 3x on deserialization failure
- Create orchestrator/agents/builder.py: build_system_prompt assembles system_prompt + identity + persona + AI transparency clause
- Create orchestrator/agents/runner.py: run_agent posts to llm-pool /complete via httpx, returns polite fallback on error
- Add Celery[redis] dependency to orchestrator pyproject.toml
- Create tests/integration/test_llm_fallback.py: 7 tests for fallback routing and 503 on total failure (LLM-01)
- Create tests/integration/test_llm_providers.py: 12 tests verifying all three providers configured correctly (LLM-02)
- All 19 integration tests pass
173 lines · 7.4 KiB · Python
"""
|
|
Integration tests for LLM provider configuration (LLM-02).
|
|
|
|
Tests verify that:
|
|
1. The LiteLLM Router model_list contains entries for all three providers
|
|
(Ollama/fast, Anthropic/quality, OpenAI/quality).
|
|
2. A request with model="fast" routes to the Ollama configuration.
|
|
3. A request with model="quality" routes to an Anthropic or OpenAI configuration.
|
|
4. Provider entries reference the correct model identifiers from CLAUDE.md.
|
|
|
|
These tests inspect the router configuration directly and mock acompletion to
|
|
verify routing without live API calls.
|
|
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
from unittest.mock import AsyncMock, MagicMock, patch
|
|
|
|
import pytest
|
|
from fastapi.testclient import TestClient
|
|
|
|
from llm_pool.main import app
|
|
from llm_pool.router import _model_list, llm_router
|
|
|
|
client = TestClient(app)
|
|
|
|
|
|
def _make_completion_response(content: str = "test") -> MagicMock:
|
|
response = MagicMock()
|
|
response.choices = [MagicMock()]
|
|
response.choices[0].message.content = content
|
|
return response
|
|
|
|
|
|
class TestProviderConfiguration:
|
|
"""LLM-02: Provider configuration — all three providers are present and correct."""
|
|
|
|
def test_model_list_has_three_entries(self) -> None:
|
|
"""The model_list must have exactly three entries (fast, quality x2)."""
|
|
assert len(_model_list) == 3
|
|
|
|
def test_fast_group_present_in_model_list(self) -> None:
|
|
"""The 'fast' model group must exist in the model_list."""
|
|
fast_entries = [m for m in _model_list if m["model_name"] == "fast"]
|
|
assert len(fast_entries) >= 1, "No 'fast' model group found in model_list"
|
|
|
|
def test_quality_group_present_in_model_list(self) -> None:
|
|
"""The 'quality' model group must have at least two entries (Anthropic + OpenAI)."""
|
|
quality_entries = [m for m in _model_list if m["model_name"] == "quality"]
|
|
assert len(quality_entries) >= 2, "Expected at least 2 'quality' entries (Anthropic + OpenAI)"
|
|
|
|
def test_fast_group_uses_ollama_model(self) -> None:
|
|
"""The fast group must route to an ollama/* model."""
|
|
fast_entries = [m for m in _model_list if m["model_name"] == "fast"]
|
|
assert fast_entries, "No fast entry found"
|
|
ollama_models = [
|
|
e for e in fast_entries
|
|
if e["litellm_params"]["model"].startswith("ollama/")
|
|
]
|
|
assert ollama_models, f"Fast group does not use an ollama model: {fast_entries}"
|
|
|
|
def test_fast_group_has_ollama_api_base(self) -> None:
|
|
"""The fast group entry must specify an api_base pointing to Ollama."""
|
|
fast_entries = [m for m in _model_list if m["model_name"] == "fast"]
|
|
for entry in fast_entries:
|
|
params = entry["litellm_params"]
|
|
assert "api_base" in params, f"Fast group entry missing api_base: {entry}"
|
|
|
|
def test_quality_group_has_anthropic_entry(self) -> None:
|
|
"""Quality group must include an anthropic/* model."""
|
|
quality_entries = [m for m in _model_list if m["model_name"] == "quality"]
|
|
anthropic_entries = [
|
|
e for e in quality_entries
|
|
if e["litellm_params"]["model"].startswith("anthropic/")
|
|
]
|
|
assert anthropic_entries, f"No Anthropic entry in quality group: {quality_entries}"
|
|
|
|
def test_quality_group_has_openai_entry(self) -> None:
|
|
"""Quality group must include an openai/* model as the fallback."""
|
|
quality_entries = [m for m in _model_list if m["model_name"] == "quality"]
|
|
openai_entries = [
|
|
e for e in quality_entries
|
|
if e["litellm_params"]["model"].startswith("openai/")
|
|
]
|
|
assert openai_entries, f"No OpenAI entry in quality group: {quality_entries}"
|
|
|
|
def test_anthropic_model_is_claude_sonnet(self) -> None:
|
|
"""Anthropic entry must use the correct model from CLAUDE.md architecture."""
|
|
quality_entries = [m for m in _model_list if m["model_name"] == "quality"]
|
|
anthropic_entry = next(
|
|
(e for e in quality_entries if e["litellm_params"]["model"].startswith("anthropic/")),
|
|
None,
|
|
)
|
|
assert anthropic_entry is not None
|
|
model = anthropic_entry["litellm_params"]["model"]
|
|
assert "claude-sonnet" in model, f"Expected claude-sonnet model, got: {model}"
|
|
|
|
def test_openai_model_is_gpt4o(self) -> None:
|
|
"""OpenAI entry must use gpt-4o as specified in architecture."""
|
|
quality_entries = [m for m in _model_list if m["model_name"] == "quality"]
|
|
openai_entry = next(
|
|
(e for e in quality_entries if e["litellm_params"]["model"].startswith("openai/")),
|
|
None,
|
|
)
|
|
assert openai_entry is not None
|
|
model = openai_entry["litellm_params"]["model"]
|
|
assert "gpt-4o" in model, f"Expected gpt-4o model, got: {model}"
|
|
|
|
def test_fast_request_calls_acompletion_with_fast_model(self) -> None:
|
|
"""A fast model request must invoke acompletion with model='fast'."""
|
|
mock_response = _make_completion_response("ollama says hi")
|
|
|
|
with patch("llm_pool.router.llm_router.acompletion", new_callable=AsyncMock) as mock_complete:
|
|
mock_complete.return_value = mock_response
|
|
|
|
response = client.post(
|
|
"/complete",
|
|
json={
|
|
"model": "fast",
|
|
"messages": [{"role": "user", "content": "Hi"}],
|
|
"tenant_id": "tenant-fast",
|
|
},
|
|
)
|
|
|
|
assert response.status_code == 200
|
|
call_kwargs = mock_complete.call_args
|
|
assert call_kwargs is not None
|
|
called_model = call_kwargs.kwargs.get("model") or (call_kwargs.args[0] if call_kwargs.args else None)
|
|
assert called_model == "fast"
|
|
|
|
def test_quality_request_calls_acompletion_with_quality_model(self) -> None:
|
|
"""A quality model request must invoke acompletion with model='quality'."""
|
|
mock_response = _make_completion_response("anthropic says hi")
|
|
|
|
with patch("llm_pool.router.llm_router.acompletion", new_callable=AsyncMock) as mock_complete:
|
|
mock_complete.return_value = mock_response
|
|
|
|
response = client.post(
|
|
"/complete",
|
|
json={
|
|
"model": "quality",
|
|
"messages": [{"role": "user", "content": "Hi"}],
|
|
"tenant_id": "tenant-quality",
|
|
},
|
|
)
|
|
|
|
assert response.status_code == 200
|
|
call_kwargs = mock_complete.call_args
|
|
assert call_kwargs is not None
|
|
called_model = call_kwargs.kwargs.get("model") or (call_kwargs.args[0] if call_kwargs.args else None)
|
|
assert called_model == "quality"
|
|
|
|
def test_router_fallback_config_quality_falls_to_fast(self) -> None:
|
|
"""The Router fallbacks config must specify quality -> fast cross-group fallback."""
|
|
# Access the Router's fallbacks attribute
|
|
fallbacks = getattr(llm_router, "fallbacks", None)
|
|
assert fallbacks is not None, "Router has no fallbacks configured"
|
|
|
|
# Find the quality -> fast fallback entry
|
|
quality_fallback = None
|
|
for fb in fallbacks:
|
|
if isinstance(fb, dict) and "quality" in fb:
|
|
quality_fallback = fb
|
|
break
|
|
|
|
assert quality_fallback is not None, (
|
|
f"No quality->fast fallback found. Current fallbacks: {fallbacks}"
|
|
)
|
|
fallback_targets = quality_fallback["quality"]
|
|
assert "fast" in fallback_targets, (
|
|
f"Quality fallback does not target 'fast' group: {fallback_targets}"
|
|
)
|