diff --git a/packages/llm-pool/llm_pool/router.py b/packages/llm-pool/llm_pool/router.py
index 5e60fd6..e18fbe1 100644
--- a/packages/llm-pool/llm_pool/router.py
+++ b/packages/llm-pool/llm_pool/router.py
@@ -28,15 +28,46 @@ logger = logging.getLogger(__name__)
-# Model list — three entries across two groups
+# Model list — seven entries across five groups
 # ---------------------------------------------------------------------------
 _model_list: list[dict] = [
-    # fast group — local Ollama, no API cost
+    # ── local group — Ollama, no API cost ──
     {
-        "model_name": "fast",
+        "model_name": "local",
         "litellm_params": {
-            "model": "ollama/qwen3:8b",
+            "model": "ollama/qwen3:32b",
             "api_base": settings.ollama_base_url,
         },
     },
-    # quality group — Anthropic primary
+    # ── fast group — same as local (aliases for preference mapping) ──
+    {
+        "model_name": "fast",
+        "litellm_params": {
+            "model": "ollama/qwen3:32b",
+            "api_base": settings.ollama_base_url,
+        },
+    },
+    # ── economy group — local model, cheaper than commercial ──
+    {
+        "model_name": "economy",
+        "litellm_params": {
+            "model": "ollama/qwen3:32b",
+            "api_base": settings.ollama_base_url,
+        },
+    },
+    # ── balanced group — Ollama primary, commercial fallback ──
+    {
+        "model_name": "balanced",
+        "litellm_params": {
+            "model": "ollama/qwen3:32b",
+            "api_base": settings.ollama_base_url,
+        },
+    },
+    {
+        "model_name": "balanced",
+        "litellm_params": {
+            "model": "anthropic/claude-sonnet-4-20250514",
+            "api_key": settings.anthropic_api_key,
+        },
+    },
+    # ── quality group — Anthropic primary, OpenAI fallback ──
     {
         "model_name": "quality",
         "litellm_params": {
@@ -44,7 +75,6 @@ _model_list: list[dict] = [
             "api_key": settings.anthropic_api_key,
         },
     },
-    # quality group — OpenAI fallback (within the same group)
     {
         "model_name": "quality",
         "litellm_params": {
@@ -60,7 +90,7 @@ _model_list: list[dict] = [
 llm_router = Router(
     model_list=_model_list,
-    # If all quality providers fail, fall back to the fast group
-    fallbacks=[{"quality": ["fast"]}],
+    # If all quality or balanced providers fail, fall back to the fast group
+    fallbacks=[{"quality": ["fast"]}, {"balanced": ["fast"]}],
     routing_strategy="latency-based-routing",
     num_retries=2,
     set_verbose=False,