diff --git a/docker-compose.yml b/docker-compose.yml index 7bdddbd..5450707 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -66,9 +66,10 @@ services: environment: - ANTHROPIC_API_KEY=${ANTHROPIC_API_KEY:-} - OPENAI_API_KEY=${OPENAI_API_KEY:-} - - OLLAMA_BASE_URL=http://host.docker.internal:11434 + - OLLAMA_BASE_URL=${OLLAMA_BASE_URL:-http://host.docker.internal:11434} + - OLLAMA_MODEL=${OLLAMA_MODEL:-qwen3:32b} - REDIS_URL=redis://redis:6379/0 - - LOG_LEVEL=INFO + - LOG_LEVEL=${LOG_LEVEL:-INFO} restart: unless-stopped healthcheck: test: ["CMD-SHELL", "curl -sf http://localhost:8004/health || exit 1"] diff --git a/packages/llm-pool/llm_pool/router.py b/packages/llm-pool/llm_pool/router.py index e18fbe1..4ba9df9 100644 --- a/packages/llm-pool/llm_pool/router.py +++ b/packages/llm-pool/llm_pool/router.py @@ -32,7 +32,7 @@ _model_list: list[dict] = [ { "model_name": "local", "litellm_params": { - "model": "ollama/qwen3:32b", + "model": f"ollama/{settings.ollama_model}", "api_base": settings.ollama_base_url, }, }, @@ -40,7 +40,7 @@ _model_list: list[dict] = [ { "model_name": "fast", "litellm_params": { - "model": "ollama/qwen3:32b", + "model": f"ollama/{settings.ollama_model}", "api_base": settings.ollama_base_url, }, }, @@ -48,7 +48,7 @@ _model_list: list[dict] = [ { "model_name": "economy", "litellm_params": { - "model": "ollama/qwen3:32b", + "model": f"ollama/{settings.ollama_model}", "api_base": settings.ollama_base_url, }, }, @@ -56,7 +56,7 @@ _model_list: list[dict] = [ { "model_name": "balanced", "litellm_params": { - "model": "ollama/qwen3:32b", + "model": f"ollama/{settings.ollama_model}", "api_base": settings.ollama_base_url, }, }, diff --git a/packages/shared/shared/config.py b/packages/shared/shared/config.py index d2d6d40..7a134d9 100644 --- a/packages/shared/shared/config.py +++ b/packages/shared/shared/config.py @@ -112,6 +112,10 @@ class Settings(BaseSettings): default="http://localhost:11434", description="Ollama inference server base URL", ) + ollama_model: str = Field( + default="qwen3:32b", + description="Ollama model to use for local inference (e.g., qwen3:32b, llama3.1:70b)", + ) # ------------------------------------------------------------------------- # Auth / Security