feat(01-02): LLM Backend Pool — LiteLLM Router with Ollama + Anthropic + OpenAI fallback

- Create llm_pool/router.py: LiteLLM Router with fast (Ollama) and quality (Anthropic/OpenAI) model groups
- Configure fallback chain: if the quality providers (Anthropic/OpenAI) fail, requests fall back to the fast (Ollama) group
- Pin LiteLLM to ==1.82.5 (avoid September 2025 OOM regression in later releases)
- Create llm_pool/main.py: FastAPI service on port 8004 with /complete and /health endpoints
- Add providers/__init__.py: reserved for future per-provider customization
- Update docker-compose.yml: add llm-pool and celery-worker service stubs
This commit is contained in:
2026-03-23 10:03:05 -06:00
parent 0054383be0
commit ee2f88e13b
7 changed files with 370 additions and 5 deletions

View File

@@ -64,3 +64,72 @@ services:
capabilities: [gpu]
# Service starts even if no GPU is available — GPU config is optional
restart: unless-stopped
# LLM router service: LiteLLM-backed pool exposing /complete and /health.
llm-pool:
  build:
    context: .
    dockerfile_inline: |
      FROM python:3.12-slim
      WORKDIR /app
      RUN pip install uv
      COPY pyproject.toml ./
      COPY packages/shared ./packages/shared
      COPY packages/llm-pool ./packages/llm-pool
      RUN uv pip install --system -e packages/shared -e packages/llm-pool
      CMD ["uvicorn", "llm_pool.main:app", "--host", "0.0.0.0", "--port", "8004"]
  container_name: konstruct-llm-pool
  ports:
    - "8004:8004"
  networks:
    - konstruct-net
  depends_on:
    # service_started (not service_healthy): start-order dependency only.
    ollama:
      condition: service_started
    redis:
      condition: service_healthy
  environment:
    - ANTHROPIC_API_KEY=${ANTHROPIC_API_KEY:-}
    - OPENAI_API_KEY=${OPENAI_API_KEY:-}
    - OLLAMA_BASE_URL=http://ollama:11434
    - REDIS_URL=redis://redis:6379/0
    - LOG_LEVEL=INFO
  restart: unless-stopped
  healthcheck:
    # FIX: python:3.12-slim does not include curl, so the previous
    # curl-based probe always failed and the container never reported
    # healthy (blocking celery-worker's service_healthy dependency).
    # Probe with the interpreter already present in the image instead.
    test: ["CMD", "python", "-c", "import urllib.request; urllib.request.urlopen('http://localhost:8004/health', timeout=4)"]
    interval: 10s
    timeout: 5s
    retries: 5
# Background task worker for the orchestrator package; consumes jobs from
# the Redis broker and calls the llm-pool service over HTTP.
celery-worker:
  build:
    context: .
    dockerfile_inline: |
      FROM python:3.12-slim
      WORKDIR /app
      RUN pip install uv
      COPY pyproject.toml ./
      COPY packages/shared ./packages/shared
      COPY packages/orchestrator ./packages/orchestrator
      RUN uv pip install --system -e packages/shared -e packages/orchestrator
      CMD ["celery", "-A", "orchestrator.main", "worker", "--loglevel=info"]
  container_name: konstruct-celery-worker
  networks:
    - konstruct-net
  depends_on:
    redis:
      condition: service_healthy
    postgres:
      condition: service_healthy
    llm-pool:
      condition: service_healthy
  environment:
    # NOTE(review): dev credentials are hard-coded in the URL — acceptable
    # for local compose, but confirm this file is never used in deployment.
    - DATABASE_URL=postgresql+asyncpg://konstruct_app:konstruct_dev@postgres:5432/konstruct
    - REDIS_URL=redis://redis:6379/0
    # Broker and result backend use separate Redis logical databases.
    - CELERY_BROKER_URL=redis://redis:6379/1
    - CELERY_RESULT_BACKEND=redis://redis:6379/2
    - LLM_POOL_URL=http://llm-pool:8004
    - ANTHROPIC_API_KEY=${ANTHROPIC_API_KEY:-}
    - OPENAI_API_KEY=${OPENAI_API_KEY:-}
    - OLLAMA_BASE_URL=http://ollama:11434
    - LOG_LEVEL=INFO
  restart: unless-stopped
  # ADDED: this stack gates services on health conditions, but the worker
  # previously had no healthcheck, so a hung worker looked fine to compose.
  # `celery inspect ping` round-trips through the broker, confirming the
  # worker process is up and consuming. ($$ escapes $ for compose.)
  healthcheck:
    test: ["CMD-SHELL", "celery -A orchestrator.main inspect ping -d celery@$$HOSTNAME || exit 1"]
    interval: 30s
    timeout: 10s
    retries: 5
    start_period: 20s