- Create llm_pool/router.py: LiteLLM Router with fast (Ollama) and quality (Anthropic/OpenAI) model groups (see the sketch after this list)
- Configure fallback chain: quality providers fail -> fast group
- Pin LiteLLM to ==1.82.5 (avoid September 2025 OOM regression in later releases)
- Create llm_pool/main.py: FastAPI service on port 8004 with /complete and /health endpoints (sketched after the pyproject listing below)
- Add providers/__init__.py: reserved for future per-provider customization
- Update docker-compose.yml: add llm-pool and celery-worker service stubs
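A minimal sketch of what llm_pool/router.py could look like, assuming LiteLLM's Router API. The group names ("fast", "quality"), the Ollama model tag, the api_base host, and the hosted model IDs are illustrative assumptions, not taken from the change itself.

```python
# llm_pool/router.py
from litellm import Router

model_list = [
    # "fast" group: local Ollama (model tag and host are assumptions)
    {
        "model_name": "fast",
        "litellm_params": {
            "model": "ollama/llama3.1",
            "api_base": "http://ollama:11434",
        },
    },
    # "quality" group: hosted providers; LiteLLM reads API keys from
    # the environment (ANTHROPIC_API_KEY / OPENAI_API_KEY)
    {
        "model_name": "quality",
        "litellm_params": {"model": "anthropic/claude-3-5-sonnet-20241022"},
    },
    {
        "model_name": "quality",
        "litellm_params": {"model": "openai/gpt-4o"},
    },
]

# Fallback chain: if every "quality" deployment fails, retry on "fast"
router = Router(model_list=model_list, fallbacks=[{"quality": ["fast"]}])
```

Routing a request to model="quality" load-balances across the Anthropic and OpenAI deployments; the fallbacks entry is what redirects failed quality calls to the local Ollama group.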
pyproject.toml:

```toml
[build-system]
requires = ["hatchling"]
build-backend = "hatchling.build"

[project]
name = "konstruct-llm-pool"
version = "0.1.0"
description = "LLM Backend Pool — LiteLLM router for Ollama, vLLM, OpenAI, Anthropic, and BYO endpoints"
requires-python = ">=3.12"
dependencies = [
    "konstruct-shared",
    # Pinned: do NOT upgrade past 1.82.5 — a September 2025 OOM regression exists
    # in later releases. Verify fix before bumping.
    "litellm==1.82.5",
    "fastapi[standard]>=0.115.0",
    "httpx>=0.28.0",
]

[tool.uv.sources]
konstruct-shared = { workspace = true }

[tool.hatch.build.targets.wheel]
packages = ["llm_pool"]
```
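And a minimal sketch of llm_pool/main.py under the same assumptions. The request schema (a prompt plus a model_group defaulting to "quality") is invented for illustration; only the /complete and /health endpoints and port 8004 come from the change itself.

```python
# llm_pool/main.py
from fastapi import FastAPI
from pydantic import BaseModel

from llm_pool.router import router

app = FastAPI(title="konstruct-llm-pool")


class CompleteRequest(BaseModel):
    prompt: str
    model_group: str = "quality"  # assumed field; "quality" falls back to "fast"


@app.post("/complete")
async def complete(req: CompleteRequest) -> dict:
    # Router handles provider selection, retries, and the fallback chain
    resp = await router.acompletion(
        model=req.model_group,
        messages=[{"role": "user", "content": req.prompt}],
    )
    return {"text": resp.choices[0].message.content}


@app.get("/health")
async def health() -> dict:
    return {"status": "ok"}
```

With fastapi[standard] installed, the service can be served on the pinned port via `fastapi run llm_pool/main.py --port 8004` (or `uvicorn llm_pool.main:app --port 8004`).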