diff --git a/.DS_Store b/.DS_Store new file mode 100644 index 0000000..8aee692 Binary files /dev/null and b/.DS_Store differ diff --git a/ai-templates-0/README.md b/ai-templates-0/README.md new file mode 100644 index 0000000..b0db4dd --- /dev/null +++ b/ai-templates-0/README.md @@ -0,0 +1,187 @@ +# Portainer AI Templates (v2) + +> **26 production-ready AI/ML Docker Compose stacks for Portainer** — filling the AI gap in the official v3 template library. Aligned with an AI infrastructure positioning strategy for Portainer. + +## Background + +The official [Portainer v3 templates](https://raw.githubusercontent.com/portainer/templates/v3/templates.json) contain **71 templates** with **zero pure AI/ML deployments**. This repository provides a curated, Portainer-compatible template set covering the entire AI infrastructure stack — from edge inference to distributed training to governed ML pipelines. + +See [docs/AI_GAP_ANALYSIS.md](docs/AI_GAP_ANALYSIS.md) for the full gap analysis. + +## Homepage Alignment + +These templates map directly to the AI infrastructure positioning pillars: + +| Mock-Up Pillar | Templates Covering It | +|---|---| +| **GPU-Aware Fleet Management** | Triton, vLLM, NVIDIA NIM, Ray Cluster, Ollama, LocalAI | +| **Model Lifecycle Governance** | MLflow + MinIO (Production MLOps), Prefect, BentoML, Label Studio | +| **Edge AI Deployment** | ONNX Runtime (CPU/edge profile), Triton, DeepStream | +| **Self-Service AI Stacks** | Open WebUI, Langflow, Flowise, n8n AI, Jupyter GPU | +| **LLM Fine-Tune** (diagram) | Ray Cluster (distributed training) | +| **RAG Pipeline** (diagram) | Qdrant, ChromaDB, Weaviate + Langflow/Flowise | +| **Vision Model** (diagram) | DeepStream, ComfyUI, Stable Diffusion WebUI | +| **Anomaly Detection** (diagram) | DeepStream (video analytics), Triton (custom models) | + +## Quick Start + +### Option A: Use as Custom Template URL in Portainer + +1. In Portainer, go to **Settings > App Templates** +2. 
Set the URL to: + ``` + https://git.oe74.net/adelorenzo/portainer_scripts/raw/branch/master/ai-templates/portainer-ai-templates.json + ``` +3. Click **Save** — all 26 AI templates appear in your App Templates list + +### Option B: Deploy Individual Stacks + +```bash +cd stacks/ollama +docker compose up -d +``` + +## Template Catalog + +### LLM Inference and Model Serving + +| # | Template | Port | GPU | Description | +|---|---|---|---|---| +| 1 | **Ollama** | 11434 | Yes | Local LLM engine — Llama, Mistral, Qwen, Gemma, Phi | +| 2 | **Open WebUI + Ollama** | 3000 | Yes | ChatGPT-like UI bundled with Ollama backend | +| 3 | **LocalAI** | 8080 | Yes | Drop-in OpenAI API replacement | +| 4 | **vLLM** | 8000 | Yes | High-throughput serving with PagedAttention | +| 5 | **Text Gen WebUI** | 7860 | Yes | Comprehensive LLM interface (oobabooga) | +| 6 | **LiteLLM Proxy** | 4000 | No | Unified API gateway for 100+ LLM providers | +| 26 | **NVIDIA NIM** | 8000 | Yes | Enterprise TensorRT-LLM optimized inference | + +### Production Inference Serving + +| # | Template | Port | GPU | Description | +|---|---|---|---|---| +| 19 | **NVIDIA Triton** | 8000 | Yes | Multi-framework inference server (TensorRT, ONNX, PyTorch, TF) | +| 20 | **ONNX Runtime** | 8001 | Optional | Lightweight inference with GPU and CPU/edge profiles | +| 24 | **BentoML** | 3000 | Yes | Model packaging and serving with metrics | + +### Image and Video Generation + +| # | Template | Port | GPU | Description | +|---|---|---|---|---| +| 7 | **ComfyUI** | 8188 | Yes | Node-based Stable Diffusion workflow engine | +| 8 | **Stable Diffusion WebUI** | 7860 | Yes | AUTOMATIC1111 interface for image generation | + +### Industrial AI and Computer Vision + +| # | Template | Port | GPU | Description | +|---|---|---|---|---| +| 21 | **NVIDIA DeepStream** | 8554 | Yes | Video analytics for inspection, anomaly detection, smart factory | + +### Distributed Training + +| # | Template | Port | GPU | Description | 
+|---|---|---|---|---| +| 22 | **Ray Cluster** | 8265 | Yes | Head + workers for LLM fine-tuning, distributed training, Ray Serve | + +### AI Agents and Workflows + +| # | Template | Port | GPU | Description | +|---|---|---|---|---| +| 9 | **Langflow** | 7860 | No | Visual multi-agent and RAG pipeline builder | +| 10 | **Flowise** | 3000 | No | Drag-and-drop LLM chatflow builder | +| 11 | **n8n (AI-Enabled)** | 5678 | No | Workflow automation with AI agent nodes | + +### Vector Databases + +| # | Template | Port | GPU | Description | +|---|---|---|---|---| +| 12 | **Qdrant** | 6333 | No | High-performance vector similarity search | +| 13 | **ChromaDB** | 8000 | No | AI-native embedding database | +| 14 | **Weaviate** | 8080 | No | Vector DB with built-in vectorization modules | + +### ML Operations and Governance + +| # | Template | Port | GPU | Description | +|---|---|---|---|---| +| 15 | **MLflow** | 5000 | No | Experiment tracking and model registry (SQLite) | +| 25 | **MLflow + MinIO** | 5000 | No | Production MLOps: PostgreSQL + S3 artifact store | +| 23 | **Prefect** | 4200 | No | Governed ML pipeline orchestration with audit logging | +| 16 | **Label Studio** | 8080 | No | Multi-type data labeling platform | +| 17 | **Jupyter (GPU/PyTorch)** | 8888 | Yes | GPU-accelerated notebooks | + +### Speech and Audio + +| # | Template | Port | GPU | Description | +|---|---|---|---|---| +| 18 | **Whisper ASR** | 9000 | Yes | Speech-to-text API server | + +## GPU Requirements + +Templates marked **GPU: Yes** require: +- NVIDIA GPU with CUDA support +- [NVIDIA Container Toolkit](https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/install-guide.html) installed +- Docker configured with `nvidia` runtime + +**Edge deployments (ONNX Runtime CPU profile):** No GPU required — runs on ARM or x86 with constrained CPU/memory limits. + +For AMD GPUs (ROCm), switch each service's `image` to a ROCm-compatible build and remove the NVIDIA device reservation from its `deploy.resources` section. 
+ +## File Structure + +``` +ai-templates/ +├── portainer-ai-templates.json # Portainer v3 template definition (26 templates) +├── README.md +├── docs/ +│ └── AI_GAP_ANALYSIS.md # Gap analysis of the official templates +└── stacks/ + ├── ollama/ # LLM Inference + ├── open-webui/ + ├── localai/ + ├── vllm/ + ├── text-generation-webui/ + ├── litellm/ + ├── nvidia-nim/ # v2: Enterprise inference + ├── triton/ # v2: Production inference serving + ├── onnx-runtime/ # v2: Edge-friendly inference + ├── bentoml/ # v2: Model packaging + serving + ├── deepstream/ # v2: Industrial computer vision + ├── ray-cluster/ # v2: Distributed training + ├── prefect/ # v2: Governed ML pipelines + ├── minio-mlops/ # v2: Production MLOps stack + ├── comfyui/ # Image generation + ├── stable-diffusion-webui/ + ├── langflow/ # AI agents + ├── flowise/ + ├── n8n-ai/ + ├── qdrant/ # Vector databases + ├── chromadb/ + ├── weaviate/ + ├── mlflow/ # ML operations + ├── label-studio/ + ├── jupyter-gpu/ + └── whisper/ # Speech +``` + +## Changelog + +### v2 (March 2026) +- Added 8 templates to close the alignment gap with AI infrastructure positioning: + - **NVIDIA Triton Inference Server** — production multi-framework inference + - **ONNX Runtime Server** — lightweight edge inference with CPU/GPU profiles + - **NVIDIA DeepStream** — industrial computer vision and video analytics + - **Ray Cluster (GPU)** — distributed training and fine-tuning + - **Prefect** — governed ML pipeline orchestration + - **BentoML** — model packaging and serving + - **MLflow + MinIO** — production MLOps with S3 artifact governance + - **NVIDIA NIM** — enterprise-optimized LLM inference + +### v1 (March 2026) +- Initial 18 AI templates covering LLM inference, image generation, agents, vector DBs, MLOps, and speech + +## License + +These templates reference publicly available Docker images from their respective maintainers. Each tool has its own license — refer to the individual project documentation. 
+ +--- + +*Portainer AI Templates by Adolfo De Lorenzo — March 2026* diff --git a/ai-templates-0/docs/AI_GAP_ANALYSIS.md b/ai-templates-0/docs/AI_GAP_ANALYSIS.md new file mode 100644 index 0000000..56beee0 --- /dev/null +++ b/ai-templates-0/docs/AI_GAP_ANALYSIS.md @@ -0,0 +1,89 @@ +# Portainer v3 Templates — AI Gap Analysis + +## Overview + +The official Portainer v3 templates (`templates.json`) contain **71 templates**. The table below summarizes the main categories; the counts shown sum to 53, so it is not an exhaustive breakdown: + +| Category | Count | Examples | +|---|---|---| +| Database | 10 | MySQL, PostgreSQL, Mongo, Redis, CrateDB, Elasticsearch, CockroachDB, TimescaleDB | +| Edge/IIoT | 14 | Softing EdgeConnectors, OPC Router, TOSIBOX, EMQX MQTT, Mosquitto, Node-RED, Litmus Edge | +| Web/CMS | 8 | Nginx, Caddy, WordPress, Drupal, Joomla, Ghost, Plone | +| DevOps/CI | 5 | Jenkins, GitLab CE, Dokku, Registry | +| Monitoring | 4 | Grafana, Datadog, Sematext, Swarm Monitoring | +| Messaging | 1 | RabbitMQ | +| Storage | 3 | Minio, Scality S3, File Browser | +| Serverless | 2 | OpenFaaS, IronFunctions | +| Other | 6 | Ubuntu, NodeJS, Portainer Agent, OpenAMT, FDO, LiveSwitch | + +## AI Template Count in Official Repo: **0** + +There are **zero purely AI/ML-focused templates** in the current v3 template list. + +### Closest to AI + +- **Litmus Edge** (#70, #71) — Described as "enables industrial AI at scale" but is an OT data platform, not an AI deployment. +- **Elasticsearch** (#13) — Used in vector search / RAG pipelines but is a general-purpose search engine. 
+ +--- + +## v2 Coverage Map + +This repository now provides **26 AI templates** organized into 9 sub-categories, mapped against the 4 AI infrastructure positioning pillars: + +### Pillar 1: GPU-Aware Fleet Management +| Template | What It Proves | +|---|---| +| NVIDIA Triton | Multi-framework model serving across GPU fleet with dynamic batching | +| vLLM | High-throughput LLM inference with tensor parallelism across GPUs | +| NVIDIA NIM | Enterprise-grade NVIDIA-optimized inference microservices | +| Ray Cluster | Distributed GPU scheduling across head + worker nodes | +| Ollama / LocalAI | Single-node GPU inference engines | + +### Pillar 2: Model Lifecycle Governance +| Template | What It Proves | +|---|---| +| MLflow + MinIO (Prod) | Versioned model registry + S3 artifact store + PostgreSQL tracking | +| Prefect | Governed pipeline orchestration with scheduling, retries, audit logs | +| BentoML | Model packaging with versioning and metrics endpoints | +| Label Studio | Data labeling with project-level access control | +| MLflow (standalone) | Experiment tracking and model comparison | + +### Pillar 3: Edge AI Deployment +| Template | What It Proves | +|---|---| +| ONNX Runtime (edge profile) | CPU-only inference with memory/CPU limits for constrained devices | +| NVIDIA Triton | Supports Jetson via multiarch images, model polling for OTA updates | +| NVIDIA DeepStream | Video analytics pipeline for factory-floor cameras | + +### Pillar 4: Self-Service AI Stacks +| Template | What It Proves | +|---|---| +| Open WebUI + Ollama | One-click ChatGPT-like deployment, no CLI needed | +| Langflow / Flowise | Visual drag-and-drop agent builders | +| n8n (AI-Enabled) | Workflow automation with AI nodes, accessible to non-developers | +| Jupyter GPU | Notebook environment for data science teams | + +### Architecture Diagram Workloads +| Diagram Node | Template(s) | +|---|---| +| LLM Fine-Tune | Ray Cluster | +| RAG Pipeline | Qdrant + ChromaDB + Weaviate + 
Langflow/Flowise | +| Vision Model | DeepStream, ComfyUI, Stable Diffusion WebUI | +| Anomaly Detection | DeepStream (video analytics), Triton (custom ONNX/TensorRT models) | + +--- + +## Remaining Gaps (Future Work) + +| Gap | Why It Matters | Potential Addition | +|---|---|---| +| ARM/Jetson-native images | True edge AI on embedded devices | Triton Jetson images, ONNX Runtime ARM builds | +| Air-gapped deployment | Industrial environments with no internet | Offline model bundling scripts | +| Model A/B testing | Production model governance | Seldon Core or custom Envoy routing | +| Federated learning | Privacy-preserving distributed training | NVIDIA FLARE or Flower | +| LLM evaluation/guardrails | Safety and quality governance | Ragas, DeepEval, NVIDIA NeMo Guardrails | + +--- + +*Generated: March 2026 — For use with Portainer Business Edition and Community Edition* diff --git a/ai-templates-0/portainer-ai-templates.json b/ai-templates-0/portainer-ai-templates.json new file mode 100644 index 0000000..587c965 --- /dev/null +++ b/ai-templates-0/portainer-ai-templates.json @@ -0,0 +1,959 @@ +{ + "version": "3", + "templates": [ + { + "id": 1, + "type": 3, + "title": "Ollama", + "description": "Local LLM inference engine supporting Llama, Mistral, Qwen, Gemma, Phi and 100+ models with GPU acceleration", + "note": "Requires NVIDIA GPU with Docker GPU runtime configured. 
Pull models after deployment with: docker exec ollama ollama pull llama3.1", + "categories": ["ai", "llm", "inference"], + "platform": "linux", + "logo": "https://ollama.com/public/ollama.png", + "repository": { + "url": "https://git.oe74.net/adelorenzo/portainer_scripts", + "stackfile": "ai-templates/stacks/ollama/docker-compose.yml" + }, + "env": [ + { + "name": "OLLAMA_PORT", + "label": "Ollama API port", + "default": "11434" + }, + { + "name": "OLLAMA_NUM_PARALLEL", + "label": "Max parallel requests", + "default": "4" + }, + { + "name": "OLLAMA_MAX_LOADED_MODELS", + "label": "Max models loaded in VRAM", + "default": "2" + } + ] + }, + { + "id": 2, + "type": 3, + "title": "Open WebUI + Ollama", + "description": "Full-featured ChatGPT-like web interface bundled with Ollama backend for local LLM inference", + "note": "Access the web UI at the configured port. First user to register becomes admin. Requires NVIDIA GPU.", + "categories": ["ai", "llm", "chat-ui"], + "platform": "linux", + "logo": "https://docs.openwebui.com/img/logo.png", + "repository": { + "url": "https://git.oe74.net/adelorenzo/portainer_scripts", + "stackfile": "ai-templates/stacks/open-webui/docker-compose.yml" + }, + "env": [ + { + "name": "OPEN_WEBUI_PORT", + "label": "Web UI port", + "default": "3000" + }, + { + "name": "OLLAMA_PORT", + "label": "Ollama API port", + "default": "11434" + }, + { + "name": "WEBUI_SECRET_KEY", + "label": "Secret key for sessions", + "default": "changeme" + }, + { + "name": "ENABLE_SIGNUP", + "label": "Allow user registration", + "default": "true" + } + ] + }, + { + "id": 3, + "type": 3, + "title": "LocalAI", + "description": "Drop-in OpenAI API compatible replacement. Run LLMs, generate images, audio locally with GPU acceleration", + "note": "Exposes an OpenAI-compatible API at /v1/. 
Models can be loaded via the API or placed in the models volume.", + "categories": ["ai", "llm", "openai-api"], + "platform": "linux", + "logo": "https://localai.io/logo.png", + "repository": { + "url": "https://git.oe74.net/adelorenzo/portainer_scripts", + "stackfile": "ai-templates/stacks/localai/docker-compose.yml" + }, + "env": [ + { + "name": "LOCALAI_PORT", + "label": "API port", + "default": "8080" + }, + { + "name": "THREADS", + "label": "CPU threads for inference", + "default": "4" + }, + { + "name": "CONTEXT_SIZE", + "label": "Default context window size", + "default": "4096" + } + ] + }, + { + "id": 4, + "type": 3, + "title": "vLLM", + "description": "High-throughput LLM serving engine with PagedAttention, continuous batching, and OpenAI-compatible API", + "note": "Requires NVIDIA GPU with sufficient VRAM for the chosen model. HuggingFace token needed for gated models.", + "categories": ["ai", "llm", "inference", "high-performance"], + "platform": "linux", + "logo": "https://docs.vllm.ai/en/latest/_static/vllm-logo-text-light.png", + "repository": { + "url": "https://git.oe74.net/adelorenzo/portainer_scripts", + "stackfile": "ai-templates/stacks/vllm/docker-compose.yml" + }, + "env": [ + { + "name": "VLLM_PORT", + "label": "API port", + "default": "8000" + }, + { + "name": "MODEL_NAME", + "label": "HuggingFace model ID", + "default": "meta-llama/Llama-3.1-8B-Instruct" + }, + { + "name": "HF_TOKEN", + "label": "HuggingFace access token" + }, + { + "name": "MAX_MODEL_LEN", + "label": "Max sequence length", + "default": "4096" + }, + { + "name": "GPU_MEM_UTIL", + "label": "GPU memory utilization (0-1)", + "default": "0.90" + }, + { + "name": "TENSOR_PARALLEL", + "label": "Tensor parallel GPU count", + "default": "1" + } + ] + }, + { + "id": 5, + "type": 3, + "title": "Text Generation WebUI", + "description": "Comprehensive web UI for running LLMs locally (oobabooga). Supports GGUF, GPTQ, AWQ, EXL2, and HF formats", + "note": "Requires NVIDIA GPU. 
Models should be placed in the models volume. Supports extensions for RAG, TTS, and more.", + "categories": ["ai", "llm", "chat-ui"], + "platform": "linux", + "logo": "https://raw.githubusercontent.com/oobabooga/text-generation-webui/main/docs/logo.png", + "repository": { + "url": "https://git.oe74.net/adelorenzo/portainer_scripts", + "stackfile": "ai-templates/stacks/text-generation-webui/docker-compose.yml" + }, + "env": [ + { + "name": "WEBUI_PORT", + "label": "Web UI port", + "default": "7860" + }, + { + "name": "API_PORT", + "label": "API port", + "default": "5000" + }, + { + "name": "STREAM_PORT", + "label": "Streaming API port", + "default": "5005" + }, + { + "name": "EXTRA_LAUNCH_ARGS", + "label": "Extra launch arguments", + "default": "--listen --api" + } + ] + }, + { + "id": 6, + "type": 3, + "title": "LiteLLM Proxy", + "description": "Unified LLM API gateway supporting 100+ providers (OpenAI, Anthropic, Ollama, vLLM, etc.) with spend tracking and load balancing", + "note": "Configure models in /app/config/litellm_config.yaml after deployment. Includes PostgreSQL for usage tracking.", + "categories": ["ai", "llm", "api-gateway", "proxy"], + "platform": "linux", + "logo": "https://litellm.ai/favicon.ico", + "repository": { + "url": "https://git.oe74.net/adelorenzo/portainer_scripts", + "stackfile": "ai-templates/stacks/litellm/docker-compose.yml" + }, + "env": [ + { + "name": "LITELLM_PORT", + "label": "Proxy API port", + "default": "4000" + }, + { + "name": "LITELLM_MASTER_KEY", + "label": "Master API key", + "default": "sk-master-key" + }, + { + "name": "PG_USER", + "label": "PostgreSQL user", + "default": "litellm" + }, + { + "name": "PG_PASSWORD", + "label": "PostgreSQL password", + "default": "litellm" + } + ] + }, + { + "id": 7, + "type": 3, + "title": "ComfyUI", + "description": "Node-based Stable Diffusion workflow engine for image and video generation with GPU acceleration", + "note": "Requires NVIDIA GPU. 
Access the node editor at the configured port. Models go in the models volume.", + "categories": ["ai", "image-generation", "stable-diffusion"], + "platform": "linux", + "logo": "https://raw.githubusercontent.com/comfyanonymous/ComfyUI/master/web/assets/comfyui-logo.png", + "repository": { + "url": "https://git.oe74.net/adelorenzo/portainer_scripts", + "stackfile": "ai-templates/stacks/comfyui/docker-compose.yml" + }, + "env": [ + { + "name": "COMFYUI_PORT", + "label": "Web UI port", + "default": "8188" + }, + { + "name": "CLI_ARGS", + "label": "Launch arguments", + "default": "--listen 0.0.0.0 --port 8188" + } + ] + }, + { + "id": 8, + "type": 3, + "title": "Stable Diffusion WebUI", + "description": "AUTOMATIC1111 web interface for Stable Diffusion image generation with extensive extension ecosystem", + "note": "Requires NVIDIA GPU with 8GB+ VRAM. First startup downloads the base model and may take several minutes.", + "categories": ["ai", "image-generation", "stable-diffusion"], + "platform": "linux", + "logo": "https://raw.githubusercontent.com/AUTOMATIC1111/stable-diffusion-webui/master/html/logo.png", + "repository": { + "url": "https://git.oe74.net/adelorenzo/portainer_scripts", + "stackfile": "ai-templates/stacks/stable-diffusion-webui/docker-compose.yml" + }, + "env": [ + { + "name": "SD_PORT", + "label": "Web UI port", + "default": "7860" + }, + { + "name": "CLI_ARGS", + "label": "Launch arguments", + "default": "--listen --api --xformers" + } + ] + }, + { + "id": 9, + "type": 3, + "title": "Langflow", + "description": "Visual framework for building multi-agent and RAG applications. Drag-and-drop LLM pipeline builder", + "note": "Access the visual editor at the configured port. 
Connect to Ollama, OpenAI, or any LLM backend.", + "categories": ["ai", "agents", "rag", "workflows"], + "platform": "linux", + "logo": "https://avatars.githubusercontent.com/u/128686189", + "repository": { + "url": "https://git.oe74.net/adelorenzo/portainer_scripts", + "stackfile": "ai-templates/stacks/langflow/docker-compose.yml" + }, + "env": [ + { + "name": "LANGFLOW_PORT", + "label": "Web UI port", + "default": "7860" + }, + { + "name": "AUTO_LOGIN", + "label": "Skip login screen", + "default": "true" + } + ] + }, + { + "id": 10, + "type": 3, + "title": "Flowise", + "description": "Drag-and-drop LLM orchestration tool. Build chatbots, agents, and RAG pipelines without coding", + "note": "Default credentials are admin/changeme. Connect to any OpenAI-compatible API backend.", + "categories": ["ai", "agents", "rag", "chatbots"], + "platform": "linux", + "logo": "https://flowiseai.com/favicon.ico", + "repository": { + "url": "https://git.oe74.net/adelorenzo/portainer_scripts", + "stackfile": "ai-templates/stacks/flowise/docker-compose.yml" + }, + "env": [ + { + "name": "FLOWISE_PORT", + "label": "Web UI port", + "default": "3000" + }, + { + "name": "FLOWISE_USERNAME", + "label": "Admin username", + "default": "admin" + }, + { + "name": "FLOWISE_PASSWORD", + "label": "Admin password", + "default": "changeme" + } + ] + }, + { + "id": 11, + "type": 3, + "title": "n8n (AI-Enabled)", + "description": "Workflow automation platform with built-in AI agent nodes, LLM chains, and vector store integrations", + "note": "AI features include: AI Agent nodes, LLM Chain, Document Loaders, Vector Stores, Text Splitters, and Memory nodes.", + "categories": ["ai", "automation", "workflows", "agents"], + "platform": "linux", + "logo": "https://n8n.io/favicon.ico", + "repository": { + "url": "https://git.oe74.net/adelorenzo/portainer_scripts", + "stackfile": "ai-templates/stacks/n8n-ai/docker-compose.yml" + }, + "env": [ + { + "name": "N8N_PORT", + "label": "Web UI port", + "default": 
"5678" + }, + { + "name": "N8N_USER", + "label": "Admin username", + "default": "admin" + }, + { + "name": "N8N_PASSWORD", + "label": "Admin password", + "default": "changeme" + }, + { + "name": "WEBHOOK_URL", + "label": "External webhook URL", + "default": "http://localhost:5678/" + } + ] + }, + { + "id": 12, + "type": 3, + "title": "Qdrant", + "description": "High-performance vector similarity search engine for RAG, semantic search, and AI applications", + "note": "REST API on port 6333, gRPC on 6334. Supports filtering, payload indexing, and distributed mode.", + "categories": ["ai", "vector-database", "rag", "embeddings"], + "platform": "linux", + "logo": "https://qdrant.tech/images/logo_with_text.png", + "repository": { + "url": "https://git.oe74.net/adelorenzo/portainer_scripts", + "stackfile": "ai-templates/stacks/qdrant/docker-compose.yml" + }, + "env": [ + { + "name": "QDRANT_HTTP_PORT", + "label": "REST API port", + "default": "6333" + }, + { + "name": "QDRANT_GRPC_PORT", + "label": "gRPC port", + "default": "6334" + }, + { + "name": "QDRANT_API_KEY", + "label": "API key (optional)" + } + ] + }, + { + "id": 13, + "type": 3, + "title": "ChromaDB", + "description": "AI-native open-source embedding database. The easiest vector store to get started with for RAG applications", + "note": "Persistent storage enabled by default. 
Compatible with LangChain, LlamaIndex, and all major AI frameworks.", + "categories": ["ai", "vector-database", "rag", "embeddings"], + "platform": "linux", + "logo": "https://www.trychroma.com/chroma-logo.png", + "repository": { + "url": "https://git.oe74.net/adelorenzo/portainer_scripts", + "stackfile": "ai-templates/stacks/chromadb/docker-compose.yml" + }, + "env": [ + { + "name": "CHROMA_PORT", + "label": "API port", + "default": "8000" + }, + { + "name": "CHROMA_TOKEN", + "label": "Auth token (optional)" + }, + { + "name": "TELEMETRY", + "label": "Anonymous telemetry", + "default": "FALSE" + } + ] + }, + { + "id": 14, + "type": 3, + "title": "Weaviate", + "description": "AI-native vector database with built-in vectorization modules and hybrid search capabilities", + "note": "Supports text2vec-transformers, generative-openai, and many other modules. Configure modules via environment variables.", + "categories": ["ai", "vector-database", "rag", "search"], + "platform": "linux", + "logo": "https://weaviate.io/img/site/weaviate-logo-light.png", + "repository": { + "url": "https://git.oe74.net/adelorenzo/portainer_scripts", + "stackfile": "ai-templates/stacks/weaviate/docker-compose.yml" + }, + "env": [ + { + "name": "WEAVIATE_HTTP_PORT", + "label": "HTTP API port", + "default": "8080" + }, + { + "name": "WEAVIATE_GRPC_PORT", + "label": "gRPC port", + "default": "50051" + }, + { + "name": "VECTORIZER", + "label": "Default vectorizer module", + "default": "none" + }, + { + "name": "MODULES", + "label": "Enabled modules", + "default": "text2vec-transformers,generative-openai" + }, + { + "name": "ANON_ACCESS", + "label": "Anonymous access enabled", + "default": "true" + } + ] + }, + { + "id": 15, + "type": 3, + "title": "MLflow", + "description": "Open-source ML lifecycle platform — experiment tracking, model registry, and model serving", + "note": "Access the tracking UI at the configured port. 
Uses SQLite backend by default — switch to PostgreSQL for production.", + "categories": ["ai", "mlops", "experiment-tracking", "model-registry"], + "platform": "linux", + "logo": "https://mlflow.org/img/mlflow-black.svg", + "repository": { + "url": "https://git.oe74.net/adelorenzo/portainer_scripts", + "stackfile": "ai-templates/stacks/mlflow/docker-compose.yml" + }, + "env": [ + { + "name": "MLFLOW_PORT", + "label": "Tracking UI port", + "default": "5000" + } + ] + }, + { + "id": 16, + "type": 3, + "title": "Label Studio", + "description": "Multi-type data labeling and annotation platform for training ML and AI models", + "note": "Supports image, text, audio, video, and time-series annotation. Export to all major ML formats.", + "categories": ["ai", "mlops", "data-labeling", "annotation"], + "platform": "linux", + "logo": "https://labelstud.io/images/ls-logo.png", + "repository": { + "url": "https://git.oe74.net/adelorenzo/portainer_scripts", + "stackfile": "ai-templates/stacks/label-studio/docker-compose.yml" + }, + "env": [ + { + "name": "LS_PORT", + "label": "Web UI port", + "default": "8080" + }, + { + "name": "LS_USER", + "label": "Admin email", + "default": "admin@example.com" + }, + { + "name": "LS_PASSWORD", + "label": "Admin password", + "default": "changeme" + } + ] + }, + { + "id": 17, + "type": 3, + "title": "Jupyter (GPU / PyTorch)", + "description": "GPU-accelerated Jupyter Lab with PyTorch, CUDA, and data science libraries pre-installed", + "note": "Requires NVIDIA GPU. Access with the configured token. 
Workspace persists in the work volume.", + "categories": ["ai", "ml-development", "notebooks", "pytorch"], + "platform": "linux", + "logo": "https://jupyter.org/assets/homepage/main-logo.svg", + "repository": { + "url": "https://git.oe74.net/adelorenzo/portainer_scripts", + "stackfile": "ai-templates/stacks/jupyter-gpu/docker-compose.yml" + }, + "env": [ + { + "name": "JUPYTER_PORT", + "label": "Jupyter Lab port", + "default": "8888" + }, + { + "name": "JUPYTER_TOKEN", + "label": "Access token", + "default": "changeme" + }, + { + "name": "GRANT_SUDO", + "label": "Allow sudo in notebooks", + "default": "yes" + } + ] + }, + { + "id": 18, + "type": 3, + "title": "Whisper ASR", + "description": "OpenAI Whisper speech-to-text API server with GPU acceleration. Supports transcription and translation", + "note": "Requires NVIDIA GPU. API documentation available at /docs. Supports models: tiny, base, small, medium, large-v3.", + "categories": ["ai", "speech-to-text", "transcription", "audio"], + "platform": "linux", + "logo": "https://upload.wikimedia.org/wikipedia/commons/0/04/ChatGPT_logo.svg", + "repository": { + "url": "https://git.oe74.net/adelorenzo/portainer_scripts", + "stackfile": "ai-templates/stacks/whisper/docker-compose.yml" + }, + "env": [ + { + "name": "WHISPER_PORT", + "label": "API port", + "default": "9000" + }, + { + "name": "ASR_MODEL", + "label": "Whisper model size", + "description": "Options: tiny, base, small, medium, large-v3", + "default": "base" + }, + { + "name": "ASR_ENGINE", + "label": "ASR engine", + "default": "openai_whisper" + } + ] + }, + { + "id": 19, + "type": 3, + "title": "NVIDIA Triton Inference Server", + "description": "Production-grade inference serving for any AI model — supports TensorRT, ONNX, PyTorch, TensorFlow, vLLM, and Python backends with dynamic batching, model ensembles, and multi-GPU scheduling", + "note": "Requires NVIDIA GPU. Place model repositories in the models volume following Triton's model repository layout. 
Health check at /v2/health/ready.", + "categories": ["ai", "inference", "edge", "production", "nvidia"], + "platform": "linux", + "logo": "https://developer.nvidia.com/favicon.ico", + "repository": { + "url": "https://git.oe74.net/adelorenzo/portainer_scripts", + "stackfile": "ai-templates/stacks/triton/docker-compose.yml" + }, + "env": [ + { + "name": "HTTP_PORT", + "label": "HTTP inference port", + "default": "8000" + }, + { + "name": "GRPC_PORT", + "label": "gRPC inference port", + "default": "8001" + }, + { + "name": "METRICS_PORT", + "label": "Prometheus metrics port", + "default": "8002" + }, + { + "name": "TRITON_VERSION", + "label": "Triton version tag", + "default": "24.08" + }, + { + "name": "MODEL_CONTROL", + "label": "Model control mode (none, poll, explicit)", + "default": "poll" + }, + { + "name": "POLL_INTERVAL", + "label": "Model repository poll interval (seconds)", + "default": "30" + }, + { + "name": "SHM_SIZE", + "label": "Shared memory size", + "default": "1g" + } + ] + }, + { + "id": 20, + "type": 3, + "title": "ONNX Runtime Server", + "description": "Lightweight cross-platform inference server for ONNX models. Supports GPU and CPU-only profiles for edge deployment on resource-constrained nodes", + "note": "Use docker compose --profile gpu up for GPU nodes or --profile edge up for CPU-only edge nodes. 
Place your .onnx model file in the models volume.", + "categories": ["ai", "inference", "edge", "lightweight", "onnx"], + "platform": "linux", + "logo": "https://onnxruntime.ai/images/icons/ONNX-Runtime-logo.svg", + "repository": { + "url": "https://git.oe74.net/adelorenzo/portainer_scripts", + "stackfile": "ai-templates/stacks/onnx-runtime/docker-compose.yml" + }, + "env": [ + { + "name": "HTTP_PORT", + "label": "HTTP port", + "default": "8001" + }, + { + "name": "GRPC_PORT", + "label": "gRPC port", + "default": "50051" + }, + { + "name": "MODEL_FILE", + "label": "Model filename in /models", + "default": "model.onnx" + }, + { + "name": "NUM_THREADS", + "label": "Inference threads", + "default": "4" + }, + { + "name": "CPU_LIMIT", + "label": "CPU core limit (edge profile)", + "default": "2.0" + }, + { + "name": "MEM_LIMIT", + "label": "Memory limit (edge profile)", + "default": "2G" + } + ] + }, + { + "id": 21, + "type": 3, + "title": "NVIDIA DeepStream", + "description": "GPU-accelerated video analytics and computer vision pipeline for industrial inspection, anomaly detection, and smart factory applications with Triton backend", + "note": "Requires NVIDIA GPU with video decode capabilities. For camera access on edge devices, set PRIVILEGED=true. 
Supports RTSP output on port 8554.", + "categories": ["ai", "computer-vision", "industrial", "edge", "video-analytics"], + "platform": "linux", + "logo": "https://developer.nvidia.com/favicon.ico", + "repository": { + "url": "https://git.oe74.net/adelorenzo/portainer_scripts", + "stackfile": "ai-templates/stacks/deepstream/docker-compose.yml" + }, + "env": [ + { + "name": "RTSP_PORT", + "label": "RTSP output port", + "default": "8554" + }, + { + "name": "REST_PORT", + "label": "REST API port", + "default": "9000" + }, + { + "name": "DS_VERSION", + "label": "DeepStream version", + "default": "7.1" + }, + { + "name": "SHM_SIZE", + "label": "Shared memory size", + "default": "2g" + }, + { + "name": "PRIVILEGED", + "label": "Privileged mode (for device access)", + "default": "false" + } + ] + }, + { + "id": 22, + "type": 3, + "title": "Ray Cluster (GPU)", + "description": "Distributed compute cluster for LLM fine-tuning, distributed training, hyperparameter tuning, and scalable inference with Ray Serve. Head + configurable worker nodes", + "note": "Requires NVIDIA GPU on all nodes. Scale workers with NUM_WORKERS. Dashboard accessible at the configured port. 
Includes Ray Train, Tune, Serve, and Data.", + "categories": ["ai", "distributed-training", "fine-tuning", "inference", "cluster"], + "platform": "linux", + "logo": "https://docs.ray.io/en/latest/_static/ray_logo.png", + "repository": { + "url": "https://git.oe74.net/adelorenzo/portainer_scripts", + "stackfile": "ai-templates/stacks/ray-cluster/docker-compose.yml" + }, + "env": [ + { + "name": "DASHBOARD_PORT", + "label": "Ray Dashboard port", + "default": "8265" + }, + { + "name": "SERVE_PORT", + "label": "Ray Serve port", + "default": "8000" + }, + { + "name": "RAY_VERSION", + "label": "Ray version", + "default": "2.40.0" + }, + { + "name": "NUM_WORKERS", + "label": "Number of worker nodes", + "default": "1" + }, + { + "name": "HEAD_GPUS", + "label": "GPUs on head node", + "default": "1" + }, + { + "name": "WORKER_GPUS", + "label": "GPUs per worker", + "default": "1" + }, + { + "name": "WORKER_CPUS", + "label": "CPUs per worker", + "default": "4" + }, + { + "name": "SHM_SIZE", + "label": "Shared memory per node", + "default": "8g" + } + ] + }, + { + "id": 23, + "type": 3, + "title": "Prefect (ML Pipeline Orchestration)", + "description": "Governed ML pipeline orchestration platform with scheduling, retries, audit logging, and role-based access. Includes server, worker, and PostgreSQL backend", + "note": "Access the Prefect UI at the configured port. Create flows in Python and register them against this server. 
Worker uses Docker execution for isolation.", + "categories": ["ai", "mlops", "pipelines", "governance", "orchestration"], + "platform": "linux", + "logo": "https://www.prefect.io/favicon.ico", + "repository": { + "url": "https://git.oe74.net/adelorenzo/portainer_scripts", + "stackfile": "ai-templates/stacks/prefect/docker-compose.yml" + }, + "env": [ + { + "name": "PREFECT_PORT", + "label": "Prefect UI port", + "default": "4200" + }, + { + "name": "PREFECT_VERSION", + "label": "Prefect version", + "default": "3-latest" + }, + { + "name": "PG_USER", + "label": "PostgreSQL user", + "default": "prefect" + }, + { + "name": "PG_PASSWORD", + "label": "PostgreSQL password", + "default": "prefect" + }, + { + "name": "ANALYTICS", + "label": "Enable analytics", + "default": "false" + } + ] + }, + { + "id": 24, + "type": 3, + "title": "BentoML", + "description": "Unified model serving framework for packaging, deploying, and managing ML models as production-ready API endpoints with GPU support", + "note": "Requires NVIDIA GPU. Build Bentos (model packages) and serve them through this runtime. 
Prometheus metrics on port 3001.", + "categories": ["ai", "model-serving", "inference", "mlops"], + "platform": "linux", + "logo": "https://docs.bentoml.com/en/latest/_static/img/logo.svg", + "repository": { + "url": "https://git.oe74.net/adelorenzo/portainer_scripts", + "stackfile": "ai-templates/stacks/bentoml/docker-compose.yml" + }, + "env": [ + { + "name": "BENTO_PORT", + "label": "Serving API port", + "default": "3000" + }, + { + "name": "METRICS_PORT", + "label": "Prometheus metrics port", + "default": "3001" + }, + { + "name": "BENTO_VERSION", + "label": "BentoML version", + "default": "latest" + }, + { + "name": "LOG_LEVEL", + "label": "Log level", + "default": "INFO" + } + ] + }, + { + "id": 25, + "type": 3, + "title": "MLflow + MinIO (Production MLOps)", + "description": "Production-grade MLOps stack: MLflow tracking server with PostgreSQL backend and MinIO S3-compatible artifact store for governed model registry, experiment tracking, and versioned artifact storage", + "note": "MinIO console available at port 9001. MLflow auto-creates the artifact bucket on startup. 
For production, change all default credentials.", + "categories": ["ai", "mlops", "model-registry", "governance", "experiment-tracking"], + "platform": "linux", + "logo": "https://mlflow.org/img/mlflow-black.svg", + "repository": { + "url": "https://git.oe74.net/adelorenzo/portainer_scripts", + "stackfile": "ai-templates/stacks/minio-mlops/docker-compose.yml" + }, + "env": [ + { + "name": "MLFLOW_PORT", + "label": "MLflow UI port", + "default": "5000" + }, + { + "name": "MINIO_API_PORT", + "label": "MinIO S3 API port", + "default": "9000" + }, + { + "name": "MINIO_CONSOLE_PORT", + "label": "MinIO console port", + "default": "9001" + }, + { + "name": "PG_USER", + "label": "PostgreSQL user", + "default": "mlflow" + }, + { + "name": "PG_PASSWORD", + "label": "PostgreSQL password", + "default": "mlflow" + }, + { + "name": "MINIO_ROOT_USER", + "label": "MinIO root user", + "default": "mlflow" + }, + { + "name": "MINIO_ROOT_PASSWORD", + "label": "MinIO root password", + "default": "mlflow123" + }, + { + "name": "ARTIFACT_BUCKET", + "label": "S3 artifact bucket name", + "default": "mlflow-artifacts" + } + ] + }, + { + "id": 26, + "type": 3, + "title": "NVIDIA NIM", + "description": "Enterprise-grade optimized LLM inference microservice from NVIDIA. Pre-optimized with TensorRT-LLM for maximum throughput with OpenAI-compatible API", + "note": "Requires NVIDIA GPU and an NGC API key from NVIDIA Build. Model downloads are cached in the nim_cache volume. 
First startup may take several minutes.", + "categories": ["ai", "llm", "inference", "enterprise", "nvidia"], + "platform": "linux", + "logo": "https://developer.nvidia.com/favicon.ico", + "repository": { + "url": "https://git.oe74.net/adelorenzo/portainer_scripts", + "stackfile": "ai-templates/stacks/nvidia-nim/docker-compose.yml" + }, + "env": [ + { + "name": "NIM_PORT", + "label": "API port", + "default": "8000" + }, + { + "name": "NGC_API_KEY", + "label": "NVIDIA NGC API key (required)" + }, + { + "name": "NIM_MODEL", + "label": "NIM model container", + "description": "Model from NVIDIA NGC catalog", + "default": "meta/llama-3.1-8b-instruct" + }, + { + "name": "NIM_VERSION", + "label": "NIM version", + "default": "latest" + }, + { + "name": "MAX_MODEL_LEN", + "label": "Max sequence length", + "default": "4096" + }, + { + "name": "GPU_MEM_UTIL", + "label": "GPU memory utilization (0-1)", + "default": "0.9" + }, + { + "name": "SHM_SIZE", + "label": "Shared memory size", + "default": "16g" + } + ] + } + ] +} diff --git a/ai-templates-0/stacks/bentoml/docker-compose.yml b/ai-templates-0/stacks/bentoml/docker-compose.yml new file mode 100644 index 0000000..8833740 --- /dev/null +++ b/ai-templates-0/stacks/bentoml/docker-compose.yml @@ -0,0 +1,30 @@ +version: "3.8" + +services: + bentoml: + image: bentoml/bentoml:${BENTO_VERSION:-latest} + container_name: bentoml + restart: unless-stopped + ports: + - "${BENTO_PORT:-3000}:3000" + - "${METRICS_PORT:-3001}:3001" + volumes: + - bentoml_home:/home/bentoml + - bentoml_models:/home/bentoml/bentoml/models + environment: + - BENTOML_HOME=/home/bentoml/bentoml + - BENTOML_PORT=3000 + - BENTOML_METRICS_PORT=3001 + - BENTOML_LOG_LEVEL=${LOG_LEVEL:-INFO} + command: bentoml serve --host 0.0.0.0 --port 3000 + deploy: + resources: + reservations: + devices: + - driver: nvidia + count: all + capabilities: [gpu] + +volumes: + bentoml_home: + bentoml_models: diff --git a/ai-templates-0/stacks/chromadb/docker-compose.yml 
b/ai-templates-0/stacks/chromadb/docker-compose.yml new file mode 100644 index 0000000..f0a9496 --- /dev/null +++ b/ai-templates-0/stacks/chromadb/docker-compose.yml @@ -0,0 +1,20 @@ +version: "3.8" + +services: + chromadb: + image: chromadb/chroma:latest + container_name: chromadb + restart: unless-stopped + ports: + - "${CHROMA_PORT:-8000}:8000" + volumes: + - chroma_data:/chroma/chroma + environment: + - IS_PERSISTENT=TRUE + - PERSIST_DIRECTORY=/chroma/chroma + - ANONYMIZED_TELEMETRY=${TELEMETRY:-FALSE} + - CHROMA_SERVER_AUTHN_CREDENTIALS=${CHROMA_TOKEN:-} + - CHROMA_SERVER_AUTHN_PROVIDER=${CHROMA_AUTH_PROVIDER:-} + +volumes: + chroma_data: diff --git a/ai-templates-0/stacks/comfyui/docker-compose.yml b/ai-templates-0/stacks/comfyui/docker-compose.yml new file mode 100644 index 0000000..f25f4b1 --- /dev/null +++ b/ai-templates-0/stacks/comfyui/docker-compose.yml @@ -0,0 +1,31 @@ +version: "3.8" + +services: + comfyui: + image: yanwk/comfyui-boot:latest + container_name: comfyui + restart: unless-stopped + ports: + - "${COMFYUI_PORT:-8188}:8188" + volumes: + - comfyui_data:/root + - comfyui_models:/root/ComfyUI/models + - comfyui_output:/root/ComfyUI/output + - comfyui_input:/root/ComfyUI/input + - comfyui_custom_nodes:/root/ComfyUI/custom_nodes + environment: + - CLI_ARGS=${CLI_ARGS:---listen 0.0.0.0 --port 8188} + deploy: + resources: + reservations: + devices: + - driver: nvidia + count: all + capabilities: [gpu] + +volumes: + comfyui_data: + comfyui_models: + comfyui_output: + comfyui_input: + comfyui_custom_nodes: diff --git a/ai-templates-0/stacks/deepstream/docker-compose.yml b/ai-templates-0/stacks/deepstream/docker-compose.yml new file mode 100644 index 0000000..a338d72 --- /dev/null +++ b/ai-templates-0/stacks/deepstream/docker-compose.yml @@ -0,0 +1,38 @@ +version: "3.8" + +services: + deepstream: + image: nvcr.io/nvidia/deepstream:${DS_VERSION:-7.1}-triton-multiarch + container_name: deepstream + restart: unless-stopped + ports: + -
"${RTSP_PORT:-8554}:8554" + - "${REST_PORT:-9000}:9000" + volumes: + - deepstream_apps:/opt/nvidia/deepstream/deepstream/sources/apps + - deepstream_models:/opt/nvidia/deepstream/deepstream/samples/models + - deepstream_configs:/opt/nvidia/deepstream/deepstream/samples/configs + - deepstream_streams:/opt/nvidia/deepstream/deepstream/samples/streams + environment: + - NVIDIA_VISIBLE_DEVICES=${CUDA_DEVICES:-all} # "all" is only valid for NVIDIA_VISIBLE_DEVICES; CUDA_VISIBLE_DEVICES=all hides every GPU + - DISPLAY=${DISPLAY:-} + deploy: + resources: + reservations: + devices: + - driver: nvidia + count: all + capabilities: [gpu, video, compute, utility] + runtime: nvidia + network_mode: ${NETWORK_MODE:-bridge} + shm_size: ${SHM_SIZE:-2g} + # Required for video device access on edge nodes + privileged: ${PRIVILEGED:-false} + devices: + - /dev/video0:/dev/video0 + +volumes: + deepstream_apps: + deepstream_models: + deepstream_configs: + deepstream_streams: diff --git a/ai-templates-0/stacks/flowise/docker-compose.yml b/ai-templates-0/stacks/flowise/docker-compose.yml new file mode 100644 index 0000000..d877614 --- /dev/null +++ b/ai-templates-0/stacks/flowise/docker-compose.yml @@ -0,0 +1,19 @@ +version: "3.8" + +services: + flowise: + image: flowiseai/flowise:latest + container_name: flowise + restart: unless-stopped + ports: + - "${FLOWISE_PORT:-3000}:3000" + volumes: + - flowise_data:/root/.flowise + environment: + - FLOWISE_USERNAME=${FLOWISE_USERNAME:-admin} + - FLOWISE_PASSWORD=${FLOWISE_PASSWORD:-changeme} + - APIKEY_PATH=/root/.flowise + - LOG_PATH=/root/.flowise/logs + +volumes: + flowise_data: diff --git a/ai-templates-0/stacks/jupyter-gpu/docker-compose.yml b/ai-templates-0/stacks/jupyter-gpu/docker-compose.yml new file mode 100644 index 0000000..b8554c4 --- /dev/null +++ b/ai-templates-0/stacks/jupyter-gpu/docker-compose.yml @@ -0,0 +1,26 @@ +version: "3.8" + +services: + jupyter: + image: quay.io/jupyter/pytorch-notebook:latest + container_name: jupyter-gpu + restart: unless-stopped + ports: + - "${JUPYTER_PORT:-8888}:8888" + volumes: + -
jupyter_data:/home/jovyan/work + environment: + - JUPYTER_TOKEN=${JUPYTER_TOKEN:-changeme} + - JUPYTER_ENABLE_LAB=yes + - GRANT_SUDO=${GRANT_SUDO:-yes} + user: root + deploy: + resources: + reservations: + devices: + - driver: nvidia + count: all + capabilities: [gpu] + +volumes: + jupyter_data: diff --git a/ai-templates-0/stacks/label-studio/docker-compose.yml b/ai-templates-0/stacks/label-studio/docker-compose.yml new file mode 100644 index 0000000..50558b7 --- /dev/null +++ b/ai-templates-0/stacks/label-studio/docker-compose.yml @@ -0,0 +1,19 @@ +version: "3.8" + +services: + label-studio: + image: heartexlabs/label-studio:latest + container_name: label-studio + restart: unless-stopped + ports: + - "${LS_PORT:-8080}:8080" + volumes: + - label_studio_data:/label-studio/data + environment: + - LABEL_STUDIO_LOCAL_FILES_SERVING_ENABLED=true + - LABEL_STUDIO_LOCAL_FILES_DOCUMENT_ROOT=/label-studio/data/files + - LABEL_STUDIO_USERNAME=${LS_USER:-admin@example.com} + - LABEL_STUDIO_PASSWORD=${LS_PASSWORD:-changeme} + +volumes: + label_studio_data: diff --git a/ai-templates-0/stacks/langflow/docker-compose.yml b/ai-templates-0/stacks/langflow/docker-compose.yml new file mode 100644 index 0000000..965b524 --- /dev/null +++ b/ai-templates-0/stacks/langflow/docker-compose.yml @@ -0,0 +1,18 @@ +version: "3.8" + +services: + langflow: + image: langflowai/langflow:latest + container_name: langflow + restart: unless-stopped + ports: + - "${LANGFLOW_PORT:-7860}:7860" + volumes: + - langflow_data:/app/langflow + environment: + - LANGFLOW_DATABASE_URL=sqlite:////app/langflow/langflow.db + - LANGFLOW_CONFIG_DIR=/app/langflow + - LANGFLOW_AUTO_LOGIN=${AUTO_LOGIN:-true} + +volumes: + langflow_data: diff --git a/ai-templates-0/stacks/litellm/docker-compose.yml b/ai-templates-0/stacks/litellm/docker-compose.yml new file mode 100644 index 0000000..e381fbb --- /dev/null +++ b/ai-templates-0/stacks/litellm/docker-compose.yml @@ -0,0 +1,33 @@ +version: "3.8" + +services: + litellm: + 
image: ghcr.io/berriai/litellm:main-latest + container_name: litellm + restart: unless-stopped + ports: + - "${LITELLM_PORT:-4000}:4000" + volumes: + - litellm_config:/app/config + environment: + - LITELLM_MASTER_KEY=${LITELLM_MASTER_KEY:-sk-master-key} + - LITELLM_LOG_LEVEL=${LOG_LEVEL:-INFO} + - DATABASE_URL=postgresql://${PG_USER:-litellm}:${PG_PASSWORD:-litellm}@litellm-db:5432/${PG_DB:-litellm} + command: --config /app/config/litellm_config.yaml --port 4000 + depends_on: + - litellm-db + + litellm-db: + image: postgres:16-alpine + container_name: litellm-db + restart: unless-stopped + environment: + - POSTGRES_USER=${PG_USER:-litellm} + - POSTGRES_PASSWORD=${PG_PASSWORD:-litellm} + - POSTGRES_DB=${PG_DB:-litellm} + volumes: + - litellm_pg_data:/var/lib/postgresql/data + +volumes: + litellm_config: + litellm_pg_data: diff --git a/ai-templates-0/stacks/localai/docker-compose.yml b/ai-templates-0/stacks/localai/docker-compose.yml new file mode 100644 index 0000000..9c5f8fe --- /dev/null +++ b/ai-templates-0/stacks/localai/docker-compose.yml @@ -0,0 +1,25 @@ +version: "3.8" + +services: + localai: + image: localai/localai:latest-gpu-nvidia-cuda-12 + container_name: localai + restart: unless-stopped + ports: + - "${LOCALAI_PORT:-8080}:8080" + volumes: + - localai_models:/build/models + environment: + - THREADS=${THREADS:-4} + - CONTEXT_SIZE=${CONTEXT_SIZE:-4096} + - MODELS_PATH=/build/models + deploy: + resources: + reservations: + devices: + - driver: nvidia + count: all + capabilities: [gpu] + +volumes: + localai_models: diff --git a/ai-templates-0/stacks/minio-mlops/docker-compose.yml b/ai-templates-0/stacks/minio-mlops/docker-compose.yml new file mode 100644 index 0000000..a682d08 --- /dev/null +++ b/ai-templates-0/stacks/minio-mlops/docker-compose.yml @@ -0,0 +1,76 @@ +version: "3.8" + +services: + mlflow: + image: ghcr.io/mlflow/mlflow:${MLFLOW_VERSION:-latest} + container_name: mlflow-server + restart: unless-stopped + ports: + - "${MLFLOW_PORT:-5000}:5000" 
+ environment: + - MLFLOW_TRACKING_URI=postgresql://${PG_USER:-mlflow}:${PG_PASSWORD:-mlflow}@mlflow-db:5432/${PG_DB:-mlflow} + - MLFLOW_S3_ENDPOINT_URL=http://mlflow-minio:9000 + - AWS_ACCESS_KEY_ID=${MINIO_ROOT_USER:-mlflow} + - AWS_SECRET_ACCESS_KEY=${MINIO_ROOT_PASSWORD:-mlflow123} + - MLFLOW_DEFAULT_ARTIFACT_ROOT=s3://${ARTIFACT_BUCKET:-mlflow-artifacts}/ + command: > + mlflow server + --host 0.0.0.0 + --port 5000 + --backend-store-uri postgresql://${PG_USER:-mlflow}:${PG_PASSWORD:-mlflow}@mlflow-db:5432/${PG_DB:-mlflow} + --default-artifact-root s3://${ARTIFACT_BUCKET:-mlflow-artifacts}/ + --serve-artifacts + depends_on: + mlflow-db: + condition: service_healthy + mlflow-minio: + condition: service_started + + mlflow-db: + image: postgres:16-alpine + container_name: mlflow-db + restart: unless-stopped + environment: + - POSTGRES_USER=${PG_USER:-mlflow} + - POSTGRES_PASSWORD=${PG_PASSWORD:-mlflow} + - POSTGRES_DB=${PG_DB:-mlflow} + volumes: + - mlflow_pg_data:/var/lib/postgresql/data + healthcheck: + test: ["CMD-SHELL", "pg_isready -U ${PG_USER:-mlflow}"] + interval: 10s + timeout: 5s + retries: 5 + + mlflow-minio: + image: quay.io/minio/minio:latest + container_name: mlflow-minio + restart: unless-stopped + ports: + - "${MINIO_API_PORT:-9000}:9000" + - "${MINIO_CONSOLE_PORT:-9001}:9001" + volumes: + - mlflow_minio_data:/data + environment: + - MINIO_ROOT_USER=${MINIO_ROOT_USER:-mlflow} + - MINIO_ROOT_PASSWORD=${MINIO_ROOT_PASSWORD:-mlflow123} + command: server /data --console-address ':9001' + + # Init container to create the default bucket + mlflow-minio-init: + image: quay.io/minio/mc:latest + container_name: mlflow-minio-init + depends_on: + - mlflow-minio + entrypoint: > + /bin/sh -c " + sleep 5; + mc alias set myminio http://mlflow-minio:9000 ${MINIO_ROOT_USER:-mlflow} ${MINIO_ROOT_PASSWORD:-mlflow123}; + mc mb --ignore-existing myminio/${ARTIFACT_BUCKET:-mlflow-artifacts}; + mc anonymous set download myminio/${ARTIFACT_BUCKET:-mlflow-artifacts}; + exit 
0; + " + +volumes: + mlflow_pg_data: + mlflow_minio_data: diff --git a/ai-templates-0/stacks/mlflow/docker-compose.yml b/ai-templates-0/stacks/mlflow/docker-compose.yml new file mode 100644 index 0000000..958504b --- /dev/null +++ b/ai-templates-0/stacks/mlflow/docker-compose.yml @@ -0,0 +1,20 @@ +version: "3.8" + +services: + mlflow: + image: ghcr.io/mlflow/mlflow:latest + container_name: mlflow + restart: unless-stopped + ports: + - "${MLFLOW_PORT:-5000}:5000" + volumes: + - mlflow_data:/mlflow + command: > # sqlite://// (four slashes) = absolute path, so the DB always lands in the /mlflow volume + mlflow server + --host 0.0.0.0 + --port 5000 + --backend-store-uri sqlite:////mlflow/mlflow.db + --default-artifact-root /mlflow/artifacts + +volumes: + mlflow_data: diff --git a/ai-templates-0/stacks/n8n-ai/docker-compose.yml b/ai-templates-0/stacks/n8n-ai/docker-compose.yml new file mode 100644 index 0000000..30ac80e --- /dev/null +++ b/ai-templates-0/stacks/n8n-ai/docker-compose.yml @@ -0,0 +1,20 @@ +version: "3.8" + +services: + n8n: + image: docker.n8n.io/n8nio/n8n:latest + container_name: n8n-ai + restart: unless-stopped + ports: + - "${N8N_PORT:-5678}:5678" + volumes: + - n8n_data:/home/node/.n8n + environment: + - N8N_BASIC_AUTH_ACTIVE=${N8N_AUTH:-true} + - N8N_BASIC_AUTH_USER=${N8N_USER:-admin} + - N8N_BASIC_AUTH_PASSWORD=${N8N_PASSWORD:-changeme} + - WEBHOOK_URL=${WEBHOOK_URL:-http://localhost:5678/} + - N8N_AI_ENABLED=true + +volumes: + n8n_data: diff --git a/ai-templates-0/stacks/nvidia-nim/docker-compose.yml b/ai-templates-0/stacks/nvidia-nim/docker-compose.yml new file mode 100644 index 0000000..77976bd --- /dev/null +++ b/ai-templates-0/stacks/nvidia-nim/docker-compose.yml @@ -0,0 +1,36 @@ +version: "3.8" + +services: + nim: + image: nvcr.io/nim/${NIM_MODEL:-meta/llama-3.1-8b-instruct}:${NIM_VERSION:-latest} + container_name: nvidia-nim + restart: unless-stopped + ports: + - "${NIM_PORT:-8000}:8000" + volumes: + - nim_cache:/opt/nim/.cache + environment: + - NGC_API_KEY=${NGC_API_KEY} + - NIM_MAX_MODEL_LEN=${MAX_MODEL_LEN:-4096} + -
NIM_GPU_MEMORY_UTILIZATION=${GPU_MEM_UTIL:-0.9} + - NIM_MAX_BATCH_SIZE=${MAX_BATCH:-256} + - NIM_LOG_LEVEL=${LOG_LEVEL:-INFO} + deploy: + resources: + reservations: + devices: + - driver: nvidia + count: all + capabilities: [gpu] + shm_size: ${SHM_SIZE:-16g} + ulimits: + memlock: -1 + healthcheck: + test: ["CMD", "curl", "-f", "http://localhost:8000/v1/health/ready"] + interval: 30s + timeout: 10s + retries: 10 + start_period: 120s + +volumes: + nim_cache: diff --git a/ai-templates-0/stacks/ollama/docker-compose.yml b/ai-templates-0/stacks/ollama/docker-compose.yml new file mode 100644 index 0000000..a10b813 --- /dev/null +++ b/ai-templates-0/stacks/ollama/docker-compose.yml @@ -0,0 +1,25 @@ +version: "3.8" + +services: + ollama: + image: ollama/ollama:latest + container_name: ollama + restart: unless-stopped + ports: + - "${OLLAMA_PORT:-11434}:11434" + volumes: + - ollama_data:/root/.ollama + environment: + - OLLAMA_HOST=0.0.0.0 + - OLLAMA_NUM_PARALLEL=${OLLAMA_NUM_PARALLEL:-4} + - OLLAMA_MAX_LOADED_MODELS=${OLLAMA_MAX_LOADED_MODELS:-2} + deploy: + resources: + reservations: + devices: + - driver: nvidia + count: all + capabilities: [gpu] + +volumes: + ollama_data: diff --git a/ai-templates-0/stacks/onnx-runtime/docker-compose.yml b/ai-templates-0/stacks/onnx-runtime/docker-compose.yml new file mode 100644 index 0000000..16ff061 --- /dev/null +++ b/ai-templates-0/stacks/onnx-runtime/docker-compose.yml @@ -0,0 +1,57 @@ +version: "3.8" + +services: + # GPU variant — for data center / cloud nodes + onnx-runtime-gpu: + image: mcr.microsoft.com/onnxruntime/server:latest + container_name: onnx-runtime-gpu + restart: unless-stopped + profiles: ["gpu"] + ports: + - "${HTTP_PORT:-8001}:8001" + - "${GRPC_PORT:-50051}:50051" + volumes: + - onnx_models:/models + environment: + - ORT_LOG_LEVEL=${LOG_LEVEL:-WARNING} + command: > + --model_path /models/${MODEL_FILE:-model.onnx} + --http_port 8001 + --grpc_port 50051 + --num_threads ${NUM_THREADS:-4} + --execution_provider 
${EXEC_PROVIDER:-cuda} + deploy: + resources: + reservations: + devices: + - driver: nvidia + count: 1 + capabilities: [gpu] + + # CPU variant — for edge nodes, ARM, resource-constrained environments + onnx-runtime-cpu: + image: mcr.microsoft.com/onnxruntime/server:latest + container_name: onnx-runtime-cpu + restart: unless-stopped + profiles: ["cpu", "edge"] + ports: + - "${HTTP_PORT:-8001}:8001" + - "${GRPC_PORT:-50051}:50051" + volumes: + - onnx_models:/models + environment: + - ORT_LOG_LEVEL=${LOG_LEVEL:-WARNING} + command: > + --model_path /models/${MODEL_FILE:-model.onnx} + --http_port 8001 + --grpc_port 50051 + --num_threads ${NUM_THREADS:-4} + --execution_provider cpu + deploy: + resources: + limits: + cpus: "${CPU_LIMIT:-2.0}" + memory: ${MEM_LIMIT:-2G} + +volumes: + onnx_models: diff --git a/ai-templates-0/stacks/open-webui/docker-compose.yml b/ai-templates-0/stacks/open-webui/docker-compose.yml new file mode 100644 index 0000000..709f69f --- /dev/null +++ b/ai-templates-0/stacks/open-webui/docker-compose.yml @@ -0,0 +1,39 @@ +version: "3.8" + +services: + open-webui: + image: ghcr.io/open-webui/open-webui:main + container_name: open-webui + restart: unless-stopped + ports: + - "${OPEN_WEBUI_PORT:-3000}:8080" + volumes: + - open_webui_data:/app/backend/data + environment: + - OLLAMA_BASE_URL=${OLLAMA_BASE_URL:-http://ollama:11434} + - WEBUI_SECRET_KEY=${WEBUI_SECRET_KEY:-changeme} + - ENABLE_SIGNUP=${ENABLE_SIGNUP:-true} + depends_on: + - ollama + + ollama: + image: ollama/ollama:latest + container_name: ollama + restart: unless-stopped + ports: + - "${OLLAMA_PORT:-11434}:11434" + volumes: + - ollama_data:/root/.ollama + environment: + - OLLAMA_HOST=0.0.0.0 + deploy: + resources: + reservations: + devices: + - driver: nvidia + count: all + capabilities: [gpu] + +volumes: + open_webui_data: + ollama_data: diff --git a/ai-templates-0/stacks/prefect/docker-compose.yml b/ai-templates-0/stacks/prefect/docker-compose.yml new file mode 100644 index 
0000000..c26748f --- /dev/null +++ b/ai-templates-0/stacks/prefect/docker-compose.yml @@ -0,0 +1,55 @@ +version: "3.8" + +services: + prefect-server: + image: prefecthq/prefect:${PREFECT_VERSION:-3-latest} + container_name: prefect-server + restart: unless-stopped + ports: + - "${PREFECT_PORT:-4200}:4200" + volumes: + - prefect_data:/root/.prefect + - prefect_flows:/flows + environment: + - PREFECT_SERVER_API_HOST=0.0.0.0 + - PREFECT_SERVER_API_PORT=4200 + - PREFECT_API_DATABASE_CONNECTION_URL=postgresql+asyncpg://${PG_USER:-prefect}:${PG_PASSWORD:-prefect}@prefect-db:5432/${PG_DB:-prefect} + - PREFECT_SERVER_ANALYTICS_ENABLED=${ANALYTICS:-false} + command: prefect server start + depends_on: + prefect-db: + condition: service_healthy + + prefect-worker: + image: prefecthq/prefect:${PREFECT_VERSION:-3-latest} + container_name: prefect-worker + restart: unless-stopped + volumes: + - prefect_flows:/flows + - /var/run/docker.sock:/var/run/docker.sock + environment: + - PREFECT_API_URL=http://prefect-server:4200/api + command: prefect worker start --pool default-agent-pool --type docker + depends_on: + - prefect-server + + prefect-db: + image: postgres:16-alpine + container_name: prefect-db + restart: unless-stopped + environment: + - POSTGRES_USER=${PG_USER:-prefect} + - POSTGRES_PASSWORD=${PG_PASSWORD:-prefect} + - POSTGRES_DB=${PG_DB:-prefect} + volumes: + - prefect_pg_data:/var/lib/postgresql/data + healthcheck: + test: ["CMD-SHELL", "pg_isready -U ${PG_USER:-prefect}"] + interval: 10s + timeout: 5s + retries: 5 + +volumes: + prefect_data: + prefect_flows: + prefect_pg_data: diff --git a/ai-templates-0/stacks/qdrant/docker-compose.yml b/ai-templates-0/stacks/qdrant/docker-compose.yml new file mode 100644 index 0000000..03ca69c --- /dev/null +++ b/ai-templates-0/stacks/qdrant/docker-compose.yml @@ -0,0 +1,19 @@ +version: "3.8" + +services: + qdrant: + image: qdrant/qdrant:latest + container_name: qdrant + restart: unless-stopped + ports: + - 
"${QDRANT_HTTP_PORT:-6333}:6333" + - "${QDRANT_GRPC_PORT:-6334}:6334" + volumes: + - qdrant_data:/qdrant/storage + - qdrant_snapshots:/qdrant/snapshots + environment: + - QDRANT__SERVICE__API_KEY=${QDRANT_API_KEY:-} + +volumes: + qdrant_data: + qdrant_snapshots: diff --git a/ai-templates-0/stacks/ray-cluster/docker-compose.yml b/ai-templates-0/stacks/ray-cluster/docker-compose.yml new file mode 100644 index 0000000..be66943 --- /dev/null +++ b/ai-templates-0/stacks/ray-cluster/docker-compose.yml @@ -0,0 +1,60 @@ +version: "3.8" + +services: + ray-head: + image: rayproject/ray-ml:${RAY_VERSION:-2.40.0}-py310-gpu + container_name: ray-head + restart: unless-stopped + ports: + - "${DASHBOARD_PORT:-8265}:8265" + - "${CLIENT_PORT:-10001}:10001" + - "${GCS_PORT:-6379}:6379" + - "${SERVE_PORT:-8000}:8000" + volumes: + - ray_data:/home/ray/data + - ray_results:/home/ray/ray_results + command: > + ray start --head + --port=6379 + --dashboard-host=0.0.0.0 + --dashboard-port=8265 + --num-gpus=${HEAD_GPUS:-1} + --block + environment: + - RAY_GRAFANA_HOST=http://grafana:3000 + - RAY_PROMETHEUS_HOST=http://prometheus:9090 + deploy: + resources: + reservations: + devices: + - driver: nvidia + count: all + capabilities: [gpu] + shm_size: ${SHM_SIZE:-8g} + + ray-worker: + image: rayproject/ray-ml:${RAY_VERSION:-2.40.0}-py310-gpu + restart: unless-stopped + depends_on: + - ray-head + command: > + ray start + --address=ray-head:6379 + --num-gpus=${WORKER_GPUS:-1} + --num-cpus=${WORKER_CPUS:-4} + --block + volumes: + - ray_data:/home/ray/data + deploy: + replicas: ${NUM_WORKERS:-1} + resources: + reservations: + devices: + - driver: nvidia + count: all + capabilities: [gpu] + shm_size: ${SHM_SIZE:-8g} + +volumes: + ray_data: + ray_results: diff --git a/ai-templates-0/stacks/stable-diffusion-webui/docker-compose.yml b/ai-templates-0/stacks/stable-diffusion-webui/docker-compose.yml new file mode 100644 index 0000000..b4592de --- /dev/null +++ 
b/ai-templates-0/stacks/stable-diffusion-webui/docker-compose.yml @@ -0,0 +1,25 @@ +version: "3.8" + +services: + stable-diffusion-webui: + image: universonic/stable-diffusion-webui:latest + container_name: stable-diffusion-webui + restart: unless-stopped + ports: + - "${SD_PORT:-7860}:7860" + volumes: + - sd_data:/data + - sd_output:/output + environment: + - CLI_ARGS=${CLI_ARGS:---listen --api --xformers} + deploy: + resources: + reservations: + devices: + - driver: nvidia + count: all + capabilities: [gpu] + +volumes: + sd_data: + sd_output: diff --git a/ai-templates-0/stacks/text-generation-webui/docker-compose.yml b/ai-templates-0/stacks/text-generation-webui/docker-compose.yml new file mode 100644 index 0000000..cb65de8 --- /dev/null +++ b/ai-templates-0/stacks/text-generation-webui/docker-compose.yml @@ -0,0 +1,35 @@ +version: "3.8" + +services: + text-gen-webui: + image: atinoda/text-generation-webui:default-nvidia + container_name: text-generation-webui + restart: unless-stopped + ports: + - "${WEBUI_PORT:-7860}:7860" + - "${API_PORT:-5000}:5000" + - "${STREAM_PORT:-5005}:5005" + volumes: + - tgw_characters:/app/characters + - tgw_loras:/app/loras + - tgw_models:/app/models + - tgw_presets:/app/presets + - tgw_prompts:/app/prompts + - tgw_extensions:/app/extensions + environment: + - EXTRA_LAUNCH_ARGS=${EXTRA_LAUNCH_ARGS:---listen --api} + deploy: + resources: + reservations: + devices: + - driver: nvidia + count: all + capabilities: [gpu] + +volumes: + tgw_characters: + tgw_loras: + tgw_models: + tgw_presets: + tgw_prompts: + tgw_extensions: diff --git a/ai-templates-0/stacks/triton/docker-compose.yml b/ai-templates-0/stacks/triton/docker-compose.yml new file mode 100644 index 0000000..39c4b56 --- /dev/null +++ b/ai-templates-0/stacks/triton/docker-compose.yml @@ -0,0 +1,43 @@ +version: "3.8" + +services: + triton: + image: nvcr.io/nvidia/tritonserver:${TRITON_VERSION:-24.08}-py3 + container_name: triton-inference-server + restart: unless-stopped + ports: 
+ - "${HTTP_PORT:-8000}:8000" + - "${GRPC_PORT:-8001}:8001" + - "${METRICS_PORT:-8002}:8002" + volumes: + - triton_models:/models + command: > + tritonserver + --model-repository=/models + --strict-model-config=${STRICT_CONFIG:-false} + --log-verbose=${LOG_VERBOSE:-0} + --exit-on-error=${EXIT_ON_ERROR:-false} + --rate-limit=${RATE_LIMIT:-off} + --model-control-mode=${MODEL_CONTROL:-poll} + --repository-poll-secs=${POLL_INTERVAL:-30} + environment: + - NVIDIA_VISIBLE_DEVICES=${CUDA_DEVICES:-all} # "all" is only valid for NVIDIA_VISIBLE_DEVICES; CUDA_VISIBLE_DEVICES=all hides every GPU + deploy: + resources: + reservations: + devices: + - driver: nvidia + count: all + capabilities: [gpu] + shm_size: ${SHM_SIZE:-1g} + ulimits: + memlock: -1 + stack: 67108864 + healthcheck: + test: ["CMD", "curl", "-f", "http://localhost:8000/v2/health/ready"] + interval: 30s + timeout: 10s + retries: 5 + +volumes: + triton_models: diff --git a/ai-templates-0/stacks/vllm/docker-compose.yml b/ai-templates-0/stacks/vllm/docker-compose.yml new file mode 100644 index 0000000..4b8c06d --- /dev/null +++ b/ai-templates-0/stacks/vllm/docker-compose.yml @@ -0,0 +1,29 @@ +version: "3.8" + +services: + vllm: + image: vllm/vllm-openai:latest + container_name: vllm + restart: unless-stopped + ports: + - "${VLLM_PORT:-8000}:8000" + volumes: + - vllm_cache:/root/.cache/huggingface + environment: + - HUGGING_FACE_HUB_TOKEN=${HF_TOKEN:-} + command: > + --model ${MODEL_NAME:-meta-llama/Llama-3.1-8B-Instruct} + --max-model-len ${MAX_MODEL_LEN:-4096} + --gpu-memory-utilization ${GPU_MEM_UTIL:-0.90} + --tensor-parallel-size ${TENSOR_PARALLEL:-1} + deploy: + resources: + reservations: + devices: + - driver: nvidia + count: all + capabilities: [gpu] + ipc: host + +volumes: + vllm_cache: diff --git a/ai-templates-0/stacks/weaviate/docker-compose.yml b/ai-templates-0/stacks/weaviate/docker-compose.yml new file mode 100644 index 0000000..df41b85 --- /dev/null +++ b/ai-templates-0/stacks/weaviate/docker-compose.yml @@ -0,0 +1,22 @@ +version: "3.8" + +services: + weaviate: + image:
cr.weaviate.io/semitechnologies/weaviate:latest + container_name: weaviate + restart: unless-stopped + ports: + - "${WEAVIATE_HTTP_PORT:-8080}:8080" + - "${WEAVIATE_GRPC_PORT:-50051}:50051" + volumes: + - weaviate_data:/var/lib/weaviate + environment: + - QUERY_DEFAULTS_LIMIT=25 + - AUTHENTICATION_ANONYMOUS_ACCESS_ENABLED=${ANON_ACCESS:-true} + - PERSISTENCE_DATA_PATH=/var/lib/weaviate + - DEFAULT_VECTORIZER_MODULE=${VECTORIZER:-none} + - CLUSTER_HOSTNAME=node1 + - ENABLE_MODULES=${MODULES:-generative-openai} # text2vec-transformers needs TRANSFORMERS_INFERENCE_API and an inference container, neither defined here + +volumes: + weaviate_data: diff --git a/ai-templates-0/stacks/whisper/docker-compose.yml b/ai-templates-0/stacks/whisper/docker-compose.yml new file mode 100644 index 0000000..6a04b81 --- /dev/null +++ b/ai-templates-0/stacks/whisper/docker-compose.yml @@ -0,0 +1,19 @@ +version: "3.8" + +services: + whisper: + image: onerahmet/openai-whisper-asr-webservice:latest-gpu + container_name: whisper-asr + restart: unless-stopped + ports: + - "${WHISPER_PORT:-9000}:9000" + environment: + - ASR_MODEL=${ASR_MODEL:-base} + - ASR_ENGINE=${ASR_ENGINE:-openai_whisper} + deploy: + resources: + reservations: + devices: + - driver: nvidia + count: all + capabilities: [gpu]