{ "version": "3", "templates": [
{ "id": 1, "type": 3, "title": "Ollama", "description": "Local LLM inference engine supporting Llama, Mistral, Qwen, Gemma, Phi and 100+ models with GPU acceleration", "note": "Requires NVIDIA GPU with Docker GPU runtime configured. Pull models after deployment with: docker exec ollama ollama pull llama3.1", "categories": ["ai", "llm", "inference"], "platform": "linux", "logo": "https://ollama.com/public/ollama.png", "repository": { "url": "https://git.oe74.net/adelorenzo/portainer_scripts", "stackfile": "ai-templates/stacks/ollama/docker-compose.yml" }, "env": [ { "name": "OLLAMA_PORT", "label": "Ollama API port", "default": "11434" }, { "name": "OLLAMA_NUM_PARALLEL", "label": "Max parallel requests", "default": "4" }, { "name": "OLLAMA_MAX_LOADED_MODELS", "label": "Max models loaded in VRAM", "default": "2" } ] },
{ "id": 2, "type": 3, "title": "Open WebUI + Ollama", "description": "Full-featured ChatGPT-like web interface bundled with Ollama backend for local LLM inference", "note": "Access the web UI at the configured port. First user to register becomes admin. Requires NVIDIA GPU.", "categories": ["ai", "llm", "chat-ui"], "platform": "linux", "logo": "https://docs.openwebui.com/img/logo.png", "repository": { "url": "https://git.oe74.net/adelorenzo/portainer_scripts", "stackfile": "ai-templates/stacks/open-webui/docker-compose.yml" }, "env": [ { "name": "OPEN_WEBUI_PORT", "label": "Web UI port", "default": "3000" }, { "name": "OLLAMA_PORT", "label": "Ollama API port", "default": "11434" }, { "name": "WEBUI_SECRET_KEY", "label": "Secret key for sessions", "default": "changeme" }, { "name": "ENABLE_SIGNUP", "label": "Allow user registration", "default": "true" } ] },
{ "id": 3, "type": 3, "title": "LocalAI", "description": "Drop-in OpenAI API compatible replacement. Run LLMs and generate images and audio locally with GPU acceleration", "note": "Exposes an OpenAI-compatible API at /v1/. Models can be loaded via the API or placed in the models volume.", "categories": ["ai", "llm", "openai-api"], "platform": "linux", "logo": "https://localai.io/logo.png", "repository": { "url": "https://git.oe74.net/adelorenzo/portainer_scripts", "stackfile": "ai-templates/stacks/localai/docker-compose.yml" }, "env": [ { "name": "LOCALAI_PORT", "label": "API port", "default": "8080" }, { "name": "THREADS", "label": "CPU threads for inference", "default": "4" }, { "name": "CONTEXT_SIZE", "label": "Default context window size", "default": "4096" } ] },
{ "id": 4, "type": 3, "title": "vLLM", "description": "High-throughput LLM serving engine with PagedAttention, continuous batching, and OpenAI-compatible API", "note": "Requires NVIDIA GPU with sufficient VRAM for the chosen model. HuggingFace token needed for gated models.", "categories": ["ai", "llm", "inference", "high-performance"], "platform": "linux", "logo": "https://docs.vllm.ai/en/latest/_static/vllm-logo-text-light.png", "repository": { "url": "https://git.oe74.net/adelorenzo/portainer_scripts", "stackfile": "ai-templates/stacks/vllm/docker-compose.yml" }, "env": [ { "name": "VLLM_PORT", "label": "API port", "default": "8000" }, { "name": "MODEL_NAME", "label": "HuggingFace model ID", "default": "meta-llama/Llama-3.1-8B-Instruct" }, { "name": "HF_TOKEN", "label": "HuggingFace access token" }, { "name": "MAX_MODEL_LEN", "label": "Max sequence length", "default": "4096" }, { "name": "GPU_MEM_UTIL", "label": "GPU memory utilization (0-1)", "default": "0.90" }, { "name": "TENSOR_PARALLEL", "label": "Tensor parallel GPU count", "default": "1" } ] },
{ "id": 5, "type": 3, "title": "Text Generation WebUI", "description": "Comprehensive web UI for running LLMs locally (oobabooga). Supports GGUF, GPTQ, AWQ, EXL2, and HF formats", "note": "Requires NVIDIA GPU. Models should be placed in the models volume. Supports extensions for RAG, TTS, and more.", "categories": ["ai", "llm", "chat-ui"], "platform": "linux", "logo": "https://raw.githubusercontent.com/oobabooga/text-generation-webui/main/docs/logo.png", "repository": { "url": "https://git.oe74.net/adelorenzo/portainer_scripts", "stackfile": "ai-templates/stacks/text-generation-webui/docker-compose.yml" }, "env": [ { "name": "WEBUI_PORT", "label": "Web UI port", "default": "7860" }, { "name": "API_PORT", "label": "API port", "default": "5000" }, { "name": "STREAM_PORT", "label": "Streaming API port", "default": "5005" }, { "name": "EXTRA_LAUNCH_ARGS", "label": "Extra launch arguments", "default": "--listen --api" } ] },
{ "id": 6, "type": 3, "title": "LiteLLM Proxy", "description": "Unified LLM API gateway supporting 100+ providers (OpenAI, Anthropic, Ollama, vLLM, etc.) with spend tracking and load balancing", "note": "Configure models in /app/config/litellm_config.yaml after deployment. Includes PostgreSQL for usage tracking.", "categories": ["ai", "llm", "api-gateway", "proxy"], "platform": "linux", "logo": "https://litellm.ai/favicon.ico", "repository": { "url": "https://git.oe74.net/adelorenzo/portainer_scripts", "stackfile": "ai-templates/stacks/litellm/docker-compose.yml" }, "env": [ { "name": "LITELLM_PORT", "label": "Proxy API port", "default": "4000" }, { "name": "LITELLM_MASTER_KEY", "label": "Master API key", "default": "sk-master-key" }, { "name": "PG_USER", "label": "PostgreSQL user", "default": "litellm" }, { "name": "PG_PASSWORD", "label": "PostgreSQL password", "default": "litellm" } ] },
{ "id": 7, "type": 3, "title": "ComfyUI", "description": "Node-based Stable Diffusion workflow engine for image and video generation with GPU acceleration", "note": "Requires NVIDIA GPU. Access the node editor at the configured port. Models go in the models volume.", "categories": ["ai", "image-generation", "stable-diffusion"], "platform": "linux", "logo": "https://raw.githubusercontent.com/comfyanonymous/ComfyUI/master/web/assets/comfyui-logo.png", "repository": { "url": "https://git.oe74.net/adelorenzo/portainer_scripts", "stackfile": "ai-templates/stacks/comfyui/docker-compose.yml" }, "env": [ { "name": "COMFYUI_PORT", "label": "Web UI port", "default": "8188" }, { "name": "CLI_ARGS", "label": "Launch arguments", "default": "--listen 0.0.0.0 --port 8188" } ] },
{ "id": 8, "type": 3, "title": "Stable Diffusion WebUI", "description": "AUTOMATIC1111 web interface for Stable Diffusion image generation with extensive extension ecosystem", "note": "Requires NVIDIA GPU with 8GB+ VRAM. First startup downloads the base model and may take several minutes.", "categories": ["ai", "image-generation", "stable-diffusion"], "platform": "linux", "logo": "https://raw.githubusercontent.com/AUTOMATIC1111/stable-diffusion-webui/master/html/logo.png", "repository": { "url": "https://git.oe74.net/adelorenzo/portainer_scripts", "stackfile": "ai-templates/stacks/stable-diffusion-webui/docker-compose.yml" }, "env": [ { "name": "SD_PORT", "label": "Web UI port", "default": "7860" }, { "name": "CLI_ARGS", "label": "Launch arguments", "default": "--listen --api --xformers" } ] },
{ "id": 9, "type": 3, "title": "Langflow", "description": "Visual framework for building multi-agent and RAG applications. Drag-and-drop LLM pipeline builder", "note": "Access the visual editor at the configured port. Connect to Ollama, OpenAI, or any LLM backend.", "categories": ["ai", "agents", "rag", "workflows"], "platform": "linux", "logo": "https://avatars.githubusercontent.com/u/128686189", "repository": { "url": "https://git.oe74.net/adelorenzo/portainer_scripts", "stackfile": "ai-templates/stacks/langflow/docker-compose.yml" }, "env": [ { "name": "LANGFLOW_PORT", "label": "Web UI port", "default": "7860" }, { "name": "AUTO_LOGIN", "label": "Skip login screen", "default": "true" } ] },
{ "id": 10, "type": 3, "title": "Flowise", "description": "Drag-and-drop LLM orchestration tool. Build chatbots, agents, and RAG pipelines without coding", "note": "Default credentials are admin/changeme. Connect to any OpenAI-compatible API backend.", "categories": ["ai", "agents", "rag", "chatbots"], "platform": "linux", "logo": "https://flowiseai.com/favicon.ico", "repository": { "url": "https://git.oe74.net/adelorenzo/portainer_scripts", "stackfile": "ai-templates/stacks/flowise/docker-compose.yml" }, "env": [ { "name": "FLOWISE_PORT", "label": "Web UI port", "default": "3000" }, { "name": "FLOWISE_USERNAME", "label": "Admin username", "default": "admin" }, { "name": "FLOWISE_PASSWORD", "label": "Admin password", "default": "changeme" } ] },
{ "id": 11, "type": 3, "title": "n8n (AI-Enabled)", "description": "Workflow automation platform with built-in AI agent nodes, LLM chains, and vector store integrations", "note": "AI features include: AI Agent nodes, LLM Chain, Document Loaders, Vector Stores, Text Splitters, and Memory nodes.", "categories": ["ai", "automation", "workflows", "agents"], "platform": "linux", "logo": "https://n8n.io/favicon.ico", "repository": { "url": "https://git.oe74.net/adelorenzo/portainer_scripts", "stackfile": "ai-templates/stacks/n8n-ai/docker-compose.yml" }, "env": [ { "name": "N8N_PORT", "label": "Web UI port", "default": "5678" }, { "name": "N8N_USER", "label": "Admin username", "default": "admin" }, { "name": "N8N_PASSWORD", "label": "Admin password", "default": "changeme" }, { "name": "WEBHOOK_URL", "label": "External webhook URL", "default": "http://localhost:5678/" } ] },
{ "id": 12, "type": 3, "title": "Qdrant", "description": "High-performance vector similarity search engine for RAG, semantic search, and AI applications", "note": "REST API on port 6333, gRPC on 6334. Supports filtering, payload indexing, and distributed mode.", "categories": ["ai", "vector-database", "rag", "embeddings"], "platform": "linux", "logo": "https://qdrant.tech/images/logo_with_text.png", "repository": { "url": "https://git.oe74.net/adelorenzo/portainer_scripts", "stackfile": "ai-templates/stacks/qdrant/docker-compose.yml" }, "env": [ { "name": "QDRANT_HTTP_PORT", "label": "REST API port", "default": "6333" }, { "name": "QDRANT_GRPC_PORT", "label": "gRPC port", "default": "6334" }, { "name": "QDRANT_API_KEY", "label": "API key (optional)" } ] },
{ "id": 13, "type": 3, "title": "ChromaDB", "description": "AI-native open-source embedding database. The easiest vector store to get started with for RAG applications", "note": "Persistent storage enabled by default. Compatible with LangChain, LlamaIndex, and all major AI frameworks.", "categories": ["ai", "vector-database", "rag", "embeddings"], "platform": "linux", "logo": "https://www.trychroma.com/chroma-logo.png", "repository": { "url": "https://git.oe74.net/adelorenzo/portainer_scripts", "stackfile": "ai-templates/stacks/chromadb/docker-compose.yml" }, "env": [ { "name": "CHROMA_PORT", "label": "API port", "default": "8000" }, { "name": "CHROMA_TOKEN", "label": "Auth token (optional)" }, { "name": "TELEMETRY", "label": "Anonymous telemetry", "default": "FALSE" } ] },
{ "id": 14, "type": 3, "title": "Weaviate", "description": "AI-native vector database with built-in vectorization modules and hybrid search capabilities", "note": "Supports text2vec-transformers, generative-openai, and many other modules. Configure modules via environment variables.", "categories": ["ai", "vector-database", "rag", "search"], "platform": "linux", "logo": "https://weaviate.io/img/site/weaviate-logo-light.png", "repository": { "url": "https://git.oe74.net/adelorenzo/portainer_scripts", "stackfile": "ai-templates/stacks/weaviate/docker-compose.yml" }, "env": [ { "name": "WEAVIATE_HTTP_PORT", "label": "HTTP API port", "default": "8080" }, { "name": "WEAVIATE_GRPC_PORT", "label": "gRPC port", "default": "50051" }, { "name": "VECTORIZER", "label": "Default vectorizer module", "default": "none" }, { "name": "MODULES", "label": "Enabled modules", "default": "text2vec-transformers,generative-openai" }, { "name": "ANON_ACCESS", "label": "Anonymous access enabled", "default": "true" } ] },
{ "id": 15, "type": 3, "title": "MLflow", "description": "Open-source ML lifecycle platform — experiment tracking, model registry, and model serving", "note": "Access the tracking UI at the configured port. Uses SQLite backend by default — switch to PostgreSQL for production.", "categories": ["ai", "mlops", "experiment-tracking", "model-registry"], "platform": "linux", "logo": "https://mlflow.org/img/mlflow-black.svg", "repository": { "url": "https://git.oe74.net/adelorenzo/portainer_scripts", "stackfile": "ai-templates/stacks/mlflow/docker-compose.yml" }, "env": [ { "name": "MLFLOW_PORT", "label": "Tracking UI port", "default": "5000" } ] },
{ "id": 16, "type": 3, "title": "Label Studio", "description": "Multi-type data labeling and annotation platform for training ML and AI models", "note": "Supports image, text, audio, video, and time-series annotation. Export to all major ML formats.", "categories": ["ai", "mlops", "data-labeling", "annotation"], "platform": "linux", "logo": "https://labelstud.io/images/ls-logo.png", "repository": { "url": "https://git.oe74.net/adelorenzo/portainer_scripts", "stackfile": "ai-templates/stacks/label-studio/docker-compose.yml" }, "env": [ { "name": "LS_PORT", "label": "Web UI port", "default": "8080" }, { "name": "LS_USER", "label": "Admin email", "default": "admin@example.com" }, { "name": "LS_PASSWORD", "label": "Admin password", "default": "changeme" } ] },
{ "id": 17, "type": 3, "title": "Jupyter (GPU / PyTorch)", "description": "GPU-accelerated Jupyter Lab with PyTorch, CUDA, and data science libraries pre-installed", "note": "Requires NVIDIA GPU. Access with the configured token. Workspace persists in the work volume.", "categories": ["ai", "ml-development", "notebooks", "pytorch"], "platform": "linux", "logo": "https://jupyter.org/assets/homepage/main-logo.svg", "repository": { "url": "https://git.oe74.net/adelorenzo/portainer_scripts", "stackfile": "ai-templates/stacks/jupyter-gpu/docker-compose.yml" }, "env": [ { "name": "JUPYTER_PORT", "label": "Jupyter Lab port", "default": "8888" }, { "name": "JUPYTER_TOKEN", "label": "Access token", "default": "changeme" }, { "name": "GRANT_SUDO", "label": "Allow sudo in notebooks", "default": "yes" } ] },
{ "id": 18, "type": 3, "title": "Whisper ASR", "description": "OpenAI Whisper speech-to-text API server with GPU acceleration. Supports transcription and translation", "note": "Requires NVIDIA GPU. API documentation available at /docs. Supports models: tiny, base, small, medium, large-v3.", "categories": ["ai", "speech-to-text", "transcription", "audio"], "platform": "linux", "logo": "https://upload.wikimedia.org/wikipedia/commons/0/04/ChatGPT_logo.svg", "repository": { "url": "https://git.oe74.net/adelorenzo/portainer_scripts", "stackfile": "ai-templates/stacks/whisper/docker-compose.yml" }, "env": [ { "name": "WHISPER_PORT", "label": "API port", "default": "9000" }, { "name": "ASR_MODEL", "label": "Whisper model size", "description": "Options: tiny, base, small, medium, large-v3", "default": "base" }, { "name": "ASR_ENGINE", "label": "ASR engine", "default": "openai_whisper" } ] },
{ "id": 19, "type": 3, "title": "NVIDIA Triton Inference Server", "description": "Production-grade inference serving for any AI model — supports TensorRT, ONNX, PyTorch, TensorFlow, vLLM, and Python backends with dynamic batching, model ensembles, and multi-GPU scheduling", "note": "Requires NVIDIA GPU. Place model repositories in the models volume following Triton's model repository layout. Health check at /v2/health/ready.", "categories": ["ai", "inference", "edge", "production", "nvidia"], "platform": "linux", "logo": "https://developer.nvidia.com/favicon.ico", "repository": { "url": "https://git.oe74.net/adelorenzo/portainer_scripts", "stackfile": "ai-templates/stacks/triton/docker-compose.yml" }, "env": [ { "name": "HTTP_PORT", "label": "HTTP inference port", "default": "8000" }, { "name": "GRPC_PORT", "label": "gRPC inference port", "default": "8001" }, { "name": "METRICS_PORT", "label": "Prometheus metrics port", "default": "8002" }, { "name": "TRITON_VERSION", "label": "Triton version tag", "default": "24.08" }, { "name": "MODEL_CONTROL", "label": "Model control mode (none, poll, explicit)", "default": "poll" }, { "name": "POLL_INTERVAL", "label": "Model repository poll interval (seconds)", "default": "30" }, { "name": "SHM_SIZE", "label": "Shared memory size", "default": "1g" } ] },
{ "id": 20, "type": 3, "title": "ONNX Runtime Server", "description": "Lightweight cross-platform inference server for ONNX models. Supports GPU and CPU-only profiles for edge deployment on resource-constrained nodes", "note": "Use docker compose --profile gpu up for GPU nodes or --profile edge up for CPU-only edge nodes. Place your .onnx model file in the models volume.", "categories": ["ai", "inference", "edge", "lightweight", "onnx"], "platform": "linux", "logo": "https://onnxruntime.ai/images/icons/ONNX-Runtime-logo.svg", "repository": { "url": "https://git.oe74.net/adelorenzo/portainer_scripts", "stackfile": "ai-templates/stacks/onnx-runtime/docker-compose.yml" }, "env": [ { "name": "HTTP_PORT", "label": "HTTP port", "default": "8001" }, { "name": "GRPC_PORT", "label": "gRPC port", "default": "50051" }, { "name": "MODEL_FILE", "label": "Model filename in /models", "default": "model.onnx" }, { "name": "NUM_THREADS", "label": "Inference threads", "default": "4" }, { "name": "CPU_LIMIT", "label": "CPU core limit (edge profile)", "default": "2.0" }, { "name": "MEM_LIMIT", "label": "Memory limit (edge profile)", "default": "2G" } ] },
{ "id": 21, "type": 3, "title": "NVIDIA DeepStream", "description": "GPU-accelerated video analytics and computer vision pipeline for industrial inspection, anomaly detection, and smart factory applications with Triton backend", "note": "Requires NVIDIA GPU with video decode capabilities. For camera access on edge devices, set PRIVILEGED=true. Supports RTSP output on port 8554.", "categories": ["ai", "computer-vision", "industrial", "edge", "video-analytics"], "platform": "linux", "logo": "https://developer.nvidia.com/favicon.ico", "repository": { "url": "https://git.oe74.net/adelorenzo/portainer_scripts", "stackfile": "ai-templates/stacks/deepstream/docker-compose.yml" }, "env": [ { "name": "RTSP_PORT", "label": "RTSP output port", "default": "8554" }, { "name": "REST_PORT", "label": "REST API port", "default": "9000" }, { "name": "DS_VERSION", "label": "DeepStream version", "default": "7.1" }, { "name": "SHM_SIZE", "label": "Shared memory size", "default": "2g" }, { "name": "PRIVILEGED", "label": "Privileged mode (for device access)", "default": "false" } ] },
{ "id": 22, "type": 3, "title": "Ray Cluster (GPU)", "description": "Distributed compute cluster for LLM fine-tuning, distributed training, hyperparameter tuning, and scalable inference with Ray Serve. Head + configurable worker nodes", "note": "Requires NVIDIA GPU on all nodes. Scale workers with NUM_WORKERS. Dashboard accessible at the configured port. Includes Ray Train, Tune, Serve, and Data.", "categories": ["ai", "distributed-training", "fine-tuning", "inference", "cluster"], "platform": "linux", "logo": "https://docs.ray.io/en/latest/_static/ray_logo.png", "repository": { "url": "https://git.oe74.net/adelorenzo/portainer_scripts", "stackfile": "ai-templates/stacks/ray-cluster/docker-compose.yml" }, "env": [ { "name": "DASHBOARD_PORT", "label": "Ray Dashboard port", "default": "8265" }, { "name": "SERVE_PORT", "label": "Ray Serve port", "default": "8000" }, { "name": "RAY_VERSION", "label": "Ray version", "default": "2.40.0" }, { "name": "NUM_WORKERS", "label": "Number of worker nodes", "default": "1" }, { "name": "HEAD_GPUS", "label": "GPUs on head node", "default": "1" }, { "name": "WORKER_GPUS", "label": "GPUs per worker", "default": "1" }, { "name": "WORKER_CPUS", "label": "CPUs per worker", "default": "4" }, { "name": "SHM_SIZE", "label": "Shared memory per node", "default": "8g" } ] },
{ "id": 23, "type": 3, "title": "Prefect (ML Pipeline Orchestration)", "description": "Governed ML pipeline orchestration platform with scheduling, retries, audit logging, and role-based access. Includes server, worker, and PostgreSQL backend", "note": "Access the Prefect UI at the configured port. Create flows in Python and register them against this server. Worker uses Docker execution for isolation.", "categories": ["ai", "mlops", "pipelines", "governance", "orchestration"], "platform": "linux", "logo": "https://www.prefect.io/favicon.ico", "repository": { "url": "https://git.oe74.net/adelorenzo/portainer_scripts", "stackfile": "ai-templates/stacks/prefect/docker-compose.yml" }, "env": [ { "name": "PREFECT_PORT", "label": "Prefect UI port", "default": "4200" }, { "name": "PREFECT_VERSION", "label": "Prefect version", "default": "3-latest" }, { "name": "PG_USER", "label": "PostgreSQL user", "default": "prefect" }, { "name": "PG_PASSWORD", "label": "PostgreSQL password", "default": "prefect" }, { "name": "ANALYTICS", "label": "Enable analytics", "default": "false" } ] },
{ "id": 24, "type": 3, "title": "BentoML", "description": "Unified model serving framework for packaging, deploying, and managing ML models as production-ready API endpoints with GPU support", "note": "Requires NVIDIA GPU. Build Bentos (model packages) and serve them through this runtime. Prometheus metrics on port 3001.", "categories": ["ai", "model-serving", "inference", "mlops"], "platform": "linux", "logo": "https://docs.bentoml.com/en/latest/_static/img/logo.svg", "repository": { "url": "https://git.oe74.net/adelorenzo/portainer_scripts", "stackfile": "ai-templates/stacks/bentoml/docker-compose.yml" }, "env": [ { "name": "BENTO_PORT", "label": "Serving API port", "default": "3000" }, { "name": "METRICS_PORT", "label": "Prometheus metrics port", "default": "3001" }, { "name": "BENTO_VERSION", "label": "BentoML version", "default": "latest" }, { "name": "LOG_LEVEL", "label": "Log level", "default": "INFO" } ] },
{ "id": 25, "type": 3, "title": "MLflow + MinIO (Production MLOps)", "description": "Production-grade MLOps stack: MLflow tracking server with PostgreSQL backend and MinIO S3-compatible artifact store for governed model registry, experiment tracking, and versioned artifact storage", "note": "MinIO console available at port 9001. MLflow auto-creates the artifact bucket on startup. For production, change all default credentials.", "categories": ["ai", "mlops", "model-registry", "governance", "experiment-tracking"], "platform": "linux", "logo": "https://mlflow.org/img/mlflow-black.svg", "repository": { "url": "https://git.oe74.net/adelorenzo/portainer_scripts", "stackfile": "ai-templates/stacks/minio-mlops/docker-compose.yml" }, "env": [ { "name": "MLFLOW_PORT", "label": "MLflow UI port", "default": "5000" }, { "name": "MINIO_API_PORT", "label": "MinIO S3 API port", "default": "9000" }, { "name": "MINIO_CONSOLE_PORT", "label": "MinIO console port", "default": "9001" }, { "name": "PG_USER", "label": "PostgreSQL user", "default": "mlflow" }, { "name": "PG_PASSWORD", "label": "PostgreSQL password", "default": "mlflow" }, { "name": "MINIO_ROOT_USER", "label": "MinIO root user", "default": "mlflow" }, { "name": "MINIO_ROOT_PASSWORD", "label": "MinIO root password", "default": "mlflow123" }, { "name": "ARTIFACT_BUCKET", "label": "S3 artifact bucket name", "default": "mlflow-artifacts" } ] },
{ "id": 26, "type": 3, "title": "NVIDIA NIM", "description": "Enterprise-grade optimized LLM inference microservice from NVIDIA. Pre-optimized with TensorRT-LLM for maximum throughput with OpenAI-compatible API", "note": "Requires NVIDIA GPU and an NGC API key from NVIDIA Build. Model downloads are cached in the nim_cache volume. First startup may take several minutes.", "categories": ["ai", "llm", "inference", "enterprise", "nvidia"], "platform": "linux", "logo": "https://developer.nvidia.com/favicon.ico", "repository": { "url": "https://git.oe74.net/adelorenzo/portainer_scripts", "stackfile": "ai-templates/stacks/nvidia-nim/docker-compose.yml" }, "env": [ { "name": "NIM_PORT", "label": "API port", "default": "8000" }, { "name": "NGC_API_KEY", "label": "NVIDIA NGC API key (required)" }, { "name": "NIM_MODEL", "label": "NIM model container", "description": "Model from NVIDIA NGC catalog", "default": "meta/llama-3.1-8b-instruct" }, { "name": "NIM_VERSION", "label": "NIM version", "default": "latest" }, { "name": "MAX_MODEL_LEN", "label": "Max sequence length", "default": "4096" }, { "name": "GPU_MEM_UTIL", "label": "GPU memory utilization (0-1)", "default": "0.9" }, { "name": "SHM_SIZE", "label": "Shared memory size", "default": "16g" } ] }
] }