v2

2026-03-10 14:40:51 -03:00
parent 290f05be87
commit 92713a4d1c
30 changed files with 2074 additions and 0 deletions
--- a/ai-templates-0/stacks/ray-cluster/docker-compose.yml
+++ b/ai-templates-0/stacks/ray-cluster/docker-compose.yml
@@ -0,0 +1,60 @@
+version: "3.8"  # NOTE(review): top-level "version" is obsolete in the Compose Specification; confirm it is still needed
+
+services:
+  ray-head:  # Ray head node: runs `ray start --head` and publishes the cluster ports
+    container_name: ray-head
+    image: rayproject/ray-ml:${RAY_VERSION:-2.40.0}-py310-gpu  # NOTE(review): confirm this tag is published
+    restart: unless-stopped
+    shm_size: ${SHM_SIZE:-8g}  # size of the container's /dev/shm; override with SHM_SIZE
+    environment:  # NOTE(review): no grafana/prometheus service is declared in this file; confirm reachability
+      RAY_GRAFANA_HOST: "http://grafana:3000"
+      RAY_PROMETHEUS_HOST: "http://prometheus:9090"
+    command: >
+      ray start --head
+      --port=6379
+      --dashboard-host=0.0.0.0
+      --dashboard-port=8265
+      --num-gpus=${HEAD_GPUS:-1}
+      --block
+    ports:  # host side is configurable via env vars; container side is fixed
+      - "${DASHBOARD_PORT:-8265}:8265"
+      - "${CLIENT_PORT:-10001}:10001"
+      - "${GCS_PORT:-6379}:6379"
+      - "${SERVE_PORT:-8000}:8000"
+    volumes:  # named volumes declared at the bottom of this file
+      - ray_data:/home/ray/data
+      - ray_results:/home/ray/ray_results
+    deploy:
+      resources:
+        reservations:
+          devices:  # reserves every NVIDIA GPU; --num-gpus in the command is passed to Ray separately
+            - driver: nvidia
+              count: all
+              capabilities: [gpu]
+
+  ray-worker:  # Ray worker node(s): connect to the head at ray-head:6379
+    image: rayproject/ray-ml:${RAY_VERSION:-2.40.0}-py310-gpu  # NOTE(review): confirm this tag is published
+    depends_on:  # short form: start order only, no readiness condition
+      - ray-head
+    restart: unless-stopped
+    shm_size: ${SHM_SIZE:-8g}  # size of the container's /dev/shm; override with SHM_SIZE
+    volumes:  # only ray_data is mounted here; ray_results is not
+      - ray_data:/home/ray/data
+    command: >
+      ray start
+      --address=ray-head:6379
+      --num-gpus=${WORKER_GPUS:-1}
+      --num-cpus=${WORKER_CPUS:-4}
+      --block
+    deploy:
+      replicas: ${NUM_WORKERS:-1}  # number of worker containers; defaults to 1
+      resources:
+        reservations:
+          devices:  # reserves every NVIDIA GPU; --num-gpus in the command is passed to Ray separately
+            - driver: nvidia
+              count: all
+              capabilities: [gpu]
+
+volumes:  # named volumes with default settings, referenced by the services above
+  ray_data: {}
+  ray_results: {}