From 10b3bc378fee2255c8e472fdf0ce7009e7d09d15 Mon Sep 17 00:00:00 2001
From: Jordan Moore
Date: Wed, 22 Apr 2020 13:24:43 -0500
Subject: [PATCH] inital commit

Signed-off-by: Jordan Moore
---
 ultimate-instrumentation/README.md           |   6 +
 ultimate-instrumentation/docker-compose.yaml | 134 ++++++++++++++++
 ultimate-instrumentation/telegraf.conf       | 153 +++++++++++++++++++
 3 files changed, 293 insertions(+)
 create mode 100644 ultimate-instrumentation/README.md
 create mode 100644 ultimate-instrumentation/docker-compose.yaml
 create mode 100644 ultimate-instrumentation/telegraf.conf

diff --git a/ultimate-instrumentation/README.md b/ultimate-instrumentation/README.md
new file mode 100644
index 0000000..d6d38d8
--- /dev/null
+++ b/ultimate-instrumentation/README.md
@@ -0,0 +1,6 @@
+ultimate-instrumentation
+===
+
+TODO
+
+Refer to the comments in [`docker-compose.yaml`](docker-compose.yaml).
\ No newline at end of file
diff --git a/ultimate-instrumentation/docker-compose.yaml b/ultimate-instrumentation/docker-compose.yaml
new file mode 100644
index 0000000..05c3b92
--- /dev/null
+++ b/ultimate-instrumentation/docker-compose.yaml
@@ -0,0 +1,134 @@
+## Ultimate Instrumentation Stack
+## Provides
+## - log collection and forwarding (logspout) (localhost:8000/logs)
+## - http/tcp/udp proxying (traefik) (localhost:8080/dashboard/)
+## - service discovery + dns + health-checks (consul) (localhost:8500/ui) (depends on registrator)
+## - time-series DB storage (influxdb) (localhost:8086)
+## - metric & event visualization (grafana) (localhost:3000)
+## http routing provided by traefik on `localhost/` (see SERVICE_TAGS vars for routes)
+
+version: '3'
+services:
+  # logging - Logspout meant for log collection, not storage.
+  # Could pair this with syslog/GELF into FluentD/Logstash/Kafka with indexing by Graylog/Elasticsearch/Solr/Splunk
+  logspout:
+    image: gliderlabs/logspout:master
+    volumes: ['/var/run/docker.sock:/tmp/docker.sock:ro']
+    networks: ['backend','frontend']
+    ports: ['8000:80'] # logs avail at http://localhost:8000/logs
+    environment:
+      EXCLUDE_LABEL: logspout.exclude # containers labelled logspout.exclude=true are not collected
+      # registrator tags; each service needs its own Traefik router name (a shared name with different rules conflicts)
+      SERVICE_TAGS: 'traefik.enable=true,traefik.port=8000,traefik.docker.network=frontend,traefik.http.routers.logspout.rule=PathPrefix(`/logs`)'
+
+  # service discovery (Consul + registrator)
+  registrator:
+    image: 'gliderlabs/registrator:master'
+    depends_on: ['consul']
+    networks: ['backend']
+    volumes: ['/var/run/docker.sock:/tmp/docker.sock:ro']
+    command: ["-internal=true", "consul://consul:8500"]
+  consul:
+    image: 'consul:1.6.4'
+    restart: always
+    networks: ['backend','frontend']
+    ports:
+      - '8500:8500' # web ui
+      # - '8300:8300' # server rpc
+      # - '8301:8301' # lan serf tcp
+      # - '8301:8301/udp' # lan serf udp
+      # - '8600:8600' # dns tcp
+      # - '8600:8600/udp' # dns udp
+    environment:
+      # registrator
+      SERVICE_TAGS: "traefik.enable=true,traefik.docker.network=frontend"
+    labels:
+      - 'logspout.exclude=true' # must match logspout's EXCLUDE_LABEL
+
+  # load balancing + routing
+  traefik:
+    image: traefik:2.2
+    depends_on: ['registrator', 'consul']
+    networks: ['backend','frontend']
+    ports:
+      - '8080:8080' # web ui
+      - '80:80'
+    command:
+      # - "--log.level=DEBUG"
+      - "--api.insecure=true"
+      - "--api.dashboard=true"
+      - "--providers.consulcatalog=true"
+      - "--providers.consulcatalog.endpoint.address=http://consul:8500"
+      - "--providers.consulcatalog.endpoint.datacenter=dc1"
+      - "--providers.consulcatalog.cache=true"
+      - "--providers.consulcatalog.exposedByDefault=false"
+    labels:
+      - 'logspout.exclude=true' # must match logspout's EXCLUDE_LABEL
+
+  # monitoring
+  # TODO: Tracing - Zipkin/Jaeger
+
+  ## TIG Stack - Telegraf + Influx + Grafana
+  ## This is functionally equivalent to just Prometheus + Grafana, but allows metric push & pull
+  telegraf:
+    image: telegraf:1.14-alpine
+    restart: unless-stopped
+    networks: ['backend', 'monitor']
+    depends_on: ['influxdb']
+    labels:
+      - 'logspout.exclude=true' # must match logspout's EXCLUDE_LABEL
+    volumes:
+      - ./telegraf.conf:/etc/telegraf/telegraf.conf:ro
+      # For docker stats
+      - /var/run/docker.sock:/var/run/docker.sock:ro
+  influxdb:
+    image: influxdb:1.7-alpine
+    restart: always
+    ports: ['8086:8086']
+    networks: ['monitor']
+    labels:
+      - 'logspout.exclude=true' # must match logspout's EXCLUDE_LABEL
+    environment:
+      INFLUXDB_DB: telegraf
+      INFLUXDB_USER: telegraf
+      INFLUXDB_USER_PASSWORD: 'compose' # telegraf.conf [[outputs.influxdb]] must use the same password
+      INFLUXDB_REPORTING_DISABLED: 'true'
+
+    # volumes:
+    #   - influxdb-volume:/var/lib/influxdb
+  grafana:
+    image: grafana/grafana:master
+    depends_on: ['influxdb']
+    ports: ["3000:3000"]
+    networks: ['monitor', 'frontend']
+    user: "0"
+    labels:
+      - 'logspout.exclude=true' # must match logspout's EXCLUDE_LABEL
+    environment:
+      GF_SECURITY_ADMIN_PASSWORD: 'compose'
+      GF_USERS_ALLOW_SIGN_UP: 'false'
+      # registrator tags; router name is unique to this service
+      SERVICE_TAGS: 'traefik.enable=true,traefik.docker.network=frontend,traefik.http.routers.grafana.rule=PathPrefix(`/grafana`)'
+
+  ## TODO - get loki working (in grafana)
+  # loki:
+  #   image: grafana/loki:latest
+  #   depends_on: ['registrator', 'consul']
+  #   ports: ["3100:3100"]
+  #   command: ["-config.file=/etc/loki/local-config.yaml"]
+  #   networks: ['backend']
+  #   environment:
+  #     # LOGSPOUT: ignore
+  #     SERVICE_TAGS: "traefik.enable=true,traefik.http.routers.loki.rule=PathPrefix(`/loki`)"
+
+
+# Create a network for infrastructure components
+networks:
+  backend:
+  monitor:
+  frontend:
+
+# Create local persistent volumes
+volumes:
+  grafana-volume:
+  influxdb-volume:
\ No newline at end of file
diff --git a/ultimate-instrumentation/telegraf.conf b/ultimate-instrumentation/telegraf.conf
new file mode 100644
index 0000000..121d452
--- /dev/null
+++ b/ultimate-instrumentation/telegraf.conf
@@ -0,0 +1,153 @@
+# Configuration for telegraf agent
+[agent]
+  interval = "10s"
+  round_interval = true
+  metric_batch_size = 1000
+  metric_buffer_limit = 10000
+
+  collection_jitter = "3s"
+  flush_interval = "10s"
+  flush_jitter = "5s"
+
+  debug = false
+  quiet = false
+  logfile = "/var/log/telegraf/telegraf.log"
+  logfile_rotation_interval = "0d"
+  logfile_rotation_max_size = "1MB"
+  logfile_rotation_max_archives = 5
+
+  hostname = ""
+
+###############################################################################
+#                            OUTPUT PLUGINS                                   #
+###############################################################################
+
+# Configuration for sending metrics to InfluxDB
+[[outputs.influxdb]]
+  urls = ["http://influxdb:8086"] # required
+  database = "telegraf" # required
+  username = "telegraf"
+  password = "compose" # must match INFLUXDB_USER_PASSWORD in docker-compose.yaml
+  ## If true, the database tag will not be added to the metric.
+  exclude_database_tag = false
+  retention_policy = ""
+  write_consistency = "any"
+  timeout = "5s"
+  ## If true, no CREATE DATABASE queries will be sent. Set to true when using
+  ## Telegraf with a user without permissions to create databases or when the
+  ## database already exists.
+  skip_database_creation = false
+
+###############################################################################
+#                            INPUT PLUGINS                                    #
+###############################################################################
+
+# Read metrics about cpu usage
+[[inputs.cpu]]
+  ## Whether to report per-cpu stats or not
+  percpu = true
+  ## Whether to report total system cpu stats or not
+  totalcpu = true
+  ## If true, collect raw CPU time metrics.
+  collect_cpu_time = false
+  report_active = false
+
+# Read metrics about disk usage by mount point
+[[inputs.disk]]
+  ## Ignore some mountpoints by filesystem type. For example (dev)tmpfs (usually
+  ## present on /run, /var/run, /dev/shm or /dev).
+  ignore_fs = ["tmpfs", "devtmpfs", "devfs", "iso9660", "overlay", "aufs", "squashfs"]
+
+# Read metrics about disk IO by device
+[[inputs.diskio]]
+  ## Setting devices will restrict the stats to the specified devices.
+  # devices = ["sda", "sdb"]
+
+# Kernel statistics, read from /proc/stat
+[[inputs.kernel]]
+  # no options to set
+
+# Memory usage metrics
+[[inputs.mem]]
+  # no options to set
+
+# Process counts, grouped by process status
+[[inputs.processes]]
+  # no options to set
+
+# Swap memory usage metrics
+[[inputs.swap]]
+  # no options to set
+
+# System load and uptime metrics
+[[inputs.system]]
+  ## Uncomment the next line to drop the deprecated metrics.
+  # fielddrop = ["uptime_format"]
+
+[[inputs.internal]]
+  collect_memstats = true
+
+# Network interface usage metrics
+[[inputs.net]]
+  interfaces = ["eth*"]
+
+# Docker container metrics
+[[inputs.docker]]
+  endpoint = "unix:///var/run/docker.sock"
+  timeout = "5s"
+
+# StatsD server
+[[inputs.statsd]]
+  ## Address and port the UDP listener binds to
+  service_address = ":8125"
+
+  ## Percentiles computed for timing & histogram stats.
+  percentiles = [50.0, 75.0, 99.0, 99.9]
+
+  ## Reset gauges after every interval (default=false)
+  delete_gauges = true
+  ## Reset counters after every interval (default=false)
+  delete_counters = true
+
+  ## Separator placed between the elements of a statsd metric
+  metric_separator = "_"
+
+  ## Convert measurement names: "." to "_" and "-" to "__"
+  convert_names = false
+
+  ## Used to parse the StatsD variable name correctly for InfluxDB;
+  ## it allows counters/gauges to be grouped by measurement
+  templates = [
+    "* measurement.field"
+  ]
+
+  ## Parse statsd extensions in the datadog statsd format;
+  ## currently supports metrics and datadog tags.
+  ## http://docs.datadoghq.com/guides/dogstatsd/
+  datadog_extensions = true
+
+  ## Number of UDP messages allowed to queue up; once the queue is full,
+  ## the statsd server will start dropping packets
+  allowed_pending_messages = 10000
+
+  ## Number of timing/histogram values tracked per measurement when
+  ## calculating percentiles. A higher limit makes the percentiles more
+  ## accurate, at the cost of more memory usage and cpu time.
+  percentile_limit = 1000
+
+
+# [[inputs.docker_log]]
+#   ## To use TCP, set endpoint = "tcp://[ip]:[port]"
+#   ## To use environment variables (ie, docker-machine), set endpoint = "ENV"
+#   endpoint = "unix:///var/run/docker.sock"
+
+#   ## When true, container logs are read from the beginning; otherwise
+#   ## reading begins at the end of the log.
+#   # from_beginning = false
+
+#   ## docker labels to include. Globs accepted.
+#   ## Note that an empty array for both will include all labels as tags
+#   docker_label_include = ["logcapture"]
+
+#   ## Set the source tag for the metrics to the container ID hostname, eg first 12 chars
+#   source_tag = true
\ No newline at end of file