inital commit
Signed-off-by: Jordan Moore <crikket.007@gmail.com>
This commit is contained in:
parent
d3c5f28243
commit
10b3bc378f
6
ultimate-instrumentation/README.md
Normal file
6
ultimate-instrumentation/README.md
Normal file
@ -0,0 +1,6 @@
|
|||||||
|
ultimate-instrumentation
|
||||||
|
===
|
||||||
|
|
||||||
|
TODO
|
||||||
|
|
||||||
|
Refer to the comments in [`docker-compose.yaml`](docker-compose.yaml).
|
134
ultimate-instrumentation/docker-compose.yaml
Normal file
134
ultimate-instrumentation/docker-compose.yaml
Normal file
@ -0,0 +1,134 @@
|
|||||||
|
## Ultimate Instrumentation Stack
|
||||||
|
## Provides
|
||||||
|
## - log collection and forwarding (logspout) (localhost:8000/logs)
|
||||||
|
## - http/tcp/udp proxying (traefik) (localhost:8080/dashboard/)
|
||||||
|
## - service discovery + dns + health-checks (consul) (localhost:8500/ui) (depends on registrator)
|
||||||
|
## - time-series DB storage (influxdb) (localhost:8086)
|
||||||
|
## - metric & event visualization (grafana) (localhost:3000)
|
||||||
|
## http routing provided by traefik on `localhost/<route>` (see SERVICE_TAGS vars for routes)
|
||||||
|
|
||||||
|
version: '3'
|
||||||
|
services:
|
||||||
|
# logging - Logspout is meant for log collection, not storage.
|
||||||
|
# Could pair this with syslog/GELF into FluentD/Logstash/Kafka with indexing by Graylog/Elasticsearch/Solr/Splunk
|
||||||
|
logspout:
|
||||||
|
image: gliderlabs/logspout:master
|
||||||
|
volumes: ['/var/run/docker.sock:/tmp/docker.sock:ro']
|
||||||
|
networks: ['backend','frontend']
|
||||||
|
ports: ['8000:80'] # logs avail at http://localhost:8000/logs
|
||||||
|
environment:
|
||||||
|
EXCLUDE_LABEL: logspout.exclude
|
||||||
|
# registrator
|
||||||
|
SERVICE_TAGS: 'traefik.enable=true,traefik.port=8000,traefik.docker.network=frontend,traefik.http.routers.router0.rule=PathPrefix(`/logs`)'
|
||||||
|
|
||||||
|
# service discovery (Consul + registrator)
|
||||||
|
registrator:
|
||||||
|
image: 'gliderlabs/registrator:master'
|
||||||
|
depends_on: ['consul']
|
||||||
|
networks: ['backend']
|
||||||
|
volumes: ['/var/run/docker.sock:/tmp/docker.sock:ro']
|
||||||
|
command: ["-internal=true", "consul://consul:8500"]
|
||||||
|
consul:
|
||||||
|
image: 'consul:1.6.4'
|
||||||
|
restart: always
|
||||||
|
networks: ['backend','frontend']
|
||||||
|
ports:
|
||||||
|
- '8500:8500' # web ui
|
||||||
|
# - '8300:8300' # server rpc
|
||||||
|
# - '8301:8301' # lan serf tcp
|
||||||
|
# - '8301:8301/udp' # lan serf udp
|
||||||
|
# - '8600:8600' # dns tcp
|
||||||
|
# - '8600:8600/udp' # dns udp
|
||||||
|
environment:
|
||||||
|
# registrator
|
||||||
|
SERVICE_TAGS: "traefik.enable=true,traefik.docker.network=frontend"
|
||||||
|
labels:
|
||||||
|
- 'logspout.exclude=true'  # key must match logspout's EXCLUDE_LABEL; logspout skips a container only when the label value is "true"
|
||||||
|
|
||||||
|
# load balancing + routing
|
||||||
|
traefik:
|
||||||
|
image: traefik:2.2
|
||||||
|
depends_on: ['registrator', 'consul']
|
||||||
|
networks: ['backend','frontend']
|
||||||
|
ports:
|
||||||
|
- '8080:8080' # web ui
|
||||||
|
- '80:80'
|
||||||
|
command:
|
||||||
|
# - "--log.level=DEBUG"
|
||||||
|
- "--api.insecure=true"
|
||||||
|
- "--api.dashboard=true"
|
||||||
|
- "--providers.consulcatalog=true"
|
||||||
|
- "--providers.consulcatalog.endpoint.address=http://consul:8500"
|
||||||
|
- "--providers.consulcatalog.endpoint.datacenter=dc1"
|
||||||
|
- "--providers.consulcatalog.cache=true"
|
||||||
|
- "--providers.consulcatalog.exposedByDefault=false"
|
||||||
|
labels:
|
||||||
|
- 'logspout.exclude=true'  # key must match logspout's EXCLUDE_LABEL; logspout skips a container only when the label value is "true"
|
||||||
|
|
||||||
|
#monitoring
|
||||||
|
# TODO: Tracing - Zipkin/Jaeger
|
||||||
|
|
||||||
|
## TIG Stack - Telegraf + Influx + Grafana
|
||||||
|
## This is functionally equivalent to just Prometheus + Grafana, but allows metric push & pull
|
||||||
|
telegraf:
|
||||||
|
image: telegraf:1.14-alpine
|
||||||
|
restart: unless-stopped
|
||||||
|
networks: ['backend', 'monitor']
|
||||||
|
depends_on: ['influxdb']
|
||||||
|
labels:
|
||||||
|
- 'logspout.exclude=true'  # key must match logspout's EXCLUDE_LABEL; logspout skips a container only when the label value is "true"
|
||||||
|
volumes:
|
||||||
|
- ./telegraf.conf:/etc/telegraf/telegraf.conf:ro
|
||||||
|
# For docker stats
|
||||||
|
- /var/run/docker.sock:/var/run/docker.sock:ro
|
||||||
|
influxdb:
|
||||||
|
image: influxdb:1.7-alpine
|
||||||
|
restart: always
|
||||||
|
ports: ['8086:8086']
|
||||||
|
networks: ['monitor']
|
||||||
|
labels:
|
||||||
|
- 'logspout.exclude=true'  # key must match logspout's EXCLUDE_LABEL; logspout skips a container only when the label value is "true"
|
||||||
|
environment:
|
||||||
|
INFLUXDB_DB: telegraf
|
||||||
|
INFLUXDB_USER: telegraf
|
||||||
|
INFLUXDB_USER_PASSWORD: 'compose'
|
||||||
|
INFLUXDB_REPORTING_DISABLED: 'true'
|
||||||
|
|
||||||
|
# volumes:
|
||||||
|
# - influxdb-volume:/var/lib/influxdb
|
||||||
|
grafana:
|
||||||
|
image: grafana/grafana:master
|
||||||
|
depends_on: ['influxdb']
|
||||||
|
ports: ["3000:3000"]
|
||||||
|
networks: ['monitor', 'frontend']
|
||||||
|
user: "0"
|
||||||
|
labels:
|
||||||
|
- 'logspout.exclude=true'  # key must match logspout's EXCLUDE_LABEL; logspout skips a container only when the label value is "true"
|
||||||
|
environment:
|
||||||
|
GF_SECURITY_ADMIN_PASSWORD: 'compose'
|
||||||
|
GF_USERS_ALLOW_SIGN_UP: 'false'
|
||||||
|
# registrator
|
||||||
|
SERVICE_TAGS: 'traefik.enable=true,traefik.docker.network=frontend,traefik.http.routers.router0.rule=PathPrefix(`/grafana`)'
|
||||||
|
|
||||||
|
## TODO - get loki working (in grafana)
|
||||||
|
# loki:
|
||||||
|
# image: grafana/loki:latest
|
||||||
|
# depends_on: ['registrator', 'consul']
|
||||||
|
# ports: ["3100:3100"]
|
||||||
|
# command: ["-config.file=/etc/loki/local-config.yaml"]
|
||||||
|
# networks: ['backend']
|
||||||
|
# environment:
|
||||||
|
# # LOGSPOUT: ignore
|
||||||
|
# SERVICE_TAGS: "traefik.enable=true,traefik.http.routers.router0.rule=PathPrefix(`/loki`)"
|
||||||
|
|
||||||
|
|
||||||
|
# Create networks for infrastructure components
|
||||||
|
networks:
|
||||||
|
backend:
|
||||||
|
monitor:
|
||||||
|
frontend:
|
||||||
|
|
||||||
|
# Create local persistent volumes
|
||||||
|
volumes:
|
||||||
|
grafana-volume:
|
||||||
|
influxdb-volume:
|
153
ultimate-instrumentation/telegraf.conf
Normal file
153
ultimate-instrumentation/telegraf.conf
Normal file
@ -0,0 +1,153 @@
|
|||||||
|
# Configuration for telegraf agent
|
||||||
|
[agent]
|
||||||
|
interval = "10s"
|
||||||
|
round_interval = true
|
||||||
|
metric_batch_size = 1000
|
||||||
|
metric_buffer_limit = 10000
|
||||||
|
|
||||||
|
collection_jitter = "3s"
|
||||||
|
flush_interval = "10s"
|
||||||
|
flush_jitter = "5s"
|
||||||
|
|
||||||
|
debug = false
|
||||||
|
quiet = false
|
||||||
|
logfile = "/var/log/telegraf/telegraf.log"
|
||||||
|
logfile_rotation_interval = "0d"
|
||||||
|
logfile_rotation_max_size = "1MB"
|
||||||
|
logfile_rotation_max_archives = 5
|
||||||
|
|
||||||
|
hostname = ""
|
||||||
|
|
||||||
|
###############################################################################
|
||||||
|
# OUTPUT PLUGINS #
|
||||||
|
###############################################################################
|
||||||
|
|
||||||
|
# Configuration for sending metrics to InfluxDB
|
||||||
|
[[outputs.influxdb]]
|
||||||
|
urls = ["http://influxdb:8086"] # required
|
||||||
|
database = "telegraf" # required
|
||||||
|
username = "telegraf"
|
||||||
|
password = "compose"  # must match INFLUXDB_USER_PASSWORD set for the telegraf user in docker-compose.yaml
|
||||||
|
## If true, the database tag will not be added to the metric.
|
||||||
|
exclude_database_tag = false
|
||||||
|
retention_policy = ""
|
||||||
|
write_consistency = "any"
|
||||||
|
timeout = "5s"
|
||||||
|
## If true, no CREATE DATABASE queries will be sent. Set to true when using
|
||||||
|
## Telegraf with a user without permissions to create databases or when the
|
||||||
|
## database already exists.
|
||||||
|
skip_database_creation = false
|
||||||
|
|
||||||
|
###############################################################################
|
||||||
|
# INPUT PLUGINS #
|
||||||
|
###############################################################################
|
||||||
|
|
||||||
|
# Read metrics about cpu usage
|
||||||
|
[[inputs.cpu]]
|
||||||
|
## Whether to report per-cpu stats or not
|
||||||
|
percpu = true
|
||||||
|
## Whether to report total system cpu stats or not
|
||||||
|
totalcpu = true
|
||||||
|
## If true, collect raw CPU time metrics.
|
||||||
|
collect_cpu_time = false
|
||||||
|
report_active = false
|
||||||
|
|
||||||
|
# Read metrics about disk usage by mount point
|
||||||
|
[[inputs.disk]]
|
||||||
|
## Ignore some mountpoints by filesystem type. For example (dev)tmpfs (usually
|
||||||
|
## present on /run, /var/run, /dev/shm or /dev).
|
||||||
|
ignore_fs = ["tmpfs", "devtmpfs", "devfs", "iso9660", "overlay", "aufs", "squashfs"]
|
||||||
|
|
||||||
|
# Read metrics about disk IO by device
|
||||||
|
[[inputs.diskio]]
|
||||||
|
## Setting devices will restrict the stats to the specified devices.
|
||||||
|
# devices = ["sda", "sdb"]
|
||||||
|
|
||||||
|
# Get kernel statistics from /proc/stat
|
||||||
|
[[inputs.kernel]]
|
||||||
|
# no configuration
|
||||||
|
|
||||||
|
# Read metrics about memory usage
|
||||||
|
[[inputs.mem]]
|
||||||
|
# no configuration
|
||||||
|
|
||||||
|
# Get the number of processes and group them by status
|
||||||
|
[[inputs.processes]]
|
||||||
|
# no configuration
|
||||||
|
|
||||||
|
# Read metrics about swap memory usage
|
||||||
|
[[inputs.swap]]
|
||||||
|
# no configuration
|
||||||
|
|
||||||
|
# Read metrics about system load & uptime
|
||||||
|
[[inputs.system]]
|
||||||
|
## Uncomment to remove deprecated metrics.
|
||||||
|
# fielddrop = ["uptime_format"]
|
||||||
|
|
||||||
|
[[inputs.internal]]
|
||||||
|
collect_memstats = true
|
||||||
|
|
||||||
|
# Read metrics about network interface usage
|
||||||
|
[[inputs.net]]
|
||||||
|
interfaces = ["eth*"]
|
||||||
|
|
||||||
|
# Read metrics about docker containers
|
||||||
|
[[inputs.docker]]
|
||||||
|
endpoint = "unix:///var/run/docker.sock"
|
||||||
|
timeout = "5s"
|
||||||
|
|
||||||
|
# Statsd Server
|
||||||
|
[[inputs.statsd]]
|
||||||
|
## Address and port to host UDP listener on
|
||||||
|
service_address = ":8125"
|
||||||
|
|
||||||
|
## Percentiles to calculate for timing & histogram stats.
|
||||||
|
percentiles = [50.0, 75.0, 99.0, 99.9]
|
||||||
|
|
||||||
|
## Delete gauges every interval (default=false)
|
||||||
|
delete_gauges = true
|
||||||
|
## Delete counters every interval (default=false)
|
||||||
|
delete_counters = true
|
||||||
|
|
||||||
|
## separator to use between elements of a statsd metric
|
||||||
|
metric_separator = "_"
|
||||||
|
|
||||||
|
## convert measurement names, “.” to “_” and “-” to “__”
|
||||||
|
convert_names = false
|
||||||
|
|
||||||
|
## used to parse StatsD variable names correctly for InfluxDB
|
||||||
|
## it allows counters/gauges to be grouped by measurement
|
||||||
|
templates = [
|
||||||
|
"* measurement.field"
|
||||||
|
]
|
||||||
|
|
||||||
|
## Parses extensions to statsd in the datadog statsd format
|
||||||
|
## currently supports metrics and datadog tags.
|
||||||
|
## http://docs.datadoghq.com/guides/dogstatsd/
|
||||||
|
datadog_extensions = true
|
||||||
|
|
||||||
|
## Number of UDP messages allowed to queue up, once filled,
|
||||||
|
## the statsd server will start dropping packets
|
||||||
|
allowed_pending_messages = 10000
|
||||||
|
|
||||||
|
## Number of timing/histogram values to track per-measurement in the
|
||||||
|
## calculation of percentiles. Raising this limit increases the accuracy
|
||||||
|
## of percentiles but also increases the memory usage and cpu time.
|
||||||
|
percentile_limit = 1000
|
||||||
|
|
||||||
|
|
||||||
|
# [[inputs.docker_log]]
|
||||||
|
# ## To use TCP, set endpoint = "tcp://[ip]:[port]"
|
||||||
|
# ## To use environment variables (ie, docker-machine), set endpoint = "ENV"
|
||||||
|
# endpoint = "unix:///var/run/docker.sock"
|
||||||
|
|
||||||
|
# ## When true, container logs are read from the beginning; otherwise
|
||||||
|
# ## reading begins at the end of the log.
|
||||||
|
# # from_beginning = false
|
||||||
|
|
||||||
|
# ## docker labels to include. Globs accepted.
|
||||||
|
# ## Note that an empty array for both will include all labels as tags
|
||||||
|
# docker_label_include = ["logcapture"]
|
||||||
|
|
||||||
|
# ## Set the source tag for the metrics to the container ID hostname, eg first 12 chars
|
||||||
|
# source_tag = true
|
Loading…
Reference in New Issue
Block a user