initial commit
Signed-off-by: Jordan Moore <crikket.007@gmail.com>
This commit is contained in:
parent
d3c5f28243
commit
10b3bc378f
6
ultimate-instrumentation/README.md
Normal file
6
ultimate-instrumentation/README.md
Normal file
@ -0,0 +1,6 @@
|
||||
ultimate-instrumentation
|
||||
===
|
||||
|
||||
TODO
|
||||
|
||||
Refer to the comments in [`docker-compose.yaml`](docker-compose.yaml).
|
134
ultimate-instrumentation/docker-compose.yaml
Normal file
134
ultimate-instrumentation/docker-compose.yaml
Normal file
@ -0,0 +1,134 @@
|
||||
## Ultimate Instrumentation Stack
|
||||
## Provides
|
||||
## - log collection and forwarding (logspout) (localhost:8000/logs)
|
||||
## - http/tcp/udp proxying (traefik) (localhost:8080/dashboard/)
|
||||
## - service discovery + dns + health-checks (consul) (localhost:8500/ui) (depends on registrator)
|
||||
## - time-series DB storage (influxdb) (localhost:8086)
|
||||
## - metric & event visualization (grafana) (localhost:3000)
|
||||
## http routing provided by traefik on `localhost/<route>` (see SERVICE_TAGS vars for routes)
|
||||
|
||||
version: '3'
|
||||
services:
|
||||
# logging - Logspout is meant for log collection, not storage.
|
||||
# Could pair this with syslog/GELF into FluentD/Logstash/Kafka with indexing by Graylog/Elasticsearch/Solr/Splunk
|
||||
logspout:
|
||||
image: gliderlabs/logspout:master
|
||||
volumes: ['/var/run/docker.sock:/tmp/docker.sock:ro']
|
||||
networks: ['backend','frontend']
|
||||
ports: ['8000:80'] # logs avail at http://localhost:8000/logs
|
||||
environment:
|
||||
EXCLUDE_LABEL: logspout.exclude
|
||||
# registrator
|
||||
SERVICE_TAGS: 'traefik.enable=true,traefik.port=8000,traefik.docker.network=frontend,traefik.http.routers.router0.rule=PathPrefix(`/logs`)'
|
||||
|
||||
# service discovery (Consul + registrator)
|
||||
registrator:
|
||||
image: 'gliderlabs/registrator:master'
|
||||
depends_on: ['consul']
|
||||
networks: ['backend']
|
||||
volumes: ['/var/run/docker.sock:/tmp/docker.sock:ro']
|
||||
command: ["-internal=true", "consul://consul:8500"]
|
||||
consul:
|
||||
image: 'consul:1.6.4'
|
||||
restart: always
|
||||
networks: ['backend','frontend']
|
||||
ports:
|
||||
- '8500:8500' # web ui
|
||||
# - '8300:8300' # server rpc
|
||||
# - '8301:8301' # lan serf tcp
|
||||
# - '8301:8301/udp' # lan serf udp
|
||||
# - '8600:8600' # dns tcp
|
||||
# - '8600:8600/udp' # dns udp
|
||||
environment:
|
||||
# registrator
|
||||
SERVICE_TAGS: "traefik.enable=true,traefik.docker.network=frontend"
|
||||
labels:
|
||||
- logspout.exclude=true  # key must match EXCLUDE_LABEL on the logspout service; logspout only skips the container when the value is "true"
|
||||
|
||||
# load balancing + routing
|
||||
traefik:
|
||||
image: traefik:2.2
|
||||
depends_on: ['registrator', 'consul']
|
||||
networks: ['backend','frontend']
|
||||
ports:
|
||||
- '8080:8080' # web ui
|
||||
- '80:80'
|
||||
command:
|
||||
# - "--log.level=DEBUG"
|
||||
- "--api.insecure=true"
|
||||
- "--api.dashboard=true"
|
||||
- "--providers.consulcatalog=true"
|
||||
- "--providers.consulcatalog.endpoint.address=http://consul:8500"
|
||||
- "--providers.consulcatalog.endpoint.datacenter=dc1"
|
||||
- "--providers.consulcatalog.cache=true"
|
||||
- "--providers.consulcatalog.exposedByDefault=false"
|
||||
labels:
|
||||
- logspout.exclude=true  # key must match EXCLUDE_LABEL on the logspout service; logspout only skips the container when the value is "true"
|
||||
|
||||
#monitoring
|
||||
# TODO: Tracing - Zipkin/Jaeger
|
||||
|
||||
## TIG Stack - Telegraf + Influx + Grafana
|
||||
## This is functionally equivalent to just Prometheus + Grafana, but allows metric push & pull
|
||||
telegraf:
|
||||
image: telegraf:1.14-alpine
|
||||
restart: unless-stopped
|
||||
networks: ['backend', 'monitor']
|
||||
depends_on: ['influxdb']
|
||||
labels:
|
||||
- logspout.exclude=true  # key must match EXCLUDE_LABEL on the logspout service; logspout only skips the container when the value is "true"
|
||||
volumes:
|
||||
- ./telegraf.conf:/etc/telegraf/telegraf.conf:ro
|
||||
# For docker stats
|
||||
- /var/run/docker.sock:/var/run/docker.sock:ro
|
||||
influxdb:
|
||||
image: influxdb:1.7-alpine
|
||||
restart: always
|
||||
ports: ['8086:8086']
|
||||
networks: ['monitor']
|
||||
labels:
|
||||
- logspout.exclude=true  # key must match EXCLUDE_LABEL on the logspout service; logspout only skips the container when the value is "true"
|
||||
environment:
|
||||
INFLUXDB_DB: telegraf
|
||||
INFLUXDB_USER: telegraf
|
||||
INFLUXDB_USER_PASSWORD: 'compose'
|
||||
INFLUXDB_REPORTING_DISABLED: 'true'
|
||||
|
||||
# volumes:
|
||||
# - influxdb-volume:/var/lib/influxdb
|
||||
grafana:
|
||||
image: grafana/grafana:master
|
||||
depends_on: ['influxdb']
|
||||
ports: ["3000:3000"]
|
||||
networks: ['monitor', 'frontend']
|
||||
user: "0"
|
||||
labels:
|
||||
- logspout.exclude=true  # key must match EXCLUDE_LABEL on the logspout service; logspout only skips the container when the value is "true"
|
||||
environment:
|
||||
GF_SECURITY_ADMIN_PASSWORD: 'compose'
|
||||
GF_USERS_ALLOW_SIGN_UP: 'false'
|
||||
# registrator
|
||||
SERVICE_TAGS: 'traefik.enable=true,traefik.docker.network=frontend,traefik.http.routers.router0.rule=PathPrefix(`/grafana`)'
|
||||
|
||||
## TODO - get loki working (in grafana)
|
||||
# loki:
|
||||
# image: grafana/loki:latest
|
||||
# depends_on: ['registrator', 'consul']
|
||||
# ports: ["3100:3100"]
|
||||
# command: ["-config.file=/etc/loki/local-config.yaml"]
|
||||
# networks: ['backend']
|
||||
# environment:
|
||||
# # LOGSPOUT: ignore
|
||||
# SERVICE_TAGS: "traefik.enable=true,traefik.http.routers.router0.rule=PathPrefix(`/loki`)"
|
||||
|
||||
|
||||
# Create networks for infrastructure components
|
||||
networks:
|
||||
backend:
|
||||
monitor:
|
||||
frontend:
|
||||
|
||||
# Create local persistent volumes
|
||||
volumes:
|
||||
grafana-volume:
|
||||
influxdb-volume:
|
153
ultimate-instrumentation/telegraf.conf
Normal file
153
ultimate-instrumentation/telegraf.conf
Normal file
@ -0,0 +1,153 @@
|
||||
# Configuration for telegraf agent
|
||||
[agent]
|
||||
interval = "10s"
|
||||
round_interval = true
|
||||
metric_batch_size = 1000
|
||||
metric_buffer_limit = 10000
|
||||
|
||||
collection_jitter = "3s"
|
||||
flush_interval = "10s"
|
||||
flush_jitter = "5s"
|
||||
|
||||
debug = false
|
||||
quiet = false
|
||||
logfile = "/var/log/telegraf/telegraf.log"
|
||||
logfile_rotation_interval = "0d"
|
||||
logfile_rotation_max_size = "1MB"
|
||||
logfile_rotation_max_archives = 5
|
||||
|
||||
hostname = ""
|
||||
|
||||
###############################################################################
|
||||
# OUTPUT PLUGINS #
|
||||
###############################################################################
|
||||
|
||||
# Configuration for sending metrics to InfluxDB
|
||||
[[outputs.influxdb]]
|
||||
urls = ["http://influxdb:8086"] # required
|
||||
database = "telegraf" # required
|
||||
username = "telegraf"
|
||||
password = "compose" # must match INFLUXDB_USER_PASSWORD for the telegraf user in docker-compose.yaml
|
||||
## If true, the database tag will not be added to the metric.
|
||||
exclude_database_tag = false
|
||||
retention_policy = ""
|
||||
write_consistency = "any"
|
||||
timeout = "5s"
|
||||
## If true, no CREATE DATABASE queries will be sent. Set to true when using
|
||||
## Telegraf with a user without permissions to create databases or when the
|
||||
## database already exists.
|
||||
skip_database_creation = false
|
||||
|
||||
###############################################################################
|
||||
# INPUT PLUGINS #
|
||||
###############################################################################
|
||||
|
||||
# Read metrics about cpu usage
|
||||
[[inputs.cpu]]
|
||||
## Whether to report per-cpu stats or not
|
||||
percpu = true
|
||||
## Whether to report total system cpu stats or not
|
||||
totalcpu = true
|
||||
## If true, collect raw CPU time metrics.
|
||||
collect_cpu_time = false
|
||||
report_active = false
|
||||
|
||||
# Read metrics about disk usage by mount point
|
||||
[[inputs.disk]]
|
||||
## Ignore some mountpoints by filesystem type. For example (dev)tmpfs (usually
|
||||
## present on /run, /var/run, /dev/shm or /dev).
|
||||
ignore_fs = ["tmpfs", "devtmpfs", "devfs", "iso9660", "overlay", "aufs", "squashfs"]
|
||||
|
||||
# Read metrics about disk IO by device
|
||||
[[inputs.diskio]]
|
||||
## Setting devices will restrict the stats to the specified devices.
|
||||
# devices = ["sda", "sdb"]
|
||||
|
||||
# Get kernel statistics from /proc/stat
|
||||
[[inputs.kernel]]
|
||||
# no configuration
|
||||
|
||||
# Read metrics about memory usage
|
||||
[[inputs.mem]]
|
||||
# no configuration
|
||||
|
||||
# Get the number of processes and group them by status
|
||||
[[inputs.processes]]
|
||||
# no configuration
|
||||
|
||||
# Read metrics about swap memory usage
|
||||
[[inputs.swap]]
|
||||
# no configuration
|
||||
|
||||
# Read metrics about system load & uptime
|
||||
[[inputs.system]]
|
||||
## Uncomment to remove deprecated metrics.
|
||||
# fielddrop = ["uptime_format"]
|
||||
|
||||
[[inputs.internal]]
|
||||
collect_memstats = true
|
||||
|
||||
# Read metrics about network interface usage
|
||||
[[inputs.net]]
|
||||
interfaces = ["eth*"]
|
||||
|
||||
# Read metrics about docker containers
|
||||
[[inputs.docker]]
|
||||
endpoint = "unix:///var/run/docker.sock"
|
||||
timeout = "5s"
|
||||
|
||||
# Statsd Server
|
||||
[[inputs.statsd]]
|
||||
## Address and port to host UDP listener on
|
||||
service_address = ":8125"
|
||||
|
||||
## Percentiles to calculate for timing & histogram stats.
|
||||
percentiles = [50.0, 75.0, 99.0, 99.9]
|
||||
|
||||
## Delete gauges every interval (default=false)
|
||||
delete_gauges = true
|
||||
## Delete counters every interval (default=false)
|
||||
delete_counters = true
|
||||
|
||||
## separator to use between elements of a statsd metric
|
||||
metric_separator = "_"
|
||||
|
||||
## convert measurement names, "." to "_" and "-" to "__"
|
||||
convert_names = false
|
||||
|
||||
## used to parse StatsD variable names correctly for InfluxDB
|
||||
## it allows counters/gauges to be grouped by measurement
|
||||
templates = [
|
||||
"* measurement.field"
|
||||
]
|
||||
|
||||
## Parses extensions to statsd in the datadog statsd format
|
||||
## currently supports metrics and datadog tags.
|
||||
## http://docs.datadoghq.com/guides/dogstatsd/
|
||||
datadog_extensions = true
|
||||
|
||||
## Number of UDP messages allowed to queue up, once filled,
|
||||
## the statsd server will start dropping packets
|
||||
allowed_pending_messages = 10000
|
||||
|
||||
## Number of timing/histogram values to track per-measurement in the
|
||||
## calculation of percentiles. Raising this limit increases the accuracy
|
||||
## of percentiles but also increases the memory usage and cpu time.
|
||||
percentile_limit = 1000
|
||||
|
||||
|
||||
# [[inputs.docker_log]]
|
||||
# ## To use TCP, set endpoint = "tcp://[ip]:[port]"
|
||||
# ## To use environment variables (ie, docker-machine), set endpoint = "ENV"
|
||||
# endpoint = "unix:///var/run/docker.sock"
|
||||
|
||||
# ## When true, container logs are read from the beginning; otherwise
|
||||
# ## reading begins at the end of the log.
|
||||
# # from_beginning = false
|
||||
|
||||
# ## docker labels to include. Globs accepted.
|
||||
# ## Note that an empty array for both will include all labels as tags
|
||||
# docker_label_include = ["logcapture"]
|
||||
|
||||
# ## Set the source tag for the metrics to the container ID hostname, eg first 12 chars
|
||||
# source_tag = true
|
Loading…
Reference in New Issue
Block a user