prometheus
文章目录
node-exporter
# for test: runs in the foreground and the container is removed on exit.
# The host's /proc, /sys and / are bind-mounted into the container, and each
# --path.* flag has to point at the matching mount:
#   --path.sysfs  -> /host/sys (it previously pointed at /host/proc)
#   --path.rootfs -> /rootfs   (otherwise the "/:/rootfs" mount is never used
#                               by the filesystem collector)
# The regex is single-quoted so the shell never touches the "$".
docker run --rm --name node-exporter \
  --net="host" \
  -v "/proc:/host/proc" -v "/sys:/host/sys" -v "/:/rootfs" \
  prom/node-exporter:v1.0.0-rc.0 \
  --path.procfs /host/proc \
  --path.sysfs /host/sys \
  --path.rootfs /rootfs \
  --collector.filesystem.ignored-mount-points '^/(sys|proc|dev|host|etc)($|/)'
# for daemon: same flags, but detached and restarted automatically.
docker run -d --name node-exporter \
  --restart always \
  --net="host" \
  -v "/proc:/host/proc" -v "/sys:/host/sys" -v "/:/rootfs" \
  prom/node-exporter:v1.0.0-rc.0 \
  --path.procfs /host/proc \
  --path.sysfs /host/sys \
  --path.rootfs /rootfs \
  --collector.filesystem.ignored-mount-points '^/(sys|proc|dev|host|etc)($|/)'
blackbox-exporter
# Start the blackbox exporter detached; host port 9115 is forwarded to
# container port 9115 and the container is restarted automatically.
docker run --detach --restart=always \
  --name=blackbox-exporter \
  --publish 9115:9115 \
  prom/blackbox-exporter:v0.18.0
prometheus
# Create the rules directory that is bind-mounted into the container below.
# All $(pwd) expansions are quoted so a working directory containing
# whitespace does not split the arguments.
mkdir -p "$(pwd)/prometheus/alert.rules.d"
# NOTE: ./prometheus/prometheus.yml must already exist as a regular file;
# docker creates a missing bind-mount source as an empty directory.
# For an interactive, throw-away run use "docker run -it --rm" in place of
# "docker run -d --name prometheus --restart always".
docker run -d --name prometheus \
  --restart always \
  -p 19090:9090 \
  -v "$(pwd)/prometheus/prometheus.yml:/etc/prometheus/prometheus.yml" \
  -v "$(pwd)/prometheus/alert.rules.d:/etc/prometheus/alert.rules.d" \
  prom/prometheus:v2.22.1 \
  --config.file=/etc/prometheus/prometheus.yml \
  --storage.tsdb.retention.time=10d \
  --web.enable-lifecycle
alertmanager
# Write the Alertmanager configuration shared by all cluster members.
# - The YAML nesting is significant: without the indentation the file is not
#   a valid Alertmanager config.
# - The heredoc delimiter is quoted so the content is written literally.
# - Paths are quoted so a working directory with whitespace does not produce
#   an "ambiguous redirect".
mkdir -p "$(pwd)/alertmanager/template"
cat >"$(pwd)/alertmanager/alertmanager.yml" <<'EOL'
global:
  resolve_timeout: 5m
route:
  group_by: ['alertname']
  group_wait: 10s
  group_interval: 10s
  repeat_interval: 1h
  receiver: 'web.hook'
receivers:
  - name: 'web.hook'
    webhook_configs:
      - url: 'http://192.168.122.37:19000/hooks/redeploy-webhook'
inhibit_rules:
  - source_match:
      severity: 'critical'
    target_match:
      severity: 'warning'
    equal: ['alertname', 'dev', 'instance']
EOL
# Three-member Alertmanager cluster on one host.
# Inside every container the web UI/API listens on 9193 and the cluster
# (gossip) endpoint on 9194; each member gets its own pair of host ports.
# The cluster port is published for both TCP and UDP because the gossip
# protocol needs both.
# Bind-mount paths are quoted so a working directory with whitespace works.

# alertmanager-1: first member, started without a peer.
docker run -d --name alertmanager-1 --restart always \
  -v "$(pwd)/alertmanager/alertmanager.yml:/etc/alertmanager/alertmanager.yml" \
  -v "$(pwd)/alertmanager/template:/etc/alertmanager/template" \
  -p 19193:9193 \
  -p 19194:9194 \
  -p 19194:9194/udp \
  prom/alertmanager:v0.20.0 \
  --config.file=/etc/alertmanager/alertmanager.yml \
  --storage.path=/alertmanager \
  --web.listen-address=":9193" \
  --cluster.listen-address="0.0.0.0:9194"

# alertmanager-2: joins through alertmanager-1's published cluster port.
docker run -d --name alertmanager-2 --restart always \
  -v "$(pwd)/alertmanager/alertmanager.yml:/etc/alertmanager/alertmanager.yml" \
  -v "$(pwd)/alertmanager/template:/etc/alertmanager/template" \
  -p 19293:9193 \
  -p 19294:9194 \
  -p 19294:9194/udp \
  prom/alertmanager:v0.20.0 \
  --config.file=/etc/alertmanager/alertmanager.yml \
  --storage.path=/alertmanager \
  --web.listen-address=":9193" \
  --cluster.listen-address="0.0.0.0:9194" \
  --cluster.peer="192.168.122.37:19194"

# alertmanager-3: joins through the same peer address as alertmanager-2.
docker run -d --name alertmanager-3 --restart always \
  -v "$(pwd)/alertmanager/alertmanager.yml:/etc/alertmanager/alertmanager.yml" \
  -v "$(pwd)/alertmanager/template:/etc/alertmanager/template" \
  -p 19393:9193 \
  -p 19394:9194 \
  -p 19394:9194/udp \
  prom/alertmanager:v0.20.0 \
  --config.file=/etc/alertmanager/alertmanager.yml \
  --storage.path=/alertmanager \
  --web.listen-address=":9193" \
  --cluster.listen-address="0.0.0.0:9194" \
  --cluster.peer="192.168.122.37:19194"
# Write four sample alerts to ./data.json for a manual Alertmanager test.
# The last two alerts share alertname/dev/instance and differ only in
# severity (critical vs. warning).
# The redirect target is quoted (an unquoted path containing whitespace is an
# "ambiguous redirect") and the heredoc delimiter is quoted so the JSON is
# written literally.
cat >"$(pwd)/data.json" <<'EOL'
[
  {
    "labels": {
      "alertname": "DiskRunningFull",
      "dev": "sda1",
      "instance": "example1"
    },
    "annotations": {
      "info": "The disk sda1 is running full",
      "summary": "please check the instance example1"
    }
  },
  {
    "labels": {
      "alertname": "DiskRunningFull",
      "dev": "sdb2",
      "instance": "example2"
    },
    "annotations": {
      "info": "The disk sdb2 is running full",
      "summary": "please check the instance example2"
    }
  },
  {
    "labels": {
      "alertname": "DiskRunningFull",
      "dev": "sda1",
      "instance": "example3",
      "severity": "critical"
    }
  },
  {
    "labels": {
      "alertname": "DiskRunningFull",
      "dev": "sda1",
      "instance": "example3",
      "severity": "warning"
    }
  }
]
EOL
# POST the contents of ./data.json to the v1 alerts endpoint on host port 19193.
curl --request POST \
  --data @data.json \
  http://192.168.122.37:19193/api/v1/alerts
webhook
# DingTalk webhook adapter; host port 18060 is forwarded to container port 8060.
# The bind-mount path is quoted so a working directory with whitespace works.
# NOTE: ./prometheus-webhook-dingtalk/config.yml must already exist as a
# regular file; docker creates a missing bind-mount source as a directory.
docker run --name ding -d \
  --restart always \
  -p 18060:8060 \
  -v "$(pwd)/prometheus-webhook-dingtalk/config.yml:/etc/prometheus-webhook-dingtalk/config.yml" \
  timonwong/prometheus-webhook-dingtalk:v1.4.0 \
  --config.file=/etc/prometheus-webhook-dingtalk/config.yml \
  --web.enable-ui \
  --web.enable-lifecycle \
  --log.level=debug
# Generic webhook receiver; host port 19000 is forwarded to container port 9000.
# The hook definition is written BEFORE the container starts: the container
# is launched with -hooks=/etc/webhook/hooks.json, so both ./webhook and
# hooks.json have to exist first (and "cat >" fails if ./webhook is missing).
mkdir -p "$(pwd)/webhook"
cat >"$(pwd)/webhook/hooks.json" <<'EOL'
[
  {
    "id": "redeploy-webhook",
    "execute-command": "/etc/webhook/redeploy.sh",
    "command-working-directory": "/tmp",
    "response-message": "I got the payload!"
  }
]
EOL
# NOTE: the hook executes /etc/webhook/redeploy.sh, i.e. ./webhook/redeploy.sh
# on the host. That script is not created by these commands and must be
# provided separately.
docker run -d -p 19000:9000 --name=webhook \
  -v "$(pwd)/webhook:/etc/webhook" \
  almir/webhook:2.6.11 \
  -verbose -hooks=/etc/webhook/hooks.json -hotreload
# https://github.com/adnanh/webhook
# https://github.com/adnanh/webhook/blob/master/docs/Hook-Examples.md
grafana
# The default username/password is admin/admin.
# Host port 23000 is forwarded to container port 3000.
docker run --detach --restart always \
  --name grafana \
  --publish 23000:3000 \
  grafana/grafana:6.6.2
# https://grafana.com/grafana/dashboards/8919
# https://grafana.com/grafana/dashboards/179
cadvisor
# cadvisor: runs detached with several host paths mounted read-only;
# container port 8080 is published on host port 8080.
# NOTE(review): the image is pulled from the gcr.azk8s.cn registry mirror;
# confirm that mirror is reachable from your network before relying on it.
docker run -d --name cadvisor \
  -p 8080:8080 \
  -v /:/rootfs:ro \
  -v /var/run:/var/run:ro \
  -v /sys:/sys:ro \
  -v /var/lib/docker/:/var/lib/docker:ro \
  -v /dev/disk/:/dev/disk:ro \
  gcr.azk8s.cn/google_containers/cadvisor:v0.36.0
troubleshooting
yaml: unmarshal errors:\n line 51: field lables not found in type struct { Targets []string \"yaml:\\\"targets\\\"\"; Labels model.LabelSet \"yaml:\\\"labels\\\"\" }"
level=error ts=2020-11-09T13:48:14.875Z caller=main.go:290 msg="Error loading config (--config.file=/etc/prometheus/prometheus.yml)" err="parsing YAML file /etc/prometheus/prometheus.yml: yaml: unmarshal errors:\n line 51: field lables not found in type struct { Targets []string \"yaml:\\\"targets\\\"\"; Labels model.LabelSet \"yaml:\\\"labels\\\"\" }"
原因：prometheus.yml 第 51 行把 labels 误写成了 lables；改正拼写后重启 Prometheus 即可。
curl "http://192.168.122.37:9115/probe?module=icmp&target=223.5.5.5"
# HELP probe_dns_lookup_time_seconds Returns the time taken for probe dns lookup in seconds
# TYPE probe_dns_lookup_time_seconds gauge
probe_dns_lookup_time_seconds 7.522e-06
# HELP probe_duration_seconds Returns how long the probe took to complete in seconds
# TYPE probe_duration_seconds gauge
probe_duration_seconds 0.005357035
# HELP probe_icmp_duration_seconds Duration of icmp request by phase
# TYPE probe_icmp_duration_seconds gauge
probe_icmp_duration_seconds{phase="resolve"} 7.522e-06
probe_icmp_duration_seconds{phase="rtt"} 0.005181756
probe_icmp_duration_seconds{phase="setup"} 5.5147e-05
# HELP probe_icmp_reply_hop_limit Replied packet hop limit (TTL for ipv4)
# TYPE probe_icmp_reply_hop_limit gauge
probe_icmp_reply_hop_limit 115
# HELP probe_ip_addr_hash Specifies the hash of IP address. It's useful to detect if the IP address changes.
# TYPE probe_ip_addr_hash gauge
probe_ip_addr_hash 2.744926669e+09
# HELP probe_ip_protocol Specifies whether probe ip protocol is IP4 or IP6
# TYPE probe_ip_protocol gauge
probe_ip_protocol 4
# HELP probe_success Displays whether or not the probe was a success
# TYPE probe_success gauge
probe_success 1
curl "http://192.168.122.37:9115/probe?module=tcp_connect&target=223.5.5.5:53"
# HELP probe_dns_lookup_time_seconds Returns the time taken for probe dns lookup in seconds
# TYPE probe_dns_lookup_time_seconds gauge
probe_dns_lookup_time_seconds 7.603e-06
# HELP probe_duration_seconds Returns how long the probe took to complete in seconds
# TYPE probe_duration_seconds gauge
probe_duration_seconds 0.005699097
# HELP probe_failed_due_to_regex Indicates if probe failed due to regex
# TYPE probe_failed_due_to_regex gauge
probe_failed_due_to_regex 0
# HELP probe_ip_addr_hash Specifies the hash of IP address. It's useful to detect if the IP address changes.
# TYPE probe_ip_addr_hash gauge
probe_ip_addr_hash 2.744926669e+09
# HELP probe_ip_protocol Specifies whether probe ip protocol is IP4 or IP6
# TYPE probe_ip_protocol gauge
probe_ip_protocol 4
# HELP probe_success Displays whether or not the probe was a success
# TYPE probe_success gauge
probe_success 1
curl "http://192.168.122.37:9115/probe?module=tcp_connect&target=223.5.5.5:5555"
# HELP probe_dns_lookup_time_seconds Returns the time taken for probe dns lookup in seconds
# TYPE probe_dns_lookup_time_seconds gauge
probe_dns_lookup_time_seconds 8.187e-06
# HELP probe_duration_seconds Returns how long the probe took to complete in seconds
# TYPE probe_duration_seconds gauge
probe_duration_seconds 31.702927304
# HELP probe_failed_due_to_regex Indicates if probe failed due to regex
# TYPE probe_failed_due_to_regex gauge
probe_failed_due_to_regex 0
# HELP probe_ip_addr_hash Specifies the hash of IP address. It's useful to detect if the IP address changes.
# TYPE probe_ip_addr_hash gauge
probe_ip_addr_hash 2.744926669e+09
# HELP probe_ip_protocol Specifies whether probe ip protocol is IP4 or IP6
# TYPE probe_ip_protocol gauge
probe_ip_protocol 4
# HELP probe_success Displays whether or not the probe was a success
# TYPE probe_success gauge
probe_success 0
ref
- collectd
- statsd
- QUERYING PROMETHEUS
- Blackbox exporter configuration
- Blackbox exporter configuration Example
- Prometheus 实战
- google-containers
- google/cadvisor: Analyzes resource usage and performance characteristics of running containers.
- Alertmanager高可用
- 搞搞 Prometheus: Alertmanager
- https://github.com/prometheus/alertmanager/issues/964
- https://github.com/prometheus/alertmanager/issues/1125
- https://www.lijiaocn.com/%E9%97%AE%E9%A2%98/2018/08/03/prometheus-problem.html
上次更新 2019-02-15
原始文档 查看本文 Markdown 版本 »