部署
docker-compose.yml
# Compose stack: a Prometheus server (basic-auth protected, remote-write
# receiver enabled) plus a node-exporter.
version: '3'
services:
  prometheus:
    image: prom/prometheus:latest
    container_name: prometheus
    hostname: prometheus
    restart: always
    # Use an appropriate UID:GID; it needs write access to the host
    # directory mounted at /prometheus below.
    user: "1000:1000"
    volumes:
      - /etc/localtime:/etc/localtime:ro
      # Host directory mounted over the container's /prometheus directory.
      - /app/minio/prometheus:/prometheus
      # Main config and the rule file live side by side in /etc/prometheus,
      # so the relative `rule_files` entry "node_down.yml" resolves correctly.
      - ./prometheus.yml:/etc/prometheus/prometheus.yml
      - ./node_down.yml:/etc/prometheus/node_down.yml
      # Mount the web (authentication) config.
      - ./web.config.yml:/etc/prometheus/web.config.yml
    ports:
      - "9090:9090"
    command:
      # Setting `command` replaces the image's default flags, so the config
      # file path must be passed explicitly; otherwise Prometheus falls back
      # to a "prometheus.yml" relative to its working directory and the rule
      # file next to /etc/prometheus/prometheus.yml is never found.
      - '--config.file=/etc/prometheus/prometheus.yml'
      - '--web.config.file=/etc/prometheus/web.config.yml'
      - '--web.enable-remote-write-receiver'
  node-exporter:
    # Official image is published under the `prom/` namespace.
    image: prom/node-exporter:latest
    container_name: node-exporter
    hostname: node-exporter
    restart: always
    ports:
      - "9100:9100"
prometheus.yml
# Prometheus server configuration: global timings, rule files, scrape jobs.
global:
  # Scrape targets every 15s (the default is 1 minute).
  scrape_interval: 15s
  # Evaluate rules every 15s (the default is 1 minute).
  evaluation_interval: 15s

# Rules are loaded once and then evaluated periodically according to the
# global 'evaluation_interval'.
rule_files:
  - "node_down.yml"
  # - "first_rules.yml"
  # - "second_rules.yml"

# Scrape configuration:
scrape_configs:
  - job_name: 'minio1'
    static_configs:
      - targets:
          - '{ip:port}'

  - job_name: 'minio2'
    # Per-job override of the global scrape interval.
    scrape_interval: 8s
    static_configs:
      - targets:
          - '{ip:port}'
node_down.yml
# Alerting rules: fire InstanceDown when a scrape target has been
# unreachable (`up == 0`) for one minute.
groups:
  - name: node_down
    rules:
      - alert: InstanceDown
        expr: up == 0
        # The condition must hold continuously for this long before firing.
        for: 1m
        labels:
          user: test
        annotations:
          summary: "Instance {{ $labels.instance }} down"
          # Folded scalar: the two lines below are joined with a single space.
          description: >-
            {{ $labels.instance }} of job {{ $labels.job }} has been down
            for more than 1 minutes.
web.config.yml
htpasswd -nBC 12 '' | tr -d ':\n'
将上面命令生成的 bcrypt 哈希(而不是明文密码)填入下方的 {passwd} 处即可
# Basic-auth users for the Prometheus web endpoint: username -> bcrypt hash.
basic_auth_users:
  # Replace {passwd} with the bcrypt hash produced by `htpasswd -nBC 12`
  # (not the plain-text password). The placeholder is quoted so it parses
  # as a string rather than a YAML flow mapping.
  admin: '{passwd}'
评论区