官方文档: https://prometheus.io/docs/prometheus/latest/configuration/configuration/
功能:
- 多个prometheus聚合到一起监控
- 集中存储监控数据
- 统一报警
# my global config
global:
  scrape_interval: 30s     # scrape targets every 30 seconds
  # Paired with the alerting rules: an alert moves inactive -> pending after one
  # evaluation (2m), then pending -> firing after another 2m; if the data
  # recovers in between, the alert is not fired.
  evaluation_interval: 2m
  scrape_timeout: 30s      # pull timeout 30s (default is 10s)

# Alertmanager configuration
alerting:
  alertmanagers:
    - static_configs:
        - targets: ['172.22.1.14:8080']

# Alerting rule files
rule_files:
  - '/etc/prometheus/rule.yml'
  # - "first_rules.yml"
  # - "second_rules.yml"

# Scrape job configuration
scrape_configs:
  # The job name is added as a label `job=<job_name>` to any timeseries scraped from this config.
  - job_name: 'prometheus'      # ordinary self-monitoring of this server
    # metrics_path defaults to '/metrics'
    # scheme defaults to 'http'.
    static_configs:
      - targets: ['127.0.0.1:9090']

  - job_name: 'alertmanager'
    static_configs:
      - targets: ['172.22.1.14:8080']

  # Federation: aggregate monitoring by pulling data from other Prometheus servers.
  - job_name: 'federate-sdorica'
    scrape_interval: 30s        # pull once every 30 seconds
    honor_labels: true          # keep the source's original labels, do not overwrite them
    metrics_path: '/federate'   # federation endpoint; do not change
    params:
      'match[]':                # select which series to pull from the source Prometheus
        - '{job=~"kubernetes-.*"}'
        - '{job=~"traefik.*"}'
    static_configs:             # address of the source Prometheus
      - targets: ['1.1.1.1:30090']
        labels:                 # tag series with the source cluster so hosts stay distinguishable
          k8scluster: sdorica-k8s

  - job_name: 'federate-soe.demon.hj5'
    scrape_interval: 30s
    honor_labels: true
    metrics_path: '/federate'
    params:
      'match[]':
        - '{job=~"kubernetes-.*"}'
    static_configs:
      - targets: ['3.3.3.3:30090']
        labels:
          k8scluster: soe-demon-hj5-k8s

  - job_name: 'federate-jcyfb.av'
    scrape_interval: 30s
    honor_labels: true
    metrics_path: '/federate'
    params:
      'match[]':
        - '{job=~"kubernetes-.*"}'
        - '{job=~"traefik.*"}'
    static_configs:
      - targets: ['2.2.2.2:30090']
        labels:
          k8scluster: jcyfb-av-k8s
评论区