k8s之prometheus部署

介绍

  • prometheus:天然支持监控 k8s 性能指标,主动抓取各个 agent 的数据并存储。
  • alert-feishu:飞书告警webhook
  • dingtalk:钉钉告警webhook
  • alertmanager:告警配置组件

部署

k8s集群外部署

参考文档: https://blog.csdn.net/yanggd1987/article/details/108807171

docker-compose.yaml 配置

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
version: '3'
services:
  prometheus:
    image: prom/prometheus
    container_name: prometheus
    restart: always
    ports:
      - "9090:9090"
    volumes:
      - /data/prometheus/etc/prometheus.yml:/etc/prometheus/prometheus.yml
      - /data/prometheus:/prometheus
    command:
      - '--config.file=/etc/prometheus/prometheus.yml'
      - '--storage.tsdb.path=/prometheus'
      - '--web.enable-admin-api'  # enables the admin HTTP API (e.g. deleting time series)
      - '--web.enable-lifecycle'  # enables hot reload: POST localhost:9090/-/reload takes effect immediately
  alertmanager:
    image: prom/alertmanager
    container_name: alertmanager
    restart: always
    ports:
      - "9093:9093"
    volumes:
      - /data/prometheus/etc/alertmanager.yml:/etc/alertmanager/alertmanager.yml
  dingtalk:
    image: timonwong/prometheus-webhook-dingtalk:master
    container_name: dingtalk
    restart: always
    ports:
      - "8060:8060"
    volumes:
      - /data/prometheus/etc/config.yml:/etc/prometheus-webhook-dingtalk/config.yml
  alert-feishu:
    image: 10.0.1.120:8000/ops/alert:1.5
    container_name: alert-feishu
    restart: always
    ports:
      - "8888:8080"

prometheus.yml配置

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
global:
  scrape_interval: 10s
  evaluation_interval: 10s

# Alerting configuration
alerting:
  alertmanagers:
    - static_configs:
        - targets: ["alertmanager:9093"]
rule_files:
  - "/prometheus/etc/rules/*.yml"

# Kubernetes metric scraping.
# Each job carries two credential sets: one inside kubernetes_sd_configs (used
# for service discovery against the apiserver) and one at job level (used for
# the scrape requests, which are proxied through the apiserver).
scrape_configs:
  # node monitoring (node-exporter on :9100, via apiserver proxy)
  - job_name: kubernetes-node-exporter
    metrics_path: /metrics
    scheme: https
    kubernetes_sd_configs:
      - api_server: https://192.168.0.33:5443/
        role: node
        bearer_token_file: /prometheus/k8s_token
        tls_config:
          insecure_skip_verify: true
    bearer_token_file: /prometheus/k8s_token
    tls_config:
      insecure_skip_verify: true
    relabel_configs:
      - separator: ;
        regex: __meta_kubernetes_node_label_(.+)
        replacement: $1
        action: labelmap
      - separator: ;
        regex: (.*)
        target_label: __address__
        replacement: 192.168.0.33:5443
        action: replace
      - source_labels: [__meta_kubernetes_node_name]
        separator: ;
        regex: (.+)
        target_label: __metrics_path__
        replacement: /api/v1/nodes/${1}:9100/proxy/metrics
        action: replace
  # pod monitoring
  - job_name: 'kubernetes-pods'
    scheme: https
    kubernetes_sd_configs:
      - api_server: https://192.168.0.33:5443/
        role: pod
        bearer_token_file: /prometheus/k8s_token
        tls_config:
          insecure_skip_verify: true
    tls_config:
      insecure_skip_verify: true
    bearer_token_file: /prometheus/k8s_token
    relabel_configs:
      - source_labels: [__meta_kubernetes_namespace]
        action: replace
        target_label: kubernetes_namespace
      - source_labels: [__meta_kubernetes_pod_name]
        action: replace
        target_label: kubernetes_pod_name
      - action: labelmap
        regex: __meta_kubernetes_pod_label_(.+)
      - source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_path]
        action: replace
        target_label: __metrics_path__
        regex: (.+)
      - source_labels: [__address__, __meta_kubernetes_pod_annotation_prometheus_io_port]
        action: replace
        regex: ([^:]+)(?::\d+)?;(\d+)
        replacement: $1:$2
        target_label: __address__
      # remember the pod's port, then rebuild the path as an apiserver proxy URL
      - source_labels: [__address__]
        separator: ;
        regex: '.*:(.*)'
        target_label: __pod_port__
        replacement: $1
        action: replace
      - source_labels: [__meta_kubernetes_namespace, __meta_kubernetes_pod_name, __pod_port__]
        separator: ;
        regex: (.*);(.*);(.*)
        target_label: __metrics_path__
        replacement: /api/v1/namespaces/$1/pods/$2:$3/proxy/metrics
        action: replace
      - source_labels: [__address__]
        separator: ;
        regex: (.*)
        target_label: __address__
        replacement: 192.168.0.33:5443
        action: replace
  # apiserver monitoring
  - job_name: kubernetes-apiservers
    metrics_path: /metrics
    scheme: https
    kubernetes_sd_configs:
      - api_server: https://192.168.0.33:5443/  # the cluster's "API Server endpoint" (public or private)
        role: endpoints
        bearer_token_file: /prometheus/k8s_token  # token file configured for this Prometheus
        tls_config:
          insecure_skip_verify: true
    bearer_token_file: /prometheus/k8s_token
    tls_config:
      insecure_skip_verify: true
    relabel_configs:
      - source_labels: [__meta_kubernetes_namespace, __meta_kubernetes_service_name, __meta_kubernetes_endpoint_port_name]
        separator: ;
        regex: default;kubernetes;https
        replacement: $1
        action: keep
      - separator: ;
        regex: (.*)
        target_label: __address__
        replacement: 192.168.0.33:5443
        action: replace
  # Node-Kubelet (read-only port :10255, via apiserver proxy)
  - job_name: kubernetes-node-kubelet
    metrics_path: /metrics
    scheme: https
    kubernetes_sd_configs:
      - api_server: https://192.168.0.33:5443/
        role: node
        bearer_token_file: /prometheus/k8s_token
        tls_config:
          insecure_skip_verify: true
    bearer_token_file: /prometheus/k8s_token
    tls_config:
      insecure_skip_verify: true
    relabel_configs:
      - separator: ;
        regex: __meta_kubernetes_node_label_(.+)
        replacement: $1
        action: labelmap
      - separator: ;
        regex: (.*)
        target_label: __address__
        replacement: 192.168.0.33:5443
        action: replace
      - source_labels: [__meta_kubernetes_node_name]
        separator: ;
        regex: (.+)
        target_label: __metrics_path__
        replacement: /api/v1/nodes/${1}:10255/proxy/metrics
        action: replace
  # cAdvisor (container metrics, via kubelet's cadvisor endpoint)
  - job_name: kubernetes-cadvisor
    metrics_path: /metrics
    scheme: https
    kubernetes_sd_configs:
      - api_server: https://192.168.0.33:5443/
        role: node
        bearer_token_file: /prometheus/k8s_token
        tls_config:
          insecure_skip_verify: true
    bearer_token_file: /prometheus/k8s_token
    tls_config:
      insecure_skip_verify: true
    relabel_configs:
      - separator: ;
        regex: __meta_kubernetes_node_label_(.+)
        replacement: $1
        action: labelmap
      - separator: ;
        regex: (.*)
        target_label: __address__
        replacement: 192.168.0.33:5443
        action: replace
      - source_labels: [__meta_kubernetes_node_name]
        separator: ;
        regex: (.+)
        target_label: __metrics_path__
        replacement: /api/v1/nodes/${1}/proxy/metrics/cadvisor
        action: replace
  ## Service (disabled)
  # - job_name: 'kubernetes-services'
  #   scheme: https
  #   kubernetes_sd_configs:
  #     - api_server: https://192.168.0.33:5443/
  #       role: service
  #       bearer_token_file: /prometheus/k8s_token
  #       tls_config:
  #         insecure_skip_verify: true
  #   tls_config:
  #     insecure_skip_verify: true
  #   bearer_token_file: /prometheus/k8s_token
  #   relabel_configs:
  #     - separator: ;
  #       regex: __meta_kubernetes_service_label_(.+)
  #       replacement: $1
  #       action: labelmap
  #     - source_labels: [__address__]
  #       separator: ;
  #       regex: '.*:(.*)'
  #       target_label: __service_port__
  #       replacement: $1
  #       action: replace
  #     - source_labels: [__meta_kubernetes_namespace, __meta_kubernetes_service_name, __service_port__]
  #       separator: ;
  #       regex: (.*);(.*);(.*)
  #       target_label: __metrics_path__
  #       replacement: /api/v1/namespaces/$1/services/$2:$3/proxy/metrics
  #       action: replace
  #     - source_labels: [__address__]
  #       separator: ;
  #       regex: (.*)
  #       target_label: __address__
  #       replacement: 192.168.0.33:5443
  #       action: replace
  ## Endpoint (disabled)
  # - job_name: kubernetes-endpoints
  #   scheme: https
  #   metrics_path: /metrics
  #   kubernetes_sd_configs:
  #     - api_server: https://192.168.0.33:5443/
  #       role: endpoints
  #       bearer_token_file: /prometheus/k8s_token
  #       tls_config:
  #         insecure_skip_verify: true
  #   bearer_token_file: /prometheus/k8s_token
  #   tls_config:
  #     insecure_skip_verify: true
  #   relabel_configs:
  #     - separator: ;
  #       regex: __meta_kubernetes_service_label_(.+)
  #       replacement: $1
  #       action: labelmap
  #     - source_labels: [__meta_kubernetes_namespace]
  #       separator: ;
  #       regex: (.*)
  #       target_label: kubernetes_namespace
  #       replacement: $1
  #       action: replace
  #     - source_labels: [__meta_kubernetes_endpoints_name]
  #       separator: ;
  #       regex: (.*)
  #       target_label: kubernetes_endpoint_name
  #       replacement: $1
  #       action: replace
  #     - source_labels: [__address__]
  #       separator: ;
  #       regex: '.*:(.*)'
  #       target_label: __service_port__
  #       replacement: $1
  #       action: replace
  #     - source_labels: [__meta_kubernetes_namespace, __meta_kubernetes_endpoint_address_target_name, __service_port__]
  #       separator: ;
  #       regex: (.*);(.*);(.*)
  #       target_label: __metrics_path__
  #       replacement: /api/v1/namespaces/$1/pods/$2:$3/proxy/metrics
  #       action: replace
  #     - source_labels: [__address__]
  #       separator: ;
  #       regex: (.*)
  #       target_label: __address__
  #       replacement: 192.168.0.33:5443
  #       action: replace

  # push-based metrics reported through pushgateway
  - job_name: pushgateway
    static_configs:
      - targets: ['pushgateway:9091']
        labels:
          instance: pushgateway

alertmanager.yml

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
global:
  # send a "resolved" notification when no new events arrive within this window
  resolve_timeout: 5m
route:
  # default receiver name
  receiver: 'webhook_mention_all'
  # buffer alerts of the same group for this long before the first send
  group_wait: 30s
  # if the group content has not changed, wait this long before sending again
  group_interval: 5m
  # re-send interval while an alert stays unresolved
  repeat_interval: 24h
  # group alerts by these Prometheus labels; one notification per group
  group_by: ['alertname']
  routes:
    - receiver: 'webhook_mention_all'
      group_wait: 10s
receivers:
  - name: 'webhook_mention_all'
    webhook_configs:
      # DingTalk alerts
      - url: 'http://dingtalk:8060/dingtalk/webhook_mention_all/send'
        send_resolved: true
      # Feishu alerts
      - url: 'http://alert-feishu:8080/Alter'

config.yml

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
targets:
  webhook1:
    url: https://oapi.dingtalk.com/robot/send?access_token=cb8ddfe0377c28a4294439e3a9b47588aaccbde11054a1fxxxxxxxxxxxxxx
    # secret for signature
    secret: SEC000000000000000000000
  webhook2:
    url: https://oapi.dingtalk.com/robot/send?access_token=cb8ddfe0377c28a4294439e3a9b47588aaccbde11054a1fxxxxxxxxxxxxxx
    mention:
      mobiles: ['13685060xxx']
  webhook_legacy:
    url: https://oapi.dingtalk.com/robot/send?access_token=cb8ddfe0377c28a4294439e3a9b47588aaccbde11054a1fxxxxxxxxxxxxxx
    # Customize template content
    message:
      # Use legacy template
      title: '{{ template "legacy.title" . }}'
      text: '{{ template "legacy.content" . }}'
  webhook_mention_all:
    url: https://oapi.dingtalk.com/robot/send?access_token=cb8ddfe0377c28a4294439e3a9b47588aaccbde11054a1fxxxxxxxxxxxxxx
    mention:
      all: true
  webhook_mention_users:
    url: https://oapi.dingtalk.com/robot/send?access_token=cb8ddfe0377c28a4294439e3a9b47588aaccbde11054a1fxxxxxxxxxxxxxx
    mention:
      mobiles: ['13685060xxx', '13675840xxx']

rules告警配置

hpa.yml
1
2
3
4
5
6
7
8
9
10
groups:
  - name: hpa-rule
    rules:
      # fires while an HPA has scaled any workload above its configured minimum
      - alert: 'k8s pod 伸缩告警'
        expr: (sum by (horizontalpodautoscaler) (kube_horizontalpodautoscaler_status_current_replicas - kube_horizontalpodautoscaler_spec_min_replicas)) > 0
        for: 1m
        labels:
          team: pod
        annotations:
          description: "{{$labels.horizontalpodautoscaler}}: 扩容中 (当前值是: {{ $value }})"
pod.yml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
# NOTE: the pasted snippet here accidentally repeated the hpa.yml content and a
# shell prompt line; below is the actual pod.yml rules file.
groups:
  - name: pod-rule
    rules:
      - alert: 'k8s pod CPU使用率告警'
        # previous expression without the extra container exclusion:
        # expr: (sum (rate (container_cpu_usage_seconds_total{image!="",container!="POD"}[1m])) by(name) /( sum (container_spec_cpu_quota) by(name) /100000) * 100) > 80
        expr: (sum (rate (container_cpu_usage_seconds_total{image!="",container!~"((POD)|(mc-raise-kitten-server-front))"}[1m])) by(name) /( sum (container_spec_cpu_quota) by(name) /100000) * 100) > 80
        for: 1m
        labels:
          team: pod
        annotations:
          summary: "{{$labels.name}}: CPU使用率高"
          description: "{{$labels.name}}: CPU使用率超过80% (当前值是: {{ $value }})"

      - alert: 'k8s pod 内存使用率告警'
        expr: (sum by(name) (container_memory_working_set_bytes/container_spec_memory_limit_bytes{container!~"((POD)|(icagent)|(virtual-kubelet)|(resource-syncer)|(virtual-kubelet-proxy)|(virtual-kubelet-webhook))",image!=""} *100)) > 90
        for: 1m
        labels:
          team: pod
        annotations:
          summary: "{{$labels.name}}: 内存使用率高"
          description: "{{$labels.name}}: 内存使用率超过90% (当前值是: {{ $value }})"

      - alert: 'k8s pod 重启告警'
        expr: (sum by (namespace,pod) (changes(kube_pod_container_status_restarts_total{instance="192.168.0.33:5443",job="kubernetes-pods"}[5m]))) > 0
        for: 1m
        labels:
          team: pod
        annotations:
          description: "{{$labels.namespace}}-{{$labels.pod}}: 重启 (当前值是: {{ $value }})"

      # - alert: 'k8s pod 流量使用率告警'
      #   expr: sort_desc(sum by (pod) (rate (container_network_receive_bytes_total[1m]) )) / 1024 / 1024 > 100
      #   for: 1m
      #   labels:
      #     team: pod
      #   annotations:
      #     summary: "{{$labels.name}}: 流量使用率高"
      #     description: "{{$labels.name}}: 内存使用率大于100MB (当前值是: {{ $value }} MiB)"

      - alert: 'k8s pod 启动失败告警'
        expr: (sum by(namespace,pod) (kube_pod_status_phase{instance="192.168.0.33:5443",phase=~"Failed|Unknown"})) == 1
        for: 1m
        labels:
          team: pod
        annotations:
          description: "{{$labels.namespace}}-{{$labels.pod}}: 启动失败 (当前值是: {{ $value }})"

grafana配置

面板variables配置

  • date
  • namespace
  • podname

pod cpu使用量

1
sum by (pod)( rate(container_cpu_usage_seconds_total{image!="",container!="POD",namespace="$namespace",name=~".*$podname.*"}[1m] ) )*1000

pod mem使用量

1
sum by(pod) (container_memory_working_set_bytes{namespace="$namespace",container!="POD",image!="",name=~".*$podname.*"})

pod 流量

1
sort_desc(sum by (pod) (rate (container_network_receive_bytes_total{namespace="$namespace",name=~".*$podname.*"}[1m]) ))

pod 套接字

1
sum by (pod)(container_sockets{namespace="$namespace",pod=~".*$podname.*"})

k8s集群部署

使用prometheus-operator安装,实际配置监控项,告警规则等都通过k8s的CRD资源动态创建,方便管理维护。

组件说明:

  • Operator: 根据自定义资源(Custom Resource Definition / CRDs)来部署和管理 Prometheus Server,同时监控这些自定义资源事件的变化来做相应的处理,是整个系统的控制中心。
  • Prometheus:声明 Prometheus deployment 期望的状态,Operator 确保这个 deployment 运行时一直与定义保持一致。
  • Prometheus Server: Operator 根据自定义资源 Prometheus 类型中定义的内容而部署的 Prometheus Server 集群,这些自定义资源可以看作是用来管理 Prometheus Server 集群的 StatefulSets 资源。
  • ServiceMonitor:声明指定监控的服务,描述了一组被 Prometheus 监控的目标列表。该资源通过 Labels 来选取对应的 Service Endpoint,让 Prometheus Server 通过选取的 Service 来获取 Metrics 信息。
  • Service:简单的说就是 Prometheus 监控的对象。
  • Alertmanager:定义 AlertManager deployment 期望的状态,Operator 确保这个 deployment 运行时一直与定义保持一致。

安装

1
2
3
4
5
6
wget https://github.com/prometheus-operator/kube-prometheus/archive/refs/tags/v0.7.0.tar.gz

tar xf v0.7.0.tar.gz
cd kube-prometheus-0.7.0
kubectl apply --server-side -f manifests/setup
# fixed: was "-f manifest" (directory does not exist; the repo ships "manifests/")
kubectl apply --server-side -f manifests/

卸载

1
kubectl delete --ignore-not-found=true -f manifests/ -f manifests/setup

添加外部服务监控项

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
kind: Service
apiVersion: v1
metadata:
  name: pushgateway
  namespace: default
  labels:
    k8s-app: pushgateway
spec:
  type: ClusterIP
  ports:
    - name: gateport
      port: 31247

---
# headless mapping to the external pushgateway instance
kind: Endpoints
apiVersion: v1
metadata:
  name: pushgateway
  namespace: default
  labels:
    k8s-app: pushgateway
subsets:
  - addresses:
      - ip: 10.0.1.40
    ports:
      - name: gateport
        port: 31247

---
apiVersion: monitoring.coreos.com/v1
kind: ServiceMonitor  # registers the service as a Prometheus scrape target
metadata:
  labels:
    k8s-app: pushgateway
  name: pushgateway
  namespace: monitoring
spec:
  jobLabel: k8s-app
  endpoints:
    - interval: 30s
      port: gateport
  selector:
    matchLabels:
      k8s-app: pushgateway
  namespaceSelector:
    matchNames:
      - default

---
apiVersion: monitoring.coreos.com/v1
kind: PrometheusRule  # adds alerting rules
metadata:
  labels:  # both labels below are required, otherwise the rule is not loaded
    prometheus: k8s
    role: alert-rules
  name: external-pushgateway-rules
  namespace: monitoring
spec:
  groups:
    - name: external-pushgateway
      rules:
        - alert: external-pushgateway
          annotations:
            message: zhi da yu 10
          expr: |
            some_metric > 10
          for: 1m
          labels:
            severity: warning

添加钉钉告警

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
---
apiVersion: apps/v1
kind: Deployment
metadata:
  name: dingtalk
  namespace: monitoring
spec:
  progressDeadlineSeconds: 600
  replicas: 1
  revisionHistoryLimit: 10
  selector:
    matchLabels:
      k8s.kuboard.cn/layer: ''
      k8s.kuboard.cn/name: dingtalk
  strategy:
    rollingUpdate:
      maxSurge: 25%
      maxUnavailable: 25%
    type: RollingUpdate
  template:
    metadata:
      labels:
        k8s.kuboard.cn/layer: ''
        k8s.kuboard.cn/name: dingtalk
    spec:
      containers:
        - image: 'timonwong/prometheus-webhook-dingtalk:v2.1.0'
          imagePullPolicy: IfNotPresent
          name: dingtalk
          terminationMessagePath: /dev/termination-log
          terminationMessagePolicy: File
          volumeMounts:
            # mount only config.yml from the ConfigMap, keeping the rest of the dir intact
            - mountPath: /etc/prometheus-webhook-dingtalk/config.yml
              name: conf
              subPath: config.yml
      dnsPolicy: ClusterFirst
      restartPolicy: Always
      schedulerName: default-scheduler
      terminationGracePeriodSeconds: 30
      volumes:
        - configMap:
            defaultMode: 420
            items:
              - key: config.yml
                path: config.yml
            name: dingtalk-conf
          name: conf

---
apiVersion: v1
kind: Service
metadata:
  name: dingtalk
  namespace: monitoring
spec:
  ports:
    - name: jrpdpg
      port: 8060
      protocol: TCP
      targetPort: 8060
  selector:
    k8s.kuboard.cn/layer: ''
    k8s.kuboard.cn/name: dingtalk
  sessionAffinity: None
  type: ClusterIP

---
apiVersion: v1
data:
  config.yml: |-
    targets:
      webhook_mention_all:
        url: https://oapi.dingtalk.com/robot/send?access_token=cb8ddfe0377c28a4294439e3a9b47588aaccbde11054a1f0xxxxxxxxxxxxxxxx
        mention:
          all: true
kind: ConfigMap
metadata:
  name: dingtalk-conf
  namespace: monitoring

添加飞书告警

使用 Go 语言编写,编译后打包成 Docker 镜像

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
package main

import (
	"bytes"
	"encoding/json"
	"fmt"
	"io/ioutil"
	"log"
	"net/http"
	"strings"
	"time"

	"github.com/gin-gonic/gin"
)

// Message is the text-message payload posted to the Feishu bot webhook.
type Message struct {
	MsgType string `json:"msg_type"`
	Content struct {
		Text string `json:"text"`
	} `json:"content"`
}

// Alert is one alert entry inside an Alertmanager webhook notification.
// NOTE: several tags in this file were previously written without quotes
// (e.g. `json:annotations`) — a malformed struct tag that the reflect
// package ignores; decoding only worked because encoding/json falls back
// to case-insensitive field-name matching. All tags are now well-formed.
type Alert struct {
	Labels      map[string]string `json:"labels"`
	Annotations map[string]string `json:"annotations"`
	StartsAt    time.Time         `json:"startsAt"`
	EndsAt      time.Time         `json:"endsAt"`
}

// Notification is the Alertmanager webhook payload.
type Notification struct {
	Version           string            `json:"version"`
	GroupKey          string            `json:"groupKey"`
	Status            string            `json:"status"`
	Receiver          string            `json:"receiver"`
	GroupLabels       map[string]string `json:"groupLabels"`
	CommonLabels      map[string]string `json:"commonLabels"`
	CommonAnnotations map[string]string `json:"commonAnnotations"`
	ExternalURL       string            `json:"externalURL"`
	Alerts            []Alert           `json:"alerts"`
}

// defaultRobot is the Feishu bot webhook URL that receives every notification.
var defaultRobot = "https://open.feishu.cn/open-apis/bot/v2/hook/7f52c85b-7b0c-415f-8428-5b548b3ee973"

// SendMessage renders an Alertmanager notification as a Feishu text message
// (mentioning everyone) and posts it to the given robot webhook URL.
// Errors are logged and swallowed: alert delivery is best-effort.
func SendMessage(notification Notification, defaultRobot string) {
	msg, err := json.Marshal(notification.GroupLabels["alertname"])
	if err != nil {
		log.Println("notification.GroupLabels Marshal failed,", err)
		return
	}

	msg1, err := json.Marshal(notification.CommonAnnotations["description"])
	if err != nil {
		log.Println("notification.CommonAnnotations Marshal failed,", err)
		return
	}

	msg2, err := json.Marshal(notification.CommonLabels["instance"])
	if err != nil {
		// fixed: message previously said "CommonAnnotations" (copy-paste error)
		log.Println("notification.CommonLabels Marshal failed,", err)
		return
	}

	// firing message body
	var buffer bytes.Buffer
	buffer.WriteString(fmt.Sprintf("%v\n", string(msg)))
	buffer.WriteString(fmt.Sprintf("告警主机: %v\n", string(msg2)))
	buffer.WriteString(fmt.Sprintf("告警详情: %v\n", string(msg1)))
	buffer.WriteString("<at user_id=\"all\">所有人</at>")
	// resolved message body
	var buffer2 bytes.Buffer
	buffer2.WriteString(fmt.Sprintf("告警恢复: %v\n", string(msg)))
	buffer2.WriteString(fmt.Sprintf("告警主机: %v\n", string(msg2)))
	buffer2.WriteString(fmt.Sprintf("告警详情: %v\n", string(msg1)))
	buffer2.WriteString("<at user_id=\"all\">所有人</at>")

	var m Message
	m.MsgType = "text"
	if notification.Status == "resolved" {
		m.Content.Text = buffer2.String()
	} else if notification.Status == "firing" {
		m.Content.Text = buffer.String()
	}

	jsons, err := json.Marshal(m)
	if err != nil {
		log.Println("SendMessage Marshal failed,", err)
		return
	}

	resp := string(jsons)
	client := &http.Client{}

	req, err := http.NewRequest("POST", defaultRobot, strings.NewReader(resp))
	if err != nil {
		log.Println("SendMessage http NewRequest failed,", err)
		return
	}

	req.Header.Set("Content-Type", "application/json")
	r, err := client.Do(req)
	if err != nil {
		log.Println("SendMessage client Do failed", err)
		return
	}

	defer r.Body.Close()
	body, err := ioutil.ReadAll(r.Body)
	if err != nil {
		log.Println("SendMessage ReadAll Body failed", err)
		return
	}

	log.Println("SendMessage success,body:", string(body))
}

// Alter handles POST /Alter from Alertmanager. The "Alter" spelling is kept
// deliberately: alertmanager.yml points its webhook URL at /Alter.
func Alter(c *gin.Context) {
	var notification Notification

	err := c.BindJSON(&notification)
	if err != nil {
		c.JSON(http.StatusBadRequest, gin.H{"error": err.Error()})
		return
	}
	SendMessage(notification, defaultRobot)
}

func main() {
	t := gin.Default()
	t.POST("/Alter", Alter)
	t.Run(":8080")
}

飞书dockerfile

1
2
3
4
5
6
7
8
9
10
11
FROM alpine:latest

RUN mkdir /app
WORKDIR /app

# COPY is preferred over ADD for plain local files (no implicit tar
# extraction or remote-URL semantics)
COPY alert-feishu /app
RUN chmod +x alert-feishu

EXPOSE 8080

CMD ["./alert-feishu"]

本博客所有文章除特别声明外,均采用 CC BY-SA 4.0 协议 ,转载请注明出处!