-
Notifications
You must be signed in to change notification settings - Fork 108
/
Copy pathhealth_checks.go
190 lines (160 loc) · 6.27 KB
/
health_checks.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
package rabbithole
import (
"encoding/json"
"net/http"
"strconv"
)
// TimeUnit names the unit of the time window given to
// HealthCheckCertificateExpiration. The string value is appended verbatim to
// the request path of that check.
type TimeUnit string

// Time units that can be passed to HealthCheckCertificateExpiration.
const (
	SECONDS TimeUnit = "seconds"
	DAYS    TimeUnit = "days"
	// WEEKS covers the "weeks" unit that the HealthCheckCertificateExpiration
	// documentation lists as valid.
	WEEKS  TimeUnit = "weeks"
	MONTHS TimeUnit = "months"
	YEARS  TimeUnit = "years"
)
// Protocol names a listener protocol. HealthCheckProtocolListener appends the
// string value verbatim to the request path of the protocol listener check.
type Protocol string

// Protocol names understood by this package.
const (
	// Messaging protocols.
	AMQP    = Protocol("amqp")
	AMQPS   = Protocol("amqp/ssl")
	AMQP091 = Protocol("amqp091")
	AMQP10  = Protocol("amqp10")
	MQTT    = Protocol("mqtt")
	STOMP   = Protocol("stomp")

	// Messaging protocols over WebSockets.
	WebMQTT  = Protocol("web-mqtt")
	WebSTOMP = Protocol("web-stomp")

	// HTTP based listeners.
	HTTP       = Protocol("http")
	HTTPS      = Protocol("https")
	Prometheus = Protocol("http/prometheus")

	// Inter-node communication.
	Clustering = Protocol("clustering")
)
// HealthCheck is the behaviour shared by every health check endpoint
// response: it can report whether the check passed.
// Related RabbitMQ doc guide: https://www.rabbitmq.com/monitoring.html
type HealthCheck interface {
	// Ok reports whether the check succeeded: true when it is ok, false
	// otherwise.
	Ok() bool
}
// HealthCheckStatus is the generic health check endpoint response: a status
// string plus an optional reason.
// Related RabbitMQ doc guide: https://www.rabbitmq.com/monitoring.html
type HealthCheckStatus struct {
	HealthCheck

	// Status is the value of the "status" key of the response.
	Status string `json:"status"`
	// Reason is the value of the optional "reason" key of the response.
	Reason string `json:"reason,omitempty"`
}

// Ok reports whether the health check succeeded, which is the case exactly
// when Status equals "ok".
func (h *HealthCheckStatus) Ok() bool {
	const passed = "ok"
	return h.Status == passed
}
// AlarmInEffect describes one resource alarm that is in effect on a node.
type AlarmInEffect struct {
	// Node is the value of the "node" key.
	Node string `json:"node"`
	// Resource is the value of the "resource" key.
	Resource string `json:"resource"`
}
// ResourceAlarmCheckStatus is the response returned by HealthCheckAlarms and
// HealthCheckLocalAlarms.
type ResourceAlarmCheckStatus struct {
	HealthCheck

	// Status is the value of the "status" key of the response.
	Status string `json:"status"`
	// Reason is the value of the optional "reason" key of the response.
	Reason string `json:"reason,omitempty"`
	// Alarms holds the entries of the optional "alarms" key of the response.
	Alarms []AlarmInEffect `json:"alarms,omitempty"`
}

// Ok reports whether the health check succeeded, which is the case exactly
// when Status equals "ok".
func (h *ResourceAlarmCheckStatus) Ok() bool {
	const passed = "ok"
	return h.Status == passed
}
// HealthCheckAlarms asks the health/checks/alarms endpoint whether any
// resource alarms are in effect in the cluster.
// Related RabbitMQ doc guide: https://www.rabbitmq.com/alarms.html
//
// The status is returned as executeCheck left it, together with the error
// executeCheck reported.
func (c *Client) HealthCheckAlarms() (rec ResourceAlarmCheckStatus, err error) {
	const endpoint = "health/checks/alarms"
	if err = c.executeCheck(endpoint, &rec); err != nil {
		return rec, err
	}
	return rec, nil
}
// HealthCheckLocalAlarms asks the health/checks/local-alarms endpoint whether
// any resource alarms are in effect on the target node.
// Related RabbitMQ doc guide: https://www.rabbitmq.com/alarms.html
//
// The status is returned as executeCheck left it, together with the error
// executeCheck reported.
func (c *Client) HealthCheckLocalAlarms() (rec ResourceAlarmCheckStatus, err error) {
	const endpoint = "health/checks/local-alarms"
	if err = c.executeCheck(endpoint, &rec); err != nil {
		return rec, err
	}
	return rec, nil
}
// HealthCheckCertificateExpiration checks the expiration date on the certificates for every listener configured to use TLS.
// Valid units: days, weeks, months, years. The value of the within argument is the number of units.
// So, when within is 2 and unit is "months", the expiration period used by the check will be the next two months.
//
// The request path is health/checks/certificate-expiration/{within}/{unit},
// with within written as an unsigned decimal and unit appended verbatim. The
// status is returned as executeCheck left it, together with the error
// executeCheck reported.
func (c *Client) HealthCheckCertificateExpiration(within uint, unit TimeUnit) (rec HealthCheckStatus, err error) {
	// FormatUint covers the whole uint range; converting through int would
	// turn values above the largest int into a negative path segment.
	path := "health/checks/certificate-expiration/" + strconv.FormatUint(uint64(within), 10) + "/" + string(unit)
	err = c.executeCheck(path, &rec)
	return rec, err
}
// PortListenerCheckStatus is the response returned by HealthCheckPortListener.
type PortListenerCheckStatus struct {
	HealthCheck

	// Status is the value of the "status" key of the response.
	Status string `json:"status"`
	// Reason is the value of the optional "reason" key of the response.
	Reason string `json:"reason,omitempty"`
	// Port is the value of the optional "port" key of the response.
	Port uint `json:"port,omitempty"`
	// Missing is the value of the optional "missing" key of the response.
	Missing uint `json:"missing,omitempty"`
	// Ports holds the entries of the optional "ports" key of the response.
	Ports []uint `json:"ports,omitempty"`
}

// Ok reports whether the health check succeeded, which is the case exactly
// when Status equals "ok".
func (h *PortListenerCheckStatus) Ok() bool {
	const passed = "ok"
	return h.Status == passed
}
// HealthCheckPortListener checks if there is an active listener on the given port.
// Relevant RabbitMQ doc guide: https://www.rabbitmq.com/monitoring.html
//
// The request path is health/checks/port-listener/{port}, with port written as
// an unsigned decimal. The status is returned as executeCheck left it,
// together with the error executeCheck reported.
func (c *Client) HealthCheckPortListener(port uint) (rec PortListenerCheckStatus, err error) {
	// FormatUint covers the whole uint range; converting through int would
	// turn values above the largest int into a negative path segment.
	path := "health/checks/port-listener/" + strconv.FormatUint(uint64(port), 10)
	err = c.executeCheck(path, &rec)
	return rec, err
}
// ProtocolListenerCheckStatus is the response returned by
// HealthCheckProtocolListener.
type ProtocolListenerCheckStatus struct {
	HealthCheck

	// Status is the value of the "status" key of the response.
	Status string `json:"status"`
	// Reason is the value of the optional "reason" key of the response.
	Reason string `json:"reason,omitempty"`
	// Missing is the value of the optional "missing" key of the response.
	Missing string `json:"missing,omitempty"`
	// Protocols holds the entries of the optional "protocols" key of the
	// response.
	Protocols []string `json:"protocols,omitempty"`
}

// Ok reports whether the health check succeeded, which is the case exactly
// when Status equals "ok".
func (h *ProtocolListenerCheckStatus) Ok() bool {
	const passed = "ok"
	return h.Status == passed
}
// HealthCheckProtocolListener asks the health/checks/protocol-listener
// endpoint whether there is an active listener for the given protocol.
// Valid protocol names are: amqp091, amqp10, mqtt, stomp, web-mqtt, web-stomp, http, https, clustering
// Relevant RabbitMQ doc guide: https://www.rabbitmq.com/monitoring.html
//
// The protocol name is appended verbatim to the request path. The status is
// returned as executeCheck left it, together with the error executeCheck
// reported.
func (c *Client) HealthCheckProtocolListener(protocol Protocol) (rec ProtocolListenerCheckStatus, err error) {
	const base = "health/checks/protocol-listener/"
	endpoint := base + string(protocol)
	if err = c.executeCheck(endpoint, &rec); err != nil {
		return rec, err
	}
	return rec, nil
}
// HealthCheckVirtualHosts asks the health/checks/virtual-hosts endpoint
// whether all virtual hosts are running on the target node.
//
// The status is returned as executeCheck left it, together with the error
// executeCheck reported.
func (c *Client) HealthCheckVirtualHosts() (rec HealthCheckStatus, err error) {
	const endpoint = "health/checks/virtual-hosts"
	if err = c.executeCheck(endpoint, &rec); err != nil {
		return rec, err
	}
	return rec, nil
}
// HealthCheckNodeIsMirrorSyncCritical asks the
// health/checks/node-is-mirror-sync-critical endpoint whether there are
// classic mirrored queues without synchronised mirrors online (queues that
// would potentially lose data if the target node is shut down).
//
// The status is returned as executeCheck left it, together with the error
// executeCheck reported.
func (c *Client) HealthCheckNodeIsMirrorSyncCritical() (rec HealthCheckStatus, err error) {
	const endpoint = "health/checks/node-is-mirror-sync-critical"
	if err = c.executeCheck(endpoint, &rec); err != nil {
		return rec, err
	}
	return rec, nil
}
// HealthCheckNodeIsQuorumCritical asks the
// health/checks/node-is-quorum-critical endpoint whether there are quorum
// queues with minimum online quorum (queues that would lose their quorum and
// availability if the target node is shut down).
// Relevant RabbitMQ doc guide: https://www.rabbitmq.com/quorum-queues.html
//
// The status is returned as executeCheck left it, together with the error
// executeCheck reported.
func (c *Client) HealthCheckNodeIsQuorumCritical() (rec HealthCheckStatus, err error) {
	const endpoint = "health/checks/node-is-quorum-critical"
	if err = c.executeCheck(endpoint, &rec); err != nil {
		return rec, err
	}
	return rec, nil
}
// executeCheck sends a GET request for the given health check path and decodes
// the JSON response body into rec.
//
// rec must be a non-nil pointer to the value that should receive the response.
// The body is decoded when the status code is below 400 or is exactly 503
// (presumably because a failing check reports its details with 503; confirm
// against the server documentation). Any other response is handed to
// parseResponseErrors and rec is not written to.
//
// It returns the error from building the request, from sending it, from
// decoding the body, or from parseResponseErrors, whichever occurs first.
func (c *Client) executeCheck(path string, rec interface{}) error {
	req, err := newGETRequest(c, path)
	if err != nil {
		// Stop here: without a request there is nothing to send, and passing
		// a nil request on to Do is not valid.
		return err
	}

	httpc := &http.Client{
		Timeout: c.timeout,
	}
	if c.transport != nil {
		httpc.Transport = c.transport
	}

	resp, err := httpc.Do(req)
	if err != nil {
		return err
	}
	defer resp.Body.Close()

	if resp.StatusCode < http.StatusBadRequest || resp.StatusCode == http.StatusServiceUnavailable {
		// rec already holds the destination pointer, so it is passed to the
		// decoder directly.
		return json.NewDecoder(resp.Body).Decode(rec)
	}
	return parseResponseErrors(resp)
}