-
Notifications
You must be signed in to change notification settings - Fork 5
/
Copy pathdowndetector-exporter.go
598 lines (523 loc) · 19.5 KB
/
downdetector-exporter.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
package main
import (
"encoding/json"
"fmt"
"io"
"net/http"
"net/url"
"os"
"reflect"
"runtime"
"strconv"
"strings"
"time"
"github.com/goccy/go-yaml"
"github.com/go-kit/log"
"github.com/go-kit/log/level"
"github.com/urfave/cli/v2"
"github.com/coreos/go-systemd/daemon"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/client_golang/prometheus/promhttp"
)
// Timing and endpoint settings used throughout the exporter.
const (
	// tokenGraceSeconds is the number of seconds before a token's end of
	// life at which a new token must be fetched. A token can live at most
	// 3600 seconds before it needs to be refreshed.
	tokenGraceSeconds = 300

	// minSleepSeconds is the number of seconds to wait before the next
	// working loop iteration is started.
	minSleepSeconds = 60

	// baseURL is the prefix every API request URL is built from.
	baseURL = "https://downdetectorapi.com/v2"
)
// Package-level state shared between the command line parser, the token
// handling and the metrics collection.
var (
	// lg is the process-wide logger; main replaces it once the log level is known.
	lg = log.NewLogfmtLogger(os.Stdout)

	// fieldsToReturn lists the fields requested in a metrics call. If it is
	// expanded, struct CompanySet needs to be expanded accordingly.
	fieldsToReturn = []string{"id", "name", "slug", "baseline_current", "country_iso", "stats_24", "stats_60", "status"}
	// fieldsToReturnSearch lists the fields requested in a search call.
	fieldsToReturnSearch = []string{"id", "name", "slug", "country_iso"}

	// token is the most recently fetched API token.
	token Token
	// credentials are the username/password sent when fetching a token.
	credentials BasicAuth
	// username and password are filled in by the command line parser.
	username string
	password string

	// httpClient is the client used for all API requests.
	httpClient *http.Client

	// metricsResponse receives the decoded API answer; Downdetector
	// delivers one CompanySet per given ID.
	metricsResponse []CompanySet

	// exposed holds the various metrics that are collected, keyed by
	// CompanySet field name.
	exposed = make(map[string]*prometheus.GaugeVec)

	// lastUpdate shows the last update time so one can see whether the
	// system is working correctly.
	lastUpdate = prometheus.NewGaugeVec(
		prometheus.GaugeOpts{
			Name: "dd_lastUpdate",
			Help: "Last update timestamp in epoch seconds",
		},
		[]string{"scope"},
	)
)
// init registers the lastUpdate gauge with the default Prometheus registry
// so it is exposed together with the dynamically created metrics.
func init() {
	prometheus.MustRegister(lastUpdate)
}
// BasicAuth contains the username and password, either read in from the
// credentials YAML file or copied from the username/password variables.
type BasicAuth struct {
	// UserName is sent as the Basic Auth user when a token is requested.
	UserName string `json:"username"`
	// Password is sent as the Basic Auth password when a token is requested.
	Password string `json:"password"`
}
// Token contains the access token, its lifetime at issuing time, its type
// and, once set by the program, the time at which it was issued.
type Token struct {
	// Access is the access token itself (normally of type Bearer).
	Access string `json:"access_token"`
	// ExpiresIn is the lifetime in seconds, usually 3600.
	ExpiresIn int `json:"expires_in"`
	// Type is the token type (Bearer).
	Type string `json:"token_type"`
	// RefreshTime is not part of the API answer; it must be set
	// programmatically after a token has been fetched successfully.
	RefreshTime time.Time
}
// CompanySet contains the data returned per company.
//
// The field name prefix decides how a field is exported:
//   - "Label":  the value is used as a label,
//   - "Ignore": the value is neither a metric nor a label but is handled
//     programmatically,
//   - no prefix: the value is used as a metric value.
type CompanySet struct {
	LabelCountryISO string `json:"country_iso,omitempty"`
	LabelName       string `json:"name,omitempty"`
	LabelSlug       string `json:"slug,omitempty"`
	// IgnoreStatus is the status name (success, warning, danger) in string form.
	IgnoreStatus string `json:"status,omitempty"`
	LabelID      int    `json:"id"`
	// BaselineCurrent is generated over the last 24 hours and shows the
	// normal baseline value of a service.
	BaselineCurrent int `json:"baseline_current"`
	// Stats60 is the current metric of mentions.
	Stats60 int `json:"stats_60"`
	// IgnoreStats24 is the statistics over the last 24h in 15 minute buckets.
	IgnoreStats24 []int `json:"stats_24"`
	// Stats15 is the number of reports in the most recent 15 minute bucket;
	// it is never read from JSON but filled in from IgnoreStats24.
	Stats15 int `json:"-"`
	// NumStatus is never read from JSON; it needs to be filled in
	// programmatically from IgnoreStatus so it can be used as a metric.
	NumStatus int `json:"-"`
}
// getCredentials fills the package-level credentials variable.
//
// If both the username and password variables are set they are used and
// credentialsFile is not touched. Otherwise credentialsFile is opened and
// decoded as YAML into credentials. The process exits with status 2 when the
// file cannot be opened, cannot be decoded, or leaves the username or the
// password empty.
func getCredentials(credentialsFile string) {
	// given username and password take precedence over credentialsFile
	if username != "" && password != "" {
		credentials.UserName = username
		credentials.Password = password
		return
	}

	osFile, err := os.Open(credentialsFile)
	if err != nil {
		level.Error(lg).Log("msg", fmt.Sprintf("Couldn't read credentials file: %s", err.Error()))
		os.Exit(2)
	}

	err = yaml.NewDecoder(osFile).Decode(&credentials)
	// The file is only needed for decoding. It is closed explicitly rather
	// than deferred because the error path below leaves via os.Exit, which
	// skips deferred calls. The close error of a read-only file is ignored.
	_ = osFile.Close()

	if err != nil || credentials.Password == "" || credentials.UserName == "" {
		errorText := "Username/Password not set"
		if err != nil {
			// NOTE(review): the decoder's own error text is deliberately not
			// logged - presumably because it may quote parts of the
			// credentials file. Confirm before adding err.Error() here.
			errorText = "invalid YAML"
		}
		level.Error(lg).Log("msg", fmt.Sprintf("Couldn't parse credentials file: %s", errorText))
		level.Error(lg).Log("msg", "YAML file needs to contain username and password fields")
		os.Exit(2)
	}
}
// trace returns a one-line description (file, line and function name) of the
// function that called it, or "TRACE ERROR" if the caller cannot be determined.
func trace() string {
	if pc, file, line, ok := runtime.Caller(1); ok {
		return fmt.Sprintf("File: %s Line: %d Function: %s", file, line, runtime.FuncForPC(pc).Name())
	}
	return "TRACE ERROR"
}
// main defines the command line interface and runs it against os.Args.
// The Action attached to the app validates the flags, configures the logger,
// registers the HTTP handlers, starts the HTTP server in a goroutine,
// notifies systemd, loads the credentials, creates the HTTP client and then
// enters the endless working loop.
func main() {
// Destination variables of command line parser
var listenAddress string
var credentialsFile string
var metricsPath string
var logLevel string
var companyIDs string
var searchString string
// TODO: - value checking
// app is a command line parser
app := &cli.App{
Authors: []*cli.Author{
{
Name: "Torben Frey",
Email: "[email protected]",
},
},
Commands: nil,
ArgsUsage: " ",
Name: "downdetector-exporter",
Usage: "report metrics of downdetector api",
// Every flag writes into one of the destination variables above (or into
// the package-level username/password).
Flags: []cli.Flag{
&cli.StringFlag{
Name: "company_ids",
Aliases: []string{"i"},
Usage: "comma separated list of company ids to monitor",
Destination: &companyIDs,
EnvVars: []string{"COMPANY_IDS"},
},
&cli.StringFlag{
Name: "credentials_file",
Aliases: []string{"c"},
Usage: "file containing credentials for downdetector. Credentials file is in YAML format and contains two fields, username and password. Alternatively give username and password, they win over credentials file.",
Destination: &credentialsFile,
EnvVars: []string{"CREDENTIALS_FILE"},
},
&cli.StringFlag{
Name: "username",
Value: "",
Aliases: []string{"u"},
Usage: "username, wins over credentials file",
Destination: &username,
EnvVars: []string{"DD_USERNAME"},
},
&cli.StringFlag{
Name: "password",
Value: "",
Aliases: []string{"p"},
Usage: "password, wins over credentials file",
Destination: &password,
EnvVars: []string{"DD_PASSWORD"},
},
&cli.StringFlag{
Name: "listen_address",
Value: ":9313",
Aliases: []string{"l"},
Usage: "[optional] address to listen on, either :port or address:port",
Destination: &listenAddress,
EnvVars: []string{"LISTEN_ADDRESS"},
},
&cli.StringFlag{
Name: "metrics_path",
Value: "/metrics",
Aliases: []string{"m"},
Usage: "[optional] URL path where metrics are exposed",
Destination: &metricsPath,
EnvVars: []string{"METRICS_PATH"},
},
&cli.StringFlag{
Name: "log_level",
Value: "ERROR",
Aliases: []string{"v"},
Usage: "[optional] log level, choose from DEBUG, INFO, WARN, ERROR",
Destination: &logLevel,
EnvVars: []string{"LOG_LEVEL"},
},
// search_string is the only flag without an environment variable
&cli.StringFlag{
Name: "search_string",
Value: "",
Aliases: []string{"s"},
Usage: "[optional] search for companies containing this text and return their IDs",
Destination: &searchString,
},
},
Action: func(c *cli.Context) error {
// Without a credentials file both username and password are mandatory
if credentialsFile == "" {
if username == "" || password == "" {
level.Error(lg).Log("msg", "Either credentials_file or username and password need to be set!")
os.Exit(2)
}
}
// There must be something to do: either IDs to monitor or a text to search for
if companyIDs == "" && searchString == "" {
level.Error(lg).Log("msg", "Either company_ids or a search string need to be set!")
os.Exit(2)
}
// Rebuild the logger with timestamp and caller, filtered by the requested
// level; any value other than DEBUG, INFO or WARN results in error-only logging
lg = log.NewLogfmtLogger(os.Stdout)
lg = log.With(lg, "ts", log.DefaultTimestamp, "caller", log.DefaultCaller)
switch logLevel {
case "DEBUG":
lg = level.NewFilter(lg, level.AllowDebug())
case "INFO":
lg = level.NewFilter(lg, level.AllowInfo())
case "WARN":
lg = level.NewFilter(lg, level.AllowWarn())
default:
lg = level.NewFilter(lg, level.AllowError())
}
// Debugging output
level.Debug(lg).Log("msg", fmt.Sprintf("listenAddress: %s", listenAddress))
level.Debug(lg).Log("msg", fmt.Sprintf("credentialsFile: %s", credentialsFile))
level.Debug(lg).Log("msg", fmt.Sprintf("metricsPath: %s", metricsPath))
level.Debug(lg).Log("msg", fmt.Sprintf("companyIDs: %v", companyIDs))
// install promhttp handler for metricsPath (/metrics)
http.Handle(metricsPath, promhttp.Handler())
// show nice web page if called without metricsPath
http.HandleFunc("/", func(w http.ResponseWriter, r *http.Request) {
w.Write([]byte(`<html>
<head><title>Downdetector Exporter</title></head>
<body>
<h1>Downdetector Exporter</h1>
<p><a href='` + metricsPath + `'>Metrics</a></p>
</body>
</html>`))
})
// Start the http server in background, but catch error.
// ListenAndServe only returns on failure, which terminates the whole process.
go func() {
err := http.ListenAndServe(listenAddress, nil)
level.Error(lg).Log("msg", err.Error())
os.Exit(2)
}()
// wait for initialization of http server before looping so the systemd alive check doesn't fail
time.Sleep(time.Second * 3)
// notify systemd that we're ready
daemon.SdNotify(false, daemon.SdNotifyReady)
// read in credentials from Yaml file or username/password variables
getCredentials(credentialsFile)
// TODO: Proxy URL instead of ""
httpClient = getHTTPClient("")
// Working loop. It has no exit condition, so the Action never returns
// from here; the process ends through one of the os.Exit calls.
for {
// does the individual work, so the rest of the code can be used for other exporters
workHorse(companyIDs, searchString)
// send aliveness to systemd
systemAlive(listenAddress, metricsPath)
// sleep minSleepSeconds seconds before starting next loop
time.Sleep(time.Second * minSleepSeconds)
}
},
}
// Start the app
err := app.Run(os.Args)
if err != nil {
// the error is only logged; no explicit exit status is set here
level.Error(lg).Log("msg", err.Error())
}
}
// workHorse performs one iteration of the working loop: it makes sure a
// usable token is available and then fetches the metrics (or, when
// searchString is set, the search result) for the given company IDs.
func workHorse(companyIDs string, searchString string) {
	// age of the current token and the age at which it has to be replaced,
	// i.e. tokenGraceSeconds before it expires
	age := int(time.Since(token.RefreshTime).Seconds())
	maxAge := token.ExpiresIn - tokenGraceSeconds

	switch {
	case token.Access == "", age > maxAge:
		level.Debug(lg).Log("msg", "refreshing token")
		initToken()
	default:
		level.Debug(lg).Log("msg", fmt.Sprintf("Seconds before a new token must be fetched: %d", maxAge-age))
	}

	getMetrics(companyIDs, searchString)
}
// getHTTPClient builds the HTTP client used for API requests.
//
// The client counts its requests in the client_api_requests_total counter
// (labels "method" and "code"), which is registered with Prometheus here, and
// uses a 60 second timeout. If proxyURLStr is not empty it is used as proxy;
// a proxy URL that cannot be parsed ends the process with exit status 2.
func getHTTPClient(proxyURLStr string) *http.Client {
	requestCounter := prometheus.NewCounterVec(
		prometheus.CounterOpts{
			Name: "client_api_requests_total",
			Help: "Total number of client requests made.",
		},
		[]string{"method", "code"},
	)
	prometheus.MustRegister(requestCounter)

	// work on a copy so the default transport itself stays untouched
	tr := http.DefaultTransport.(*http.Transport).Clone()
	if proxyURLStr != "" {
		parsed, err := url.Parse(proxyURLStr)
		if err != nil {
			level.Error(lg).Log("msg", fmt.Sprintf("Couldn't parse proxy url: %s", err.Error()))
			os.Exit(2)
		}
		tr.Proxy = http.ProxyURL(parsed)
	}

	// wrap the transport so every round trip is counted
	return &http.Client{
		Transport: promhttp.InstrumentRoundTripperCounter(requestCounter, tr),
		Timeout:   60 * time.Second,
	}
}
// initToken requests a new token from the API using the package-level
// credentials and, on success, stores it in the package-level token together
// with the current time as RefreshTime.
//
// On any failure (request creation, transport error, non-200 answer,
// unreadable or undecodable body) the problem is logged and the function
// returns without touching the existing token, so the caller can retry on its
// next iteration.
func initToken() {
	// create the token refresh request
	endpoint := baseURL + "/tokens?grant_type=client_credentials"
	req, err := http.NewRequest("POST", endpoint, nil)
	if err != nil {
		// req is nil in this case, so it must not be used
		level.Warn(lg).Log("msg", fmt.Sprintf("Couldn't create token request: %s", err.Error()))
		return
	}
	req.SetBasicAuth(credentials.UserName, credentials.Password)

	// send the token refresh request
	res, err := httpClient.Do(req)
	if err != nil {
		level.Error(lg).Log("msg", fmt.Sprintf("Couldn't get token: %s", err.Error()))
		return
	}
	// registered before any early return so the body is closed on every path
	defer res.Body.Close()

	if res.StatusCode != http.StatusOK {
		// the body is only used for the log line, so a read error is ignored
		body, _ := io.ReadAll(res.Body)
		level.Warn(lg).Log("msg", fmt.Sprintf("Error response code: %d - %s", res.StatusCode, body))
		return
	}

	// read body from response
	body, err := io.ReadAll(res.Body)
	if err != nil {
		level.Warn(lg).Log("msg", fmt.Sprintf("Couldn't read in body: %s", err.Error()))
		return
	}

	// decode into a fresh value so a failed decode cannot leave the
	// package-level token partly overwritten
	var fresh Token
	if err := json.Unmarshal(body, &fresh); err != nil {
		level.Warn(lg).Log("msg", fmt.Sprintf("Couldn't unmarshal json: %s", err.Error()))
		return
	}
	// Mark we have refreshed token right now
	fresh.RefreshTime = time.Now()
	token = fresh

	level.Debug(lg).Log("msg", fmt.Sprintf("Token Type: %s", token.Type))
	level.Debug(lg).Log("msg", fmt.Sprintf("Expires in: %d", token.ExpiresIn))
	level.Debug(lg).Log("msg", fmt.Sprintf("Token Refresh Time: %s", token.RefreshTime))
}
// getMetrics queries the API and processes the answer.
//
// With an empty searchString the companies listed in companyIDs (comma
// separated) are fetched and every numeric, non-Label, non-Ignore field of
// each returned CompanySet is exported as a Prometheus gauge labelled with
// the CompanySet's Label fields.
//
// With a non-empty searchString the search endpoint is queried instead, one
// line per hit is printed to stdout and the process exits.
//
// Request, transport, status and decoding problems are logged and end the
// call early without exporting anything.
func getMetrics(companyIDs string, searchString string) {
	var endpoint string
	if searchString == "" {
		// create the metrics fetching request
		endpoint = baseURL + "/companies?fields=" + strings.Join(fieldsToReturn, "%2C") + "&ids=" + strings.ReplaceAll(companyIDs, ",", "%2C")
	} else {
		// the search text is free-form user input and has to be escaped so
		// that spaces, '&' or '#' cannot break or extend the query string
		endpoint = baseURL + "/companies/search?name=" + url.QueryEscape(searchString) + "&fields=" + strings.Join(fieldsToReturnSearch, "%2C")
	}
	// curl --request GET -H "Authorization: Bearer $TOKEN" --url 'https://downdetectorapi.com/v2/companies/search?name=mail.com&fields=url%2Cbaseline%2Csite_id%2Cstatus%2Ccountry_iso%2Cname%2Cslug' | jq .
	req, err := http.NewRequest("GET", endpoint, nil)
	if err != nil {
		// req is nil in this case, so it must not be used
		level.Warn(lg).Log("msg", fmt.Sprintf("Couldn't create metrics request: %s", err.Error()))
		return
	}
	req.Header.Add("Authorization", "Bearer "+token.Access)

	// send the metrics request
	res, err := httpClient.Do(req)
	if err != nil {
		level.Error(lg).Log("msg", fmt.Sprintf("Couldn't get metrics: %s", err.Error()))
		return
	}
	// registered before any early return so the body is closed on every path
	defer res.Body.Close()

	if res.StatusCode != http.StatusOK {
		// the body is only used for the log line, so a read error is ignored
		body, _ := io.ReadAll(res.Body)
		level.Warn(lg).Log("msg", fmt.Sprintf("Could not get metrics: %d - %s", res.StatusCode, body))
		return
	}

	// read body from response
	body, err := io.ReadAll(res.Body)
	if err != nil {
		level.Warn(lg).Log("msg", fmt.Sprintf("Couldn't read in body: %s", err.Error()))
		return
	}

	// decode into a fresh slice so a failed decode cannot leave partial data
	// in the package-level metricsResponse
	var companies []CompanySet
	if err := json.Unmarshal(body, &companies); err != nil {
		level.Warn(lg).Log("msg", fmt.Sprintf("Couldn't unmarshal json: %s", err.Error()))
		return
	}
	metricsResponse = companies

	// Loop through all companies in response
	for _, companySet := range metricsResponse {
		if searchString != "" {
			// search mode only lists the hits
			fmt.Printf("ID: %d - Name: %s, Slug: %s, Country: %s\n", companySet.LabelID, companySet.LabelName, companySet.LabelSlug, companySet.LabelCountryISO)
			continue
		}

		// convert string value (success, warning, danger) to int metrics
		companySet.NumStatus = numericStatus(companySet.IgnoreStatus)

		// get last value from Stats24 array; an empty or missing array
		// must not panic, Stats15 then stays at 0
		if n := len(companySet.IgnoreStats24); n > 0 {
			companySet.Stats15 = companySet.IgnoreStats24[n-1]
		} else {
			level.Warn(lg).Log("msg", fmt.Sprintf("No stats_24 values for company id %d", companySet.LabelID))
		}

		// Debugging output
		level.Debug(lg).Log("msg", "")
		level.Debug(lg).Log("msg", "===== Labels =====")
		level.Debug(lg).Log("msg", fmt.Sprintf("Name: %s", companySet.LabelName))
		level.Debug(lg).Log("msg", fmt.Sprintf("Slug: %s", companySet.LabelSlug))
		level.Debug(lg).Log("msg", fmt.Sprintf("Country: %s", companySet.LabelCountryISO))
		level.Debug(lg).Log("msg", fmt.Sprintf("ID: %d", companySet.LabelID))
		level.Debug(lg).Log("msg", "===== Info =====")
		level.Debug(lg).Log("msg", fmt.Sprintf("Status: %s", companySet.IgnoreStatus))
		level.Debug(lg).Log("msg", "===== Metrics =====")
		level.Debug(lg).Log("msg", fmt.Sprintf("Current Baseline: %d", companySet.BaselineCurrent))
		level.Debug(lg).Log("msg", fmt.Sprintf("Stats60: %d", companySet.Stats60))
		level.Debug(lg).Log("msg", fmt.Sprintf("Stats15: %d", companySet.Stats15))
		level.Debug(lg).Log("msg", fmt.Sprintf("Status: %d", companySet.NumStatus))

		// label names and their values, in struct field order
		var labels []string
		var labelValues []string

		// reflect to get members of struct
		cs := reflect.ValueOf(companySet)
		typeOfCompanySet := cs.Type()

		// Loop over all struct members and collect all fields starting with Label
		level.Debug(lg).Log("msg", "")
		level.Debug(lg).Log("msg", "Looping over CompanySet")
		for i := 0; i < cs.NumField(); i++ {
			key := typeOfCompanySet.Field(i).Name
			field := cs.Field(i)
			level.Debug(lg).Log("msg", fmt.Sprintf("Field: %s, Value: %v", key, field.Interface()))
			if !strings.HasPrefix(key, "Label") {
				continue
			}
			// labels have lower case names
			labels = append(labels, strings.ToLower(strings.TrimPrefix(key, "Label")))
			// IDs are returned as integers, convert to string
			if field.Kind() == reflect.String {
				labelValues = append(labelValues, field.String())
			} else {
				labelValues = append(labelValues, strconv.FormatInt(field.Int(), 10))
			}
		}
		level.Debug(lg).Log("msg", "")
		level.Debug(lg).Log("msg", fmt.Sprintf("Labels: %v", labels))

		// Loop over all struct fields and export every field that starts
		// neither with Label nor with Ignore as a metric with these labels
		level.Debug(lg).Log("msg", "")
		for i := 0; i < cs.NumField(); i++ {
			key := typeOfCompanySet.Field(i).Name
			if strings.HasPrefix(key, "Label") || strings.HasPrefix(key, "Ignore") {
				continue
			}
			setPrometheusMetric(key, int(cs.Field(i).Int()), labels, labelValues)
		}
	}

	if searchString != "" {
		// NOTE(review): a finished search ends the process with status 2,
		// the same status used for errors elsewhere - confirm this is intended.
		os.Exit(2)
	}
}

// numericStatus maps the status name to its metric value:
// success=0, warning=1, danger=2, anything else=-1.
func numericStatus(status string) int {
	switch status {
	case "success":
		return 0
	case "warning":
		return 1
	case "danger":
		return 2
	default:
		return -1
	}
}
// setPrometheusMetric sets the gauge named "dd_"+key to value for the given
// label values. The gauge is created with the given label names and
// registered the first time a key is seen. Every call also stamps the
// lastUpdate gauge (scope "global") with the current epoch seconds, so it is
// immediately visible when no updates happen anymore.
func setPrometheusMetric(key string, value int, labels []string, labelValues []string) {
	level.Debug(lg).Log("msg", fmt.Sprintf("Key: %s, Value: %d, Labels: %v", key, value, labels))

	gauge, known := exposed[key]
	if !known {
		// first sighting of this key: create, remember and register the gauge
		gauge = prometheus.NewGaugeVec(
			prometheus.GaugeOpts{
				Name: "dd_" + key,
				Help: "N/A",
			},
			labels,
		)
		exposed[key] = gauge
		prometheus.MustRegister(gauge)
	}

	gauge.WithLabelValues(labelValues...).Set(float64(value))
	lastUpdate.WithLabelValues("global").Set(float64(time.Now().Unix()))
}
func systemAlive(listenAddress string, metricsPath string) {
// systemd alive check
var metricsURL string
if !strings.HasPrefix(listenAddress, ":") {
// User has provided address + port
metricsURL = "http://" + listenAddress + metricsPath
} else {
// User has provided :port only - we need to check ourselves on 127.0.0.1
metricsURL = "http://127.0.0.1" + listenAddress + metricsPath
}
// Call the metrics URL...
res, err := http.Get(metricsURL)
if err == nil {
// ... and notify systemd that everything was ok
daemon.SdNotify(false, daemon.SdNotifyWatchdog)
} else {
// ... do nothing if it was not ok, but log. Systemd will restart soon.
level.Warn(lg).Log("msg", fmt.Sprintf("liveness check failed: %s", err.Error()))
}
// Read all away or else we'll run out of sockets sooner or later
_, _ = io.ReadAll(res.Body)
defer res.Body.Close()
}