From c5927cf741e5f957ee8a9c2a720ca0471c72fe11 Mon Sep 17 00:00:00 2001 From: Sarkis Varozian Date: Wed, 28 Feb 2018 09:10:58 -0800 Subject: [PATCH 01/30] docs for new http and script checks --- README.md | 34 +++++++++++++++++++++++++++------- 1 file changed, 27 insertions(+), 7 deletions(-) diff --git a/README.md b/README.md index 4647ac8..37c5bf5 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,6 @@ # health-checker -A simple HTTP server that will return `200 OK` if the given TCP ports are all successfully accepting connections. +A simple HTTP server that will return `200 OK` if the given checks are all successful. ## Motivation @@ -12,14 +12,15 @@ the TCP Listeners of both services are successfully accepting connections. But t a single TCP port, or an HTTP(S) endpoint. As a result, our use case just isn't supported natively by AWS. We wrote health-checker so that we could run a daemon on the server that reports the true health of the server by -attempting to open a TCP connection to more than one port when it receives an inbound HTTP request on the given listener. +checking more conditions than a just single port or HTTP request while still allowing for a single HTTP request on the given listener. ## How It Works When health-checker is started, it will listen for inbound HTTP requests for any URL on the IP address and port specified by `--listener`. When it receives a request, it will attempt to open TCP connections to each of the ports specified by -an instance of `--port`. If all TCP connections succeed, it will return `HTTP 200 OK`. If any TCP connection fails, it -will return `HTTP 504 Gateway Not Found`. +an instance of `--port`, send a request out to each of the HTTP endpoints specified by `--http`, and run all scripts +specified by `--script`. If all TCP connections succeed, HTTP requests return a 2XX status code, and all specified scripts return +with a zero exit code, it will return `HTTP 200 OK`. If any of the specified checks fail, it will return `HTTP 504 Gateway Not Found`. Configure your AWS Health Check to only pass the Health Check on `HTTP 200 OK`. Now when an HTTP Health Check request comes in, all desired TCP ports will be checked. @@ -42,19 +43,38 @@ health-checker [options] | Option | Description | Default | ------ | ----------- | ------- -| `--port` | The port number on which a TCP connection will be attempted. Specify one or more times. | | +| `--port` | The port number on which a TCP connection will be attempted. Can be specified multiple times. | | +| `--http` | The url:port to check for a 2XX status code. Can be specified multiple times. | | +| `--script` | Path to an executable script that should return with a 0 exit status if successful. Can be specified multiple times. | | | `--listener` | The IP address and port on which inbound HTTP connections will be accepted. | `0.0.0.0:5000` | `--log-level` | Set the log level to LEVEL. Must be one of: `panic`, `fatal`, `error,` `warning`, `info`, or `debug` | `info` | `--help` | Show the help screen | | | `--version` | Show the program's version | | -#### Example +#### Examples Run a listener on port 6000 that accepts all inbound HTTP connections for any URL. When the request is received, -attempt to open TCP connections to port 5432 and 3306. If both succeed, return `HTTP 200 OK`. If any fails, return `HTTP +attempt to open TCP connections to port 5432 and 3306. If both succeed, return `HTTP 200 OK`. If any fail, return `HTTP 504 Gateway Not Found`. ``` health-checker --listener "0.0.0.0:6000" --port 5432 --port 3306 ``` +Run a listener on port 6000 that accepts all inbound HTTP connections for any URL. When the request is received, +attempt to open TCP connections to port 5432 and send an HTTP request to `localhost:80`. If a connection is successfully opened +to port 5432 and the service at `localhost:80` responds with a 2XX status code, return `HTTP 200 OK`. If any fail, return `HTTP +504 Gateway Not Found`. + +``` +health-checker --listener "0.0.0.0:6000" --port 5432 --http "localhost:80" +``` + +Run a listener on port 6000 that accepts all inbound HTTP connections for any URL. When the request is received, +attempt to open TCP connections to port 5432, send an HTTP request to `localhost:80`, and run the script at `/usr/local/bin/check_foo.sh`. +If a connection is successfully opened to port 5432, the service at `localhost:80` responds with a 2XX status code, and the script +exits with a zero exit status code, return `HTTP 200 OK`. If any fail, return `HTTP 504 Gateway Not Found`. + +``` +health-checker --listener "0.0.0.0:6000" --port 5432 --http "localhost:80" --script "/usr/local/bin/check_foo.sh" +``` From b452fc1d5a62fa12f5dcaa643a83b690d91b72b7 Mon Sep 17 00:00:00 2001 From: Sarkis Varozian Date: Wed, 28 Feb 2018 09:28:50 -0800 Subject: [PATCH 02/30] fix 504 http status --- README.md | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index 37c5bf5..f9c8153 100644 --- a/README.md +++ b/README.md @@ -20,7 +20,7 @@ When health-checker is started, it will listen for inbound HTTP requests for any by `--listener`. When it receives a request, it will attempt to open TCP connections to each of the ports specified by an instance of `--port`, send a request out to each of the HTTP endpoints specified by `--http`, and run all scripts specified by `--script`. If all TCP connections succeed, HTTP requests return a 2XX status code, and all specified scripts return -with a zero exit code, it will return `HTTP 200 OK`. If any of the specified checks fail, it will return `HTTP 504 Gateway Not Found`. +with a zero exit code, it will return `HTTP 200 OK`. If any of the specified checks fail, it will return `HTTP 504 GATEWAY TIMEOUT`. Configure your AWS Health Check to only pass the Health Check on `HTTP 200 OK`. Now when an HTTP Health Check request comes in, all desired TCP ports will be checked. @@ -55,7 +55,7 @@ health-checker [options] Run a listener on port 6000 that accepts all inbound HTTP connections for any URL. When the request is received, attempt to open TCP connections to port 5432 and 3306. If both succeed, return `HTTP 200 OK`. If any fail, return `HTTP -504 Gateway Not Found`. +504 GATEWAY TIMEOUT`. ``` health-checker --listener "0.0.0.0:6000" --port 5432 --port 3306 @@ -64,7 +64,7 @@ health-checker --listener "0.0.0.0:6000" --port 5432 --port 3306 Run a listener on port 6000 that accepts all inbound HTTP connections for any URL. When the request is received, attempt to open TCP connections to port 5432 and send an HTTP request to `localhost:80`. If a connection is successfully opened to port 5432 and the service at `localhost:80` responds with a 2XX status code, return `HTTP 200 OK`. If any fail, return `HTTP -504 Gateway Not Found`. +504 GATEWAY TIMEOUT`. ``` health-checker --listener "0.0.0.0:6000" --port 5432 --http "localhost:80" @@ -73,7 +73,7 @@ health-checker --listener "0.0.0.0:6000" --port 5432 --http "localhost:80" Run a listener on port 6000 that accepts all inbound HTTP connections for any URL. When the request is received, attempt to open TCP connections to port 5432, send an HTTP request to `localhost:80`, and run the script at `/usr/local/bin/check_foo.sh`. If a connection is successfully opened to port 5432, the service at `localhost:80` responds with a 2XX status code, and the script -exits with a zero exit status code, return `HTTP 200 OK`. If any fail, return `HTTP 504 Gateway Not Found`. +exits with a zero exit status code, return `HTTP 200 OK`. If any fail, return `HTTP 504 GATEWAY TIMEOUT`. ``` health-checker --listener "0.0.0.0:6000" --port 5432 --http "localhost:80" --script "/usr/local/bin/check_foo.sh" From 73592015fff81ab96263a453ddd5e9667c2f55b1 Mon Sep 17 00:00:00 2001 From: Sarkis Varozian Date: Wed, 28 Feb 2018 11:27:01 -0800 Subject: [PATCH 03/30] a new approach using a config file --- README.md | 44 ++++++++++++++------------------------------ 1 file changed, 14 insertions(+), 30 deletions(-) diff --git a/README.md b/README.md index f9c8153..754b0bb 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,6 @@ # health-checker -A simple HTTP server that will return `200 OK` if the given checks are all successful. +A simple HTTP server that will return `200 OK` if the configured checks are all successful. ## Motivation @@ -16,11 +16,10 @@ checking more conditions than a just single port or HTTP request while still all ## How It Works -When health-checker is started, it will listen for inbound HTTP requests for any URL on the IP address and port specified -by `--listener`. When it receives a request, it will attempt to open TCP connections to each of the ports specified by -an instance of `--port`, send a request out to each of the HTTP endpoints specified by `--http`, and run all scripts -specified by `--script`. If all TCP connections succeed, HTTP requests return a 2XX status code, and all specified scripts return -with a zero exit code, it will return `HTTP 200 OK`. If any of the specified checks fail, it will return `HTTP 504 GATEWAY TIMEOUT`. +When health-checker is started, it will parse a YAML file specified with the `--config` flag (example config +in [examples/config.yml.simple]()) and listen for inbound HTTP requests for any URL on the IP address and port specified +by `listener` directive. When it receives a request, it will attempt to run all checks specified in the config +and return `HTTP 200 OK` if all checks pass. If any of the checks fail, it will return `HTTP 504 GATEWAY TIMEOUT`. Configure your AWS Health Check to only pass the Health Check on `HTTP 200 OK`. Now when an HTTP Health Check request comes in, all desired TCP ports will be checked. @@ -43,38 +42,23 @@ health-checker [options] | Option | Description | Default | ------ | ----------- | ------- -| `--port` | The port number on which a TCP connection will be attempted. Can be specified multiple times. | | -| `--http` | The url:port to check for a 2XX status code. Can be specified multiple times. | | -| `--script` | Path to an executable script that should return with a 0 exit status if successful. Can be specified multiple times. | | -| `--listener` | The IP address and port on which inbound HTTP connections will be accepted. | `0.0.0.0:5000` +| `--config` | A YAML config file containing options and checks | | | `--log-level` | Set the log level to LEVEL. Must be one of: `panic`, `fatal`, `error,` `warning`, `info`, or `debug` | `info` | `--help` | Show the help screen | | | `--version` | Show the program's version | | -#### Examples +#### Config File Options -Run a listener on port 6000 that accepts all inbound HTTP connections for any URL. When the request is received, -attempt to open TCP connections to port 5432 and 3306. If both succeed, return `HTTP 200 OK`. If any fail, return `HTTP -504 GATEWAY TIMEOUT`. +TODO: add more info on the config options -``` -health-checker --listener "0.0.0.0:6000" --port 5432 --port 3306 -``` +#### Examples -Run a listener on port 6000 that accepts all inbound HTTP connections for any URL. When the request is received, -attempt to open TCP connections to port 5432 and send an HTTP request to `localhost:80`. If a connection is successfully opened -to port 5432 and the service at `localhost:80` responds with a 2XX status code, return `HTTP 200 OK`. If any fail, return `HTTP -504 GATEWAY TIMEOUT`. +Parse configuration from `health-checker.yml` and run a listener that accepts all inbound HTTP connections for any URL. When +the request is received, attempt to run all checks specified in `health-checker.yml`. If all checks succeed, return `HTTP 200 OK`. +If any fail, return `HTTP 504 GATEWAY TIMEOUT`. ``` -health-checker --listener "0.0.0.0:6000" --port 5432 --http "localhost:80" +health-checker --config health-checker.yml ``` -Run a listener on port 6000 that accepts all inbound HTTP connections for any URL. When the request is received, -attempt to open TCP connections to port 5432, send an HTTP request to `localhost:80`, and run the script at `/usr/local/bin/check_foo.sh`. -If a connection is successfully opened to port 5432, the service at `localhost:80` responds with a 2XX status code, and the script -exits with a zero exit status code, return `HTTP 200 OK`. If any fail, return `HTTP 504 GATEWAY TIMEOUT`. - -``` -health-checker --listener "0.0.0.0:6000" --port 5432 --http "localhost:80" --script "/usr/local/bin/check_foo.sh" -``` +See [examples/]() for configuration examples. From 1a8dc74a58d39c1c784a73fb5801f15346e763b2 Mon Sep 17 00:00:00 2001 From: Sarkis Varozian Date: Wed, 28 Feb 2018 12:24:23 -0800 Subject: [PATCH 04/30] changes from CR and better documentation --- README.md | 59 +++++++++++++++++++++++++++++++++++++++++-------------- 1 file changed, 44 insertions(+), 15 deletions(-) diff --git a/README.md b/README.md index 754b0bb..f538892 100644 --- a/README.md +++ b/README.md @@ -2,6 +2,15 @@ A simple HTTP server that will return `200 OK` if the configured checks are all successful. +## Quick start + +Put the following in `health-checks.yml`: + +TODO: include simple example of a file with two health checks + +and run `health-checker`. Now, requests to `0.0.0.0:5000` will return a 200 OK if the checks +specified in `health-checks.yml` pass. + ## Motivation We were setting up an AWS [Auto Scaling Group](http://docs.aws.amazon.com/autoscaling/latest/userguide/AutoScalingGroup.html) @@ -16,10 +25,10 @@ checking more conditions than a just single port or HTTP request while still all ## How It Works -When health-checker is started, it will parse a YAML file specified with the `--config` flag (example config -in [examples/config.yml.simple]()) and listen for inbound HTTP requests for any URL on the IP address and port specified -by `listener` directive. When it receives a request, it will attempt to run all checks specified in the config -and return `HTTP 200 OK` if all checks pass. If any of the checks fail, it will return `HTTP 504 GATEWAY TIMEOUT`. +When health-checker is started, it will parse a YAML file specified with the `--checks` option (example checks +in [examples/]()) and listen for inbound HTTP requests for any URL on the IP address and port specified +by `--listener`. When it receives a request, it will evaluate all checks and return `HTTP 200 OK` if all checks pass. +If any of the checks fail, it will return `HTTP 504 GATEWAY TIMEOUT`. Configure your AWS Health Check to only pass the Health Check on `HTTP 200 OK`. Now when an HTTP Health Check request comes in, all desired TCP ports will be checked. @@ -42,23 +51,43 @@ health-checker [options] | Option | Description | Default | ------ | ----------- | ------- -| `--config` | A YAML config file containing options and checks | | +| `--listener` | The IP address and port on which inbound HTTP connections will be accepted. | `0.0.0.0:5000` +| `--checks` | A YAML file containing checks which will be evaluated | `health-checks.yml` | `--log-level` | Set the log level to LEVEL. Must be one of: `panic`, `fatal`, `error,` `warning`, `info`, or `debug` | `info` | `--help` | Show the help screen | | | `--version` | Show the program's version | | -#### Config File Options +``` +health-checker --listener "0.0.0.0:6000" --checks "my-checks.yml" --log-level "warning" +``` +#### Checks -TODO: add more info on the config options +##### port -#### Examples +| Option | Pass Condition +| ------ | -------------- +| N/A | Pass if TCP connection is successfully established to `port` -Parse configuration from `health-checker.yml` and run a listener that accepts all inbound HTTP connections for any URL. When -the request is received, attempt to run all checks specified in `health-checker.yml`. If all checks succeed, return `HTTP 200 OK`. -If any fail, return `HTTP 504 GATEWAY TIMEOUT`. +TODO: add example yaml block -``` -health-checker --config health-checker.yml -``` +##### http + +| Option | Pass Condition +| -------------- | -------------- +| `status_codes` | Pass if HTTP request returns one of `status_codes` + +TODO: add note about HTTPS support here once confirming if it's available + +TODO: add example yaml block + +##### script + +| Option | Pass Condition +| ------ | -------------- +| N/A | Pass if script returns with a 0 exit status code. + +TODO: add example yaml block + +##### Examples -See [examples/]() for configuration examples. +See [examples/]() folder for more complete `health-checks.yml` examples. From dc2a791f00bf46c3d7ff1520f45a127abc4f954e Mon Sep 17 00:00:00 2001 From: Sarkis Varozian Date: Wed, 28 Feb 2018 14:27:07 -0800 Subject: [PATCH 05/30] convert current ports check to work with yaml file --- README.md | 41 ++++++++--------------- commands/cli.go | 4 --- commands/flags.go | 55 +++++++++++++++++++++---------- examples/health-checks.yml.simple | 3 ++ 4 files changed, 53 insertions(+), 50 deletions(-) create mode 100644 examples/health-checks.yml.simple diff --git a/README.md b/README.md index f538892..5ccbf96 100644 --- a/README.md +++ b/README.md @@ -25,10 +25,9 @@ checking more conditions than a just single port or HTTP request while still all ## How It Works -When health-checker is started, it will parse a YAML file specified with the `--checks` option (example checks -in [examples/]()) and listen for inbound HTTP requests for any URL on the IP address and port specified -by `--listener`. When it receives a request, it will evaluate all checks and return `HTTP 200 OK` if all checks pass. -If any of the checks fail, it will return `HTTP 504 GATEWAY TIMEOUT`. +When `health-checker` is started, it will parse a YAML file specified with the `--checks` option (see [examples folder](examples/)) +and listen for inbound HTTP requests for any URL on the IP address and port specified by `--listener`. When it receives a request, +it will evaluate all checks and return `HTTP 200 OK` if all checks pass. If any of the checks fail, it will return `HTTP 504 GATEWAY TIMEOUT`. Configure your AWS Health Check to only pass the Health Check on `HTTP 200 OK`. Now when an HTTP Health Check request comes in, all desired TCP ports will be checked. @@ -62,32 +61,18 @@ health-checker --listener "0.0.0.0:6000" --checks "my-checks.yml" --log-level "w ``` #### Checks -##### port +##### ports -| Option | Pass Condition -| ------ | -------------- -| N/A | Pass if TCP connection is successfully established to `port` +| Option(s) | Pass Condition +| --------- | -------------- +| N/A | Pass if TCP connection is successfully established to list of one or more `ports` -TODO: add example yaml block - -##### http - -| Option | Pass Condition -| -------------- | -------------- -| `status_codes` | Pass if HTTP request returns one of `status_codes` - -TODO: add note about HTTPS support here once confirming if it's available - -TODO: add example yaml block - -##### script - -| Option | Pass Condition -| ------ | -------------- -| N/A | Pass if script returns with a 0 exit status code. - -TODO: add example yaml block +```yaml +ports: + - 8080 + - 9090 +``` ##### Examples -See [examples/]() folder for more complete `health-checks.yml` examples. +See [examples folder](examples/) for more complete `health-checks.yml` examples. diff --git a/commands/cli.go b/commands/cli.go index 1a384d8..bb11f64 100644 --- a/commands/cli.go +++ b/commands/cli.go @@ -43,10 +43,6 @@ func CreateCli(version string) *cli.App { } func runHealthChecker(cliContext *cli.Context) error { - if allCliOptionsEmpty(cliContext) { - cli.ShowAppHelpAndExit(cliContext, 0) - } - opts, err := parseOptions(cliContext) if isDebugMode() { opts.Logger.Infof("Note: To enable debug mode, set %s to \"true\"", ENV_VAR_NAME_DEBUG_MODE) diff --git a/commands/flags.go b/commands/flags.go index 1eaaae6..cada8cf 100644 --- a/commands/flags.go +++ b/commands/flags.go @@ -2,21 +2,31 @@ package commands import ( "fmt" + "io/ioutil" + "os" + "strings" + + "gopkg.in/yaml.v2" + "github.com/gruntwork-io/health-checker/options" "github.com/gruntwork-io/gruntwork-cli/logging" "github.com/urfave/cli" "github.com/sirupsen/logrus" - "os" - "strings" ) +const DEFAULT_CHECKS_FILE = "health-checks.yml" const DEFAULT_LISTENER_IP_ADDRESS = "0.0.0.0" const DEFAULT_LISTENER_PORT = 5500 const ENV_VAR_NAME_DEBUG_MODE = "HEALTH_CHECKER_DEBUG" -var portFlag = cli.IntSliceFlag{ - Name: "port", - Usage: fmt.Sprintf("[Required] The port number on which a TCP connection will be attempted. Specify one or more times. Example: 8000"), +type Checks struct { + Ports []int `yaml:"ports"` +} + +var checksFlag = cli.StringFlag{ + Name: "checks", + Usage: fmt.Sprintf("[Required] A YAML file containing health checks."), + Value: DEFAULT_CHECKS_FILE, } var listenerFlag = cli.StringFlag{ @@ -32,17 +42,11 @@ var logLevelFlag = cli.StringFlag{ } var defaultFlags = []cli.Flag{ - portFlag, + checksFlag, listenerFlag, logLevelFlag, } -// Return true if no options at all were passed to the CLI. Note that we are specifically testing for flags, some of which -// are required, not just args. -func allCliOptionsEmpty(cliContext *cli.Context) bool { - return cliContext.NumFlags() == 0 -} - // Parse and validate all CLI options func parseOptions(cliContext *cli.Context) (*options.Options, error) { logger := logging.GetLogger("health-checker") @@ -57,18 +61,33 @@ func parseOptions(cliContext *cli.Context) (*options.Options, error) { } logger.SetLevel(level) - ports := cliContext.IntSlice("port") - if len(ports) == 0 { - return nil, MissingParam(portFlag.Name) - } - listener := cliContext.String("listener") if listener == "" { return nil, MissingParam(listenerFlag.Name) } + checksFile := cliContext.String("checks") + if checksFile == "" { + return nil, MissingParam(checksFlag.Name) + } + checksFileContents, err := ioutil.ReadFile(checksFile) + if err != nil { + fmt.Print(err) + } + + var checks Checks + + err = yaml.Unmarshal(checksFileContents, &checks) + if err != nil{ + panic(err) + } + + if len(checks.Ports) == 0 { + panic(err) + } + return &options.Options{ - Ports: ports, + Ports: checks.Ports, Listener: listener, Logger: logger, }, nil diff --git a/examples/health-checks.yml.simple b/examples/health-checks.yml.simple new file mode 100644 index 0000000..5734636 --- /dev/null +++ b/examples/health-checks.yml.simple @@ -0,0 +1,3 @@ +ports: + - 5500 + - 6500 From 296b46f0de3229db0cbd2836e6bbcc35b0280587 Mon Sep 17 00:00:00 2001 From: Sarkis Varozian Date: Wed, 28 Feb 2018 16:55:53 -0800 Subject: [PATCH 06/30] use the struct to do tcp checks --- commands/cli.go | 1 - commands/flags.go | 33 ++++++++++++++++----------- examples/health-checks.yml.simple | 10 +++++--- options/options.go | 31 +++++++++++++++++++++++-- server/server.go | 38 +++++++++++++++++-------------- 5 files changed, 77 insertions(+), 36 deletions(-) diff --git a/commands/cli.go b/commands/cli.go index bb11f64..4480a53 100644 --- a/commands/cli.go +++ b/commands/cli.go @@ -52,7 +52,6 @@ func runHealthChecker(cliContext *cli.Context) error { return errors.WithStackTrace(err) } - opts.Logger.Infof("The Health Check will attempt to connect to the following ports via TCP: %v", opts.Ports) opts.Logger.Infof("Listening on Port %s...", opts.Listener) err = server.StartHttpServer(opts) if err != nil { diff --git a/commands/flags.go b/commands/flags.go index cada8cf..15bc434 100644 --- a/commands/flags.go +++ b/commands/flags.go @@ -19,9 +19,6 @@ const DEFAULT_LISTENER_IP_ADDRESS = "0.0.0.0" const DEFAULT_LISTENER_PORT = 5500 const ENV_VAR_NAME_DEBUG_MODE = "HEALTH_CHECKER_DEBUG" -type Checks struct { - Ports []int `yaml:"ports"` -} var checksFlag = cli.StringFlag{ Name: "checks", @@ -70,27 +67,37 @@ func parseOptions(cliContext *cli.Context) (*options.Options, error) { if checksFile == "" { return nil, MissingParam(checksFlag.Name) } + + checks, err := parseChecksFile(checksFile) + if err != nil { + return nil, err + } + + return &options.Options{ + Checks: checks, + Listener: listener, + Logger: logger, + }, nil +} + +func parseChecksFile(checksFile string) (*options.Checks, error) { checksFileContents, err := ioutil.ReadFile(checksFile) if err != nil { - fmt.Print(err) + return nil, err } - var checks Checks + var checks options.Checks err = yaml.Unmarshal(checksFileContents, &checks) if err != nil{ - panic(err) + return nil, err } - if len(checks.Ports) == 0 { - panic(err) + if len(checks.TcpChecks) + len(checks.HttpChecks) + len(checks.ScriptChecks) == 0 { + return nil, err } - return &options.Options{ - Ports: checks.Ports, - Listener: listener, - Logger: logger, - }, nil + return &checks, nil } // Some error types are simple enough that we'd rather just show the error message directly instead of vomiting out a diff --git a/examples/health-checks.yml.simple b/examples/health-checks.yml.simple index 5734636..d430311 100644 --- a/examples/health-checks.yml.simple +++ b/examples/health-checks.yml.simple @@ -1,3 +1,7 @@ -ports: - - 5500 - - 6500 +tcp: + - name: service1 + host: localhost + port: 5500 + - name: service2 + host: 0.0.0.0 + port: 6500 diff --git a/options/options.go b/options/options.go index 4e34f21..d71355d 100644 --- a/options/options.go +++ b/options/options.go @@ -1,10 +1,37 @@ package options -import "github.com/sirupsen/logrus" +import ( + "github.com/sirupsen/logrus" +) + +type Tcp struct { + Name string `yaml:"name"` + Host string `yaml:"host"` + Port int `yaml:"port"` +} + +type Http struct { + Name string `yaml:"name"` + Host string `yaml:"host"` + Port int `yaml:"port"` + SuccessStatusCodes []int `yaml:"success_status_codes"` + BodyRegex string `yaml:"body_regex"` +} + +type Script struct { + Script string `yaml:"script"` + SuccessExitCodes []int `yaml:"success_exit_codes"` +} + +type Checks struct { + TcpChecks []Tcp `yaml:"tcp"` + HttpChecks []Http `yaml:"http"` + ScriptChecks []Script `yaml:"scripts"` +} // The options accepted by this CLI tool type Options struct { - Ports []int + Checks *Checks Listener string Logger *logrus.Logger } diff --git a/server/server.go b/server/server.go index 7cb5774..bccde5d 100644 --- a/server/server.go +++ b/server/server.go @@ -5,7 +5,9 @@ import ( "net" "fmt" "sync" + "sync/atomic" "time" + "github.com/gruntwork-io/health-checker/options" "github.com/gruntwork-io/gruntwork-cli/errors" ) @@ -17,7 +19,7 @@ type httpResponse struct { func StartHttpServer(opts *options.Options) error { http.HandleFunc("/", func(w http.ResponseWriter, r *http.Request) { - resp := checkTcpPorts(opts) + resp := checkHealthChecks(opts) err := writeHttpResponse(w, resp) if err != nil { opts.Logger.Error("Failed to send HTTP response. Exiting.") @@ -32,49 +34,51 @@ func StartHttpServer(opts *options.Options) error { return nil } -// Check that we can open a TPC connection to all the ports in opts.Ports -func checkTcpPorts(opts *options.Options) *httpResponse { +func checkHealthChecks(opts *options.Options) *httpResponse { logger := opts.Logger logger.Infof("Received inbound request. Beginning health checks...") - allPortsValid := true - + // initialize failedChecks to 0 + var failedChecks uint64 var waitGroup = sync.WaitGroup{} - for _, port := range opts.Ports { + for _, tcpCheck := range opts.Checks.TcpChecks { + name := tcpCheck.Name + host := tcpCheck.Host + port := tcpCheck.Port waitGroup.Add(1) go func(port int) { - err := attemptTcpConnection(port, opts) + err := attemptTcpConnection(tcpCheck.Host, tcpCheck.Port,opts) if err != nil { - logger.Warnf("TCP connection to port %d FAILED: %s", port, err) - allPortsValid = false + logger.Warnf("TCP connection to %s at %s:%d FAILED: %s", name, host, port, err) + atomic.AddUint64(&failedChecks, 1) } else { - logger.Infof("TCP connection to port %d successful", port) + logger.Infof("TCP connection to %s at %s:%d successful", name, host, port) } - waitGroup.Done() }(port) } waitGroup.Wait() - if allPortsValid { - logger.Infof("All health checks passed. Returning HTTP 200 response.\n") - return &httpResponse{ StatusCode: http.StatusOK, Body: "OK" } - } else { + failedChecksFinal := atomic.LoadUint64(&failedChecks) + if failedChecksFinal > 0 { logger.Infof("At least one health check failed. Returning HTTP 504 response.\n") return &httpResponse{ StatusCode: http.StatusGatewayTimeout, Body: "At least one health check failed" } + } else { + logger.Infof("All health checks passed. Returning HTTP 200 response.\n") + return &httpResponse{ StatusCode: http.StatusOK, Body: "OK" } } } // Attempt to open a TCP connection to the given port -func attemptTcpConnection(port int, opts *options.Options) error { +func attemptTcpConnection(host string, port int, opts *options.Options) error { logger := opts.Logger logger.Infof("Attempting to connect to port %d via TCP...", port) defaultTimeout := time.Second * 5 - conn, err := net.DialTimeout("tcp", fmt.Sprintf("0.0.0.0:%d", port), defaultTimeout) + conn, err := net.DialTimeout("tcp", fmt.Sprintf("%s:%d", host, port), defaultTimeout) if err != nil { return err } From f8e392f7b89c55a5bef7082e8666f8592312bd8f Mon Sep 17 00:00:00 2001 From: Sarkis Varozian Date: Wed, 28 Feb 2018 17:16:17 -0800 Subject: [PATCH 07/30] fixes from CR --- commands/flags.go | 3 ++- server/server.go | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/commands/flags.go b/commands/flags.go index 15bc434..67ada4a 100644 --- a/commands/flags.go +++ b/commands/flags.go @@ -2,6 +2,7 @@ package commands import ( "fmt" + "errors" "io/ioutil" "os" "strings" @@ -94,7 +95,7 @@ func parseChecksFile(checksFile string) (*options.Checks, error) { } if len(checks.TcpChecks) + len(checks.HttpChecks) + len(checks.ScriptChecks) == 0 { - return nil, err + return nil, errors.New("no checks found: must specify at least one check") } return &checks, nil diff --git a/server/server.go b/server/server.go index bccde5d..800cc62 100644 --- a/server/server.go +++ b/server/server.go @@ -55,7 +55,7 @@ func checkHealthChecks(opts *options.Options) *httpResponse { } else { logger.Infof("TCP connection to %s at %s:%d successful", name, host, port) } - waitGroup.Done() + defer waitGroup.Done() }(port) } From b8ce7a9d65e5bcaedbf24f56eda60c03e2a50847 Mon Sep 17 00:00:00 2001 From: Sarkis Varozian Date: Wed, 28 Feb 2018 18:38:15 -0800 Subject: [PATCH 08/30] add http checkers --- server/server.go | 112 ++++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 106 insertions(+), 6 deletions(-) diff --git a/server/server.go b/server/server.go index 800cc62..183a278 100644 --- a/server/server.go +++ b/server/server.go @@ -7,9 +7,12 @@ import ( "sync" "sync/atomic" "time" + gerrors "errors" "github.com/gruntwork-io/health-checker/options" "github.com/gruntwork-io/gruntwork-cli/errors" + "io/ioutil" + "strings" ) type httpResponse struct { @@ -47,8 +50,8 @@ func checkHealthChecks(opts *options.Options) *httpResponse { host := tcpCheck.Host port := tcpCheck.Port waitGroup.Add(1) - go func(port int) { - err := attemptTcpConnection(tcpCheck.Host, tcpCheck.Port,opts) + go func(name string, host string, port int) { + err := checkTcpConnection(name, host, port, opts) if err != nil { logger.Warnf("TCP connection to %s at %s:%d FAILED: %s", name, host, port, err) atomic.AddUint64(&failedChecks, 1) @@ -56,9 +59,43 @@ func checkHealthChecks(opts *options.Options) *httpResponse { logger.Infof("TCP connection to %s at %s:%d successful", name, host, port) } defer waitGroup.Done() - }(port) + }(name, host, port) } + for _, httpCheck := range opts.Checks.HttpChecks { + name := httpCheck.Name + host := httpCheck.Host + port := httpCheck.Port + successCodes := httpCheck.SuccessStatusCodes + expected := httpCheck.BodyRegex + waitGroup.Add(1) + go func(name string, host string, port int, successCodes []int, expected string) { + if len(successCodes) > 0 { + err := checkHttpResponse(name, host, port, successCodes, opts) + if err != nil { + logger.Warnf("HTTP Status check to %s at %s:%d FAILED: %s", name, host, port, err) + atomic.AddUint64(&failedChecks, 1) + } else { + logger.Infof("HTTP Status check to %s at %s:%d successful", name, host, port) + } + } else if len(expected) > 0 { + err := checkHttpResponseBody(name, host, port, expected, opts) + if err != nil { + logger.Warnf("HTTP Body check to %s at %s:%d FAILED: %s", name, host, port, err) + atomic.AddUint64(&failedChecks, 1) + } else { + logger.Infof("HTTP Body check to %s at %s:%d successful", name, host, port) + } + } else { + logger.Warnf("FAILED: At least one of success_codes or body_regex not specified for %s", name) + atomic.AddUint64(&failedChecks, 1) + } + defer waitGroup.Done() + }(name, host, port, successCodes, expected) + } + + // TODO: implement scriptCheck logic + waitGroup.Wait() failedChecksFinal := atomic.LoadUint64(&failedChecks) @@ -71,10 +108,9 @@ func checkHealthChecks(opts *options.Options) *httpResponse { } } -// Attempt to open a TCP connection to the given port -func attemptTcpConnection(host string, port int, opts *options.Options) error { +func checkTcpConnection(name string, host string, port int, opts *options.Options) error { logger := opts.Logger - logger.Infof("Attempting to connect to port %d via TCP...", port) + logger.Infof("Attempting to connect to %s at %s:%d via TCP...", name, host, port) defaultTimeout := time.Second * 5 @@ -88,6 +124,70 @@ func attemptTcpConnection(host string, port int, opts *options.Options) error { return nil } +func checkHttpResponse(name string, host string, port int, successCodes []int, opts *options.Options) error { + logger := opts.Logger + logger.Infof("Checking %s at %s:%d via HTTP...", name, host, port) + + defaultTimeout := time.Second * 5 + client := http.Client{ + Timeout: defaultTimeout, + } + resp, err := client.Get(fmt.Sprintf("http://%s:%d", host, port)) + if err != nil { + return err + } + + if contains(successCodes, resp.StatusCode){ + // Success! resp has one of the success_codes + return nil + } else { + return gerrors.New(fmt.Sprintf("http status code %s was not one of %v", resp.StatusCode, successCodes)) + } +} + +// TODO: move into helpers +func contains(s []int, e int) bool { + for _, a := range s { + if a == e { + return true + } + } + return false +} + +func checkHttpResponseBody(name string, host string, port int, expected string, opts *options.Options) error { + logger := opts.Logger + logger.Infof("Checking HTTP response body for %s at %s:%d...", name, host, port) + + defaultTimeout := time.Second * 5 + client := http.Client{ + Timeout: defaultTimeout, + } + resp, err := client.Get(fmt.Sprintf("http://%s:%d", host, port)) + if err != nil { + return err + } + + defer resp.Body.Close() + body, err := ioutil.ReadAll(resp.Body) + + if strings.Contains(string(body), expected){ + // Success! resp body has expected string + return nil + } else { + return gerrors.New(fmt.Sprintf("expected %s in http body: %s", expected, body)) + } +} + +//func checkScript(script string, expectedExitStatus int, opts *options.Options) error { +// logger := opts.Logger +// logger.Infof("Checking script %s for exit status %d...", script, expectedExitStatus) + + //defaultTimeout := time.Second * 5 + + // TODO: add code here +//} + func writeHttpResponse(w http.ResponseWriter, resp *httpResponse) error { w.WriteHeader(resp.StatusCode) _, err := w.Write([]byte(resp.Body)) From 667aa16e5987550b238e0b6fe1910c144750bb07 Mon Sep 17 00:00:00 2001 From: Sarkis Varozian Date: Wed, 28 Feb 2018 20:38:37 -0800 Subject: [PATCH 09/30] adjust debug mode logic --- commands/cli.go | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/commands/cli.go b/commands/cli.go index 4480a53..0a8dfa7 100644 --- a/commands/cli.go +++ b/commands/cli.go @@ -44,11 +44,10 @@ func CreateCli(version string) *cli.App { func runHealthChecker(cliContext *cli.Context) error { opts, err := parseOptions(cliContext) - if isDebugMode() { + if err != nil && !isDebugMode() { opts.Logger.Infof("Note: To enable debug mode, set %s to \"true\"", ENV_VAR_NAME_DEBUG_MODE) return err - } - if err != nil { + } else if err != nil { return errors.WithStackTrace(err) } From f6c2b8cb0da9dd4cde4e5e50395dff2201d14b07 Mon Sep 17 00:00:00 2001 From: Sarkis Varozian Date: Wed, 28 Feb 2018 20:51:52 -0800 Subject: [PATCH 10/30] update README and cleanup --- README.md | 27 ++++++++++++--------------- commands/flags.go | 8 ++++---- 2 files changed, 16 insertions(+), 19 deletions(-) diff --git a/README.md b/README.md index 5ccbf96..8883efc 100644 --- a/README.md +++ b/README.md @@ -6,9 +6,19 @@ A simple HTTP server that will return `200 OK` if the configured checks are all Put the following in `health-checks.yml`: -TODO: include simple example of a file with two health checks +```yaml +tcp: + - name: tcpService1 + host: localhost + port: 5500 +http: + - name: httpService1 + host: 127.0.0.1 + port: 8080 + success_codes: [200, 204, 301, 302] +``` -and run `health-checker`. Now, requests to `0.0.0.0:5000` will return a 200 OK if the checks +and run `health-checker`. Now, requests to `0.0.0.0:5000` will return a 200 OK if all the checks specified in `health-checks.yml` pass. ## Motivation @@ -59,19 +69,6 @@ health-checker [options] ``` health-checker --listener "0.0.0.0:6000" --checks "my-checks.yml" --log-level "warning" ``` -#### Checks - -##### ports - -| Option(s) | Pass Condition -| --------- | -------------- -| N/A | Pass if TCP connection is successfully established to list of one or more `ports` - -```yaml -ports: - - 8080 - - 9090 -``` ##### Examples diff --git a/commands/flags.go b/commands/flags.go index 67ada4a..832e753 100644 --- a/commands/flags.go +++ b/commands/flags.go @@ -1,18 +1,18 @@ package commands import ( - "fmt" "errors" + "fmt" "io/ioutil" "os" "strings" "gopkg.in/yaml.v2" - "github.com/gruntwork-io/health-checker/options" "github.com/gruntwork-io/gruntwork-cli/logging" - "github.com/urfave/cli" + "github.com/gruntwork-io/health-checker/options" "github.com/sirupsen/logrus" + "github.com/urfave/cli" ) const DEFAULT_CHECKS_FILE = "health-checks.yml" @@ -23,7 +23,7 @@ const ENV_VAR_NAME_DEBUG_MODE = "HEALTH_CHECKER_DEBUG" var checksFlag = cli.StringFlag{ Name: "checks", - Usage: fmt.Sprintf("[Required] A YAML file containing health checks."), + Usage: fmt.Sprintf("[Required] A YAML file containing health checks. Default: %s", DEFAULT_CHECKS_FILE), Value: DEFAULT_CHECKS_FILE, } From 687b55017bfb52c88964f8c85b74292acc8553ac Mon Sep 17 00:00:00 2001 From: Sarkis Varozian Date: Thu, 1 Mar 2018 09:07:11 -0800 Subject: [PATCH 11/30] comments and cleanup --- server/server.go | 31 +++++++++++++++---------------- 1 file changed, 15 insertions(+), 16 deletions(-) diff --git a/server/server.go b/server/server.go index 183a278..9e431eb 100644 --- a/server/server.go +++ b/server/server.go @@ -1,23 +1,23 @@ package server import ( - "net/http" - "net" + gerrors "errors" "fmt" + "io/ioutil" + "net" + "net/http" + "strings" "sync" "sync/atomic" "time" - gerrors "errors" - "github.com/gruntwork-io/health-checker/options" "github.com/gruntwork-io/gruntwork-cli/errors" - "io/ioutil" - "strings" + "github.com/gruntwork-io/health-checker/options" ) type httpResponse struct { StatusCode int - Body string + Body string } func StartHttpServer(opts *options.Options) error { @@ -41,7 +41,7 @@ func checkHealthChecks(opts *options.Options) *httpResponse { logger := opts.Logger logger.Infof("Received inbound request. Beginning health checks...") - // initialize failedChecks to 0 + // initialize failedChecks to 0, used as atomic counter for goroutines below var failedChecks uint64 var waitGroup = sync.WaitGroup{} @@ -78,7 +78,7 @@ func checkHealthChecks(opts *options.Options) *httpResponse { } else { logger.Infof("HTTP Status check to %s at %s:%d successful", name, host, port) } - } else if len(expected) > 0 { + } else if len(expected) > 0 { err := checkHttpResponseBody(name, host, port, expected, opts) if err != nil { logger.Warnf("HTTP Body check to %s at %s:%d FAILED: %s", name, host, port, err) @@ -101,10 +101,10 @@ func checkHealthChecks(opts *options.Options) *httpResponse { failedChecksFinal := atomic.LoadUint64(&failedChecks) if failedChecksFinal > 0 { logger.Infof("At least one health check failed. Returning HTTP 504 response.\n") - return &httpResponse{ StatusCode: http.StatusGatewayTimeout, Body: "At least one health check failed" } + return &httpResponse{StatusCode: http.StatusGatewayTimeout, Body: "At least one health check failed"} } else { logger.Infof("All health checks passed. Returning HTTP 200 response.\n") - return &httpResponse{ StatusCode: http.StatusOK, Body: "OK" } + return &httpResponse{StatusCode: http.StatusOK, Body: "OK"} } } @@ -137,7 +137,7 @@ func checkHttpResponse(name string, host string, port int, successCodes []int, o return err } - if contains(successCodes, resp.StatusCode){ + if contains(successCodes, resp.StatusCode) { // Success! resp has one of the success_codes return nil } else { @@ -171,7 +171,7 @@ func checkHttpResponseBody(name string, host string, port int, expected string, defer resp.Body.Close() body, err := ioutil.ReadAll(resp.Body) - if strings.Contains(string(body), expected){ + if strings.Contains(string(body), expected) { // Success! resp body has expected string return nil } else { @@ -183,9 +183,9 @@ func checkHttpResponseBody(name string, host string, port int, expected string, // logger := opts.Logger // logger.Infof("Checking script %s for exit status %d...", script, expectedExitStatus) - //defaultTimeout := time.Second * 5 +//defaultTimeout := time.Second * 5 - // TODO: add code here +// TODO: add code here //} func writeHttpResponse(w http.ResponseWriter, resp *httpResponse) error { @@ -197,4 +197,3 @@ func writeHttpResponse(w http.ResponseWriter, resp *httpResponse) error { return nil } - From de4dae1c502ba34ff767a1b23b43a5efc49d1bad Mon Sep 17 00:00:00 2001 From: Sarkis Varozian Date: Thu, 1 Mar 2018 22:12:20 -0800 Subject: [PATCH 12/30] use common Check interface --- commands/flags.go | 29 +++++++-- options/options.go | 28 ++------- server/server.go | 144 +++++++++++---------------------------------- 3 files changed, 62 insertions(+), 139 deletions(-) diff --git a/commands/flags.go b/commands/flags.go index 832e753..6f77897 100644 --- a/commands/flags.go +++ b/commands/flags.go @@ -13,6 +13,7 @@ import ( "github.com/gruntwork-io/health-checker/options" "github.com/sirupsen/logrus" "github.com/urfave/cli" + "github.com/gruntwork-io/health-checker/server" ) const DEFAULT_CHECKS_FILE = "health-checks.yml" @@ -45,6 +46,13 @@ var defaultFlags = []cli.Flag{ logLevelFlag, } +// define structure of yaml file +type Checks struct { + TcpChecks []server.TcpCheck `yaml:"tcp"` + HttpChecks []server.HttpCheck `yaml:"http"` + ScriptChecks []server.ScriptCheck `yaml:"scripts"` +} + // Parse and validate all CLI options func parseOptions(cliContext *cli.Context) (*options.Options, error) { logger := logging.GetLogger("health-checker") @@ -81,24 +89,35 @@ func parseOptions(cliContext *cli.Context) (*options.Options, error) { }, nil } -func parseChecksFile(checksFile string) (*options.Checks, error) { +func parseChecksFile(checksFile string) ([]options.Check, error) { checksFileContents, err := ioutil.ReadFile(checksFile) if err != nil { return nil, err } - var checks options.Checks + var checks Checks + var checkSlice []options.Check err = yaml.Unmarshal(checksFileContents, &checks) if err != nil{ return nil, err } - if len(checks.TcpChecks) + len(checks.HttpChecks) + len(checks.ScriptChecks) == 0 { + allChecksLen := len(checks.TcpChecks) + len(checks.HttpChecks) + len(checks.ScriptChecks) + if allChecksLen == 0 { return nil, errors.New("no checks found: must specify at least one check") + } else { + for n := range checks.TcpChecks { + checkSlice = append(checkSlice, checks.TcpChecks[n]) + } + for n := range checks.HttpChecks { + checkSlice = append(checkSlice, checks.HttpChecks[n]) + } + for n := range checks.ScriptChecks { + checkSlice = append(checkSlice, checks.ScriptChecks[n]) + } } - - return &checks, nil + return checkSlice, nil } // Some error types are simple enough that we'd rather just show the error message directly instead of vomiting out a diff --git a/options/options.go b/options/options.go index d71355d..2a91086 100644 --- a/options/options.go +++ b/options/options.go @@ -4,34 +4,14 @@ import ( "github.com/sirupsen/logrus" ) -type Tcp struct { - Name string `yaml:"name"` - Host string `yaml:"host"` - Port int `yaml:"port"` -} - -type Http struct { - Name string `yaml:"name"` - Host string `yaml:"host"` - Port int `yaml:"port"` - SuccessStatusCodes []int `yaml:"success_status_codes"` - BodyRegex string `yaml:"body_regex"` -} - -type Script struct { - Script string `yaml:"script"` - SuccessExitCodes []int `yaml:"success_exit_codes"` -} - -type Checks struct { - TcpChecks []Tcp `yaml:"tcp"` - HttpChecks []Http `yaml:"http"` - ScriptChecks []Script `yaml:"scripts"` +type Check interface { + //ValidateCheck() error + DoCheck(*Options) error } // The options accepted by this CLI tool type Options struct { - Checks *Checks + Checks []Check Listener string Logger *logrus.Logger } diff --git a/server/server.go b/server/server.go index 9e431eb..bda072b 100644 --- a/server/server.go +++ b/server/server.go @@ -1,12 +1,9 @@ package server import ( - gerrors "errors" "fmt" - "io/ioutil" "net" "net/http" - "strings" "sync" "sync/atomic" "time" @@ -15,6 +12,25 @@ import ( "github.com/gruntwork-io/health-checker/options" ) +type TcpCheck struct { + Name string `yaml:"name"` + Host string `yaml:"host"` + Port int `yaml:"port"` +} + +type HttpCheck struct { + Name string `yaml:"name"` + Host string `yaml:"host"` + Port int `yaml:"port"` + SuccessStatusCodes []int `yaml:"success_status_codes"` + BodyRegex string `yaml:"body_regex"` +} + +type ScriptCheck struct { + Script string `yaml:"script"` + SuccessExitCodes []int `yaml:"success_exit_codes"` +} + type httpResponse struct { StatusCode int Body string @@ -40,62 +56,26 @@ func StartHttpServer(opts *options.Options) error { func checkHealthChecks(opts *options.Options) *httpResponse { logger := opts.Logger logger.Infof("Received inbound request. Beginning health checks...") + fmt.Printf("%v", opts.Checks) // initialize failedChecks to 0, used as atomic counter for goroutines below var failedChecks uint64 var waitGroup = sync.WaitGroup{} - for _, tcpCheck := range opts.Checks.TcpChecks { - name := tcpCheck.Name - host := tcpCheck.Host - port := tcpCheck.Port + for _, check := range opts.Checks { waitGroup.Add(1) - go func(name string, host string, port int) { - err := checkTcpConnection(name, host, port, opts) + go func(check options.Check) { + defer waitGroup.Done() + err := check.DoCheck(opts) if err != nil { - logger.Warnf("TCP connection to %s at %s:%d FAILED: %s", name, host, port, err) + logger.Warnf("Check for %s FAILED: %s", check, err) atomic.AddUint64(&failedChecks, 1) } else { - logger.Infof("TCP connection to %s at %s:%d successful", name, host, port) + logger.Infof("Check for %s successful", check) } - defer waitGroup.Done() - }(name, host, port) + }(check) } - for _, httpCheck := range opts.Checks.HttpChecks { - name := httpCheck.Name - host := httpCheck.Host - port := httpCheck.Port - successCodes := httpCheck.SuccessStatusCodes - expected := httpCheck.BodyRegex - waitGroup.Add(1) - go func(name string, host string, port int, successCodes []int, expected string) { - if len(successCodes) > 0 { - err := checkHttpResponse(name, host, port, successCodes, opts) - if err != nil { - logger.Warnf("HTTP Status check to %s at %s:%d FAILED: %s", name, host, port, err) - atomic.AddUint64(&failedChecks, 1) - } else { - logger.Infof("HTTP Status check to %s at %s:%d successful", name, host, port) - } - } else if len(expected) > 0 { - err := checkHttpResponseBody(name, host, port, expected, opts) - if err != nil { - logger.Warnf("HTTP Body check to %s at %s:%d FAILED: %s", name, host, port, err) - atomic.AddUint64(&failedChecks, 1) - } else { - logger.Infof("HTTP Body check to %s at %s:%d successful", name, host, port) - } - } else { - logger.Warnf("FAILED: At least one of success_codes or body_regex not specified for %s", name) - atomic.AddUint64(&failedChecks, 1) - } - defer waitGroup.Done() - }(name, host, port, successCodes, expected) - } - - // TODO: implement scriptCheck logic - waitGroup.Wait() failedChecksFinal := atomic.LoadUint64(&failedChecks) @@ -108,13 +88,13 @@ func checkHealthChecks(opts *options.Options) *httpResponse { } } -func checkTcpConnection(name string, host string, port int, opts *options.Options) error { +func (c TcpCheck) DoCheck (opts *options.Options) error { logger := opts.Logger - logger.Infof("Attempting to connect to %s at %s:%d via TCP...", name, host, port) + logger.Infof("Attempting to connect to %s at %s:%d via TCP...", c.Name, c.Host, c.Port) defaultTimeout := time.Second * 5 - conn, err := net.DialTimeout("tcp", fmt.Sprintf("%s:%d", host, port), defaultTimeout) + conn, err := net.DialTimeout("tcp", fmt.Sprintf("%s:%d", c.Host, c.Port), defaultTimeout) if err != nil { return err } @@ -124,70 +104,14 @@ func checkTcpConnection(name string, host string, port int, opts *options.Option return nil } -func checkHttpResponse(name string, host string, port int, successCodes []int, opts *options.Options) error { - logger := opts.Logger - logger.Infof("Checking %s at %s:%d via HTTP...", name, host, port) - - defaultTimeout := time.Second * 5 - client := http.Client{ - Timeout: defaultTimeout, - } - resp, err := client.Get(fmt.Sprintf("http://%s:%d", host, port)) - if err != nil { - return err - } - - if contains(successCodes, resp.StatusCode) { - // Success! resp has one of the success_codes - return nil - } else { - return gerrors.New(fmt.Sprintf("http status code %s was not one of %v", resp.StatusCode, successCodes)) - } -} - -// TODO: move into helpers -func contains(s []int, e int) bool { - for _, a := range s { - if a == e { - return true - } - } - return false +func (c HttpCheck) DoCheck (opts *options.Options) error { + return nil } -func checkHttpResponseBody(name string, host string, port int, expected string, opts *options.Options) error { - logger := opts.Logger - logger.Infof("Checking HTTP response body for %s at %s:%d...", name, host, port) - - defaultTimeout := time.Second * 5 - client := http.Client{ - Timeout: defaultTimeout, - } - resp, err := client.Get(fmt.Sprintf("http://%s:%d", host, port)) - if err != nil { - return err - } - - defer resp.Body.Close() - body, err := ioutil.ReadAll(resp.Body) - - if strings.Contains(string(body), expected) { - // Success! resp body has expected string - return nil - } else { - return gerrors.New(fmt.Sprintf("expected %s in http body: %s", expected, body)) - } +func (c ScriptCheck) DoCheck (opts *options.Options) error { + return nil } -//func checkScript(script string, expectedExitStatus int, opts *options.Options) error { -// logger := opts.Logger -// logger.Infof("Checking script %s for exit status %d...", script, expectedExitStatus) - -//defaultTimeout := time.Second * 5 - -// TODO: add code here -//} - func writeHttpResponse(w http.ResponseWriter, resp *httpResponse) error { w.WriteHeader(resp.StatusCode) _, err := w.Write([]byte(resp.Body)) From c11575877e401405b04789559ebfbd66d45464b4 Mon Sep 17 00:00:00 2001 From: Sarkis Varozian Date: Thu, 1 Mar 2018 22:15:48 -0800 Subject: [PATCH 13/30] no longer need this to be a variable --- commands/flags.go | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/commands/flags.go b/commands/flags.go index 6f77897..d6c7627 100644 --- a/commands/flags.go +++ b/commands/flags.go @@ -103,8 +103,7 @@ func parseChecksFile(checksFile string) ([]options.Check, error) { return nil, err } - allChecksLen := len(checks.TcpChecks) + len(checks.HttpChecks) + len(checks.ScriptChecks) - if allChecksLen == 0 { + if len(checks.TcpChecks) + len(checks.HttpChecks) + len(checks.ScriptChecks) == 0 { return nil, errors.New("no checks found: must specify at least one check") } else { for n := range checks.TcpChecks { From 7b1127ef9a68666920419b546ce61fa06e0daf92 Mon Sep 17 00:00:00 2001 From: Sarkis Varozian Date: Fri, 2 Mar 2018 13:34:58 -0800 Subject: [PATCH 14/30] add HttpCheck DoCheck method --- server/server.go | 49 +++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 48 insertions(+), 1 deletion(-) diff --git a/server/server.go b/server/server.go index bda072b..670a360 100644 --- a/server/server.go +++ b/server/server.go @@ -2,8 +2,11 @@ package server import ( "fmt" + gerrors "errors" + "io/ioutil" "net" "net/http" + "strings" "sync" "sync/atomic" "time" @@ -105,7 +108,51 @@ func (c TcpCheck) DoCheck (opts *options.Options) error { } func (c HttpCheck) DoCheck (opts *options.Options) error { - return nil + logger := opts.Logger + logger.Infof("Checking %s at %s:%d via HTTP...", c.Name, c.Host, c.Port) + + defaultTimeout := time.Second * 5 + client := http.Client{ + Timeout: defaultTimeout, + } + resp, err := client.Get(fmt.Sprintf("http://%s:%d", c.Host, c.Port)) + if err != nil { + return err + } + + if len(c.SuccessStatusCodes) > 0 { + // when success_codes is defined we only need to check this + if contains(c.SuccessStatusCodes, resp.StatusCode) { + // Success! response has one of the success_codes + return nil + } else { + return gerrors.New(fmt.Sprintf("http status code %s was not one of %v", resp.StatusCode, c.SuccessStatusCodes)) + } + } else { + // since no success_codes defined we compare body with body_regex + defer resp.Body.Close() + body, err := ioutil.ReadAll(resp.Body) + if err != nil { + return err + } + + if strings.Contains(string(body), c.BodyRegex) { + // Success! resp body has expected string + return nil + } else { + return gerrors.New(fmt.Sprintf("expected %s in http body: %s", c.BodyRegex, body)) + } + } +} + +// TODO: move into helpers +func contains(s []int, e int) bool { + for _, a := range s { + if a == e { + return true + } + } + return false } func (c ScriptCheck) DoCheck (opts *options.Options) error { From c27b432e0db392fa2d0045a1c544f45f645002a8 Mon Sep 17 00:00:00 2001 From: Sarkis Varozian Date: Fri, 2 Mar 2018 18:52:26 -0800 Subject: [PATCH 15/30] add DoCheck for ScriptCheck --- server/server.go | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/server/server.go b/server/server.go index 670a360..0791fa8 100644 --- a/server/server.go +++ b/server/server.go @@ -1,11 +1,13 @@ package server import ( + "context" "fmt" gerrors "errors" "io/ioutil" "net" "net/http" + "os/exec" "strings" "sync" "sync/atomic" @@ -30,8 +32,8 @@ type HttpCheck struct { } type ScriptCheck struct { + Name string `yaml:"name"` Script string `yaml:"script"` - SuccessExitCodes []int `yaml:"success_exit_codes"` } type httpResponse struct { @@ -59,7 +61,6 @@ func StartHttpServer(opts *options.Options) error { func checkHealthChecks(opts *options.Options) *httpResponse { logger := opts.Logger logger.Infof("Received inbound request. Beginning health checks...") - fmt.Printf("%v", opts.Checks) // initialize failedChecks to 0, used as atomic counter for goroutines below var failedChecks uint64 @@ -156,6 +157,19 @@ func contains(s []int, e int) bool { } func (c ScriptCheck) DoCheck (opts *options.Options) error { + defaultTimeout := 5*time.Second + ctx, cancel := context.WithTimeout(context.Background(), defaultTimeout) + defer cancel() + + cmd := exec.CommandContext(ctx, c.Script) + _, err := cmd.Output() + if ctx.Err() == context.DeadlineExceeded { + // script timed out + return gerrors.New(fmt.Sprintf("check %s at %s FAILED to complete within %ds", c.Name, c.Script, defaultTimeout)) + } + if err != nil { + return gerrors.New(fmt.Sprintf("check %s at %s FAILED with a non-zero exit code", c.Name, c.Script)) + } return nil } From 607a644d2cc7b0b775c59e1a13b029e230906aeb Mon Sep 17 00:00:00 2001 From: Sarkis Varozian Date: Fri, 2 Mar 2018 23:17:06 -0800 Subject: [PATCH 16/30] add ValidateCheck for all checks and run after Unmarshaling --- commands/flags.go | 11 ++++++++--- options/options.go | 2 +- server/server.go | 41 +++++++++++++++++++++++++++++++++++++++++ 3 files changed, 50 insertions(+), 4 deletions(-) diff --git a/commands/flags.go b/commands/flags.go index d6c7627..22cf067 100644 --- a/commands/flags.go +++ b/commands/flags.go @@ -50,7 +50,7 @@ var defaultFlags = []cli.Flag{ type Checks struct { TcpChecks []server.TcpCheck `yaml:"tcp"` HttpChecks []server.HttpCheck `yaml:"http"` - ScriptChecks []server.ScriptCheck `yaml:"scripts"` + ScriptChecks []server.ScriptCheck `yaml:"script"` } // Parse and validate all CLI options @@ -77,7 +77,7 @@ func parseOptions(cliContext *cli.Context) (*options.Options, error) { return nil, MissingParam(checksFlag.Name) } - checks, err := parseChecksFile(checksFile) + checks, err := parseChecks(checksFile, logger) if err != nil { return nil, err } @@ -89,7 +89,7 @@ func parseOptions(cliContext *cli.Context) (*options.Options, error) { }, nil } -func parseChecksFile(checksFile string) ([]options.Check, error) { +func parseChecks(checksFile string, logger *logrus.Logger) ([]options.Check, error) { checksFileContents, err := ioutil.ReadFile(checksFile) if err != nil { return nil, err @@ -116,6 +116,11 @@ func parseChecksFile(checksFile string) ([]options.Check, error) { checkSlice = append(checkSlice, checks.ScriptChecks[n]) } } + + for _, check := range checkSlice { + check.ValidateCheck(logger) + } + return checkSlice, nil } diff --git a/options/options.go b/options/options.go index 2a91086..ddc34c6 100644 --- a/options/options.go +++ b/options/options.go @@ -5,8 +5,8 @@ import ( ) type Check interface { - //ValidateCheck() error DoCheck(*Options) error + ValidateCheck(*logrus.Logger) } // The options accepted by this CLI tool diff --git a/server/server.go b/server/server.go index 0791fa8..caac041 100644 --- a/server/server.go +++ b/server/server.go @@ -15,6 +15,7 @@ import ( "github.com/gruntwork-io/gruntwork-cli/errors" "github.com/gruntwork-io/health-checker/options" + "github.com/sirupsen/logrus" ) type TcpCheck struct { @@ -92,6 +93,18 @@ func checkHealthChecks(opts *options.Options) *httpResponse { } } +func (c TcpCheck) ValidateCheck (logger *logrus.Logger) { + if c.Name == "" { + missingRequiredKey("tcp","name", logger) + } + if c.Host == "" { + missingRequiredKey("tcp","host", logger) + } + if c.Port == 0 { + missingRequiredKey("tcp","port", logger) + } +} + func (c TcpCheck) DoCheck (opts *options.Options) error { logger := opts.Logger logger.Infof("Attempting to connect to %s at %s:%d via TCP...", c.Name, c.Host, c.Port) @@ -108,6 +121,21 @@ func (c TcpCheck) DoCheck (opts *options.Options) error { return nil } +func (c HttpCheck) ValidateCheck (logger *logrus.Logger) { + if c.Name == "" { + missingRequiredKey("http","name", logger) + } + if c.Host == "" { + missingRequiredKey("http","host", logger) + } + if c.Port == 0 { + missingRequiredKey("http","port", logger) + } + if len(c.SuccessStatusCodes) == 0 || c.BodyRegex == "" { + missingRequiredKey("http", "success_codes or body_regex", logger) + } +} + func (c HttpCheck) DoCheck (opts *options.Options) error { logger := opts.Logger logger.Infof("Checking %s at %s:%d via HTTP...", c.Name, c.Host, c.Port) @@ -156,6 +184,19 @@ func contains(s []int, e int) bool { return false } +func missingRequiredKey(check string, key string, logger *logrus.Logger) { + logger.Fatalf("Failed to parse YAML: %s check missing required key: %s", check, key) +} + +func (c ScriptCheck) ValidateCheck (logger *logrus.Logger) { + if c.Name == "" { + missingRequiredKey("script","name", logger) + } + if c.Script == "" { + missingRequiredKey("script","script", logger) + } +} + func (c ScriptCheck) DoCheck (opts *options.Options) error { defaultTimeout := 5*time.Second ctx, cancel := context.WithTimeout(context.Background(), defaultTimeout) From e98dd198cdb96a49f3292350f93a8c180774928d Mon Sep 17 00:00:00 2001 From: Sarkis Varozian Date: Sat, 3 Mar 2018 11:22:49 -0800 Subject: [PATCH 17/30] add a default http behavior to pass on 200 if no conditions defined --- README.md | 6 +++--- commands/flags.go | 20 ++++++++++---------- server/server.go | 16 ++++++++++------ 3 files changed, 23 insertions(+), 19 deletions(-) diff --git a/README.md b/README.md index 8883efc..f4b5511 100644 --- a/README.md +++ b/README.md @@ -35,7 +35,7 @@ checking more conditions than a just single port or HTTP request while still all ## How It Works -When `health-checker` is started, it will parse a YAML file specified with the `--checks` option (see [examples folder](examples/)) +When `health-checker` is started, it will parse a YAML file specified with the `--config` option (see [examples folder](examples/)) and listen for inbound HTTP requests for any URL on the IP address and port specified by `--listener`. When it receives a request, it will evaluate all checks and return `HTTP 200 OK` if all checks pass. If any of the checks fail, it will return `HTTP 504 GATEWAY TIMEOUT`. @@ -61,13 +61,13 @@ health-checker [options] | Option | Description | Default | ------ | ----------- | ------- | `--listener` | The IP address and port on which inbound HTTP connections will be accepted. | `0.0.0.0:5000` -| `--checks` | A YAML file containing checks which will be evaluated | `health-checks.yml` +| `--config` | A YAML file containing checks which will be evaluated | `health-checks.yml` | `--log-level` | Set the log level to LEVEL. Must be one of: `panic`, `fatal`, `error,` `warning`, `info`, or `debug` | `info` | `--help` | Show the help screen | | | `--version` | Show the program's version | | ``` -health-checker --listener "0.0.0.0:6000" --checks "my-checks.yml" --log-level "warning" +health-checker --listener "0.0.0.0:6000" --config "my-checks.yml" --log-level "warning" ``` ##### Examples diff --git a/commands/flags.go b/commands/flags.go index 22cf067..1a108e1 100644 --- a/commands/flags.go +++ b/commands/flags.go @@ -22,8 +22,8 @@ const DEFAULT_LISTENER_PORT = 5500 const ENV_VAR_NAME_DEBUG_MODE = "HEALTH_CHECKER_DEBUG" -var checksFlag = cli.StringFlag{ - Name: "checks", +var configFlag = cli.StringFlag{ + Name: "config", Usage: fmt.Sprintf("[Required] A YAML file containing health checks. Default: %s", DEFAULT_CHECKS_FILE), Value: DEFAULT_CHECKS_FILE, } @@ -41,7 +41,7 @@ var logLevelFlag = cli.StringFlag{ } var defaultFlags = []cli.Flag{ - checksFlag, + configFlag, listenerFlag, logLevelFlag, } @@ -72,12 +72,12 @@ func parseOptions(cliContext *cli.Context) (*options.Options, error) { return nil, MissingParam(listenerFlag.Name) } - checksFile := cliContext.String("checks") - if checksFile == "" { - return nil, MissingParam(checksFlag.Name) + configFile := cliContext.String("config") + if configFile == "" { + return nil, MissingParam(configFlag.Name) } - checks, err := parseChecks(checksFile, logger) + checks, err := parseChecksFromConfigFile(configFile, logger) if err != nil { return nil, err } @@ -89,8 +89,8 @@ func parseOptions(cliContext *cli.Context) (*options.Options, error) { }, nil } -func parseChecks(checksFile string, logger *logrus.Logger) ([]options.Check, error) { - checksFileContents, err := ioutil.ReadFile(checksFile) +func parseChecksFromConfigFile(configFile string, logger *logrus.Logger) ([]options.Check, error) { + configFileAsByteSlice, err := ioutil.ReadFile(configFile) if err != nil { return nil, err } @@ -98,7 +98,7 @@ func parseChecks(checksFile string, logger *logrus.Logger) ([]options.Check, err var checks Checks var checkSlice []options.Check - err = yaml.Unmarshal(checksFileContents, &checks) + err = yaml.Unmarshal(configFileAsByteSlice, &checks) if err != nil{ return nil, err } diff --git a/server/server.go b/server/server.go index caac041..c2c2604 100644 --- a/server/server.go +++ b/server/server.go @@ -131,9 +131,6 @@ func (c HttpCheck) ValidateCheck (logger *logrus.Logger) { if c.Port == 0 { missingRequiredKey("http","port", logger) } - if len(c.SuccessStatusCodes) == 0 || c.BodyRegex == "" { - missingRequiredKey("http", "success_codes or body_regex", logger) - } } func (c HttpCheck) DoCheck (opts *options.Options) error { @@ -155,9 +152,9 @@ func (c HttpCheck) DoCheck (opts *options.Options) error { // Success! response has one of the success_codes return nil } else { - return gerrors.New(fmt.Sprintf("http status code %s was not one of %v", resp.StatusCode, c.SuccessStatusCodes)) + return gerrors.New(fmt.Sprintf("http check %s wanted one of %v got %d", c.Name, c.SuccessStatusCodes, resp.Status)) } - } else { + } else if c.BodyRegex != ""{ // since no success_codes defined we compare body with body_regex defer resp.Body.Close() body, err := ioutil.ReadAll(resp.Body) @@ -169,7 +166,14 @@ func (c HttpCheck) DoCheck (opts *options.Options) error { // Success! resp body has expected string return nil } else { - return gerrors.New(fmt.Sprintf("expected %s in http body: %s", c.BodyRegex, body)) + return gerrors.New(fmt.Sprintf("http check %s wanted %s in http body got %s", c.Name, c.BodyRegex, body)) + } + } else { + // no success_codes or body_regex defined, only pass on 200 + if resp.StatusCode == http.StatusOK { + return nil + } else { + return gerrors.New(fmt.Sprintf("http check %s wanted status code 200 got %d", c.Name, resp.StatusCode)) } } } From 0321e74b522ec4d39fac37285f40ac0be42e5438 Mon Sep 17 00:00:00 2001 From: Sarkis Varozian Date: Sat, 3 Mar 2018 11:53:39 -0800 Subject: [PATCH 18/30] add more info on configs in README --- README.md | 48 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 48 insertions(+) diff --git a/README.md b/README.md index f4b5511..c922253 100644 --- a/README.md +++ b/README.md @@ -70,6 +70,54 @@ health-checker [options] health-checker --listener "0.0.0.0:6000" --config "my-checks.yml" --log-level "warning" ``` +#### Configuration + +##### tcp +| Key | Description +| ------- | ----------- +| `name` | (Required) Name of the health check +| `host` | (Required) IP or hostname to check +| `port` | (Required) `port` to check on `host` + +```yaml +tcp: + - name: tcpService1 + host: localhost + port: 5500 +``` + +##### http +| Key | Description +| -------------- | ----------- +| `name` | (Required) Name of the health check +| `host` | (Required) IP or hostname to check +| `port` | (Required) `port` to check on `host` +| `status_codes` | An array of status codes which should PASS health check +| `body_regex` | **Will not be checked if `status_codes` specified** - A string to search for in the body of the response, if found will pass health check + +```yaml +http: + - name: httpService1 + host: 127.0.0.1 + port: 8080 + success_codes: [200, 204, 301, 302] + - name: httpService2 + host: 127.0.0.1 + port: 8081 + body_regex: "healthy" +``` +##### script +| Key | Description +| -------------- | ----------- +| `name` | (Required) Name of the health check +| `script` | (Required) Path to script to run - will PASS if it completes within 5s with a zero exit status + +```yaml +script: + - name: scriptCheck1 + script: /path/to/some/script.sh +``` + ##### Examples See [examples folder](examples/) for more complete `health-checks.yml` examples. From 500434f0191c002465cd18ca314b5ddb482ec539 Mon Sep 17 00:00:00 2001 From: Sarkis Varozian Date: Sat, 3 Mar 2018 12:12:57 -0800 Subject: [PATCH 19/30] add an advanced example --- examples/health-checks.yml.advanced | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) create mode 100644 examples/health-checks.yml.advanced diff --git a/examples/health-checks.yml.advanced b/examples/health-checks.yml.advanced new file mode 100644 index 0000000..1fee913 --- /dev/null +++ b/examples/health-checks.yml.advanced @@ -0,0 +1,19 @@ +tcp: + - name: service1 + host: localhost + port: 5500 + - name: service2 + host: 0.0.0.0 + port: 6500 +http: + - name: httpService1 + host: 127.0.0.1 + port: 8080 + success_codes: [200, 204, 301, 302] + - name: httpService2 + host: 127.0.0.1 + port: 8081 + body_regex: "healthy" +script: + - name: scriptCheck1 + script: /path/to/some/script.sh From 2a8369d5adc6ec19a7b5aeb6245bc40b381e2fcf Mon Sep 17 00:00:00 2001 From: Sarkis Varozian Date: Sun, 4 Mar 2018 02:04:15 -0800 Subject: [PATCH 20/30] refactor a bit to make unit testing easier --- commands/flags.go | 30 ++++++++++++++++++++---------- 1 file changed, 20 insertions(+), 10 deletions(-) diff --git a/commands/flags.go b/commands/flags.go index 1a108e1..b168246 100644 --- a/commands/flags.go +++ b/commands/flags.go @@ -77,7 +77,12 @@ func parseOptions(cliContext *cli.Context) (*options.Options, error) { return nil, MissingParam(configFlag.Name) } - checks, err := parseChecksFromConfigFile(configFile, logger) + configAsByteSlice, err := parseConfigAsByteSlice(configFile, logger) + if err != nil { + return nil, err + } + + checks, err := parseChecksFromConfig(configAsByteSlice, logger) if err != nil { return nil, err } @@ -89,18 +94,15 @@ func parseOptions(cliContext *cli.Context) (*options.Options, error) { }, nil } -func parseChecksFromConfigFile(configFile string, logger *logrus.Logger) ([]options.Check, error) { - configFileAsByteSlice, err := ioutil.ReadFile(configFile) - if err != nil { - return nil, err - } - +func parseChecksFromConfig(configAsByteSlice []byte, logger *logrus.Logger) ([]options.Check, error) { var checks Checks var checkSlice []options.Check - err = yaml.Unmarshal(configFileAsByteSlice, &checks) - if err != nil{ - return nil, err + // Use UnmarshalStrict to catch any issues in the config, + // such as misspelled keys. + err := yaml.UnmarshalStrict(configAsByteSlice, &checks) + if err != nil { + logger.Fatal(err) } if len(checks.TcpChecks) + len(checks.HttpChecks) + len(checks.ScriptChecks) == 0 { @@ -124,6 +126,14 @@ func parseChecksFromConfigFile(configFile string, logger *logrus.Logger) ([]opti return checkSlice, nil } +func parseConfigAsByteSlice(configFile string, logger *logrus.Logger) ([]byte, error) { + configAsByteSlice, err := ioutil.ReadFile(configFile) + if err != nil { + return nil, err + } + return configAsByteSlice, nil +} + // Some error types are simple enough that we'd rather just show the error message directly instead of vomiting out a // whole stack trace in log output. Therefore, allow a debug mode that always shows full stack traces. Otherwise, show // simple messages. From 4eb333e906838bc1dbf38189a10b558a0b04d5da Mon Sep 17 00:00:00 2001 From: Sarkis Varozian Date: Sun, 4 Mar 2018 02:36:31 -0800 Subject: [PATCH 21/30] error with a message out if bad or empty config file is passed --- commands/flags.go | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/commands/flags.go b/commands/flags.go index b168246..ec264cd 100644 --- a/commands/flags.go +++ b/commands/flags.go @@ -129,7 +129,9 @@ func parseChecksFromConfig(configAsByteSlice []byte, logger *logrus.Logger) ([]o func parseConfigAsByteSlice(configFile string, logger *logrus.Logger) ([]byte, error) { configAsByteSlice, err := ioutil.ReadFile(configFile) if err != nil { - return nil, err + logger.Fatal(err) + } else if len(configAsByteSlice) == 0 { + logger.Fatalf("config file: %s is empty", configFile) } return configAsByteSlice, nil } From fd09b6ceea4298996ae24d6268e4e377e9d5b2b9 Mon Sep 17 00:00:00 2001 From: Sarkis Varozian Date: Sun, 4 Mar 2018 03:04:23 -0800 Subject: [PATCH 22/30] configurable timeout with a 5s default if not specified --- README.md | 13 ++++++++----- server/server.go | 35 +++++++++++++++++++++++++++-------- 2 files changed, 35 insertions(+), 13 deletions(-) diff --git a/README.md b/README.md index c922253..4c425a9 100644 --- a/README.md +++ b/README.md @@ -73,11 +73,12 @@ health-checker --listener "0.0.0.0:6000" --config "my-checks.yml" --log-level "w #### Configuration ##### tcp -| Key | Description -| ------- | ----------- -| `name` | (Required) Name of the health check -| `host` | (Required) IP or hostname to check -| `port` | (Required) `port` to check on `host` +| Key | Description +| --------- | ----------- +| `name` | (Required) Name of the health check +| `host` | (Required) IP or hostname to check +| `port` | (Required) `port` to check on `host` +| `timeout` | Timeout for health check in seconds - if not specified defaults to 5 ```yaml tcp: @@ -92,6 +93,7 @@ tcp: | `name` | (Required) Name of the health check | `host` | (Required) IP or hostname to check | `port` | (Required) `port` to check on `host` +| `timeout` | Timeout for health check in seconds - if not specified defaults to 5 | `status_codes` | An array of status codes which should PASS health check | `body_regex` | **Will not be checked if `status_codes` specified** - A string to search for in the body of the response, if found will pass health check @@ -111,6 +113,7 @@ http: | -------------- | ----------- | `name` | (Required) Name of the health check | `script` | (Required) Path to script to run - will PASS if it completes within 5s with a zero exit status +| `timeout` | Timeout for health check in seconds - if not specified defaults to 5 ```yaml script: diff --git a/server/server.go b/server/server.go index c2c2604..f1ed522 100644 --- a/server/server.go +++ b/server/server.go @@ -18,10 +18,13 @@ import ( "github.com/sirupsen/logrus" ) +const DEFAULT_CHECK_TIMEOUT = 5 + type TcpCheck struct { Name string `yaml:"name"` Host string `yaml:"host"` Port int `yaml:"port"` + Timeout int `yaml:"timeout"` } type HttpCheck struct { @@ -30,11 +33,13 @@ type HttpCheck struct { Port int `yaml:"port"` SuccessStatusCodes []int `yaml:"success_status_codes"` BodyRegex string `yaml:"body_regex"` + Timeout int `yaml:"timeout"` } type ScriptCheck struct { Name string `yaml:"name"` Script string `yaml:"script"` + Timeout int `yaml:"timeout"` } type httpResponse struct { @@ -109,9 +114,13 @@ func (c TcpCheck) DoCheck (opts *options.Options) error { logger := opts.Logger logger.Infof("Attempting to connect to %s at %s:%d via TCP...", c.Name, c.Host, c.Port) - defaultTimeout := time.Second * 5 + timeout := time.Second * DEFAULT_CHECK_TIMEOUT + if c.Timeout != 0 { + // override default with user defined timeout + timeout = time.Second * time.Duration(c.Timeout) + } - conn, err := net.DialTimeout("tcp", fmt.Sprintf("%s:%d", c.Host, c.Port), defaultTimeout) + conn, err := net.DialTimeout("tcp", fmt.Sprintf("%s:%d", c.Host, c.Port), timeout) if err != nil { return err } @@ -137,9 +146,14 @@ func (c HttpCheck) DoCheck (opts *options.Options) error { logger := opts.Logger logger.Infof("Checking %s at %s:%d via HTTP...", c.Name, c.Host, c.Port) - defaultTimeout := time.Second * 5 + timeout := time.Second * DEFAULT_CHECK_TIMEOUT + if c.Timeout != 0 { + // override default with user defined timeout + timeout = time.Second * time.Duration(c.Timeout) + } + client := http.Client{ - Timeout: defaultTimeout, + Timeout: timeout, } resp, err := client.Get(fmt.Sprintf("http://%s:%d", c.Host, c.Port)) if err != nil { @@ -169,7 +183,7 @@ func (c HttpCheck) DoCheck (opts *options.Options) error { return gerrors.New(fmt.Sprintf("http check %s wanted %s in http body got %s", c.Name, c.BodyRegex, body)) } } else { - // no success_codes or body_regex defined, only pass on 200 +s_codes or body_regex defined, only pass on 200 if resp.StatusCode == http.StatusOK { return nil } else { @@ -202,15 +216,20 @@ func (c ScriptCheck) ValidateCheck (logger *logrus.Logger) { } func (c ScriptCheck) DoCheck (opts *options.Options) error { - defaultTimeout := 5*time.Second - ctx, cancel := context.WithTimeout(context.Background(), defaultTimeout) + timeout := time.Second * DEFAULT_CHECK_TIMEOUT + if c.Timeout != 0 { + // override default with user defined timeout + timeout = time.Second * time.Duration(c.Timeout) + } + + ctx, cancel := context.WithTimeout(context.Background(), timeout) defer cancel() cmd := exec.CommandContext(ctx, c.Script) _, err := cmd.Output() if ctx.Err() == context.DeadlineExceeded { // script timed out - return gerrors.New(fmt.Sprintf("check %s at %s FAILED to complete within %ds", c.Name, c.Script, defaultTimeout)) + return gerrors.New(fmt.Sprintf("check %s at %s FAILED to complete within %ds", c.Name, c.Script, timeout)) } if err != nil { return gerrors.New(fmt.Sprintf("check %s at %s FAILED with a non-zero exit code", c.Name, c.Script)) From c62118c3c6fc4e24a496ac033463bd21ffbaebcb Mon Sep 17 00:00:00 2001 From: Sarkis Varozian Date: Sun, 4 Mar 2018 03:06:46 -0800 Subject: [PATCH 23/30] fix syntax error --- server/server.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/server/server.go b/server/server.go index f1ed522..2159ea1 100644 --- a/server/server.go +++ b/server/server.go @@ -183,7 +183,7 @@ func (c HttpCheck) DoCheck (opts *options.Options) error { return gerrors.New(fmt.Sprintf("http check %s wanted %s in http body got %s", c.Name, c.BodyRegex, body)) } } else { -s_codes or body_regex defined, only pass on 200 + // no success_codes or body_regex defined, only pass on 200 if resp.StatusCode == http.StatusOK { return nil } else { From faa39bddbd4bb809c46d314373ac9fdc691a5011 Mon Sep 17 00:00:00 2001 From: Sarkis Varozian Date: Sun, 4 Mar 2018 12:11:32 -0800 Subject: [PATCH 24/30] remove call to opts here as it will not exist in the if err != nil --- commands/cli.go | 1 - 1 file changed, 1 deletion(-) diff --git a/commands/cli.go b/commands/cli.go index 0a8dfa7..aae40f5 100644 --- a/commands/cli.go +++ b/commands/cli.go @@ -45,7 +45,6 @@ func CreateCli(version string) *cli.App { func runHealthChecker(cliContext *cli.Context) error { opts, err := parseOptions(cliContext) if err != nil && !isDebugMode() { - opts.Logger.Infof("Note: To enable debug mode, set %s to \"true\"", ENV_VAR_NAME_DEBUG_MODE) return err } else if err != nil { return errors.WithStackTrace(err) From 61a145bcd8833b293e8bb7dd03b9938668bd3c41 Mon Sep 17 00:00:00 2001 From: Sarkis Varozian Date: Sun, 4 Mar 2018 12:12:38 -0800 Subject: [PATCH 25/30] add first internal unit test --- commands/flags.go | 19 +++++------ commands/flags_internal_test.go | 59 +++++++++++++++++++++++++++++++++ 2 files changed, 68 insertions(+), 10 deletions(-) create mode 100644 commands/flags_internal_test.go diff --git a/commands/flags.go b/commands/flags.go index ec264cd..7fccfc1 100644 --- a/commands/flags.go +++ b/commands/flags.go @@ -77,7 +77,7 @@ func parseOptions(cliContext *cli.Context) (*options.Options, error) { return nil, MissingParam(configFlag.Name) } - configAsByteSlice, err := parseConfigAsByteSlice(configFile, logger) + configAsByteSlice, err := parseConfigToByteSlice(configFile, logger) if err != nil { return nil, err } @@ -94,15 +94,15 @@ func parseOptions(cliContext *cli.Context) (*options.Options, error) { }, nil } -func parseChecksFromConfig(configAsByteSlice []byte, logger *logrus.Logger) ([]options.Check, error) { +func parseChecksFromConfig(configByteSlice []byte, logger *logrus.Logger) ([]options.Check, error) { var checks Checks var checkSlice []options.Check // Use UnmarshalStrict to catch any issues in the config, // such as misspelled keys. - err := yaml.UnmarshalStrict(configAsByteSlice, &checks) + err := yaml.UnmarshalStrict(configByteSlice, &checks) if err != nil { - logger.Fatal(err) + return nil, err } if len(checks.TcpChecks) + len(checks.HttpChecks) + len(checks.ScriptChecks) == 0 { @@ -126,12 +126,10 @@ func parseChecksFromConfig(configAsByteSlice []byte, logger *logrus.Logger) ([]o return checkSlice, nil } -func parseConfigAsByteSlice(configFile string, logger *logrus.Logger) ([]byte, error) { +func parseConfigToByteSlice(configFile string, logger *logrus.Logger) ([]byte, error) { configAsByteSlice, err := ioutil.ReadFile(configFile) - if err != nil { - logger.Fatal(err) - } else if len(configAsByteSlice) == 0 { - logger.Fatalf("config file: %s is empty", configFile) + if err != nil || len(configAsByteSlice) == 0 { + return nil, err } return configAsByteSlice, nil } @@ -157,4 +155,5 @@ type MissingParam string func (paramName MissingParam) Error() string { return fmt.Sprintf("Missing required parameter --%s", string(paramName)) -} \ No newline at end of file +} + diff --git a/commands/flags_internal_test.go b/commands/flags_internal_test.go new file mode 100644 index 0000000..dec9076 --- /dev/null +++ b/commands/flags_internal_test.go @@ -0,0 +1,59 @@ +package commands + +import ( + "strings" + "testing" + + "github.com/gruntwork-io/health-checker/options" + "github.com/gruntwork-io/gruntwork-cli/logging" + "github.com/stretchr/testify/assert" +) + +func TestParseChecksFromConfigWithInvalidOrEmptyConfig(t *testing.T) { + t.Parallel() + + testCases := []struct { + config string + expectedChecks []options.Check + expectedErr string + }{ + { + ``, + nil, + "no checks found", + }, + { + ` `, + nil, + "no checks found", + }, + { + `there is no checks + or even valid + yml here? + -`, + nil, + "unmarshal error", + }, + } + + for _, testCase := range(testCases) { + checks, err := parseChecksFromConfigString(testCase.config) + if testCase.expectedErr != "" && err == nil { + t.Fatalf("Expected error to contain \"%s\" but got checks: %v", testCase.expectedErr, checks) + } + assert.True(t, strings.Contains(err.Error(), testCase.expectedErr)) + } +} + +func parseChecksFromConfigString(configString string) ([]options.Check, error){ + logger := logging.GetLogger("TEST") + configByteSlice := []byte(configString) + + checks, err := parseChecksFromConfig(configByteSlice, logger) + if err != nil { + return nil, err + } + + return checks, nil +} \ No newline at end of file From 03544b5e889fe4c7bf5b7cddc08fc1d167c89972 Mon Sep 17 00:00:00 2001 From: Sarkis Varozian Date: Sun, 4 Mar 2018 12:31:54 -0800 Subject: [PATCH 26/30] test an invalid config --- commands/flags_internal_test.go | 26 ++++++++++++++++++++------ 1 file changed, 20 insertions(+), 6 deletions(-) diff --git a/commands/flags_internal_test.go b/commands/flags_internal_test.go index dec9076..91a670f 100644 --- a/commands/flags_internal_test.go +++ b/commands/flags_internal_test.go @@ -1,11 +1,12 @@ package commands import ( + "fmt" "strings" "testing" - "github.com/gruntwork-io/health-checker/options" "github.com/gruntwork-io/gruntwork-cli/logging" + "github.com/gruntwork-io/health-checker/options" "github.com/stretchr/testify/assert" ) @@ -13,9 +14,9 @@ func TestParseChecksFromConfigWithInvalidOrEmptyConfig(t *testing.T) { t.Parallel() testCases := []struct { - config string + config string expectedChecks []options.Check - expectedErr string + expectedErr string }{ { ``, @@ -35,9 +36,22 @@ func TestParseChecksFromConfigWithInvalidOrEmptyConfig(t *testing.T) { nil, "unmarshal error", }, + { + ` +http: + - name: httpService1 + host: 127.0.0.1 + port: 8080 + success_codes: [200, 204, 301, 302] +invalidkey: + - name: bad + description: this should fail`, + nil, + "unmarshal error", + }, } - for _, testCase := range(testCases) { + for _, testCase := range testCases { checks, err := parseChecksFromConfigString(testCase.config) if testCase.expectedErr != "" && err == nil { t.Fatalf("Expected error to contain \"%s\" but got checks: %v", testCase.expectedErr, checks) @@ -46,7 +60,7 @@ func TestParseChecksFromConfigWithInvalidOrEmptyConfig(t *testing.T) { } } -func parseChecksFromConfigString(configString string) ([]options.Check, error){ +func parseChecksFromConfigString(configString string) ([]options.Check, error) { logger := logging.GetLogger("TEST") configByteSlice := []byte(configString) @@ -56,4 +70,4 @@ func parseChecksFromConfigString(configString string) ([]options.Check, error){ } return checks, nil -} \ No newline at end of file +} From 308c457066660a7118479eb974fff0e355894ede Mon Sep 17 00:00:00 2001 From: Sarkis Varozian Date: Sat, 10 Mar 2018 09:05:42 -0800 Subject: [PATCH 27/30] cleanup tests and use subtests for better organization --- commands/flags.go | 2 +- commands/flags_internal_test.go | 133 ++++++++++++++++++++++++-------- 2 files changed, 100 insertions(+), 35 deletions(-) diff --git a/commands/flags.go b/commands/flags.go index 7fccfc1..8035a2e 100644 --- a/commands/flags.go +++ b/commands/flags.go @@ -46,7 +46,7 @@ var defaultFlags = []cli.Flag{ logLevelFlag, } -// define structure of yaml file +// define structure of config file type Checks struct { TcpChecks []server.TcpCheck `yaml:"tcp"` HttpChecks []server.HttpCheck `yaml:"http"` diff --git a/commands/flags_internal_test.go b/commands/flags_internal_test.go index 91a670f..4d41013 100644 --- a/commands/flags_internal_test.go +++ b/commands/flags_internal_test.go @@ -1,43 +1,45 @@ package commands import ( - "fmt" "strings" "testing" "github.com/gruntwork-io/gruntwork-cli/logging" "github.com/gruntwork-io/health-checker/options" + "github.com/gruntwork-io/health-checker/server" "github.com/stretchr/testify/assert" ) -func TestParseChecksFromConfigWithInvalidOrEmptyConfig(t *testing.T) { +func TestParseChecksFromConfig(t *testing.T) { t.Parallel() - testCases := []struct { - config string - expectedChecks []options.Check - expectedErr string + tt := []struct { + name string + config string + checks []options.Check + err string }{ { - ``, - nil, - "no checks found", + name: "config empty", + config: ``, + err: "no checks found", }, { - ` `, - nil, - "no checks found", + name: "config with only whitespace", + config: ` `, + err: "no checks found", }, { - `there is no checks + name: "config with invalid yaml", + config: `there is no checks or even valid yml here? -`, - nil, - "unmarshal error", + err: "unmarshal error", }, { - ` + name: "config with an unknown key", + config: ` http: - name: httpService1 host: 127.0.0.1 @@ -46,28 +48,91 @@ http: invalidkey: - name: bad description: this should fail`, - nil, - "unmarshal error", + err: "unmarshal error", + }, + { + name: "config with single tcp check", + config: ` +tcp: + - name: service1 + host: 127.0.0.1 + port: 8081`, + checks: []options.Check{ + server.TcpCheck{ + Name: "service1", + Host: "127.0.0.1", + Port: 8081, + }, + }, + }, + { + name: "config with two tcp checks", + config: ` +tcp: + - name: service1 + host: 127.0.0.1 + port: 8080 + timeout: 5 + - name: service2 + host: 0.0.0.0 + port: 8081`, + checks: []options.Check{ + server.TcpCheck{ + Name: "service1", + Host: "127.0.0.1", + Port: 8080, + Timeout: 5, + }, + server.TcpCheck{ + Name: "service2", + Host: "0.0.0.0", + Port: 8081, + }, + }, + }, + { + name: "config with all check types", + config: ` +tcp: + - name: service1 + host: 127.0.0.1 + port: 8080 + timeout: 5 +http: + - name: httpservice1 + host: localhost + port: 80 + success_status_codes: [200, 204, 429] + timeout: 3 + - name: httpservice2 + host: 127.0.0.1 + port: 8081 + body_regex: "test" +script: + - name: script1 + script: /usr/local/bin/foo.sh`, + checks: []options.Check{ + server.TcpCheck{Name: "service1", Host: "127.0.0.1", Port: 8080, Timeout: 5}, + server.HttpCheck{Name: "httpservice1", Host: "localhost", Port: 80, SuccessStatusCodes: []int{200, 204, 429}, Timeout: 3}, + server.HttpCheck{Name: "httpservice2", Host: "127.0.0.1", Port: 8081, BodyRegex: "test"}, + server.ScriptCheck{Name: "script1", Script: "/usr/local/bin/foo.sh"}, + }, }, } - for _, testCase := range testCases { - checks, err := parseChecksFromConfigString(testCase.config) - if testCase.expectedErr != "" && err == nil { - t.Fatalf("Expected error to contain \"%s\" but got checks: %v", testCase.expectedErr, checks) - } - assert.True(t, strings.Contains(err.Error(), testCase.expectedErr)) - } -} + for _, tc := range tt { + t.Run(tc.name, func(t *testing.T) { + logger := logging.GetLogger("TEST") + b := []byte(tc.config) -func parseChecksFromConfigString(configString string) ([]options.Check, error) { - logger := logging.GetLogger("TEST") - configByteSlice := []byte(configString) + checks, err := parseChecksFromConfig(b, logger) + if err != nil && tc.err != "" { + assert.True(t, strings.Contains(err.Error(), tc.err)) + } else if err != nil { + t.Fatalf("unexpected error, got %v", err.Error()) + } - checks, err := parseChecksFromConfig(configByteSlice, logger) - if err != nil { - return nil, err + assert.Equal(t, tc.checks, checks) + }) } - - return checks, nil } From 00fc8e529ab1b3f5f7bc96eff23299f849d90fdc Mon Sep 17 00:00:00 2001 From: Sarkis Varozian Date: Sat, 10 Mar 2018 11:10:13 -0800 Subject: [PATCH 28/30] error cleanup --- commands/flags.go | 28 ++++++---- commands/flags_internal_test.go | 5 +- options/options.go | 2 +- server/server.go | 96 +++++++++++++++++++-------------- 4 files changed, 76 insertions(+), 55 deletions(-) diff --git a/commands/flags.go b/commands/flags.go index 8035a2e..a172143 100644 --- a/commands/flags.go +++ b/commands/flags.go @@ -77,12 +77,12 @@ func parseOptions(cliContext *cli.Context) (*options.Options, error) { return nil, MissingParam(configFlag.Name) } - configAsByteSlice, err := parseConfigToByteSlice(configFile, logger) + b, err := loadBytes(configFile) if err != nil { - return nil, err + return nil, InvalidConfigFile(configFile) } - checks, err := parseChecksFromConfig(configAsByteSlice, logger) + checks, err := parseChecksFromConfig(b) if err != nil { return nil, err } @@ -94,13 +94,13 @@ func parseOptions(cliContext *cli.Context) (*options.Options, error) { }, nil } -func parseChecksFromConfig(configByteSlice []byte, logger *logrus.Logger) ([]options.Check, error) { +func parseChecksFromConfig(b []byte) ([]options.Check, error) { var checks Checks var checkSlice []options.Check // Use UnmarshalStrict to catch any issues in the config, // such as misspelled keys. - err := yaml.UnmarshalStrict(configByteSlice, &checks) + err := yaml.UnmarshalStrict(b, &checks) if err != nil { return nil, err } @@ -120,18 +120,21 @@ func parseChecksFromConfig(configByteSlice []byte, logger *logrus.Logger) ([]opt } for _, check := range checkSlice { - check.ValidateCheck(logger) + err := check.ValidateCheck() + if err != nil { + return nil, err + } } return checkSlice, nil } -func parseConfigToByteSlice(configFile string, logger *logrus.Logger) ([]byte, error) { - configAsByteSlice, err := ioutil.ReadFile(configFile) - if err != nil || len(configAsByteSlice) == 0 { +func loadBytes(filename string) ([]byte, error) { + b, err := ioutil.ReadFile(filename) + if err != nil || len(b) == 0 { return nil, err } - return configAsByteSlice, nil + return b, nil } // Some error types are simple enough that we'd rather just show the error message directly instead of vomiting out a @@ -144,6 +147,11 @@ func isDebugMode() bool { } // Custom error types +type InvalidConfigFile string + +func (configFile InvalidConfigFile) Error() string { + return fmt.Sprintf("Error while parsing config: %s", string(configFile)) +} type InvalidLogLevel string diff --git a/commands/flags_internal_test.go b/commands/flags_internal_test.go index 4d41013..e167d2c 100644 --- a/commands/flags_internal_test.go +++ b/commands/flags_internal_test.go @@ -4,7 +4,6 @@ import ( "strings" "testing" - "github.com/gruntwork-io/gruntwork-cli/logging" "github.com/gruntwork-io/health-checker/options" "github.com/gruntwork-io/health-checker/server" "github.com/stretchr/testify/assert" @@ -122,10 +121,9 @@ script: for _, tc := range tt { t.Run(tc.name, func(t *testing.T) { - logger := logging.GetLogger("TEST") b := []byte(tc.config) - checks, err := parseChecksFromConfig(b, logger) + checks, err := parseChecksFromConfig(b) if err != nil && tc.err != "" { assert.True(t, strings.Contains(err.Error(), tc.err)) } else if err != nil { @@ -136,3 +134,4 @@ script: }) } } + diff --git a/options/options.go b/options/options.go index ddc34c6..8a0d01a 100644 --- a/options/options.go +++ b/options/options.go @@ -6,7 +6,7 @@ import ( type Check interface { DoCheck(*Options) error - ValidateCheck(*logrus.Logger) + ValidateCheck() error } // The options accepted by this CLI tool diff --git a/server/server.go b/server/server.go index 2159ea1..d7c2ee8 100644 --- a/server/server.go +++ b/server/server.go @@ -3,7 +3,6 @@ package server import ( "context" "fmt" - gerrors "errors" "io/ioutil" "net" "net/http" @@ -15,7 +14,6 @@ import ( "github.com/gruntwork-io/gruntwork-cli/errors" "github.com/gruntwork-io/health-checker/options" - "github.com/sirupsen/logrus" ) const DEFAULT_CHECK_TIMEOUT = 5 @@ -98,16 +96,17 @@ func checkHealthChecks(opts *options.Options) *httpResponse { } } -func (c TcpCheck) ValidateCheck (logger *logrus.Logger) { +func (c TcpCheck) ValidateCheck () error { if c.Name == "" { - missingRequiredKey("tcp","name", logger) + return &InvalidCheck{name: "tcp", key: "name"} } if c.Host == "" { - missingRequiredKey("tcp","host", logger) + return &InvalidCheck{name: c.Name, key: "host"} } if c.Port == 0 { - missingRequiredKey("tcp","port", logger) + return &InvalidCheck{name: c.Name, key: "port"} } + return nil } func (c TcpCheck) DoCheck (opts *options.Options) error { @@ -122,7 +121,7 @@ func (c TcpCheck) DoCheck (opts *options.Options) error { conn, err := net.DialTimeout("tcp", fmt.Sprintf("%s:%d", c.Host, c.Port), timeout) if err != nil { - return err + return &CheckFail{name: c.Name, reason: err.Error()} } defer conn.Close() @@ -130,16 +129,17 @@ func (c TcpCheck) DoCheck (opts *options.Options) error { return nil } -func (c HttpCheck) ValidateCheck (logger *logrus.Logger) { +func (c HttpCheck) ValidateCheck () error { if c.Name == "" { - missingRequiredKey("http","name", logger) + return &InvalidCheck{name: "http", key: "name"} } if c.Host == "" { - missingRequiredKey("http","host", logger) + return &InvalidCheck{name: c.Name, key: "host"} } if c.Port == 0 { - missingRequiredKey("http","port", logger) + return &InvalidCheck{name: c.Name, key: "port"} } + return nil } func (c HttpCheck) DoCheck (opts *options.Options) error { @@ -160,36 +160,28 @@ func (c HttpCheck) DoCheck (opts *options.Options) error { return err } - if len(c.SuccessStatusCodes) > 0 { - // when success_codes is defined we only need to check this - if contains(c.SuccessStatusCodes, resp.StatusCode) { - // Success! response has one of the success_codes - return nil - } else { - return gerrors.New(fmt.Sprintf("http check %s wanted one of %v got %d", c.Name, c.SuccessStatusCodes, resp.Status)) + switch { + case len(c.SuccessStatusCodes) > 0: + if !contains(c.SuccessStatusCodes, resp.StatusCode) { + return &CheckFail{name: c.Name, reason: fmt.Sprintf("wanted one of %v, got %d", c.SuccessStatusCodes, resp.Status)} } - } else if c.BodyRegex != ""{ - // since no success_codes defined we compare body with body_regex + case c.BodyRegex != "": defer resp.Body.Close() body, err := ioutil.ReadAll(resp.Body) if err != nil { - return err + return &CheckFail{name: c.Name, reason: "failed reading body"} } - if strings.Contains(string(body), c.BodyRegex) { - // Success! resp body has expected string - return nil - } else { - return gerrors.New(fmt.Sprintf("http check %s wanted %s in http body got %s", c.Name, c.BodyRegex, body)) + if !strings.Contains(string(body), c.BodyRegex) { + return &CheckFail{name: c.Name, reason: fmt.Sprintf("wanted %s in http body, got %s", c.BodyRegex, body)} } - } else { + default: // no success_codes or body_regex defined, only pass on 200 - if resp.StatusCode == http.StatusOK { - return nil - } else { - return gerrors.New(fmt.Sprintf("http check %s wanted status code 200 got %d", c.Name, resp.StatusCode)) + if resp.StatusCode != http.StatusOK { + return &CheckFail{name: c.Name, reason: fmt.Sprintf("wanted status code 200, got %d", resp.StatusCode)} } } + return nil } // TODO: move into helpers @@ -202,17 +194,14 @@ func contains(s []int, e int) bool { return false } -func missingRequiredKey(check string, key string, logger *logrus.Logger) { - logger.Fatalf("Failed to parse YAML: %s check missing required key: %s", check, key) -} - -func (c ScriptCheck) ValidateCheck (logger *logrus.Logger) { +func (c ScriptCheck) ValidateCheck () error { if c.Name == "" { - missingRequiredKey("script","name", logger) + return &InvalidCheck{name: "script", key: "name"} } if c.Script == "" { - missingRequiredKey("script","script", logger) + return &InvalidCheck{name: c.Name, key: "script"} } + return nil } func (c ScriptCheck) DoCheck (opts *options.Options) error { @@ -228,11 +217,10 @@ func (c ScriptCheck) DoCheck (opts *options.Options) error { cmd := exec.CommandContext(ctx, c.Script) _, err := cmd.Output() if ctx.Err() == context.DeadlineExceeded { - // script timed out - return gerrors.New(fmt.Sprintf("check %s at %s FAILED to complete within %ds", c.Name, c.Script, timeout)) + return &CheckTimeout{name: c.Name, timeout: int(timeout)} } if err != nil { - return gerrors.New(fmt.Sprintf("check %s at %s FAILED with a non-zero exit code", c.Name, c.Script)) + return &CheckFail{name: c.Name, reason: "non-zero exit code"} } return nil } @@ -246,3 +234,29 @@ func writeHttpResponse(w http.ResponseWriter, resp *httpResponse) error { return nil } + +// custom error types +type InvalidCheck struct { + name, key string +} + +func (e *InvalidCheck) Error() string { + return fmt.Sprintf("Invalid check: %s, missing key: %s", string(e.name), string(e.key)) +} + +type CheckFail struct { + name, reason string +} + +func (e *CheckFail) Error() string { + return fmt.Sprintf("Check Failed: %s failed due to: %s", string(e.name), string(e.reason)) +} + +type CheckTimeout struct { + name string + timeout int +} + +func (e *CheckTimeout) Error() string { + return fmt.Sprintf("Check Timed Out: %s took longer than configured timeout: %ds", string(e.name), int(e.timeout)) +} From cf2321b73e637439896df2d43052af8140b38e5f Mon Sep 17 00:00:00 2001 From: Sarkis Varozian Date: Sat, 10 Mar 2018 11:50:40 -0800 Subject: [PATCH 29/30] add more logging --- server/server.go | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/server/server.go b/server/server.go index d7c2ee8..84763ae 100644 --- a/server/server.go +++ b/server/server.go @@ -76,10 +76,8 @@ func checkHealthChecks(opts *options.Options) *httpResponse { defer waitGroup.Done() err := check.DoCheck(opts) if err != nil { - logger.Warnf("Check for %s FAILED: %s", check, err) + logger.Warnf(err.Error()) atomic.AddUint64(&failedChecks, 1) - } else { - logger.Infof("Check for %s successful", check) } }(check) } @@ -111,7 +109,7 @@ func (c TcpCheck) ValidateCheck () error { func (c TcpCheck) DoCheck (opts *options.Options) error { logger := opts.Logger - logger.Infof("Attempting to connect to %s at %s:%d via TCP...", c.Name, c.Host, c.Port) + logger.Infof("Checking %s at %s:%d via TCP...", c.Name, c.Host, c.Port) timeout := time.Second * DEFAULT_CHECK_TIMEOUT if c.Timeout != 0 { @@ -125,7 +123,7 @@ func (c TcpCheck) DoCheck (opts *options.Options) error { } defer conn.Close() - + logger.Infof("Check SUCCESS: %s", c.Name) return nil } @@ -157,7 +155,7 @@ func (c HttpCheck) DoCheck (opts *options.Options) error { } resp, err := client.Get(fmt.Sprintf("http://%s:%d", c.Host, c.Port)) if err != nil { - return err + return &CheckFail{name: c.Name, reason: err.Error()} } switch { @@ -181,6 +179,8 @@ func (c HttpCheck) DoCheck (opts *options.Options) error { return &CheckFail{name: c.Name, reason: fmt.Sprintf("wanted status code 200, got %d", resp.StatusCode)} } } + + logger.Infof("Check SUCCESS: %s", c.Name) return nil } @@ -205,6 +205,9 @@ func (c ScriptCheck) ValidateCheck () error { } func (c ScriptCheck) DoCheck (opts *options.Options) error { + logger := opts.Logger + logger.Infof("Checking %s at %s...", c.Name, c.Script) + timeout := time.Second * DEFAULT_CHECK_TIMEOUT if c.Timeout != 0 { // override default with user defined timeout @@ -222,6 +225,7 @@ func (c ScriptCheck) DoCheck (opts *options.Options) error { if err != nil { return &CheckFail{name: c.Name, reason: "non-zero exit code"} } + logger.Infof("Check SUCCESS: %s", c.Name) return nil } @@ -249,7 +253,7 @@ type CheckFail struct { } func (e *CheckFail) Error() string { - return fmt.Sprintf("Check Failed: %s failed due to: %s", string(e.name), string(e.reason)) + return fmt.Sprintf("Check FAILED: %s reason: %s", string(e.name), string(e.reason)) } type CheckTimeout struct { @@ -258,5 +262,5 @@ type CheckTimeout struct { } func (e *CheckTimeout) Error() string { - return fmt.Sprintf("Check Timed Out: %s took longer than configured timeout: %ds", string(e.name), int(e.timeout)) + return fmt.Sprintf("Check TIMEOUT: %s took longer than configured timeout: %ds", string(e.name), int(e.timeout)) } From 59c1e2ac55f8f55f1474c79b59483459180bd179 Mon Sep 17 00:00:00 2001 From: Sarkis Varozian Date: Sat, 10 Mar 2018 11:52:21 -0800 Subject: [PATCH 30/30] rename test file --- commands/{flags_internal_test.go => flags_test.go} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename commands/{flags_internal_test.go => flags_test.go} (100%) diff --git a/commands/flags_internal_test.go b/commands/flags_test.go similarity index 100% rename from commands/flags_internal_test.go rename to commands/flags_test.go