Skip to content

Commit

Permalink
Merge pull request #43 from InVisionApp/feature/add-check-callback
Browse files Browse the repository at this point in the history
Feature/add check callback
  • Loading branch information
dselans authored May 17, 2018
2 parents 2e75277 + 9a63c91 commit 08eeb4f
Show file tree
Hide file tree
Showing 7 changed files with 364 additions and 22 deletions.
4 changes: 4 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@ This library:
* Is test-friendly
+ Provides an easy way to disable dependency health checking.
+ Uses an interface for its dependencies, allowing you to insert fakes/mocks at test time.
* Allows you to trigger listener functions when a health check fails or recovers. **[3]**

**[1]** Make sure to run your checks on a "sane" interval - ie. if you are checking your
Redis dependency once every five minutes, your service is essentially running _blind_
Expand All @@ -53,6 +54,8 @@ you to query that data via `.State()`. Alternatively, you can use one of the
pre-built HTTP handlers for your `/healthcheck` endpoint (and thus not have to
manually inspect the state data).

**[3]** This is done by assigning an implementation of the `IStatusListener` interface to the health instance's `StatusListener` field.

## Example

For _full_ examples, look through the [examples dir](examples/)
Expand Down Expand Up @@ -122,6 +125,7 @@ output would look something like this:
## Additional Documentation
* [Examples](/examples)
* [Status Listeners](/examples/status-listener)
* [Checkers](/checkers)
* [Logging](/loggers)
Expand Down
19 changes: 19 additions & 0 deletions examples/status-listener/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
## Status Listener Example

The `IStatusListener` interface allows you to hook into health check failures and recoveries as they occur. This example runs a dependency service, `dependency.go`, and a dependent service, `service.go`.

The web server in `dependency.go` will return a `200` status code for 10 contiguous requests, then return `500` for 5 contiguous requests. The request cycle then resets.

The web server in `service.go` uses go-health to check the dependency server. It also uses an implementation of `IStatusListener`, which consists of the following functions:

* `HealthCheckFailed(entry *health.State)`
* `HealthCheckRecovered(entry *health.State, recordedFailures int64, failureDurationSeconds float64)`

The function `HealthCheckFailed` is triggered when the health check fails for the first time. A count of contiguous failures will be kept until the dependency recovers. Once the dependency does recover, the function `HealthCheckRecovered` is triggered, which reports how many healthchecks failed, as well as how long (in seconds) the dependency was in an unhealthy state.



### To run example

Within the project folder `/examples/status-listener/dependency`, run `go run dependency.go` in one terminal window, then from within `/examples/status-listener/service`, run `go run service.go` in another. You will observe the requests to the dependency, as well as the triggering and recovery of failed health checks.

62 changes: 62 additions & 0 deletions examples/status-listener/dependency/dependency.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
package main

import (
"log"
"net/http"
"os"
"sync"
)

// loki is the mischief maker: it counts calls so that callers can fail
// periodically. The embedded mutex guards callcount.
type loki struct {
	sync.Mutex
	callcount int
}

// shouldBreakThings reports whether the current call should fail. Calls are
// counted in cycles of 15: it returns false for the first 10 calls of every
// cycle and true for the last 5.
func (l *loki) shouldBreakThings() bool {
	l.Lock()
	defer l.Unlock()

	l.callcount++
	if l.callcount > 15 {
		// The 16th call is the first call of the next cycle. Resetting to 0
		// here would give every later cycle 11 passing calls instead of 10.
		l.callcount = 1
	}

	return l.callcount > 10
}

// Package-level state used by the request handler: l decides when a request
// should fail, and logger writes lines prefixed with "dependency: " to stderr.
var (
	l      = &loki{}
	logger = log.New(os.Stderr, "dependency: ", 0)
)

// handleRequest answers every request with either 200 or 500. Requests for
// the favicon always get 200 and are not counted; any other request asks l
// whether this call should fail, logs a thumbs-down or thumbs-up accordingly,
// and writes the matching status code.
func handleRequest(rw http.ResponseWriter, r *http.Request) {
	// favicon requests are answered without touching the call counter
	if r.URL.Path == "/favicon.ico" {
		rw.WriteHeader(http.StatusOK)
		return
	}

	status, mark := http.StatusOK, "👍"
	if l.shouldBreakThings() {
		status, mark = http.StatusInternalServerError, "👎"
	}

	logger.Print(mark)
	rw.WriteHeader(status)
}

// main registers handleRequest for all paths and serves HTTP on
// 0.0.0.0:8081 until the server fails.
func main() {
	http.HandleFunc("/", handleRequest)

	// ListenAndServe only returns when serving fails (for example when the
	// port is already in use); report that instead of exiting silently.
	if err := http.ListenAndServe("0.0.0.0:8081", nil); err != nil {
		logger.Fatalf("server stopped: %v", err)
	}
}
70 changes: 70 additions & 0 deletions examples/status-listener/service/service.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
package main

import (
"log"
"net/http"
"net/url"
"os"
"time"

"github.com/InVisionApp/go-health"
"github.com/InVisionApp/go-health/checkers"
"github.com/InVisionApp/go-health/handlers"
)

// svcLogger is the logger used by this example service; it is assigned in init.
var svcLogger *log.Logger

// HealthCheckStatusListener is this example's implementation of the
// IStatusListener interface. It carries no state and simply logs each event.
type HealthCheckStatusListener struct{}

// HealthCheckFailed logs the state of a health check that has started failing.
func (*HealthCheckStatusListener) HealthCheckFailed(entry *health.State) {
	const format = "State for failed health check: %+v"
	svcLogger.Printf(format, entry)
}

// HealthCheckRecovered logs a recovery, including how many contiguous
// failures were recorded and how many seconds the failure lasted.
func (*HealthCheckStatusListener) HealthCheckRecovered(entry *health.State, recordedFailures int64, failureDurationSeconds float64) {
	const format = "Recovering from %d contiguous errors, lasting %1.2f seconds: %+v"
	svcLogger.Printf(format, recordedFailures, failureDurationSeconds, entry)
}

// init creates the service logger, which writes to stderr with a
// "service: " prefix and no timestamp flags.
func init() {
	const prefix = "service: "
	svcLogger = log.New(os.Stderr, prefix, 0)
}

// main wires a single HTTP health check for the dependency server to a
// status listener, starts the health checks, and exposes the health state as
// JSON on :8080/healthcheck. Any setup or serving error is fatal.
func main() {
	// Create a new health instance
	h := health.New()
	// disable logging from health lib
	h.DisableLogging()

	testURL, err := url.Parse("http://0.0.0.0:8081")
	if err != nil {
		svcLogger.Fatalf("Unable to parse dependency URL: %v", err)
	}

	// Create the check for the dependency server
	httpCheck, err := checkers.NewHTTP(&checkers.HTTPConfig{
		URL: testURL,
	})
	if err != nil {
		svcLogger.Fatalf("Unable to create HTTP checker: %v", err)
	}

	// Add the check to the health instance
	h.AddChecks([]*health.Config{
		{
			Name:     "dependency-check",
			Checker:  httpCheck,
			Interval: 2 * time.Second,
			Fatal:    true,
		},
	})

	// set status listener
	sl := &HealthCheckStatusListener{}
	h.StatusListener = sl

	// Start the healthcheck process
	if err := h.Start(); err != nil {
		svcLogger.Fatalf("Unable to start healthcheck: %v", err)
	}

	svcLogger.Println("Server listening on :8080")

	// Define a healthcheck endpoint and use the built-in JSON handler
	http.HandleFunc("/healthcheck", handlers.NewJSONHandlerFunc(h, nil))

	// ListenAndServe only returns when serving fails; report it instead of
	// exiting silently.
	if err := http.ListenAndServe(":8080", nil); err != nil {
		svcLogger.Fatalf("Server stopped: %v", err)
	}
}
1 change: 1 addition & 0 deletions handlers/handlers.go
Original file line number Diff line number Diff line change
Expand Up @@ -100,6 +100,7 @@ func writeJSONStatus(rw http.ResponseWriter, status, message string, statusCode

// writeJSONResponse sends content as the response body with the given status
// code. It sets the Content-Type and Content-Length headers before the status
// line is written.
func writeJSONResponse(rw http.ResponseWriter, statusCode int, content []byte) {
	hdr := rw.Header()
	hdr.Set("Content-Length", fmt.Sprint(len(content)))
	hdr.Set("Content-Type", "application/json")

	rw.WriteHeader(statusCode)
	rw.Write(content)
}
120 changes: 100 additions & 20 deletions health.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@ package health

import (
"errors"
"fmt"
"sync"
"time"

Expand Down Expand Up @@ -42,7 +41,7 @@ type IHealth interface {
Failed() bool
}

// The ICheckable interface is implemented by a number of bundled checkers such
// ICheckable is an interface implemented by a number of bundled checkers such
// as "MySQLChecker", "RedisChecker" and "HTTPChecker". By implementing the
// interface, you can feed your own custom checkers into the health library.
type ICheckable interface {
Expand All @@ -52,30 +51,77 @@ type ICheckable interface {
Status() (interface{}, error)
}

// The Config struct is used for defining and configuring checks.
// IStatusListener is an interface that handles health check failures and
// recoveries, primarily for stats recording purposes. An implementation is
// attached through the StatusListener field of Health; each callback is
// invoked in its own goroutine.
type IStatusListener interface {
	// HealthCheckFailed handles the failure of a health check. It is called
	// when a health check transitions from passing to failing, not on every
	// subsequent failure.
	// 	* entry - The recorded state of the health check that triggered the failure
	HealthCheckFailed(entry *State)

	// HealthCheckRecovered handles the recovery of a failed health check. It
	// is called when a health check passes after its previous run failed.
	// 	* entry - The recorded state of the health check that triggered the recovery
	// 	* recordedFailures - the number of contiguous failed health checks
	// 	  recorded between the first failure and the recovery
	// 	* failureDurationSeconds - the elapsed time, in seconds, between the
	// 	  first failure and the recovery
	HealthCheckRecovered(entry *State, recordedFailures int64, failureDurationSeconds float64)
}

// Config is a struct used for defining and configuring checks.
type Config struct {
Name string
Checker ICheckable
// Name of the check
Name string

// Checker instance used to perform health check
Checker ICheckable

// Interval between health checks
Interval time.Duration
Fatal bool

// Fatal marks a failing health check so that the
// entire health check request fails with a 500 error
Fatal bool
}

// The State struct contains the results of the latest run of a particular check.
// State is a struct that contains the results of the latest
// run of a particular check.
type State struct {
Name string `json:"name"`
Status string `json:"status"`
Err string `json:"error,omitempty"`
Details interface{} `json:"details,omitempty"` // contains JSON message (that can be marshaled)
CheckTime time.Time `json:"check_time"`
// Name of the health check
Name string `json:"name"`

// Status of the health check state ("ok" or "failed")
Status string `json:"status"`

// Err is the error returned from a failed health check
Err string `json:"error,omitempty"`

// Details contains more contextual detail about a
// failing health check.
Details interface{} `json:"details,omitempty"` // contains JSON message (that can be marshaled)

// CheckTime is the time of the last health check
CheckTime time.Time `json:"check_time"`

ContiguousFailures int64 `json:"num_failures"` // the number of failures that occurred in a row
TimeOfFirstFailure time.Time `json:"first_failure_at"` // the time of the initial transitional failure for any given health check
}

// isFailure reports whether the recorded status of s is "failed".
func (s *State) isFailure() bool {
	const failed = "failed"
	return s.Status == failed
}

// Health contains go-health's internal structures.
type Health struct {
Logger log.Logger

active *sBool // indicates whether the healthcheck is actively running
failed *sBool // indicates whether the healthcheck has encountered a fatal error in one of its deps
// StatusListener will report failures and recoveries
StatusListener IStatusListener

active *sBool // indicates whether the healthcheck is actively running
failed *sBool // indicates whether the healthcheck has encountered a fatal error in one of its deps
configs []*Config
states map[string]State
statesLock sync.Mutex
Expand Down Expand Up @@ -139,9 +185,7 @@ func (h *Health) Start() error {
ticker := time.NewTicker(c.Interval)
stop := make(chan struct{})

if err := h.startRunner(c, ticker, stop); err != nil {
return fmt.Errorf("Unable to create healthcheck runner '%v': %v", c.Name, err)
}
h.startRunner(c, ticker, stop)

h.runners[c.Name] = stop
}
Expand Down Expand Up @@ -191,7 +235,7 @@ func (h *Health) Failed() bool {
return h.failed.val()
}

func (h *Health) startRunner(cfg *Config, ticker *time.Ticker, stop <-chan struct{}) error {
func (h *Health) startRunner(cfg *Config, ticker *time.Ticker, stop <-chan struct{}) {

// function to execute and collect check data
checkFunc := func() {
Expand Down Expand Up @@ -242,25 +286,61 @@ func (h *Health) startRunner(cfg *Config, ticker *time.Ticker, stop <-chan struc

h.Logger.WithFields(log.Fields{"name": cfg.Name}).Debug("Checker exiting")
}()

return nil
}

// safeResetStates replaces the stored check states with a fresh, empty map
// while holding statesLock.
func (h *Health) safeResetStates() {
	h.statesLock.Lock()
	defer h.statesLock.Unlock()

	h.states = map[string]State{}
}

// safeUpdateState records stateEntry as the latest state for its check.
// The status listener handling runs first, without the lock held, because it
// takes statesLock itself and may modify stateEntry; the resulting value is
// then stored under statesLock, keyed by the check name.
func (h *Health) safeUpdateState(stateEntry *State) {
	h.handleStatusListener(stateEntry)

	h.statesLock.Lock()
	defer h.statesLock.Unlock()

	name, latest := stateEntry.Name, *stateEntry
	h.states[name] = latest
}

// safeGetStates returns a snapshot of all check states, keyed by check name.
//
// The snapshot is a copy made while holding statesLock. Returning h.states
// itself would let callers read the map after the lock is released, racing
// with safeUpdateState writing to it. The copy is shallow: State values are
// copied, but whatever a State's Details field refers to is shared.
func (h *Health) safeGetStates() map[string]State {
	h.statesLock.Lock()
	defer h.statesLock.Unlock()

	// preserve the nil-ness of the stored map for callers that distinguish it
	if h.states == nil {
		return nil
	}

	snapshot := make(map[string]State, len(h.states))
	for name, state := range h.states {
		snapshot[name] = state
	}

	return snapshot
}

// handleStatusListener compares stateEntry with the previously stored state
// of the same check, fills in the failure bookkeeping fields on stateEntry
// (TimeOfFirstFailure and ContiguousFailures), and, if a StatusListener is
// attached, notifies it of transitions:
//
//   - passing -> failing: HealthCheckFailed
//   - failing -> passing: HealthCheckRecovered, with the number of contiguous
//     failures and the seconds elapsed since the first failure
//
// Listener callbacks run in their own goroutine. Each receives a private copy
// of the state, taken after the bookkeeping fields are set, so the callback
// sees complete data and never shares memory with the caller, which keeps
// using stateEntry after this function returns.
func (h *Health) handleStatusListener(stateEntry *State) {
	// get the previous state
	h.statesLock.Lock()
	prevState := h.states[stateEntry.Name]
	h.statesLock.Unlock()

	switch {
	case stateEntry.isFailure():
		newFailure := !prevState.isFailure()
		if newFailure {
			// new failure: previous state was ok
			stateEntry.TimeOfFirstFailure = time.Now()
		} else {
			// carry the time of first failure from the previous state
			stateEntry.TimeOfFirstFailure = prevState.TimeOfFirstFailure
		}
		stateEntry.ContiguousFailures = prevState.ContiguousFailures + 1

		if newFailure && h.StatusListener != nil {
			snapshot := *stateEntry
			go h.StatusListener.HealthCheckFailed(&snapshot)
		}

	case prevState.isFailure():
		// recovery, previous state was failure
		failureSeconds := time.Since(prevState.TimeOfFirstFailure).Seconds()

		if h.StatusListener != nil {
			snapshot := *stateEntry
			go h.StatusListener.HealthCheckRecovered(&snapshot, prevState.ContiguousFailures, failureSeconds)
		}
	}
}
Loading

0 comments on commit 08eeb4f

Please sign in to comment.