Compare commits

...

5 Commits

Author SHA1 Message Date
Francis Lavoie 6d010189a5 Implement success ratio in health checks 2023-04-15 11:34:09 -04:00
Francis Lavoie 2c61b50b5f Add min_successes 2023-04-15 11:34:09 -04:00
Francis Lavoie c8b8c3a7b2 Add min_success_ratio WIP 2023-04-15 11:34:09 -04:00
Francis Lavoie c4b934f232 Add caddyhttp.Ratio type 2023-04-15 11:34:09 -04:00
Francis Lavoie cf69cd7b27 Add success_duration 2023-04-15 11:34:09 -04:00
7 changed files with 307 additions and 3 deletions
+78 -1
View File
@@ -17,6 +17,7 @@ package caddyhttp
import ( import (
"bytes" "bytes"
"encoding/json" "encoding/json"
"fmt"
"io" "io"
"net" "net"
"net/http" "net/http"
@@ -164,7 +165,7 @@ func (ws *WeakString) UnmarshalJSON(b []byte) error {
return nil return nil
} }
// MarshalJSON marshals was a boolean if true or false, // MarshalJSON marshals as a boolean if true or false,
// a number if an integer, or a string otherwise. // a number if an integer, or a string otherwise.
func (ws WeakString) MarshalJSON() ([]byte, error) { func (ws WeakString) MarshalJSON() ([]byte, error) {
if ws == "true" { if ws == "true" {
@@ -204,6 +205,82 @@ func (ws WeakString) String() string {
return string(ws) return string(ws)
} }
// Ratio is a type that unmarshals a valid numerical ratio string.
// Valid formats are:
//   - a/b as a fraction (a / b)
//   - a:b as a ratio (a / a+b)
//   - a floating point number
type Ratio float64

// UnmarshalJSON satisfies json.Unmarshaler according to
// this type's documentation.
//
// A JSON string must use the "a/b" or "a:b" notation; a bare JSON
// number is parsed as a float. JSON null leaves the value untouched.
// Empty input yields io.EOF, and a ratio whose divisor is zero is
// rejected rather than producing Inf or NaN.
func (r *Ratio) UnmarshalJSON(b []byte) error {
	if len(b) == 0 {
		return io.EOF
	}
	if bytes.Equal(b, []byte("null")) {
		return nil
	}

	// The length check keeps a lone quote byte from being treated as a
	// (malformed) string, which would otherwise slice out of range.
	if len(b) >= 2 && b[0] == '"' && b[len(b)-1] == '"' {
		inner := string(b[1 : len(b)-1])
		ratio, ok, err := parseRatioString(inner)
		if err != nil {
			return err
		}
		if !ok {
			return fmt.Errorf("ratio string '%s' did not contain a slash '/' or colon ':'", inner)
		}
		*r = ratio
		return nil
	}

	f, err := strconv.ParseFloat(string(b), 64)
	if err != nil {
		return fmt.Errorf("failed parsing ratio as float %s: %w", b, err)
	}
	*r = Ratio(f)
	return nil
}

// ParseRatio parses r as a Ratio. It accepts every format listed in
// the Ratio documentation: "a/b", "a:b", or a floating point number.
// It returns an error if the input is malformed or if the fraction
// or ratio would divide by zero.
func ParseRatio(r string) (Ratio, error) {
	ratio, ok, err := parseRatioString(r)
	if err != nil {
		return 0, err
	}
	if ok {
		return ratio, nil
	}

	f, err := strconv.ParseFloat(r, 64)
	if err != nil {
		return 0, fmt.Errorf("failed parsing ratio as float %s: %w", r, err)
	}
	return Ratio(f), nil
}

// parseRatioString parses the "a/b" and "a:b" notations, where both
// terms are integers. The boolean result reports whether s used one
// of those notations at all; when it is false, no error is returned
// and the caller decides how to interpret s. A slash takes precedence
// over a colon when both are present.
func parseRatioString(s string) (Ratio, bool, error) {
	isFraction := true
	left, right, found := strings.Cut(s, "/")
	if !found {
		isFraction = false
		left, right, found = strings.Cut(s, ":")
		if !found {
			return 0, false, nil
		}
	}

	num, err := strconv.Atoi(left)
	if err != nil {
		return 0, true, fmt.Errorf("failed parsing numerator as integer %s: %w", left, err)
	}
	denom, err := strconv.Atoi(right)
	if err != nil {
		return 0, true, fmt.Errorf("failed parsing denominator as integer %s: %w", right, err)
	}

	// a/b divides by b; a:b divides by a+b
	divisor := float64(denom)
	if !isFraction {
		divisor = float64(num) + float64(denom)
	}
	if divisor == 0 {
		return 0, true, fmt.Errorf("ratio '%s' divides by zero", s)
	}
	return Ratio(float64(num) / divisor), true, nil
}
// StatusCodeMatches returns true if a real HTTP status code matches // StatusCodeMatches returns true if a real HTTP status code matches
// the configured status code, which may be either a real HTTP status // the configured status code, which may be either a real HTTP status
// code or an integer representing a class of codes (e.g. 4 for all // code or an integer representing a class of codes (e.g. 4 for all
+76
View File
@@ -149,3 +149,79 @@ func TestCleanPath(t *testing.T) {
} }
} }
} }
// TestUnmarshalRatio checks Ratio.UnmarshalJSON against the fraction
// ("a/b"), ratio ("a:b"), and bare-number notations, as well as the
// error messages produced for malformed input.
func TestUnmarshalRatio(t *testing.T) {
	for i, tc := range []struct {
		input  []byte
		expect float64
		errMsg string
	}{
		{
			input:  []byte("null"),
			expect: 0,
		},
		{
			input:  []byte(`"1/3"`),
			expect: float64(1) / float64(3),
		},
		{
			input:  []byte(`"1/100"`),
			expect: float64(1) / float64(100),
		},
		{
			input:  []byte(`"3:2"`),
			expect: 0.6,
		},
		{
			input:  []byte(`"99:1"`),
			expect: 0.99,
		},
		{
			input:  []byte(`"1:1"`),
			expect: 0.5,
		},
		{
			input:  []byte(`0.1`),
			expect: 0.1,
		},
		{
			input:  []byte(`0.005`),
			expect: 0.005,
		},
		{
			input:  []byte(`0`),
			expect: 0,
		},
		{
			input:  []byte(`"0"`),
			errMsg: `ratio string '0' did not contain a slash '/' or colon ':'`,
		},
		{
			input:  []byte(`a`),
			errMsg: `failed parsing ratio as float a: strconv.ParseFloat: parsing "a": invalid syntax`,
		},
		{
			input:  []byte(`"a/1"`),
			errMsg: `failed parsing numerator as integer a: strconv.Atoi: parsing "a": invalid syntax`,
		},
		{
			input:  []byte(`"1/a"`),
			errMsg: `failed parsing denominator as integer a: strconv.Atoi: parsing "a": invalid syntax`,
		},
	} {
		var ratio Ratio
		err := ratio.UnmarshalJSON(tc.input)

		if tc.errMsg != "" {
			// an expected error that never shows up must fail the test
			// instead of falling through to the value comparison
			if err == nil {
				t.Fatalf("Test %d: expected error: %v, got none", i, tc.errMsg)
			}
			if tc.errMsg != err.Error() {
				t.Fatalf("Test %d: expected error: %v, got: %v", i, tc.errMsg, err)
			}
			continue
		}

		if err != nil {
			t.Fatalf("Test %d: invalid ratio: %v", i, err)
		}
		if ratio != Ratio(tc.expect) {
			t.Fatalf("Test %d: expected %v, got %v", i, tc.expect, ratio)
		}
	}
}
+2
View File
@@ -37,6 +37,7 @@ type upstreamStatus struct {
Address string `json:"address"` Address string `json:"address"`
NumRequests int `json:"num_requests"` NumRequests int `json:"num_requests"`
Fails int `json:"fails"` Fails int `json:"fails"`
Successes int `json:"successes"`
} }
// CaddyModule returns the Caddy module information. // CaddyModule returns the Caddy module information.
@@ -99,6 +100,7 @@ func (adminUpstreams) handleUpstreams(w http.ResponseWriter, r *http.Request) er
Address: address, Address: address,
NumRequests: upstream.NumRequests(), NumRequests: upstream.NumRequests(),
Fails: upstream.Fails(), Fails: upstream.Fails(),
Successes: upstream.Successes(),
}) })
return true return true
}) })
@@ -77,6 +77,9 @@ func parseCaddyfile(h httpcaddyfile.Helper) (caddyhttp.MiddlewareHandler, error)
// # passive health checking // # passive health checking
// fail_duration <duration> // fail_duration <duration>
// max_fails <num> // max_fails <num>
// success_duration <duration>
// min_success_ratio <ratio>
// min_successes <num>
// unhealthy_status <status> // unhealthy_status <status>
// unhealthy_latency <duration> // unhealthy_latency <duration>
// unhealthy_request_count <num> // unhealthy_request_count <num>
@@ -422,6 +425,54 @@ func (h *Handler) UnmarshalCaddyfile(d *caddyfile.Dispenser) error {
} }
h.HealthChecks.Passive.MaxFails = maxFails h.HealthChecks.Passive.MaxFails = maxFails
case "success_duration":
if !d.NextArg() {
return d.ArgErr()
}
if h.HealthChecks == nil {
h.HealthChecks = new(HealthChecks)
}
if h.HealthChecks.Passive == nil {
h.HealthChecks.Passive = new(PassiveHealthChecks)
}
dur, err := caddy.ParseDuration(d.Val())
if err != nil {
return d.Errf("bad duration value '%s': %v", d.Val(), err)
}
h.HealthChecks.Passive.SuccessDuration = caddy.Duration(dur)
case "min_success_ratio":
if !d.NextArg() {
return d.ArgErr()
}
if h.HealthChecks == nil {
h.HealthChecks = new(HealthChecks)
}
if h.HealthChecks.Passive == nil {
h.HealthChecks.Passive = new(PassiveHealthChecks)
}
ratio, err := caddyhttp.ParseRatio(d.Val())
if err != nil {
return d.Errf("bad ratio value '%s': %v", d.Val(), err)
}
h.HealthChecks.Passive.MinSuccessRatio = ratio
case "min_successes":
if !d.NextArg() {
return d.ArgErr()
}
if h.HealthChecks == nil {
h.HealthChecks = new(HealthChecks)
}
if h.HealthChecks.Passive == nil {
h.HealthChecks.Passive = new(PassiveHealthChecks)
}
count, err := strconv.Atoi(d.Val())
if err != nil {
return d.Errf("invalid minimum success count '%s': %v", d.Val(), err)
}
h.HealthChecks.Passive.MinSuccesses = count
case "fail_duration": case "fail_duration":
if !d.NextArg() { if !d.NextArg() {
return d.ArgErr() return d.ArgErr()
+68 -2
View File
@@ -110,8 +110,8 @@ type ActiveHealthChecks struct {
// health checks (that is, health checks which occur during // health checks (that is, health checks which occur during
// the normal flow of request proxying). // the normal flow of request proxying).
type PassiveHealthChecks struct { type PassiveHealthChecks struct {
// How long to remember a failed request to a backend. A duration > 0 // How long to remember a failed request to a backend.
// enables passive health checking. Default is 0. // A duration > 0 enables passive health checking. Default is 0.
FailDuration caddy.Duration `json:"fail_duration,omitempty"` FailDuration caddy.Duration `json:"fail_duration,omitempty"`
// The number of failed requests within the FailDuration window to // The number of failed requests within the FailDuration window to
@@ -119,6 +119,22 @@ type PassiveHealthChecks struct {
// that FailDuration be > 0. // that FailDuration be > 0.
MaxFails int `json:"max_fails,omitempty"` MaxFails int `json:"max_fails,omitempty"`
// How long to remember a successful request to a backend. Default is 0.
SuccessDuration caddy.Duration `json:"success_duration,omitempty"`
// The minimum ratio of successful to failed requests necessary to
// consider a backend as healthy. Both fail and success durations
// must be configured for those stats to be counted. Default is 0 (no ratio).
MinSuccessRatio caddyhttp.Ratio `json:"min_success_ratio,omitempty"`
// The minimum number of successful requests before considering the
// minimum success ratio. Default is 5. Requires MinSuccessRatio > 0.
//
// If there are less than this many successful requests, then the ratio is
// ignored, because of a lack of data. This ensures that the upstream isn't
// prematurely considered unhealthy because no requests have happened yet.
MinSuccesses int `json:"min_successes,omitempty"`
// Limits the number of simultaneous requests to a backend by // Limits the number of simultaneous requests to a backend by
// marking the backend as "down" if it has this many concurrent // marking the backend as "down" if it has this many concurrent
// requests or more. // requests or more.
@@ -362,6 +378,56 @@ func (h *Handler) doActiveHealthCheck(dialInfo DialInfo, hostAddr string, upstre
return nil return nil
} }
// countSuccess records one successful request against upstream for
// passive health checking, then forgets it again after the configured
// success duration. It does nothing when passive health checks are
// not configured or when the success duration is 0.
func (h *Handler) countSuccess(upstream *Upstream) {
	// successes are only tracked when passive health checking is
	// configured and successes have a non-zero expiry
	if h.HealthChecks == nil || h.HealthChecks.Passive == nil {
		return
	}
	ttl := time.Duration(h.HealthChecks.Passive.SuccessDuration)
	if ttl == 0 {
		return
	}

	// remember the success right away
	if err := upstream.Host.countSuccess(1); err != nil {
		h.HealthChecks.Passive.logger.Error("could not count success",
			zap.String("host", upstream.Dial),
			zap.Error(err))
		return
	}

	// forget it once the timer fires or h.ctx is done, whichever
	// happens first
	go func(host *Host, ttl time.Duration) {
		defer func() {
			if rec := recover(); rec != nil {
				h.HealthChecks.Passive.logger.Error("passive health check success forgetter panicked",
					zap.Any("error", rec),
					zap.ByteString("stack", debug.Stack()))
			}
		}()

		expiry := time.NewTimer(ttl)
		select {
		case <-h.ctx.Done():
			// drain the channel if the timer already fired
			if !expiry.Stop() {
				<-expiry.C
			}
		case <-expiry.C:
		}

		if err := host.countSuccess(-1); err != nil {
			h.HealthChecks.Passive.logger.Error("could not forget success",
				zap.String("host", upstream.Dial),
				zap.Error(err))
		}
	}(upstream.Host, ttl)
}
// countFailure is used with passive health checks. It // countFailure is used with passive health checks. It
// remembers 1 failure for upstream for the configured // remembers 1 failure for upstream for the configured
// duration. If passive health checks are disabled or // duration. If passive health checks are disabled or
+25
View File
@@ -84,6 +84,15 @@ func (u *Upstream) Healthy() bool {
if healthy && u.healthCheckPolicy != nil { if healthy && u.healthCheckPolicy != nil {
healthy = u.Host.Fails() < u.healthCheckPolicy.MaxFails healthy = u.Host.Fails() < u.healthCheckPolicy.MaxFails
} }
if healthy && u.healthCheckPolicy != nil &&
u.healthCheckPolicy.MinSuccessRatio > 0 {
successes := u.Host.Successes()
if successes >= u.healthCheckPolicy.MinSuccesses {
fails := u.Host.Fails()
healthRatio := float64(fails) / float64(successes)
healthy = healthRatio < (1 - float64(u.healthCheckPolicy.MinSuccessRatio))
}
}
if healthy && u.cb != nil { if healthy && u.cb != nil {
healthy = u.cb.OK() healthy = u.cb.OK()
} }
@@ -136,6 +145,7 @@ func (u *Upstream) fillHost() {
// Its fields are accessed atomically and Host values must not be copied. // Its fields are accessed atomically and Host values must not be copied.
type Host struct { type Host struct {
numRequests int64 // must be 64-bit aligned on 32-bit systems (see https://golang.org/pkg/sync/atomic/#pkg-note-BUG) numRequests int64 // must be 64-bit aligned on 32-bit systems (see https://golang.org/pkg/sync/atomic/#pkg-note-BUG)
successes int64
fails int64 fails int64
} }
@@ -144,6 +154,11 @@ func (h *Host) NumRequests() int {
return int(atomic.LoadInt64(&h.numRequests)) return int(atomic.LoadInt64(&h.numRequests))
} }
// Successes reports how many recent successes are currently
// counted for the upstream. The counter is read atomically.
func (h *Host) Successes() int {
	count := atomic.LoadInt64(&h.successes)
	return int(count)
}
// Fails returns the number of recent failures with the upstream. // Fails returns the number of recent failures with the upstream.
func (h *Host) Fails() int { func (h *Host) Fails() int {
return int(atomic.LoadInt64(&h.fails)) return int(atomic.LoadInt64(&h.fails))
@@ -159,6 +174,16 @@ func (h *Host) countRequest(delta int) error {
return nil return nil
} }
// countSuccess atomically adds delta to the recent successes
// count. It returns an error if the resulting count is negative;
// the adjustment itself is still applied in that case.
func (h *Host) countSuccess(delta int) error {
	if total := atomic.AddInt64(&h.successes, int64(delta)); total < 0 {
		return fmt.Errorf("count below 0: %d", total)
	}
	return nil
}
// countFail mutates the recent failures count by // countFail mutates the recent failures count by
// delta. It returns an error if the adjustment fails. // delta. It returns an error if the adjustment fails.
func (h *Host) countFail(delta int) error { func (h *Host) countFail(delta int) error {
@@ -352,6 +352,10 @@ func (h *Handler) Provision(ctx caddy.Context) error {
if h.HealthChecks.Passive.FailDuration > 0 && h.HealthChecks.Passive.MaxFails == 0 { if h.HealthChecks.Passive.FailDuration > 0 && h.HealthChecks.Passive.MaxFails == 0 {
h.HealthChecks.Passive.MaxFails = 1 h.HealthChecks.Passive.MaxFails = 1
} }
if h.HealthChecks.Passive.MinSuccessRatio > 0 && h.HealthChecks.Passive.MinSuccesses == 0 {
h.HealthChecks.Passive.MinSuccesses = 5
}
} }
// if active health checks are enabled, configure them and start a worker // if active health checks are enabled, configure them and start a worker
@@ -562,6 +566,7 @@ func (h *Handler) proxyLoopIteration(r *http.Request, origReq *http.Request, w h
repl.Set("http.reverse_proxy.upstream.port", dialInfo.Port) repl.Set("http.reverse_proxy.upstream.port", dialInfo.Port)
repl.Set("http.reverse_proxy.upstream.requests", upstream.Host.NumRequests()) repl.Set("http.reverse_proxy.upstream.requests", upstream.Host.NumRequests())
repl.Set("http.reverse_proxy.upstream.max_requests", upstream.MaxRequests) repl.Set("http.reverse_proxy.upstream.max_requests", upstream.MaxRequests)
repl.Set("http.reverse_proxy.upstream.successes", upstream.Host.Successes())
repl.Set("http.reverse_proxy.upstream.fails", upstream.Host.Fails()) repl.Set("http.reverse_proxy.upstream.fails", upstream.Host.Fails())
// mutate request headers according to this upstream; // mutate request headers according to this upstream;
@@ -580,6 +585,7 @@ func (h *Handler) proxyLoopIteration(r *http.Request, origReq *http.Request, w h
if proxyErr == nil || errors.Is(proxyErr, context.Canceled) { if proxyErr == nil || errors.Is(proxyErr, context.Canceled) {
// context.Canceled happens when the downstream client // context.Canceled happens when the downstream client
// cancels the request, which is not our failure // cancels the request, which is not our failure
h.countSuccess(upstream)
return true, nil return true, nil
} }
@@ -588,6 +594,7 @@ func (h *Handler) proxyLoopIteration(r *http.Request, origReq *http.Request, w h
// occur after the roundtrip if, for example, a response handler // occur after the roundtrip if, for example, a response handler
// after the roundtrip returns an error) // after the roundtrip returns an error)
if succ, ok := proxyErr.(roundtripSucceeded); ok { if succ, ok := proxyErr.(roundtripSucceeded); ok {
h.countSuccess(upstream)
return true, succ.error return true, succ.error
} }