Compare commits

...

5 Commits

Author SHA1 Message Date
Francis Lavoie 6d010189a5 Implement success ratio in health checks 2023-04-15 11:34:09 -04:00
Francis Lavoie 2c61b50b5f Add min_successes 2023-04-15 11:34:09 -04:00
Francis Lavoie c8b8c3a7b2 Add min_success_ratio WIP 2023-04-15 11:34:09 -04:00
Francis Lavoie c4b934f232 Add caddyhttp.Ratio type 2023-04-15 11:34:09 -04:00
Francis Lavoie cf69cd7b27 Add success_duration 2023-04-15 11:34:09 -04:00
7 changed files with 307 additions and 3 deletions
+78 -1
View File
@@ -17,6 +17,7 @@ package caddyhttp
import (
"bytes"
"encoding/json"
"fmt"
"io"
"net"
"net/http"
@@ -164,7 +165,7 @@ func (ws *WeakString) UnmarshalJSON(b []byte) error {
return nil
}
// MarshalJSON marshals was a boolean if true or false,
// MarshalJSON marshals as a boolean if true or false,
// a number if an integer, or a string otherwise.
func (ws WeakString) MarshalJSON() ([]byte, error) {
if ws == "true" {
@@ -204,6 +205,82 @@ func (ws WeakString) String() string {
return string(ws)
}
// Ratio is a type that unmarshals a valid numerical ratio string.
// Valid formats are:
//   - a/b as a fraction (a / b)
//   - a:b as a ratio (a / a+b)
//   - a floating point number
//
// In JSON, the fraction and ratio forms must be quoted strings, while
// the floating point form must be a bare number. ParseRatio accepts
// all three forms as plain text.
type Ratio float64

// UnmarshalJSON satisfies json.Unmarshaler according to
// this type's documentation.
//
// Empty input yields io.EOF and a JSON null leaves the receiver
// untouched. A quoted string must contain a '/' or ':' separator with
// an integer on each side; anything unquoted is parsed as a float.
func (r *Ratio) UnmarshalJSON(b []byte) error {
	if len(b) == 0 {
		return io.EOF
	}

	// A quoted value needs both of its delimiters: requiring at least
	// two bytes keeps a lone '"' from being sliced out of range.
	if len(b) >= 2 && b[0] == '"' && b[len(b)-1] == '"' {
		s := string(b[1 : len(b)-1])
		ratio, ok, err := parseRatioParts(s)
		if err != nil {
			return err
		}
		if !ok {
			return fmt.Errorf("ratio string '%s' did not contain a slash '/' or colon ':'", s)
		}
		*r = ratio
		return nil
	}

	if bytes.Equal(b, []byte("null")) {
		return nil
	}

	f, err := strconv.ParseFloat(string(b), 64)
	if err != nil {
		return fmt.Errorf("failed parsing ratio as float %s: %w", b, err)
	}
	*r = Ratio(f)
	return nil
}

// ParseRatio parses r as a Ratio from plain (unquoted) text, such as
// a Caddyfile argument. It accepts every format listed in the Ratio
// documentation: "a/b", "a:b", or a floating point number.
func ParseRatio(r string) (Ratio, error) {
	ratio, ok, err := parseRatioParts(r)
	if err != nil {
		return 0, err
	}
	if ok {
		return ratio, nil
	}

	f, err := strconv.ParseFloat(r, 64)
	if err != nil {
		return 0, fmt.Errorf("failed parsing ratio as float %s: %w", r, err)
	}
	return Ratio(f), nil
}

// parseRatioParts parses the "a/b" and "a:b" forms of a Ratio.
// The boolean result is false (with a nil error) when s contains
// neither separator, so the caller can decide how to treat s. The
// slash takes precedence when both separators are present.
func parseRatioParts(s string) (Ratio, bool, error) {
	sep := "/"
	left, right, found := strings.Cut(s, sep)
	if !found {
		sep = ":"
		if left, right, found = strings.Cut(s, sep); !found {
			return 0, false, nil
		}
	}

	num, err := strconv.Atoi(left)
	if err != nil {
		return 0, false, fmt.Errorf("failed parsing numerator as integer %s: %w", left, err)
	}
	denom, err := strconv.Atoi(right)
	if err != nil {
		return 0, false, fmt.Errorf("failed parsing denominator as integer %s: %w", right, err)
	}

	// a/b divides by b, whereas a:b divides by the total a+b.
	divisor := float64(denom)
	if sep == ":" {
		divisor += float64(num)
	}
	// Reject a zero divisor rather than silently producing Inf or NaN.
	if divisor == 0 {
		return 0, false, fmt.Errorf("ratio '%s' would divide by zero", s)
	}
	return Ratio(float64(num) / divisor), true, nil
}
// StatusCodeMatches returns true if a real HTTP status code matches
// the configured status code, which may be either a real HTTP status
// code or an integer representing a class of codes (e.g. 4 for all
+76
View File
@@ -149,3 +149,79 @@ func TestCleanPath(t *testing.T) {
}
}
}
// TestUnmarshalRatio checks Ratio.UnmarshalJSON against quoted
// fraction ("a/b") and ratio ("a:b") strings, bare JSON numbers,
// null, and malformed input. A case with a non-empty errMsg must
// fail with exactly that message; every other case must succeed
// and produce expect.
func TestUnmarshalRatio(t *testing.T) {
	for i, tc := range []struct {
		input  []byte
		expect float64
		errMsg string
	}{
		{
			input:  []byte("null"),
			expect: 0,
		},
		{
			input:  []byte(`"1/3"`),
			expect: float64(1) / float64(3),
		},
		{
			input:  []byte(`"1/100"`),
			expect: float64(1) / float64(100),
		},
		{
			input:  []byte(`"3:2"`),
			expect: 0.6,
		},
		{
			input:  []byte(`"99:1"`),
			expect: 0.99,
		},
		{
			input:  []byte(`"1:1"`),
			expect: 0.5,
		},
		{
			input:  []byte(`0.1`),
			expect: 0.1,
		},
		{
			input:  []byte(`0.005`),
			expect: 0.005,
		},
		{
			input:  []byte(`0`),
			expect: 0,
		},
		{
			input:  []byte(`"0"`),
			errMsg: `ratio string '0' did not contain a slash '/' or colon ':'`,
		},
		{
			input:  []byte(`a`),
			errMsg: `failed parsing ratio as float a: strconv.ParseFloat: parsing "a": invalid syntax`,
		},
		{
			input:  []byte(`"a/1"`),
			errMsg: `failed parsing numerator as integer a: strconv.Atoi: parsing "a": invalid syntax`,
		},
		{
			input:  []byte(`"1/a"`),
			errMsg: `failed parsing denominator as integer a: strconv.Atoi: parsing "a": invalid syntax`,
		},
		{
			input:  []byte(`"a:1"`),
			errMsg: `failed parsing numerator as integer a: strconv.Atoi: parsing "a": invalid syntax`,
		},
		{
			input:  []byte(`"1:a"`),
			errMsg: `failed parsing denominator as integer a: strconv.Atoi: parsing "a": invalid syntax`,
		},
	} {
		ratio := Ratio(0)
		err := ratio.UnmarshalJSON(tc.input)

		if tc.errMsg != "" {
			// An expected failure has to actually fail: otherwise the
			// untouched zero ratio would equal the zero-valued expect
			// and the case would pass without testing anything.
			if err == nil {
				t.Errorf("Test %d: expected error: %v, got none (ratio %v)", i, tc.errMsg, ratio)
			} else if err.Error() != tc.errMsg {
				t.Errorf("Test %d: expected error: %v, got: %v", i, tc.errMsg, err)
			}
			continue
		}

		if err != nil {
			t.Errorf("Test %d: invalid ratio: %v", i, err)
			continue
		}
		if ratio != Ratio(tc.expect) {
			t.Errorf("Test %d: expected %v, got %v", i, tc.expect, ratio)
		}
	}
}
+2
View File
@@ -37,6 +37,7 @@ type upstreamStatus struct {
Address string `json:"address"`
NumRequests int `json:"num_requests"`
Fails int `json:"fails"`
Successes int `json:"successes"`
}
// CaddyModule returns the Caddy module information.
@@ -99,6 +100,7 @@ func (adminUpstreams) handleUpstreams(w http.ResponseWriter, r *http.Request) er
Address: address,
NumRequests: upstream.NumRequests(),
Fails: upstream.Fails(),
Successes: upstream.Successes(),
})
return true
})
@@ -77,6 +77,9 @@ func parseCaddyfile(h httpcaddyfile.Helper) (caddyhttp.MiddlewareHandler, error)
// # passive health checking
// fail_duration <duration>
// max_fails <num>
// success_duration <duration>
// min_success_ratio <ratio>
// min_successes <num>
// unhealthy_status <status>
// unhealthy_latency <duration>
// unhealthy_request_count <num>
@@ -422,6 +425,54 @@ func (h *Handler) UnmarshalCaddyfile(d *caddyfile.Dispenser) error {
}
h.HealthChecks.Passive.MaxFails = maxFails
case "success_duration":
if !d.NextArg() {
return d.ArgErr()
}
if h.HealthChecks == nil {
h.HealthChecks = new(HealthChecks)
}
if h.HealthChecks.Passive == nil {
h.HealthChecks.Passive = new(PassiveHealthChecks)
}
dur, err := caddy.ParseDuration(d.Val())
if err != nil {
return d.Errf("bad duration value '%s': %v", d.Val(), err)
}
h.HealthChecks.Passive.SuccessDuration = caddy.Duration(dur)
case "min_success_ratio":
if !d.NextArg() {
return d.ArgErr()
}
if h.HealthChecks == nil {
h.HealthChecks = new(HealthChecks)
}
if h.HealthChecks.Passive == nil {
h.HealthChecks.Passive = new(PassiveHealthChecks)
}
ratio, err := caddyhttp.ParseRatio(d.Val())
if err != nil {
return d.Errf("bad ratio value '%s': %v", d.Val(), err)
}
h.HealthChecks.Passive.MinSuccessRatio = ratio
case "min_successes":
if !d.NextArg() {
return d.ArgErr()
}
if h.HealthChecks == nil {
h.HealthChecks = new(HealthChecks)
}
if h.HealthChecks.Passive == nil {
h.HealthChecks.Passive = new(PassiveHealthChecks)
}
count, err := strconv.Atoi(d.Val())
if err != nil {
return d.Errf("invalid minimum success count '%s': %v", d.Val(), err)
}
h.HealthChecks.Passive.MinSuccesses = count
case "fail_duration":
if !d.NextArg() {
return d.ArgErr()
+68 -2
View File
@@ -110,8 +110,8 @@ type ActiveHealthChecks struct {
// health checks (that is, health checks which occur during
// the normal flow of request proxying).
type PassiveHealthChecks struct {
// How long to remember a failed request to a backend. A duration > 0
// enables passive health checking. Default is 0.
// How long to remember a failed request to a backend.
// A duration > 0 enables passive health checking. Default is 0.
FailDuration caddy.Duration `json:"fail_duration,omitempty"`
// The number of failed requests within the FailDuration window to
@@ -119,6 +119,22 @@ type PassiveHealthChecks struct {
// that FailDuration be > 0.
MaxFails int `json:"max_fails,omitempty"`
// How long to remember a successful request to a backend. Default is 0.
SuccessDuration caddy.Duration `json:"success_duration,omitempty"`
// The minimum ratio of successful to failed requests necessary to
// consider a backend as healthy. Both fail and success durations
// must be configured for those stats to be counted. Default is 0 (no ratio).
MinSuccessRatio caddyhttp.Ratio `json:"min_success_ratio,omitempty"`
// The minimum number of successful requests before considering the
// minimum success ratio. Default is 5. Requires MinSuccessRatio > 0.
//
// If there are less than this many successful requests, then the ratio is
// ignored, because of a lack of data. This ensures that the upstream isn't
// prematurely considered unhealthy because no requests have happened yet.
MinSuccesses int `json:"min_successes,omitempty"`
// Limits the number of simultaneous requests to a backend by
// marking the backend as "down" if it has this many concurrent
// requests or more.
@@ -362,6 +378,56 @@ func (h *Handler) doActiveHealthCheck(dialInfo DialInfo, hostAddr string, upstre
return nil
}
// countSuccess records one successful request against upstream for
// passive health checking and schedules that success to be forgotten
// once the configured success duration has elapsed (or as soon as the
// handler's context is done). It does nothing when passive health
// checks are not configured or the success duration is 0.
func (h *Handler) countSuccess(upstream *Upstream) {
	// successes are only tracked when passive health checks exist...
	if h.HealthChecks == nil {
		return
	}
	passive := h.HealthChecks.Passive
	if passive == nil {
		return
	}

	// ...and when a success is remembered for a non-zero time
	window := time.Duration(passive.SuccessDuration)
	if window == 0 {
		return
	}

	// remember the success right away
	if err := upstream.Host.countSuccess(1); err != nil {
		passive.logger.Error("could not count success",
			zap.String("host", upstream.Dial),
			zap.Error(err))
		return
	}

	// then drop it again in the background once the window closes
	host := upstream.Host
	go func() {
		defer func() {
			rec := recover()
			if rec == nil {
				return
			}
			h.HealthChecks.Passive.logger.Error("passive health check success forgetter panicked",
				zap.Any("error", rec),
				zap.ByteString("stack", debug.Stack()))
		}()

		expiry := time.NewTimer(window)
		select {
		case <-expiry.C:
		case <-h.ctx.Done():
			// drain the channel if the timer fired while we were stopping it
			if !expiry.Stop() {
				<-expiry.C
			}
		}

		if err := host.countSuccess(-1); err != nil {
			h.HealthChecks.Passive.logger.Error("could not forget success",
				zap.String("host", upstream.Dial),
				zap.Error(err))
		}
	}()
}
// countFailure is used with passive health checks. It
// remembers 1 failure for upstream for the configured
// duration. If passive health checks are disabled or
+25
View File
@@ -84,6 +84,15 @@ func (u *Upstream) Healthy() bool {
if healthy && u.healthCheckPolicy != nil {
healthy = u.Host.Fails() < u.healthCheckPolicy.MaxFails
}
if healthy && u.healthCheckPolicy != nil &&
u.healthCheckPolicy.MinSuccessRatio > 0 {
successes := u.Host.Successes()
if successes >= u.healthCheckPolicy.MinSuccesses {
fails := u.Host.Fails()
healthRatio := float64(fails) / float64(successes)
healthy = healthRatio < (1 - float64(u.healthCheckPolicy.MinSuccessRatio))
}
}
if healthy && u.cb != nil {
healthy = u.cb.OK()
}
@@ -136,6 +145,7 @@ func (u *Upstream) fillHost() {
// Its fields are accessed atomically and Host values must not be copied.
type Host struct {
	numRequests int64 // must be 64-bit aligned on 32-bit systems (see https://golang.org/pkg/sync/atomic/#pkg-note-BUG)
	successes int64 // recent success count; read by Successes and adjusted by countSuccess
	fails int64 // recent failure count; read by Fails
}
@@ -144,6 +154,11 @@ func (h *Host) NumRequests() int {
return int(atomic.LoadInt64(&h.numRequests))
}
// Successes reports how many recent successes are currently
// counted for the upstream. The counter is loaded atomically.
func (h *Host) Successes() int {
	count := atomic.LoadInt64(&h.successes)
	return int(count)
}
// Fails returns the number of recent failures with the upstream.
func (h *Host) Fails() int {
return int(atomic.LoadInt64(&h.fails))
@@ -159,6 +174,16 @@ func (h *Host) countRequest(delta int) error {
return nil
}
// countSuccess atomically adds delta to the recent successes
// count. An error is returned when the resulting count is
// negative; the adjustment itself is still applied in that case.
func (h *Host) countSuccess(delta int) error {
	if total := atomic.AddInt64(&h.successes, int64(delta)); total < 0 {
		return fmt.Errorf("count below 0: %d", total)
	}
	return nil
}
// countFail mutates the recent failures count by
// delta. It returns an error if the adjustment fails.
func (h *Host) countFail(delta int) error {
@@ -352,6 +352,10 @@ func (h *Handler) Provision(ctx caddy.Context) error {
if h.HealthChecks.Passive.FailDuration > 0 && h.HealthChecks.Passive.MaxFails == 0 {
h.HealthChecks.Passive.MaxFails = 1
}
if h.HealthChecks.Passive.MinSuccessRatio > 0 && h.HealthChecks.Passive.MinSuccesses == 0 {
h.HealthChecks.Passive.MinSuccesses = 5
}
}
// if active health checks are enabled, configure them and start a worker
@@ -562,6 +566,7 @@ func (h *Handler) proxyLoopIteration(r *http.Request, origReq *http.Request, w h
repl.Set("http.reverse_proxy.upstream.port", dialInfo.Port)
repl.Set("http.reverse_proxy.upstream.requests", upstream.Host.NumRequests())
repl.Set("http.reverse_proxy.upstream.max_requests", upstream.MaxRequests)
repl.Set("http.reverse_proxy.upstream.successes", upstream.Host.Successes())
repl.Set("http.reverse_proxy.upstream.fails", upstream.Host.Fails())
// mutate request headers according to this upstream;
@@ -580,6 +585,7 @@ func (h *Handler) proxyLoopIteration(r *http.Request, origReq *http.Request, w h
if proxyErr == nil || errors.Is(proxyErr, context.Canceled) {
// context.Canceled happens when the downstream client
// cancels the request, which is not our failure
h.countSuccess(upstream)
return true, nil
}
@@ -588,6 +594,7 @@ func (h *Handler) proxyLoopIteration(r *http.Request, origReq *http.Request, w h
// occur after the roundtrip if, for example, a response handler
// after the roundtrip returns an error)
if succ, ok := proxyErr.(roundtripSucceeded); ok {
h.countSuccess(upstream)
return true, succ.error
}