// Copyright 2015 Matthew Holt and The Caddy Authors // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. package reverseproxy import ( "context" "testing" "time" "github.com/caddyserver/caddy/v2" ) // newPassiveHandler builds a minimal Handler with passive health checks // configured and a live caddy.Context so the fail-forgetter goroutine can // be cancelled cleanly. The caller must call cancel() when done. func newPassiveHandler(t *testing.T, maxFails int, failDuration time.Duration) (*Handler, context.CancelFunc) { t.Helper() caddyCtx, cancel := caddy.NewContext(caddy.Context{Context: context.Background()}) h := &Handler{ ctx: caddyCtx, HealthChecks: &HealthChecks{ Passive: &PassiveHealthChecks{ MaxFails: maxFails, FailDuration: caddy.Duration(failDuration), }, }, } return h, cancel } // provisionedStaticUpstream creates a static upstream, registers it in the // UsagePool, and returns a cleanup func that removes it from the pool. func provisionedStaticUpstream(t *testing.T, h *Handler, addr string) (*Upstream, func()) { t.Helper() u := &Upstream{Dial: addr} h.provisionUpstream(u, false) return u, func() { _, _ = hosts.Delete(addr) } } // provisionedDynamicUpstream creates a dynamic upstream, registers it in // dynamicHosts, and returns a cleanup func that removes it. func provisionedDynamicUpstream(t *testing.T, h *Handler, addr string) (*Upstream, func()) { t.Helper() u := &Upstream{Dial: addr} h.provisionUpstream(u, true) return u, func() { dynamicHostsMu.Lock() delete(dynamicHosts, addr) dynamicHostsMu.Unlock() } } // --- countFailure behaviour --- // TestCountFailureNoopWhenNoHealthChecks verifies that countFailure is a no-op // when HealthChecks is nil. func TestCountFailureNoopWhenNoHealthChecks(t *testing.T) { resetDynamicHosts() h := &Handler{} u := &Upstream{Dial: "10.1.0.1:80", Host: new(Host)} h.countFailure(u) if u.Host.Fails() != 0 { t.Errorf("expected 0 fails with no HealthChecks config, got %d", u.Host.Fails()) } } // TestCountFailureNoopWhenZeroDuration verifies that countFailure is a no-op // when FailDuration is 0 (the zero value disables passive checks). func TestCountFailureNoopWhenZeroDuration(t *testing.T) { resetDynamicHosts() caddyCtx, cancel := caddy.NewContext(caddy.Context{Context: context.Background()}) defer cancel() h := &Handler{ ctx: caddyCtx, HealthChecks: &HealthChecks{ Passive: &PassiveHealthChecks{MaxFails: 1, FailDuration: 0}, }, } u := &Upstream{Dial: "10.1.0.2:80", Host: new(Host)} h.countFailure(u) if u.Host.Fails() != 0 { t.Errorf("expected 0 fails with zero FailDuration, got %d", u.Host.Fails()) } } // TestCountFailureIncrementsCount verifies that countFailure increments the // fail count on the upstream's Host. func TestCountFailureIncrementsCount(t *testing.T) { resetDynamicHosts() h, cancel := newPassiveHandler(t, 2, time.Minute) defer cancel() u := &Upstream{Dial: "10.1.0.3:80", Host: new(Host)} h.countFailure(u) if u.Host.Fails() != 1 { t.Errorf("expected 1 fail after countFailure, got %d", u.Host.Fails()) } } // TestCountFailureDecrementsAfterDuration verifies that the fail count is // decremented back after FailDuration elapses. func TestCountFailureDecrementsAfterDuration(t *testing.T) { resetDynamicHosts() const failDuration = 50 * time.Millisecond h, cancel := newPassiveHandler(t, 2, failDuration) defer cancel() u := &Upstream{Dial: "10.1.0.4:80", Host: new(Host)} h.countFailure(u) if u.Host.Fails() != 1 { t.Fatalf("expected 1 fail immediately after countFailure, got %d", u.Host.Fails()) } // Wait long enough for the forgetter goroutine to fire. time.Sleep(3 * failDuration) if u.Host.Fails() != 0 { t.Errorf("expected fail count to return to 0 after FailDuration, got %d", u.Host.Fails()) } } // TestCountFailureCancelledContextForgets verifies that cancelling the handler // context (simulating a config unload) also triggers the forgetter to run, // decrementing the fail count. func TestCountFailureCancelledContextForgets(t *testing.T) { resetDynamicHosts() h, cancel := newPassiveHandler(t, 2, time.Hour) // very long duration u := &Upstream{Dial: "10.1.0.5:80", Host: new(Host)} h.countFailure(u) if u.Host.Fails() != 1 { t.Fatalf("expected 1 fail immediately after countFailure, got %d", u.Host.Fails()) } // Cancelling the context should cause the forgetter goroutine to exit and // decrement the count. cancel() time.Sleep(50 * time.Millisecond) if u.Host.Fails() != 0 { t.Errorf("expected fail count to be decremented after context cancel, got %d", u.Host.Fails()) } } // --- static upstream passive health check --- // TestStaticUpstreamHealthyWithNoFailures verifies that a static upstream with // no recorded failures is considered healthy. func TestStaticUpstreamHealthyWithNoFailures(t *testing.T) { resetDynamicHosts() h, cancel := newPassiveHandler(t, 2, time.Minute) defer cancel() u, cleanup := provisionedStaticUpstream(t, h, "10.2.0.1:80") defer cleanup() if !u.Healthy() { t.Error("upstream with no failures should be healthy") } } // TestStaticUpstreamUnhealthyAtMaxFails verifies that a static upstream is // marked unhealthy once its fail count reaches MaxFails. func TestStaticUpstreamUnhealthyAtMaxFails(t *testing.T) { resetDynamicHosts() h, cancel := newPassiveHandler(t, 2, time.Minute) defer cancel() u, cleanup := provisionedStaticUpstream(t, h, "10.2.0.2:80") defer cleanup() h.countFailure(u) if !u.Healthy() { t.Error("upstream should still be healthy after 1 of 2 allowed failures") } h.countFailure(u) if u.Healthy() { t.Error("upstream should be unhealthy after reaching MaxFails=2") } } // TestStaticUpstreamRecoversAfterFailDuration verifies that a static upstream // returns to healthy once its failures expire. func TestStaticUpstreamRecoversAfterFailDuration(t *testing.T) { resetDynamicHosts() const failDuration = 50 * time.Millisecond h, cancel := newPassiveHandler(t, 1, failDuration) defer cancel() u, cleanup := provisionedStaticUpstream(t, h, "10.2.0.3:80") defer cleanup() h.countFailure(u) if u.Healthy() { t.Fatal("upstream should be unhealthy immediately after MaxFails failure") } time.Sleep(3 * failDuration) if !u.Healthy() { t.Errorf("upstream should recover to healthy after FailDuration, Fails=%d", u.Host.Fails()) } } // TestStaticUpstreamHealthPersistedAcrossReprovisioning verifies that static // upstreams share a Host via the UsagePool, so a second call to provisionUpstream // for the same address (as happens on config reload) sees the accumulated state. func TestStaticUpstreamHealthPersistedAcrossReprovisioning(t *testing.T) { resetDynamicHosts() h, cancel := newPassiveHandler(t, 2, time.Minute) defer cancel() u1, cleanup1 := provisionedStaticUpstream(t, h, "10.2.0.4:80") defer cleanup1() h.countFailure(u1) h.countFailure(u1) // Simulate a second handler instance referencing the same upstream // (e.g. after a config reload that keeps the same backend address). u2, cleanup2 := provisionedStaticUpstream(t, h, "10.2.0.4:80") defer cleanup2() if u1.Host != u2.Host { t.Fatal("expected both Upstream structs to share the same *Host via UsagePool") } if u2.Healthy() { t.Error("re-provisioned upstream should still see the prior fail count and be unhealthy") } } // --- dynamic upstream passive health check --- // TestDynamicUpstreamHealthyWithNoFailures verifies that a freshly provisioned // dynamic upstream is healthy. func TestDynamicUpstreamHealthyWithNoFailures(t *testing.T) { resetDynamicHosts() h, cancel := newPassiveHandler(t, 2, time.Minute) defer cancel() u, cleanup := provisionedDynamicUpstream(t, h, "10.3.0.1:80") defer cleanup() if !u.Healthy() { t.Error("dynamic upstream with no failures should be healthy") } } // TestDynamicUpstreamUnhealthyAtMaxFails verifies that a dynamic upstream is // marked unhealthy once its fail count reaches MaxFails. func TestDynamicUpstreamUnhealthyAtMaxFails(t *testing.T) { resetDynamicHosts() h, cancel := newPassiveHandler(t, 2, time.Minute) defer cancel() u, cleanup := provisionedDynamicUpstream(t, h, "10.3.0.2:80") defer cleanup() h.countFailure(u) if !u.Healthy() { t.Error("dynamic upstream should still be healthy after 1 of 2 allowed failures") } h.countFailure(u) if u.Healthy() { t.Error("dynamic upstream should be unhealthy after reaching MaxFails=2") } } // TestDynamicUpstreamFailCountPersistedBetweenRequests is the core regression // test: it simulates two sequential (non-concurrent) requests to the same // dynamic upstream. Before the fix, the UsagePool entry would be deleted // between requests, wiping the fail count. Now it should survive. func TestDynamicUpstreamFailCountPersistedBetweenRequests(t *testing.T) { resetDynamicHosts() h, cancel := newPassiveHandler(t, 2, time.Minute) defer cancel() // --- first request --- u1 := &Upstream{Dial: "10.3.0.3:80"} h.provisionUpstream(u1, true) h.countFailure(u1) if u1.Host.Fails() != 1 { t.Fatalf("expected 1 fail after first request, got %d", u1.Host.Fails()) } // Simulate end of first request: no delete from any pool (key difference // vs. the old behaviour where hosts.Delete was deferred). // --- second request: brand-new *Upstream struct, same dial address --- u2 := &Upstream{Dial: "10.3.0.3:80"} h.provisionUpstream(u2, true) if u1.Host != u2.Host { t.Fatal("expected both requests to share the same *Host pointer from dynamicHosts") } if u2.Host.Fails() != 1 { t.Errorf("expected fail count to persist across requests, got %d", u2.Host.Fails()) } // A second failure now tips it over MaxFails=2. h.countFailure(u2) if u2.Healthy() { t.Error("upstream should be unhealthy after accumulated failures across requests") } // Cleanup. dynamicHostsMu.Lock() delete(dynamicHosts, "10.3.0.3:80") dynamicHostsMu.Unlock() } // TestDynamicUpstreamRecoveryAfterFailDuration verifies that a dynamic // upstream's fail count expires and it returns to healthy. func TestDynamicUpstreamRecoveryAfterFailDuration(t *testing.T) { resetDynamicHosts() const failDuration = 50 * time.Millisecond h, cancel := newPassiveHandler(t, 1, failDuration) defer cancel() u, cleanup := provisionedDynamicUpstream(t, h, "10.3.0.4:80") defer cleanup() h.countFailure(u) if u.Healthy() { t.Fatal("upstream should be unhealthy immediately after MaxFails failure") } time.Sleep(3 * failDuration) // Re-provision (as a new request would) to get fresh *Upstream with policy set. u2 := &Upstream{Dial: "10.3.0.4:80"} h.provisionUpstream(u2, true) if !u2.Healthy() { t.Errorf("dynamic upstream should recover to healthy after FailDuration, Fails=%d", u2.Host.Fails()) } } // TestDynamicUpstreamMaxRequestsFromUnhealthyRequestCount verifies that // UnhealthyRequestCount is copied into MaxRequests so Full() works correctly. func TestDynamicUpstreamMaxRequestsFromUnhealthyRequestCount(t *testing.T) { resetDynamicHosts() caddyCtx, cancel := caddy.NewContext(caddy.Context{Context: context.Background()}) defer cancel() h := &Handler{ ctx: caddyCtx, HealthChecks: &HealthChecks{ Passive: &PassiveHealthChecks{ UnhealthyRequestCount: 3, }, }, } u, cleanup := provisionedDynamicUpstream(t, h, "10.3.0.5:80") defer cleanup() if u.MaxRequests != 3 { t.Errorf("expected MaxRequests=3 from UnhealthyRequestCount, got %d", u.MaxRequests) } // Should not be full with fewer requests than the limit. _ = u.Host.countRequest(2) if u.Full() { t.Error("upstream should not be full with 2 of 3 allowed requests") } _ = u.Host.countRequest(1) if !u.Full() { t.Error("upstream should be full at UnhealthyRequestCount concurrent requests") } }