From a5e7c6e232573b8a8df2946914a84a36070d4b9f Mon Sep 17 00:00:00 2001 From: Tom Paulus Date: Wed, 4 Mar 2026 12:17:02 -0800 Subject: [PATCH] reverseproxy: prevent body close on dial-error retries (#7547) --- .../caddyhttp/reverseproxy/retries_test.go | 257 ++++++++++++++++++ .../caddyhttp/reverseproxy/reverseproxy.go | 35 ++- 2 files changed, 281 insertions(+), 11 deletions(-) create mode 100644 modules/caddyhttp/reverseproxy/retries_test.go diff --git a/modules/caddyhttp/reverseproxy/retries_test.go b/modules/caddyhttp/reverseproxy/retries_test.go new file mode 100644 index 000000000..056223d4c --- /dev/null +++ b/modules/caddyhttp/reverseproxy/retries_test.go @@ -0,0 +1,257 @@ +package reverseproxy + +import ( + "errors" + "io" + "net" + "net/http" + "net/http/httptest" + "strings" + "sync" + "testing" + + "go.uber.org/zap" + + "github.com/caddyserver/caddy/v2" + "github.com/caddyserver/caddy/v2/modules/caddyhttp" +) + +// prepareTestRequest injects the context values that ServeHTTP and +// proxyLoopIteration require (caddy.ReplacerCtxKey, VarsCtxKey, etc.) using +// the same helper that the real HTTP server uses. +// +// A zero-value Server is passed so that caddyhttp.ServerCtxKey is set to a +// non-nil pointer; reverseProxy dereferences it to check ShouldLogCredentials. +func prepareTestRequest(req *http.Request) *http.Request { + repl := caddy.NewReplacer() + return caddyhttp.PrepareRequest(req, repl, nil, &caddyhttp.Server{}) +} + +// closeOnCloseReader is an io.ReadCloser whose Close method actually makes +// subsequent reads fail, mimicking the behaviour of a real HTTP request body +// (as opposed to io.NopCloser, whose Close is a no-op and would mask the bug +// we are testing). +type closeOnCloseReader struct { + mu sync.Mutex + r *strings.Reader + closed bool +} + +func newCloseOnCloseReader(s string) *closeOnCloseReader { + return &closeOnCloseReader{r: strings.NewReader(s)} +} + +func (c *closeOnCloseReader) Read(p []byte) (int, error) { + c.mu.Lock() + defer c.mu.Unlock() + if c.closed { + return 0, errors.New("http: invalid Read on closed Body") + } + return c.r.Read(p) +} + +func (c *closeOnCloseReader) Close() error { + c.mu.Lock() + defer c.mu.Unlock() + c.closed = true + return nil +} + +// deadUpstreamAddr returns a TCP address that is guaranteed to refuse +// connections: we bind a listener, note its address, close it immediately, +// and return the address. Any dial to that address will get ECONNREFUSED. +func deadUpstreamAddr(t *testing.T) string { + t.Helper() + ln, err := net.Listen("tcp", "127.0.0.1:0") + if err != nil { + t.Fatalf("failed to create dead upstream listener: %v", err) + } + addr := ln.Addr().String() + ln.Close() + return addr +} + +// testTransport wraps http.Transport to: +// 1. Set the URL scheme to "http" when it is empty (matching what +// HTTPTransport.SetScheme does in production; cloneRequest strips the +// scheme intentionally so a plain *http.Transport would fail with +// "unsupported protocol scheme"). +// 2. Wrap dial errors as DialError so that tryAgain correctly identifies them +// as safe-to-retry regardless of request method (as HTTPTransport does in +// production via its custom dialer). +type testTransport struct{ *http.Transport } + +func (t testTransport) RoundTrip(req *http.Request) (*http.Response, error) { + if req.URL.Scheme == "" { + req.URL.Scheme = "http" + } + resp, err := t.Transport.RoundTrip(req) + if err != nil { + // Wrap dial errors as DialError to match production behaviour. + // Without this wrapping, tryAgain treats ECONNREFUSED on a POST + // request as non-retryable (only GET is retried by default when + // the error is not a DialError). + var opErr *net.OpError + if errors.As(err, &opErr) && opErr.Op == "dial" { + return nil, DialError{err} + } + } + return resp, err +} + +// minimalHandler returns a Handler with only the fields required by ServeHTTP +// set directly, bypassing Provision (which requires a full Caddy runtime). +// RoundRobinSelection is used so that successive iterations of the proxy loop +// advance through the upstream pool in a predictable order. +func minimalHandler(retries int, upstreams ...*Upstream) *Handler { + return &Handler{ + logger: zap.NewNop(), + Transport: testTransport{&http.Transport{}}, + Upstreams: upstreams, + LoadBalancing: &LoadBalancing{ + Retries: retries, + SelectionPolicy: &RoundRobinSelection{}, + // RetryMatch intentionally nil: dial errors are always retried + // regardless of RetryMatch or request method. + }, + // ctx, connections, connectionsMu, events: zero/nil values are safe + // for the code paths exercised by these tests (TryInterval=0 so + // ctx.Done() is never consulted; no WebSocket hijacking; no passive + // health-check event emission). + } +} + +// TestDialErrorBodyRetry verifies that a POST request whose body has NOT been +// pre-buffered via request_buffers can still be retried after a dial error. +// +// Before the fix, a dial error caused Go's transport to close the shared body +// (via cloneRequest's shallow copy), so the retry attempt would read from an +// already-closed io.ReadCloser and produce: +// +// http: invalid Read on closed Body → HTTP 502 +// +// After the fix the handler wraps the body in noCloseBody when retries are +// configured, preventing the transport's Close() from propagating to the +// shared body. Since dial errors never read any bytes, the body remains at +// position 0 for the retry. +func TestDialErrorBodyRetry(t *testing.T) { + // Good upstream: echoes the request body with 200 OK. + goodServer := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + body, err := io.ReadAll(r.Body) + if err != nil { + http.Error(w, "read body: "+err.Error(), http.StatusInternalServerError) + return + } + w.WriteHeader(http.StatusOK) + _, _ = w.Write(body) + })) + t.Cleanup(goodServer.Close) + + const requestBody = "hello, retry" + + tests := []struct { + name string + method string + body string + retries int + wantStatus int + wantBody string + }{ + { + // Core regression case: POST with a body, no request_buffers, + // dial error on first upstream → retry to second upstream succeeds. + name: "POST body retried after dial error", + method: http.MethodPost, + body: requestBody, + retries: 1, + wantStatus: http.StatusOK, + wantBody: requestBody, + }, + { + // Dial errors are always retried regardless of method, but there + // is no body to re-read, so GET has always worked. Keep it as a + // sanity check that we did not break the no-body path. + name: "GET without body retried after dial error", + method: http.MethodGet, + body: "", + retries: 1, + wantStatus: http.StatusOK, + wantBody: "", + }, + { + // Without any retry configuration the handler must give up on the + // first dial error and return a 502. Confirms no wrapping occurs + // in the no-retry path. + name: "no retries configured returns 502 on dial error", + method: http.MethodPost, + body: requestBody, + retries: 0, + wantStatus: http.StatusBadGateway, + wantBody: "", + }, + } + + for _, tc := range tests { + t.Run(tc.name, func(t *testing.T) { + dead := deadUpstreamAddr(t) + + // Build the upstream pool. RoundRobinSelection starts its + // counter at 0 and increments before returning, so with a + // two-element pool it picks index 1 first, then index 0. + // Put the good upstream at index 0 and the dead one at + // index 1 so that: + // attempt 1 → pool[1] = dead → DialError (ECONNREFUSED) + // attempt 2 → pool[0] = good → 200 + upstreams := []*Upstream{ + {Host: new(Host), Dial: goodServer.Listener.Addr().String()}, + {Host: new(Host), Dial: dead}, + } + if tc.retries == 0 { + // For the "no retries" case use only the dead upstream so + // there is nowhere to retry to. + upstreams = []*Upstream{ + {Host: new(Host), Dial: dead}, + } + } + + h := minimalHandler(tc.retries, upstreams...) + + // Use closeOnCloseReader so that Close() truly prevents further + // reads, matching real http.body semantics. io.NopCloser would + // mask the bug because its Close is a no-op. + var bodyReader io.ReadCloser + if tc.body != "" { + bodyReader = newCloseOnCloseReader(tc.body) + } + req := httptest.NewRequest(tc.method, "http://example.com/", bodyReader) + if bodyReader != nil { + // httptest.NewRequest wraps the reader in NopCloser; replace + // it with our close-aware reader so Close() is propagated. + req.Body = bodyReader + req.ContentLength = int64(len(tc.body)) + } + req = prepareTestRequest(req) + + rec := httptest.NewRecorder() + err := h.ServeHTTP(rec, req, caddyhttp.HandlerFunc(func(w http.ResponseWriter, r *http.Request) error { + return nil + })) + + // For error cases (e.g. 502) ServeHTTP returns a HandlerError + // rather than writing the status itself. + gotStatus := rec.Code + if err != nil { + if herr, ok := err.(caddyhttp.HandlerError); ok { + gotStatus = herr.StatusCode + } + } + + if gotStatus != tc.wantStatus { + t.Errorf("status: got %d, want %d (err=%v)", gotStatus, tc.wantStatus, err) + } + if tc.wantBody != "" && rec.Body.String() != tc.wantBody { + t.Errorf("body: got %q, want %q", rec.Body.String(), tc.wantBody) + } + }) + } +} diff --git a/modules/caddyhttp/reverseproxy/reverseproxy.go b/modules/caddyhttp/reverseproxy/reverseproxy.go index 2ea063bd7..2169d1717 100644 --- a/modules/caddyhttp/reverseproxy/reverseproxy.go +++ b/modules/caddyhttp/reverseproxy/reverseproxy.go @@ -482,18 +482,31 @@ func (h *Handler) ServeHTTP(w http.ResponseWriter, r *http.Request, next caddyht reqHost := clonedReq.Host reqHeader := clonedReq.Header - // If the cloned request body was fully buffered, keep a reference to its - // buffer so we can reuse it across retries and return it to the pool - // once we’re done. + // When retries are configured and there is a body, wrap it in + // io.NopCloser to prevent Go's transport from closing it on dial + // errors. cloneRequest does a shallow copy, so clonedReq.Body and + // r.Body share the same io.ReadCloser — a dial-failure Close() + // would kill the original body for all subsequent retry attempts. + // The real body is closed by the HTTP server when the handler + // returns. + // + // If the body was already fully buffered (via request_buffers), + // we also extract the buffer so the retry loop can replay it + // from the beginning on each attempt. (see #6259, #7546) var bufferedReqBody *bytes.Buffer - if reqBodyBuf, ok := clonedReq.Body.(bodyReadCloser); ok && reqBodyBuf.body == nil && reqBodyBuf.buf != nil { - bufferedReqBody = reqBodyBuf.buf - reqBodyBuf.buf = nil - - defer func() { - bufferedReqBody.Reset() - bufPool.Put(bufferedReqBody) - }() + if clonedReq.Body != nil && h.LoadBalancing != nil && + (h.LoadBalancing.Retries > 0 || h.LoadBalancing.TryDuration > 0) { + if reqBodyBuf, ok := clonedReq.Body.(bodyReadCloser); ok && reqBodyBuf.body == nil && reqBodyBuf.buf != nil { + bufferedReqBody = reqBodyBuf.buf + reqBodyBuf.buf = nil + clonedReq.Body = io.NopCloser(bytes.NewReader(bufferedReqBody.Bytes())) + defer func() { + bufferedReqBody.Reset() + bufPool.Put(bufferedReqBody) + }() + } else { + clonedReq.Body = io.NopCloser(clonedReq.Body) + } } start := time.Now()