mirror of
				https://github.com/caddyserver/caddy.git
				synced 2025-10-25 07:49:19 -04:00 
			
		
		
		
	reverseproxy: Make shallow-ish clone of the request (#4551)
* reverseproxy: Make shallow-ish clone of the request * Refactor request cloning into separate function Co-authored-by: Matthew Holt <mholt@users.noreply.github.com>
This commit is contained in:
		
							parent
							
								
									6b385a36f9
								
							
						
					
					
						commit
						f5e104944e
					
				| @ -358,54 +358,20 @@ func (h *Handler) Cleanup() error { | |||||||
| func (h *Handler) ServeHTTP(w http.ResponseWriter, r *http.Request, next caddyhttp.Handler) error { | func (h *Handler) ServeHTTP(w http.ResponseWriter, r *http.Request, next caddyhttp.Handler) error { | ||||||
| 	repl := r.Context().Value(caddy.ReplacerCtxKey).(*caddy.Replacer) | 	repl := r.Context().Value(caddy.ReplacerCtxKey).(*caddy.Replacer) | ||||||
| 
 | 
 | ||||||
| 	// if enabled, buffer client request; |  | ||||||
| 	// this should only be enabled if the |  | ||||||
| 	// upstream requires it and does not |  | ||||||
| 	// work with "slow clients" (gunicorn, |  | ||||||
| 	// etc.) - this obviously has a perf |  | ||||||
| 	// overhead and makes the proxy at |  | ||||||
| 	// risk of exhausting memory and more |  | ||||||
| 	// susceptible to slowloris attacks, |  | ||||||
| 	// so it is strongly recommended to |  | ||||||
| 	// only use this feature if absolutely |  | ||||||
| 	// required, if read timeouts are set, |  | ||||||
| 	// and if body size is limited |  | ||||||
| 	if h.BufferRequests { |  | ||||||
| 		r.Body = h.bufferedBody(r.Body) |  | ||||||
| 	} |  | ||||||
| 
 |  | ||||||
| 	// prepare the request for proxying; this is needed only once | 	// prepare the request for proxying; this is needed only once | ||||||
| 	err := h.prepareRequest(r) | 	clonedReq, err := h.prepareRequest(r) | ||||||
| 	if err != nil { | 	if err != nil { | ||||||
| 		return caddyhttp.Error(http.StatusInternalServerError, | 		return caddyhttp.Error(http.StatusInternalServerError, | ||||||
| 			fmt.Errorf("preparing request for upstream round-trip: %v", err)) | 			fmt.Errorf("preparing request for upstream round-trip: %v", err)) | ||||||
| 	} | 	} | ||||||
| 
 | 
 | ||||||
| 	// we will need the original headers and Host value if | 	// we will need the original headers and Host value if | ||||||
| 	// header operations are configured; and we should | 	// header operations are configured; this is so that each | ||||||
| 	// restore them after we're done if they are changed | 	// retry can apply the modifications, because placeholders | ||||||
| 	// (for example, changing the outbound Host header | 	// may be used which depend on the selected upstream for | ||||||
| 	// should not permanently change r.Host; issue #3509) | 	// their values | ||||||
| 	reqHost := r.Host | 	reqHost := clonedReq.Host | ||||||
| 	reqHeader := r.Header | 	reqHeader := clonedReq.Header | ||||||
| 
 |  | ||||||
| 	// sanitize the request URL; we expect it to not contain the scheme and host |  | ||||||
| 	// since those should be determined by r.TLS and r.Host respectively, but |  | ||||||
| 	// some clients may include it in the request-line, which is technically |  | ||||||
| 	// valid in HTTP, but breaks reverseproxy behaviour, overriding how the |  | ||||||
| 	// dialer will behave. See #4237 for context. |  | ||||||
| 	origURLScheme := r.URL.Scheme |  | ||||||
| 	origURLHost := r.URL.Host |  | ||||||
| 	r.URL.Scheme = "" |  | ||||||
| 	r.URL.Host = "" |  | ||||||
| 
 |  | ||||||
| 	// restore modifications to the request after we're done proxying |  | ||||||
| 	defer func() { |  | ||||||
| 		r.Host = reqHost     // TODO: data race, see #4038 |  | ||||||
| 		r.Header = reqHeader // TODO: data race, see #4038 |  | ||||||
| 		r.URL.Scheme = origURLScheme |  | ||||||
| 		r.URL.Host = origURLHost |  | ||||||
| 	}() |  | ||||||
| 
 | 
 | ||||||
| 	start := time.Now() | 	start := time.Now() | ||||||
| 	defer func() { | 	defer func() { | ||||||
| @ -416,12 +382,12 @@ func (h *Handler) ServeHTTP(w http.ResponseWriter, r *http.Request, next caddyht | |||||||
| 	var proxyErr error | 	var proxyErr error | ||||||
| 	for { | 	for { | ||||||
| 		// choose an available upstream | 		// choose an available upstream | ||||||
| 		upstream := h.LoadBalancing.SelectionPolicy.Select(h.Upstreams, r, w) | 		upstream := h.LoadBalancing.SelectionPolicy.Select(h.Upstreams, clonedReq, w) | ||||||
| 		if upstream == nil { | 		if upstream == nil { | ||||||
| 			if proxyErr == nil { | 			if proxyErr == nil { | ||||||
| 				proxyErr = fmt.Errorf("no upstreams available") | 				proxyErr = fmt.Errorf("no upstreams available") | ||||||
| 			} | 			} | ||||||
| 			if !h.LoadBalancing.tryAgain(h.ctx, start, proxyErr, r) { | 			if !h.LoadBalancing.tryAgain(h.ctx, start, proxyErr, clonedReq) { | ||||||
| 				break | 				break | ||||||
| 			} | 			} | ||||||
| 			continue | 			continue | ||||||
| @ -430,7 +396,7 @@ func (h *Handler) ServeHTTP(w http.ResponseWriter, r *http.Request, next caddyht | |||||||
| 		// the dial address may vary per-request if placeholders are | 		// the dial address may vary per-request if placeholders are | ||||||
| 		// used, so perform those replacements here; the resulting | 		// used, so perform those replacements here; the resulting | ||||||
| 		// DialInfo struct should have valid network address syntax | 		// DialInfo struct should have valid network address syntax | ||||||
| 		dialInfo, err := upstream.fillDialInfo(r) | 		dialInfo, err := upstream.fillDialInfo(clonedReq) | ||||||
| 		if err != nil { | 		if err != nil { | ||||||
| 			return statusError(fmt.Errorf("making dial info: %v", err)) | 			return statusError(fmt.Errorf("making dial info: %v", err)) | ||||||
| 		} | 		} | ||||||
| @ -451,17 +417,17 @@ func (h *Handler) ServeHTTP(w http.ResponseWriter, r *http.Request, next caddyht | |||||||
| 
 | 
 | ||||||
| 		// mutate request headers according to this upstream; | 		// mutate request headers according to this upstream; | ||||||
| 		// because we're in a retry loop, we have to copy | 		// because we're in a retry loop, we have to copy | ||||||
| 		// headers (and the r.Host value) from the original | 		// headers (and the Host value) from the original | ||||||
| 		// so that each retry is identical to the first | 		// so that each retry is identical to the first | ||||||
| 		if h.Headers != nil && h.Headers.Request != nil { | 		if h.Headers != nil && h.Headers.Request != nil { | ||||||
| 			r.Header = make(http.Header) | 			clonedReq.Header = make(http.Header) | ||||||
| 			copyHeader(r.Header, reqHeader) | 			copyHeader(clonedReq.Header, reqHeader) | ||||||
| 			r.Host = reqHost | 			clonedReq.Host = reqHost | ||||||
| 			h.Headers.Request.ApplyToRequest(r) | 			h.Headers.Request.ApplyToRequest(clonedReq) | ||||||
| 		} | 		} | ||||||
| 
 | 
 | ||||||
| 		// proxy the request to that upstream | 		// proxy the request to that upstream | ||||||
| 		proxyErr = h.reverseProxy(w, r, repl, dialInfo, next) | 		proxyErr = h.reverseProxy(w, clonedReq, repl, dialInfo, next) | ||||||
| 		if proxyErr == nil || proxyErr == context.Canceled { | 		if proxyErr == nil || proxyErr == context.Canceled { | ||||||
| 			// context.Canceled happens when the downstream client | 			// context.Canceled happens when the downstream client | ||||||
| 			// cancels the request, which is not our failure | 			// cancels the request, which is not our failure | ||||||
| @ -480,7 +446,7 @@ func (h *Handler) ServeHTTP(w http.ResponseWriter, r *http.Request, next caddyht | |||||||
| 		h.countFailure(upstream) | 		h.countFailure(upstream) | ||||||
| 
 | 
 | ||||||
| 		// if we've tried long enough, break | 		// if we've tried long enough, break | ||||||
| 		if !h.LoadBalancing.tryAgain(h.ctx, start, proxyErr, r) { | 		if !h.LoadBalancing.tryAgain(h.ctx, start, proxyErr, clonedReq) { | ||||||
| 			break | 			break | ||||||
| 		} | 		} | ||||||
| 	} | 	} | ||||||
| @ -488,14 +454,27 @@ func (h *Handler) ServeHTTP(w http.ResponseWriter, r *http.Request, next caddyht | |||||||
| 	return statusError(proxyErr) | 	return statusError(proxyErr) | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| // prepareRequest modifies req so that it is ready to be proxied, | // prepareRequest clones req so that it can be safely modified without | ||||||
| // except for directing to a specific upstream. This method mutates | // changing the original request or introducing data races. It then | ||||||
| // headers and other necessary properties of the request and should | // modifies it so that it is ready to be proxied, except for directing | ||||||
| // be done just once (before proxying) regardless of proxy retries. | // to a specific upstream. This method adjusts headers and other relevant | ||||||
| // This assumes that no mutations of the request are performed | // properties of the cloned request and should be done just once (before | ||||||
| // by h during or after proxying. | // proxying) regardless of proxy retries. This assumes that no mutations | ||||||
| func (h Handler) prepareRequest(req *http.Request) error { | // of the cloned request are performed by h during or after proxying. | ||||||
| 	// most of this is borrowed from the Go std lib reverse proxy | func (h Handler) prepareRequest(req *http.Request) (*http.Request, error) { | ||||||
|  | 	req = cloneRequest(req) | ||||||
|  | 
 | ||||||
|  | 	// if enabled, buffer client request; this should only be | ||||||
|  | 	// enabled if the upstream requires it and does not work | ||||||
|  | 	// with "slow clients" (gunicorn, etc.) - this obviously | ||||||
|  | 	// has a perf overhead and makes the proxy at risk of | ||||||
|  | 	// exhausting memory and more susceptible to slowloris | ||||||
|  | 	// attacks, so it is strongly recommended to only use this | ||||||
|  | 	// feature if absolutely required, if read timeouts are | ||||||
|  | 	// set, and if body size is limited | ||||||
|  | 	if h.BufferRequests { | ||||||
|  | 		req.Body = h.bufferedBody(req.Body) | ||||||
|  | 	} | ||||||
| 
 | 
 | ||||||
| 	if req.ContentLength == 0 { | 	if req.ContentLength == 0 { | ||||||
| 		req.Body = nil // Issue golang/go#16036: nil Body for http.Transport retries | 		req.Body = nil // Issue golang/go#16036: nil Body for http.Transport retries | ||||||
| @ -560,7 +539,7 @@ func (h Handler) prepareRequest(req *http.Request) error { | |||||||
| 		req.Header.Set("X-Forwarded-Proto", proto) | 		req.Header.Set("X-Forwarded-Proto", proto) | ||||||
| 	} | 	} | ||||||
| 
 | 
 | ||||||
| 	return nil | 	return req, nil | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| // reverseProxy performs a round-trip to the given backend and processes the response with the client. | // reverseProxy performs a round-trip to the given backend and processes the response with the client. | ||||||
| @ -807,7 +786,7 @@ func (lb LoadBalancing) tryAgain(ctx caddy.Context, start time.Time, proxyErr er | |||||||
| 
 | 
 | ||||||
| // directRequest modifies only req.URL so that it points to the upstream | // directRequest modifies only req.URL so that it points to the upstream | ||||||
| // in the given DialInfo. It must modify ONLY the request URL. | // in the given DialInfo. It must modify ONLY the request URL. | ||||||
| func (h Handler) directRequest(req *http.Request, di DialInfo) { | func (Handler) directRequest(req *http.Request, di DialInfo) { | ||||||
| 	// we need a host, so set the upstream's host address | 	// we need a host, so set the upstream's host address | ||||||
| 	reqHost := di.Address | 	reqHost := di.Address | ||||||
| 
 | 
 | ||||||
| @ -845,6 +824,42 @@ func (h Handler) bufferedBody(originalBody io.ReadCloser) io.ReadCloser { | |||||||
| 	} | 	} | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
|  | // cloneRequest makes a semi-deep clone of origReq. | ||||||
|  | // | ||||||
|  | // Most of this code is borrowed from the Go stdlib reverse proxy, | ||||||
|  | // but we make a shallow-ish clone the request (deep clone only | ||||||
|  | // the headers and URL) so we can avoid manipulating the original | ||||||
|  | // request when using it to proxy upstream. This prevents request | ||||||
|  | // corruption and data races. | ||||||
|  | func cloneRequest(origReq *http.Request) *http.Request { | ||||||
|  | 	req := new(http.Request) | ||||||
|  | 	*req = *origReq | ||||||
|  | 	if origReq.URL != nil { | ||||||
|  | 		newURL := new(url.URL) | ||||||
|  | 		*newURL = *origReq.URL | ||||||
|  | 		if origReq.URL.User != nil { | ||||||
|  | 			newURL.User = new(url.Userinfo) | ||||||
|  | 			*newURL.User = *origReq.URL.User | ||||||
|  | 		} | ||||||
|  | 		// sanitize the request URL; we expect it to not contain the | ||||||
|  | 		// scheme and host since those should be determined by r.TLS | ||||||
|  | 		// and r.Host respectively, but some clients may include it | ||||||
|  | 		// in the request-line, which is technically valid in HTTP, | ||||||
|  | 		// but breaks reverseproxy behaviour, overriding how the | ||||||
|  | 		// dialer will behave. See #4237 for context. | ||||||
|  | 		newURL.Scheme = "" | ||||||
|  | 		newURL.Host = "" | ||||||
|  | 		req.URL = newURL | ||||||
|  | 	} | ||||||
|  | 	if origReq.Header != nil { | ||||||
|  | 		req.Header = origReq.Header.Clone() | ||||||
|  | 	} | ||||||
|  | 	if origReq.Trailer != nil { | ||||||
|  | 		req.Trailer = origReq.Trailer.Clone() | ||||||
|  | 	} | ||||||
|  | 	return req | ||||||
|  | } | ||||||
|  | 
 | ||||||
| func copyHeader(dst, src http.Header) { | func copyHeader(dst, src http.Header) { | ||||||
| 	for k, vv := range src { | 	for k, vv := range src { | ||||||
| 		for _, v := range vv { | 		for _, v := range vv { | ||||||
|  | |||||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user