package caddyhttp

import (
	"context"
	"errors"
	"fmt"
	"net/http"
	"strings"
	"sync"
	"time"

	"github.com/prometheus/client_golang/prometheus"
	"github.com/prometheus/client_golang/prometheus/promauto"
	otelprom "go.opentelemetry.io/contrib/bridges/prometheus"
	"go.opentelemetry.io/contrib/exporters/autoexport"
	sdkmetric "go.opentelemetry.io/otel/sdk/metric"
	"go.opentelemetry.io/otel/sdk/resource"
	semconv "go.opentelemetry.io/otel/semconv/v1.26.0"

	"github.com/caddyserver/caddy/v2"
	caddymetrics "github.com/caddyserver/caddy/v2/internal/metrics"
)

// Metrics configures metrics observations.
// EXPERIMENTAL and subject to change or removal.
//
// Example configuration:
//
//	{
//	  "apps": {
//	    "http": {
//	      "metrics": {
//	        "per_host": true,
//	        "observe_catchall_hosts": false
//	      },
//	      "servers": {
//	        "srv0": {
//	          "routes": [{
//	            "match": [{"host": ["example.com", "www.example.com"]}],
//	            "handle": [{"handler": "static_response", "body": "Hello"}]
//	          }]
//	        }
//	      }
//	    }
//	  }
//	}
//
// In this configuration:
//   - Requests to example.com and www.example.com get individual host labels
//   - All other hosts (e.g., attacker.com) are aggregated under the "_other" label
//   - This prevents unlimited cardinality from arbitrary Host headers
type Metrics struct {
	// Enable per-host metrics. Enabling this option may
	// incur high memory consumption, depending on the number of hosts
	// managed by Caddy.
	//
	// CARDINALITY PROTECTION: To prevent unbounded cardinality attacks,
	// only explicitly configured hosts (via host matchers) are allowed
	// by default. Other hosts are aggregated under the "_other" label.
	// See ObserveCatchallHosts to change this behavior.
	PerHost bool `json:"per_host,omitempty"`

	// Allow metrics for catch-all hosts (hosts without explicit configuration).
	// When false (default), only hosts explicitly configured via host matchers
	// will get individual metrics labels. All other hosts will be aggregated
	// under the "_other" label to prevent cardinality explosion.
	//
	// This is automatically enabled for HTTPS servers (since certificates provide
	// some protection against unbounded cardinality), but disabled for HTTP servers
	// by default to prevent cardinality attacks from arbitrary Host headers.
	//
	// Set to true to allow all hosts to get individual metrics (NOT RECOMMENDED
	// for production environments exposed to the internet).
	ObserveCatchallHosts bool `json:"observe_catchall_hosts,omitempty"`

	// Enable pushing metrics via OTLP in addition to the existing Prometheus
	// scrape endpoints. When set, a PeriodicReader is attached to the shared
	// Prometheus registry (via a Prometheus -> OpenTelemetry bridge), and the
	// exporter is autoconfigured from the standard OTEL_* environment
	// variables (OTEL_EXPORTER_OTLP_ENDPOINT, OTEL_EXPORTER_OTLP_PROTOCOL,
	// OTEL_METRICS_EXPORTER, ...). Set OTEL_METRICS_EXPORTER=none or simply
	// keep this field false to disable OTLP export.
	OTLP bool `json:"otlp,omitempty"`

	init           sync.Once
	httpMetrics    *httpMetrics
	allowedHosts   map[string]struct{}
	hasHTTPSServer bool
	meterProvider  *sdkmetric.MeterProvider
}
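
// Illustrative sketch (values are examples, not defaults): enabling OTLP push
// alongside the Prometheus scrape endpoint could look like
//
//	{"apps": {"http": {"metrics": {"per_host": true, "otlp": true}}}}
//
// with the exporter selected at runtime from the standard OTEL_* environment
// variables noted above, for example:
//
//	OTEL_EXPORTER_OTLP_ENDPOINT=http://localhost:4318 \
//	OTEL_EXPORTER_OTLP_PROTOCOL=http/protobuf \
//	caddy run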

type httpMetrics struct {
	requestInFlight  *prometheus.GaugeVec
	requestCount     *prometheus.CounterVec
	requestErrors    *prometheus.CounterVec
	requestDuration  *prometheus.HistogramVec
	requestSize      *prometheus.HistogramVec
	responseSize     *prometheus.HistogramVec
	responseDuration *prometheus.HistogramVec
}

func initHTTPMetrics(ctx caddy.Context, metrics *Metrics) {
	const ns, sub = "caddy", "http"
	registry := ctx.GetMetricsRegistry()
	basicLabels := []string{"server", "handler"}
	if metrics.PerHost {
		basicLabels = append(basicLabels, "host")
	}
	metrics.httpMetrics.requestInFlight = promauto.With(registry).NewGaugeVec(prometheus.GaugeOpts{
		Namespace: ns,
		Subsystem: sub,
		Name:      "requests_in_flight",
		Help:      "Number of requests currently handled by this server.",
	}, basicLabels)
	metrics.httpMetrics.requestErrors = promauto.With(registry).NewCounterVec(prometheus.CounterOpts{
		Namespace: ns,
		Subsystem: sub,
		Name:      "request_errors_total",
		Help:      "Number of requests resulting in middleware errors.",
	}, basicLabels)
	metrics.httpMetrics.requestCount = promauto.With(registry).NewCounterVec(prometheus.CounterOpts{
		Namespace: ns,
		Subsystem: sub,
		Name:      "requests_total",
		Help:      "Counter of HTTP(S) requests made.",
	}, basicLabels)

	// TODO: allow these to be customized in the config
	durationBuckets := prometheus.DefBuckets
	sizeBuckets := prometheus.ExponentialBuckets(256, 4, 8)

	httpLabels := []string{"server", "handler", "code", "method"}
	if metrics.PerHost {
		httpLabels = append(httpLabels, "host")
	}
	metrics.httpMetrics.requestDuration = promauto.With(registry).NewHistogramVec(prometheus.HistogramOpts{
		Namespace: ns,
		Subsystem: sub,
		Name:      "request_duration_seconds",
		Help:      "Histogram of round-trip request durations.",
		Buckets:   durationBuckets,
	}, httpLabels)
	metrics.httpMetrics.requestSize = promauto.With(registry).NewHistogramVec(prometheus.HistogramOpts{
		Namespace: ns,
		Subsystem: sub,
		Name:      "request_size_bytes",
		Help:      "Total size of the request. Includes body",
		Buckets:   sizeBuckets,
	}, httpLabels)
	metrics.httpMetrics.responseSize = promauto.With(registry).NewHistogramVec(prometheus.HistogramOpts{
		Namespace: ns,
		Subsystem: sub,
		Name:      "response_size_bytes",
		Help:      "Size of the returned response.",
		Buckets:   sizeBuckets,
	}, httpLabels)
	metrics.httpMetrics.responseDuration = promauto.With(registry).NewHistogramVec(prometheus.HistogramOpts{
		Namespace: ns,
		Subsystem: sub,
		Name:      "response_duration_seconds",
		Help:      "Histogram of times to first byte in response bodies.",
		Buckets:   durationBuckets,
	}, httpLabels)
}
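
// As a rough illustration (series and values invented), a Prometheus scrape
// would then include lines such as:
//
//	caddy_http_requests_in_flight{server="srv0",handler="static_response"} 1
//	caddy_http_requests_total{server="srv0",handler="static_response"} 42
//	caddy_http_request_duration_seconds_bucket{server="srv0",handler="static_response",code="200",method="GET",le="0.25"} 40
//
// with an additional host="..." label on every series when per_host is enabled.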

// provisionOTLP wires a MeterProvider that periodically reads the process-wide
// Prometheus registry and pushes the result via OTLP. The exporter and reader
// are autoconfigured from the standard OTEL_* environment variables, matching
// the ergonomics of the existing `tracing` directive. It is a no-op when
// m.OTLP is false, and honors OTEL_METRICS_EXPORTER=none (autoexport
// short-circuits to a no-op reader in that case).
func (m *Metrics) provisionOTLP(ctx caddy.Context) error {
	if !m.OTLP {
		return nil
	}

	// Register a Prometheus -> OpenTelemetry bridge against the process-wide
	// Prometheus registry as the *default* source the NewMetricReader below
	// will read from.
	//
	// NB: despite the "With*" naming, autoexport.WithFallbackMetricProducer is
	// a package-level setter (it returns nothing): it mutates autoexport's
	// internal producer registry and takes effect on the very next call to
	// NewMetricReader. It is NOT a MetricOption and must not be passed as one.
	// Users can still override the source by setting OTEL_METRICS_PRODUCERS.
	reg := ctx.GetMetricsRegistry()
	autoexport.WithFallbackMetricProducer(func(context.Context) (sdkmetric.Producer, error) {
		return otelprom.NewMetricProducer(otelprom.WithGatherer(reg)), nil
	})

	reader, err := autoexport.NewMetricReader(ctx)
	if err != nil {
		return fmt.Errorf("creating OTLP metric reader: %w", err)
	}

	version, _ := caddy.Version()
	res, err := resource.Merge(resource.Default(), resource.NewSchemaless(
		semconv.WebEngineName(ServerHeader),
		semconv.WebEngineVersion(version),
	))
	if err != nil {
		return fmt.Errorf("building OTLP metrics resource: %w", err)
	}

	m.meterProvider = sdkmetric.NewMeterProvider(
		sdkmetric.WithResource(res),
		sdkmetric.WithReader(reader),
	)

	return nil
}

// shutdown flushes and tears down the OTLP MeterProvider if one was provisioned.
// Both ForceFlush and Shutdown are always attempted so that a flush failure
// does not prevent the reader goroutines from being stopped; errors from both
// are returned joined.
func (m *Metrics) shutdown(ctx context.Context) error {
	if m == nil || m.meterProvider == nil {
		return nil
	}

	// ForceFlush gives the final collection a chance to reach the collector
	// before the reader goroutine is stopped by Shutdown.
	return errors.Join(
		m.meterProvider.ForceFlush(ctx),
		m.meterProvider.Shutdown(ctx),
	)
}

// scanConfigForHosts scans the HTTP app configuration to build a set of allowed hosts
// for metrics collection, similar to how auto-HTTPS scans for domain names.
func (m *Metrics) scanConfigForHosts(app *App) {
	if !m.PerHost {
		return
	}

	m.allowedHosts = make(map[string]struct{})
	m.hasHTTPSServer = false

	for _, srv := range app.Servers {
		// Check if this server has TLS enabled
		serverHasTLS := len(srv.TLSConnPolicies) > 0
		if serverHasTLS {
			m.hasHTTPSServer = true
		}

		// Collect hosts from route matchers
		for _, route := range srv.Routes {
			for _, matcherSet := range route.MatcherSets {
				for _, matcher := range matcherSet {
					if hm, ok := matcher.(*MatchHost); ok {
						for _, host := range *hm {
							// Only allow non-fuzzy hosts to prevent unbounded cardinality
							if !hm.fuzzy(host) {
								m.allowedHosts[strings.ToLower(host)] = struct{}{}
							}
						}
					}
				}
			}
		}
	}
}
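
// Illustration (assuming MatchHost reports wildcard and placeholder patterns
// as fuzzy): for a route matched on ["example.com", "*.example.com"], only
// "example.com" is added to allowedHosts; requests to subdomains therefore
// fall through to the catch-all handling in shouldAllowHostMetrics below.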

// shouldAllowHostMetrics determines if metrics should be collected for the given host.
// This implements the cardinality protection by only allowing metrics for:
// 1. Explicitly configured hosts
// 2. Catch-all requests on HTTPS servers (if ObserveCatchallHosts is true or auto-enabled)
// 3. Catch-all requests on HTTP servers only if explicitly allowed
func (m *Metrics) shouldAllowHostMetrics(host string, isHTTPS bool) bool {
	if !m.PerHost {
		return true // host won't be used in labels anyway
	}

	normalizedHost := strings.ToLower(host)

	// Always allow explicitly configured hosts
	if _, exists := m.allowedHosts[normalizedHost]; exists {
		return true
	}

	// For catch-all requests (not in allowed hosts)
	allowCatchAll := m.ObserveCatchallHosts || (isHTTPS && m.hasHTTPSServer)
	return allowCatchAll
}
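
// Illustrative outcomes with per_host enabled, allowedHosts containing only
// "example.com", and observe_catchall_hosts left false:
//
//	shouldAllowHostMetrics("example.com", false)  // true: explicitly configured
//	shouldAllowHostMetrics("attacker.com", false) // false: the caller labels it "_other"
//	shouldAllowHostMetrics("attacker.com", true)  // true only when an HTTPS server is configured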

// serverNameFromContext extracts the current server name from the context.
// Returns "UNKNOWN" if none is available (should probably never happen).
func serverNameFromContext(ctx context.Context) string {
	srv, ok := ctx.Value(ServerCtxKey).(*Server)
	if !ok || srv == nil || srv.name == "" {
		return "UNKNOWN"
	}
	return srv.name
}

// metricsInstrumentedRoute wraps a compiled route Handler with metrics
// instrumentation. It wraps the entire compiled route chain once,
// collecting metrics only once per route match.
type metricsInstrumentedRoute struct {
	handler string
	next    Handler
	metrics *Metrics
}

func newMetricsInstrumentedRoute(ctx caddy.Context, handler string, next Handler, m *Metrics) *metricsInstrumentedRoute {
	m.init.Do(func() {
		initHTTPMetrics(ctx, m)
	})

	return &metricsInstrumentedRoute{handler: handler, next: next, metrics: m}
}

func (h *metricsInstrumentedRoute) ServeHTTP(w http.ResponseWriter, r *http.Request) error {
	server := serverNameFromContext(r.Context())
	labels := prometheus.Labels{"server": server, "handler": h.handler}
	method := caddymetrics.SanitizeMethod(r.Method)
	// the "code" value is set later, but initialized here to eliminate the possibility
	// of a panic
	statusLabels := prometheus.Labels{"server": server, "handler": h.handler, "method": method, "code": ""}

	// Determine if this is an HTTPS request
	isHTTPS := r.TLS != nil

	if h.metrics.PerHost {
		// Apply cardinality protection for host metrics
		if h.metrics.shouldAllowHostMetrics(r.Host, isHTTPS) {
			labels["host"] = strings.ToLower(r.Host)
			statusLabels["host"] = strings.ToLower(r.Host)
		} else {
			// Use a catch-all label for unallowed hosts to prevent cardinality explosion
			labels["host"] = "_other"
			statusLabels["host"] = "_other"
		}
	}

	inFlight := h.metrics.httpMetrics.requestInFlight.With(labels)
	inFlight.Inc()
	defer inFlight.Dec()

	start := time.Now()

	// This is a _bit_ of a hack - it depends on the ShouldBufferFunc always
	// being called when the headers are written.
	// Effectively the same behaviour as promhttp.InstrumentHandlerTimeToWriteHeader.
	writeHeaderRecorder := ShouldBufferFunc(func(status int, header http.Header) bool {
		statusLabels["code"] = caddymetrics.SanitizeCode(status)
		ttfb := time.Since(start).Seconds()
		h.metrics.httpMetrics.responseDuration.With(statusLabels).Observe(ttfb)
		return false
	})
	wrec := NewResponseRecorder(w, nil, writeHeaderRecorder)
	err := h.next.ServeHTTP(wrec, r)
	dur := time.Since(start).Seconds()
	h.metrics.httpMetrics.requestCount.With(labels).Inc()

	observeRequest := func(status int) {
		// If the code hasn't been set yet, and we didn't encounter an error, we're
		// probably falling through with an empty handler.
		if statusLabels["code"] == "" {
			// we still sanitize it, even though it's likely to be 0. A 200 is
			// returned on fallthrough so we want to reflect that.
			statusLabels["code"] = caddymetrics.SanitizeCode(status)
		}

		h.metrics.httpMetrics.requestDuration.With(statusLabels).Observe(dur)
		h.metrics.httpMetrics.requestSize.With(statusLabels).Observe(float64(computeApproximateRequestSize(r)))
		h.metrics.httpMetrics.responseSize.With(statusLabels).Observe(float64(wrec.Size()))
	}

	if err != nil {
		var handlerErr HandlerError
		if errors.As(err, &handlerErr) {
			observeRequest(handlerErr.StatusCode)
		}

		h.metrics.httpMetrics.requestErrors.With(labels).Inc()

		return err
	}

	observeRequest(wrec.Status())

	return nil
}

// taken from https://github.com/prometheus/client_golang/blob/6007b2b5cae01203111de55f753e76d8dac1f529/prometheus/promhttp/instrument_server.go#L298
func computeApproximateRequestSize(r *http.Request) int {
	s := 0
	if r.URL != nil {
		s += len(r.URL.String())
	}

	s += len(r.Method)
	s += len(r.Proto)
	for name, values := range r.Header {
		s += len(name)
		for _, value := range values {
			s += len(value)
		}
	}
	s += len(r.Host)

	// N.B. r.Form and r.MultipartForm are assumed to be included in r.URL.

	if r.ContentLength != -1 {
		s += int(r.ContentLength)
	}
	return s
}
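
// Worked example (hypothetical request): for
//
//	GET /index.html HTTP/1.1
//	Host: example.com
//	Accept: */*
//
// with no body, the approximation is len("/index.html") + len("GET") +
// len("HTTP/1.1") + len("Accept") + len("*/*") + len("example.com") =
// 11 + 3 + 8 + 6 + 3 + 11 = 42 (a ContentLength of 0 adds nothing).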