mirror of
https://github.com/caddyserver/caddy.git
synced 2025-06-02 21:24:35 -04:00
caddyhttp: Optimize large host matchers
This commit is contained in:
parent
4cff36d731
commit
9157051f45
@ -23,6 +23,7 @@ import (
|
|||||||
"net/url"
|
"net/url"
|
||||||
"path/filepath"
|
"path/filepath"
|
||||||
"regexp"
|
"regexp"
|
||||||
|
"sort"
|
||||||
"strings"
|
"strings"
|
||||||
|
|
||||||
"github.com/caddyserver/caddy/v2"
|
"github.com/caddyserver/caddy/v2"
|
||||||
@ -51,6 +52,8 @@ type (
|
|||||||
//
|
//
|
||||||
// The wildcard can be useful for matching all subdomains, for example:
|
// The wildcard can be useful for matching all subdomains, for example:
|
||||||
// `*.example.com` matches `foo.example.com` but not `foo.bar.example.com`.
|
// `*.example.com` matches `foo.example.com` but not `foo.bar.example.com`.
|
||||||
|
//
|
||||||
|
// Duplicate entries will return an error.
|
||||||
MatchHost []string
|
MatchHost []string
|
||||||
|
|
||||||
// MatchPath matches requests by the URI's path (case-insensitive). Path
|
// MatchPath matches requests by the URI's path (case-insensitive). Path
|
||||||
@ -167,6 +170,40 @@ func (m *MatchHost) UnmarshalCaddyfile(d *caddyfile.Dispenser) error {
|
|||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Provision sets up and validates m, including making it more efficient for large lists.
|
||||||
|
func (m MatchHost) Provision(_ caddy.Context) error {
|
||||||
|
// check for duplicates; they are nonsensical and reduce efficiency
|
||||||
|
// (we could just remove them, but the user should know their config is erroneous)
|
||||||
|
seen := make(map[string]int)
|
||||||
|
for i, h := range m {
|
||||||
|
h = strings.ToLower(h)
|
||||||
|
if firstI, ok := seen[h]; ok {
|
||||||
|
return fmt.Errorf("host at index %d is repeated at index %d: %s", firstI, i, h)
|
||||||
|
}
|
||||||
|
seen[h] = i
|
||||||
|
}
|
||||||
|
|
||||||
|
if m.large() {
|
||||||
|
// sort the slice lexicographically, grouping "fuzzy" entries (wildcards and placeholders)
|
||||||
|
// at the front of the list; this allows us to use binary search for exact matches, which
|
||||||
|
// we have seen from experience is the most common kind of value in large lists; and any
|
||||||
|
// other kinds of values (wildcards and placeholders) are grouped in front so the linear
|
||||||
|
// search should find a match fairly quickly
|
||||||
|
sort.Slice(m, func(i, j int) bool {
|
||||||
|
iInexact, jInexact := m.fuzzy(m[i]), m.fuzzy(m[j])
|
||||||
|
if iInexact && !jInexact {
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
if !iInexact && jInexact {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
return m[i] < m[j]
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
// Match returns true if r matches m.
|
// Match returns true if r matches m.
|
||||||
func (m MatchHost) Match(r *http.Request) bool {
|
func (m MatchHost) Match(r *http.Request) bool {
|
||||||
reqHost, _, err := net.SplitHostPort(r.Host)
|
reqHost, _, err := net.SplitHostPort(r.Host)
|
||||||
@ -179,10 +216,31 @@ func (m MatchHost) Match(r *http.Request) bool {
|
|||||||
reqHost = strings.TrimSuffix(reqHost, "]")
|
reqHost = strings.TrimSuffix(reqHost, "]")
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if m.large() {
|
||||||
|
// fast path: locate exact match using binary search (about 100-1000x faster for large lists)
|
||||||
|
pos := sort.Search(len(m), func(i int) bool {
|
||||||
|
if m.fuzzy(m[i]) {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
return m[i] >= reqHost
|
||||||
|
})
|
||||||
|
if pos < len(m) && m[pos] == reqHost {
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
repl := r.Context().Value(caddy.ReplacerCtxKey).(*caddy.Replacer)
|
repl := r.Context().Value(caddy.ReplacerCtxKey).(*caddy.Replacer)
|
||||||
|
|
||||||
outer:
|
outer:
|
||||||
for _, host := range m {
|
for _, host := range m {
|
||||||
|
// fast path: if matcher is large, we already know we don't have an exact
|
||||||
|
// match, so we're only looking for fuzzy match now, which should be at the
|
||||||
|
// front of the list; if we have reached a value that is not fuzzy, there
|
||||||
|
// will be no match and we can short-circuit for efficiency
|
||||||
|
if m.large() && !m.fuzzy(host) {
|
||||||
|
break
|
||||||
|
}
|
||||||
|
|
||||||
host = repl.ReplaceAll(host, "")
|
host = repl.ReplaceAll(host, "")
|
||||||
if strings.Contains(host, "*") {
|
if strings.Contains(host, "*") {
|
||||||
patternParts := strings.Split(host, ".")
|
patternParts := strings.Split(host, ".")
|
||||||
@ -207,6 +265,15 @@ outer:
|
|||||||
return false
|
return false
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// fuzzy returns true if the given hostname h is not a specific
|
||||||
|
// hostname, e.g. has placeholders or wildcards.
|
||||||
|
func (MatchHost) fuzzy(h string) bool { return strings.ContainsAny(h, "{*") }
|
||||||
|
|
||||||
|
// large returns true if m is considered to be large. Optimizing
|
||||||
|
// the matcher for smaller lists has diminishing returns.
|
||||||
|
// See related benchmark function in test file to conduct experiments.
|
||||||
|
func (m MatchHost) large() bool { return len(m) > 100 }
|
||||||
|
|
||||||
// CaddyModule returns the Caddy module information.
|
// CaddyModule returns the Caddy module information.
|
||||||
func (MatchPath) CaddyModule() caddy.ModuleInfo {
|
func (MatchPath) CaddyModule() caddy.ModuleInfo {
|
||||||
return caddy.ModuleInfo{
|
return caddy.ModuleInfo{
|
||||||
@ -909,6 +976,7 @@ const regexpPlaceholderPrefix = "http.regexp"
|
|||||||
// Interface guards
|
// Interface guards
|
||||||
var (
|
var (
|
||||||
_ RequestMatcher = (*MatchHost)(nil)
|
_ RequestMatcher = (*MatchHost)(nil)
|
||||||
|
_ caddy.Provisioner = (*MatchHost)(nil)
|
||||||
_ RequestMatcher = (*MatchPath)(nil)
|
_ RequestMatcher = (*MatchPath)(nil)
|
||||||
_ RequestMatcher = (*MatchPathRE)(nil)
|
_ RequestMatcher = (*MatchPathRE)(nil)
|
||||||
_ caddy.Provisioner = (*MatchPathRE)(nil)
|
_ caddy.Provisioner = (*MatchPathRE)(nil)
|
||||||
|
@ -1018,6 +1018,33 @@ func TestNotMatcher(t *testing.T) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
func BenchmarkLargeHostMatcher(b *testing.B) {
|
||||||
|
// this benchmark simulates a large host matcher (thousands of entries) where each
|
||||||
|
// value is an exact hostname (not a placeholder or wildcard) - compare the results
|
||||||
|
// of this with and without the binary search (comment out the various fast path
|
||||||
|
// sections in Match) to conduct experiments
|
||||||
|
|
||||||
|
const n = 10000
|
||||||
|
lastHost := fmt.Sprintf("%d.example.com", n-1)
|
||||||
|
req := &http.Request{Host: lastHost}
|
||||||
|
repl := caddy.NewReplacer()
|
||||||
|
ctx := context.WithValue(req.Context(), caddy.ReplacerCtxKey, repl)
|
||||||
|
req = req.WithContext(ctx)
|
||||||
|
|
||||||
|
matcher := make(MatchHost, n)
|
||||||
|
for i := 0; i < n; i++ {
|
||||||
|
matcher[i] = fmt.Sprintf("%d.example.com", i)
|
||||||
|
}
|
||||||
|
err := matcher.Provision(caddy.Context{})
|
||||||
|
if err != nil {
|
||||||
|
b.Fatal(err)
|
||||||
|
}
|
||||||
|
|
||||||
|
b.ResetTimer()
|
||||||
|
for i := 0; i < b.N; i++ {
|
||||||
|
matcher.Match(req)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
func BenchmarkHostMatcherWithoutPlaceholder(b *testing.B) {
|
func BenchmarkHostMatcherWithoutPlaceholder(b *testing.B) {
|
||||||
req := &http.Request{Host: "localhost"}
|
req := &http.Request{Host: "localhost"}
|
||||||
|
Loading…
x
Reference in New Issue
Block a user