From 6a64bb2ce55b9a0aaf89f0372a7381aefdebd530 Mon Sep 17 00:00:00 2001 From: mfrischknecht Date: Wed, 29 Apr 2026 13:52:04 +0200 Subject: [PATCH] listeners: clean up stale Unix socket files on Windows (#7676) * Delete old unix domain socket files on Windows While Windows doesn't have the need to reuse a socket file descriptor by dup()ing it on config reloads, there still is a valid need for an equivalent to the `syscall.Unlink()` call in listen_unix.go (also in `reuseUnixSocket`). If a previous Caddy instance didn't terminate properly, the chances it will leave behind a socket file are very high, breaking all subsequent starting attempts. Unlike with regular files, Windows seemingly has no way for a process to flag a UNIX domain socket file with `FILE_DELETE_ON_CLOSE`, which means this scenario can never be avoided entirely (e.g. in the case of crashes). For the long comment on `isAbstractUnixSocket`: the logic itself is likely of dubious value, but I thought it better to explicitly reference the issue, as I have just spent half an hour searching the web to figure out whether abstract names will work or not on Windows. At least, the logic as-is should now do the sensible thing if these are ever implemented properly (and it matches what the Golang standard library does internally). * Add a dial attempt to check for active server processes As @steadytao pointed out (thanks!), the previous code didn't have solid proof that an existing unix socket file had really been orphaned, as it's also possible that there's another server process (still running). This would still give the Windows implementation parity with the unix one (as that one also unlinks the socket file without further checks), but I've performed a couple of small tests and found this way of handling socket files still problematic, at least if Caddy is used as a reverse proxy in real world scenarios. 
In tests with a simple Caddyfile that only declares an admin socket, starting two caddy instances with the same Caddyfile works and behaves like one would expect: the second instance removes the first instance's socket file and "wins" the race. When Caddy is used as a reverse proxy, though, what'll happen is more complicated: While the second instance wins the race for the admin socket, as long as the Caddyfile specifies a TCP downstream socket, the second process will not be able to take this one over from the first (also to be expected, that's how socket binding usually works). This results in a rather broken state: The first process still holds on to its TCP listening sockets, the second process fails to start because of the error in its listening attempt, leaving an orphaned admin socket file in the file system. Afterwards, the second process won't be running and the first _will_ be running but unable to be controlled because its admin socket has been replaced. This leaves the system in another state that is bad from an ops perspective. With this new change, we first try to connect to any unix socket that isn't already covered by our current process (with a very low timeout) and can easily decide if the socket is still in use by another process: - If the connection is accepted, there's obviously a server process. - If Windows returns WSAECONNREFUSED [^1], there is either no active server process for the socket file anymore, or the socket file does not exist. - Any other errors are likely a sign that there still is a server process (e.g. a timeout would indicate that it's just slow in accepting new connection attempts). 
[^1]: https://learn.microsoft.com/en-us/windows/win32/winsock/windows-sockets-error-codes-2#wsaeconnrefused * chore: tidy Windows unix socket reuse helper --------- Co-authored-by: Zen Dodd --- listen.go | 4 -- listen_reuseUnixSocket.go | 21 ++++++++ listen_reuseUnixSocket_windows.go | 89 +++++++++++++++++++++++++++++++ 3 files changed, 110 insertions(+), 4 deletions(-) create mode 100644 listen_reuseUnixSocket.go create mode 100644 listen_reuseUnixSocket_windows.go diff --git a/listen.go b/listen.go index 03b63c1e2..21df13ff4 100644 --- a/listen.go +++ b/listen.go @@ -30,10 +30,6 @@ import ( "go.uber.org/zap" ) -func reuseUnixSocket(_, _ string) (any, error) { - return nil, nil -} - func listenReusable(ctx context.Context, lnKey string, network, address string, config net.ListenConfig) (any, error) { var socketFile *os.File diff --git a/listen_reuseUnixSocket.go b/listen_reuseUnixSocket.go new file mode 100644 index 000000000..006610edc --- /dev/null +++ b/listen_reuseUnixSocket.go @@ -0,0 +1,21 @@ +// Copyright 2015 Matthew Holt and The Caddy Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +//go:build (!unix || solaris) && !windows + +package caddy + +func reuseUnixSocket(_, _ string) (any, error) { + return nil, nil +} diff --git a/listen_reuseUnixSocket_windows.go b/listen_reuseUnixSocket_windows.go new file mode 100644 index 000000000..9c547933e --- /dev/null +++ b/listen_reuseUnixSocket_windows.go @@ -0,0 +1,89 @@ +// Copyright 2015 Matthew Holt and The Caddy Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +//go:build windows + +package caddy + +import ( + "errors" + "fmt" + "io/fs" + "net" + "os" + "strings" + "syscall" + "time" +) + +var errUnixSocketAlreadyInUse = errors.New("unix socket is already in use by another process") + +func reuseUnixSocket(network, addr string) (any, error) { + if !IsUnixNetwork(network) { + return nil, nil + } + + // Note: This is here mainly for proper compatibility, because Unix sockets with abstract names are in an interesting limbo state on Windows: + // Go already translates `@` characters to `\0` for Windows: https://github.com/golang/go/blob/65d5c5f6dd8aa7b221cff6ec3f5101ea2e5f3efa/src/syscall/syscall_windows.go#L910 + // ...but there still is an open issue about the fact that this is not properly supported: https://github.com/microsoft/WSL/issues/4240#issuecomment-620805115 + // The main issue is that the original announcement proclaimed support for this feature, but it was (apparently) never implemented: https://devblogs.microsoft.com/commandline/af_unix-comes-to-windows/ + 
isAbstractUnixSocket := strings.HasPrefix(addr, "@") + + if isAbstractUnixSocket { + // Abstract Unix sockets do not require us to remove stale socket files. + return nil, nil + } + + // On Windows, we're using the `fakeCloseListener` wrappers around a single, ever-living listener. + // So, if there's an active listener entry in the pool, we're the current owner of the Unix socket file. + _, socketBelongsToCurrentProcess := listenerPool.References(listenerKey(network, addr)) + + if socketBelongsToCurrentProcess { + // Reuse/cleanup is entirely handled by the refcounting mechanism in `listenerPool`. + return nil, nil + } + + // If the socket file does not exist or has no backing server process, this will fail instantly. + connection, err := net.DialTimeout("unix", addr, 10*time.Millisecond) + + if err == nil { + connection.Close() + return nil, fmt.Errorf("cannot reuse socket %v: %w", addr, errUnixSocketAlreadyInUse) + } + + // Windows returns this error code both if the socket file does not exist and if it isn't backed by a server process anymore. + // See: https://learn.microsoft.com/en-us/windows/win32/winsock/windows-sockets-error-codes-2#wsaeconnrefused + const WSAECONNREFUSED syscall.Errno = 10061 + + var errno syscall.Errno + hasNoListeningServerProcess := errors.As(err, &errno) && errno == WSAECONNREFUSED + + if !hasNoListeningServerProcess { + return nil, fmt.Errorf("cannot reuse socket %v: %w", addr, errUnixSocketAlreadyInUse) + } + + // If the socket file exists, it hasn't been created by our process, and it seemingly + // isn't backed by a server process anymore. Try to delete it so we can bind to it later. + err = os.Remove(addr) + + if err == nil { + return nil, nil + } else if errors.Is(err, fs.ErrNotExist) { + // Either the file didn't exist in the first place, or it was deleted before we were able to. + return nil, nil + } else { + // We failed to delete the file. Likely, it belongs to another (active) process. + return nil, err + } +}