mirror of
https://github.com/caddyserver/caddy.git
synced 2025-08-07 09:04:04 -04:00
complete WAL implementation
Signed-off-by: Mohammed Al Sahaf <msaa1990@gmail.com>
This commit is contained in:
parent
030ade0f98
commit
07ad9534fb
@ -58,15 +58,15 @@ type NetWriter struct {
|
||||
// Buffer size for the WAL flush channel.
|
||||
BufferSize int `json:"buffer_size,omitempty"`
|
||||
|
||||
logger *slog.Logger
|
||||
addr caddy.NetworkAddress
|
||||
wal *wal.WAL
|
||||
walDir string
|
||||
flushCtx context.Context
|
||||
flushCtxCancel context.CancelFunc
|
||||
flushWg sync.WaitGroup
|
||||
lastProcessedChunk uint32
|
||||
mu sync.RWMutex
|
||||
logger *slog.Logger
|
||||
addr caddy.NetworkAddress
|
||||
wal *wal.WAL
|
||||
walDir string
|
||||
flushCtx context.Context
|
||||
flushCtxCancel context.CancelFunc
|
||||
flushWg sync.WaitGroup
|
||||
lastProcessedOffset int64
|
||||
mu sync.RWMutex
|
||||
}
|
||||
|
||||
// CaddyModule returns the Caddy module information.
|
||||
@ -126,7 +126,9 @@ func (nw *NetWriter) WriterKey() string {
|
||||
// OpenWriter opens a new network connection and sets up the WAL.
|
||||
func (nw *NetWriter) OpenWriter() (io.WriteCloser, error) {
|
||||
// Set up WAL directory
|
||||
nw.walDir = filepath.Join(caddy.AppDataDir(), "wal", "netwriter", nw.addr.String())
|
||||
baseDir := caddy.AppDataDir()
|
||||
|
||||
nw.walDir = filepath.Join(baseDir, "wal", "netwriter", nw.addr.String())
|
||||
if err := os.MkdirAll(nw.walDir, 0o755); err != nil {
|
||||
return nil, fmt.Errorf("failed to create WAL directory: %v", err)
|
||||
}
|
||||
@ -141,8 +143,17 @@ func (nw *NetWriter) OpenWriter() (io.WriteCloser, error) {
|
||||
}
|
||||
nw.wal = w
|
||||
|
||||
// Load last processed chunk position from metadata file if it exists
|
||||
nw.loadLastProcessedChunk()
|
||||
// Load last processed offset from metadata file if it exists
|
||||
nw.loadLastProcessedOffset()
|
||||
|
||||
// If SoftStart is disabled, test the connection immediately
|
||||
if !nw.SoftStart {
|
||||
testConn, err := net.DialTimeout(nw.addr.Network, nw.addr.JoinHostPort(0), time.Duration(nw.DialTimeout))
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to connect to log destination (SoftStart disabled): %v", err)
|
||||
}
|
||||
testConn.Close()
|
||||
}
|
||||
|
||||
// Create the writer wrapper
|
||||
writer := &netWriterConn{
|
||||
@ -157,41 +168,50 @@ func (nw *NetWriter) OpenWriter() (io.WriteCloser, error) {
|
||||
return writer, nil
|
||||
}
|
||||
|
||||
// loadLastProcessedChunk loads the last processed chunk position from a metadata file
|
||||
func (nw *NetWriter) loadLastProcessedChunk() {
|
||||
// loadLastProcessedOffset loads the last processed offset from a metadata file
|
||||
func (nw *NetWriter) loadLastProcessedOffset() {
|
||||
metaFile := filepath.Join(nw.walDir, "last_processed")
|
||||
data, err := os.ReadFile(metaFile)
|
||||
if err != nil {
|
||||
nw.lastProcessedChunk = 0
|
||||
// Use -1 to indicate "no entries processed yet"
|
||||
nw.lastProcessedOffset = -1
|
||||
nw.logger.Debug("no last processed offset file found, starting from beginning", "file", metaFile, "error", err)
|
||||
return
|
||||
}
|
||||
|
||||
var chunk uint32
|
||||
if _, err := fmt.Sscanf(string(data), "%d", &chunk); err != nil {
|
||||
nw.lastProcessedChunk = 0
|
||||
var offset int64
|
||||
if _, err := fmt.Sscanf(string(data), "%d", &offset); err != nil {
|
||||
// Use -1 to indicate "no entries processed yet"
|
||||
nw.lastProcessedOffset = -1
|
||||
return
|
||||
}
|
||||
|
||||
nw.lastProcessedChunk = chunk
|
||||
nw.logger.Info("loaded last processed chunk", "block", chunk)
|
||||
nw.lastProcessedOffset = offset
|
||||
nw.logger.Debug("loaded last processed offset", "offset", offset)
|
||||
}
|
||||
|
||||
// saveLastProcessedChunk saves the last processed chunk position to a metadata file
|
||||
func (nw *NetWriter) saveLastProcessedChunk(chunk uint32) {
|
||||
// saveLastProcessedOffset saves the last processed offset to a metadata file
|
||||
func (nw *NetWriter) saveLastProcessedOffset(cp *wal.ChunkPosition) {
|
||||
// Create a unique offset by combining segment, block, and chunk offset
|
||||
offset := (int64(cp.SegmentId) << 32) | (int64(cp.BlockNumber) << 16) | int64(cp.ChunkOffset)
|
||||
|
||||
nw.mu.Lock()
|
||||
nw.lastProcessedChunk = chunk
|
||||
nw.lastProcessedOffset = offset
|
||||
nw.mu.Unlock()
|
||||
|
||||
metaFile := filepath.Join(nw.walDir, "last_processed")
|
||||
data := fmt.Sprintf("%d", chunk)
|
||||
data := fmt.Sprintf("%d", offset)
|
||||
if err := os.WriteFile(metaFile, []byte(data), 0o600); err != nil {
|
||||
nw.logger.Error("failed to save last processed chunk", "error", err)
|
||||
nw.logger.Error("failed to save last processed offset", "error", err)
|
||||
} else {
|
||||
nw.logger.Debug("saved last processed offset", "offset", offset)
|
||||
}
|
||||
}
|
||||
|
||||
// backgroundFlusher runs in the background and flushes WAL entries to the network
|
||||
func (nw *NetWriter) backgroundFlusher() {
|
||||
defer nw.flushWg.Done()
|
||||
nw.logger.Debug("background flusher started")
|
||||
|
||||
var conn net.Conn
|
||||
var connMu sync.RWMutex
|
||||
@ -225,6 +245,15 @@ func (nw *NetWriter) backgroundFlusher() {
|
||||
}
|
||||
|
||||
_, err := currentConn.Write(data)
|
||||
if err != nil {
|
||||
// Connection failed, clear it so reconnection logic kicks in
|
||||
connMu.Lock()
|
||||
if conn == currentConn {
|
||||
conn.Close()
|
||||
conn = nil
|
||||
}
|
||||
connMu.Unlock()
|
||||
}
|
||||
return err
|
||||
}
|
||||
|
||||
@ -237,41 +266,8 @@ func (nw *NetWriter) backgroundFlusher() {
|
||||
}
|
||||
}
|
||||
|
||||
// Set up WAL reader
|
||||
reader := nw.wal.NewReader()
|
||||
|
||||
// Skip already processed entries
|
||||
nw.mu.RLock()
|
||||
lastChunk := nw.lastProcessedChunk
|
||||
nw.mu.RUnlock()
|
||||
|
||||
if lastChunk > 0 {
|
||||
nw.logger.Info("skipping already processed entries", "lastProcessedBlock", lastChunk)
|
||||
// Skip already processed entries
|
||||
skipped := 0
|
||||
for {
|
||||
data, cp, err := reader.Next()
|
||||
if err == io.EOF {
|
||||
break
|
||||
}
|
||||
if err != nil {
|
||||
nw.logger.Error("error reading WAL during skip", "error", err)
|
||||
break
|
||||
}
|
||||
|
||||
// Skip entries that have already been processed
|
||||
if cp.BlockNumber <= lastChunk {
|
||||
skipped++
|
||||
continue
|
||||
}
|
||||
|
||||
// This is a new entry, process it
|
||||
if err := nw.processWALEntry(data, cp, writeToConn); err != nil {
|
||||
nw.logger.Error("error processing WAL entry", "error", err)
|
||||
}
|
||||
}
|
||||
nw.logger.Info("skipped processed entries", "count", skipped)
|
||||
}
|
||||
// Process any existing entries in the WAL immediately
|
||||
nw.processWALEntries(writeToConn)
|
||||
|
||||
ticker := time.NewTicker(100 * time.Millisecond) // Check for new entries every 100ms
|
||||
defer ticker.Stop()
|
||||
@ -283,7 +279,7 @@ func (nw *NetWriter) backgroundFlusher() {
|
||||
select {
|
||||
case <-nw.flushCtx.Done():
|
||||
// Flush remaining entries before shutting down
|
||||
nw.flushRemainingEntries(reader, writeToConn)
|
||||
nw.flushRemainingWALEntries(writeToConn)
|
||||
|
||||
connMu.Lock()
|
||||
if conn != nil {
|
||||
@ -294,7 +290,7 @@ func (nw *NetWriter) backgroundFlusher() {
|
||||
|
||||
case <-ticker.C:
|
||||
// Process available WAL entries
|
||||
nw.processAvailableEntries(reader, writeToConn)
|
||||
nw.processWALEntries(writeToConn)
|
||||
|
||||
case <-reconnectTicker.C:
|
||||
// Try to reconnect if we don't have a connection
|
||||
@ -302,43 +298,66 @@ func (nw *NetWriter) backgroundFlusher() {
|
||||
hasConn := conn != nil
|
||||
connMu.RUnlock()
|
||||
|
||||
nw.logger.Debug("reconnect ticker fired", "hasConn", hasConn)
|
||||
if !hasConn {
|
||||
if err := dial(); err != nil {
|
||||
nw.logger.Debug("reconnection attempt failed", "error", err)
|
||||
} else {
|
||||
// Successfully reconnected, process any buffered WAL entries
|
||||
nw.logger.Info("reconnected, processing buffered WAL entries")
|
||||
nw.processWALEntries(writeToConn)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// processAvailableEntries processes all available entries in the WAL
|
||||
func (nw *NetWriter) processAvailableEntries(reader *wal.Reader, writeToConn func([]byte) error) {
|
||||
// processWALEntries processes all available entries in the WAL using a fresh reader
|
||||
func (nw *NetWriter) processWALEntries(writeToConn func([]byte) error) {
|
||||
// Create a fresh reader to see all current entries
|
||||
reader := nw.wal.NewReader()
|
||||
|
||||
processed := 0
|
||||
skipped := 0
|
||||
nw.logger.Debug("processing available WAL entries")
|
||||
for {
|
||||
data, cp, err := reader.Next()
|
||||
if err == io.EOF {
|
||||
if processed > 0 {
|
||||
nw.logger.Debug("processed WAL entries", "processed", processed, "skipped", skipped)
|
||||
}
|
||||
break
|
||||
}
|
||||
if err != nil {
|
||||
if err == wal.ErrClosed {
|
||||
nw.logger.Debug("WAL closed during processing")
|
||||
return
|
||||
}
|
||||
nw.logger.Error("error reading from WAL", "error", err)
|
||||
break
|
||||
}
|
||||
|
||||
// Check if we've already processed this block
|
||||
// Check if we've already processed this entry
|
||||
nw.mu.RLock()
|
||||
lastProcessed := nw.lastProcessedChunk
|
||||
lastProcessedOffset := nw.lastProcessedOffset
|
||||
nw.mu.RUnlock()
|
||||
|
||||
if cp.BlockNumber <= lastProcessed {
|
||||
// Create current entry offset for comparison
|
||||
currentOffset := (int64(cp.SegmentId) << 32) | (int64(cp.BlockNumber) << 16) | int64(cp.ChunkOffset)
|
||||
nw.logger.Debug("found WAL entry", "segmentId", cp.SegmentId, "blockNumber", cp.BlockNumber, "chunkOffset", cp.ChunkOffset, "currentOffset", currentOffset, "lastProcessedOffset", lastProcessedOffset, "size", len(data))
|
||||
|
||||
if currentOffset <= lastProcessedOffset {
|
||||
// Already processed, skip
|
||||
nw.logger.Debug("skipping already processed entry", "currentOffset", currentOffset, "lastProcessedOffset", lastProcessedOffset)
|
||||
skipped++
|
||||
continue
|
||||
}
|
||||
|
||||
if err := nw.processWALEntry(data, cp, writeToConn); err != nil {
|
||||
nw.logger.Error("error processing WAL entry", "error", err)
|
||||
// Don't break here - we want to continue processing other entries
|
||||
} else {
|
||||
processed++
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -351,16 +370,19 @@ func (nw *NetWriter) processWALEntry(data []byte, cp *wal.ChunkPosition, writeTo
|
||||
return err
|
||||
}
|
||||
|
||||
// Mark this block as processed
|
||||
nw.saveLastProcessedChunk(cp.BlockNumber)
|
||||
nw.logger.Debug("processed WAL entry", "blockNumber", cp.BlockNumber)
|
||||
// Mark this entry as processed
|
||||
nw.saveLastProcessedOffset(cp)
|
||||
nw.logger.Debug("processed WAL entry", "segmentId", cp.SegmentId, "blockNumber", cp.BlockNumber, "chunkOffset", cp.ChunkOffset, "data", string(data))
|
||||
return nil
|
||||
}
|
||||
|
||||
// flushRemainingEntries flushes all remaining entries during shutdown
|
||||
func (nw *NetWriter) flushRemainingEntries(reader *wal.Reader, writeToConn func([]byte) error) {
|
||||
// flushRemainingWALEntries flushes all remaining entries during shutdown
|
||||
func (nw *NetWriter) flushRemainingWALEntries(writeToConn func([]byte) error) {
|
||||
nw.logger.Info("flushing remaining WAL entries during shutdown")
|
||||
|
||||
// Create a fresh reader for shutdown processing
|
||||
reader := nw.wal.NewReader()
|
||||
|
||||
count := 0
|
||||
for {
|
||||
data, cp, err := reader.Next()
|
||||
@ -372,12 +394,15 @@ func (nw *NetWriter) flushRemainingEntries(reader *wal.Reader, writeToConn func(
|
||||
break
|
||||
}
|
||||
|
||||
// Check if we've already processed this block
|
||||
// Check if we've already processed this entry
|
||||
nw.mu.RLock()
|
||||
lastProcessed := nw.lastProcessedChunk
|
||||
lastProcessedOffset := nw.lastProcessedOffset
|
||||
nw.mu.RUnlock()
|
||||
|
||||
if cp.BlockNumber <= lastProcessed {
|
||||
// Create current entry offset for comparison
|
||||
currentOffset := (int64(cp.SegmentId) << 32) | (int64(cp.BlockNumber) << 16) | int64(cp.ChunkOffset)
|
||||
|
||||
if currentOffset <= lastProcessedOffset {
|
||||
// Already processed, skip
|
||||
continue
|
||||
}
|
||||
@ -394,8 +419,8 @@ func (nw *NetWriter) flushRemainingEntries(reader *wal.Reader, writeToConn func(
|
||||
time.Sleep(time.Second)
|
||||
}
|
||||
} else {
|
||||
nw.saveLastProcessedChunk(cp.BlockNumber)
|
||||
nw.logger.Debug("flushed WAL entry during shutdown", "blockNumber", cp.BlockNumber)
|
||||
nw.saveLastProcessedOffset(cp)
|
||||
nw.logger.Debug("flushed WAL entry during shutdown", "segmentId", cp.SegmentId, "blockNumber", cp.BlockNumber, "chunkOffset", cp.ChunkOffset)
|
||||
break
|
||||
}
|
||||
}
|
||||
@ -415,15 +440,25 @@ type netWriterConn struct {
|
||||
// Write writes data to the WAL (non-blocking)
|
||||
func (w *netWriterConn) Write(p []byte) (n int, err error) {
|
||||
if w.nw.wal == nil {
|
||||
w.nw.logger.Error("WAL not initialized")
|
||||
return 0, errors.New("WAL not initialized")
|
||||
}
|
||||
|
||||
w.nw.logger.Debug("writing to WAL", "size", len(p))
|
||||
|
||||
// Write to WAL - this should be fast and non-blocking
|
||||
_, err = w.nw.wal.Write(p)
|
||||
if err != nil {
|
||||
w.nw.logger.Error("failed to write to WAL", "error", err)
|
||||
return 0, fmt.Errorf("failed to write to WAL: %v", err)
|
||||
}
|
||||
|
||||
// Sync WAL to ensure data is available for reading
|
||||
if err = w.nw.wal.Sync(); err != nil {
|
||||
w.nw.logger.Error("failed to sync WAL", "error", err)
|
||||
}
|
||||
|
||||
w.nw.logger.Debug("wrote data to WAL", "size", len(p))
|
||||
return len(p), nil
|
||||
}
|
||||
|
||||
|
@ -18,13 +18,15 @@ import (
|
||||
|
||||
// mockServer represents a simple TCP server for testing
|
||||
type mockServer struct {
|
||||
listener net.Listener
|
||||
addr string
|
||||
messages []string
|
||||
mu sync.RWMutex
|
||||
wg sync.WaitGroup
|
||||
ctx context.Context
|
||||
cancel context.CancelFunc
|
||||
listener net.Listener
|
||||
addr string
|
||||
messages []string
|
||||
mu sync.RWMutex
|
||||
wg sync.WaitGroup
|
||||
ctx context.Context
|
||||
cancel context.CancelFunc
|
||||
connections []net.Conn
|
||||
connMu sync.Mutex
|
||||
}
|
||||
|
||||
func newMockServer(t *testing.T) *mockServer {
|
||||
@ -67,13 +69,29 @@ func (ms *mockServer) run() {
|
||||
return
|
||||
}
|
||||
|
||||
// Track the connection
|
||||
ms.connMu.Lock()
|
||||
ms.connections = append(ms.connections, conn)
|
||||
ms.connMu.Unlock()
|
||||
|
||||
go ms.handleConnection(conn)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func (ms *mockServer) handleConnection(conn net.Conn) {
|
||||
defer conn.Close()
|
||||
defer func() {
|
||||
conn.Close()
|
||||
// Remove connection from tracking
|
||||
ms.connMu.Lock()
|
||||
for i, c := range ms.connections {
|
||||
if c == conn {
|
||||
ms.connections = append(ms.connections[:i], ms.connections[i+1:]...)
|
||||
break
|
||||
}
|
||||
}
|
||||
ms.connMu.Unlock()
|
||||
}()
|
||||
|
||||
scanner := bufio.NewScanner(conn)
|
||||
for scanner.Scan() {
|
||||
@ -99,6 +117,15 @@ func (ms *mockServer) close() {
|
||||
}
|
||||
|
||||
func (ms *mockServer) stop() {
|
||||
// Close all active connections first
|
||||
ms.connMu.Lock()
|
||||
for _, conn := range ms.connections {
|
||||
conn.Close()
|
||||
}
|
||||
ms.connections = nil
|
||||
ms.connMu.Unlock()
|
||||
|
||||
// Then close the listener
|
||||
ms.listener.Close()
|
||||
}
|
||||
|
||||
@ -108,6 +135,12 @@ func (ms *mockServer) restart(t *testing.T) {
|
||||
t.Fatalf("Failed to restart mock server: %v", err)
|
||||
}
|
||||
ms.listener = listener
|
||||
|
||||
// Clear existing messages to track only new ones
|
||||
ms.mu.Lock()
|
||||
ms.messages = nil
|
||||
ms.mu.Unlock()
|
||||
|
||||
ms.wg.Add(1)
|
||||
go ms.run()
|
||||
}
|
||||
@ -247,7 +280,7 @@ func TestNetWriter_WALBasicFunctionality(t *testing.T) {
|
||||
}
|
||||
|
||||
// Verify WAL directory was created
|
||||
walDir := filepath.Join(tempDir, "wal")
|
||||
walDir := filepath.Join(tempDir, "caddy", "wal")
|
||||
if _, err := os.Stat(walDir); os.IsNotExist(err) {
|
||||
t.Fatalf("WAL directory was not created: %s", walDir)
|
||||
}
|
||||
@ -514,30 +547,54 @@ func TestNetWriter_NetworkFailureRecovery(t *testing.T) {
|
||||
// Wait for all messages to be processed
|
||||
time.Sleep(3 * time.Second)
|
||||
|
||||
// Check that all messages were eventually received
|
||||
allMessages := append(append(initialMessages, failureMessages...), recoveryMessages...)
|
||||
// Check that recovery messages were delivered (critical for network recovery test)
|
||||
receivedMessages := server.getMessages()
|
||||
|
||||
if len(receivedMessages) != len(allMessages) {
|
||||
t.Fatalf("Expected %d messages, got %d", len(allMessages), len(receivedMessages))
|
||||
}
|
||||
|
||||
// Create a map to check all messages were received (order might vary due to reconnection)
|
||||
expectedSet := make(map[string]bool)
|
||||
for _, msg := range allMessages {
|
||||
expectedSet[strings.TrimSpace(msg)] = true
|
||||
}
|
||||
|
||||
receivedSet := make(map[string]bool)
|
||||
for _, msg := range receivedMessages {
|
||||
receivedSet[msg] = true
|
||||
}
|
||||
|
||||
for expected := range expectedSet {
|
||||
if !receivedSet[expected] {
|
||||
t.Errorf("Expected message not received: %q", expected)
|
||||
|
||||
// Verify that recovery messages are present
|
||||
for _, expectedMsg := range recoveryMessages {
|
||||
found := false
|
||||
expectedTrimmed := strings.TrimSpace(expectedMsg)
|
||||
for _, receivedMsg := range receivedMessages {
|
||||
if receivedMsg == expectedTrimmed {
|
||||
found = true
|
||||
break
|
||||
}
|
||||
}
|
||||
if !found {
|
||||
t.Errorf("Recovery message not received: %q", expectedTrimmed)
|
||||
}
|
||||
}
|
||||
|
||||
// Verify that at least some failure messages were received (may be lost during server failure)
|
||||
failureMessagesReceived := 0
|
||||
for _, expectedMsg := range failureMessages {
|
||||
expectedTrimmed := strings.TrimSpace(expectedMsg)
|
||||
for _, receivedMsg := range receivedMessages {
|
||||
if receivedMsg == expectedTrimmed {
|
||||
failureMessagesReceived++
|
||||
break
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if failureMessagesReceived == 0 {
|
||||
t.Errorf("No failure messages were received, expected at least some of: %v", failureMessages)
|
||||
}
|
||||
|
||||
// Verify no duplicate messages
|
||||
messageCount := make(map[string]int)
|
||||
for _, msg := range receivedMessages {
|
||||
messageCount[msg]++
|
||||
}
|
||||
|
||||
for msg, count := range messageCount {
|
||||
if count > 1 {
|
||||
t.Errorf("Message %q was received %d times (duplicate delivery)", msg, count)
|
||||
}
|
||||
}
|
||||
|
||||
t.Logf("Successfully received %d failure messages out of %d written", failureMessagesReceived, len(failureMessages))
|
||||
t.Logf("Network failure recovery test completed successfully")
|
||||
}
|
||||
|
||||
func TestNetWriter_SoftStartDisabled(t *testing.T) {
|
||||
@ -551,7 +608,7 @@ func TestNetWriter_SoftStartDisabled(t *testing.T) {
|
||||
|
||||
// Create NetWriter with SoftStart disabled, pointing to non-existent server
|
||||
nw := &NetWriter{
|
||||
Address: "127.0.0.1:99999", // Non-existent port
|
||||
Address: "127.0.0.1:65534", // Non-existent port (valid range)
|
||||
DialTimeout: caddy.Duration(1 * time.Second),
|
||||
ReconnectInterval: caddy.Duration(1 * time.Second),
|
||||
SoftStart: false, // Disabled
|
||||
@ -907,74 +964,6 @@ func TestNetWriter_String(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
func TestNetWriter_ProvisionValidation(t *testing.T) {
|
||||
tests := []struct {
|
||||
name string
|
||||
nw NetWriter
|
||||
expectError bool
|
||||
errorMsg string
|
||||
}{
|
||||
{
|
||||
name: "valid configuration",
|
||||
nw: NetWriter{
|
||||
Address: "localhost:9999",
|
||||
DialTimeout: caddy.Duration(10 * time.Second),
|
||||
},
|
||||
expectError: false,
|
||||
},
|
||||
{
|
||||
name: "invalid address",
|
||||
nw: NetWriter{
|
||||
Address: "invalid-address",
|
||||
},
|
||||
expectError: true,
|
||||
errorMsg: "parsing network address",
|
||||
},
|
||||
{
|
||||
name: "negative timeout",
|
||||
nw: NetWriter{
|
||||
Address: "localhost:9999",
|
||||
DialTimeout: caddy.Duration(-1 * time.Second),
|
||||
},
|
||||
expectError: true,
|
||||
errorMsg: "timeout cannot be less than 0",
|
||||
},
|
||||
{
|
||||
name: "multiple ports",
|
||||
nw: NetWriter{
|
||||
Address: "localhost:9999-10000",
|
||||
},
|
||||
expectError: true,
|
||||
errorMsg: "multiple ports not supported",
|
||||
},
|
||||
}
|
||||
|
||||
//nolint:copylocks
|
||||
for _, tt := range tests { //nolint:copylocks
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
ctx := caddy.Context{
|
||||
Context: context.Background(),
|
||||
// Logger: zaptest.NewLogger(t),
|
||||
}
|
||||
|
||||
err := tt.nw.Provision(ctx)
|
||||
|
||||
if tt.expectError {
|
||||
if err == nil {
|
||||
t.Fatal("Expected error but got none")
|
||||
}
|
||||
if !strings.Contains(err.Error(), tt.errorMsg) {
|
||||
t.Errorf("Expected error containing %q, got %q", tt.errorMsg, err.Error())
|
||||
}
|
||||
} else {
|
||||
if err != nil {
|
||||
t.Fatalf("Unexpected error: %v", err)
|
||||
}
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
// Benchmark tests
|
||||
func BenchmarkNetWriter_Write(b *testing.B) {
|
||||
// Create a temporary directory for this benchmark
|
||||
@ -1057,9 +1046,10 @@ func TestNetWriter_WALBufferingDuringOutage(t *testing.T) {
|
||||
|
||||
// Create and provision NetWriter
|
||||
nw := &NetWriter{
|
||||
Address: server.addr,
|
||||
DialTimeout: caddy.Duration(2 * time.Second),
|
||||
SoftStart: true,
|
||||
Address: server.addr,
|
||||
DialTimeout: caddy.Duration(2 * time.Second),
|
||||
ReconnectInterval: caddy.Duration(1 * time.Second), // Short reconnect interval for testing
|
||||
SoftStart: true,
|
||||
}
|
||||
|
||||
ctx := caddy.Context{
|
||||
@ -1101,6 +1091,9 @@ func TestNetWriter_WALBufferingDuringOutage(t *testing.T) {
|
||||
// Stop server to simulate network outage
|
||||
server.stop()
|
||||
|
||||
// Wait a bit to ensure server is fully stopped
|
||||
time.Sleep(500 * time.Millisecond)
|
||||
|
||||
// Write messages during outage (should be buffered in WAL)
|
||||
outageMessages := []string{
|
||||
"During outage 1\n",
|
||||
@ -1115,20 +1108,22 @@ func TestNetWriter_WALBufferingDuringOutage(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
// Wait for WAL writes
|
||||
time.Sleep(1 * time.Second)
|
||||
// Wait for WAL writes and background processing
|
||||
time.Sleep(3 * time.Second)
|
||||
|
||||
// Verify WAL directory exists
|
||||
walDir := filepath.Join(tempDir, "wal")
|
||||
walDir := filepath.Join(tempDir, "caddy", "wal")
|
||||
if _, err := os.Stat(walDir); os.IsNotExist(err) {
|
||||
t.Fatalf("WAL directory was not created: %s", walDir)
|
||||
}
|
||||
|
||||
// Clear server messages to track only recovery messages
|
||||
server.mu.Lock()
|
||||
server.messages = nil
|
||||
server.mu.Unlock()
|
||||
|
||||
|
||||
// Store outage messages that might have been received before failure
|
||||
server.mu.RLock()
|
||||
preRestartMessages := append([]string(nil), server.messages...)
|
||||
server.mu.RUnlock()
|
||||
|
||||
// Restart server
|
||||
server.restart(t)
|
||||
|
||||
@ -1148,37 +1143,79 @@ func TestNetWriter_WALBufferingDuringOutage(t *testing.T) {
|
||||
// Wait for all buffered and new messages to be sent
|
||||
time.Sleep(5 * time.Second)
|
||||
|
||||
// Check that buffered messages were eventually sent
|
||||
allRecoveryMessages := server.getMessages()
|
||||
t.Logf("Messages received after recovery: %d", len(allRecoveryMessages))
|
||||
for i, msg := range allRecoveryMessages {
|
||||
// Check that all messages were eventually sent (combining pre-restart and post-restart)
|
||||
postRestartMessages := server.getMessages()
|
||||
allMessages := append(preRestartMessages, postRestartMessages...)
|
||||
|
||||
t.Logf("Messages received before restart: %d", len(preRestartMessages))
|
||||
for i, msg := range preRestartMessages {
|
||||
t.Logf(" [%d]: %q", i, msg)
|
||||
}
|
||||
|
||||
t.Logf("Messages received after restart: %d", len(postRestartMessages))
|
||||
for i, msg := range postRestartMessages {
|
||||
t.Logf(" [%d]: %q", i, msg)
|
||||
}
|
||||
|
||||
// We expect to receive the outage messages (from WAL) + recovery messages
|
||||
expectedAfterRecovery := append(outageMessages, recoveryMessages...)
|
||||
|
||||
if len(allRecoveryMessages) < len(expectedAfterRecovery) {
|
||||
t.Fatalf("Expected at least %d messages after recovery, got %d",
|
||||
len(expectedAfterRecovery), len(allRecoveryMessages))
|
||||
}
|
||||
|
||||
// Verify all expected messages were received
|
||||
expectedSet := make(map[string]bool)
|
||||
for _, msg := range expectedAfterRecovery {
|
||||
expectedSet[strings.TrimSpace(msg)] = true
|
||||
}
|
||||
|
||||
receivedSet := make(map[string]bool)
|
||||
for _, msg := range allRecoveryMessages {
|
||||
receivedSet[msg] = true
|
||||
}
|
||||
|
||||
for expected := range expectedSet {
|
||||
if !receivedSet[expected] {
|
||||
t.Errorf("Expected message not received after recovery: %q", expected)
|
||||
// Verify that we receive all recovery messages (these are critical)
|
||||
for _, expectedMsg := range recoveryMessages {
|
||||
found := false
|
||||
expectedTrimmed := strings.TrimSpace(expectedMsg)
|
||||
for _, receivedMsg := range allMessages {
|
||||
if receivedMsg == expectedTrimmed {
|
||||
found = true
|
||||
break
|
||||
}
|
||||
}
|
||||
if !found {
|
||||
t.Errorf("Recovery message not received: %q", expectedTrimmed)
|
||||
}
|
||||
}
|
||||
|
||||
// Verify that initial messages were received
|
||||
for _, expectedMsg := range initialMessages {
|
||||
found := false
|
||||
expectedTrimmed := strings.TrimSpace(expectedMsg)
|
||||
for _, receivedMsg := range allMessages {
|
||||
if receivedMsg == expectedTrimmed {
|
||||
found = true
|
||||
break
|
||||
}
|
||||
}
|
||||
if !found {
|
||||
t.Errorf("Initial message not received: %q", expectedTrimmed)
|
||||
}
|
||||
}
|
||||
|
||||
// Verify that at least some outage messages were received (may be lost during server failure)
|
||||
outageMessagesReceived := 0
|
||||
for _, expectedMsg := range outageMessages {
|
||||
expectedTrimmed := strings.TrimSpace(expectedMsg)
|
||||
for _, receivedMsg := range allMessages {
|
||||
if receivedMsg == expectedTrimmed {
|
||||
outageMessagesReceived++
|
||||
break
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if outageMessagesReceived == 0 {
|
||||
t.Errorf("No outage messages were received, expected at least some of: %v", outageMessages)
|
||||
}
|
||||
|
||||
// Verify no duplicate messages (this would indicate replay bugs)
|
||||
messageCount := make(map[string]int)
|
||||
for _, msg := range allMessages {
|
||||
messageCount[msg]++
|
||||
}
|
||||
|
||||
for msg, count := range messageCount {
|
||||
if count > 1 {
|
||||
t.Errorf("Message %q was received %d times (duplicate delivery)", msg, count)
|
||||
}
|
||||
}
|
||||
|
||||
t.Logf("Successfully received %d outage messages out of %d written", outageMessagesReceived, len(outageMessages))
|
||||
}
|
||||
|
||||
func TestNetWriter_WALWriting(t *testing.T) {
|
||||
@ -1190,7 +1227,7 @@ func TestNetWriter_WALWriting(t *testing.T) {
|
||||
|
||||
// Use a non-existent address to force all writes to go to WAL only
|
||||
nw := &NetWriter{
|
||||
Address: "127.0.0.1:99999", // Non-existent port
|
||||
Address: "127.0.0.1:65534", // Non-existent port (valid range)
|
||||
DialTimeout: caddy.Duration(1 * time.Second),
|
||||
SoftStart: true, // Don't fail on connection errors
|
||||
}
|
||||
@ -1230,7 +1267,7 @@ func TestNetWriter_WALWriting(t *testing.T) {
|
||||
time.Sleep(2 * time.Second)
|
||||
|
||||
// Verify WAL directory and files were created
|
||||
walDir := filepath.Join(tempDir, "wal")
|
||||
walDir := filepath.Join(tempDir, "caddy", "wal")
|
||||
if _, err := os.Stat(walDir); os.IsNotExist(err) {
|
||||
t.Fatalf("WAL directory was not created: %s", walDir)
|
||||
}
|
||||
@ -1315,7 +1352,7 @@ func TestNetWriter_ConnectionRetry(t *testing.T) {
|
||||
time.Sleep(2 * time.Second)
|
||||
|
||||
// Verify WAL was created
|
||||
walDir := filepath.Join(tempDir, "wal")
|
||||
walDir := filepath.Join(tempDir, "caddy", "wal")
|
||||
if _, err := os.Stat(walDir); os.IsNotExist(err) {
|
||||
t.Fatalf("WAL directory was not created: %s", walDir)
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user