proxy: replace fsnotify with stat-poll watcher and add SIGHUP reload (#685)
The fsnotify-based config watcher does not work reliably when the config file is bind-mounted into a Docker container as an individual file, and it mishandles k8s ConfigMap projections (atomically swapped symlinks). Replace it with a small os.Stat-polling watcher and add SIGHUP as an explicit reload signal.

- new proxy/configwatcher package: 2s os.Stat poller, follows symlinks, fires on mtime/size change and on missing -> present transitions
- SIGHUP triggers reload unconditionally (works without --watch-config) via the same ConfigFileChangedEvent pipeline so the UI sees identical state transitions
- watcher goroutine now exits cleanly on shutdown via a context
- drop github.com/fsnotify/fsnotify dependency

Fixes #682
This commit is contained in:
@@ -0,0 +1,85 @@
|
||||
// Package configwatcher provides a simple cross-platform file watcher based
|
||||
// on os.Stat polling. It works correctly inside Docker containers where the
|
||||
// config file is bind-mounted as an individual file, and for k8s ConfigMap
|
||||
// projections (which present the file as a symlink to an atomically swapped
|
||||
// target) — both cases where inotify-based watchers are unreliable.
|
||||
package configwatcher
|
||||
|
||||
import (
|
||||
"context"
|
||||
"errors"
|
||||
"io/fs"
|
||||
"log"
|
||||
"os"
|
||||
"time"
|
||||
)
|
||||
|
||||
// DefaultInterval is the polling period Run uses when Watcher.Interval is
// zero or negative.
const DefaultInterval = 2 * time.Second
|
||||
|
||||
// Watcher polls a single file with os.Stat and reports changes through a
// callback. Configure the fields, then call Run.
type Watcher struct {
	// Path is the file to poll. It is examined with os.Stat, so a symlink
	// is resolved to its target on every poll.
	Path string

	// Interval is the time between polls. Zero or negative values make Run
	// fall back to DefaultInterval.
	Interval time.Duration

	// OnChange is called by Run, on Run's own goroutine, each time a change
	// is detected. A nil OnChange is allowed; changes are then ignored.
	OnChange func()
}
|
||||
|
||||
// snapshot is the result of one poll of the watched path. The zero value
// represents a file that is missing or could not be stat'ed.
type snapshot struct {
	// exists is true when os.Stat succeeded.
	exists bool
	// modTime is the modification time reported by os.Stat.
	modTime time.Time
	// size is the length in bytes reported by os.Stat.
	size int64
}
|
||||
|
||||
// Run blocks until ctx is canceled. It polls Path on Interval and invokes
|
||||
// OnChange whenever the file's modification time or size changes, or when
|
||||
// the file reappears after being missing. The baseline poll establishes
|
||||
// initial state and does not fire OnChange.
|
||||
func (w *Watcher) Run(ctx context.Context) {
|
||||
interval := w.Interval
|
||||
if interval <= 0 {
|
||||
interval = DefaultInterval
|
||||
}
|
||||
|
||||
prev := stat(w.Path)
|
||||
|
||||
ticker := time.NewTicker(interval)
|
||||
defer ticker.Stop()
|
||||
|
||||
for {
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
return
|
||||
case <-ticker.C:
|
||||
cur := stat(w.Path)
|
||||
if changed(prev, cur) && w.OnChange != nil {
|
||||
w.OnChange()
|
||||
}
|
||||
prev = cur
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func stat(path string) snapshot {
|
||||
fi, err := os.Stat(path)
|
||||
if err != nil {
|
||||
if !errors.Is(err, fs.ErrNotExist) {
|
||||
log.Printf("configwatcher: stat %s: %v", path, err)
|
||||
}
|
||||
return snapshot{}
|
||||
}
|
||||
return snapshot{
|
||||
exists: true,
|
||||
modTime: fi.ModTime(),
|
||||
size: fi.Size(),
|
||||
}
|
||||
}
|
||||
|
||||
func changed(prev, cur snapshot) bool {
|
||||
// Present → missing: stay quiet (likely a transient rename-style write).
|
||||
// Missing → present: fire so we reload as soon as the file comes back.
|
||||
if !cur.exists {
|
||||
return false
|
||||
}
|
||||
if !prev.exists {
|
||||
return true
|
||||
}
|
||||
return !prev.modTime.Equal(cur.modTime) || prev.size != cur.size
|
||||
}
|
||||
@@ -0,0 +1,191 @@
|
||||
package configwatcher
|
||||
|
||||
import (
|
||||
"context"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"runtime"
|
||||
"sync/atomic"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/stretchr/testify/require"
|
||||
)
|
||||
|
||||
// testInterval is the short polling interval the tests give each Watcher.
const testInterval = 25 * time.Millisecond
|
||||
|
||||
// startWatcher launches w.Run in a goroutine and returns a function that
|
||||
// cancels the context and waits for Run to return.
|
||||
func startWatcher(t *testing.T, w *Watcher) func() {
|
||||
t.Helper()
|
||||
ctx, cancel := context.WithCancel(context.Background())
|
||||
done := make(chan struct{})
|
||||
go func() {
|
||||
w.Run(ctx)
|
||||
close(done)
|
||||
}()
|
||||
return func() {
|
||||
cancel()
|
||||
select {
|
||||
case <-done:
|
||||
case <-time.After(2 * time.Second):
|
||||
t.Fatal("watcher did not stop within 2s of cancel")
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// waitForCount polls counter (atomically, every 5ms) until it reaches at
// least want or timeout elapses. It reports whether the target was reached.
//
// The counter is checked one final time after the deadline, so a zero or
// negative timeout still performs a single check, and an increment that
// lands during the last sleep is not missed.
func waitForCount(t *testing.T, counter *int64, want int64, timeout time.Duration) bool {
	t.Helper()

	deadline := time.Now().Add(timeout)
	for time.Now().Before(deadline) {
		if atomic.LoadInt64(counter) >= want {
			return true
		}
		time.Sleep(5 * time.Millisecond)
	}

	// Final check: the loop may exit (or never start) without having looked
	// at the most recent value.
	return atomic.LoadInt64(counter) >= want
}
|
||||
|
||||
func TestWatcher_NoFireOnBaseline(t *testing.T) {
|
||||
dir := t.TempDir()
|
||||
path := filepath.Join(dir, "config.yaml")
|
||||
require.NoError(t, os.WriteFile(path, []byte("a"), 0o644))
|
||||
|
||||
var n int64
|
||||
stop := startWatcher(t, &Watcher{
|
||||
Path: path,
|
||||
Interval: testInterval,
|
||||
OnChange: func() { atomic.AddInt64(&n, 1) },
|
||||
})
|
||||
defer stop()
|
||||
|
||||
time.Sleep(testInterval * 5)
|
||||
require.Equal(t, int64(0), atomic.LoadInt64(&n), "baseline poll must not fire")
|
||||
}
|
||||
|
||||
func TestWatcher_DetectsModTimeChange(t *testing.T) {
|
||||
dir := t.TempDir()
|
||||
path := filepath.Join(dir, "config.yaml")
|
||||
require.NoError(t, os.WriteFile(path, []byte("a"), 0o644))
|
||||
|
||||
// Force a known baseline mtime.
|
||||
base := time.Now().Add(-1 * time.Hour).Truncate(time.Second)
|
||||
require.NoError(t, os.Chtimes(path, base, base))
|
||||
|
||||
var n int64
|
||||
stop := startWatcher(t, &Watcher{
|
||||
Path: path,
|
||||
Interval: testInterval,
|
||||
OnChange: func() { atomic.AddInt64(&n, 1) },
|
||||
})
|
||||
defer stop()
|
||||
|
||||
// Let the baseline settle.
|
||||
time.Sleep(testInterval * 2)
|
||||
|
||||
// Bump mtime well above the baseline so low-resolution filesystems still notice.
|
||||
require.NoError(t, os.Chtimes(path, base.Add(10*time.Second), base.Add(10*time.Second)))
|
||||
|
||||
require.True(t, waitForCount(t, &n, 1, time.Second), "callback should fire after mtime change")
|
||||
}
|
||||
|
||||
func TestWatcher_DetectsSizeChangeWithSameModTime(t *testing.T) {
|
||||
dir := t.TempDir()
|
||||
path := filepath.Join(dir, "config.yaml")
|
||||
require.NoError(t, os.WriteFile(path, []byte("a"), 0o644))
|
||||
|
||||
fi, err := os.Stat(path)
|
||||
require.NoError(t, err)
|
||||
originalMtime := fi.ModTime()
|
||||
|
||||
var n int64
|
||||
stop := startWatcher(t, &Watcher{
|
||||
Path: path,
|
||||
Interval: testInterval,
|
||||
OnChange: func() { atomic.AddInt64(&n, 1) },
|
||||
})
|
||||
defer stop()
|
||||
time.Sleep(testInterval * 2)
|
||||
|
||||
require.NoError(t, os.WriteFile(path, []byte("aaaaa"), 0o644))
|
||||
// Reset mtime back to the original so size is the only signal.
|
||||
require.NoError(t, os.Chtimes(path, originalMtime, originalMtime))
|
||||
|
||||
require.True(t, waitForCount(t, &n, 1, time.Second), "callback should fire on size change")
|
||||
}
|
||||
|
||||
func TestWatcher_SymlinkTargetSwap(t *testing.T) {
|
||||
dir := t.TempDir()
|
||||
targetA := filepath.Join(dir, "targetA")
|
||||
targetB := filepath.Join(dir, "targetB")
|
||||
link := filepath.Join(dir, "config.yaml")
|
||||
|
||||
require.NoError(t, os.WriteFile(targetA, []byte("AAAA"), 0o644))
|
||||
require.NoError(t, os.WriteFile(targetB, []byte("BBBBBBBB"), 0o644))
|
||||
|
||||
if err := os.Symlink(targetA, link); err != nil {
|
||||
if runtime.GOOS == "windows" {
|
||||
t.Skipf("symlink creation requires privilege on Windows: %v", err)
|
||||
}
|
||||
t.Fatalf("os.Symlink: %v", err)
|
||||
}
|
||||
|
||||
var n int64
|
||||
stop := startWatcher(t, &Watcher{
|
||||
Path: link,
|
||||
Interval: testInterval,
|
||||
OnChange: func() { atomic.AddInt64(&n, 1) },
|
||||
})
|
||||
defer stop()
|
||||
time.Sleep(testInterval * 2)
|
||||
|
||||
// Atomic symlink swap (k8s ConfigMap pattern): create new symlink at a
|
||||
// temp name, then rename over the existing one.
|
||||
tmpLink := filepath.Join(dir, "config.yaml.tmp")
|
||||
require.NoError(t, os.Symlink(targetB, tmpLink))
|
||||
require.NoError(t, os.Rename(tmpLink, link))
|
||||
|
||||
require.True(t, waitForCount(t, &n, 1, time.Second), "callback should fire after symlink target swap")
|
||||
}
|
||||
|
||||
func TestWatcher_FileMissingThenReturns(t *testing.T) {
|
||||
dir := t.TempDir()
|
||||
path := filepath.Join(dir, "config.yaml")
|
||||
require.NoError(t, os.WriteFile(path, []byte("a"), 0o644))
|
||||
|
||||
var n int64
|
||||
stop := startWatcher(t, &Watcher{
|
||||
Path: path,
|
||||
Interval: testInterval,
|
||||
OnChange: func() { atomic.AddInt64(&n, 1) },
|
||||
})
|
||||
defer stop()
|
||||
time.Sleep(testInterval * 2)
|
||||
|
||||
require.NoError(t, os.Remove(path))
|
||||
time.Sleep(testInterval * 3)
|
||||
require.Equal(t, int64(0), atomic.LoadInt64(&n), "removal alone must not fire")
|
||||
|
||||
require.NoError(t, os.WriteFile(path, []byte("b"), 0o644))
|
||||
require.True(t, waitForCount(t, &n, 1, time.Second), "callback should fire when file returns")
|
||||
}
|
||||
|
||||
func TestWatcher_ContextCancelStopsRun(t *testing.T) {
|
||||
dir := t.TempDir()
|
||||
path := filepath.Join(dir, "config.yaml")
|
||||
require.NoError(t, os.WriteFile(path, []byte("a"), 0o644))
|
||||
|
||||
w := &Watcher{Path: path, Interval: testInterval}
|
||||
ctx, cancel := context.WithCancel(context.Background())
|
||||
done := make(chan struct{})
|
||||
go func() { w.Run(ctx); close(done) }()
|
||||
|
||||
time.Sleep(testInterval * 2)
|
||||
cancel()
|
||||
select {
|
||||
case <-done:
|
||||
case <-time.After(2 * time.Second):
|
||||
t.Fatal("Run did not return within 2s of cancel")
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user