package config import ( "context" "errors" "os" "path/filepath" "sync" "testing" "time" "github.com/spf13/viper" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" ) // loadFromFile is a helper that mimics LoadConfig() for a specific file path // without going through the env-prefix and singleton machinery — keeps the // test hermetic. func loadFromFile(t *testing.T, path string) *Config { t.Helper() v := viper.New() v.SetConfigFile(path) v.SetConfigType("yaml") v.SetDefault("logging.level", "info") v.SetDefault("auth.jwt.ttl", time.Hour) require.NoError(t, v.ReadInConfig()) c := &Config{viper: v} require.NoError(t, v.Unmarshal(c)) return c } // TestWatchAndApply_LoggingLevel proves the hot-reload pipe end-to-end: // write a new logging.level to the watched file, the OnConfigChange handler // re-unmarshals, and the in-memory Config reflects the new value. func TestWatchAndApply_LoggingLevel(t *testing.T) { dir := t.TempDir() path := filepath.Join(dir, "config.yaml") require.NoError(t, os.WriteFile(path, []byte("logging:\n level: info\n"), 0644)) c := loadFromFile(t, path) assert.Equal(t, "info", c.GetLogLevel()) ctx, cancel := context.WithCancel(context.Background()) defer cancel() c.WatchAndApply(ctx) // Mutate the file. fsnotify needs a real write event; rewrite atomically. require.NoError(t, os.WriteFile(path, []byte("logging:\n level: debug\n"), 0644)) // Poll for up to 2s waiting for the in-memory level to flip. deadline := time.Now().Add(2 * time.Second) for time.Now().Before(deadline) { c.reloadMu.RLock() level := c.GetLogLevel() c.reloadMu.RUnlock() if level == "debug" { return } time.Sleep(20 * time.Millisecond) } c.reloadMu.RLock() defer c.reloadMu.RUnlock() t.Fatalf("logging level did not hot-reload to debug: still %q", c.GetLogLevel()) } // TestWatchAndApply_NoFileNoOp confirms the watcher is a safe no-op when no // config file is in use (env-only / defaults) — important so production // containers without a mounted config.yaml don't crash. func TestWatchAndApply_NoFileNoOp(t *testing.T) { c := &Config{viper: viper.New()} ctx, cancel := context.WithCancel(context.Background()) defer cancel() c.WatchAndApply(ctx) // should return without panicking } // TestWatchAndApply_NilViperNoOp confirms the watcher tolerates a Config // constructed without the viper field (e.g. tests that build a Config{} // manually — same defensive code path as production but exercised explicitly). func TestWatchAndApply_NilViperNoOp(t *testing.T) { c := &Config{} ctx, cancel := context.WithCancel(context.Background()) defer cancel() c.WatchAndApply(ctx) } // TestWatchAndApply_JWTTTL proves Phase 2 of ADR-0023: the JWT TTL is // re-read on every token generation via the GetJWTTTL method value, so // after a config-file change the new TTL takes effect without restart. func TestWatchAndApply_JWTTTL(t *testing.T) { dir := t.TempDir() path := filepath.Join(dir, "config.yaml") require.NoError(t, os.WriteFile(path, []byte("auth:\n jwt:\n ttl: 1h\n"), 0644)) c := loadFromFile(t, path) assert.Equal(t, time.Hour, c.GetJWTTTL()) ctx, cancel := context.WithCancel(context.Background()) defer cancel() c.WatchAndApply(ctx) require.NoError(t, os.WriteFile(path, []byte("auth:\n jwt:\n ttl: 30m\n"), 0644)) deadline := time.Now().Add(2 * time.Second) for time.Now().Before(deadline) { c.reloadMu.RLock() ttl := c.GetJWTTTL() c.reloadMu.RUnlock() if ttl == 30*time.Minute { return } time.Sleep(20 * time.Millisecond) } c.reloadMu.RLock() defer c.reloadMu.RUnlock() t.Fatalf("auth.jwt.ttl did not hot-reload to 30m: still %s", c.GetJWTTTL()) } // TestWatchAndApply_TelemetrySamplerType proves Phase 3 of ADR-0023: // when telemetry.sampler.type changes, the callback registered via // SetSamplerReconfigureCallback is invoked exactly once with the new value. func TestWatchAndApply_TelemetrySamplerType(t *testing.T) { dir := t.TempDir() path := filepath.Join(dir, "config.yaml") initial := []byte(`telemetry: sampler: type: parentbased_always_on ratio: 1.0 `) changed := []byte(`telemetry: sampler: type: traceidratio ratio: 1.0 `) require.NoError(t, os.WriteFile(path, initial, 0644)) c := loadFromFile(t, path) assert.Equal(t, "parentbased_always_on", c.GetSamplerType()) // Setup callback tracker var mu sync.Mutex callbackCalled := false var recordedType string var recordedRatio float64 c.SetSamplerReconfigureCallback(func(ctx context.Context, samplerType string, samplerRatio float64) error { mu.Lock() defer mu.Unlock() callbackCalled = true recordedType = samplerType recordedRatio = samplerRatio return nil }) ctx, cancel := context.WithCancel(context.Background()) defer cancel() c.WatchAndApply(ctx) // Mutate the file require.NoError(t, os.WriteFile(path, changed, 0644)) // Poll for up to 2s waiting for callback deadline := time.Now().Add(2 * time.Second) for time.Now().Before(deadline) { mu.Lock() if callbackCalled { mu.Unlock() assert.Equal(t, "traceidratio", recordedType) assert.Equal(t, 1.0, recordedRatio) return } mu.Unlock() time.Sleep(20 * time.Millisecond) } mu.Lock() defer mu.Unlock() t.Fatalf("sampler reconfigure callback was not invoked: callbackCalled=%v", callbackCalled) } // TestWatchAndApply_TelemetrySamplerRatio proves Phase 3 of ADR-0023: // when telemetry.sampler.ratio changes, the callback registered via // SetSamplerReconfigureCallback is invoked exactly once with the new value. func TestWatchAndApply_TelemetrySamplerRatio(t *testing.T) { dir := t.TempDir() path := filepath.Join(dir, "config.yaml") initial := []byte(`telemetry: sampler: type: parentbased_always_on ratio: 1.0 `) changed := []byte(`telemetry: sampler: type: parentbased_always_on ratio: 0.5 `) require.NoError(t, os.WriteFile(path, initial, 0644)) c := loadFromFile(t, path) assert.Equal(t, 1.0, c.GetSamplerRatio()) // Setup callback tracker var mu sync.Mutex callbackCalled := false var recordedType string var recordedRatio float64 c.SetSamplerReconfigureCallback(func(ctx context.Context, samplerType string, samplerRatio float64) error { mu.Lock() defer mu.Unlock() callbackCalled = true recordedType = samplerType recordedRatio = samplerRatio return nil }) ctx, cancel := context.WithCancel(context.Background()) defer cancel() c.WatchAndApply(ctx) // Mutate the file require.NoError(t, os.WriteFile(path, changed, 0644)) // Poll for up to 2s waiting for callback deadline := time.Now().Add(2 * time.Second) for time.Now().Before(deadline) { mu.Lock() if callbackCalled { mu.Unlock() assert.Equal(t, "parentbased_always_on", recordedType) assert.Equal(t, 0.5, recordedRatio) return } mu.Unlock() time.Sleep(20 * time.Millisecond) } mu.Lock() defer mu.Unlock() t.Fatalf("sampler reconfigure callback was not invoked: callbackCalled=%v", callbackCalled) } // TestWatchAndApply_SamplerCallbackNotCalledWhenNoChange proves that // the sampler callback is NOT invoked when the config file changes but // sampler type and ratio remain the same. func TestWatchAndApply_SamplerCallbackNotCalledWhenNoChange(t *testing.T) { dir := t.TempDir() path := filepath.Join(dir, "config.yaml") initial := []byte(`telemetry: sampler: type: parentbased_always_on ratio: 1.0 logging: level: info `) changed := []byte(`telemetry: sampler: type: parentbased_always_on ratio: 1.0 logging: level: debug `) require.NoError(t, os.WriteFile(path, initial, 0644)) c := loadFromFile(t, path) // Setup callback tracker var mu sync.Mutex callbackCalled := false c.SetSamplerReconfigureCallback(func(ctx context.Context, samplerType string, samplerRatio float64) error { mu.Lock() defer mu.Unlock() callbackCalled = true return nil }) ctx, cancel := context.WithCancel(context.Background()) defer cancel() c.WatchAndApply(ctx) // Mutate the file (logging level changes, but sampler stays the same) require.NoError(t, os.WriteFile(path, changed, 0644)) // Poll for up to 2s - callback should NOT be called deadline := time.Now().Add(2 * time.Second) for time.Now().Before(deadline) { mu.Lock() wasCalled := callbackCalled mu.Unlock() if wasCalled { t.Fatalf("sampler reconfigure callback was invoked but sampler did not change") } time.Sleep(20 * time.Millisecond) } } // TestWatchAndApply_SamplerCallbackErrorHandling proves that when the // sampler reconfigure callback returns an error, the previous sampler values // are NOT updated, allowing retry on next config change. func TestWatchAndApply_SamplerCallbackErrorHandling(t *testing.T) { dir := t.TempDir() path := filepath.Join(dir, "config.yaml") initial := []byte(`telemetry: sampler: type: parentbased_always_on ratio: 1.0 `) changed := []byte(`telemetry: sampler: type: traceidratio ratio: 0.5 `) require.NoError(t, os.WriteFile(path, initial, 0644)) c := loadFromFile(t, path) // Setup callback that returns an error expectedErr := errors.New("reconfigure failed") var mu sync.Mutex callbackCalled := false c.SetSamplerReconfigureCallback(func(ctx context.Context, samplerType string, samplerRatio float64) error { mu.Lock() defer mu.Unlock() callbackCalled = true return expectedErr }) ctx, cancel := context.WithCancel(context.Background()) defer cancel() c.WatchAndApply(ctx) // Mutate the file require.NoError(t, os.WriteFile(path, changed, 0644)) // Poll for up to 2s waiting for callback error deadline := time.Now().Add(2 * time.Second) for time.Now().Before(deadline) { mu.Lock() if callbackCalled { mu.Unlock() // Verify previous values were NOT updated (so retry can work) c.reloadMu.RLock() assert.Equal(t, "parentbased_always_on", c.prevSamplerType) assert.Equal(t, 1.0, c.prevSamplerRatio) c.reloadMu.RUnlock() return } mu.Unlock() time.Sleep(20 * time.Millisecond) } mu.Lock() defer mu.Unlock() t.Fatalf("sampler reconfigure callback was not invoked: callbackCalled=%v", callbackCalled) }