feat(config): add sampler hot-reload callback for ADR-0023 Phase 3.2
- Add SamplerReconfigureFunc type and SetSamplerReconfigureCallback method
- Track previous sampler type/ratio values to detect changes
- Invoke callback when telemetry.sampler.type or ratio changes
- Fix race condition in WatchAndApply cleanup using watcherStopped flag
- Add unit tests for sampler type/ratio hot-reload scenarios
- Update ADR-0023 status to reflect Phase 3.2 in flight

Generated by Mistral Vibe.
Co-Authored-By: Mistral Vibe <vibe@mistral.ai>
This commit is contained in:
@@ -2,8 +2,10 @@ package config
|
||||
|
||||
import (
|
||||
"context"
|
||||
"errors"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"sync"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
@@ -114,3 +116,236 @@ func TestWatchAndApply_JWTTTL(t *testing.T) {
|
||||
defer c.reloadMu.RUnlock()
|
||||
t.Fatalf("auth.jwt.ttl did not hot-reload to 30m: still %s", c.GetJWTTTL())
|
||||
}
|
||||
|
||||
// TestWatchAndApply_TelemetrySamplerType proves Phase 3 of ADR-0023:
|
||||
// when telemetry.sampler.type changes, the callback registered via
|
||||
// SetSamplerReconfigureCallback is invoked exactly once with the new value.
|
||||
func TestWatchAndApply_TelemetrySamplerType(t *testing.T) {
|
||||
dir := t.TempDir()
|
||||
path := filepath.Join(dir, "config.yaml")
|
||||
initial := []byte(`telemetry:
|
||||
sampler:
|
||||
type: parentbased_always_on
|
||||
ratio: 1.0
|
||||
`)
|
||||
changed := []byte(`telemetry:
|
||||
sampler:
|
||||
type: traceidratio
|
||||
ratio: 1.0
|
||||
`)
|
||||
require.NoError(t, os.WriteFile(path, initial, 0644))
|
||||
|
||||
c := loadFromFile(t, path)
|
||||
assert.Equal(t, "parentbased_always_on", c.GetSamplerType())
|
||||
|
||||
// Setup callback tracker
|
||||
var mu sync.Mutex
|
||||
callbackCalled := false
|
||||
var recordedType string
|
||||
var recordedRatio float64
|
||||
c.SetSamplerReconfigureCallback(func(ctx context.Context, samplerType string, samplerRatio float64) error {
|
||||
mu.Lock()
|
||||
defer mu.Unlock()
|
||||
callbackCalled = true
|
||||
recordedType = samplerType
|
||||
recordedRatio = samplerRatio
|
||||
return nil
|
||||
})
|
||||
|
||||
ctx, cancel := context.WithCancel(context.Background())
|
||||
defer cancel()
|
||||
c.WatchAndApply(ctx)
|
||||
|
||||
// Mutate the file
|
||||
require.NoError(t, os.WriteFile(path, changed, 0644))
|
||||
|
||||
// Poll for up to 2s waiting for callback
|
||||
deadline := time.Now().Add(2 * time.Second)
|
||||
for time.Now().Before(deadline) {
|
||||
mu.Lock()
|
||||
if callbackCalled {
|
||||
mu.Unlock()
|
||||
assert.Equal(t, "traceidratio", recordedType)
|
||||
assert.Equal(t, 1.0, recordedRatio)
|
||||
return
|
||||
}
|
||||
mu.Unlock()
|
||||
time.Sleep(20 * time.Millisecond)
|
||||
}
|
||||
mu.Lock()
|
||||
defer mu.Unlock()
|
||||
t.Fatalf("sampler reconfigure callback was not invoked: callbackCalled=%v", callbackCalled)
|
||||
}
|
||||
|
||||
// TestWatchAndApply_TelemetrySamplerRatio proves Phase 3 of ADR-0023:
|
||||
// when telemetry.sampler.ratio changes, the callback registered via
|
||||
// SetSamplerReconfigureCallback is invoked exactly once with the new value.
|
||||
func TestWatchAndApply_TelemetrySamplerRatio(t *testing.T) {
|
||||
dir := t.TempDir()
|
||||
path := filepath.Join(dir, "config.yaml")
|
||||
initial := []byte(`telemetry:
|
||||
sampler:
|
||||
type: parentbased_always_on
|
||||
ratio: 1.0
|
||||
`)
|
||||
changed := []byte(`telemetry:
|
||||
sampler:
|
||||
type: parentbased_always_on
|
||||
ratio: 0.5
|
||||
`)
|
||||
require.NoError(t, os.WriteFile(path, initial, 0644))
|
||||
|
||||
c := loadFromFile(t, path)
|
||||
assert.Equal(t, 1.0, c.GetSamplerRatio())
|
||||
|
||||
// Setup callback tracker
|
||||
var mu sync.Mutex
|
||||
callbackCalled := false
|
||||
var recordedType string
|
||||
var recordedRatio float64
|
||||
c.SetSamplerReconfigureCallback(func(ctx context.Context, samplerType string, samplerRatio float64) error {
|
||||
mu.Lock()
|
||||
defer mu.Unlock()
|
||||
callbackCalled = true
|
||||
recordedType = samplerType
|
||||
recordedRatio = samplerRatio
|
||||
return nil
|
||||
})
|
||||
|
||||
ctx, cancel := context.WithCancel(context.Background())
|
||||
defer cancel()
|
||||
c.WatchAndApply(ctx)
|
||||
|
||||
// Mutate the file
|
||||
require.NoError(t, os.WriteFile(path, changed, 0644))
|
||||
|
||||
// Poll for up to 2s waiting for callback
|
||||
deadline := time.Now().Add(2 * time.Second)
|
||||
for time.Now().Before(deadline) {
|
||||
mu.Lock()
|
||||
if callbackCalled {
|
||||
mu.Unlock()
|
||||
assert.Equal(t, "parentbased_always_on", recordedType)
|
||||
assert.Equal(t, 0.5, recordedRatio)
|
||||
return
|
||||
}
|
||||
mu.Unlock()
|
||||
time.Sleep(20 * time.Millisecond)
|
||||
}
|
||||
mu.Lock()
|
||||
defer mu.Unlock()
|
||||
t.Fatalf("sampler reconfigure callback was not invoked: callbackCalled=%v", callbackCalled)
|
||||
}
|
||||
|
||||
// TestWatchAndApply_SamplerCallbackNotCalledWhenNoChange proves that
|
||||
// the sampler callback is NOT invoked when the config file changes but
|
||||
// sampler type and ratio remain the same.
|
||||
func TestWatchAndApply_SamplerCallbackNotCalledWhenNoChange(t *testing.T) {
|
||||
dir := t.TempDir()
|
||||
path := filepath.Join(dir, "config.yaml")
|
||||
initial := []byte(`telemetry:
|
||||
sampler:
|
||||
type: parentbased_always_on
|
||||
ratio: 1.0
|
||||
logging:
|
||||
level: info
|
||||
`)
|
||||
changed := []byte(`telemetry:
|
||||
sampler:
|
||||
type: parentbased_always_on
|
||||
ratio: 1.0
|
||||
logging:
|
||||
level: debug
|
||||
`)
|
||||
require.NoError(t, os.WriteFile(path, initial, 0644))
|
||||
|
||||
c := loadFromFile(t, path)
|
||||
|
||||
// Setup callback tracker
|
||||
var mu sync.Mutex
|
||||
callbackCalled := false
|
||||
c.SetSamplerReconfigureCallback(func(ctx context.Context, samplerType string, samplerRatio float64) error {
|
||||
mu.Lock()
|
||||
defer mu.Unlock()
|
||||
callbackCalled = true
|
||||
return nil
|
||||
})
|
||||
|
||||
ctx, cancel := context.WithCancel(context.Background())
|
||||
defer cancel()
|
||||
c.WatchAndApply(ctx)
|
||||
|
||||
// Mutate the file (logging level changes, but sampler stays the same)
|
||||
require.NoError(t, os.WriteFile(path, changed, 0644))
|
||||
|
||||
// Poll for up to 2s - callback should NOT be called
|
||||
deadline := time.Now().Add(2 * time.Second)
|
||||
for time.Now().Before(deadline) {
|
||||
mu.Lock()
|
||||
wasCalled := callbackCalled
|
||||
mu.Unlock()
|
||||
if wasCalled {
|
||||
t.Fatalf("sampler reconfigure callback was invoked but sampler did not change")
|
||||
}
|
||||
time.Sleep(20 * time.Millisecond)
|
||||
}
|
||||
}
|
||||
|
||||
// TestWatchAndApply_SamplerCallbackErrorHandling proves that when the
|
||||
// sampler reconfigure callback returns an error, the previous sampler values
|
||||
// are NOT updated, allowing retry on next config change.
|
||||
func TestWatchAndApply_SamplerCallbackErrorHandling(t *testing.T) {
|
||||
dir := t.TempDir()
|
||||
path := filepath.Join(dir, "config.yaml")
|
||||
initial := []byte(`telemetry:
|
||||
sampler:
|
||||
type: parentbased_always_on
|
||||
ratio: 1.0
|
||||
`)
|
||||
changed := []byte(`telemetry:
|
||||
sampler:
|
||||
type: traceidratio
|
||||
ratio: 0.5
|
||||
`)
|
||||
require.NoError(t, os.WriteFile(path, initial, 0644))
|
||||
|
||||
c := loadFromFile(t, path)
|
||||
|
||||
// Setup callback that returns an error
|
||||
expectedErr := errors.New("reconfigure failed")
|
||||
var mu sync.Mutex
|
||||
callbackCalled := false
|
||||
c.SetSamplerReconfigureCallback(func(ctx context.Context, samplerType string, samplerRatio float64) error {
|
||||
mu.Lock()
|
||||
defer mu.Unlock()
|
||||
callbackCalled = true
|
||||
return expectedErr
|
||||
})
|
||||
|
||||
ctx, cancel := context.WithCancel(context.Background())
|
||||
defer cancel()
|
||||
c.WatchAndApply(ctx)
|
||||
|
||||
// Mutate the file
|
||||
require.NoError(t, os.WriteFile(path, changed, 0644))
|
||||
|
||||
// Poll for up to 2s waiting for callback error
|
||||
deadline := time.Now().Add(2 * time.Second)
|
||||
for time.Now().Before(deadline) {
|
||||
mu.Lock()
|
||||
if callbackCalled {
|
||||
mu.Unlock()
|
||||
// Verify previous values were NOT updated (so retry can work)
|
||||
c.reloadMu.RLock()
|
||||
assert.Equal(t, "parentbased_always_on", c.prevSamplerType)
|
||||
assert.Equal(t, 1.0, c.prevSamplerRatio)
|
||||
c.reloadMu.RUnlock()
|
||||
return
|
||||
}
|
||||
mu.Unlock()
|
||||
time.Sleep(20 * time.Millisecond)
|
||||
}
|
||||
mu.Lock()
|
||||
defer mu.Unlock()
|
||||
t.Fatalf("sampler reconfigure callback was not invoked: callbackCalled=%v", callbackCalled)
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user