Files
dance-lessons-coach/pkg/config/config.go
Gabriel Radureau 7d95259667
All checks were successful
CI/CD Pipeline / Build Docker Cache (push) Successful in 17s
CI/CD Pipeline / CI Pipeline (push) Successful in 6m20s
CI/CD Pipeline / Trigger Docker Push (push) Has been skipped
🐛 fix(config): remove racy log.Info in WatchAndApply cancel goroutine
The cancel-handler goroutine emitted a "watcher stopped" log AFTER
setting the watcherStopped flag. Because viper's internal watcher
goroutine (started by viper.WatchConfig) has no public Stop and can
outlive the context, this log call would race with subsequent
zerolog.SetGlobalLevel from the next test's LoadConfig → SetupLogging.

Symptom (under go test -race ./pkg/config/...):
  WARNING: DATA RACE
  Write at zerolog.SetGlobalLevel
  Previous read by zerolog.(*Logger).disabled in log.Info(...)

Fix: drop the informational log. The flag is sufficient — the cancel
ack does not need to be observable. Test cleanups (defer cancel())
already serialize via the t.Cleanup teardown order.

Verified: go test -race ./pkg/config/... passes, full BDD suite green.
2026-05-05 09:39:46 +02:00

744 lines
25 KiB
Go

package config
import (
"context"
"fmt"
"os"
"strings"
"sync"
"time"
"github.com/fsnotify/fsnotify"
"github.com/rs/zerolog"
"github.com/rs/zerolog/log"
"github.com/spf13/viper"
"dance-lessons-coach/pkg/version"
)
// SamplerReconfigureFunc is the signature for callbacks invoked when
// telemetry.sampler.type or telemetry.sampler.ratio change via hot-reload.
// The callback receives the new sampler type and ratio values.
// It must be safe to call concurrently — implementations should use their
// own synchronisation if needed. Returns an error if the reconfigure fails.
type SamplerReconfigureFunc func(ctx context.Context, samplerType string, samplerRatio float64) error
// NewZerologWriter creates a zerolog writer based on configuration
func NewZerologWriter() *os.File {
return os.Stderr
}
// Config represents the application configuration
type Config struct {
Server ServerConfig `mapstructure:"server"`
Shutdown ShutdownConfig `mapstructure:"shutdown"`
Logging LoggingConfig `mapstructure:"logging"`
Telemetry TelemetryConfig `mapstructure:"telemetry"`
API APIConfig `mapstructure:"api"`
Auth AuthConfig `mapstructure:"auth"`
Database DatabaseConfig `mapstructure:"database"`
RateLimit RateLimitConfig `mapstructure:"rate_limit"`
Cache CacheConfig `mapstructure:"cache"`
// viper is the underlying configuration source. Kept (unexported,
// mapstructure:"-") so hot-reload can re-unmarshal on file changes —
// see WatchAndApply (ADR-0023 selective hot-reload).
viper *viper.Viper `mapstructure:"-"`
// reloadMu serialises Unmarshal during hot-reload so a partial mutation
// can't be observed mid-flight by getter calls.
reloadMu sync.RWMutex `mapstructure:"-"`
// samplerReconfigureCallback is invoked when telemetry.sampler.type or
// telemetry.sampler.ratio change. nil means no callback registered.
samplerReconfigureCallback SamplerReconfigureFunc `mapstructure:"-"`
// prevSamplerType and prevSamplerRatio track the last-seen sampler values
// to detect changes during hot-reload (ADR-0023 Phase 3).
prevSamplerType string `mapstructure:"-"`
prevSamplerRatio float64 `mapstructure:"-"`
// watcherStopped indicates that the config watcher has been stopped via
// the context being cancelled. This prevents the OnConfigChange handler
// from processing events after cleanup.
watcherStopped bool `mapstructure:"-"`
}
// ServerConfig holds server-related configuration
type ServerConfig struct {
Host string `mapstructure:"host"`
Port int `mapstructure:"port"`
}
// ShutdownConfig holds shutdown-related configuration
type ShutdownConfig struct {
Timeout time.Duration `mapstructure:"timeout"`
}
// LoggingConfig holds logging-related configuration
type LoggingConfig struct {
JSON bool `mapstructure:"json"`
Level string `mapstructure:"level"`
Output string `mapstructure:"output"`
}
// TelemetryConfig holds OpenTelemetry-related configuration
type TelemetryConfig struct {
Enabled bool `mapstructure:"enabled"`
OTLPEndpoint string `mapstructure:"otlp_endpoint"`
ServiceName string `mapstructure:"service_name"`
Insecure bool `mapstructure:"insecure"`
Sampler SamplerConfig `mapstructure:"sampler"`
Persistence PersistenceTelemetryConfig `mapstructure:"persistence"`
}
// PersistenceTelemetryConfig holds persistence layer telemetry configuration
type PersistenceTelemetryConfig struct {
Enabled bool `mapstructure:"enabled"`
}
// APIConfig holds API version configuration
type APIConfig struct {
V2Enabled bool `mapstructure:"v2_enabled"`
}
// AuthConfig holds authentication configuration
type AuthConfig struct {
JWTSecret string `mapstructure:"jwt_secret"`
AdminMasterPassword string `mapstructure:"admin_master_password"`
JWT JWTConfig `mapstructure:"jwt"`
}
// JWTConfig holds JWT-specific configuration
type JWTConfig struct {
TTL time.Duration `mapstructure:"ttl"`
SecretRetention struct {
RetentionFactor float64 `mapstructure:"retention_factor"`
MaxRetention time.Duration `mapstructure:"max_retention"`
CleanupInterval time.Duration `mapstructure:"cleanup_interval"`
} `mapstructure:"secret_retention"`
}
// DatabaseConfig holds database configuration
type DatabaseConfig struct {
Host string `mapstructure:"host"`
Port int `mapstructure:"port"`
User string `mapstructure:"user"`
Password string `mapstructure:"password"`
Name string `mapstructure:"name"`
SSLMode string `mapstructure:"ssl_mode"`
MaxOpenConns int `mapstructure:"max_open_conns"`
MaxIdleConns int `mapstructure:"max_idle_conns"`
ConnMaxLifetime time.Duration `mapstructure:"conn_max_lifetime"`
}
// RateLimitConfig holds rate limiting configuration
type RateLimitConfig struct {
Enabled bool `mapstructure:"enabled"`
RequestsPerMinute int `mapstructure:"requests_per_minute"`
BurstSize int `mapstructure:"burst_size"`
}
// CacheConfig holds cache configuration
type CacheConfig struct {
Enabled bool `mapstructure:"enabled"`
DefaultTTLSeconds int `mapstructure:"default_ttl_seconds"`
CleanupIntervalSeconds int `mapstructure:"cleanup_interval_seconds"`
}
// VersionInfo holds application version information
type VersionInfo struct {
Version string `mapstructure:"-"` // Set via ldflags
Commit string `mapstructure:"-"` // Set via ldflags
Date string `mapstructure:"-"` // Set via ldflags
GoVersion string `mapstructure:"-"` // Set at runtime
}
// VersionCommand handles version display
func (c *Config) VersionCommand() string {
// This will be enhanced when we integrate with cobra
return fmt.Sprintf("dance-lessons-coach %s (commit: %s, built: %s, go: %s)",
version.Version, version.Commit, version.Date, version.GoVersion)
}
// SamplerConfig holds tracing sampler configuration
type SamplerConfig struct {
Type string `mapstructure:"type"`
Ratio float64 `mapstructure:"ratio"`
}
// peekJSONLogging determines whether JSON logging should be used before the full
// config is loaded, solving the chicken-and-egg problem where the logger format
// must be known before any log is emitted, yet the format is stored in the config.
//
// Resolution order (mirrors Viper's own priority):
// 1. DLC_LOGGING_JSON env var — checked directly via os.Getenv (zero overhead)
// 2. logging.json key in the config file — read with a minimal throwaway Viper
// instance so we don't parse the whole config twice unnecessarily
func peekJSONLogging() bool {
// 1. Env var takes highest priority — check it first
if env := os.Getenv("DLC_LOGGING_JSON"); env != "" {
return strings.EqualFold(env, "true") || env == "1"
}
// 2. Try to read logging.json from the config file
preV := viper.New()
preV.SetDefault("logging.json", false)
if configFile := os.Getenv("DLC_CONFIG_FILE"); configFile != "" {
preV.SetConfigFile(configFile)
} else {
preV.SetConfigName("config")
preV.SetConfigType("yaml")
preV.AddConfigPath(".")
}
_ = preV.ReadInConfig() // ignore errors — defaults apply on failure
return preV.GetBool("logging.json")
}
// LoadConfig loads configuration from file, environment variables, and defaults
// Configuration priority: file > environment variables > defaults
// To specify a custom config file path, set DLC_CONFIG_FILE environment variable
func LoadConfig() (*Config, error) {
// Check if we're in a test environment - this should NOT be called during BDD tests
if os.Getenv("FEATURE") != "" {
panic("ERROR: LoadConfig() was called during BDD tests! This should not happen - tests should use createTestConfig() instead.")
}
v := viper.New()
// Configure the logger format before emitting any log output.
// peekJSONLogging reads the JSON setting early (env var + config file pre-read)
// so that every log line — including those produced during config loading — is
// already in the correct format.
jsonLogging := peekJSONLogging()
if jsonLogging {
log.Logger = log.Output(os.Stderr)
} else {
log.Logger = log.Output(zerolog.ConsoleWriter{Out: os.Stderr})
}
log.Info().Bool("json", jsonLogging).Msg("Logging configured")
// Set default values
v.SetDefault("server.host", "0.0.0.0")
v.SetDefault("server.port", 8080)
v.SetDefault("shutdown.timeout", 30*time.Second)
v.SetDefault("logging.json", false)
v.SetDefault("logging.level", "trace")
v.SetDefault("logging.output", "")
// Telemetry defaults
v.SetDefault("telemetry.enabled", false)
v.SetDefault("telemetry.otlp_endpoint", "localhost:4317")
v.SetDefault("telemetry.service_name", "dance-lessons-coach")
v.SetDefault("telemetry.insecure", true)
v.SetDefault("telemetry.sampler.type", "parentbased_always_on")
v.SetDefault("telemetry.sampler.ratio", 1.0)
v.SetDefault("telemetry.persistence.enabled", false)
// API defaults
v.SetDefault("api.v2_enabled", false)
// Rate limit defaults
v.SetDefault("rate_limit.enabled", true)
v.SetDefault("rate_limit.requests_per_minute", 60)
v.SetDefault("rate_limit.burst_size", 10)
// Cache defaults
v.SetDefault("cache.enabled", true)
v.SetDefault("cache.default_ttl_seconds", 300)
v.SetDefault("cache.cleanup_interval_seconds", 600)
// Auth defaults
v.SetDefault("auth.jwt_secret", "default-secret-key-please-change-in-production")
v.SetDefault("auth.admin_master_password", "admin123")
v.SetDefault("auth.jwt.ttl", 1*time.Hour)
v.SetDefault("auth.jwt.secret_retention.retention_factor", 2.0)
v.SetDefault("auth.jwt.secret_retention.max_retention", 72*time.Hour)
v.SetDefault("auth.jwt.secret_retention.cleanup_interval", 1*time.Hour)
// Check for custom config file path via environment variable
if configFile := os.Getenv("DLC_CONFIG_FILE"); configFile != "" {
v.SetConfigFile(configFile)
log.Info().Str("config_file", configFile).Msg("Using custom config file path")
} else {
// Default: look for config.yaml in current directory
v.SetConfigName("config")
v.SetConfigType("yaml")
v.AddConfigPath(".")
}
// Read config file if it exists
if err := v.ReadInConfig(); err != nil {
if _, ok := err.(viper.ConfigFileNotFoundError); !ok {
// Config file was found but there was an error reading it
log.Warn().Err(err).Msg("Error reading config file, using defaults")
}
// Config file not found, continue with environment variables and defaults
} else {
log.Info().Str("config_file", v.ConfigFileUsed()).Msg("Config file loaded")
}
// Bind environment variables
v.AutomaticEnv()
v.SetEnvPrefix("DLC") // dance-lessons-coach prefix
v.BindEnv("server.host", "DLC_SERVER_HOST")
v.BindEnv("server.port", "DLC_SERVER_PORT")
v.BindEnv("shutdown.timeout", "DLC_SHUTDOWN_TIMEOUT")
v.BindEnv("logging.json", "DLC_LOGGING_JSON")
v.BindEnv("logging.level", "DLC_LOGGING_LEVEL")
v.BindEnv("logging.output", "DLC_LOGGING_OUTPUT")
// Telemetry environment variables
v.BindEnv("telemetry.enabled", "DLC_TELEMETRY_ENABLED")
v.BindEnv("telemetry.otlp_endpoint", "DLC_TELEMETRY_OTLP_ENDPOINT")
v.BindEnv("telemetry.service_name", "DLC_TELEMETRY_SERVICE_NAME")
v.BindEnv("telemetry.insecure", "DLC_TELEMETRY_INSECURE")
// Auth environment variables
v.BindEnv("auth.jwt_secret", "DLC_AUTH_JWT_SECRET")
v.BindEnv("auth.admin_master_password", "DLC_AUTH_ADMIN_MASTER_PASSWORD")
v.BindEnv("auth.jwt.ttl", "DLC_AUTH_JWT_TTL")
v.BindEnv("auth.jwt.secret_retention.retention_factor", "DLC_AUTH_JWT_SECRET_RETENTION_FACTOR")
v.BindEnv("auth.jwt.secret_retention.max_retention", "DLC_AUTH_JWT_SECRET_MAX_RETENTION")
v.BindEnv("auth.jwt.secret_retention.cleanup_interval", "DLC_AUTH_JWT_SECRET_CLEANUP_INTERVAL")
v.BindEnv("telemetry.sampler.type", "DLC_TELEMETRY_SAMPLER_TYPE")
v.BindEnv("telemetry.sampler.ratio", "DLC_TELEMETRY_SAMPLER_RATIO")
// API environment variables
v.BindEnv("api.v2_enabled", "DLC_API_V2_ENABLED")
// Rate limit environment variables
v.BindEnv("rate_limit.enabled", "DLC_RATE_LIMIT_ENABLED")
v.BindEnv("rate_limit.requests_per_minute", "DLC_RATE_LIMIT_REQUESTS_PER_MINUTE")
v.BindEnv("rate_limit.burst_size", "DLC_RATE_LIMIT_BURST_SIZE")
// Cache environment variables
v.BindEnv("cache.enabled", "DLC_CACHE_ENABLED")
v.BindEnv("cache.default_ttl_seconds", "DLC_CACHE_DEFAULT_TTL_SECONDS")
v.BindEnv("cache.cleanup_interval_seconds", "DLC_CACHE_CLEANUP_INTERVAL_SECONDS")
// Database environment variables
v.BindEnv("database.host", "DLC_DATABASE_HOST")
v.BindEnv("database.port", "DLC_DATABASE_PORT")
v.BindEnv("database.user", "DLC_DATABASE_USER")
v.BindEnv("database.password", "DLC_DATABASE_PASSWORD")
v.BindEnv("database.name", "DLC_DATABASE_NAME")
v.BindEnv("database.ssl_mode", "DLC_DATABASE_SSL_MODE")
// Unmarshal into Config struct
var config Config
if err := v.Unmarshal(&config); err != nil {
log.Error().Err(err).Msg("Failed to unmarshal config")
return nil, fmt.Errorf("config unmarshal error: %w", err)
}
// Keep the viper instance for hot-reload (ADR-0023).
config.viper = v
// Initialize previous sampler values for hot-reload change detection
// (ADR-0023 Phase 3).
config.prevSamplerType = config.Telemetry.Sampler.Type
config.prevSamplerRatio = config.Telemetry.Sampler.Ratio
// Setup logging based on configuration (level, output file, time format).
// The JSON/console format was already applied at the top of LoadConfig via
// peekJSONLogging, so SetupLogging only needs to handle the remaining knobs.
config.SetupLogging()
log.Info().
Str("host", config.Server.Host).
Int("port", config.Server.Port).
Dur("shutdown_timeout", config.Shutdown.Timeout).
Bool("logging_json", config.Logging.JSON).
Str("logging_level", config.Logging.Level).
Str("logging_output", config.Logging.Output).
Bool("telemetry_enabled", config.Telemetry.Enabled).
Str("telemetry_service", config.Telemetry.ServiceName).
Bool("api_v2_enabled", config.API.V2Enabled).
Dur("jwt_ttl", config.GetJWTTTL()).
Float64("jwt_retention_factor", config.GetJWTSecretRetentionFactor()).
Dur("jwt_max_retention", config.GetJWTSecretMaxRetention()).
Dur("jwt_cleanup_interval", config.GetJWTSecretCleanupInterval()).
Msg("Configuration loaded")
return &config, nil
}
// GetServerAddress returns the formatted server address (host:port)
func (c *Config) GetServerAddress() string {
return fmt.Sprintf("%s:%d", c.Server.Host, c.Server.Port)
}
// GetTelemetryEnabled returns whether telemetry is enabled
func (c *Config) GetTelemetryEnabled() bool {
return c.Telemetry.Enabled
}
// GetOTLPEndpoint returns the OTLP endpoint for telemetry
func (c *Config) GetOTLPEndpoint() string {
return c.Telemetry.OTLPEndpoint
}
// GetServiceName returns the service name for telemetry
func (c *Config) GetServiceName() string {
return c.Telemetry.ServiceName
}
// GetPersistenceTelemetryEnabled returns whether persistence layer telemetry is enabled
func (c *Config) GetPersistenceTelemetryEnabled() bool {
return c.Telemetry.Enabled && c.Telemetry.Persistence.Enabled
}
// GetTelemetryInsecure returns whether to use insecure connection
func (c *Config) GetTelemetryInsecure() bool {
return c.Telemetry.Insecure
}
// GetSamplerType returns the sampler type
func (c *Config) GetSamplerType() string {
return c.Telemetry.Sampler.Type
}
// GetSamplerRatio returns the sampler ratio
func (c *Config) GetSamplerRatio() float64 {
return c.Telemetry.Sampler.Ratio
}
// SetSamplerReconfigureCallback registers a callback that is invoked when
// telemetry.sampler.type or telemetry.sampler.ratio change via hot-reload.
// The callback receives the new sampler type and ratio values.
// Pass nil to unregister the callback.
func (c *Config) SetSamplerReconfigureCallback(callback SamplerReconfigureFunc) {
c.reloadMu.Lock()
defer c.reloadMu.Unlock()
c.samplerReconfigureCallback = callback
// Initialize previous values so we can detect changes on first hot-reload
c.prevSamplerType = c.Telemetry.Sampler.Type
c.prevSamplerRatio = c.Telemetry.Sampler.Ratio
}
// GetV2Enabled returns whether v2 API is enabled
func (c *Config) GetV2Enabled() bool {
return c.API.V2Enabled
}
// GetJWTSecret returns the JWT secret
func (c *Config) GetJWTSecret() string {
return c.Auth.JWTSecret
}
// GetAdminMasterPassword returns the admin master password
func (c *Config) GetAdminMasterPassword() string {
return c.Auth.AdminMasterPassword
}
// GetJWTTTL returns the JWT TTL
func (c *Config) GetJWTTTL() time.Duration {
if c.Auth.JWT.TTL == 0 {
return 1 * time.Hour // Default value
}
return c.Auth.JWT.TTL
}
// GetJWTSecretRetentionFactor returns the JWT secret retention factor
func (c *Config) GetJWTSecretRetentionFactor() float64 {
if c.Auth.JWT.SecretRetention.RetentionFactor == 0 {
return 2.0 // Default value
}
return c.Auth.JWT.SecretRetention.RetentionFactor
}
// GetJWTSecretMaxRetention returns the maximum JWT secret retention period
func (c *Config) GetJWTSecretMaxRetention() time.Duration {
if c.Auth.JWT.SecretRetention.MaxRetention == 0 {
return 72 * time.Hour // Default value
}
return c.Auth.JWT.SecretRetention.MaxRetention
}
// GetJWTSecretCleanupInterval returns the JWT secret cleanup interval
func (c *Config) GetJWTSecretCleanupInterval() time.Duration {
if c.Auth.JWT.SecretRetention.CleanupInterval == 0 {
return 1 * time.Hour // Default value
}
return c.Auth.JWT.SecretRetention.CleanupInterval
}
// GetLoggingJSON returns whether JSON logging is enabled
func (c *Config) GetLoggingJSON() bool {
return c.Logging.JSON
}
// GetLogLevel returns the logging level
func (c *Config) GetLogLevel() string {
return c.Logging.Level
}
// GetLogOutput returns the log output path
func (c *Config) GetLogOutput() string {
return c.Logging.Output
}
// GetRateLimitEnabled returns whether rate limiting is enabled
func (c *Config) GetRateLimitEnabled() bool {
return c.RateLimit.Enabled
}
// GetRateLimitRequestsPerMinute returns the requests per minute limit
func (c *Config) GetRateLimitRequestsPerMinute() int {
if c.RateLimit.RequestsPerMinute <= 0 {
return 60
}
return c.RateLimit.RequestsPerMinute
}
// GetRateLimitBurstSize returns the burst size for rate limiting
func (c *Config) GetRateLimitBurstSize() int {
if c.RateLimit.BurstSize <= 0 {
return 10
}
return c.RateLimit.BurstSize
}
// GetCacheEnabled returns whether cache is enabled
func (c *Config) GetCacheEnabled() bool {
return c.Cache.Enabled
}
// GetCacheDefaultTTLSeconds returns the default TTL in seconds for cache items
func (c *Config) GetCacheDefaultTTLSeconds() int {
if c.Cache.DefaultTTLSeconds <= 0 {
return 300
}
return c.Cache.DefaultTTLSeconds
}
// GetCacheCleanupIntervalSeconds returns the cleanup interval in seconds for cache
func (c *Config) GetCacheCleanupIntervalSeconds() int {
if c.Cache.CleanupIntervalSeconds <= 0 {
return 600
}
return c.Cache.CleanupIntervalSeconds
}
// GetDatabaseHost returns the database host
func (c *Config) GetDatabaseHost() string {
if c.Database.Host == "" {
return "localhost"
}
return c.Database.Host
}
// GetDatabasePort returns the database port
func (c *Config) GetDatabasePort() int {
if c.Database.Port == 0 {
return 5432
}
return c.Database.Port
}
// GetDatabaseUser returns the database user
func (c *Config) GetDatabaseUser() string {
if c.Database.User == "" {
return "postgres"
}
return c.Database.User
}
// GetDatabasePassword returns the database password
func (c *Config) GetDatabasePassword() string {
return c.Database.Password
}
// GetDatabaseName returns the database name
func (c *Config) GetDatabaseName() string {
if c.Database.Name == "" {
return "dance_lessons_coach"
}
return c.Database.Name
}
// GetDatabaseSSLMode returns the database SSL mode
func (c *Config) GetDatabaseSSLMode() string {
if c.Database.SSLMode == "" {
return "disable"
}
return c.Database.SSLMode
}
// GetDatabaseMaxOpenConns returns the maximum number of open connections
func (c *Config) GetDatabaseMaxOpenConns() int {
if c.Database.MaxOpenConns == 0 {
return 25
}
return c.Database.MaxOpenConns
}
// GetDatabaseMaxIdleConns returns the maximum number of idle connections
func (c *Config) GetDatabaseMaxIdleConns() int {
if c.Database.MaxIdleConns == 0 {
return 5
}
return c.Database.MaxIdleConns
}
// GetDatabaseConnMaxLifetime returns the maximum lifetime of connections
func (c *Config) GetDatabaseConnMaxLifetime() time.Duration {
if c.Database.ConnMaxLifetime == 0 {
return time.Hour
}
return c.Database.ConnMaxLifetime
}
// SetupLogging configures zerolog based on the configuration
func (c *Config) SetupLogging() {
// Parse log level
level := parseLogLevel(c.GetLogLevel())
zerolog.SetGlobalLevel(level)
// Setup log output
c.setupLogOutput()
zerolog.TimeFieldFormat = zerolog.TimeFormatUnix
}
// parseLogLevel converts a string log level to zerolog.Level
func parseLogLevel(level string) zerolog.Level {
switch strings.ToLower(level) {
case "trace":
return zerolog.TraceLevel
case "debug":
return zerolog.DebugLevel
case "info":
return zerolog.InfoLevel
case "warn", "warning":
return zerolog.WarnLevel
case "error":
return zerolog.ErrorLevel
case "fatal":
return zerolog.FatalLevel
case "panic":
return zerolog.PanicLevel
default:
log.Warn().Str("level", level).Msg("Unknown log level, defaulting to trace")
return zerolog.TraceLevel
}
}
// setupLogOutput configures the log output based on configuration
func (c *Config) setupLogOutput() {
output := c.GetLogOutput()
if output == "" {
// Use stderr by default
return
}
// Open the log file
file, err := os.OpenFile(output, os.O_CREATE|os.O_WRONLY|os.O_APPEND, 0666)
if err != nil {
log.Error().Err(err).Str("output", output).Msg("Failed to open log file, using stderr")
return
}
// Set the log output
log.Logger = log.Output(file)
log.Trace().Str("output", output).Msg("Logging to file")
}
// WatchAndApply starts watching the config file for changes and applies the
// hot-reloadable subset on every change (ADR-0023 selective hot-reload).
//
// Phases shipped:
// - Phase 1: logging.level — re-applied via SetupLogging on every change.
// - Phase 2: auth.jwt.ttl — picked up automatically because the userService
// reads it via JWTConfig.GetTTL (a method value capturing this *Config).
// The reloaded TTL is used on the NEXT token generation; tokens issued
// before the change keep their original expiry.
// - Phase 3: telemetry.sampler.type + telemetry.sampler.ratio — triggers
// the callback set via SetSamplerReconfigureCallback if the values change.
//
// The other fields listed in ADR-0023 (api.v2_enabled) remain restart-only
// until their handlers land in subsequent phases.
//
// Stops when ctx is cancelled. Safe to call once at server startup.
// If the config file is absent (ConfigFileNotFoundError at load time), this
// becomes a no-op and logs a single warning.
func (c *Config) WatchAndApply(ctx context.Context) {
if c.viper == nil {
log.Warn().Msg("Config hot-reload disabled: no viper instance attached")
return
}
if c.viper.ConfigFileUsed() == "" {
log.Info().Msg("Config hot-reload disabled: no config file in use (env-only or defaults)")
return
}
c.viper.OnConfigChange(func(in fsnotify.Event) {
// Skip processing if watcher has been stopped
c.reloadMu.Lock()
if c.watcherStopped {
c.reloadMu.Unlock()
return
}
c.reloadMu.Unlock()
log.Info().Str("event", in.Op.String()).Str("file", in.Name).Msg("Config file changed, reloading hot-reloadable fields")
c.reloadMu.Lock()
defer c.reloadMu.Unlock()
if err := c.viper.Unmarshal(c); err != nil {
log.Error().Err(err).Msg("Hot-reload: failed to unmarshal new config, keeping previous values")
return
}
// Apply hot-reloadable fields. Order matters: logging first so the
// rest of the reload is logged at the right level.
c.SetupLogging()
// Check if sampler config changed and invoke callback if registered
samplerChanged := c.prevSamplerType != c.Telemetry.Sampler.Type ||
c.prevSamplerRatio != c.Telemetry.Sampler.Ratio
if samplerChanged && c.samplerReconfigureCallback != nil {
if err := c.samplerReconfigureCallback(context.Background(),
c.Telemetry.Sampler.Type,
c.Telemetry.Sampler.Ratio); err != nil {
log.Error().Err(err).Msg("Hot-reload: sampler reconfigure callback failed")
} else {
// Update previous values only after successful callback
c.prevSamplerType = c.Telemetry.Sampler.Type
c.prevSamplerRatio = c.Telemetry.Sampler.Ratio
log.Info().
Str("sampler_type", c.prevSamplerType).
Float64("sampler_ratio", c.prevSamplerRatio).
Msg("Hot-reload applied: telemetry sampler reconfigured")
}
} else if samplerChanged {
// No callback registered, just update tracking values
c.prevSamplerType = c.Telemetry.Sampler.Type
c.prevSamplerRatio = c.Telemetry.Sampler.Ratio
}
log.Info().
Str("logging_level", c.GetLogLevel()).
Dur("jwt_ttl", c.GetJWTTTL()).
Msg("Hot-reload applied (logging.level + auth.jwt.ttl)")
})
c.viper.WatchConfig()
log.Info().Str("file", c.viper.ConfigFileUsed()).Msg("Config hot-reload watcher started (ADR-0023 Phase 1)")
// Stop the watcher on context cancel — we set a flag that the
// OnConfigChange handler checks, avoiding the race with viper's
// internal state that would occur if we called OnConfigChange again.
// We deliberately do NOT log here: viper's internal watcher goroutine
// has no public Stop, so it can outlive ctx, and a zerolog call here
// would race with the next test's LoadConfig → SetupLogging →
// zerolog.SetGlobalLevel under -race (observed 2026-05-05).
go func() {
<-ctx.Done()
c.reloadMu.Lock()
c.watcherStopped = true
c.reloadMu.Unlock()
}()
}