Files
dance-lessons-coach/pkg/config/config.go
Gabriel Radureau b33ad236e1
Some checks failed
CI/CD Pipeline / Build Docker Cache (push) Successful in 59s
CI/CD Pipeline / CI Pipeline (push) Failing after 4m3s
CI/CD Pipeline / Trigger Docker Push (push) Has been skipped
feat(config): hot-reload Phase 1 — logging.level (ADR-0023) (#42)
Co-authored-by: Gabriel Radureau <arcodange@gmail.com>
Co-committed-by: Gabriel Radureau <arcodange@gmail.com>
2026-05-05 08:45:19 +02:00

658 lines
22 KiB
Go

package config
import (
"context"
"fmt"
"os"
"strings"
"sync"
"time"
"github.com/fsnotify/fsnotify"
"github.com/rs/zerolog"
"github.com/rs/zerolog/log"
"github.com/spf13/viper"
"dance-lessons-coach/pkg/version"
)
// NewZerologWriter returns the writer zerolog output is sent to.
// It takes no configuration: the process's standard error stream is always
// returned.
func NewZerologWriter() *os.File {
	stderr := os.Stderr
	return stderr
}
// Config represents the application configuration.
//
// Each exported field is one top-level section of the configuration and is
// filled by viper through the matching mapstructure key. The struct embeds a
// mutex, so it must be passed around by pointer and never copied.
type Config struct {
// Sections decoded from the configuration sources.
Server ServerConfig `mapstructure:"server"`
Shutdown ShutdownConfig `mapstructure:"shutdown"`
Logging LoggingConfig `mapstructure:"logging"`
Telemetry TelemetryConfig `mapstructure:"telemetry"`
API APIConfig `mapstructure:"api"`
Auth AuthConfig `mapstructure:"auth"`
Database DatabaseConfig `mapstructure:"database"`
RateLimit RateLimitConfig `mapstructure:"rate_limit"`
Cache CacheConfig `mapstructure:"cache"`
// viper is the underlying configuration source. Kept (unexported,
// mapstructure:"-") so hot-reload can re-unmarshal on file changes —
// see WatchAndApply (ADR-0023 selective hot-reload).
viper *viper.Viper `mapstructure:"-"`
// reloadMu serialises Unmarshal during hot-reload so a partial mutation
// can't be observed mid-flight by getter calls.
// NOTE(review): the getters in this file read their fields without taking
// reloadMu, so the guarantee above only holds for readers that lock
// explicitly — confirm before relying on it.
reloadMu sync.RWMutex `mapstructure:"-"`
}
// ServerConfig holds server-related configuration (the "server" section).
type ServerConfig struct {
// Host is the host part of the listen address; LoadConfig defaults it to "0.0.0.0".
Host string `mapstructure:"host"`
// Port is the port part of the listen address; LoadConfig defaults it to 8080.
Port int `mapstructure:"port"`
}
// ShutdownConfig holds shutdown-related configuration (the "shutdown" section).
type ShutdownConfig struct {
// Timeout is the shutdown timeout; LoadConfig defaults it to 30 seconds.
Timeout time.Duration `mapstructure:"timeout"`
}
// LoggingConfig holds logging-related configuration (the "logging" section).
type LoggingConfig struct {
// JSON selects raw JSON log lines instead of the console writer.
JSON bool `mapstructure:"json"`
// Level is the log level name understood by parseLogLevel
// (trace, debug, info, warn/warning, error, fatal, panic).
Level string `mapstructure:"level"`
// Output is the path of a log file; the empty string keeps logging on stderr.
Output string `mapstructure:"output"`
}
// TelemetryConfig holds OpenTelemetry-related configuration (the "telemetry" section).
type TelemetryConfig struct {
// Enabled is the master switch; LoadConfig defaults it to false.
Enabled bool `mapstructure:"enabled"`
// OTLPEndpoint is the OTLP endpoint; LoadConfig defaults it to "localhost:4317".
OTLPEndpoint string `mapstructure:"otlp_endpoint"`
// ServiceName is the reported service name; LoadConfig defaults it to "dance-lessons-coach".
ServiceName string `mapstructure:"service_name"`
// Insecure is the insecure-connection flag; LoadConfig defaults it to true.
Insecure bool `mapstructure:"insecure"`
// Sampler configures the tracing sampler.
Sampler SamplerConfig `mapstructure:"sampler"`
// Persistence configures telemetry for the persistence layer.
Persistence PersistenceTelemetryConfig `mapstructure:"persistence"`
}
// PersistenceTelemetryConfig holds persistence layer telemetry configuration
// (the "telemetry.persistence" section).
type PersistenceTelemetryConfig struct {
// Enabled only takes effect when telemetry as a whole is enabled too;
// see GetPersistenceTelemetryEnabled.
Enabled bool `mapstructure:"enabled"`
}
// APIConfig holds API version configuration (the "api" section).
type APIConfig struct {
// V2Enabled toggles the v2 API; LoadConfig defaults it to false.
V2Enabled bool `mapstructure:"v2_enabled"`
}
// AuthConfig holds authentication configuration (the "auth" section).
type AuthConfig struct {
// JWTSecret is the JWT secret. LoadConfig ships a placeholder default that
// is meant to be overridden in production.
JWTSecret string `mapstructure:"jwt_secret"`
// AdminMasterPassword is the admin master password. LoadConfig ships a
// well-known default ("admin123") that should be overridden.
AdminMasterPassword string `mapstructure:"admin_master_password"`
// JWT holds the token lifetime and secret-retention settings.
JWT JWTConfig `mapstructure:"jwt"`
}
// JWTConfig holds JWT-specific configuration (the "auth.jwt" section).
type JWTConfig struct {
// TTL is the JWT time-to-live; GetJWTTTL substitutes one hour when it is zero.
TTL time.Duration `mapstructure:"ttl"`
// SecretRetention groups the secret-retention settings. Each field has a
// getter on Config that substitutes a default when the value is zero.
SecretRetention struct {
// RetentionFactor falls back to 2.0 in its getter.
RetentionFactor float64 `mapstructure:"retention_factor"`
// MaxRetention falls back to 72 hours in its getter.
MaxRetention time.Duration `mapstructure:"max_retention"`
// CleanupInterval falls back to one hour in its getter.
CleanupInterval time.Duration `mapstructure:"cleanup_interval"`
} `mapstructure:"secret_retention"`
}
// DatabaseConfig holds database configuration (the "database" section).
//
// LoadConfig registers no viper defaults for this section; the GetDatabase*
// getters on Config substitute a fallback whenever a field is left at its zero
// value (Password is the exception and is returned as-is).
type DatabaseConfig struct {
// Host falls back to "localhost".
Host string `mapstructure:"host"`
// Port falls back to 5432.
Port int `mapstructure:"port"`
// User falls back to "postgres".
User string `mapstructure:"user"`
// Password has no fallback.
Password string `mapstructure:"password"`
// Name falls back to "dance_lessons_coach".
Name string `mapstructure:"name"`
// SSLMode falls back to "disable".
SSLMode string `mapstructure:"ssl_mode"`
// MaxOpenConns falls back to 25.
MaxOpenConns int `mapstructure:"max_open_conns"`
// MaxIdleConns falls back to 5.
MaxIdleConns int `mapstructure:"max_idle_conns"`
// ConnMaxLifetime falls back to one hour.
ConnMaxLifetime time.Duration `mapstructure:"conn_max_lifetime"`
}
// RateLimitConfig holds rate limiting configuration (the "rate_limit" section).
type RateLimitConfig struct {
// Enabled toggles rate limiting; LoadConfig defaults it to true.
Enabled bool `mapstructure:"enabled"`
// RequestsPerMinute defaults to 60; its getter also returns 60 for values <= 0.
RequestsPerMinute int `mapstructure:"requests_per_minute"`
// BurstSize defaults to 10; its getter also returns 10 for values <= 0.
BurstSize int `mapstructure:"burst_size"`
}
// CacheConfig holds cache configuration (the "cache" section).
type CacheConfig struct {
// Enabled toggles the cache; LoadConfig defaults it to true.
Enabled bool `mapstructure:"enabled"`
// DefaultTTLSeconds defaults to 300; its getter also returns 300 for values <= 0.
DefaultTTLSeconds int `mapstructure:"default_ttl_seconds"`
// CleanupIntervalSeconds defaults to 600; its getter also returns 600 for values <= 0.
CleanupIntervalSeconds int `mapstructure:"cleanup_interval_seconds"`
}
// VersionInfo holds application version information.
//
// Every field is tagged mapstructure:"-", so none of them is ever read from
// the configuration sources.
// NOTE(review): VersionCommand reads the `version` package directly rather
// than this type; nothing in the visible part of this file uses VersionInfo —
// confirm it is still needed.
type VersionInfo struct {
Version string `mapstructure:"-"` // Set via ldflags
Commit string `mapstructure:"-"` // Set via ldflags
Date string `mapstructure:"-"` // Set via ldflags
GoVersion string `mapstructure:"-"` // Set at runtime
}
// VersionCommand renders the one-line version banner: application name,
// version, commit, build date and Go version, all taken from the version
// package. The receiver's own fields are not consulted.
func (c *Config) VersionCommand() string {
	// This will be enhanced when we integrate with cobra
	const banner = "dance-lessons-coach %s (commit: %s, built: %s, go: %s)"
	return fmt.Sprintf(banner, version.Version, version.Commit, version.Date, version.GoVersion)
}
// SamplerConfig holds tracing sampler configuration (the "telemetry.sampler" section).
type SamplerConfig struct {
// Type is the sampler name; LoadConfig defaults it to "parentbased_always_on".
Type string `mapstructure:"type"`
// Ratio is the sampling ratio; LoadConfig defaults it to 1.0.
Ratio float64 `mapstructure:"ratio"`
}
// peekJSONLogging determines whether JSON logging should be used before the full
// config is loaded, solving the chicken-and-egg problem where the logger format
// must be known before any log is emitted, yet the format is stored in the config.
//
// Resolution order (mirrors Viper's own priority):
// 1. DLC_LOGGING_JSON env var — checked directly via os.Getenv (zero overhead)
// 2. logging.json key in the config file — read with a minimal throwaway Viper
// instance so we don't parse the whole config twice unnecessarily
func peekJSONLogging() bool {
// 1. Env var takes highest priority — check it first
if env := os.Getenv("DLC_LOGGING_JSON"); env != "" {
return strings.EqualFold(env, "true") || env == "1"
}
// 2. Try to read logging.json from the config file
preV := viper.New()
preV.SetDefault("logging.json", false)
if configFile := os.Getenv("DLC_CONFIG_FILE"); configFile != "" {
preV.SetConfigFile(configFile)
} else {
preV.SetConfigName("config")
preV.SetConfigType("yaml")
preV.AddConfigPath(".")
}
_ = preV.ReadInConfig() // ignore errors — defaults apply on failure
return preV.GetBool("logging.json")
}
// Built-in credentials that exist only so a fresh checkout can start.
// LoadConfig warns when either is still in effect after loading.
const (
	insecureDefaultJWTSecret     = "default-secret-key-please-change-in-production"
	insecureDefaultAdminPassword = "admin123"
)

// LoadConfig loads configuration from defaults, the config file, and
// environment variables.
//
// Configuration priority, highest first: environment variables > config file >
// defaults. (Viper resolves a bound environment variable before it consults
// the config file.)
// To specify a custom config file path, set the DLC_CONFIG_FILE environment variable.
//
// Side effects: replaces the global zerolog logger (format first, then level
// and output via SetupLogging) and logs a summary of the loaded values.
//
// It panics when the FEATURE environment variable is set, because BDD tests
// are expected to build their configuration with createTestConfig() instead.
//
// Returns the populated Config (with its viper instance attached for
// hot-reload), or an error when env binding or unmarshalling fails.
func LoadConfig() (*Config, error) {
	// Check if we're in a test environment - this should NOT be called during BDD tests
	if os.Getenv("FEATURE") != "" {
		panic("ERROR: LoadConfig() was called during BDD tests! This should not happen - tests should use createTestConfig() instead.")
	}

	v := viper.New()

	// Configure the logger format before emitting any log output.
	// peekJSONLogging reads the JSON setting early (env var + config file pre-read)
	// so that every log line — including those produced during config loading — is
	// already in the correct format.
	jsonLogging := peekJSONLogging()
	if jsonLogging {
		log.Logger = log.Output(os.Stderr)
	} else {
		log.Logger = log.Output(zerolog.ConsoleWriter{Out: os.Stderr})
	}
	log.Info().Bool("json", jsonLogging).Msg("Logging configured")

	setConfigDefaults(v)

	// Check for custom config file path via environment variable
	if configFile := os.Getenv("DLC_CONFIG_FILE"); configFile != "" {
		v.SetConfigFile(configFile)
		log.Info().Str("config_file", configFile).Msg("Using custom config file path")
	} else {
		// Default: look for config.yaml in current directory
		v.SetConfigName("config")
		v.SetConfigType("yaml")
		v.AddConfigPath(".")
	}

	// Read config file if it exists
	if err := v.ReadInConfig(); err != nil {
		if _, ok := err.(viper.ConfigFileNotFoundError); !ok {
			// Config file was found but there was an error reading it
			log.Warn().Err(err).Msg("Error reading config file, using defaults")
		}
		// Config file not found, continue with environment variables and defaults
	} else {
		log.Info().Str("config_file", v.ConfigFileUsed()).Msg("Config file loaded")
	}

	if err := bindConfigEnv(v); err != nil {
		return nil, fmt.Errorf("config env binding error: %w", err)
	}

	// Unmarshal into Config struct
	var config Config
	if err := v.Unmarshal(&config); err != nil {
		log.Error().Err(err).Msg("Failed to unmarshal config")
		return nil, fmt.Errorf("config unmarshal error: %w", err)
	}

	// Keep the viper instance for hot-reload (ADR-0023).
	config.viper = v

	// Setup logging based on configuration (level, output file, time format).
	// The JSON/console format was already applied at the top of LoadConfig via
	// peekJSONLogging, so SetupLogging only needs to handle the remaining knobs.
	config.SetupLogging()

	// Make it visible when the process is running on the built-in credentials.
	if config.Auth.JWTSecret == insecureDefaultJWTSecret {
		log.Warn().Msg("auth.jwt_secret is still the built-in default; set DLC_AUTH_JWT_SECRET before running in production")
	}
	if config.Auth.AdminMasterPassword == insecureDefaultAdminPassword {
		log.Warn().Msg("auth.admin_master_password is still the built-in default; set DLC_AUTH_ADMIN_MASTER_PASSWORD before running in production")
	}

	log.Info().
		Str("host", config.Server.Host).
		Int("port", config.Server.Port).
		Dur("shutdown_timeout", config.Shutdown.Timeout).
		Bool("logging_json", config.Logging.JSON).
		Str("logging_level", config.Logging.Level).
		Str("logging_output", config.Logging.Output).
		Bool("telemetry_enabled", config.Telemetry.Enabled).
		Str("telemetry_service", config.Telemetry.ServiceName).
		Bool("api_v2_enabled", config.API.V2Enabled).
		Dur("jwt_ttl", config.GetJWTTTL()).
		Float64("jwt_retention_factor", config.GetJWTSecretRetentionFactor()).
		Dur("jwt_max_retention", config.GetJWTSecretMaxRetention()).
		Dur("jwt_cleanup_interval", config.GetJWTSecretCleanupInterval()).
		Msg("Configuration loaded")

	return &config, nil
}

// setConfigDefaults registers the built-in default for every key that has one.
// The database section has no viper defaults; its getters supply fallbacks.
func setConfigDefaults(v *viper.Viper) {
	// Server, shutdown and logging defaults
	v.SetDefault("server.host", "0.0.0.0")
	v.SetDefault("server.port", 8080)
	v.SetDefault("shutdown.timeout", 30*time.Second)
	v.SetDefault("logging.json", false)
	v.SetDefault("logging.level", "trace")
	v.SetDefault("logging.output", "")

	// Telemetry defaults
	v.SetDefault("telemetry.enabled", false)
	v.SetDefault("telemetry.otlp_endpoint", "localhost:4317")
	v.SetDefault("telemetry.service_name", "dance-lessons-coach")
	v.SetDefault("telemetry.insecure", true)
	v.SetDefault("telemetry.sampler.type", "parentbased_always_on")
	v.SetDefault("telemetry.sampler.ratio", 1.0)
	v.SetDefault("telemetry.persistence.enabled", false)

	// API defaults
	v.SetDefault("api.v2_enabled", false)

	// Rate limit defaults
	v.SetDefault("rate_limit.enabled", true)
	v.SetDefault("rate_limit.requests_per_minute", 60)
	v.SetDefault("rate_limit.burst_size", 10)

	// Cache defaults
	v.SetDefault("cache.enabled", true)
	v.SetDefault("cache.default_ttl_seconds", 300)
	v.SetDefault("cache.cleanup_interval_seconds", 600)

	// Auth defaults
	v.SetDefault("auth.jwt_secret", insecureDefaultJWTSecret)
	v.SetDefault("auth.admin_master_password", insecureDefaultAdminPassword)
	v.SetDefault("auth.jwt.ttl", 1*time.Hour)
	v.SetDefault("auth.jwt.secret_retention.retention_factor", 2.0)
	v.SetDefault("auth.jwt.secret_retention.max_retention", 72*time.Hour)
	v.SetDefault("auth.jwt.secret_retention.cleanup_interval", 1*time.Hour)
}

// bindConfigEnv binds every configuration key to its DLC_* environment
// variable. Bindings are explicit because no env key replacer is installed,
// so AutomaticEnv alone cannot map dotted keys such as "server.host" onto
// underscore-separated variable names.
func bindConfigEnv(v *viper.Viper) error {
	v.AutomaticEnv()
	v.SetEnvPrefix("DLC") // dance-lessons-coach prefix

	bindings := [][2]string{
		{"server.host", "DLC_SERVER_HOST"},
		{"server.port", "DLC_SERVER_PORT"},
		{"shutdown.timeout", "DLC_SHUTDOWN_TIMEOUT"},
		{"logging.json", "DLC_LOGGING_JSON"},
		{"logging.level", "DLC_LOGGING_LEVEL"},
		{"logging.output", "DLC_LOGGING_OUTPUT"},
		// Telemetry
		{"telemetry.enabled", "DLC_TELEMETRY_ENABLED"},
		{"telemetry.otlp_endpoint", "DLC_TELEMETRY_OTLP_ENDPOINT"},
		{"telemetry.service_name", "DLC_TELEMETRY_SERVICE_NAME"},
		{"telemetry.insecure", "DLC_TELEMETRY_INSECURE"},
		{"telemetry.sampler.type", "DLC_TELEMETRY_SAMPLER_TYPE"},
		{"telemetry.sampler.ratio", "DLC_TELEMETRY_SAMPLER_RATIO"},
		{"telemetry.persistence.enabled", "DLC_TELEMETRY_PERSISTENCE_ENABLED"},
		// Auth
		{"auth.jwt_secret", "DLC_AUTH_JWT_SECRET"},
		{"auth.admin_master_password", "DLC_AUTH_ADMIN_MASTER_PASSWORD"},
		{"auth.jwt.ttl", "DLC_AUTH_JWT_TTL"},
		{"auth.jwt.secret_retention.retention_factor", "DLC_AUTH_JWT_SECRET_RETENTION_FACTOR"},
		{"auth.jwt.secret_retention.max_retention", "DLC_AUTH_JWT_SECRET_MAX_RETENTION"},
		{"auth.jwt.secret_retention.cleanup_interval", "DLC_AUTH_JWT_SECRET_CLEANUP_INTERVAL"},
		// API
		{"api.v2_enabled", "DLC_API_V2_ENABLED"},
		// Rate limit
		{"rate_limit.enabled", "DLC_RATE_LIMIT_ENABLED"},
		{"rate_limit.requests_per_minute", "DLC_RATE_LIMIT_REQUESTS_PER_MINUTE"},
		{"rate_limit.burst_size", "DLC_RATE_LIMIT_BURST_SIZE"},
		// Cache
		{"cache.enabled", "DLC_CACHE_ENABLED"},
		{"cache.default_ttl_seconds", "DLC_CACHE_DEFAULT_TTL_SECONDS"},
		{"cache.cleanup_interval_seconds", "DLC_CACHE_CLEANUP_INTERVAL_SECONDS"},
		// Database
		{"database.host", "DLC_DATABASE_HOST"},
		{"database.port", "DLC_DATABASE_PORT"},
		{"database.user", "DLC_DATABASE_USER"},
		{"database.password", "DLC_DATABASE_PASSWORD"},
		{"database.name", "DLC_DATABASE_NAME"},
		{"database.ssl_mode", "DLC_DATABASE_SSL_MODE"},
		{"database.max_open_conns", "DLC_DATABASE_MAX_OPEN_CONNS"},
		{"database.max_idle_conns", "DLC_DATABASE_MAX_IDLE_CONNS"},
		{"database.conn_max_lifetime", "DLC_DATABASE_CONN_MAX_LIFETIME"},
	}
	for _, b := range bindings {
		if err := v.BindEnv(b[0], b[1]); err != nil {
			return fmt.Errorf("binding %s to %s: %w", b[0], b[1], err)
		}
	}
	return nil
}
// GetServerAddress returns the formatted server address (host:port).
//
// A host containing a colon (an IPv6 literal such as "::1") is wrapped in
// square brackets, the way net.JoinHostPort does, so the result stays a valid
// listen/dial address. A host that is already bracketed is left untouched, so
// existing configurations keep producing the same string.
func (c *Config) GetServerAddress() string {
	host := c.Server.Host
	if strings.Contains(host, ":") && !strings.HasPrefix(host, "[") {
		host = "[" + host + "]"
	}
	return fmt.Sprintf("%s:%d", host, c.Server.Port)
}
// GetTelemetryEnabled reports the value of telemetry.enabled.
func (c *Config) GetTelemetryEnabled() bool {
	enabled := c.Telemetry.Enabled
	return enabled
}
// GetOTLPEndpoint returns the configured telemetry.otlp_endpoint value.
func (c *Config) GetOTLPEndpoint() string {
	endpoint := c.Telemetry.OTLPEndpoint
	return endpoint
}
// GetServiceName returns the configured telemetry.service_name value.
func (c *Config) GetServiceName() string {
	name := c.Telemetry.ServiceName
	return name
}
// GetPersistenceTelemetryEnabled reports whether persistence layer telemetry
// is active. It is only true when telemetry as a whole is enabled and the
// persistence switch is on as well.
func (c *Config) GetPersistenceTelemetryEnabled() bool {
	if !c.Telemetry.Enabled {
		return false
	}
	return c.Telemetry.Persistence.Enabled
}
// GetTelemetryInsecure reports the value of telemetry.insecure.
func (c *Config) GetTelemetryInsecure() bool {
	insecure := c.Telemetry.Insecure
	return insecure
}
// GetSamplerType returns the configured telemetry.sampler.type value.
func (c *Config) GetSamplerType() string {
	sampler := &c.Telemetry.Sampler
	return sampler.Type
}
// GetSamplerRatio returns the configured telemetry.sampler.ratio value.
func (c *Config) GetSamplerRatio() float64 {
	sampler := &c.Telemetry.Sampler
	return sampler.Ratio
}
// GetV2Enabled reports the value of api.v2_enabled.
func (c *Config) GetV2Enabled() bool {
	v2 := c.API.V2Enabled
	return v2
}
// GetJWTSecret returns the configured auth.jwt_secret value unchanged.
func (c *Config) GetJWTSecret() string {
	secret := c.Auth.JWTSecret
	return secret
}
// GetAdminMasterPassword returns the configured auth.admin_master_password
// value unchanged.
func (c *Config) GetAdminMasterPassword() string {
	password := c.Auth.AdminMasterPassword
	return password
}
// GetJWTTTL returns the JWT time-to-live. A zero value means "not
// configured" and yields the one-hour default; any other value is returned
// as-is.
func (c *Config) GetJWTTTL() time.Duration {
	if ttl := c.Auth.JWT.TTL; ttl != 0 {
		return ttl
	}
	return time.Hour // Default value
}
// GetJWTSecretRetentionFactor returns the JWT secret retention factor,
// substituting 2.0 when the configured value is zero.
func (c *Config) GetJWTSecretRetentionFactor() float64 {
	if factor := c.Auth.JWT.SecretRetention.RetentionFactor; factor != 0 {
		return factor
	}
	return 2.0 // Default value
}
// GetJWTSecretMaxRetention returns the maximum JWT secret retention period,
// substituting 72 hours when the configured value is zero.
func (c *Config) GetJWTSecretMaxRetention() time.Duration {
	if retention := c.Auth.JWT.SecretRetention.MaxRetention; retention != 0 {
		return retention
	}
	return 72 * time.Hour // Default value
}
// GetJWTSecretCleanupInterval returns the JWT secret cleanup interval,
// substituting one hour when the configured value is zero.
func (c *Config) GetJWTSecretCleanupInterval() time.Duration {
	if interval := c.Auth.JWT.SecretRetention.CleanupInterval; interval != 0 {
		return interval
	}
	return time.Hour // Default value
}
// GetLoggingJSON reports the value of logging.json.
func (c *Config) GetLoggingJSON() bool {
	asJSON := c.Logging.JSON
	return asJSON
}
// GetLogLevel returns the configured logging.level string, unparsed.
func (c *Config) GetLogLevel() string {
	level := c.Logging.Level
	return level
}
// GetLogOutput returns the configured logging.output path; the empty string
// means no log file was configured.
func (c *Config) GetLogOutput() string {
	path := c.Logging.Output
	return path
}
// GetRateLimitEnabled reports the value of rate_limit.enabled.
func (c *Config) GetRateLimitEnabled() bool {
	enabled := c.RateLimit.Enabled
	return enabled
}
// GetRateLimitRequestsPerMinute returns the requests-per-minute limit.
// Zero and negative values are replaced by the default of 60.
func (c *Config) GetRateLimitRequestsPerMinute() int {
	if perMinute := c.RateLimit.RequestsPerMinute; perMinute > 0 {
		return perMinute
	}
	return 60
}
// GetRateLimitBurstSize returns the burst size for rate limiting.
// Zero and negative values are replaced by the default of 10.
func (c *Config) GetRateLimitBurstSize() int {
	if burst := c.RateLimit.BurstSize; burst > 0 {
		return burst
	}
	return 10
}
// GetCacheEnabled reports the value of cache.enabled.
func (c *Config) GetCacheEnabled() bool {
	enabled := c.Cache.Enabled
	return enabled
}
// GetCacheDefaultTTLSeconds returns the default TTL, in seconds, for cache
// items. Zero and negative values are replaced by the default of 300.
func (c *Config) GetCacheDefaultTTLSeconds() int {
	if ttl := c.Cache.DefaultTTLSeconds; ttl > 0 {
		return ttl
	}
	return 300
}
// GetCacheCleanupIntervalSeconds returns the cache cleanup interval in
// seconds. Zero and negative values are replaced by the default of 600.
func (c *Config) GetCacheCleanupIntervalSeconds() int {
	if interval := c.Cache.CleanupIntervalSeconds; interval > 0 {
		return interval
	}
	return 600
}
// GetDatabaseHost returns the database host, or "localhost" when none is
// configured.
func (c *Config) GetDatabaseHost() string {
	if host := c.Database.Host; host != "" {
		return host
	}
	return "localhost"
}
// GetDatabasePort returns the database port, or 5432 when the configured
// value is zero.
func (c *Config) GetDatabasePort() int {
	if port := c.Database.Port; port != 0 {
		return port
	}
	return 5432
}
// GetDatabaseUser returns the database user, or "postgres" when none is
// configured.
func (c *Config) GetDatabaseUser() string {
	if user := c.Database.User; user != "" {
		return user
	}
	return "postgres"
}
// GetDatabasePassword returns the configured database password as-is; unlike
// the other database getters it applies no fallback.
func (c *Config) GetDatabasePassword() string {
	password := c.Database.Password
	return password
}
// GetDatabaseName returns the database name, or "dance_lessons_coach" when
// none is configured.
func (c *Config) GetDatabaseName() string {
	if name := c.Database.Name; name != "" {
		return name
	}
	return "dance_lessons_coach"
}
// GetDatabaseSSLMode returns the database SSL mode, or "disable" when none is
// configured.
func (c *Config) GetDatabaseSSLMode() string {
	if mode := c.Database.SSLMode; mode != "" {
		return mode
	}
	return "disable"
}
// GetDatabaseMaxOpenConns returns the maximum number of open connections, or
// 25 when the configured value is zero. Non-zero values, including negative
// ones, are returned unchanged.
func (c *Config) GetDatabaseMaxOpenConns() int {
	if open := c.Database.MaxOpenConns; open != 0 {
		return open
	}
	return 25
}
// GetDatabaseMaxIdleConns returns the maximum number of idle connections, or
// 5 when the configured value is zero. Non-zero values, including negative
// ones, are returned unchanged.
func (c *Config) GetDatabaseMaxIdleConns() int {
	if idle := c.Database.MaxIdleConns; idle != 0 {
		return idle
	}
	return 5
}
// GetDatabaseConnMaxLifetime returns the maximum lifetime of a connection, or
// one hour when the configured value is zero.
func (c *Config) GetDatabaseConnMaxLifetime() time.Duration {
	if lifetime := c.Database.ConnMaxLifetime; lifetime != 0 {
		return lifetime
	}
	return time.Hour
}
// SetupLogging applies the logging section to the global zerolog state, in
// this order: global level, optional file output, then the Unix timestamp
// format. The order is kept on purpose — anything logged while the output is
// being set up is filtered by the new level but still uses the previous time
// format.
func (c *Config) SetupLogging() {
	// Level first, parsed from the configured name.
	zerolog.SetGlobalLevel(parseLogLevel(c.GetLogLevel()))

	// Then the destination (a no-op when no file is configured).
	c.setupLogOutput()

	zerolog.TimeFieldFormat = zerolog.TimeFormatUnix
}
// parseLogLevel maps a level name onto its zerolog.Level. Matching is
// case-insensitive and "warning" is accepted as an alias of "warn". Any other
// name logs a warning and yields the most verbose level, trace.
func parseLogLevel(level string) zerolog.Level {
	parsed := zerolog.TraceLevel // also the fallback for unknown names
	switch name := strings.ToLower(level); name {
	case "trace":
		// already set
	case "debug":
		parsed = zerolog.DebugLevel
	case "info":
		parsed = zerolog.InfoLevel
	case "warn", "warning":
		parsed = zerolog.WarnLevel
	case "error":
		parsed = zerolog.ErrorLevel
	case "fatal":
		parsed = zerolog.FatalLevel
	case "panic":
		parsed = zerolog.PanicLevel
	default:
		log.Warn().Str("level", level).Msg("Unknown log level, defaulting to trace")
	}
	return parsed
}
// setupLogOutput points the global logger at the configured log file.
//
// With an empty logging.output the logger is left untouched (stderr by
// default). Otherwise the file is opened in append mode, created if missing,
// and installed as the global logger's writer. If it cannot be opened, the
// error is logged and the current logger stays in place.
//
// The file is created with mode 0644 rather than 0666 so that a permissive
// umask cannot leave the log writable by group or other users. Under the
// common 022 umask the resulting permissions are the same as before.
//
// The opened file is intentionally never closed: it backs the global logger
// for as long as that logger is in use.
func (c *Config) setupLogOutput() {
	output := c.GetLogOutput()
	if output == "" {
		// Use stderr by default
		return
	}

	// Open the log file
	file, err := os.OpenFile(output, os.O_CREATE|os.O_WRONLY|os.O_APPEND, 0644)
	if err != nil {
		log.Error().Err(err).Str("output", output).Msg("Failed to open log file, using stderr")
		return
	}

	// Set the log output
	log.Logger = log.Output(file)
	log.Trace().Str("output", output).Msg("Logging to file")
}
// WatchAndApply starts watching the config file for changes and applies the
// hot-reloadable subset on every change (ADR-0023 selective hot-reload).
//
// Phase 1 (this PR) reloads:
//   - logging.level — stored on the Config and applied to the global zerolog
//     level on every change.
//
// The other fields listed in ADR-0023 (api.v2_enabled, telemetry sampler,
// auth.jwt.ttl) remain restart-only until their handlers land in a follow-up.
// To guarantee that, the changed file is decoded into a scratch Config and only
// the hot-reloadable field is copied over. A failed decode therefore cannot
// leave the live Config half-updated, and restart-only fields never change
// underneath their consumers. The log output file is not reopened on reload.
//
// Change events are ignored once ctx is cancelled. Viper exposes no way to
// stop its file watcher, so the watcher itself keeps running in the
// background. Safe to call once at server startup.
// If no viper instance is attached, a warning is logged and nothing is
// watched. If no config file is in use (env-only or defaults), an
// informational message is logged and nothing is watched.
func (c *Config) WatchAndApply(ctx context.Context) {
	if c.viper == nil {
		log.Warn().Msg("Config hot-reload disabled: no viper instance attached")
		return
	}
	if c.viper.ConfigFileUsed() == "" {
		log.Info().Msg("Config hot-reload disabled: no config file in use (env-only or defaults)")
		return
	}

	c.viper.OnConfigChange(func(in fsnotify.Event) {
		// Checking the context here, instead of swapping the callback from
		// another goroutine on cancel, avoids writing viper's callback field
		// while the watcher goroutine may be reading it.
		if ctx.Err() != nil {
			return
		}
		log.Info().Str("event", in.Op.String()).Str("file", in.Name).Msg("Config file changed, reloading hot-reloadable fields")

		// Decode into a scratch value so the live Config is only touched
		// once decoding has fully succeeded.
		var next Config
		if err := c.viper.Unmarshal(&next); err != nil {
			log.Error().Err(err).Msg("Hot-reload: failed to unmarshal new config, keeping previous values")
			return
		}
		newLevel := next.Logging.Level

		c.reloadMu.Lock()
		c.Logging.Level = newLevel
		c.reloadMu.Unlock()

		// Apply hot-reloadable fields. Logging goes first so that any later
		// handler logs at the right level.
		zerolog.SetGlobalLevel(parseLogLevel(newLevel))
		log.Info().Str("logging_level", newLevel).Msg("Hot-reload applied (logging.level)")
	})
	c.viper.WatchConfig()
	log.Info().Str("file", c.viper.ConfigFileUsed()).Msg("Config hot-reload watcher started (ADR-0023 Phase 1)")

	// Report the stop; the callback above turns into a no-op by itself once
	// the context is done.
	go func() {
		<-ctx.Done()
		log.Info().Msg("Config hot-reload watcher stopped")
	}()
}