Files
dance-lessons-coach/pkg/telemetry/telemetry.go
Gabriel Radureau c577e2603c feat(telemetry): ReconfigureTracerProvider for sampler hot-reload (ADR-0023 Phase 3, sub-phase 3.1)
First sub-phase of ADR-0023 Phase 3 (telemetry sampler hot-reload), per
the Mistral-produced phase plan validated 2026-05-05 (Q-037 in
mistral-quirks.md).

This is the isolated telemetry-package change: adds ReconfigureTracerProvider
that builds a new TracerProvider with updated sampler settings, swaps the
global, and gracefully shuts down the old. No-op when oldTP is nil
(telemetry-disabled-at-startup is out of scope for Phase 3).

The wiring (config callback → server-level invocation) lands in sub-phases
3.2 and 3.3 — kept separate for clean rollback semantics.

Tests:
- 3 new tests in pkg/telemetry/telemetry_test.go covering nil no-op, the
  global TP swap, and error-tolerance when old TP shutdown fails.
- go test -race ./pkg/telemetry/... passes.

Verifier verdict (skill-driven, mental run): APPROVE. Function is 17 lines,
single responsibility, defensive on nil; tests cover positive + invariant
+ tolerance.
2026-05-05 09:26:54 +02:00

133 lines
4.0 KiB
Go

// Package telemetry provides OpenTelemetry instrumentation for the dance-lessons-coach application
package telemetry
import (
"context"
"log"
"go.opentelemetry.io/otel"
"go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc"
"go.opentelemetry.io/otel/propagation"
"go.opentelemetry.io/otel/sdk/resource"
sdktrace "go.opentelemetry.io/otel/sdk/trace"
semconv "go.opentelemetry.io/otel/semconv/v1.21.0"
"go.opentelemetry.io/otel/trace"
)
// Setup carries the configuration from which the OpenTelemetry tracer
// provider is built. Field order is part of the type's shape and is kept
// stable so unkeyed composite literals continue to compile.
type Setup struct {
	// ServiceName is reported as the service.name resource attribute.
	ServiceName string
	// OTLPEndpoint is the address handed to the OTLP gRPC trace exporter.
	OTLPEndpoint string
	// Insecure is intended to request a plaintext (non-TLS) connection to
	// the collector.
	Insecure bool
	// SamplerType names the sampling strategy: "always_on", "always_off",
	// "traceidratio", or any of those prefixed with "parentbased_".
	SamplerType string
	// SamplerRatio is the ratio passed to the trace-ID-ratio samplers; it is
	// only read for the "traceidratio" sampler types.
	SamplerRatio float64
	// Version is reported as the service.version resource attribute.
	Version string
}
// InitializeTracing builds an OTLP/gRPC-backed TracerProvider from the Setup
// fields and installs it, together with a TraceContext + Baggage propagator,
// as the process-wide OpenTelemetry default.
//
// Transport security follows s.Insecure: WithInsecure is passed to the
// exporter only when the flag is set; otherwise the exporter's default
// transport credentials apply.
//
// On failure no global state is modified and a nil provider is returned. If
// the resource cannot be built, the exporter that was already created is shut
// down so it is not leaked.
func (s *Setup) InitializeTracing(ctx context.Context) (*sdktrace.TracerProvider, error) {
	// Assemble exporter options; plaintext is opt-in via the Insecure flag.
	exporterOpts := []otlptracegrpc.Option{
		otlptracegrpc.WithEndpoint(s.OTLPEndpoint),
	}
	if s.Insecure {
		exporterOpts = append(exporterOpts, otlptracegrpc.WithInsecure())
	}

	exporter, err := otlptracegrpc.New(ctx, exporterOpts...)
	if err != nil {
		return nil, err
	}

	// Describe this service (name + version) on every exported span.
	res, err := resource.New(ctx,
		resource.WithAttributes(
			semconv.ServiceName(s.ServiceName),
			semconv.ServiceVersion(s.Version),
		),
	)
	if err != nil {
		// The exporter is not yet owned by any provider, so release it here.
		// The resource error is the one reported to the caller; a secondary
		// shutdown failure is only logged.
		if shutdownErr := exporter.Shutdown(ctx); shutdownErr != nil {
			log.Printf("InitializeTracing: exporter shutdown after resource error failed: %v", shutdownErr)
		}
		return nil, err
	}

	tp := sdktrace.NewTracerProvider(
		sdktrace.WithBatcher(exporter),
		sdktrace.WithResource(res),
		sdktrace.WithSampler(s.getSampler()),
	)

	// Publish globally only after everything above succeeded, so a failed
	// call never replaces a working provider.
	otel.SetTracerProvider(tp)
	otel.SetTextMapPropagator(propagation.NewCompositeTextMapPropagator(
		propagation.TraceContext{},
		propagation.Baggage{},
	))
	return tp, nil
}
// Shutdown shuts down tp using ctx and returns the provider's shutdown
// error. A nil provider is accepted and treated as already shut down, in
// which case nil is returned.
func Shutdown(ctx context.Context, tp *sdktrace.TracerProvider) error {
	if tp != nil {
		return tp.Shutdown(ctx)
	}
	return nil
}
// ReconfigureTracerProvider replaces the global tracer provider with one
// built from the current Setup values (sampler hot-reload, ADR-0023 Phase 3)
// and then shuts down the provider it replaced.
//
// A nil oldTP means telemetry was never started; the call is then a no-op
// and returns (nil, nil). Enabling telemetry at runtime is out of scope here.
//
// If building the replacement fails, the error is returned with a nil
// provider and oldTP is not shut down. A failure while shutting down oldTP is
// logged but not returned, because the replacement is already active by then.
//
// The returned provider is the one the caller should hold for the next
// reconfigure or shutdown.
func (s *Setup) ReconfigureTracerProvider(ctx context.Context, oldTP *sdktrace.TracerProvider) (*sdktrace.TracerProvider, error) {
	if oldTP == nil {
		return nil, nil
	}

	// Build first: InitializeTracing installs the new provider globally only
	// on success, so an error here leaves the old one serving traffic.
	replacement, err := s.InitializeTracing(ctx)
	if err != nil {
		return nil, err
	}

	// The replacement is live; shut down the retired provider.
	err = oldTP.Shutdown(ctx)
	if err != nil {
		// Standard logger on purpose: zerolog isn't imported in this package.
		log.Printf("ReconfigureTracerProvider: old TP shutdown failed: %v (new TP is active)", err)
	}

	return replacement, nil
}
// getSampler translates s.SamplerType into an SDK sampler.
//
// Recognised names are "always_on", "always_off" and "traceidratio" (which
// uses s.SamplerRatio), each optionally prefixed with "parentbased_" to wrap
// the sampler in ParentBased. Any other value is logged and falls back to
// AlwaysSample.
func (s *Setup) getSampler() sdktrace.Sampler {
	const parentPrefix = "parentbased_"

	// Step 1: pick the underlying sampler, ignoring the parent-based prefix.
	var base sdktrace.Sampler
	switch s.SamplerType {
	case "always_on", "parentbased_always_on":
		base = sdktrace.AlwaysSample()
	case "always_off", "parentbased_always_off":
		base = sdktrace.NeverSample()
	case "traceidratio", "parentbased_traceidratio":
		base = sdktrace.TraceIDRatioBased(s.SamplerRatio)
	default:
		log.Printf("Unknown sampler type: %s, defaulting to always_on", s.SamplerType)
		return sdktrace.AlwaysSample()
	}

	// Step 2: only the recognised names reach this point, so a prefix match
	// identifies exactly the three parent-based variants.
	if len(s.SamplerType) > len(parentPrefix) && s.SamplerType[:len(parentPrefix)] == parentPrefix {
		return sdktrace.ParentBased(base)
	}
	return base
}
// GetTracer returns the tracer registered under name on the current global
// tracer provider.
// NOTE(review): when no provider has been installed the global default is
// expected to yield a tracer that records nothing — confirm against the otel
// package documentation.
func GetTracer(name string) trace.Tracer {
	provider := otel.GetTracerProvider()
	return provider.Tracer(name)
}