Add readiness endpoint for graceful shutdown coordination

Implement readiness endpoint (/api/ready) that returns:
- {"ready":true} (HTTP 200) during normal operation
- {"ready":false} (HTTP 503) during graceful shutdown

Key changes:
- Added readiness context to control readiness state
- Modified server.NewServer() to accept readiness context
- Implemented handleReadiness() with context-aware logic
- Updated cmd/server/main.go to manage readiness state
- Readiness set to false when shutdown signal received
- Updated test script to validate readiness behavior
- Added comprehensive documentation for readiness endpoint

This allows Kubernetes/service meshes to stop routing traffic
to the pod during graceful shutdown while allowing existing
requests to complete. The health endpoint continues to report a
healthy status during shutdown for proper orchestration.
This commit is contained in:
Gabriel Radureau
2026-04-03 19:53:14 +02:00
parent 7c5e61c386
commit f986711974
5 changed files with 123 additions and 36 deletions

View File

@@ -225,9 +225,28 @@ curl http://127.0.0.1:9090/api/health
```bash
# Check health endpoint
curl -s http://localhost:8080/api/health
# Check readiness endpoint
curl -s http://localhost:8080/api/ready
```
**Expected response:** `{"status":"healthy"}`
**Expected responses:**
- Health: `{"status":"healthy"}`
- Readiness (normal): `{"ready":true}`
- Readiness (during shutdown): `{"ready":false}` with HTTP 503
**Endpoint Differences:**
- **Health endpoint** (`/api/health`): Indicates if the application is running and functional
- **Readiness endpoint** (`/api/ready`): Indicates if the application is ready to accept traffic
**Use Cases:**
- **Health**: Used by load balancers to check if the app is alive
- **Readiness**: Used by Kubernetes/service meshes to determine if the app can accept new requests
**During Graceful Shutdown:**
- Health endpoint continues to return `{"status":"healthy"}`
- Readiness endpoint returns `{"ready":false}` with HTTP 503 Service Unavailable
- This lets in-flight requests complete while signalling orchestrators to stop routing new requests to this instance
### Stopping the Server
@@ -276,6 +295,17 @@ GET /api/health
{"status":"healthy"}
```
### Readiness Check
```http
GET /api/ready
```
**Responses:**
- Normal operation: `{"ready":true}` (HTTP 200)
- During shutdown: `{"ready":false}` (HTTP 503 Service Unavailable)
**Purpose:** Indicates whether the server is ready to accept new requests. During graceful shutdown it reports `{"ready":false}` so that orchestrators stop routing new traffic to this instance while in-flight requests complete.
### Greet Service
```http
GET /api/v1/greet/

View File

@@ -127,6 +127,9 @@ go run ./cmd/server
curl http://localhost:8080/api/health
# Output: {"status":"healthy"}
curl http://localhost:8080/api/ready
# Output: {"ready":true}
curl http://localhost:8080/api/v1/greet
# Output: {"message":"Hello world!"}

View File

@@ -61,8 +61,12 @@ func main() {
// Create ongoing context for active requests
ongoingCtx, stopOngoingGracefully := context.WithCancel(context.Background())
// Create readiness context to control readiness state
readyCtx, readyCancel := context.WithCancel(context.Background())
defer readyCancel()
// Start server in goroutine
server := server.NewServer(cfg)
server := server.NewServer(cfg, readyCtx)
serverCtx, serverStop := context.WithCancel(ctx)
go func() {
@@ -87,7 +91,11 @@ func main() {
<-rootCtx.Done()
stop()
log.Info().Msg("Shutdown signal received")
// Cancel readiness context to stop accepting new requests
readyCancel()
log.Info().Msg("Readiness set to false, no longer accepting new requests")
// Give time for readiness check to propagate (simplified for our case)
time.Sleep(1 * time.Second)
log.Info().Msg("Readiness check propagated, now waiting for ongoing requests to finish.")

View File

@@ -1,6 +1,7 @@
package server
import (
"context"
"net/http"
"DanceLessonsCoach/pkg/config"
@@ -12,12 +13,14 @@ import (
)
type Server struct {
router *chi.Mux
router *chi.Mux
readyCtx context.Context
}
func NewServer(cfg *config.Config) *Server {
func NewServer(cfg *config.Config, readyCtx context.Context) *Server {
s := &Server{
router: chi.NewRouter(),
router: chi.NewRouter(),
readyCtx: readyCtx,
}
s.setupRoutes()
return s
@@ -29,10 +32,13 @@ func (s *Server) setupRoutes() {
Logger: &log.Logger,
NoColor: false,
}))
// Health endpoint at root level
s.router.Get("/api/health", s.handleHealth)
// Readiness endpoint at root level
s.router.Get("/api/ready", s.handleReadiness)
// API routes
s.router.Route("/api/v1", func(r chi.Router) {
r.Use(s.apiMiddlewares()...)
@@ -60,6 +66,20 @@ func (s *Server) handleHealth(w http.ResponseWriter, r *http.Request) {
w.Write([]byte(`{"status":"healthy"}`))
}
// handleReadiness reports whether the server should receive new traffic.
//
// While s.readyCtx is still active it answers 200 with {"ready":true}. Once
// that context has been cancelled it answers 503 Service Unavailable with
// {"ready":false}.
func (s *Server) handleReadiness(w http.ResponseWriter, r *http.Request) {
	log.Info().Msg("Readiness check requested")

	// Headers must be set before WriteHeader/Write; without this net/http
	// sniffs the JSON body and labels it text/plain.
	w.Header().Set("Content-Type", "application/json")

	status := http.StatusOK
	body := `{"ready":true}`
	outcome := "Readiness check: ready"

	// Non-blocking poll of the readiness context: a closed Done channel means
	// readiness has been revoked.
	select {
	case <-s.readyCtx.Done():
		status = http.StatusServiceUnavailable
		body = `{"ready":false}`
		outcome = "Readiness check: not ready (shutting down)"
	default:
	}

	log.Info().Msg(outcome)
	w.WriteHeader(status)
	if _, err := w.Write([]byte(body)); err != nil {
		// The client has most likely gone away; record it rather than drop it silently.
		log.Error().Err(err).Msg("Readiness check: failed to write response")
	}
}
// Router returns the server's router as an http.Handler.
func (s *Server) Router() http.Handler {
return s.router
}

View File

@@ -14,8 +14,7 @@ PID_FILE="server.pid"
TEST_LOG="shutdown_test.log"
# Colors for output - use simple echo -e with inline ANSI codes
echo -e "\\033[1;34m=== DanceLessonsCoach Graceful Shutdown Test ===\\033[0m"
echo -e "\033[1;34m=== DanceLessonsCoach Graceful Shutdown Test ===\033[0m"
echo ""
# Clean up any existing server
@@ -42,6 +41,11 @@ echo "Testing /api/health:"
HEALTH_RESPONSE=$(curl -s http://localhost:8080/api/health)
echo "Response: $HEALTH_RESPONSE"
# Test readiness endpoint
echo "Testing /api/ready:"
READY_RESPONSE=$(curl -s http://localhost:8080/api/ready)
echo "Response: $READY_RESPONSE"
# Test greet endpoint
echo "Testing /api/v1/greet/:"
GREET_RESPONSE=$(curl -s http://localhost:8080/api/v1/greet/)
@@ -54,6 +58,10 @@ echo "Response: $GREET_NAME_RESPONSE"
echo ""
echo "Stopping server gracefully..."
# Test readiness during shutdown: trigger the stop in the background, then
# poll /api/ready until it stops answering 200, so that at least one probe
# lands after the shutdown signal (a probe sent before the stop would still
# see the server as ready).
$SERVER_CMD stop &
STOP_PID=$!
for _ in 1 2 3 4 5 6 7 8 9 10; do
    # curl prints 000 and exits non-zero once the listener is gone; tolerate that.
    READY_STATUS=$(curl -s -o /dev/null -w "%{http_code}" http://localhost:8080/api/ready || true)
    if [ "$READY_STATUS" != "200" ]; then
        break
    fi
    sleep 0.2
done
wait "$STOP_PID"
sleep 3
@@ -62,95 +70,113 @@ echo "Analyzing server logs..."
# Check if log file exists and is not empty
if [ ! -f "$LOG_FILE" ] || [ ! -s "$LOG_FILE" ]; then
echo -e "\\033[0;31m❌ FAIL: Log file is missing or empty\\033[0m"
echo -e "\033[0;31m❌ FAIL: Log file is missing or empty\033[0m"
exit 1
fi
# Validate all lines are proper JSON
if ! cat "$LOG_FILE" | jq -e '.' >/dev/null 2>&1; then
echo -e "\\033[0;31m❌ FAIL: Some log lines are not valid JSON\\033[0m"
echo -e "\033[0;31m❌ FAIL: Some log lines are not valid JSON\033[0m"
exit 1
fi
echo -e "\\033[0;32m✅ All log lines are valid JSON\033[0m"
echo -e "\033[0;32m✅ All log lines are valid JSON\033[0m"
# Check for required startup logs
if ! grep -q "Config file loaded" "$LOG_FILE"; then
echo -e "\\033[0;31m❌ FAIL: Missing 'Config file loaded' log\033[0m"
echo -e "\033[0;31m❌ FAIL: Missing 'Config file loaded' log\033[0m"
exit 1
fi
if ! grep -q "Configuration loaded" "$LOG_FILE"; then
echo -e "\\033[0;31m❌ FAIL: Missing 'Configuration loaded' log\033[0m"
echo -e "\033[0;31m❌ FAIL: Missing 'Configuration loaded' log\033[0m"
exit 1
fi
if ! grep -q "Logging configured" "$LOG_FILE"; then
echo -e "\\033[0;31m❌ FAIL: Missing 'Logging configured' log\033[0m"
echo -e "\033[0;31m❌ FAIL: Missing 'Logging configured' log\033[0m"
exit 1
fi
if ! grep -q "Registering greet routes" "$LOG_FILE"; then
echo -e "\\033[0;31m❌ FAIL: Missing 'Registering greet routes' log\033[0m"
echo -e "\033[0;31m❌ FAIL: Missing 'Registering greet routes' log\033[0m"
exit 1
fi
if ! grep -q "Server running" "$LOG_FILE"; then
echo -e "\\033[0;31m❌ FAIL: Missing 'Server running' log\033[0m"
echo -e "\033[0;31m❌ FAIL: Missing 'Server running' log\033[0m"
exit 1
fi
echo -e "\\033[0;32m✅ All startup logs present\033[0m"
echo -e "\033[0;32m✅ All startup logs present\033[0m"
# Check for readiness logs
if ! grep -q "Readiness check: ready" "$LOG_FILE"; then
echo -e "\033[0;31m❌ FAIL: Missing 'Readiness check: ready' log\033[0m"
exit 1
fi
if ! grep -q "Readiness check: not ready" "$LOG_FILE"; then
echo -e "\033[0;31m❌ FAIL: Missing 'Readiness check: not ready' log\033[0m"
exit 1
fi
echo -e "\033[0;32m✅ All readiness logs present\033[0m"
# Check for API call logs
if ! grep -q "Health check requested" "$LOG_FILE"; then
echo -e "\\033[0;31m❌ FAIL: Missing 'Health check requested' log\033[0m"
echo -e "\033[0;31m❌ FAIL: Missing 'Health check requested' log\033[0m"
exit 1
fi
if ! grep -q "TestUser" "$LOG_FILE"; then
echo -e "\\033[0;31m❌ FAIL: Missing 'TestUser' greet log\033[0m"
echo -e "\033[0;31m❌ FAIL: Missing 'TestUser' greet log\033[0m"
exit 1
fi
echo -e "\\033[0;32m✅ All API call logs present\033[0m"
echo -e "\033[0;32m✅ All API call logs present\033[0m"
# Check for required shutdown logs - THIS IS THE CRITICAL PART
MISSING_LOGS=0
if ! grep -q "Shutdown signal received" "$LOG_FILE"; then
echo -e "\\033[0;31m❌ FAIL: Missing 'Shutdown signal received' log\033[0m"
echo -e "\033[0;31m❌ FAIL: Missing 'Shutdown signal received' log\033[0m"
MISSING_LOGS=1
fi
if ! grep -q "Readiness set to false" "$LOG_FILE"; then
echo -e "\033[0;31m❌ FAIL: Missing 'Readiness set to false' log\033[0m"
MISSING_LOGS=1
fi
if ! grep -q "Readiness check propagated" "$LOG_FILE"; then
echo -e "\\033[0;31m❌ FAIL: Missing 'Readiness check propagated' log\033[0m"
echo -e "\033[0;31m❌ FAIL: Missing 'Readiness check propagated' log\033[0m"
MISSING_LOGS=1
fi
if ! grep -q "Server shutdown complete" "$LOG_FILE"; then
echo -e "\\033[0;31m❌ FAIL: Missing 'Server shutdown complete' log\033[0m"
echo -e "\033[0;31m❌ FAIL: Missing 'Server shutdown complete' log\033[0m"
MISSING_LOGS=1
fi
if ! grep -q "Server exited" "$LOG_FILE"; then
echo -e "\\033[0;31m❌ FAIL: Missing 'Server exited' log\033[0m"
echo -e "\033[0;31m❌ FAIL: Missing 'Server exited' log\033[0m"
MISSING_LOGS=1
fi
if [ $MISSING_LOGS -eq 1 ]; then
echo ""
echo -e "\\033[1;33mCurrent log file contents:\033[0m"
echo "================================"
echo "Current log file contents:"
echo "==============================="
cat "$LOG_FILE"
echo "================================"
echo "==============================="
echo ""
echo -e "\\033[0;31m❌ GRACEFUL SHUTDOWN TEST FAILED\033[0m"
echo -e "\033[0;31m❌ GRACEFUL SHUTDOWN TEST FAILED\033[0m"
echo "Shutdown logs are missing!"
exit 1
fi
echo -e "\\033[0;32m✅ All shutdown logs present\033[0m"
echo -e "\033[0;32m✅ All shutdown logs present\033[0m"
# Additional validation: Check that logs appear in correct order
echo "Validating log sequence..."
@@ -163,21 +189,21 @@ if echo "$LOG_MESSAGES" | grep -n "Server running" | head -1 | cut -d: -f1 | xar
# Check that API calls come before shutdown
HEALTH_LINE=$(echo "$LOG_MESSAGES" | grep -n "Health check requested" | head -1 | cut -d: -f1)
SHUTDOWN_LINE=$(echo "$LOG_MESSAGES" | grep -n "Shutdown signal received" | head -1 | cut -d: -f1)
if [ "$HEALTH_LINE" -lt "$SHUTDOWN_LINE" ]; then
echo -e "\\033[0;32m✅ Log sequence is correct\033[0m"
echo -e "\033[0;32m✅ Log sequence is correct\033[0m"
else
echo -e "\\033[0;31m❌ FAIL: Log sequence is incorrect\033[0m"
echo -e "\033[0;31m❌ FAIL: Log sequence is incorrect\033[0m"
exit 1
fi
fi
echo ""
echo -e "\\033[0;32m🎉 GRACEFUL SHUTDOWN TEST PASSED!\033[0m"
echo -e "\033[0;32m🎉 GRACEFUL SHUTDOWN TEST PASSED!\033[0m"
echo "All required logs are present and in correct order."
echo ""
# Clean up
rm -f "$PID_FILE" "$LOG_FILE"
exit 0
exit 0