Add readiness endpoint for graceful shutdown coordination
Implement readiness endpoint (/api/ready) that returns:
- {"ready":true} (HTTP 200) during normal operation
- {"ready":false} (HTTP 503) during graceful shutdown
Key changes:
- Added readiness context to control readiness state
- Modified server.NewServer() to accept readiness context
- Implemented handleReadiness() with context-aware logic
- Updated cmd/server/main.go to manage readiness state
- Readiness set to false when shutdown signal received
- Updated test script to validate readiness behavior
- Added comprehensive documentation for readiness endpoint
This allows Kubernetes/service meshes to stop routing traffic
to the pod during graceful shutdown while allowing existing
requests to complete. Health endpoint continues to return
healthy status during shutdown for proper orchestration.
This commit is contained in:
32
AGENTS.md
32
AGENTS.md
@@ -225,9 +225,28 @@ curl http://127.0.0.1:9090/api/health
|
||||
```bash
|
||||
# Check health endpoint
|
||||
curl -s http://localhost:8080/api/health
|
||||
|
||||
# Check readiness endpoint
|
||||
curl -s http://localhost:8080/api/ready
|
||||
```
|
||||
|
||||
**Expected response:** `{"status":"healthy"}`
|
||||
**Expected responses:**
|
||||
- Health: `{"status":"healthy"}`
|
||||
- Readiness (normal): `{"ready":true}`
|
||||
- Readiness (during shutdown): `{"ready":false}` with HTTP 503
|
||||
|
||||
**Endpoint Differences:**
|
||||
- **Health endpoint** (`/api/health`): Indicates if the application is running and functional
|
||||
- **Readiness endpoint** (`/api/ready`): Indicates if the application is ready to accept traffic
|
||||
|
||||
**Use Cases:**
|
||||
- **Health**: Used by load balancers to check if the app is alive
|
||||
- **Readiness**: Used by Kubernetes/service meshes to determine if the app can accept new requests
|
||||
|
||||
**During Graceful Shutdown:**
|
||||
- Health endpoint continues to return `{"status":"healthy"}`
|
||||
- Readiness endpoint returns `{"ready":false}` with HTTP 503 Service Unavailable
|
||||
- This allows existing requests to complete while signaling to Kubernetes/service meshes that no new requests should be routed to the server
|
||||
|
||||
### Stopping the Server
|
||||
|
||||
@@ -276,6 +295,17 @@ GET /api/health
|
||||
{"status":"healthy"}
|
||||
```
|
||||
|
||||
### Readiness Check
|
||||
```http
|
||||
GET /api/ready
|
||||
```
|
||||
|
||||
**Responses:**
|
||||
- Normal operation: `{"ready":true}` (HTTP 200)
|
||||
- During shutdown: `{"ready":false}` (HTTP 503 Service Unavailable)
|
||||
|
||||
**Purpose:** Indicates whether the server is ready to accept new requests. Returns `{"ready":false}` with HTTP 503 during graceful shutdown so that Kubernetes/service meshes stop routing new requests to the server while existing requests are allowed to complete.
|
||||
|
||||
### Greet Service
|
||||
```http
|
||||
GET /api/v1/greet/
|
||||
|
||||
@@ -127,6 +127,9 @@ go run ./cmd/server
|
||||
curl http://localhost:8080/api/health
|
||||
# Output: {"status":"healthy"}
|
||||
|
||||
curl http://localhost:8080/api/ready
|
||||
# Output: {"ready":true}
|
||||
|
||||
curl http://localhost:8080/api/v1/greet
|
||||
# Output: {"message":"Hello world!"}
|
||||
|
||||
|
||||
@@ -61,8 +61,12 @@ func main() {
|
||||
// Create ongoing context for active requests
|
||||
ongoingCtx, stopOngoingGracefully := context.WithCancel(context.Background())
|
||||
|
||||
// Create readiness context to control readiness state
|
||||
readyCtx, readyCancel := context.WithCancel(context.Background())
|
||||
defer readyCancel()
|
||||
|
||||
// Start server in goroutine
|
||||
server := server.NewServer(cfg)
|
||||
server := server.NewServer(cfg, readyCtx)
|
||||
serverCtx, serverStop := context.WithCancel(ctx)
|
||||
|
||||
go func() {
|
||||
@@ -87,7 +91,11 @@ func main() {
|
||||
<-rootCtx.Done()
|
||||
stop()
|
||||
log.Info().Msg("Shutdown signal received")
|
||||
|
||||
|
||||
// Cancel readiness context to stop accepting new requests
|
||||
readyCancel()
|
||||
log.Info().Msg("Readiness set to false, no longer accepting new requests")
|
||||
|
||||
// Give time for readiness check to propagate (simplified for our case)
|
||||
time.Sleep(1 * time.Second)
|
||||
log.Info().Msg("Readiness check propagated, now waiting for ongoing requests to finish.")
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
package server
|
||||
|
||||
import (
|
||||
"context"
|
||||
"net/http"
|
||||
|
||||
"DanceLessonsCoach/pkg/config"
|
||||
@@ -12,12 +13,14 @@ import (
|
||||
)
|
||||
|
||||
type Server struct {
|
||||
router *chi.Mux
|
||||
router *chi.Mux
|
||||
readyCtx context.Context
|
||||
}
|
||||
|
||||
func NewServer(cfg *config.Config) *Server {
|
||||
func NewServer(cfg *config.Config, readyCtx context.Context) *Server {
|
||||
s := &Server{
|
||||
router: chi.NewRouter(),
|
||||
router: chi.NewRouter(),
|
||||
readyCtx: readyCtx,
|
||||
}
|
||||
s.setupRoutes()
|
||||
return s
|
||||
@@ -29,10 +32,13 @@ func (s *Server) setupRoutes() {
|
||||
Logger: &log.Logger,
|
||||
NoColor: false,
|
||||
}))
|
||||
|
||||
|
||||
// Health endpoint at root level
|
||||
s.router.Get("/api/health", s.handleHealth)
|
||||
|
||||
|
||||
// Readiness endpoint at root level
|
||||
s.router.Get("/api/ready", s.handleReadiness)
|
||||
|
||||
// API routes
|
||||
s.router.Route("/api/v1", func(r chi.Router) {
|
||||
r.Use(s.apiMiddlewares()...)
|
||||
@@ -60,6 +66,20 @@ func (s *Server) handleHealth(w http.ResponseWriter, r *http.Request) {
|
||||
w.Write([]byte(`{"status":"healthy"}`))
|
||||
}
|
||||
|
||||
func (s *Server) handleReadiness(w http.ResponseWriter, r *http.Request) {
|
||||
log.Info().Msg("Readiness check requested")
|
||||
|
||||
select {
|
||||
case <-s.readyCtx.Done():
|
||||
log.Info().Msg("Readiness check: not ready (shutting down)")
|
||||
w.WriteHeader(http.StatusServiceUnavailable)
|
||||
w.Write([]byte(`{"ready":false}`))
|
||||
default:
|
||||
log.Info().Msg("Readiness check: ready")
|
||||
w.Write([]byte(`{"ready":true}`))
|
||||
}
|
||||
}
|
||||
|
||||
func (s *Server) Router() http.Handler {
|
||||
return s.router
|
||||
}
|
||||
|
||||
@@ -14,8 +14,7 @@ PID_FILE="server.pid"
|
||||
TEST_LOG="shutdown_test.log"
|
||||
|
||||
# Colors for output - use simple echo -e with inline ANSI codes
|
||||
|
||||
echo -e "\\033[1;34m=== DanceLessonsCoach Graceful Shutdown Test ===\\033[0m"
|
||||
echo -e "\033[1;34m=== DanceLessonsCoach Graceful Shutdown Test ===\033[0m"
|
||||
echo ""
|
||||
|
||||
# Clean up any existing server
|
||||
@@ -42,6 +41,11 @@ echo "Testing /api/health:"
|
||||
HEALTH_RESPONSE=$(curl -s http://localhost:8080/api/health)
|
||||
echo "Response: $HEALTH_RESPONSE"
|
||||
|
||||
# Test readiness endpoint
|
||||
echo "Testing /api/ready:"
|
||||
READY_RESPONSE=$(curl -s http://localhost:8080/api/ready)
|
||||
echo "Response: $READY_RESPONSE"
|
||||
|
||||
# Test greet endpoint
|
||||
echo "Testing /api/v1/greet/:"
|
||||
GREET_RESPONSE=$(curl -s http://localhost:8080/api/v1/greet/)
|
||||
@@ -54,6 +58,10 @@ echo "Response: $GREET_NAME_RESPONSE"
|
||||
|
||||
echo ""
|
||||
echo "Stopping server gracefully..."
|
||||
|
||||
# Test readiness during shutdown (in background)
|
||||
(curl -s http://localhost:8080/api/ready > /dev/null 2>&1 &)
|
||||
|
||||
$SERVER_CMD stop
|
||||
sleep 3
|
||||
|
||||
@@ -62,95 +70,113 @@ echo "Analyzing server logs..."
|
||||
|
||||
# Check if log file exists and is not empty
|
||||
if [ ! -f "$LOG_FILE" ] || [ ! -s "$LOG_FILE" ]; then
|
||||
echo -e "\\033[0;31m❌ FAIL: Log file is missing or empty\\033[0m"
|
||||
echo -e "\033[0;31m❌ FAIL: Log file is missing or empty\033[0m"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
# Validate all lines are proper JSON
|
||||
if ! cat "$LOG_FILE" | jq -e '.' >/dev/null 2>&1; then
|
||||
echo -e "\\033[0;31m❌ FAIL: Some log lines are not valid JSON\\033[0m"
|
||||
echo -e "\033[0;31m❌ FAIL: Some log lines are not valid JSON\033[0m"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
echo -e "\\033[0;32m✅ All log lines are valid JSON\033[0m"
|
||||
echo -e "\033[0;32m✅ All log lines are valid JSON\033[0m"
|
||||
|
||||
# Check for required startup logs
|
||||
if ! grep -q "Config file loaded" "$LOG_FILE"; then
|
||||
echo -e "\\033[0;31m❌ FAIL: Missing 'Config file loaded' log\033[0m"
|
||||
echo -e "\033[0;31m❌ FAIL: Missing 'Config file loaded' log\033[0m"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
if ! grep -q "Configuration loaded" "$LOG_FILE"; then
|
||||
echo -e "\\033[0;31m❌ FAIL: Missing 'Configuration loaded' log\033[0m"
|
||||
echo -e "\033[0;31m❌ FAIL: Missing 'Configuration loaded' log\033[0m"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
if ! grep -q "Logging configured" "$LOG_FILE"; then
|
||||
echo -e "\\033[0;31m❌ FAIL: Missing 'Logging configured' log\033[0m"
|
||||
echo -e "\033[0;31m❌ FAIL: Missing 'Logging configured' log\033[0m"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
if ! grep -q "Registering greet routes" "$LOG_FILE"; then
|
||||
echo -e "\\033[0;31m❌ FAIL: Missing 'Registering greet routes' log\033[0m"
|
||||
echo -e "\033[0;31m❌ FAIL: Missing 'Registering greet routes' log\033[0m"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
if ! grep -q "Server running" "$LOG_FILE"; then
|
||||
echo -e "\\033[0;31m❌ FAIL: Missing 'Server running' log\033[0m"
|
||||
echo -e "\033[0;31m❌ FAIL: Missing 'Server running' log\033[0m"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
echo -e "\\033[0;32m✅ All startup logs present\033[0m"
|
||||
echo -e "\033[0;32m✅ All startup logs present\033[0m"
|
||||
|
||||
# Check for readiness logs
|
||||
if ! grep -q "Readiness check: ready" "$LOG_FILE"; then
|
||||
echo -e "\033[0;31m❌ FAIL: Missing 'Readiness check: ready' log\033[0m"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
if ! grep -q "Readiness check: not ready" "$LOG_FILE"; then
|
||||
echo -e "\033[0;31m❌ FAIL: Missing 'Readiness check: not ready' log\033[0m"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
echo -e "\033[0;32m✅ All readiness logs present\033[0m"
|
||||
|
||||
# Check for API call logs
|
||||
if ! grep -q "Health check requested" "$LOG_FILE"; then
|
||||
echo -e "\\033[0;31m❌ FAIL: Missing 'Health check requested' log\033[0m"
|
||||
echo -e "\033[0;31m❌ FAIL: Missing 'Health check requested' log\033[0m"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
if ! grep -q "TestUser" "$LOG_FILE"; then
|
||||
echo -e "\\033[0;31m❌ FAIL: Missing 'TestUser' greet log\033[0m"
|
||||
echo -e "\033[0;31m❌ FAIL: Missing 'TestUser' greet log\033[0m"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
echo -e "\\033[0;32m✅ All API call logs present\033[0m"
|
||||
echo -e "\033[0;32m✅ All API call logs present\033[0m"
|
||||
|
||||
# Check for required shutdown logs - THIS IS THE CRITICAL PART
|
||||
MISSING_LOGS=0
|
||||
|
||||
if ! grep -q "Shutdown signal received" "$LOG_FILE"; then
|
||||
echo -e "\\033[0;31m❌ FAIL: Missing 'Shutdown signal received' log\033[0m"
|
||||
echo -e "\033[0;31m❌ FAIL: Missing 'Shutdown signal received' log\033[0m"
|
||||
MISSING_LOGS=1
|
||||
fi
|
||||
|
||||
if ! grep -q "Readiness set to false" "$LOG_FILE"; then
|
||||
echo -e "\033[0;31m❌ FAIL: Missing 'Readiness set to false' log\033[0m"
|
||||
MISSING_LOGS=1
|
||||
fi
|
||||
|
||||
if ! grep -q "Readiness check propagated" "$LOG_FILE"; then
|
||||
echo -e "\\033[0;31m❌ FAIL: Missing 'Readiness check propagated' log\033[0m"
|
||||
echo -e "\033[0;31m❌ FAIL: Missing 'Readiness check propagated' log\033[0m"
|
||||
MISSING_LOGS=1
|
||||
fi
|
||||
|
||||
if ! grep -q "Server shutdown complete" "$LOG_FILE"; then
|
||||
echo -e "\\033[0;31m❌ FAIL: Missing 'Server shutdown complete' log\033[0m"
|
||||
echo -e "\033[0;31m❌ FAIL: Missing 'Server shutdown complete' log\033[0m"
|
||||
MISSING_LOGS=1
|
||||
fi
|
||||
|
||||
if ! grep -q "Server exited" "$LOG_FILE"; then
|
||||
echo -e "\\033[0;31m❌ FAIL: Missing 'Server exited' log\033[0m"
|
||||
echo -e "\033[0;31m❌ FAIL: Missing 'Server exited' log\033[0m"
|
||||
MISSING_LOGS=1
|
||||
fi
|
||||
|
||||
if [ $MISSING_LOGS -eq 1 ]; then
|
||||
echo ""
|
||||
echo -e "\\033[1;33mCurrent log file contents:\033[0m"
|
||||
echo "================================"
|
||||
echo "Current log file contents:"
|
||||
echo "==============================="
|
||||
cat "$LOG_FILE"
|
||||
echo "================================"
|
||||
echo "==============================="
|
||||
echo ""
|
||||
echo -e "\\033[0;31m❌ GRACEFUL SHUTDOWN TEST FAILED\033[0m"
|
||||
echo -e "\033[0;31m❌ GRACEFUL SHUTDOWN TEST FAILED\033[0m"
|
||||
echo "Shutdown logs are missing!"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
echo -e "\\033[0;32m✅ All shutdown logs present\033[0m"
|
||||
echo -e "\033[0;32m✅ All shutdown logs present\033[0m"
|
||||
|
||||
# Additional validation: Check that logs appear in correct order
|
||||
echo "Validating log sequence..."
|
||||
@@ -163,21 +189,21 @@ if echo "$LOG_MESSAGES" | grep -n "Server running" | head -1 | cut -d: -f1 | xar
|
||||
# Check that API calls come before shutdown
|
||||
HEALTH_LINE=$(echo "$LOG_MESSAGES" | grep -n "Health check requested" | head -1 | cut -d: -f1)
|
||||
SHUTDOWN_LINE=$(echo "$LOG_MESSAGES" | grep -n "Shutdown signal received" | head -1 | cut -d: -f1)
|
||||
|
||||
|
||||
if [ "$HEALTH_LINE" -lt "$SHUTDOWN_LINE" ]; then
|
||||
echo -e "\\033[0;32m✅ Log sequence is correct\033[0m"
|
||||
echo -e "\033[0;32m✅ Log sequence is correct\033[0m"
|
||||
else
|
||||
echo -e "\\033[0;31m❌ FAIL: Log sequence is incorrect\033[0m"
|
||||
echo -e "\033[0;31m❌ FAIL: Log sequence is incorrect\033[0m"
|
||||
exit 1
|
||||
fi
|
||||
fi
|
||||
|
||||
echo ""
|
||||
echo -e "\\033[0;32m🎉 GRACEFUL SHUTDOWN TEST PASSED!\033[0m"
|
||||
echo -e "\033[0;32m🎉 GRACEFUL SHUTDOWN TEST PASSED!\033[0m"
|
||||
echo "All required logs are present and in correct order."
|
||||
echo ""
|
||||
|
||||
# Clean up
|
||||
rm -f "$PID_FILE" "$LOG_FILE"
|
||||
|
||||
exit 0
|
||||
exit 0
|
||||
|
||||
Reference in New Issue
Block a user