1 Commit

| Author | SHA1 | Message | Date |
|---|---|---|---|
| | b395d2b2d4 | setup gcs backup bucket for longhorn | 2025-08-31 20:50:28 +02:00 |
111 changed files with 749 additions and 11000 deletions


@@ -1,6 +1,6 @@
 ---
 # template source: https://github.com/bretfisher/docker-build-workflow/blob/main/templates/call-docker-build.yaml
-name: IAC
+name: Postgres
 on: #[push,pull_request]
   workflow_dispatch: {}
@@ -19,20 +19,18 @@ concurrency:
 .vault_step: &vault_step
   name: read vault secret
-  uses: https://gitea.arcodange.lab/arcodange-org/vault-action.git@main
+  uses: https://gitea.arcodange.duckdns.org/arcodange-org/vault-action.git@main
   id: vault-secrets
   with:
-    url: https://vault.arcodange.lab
-    caCertificate: ${{ secrets.HOMELAB_CA_CERT }}
+    url: https://vault.arcodange.duckdns.org
     jwtGiteaOIDC: ${{ needs.gitea_vault_auth.outputs.gitea_vault_jwt }}
     role: gitea_cicd
     method: jwt
     path: gitea_jwt
     secrets: |
-      kvv1/google/credentials credentials | GOOGLE_CREDENTIALS ;
-      kvv1/admin/gitea token | GITEA_TOKEN ;
-      kvv1/admin/cloudflare iam_token | CLOUDFLARE_API_TOKEN ;
-      kvv1/admin/ovh/app * | OVH_ ;
+      kvv1/google/credentials credentials | GOOGLE_BACKEND_CREDENTIALS ;
+      kvv1/admin/gitea token | GITEA_TOKEN
 jobs:
   gitea_vault_auth:
     name: Auth with gitea for vault
@@ -54,12 +52,9 @@ jobs:
     env:
       OPENTOFU_VERSION: 1.8.2
       TERRAFORM_VAULT_AUTH_JWT: ${{ needs.gitea_vault_auth.outputs.gitea_vault_jwt }}
-      VAULT_CACERT: "${{ github.workspace }}/homelab.pem"
     steps:
       - *vault_step
       - uses: actions/checkout@v4
-      - name: prepare vault self signed cert
-        run: echo -n "${{ secrets.HOMELAB_CA_CERT }}" | base64 -d > $VAULT_CACERT
       - name: terraform apply
         uses: dflook/terraform-apply@v1
         with:


@@ -17,11 +17,10 @@ concurrency:
 .vault_step: &vault_step
   name: read vault secret
-  uses: https://gitea.arcodange.lab/arcodange-org/vault-action.git@main
+  uses: https://gitea.arcodange.duckdns.org/arcodange-org/vault-action.git@main
   id: vault-secrets
   with:
-    url: https://vault.arcodange.lab
-    caCertificate: ${{ secrets.HOMELAB_CA_CERT }}
+    url: https://vault.arcodange.duckdns.org
     jwtGiteaOIDC: ${{ needs.gitea_vault_auth.outputs.gitea_vault_jwt }}
     role: gitea_cicd
     method: jwt
@@ -51,12 +50,9 @@ jobs:
     env:
       OPENTOFU_VERSION: 1.8.2
       TERRAFORM_VAULT_AUTH_JWT: ${{ needs.gitea_vault_auth.outputs.gitea_vault_jwt }}
-      VAULT_CACERT: "${{ github.workspace }}/homelab.pem"
     steps:
       - *vault_step
       - uses: actions/checkout@v4
-      - name: prepare vault self signed cert
-        run: echo -n "${{ secrets.HOMELAB_CA_CERT }}" | base64 -d > $VAULT_CACERT
       - name: terraform apply
         uses: dflook/terraform-apply@v1
         with:

.gitignore vendored

@@ -2,4 +2,3 @@
 .terraform.*
 .DS_Store
 node_modules/
-.venv/


@@ -1,17 +1,5 @@
 # Use Ansible
-## Run locally (uv)
-A project-local venv is defined in `pyproject.toml` at the repo root (ansible-core + the `kubernetes`, `jmespath`, `dnspython` libraries that `kubernetes.core` and friends need at runtime).
-```sh
-uv sync # creates .venv/ and installs ansible-core + python deps
-uv run ansible-galaxy collection install -r ansible/requirements.yml
-uv run ansible-playbook -i ansible/arcodange/factory/inventory ansible/arcodange/factory/playbooks/<playbook>.yml
-```
-The localhost entry in the inventory uses `ansible_python_interpreter: "{{ ansible_playbook_python }}"`, so `uv run` is enough — Ansible picks up the venv's Python automatically without any hardcoded path.
 ## Run with docker ssh agent side proxy
 ### build docker images
@@ -79,25 +67,31 @@ ansible -i ,localhost -c local localhost -m raw -a "echo hello world {{ inventor
 ### local python environment with uv
-#### Install UV (one-time)
+#### Install UV
+`python3 -m pip install uv`
+`python3 -m uv python install 3.10 3.11 3.12`
+`echo "export PATH=\"$(find ~/Library/Python/*/bin/uv | xargs dirname)\"" >> ~/.zshenv`
+`echo 'export PATH="~/.local/bin:$PATH"' >> ~/.zshenv`
+#### Set python version to 3.12
+`uv python pin 3.12` (edit .python-version file)
+#### Install ansible
+`uv tool install ansible-core --with dnspython --with jmespath --with kubernetes`
+`echo 'export PATH="~/.local/share/uv/tools/ansible-core/bin:$PATH"' >> ~/.zshenv`
+#### Install this project dependencies
 ```sh
-python3 -m pip install uv
-python3 -m uv python install 3.12
-echo 'export PATH="$HOME/.local/bin:$PATH"' >> ~/.zshenv
-```
-#### Bootstrap the project venv
-```sh
-uv sync # honors .python-version (3.12) and pyproject.toml
-uv run ansible-galaxy collection install -r ansible/requirements.yml
-# `--token <token>` is only needed if you hit galaxy.ansible.com rate limits
+ansible-galaxy collection install --token 11bebd8fd1ad4009f700bdedbeb80b19743ce3d3 -r ansible/requirements.yml # token is used by a rate limiter and can be sensitive
-```
-#### Run
-```sh
-uv run ansible-galaxy collection install ./ansible/arcodange/factory -f
-uv run ansible-playbook -i ansible/arcodange/factory/inventory ansible/arcodange/factory/playbooks/02_setup.yml
-```
+ansible-galaxy collection install ./ansible/arcodange/factory -f
+ansible-playbook -i ansible/arcodange/factory/inventory ansible/arcodange/factory/playbooks/02_setup.yml
 ```


@@ -10,68 +10,41 @@ kubectl create secret generic traefik-duckdns-token --from-literal="DUCKDNS_TOKE
 ```mermaid
 %%{init: { 'logLevel': 'debug', 'theme': 'dark' } }%%
 timeline
-    title Playbook Execution Sequence
-    section 01_system
-      rpi
-        : set hostname
-      dns
-        : install pi-hole
-      ssl
-        : step-ca
-        : fetch root certificate
-        : build docker image with CA
-      prepare_disks
-        : list partitions
-        : format disk
-        : mount disk
-      system_docker
-        : install docker
-        : configure docker storage
-        : restart docker
-      longhorn
-        : deploy longhorn
-      k3s
-        : prepare inventory
-        : install k3s collection
-        : install socat
-        : deploy k3s cluster
-        : configure kubeconfig
-        : configure traefik
-        : configure cert-manager
-    section 02_setup
-      backup_nfs
-        : create RWX volume
-        : create recurring job
-        : deploy NFS
-        : mount NFS
-      postgres
-        : create database
-        : create user
-      gitea
-        : deploy gitea
-        : create admin user
-        : create organization
-    section 03_cicd
-      cicd : CI/CD
-      gitea_token
-        : generate token
-      deploy_docker_compose
-        : deploy gitea action
-      argocd
-        : generate token
-        : deploy argocd
-    section 04_tools
-      Hashicorp Vault
-        : gitea_token
-        : hashicorp_vault
-      Crowdsec
-        : crowdsec
-    section 05_backup
-      Gitea Backup
+    title playbook order
+    section Setup DNS, OS, ...
+      manual configuration
+        : install the OS, reserve a static IP, configure SSH, VNC
+        : format and create partitions with gparted
+    section Docker & K3S
+      system
+        : install Docker
+        : install K3S working with docker
+        : configure Traefik
+    section Volume, NFS
+      setup hard_disk
+        : mount the partitions
+        : install NFS
+      system
+        : deploy the NFS provisioner
+    section postgres
+      setup
+        : postgres
+    section gitea
+      setup
+        : gitea
-      K3s PVC Backup
-        : k3s_pvc
-      Postgres Backup
-        : create backup script
-        : create restore script
+    section gitea action runner
+      setup
+        : gitea action runner
+    section argo cd
+      argo_cd
+        : argo cd
+    section hello world app
+      setup git repository
+        : terraform
+      setup CI
+      deploy
+        : dev : list exposed deployments with label and port as a landing page
+        : expose (as ngrok ? direct ? port ? )
 ```


@@ -1,5 +0,0 @@
-[defaults]
-collections_path = ~/.ansible/collections
-[ssh_connection]
-scp_if_ssh = True


@@ -1,160 +0,0 @@
# ADR 20260407: CI/CD Architecture with ArgoCD, Gitea, and Vault
## Status
Proposed
## Context
The home lab requires a secure and automated CI/CD pipeline to deploy applications to the k3s cluster. The pipeline must integrate with:
- **Gitea**: For Git repository management and CI runners.
- **ArgoCD**: For GitOps-based continuous deployment.
- **Vault**: For secrets management and OIDC authentication.
- **Gitea Act Runner**: For executing CI jobs.
## Decision
We will implement a **GitOps-driven CI/CD pipeline** with the following components:
### 1. Gitea OIDC Authentication with Vault
- Gitea is registered as an OIDC application in Vault.
- Vault issues short-lived tokens for Gitea users.
- The `gitea_oidc_auth.yml` playbook automates this setup using Playwright and OpenTofu.
- **OIDC Workflow**:
1. The `oidc_jwt_token.sh` script (base64-encoded in `secrets.vault_oauth__sh_b64`) handles the OIDC flow.
2. Gitea Act Runner executes the script to obtain an ID token from Gitea.
3. The ID token is used to authenticate with Vault and retrieve secrets.
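The exchange in steps 1-3 can be sketched with plain `curl`. This is a minimal illustration rather than the actual `oidc_jwt_token.sh`: the `gitea_jwt` auth path and `gitea_cicd` role come from the CI workflows, while the `ID_TOKEN_URL`/`ID_TOKEN_TOKEN` variables are assumed to be injected by the runner.
```bash
#!/usr/bin/env bash
set -euo pipefail

# 1. Obtain an OIDC ID token from Gitea for the running CI job
#    (ID_TOKEN_URL / ID_TOKEN_TOKEN assumed to be provided to the job).
ID_TOKEN=$(curl -sf -H "Authorization: Bearer ${ID_TOKEN_TOKEN}" "${ID_TOKEN_URL}" | jq -r '.value')

# 2. Exchange the ID token for a Vault token via Vault's JWT auth method.
VAULT_TOKEN=$(curl -sf --request POST \
  --data "{\"jwt\":\"${ID_TOKEN}\",\"role\":\"gitea_cicd\"}" \
  https://vault.arcodange.lab/v1/auth/gitea_jwt/login | jq -r '.auth.client_token')

# 3. Read a secret with the short-lived Vault token.
curl -sf -H "X-Vault-Token: ${VAULT_TOKEN}" \
  https://vault.arcodange.lab/v1/kvv1/admin/gitea | jq '.data'
```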
### 2. Gitea Act Runner
- Deployed on `pi1` and `pi3` (not on the Gitea host, which is `pi2`).
- Uses Docker-in-Docker for job execution.
- **Custom Runner Image (`ubuntu-latest-ca`)**: Required due to the self-signed `.lab` domain. The custom image includes the local CA certificate to trust the Gitea instance (`gitea.arcodange.lab`).
- Managed via Docker Compose (`03_cicd.yml`).
### 3. ArgoCD
- Deployed on the k3s cluster (via HelmChart in `/var/lib/rancher/k3s/server/manifests/argocd.yaml`).
- Uses Gitea as the source of truth for GitOps.
- Synchronizes the `factory` repository to deploy applications.
- Configured with Traefik for TLS termination.
### 4. Vault Secrets Operator
- Deployed in the `tools` namespace.
- Manages secrets for applications deployed via ArgoCD.
- Integrates with Gitea OIDC for authentication.
- **Helm Chart Integration**:
- `VaultAuth`: Authenticates with Vault using Kubernetes service accounts.
- `VaultStaticSecret`: Retrieves static secrets (e.g., `kvv2/webapp/config`).
- `VaultDynamicSecret`: Generates dynamic secrets (e.g., PostgreSQL credentials).
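As a concrete illustration, a `VaultStaticSecret` for the `kvv2/webapp/config` path mentioned above could be applied like this (a sketch assuming the operator's `secrets.hashicorp.com/v1beta1` CRD group; the `vault-auth` reference name is hypothetical):
```bash
kubectl apply -n tools -f - <<'EOF'
apiVersion: secrets.hashicorp.com/v1beta1
kind: VaultStaticSecret
metadata:
  name: webapp-config
spec:
  vaultAuthRef: vault-auth   # the VaultAuth object described above (name assumed)
  mount: kvv2                # KV v2 mount referenced in this ADR
  type: kv-v2
  path: webapp/config
  refreshAfter: 60s
  destination:
    name: secretkv           # Kubernetes Secret the operator creates/updates
    create: true
EOF
```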
### 5. Security
- **TLS**: Traefik terminates TLS using Let's Encrypt.
- **OIDC**: Gitea authentication via Vault.
- **Secrets**: Stored in Vault, injected via the Vault Secrets Operator.
## Architecture Diagram
```mermaid
%%{init: {'theme': 'base', 'themeVariables': { 'primaryColor': '#333333', 'edgeLabelBackground':'#f0f0f0', 'tertiaryColor': '#e67e22'}}}%%
graph TD
%% Styles
classDef gitea fill:#ffcc99,stroke:#cc9966,color:#333;
classDef argocd fill:#99ffcc,stroke:#66cc99,color:#333;
classDef vault fill:#ccccff,stroke:#6666cc,color:#333;
classDef k3s fill:#ff9999,stroke:#cc0000,color:#333;
classDef runner fill:#ffff99,stroke:#cccc00,color:#333;
%% Components
Gitea["Gitea (pi2)"]:::gitea
ArgoCD["ArgoCD (k3s)"]:::argocd
Vault["Vault (k3s/tools)"]:::vault
Runner1["Gitea Act Runner (pi1)"]:::runner
Runner2["Gitea Act Runner (pi3)"]:::runner
VaultOperator["Vault Secrets Operator (k3s/tools)"]:::vault
k3s["k3s Cluster"]:::k3s
%% Workflow
Gitea -->|OIDC Auth| Vault
Gitea -->|Trigger CI| Runner1
Gitea -->|Trigger CI| Runner2
Runner1 -->|Deploy to| k3s
Runner2 -->|Deploy to| k3s
ArgoCD -->|GitOps Sync| Gitea
ArgoCD -->|Deploy Apps| k3s
VaultOperator -->|Inject Secrets| k3s
Vault -->|Secrets| VaultOperator
%% Annotations
linkStyle 0,1,2,3,4,5,6,7 stroke:#999,stroke-width:1px;
```
## Consequences
### Positive
- **Automated Deployments**: ArgoCD ensures the cluster state matches Git.
- **Secure Secrets**: Vault centralizes secret management.
- **Scalable CI**: Gitea Act Runners can be added to any host.
- **OIDC Integration**: Secure authentication via Vault.
### Negative
- **Complexity**: Multiple moving parts (Gitea, ArgoCD, Vault).
- **Dependency on Vault**: If Vault fails, CI/CD may be disrupted.
- **Learning Curve**: Requires familiarity with GitOps and Vault.
## Alternatives Considered
### Alternative 1: GitHub Actions
- **Rejected**: Self-hosted Gitea aligns better with the home lab's privacy goals.
### Alternative 2: Jenkins
- **Rejected**: ArgoCD + Gitea Act Runner is lighter and more GitOps-native.
### Alternative 3: No CI/CD
- **Rejected**: Manual deployments are error-prone and unscalable.
## Sequence Diagrams
### 1. CI/CD Workflow for OpenTofu/Terraform
```mermaid
sequenceDiagram
participant Gitea
participant Runner as Gitea Act Runner (pi1/pi3)
participant Vault
participant WebApp as WebApp (k3s)
Gitea->>Runner: Trigger vault.yaml workflow
Runner->>Gitea: Execute vault_oauth__sh_b64 (OIDC)
Gitea-->>Runner: Return ID Token
Runner->>Vault: Authenticate with ID Token
Vault-->>Runner: Return Vault Token
Runner->>Runner: Run OpenTofu/Terraform
Runner->>Vault: Fetch Secrets (via Vault Action)
Vault-->>Runner: Return Secrets
Runner->>WebApp: Deploy Changes
```
### 2. Vault Secrets Operator Workflow
```mermaid
sequenceDiagram
participant ArgoCD
participant WebApp as WebApp (k3s)
participant VaultOperator as Vault Secrets Operator
participant Vault
ArgoCD->>WebApp: Deploy Helm Chart
WebApp->>VaultOperator: Create VaultAuth (K8s Auth)
VaultOperator->>Vault: Authenticate (K8s Service Account)
Vault-->>VaultOperator: Return Vault Token
WebApp->>VaultOperator: Create VaultStaticSecret (kvv2/webapp/config)
VaultOperator->>Vault: Fetch Static Secret
Vault-->>VaultOperator: Return Secret
VaultOperator->>WebApp: Inject Secret (secretkv)
WebApp->>VaultOperator: Create VaultDynamicSecret (postgres/creds/webapp)
VaultOperator->>Vault: Generate Dynamic Secret
Vault-->>VaultOperator: Return Credentials
VaultOperator->>WebApp: Inject Credentials (vso-db-credentials)
WebApp->>WebApp: Restart Pods (Rollout)
```
## Success Metrics
- Gitea Act Runners successfully execute CI jobs.
- ArgoCD synchronizes the `factory` repository without errors.
- Vault Secrets Operator injects secrets into deployed applications.


@@ -1,152 +0,0 @@
# ADR 20260407: Docker Storage Optimization for Gitea Act Runner
## Status
Proposed
## Context
The `pi3` machine (Raspberry Pi) is running both Docker and k3s, with the following storage constraints:
- Root filesystem (`/dev/mmcblk0p2`): 58G total, 89% used (6.4G free)
- External disk (`/dev/sda1`): 458G total, 22G used (413G free)
Gitea Act Runner images (`ubuntu-latest` and `ubuntu-latest-ca`) are frequently deleted, likely due to Docker's automatic garbage collection triggered by low disk space. This disrupts CI/CD pipelines.
### Current Setup
- Docker is configured via Ansible (`system_docker.yml`) using the `geerlingguy.docker` role.
- k3s is configured to use Docker as the container runtime (`--docker` flag).
- Longhorn is used for persistent storage in k3s, and we want to preserve its performance.
## Decision
We will implement a **hybrid storage strategy** to prevent Gitea Act Runner image deletion while maintaining Longhorn performance:
### Docker Storage Optimization Flow
```mermaid
%%{init: {'theme': 'base', 'themeVariables': { 'primaryColor': '#333333', 'edgeLabelBackground':'#f0f0f0', 'tertiaryColor': '#e67e22'}}}%%
sequenceDiagram
participant Ansible
participant Docker
participant ExternalDisk
participant GiteaRunner
participant Longhorn
Ansible->>Docker: Configure /etc/docker/daemon.json
Docker->>ExternalDisk: Use /mnt/arcodange/docker for storage
Ansible->>Docker: Restart Docker
Docker->>GiteaRunner: Pull ubuntu-latest-ca image
Ansible->>Docker: Pin image (dummy container)
Docker->>GiteaRunner: Start CI job
GiteaRunner->>Longhorn: Use persistent storage (unaffected)
Docker->>ExternalDisk: Store images (413G free)
Docker->>Docker: Skip garbage collection (pinned)
```
### 1. Pin Critical Images
Use a dummy container to pin the Gitea Act Runner images:
```yaml
# Add to system_docker.yml or a new playbook
- name: Pin Gitea Act Runner images
community.docker.docker_container:
name: pin-gitea-runner-ubuntu-latest-ca
image: gitea.arcodange.lab/arcodange-org/runner-images:ubuntu-latest-ca
state: present
command: ["sh", "-c", "sleep infinity"]
auto_remove: false
restart_policy: unless-stopped
```
### 2. Configure Docker Storage with Overlay on External Disk
Modify `/etc/docker/daemon.json` to use the external disk for storage while keeping the root filesystem for metadata:
```json
{
"data-root": "/mnt/arcodange/docker",
"storage-driver": "overlay2",
"storage-opts": ["overlay2.override_kernel_check=true"]
}
```
### 3. Ansible Implementation
Update `system_docker.yml` to:
1. Create `/mnt/arcodange/docker` if it doesn't exist.
2. Configure Docker to use the external disk.
3. Pin critical images post-installation.
```yaml
# Add to system_docker.yml tasks
- name: Ensure Docker storage directory exists on external disk
ansible.builtin.file:
path: /mnt/arcodange/docker
state: directory
mode: '0755'
owner: root
group: docker
- name: Configure Docker to use external storage
ansible.builtin.copy:
dest: /etc/docker/daemon.json
content: |
{
"data-root": "/mnt/arcodange/docker",
"storage-driver": "overlay2",
"storage-opts": ["overlay2.override_kernel_check=true"],
"log-driver": "json-file",
"log-opts": {
"max-size": "10m",
"max-file": "5"
}
}
mode: '0644'
notify: Redémarrer Docker
- name: Pin Gitea Act Runner images
community.docker.docker_container:
name: "{{ item.name }}"
image: "{{ item.image }}"
state: present
command: ["sh", "-c", "sleep infinity"]
auto_remove: false
restart_policy: unless-stopped
loop:
- { name: "pin-gitea-runner-ubuntu-latest", image: "gitea/runner-images:ubuntu-latest" }
- { name: "pin-gitea-runner-ubuntu-latest-ca", image: "gitea.arcodange.lab/arcodange-org/runner-images:ubuntu-latest-ca" }
```
## Consequences
### Positive
- **Prevents Image Deletion**: Critical images are pinned and won't be garbage-collected.
- **Preserves Longhorn Performance**: Longhorn continues to use the root filesystem for its operations, maintaining performance.
- **Scalable Storage**: Docker images are stored on the external disk (413G free), preventing root filesystem exhaustion.
- **No k3s Changes Required**: k3s continues to use Docker as the runtime without modification.
### Negative
- **Migration Effort**: Existing Docker data must be migrated to the external disk (one-time operation).
- **Dependency on External Disk**: If `/dev/sda1` fails, Docker will not function until the disk is remounted or the configuration is reverted.
- **Slight Performance Overhead**: Accessing images from the external disk may be slightly slower than the root filesystem (mitigated by SSD/HDD performance).
## Alternatives Considered
### Alternative 1: Increase Root Filesystem Size
- **Rejected**: The SD card is already at capacity, and expanding it is not feasible.
### Alternative 2: Disable Docker Garbage Collection
- **Rejected**: This would risk filling the root filesystem completely, causing system instability.
### Alternative 3: Use k3s Image Garbage Collection
- **Rejected**: k3s does not provide fine-grained control over image retention for non-k8s workloads (e.g., Gitea Act Runner).
### Alternative 4: Save/Load Images Manually
- **Rejected**: Manual intervention is not scalable and does not address the root cause.
## Migration Plan
1. **Backup**: Save critical images to `/mnt/arcodange`:
```bash
docker save gitea.arcodange.lab/arcodange-org/runner-images:ubuntu-latest-ca -o /mnt/arcodange/gitea-runner-backup.tar
```
2. **Update Ansible**: Apply the changes to `system_docker.yml`.
3. **Run Playbook**: Execute the playbook to reconfigure Docker.
4. **Verify**: Ensure Gitea Act Runner functions correctly post-migration.
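A sketch of the one-time data move behind steps 1-3, assuming Docker's default `data-root` of `/var/lib/docker`:
```bash
sudo systemctl stop docker
# Preserve hardlinks/ACLs/xattrs, which overlay2 layers rely on
sudo rsync -aHAX /var/lib/docker/ /mnt/arcodange/docker/
sudo systemctl start docker                    # daemon.json now points at /mnt/arcodange/docker
docker info --format '{{ .DockerRootDir }}'    # should print /mnt/arcodange/docker
sudo mv /var/lib/docker /var/lib/docker.old    # reclaim the SD card space once verified
```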
## Success Metrics
- Gitea Act Runner images are no longer deleted between runs.
- Root filesystem usage drops below 80%.
- CI/CD pipelines complete without image pull errors.


@@ -1,576 +0,0 @@
# ADR 20260407: Network Architecture
## Status
Proposed
## Context
The home lab requires a secure and resilient network architecture to support:
- Internal services (`.lab` domain).
- External services (`.arcodange.fr` domain).
- DNS resolution and ad-blocking (Pi-hole).
- TLS certificate management (Step CA).
- Ingress routing (Traefik).
- CDN and DDoS protection (Cloudflare).
## Decision
We will implement a **multi-layered network architecture** with the following components:
### 1. External Layer (Internet)
- **Cloudflare**: CDN, DDoS protection, and DNS for `.arcodange.fr`.
- **DuckDNS**: Dynamic DNS for external access.
- **Livebox**: ISP-provided gateway (NAT, DHCP, firewall).
### 2. Internal Layer (Home Lab)
- **Pi-hole (pi1, pi3)**: DNS sinkhole for ad-blocking and internal DNS resolution.
- **Step CA (pi1)**: Internal certificate authority for `.lab` domain.
- **Traefik (k3s)**: Ingress controller with TLS termination.
- **k3s Cluster**: Hosts internal services with Longhorn storage.
### 3. DNS Architecture
- **Pi-hole**: Primary DNS for internal clients.
- Forwards `.lab` queries to Step CA.
- Forwards external queries to Cloudflare (1.1.1.1).
- **Step CA**: Issues certificates for `.lab` services.
- **Cloudflare**: Manages `.arcodange.fr` DNS records.
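A quick way to confirm this split-horizon behaviour from any client (192.168.1.201 is pi1's Pi-hole, per the internal DNS ADR; the `.lab` hostname is illustrative):
```bash
# .lab query, answered internally:
dig @192.168.1.201 gitea.arcodange.lab +short
# External query, forwarded to Cloudflare (1.1.1.1):
dig @192.168.1.201 example.com +short
```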
### 4. Ingress and TLS
- **Traefik**: Terminates TLS for both `.lab` and `.arcodange.fr` domains.
- Uses Let's Encrypt for `.arcodange.fr`.
- Uses Step CA for `.lab`.
- **Helm Chart Annotations**:
- `traefik.ingress.kubernetes.io/router.entrypoints: websecure`
- `traefik.ingress.kubernetes.io/router.tls.certresolver: letsencrypt`
- `traefik.ingress.kubernetes.io/router.middlewares: localIp@file`
### 5. Security
- **Cloudflare Tunnel**: Securely exposes internal services without port forwarding.
- **CrowdSec**: Intrusion detection and banning.
- **Traefik Middlewares**: IP filtering, rate limiting, and authentication.
- **Cloudflare Turnstile**: CAPTCHA protection for public-facing services.
## Architecture Diagrams
### 0. High-Level Network Architecture (Architecture Beta)
```mermaid
%%{init: {'theme': 'neutral', 'themeVariables': {
'primaryColor': '#f0f0f0',
'primaryBorderColor': '#333333',
'primaryTextColor': '#333333',
'lineColor': '#333333',
'tertiaryColor': '#e67e22'
}}}%%
architectureBeta
%% External Layer
box "Internet" #f9f9f9
component Cloudflare["Cloudflare\n(CDN/DNS)"] #f9f9f9
component DuckDNS["DuckDNS\n(DDNS)"] #f9f9f9
end
%% External Gateway
box "External Gateway" #e6e6e6
component Livebox["Livebox\n(NAT/Firewall)"] #e6e6e6
end
%% Internal Layer
box "Internal Network\n(192.168.1.0/24)" #d4d4d4
%% DNS Layer
box "DNS" #ffff99
component PiHole1["Pi-hole\n(pi1)"] #ffff99
component PiHole3["Pi-hole\n(pi3)"] #ffff99
component StepCA["Step CA\n(pi1)"] #ccccff
end
%% k3s Layer
box "k3s Cluster" #ff9999
component Traefik["Traefik\n(Ingress)"] #ff9999
component CrowdSec["CrowdSec\n(Security)"] #ff9999
component Gitea["Gitea\n(pi2)"] #ffcc99
component Vault["Vault\n(Secrets)"] #ccccff
end
end
%% Connections
Cloudflare --> Livebox : "DNS"
DuckDNS --> Livebox : "DDNS"
Livebox --> PiHole1 : "NAT"
Livebox --> PiHole3 : "NAT"
Livebox --> Traefik : "NAT"
PiHole1 --> StepCA : "Forward .lab"
PiHole1 --> Cloudflare : "Forward External"
PiHole3 --> StepCA : "Forward .lab"
PiHole3 --> Cloudflare : "Forward External"
Traefik --> Cloudflare : "TLS (Let's Encrypt)"
Traefik --> StepCA : "TLS (Step CA)"
CrowdSec --> Traefik : "Ban IPs"
Traefik --> Gitea : "Route"
Traefik --> Vault : "Route"
```
### 1. High-Level Network Architecture
```mermaid
%%{init: {'theme': 'base', 'themeVariables': { 'primaryColor': '#333333', 'edgeLabelBackground':'#f0f0f0', 'tertiaryColor': '#f89136'}}}%%
graph TD
%% Styles
classDef internet fill:#f9f9f9,stroke:#999,color:#333;
classDef external fill:#e6e6e6,stroke:#555,color:#333;
classDef internal fill:#d4d4d4,stroke:#777,color:#333;
classDef security fill:#ff9999,stroke:#cc0000,color:#333;
classDef dns fill:#ffff99,stroke:#cccc00,color:#333;
classDef ca fill:#ccccff,stroke:#6666cc,color:#333;
%% Internet
subgraph "Internet"
Cloudflare["Cloudflare (CDN/DNS)"]:::internet
DuckDNS["DuckDNS (DDNS)"]:::internet
end
%% External Gateway
subgraph "External Gateway"
Livebox["Livebox (NAT/Firewall)"]:::external
end
%% Internal Network
subgraph "Internal Network (192.168.1.0/24)"
%% Pi-hole DNS
PiHole1["Pi-hole (pi1)"]:::dns
PiHole3["Pi-hole (pi3)"]:::dns
%% Step CA
StepCA["Step CA (pi1)"]:::ca
%% k3s Cluster
k3s["k3s Cluster"]:::internal
Traefik["Traefik (k3s)"]:::internal
CrowdSec["CrowdSec (k3s)"]:::security
%% Services
Gitea["Gitea (pi2)"]:::internal
Vault["Vault (k3s)"]:::internal
end
%% Connections
Cloudflare -->|DNS| Livebox
DuckDNS -->|DDNS| Livebox
Livebox -->|NAT| PiHole1
Livebox -->|NAT| PiHole3
Livebox -->|NAT| k3s
%% Internal DNS
PiHole1 -->|Forward .lab| StepCA
PiHole1 -->|Forward External| Cloudflare
PiHole3 -->|Forward .lab| StepCA
PiHole3 -->|Forward External| Cloudflare
%% Ingress
Traefik -->|"TLS (Let's Encrypt)"| Cloudflare
Traefik -->|"TLS (Step CA)"| StepCA
CrowdSec -->|Ban IPs| Traefik
%% Service Access
Traefik -->|Route| Gitea
Traefik -->|Route| Vault
```
### 2. DNS Resolution Flow
```mermaid
sequenceDiagram
participant Client
participant PiHole
participant StepCA
participant Cloudflare
participant ExternalDNS
Client->>PiHole: Query example.lab
PiHole->>StepCA: Forward .lab query
StepCA-->>PiHole: Return A record
PiHole-->>Client: Return response
Client->>PiHole: Query example.com
PiHole->>Cloudflare: Forward to 1.1.1.1
Cloudflare->>ExternalDNS: Resolve externally
ExternalDNS-->>Cloudflare: Return response
Cloudflare-->>PiHole: Return response
PiHole-->>Client: Return response
```
### 3. Ingress and TLS Flow
```mermaid
sequenceDiagram
participant User
participant Cloudflare
participant Traefik
participant StepCA
    participant LE as Let's Encrypt
    participant Service
    User->>Cloudflare: HTTPS Request (webapp.arcodange.fr)
    Cloudflare->>Traefik: Forward to internal IP
    Traefik->>LE: Request Certificate
    LE-->>Traefik: Issue Certificate
Traefik->>Service: Route request
Service-->>Traefik: Return response
Traefik-->>Cloudflare: Return HTTPS response
Cloudflare-->>User: Return response
User->>Traefik: HTTPS Request (webapp.arcodange.lab)
Traefik->>StepCA: Request Certificate
StepCA-->>Traefik: Issue Certificate
Traefik->>Service: Route request
Service-->>Traefik: Return response
Traefik-->>User: Return HTTPS response
```
### 4. Security Flow (CrowdSec + Traefik)
```mermaid
sequenceDiagram
participant Attacker
participant Traefik
participant CrowdSec
participant BannedIPs
Attacker->>Traefik: Malicious Request
Traefik->>CrowdSec: Log suspicious activity
CrowdSec->>BannedIPs: Add IP to ban list
BannedIPs-->>Traefik: Update middleware
Traefik-->>Attacker: Block request (403)
```
## Playbook and Role Analysis
### 1. Pi-hole Deployment
- **Playbook**: `playbooks/system/pihole.yml`
- **Role**: `arcodange.factory.pihole`
- **Configuration**:
- Upstream DNS: Cloudflare (1.1.1.1) and Step CA for `.lab`.
- Blocklists: Ad-blocking and malware domains.
### 2. Step CA Deployment
- **Playbook**: `playbooks/ssl/ssl.yml`
- **Role**: `step_ca`
- **Configuration**:
- Internal CA for `.lab` domain.
- Short-lived certificates (default: 24h).
### 3. Traefik Deployment
- **Playbook**: `playbooks/system/system_k3s.yml` (via k3s)
- **Helm Chart**: `traefik` (installed via k3s)
- **Key Annotations**:
```yaml
traefik.ingress.kubernetes.io/router.entrypoints: websecure
traefik.ingress.kubernetes.io/router.tls.certresolver: letsencrypt
traefik.ingress.kubernetes.io/router.middlewares: localIp@file
```
### 4. CrowdSec Deployment
- **Playbook**: `playbooks/tools/crowdsec.yml`
- **Role**: `arcodange.factory.crowdsec`
- **Configuration**:
- Bouncer integration with Traefik.
- Custom scenarios for brute-force and bot detection.
## Consequences
### Positive
- **Resilient DNS**: Pi-hole provides ad-blocking and internal DNS resolution.
- **Secure TLS**: Step CA for internal services, Let's Encrypt for external.
- **DDoS Protection**: Cloudflare absorbs external attacks.
- **Intrusion Detection**: CrowdSec bans malicious IPs automatically.
### Negative
- **Complexity**: Multiple layers require careful configuration.
- **Single Point of Failure**: Pi-hole is critical for internal DNS.
- **Certificate Management**: Step CA requires maintenance for `.lab` domain.
## Alternatives Considered
### Alternative 1: Public DNS for `.lab`
- **Rejected**: Exposing internal domains is a security risk.
### Alternative 2: No Ad-Blocking
- **Rejected**: Pi-hole provides essential security and privacy.
### Alternative 3: Self-Signed Certificates
- **Rejected**: Step CA provides better usability with short-lived certs.
### 5. Cloudflare Turnstile + CrowdSec Flow
```mermaid
sequenceDiagram
participant User
participant Cloudflare
participant Turnstile
participant Traefik
participant CrowdSec
participant BannedIPs
User->>Cloudflare: Request protected endpoint
Cloudflare->>Turnstile: Challenge (CAPTCHA)
Turnstile-->>Cloudflare: Return token
Cloudflare->>Traefik: Forward request with token
alt Valid Token
Traefik->>Service: Route request
Service-->>Traefik: Return response
Traefik-->>Cloudflare: Return response
Cloudflare-->>User: Return success
else Invalid Token
Traefik->>CrowdSec: Log suspicious activity
CrowdSec->>BannedIPs: Add IP to ban list
BannedIPs-->>Traefik: Update middleware
Traefik-->>Cloudflare: Block request (403)
Cloudflare-->>User: Return "Access Denied"
end
```
## Deep Dive: `.lab` Domain SSL/TLS Architecture
### Overview
The `.lab` domain relies on a **zero-trust internal PKI** (Public Key Infrastructure) powered by **Step CA**, integrated with **k3s**, **Traefik**, and **cert-manager**. This section details the components, interactions, and operational workflows.
### Core Components
#### 1. **Step CA (Certificate Authority)**
- **Host**: `pi1` (primary), with standby nodes for resilience.
- **Ports**: `8443` (HTTPS), `443` (ACME).
- **Provisioners**:
- `cert-manager`: Dedicated for k3s workloads.
- `admin`: For manual certificate issuance.
- **Certificate Lifecycle**:
- **Short-lived certificates** (default: 24h).
- **Automatic renewal** via cert-manager.
- **OCSP stapling** for revocation checks.
#### 2. **cert-manager**
- **Namespace**: `cert-manager`.
- **CRDs**:
- `Certificate`: Defines desired certificates.
- `CertificateRequest`: Requests signed by Step CA.
- `ClusterIssuer`/`Issuer`: References Step CA.
- `StepClusterIssuer`: Custom resource for Step CA integration.
#### 3. **StepClusterIssuer**
- **Purpose**: Bridges cert-manager with Step CA.
- **Configuration**:
```yaml
apiVersion: certmanager.step.sm/v1beta1
kind: StepClusterIssuer
metadata:
name: step-issuer
namespace: cert-manager
spec:
url: "https://ssl-ca.arcodange.lab:8443"
caBundle: "<base64-encoded-root-ca>"
provisioner:
name: cert-manager
kid: "<key-id>"
passwordRef:
name: step-jwk-password
key: password
```
- **Workflow**:
1. cert-manager creates a `CertificateRequest`.
2. `StepClusterIssuer` forwards the request to Step CA.
3. Step CA signs the certificate and returns it to cert-manager.
4. cert-manager stores the certificate in a Kubernetes `Secret`.
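The `passwordRef` above points at a plain Kubernetes Secret; creating it and watching a request go through might look like this (the password value is a placeholder):
```bash
kubectl -n cert-manager create secret generic step-jwk-password \
  --from-literal=password='<jwk-provisioner-password>'
# Watch CertificateRequests move from Pending to Ready:
kubectl get certificaterequests.cert-manager.io -A -w
```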
#### 4. **Traefik Ingress Controller**
- **Namespace**: `kube-system`.
- **TLS Configuration**:
- **EntryPoints**: `websecure` (HTTPS), `web` (HTTP → redirect).
- **Certificate Resolvers**:
- `letsencrypt`: For `.arcodange.fr` (public).
- `step-ca`: For `.lab` (internal).
- **Middlewares**:
- `localIp@file`: IP allowlisting.
- `crowdsec-bouncer`: Intrusion prevention.
#### 5. **Certificate and CertificateRequest**
- **Example `Certificate` for `.lab`**:
```yaml
apiVersion: cert-manager.io/v1
kind: Certificate
metadata:
name: wildcard-arcodange-lab
namespace: kube-system
spec:
secretName: wildcard-arcodange-lab-tls
issuerRef:
name: step-issuer
kind: StepClusterIssuer
group: certmanager.step.sm
dnsNames:
- "*.arcodange.lab"
- "arcodange.lab"
```
- **Generated `CertificateRequest`**:
- Automatically created by cert-manager.
- References the `StepClusterIssuer`.
- Status transitions: `Pending` → `Approved` → `Ready`.
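To follow those transitions for the wildcard certificate above:
```bash
kubectl -n kube-system get certificate wildcard-arcodange-lab
kubectl -n kube-system get certificaterequest -w   # Pending, then Approved, then Ready
```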
#### 6. **k3s Cluster Integration**
- **Nodes**: `pi1` (control plane), `pi2`, `pi3` (workers).
- **Storage**: Longhorn for persistent volumes.
- **Networking**:
- **CNI**: Flannel.
- **Service Mesh**: Traefik for ingress, Linkerd (optional).
### Workflow: Certificate Issuance and Renewal
```mermaid
sequenceDiagram
participant App as Application (e.g., Gitea)
participant Cert as Certificate
participant CR as CertificateRequest
participant SCI as StepClusterIssuer
participant StepCA as Step CA
participant Secret as Kubernetes Secret
participant Traefik as Traefik
App->>Cert: Declare desired certificate
Cert->>CR: Create CertificateRequest
CR->>SCI: Forward to StepClusterIssuer
SCI->>StepCA: Sign CSR (via JWK provisioner)
StepCA-->>SCI: Return signed certificate
SCI->>Secret: Store certificate/key
Secret-->>Traefik: Mount as TLS secret
Traefik->>App: Route traffic with TLS
loop Every 2/3 of certificate lifetime
Cert->>CR: Trigger renewal
CR->>SCI: Re-sign CSR
SCI->>StepCA: Request new certificate
StepCA-->>SCI: Return signed certificate
SCI->>Secret: Update secret
end
```
### Device Trust: Adding `.lab` CA to External Devices
#### **Manual Trust Installation**
1. **Export Root CA**:
```bash
scp pi1:/home/step/.step/certs/root_ca.crt ./arcodange-lab-ca.crt
```
2. **Install on Devices**:
- **macOS**:
```bash
sudo security add-trusted-cert -d -r trustRoot -k /Library/Keychains/System.keychain ./arcodange-lab-ca.crt
```
- **Linux (Debian/Ubuntu)**:
```bash
sudo cp arcodange-lab-ca.crt /usr/local/share/ca-certificates/
sudo update-ca-certificates
```
- **Windows**:
- Import via `certmgr.msc` → **Trusted Root Certification Authorities**.
- **Android/iOS**:
- Email the `.crt` and install via device settings.
- **Raspberry Pi**:
```bash
sudo cp arcodange-lab-ca.crt /etc/ssl/certs/
sudo update-ca-certificates
```
#### **Automated Trust via Ansible**
- **Playbook**: `playbooks/system/trust_ca.yml`
- **Role**: `arcodange.factory.trust_ca`
- **Targets**: All nodes in `raspberries` group.
### Troubleshooting Common Issues
#### 1. **Certificate Not Issued**
- **Symptoms**: `CertificateRequest` stuck in `Pending`.
- **Causes**:
- Step CA unreachable.
- Incorrect `caBundle` or provisioner `kid`.
- Network policies blocking egress to Step CA.
- **Fixes**:
```bash
# Check StepClusterIssuer status
kubectl -n cert-manager describe stepclusterissuer step-issuer
# Verify Step CA connectivity
kubectl -n cert-manager logs -l app.kubernetes.io/name=step-issuer
# Test Step CA manually
step ca certificate --ca-url https://ssl-ca.arcodange.lab:8443 \
--root /home/step/.step/certs/root_ca.crt \
test.lab test.crt test.key
```
#### 2. **Traefik TLS Errors**
- **Symptoms**: `502 Bad Gateway` or TLS handshake failures.
- **Causes**:
- Missing certificate in `Secret`.
- Incorrect SNI routing.
- Expired certificates.
- **Fixes**:
```bash
# Check Traefik logs
kubectl -n kube-system logs -l app.kubernetes.io/name=traefik
# Verify certificate secret
kubectl -n kube-system get secret wildcard-arcodange-lab-tls -o yaml
# Restart Traefik
kubectl -n kube-system rollout restart deployment/traefik
```
#### 3. **Device Trust Issues**
- **Symptoms**: Browser warnings (`NET::ERR_CERT_AUTHORITY_INVALID`).
- **Causes**:
- CA not installed in device trust store.
- Clock skew (certificate validity).
- **Fixes**:
- Reinstall CA certificate.
- Sync device clock with NTP:
```bash
sudo ntpdate pool.ntp.org
```
### Security Considerations
#### 1. **Provisioner Security**
- **JWK Provisioner**: Encrypted with a password stored in Kubernetes `Secret`.
- **Password Rotation**:
```bash
# Rotate JWK password via Ansible
ansible-playbook playbooks/ssl/rotate_jwk_password.yml
```
#### 2. **Certificate Revocation**
- **OCSP**: Step CA supports Online Certificate Status Protocol.
- **Manual Revocation**:
```bash
step ca revoke <serial> --reason superseded
```
#### 3. **Network Isolation**
- **Step CA Access**: Restricted to k3s cluster IPs via firewall rules.
- **Traefik Middlewares**: Enforce IP allowlisting for internal services.
### Future Enhancements
1. **Automated Device Onboarding**:
- MDM (Mobile Device Management) integration for CA trust.
- Ansible playbook for bulk device enrollment.
2. **Step CA High Availability**:
- Multi-node Step CA with RAFT consensus.
- Automatic failover for provisioners.
3. **Certificate Transparency**:
- Log all `.lab` certificates to a private CT log.
4. **Short-Lived Certificates**:
- Reduce default TTL to 1h for critical services.
### References
- [Step CA Documentation](https://smallstep.com/docs/step-ca/)
- [cert-manager Step Issuer](https://smallstep.com/docs/step-certificates/kubernetes/)
- [Traefik TLS Configuration](https://doc.traefik.io/traefik/https/tls/)


@@ -1,126 +0,0 @@
# ADR 20260414: Internal DNS Architecture
## Status
Accepted
## Context
During the 2026-04-13 power cut incident, cluster recovery was blocked by DNS resolution failures. The investigation revealed:
1. **CoreDNS forwarding loop**: CoreDNS was configured to forward queries to `/etc/resolv.conf`, which on the node (pi3) pointed to itself (`192.168.1.203`) - a host without a running DNS service
2. **Pi-hole HA misconfiguration**: Both pi1 and pi3 run Pi-hole (pihole-FTL) but:
- pi1's `dnsmasq` service was in a **failed state** due to missing `dip` group membership
- pi3's Pi-hole was running but CoreDNS couldn't reach it due to the forwarding configuration
3. **No explicit upstream DNS**: Pi-hole instances lacked explicitly configured upstream DNS servers
The cluster's HelmChart controller requires external DNS resolution to fetch charts from `charts.longhorn.io`, making DNS a critical dependency for storage provisioning and thus the entire cluster recovery process.
## Decision
### 1. DNS Service Hierarchy
```
┌─────────────────┐      ┌─────────────────┐      ┌──────────────┐
│  CoreDNS Pod    │─────▶│  Pi-hole (pi1)  │─────▶│   8.8.8.8    │
│  (kube-system)  │      │  Pi-hole (pi3)  │      │   1.1.1.1    │
└─────────────────┘      └─────────────────┘      │   8.8.4.4    │
                                                  └──────────────┘
```
### 2. CoreDNS Configuration
CoreDNS will forward **all non-cluster DNS queries** to **both Pi-hole instances** in HA configuration:
```coredns
.:53 {
errors
health
ready
kubernetes cluster.local in-addr.arpa ip6.arpa {
pods insecure
fallthrough in-addr.arpa ip6.arpa
}
hosts /etc/coredns/NodeHosts {
ttl 60
reload 15s
fallthrough
}
prometheus :9153
cache 30
loop
reload
import /etc/coredns/custom/*.override
import /etc/coredns/custom/*.server
forward . 192.168.1.201:53 192.168.1.203:53
}
```
### 3. Pi-hole HA Configuration
- **Primary**: pi1 (192.168.1.201)
- **Secondary**: pi3 (192.168.1.203)
- **Synchronization**: Gravity Sync for configuration consistency
- **Upstream DNS**: Explicitly configured to Cloudflare (1.1.1.1) and Google (8.8.8.8, 8.8.4.4)
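On each node this boils down to pinning the upstreams in Pi-hole's configuration, sketched here assuming a Pi-hole v5-style `setupVars.conf` (the Ansible role renders the file; appending by hand as shown would need deduplication):
```bash
sudo tee -a /etc/pihole/setupVars.conf >/dev/null <<'EOF'
PIHOLE_DNS_1=1.1.1.1
PIHOLE_DNS_2=8.8.8.8
PIHOLE_DNS_3=8.8.4.4
EOF
sudo pihole restartdns
```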
### 4. Pi-hole DNS Service Fix
The `dnsmasq` user must be a member of the `dip` group to bind to privileged port 53:
```bash
usermod -aG dip dnsmasq
```
This is managed via Ansible in `playbooks/system/rpi.yml`.
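To verify the fix on a node:
```bash
ssh pi1 "id dnsmasq"                    # the group list should now include dip
ssh pi1 "sudo ss -lunp 'sport = :53'"   # pihole-FTL should be the listener on port 53
```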
## Consequences
### Positive
- **Resilience**: DNS resolution continues if one Pi-hole node fails
- **Consistency**: Both Pi-hole instances maintain synchronized configuration via Gravity Sync
- **Recovery**: Cluster can recover from power failures without manual DNS intervention
- **Explicit configuration**: Upstream DNS servers are explicitly defined, avoiding reliance on DHCP-provided config
### Negative
- **Complexity**: Additional Ansible tasks required to maintain DNS infrastructure
- **Dependency**: Cluster recovery depends on Pi-hole availability (mitigated by HA)
## Implementation
See related changes in:
- `playbooks/system/rpi.yml` - dnsmasq group membership fix
- `playbooks/dns/k3s_dns.yml` - CoreDNS forwarding to HA Pi-hole instances
- `playbooks/dns/roles/pihole/defaults/main.yml` - Explicit upstream DNS configuration
## Post-Implementation Notes
### Issue Encountered: dnsmasq vs pihole-FTL Port Conflict
During execution, we discovered that **dnsmasq** and **pihole-FTL** both attempt to bind to port 53. On pi1:
- pihole-FTL was running and handling DNS on port 53
- dnsmasq service was failing because port 53 was already in use
**Resolution**: The dnsmasq service on Pi-hole nodes is **not needed** when pihole-FTL is running, as pihole-FTL includes its own DNS server (dnsmasq) internally. The system dnsmasq service should remain **disabled** on Pi-hole nodes to avoid conflicts.
### Verification Commands
Check DNS resolution from cluster:
```bash
kubectl run dns-test --image=busybox:1.28 -it --rm --restart=Never -- \
nslookup charts.longhorn.io 192.168.1.201
# Check CoreDNS forward to both Pi-holes
kubectl get cm -n kube-system coredns -o yaml
# Check Pi-hole instances
ssh pi1 "dig @127.0.0.1 google.com +short"
ssh pi3 "dig @127.0.0.1 google.com +short"
```
## Related Incidents
- [2026-04-13-power-cut](../incidents/2026-04-13-power-cut/README.md) - Power cut caused DNS resolution failure, blocking Longhorn reinstall and Traefik recovery


@@ -1,550 +0,0 @@
# ADR 20260414: Longhorn PVC Recovery When Reinstalled
---
## 📋 **Executive Summary**
After the April 13, 2026 power cut incident and subsequent cluster recovery, we discovered a **critical gap** in Longhorn volume restoration. While the **raw replica data files** (`volume-head-*.img`) remain intact on disk across all nodes, Longhorn cannot automatically **re-associate** them with new Volume CRDs due to its internal engine ID naming scheme. This document explains the problem and provides three recovery approaches.
---
## 🔍 **The Root Problem**
### **What Happened**
1. **Power cut** → Longhorn CSI driver lost connection
2. **Force-deletion of Longhorn pods** → Webhook circular dependency
3. **Nuclear cleanup** → All Longhorn CRDs (Volume, Engine, Replica) were deleted
4. **Reinstallation** → New Volume CRDs created with new engine IDs
### **Directory Structure Issue**
Longhorn stores replica data in directories named by **volume name + engine ID**:
```
/mnt/arcodange/longhorn/replicas/
├── pvc-cc8a3cbb-dbc2-47a2-a0cc-a02136122b90-cd16e459/ # ← OLD (orphaned)
│ ├── volume-head-002.img # ← Actual Traefik data (128Mi)
│ ├── volume-head-002.img.meta
│ └── volume-snap-*.img
├── pvc-cc8a3cbb-dbc2-47a2-a0cc-a02136122b90-8c7d8ab4/ # ← NEW (empty)
│ ├── volume-head-002.img # ← Empty 128Mi
│ └── volume-head-002.img.meta
└── ...
```
**The Problem:** When you recreate a Volume CRD, Longhorn generates a **new engine ID** (e.g., `8c7d8ab4`), creating a **new empty directory** instead of adopting the existing one (`cd16e459`).
### **Why This Matters**
| Component | Persistence | Recovery Path |
|-----------|-------------|---------------|
| **Replica `.img` files** | ✅ **Survives** on disk | Manual intervention required |
| **Volume CRD** | ❌ **Deleted** | Must recreate |
| **Engine/Replica CRDs** | ❌ **Deleted** | Auto-recreated by Longhorn |
| **Engine ID** | ❌ **Changes** | **Cannot be recovered without backup** |
**Without the original Volume CRD backup, Longhorn cannot match orphaned replica directories to new Volume CRDs.**
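One way to see the mismatch, using the volume from the example above: list the on-disk replica directories and compare their suffixes with the replica CRs Longhorn currently tracks:
```bash
ssh pi2 "ls /mnt/arcodange/longhorn/replicas/ | grep pvc-cc8a3cbb"
kubectl -n longhorn-system get replicas.longhorn.io | grep pvc-cc8a3cbb
```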
---
## 🎯 **Recovery Methods Comparison**
| Method | Complexity | Data Safety | Downtime | Best For |
|--------|------------|-------------|----------|----------|
| **[A: Manual `dd` Copy](#method-a-manual-dd-copy)** | ⭐⭐⭐⭐ | ✅✅✅✅ | Medium | Critical data, no app backup |
| **[B: Directory Rename](#method-b-directory-rename)** | ⭐⭐⭐ | ✅✅ | Low | Small volumes, no Rebuilding replicas |
| **[C: Fresh Volume + App Restore](#method-c-fresh-volume--app-restore)** | ⭐⭐ | ✅✅✅✅✅ | Low | Non-critical data, app backups exist |
| **[D: Block-Device Injection (Automated)](#method-d-block-device-injection-automated)** | ⭐⭐⭐ | ✅✅✅✅ | Medium | **Recommended — any volume, no dir swap needed** |
| **[E: Longhorn Google Storage Restore](#method-e-longhorn-google-storage-restore)** | ⭐⭐ | ✅✅✅✅✅ | Low | Volumes with Longhorn backup configured |
**Method B was proven risky** (2026-04-13 recovery): Longhorn reconciliation finds `Dirty: true`
metadata + a clean empty pi1 replica → silently rebuilds from the empty source, destroying data.
Use Method D for any volume larger than ~128Mi or with Rebuilding replicas.
---
## 🛠️ **Method A: Manual `dd` Copy**
### **Concept**
Manually copy the data from the orphaned `.img` file to the new replica directory that Longhorn created for the new Volume CRD.
### **Prerequisites**
- Root access to all nodes
- Volume CRD already recreated (with new engine ID)
- Longhorn has created new empty replica directories
- `dd` and `qemu-img` tools available
### **Steps**
```bash
# 1. Identify source (old data) and destination (new empty)
SOURCE_NODE=pi2
SOURCE_DIR=/mnt/arcodange/longhorn/replicas/pvc-cc8a3cbb-dbc2-47a2-a0cc-a02136122b90-cd16e459
SOURCE_IMG=$(ssh $SOURCE_NODE "ls $SOURCE_DIR/volume-head-*.img | head -1")
DEST_DIRS=(
  pi1:/mnt/arcodange/longhorn/replicas/pvc-cc8a3cbb-dbc2-47a2-a0cc-a02136122b90-8c7d8ab4
  pi2:/mnt/arcodange/longhorn/replicas/pvc-cc8a3cbb-dbc2-47a2-a0cc-a02136122b90-8c7d8ab4
  pi3:/mnt/arcodange/longhorn/replicas/pvc-cc8a3cbb-dbc2-47a2-a0cc-a02136122b90-8c7d8ab4
)
# 2. Stream the data from the source node to each destination node.
#    (Do not reuse PATH as a variable name: it would clobber the shell's search path.)
for DEST in "${DEST_DIRS[@]}"; do
  NODE=${DEST%%:*}
  DEST_PATH=${DEST#*:}
  ssh $NODE "sudo mkdir -p $DEST_PATH"
  ssh $SOURCE_NODE "sudo dd if=$SOURCE_IMG bs=4M" | ssh $NODE "sudo dd of=$DEST_PATH/volume-head-002.img bs=4M"
done
# 3. Restart Longhorn engine pods to pick up new data
kubectl delete pod -n longhorn-system -l longhorn.io/component=engine
# 4. Verify data is accessible
kubectl get volume -n longhorn-system pvc-cc8a3cbb-dbc2-47a2-a0cc-a02136122b90
# Should show: state=attached, robustness=healthy
```
### **Pros**
- ✅ Guaranteed data recovery
- ✅ Works for any volume size
- ✅ Preserves all snapshots and metadata
### **Cons**
- ⚠️ Requires manual intervention on each node
- ⚠️ Must know source and destination paths
- ⚠️ Risk of data corruption if `dd` fails mid-copy
- ⚠️ Volume must be in detached state during copy
### **Risk Mitigation**
- Verify checksums after copy: `sha256sum /path/to/image.img`
- Copy to one node at a time, verify between each
- Use `pv` for progress (run from the source node): `pv $SOURCE_IMG | ssh $NODE "sudo dd of=$DEST_PATH/volume-head-002.img bs=4M"`
---
## 🏷️ **Method B: Directory Rename**
### **Concept**
Rename the orphaned replica directory to match the **engine ID** that Longhorn expects for the new Volume CRD.
### **Prerequisites**
- Volume CRD already recreated
- Longhorn has created engine CRDs (check: `kubectl get engines -n longhorn-system`)
- Must act quickly before Longhorn initializes new empty replicas
### **Steps**
```bash
# 1. Find the new engine name for the volume
ENGINE=$(kubectl get engines -n longhorn-system -l longhorn.io/volume=pvc-cc8a3cbb-dbc2-47a2-a0cc-a02136122b90 -o jsonpath='{.items[0].metadata.name}')
# Example: pvc-cc8a3cbb-dbc2-47a2-a0cc-a02136122b90-e-0
ENGINE_SUFFIX=${ENGINE##*-e-} # "0": the engine index, not the replica directory suffix
# But the directory uses a different format...
# 2. Check actual directory names
kubectl get replicas -n longhorn-system | grep pvc-cc8a
# Output: pvc-cc8a3cbb-dbc2-47a2-a0cc-a02136122b90-r-8c7d8ab4
# 3. Rename on the node where orphaned data exists
NODE=$(kubectl get replicas -n longhorn-system pvc-cc8a3cbb-dbc2-47a2-a0cc-a02136122b90-r-8c7d8ab4 -o jsonpath='{.metadata.labels.longhorn\.io/last-attached-node}')
ssh $NODE "sudo mv /mnt/arcodange/longhorn/replicas/pvc-cc8a3cbb-dbc2-47a2-a0cc-a02136122b90-cd16e459 \
  /mnt/arcodange/longhorn/replicas/pvc-cc8a3cbb-dbc2-47a2-a0cc-a02136122b90-8c7d8ab4"
# 4. Restart the replica pod
kubectl delete pod -n longhorn-system $(kubectl get pods -n longhorn-system -o jsonpath='{.items[?(@.metadata.labels.longhorn\.io/replica=="pvc-cc8a3cbb-dbc2-47a2-a0cc-a02136122b90-r-8c7d8ab4")].metadata.name}')
```
### **Pros**
- ✅ Fastest method
- ✅ No data copying required
- ✅ Preserves all existing data and snapshots
### **Cons**
- ⚠️ **High risk of mismatch** - wrong directory rename = data loss
- ⚠️ Must identify correct engine ID for each node
- ⚠️ Replica directories exist on multiple nodes - must rename on ALL
- ⚠️ Longhorn may have already initialized new empty replicas
### **Critical Warning**
**Each volume has replicas on ALL nodes.** You must:
1. Identify which node has which orphaned directory
2. Rename each to match the corresponding new engine's expected path
3. Ensure consistency across all nodes
**Example for pvc-cc8a:**
```bash
# Orphaned dirs:
# pi2: pvc-cc8a...-cd16e459
# pi3: pvc-cc8a...-011b54b3
# New engine paths (from kubectl get replicas):
# pi1: pvc-cc8a...-r-8c7d8ab4
# pi2: pvc-cc8a...-r-32aa3e1e
# pi3: pvc-cc8a...-r-3e84c460
# Must rename EACH orphaned dir to match new engine on SAME node
```
---
## 🆕 **Method C: Fresh Volume + App Restore** *(Recommended for Traefik)*
### **Concept**
1. Let Longhorn create a **new empty volume** for the PVC
2. Restore the **application data** (Traefik's `acme.json`) from application-level backups
### **Prerequisites**
- Application-level backup exists (e.g., Traefik config, certificates)
- Data is non-critical or easily restorable
- Storage requirements are small (128Mi for Traefik)
### **Steps**
```bash
# 1. Delete the problematic Volume CRD (if any)
kubectl delete volume -n longhorn-system pvc-cc8a3cbb-dbc2-47a2-a0cc-a02136122b90 --ignore-not-found
# 2. Delete the PVC
kubectl delete pvc -n kube-system traefik
# 3. Let StorageClass provision a fresh volume
kubectl apply -f - <<EOF
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
name: traefik
namespace: kube-system
spec:
accessModes: [ReadWriteOnce]
resources: {requests: {storage: 128Mi}}
storageClassName: longhorn
volumeMode: Filesystem
EOF
# 4. Wait for PV to be provisioned
kubectl wait --for=jsonpath='{.status.phase}'=Bound pvc -n kube-system traefik
# 5. Restore Traefik data from backup
BACKUP_FILE="/path/to/traefik-backup/acme.json"
kubectl cp $BACKUP_FILE kube-system/traefik-XXXXXX-XXXX:/data/acme.json
kubectl exec -n kube-system traefik-XXXXXX-XXXX -- chown 65532:65532 /data/acme.json
kubectl exec -n kube-system traefik-XXXXXX-XXXX -- chmod 600 /data/acme.json
```
### **Traefik-Specific Recovery**
For Traefik, the critical data is:
- `/data/acme.json` - TLS certificates obtained from Let's Encrypt
- `/data/tls.yml` - (if used)
- Secrets in Kubernetes (separate from PVC)
**Backup locations to check:**
```bash
# Check if we have Traefik data backups
ssh pi1 "ls -la /home/pi/arcodange/backups/traefik/ 2>/dev/null || echo 'No backup found'"
# Check ArgoCD apps (if Traefik was deployed via GitOps)
kubectl get app -n argocd | grep traefik
```
### **Pros**
- ✅ **Simplest and safest** method
- ✅ No risk of Longhorn directory mismatches
- ✅ Works even without Longhorn CRD backups
- ✅ Verifiable - you can confirm data was restored
- ✅ Clean state - no orphaned directories
### **Cons**
- ⚠️ Requires application-level backups
- ⚠️ TLS certificates may have expired (need to re-issue)
---
## 🏆 **Recommendation: Method C for Traefik**
### **Why Method C is Best for This Case**
| Factor | Assessment |
|--------|------------|
| **Volume Size** | 128Mi (small) |
| **Data Criticality** | TLS certs can be re-generated |
| **Backup Availability** | Likely exists in ArgoCD/Git |
| **Complexity** | Low |
| **Risk** | Minimal |
| **Time Required** | ~5 minutes |
### **Data Loss Assessment for Traefik**
The **worst case** (no Traefik backup):
- TLS certificates will be **re-issued** automatically by cert-manager + Let's Encrypt
- No permanent data loss - certificates are ephemeral
- Client impact: Brief TLS warning during re-issuance (~1-2 minutes)
**Verdict:** 🟢 **Method C is the safest and most practical approach.**
---
## 🔧 **Prevention: What We Must Fix**
### **1. Update Backup Playbook** (`playbooks/backup/k3s_pvc.yml`) ✅ Done 2026-04-16
`backup_cmd` now captures:
1. All PersistentVolumes (PV)
2. All PersistentVolumeClaims (PVC)
3. **All Longhorn Volumes** (critical — enables fast restore via `kubectl apply` instead of block-device injection)
4. All Longhorn Settings (backup target configuration)
### **2. Test Backups Regularly**
```bash
# Monthly test: Restore a non-critical volume
# Pick a test volume, delete it, restore from backup
kubectl delete volume -n longhorn-system <test-volume>
kubectl apply -f <backup-file>
kubectl get volume -n longhorn-system <test-volume> -w
```
### **3. Validate Backup Files**
```bash
# Check backup contains Longhorn resources
grep "longhorn.io/v1beta2" /path/to/backup-*.volumes
grep "kind: Volume" /path/to/backup-*.volumes
```
### **4. Document Recovery Procedure**
- [ ] Create `docs/admin/longhorn-recovery.md` with these steps
- [ ] Add to team runbook
- [ ] Include in incident response training
---
## 📊 **Test Scenario: Battle Testing PVC Recovery**
### **Test Setup**
```bash
# 1. Create a test namespace
kubectl create ns longhorn-test
# 2. Create a test PVC
kubectl apply -f - <<EOF
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
name: test-longhorn-recovery
namespace: longhorn-test
labels:
purpose: test
spec:
accessModes: [ReadWriteOnce]
resources: {requests: {storage: 1Gi}}
storageClassName: longhorn
EOF
# 3. Deploy a test pod to write data
kubectl apply -f - <<EOF
apiVersion: v1
kind: Pod
metadata:
name: test-writer
namespace: longhorn-test
spec:
containers:
- name: writer
image: alpine
command: [sh, -c, "echo 'test data for recovery' > /data/testfile.txt && echo 'more data' >> /data/testfile.txt && tail -f /dev/null"]
volumeMounts:
- name: data
mountPath: /data
volumes:
- name: data
persistentVolumeClaim:
claimName: test-longhorn-recovery
EOF
# 4. Write and verify data
kubectl exec -n longhorn-test test-writer -- cat /data/testfile.txt
# Should show: "test data for recovery\nmore data"
# 5. Backup everything
kubectl get -A pv,pvc -o yaml > /tmp/test-backup-pv-pvc.yaml
kubectl get -A volumes.longhorn.io -o yaml >> /tmp/test-backup-pv-pvc.yaml
echo '---' >> /tmp/test-backup-pv-pvc.yaml
kubectl get -A settings.longhorn.io -o yaml >> /tmp/test-backup-pv-pvc.yaml
```
### **Test Execution: Simulate Disaster**
```bash
# 6. Simulate disaster - delete everything
# Capture the PV name before deleting the PVC (it already carries the pvc- prefix)
VOLUME=$(kubectl get pvc -n longhorn-test test-longhorn-recovery -o jsonpath='{.spec.volumeName}')
kubectl delete pod -n longhorn-test test-writer
kubectl delete pvc -n longhorn-test test-longhorn-recovery
kubectl delete volume -n longhorn-system $VOLUME
# 7. Restore from backup
kubectl apply -f /tmp/test-backup-pv-pvc.yaml
# 8. Verify recovery
kubectl get pvc -n longhorn-test test-longhorn-recovery
kubectl get volumes -n longhorn-system | grep test-longhorn-recovery
# 9. Deploy test reader pod
kubectl apply -f - <<EOF
apiVersion: v1
kind: Pod
metadata:
name: test-reader
namespace: longhorn-test
spec:
containers:
- name: reader
image: alpine
command: [sh, -c, "cat /data/testfile.txt && tail -f /dev/null"]
volumeMounts:
- name: data
mountPath: /data
volumes:
- name: data
persistentVolumeClaim:
claimName: test-longhorn-recovery
EOF
# 10. Check if data is recovered
kubectl logs -n longhorn-test test-reader
# Should show: "test data for recovery\nmore data"
```
### **Expected Results**
| Test Step | Pass Criteria |
|-----------|---------------|
| Volume CRD restored | `kubectl get volumes` shows the test volume |
| PVC bound | `kubectl get pvc` shows status=Bound |
| Data accessible | Test reader pod shows original data |
### **Test Cleanup**
```bash
kubectl delete ns longhorn-test
```
---
## 🛠️ **Method D: Block-Device Injection (Automated)**
### **Concept**
Bypass Longhorn's replica reconciliation entirely. Create a fresh Volume CRD, attach it in
maintenance mode, then inject the recovered filesystem directly into the live block device via
`rsync`. The old replica dirs are never renamed or touched — the data is copied into the new
Longhorn-managed volume.
### **Implementation**
See `playbooks/recover/longhorn_data.yml` — a 9-phase Ansible playbook that automates the full
sequence for one or more volumes in a single run.
### **Key Steps**
```
Phase 0: Auto-discover best replica dir (skip Rebuilding:true, rank by actual disk usage)
Phase 1: Backup untouched replica dir
Phase 2: Merge sparse snapshot+head layers → single flat image (merge-longhorn-layers.py)
Phase 3: Create Longhorn Volume CRD, wait for replicas
Phase 4: Scale down workload
Phase 5: Attach via VolumeAttachment maintenance ticket
Phase 6: mkfs.ext4 + mount + rsync from merged image
Phase 7: Remove maintenance ticket
Phase 8: Recreate PV (Retain, no claimRef) + PVC (volumeName pinned)
Phase 9: Scale up, wait readyReplicas ≥ 1
```
### **Usage**
```bash
ansible-playbook -i inventory/hosts.yml playbooks/recover/longhorn_data.yml \
-e @playbooks/recover/longhorn_data_vars.yml
```
Vars file format:
```yaml
longhorn_recovery_volumes:
- pv_name: pvc-abc123
pvc_name: myapp-data
namespace: myapp
size_bytes: "134217728"
size_human: 128Mi
access_mode: ReadWriteOnce
workload_kind: Deployment
workload_name: myapp
# source_node and source_dir are auto-discovered if omitted
verify_cmd: ""
```
### **Pros**
- ✅ Fully automated — handles all phases including PV/PVC recreation
- ✅ Auto-discovers best replica (skips Rebuilding dirs)
- ✅ Idempotent — safe to re-run (skips backup/merge if already done)
- ✅ Works for RWO and RWX volumes
### **Cons**
- ⚠️ Requires ~2× volume size in temporary disk space for merged image
- ⚠️ The new volume has 3 fresh replicas (not the original topology) — Longhorn will resync
---
## 🗄️ **Method E: Longhorn Google Storage Restore**
### **Concept**
Some volumes are configured with Longhorn's built-in backup feature targeting a Google Storage
bucket. For those volumes, a Longhorn backup can be restored into a new volume without needing
the raw replica files.
### **Applicable Volumes**
- `backups-rwx` (`pvc-efda1d2f`) — the cluster backup volume itself has a Longhorn GCS backup configured
### **When to use**
Use when:
- The local replica dirs are missing or corrupted (Method D cannot be used)
- A clean point-in-time restore is preferred over a raw replica merge
### **Status**
A playbook for this method (`playbooks/recover/longhorn_gcs_restore.yml`) is **planned but not
yet implemented**. In the 2026-04-13 incident, `backups-rwx` was successfully recovered via
Method D (local replica merge), so Method E was not needed.
When the playbook is implemented, it will use `kubectl apply` of a `BackupVolume` + `Backup`
restore CR pointing to the GCS bucket configured in Longhorn settings.
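For reference, the low-level restore primitive Longhorn already supports today is a new Volume CR whose `spec.fromBackup` points at a backup in the configured target. A rough sketch only, with a placeholder target URL and backup ID (the real values come from the backup target configured in Longhorn settings):
```bash
# Sketch: restore a backup into a fresh volume; URL, backup ID and name are placeholders.
kubectl apply -f - <<EOF
apiVersion: longhorn.io/v1beta2
kind: Volume
metadata:
  name: backups-rwx-restore
  namespace: longhorn-system
spec:
  fromBackup: "s3://example-bucket@us-east-1/?backup=backup-xxxx&volume=pvc-efda1d2f"
  numberOfReplicas: 3
  size: "53687091200"   # 50Gi in bytes
EOF
```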
---
## 📚 **References**
- [Longhorn Documentation: Disaster Recovery](https://longhorn.io/docs/1.6.0/deploy/uninstall/disaster-recovery/)
- [Longhorn Volume CRD Spec](https://github.com/longhorn/longhorn/blob/master/types/types.go)
- [Original Issue: Longhorn GitHub #4837](https://github.com/longhorn/longhorn/issues/4837) (Replica orphan handling)
- [Related ADR: Internal DNS Architecture](./20260414-internal-dns-architecture.md)
- [Related Incident: 2026-04-13 Power Cut](../incidents/2026-04-13-power-cut/README.md)
---
*Document created: 2026-04-14*
*Last updated: 2026-04-15*
*Status: Method D (block-device injection) implemented and battle-tested on 5 volumes (2026-04-14/15)*


@@ -1,420 +0,0 @@
---
title: Power Cut - Longhorn Storage System Failure
incident_id: 2026-04-13-001
date: 2026-04-13
time_start: 15:23:57 UTC
time_end: "2026-04-15 (ongoing — Vault/ERP manual recovery deferred)"
status: Mostly Resolved
severity: SEV-1
tags:
- kubernetes
- longhorn
- storage
- k3s
- power-cut
- csi-driver
- block-device-recovery
---
# Power Cut - Longhorn Storage System Failure
## Summary
A power cut caused a cascading failure of the Longhorn distributed storage system in the k3s cluster. The Longhorn CSI driver (`driver.longhorn.io`) lost its registration with kubelet, preventing all Persistent Volume Claims (PVCs) from mounting. This affected ~43 pods across 12 namespaces, including critical infrastructure like Traefik ingress controller, application pods, and monitoring tools.
The actual volume data stored in Longhorn replicas at `/mnt/arcodange/longhorn/replicas/` on each node **remains intact**. Recovery efforts are focused on restoring CSI driver registration and Longhorn manager functionality.
## Impact
### Affected Services
- **Critical**: Longhorn storage system (all CSI components)
- **Critical**: Traefik ingress controller (cannot mount PVC)
- **High**: Application pods using Longhorn PVCs (cms, webapp, erp, clickhouse, etc.)
- **High**: Tool pods (grafana, prometheus, hashicorp-vault, redis, crowdsec)
- **Medium**: Docker storage corruption on nodes (overlay2)
- **Low**: NFS backup mount unavailable
### User Impact
- External access to services via Traefik: **DOWN**
- Gitea registry image pulls: **FAILING**
- Persistent data access: **DEGRADED** (data exists but inaccessible)
- Monitoring dashboards: **DOWN**
### Metrics
- **Failed Pods**: 43 pods in error state (CrashLoopBackOff, Error, ImagePullBackOff)
- **Healthy Pods**: ~37 pods running
- **Longhorn Pods**: 25 total, ~12 currently healthy
- **Nodes**: 3/3 Ready (pi1 control-plane, pi2, pi3)
## Component Roles
### Longhorn Components
| Component | Role | Current Status | Importance |
|-----------|------|----------------|------------|
| **longhorn-manager** | Orchestrates Longhorn volumes, handles volume operations | 2/3 running, 1 partial | CRITICAL |
| **longhorn-driver-deployer** | Deploys the CSI driver to nodes | Init:0/1 (BLOCKED) | CRITICAL |
| **longhorn-csi-plugin** | CSI plugin daemonset - handles node-level CSI operations | 0/3 Error | CRITICAL |
| **csi-attacher** | Handles volume attachment to nodes | 2/3 running, 1 Error | CRITICAL |
| **csi-provisioner** | Creates volumes from PVC requests | 2/3 running, 1 Error | CRITICAL |
| **csi-resizer** | Handles volume resizing | 1/3 running, 2 Error | HIGH |
| **csi-snapshotter** | Handles volume snapshots | 2/3 running, 1 Error | MEDIUM |
| **engine-image** | Pulls and manages engine binaries | 3/3 Running | HIGH |
| **longhorn-ui** | Web UI for Longhorn management | 0/2 CrashLoopBackOff | MEDIUM |
| **rwx-nfs** | NFS server for backup volume | 0/1 ContainerCreating | MEDIUM |
| **share-manager** | Manages NFS shares for volumes | 0/2 Error | MEDIUM |
### Other Affected Components
| Component | Role | Dependencies | Status |
|-----------|------|--------------|--------|
| **Traefik** | Ingress controller, routes external traffic | Requires PVC for certs | Error (cannot mount PVC) |
| **coredns** | Cluster DNS | Docker storage | Crashing (overlay2 corruption) |
| **svclb-traefik** | Service load balancer for Traefik | Docker storage | Crashing (overlay2 corruption) |
| **Application Pods** | Various services (cms, webapp, erp, etc.) | Longhorn PVCs | Error/ImagePullBackOff |
## Timeline
| Time (UTC) | Event | Owner | Notes |
|------------|-------|-------|-------|
| ~15:23 | Power cut occurred | - | Cluster lost power |
| 15:23:57 | Incident detection started | Mistral Vibe | Initial assessment began |
| 15:24:05 | Baseline documented | Mistral Vibe | 43 pods in error, Longhorn down |
| 15:24:10 | Root cause identified | Mistral Vibe | CSI driver `driver.longhorn.io` not registered |
| 15:24:30 | Recovery plan formulated | Mistral Vibe | HelmChart manifest touch, then pod deletion |
| 15:24:50 | Step 1: Touch longhorn-install.yaml | Mistral Vibe | Manifest timestamp updated on pi1 |
| 15:25:50 | Step 1 outcome: Insufficient | Mistral Vibe | Only 1 pod affected, CSI still down |
| 15:32:15 | Step 2: Delete all longhorn-system pods | Mistral Vibe | Force deleted 24 pods — created webhook circular dependency |
| 15:32:30 | Step 2 outcome: Partial recovery | Mistral Vibe | Managers recovering, CSI still failing |
| 16:15:00 | Root cause 2 identified | Mistral Vibe | Webhook circular dependency — decided nuclear cleanup |
| 16:30:00 | Backups secured | Mistral Vibe | PV/PVC and Longhorn CRDs backed up to pi1 |
| 16:35:00 | Backup script bug fixed | Claude Code | `backup_cmd` fixed to produce valid YAML |
| 17:00:00 | Nuclear cleanup executed | Claude Code | Removed all Longhorn CRDs, PVC finalizers, restarted k3s |
| 17:08:00 | Longhorn namespace deleted | Claude Code | Clean slate confirmed |
| 17:09:00 | Longhorn reinstall started | Claude Code | `playbooks/recover/longhorn.yml` run on pi1 |
| 17:30:00 | Docker config corruption found | Claude Code | daemon.json had Python string not JSON |
| 17:35:00 | Docker config fixed | Claude Code | Valid JSON deployed to all nodes |
| 17:50:00 | DNS failure identified | Claude Code | CoreDNS cannot resolve external domains |
| ~19:00 | DNS fixed | Claude Code | Pi-hole dnsmasq group + CoreDNS upstream config |
| ~19:30 | Longhorn reinstall completed | Claude Code | All Longhorn pods Running, CSI registered |
| 2026-04-14 00:00 | PVC recovery work started | Claude Code | Block-device recovery approach developed |
| 2026-04-14 | Traefik recovered | Claude Code | Simple PV recreation (no data loss for certs) |
| 2026-04-14 | url-shortener recovered | Claude Code | Method B (dir rename) + PV/PVC recreate |
| 2026-04-14 | Block-device recovery developed | Claude Code | `merge-longhorn-layers.py` + 9-phase playbook |
| 2026-04-14 | Clickhouse recovered | Claude Code | `longhorn_data.yml` playbook — first automated run |
| 2026-04-15 | Automated recovery for 4 volumes | Claude Code | prometheus, alertmanager, redis, backups-rwx |
| 2026-04-15 | Vault/ERP recovery deferred | - | Too sensitive for automated approach, manual later |
## Root Cause Analysis
### Primary Root Cause
**Power cut caused Longhorn CSI driver registration to be lost.**
The Longhorn CSI driver (`driver.longhorn.io`) is registered with the kubelet on each node. When the power cut occurred:
1. K3s/kubelet processes crashed
2. Longhorn manager pods crashed
3. CSI driver registration was lost
4. On restart, Longhorn pods attempted to restart but:
- The `longhorn-driver-deployer` pod has an init container (`wait-longhorn-manager`) that waits for managers to be ready
- Longhorn managers were slow to recover (some still in CrashLoopBackOff)
- CSI pods (attacher, provisioner, resizer, snapshotter) cannot start without the CSI socket at `/var/lib/kubelet/plugins/driver.longhorn.io/csi.sock`
- Custom Resource Definitions (Volumes, Replicas, etc.) exist but CSI driver cannot communicate with them
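Whether the registration survived can be verified directly on each node, since kubelet keeps both of the driver's sockets on disk:
```bash
# On each node (pi1, pi2, pi3): both sockets must exist for the driver to work.
ls -l /var/lib/kubelet/plugins/driver.longhorn.io/          # CSI endpoint (csi.sock)
ls -l /var/lib/kubelet/plugins_registry/ | grep longhorn    # kubelet registration socket
```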
### Secondary Issues
1. **Docker overlay2 corruption**: Docker storage at `/mnt/arcodange/docker/overlay2/` was corrupted on at least pi1, affecting coredns and svclb-traefik pods
2. **NFS backup mount unavailable**: The Longhorn share-manager pod (which exports NFS) is in Error state, making `/mnt/backups/` inaccessible
3. **Backup scripts bug**: The `backup.volumes` file at `/opt/k3s_volumes/backup.volumes` is empty due to a script formatting bug
### Failure Propagation
```mermaid
%%{init: { 'theme': 'forest' }}%%
graph TD
A[Power Cut] --> B[Kubelet Crashes]
A --> C[Docker Daemon Crashes]
B --> D[Longhorn Manager Pods Crash]
B --> E[CSI Driver Registration Lost]
C --> F[Overlay2 Filesystem Corrupt]
D --> G[Driver-Deployer Init Container Waits]
E --> H[CSI Socket Disappears]
G --> I[CSI Driver Not Deployed]
H --> J[CSI Pods Cannot Start]
I --> J
J --> K[PVC Mounts Fail]
K --> L[Application Pods Crash]
F --> M[Docker Containers Fail to Start]
M --> N[CoreDNS Crashes]
M --> O[Service Load Balancers Crash]
N --> P[DNS Resolution Fails]
O --> P
P --> L
K --> L
```
### Why Data Is Safe
The Longhorn volume data is stored in replicas across all three nodes at `/mnt/arcodange/longhorn/replicas/`. Checking the Longhorn volumes shows:
```
All 12 volumes: state="attached", robustness="healthy"
```
This confirms that:
1. Volume metadata is intact in etcd
2. Replica data is intact on disk
3. Once CSI driver is restored, volumes will be accessible again
4. **No permanent data loss has occurred**
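The same health summary can be reproduced at any time with a custom-columns query over the Volume CRDs:
```bash
kubectl get volumes.longhorn.io -n longhorn-system \
  -o custom-columns=NAME:.metadata.name,STATE:.status.state,ROBUSTNESS:.status.robustness
```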
## Recovery Actions Taken
### Attempt 1: HelmChart Manifest Touch (15:24:50 - 15:25:50)
**Action:** Touched `/var/lib/rancher/k3s/server/manifests/longhorn-install.yaml` on pi1
**Command:**
```bash
ssh pi@pi1 "sudo touch /var/lib/rancher/k3s/server/manifests/longhorn-install.yaml"
```
**Outcome:** Only triggered reconcile for 1 pod (longhorn-manager-w85v6). CSI driver still not registered.
**Decision:** Insufficient. Need more aggressive approach.
### Attempt 2: Force Delete All Longhorn Pods (15:32:15 - Present)
**Action:** Force deleted all 24 pods in longhorn-system namespace
**Command:**
```bash
kubectl delete pods -n longhorn-system --all --force --grace-period=0
```
**Outcome:**
- HelmChart controller detected changes and recreated all pods
- **Success**: 23/25 pods now in Running state (15:34:30)
- **Blocking**: `longhorn-driver-deployer` stuck in Init:0/1
- **Blocking**: All `longhorn-csi-plugin` pods in Error
- **Investigation**: driver-deployer's `wait-longhorn-manager` init container waiting for manager readiness
### Current Investigation (15:34:30)
**Focus:** Why driver-deployer is stuck in Init state
The `longhorn-driver-deployer` pod has an init container that waits for Longhorn manager to be ready before deploying the CSI driver. Despite 3 manager pods running, the wait condition is not being met.
**Hypotheses:**
1. Manager pods are not fully healthy (readiness probes failing)
2. Network connectivity between driver-deployer and managers
3. RBAC or service account permissions issue
4. Configuration mismatch in HelmChart values
## Current Status (2026-04-15)
### Longhorn System
- **All Longhorn pods**: Running ✅ (reinstalled 2026-04-13)
- **CSI driver**: Registered ✅
### Volume Recovery Status
| PVC | Namespace | Size | Status |
|-----|-----------|------|--------|
| `traefik` (kube-system) | kube-system | 128Mi | ✅ Recovered (2026-04-14) |
| `url-shortener-data` | url-shortener | 128Mi | ✅ Recovered (2026-04-14) |
| `clickhouse-storage-clickhouse-0` | tools | 16Gi | ✅ Recovered (2026-04-14) |
| `prometheus-server` | tools | 8Gi | ⏳ In progress (2026-04-15) |
| `storage-prometheus-alertmanager-0` | tools | 2Gi | ⏳ In progress (2026-04-15) |
| `redis-storage-redis-0` | tools | 1Gi | ⏳ In progress (2026-04-15) |
| `backups-rwx` | longhorn-system | 50Gi | ⏳ In progress (2026-04-15) |
| `data-hashicorp-vault-0` | tools | 10Gi | 🔴 Deferred — manual recovery |
| `audit-hashicorp-vault-0` | tools | 10Gi | 🔴 Deferred — manual recovery |
| `erp` | erp | 50Gi | 🔴 Deferred — manual recovery |
## Next Steps
### Immediate
1. Confirm prometheus, alertmanager, redis, backups-rwx fully recovered via `longhorn_data.yml`
2. Verify monitoring stack (Grafana dashboards, alert routing) is functional
### Short-term
3. Manual recovery of Vault (`data-hashicorp-vault-0`, `audit-hashicorp-vault-0`) — see Vault runbook
4. Manual recovery of ERP (`erp`) — coordinate with application owner
5. Update backup playbook to include Longhorn Volume CRDs (see ADR 20260414-longhorn-pvc-recovery)
6. Prepare Longhorn Google Storage restore playbook for `backups-rwx` alternative recovery path
### Long-term
- Implement UPS for the Raspberry Pi cluster
- Add Longhorn volume health monitoring to Grafana
- Regular backup restore drills
## Architecture Context
```mermaid
%%{init: { 'theme': 'forest' }}%%
flowchart TB
subgraph K3s Control Plane
A[pi1: Control Plane] -->|runs| B[kubelet]
B --> C[k3s server]
C --> D[HelmChart Controller]
end
subgraph Storage Layer
E[Longhorn HelmChart] --> F[Longhorn Manager Pods]
F --> G[Driver Deployer]
G --> H[CSI Driver Registration]
H --> I[CSI Socket: /var/lib/kubelet/plugins/driver.longhorn.io/csi.sock]
F --> J[Longhorn Volumes]
J --> K[Replicas on all 3 nodes]
end
subgraph CSI Components
H --> L[csi-attacher Pods]
H --> M[csi-provisioner Pods]
H --> N[csi-resizer Pods]
H --> O[csi-snapshotter Pods]
H --> P[csi-plugin DaemonSet]
end
subgraph Data Path
I --> Q[/mnt/arcodange/longhorn/]
Q --> R[replicas/]
end
subgraph Docker Storage
S[Docker Daemon] --> T[/mnt/arcodange/docker/]
T --> U[overlay2/]
end
L -->|mounts volumes| V[Application Pods]
M -->|creates volumes| J
P -->|node-level ops| I
classDef critical fill:#c00,color:#fff,stroke:#000
classDef healthy fill:#0a0,color:#000,stroke:#000
classDef degraded fill:#ff0,color:#000,stroke:#000
class H,L,M,N,O,P critical
class F,G,E degraded
class I,J,Q,R,U healthy
```
## Component Details
### Longhorn Manager
- **Role**: Primary controller for Longhorn, manages volumes, replicas, snapshots
- **Image**: `longhornio/longhorn-manager:v1.9.1`
- **Ports**: 9500 (manager), 9501 (webhook health), 9502 (metrics)
- **Data Path**: `/mnt/arcodange/longhorn` (configured in HelmChart values)
- **Health Check**: `https://<pod-ip>:9501/v1/healthz`
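A quick way to probe that endpoint, assuming the standard `app=longhorn-manager` pod label (the certificate is self-signed, hence `-k`):
```bash
kubectl -n longhorn-system get pods -l app=longhorn-manager -o wide   # grab pod IPs
curl -k https://<pod-ip>:9501/v1/healthz
```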
### Longhorn Driver Deployer
- **Role**: Deploys the CSI driver to each node
- **Image**: `longhornio/longhorn-manager:v1.9.1`
- **Init Container**: `wait-longhorn-manager` - waits for manager to be ready
- **Blocker**: Currently stuck in init, preventing CSI driver deployment
### CSI Driver
- **Role**: Implements the CSI (Container Storage Interface) specification for Longhorn
- **Socket**: `/var/lib/kubelet/plugins/driver.longhorn.io/csi.sock`
- **Registration**: Must be registered with kubelet via CSINode
- **Images**:
- `longhornio/csi-attacher:v4.9.0-20250709`
- `longhornio/csi-provisioner:v5.3.0-20250709`
- `longhornio/csi-resizer:v1.14.0-20250709`
- `longhornio/csi-snapshotter:v8.3.0-20250709`
- `longhornio/csi-node-driver-registrar:v2.14.0-20250709`
### CSI Node Driver Registrar
- **Role**: Registers the CSI driver with kubelet
- **Image**: `longhornio/csi-node-driver-registrar:v2.14.0-20250709`
- **Mechanism**: Creates a `CSINode` resource and registers via kubelet plugin registry
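A successful registration is visible in each node's CSINode object:
```bash
# A registered driver appears under .spec.drivers; expect driver.longhorn.io listed.
kubectl get csinode pi1 -o jsonpath='{.spec.drivers[*].name}'
```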
## Action Items
### Immediate (resolved)
- [x] Investigate and resolve driver-deployer init container blocker
- [x] Restore CSI driver registration
- [x] Fix Docker overlay2 corruption / daemon.json on all nodes
- [x] Fix DNS (CoreDNS + Pi-hole dnsmasq config)
- [x] Longhorn reinstalled and healthy
- [x] Traefik ingress controller functional
- [x] Fix backup script (empty backup.volumes bug)
### Short-term (resolved)
- [x] url-shortener data recovered
- [x] Clickhouse data recovered
- [x] Develop automated block-device recovery playbook (`playbooks/recover/longhorn_data.yml`)
- [x] Backup restore procedure documented and tested
### Medium-term (in progress)
- [ ] prometheus, alertmanager, redis, backups-rwx recovered (playbook running 2026-04-15)
- [ ] Vault manual recovery
- [ ] ERP manual recovery
- [ ] Update backup playbook to include Longhorn Volume CRDs
- [ ] Prepare Longhorn Google Storage restore playbook
### Long-term
- [ ] Implement UPS for Raspberry Pi cluster
- [ ] Add Longhorn volume health monitoring to Grafana
- [ ] Add CSI socket health check to monitoring
- [ ] Regular backup restore drills (monthly)
## Lessons Learned
### What Went Well
- Quick identification of root cause (CSI driver registration)
- Longhorn volume data remained intact (good replica design)
- Ability to force-pod-delete triggered partial recovery
- K3s HelmChart approach allows easy manifest-based recovery
### What Could Be Improved
- Need better CSI driver health monitoring and alerting
- Longhorn driver-deployer init container timeout may be too short
- Docker overlay2 on external storage needs better corruption recovery
- Backup script has bugs that prevent reliable backups
- No UPS protection for power cuts
### Technical Debt Identified
- Backup script formatting bug (extra newlines create invalid YAML)
- No automated Longhorn health checks
- Manual intervention required for CSI driver recovery
## Related Files
- **Ansible Playbook**: `playbooks/system/k3s_config.yml` (Longhorn HelmChart creation)
- **HelmChart Manifest**: `/var/lib/rancher/k3s/server/manifests/longhorn-install.yaml` on pi1
- **Backup Scripts**: `/opt/k3s_volumes/backup.sh` and `/opt/k3s_volumes/restore.sh` on pi1
- **Inventory**: `inventory/hosts.yml` (required for all playbooks)
## Commands Reference
### Check Longhorn Status
```bash
kubectl get pods -n longhorn-system
kubectl get volumes -n longhorn-system
kubectl get replicas -n longhorn-system
kubectl get settings -n longhorn-system
```
### Force Longhorn Recovery (k3s-specific)
```bash
# Method 1: Touch manifest (soft reconcile)
sudo touch /var/lib/rancher/k3s/server/manifests/longhorn-install.yaml
# Method 2: Delete all pods (force recreate)
kubectl delete pods -n longhorn-system --all --force --grace-period=0
# Method 3: Delete the driver-deployer pod (kubectl does not expand globs; use a label selector)
kubectl delete pod -n longhorn-system -l app=longhorn-driver-deployer
```
### Check CSI Driver Registration
```bash
kubectl get csidriver
kubectl get csinodes
kubectl describe csidriver driver.longhorn.io
```
### Check Longhorn StorageClass
```bash
kubectl describe cm -n longhorn-system longhorn-storageclass
```


@@ -1,209 +0,0 @@
%%{init: { 'theme': 'forest', 'themeVariables': {
'primaryColor': '#1e293b',
'primaryTextColor': '#f8fafc',
'lineColor': '#334155',
'secondaryColor': '#475569',
'tertiaryColor': '#94a3b8',
'edgeLabelBackground':'#fff',
'edgeLabelColor': '#1e293b'
}}}%%
flowchart TD
subgraph Cluster["K3s Cluster (v1.34.3+k3s1)"]
direction TB
subgraph Nodes["Physical Nodes"]
pi1["pi1: 192.168.1.201\nControl Plane"]
pi2["pi2: 192.168.1.202\nWorker"]
pi3["pi3: 192.168.1.203\nWorker"]
end
subgraph K3sComponents["K3s Control Plane Components"]
kubelet1["kubelet"]
kubelet2["kubelet"]
kubelet3["kubelet"]
k3s_server["k3s server"]
helm_controller["HelmChart Controller"]
end
pi1 --> kubelet1
pi2 --> kubelet2
pi3 --> kubelet3
pi1 --> k3s_server
k3s_server --> helm_controller
end
subgraph LonghornStorage["Longhorn Storage System"]
direction TB
subgraph HelmChart["HelmChart Installation"]
manifest[("longhorn-install.yaml")]
end
subgraph Manager["Longhorn Manager layer"]
lh_manager1["longhorn-manager-r6sd2\n2/2 Running\npi2"]
lh_manager2["longhorn-manager-sjc56\n1/2 Running\npi3"]
lh_manager3["longhorn-manager-t9b45\n1/2 Running\npi1"]
webhook["Webhook Leader: pi2"]
end
subgraph DriverDeployer["CSI Driver Deployer"]
deployer["longhorn-driver-deployer\n0/1 Init:0/1\npi3"]
wait_container["wait-longhorn-manager\nwaiting..."]
end
subgraph CSIDriver["CSI Driver Components"]
csi_socket[("/var/lib/kubelet/plugins/driver.longhorn.io/csi.sock")]
csi_registrar["CSI Node Driver Registrar"]
end
subgraph CSIContainers["CSI Containers (Sidecars)"]
attacher1["csi-attacher-54ld9\n1/1 Running\npi2"]
attacher2["csi-attacher-dqq9v\n1/1 Running\npi3"]
attacher3["csi-attacher-k5jmx\n0/1 Error\npi1"]
provisioner1["csi-provisioner-9z79d\n0/1 Error\npi2"]
provisioner2["csi-provisioner-zjwdr\n1/1 Running\npi1"]
provisioner3["csi-provisioner-zk5kp\n1/1 Running\npi3"]
resizer1["csi-resizer-8mrld\n1/1 Running\npi3"]
resizer2["csi-resizer-ddhl2\n0/1 Error\npi1"]
resizer3["csi-resizer-qv5n9\n0/1 Error\npi2"]
snapshotter1["csi-snapshotter-9rzf4\n1/1 Running\npi3"]
snapshotter2["csi-snapshotter-bqdtd\n0/1 Error\npi2"]
snapshotter3["csi-snapshotter-jv6pj\n1/1 Running\npi1"]
end
subgraph CSIPlugin["CSI Plugin DaemonSet"]
plugin1["longhorn-csi-plugin-f44jp\n0/3 Error\npi3"]
plugin2["longhorn-csi-plugin-q2sgh\n1/3 Error\npi1"]
plugin3["longhorn-csi-plugin-vzld8\n2/3 Error\npi2"]
end
subgraph DataLayer["Longhorn Data Layer"]
engine1["engine-image-ei-8ktd9\n1/1 Running\npi1"]
engine2["engine-image-ei-dcjq8\n1/1 Running\npi3"]
engine3["engine-image-ei-m76jf\n1/1 Running\npi2"]
volumes[("12 Longhorn Volumes")]
replicas[("/mnt/arcodange/longhorn/replicas/")]
end
subgraph UIAndTools["UI & Backup"]
ui1["longhorn-ui-8gb4s\n0/1 CrashLoop\npi1"]
ui2["longhorn-ui-hmxz6\n0/1 CrashLoop\npi3"]
share_mgr1["share-manager-...70b4\n0/1 Error\npi1"]
share_mgr2["share-manager-...7ffa\n0/1 Error\npi3"]
nfs["rwx-nfs-4cn9h\n0/1 ContainerCreating\npi3"]
end
manifest --> lh_manager1 & lh_manager2 & lh_manager3
helm_controller --> manifest
lh_manager1 & lh_manager2 & lh_manager3 --> webhook
deployer --> wait_container
wait_container -.->|waits for| lh_manager1 & lh_manager2 & lh_manager3
deployer --> csi_registrar
csi_registrar --> csi_socket
csi_socket --> kubelet1
csi_socket --> kubelet2
csi_socket --> kubelet3
attacher1 & attacher2 & attacher3 --> csi_socket
provisioner1 & provisioner2 & provisioner3 --> csi_socket
resizer1 & resizer2 & resizer3 --> csi_socket
snapshotter1 & snapshotter2 & snapshotter3 --> csi_socket
plugin1 & plugin2 & plugin3 --> csi_socket
lh_manager1 & lh_manager2 & lh_manager3 --> volumes
volumes --> replicas
replicas --> pi1_disk[("pi1: /mnt/arcodange/longhorn")]
replicas --> pi2_disk[("pi2: /mnt/arcodange/longhorn")]
replicas --> pi3_disk[("pi3: /mnt/arcodange/longhorn")]
share_mgr1 & share_mgr2 --> nfs
nfs --> backup_pvc[("PVC: backups-rwx\n50Gi")]
end
subgraph DockerStorage["Docker Storage layer"]
docker1["Docker daemon\npi1"]
docker2["Docker daemon\npi2"]
docker3["Docker daemon\npi3"]
storage1[("/mnt/arcodange/docker/overlay2/")]
docker1 --> storage1
docker2 --> storage1
docker3 --> storage1
end
subgraph ApplicationLayer["Application Pods (Affected)"]
traefik["traefik-5c67cb6889-8b5nk\n0/1 Error\nkube-system"]
cms["cms-arcodange-cms-...\n0/1 ImagePullBackOff\ncms"]
webapp["webapp-6588455979-...\n0/1 ImagePullBackOff\nwebapp"]
erp["erp-648748b4f5-bntd9\n0/1 Error\nerp"]
grafana["grafana-5d496f9668-...\n0/3 Error\ntools"]
vault["hashicorp-vault-0\n0/1 Error\ntools"]
end
subgraph NetworkServices["Network Services"]
coredns["coredns-67476ddb48-jrcg2\n1/1 Running\nkube-system"]
svclb["svclb-traefik-*\n3/3 Running\nkube-system"]
end
%% Connections showing failure paths
csi_socket --x traefik
csi_socket --x cms
csi_socket --x webapp
csi_socket --x erp
csi_socket --x grafana
csi_socket --x vault
docker1 --x coredns
docker1 --x svclb
%% Healthy connections
volumes -->|provides storage| traefik
volumes -->|provides storage| cms
volumes -->|provides storage| webapp
volumes -->|provides storage| erp
volumes -->|provides storage| grafana
volumes -->|provides storage| vault
classDef node fill:#0ea5e9,color:#000,stroke:#06b6d4
classDef k3s fill:#84cc16,color:#000,stroke:#65a30d
classDef longhorn fill:#a855f7,color:#fff,stroke:#8b5cf6
classDef csi fill:#f59e0b,color:#000,stroke:#d97706
classDef data fill:#10b981,color:#000,stroke:#059669
classDef app fill:#ec4899,color:#fff,stroke:#db2777
classDef network fill:#6366f1,color:#fff,stroke:#4f46e5
classDef error fill:#ef4444,color:#fff,stroke:#dc2626
classDef waiting fill:#fbbf24,color:#000,stroke:#f59e0b
class pi1,pi2,pi3 node
class kubelet1,kubelet2,kubelet3,k3s_server,helm_controller k3s
class manifest,webhook longhorn
class lh_manager1,lh_manager2,lh_manager3,engine1,engine2,engine3,volumes,replicas,share_mgr1,share_mgr2 data
class deployer,wait_container,csi_registrar,csi_socket longhorn
class attacher1,attacher2,attacher3,provisioner1,provisioner2,provisioner3,resizer1,resizer2,resizer3,snapshotter1,snapshotter2,snapshotter3 csi
class plugin1,plugin2,plugin3 csi
class traefik,cms,webapp,erp,grafana,vault app
class coredns,svclb network
class docker1,docker2,docker3 data
class deployer,wait_container error
class attacher3,provisioner1,resizer2,resizer3,snapshotter2 error
class plugin1,plugin2,plugin3 error
class ui1,ui2,share_mgr1,share_mgr2 error
class traefik,cms,webapp,erp,grafana,vault error
class nfs waiting
class lh_manager2,lh_manager3 waiting
classDef clusterBox stroke:#334155,stroke-width:2px,color:#94a3b8
class Cluster clusterBox
class LonghornStorage clusterBox
class DockerStorage clusterBox
class ApplicationLayer clusterBox
class NetworkServices clusterBox


@@ -1,200 +0,0 @@
%%{init: { 'theme': 'forest', 'themeVariables': {
'primaryColor': '#7c3aed',
'primaryTextColor': '#ffffff',
'lineColor': '#6d28d9',
'secondaryColor': '#8b5cf6',
'tertiaryColor': '#a78bfa',
'edgeLabelBackground':'#5b21b6',
'edgeLabelColor': '#ffffff'
}}}%%
mindmap
root((Longhorn Storage System))
%% ===== CONTROL PLANE COMPONENTS =====
ControlPlane[Control Plane]
Manager[longhorn-manager]
Role1["Role: Primary controller for Longhorn"]
Responsibilities1["• Manages volumes, replicas, snapshots\n• Handles volume lifecycle\n• Coordinates with etcd\n• Exposes API (port 9500)"]
Health1["Health Check: :9501/v1/healthz"]
Webhook1["Webhook: :9502/metrics"]
DriverDeployer[longhorn-driver-deployer]
Role2["Role: CSI driver deployment controller"]
Responsibilities2["• Deploys CSI driver to each node\n• Runs via init container (wait-longhorn-manager)\n• Creates csi.sock on each node"]
WaitCmd["Command: longhorn-manager wait -d <namespace>"]
Blocking["⚠️ BLOCKED: Init container waiting for managers"]
%% ===== CSI COMPONENTS =====
CSILayer[CSI Interface]
CSISocket[("/var/lib/kubelet/plugins/driver.longhorn.io/csi.sock")]
SocketRole["Role: Unix domain socket for CSI communication"]
Attacher[csi-attacher]
AttacherRole["Role: Attaches volumes to nodes"]
AttacherResp["• Monitors VolumeAttachment objects\n• Calls CSI ControllerPublishVolume\n• Handles detach operations"]
AttacherStatus["Status: 2/3 Running, 1 Error"]
Provisioner[csi-provisioner]
ProvisionerRole["Role: Creates volumes from PVCs"]
ProvisionerResp["• Watches PVC objects\n• Calls CSI CreateVolume\n• Handles volume deletion"]
ProvisionerStatus["Status: 2/3 Running, 1 Error"]
Resizer[csi-resizer]
ResizerRole["Role: Handles volume resizing"]
ResizerResp["• Watches PVC size changes\n• Calls CSI ExpandVolume"]
ResizerStatus["Status: 1/3 Running, 2 Error"]
Snapshotter[csi-snapshotter]
SnapshotterRole["Role: Manages volume snapshots"]
SnapshotterResp["• Watches VolumeSnapshot objects\n• Calls CSI CreateSnapshot\n• Handles snapshot deletion"]
SnapshotterStatus["Status: 2/3 Running, 1 Error"]
NodeRegistrar[csi-node-driver-registrar]
RegistrarRole["Role: Registers driver with kubelet"]
RegistrarResp["• Creates CSINode resource\n• Registers via kubelet plugin registry API"]
Plugin[csi-plugin]
PluginRole["Role: Node-level CSI operations"]
PluginResp["• Runs on each node (DaemonSet)\n• Handles NodePublish/UnpublishVolume\n• Manages mount/unmount operations"]
PluginStatus["⚠️ BLOCKED: All 3 pods in Error (no CSI socket)"]
%% ===== DATA LAYER COMPONENTS =====
DataLayer[Data Layer]
Engine[engine-image]
EngineRole["Role: Engine and instance manager"]
EngineResp["• Pulls and manages engine binaries\n• Runs as sidecar in DaemonSet\n• Maintains engine processes"]
EngineStatus["Status: ✅ 3/3 Running"]
Volumes[Longhorn Volumes]
VolumeRole["Role: Logical volume representation"]
VolumeResp["• Managed via Longhorn CRDs\n• Replicated across nodes\n• Supports RWO, RWX access modes"]
VolumeStatus["Status: ✅ All 12 volumes attached & healthy"]
Replicas[Volume Replicas]
ReplicaRole["Role: Physical data storage"]
ReplicaResp["• 3-way replication across nodes\n• Stored at /mnt/arcodange/longhorn/replicas/\n• Data intact after power cut"]
ReplicaPath["Path: pi1, pi2, pi3: /mnt/arcodange/longhorn/replicas/"]
Backups[Backup System]
NFS[RWX NFS Share]
NFSRole["Role: NFS export for backup volume"]
NFSCreate["Created via: playbooks/setup/backup_nfs.yml"]
NFSStatus["⚠️ OFFLINE: share-manager pods in Error"]
BackupPVC[Backup PVC]
BackupPVCRole["Role: Persistent storage for backups"]
BackupPVCDetails["Name: backups-rwx\nNamespace: longhorn-system\nSize: 50Gi\nClass: longhorn"]
ShareManager[share-manager]
ShareRole["Role: Manages NFS exports for Longhorn volumes"]
ShareStatus["⚠️ BLOCKED: 2 pods in Error"]
%% ===== UI & TOOLS =====
UI[Web UI]
UIRole["Role: Longhorn management dashboard"]
UIAccess["Access: Port 9500 on manager pods"]
UIStatus["⚠️ BLOCKED: 2 pods in CrashLoopBackOff"]
%% ===== INFRASTRUCTURE =====
Infrastructure[Underlying Infrastructure]
Nodes[Raspberry Pi Nodes]
pi1["pi1: 192.168.1.201\nRole: Control Plane"]
pi2["pi2: 192.168.1.202\nRole: Worker"]
pi3["pi3: 192.168.1.203\nRole: Worker"]
K3s[Kubernetes (k3s v1.34.3+k3s1)]
Kubelet["kubelet (3 instances)"]
APIServer["API Server (on pi1)"]
etcd["etcd (on pi1)"]
HelmCtrl["HelmChart Controller"]
Docker[Docker Engine]
DockerRole["Role: Container runtime"]
DockerStorage["Storage: /mnt/arcodange/docker/"]
Overlay2["⚠️ ISSUE: overlay2 filesystem corrupted"]
%% ===== EXTERNAL DEPENDENCIES =====
Dependencies[External Dependencies]
CSIRegistration[CSI Driver Registration]
CSIRole["Role: k8s CSI registration"]
CSIDriver["Driver: driver.longhorn.io"]
CSIDriverStatus["⚠️ LOST: Not registered with kubelet"]
%% ===== CONNECTIONS =====
root --> ControlPlane
root --> CSILayer
root --> DataLayer
root --> UI
root --> Infrastructure
root --> Dependencies
ControlPlane --> Manager
ControlPlane --> DriverDeployer
CSILayer --> CSISocket
CSILayer --> Attacher
CSILayer --> Provisioner
CSILayer --> Resizer
CSILayer --> Snapshotter
CSILayer --> NodeRegistrar
CSILayer --> Plugin
CSISocket --> Attacher
CSISocket --> Provisioner
CSISocket --> Resizer
CSISocket --> Snapshotter
CSISocket --> Plugin
CSISocket --> NodeRegistrar
DriverDeployer --> NodeRegistrar
NodeRegistrar --> CSISocket
DataLayer --> Engine
DataLayer --> Volumes
DataLayer --> Replicas
DataLayer --> Backups
Backups --> NFS
Backups --> BackupPVC
Backups --> ShareManager
Infrastructure --> Nodes
Infrastructure --> K3s
Infrastructure --> Docker
Dependencies --> CSIRegistration
CSIRegistration --> CSISocket
%% ===== YET TO BE RESTORED =====
Dependencies --x EmptyCSI["⚠️ CSI Socket Missing"]
EmptyCSI --x Attacher
EmptyCSI --x Provisioner
EmptyCSI --x Resizer
EmptyCSI --x Snapshotter
EmptyCSI --x Plugin
%% ===== STYLES =====
classDef component fill:#8b5cf6,color:#fff,stroke:#7c3aed,stroke-width:2px
classDef role fill:#a78bfa,color:#000,stroke:#8b5cf6
classDef responsibility fill:#c4b5fd,color:#000,stroke:#8b5cf6
classDef status_good fill:#10b981,color:#fff,stroke:#059669
classDef status_bad fill:#ef4444,color:#fff,stroke:#dc2626
classDef status_warn fill:#f59e0b,color:#000,stroke:#d97706
classDef infinite fill:#3b82f6,color:#fff,stroke:#2563eb
class root infinite
class ControlPlane,CSILayer,DataLayer,UI,Infrastructure,Dependencies component
class Manager,Attacher,Provisioner,Resizer,Snapshotter,NodeRegistrar,Plugin,Engine,Volumes,Replicas,NFS,BackupPVC,ShareManager,UIRole,Nodes,K3s,Docker,CSIRegistration component
class Role1,Role2,AttacherRole,ProvisionerRole,ResizerRole,SnapshotterRole,RegistrarRole,PluginRole,EngineRole,VolumeRole,ReplicaRole,NFSRole,ShareRole,UIRole,Kubelet,APIServer,etcd,HelmCtrl,DockerRole,CSIRole,CSIDriver component
class Responsibilities1,Responsibilities2,AttacherResp,ProvisionerResp,ResizerResp,SnapshotterResp,RegistrarResp,PluginResp,EngineResp,VolumeResp,ReplicaResp,NFSRole,BackupPVCDetails,ShareRole,UIAccess,ShareStatus,NFSStatus role
class EngineStatus,VolumeStatus,ReplicaPath status_good
class Blocking,PluginStatus,UIStatus,ShareStatus,NFSCreate,ShareStatus,CSIDriverStatus status_bad
class AttacherStatus,ProvisionerStatus,ResizerStatus,SnapshotterStatus status_warn
classDef mindmapTitle fill:#4c1d95,color:#fff,stroke:#5b21b6,font-size:20px,font-weight:bold
class root mindmapTitle


@@ -1,131 +0,0 @@
%%{init: { 'theme': 'forest', 'themeVariables': {
'primaryColor': '#059669',
'primaryTextColor': '#fff',
'lineColor': '#065f46',
'secondaryColor': '#10b981',
'edgeLabelBackground':'#064e3b',
'edgeLabelColor': '#ffffff'
}}}%%
flowchart TD
%% ===== POWER CUT EVENT =====
Start([Power Cut Event]) -->|Electricity Lost| Crash[Kubernetes Components Crash]
%% ===== IMMEDIATE IMPACT =====
Crash --> KubeletCrash[Kubelet Processes Crash<br>on all 3 nodes]
Crash --> DockerCrash[Docker Daemons Crash<br>on all 3 nodes]
Crash --> K3sCrash[K3s Server Process Crash<br>on pi1]
%% ===== DOCKER STORAGE CORRUPTION =====
DockerCrash --> Overlay2[ /mnt/arcodange/docker/overlay2/<br>Filesystem Corrupted]
Overlay2 --> DockerFail[Docker containers cannot start<br>missing layer files]
DockerFail --> CoreDNSPod[CoreDNS Pod<br>CrashLoopBackOff]
DockerFail --> TraefikLB[svclb-traefik Pods<br>CrashLoopBackOff]
%% ===== LONGHORN IMPACT =====
KubeletCrash --> CSIUnreg[CSI Driver Registration Lost<br>driver.longhorn.io unregistered]
K3sCrash --> HelmCtrl[HelmChart Controller<br>Unresponsive]
CSIUnreg --> CSISocket[ /var/lib/kubelet/plugins/.../csi.sock<br>Disappears]
%% ===== LONGHORN MANAGER LOSS =====
KubeletCrash --> LHManagers[Longhorn Manager Pods<br>Crash 3 pods ]
LHManagers --> NoQuorum[No Manager Quorum<br>Cannot coordinate]
NoQuorum --> VolumesFrozen[Existing Volumes<br>Still healthy but inaccessible]
CSISocket --> CSISidecars[CSI Pods Cannot Start<br>csi-attacher, provisioner, resizer, snapshotter]
CSISocket --> CSIPlugin[CSI Plugin DaemonSet<br>Cannot register driver]
%% ===== VOLUME MOUNT FAILURES =====
CSISidecars --> NoMounts[PVC Mounts Fail<br>All Longhorn PVs inaccessible]
CSIPlugin --> NoMounts
%% ===== APPLICATION CASCADING FAILURES =====
NoMounts --> TraefikDown[Traefik Pod<br>PVC mount failed<br>Error state]
NoMounts --> AppPods1[Application Pods<br>PVC mount failed<br>Error state<br>cms, webapp, erp, clickhouse, etc.]
%% ===== BACKUP SYSTEM IMPACT =====
NoQuorum --> NFSDown[NFS Share-Manager Pods<br>Error state]
NFSDown --> BackupMount[ /mnt/backups/ NFS Mount<br>Unavailable]
%% ===== DISCOVERY & RECOVERY =====
Discovery[15:23:57<br>Incident Discovered] --> Assessment[15:24:05<br>Assessment Complete]
Assessment --> Identify[15:24:10<br>Root Cause: CSI Driver Unregistered]
Identify --> CheckData[15:24:15<br>Verify Volume Health]
CheckData --> DataIntact[All 12 volumes:<br>state=attached<br>robustness=healthy]
%% ===== RECOVERY ATTEMPTS =====
Identify --> Attempt1[15:24:50<br>Attempt 1: Touch HelmChart Manifest]
Attempt1 --> Partial1[Only 1 manager pod affected]
Partial1 --> NeedMore[Insufficient recovery]
NeedMore --> Attempt2[15:32:15<br>Attempt 2: Delete All Longhorn Pods]
Attempt2 --> HelmReconcile[HelmChart Controller<br>Recreates All 24 Pods]
HelmReconcile --> Progress[15+ Pods Running<br>Managers, Engine-Image, Some CSI]
Progress --> Blocked[Driver-Deployer<br>Stuck in Init:0/1]
Blocked --> Investigate[15:34:30<br>Investigate wait-longhorn-manager]
Investigate --> WaitLoop[Init container runs:<br>longhorn-manager wait -d longhorn-system]
WaitLoop --> WaitingManagers[Waiting for all managers<br>to pass readiness probes]
%% ===== CURRENT STATE (15:35:30) =====
WaitingManagers --> CurrentState
subgraph CurrentState["Current State<br>15:35:30 UTC"]
direction TB
Resolved[Resolved ✅] --> ManagersOk[Manager Pods:<br>2/2, 1/2, 2/2 Running<br>pi1, pi2, pi3]
Resolved --> EngineOk[Engine Image:<br>3/3 Running]
Resolved --> CSIPartial[CSI Sidecars:<br>~50% Running]
Resolved --> VolumeData[Volume Data:<br>All intact]
BlockedNow[Blocked ❌] --> DriverDeployer[Driver Deployer:<br>Init:0/1 8+ min<br>waiting for managers]
BlockedNow --> CSIPluginAll[CSI Plugin:<br>0/3 Error all ]
BlockedNow --> UI[Longhorn UI:<br>0/2 CrashLoop]
BlockedNow --> ShareMgr[Share Manager:<br>0/2 Error]
BlockedNow --> NFSPod[RWX NFS:<br>ContainerCreating]
BlockedNow --> AppImpact[Application Impact:<br>~30 pods still failed<br>down from 43]
end
%% ===== RECOVERY PATH =====
CurrentState --> NextStep[Next: Resolve driver-deployer<br>wait-longhorn-manager blockage]
NextStep --> CheckHealth["Check manager health endpoints<br>https://&lt;ip&gt;:9501/v1/healthz"]
CheckHealth -->|If healthy| WaitContainerIssue[Wait container bug/timeout]
CheckHealth -->|If unhealthy| FixManagers[Investigate manager readiness]
WaitContainerIssue --> Option1[Option 1: Delete driver-deployer pod]
WaitContainerIssue --> Option2[Option 2: Touch manifest again]
FixManagers --> CheckLogs[Check manager container logs]
CheckLogs --> ResolveManagers[Fix manager readiness]
Option1 --> CSIDriver[CSI Driver deployed]
Option2 --> CSIDriver
ResolveManagers --> CSIDriver
CSIDriver --> CSISocketRestored[CSI Socket Restored]
CSISocketRestored --> PodsRecover[All Longhorn pods recover]
PodsRecover --> PVCMounts[PVC Mounts resume]
PVCMounts --> AppRecovery[Application pods auto-recover]
AppRecovery --> ResolvedState[Resolved ✅]
%% ===== STYLES =====
classDef event fill:#10b981,color:#fff,stroke:#059669
classDef impact fill:#d97706,color:#000,stroke:#b45309
classDef action fill:#3b82f6,color:#fff,stroke:#2563eb
classDef resolved fill:#10b981,color:#fff,stroke:#059669
classDef blocked fill:#ef4444,color:#fff,stroke:#dc2626
classDef current fill:#8b5cf6,color:#fff,stroke:#7c3aed
class Start,Crash,KubeletCrash,DockerCrash,K3sCrash event
class Overlay2,DockerFail,CSIUnreg,CSISocket,NoQuorum,NoMounts impact
class Discovery,Assessment,Identify,CheckData,Attempt1,Attempt2,Investigate action
class ManagersOk,EngineOk,CSIPartial,VolumeData resolved
class DriverDeployer,CSIPluginAll,UI,ShareMgr,NFSPod,AppImpact blocked
class WaitLoop,CurrentState,NextStep,CheckHealth,Option1,Option2,ResolvedState current
classDef subtitle fill:#64748b,color:#fff,stroke:#475569,font-size:12px
class CurrentState,CurrentStateLabel subtitle


@@ -1,416 +0,0 @@
---
title: PVC Recovery — Post-Reinstall Volume Restoration
incident_id: 2026-04-13-001
date: 2026-04-14
status: Mostly Resolved
operator: Claude Code
---
# PVC Recovery — Post-Reinstall Volume Restoration
## Situation as of 2026-04-14
Longhorn has been fully reinstalled and is healthy. The cluster nodes are all Ready. However,
**all application volumes are inaccessible** because the nuclear cleanup deleted the Longhorn
Volume/Engine/Replica CRDs, and the reinstalled Longhorn has no knowledge of the old volumes.
### Longhorn Health (verified)
```
NAME READY STATUS AGE
csi-attacher (3 pods) 1/1 Running 30m
csi-provisioner (3 pods) 1/1 Running 30m
csi-resizer (3 pods) 1/1 Running 30m
csi-snapshotter (3 pods) 1/1 Running 30m
engine-image-ei-b4bcf0a5 (3 pods) 1/1 Running 31m
instance-manager (3 pods) 1/1 Running 30m
longhorn-csi-plugin (3 pods) 3/3 Running 30m
longhorn-driver-deployer 1/1 Running 31m
longhorn-manager (3 pods) 2/2 Running 14m
longhorn-ui (2 pods) 1/1 Running 31m
CSIDriver driver.longhorn.io: Registered (AGE: 110d — restored)
```
Longhorn only knows about 3 volumes (crowdsec-config, crowdsec-db, traefik) — all newly provisioned
after reinstall. The other 9 volumes are missing from Longhorn's knowledge.
---
## Backup Files Available
| File | Location | Contents | Gap |
|------|----------|----------|-----|
| `backup_20260413.volumes` | `/home/pi/arcodange/backups/k3s_pvc/` | PV + PVC YAML (kubectl get -A pv,pvc) | No Longhorn CRDs |
| `longhorn_metadata_20260413.yaml` | `/home/pi/arcodange/backups/k3s_pvc/` | Engines + Replicas CRDs | **No Volume CRDs** |
**Critical gap:** The metadata backup was collected with `kubectl get -n longhorn-system volumes.longhorn.io,replicas.longhorn.io,engines.longhorn.io -o yaml` but the resulting file contains only Engines and Replicas in 3 separate Lists. The Volume CRDs are absent.
Attempting `kubectl apply -f longhorn_metadata_20260413.yaml` fails with:
```
Error from server (Invalid): admission webhook "validator.longhorn.io" denied the request:
volume does not exist for engine
```
The webhook requires Volume CRDs to exist before Engines can be created. Without Volume CRDs in the
backup, the metadata file cannot be applied as-is.
---
## Data Survival Assessment
### Pi1 — Replica directories
Pi1 is the control plane. Its old replica directories were **deleted** during the nuclear cleanup.
Only 3 new directories exist (created after reinstall):
```
pvc-01b93e30-...-b1530c1d (crowdsec-config — NEW)
pvc-4785dc60-...-2f031b60 (crowdsec-db — NEW)
pvc-5391fa2b-...-0e2ff956 (traefik — NEW)
```
### Pi2 — Replica directories (OLD data preserved)
```
pvc-01b93e30-...-8649439a (crowdsec-config — new post-reinstall)
pvc-1251909b-...-e7a20fdf ← OLD DATA (clickhouse 16Gi)
pvc-14ccc47e-...-09021065 ← OLD DATA (crowdsec-db old PV)
pvc-4785dc60-...-4b48fdf1 (crowdsec-db — new post-reinstall)
pvc-5391fa2b-...-d3503612 (traefik — new post-reinstall)
pvc-63244de1-...-6076eb08 (unknown — not in engine backup)
pvc-6d2ea1c7-...-c7f287d8 ← OLD DATA (audit-vault 10Gi)
pvc-7971918e-...-2028617e ← OLD DATA (erp 50Gi)
pvc-88e18c7f-...-910583f6 ← OLD DATA (prometheus-server 8Gi)
pvc-abc7666c-...-34bec9b0 (unknown — not in engine backup)
pvc-aed7f2c4-...-41c20064 ← OLD DATA (alertmanager 2Gi)
pvc-ca5567d3-...-b537ca60 ← OLD DATA (data-vault 10Gi)
pvc-cc8a3cbb-...-cd16e459 ← OLD DATA (old traefik 128Mi)
pvc-cdd434d1-...-b2695689 ← OLD DATA (url-shortener 128Mi)
pvc-d1d5482b-...-e0a8cdbc ← OLD DATA (redis 1Gi)
pvc-efda1d2f-...-30c849a6 ← OLD DATA (backups-rwx 50Gi)
pvc-f9fe3504-...-20f64e9e ← OLD DATA (old crowdsec-config 100Mi)
pvc-fca13978-...-4749b404 (unknown — not in engine backup)
```
### Pi3 — Replica directories (OLD data preserved, multiple dirs per volume)
```
pvc-01b93e30-...-29592f50 (crowdsec-config — new post-reinstall)
pvc-1251909b-...-1163420b ← OLD DATA (clickhouse — replica 1)
pvc-1251909b-...-3a569b0a ← OLD DATA (clickhouse — replica 2)
pvc-1251909b-...-ccd05947 ← OLD DATA (clickhouse — replica 3 or stale)
pvc-14ccc47e-...-3856d64d ← OLD DATA (old crowdsec-db)
pvc-2e60385f-...-48e27d5a (unknown)
pvc-4785dc60-...-869f0e99 (crowdsec-db — new post-reinstall)
pvc-5391fa2b-...-958cd868 (traefik — new post-reinstall)
pvc-6d2ea1c7-...-0e73550d ← OLD DATA (audit-vault — dir 1)
pvc-6d2ea1c7-...-787ffefa ← OLD DATA (audit-vault — dir 2)
pvc-6d2ea1c7-...-e0f58d64 ← OLD DATA (audit-vault — dir 3 or stale)
pvc-7971918e-...-33191046 ← OLD DATA (erp — dir 1)
pvc-7971918e-...-88fc1dfc ← OLD DATA (erp — dir 2)
pvc-7971918e-...-b5c5530d ← OLD DATA (erp — dir 3 or stale)
pvc-88e18c7f-...-5d508830 ← OLD DATA (prometheus-server — dir 1)
pvc-88e18c7f-...-92c0ebfd ← OLD DATA (prometheus-server — dir 2)
pvc-88e18c7f-...-deea6182 ← OLD DATA (prometheus-server — dir 3 or stale)
pvc-abe09e90-...-a748d11b (unknown)
pvc-aed7f2c4-...-3452358f ← OLD DATA (alertmanager — dir 1)
pvc-aed7f2c4-...-826f05aa ← OLD DATA (alertmanager — dir 2)
pvc-ca5567d3-...-0ed6f691 ← OLD DATA (data-vault — dir 1)
pvc-ca5567d3-...-808d72b4 ← OLD DATA (data-vault — dir 2)
pvc-ca5567d3-...-9051ef48 ← OLD DATA (data-vault — dir 3 or stale)
pvc-cc8a3cbb-...-011b54b3 ← OLD DATA (old traefik — dir 1)
pvc-cc8a3cbb-...-a24fd91e ← OLD DATA (old traefik — dir 2)
pvc-cdd434d1-...-70197659 ← OLD DATA (url-shortener — dir 1)
pvc-cdd434d1-...-998f49ff ← OLD DATA (url-shortener — dir 2)
pvc-d1d5482b-...-6a730f00 ← OLD DATA (redis — dir 1)
pvc-d1d5482b-...-75da16fd ← OLD DATA (redis — dir 2)
pvc-efda1d2f-...-62fb04c9 ← OLD DATA (backups-rwx — dir 1)
pvc-efda1d2f-...-688f30f5 ← OLD DATA (backups-rwx — dir 2)
pvc-efda1d2f-...-69454dd0 ← OLD DATA (backups-rwx — dir 3 or stale)
pvc-f9fe3504-...-418df608 ← OLD DATA (old crowdsec-config)
```
**Note on multiple directories per volume on pi3:** Normal replicas = 1 dir per volume per node.
Multiple directories indicate either: rebuild attempts from before the nuclear cleanup, or stale
snapshots. Must verify by checking `.img` file sizes before renaming.
---
## Volume → PVC Mapping (from backup_20260413.volumes)
| PV Name | PVC | Namespace | Size | Status |
|---------|-----|-----------|------|--------|
| `pvc-1251909b-3cef-40c6-881c-3bb6e929a596` | `clickhouse-storage-clickhouse-0` | tools | 16Gi | Terminating |
| `pvc-6d2ea1c7-9327-4992-a02c-93ae604eda70` | `audit-hashicorp-vault-0` | tools | 10Gi | Terminating |
| `pvc-7971918e-e47f-4739-a976-965ea2d770b4` | `erp` | erp | 50Gi | Terminating |
| `pvc-88e18c7f-2cfd-45e3-be5b-78c31ab829e9` | `prometheus-server` | tools | 8Gi | Terminating |
| `pvc-aed7f2c4-1948-487a-8d10-d8a1372289b4` | `storage-prometheus-alertmanager-0` | tools | 2Gi | Terminating |
| `pvc-ca5567d3-a682-4cee-8ff1-2b8e23260635` | `data-hashicorp-vault-0` | tools | 10Gi | Terminating |
| `pvc-cc8a3cbb-dbc2-47a2-a0cc-a02136122b90` | `traefik` | kube-system | 128Mi | Terminating |
| `pvc-cdd434d1-88b4-4588-8fd2-8c7eafc56d07` | `url-shortener` | url-shortener | 128Mi | Terminating |
| `pvc-d1d5482b-81c8-4d7c-a528-7a57ef47a5ce` | `redis-storage-redis-0` | tools | 1Gi | Terminating |
| `pvc-efda1d2f-1db8-46dd-9a97-3d11f1807ffa` | `backups-rwx` | longhorn-system | 50Gi | Lost |
| `pvc-14ccc47e-0b8c-49d4-97bb-70e550f644b0` | `crowdsec-db-pvc` | tools | 1Gi | already replaced |
| `pvc-f9fe3504-70ce-4401-8cda-bc6bb68bc1bf` | `crowdsec-config-pvc` | tools | 100Mi | already replaced |
CrowdSec volumes (`pvc-14ccc47e`, `pvc-f9fe3504`) are the old PVs — CrowdSec already got new volumes
(`pvc-4785dc60`, `pvc-01b93e30`) and is running. These old dirs can be cleaned up later.
---
## Recovery Plan
### Why not restore PVCs
New PVCs will be created by the workloads themselves when they restart. Restoring old PVCs would
conflict with both the stuck Terminating ones and any new ones pods may already be creating.
**Restore PVs only** — strip `claimRef` so they become `Available`, and new PVCs bind to them via
`storageClassName` + `accessMode` + `capacity` matching.
### Step 1 — Clear stuck Terminating PVs
The old PVs are stuck in `Terminating` with `kubernetes.io/pvc-protection` finalizers. Remove them:
```bash
for pv in \
pvc-1251909b-3cef-40c6-881c-3bb6e929a596 \
pvc-6d2ea1c7-9327-4992-a02c-93ae604eda70 \
pvc-7971918e-e47f-4739-a976-965ea2d770b4 \
pvc-88e18c7f-2cfd-45e3-be5b-78c31ab829e9 \
pvc-aed7f2c4-1948-487a-8d10-d8a1372289b4 \
pvc-ca5567d3-a682-4cee-8ff1-2b8e23260635 \
pvc-cc8a3cbb-dbc2-47a2-a0cc-a02136122b90 \
pvc-cdd434d1-88b4-4588-8fd2-8c7eafc56d07 \
pvc-d1d5482b-81c8-4d7c-a528-7a57ef47a5ce \
pvc-efda1d2f-1db8-46dd-9a97-3d11f1807ffa; do
kubectl patch pv $pv -p '{"metadata":{"finalizers":null}}' --type=merge
done
```
### Step 2 — Restore PVs with claimRef removed and Retain policy
Extract PVs from the backup, strip `claimRef` and set `persistentVolumeReclaimPolicy: Retain`,
then apply:
```bash
ssh pi1 "sudo kubectl get pv \
pvc-1251909b-3cef-40c6-881c-3bb6e929a596 \
pvc-6d2ea1c7-9327-4992-a02c-93ae604eda70 \
pvc-7971918e-e47f-4739-a976-965ea2d770b4 \
pvc-88e18c7f-2cfd-45e3-be5b-78c31ab829e9 \
pvc-aed7f2c4-1948-487a-8d10-d8a1372289b4 \
pvc-ca5567d3-a682-4cee-8ff1-2b8e23260635 \
pvc-cc8a3cbb-dbc2-47a2-a0cc-a02136122b90 \
pvc-cdd434d1-88b4-4588-8fd2-8c7eafc56d07 \
pvc-d1d5482b-81c8-4d7c-a528-7a57ef47a5ce \
pvc-efda1d2f-1db8-46dd-9a97-3d11f1807ffa \
-o yaml 2>/dev/null | \
python3 -c \"
import sys, yaml
docs = list(yaml.safe_load_all(sys.stdin))
for doc in docs:
if not doc: continue
items = doc.get('items', [doc])
for pv in items:
if pv.get('kind') != 'PersistentVolume': continue
spec = pv.get('spec', {})
spec.pop('claimRef', None)
spec['persistentVolumeReclaimPolicy'] = 'Retain'
pv.pop('status', None)
meta = pv.get('metadata', {})
meta.pop('resourceVersion', None)
meta.pop('uid', None)
meta.pop('creationTimestamp', None)
print('---')
print(yaml.dump(pv))
\" | kubectl apply -f -"
```
Expected result: PVs become `Available` (no claimRef = unbound).
### Step 3 — Longhorn creates new Volume CRDs + replica dirs
When new PVCs bind to the restored PVs and pods attempt to mount them, Longhorn's CSI provisioner
will create new Volume CRDs for each. These new Volume CRDs will have new engine IDs, and Longhorn
will create **new empty replica directories** on pi1, pi2, pi3.
At this point the volume directory layout will be:
```
/mnt/arcodange/longhorn/replicas/
pvc-1251909b-...-<OLD_SUFFIX> ← pi2/pi3: OLD data
pvc-1251909b-...-<NEW_SUFFIX> ← pi1/pi2/pi3: NEW empty dirs
```
### Step 4 — Map old dirs to new dirs, verify data presence
For each volume, on each node, identify:
- OLD dir: exists before new binding (larger .img file size, older timestamp)
- NEW dir: created after binding (empty or minimal .img file)
```bash
# Example: check sizes on pi2 for clickhouse
ssh pi2 "du -sh /mnt/arcodange/longhorn/replicas/pvc-1251909b-*"
```
### Step 5 — Swap directories (Method B)
For each volume on each node that has an old dir with data:
```bash
# Scale down the workload first
kubectl scale statefulset clickhouse -n tools --replicas=0
# Wait for volume to detach
kubectl wait --for=jsonpath='{.status.state}'=detached \
volume/pvc-1251909b-3cef-40c6-881c-3bb6e929a596 \
-n longhorn-system --timeout=60s
# On pi2: rename new empty dir, move old data dir to new name
# ($( ... ) must be escaped as \$( ... ) so the substitutions run on pi2, not locally)
ssh pi2 "
NEW=\$(ls /mnt/arcodange/longhorn/replicas/ | grep pvc-1251909b | \
xargs -I{} stat --format='%Y {}' /mnt/arcodange/longhorn/replicas/{} | \
sort -rn | head -1 | awk '{print \$2}')
OLD=\$(ls /mnt/arcodange/longhorn/replicas/ | grep pvc-1251909b | \
xargs -I{} stat --format='%Y {}' /mnt/arcodange/longhorn/replicas/{} | \
sort -n | head -1 | awk '{print \$2}')
echo \"OLD: \$OLD\"
echo \"NEW: \$NEW\"
sudo mv \$NEW \${NEW}.empty_backup
sudo mv \$OLD \$NEW
"
# Repeat on pi3
# Restart the instance manager on affected node to pick up new dir
kubectl delete pod -n longhorn-system -l \
longhorn.io/node=pi2,longhorn.io/component=instance-manager
```
### Step 6 — Scale workloads back up and verify
```bash
kubectl scale statefulset clickhouse -n tools --replicas=1
kubectl get pvc -n tools clickhouse-storage-clickhouse-0
kubectl get volumes -n longhorn-system pvc-1251909b-3cef-40c6-881c-3bb6e929a596
```
---
## Priority Order for Recovery
Given data criticality:
1. **HashiCorp Vault data** (`pvc-ca5567d3` + `pvc-6d2ea1c7`) — credentials/secrets store
2. **ERP** (`pvc-7971918e`) — 50Gi, business data
3. **Prometheus** (`pvc-88e18c7f`) — 8Gi, metrics history (degraded OK, can rebuild)
4. **Redis** (`pvc-d1d5482b`) — 1Gi, cache (can rebuild from scratch if needed)
5. **Alertmanager** (`pvc-aed7f2c4`) — 2Gi, alert history (can rebuild)
6. **Clickhouse** (`pvc-1251909b`) — 16Gi
7. **URL shortener** (`pvc-cdd434d1`) — 128Mi
8. **Traefik** (`pvc-cc8a3cbb`) — 128Mi (TLS certs, can re-issue via cert-manager)
9. **Longhorn backups-rwx** (`pvc-efda1d2f`) — 50Gi, backup volume itself
---
## Caution: Multiple Dirs on Pi3
Several volumes have 3 directories on pi3. This likely happened during the incident when Longhorn
attempted rebuilds before the nuclear cleanup. **Do not blindly take the newest or oldest** — check
actual `.img` file size to identify the one with data:
```bash
ssh pi3 "du -sh /mnt/arcodange/longhorn/replicas/pvc-1251909b-*"
# The largest .img is the one with actual data
```
---
## Lessons for Backup Script
The current backup command `kubectl get -A pv,pvc -o yaml && echo '---' && kubectl get -A pvc -o yaml`
captures PV/PVC but not Longhorn Volume CRDs. The backup command must be updated to include:
```bash
kubectl get -A pv -o yaml && echo '---' \
&& kubectl get -A pvc -o yaml && echo '---' \
&& kubectl get -n longhorn-system volumes.longhorn.io -o yaml
```
This is tracked in ADR `docs/adr/20260414-longhorn-pvc-recovery.md` under "Prevention".
---
## Volume Recovery Status
| PV Name | PVC | Namespace | Size | Method | Status |
|---------|-----|-----------|------|--------|--------|
| `pvc-5391fa2b` | `traefik` | kube-system | 128Mi | PV claimRef remove | ✅ 2026-04-14 |
| `pvc-cdd434d1` | `url-shortener-data` | url-shortener | 128Mi | Method B (dir rename) | ✅ 2026-04-14 |
| `pvc-1251909b` | `clickhouse-storage-clickhouse-0` | tools | 16Gi | Block-device (playbook) | ✅ 2026-04-14 |
| `pvc-88e18c7f` | `prometheus-server` | tools | 8Gi | Block-device (playbook) | ⏳ 2026-04-15 |
| `pvc-aed7f2c4` | `storage-prometheus-alertmanager-0` | tools | 2Gi | Block-device (playbook) | ⏳ 2026-04-15 |
| `pvc-d1d5482b` | `redis-storage-redis-0` | tools | 1Gi | Block-device (playbook) | ⏳ 2026-04-15 |
| `pvc-efda1d2f` | `backups-rwx` | longhorn-system | 50Gi | Block-device (playbook) | ⏳ 2026-04-15 |
| `pvc-ca5567d3` | `data-hashicorp-vault-0` | tools | 10Gi | Manual (deferred) | 🔴 Pending |
| `pvc-6d2ea1c7` | `audit-hashicorp-vault-0` | tools | 10Gi | Manual (deferred) | 🔴 Pending |
| `pvc-7971918e` | `erp` | erp | 50Gi | Manual (deferred) | 🔴 Pending |
**Vault and ERP are excluded from automated recovery** — they require coordinated manual procedures
(Vault unseal key management; ERP business data verification). Use `docs/runbooks/longhorn-block-device-recovery.md`
with extra validation steps for those volumes.
---
## Automated Recovery: Block-Device Injection
Directory rename (Method B) proved too risky for large volumes: Longhorn detects `Dirty: true` +
inconsistency across replicas and silently rebuilds from the empty pi1 replica, destroying data.
**The approach that works** (implemented in `playbooks/recover/longhorn_data.yml`):
1. **Phase 0** — Auto-discover best replica dir per volume (skip `Rebuilding: true`, rank by actual disk usage)
2. **Phase 1** — Backup untouched replica dir before touching anything
3. **Phase 2** — Merge sparse snapshot + head layers into a flat image (`merge-longhorn-layers.py`)
4. **Phase 3** — Create Longhorn Volume CRD, wait for replicas
5. **Phase 4** — Scale down workload
6. **Phase 5** — Attach volume via VolumeAttachment maintenance ticket
7. **Phase 6** — `mkfs.ext4` the live block device, rsync data from merged image
8. **Phase 7** — Remove maintenance attachment ticket
9. **Phase 8** — Recreate PV (Retain, no claimRef) + PVC (pinned to PV)
10. **Phase 9** — Scale up, wait for readyReplicas ≥ 1, optional verify_cmd
**Pitfall discovered (2026-04-15):** `du -sb` returns apparent size for sparse files, making a
`Rebuilding: true` replica (1.3 GiB actual, 24 GiB apparent) beat healthy 11 GiB replicas.
Fixed by checking `Rebuilding` flag in `volume.meta` and using `du -sk` (actual usage).
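A sketch of the size comparison that exposes the pitfall (the volume prefix is illustrative; the `volume.meta` grep pattern assumes the JSON layout seen on these replicas):
```bash
# Actual blocks used vs apparent size: sparse replicas can look far larger than they are.
ssh pi3 'for d in /mnt/arcodange/longhorn/replicas/pvc-88e18c7f-*; do
  echo "== $d"
  sudo du -sk "$d"                    # actual usage (what the playbook now ranks by)
  sudo du -sk --apparent-size "$d"    # apparent size (misleading for sparse files)
  sudo grep -o "\"Rebuilding\": *[a-z]*" "$d/volume.meta" || true
done'
```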
**Usage:**
```bash
ansible-playbook -i inventory/hosts.yml playbooks/recover/longhorn_data.yml \
-e @playbooks/recover/longhorn_data_vars_remaining.yml
```
Vars files:
- `playbooks/recover/longhorn_data_vars_clickhouse.yml` — clickhouse (already recovered, archived)
- `playbooks/recover/longhorn_data_vars_remaining.yml` — prometheus, alertmanager, redis, backups-rwx
- `playbooks/recover/longhorn_data_vars.example.yml` — template for future use
---
## Tested Recovery Procedure (url-shortener — 2026-04-14)
Method B confirmed working for this volume (small, no Rebuilding replicas). Full sequence:
1. Create Longhorn Volume CRD manually (size 128Mi, rwo, 3 replicas)
2. Create Longhorn VolumeAttachment ticket to pi1 (disableFrontend: true) → triggers replica dir creation (see the sketch after this list)
3. Remove attachment ticket → volume detaches
4. On pi2: `mv new-dir new-dir.empty && mv old-dir new-dir`
5. On pi3: same (chose `-70197659` over `-998f49ff` based on newer mtime: Apr 7 vs Apr 6)
6. Clear finalizers on stuck Terminating PV/PVC → both deleted
7. Recreate PV (Retain policy, no claimRef, same CSI volumeHandle)
8. Recreate PVC with `volumeName:` pinned to the PV
9. Delete old Error pod (was blocking volume attach)
10. New pod comes up 1/1 Running, volume attached healthy on pi3, all 3 replicas running
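The attachment ticket in step 2 can be added by patching the volume's VolumeAttachment object. A sketch, assuming the Longhorn v1.5+ CRD fields (verify against the deployed version; the ticket name is arbitrary):
```bash
# Add a maintenance-mode ticket (frontend disabled) pinning the volume to pi1.
kubectl -n longhorn-system patch volumeattachments.longhorn.io \
  pvc-cdd434d1-88b4-4588-8fd2-8c7eafc56d07 --type=merge \
  -p '{"spec":{"attachmentTickets":{"recovery-maintenance":{"id":"recovery-maintenance","type":"longhorn-api","nodeID":"pi1","parameters":{"disableFrontend":"true"}}}}}'

# Step 3 (detach) is the reverse: drop the ticket again.
kubectl -n longhorn-system patch volumeattachments.longhorn.io \
  pvc-cdd434d1-88b4-4588-8fd2-8c7eafc56d07 --type=json \
  -p '[{"op":"remove","path":"/spec/attachmentTickets/recovery-maintenance"}]'
```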
**Traefik** was simpler — PV `pvc-5391fa2b` already existed in Longhorn (Released). Just removed
claimRef (→ Available), created `kube-system/traefik` PVC with `volumeName:` pinned. Bound immediately.
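Sketched, with `<traefik-pv>` standing in for the full `pvc-5391fa2b-...` name (elided in the tables above):
```bash
# 1. Drop the stale claimRef: Released -> Available.
kubectl patch pv <traefik-pv> --type=json \
  -p '[{"op":"remove","path":"/spec/claimRef"}]'

# 2. Recreate the PVC pinned to that exact PV.
kubectl apply -f - <<EOF
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  name: traefik
  namespace: kube-system
spec:
  storageClassName: longhorn
  accessModes: ["ReadWriteOnce"]
  resources:
    requests:
      storage: 128Mi
  volumeName: <traefik-pv>
EOF
```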
**For all subsequent volumes** — use `playbooks/recover/longhorn_data.yml`. Method B is too risky.


@@ -1,70 +0,0 @@
---
# Automated Longhorn Recovery Playbook (DRAFT)
# Purpose: Break circular dependency and restore CSI driver after power-cut
#
# REQUIREMENTS:
# - Ansible >= 2.15
# - kubectl on control plane (pi1)
# - Backup scripts from playbooks/backup/k3s_pvc.yml must be deployed
#
# USAGE:
# ansible-playbook -i inventory/hosts.yml docs/incidents/2026-04-13-power-cut/recover_longhorn.yml
#
# REFERENCE FILES:
# - playbooks/system/k3s_config.yml (Longhorn HelmChart template)
# - playbooks/backup/k3s_pvc.yml (Backup/restore scripts)
# - inventory/hosts.yml (Target hosts)
# - /mnt/arcodange/longhorn/replicas/ (Data - MUST NOT be touched)
# - /home/pi/arcodange/backups/k3s_pvc/ (Fallback backup location)
#
#
# PLAYBOOK FLOW:
#
# Phase 1: DIAGNOSIS (idempotent, safe to run anytime)
# - Check CSI driver registration status
# - Check Longhorn manager health
# - Identify which recovery phase is needed
#
# Phase 2: SOFT RECOVERY (least destructive)
# - Touch longhorn-install.yaml manifest
# - Wait 60s for k3s HelmChart controller to reconcile
# - Verify pod recreation
#
# Phase 3: HARD RECOVERY (if soft fails)
#   - Delete all longhorn-driver-deployer pods
# - Wait for HelmChart to recreate
#
# Phase 4: NUCLEAR RECOVERY (if hard fails)
# - Delete HelmChart resource
# - Remove manifest file
# - Force-delete longhorn-system namespace (after removing finalizers)
# - Reinstall Longhorn via manifest
#
# Phase 5: RESTORE FROM BACKUP (idempotent)
# - Apply PV/PVC from backup
# - Apply Longhorn CRs from backup
# - Data auto-discovered from disk
#
# DESIGNED TO HANDLE:
# - CSI driver registration lost
# - Longhorn manager webhook circular dependency
# - Partial pod crashes
# - Full Longhorn namespace corruption
#
# LIMITATIONS:
# - Requires pi1 (control plane) to be reachable
# - Data in /mnt/arcodange/longhorn/ MUST survive
# - Docker must be functional on at least 1 node
# - Does NOT handle Docker overlay2 corruption
#
# TESTED SCENARIOS:
# - [ ] CSI driver not registered (primary use case)
# - [ ] Longhorn manager CrashLoopBackOff
# - [ ] Full namespace deletion needed
# - [ ] Backup restore validation
#
# TODO:
# - Add Docker storage health check
# - Add pre-recovery data verification
# - Add post-recovery validation

View File

@@ -1,153 +0,0 @@
---
title: Recovery Approach Analysis — Post-Incident Review
incident_id: 2026-04-13-001
date: 2026-04-13
author: Claude Code (external review)
---
# Recovery Approach Analysis
## TL;DR
The incident escalated from a **~5 minute fix** to a **full Longhorn reinstall with backup restore** because the simplest remediation (k3s restart) was never attempted, and a single aggressive command (`kubectl delete pods --all --force`) created a new problem that did not previously exist.
---
## What Was Skipped
### 1. Restart k3s on all nodes (never attempted)
This should have been the **first or second action** after the manifest touch failed.
```bash
systemctl restart k3s # pi1 — control plane
systemctl restart k3s-agent # pi2, pi3 — agent nodes
```
After a power cut, k3s/kubelet state is dirty. Restarting k3s:
- Forces kubelet to reinitialize the plugin registry cleanly
- Allows Longhorn pods to restart in correct dependency order
- Avoids the simultaneous-restart race condition that causes webhook issues
- Takes ~2 minutes with no destructive side effects
This was listed as a last resort in the runbook consulted at incident start. It should have been tried **before any pod deletion**, not after.
### 2. Stale CSI socket check on each node (never attempted)
```bash
# On each node (pi1, pi2, pi3):
ls /var/lib/kubelet/plugins/driver.longhorn.io/
# If a stale .sock file exists:
rm /var/lib/kubelet/plugins/driver.longhorn.io/csi.sock
```
The incident log confirms the CSI socket was missing/stale, but no one went to the nodes to verify and clean this up. Removing a stale socket + restarting the `longhorn-csi-plugin` daemonset is a targeted, low-risk fix.
---
## Where the Direction Went Wrong
### The pivotal mistake: force deleting all 24 pods simultaneously
**Command run at 15:32:15:**
```bash
kubectl delete pods -n longhorn-system --all --force --grace-period=0
```
This command created the **webhook circular dependency problem**, which did not exist before it was run.
**Why it caused the circular dependency:**
In normal operation, Longhorn managers start sequentially. One becomes the webhook leader and begins serving on port 9501 before others register as service endpoints.
When all 24 pods are force-deleted simultaneously:
1. All 3 manager pods race-start at the same time
2. All 3 IPs are registered as `longhorn-conversion-webhook` service endpoints immediately
3. The health check (`https://<pod-ip>:9501/v1/healthz`) is run against all 3
4. Only the elected leader actually serves port 9501 — the other 2 fail the probe
5. Failing managers crash: `"conversion webhook service is not accessible after 1m0s"`
6. `longhorn-driver-deployer` init container waits for healthy managers indefinitely
7. CSI socket is never created, CSI driver never registers
**The original problem was only a lost CSI socket registration.** The webhook circular dependency is a new problem introduced by the recovery attempt.
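A quick way to confirm this failure mode during an incident, using the service and probe named above (pod IPs are discovered on the fly):

```bash
# All three manager IPs get registered as conversion-webhook endpoints...
kubectl get endpoints longhorn-conversion-webhook -n longhorn-system
# ...but only the elected leader actually serves port 9501
for ip in $(kubectl get pods -n longhorn-system -l app=longhorn-manager \
    -o jsonpath='{.items[*].status.podIP}'); do
  echo -n "$ip: "
  curl -sk -o /dev/null -w '%{http_code}\n' "https://$ip:9501/v1/healthz"
done
```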
---
## The Escalation Cascade
Each step created a harder problem than the one it was meant to solve:
```
Power cut
→ CSI socket lost (original problem — simple fix)
→ Force delete all pods
→ Webhook circular dependency (new problem)
→ Delete HelmChart + manifest
→ 84 finalizers blocking namespace deletion (new problem)
→ Full reinstall required
→ Backup restore required
→ Risk to volume metadata
```
The original problem required touching 1 socket file and restarting k3s. The current state requires:
- Manually patching finalizers off 84+ resources
- Full Longhorn reinstall
- Restoring PV/PVC and Longhorn CRs from backup
- Verifying data auto-discovery from replicas
---
## Correct Recovery Sequence (Hindsight)
### Step 1 — k3s restart (should have been tried at ~15:27)
```bash
ansible -i inventory/hosts.yml all -m shell -a "sudo systemctl restart k3s || sudo systemctl restart k3s-agent"
```
Wait 3 minutes. In most power-cut scenarios, this alone restores CSI registration.
### Step 2 — If still broken: targeted daemonset restart (not force-delete-all)
```bash
kubectl rollout restart daemonset/longhorn-manager -n longhorn-system
kubectl rollout status daemonset/longhorn-manager -n longhorn-system
```
Graceful restart respects the dependency order. Wait for managers to stabilize before touching CSI pods.
### Step 3 — Check and clean stale sockets on each node
```bash
# Run on pi1, pi2, pi3:
ls /var/lib/kubelet/plugins/driver.longhorn.io/
rm -f /var/lib/kubelet/plugins/driver.longhorn.io/csi.sock
kubectl rollout restart daemonset/longhorn-csi-plugin -n longhorn-system
```
### Step 4 — Verify CSI driver registered
```bash
kubectl get csidriver
kubectl get csinodes
```
### Step 5 — Only if all above failed: delete driver-deployer pod only
```bash
kubectl delete pod -n longhorn-system -l app=longhorn-driver-deployer
```
Not all pods. One targeted pod.
---
## What Was Done Well
- Quick identification of the original root cause (CSI registration)
- Confirming volume data integrity early (`robustness="healthy"`)
- Securing backups before destructive operations (16:30)
- Fixing the backup script bug (useful regardless of incident)
- Detailed logging throughout
---
## Action Items for Future Incidents
- [ ] Add k3s restart as **step 2** in the Longhorn recovery runbook (before any pod deletion)
- [ ] Add CSI socket cleanup to the runbook as an explicit step on each node
- [ ] Add a "minimum destructive action" principle: prefer `rollout restart` over `delete --force --all`
- [ ] Implement `recover_longhorn.yml` playbook with the phased approach (soft → targeted → hard) to prevent ad-hoc escalation
- [ ] Add a pre-action checklist: "have I tried restarting the service before deleting its resources?"

View File

@@ -1,107 +0,0 @@
#!/usr/bin/env python3
"""
Merge Longhorn snapshot + head layers into a single mountable raw image.
Longhorn stores replica data as sparse raw images in a chain:
volume-snap-<id>.img — full state at the time the snapshot was taken
volume-head-NNN.img — delta (only changed blocks) since the snapshot
To reconstruct the full filesystem, head blocks take priority over snapshot
blocks. Sparse (all-zero) blocks in the head fall through to the snapshot.
Usage:
sudo python3 merge-longhorn-layers.py <replica-dir> <output.img>
Example:
sudo python3 merge-longhorn-layers.py \\
/mnt/arcodange/longhorn/replicas/pvc-cdd434d1-...-998f49ff \\
/tmp/merged.img
# Then mount and inspect:
sudo mount -o loop /tmp/merged.img /mnt/recovery
ls /mnt/recovery/
Proven useful during incident 2026-04-13 to recover the url-shortener SQLite
database from a Longhorn replica that was never touched by the nuclear cleanup
(pi3, dir suffix -998f49ff, Apr 6 snapshot).
Key lesson: always identify the untouched replica dir (oldest timestamps,
never renamed) before attempting directory swaps. Back it up first.
"""
import os
import sys
import json
BLOCK = 4096
def find_layers(replica_dir: str) -> tuple[str | None, str | None]:
"""
Read volume.meta to find head filename and snapshot parent.
Returns (snapshot_path, head_path). snapshot_path is None for base volumes.
"""
meta_path = os.path.join(replica_dir, "volume.meta")
with open(meta_path) as f:
meta = json.load(f)
head_name = meta["Head"]
parent_name = meta.get("Parent", "")
head_path = os.path.join(replica_dir, head_name)
snap_path = os.path.join(replica_dir, parent_name) if parent_name else None
return snap_path, head_path
def merge(snap_path: str | None, head_path: str, out_path: str) -> None:
size = os.path.getsize(head_path)
print(f"Volume size: {size // (1024 * 1024)} MiB")
print(f"Snapshot: {snap_path or '(none — base volume)'}")
print(f"Head: {head_path}")
print(f"Output: {out_path}")
snap_f = open(snap_path, "rb") if snap_path else None
head_f = open(head_path, "rb")
with open(out_path, "wb") as out:
out.truncate(size)
blocks = size // BLOCK
for i, offset in enumerate(range(0, size, BLOCK)):
head_f.seek(offset)
hb = head_f.read(BLOCK)
if hb and any(hb):
out.seek(offset)
out.write(hb)
elif snap_f:
snap_f.seek(offset)
sb = snap_f.read(BLOCK)
if sb and any(sb):
out.seek(offset)
out.write(sb)
if i % 4096 == 0:
pct = (i / blocks) * 100
print(f"\r {pct:.0f}%", end="", flush=True)
print("\r 100% — done.")
if snap_f:
snap_f.close()
head_f.close()
if __name__ == "__main__":
if len(sys.argv) != 3:
print(__doc__)
sys.exit(1)
replica_dir = sys.argv[1]
out_path = sys.argv[2]
if not os.path.isdir(replica_dir):
print(f"Error: {replica_dir} is not a directory", file=sys.stderr)
sys.exit(1)
snap, head = find_layers(replica_dir)
merge(snap, head, out_path)

View File

@@ -1,312 +0,0 @@
# Incident Documentation
This directory contains incident reports, postmortems, and recovery logs for the Arcodange Factory infrastructure.
## Purpose
Document all infrastructure incidents to:
- Track root causes and resolutions
- Maintain a knowledge base for future troubleshooting
- Improve system reliability through lessons learned
- Provide clear guidance for on-call responders
## Structure
Each incident is documented in its own directory under `docs/incidents/` with the following naming convention:
```
docs/incidents/
├── YYYY-MM-DD-incident-name/
│ ├── README.md # Incident summary and timeline
│ ├── status.md # Real-time status updates (optional)
│ ├── log.md # Detailed recovery actions and logs
│ ├── root-cause.md # Technical analysis (optional)
│ └── diagrams/ # Architecture/flow diagrams (optional)
│ └── *.mmd # Mermaid diagrams
└── ...
```
## Incident Directory Contents
### 1. `README.md` (Required)
The primary incident document. Must include:
- **Incident ID**: Unique identifier (e.g., `2026-04-13-001`)
- **Title**: Clear, descriptive title
- **Date/Time**: Start and end timestamps
- **Status**: Open / Investigating / Resolved / Monitoring
- **Severity**: SEV-1 (Critical) / SEV-2 (High) / SEV-3 (Medium) / SEV-4 (Low)
- **Impact**: Brief description of affected services
- **Summary**: What happened
- **Timeline**: Key events with timestamps
- **Root Cause**: Technical analysis
- **Resolution**: Steps taken to resolve
- **Action Items**: Follow-up tasks
- **Lessons Learned**: Key takeaways
**Front matter template:**
```markdown
---
title: Incident Title
incident_id: YYYY-MM-DD-NNN
date: YYYY-MM-DD
time_start: HH:MM:SS UTC
time_end: HH:MM:SS UTC
status: Resolved
severity: SEV-2
tags:
- kubernetes
- longhorn
- storage
---
```
### 2. `log.md` (Recommended)
Detailed technical log of all recovery actions. Must include:
- Commands executed with timestamps
- Command output (relevant portions)
- Decision rationale for each action
- Outcome of each action
- Next steps identified
Format:
```markdown
## [Time] Action Description
**Command:** `actual command run`
**Output:**
```
relevant output
```
**Decision:** Why this action was taken
**Outcome:** What happened
**Next:** What to do next
```
### 3. Mermaid Diagrams
Include at least one Mermaid diagram in each incident to visualize:
- Architecture/flow before incident
- Failure propagation
- Recovery process
- New architecture after fixes
**Example theme usage:**
```mermaid
%%{init: { 'theme': 'forest', 'themeVariables': { 'primaryColor': '#ffdfd3', 'edgeLabelBackground':'#fff' }}}%%
```
Available themes: `default`, `base`, `forest`, `dark`, `neutral`
**Recommended diagrams:**
- `incident-flow.mmd`: Timeline/flow of the incident
- `architecture.mmd`: Affected components architecture
- `recovery-flow.mmd`: Recovery steps visualization
- `dependency-tree.mmd`: Component dependencies showing failure path
## Incident Severity Definitions
| Severity | Description | Response Time | Impact |
|----------|-------------|---------------|--------|
| SEV-1 | Critical system-wide outage | Immediate (24/7) | Multiple services down, potential data loss |
| SEV-2 | Major service degradation | < 1 hour | Single critical service down |
| SEV-3 | Partial service degradation | < 4 hours | Non-critical service affected |
| SEV-4 | Minor issue | Next business day | Cosmetic or non-impacting |
## Available Ansible Playbooks for Recovery
This collection provides comprehensive infrastructure management via Ansible.
Always use `-i inventory/hosts.yml` when running playbooks.
### Master Playbooks (Run in order for full recovery)
| Playbook | Purpose | Targets |
|----------|---------|---------|
| `playbooks/01_system.yml` | System setup (hostnames, iSCSI, Docker, Longhorn, DNS) | raspberries |
| `playbooks/02_setup.yml` | Infrastructure setup (NFS backup, PostgreSQL, Gitea) | localhost, postgres, gitea |
| `playbooks/03_cicd.yml` | CI/CD pipeline (Gitea tokens, Docker Compose, ArgoCD) | localhost, gitea |
| `playbooks/04_tools.yml` | Tool deployment (Hashicorp Vault, Crowdsec) | tools group |
| `playbooks/05_backup.yml` | Backup configuration | localhost |
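In practice, a full rebuild reduces to running the five master playbooks above in order; a sketch (some plays are tag-gated, so check each playbook's tags before relying on a blind loop):

```bash
# From the collection root; preview each run before applying
for p in 01_system 02_setup 03_cicd 04_tools 05_backup; do
  ansible-playbook -i inventory/hosts.yml "playbooks/${p}.yml" --check --diff
  ansible-playbook -i inventory/hosts.yml "playbooks/${p}.yml"
done
```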
### Component-Specific Playbooks
#### System
| Playbook | Purpose | Notes |
|----------|---------|-------|
| `playbooks/system/rpi.yml` | Raspberry Pi hostname setup | |
| `playbooks/system/dns.yml` | DNS/pi-hole configuration | |
| `playbooks/system/ssl.yml` | SSL certificate setup with step-ca | |
| `playbooks/system/prepare_disks.yml` | Disk partitioning and formatting | |
| `playbooks/system/system_docker.yml` | Docker installation with custom storage | Storage at `/mnt/arcodange/docker` |
| `playbooks/system/k3s_config.yml` | K3s configuration (Traefik, Longhorn HelmCharts) | **Key for k3s** |
| `playbooks/system/system_k3s.yml` | K3s cluster deployment | Uses k3s-ansible collection |
| `playbooks/system/iscsi_longhorn.yml` | iSCSI client for Longhorn | Prerequisite for Longhorn |
| `playbooks/system/k3s_dns.yml` | K3s DNS configuration | |
| `playbooks/system/k3s_ssl.yml` | K3s SSL/traefik certificates | |
#### Storage
| Playbook | Purpose | Notes |
|----------|---------|-------|
| `playbooks/setup/backup_nfs.yml` | Longhorn RWX NFS backup volume | Creates 50Gi PVC + recurring backups |
| `playbooks/backup/k3s_pvc.yml` | PVC backup scripts | Creates `/opt/k3s_volumes/backup.sh` and `restore.sh` |
#### Backup
| Playbook | Purpose | Notes |
|----------|---------|-------|
| `playbooks/backup/backup.yml` | Main backup orchestration | Calls postgres, gitea, k3s_pvc |
| `playbooks/backup/postgres.yml` | PostgreSQL database backup | Docker exec pg_dumpall |
| `playbooks/backup/gitea.yml` | Gitea backup | Uses gitea dump command |
| `playbooks/backup/cron_report.yml` | Mail utility for cron reports | |
| `playbooks/backup/cron_report_mailutility.yml` | MTA configuration | |
### Inventory File
**File:** `inventory/hosts.yml`
**Groups:**
- `raspberries`: pi1, pi2, pi3 (Raspberry Pi nodes)
- `local`: localhost, pi1, pi2, pi3
- `postgres`: pi2 (PostgreSQL host)
- `gitea`: pi2 (Gitea host, inherits postgres)
- `pihole`: pi1, pi3 (DNS hosts)
- `step_ca`: pi1, pi2, pi3 (Certificate authority)
- `all`: All above groups
**Important:** All playbooks MUST be run with `-i inventory/hosts.yml` flag:
```bash
ansible-playbook -i inventory/hosts.yml playbooks/01_system.yml
```
### Handy Commands for Incident Response
```bash
# Check all pods
kubectl get pods -A
# Check Longhorn specifically
kubectl get pods -n longhorn-system
kubectl get volumes -n longhorn-system
kubectl get replicas -n longhorn-system
# Check storage
kubectl get pv -A
kubectl get pvc -A
kubectl get csidriver
# Check nodes
kubectl get nodes -o wide
kubectl describe node <nodename>
# Force Longhorn HelmChart reconcile (k3s-specific)
sudo touch /var/lib/rancher/k3s/server/manifests/longhorn-install.yaml
# Restart Longhorn (last resort: force-deleting all pods caused the 2026-04-13 webhook circular dependency; prefer a graceful rollout restart)
kubectl delete pods -n longhorn-system --all --force --grace-period=0
# Check Longhorn data on disk
ls /mnt/arcodange/longhorn/replicas/
# Check Docker storage
ls /mnt/arcodange/docker/overlay2/ | head
# Run ansible playbook (dry-run first)
ansible-playbook -i inventory/hosts.yml playbooks/01_system.yml --check --diff
ansible-playbook -i inventory/hosts.yml playbooks/01_system.yml --limit pi1
```
### K3s-Specific Recovery Notes
Longhorn is installed via **HelmChart manifest** (k3s native):
- File: `/var/lib/rancher/k3s/server/manifests/longhorn-install.yaml`
- To trigger reconcile: `touch` the file (k3s watches for changes)
- DO NOT use `helm install` directly - it may conflict with k3s HelmChart controller
Traefik is also installed via HelmChart manifest:
- File: `/var/lib/rancher/k3s/server/manifests/traefik-v3.yaml`
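The same touch-to-reconcile trick applies to the Traefik manifest:

```bash
sudo touch /var/lib/rancher/k3s/server/manifests/traefik-v3.yaml
```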
## Incident Templates
### Quick Start Template
```markdown
---
title: [Short Description]
incident_id: YYYY-MM-DD-NNN
date: $(date +%Y-%m-%d)
time_start: $(date +%H:%M:%S)
status: Investigating
severity: SEV-2
tags:
- tag1
- tag2
---
## Summary
[1-2 sentences describing the issue]
## Impact
[What services/users are affected]
## Timeline
| Time | Event | Owner |
|------|-------|-------|
| HH:MM | Initial detection | @user |
| HH:MM | Investigation started | @user |
| HH:MM | Root cause identified | @user |
| HH:MM | Resolution applied | @user |
| HH:MM | Service restored | @user |
## Root Cause
[Technical analysis]
## Resolution
[Step-by-step what was done]
## Mermaid Diagram
```mermaid
%%{init: { 'theme': 'forest' }}%%
graph TD
A[Component A] -->|depends on| B[Component B]
B -->|failed due to| C[Component C]
C -->|power cut| D[Root Cause]
```
*remember to always do this for mermaid labels:*
- have a space before a filepath
- no parentheses '()'
- use <br> instead of \n for new lines
## Action Items
- [ ] Task 1
- [ ] Task 2
## Lessons Learned
- Lesson 1
- Lesson 2
```
## Contributing to Incident Documentation
1. **During Incident**: Focus on resolution, log commands and outputs in `log.md`
2. **After Resolution**: Create or update the `README.md` with full incident details
3. **Add Diagrams**: Include at least one Mermaid diagram to visualize the issue
4. **Peer Review**: Have another team member review before closing
5. **Update Templates**: Improve templates based on what was missing
## Directory Index
| Incident | Date | Severity | Status |
|----------|------|----------|--------|
| [2026-04-13-power-cut](./2026-04-13-power-cut/README.md) | 2026-04-13 | SEV-1 | In Progress |

View File

@@ -1,244 +0,0 @@
# Cluster Recovery Agent Instructions
You are recovering the Arcodange homelab k3s cluster after an outage (power cut, node failure, or
Longhorn reinstall). Your job is to assess damage, run the appropriate Ansible playbooks and
kubectl commands, and bring the cluster back to a fully healthy state.
You do NOT need to modify any code. All recovery tooling already exists.
---
## Cluster Overview
| Component | Details |
|-----------|---------|
| Nodes | pi1, pi2, pi3 (Raspberry Pi, SSH via `pi<N>.home`) |
| k8s distribution | k3s |
| Storage | Longhorn (`/mnt/arcodange/longhorn/`) |
| GitOps | ArgoCD (apps auto-sync from `gitea.arcodange.lab/arcodange-org/`) |
| Secrets | HashiCorp Vault (`tools` namespace, manual unseal) |
| Ingress | Traefik + CrowdSec bouncer |
| Working dir | `/Users/gabrielradureau/Work/Arcodange/factory/ansible/arcodange/factory/` |
| Inventory | `inventory/hosts.yml` |
**Critical dependency:** ERP (Dolibarr) uses Vault-rotated DB credentials written to its PVC.
**Always recover and unseal Vault before scaling ERP up.**
---
## Step 0 — Assess Damage
Run these first to understand what is broken:
```bash
# Overall pod health
kubectl get pods -A | grep -v Running | grep -v Completed
# PVC health (anything not Bound is a problem)
kubectl get pvc -A | grep -v Bound
# Longhorn volume states
kubectl get volumes.longhorn.io -n longhorn-system
# Longhorn manager health (prerequisite for all recovery)
kubectl get pods -n longhorn-system -l app=longhorn-manager
```
---
## Step 1 — Longhorn Volume Recovery
### Path A — Fast path (backup file exists, Volume CRDs were backed up)
Check if a recent backup exists on pi1:
```bash
ssh pi1.home "ls -lt /mnt/backups/k3s_pvc/backup_*.volumes | head -5"
```
If a backup file exists and is recent (from before the incident):
```bash
ssh pi1.home "kubectl apply -f /mnt/backups/k3s_pvc/backup_<YYYYMMDD>.volumes"
```
Then verify PVCs bound and skip to Step 2.
### Path B — Block-device injection (no usable backup, raw replica files intact)
Use this when PVCs are `Lost`/`Terminating` and no Volume CRD backup is available.
**Check which volumes need recovery:**
```bash
# Volumes with no PVC or Lost/Terminating PVC
kubectl get pvc -A | grep -v Bound
```
**For each failed volume, create a vars file** following the pattern in:
`playbooks/recover/longhorn_data_vars.example.yml`
Existing vars files from the 2026-04-13 incident (reusable as references):
- `playbooks/recover/longhorn_data_vars_remaining.yml` — prometheus, alertmanager, redis, backups-rwx
- `playbooks/recover/longhorn_data_vars_erp_vault.yml` — erp, hashicorp-vault (audit + data)
- `playbooks/recover/longhorn_data_vars_clickhouse.yml` — clickhouse
**Key rules for the vars file:**
- `source_node`/`source_dir` can be omitted — Phase 0 auto-discovers the largest non-Rebuilding replica
- Set `workload_name: ""` for ERP — it must not scale up until Vault is unsealed
- For StatefulSets with multiple PVCs (e.g. Vault), set `workload_name: ""` on all but the last entry
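A hypothetical vars-file shape, inferred only from the rules above; every field name except `source_node`, `source_dir`, and `workload_name` is a guess, so copy the real schema from `longhorn_data_vars.example.yml`:

```yaml
# Hypothetical sketch; only source_node/source_dir/workload_name are documented names
longhorn_recovery_volumes:          # extra-var name cited in the runbook's set_fact pitfall
  - pv_name: pvc-<uuid>             # assumed field name
    namespace: monitoring           # assumed field name
    workload_name: ""               # documented: empty string = do not scale up
    # source_node / source_dir omitted: Phase 0 auto-discovers the best replica
```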
**Run the recovery playbook:**
```bash
ansible-playbook -i inventory/hosts.yml playbooks/recover/longhorn_data.yml \
-e @playbooks/recover/longhorn_data_vars_<NAME>.yml
```
The playbook is **idempotent** — safe to re-run if it fails midway.
**Playbook phases (for context when troubleshooting):**
| Phase | What it does |
|-------|-------------|
| 0 | Auto-discovers best replica dir (skips `Rebuilding: true`) |
| 1 | Backs up untouched replica dir to `/home/pi/arcodange/backups/longhorn-recovery/` |
| 2 | Merges snapshot+head layers into a single `.img` via `merge-longhorn-layers.py` |
| 3 | **Scales down workloads first**, then clears stuck Terminating PVCs, creates Volume CRD |
| 4 | Scale down (second pass, idempotent) |
| 5 | Attaches volume via maintenance ticket to source node |
| 6 | `mkfs.ext4` (if unformatted) + `rsync` from merged image into live block device |
| 7 | Removes maintenance ticket (volume detaches) |
| 8 | Creates PV (Retain, no claimRef) + PVC pinned to PV |
| 9 | Scales up workloads, waits for readyReplicas ≥ 1 (failures here are `ignore_errors: yes`) |
**Common Phase 8 failure — StatefulSet re-creates PVCs before they can be pinned:**
The playbook handles this automatically (scales down before finalizer removal). If you still hit it:
```bash
kubectl scale statefulset <name> -n <namespace> --replicas=0
kubectl patch pvc <pvc-name> -n <namespace> --type=merge -p '{"metadata":{"finalizers":null}}'
kubectl delete pvc <pvc-name> -n <namespace>
# Then re-run the playbook
```
---
## Step 2 — Unseal HashiCorp Vault
After Vault's PVCs are recovered, the pod boots **sealed**. Check:
```bash
kubectl get pod hashicorp-vault-0 -n tools
kubectl exec hashicorp-vault-0 -n tools -- vault status 2>/dev/null | grep Sealed
```
If sealed, run the unseal playbook (requires interactive terminal for the Gitea password prompt):
```bash
ansible-playbook -i inventory/hosts.yml playbooks/tools/hashicorp_vault.yml
```
Unseal keys are at `~/.arcodange/cluster-keys.json` on the local machine. The playbook reads them automatically.
After the playbook completes, verify:
```bash
kubectl get pod hashicorp-vault-0 -n tools # must be 1/1 Ready
kubectl exec hashicorp-vault-0 -n tools -- vault status | grep Sealed # must be false
```
---
## Step 3 — Scale Up ERP
Only after Vault is unsealed and Ready:
```bash
kubectl scale deployment erp -n erp --replicas=1
kubectl rollout status deployment/erp -n erp
```
---
## Step 4 — Reconfigure Tools (CrowdSec, etc.)
Run if CrowdSec bouncer or Traefik middleware needs reconfiguring:
```bash
# Standard run (bouncer key + Traefik middleware + restart)
ansible-playbook -i inventory/hosts.yml playbooks/tools/crowdsec.yml
# Include captcha HTML injection (use when captcha page is broken)
ansible-playbook -i inventory/hosts.yml playbooks/tools/crowdsec.yml --tags never,all
```
If crowdsec-agent or crowdsec-appsec pods are stuck in `Error` after a long outage,
the playbook handles restarting them automatically.
---
## Step 5 — Re-enable ArgoCD selfHeal
Check if `selfHeal` was disabled during recovery (look for `selfHeal: false` in the tools app):
```bash
grep -A5 "tools:" /Users/gabrielradureau/Work/Arcodange/factory/argocd/values.yaml
```
If disabled, re-enable it by editing `argocd/values.yaml` and setting `selfHeal: true`,
then syncing the ArgoCD app:
```bash
kubectl get app tools -n argocd
```
---
## Step 6 — Final Verification
```bash
# All pods running
kubectl get pods -A | grep -v Running | grep -v Completed | grep -v "^NAME"
# All PVCs bound
kubectl get pvc -A | grep -v Bound
# All Longhorn volumes healthy
kubectl get volumes.longhorn.io -n longhorn-system
# Run a fresh backup to capture the recovered state
ansible-playbook -i inventory/hosts.yml playbooks/backup/backup.yml \
-e backup_root_dir=/mnt/backups
```
---
## Key Files Reference
| File | Purpose |
|------|---------|
| `playbooks/recover/longhorn_data.yml` | Main block-device recovery playbook |
| `playbooks/recover/longhorn.yml` | Recovery when Volume CRDs still exist |
| `playbooks/recover/longhorn_data_vars.example.yml` | Template for recovery vars |
| `playbooks/recover/longhorn_data_vars_erp_vault.yml` | Vars for erp + vault (2026-04-13 incident) |
| `playbooks/recover/longhorn_data_vars_remaining.yml` | Vars for other volumes (2026-04-13 incident) |
| `playbooks/backup/backup.yml` | Full backup (postgres + gitea + k3s PVCs + Longhorn CRDs) |
| `playbooks/backup/k3s_pvc.yml` | PV/PVC/Longhorn Volume CRD backup |
| `playbooks/tools/hashicorp_vault.yml` | Vault unseal + OIDC reconfiguration |
| `playbooks/tools/crowdsec.yml` | CrowdSec bouncer + Traefik middleware setup |
| `docs/adr/20260414-longhorn-pvc-recovery.md` | Full incident ADR with all recovery methods |
| `~/.arcodange/cluster-keys.json` | Vault unseal keys (local machine only) |
---
## Decision Tree
```
Cluster down after outage
├─ kubectl works? ──No──▶ Check k3s: `systemctl status k3s` on pi1/pi2/pi3
└─ Yes
├─ PVCs all Bound? ──Yes──▶ Skip to Step 2 (check Vault)
└─ No
├─ Recent .volumes backup on pi1? ──Yes──▶ Path A (kubectl apply backup)
└─ No
├─ Longhorn Volume CRDs exist? ──Yes──▶ playbooks/recover/longhorn.yml
└─ No ──▶ Path B (longhorn_data.yml block-device injection)
Check replica dirs exist first:
ssh pi{1,2,3}.home "sudo du -sh /mnt/arcodange/longhorn/replicas/pvc-*"
```

View File

@@ -1,360 +0,0 @@
# Runbook: Longhorn Block-Device Data Recovery
**When to use:** Longhorn has been fully reinstalled (nuclear cleanup). Volume CRDs are gone.
Application PVCs are stuck `Terminating` or `Lost`. The raw replica `.img` files still exist
on disk across the nodes. kubectl/k8s objects cannot help — we must work directly with the
Longhorn replica directories and block devices.
**Automated version:** `playbooks/recover/longhorn_data.yml`
---
## Mental Model
Longhorn stores each replica as a chain of sparse raw image files inside a directory named
`<pv-name>-<random-hex>` under `<longhorn_data_path>/replicas/`. Each directory contains:
```
volume.meta — engine state (Head filename, Parent snapshot, Dirty flag)
volume-head-NNN.img — active write log (sparse, only changed blocks)
volume-head-NNN.img.meta — head metadata
volume-snap-<uuid>.img — snapshot at a point in time (sparse, full state)
volume-snap-<uuid>.img.meta — snapshot metadata
revision.counter — monotonically increasing write counter
```
After a nuclear cleanup + reinstall, Longhorn creates **new empty replica directories** with
new random hex suffixes. The old directories (with data) are left on disk but orphaned.
**Why directory-swap fails:** the old `volume.meta` has a different engine generation and
`Dirty: true`. Longhorn detects the inconsistency across replicas and rebuilds from the
"cleanest" source (the new empty pi1 replica), overwriting the old data.
**What works:** extract the filesystem from the untouched replica directory directly, then
inject the data files into the live Longhorn block device while the volume is temporarily
attached in maintenance mode.
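Before choosing a branch, dump the engine state of every surviving dir; the fields below are the ones this runbook keys on (`Head`, `Parent`, `Dirty`, `Rebuilding`):

```bash
for m in /mnt/arcodange/longhorn/replicas/pvc-<VOLUME>-*/volume.meta; do
  echo "== $m =="
  sudo python3 -c "import json, sys
d = json.load(open(sys.argv[1]))
print('Head:', d['Head'], '| Parent:', d.get('Parent') or '(base)',
      '| Dirty:', d['Dirty'], '| Rebuilding:', d['Rebuilding'])" "$m"
done
```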
---
## Decision Tree
```
Are Volume CRDs present in Longhorn?
├── YES → normal PV/PVC restore is enough, use playbooks/recover/longhorn.yml
└── NO
└── Are replica directories present on disk?
├── NO → data is lost, provision fresh volumes
└── YES
└── Is there an untouched replica dir (timestamps from before the incident)?
├── NO → data likely unrecoverable (all dirs were zeroed during reconciliation)
└── YES → follow this runbook
```
---
## Step 0 — Pre-flight: Inventory Surviving Replica Directories
On each node, list replica dirs and their sizes. Dirs with actual data are large (>16K).
New empty dirs created by Longhorn are always exactly 16K.
```bash
for node in pi1 pi2 pi3; do
echo "=== $node ==="
ssh $node "sudo du -sh /mnt/arcodange/longhorn/replicas/pvc-<VOLUME>-* 2>/dev/null"
done
```
**Key rule:** identify the replica dir that was **never touched** by the reinstall — it has
old timestamps (from before the incident) and its size matches the original volume usage.
This is your recovery source. **Back it up before touching anything.**
```bash
# On the node that has the untouched dir:
sudo mkdir -p /home/pi/arcodange/backups/longhorn-recovery/<pvc-name>/
sudo cp -a /mnt/arcodange/longhorn/replicas/<pv-name>-<old-hex>/ \
/home/pi/arcodange/backups/longhorn-recovery/<pvc-name>/
```
---
## Step 1 — Reconstruct the Filesystem
The replica directory contains a snapshot chain. Each layer is a sparse raw image — unchanged
blocks appear as zeroed sparse regions, only written blocks contain data. To reconstruct the
full filesystem, layers must be merged: head takes priority, then snapshot.
Use `docs/incidents/2026-04-13-power-cut/tools/merge-longhorn-layers.py`:
```bash
# On the node holding the backup:
sudo python3 merge-longhorn-layers.py \
/home/pi/arcodange/backups/longhorn-recovery/<pvc-name>/<pv-name>-<old-hex>/ \
/tmp/<pvc-name>-merged.img
# Verify the filesystem mounts
sudo mkdir -p /mnt/recovery-<pvc-name>
sudo mount -o loop /tmp/<pvc-name>-merged.img /mnt/recovery-<pvc-name>
sudo ls -lah /mnt/recovery-<pvc-name>/
sudo umount /mnt/recovery-<pvc-name>
```
If mount fails with "wrong fs type" or "bad superblock":
- The snapshot `.img` is all-zero (was overwritten by a prior Longhorn reconciliation)
- Try the next oldest replica dir from another node
- Check with `sudo od -A x -t x1z -v snap.img | grep -v ' 00 00...' | head -5`
---
## Step 2 — Create the Longhorn Volume CRD
Longhorn needs to know about the volume before its block device can be used.
```bash
kubectl apply -f - <<EOF
apiVersion: longhorn.io/v1beta2
kind: Volume
metadata:
name: <pv-name>
namespace: longhorn-system
spec:
accessMode: rwo # or rwx
dataEngine: v1
frontend: blockdev
numberOfReplicas: 3
size: "<size-in-bytes>" # e.g. "134217728" for 128Mi
EOF
```
Wait for replicas to appear:
```bash
kubectl get replicas.longhorn.io -n longhorn-system | grep <pv-name>
# Expect 3 replicas in "stopped" state
```
---
## Step 3 — Attach the Volume in Maintenance Mode
Longhorn only creates the block device (`/dev/longhorn/<pv-name>`) when the volume is
attached to a node. Use a `VolumeAttachment` ticket to attach without a pod.
Choose `<target-node>` = the same node where the backup/merged image is stored (avoids
copying large files across the network).
```bash
kubectl apply -f - <<EOF
apiVersion: longhorn.io/v1beta2
kind: VolumeAttachment
metadata:
name: <pv-name>
namespace: longhorn-system
spec:
attachmentTickets:
recovery:
generation: 0
id: recovery
nodeID: <target-node>
parameters:
disableFrontend: "false"
type: longhorn-api
volume: <pv-name>
EOF
kubectl wait --for=jsonpath='{.status.state}'=attached \
volumes.longhorn.io/<pv-name> -n longhorn-system --timeout=120s
```
---
## Step 4 — Scale Down the Workload
Always stop the workload before touching the data to prevent concurrent writes and filesystem
corruption.
```bash
# For a Deployment:
kubectl scale deployment <name> -n <namespace> --replicas=0
# For a StatefulSet:
kubectl scale statefulset <name> -n <namespace> --replicas=0
```
---
## Step 5 — Inject Data Files via Block Device
```bash
ssh <target-node> bash <<'SHELL'
# Mount the live block device
sudo mkdir -p /mnt/recovery-live
sudo mount /dev/longhorn/<pv-name> /mnt/recovery-live
# Mount the reconstructed image (if not already mounted)
sudo mkdir -p /mnt/recovery-src
sudo mount -o loop /tmp/<pvc-name>-merged.img /mnt/recovery-src
# Sync: only the application data files, not lost+found
sudo rsync -av --exclude='lost+found' /mnt/recovery-src/ /mnt/recovery-live/
# Verify
sudo ls -lah /mnt/recovery-live/
# Unmount both
sudo umount /mnt/recovery-src
sudo umount /mnt/recovery-live
SHELL
```
---
## Step 6 — Detach the Volume
```bash
kubectl patch volumeattachments.longhorn.io <pv-name> \
-n longhorn-system --type json \
-p '[{"op":"remove","path":"/spec/attachmentTickets/recovery"}]'
kubectl wait --for=jsonpath='{.status.state}'=detached \
volumes.longhorn.io/<pv-name> -n longhorn-system --timeout=60s
```
---
## Step 7 — Restore PV and PVC
Clear stuck Terminating PV/PVC finalizers first if they exist:
```bash
kubectl patch pv <pv-name> --type=merge -p '{"metadata":{"finalizers":null}}' 2>/dev/null
kubectl patch pvc <pvc-name> -n <namespace> --type=merge \
-p '{"metadata":{"finalizers":null}}' 2>/dev/null
# Wait a moment for them to delete
```
Recreate the PV with `Retain` policy and no `claimRef`:
```bash
kubectl apply -f - <<EOF
apiVersion: v1
kind: PersistentVolume
metadata:
name: <pv-name>
annotations:
pv.kubernetes.io/provisioned-by: driver.longhorn.io
spec:
accessModes: [ReadWriteOnce] # match original
capacity:
storage: <size> # e.g. 128Mi
csi:
driver: driver.longhorn.io
fsType: ext4
volumeHandle: <pv-name>
volumeAttributes:
dataEngine: v1
dataLocality: disabled
disableRevisionCounter: "true"
numberOfReplicas: "3"
staleReplicaTimeout: "30"
persistentVolumeReclaimPolicy: Retain
storageClassName: longhorn
volumeMode: Filesystem
EOF
```
Recreate the PVC pinned to this PV:
```bash
kubectl apply -f - <<EOF
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
name: <pvc-name>
namespace: <namespace>
spec:
accessModes: [ReadWriteOnce]
resources:
requests:
storage: <size>
storageClassName: longhorn
volumeMode: Filesystem
volumeName: <pv-name>
EOF
```
---
## Step 8 — Scale Up and Verify
```bash
kubectl scale deployment <name> -n <namespace> --replicas=1
kubectl wait --for=condition=Ready pod -l app=<name> -n <namespace> --timeout=120s
```
---
## Pitfalls Learned During 2026-04-13 Recovery
| Pitfall | What happened | Prevention |
|---------|--------------|------------|
| **Directory swap corrupts data** | Longhorn found old `Dirty: true` volume.meta + empty pi1 replica → rebuilt from empty source | Never swap dirs. Use merge tool + block device injection instead |
| **Snapshot is zeroed after swap** | Longhorn reconciliation overwrote snapshot images when rebuilding from empty replica | Back up the untouched dir FIRST before any rename |
| **Multiple dirs per volume on pi3** | Rebuild attempts during the incident created extra dirs | Identify the untouched dir by timestamp AND verify non-zero content with `od` |
| **`Rebuilding: true` replica → all-zeros merged image** | Phase 0 picked a replica mid-rebuild (1.3 GiB actual data, sparse files look large) — merge tool produced an all-zeros image | Check `volume.meta` and skip any dir with `"Rebuilding": true` before merging |
| **`du -sb` gives misleading apparent sizes** | Sparse replica files (8 GiB file, 1.3 GiB actual) appeared larger than healthy 11 GiB replicas | Use `du -sk` (actual disk blocks) not `du -sb` (apparent/logical size) to rank replicas |
| **Dirty journal prevents ro mount** | `mount -o loop,ro` fails with "bad superblock" on an ext4 with unclean shutdown | Use `mount -o loop,ro,noload` to skip journal replay for read-only access |
| **New volume is unformatted** | `mount /dev/longhorn/<pv>` fails with "wrong fs type" on a freshly created volume | Run `mkfs.ext4 -F` before mounting; guard with `blkid` to skip if already formatted |
| **rsync rc=23 on power-cut partitions** | Some filesystem blocks were unreadable ("Structure needs cleaning") → rsync exits 23 | Use `rsync --ignore-errors`; rc=23 is a partial transfer, not a total failure |
| **pod blocks volume re-attach** | Old Error-state pod held a volume attachment claim | Delete old Error pods before scaling up new ones |
| **`kubectl cp` needs `tar`** | Distroless container had no `tar` binary | Mount block device directly on the node instead |
| **VolumeAttachment ticket removal** | Deleting a VolumeAttachment object causes Longhorn to immediately recreate it | Patch the `recovery` key out of `spec.attachmentTickets` instead of deleting the object |
| **Phase 7 wait for `detached` times out** | After removing the recovery ticket, a workload may immediately create its own ticket | Wait for the `recovery` ticket to disappear from `spec.attachmentTickets`, not for full detach |
| **StatefulSet pods not found by label** | `kubectl get pod -l app=<name>` returns nothing for StatefulSet pods | Wait on `readyReplicas ≥ 1` on the StatefulSet object, not on pod labels |
| **`set_fact` overridden by `-e @file`** | Ansible extra vars have highest precedence — `set_fact: longhorn_recovery_volumes` was silently ignored | Use a different variable name (`_volumes`) for the resolved list, never reassign the extra var name |
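Several of these pitfalls collapse into one defensive pattern for the mount/format/rsync steps; a sketch built only from the flags in the table above:

```bash
# Read-only inspection of an ext4 image with a dirty journal (skip journal replay)
sudo mount -o loop,ro,noload /tmp/<pvc-name>-merged.img /mnt/recovery-src

# Format the live device only if blkid reports no existing filesystem
sudo blkid /dev/longhorn/<pv-name> || sudo mkfs.ext4 -F /dev/longhorn/<pv-name>

# rc=23 from rsync means some unreadable blocks were skipped, not a total failure
sudo mount /dev/longhorn/<pv-name> /mnt/recovery-live
sudo rsync -av --ignore-errors --exclude='lost+found' /mnt/recovery-src/ /mnt/recovery-live/
```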
---
## Identifying the Right Replica Directory
When multiple old dirs exist for the same volume on a node, pick the one to use for recovery:
1. **Skip `Rebuilding: true`:** check `volume.meta` first — a dir that was being rebuilt when
the incident happened has incomplete data (sparse files are allocated but mostly zeroed):
```bash
python3 -c "import json; d=json.load(open('volume.meta')); print('Rebuilding:', d['Rebuilding'])"
```
Only consider dirs where `Rebuilding: false`.
2. **Actual size:** `sudo du -sk <dir>` (actual disk usage in KB — not `du -sb` which returns
apparent/logical size and is misleading for sparse files). Pick the largest actual size.
3. **Timestamps:** prefer the most recently modified before the incident date.
4. **Snapshot chain:** if Rebuilding is false on multiple dirs, check `volume.meta` for
`"Dirty": false` (clean shutdown) vs `"Dirty": true`. Prefer clean if available.
5. **Content check:** verify the snapshot is not all zeros:
```bash
sudo od -A x -t x1z -v volume-snap-*.img | grep -v ' 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00' | head -3
```
If the output is empty (all zeros), the snapshot was overwritten. Try another node.
**Summary rule:** `Rebuilding: false` → largest `du -sk` → non-zero snapshot content.
---
## Reference: Key Commands
```bash
# List all replica dirs for a volume across all nodes
for n in pi1 pi2 pi3; do echo "==$n=="; ssh $n "sudo ls /mnt/arcodange/longhorn/replicas/ | grep <pv-prefix>"; done
# Check Longhorn volume state
kubectl get volumes.longhorn.io -n longhorn-system <pv-name>
# Check VolumeAttachment tickets
kubectl get volumeattachments.longhorn.io -n longhorn-system <pv-name> \
-o jsonpath='{.spec.attachmentTickets}'
# Check Longhorn block device existence on a node
ssh <node> "ls /dev/longhorn/<pv-name>"
# Verify filesystem content without starting the app
ssh <node> "sudo mount /dev/longhorn/<pv-name> /mnt/check && sudo ls /mnt/check && sudo umount /mnt/check"
```

View File

@@ -1,11 +0,0 @@
---
# Gitea ownership configuration consumed by playbooks running on `localhost`
# (e.g. tools/hashicorp_vault.yml). Role-level defaults (gitea_username,
# gitea_organization) live in roles/gitea_secret/defaults/main.yml; this file
# is for fact lists that the inventory should declare.
# Users (Gitea owner_type=user) to which org-level Gitea Action secrets must
# also be propagated. Repos owned by these users cannot read org-level secrets,
# so the secret propagation playbook iterates over this list.
gitea_secret_propagation_users:
- arcodange

View File

@@ -1,4 +1,4 @@
gitea_version: 1.25.5
gitea_version: 1.24.3
gitea_database:
db_name: gitea
@@ -34,11 +34,11 @@ gitea:
GITEA__mailer__SMTP_PORT: 465
GITEA__mailer__PASSWD: '{{ gitea_vault.GITEA__mailer__PASSWD }}'
GITEA__server__SSH_PORT: 2222
GITEA__server__SSH_DOMAIN: "{{ hostvars[groups.gitea[0]]['preferred_ip'] }}"
GITEA__server__SSH_DOMAIN: "{{ lookup('dig', groups.gitea[0]) }}"
GITEA__server__SSH_LISTEN_PORT: 22
GITEA_server__DOMAIN: localhost
GITEA_server__HTTP_PORT: 3000
GITEA_server__ROOT_URL: https://gitea.arcodange.lab/
GITEA_server__ROOT_URL: https://gitea.arcodange.duckdns.org/
GITEA_server__START_SSH_SERVER: true
GITEA_server__OFFLINE_MODE: true
GITEA_service__DISABLE_REGISTRATION: true

View File

@@ -1,9 +0,0 @@
step_ca_primary: pi1
step_ca_fqdn: ssl-ca.arcodange.lab
step_ca_user: step
step_ca_home: /home/step
step_ca_dir: /home/step/.step
step_ca_listen_address: ":8443"

View File

@@ -1,13 +0,0 @@
$ANSIBLE_VAULT;1.1;AES256
35633437343661363030323466313735373033373566643530653539633133623462333337393037
6336653635366439363031616637313339373465666433320a653936396438373132623264386665
66623330343439613636353963373139363531613761613864623262623661666565373137306461
3062646337353331300a636164643462343163303931646538653537323831623736393634343137
39376139306165356138383664373334353364316435303265643965386135356561316130316239
64393436363436393339393130383764353231333361313565333934313136666234356433626437
35656666386538653963653334393262366562656631376636353538383661386661366438366133
64346338666666323562313363363836613439633931306437393132616134666230613936623634
34383366663031336236316566626666303764323631363239636461396366323733393731376563
65356630326536333133393335383766616631323732333262396464326165366532383066363761
37303033316135616661623431623836313965373930376361656334323336656561643336616265
36666235623564383132

View File

@@ -2,15 +2,12 @@ raspberries:
hosts:
pi1:
ansible_host: pi1.home # setup http://192.168.1.1/ Réseau/DNS
preferred_ip: 192.168.1.201
ansible_ssh_extra_args: '-o StrictHostKeyChecking=no'
pi2:
ansible_host: pi2.home
preferred_ip: 192.168.1.202
ansible_ssh_extra_args: '-o StrictHostKeyChecking=no'
pi3:
ansible_host: pi3.home
preferred_ip: 192.168.1.203
ansible_ssh_extra_args: '-o StrictHostKeyChecking=no'
internetPi1:
@@ -30,7 +27,6 @@ local:
hosts:
localhost:
ansible_connection: local
ansible_python_interpreter: "{{ ansible_playbook_python }}"
pi1:
pi2:
pi3:
@@ -43,17 +39,6 @@ gitea:
children:
postgres:
pihole:
hosts:
pi1:
pi3:
step_ca:
hosts:
pi1:
pi2:
pi3:
all:
children:
raspberries:

View File

@@ -1,2 +1,383 @@
- name: system
ansible.builtin.import_playbook: ./system/system.yml
---
- name: Prepare disks for longhorn
ansible.builtin.import_playbook: ./prepare_disks.yml
- name: System Docker
hosts: raspberries:&local
gather_facts: yes
tags: never
become: yes
pre_tasks:
- name: set hostname
ansible.builtin.hostname:
name: "{{ inventory_hostname }}"
become: yes
when: inventory_hostname != ansible_hostname
- name: Prevent apt source conflict
ansible.builtin.file:
state: absent
path: /etc/apt/sources.list.d/docker.list
become: yes
- name: Install role geerlingguy.docker
community.general.ansible_galaxy_install:
type: role
name: geerlingguy.docker
run_once: true
delegate_to: localhost
become: false
- ansible.builtin.debug:
var: ansible_facts.machine
tasks:
- include_role:
name: geerlingguy.docker
post_tasks:
- name: adding existing user '{{ ansible_user }}' to group docker
user:
name: '{{ ansible_user }}'
groups: docker
append: yes
become: yes
#---
- name: Install iSCSI client for Longhorn on Raspberry Pi
hosts: raspberries:&local
become: yes
tasks:
- name: Install open-iscsi
ansible.builtin.apt:
name: open-iscsi
state: present
update_cache: yes
- name: Enable and start iSCSI service
ansible.builtin.service:
name: iscsid
state: started
enabled: yes
- name: Install cryptsetup
ansible.builtin.apt:
name: cryptsetup
state: present
update_cache: yes
- name: Load the dm_crypt kernel module
ansible.builtin.modprobe:
name: dm_crypt
state: present
- name: Ensure the dm_crypt module is loaded at boot
ansible.builtin.lineinfile:
path: /etc/modules
line: dm_crypt
state: present
- name: Create the longhorn directory
ansible.builtin.file:
path: /mnt/arcodange/longhorn
state: directory
owner: pi
group: docker
mode: '0774'
ignore_errors: true
#---
- name: System K3S
hosts: raspberries:&local
tags: never
tasks:
- name: prepare inventory for k3s external playbook
tags: always
ansible.builtin.add_host:
hostname: "{{ item }}"
groups:
- k3s_cluster
- "{{ ansible_loop.first | ternary('server', 'agent') }}"
loop: "{{ groups.raspberries | intersect(groups.local) | sort }}"
loop_control:
extended: true
extended_allitems: false
- name: Install collection k3s.orchestration
local_action:
module: community.general.ansible_galaxy_install
type: collection
name: git+https://github.com/k3s-io/k3s-ansible
run_once: true
- name: k3s
tags: never
ansible.builtin.import_playbook: k3s.orchestration.site
# ansible.builtin.import_playbook: k3s.orchestration.upgrade
# ansible.builtin.import_playbook: k3s.orchestration.reset
vars:
k3s_version: v1.32.7+k3s1
extra_server_args: "--docker --disable traefik"
extra_agent_args: "--docker"
api_endpoint: "{{ hostvars[groups['server'][0]]['ansible_host'] | default(groups['server'][0]) }}"
- name: how to reach k3s
hosts: server
tasks:
- name: copy /etc/rancher/k3s/k3s.yaml to ~/.kube/config from the k3s server and replace 127.0.0.1 with the server ip or hostname
run_once: true
block:
- ansible.builtin.fetch:
src: /etc/rancher/k3s/k3s.yaml
dest: ~/.kube/config
flat: true
become: true
run_once: true
- local_action:
module: ansible.builtin.replace
path: ~/.kube/config
regexp: 'server: https://127.0.0.1:6443'
replace: 'server: https://{{ ansible_default_ipv4.address }}:6443'
# - name: setup hard disk
# tags: never
# ansible.builtin.import_playbook: ./setup/hard_disk_v2.yml
# # vars:
# # hard_disk__partitions:
# # nfs: []
- name: setup longhorn for volumes https://docs.k3s.io/helm
become: true
ansible.builtin.copy:
dest: /var/lib/rancher/k3s/server/manifests/longhorn-install.yaml
content: |-
apiVersion: helm.cattle.io/v1
kind: HelmChart
metadata:
annotations:
helmcharts.cattle.io/managed-by: helm-controller
finalizers:
- wrangler.cattle.io/on-helm-chart-remove
generation: 1
name: longhorn-install
namespace: kube-system
spec:
version: v1.9.1
chart: longhorn
repo: https://charts.longhorn.io
failurePolicy: abort
targetNamespace: longhorn-system
createNamespace: true
valuesContent: |-
defaultSettings:
defaultDataPath: /mnt/arcodange/longhorn
vars:
longhorn_helm_values: {} # https://github.com/longhorn/longhorn/blob/master/chart/values.yaml
- name: customize k3s traefik configuration https://docs.k3s.io/helm
block:
- name: Get my public IP
community.general.ipify_facts:
- become: true
ansible.builtin.copy:
dest: /var/lib/rancher/k3s/server/manifests/traefik-v3.yaml
content: |-
apiVersion: v1
data:
dynamic.yaml: |-
{{ traefik_config_yaml | to_nice_yaml | indent( width=4 ) }}
kind: ConfigMap
metadata:
name: traefik-configmap
namespace: kube-system
---
apiVersion: helm.cattle.io/v1
kind: HelmChart
metadata:
name: traefik
namespace: kube-system
spec:
repo: https://traefik.github.io/charts
chart: traefik
version: v37.0.0
targetNamespace: kube-system
valuesContent: |-
{{ traefik_helm_values | to_nice_yaml | indent( width=4 ) }}
vars:
traefik_config_yaml:
http:
services:
gitea:
loadBalancer:
servers:
- url: "http://{{ lookup('dig', groups.gitea[0]) }}:3000"
routers:
acme-challenge:
rule: Host(`arcodange.duckdns.org`) && PathPrefix(`/.well-known/acme-challenge`)
service: acme-http@internal
tls:
certResolver: letsencrypt
domains:
- main: "arcodange.duckdns.org"
sans:
- "*.arcodange.duckdns.org"
entryPoints:
- websecure
- web
gitea:
rule: Host(`gitea.arcodange.duckdns.org`)
service: gitea
middlewares:
- localIp
tls:
certResolver: letsencrypt
domains:
- main: "arcodange.duckdns.org"
sans:
- "gitea.arcodange.duckdns.org"
entrypoints:
- websecure
middlewares:
localIp:
ipAllowList:
sourceRange:
- "192.168.1.0/24"
- "{{ ipify_public_ip }}/32"
# - "0.0.0.0/0"
# ipStrategy:
# depth: 1
traefik_helm_values:
deployment:
kind: "Deployment"
initContainers:
- name: volume-permissions
image: busybox:latest
command: ["sh", "-c", "touch /data/acme.json; chmod -v 600 /data/acme.json"]
volumeMounts:
- name: data
mountPath: /data
# default is https://github.com/traefik/traefik-helm-chart/blob/v25.0.0/traefik/values.yaml <- for v25 (`kubectl describe deployments.apps traefik -n kube-system | grep helm.sh/chart`)
# current is https://github.com/traefik/traefik-helm-chart/blob/v30.1.0/traefik/values.yaml
nodeSelector:
node-role.kubernetes.io/master: 'true' # pin HTTPS traffic to a predictable node and avoid NAT/loss of the client IP
service:
spec:
externalTrafficPolicy: Local
ports:
traefik:
expose:
default: true
ingressRoute:
dashboard:
enabled: true
globalArguments: [] # deactivate --global.sendanonymoususage
env:
- name: POD_NAME
valueFrom:
fieldRef:
fieldPath: metadata.name
- name: POD_NAMESPACE
valueFrom:
fieldRef:
fieldPath: metadata.namespace
- name: LEGO_DISABLE_CNAME_SUPPORT
value: 'true'
logs:
general:
level: DEBUG
# format: json
access:
enabled: true
# format: json
podSecurityContext:
runAsGroup: 65532
runAsNonRoot: true
runAsUser: 65532
fsGroup: 65532 # else the persistent volume might be owned by root and be unwriteable
persistence:
# -- Enable persistence using Persistent Volume Claims
# ref: http://kubernetes.io/docs/user-guide/persistent-volumes/
# It can be used to store TLS certificates, see `storage` in certResolvers
enabled: true
name: data
# existingClaim: ""
accessMode: ReadWriteOnce
size: 128Mi
storageClass: "longhorn"
# volumeName: ""
path: /data
annotations: {}
volumes:
- name: traefik-configmap
mountPath: /config
type: configMap
additionalArguments:
- '--providers.file.filename=/config/dynamic.yaml'
- '--providers.kubernetesingress.ingressendpoint.publishedservice=kube-system/traefik'
certificatesResolvers:
letsencrypt:
acme:
# for challenge options cf. https://doc.traefik.io/traefik/https/acme/
email: arcodange@gmail.com
tlsChallenge: true
dnsChallenge:
# requires env variable DUCKDNS_TOKEN
provider: duckdns
propagation:
delayBeforeChecks: 120
disableChecks: true
resolvers:
- "1.1.1.1:53"
- "8.8.8.8:53"
httpChallenge:
entryPoint: "web"
# It has to match the path with a persistent volume
storage: /data/acme.json
envFrom:
- secretRef:
name: traefik-duckdns-token
# MY_TOKEN=<my token (see https://www.duckdns.org/domains)>
# kubectl create secret generic traefik-duckdns-token --from-literal="DUCKDNS_TOKEN=$MY_TOKEN" -n kube-system
- name: touch manifests/traefik.yaml to trigger update
ansible.builtin.file:
path: /var/lib/rancher/k3s/server/manifests/traefik-v3.yaml
state: touch
become: true
# ---
- name: redeploy traefik
hosts: localhost
tasks:
- name: delete old traefik deployment
kubernetes.core.k8s:
api_version: v1
name: traefik
kind: Deployment
namespace: kube-system
state: "absent"
- name: delete old deployment job so the k3s helm controller redeploy with our new configuration
kubernetes.core.k8s:
api_version: batch/v1
name: helm-install-traefik
kind: Job
namespace: kube-system
state: "absent"
- name: get traefik deployment
kubernetes.core.k8s_info:
api_version: v1
name: traefik
kind: Deployment
namespace: kube-system
wait: true
register: traefik_deployment
- ansible.builtin.debug:
var: traefik_deployment

View File

@@ -27,29 +27,19 @@
container_name: gitea_action
restart: always
environment:
CONFIG_FILE: /config.yaml
GITEA_INSTANCE_URL: >-
http://{{ hostvars[groups.gitea[0]].ansible_host }}:3000
GITEA_RUNNER_REGISTRATION_TOKEN: "{{ gitea_runner_token_cmd.stdout }}"
GITEA_RUNNER_NAME: arcodange_global_runner_{{ inventory_hostname }}
GITEA_RUNNER_LABELS: ubuntu-latest:docker://gitea.arcodange.lab/arcodange-org/runner-images:ubuntu-latest-ca,ubuntu-latest-ca:docker://gitea.arcodange.lab/arcodange-org/runner-images:ubuntu-latest-ca
ports:
- "43707:43707"
networks:
- gitea_action_network
# GITEA_RUNNER_LABELS: host={{ansible_host}},env=any
volumes:
- /var/run/docker.sock:/var/run/docker.sock
- /etc/timezone:/etc/timezone:ro
- /etc/localtime:/etc/localtime:ro
- /etc/ssl/certs:/etc/ssl/certs:ro
- /usr/local/share/ca-certificates/:/usr/local/share/ca-certificates/:ro
- /mnt/arcodange/gitea-runner-cache:/home/git/.cache/actcache
- /mnt/arcodange/gitea-runner-act:/root/.cache/act
extra_hosts:
gitea.arcodange.duckdns.org: '{{ lookup("dig", "gitea.arcodange.duckdns.org") }}'
configs:
- config.yaml
networks:
gitea_action_network:
name: gitea_action_network
configs:
config.yaml:
content: |
@@ -88,22 +78,23 @@
# If it's empty when registering, it will ask for inputting labels.
# If it's empty when execute `daemon`, will use labels in `.runner` file.
labels:
- "ubuntu-latest:docker://gitea.arcodange.lab/arcodange-org/runner-images:ubuntu-latest-ca"
- "ubuntu-latest-ca:docker://gitea.arcodange.lab/arcodange-org/runner-images:ubuntu-latest-ca"
- "ubuntu-latest:docker://gitea/runner-images:ubuntu-latest"
- "ubuntu-22.04:docker://gitea/runner-images:ubuntu-22.04"
- "ubuntu-20.04:docker://gitea/runner-images:ubuntu-20.04"
cache:
# Enable cache server to use actions/cache.
enabled: true
# The directory to store the cache data.
# If it's empty, the cache data will be stored in $HOME/.cache/actcache.
dir: "/home/git/.cache/actcache"
dir: ""
# The host of the cache server.
# It's not for the address to listen, but the address to connect from job containers.
# So 0.0.0.0 is a bad choice, leave it empty to detect automatically.
host: "{{ ansible_default_ipv4.address }}"
# The port of the cache server.
# 0 means to use a random available port.
port: 43707
port: 0
# The external cache server URL. Valid only when enable is true.
# If it's specified, act_runner will use this URL as the ACTIONS_CACHE_URL rather than start a server by itself.
# The URL should generally end with "/".
@@ -140,7 +131,7 @@
# If it's not empty or "-", the specified docker host will be used. An error will be returned if it doesn't work.
docker_host: ""
# Pull docker image(s) even if already present
force_pull: false
force_pull: true
# Rebuild docker image(s) even if already present
force_rebuild: false
@@ -152,8 +143,184 @@
community.docker.docker_compose_v2:
project_src: "/home/pi/arcodange/docker_composes/arcodange_factory_gitea_action"
pull: missing
state: "{{ docker_compose_down_then_up }}"
state: present
register: deploy_result
loop: ["absent", "present"]
- name: Set PACKAGES_TOKEN secret to upload packages from CI
run_once: True
block:
- name: Generate cicd PACKAGES_TOKEN
include_role:
name: arcodange.factory.gitea_token
vars:
gitea_token_name: PACKAGES_TOKEN
gitea_token_fact_name: cicd_PACKAGES_TOKEN
gitea_token_scopes: write:package
gitea_token_replace: true
- name: Register cicd PACKAGES_TOKEN secrets
include_role:
name: arcodange.factory.gitea_secret
vars:
gitea_secret_name: PACKAGES_TOKEN
gitea_secret_value: "{{ cicd_PACKAGES_TOKEN }}"
loop: ["organization", "user"]
loop_control:
loop_var: docker_compose_down_then_up
loop_var: gitea_owner_type # can be "user" or "organization"
post_tasks:
- include_role:
name: arcodange.factory.gitea_token
vars:
gitea_token_delete: true
- name: Deploy Argo CD
hosts: localhost
roles:
- arcodange.factory.gitea_token # generates gitea_api_token, used to replace a previously generated token of the same name when required
tasks:
- name: Set factory repo
include_role:
name: arcodange.factory.gitea_repo
vars:
gitea_repo_name: factory
- name: Sync other repos
include_role:
name: arcodange.factory.gitea_sync
- name: Generate Argo CD token
include_role:
name: arcodange.factory.gitea_token
vars:
gitea_token_name: ARGOCD_TOKEN
gitea_token_fact_name: argocd_token
gitea_token_scopes: read:repository,read:package
gitea_token_replace: true
- name: Figure out k3s master node
shell:
kubectl get nodes -l node-role.kubernetes.io/master=true -o name | sed s'#node/##'
register: get_k3s_master_node
changed_when: false
- name: Get kubernetes server internal url
command: >-
echo https://kubernetes.default.svc
# {%raw%}
# kubectl get svc/kubernetes -o template="{{.spec.clusterIP}}:{{(index .spec.ports 0).port}}"
# {%endraw%}
register: get_k3s_internal_server_url
changed_when: false
- set_fact:
k3s_master_node: "{{ get_k3s_master_node.stdout }}"
k3s_internal_server_url: "{{ get_k3s_internal_server_url.stdout }}"
- name: Install Argo CD
become: true
delegate_to: "{{ k3s_master_node }}"
vars:
gitea_credentials:
username: arcodange
password: "{{ argocd_token }}"
argocd_helm_values: # https://github.com/argoproj/argo-helm/blob/main/charts/argo-cd/values.yaml
global:
domain: argocd.arcodange.duckdns.org
configs:
params:
server.insecure: true # let k3s traefik do TLS termination
ansible.builtin.copy:
dest: /var/lib/rancher/k3s/server/manifests/argocd.yaml
content: |-
apiVersion: v1
kind: Namespace
metadata:
name: argocd
---
apiVersion: helm.cattle.io/v1
kind: HelmChart
metadata:
name: argocd
namespace: kube-system
spec:
repo: https://argoproj.github.io/argo-helm
chart: argo-cd
targetNamespace: argocd
valuesContent: |-
{{ argocd_helm_values | to_nice_yaml | indent( width=4 ) }}
---
apiVersion: networking.k8s.io/v1
kind: Ingress
metadata:
name: argocd-server-ingress
namespace: argocd
annotations:
# For Traefik v2.x
traefik.ingress.kubernetes.io/router.entrypoints: websecure
traefik.ingress.kubernetes.io/router.tls: "true"
traefik.ingress.kubernetes.io/router.tls.certresolver: letsencrypt
traefik.ingress.kubernetes.io/router.tls.domains.0.main: arcodange.duckdns.org
traefik.ingress.kubernetes.io/router.tls.domains.0.sans: argocd.arcodange.duckdns.org
traefik.ingress.kubernetes.io/router.middlewares: localIp@file
spec:
rules:
- host: argocd.arcodange.duckdns.org
http:
paths:
- path: /
pathType: Prefix
backend:
service:
name: argocd-server
port:
number: 80 #TLS is terminated at Traefik
---
apiVersion: v1
kind: Secret
metadata:
name: gitea-arcodangeorg-factory-repo
namespace: argocd
labels:
argocd.argoproj.io/secret-type: repository
stringData:
type: git
url: https://gitea.arcodange.duckdns.org/arcodange-org/factory
---
apiVersion: v1
kind: Secret
metadata:
name: gitea-arcodangeorg-repo-creds
namespace: argocd
labels:
argocd.argoproj.io/secret-type: repo-creds
stringData:
type: git
url: https://gitea.arcodange.duckdns.org/arcodange-org
password: {{ gitea_credentials.password }}
username: {{ gitea_credentials.username }}
---
apiVersion: argoproj.io/v1alpha1
kind: Application
metadata:
name: factory
namespace: argocd
spec:
project: default
source:
repoURL: https://gitea.arcodange.duckdns.org/arcodange-org/factory
targetRevision: HEAD
path: argocd
destination:
server: {{ k3s_internal_server_url }}
namespace: argocd
syncPolicy:
automated:
prune: true
selfHeal: true
- name: touch manifests/argocd.yaml to trigger update
delegate_to: "{{ k3s_master_node }}"
ansible.builtin.file:
path: /var/lib/rancher/k3s/server/manifests/argocd.yaml
state: touch
become: true
post_tasks:
- include_role:
name: arcodange.factory.gitea_token
vars:
gitea_token_delete: true
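After the play, a quick way to confirm the chart rendered and the bootstrap app synced (a sketch; assumes a working kubeconfig on the k3s master):

```sh
kubectl -n kube-system get helmchart argocd    # HelmChart picked up by k3s
kubectl -n argocd get pods                     # Argo CD components coming up
kubectl -n argocd get application factory      # the bootstrap Application
```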

View File

@@ -1,13 +0,0 @@
# Backups
Write the backup scripts (they produce archives under /mnt/backups).
Add crontab entries to run them periodically.
To read any errors, log on to the machine and use
```sh
sudo su
mails
```
Longhorn takes care of snapshotting the contents of /mnt/backups, replicating it, and shipping a copy to the cloud.
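A quick health check that the jobs are actually producing archives (paths assumed from the playbooks in this directory):

```sh
ls -lh /mnt/backups/*/backup_*       # recent archives per service
sudo crontab -l | grep -i backup     # cron entries installed by Ansible for root
```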

View File

@@ -1,21 +1,12 @@
---
# - name: setup cron report
# ansible.builtin.import_playbook: cron_report.yml
- name: postgres
ansible.builtin.import_playbook: postgres.yml
vars:
backup_root_dir: "/mnt/backups"
backup_dirname: "postgres"
# - name: postgres
# ansible.builtin.import_playbook: postgres.yml
# vars:
# backup_root_dir: "/mnt/backups"
# backup_dirname: "postgres"
- name: gitea
ansible.builtin.import_playbook: gitea.yml
vars:
backup_root_dir: "/mnt/backups"
backup_dirname: "gitea"
- name: k3s_pvc
ansible.builtin.import_playbook: k3s_pvc.yml
vars:
backup_root_dir: "/mnt/backups"
backup_dirname: "k3s_pvc"

View File

@@ -1,127 +0,0 @@
- name: Complete local MTA for Raspberry Pi with automatic delivery of cron mail to root
hosts: raspberries:&local
become: yes
vars:
msmtp_log_dir: "/var/log/msmtp"
msmtp_log_file: "{{ msmtp_log_dir }}/msmtp.log"
msmtp_log_retention_days: 7
rotate_script: "/usr/local/bin/rotate_msmtp_logs.sh"
pre_tasks:
- name: Check whether the rotation script exists
stat:
path: "{{ rotate_script }}"
register: rotate_script_stat
- name: Skip the rest of the playbook if the script already exists
meta: end_play
when: rotate_script_stat.stat.exists
tasks:
- name: Install Postfix, msmtp and mailutils
apt:
name:
- postfix
- msmtp
# - msmtp-mta # conflicts with recent pi setup - may be required by pi2 with old setup
- mailutils
state: present
update_cache: yes
- name: Configure Postfix in local-only mode
debconf:
name: postfix
question: "postfix/main_mailer_type"
value: "Local only"
vtype: "string"
- name: Restart Postfix
service:
name: postfix
state: restarted
enabled: yes
use: init
ignore_errors: true
- name: Create the msmtp log directory
file:
path: "{{ msmtp_log_dir }}"
state: directory
owner: root
group: root
mode: '0755'
- name: Create a secured msmtp log file
file:
path: "{{ msmtp_log_file }}"
state: touch
owner: root
group: root
mode: '0600'
- name: Configure msmtp to relay through the local Postfix
copy:
dest: /etc/msmtprc
owner: root
group: root
mode: '0600'
content: |
defaults
logfile {{ msmtp_log_file }}
auth off
tls off
account default
host localhost
port 25
from root
- name: Create the daily msmtp log rotation script
copy:
dest: "{{ rotate_script }}"
owner: root
group: root
mode: '0755'
content: |
#!/bin/bash
TODAY=$(date +%Y%m%d)
if [ -f "{{ msmtp_log_file }}" ]; then
mv "{{ msmtp_log_file }}" "{{ msmtp_log_dir }}/msmtp.log.$TODAY"
touch "{{ msmtp_log_file }}"
chmod 600 "{{ msmtp_log_file }}"
fi
find "{{ msmtp_log_dir }}" -type f -name 'msmtp.log.*' -mtime +{{ msmtp_log_retention_days }} -exec rm -f {} \;
- name: Create a cron job for daily msmtp log rotation
cron:
name: "Daily msmtp log rotation"
user: root
minute: 0
hour: 3
job: "{{ rotate_script }}"
- name: Ensure all root mail is delivered to the local mailbox
lineinfile:
path: /etc/aliases
regexp: '^root:'
line: "root: root"
create: yes
- name: Update the aliases
command: newaliases
- name: Test local mail delivery
shell: |
echo "Test mail MTA local Pi" | mail -s "Test msmtp/Postfix Pi" root
register: mail_test
ignore_errors: yes
- name: Alert if the mail test fails
debug:
msg: "⚠️ Sending mail via msmtp/Postfix on the Raspberry Pi failed!"
when: mail_test.rc != 0
- name: mail utility
ansible.builtin.import_playbook: cron_report_mailutility.yml
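To exercise the whole chain by hand, mirroring the test task above (run on one of the Pis):

```sh
echo "cron-report smoke test" | mail -s "MTA check" root
sudo tail -n 20 /var/mail/root    # the message should land here via the root: root alias
```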

View File

@@ -1,48 +0,0 @@
- name: Install and configure neomutt for root
hosts: raspberries:&local
become: yes
vars:
neomutt_config_file: "/root/.muttrc"
tasks:
- name: Install neomutt
apt:
name: neomutt
state: present
update_cache: yes
- name: Create the neomutt configuration file for root
copy:
dest: "/root/.muttrc"
owner: root
group: root
mode: '0600'
content: |
{% raw %}
# neomutt configuration file for root
set spoolfile="/var/mail/root"
set folder="/var/mail"
# Display
set index_format="%4C %Z %{%b %d} %-15.15F (%4l) %s"
# Quick navigation
set pager_index_lines=20
set markers=yes
set sort=reverse-date
# Where read mail is saved
set record="/var/mail/root"
# Confirm before deleting
set confirmappend=yes
{% endraw %}
- name: Create a handy alias for opening neomutt
lineinfile:
path: /root/.bashrc
line: 'alias mails="neomutt -f /var/mail/root"'
create: yes
state: present
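With the alias installed, reading root's mailbox is just:

```sh
sudo su -
mails    # alias for: neomutt -f /var/mail/root
```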

View File

@@ -9,7 +9,7 @@
gitea_user: "git"
backup_dir: "{{ backup_root_dir }}/{{ backup_dirname }}"
scripts_dir: "/home/pi/arcodange/docker_composes/gitea/scripts"
keep_days: 3
keep_days: 15
tasks:
- name: Ensure the backup directory exists
@@ -22,30 +22,21 @@
set_fact:
backup_cmd: >-
docker exec -u {{ gitea_user }} {{ gitea_container_name }}
gitea dump --skip-log --skip-db --skip-package-data --type tar.gz -c /data/gitea/conf/app.ini -C /data/gitea/ -f -
gitea dump --skip-log --skip-db --type tar.gz -c /data/gitea/conf/app.ini -C /data/gitea/ -f -
- name: test backup_cmd
ansible.builtin.shell: |
{{ backup_cmd }} > /dev/null
- name: Create the backup script
copy:
dest: "{{ scripts_dir }}/backup.sh"
mode: '0755'
content: |
#!/bin/bash
set -e
mkdir -p {{ backup_dir }}
{{ backup_cmd }} > {{ backup_dir }}/backup_$(date +\%Y\%m\%d).gitea.gz
find {{ backup_dir }} -type f -name 'backup_*.gitea.gz' -mtime +{{ keep_days }} -delete
- name: Add a cron job to back up Gitea daily at 4 AM
cron:
name: "Backup Gitea archive"
minute: "0"
hour: "4"
user: root
job: "{{ scripts_dir }}/backup.sh"
job: >-
{{ backup_cmd }} > {{ backup_dir }}/backup_$(date +\\%Y\\%m\\%d).gitea.gz
&& find {{ backup_dir }} -type f -name 'backup_*.gitea.gz' -mtime +{{ keep_days }} -delete
- name: Create the restore script
copy:

View File

@@ -1,101 +0,0 @@
---
- name: Backup K3S Persistent Volumes
hosts: pi1
gather_facts: yes
become: yes
vars:
backup_dir: "{{ backup_root_dir }}/{{ backup_dirname }}"
scripts_dir: "/opt/k3s_volumes"
keep_days: 3
tasks:
- name: Ensure the backup directory exists
file:
path: "{{ backup_dir }}"
state: directory
mode: '0755'
- name: Ensure the scripts directory exists
file:
path: "{{ scripts_dir }}"
state: directory
mode: '0755'
- name: define backup command
set_fact:
# PVs + PVCs + Longhorn Volume CRDs (critical for fast recovery — without Volume CRDs,
# Longhorn cannot re-associate orphaned replica dirs after a reinstall and forces
# full block-device injection recovery. See docs/adr/20260414-longhorn-pvc-recovery.md)
backup_cmd: >-
kubectl get -A pv,pvc -o yaml
&& echo '---'
&& kubectl get -A volumes.longhorn.io -o yaml
&& echo '---'
&& kubectl get -A settings.longhorn.io -o yaml
- name: test backup_cmd
ansible.builtin.shell: |
{{ backup_cmd }} > /dev/null
- name: Create the backup script
copy:
dest: "{{ scripts_dir }}/backup.sh"
mode: '0755'
content: |
#!/bin/bash
set -e
mkdir -p {{ backup_dir }}
{{ backup_cmd }} > {{ backup_dir }}/backup_$(date +\%Y\%m\%d).volumes
find {{ backup_dir }} -type f -name 'backup_*.volumes' -mtime +{{ keep_days }} -delete
SCRIPTS_DIR="$(dirname "$(realpath "${BASH_SOURCE[0]}")")"
{{ backup_cmd }} > $SCRIPTS_DIR/backup.volumes
- name: Add a cron job to back up k3s volumes daily at 4 AM
cron:
name: "Backup K3S Volumes"
minute: "0"
hour: "4"
user: root
job: "{{ scripts_dir }}/backup.sh"
- name: Create the restore script
copy:
dest: "{{ scripts_dir }}/restore.sh"
mode: '0755'
content: |
#!/bin/bash
set -e
PRIMARY_BACKUP_DIR="{{ backup_dir }}"
FALLBACK_BACKUP_DIR="/home/pi/arcodange/backups/k3s_pvc"
# Check if fallback directory exists and has backups
if [ -d "$FALLBACK_BACKUP_DIR" ] && ls "$FALLBACK_BACKUP_DIR"/*.volumes 1>/dev/null 2>&1; then
BACKUP_DIR="$FALLBACK_BACKUP_DIR"
echo "Using fallback backup directory: $BACKUP_DIR"
elif [ -d "$PRIMARY_BACKUP_DIR" ] && ls "$PRIMARY_BACKUP_DIR"/*.volumes 1>/dev/null 2>&1; then
BACKUP_DIR="$PRIMARY_BACKUP_DIR"
else
echo "No backup directory found"
exit 1
fi
if [ -z "$1" ]; then
FILE=$(ls -1t "$BACKUP_DIR"/backup_*.volumes | head -n 1)
echo "No date provided, restoring latest dump: $FILE"
else
FILE="$BACKUP_DIR/backup_$1.volumes"
if [ ! -f "$FILE" ]; then
echo "File $FILE not found"
exit 1
fi
fi
kubectl apply -f "$FILE"
echo "K3S volumes restoration complete."
echo "NOTE: file includes PVs, PVCs, and Longhorn Volume CRDs."
echo "If Longhorn replica dirs are still orphaned after this restore,"
echo "fall back to: ansible-playbook playbooks/recover/longhorn_data.yml"

View File

@@ -9,7 +9,7 @@
postgres_user: "{{ postgres.dockercompose.services.postgres.environment.POSTGRES_USER }}"
backup_dir: "{{ backup_root_dir }}/{{ backup_dirname }}"
scripts_dir: "/home/pi/arcodange/docker_composes/postgres/scripts"
keep_days: 3
keep_days: 15
tasks:
- name: Ensure the backup directory exists
@@ -26,24 +26,15 @@
ansible.builtin.shell: |
{{ backup_cmd }} > /dev/null
- name: Create the backup script
copy:
dest: "{{ scripts_dir }}/backup.sh"
mode: '0755'
content: |
#!/bin/bash
set -e
mkdir -p {{ backup_dir }}
{{ backup_cmd }} | gzip > {{ backup_dir }}/backup_$(date +\%Y\%m\%d).sql.gz
find {{ backup_dir }} -type f -name 'backup_*.sql.gz' -mtime +{{ keep_days }} -delete
- name: Add a cron job for a compressed PostgreSQL dump daily at 4 AM
cron:
name: "Compressed PostgreSQL backup"
minute: "0"
hour: "4"
user: root
job: "{{ scripts_dir }}/backup.sh"
job: >-
{{ backup_cmd }} | gzip > {{ backup_dir }}/backup_$(date +\\%Y\\%m\\%d).sql.gz
&& find {{ backup_dir }} -type f -name 'backup_*.sql.gz' -mtime +{{ keep_days }} -delete
- name: Create the restore script
copy:

View File

@@ -1,2 +0,0 @@
- name: pihole
ansible.builtin.import_playbook: pihole.yml

View File

@@ -1,11 +0,0 @@
---
- name: Install and configure Pi-hole on pi1
hosts: raspberries:&local
become: yes
vars:
pihole_custom_dns:
".arcodange.duckdns.org": "{{ hostvars['pi1'].preferred_ip }}"
".arcodange.lab": "{{ hostvars['pi1'].preferred_ip }}"
roles:
- pihole

View File

@@ -1,8 +0,0 @@
pihole_primary: pi1
pihole_user_gravity: pihole_gravity
pihole_gravity_home: /var/lib/pihole_gravity
pihole_dns_domain: lab
pihole_ports: '8081o,443os,[::]:8081o,[::]:443os' # web interface
pihole_gravity_conf: /etc/gravity-sync/gravity-sync.conf # should not be changed
pihole_custom_dns: {}
pihole_upstream_dns: ["8.8.8.8", "1.1.1.1", "8.8.4.4"] # Explicit upstream DNS servers

View File

@@ -1,5 +0,0 @@
---
- name: Restart Pi-hole
service:
name: pihole-FTL
state: restarted

View File

@@ -1,75 +0,0 @@
---
- name: Build DNS server list (exclude self)
set_fact:
pihole_dns_servers: >-
{{
groups['pihole']
| reject('equalto', inventory_hostname)
| map('extract', hostvars, 'preferred_ip')
| list
}}
# 1. Remove any previously added Pi-hole entries
- name: Remove existing Pi-hole nameservers
lineinfile:
path: /etc/resolv.conf
regexp: '^nameserver ({{ pihole_dns_servers | join("|") }})$'
state: absent
when: pihole_dns_servers | length > 0
# 2. Insert the Pi-hole nameservers right after the search line
- name: Insert Pi-hole nameservers with priority
lineinfile:
path: /etc/resolv.conf
insertafter: '^search'
line: "nameserver {{ item }}"
state: present
loop: "{{ pihole_dns_servers }}"
# 3. Define per-interface DNS priorities
- name: Set DNS priority mapping
set_fact:
interface_dns_priority:
eth0: 50
wlan0: 100
# 4. Configure the Pi-hole DNS servers on all active interfaces
- name: Get active connections
command: nmcli -t -f NAME,DEVICE connection show --active
register: active_connections
changed_when: false
- name: Get current DNS for each active interface
vars:
iface_name: "{{ item.split(':')[1] }}"
conn_name: "{{ item.split(':')[0] }}"
loop: "{{ active_connections.stdout_lines }}"
when: item.split(':')[1] in interface_dns_priority
command: nmcli -g IP4.DNS connection show "{{ conn_name }}"
register: current_dns
changed_when: false
- name: Apply Pi-hole DNS if different
vars:
iface_name: "{{ item.split(':')[1] }}"
conn_name: "{{ item.split(':')[0] }}"
loop: "{{ active_connections.stdout_lines }}"
when: item.split(':')[1] in interface_dns_priority
command: >
nmcli connection modify "{{ conn_name }}"
ipv4.dns "{{ pihole_dns_servers | join(' ') }}"
ipv4.ignore-auto-dns yes
ipv4.dns-priority "{{ interface_dns_priority[iface_name] }}"
register: dns_changed
changed_when: dns_changed is defined and dns_changed.stdout != ""
- name: Reactivate interface if DNS changed
vars:
iface_name: "{{ item.split(':')[1] }}"
conn_name: "{{ item.split(':')[0] }}"
loop: "{{ active_connections.stdout_lines }}"
command: nmcli connection up "{{ conn_name }}"
when:
- item.split(':')[1] in interface_dns_priority
- dns_changed is defined and dns_changed.changed
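To verify what the play applied on a client (the connection name below is an example; the first command lists yours):

```sh
nmcli -t -f NAME,DEVICE connection show --active
nmcli -g ipv4.dns,ipv4.dns-priority connection show "Wired connection 1"
grep nameserver /etc/resolv.conf   # Pi-hole servers should sit right after the search line
```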

View File

@@ -1,153 +0,0 @@
---
# -------------------------------------------------------------------
# Gravity Sync HA setup with SSH key rotation
# -------------------------------------------------------------------
- name: Determine primary Pi-hole
set_fact:
pihole_primary: "{{ groups['pihole'] | first }}"
- name: Set secondary Pi-hole hosts
set_fact:
pihole_secondaries: "{{ groups['pihole'] | difference([pihole_primary]) }}"
#################################################################
# 1. Ensure gravity user exists on all Pi-hole nodes
#################################################################
- name: Ensure gravity user exists
user:
name: "{{ pihole_user_gravity }}"
home: "{{ pihole_gravity_home }}"
shell: /bin/bash
system: yes
create_home: yes
- name: Create .ssh directory for gravity user
file:
path: "{{ pihole_gravity_home }}/.ssh"
state: directory
owner: "{{ pihole_user_gravity }}"
group: "{{ pihole_user_gravity }}"
mode: '0700'
#################################################################
# 2. Generate SSH key for each host (rotated at each run)
#################################################################
- name: Generate SSH keypair for gravity user
openssh_keypair:
path: "{{ pihole_gravity_home }}/.ssh/id_ed25519"
type: ed25519
owner: "{{ pihole_user_gravity }}"
group: "{{ pihole_user_gravity }}"
mode: '0600'
register: gravity_key
no_log: true
- name: Set gravity key in hostvars
set_fact:
gravity_pubkey: "{{ gravity_key.public_key }}"
- name: Clean authorized_keys for gravity user
file:
path: "{{ pihole_gravity_home }}/.ssh/authorized_keys"
state: absent
- name: Authorize SSH keys from other Pi-hole hosts
authorized_key:
user: "{{ pihole_user_gravity }}"
key: "{{ hostvars[item].gravity_pubkey }}"
state: present
loop: "{{ groups['pihole'] }}"
when: inventory_hostname != item
- name: Add all Pi-hole hosts to known_hosts
known_hosts:
path: "{{ pihole_gravity_home }}/.ssh/known_hosts"
name: "{{ item }}"
key: "{{ lookup('pipe', 'ssh-keyscan -t ed25519 ' ~ item) }}"
state: present
loop: "{{ groups['pihole'] }}"
when: inventory_hostname != item
become: yes
become_user: "{{ pihole_user_gravity }}"
#################################################################
# Install Gravity Sync binary if absent
#################################################################
- name: Check if Gravity Sync binary exists
stat:
path: /usr/local/bin/gravity-sync
register: gravity_sync_bin
- name: Download installer
get_url:
url: https://raw.githubusercontent.com/vmstan/gs-install/main/gs-install.sh
dest: /tmp/gs-install.sh
mode: '0755'
when: not gravity_sync_bin.stat.exists
- name: Give full sudo to gravity user
copy:
dest: /etc/sudoers.d/gravity-sync
mode: '0440'
content: "{{ pihole_user_gravity }} ALL=(ALL) NOPASSWD: ALL"
when: not gravity_sync_bin.stat.exists
- name: Execute Gravity Sync installer non-interactively
command: bash /tmp/gs-install.sh
become: yes
become_user: "{{ pihole_user_gravity }}"
environment:
HOME: "{{ pihole_gravity_home }}"
when: not gravity_sync_bin.stat.exists
#################################################################
# Generate gravity-sync.conf for non-interactive use
#################################################################
- name: Set remote host for gravity-sync.conf
set_fact:
remote_pihole: "{{ (inventory_hostname == pihole_primary) | ternary(pihole_secondaries[0] ~ '.home', pihole_primary ~ '.home') }}"
- name: Create gravity-sync.conf file
copy:
dest: "{{ pihole_gravity_conf }}"
owner: "{{ pihole_user_gravity }}"
group: "{{ pihole_user_gravity }}"
mode: '0600'
content: |
# REQUIRED SETTINGS
REMOTE_HOST='{{ remote_pihole }}'
REMOTE_USER='{{ pihole_user_gravity }}'
# CUSTOM VARIABLES
# LOCAL_PIHOLE_DIRECTORY='/etc/pihole'
# REMOTE_PIHOLE_DIRECTORY='/etc/pihole'
# LOCAL_FILE_OWNER='{{ pihole_user_gravity }}'
# REMOTE_FILE_OWNER='{{ pihole_user_gravity }}'
# LOCAL_DOCKER_CONTAINER='' # optional
# REMOTE_DOCKER_CONTAINER='' # optional
- name: Create symlink for gravity-sync.rsa
file:
src: "{{ pihole_gravity_home }}/.ssh/id_ed25519"
dest: /etc/gravity-sync/gravity-sync.rsa
owner: "{{ pihole_user_gravity }}"
group: "{{ pihole_user_gravity }}"
mode: '0600'
state: link
#################################################################
# Execute Gravity Sync with non-interactive config
#################################################################
- name: Run Gravity Sync script
command: bash /usr/local/bin/gravity-sync
become: yes
become_user: "{{ pihole_user_gravity }}"
environment:
HOME: "{{ pihole_gravity_home }}"

View File

@@ -1,114 +0,0 @@
#################################################################
# Bootstrap Pi-hole (manual installation expected)
#################################################################
- name: Print the manual Pi-hole installation command
debug:
msg: |
Please install Pi-hole manually on this host with the following command:
------------------------------------------------------------
curl -sSL https://install.pi-hole.net | sudo bash
------------------------------------------------------------
The installation will be verified automatically within the next 10 minutes.
#################################################################
# Verify the Pi-hole installation
#################################################################
- name: Wait until Pi-hole is installed (FTL DB present)
wait_for:
path: /etc/pihole/pihole-FTL.db
state: present
timeout: 600 # 10 minutes
register: pihole_config_ready
- name: Check that the pihole-FTL service is up
wait_for:
port: 53
state: started
timeout: 60
when: pihole_config_ready is succeeded
#################################################################
# Pi-hole configuration (shared across the HA pair)
#################################################################
- name: Change the Pi-hole listening ports
replace:
path: /etc/pihole/pihole.toml
regexp: '^\s*port\s*=\s*".*"'
replace: ' port = "{{ pihole_ports }}"'
notify: Restart Pi-hole
- name: Allow Pi-hole to listen on all interfaces
replace:
path: /etc/pihole/pihole.toml
regexp: '^\s*listeningMode\s*=\s*".*"'
replace: ' listeningMode = "ALL"'
notify: Restart Pi-hole
- name: Enable loading of /etc/dnsmasq.d
lineinfile:
path: /etc/pihole/pihole.toml
regexp: '^\s*etc_dnsmasq_d\s*='
line: ' etc_dnsmasq_d = true'
state: present
notify: Restart Pi-hole
#################################################################
# Custom DNS (wildcards + local hosts)
#################################################################
- name: Validate custom DNS IPs
assert:
that:
- ip is match('^([0-9]{1,3}\.){3}[0-9]{1,3}$')
fail_msg: "Invalid IP for {{ fqdn }}"
loop: "{{ pihole_custom_dns | dict2items }}"
loop_control:
label: "{{ item.key }}"
vars:
fqdn: "{{ item.key }}"
ip: "{{ item.value }}"
- name: Generate the custom DNS rules (wildcards + FQDNs)
copy:
dest: /etc/dnsmasq.d/10-custom-rules.conf
owner: root
group: root
mode: '0644'
content: |
# Generated by Ansible - Pi-hole custom DNS rules
{% for fqdn, ip in pihole_custom_dns.items() %}
address=/{{ fqdn }}/{{ ip }}
{% endfor %}
when: pihole_custom_dns | length > 0
notify: Restart Pi-hole
- name: Create the local DNS entries for the RPis
copy:
dest: /etc/dnsmasq.d/20-rpis.conf
owner: root
group: root
mode: '0644'
content: |
# Generated by Ansible - Raspberry Pi local DNS
{% for host in groups['raspberries']
if hostvars[host].preferred_ip is defined %}
address=/{{ host }}.home/{{ hostvars[host].preferred_ip }}
{% endfor %}
notify: Restart Pi-hole
- name: Configure explicit upstream DNS servers for Pi-hole
copy:
dest: /etc/dnsmasq.d/99-upstream.conf
owner: root
group: root
mode: '0644'
content: |
# Generated by Ansible - explicit upstream DNS servers
# Fixes issue where Pi-hole relies on DHCP-provided DNS which may be unavailable
{% for dns_server in pihole_upstream_dns %}
server={{ dns_server }}
{% endfor %}
notify: Restart Pi-hole
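Spot-check the generated dnsmasq rules from any client (names follow the wildcard and `.home` entries above; answers depend on your preferred_ip values):

```sh
dig +short anything.arcodange.duckdns.org @pi1.home   # wildcard from 10-custom-rules.conf
dig +short pi2.home @pi1.home                         # host record from 20-rpis.conf
```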

View File

@@ -1,11 +0,0 @@
---
- name: Setup Pi-hole HA
include_tasks: ha_pihole_setup.yml
when: "'pihole' in group_names"
- name: Setup Gravity Sync
include_tasks: gravity_setup.yml
when: "'pihole' in group_names"
- name: Setup DNS client
include_tasks: client_setup.yml

View File

@@ -1,536 +0,0 @@
---
- name: Recover Longhorn from Power Cut - CSI Driver Registration Loss
hosts: raspberries:&local
gather_facts: yes
become: yes
vars:
# Backup locations
primary_backup_dir: "/mnt/backups/k3s_pvc"
fallback_backup_dir: "/home/pi/arcodange/backups/k3s_pvc"
scripts_dir: "/opt/k3s_volumes"
# Longhorn configuration
longhorn_manifest_path: "/var/lib/rancher/k3s/server/manifests/longhorn-install.yaml"
longhorn_namespace: "longhorn-system"
longhorn_chart_name: "longhorn-install"
longhorn_chart_namespace: "kube-system"
# Data paths (DO NOT MODIFY - points to actual volume data)
longhorn_data_path: "/mnt/arcodange/longhorn"
tasks:
# ========================================================================
# PHASE 0: Pre-flight Checks
# ========================================================================
- name: Verify data directory exists on control plane
ansible.builtin.stat:
path: "{{ longhorn_data_path }}"
register: data_dir
when: inventory_hostname == 'pi1'
run_once: true
- name: FAIL if data directory missing
ansible.builtin.fail:
msg: "CRITICAL: Longhorn data directory {{ longhorn_data_path }} does not exist. Aborting recovery."
when: inventory_hostname == 'pi1' and not data_dir.stat.exists
run_once: true
- name: Check for fallback backups on pi1
ansible.builtin.shell: ls {{ fallback_backup_dir }}/backup_*.volumes 2>/dev/null
register: fallback_backup_check
changed_when: false
when: inventory_hostname == 'pi1'
run_once: true
ignore_errors: yes
- name: Check for primary backups on pi1
ansible.builtin.shell: ls {{ primary_backup_dir }}/backup_*.volumes 2>/dev/null
register: primary_backup_check
changed_when: false
when: inventory_hostname == 'pi1'
run_once: true
ignore_errors: yes
- name: Set backup fact
ansible.builtin.set_fact:
has_backups: "{{ (fallback_backup_check.rc == 0 and fallback_backup_check.stdout | trim != '') or (primary_backup_check.rc == 0 and primary_backup_check.stdout | trim != '') }}"
when: inventory_hostname == 'pi1'
run_once: true
- name: FAIL if no backups found
ansible.builtin.fail:
msg: "No backup files found in {{ primary_backup_dir }} or {{ fallback_backup_dir }}. Cannot proceed."
when: inventory_hostname == 'pi1' and not has_backups | bool
run_once: true
# ========================================================================
# PHASE 1: Diagnosis - Check Current State
# ========================================================================
- name: Gather Longhorn namespace status
block:
- name: Check if longhorn-system namespace exists
kubernetes.core.k8s_info:
kind: Namespace
name: "{{ longhorn_namespace }}"
register: longhorn_ns
ignore_errors: yes
run_once: true
delegate_to: localhost
- name: Check CSI driver registration
kubernetes.core.k8s_info:
kind: CSIDriver
name: driver.longhorn.io
register: csi_driver
ignore_errors: yes
run_once: true
delegate_to: localhost
- name: Check Longhorn manager pods
kubernetes.core.k8s_info:
kind: Pod
namespace: "{{ longhorn_namespace }}"
label_selectors:
- app=longhorn-manager
register: managers
ignore_errors: yes
run_once: true
delegate_to: localhost
- name: Set recovery_phase fact
ansible.builtin.set_fact:
recovery_phase: "none"
run_once: true
delegate_to: localhost
- name: Determine recovery phase needed
ansible.builtin.set_fact:
recovery_phase: >-
{%- if csi_driver.resources | default([]) | length == 0 -%}
soft
{%- elif managers.failed | default(false) or managers.resources | default([]) | selectattr('status.phase', 'defined') | selectattr('status.phase', 'ne', 'Running') | list | length > 0 -%}
hard
{%- else -%}
none
{%- endif -%}
run_once: true
delegate_to: localhost
- name: Display recovery diagnosis
ansible.builtin.debug:
msg: "Diagnosis: recovery_phase={{ recovery_phase | default('none') }}. CSI Driver exists: {{ not csi_driver.failed | bool }}, Managers healthy: {{ managers.failed | ternary('unknown', managers.resources | default([]) | selectattr('status.phase', 'defined') | selectattr('status.phase', 'eq', 'Running') | list | length >= 3) | bool }}"
run_once: true
delegate_to: localhost
when: inventory_hostname == 'pi1'
run_once: true
# ========================================================================
# PHASE 2: Soft Recovery - Touch Manifest
# ========================================================================
- name: Execute soft recovery - touch Longhorn manifest
block:
- name: Touch longhorn-install.yaml manifest
ansible.builtin.file:
path: "{{ longhorn_manifest_path }}"
state: touch
register: manifest_touch
when: inventory_hostname == 'pi1'
- name: Wait for k3s to detect manifest change
ansible.builtin.pause:
minutes: 1
when: manifest_touch is changed
- name: Check if Longhorn pods are recreating
kubernetes.core.k8s_info:
kind: Pod
namespace: "{{ longhorn_namespace }}"
register: longhorn_pods
ignore_errors: yes
run_once: true
delegate_to: localhost
- name: Verify soft recovery success
ansible.builtin.set_fact:
soft_recovery_success: >-
{{ (longhorn_pods.resources | default([]) | selectattr('metadata.creationTimestamp', 'defined') | list | length) >= 10 }}
run_once: true
delegate_to: localhost
when: recovery_phase == 'soft' and inventory_hostname == 'pi1'
run_once: true
# ========================================================================
# PHASE 3: Hard Recovery - Delete Driver-Deployer
# ========================================================================
- name: Execute hard recovery - delete driver-deployer pods
block:
- name: Get driver-deployer pods
kubernetes.core.k8s_info:
kind: Pod
namespace: "{{ longhorn_namespace }}"
label_selectors:
- app=longhorn-driver-deployer
register: driver_deployer_pods
ignore_errors: yes
run_once: true
delegate_to: localhost
- name: Delete driver-deployer pods
kubernetes.core.k8s:
state: absent
kind: Pod
namespace: "{{ longhorn_namespace }}"
name: "{{ item.metadata.name }}"
force: yes
grace_period: 0
loop: "{{ driver_deployer_pods.resources | default([]) }}"
when: driver_deployer_pods.resources | default([]) | length > 0
run_once: true
delegate_to: localhost
- name: Wait for HelmChart to recreate driver-deployer
ansible.builtin.pause:
minutes: 2
- name: Check driver-deployer status
kubernetes.core.k8s_info:
kind: Pod
namespace: "{{ longhorn_namespace }}"
label_selectors:
- app=longhorn-driver-deployer
register: new_driver_deployer
ignore_errors: yes
run_once: true
delegate_to: localhost
when: (recovery_phase == 'hard' or (recovery_phase == 'soft' and not soft_recovery_success | default(false))) and inventory_hostname == 'pi1'
run_once: true
# ========================================================================
# PHASE 4: Nuclear Recovery - Full Reinstall
# ========================================================================
- name: Execute nuclear recovery - full Longhorn reinstall
block:
# Step 1: Delete HelmChart
- name: Delete Longhorn HelmChart
kubernetes.core.k8s:
state: absent
kind: HelmChart
namespace: "{{ longhorn_chart_namespace }}"
name: "{{ longhorn_chart_name }}"
force: yes
grace_period: 0
register: helmchart_deleted
ignore_errors: yes
run_once: true
delegate_to: localhost
- name: Wait for HelmChart to be fully removed
ansible.builtin.pause:
seconds: 30
when: helmchart_deleted is changed
run_once: true
# Step 2: Remove Longhorn manifest from filesystem
- name: Remove Longhorn manifest file
ansible.builtin.file:
path: "{{ longhorn_manifest_path }}"
state: absent
when: inventory_hostname == 'pi1'
register: manifest_removed
# Step 3: Remove finalizers from all Longhorn resources
- name: Get list of all Longhorn CRDs
kubernetes.core.k8s_info:
kind: CustomResourceDefinition
label_selectors:
- app=longhorn
register: longhorn_crds
ignore_errors: yes
run_once: true
delegate_to: localhost
- name: Get all Longhorn CR instances
kubernetes.core.k8s_info:
kind: "{{ item.spec.names.kind }}"
namespace: "{{ longhorn_namespace }}"
api_version: "{{ item.spec.group ~ '/' ~ item.spec.versions[0].name }}"
register: cr_instances
ignore_errors: yes
loop: "{{ longhorn_crds.resources | default([]) }}"
run_once: true
delegate_to: localhost
- name: Remove finalizers from all Longhorn CR instances
kubernetes.core.k8s_json_patch:
kind: "{{ item.0.spec.names.kind }}"
namespace: "{{ longhorn_namespace }}"
name: "{{ item.1.metadata.name }}"
api_version: "{{ item.0.spec.group ~ '/' ~ item.0.spec.versions[0].name }}"
patch:
- op: replace
path: /metadata/finalizers
value: []
loop: >-
{%- set ns = namespace(pairs=[]) -%}
{%- for res in cr_instances.results | default([]) -%}
{#- res.item is the CRD the k8s_info lookup ran for -#}
{%- for inst in res.resources | default([]) -%}
{%- set ns.pairs = ns.pairs + [[res.item, inst]] -%}
{%- endfor -%}
{%- endfor -%}
{{ ns.pairs }}
when: cr_instances.results | default([]) | length > 0
run_once: true
delegate_to: localhost
ignore_errors: yes
# Step 4: Remove finalizers from PVCs
- name: Get all PVCs with longhorn storage class
kubernetes.core.k8s_info:
kind: PersistentVolumeClaim
register: all_pvcs
ignore_errors: yes
run_once: true
delegate_to: localhost
- name: Remove finalizers from PVCs
kubernetes.core.k8s_json_patch:
kind: PersistentVolumeClaim
namespace: "{{ item.metadata.namespace }}"
name: "{{ item.metadata.name }}"
patch:
- op: replace
path: /metadata/finalizers
value: []
loop: "{{ all_pvcs.resources | default([]) | selectattr('spec.storageClassName', 'defined') | selectattr('spec.storageClassName', 'match', 'longhorn.*') | list }}"
run_once: true
delegate_to: localhost
ignore_errors: yes
# Step 5: Remove namespace finalizers
- name: Remove finalizers from longhorn-system namespace
kubernetes.core.k8s_json_patch:
kind: Namespace
name: "{{ longhorn_namespace }}"
patch:
- op: replace
path: /spec/finalizers
value: []
run_once: true
delegate_to: localhost
ignore_errors: yes
- name: Delete longhorn-system namespace
kubernetes.core.k8s:
state: absent
kind: Namespace
name: "{{ longhorn_namespace }}"
force: yes
grace_period: 0
run_once: true
delegate_to: localhost
ignore_errors: yes
- name: Wait for namespace deletion
ansible.builtin.pause:
seconds: 15
run_once: true
# Step 6: Reinstall Longhorn via manifest
- name: Deploy Longhorn HelmChart manifest
ansible.builtin.copy:
dest: "{{ longhorn_manifest_path }}"
content: |
apiVersion: helm.cattle.io/v1
kind: HelmChart
metadata:
annotations:
helmcharts.cattle.io/managed-by: helm-controller
finalizers:
- wrangler.cattle.io/on-helm-chart-remove
name: longhorn-install
namespace: kube-system
spec:
version: v1.9.1
chart: longhorn
repo: https://charts.longhorn.io
failurePolicy: abort
targetNamespace: longhorn-system
createNamespace: true
valuesContent: |-
defaultSettings:
defaultDataPath: {{ longhorn_data_path }}
when: inventory_hostname == 'pi1'
register: manifest_deployed
- name: Trigger k3s reconcile by touching manifest
ansible.builtin.file:
path: "{{ longhorn_manifest_path }}"
state: touch
when: manifest_deployed is changed and inventory_hostname == 'pi1'
- name: Wait for Longhorn pods to be created
ansible.builtin.pause:
minutes: 3
when: manifest_deployed is changed
run_once: true
when: >-
(recovery_phase == 'hard' and not new_driver_deployer.resources | default([]) | selectattr('status.phase', 'eq', 'Running') | list | length > 0)
or (recovery_phase == 'soft' and not soft_recovery_success | default(false) and not new_driver_deployer.resources | default([]) | selectattr('status.phase', 'eq', 'Running') | list | length > 0)
or recovery_phase == 'none'
run_once: true
# ========================================================================
# PHASE 5: Restore from Backup
# ========================================================================
- name: Execute restore from backup
block:
- name: Determine backup directory to use
ansible.builtin.set_fact:
backup_dir_to_use: >-
{%- if lookup('fileglob', fallback_backup_dir ~ '/backup_*.volumes', wantlist=True) | length > 0 -%}
{{ fallback_backup_dir }}
{%- elif lookup('fileglob', primary_backup_dir ~ '/backup_*.volumes', wantlist=True) | length > 0 -%}
{{ primary_backup_dir }}
{%- endif -%}
run_once: true
delegate_to: localhost
- name: FAIL if no backup directory found
ansible.builtin.fail:
msg: "No valid backup directory found with backup_*.volumes files"
when: backup_dir_to_use == ""
run_once: true
- name: Find latest backup file
ansible.builtin.set_fact:
# backup_YYYYMMDD naming makes a reverse lexical sort chronological
latest_backup: >-
{{ lookup('fileglob', backup_dir_to_use ~ '/backup_*.volumes', wantlist=True) | sort(reverse=True) | first | default('') }}
run_once: true
delegate_to: localhost
- name: FAIL if no backup files found
ansible.builtin.fail:
msg: "No backup files found in {{ backup_dir_to_use }}"
when: latest_backup | default('') == ''
run_once: true
- name: Wait for Longhorn managers to be ready
kubernetes.core.k8s_info:
kind: Pod
namespace: "{{ longhorn_namespace }}"
label_selectors:
- app=longhorn-manager
register: managers_status
until: >-
{{ (managers_status.resources | default([]) | selectattr('status.phase', 'eq', 'Running') | list | length) >= 1 }}
retries: 30
delay: 10
run_once: true
delegate_to: localhost
- name: Apply PV/PVC backup
kubernetes.core.k8s:
state: present
src: "{{ latest_backup }}"
run_once: true
delegate_to: localhost
- name: Find Longhorn metadata backup
ansible.builtin.set_fact:
longhorn_backup: >-
{{ lookup('fileglob', backup_dir_to_use ~ '/longhorn_metadata_*.yaml', wantlist=True) | sort(reverse=True) | first | default('') }}
run_once: true
delegate_to: localhost
- name: Apply Longhorn metadata backup (if exists)
kubernetes.core.k8s:
state: present
src: "{{ longhorn_backup | default(omit) }}"
namespace: "{{ longhorn_namespace }}"
when: longhorn_backup | default('') != ''
run_once: true
delegate_to: localhost
when: inventory_hostname == 'pi1'
run_once: true
# ========================================================================
# PHASE 6: Post-Recovery Verification
# ========================================================================
- name: Verify recovery success
block:
- name: Check CSI driver registration
kubernetes.core.k8s_info:
kind: CSIDriver
name: driver.longhorn.io
register: csi_final
until: csi_final.resources | length > 0
retries: 10
delay: 10
run_once: true
delegate_to: localhost
- name: Check Longhorn manager health
kubernetes.core.k8s_info:
kind: Pod
namespace: "{{ longhorn_namespace }}"
label_selectors:
- app=longhorn-manager
register: managers_final
until: >-
{{ (managers_final.resources | default([]) | selectattr('status.phase', 'eq', 'Running') | list | length) >= 3 }}
retries: 15
delay: 10
run_once: true
delegate_to: localhost
- name: Check CSI socket exists (on pi1)
ansible.builtin.stat:
path: /var/lib/kubelet/plugins/driver.longhorn.io/csi.sock
register: csi_socket
when: inventory_hostname == 'pi1'
- name: Verify volume data is still present
ansible.builtin.stat:
path: "{{ longhorn_data_path }}/replicas"
register: replicas_dir
when: inventory_hostname == 'pi1'
- name: Display recovery summary
ansible.builtin.debug:
msg: |
===== Longhorn Recovery Summary =====
CSI Driver Registered: {{ (csi_final.resources | default([]) | length > 0) | ternary('✓', '✗') }}
Managers Running: {{ (managers_final.resources | default([]) | selectattr('status.phase', 'eq', 'Running') | list | length) }}/3
CSI Socket Exists: {{ csi_socket.stat.exists | default(false) | bool | ternary('✓', '✗') }}
Volume Data Present: {{ replicas_dir.stat.exists | default(false) | bool | ternary('✓', '✗') }}
Backup Used: {{ latest_backup | default('none') }}
======================================
run_once: true
when: inventory_hostname == 'pi1'
run_once: true
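Invocation is a single run; the phases gate themselves on the Phase 1 diagnosis facts (inventory path assumed to match the block-device recovery playbook below):

```sh
ansible-playbook -i inventory/hosts.yml playbooks/recover/longhorn.yml
```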

View File

@@ -1,914 +0,0 @@
---
# Longhorn Block-Device Data Recovery Playbook
#
# PURPOSE:
# Recover application data directly from raw Longhorn replica files when Volume CRDs
# are missing (e.g. after a nuclear cleanup + reinstall). Bypasses k8s objects entirely
# and works at the block-device level.
#
# WHEN TO USE:
# - Longhorn has been fully reinstalled (Volume CRDs are gone)
# - Application PVCs are stuck Terminating / Lost
# - The raw replica .img files still exist on disk
# → See docs/runbooks/longhorn-block-device-recovery.md for the manual equivalent
#
# WHEN NOT TO USE:
# - Volume CRDs still exist → use playbooks/recover/longhorn.yml instead
# - All replica dirs were zeroed by Longhorn reconciliation (data is unrecoverable)
#
# USAGE:
# ansible-playbook -i inventory/hosts.yml playbooks/recover/longhorn_data.yml \
# -e @vars/recovery_volumes.yml
#
# VARS FILE FORMAT (vars/recovery_volumes.yml):
# longhorn_recovery_volumes:
# - pv_name: pvc-abc123 # Longhorn volume name (== PV name)
# pvc_name: myapp-data # PVC name in the namespace
# namespace: myapp # namespace where the PVC lives
# size_bytes: "134217728" # volume size in bytes (string)
# size_human: 128Mi # human-readable, used in PVC spec
# access_mode: ReadWriteOnce # ReadWriteOnce or ReadWriteMany
# workload_kind: Deployment # Deployment or StatefulSet
# workload_name: myapp # name of the workload to scale down/up
# source_node: pi3 # [OPTIONAL] node with untouched replica dir
# source_dir: pvc-abc123-998f49ff # [OPTIONAL] exact replica dir name
# verify_cmd: "" # optional: command to run inside pod to verify data after recovery
#
# source_node and source_dir are auto-discovered (largest dir >16K across all nodes)
# when not specified. Override manually only to force a specific replica dir.
#
# REQUIREMENTS:
# - python3 on all cluster nodes
# - kubectl configured on the Ansible controller (localhost)
# - longhorn-system namespace running and healthy before this playbook starts
# - kubernetes.core collection: ansible-galaxy collection install kubernetes.core
#
# TESTED SCENARIO:
# 2026-04-13 power cut — nuclear Longhorn reinstall — url-shortener SQLite recovery
# Proven working as of 2026-04-14.
- name: Longhorn Block-Device Data Recovery
hosts: localhost
gather_facts: no
vars:
longhorn_data_path: /mnt/arcodange/longhorn
longhorn_namespace: longhorn-system
longhorn_nodes: [pi1, pi2, pi3]
merge_tool_local: "{{ playbook_dir }}/../../docs/incidents/2026-04-13-power-cut/tools/merge-longhorn-layers.py"
merge_tool_remote: /home/pi/merge-longhorn-layers.py
backup_base: /home/pi/arcodange/backups/longhorn-recovery
merged_base: /tmp/longhorn-recovery-merged
recovery_mount: /mnt/recovery-src
live_mount: /mnt/recovery-live
longhorn_recovery_volumes: [] # override with -e @vars/recovery_volumes.yml
tasks:
# =========================================================================
# PRE-FLIGHT
# =========================================================================
- name: "Pre-flight | Fail fast if no volumes defined"
ansible.builtin.fail:
msg: >
No recovery volumes defined. Pass -e @vars/recovery_volumes.yml with a
longhorn_recovery_volumes list. See playbook header for format.
when: longhorn_recovery_volumes | length == 0
- name: "Pre-flight | Verify merge tool exists locally"
ansible.builtin.stat:
path: "{{ merge_tool_local }}"
register: merge_tool_stat
delegate_to: localhost
- name: "Pre-flight | Fail if merge tool missing"
ansible.builtin.fail:
msg: "merge-longhorn-layers.py not found at {{ merge_tool_local }}"
when: not merge_tool_stat.stat.exists
- name: "Pre-flight | Check Longhorn is healthy"
kubernetes.core.k8s_info:
kind: Pod
namespace: "{{ longhorn_namespace }}"
label_selectors:
- app=longhorn-manager
register: lh_managers
delegate_to: localhost
- name: "Pre-flight | Fail if Longhorn managers are not running"
ansible.builtin.fail:
msg: >
Longhorn managers not running (found {{ lh_managers.resources | default([]) |
selectattr('status.phase', 'eq', 'Running') | list | length }} Running pods).
Ensure Longhorn is healthy before attempting data recovery.
when: >
(lh_managers.resources | default([]) |
selectattr('status.phase', 'eq', 'Running') | list | length) < 1
- name: "Pre-flight | Summary"
ansible.builtin.debug:
msg: >
Longhorn healthy ({{ lh_managers.resources |
selectattr('status.phase', 'eq', 'Running') | list | length }} managers running).
Recovering {{ longhorn_recovery_volumes | length }} volume(s):
{{ longhorn_recovery_volumes | map(attribute='pv_name') | list | join(', ') }}
# =========================================================================
# PHASE 0 — AUTO-DISCOVER BEST REPLICA DIR (when source_node/source_dir absent)
# =========================================================================
- name: "Phase 0 | Scan replica dirs on all nodes"
ansible.builtin.shell: |
result=""
for dir in {{ longhorn_data_path }}/replicas/{{ item.1.pv_name }}-*; do
[ -d "$dir" ] || continue
# Skip replicas that were being rebuilt — their data is incomplete
meta="$dir/volume.meta"
if [ -f "$meta" ]; then
rebuilding=$(python3 -c "import json; d=json.load(open('$meta')); print(d.get('Rebuilding', False))" 2>/dev/null)
[ "$rebuilding" = "True" ] && continue
fi
# Use actual disk usage (not apparent/sparse size) to rank replicas
size=$(du -sk "$dir" 2>/dev/null | cut -f1)
name=$(basename "$dir")
result="$result\n$size $name"
done
printf '%b' "$result" | grep -v '^$' || true
delegate_to: "{{ item.0 }}"
become: yes
loop: "{{ longhorn_nodes | product(longhorn_recovery_volumes) | list }}"
loop_control:
label: "{{ item.0 }}: {{ item.1.pv_name }}"
register: dir_scan_raw
changed_when: false
when: item.1.source_node | default('') == '' or item.1.source_dir | default('') == ''
- name: "Phase 0 | Pick best source (largest dir with data, >16K)"
ansible.builtin.set_fact:
_discovered_sources: "{{ _build | from_json }}"
vars:
_build: >-
{% set ns = namespace(result={}) %}
{% for res in dir_scan_raw.results | default([]) %}
{% if not res.skipped | default(false) and res.stdout | default('') != '' %}
{% set node = res.item.0 %}
{% set vol = res.item.1.pv_name %}
{% for line in res.stdout_lines %}
{% set parts = line.split() %}
{% if parts | length == 2 %}
{% set size = parts[0] | int %}
{% set dir = parts[1] %}
{% if size > 16384 and (vol not in ns.result or size > ns.result[vol].size) %}
{# size is in KB (from du -sk); 16384 KB = 16 MiB minimum real replica #}
{% set _ = ns.result.update({vol: {'node': node, 'dir': dir, 'size': size}}) %}
{% endif %}
{% endif %}
{% endfor %}
{% endif %}
{% endfor %}
{{ ns.result | to_json }}
- name: "Phase 0 | Show discovered sources"
ansible.builtin.debug:
msg: >-
{% for vol in longhorn_recovery_volumes %}
{{ vol.pv_name }}:
{% if vol.source_node | default('') != '' %}
source: MANUAL → {{ vol.source_node }}/{{ vol.source_dir }}
{% elif vol.pv_name in _discovered_sources %}
source: AUTO → {{ _discovered_sources[vol.pv_name].node }}/{{ _discovered_sources[vol.pv_name].dir }}
({{ (_discovered_sources[vol.pv_name].size / 1024) | round(0) | int }} MiB)
{% else %}
source: NOT FOUND — no dir >16K on any node for this volume
{% endif %}
{% endfor %}
- name: "Phase 0 | Fail if source not found for any volume"
ansible.builtin.fail:
msg: >
No replica dir with data found for {{ item.pv_name }} on any node
({{ longhorn_nodes | join(', ') }}). Check that the replica files survived.
loop: "{{ longhorn_recovery_volumes }}"
loop_control:
label: "{{ item.pv_name }}"
when: >
item.source_node | default('') == '' and
item.source_dir | default('') == '' and
item.pv_name not in _discovered_sources
- name: "Phase 0 | Initialize merged volume list"
ansible.builtin.set_fact:
_merged_volumes: []
- name: "Phase 0 | Append each volume with resolved source"
ansible.builtin.set_fact:
_merged_volumes: "{{ _merged_volumes + [item | combine(_source)] }}"
vars:
_manual: "{{ item.source_node | default('') != '' and item.source_dir | default('') != '' }}"
_source: "{{ _manual | bool | ternary(
{'source_node': item.source_node, 'source_dir': item.source_dir},
{'source_node': _discovered_sources[item.pv_name].node,
'source_dir': _discovered_sources[item.pv_name].dir}) }}"
loop: "{{ longhorn_recovery_volumes }}"
loop_control:
label: "{{ item.pv_name }}"
- name: "Phase 0 | Apply resolved volume list"
ansible.builtin.set_fact:
_volumes: "{{ _merged_volumes }}"
# =========================================================================
# PHASE 1 — UPLOAD MERGE TOOL AND BACK UP REPLICA DIRS
# =========================================================================
- name: "Phase 1 | Upload merge tool to source nodes"
ansible.builtin.command: >
scp -o StrictHostKeyChecking=no
{{ merge_tool_local }}
pi@{{ item.source_node }}.home:{{ merge_tool_remote }}
delegate_to: localhost
become: no
loop: "{{ _volumes }}"
loop_control:
label: "{{ item.pv_name }} → {{ item.source_node }}"
changed_when: true
- name: "Phase 1 | Create backup directory on source node"
ansible.builtin.file:
path: "{{ backup_base }}/{{ item.pvc_name }}"
state: directory
mode: "0755"
delegate_to: "{{ item.source_node }}"
become: yes
loop: "{{ _volumes }}"
loop_control:
label: "{{ item.pvc_name }}"
- name: "Phase 1 | Check if backup already exists (skip if re-running)"
ansible.builtin.stat:
path: "{{ backup_base }}/{{ item.pvc_name }}/{{ item.source_dir }}/volume.meta"
register: backup_exists
delegate_to: "{{ item.source_node }}"
become: yes
loop: "{{ _volumes }}"
loop_control:
label: "{{ item.pvc_name }}"
- name: "Phase 1 | Back up untouched replica dir (safe copy before any operation)"
ansible.builtin.shell: >
cp -a {{ longhorn_data_path }}/replicas/{{ item.item.source_dir }}
{{ backup_base }}/{{ item.item.pvc_name }}/
delegate_to: "{{ item.item.source_node }}"
become: yes
loop: "{{ backup_exists.results }}"
loop_control:
label: "{{ item.item.pvc_name }}"
when: not item.stat.exists
changed_when: true
- name: "Phase 1 | Verify backup contains volume.meta"
ansible.builtin.stat:
path: "{{ backup_base }}/{{ item.pvc_name }}/{{ item.source_dir }}/volume.meta"
register: backup_meta
delegate_to: "{{ item.source_node }}"
become: yes
loop: "{{ _volumes }}"
loop_control:
label: "{{ item.pvc_name }}"
- name: "Phase 1 | Fail if backup is incomplete"
ansible.builtin.fail:
msg: >
Backup for {{ item.item.pvc_name }} is missing volume.meta — the source dir
{{ item.item.source_dir }} may not exist or backup copy failed.
loop: "{{ backup_meta.results }}"
loop_control:
label: "{{ item.item.pvc_name }}"
when: not item.stat.exists
# =========================================================================
# PHASE 2 — RECONSTRUCT FILESYSTEMS FROM REPLICA LAYERS
# =========================================================================
- name: "Phase 2 | Create merged output directory"
ansible.builtin.file:
path: "{{ merged_base }}"
state: directory
mode: "0755"
delegate_to: "{{ item.source_node }}"
become: yes
loop: "{{ _volumes }}"
loop_control:
label: "{{ item.pvc_name }}"
- name: "Phase 2 | Check if merged image already exists"
ansible.builtin.stat:
path: "{{ merged_base }}/{{ item.pvc_name }}.img"
register: merged_exists
delegate_to: "{{ item.source_node }}"
become: yes
loop: "{{ _volumes }}"
loop_control:
label: "{{ item.pvc_name }}"
- name: "Phase 2 | Merge snapshot + head layers into single image"
ansible.builtin.command: >
python3 {{ merge_tool_remote }}
{{ backup_base }}/{{ item.item.pvc_name }}/{{ item.item.source_dir }}
{{ merged_base }}/{{ item.item.pvc_name }}.img
delegate_to: "{{ item.item.source_node }}"
become: yes
loop: "{{ merged_exists.results }}"
loop_control:
label: "{{ item.item.pvc_name }}"
when: not item.stat.exists
changed_when: true
register: merge_output
- name: "Phase 2 | Show merge output"
ansible.builtin.debug:
msg: "{{ item.stdout_lines | default([]) }}"
loop: "{{ merge_output.results | default([]) }}"
loop_control:
label: "{{ item.item.item.pvc_name | default('') }}"
when: item.stdout_lines is defined
- name: "Phase 2 | Test mount merged image to verify filesystem"
ansible.builtin.shell: |
mkdir -p {{ recovery_mount }}-{{ item.pvc_name }}
mount -o loop,ro,noload {{ merged_base }}/{{ item.pvc_name }}.img {{ recovery_mount }}-{{ item.pvc_name }}
ls {{ recovery_mount }}-{{ item.pvc_name }}/
umount {{ recovery_mount }}-{{ item.pvc_name }}
delegate_to: "{{ item.source_node }}"
become: yes
loop: "{{ _volumes }}"
loop_control:
label: "{{ item.pvc_name }}"
register: mount_test
changed_when: false
- name: "Phase 2 | Show filesystem contents"
ansible.builtin.debug:
msg: "{{ item.item.pvc_name }}: {{ item.stdout_lines }}"
loop: "{{ mount_test.results }}"
loop_control:
label: "{{ item.item.pvc_name }}"
# =========================================================================
# PHASE 3 — CREATE LONGHORN VOLUME CRDs
# =========================================================================
# Scale down StatefulSets BEFORE removing PVC finalizers.
# StatefulSet controllers auto-recreate PVCs as soon as they are deleted; if we
# remove finalizers while the StatefulSet is still running, the controller
# immediately provisions a new empty PVC (bound to a fresh volume), making the
# PVC spec immutable by the time Phase 8 tries to pin it to our recovered PV.
# Deployments are less urgent here but scaled early for consistency.
- name: "Phase 3 | Pre-scale down Deployments (before PVC finalizer removal)"
kubernetes.core.k8s_scale:
kind: Deployment
name: "{{ item.workload_name }}"
namespace: "{{ item.namespace }}"
replicas: 0
wait: yes
wait_timeout: 60
delegate_to: localhost
loop: "{{ _volumes }}"
loop_control:
label: "{{ item.namespace }}/{{ item.workload_name }}"
when: item.workload_kind == 'Deployment' and item.workload_name != ''
ignore_errors: yes
- name: "Phase 3 | Pre-scale down StatefulSets (before PVC finalizer removal)"
kubernetes.core.k8s_scale:
kind: StatefulSet
name: "{{ item.workload_name }}"
namespace: "{{ item.namespace }}"
replicas: 0
wait: yes
wait_timeout: 60
delegate_to: localhost
loop: "{{ _volumes }}"
loop_control:
label: "{{ item.namespace }}/{{ item.workload_name }}"
when: item.workload_kind == 'StatefulSet' and item.workload_name != ''
ignore_errors: yes
# Clear any stuck Terminating PVs/PVCs BEFORE creating Volume CRDs.
# If old Terminating PVCs still exist when we create the Volume CRD, Longhorn
# associates them and deletes the Volume CRD when the PVC finishes terminating.
- name: "Phase 3 | Check PVC state before touching finalizers"
ansible.builtin.shell: >
kubectl get pvc {{ item.pvc_name }} -n {{ item.namespace }}
-o jsonpath='{.metadata.deletionTimestamp}' 2>/dev/null || true
register: pvc_deletion_ts
delegate_to: localhost
loop: "{{ _volumes }}"
loop_control:
label: "{{ item.namespace }}/{{ item.pvc_name }}"
changed_when: false
- name: "Phase 3 | Remove finalizers from stuck PV (if Terminating)"
ansible.builtin.shell: >
kubectl patch pv {{ item.pv_name }} --type=merge
-p '{"metadata":{"finalizers":null}}' 2>/dev/null || true
delegate_to: localhost
loop: "{{ _volumes }}"
loop_control:
label: "{{ item.pv_name }}"
changed_when: false
- name: "Phase 3 | Remove finalizers from stuck PVC (if Terminating)"
ansible.builtin.shell: >
kubectl patch pvc {{ item.pvc_name }} -n {{ item.namespace }}
--type=merge -p '{"metadata":{"finalizers":null}}' 2>/dev/null || true
delegate_to: localhost
loop: "{{ pvc_deletion_ts.results }}"
loop_control:
label: "{{ item.item.namespace }}/{{ item.item.pvc_name }}"
when: item.stdout != ''
changed_when: false
- name: "Phase 3 | Wait for stuck PVCs to fully delete before creating Volume CRDs"
kubernetes.core.k8s_info:
kind: PersistentVolumeClaim
name: "{{ item.item.pvc_name }}"
namespace: "{{ item.item.namespace }}"
register: pvc_pre_check
until: pvc_pre_check.resources | default([]) | length == 0
retries: 12
delay: 5
delegate_to: localhost
loop: "{{ pvc_deletion_ts.results }}"
loop_control:
label: "{{ item.item.namespace }}/{{ item.item.pvc_name }}"
when: item.stdout != ''
- name: "Phase 3 | Check if Longhorn Volume CRD already exists"
kubernetes.core.k8s_info:
kind: Volume
api_version: longhorn.io/v1beta2
namespace: "{{ longhorn_namespace }}"
name: "{{ item.pv_name }}"
register: volume_crd_check
delegate_to: localhost
loop: "{{ _volumes }}"
loop_control:
label: "{{ item.pv_name }}"
- name: "Phase 3 | Create Longhorn Volume CRD"
kubernetes.core.k8s:
state: present
definition:
apiVersion: longhorn.io/v1beta2
kind: Volume
metadata:
name: "{{ item.item.pv_name }}"
namespace: "{{ longhorn_namespace }}"
spec:
accessMode: "{{ item.item.access_mode | lower | replace('readwriteonce', 'rwo') | replace('readwritemany', 'rwx') }}"
dataEngine: v1
frontend: blockdev
numberOfReplicas: 3
size: "{{ item.item.size_bytes }}"
delegate_to: localhost
loop: "{{ volume_crd_check.results }}"
loop_control:
label: "{{ item.item.pv_name }}"
when: item.resources | default([]) | length == 0
- name: "Phase 3 | Wait for Longhorn replicas to appear (stopped state)"
kubernetes.core.k8s_info:
kind: Replica
api_version: longhorn.io/v1beta2
namespace: "{{ longhorn_namespace }}"
label_selectors:
- "longhornvolume={{ item.pv_name }}"
register: replicas_check
until: replicas_check.resources | default([]) | length >= 1
retries: 24
delay: 5
delegate_to: localhost
loop: "{{ _volumes }}"
loop_control:
label: "{{ item.pv_name }}"
- name: "Phase 3 | Wait for Volume status to be populated (webhook cache)"
kubernetes.core.k8s_info:
kind: Volume
api_version: longhorn.io/v1beta2
namespace: "{{ longhorn_namespace }}"
name: "{{ item.pv_name }}"
register: vol_ready
until: >
(vol_ready.resources | default([]) | first | default({}) ).status.state | default('') != ''
retries: 24
delay: 5
delegate_to: localhost
loop: "{{ _volumes }}"
loop_control:
label: "{{ item.pv_name }}"
# =========================================================================
# PHASE 4 — SCALE DOWN WORKLOADS
# =========================================================================
- name: "Phase 4 | Scale down Deployments"
kubernetes.core.k8s_scale:
kind: Deployment
name: "{{ item.workload_name }}"
namespace: "{{ item.namespace }}"
replicas: 0
wait: yes
wait_timeout: 60
delegate_to: localhost
loop: "{{ _volumes }}"
loop_control:
label: "{{ item.namespace }}/{{ item.workload_name }}"
when: item.workload_kind == 'Deployment' and item.workload_name != ''
ignore_errors: yes
- name: "Phase 4 | Scale down StatefulSets"
kubernetes.core.k8s_scale:
kind: StatefulSet
name: "{{ item.workload_name }}"
namespace: "{{ item.namespace }}"
replicas: 0
wait: yes
wait_timeout: 60
delegate_to: localhost
loop: "{{ _volumes }}"
loop_control:
label: "{{ item.namespace }}/{{ item.workload_name }}"
when: item.workload_kind == 'StatefulSet' and item.workload_name != ''
ignore_errors: yes
- name: "Phase 4 | Delete any lingering Error-state pods that may hold volume attachments"
ansible.builtin.shell: |
kubectl get pods -n {{ item.namespace }} \
--field-selector='status.phase=Failed' -o name | xargs -r kubectl delete -n {{ item.namespace }}
delegate_to: localhost
loop: "{{ _volumes }}"
loop_control:
label: "{{ item.namespace }}"
changed_when: false
ignore_errors: yes
# =========================================================================
# PHASE 5 — ATTACH VOLUME VIA MAINTENANCE TICKET
# =========================================================================
- name: "Phase 5 | Create VolumeAttachment maintenance ticket"
kubernetes.core.k8s:
state: present
definition:
apiVersion: longhorn.io/v1beta2
kind: VolumeAttachment
metadata:
name: "{{ item.pv_name }}"
namespace: "{{ longhorn_namespace }}"
spec:
attachmentTickets:
recovery:
generation: 0
id: recovery
nodeID: "{{ item.source_node }}"
parameters:
disableFrontend: "false"
type: longhorn-api
volume: "{{ item.pv_name }}"
delegate_to: localhost
loop: "{{ _volumes }}"
loop_control:
label: "{{ item.pv_name }} → {{ item.source_node }}"
- name: "Phase 5 | Wait for volume to reach attached state"
kubernetes.core.k8s_info:
kind: Volume
api_version: longhorn.io/v1beta2
namespace: "{{ longhorn_namespace }}"
name: "{{ item.pv_name }}"
register: vol_state
until: >
(vol_state.resources | default([]) | first | default({}) ).status.state | default('') == 'attached'
retries: 24
delay: 5
delegate_to: localhost
loop: "{{ _volumes }}"
loop_control:
label: "{{ item.pv_name }}"
- name: "Phase 5 | Verify block device exists on target node"
ansible.builtin.stat:
path: "/dev/longhorn/{{ item.pv_name }}"
register: blockdev_check
delegate_to: "{{ item.source_node }}"
become: yes
loop: "{{ _volumes }}"
loop_control:
label: "{{ item.pv_name }}"
- name: "Phase 5 | Fail if block device not present"
ansible.builtin.fail:
msg: >
Block device /dev/longhorn/{{ item.item.pv_name }} not found on
{{ item.item.source_node }} after volume attached — check Longhorn logs.
loop: "{{ blockdev_check.results }}"
loop_control:
label: "{{ item.item.pv_name }}"
when: not item.stat.exists
# =========================================================================
# PHASE 6 — INJECT DATA INTO LIVE BLOCK DEVICE
# =========================================================================
- name: "Phase 6 | Inject data via block device (mount, rsync, umount)"
ansible.builtin.shell: |
LIVE="{{ live_mount }}-{{ item.pvc_name }}"
SRC="{{ recovery_mount }}-{{ item.pvc_name }}"
BLOCKDEV="/dev/longhorn/{{ item.pv_name }}"
MERGED="{{ merged_base }}/{{ item.pvc_name }}.img"
# Always unmount on exit (success or partial failure)
cleanup() {
mountpoint -q "$SRC" && umount "$SRC" || true
mountpoint -q "$LIVE" && umount "$LIVE" || true
}
trap cleanup EXIT
mkdir -p "$LIVE" "$SRC"
# Format if not already formatted (idempotent — safe on re-run)
if ! blkid "$BLOCKDEV" | grep -q 'TYPE='; then
mkfs.ext4 -F "$BLOCKDEV"
fi
# Mount live block device if not already mounted
if ! mountpoint -q "$LIVE"; then
mount "$BLOCKDEV" "$LIVE"
fi
# Mount merged recovery image read-only if not already mounted
if ! mountpoint -q "$SRC"; then
mount -o loop,ro,noload "$MERGED" "$SRC"
fi
# Sync data — exclude lost+found
# --ignore-errors: continue past unreadable files (e.g. corrupted parts from power cut)
# rc=23 (partial transfer) is treated as success — bulk data transferred
rsync -av --ignore-errors --exclude='lost+found' "$SRC/" "$LIVE/" || \
{ RC=$?; [ $RC -eq 23 ] && echo "WARNING: rsync rc=23 (some files unreadable in source — expected for power-cut partitions)" || exit $RC; }
delegate_to: "{{ item.source_node }}"
become: yes
loop: "{{ _volumes }}"
loop_control:
label: "{{ item.pvc_name }}"
register: inject_output
changed_when: true
- name: "Phase 6 | Show rsync output"
ansible.builtin.debug:
msg: "{{ item.stdout_lines | default([]) }}"
loop: "{{ inject_output.results }}"
loop_control:
label: "{{ item.item.pvc_name }}"
# =========================================================================
# PHASE 7 — DETACH VOLUME
# =========================================================================
- name: "Phase 7 | Remove recovery attachment ticket"
kubernetes.core.k8s_json_patch:
kind: VolumeAttachment
api_version: longhorn.io/v1beta2
namespace: "{{ longhorn_namespace }}"
name: "{{ item.pv_name }}"
patch:
- op: remove
path: /spec/attachmentTickets/recovery
delegate_to: localhost
loop: "{{ _volumes }}"
loop_control:
label: "{{ item.pv_name }}"
ignore_errors: yes
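# kubectl equivalent of this JSON patch, for a one-off manual detach (illustrative):
#   kubectl -n longhorn-system patch volumeattachments.longhorn.io <pv-name> --type=json \
#     -p '[{"op":"remove","path":"/spec/attachmentTickets/recovery"}]'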
- name: "Phase 7 | Wait for recovery ticket to be gone"
kubernetes.core.k8s_info:
kind: VolumeAttachment
api_version: longhorn.io/v1beta2
namespace: "{{ longhorn_namespace }}"
name: "{{ item.pv_name }}"
register: va_state
until: >
(va_state.resources | default([]) | first | default({}) ).spec.attachmentTickets.recovery is not defined
retries: 24
delay: 5
delegate_to: localhost
loop: "{{ _volumes }}"
loop_control:
label: "{{ item.pv_name }}"
# =========================================================================
# PHASE 8 — RESTORE PV AND PVC
# =========================================================================
- name: "Phase 8 | Create PersistentVolume (Retain, no claimRef)"
kubernetes.core.k8s:
state: present
definition:
apiVersion: v1
kind: PersistentVolume
metadata:
name: "{{ item.pv_name }}"
annotations:
pv.kubernetes.io/provisioned-by: driver.longhorn.io
spec:
accessModes:
- "{{ item.access_mode }}"
capacity:
storage: "{{ item.size_human }}"
csi:
driver: driver.longhorn.io
fsType: ext4
volumeHandle: "{{ item.pv_name }}"
volumeAttributes:
dataEngine: v1
dataLocality: disabled
disableRevisionCounter: "true"
numberOfReplicas: "3"
staleReplicaTimeout: "30"
persistentVolumeReclaimPolicy: Retain
storageClassName: longhorn
volumeMode: Filesystem
delegate_to: localhost
loop: "{{ _volumes }}"
loop_control:
label: "{{ item.pv_name }}"
- name: "Phase 8 | Wait for PV to be Available or Bound"
kubernetes.core.k8s_info:
kind: PersistentVolume
name: "{{ item.pv_name }}"
register: pv_state
until: >
(pv_state.resources | default([]) | first | default({}) ).status.phase | default('')
in ['Available', 'Bound']
retries: 12
delay: 5
delegate_to: localhost
loop: "{{ _volumes }}"
loop_control:
label: "{{ item.pv_name }}"
- name: "Phase 8 | Check if PVC already bound to correct PV"
ansible.builtin.shell: >
kubectl get pvc {{ item.pvc_name }} -n {{ item.namespace }}
-o jsonpath='{.spec.volumeName}' 2>/dev/null || true
register: pvc_current_volume
delegate_to: localhost
loop: "{{ _volumes }}"
loop_control:
label: "{{ item.namespace }}/{{ item.pvc_name }}"
changed_when: false
- name: "Phase 8 | Create PersistentVolumeClaim pinned to PV"
kubernetes.core.k8s:
state: present
definition:
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
name: "{{ item.item.pvc_name }}"
namespace: "{{ item.item.namespace }}"
spec:
accessModes:
- "{{ item.item.access_mode }}"
resources:
requests:
storage: "{{ item.item.size_human }}"
storageClassName: longhorn
volumeMode: Filesystem
volumeName: "{{ item.item.pv_name }}"
delegate_to: localhost
loop: "{{ pvc_current_volume.results }}"
loop_control:
label: "{{ item.item.namespace }}/{{ item.item.pvc_name }}"
when: item.stdout != item.item.pv_name
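# Static binding: the PV above carries no claimRef, and the PVC pins itself with
# spec.volumeName; the PV controller completes the bind once storage class, size and
# access mode match on both sides.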
- name: "Phase 8 | Wait for PVC to be Bound"
kubernetes.core.k8s_info:
kind: PersistentVolumeClaim
namespace: "{{ item.namespace }}"
name: "{{ item.pvc_name }}"
register: pvc_state
until: >
(pvc_state.resources | default([]) | first | default({}) ).status.phase | default('') == 'Bound'
retries: 12
delay: 5
delegate_to: localhost
loop: "{{ _volumes }}"
loop_control:
label: "{{ item.namespace }}/{{ item.pvc_name }}"
# =========================================================================
# PHASE 9 — SCALE UP AND VERIFY
# =========================================================================
- name: "Phase 9 | Scale up Deployments"
kubernetes.core.k8s_scale:
kind: Deployment
name: "{{ item.workload_name }}"
namespace: "{{ item.namespace }}"
replicas: 1
wait: yes
wait_timeout: 120
delegate_to: localhost
loop: "{{ _volumes }}"
loop_control:
label: "{{ item.namespace }}/{{ item.workload_name }}"
when: item.workload_kind == 'Deployment' and item.workload_name != ''
ignore_errors: yes
- name: "Phase 9 | Scale up StatefulSets"
kubernetes.core.k8s_scale:
kind: StatefulSet
name: "{{ item.workload_name }}"
namespace: "{{ item.namespace }}"
replicas: 1
wait: yes
wait_timeout: 120
delegate_to: localhost
loop: "{{ _volumes }}"
loop_control:
label: "{{ item.namespace }}/{{ item.workload_name }}"
when: item.workload_kind == 'StatefulSet' and item.workload_name != ''
ignore_errors: yes
- name: "Phase 9 | Wait for workload to report ready replicas"
kubernetes.core.k8s_info:
kind: "{{ item.workload_kind }}"
name: "{{ item.workload_name }}"
namespace: "{{ item.namespace }}"
register: workload_state
until: >
(workload_state.resources | default([]) | first | default({}) ).status.readyReplicas | default(0) | int >= 1
retries: 24
delay: 5
delegate_to: localhost
loop: "{{ _volumes }}"
loop_control:
label: "{{ item.namespace }}/{{ item.workload_name }}"
when: item.workload_name != ''
ignore_errors: yes
- name: "Phase 9 | Run optional verification command in pod"
ansible.builtin.shell: >
kubectl exec -n {{ item.namespace }}
$(kubectl get pod -n {{ item.namespace }}
-l statefulset.kubernetes.io/pod-name={{ item.workload_name }}-0
--no-headers -o custom-columns=':metadata.name' 2>/dev/null ||
kubectl get pod -n {{ item.namespace }} {{ item.workload_name }}-0
--no-headers -o custom-columns=':metadata.name' 2>/dev/null)
-- sh -c '{{ item.verify_cmd }}'
delegate_to: localhost
loop: "{{ _volumes }}"
loop_control:
label: "{{ item.namespace }}/{{ item.workload_name }}"
when: item.verify_cmd | default('') != ''
register: verify_output
changed_when: false
ignore_errors: yes
- name: "Phase 9 | Show verification output"
ansible.builtin.debug:
msg: "{{ item.stdout_lines | default([]) }}"
loop: "{{ verify_output.results | default([]) }}"
loop_control:
label: "{{ item.item.pvc_name | default('') }}"
when: item.stdout_lines is defined and item.item.verify_cmd | default('') != ''
# =========================================================================
# RECOVERY SUMMARY
# =========================================================================
- name: "Summary | Recovery complete"
ansible.builtin.debug:
msg: |
╔══════════════════════════════════════════════════════╗
║ Longhorn Block-Device Recovery Complete ║
╚══════════════════════════════════════════════════════╝
Volumes recovered:
{% for v in _volumes %}
• {{ v.pvc_name }} ({{ v.namespace }}) ← {{ v.source_node }}:{{ v.source_dir }}
{% endfor %}
Backups retained at: {{ backup_base }}/<pvc-name>/
Merged images at: {{ merged_base }}/<pvc-name>.img
Next steps:
1. Verify application data through the app UI / API
2. Repeat for remaining volumes (update vars file)
3. Run a fresh k8s_pvc backup once all volumes are healthy

View File

@@ -1,84 +0,0 @@
---
# Example vars file for playbooks/recover/longhorn_data.yml
#
# Usage:
# ansible-playbook -i inventory/hosts.yml playbooks/recover/longhorn_data.yml \
# -e @playbooks/recover/longhorn_data_vars.example.yml
#
# HOW TO FILL THIS IN:
#
# 1. Find untouched replica dirs across all nodes:
# for node in pi1 pi2 pi3; do
# echo "=== $node ==="
# ssh $node "sudo du -sh /mnt/arcodange/longhorn/replicas/pvc-<VOLUME>-* 2>/dev/null"
# done
# Pick the dir with the largest size (>16K) and oldest timestamps (from before the incident).
#
# 2. Get pv_name and pvc_name from PV/PVC backup:
# cat /home/pi/arcodange/backups/k3s_pvc/backup_*.volumes | grep -A5 "kind: PersistentVolume"
#
# 3. Get size_bytes from Longhorn volume spec or from:
# cat /mnt/arcodange/longhorn/replicas/<source_dir>/volume.meta
#
# 4. source_node = the node where the untouched dir lives
# source_dir = the exact directory name (e.g. pvc-abc123-998f49ff)
#
# Fields:
# pv_name — Longhorn volume name, equals the PV name (pvc-<uuid>) [REQUIRED]
# pvc_name — PVC name in the namespace [REQUIRED]
# namespace — namespace where the PVC lives [REQUIRED]
# size_bytes — volume capacity in bytes as a string (from volume spec) [REQUIRED]
# size_human — human-readable size for PVC spec (e.g. 128Mi, 8Gi) [REQUIRED]
# access_mode — ReadWriteOnce or ReadWriteMany [REQUIRED]
# workload_kind — Deployment or StatefulSet [REQUIRED]
# workload_name — name of the workload to scale down/up [REQUIRED]
# source_node — node holding the untouched replica dir (pi1/pi2/pi3) [OPTIONAL — auto-discovered]
# source_dir — exact replica dir name on source_node [OPTIONAL — auto-discovered]
# verify_cmd — shell command to run inside pod to confirm data after restore [OPTIONAL]
#
# source_node and source_dir are auto-discovered by Phase 0 (largest dir >16K across all
# nodes). Override them manually only if you want to force a specific replica dir.
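# Example volume.meta from a replica dir (illustrative; exact fields vary by Longhorn
# version, and "Size" is the value to copy into size_bytes, as a string):
#   {"Size":134217728,"Head":"volume-head-001.img","Dirty":true,"Rebuilding":false,"Parent":""}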
longhorn_recovery_volumes:
# --- url-shortener (example, already recovered 2026-04-14) ---
- pv_name: pvc-cdd434d1-c8b4-4a75-acde-2978ec9febd4
pvc_name: url-shortener-data
namespace: url-shortener
size_bytes: "134217728"
size_human: 128Mi
access_mode: ReadWriteOnce
workload_kind: Deployment
workload_name: url-shortener
source_node: pi3
source_dir: pvc-cdd434d1-c8b4-4a75-acde-2978ec9febd4-998f49ff
verify_cmd: "sqlite3 /data/urls.db 'SELECT COUNT(*) FROM urls;'"
# --- traefik (example, already recovered 2026-04-14) ---
# - pv_name: pvc-<traefik-uuid>
# pvc_name: traefik-data
# namespace: traefik
# size_bytes: "134217728"
# size_human: 128Mi
# access_mode: ReadWriteOnce
# workload_kind: Deployment
# workload_name: traefik
# source_node: pi3
# source_dir: pvc-<traefik-uuid>-<hex>
# verify_cmd: ""
# --- vault (uncomment and fill for recovery) ---
# - pv_name: pvc-<vault-uuid>
# pvc_name: vault-data
# namespace: vault
# size_bytes: "1073741824"
# size_human: 1Gi
# access_mode: ReadWriteOnce
# workload_kind: StatefulSet
# workload_name: vault
# source_node: pi2
# source_dir: pvc-<vault-uuid>-<hex>
# verify_cmd: ""
# Add more volumes here following the same pattern.
# Process one at a time first to validate, then batch.

View File

@@ -1,17 +0,0 @@
---
# Recovery vars for Clickhouse
# Source: pi3, dir pvc-1251909b-...-1163420b (2.6G — largest, snapshot verified non-zero)
# Generated: 2026-04-14
longhorn_recovery_volumes:
- pv_name: pvc-1251909b-3cef-40c6-881c-3bb6e929a596
pvc_name: clickhouse-storage-clickhouse-0
namespace: tools
size_bytes: "17179869184" # 16Gi
size_human: 16Gi
access_mode: ReadWriteOnce
workload_kind: StatefulSet
workload_name: clickhouse
source_node: pi3
source_dir: pvc-1251909b-3cef-40c6-881c-3bb6e929a596-1163420b
verify_cmd: "clickhouse-client --query 'SHOW DATABASES'"

View File

@@ -1,38 +0,0 @@
---
# Recovery vars for erp and hashicorp-vault volumes
# source_node/source_dir omitted — auto-discovered by Phase 0
longhorn_recovery_volumes:
- pv_name: pvc-7971918e-e47f-4739-a976-965ea2d770b4
pvc_name: erp
namespace: erp
size_bytes: "53687091200"
size_human: 50Gi
access_mode: ReadWriteMany
workload_kind: Deployment
workload_name: "" # intentionally blank — ERP needs Vault unsealed first; scale up manually
verify_cmd: ""
# hashicorp-vault StatefulSet has two PVCs (audit + data).
# workload_name is set only on the last entry so the StatefulSet is scaled up
# once after both volumes are ready, not between them.
- pv_name: pvc-6d2ea1c7-9327-4992-a02c-93ae604eda70
pvc_name: audit-hashicorp-vault-0
namespace: tools
size_bytes: "10737418240"
size_human: 10Gi
access_mode: ReadWriteOnce
workload_kind: StatefulSet
workload_name: ""
verify_cmd: ""
- pv_name: pvc-ca5567d3-a682-4cee-8ff1-2b8e23260635
pvc_name: data-hashicorp-vault-0
namespace: tools
size_bytes: "10737418240"
size_human: 10Gi
access_mode: ReadWriteOnce
workload_kind: StatefulSet
workload_name: hashicorp-vault
verify_cmd: ""

View File

@@ -1,47 +0,0 @@
---
# Recovery vars for remaining volumes (prometheus, alertmanager, redis, backups-rwx)
# source_node and source_dir intentionally omitted — auto-discovered by Phase 0
longhorn_recovery_volumes:
- pv_name: pvc-88e18c7f-2cfd-45e3-be5b-78c31ab829e9
pvc_name: prometheus-server
namespace: tools
size_bytes: "8589934592"
size_human: 8Gi
access_mode: ReadWriteOnce
workload_kind: Deployment
workload_name: prometheus-server
source_node: pi2
source_dir: pvc-88e18c7f-2cfd-45e3-be5b-78c31ab829e9-910583f6
verify_cmd: ""
- pv_name: pvc-aed7f2c4-1948-487a-8d10-d8a1372289b4
pvc_name: storage-prometheus-alertmanager-0
namespace: tools
size_bytes: "2147483648"
size_human: 2Gi
access_mode: ReadWriteOnce
workload_kind: StatefulSet
workload_name: prometheus-alertmanager
verify_cmd: ""
- pv_name: pvc-d1d5482b-81c8-4d7c-a528-7a57ef47a5ce
pvc_name: redis-storage-redis-0
namespace: tools
size_bytes: "1073741824"
size_human: 1Gi
access_mode: ReadWriteOnce
workload_kind: StatefulSet
workload_name: redis
verify_cmd: "redis-cli ping"
- pv_name: pvc-efda1d2f-1db8-46dd-9a97-3d11f1807ffa
pvc_name: backups-rwx
namespace: longhorn-system
size_bytes: "53687091200"
size_human: 50Gi
access_mode: ReadWriteMany
workload_kind: Deployment
workload_name: ""
verify_cmd: ""

View File

@@ -9,7 +9,6 @@
backup_size: 50Gi
access_mode: ReadWriteMany
storage_class: longhorn
recurring_job: thrice-a-month-backup
tasks:
- name: Create RWX PVC in longhorn-system
@@ -37,71 +36,27 @@
namespace: "{{ namespace_longhorn }}"
name: "{{ backup_volume_name }}"
register: pvc_info
retries: 3
delay: 3
until: pvc_info.resources | default([]) | length > 0 and pvc_info.resources[0].spec.volumeName is defined
- name: Extract the volume name
set_fact:
pvc_internal_name: "{{ pvc_info.resources[0].spec.volumeName }}"
- name: Create a RecurringJob for a 5am backup every two days
kubernetes.core.k8s:
state: present
definition:
apiVersion: longhorn.io/v1beta2
kind: RecurringJob
metadata:
name: "{{ recurring_job }}"
namespace: "{{ namespace_longhorn }}"
labels:
"recurring-job.longhorn.io": "{{ recurring_job }}"
spec:
name: "{{ recurring_job }}"
groups: []
task: backup
cron: "0 5 */2 * *"
retain: 2
concurrency: 1
- name: Attach the volume to the recurring job
kubernetes.core.k8s_json_patch:
api_version: longhorn.io/v1beta2
kind: Volume
namespace: "{{ namespace_longhorn }}"
name: "{{ pvc_internal_name }}"
patch:
- op: replace
path: "/metadata/labels/backup-target"
value: "default"
- op: replace
path: "/metadata/labels/recurring-job.longhorn.io~1{{ recurring_job }}"
value: "enabled"
- name: Launch a Deployment to trigger NFS
- name: Launch a temporary pod to trigger NFS
tags: never
kubernetes.core.k8s:
state: present
definition:
apiVersion: apps/v1
kind: Deployment
apiVersion: v1
kind: Pod
metadata:
name: rwx-nfs
namespace: "{{ namespace_longhorn }}"
spec:
replicas: 1
selector:
matchLabels:
app: rwx-nfs
template:
metadata:
labels:
app: rwx-nfs
spec:
containers:
- name: busybox
image: busybox
command: ["sleep", "infinity"]
# command: ["sh", "-c", "sleep 600"]
volumeMounts:
- mountPath: "/mnt/backups"
name: backup-vol
@@ -116,8 +71,7 @@
api_version: v1
kind: Pod
namespace: "{{ namespace_longhorn }}"
label_selectors:
- app = rwx-nfs
name: rwx-nfs
register: pod_info
until: pod_info.resources[0].status.phase == "Running"
retries: 30

View File

@@ -126,14 +126,14 @@
debug:
msg: >-
SSH key added successfully.
Visit https://gitea.arcodange.lab/user/settings/keys?verify_ssh={{ add_ssh_key_result.json.fingerprint }}
Visit https://gitea.arcodange.duckdns.org/user/settings/keys?verify_ssh={{ add_ssh_key_result.json.fingerprint }}
to verify your commit signatures with this key.
- set_fact:
gitea_org_name: arcodange-org
gitea_org_full_name: Arcodange
gitea_org_description: '🏹💻🪽'
gitea_org_website: https://www.arcodange.fr
gitea_org_website: https://www.arcodange.duckdns.org
gitea_org_location: Paris
gitea_org_avatar_img_path: '{{ inventory_dir }}/../img/arcodange-org.jpeg'

View File

@@ -55,123 +55,3 @@
loop_var: database__pg_instruction
loop:
"{{ ['postgres', 'gitea'] | product(pg_instructions) }}"
# ---
- name: Change table owner (CronJob with dynamic roles and auto DB naming)
hosts: localhost
connection: local
gather_facts: false
collections:
- kubernetes.core
vars:
namespace: kube-system
cronjob_name: pg-fix-table-ownership
pg_conf: >-
{{ hostvars[groups.postgres[0]].postgres.dockercompose.services.postgres.environment }}
postgres_admin_credentials:
username: '{{ pg_conf.POSTGRES_USER }}'
password: '{{ pg_conf.POSTGRES_PASSWORD }}'
pg_host: "{{ hostvars[groups.postgres[0]]['preferred_ip'] }}"
tasks:
- name: Create Kubernetes Secret for PostgreSQL admin credentials
kubernetes.core.k8s:
state: present
definition:
apiVersion: v1
kind: Secret
metadata:
name: postgres-admin-credentials
namespace: "{{ namespace }}"
type: Opaque
data:
username: "{{ postgres_admin_credentials.username | b64encode }}"
password: "{{ postgres_admin_credentials.password | b64encode }}"
- name: Create cronjob to change table owners (dynamic roles, auto DB)
kubernetes.core.k8s:
state: present
definition:
apiVersion: batch/v1
kind: CronJob
metadata:
name: "{{ cronjob_name }}"
namespace: "{{ namespace }}"
spec:
schedule: "0 3 * * *" # Exécution quotidienne à 3h du matin
successfulJobsHistoryLimit: 1
failedJobsHistoryLimit: 3
jobTemplate:
spec:
backoffLimit: 0
template:
spec:
restartPolicy: Never
containers:
- name: psql
image: postgres:16.3
envFrom:
- secretRef:
name: postgres-admin-credentials
env:
- name: PGPASSWORD
valueFrom:
secretKeyRef:
name: postgres-admin-credentials
key: password
command:
- /bin/sh
- -c
args:
- |
set -eu
# Dynamically fetch the PostgreSQL roles
echo "Fetching roles from PostgreSQL..."
ROLES=$(psql \
-h {{ pg_host }} \
-U $username \
-d postgres \
-t -A \
-c "SELECT rolname FROM pg_roles WHERE rolname LIKE '%_role';")
echo "Roles found: $ROLES"
# For each role, change table ownership in its associated database
for role in $ROLES; do
# Derive the database name by stripping "_role"
DB_NAME="${role%_role}"
echo "Database for $role: $DB_NAME"
# Check whether the database exists
if psql -h {{ pg_host }} -U $username -d postgres -t -A -c "SELECT 1 FROM pg_database WHERE datname = '$DB_NAME';" | grep -q 1; then
echo "Changing owner to $role for all tables in $DB_NAME..."
psql \
-h {{ pg_host }} \
-U $username \
-d "$DB_NAME" \
-c "
DO \$\$
DECLARE
r RECORD;
BEGIN
FOR r IN
SELECT tablename
FROM pg_tables
WHERE schemaname = 'public'
LOOP
EXECUTE format('ALTER TABLE public.%I OWNER TO %I', r.tablename, '$role');
END LOOP;
END \$\$;
"
echo "Owner changed for $role in $DB_NAME"
else
echo "Database $DB_NAME does not exist, skipping..."
fi
done

View File

@@ -3,7 +3,7 @@ APP_NAME = Arcodange repositories
[server]
DOMAIN = localhost
HTTP_PORT = 3000
ROOT_URL = https://gitea.arcodange.lab/
ROOT_URL = https://gitea.arcodange.duckdns.org/
DISABLE_SSH = false
SSH_PORT = 22
START_SSH_SERVER = true

View File

@@ -1,21 +0,0 @@
step_ca_primary: pi1
step_ca_user: step
step_ca_home: /home/step
step_ca_dir: /home/step/.step
step_ca_name: "Arcodange Lab CA"
step_ca_fqdn: ssl-ca.arcodange.lab
step_ca_listen_address: ":8443"
step_ca_password: "{{ vault_step_ca_password }}"
step_ca_force_reinit: false
step_ca_provisioner_name: cert-manager
step_ca_provisioner_type: JWK
step_ca_jwk_dir: "{{ step_ca_dir }}/provisioners"
step_ca_jwk_key: "{{ step_ca_jwk_dir }}/cert-manager.jwk"
step_ca_jwk_password: "{{ vault_step_ca_jwk_password }}"
step_ca_jwk_password_file: "{{ step_ca_dir }}/secrets/cert-manager.jwk.pass"
step_ca_url: "https://{{ step_ca_fqdn }}{{ step_ca_listen_address }}"
step_ca_root: "{{ step_ca_dir }}/certs/root_ca.crt"

View File

@@ -1,4 +0,0 @@
- name: restart step-ca
systemd:
name: step-ca
state: restarted

View File

@@ -1,67 +0,0 @@
# can be called with -e step_ca_force_reinit=true
# 1⃣ Check whether the CA is already initialized
- name: Check if CA already initialized
stat:
path: "{{ step_ca_dir }}/config/ca.json"
register: step_ca_initialized
when: inventory_hostname == step_ca_primary
# 2⃣ Stop step-ca if reinit is forced
- name: Stop step-ca service (reinit)
systemd:
name: step-ca
state: stopped
when:
- inventory_hostname == step_ca_primary
- step_ca_force_reinit | bool
ignore_errors: true
# 3⃣ Fully wipe the CA if reinit is forced
- name: Wipe existing step-ca data
file:
path: "{{ step_ca_dir }}"
state: absent
when:
- inventory_hostname == step_ca_primary
- step_ca_force_reinit | bool
# 4⃣ Recreate the CA directory cleanly
- name: Recreate step-ca directory
file:
path: "{{ step_ca_dir }}"
state: directory
owner: "{{ step_ca_user }}"
group: "{{ step_ca_user }}"
mode: "0700"
when:
- inventory_hostname == step_ca_primary
- step_ca_force_reinit | bool
# 5⃣ Install the password file
- name: Install step-ca password file
copy:
dest: "{{ step_ca_home }}/.step-pass"
content: "{{ step_ca_password }}"
owner: "{{ step_ca_user }}"
group: "{{ step_ca_user }}"
mode: "0600"
when: inventory_hostname == step_ca_primary
# 6⃣ Initialize step-ca (non-interactive)
- name: Initialize step-ca
become: true
become_user: "{{ step_ca_user }}"
command: >
step ca init
--name "{{ step_ca_name }}"
--dns "{{ step_ca_fqdn }}"
--address "{{ step_ca_listen_address }}"
--provisioner admin
--password-file {{ step_ca_home }}/.step-pass
args:
creates: "{{ step_ca_dir }}/config/ca.json"
when:
- inventory_hostname == step_ca_primary
- step_ca_force_reinit | bool or not step_ca_initialized.stat.exists
notify: restart step-ca

View File

@@ -1,51 +0,0 @@
- name: Install base packages
apt:
name:
- curl
- vim
- gpg
- ca-certificates
state: present
update_cache: yes
install_recommends: no
- name: Download Smallstep apt signing key
get_url:
url: https://packages.smallstep.com/keys/apt/repo-signing-key.gpg
dest: /etc/apt/trusted.gpg.d/smallstep.asc
mode: "0644"
- name: Add Smallstep apt repository
copy:
dest: /etc/apt/sources.list.d/smallstep.list
mode: "0644"
content: |
deb [signed-by=/etc/apt/trusted.gpg.d/smallstep.asc] https://packages.smallstep.com/stable/debian debs main
- name: Update apt cache
apt:
update_cache: yes
- name: Install step-cli and step-ca
apt:
name:
- step-cli
- step-ca
state: present
- name: Create step user
user:
name: "{{ step_ca_user }}"
system: true
shell: /usr/sbin/nologin
home: "{{ step_ca_home }}"
- name: Secure step directory
file:
path: "{{ step_ca_dir }}"
owner: "{{ step_ca_user }}"
group: "{{ step_ca_user }}"
mode: "0700"
recurse: yes

View File

@@ -1,5 +0,0 @@
- import_tasks: install.yml
- import_tasks: init.yml
- import_tasks: sync.yml
- import_tasks: systemd.yml
- import_tasks: provisioners.yml

View File

@@ -1,73 +0,0 @@
- name: Ensure provisioner directory exists
file:
path: "{{ step_ca_jwk_dir }}"
state: directory
owner: "{{ step_ca_user }}"
group: "{{ step_ca_user }}"
mode: "0700"
when: inventory_hostname == step_ca_primary
- name: Check if JWK provisioner already exists
command: >
step ca provisioner list
--ca-url {{ step_ca_url }}
--root {{ step_ca_root }}
register: step_ca_provisioners
changed_when: false
become: true
become_user: "{{ step_ca_user }}"
when: inventory_hostname == step_ca_primary
- name: Check if cert-manager provisioner exists
set_fact:
step_ca_provisioner_exists: >-
{{
(step_ca_provisioners.stdout | from_json
| selectattr('name', 'equalto', step_ca_provisioner_name)
| list
| length) > 0
}}
when: inventory_hostname == step_ca_primary
- name: Install JWK password file
copy:
dest: "{{ step_ca_jwk_password_file }}"
content: "{{ step_ca_jwk_password }}"
owner: "{{ step_ca_user }}"
group: "{{ step_ca_user }}"
mode: "0400"
when: inventory_hostname == step_ca_primary
- name: Generate JWK key for cert-manager
command: >
step crypto jwk create
{{ step_ca_jwk_key }}.pub
{{ step_ca_jwk_key }}
--password-file "{{ step_ca_jwk_password_file }}"
args:
creates: "{{ step_ca_jwk_key }}"
become: true
become_user: "{{ step_ca_user }}"
when: inventory_hostname == step_ca_primary
- name: Add JWK provisioner to step-ca
command: >
step ca provisioner add {{ step_ca_provisioner_name }}
--type JWK
--public-key {{ step_ca_jwk_key }}.pub
--private-key {{ step_ca_jwk_key }}
become: true
become_user: "{{ step_ca_user }}"
when:
- inventory_hostname == step_ca_primary
- step_ca_provisioner_name not in step_ca_provisioners.stdout
notify: restart step-ca
- name: Secure JWK keys permissions
file:
path: "{{ step_ca_jwk_dir }}"
owner: "{{ step_ca_user }}"
group: "{{ step_ca_user }}"
mode: "0700"
recurse: yes
when: inventory_hostname == step_ca_primary

View File

@@ -1,121 +0,0 @@
# 1⃣ Lock on the primary (prevents concurrent syncs)
- name: Create sync lock on primary
file:
path: "{{ step_ca_dir }}/.sync.lock"
state: touch
owner: "{{ step_ca_user }}"
group: "{{ step_ca_user }}"
mode: "0600"
delegate_to: "{{ step_ca_primary }}"
run_once: true
# 2⃣ Compute the CA checksum on the primary
- name: Compute deterministic checksum of CA directory on primary
shell: |
set -o pipefail
tar --sort=name \
--mtime='UTC 1970-01-01' \
--owner=0 --group=0 --numeric-owner \
-cf - {{ step_ca_dir }} \
| sha256sum | awk '{print $1}'
args:
executable: /bin/bash
register: step_ca_primary_checksum
changed_when: false
delegate_to: "{{ step_ca_primary }}"
run_once: true
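# The tar flags above pin entry order, mtimes and ownership, so the archive bytes (and
# hence the sha256) change only when file contents change, not on every run.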
# 3⃣ Load the previous checksum (if it exists)
- name: Load previous checksum (controller)
slurp:
src: /tmp/step-ca-sync/.checksum
register: step_ca_previous_checksum
failed_when: false
changed_when: false
run_once: true
become: false
delegate_to: localhost
# 4⃣ Decide whether a sync is required
- name: Decide if sync is required
set_fact:
step_ca_sync_required: >-
{{
step_ca_previous_checksum.content | default('') | b64decode
!= step_ca_primary_checksum.stdout
}}
run_once: true
- name: Ensure temporary sync directory exists on controller
file:
path: /tmp/step-ca-sync
state: directory
mode: "0700"
delegate_to: localhost
become: false
run_once: true
# 5⃣ Pull from the primary to the controller
- name: Fetch CA data from primary to controller
synchronize:
rsync_path: "sudo -u {{ step_ca_user }} rsync"
src: "{{ step_ca_dir }}/"
dest: "/tmp/step-ca-sync/"
mode: pull
recursive: yes
delete: no
delegate_to: localhost
become: false
when: step_ca_sync_required
run_once: true
# 6⃣ Save the new checksum (controller)
- name: Save new checksum on controller
copy:
dest: /tmp/step-ca-sync/.checksum
content: "{{ step_ca_primary_checksum.stdout }}"
mode: "0600"
when: step_ca_sync_required
run_once: true
become: false
delegate_to: localhost
# 7⃣ Push to the standby nodes
- name: Push CA data to standby nodes
synchronize:
rsync_path: "sudo -u {{ step_ca_user }} rsync"
src: "/tmp/step-ca-sync/"
dest: "{{ step_ca_dir }}/"
mode: push
recursive: yes
delete: no
when:
- inventory_hostname != step_ca_primary
- step_ca_sync_required
- name: Wipe temporary CA sync directory on controller
file:
path: /tmp/step-ca-sync
state: absent
delegate_to: localhost
run_once: true
become: false
when: step_ca_sync_required
# 8⃣ Enforce correct permissions (security)
- name: Fix step directory permissions
file:
path: "{{ step_ca_dir }}"
owner: "{{ step_ca_user }}"
group: "{{ step_ca_user }}"
mode: "0700"
recurse: yes
notify: restart step-ca
# 9⃣ Remove the lock on the primary
- name: Remove sync lock on primary
file:
path: "{{ step_ca_dir }}/.sync.lock"
state: absent
delegate_to: "{{ step_ca_primary }}"
run_once: true

View File

@@ -1,23 +0,0 @@
- name: Install step-ca systemd service
template:
src: step-ca.service.j2
dest: /etc/systemd/system/step-ca.service
mode: "0644"
- name: Reload systemd
systemd:
daemon_reload: yes
- name: Enable step-ca on primary
systemd:
name: step-ca
enabled: yes
state: started
when: inventory_hostname == step_ca_primary
- name: Disable step-ca on standby nodes
systemd:
name: step-ca
enabled: no
state: stopped
when: inventory_hostname != step_ca_primary

View File

@@ -1,15 +0,0 @@
[Unit]
Description=Smallstep CA
After=network.target
[Service]
User={{ step_ca_user }}
Group={{ step_ca_user }}
ExecStart=/usr/bin/step-ca \
--password-file {{ step_ca_home }}/.step-pass \
{{ step_ca_dir }}/config/ca.json
Restart=always
RestartSec=5
[Install]
WantedBy=multi-user.target

View File

@@ -1,98 +0,0 @@
- name: step-ca
ansible.builtin.import_playbook: step-ca.yml
- name: Fetch Step-CA root certificate
hosts: localhost
gather_facts: false
vars:
step_ca_primary: pi1
step_ca_user: step
step_ca_root: "/home/step/.step/certs/root_ca.crt"
tmp_dir: "/tmp/step-ca-cert-manager"
tasks:
- name: Ensure local temp directory exists
file:
path: "{{ tmp_dir }}"
state: directory
mode: "0700"
- name: Fetch root CA from step_ca_primary
fetch:
src: "{{ step_ca_root }}"
dest: "{{ tmp_dir }}/root_ca.crt"
flat: true
delegate_to: "{{ step_ca_primary }}"
become: true
become_user: "{{ step_ca_user }}"
run_once: true
- name: Prepare the build directory
file:
path: /tmp/gitea-runner-image
state: directory
mode: '0755'
- name: Copy the root CA into the Docker build context
copy:
src: "{{ tmp_dir }}/root_ca.crt"
dest: /tmp/gitea-runner-image/root_ca.crt
mode: '0644'
- name: Create the Dockerfile for the runner image with the custom CA
copy:
dest: /tmp/gitea-runner-image/Dockerfile
mode: '0644'
content: |
FROM gitea/runner-images:ubuntu-latest
COPY root_ca.crt /usr/local/share/ca-certificates/root_ca.crt
RUN update-ca-certificates
- name: Build the runner image with the CA
community.docker.docker_image:
name: gitea.arcodange.lab/arcodange-org/runner-images
tag: ubuntu-latest-ca
source: build
build:
path: /tmp/gitea-runner-image
push: true
# - /etc/ssl/certs:/etc/ssl/certs:ro
# - name: Distribute Step-CA root certificate
# hosts: all
# gather_facts: true
# become: true
# vars:
# root_ca_source: "/tmp/step-ca-cert-manager/root_ca.crt"
# root_ca_filename: "arcodange-root.crt"
# tasks:
# - name: Ensure root CA file is copied to correct location
# copy:
# src: "{{ root_ca_source }}"
# dest: "{{ ca_dest_path }}"
# owner: root
# group: root
# mode: '0644'
# vars:
# ca_dest_path: >-
# {% if ansible_facts['os_family'] == 'Debian' %}
# /usr/local/share/ca-certificates/{{ root_ca_filename }}
# {% elif ansible_facts['os_family'] in ['RedHat', 'Fedora'] %}
# /etc/pki/ca-trust/source/anchors/{{ root_ca_filename }}
# {% else %}
# /etc/ssl/certs/{{ root_ca_filename }}
# {% endif %}
# - name: Update CA trust store
# command: "{{ ca_update_command }}"
# vars:
# ca_update_command: >-
# {% if ansible_facts['os_family'] == 'Debian' %}
# update-ca-certificates
# {% elif ansible_facts['os_family'] in ['RedHat', 'Fedora'] %}
# update-ca-trust
# {% else %}
# echo 'Please update the CA trust manually'
# {% endif %}

View File

@@ -1,6 +0,0 @@
---
- name: Setup step-ca on raspberries
hosts: step_ca #raspberries:&local
become: yes
roles:
- step_ca

View File

@@ -1,41 +0,0 @@
- name: Install iSCSI client for Longhorn on Raspberry Pi
hosts: raspberries:&local
become: yes
tasks:
- name: Install open-iscsi
ansible.builtin.apt:
name: open-iscsi
state: present
update_cache: yes
- name: Enable and start iSCSI service
ansible.builtin.service:
name: iscsid
state: started
enabled: yes
- name: Install cryptsetup
ansible.builtin.apt:
name: cryptsetup
state: present
update_cache: yes
- name: Load the dm_crypt kernel module
ansible.builtin.modprobe:
name: dm_crypt
state: present
- name: Ensure the dm_crypt module is loaded at boot
ansible.builtin.lineinfile:
path: /etc/modules
line: dm_crypt
state: present
- name: Create the longhorn directory
ansible.builtin.file:
path: /mnt/arcodange/longhorn
state: directory
owner: pi
group: docker
mode: '0774'
ignore_errors: true

View File

@@ -1,315 +0,0 @@
---
- name: System K3S
hosts: raspberries:&local
tasks:
- name: prepare inventory for k3s external playbook
tags: always
ansible.builtin.add_host:
hostname: "{{ item }}"
groups:
- k3s_cluster
- "{{ ansible_loop.first | ternary('server', 'agent') }}"
loop: "{{ groups.raspberries | intersect(groups.local) | sort }}"
loop_control:
extended: true
extended_allitems: false
- name: how to reach k3s
hosts: server
tasks:
- name: setup longhorn for volumes https://docs.k3s.io/helm
become: true
ansible.builtin.copy:
dest: /var/lib/rancher/k3s/server/manifests/longhorn-install.yaml
content: |-
apiVersion: helm.cattle.io/v1
kind: HelmChart
metadata:
annotations:
helmcharts.cattle.io/managed-by: helm-controller
finalizers:
- wrangler.cattle.io/on-helm-chart-remove
generation: 1
name: longhorn-install
namespace: kube-system
spec:
version: v1.9.1
chart: longhorn
repo: https://charts.longhorn.io
failurePolicy: abort
targetNamespace: longhorn-system
createNamespace: true
valuesContent: |-
defaultSettings:
defaultDataPath: /mnt/arcodange/longhorn
vars:
longhorn_helm_values: {} # https://github.com/longhorn/longhorn/blob/master/chart/values.yaml
- name: customize k3s traefik configuration https://docs.k3s.io/helm
block:
- name: Get my public IP
community.general.ipify_facts:
- become: true
ansible.builtin.copy:
dest: /var/lib/rancher/k3s/server/manifests/traefik-v3.yaml
content: |-
apiVersion: v1
data:
dynamic.yaml: |-
{{ traefik_config_yaml | to_nice_yaml | indent( width=4 ) }}
kind: ConfigMap
metadata:
name: traefik-configmap
namespace: kube-system
---
apiVersion: helm.cattle.io/v1
kind: HelmChart
metadata:
name: traefik
namespace: kube-system
spec:
repo: https://traefik.github.io/charts
chart: traefik
version: v37.4.0
targetNamespace: kube-system
valuesContent: |-
{{ traefik_helm_values | to_nice_yaml | indent( width=4 ) }}
---
apiVersion: cert-manager.io/v1
kind: Certificate
metadata:
name: wildcard-arcodange-lab
namespace: kube-system
spec:
secretName: wildcard-arcodange-lab
issuerRef:
name: step-issuer
kind: StepClusterIssuer
group: certmanager.step.sm
dnsNames:
- arcodange.lab
- "*.arcodange.lab"
---
apiVersion: traefik.io/v1alpha1
kind: TLSStore
metadata:
name: default
namespace: kube-system
spec:
defaultCertificate:
secretName: wildcard-arcodange-lab
---
apiVersion: v1
kind: Service
metadata:
name: gitea-external
namespace: kube-system
spec:
type: ExternalName
externalName: {{ hostvars[groups.gitea[0]]['preferred_ip'] }}
ports:
- port: 3000
targetPort: 3000
vars:
traefik_config_yaml:
http:
services:
gitea:
loadBalancer:
servers:
- url: "http://{{ hostvars[groups.gitea[0]]['preferred_ip'] }}:3000"
routers:
dashboard:
# rule: Host(`traefik.arcodange.duckdns.org`)
rule: Host(`traefik.arcodange.lab`)
service: api@internal
middlewares:
- localIp
# tls:
# certResolver: letsencrypt
# domains:
# - main: "arcodange.duckdns.org"
# sans:
# - "traefik.arcodange.duckdns.org"
entryPoints:
- websecure
- web
acme-challenge:
rule: Host(`arcodange.duckdns.org`) && PathPrefix(`/.well-known/acme-challenge`)
service: acme-http@internal
tls:
certResolver: letsencrypt
domains:
- main: "arcodange.duckdns.org"
sans:
- "*.arcodange.duckdns.org"
entryPoints:
- websecure
- web
gitea:
# rule: Host(`gitea.arcodange.duckdns.org`)
rule: Host(`gitea.arcodange.lab`)
service: gitea
middlewares:
- localIp
# tls:
# certResolver: letsencrypt
# domains:
# - main: "arcodange.duckdns.org"
# sans:
# - "gitea.arcodange.duckdns.org"
entrypoints:
- websecure
middlewares:
localIp:
ipAllowList:
sourceRange:
- "172.16.0.0/12"
- "10.42.0.0/16"
- "192.168.1.0/24"
- "{{ ipify_public_ip }}/32"
# - "0.0.0.0/0"
# ipStrategy:
# depth: 1
traefik_helm_values:
deployment:
kind: "Deployment"
initContainers:
- name: volume-permissions
image: busybox:latest
command: ["sh", "-c", "touch /data/acme.json; chmod -v 600 /data/acme.json"]
volumeMounts:
- name: data
mountPath: /data
# default is https://github.com/traefik/traefik-helm-chart/blob/v25.0.0/traefik/values.yaml <- for v25 (`kubectl describe deployments.apps traefik -n kube-system | grep helm.sh/chart`)
# current is https://github.com/traefik/traefik-helm-chart/blob/v37.4.0/traefik/values.yaml
nodeSelector:
node-role.kubernetes.io/control-plane: 'true' # pick a predictable node so HTTPS traffic always lands on the control-plane node, avoiding NAT and loss of the client IP
service:
spec:
externalTrafficPolicy: Local
ports:
traefik:
expose:
default: true
web:
forwardedHeaders:
trustedIPs: ["10.42.0.0/16"] #default k3s cidr
ingressRoute:
dashboard:
enabled: true
globalArguments: [] # deactivate --global.sendanonymoususage
env:
- name: POD_NAME
valueFrom:
fieldRef:
fieldPath: metadata.name
- name: POD_NAMESPACE
valueFrom:
fieldRef:
fieldPath: metadata.namespace
- name: LEGO_DISABLE_CNAME_SUPPORT
value: 'true'
logs:
general:
level: INFO
# format: json
access:
enabled: true
timezone: Europe/Paris
# format: json
podSecurityContext:
runAsGroup: 65532
runAsNonRoot: true
runAsUser: 65532
fsGroup: 65532 # else the persistent volume might be owned by root and be unwritable
persistence:
# -- Enable persistence using Persistent Volume Claims
# ref: http://kubernetes.io/docs/user-guide/persistent-volumes/
# It can be used to store TLS certificates, see `storage` in certResolvers
enabled: true
name: data
# existingClaim: ""
accessMode: ReadWriteOnce
size: 128Mi
storageClass: "longhorn"
# volumeName: ""
path: /data
annotations: {}
volumes:
- name: traefik-configmap
mountPath: /config
type: configMap
experimental:
plugins:
crowdsec-bouncer:
moduleName: github.com/maxlerebourg/crowdsec-bouncer-traefik-plugin #https://plugins.traefik.io/plugins/6335346ca4caa9ddeffda116/crowdsec-bouncer-traefik-plugin
version: v1.3.3
additionalArguments:
- '--providers.file.filename=/config/dynamic.yaml'
- '--providers.kubernetesingress.ingressendpoint.publishedservice=kube-system/traefik'
- "--providers.kubernetescrd.allowcrossnamespace=true"
- "--providers.kubernetescrd.allowExternalNameServices=true"
certificatesResolvers:
letsencrypt:
acme:
# for challenge options cf. https://doc.traefik.io/traefik/https/acme/
email: arcodange@gmail.com
tlsChallenge: true
dnsChallenge:
# requires env variable DUCKDNS_TOKEN
provider: duckdns
propagation:
delayBeforeChecks: 120
disableChecks: true
resolvers:
- "1.1.1.1:53"
- "8.8.8.8:53"
httpChallenge:
entryPoint: "web"
# It has to match the path with a persistent volume
storage: /data/acme.json
envFrom:
- secretRef:
name: traefik-duckdns-token
# MY_TOKEN=<my token (see https://www.duckdns.org/domains)>
# kubectl create secret generic traefik-duckdns-token --from-literal="DUCKDNS_TOKEN=$MY_TOKEN" -n kube-system
- name: touch manifests/traefik-v3.yaml to trigger update
ansible.builtin.file:
path: /var/lib/rancher/k3s/server/manifests/traefik-v3.yaml
state: touch
become: true
# ---
- name: redeploy traefik
hosts: localhost
tasks:
- name: delete old traefik deployment
kubernetes.core.k8s:
api_version: v1
name: traefik
kind: Deployment
namespace: kube-system
state: "absent"
- name: delete old deployment job so the k3s helm controller redeploys with our new configuration
kubernetes.core.k8s:
api_version: batch/v1
name: helm-install-traefik
kind: Job
namespace: kube-system
state: "absent"
- name: get traefik deployment
kubernetes.core.k8s_info:
api_version: v1
name: traefik
kind: Deployment
namespace: kube-system
wait: true
register: traefik_deployment
- ansible.builtin.debug:
var: traefik_deployment

View File

@@ -1,60 +0,0 @@
# https://docs.k3s.io/advanced#coredns-custom-configuration-imports
---
- name: "Déclarer le ConfigMap coredns-custom pour arcodange.lab"
hosts: localhost
gather_facts: false
vars:
pihole_ips: "{{ groups['pihole'] | map('extract', hostvars) | map(attribute='preferred_ip') | list }}"
coredns_namespace: "kube-system"
tasks:
- name: "Créer / mettre à jour le ConfigMap coredns-custom"
kubernetes.core.k8s:
state: present
definition:
apiVersion: v1
kind: ConfigMap
metadata:
name: coredns-custom
namespace: "{{ coredns_namespace }}"
data:
arcodange-lab.server: |
arcodange.lab:53 {
errors
cache 30
forward . {{ pihole_ips | map('regex_replace', '^(.*)$', '\1:53') | join(' ') }}
}
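# Rendered example with two Pi-holes at 192.168.1.11 and 192.168.1.12 (hypothetical IPs):
#   forward . 192.168.1.11:53 192.168.1.12:53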
- name: "Mettre à jour le ConfigMap CoreDNS principal pour utiliser les Pi-holes HA"
kubernetes.core.k8s:
state: present
definition:
apiVersion: v1
kind: ConfigMap
metadata:
name: coredns
namespace: "{{ coredns_namespace }}"
data:
Corefile: |
.:53 {
errors
health
ready
kubernetes cluster.local in-addr.arpa ip6.arpa {
pods insecure
fallthrough in-addr.arpa ip6.arpa
}
hosts /etc/coredns/NodeHosts {
ttl 60
reload 15s
fallthrough
}
prometheus :9153
cache 30
loop
reload
import /etc/coredns/custom/*.override
import /etc/coredns/custom/*.server
forward . {{ pihole_ips | map('regex_replace', '^(.*)$', '\1:53') | join(' ') }}
}

View File

@@ -1,172 +0,0 @@
---
- name: System K3S
hosts: raspberries:&local
tasks:
- name: prepare inventory for k3s external playbook
tags: always
ansible.builtin.add_host:
hostname: "{{ item }}"
groups:
- k3s_cluster
- "{{ ansible_loop.first | ternary('server', 'agent') }}"
loop: "{{ groups.raspberries | intersect(groups.local) | sort }}"
loop_control:
extended: true
extended_allitems: false
# =========================
# Play 1 — Read step-ca PKI
# =========================
- name: Collect PKI material from step-ca
hosts: localhost
gather_facts: false
vars:
step_ca_primary: pi1
step_ca_user: step
step_ca_root: "/home/step/.step/certs/root_ca.crt"
tmp_dir: /tmp/step-ca-cert-manager
tasks:
- name: Ensure local temp directory exists
file:
path: "{{ tmp_dir }}"
state: directory
mode: "0700"
- name: Fetch root CA
fetch:
src: "{{ step_ca_root }}"
dest: "{{ tmp_dir }}/root_ca.crt"
flat: true
delegate_to: "{{ step_ca_primary }}"
become: true
become_user: "{{ step_ca_user }}"
run_once: true
- name: Read and decode PKI material
slurp:
src: "{{ item }}"
loop:
- "{{ tmp_dir }}/root_ca.crt"
register: pki_raw
- name: Set PKI facts
set_fact:
root_ca_b64: "{{ (pki_raw.results | selectattr('item','equalto', tmp_dir + '/root_ca.crt') | first).content }}"
# =========================
# Play 2 — Deploy to k3s
# =========================
- name: Deploy cert-manager and step-ca integration on k3s server
hosts: server
gather_facts: false
become: true
vars:
namespace: cert-manager
jwk_provisioner_name: cert-manager
jwk_secret_name: step-jwk-password
clusterissuer_name: step-ca
step_ca_url: "https://ssl-ca.arcodange.lab:8443"
cert_manager_version: v1.19.2
tasks:
- name: Get cert-manager provisioner info from step-ca
command: >
step ca provisioner list
register: provisioners_json
delegate_to: "{{ step_ca_primary }}"
become: true
become_user: "{{ step_ca_user }}"
run_once: true
- name: Set fact jwk_kid from provisioner
set_fact:
jwk_kid: >-
{{
(provisioners_json.stdout | from_json
| selectattr('name', 'equalto', jwk_provisioner_name) | list
| first).key.kid
}}
- name: Compute PKI checksum
set_fact:
pki_checksum: >-
{{
(hostvars['localhost'].root_ca_b64
~ jwk_kid
~ step_ca_url
~ cert_manager_version) | hash('sha256')
}}
- name: Install cert-manager and step-ca via k3s static manifest
copy:
dest: /var/lib/rancher/k3s/server/manifests/cert-manager-step-ca.yaml
mode: "0600"
content: |-
apiVersion: helm.cattle.io/v1
kind: HelmChart
metadata:
name: cert-manager
namespace: kube-system
annotations:
pki.arcodange.lab/checksum: "{{ pki_checksum }}"
spec:
chart: cert-manager
repo: https://charts.jetstack.io
version: {{ cert_manager_version }}
targetNamespace: cert-manager
createNamespace: true
valuesContent: |-
installCRDs: true
---
apiVersion: v1
kind: Secret
metadata:
name: {{ jwk_secret_name }}
namespace: {{ namespace }}
annotations:
pki.arcodange.lab/checksum: "{{ pki_checksum }}"
type: Opaque
stringData:
password: >-
{{ hostvars[step_ca_primary].vault_step_ca_jwk_password }}
---
apiVersion: helm.cattle.io/v1
kind: HelmChart
metadata:
name: step-issuer
namespace: kube-system
annotations:
pki.arcodange.lab/checksum: "{{ pki_checksum }}"
spec:
chart: step-issuer
repo: https://smallstep.github.io/helm-charts
version: 1.9.11
targetNamespace: {{ namespace }}
createNamespace: false
valuesContent: |-
certManager:
namespace: {{ namespace }}
stepClusterIssuer:
create: true
caUrl: "{{ step_ca_url }}"
caBundle: "{{ hostvars['localhost'].root_ca_b64 }}"
provisioner:
name: {{ jwk_provisioner_name }}
kid: "{{ jwk_kid }}"
passwordRef:
name: {{ jwk_secret_name }}
namespace: {{ namespace }}
key: password
# Override kube-rbac-proxy image to use ARM64-compatible version.
# Note: pi3 (ARM64) requires an ARM64-compatible image, while pi2 (ARMv7) may work with AMD64 images.
# The default image (gcr.io/kubebuilder/kube-rbac-proxy:v0.15.0) is AMD64-only and fails on pi3.
kubeRBACproxy:
image:
repository: quay.io/brancz/kube-rbac-proxy
tag: v0.15.0

View File

@@ -1,161 +0,0 @@
# PKI
Explanations generated by ChatGPT to describe the SSL setup via "step"
```mermaid
---
config:
logLevel: debug
theme: forest
---
flowchart TB
%% PKI
subgraph PKI["Step CA / PKI (Pi1)"]
style PKI fill:#ffe0b2,stroke:#ff8c00,stroke-width:2px
A[Primary Step CA]:::stepCA
B[JWK Provisioner]:::jwk
C[Root CA]:::root
end
%% Ansible controller
subgraph Controller["Ansible Controller / Mac"]
style Controller fill:#e0f7fa,stroke:#00acc1,stroke-width:2px
D[Fetch JWK + Root CA]:::ansible
E[Secrets K8s: step-jwk, step-root-ca]:::k8sSecret
F[ClusterIssuer cert-manager]:::clusterIssuer
end
%% K3s Cluster + Traefik
subgraph K3sCluster["K3s Cluster"]
style K3sCluster fill:#f1f8e9,stroke:#558b2f,stroke-width:2px
T[Traefik Ingress Controller]:::traefik
H[Webapp Pods]:::webapp
G["Gitea Service (ExternalName → pi2.home:3000)"]:::gitea
end
Users[Clients / Browsers]:::clients
%% Arrows
%% PKI → Controller
A --> B
C --> D
B --> D
D --> E
E --> F
%% ClusterIssuer → Traefik services
F --> H
F --> G
%% Traefik exposes all services
T --> H
T --> G
Users -->|HTTPS / HTTP| T
%% PKI direct (optional, for clarity)
A -->|Sign initial cert| F
%% Styling classes
classDef stepCA fill:#fff3e0,stroke:#ff6f00,stroke-width:1px
classDef jwk fill:#fff9c4,stroke:#fbc02d,stroke-width:1px
classDef root fill:#ffe0b2,stroke:#ff8c00,stroke-width:1px
classDef ansible fill:#b2ebf2,stroke:#00acc1,stroke-width:1px
classDef k8sSecret fill:#b3e5fc,stroke:#0288d1,stroke-width:1px
classDef clusterIssuer fill:#81d4fa,stroke:#0277bd,stroke-width:1px
classDef gitea fill:#c8e6c9,stroke:#388e3c,stroke-width:1px
classDef webapp fill:#a5d6a7,stroke:#2e7d32,stroke-width:1px
classDef traefik fill:#ffe082,stroke:#ff8f00,stroke-width:1px
classDef clients fill:#eeeeee,stroke:#9e9e9e,stroke-width:1px
```
- 🔵 PKI (Step CA): the source of trust. All HTTPS certificates come from it.
- 🔵 JWK Provisioner: authorizes cert-manager to request certificates automatically.
- 🟢 Ansible controller: centralizes the keys and creates the K8s Secrets and ClusterIssuer.
- 🟢 Secrets & ClusterIssuer: let cert-manager inside K3s authenticate and obtain TLS certificates.
- 🟢 Webapp Pods: get their certificates via cert-manager, and HTTPS works automatically.
- 🔵 Gitea: receives a certificate signed directly by Step CA and serves HTTPS outside K3s.
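A quick way to exercise the chain by hand is the `step` CLI. A minimal sketch, assuming the lab's CA URL from this repo and a locally fetched `root_ca.crt` (hostname, port and provisioner name follow this repo's conventions and are not verified here):
```sh
# check that the CA answers and the root matches
step ca health --ca-url https://ssl-ca.arcodange.lab:8443 --root root_ca.crt

# request a certificate through the cert-manager JWK provisioner
step ca certificate "myapp.arcodange.lab" myapp.crt myapp.key \
  --ca-url https://ssl-ca.arcodange.lab:8443 \
  --root root_ca.crt \
  --provisioner cert-manager
```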
```mermaid
flowchart TD
%% PKI
subgraph PKI["Step CA / PKI (Pi1)"]
style PKI fill:#ffe0b2,stroke:#ff8c00,stroke-width:2px
A[1⃣ Initialize the primary Step CA]:::stepCA
B[2⃣ Create the JWK Provisioner for K3s]:::jwk
C[Root CA]:::root
end
%% Ansible controller
subgraph Controller["Ansible Controller / Mac"]
style Controller fill:#e0f7fa,stroke:#00acc1,stroke-width:2px
D[3⃣ Fetch JWK + Root CA from Step CA]:::ansible
E[4⃣ Create / update the K8s Secrets]:::k8sSecret
F[5⃣ Create / update the cert-manager ClusterIssuer]:::clusterIssuer
end
%% K3s Cluster + Traefik
subgraph K3sCluster["K3s Cluster"]
style K3sCluster fill:#f1f8e9,stroke:#558b2f,stroke-width:2px
T[6⃣ Traefik Ingress Controller]:::traefik
H[7⃣ Webapp Pods]:::webapp
G["8⃣ Gitea Service (ExternalName → pi2.home:3000)"]:::gitea
end
Users[9⃣ Mac client / Browsers]:::clients
%% Flow
A --> B
C --> D
B --> D
D --> E
E --> F
F --> H
F --> G
T --> H
T --> G
Users -->|HTTPS / HTTP| T
%% Styling classes
classDef stepCA fill:#fff3e0,stroke:#ff6f00,stroke-width:1px
classDef jwk fill:#fff9c4,stroke:#fbc02d,stroke-width:1px
classDef root fill:#ffe0b2,stroke:#ff8c00,stroke-width:1px
classDef ansible fill:#b2ebf2,stroke:#00acc1,stroke-width:1px
classDef k8sSecret fill:#b3e5fc,stroke:#0288d1,stroke-width:1px
classDef clusterIssuer fill:#81d4fa,stroke:#0277bd,stroke-width:1px
classDef gitea fill:#c8e6c9,stroke:#388e3c,stroke-width:1px
classDef webapp fill:#a5d6a7,stroke:#2e7d32,stroke-width:1px
classDef traefik fill:#ffe082,stroke:#ff8f00,stroke-width:1px
classDef clients fill:#eeeeee,stroke:#9e9e9e,stroke-width:1px
```
```mermaid
flowchart TD
subgraph Cluster["Cluster Kubernetes (k3s)"]
subgraph CertManager["Cert-Manager"]
ClusterIssuer["ClusterIssuer\n(type: smallstep)"]
end
subgraph Traefik["Traefik (Ingress Controller)"]
TLSStore["TLSStore\n(Traefik v2+)"]
IngressRoute["IngressRoute\n(TLS: my-tls-store)"]
end
subgraph Apps["Applications"]
App1[Service: my-app]
App2[Service: my-api]
end
end
subgraph Smallstep["Smallstep PKI (step-ca)"]
StepCA["step-ca\n(CA interne)"]
end
%% Interactions
ClusterIssuer -- "1. Demande de certificat\n(CertificateRequest)" --> StepCA
StepCA -- "2. Émet un certificat\n(signé par la CA)" --> ClusterIssuer
ClusterIssuer -- "3. Stocke le certificat\n(dans un Secret Kubernetes)" --> Secret[(Secret: my-app-tls)]
Secret -- "4. Référencé par" --> TLSStore
TLSStore -- "5. Fournit le certificat\n(TLS Termination)" --> IngressRoute
IngressRoute -- "6. Route le trafic HTTPS\nvers" --> App1
IngressRoute -- "6. Route le trafic HTTPS\nvers" --> App2
```

View File

@@ -1,27 +0,0 @@
- name: Raspberry pi general setup
hosts: raspberries:&local
gather_facts: yes
tags: never
become: yes
tasks:
- name: set hostname
ansible.builtin.hostname:
name: "{{ inventory_hostname }}"
become: yes
when: inventory_hostname != ansible_hostname
- name: Ensure dnsmasq user is in dip group for Pi-hole DNS
ansible.builtin.user:
name: dnsmasq
groups: dip
append: yes
when: "'pihole' in group_names"
- name: Disable dnsmasq service on Pi-hole nodes to avoid port 53 conflict with pihole-FTL
ansible.builtin.systemd:
name: dnsmasq
state: stopped
enabled: no
when: "'pihole' in group_names"

View File

@@ -1,31 +0,0 @@
---
- name: General setup of the RPis
ansible.builtin.import_playbook: rpi.yml
- name: dns
ansible.builtin.import_playbook: ../dns/dns.yml
- name: ssl
ansible.builtin.import_playbook: ../ssl/ssl.yml
- name: Prepare the disks for Longhorn
ansible.builtin.import_playbook: prepare_disks.yml
- name: Install and configure Docker
ansible.builtin.import_playbook: system_docker.yml
- name: Install the iSCSI client for Longhorn
ansible.builtin.import_playbook: iscsi_longhorn.yml
- name: Prepare the inventory and install K3s
ansible.builtin.import_playbook: system_k3s.yml
- name: Configure K3s CoreDNS
ansible.builtin.import_playbook: k3s_dns.yml
- name: Configure the K3s cert issuer
ansible.builtin.import_playbook: k3s_ssl.yml
- name: Configure K3s (kubeconfig, Longhorn, Traefik)
ansible.builtin.import_playbook: k3s_config.yml

View File

@@ -1,110 +0,0 @@
- name: System Docker
hosts: raspberries:&local
gather_facts: yes
tags: never
become: yes
pre_tasks:
- name: Prevent apt source conflict
ansible.builtin.file:
state: absent
path: /etc/apt/sources.list.d/docker.list
become: yes
- name: Install role geerlingguy.docker
community.general.ansible_galaxy_install:
type: role
name: geerlingguy.docker
run_once: true
delegate_to: localhost
become: false
- ansible.builtin.debug:
var: ansible_facts.machine
tasks:
- include_role:
name: geerlingguy.docker
- name: Create the /etc/docker directory if it does not exist
ansible.builtin.file:
path: /etc/docker
state: directory
mode: '0755'
- name: Check if daemon.json exists
ansible.builtin.stat:
path: /etc/docker/daemon.json
register: docker_config_stat
- name: Read the existing Docker configuration
ansible.builtin.command: "cat /etc/docker/daemon.json"
register: docker_config_raw
changed_when: false
when: docker_config_stat.stat.exists
- name: Initialize the Docker config variable
ansible.builtin.set_fact:
docker_config: {}
- name: Parse the existing JSON if the file exists
ansible.builtin.set_fact:
docker_config: "{{ docker_config_raw.stdout | from_json }}"
when: docker_config_raw.stdout is defined and docker_config_raw.stdout != ""
- name: Update the logger config
ansible.builtin.set_fact:
docker_config: >
{{ docker_config | combine({
'log-driver': 'json-file',
'log-opts': {
'max-size': '10m',
'max-file': '5'
}
}, recursive=True) }}
- name: Ensure Docker storage directory exists on external disk
ansible.builtin.file:
path: /mnt/arcodange/docker
state: directory
mode: '0755'
owner: root
group: docker
when: ansible_facts.mounts | selectattr('mount', 'equalto', '/mnt/arcodange') | list | length > 0
- name: Configure Docker to use external storage
ansible.builtin.set_fact:
docker_config: >
{{ docker_config | combine({
'data-root': '/mnt/arcodange/docker',
'storage-driver': 'overlay2'
}, recursive=True) }}
when: ansible_facts.mounts | selectattr('mount', 'equalto', '/mnt/arcodange') | list | length > 0
- name: Ensure docker_config is a dictionary
ansible.builtin.set_fact:
docker_config: "{{ docker_config if docker_config is mapping else {} }}"
- name: Write the updated configuration
ansible.builtin.copy:
dest: /etc/docker/daemon.json
content: "{{ docker_config | to_nice_json(indent=2) }}"
mode: '0644'
notify: Restart Docker
handlers:
- name: Restart Docker
ansible.builtin.service:
name: docker
state: restarted
post_tasks:
- name: adding existing user '{{ ansible_user }}' to group docker
user:
name: '{{ ansible_user }}'
groups: docker
append: yes
become: yes

View File

@@ -1,63 +0,0 @@
- name: System K3S
hosts: raspberries:&local
tasks:
- name: prepare inventory for k3s external playbook
tags: always
ansible.builtin.add_host:
hostname: "{{ item }}"
groups:
- k3s_cluster
- "{{ ansible_loop.first | ternary('server', 'agent') }}"
loop: "{{ groups.raspberries | intersect(groups.local) | sort }}"
loop_control:
extended: true
extended_allitems: false
- name: Install collection k3s.orchestration
local_action:
module: community.general.ansible_galaxy_install
type: collection
name: git+https://github.com/k3s-io/k3s-ansible
run_once: true
- name: Install socat for kubectl port forwarding
ansible.builtin.apt:
name: socat
state: present
update_cache: yes
become: yes
- name: k3s
ansible.builtin.import_playbook: k3s.orchestration.site
# ansible.builtin.import_playbook: k3s.orchestration.upgrade
# ansible.builtin.import_playbook: k3s.orchestration.reset
vars:
k3s_version: v1.34.3+k3s1
extra_server_args: >-
--docker --disable traefik
--kubelet-arg="container-log-max-files=5"
--kubelet-arg="container-log-max-size=10Mi"
extra_agent_args: >-
--docker
--kubelet-arg="container-log-max-files=5"
--kubelet-arg="container-log-max-size=10Mi"
api_endpoint: "{{ hostvars[groups['server'][0]]['ansible_host'] | default(groups['server'][0]) }}"
- name: how to reach k3s
hosts: server
tasks:
- name: copy /etc/rancher/k3s/k3s.yaml to ~/.kube/config from the k3s server and replace 127.0.0.1 with the server ip or hostname
run_once: true
block:
- ansible.builtin.fetch:
src: /etc/rancher/k3s/k3s.yaml
dest: ~/.kube/config
flat: true
become: true
run_once: true
- local_action:
module: ansible.builtin.replace
path: ~/.kube/config
regexp: 'server: https://127.0.0.1:6443'
replace: 'server: https://{{ ansible_default_ipv4.address }}:6443'

View File

@@ -1,10 +0,0 @@
---
- name: crowdsec
# hosts: raspberries:&local
hosts: localhost
# debugger: on_failed
tasks:
- name: Setup crowdsec middleware for traefik
include_role:
name: crowdsec

View File

@@ -35,12 +35,12 @@
password: '{{ pg_conf.POSTGRES_PASSWORD }}'
gitea_admin_token: '{{ vault_GITEA_ADMIN_TOKEN }}'
# - name: share VAULT CA
# block:
- name: share VAULT CA
block:
# - name: read traefik CA
# include_role:
# name: arcodange.factory.traefik_certs
- name: read traefik CA
include_role:
name: arcodange.factory.traefik_certs
post_tasks:
- include_role:

View File

@@ -1 +0,0 @@
traefik_pvc_name: traefik

View File

@@ -1,94 +0,0 @@
---
- name: Inject captcha.html into Traefik PVC
block:
# ---------------------
# Scale to 0
# ---------------------
- name: Scale Traefik to 0
kubernetes.core.k8s_scale:
api_version: apps/v1
kind: Deployment
namespace: kube-system
name: traefik
replicas: 0
# ---------------------
# Create Job
# ---------------------
- name: Deploy captcha injection Job
kubernetes.core.k8s:
state: present
namespace: kube-system
definition:
apiVersion: batch/v1
kind: Job
metadata:
name: inject-captcha
spec:
backoffLimit: 0
template:
spec:
restartPolicy: Never
volumes:
- name: traefik-data
persistentVolumeClaim:
claimName: "{{ traefik_pvc_name }}"
containers:
- name: write-captcha
image: alpine:3.20
command:
- /bin/sh
- -c
- |
echo "Writing captcha.html into PVC..."
cat << 'EOF' > /data/captcha.html
{{ lookup('template', 'captcha.html.j2') | indent(20) }}
EOF
volumeMounts:
- name: traefik-data
mountPath: /data
# ---------------------
# Wait for job success
# ---------------------
- name: Wait for Job completion
kubernetes.core.k8s_info:
api_version: batch/v1
kind: Job
name: inject-captcha
namespace: kube-system
register: job_status
until: job_status.resources[0].status.succeeded | default(0) | int > 0
retries: 20
delay: 5
# ---------------------
# Clean Job
# ---------------------
- name: Remove captcha injection Job
kubernetes.core.k8s:
state: absent
api_version: batch/v1
kind: Job
name: inject-captcha
namespace: kube-system
rescue:
- name: Log failure
ansible.builtin.debug:
msg: "An error occurred during captcha injection. Traefik will still be scaled back up."
always:
# ---------------------
# Ensure Traefik is scaled back to 1 NO MATTER WHAT
# ---------------------
- name: Ensure Traefik is scaled back to 1
kubernetes.core.k8s_scale:
api_version: apps/v1
kind: Deployment
namespace: kube-system
name: traefik
replicas: 1
wait: yes
wait_timeout: 300

View File

@@ -1,186 +0,0 @@
- name: Create the ServiceAccount for Vault authentication
kubernetes.core.k8s:
state: present
definition:
apiVersion: v1
kind: ServiceAccount
metadata:
name: factory-ansible-tool-crowdsec-traefik-plugin
namespace: kube-system
wait: yes
wait_timeout: 30
- name: Create the VaultAuth resource
kubernetes.core.k8s:
state: present
definition:
apiVersion: secrets.hashicorp.com/v1beta1
kind: VaultAuth
metadata:
name: factory-ansible-tool-crowdsec
namespace: kube-system
spec:
method: kubernetes
mount: kubernetes
kubernetes:
role: factory_crowdsec_conf
serviceAccount: factory-ansible-tool-crowdsec-traefik-plugin
audiences:
- vault
wait: yes
wait_timeout: 30
- name: Create the VaultStaticSecret resource
kubernetes.core.k8s:
state: present
definition:
apiVersion: secrets.hashicorp.com/v1beta1
kind: VaultStaticSecret
metadata:
name: factory-ansible-tool-crowdsec-turnstile-secret
namespace: kube-system
spec:
type: kv-v2
mount: kvv2
path: cms/factory/turnstile
destination:
name: factory-ansible-tool-crowdsec-traefik-plugin-captcha-params
create: true
refreshAfter: 30s
vaultAuthRef: factory-ansible-tool-crowdsec
wait: yes
wait_timeout: 30
- name: Fetch the Kubernetes secret
kubernetes.core.k8s_info:
kind: Secret
name: factory-ansible-tool-crowdsec-traefik-plugin-captcha-params
namespace: kube-system
register: crowdsec_captcha_secret
- name: Fetch the CrowdSec LAPI pod name
kubernetes.core.k8s_info:
kind: Pod
namespace: tools
label_selectors:
- k8s-app = crowdsec
- type = lapi
register: crowdsec_lapi_pods
- name: Check that a pod was found
assert:
that: crowdsec_lapi_pods.resources | length > 0
fail_msg: "No CrowdSec LAPI pod found in the 'tools' namespace with labels 'k8s-app=crowdsec, type=lapi'."
- name: Set the CrowdSec LAPI pod name
set_fact:
crowdsec_lapi_pod_name: "{{ crowdsec_lapi_pods.resources[0].metadata.name }}"
- name: Fetch the CrowdSec bouncer API key
kubernetes.core.k8s_exec:
namespace: tools
pod: "{{ crowdsec_lapi_pod_name }}"
container: crowdsec-lapi
command: >
cscli bouncers add traefik-plugin
register: bouncer_key_result
ignore_errors: yes
- name: Delete the existing bouncer on failure
kubernetes.core.k8s_exec:
namespace: tools
pod: "{{ crowdsec_lapi_pod_name }}"
container: crowdsec-lapi
command: >
cscli bouncers delete traefik-plugin
when: bouncer_key_result.failed
- name: Retry fetching the API key
kubernetes.core.k8s_exec:
namespace: tools
pod: "{{ crowdsec_lapi_pod_name }}"
container: crowdsec-lapi
command: >
cscli bouncers add traefik-plugin
register: bouncer_key_result
when: bouncer_key_result.failed
- name: Inject captcha.html into Traefik PVC
include_tasks: inject_captcha_html.yml
tags: never
- name: Create the Traefik Middleware for CrowdSec
kubernetes.core.k8s:
state: present
definition:
apiVersion: traefik.io/v1alpha1
kind: Middleware
metadata:
name: crowdsec
namespace: kube-system
spec:
plugin:
crowdsec-bouncer:
enabled: true
logLevel: DEBUG
crowdsecMode: stream
crowdsecLapiScheme: http
crowdsecLapiHost: crowdsec-service.tools.svc.cluster.local:8080
crowdsecLapiKey: "{{ bouncer_key_result.stdout_lines[2].strip() }}"
httpTimeoutSeconds: 60
crowdsecAppsecEnabled: false
crowdsecAppsecHost: crowdsec:7422
crowdsecAppsecFailureBlock: true
crowdsecAppsecUnreachableBlock: true
forwardedHeadersTrustedIPs:
- 10.0.10.23/32
- 10.0.20.0/24
clientTrustedIPs:
- 192.168.1.0/24
- 10.42.0.0/16
captchaProvider: turnstile
captchaSiteKey: "{{ crowdsec_captcha_secret.resources[0].data.sitekey | b64decode }}"
captchaSecretKey: "{{ crowdsec_captcha_secret.resources[0].data.secret | b64decode }}"
captchaHTMLFilePath: "/data/captcha.html"
redisCacheEnabled: true
redisCacheHost: "redis.tools:6379"
redisCacheDatabase: "0"
redisCacheUnreachableBlock: false
- name: Delete crowdsec pods in Error state to force a restart
ansible.builtin.shell: |
kubectl get pods -n tools -l k8s-app=crowdsec \
--field-selector=status.phase=Failed -o name | xargs -r kubectl delete -n tools
changed_when: false
ignore_errors: yes
- name: Restart traefik to pick up the new middleware configuration
block:
# ---------------------
# Scale to 0
# ---------------------
- name: Scale Traefik to 0
kubernetes.core.k8s_scale:
api_version: apps/v1
kind: Deployment
namespace: kube-system
name: traefik
replicas: 0
rescue:
- name: Log failure
ansible.builtin.debug:
msg: "An error occurred during traefik scale down. Traefik will still be scaled back up."
always:
# ---------------------
# Ensure Traefik is scaled back to 1 NO MATTER WHAT
# ---------------------
- name: Ensure Traefik is scaled back to 1
kubernetes.core.k8s_scale:
api_version: apps/v1
kind: Deployment
namespace: kube-system
name: traefik
replicas: 1
wait: yes
wait_timeout: 300

View File

@@ -1,18 +0,0 @@
<!DOCTYPE html>
<html>
<head>
<meta charset="UTF-8" />
<title>Captcha verification</title>
<script src="https://challenges.cloudflare.com/turnstile/v0/api.js" async defer></script>
</head>
<body>
<form method="POST">
<div class="cf-turnstile"
data-sitekey="{{ crowdsec_captcha_secret.resources[0].data.sitekey | b64decode }}"
data-theme="auto"
data-size="normal">
</div>
<button type="submit">Submit</button>
</form>
</body>
</html>

View File

@@ -2,7 +2,7 @@ vault_unseal_keys_path: ~/.arcodange/cluster-keys.json
vault_unseal_keys_shares: 1
vault_unseal_keys_key_threshold: 1 # keys_key_threshold <= keys_shares
vault_address: https://vault.arcodange.lab
vault_address: https://vault.arcodange.duckdns.org
vault_oidc_gitea_setupGiteaAppJS: '{{ role_path }}/files/playwright_setupGiteaApp.js'

View File

@@ -19,7 +19,7 @@ variable "admin_email" {
}
variable "gitea_app" {
type = object({
url = optional(string, "https://gitea.arcodange.lab")
url = optional(string, "https://gitea.arcodange.duckdns.org/")
id = string
secret = string
description = optional(string, "Arcodange Gitea Auth")
@@ -39,10 +39,10 @@ variable "gitea_admin_token" {
sensitive = true
}
# same as vault CA
variable "ca_pem" {
type = string
}
# kubectl -n kube-system exec $(kubectl -n kube-system get pod -l app.kubernetes.io/name=traefik -o jsonpath="{.items[0]['.metadata.name']}") -- cat /data/acme.json | jq '(.letsencrypt.Certificates | map(select(.domain.main=="arcodange.duckdns.org")))[0]' | jq '.certificate' -r | base64 -d | openssl x509
# variable "ca_pem" {
# type = string
# }
terraform {
required_providers {
vault = {
@@ -63,10 +63,10 @@ resource "vault_jwt_auth_backend" "gitea" {
path = "gitea"
type = "oidc"
oidc_discovery_url = var.gitea_app.url
oidc_discovery_ca_pem = file(var.ca_pem)
# oidc_discovery_ca_pem = var.ca_pem
oidc_client_id = var.gitea_app.id
oidc_client_secret = var.gitea_app.secret
bound_issuer = trimsuffix(var.gitea_app.url, "/")
bound_issuer = var.gitea_app.url
tune {
allowed_response_headers = []
@@ -91,8 +91,7 @@ resource "vault_jwt_auth_backend_role" "gitea" {
allowed_redirect_uris = [
"http://localhost:8250/oidc/callback", # for command line login
"${var.vault_address}/ui/vault/auth/gitea/oidc/callback",
"https://webapp.arcodange.fr/oauth-callback",
"https://webapp.arcodange.lab/oauth-callback",
"https://webapp.arcodange.duckdns.org/oauth-callback",
]
}
@@ -102,8 +101,8 @@ resource "vault_jwt_auth_backend" "gitea_jwt" {
path = "gitea_jwt"
type = "jwt"
oidc_discovery_url = var.gitea_app.url
oidc_discovery_ca_pem = file(var.ca_pem)
bound_issuer = trimsuffix(var.gitea_app.url, "/")
# oidc_discovery_ca_pem = var.ca_pem
bound_issuer = var.gitea_app.url
tune {
allowed_response_headers = []
@@ -167,7 +166,7 @@ resource "vault_kv_secret" "google_credentials" {
path = "${vault_mount.kvv1.path}/google/credentials"
data_json = jsonencode(
{
credentials = file("/root/.config/gcloud/application_default_credentials.json")
credentials = file("~/.config/gcloud/application_default_credentials.json")
}
)
}

View File

@@ -7,7 +7,7 @@ const username = process.env.GITEA_USER;
const password = process.env.GITEA_PASSWORD;
const debug = Boolean(process.env.DEBUG);
const vaultAddress = process.env.VAULT_ADDRESS || 'http://localhost:8200';
const giteaAddress = process.env.GITEA_ADDRESS || 'https://gitea.arcodange.lab';
const giteaAddress = process.env.GITEA_ADDRESS || 'https://gitea.arcodange.duckdns.org';
if (!username || !password) {
console.error('Please set the GITEA_USER and GITEA_PASSWORD environment variables.');
@@ -22,7 +22,7 @@ const browser = await chromium.launch({
log: (name, severity, message, args) => console.warn(`${severity}| ${name} :: ${message} __ ${args}`)
},
});
const context = await browser.newContext({locale: "gb-GB", ignoreHTTPSErrors: true}); // Using self signed cert - could improve with NODE_EXTRA_CA_CERTS env variable
const context = await browser.newContext({locale: "gb-GB"});
const page = await context.newPage();
async function doLogin() {
@@ -75,8 +75,7 @@ async function setupApp() {
await applicationsPanel.locator('textarea[name="redirect_uris"]').fill([
'http://localhost:8250/oidc/callback', // for command line login
`${vaultAddress}/ui/vault/auth/gitea/oidc/callback`,
'https://webapp.arcodange.lab/oauth-callback',
'https://webapp.arcodange.fr/oauth-callback',
'https://webapp.arcodange.duckdns.org/oauth-callback',
].join('\n'));
await applicationsPanel.locator('form[action="/-/admin/applications/oauth2"] > button').dblclick()

View File

@@ -11,50 +11,18 @@
GITEA_USER: '{{ gitea_admin_user }}'
GITEA_PASSWORD: '{{ gitea_admin_password }}'
VAULT_ADDRESS: '{{ vault_address }}'
NODE_EXTRA_CA_CERTS: ''
- include_role:
name: arcodange.factory.playwright
# - include_role:
# name: arcodange.factory.traefik_certs
- include_role:
name: arcodange.factory.traefik_certs
- set_fact:
gitea_app: '{{ playwright_job.stdout | from_json }}'
volume_name: tofu-{{ ansible_date_time.iso8601.replace(':','-') }}
- name: Check SSL certificate for Gitea
shell: >-
openssl s_client -connect gitea.arcodange.lab:443 -CAfile /etc/ssl/certs/arcodange-root.pem -servername gitea.arcodange.lab < /dev/null 2>&1 | grep -E "Verify return code:|subject=|issuer="
register: ssl_check
ignore_errors: true
- name: Debug SSL certificate check
debug:
var: ssl_check.stdout_lines
# WARNING : this disables AND wipes ALL gitea_cicd_* per-app JWT roles
# (created by tools/hashicorp-vault/iac/) every time it runs. Default is OFF
to preserve those roles across normal ansible runs; opt-in only when you
# really want to rebuild the OIDC backend from scratch (e.g. config drift on
# bound_issuer or similar).
- name: Delete existing Gitea OIDC backends if they exist
include_tasks: vault_cmd.yml
vars:
vault_cmd: vault auth disable {{ backend_name }}
vault_cmd_can_fail: true
vault_cmd_json_attr: ''
vault_cmd_output_var: false
loop:
- gitea
- gitea_jwt
loop_control:
loop_var: backend_name
when: vault_oidc_force_reset | default(false) | bool
- name: use tofu to provision vault
block:
- shell: docker volume create {{ volume_name }}
@@ -63,8 +31,6 @@
-v {{ volume_name }}:/tofu -w /tofu
-v {{ role_path }}/files/hashicorp_vault.tf:/tofu/hashicorp_vault.tf
-v ~/.config/gcloud:/root/.config/gcloud
-v /etc/ssl/certs/arcodange-root.pem:/etc/ssl/custom/arcodange-root.pem:ro
-e VAULT_CACERT=/etc/ssl/custom/arcodange-root.pem
--entrypoint=''
ghcr.io/opentofu/opentofu:latest
{{ command }}
@@ -78,7 +44,6 @@
# -var='vault_token={{ vault_root_token }}'
# -var='postgres_admin_credentials={{ postgres_admin_credentials | to_json }}'
# -var='gitea_admin_token={{ gitea_admin_token }}'
# -var="ca_pem=/etc/ssl/custom/arcodange-root.pem"
- >-
tofu apply -auto-approve -no-color
-var='gitea_app={{ gitea_app | to_json }}'
@@ -86,7 +51,6 @@
-var='vault_token={{ vault_root_token }}'
-var='postgres_admin_credentials={{ postgres_admin_credentials | to_json }}'
-var='gitea_admin_token={{ gitea_admin_token }}'
-var="ca_pem=/etc/ssl/custom/arcodange-root.pem"
loop_control:
loop_var: command
extended: true
@@ -107,28 +71,8 @@
gitea_secret_name: vault_oauth__sh_b64
gitea_secret_value: >-
{{ lookup('ansible.builtin.template', 'oidc_jwt_token.sh.j2', template_vars = {
'GITEA_BASE_URL': 'https://gitea.arcodange.lab',
'GITEA_BASE_URL': 'https://gitea.arcodange.duckdns.org',
'OIDC_CLIENT_ID': gitea_app.id,
'OIDC_CLIENT_SECRET': gitea_app.secret,
}) | b64encode }}
gitea_owner_type: 'org' # value != 'user'
# Also propagate the same secret to user-owned namespaces. Gitea Action secrets
# are scoped per owner, so repos under a user account cannot read org-level
# secrets. Extend this list if other personal-namespace apps need vault auth.
- name: Propagate vault_oauth__sh_b64 to user-owned namespaces
include_role:
name: arcodange.factory.gitea_secret
vars:
gitea_secret_name: vault_oauth__sh_b64
gitea_secret_value: >-
{{ lookup('ansible.builtin.template', 'oidc_jwt_token.sh.j2', template_vars = {
'GITEA_BASE_URL': 'https://gitea.arcodange.lab',
'OIDC_CLIENT_ID': gitea_app.id,
'OIDC_CLIENT_SECRET': gitea_app.secret,
}) | b64encode }}
gitea_owner_type: 'user'
gitea_owner_name: '{{ item }}'
loop: '{{ gitea_secret_propagation_users }}'
loop_control:
label: '{{ item }}'

View File

@@ -4,10 +4,10 @@ set -eu
# Variables to adjust for your configuration
CLIENT_ID="{{ OIDC_CLIENT_ID }}"
CLIENT_SECRET="{{ OIDC_CLIENT_SECRET }}"
REDIRECT_URI="{{ OIDC_CLIENT_CALLBACK | default('https://webapp.arcodange.lab/oauth-callback') }}" # Redirect here after authentication
AUTH_URL="{{ GITEA_BASE_URL | default('https://gitea.arcodange.lab') }}/login/oauth/authorize"
TOKEN_URL="{{ GITEA_BASE_URL | default('https://gitea.arcodange.lab') }}/login/oauth/access_token"
ISSUER="https://gitea.arcodange.lab/"
REDIRECT_URI="{{ OIDC_CLIENT_CALLBACK | default('https://webapp.arcodange.duckdns.org/oauth-callback') }}" # Redirect here after authentication
AUTH_URL="{{ GITEA_BASE_URL | default('https://gitea.arcodange.duckdns.org') }}/login/oauth/authorize"
TOKEN_URL="{{ GITEA_BASE_URL | default('https://gitea.arcodange.duckdns.org') }}/login/oauth/access_token"
ISSUER="https://gitea.arcodange.duckdns.org/"
# SCOPE="openid email profile groups" # Scope you want to obtain - profile groups
SCOPE="email openid read:user" # Scope you want to obtain - profile groups
set +u
@@ -26,7 +26,7 @@ poll_state() {
#echo "Tentative $attempt/$MAX_ATTEMPTS: Requête à l'endpoint /retrieve pour state=$STATE..."
# Effectuer la requête GET
RESPONSE=$(curl -s -w "%{http_code}" -o /tmp/response_body "https://webapp.arcodange.lab/retrieve?state=$STATE")
RESPONSE=$(curl -s -w "%{http_code}" -o /tmp/response_body "https://webapp.arcodange.duckdns.org/retrieve?state=$STATE")
HTTP_CODE=$(tail -n1 <<< "$RESPONSE")
if [ "$HTTP_CODE" == "200" ]; then
@@ -50,9 +50,6 @@ poll_state() {
return 1
}
# 0. Install the arcodange.lab certificate (sudo rights)
# curl https://ssl-ca.arcodange.lab:8443/roots.pem -ks > /usr/local/share/ca-certificates/arcodange-root.crt && update-ca-certificates 2>/dev/null >/dev/null && export VAULT_CACERT=/usr/local/share/ca-certificates/arcodange-root.crt || echo "couldn't install self signed .crt" >&2
# 1. Redirect the user to the authentication URL
echo "Open the following link in your browser to authenticate with Gitea:"
echo "$AUTH_URL?client_id=$CLIENT_ID&redirect_uri=$REDIRECT_URI&response_type=code&scope=$(sed 's/ /%20/g' <<<$SCOPE)&state=$STATE"

View File

@@ -1,5 +1,3 @@
---
- name: hashicorp_vault
ansible.builtin.import_playbook: hashicorp_vault.yml
- name: crowdsec
ansible.builtin.import_playbook: crowdsec.yml

View File

@@ -1,5 +1,5 @@
# to see generated tokens
# go to https://gitea.arcodange.lab/user/settings/applications
# go to https://gitea.arcodange.duckdns.org/user/settings/applications
- when: >-
lookup('ansible.builtin.varnames', '^' ~ gitea_token_fact_name ~ '$') | length == 0

View File

@@ -7,7 +7,7 @@ const username = process.env.GITEA_USER;
const password = process.env.GITEA_PASSWORD;
const debug = Boolean(process.env.DEBUG);
const vaultAddress = process.env.VAULT_ADDRESS || 'http://localhost:8200';
const giteaAddress = process.env.GITEA_ADDRESS || 'https://gitea.arcodange.lab';
const giteaAddress = process.env.GITEA_ADDRESS || 'https://gitea.arcodange.duckdns.org';
if (!username || !password) {
console.error('Please set the GITEA_USER and GITEA_PASSWORD environment variables.');

View File

@@ -4,7 +4,7 @@
kubectl -n kube-system exec
$(kubectl -n kube-system get pod -l app.kubernetes.io/name=traefik
-o jsonpath="{.items[0]['.metadata.name']}") --
cat /data/acme.json | jq '(.letsencrypt.Certificates | map(select(.domain.main=="*.arcodange.lab")))[0]'
cat /data/acme.json | jq '(.letsencrypt.Certificates | map(select(.domain.main=="*.arcodange.duckdns.org")))[0]'
| jq '.certificate' -r | base64 -d | openssl x509
register: traefik_certs_cmd
- set_fact:

View File

@@ -3,9 +3,8 @@ roles:
- name: geerlingguy.docker
collections:
- name: ansible.posix
- name: community.crypto
- name: community.docker
- name: community.general
- name: community.docker
- name: ansible.posix
- name: kubernetes.core
- name: git+https://github.com/k3s-io/k3s-ansible.git

View File

@@ -1,5 +1,4 @@
{{- range $app_name, $app_attr := .Values.gitea_applications -}}
{{- $org := default "arcodange-org" $app_attr.org -}}
{{- range $app_name := .Values.gitea_applications -}}
---
apiVersion: argoproj.io/v1alpha1
kind: Application
@@ -8,27 +7,19 @@ metadata:
namespace: argocd
finalizers:
- resources-finalizer.argocd.argoproj.io
{{- with $app_attr.annotations }}
annotations:
{{- toYaml . | nindent 4 }}
{{- end }}
spec:
project: default
source:
repoURL: https://gitea.arcodange.lab/{{ $org }}/{{ $app_name }}
repoURL: https://gitea.arcodange.duckdns.org/arcodange-org/{{ $app_name }}
targetRevision: HEAD
path: chart
destination:
server: https://kubernetes.default.svc
namespace: {{ $app_name }}
syncPolicy:
{{- if $app_attr.syncPolicy }}
{{- toYaml $app_attr.syncPolicy | nindent 4 }}
{{- else }}
automated:
prune: true
selfHeal: true
{{- end }}
syncOptions:
- CreateNamespace=true
{{ end }}

View File

@@ -1,14 +0,0 @@
{{ with ( .Values.argocd_image_updater_chart_values ) }}
apiVersion: helm.cattle.io/v1
kind: HelmChart
metadata:
name: argocd-image-updater
namespace: kube-system
spec:
repo: https://argoproj.github.io/argo-helm
chart: argocd-image-updater
targetNamespace: argocd
valuesContent: |-
{{- toYaml . | nindent 4 }}
{{- end -}}
---

View File

@@ -1,3 +1,18 @@
apiVersion: secrets.hashicorp.com/v1beta1
kind: VaultStaticSecret
metadata:
name: longhorn-gcs-backup-credentials
namespace: longhorn-system
spec:
vaultAuthRef: longhorn-auth
refreshAfter: 1h
mount: kvv2
path: longhorn/gcs-backup
type: kv-v2
destination:
name: longhorn-gcs-backup-credentials
create: true
---
apiVersion: v1
kind: ServiceAccount
metadata:
@@ -17,33 +32,3 @@ spec:
serviceAccount: longhorn-vault-secret-reader # same as in TF
audiences:
- vault
---
apiVersion: secrets.hashicorp.com/v1beta1
kind: VaultStaticSecret
metadata:
name: longhorn-gcs-backup-credentials
namespace: longhorn-system
spec:
type: kv-v2
mount: kvv2
path: longhorn/gcs-backup
destination:
name: longhorn-gcs-backup-credentials
create: true
refreshAfter: 1h
vaultAuthRef: longhorn-vault-secret-reader
---
apiVersion: v1
kind: ConfigMap
metadata:
name: longhorn-default-resource
namespace: longhorn-system
data:
default-resource.yaml: |
"backup-target": "s3://arcodange-backup@us-east-1/"
"backup-target-credential-secret": "longhorn-gcs-backup-credentials"
"backupstore-poll-interval": "180"

View File

@@ -2,39 +2,7 @@
# This is a YAML-formatted file.
# Declare variables to be passed into your templates.
gitea_applications:
url-shortener:
annotations: {}
tools:
annotations: {}
syncPolicy:
automated:
prune: true
selfHeal: true
webapp:
annotations:
argocd-image-updater.argoproj.io/image-list: webapp=gitea.arcodange.lab/arcodange-org/webapp:latest
argocd-image-updater.argoproj.io/webapp.update-strategy: digest
telegram-gateway:
org: arcodange
annotations:
argocd-image-updater.argoproj.io/image-list: telegram-gateway=gitea.arcodange.lab/arcodange/telegram-gateway:latest
argocd-image-updater.argoproj.io/telegram-gateway.update-strategy: digest
erp:
annotations: {}
cms:
annotations:
argocd-image-updater.argoproj.io/image-list: cms=gitea.arcodange.lab/arcodange-org/cms:latest
argocd-image-updater.argoproj.io/cms.update-strategy: digest
dance-lessons-coach:
org: arcodange
annotations:
argocd-image-updater.argoproj.io/image-list: dance-lessons-coach=gitea.arcodange.lab/arcodange/dance-lessons-coach:latest
argocd-image-updater.argoproj.io/dance-lessons-coach.update-strategy: digest
argocd_image_updater_chart_values:
config:
argocd:
grpcWeb: false
serverAddress: "https://argocd.arcodange.lab/"
insecure: true
plaintext: true
- url-shortener
- tools
- webapp
- erp

View File

@@ -9,7 +9,7 @@
>The unsealKey, the initial vaultRootToken and the terraform backend authentication are for now configured on the ansible controller (Macbook Pro).
>[!NOTE]
> Vault is deployed via [argo cd](https://gitea.arcodange.lab/arcodange-org/tools/src/branch/main/hashicorp-vault)
> Vault is deployed via [argo cd](https://gitea.arcodange.duckdns.org/arcodange-org/tools/src/branch/main/hashicorp-vault)
```mermaid
%%{init: { 'logLevel': 'debug', 'theme': 'base',

View File

@@ -1,261 +0,0 @@
[← ADRs](.) · [factory](../..) · **20260509 — telegram-gateway auth**
> **Cross-references** (bidirectional: each listed file must cite this ADR at the top)
>
> - **Code** (repo `arcodange/telegram-gateway`):
> [`auth.go`](https://gitea.arcodange.lab/arcodange/telegram-gateway/src/branch/main/auth.go) ·
> [`handler_auth.go`](https://gitea.arcodange.lab/arcodange/telegram-gateway/src/branch/main/handler_auth.go) ·
> [`allowlist.go`](https://gitea.arcodange.lab/arcodange/telegram-gateway/src/branch/main/allowlist.go) ·
> [`server.go`](https://gitea.arcodange.lab/arcodange/telegram-gateway/src/branch/main/server.go) ·
> [`chart/values.yaml`](https://gitea.arcodange.lab/arcodange/telegram-gateway/src/branch/main/chart/values.yaml)
> - **User docs**:
> [`AUTH.md`](https://gitea.arcodange.lab/arcodange/telegram-gateway/src/branch/main/AUTH.md) ·
> [`HOWTO_ADD_BOT.md`](https://gitea.arcodange.lab/arcodange/telegram-gateway/src/branch/main/HOWTO_ADD_BOT.md)
> - **Related ADR**:
> [`20260407-network-architecture.md`](20260407-network-architecture.md) (Cloudflare / Traefik / CrowdSec stack)
> - **Implementation plan**: `~/.claude/plans/pour-les-notifications-on-inherited-seal.md` § Phase 1.5
# ADR 20260509: Telegram Gateway — Authentication Layer
## Status
Proposed
## Context
The `telegram-gateway` service (Phase 1, delivered 2026-05-09) exposes Telegram bots via public webhooks at `tg.arcodange.fr/bot/<slug>`. At this stage:
- Any Telegram user who knows a bot's handle can DM it and trigger its handler.
- The gateway validates the Telegram `secret_token` (which proves that **Telegram** is sending the webhook), not the identity of the **user** behind the message.
- Before opening the gateway to other useful bots (`/build` commands, Ollama scripts, etc.), an authentication protocol is needed.
The business need:
- A **main bot** (`@arcodange_factory_bot`, internal slug `factory`) serves as the auth entry point.
- An **`/auth <code>`** command validates a session for the Telegram user who sends it.
- The gateway's other bots only answer **already-authenticated users**, **by default** (secure-by-default).
- As an extra safeguard, an **allowlist of Telegram IDs** can filter which users are allowed to talk to the bots, independently of auth (silent-drop before any processing).
## Decision
### 1. User identity
Telegram does **not expose the user's IP** to the bot. The stable key is **`from.id`** (Telegram user ID, `int64`, identical for a given account across all devices). It is used as the session identifier.
> Out of scope: auth tied to the device/IP; it would require a separate auth channel (web UI on the LAN, etc.).
### 2. Session storage
- **Redis** (`redis.tools.svc.cluster.local:6379`, already deployed in the `tools` namespace).
- Key: `tg-gw:auth:<from.id>` → value `1` (or JSON metadata if enriched later).
- TTL: **24 h by default**, configurable via the `AUTH_SESSION_TTL` env var (Go duration: `12h`, `7d`, etc.).
- Refresh: each successful `/auth` resets the TTL.
### 3. Main bot & commands
The `factory` bot moves from the `echo` handler to the `auth` handler. The `auth` handler recognizes:
| Command | Effect |
|---|---|
| `/start` | Welcome message + list of available commands |
| `/auth <code>` | Compares `<code>` to `AUTH_SECRET` in constant time; if OK → Redis SET, deleteMessage on the original message (replay defense), reply "✅ Authenticated for 24 h" |
| `/whoami` | Shows the user_id and the remaining TTL (or "not authenticated") |
| `/logout` | Redis DEL, reply "Logged out" |
| _other_ | Reminder of the commands |
### 4. Allowlist safeguard
`ALLOWED_USERS` env var: CSV of Telegram `from.id` values (`12345,67890`). Behavior (a values sketch follows this list):
- Empty or absent → open to everyone (Phase 1 backward compatibility).
- Set → any out-of-list `from.id` gets a **silent-drop** (empty HTTP 200 to Telegram, INFO log on the gateway side, **no reply to the user**).
- The silent-drop happens **before** the auth gate. It hides the bots' existence from strangers.
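A minimal values sketch, assuming an `env` map in `chart/values.yaml`; the key layout is illustrative, not the chart's actual schema:
```yaml
# Hypothetical values.yaml excerpt: how ALLOWED_USERS might be wired.
# An empty string keeps the gateway open to everyone (Phase 1 behavior).
env:
  ALLOWED_USERS: "12345,67890"   # Telegram from.id values, CSV
```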
### 5. Per-bot `requireAuth` gate — secure-by-default
Boolean field in `chart/values.yaml`, per bot. Semantics:
- **Default = `true`** (secure-by-default). Any bot omitting the field → gated.
- To make a bot public, add an explicit `requireAuth: false`.
- For `handler: auth` (the main bot), `requireAuth` is automatically **forced to `false`** (chicken-and-egg: if auth itself is gated, nobody can authenticate).
```yaml
bots:
factory:
handler: auth # requireAuth auto-forced to false
pingbot:
handler: echo # requireAuth: true (implicit, default)
statusbot:
handler: echo
requireAuth: false # explicit opt-out, public bot
```
When `requireAuth: true` and the user is not authenticated:
> 🔒 Authenticate first with `/auth <code>` at @arcodange_factory_bot
… then ack 200 to Telegram. The bot's handler is **not** called.
### 6. Fail-at-startup
If `AUTH_SECRET` is empty AND at least one bot has `handler=auth` or `requireAuth: true` (including by default) → the pod **fails at boot** with a clear message. This avoids the "auth silently off, bots reachable by everyone without anyone noticing" scenario. With a `requireAuth: true` default, in practice every deployment requires `AUTH_SECRET` (unless all bots explicitly opt out). A wiring sketch follows.
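A minimal wiring sketch, assuming the `telegram-gateway-bots` Secret mentioned in the implementation plan below; the deployment excerpt and key names are illustrative, not the chart's actual manifest:
```yaml
# Hypothetical deployment excerpt: AUTH_SECRET sourced from the
# telegram-gateway-bots Secret. If the key is missing or empty and any bot
# is gated, the process is expected to exit at startup (fail-at-startup).
containers:
  - name: telegram-gateway
    env:
      - name: AUTH_SECRET
        valueFrom:
          secretKeyRef:
            name: telegram-gateway-bots
            key: AUTH_SECRET
      - name: AUTH_SESSION_TTL
        value: "24h"   # Go duration, see §2
```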
## Architecture Diagrams
### 1. Flow `/auth` (login)
```mermaid
%%{init: {'theme':'neutral'}}%%
sequenceDiagram
participant U as User
participant TG as Telegram
participant GW as telegram-gateway
participant R as Redis (tools)
U->>TG: /auth s3cr3t (DM @arcodange_factory_bot)
TG->>GW: POST /bot/factory<br/>X-Telegram-Bot-Api-Secret-Token: …
GW->>GW: verify secret_token (Telegram→GW)
GW->>GW: check ALLOWED_USERS (if configured)
GW->>GW: factory.handler = auth, parse "/auth s3cr3t"
GW->>GW: subtle.ConstantTimeCompare(s3cr3t, AUTH_SECRET)
alt Valid code
GW->>R: SET tg-gw:auth:<from.id> EX 24h
GW->>TG: deleteMessage (replay defense)
GW->>TG: sendMessage "✅ Authenticated for 24h"
GW->>TG: 200 OK (ack webhook)
TG->>U: "✅ Authenticated for 24h"
else Invalid code
GW->>TG: sendMessage "❌ Wrong code"
GW->>TG: 200 OK
TG->>U: "❌ Wrong code"
end
```
### 2. Access to a gated bot (`requireAuth: true`, default)
```mermaid
%%{init: {'theme':'neutral'}}%%
sequenceDiagram
participant U as User
participant TG as Telegram
participant GW as telegram-gateway
participant R as Redis
participant H as Bot handler (echo / http / shell…)
U->>TG: ping (DM @other_bot)
TG->>GW: POST /bot/other_bot
GW->>GW: verify secret_token + parse Update
GW->>GW: ALLOWED_USERS check
GW->>R: EXISTS tg-gw:auth:<from.id>
alt Authenticated
R-->>GW: 1
GW->>H: Handler.Handle(update, bot)
H->>TG: sendMessage (business reply)
GW->>TG: 200 OK
else Not authenticated
R-->>GW: 0
GW->>TG: sendMessage "🔒 /auth at @arcodange_factory_bot"
GW->>TG: 200 OK
end
```
### 3. Global decision when a webhook arrives
```mermaid
%%{init: {'theme':'neutral'}}%%
graph TD
%% classDef with explicit contrast: light background → dark text
classDef ok fill:#d4edda,stroke:#28a745,color:#155724;
classDef block fill:#f8d7da,stroke:#dc3545,color:#721c24;
classDef neutral fill:#e2e3e5,stroke:#6c757d,color:#383d41;
Start[Webhook POST /bot/&lt;slug&gt;]:::neutral
SecretCheck{secret_token<br/>match?}:::neutral
AllowlistCheck{from.id ∈<br/>ALLOWED_USERS?}:::neutral
HandlerKind{handler == auth?}:::neutral
AuthGate{requireAuth?<br/>+ valid session?}:::neutral
Reject401[401 Unauthorized]:::block
SilentDrop[empty 200<br/>silent drop]:::block
Forbidden[reply &quot;🔒 /auth …&quot;<br/>200 OK]:::block
AuthHandler[auth handler<br/>/auth /whoami /logout]:::ok
BotHandler[Bot handler<br/>echo / http / shell]:::ok
Start --> SecretCheck
SecretCheck -- no --> Reject401
SecretCheck -- yes --> AllowlistCheck
AllowlistCheck -- no --> SilentDrop
AllowlistCheck -- yes --> HandlerKind
HandlerKind -- yes --> AuthHandler
HandlerKind -- no --> AuthGate
AuthGate -- not allowed --> Forbidden
AuthGate -- OK --> BotHandler
```
## Consequences
### Positive
- **Confidentiality**: business bots only answer authenticated Telegram accounts, **by default**.
- **Defense in depth**: `ALLOWED_USERS` (allowlist), `secret_token` (Telegram→GW), `AUTH_SECRET` (user→bot), session TTL.
- **Simple UX**: a one-off `/auth <code>`, valid for 24 h.
- **No migration** for Phase 2/3: the gate slots in cleanly before the enqueue or the forward.
- **Replay defense**: the message containing the code is deleted from the chat after a successful login.
- **Secure-by-default**: a new bot added to the gateway requires a session with nothing to configure.
### Negative
- **Shared code**: global `AUTH_SECRET` (no TOTP/per-user). If compromised → manual rotation (change the Secret + redeploy).
- **No rate-limit** on `/auth`: a user in `ALLOWED_USERS` can, in practice, attempt to brute-force the code. Mitigation: `ALLOWED_USERS` acts as a floor, and 128+ bits of code make brute-force useless within the TTL window.
- **Redis dependency**: if Redis goes down, no user is considered authenticated anymore → all gated bots answer "🔒". Acceptable (fail-closed); Phase 1 already restored Redis cleanly.
- **No explicit multi-device session**: `from.id` is the same across all of an account's devices → auth already covers every device, which is the expected behavior.
## Alternatives Considered
### Alternative 1: auth by IP
**Rejected**. Telegram does not expose the user's IP to the bot. It would have required a secondary auth channel (web UI on the LAN, arcodange.fr landing page) and device binding. Significant cost for an ambiguous benefit.
### Alternative 2: TOTP / rotating OTP
**Rejected at this stage**. More secure than a shared code but adds:
- An enrollment step (display a QR code, scan it with an app).
- A synchronized clock on both the gateway side and the user side.
- User-facing complexity (pulling out the app for every /auth).
To be reconsidered if the shared code leaks regularly or if more users are onboarded.
### Alternative 3: Postgres instead of Redis for sessions
**Rejected**. Postgres would be needed for Phase 2 (durable queue), but for short-TTL sessions Redis is the idiomatic tool:
- Sub-ms latency.
- Native TTL (`SET … EX 86400`).
- Already deployed and in use (CrowdSec bouncer).
### Alternative 4: no session, verify the code on every message
**Rejected**. Terrible UX (retyping the code for every DM) and brings nothing (the plaintext code lingers even longer in the chat).
### Alternative 5: `requireAuth: false` by default (insecure-by-default)
**Rejected** (initially chosen, then reversed). A `requireAuth: false` default means a carelessly added bot is reachable by everyone. For a gateway that is "private by design", the secure default `true` is a much better fit.
## Implementation plan
See `~/.claude/plans/pour-les-notifications-on-inherited-seal.md` § Phase 1.5.
Summary of affected files:
- **New** (repo `arcodange/telegram-gateway`): `auth.go`, `handler_auth.go`, `allowlist.go`, `AUTH.md`
- **Modified**: `telegram_types.go`, `telegram.go`, `handlers.go`, `config.go`, `server.go`, `main.go`, `go.mod`, `chart/values.yaml`, `chart/templates/deployment.yaml`, `HOWTO_ADD_BOT.md`
- **Cluster**: `kubectl patch secret telegram-gateway-bots` to add `AUTH_SECRET` and (optionally) `ALLOWED_USERS`
## Success Metrics
- `/auth <wrong>` → 100% refusal, 0 Redis SET.
- `/auth <right>` → 100% success, best-effort deleteMessage executed.
- A bot with `requireAuth: true` (default) answers the "🔒 …" message to 100% of unauthenticated users.
- Sessions actually expire after the TTL (checked via `kubectl exec redis-0 -- redis-cli TTL …`).
- No secret (code, bot token) in the logs.
- Latency added by the gate < 5 ms (local Redis EXISTS).

View File

@@ -3,6 +3,3 @@
Provisions a gitea user "tofu_module_reader",
allowed to read certain projects; it is used by the CI to fetch terraform blueprints
via its ssh key stored in vault.
#
Configures the ovh and cloudflare tokens so that other projects can manage cloud resources.

View File

@@ -1,101 +0,0 @@
data "cloudflare_account" "arcodange" {
filter = {
name = "arcodange@gmail.com"
}
}
locals {
cloudflare_account_id = data.cloudflare_account.arcodange.account_id
}
resource "cloudflare_r2_bucket" "arcodange_tf" {
account_id = local.cloudflare_account_id
name = "arcodange-tf"
jurisdiction = "eu"
}
module "cf_r2_arcodange_tf_token" {
source = "./modules/cloudflare_token"
account_id = local.cloudflare_account_id
bucket = cloudflare_r2_bucket.arcodange_tf
token_name = "r2_arcodange_tf_token"
permissions = {
bucket = [
"account:Workers R2 Storage Read",
"bucket:Workers R2 Storage Bucket Item Write",
]
account = [
"account:Account Settings Read",
]
}
}
resource "vault_kv_secret" "cf_r2_arcodange_tf" {
path = "kvv1/cloudflare/r2/arcodange-tf"
data_json = jsonencode({
S3_SECRET_ACCESS_KEY = module.cf_r2_arcodange_tf_token.r2_credentials.secret_access_key
S3_ACCESS_KEY = module.cf_r2_arcodange_tf_token.r2_credentials.access_key_id
S3_ENDPOINT = "https://${local.cloudflare_account_id}.eu.r2.cloudflarestorage.com"
})
}
data "vault_policy_document" "cf_r2_arcodange_tf" {
rule {
path = "kvv1/cloudflare/r2/arcodange-tf"
capabilities = ["read"]
}
rule {
path = "kvv1/zoho/self_client" # zoho mail client is created manually
capabilities = ["read"]
}
}
resource "vault_policy" "cf_r2_arcodange_tf" {
name = "factory__cf_r2_arcodange_tf"
policy = data.vault_policy_document.cf_r2_arcodange_tf.hcl
}
data "gitea_repo" "cms" {
name = "cms"
username = "arcodange-org"
}
module "cf_arcodange_cms_token" {
source = "./modules/cloudflare_token"
account_id = local.cloudflare_account_id
bucket = cloudflare_r2_bucket.arcodange_tf
token_name = "cf_arcodange_cms_token"
permissions = {
account = [
"account:Pages Write",
"account:Account DNS Settings Write",
"account:Account Settings Read",
"zone:Zone Write",
"zone:Zone Settings Write",
"zone:DNS Write",
"account:Cloudflare Tunnel Write",
"account:Turnstile Sites Write",
]
}
}
resource "gitea_repository_actions_secret" "cf_arcodange_cms_token" {
repository = data.gitea_repo.cms.name
repository_owner = data.gitea_repo.cms.username
secret_name = "CLOUDFLARE_API_TOKEN"
secret_value = module.cf_arcodange_cms_token.token
}
resource "gitea_repository_actions_secret" "cf_account_id_cms" {
repository = data.gitea_repo.cms.name
repository_owner = data.gitea_repo.cms.username
secret_name = "CLOUDFLARE_ACCOUNT_ID"
secret_value = local.cloudflare_account_id
}
output "token" {
value = module.cf_arcodange_cms_token.token
sensitive = true
}
resource "vault_kv_secret" "cf_arcodange_cms_token" {
path = "kvv1/cloudflare/cms/cf_arcodange_cms_token"
data_json = jsonencode({
token = module.cf_arcodange_cms_token.token
})
}

Some files were not shown because too many files have changed in this diff.