Set up Longhorn and prepare NFS server to store backups

2025-08-14 15:42:33 +02:00
parent b4bde14809
commit 588a6482e9
10 changed files with 366 additions and 134 deletions

View File

@@ -46,6 +46,50 @@
#---
- name: Install iSCSI client for Longhorn on Raspberry Pi
hosts: raspberries:&local
become: yes
tasks:
- name: Install open-iscsi
ansible.builtin.apt:
name: open-iscsi
state: present
update_cache: yes
- name: Enable and start iSCSI service
ansible.builtin.service:
name: iscsid
state: started
enabled: yes
- name: Install cryptsetup
ansible.builtin.apt:
name: cryptsetup
state: present
update_cache: yes
- name: Load the dm_crypt kernel module
ansible.builtin.modprobe:
name: dm_crypt
state: present
- name: Ensure the dm_crypt module is loaded at boot
ansible.builtin.lineinfile:
path: /etc/modules
line: dm_crypt
state: present
- name: Create the Longhorn data directory
ansible.builtin.file:
path: /mnt/arcodange/longhorn
state: directory
owner: pi
group: docker
mode: '0774'
ignore_errors: true
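# Quick sanity check on a node after this play (illustrative commands):
#   systemctl is-active iscsid && lsmod | grep dm_crypt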
#---
- name: System K3S
hosts: raspberries:&local
tags: never
@@ -76,7 +120,7 @@
# ansible.builtin.import_playbook: k3s.orchestration.upgrade
# ansible.builtin.import_playbook: k3s.orchestration.reset
vars:
k3s_version: v1.32.2+k3s1
k3s_version: v1.32.7+k3s1
extra_server_args: "--docker --disable traefik"
extra_agent_args: "--docker"
api_endpoint: "{{ hostvars[groups['server'][0]]['ansible_host'] | default(groups['server'][0]) }}"
@@ -98,6 +142,42 @@
path: ~/.kube/config
regexp: 'server: https://127.0.0.1:6443'
replace: 'server: https://{{ ansible_default_ipv4.address }}:6443'
# - name: setup hard disk
# tags: never
# ansible.builtin.import_playbook: ./setup/hard_disk_v2.yml
# # vars:
# # hard_disk__partitions:
# # nfs: []
- name: setup longhorn for volumes https://docs.k3s.io/helm
become: true
ansible.builtin.copy:
dest: /var/lib/rancher/k3s/server/manifests/longhorn-install.yaml
content: |-
apiVersion: helm.cattle.io/v1
kind: HelmChart
metadata:
annotations:
helmcharts.cattle.io/managed-by: helm-controller
finalizers:
- wrangler.cattle.io/on-helm-chart-remove
generation: 1
name: longhorn-install
namespace: kube-system
spec:
version: v1.9.1
chart: longhorn
repo: https://charts.longhorn.io
failurePolicy: abort
targetNamespace: longhorn-system
createNamespace: true
valuesContent: |-
defaultSettings:
defaultDataPath: /mnt/arcodange/longhorn
vars:
longhorn_helm_values: {} # https://github.com/longhorn/longhorn/blob/master/chart/values.yaml
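# note: longhorn_helm_values appears unused here — the chart settings are
# inlined in valuesContent above; the link is kept as a pointer to the defaults.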
- name: customize k3s traefik configuration https://docs.k3s.io/helm
block:
- name: Get my public IP
@@ -173,6 +253,13 @@
traefik_helm_values:
deployment:
kind: "Deployment"
initContainers:
- name: volume-permissions
image: busybox:latest
command: ["sh", "-c", "touch /data/acme.json; chmod -v 600 /data/acme.json"]
volumeMounts:
- name: data
mountPath: /data
# default is https://github.com/traefik/traefik-helm-chart/blob/v25.0.0/traefik/values.yaml <- for v25 (`kubectl describe deployments.apps traefik -n kube-system | grep helm.sh/chart`)
# current is https://github.com/traefik/traefik-helm-chart/blob/v30.1.0/traefik/values.yaml
nodeSelector:
@@ -206,6 +293,11 @@
access:
enabled: true
# format: json
podSecurityContext:
runAsGroup: 65532
runAsNonRoot: true
runAsUser: 65532
fsGroup: 65532 # else the persistent volume might be owned by root and be unwritable
persistence:
# -- Enable persistence using Persistent Volume Claims
# ref: http://kubernetes.io/docs/user-guide/persistent-volumes/
@@ -215,7 +307,7 @@
# existingClaim: ""
accessMode: ReadWriteOnce
size: 128Mi
storageClass: "nfs-client"
storageClass: "longhorn"
# volumeName: ""
path: /data
annotations: {}
@@ -235,6 +327,12 @@
dnsChallenge:
# requires env variable DUCKDNS_TOKEN
provider: duckdns
propagation:
delayBeforeChecks: 120
disableChecks: true
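# disableChecks skips the resolver's own pre-validation TXT lookup; the 120 s
# delay instead gives DuckDNS time to propagate before the ACME challenge runs.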
resolvers:
- "1.1.1.1:53"
- "8.8.8.8:53"
httpChallenge:
entryPoint: "web"
# It has to match the persistent volume's mount path
@@ -252,39 +350,6 @@
# ---
- name: setup hard disk
tags: never
ansible.builtin.import_playbook: ./setup/hard_disk.yml
vars:
hard_disk__partitions:
nfs: []
- name: Deploy NFS Subdir External Provisioner and alter default traefik deployment
tags: never
hosts: localhost
tasks:
- name: Deploy NFS Subdir External Provisioner
block:
- name: Add Helm repository for NFS Subdir External Provisioner
kubernetes.core.helm_repository:
name: nfs-subdir-external-provisioner
repo_url: https://kubernetes-sigs.github.io/nfs-subdir-external-provisioner/
force_update: yes
- name: Install NFS Subdir External Provisioner using Helm
# debug:
# var: hard_disk__nfs
kubernetes.core.helm:
name: nfs-subdir-external-provisioner
chart_ref: nfs-subdir-external-provisioner/nfs-subdir-external-provisioner
release_namespace: "{{ hard_disk__nfs.ks_namespace }}"
values:
nfs:
server: "{{ hard_disk__nfs.server_ip }}"
path: "{{ hard_disk__nfs.export_directory }}"
vars:
hard_disk__nfs: "{{ hostvars[groups.hard_disk[0]].hard_disk__nfs }}"
- name: redeploy traefik
hosts: localhost

View File

@@ -1,5 +1,18 @@
# Setup factory services
## Upcoming changes (V2)
The NFS server was a single point of failure: one hard-drive failure left everything corrupted and unusable. The 4 TB disk will therefore no longer be used.
Instead, the Longhorn replication tool will be used via K3s on 500 GB hard drives. The NFS storage class will no longer be available, in favor of Longhorn.
To stop suffering corruption of the postgres database, which runs outside of k3s, the following solution is put in place:
a persistent volume with ReadWriteMany access and the Longhorn storage class exposes an NFS share managed by Longhorn. This volume serves as the folder where the postgres and gitea backups are dropped periodically (once a day).
In addition, [Longhorn will export the data to external storage](https://longhorn.io/docs/1.9.1/snapshots-and-backups/backup-and-restore/set-backup-target/#set-up-gcp-cloud-storage-backupstore).
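
As a rough sketch, the backup target could be wired through the same HelmChart values used for the install (bucket, region, and secret names below are placeholders; GCS is reached through its S3-interoperability endpoint, per the linked guide):

```yaml
defaultSettings:
  backupTarget: s3://factory-backups@us-east-1/
  backupTargetCredentialSecret: backup-target-credentials
```

And a minimal sketch of the daily dump into the RWX mount, assuming the volume is mounted at `/mnt/backups` as in the new playbook (database name and schedule are illustrative):

```yaml
- name: Schedule daily postgres dump into the Longhorn-backed mount
  ansible.builtin.cron:
    name: pg-dump-daily
    minute: "0"
    hour: "3"
    user: postgres
    job: "pg_dump -Fc factory > /mnt/backups/postgres/factory-$(date +\\%F).dump"
```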
## V1
```mermaid
%%{init: { 'logLevel': 'debug', 'theme': 'base', 'rough':true } }%%
flowchart

View File

@@ -0,0 +1,132 @@
---
- name: Create a Longhorn RWX volume for backups
hosts: localhost
connection: local
gather_facts: no
vars:
namespace_longhorn: longhorn-system
backup_volume_name: backups-rwx
backup_size: 50Gi
access_mode: ReadWriteMany
storage_class: longhorn
tasks:
- name: Create the RWX PVC in longhorn-system
tags: never
kubernetes.core.k8s:
state: present
definition:
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
name: "{{ backup_volume_name }}"
namespace: "{{ namespace_longhorn }}"
spec:
accessModes:
- "{{ access_mode }}"
resources:
requests:
storage: "{{ backup_size }}"
storageClassName: "{{ storage_class }}"
- name: Fetch the PVC info
kubernetes.core.k8s_info:
api_version: v1
kind: PersistentVolumeClaim
namespace: "{{ namespace_longhorn }}"
name: "{{ backup_volume_name }}"
register: pvc_info
- name: Extract the volume name
set_fact:
pvc_internal_name: "{{ pvc_info.resources[0].spec.volumeName }}"
- name: Start a temporary pod to trigger the NFS export
tags: never
kubernetes.core.k8s:
state: present
definition:
apiVersion: v1
kind: Pod
metadata:
name: rwx-nfs
namespace: "{{ namespace_longhorn }}"
spec:
containers:
- name: busybox
image: busybox
command: ["sleep", "infinity"]
# command: ["sh", "-c", "sleep 600"]
volumeMounts:
- mountPath: "/mnt/backups"
name: backup-vol
volumes:
- name: backup-vol
persistentVolumeClaim:
claimName: "{{ backup_volume_name }}"
- name: Wait until the rwx-nfs pod is Running
tags: never
kubernetes.core.k8s_info:
api_version: v1
kind: Pod
namespace: "{{ namespace_longhorn }}"
name: rwx-nfs
register: pod_info
until: pod_info.resources[0].status.phase == "Running"
retries: 30
delay: 5
- name: Fetch the Longhorn NFS Endpoints
kubernetes.core.k8s_info:
api_version: v1
kind: Endpoints
namespace: "{{ namespace_longhorn }}"
name: "{{ pvc_internal_name }}"
register: nfs_endpoint
- name: Extract the NFS pod IP
set_fact:
backup_nfs_ip: "{{ nfs_endpoint.resources[0].subsets[0].addresses[0].ip }}"
- name: Store the NFS connection info
set_fact:
nfs_info:
ip: "{{ backup_nfs_ip }}"
path: "/{{ pvc_internal_name }}/"
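# Illustrative shape of the derived fact (values are cluster-specific):
#   nfs_info: { ip: "10.42.1.23", path: "/pvc-0f9e.../" }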
- name: Mount the Longhorn RWX volume on the Raspberry Pis
hosts: raspberries:&local
become: yes
vars:
backup_mount: "/mnt/backups"
tasks:
- name: Install the NFS client
apt:
name: nfs-common
state: present
update_cache: yes
- name: Create the mount point
file:
path: "{{ backup_mount }}"
state: directory
mode: '0755'
- name: Mount the backup volume
mount:
path: "{{ backup_mount }}"
src: "{{ hostvars['localhost'].nfs_info.ip }}:{{ hostvars['localhost'].nfs_info.path }}"
fstype: nfs
opts: vers=4.1,rw
state: mounted
- name: Ensure the fstab entry matches the live mount
mount:
path: "{{ backup_mount }}"
src: "{{ hostvars['localhost'].nfs_info.ip }}:{{ hostvars['localhost'].nfs_info.path }}"
fstype: nfs
opts: vers=4.1,rw # keep in sync with the mount above, which already persisted this entry
state: present
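# Resulting /etc/fstab line (illustrative; IP and volume name are cluster-specific):
#   10.42.1.23:/pvc-0f9e.../  /mnt/backups  nfs  vers=4.1,rw  0 0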

View File

@@ -0,0 +1,120 @@
---
- name: Automatically prepare the external disk as ext4
hosts: raspberries:&local
become: yes
vars:
mount_point: /mnt/arcodange
tasks:
- name: List all partitions in bytes (lsblk -b)
command: lsblk -b -J -o NAME,SIZE,TYPE,MOUNTPOINT
register: lsblk_json
changed_when: false
- name: Extract all child partitions
set_fact:
all_partitions: >-
{{
lsblk_json.stdout | from_json | json_query("blockdevices[].children")
| flatten
}}
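# Note: the JMESPath projection drops devices without a "children" array
# (e.g. unpartitioned disks), so flatten only ever sees lists.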
- name: Filter out system-disk partitions (whatever their mountpoint)
set_fact:
candidate_partitions: >-
{{
all_partitions
| selectattr('name', 'search', '^((?!mmcblk0).)*$')
| list
}}
- name: Ensure at least one candidate partition exists
fail:
msg: "No external partition found."
when: candidate_partitions | length == 0
- name: Pick the largest candidate partition
block:
- set_fact:
target_partition: "{{ (candidate_partitions | sort(attribute='size'))[-1] }}"
- set_fact:
target_device: "/dev/{{ target_partition.name }}"
- name: Check whether the partition is already mounted at the desired mount point
set_fact:
partition_mounted_correctly: "{{ target_partition.mountpoint == mount_point }}"
- debug:
var: partition_mounted_correctly
- name: Ask for confirmation before formatting and (un)mounting
run_once: true
pause:
prompt: |
Warning: partition {{ target_device }} will be formatted and (un)mounted.
Type 'yes' to continue, anything else to abort:
register: user_confirm
- name: Abort if the user did not confirm
fail:
msg: "Formatting cancelled by the user."
when: user_confirm.user_input | lower != 'yes'
- name: Unmount the partition if mounted elsewhere
mount:
path: "{{ target_partition.mountpoint }}"
state: absent # also removes any stale fstab entry; a freshly attached, unconfigured disk normally has none
when:
- target_partition.mountpoint is defined
- target_partition.mountpoint != ''
- target_partition.mountpoint != mount_point
- name: Format the partition as ext4 if not mounted at the desired point
filesystem:
fstype: ext4
dev: "{{ target_device }}"
force: true
when: not partition_mounted_correctly
- name: Create the mount point if absent
file:
path: "{{ mount_point }}"
state: directory
owner: root
group: root
mode: '0755'
- name: Mount the partition with idempotent options
mount:
path: "{{ mount_point }}"
src: "{{ target_device }}"
fstype: ext4
opts: defaults
state: mounted
when: not partition_mounted_correctly
- name: Add the fstab entry if absent (ensures persistence)
mount:
path: "{{ mount_point }}"
src: "{{ target_device }}"
fstype: ext4
opts: defaults
state: present
when: not partition_mounted_correctly
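# After this play the disk is mounted and persisted, e.g. (illustrative):
#   /dev/sda1  /mnt/arcodange  ext4  defaults  0 0
# Check with: findmnt /mnt/arcodange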

View File

@@ -1,3 +0,0 @@
---
nfs_setup_export_directory: /arcodange/nfs
# nfs_setup_server_ip: "{{ hostvars['pi2'].ansible_default_ipv4.address }}"

View File

@@ -1,23 +0,0 @@
---
- name: Install Avahi and related packages
ansible.builtin.apt: # https://www.baeldung.com/linux/conflicting-values-error-resolution
name: "{{ item }}"
state: present
update_cache: yes
with_items:
- avahi-daemon
- avahi-utils
- name: Create Avahi service file for NFS
template:
src: nfs.service.j2
dest: /etc/avahi/services/nfs.service
owner: root
group: root
mode: '0644'
- name: Restart Avahi daemon
service:
name: avahi-daemon
state: restarted
enabled: yes

View File

@@ -1,39 +0,0 @@
---
- name: Install NFS server package
ansible.builtin.apt: # https://www.baeldung.com/linux/conflicting-values-error-resolution
name: nfs-kernel-server
state: present
update_cache: yes
- name: Create export directory
ansible.builtin.file:
path: "{{ nfs_setup_export_directory }}"
state: directory
owner: root
group: root
mode: '0755'
- name: Configure /etc/exports
ansible.builtin.lineinfile:
path: /etc/exports
line: "{{ nfs_setup_export_directory }} 192.168.1.0/24(rw,sync,no_subtree_check,anonuid=1000,anongid=1000)"
create: yes
state: present
- name: Ensure NFS service is running and enabled
ansible.builtin.service:
name: nfs-kernel-server
state: started
enabled: yes
- name: Export the shared directories
ansible.builtin.command: exportfs -ra
- name: Verify NFS exports
ansible.builtin.command: exportfs -v
register: nfs_exports
- ansible.builtin.debug:
msg: "NFS Exports: {{ nfs_exports.stdout }}"
- include_tasks: announce.yml

View File

@@ -1,24 +0,0 @@
---
- name: Install NFS client package
ansible.builtin.apt: # https://www.baeldung.com/linux/conflicting-values-error-resolution
name: nfs-common
state: present
update_cache: yes
- name: Create local mount directory
ansible.builtin.file:
path: /mnt/nfs
state: directory
owner: pi
group: docker
mode: '0774'
ignore_errors: true
- name: Mount NFS share
mount:
src: "{{ nfs_setup_server_ip }}:{{ nfs_setup_export_directory }}"
path: /mnt/nfs
fstype: nfs
opts: rw,vers=4
state: mounted
ignore_errors: true

View File

@@ -1,9 +0,0 @@
<?xml version="1.0" standalone='no'?>
<!DOCTYPE service-group SYSTEM "avahi-service.dtd">
<service-group>
<name replace-wildcards="yes">%h NFS</name>
<service>
<type>_nfs._tcp</type>
<port>2049</port>
</service>
</service-group>

View File

@@ -6,7 +6,7 @@
- ansible.builtin.ping:
- name: setup hard disk
ansible.builtin.import_playbook: hard_disk.yml
ansible.builtin.import_playbook: hard_disk_v2.yml
tags: never
- name: setup factory postgres