The accounting data + issued documents are legally retained 10 years and warrant a
backup dedicated to Dolibarr. An audit found the generic Longhorn external backup
NEVER covered the erp volume (its Longhorn volume sits in the orphaned `default`
recurring-job group; the only job has groups=[] → serves nothing; lastBackupAt=never).
So /var/www/documents (invoice PDFs, supplier documents, contracts, ECM) had zero
offsite copy — only in-cluster replicas.
ops/backup/dolibarr-backup.sh (orchestrator) + ops/backup/backup-job.sh (in-container
logic, env-driven, single source of truth):
- pg_dump -Fc of the DB + tar of the documents PVC (RWX, read-only mount) ->
  s3://arcodange-backup/erp/<env>/{db,docs}/<ts>, then tiered prune (daily 30d /
  monthly 12m / yearly 10y).
- prod is READ-only (dump+tar read; writes go only to the backup bucket); the DB is
  read with the env's own dynamic creds; the GCS HMAC secret is copied transiently
  (base64, deleted on exit) and never printed; the whole script ships base64.
- fixes the aws-cli v2.23+ default-checksum incompatibility with GCS/S3-compat
  (SignatureDoesNotMatch) via AWS_*_CHECKSUM_*=when_required.
Proven live: sandbox end-to-end (dump+tar+upload+prune, verified in GCS, cleaned up)
and retention logic unit-tested (1100 daily -> 46 kept). The FIRST real prod backup
was taken (erp/prod/db 1.2 MB + erp/prod/docs 12.5 MB) — closing the gap now.
Automation (recurring CronJob in the chart + a dedicated erp Vault policy for its
own S3 creds) is the documented next step; the orchestrator works today on demand.
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
172 lines
7.1 KiB
Bash
Executable File
172 lines
7.1 KiB
Bash
Executable File
#!/usr/bin/env bash
#
# dolibarr-backup.sh — dedicated, offsite backup for the Arcodange Dolibarr ERP.
#
# Critical-data-aware (10-year accounting retention) and INDEPENDENT of the generic
# Longhorn platform backup — which today does NOT cover the erp volume (its volume
# sits in the orphaned `default` recurring-job group, lastBackupAt=never). Backs up
# BOTH halves of Dolibarr state to the existing object store (s3://arcodange-backup
# on GCS), under erp/<env>/:
#   - the Postgres DB (pg_dump -Fc, restorable)        -> erp/<env>/db/<ts>.dump
#   - the documents PVC (/var/www/documents, RWX, ro)  -> erp/<env>/docs/<ts>.tar.gz
# then prunes to a tiered retention: daily 30d, monthly 12m, yearly 10y.
#
# Safety, mirroring ops/sandbox/sandbox-lifecycle.sh:
#   - the DB is read with the app's OWN dynamic creds (vso-db-credentials), scoped
#     to its env; prod and sandbox never cross.
#   - S3 creds are a TRANSIENT copy of the Longhorn GCS secret (deleted on exit);
#     no secret value is ever printed.
#   - the whole in-container script is shipped base64 (no nested-heredoc/quoting).
#
# Usage:
#   dolibarr-backup.sh backup  [--env prod|sandbox]          # one-shot backup + prune
#   dolibarr-backup.sh list    [--env prod|sandbox]          # what's in the store
#   dolibarr-backup.sh restore --db <key>   --env <e> --yes  # restore DB (DESTRUCTIVE)
#   dolibarr-backup.sh restore --docs <key> --env <e> --yes  # restore documents
#
# NOTE: `restore` is not implemented in this script yet — it validates its flags
# and then aborts (the restore itself is planned as a chart Job).
#

# Strict mode: abort on command errors, unset variables and failed pipeline stages.
set -euo pipefail

# Directory containing this script; backup-job.sh is read from the same place.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
readonly SCRIPT_DIR

# Image used for the in-cluster Jobs.
readonly PG_IMAGE="postgres:16-alpine"
readonly PGHOST="192.168.1.202"                    # direct Postgres (NOT pgbouncer)
# Target bucket; override with ARCO_BACKUP_BUCKET.
readonly BUCKET="${ARCO_BACKUP_BUCKET:-arcodange-backup}"
readonly S3_SRC_NS="longhorn-system"               # where the GCS HMAC creds live today
readonly S3_SRC_SECRET="longhorn-gcs-backup-credentials"
# Name of the short-lived copy of the S3 secret created in the target namespace.
readonly TMP_S3_SECRET="dolibarr-backup-s3-temp"

# Print a progress line to stdout, prefixed with a bold-cyan "==>" marker.
# All arguments are joined with spaces into a single message.
log() {
  printf '\033[1;36m==>\033[0m %s\n' "$*"
}
# Print a bold-red "ABORT:" message to stderr and exit the script with status 1.
# All arguments are joined with spaces into a single message.
die() {
  printf '\033[1;31mABORT:\033[0m %s\n' "$*" >&2
  exit 1
}

# Parse "<command> [flags]" into the globals used by the rest of the script:
#   CMD   first positional argument (empty when none was given)
#   ENV   --env value, default "prod"
#   KIND  "db" or "docs", set by --db / --docs
#   KEY   object key given to --db / --docs
#   YES   1 when --yes was passed, else 0
# Aborts via die on an unknown flag or on a flag that is missing its value.
parse_args() {
  CMD="${1:-}"
  shift || true   # no-op when called without arguments
  ENV="prod"; KEY=""; KIND=""; YES=0
  while [[ $# -gt 0 ]]; do
    case "$1" in
      --env)
        [[ -n "${2:-}" ]] || die "--env needs a value (prod|sandbox)"
        ENV="$2"; shift 2 ;;
      --db)
        [[ -n "${2:-}" ]] || die "--db needs a backup key"
        KIND="db"; KEY="$2"; shift 2 ;;
      --docs)
        [[ -n "${2:-}" ]] || die "--docs needs a backup key"
        KIND="docs"; KEY="$2"; shift 2 ;;
      --yes)
        YES=1; shift ;;
      *)
        die "unknown arg '$1'" ;;
    esac
  done
}

# Map ENV to the namespace (NS), database name (DB), documents PVC name (PVC)
# and object-store prefix (PREFIX; override with ARCO_BACKUP_PREFIX).
# Aborts via die when ENV is neither "prod" nor "sandbox".
resolve_env() {
  case "$ENV" in
    prod)    NS="erp";         DB="erp" ;;
    sandbox) NS="erp-sandbox"; DB="erp-sandbox" ;;
    *)       die "--env must be prod|sandbox" ;;
  esac
  PVC="$NS"   # the PVC carries the same name as its namespace
  PREFIX="${ARCO_BACKUP_PREFIX:-erp/${ENV}}"
}

parse_args "$@"
resolve_env

# In-container preamble: install tools, export region, define S3().
# Stored verbatim (quoted heredoc, so nothing is expanded on the host) and
# prepended to scripts that run inside the Job container. `read -d ''` returns
# non-zero at end of input, hence the `|| true`.
read -r -d '' PREAMBLE <<'SH' || true
set -eu
apk add --no-cache aws-cli tar gzip >/dev/null 2>&1 || { echo "ABORT apk add"; exit 1; }
export AWS_DEFAULT_REGION="${AWS_DEFAULT_REGION:-us-east-1}"
# GCS / S3-compatible stores reject aws-cli v2.23+ default integrity checksums
# ("SignatureDoesNotMatch / Invalid argument"); only sign/validate when required.
export AWS_REQUEST_CHECKSUM_CALCULATION=when_required
export AWS_RESPONSE_CHECKSUM_VALIDATION=when_required
aws --version 2>&1 | head -1
S3() { aws --endpoint-url "$AWS_ENDPOINTS" s3 "$@"; }
SH

# Copy the source S3 secret ($S3_SRC_SECRET in $S3_SRC_NS) into namespace $NS
# under the name $TMP_S3_SECRET.
#
# The secret JSON is piped from kubectl through python3 back into kubectl, so
# its values are never printed. The copy gets fresh metadata (name + namespace
# only), has any `status` dropped, and keeps just the three keys
# AWS_ACCESS_KEY_ID, AWS_SECRET_ACCESS_KEY and AWS_ENDPOINTS.
#
# Globals:  S3_SRC_SECRET, S3_SRC_NS, TMP_S3_SECRET, NS (all read)
# Aborts via die when python3 is not installed.
copy_s3_secret() {
  command -v python3 >/dev/null || die "python3 required to copy the S3 secret without exposing it"
  # Name and namespace go in as argv rather than being spliced into the Python
  # source, so their content can never be interpreted as code.
  kubectl get secret "$S3_SRC_SECRET" -n "$S3_SRC_NS" -o json \
    | python3 -c '
import json, sys

name, namespace = sys.argv[1], sys.argv[2]
secret = json.load(sys.stdin)
secret["metadata"] = {"name": name, "namespace": namespace}
secret.pop("status", None)
wanted = ("AWS_ACCESS_KEY_ID", "AWS_SECRET_ACCESS_KEY", "AWS_ENDPOINTS")
secret["data"] = {key: secret["data"][key] for key in wanted}
print(json.dumps(secret))
' "$TMP_S3_SECRET" "$NS" \
    | kubectl apply -f - >/dev/null
}

# Delete the transient S3 secret ($TMP_S3_SECRET) from namespace $NS.
# Best-effort on purpose: it runs from the EXIT trap, so all output is
# discarded and it always returns 0, even when the secret is already gone.
cleanup_secret() {
  kubectl delete secret "$TMP_S3_SECRET" -n "$NS" --ignore-not-found >/dev/null 2>&1 || true
}

# b64-encode an in-container script (host vars already substituted by the caller).
# Arguments: $1 - text to encode
# Outputs:   the base64 encoding on stdout as ONE line (wrapping newlines are
#            stripped so it can be embedded in a single manifest argument)
b64() {
  printf '%s' "$1" | base64 | tr -d '\n'
}

# Run a one-shot backup as a Kubernetes Job in namespace $NS.
#
# Steps:
#   1. read ${SCRIPT_DIR}/backup-job.sh and base64-encode it for the container;
#   2. copy the S3 creds into a transient secret (removed again on any exit);
#   3. (re)create the Job "dolibarr-backup": it mounts the documents PVC
#      read-only at /docs, takes DB creds from the vso-db-credentials secret
#      and runs the decoded script with `sh`;
#   4. wait for the Job to complete, print its logs, delete the Job.
#
# Globals (read): SCRIPT_DIR, NS, ENV, DB, PVC, BUCKET, PREFIX, PG_IMAGE,
#                 PGHOST, TMP_S3_SECRET
# Environment:    ARCO_BACKUP_TIMEOUT - how long to wait for the Job
#                 (kubectl duration, default 300s)
# Aborts via die when the in-container script is missing/empty or when the Job
# does not complete in time; on that path the Job is left in place so its logs
# can still be inspected.
run_backup() {
  local job="dolibarr-backup"
  local job_script="${SCRIPT_DIR}/backup-job.sh"
  local timeout="${ARCO_BACKUP_TIMEOUT:-300s}"
  local payload

  # Without this check a missing script would encode to an empty payload: the
  # Job would run nothing, succeed, and a backup that never happened would be
  # reported as complete.
  [[ -s "$job_script" ]] || die "in-container script missing or empty: $job_script"
  payload="$(b64 "$(<"$job_script")")"
  [[ -n "$payload" ]] || die "could not base64-encode $job_script"

  trap cleanup_secret EXIT
  log "Copying GCS creds into a transient secret in $NS (values stay base64)"
  copy_s3_secret
  log "Backup ${ENV}: DB=$DB PVC=$PVC -> s3://$BUCKET/$PREFIX/{db,docs}/"

  # Remove any leftover Job from a previous run so the apply creates a fresh one.
  kubectl delete job "$job" -n "$NS" --ignore-not-found >/dev/null 2>&1 || true
  kubectl apply -f - >/dev/null <<EOF
apiVersion: batch/v1
kind: Job
metadata: { name: $job, namespace: $NS }
spec:
  backoffLimit: 0
  ttlSecondsAfterFinished: 600
  template:
    spec:
      restartPolicy: Never
      volumes:
        - name: docs
          persistentVolumeClaim: { claimName: $PVC, readOnly: true }
      containers:
        - name: backup
          image: $PG_IMAGE
          envFrom:
            - secretRef: { name: $TMP_S3_SECRET }
          env:
            - { name: BUCKET, value: "$BUCKET" }
            - { name: PREFIX, value: "$PREFIX" }
            - { name: DB, value: "$DB" }
            - { name: PGHOST, value: "$PGHOST" }
            - { name: PGUSER, valueFrom: { secretKeyRef: { name: vso-db-credentials, key: username } } }
            - { name: PGPASSWORD, valueFrom: { secretKeyRef: { name: vso-db-credentials, key: password } } }
          volumeMounts:
            - { name: docs, mountPath: /docs, readOnly: true }
          command: ["/bin/sh","-c"]
          args: ["echo $payload | base64 -d | sh"]
EOF

  if ! kubectl wait --for=condition=complete "job/$job" -n "$NS" --timeout="$timeout" >/dev/null 2>&1; then
    # Show whatever the Job logged (best effort) so the cause is visible at once.
    kubectl logs -n "$NS" "job/$job" 2>/dev/null | sed 's/^/ /' >&2 || true
    die "backup Job did not complete — kubectl logs -n $NS job/dolibarr-backup"
  fi

  kubectl logs -n "$NS" "job/$job" | sed 's/^/ /'
  kubectl delete job "$job" -n "$NS" --ignore-not-found >/dev/null 2>&1 || true
  cleanup_secret
  trap - EXIT
  log "Backup complete."
}

# List the stored backups for the selected env by running `aws s3 ls` inside a
# short-lived Job in namespace $NS and printing the Job's logs.
#
# The in-container script is $PREAMBLE followed by one listing each for
# s3://$BUCKET/$PREFIX/db/ and s3://$BUCKET/$PREFIX/docs/; it is shipped
# base64-encoded. S3 creds come from a transient secret, removed on any exit.
#
# Globals (read): NS, BUCKET, PREFIX, PG_IMAGE, TMP_S3_SECRET, PREAMBLE
# Waiting for the Job is best-effort: if it does not complete within 180s a
# warning is printed to stderr and whatever logs exist are still shown.
run_list() {
  local job="dolibarr-backup-list"
  local script payload

  trap cleanup_secret EXIT
  copy_s3_secret

  # Unquoted heredoc: BUCKET/PREFIX are substituted here on the host, while the
  # text held in PREAMBLE is inserted as-is for the container to evaluate.
  script="$(cat <<EOF
$PREAMBLE
echo "db/:"; S3 ls "s3://$BUCKET/$PREFIX/db/" || echo " (empty)"
echo "docs/:"; S3 ls "s3://$BUCKET/$PREFIX/docs/" || echo " (empty)"
EOF
)"
  payload="$(b64 "$script")"

  kubectl delete job "$job" -n "$NS" --ignore-not-found >/dev/null 2>&1 || true
  kubectl apply -f - >/dev/null <<EOF
apiVersion: batch/v1
kind: Job
metadata: { name: $job, namespace: $NS }
spec:
  backoffLimit: 0
  ttlSecondsAfterFinished: 300
  template:
    spec:
      restartPolicy: Never
      containers:
        - name: list
          image: $PG_IMAGE
          envFrom: [ { secretRef: { name: $TMP_S3_SECRET } } ]
          command: ["/bin/sh","-c"]
          args: ["echo $payload | base64 -d | sh"]
EOF

  kubectl wait --for=condition=complete "job/$job" -n "$NS" --timeout=180s >/dev/null 2>&1 \
    || printf 'WARN: list Job did not complete within 180s; output below may be partial\n' >&2
  kubectl logs -n "$NS" "job/$job" 2>/dev/null | sed 's/^/ /'
  kubectl delete job "$job" -n "$NS" --ignore-not-found >/dev/null 2>&1 || true
  cleanup_secret
  trap - EXIT
}

# Command dispatch. CMD, KEY, KIND, YES and ENV come from the argument parsing
# earlier in the script.
case "$CMD" in
  backup)
    run_backup
    ;;
  list)
    run_list
    ;;
  restore)
    # Restore is not implemented here yet: the flags are validated so the CLI
    # contract is already fixed, then the script aborts unconditionally.
    [[ -n "$KEY" && -n "$KIND" ]] || die "restore needs --db <key> or --docs <key>"
    [[ "$YES" == "1" ]] || die "restore is DESTRUCTIVE on '$ENV' — re-run with --yes"
    die "restore: wired in the chart Job (next iteration) — key=$KEY kind=$KIND env=$ENV"
    ;;
  *)
    # Unknown or missing command: print usage to stderr, exit with status 2.
    echo "usage: $0 {backup|list|restore} [--env prod|sandbox] [--db|--docs <key>] [--yes]" >&2
    exit 2
    ;;
esac