#!/bin/sh
# In-container backup logic for Dolibarr — the single source of truth shared by the
# manual orchestrator (ops/backup/dolibarr-backup.sh) and the scheduled CronJob
# (chart/templates/backup-cronjob.yaml). Driven entirely by environment:
#   BUCKET PREFIX DB PGHOST                               (config)
#   PGUSER PGPASSWORD                                     (DB creds, from vso-db-credentials)
#   AWS_ACCESS_KEY_ID AWS_SECRET_ACCESS_KEY AWS_ENDPOINTS (S3 creds)
# Dumps the DB (pg_dump -Fc) + tars the documents mounted at /docs, pushes both to
# s3://$BUCKET/$PREFIX/{db,docs}/, then prunes to a tiered retention.
#
# Skip-if-unchanged: each half carries a content fingerprint at $PREFIX/.fp-{db,docs};
# a half is dumped+uploaded ONLY if its fingerprint differs from the last run, so a
# quiet ERP day re-uploads nothing. DB fingerprint = count + max(tms) over every
# tms-bearing table (catches insert/update/delete); docs = path|size|mtime per file.
#
# Exit status: 0 on success; non-zero as soon as a required step fails (set -e).
# Pruning is best-effort and never fails the run.
set -eu

apk add --no-cache aws-cli tar gzip findutils >/dev/null 2>&1 || { echo "ABORT apk add"; exit 1; }

# Required configuration must be set and non-empty.
: "${BUCKET:?}"; : "${PREFIX:?}"; : "${DB:?}"; : "${PGHOST:?}"
# These are expanded unconditionally further down; checking them here (set-ness
# only, exactly what `set -u` would enforce later) makes a misconfigured job fail
# up front instead of part-way through.
: "${PGUSER?}"; : "${PGPASSWORD?}"; : "${AWS_ENDPOINTS?}"

export AWS_DEFAULT_REGION="${AWS_DEFAULT_REGION:-us-east-1}"
# GCS / S3-compatible stores reject aws-cli v2.23+ default integrity checksums
# ("SignatureDoesNotMatch / Invalid argument") — only sign/validate when required.
export AWS_REQUEST_CHECKSUM_CALCULATION=when_required
export AWS_RESPONSE_CHECKSUM_VALIDATION=when_required

# S3 <aws s3 subcommand and args...>: aws-cli pointed at the configured endpoint.
S3() { aws --endpoint-url "$AWS_ENDPOINTS" s3 "$@"; }
# PSQL <sql>: run one statement against $DB; unaligned, tuples-only output.
PSQL() { PGPASSWORD="$PGPASSWORD" psql -h "$PGHOST" -U "$PGUSER" -d "$DB" -tAc "$1"; }

TS=$(date -u +%Y-%m-%dT%H-%M-%SZ)
echo "timestamp=$TS db=$DB -> s3://$BUCKET/$PREFIX"

# --- fingerprints: has either half changed since last time? ---
# Restrict to durable BUSINESS content — ignore volatile noise that changes every
# cron tick / page view (else a quiet ERP would never skip):
#   DB:   exclude llx_const, llx_user (login/counter churn), session/cron tables
#   docs: exclude */temp/* (Dolibarr stats cache regenerated constantly)
# Excludes are identical for the fingerprint AND the upload, so "unchanged" means
# "the backed-up set is unchanged".
DENY="'llx_const','llx_user','llx_session','llx_cronjob','llx_user_param'"
# GEN is itself SQL: one "select count(*), max(tms)" per tms-bearing table, joined
# with UNION ALL (or a constant row when no such table exists).
GEN=$(PSQL "select coalesce(string_agg(format('select count(*) c, coalesce(max(tms)::text,''0'') m from %I', table_name), ' union all '), 'select 0 c, ''0'' m') from information_schema.columns where column_name='tms' and table_schema='public' and table_name not in ($DENY)")
# Run the fingerprint query on its own so a psql failure aborts the script (set -e).
# Inside a pipeline its status would be hidden (no pipefail in /bin/sh) and the
# hash of empty output would be treated as a valid fingerprint.
FP_DB_ROWS=$(PSQL "$GEN")
FP_DB=$(printf '%s\n' "$FP_DB_ROWS" | sort | md5sum | cut -d' ' -f1)
FP_DOCS=$(find /docs -type f -not -path '*/temp/*' -printf '%p|%s|%T@\n' 2>/dev/null | sort | md5sum | cut -d' ' -f1)
# A missing/unreadable stored fingerprint yields an empty string, which never
# equals a real md5 and therefore forces a backup.
LAST_DB=$(S3 cp "s3://$BUCKET/$PREFIX/.fp-db" - 2>/dev/null || true)
LAST_DOCS=$(S3 cp "s3://$BUCKET/$PREFIX/.fp-docs" - 2>/dev/null || true)

uploaded=0

# --- database half ---
if [ "$FP_DB" != "$LAST_DB" ]; then
  pg_dump -h "$PGHOST" -U "$PGUSER" -d "$DB" -Fc -f /tmp/db.dump
  S3 cp /tmp/db.dump "s3://$BUCKET/$PREFIX/db/$TS.dump"
  # The fingerprint is stored only after the dump is uploaded, so a failed run is
  # retried next time.
  printf '%s' "$FP_DB" | S3 cp - "s3://$BUCKET/$PREFIX/.fp-db"
  echo "db: backed up ($(wc -c < /tmp/db.dump) bytes)"; uploaded=1
else
  echo "db: unchanged — skipped"
fi

# --- documents half ---
if [ "$FP_DOCS" != "$LAST_DOCS" ]; then
  # tar's stderr is kept out of the normal log but preserved in a file, so a
  # failure is reported instead of the script dying silently under set -e.
  tar_rc=0
  tar -C /docs --exclude='*/temp/*' -czf /tmp/docs.tar.gz . 2>/tmp/docs.tar.err || tar_rc=$?
  if [ "$tar_rc" -ne 0 ]; then
    echo "ABORT tar docs (exit $tar_rc)"
    cat /tmp/docs.tar.err >&2
    exit "$tar_rc"
  fi
  S3 cp /tmp/docs.tar.gz "s3://$BUCKET/$PREFIX/docs/$TS.tar.gz"
  printf '%s' "$FP_DOCS" | S3 cp - "s3://$BUCKET/$PREFIX/.fp-docs"
  echo "docs: backed up ($(wc -c < /tmp/docs.tar.gz) bytes)"; uploaded=1
else
  echo "docs: unchanged — skipped"
fi

# --- tiered retention prune (daily 30d / monthly 12m / yearly ~10y); always runs ---
# prune.py <keys-file> <timestamp>: prints the keys to delete, one per line.
# Keys whose first 10 characters are not a YYYY-MM-DD date are never printed.
cat > /tmp/prune.py <<'PY'
import sys, datetime

keys = [k.strip() for k in open(sys.argv[1]) if k.strip()]
now = datetime.datetime.strptime(sys.argv[2][:10], "%Y-%m-%d").date()

def d(k):
    """Date encoded in the first 10 characters of a key, or None if absent."""
    try:
        return datetime.datetime.strptime(k[:10], "%Y-%m-%d").date()
    except Exception:
        return None

# Oldest first, so the last assignment per month/year below is the newest key.
dated = sorted([(d(k), k) for k in keys if d(k)], key=lambda x: x[0])
keep = set(); bymonth = {}; byyear = {}
for dt, k in dated:
    age = (now - dt).days
    if age <= 30:
        keep.add(k)
    elif age <= 365:
        bymonth[(dt.year, dt.month)] = k
    elif age <= 3660:
        byyear[dt.year] = k
keep |= set(bymonth.values()) | set(byyear.values())
# Skip-if-unchanged means the newest archive may be the only copy of a half that
# has not changed for a very long time; never prune it, whatever its age.
if dated:
    keep.add(dated[-1][1])
for dt, k in dated:
    if k not in keep:
        print(k)
PY

for SUB in db docs; do
  # Listing and planning are best-effort: on failure the delete list is simply empty.
  S3 ls "s3://$BUCKET/$PREFIX/$SUB/" | awk '{print $4}' > "/tmp/keys.$SUB" || true
  python3 /tmp/prune.py "/tmp/keys.$SUB" "$TS" > "/tmp/del.$SUB" || true
  while IFS= read -r DK; do
    [ -n "$DK" ] || continue
    # A failed delete is not fatal; the key is retried on the next run.
    # stdin is detached so the command cannot consume the loop's input.
    if S3 rm "s3://$BUCKET/$PREFIX/$SUB/$DK" </dev/null; then
      echo "pruned $SUB/$DK"
    fi
  done < "/tmp/del.$SUB"
done

echo "DONE (uploaded=$uploaded)."