#!/usr/bin/env bash
# Audit one Arcodange invoice end-to-end.
#
# Usage:
#   audit-invoice.sh <invoice-id>
#
# JSON side: pulls /invoices/{id} + /thirdparties/{socid}, prints facts
# (ref, dates, HT, TVA, TTC, paye flag, mode_reglement, cond_reglement).
#
# PDF side: pulls /documents/download, base64-decodes, runs pdftotext,
# then checks STRUCTURAL PRESENCE of mandatory French invoice mentions.
# We do NOT match against static/config/company.json — that file holds
# placeholder values today; Dolibarr renders the real legal data from
# its own setup. So we check shape, not exact strings.
#
# Mandatory-mention checklist (codified for SARL / SAS / EURL Arcodange):
#   - Forme juridique (any of SARL / SAS / EURL / SA / SCI / SASU)
#   - SIRET (14-digit number, optionally space-separated)
#   - TVA intracom (FR + 2 chars + 9 digits)
#   - RCS Évry / RCS Evry / R.C.S. Évry
#   - NAF-APE code (NNNNL pattern)
#   - Capital social ("capital" anywhere)
#   - TVA 259-1° CGI (autoliquidation for non-EU prestations de services)
#   - L.441-10 — late-payment penalties (BCE+10 or 12,15 %)
#   - 40 € indemnité forfaitaire (Décret 2012-1115)
#   - L.123-22 / R.123-237 identification mentions
#
# Exits 0 if every check passes, 1 otherwise (2 on usage error or when a
# required tool is missing).
#
# Requires: curl, python3, jq, pdftotext (brew install poppler).

set -euo pipefail

SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
DOL_CURL="${SCRIPT_DIR}/../../dolibarr/scripts/dol-curl.sh"

if [[ $# -lt 1 ]]; then
  printf '%s\n' \
    "audit-invoice.sh: missing invoice id." \
    "Usage: audit-invoice.sh <invoice-id>" >&2
  exit 2
fi
INVOICE_ID="$1"

# Fail early (exit 2) when a tool this script calls directly is absent.
command -v pdftotext >/dev/null || { echo "audit-invoice.sh: pdftotext not found (brew install poppler)" >&2; exit 2; }
command -v jq >/dev/null || { echo "audit-invoice.sh: jq not found" >&2; exit 2; }
command -v python3 >/dev/null || { echo "audit-invoice.sh: python3 not found" >&2; exit 2; }

# inv_field FILTER — apply a jq filter to the invoice JSON, raw output.
inv_field() {
  jq -r "$1" <<<"${INV_JSON}"
}

# ts_to_date EPOCH — print YYYY-MM-DD (python's local-time conversion),
# or "-" when EPOCH is 0.
ts_to_date() {
  python3 -c "import datetime,sys; ts=int(sys.argv[1]); print(datetime.datetime.fromtimestamp(ts).strftime('%Y-%m-%d') if ts else '-')" "$1"
}

# --- JSON side -------------------------------------------------------------
INV_JSON="$("${DOL_CURL}" "/invoices/${INVOICE_ID}")"

SOCID="$(inv_field .socid)"
REF="$(inv_field .ref)"
# `tonumber? // 0` keeps an empty-string or otherwise non-numeric date from
# aborting the whole script under `set -e`; it is then rendered as "-".
DATE_TS="$(inv_field '(.date // 0) | (tonumber? // 0) | floor')"
DUE_TS="$(inv_field '(.date_lim_reglement // 0) | (tonumber? // 0) | floor')"
HT="$(inv_field .total_ht)"
TVA="$(inv_field .total_tva)"
TTC="$(inv_field .total_ttc)"
PAYE="$(inv_field .paye)"
COND="$(inv_field '.cond_reglement_code // "-"')"
PDF_PATH="$(inv_field .last_main_doc)"

# Without a socid the response is not a usable invoice; stop here instead of
# querying /thirdparties/null and auditing nothing.
if [[ -z "${SOCID}" || "${SOCID}" == "null" ]]; then
  echo "audit-invoice.sh: invoice ${INVOICE_ID} not found (no socid in API response)" >&2
  exit 1
fi

DATE_HUMAN="$(ts_to_date "${DATE_TS}")"
DUE_HUMAN="$(ts_to_date "${DUE_TS}")"

TP_JSON="$("${DOL_CURL}" "/thirdparties/${SOCID}")"
TP_NAME="$(jq -r '.name // .ref' <<<"${TP_JSON}")"

if [[ "${PAYE}" == "1" ]]; then
  PAYE_HUMAN="PAID"
else
  PAYE_HUMAN="UNPAID"
fi

# Heuristic: TVA == 0 -> likely reverse-charge (KM is in the US, 259-1° CGI).
TVA_NOTE=""
# Accept any spelling of zero ("0", "0.00", "0.00000000", ...) rather than a
# fixed list of three strings.
zero_re='^0+([.,]0+)?$'
if [[ "${TVA}" =~ ${zero_re} ]]; then
  TVA_NOTE="(TVA=0 → reverse-charge expected; the 259-1° CGI mention MUST be on the PDF)"
fi

# NOTE(review): in the reviewed copy of this script everything between
# `cat <` and the PDF-download error branch was lost (text inside angle
# brackets was stripped). The summary below is rebuilt from the variables the
# JSON side defines — confirm wording and layout against the original.
cat <<EOF

 Invoice ${REF} (id ${INVOICE_ID})
 Customer: ${TP_NAME} (socid ${SOCID})
 Date: ${DATE_HUMAN}
 Due: ${DUE_HUMAN}
 HT / TVA / TTC: ${HT} / ${TVA} / ${TTC} ${TVA_NOTE}
 Status: ${PAYE_HUMAN}
 Cond. reglement: ${COND}
 PDF: ${PDF_PATH}
EOF

# --- PDF side --------------------------------------------------------------
# NOTE(review): reconstructed download step. The file header names
# /documents/download; the modulepart/original_file parameters, the
# "facture/" prefix stripping and the base64 `.content` field follow the
# Dolibarr REST API as documented — confirm against the original call.
if [[ -z "${PDF_PATH}" || "${PDF_PATH}" == "null" ]]; then
  echo "audit-invoice.sh: invoice ${INVOICE_ID} has no generated PDF (last_main_doc is empty)" >&2
  exit 1
fi
# URL-encode the path so refs containing spaces or parentheses survive.
ORIGINAL_FILE="$(jq -rn --arg p "${PDF_PATH#facture/}" '$p | @uri')"
PDF_B64="$(
  "${DOL_CURL}" "/documents/download?modulepart=facture&original_file=${ORIGINAL_FILE}" \
    | jq -r '.content // empty'
)"
if [[ -z "${PDF_B64}" ]]; then
  echo "audit-invoice.sh: could not download the PDF of invoice ${INVOICE_ID} (${PDF_PATH})" >&2
  exit 1
fi

# One private temp dir, cleanup trap installed immediately after creating it,
# so nothing leaks if a later step fails. The explicit template works with
# both GNU and BSD mktemp.
TMP_DIR="$(mktemp -d "${TMPDIR:-/tmp}/dolaudit.XXXXXX")"
trap 'rm -rf -- "${TMP_DIR}"' EXIT
PDF_TMP="${TMP_DIR}/invoice.pdf"
TXT_TMP="${TMP_DIR}/invoice.txt"

# --decode is the spelling shared by GNU and macOS base64 (-d vs -D differ).
printf '%s' "${PDF_B64}" | base64 --decode > "${PDF_TMP}"
pdftotext -layout "${PDF_TMP}" "${TXT_TMP}"

pass_count=0
fail_count=0

#######################################
# Record whether an extended regex matches anywhere in the extracted text.
# Globals:   TXT_TMP (read), pass_count / fail_count (incremented)
# Arguments: $1 - human-readable label, $2 - grep -E pattern
# Outputs:   one "[OK]" or "[XX]" line on stdout
#######################################
check() {
  local label="$1" pat="$2"
  if grep -E -q -- "${pat}" "${TXT_TMP}"; then
    printf ' [OK] %s\n' "${label}"
    pass_count=$((pass_count + 1))
  else
    printf ' [XX] %s (looked for: %s)\n' "${label}" "${pat}"
    fail_count=$((fail_count + 1))
  fi
}

echo
echo " Mandatory-mention audit — structural presence on the PDF:"

# Patterns are single-quoted so backslashes reach grep unchanged.
# Accented letters are written as (e|é) alternations instead of [eé] bracket
# expressions: a bracket expression only works in a UTF-8 locale, an
# alternation also matches byte-wise under LC_ALL=C.

# Legal form must stand alone as a word, so "VISA " or "PASSAS" do not pass
# the check and "SA" at end of line does.
check "Forme juridique (SARL/SAS/EURL/SA/SCI/SASU)" '(^|[^[:alpha:]])(SARL|SASU|SAS|EURL|SCI|SA)($|[^[:alpha:]])'
# 14 digits, optionally space-separated ("123 456 789 00012").
check "SIRET (14 digits)" 'SIRET[[:space:]:]*[0-9]([[:space:]]?[0-9]){13}'
# "TVA: FR…", "N° TVA : FR…", "TVA intracom. : FR…"; FR + 2 chars + 9 digits.
check "Numéro TVA intracom (FR + 11 chars)" 'TVA([[:space:]]+intra[[:alpha:].]*)?[[:space:]:]*FR[[:space:]]?[0-9A-Z]{2}([[:space:]]?[0-9]){9}'
check "RCS / R.C.S. Évry" 'R\.?C\.?S\.?.*(E|É)vry'
check "NAF-APE code" '(NAF|APE)[[:space:]:-]*[0-9]{4}[A-Z]'
check "Capital social" '[Cc]apital'
check "TVA 259-1° CGI / autoliquidation" '(259-?1|autoliquidation|reverse[- ]charge)'
# Article numbers are accepted with or without the space after "L." / "R.".
check "L.441-10 — BCE+10 or 12,15 %" '(L\.?[[:space:]]?441-10|BCE[[:space:]]*\+[[:space:]]*10|12[.,]15[[:space:]]*%)'
check "40 € indemnité forfaitaire" '(40[[:space:]]*€|indemnit(e|é) forfaitaire|D(e|é)cret 2012-1115)'
check "L.123-22 / R.123-237" '(L\.?[[:space:]]?123-22|R\.?[[:space:]]?123-237)'

# Surface the real legal identifiers extracted from the PDF — useful for
# cross-checking against the cohort review without re-running pdftotext.
#######################################
# Print the first occurrence of an extended regex in a file, or nothing.
# Arguments: $1 - grep -E pattern, $2 - file to search
# Outputs:   the first matched text on stdout (empty when there is no match)
# Returns:   always 0 — a miss is informational here, and `|| true` also
#            absorbs grep's non-zero status under `set -o pipefail`.
#######################################
first_match() {
  grep -E -o -- "$1" "$2" | head -n 1 || true
}

# Extraction patterns. SIRET accepts space-separated digits; the TVA number
# is FR + 2 chars + 9 digits and may be introduced by "TVA intracom…".
# Accents use (e|é) alternations so matching does not depend on a UTF-8
# locale.
SIRET_RE='SIRET[[:space:]:]*[0-9]([[:space:]]?[0-9]){13}'
TVA_RE='TVA([[:space:]]+intra[[:alpha:].]*)?[[:space:]:]*FR[[:space:]]?[0-9A-Z]{2}([[:space:]]?[0-9]){9}'
APE_RE='(NAF|APE)[[:space:]:-]*[0-9]{4}[A-Z]'
FORM_RE='(SARL|SASU|SAS|EURL|SCI|Soci(e|é)t(e|é) [^[:cntrl:]]{0,40})'

echo
echo " Real identifiers extracted from the PDF (informational):"
SIRET_FOUND="$(first_match "${SIRET_RE}" "${TXT_TMP}")"
TVA_FOUND="$(first_match "${TVA_RE}" "${TXT_TMP}")"
APE_FOUND="$(first_match "${APE_RE}" "${TXT_TMP}")"
FORM_FOUND="$(first_match "${FORM_RE}" "${TXT_TMP}")"

echo " ${FORM_FOUND:-(forme juridique not found)}"
echo " ${SIRET_FOUND:-(SIRET not found)}"
echo " ${TVA_FOUND:-(TVA intracom not found)}"
echo " ${APE_FOUND:-(NAF-APE not found)}"

echo
echo " ${pass_count} pass / ${fail_count} fail"

# Must stay the last command: its status is the script's exit code
# (0 when every check passed, 1 otherwise).
[[ ${fail_count} -eq 0 ]]