email-list.sh gains two hard-exclusion filters (applied before the candidate test, regardless of attachments):

- EXCLUDE_PATTERN matches subjects starting with Invitation: / Updated invitation: / Canceled event: / Accepted: / Declined: / Tentative: / Maybe: (after stripping Re:/Fwd:/Tr: prefixes). It filters out Google Calendar events, which always carry an .ics attachment.
- EXCLUDE_SENDER matches updates.<domain>, noreply@*calendar, news@, newsletter@. It filters out newsletter blast traffic.

Effect on the --all-folders --candidates-only baseline: 27 noisy → 12 actionable (calendar invites + the staying-ahead.ai newsletter blast removed).

Real supplier docs intact: Darnis F1042 in /Notification, 3 Free Mobile factures in /Inbox/abonnements, Mistral + Anthropic in /Inbox/books.

The originally planned --mark-ingested feature is deferred to V8.2: setting a flag requires the Zoho OAuth scope ZohoMail.messages.UPDATE, which our read-only refresh_token doesn't have. Documented in SKILL.md: once the user opts in to the wider scope, --mark-ingested becomes a one-line flag on email-inspect.sh and is_candidate() learns to skip flag_info messages.

Captured the new --all-folders baseline at examples/email-list-all-folders.txt.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
142 lines
5.7 KiB
Bash
Executable File
142 lines
5.7 KiB
Bash
Executable File
#!/usr/bin/env bash
# List candidate supplier-invoice emails from the books@ Zoho mailbox.
#
# Usage:
#   email-list.sh [--folder PATH]       # default: /Inbox/books (the books@ alias-filtered folder)
#                 [--limit N]           # default: 30
#                 [--candidates-only]   # keep only candidate messages (see below)
#                 [--all-folders]       # scan every folder (slow, lots of API calls)
#
# Output: table with date, cand, att, messageId, folder, from, subject.
# A "candidate" is a message whose subject matches a supplier-like pattern
#   (facture/invoice/receipt/reçu/payment/paiement/abonnement/subscription/
#   order/commande/bill) OR which has an attachment — unless it is a calendar
#   invite/reply or comes from a newsletter-style sender; those are always
#   excluded.

set -euo pipefail

SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
ZOHO_CURL="${SCRIPT_DIR}/zoho-curl.sh"

# Defaults, overridable from the command line.
FOLDER="/Inbox/books"
LIMIT=30
CANDIDATES_ONLY=0
ALL_FOLDERS=0

# Print the header comment: everything from line 2 up to the first line that
# is not a comment. Not tied to fixed line numbers, so the help text cannot be
# truncated when the header grows or shrinks.
usage() {
  awk 'NR == 1 { next } !/^#/ { exit } { sub(/^# ?/, ""); print }' "$0"
}

# Fail with a clear message when an option that takes a value is the last
# argument (otherwise "$2" trips `set -u` with a cryptic unbound-variable error).
#   $1 = option name, $2 = number of arguments left (option included)
require_value() {
  if [[ "$2" -lt 2 ]]; then
    echo "email-list.sh: $1 requires a value" >&2
    exit 2
  fi
}

while [[ $# -gt 0 ]]; do
  case "$1" in
    --folder)          require_value "$1" "$#"; FOLDER="$2"; shift 2 ;;
    --limit)           require_value "$1" "$#"; LIMIT="$2"; shift 2 ;;
    --candidates-only) CANDIDATES_ONLY=1; shift ;;
    --all-folders)     ALL_FOLDERS=1; shift ;;
    -h|--help)         usage; exit 0 ;;
    *) echo "email-list.sh: unknown arg: $1" >&2; exit 2 ;;
  esac
done

# LIMIT is interpolated into the request URL further down, so only accept a
# plain positive integer.
if ! [[ "${LIMIT}" =~ ^[1-9][0-9]*$ ]]; then
  echo "email-list.sh: --limit must be a positive integer (got '${LIMIT}')" >&2
  exit 2
fi
# Scratch directory for the API responses; removed again on every exit path.
WORK="$(mktemp -d -t emailist.XXXXXX)"
trap 'rm -rf "${WORK}"' EXIT

# 1. Discover accountId
# Take the accountId of the first entry in the /accounts response (empty
# string when the response carries no accounts).
"${ZOHO_CURL}" /accounts > "${WORK}/accounts.json"
AID=$(python3 -c "
import json, sys
accounts = json.load(open(sys.argv[1])).get('data') or [{}]
print(accounts[0].get('accountId', ''))" "${WORK}/accounts.json")
if [[ -z "${AID}" ]]; then
  echo "email-list.sh: no accountId in /accounts response" >&2
  exit 1
fi
# 2. Resolve folder path → folderId
"${ZOHO_CURL}" "/accounts/${AID}/folders" > "${WORK}/folders.json"

# Build the list of folders to scan, one "folderId|path" line per folder.
# Entries without a folderId are dropped: they cannot be queried and would
# otherwise be emitted as "None|path".
if [[ "${ALL_FOLDERS}" == "1" ]]; then
  FOLDER_IDS=$(python3 -c "
import json, sys

SKIP = ('/Drafts', '/Templates', '/Snoozed', '/Sent', '/Spam', '/Trash', '/Outbox')

with open(sys.argv[1]) as fh:
    folders = json.load(fh).get('data') or []
for f in folders:
    fid = f.get('folderId')
    if not fid:
        continue
    path = f.get('path') or f.get('folderName', '-')
    # Skip noisy system folders, including anything nested under them
    if any(path == s or path.startswith(s + '/') for s in SKIP):
        continue
    print(str(fid) + '|' + path)" "${WORK}/folders.json")
else
  # Single-folder mode: exact match on the folder path.
  FOLDER_IDS=$(python3 -c "
import json, sys

with open(sys.argv[1]) as fh:
    folders = json.load(fh).get('data') or []
target = sys.argv[2]
for f in folders:
    if f.get('path') == target and f.get('folderId'):
        print(str(f['folderId']) + '|' + target)
        break" "${WORK}/folders.json" "${FOLDER}")
  if [[ -z "${FOLDER_IDS}" ]]; then
    # Help the user pick a valid --folder by listing what the account has.
    echo "email-list.sh: folder '${FOLDER}' not found. Available:" >&2
    python3 -c "
import json, sys

with open(sys.argv[1]) as fh:
    folders = json.load(fh).get('data') or []
for f in folders:
    print('  ' + (f.get('path') or '-'))" "${WORK}/folders.json" >&2
    exit 2
  fi
fi
# 3. Fetch messages per folder
# Each folder's listing is saved as msgs/NNN.json, with the folder path next to
# it in msgs/NNN.json.path so the renderer can label the rows.
mkdir -p "${WORK}/msgs"
COUNT=0
while IFS='|' read -r fid fpath; do
  [[ -z "${fid}" ]] && continue
  COUNT=$((COUNT+1))
  out="${WORK}/msgs/$(printf '%03d' "${COUNT}").json"
  # Best effort: one failing folder must not abort the whole scan, but say so
  # on stderr — otherwise a failed folder is indistinguishable from an empty one.
  if ! "${ZOHO_CURL}" "/accounts/${AID}/messages/view?folderId=${fid}&limit=${LIMIT}&sortorder=false&start=1" > "${out}" 2>/dev/null; then
    echo "email-list.sh: warning: could not list messages in '${fpath}', skipping" >&2
    echo '{"data":[]}' > "${out}"
  fi
  # printf rather than echo: the path is arbitrary text and must be written verbatim.
  printf '%s\n' "${fpath}" > "${out}.path"
done <<< "${FOLDER_IDS}"
# 4. Render
python3 - "${WORK}/msgs" "${CANDIDATES_ONLY}" <<'PY'
import datetime
import glob
import json
import os
import re
import sys

# argv[1]: directory holding the per-folder JSON dumps
# argv[2]: "1" when --candidates-only was given
msgs_dir, candidates_only_str = sys.argv[1:3]
candidates_only = (candidates_only_str == "1")
# Supplier-like keywords (French + English). Deliberately a loose substring
# match: the listing favours recall, a human reviews the result.
CANDIDATE_PATTERN = re.compile(
    r'facture|invoice|receipt|re[cç]u|payment|paiement|abonnement|subscription|order|commande|bill',
    re.IGNORECASE,
)

# Subjects that look like calendar invites / event updates get filtered out of
# --candidates-only — they always have a .ics attachment, so the
# "has-attachment" heuristic alone would flag them as false positives.
EXCLUDE_PATTERN = re.compile(
    # Skip any run of reply/forward prefixes: Re: / Fw: / Fwd: / Tr:, also in
    # the "RE :" form with a space before the colon.
    r'^(?:(?:re|fwd?|tr)\s*:\s*)*'
    r'(?:invitation|updated\s+invitation|cancell?ed\s+event|accepted|declined|tentative|maybe)\s*:',
    re.IGNORECASE,
)

# Senders that are pure noise — newsletter/marketing patterns.
EXCLUDE_SENDER = re.compile(
    r'updates\.|noreply@.*calendar|@calendar\.|news@|newsletter@',
    re.IGNORECASE,
)


def is_candidate(m):
    """Return True when message dict *m* looks like a supplier document.

    Reads the "subject", "fromAddress" (falling back to "sender") and
    "hasAttachment" keys; missing or None values are treated as empty.

    Hard exclusions win over inclusions: a calendar-style subject or a
    newsletter-style sender is rejected even when the message has an
    attachment or a matching keyword. Otherwise the message qualifies if
    "hasAttachment" stringifies to "1" or the subject matches
    CANDIDATE_PATTERN.
    """
    subj = m.get("subject", "") or ""
    sender = m.get("fromAddress", "") or m.get("sender", "") or ""
    # Hard exclusions take precedence over inclusions
    if EXCLUDE_PATTERN.match(subj.strip()):
        return False
    if EXCLUDE_SENDER.search(sender):
        return False
    if str(m.get("hasAttachment", "")) == "1":
        return True
    return bool(CANDIDATE_PATTERN.search(subj))
import html  # needed only by the sender clean-up in this section


def _folder_label(json_path):
    """Return the folder path stored next to a dump ("-" if it cannot be read)."""
    try:
        with open(json_path + ".path", encoding="utf-8") as fh:
            return fh.read().strip()
    except OSError:
        return "-"


def _load_messages(json_path):
    """Return the message dicts of one per-folder dump ([] when unusable).

    The file is read as UTF-8 explicitly: with the locale default, a C/POSIX
    locale makes any accented subject raise, and the folder would be dropped.
    """
    try:
        with open(json_path, encoding="utf-8") as fh:
            payload = json.load(fh)
    except (OSError, ValueError):  # unreadable file, bad encoding, or invalid JSON
        return []
    data = payload.get("data") if isinstance(payload, dict) else None
    if not isinstance(data, list):
        return []
    return [m for m in data if isinstance(m, dict)]


def _sent_date(m):
    """Format the message timestamp (epoch milliseconds) as YYYY-MM-DD, or "-"."""
    try:
        ts = int(m.get("sentDateInGMT") or m.get("receivedTime") or 0) // 1000
        return datetime.datetime.fromtimestamp(ts).strftime("%Y-%m-%d") if ts else "-"
    except (TypeError, ValueError, OverflowError, OSError):
        return "-"


def _clean_sender(m):
    """Return the sender without angle brackets, truncated to the column width."""
    raw = m.get("fromAddress") or m.get("sender") or "-"
    # Decode HTML entities first so that "&lt;a@b.c&gt;" loses its brackets too.
    return html.unescape(str(raw)).replace("<", "").replace(">", "")[:36]


rows = []
for f in sorted(glob.glob(os.path.join(msgs_dir, "*.json"))):
    fpath = _folder_label(f)
    for m in _load_messages(f):
        flagged = is_candidate(m)  # evaluated once, used for filter and marker
        if candidates_only and not flagged:
            continue
        rows.append((
            _sent_date(m),
            fpath,
            "*" if flagged else " ",
            "Y" if str(m.get("hasAttachment", "")) == "1" else " ",
            str(m.get("messageId") or "-"),
            _clean_sender(m),
            str(m.get("subject") or "-")[:55],
        ))

# Newest first; the sort is stable, so same-day rows keep their folder order.
rows.sort(key=lambda r: r[0], reverse=True)
print(f"{'date':<10} {'cand':<4} {'att':<3} {'messageId':<22} {'folder':<22} {'from':<36} subject")
print("-" * 130)
for dt, fpath, cand, has, mid, frm, subj in rows:
    print(f"{dt:<10} [{cand}] [{has}] {mid:<22} {fpath[:22]:<22} {frm:<36} {subj}")
print("-" * 130)
print(f"# {len(rows)} message(s)" + (" (candidates only)" if candidates_only else ""))
PY
|