fix(dns): harden DNS resilience after power-cut incident
During the 2026-04-13 power-cut recovery, DNS resolution failures blocked the Longhorn reinstall.

Root causes:
- CoreDNS forwarded to a single hardcoded Pi-hole IP instead of both HA instances
- The main CoreDNS Corefile forwarded to /etc/resolv.conf, which pointed to itself on pi3
- Pi-hole lacked explicit upstream DNS and relied on DHCP-provided config
- The dnsmasq system service conflicted with pihole-FTL on port 53

Changes:
- k3s_dns: forward CoreDNS to both Pi-hole HA instances (pi1 + pi3) dynamically
- k3s_dns: update the main CoreDNS Corefile to forward to the Pi-holes instead of resolv.conf
- pihole defaults: add explicit upstream DNS servers (8.8.8.8, 1.1.1.1, 8.8.4.4)
- pihole ha_setup: write /etc/dnsmasq.d/99-upstream.conf with explicit upstreams
- rpi: add the dnsmasq user to the dip group and disable the conflicting dnsmasq service on Pi-hole nodes

See docs/adr/20260414-internal-dns-architecture.md for the full rationale.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -5,7 +5,7 @@
|
||||
gather_facts: false
|
||||
|
||||
vars:
|
||||
pihole_ip: "192.168.1.201"
|
||||
pihole_ips: "{{ groups['pihole'] | map('extract', hostvars) | map(attribute='preferred_ip') | list }}"
|
||||
coredns_namespace: "kube-system"
|
||||
|
||||
tasks:
|
||||
@@ -23,5 +23,38 @@
|
||||
arcodange.lab:53 {
|
||||
errors
|
||||
cache 30
|
||||
forward . {{ pihole_ip }}:53
|
||||
forward . {{ pihole_ips | map('regex_replace', '^(.*)$', '\1:53') | join(' ') }}
|
||||
}
|
||||
|
||||
# Rewrite the main CoreDNS Corefile (ConfigMap "coredns" in coredns_namespace)
# so the root zone forwards to every address in pihole_ips on port 53.
#
# The "*.server" import is placed AFTER the closing brace of the ".:53" block:
# files matched by that glob are expected to contain complete server blocks
# (for example "arcodange.lab:53 { ... }"), and a server block cannot be nested
# inside another one. Only "*.override" snippets belong inside ".:53".
#
# NOTE(review): k3s ships this ConfigMap as a packaged add-on and may re-apply
# its own copy when the server restarts - confirm that this change persists.
# NOTE(review): an empty pihole_ips renders "forward ." with no upstream, which
# CoreDNS is expected to reject - confirm the pihole group is never empty.
- name: "Mettre à jour le ConfigMap CoreDNS principal pour utiliser les Pi-holes HA"
  kubernetes.core.k8s:
    state: present
    definition:
      apiVersion: v1
      kind: ConfigMap
      metadata:
        name: coredns
        namespace: "{{ coredns_namespace }}"
      data:
        Corefile: |
          .:53 {
              errors
              health
              ready
              kubernetes cluster.local in-addr.arpa ip6.arpa {
                pods insecure
                fallthrough in-addr.arpa ip6.arpa
              }
              hosts /etc/coredns/NodeHosts {
                ttl 60
                reload 15s
                fallthrough
              }
              prometheus :9153
              cache 30
              loop
              reload
              import /etc/coredns/custom/*.override
              forward . {{ pihole_ips | map('regex_replace', '^(.*)$', '\1:53') | join(' ') }}
          }
          import /etc/coredns/custom/*.server
|
||||
|
||||
@@ -10,4 +10,18 @@
|
||||
ansible.builtin.hostname:
|
||||
name: "{{ inventory_hostname }}"
|
||||
become: yes
|
||||
when: inventory_hostname != ansible_hostname
|
||||
when: inventory_hostname != ansible_hostname
|
||||
|
||||
# On hosts in the "pihole" inventory group, add the dnsmasq account to the
# "dip" supplementary group.
- name: Ensure dnsmasq user is in dip group for Pi-hole DNS
  ansible.builtin.user:
    name: dnsmasq
    groups: dip
    # Append so the account keeps whatever supplementary groups it already has.
    append: true
  # Changing group membership needs root. The neighbouring hostname task
  # escalates per task, so this one does the same instead of relying on a
  # play-level setting that is not visible here.
  become: true
  when: "'pihole' in group_names"
|
||||
|
||||
# On hosts in the "pihole" inventory group, stop the standalone dnsmasq unit
# and keep it from starting at boot, so it does not compete with pihole-FTL
# for port 53.
# NOTE(review): this presumably fails on a node where no dnsmasq unit is
# installed - confirm every Pi-hole host ships the package.
- name: Disable dnsmasq service on Pi-hole nodes to avoid port 53 conflict with pihole-FTL
  ansible.builtin.systemd:
    name: dnsmasq
    state: stopped
    enabled: false
  # Stopping and disabling a system unit needs root. The neighbouring hostname
  # task escalates per task, so this one does the same.
  become: true
  when: "'pihole' in group_names"
|
||||
Reference in New Issue
Block a user