Changed order: reload network on the script node last

This commit is contained in:
duffyduck 2026-03-04 23:30:34 +01:00
parent 1083fbb375
commit c0e6f96498
1 changed files with 68 additions and 19 deletions

View File

@ -372,14 +372,25 @@ class Migrator:
def _update_network(self, plan: MigrationPlan, configs: dict,
dry_run: bool) -> bool:
"""Update /etc/network/interfaces and restart networking."""
for node in plan.nodes:
if not node.is_reachable or node.name not in configs['nodes']:
continue
"""Update /etc/network/interfaces and restart networking.
Strategy:
1. Copy staged interfaces to /etc/network/interfaces on ALL nodes first
2. Reload remote nodes with nohup + delay (fire-and-forget, SSH will die)
3. Reload local node LAST
This avoids cutting off our own SSH connectivity before reaching remote nodes.
"""
active_nodes = [
n for n in plan.nodes
if n.is_reachable and n.name in configs['nodes']
]
remote_nodes = [n for n in active_nodes if not n.is_local]
local_node = next((n for n in active_nodes if n.is_local), None)
# Phase 1: Copy staged interfaces on ALL nodes (no reload yet)
for node in active_nodes:
if dry_run:
print(f" [{node.name}] Würde /etc/network/interfaces aktualisieren")
print(f" [{node.name}] Würde 'ifreload -a' ausführen")
continue
staging = "/root/.network-migration-staged/interfaces"
@ -394,25 +405,63 @@ class Migrator:
print(f" [{node.name}] FEHLER: {err}")
return False
# Reload network - ifreload -a reloads ALL interfaces
rc, _, _ = self.ssh.run_on_node(
node.ssh_host, "which ifreload", node.is_local
)
if rc == 0:
reload_cmd = "ifreload -a"
else:
# Fallback: restart networking service
reload_cmd = "systemctl restart networking"
if dry_run:
for node in active_nodes:
print(f" [{node.name}] Würde 'ifreload -a' ausführen")
return True
print(f" [{node.name}] Netzwerk wird neu geladen ({reload_cmd})...")
# Determine reload command
rc, _, _ = self.ssh.execute_local("which ifreload")
reload_cmd = "ifreload -a" if rc == 0 else "systemctl restart networking"
# Phase 2: Reload REMOTE nodes first (fire-and-forget with nohup)
# The SSH connection will die when the remote network changes,
# so we use nohup + delay to let the SSH command return first.
for node in remote_nodes:
# nohup with 2s delay: SSH returns immediately, then network reloads
bg_cmd = (
f"nohup bash -c 'sleep 2 && {reload_cmd}' "
f">/tmp/ifreload.log 2>&1 &"
)
print(f" [{node.name}] Netzwerk-Reload geplant (fire-and-forget)...")
self.ssh.run_on_node(node.ssh_host, bg_cmd, False, timeout=10)
print(f" [{node.name}] {reload_cmd} wird in 2s ausgeführt")
# Phase 3: Reload LOCAL node last
if local_node:
print(f" [{local_node.name}] Netzwerk wird neu geladen ({reload_cmd})...")
rc, _, err = self.ssh.run_on_node(
node.ssh_host, reload_cmd, node.is_local, timeout=60
local_node.ssh_host, reload_cmd, True, timeout=60
)
if rc == 0:
print(f" [{node.name}] Netzwerk neu geladen")
print(f" [{local_node.name}] Netzwerk neu geladen")
else:
print(f" [{node.name}] WARNUNG beim Netzwerk-Reload: {err}")
# Don't fail here - the node might just be unreachable on old IP now
print(f" [{local_node.name}] WARNUNG beim Netzwerk-Reload: {err}")
# Wait for remote nodes to finish their reload
if remote_nodes:
wait_secs = 8
print(f"\n Warte {wait_secs}s bis alle Remote-Nodes ihr Netzwerk neu geladen haben...")
time.sleep(wait_secs)
# Verify: try to reach remote nodes on NEW IPs (with retries)
print(" [Verifikation] Prüfe Erreichbarkeit auf neuen IPs...")
for node in remote_nodes:
if not node.new_ip:
continue
reachable = False
for attempt in range(3):
reachable = self.ssh.is_reachable(node.new_ip)
if reachable:
break
if attempt < 2:
print(f" [{node.name}] {node.new_ip} noch nicht erreichbar, warte 5s...")
time.sleep(5)
if reachable:
print(f" [{node.name}] {node.new_ip} erreichbar")
else:
print(f" [{node.name}] {node.new_ip} NICHT erreichbar nach 3 Versuchen!")
print(f" [{node.name}] WARNUNG: Service-Start auf diesem Node könnte fehlschlagen")
return True