Changed order: reload the network on the script node last
This commit is contained in:
parent
1083fbb375
commit
c0e6f96498
87
migrator.py
87
migrator.py
|
|
@@ -372,14 +372,25 @@ class Migrator:
|
|||
|
||||
def _update_network(self, plan: MigrationPlan, configs: dict,
|
||||
dry_run: bool) -> bool:
|
||||
"""Update /etc/network/interfaces and restart networking."""
|
||||
for node in plan.nodes:
|
||||
if not node.is_reachable or node.name not in configs['nodes']:
|
||||
continue
|
||||
"""Update /etc/network/interfaces and restart networking.
|
||||
|
||||
Strategy:
|
||||
1. Copy staged interfaces to /etc/network/interfaces on ALL nodes first
|
||||
2. Reload remote nodes with nohup + delay (fire-and-forget, SSH will die)
|
||||
3. Reload local node LAST
|
||||
This avoids cutting off our own SSH connectivity before reaching remote nodes.
|
||||
"""
|
||||
active_nodes = [
|
||||
n for n in plan.nodes
|
||||
if n.is_reachable and n.name in configs['nodes']
|
||||
]
|
||||
remote_nodes = [n for n in active_nodes if not n.is_local]
|
||||
local_node = next((n for n in active_nodes if n.is_local), None)
|
||||
|
||||
# Phase 1: Copy staged interfaces on ALL nodes (no reload yet)
|
||||
for node in active_nodes:
|
||||
if dry_run:
|
||||
print(f" [{node.name}] Würde /etc/network/interfaces aktualisieren")
|
||||
print(f" [{node.name}] Würde 'ifreload -a' ausführen")
|
||||
continue
|
||||
|
||||
staging = "/root/.network-migration-staged/interfaces"
|
||||
|
|
@@ -394,25 +405,63 @@ class Migrator:
|
|||
print(f" [{node.name}] FEHLER: {err}")
|
||||
return False
|
||||
|
||||
# Reload network - ifreload -a reloads ALL interfaces
|
||||
rc, _, _ = self.ssh.run_on_node(
|
||||
node.ssh_host, "which ifreload", node.is_local
|
||||
)
|
||||
if rc == 0:
|
||||
reload_cmd = "ifreload -a"
|
||||
else:
|
||||
# Fallback: restart networking service
|
||||
reload_cmd = "systemctl restart networking"
|
||||
if dry_run:
|
||||
for node in active_nodes:
|
||||
print(f" [{node.name}] Würde 'ifreload -a' ausführen")
|
||||
return True
|
||||
|
||||
print(f" [{node.name}] Netzwerk wird neu geladen ({reload_cmd})...")
|
||||
# Determine reload command
|
||||
rc, _, _ = self.ssh.execute_local("which ifreload")
|
||||
reload_cmd = "ifreload -a" if rc == 0 else "systemctl restart networking"
|
||||
|
||||
# Phase 2: Reload REMOTE nodes first (fire-and-forget with nohup)
|
||||
# The SSH connection will die when the remote network changes,
|
||||
# so we use nohup + delay to let the SSH command return first.
|
||||
for node in remote_nodes:
|
||||
# nohup with 2s delay: SSH returns immediately, then network reloads
|
||||
bg_cmd = (
|
||||
f"nohup bash -c 'sleep 2 && {reload_cmd}' "
|
||||
f">/tmp/ifreload.log 2>&1 &"
|
||||
)
|
||||
print(f" [{node.name}] Netzwerk-Reload geplant (fire-and-forget)...")
|
||||
self.ssh.run_on_node(node.ssh_host, bg_cmd, False, timeout=10)
|
||||
print(f" [{node.name}] {reload_cmd} wird in 2s ausgeführt")
|
||||
|
||||
# Phase 3: Reload LOCAL node last
|
||||
if local_node:
|
||||
print(f" [{local_node.name}] Netzwerk wird neu geladen ({reload_cmd})...")
|
||||
rc, _, err = self.ssh.run_on_node(
|
||||
node.ssh_host, reload_cmd, node.is_local, timeout=60
|
||||
local_node.ssh_host, reload_cmd, True, timeout=60
|
||||
)
|
||||
if rc == 0:
|
||||
print(f" [{node.name}] Netzwerk neu geladen")
|
||||
print(f" [{local_node.name}] Netzwerk neu geladen")
|
||||
else:
|
||||
print(f" [{node.name}] WARNUNG beim Netzwerk-Reload: {err}")
|
||||
# Don't fail here - the node might just be unreachable on old IP now
|
||||
print(f" [{local_node.name}] WARNUNG beim Netzwerk-Reload: {err}")
|
||||
|
||||
# Wait for remote nodes to finish their reload
|
||||
if remote_nodes:
|
||||
wait_secs = 8
|
||||
print(f"\n Warte {wait_secs}s bis alle Remote-Nodes ihr Netzwerk neu geladen haben...")
|
||||
time.sleep(wait_secs)
|
||||
|
||||
# Verify: try to reach remote nodes on NEW IPs (with retries)
|
||||
print(" [Verifikation] Prüfe Erreichbarkeit auf neuen IPs...")
|
||||
for node in remote_nodes:
|
||||
if not node.new_ip:
|
||||
continue
|
||||
reachable = False
|
||||
for attempt in range(3):
|
||||
reachable = self.ssh.is_reachable(node.new_ip)
|
||||
if reachable:
|
||||
break
|
||||
if attempt < 2:
|
||||
print(f" [{node.name}] {node.new_ip} noch nicht erreichbar, warte 5s...")
|
||||
time.sleep(5)
|
||||
if reachable:
|
||||
print(f" [{node.name}] {node.new_ip} erreichbar")
|
||||
else:
|
||||
print(f" [{node.name}] {node.new_ip} NICHT erreichbar nach 3 Versuchen!")
|
||||
print(f" [{node.name}] WARNUNG: Service-Start auf diesem Node könnte fehlschlagen")
|
||||
|
||||
return True
|
||||
|
||||
|
|
|
|||
Loading…
Reference in New Issue