Changed reload order: reload the network on the script (local) node last
This commit is contained in:
parent
1083fbb375
commit
c0e6f96498
87
migrator.py
87
migrator.py
|
|
@ -372,14 +372,25 @@ class Migrator:
|
||||||
|
|
||||||
def _update_network(self, plan: MigrationPlan, configs: dict,
|
def _update_network(self, plan: MigrationPlan, configs: dict,
|
||||||
dry_run: bool) -> bool:
|
dry_run: bool) -> bool:
|
||||||
"""Update /etc/network/interfaces and restart networking."""
|
"""Update /etc/network/interfaces and restart networking.
|
||||||
for node in plan.nodes:
|
|
||||||
if not node.is_reachable or node.name not in configs['nodes']:
|
|
||||||
continue
|
|
||||||
|
|
||||||
|
Strategy:
|
||||||
|
1. Copy staged interfaces to /etc/network/interfaces on ALL nodes first
|
||||||
|
2. Reload remote nodes with nohup + delay (fire-and-forget, SSH will die)
|
||||||
|
3. Reload local node LAST
|
||||||
|
This avoids cutting off our own SSH connectivity before reaching remote nodes.
|
||||||
|
"""
|
||||||
|
active_nodes = [
|
||||||
|
n for n in plan.nodes
|
||||||
|
if n.is_reachable and n.name in configs['nodes']
|
||||||
|
]
|
||||||
|
remote_nodes = [n for n in active_nodes if not n.is_local]
|
||||||
|
local_node = next((n for n in active_nodes if n.is_local), None)
|
||||||
|
|
||||||
|
# Phase 1: Copy staged interfaces on ALL nodes (no reload yet)
|
||||||
|
for node in active_nodes:
|
||||||
if dry_run:
|
if dry_run:
|
||||||
print(f" [{node.name}] Würde /etc/network/interfaces aktualisieren")
|
print(f" [{node.name}] Würde /etc/network/interfaces aktualisieren")
|
||||||
print(f" [{node.name}] Würde 'ifreload -a' ausführen")
|
|
||||||
continue
|
continue
|
||||||
|
|
||||||
staging = "/root/.network-migration-staged/interfaces"
|
staging = "/root/.network-migration-staged/interfaces"
|
||||||
|
|
@ -394,25 +405,63 @@ class Migrator:
|
||||||
print(f" [{node.name}] FEHLER: {err}")
|
print(f" [{node.name}] FEHLER: {err}")
|
||||||
return False
|
return False
|
||||||
|
|
||||||
# Reload network - ifreload -a reloads ALL interfaces
|
if dry_run:
|
||||||
rc, _, _ = self.ssh.run_on_node(
|
for node in active_nodes:
|
||||||
node.ssh_host, "which ifreload", node.is_local
|
print(f" [{node.name}] Würde 'ifreload -a' ausführen")
|
||||||
)
|
return True
|
||||||
if rc == 0:
|
|
||||||
reload_cmd = "ifreload -a"
|
|
||||||
else:
|
|
||||||
# Fallback: restart networking service
|
|
||||||
reload_cmd = "systemctl restart networking"
|
|
||||||
|
|
||||||
print(f" [{node.name}] Netzwerk wird neu geladen ({reload_cmd})...")
|
# Determine reload command
|
||||||
|
rc, _, _ = self.ssh.execute_local("which ifreload")
|
||||||
|
reload_cmd = "ifreload -a" if rc == 0 else "systemctl restart networking"
|
||||||
|
|
||||||
|
# Phase 2: Reload REMOTE nodes first (fire-and-forget with nohup)
|
||||||
|
# The SSH connection will die when the remote network changes,
|
||||||
|
# so we use nohup + delay to let the SSH command return first.
|
||||||
|
for node in remote_nodes:
|
||||||
|
# nohup with 2s delay: SSH returns immediately, then network reloads
|
||||||
|
bg_cmd = (
|
||||||
|
f"nohup bash -c 'sleep 2 && {reload_cmd}' "
|
||||||
|
f">/tmp/ifreload.log 2>&1 &"
|
||||||
|
)
|
||||||
|
print(f" [{node.name}] Netzwerk-Reload geplant (fire-and-forget)...")
|
||||||
|
self.ssh.run_on_node(node.ssh_host, bg_cmd, False, timeout=10)
|
||||||
|
print(f" [{node.name}] {reload_cmd} wird in 2s ausgeführt")
|
||||||
|
|
||||||
|
# Phase 3: Reload LOCAL node last
|
||||||
|
if local_node:
|
||||||
|
print(f" [{local_node.name}] Netzwerk wird neu geladen ({reload_cmd})...")
|
||||||
rc, _, err = self.ssh.run_on_node(
|
rc, _, err = self.ssh.run_on_node(
|
||||||
node.ssh_host, reload_cmd, node.is_local, timeout=60
|
local_node.ssh_host, reload_cmd, True, timeout=60
|
||||||
)
|
)
|
||||||
if rc == 0:
|
if rc == 0:
|
||||||
print(f" [{node.name}] Netzwerk neu geladen")
|
print(f" [{local_node.name}] Netzwerk neu geladen")
|
||||||
else:
|
else:
|
||||||
print(f" [{node.name}] WARNUNG beim Netzwerk-Reload: {err}")
|
print(f" [{local_node.name}] WARNUNG beim Netzwerk-Reload: {err}")
|
||||||
# Don't fail here - the node might just be unreachable on old IP now
|
|
||||||
|
# Wait for remote nodes to finish their reload
|
||||||
|
if remote_nodes:
|
||||||
|
wait_secs = 8
|
||||||
|
print(f"\n Warte {wait_secs}s bis alle Remote-Nodes ihr Netzwerk neu geladen haben...")
|
||||||
|
time.sleep(wait_secs)
|
||||||
|
|
||||||
|
# Verify: try to reach remote nodes on NEW IPs (with retries)
|
||||||
|
print(" [Verifikation] Prüfe Erreichbarkeit auf neuen IPs...")
|
||||||
|
for node in remote_nodes:
|
||||||
|
if not node.new_ip:
|
||||||
|
continue
|
||||||
|
reachable = False
|
||||||
|
for attempt in range(3):
|
||||||
|
reachable = self.ssh.is_reachable(node.new_ip)
|
||||||
|
if reachable:
|
||||||
|
break
|
||||||
|
if attempt < 2:
|
||||||
|
print(f" [{node.name}] {node.new_ip} noch nicht erreichbar, warte 5s...")
|
||||||
|
time.sleep(5)
|
||||||
|
if reachable:
|
||||||
|
print(f" [{node.name}] {node.new_ip} erreichbar")
|
||||||
|
else:
|
||||||
|
print(f" [{node.name}] {node.new_ip} NICHT erreichbar nach 3 Versuchen!")
|
||||||
|
print(f" [{node.name}] WARNUNG: Service-Start auf diesem Node könnte fehlschlagen")
|
||||||
|
|
||||||
return True
|
return True
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue