proxmox-cluster-network-cha.../verifier.py

113 lines
4.0 KiB
Python

"""Phase 5: Verify the migration was successful."""
import re
import shlex
import time

from models import MigrationPlan
from ssh_manager import SSHManager
class Verifier:
    """Verify the cluster state after the migration (phase 5).

    Each check prints a human-readable report to stdout; ``run`` combines
    the individual results into a single overall verdict.
    """

    # ``pvecm status`` pads its columns (e.g. "Quorate:          Yes"), so
    # the amount of whitespace after the colon must not be hard-coded.
    _QUORATE_RE = re.compile(r"^\s*Quorate:\s*Yes\b", re.MULTILINE)

    # Only ``pvecm status`` lines containing one of these markers are echoed.
    _CLUSTER_STATUS_KEYS = (
        'Quorate:', 'Nodes:', 'Node name', 'Total votes', 'Expected votes',
    )

    # Upper bound on the raw ``ceph osd tree`` lines that are considered.
    _OSD_TREE_MAX_LINES = 20

    def __init__(self, ssh: "SSHManager"):
        """Store the SSH manager.

        Args:
            ssh: Object used to run commands on the local host via
                ``execute_local(cmd)`` (unpacked here as a 3-tuple whose
                first two items are the return code and stdout) and to
                probe other nodes via ``is_reachable(ip)``.
        """
        self.ssh = ssh

    def run(self, plan: "MigrationPlan") -> bool:
        """Run all verification checks and print a report.

        Args:
            plan: Migration plan; ``plan.nodes`` (each with ``name``,
                ``new_ip`` and ``is_local``) and ``plan.ceph_config`` are
                read.

        Returns:
            True if every node with a new IP is reachable, the cluster is
            quorate and (when Ceph is configured) ``ceph -s`` succeeded;
            False otherwise.
        """
        print("\n=== Phase 5: Verifikation ===\n")

        nodes_ok = self._check_node_reachability(plan)
        cluster_ok = self._check_cluster_status()
        self._print_corosync_members()
        # Ceph is only inspected when the plan carries a Ceph configuration.
        ceph_ok = self._check_ceph() if plan.ceph_config else True

        all_ok = nodes_ok and cluster_ok and ceph_ok
        self._print_summary(all_ok)
        return all_ok

    def _check_node_reachability(self, plan: "MigrationPlan") -> bool:
        """Report whether every node answers on its new IP."""
        print("[Node-Erreichbarkeit (neue IPs)]")
        ok = True
        for node in plan.nodes:
            if not node.new_ip:
                # Nodes without a new address are skipped entirely.
                continue
            if node.is_local:
                # -F: treat the address literally ('.' is not a wildcard);
                # -w: require token boundaries so 10.0.0.1 does not match
                #     10.0.0.10; shlex.quote keeps the shell line intact.
                pattern = shlex.quote(str(node.new_ip))
                rc, _, _ = self.ssh.execute_local(
                    f"ip addr show | grep -qwF -- {pattern}"
                )
                reachable = rc == 0
            else:
                reachable = self.ssh.is_reachable(node.new_ip)
            status = "OK" if reachable else "FEHLER"
            print(f" {node.name} ({node.new_ip}): {status}")
            if not reachable:
                ok = False
        return ok

    def _check_cluster_status(self) -> bool:
        """Print the key ``pvecm status`` lines and check for quorum."""
        print("\n[Cluster Status]")
        rc, stdout, _ = self.ssh.execute_local("pvecm status 2>/dev/null")
        if rc != 0:
            print(" [!] pvecm status fehlgeschlagen")
            return False

        for raw_line in stdout.split('\n'):
            line = raw_line.strip()
            if any(key in line for key in self._CLUSTER_STATUS_KEYS):
                print(f" {line}")

        if self._QUORATE_RE.search(stdout):
            return True
        print(" [!] WARNUNG: Cluster hat KEIN Quorum!")
        return False

    def _print_corosync_members(self) -> None:
        """Print the corosync member IP entries (informational only)."""
        print("\n[Corosync Members]")
        # "|| true" keeps the pipeline's exit status at 0 when grep finds
        # nothing, so an empty result is told apart by the output instead.
        rc, stdout, _ = self.ssh.execute_local(
            "corosync-cmapctl 2>/dev/null | grep 'ip(' || true"
        )
        members = stdout.strip()
        if rc == 0 and members:
            for line in members.split('\n'):
                print(f" {line.strip()}")
        else:
            print(" Keine Corosync-Member-Info verfügbar")

    def _check_ceph(self) -> bool:
        """Print Ceph status, MON status and OSD tree.

        Only a failing ``ceph -s`` affects the result; the MON and OSD
        sections are informational.
        """
        ok = True

        print("\n[Ceph Status]")
        rc, stdout, _ = self.ssh.execute_local("ceph -s 2>/dev/null")
        if rc == 0:
            for raw_line in stdout.split('\n'):
                line = raw_line.strip()
                if line:
                    print(f" {line}")
        else:
            print(" [!] ceph -s fehlgeschlagen")
            ok = False

        print("\n[Ceph MON Status]")
        rc, stdout, _ = self.ssh.execute_local("ceph mon stat 2>/dev/null")
        if rc == 0:
            print(f" {stdout.strip()}")
        else:
            print(" [!] ceph mon stat fehlgeschlagen")

        print("\n[Ceph OSD Status]")
        rc, stdout, _ = self.ssh.execute_local("ceph osd tree 2>/dev/null")
        if rc == 0:
            # Lines are printed unstripped to keep the tree indentation.
            for line in stdout.split('\n')[:self._OSD_TREE_MAX_LINES]:
                if line.strip():
                    print(f" {line}")

        return ok

    def _print_summary(self, all_ok: bool) -> None:
        """Print the final verdict and the recommended next steps."""
        print("\n" + "=" * 60)
        if all_ok:
            print(" MIGRATION ERFOLGREICH!")
            print(" Alle Checks bestanden.")
        else:
            print(" MIGRATION MIT WARNUNGEN ABGESCHLOSSEN")
            print(" Einige Checks sind fehlgeschlagen. Bitte manuell prüfen!")
        print("=" * 60)

        print("\n[Empfohlene nächste Schritte]")
        print(" 1. VMs/CTs auf allen Nodes prüfen: qm list / pct list")
        print(" 2. Live-Migration testen: qm migrate <vmid> <target>")
        print(" 3. Ceph Recovery abwarten: ceph -w")
        if not all_ok:
            print(" 4. Bei Problemen Backup wiederherstellen:")
            print(" ls /root/network-migration-backup-*/")