213 lines
6.8 KiB
Python
213 lines
6.8 KiB
Python
#!/usr/bin/env python3
|
|
"""
|
|
Proxmox Cluster Network Changer

Migrates a Proxmox cluster (including Ceph) from one network to another.
Handles Corosync, Ceph, /etc/network/interfaces and /etc/hosts.

Can also cope with a broken quorum (e.g. when a node has already been
changed manually).

Must be run as root on a Proxmox node.

Usage:
    python3 main.py                                   # Interactive mode
    python3 main.py --dry-run                         # Only show changes, modify nothing
    python3 main.py --discover                        # Discovery only, no migration
    python3 main.py --rescue                          # Set up an emergency network first
    python3 main.py --rescue-commands 10.99.99.0/24   # Only print rescue commands
|
|
"""
|
|
|
|
import argparse
|
|
import os
|
|
import sys
|
|
|
|
from ssh_manager import SSHManager
|
|
from discovery import Discovery
|
|
from planner import Planner
|
|
from backup import Backup
|
|
from migrator import Migrator
|
|
from verifier import Verifier
|
|
from rescue import RescueNetwork
|
|
|
|
|
|
def check_prerequisites():
    """Verify the script runs as root and, ideally, on a Proxmox node.

    Exits with status 1 when the effective user is not root.

    When neither /etc/pve nor /etc/corosync exists, a warning is printed and
    the user is asked whether to continue anyway. Any answer other than
    j/ja/y/yes exits with status 0. A closed or non-interactive stdin
    (EOFError) is treated like the prompt's default answer "N".

    Returns:
        None when all checks passed or the user chose to continue.
    """
    if os.geteuid() != 0:
        print("FEHLER: Dieses Tool muss als root ausgeführt werden!")
        print("Bitte mit 'sudo python3 main.py' starten.")
        sys.exit(1)

    # One of the two directories is enough to accept the host as a node.
    if os.path.exists("/etc/pve") or os.path.exists("/etc/corosync"):
        return

    print("WARNUNG: Dies scheint kein Proxmox-Node zu sein.")
    print(" /etc/pve und /etc/corosync nicht gefunden.")
    try:
        answer = input("Trotzdem fortfahren? [j/N]: ").strip().lower()
    except EOFError:
        # No answer can be read (stdin closed / piped): fall back to the
        # default "N" instead of crashing with a traceback.
        print()
        answer = ""
    if answer not in ('j', 'ja', 'y', 'yes'):
        sys.exit(0)
|
|
|
|
|
|
def _build_arg_parser():
    """Build the argparse parser holding all command-line options."""
    parser = argparse.ArgumentParser(
        description="Proxmox Cluster Network Changer - "
                    "Migriert Cluster + Ceph in ein neues Netzwerk"
    )
    parser.add_argument(
        "--dry-run", action="store_true",
        help="Nur anzeigen was geändert würde, nichts ändern"
    )
    parser.add_argument(
        "--discover", action="store_true",
        help="Nur Discovery durchführen, keine Migration"
    )
    parser.add_argument(
        "--ssh-key", type=str, default=None,
        help="Pfad zum SSH-Key (Standard: Default SSH-Key)"
    )
    parser.add_argument(
        "--ssh-port", type=int, default=22,
        help="SSH-Port (Standard: 22)"
    )
    parser.add_argument(
        "--rescue", action="store_true",
        help="Rescue-Modus: Emergency-Netzwerk einrichten wenn Nodes "
             "sich nicht erreichen können"
    )
    parser.add_argument(
        "--rescue-commands", type=str, metavar="SUBNET",
        help="Nur Rescue-Befehle ausgeben ohne Migration "
             "(z.B. --rescue-commands 10.99.99.0/24)"
    )
    return parser


def _print_rescue_commands(ssh, rescue, subnet):
    """Print the rescue commands for *subnet* without migrating anything.

    Reads the Corosync configuration, asks for the bridge (defaulting to
    ``rescue.bridge``) and prints each entry returned by
    ``rescue.get_rescue_commands``: first the add commands, then the
    remove commands. Exits with status 1 when the Corosync configuration
    cannot be read.
    """
    discovery = Discovery(ssh)
    print("\n[Corosync]")
    corosync = discovery.discover_corosync()
    if not corosync:
        print("\nFEHLER: Konnte Cluster-Konfiguration nicht lesen.")
        sys.exit(1)

    bridge_input = input(f"Bridge [{rescue.bridge}]: ").strip()
    bridge = bridge_input or rescue.bridge

    commands = rescue.get_rescue_commands(corosync, subnet, bridge)
    print()
    print("=" * 60)
    print(" RESCUE BEFEHLE")
    print(f" Subnetz: {subnet} | Bridge: {bridge}")
    print("=" * 60)
    print()
    for cmd_info in commands:
        print(f" {cmd_info['name']} ({cmd_info['current_ip']}):")
        print(f" {cmd_info['command']}")
        print()
    print(" Zum Entfernen:")
    for cmd_info in commands:
        print(f" {cmd_info['remove_command']} # {cmd_info['name']}")
    print()


def main():
    """Command-line entry point: migrate the cluster to a new network.

    Runs the phases in order: discovery, optional rescue-network setup,
    planning, backup, migration and verification. Backup and verification
    are skipped with ``--dry-run``; ``--discover`` stops after discovery;
    ``--rescue-commands SUBNET`` only prints rescue commands and exits.

    Once a rescue network has been set up it is cleaned up on every way
    out of this function, including unexpected exceptions and Ctrl-C, so
    the temporary network is not left behind.

    Exit status (via ``sys.exit``): 1 when the cluster configuration cannot
    be read, rescue setup fails, the user aborts after a failed backup, or
    the migration reports errors; 0 for the early exits of ``--discover``,
    ``--rescue-commands`` and an empty plan.
    """
    args = _build_arg_parser().parse_args()

    print("=" * 60)
    print(" Proxmox Cluster Network Changer")
    print("=" * 60)

    check_prerequisites()

    # Initialize SSH manager
    ssh = SSHManager(ssh_key=args.ssh_key, ssh_port=args.ssh_port)
    rescue = RescueNetwork(ssh)

    # Quick mode: just print rescue commands and exit
    if args.rescue_commands:
        _print_rescue_commands(ssh, rescue, args.rescue_commands)
        sys.exit(0)

    # Phase 1: Discovery
    discovery = Discovery(ssh)
    corosync, ceph, nodes, has_quorum = discovery.run()

    if not corosync:
        print("\nFEHLER: Konnte Cluster-Konfiguration nicht lesen. Abbruch.")
        sys.exit(1)

    # Offer rescue mode when remote nodes cannot be reached
    unreachable = [n for n in nodes if not n.is_reachable and not n.is_local]
    use_rescue = args.rescue

    if unreachable and not use_rescue:
        print(f"\n {len(unreachable)} Node(s) nicht erreichbar.")
        answer = input(" Rescue-Netzwerk einrichten? [J/n]: ").strip().lower()
        if answer not in ('n', 'nein', 'no'):
            use_rescue = True

    rescue_nodes = None
    if use_rescue:
        rescue_nodes = rescue.setup_interactive(corosync)
        if not rescue_nodes:
            sys.exit(1)

    # Whether cleanup() resets rescue.active is not visible here, so a local
    # flag guarantees the cleanup runs at most once.
    rescue_cleaned = False

    def cleanup_rescue():
        """Remove the rescue network once, if it is active."""
        nonlocal rescue_cleaned
        if rescue.active and not rescue_cleaned:
            rescue_cleaned = True
            rescue.cleanup(nodes)

    try:
        if use_rescue:
            # Re-run discovery with rescue IPs to read configs from all nodes
            print("\n [Rescue] Lese Konfigurationen über Rescue-Netzwerk...")
            nodes = discovery.discover_nodes_with_overrides(
                corosync, rescue_nodes
            )
            # Quorum and Ceph state are re-read as well
            has_quorum = discovery.check_quorum()
            ceph = discovery.discover_ceph()

        if args.discover:
            # Explicit call keeps the cleanup ahead of the final message.
            cleanup_rescue()
            print("\n--- Discovery abgeschlossen (--discover Modus) ---")
            sys.exit(0)

        # Phase 2: Planning
        planner = Planner()
        plan = planner.plan(nodes, corosync, ceph, has_quorum)
        if not plan:
            sys.exit(0)  # rescue cleanup happens in the finally clause

        plan.dry_run = args.dry_run

        # Generate all new config files
        configs = planner.generate_new_configs(plan)

        # Phase 3: Backup (skip in dry-run)
        if not args.dry_run:
            backup = Backup(ssh)
            if not backup.run(plan):
                print("\nBackup fehlgeschlagen! Trotzdem fortfahren?")
                answer = input("[j/N]: ").strip().lower()
                if answer not in ('j', 'ja', 'y', 'yes'):
                    sys.exit(1)  # rescue cleanup happens in the finally clause
        else:
            print("\n=== Phase 3: Backup (übersprungen im Dry-Run) ===")

        # Phase 4: Migration
        migrator = Migrator(ssh)
        success = migrator.run(plan, configs, dry_run=args.dry_run)

        if not success:
            print("\n[!] Migration hatte Fehler!")
            if not args.dry_run:
                print(" Prüfe Backups in /root/network-migration-backup-*/")
            sys.exit(1)  # rescue cleanup happens in the finally clause

        # Cleanup rescue network before verification, so that real
        # connectivity is verified (in dry-run it is simply removed here).
        cleanup_rescue()

        # Phase 5: Verification (skip in dry-run)
        if not args.dry_run:
            verifier = Verifier(ssh)
            verifier.run(plan)
        else:
            print("\n=== Phase 5: Verifikation (übersprungen im Dry-Run) ===")
            print("\nDry-Run abgeschlossen. Keine Änderungen vorgenommen.")
    finally:
        # Safety net for every exit path, including exceptions and Ctrl-C.
        cleanup_rescue()
|
|
|
|
|
# Script entry point: run the CLI only when this file is executed directly,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
|