diff --git a/migrator.py b/migrator.py index 31d3704..db10ac1 100644 --- a/migrator.py +++ b/migrator.py @@ -438,22 +438,29 @@ class Migrator: self._update_ceph_mon_map(plan) # Restart Ceph services + # Note: first MON is already running (started during monmap update) print("\n [Ceph] Services neu starten...") + first_started = False for node in plan.nodes: if not node.is_reachable: continue new_host = node.new_ip if not node.is_local else node.ssh_host - # Start MON (already stopped by monmap update) - rc, _, err = self.ssh.run_on_node( - new_host, - f"systemctl start ceph-mon@{node.name} 2>/dev/null", - node.is_local, timeout=30, - ) - if rc == 0: - print(f" [{node.name}] ceph-mon gestartet") + if not first_started: + # First node's MON was already started during monmap update + first_started = True + print(f" [{node.name}] ceph-mon läuft bereits (Primary)") else: - print(f" [{node.name}] WARNUNG ceph-mon: {err}") + # Start MON on remaining nodes + rc, _, err = self.ssh.run_on_node( + new_host, + f"systemctl start ceph-mon@{node.name} 2>/dev/null", + node.is_local, timeout=30, + ) + if rc == 0: + print(f" [{node.name}] ceph-mon gestartet") + else: + print(f" [{node.name}] WARNUNG ceph-mon: {err}") # Restart MGR self.ssh.run_on_node( @@ -467,6 +474,12 @@ class Migrator: "systemctl restart ceph-osd.target 2>/dev/null", node.is_local, timeout=60, ) + # Restart MDS if present (CephFS metadata server) + self.ssh.run_on_node( + new_host, + f"systemctl restart ceph-mds@{node.name} 2>/dev/null", + node.is_local, timeout=30, + ) print(f" [{node.name}] Ceph-Services gestartet") def _update_ceph_direct(self, plan: MigrationPlan, configs: dict): @@ -491,12 +504,9 @@ class Migrator: When MON IPs change, the internal monmap (stored in MON's RocksDB) must be explicitly updated. Just updating ceph.conf is NOT enough. - Steps per node: - 1. Stop ceph-mon - 2. Extract monmap from MON database - 3. Remove all old MON entries - 4. Re-add MON entries with new IPs (msgr2 + msgr1) - 5. Reinject updated monmap + Strategy: Update monmap on the FIRST node, start its MON, then get + the authoritative monmap from the running MON and inject it into all + remaining nodes. This avoids epoch mismatches between nodes. """ ip_mapping = {n.current_ip: n.new_ip for n in plan.nodes if n.new_ip} @@ -506,18 +516,22 @@ class Migrator: # Build the list of MON nodes with their new IPs mon_nodes = [] + reachable_nodes = [] for node in plan.nodes: if not node.is_reachable: continue new_ip = node.new_ip or node.current_ip mon_nodes.append((node.name, new_ip)) + reachable_nodes.append(node) + + if not reachable_nodes: + print(" [Ceph] Keine erreichbaren Nodes für MON-Map Update") + return print("\n [Ceph] MON-Map aktualisieren...") # Stop ceph-mon on all nodes first - for node in plan.nodes: - if not node.is_reachable: - continue + for node in reachable_nodes: new_host = node.new_ip if not node.is_local else node.ssh_host self.ssh.run_on_node( new_host, @@ -526,66 +540,210 @@ class Migrator: ) print(f" [{node.name}] ceph-mon gestoppt") - # Update monmap on each node - for node in plan.nodes: - if not node.is_reachable: - continue - new_host = node.new_ip if not node.is_local else node.ssh_host + # --- Phase 1: Update monmap on the FIRST node --- + first_node = reachable_nodes[0] + first_host = first_node.new_ip if not first_node.is_local else first_node.ssh_host + remaining_nodes = reachable_nodes[1:] - # Extract current monmap + print(f"\n [{first_node.name}] Erstelle neue MON-Map (Primary)...") + + # Extract current monmap from first node + rc, _, err = self.ssh.run_on_node( + first_host, + f"ceph-mon -i {first_node.name} --extract-monmap /tmp/monmap", + first_node.is_local, timeout=30, + ) + if rc != 0: + print(f" [{first_node.name}] FEHLER: monmap extrahieren fehlgeschlagen: {err}") + return + + # Show current monmap + self.ssh.run_on_node( + first_host, "monmaptool --print /tmp/monmap", + first_node.is_local, timeout=10, + ) + + # Remove all existing MON entries + for mon_name, _ in mon_nodes: + self.ssh.run_on_node( + first_host, + f"monmaptool --rm {mon_name} /tmp/monmap 2>/dev/null", + first_node.is_local, timeout=10, + ) + + # Re-add all MON entries with new IPs (msgr2 on 3300 + msgr1 on 6789) + for mon_name, new_ip in mon_nodes: rc, _, err = self.ssh.run_on_node( - new_host, - f"ceph-mon -i {node.name} --extract-monmap /tmp/monmap", - node.is_local, timeout=30, + first_host, + f"monmaptool --addv {mon_name} " + f"[v2:{new_ip}:3300/0,v1:{new_ip}:6789/0] /tmp/monmap", + first_node.is_local, timeout=10, ) if rc != 0: - print(f" [{node.name}] WARNUNG: monmap extrahieren fehlgeschlagen: {err}") - print(f" [{node.name}] Überspringe MON-Map Update") - continue - - # Print current monmap for debugging - self.ssh.run_on_node( - new_host, - "monmaptool --print /tmp/monmap", - node.is_local, timeout=10, - ) - - # Remove all existing MON entries - for mon_name, _ in mon_nodes: + # Fallback: try legacy --add (older Ceph versions) self.ssh.run_on_node( - new_host, - f"monmaptool --rm {mon_name} /tmp/monmap 2>/dev/null", - node.is_local, timeout=10, + first_host, + f"monmaptool --add {mon_name} {new_ip}:6789 /tmp/monmap", + first_node.is_local, timeout=10, ) - # Re-add all MON entries with new IPs (msgr2 on 3300 + msgr1 on 6789) - for mon_name, new_ip in mon_nodes: + # Show updated monmap + print(f" [{first_node.name}] Neue MON-Map:") + self.ssh.run_on_node( + first_host, "monmaptool --print /tmp/monmap", + first_node.is_local, timeout=10, + ) + + # Inject into first node + rc, _, err = self.ssh.run_on_node( + first_host, + f"ceph-mon -i {first_node.name} --inject-monmap /tmp/monmap", + first_node.is_local, timeout=30, + ) + if rc == 0: + print(f" [{first_node.name}] MON-Map injiziert") + else: + print(f" [{first_node.name}] FEHLER MON-Map reinject: {err}") + self.ssh.run_on_node(first_host, "rm -f /tmp/monmap", first_node.is_local) + return + + # Start first MON so we can get the authoritative map + print(f" [{first_node.name}] Starte ceph-mon (Primary)...") + self.ssh.run_on_node( + first_host, + f"systemctl start ceph-mon@{first_node.name}", + first_node.is_local, timeout=30, + ) + # Give it a moment to initialize + time.sleep(3) + + # --- Phase 2: Get authoritative monmap from running MON --- + if remaining_nodes: + print(f"\n Hole autoritative MON-Map vom laufenden MON...") + rc, _, err = self.ssh.run_on_node( + first_host, + "ceph mon getmap -o /tmp/monmap_auth", + first_node.is_local, timeout=30, + ) + + if rc == 0: + # Use authoritative map from running MON + monmap_path = "/tmp/monmap_auth" + print(f" Autoritative MON-Map erhalten") + else: + # Fallback: use the manually built map + print(f" WARNUNG: Konnte autoritative Map nicht holen ({err})") + print(f" Verwende manuell erstellte Map als Fallback") + monmap_path = "/tmp/monmap" + + # --- Phase 3: Inject authoritative map into remaining nodes --- + for node in remaining_nodes: + new_host = node.new_ip if not node.is_local else node.ssh_host + + # Copy monmap from first node to this node via SSH + if first_node.is_local: + # First node is local: SCP the map to remote node + rc, _, err = self.ssh.execute_local( + f"sshpass -p '{self.ssh.ssh_password}' " + f"scp -o StrictHostKeyChecking=no " + f"-o PubkeyAuthentication=no " + f"-P {self.ssh.ssh_port} " + f"{monmap_path} " + f"{self.ssh.ssh_user}@{new_host}:/tmp/monmap", + timeout=30, + ) + elif node.is_local: + # This node is local: SCP from remote first node + rc, _, err = self.ssh.execute_local( + f"sshpass -p '{self.ssh.ssh_password}' " + f"scp -o StrictHostKeyChecking=no " + f"-o PubkeyAuthentication=no " + f"-P {self.ssh.ssh_port} " + f"{self.ssh.ssh_user}@{first_host}:{monmap_path} " + f"/tmp/monmap", + timeout=30, + ) + else: + # Both remote: read from first, write to second + rc_read, stdout, _ = self.ssh.execute( + first_host, f"base64 {monmap_path}", timeout=30, + ) + if rc_read == 0: + rc, _, err = self.ssh.execute( + new_host, + f"echo '{stdout.strip()}' | base64 -d > /tmp/monmap", + timeout=30, + ) + else: + rc = -1 + err = "Konnte monmap nicht vom Primary lesen" + + if rc != 0: + print(f" [{node.name}] WARNUNG: monmap kopieren fehlgeschlagen: {err}") + print(f" [{node.name}] Erstelle Map manuell als Fallback...") + # Fallback: build map manually on this node + self._update_monmap_manual(node, new_host, mon_nodes) + continue + + # Inject monmap rc, _, err = self.ssh.run_on_node( new_host, - f"monmaptool --addv {mon_name} " - f"[v2:{new_ip}:3300/0,v1:{new_ip}:6789/0] /tmp/monmap", + f"ceph-mon -i {node.name} --inject-monmap /tmp/monmap", + node.is_local, timeout=30, + ) + if rc == 0: + print(f" [{node.name}] Autoritative MON-Map injiziert") + else: + print(f" [{node.name}] FEHLER MON-Map reinject: {err}") + + # Cleanup + self.ssh.run_on_node(new_host, "rm -f /tmp/monmap", node.is_local) + + # Cleanup on first node + self.ssh.run_on_node( + first_host, "rm -f /tmp/monmap /tmp/monmap_auth", + first_node.is_local, + ) + + def _update_monmap_manual(self, node, host: str, mon_nodes: list): + """Fallback: manually build and inject monmap on a single node.""" + rc, _, err = self.ssh.run_on_node( + host, + f"ceph-mon -i {node.name} --extract-monmap /tmp/monmap", + node.is_local, timeout=30, + ) + if rc != 0: + print(f" [{node.name}] FEHLER: monmap extrahieren fehlgeschlagen") + return + + for mon_name, _ in mon_nodes: + self.ssh.run_on_node( + host, + f"monmaptool --rm {mon_name} /tmp/monmap 2>/dev/null", + node.is_local, timeout=10, + ) + for mon_name, new_ip in mon_nodes: + rc, _, _ = self.ssh.run_on_node( + host, + f"monmaptool --addv {mon_name} " + f"[v2:{new_ip}:3300/0,v1:{new_ip}:6789/0] /tmp/monmap", + node.is_local, timeout=10, + ) + if rc != 0: + self.ssh.run_on_node( + host, + f"monmaptool --add {mon_name} {new_ip}:6789 /tmp/monmap", node.is_local, timeout=10, ) - if rc != 0: - # Fallback: try legacy --add (older Ceph versions) - self.ssh.run_on_node( - new_host, - f"monmaptool --add {mon_name} {new_ip}:6789 /tmp/monmap", - node.is_local, timeout=10, - ) - # Reinject updated monmap - rc, _, err = self.ssh.run_on_node( - new_host, - f"ceph-mon -i {node.name} --inject-monmap /tmp/monmap", - node.is_local, timeout=30, - ) - if rc == 0: - print(f" [{node.name}] MON-Map aktualisiert") - else: - print(f" [{node.name}] FEHLER MON-Map reinject: {err}") + rc, _, err = self.ssh.run_on_node( + host, + f"ceph-mon -i {node.name} --inject-monmap /tmp/monmap", + node.is_local, timeout=30, + ) + if rc == 0: + print(f" [{node.name}] MON-Map manuell aktualisiert (Fallback)") + else: + print(f" [{node.name}] FEHLER MON-Map reinject: {err}") - # Cleanup - self.ssh.run_on_node( - new_host, "rm -f /tmp/monmap", node.is_local, - ) + self.ssh.run_on_node(host, "rm -f /tmp/monmap", node.is_local)