Fix MON map update: modify the monmap on the first node only, then copy it to the other nodes

This commit is contained in:
duffyduck 2026-03-05 00:28:53 +01:00
parent aebad45615
commit 7674656bf2
1 changed file with 226 additions and 68 deletions

View File

@ -438,22 +438,29 @@ class Migrator:
self._update_ceph_mon_map(plan) self._update_ceph_mon_map(plan)
# Restart Ceph services # Restart Ceph services
# Note: first MON is already running (started during monmap update)
print("\n [Ceph] Services neu starten...") print("\n [Ceph] Services neu starten...")
first_started = False
for node in plan.nodes: for node in plan.nodes:
if not node.is_reachable: if not node.is_reachable:
continue continue
new_host = node.new_ip if not node.is_local else node.ssh_host new_host = node.new_ip if not node.is_local else node.ssh_host
# Start MON (already stopped by monmap update) if not first_started:
rc, _, err = self.ssh.run_on_node( # First node's MON was already started during monmap update
new_host, first_started = True
f"systemctl start ceph-mon@{node.name} 2>/dev/null", print(f" [{node.name}] ceph-mon läuft bereits (Primary)")
node.is_local, timeout=30,
)
if rc == 0:
print(f" [{node.name}] ceph-mon gestartet")
else: else:
print(f" [{node.name}] WARNUNG ceph-mon: {err}") # Start MON on remaining nodes
rc, _, err = self.ssh.run_on_node(
new_host,
f"systemctl start ceph-mon@{node.name} 2>/dev/null",
node.is_local, timeout=30,
)
if rc == 0:
print(f" [{node.name}] ceph-mon gestartet")
else:
print(f" [{node.name}] WARNUNG ceph-mon: {err}")
# Restart MGR # Restart MGR
self.ssh.run_on_node( self.ssh.run_on_node(
@ -467,6 +474,12 @@ class Migrator:
"systemctl restart ceph-osd.target 2>/dev/null", "systemctl restart ceph-osd.target 2>/dev/null",
node.is_local, timeout=60, node.is_local, timeout=60,
) )
# Restart MDS if present (CephFS metadata server)
self.ssh.run_on_node(
new_host,
f"systemctl restart ceph-mds@{node.name} 2>/dev/null",
node.is_local, timeout=30,
)
print(f" [{node.name}] Ceph-Services gestartet") print(f" [{node.name}] Ceph-Services gestartet")
def _update_ceph_direct(self, plan: MigrationPlan, configs: dict): def _update_ceph_direct(self, plan: MigrationPlan, configs: dict):
@ -491,12 +504,9 @@ class Migrator:
When MON IPs change, the internal monmap (stored in MON's RocksDB) When MON IPs change, the internal monmap (stored in MON's RocksDB)
must be explicitly updated. Just updating ceph.conf is NOT enough. must be explicitly updated. Just updating ceph.conf is NOT enough.
Steps per node: Strategy: Update monmap on the FIRST node, start its MON, then get
1. Stop ceph-mon the authoritative monmap from the running MON and inject it into all
2. Extract monmap from MON database remaining nodes. This avoids epoch mismatches between nodes.
3. Remove all old MON entries
4. Re-add MON entries with new IPs (msgr2 + msgr1)
5. Reinject updated monmap
""" """
ip_mapping = {n.current_ip: n.new_ip for n in plan.nodes if n.new_ip} ip_mapping = {n.current_ip: n.new_ip for n in plan.nodes if n.new_ip}
@ -506,18 +516,22 @@ class Migrator:
# Build the list of MON nodes with their new IPs # Build the list of MON nodes with their new IPs
mon_nodes = [] mon_nodes = []
reachable_nodes = []
for node in plan.nodes: for node in plan.nodes:
if not node.is_reachable: if not node.is_reachable:
continue continue
new_ip = node.new_ip or node.current_ip new_ip = node.new_ip or node.current_ip
mon_nodes.append((node.name, new_ip)) mon_nodes.append((node.name, new_ip))
reachable_nodes.append(node)
if not reachable_nodes:
print(" [Ceph] Keine erreichbaren Nodes für MON-Map Update")
return
print("\n [Ceph] MON-Map aktualisieren...") print("\n [Ceph] MON-Map aktualisieren...")
# Stop ceph-mon on all nodes first # Stop ceph-mon on all nodes first
for node in plan.nodes: for node in reachable_nodes:
if not node.is_reachable:
continue
new_host = node.new_ip if not node.is_local else node.ssh_host new_host = node.new_ip if not node.is_local else node.ssh_host
self.ssh.run_on_node( self.ssh.run_on_node(
new_host, new_host,
@ -526,66 +540,210 @@ class Migrator:
) )
print(f" [{node.name}] ceph-mon gestoppt") print(f" [{node.name}] ceph-mon gestoppt")
# Update monmap on each node # --- Phase 1: Update monmap on the FIRST node ---
for node in plan.nodes: first_node = reachable_nodes[0]
if not node.is_reachable: first_host = first_node.new_ip if not first_node.is_local else first_node.ssh_host
continue remaining_nodes = reachable_nodes[1:]
new_host = node.new_ip if not node.is_local else node.ssh_host
# Extract current monmap print(f"\n [{first_node.name}] Erstelle neue MON-Map (Primary)...")
# Extract current monmap from first node
rc, _, err = self.ssh.run_on_node(
first_host,
f"ceph-mon -i {first_node.name} --extract-monmap /tmp/monmap",
first_node.is_local, timeout=30,
)
if rc != 0:
print(f" [{first_node.name}] FEHLER: monmap extrahieren fehlgeschlagen: {err}")
return
# Show current monmap
self.ssh.run_on_node(
first_host, "monmaptool --print /tmp/monmap",
first_node.is_local, timeout=10,
)
# Remove all existing MON entries
for mon_name, _ in mon_nodes:
self.ssh.run_on_node(
first_host,
f"monmaptool --rm {mon_name} /tmp/monmap 2>/dev/null",
first_node.is_local, timeout=10,
)
# Re-add all MON entries with new IPs (msgr2 on 3300 + msgr1 on 6789)
for mon_name, new_ip in mon_nodes:
rc, _, err = self.ssh.run_on_node( rc, _, err = self.ssh.run_on_node(
new_host, first_host,
f"ceph-mon -i {node.name} --extract-monmap /tmp/monmap", f"monmaptool --addv {mon_name} "
node.is_local, timeout=30, f"[v2:{new_ip}:3300/0,v1:{new_ip}:6789/0] /tmp/monmap",
first_node.is_local, timeout=10,
) )
if rc != 0: if rc != 0:
print(f" [{node.name}] WARNUNG: monmap extrahieren fehlgeschlagen: {err}") # Fallback: try legacy --add (older Ceph versions)
print(f" [{node.name}] Überspringe MON-Map Update")
continue
# Print current monmap for debugging
self.ssh.run_on_node(
new_host,
"monmaptool --print /tmp/monmap",
node.is_local, timeout=10,
)
# Remove all existing MON entries
for mon_name, _ in mon_nodes:
self.ssh.run_on_node( self.ssh.run_on_node(
new_host, first_host,
f"monmaptool --rm {mon_name} /tmp/monmap 2>/dev/null", f"monmaptool --add {mon_name} {new_ip}:6789 /tmp/monmap",
node.is_local, timeout=10, first_node.is_local, timeout=10,
) )
# Re-add all MON entries with new IPs (msgr2 on 3300 + msgr1 on 6789) # Show updated monmap
for mon_name, new_ip in mon_nodes: print(f" [{first_node.name}] Neue MON-Map:")
self.ssh.run_on_node(
first_host, "monmaptool --print /tmp/monmap",
first_node.is_local, timeout=10,
)
# Inject into first node
rc, _, err = self.ssh.run_on_node(
first_host,
f"ceph-mon -i {first_node.name} --inject-monmap /tmp/monmap",
first_node.is_local, timeout=30,
)
if rc == 0:
print(f" [{first_node.name}] MON-Map injiziert")
else:
print(f" [{first_node.name}] FEHLER MON-Map reinject: {err}")
self.ssh.run_on_node(first_host, "rm -f /tmp/monmap", first_node.is_local)
return
# Start first MON so we can get the authoritative map
print(f" [{first_node.name}] Starte ceph-mon (Primary)...")
self.ssh.run_on_node(
first_host,
f"systemctl start ceph-mon@{first_node.name}",
first_node.is_local, timeout=30,
)
# Give it a moment to initialize
time.sleep(3)
# --- Phase 2: Get authoritative monmap from running MON ---
if remaining_nodes:
print(f"\n Hole autoritative MON-Map vom laufenden MON...")
rc, _, err = self.ssh.run_on_node(
first_host,
"ceph mon getmap -o /tmp/monmap_auth",
first_node.is_local, timeout=30,
)
if rc == 0:
# Use authoritative map from running MON
monmap_path = "/tmp/monmap_auth"
print(f" Autoritative MON-Map erhalten")
else:
# Fallback: use the manually built map
print(f" WARNUNG: Konnte autoritative Map nicht holen ({err})")
print(f" Verwende manuell erstellte Map als Fallback")
monmap_path = "/tmp/monmap"
# --- Phase 3: Inject authoritative map into remaining nodes ---
for node in remaining_nodes:
new_host = node.new_ip if not node.is_local else node.ssh_host
# Copy monmap from first node to this node via SSH
if first_node.is_local:
# First node is local: SCP the map to remote node
rc, _, err = self.ssh.execute_local(
f"sshpass -p '{self.ssh.ssh_password}' "
f"scp -o StrictHostKeyChecking=no "
f"-o PubkeyAuthentication=no "
f"-P {self.ssh.ssh_port} "
f"{monmap_path} "
f"{self.ssh.ssh_user}@{new_host}:/tmp/monmap",
timeout=30,
)
elif node.is_local:
# This node is local: SCP from remote first node
rc, _, err = self.ssh.execute_local(
f"sshpass -p '{self.ssh.ssh_password}' "
f"scp -o StrictHostKeyChecking=no "
f"-o PubkeyAuthentication=no "
f"-P {self.ssh.ssh_port} "
f"{self.ssh.ssh_user}@{first_host}:{monmap_path} "
f"/tmp/monmap",
timeout=30,
)
else:
# Both remote: read from first, write to second
rc_read, stdout, _ = self.ssh.execute(
first_host, f"base64 {monmap_path}", timeout=30,
)
if rc_read == 0:
rc, _, err = self.ssh.execute(
new_host,
f"echo '{stdout.strip()}' | base64 -d > /tmp/monmap",
timeout=30,
)
else:
rc = -1
err = "Konnte monmap nicht vom Primary lesen"
if rc != 0:
print(f" [{node.name}] WARNUNG: monmap kopieren fehlgeschlagen: {err}")
print(f" [{node.name}] Erstelle Map manuell als Fallback...")
# Fallback: build map manually on this node
self._update_monmap_manual(node, new_host, mon_nodes)
continue
# Inject monmap
rc, _, err = self.ssh.run_on_node( rc, _, err = self.ssh.run_on_node(
new_host, new_host,
f"monmaptool --addv {mon_name} " f"ceph-mon -i {node.name} --inject-monmap /tmp/monmap",
f"[v2:{new_ip}:3300/0,v1:{new_ip}:6789/0] /tmp/monmap", node.is_local, timeout=30,
)
if rc == 0:
print(f" [{node.name}] Autoritative MON-Map injiziert")
else:
print(f" [{node.name}] FEHLER MON-Map reinject: {err}")
# Cleanup
self.ssh.run_on_node(new_host, "rm -f /tmp/monmap", node.is_local)
# Cleanup on first node
self.ssh.run_on_node(
first_host, "rm -f /tmp/monmap /tmp/monmap_auth",
first_node.is_local,
)
def _update_monmap_manual(self, node, host: str, mon_nodes: list):
"""Fallback: manually build and inject monmap on a single node."""
rc, _, err = self.ssh.run_on_node(
host,
f"ceph-mon -i {node.name} --extract-monmap /tmp/monmap",
node.is_local, timeout=30,
)
if rc != 0:
print(f" [{node.name}] FEHLER: monmap extrahieren fehlgeschlagen")
return
for mon_name, _ in mon_nodes:
self.ssh.run_on_node(
host,
f"monmaptool --rm {mon_name} /tmp/monmap 2>/dev/null",
node.is_local, timeout=10,
)
for mon_name, new_ip in mon_nodes:
rc, _, _ = self.ssh.run_on_node(
host,
f"monmaptool --addv {mon_name} "
f"[v2:{new_ip}:3300/0,v1:{new_ip}:6789/0] /tmp/monmap",
node.is_local, timeout=10,
)
if rc != 0:
self.ssh.run_on_node(
host,
f"monmaptool --add {mon_name} {new_ip}:6789 /tmp/monmap",
node.is_local, timeout=10, node.is_local, timeout=10,
) )
if rc != 0:
# Fallback: try legacy --add (older Ceph versions)
self.ssh.run_on_node(
new_host,
f"monmaptool --add {mon_name} {new_ip}:6789 /tmp/monmap",
node.is_local, timeout=10,
)
# Reinject updated monmap rc, _, err = self.ssh.run_on_node(
rc, _, err = self.ssh.run_on_node( host,
new_host, f"ceph-mon -i {node.name} --inject-monmap /tmp/monmap",
f"ceph-mon -i {node.name} --inject-monmap /tmp/monmap", node.is_local, timeout=30,
node.is_local, timeout=30, )
) if rc == 0:
if rc == 0: print(f" [{node.name}] MON-Map manuell aktualisiert (Fallback)")
print(f" [{node.name}] MON-Map aktualisiert") else:
else: print(f" [{node.name}] FEHLER MON-Map reinject: {err}")
print(f" [{node.name}] FEHLER MON-Map reinject: {err}")
# Cleanup self.ssh.run_on_node(host, "rm -f /tmp/monmap", node.is_local)
self.ssh.run_on_node(
new_host, "rm -f /tmp/monmap", node.is_local,
)