Fixed MON map update: the map is now altered on the first node only and then copied to the other nodes

This commit is contained in:
duffyduck 2026-03-05 00:28:53 +01:00
parent aebad45615
commit 7674656bf2
1 changed files with 226 additions and 68 deletions

View File

@ -438,22 +438,29 @@ class Migrator:
self._update_ceph_mon_map(plan)
# Restart Ceph services
# Note: first MON is already running (started during monmap update)
print("\n [Ceph] Services neu starten...")
first_started = False
for node in plan.nodes:
if not node.is_reachable:
continue
new_host = node.new_ip if not node.is_local else node.ssh_host
# Start MON (already stopped by monmap update)
rc, _, err = self.ssh.run_on_node(
new_host,
f"systemctl start ceph-mon@{node.name} 2>/dev/null",
node.is_local, timeout=30,
)
if rc == 0:
print(f" [{node.name}] ceph-mon gestartet")
if not first_started:
# First node's MON was already started during monmap update
first_started = True
print(f" [{node.name}] ceph-mon läuft bereits (Primary)")
else:
print(f" [{node.name}] WARNUNG ceph-mon: {err}")
# Start MON on remaining nodes
rc, _, err = self.ssh.run_on_node(
new_host,
f"systemctl start ceph-mon@{node.name} 2>/dev/null",
node.is_local, timeout=30,
)
if rc == 0:
print(f" [{node.name}] ceph-mon gestartet")
else:
print(f" [{node.name}] WARNUNG ceph-mon: {err}")
# Restart MGR
self.ssh.run_on_node(
@ -467,6 +474,12 @@ class Migrator:
"systemctl restart ceph-osd.target 2>/dev/null",
node.is_local, timeout=60,
)
# Restart MDS if present (CephFS metadata server)
self.ssh.run_on_node(
new_host,
f"systemctl restart ceph-mds@{node.name} 2>/dev/null",
node.is_local, timeout=30,
)
print(f" [{node.name}] Ceph-Services gestartet")
def _update_ceph_direct(self, plan: MigrationPlan, configs: dict):
@ -491,12 +504,9 @@ class Migrator:
When MON IPs change, the internal monmap (stored in MON's RocksDB)
must be explicitly updated. Just updating ceph.conf is NOT enough.
Steps per node:
1. Stop ceph-mon
2. Extract monmap from MON database
3. Remove all old MON entries
4. Re-add MON entries with new IPs (msgr2 + msgr1)
5. Reinject updated monmap
Strategy: Update monmap on the FIRST node, start its MON, then get
the authoritative monmap from the running MON and inject it into all
remaining nodes. This avoids epoch mismatches between nodes.
"""
ip_mapping = {n.current_ip: n.new_ip for n in plan.nodes if n.new_ip}
@ -506,18 +516,22 @@ class Migrator:
# Build the list of MON nodes with their new IPs
mon_nodes = []
reachable_nodes = []
for node in plan.nodes:
if not node.is_reachable:
continue
new_ip = node.new_ip or node.current_ip
mon_nodes.append((node.name, new_ip))
reachable_nodes.append(node)
if not reachable_nodes:
print(" [Ceph] Keine erreichbaren Nodes für MON-Map Update")
return
print("\n [Ceph] MON-Map aktualisieren...")
# Stop ceph-mon on all nodes first
for node in plan.nodes:
if not node.is_reachable:
continue
for node in reachable_nodes:
new_host = node.new_ip if not node.is_local else node.ssh_host
self.ssh.run_on_node(
new_host,
@ -526,66 +540,210 @@ class Migrator:
)
print(f" [{node.name}] ceph-mon gestoppt")
# Update monmap on each node
for node in plan.nodes:
if not node.is_reachable:
continue
new_host = node.new_ip if not node.is_local else node.ssh_host
# --- Phase 1: Update monmap on the FIRST node ---
first_node = reachable_nodes[0]
first_host = first_node.new_ip if not first_node.is_local else first_node.ssh_host
remaining_nodes = reachable_nodes[1:]
# Extract current monmap
print(f"\n [{first_node.name}] Erstelle neue MON-Map (Primary)...")
# Extract current monmap from first node
rc, _, err = self.ssh.run_on_node(
first_host,
f"ceph-mon -i {first_node.name} --extract-monmap /tmp/monmap",
first_node.is_local, timeout=30,
)
if rc != 0:
print(f" [{first_node.name}] FEHLER: monmap extrahieren fehlgeschlagen: {err}")
return
# Show current monmap
self.ssh.run_on_node(
first_host, "monmaptool --print /tmp/monmap",
first_node.is_local, timeout=10,
)
# Remove all existing MON entries
for mon_name, _ in mon_nodes:
self.ssh.run_on_node(
first_host,
f"monmaptool --rm {mon_name} /tmp/monmap 2>/dev/null",
first_node.is_local, timeout=10,
)
# Re-add all MON entries with new IPs (msgr2 on 3300 + msgr1 on 6789)
for mon_name, new_ip in mon_nodes:
rc, _, err = self.ssh.run_on_node(
new_host,
f"ceph-mon -i {node.name} --extract-monmap /tmp/monmap",
node.is_local, timeout=30,
first_host,
f"monmaptool --addv {mon_name} "
f"[v2:{new_ip}:3300/0,v1:{new_ip}:6789/0] /tmp/monmap",
first_node.is_local, timeout=10,
)
if rc != 0:
print(f" [{node.name}] WARNUNG: monmap extrahieren fehlgeschlagen: {err}")
print(f" [{node.name}] Überspringe MON-Map Update")
continue
# Print current monmap for debugging
self.ssh.run_on_node(
new_host,
"monmaptool --print /tmp/monmap",
node.is_local, timeout=10,
)
# Remove all existing MON entries
for mon_name, _ in mon_nodes:
# Fallback: try legacy --add (older Ceph versions)
self.ssh.run_on_node(
new_host,
f"monmaptool --rm {mon_name} /tmp/monmap 2>/dev/null",
node.is_local, timeout=10,
first_host,
f"monmaptool --add {mon_name} {new_ip}:6789 /tmp/monmap",
first_node.is_local, timeout=10,
)
# Re-add all MON entries with new IPs (msgr2 on 3300 + msgr1 on 6789)
for mon_name, new_ip in mon_nodes:
# Show updated monmap
print(f" [{first_node.name}] Neue MON-Map:")
self.ssh.run_on_node(
first_host, "monmaptool --print /tmp/monmap",
first_node.is_local, timeout=10,
)
# Inject into first node
rc, _, err = self.ssh.run_on_node(
first_host,
f"ceph-mon -i {first_node.name} --inject-monmap /tmp/monmap",
first_node.is_local, timeout=30,
)
if rc == 0:
print(f" [{first_node.name}] MON-Map injiziert")
else:
print(f" [{first_node.name}] FEHLER MON-Map reinject: {err}")
self.ssh.run_on_node(first_host, "rm -f /tmp/monmap", first_node.is_local)
return
# Start first MON so we can get the authoritative map
print(f" [{first_node.name}] Starte ceph-mon (Primary)...")
self.ssh.run_on_node(
first_host,
f"systemctl start ceph-mon@{first_node.name}",
first_node.is_local, timeout=30,
)
# Give it a moment to initialize
time.sleep(3)
# --- Phase 2: Get authoritative monmap from running MON ---
if remaining_nodes:
print(f"\n Hole autoritative MON-Map vom laufenden MON...")
rc, _, err = self.ssh.run_on_node(
first_host,
"ceph mon getmap -o /tmp/monmap_auth",
first_node.is_local, timeout=30,
)
if rc == 0:
# Use authoritative map from running MON
monmap_path = "/tmp/monmap_auth"
print(f" Autoritative MON-Map erhalten")
else:
# Fallback: use the manually built map
print(f" WARNUNG: Konnte autoritative Map nicht holen ({err})")
print(f" Verwende manuell erstellte Map als Fallback")
monmap_path = "/tmp/monmap"
# --- Phase 3: Inject authoritative map into remaining nodes ---
for node in remaining_nodes:
new_host = node.new_ip if not node.is_local else node.ssh_host
# Copy monmap from first node to this node via SSH
if first_node.is_local:
# First node is local: SCP the map to remote node
rc, _, err = self.ssh.execute_local(
f"sshpass -p '{self.ssh.ssh_password}' "
f"scp -o StrictHostKeyChecking=no "
f"-o PubkeyAuthentication=no "
f"-P {self.ssh.ssh_port} "
f"{monmap_path} "
f"{self.ssh.ssh_user}@{new_host}:/tmp/monmap",
timeout=30,
)
elif node.is_local:
# This node is local: SCP from remote first node
rc, _, err = self.ssh.execute_local(
f"sshpass -p '{self.ssh.ssh_password}' "
f"scp -o StrictHostKeyChecking=no "
f"-o PubkeyAuthentication=no "
f"-P {self.ssh.ssh_port} "
f"{self.ssh.ssh_user}@{first_host}:{monmap_path} "
f"/tmp/monmap",
timeout=30,
)
else:
# Both remote: read from first, write to second
rc_read, stdout, _ = self.ssh.execute(
first_host, f"base64 {monmap_path}", timeout=30,
)
if rc_read == 0:
rc, _, err = self.ssh.execute(
new_host,
f"echo '{stdout.strip()}' | base64 -d > /tmp/monmap",
timeout=30,
)
else:
rc = -1
err = "Konnte monmap nicht vom Primary lesen"
if rc != 0:
print(f" [{node.name}] WARNUNG: monmap kopieren fehlgeschlagen: {err}")
print(f" [{node.name}] Erstelle Map manuell als Fallback...")
# Fallback: build map manually on this node
self._update_monmap_manual(node, new_host, mon_nodes)
continue
# Inject monmap
rc, _, err = self.ssh.run_on_node(
new_host,
f"monmaptool --addv {mon_name} "
f"[v2:{new_ip}:3300/0,v1:{new_ip}:6789/0] /tmp/monmap",
f"ceph-mon -i {node.name} --inject-monmap /tmp/monmap",
node.is_local, timeout=30,
)
if rc == 0:
print(f" [{node.name}] Autoritative MON-Map injiziert")
else:
print(f" [{node.name}] FEHLER MON-Map reinject: {err}")
# Cleanup
self.ssh.run_on_node(new_host, "rm -f /tmp/monmap", node.is_local)
# Cleanup on first node
self.ssh.run_on_node(
first_host, "rm -f /tmp/monmap /tmp/monmap_auth",
first_node.is_local,
)
def _update_monmap_manual(self, node, host: str, mon_nodes: list):
    """Fallback: manually build and inject the monmap on a single node.

    Extracts the node's monmap to /tmp/monmap, removes every entry named in
    ``mon_nodes``, re-adds each one with its new IP and injects the result
    with ``ceph-mon --inject-monmap``. The temporary file is removed again
    once it has been created.

    Args:
        node: Node object; ``node.name`` is used as the MON id and
            ``node.is_local`` is forwarded to ``self.ssh.run_on_node``.
        host: Host argument passed to ``self.ssh.run_on_node``.
        mon_nodes: List of ``(mon_name, new_ip)`` tuples for all MONs that
            must be present in the rebuilt map.

    Returns:
        None. Progress and errors are reported via ``print``.
    """
    rc, _, err = self.ssh.run_on_node(
        host,
        f"ceph-mon -i {node.name} --extract-monmap /tmp/monmap",
        node.is_local, timeout=30,
    )
    if rc != 0:
        # Report the underlying error as well, matching the primary-node path.
        print(f" [{node.name}] FEHLER: monmap extrahieren fehlgeschlagen: {err}")
        return

    # Drop all known MON entries; failures are ignored on purpose because an
    # entry may simply not exist in this node's copy of the map.
    for mon_name, _ in mon_nodes:
        self.ssh.run_on_node(
            host,
            f"monmaptool --rm {mon_name} /tmp/monmap 2>/dev/null",
            node.is_local, timeout=10,
        )

    # Re-add every MON with its new address (msgr2 on 3300 + msgr1 on 6789).
    for mon_name, new_ip in mon_nodes:
        rc, _, err = self.ssh.run_on_node(
            host,
            f"monmaptool --addv {mon_name} "
            f"[v2:{new_ip}:3300/0,v1:{new_ip}:6789/0] /tmp/monmap",
            node.is_local, timeout=10,
        )
        if rc == 0:
            continue

        # Fallback: try legacy --add (older Ceph versions).
        rc, _, legacy_err = self.ssh.run_on_node(
            host,
            f"monmaptool --add {mon_name} {new_ip}:6789 /tmp/monmap",
            node.is_local, timeout=10,
        )
        if rc != 0:
            # The old entries are already removed, so the map is incomplete
            # at this point. Injecting it would leave the MON with a map
            # that lacks this monitor; abort instead.
            print(
                f" [{node.name}] FEHLER: MON {mon_name} konnte nicht zur "
                f"MON-Map hinzugefügt werden: {legacy_err or err}"
            )
            print(f" [{node.name}] MON-Map wird NICHT injiziert (unvollständig)")
            self.ssh.run_on_node(host, "rm -f /tmp/monmap", node.is_local)
            return

    rc, _, err = self.ssh.run_on_node(
        host,
        f"ceph-mon -i {node.name} --inject-monmap /tmp/monmap",
        node.is_local, timeout=30,
    )
    if rc == 0:
        print(f" [{node.name}] MON-Map manuell aktualisiert (Fallback)")
    else:
        print(f" [{node.name}] FEHLER MON-Map reinject: {err}")

    # Cleanup
    self.ssh.run_on_node(host, "rm -f /tmp/monmap", node.is_local)