added mon map correction and injection
This commit is contained in:
parent
f072320ab9
commit
aebad45615
41
README.md
41
README.md
|
|
@ -285,6 +285,47 @@ systemctl restart corosync
|
|||
5. `journalctl -u corosync` — Corosync-Logs prüfen
|
||||
6. `journalctl -u pve-cluster` — pmxcfs-Logs prüfen
|
||||
|
||||
### Workaround: Ceph MON-Map manuell aktualisieren
|
||||
|
||||
Falls nach der Migration `ceph-mon` und `ceph-mgr` nicht starten (z.B. weil eine ältere Version des Tools die MON-Map nicht aktualisiert hat), muss die Ceph MON-Map manuell korrigiert werden. Die MON-Map ist eine interne Datenbank, in der die MON-Adressen gespeichert sind — ein reines Update der `ceph.conf` reicht nicht.
|
||||
|
||||
**Auf jedem Node ausführen:**
|
||||
|
||||
```bash
|
||||
# 1. MON stoppen
|
||||
systemctl stop ceph-mon@$(hostname)
|
||||
|
||||
# 2. Aktuelle MON-Map extrahieren und prüfen
|
||||
ceph-mon -i $(hostname) --extract-monmap /tmp/monmap
|
||||
monmaptool --print /tmp/monmap
|
||||
|
||||
# 3. Alte Einträge entfernen (für jeden MON-Node)
|
||||
monmaptool --rm pve1 /tmp/monmap
|
||||
monmaptool --rm pve2 /tmp/monmap
|
||||
monmaptool --rm pve3 /tmp/monmap
|
||||
|
||||
# 4. Neue Einträge mit neuen IPs hinzufügen
|
||||
monmaptool --addv pve1 [v2:172.0.2.101:3300/0,v1:172.0.2.101:6789/0] /tmp/monmap
|
||||
monmaptool --addv pve2 [v2:172.0.2.102:3300/0,v1:172.0.2.102:6789/0] /tmp/monmap
|
||||
monmaptool --addv pve3 [v2:172.0.2.103:3300/0,v1:172.0.2.103:6789/0] /tmp/monmap
|
||||
|
||||
# 5. Ergebnis prüfen
|
||||
monmaptool --print /tmp/monmap
|
||||
|
||||
# 6. Aktualisierte MON-Map zurückschreiben
|
||||
ceph-mon -i $(hostname) --inject-monmap /tmp/monmap
|
||||
|
||||
# 7. Services starten
|
||||
systemctl start ceph-mon@$(hostname)
|
||||
systemctl restart ceph-mgr@$(hostname)
|
||||
systemctl restart ceph-osd.target
|
||||
|
||||
# 8. Aufräumen
|
||||
rm -f /tmp/monmap
|
||||
```
|
||||
|
||||
> **Hinweis:** Node-Namen und IPs an das eigene Setup anpassen. Schritte 3+4 müssen alle MON-Nodes des Clusters enthalten, nicht nur den lokalen. Aktuelle Versionen des Tools aktualisieren die MON-Map automatisch.
|
||||
|
||||
## Hinweise
|
||||
|
||||
- Das Tool muss als **root** ausgeführt werden
|
||||
|
|
|
|||
119
migrator.py
119
migrator.py
|
|
@ -434,19 +434,27 @@ class Migrator:
|
|||
print(" [Ceph] /etc/pve nicht beschreibbar, schreibe direkt...")
|
||||
self._update_ceph_direct(plan, configs)
|
||||
|
||||
# Update Ceph MON map with new IPs (MUST happen before restart)
|
||||
self._update_ceph_mon_map(plan)
|
||||
|
||||
# Restart Ceph services
|
||||
print(" [Ceph] Services neu starten...")
|
||||
print("\n [Ceph] Services neu starten...")
|
||||
for node in plan.nodes:
|
||||
if not node.is_reachable:
|
||||
continue
|
||||
new_host = node.new_ip if not node.is_local else node.ssh_host
|
||||
|
||||
# Restart MON
|
||||
self.ssh.run_on_node(
|
||||
# Start MON (already stopped by monmap update)
|
||||
rc, _, err = self.ssh.run_on_node(
|
||||
new_host,
|
||||
f"systemctl restart ceph-mon@{node.name} 2>/dev/null",
|
||||
f"systemctl start ceph-mon@{node.name} 2>/dev/null",
|
||||
node.is_local, timeout=30,
|
||||
)
|
||||
if rc == 0:
|
||||
print(f" [{node.name}] ceph-mon gestartet")
|
||||
else:
|
||||
print(f" [{node.name}] WARNUNG ceph-mon: {err}")
|
||||
|
||||
# Restart MGR
|
||||
self.ssh.run_on_node(
|
||||
new_host,
|
||||
|
|
@ -459,7 +467,7 @@ class Migrator:
|
|||
"systemctl restart ceph-osd.target 2>/dev/null",
|
||||
node.is_local, timeout=60,
|
||||
)
|
||||
print(f" [{node.name}] Ceph-Services neu gestartet")
|
||||
print(f" [{node.name}] Ceph-Services gestartet")
|
||||
|
||||
def _update_ceph_direct(self, plan: MigrationPlan, configs: dict):
|
||||
"""Write ceph.conf directly on each node (fallback when no quorum)."""
|
||||
|
|
@ -480,21 +488,104 @@ class Migrator:
|
|||
def _update_ceph_mon_map(self, plan: MigrationPlan):
|
||||
"""Update Ceph MON map with new addresses.
|
||||
|
||||
This is needed when MON IPs change.
|
||||
When MON IPs change, the internal monmap (stored in MON's RocksDB)
|
||||
must be explicitly updated. Just updating ceph.conf is NOT enough.
|
||||
|
||||
Steps per node:
|
||||
1. Stop ceph-mon
|
||||
2. Extract monmap from MON database
|
||||
3. Remove all old MON entries
|
||||
4. Re-add MON entries with new IPs (msgr2 + msgr1)
|
||||
5. Reinject updated monmap
|
||||
"""
|
||||
ip_mapping = {n.current_ip: n.new_ip for n in plan.nodes if n.new_ip}
|
||||
|
||||
if not ip_mapping:
|
||||
print(" [Ceph] Keine IP-Änderungen für MON-Map")
|
||||
return
|
||||
|
||||
# Build the list of MON nodes with their new IPs
|
||||
mon_nodes = []
|
||||
for node in plan.nodes:
|
||||
if not node.is_reachable:
|
||||
continue
|
||||
new_ip = node.new_ip or node.current_ip
|
||||
mon_nodes.append((node.name, new_ip))
|
||||
|
||||
print("\n [Ceph] MON-Map aktualisieren...")
|
||||
|
||||
# Stop ceph-mon on all nodes first
|
||||
for node in plan.nodes:
|
||||
if not node.is_reachable:
|
||||
continue
|
||||
new_host = node.new_ip if not node.is_local else node.ssh_host
|
||||
new_ip = node.new_ip
|
||||
self.ssh.run_on_node(
|
||||
new_host,
|
||||
f"systemctl stop ceph-mon@{node.name} 2>/dev/null",
|
||||
node.is_local, timeout=30,
|
||||
)
|
||||
print(f" [{node.name}] ceph-mon gestoppt")
|
||||
|
||||
# Extract monmap, modify, and reinject
|
||||
cmds = [
|
||||
# Update monmap on each node
|
||||
for node in plan.nodes:
|
||||
if not node.is_reachable:
|
||||
continue
|
||||
new_host = node.new_ip if not node.is_local else node.ssh_host
|
||||
|
||||
# Extract current monmap
|
||||
rc, _, err = self.ssh.run_on_node(
|
||||
new_host,
|
||||
f"ceph-mon -i {node.name} --extract-monmap /tmp/monmap",
|
||||
# Remove old entries and add new ones
|
||||
]
|
||||
# This is complex - for now we rely on the ceph.conf update
|
||||
# and let Ceph handle the MON map update on restart
|
||||
print(f" [{node.name}] MON-Map wird beim Neustart aktualisiert")
|
||||
node.is_local, timeout=30,
|
||||
)
|
||||
if rc != 0:
|
||||
print(f" [{node.name}] WARNUNG: monmap extrahieren fehlgeschlagen: {err}")
|
||||
print(f" [{node.name}] Überspringe MON-Map Update")
|
||||
continue
|
||||
|
||||
# Print current monmap for debugging
|
||||
self.ssh.run_on_node(
|
||||
new_host,
|
||||
"monmaptool --print /tmp/monmap",
|
||||
node.is_local, timeout=10,
|
||||
)
|
||||
|
||||
# Remove all existing MON entries
|
||||
for mon_name, _ in mon_nodes:
|
||||
self.ssh.run_on_node(
|
||||
new_host,
|
||||
f"monmaptool --rm {mon_name} /tmp/monmap 2>/dev/null",
|
||||
node.is_local, timeout=10,
|
||||
)
|
||||
|
||||
# Re-add all MON entries with new IPs (msgr2 on 3300 + msgr1 on 6789)
|
||||
for mon_name, new_ip in mon_nodes:
|
||||
rc, _, err = self.ssh.run_on_node(
|
||||
new_host,
|
||||
f"monmaptool --addv {mon_name} "
|
||||
f"[v2:{new_ip}:3300/0,v1:{new_ip}:6789/0] /tmp/monmap",
|
||||
node.is_local, timeout=10,
|
||||
)
|
||||
if rc != 0:
|
||||
# Fallback: try legacy --add (older Ceph versions)
|
||||
self.ssh.run_on_node(
|
||||
new_host,
|
||||
f"monmaptool --add {mon_name} {new_ip}:6789 /tmp/monmap",
|
||||
node.is_local, timeout=10,
|
||||
)
|
||||
|
||||
# Reinject updated monmap
|
||||
rc, _, err = self.ssh.run_on_node(
|
||||
new_host,
|
||||
f"ceph-mon -i {node.name} --inject-monmap /tmp/monmap",
|
||||
node.is_local, timeout=30,
|
||||
)
|
||||
if rc == 0:
|
||||
print(f" [{node.name}] MON-Map aktualisiert")
|
||||
else:
|
||||
print(f" [{node.name}] FEHLER MON-Map reinject: {err}")
|
||||
|
||||
# Cleanup
|
||||
self.ssh.run_on_node(
|
||||
new_host, "rm -f /tmp/monmap", node.is_local,
|
||||
)
|
||||
|
|
|
|||
Loading…
Reference in New Issue