added mon map correction and injection
This commit is contained in:
parent
f072320ab9
commit
aebad45615
41
README.md
41
README.md
|
|
@ -285,6 +285,47 @@ systemctl restart corosync
|
||||||
5. `journalctl -u corosync` — Corosync-Logs prüfen
|
5. `journalctl -u corosync` — Corosync-Logs prüfen
|
||||||
6. `journalctl -u pve-cluster` — pmxcfs-Logs prüfen
|
6. `journalctl -u pve-cluster` — pmxcfs-Logs prüfen
|
||||||
|
|
||||||
|
### Workaround: Ceph MON-Map manuell aktualisieren
|
||||||
|
|
||||||
|
Falls nach der Migration `ceph-mon` und `ceph-mgr` nicht starten (z. B. weil eine ältere Version des Tools die MON-Map nicht aktualisiert hat), muss die Ceph MON-Map manuell korrigiert werden. Die MON-Map ist eine interne Datenbank, in der die MON-Adressen gespeichert sind — ein reines Update der `ceph.conf` reicht nicht.
|
||||||
|
|
||||||
|
**Auf jedem Node ausführen:**
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# 1. MON stoppen
|
||||||
|
systemctl stop ceph-mon@$(hostname)
|
||||||
|
|
||||||
|
# 2. Aktuelle MON-Map extrahieren und prüfen
|
||||||
|
ceph-mon -i $(hostname) --extract-monmap /tmp/monmap
|
||||||
|
monmaptool --print /tmp/monmap
|
||||||
|
|
||||||
|
# 3. Alte Einträge entfernen (für jeden MON-Node)
|
||||||
|
monmaptool --rm pve1 /tmp/monmap
|
||||||
|
monmaptool --rm pve2 /tmp/monmap
|
||||||
|
monmaptool --rm pve3 /tmp/monmap
|
||||||
|
|
||||||
|
# 4. Neue Einträge mit neuen IPs hinzufügen
|
||||||
|
monmaptool --addv pve1 [v2:172.0.2.101:3300/0,v1:172.0.2.101:6789/0] /tmp/monmap
|
||||||
|
monmaptool --addv pve2 [v2:172.0.2.102:3300/0,v1:172.0.2.102:6789/0] /tmp/monmap
|
||||||
|
monmaptool --addv pve3 [v2:172.0.2.103:3300/0,v1:172.0.2.103:6789/0] /tmp/monmap
|
||||||
|
|
||||||
|
# 5. Ergebnis prüfen
|
||||||
|
monmaptool --print /tmp/monmap
|
||||||
|
|
||||||
|
# 6. Aktualisierte MON-Map zurückschreiben
|
||||||
|
ceph-mon -i $(hostname) --inject-monmap /tmp/monmap
|
||||||
|
|
||||||
|
# 7. Services starten
|
||||||
|
systemctl start ceph-mon@$(hostname)
|
||||||
|
systemctl restart ceph-mgr@$(hostname)
|
||||||
|
systemctl restart ceph-osd.target
|
||||||
|
|
||||||
|
# 8. Aufräumen
|
||||||
|
rm -f /tmp/monmap
|
||||||
|
```
|
||||||
|
|
||||||
|
> **Hinweis:** Node-Namen und IPs an das eigene Setup anpassen. Schritte 3+4 müssen alle MON-Nodes des Clusters enthalten, nicht nur den lokalen. Aktuelle Versionen des Tools aktualisieren die MON-Map automatisch.
|
||||||
|
|
||||||
## Hinweise
|
## Hinweise
|
||||||
|
|
||||||
- Das Tool muss als **root** ausgeführt werden
|
- Das Tool muss als **root** ausgeführt werden
|
||||||
|
|
|
||||||
119
migrator.py
119
migrator.py
|
|
@ -434,19 +434,27 @@ class Migrator:
|
||||||
print(" [Ceph] /etc/pve nicht beschreibbar, schreibe direkt...")
|
print(" [Ceph] /etc/pve nicht beschreibbar, schreibe direkt...")
|
||||||
self._update_ceph_direct(plan, configs)
|
self._update_ceph_direct(plan, configs)
|
||||||
|
|
||||||
|
# Update Ceph MON map with new IPs (MUST happen before restart)
|
||||||
|
self._update_ceph_mon_map(plan)
|
||||||
|
|
||||||
# Restart Ceph services
|
# Restart Ceph services
|
||||||
print(" [Ceph] Services neu starten...")
|
print("\n [Ceph] Services neu starten...")
|
||||||
for node in plan.nodes:
|
for node in plan.nodes:
|
||||||
if not node.is_reachable:
|
if not node.is_reachable:
|
||||||
continue
|
continue
|
||||||
new_host = node.new_ip if not node.is_local else node.ssh_host
|
new_host = node.new_ip if not node.is_local else node.ssh_host
|
||||||
|
|
||||||
# Restart MON
|
# Start MON (already stopped by monmap update)
|
||||||
self.ssh.run_on_node(
|
rc, _, err = self.ssh.run_on_node(
|
||||||
new_host,
|
new_host,
|
||||||
f"systemctl restart ceph-mon@{node.name} 2>/dev/null",
|
f"systemctl start ceph-mon@{node.name} 2>/dev/null",
|
||||||
node.is_local, timeout=30,
|
node.is_local, timeout=30,
|
||||||
)
|
)
|
||||||
|
if rc == 0:
|
||||||
|
print(f" [{node.name}] ceph-mon gestartet")
|
||||||
|
else:
|
||||||
|
print(f" [{node.name}] WARNUNG ceph-mon: {err}")
|
||||||
|
|
||||||
# Restart MGR
|
# Restart MGR
|
||||||
self.ssh.run_on_node(
|
self.ssh.run_on_node(
|
||||||
new_host,
|
new_host,
|
||||||
|
|
@ -459,7 +467,7 @@ class Migrator:
|
||||||
"systemctl restart ceph-osd.target 2>/dev/null",
|
"systemctl restart ceph-osd.target 2>/dev/null",
|
||||||
node.is_local, timeout=60,
|
node.is_local, timeout=60,
|
||||||
)
|
)
|
||||||
print(f" [{node.name}] Ceph-Services neu gestartet")
|
print(f" [{node.name}] Ceph-Services gestartet")
|
||||||
|
|
||||||
def _update_ceph_direct(self, plan: MigrationPlan, configs: dict):
|
def _update_ceph_direct(self, plan: MigrationPlan, configs: dict):
|
||||||
"""Write ceph.conf directly on each node (fallback when no quorum)."""
|
"""Write ceph.conf directly on each node (fallback when no quorum)."""
|
||||||
|
|
@ -480,21 +488,104 @@ class Migrator:
|
||||||
def _update_ceph_mon_map(self, plan: MigrationPlan):
|
def _update_ceph_mon_map(self, plan: MigrationPlan):
|
||||||
"""Update Ceph MON map with new addresses.
|
"""Update Ceph MON map with new addresses.
|
||||||
|
|
||||||
This is needed when MON IPs change.
|
When MON IPs change, the internal monmap (stored in MON's RocksDB)
|
||||||
|
must be explicitly updated. Just updating ceph.conf is NOT enough.
|
||||||
|
|
||||||
|
Steps per node:
|
||||||
|
1. Stop ceph-mon
|
||||||
|
2. Extract monmap from MON database
|
||||||
|
3. Remove all old MON entries
|
||||||
|
4. Re-add MON entries with new IPs (msgr2 + msgr1)
|
||||||
|
5. Reinject updated monmap
|
||||||
"""
|
"""
|
||||||
ip_mapping = {n.current_ip: n.new_ip for n in plan.nodes if n.new_ip}
|
ip_mapping = {n.current_ip: n.new_ip for n in plan.nodes if n.new_ip}
|
||||||
|
|
||||||
|
if not ip_mapping:
|
||||||
|
print(" [Ceph] Keine IP-Änderungen für MON-Map")
|
||||||
|
return
|
||||||
|
|
||||||
|
# Build the list of MON nodes with their new IPs
|
||||||
|
mon_nodes = []
|
||||||
|
for node in plan.nodes:
|
||||||
|
if not node.is_reachable:
|
||||||
|
continue
|
||||||
|
new_ip = node.new_ip or node.current_ip
|
||||||
|
mon_nodes.append((node.name, new_ip))
|
||||||
|
|
||||||
|
print("\n [Ceph] MON-Map aktualisieren...")
|
||||||
|
|
||||||
|
# Stop ceph-mon on all nodes first
|
||||||
for node in plan.nodes:
|
for node in plan.nodes:
|
||||||
if not node.is_reachable:
|
if not node.is_reachable:
|
||||||
continue
|
continue
|
||||||
new_host = node.new_ip if not node.is_local else node.ssh_host
|
new_host = node.new_ip if not node.is_local else node.ssh_host
|
||||||
new_ip = node.new_ip
|
self.ssh.run_on_node(
|
||||||
|
new_host,
|
||||||
|
f"systemctl stop ceph-mon@{node.name} 2>/dev/null",
|
||||||
|
node.is_local, timeout=30,
|
||||||
|
)
|
||||||
|
print(f" [{node.name}] ceph-mon gestoppt")
|
||||||
|
|
||||||
# Extract monmap, modify, and reinject
|
# Update monmap on each node
|
||||||
cmds = [
|
for node in plan.nodes:
|
||||||
|
if not node.is_reachable:
|
||||||
|
continue
|
||||||
|
new_host = node.new_ip if not node.is_local else node.ssh_host
|
||||||
|
|
||||||
|
# Extract current monmap
|
||||||
|
rc, _, err = self.ssh.run_on_node(
|
||||||
|
new_host,
|
||||||
f"ceph-mon -i {node.name} --extract-monmap /tmp/monmap",
|
f"ceph-mon -i {node.name} --extract-monmap /tmp/monmap",
|
||||||
# Remove old entries and add new ones
|
node.is_local, timeout=30,
|
||||||
]
|
)
|
||||||
# This is complex - for now we rely on the ceph.conf update
|
if rc != 0:
|
||||||
# and let Ceph handle the MON map update on restart
|
print(f" [{node.name}] WARNUNG: monmap extrahieren fehlgeschlagen: {err}")
|
||||||
print(f" [{node.name}] MON-Map wird beim Neustart aktualisiert")
|
print(f" [{node.name}] Überspringe MON-Map Update")
|
||||||
|
continue
|
||||||
|
|
||||||
|
# Print current monmap for debugging
|
||||||
|
self.ssh.run_on_node(
|
||||||
|
new_host,
|
||||||
|
"monmaptool --print /tmp/monmap",
|
||||||
|
node.is_local, timeout=10,
|
||||||
|
)
|
||||||
|
|
||||||
|
# Remove all existing MON entries
|
||||||
|
for mon_name, _ in mon_nodes:
|
||||||
|
self.ssh.run_on_node(
|
||||||
|
new_host,
|
||||||
|
f"monmaptool --rm {mon_name} /tmp/monmap 2>/dev/null",
|
||||||
|
node.is_local, timeout=10,
|
||||||
|
)
|
||||||
|
|
||||||
|
# Re-add all MON entries with new IPs (msgr2 on 3300 + msgr1 on 6789)
|
||||||
|
for mon_name, new_ip in mon_nodes:
|
||||||
|
rc, _, err = self.ssh.run_on_node(
|
||||||
|
new_host,
|
||||||
|
f"monmaptool --addv {mon_name} "
|
||||||
|
f"[v2:{new_ip}:3300/0,v1:{new_ip}:6789/0] /tmp/monmap",
|
||||||
|
node.is_local, timeout=10,
|
||||||
|
)
|
||||||
|
if rc != 0:
|
||||||
|
# Fallback: try legacy --add (older Ceph versions)
|
||||||
|
self.ssh.run_on_node(
|
||||||
|
new_host,
|
||||||
|
f"monmaptool --add {mon_name} {new_ip}:6789 /tmp/monmap",
|
||||||
|
node.is_local, timeout=10,
|
||||||
|
)
|
||||||
|
|
||||||
|
# Reinject updated monmap
|
||||||
|
rc, _, err = self.ssh.run_on_node(
|
||||||
|
new_host,
|
||||||
|
f"ceph-mon -i {node.name} --inject-monmap /tmp/monmap",
|
||||||
|
node.is_local, timeout=30,
|
||||||
|
)
|
||||||
|
if rc == 0:
|
||||||
|
print(f" [{node.name}] MON-Map aktualisiert")
|
||||||
|
else:
|
||||||
|
print(f" [{node.name}] FEHLER MON-Map reinject: {err}")
|
||||||
|
|
||||||
|
# Cleanup
|
||||||
|
self.ssh.run_on_node(
|
||||||
|
new_host, "rm -f /tmp/monmap", node.is_local,
|
||||||
|
)
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue