From 4bb9b93baf399d07eef159e40d47fa44c06be3d5 Mon Sep 17 00:00:00 2001
From: duffyduck <info@hacker-net.de>
Date: Thu, 5 Mar 2026 08:27:01 +0100
Subject: [PATCH] update storage.cfg

---
 README.md   | 40 +++++++++++++++---------
 migrator.py | 87 ++++++++++++++++++++++++++++++++++++++++++-----------
 2 files changed, 95 insertions(+), 32 deletions(-)

diff --git a/README.md b/README.md
index d66e5f4..488f3bc 100644
--- a/README.md
+++ b/README.md
@@ -471,7 +471,7 @@ ceph -s
 
 ### Fehlerbehebung: CephFS/RBD-Storage nicht erreichbar nach Migration
 
-Falls CephFS oder RBD-Storage nach der Migration nicht gemountet werden können, liegt es daran, dass in `/etc/pve/storage.cfg` noch die alten MON-IPs stehen. Aktuelle Versionen des Tools aktualisieren die `storage.cfg` automatisch.
+Nach der Migration kann CephFS nicht erreichbar sein, weil der bestehende Kernel-Mount weiterhin die alten MON-IPs verwendet. Das passiert unabhängig davon, ob `monhost` in der `storage.cfg` steht oder nicht. Aktuelle Versionen des Tools führen den Remount automatisch durch.
 
 **Symptom:** CephFS-Storage zeigt Fehler in der GUI, `mount | grep ceph` zeigt alte IPs:
 
@@ -479,27 +479,37 @@ Falls CephFS oder RBD-Storage nach der Migration nicht gemountet werden können,
 172.0.4.1,172.0.4.2,172.0.4.3:/ on /mnt/pve/cephfs type ceph (...)
 ```
 
-**Lösung:**
+**Fall 1: Keine `monhost`-Einträge in storage.cfg (Standard bei Proxmox)**
+
+Proxmox holt sich die MON-IPs automatisch aus `/etc/ceph/ceph.conf`. Es reicht ein Remount:
 
 ```bash
-# 1. Prüfen welche IPs in storage.cfg stehen:
-grep monhost /etc/pve/storage.cfg
-
-# 2. monhost für CephFS aktualisieren (neue MON-IPs kommasepariert):
-pvesm set cephfs --monhost 192.168.101.1,192.168.101.2,192.168.101.3
-
-# 3. Falls RBD-Storage vorhanden:
-pvesm set local-rbd --monhost 192.168.101.1,192.168.101.2,192.168.101.3
-
-# 4. CephFS neu mounten:
+# Auf JEDEM Node:
 umount /mnt/pve/cephfs
 mount /mnt/pve/cephfs
 
-# Oder auf allen Nodes per Neustart des Storage-Dienstes:
-systemctl restart pve-ha-lrm
+# Prüfen ob die neuen IPs verwendet werden:
+mount | grep ceph
 ```
 
-> **Hinweis:** Die Storage-Namen (`cephfs`, `local-rbd`) können bei jedem Setup anders heißen. Mit `pvesm status` werden alle konfigurierten Storages angezeigt.
+**Fall 2: Explizite `monhost`-Einträge in storage.cfg**
+
+```bash
+# 1. Prüfen ob monhost-Einträge vorhanden sind:
+grep monhost /etc/pve/storage.cfg
+
+# 2. Falls ja, monhost für CephFS aktualisieren (neue MON-IPs kommasepariert):
+pvesm set cephfs --monhost 192.168.101.1,192.168.101.2,192.168.101.3
+
+# 3. Falls RBD-Storage vorhanden:
+pvesm set data --monhost 192.168.101.1,192.168.101.2,192.168.101.3
+
+# 4. Dann Remount auf JEDEM Node:
+umount /mnt/pve/cephfs
+mount /mnt/pve/cephfs
+```
+
+> **Hinweis:** Die Storage-Namen (`cephfs`, `data`) können bei jedem Setup anders heißen. Mit `pvesm status` werden alle konfigurierten Storages angezeigt.
 
 ## Hinweise
 
diff --git a/migrator.py b/migrator.py
index fa2a24c..3176410 100644
--- a/migrator.py
+++ b/migrator.py
@@ -391,10 +391,11 @@ class Migrator:
         if configs.get('ceph'):
             self._update_ceph(plan, configs)
 
-        # Update storage.cfg (CephFS/RBD monhost entries)
+        # Update storage.cfg (CephFS/RBD monhost entries) and remount CephFS
         ip_mapping = {n.current_ip: n.new_ip for n in plan.nodes if n.new_ip}
         if ip_mapping:
             self._update_storage_cfg(ip_mapping)
+        self._remount_cephfs(plan)
 
         # Cleanup staging directories
         print("\n  Staging-Verzeichnisse aufräumen...")
@@ -436,11 +437,12 @@ class Migrator:
             print("  [Corosync] Manuell ausführen: cp /etc/corosync/corosync.conf /etc/pve/corosync.conf")
 
     def _update_storage_cfg(self, ip_mapping: dict[str, str]):
-        """Update /etc/pve/storage.cfg with new MON IPs.
+        """Replace old node IPs in /etc/pve/storage.cfg with the new ones.
 
-        CephFS and RBD storage entries contain a 'monhost' field with the
-        MON IP addresses. After a network migration these must be updated,
-        otherwise CephFS mounts and RBD connections fail.
+        CephFS and RBD storage entries may contain a 'monhost' field with
+        MON IP addresses. If present, these must be updated. Additionally,
+        active CephFS mounts use the old IPs in the kernel and need a remount
+        regardless of whether monhost is in storage.cfg or not.
         """
         print("\n  [Storage] /etc/pve/storage.cfg prüfen...")
 
@@ -459,27 +461,78 @@ class Migrator:
             print(f"  [Storage] WARNUNG: storage.cfg nicht lesbar: {err}")
             return
 
-        # Check if any old IPs are present
+        # Check if any old IPs are present in storage.cfg
         needs_update = False
         for old_ip in ip_mapping:
             if old_ip in content:
                 needs_update = True
                 break
 
-        if not needs_update:
-            print("  [Storage] Keine alten IPs in storage.cfg gefunden")
+        if needs_update:
+            # Replace old IPs with new IPs
+            new_content = content
+            for old_ip, new_ip in ip_mapping.items():
+                new_content = new_content.replace(old_ip, new_ip)
+
+            ok, msg = self.ssh.write_local_file("/etc/pve/storage.cfg", new_content)
+            if ok:
+                print("  [Storage] /etc/pve/storage.cfg aktualisiert (monhost IPs ersetzt)")
+            else:
+                print(f"  [Storage] FEHLER: {msg}")
+        else:
+            print("  [Storage] Keine alten IPs in storage.cfg (monhost wird aus ceph.conf gelesen)")
+
+    def _remount_cephfs(self, plan: MigrationPlan):
+        """Remount CephFS on all nodes after migration.
+
+        Existing kernel CephFS mounts keep using the old MON IPs even after
+        ceph.conf is updated; a remount picks up the new IPs. Mount points are
+        read from the local node only and remounted on every reachable node.
+        """
+        # Check if any CephFS mounts exist on the local node
+        rc, mounts, _ = self.ssh.execute_local("mount -t ceph 2>/dev/null")
+        if rc != 0 or not mounts or not mounts.strip():
             return
 
-        # Replace old IPs with new IPs
-        new_content = content
-        for old_ip, new_ip in ip_mapping.items():
-            new_content = new_content.replace(old_ip, new_ip)
+        # Extract CephFS mount points (skip non-cephfs mounts)
+        mount_points = []
+        for line in mounts.strip().split('\n'):
+            # Format: 1.2.3.4,5.6.7.8:/ on /mnt/pve/cephfs type ceph (...)
+            if ' type ceph ' in line:
+                parts = line.split(' on ')
+                if len(parts) >= 2:
+                    mp = parts[1].split(' type ')[0].strip()
+                    mount_points.append(mp)
 
-        ok, msg = self.ssh.write_local_file("/etc/pve/storage.cfg", new_content)
-        if ok:
-            print("  [Storage] /etc/pve/storage.cfg aktualisiert (monhost IPs ersetzt)")
-        else:
-            print(f"  [Storage] FEHLER: {msg}")
+        if not mount_points:
+            return
+
+        print(f"\n  [CephFS] {len(mount_points)} Mount(s) gefunden, Remount auf allen Nodes...")
+
+        for node in plan.nodes:
+            if not node.is_reachable:
+                continue
+            new_host = node.new_ip if not node.is_local else node.ssh_host
+
+            for mp in mount_points:
+                if node.is_local or True:  # Remount on all nodes
+                    rc, _, err = self.ssh.run_on_node(
+                        new_host,
+                        f"umount {mp} 2>/dev/null; mount {mp}",
+                        node.is_local, timeout=30,
+                    )
+                    if rc == 0:
+                        print(f"  [{node.name}] CephFS {mp} remounted")
+                    else:
+                        # NOTE(review): the command above already ran mount; this is a plain retry
+                        rc2, _, _ = self.ssh.run_on_node(
+                            new_host, f"mount {mp} 2>/dev/null",
+                            node.is_local, timeout=30,
+                        )
+                        if rc2 == 0:
+                            print(f"  [{node.name}] CephFS {mp} mounted")
+                        else:
+                            print(f"  [{node.name}] CephFS {mp} WARNUNG: Remount fehlgeschlagen: {err}")
 
     def _wait_for_quorum(self, timeout: int = 60) -> bool:
         """Wait for cluster quorum to be established."""