feat: F5-TTS Tuning ueber Diagnostic statt .env

Folgt der "keine neuen Settings in .env" Regel. f5tts/bridge.py: - F5TTS_MODEL/CKPT_FILE/VOCAB_FILE/CFG_STRENGTH/NFE_STEP ENV-Vars raus - Hard-coded Defaults im Code (DEFAULT_F5TTS_*) - F5Runner besitzt Live-Settings als Instance-Vars + update_config() - config-Broadcast triggert Modell-Reload nur, wenn Modell-relevantes sich aendert (cfg_strength/nfe_step ohne Reload) - F5TTS_DEVICE bleibt ENV (Hardware-Bootstrap) xtts/docker-compose.yml: F5TTS_* ENV-Vars rausgenommen, Kommentar verweist auf Diagnostic-Config. aria-bridge: nimmt f5tts*-Felder im config-Handler entgegen, persistiert sie in voice_config.json. Beim RVS-Connect broadcastet die Bridge die persistierte Config einmalig — damit die f5tts-bridge nach Container-Restart automatisch die zuletzt gewaehlten Settings bekommt, ohne dass der User in Diagnostic was klicken muss. Diagnostic UI: - Neuer aufklappbarer "F5-TTS Modell-Tuning (advanced)" Bereich - Felder: Modell-ID, Custom-Checkpoint, Vocab, cfg_strength, nfe_step - voice_config beim Laden: Felder werden zurueck in die UI gesetzt - sendVoiceConfig schickt die neuen Felder mit - Server: send_voice_config persistiert die Felder, leere Strings werden geloescht, damit die Hard-Defaults greifen Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-24 15:44:58 +02:00
parent 467f95424e
commit 187ffad7ee
5 changed files with 231 additions and 32 deletions
@@ -450,6 +450,58 @@
        <!-- Gecloned Stimmen — Liste mit Loeschen -->
        <div id="xtts-voice-list" style="margin-bottom:12px;"></div>

+        <!-- F5-TTS Modell-Tuning -->
+        <details style="background:#0D0D1A;border:1px solid #2A2A3E;border-radius:6px;padding:10px 12px;margin-bottom:12px;">
+          <summary style="color:#8888AA;font-size:12px;cursor:pointer;">F5-TTS Modell-Tuning (advanced)</summary>
+          <div style="margin-top:10px;display:flex;flex-direction:column;gap:8px;">
+            <div style="color:#8888AA;font-size:11px;">
+              Werden via RVS an die f5tts-bridge auf der Gamebox geschickt.
+              Modell-/Checkpoint-Wechsel triggert einen Reload (~30s).
+              Hardcoded Defaults: F5TTS_v1_Base, cfg_strength=2.5, nfe_step=32.
+            </div>
+
+            <label style="color:#8888AA;font-size:12px;">Modell-ID:</label>
+            <input type="text" id="diag-f5tts-model"
+              placeholder="F5TTS_v1_Base"
+              style="background:#1E1E2E;color:#fff;border:1px solid #2A2A3E;border-radius:6px;padding:6px 10px;font-size:13px;">
+
+            <label style="color:#8888AA;font-size:12px;">
+              Custom Checkpoint (HF-Repo "user/repo" oder Container-Pfad, leer = Default):
+            </label>
+            <input type="text" id="diag-f5tts-ckpt"
+              placeholder="z.B. aoxo/F5-TTS-German"
+              style="background:#1E1E2E;color:#fff;border:1px solid #2A2A3E;border-radius:6px;padding:6px 10px;font-size:13px;">
+
+            <label style="color:#8888AA;font-size:12px;">
+              Custom Vocab (passend zum Checkpoint, optional):
+            </label>
+            <input type="text" id="diag-f5tts-vocab"
+              placeholder="leer = Default"
+              style="background:#1E1E2E;color:#fff;border:1px solid #2A2A3E;border-radius:6px;padding:6px 10px;font-size:13px;">
+
+            <div style="display:flex;gap:12px;">
+              <div style="flex:1;">
+                <label style="color:#8888AA;font-size:12px;">cfg_strength (1.0 - 5.0):</label>
+                <input type="number" id="diag-f5tts-cfg" step="0.1" min="1" max="5"
+                  placeholder="2.5"
+                  style="background:#1E1E2E;color:#fff;border:1px solid #2A2A3E;border-radius:6px;padding:6px 10px;font-size:13px;width:100%;box-sizing:border-box;">
+                <div style="color:#666680;font-size:10px;">Hoeher = klebt staerker an Referenz</div>
+              </div>
+              <div style="flex:1;">
+                <label style="color:#8888AA;font-size:12px;">nfe_step (8 - 64):</label>
+                <input type="number" id="diag-f5tts-nfe" step="1" min="8" max="64"
+                  placeholder="32"
+                  style="background:#1E1E2E;color:#fff;border:1px solid #2A2A3E;border-radius:6px;padding:6px 10px;font-size:13px;width:100%;box-sizing:border-box;">
+                <div style="color:#666680;font-size:10px;">Hoeher = bessere Qualitaet, langsamer</div>
+              </div>
+            </div>
+
+            <button class="btn primary" onclick="sendVoiceConfig()" style="padding:6px 14px;font-size:12px;align-self:flex-start;margin-top:6px;">
+              Anwenden
+            </button>
+          </div>
+        </details>
+
        <!-- Voice Cloning -->
        <div style="background:#1E1E2E;border-radius:8px;padding:12px;margin-top:8px;">
          <div style="color:#0096FF;font-size:13px;font-weight:600;margin-bottom:8px;">Stimme klonen</div>
@@ -841,6 +893,16 @@
            const wSel = document.getElementById('diag-whisper-model');
            if (wSel) wSel.value = msg.whisperModel;
          }
+          // F5-TTS Tuning-Felder wiederherstellen (falls gesetzt)
+          const setIfPresent = (id, val) => {
+            const el = document.getElementById(id);
+            if (el && val !== undefined && val !== null && val !== '') el.value = val;
+          };
+          setIfPresent('diag-f5tts-model', msg.f5ttsModel);
+          setIfPresent('diag-f5tts-ckpt', msg.f5ttsCkptFile);
+          setIfPresent('diag-f5tts-vocab', msg.f5ttsVocabFile);
+          setIfPresent('diag-f5tts-cfg', msg.f5ttsCfgStrength);
+          setIfPresent('diag-f5tts-nfe', msg.f5ttsNfeStep);
          return;
        }

@@ -1570,7 +1632,19 @@
      const ttsEnabled = document.getElementById('diag-tts-enabled').checked;
      const xttsVoice = document.getElementById('diag-xtts-voice').value;
      const whisperModel = document.getElementById('diag-whisper-model').value;
-      send({ action: 'send_voice_config', ttsEnabled, xttsVoice, whisperModel });
+      const f5ttsModel = document.getElementById('diag-f5tts-model')?.value || '';
+      const f5ttsCkptFile = document.getElementById('diag-f5tts-ckpt')?.value || '';
+      const f5ttsVocabFile = document.getElementById('diag-f5tts-vocab')?.value || '';
+      const f5ttsCfgRaw = document.getElementById('diag-f5tts-cfg')?.value || '';
+      const f5ttsNfeRaw = document.getElementById('diag-f5tts-nfe')?.value || '';
+      const f5ttsCfgStrength = f5ttsCfgRaw ? parseFloat(f5ttsCfgRaw) : undefined;
+      const f5ttsNfeStep = f5ttsNfeRaw ? parseInt(f5ttsNfeRaw, 10) : undefined;
+      send({
+        action: 'send_voice_config',
+        ttsEnabled, xttsVoice, whisperModel,
+        f5ttsModel, f5ttsCkptFile, f5ttsVocabFile,
+        f5ttsCfgStrength, f5ttsNfeStep,
+      });
      const statusEl = document.getElementById('voice-status');
      if (statusEl && xttsVoice) {
        statusEl.textContent = `⏳ Stimme "${xttsVoice}" wird geladen...`;
@@ -1423,6 +1423,25 @@ wss.on("connection", (ws) => {
          xttsVoice: msg.xttsVoice || "",
        };
        if (msg.whisperModel !== undefined) voiceConfig.whisperModel = msg.whisperModel;
+        // F5-TTS Tuning-Felder — leere Strings entfernen damit der Default greift
+        if (msg.f5ttsModel !== undefined) {
+          if (msg.f5ttsModel) voiceConfig.f5ttsModel = msg.f5ttsModel;
+          else delete voiceConfig.f5ttsModel;
+        }
+        if (msg.f5ttsCkptFile !== undefined) {
+          if (msg.f5ttsCkptFile) voiceConfig.f5ttsCkptFile = msg.f5ttsCkptFile;
+          else delete voiceConfig.f5ttsCkptFile;
+        }
+        if (msg.f5ttsVocabFile !== undefined) {
+          if (msg.f5ttsVocabFile) voiceConfig.f5ttsVocabFile = msg.f5ttsVocabFile;
+          else delete voiceConfig.f5ttsVocabFile;
+        }
+        if (msg.f5ttsCfgStrength !== undefined && !isNaN(msg.f5ttsCfgStrength)) {
+          voiceConfig.f5ttsCfgStrength = msg.f5ttsCfgStrength;
+        }
+        if (msg.f5ttsNfeStep !== undefined && !isNaN(msg.f5ttsNfeStep)) {
+          voiceConfig.f5ttsNfeStep = msg.f5ttsNfeStep;
+        }
        try {
          fs.mkdirSync("/shared/config", { recursive: true });
          fs.writeFileSync("/shared/config/voice_config.json", JSON.stringify(voiceConfig, null, 2));