feat: F5-TTS Tuning ueber Diagnostic statt .env
Folgt der Regel "keine neuen Settings in .env".
f5tts/bridge.py:
- F5TTS_MODEL/CKPT_FILE/VOCAB_FILE/CFG_STRENGTH/NFE_STEP ENV-Vars raus
- Hard-coded Defaults im Code (DEFAULT_F5TTS_*)
- F5Runner besitzt Live-Settings als Instance-Vars + update_config()
- config-Broadcast triggert einen Modell-Reload nur, wenn sich
Modell-relevantes aendert (cfg_strength/nfe_step wirken ohne Reload)
- F5TTS_DEVICE bleibt ENV (Hardware-Bootstrap)
xtts/docker-compose.yml: F5TTS_* ENV-Vars rausgenommen, Kommentar
verweist auf Diagnostic-Config.
aria-bridge: nimmt f5tts*-Felder im config-Handler entgegen, persistiert
sie in voice_config.json. Beim RVS-Connect broadcastet die Bridge die
persistierte Config einmalig — damit die f5tts-bridge nach Container-
Restart automatisch die zuletzt gewaehlten Settings bekommt, ohne dass
der User in Diagnostic was klicken muss.
Diagnostic UI:
- Neuer aufklappbarer "F5-TTS Modell-Tuning (advanced)" Bereich
- Felder: Modell-ID, Custom-Checkpoint, Vocab, cfg_strength, nfe_step
- voice_config beim Laden: Felder werden zurueck in die UI gesetzt
- sendVoiceConfig schickt die neuen Felder mit
- Server: send_voice_config persistiert die Felder; leere Strings
werden geloescht, damit die Hard-Defaults greifen
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
+75
-1
@@ -450,6 +450,58 @@
|
||||
<!-- Gecloned Stimmen — Liste mit Loeschen -->
|
||||
<div id="xtts-voice-list" style="margin-bottom:12px;"></div>
|
||||
|
||||
<!-- F5-TTS Modell-Tuning -->
|
||||
<details style="background:#0D0D1A;border:1px solid #2A2A3E;border-radius:6px;padding:10px 12px;margin-bottom:12px;">
|
||||
<summary style="color:#8888AA;font-size:12px;cursor:pointer;">F5-TTS Modell-Tuning (advanced)</summary>
|
||||
<div style="margin-top:10px;display:flex;flex-direction:column;gap:8px;">
|
||||
<div style="color:#8888AA;font-size:11px;">
|
||||
Werden via RVS an die f5tts-bridge auf der Gamebox geschickt.
|
||||
Modell-/Checkpoint-Wechsel triggert einen Reload (~30s).
|
||||
Hardcoded Defaults: F5TTS_v1_Base, cfg_strength=2.5, nfe_step=32.
|
||||
</div>
|
||||
|
||||
<label style="color:#8888AA;font-size:12px;">Modell-ID:</label>
|
||||
<input type="text" id="diag-f5tts-model"
|
||||
placeholder="F5TTS_v1_Base"
|
||||
style="background:#1E1E2E;color:#fff;border:1px solid #2A2A3E;border-radius:6px;padding:6px 10px;font-size:13px;">
|
||||
|
||||
<label style="color:#8888AA;font-size:12px;">
|
||||
Custom Checkpoint (HF-Repo "user/repo" oder Container-Pfad, leer = Default):
|
||||
</label>
|
||||
<input type="text" id="diag-f5tts-ckpt"
|
||||
placeholder="z.B. aoxo/F5-TTS-German"
|
||||
style="background:#1E1E2E;color:#fff;border:1px solid #2A2A3E;border-radius:6px;padding:6px 10px;font-size:13px;">
|
||||
|
||||
<label style="color:#8888AA;font-size:12px;">
|
||||
Custom Vocab (passend zum Checkpoint, optional):
|
||||
</label>
|
||||
<input type="text" id="diag-f5tts-vocab"
|
||||
placeholder="leer = Default"
|
||||
style="background:#1E1E2E;color:#fff;border:1px solid #2A2A3E;border-radius:6px;padding:6px 10px;font-size:13px;">
|
||||
|
||||
<div style="display:flex;gap:12px;">
|
||||
<div style="flex:1;">
|
||||
<label style="color:#8888AA;font-size:12px;">cfg_strength (1.0 - 5.0):</label>
|
||||
<input type="number" id="diag-f5tts-cfg" step="0.1" min="1" max="5"
|
||||
placeholder="2.5"
|
||||
style="background:#1E1E2E;color:#fff;border:1px solid #2A2A3E;border-radius:6px;padding:6px 10px;font-size:13px;width:100%;box-sizing:border-box;">
|
||||
<div style="color:#666680;font-size:10px;">Hoeher = klebt staerker an Referenz</div>
|
||||
</div>
|
||||
<div style="flex:1;">
|
||||
<label style="color:#8888AA;font-size:12px;">nfe_step (8 - 64):</label>
|
||||
<input type="number" id="diag-f5tts-nfe" step="1" min="8" max="64"
|
||||
placeholder="32"
|
||||
style="background:#1E1E2E;color:#fff;border:1px solid #2A2A3E;border-radius:6px;padding:6px 10px;font-size:13px;width:100%;box-sizing:border-box;">
|
||||
<div style="color:#666680;font-size:10px;">Hoeher = bessere Qualitaet, langsamer</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<button class="btn primary" onclick="sendVoiceConfig()" style="padding:6px 14px;font-size:12px;align-self:flex-start;margin-top:6px;">
|
||||
Anwenden
|
||||
</button>
|
||||
</div>
|
||||
</details>
|
||||
|
||||
<!-- Voice Cloning -->
|
||||
<div style="background:#1E1E2E;border-radius:8px;padding:12px;margin-top:8px;">
|
||||
<div style="color:#0096FF;font-size:13px;font-weight:600;margin-bottom:8px;">Stimme klonen</div>
|
||||
@@ -841,6 +893,16 @@
|
||||
const wSel = document.getElementById('diag-whisper-model');
|
||||
if (wSel) wSel.value = msg.whisperModel;
|
||||
}
|
||||
// F5-TTS Tuning-Felder wiederherstellen (falls gesetzt)
|
||||
const setIfPresent = (id, val) => {
|
||||
const el = document.getElementById(id);
|
||||
if (el && val !== undefined && val !== null && val !== '') el.value = val;
|
||||
};
|
||||
setIfPresent('diag-f5tts-model', msg.f5ttsModel);
|
||||
setIfPresent('diag-f5tts-ckpt', msg.f5ttsCkptFile);
|
||||
setIfPresent('diag-f5tts-vocab', msg.f5ttsVocabFile);
|
||||
setIfPresent('diag-f5tts-cfg', msg.f5ttsCfgStrength);
|
||||
setIfPresent('diag-f5tts-nfe', msg.f5ttsNfeStep);
|
||||
return;
|
||||
}
|
||||
|
||||
@@ -1570,7 +1632,19 @@
|
||||
const ttsEnabled = document.getElementById('diag-tts-enabled').checked;
|
||||
const xttsVoice = document.getElementById('diag-xtts-voice').value;
|
||||
const whisperModel = document.getElementById('diag-whisper-model').value;
|
||||
send({ action: 'send_voice_config', ttsEnabled, xttsVoice, whisperModel });
|
||||
const f5ttsModel = document.getElementById('diag-f5tts-model')?.value || '';
|
||||
const f5ttsCkptFile = document.getElementById('diag-f5tts-ckpt')?.value || '';
|
||||
const f5ttsVocabFile = document.getElementById('diag-f5tts-vocab')?.value || '';
|
||||
const f5ttsCfgRaw = document.getElementById('diag-f5tts-cfg')?.value || '';
|
||||
const f5ttsNfeRaw = document.getElementById('diag-f5tts-nfe')?.value || '';
|
||||
const f5ttsCfgStrength = f5ttsCfgRaw ? parseFloat(f5ttsCfgRaw) : undefined;
|
||||
const f5ttsNfeStep = f5ttsNfeRaw ? parseInt(f5ttsNfeRaw, 10) : undefined;
|
||||
send({
|
||||
action: 'send_voice_config',
|
||||
ttsEnabled, xttsVoice, whisperModel,
|
||||
f5ttsModel, f5ttsCkptFile, f5ttsVocabFile,
|
||||
f5ttsCfgStrength, f5ttsNfeStep,
|
||||
});
|
||||
const statusEl = document.getElementById('voice-status');
|
||||
if (statusEl && xttsVoice) {
|
||||
statusEl.textContent = `⏳ Stimme "${xttsVoice}" wird geladen...`;
|
||||
|
||||
@@ -1423,6 +1423,25 @@ wss.on("connection", (ws) => {
|
||||
xttsVoice: msg.xttsVoice || "",
|
||||
};
|
||||
if (msg.whisperModel !== undefined) voiceConfig.whisperModel = msg.whisperModel;
|
||||
// F5-TTS Tuning-Felder — leere Strings entfernen damit der Default greift
|
||||
if (msg.f5ttsModel !== undefined) {
|
||||
if (msg.f5ttsModel) voiceConfig.f5ttsModel = msg.f5ttsModel;
|
||||
else delete voiceConfig.f5ttsModel;
|
||||
}
|
||||
if (msg.f5ttsCkptFile !== undefined) {
|
||||
if (msg.f5ttsCkptFile) voiceConfig.f5ttsCkptFile = msg.f5ttsCkptFile;
|
||||
else delete voiceConfig.f5ttsCkptFile;
|
||||
}
|
||||
if (msg.f5ttsVocabFile !== undefined) {
|
||||
if (msg.f5ttsVocabFile) voiceConfig.f5ttsVocabFile = msg.f5ttsVocabFile;
|
||||
else delete voiceConfig.f5ttsVocabFile;
|
||||
}
|
||||
if (msg.f5ttsCfgStrength !== undefined && !isNaN(msg.f5ttsCfgStrength)) {
|
||||
voiceConfig.f5ttsCfgStrength = msg.f5ttsCfgStrength;
|
||||
}
|
||||
if (msg.f5ttsNfeStep !== undefined && !isNaN(msg.f5ttsNfeStep)) {
|
||||
voiceConfig.f5ttsNfeStep = msg.f5ttsNfeStep;
|
||||
}
|
||||
try {
|
||||
fs.mkdirSync("/shared/config", { recursive: true });
|
||||
fs.writeFileSync("/shared/config/voice_config.json", JSON.stringify(voiceConfig, null, 2));
|
||||
|
||||
Reference in New Issue
Block a user