def float_to_pcm16(wav: np.ndarray) -> bytes:
    """Convert float samples in the range -1..+1 to 16-bit little-endian PCM.

    Non-finite samples are sanitized before the integer cast: casting NaN or
    Inf to int16 is undefined (NumPy emits a RuntimeWarning and the resulting
    audio is garbage). Per the original note on this function, F5-TTS
    occasionally produces such samples when generation becomes unstable.

    Args:
        wav: Audio samples, nominally within [-1.0, +1.0]. Values outside
            that range are clipped; NaN becomes 0.0, +Inf becomes +1.0 and
            -Inf becomes -1.0.

    Returns:
        The samples scaled by 32767, truncated to signed 16-bit integers and
        serialized as little-endian bytes (two bytes per sample).
    """
    # One pass over the data: everything that is not finite is NaN or +/-Inf.
    bad_count = int(np.count_nonzero(~np.isfinite(wav)))
    if bad_count > 0:
        # NOTE: the message text says "replace with 0", which is exact for
        # NaN only; infinities are mapped to full scale (+/-1.0) below.
        logger.warning("F5-TTS Output enthaelt %d NaN/Inf samples — ersetze mit 0", bad_count)
        wav = np.nan_to_num(wav, nan=0.0, posinf=1.0, neginf=-1.0)
    wav = np.clip(wav, -1.0, 1.0)
    # "<i2" pins the byte order to little-endian regardless of the host CPU;
    # plain np.int16 would serialize in native order.
    pcm = (wav * 32767.0).astype("<i2")
    return pcm.tobytes()