feat: Streaming TTS — PCM-Stream statt WAV-Chunks (Weg A)
Pipeline: XTTS-Server → xtts-bridge → aria-bridge → RVS → App AudioTrack XTTS-Bridge (Gaming-PC): - streamXTTSAsPCM(): liest /tts_to_audio/ Response inkrementell, parst WAV-Header (samplerate/channels), teilt PCM in 8KB-Chunks (~170ms bei 24kHz s16 mono) und sendet jeden als audio_pcm. - Finaler Chunk mit final=true nach letztem Text-Chunk aria-bridge: - audio_pcm Handler leitet payload 1:1 weiter, fuellt messageId aus requestId → messageId Map falls XTTS-Bridge messageId nicht hatte - Alter xtts_response Pfad bleibt als Legacy-Fallback (WAV) RVS: audio_pcm in ALLOWED_TYPES Android Native: - PcmStreamPlayerModule (Kotlin): AudioTrack MODE_STREAM mit Writer-Thread und BlockingQueue. start(rate, ch) / writeChunk(b64) / end() / stop() - 8x MinBufferSize grosszuegig dimensioniert, glatt auch bei Netz-Aussetzern - Registriert in MainApplication via PcmStreamPlayerPackage App JS: - audioService.handlePcmChunk(): erkennt neue Session (messageId-Wechsel), startet nativen Stream, cacht PCM-Bytes pro Message. Bei final=true Stream sauber schliessen + _savePcmBufferAsWav → WAV-File im tts_cache/<messageId>.wav - _savePcmBufferAsWav: baut 44-byte WAV-Header (PCM s16le, korrekte samplerate/channels), haengt alle gesammelten base64-PCM-Chunks an - stopPlayback beendet auch aktiven PCM-Stream - ChatScreen routet type=audio_pcm an handlePcmChunk, bei final setzt audioPath in der Message Play-Button: falls messageId einen audioPath hat → WAV aus Cache (Sound-basiert), egal ob Original-TTS Piper oder XTTS war. Audio-Focus: - requestDuck() beim Stream-Start, release() bei Stream-Ende - Andere Apps (Spotify etc.) werden leiser waehrend ARIA spricht Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -274,6 +274,20 @@ const ChatScreen: React.FC = () => {
|
||||
}
|
||||
}
|
||||
|
||||
// XTTS PCM-Stream: direkt an AudioTrack, bei final WAV-Cache schreiben
|
||||
if (message.type === ('audio_pcm' as any)) {
|
||||
const p = message.payload as any;
|
||||
const refId = (p.messageId as string) || '';
|
||||
audioService.handlePcmChunk(p).then((audioPath: any) => {
|
||||
// Wenn final + Cache-Pfad zurueckkam, Message aktualisieren
|
||||
if (p.final && audioPath && refId) {
|
||||
setMessages(prev => prev.map(m =>
|
||||
m.messageId === refId ? { ...m, audioPath } : m
|
||||
));
|
||||
}
|
||||
}).catch(() => {});
|
||||
}
|
||||
|
||||
// Thinking-Indicator Status von der Bridge
|
||||
if (message.type === 'agent_activity') {
|
||||
const activity = (message.payload.activity as string) || 'idle';
|
||||
|
||||
@@ -16,13 +16,36 @@ import AudioRecorderPlayer, {
|
||||
OutputFormatAndroidType,
|
||||
} from 'react-native-audio-recorder-player';
|
||||
|
||||
// Base64 encoder for binary strings (header bytes → Base64).
const B64_CHARS = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/';

/**
 * Encodes a binary string as padded Base64.
 *
 * Only the low 8 bits of every UTF-16 code unit are used, so each character of
 * `bin` is treated as exactly one byte.
 *
 * @param bin Binary string, one character per byte.
 * @returns The Base64 text, padded with `=` to a multiple of four characters.
 */
function btoaSafe(bin: string): string {
  const pieces: string[] = [];
  for (let pos = 0; pos < bin.length; pos += 3) {
    const remaining = bin.length - pos;
    const first = bin.charCodeAt(pos) & 0xff;
    const second = remaining > 1 ? bin.charCodeAt(pos + 1) & 0xff : 0;
    const third = remaining > 2 ? bin.charCodeAt(pos + 2) & 0xff : 0;

    // Pack up to three bytes into one 24-bit group, then cut it into 6-bit symbols.
    const group = (first << 16) | (second << 8) | third;
    pieces.push(
      B64_CHARS.charAt((group >> 18) & 0x3f),
      B64_CHARS.charAt((group >> 12) & 0x3f),
      remaining > 1 ? B64_CHARS.charAt((group >> 6) & 0x3f) : '=',
      remaining > 2 ? B64_CHARS.charAt(group & 0x3f) : '=',
    );
  }
  return pieces.join('');
}
|
||||
|
||||
// Native modules: AudioFocus (ducking/muting other apps) and PcmStreamPlayer
// (streaming PCM playback). Both are typed as optional; the code in this file
// guards every use with a presence check or optional chaining.
const { AudioFocus, PcmStreamPlayer } = NativeModules as {
  AudioFocus?: {
    requestDuck: () => Promise<boolean>;
    requestExclusive: () => Promise<boolean>;
    release: () => Promise<boolean>;
  };
  PcmStreamPlayer?: {
    // Opens a stream for the given sample rate and channel count.
    start: (sampleRate: number, channels: number) => Promise<boolean>;
    // Queues one Base64-encoded PCM chunk for playback.
    writeChunk: (base64Pcm: string) => Promise<boolean>;
    // Used by callers in this file as the graceful close after the final chunk.
    end: () => Promise<boolean>;
    // Used by callers in this file as the hard abort (cancel / stream switch).
    stop: () => Promise<boolean>;
  };
};
|
||||
|
||||
// --- Typen ---
|
||||
@@ -79,6 +102,15 @@ class AudioService {
|
||||
private speechDetected: boolean = false;
|
||||
private speechStartTime: number = 0;
|
||||
|
||||
// PCM-Stream (XTTS): aktive Session + Cache-Puffer pro messageId
|
||||
private pcmStreamActive: boolean = false;
|
||||
private pcmMessageId: string = '';
|
||||
private pcmSampleRate: number = 24000;
|
||||
private pcmChannels: number = 1;
|
||||
private pcmBuffer: string[] = []; // base64-chunks zum spaeteren WAV-Build
|
||||
private pcmBytesCollected: number = 0;
|
||||
private readonly PCM_MAX_CACHE_BYTES = 30 * 1024 * 1024; // 30MB
|
||||
|
||||
// VAD State
|
||||
private vadEnabled: boolean = false;
|
||||
private lastSpeechTime: number = 0;
|
||||
@@ -303,6 +335,141 @@ class AudioService {
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Handles one chunk of an `audio_pcm` message: plays it through the native
 * PcmStreamPlayer and collects it for the replay cache.
 *
 * A new native stream is started when no stream is active or when
 * `payload.messageId` differs from the current session; a still-active previous
 * stream is stopped and its collected chunks are discarded. When
 * `payload.final` is set, the stream is ended, audio focus is released and the
 * collected chunks are written out via `_savePcmBufferAsWav`.
 *
 * @param payload Chunk fields. `base64` is handed unchanged to
 *   `PcmStreamPlayer.writeChunk`; `sampleRate` falls back to 24000 and
 *   `channels` to 1 when missing or 0. `chunk` is not read here.
 * @returns The path returned by `_savePcmBufferAsWav` for a final chunk that
 *   has a messageId and at least one cached chunk; `''` in every other case
 *   (non-final chunk, native module missing, start failure, nothing cached).
 */
async handlePcmChunk(payload: {
  base64: string;
  sampleRate?: number;
  channels?: number;
  messageId?: string;
  chunk?: number;
  final?: boolean;
}): Promise<string> {
  if (!PcmStreamPlayer) {
    console.warn('[Audio] PcmStreamPlayer Native Module nicht verfuegbar');
    return '';
  }

  const messageId = payload.messageId || '';
  const sampleRate = payload.sampleRate || 24000;
  const channels = payload.channels || 1;
  const base64 = payload.base64 || '';
  const isFinal = !!payload.final;

  // NOTE(review): this method awaits native calls and is not serialized, so
  // overlapping invocations can interleave at each await — confirm the caller
  // delivers chunks in order and that this is acceptable.

  // New stream? (messageId changed, or nothing is playing)
  if (!this.pcmStreamActive || this.pcmMessageId !== messageId) {
    if (this.pcmStreamActive) {
      // The previous stream never received its final chunk; abort it.
      try {
        await PcmStreamPlayer.stop();
      } catch (err) {
        console.warn('[Audio] PcmStreamPlayer.stop fehlgeschlagen:', err);
      }
    }
    this.pcmStreamActive = true;
    this.pcmMessageId = messageId;
    this.pcmSampleRate = sampleRate;
    this.pcmChannels = channels;
    // Drop whatever the previous (unfinished) session had collected.
    this.pcmBuffer = [];
    this.pcmBytesCollected = 0;
    try {
      await PcmStreamPlayer.start(sampleRate, channels);
    } catch (err) {
      console.error('[Audio] PcmStreamPlayer.start fehlgeschlagen:', err);
      this.pcmStreamActive = false;
      return '';
    }
    // Audio focus: duck other apps while the stream plays (best effort).
    AudioFocus?.requestDuck().catch(() => {});
  }

  // Play the chunk and collect it for the cache.
  if (base64) {
    try {
      await PcmStreamPlayer.writeChunk(base64);
    } catch (err) {
      console.warn('[Audio] writeChunk', err);
    }
    // Only collect while below the cache limit; later chunks are played but not cached.
    if (messageId && this.pcmBytesCollected < this.PCM_MAX_CACHE_BYTES) {
      this.pcmBuffer.push(base64);
      // Exact decoded size: 3 bytes per 4 characters minus one byte per '='
      // padding character. A plain `length * 0.75` over-counts padded chunks,
      // and this counter later becomes the WAV data size.
      const padding = base64.endsWith('==') ? 2 : base64.endsWith('=') ? 1 : 0;
      this.pcmBytesCollected += Math.floor((base64.length * 3) / 4) - padding;
    }
  }

  if (isFinal) {
    // Graceful close of the native stream.
    try {
      await PcmStreamPlayer.end();
    } catch (err) {
      console.warn('[Audio] PcmStreamPlayer.end fehlgeschlagen:', err);
    }
    this.pcmStreamActive = false;
    AudioFocus?.release().catch(() => {});

    // Build a WAV file for replay from the collected PCM chunks.
    if (messageId && this.pcmBuffer.length > 0) {
      const audioPath = await this._savePcmBufferAsWav(messageId);
      this.pcmBuffer = [];
      this.pcmBytesCollected = 0;
      this.pcmMessageId = '';
      return audioPath;
    }
    this.pcmMessageId = '';
  }
  return '';
}
|
||||
|
||||
/**
 * Writes the collected PCM chunks as a 16-bit PCM WAV file into
 * `<DocumentDirectoryPath>/tts_cache/<messageId>.wav`.
 *
 * The file is a 44-byte RIFF/WAVE header followed by every entry of
 * `this.pcmBuffer`, appended one by one as Base64. The chunks are appended
 * individually because padded Base64 strings cannot simply be concatenated.
 *
 * @param messageId Used as the file name; path separators are replaced so the
 *   file always stays inside the cache directory.
 * @returns `file://` + the written path, or `''` if writing failed.
 */
private async _savePcmBufferAsWav(messageId: string): Promise<string> {
  // Snapshot the session state before the first await so that a session
  // starting while the file is being written cannot change what is saved.
  const chunks = this.pcmBuffer;
  const sampleRate = this.pcmSampleRate;
  const channels = this.pcmChannels;

  // messageId originates from a network payload; strip path separators.
  const safeName = messageId.replace(/[\\/]/g, '_');
  const dir = `${RNFS.DocumentDirectoryPath}/tts_cache`;
  const path = `${dir}/${safeName}.wav`;

  try {
    // mkdir errors are ignored here, as before; a real problem surfaces on write.
    await RNFS.mkdir(dir).catch(() => {});

    // Exact payload size from the chunks that will actually be written:
    // 3 bytes per 4 Base64 characters minus one byte per '=' padding character.
    let dataSize = 0;
    for (const b64 of chunks) {
      const padding = b64.endsWith('==') ? 2 : b64.endsWith('=') ? 1 : 0;
      dataSize += Math.floor((b64.length * 3) / 4) - padding;
    }

    // WAV header for PCM s16le.
    const bitsPerSample = 16;
    const blockAlign = (channels * bitsPerSample) / 8;
    const byteRate = sampleRate * blockAlign;

    const header = new Uint8Array(44);
    const dv = new DataView(header.buffer);
    const writeTag = (offset: number, tag: string): void => {
      for (let i = 0; i < tag.length; i++) {
        header[offset + i] = tag.charCodeAt(i);
      }
    };

    writeTag(0, 'RIFF');
    dv.setUint32(4, 36 + dataSize, true); // RIFF chunk size = file size - 8
    writeTag(8, 'WAVE');
    writeTag(12, 'fmt ');
    dv.setUint32(16, 16, true); // fmt chunk size
    dv.setUint16(20, 1, true); // audio format 1 = PCM
    dv.setUint16(22, channels, true);
    dv.setUint32(24, sampleRate, true);
    dv.setUint32(28, byteRate, true);
    dv.setUint16(32, blockAlign, true);
    dv.setUint16(34, bitsPerSample, true);
    writeTag(36, 'data');
    dv.setUint32(40, dataSize, true);

    // Header bytes → binary string → Base64.
    const headerB64 = btoaSafe(String.fromCharCode(...Array.from(header)));

    // Write the file: header first, then every PCM chunk.
    await RNFS.writeFile(path, headerB64, 'base64');
    for (const b64 of chunks) {
      await RNFS.appendFile(path, b64, 'base64');
    }
    console.log(`[Audio] PCM-Cache geschrieben: ${path} (${(dataSize / 1024).toFixed(0)}KB, ${chunks.length} chunks)`);
    return `file://${path}`;
  } catch (err) {
    console.warn('[Audio] _savePcmBufferAsWav fehlgeschlagen:', err);
    // Do not leave a truncated WAV behind (best effort).
    await RNFS.unlink(path).catch(() => {});
    return '';
  }
}
|
||||
|
||||
/** Audio aus lokaler Datei (file:// Pfad) in die Queue und abspielen. */
|
||||
async playFromPath(filePath: string): Promise<void> {
|
||||
if (!filePath) return;
|
||||
@@ -419,6 +586,14 @@ class AudioService {
|
||||
if (this.preloadedPath) RNFS.unlink(this.preloadedPath).catch(() => {});
|
||||
this.preloadedPath = '';
|
||||
}
|
||||
// PCM-Stream ebenfalls hart stoppen (Cancel/Abbruch)
|
||||
if (this.pcmStreamActive) {
|
||||
PcmStreamPlayer?.stop().catch(() => {});
|
||||
this.pcmStreamActive = false;
|
||||
this.pcmBuffer = [];
|
||||
this.pcmBytesCollected = 0;
|
||||
this.pcmMessageId = '';
|
||||
}
|
||||
// Audio-Focus freigeben
|
||||
AudioFocus?.release().catch(() => {});
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user