feat: Streaming TTS — PCM-Stream statt WAV-Chunks (Weg A)
Pipeline: XTTS-Server → xtts-bridge → aria-bridge → RVS → App AudioTrack XTTS-Bridge (Gaming-PC): - streamXTTSAsPCM(): liest /tts_to_audio/ Response inkrementell, parst WAV-Header (samplerate/channels), teilt PCM in 8KB-Chunks (~170ms bei 24kHz s16 mono) und sendet jeden als audio_pcm. - Finaler Chunk mit final=true nach letztem Text-Chunk aria-bridge: - audio_pcm Handler leitet payload 1:1 weiter, filled messageId aus requestId → messageId Map falls XTTS-Bridge messageId nicht hatte - Alter xtts_response Pfad bleibt als Legacy-Fallback (WAV) RVS: audio_pcm in ALLOWED_TYPES Android Native: - PcmStreamPlayerModule (Kotlin): AudioTrack MODE_STREAM mit Writer-Thread und BlockingQueue. start(rate, ch) / writeChunk(b64) / end() / stop() - 8x MinBufferSize grosszuegig dimensioniert, glatt auch bei Netz-Aussetzern - Registered im MainApplication via PcmStreamPlayerPackage App JS: - audioService.handlePcmChunk(): erkennt neue Session (messageId-Wechsel), started nativen Stream, cached PCM-Bytes pro Message. Bei final=true Stream sauber schliessen + _savePcmBufferAsWav → WAV-File im tts_cache/<messageId>.wav - _savePcmBufferAsWav: baut 44-byte WAV-Header (PCM s16le, korrekte samplerate/channels), haengt alle gesammelten base64-PCM-Chunks an - stopPlayback beendet auch aktiven PCM-Stream - ChatScreen routet type=audio_pcm an handlePcmChunk, bei final setzt audioPath in der Message Play-Button: falls messageId einen audioPath hat → WAV aus Cache (Sound-basiert), egal ob Original-TTS Piper oder XTTS war. Audio-Focus: - requestDuck() beim Stream-Start, release() bei Stream-Ende - Andere Apps (Spotify etc.) werden leiser waehrend ARIA spricht Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -20,6 +20,7 @@ class MainApplication : Application(), ReactApplication {
|
||||
PackageList(this).packages.apply {
|
||||
add(ApkInstallerPackage())
|
||||
add(AudioFocusPackage())
|
||||
add(PcmStreamPlayerPackage())
|
||||
}
|
||||
|
||||
override fun getJSMainModuleName(): String = "index"
|
||||
|
||||
@@ -0,0 +1,158 @@
|
||||
package com.ariacockpit
|
||||
|
||||
import android.media.AudioAttributes
|
||||
import android.media.AudioFormat
|
||||
import android.media.AudioManager
|
||||
import android.media.AudioTrack
|
||||
import android.util.Base64
|
||||
import android.util.Log
|
||||
import com.facebook.react.bridge.Promise
|
||||
import com.facebook.react.bridge.ReactApplicationContext
|
||||
import com.facebook.react.bridge.ReactContextBaseJavaModule
|
||||
import com.facebook.react.bridge.ReactMethod
|
||||
import java.util.concurrent.LinkedBlockingQueue
|
||||
|
||||
/**
|
||||
* Streamt PCM-s16le Audio direkt via AudioTrack MODE_STREAM.
|
||||
*
|
||||
* Flow:
|
||||
* JS: start(sampleRate, channels) → öffnet AudioTrack und startet Writer-Thread
|
||||
* JS: writeChunk(base64) → dekodiert, queued, Writer schreibt non-blocking
|
||||
* JS: end() → wartet bis Queue leer, schließt AudioTrack
|
||||
* JS: stop() → Hart stoppen, Queue leeren (Cancel)
|
||||
*
|
||||
* Vorteil gegenüber Sound-File-Queue:
|
||||
* - Keine Gap zwischen Chunks (AudioTrack puffert intern)
|
||||
* - Erste Samples beginnen zu spielen sobald der erste Chunk da ist
|
||||
* - Kein WAV-Header-Parsing pro Chunk
|
||||
*/
|
||||
class PcmStreamPlayerModule(reactContext: ReactApplicationContext) : ReactContextBaseJavaModule(reactContext) {
|
||||
companion object {
|
||||
private const val TAG = "PcmStreamPlayer"
|
||||
}
|
||||
|
||||
override fun getName() = "PcmStreamPlayer"
|
||||
|
||||
private var track: AudioTrack? = null
|
||||
private val queue = LinkedBlockingQueue<ByteArray>()
|
||||
private var writerThread: Thread? = null
|
||||
@Volatile private var writerShouldStop = false
|
||||
@Volatile private var endRequested = false
|
||||
|
||||
// ── Lifecycle ──
|
||||
|
||||
@ReactMethod
|
||||
fun start(sampleRate: Int, channels: Int, promise: Promise) {
|
||||
try {
|
||||
// Alte Session beenden falls vorhanden
|
||||
stopInternal()
|
||||
|
||||
val channelConfig = if (channels == 2) AudioFormat.CHANNEL_OUT_STEREO else AudioFormat.CHANNEL_OUT_MONO
|
||||
val encoding = AudioFormat.ENCODING_PCM_16BIT
|
||||
val minBuf = AudioTrack.getMinBufferSize(sampleRate, channelConfig, encoding)
|
||||
// Etwas grosszuegiger Buffer: 8x MinSize (ca. 200-400ms bei 24kHz) — glatt auch bei kleinen Netzwerk-Aussetzern
|
||||
val bufferSize = (minBuf * 8).coerceAtLeast(32 * 1024)
|
||||
|
||||
val newTrack = AudioTrack.Builder()
|
||||
.setAudioAttributes(
|
||||
AudioAttributes.Builder()
|
||||
.setUsage(AudioAttributes.USAGE_ASSISTANT)
|
||||
.setContentType(AudioAttributes.CONTENT_TYPE_SPEECH)
|
||||
.build(),
|
||||
)
|
||||
.setAudioFormat(
|
||||
AudioFormat.Builder()
|
||||
.setSampleRate(sampleRate)
|
||||
.setChannelMask(channelConfig)
|
||||
.setEncoding(encoding)
|
||||
.build(),
|
||||
)
|
||||
.setBufferSizeInBytes(bufferSize)
|
||||
.setTransferMode(AudioTrack.MODE_STREAM)
|
||||
.build()
|
||||
|
||||
newTrack.play()
|
||||
track = newTrack
|
||||
queue.clear()
|
||||
writerShouldStop = false
|
||||
endRequested = false
|
||||
|
||||
writerThread = Thread({
|
||||
val t = track ?: return@Thread
|
||||
try {
|
||||
while (!writerShouldStop) {
|
||||
val data = queue.poll(50, java.util.concurrent.TimeUnit.MILLISECONDS) ?: run {
|
||||
if (endRequested) return@Thread
|
||||
null
|
||||
} ?: continue
|
||||
var offset = 0
|
||||
while (offset < data.size && !writerShouldStop) {
|
||||
val written = t.write(data, offset, data.size - offset)
|
||||
if (written <= 0) break
|
||||
offset += written
|
||||
}
|
||||
}
|
||||
} catch (e: Exception) {
|
||||
Log.w(TAG, "Writer-Thread Fehler: ${e.message}")
|
||||
} finally {
|
||||
try { t.stop() } catch (_: Exception) {}
|
||||
try { t.release() } catch (_: Exception) {}
|
||||
}
|
||||
}, "PcmStreamWriter").apply { start() }
|
||||
|
||||
Log.i(TAG, "Stream gestartet: ${sampleRate}Hz ch=$channels buf=${bufferSize}B")
|
||||
promise.resolve(true)
|
||||
} catch (e: Exception) {
|
||||
Log.e(TAG, "start fehlgeschlagen", e)
|
||||
promise.reject("START_FAILED", e.message, e)
|
||||
}
|
||||
}
|
||||
|
||||
@ReactMethod
|
||||
fun writeChunk(base64Pcm: String, promise: Promise) {
|
||||
try {
|
||||
if (base64Pcm.isEmpty()) {
|
||||
promise.resolve(true)
|
||||
return
|
||||
}
|
||||
val bytes = Base64.decode(base64Pcm, Base64.DEFAULT)
|
||||
queue.put(bytes)
|
||||
promise.resolve(true)
|
||||
} catch (e: Exception) {
|
||||
promise.reject("WRITE_FAILED", e.message, e)
|
||||
}
|
||||
}
|
||||
|
||||
/** Signalisiert: keine weiteren Chunks. Writer wartet auf Queue-Abschluss, dann stoppt. */
|
||||
@ReactMethod
|
||||
fun end(promise: Promise) {
|
||||
endRequested = true
|
||||
promise.resolve(true)
|
||||
}
|
||||
|
||||
/** Harter Stop (Cancel) — Queue verwerfen. */
|
||||
@ReactMethod
|
||||
fun stop(promise: Promise) {
|
||||
stopInternal()
|
||||
promise.resolve(true)
|
||||
}
|
||||
|
||||
private fun stopInternal() {
|
||||
writerShouldStop = true
|
||||
endRequested = true
|
||||
queue.clear()
|
||||
writerThread?.interrupt()
|
||||
writerThread = null
|
||||
val t = track
|
||||
if (t != null) {
|
||||
try { t.stop() } catch (_: Exception) {}
|
||||
try { t.release() } catch (_: Exception) {}
|
||||
}
|
||||
track = null
|
||||
}
|
||||
|
||||
override fun onCatalystInstanceDestroy() {
|
||||
stopInternal()
|
||||
super.onCatalystInstanceDestroy()
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,16 @@
|
||||
package com.ariacockpit
|
||||
|
||||
import com.facebook.react.ReactPackage
|
||||
import com.facebook.react.bridge.NativeModule
|
||||
import com.facebook.react.bridge.ReactApplicationContext
|
||||
import com.facebook.react.uimanager.ViewManager
|
||||
|
||||
class PcmStreamPlayerPackage : ReactPackage {
|
||||
override fun createNativeModules(reactContext: ReactApplicationContext): List<NativeModule> {
|
||||
return listOf(PcmStreamPlayerModule(reactContext))
|
||||
}
|
||||
|
||||
override fun createViewManagers(reactContext: ReactApplicationContext): List<ViewManager<*, *>> {
|
||||
return emptyList()
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user