feat(android): natives PcmStreamRecorder-Modul — 16 kHz mono s16le → JS-Events

Neues Native-Modul fuer die Streaming-STT-Pipeline:

  PcmStreamRecorder.start()  — oeffnet AudioRecord 16 kHz mono PCM,
                                VOICE_COMMUNICATION-Source mit AEC/NS,
                                PARTIAL_WAKE_LOCK gegen Doze
  PcmStreamRecorder.stop()   — sauber schliessen
  Event "PcmStreamChunk"     — {pcm: base64-s16le, seq, ts} alle 200ms
  Event "PcmStreamError"     — bei Capture-Crash

200ms-Chunks: gross genug fuer geringen RVS-Overhead, klein genug fuer
granulares Endpointing in der Whisper-Bridge.

Mic-Ownership: darf NICHT parallel zu OpenWakeWord laufen — beide
wollen AudioRecord. Coordination liegt bei audio.ts (stop OWW vor
start, start OWW nach stop), genau wie's bisher mit react-native-
audio-recorder-player gemacht wurde.
This commit is contained in:
2026-05-30 21:33:18 +02:00
parent e99bf0b032
commit 199297a3a1
3 changed files with 263 additions and 0 deletions
@@ -21,6 +21,7 @@ class MainApplication : Application(), ReactApplication {
add(ApkInstallerPackage()) add(ApkInstallerPackage())
add(AudioFocusPackage()) add(AudioFocusPackage())
add(PcmStreamPlayerPackage()) add(PcmStreamPlayerPackage())
add(PcmStreamRecorderPackage())
add(OpenWakeWordPackage()) add(OpenWakeWordPackage())
add(PhoneCallPackage()) add(PhoneCallPackage())
add(BackgroundAudioPackage()) add(BackgroundAudioPackage())
@@ -0,0 +1,246 @@
package com.ariacockpit
import android.Manifest
import android.content.Context
import android.content.pm.PackageManager
import android.media.AudioFormat
import android.media.AudioRecord
import android.media.MediaRecorder
import android.media.audiofx.AcousticEchoCanceler
import android.media.audiofx.AutomaticGainControl
import android.media.audiofx.NoiseSuppressor
import android.os.PowerManager
import android.util.Base64
import android.util.Log
import androidx.core.content.ContextCompat
import com.facebook.react.bridge.Arguments
import com.facebook.react.bridge.Promise
import com.facebook.react.bridge.ReactApplicationContext
import com.facebook.react.bridge.ReactContextBaseJavaModule
import com.facebook.react.bridge.ReactMethod
import com.facebook.react.modules.core.DeviceEventManagerModule
import java.util.concurrent.atomic.AtomicBoolean
/**
* PCM-Streaming-Recorder fuer die Streaming-Whisper-Bridge.
*
* Oeffnet AudioRecord (16 kHz mono s16le, VOICE_COMMUNICATION-Source mit
* automatischer AEC + NS) und feuert ~200ms-Chunks als base64-Event
* "PcmStreamChunk" an die JS-Bridge.
*
* audio.ts schickt die Chunks via RVS direkt an die whisper-bridge die
* dort einen ML-Endpointer laufen laesst — kein dB-VAD-Tuning mehr.
*
* Mic-Ownership: dieser Recorder DARF nicht gleichzeitig mit
* OpenWakeWord laufen — beide wollen AudioRecord vom MIC. Caller
* muss OpenWakeWord.stop() vor start() hier aufrufen und nach stop()
* hier wieder OpenWakeWord.start() — genau wie's audio.ts ohnehin
* macht.
*
* Events:
* "PcmStreamChunk" { pcm: base64-s16le, seq: N, ts: epochMs }
* "PcmStreamError" { error: string }
*/
class PcmStreamRecorderModule(reactContext: ReactApplicationContext) :
ReactContextBaseJavaModule(reactContext) {
override fun getName() = "PcmStreamRecorder"
companion object {
private const val TAG = "PcmStreamRecorder"
private const val SAMPLE_RATE = 16000
// 200ms-Chunks: gross genug fuer wenig RVS-Overhead, klein genug damit
// der Endpointer im Whisper-Bridge granular sieht. 200ms ist auch das
// Whisper-VAD-Frame-Hop — passt also zu downstream.
private const val CHUNK_SAMPLES = 3200 // 200ms @ 16 kHz
private const val BYTES_PER_SAMPLE = 2 // s16
private const val CHUNK_BYTES = CHUNK_SAMPLES * BYTES_PER_SAMPLE
}
private var audioRecord: AudioRecord? = null
private val running = AtomicBoolean(false)
private var captureThread: Thread? = null
private var aec: AcousticEchoCanceler? = null
private var ns: NoiseSuppressor? = null
private var agc: AutomaticGainControl? = null
// PARTIAL_WAKE_LOCK damit der JS-Bridge-Loop weiterlaeuft auch wenn das
// Display aus ist — sonst sammeln sich zwar Chunks in der nativen Queue
// an, aber emit() landet nicht zeitnah in JS und der Whisper-Bridge
// bekommt die Audio-Chunks erst beim App-Foreground-Resume.
private var wakeLock: PowerManager.WakeLock? = null
private var seq: Long = 0L
@ReactMethod
fun start(promise: Promise) {
if (running.get()) {
promise.resolve(true)
return
}
val perm = ContextCompat.checkSelfPermission(
reactApplicationContext, Manifest.permission.RECORD_AUDIO
)
if (perm != PackageManager.PERMISSION_GRANTED) {
promise.reject("NO_MIC_PERMISSION", "RECORD_AUDIO Permission fehlt")
return
}
try {
val minBuf = AudioRecord.getMinBufferSize(
SAMPLE_RATE,
AudioFormat.CHANNEL_IN_MONO,
AudioFormat.ENCODING_PCM_16BIT,
).coerceAtLeast(CHUNK_BYTES * 4) // 4x Chunk-Size als Sicherheit
val record = AudioRecord(
MediaRecorder.AudioSource.VOICE_COMMUNICATION,
SAMPLE_RATE,
AudioFormat.CHANNEL_IN_MONO,
AudioFormat.ENCODING_PCM_16BIT,
minBuf,
)
if (record.state != AudioRecord.STATE_INITIALIZED) {
record.release()
promise.reject("AUDIO_INIT", "AudioRecord nicht initialisiert (Mikro belegt? OpenWakeWord noch aktiv?)")
return
}
audioRecord = record
// AEC/NS/AGC explizit anschalten — manche Geraete liefern's via
// VOICE_COMMUNICATION zwar mit, aber Belt-and-Suspenders.
try {
if (AcousticEchoCanceler.isAvailable()) {
aec = AcousticEchoCanceler.create(record.audioSessionId)?.apply { enabled = true }
}
} catch (e: Exception) { Log.w(TAG, "AEC failed: ${e.message}") }
try {
if (NoiseSuppressor.isAvailable()) {
ns = NoiseSuppressor.create(record.audioSessionId)?.apply { enabled = true }
}
} catch (e: Exception) { Log.w(TAG, "NS failed: ${e.message}") }
try {
if (AutomaticGainControl.isAvailable()) {
agc = AutomaticGainControl.create(record.audioSessionId)?.apply { enabled = true }
}
} catch (e: Exception) { Log.w(TAG, "AGC failed: ${e.message}") }
seq = 0L
running.set(true)
record.startRecording()
try {
val pm = reactApplicationContext.getSystemService(Context.POWER_SERVICE) as PowerManager
wakeLock = pm.newWakeLock(PowerManager.PARTIAL_WAKE_LOCK,
"AriaCockpit:PcmStreamRecord").apply {
setReferenceCounted(false)
acquire(8 * 60 * 60 * 1000L) // 8h Cap
}
} catch (e: Exception) {
Log.w(TAG, "WakeLock acquire fehlgeschlagen: ${e.message}")
}
captureThread = Thread({ captureLoop() }, "PcmStreamRecorderCapture").apply {
isDaemon = true
start()
}
Log.i(TAG, "Recording gestartet (16kHz mono s16le, ${CHUNK_SAMPLES} samples/chunk)")
promise.resolve(true)
} catch (e: Exception) {
Log.e(TAG, "start fehlgeschlagen", e)
running.set(false)
audioRecord?.release()
audioRecord = null
releaseAudioEffects()
releaseWakeLock()
promise.reject("START_FAILED", e.message ?: "Unbekannter Fehler", e)
}
}
@ReactMethod
fun stop(promise: Promise) {
running.set(false)
try {
captureThread?.join(1500)
} catch (_: InterruptedException) {}
captureThread = null
try { audioRecord?.stop() } catch (_: Exception) {}
try { audioRecord?.release() } catch (_: Exception) {}
audioRecord = null
releaseAudioEffects()
releaseWakeLock()
Log.i(TAG, "Recording gestoppt (seq=$seq Chunks gesendet)")
promise.resolve(true)
}
@ReactMethod
fun isRecording(promise: Promise) {
promise.resolve(running.get())
}
private fun captureLoop() {
val buffer = ByteArray(CHUNK_BYTES)
val rec = audioRecord ?: return
try {
while (running.get()) {
var offset = 0
// Solange lesen bis ein voller 200ms-Chunk zusammen ist.
// AudioRecord.read kann weniger als angefordert liefern.
while (offset < CHUNK_BYTES && running.get()) {
val n = rec.read(buffer, offset, CHUNK_BYTES - offset)
if (n <= 0) {
if (!running.get()) break
// Fehlerzustand — kurze Pause, dann weiter probieren
Thread.sleep(5)
continue
}
offset += n
}
if (offset < CHUNK_BYTES) break
val b64 = Base64.encodeToString(buffer, Base64.NO_WRAP)
val ts = System.currentTimeMillis()
val params = Arguments.createMap().apply {
putString("pcm", b64)
// putLong existiert nicht in WritableMap — putDouble fuer ts/seq.
putDouble("seq", seq.toDouble())
putDouble("ts", ts.toDouble())
}
reactApplicationContext
.getJSModule(DeviceEventManagerModule.RCTDeviceEventEmitter::class.java)
.emit("PcmStreamChunk", params)
seq++
}
} catch (e: Exception) {
Log.e(TAG, "captureLoop crashed", e)
try {
val err = Arguments.createMap().apply {
putString("error", e.message ?: "unknown")
}
reactApplicationContext
.getJSModule(DeviceEventManagerModule.RCTDeviceEventEmitter::class.java)
.emit("PcmStreamError", err)
} catch (_: Exception) {}
}
}
private fun releaseAudioEffects() {
try { aec?.release() } catch (_: Exception) {}
try { ns?.release() } catch (_: Exception) {}
try { agc?.release() } catch (_: Exception) {}
aec = null; ns = null; agc = null
}
private fun releaseWakeLock() {
try {
if (wakeLock?.isHeld == true) wakeLock?.release()
} catch (_: Exception) {}
wakeLock = null
}
// Damit RCTEventEmitter den Listener-Lifecycle nicht crasht
@ReactMethod fun addListener(eventName: String) {}
@ReactMethod fun removeListeners(count: Int) {}
}
@@ -0,0 +1,16 @@
package com.ariacockpit
import com.facebook.react.ReactPackage
import com.facebook.react.bridge.NativeModule
import com.facebook.react.bridge.ReactApplicationContext
import com.facebook.react.uimanager.ViewManager
class PcmStreamRecorderPackage : ReactPackage {
override fun createNativeModules(reactContext: ReactApplicationContext): List<NativeModule> {
return listOf(PcmStreamRecorderModule(reactContext))
}
override fun createViewManagers(reactContext: ReactApplicationContext): List<ViewManager<*, *>> {
return emptyList()
}
}