Compare commits
21 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| 97b6ea1b3e | |||
| 94ee0455a2 | |||
| 0bf6d49432 | |||
| 493cba36a2 | |||
| a68827fb38 | |||
| 11ca316e4e | |||
| be1d2e950a | |||
| 199297a3a1 | |||
| e99bf0b032 | |||
| 41999c2304 | |||
| 095c1e2d70 | |||
| 0145179aca | |||
| c2475ffef6 | |||
| 98982fea2f | |||
| 356f8b3171 | |||
| b4115bb345 | |||
| 02cac99ef9 | |||
| 2940ce0075 | |||
| d78b668e31 | |||
| a9115699db | |||
| f2bfd4bbc6 |
@@ -79,8 +79,8 @@ android {
|
|||||||
applicationId "com.ariacockpit"
|
applicationId "com.ariacockpit"
|
||||||
minSdkVersion rootProject.ext.minSdkVersion
|
minSdkVersion rootProject.ext.minSdkVersion
|
||||||
targetSdkVersion rootProject.ext.targetSdkVersion
|
targetSdkVersion rootProject.ext.targetSdkVersion
|
||||||
versionCode 10605
|
versionCode 10801
|
||||||
versionName "0.1.6.5"
|
versionName "0.1.8.1"
|
||||||
// Fallback fuer Libraries mit Product Flavors
|
// Fallback fuer Libraries mit Product Flavors
|
||||||
missingDimensionStrategy 'react-native-camera', 'general'
|
missingDimensionStrategy 'react-native-camera', 'general'
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -9,13 +9,20 @@
|
|||||||
<!-- Optional: GPS-Position der Frage anhaengen (nur wenn User in Settings aktiviert) -->
|
<!-- Optional: GPS-Position der Frage anhaengen (nur wenn User in Settings aktiviert) -->
|
||||||
<uses-permission android:name="android.permission.ACCESS_COARSE_LOCATION" />
|
<uses-permission android:name="android.permission.ACCESS_COARSE_LOCATION" />
|
||||||
<uses-permission android:name="android.permission.ACCESS_FINE_LOCATION" />
|
<uses-permission android:name="android.permission.ACCESS_FINE_LOCATION" />
|
||||||
|
<!-- Background-Location ist OPT-IN (Settings → GPS auch im Hintergrund).
|
||||||
|
Muss vom User explizit in Android-Einstellungen auf "Immer erlauben"
|
||||||
|
gesetzt werden — kann nicht ueber den normalen Permission-Dialog
|
||||||
|
angefordert werden (Android 10+). Default: aus. -->
|
||||||
|
<uses-permission android:name="android.permission.ACCESS_BACKGROUND_LOCATION" />
|
||||||
<!-- Foreground-Service damit TTS auch bei minimierter App weiterlaeuft.
|
<!-- Foreground-Service damit TTS auch bei minimierter App weiterlaeuft.
|
||||||
FOREGROUND_SERVICE_MICROPHONE ist Pflicht ab Android 14 wenn der
|
FOREGROUND_SERVICE_MICROPHONE ist Pflicht ab Android 14 wenn der
|
||||||
Service waehrend des Backgrounds aufs Mikro zugreift (Wake-Word,
|
Service waehrend des Backgrounds aufs Mikro zugreift (Wake-Word,
|
||||||
Aufnahme im Gespraechsmodus). -->
|
Aufnahme im Gespraechsmodus). LOCATION wird nur aktiv wenn der
|
||||||
|
User Background-GPS in Settings einschaltet. -->
|
||||||
<uses-permission android:name="android.permission.FOREGROUND_SERVICE" />
|
<uses-permission android:name="android.permission.FOREGROUND_SERVICE" />
|
||||||
<uses-permission android:name="android.permission.FOREGROUND_SERVICE_MEDIA_PLAYBACK" />
|
<uses-permission android:name="android.permission.FOREGROUND_SERVICE_MEDIA_PLAYBACK" />
|
||||||
<uses-permission android:name="android.permission.FOREGROUND_SERVICE_MICROPHONE" />
|
<uses-permission android:name="android.permission.FOREGROUND_SERVICE_MICROPHONE" />
|
||||||
|
<uses-permission android:name="android.permission.FOREGROUND_SERVICE_LOCATION" />
|
||||||
<uses-permission android:name="android.permission.POST_NOTIFICATIONS" />
|
<uses-permission android:name="android.permission.POST_NOTIFICATIONS" />
|
||||||
<!-- WAKE_LOCK damit Wake-Word + JS-Bridge auch bei aus-Display und Doze
|
<!-- WAKE_LOCK damit Wake-Word + JS-Bridge auch bei aus-Display und Doze
|
||||||
arbeiten: ohne Lock pausiert Android die CPU, Native-AudioRecord
|
arbeiten: ohne Lock pausiert Android die CPU, Native-AudioRecord
|
||||||
@@ -57,6 +64,6 @@
|
|||||||
<service
|
<service
|
||||||
android:name=".AriaPlaybackService"
|
android:name=".AriaPlaybackService"
|
||||||
android:exported="false"
|
android:exported="false"
|
||||||
android:foregroundServiceType="mediaPlayback|microphone" />
|
android:foregroundServiceType="mediaPlayback|microphone|location" />
|
||||||
</application>
|
</application>
|
||||||
</manifest>
|
</manifest>
|
||||||
|
|||||||
@@ -21,6 +21,7 @@ class MainApplication : Application(), ReactApplication {
|
|||||||
add(ApkInstallerPackage())
|
add(ApkInstallerPackage())
|
||||||
add(AudioFocusPackage())
|
add(AudioFocusPackage())
|
||||||
add(PcmStreamPlayerPackage())
|
add(PcmStreamPlayerPackage())
|
||||||
|
add(PcmStreamRecorderPackage())
|
||||||
add(OpenWakeWordPackage())
|
add(OpenWakeWordPackage())
|
||||||
add(PhoneCallPackage())
|
add(PhoneCallPackage())
|
||||||
add(BackgroundAudioPackage())
|
add(BackgroundAudioPackage())
|
||||||
|
|||||||
@@ -0,0 +1,246 @@
|
|||||||
|
package com.ariacockpit
|
||||||
|
|
||||||
|
import android.Manifest
|
||||||
|
import android.content.Context
|
||||||
|
import android.content.pm.PackageManager
|
||||||
|
import android.media.AudioFormat
|
||||||
|
import android.media.AudioRecord
|
||||||
|
import android.media.MediaRecorder
|
||||||
|
import android.media.audiofx.AcousticEchoCanceler
|
||||||
|
import android.media.audiofx.AutomaticGainControl
|
||||||
|
import android.media.audiofx.NoiseSuppressor
|
||||||
|
import android.os.PowerManager
|
||||||
|
import android.util.Base64
|
||||||
|
import android.util.Log
|
||||||
|
import androidx.core.content.ContextCompat
|
||||||
|
import com.facebook.react.bridge.Arguments
|
||||||
|
import com.facebook.react.bridge.Promise
|
||||||
|
import com.facebook.react.bridge.ReactApplicationContext
|
||||||
|
import com.facebook.react.bridge.ReactContextBaseJavaModule
|
||||||
|
import com.facebook.react.bridge.ReactMethod
|
||||||
|
import com.facebook.react.modules.core.DeviceEventManagerModule
|
||||||
|
import java.util.concurrent.atomic.AtomicBoolean
|
||||||
|
|
||||||
|
/**
 * PCM streaming recorder for the streaming Whisper bridge.
 *
 * Opens an AudioRecord (16 kHz, mono, signed 16-bit PCM, VOICE_COMMUNICATION
 * source) and emits fixed-size chunks of 3200 samples (200 ms) as base64
 * strings to JavaScript via the "PcmStreamChunk" device event.
 *
 * Per the original author's notes, audio.ts forwards the chunks via RVS to
 * the whisper-bridge, which runs an ML endpointer — so no dB-based VAD tuning
 * is needed on the app side.
 *
 * Microphone ownership: this recorder must NOT run at the same time as
 * OpenWakeWord, because both open an AudioRecord on the microphone. The
 * caller has to stop OpenWakeWord before calling start() here and restart it
 * after stop().
 *
 * Events:
 *   "PcmStreamChunk" { pcm: base64-s16le, seq: N, ts: epochMs }
 *   "PcmStreamError" { error: string }
 *
 * After a "PcmStreamError" the recorder reports isRecording() == false.
 * Calling stop() (or start() again) releases the microphone and audio effects.
 */
class PcmStreamRecorderModule(reactContext: ReactApplicationContext) :
    ReactContextBaseJavaModule(reactContext) {

    override fun getName() = "PcmStreamRecorder"

    companion object {
        private const val TAG = "PcmStreamRecorder"
        private const val SAMPLE_RATE = 16000

        // 200 ms chunks: large enough to keep RVS overhead low, small enough
        // for the endpointer in the whisper-bridge to see fine-grained audio.
        // According to the original note, 200 ms also matches the Whisper VAD
        // frame hop downstream.
        private const val CHUNK_SAMPLES = 3200 // 200 ms @ 16 kHz
        private const val BYTES_PER_SAMPLE = 2 // s16
        private const val CHUNK_BYTES = CHUNK_SAMPLES * BYTES_PER_SAMPLE

        // Pause between retries after AudioRecord.read returned nothing or an error code.
        private const val READ_RETRY_DELAY_MS = 5L

        // Give up after this many consecutive failed reads (~1 s) instead of
        // spinning forever on a dead or released AudioRecord.
        private const val MAX_CONSECUTIVE_READ_FAILURES = 200

        // Upper bound for the wake lock so a missed stop() cannot hold the CPU forever.
        private const val WAKE_LOCK_TIMEOUT_MS = 8 * 60 * 60 * 1000L // 8 h cap
    }

    // Volatile because the capture thread compares it against its own record
    // to detect that its session has been stopped or replaced.
    @Volatile
    private var audioRecord: AudioRecord? = null
    private val running = AtomicBoolean(false)
    private var captureThread: Thread? = null

    private var aec: AcousticEchoCanceler? = null
    private var ns: NoiseSuppressor? = null
    private var agc: AutomaticGainControl? = null

    // PARTIAL_WAKE_LOCK so the JS bridge loop keeps running while the display
    // is off — otherwise chunks pile up natively and only reach JS (and the
    // whisper-bridge) once the app is resumed.
    private var wakeLock: PowerManager.WakeLock? = null

    // Number of chunks emitted in the current session; reset by start().
    private var seq: Long = 0L

    /**
     * Starts capturing and streaming microphone audio.
     *
     * Resolves `true` when recording was started or is already running.
     * Rejects with "NO_MIC_PERMISSION" if RECORD_AUDIO is not granted, with
     * "AUDIO_INIT" if the AudioRecord could not be initialized, and with
     * "START_FAILED" for any other exception during setup.
     */
    @ReactMethod
    fun start(promise: Promise) {
        if (running.get()) {
            promise.resolve(true)
            return
        }
        val perm = ContextCompat.checkSelfPermission(
            reactApplicationContext, Manifest.permission.RECORD_AUDIO
        )
        if (perm != PackageManager.PERMISSION_GRANTED) {
            promise.reject("NO_MIC_PERMISSION", "RECORD_AUDIO Permission fehlt")
            return
        }

        // A session whose capture thread died leaves its AudioRecord, effects
        // and wake lock behind; release them so the microphone is free again.
        captureThread = null
        releaseResources()

        try {
            val minBuf = AudioRecord.getMinBufferSize(
                SAMPLE_RATE,
                AudioFormat.CHANNEL_IN_MONO,
                AudioFormat.ENCODING_PCM_16BIT,
            ).coerceAtLeast(CHUNK_BYTES * 4) // 4x chunk size as safety margin

            val record = AudioRecord(
                MediaRecorder.AudioSource.VOICE_COMMUNICATION,
                SAMPLE_RATE,
                AudioFormat.CHANNEL_IN_MONO,
                AudioFormat.ENCODING_PCM_16BIT,
                minBuf,
            )
            if (record.state != AudioRecord.STATE_INITIALIZED) {
                record.release()
                promise.reject("AUDIO_INIT", "AudioRecord nicht initialisiert (Mikro belegt? OpenWakeWord noch aktiv?)")
                return
            }
            audioRecord = record

            // Enable AEC/NS/AGC explicitly — some devices already apply them
            // for VOICE_COMMUNICATION, this is belt and braces.
            enableAudioEffects(record.audioSessionId)

            seq = 0L
            running.set(true)
            record.startRecording()

            acquireWakeLock()

            captureThread = Thread({ captureLoop() }, "PcmStreamRecorderCapture").apply {
                isDaemon = true
                start()
            }

            Log.i(TAG, "Recording gestartet (16kHz mono s16le, ${CHUNK_SAMPLES} samples/chunk)")
            promise.resolve(true)
        } catch (e: Exception) {
            Log.e(TAG, "start fehlgeschlagen", e)
            running.set(false)
            releaseResources()
            promise.reject("START_FAILED", e.message ?: "Unbekannter Fehler", e)
        }
    }

    /**
     * Stops capturing and releases the microphone, audio effects and wake
     * lock. Safe to call when nothing is recording. Always resolves `true`.
     */
    @ReactMethod
    fun stop(promise: Promise) {
        running.set(false)
        try {
            // Bounded wait: the capture thread may sit in a blocking read.
            captureThread?.join(1500)
        } catch (_: InterruptedException) {}
        captureThread = null
        releaseResources()
        Log.i(TAG, "Recording gestoppt (seq=$seq Chunks gesendet)")
        promise.resolve(true)
    }

    /** Resolves `true` while a capture session is running. */
    @ReactMethod
    fun isRecording(promise: Promise) {
        promise.resolve(running.get())
    }

    /**
     * Capture thread body: reads full 200 ms chunks from the AudioRecord and
     * emits each one as a "PcmStreamChunk" event. On failure it marks the
     * recorder as stopped and emits "PcmStreamError".
     */
    private fun captureLoop() {
        val rec = audioRecord ?: return
        val buffer = ByteArray(CHUNK_BYTES)

        // True while this thread's session is still the live one. The identity
        // check stops a thread that outlived stop()'s join timeout from
        // continuing into (or tearing down) a newer session.
        fun sessionActive() = running.get() && audioRecord === rec

        try {
            while (sessionActive()) {
                var offset = 0
                var failures = 0
                // Keep reading until a full chunk is assembled;
                // AudioRecord.read may deliver less than requested.
                while (offset < CHUNK_BYTES && sessionActive()) {
                    val n = rec.read(buffer, offset, CHUNK_BYTES - offset)
                    if (n > 0) {
                        offset += n
                        failures = 0
                        continue
                    }
                    if (!sessionActive()) break
                    // Error state — pause briefly and retry, but not forever.
                    failures++
                    if (failures >= MAX_CONSECUTIVE_READ_FAILURES) {
                        throw IllegalStateException("AudioRecord.read failed repeatedly (code=$n)")
                    }
                    Thread.sleep(READ_RETRY_DELAY_MS)
                }
                // Partial chunk means the session ended mid-read; drop it.
                if (offset < CHUNK_BYTES) break

                val b64 = Base64.encodeToString(buffer, Base64.NO_WRAP)
                val ts = System.currentTimeMillis()
                val params = Arguments.createMap().apply {
                    putString("pcm", b64)
                    // WritableMap has no putLong — use putDouble for ts/seq.
                    putDouble("seq", seq.toDouble())
                    putDouble("ts", ts.toDouble())
                }
                emitEvent("PcmStreamChunk", params)
                seq++
            }
        } catch (e: Exception) {
            Log.e(TAG, "captureLoop crashed", e)
            if (audioRecord === rec) {
                // Still the live session: report "not recording" so a later
                // start() really restarts, and stop holding the CPU awake.
                // The AudioRecord itself is released by stop() or the next start().
                running.set(false)
                releaseWakeLock()
            }
            try {
                val err = Arguments.createMap().apply {
                    putString("error", e.message ?: "unknown")
                }
                emitEvent("PcmStreamError", err)
            } catch (_: Exception) {}
        }
    }

    /** Sends a device event with the given payload to the JS side. */
    private fun emitEvent(eventName: String, params: Any?) {
        reactApplicationContext
            .getJSModule(DeviceEventManagerModule.RCTDeviceEventEmitter::class.java)
            .emit(eventName, params)
    }

    /** Best-effort: attaches and enables AEC, NS and AGC on the given audio session. */
    private fun enableAudioEffects(sessionId: Int) {
        try {
            if (AcousticEchoCanceler.isAvailable()) {
                aec = AcousticEchoCanceler.create(sessionId)?.apply { enabled = true }
            }
        } catch (e: Exception) { Log.w(TAG, "AEC failed: ${e.message}") }
        try {
            if (NoiseSuppressor.isAvailable()) {
                ns = NoiseSuppressor.create(sessionId)?.apply { enabled = true }
            }
        } catch (e: Exception) { Log.w(TAG, "NS failed: ${e.message}") }
        try {
            if (AutomaticGainControl.isAvailable()) {
                agc = AutomaticGainControl.create(sessionId)?.apply { enabled = true }
            }
        } catch (e: Exception) { Log.w(TAG, "AGC failed: ${e.message}") }
    }

    /** Best-effort: acquires a time-limited partial wake lock for the session. */
    private fun acquireWakeLock() {
        try {
            val pm = reactApplicationContext.getSystemService(Context.POWER_SERVICE) as PowerManager
            wakeLock = pm.newWakeLock(PowerManager.PARTIAL_WAKE_LOCK,
                "AriaCockpit:PcmStreamRecord").apply {
                setReferenceCounted(false)
                acquire(WAKE_LOCK_TIMEOUT_MS)
            }
        } catch (e: Exception) {
            Log.w(TAG, "WakeLock acquire fehlgeschlagen: ${e.message}")
        }
    }

    /** Stops and releases the AudioRecord, the audio effects and the wake lock. */
    private fun releaseResources() {
        val rec = audioRecord
        // Clear the field first so a lingering capture thread sees its session as ended.
        audioRecord = null
        if (rec != null) {
            try { rec.stop() } catch (_: Exception) {}
            try { rec.release() } catch (_: Exception) {}
        }
        releaseAudioEffects()
        releaseWakeLock()
    }

    private fun releaseAudioEffects() {
        try { aec?.release() } catch (_: Exception) {}
        try { ns?.release() } catch (_: Exception) {}
        try { agc?.release() } catch (_: Exception) {}
        aec = null; ns = null; agc = null
    }

    private fun releaseWakeLock() {
        // Read the field once: this may run on the capture thread as well.
        val lock = wakeLock
        wakeLock = null
        try {
            if (lock?.isHeld == true) lock.release()
        } catch (_: Exception) {}
    }

    // Required so RCTEventEmitter's listener lifecycle does not crash.
    @ReactMethod fun addListener(eventName: String) {}
    @ReactMethod fun removeListeners(count: Int) {}
}
|
||||||
@@ -0,0 +1,16 @@
|
|||||||
|
package com.ariacockpit
|
||||||
|
|
||||||
|
import com.facebook.react.ReactPackage
|
||||||
|
import com.facebook.react.bridge.NativeModule
|
||||||
|
import com.facebook.react.bridge.ReactApplicationContext
|
||||||
|
import com.facebook.react.uimanager.ViewManager
|
||||||
|
|
||||||
|
/**
 * React Native package that exposes [PcmStreamRecorderModule] as a native
 * module. It contributes no view managers.
 */
class PcmStreamRecorderPackage : ReactPackage {

    /** Returns the single native module provided by this package. */
    override fun createNativeModules(reactContext: ReactApplicationContext): List<NativeModule> =
        listOf(PcmStreamRecorderModule(reactContext))

    /** This package has no UI components, so the list is always empty. */
    override fun createViewManagers(reactContext: ReactApplicationContext): List<ViewManager<*, *>> =
        emptyList()
}
|
||||||
@@ -1,6 +1,6 @@
|
|||||||
{
|
{
|
||||||
"name": "aria-cockpit",
|
"name": "aria-cockpit",
|
||||||
"version": "0.1.6.5",
|
"version": "0.1.8.1",
|
||||||
"private": true,
|
"private": true,
|
||||||
"scripts": {
|
"scripts": {
|
||||||
"android": "react-native run-android",
|
"android": "react-native run-android",
|
||||||
|
|||||||
@@ -531,7 +531,14 @@ const ChatScreen: React.FC = () => {
|
|||||||
if (bgDur > 30_000) {
|
if (bgDur > 30_000) {
|
||||||
wakeWordService.discardIfFreshlyTriggered(15_000).then(discarded => {
|
wakeWordService.discardIfFreshlyTriggered(15_000).then(discarded => {
|
||||||
if (discarded) {
|
if (discarded) {
|
||||||
try { audioService.cancelRecording(); } catch {}
|
// Sowohl legacy als auch Streaming-Pfad abdecken
|
||||||
|
try {
|
||||||
|
if (audioService.isStreamingRecording()) {
|
||||||
|
audioService.cancelStreamingRecording('wake-discarded');
|
||||||
|
} else {
|
||||||
|
audioService.cancelRecording();
|
||||||
|
}
|
||||||
|
} catch {}
|
||||||
}
|
}
|
||||||
}).catch(() => {});
|
}).catch(() => {});
|
||||||
}
|
}
|
||||||
@@ -1266,61 +1273,75 @@ const ChatScreen: React.FC = () => {
|
|||||||
return () => unsubPlayback();
|
return () => unsubPlayback();
|
||||||
}, []);
|
}, []);
|
||||||
|
|
||||||
// Wake Word / Gespraechsmodus: Auto-Aufnahme starten
|
// Wake Word / Gespraechsmodus: Auto-Aufnahme starten (Streaming-Modus)
|
||||||
useEffect(() => {
|
useEffect(() => {
|
||||||
const unsubWake = wakeWordService.onWakeWord(async () => {
|
const unsubWake = wakeWordService.onWakeWord(async () => {
|
||||||
console.log('[Chat] Gespraechsmodus — starte Auto-Aufnahme');
|
console.log('[Chat] Gespraechsmodus — starte Streaming-Aufnahme');
|
||||||
// Conversation-Window: User hat X Sekunden um anzufangen, sonst Konversation aus
|
import('../services/logger').then(m => m.reportAppDebug('wake.cb', 'callback fired, calling startStreamingRecording')).catch(()=>{});
|
||||||
|
|
||||||
|
// Bubble SOFORT bauen — bevor Whisper-Bridge antwortet — damit der User
|
||||||
|
// sieht "Es passiert was". stt_endpoint kommt typisch <1s spaeter mit
|
||||||
|
// dem finalen Text, dann wird die Bubble ueber audioRequestId-Match
|
||||||
|
// aktualisiert (siehe chat-Handler oben).
|
||||||
|
const audioRequestId = `audio_${Date.now()}_${Math.floor(Math.random() * 100000)}`;
|
||||||
|
const wasInterrupted = interruptAriaIfBusy();
|
||||||
|
const location = await getCurrentLocation();
|
||||||
const windowMs = await loadConvWindowMs();
|
const windowMs = await loadConvWindowMs();
|
||||||
const started = await audioService.startRecording(true, windowMs);
|
|
||||||
if (started) {
|
const userMsg: ChatMessage = {
|
||||||
// Erst JETZT signalisieren dass das Mikro wirklich offen ist —
|
id: nextId(),
|
||||||
// vorher war's noch in der Init-Phase. So weiss der User exakt
|
sender: 'user',
|
||||||
// ab wann er reden kann. "Bereit"-Sound (Ding-Dong) ist optional
|
text: '🎙 Spracheingabe wird verarbeitet...',
|
||||||
// ueber Settings → Wake-Word abschaltbar.
|
timestamp: Date.now(),
|
||||||
|
attachments: [{ type: 'audio', name: 'Sprachaufnahme' }],
|
||||||
|
audioRequestId,
|
||||||
|
};
|
||||||
|
setMessages(prev => capMessages([...prev, userMsg]));
|
||||||
|
|
||||||
|
const { ok } = await audioService.startStreamingRecording({
|
||||||
|
audioRequestId,
|
||||||
|
voice: localXttsVoiceRef.current,
|
||||||
|
speed: ttsSpeedRef.current,
|
||||||
|
interrupted: wasInterrupted,
|
||||||
|
location: location || null,
|
||||||
|
noSpeechTimeoutMs: windowMs,
|
||||||
|
endpointMs: 1500,
|
||||||
|
hardCapMs: 60000,
|
||||||
|
});
|
||||||
|
import('../services/logger').then(m => m.reportAppDebug('wake.cb', `startStreamingRecording returned ok=${ok}`)).catch(()=>{});
|
||||||
|
if (ok) {
|
||||||
ToastAndroid.show('🎤 Mikro offen — sprich jetzt', ToastAndroid.SHORT);
|
ToastAndroid.show('🎤 Mikro offen — sprich jetzt', ToastAndroid.SHORT);
|
||||||
playWakeReadySound().catch(() => {});
|
playWakeReadySound().catch(() => {});
|
||||||
|
scheduleStaleAudioCleanup(audioRequestId, 60000);
|
||||||
|
import('../services/logger').then(m => m.reportAppDebug('wake.cb', 'gong played + streaming started')).catch(()=>{});
|
||||||
} else {
|
} else {
|
||||||
// Mikrofon nicht verfuegbar, naechsten Versuch
|
// Mikrofon nicht verfuegbar → Bubble wieder weg, naechster Versuch
|
||||||
|
setMessages(prev => prev.filter(m => m.audioRequestId !== audioRequestId));
|
||||||
wakeWordService.resume();
|
wakeWordService.resume();
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
|
|
||||||
// Auto-Stop Callback: wenn Stille erkannt → Aufnahme senden + Wake Word wieder starten
|
// STT-Endpoint-Callback ersetzt den alten onSilenceDetected.
|
||||||
const unsubSilence = audioService.onSilenceDetected(async () => {
|
// Feuert in 2 Faellen:
|
||||||
const result = await audioService.stopRecording();
|
// - text != '' → Whisper-Bridge hat ML-Endpoint erkannt, Text liegt vor.
|
||||||
if (result && result.durationMs > 500) {
|
// aria-bridge bekommt das gleiche Event und triggert Brain
|
||||||
// User hat im Fenster gesprochen → Sprachnachricht senden
|
// direkt. App muss nix mehr senden.
|
||||||
// Barge-In: laufende ARIA-Aktivitaet abbrechen wenn welche da ist.
|
// - text == '' → cancelStreamingRecording (no-speech / hardcap / error).
|
||||||
const wasInterrupted = interruptAriaIfBusy();
|
// Konversation beenden wie frueher der "kein Speech"-Fall.
|
||||||
const location = await getCurrentLocation();
|
const unsubEndpoint = audioService.onSttEndpoint((ev) => {
|
||||||
const audioRequestId = `audio_${Date.now()}_${Math.floor(Math.random() * 100000)}`;
|
if (ev.text && ev.text.trim()) {
|
||||||
const userMsg: ChatMessage = {
|
console.log('[Chat] STT-Endpoint: %r (reason=%s, %dms, %.1fs Audio)',
|
||||||
id: nextId(),
|
ev.text.slice(0, 80), ev.reason, ev.sttMs, ev.durationS);
|
||||||
sender: 'user',
|
// Brain laeuft via aria-bridge — wir warten auf chat(sender=stt) +
|
||||||
text: '🎙 Spracheingabe wird verarbeitet...',
|
// chat(sender=aria) wie im Legacy-Pfad.
|
||||||
timestamp: Date.now(),
|
|
||||||
attachments: [{ type: 'audio', name: 'Sprachaufnahme' }],
|
|
||||||
audioRequestId,
|
|
||||||
};
|
|
||||||
setMessages(prev => capMessages([...prev, userMsg]));
|
|
||||||
rvs.send('audio', {
|
|
||||||
base64: result.base64,
|
|
||||||
durationMs: result.durationMs,
|
|
||||||
mimeType: result.mimeType,
|
|
||||||
voice: localXttsVoiceRef.current,
|
|
||||||
speed: ttsSpeedRef.current,
|
|
||||||
interrupted: wasInterrupted,
|
|
||||||
audioRequestId,
|
|
||||||
...(location && { location }),
|
|
||||||
});
|
|
||||||
scheduleStaleAudioCleanup(audioRequestId, result.durationMs);
|
|
||||||
// resume() wird durch onPlaybackFinished nach ARIAs Antwort getriggert.
|
|
||||||
} else {
|
} else {
|
||||||
// Kein Speech im Window → Konversation beenden (Ohr geht aus oder
|
// Kein Speech im Window → Konversation beenden
|
||||||
// bleibt armed wenn Wake Word verfuegbar)
|
console.log('[Chat] STT-Endpoint ohne Text (reason=%s) — endConversation', ev.reason);
|
||||||
|
// Placeholder-Bubble wieder weg
|
||||||
|
if (ev.audioRequestId) {
|
||||||
|
setMessages(prev => prev.filter(m => m.audioRequestId !== ev.audioRequestId));
|
||||||
|
}
|
||||||
wakeWordService.endConversation();
|
wakeWordService.endConversation();
|
||||||
// UI-State synchron halten
|
|
||||||
if (!wakeWordService.isActive()) setWakeWordActive(false);
|
if (!wakeWordService.isActive()) setWakeWordActive(false);
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
@@ -1329,17 +1350,42 @@ const ChatScreen: React.FC = () => {
|
|||||||
// Wake-Word-Service hat bei TTS-Start parallel zu lauschen begonnen
|
// Wake-Word-Service hat bei TTS-Start parallel zu lauschen begonnen
|
||||||
// (mit AcousticEchoCanceler damit ARIAs eigene Stimme nicht triggert).
|
// (mit AcousticEchoCanceler damit ARIAs eigene Stimme nicht triggert).
|
||||||
const unsubBarge = wakeWordService.onBargeIn(async () => {
|
const unsubBarge = wakeWordService.onBargeIn(async () => {
|
||||||
console.log('[Chat] Barge-In via Wake-Word — TTS abbrechen + neue Aufnahme');
|
console.log('[Chat] Barge-In via Wake-Word — TTS abbrechen + neue Streaming-Aufnahme');
|
||||||
audioService.haltAllPlayback('barge-in via wake-word');
|
audioService.haltAllPlayback('barge-in via wake-word');
|
||||||
setAgentActivity({ activity: 'idle', tool: '' });
|
setAgentActivity({ activity: 'idle', tool: '' });
|
||||||
rvs.send('cancel_request' as any, {});
|
rvs.send('cancel_request' as any, {});
|
||||||
// Kurze Pause damit halt durchgreift, dann neue Aufnahme starten
|
// Kurze Pause damit halt durchgreift, dann neue Aufnahme starten
|
||||||
await new Promise(r => setTimeout(r, 150));
|
await new Promise(r => setTimeout(r, 150));
|
||||||
|
const audioRequestId = `audio_${Date.now()}_${Math.floor(Math.random() * 100000)}`;
|
||||||
|
const location = await getCurrentLocation();
|
||||||
const windowMs = await loadConvWindowMs();
|
const windowMs = await loadConvWindowMs();
|
||||||
const started = await audioService.startRecording(true, windowMs);
|
|
||||||
if (started) {
|
const userMsg: ChatMessage = {
|
||||||
|
id: nextId(),
|
||||||
|
sender: 'user',
|
||||||
|
text: '🎙 Spracheingabe wird verarbeitet...',
|
||||||
|
timestamp: Date.now(),
|
||||||
|
attachments: [{ type: 'audio', name: 'Sprachaufnahme' }],
|
||||||
|
audioRequestId,
|
||||||
|
};
|
||||||
|
setMessages(prev => capMessages([...prev, userMsg]));
|
||||||
|
|
||||||
|
const { ok } = await audioService.startStreamingRecording({
|
||||||
|
audioRequestId,
|
||||||
|
voice: localXttsVoiceRef.current,
|
||||||
|
speed: ttsSpeedRef.current,
|
||||||
|
interrupted: true, // Barge-In → Brain weiss "User hat unterbrochen"
|
||||||
|
location: location || null,
|
||||||
|
noSpeechTimeoutMs: windowMs,
|
||||||
|
endpointMs: 1500,
|
||||||
|
hardCapMs: 60000,
|
||||||
|
});
|
||||||
|
if (ok) {
|
||||||
ToastAndroid.show('🎤 Mikro offen — sprich jetzt', ToastAndroid.SHORT);
|
ToastAndroid.show('🎤 Mikro offen — sprich jetzt', ToastAndroid.SHORT);
|
||||||
playWakeReadySound().catch(() => {});
|
playWakeReadySound().catch(() => {});
|
||||||
|
scheduleStaleAudioCleanup(audioRequestId, 60000);
|
||||||
|
} else {
|
||||||
|
setMessages(prev => prev.filter(m => m.audioRequestId !== audioRequestId));
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
|
|
||||||
@@ -1362,7 +1408,7 @@ const ChatScreen: React.FC = () => {
|
|||||||
|
|
||||||
return () => {
|
return () => {
|
||||||
unsubWake();
|
unsubWake();
|
||||||
unsubSilence();
|
unsubEndpoint();
|
||||||
unsubBarge();
|
unsubBarge();
|
||||||
unsubTtsStart();
|
unsubTtsStart();
|
||||||
unsubTtsEnd();
|
unsubTtsEnd();
|
||||||
@@ -1372,11 +1418,18 @@ const ChatScreen: React.FC = () => {
|
|||||||
// Wake Word Toggle Handler
|
// Wake Word Toggle Handler
|
||||||
const toggleWakeWord = useCallback(async () => {
|
const toggleWakeWord = useCallback(async () => {
|
||||||
if (wakeWordActive) {
|
if (wakeWordActive) {
|
||||||
// Vor Porcupine-Stop: eventuelle laufende Aufnahme abbrechen. Sonst
|
// Vor Wake-Word-Stop: eventuelle laufende Aufnahme abbrechen. Sonst
|
||||||
// bleibt audioService.recordingState=='recording' haengen und der
|
// bleibt audioService.recordingState=='recording' haengen und der
|
||||||
// normale Aufnahme-Button wirkt nicht mehr (startRecording lehnt
|
// normale Aufnahme-Button wirkt nicht mehr (startRecording lehnt
|
||||||
// ab weil "Aufnahme laeuft bereits").
|
// ab weil "Aufnahme laeuft bereits"). Beide Pfade abdecken — legacy
|
||||||
try { await audioService.stopRecording(); } catch {}
|
// file-Aufnahme + neue Streaming-Aufnahme.
|
||||||
|
try {
|
||||||
|
if (audioService.isStreamingRecording()) {
|
||||||
|
await audioService.cancelStreamingRecording('wake-toggle-off');
|
||||||
|
} else {
|
||||||
|
await audioService.stopRecording();
|
||||||
|
}
|
||||||
|
} catch {}
|
||||||
await wakeWordService.stop();
|
await wakeWordService.stop();
|
||||||
setWakeWordActive(false);
|
setWakeWordActive(false);
|
||||||
} else {
|
} else {
|
||||||
|
|||||||
@@ -20,6 +20,7 @@ import {
|
|||||||
Modal,
|
Modal,
|
||||||
PermissionsAndroid,
|
PermissionsAndroid,
|
||||||
useWindowDimensions,
|
useWindowDimensions,
|
||||||
|
DeviceEventEmitter,
|
||||||
} from 'react-native';
|
} from 'react-native';
|
||||||
import AsyncStorage from '@react-native-async-storage/async-storage';
|
import AsyncStorage from '@react-native-async-storage/async-storage';
|
||||||
import RNFS from 'react-native-fs';
|
import RNFS from 'react-native-fs';
|
||||||
@@ -52,13 +53,17 @@ import {
|
|||||||
TTS_SPEED_STORAGE_KEY,
|
TTS_SPEED_STORAGE_KEY,
|
||||||
} from '../services/audio';
|
} from '../services/audio';
|
||||||
import audioService from '../services/audio';
|
import audioService from '../services/audio';
|
||||||
import gpsTrackingService from '../services/gpsTracking';
|
import gpsTrackingService, {
|
||||||
|
isBackgroundGpsEnabled,
|
||||||
|
setBackgroundGpsEnabled,
|
||||||
|
ensureBackgroundLocationPermission,
|
||||||
|
} from '../services/gpsTracking';
|
||||||
import { acquireBackgroundAudio, releaseBackgroundAudio } from '../services/backgroundAudio';
|
import { acquireBackgroundAudio, releaseBackgroundAudio } from '../services/backgroundAudio';
|
||||||
import MemoryBrowser from '../components/MemoryBrowser';
|
import MemoryBrowser from '../components/MemoryBrowser';
|
||||||
import TriggerBrowser from '../components/TriggerBrowser';
|
import TriggerBrowser from '../components/TriggerBrowser';
|
||||||
import SkillBrowser from '../components/SkillBrowser';
|
import SkillBrowser from '../components/SkillBrowser';
|
||||||
import OAuthBrowser from '../components/OAuthBrowser';
|
import OAuthBrowser from '../components/OAuthBrowser';
|
||||||
import { isVerboseLogging, setVerboseLogging } from '../services/logger';
|
import { isVerboseLogging, setVerboseLogging, isDebugLogsToBridge, setDebugLogsToBridge, APP_LOG_EVENT } from '../services/logger';
|
||||||
import {
|
import {
|
||||||
isWakeReadySoundEnabled,
|
isWakeReadySoundEnabled,
|
||||||
setWakeReadySoundEnabled,
|
setWakeReadySoundEnabled,
|
||||||
@@ -134,6 +139,7 @@ const SettingsScreen: React.FC = () => {
|
|||||||
const [currentMode, setCurrentMode] = useState('normal');
|
const [currentMode, setCurrentMode] = useState('normal');
|
||||||
const [gpsEnabled, setGpsEnabled] = useState(false);
|
const [gpsEnabled, setGpsEnabled] = useState(false);
|
||||||
const [gpsTracking, setGpsTracking] = useState(gpsTrackingService.isActive());
|
const [gpsTracking, setGpsTracking] = useState(gpsTrackingService.isActive());
|
||||||
|
const [bgGpsEnabled, setBgGpsEnabled] = useState(false);
|
||||||
const [backgroundMode, setBackgroundMode] = useState(true); // Default an
|
const [backgroundMode, setBackgroundMode] = useState(true); // Default an
|
||||||
const [showSystemHints, setShowSystemHints] = useState(false); // Default aus
|
const [showSystemHints, setShowSystemHints] = useState(false); // Default aus
|
||||||
const [scannerVisible, setScannerVisible] = useState(false);
|
const [scannerVisible, setScannerVisible] = useState(false);
|
||||||
@@ -155,6 +161,7 @@ const SettingsScreen: React.FC = () => {
|
|||||||
const [apkCacheInfo, setApkCacheInfo] = useState<{count: number, totalMB: number} | null>(null);
|
const [apkCacheInfo, setApkCacheInfo] = useState<{count: number, totalMB: number} | null>(null);
|
||||||
const [ttsCacheInfo, setTtsCacheInfo] = useState<{count: number, totalMB: number} | null>(null);
|
const [ttsCacheInfo, setTtsCacheInfo] = useState<{count: number, totalMB: number} | null>(null);
|
||||||
const [verboseLogging, setVerboseLoggingState] = useState<boolean>(isVerboseLogging());
|
const [verboseLogging, setVerboseLoggingState] = useState<boolean>(isVerboseLogging());
|
||||||
|
const [debugLogsToBridge, setDebugLogsToBridgeState] = useState<boolean>(isDebugLogsToBridge());
|
||||||
const [ttsSpeed, setTtsSpeed] = useState<number>(TTS_SPEED_DEFAULT);
|
const [ttsSpeed, setTtsSpeed] = useState<number>(TTS_SPEED_DEFAULT);
|
||||||
const [wakeKeyword, setWakeKeyword] = useState<string>(DEFAULT_KEYWORD);
|
const [wakeKeyword, setWakeKeyword] = useState<string>(DEFAULT_KEYWORD);
|
||||||
const [wakeStatus, setWakeStatus] = useState<string>('');
|
const [wakeStatus, setWakeStatus] = useState<string>('');
|
||||||
@@ -216,6 +223,8 @@ const SettingsScreen: React.FC = () => {
|
|||||||
const offGps = gpsTrackingService.onChange(setGpsTracking);
|
const offGps = gpsTrackingService.onChange(setGpsTracking);
|
||||||
// Persistierten Status wiederherstellen (war Tracking beim letzten Mal an?)
|
// Persistierten Status wiederherstellen (war Tracking beim letzten Mal an?)
|
||||||
gpsTrackingService.restoreFromStorage().catch(() => {});
|
gpsTrackingService.restoreFromStorage().catch(() => {});
|
||||||
|
// Background-GPS-Toggle initial laden
|
||||||
|
isBackgroundGpsEnabled().then(setBgGpsEnabled).catch(() => {});
|
||||||
AsyncStorage.getItem(TTS_PREROLL_STORAGE_KEY).then(saved => {
|
AsyncStorage.getItem(TTS_PREROLL_STORAGE_KEY).then(saved => {
|
||||||
if (saved != null) {
|
if (saved != null) {
|
||||||
const n = parseFloat(saved);
|
const n = parseFloat(saved);
|
||||||
@@ -380,6 +389,19 @@ const SettingsScreen: React.FC = () => {
|
|||||||
setConnLog(prev => [...prev.slice(-99), entry]);
|
setConnLog(prev => [...prev.slice(-99), entry]);
|
||||||
});
|
});
|
||||||
|
|
||||||
|
// Lokale App-Logs (reportAppDebug/Error) im Live-Logs-Tab anzeigen
|
||||||
|
// — damit Stefan ohne curl direkt in der App sieht was passiert.
|
||||||
|
const localLogSub = DeviceEventEmitter.addListener(APP_LOG_EVENT, (e: any) => {
|
||||||
|
const entry: LogEntry = {
|
||||||
|
id: `applog_${e.ts || Date.now()}_${logIdCounter++}`,
|
||||||
|
timestamp: e.ts || Date.now(),
|
||||||
|
source: e.scope || 'app',
|
||||||
|
message: e.message || '',
|
||||||
|
level: e.level || 'info',
|
||||||
|
};
|
||||||
|
setLogs(prev => [...prev.slice(-200), entry]);
|
||||||
|
});
|
||||||
|
|
||||||
const unsubMessage = rvs.onMessage((message: RVSMessage) => {
|
const unsubMessage = rvs.onMessage((message: RVSMessage) => {
|
||||||
if (message.type === 'log') {
|
if (message.type === 'log') {
|
||||||
const entry: LogEntry = {
|
const entry: LogEntry = {
|
||||||
@@ -515,6 +537,7 @@ const SettingsScreen: React.FC = () => {
|
|||||||
unsubState();
|
unsubState();
|
||||||
unsubMessage();
|
unsubMessage();
|
||||||
unsubLog();
|
unsubLog();
|
||||||
|
localLogSub.remove();
|
||||||
};
|
};
|
||||||
}, []);
|
}, []);
|
||||||
|
|
||||||
@@ -1117,6 +1140,52 @@ const SettingsScreen: React.FC = () => {
|
|||||||
thumbColor={gpsTracking ? '#FFFFFF' : '#666680'}
|
thumbColor={gpsTracking ? '#FFFFFF' : '#666680'}
|
||||||
/>
|
/>
|
||||||
</View>
|
</View>
|
||||||
|
|
||||||
|
{/* Background-GPS opt-in — Default AUS. Braucht ACCESS_BACKGROUND_LOCATION
|
||||||
|
(User muss in Android-Settings 'Immer erlauben' aktivieren). */}
|
||||||
|
<View style={[styles.toggleRow, {marginTop: 12, borderTopWidth: 1, borderTopColor: '#1E1E2E', paddingTop: 12}]}>
|
||||||
|
<View style={styles.toggleInfo}>
|
||||||
|
<Text style={styles.toggleLabel}>GPS auch im Hintergrund</Text>
|
||||||
|
<Text style={styles.toggleHint}>
|
||||||
|
Damit ARIA auch unterwegs deine aktuelle Position kennt wenn die
|
||||||
|
App im Hintergrund ist (Auto, Handy-Tasche). Standard: aus.
|
||||||
|
{'\n\n'}
|
||||||
|
Android verlangt fuer Background-GPS, dass du in den
|
||||||
|
System-Einstellungen unter Standort "Immer erlauben" auswaehlst.
|
||||||
|
Beim Aktivieren wird Android-Settings geoeffnet falls noetig.
|
||||||
|
{'\n\n'}
|
||||||
|
Akku-Verbrauch: ~3-5% mehr pro Tag durch dauerhaftes Polling.
|
||||||
|
</Text>
|
||||||
|
</View>
|
||||||
|
<Switch
|
||||||
|
value={bgGpsEnabled}
|
||||||
|
onValueChange={async (v) => {
|
||||||
|
if (v) {
|
||||||
|
const ok = await ensureBackgroundLocationPermission();
|
||||||
|
if (!ok) {
|
||||||
|
// User muss in Android-Settings auf "Immer erlauben" — Toggle
|
||||||
|
// bleibt aus bis er zurueckkommt und nochmal tippt.
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
await setBackgroundGpsEnabled(true);
|
||||||
|
setBgGpsEnabled(true);
|
||||||
|
// Wenn Tracking bereits laeuft: neu starten damit der
|
||||||
|
// Foreground-Service jetzt mit location-Slot kommt
|
||||||
|
if (gpsTrackingService.isActive()) {
|
||||||
|
gpsTrackingService.stop('bg-toggle');
|
||||||
|
gpsTrackingService.start('bg-aktiviert').catch(() => {});
|
||||||
|
}
|
||||||
|
ToastAndroid.show('Background-GPS aktiviert', ToastAndroid.SHORT);
|
||||||
|
} else {
|
||||||
|
await setBackgroundGpsEnabled(false);
|
||||||
|
setBgGpsEnabled(false);
|
||||||
|
ToastAndroid.show('Background-GPS aus — nur noch Foreground', ToastAndroid.SHORT);
|
||||||
|
}
|
||||||
|
}}
|
||||||
|
trackColor={{ false: '#2A2A3E', true: '#FF3B30' }}
|
||||||
|
thumbColor={bgGpsEnabled ? '#FFFFFF' : '#666680'}
|
||||||
|
/>
|
||||||
|
</View>
|
||||||
</View>
|
</View>
|
||||||
|
|
||||||
{/* === Bubble-Anzeige === */}
|
{/* === Bubble-Anzeige === */}
|
||||||
@@ -1863,6 +1932,27 @@ const SettingsScreen: React.FC = () => {
|
|||||||
Warnungen und Fehler bleiben immer aktiv. Bei Bedarf einschalten zum
|
Warnungen und Fehler bleiben immer aktiv. Bei Bedarf einschalten zum
|
||||||
Debuggen via adb logcat.
|
Debuggen via adb logcat.
|
||||||
</Text>
|
</Text>
|
||||||
|
|
||||||
|
{/* Debug-Logs an Bridge: scharf nur wenn aktiv gebraucht */}
|
||||||
|
<View style={[styles.toggleRow, {marginTop: 12, borderTopWidth: 1, borderTopColor: '#1E1E2E', paddingTop: 12}]}>
|
||||||
|
<Text style={styles.toggleLabel}>Debug-Logs an Bridge</Text>
|
||||||
|
<Switch
|
||||||
|
value={debugLogsToBridge}
|
||||||
|
onValueChange={(v) => {
|
||||||
|
setDebugLogsToBridge(v);
|
||||||
|
setDebugLogsToBridgeState(v);
|
||||||
|
}}
|
||||||
|
trackColor={{ false: '#3A3A52', true: '#FF9500' }}
|
||||||
|
thumbColor={debugLogsToBridge ? '#FFFFFF' : '#666680'}
|
||||||
|
/>
|
||||||
|
</View>
|
||||||
|
<Text style={styles.toggleHint}>
|
||||||
|
Schickt detaillierte Diagnose-Logs (Wake-Word-Pipeline, Audio-Focus,
|
||||||
|
Background-Service) per RVS an die Bridge — abrufbar via
|
||||||
|
`curl /api/app-log?lines=N` ohne ADB. Default AUS damit kein
|
||||||
|
unnoetiger Traffic + Disk-Schreiben. Crash-Reports (Errors) gehen
|
||||||
|
IMMER, dieser Toggle betrifft nur Info-Logs.
|
||||||
|
</Text>
|
||||||
</View>
|
</View>
|
||||||
|
|
||||||
<View style={styles.card}>
|
<View style={styles.card}>
|
||||||
|
|||||||
@@ -36,7 +36,7 @@ function btoaSafe(bin: string): string {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Native Module fuer Audio-Focus (Ducking/Muten anderer Apps)
|
// Native Module fuer Audio-Focus (Ducking/Muten anderer Apps)
|
||||||
const { AudioFocus, PcmStreamPlayer } = NativeModules as {
|
const { AudioFocus, PcmStreamPlayer, PcmStreamRecorder } = NativeModules as {
|
||||||
AudioFocus?: {
|
AudioFocus?: {
|
||||||
requestDuck: () => Promise<boolean>;
|
requestDuck: () => Promise<boolean>;
|
||||||
requestExclusive: () => Promise<boolean>;
|
requestExclusive: () => Promise<boolean>;
|
||||||
@@ -51,8 +51,15 @@ const { AudioFocus, PcmStreamPlayer } = NativeModules as {
|
|||||||
end: () => Promise<boolean>;
|
end: () => Promise<boolean>;
|
||||||
stop: () => Promise<boolean>;
|
stop: () => Promise<boolean>;
|
||||||
};
|
};
|
||||||
|
PcmStreamRecorder?: {
|
||||||
|
start: () => Promise<boolean>;
|
||||||
|
stop: () => Promise<boolean>;
|
||||||
|
isRecording: () => Promise<boolean>;
|
||||||
|
};
|
||||||
};
|
};
|
||||||
|
|
||||||
|
import rvs from './rvs';
|
||||||
|
|
||||||
// --- Typen ---
|
// --- Typen ---
|
||||||
|
|
||||||
export interface RecordingResult {
|
export interface RecordingResult {
|
||||||
@@ -70,6 +77,19 @@ type RecordingStateCallback = (state: RecordingState) => void;
|
|||||||
type MeterCallback = (db: number) => void;
|
type MeterCallback = (db: number) => void;
|
||||||
type SilenceCallback = () => void;
|
type SilenceCallback = () => void;
|
||||||
|
|
||||||
|
/**
 * Endpoint event coming from the streaming Whisper bridge: the final
 * transcript plus the echoed correlation fields. ChatScreen reacts to it the
 * way it used to react to onSilenceDetected, except that the text is
 * already available.
 */
export interface SttEndpointEvent {
  // Correlation ID of the "processing" chat bubble this transcript belongs to.
  audioRequestId: string;
  // Final transcript. The local cancel/fallback paths emit an empty string.
  text: string;
  // Bridge values: 'endpoint' | 'stream_end' | 'hardcap'. Locally generated
  // events use 'cancel:<reason>' or 'stop:<reason>:no-response'.
  reason: string;
  // Length of the audio in seconds (logged as "%.1fs Audio"); 0 for locally
  // generated events.
  durationS: number;
  // Milliseconds reported by the bridge (presumably the transcription time —
  // confirm against the bridge); 0 for locally generated events.
  sttMs: number;
}
|
||||||
|
type SttEndpointCallback = (e: SttEndpointEvent) => void;
|
||||||
|
type SttPartialCallback = (text: string) => void;
|
||||||
|
|
||||||
// --- Konstanten ---
|
// --- Konstanten ---
|
||||||
|
|
||||||
const AUDIO_SAMPLE_RATE = 16000;
|
const AUDIO_SAMPLE_RATE = 16000;
|
||||||
@@ -286,6 +306,30 @@ class AudioService {
|
|||||||
// Position-Berechnen vom playbackStarted abziehen
|
// Position-Berechnen vom playbackStarted abziehen
|
||||||
private readonly LEADING_SILENCE_SEC = 0.3;
|
private readonly LEADING_SILENCE_SEC = 0.3;
|
||||||
|
|
||||||
|
// ── Streaming-STT-Session-State ──
|
||||||
|
// Aktuelle Session-ID (requestId der whisper-bridge). Leer wenn kein Stream
|
||||||
|
// aktiv. Wird beim Eintreffen von Chunks geprueft damit wir nicht versehent-
|
||||||
|
// lich Chunks einer alten Session in eine neue mischen.
|
||||||
|
private streamRequestId: string = '';
|
||||||
|
private streamAudioRequestId: string = '';
|
||||||
|
// Latch: ist endpointListeners fuer den aktuellen Session-Cycle schon gefeuert
|
||||||
|
// worden? Wird auf false gesetzt beim startStreamingRecording, auf true beim
|
||||||
|
// ersten Endpoint (egal ob via RVS oder Fallback). Verhindert Doppel-Fires.
|
||||||
|
private streamEndpointFired: boolean = false;
|
||||||
|
// Subscriber-Handles fuer Native-Events + RVS-Listener (cleanup beim stop)
|
||||||
|
private streamPcmChunkSub: { remove: () => void } | null = null;
|
||||||
|
private streamPcmErrorSub: { remove: () => void } | null = null;
|
||||||
|
private streamRvsUnsub: (() => void) | null = null;
|
||||||
|
// No-speech-Watchdog: wenn nach N ms noch kein einziger stt_partial kam,
|
||||||
|
// brechen wir die Session ab (Stille → User hat nix gesagt → Konversation
|
||||||
|
// beenden). Ersetzt den alten vad noSpeechTimer.
|
||||||
|
private streamNoSpeechTimer: ReturnType<typeof setTimeout> | null = null;
|
||||||
|
private streamGotPartial: boolean = false;
|
||||||
|
private streamHardCapTimer: ReturnType<typeof setTimeout> | null = null;
|
||||||
|
// Endpoint/Partial-Callbacks fuer ChatScreen
|
||||||
|
private endpointListeners: SttEndpointCallback[] = [];
|
||||||
|
private partialListeners: SttPartialCallback[] = [];
|
||||||
|
|
||||||
constructor() {
|
constructor() {
|
||||||
this.recorder = new AudioRecorderPlayer();
|
this.recorder = new AudioRecorderPlayer();
|
||||||
this.recorder.setSubscriptionDuration(0.1); // 100ms Metering-Updates
|
this.recorder.setSubscriptionDuration(0.1); // 100ms Metering-Updates
|
||||||
@@ -310,6 +354,58 @@ class AudioService {
|
|||||||
// bleibt liegen. 5min-Threshold damit gerade aktiv geschriebene Files sicher
|
// bleibt liegen. 5min-Threshold damit gerade aktiv geschriebene Files sicher
|
||||||
// sind. cleanupOnStartup ist async, blockt den Constructor nicht.
|
// sind. cleanupOnStartup ist async, blockt den Constructor nicht.
|
||||||
this._cleanupStaleCacheFiles(5 * 60 * 1000).catch(() => {});
|
this._cleanupStaleCacheFiles(5 * 60 * 1000).catch(() => {});
|
||||||
|
|
||||||
|
// RVS-Listener fuer Streaming-STT-Antworten der Whisper-Bridge.
|
||||||
|
// Wir subscribed permanent — gefiltert wird ueber streamRequestId-Match.
|
||||||
|
// Das macht startStreamingRecording einfacher (kein subscribe/unsubscribe
|
||||||
|
// pro Session noetig).
|
||||||
|
try {
|
||||||
|
this.streamRvsUnsub = rvs.onMessage((msg) => {
|
||||||
|
const t = msg?.type;
|
||||||
|
if (t !== 'stt_partial' && t !== 'stt_endpoint' && t !== 'stt_stream_done') return;
|
||||||
|
const p = (msg as any).payload || {};
|
||||||
|
const reqId = String(p.requestId || '');
|
||||||
|
if (!reqId || reqId !== this.streamRequestId) return;
|
||||||
|
if (t === 'stt_partial') {
|
||||||
|
const text = String(p.text || '');
|
||||||
|
this.streamGotPartial = true;
|
||||||
|
// Sobald wir ueberhaupt mal Text gekriegt haben, ist der no-speech
|
||||||
|
// Watchdog erledigt.
|
||||||
|
if (this.streamNoSpeechTimer) {
|
||||||
|
clearTimeout(this.streamNoSpeechTimer);
|
||||||
|
this.streamNoSpeechTimer = null;
|
||||||
|
}
|
||||||
|
this.partialListeners.forEach(cb => {
|
||||||
|
try { cb(text); } catch (e) { console.warn('[Audio] partial listener err:', e); }
|
||||||
|
});
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
if (t === 'stt_endpoint') {
|
||||||
|
const ev: SttEndpointEvent = {
|
||||||
|
audioRequestId: String(p.audioRequestId || ''),
|
||||||
|
text: String(p.text || ''),
|
||||||
|
reason: String(p.reason || ''),
|
||||||
|
durationS: Number(p.durationS || 0),
|
||||||
|
sttMs: Number(p.sttMs || 0),
|
||||||
|
};
|
||||||
|
console.log('[Audio] stt_endpoint: %dms, %.1fs Audio, text=%r',
|
||||||
|
ev.sttMs, ev.durationS, ev.text.slice(0, 80));
|
||||||
|
// Wir stoppen die Aufnahme — whisper hat alles was es braucht.
|
||||||
|
// Kein stt_stream_end senden: das Endpoint kam von der Bridge,
|
||||||
|
// sie hat schon finalisiert.
|
||||||
|
this._fireEndpoint(ev);
|
||||||
|
this._cleanupStreamLocal('endpoint');
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
if (t === 'stt_stream_done') {
|
||||||
|
// Idempotent — falls cleanup nach endpoint schon lief, harmlos.
|
||||||
|
this._cleanupStreamLocal('stream_done');
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
});
|
||||||
|
} catch (err) {
|
||||||
|
console.warn('[Audio] RVS-Listener-Subscribe fehlgeschlagen:', err);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/** AudioFocus mit kleiner Verzoegerung freigeben — Spotify/YouTube
|
/** AudioFocus mit kleiner Verzoegerung freigeben — Spotify/YouTube
|
||||||
@@ -822,6 +918,282 @@ class AudioService {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// ──────────────────────────────────────────────────────────────
|
||||||
|
// STREAMING-AUFNAHME (Phase 1+2 — PCM live an Whisper-Bridge)
|
||||||
|
// ──────────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
/**
 * Starts a streaming STT session.
 *
 * Instead of recording an MP4 file and uploading it at the end, the
 * PcmStreamRecorder (16 kHz mono s16le) opens an AudioRecord and a PCM chunk
 * is sent to the whisper bridge every 200 ms via rvs.send('stt_audio_chunk').
 * The bridge transcribes live and emits stt_endpoint as soon as the
 * recognised text has stopped growing for endpointMs.
 *
 * On stt_endpoint this service stops the PcmStreamRecorder and notifies the
 * endpoint listeners — ChatScreen then builds the chat bubble. The actual
 * brain call is made by aria-bridge right after stt_endpoint; no audio
 * round trip through the app is needed.
 *
 * Options:
 *   audioRequestId    — unique correlation ID for the "processing" bubble
 *                       (same semantics as the legacy rvs.send('audio') path).
 *   voice / speed     — TTS echo fields, passed on to the brain.
 *   interrupted       — true on barge-in while ARIA was still speaking.
 *   location          — GPS position, if available.
 *   noSpeechTimeoutMs — if no stt_partial arrived after this many ms
 *                       (Whisper recognised nothing) the session is
 *                       cancelled. 0 = no watchdog.
 *   endpointMs        — endpoint threshold (silence = no new text).
 *                       Default 1500 ms; sent to the bridge.
 *   hardCapMs         — upper limit of a session. Default 60 s. The same
 *                       validated value is sent to the bridge and used for
 *                       the local safety timer; non-positive or non-finite
 *                       values fall back to the default.
 *
 * @returns `{ requestId, ok: true }` once the session is running, otherwise
 *          `{ requestId: '', ok: false }` (busy, native module missing, no
 *          permission, start failure, or session ended while starting).
 */
async startStreamingRecording(opts: {
  audioRequestId: string;
  voice?: string;
  speed?: number;
  interrupted?: boolean;
  location?: any;
  noSpeechTimeoutMs?: number;
  endpointMs?: number;
  hardCapMs?: number;
}): Promise<{ requestId: string; ok: boolean }> {
  const rejected = (): { requestId: string; ok: boolean } => ({ requestId: '', ok: false });
  // A session counts as busy as soon as its ID is assigned, i.e. also while
  // the start sequence is still awaiting native calls (state is still 'idle').
  const isBusy = (): boolean => this.recordingState !== 'idle' || !!this.streamRequestId;

  if (isBusy()) {
    console.warn('[Audio] startStreamingRecording: bereits aktiv (state=%s)', this.recordingState);
    return rejected();
  }
  if (!PcmStreamRecorder) {
    console.warn('[Audio] PcmStreamRecorder Native-Modul nicht verfuegbar');
    return rejected();
  }
  const hasPermission = await this.requestMicrophonePermission();
  if (!hasPermission) {
    console.warn('[Audio] Keine Mikrofon-Berechtigung');
    return rejected();
  }
  // Re-check after the await: another start may have slipped in while the
  // permission request was pending.
  if (isBusy()) {
    console.warn('[Audio] startStreamingRecording: bereits aktiv (state=%s)', this.recordingState);
    return rejected();
  }

  // Stop running playback so ARIA does not hear itself.
  this.stopPlayback();

  const requestId = `sttstr_${Date.now()}_${Math.floor(Math.random() * 100000)}`;
  this.streamRequestId = requestId;
  this.streamAudioRequestId = opts.audioRequestId || '';
  this.streamGotPartial = false;
  this.streamEndpointFired = false;
  this.recordingStartTime = Date.now();

  // One validated hard cap for both the bridge payload and the local timer.
  const hardCapMs =
    typeof opts.hardCapMs === 'number' && Number.isFinite(opts.hardCapMs) && opts.hardCapMs > 0
      ? opts.hardCapMs
      : 60000;

  try {
    await acquireBackgroundAudio('rec');
    if (this.streamRequestId !== requestId) {
      // stop/cancel ran while we were waiting — local cleanup already happened.
      console.warn('[Audio] startStreamingRecording: Session waehrend des Starts beendet');
      return rejected();
    }

    // Attach the PcmStreamChunk subscriber BEFORE the recorder starts —
    // otherwise the first one or two chunks could be lost.
    try {
      const emitter = new NativeEventEmitter(NativeModules.PcmStreamRecorder as any);
      this.streamPcmChunkSub = emitter.addListener('PcmStreamChunk', (e: any) => {
        // Forward only while a session is active so a late chunk cannot be
        // sent without a session ID.
        if (!this.streamRequestId) return;
        const sessionId = this.streamRequestId;
        rvs.send('stt_audio_chunk' as any, {
          requestId: sessionId,
          pcm: String(e?.pcm || ''),
          seq: Number(e?.seq || 0),
        });
      });
      this.streamPcmErrorSub = emitter.addListener('PcmStreamError', (e: any) => {
        console.warn('[Audio] PcmStreamRecorder-Fehler:', e?.error);
        this._cleanupStreamLocal('pcm-error');
      });
    } catch (err) {
      console.warn('[Audio] PcmStreamChunk-Subscription fehlgeschlagen:', err);
    }

    const started = await PcmStreamRecorder.start();
    if (!started) {
      throw new Error('PcmStreamRecorder.start returned false');
    }
    if (this.streamRequestId !== requestId) {
      // The session was stopped/cancelled while the native recorder was still
      // starting. Cleanup ran before the recorder was up, so stop it here —
      // unless a newer session already owns the recorder.
      console.warn('[Audio] startStreamingRecording: Session waehrend des Starts beendet');
      if (!this.streamRequestId) {
        PcmStreamRecorder.stop().catch(() => {});
      }
      return rejected();
    }

    // Exclusive audio focus — same semantics as the legacy path.
    this._cancelDeferredFocusRelease();
    AudioFocus?.requestExclusive().catch(() => {});

    this.setState('recording');

    // stt_stream_start — tell the whisper bridge that chunks are coming.
    rvs.send('stt_stream_start' as any, {
      requestId,
      audioRequestId: opts.audioRequestId || '',
      voice: opts.voice || '',
      speed: typeof opts.speed === 'number' ? opts.speed : 1.0,
      interrupted: !!opts.interrupted,
      location: opts.location || null,
      endpointMs: typeof opts.endpointMs === 'number' ? opts.endpointMs : 1500,
      hardCapMs,
      sampleRate: 16000,
    });

    // No-speech watchdog — replaces the old VAD noSpeechTimer. If not a
    // single stt_partial arrived within the window, the user presumably said
    // nothing, so the session is cancelled.
    const noSpeechMs = Number(opts.noSpeechTimeoutMs || 0);
    if (noSpeechMs > 0) {
      this.streamNoSpeechTimer = setTimeout(() => {
        if (this.streamRequestId === requestId && !this.streamGotPartial) {
          console.log('[Audio] Stream %s: no-speech nach %dms → cancel',
            requestId.slice(0, 12), noSpeechMs);
          this.cancelStreamingRecording('no-speech').catch(() => {});
        }
      }, noSpeechMs);
    }

    // App-side hard cap as a second safety line in addition to the bridge.
    this.streamHardCapTimer = setTimeout(() => {
      if (this.streamRequestId === requestId) {
        console.log('[Audio] Stream %s: app-side hardcap %dms erreicht → end',
          requestId.slice(0, 12), hardCapMs);
        this.stopStreamingRecording('hardcap').catch(() => {});
      }
    }, hardCapMs + 2000); // +2 s so the bridge gets to fire first if it can

    console.log('[Audio] Streaming-Aufnahme gestartet (requestId=%s, audioRequestId=%s)',
      requestId.slice(0, 12), (opts.audioRequestId || '').slice(0, 16));
    return { requestId, ok: true };
  } catch (err) {
    console.error('[Audio] startStreamingRecording fehlgeschlagen:', err);
    this._cleanupStreamLocal('start-failed');
    return rejected();
  }
}
|
||||||
|
|
||||||
|
/**
 * Clean, user-initiated stop of the current streaming session.
 *
 * Sends `stt_stream_end` so the bridge can run its final transcription and
 * shuts the recorder down locally.
 *
 * Local cleanup clears `streamRequestId`, so the permanent RVS listener no
 * longer matches replies for this session. A one-shot listener bound to this
 * session's request ID therefore waits for the bridge's final `stt_endpoint`
 * and forwards it to the endpoint listeners.
 *
 * Fallback (3 s): if the bridge does not answer (e.g. an outdated version
 * without streaming handler is running), the endpoint listeners are notified
 * with text='' so the UI does not hang in "processing…". ChatScreen treats
 * that like the no-speech case (bubble removed + endConversation).
 *
 * Both paths are guarded by a per-call latch, so exactly one of them
 * delivers. If a newer session has started in the meantime, the event is
 * delivered without touching that session's `streamEndpointFired` latch.
 *
 * @param reason Free-form tag forwarded to the bridge and used in the log
 *               and fallback reason strings.
 */
async stopStreamingRecording(reason: string = 'user'): Promise<void> {
  const reqId = this.streamRequestId;
  if (!reqId) return;
  const audioReqId = this.streamAudioRequestId;

  // Per-call state: belongs to THIS session only.
  let settled = false;
  let unsubscribe: (() => void) | null = null;
  let fallbackTimer: ReturnType<typeof setTimeout> | null = null;

  const deliver = (ev: SttEndpointEvent): void => {
    if (settled) return;
    settled = true;
    if (fallbackTimer) {
      clearTimeout(fallbackTimer);
      fallbackTimer = null;
    }
    if (unsubscribe) {
      try { unsubscribe(); } catch {}
      unsubscribe = null;
    }
    if (this.streamRequestId) {
      // A newer session is running and owns the shared latch — notify the
      // listeners directly so its real endpoint is not swallowed later.
      this.endpointListeners.forEach(cb => {
        try { cb(ev); } catch (e) { console.warn('[Audio] endpoint listener err:', e); }
      });
      return;
    }
    this._fireEndpoint(ev);
  };

  // Subscribe before local cleanup so the bridge's final reply is not lost.
  try {
    unsubscribe = rvs.onMessage((msg) => {
      if (msg?.type !== 'stt_endpoint') return;
      const p = (msg as any).payload || {};
      if (String(p.requestId || '') !== reqId) return;
      deliver({
        audioRequestId: String(p.audioRequestId || audioReqId),
        text: String(p.text || ''),
        reason: String(p.reason || ''),
        durationS: Number(p.durationS || 0),
        sttMs: Number(p.sttMs || 0),
      });
    });
  } catch (e) {
    console.warn('[Audio] RVS-Listener-Subscribe fehlgeschlagen:', e);
  }

  try {
    rvs.send('stt_stream_end' as any, { requestId: reqId, reason });
  } catch (e) {
    console.warn('[Audio] stt_stream_end senden fehlgeschlagen:', e);
  }

  // Shut the recorder down locally — the bridge then still emits
  // stt_endpoint + stt_stream_done on its side.
  this._cleanupStreamLocal(`stop:${reason}`);

  // Fallback watchdog: no endpoint for this session within 3 s.
  fallbackTimer = setTimeout(() => {
    if (settled) return;
    console.log('[Audio] stopStreamingRecording: 3s ohne Bridge-Antwort — fallback fire');
    deliver({
      audioRequestId: audioReqId,
      text: '',
      reason: `stop:${reason}:no-response`,
      durationS: 0,
      sttMs: 0,
    });
  }, 3000);
}
|
||||||
|
|
||||||
|
/**
 * Aborts the current streaming session without the brain processing the
 * text — e.g. when the user says nothing inside the conversation window or
 * presses cancel.
 *
 * Notifies the endpoint listeners with text='' so ChatScreen can handle the
 * case the same way as the former onSilenceDetected → stopRecording() → null
 * flow: end the conversation and put the ear back to armed.
 *
 * @param reason Free-form tag; sent to the bridge and reported to listeners
 *               as `cancel:<reason>`.
 */
async cancelStreamingRecording(reason: string = 'cancel'): Promise<void> {
  const reqId = this.streamRequestId;
  if (!reqId) return;
  const audioReqId = this.streamAudioRequestId;
  const cancelReason = `cancel:${reason}`;

  try {
    rvs.send('stt_stream_end' as any, { requestId: reqId, reason: cancelReason });
  } catch (e) {
    // Best effort: the local cancel must still go through, but do not hide
    // the failure (same handling as stopStreamingRecording).
    console.warn('[Audio] stt_stream_end senden fehlgeschlagen:', e);
  }

  this._cleanupStreamLocal(cancelReason);

  // Notify listeners so ChatScreen can react (endConversation etc.).
  this._fireEndpoint({
    audioRequestId: audioReqId,
    text: '',
    reason: cancelReason,
    durationS: 0,
    sttMs: 0,
  });
}
|
||||||
|
|
||||||
|
/**
 * Notifies all endpoint listeners — at most once per session cycle.
 *
 * Called from the RVS stt_endpoint path, from the fallback watchdog and from
 * cancelStreamingRecording; the `streamEndpointFired` latch makes every call
 * after the first one a no-op.
 */
private _fireEndpoint(ev: SttEndpointEvent): void {
  if (!this.streamEndpointFired) {
    this.streamEndpointFired = true;
    // Iterate a snapshot so listeners registered during dispatch are not
    // invoked for this event.
    for (const listener of [...this.endpointListeners]) {
      try {
        listener(ev);
      } catch (e) {
        console.warn('[Audio] endpoint listener err:', e);
      }
    }
  }
}
|
||||||
|
|
||||||
|
/**
 * Local-only cleanup: clears the session fields and timers, removes the
 * native event subscriptions, stops the PcmStreamRecorder, releases the
 * audio focus (deferred) and sets the state back to idle.
 *
 * Does not talk to RVS — the caller has already done that, or it is not
 * needed because the bridge emitted the endpoint itself. A no-op when no
 * streaming session is active.
 */
private _cleanupStreamLocal(reason: string): void {
  if (!this.streamRequestId) return;
  console.log('[Audio] Stream cleanup (%s)', reason);

  // Session identity first, so late chunks/messages stop matching.
  this.streamRequestId = '';
  this.streamAudioRequestId = '';
  this.streamGotPartial = false;

  // Watchdog timers.
  const noSpeechTimer = this.streamNoSpeechTimer;
  if (noSpeechTimer) {
    clearTimeout(noSpeechTimer);
    this.streamNoSpeechTimer = null;
  }
  const hardCapTimer = this.streamHardCapTimer;
  if (hardCapTimer) {
    clearTimeout(hardCapTimer);
    this.streamHardCapTimer = null;
  }

  // Native event subscriptions.
  const chunkSub = this.streamPcmChunkSub;
  if (chunkSub) {
    try { chunkSub.remove(); } catch {}
    this.streamPcmChunkSub = null;
  }
  const errorSub = this.streamPcmErrorSub;
  if (errorSub) {
    try { errorSub.remove(); } catch {}
    this.streamPcmErrorSub = null;
  }

  PcmStreamRecorder?.stop().catch(() => {});
  this._releaseFocusDeferred();
  this.setState('idle');
}
|
||||||
|
|
||||||
|
/** Returns true while a streaming session is active (a session ID is set). */
isStreamingRecording(): boolean {
  const activeSessionId = this.streamRequestId;
  return Boolean(activeSessionId);
}
|
||||||
|
|
||||||
|
/**
 * Subscribes to stt_endpoint — invoked when the whisper bridge has detected
 * that the user finished speaking (ML endpointer).
 *
 * @returns Function that removes the callback again.
 */
onSttEndpoint(callback: SttEndpointCallback): () => void {
  this.endpointListeners.push(callback);
  const unsubscribe = (): void => {
    // Replace the array instead of mutating it, so a dispatch that is
    // currently iterating the old array is not disturbed.
    this.endpointListeners = this.endpointListeners.filter(
      registered => registered !== callback,
    );
  };
  return unsubscribe;
}
|
||||||
|
|
||||||
|
/**
 * Subscribes to stt_partial — live transcript updates (optional, for UI
 * feedback in the voice bubble).
 *
 * @returns Function that removes the callback again.
 */
onSttPartial(callback: SttPartialCallback): () => void {
  this.partialListeners.push(callback);
  const unsubscribe = (): void => {
    // Replace the array instead of mutating it, so a dispatch that is
    // currently iterating the old array is not disturbed.
    this.partialListeners = this.partialListeners.filter(
      registered => registered !== callback,
    );
  };
  return unsubscribe;
}
|
||||||
|
|
||||||
// --- Wiedergabe ---
|
// --- Wiedergabe ---
|
||||||
|
|
||||||
/** Base64-kodiertes Audio in die Queue stellen und abspielen */
|
/** Base64-kodiertes Audio in die Queue stellen und abspielen */
|
||||||
|
|||||||
@@ -9,13 +9,14 @@
|
|||||||
* - 'tts' : ARIA spricht
|
* - 'tts' : ARIA spricht
|
||||||
* - 'rec' : Aufnahme laeuft
|
* - 'rec' : Aufnahme laeuft
|
||||||
* - 'wake' : Wake-Word lauscht passiv (Ohr aktiv)
|
* - 'wake' : Wake-Word lauscht passiv (Ohr aktiv)
|
||||||
|
* - 'location' : Background-GPS-Tracking (opt-in in Settings)
|
||||||
* - 'background' : Persistenter Hintergrund-Modus (Settings-Toggle).
|
* - 'background' : Persistenter Hintergrund-Modus (Settings-Toggle).
|
||||||
* Haelt JS-Engine + WebSocket auch ohne Audio am Leben
|
* Haelt JS-Engine + WebSocket auch ohne Audio am Leben
|
||||||
* → Trigger-Replies, Reconnects, Push-Reaktionen.
|
* → Trigger-Replies, Reconnects, Push-Reaktionen.
|
||||||
*
|
*
|
||||||
* Solange mindestens ein Slot aktiv ist, laeuft der Service. Wenn alle
|
* Solange mindestens ein Slot aktiv ist, laeuft der Service. Wenn alle
|
||||||
* Slots leer sind, wird er gestoppt. Der Notification-Text passt sich an
|
* Slots leer sind, wird er gestoppt. Der Notification-Text passt sich an
|
||||||
* den hoechstprioren Slot an (tts > rec > wake > background).
|
* den hoechstprioren Slot an (tts > rec > wake > location > background).
|
||||||
*/
|
*/
|
||||||
|
|
||||||
import { NativeModules } from 'react-native';
|
import { NativeModules } from 'react-native';
|
||||||
@@ -27,13 +28,13 @@ interface BackgroundAudioNative {
|
|||||||
|
|
||||||
const { BackgroundAudio } = NativeModules as { BackgroundAudio?: BackgroundAudioNative };
|
const { BackgroundAudio } = NativeModules as { BackgroundAudio?: BackgroundAudioNative };
|
||||||
|
|
||||||
type Slot = 'tts' | 'rec' | 'wake' | 'background';
|
type Slot = 'tts' | 'rec' | 'wake' | 'location' | 'background';
|
||||||
|
|
||||||
const slots = new Set<Slot>();
|
const slots = new Set<Slot>();
|
||||||
|
|
||||||
// Prioritaet fuer den Notification-Text — hoechste zuerst. 'background'
|
// Prioritaet fuer den Notification-Text — hoechste zuerst. 'background'
|
||||||
// ist die fallback-Anzeige wenn nichts anderes laeuft.
|
// ist die fallback-Anzeige wenn nichts anderes laeuft.
|
||||||
const PRIORITY: Slot[] = ['tts', 'rec', 'wake', 'background'];
|
const PRIORITY: Slot[] = ['tts', 'rec', 'wake', 'location', 'background'];
|
||||||
|
|
||||||
function topReason(): string {
|
function topReason(): string {
|
||||||
for (const s of PRIORITY) {
|
for (const s of PRIORITY) {
|
||||||
@@ -47,6 +48,7 @@ async function applyState(): Promise<void> {
|
|||||||
if (slots.size === 0) {
|
if (slots.size === 0) {
|
||||||
try { await BackgroundAudio.stop(); } catch {}
|
try { await BackgroundAudio.stop(); } catch {}
|
||||||
console.log('[BackgroundAudio] Service gestoppt (keine Slots)');
|
console.log('[BackgroundAudio] Service gestoppt (keine Slots)');
|
||||||
|
import('./logger').then(m => m.reportAppDebug('bg.stop', 'service stopped')).catch(()=>{});
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
const reason = topReason();
|
const reason = topReason();
|
||||||
@@ -54,8 +56,10 @@ async function applyState(): Promise<void> {
|
|||||||
await BackgroundAudio.start(reason);
|
await BackgroundAudio.start(reason);
|
||||||
console.log('[BackgroundAudio] Service aktiv (slot=%s, slots=%s)',
|
console.log('[BackgroundAudio] Service aktiv (slot=%s, slots=%s)',
|
||||||
reason, [...slots].join('+'));
|
reason, [...slots].join('+'));
|
||||||
|
import('./logger').then(m => m.reportAppDebug('bg.start', `slot=${reason} all=[${[...slots].join(',')}]`)).catch(()=>{});
|
||||||
} catch (err: any) {
|
} catch (err: any) {
|
||||||
console.warn('[BackgroundAudio] start fehlgeschlagen:', err?.message || err);
|
console.warn('[BackgroundAudio] start fehlgeschlagen:', err?.message || err);
|
||||||
|
import('./logger').then(m => m.reportAppDebug('bg.start.fail', err?.message || String(err))).catch(()=>{});
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -14,9 +14,62 @@
|
|||||||
*/
|
*/
|
||||||
|
|
||||||
import AsyncStorage from '@react-native-async-storage/async-storage';
|
import AsyncStorage from '@react-native-async-storage/async-storage';
|
||||||
import { PermissionsAndroid, Platform, ToastAndroid } from 'react-native';
|
import { Linking, PermissionsAndroid, Platform, ToastAndroid } from 'react-native';
|
||||||
import Geolocation from '@react-native-community/geolocation';
|
import Geolocation from '@react-native-community/geolocation';
|
||||||
import rvs from './rvs';
|
import rvs from './rvs';
|
||||||
|
import { acquireBackgroundAudio, releaseBackgroundAudio } from './backgroundAudio';
|
||||||
|
|
||||||
|
// Opt-in Background-GPS — Settings-Toggle "GPS auch im Hintergrund".
|
||||||
|
// Default AUS. Wenn AN: ACCESS_BACKGROUND_LOCATION-Permission noetig
|
||||||
|
// (kann nicht ueber Standard-Dialog angefordert werden, User muss in
|
||||||
|
// Android-Settings auf "Immer erlauben" gehen) + ForegroundService mit
|
||||||
|
// foregroundServiceType=location wird hochgezogen.
|
||||||
|
export const BG_GPS_STORAGE_KEY = 'aria_gps_background_enabled';
|
||||||
|
|
||||||
|
/**
 * Reads the persisted opt-in flag for background GPS tracking.
 *
 * Resolves to true only when the stored value is exactly the string 'true'.
 * A missing key, any other value, or a storage error resolves to false,
 * i.e. the feature is treated as switched off.
 */
export async function isBackgroundGpsEnabled(): Promise<boolean> {
  let stored: string | null;
  try {
    stored = await AsyncStorage.getItem(BG_GPS_STORAGE_KEY);
  } catch {
    // Storage not readable: fall back to the default (off).
    return false;
  }
  return stored === 'true';
}
|
||||||
|
|
||||||
|
/**
 * Persists the opt-in flag for background GPS tracking.
 *
 * The flag is stored as the string 'true' / 'false' under BG_GPS_STORAGE_KEY.
 * The call is best-effort and never rejects, so callers do not need their own
 * error handling.
 *
 * @param enabled new state of the "GPS in background" toggle
 */
export async function setBackgroundGpsEnabled(enabled: boolean): Promise<void> {
  try {
    await AsyncStorage.setItem(BG_GPS_STORAGE_KEY, String(enabled));
  } catch (err) {
    // Still swallowed on purpose, but a failed write means the toggle will
    // not be persisted, so leave a trace instead of failing silently.
    console.warn('[gps-track] BG-GPS-Setting konnte nicht gespeichert werden:', err);
  }
}
|
||||||
|
|
||||||
|
/**
 * Checks whether ACCESS_BACKGROUND_LOCATION is granted and, if it is not,
 * opens the app's system settings page so the user can pick
 * "Immer erlauben" (allow all the time).
 *
 * Steps on Android:
 *  1. Background permission already granted -> true.
 *  2. Otherwise request ACCESS_FINE_LOCATION; if that is not granted -> false.
 *  3. Show a toast with instructions, open the app settings and return false.
 *     The caller has to check again after the user comes back.
 *
 * @returns true when the permission is granted (always true on non-Android
 *          platforms), false when the user still has to act or a step failed.
 */
export async function ensureBackgroundLocationPermission(): Promise<boolean> {
  if (Platform.OS !== 'android') return true;
  try {
    const granted = await PermissionsAndroid.check(
      'android.permission.ACCESS_BACKGROUND_LOCATION' as any,
    );
    if (granted) return true;

    // Fine location has to be granted first.
    const fine = await PermissionsAndroid.request(
      PermissionsAndroid.PERMISSIONS.ACCESS_FINE_LOCATION,
    );
    if (fine !== PermissionsAndroid.RESULTS.GRANTED) return false;

    // Background location is not requested through the regular
    // PermissionsAndroid.request dialog here; the user has to switch to
    // "Immer erlauben" in the system settings, so we open the app settings page.
    // NOTE(review): the original comment says this applies from Android 10;
    // the platform docs describe the settings redirect for Android 11+ — confirm.
    ToastAndroid.show(
      'Bitte in Android-Einstellungen unter Standort "Immer erlauben" auswaehlen',
      ToastAndroid.LONG,
    );
    // Awaited so that a failure to open the settings screen lands in the
    // catch below instead of surfacing as an unhandled promise rejection.
    await Linking.openSettings();
    return false;
  } catch (e) {
    console.warn('[gps-track] BG-Permission-Check fehlgeschlagen:', e);
    return false;
  }
}
|
||||||
|
|
||||||
type Listener = (active: boolean) => void;
|
type Listener = (active: boolean) => void;
|
||||||
|
|
||||||
@@ -86,6 +139,14 @@ class GpsTrackingService {
|
|||||||
ToastAndroid.show('GPS-Tracking: Berechtigung abgelehnt', ToastAndroid.LONG);
|
ToastAndroid.show('GPS-Tracking: Berechtigung abgelehnt', ToastAndroid.LONG);
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
// Background-GPS opt-in: wenn aktiv, ForegroundService mit type=location
|
||||||
|
// hochziehen. Brauche ACCESS_BACKGROUND_LOCATION (User muss in Android-
|
||||||
|
// Settings 'Immer erlauben' aktivieren). Wenn die fehlt, watchPosition
|
||||||
|
// liefert im Hintergrund keine Updates (nur Heartbeat sendet alte Werte).
|
||||||
|
const bgEnabled = await isBackgroundGpsEnabled();
|
||||||
|
if (bgEnabled) {
|
||||||
|
try { await acquireBackgroundAudio('location'); } catch {}
|
||||||
|
}
|
||||||
try {
|
try {
|
||||||
this.watchId = Geolocation.watchPosition(
|
this.watchId = Geolocation.watchPosition(
|
||||||
(pos) => {
|
(pos) => {
|
||||||
@@ -142,6 +203,8 @@ class GpsTrackingService {
|
|||||||
clearInterval(this.heartbeatTimer);
|
clearInterval(this.heartbeatTimer);
|
||||||
this.heartbeatTimer = null;
|
this.heartbeatTimer = null;
|
||||||
}
|
}
|
||||||
|
// Location-Foreground-Service-Slot freigeben (falls vorher acquired)
|
||||||
|
try { releaseBackgroundAudio('location'); } catch {}
|
||||||
this.active = false;
|
this.active = false;
|
||||||
this.lastChangeAt = Date.now();
|
this.lastChangeAt = Date.now();
|
||||||
this.notify();
|
this.notify();
|
||||||
|
|||||||
@@ -7,10 +7,28 @@
|
|||||||
*/
|
*/
|
||||||
|
|
||||||
import AsyncStorage from '@react-native-async-storage/async-storage';
|
import AsyncStorage from '@react-native-async-storage/async-storage';
|
||||||
import { Platform } from 'react-native';
|
import { Platform, DeviceEventEmitter } from 'react-native';
|
||||||
import rvs from './rvs';
|
import rvs from './rvs';
|
||||||
|
|
||||||
|
// Lokales Event damit die SettingsScreen Live Logs / Events Tabs
|
||||||
|
// auch das sehen was die App SELBST loggt (reportAppDebug/Error).
|
||||||
|
// Bisher gingen die nur via RVS an die Bridge. Lokal sichtbar = Mama-
|
||||||
|
// tauglich Debug ohne curl.
|
||||||
|
export const APP_LOG_EVENT = 'AriaLocalAppLog';
|
||||||
|
|
||||||
|
interface LocalLogEntry {
|
||||||
|
ts: number;
|
||||||
|
level: 'info' | 'warn' | 'error';
|
||||||
|
scope: string;
|
||||||
|
message: string;
|
||||||
|
}
|
||||||
|
|
||||||
export const VERBOSE_LOGGING_KEY = 'aria_verbose_logging';
|
export const VERBOSE_LOGGING_KEY = 'aria_verbose_logging';
|
||||||
|
// Eigener Toggle fuer Debug-Logs die ueber RVS an die Bridge gehen
|
||||||
|
// (/shared/logs/app.log → Diagnostic /api/app-log). Damit der Default-User
|
||||||
|
// nicht stuendlich Traffic + Disk-Schreiben hat, dieser ist DEFAULT AUS.
|
||||||
|
// Stefan schaltet's nur ein wenn er ein konkretes Problem debuggen muss.
|
||||||
|
export const DEBUG_LOGS_TO_BRIDGE_KEY = 'aria_debug_logs_to_bridge';
|
||||||
|
|
||||||
// Original-console.log retten, damit wir die Wrapper jederzeit wieder
|
// Original-console.log retten, damit wir die Wrapper jederzeit wieder
|
||||||
// "scharf" stellen koennen (sonst waere ein Toggle-an nach -aus tot).
|
// "scharf" stellen koennen (sonst waere ein Toggle-an nach -aus tot).
|
||||||
@@ -18,6 +36,7 @@ const originalLog = console.log.bind(console);
|
|||||||
const noop = () => {};
|
const noop = () => {};
|
||||||
|
|
||||||
let _verbose = true;
|
let _verbose = true;
|
||||||
|
let _debugLogsToBridge = false;
|
||||||
|
|
||||||
function applyState(): void {
|
function applyState(): void {
|
||||||
console.log = _verbose ? originalLog : noop;
|
console.log = _verbose ? originalLog : noop;
|
||||||
@@ -29,6 +48,10 @@ export async function initLogger(): Promise<void> {
|
|||||||
const v = await AsyncStorage.getItem(VERBOSE_LOGGING_KEY);
|
const v = await AsyncStorage.getItem(VERBOSE_LOGGING_KEY);
|
||||||
_verbose = v !== 'false'; // default: true
|
_verbose = v !== 'false'; // default: true
|
||||||
} catch {}
|
} catch {}
|
||||||
|
try {
|
||||||
|
const d = await AsyncStorage.getItem(DEBUG_LOGS_TO_BRIDGE_KEY);
|
||||||
|
_debugLogsToBridge = d === 'true'; // default: false
|
||||||
|
} catch {}
|
||||||
applyState();
|
applyState();
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -42,6 +65,15 @@ export function setVerboseLogging(verbose: boolean): void {
|
|||||||
AsyncStorage.setItem(VERBOSE_LOGGING_KEY, String(verbose)).catch(() => {});
|
AsyncStorage.setItem(VERBOSE_LOGGING_KEY, String(verbose)).catch(() => {});
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/** Returns the current in-memory state of the "debug logs to bridge" toggle. */
export function isDebugLogsToBridge(): boolean {
  return _debugLogsToBridge;
}
|
||||||
|
|
||||||
|
/**
 * Switches the "debug logs to bridge" toggle and persists the choice.
 *
 * The in-memory flag changes synchronously. The write to AsyncStorage is
 * fire-and-forget and a failed write is ignored.
 *
 * @param enabled new state of the toggle
 */
export function setDebugLogsToBridge(enabled: boolean): void {
  _debugLogsToBridge = enabled;
  const serialized = String(enabled);
  AsyncStorage.setItem(DEBUG_LOGS_TO_BRIDGE_KEY, serialized).catch(() => {});
}
|
||||||
|
|
||||||
// ─── App-Crash-Reporting via RVS ────────────────────────────────────
|
// ─── App-Crash-Reporting via RVS ────────────────────────────────────
|
||||||
//
|
//
|
||||||
// Wenn die App crasht — egal ob React-Render-Fehler (ErrorBoundary) oder
|
// Wenn die App crasht — egal ob React-Render-Fehler (ErrorBoundary) oder
|
||||||
@@ -61,9 +93,10 @@ let _reportingInstalled = false;
|
|||||||
|
|
||||||
/** Schickt einen App-Fehler via RVS an die Bridge. */
|
/** Schickt einen App-Fehler via RVS an die Bridge. */
|
||||||
export function reportAppError(ev: AppErrorEvent): void {
|
export function reportAppError(ev: AppErrorEvent): void {
|
||||||
|
const ts = Date.now();
|
||||||
try {
|
try {
|
||||||
rvs.send('app_log' as any, {
|
rvs.send('app_log' as any, {
|
||||||
ts: Date.now(),
|
ts,
|
||||||
platform: Platform.OS,
|
platform: Platform.OS,
|
||||||
level: ev.level || 'error',
|
level: ev.level || 'error',
|
||||||
scope: ev.scope,
|
scope: ev.scope,
|
||||||
@@ -73,11 +106,49 @@ export function reportAppError(ev: AppErrorEvent): void {
|
|||||||
} catch {
|
} catch {
|
||||||
// RVS noch nicht connected — Fehler geht im console weiter.
|
// RVS noch nicht connected — Fehler geht im console weiter.
|
||||||
}
|
}
|
||||||
|
// Lokal in den App-Logs-Tab emitten — Errors gehen IMMER durch
|
||||||
|
// (unabhaengig vom Debug-Toggle).
|
||||||
|
try {
|
||||||
|
const entry: LocalLogEntry = {
|
||||||
|
ts, level: ev.level || 'error', scope: ev.scope, message: ev.message,
|
||||||
|
};
|
||||||
|
DeviceEventEmitter.emit(APP_LOG_EVENT, entry);
|
||||||
|
} catch {}
|
||||||
// Plus lokal: console.error, damit Stefan's adb (wenn doch mal verfuegbar)
|
// Plus lokal: console.error, damit Stefan's adb (wenn doch mal verfuegbar)
|
||||||
// den Crash sieht.
|
// den Crash sieht.
|
||||||
console.error(`[app-error scope=${ev.scope}]`, ev.message, '\n', ev.stack || '');
|
console.error(`[app-error scope=${ev.scope}]`, ev.message, '\n', ev.stack || '');
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Sends a debug/info message to the bridge via RVS as an 'app_log' event and
 * mirrors the same entry locally on APP_LOG_EVENT.
 *
 * Unlike reportAppError there is no stack trace, the level is always 'info'
 * and nothing is written to console.error. Per the original note the bridge
 * stores these in /shared/logs/app.log, readable via `curl /api/app-log?lines=N`.
 *
 * Does nothing unless the "debug logs to bridge" toggle is on (default off).
 * The message is cut to 2000 characters. Both the RVS send and the local emit
 * are best-effort; a failure in one does not prevent the other.
 *
 * @param scope   short tag identifying the origin, e.g. 'bg.start'
 * @param message free-text message
 */
export function reportAppDebug(scope: string, message: string): void {
  if (!_debugLogsToBridge) {
    return;
  }

  const ts = Date.now();
  const trimmed = String(message).slice(0, 2000);

  // 1) Remote: forward to the bridge over RVS.
  try {
    const remotePayload = {
      ts,
      platform: Platform.OS,
      level: 'info',
      scope,
      message: trimmed,
    };
    rvs.send('app_log' as any, remotePayload);
  } catch {}

  // 2) Local: emit the same entry so in-app listeners on APP_LOG_EVENT
  //    can show it without going through the bridge.
  try {
    const entry: LocalLogEntry = { ts, level: 'info', scope, message: trimmed };
    DeviceEventEmitter.emit(APP_LOG_EVENT, entry);
  } catch {}
}
|
||||||
|
|
||||||
/** Installiert einen globalen JS-Error-Handler der ungefangene Errors via
|
/** Installiert einen globalen JS-Error-Handler der ungefangene Errors via
|
||||||
* RVS an die Bridge schickt. Beim App-Start aufrufen. */
|
* RVS an die Bridge schickt. Beim App-Start aufrufen. */
|
||||||
export function installGlobalCrashReporter(): void {
|
export function installGlobalCrashReporter(): void {
|
||||||
|
|||||||
@@ -189,7 +189,7 @@ class UpdateService {
|
|||||||
const destPath = `${RNFS.CachesDirectoryPath}/${apkData.fileName}`;
|
const destPath = `${RNFS.CachesDirectoryPath}/${apkData.fileName}`;
|
||||||
await RNFS.writeFile(destPath, apkData.base64, 'base64');
|
await RNFS.writeFile(destPath, apkData.base64, 'base64');
|
||||||
const fileSize = await RNFS.stat(destPath);
|
const fileSize = await RNFS.stat(destPath);
|
||||||
console.log(`[Update] APK gespeichert: ${destPath} (${(parseInt(fileSize.size) / 1024 / 1024).toFixed(1)}MB)`);
|
console.log(`[Update] APK gespeichert: ${destPath} (${(Number(fileSize.size) / 1024 / 1024).toFixed(1)}MB)`);
|
||||||
|
|
||||||
// APK installieren via natives ApkInstaller Module (FileProvider + Intent)
|
// APK installieren via natives ApkInstaller Module (FileProvider + Intent)
|
||||||
if (Platform.OS === 'android') {
|
if (Platform.OS === 'android') {
|
||||||
|
|||||||
@@ -179,6 +179,8 @@ class WakeWordService {
|
|||||||
try {
|
try {
|
||||||
await OpenWakeWord.start();
|
await OpenWakeWord.start();
|
||||||
console.log('[WakeWord] armed — warte auf "%s"', this.keyword);
|
console.log('[WakeWord] armed — warte auf "%s"', this.keyword);
|
||||||
|
// Debug-Log via RVS damit wir auch ohne ADB sehen wann es greift
|
||||||
|
import('./logger').then(m => m.reportAppDebug('wake.start', `armed, keyword=${this.keyword}`)).catch(()=>{});
|
||||||
ToastAndroid.show(`Lausche auf "${KEYWORD_LABELS[this.keyword]}"`, ToastAndroid.SHORT);
|
ToastAndroid.show(`Lausche auf "${KEYWORD_LABELS[this.keyword]}"`, ToastAndroid.SHORT);
|
||||||
this.setState('armed');
|
this.setState('armed');
|
||||||
return true;
|
return true;
|
||||||
@@ -236,15 +238,24 @@ class WakeWordService {
|
|||||||
}
|
}
|
||||||
console.log('[WakeWord] Wake-Word "%s" erkannt! (state=%s, barge=%s)',
|
console.log('[WakeWord] Wake-Word "%s" erkannt! (state=%s, barge=%s)',
|
||||||
this.keyword, this.state, this.bargeListening);
|
this.keyword, this.state, this.bargeListening);
|
||||||
|
import('./logger').then(m => m.reportAppDebug('wake.detect',
|
||||||
|
`keyword=${this.keyword} state=${this.state} barge=${this.bargeListening}`)).catch(()=>{});
|
||||||
this.lastTriggerAt = now;
|
this.lastTriggerAt = now;
|
||||||
if (this.nativeReady && OpenWakeWord) {
|
if (this.nativeReady && OpenWakeWord) {
|
||||||
try { await OpenWakeWord.stop(); } catch {}
|
try {
|
||||||
|
await OpenWakeWord.stop();
|
||||||
|
import('./logger').then(m => m.reportAppDebug('wake.detect', 'native stop ok')).catch(()=>{});
|
||||||
|
} catch (e: any) {
|
||||||
|
import('./logger').then(m => m.reportAppDebug('wake.detect', `native stop FAIL ${e?.message}`)).catch(()=>{});
|
||||||
|
}
|
||||||
}
|
}
|
||||||
this.bargeListening = false;
|
this.bargeListening = false;
|
||||||
// Wenn wir bereits in 'conversing' sind und der Trigger waehrend ARIAs TTS
|
// Wenn wir bereits in 'conversing' sind und der Trigger waehrend ARIAs TTS
|
||||||
// kam (Barge-In via Wake-Word), feuern wir einen separaten Callback damit
|
// kam (Barge-In via Wake-Word), feuern wir einen separaten Callback damit
|
||||||
// ChatScreen das TTS abbrechen + neue Aufnahme starten kann. Sonst normal.
|
// ChatScreen das TTS abbrechen + neue Aufnahme starten kann. Sonst normal.
|
||||||
if (this.state === 'conversing') {
|
if (this.state === 'conversing') {
|
||||||
|
import('./logger').then(m => m.reportAppDebug('wake.detect',
|
||||||
|
`barge path: cbs=${this.bargeCallbacks.length}`)).catch(()=>{});
|
||||||
this.bargeCallbacks.forEach(cb => {
|
this.bargeCallbacks.forEach(cb => {
|
||||||
try { cb(); } catch (e) { console.warn('[WakeWord] barge cb err:', e); }
|
try { cb(); } catch (e) { console.warn('[WakeWord] barge cb err:', e); }
|
||||||
});
|
});
|
||||||
@@ -252,11 +263,16 @@ class WakeWordService {
|
|||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
this.setState('conversing');
|
this.setState('conversing');
|
||||||
setTimeout(() => {
|
// Direkt feuern — KEIN setTimeout. Im Hintergrund (Display aus) parkt
|
||||||
if (this.state === 'conversing') {
|
// Android den JS-Thread; ein setTimeout(200ms) kann dann Minuten lang
|
||||||
this.wakeCallbacks.forEach(cb => cb());
|
// nicht zuendekommen, weil Hermes auf einen Native-Wake-Event wartet.
|
||||||
}
|
// OpenWakeWord.stop() oben ist awaited → Mikro ist schon frei, kein
|
||||||
}, 200);
|
// 200ms-Sicherheitsabstand noetig.
|
||||||
|
import('./logger').then(m => m.reportAppDebug('wake.detect',
|
||||||
|
`state→conversing, firing ${this.wakeCallbacks.length} callback(s) directly`)).catch(()=>{});
|
||||||
|
this.wakeCallbacks.forEach(cb => {
|
||||||
|
try { cb(); } catch (e) { console.warn('[WakeWord] wake cb err:', e); }
|
||||||
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
/** Wake-Word PARALLEL zur TTS-Wiedergabe lauschen lassen — User kann
|
/** Wake-Word PARALLEL zur TTS-Wiedergabe lauschen lassen — User kann
|
||||||
|
|||||||
@@ -556,6 +556,12 @@ class ARIABridge:
|
|||||||
for k in ("fluxDefaultModel", "fluxKeywordRaw", "fluxKeywordSwitch", "huggingfaceToken"):
|
for k in ("fluxDefaultModel", "fluxKeywordRaw", "fluxKeywordSwitch", "huggingfaceToken"):
|
||||||
if k in vc:
|
if k in vc:
|
||||||
self._flux_config[k] = vc[k]
|
self._flux_config[k] = vc[k]
|
||||||
|
# Debug-Log-Toggles fuer Whisper / F5TTS Bridges (Diagnostic-Toggle).
|
||||||
|
# Default: aus — sonst muellen wir uns volle Disk wenn alles laeuft.
|
||||||
|
self._debug_log_config: dict = {}
|
||||||
|
for k in ("whisperDebugLog", "f5ttsDebugLog"):
|
||||||
|
if k in vc:
|
||||||
|
self._debug_log_config[k] = bool(vc[k])
|
||||||
logger.info("Voice-Config geladen: tts=%s voice=%s f5tts=%s flux=%s",
|
logger.info("Voice-Config geladen: tts=%s voice=%s f5tts=%s flux=%s",
|
||||||
self.tts_enabled, self.xtts_voice or "default",
|
self.tts_enabled, self.xtts_voice or "default",
|
||||||
self._f5tts_config or "defaults",
|
self._f5tts_config or "defaults",
|
||||||
@@ -1304,6 +1310,7 @@ class ARIABridge:
|
|||||||
payload["xttsSpeed"] = self._persistent_xtts_speed
|
payload["xttsSpeed"] = self._persistent_xtts_speed
|
||||||
payload.update(getattr(self, "_f5tts_config", {}) or {})
|
payload.update(getattr(self, "_f5tts_config", {}) or {})
|
||||||
payload.update(getattr(self, "_flux_config", {}) or {})
|
payload.update(getattr(self, "_flux_config", {}) or {})
|
||||||
|
payload.update(getattr(self, "_debug_log_config", {}) or {})
|
||||||
await self._send_to_rvs({
|
await self._send_to_rvs({
|
||||||
"type": "config",
|
"type": "config",
|
||||||
"payload": payload,
|
"payload": payload,
|
||||||
@@ -1978,6 +1985,15 @@ class ARIABridge:
|
|||||||
self._flux_config = {}
|
self._flux_config = {}
|
||||||
self._flux_config[k] = payload[k]
|
self._flux_config[k] = payload[k]
|
||||||
changed = True
|
changed = True
|
||||||
|
# Debug-Log-Toggles fuer Whisper- und F5TTS-Bridge — werden via
|
||||||
|
# naechstem config-Broadcast an die jeweiligen Bridges weitergegeben.
|
||||||
|
# Persistent damit Toggle einen Container-Restart ueberlebt.
|
||||||
|
for k in ("whisperDebugLog", "f5ttsDebugLog"):
|
||||||
|
if k in payload:
|
||||||
|
if not hasattr(self, "_debug_log_config"):
|
||||||
|
self._debug_log_config = {}
|
||||||
|
self._debug_log_config[k] = bool(payload[k])
|
||||||
|
changed = True
|
||||||
# Persistent speichern in Shared Volume
|
# Persistent speichern in Shared Volume
|
||||||
if changed:
|
if changed:
|
||||||
try:
|
try:
|
||||||
@@ -1991,6 +2007,7 @@ class ARIABridge:
|
|||||||
config_data["xttsSpeed"] = self._persistent_xtts_speed
|
config_data["xttsSpeed"] = self._persistent_xtts_speed
|
||||||
config_data.update(getattr(self, "_f5tts_config", {}))
|
config_data.update(getattr(self, "_f5tts_config", {}))
|
||||||
config_data.update(getattr(self, "_flux_config", {}))
|
config_data.update(getattr(self, "_flux_config", {}))
|
||||||
|
config_data.update(getattr(self, "_debug_log_config", {}))
|
||||||
with open("/shared/config/voice_config.json", "w") as f:
|
with open("/shared/config/voice_config.json", "w") as f:
|
||||||
json.dump(config_data, f, indent=2)
|
json.dump(config_data, f, indent=2)
|
||||||
logger.info("[rvs] Voice-Config gespeichert: %s", config_data)
|
logger.info("[rvs] Voice-Config gespeichert: %s", config_data)
|
||||||
@@ -2520,6 +2537,59 @@ class ARIABridge:
|
|||||||
future.set_result(text)
|
future.set_result(text)
|
||||||
return
|
return
|
||||||
|
|
||||||
|
elif msg_type == "stt_endpoint":
|
||||||
|
# Phase 2 Brain-Shortcut: die whisper-bridge hat im Streaming-Modus
|
||||||
|
# einen Endpoint erkannt und schickt den finalen Text direkt.
|
||||||
|
# Wir uebernehmen die Rolle die sonst _process_app_audio NACH dem
|
||||||
|
# STT-Schritt hat: STT-Text fuer UI broadcasten + send_to_core.
|
||||||
|
# Kein Audio-Roundtrip mehr — App-Latenz sinkt deutlich.
|
||||||
|
text = (payload.get("text") or "").strip()
|
||||||
|
if not text:
|
||||||
|
logger.info("[rvs] stt_endpoint mit leerem Text — ignoriert (reason=%s)",
|
||||||
|
payload.get("reason", ""))
|
||||||
|
return
|
||||||
|
audio_request_id = payload.get("audioRequestId", "") or ""
|
||||||
|
voice = payload.get("voice", "") or ""
|
||||||
|
speed_raw = payload.get("speed")
|
||||||
|
interrupted = bool(payload.get("interrupted", False))
|
||||||
|
location = payload.get("location") or None
|
||||||
|
|
||||||
|
# Voice-Override fuer Folgenachrichten — gleiche Semantik wie beim
|
||||||
|
# 'audio'-Event. Nur setzen wenn vom App-Stream mitgegeben.
|
||||||
|
if voice:
|
||||||
|
self._next_voice_override = voice or None
|
||||||
|
logger.info("[rvs] Voice fuer Antworten (via stt_endpoint): %s",
|
||||||
|
self._next_voice_override or "(Default)")
|
||||||
|
if speed_raw is not None:
|
||||||
|
try:
|
||||||
|
sp = float(speed_raw)
|
||||||
|
self._next_speed_override = sp if 0.1 <= sp <= 5.0 else None
|
||||||
|
except (TypeError, ValueError):
|
||||||
|
self._next_speed_override = None
|
||||||
|
|
||||||
|
# State-Persist wie bei _process_app_audio
|
||||||
|
self._persist_location(location)
|
||||||
|
self._persist_user_activity()
|
||||||
|
|
||||||
|
logger.info("[rvs] stt_endpoint: '%s' (%dms, reason=%s)%s%s reqId=%s",
|
||||||
|
text[:80],
|
||||||
|
payload.get("sttMs", 0),
|
||||||
|
payload.get("reason", ""),
|
||||||
|
" [BARGE-IN]" if interrupted else "",
|
||||||
|
" [GPS]" if location else "",
|
||||||
|
audio_request_id[:16] if audio_request_id else "?")
|
||||||
|
|
||||||
|
# Idempotenz ueber audioRequestId — falls App den Stream irgendwie
|
||||||
|
# nochmal triggern sollte (Reconnect-Race etc.).
|
||||||
|
client_msg_id = audio_request_id or None
|
||||||
|
if self._is_duplicate_client_msg(client_msg_id):
|
||||||
|
return
|
||||||
|
|
||||||
|
asyncio.create_task(self._process_endpoint_text(
|
||||||
|
text, interrupted, audio_request_id, location,
|
||||||
|
client_msg_id=client_msg_id))
|
||||||
|
return
|
||||||
|
|
||||||
elif msg_type == "oauth_callback":
|
elif msg_type == "oauth_callback":
|
||||||
# RVS hat einen OAuth-Provider-Callback empfangen (z.B. Spotify
|
# RVS hat einen OAuth-Provider-Callback empfangen (z.B. Spotify
|
||||||
# nach User-Authorize) und broadcastet ihn. Wir forwarden an Brain,
|
# nach User-Authorize) und broadcastet ihn. Wir forwarden an Brain,
|
||||||
@@ -2662,6 +2732,44 @@ class ARIABridge:
|
|||||||
else:
|
else:
|
||||||
logger.info("[rvs] Keine Sprache erkannt — ignoriert")
|
logger.info("[rvs] Keine Sprache erkannt — ignoriert")
|
||||||
|
|
||||||
|
async def _process_endpoint_text(self, text: str,
                                 interrupted: bool = False,
                                 audio_request_id: str = "",
                                 location: Optional[dict] = None,
                                 client_msg_id: Optional[str] = None) -> None:
    """Broadcast an already-transcribed utterance and hand it to the core.

    Phase-2 brain shortcut: the streaming whisper bridge has already produced
    the final text, so this method only does what, per the original note,
    ``_process_app_audio`` does after its STT step. It is kept as a separate
    method so the legacy path (app sends 'audio') stays untouched.

    Args:
        text: final transcript.
        interrupted: True when the utterance was a barge-in; adds
            " [barge-in]" to the ``source`` passed to ``send_to_core``.
        audio_request_id: added to the chat payload as ``audioRequestId``
            when non-empty.
        location: added to the chat payload as ``location`` when truthy.
        client_msg_id: passed through to ``send_to_core``.

    A failure while broadcasting the STT text is logged and does not stop
    the text from being sent to the core.
    """
    try:
        chat_payload = {"text": text, "sender": "stt"}
        if audio_request_id:
            chat_payload["audioRequestId"] = audio_request_id
        if location:
            chat_payload["location"] = location
        # NOTE(review): loop.time() is the event loop's monotonic clock, not
        # wall-clock epoch time — confirm this is what receivers of
        # "timestamp" expect.
        envelope = {
            "type": "chat",
            "payload": chat_payload,
            "timestamp": int(asyncio.get_event_loop().time() * 1000),
        }
        delivered = await self._send_to_rvs(envelope)
        if delivered:
            logger.info("[rvs] STT-Text (endpoint) broadcastet")
        else:
            logger.warning("[rvs] STT-Text (endpoint) NICHT broadcastet")
    except Exception as e:
        logger.warning("[rvs] STT-Text (endpoint) konnte nicht broadcastet werden: %s", e)

    source_tag = "app-voice-stream" + (" [barge-in]" if interrupted else "")
    core_text = self._build_core_text(text, interrupted, location)
    await self.send_to_core(core_text,
                            source=source_tag,
                            client_msg_id=client_msg_id)
|
||||||
|
|
||||||
async def _stt_remote(self, audio_b64: str, mime_type: str) -> Optional[str]:
|
async def _stt_remote(self, audio_b64: str, mime_type: str) -> Optional[str]:
|
||||||
"""Schickt Audio an die whisper-bridge und wartet auf stt_response.
|
"""Schickt Audio an die whisper-bridge und wartet auf stt_response.
|
||||||
|
|
||||||
|
|||||||
@@ -38,6 +38,10 @@ const ALLOWED_TYPES = new Set([
|
|||||||
"xtts_delete_voice",
|
"xtts_delete_voice",
|
||||||
"voice_preload", "voice_ready",
|
"voice_preload", "voice_ready",
|
||||||
"stt_request", "stt_response",
|
"stt_request", "stt_response",
|
||||||
|
// Streaming-STT (Phase 1+2): App schickt PCM live an whisper-bridge,
|
||||||
|
// die feuert stt_endpoint mit dem finalen Text — kein Audio-Roundtrip.
|
||||||
|
"stt_stream_start", "stt_audio_chunk", "stt_stream_end",
|
||||||
|
"stt_partial", "stt_endpoint", "stt_stream_done",
|
||||||
"service_status",
|
"service_status",
|
||||||
"config_request",
|
"config_request",
|
||||||
"flux_request", "flux_response",
|
"flux_request", "flux_response",
|
||||||
|
|||||||
@@ -375,6 +375,41 @@ async def _send(ws, mtype: str, payload: dict) -> None:
|
|||||||
logger.warning("Send fehlgeschlagen (%s): %s", mtype, e)
|
logger.warning("Send fehlgeschlagen (%s): %s", mtype, e)
|
||||||
|
|
||||||
|
|
||||||
|
# ──────────────────────────────────────────────────────────────
|
||||||
|
# DEBUG-LOG ueber RVS → /shared/logs/app.log
|
||||||
|
#
|
||||||
|
# Gleiches Pattern wie in whisper-bridge: Stefan's Gamebox ist
|
||||||
|
# Windows (kein SSH), in Zukunft koennten whisper + f5tts auf
|
||||||
|
# unterschiedlichen Hosts laufen. Logs ueber RVS heisst: ein Pfad.
|
||||||
|
#
|
||||||
|
# Toggle via aria-bridge config broadcast: f5ttsDebugLog (bool).
|
||||||
|
# ──────────────────────────────────────────────────────────────
|
||||||
|
_DEBUG_LOG_TO_BRIDGE: bool = False # default OFF — TTS-Renders sind teurer
|
||||||
|
# zu debuggen, normalerweise nicht noetig
|
||||||
|
|
||||||
|
|
||||||
|
async def _debug_log(ws, scope: str, message: str, level: str = "info") -> None:
    """Send one ``app_log`` message with platform='f5tts' over *ws*.

    Per the file's notes these messages travel via RVS and end up in
    /shared/logs/app.log. The call is a no-op while the module flag
    ``_DEBUG_LOG_TO_BRIDGE`` is false.

    Args:
        ws: connection object exposing an awaitable ``send(str)``.
        scope: short tag identifying the origin of the message.
        message: message text; converted with ``str()`` and cut to
            2000 characters.
        level: log level string placed in the payload.

    Best effort: any exception raised while building or sending the message
    is swallowed, so debug logging cannot break the caller.
    """
    if _DEBUG_LOG_TO_BRIDGE:
        try:
            entry = {
                "ts": int(time.time() * 1000),
                "platform": "f5tts",
                "level": level,
                "scope": scope,
                "message": str(message)[:2000],
                "stack": "",
            }
            envelope = {
                "type": "app_log",
                "payload": entry,
                "timestamp": int(time.time() * 1000),
            }
            await ws.send(json.dumps(envelope))
        except Exception:
            pass
|
||||||
|
|
||||||
|
|
||||||
# ── Interne Transkription via whisper-bridge ────────────────
|
# ── Interne Transkription via whisper-bridge ────────────────
|
||||||
|
|
||||||
_pending_stt: dict[str, asyncio.Future] = {}
|
_pending_stt: dict[str, asyncio.Future] = {}
|
||||||
@@ -867,6 +902,30 @@ async def run_loop(runner: F5Runner) -> None:
|
|||||||
else:
|
else:
|
||||||
fut.set_result(payload.get("text") or "")
|
fut.set_result(payload.get("text") or "")
|
||||||
elif mtype == "config":
|
elif mtype == "config":
|
||||||
|
# Debug-Toggle (gleiche Semantik wie in whisper-bridge)
|
||||||
|
if "f5ttsDebugLog" in payload:
|
||||||
|
global _DEBUG_LOG_TO_BRIDGE
|
||||||
|
old = _DEBUG_LOG_TO_BRIDGE
|
||||||
|
_DEBUG_LOG_TO_BRIDGE = bool(payload.get("f5ttsDebugLog", False))
|
||||||
|
if old != _DEBUG_LOG_TO_BRIDGE:
|
||||||
|
logger.info("Debug-Log-to-Bridge: %s", "ON" if _DEBUG_LOG_TO_BRIDGE else "OFF")
|
||||||
|
# Last gasp wenn ausgeschaltet wird
|
||||||
|
if not _DEBUG_LOG_TO_BRIDGE:
|
||||||
|
try:
|
||||||
|
await ws.send(json.dumps({
|
||||||
|
"type": "app_log",
|
||||||
|
"payload": {
|
||||||
|
"ts": int(time.time() * 1000),
|
||||||
|
"platform": "f5tts",
|
||||||
|
"level": "info",
|
||||||
|
"scope": "config",
|
||||||
|
"message": "debug-log OFF (toggle aus)",
|
||||||
|
"stack": "",
|
||||||
|
},
|
||||||
|
"timestamp": int(time.time() * 1000),
|
||||||
|
}))
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
# F5-TTS-Settings aktualisieren (Modell, cfg_strength, nfe)
|
# F5-TTS-Settings aktualisieren (Modell, cfg_strength, nfe)
|
||||||
async def _update_with_status(p):
|
async def _update_with_status(p):
|
||||||
# Schaut ob ein Modell-Wechsel ansteht — falls ja:
|
# Schaut ob ein Modell-Wechsel ansteht — falls ja:
|
||||||
|
|||||||
+446
-29
@@ -2,8 +2,19 @@
|
|||||||
"""
|
"""
|
||||||
ARIA Whisper Bridge — laeuft auf der Gamebox (RTX 3060).
|
ARIA Whisper Bridge — laeuft auf der Gamebox (RTX 3060).
|
||||||
|
|
||||||
Empfaengt stt_request via RVS → FFmpeg-Konvertierung → faster-whisper auf GPU
|
Zwei Modi:
|
||||||
→ sendet stt_response zurueck an die aria-bridge.
|
|
||||||
|
1) Legacy One-Shot: stt_request mit komplettem Audio (mp4/wav/ogg base64)
|
||||||
|
→ ffmpeg → faster-whisper → stt_response. Bleibt fuer Fallback/alte App.
|
||||||
|
|
||||||
|
2) Streaming + ML-Endpointer (neu): App schickt live PCM-Chunks waehrend
|
||||||
|
der Aufnahme. Bridge transkribiert alle ~700ms auf dem Ringbuffer und
|
||||||
|
feuert stt_endpoint sobald der Transkript-String N ms nicht mehr
|
||||||
|
waechst. Ersetzt dB/VAD-Stille — endpointet auf SEMANTISCHE Stille,
|
||||||
|
funktioniert im Auto / mit Musik im Hintergrund.
|
||||||
|
|
||||||
|
Erwartetes PCM-Format vom App-Native-Modul: 16 kHz mono s16le (genau
|
||||||
|
das was OpenWakeWord/AudioRecord schon liefert — kein Resampling).
|
||||||
|
|
||||||
Env:
|
Env:
|
||||||
RVS_HOST, RVS_PORT, RVS_TLS, RVS_TLS_FALLBACK, RVS_TOKEN
|
RVS_HOST, RVS_PORT, RVS_TLS, RVS_TLS_FALLBACK, RVS_TOKEN
|
||||||
@@ -21,6 +32,7 @@ import subprocess
|
|||||||
import sys
|
import sys
|
||||||
import tempfile
|
import tempfile
|
||||||
import time
|
import time
|
||||||
|
from dataclasses import dataclass, field
|
||||||
from typing import Optional
|
from typing import Optional
|
||||||
|
|
||||||
import numpy as np
|
import numpy as np
|
||||||
@@ -47,6 +59,13 @@ WHISPER_LANGUAGE = os.getenv("WHISPER_LANGUAGE", "de")
|
|||||||
|
|
||||||
ALLOWED_MODELS = {"tiny", "base", "small", "medium", "large-v3"}
|
ALLOWED_MODELS = {"tiny", "base", "small", "medium", "large-v3"}
|
||||||
|
|
||||||
|
# Streaming-Parameter (Defaults — koennen pro Session vom App-Payload ueberschrieben werden)
|
||||||
|
STREAM_TRANSCRIBE_INTERVAL_MS = 700 # alle 700ms transkribieren waehrend Stream laeuft
|
||||||
|
STREAM_DEFAULT_ENDPOINT_MS = 1500 # nach 1.5s ohne neuen Text → Endpoint
|
||||||
|
STREAM_DEFAULT_HARD_CAP_MS = 60000 # nach 60s Audio: harter Cut egal was
|
||||||
|
STREAM_MIN_AUDIO_MS = 600 # erst transkribieren wenn min 600ms Audio da
|
||||||
|
STREAM_SESSION_TTL_S = 120 # tote Sessions nach 2 min aufraeumen
|
||||||
|
|
||||||
|
|
||||||
class WhisperRunner:
|
class WhisperRunner:
|
||||||
"""Haelt das Whisper-Modell. Hot-Swap bei Konfig-Wechsel via ensure_loaded()."""
|
"""Haelt das Whisper-Modell. Hot-Swap bei Konfig-Wechsel via ensure_loaded()."""
|
||||||
@@ -55,6 +74,9 @@ class WhisperRunner:
|
|||||||
self.model_size: str = WHISPER_MODEL
|
self.model_size: str = WHISPER_MODEL
|
||||||
self.model: Optional[WhisperModel] = None
|
self.model: Optional[WhisperModel] = None
|
||||||
self._lock = asyncio.Lock()
|
self._lock = asyncio.Lock()
|
||||||
|
# Serialisiert transcribe()-Calls — faster-whisper ist nicht
|
||||||
|
# parallel-safe auf einer GPU-Instanz, plus VRAM-Fragmentierung.
|
||||||
|
self._transcribe_lock = asyncio.Lock()
|
||||||
|
|
||||||
def _load_blocking(self, size: str) -> None:
|
def _load_blocking(self, size: str) -> None:
|
||||||
logger.info(
|
logger.info(
|
||||||
@@ -78,19 +100,21 @@ class WhisperRunner:
|
|||||||
loop = asyncio.get_event_loop()
|
loop = asyncio.get_event_loop()
|
||||||
await loop.run_in_executor(None, self._load_blocking, desired_size)
|
await loop.run_in_executor(None, self._load_blocking, desired_size)
|
||||||
|
|
||||||
async def transcribe(self, audio: np.ndarray, language: str) -> tuple[str, float]:
|
async def transcribe(self, audio: np.ndarray, language: str,
|
||||||
|
beam_size: int = 5, vad_filter: bool = True) -> tuple[str, float]:
|
||||||
if self.model is None:
|
if self.model is None:
|
||||||
return "", 0.0
|
return "", 0.0
|
||||||
|
|
||||||
def _run():
|
def _run():
|
||||||
segments, info = self.model.transcribe(
|
segments, info = self.model.transcribe(
|
||||||
audio, language=language, beam_size=5, vad_filter=True,
|
audio, language=language, beam_size=beam_size, vad_filter=vad_filter,
|
||||||
)
|
)
|
||||||
text = " ".join(seg.text.strip() for seg in segments)
|
text = " ".join(seg.text.strip() for seg in segments)
|
||||||
return text, info.duration
|
return text, info.duration
|
||||||
|
|
||||||
loop = asyncio.get_event_loop()
|
loop = asyncio.get_event_loop()
|
||||||
return await loop.run_in_executor(None, _run)
|
async with self._transcribe_lock:
|
||||||
|
return await loop.run_in_executor(None, _run)
|
||||||
|
|
||||||
|
|
||||||
def ffmpeg_to_float32(audio_b64: str, mime_type: str) -> np.ndarray:
|
def ffmpeg_to_float32(audio_b64: str, mime_type: str) -> np.ndarray:
|
||||||
@@ -128,6 +152,14 @@ def ffmpeg_to_float32(audio_b64: str, mime_type: str) -> np.ndarray:
|
|||||||
pass
|
pass
|
||||||
|
|
||||||
|
|
||||||
|
def pcm_s16le_to_float32(pcm_bytes: bytes) -> np.ndarray:
    """Convert 16-bit signed little-endian PCM into float32 samples in [-1, 1).

    Args:
        pcm_bytes: Raw PCM payload (any bytes-like object). May be empty.

    Returns:
        A 1-D float32 array with one entry per complete 16-bit sample. A
        trailing odd byte (a sample split across two chunks) is ignored
        instead of raising, so a partially received sample cannot break
        decoding of everything that arrived before it.
    """
    if not pcm_bytes:
        return np.zeros(0, dtype=np.float32)
    # Only whole samples can be decoded: 2 bytes per int16 sample.
    n_samples = len(pcm_bytes) // 2
    if n_samples == 0:
        return np.zeros(0, dtype=np.float32)
    # "<i2" pins the byte order to little-endian regardless of host
    # endianness; `count` lets frombuffer ignore a dangling last byte.
    samples = np.frombuffer(pcm_bytes, dtype="<i2", count=n_samples)
    return samples.astype(np.float32) / 32768.0
|
||||||
|
|
||||||
|
|
||||||
async def _send(ws, mtype: str, payload: dict) -> None:
|
async def _send(ws, mtype: str, payload: dict) -> None:
|
||||||
try:
|
try:
|
||||||
await ws.send(json.dumps({
|
await ws.send(json.dumps({
|
||||||
@@ -139,14 +171,326 @@ async def _send(ws, mtype: str, payload: dict) -> None:
|
|||||||
logger.warning("Send fehlgeschlagen (%s): %s", mtype, e)
|
logger.warning("Send fehlgeschlagen (%s): %s", mtype, e)
|
||||||
|
|
||||||
|
|
||||||
|
# ──────────────────────────────────────────────────────────────
|
||||||
|
# DEBUG-LOG ueber RVS → /shared/logs/app.log
|
||||||
|
#
|
||||||
|
# Stefan's Gamebox ist Windows, kein SSH → wir brauchen Whisper-Bridge-
|
||||||
|
# Logs ueber den gleichen Pfad wie die App: app_log-Messages via RVS,
|
||||||
|
# aria-bridge schreibt sie in /shared/logs/app.log. Diagnostic / App-
|
||||||
|
# Logs-Tab zeigen sie dann mit platform="whisper".
|
||||||
|
#
|
||||||
|
# Toggle via aria-bridge config broadcast: whisperDebugLog (bool).
|
||||||
|
# Default ON solange wir Phase-1/2-Pipeline einfahren — danach
|
||||||
|
# defaultet aria-bridge ihn aus damit kein Spam.
|
||||||
|
# ──────────────────────────────────────────────────────────────
|
||||||
|
_DEBUG_LOG_TO_BRIDGE: bool = True
|
||||||
|
|
||||||
|
|
||||||
|
async def _debug_log(ws, scope: str, message: str, level: str = "info") -> None:
    """Send one ``app_log`` message with platform ``"whisper"`` over *ws*.

    Best-effort: does nothing while the module-level ``_DEBUG_LOG_TO_BRIDGE``
    toggle is off, and never raises. A failed send is only noted on the local
    logger at debug level.

    Args:
        ws: Connection object exposing an awaitable ``send(str)``.
        scope: Short tag describing where the message originates.
        message: Log text; stringified and truncated to 2000 characters.
        level: Severity label forwarded verbatim in the payload.
    """
    if not _DEBUG_LOG_TO_BRIDGE:
        return
    # Take the timestamp once so payload.ts and the envelope timestamp agree.
    now_ms = int(time.time() * 1000)
    try:
        await ws.send(json.dumps({
            "type": "app_log",
            "payload": {
                "ts": now_ms,
                "platform": "whisper",
                "level": level,
                "scope": scope,
                "message": str(message)[:2000],
                "stack": "",
            },
            "timestamp": now_ms,
        }))
    except Exception as e:
        # Debug logging must never disturb the caller; keep a local trace only.
        logger.debug("app_log senden fehlgeschlagen (%s): %s", scope, e)
|
||||||
|
|
||||||
|
|
||||||
|
# ──────────────────────────────────────────────────────────────
|
||||||
|
# STREAMING-SESSIONS
|
||||||
|
# ──────────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
@dataclass
class StreamSession:
    """Per-request state of one running streaming-STT session."""

    # --- identity and per-session settings (required) ---
    request_id: str
    audio_request_id: str
    language: str
    model: str
    endpoint_ms: int
    hard_cap_ms: int

    # --- values echoed back unchanged in the stt_endpoint payload ---
    voice: str = ""
    speed: float = 1.0
    interrupted: bool = False  # barge-in flag
    location: Optional[dict] = None

    # --- audio accumulation ---
    sample_rate: int = 16000
    pcm_buffer: bytearray = field(default_factory=bytearray)

    # --- timing / progress bookkeeping (time.time() seconds) ---
    started_at: float = field(default_factory=time.time)
    last_chunk_at: float = field(default_factory=time.time)
    last_partial: str = ""
    last_growth_at: float = 0.0  # 0.0 means: no text recognised yet
    last_transcribe_at: float = 0.0

    # --- lifecycle flags ---
    closed: bool = False  # set once stream_end was received
    endpoint_sent: bool = False  # guards against firing the endpoint twice
|
||||||
|
|
||||||
|
|
||||||
|
class SessionManager:
    """Registry of active streaming-STT sessions plus the endpointer loop.

    Sessions are keyed by their (whitespace-stripped) request id.
    ``run_endpointer`` periodically re-transcribes each session's buffered
    PCM, sends ``stt_partial`` whenever the text changed, and finalizes a
    session (``stt_endpoint`` + ``stt_stream_done``) on stream end, on the
    hard cap, or once the partial text stopped changing for ``endpoint_ms``.
    """

    def __init__(self, runner: WhisperRunner) -> None:
        self.runner = runner
        self._sessions: dict[str, StreamSession] = {}
        self._ws = None  # set through attach_ws() by the connection loop
        self._loop_task: Optional[asyncio.Task] = None

    @staticmethod
    def _normalize_id(raw) -> str:
        """Return *raw* as a stripped string; falsy values become ``""``."""
        return str(raw or "").strip()

    @staticmethod
    def _coerce(value, default, cast):
        """Cast *value* with *cast*; use *default* when falsy or not castable."""
        try:
            return cast(value or default)
        except (TypeError, ValueError, OverflowError):
            return default

    def attach_ws(self, ws) -> None:
        """Remember the live connection used for outgoing messages."""
        self._ws = ws

    def detach_ws(self) -> None:
        """Forget the connection; sessions themselves are kept.

        Sessions survive a disconnect so a reconnecting client could keep
        feeding the same request id. Sessions that receive no more data are
        removed by the TTL cleanup in ``run_endpointer``.
        """
        self._ws = None

    def start_session(self, payload: dict) -> Optional[StreamSession]:
        """Create and register a session from a ``stt_stream_start`` payload.

        Numeric fields fall back to their defaults when missing or malformed
        so a bad payload cannot raise into the caller's message loop.

        Returns:
            The new session, or ``None`` when the payload has no request id.
            An existing session with the same id is replaced.
        """
        request_id = self._normalize_id(payload.get("requestId"))
        if not request_id:
            logger.warning("stt_stream_start ohne requestId — ignoriert")
            return None
        if request_id in self._sessions:
            logger.warning("stt_stream_start: requestId %s schon aktiv — alte Session wird ersetzt",
                           request_id[:8])

        sample_rate = self._coerce(payload.get("sampleRate"), 16000, int)
        if sample_rate <= 0:
            # A non-positive rate would make every buffer duration <= 0.
            sample_rate = 16000

        session = StreamSession(
            request_id=request_id,
            audio_request_id=payload.get("audioRequestId", "") or "",
            language=payload.get("language") or WHISPER_LANGUAGE,
            model=payload.get("model") or self.runner.model_size or WHISPER_MODEL,
            endpoint_ms=self._coerce(payload.get("endpointMs"), STREAM_DEFAULT_ENDPOINT_MS, int),
            hard_cap_ms=self._coerce(payload.get("hardCapMs"), STREAM_DEFAULT_HARD_CAP_MS, int),
            voice=payload.get("voice", "") or "",
            speed=self._coerce(payload.get("speed"), 1.0, float),
            interrupted=bool(payload.get("interrupted", False)),
            location=payload.get("location") or None,
            sample_rate=sample_rate,
        )
        self._sessions[request_id] = session
        logger.info("Stream-Session offen: id=%s lang=%s model=%s endpointMs=%d hardCapMs=%d voice=%r",
                    request_id[:8], session.language, session.model,
                    session.endpoint_ms, session.hard_cap_ms, session.voice or "(default)")
        return session

    def feed_chunk(self, payload: dict) -> bool:
        """Append one base64 PCM chunk to its session's buffer.

        Returns:
            ``True`` when the chunk was stored; ``False`` for an unknown or
            closed session, an empty chunk, or undecodable base64.
        """
        # Normalize exactly like start_session so both agree on the key.
        request_id = self._normalize_id(payload.get("requestId"))
        session = self._sessions.get(request_id)
        if session is None or session.closed:
            return False
        pcm_b64 = payload.get("pcm", "")
        if not pcm_b64:
            return False
        try:
            pcm_bytes = base64.b64decode(pcm_b64)
        except (ValueError, TypeError):
            # binascii.Error is a ValueError; TypeError covers non-str input.
            logger.warning("Stream %s: ungueltige base64-PCM-Daten", request_id[:8])
            return False
        session.pcm_buffer.extend(pcm_bytes)
        session.last_chunk_at = time.time()
        return True

    def end_session(self, request_id: str) -> Optional[StreamSession]:
        """Mark a session as closed and return it (``None`` if unknown).

        The final transcription and cleanup happen in the endpointer loop.
        """
        session = self._sessions.get(self._normalize_id(request_id))
        if session is None:
            return None
        session.closed = True
        return session

    def drop(self, request_id: str) -> None:
        """Remove a session from the registry; unknown ids are ignored."""
        self._sessions.pop(request_id, None)

    async def run_endpointer(self) -> None:
        """Background loop: tick every session roughly every 200 ms, forever."""
        logger.info("Endpointer-Loop gestartet (transcribe-interval=%dms, default-endpoint=%dms)",
                    STREAM_TRANSCRIBE_INTERVAL_MS, STREAM_DEFAULT_ENDPOINT_MS)
        while True:
            await asyncio.sleep(0.2)
            # Iterate over a snapshot: ticks may drop sessions from the dict.
            for sid, sess in list(self._sessions.items()):
                if self._sessions.get(sid) is not sess:
                    # Dropped or replaced while an earlier tick was awaiting.
                    continue
                try:
                    # Fresh clock per session: a tick can await a transcription
                    # for a long time, which would make one shared value stale.
                    await self._tick_session(sess, time.time())
                except Exception:
                    logger.exception("Endpointer-Tick crashed (session=%s)", sid[:8])

            # Cleanup: sessions that received no chunk for STREAM_SESSION_TTL_S.
            now = time.time()
            for sid, sess in list(self._sessions.items()):
                if now - sess.last_chunk_at > STREAM_SESSION_TTL_S:
                    logger.info("Stream %s: TTL ueberschritten (ohne Daten seit %.0fs) — drop",
                                sid[:8], now - sess.last_chunk_at)
                    self.drop(sid)

    async def _tick_session(self, sess: StreamSession, now: float) -> None:
        """Advance one session: finalize, emit a partial, or do nothing."""
        ws = self._ws
        if ws is None:
            return  # disconnected: the endpointer pauses until reconnect

        audio_ms = self._buffer_duration_ms(sess)

        # Hard cap reached: treat like an endpoint, whether or not text grew.
        elapsed_ms = (now - sess.started_at) * 1000.0
        if elapsed_ms > sess.hard_cap_ms and not sess.endpoint_sent and not sess.closed:
            logger.info("Stream %s: HardCap %dms erreicht — forciere Endpoint",
                        sess.request_id[:8], sess.hard_cap_ms)
            await self._finalize(sess, ws, reason="hardcap")
            return

        # Closed (stream_end received): finalize with the collected buffer.
        if sess.closed and not sess.endpoint_sent:
            await self._finalize(sess, ws, reason="stream_end")
            return

        # Not enough audio yet for a first transcription.
        if audio_ms < STREAM_MIN_AUDIO_MS:
            return

        # Throttle partial transcriptions.
        since_last = (now - sess.last_transcribe_at) * 1000.0
        if since_last < STREAM_TRANSCRIBE_INTERVAL_MS:
            return

        sess.last_transcribe_at = now
        try:
            audio = pcm_s16le_to_float32(bytes(sess.pcm_buffer))
        except Exception:
            logger.exception("Stream %s: PCM-Decode fehlgeschlagen", sess.request_id[:8])
            return

        try:
            # Smaller beam for partials: latency matters more than accuracy
            # here. The final pass in _finalize uses beam_size=5.
            text, _ = await self.runner.transcribe(audio, sess.language, beam_size=1, vad_filter=True)
        except Exception:
            logger.exception("Stream %s: Partial-Transcribe crashed", sess.request_id[:8])
            return

        text = text.strip()
        grew = bool(text) and text != sess.last_partial
        if grew:
            sess.last_partial = text
            sess.last_growth_at = now
            # Send the partial so clients can show live text.
            await _send(ws, "stt_partial", {
                "requestId": sess.request_id,
                "audioRequestId": sess.audio_request_id,
                "text": text,
            })
            await _debug_log(ws, "stream.partial",
                             f"id={sess.request_id[:12]} text={text[:80]!r}")
            return

        # No change: check the stagnation (endpoint) condition.
        if sess.last_growth_at == 0.0:
            # No text recognised at all so far. Nothing is done here; per the
            # original author's note the app side times out on its own.
            return
        silence_ms = (now - sess.last_growth_at) * 1000.0
        if silence_ms >= sess.endpoint_ms and not sess.endpoint_sent:
            logger.info("Stream %s: Endpoint nach %dms ohne neuen Text — Text=%r",
                        sess.request_id[:8], int(silence_ms), sess.last_partial[:80])
            await self._finalize(sess, ws, reason="endpoint")

    def _buffer_duration_ms(self, sess: StreamSession) -> float:
        """Return the buffered audio length in ms (2 bytes per mono sample)."""
        samples = len(sess.pcm_buffer) // 2
        if samples == 0:
            return 0.0
        return (samples / sess.sample_rate) * 1000.0

    async def _finalize(self, sess: StreamSession, ws, reason: str) -> None:
        """Run the final transcription (beam_size=5) and close the session.

        Sends ``stt_endpoint`` and ``stt_stream_done`` and removes the session
        from the registry. Runs at most once per session. If decoding or
        transcription fails, the last partial text is sent instead so the
        client still receives its completion events.
        """
        if sess.endpoint_sent:
            return
        sess.endpoint_sent = True

        try:
            audio = pcm_s16le_to_float32(bytes(sess.pcm_buffer))
        except Exception:
            # Same handling as in _tick_session; must not leave the session
            # stuck with endpoint_sent=True and no events delivered.
            logger.exception("Stream %s: PCM-Decode fehlgeschlagen", sess.request_id[:8])
            audio = None

        if audio is None:
            final_text = sess.last_partial
            stt_ms = 0
            duration_s = self._buffer_duration_ms(sess) / 1000.0
        elif audio.size == 0:
            logger.info("Stream %s: leere Audio-Daten — final text leer", sess.request_id[:8])
            final_text = ""
            stt_ms = 0
            duration_s = 0.0
        else:
            t0 = time.time()
            try:
                final_text, _ = await self.runner.transcribe(audio, sess.language, beam_size=5, vad_filter=True)
            except Exception:
                logger.exception("Stream %s: Final-Transcribe crashed", sess.request_id[:8])
                final_text = sess.last_partial  # fall back to the last partial
            stt_ms = int((time.time() - t0) * 1000)
            # Use the session's rate, consistent with _buffer_duration_ms.
            duration_s = audio.size / float(sess.sample_rate)
        final_text = final_text.strip()

        logger.info("Stream %s: FINAL (reason=%s, %.1fs Audio, %dms): %r",
                    sess.request_id[:8], reason, duration_s, stt_ms, final_text[:120])
        await _debug_log(ws, "stream.final",
                         f"id={sess.request_id[:12]} reason={reason} "
                         f"audio={duration_s:.1f}s stt={stt_ms}ms text={final_text[:80]!r}")

        # stt_endpoint carries the final text plus the values echoed from the
        # start payload (voice, speed, interrupted, optional location).
        endpoint_payload = {
            "requestId": sess.request_id,
            "audioRequestId": sess.audio_request_id,
            "text": final_text,
            "reason": reason,
            "durationS": duration_s,
            "sttMs": stt_ms,
            "voice": sess.voice,
            "speed": sess.speed,
            "interrupted": sess.interrupted,
        }
        if sess.location:
            endpoint_payload["location"] = sess.location
        await _send(ws, "stt_endpoint", endpoint_payload)

        # stt_stream_done tells the client that this stream is finished.
        await _send(ws, "stt_stream_done", {
            "requestId": sess.request_id,
            "audioRequestId": sess.audio_request_id,
            "text": final_text,
            "reason": reason,
        })

        # Only drop our own entry: during the awaits above a new session may
        # have been started under the same request id.
        if self._sessions.get(sess.request_id) is sess:
            self.drop(sess.request_id)
|
||||||
|
|
||||||
|
|
||||||
|
# ──────────────────────────────────────────────────────────────
|
||||||
|
# LEGACY ONE-SHOT (unveraendert)
|
||||||
|
# ──────────────────────────────────────────────────────────────
|
||||||
|
|
||||||
async def handle_stt_request(ws, payload: dict, runner: WhisperRunner) -> None:
|
async def handle_stt_request(ws, payload: dict, runner: WhisperRunner) -> None:
|
||||||
request_id = payload.get("requestId", "")
|
request_id = payload.get("requestId", "")
|
||||||
audio_b64 = payload.get("audio", "")
|
audio_b64 = payload.get("audio", "")
|
||||||
mime_type = payload.get("mimeType", "audio/mp4")
|
mime_type = payload.get("mimeType", "audio/mp4")
|
||||||
# Modell-Auswahl:
|
|
||||||
# payload.model gesetzt → nimm das (aria-bridge sendet's basierend auf Config)
|
|
||||||
# sonst + Modell geladen → behalt das aktuelle (kein sinnloser Swap)
|
|
||||||
# sonst → fallback auf ENV-Default
|
|
||||||
model = payload.get("model") or (runner.model_size if runner.model is not None else WHISPER_MODEL)
|
model = payload.get("model") or (runner.model_size if runner.model is not None else WHISPER_MODEL)
|
||||||
language = payload.get("language") or WHISPER_LANGUAGE
|
language = payload.get("language") or WHISPER_LANGUAGE
|
||||||
|
|
||||||
@@ -156,8 +500,6 @@ async def handle_stt_request(ws, payload: dict, runner: WhisperRunner) -> None:
|
|||||||
|
|
||||||
try:
|
try:
|
||||||
t_load = time.time()
|
t_load = time.time()
|
||||||
# Falls Modell noch nicht geladen (Race-Condition: stt_request vor config)
|
|
||||||
# → Status-Broadcast loading→ready damit der App-Banner aufpoppt
|
|
||||||
needs_load = runner.model is None or runner.model_size != model
|
needs_load = runner.model is None or runner.model_size != model
|
||||||
if needs_load:
|
if needs_load:
|
||||||
await _broadcast_status(ws, "loading", model=model)
|
await _broadcast_status(ws, "loading", model=model)
|
||||||
@@ -205,7 +547,11 @@ async def _broadcast_status(ws, state: str, **extra) -> None:
|
|||||||
await _send(ws, "service_status", payload)
|
await _send(ws, "service_status", payload)
|
||||||
|
|
||||||
|
|
||||||
async def run_loop(runner: WhisperRunner) -> None:
|
# ──────────────────────────────────────────────────────────────
|
||||||
|
# WS-LOOP
|
||||||
|
# ──────────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
async def run_loop(runner: WhisperRunner, sessions: SessionManager) -> None:
|
||||||
use_tls = RVS_TLS
|
use_tls = RVS_TLS
|
||||||
retry_s = 2
|
retry_s = 2
|
||||||
tls_fallback_tried = False
|
tls_fallback_tried = False
|
||||||
@@ -216,20 +562,12 @@ async def run_loop(runner: WhisperRunner) -> None:
|
|||||||
masked = url.replace(RVS_TOKEN, "***") if RVS_TOKEN else url
|
masked = url.replace(RVS_TOKEN, "***") if RVS_TOKEN else url
|
||||||
try:
|
try:
|
||||||
logger.info("Verbinde zu RVS: %s", masked)
|
logger.info("Verbinde zu RVS: %s", masked)
|
||||||
# max_size 50MB damit grosse stt_request (Voice-Cloning-WAVs als
|
|
||||||
# base64 koennen mehrere MB werden) nicht das Frame-Limit sprengen
|
|
||||||
# und die Verbindung mit 1009 'message too big' killen.
|
|
||||||
async with websockets.connect(url, ping_interval=20, ping_timeout=10, max_size=50 * 1024 * 1024) as ws:
|
async with websockets.connect(url, ping_interval=20, ping_timeout=10, max_size=50 * 1024 * 1024) as ws:
|
||||||
logger.info("RVS verbunden")
|
logger.info("RVS verbunden")
|
||||||
retry_s = 2
|
retry_s = 2
|
||||||
tls_fallback_tried = False
|
tls_fallback_tried = False
|
||||||
|
sessions.attach_ws(ws)
|
||||||
|
|
||||||
# Initialer Status-Broadcast — uebertont alten "ready"-State
|
|
||||||
# im App/Diagnostic Banner (sonst denkt der User noch alles ist
|
|
||||||
# gut von vorher). Wenn Modell schon geladen → ready, sonst
|
|
||||||
# loading mit aktuellem (Default-)Namen.
|
|
||||||
# Plus: config_request an aria-bridge — wir wissen nicht ob
|
|
||||||
# sie auch grad reconnected hat oder schon laenger online ist.
|
|
||||||
async def _initial_handshake():
|
async def _initial_handshake():
|
||||||
try:
|
try:
|
||||||
if runner.model is not None:
|
if runner.model is not None:
|
||||||
@@ -241,6 +579,11 @@ async def run_loop(runner: WhisperRunner) -> None:
|
|||||||
await _broadcast_status(ws, "loading", model=init_model)
|
await _broadcast_status(ws, "loading", model=init_model)
|
||||||
logger.info("Initial: sende config_request an aria-bridge")
|
logger.info("Initial: sende config_request an aria-bridge")
|
||||||
await _send(ws, "config_request", {"service": "whisper"})
|
await _send(ws, "config_request", {"service": "whisper"})
|
||||||
|
# Startup-Marker — App-Logs zeigen damit ob Streaming-Code
|
||||||
|
# ueberhaupt aktiv ist (Stefan baut auf Gamebox via PS,
|
||||||
|
# Build/Restart kann unbeabsichtigt alte Version weiterfahren).
|
||||||
|
await _debug_log(ws, "boot",
|
||||||
|
"whisper-bridge online — streaming-mode ENABLED, debug-log ON")
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.exception("Initial-Handshake crashed: %s", e)
|
logger.exception("Initial-Handshake crashed: %s", e)
|
||||||
asyncio.create_task(_initial_handshake())
|
asyncio.create_task(_initial_handshake())
|
||||||
@@ -259,9 +602,84 @@ async def run_loop(runner: WhisperRunner) -> None:
|
|||||||
logger.info("stt_request empfangen (id=%s, %dKB Audio)",
|
logger.info("stt_request empfangen (id=%s, %dKB Audio)",
|
||||||
req_id[:8] if req_id != "?" else "?", audio_len // 1365)
|
req_id[:8] if req_id != "?" else "?", audio_len // 1365)
|
||||||
asyncio.create_task(handle_stt_request(ws, payload, runner))
|
asyncio.create_task(handle_stt_request(ws, payload, runner))
|
||||||
|
|
||||||
|
elif mtype == "stt_stream_start":
|
||||||
|
await _debug_log(ws, "stream.start",
|
||||||
|
f"received id={payload.get('requestId', '?')[:12]} "
|
||||||
|
f"audioReqId={payload.get('audioRequestId', '?')[:16]} "
|
||||||
|
f"endpointMs={payload.get('endpointMs')} "
|
||||||
|
f"hardCapMs={payload.get('hardCapMs')}")
|
||||||
|
# Ggf. Modell sicherstellen — sonst antwortet der erste
|
||||||
|
# transcribe-Call mit Leerstring weil Model None.
|
||||||
|
target_model = payload.get("model") or runner.model_size or WHISPER_MODEL
|
||||||
|
needs_load = (runner.model is None) or (target_model != runner.model_size)
|
||||||
|
if needs_load:
|
||||||
|
async def _load_then_start(p, target):
|
||||||
|
await _broadcast_status(ws, "loading", model=target)
|
||||||
|
try:
|
||||||
|
await runner.ensure_loaded(target)
|
||||||
|
await _broadcast_status(ws, "ready", model=runner.model_size)
|
||||||
|
except Exception as e:
|
||||||
|
await _broadcast_status(ws, "error", error=str(e)[:200])
|
||||||
|
return
|
||||||
|
sessions.start_session(p)
|
||||||
|
asyncio.create_task(_load_then_start(payload, target_model))
|
||||||
|
else:
|
||||||
|
sessions.start_session(payload)
|
||||||
|
|
||||||
|
elif mtype == "stt_audio_chunk":
|
||||||
|
ok = sessions.feed_chunk(payload)
|
||||||
|
if not ok:
|
||||||
|
# Sehr verbose im Schlimmstfall — debug-Level reicht.
|
||||||
|
logger.debug("stt_audio_chunk: unbekannte/closed session %s",
|
||||||
|
payload.get("requestId", "")[:8])
|
||||||
|
await _debug_log(ws, "stream.chunk.reject",
|
||||||
|
f"unknown/closed session id={payload.get('requestId', '?')[:12]}",
|
||||||
|
level="warn")
|
||||||
|
else:
|
||||||
|
# Nur alle 25 Chunks loggen (=5s Audio) — sonst Spam.
|
||||||
|
try:
|
||||||
|
seq = int(payload.get("seq", 0) or 0)
|
||||||
|
if seq % 25 == 0:
|
||||||
|
await _debug_log(ws, "stream.chunk",
|
||||||
|
f"id={payload.get('requestId', '?')[:12]} seq={seq}")
|
||||||
|
except (TypeError, ValueError):
|
||||||
|
pass
|
||||||
|
|
||||||
|
elif mtype == "stt_stream_end":
|
||||||
|
req_id = payload.get("requestId", "")
|
||||||
|
logger.info("stt_stream_end empfangen: id=%s reason=%s",
|
||||||
|
req_id[:8], payload.get("reason", ""))
|
||||||
|
await _debug_log(ws, "stream.end",
|
||||||
|
f"received id={req_id[:12]} reason={payload.get('reason', '')}")
|
||||||
|
sessions.end_session(req_id)
|
||||||
|
|
||||||
elif mtype == "config":
|
elif mtype == "config":
|
||||||
|
# Debug-Toggle: aria-bridge broadcastet jetzt whisperDebugLog
|
||||||
|
# damit Stefan im laufenden Betrieb via Diagnostic-Settings
|
||||||
|
# die Logs an/aus schalten kann.
|
||||||
|
if "whisperDebugLog" in payload:
|
||||||
|
global _DEBUG_LOG_TO_BRIDGE
|
||||||
|
old = _DEBUG_LOG_TO_BRIDGE
|
||||||
|
_DEBUG_LOG_TO_BRIDGE = bool(payload.get("whisperDebugLog", False))
|
||||||
|
if old != _DEBUG_LOG_TO_BRIDGE:
|
||||||
|
logger.info("Debug-Log-to-Bridge: %s", "ON" if _DEBUG_LOG_TO_BRIDGE else "OFF")
|
||||||
|
# Last gasp wenn ausgeschaltet wird damit Stefan im Log sieht
|
||||||
|
# dass der Toggle griff.
|
||||||
|
if not _DEBUG_LOG_TO_BRIDGE:
|
||||||
|
await ws.send(json.dumps({
|
||||||
|
"type": "app_log",
|
||||||
|
"payload": {
|
||||||
|
"ts": int(time.time() * 1000),
|
||||||
|
"platform": "whisper",
|
||||||
|
"level": "info",
|
||||||
|
"scope": "config",
|
||||||
|
"message": "debug-log OFF (toggle aus)",
|
||||||
|
"stack": "",
|
||||||
|
},
|
||||||
|
"timestamp": int(time.time() * 1000),
|
||||||
|
}))
|
||||||
new_model = payload.get("whisperModel") or WHISPER_MODEL
|
new_model = payload.get("whisperModel") or WHISPER_MODEL
|
||||||
# Laden wenn (a) noch nix geladen, oder (b) Modell wechselt
|
|
||||||
needs_load = (runner.model is None) or (new_model != runner.model_size)
|
needs_load = (runner.model is None) or (new_model != runner.model_size)
|
||||||
if needs_load:
|
if needs_load:
|
||||||
logger.info("Config-Broadcast: Whisper-Modell -> %s%s",
|
logger.info("Config-Broadcast: Whisper-Modell -> %s%s",
|
||||||
@@ -280,11 +698,10 @@ async def run_loop(runner: WhisperRunner) -> None:
|
|||||||
await _broadcast_status(ws, "error", error=str(e)[:200])
|
await _broadcast_status(ws, "error", error=str(e)[:200])
|
||||||
asyncio.create_task(_swap_with_status(new_model))
|
asyncio.create_task(_swap_with_status(new_model))
|
||||||
else:
|
else:
|
||||||
# Alle anderen Nachrichten debug-loggen — hilft beim Diagnostizieren,
|
|
||||||
# ob stt_request ueberhaupt durch den RVS kommt
|
|
||||||
logger.debug("Unbeachteter Type: %s", mtype)
|
logger.debug("Unbeachteter Type: %s", mtype)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.warning("Verbindung verloren: %s", e)
|
logger.warning("Verbindung verloren: %s", e)
|
||||||
|
sessions.detach_ws()
|
||||||
if use_tls and RVS_TLS_FALLBACK and not tls_fallback_tried:
|
if use_tls and RVS_TLS_FALLBACK and not tls_fallback_tried:
|
||||||
logger.info("TLS-Verbindung fehlgeschlagen — Fallback auf ws://")
|
logger.info("TLS-Verbindung fehlgeschlagen — Fallback auf ws://")
|
||||||
use_tls = False
|
use_tls = False
|
||||||
@@ -292,10 +709,6 @@ async def run_loop(runner: WhisperRunner) -> None:
|
|||||||
continue
|
continue
|
||||||
await asyncio.sleep(min(retry_s, 30))
|
await asyncio.sleep(min(retry_s, 30))
|
||||||
retry_s = min(retry_s * 2, 30)
|
retry_s = min(retry_s * 2, 30)
|
||||||
# Sticky-Fallback verhindern: nach jedem Disconnect-Cycle wieder
|
|
||||||
# mit wss anfangen. Sonst klebt der Client nach einem temporaeren
|
|
||||||
# TLS-Hick auf ws:// fest und kommt nie mehr auf wss zurueck —
|
|
||||||
# genau das Problem das die App + Bridge frueher schon hatten.
|
|
||||||
use_tls = RVS_TLS
|
use_tls = RVS_TLS
|
||||||
tls_fallback_tried = False
|
tls_fallback_tried = False
|
||||||
|
|
||||||
@@ -305,7 +718,11 @@ async def main() -> None:
|
|||||||
logger.error("RVS_HOST ist nicht gesetzt — Abbruch")
|
logger.error("RVS_HOST ist nicht gesetzt — Abbruch")
|
||||||
sys.exit(1)
|
sys.exit(1)
|
||||||
runner = WhisperRunner()
|
runner = WhisperRunner()
|
||||||
await run_loop(runner)
|
sessions = SessionManager(runner)
|
||||||
|
# Endpointer-Loop nebenbei laufen lassen — er prueft _ws is None und
|
||||||
|
# schlaeft solange das nicht gesetzt ist.
|
||||||
|
asyncio.create_task(sessions.run_endpointer())
|
||||||
|
await run_loop(runner, sessions)
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
|
|||||||
Reference in New Issue
Block a user