Compare commits
9 Commits
| Author | SHA1 | Date |
|---|---|---|
|
|
b1ccf29295 | |
|
|
4cd9faece2 | |
|
|
fec8aa977b | |
|
|
20123de827 | |
|
|
8761d1a1b7 | |
|
|
abc5b971f4 | |
|
|
b588dd7e3b | |
|
|
309df9d851 | |
|
|
f2e643d1fb |
|
|
@ -406,10 +406,11 @@ mit ONNX Runtime — kein API-Key, kein Cloud-Roundtrip, kein Cent Lizenzgebuehr
|
||||||
und das Audio verlaesst das Geraet nie.
|
und das Audio verlaesst das Geraet nie.
|
||||||
|
|
||||||
**Mitgelieferte Wake-Words** (ONNX-Dateien in `android/android/app/src/main/assets/openwakeword/`):
|
**Mitgelieferte Wake-Words** (ONNX-Dateien in `android/android/app/src/main/assets/openwakeword/`):
|
||||||
- `Hey Jarvis` (Default)
|
- `Hey Jarvis` (Default, openWakeWord-Original)
|
||||||
- `Alexa`
|
- `Computer` (Star-Trek-Style, Community-Modell)
|
||||||
- `Hey Mycroft`
|
- `Alexa`, `Hey Mycroft`, `Hey Rhasspy` (openWakeWord-Originale)
|
||||||
- `Hey Rhasspy`
|
|
||||||
|
Community-Modelle stammen aus [fwartner/home-assistant-wakewords-collection](https://github.com/fwartner/home-assistant-wakewords-collection).
|
||||||
|
|
||||||
**Bedienung:**
|
**Bedienung:**
|
||||||
- App → **Einstellungen** → **Wake-Word** → gewuenschtes Keyword waehlen → **Speichern + Aktivieren**
|
- App → **Einstellungen** → **Wake-Word** → gewuenschtes Keyword waehlen → **Speichern + Aktivieren**
|
||||||
|
|
|
||||||
|
|
@ -79,8 +79,8 @@ android {
|
||||||
applicationId "com.ariacockpit"
|
applicationId "com.ariacockpit"
|
||||||
minSdkVersion rootProject.ext.minSdkVersion
|
minSdkVersion rootProject.ext.minSdkVersion
|
||||||
targetSdkVersion rootProject.ext.targetSdkVersion
|
targetSdkVersion rootProject.ext.targetSdkVersion
|
||||||
versionCode 607
|
versionCode 701
|
||||||
versionName "0.0.6.7"
|
versionName "0.0.7.1"
|
||||||
// Fallback fuer Libraries mit Product Flavors
|
// Fallback fuer Libraries mit Product Flavors
|
||||||
missingDimensionStrategy 'react-native-camera', 'general'
|
missingDimensionStrategy 'react-native-camera', 'general'
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -4,6 +4,8 @@
|
||||||
<uses-permission android:name="android.permission.CAMERA" />
|
<uses-permission android:name="android.permission.CAMERA" />
|
||||||
<uses-permission android:name="android.permission.RECORD_AUDIO" />
|
<uses-permission android:name="android.permission.RECORD_AUDIO" />
|
||||||
<uses-permission android:name="android.permission.REQUEST_INSTALL_PACKAGES" />
|
<uses-permission android:name="android.permission.REQUEST_INSTALL_PACKAGES" />
|
||||||
|
<!-- Anruf-State lesen damit TTS bei klingelndem Telefon pausiert -->
|
||||||
|
<uses-permission android:name="android.permission.READ_PHONE_STATE" />
|
||||||
|
|
||||||
<application
|
<application
|
||||||
android:name=".MainApplication"
|
android:name=".MainApplication"
|
||||||
|
|
|
||||||
Binary file not shown.
|
|
@ -22,6 +22,7 @@ class MainApplication : Application(), ReactApplication {
|
||||||
add(AudioFocusPackage())
|
add(AudioFocusPackage())
|
||||||
add(PcmStreamPlayerPackage())
|
add(PcmStreamPlayerPackage())
|
||||||
add(OpenWakeWordPackage())
|
add(OpenWakeWordPackage())
|
||||||
|
add(PhoneCallPackage())
|
||||||
}
|
}
|
||||||
|
|
||||||
override fun getJSMainModuleName(): String = "index"
|
override fun getJSMainModuleName(): String = "index"
|
||||||
|
|
|
||||||
|
|
@ -42,8 +42,8 @@ class OpenWakeWordModule(reactContext: ReactApplicationContext) : ReactContextBa
|
||||||
private const val MEL_FRAMES_PER_EMBEDDING = 76 // Embedding-Fenster
|
private const val MEL_FRAMES_PER_EMBEDDING = 76 // Embedding-Fenster
|
||||||
private const val EMBEDDING_STRIDE = 8 // Slide um 8 Mel-Frames
|
private const val EMBEDDING_STRIDE = 8 // Slide um 8 Mel-Frames
|
||||||
private const val EMBEDDING_DIM = 96
|
private const val EMBEDDING_DIM = 96
|
||||||
private const val WW_INPUT_FRAMES = 16 // 16 Embeddings = ~1.28s
|
|
||||||
private const val MEL_BINS = 32
|
private const val MEL_BINS = 32
|
||||||
|
private const val DEFAULT_WW_INPUT_FRAMES = 16 // Fallback wenn Modell-Metadata fehlt
|
||||||
}
|
}
|
||||||
|
|
||||||
private val env: OrtEnvironment = OrtEnvironment.getEnvironment()
|
private val env: OrtEnvironment = OrtEnvironment.getEnvironment()
|
||||||
|
|
@ -54,6 +54,10 @@ class OpenWakeWordModule(reactContext: ReactApplicationContext) : ReactContextBa
|
||||||
private var melInputName: String = "input"
|
private var melInputName: String = "input"
|
||||||
private var embInputName: String = "input_1"
|
private var embInputName: String = "input_1"
|
||||||
private var wwInputName: String = "input"
|
private var wwInputName: String = "input"
|
||||||
|
// Anzahl Embedding-Frames die der Wake-Word-Klassifikator pro Inferenz erwartet —
|
||||||
|
// hey_jarvis hat 16, andere Community-Modelle koennen abweichen (z.B. 28).
|
||||||
|
// Wird beim init() aus den Modell-Metadaten gelesen.
|
||||||
|
private var wwInputFrames: Int = DEFAULT_WW_INPUT_FRAMES
|
||||||
|
|
||||||
// Konfiguration
|
// Konfiguration
|
||||||
private var threshold: Float = 0.5f
|
private var threshold: Float = 0.5f
|
||||||
|
|
@ -100,7 +104,13 @@ class OpenWakeWordModule(reactContext: ReactApplicationContext) : ReactContextBa
|
||||||
embInputName = embSession!!.inputNames.first()
|
embInputName = embSession!!.inputNames.first()
|
||||||
wwInputName = wwSession!!.inputNames.first()
|
wwInputName = wwSession!!.inputNames.first()
|
||||||
|
|
||||||
Log.i(TAG, "Init OK: model=$modelName threshold=$threshold patience=$patience " +
|
// WW-Input-Frame-Count aus dem Modell lesen — variiert pro Keyword.
|
||||||
|
// Erwartete Form: (1, N, 96), N steht in den Modell-Metadaten.
|
||||||
|
val wwInputInfo = wwSession!!.inputInfo[wwInputName]
|
||||||
|
val wwShape = (wwInputInfo?.info as? ai.onnxruntime.TensorInfo)?.shape
|
||||||
|
wwInputFrames = wwShape?.getOrNull(1)?.toInt()?.takeIf { it > 0 } ?: DEFAULT_WW_INPUT_FRAMES
|
||||||
|
|
||||||
|
Log.i(TAG, "Init OK: model=$modelName wwFrames=$wwInputFrames threshold=$threshold patience=$patience " +
|
||||||
"debounce=${debounceMs}ms (inputs: mel=$melInputName emb=$embInputName ww=$wwInputName)")
|
"debounce=${debounceMs}ms (inputs: mel=$melInputName emb=$embInputName ww=$wwInputName)")
|
||||||
promise.resolve(true)
|
promise.resolve(true)
|
||||||
} catch (e: Exception) {
|
} catch (e: Exception) {
|
||||||
|
|
@ -299,11 +309,12 @@ class OpenWakeWordModule(reactContext: ReactApplicationContext) : ReactContextBa
|
||||||
val embRes = embSession!!.run(mapOf(embInputName to embIn))
|
val embRes = embSession!!.run(mapOf(embInputName to embIn))
|
||||||
val embOut = embRes.get(0).value
|
val embOut = embRes.get(0).value
|
||||||
embIn.close()
|
embIn.close()
|
||||||
// Erwartete Output-Form: (1, 96) → Array<FloatArray>
|
// Erwartete Output-Form: (1, 1, 1, 96) — rank-4, NICHT (1, 96).
|
||||||
|
// Die Google-Embedding-Pipeline behaelt extra Dimensionen.
|
||||||
@Suppress("UNCHECKED_CAST")
|
@Suppress("UNCHECKED_CAST")
|
||||||
val embArr = embOut as Array<FloatArray>
|
val embArr = embOut as Array<Array<Array<FloatArray>>>
|
||||||
embBuffer.addLast(embArr[0].copyOf())
|
embBuffer.addLast(embArr[0][0][0].copyOf())
|
||||||
while (embBuffer.size > WW_INPUT_FRAMES) embBuffer.removeFirst()
|
while (embBuffer.size > wwInputFrames) embBuffer.removeFirst()
|
||||||
embRes.close()
|
embRes.close()
|
||||||
|
|
||||||
melProcessedIdx += EMBEDDING_STRIDE
|
melProcessedIdx += EMBEDDING_STRIDE
|
||||||
|
|
@ -319,9 +330,10 @@ class OpenWakeWordModule(reactContext: ReactApplicationContext) : ReactContextBa
|
||||||
}
|
}
|
||||||
|
|
||||||
// 3) Klassifikation — sobald wir 16 Embeddings haben
|
// 3) Klassifikation — sobald wir wwInputFrames Embeddings haben (16 bei hey_jarvis)
|
||||||
if (embBuffer.size < WW_INPUT_FRAMES) return
|
if (embBuffer.size < wwInputFrames) return
|
||||||
val flatEmb = FloatArray(WW_INPUT_FRAMES * EMBEDDING_DIM)
|
val flatEmb = FloatArray(wwInputFrames * EMBEDDING_DIM)
|
||||||
var p = 0
|
var p = 0
|
||||||
|
// Letzte wwInputFrames Embeddings nehmen (embBuffer ist auf wwInputFrames begrenzt)
|
||||||
for (e in embBuffer) {
|
for (e in embBuffer) {
|
||||||
System.arraycopy(e, 0, flatEmb, p, EMBEDDING_DIM)
|
System.arraycopy(e, 0, flatEmb, p, EMBEDDING_DIM)
|
||||||
p += EMBEDDING_DIM
|
p += EMBEDDING_DIM
|
||||||
|
|
@ -329,7 +341,7 @@ class OpenWakeWordModule(reactContext: ReactApplicationContext) : ReactContextBa
|
||||||
val wwIn = OnnxTensor.createTensor(
|
val wwIn = OnnxTensor.createTensor(
|
||||||
env,
|
env,
|
||||||
FloatBuffer.wrap(flatEmb),
|
FloatBuffer.wrap(flatEmb),
|
||||||
longArrayOf(1L, WW_INPUT_FRAMES.toLong(), EMBEDDING_DIM.toLong()),
|
longArrayOf(1L, wwInputFrames.toLong(), EMBEDDING_DIM.toLong()),
|
||||||
)
|
)
|
||||||
val wwRes = wwSession!!.run(mapOf(wwInputName to wwIn))
|
val wwRes = wwSession!!.run(mapOf(wwInputName to wwIn))
|
||||||
val wwOut = wwRes.get(0).value
|
val wwOut = wwRes.get(0).value
|
||||||
|
|
|
||||||
|
|
@ -137,6 +137,17 @@ class PcmStreamPlayerModule(reactContext: ReactApplicationContext) : ReactContex
|
||||||
Log.w(TAG, "play() sofort failed: ${e.message}")
|
Log.w(TAG, "play() sofort failed: ${e.message}")
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
// Idle-Cutoff: wenn endRequested NICHT kam aber 30s nichts mehr
|
||||||
|
// reinkommt, brechen wir ab (Bridge-Crash, verlorener final).
|
||||||
|
var idleMs = 0L
|
||||||
|
val maxIdleMs = 30_000L
|
||||||
|
// Zielpufferfuellung — unter diesem Wasserstand fuettern wir
|
||||||
|
// Stille rein damit AudioTrack nicht underrunt waehrend die
|
||||||
|
// Bridge den naechsten Satz rendert. Spotify/YouTube reagieren
|
||||||
|
// sonst mit eigenmaechtiger Wiederaufnahme nach ~10s Stille.
|
||||||
|
val underrunGuardFrames = sampleRate / 10 // ~100ms
|
||||||
|
val silenceFillFrames = sampleRate / 20 // ~50ms pro Refill
|
||||||
|
|
||||||
mainLoop@ while (!writerShouldStop) {
|
mainLoop@ while (!writerShouldStop) {
|
||||||
val data = queue.poll(50, java.util.concurrent.TimeUnit.MILLISECONDS)
|
val data = queue.poll(50, java.util.concurrent.TimeUnit.MILLISECONDS)
|
||||||
if (data == null) {
|
if (data == null) {
|
||||||
|
|
@ -153,8 +164,33 @@ class PcmStreamPlayerModule(reactContext: ReactApplicationContext) : ReactContex
|
||||||
}
|
}
|
||||||
break@mainLoop
|
break@mainLoop
|
||||||
}
|
}
|
||||||
|
// Underrun-Schutz: Stille reinfuettern wenn der AudioTrack-
|
||||||
|
// Puffer leerzulaufen droht. Spotify resumed sonst nach
|
||||||
|
// ~10s Pause auf eigene Faust, obwohl wir den Fokus halten.
|
||||||
|
if (playbackStarted) {
|
||||||
|
val framesWritten = bytesBuffered / streamBytesPerFrame
|
||||||
|
val framesPlayed = t.playbackHeadPosition.toLong()
|
||||||
|
val framesInBuffer = framesWritten - framesPlayed
|
||||||
|
if (framesInBuffer < underrunGuardFrames) {
|
||||||
|
val fillBytes = silenceFillFrames * streamBytesPerFrame
|
||||||
|
val silence = ByteArray(fillBytes)
|
||||||
|
var silOff = 0
|
||||||
|
while (silOff < silence.size && !writerShouldStop) {
|
||||||
|
val w = t.write(silence, silOff, silence.size - silOff)
|
||||||
|
if (w <= 0) break
|
||||||
|
silOff += w
|
||||||
|
}
|
||||||
|
bytesBuffered += silence.size
|
||||||
|
}
|
||||||
|
}
|
||||||
|
idleMs += 50L
|
||||||
|
if (idleMs >= maxIdleMs) {
|
||||||
|
Log.w(TAG, "Idle-Cutoff: ${maxIdleMs}ms keine Daten — Stream wird beendet")
|
||||||
|
break@mainLoop
|
||||||
|
}
|
||||||
continue@mainLoop
|
continue@mainLoop
|
||||||
}
|
}
|
||||||
|
idleMs = 0L
|
||||||
|
|
||||||
// Pre-Roll Check: play() erst wenn genug gepuffert
|
// Pre-Roll Check: play() erst wenn genug gepuffert
|
||||||
if (!playbackStarted && bytesBuffered + data.size >= prerollBytes) {
|
if (!playbackStarted && bytesBuffered + data.size >= prerollBytes) {
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,126 @@
|
||||||
|
package com.ariacockpit
|
||||||
|
|
||||||
|
import android.Manifest
|
||||||
|
import android.content.Context
|
||||||
|
import android.content.pm.PackageManager
|
||||||
|
import android.os.Build
|
||||||
|
import android.telephony.PhoneStateListener
|
||||||
|
import android.telephony.TelephonyCallback
|
||||||
|
import android.telephony.TelephonyManager
|
||||||
|
import android.util.Log
|
||||||
|
import androidx.core.content.ContextCompat
|
||||||
|
import com.facebook.react.bridge.Arguments
|
||||||
|
import com.facebook.react.bridge.Promise
|
||||||
|
import com.facebook.react.bridge.ReactApplicationContext
|
||||||
|
import com.facebook.react.bridge.ReactContextBaseJavaModule
|
||||||
|
import com.facebook.react.bridge.ReactMethod
|
||||||
|
import com.facebook.react.modules.core.DeviceEventManagerModule
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Lauscht auf Anruf-Statusaenderungen — wenn das Telefon klingelt oder ein
|
||||||
|
* Anruf laeuft, sendet das Modul ein "PhoneCallStateChanged"-Event an JS.
|
||||||
|
*
|
||||||
|
* JS-Side stoppt dann die TTS-Wiedergabe damit ARIA nicht mitten ins Gespraech
|
||||||
|
* weiterredet. Ohne READ_PHONE_STATE-Permission failt start() leise — der Rest
|
||||||
|
* der App funktioniert wie bisher.
|
||||||
|
*
|
||||||
|
* State-Strings: "idle" | "ringing" | "offhook"
|
||||||
|
*/
|
||||||
|
class PhoneCallModule(reactContext: ReactApplicationContext) : ReactContextBaseJavaModule(reactContext) {
|
||||||
|
override fun getName() = "PhoneCall"
|
||||||
|
|
||||||
|
companion object { private const val TAG = "PhoneCall" }
|
||||||
|
|
||||||
|
private var telephonyManager: TelephonyManager? = null
|
||||||
|
private var legacyListener: PhoneStateListener? = null
|
||||||
|
private var modernCallback: Any? = null // TelephonyCallback ab API 31
|
||||||
|
private var lastState: Int = TelephonyManager.CALL_STATE_IDLE
|
||||||
|
|
||||||
|
@ReactMethod
|
||||||
|
fun start(promise: Promise) {
|
||||||
|
try {
|
||||||
|
val perm = ContextCompat.checkSelfPermission(reactApplicationContext, Manifest.permission.READ_PHONE_STATE)
|
||||||
|
if (perm != PackageManager.PERMISSION_GRANTED) {
|
||||||
|
Log.w(TAG, "READ_PHONE_STATE Permission fehlt — Anruf-Erkennung inaktiv")
|
||||||
|
promise.resolve(false)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
val tm = reactApplicationContext.getSystemService(Context.TELEPHONY_SERVICE) as? TelephonyManager
|
||||||
|
if (tm == null) {
|
||||||
|
Log.w(TAG, "TelephonyManager nicht verfuegbar")
|
||||||
|
promise.resolve(false)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
telephonyManager = tm
|
||||||
|
|
||||||
|
if (Build.VERSION.SDK_INT >= Build.VERSION_CODES.S) {
|
||||||
|
val cb = object : TelephonyCallback(), TelephonyCallback.CallStateListener {
|
||||||
|
override fun onCallStateChanged(state: Int) {
|
||||||
|
handleStateChange(state)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
tm.registerTelephonyCallback(reactApplicationContext.mainExecutor, cb)
|
||||||
|
modernCallback = cb
|
||||||
|
} else {
|
||||||
|
@Suppress("DEPRECATION")
|
||||||
|
val l = object : PhoneStateListener() {
|
||||||
|
override fun onCallStateChanged(state: Int, phoneNumber: String?) {
|
||||||
|
handleStateChange(state)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
@Suppress("DEPRECATION")
|
||||||
|
tm.listen(l, PhoneStateListener.LISTEN_CALL_STATE)
|
||||||
|
legacyListener = l
|
||||||
|
}
|
||||||
|
Log.i(TAG, "PhoneCall-Listener aktiv")
|
||||||
|
promise.resolve(true)
|
||||||
|
} catch (e: Exception) {
|
||||||
|
Log.e(TAG, "start fehlgeschlagen", e)
|
||||||
|
promise.reject("START_FAILED", e.message ?: "Unbekannter Fehler", e)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@ReactMethod
|
||||||
|
fun stop(promise: Promise) {
|
||||||
|
try {
|
||||||
|
val tm = telephonyManager
|
||||||
|
if (tm != null) {
|
||||||
|
if (Build.VERSION.SDK_INT >= Build.VERSION_CODES.S) {
|
||||||
|
(modernCallback as? TelephonyCallback)?.let { tm.unregisterTelephonyCallback(it) }
|
||||||
|
modernCallback = null
|
||||||
|
} else {
|
||||||
|
@Suppress("DEPRECATION")
|
||||||
|
legacyListener?.let { tm.listen(it, PhoneStateListener.LISTEN_NONE) }
|
||||||
|
legacyListener = null
|
||||||
|
}
|
||||||
|
}
|
||||||
|
telephonyManager = null
|
||||||
|
lastState = TelephonyManager.CALL_STATE_IDLE
|
||||||
|
promise.resolve(true)
|
||||||
|
} catch (e: Exception) {
|
||||||
|
promise.reject("STOP_FAILED", e.message ?: "")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private fun handleStateChange(state: Int) {
|
||||||
|
if (state == lastState) return
|
||||||
|
lastState = state
|
||||||
|
val name = when (state) {
|
||||||
|
TelephonyManager.CALL_STATE_RINGING -> "ringing"
|
||||||
|
TelephonyManager.CALL_STATE_OFFHOOK -> "offhook"
|
||||||
|
TelephonyManager.CALL_STATE_IDLE -> "idle"
|
||||||
|
else -> return
|
||||||
|
}
|
||||||
|
Log.i(TAG, "Telefon-State: $name")
|
||||||
|
val params = Arguments.createMap().apply { putString("state", name) }
|
||||||
|
try {
|
||||||
|
reactApplicationContext.getJSModule(DeviceEventManagerModule.RCTDeviceEventEmitter::class.java)
|
||||||
|
.emit("PhoneCallStateChanged", params)
|
||||||
|
} catch (e: Exception) {
|
||||||
|
Log.w(TAG, "Event-emit fehlgeschlagen: ${e.message}")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@ReactMethod fun addListener(eventName: String) {}
|
||||||
|
@ReactMethod fun removeListeners(count: Int) {}
|
||||||
|
}
|
||||||
|
|
@ -0,0 +1,16 @@
|
||||||
|
package com.ariacockpit
|
||||||
|
|
||||||
|
import com.facebook.react.ReactPackage
|
||||||
|
import com.facebook.react.bridge.NativeModule
|
||||||
|
import com.facebook.react.bridge.ReactApplicationContext
|
||||||
|
import com.facebook.react.uimanager.ViewManager
|
||||||
|
|
||||||
|
class PhoneCallPackage : ReactPackage {
|
||||||
|
override fun createNativeModules(reactContext: ReactApplicationContext): List<NativeModule> {
|
||||||
|
return listOf(PhoneCallModule(reactContext))
|
||||||
|
}
|
||||||
|
|
||||||
|
override fun createViewManagers(reactContext: ReactApplicationContext): List<ViewManager<*, *>> {
|
||||||
|
return emptyList()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
@ -1,6 +1,6 @@
|
||||||
{
|
{
|
||||||
"name": "aria-cockpit",
|
"name": "aria-cockpit",
|
||||||
"version": "0.0.6.7",
|
"version": "0.0.7.1",
|
||||||
"private": true,
|
"private": true,
|
||||||
"scripts": {
|
"scripts": {
|
||||||
"android": "react-native run-android",
|
"android": "react-native run-android",
|
||||||
|
|
|
||||||
|
|
@ -25,6 +25,7 @@ import RNFS from 'react-native-fs';
|
||||||
import rvs, { RVSMessage, ConnectionState } from '../services/rvs';
|
import rvs, { RVSMessage, ConnectionState } from '../services/rvs';
|
||||||
import audioService from '../services/audio';
|
import audioService from '../services/audio';
|
||||||
import wakeWordService from '../services/wakeword';
|
import wakeWordService from '../services/wakeword';
|
||||||
|
import phoneCallService from '../services/phoneCall';
|
||||||
import updateService from '../services/updater';
|
import updateService from '../services/updater';
|
||||||
import VoiceButton from '../components/VoiceButton';
|
import VoiceButton from '../components/VoiceButton';
|
||||||
import FileUpload, { FileData } from '../components/FileUpload';
|
import FileUpload, { FileData } from '../components/FileUpload';
|
||||||
|
|
@ -159,10 +160,23 @@ const ChatScreen: React.FC = () => {
|
||||||
const unsub = wakeWordService.onStateChange((s) => {
|
const unsub = wakeWordService.onStateChange((s) => {
|
||||||
setWakeWordState(s);
|
setWakeWordState(s);
|
||||||
setWakeWordActive(s !== 'off');
|
setWakeWordActive(s !== 'off');
|
||||||
|
// Conversation-Focus an Wake-Word-State koppeln: solange wir aktiv im
|
||||||
|
// Dialog sind, soll Spotify dauerhaft gepaust bleiben (auch ueber
|
||||||
|
// Render-Pausen + zwischen Antworten hinweg). Sobald wir zurueck nach
|
||||||
|
// 'armed' oder 'off' fallen, darf Spotify wieder.
|
||||||
|
if (s === 'conversing') audioService.acquireConversationFocus();
|
||||||
|
else audioService.releaseConversationFocus();
|
||||||
});
|
});
|
||||||
return () => unsub();
|
return () => unsub();
|
||||||
}, []);
|
}, []);
|
||||||
|
|
||||||
|
// Anruf-Erkennung: TTS pausieren wenn das Telefon klingelt
|
||||||
|
useEffect(() => {
|
||||||
|
phoneCallService.start().catch(err =>
|
||||||
|
console.warn('[Chat] phoneCall.start fehlgeschlagen', err));
|
||||||
|
return () => { phoneCallService.stop().catch(() => {}); };
|
||||||
|
}, []);
|
||||||
|
|
||||||
// ttsCanPlayRef live aktuell halten — Closure in onMessage unten liest
|
// ttsCanPlayRef live aktuell halten — Closure in onMessage unten liest
|
||||||
// darueber statt direkt ttsDeviceEnabled/ttsMuted (sonst stale).
|
// darueber statt direkt ttsDeviceEnabled/ttsMuted (sonst stale).
|
||||||
useEffect(() => {
|
useEffect(() => {
|
||||||
|
|
@ -281,9 +295,22 @@ const ChatScreen: React.FC = () => {
|
||||||
const idx = prev.findIndex(m =>
|
const idx = prev.findIndex(m =>
|
||||||
m.sender === 'user' && m.text.includes('Spracheingabe wird verarbeitet')
|
m.sender === 'user' && m.text.includes('Spracheingabe wird verarbeitet')
|
||||||
);
|
);
|
||||||
if (idx < 0) return prev;
|
const newText = `\uD83C\uDFA4 ${sttText}`;
|
||||||
|
if (idx < 0) {
|
||||||
|
// Defensiv: wenn kein Placeholder im State ist (z.B. weil er nie
|
||||||
|
// hinzugefuegt wurde oder schon durch ein anderes Update verloren
|
||||||
|
// ging), die Sprachnachricht trotzdem als neue Bubble einfuegen.
|
||||||
|
// Sonst kommt ARIAs Antwort ohne sichtbare User-Nachricht.
|
||||||
|
return capMessages([...prev, {
|
||||||
|
id: nextId(),
|
||||||
|
sender: 'user',
|
||||||
|
text: newText,
|
||||||
|
timestamp: message.timestamp,
|
||||||
|
attachments: [{ type: 'audio', name: 'Sprachaufnahme' }],
|
||||||
|
}]);
|
||||||
|
}
|
||||||
const next = prev.slice();
|
const next = prev.slice();
|
||||||
next[idx] = { ...next[idx], text: `\uD83C\uDFA4 ${sttText}` };
|
next[idx] = { ...next[idx], text: newText };
|
||||||
return next;
|
return next;
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
@ -619,6 +646,8 @@ const ChatScreen: React.FC = () => {
|
||||||
base64: result.base64,
|
base64: result.base64,
|
||||||
durationMs: result.durationMs,
|
durationMs: result.durationMs,
|
||||||
mimeType: result.mimeType,
|
mimeType: result.mimeType,
|
||||||
|
voice: localXttsVoiceRef.current,
|
||||||
|
speed: ttsSpeedRef.current,
|
||||||
...(location && { location }),
|
...(location && { location }),
|
||||||
});
|
});
|
||||||
}, [getCurrentLocation]);
|
}, [getCurrentLocation]);
|
||||||
|
|
|
||||||
|
|
@ -198,6 +198,12 @@ class AudioService {
|
||||||
private focusReleaseTimer: ReturnType<typeof setTimeout> | null = null;
|
private focusReleaseTimer: ReturnType<typeof setTimeout> | null = null;
|
||||||
private readonly FOCUS_RELEASE_DELAY_MS = 800;
|
private readonly FOCUS_RELEASE_DELAY_MS = 800;
|
||||||
|
|
||||||
|
// Conversation-Mode: solange aktiv (Wake-Word Status 'conversing' ODER
|
||||||
|
// wir wissen "ARIA spricht gerade in einem Multi-Turn-Dialog"), halten wir
|
||||||
|
// den AudioFocus DAUERHAFT. Der per-Stream-Release wird unterdrueckt,
|
||||||
|
// damit Spotify nicht in Render-Pausen oder zwischen Antworten zurueckkehrt.
|
||||||
|
private _conversationFocusActive: boolean = false;
|
||||||
|
|
||||||
// VAD State
|
// VAD State
|
||||||
private vadEnabled: boolean = false;
|
private vadEnabled: boolean = false;
|
||||||
private lastSpeechTime: number = 0;
|
private lastSpeechTime: number = 0;
|
||||||
|
|
@ -214,11 +220,18 @@ class AudioService {
|
||||||
|
|
||||||
/** AudioFocus mit kleiner Verzoegerung freigeben — Spotify/YouTube
|
/** AudioFocus mit kleiner Verzoegerung freigeben — Spotify/YouTube
|
||||||
* springen sonst im Gap zwischen zwei TTS-Streams (oder wenn ARIA
|
* springen sonst im Gap zwischen zwei TTS-Streams (oder wenn ARIA
|
||||||
* eine zweite Antwort direkt hinterherschickt) kurz wieder an. */
|
* eine zweite Antwort direkt hinterherschickt) kurz wieder an.
|
||||||
|
* Im Conversation-Mode (Wake-Word conversing) wird das Release komplett
|
||||||
|
* unterdrueckt — der Focus bleibt fuer die ganze Konversation gehalten. */
|
||||||
private _releaseFocusDeferred(): void {
|
private _releaseFocusDeferred(): void {
|
||||||
|
if (this._conversationFocusActive) {
|
||||||
|
this._cancelDeferredFocusRelease();
|
||||||
|
return;
|
||||||
|
}
|
||||||
this._cancelDeferredFocusRelease();
|
this._cancelDeferredFocusRelease();
|
||||||
this.focusReleaseTimer = setTimeout(() => {
|
this.focusReleaseTimer = setTimeout(() => {
|
||||||
this.focusReleaseTimer = null;
|
this.focusReleaseTimer = null;
|
||||||
|
if (this._conversationFocusActive) return;
|
||||||
AudioFocus?.release().catch(() => {});
|
AudioFocus?.release().catch(() => {});
|
||||||
}, this.FOCUS_RELEASE_DELAY_MS);
|
}, this.FOCUS_RELEASE_DELAY_MS);
|
||||||
}
|
}
|
||||||
|
|
@ -230,6 +243,33 @@ class AudioService {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/** Conversation-Mode beginnt → AudioFocus dauerhaft halten (Spotify bleibt
|
||||||
|
* pausiert). Idempotent: mehrfaches Aufrufen ist sicher. */
|
||||||
|
acquireConversationFocus(): void {
|
||||||
|
if (this._conversationFocusActive) return;
|
||||||
|
this._conversationFocusActive = true;
|
||||||
|
this._cancelDeferredFocusRelease();
|
||||||
|
console.log('[Audio] Conversation-Focus aktiv (Spotify bleibt gepaust)');
|
||||||
|
AudioFocus?.requestDuck().catch(() => {});
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Conversation-Mode endet → Focus darf wieder freigegeben werden
|
||||||
|
* (verzoegert, damit eine direkt folgende Antwort nichts kaputtmacht). */
|
||||||
|
releaseConversationFocus(): void {
|
||||||
|
if (!this._conversationFocusActive) return;
|
||||||
|
this._conversationFocusActive = false;
|
||||||
|
console.log('[Audio] Conversation-Focus inaktiv');
|
||||||
|
this._releaseFocusDeferred();
|
||||||
|
}
|
||||||
|
|
||||||
|
/** TTS-Wiedergabe hart stoppen — z.B. wenn ein Anruf reinkommt.
|
||||||
|
* Released auch sofort den AudioFocus damit der Anruf-Klingelton hoerbar ist. */
|
||||||
|
haltAllPlayback(reason: string = ''): void {
|
||||||
|
console.log('[Audio] haltAllPlayback: %s', reason || '(no reason)');
|
||||||
|
this._conversationFocusActive = false;
|
||||||
|
this.stopPlayback();
|
||||||
|
}
|
||||||
|
|
||||||
// --- Berechtigungen ---
|
// --- Berechtigungen ---
|
||||||
|
|
||||||
async requestMicrophonePermission(): Promise<boolean> {
|
async requestMicrophonePermission(): Promise<boolean> {
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,108 @@
|
||||||
|
/**
|
||||||
|
* PhoneCall-Service — pausiert die TTS-Wiedergabe wenn das Telefon klingelt
|
||||||
|
* oder ein Anruf laeuft. Native-Bindung an PhoneCallModule.kt.
|
||||||
|
*
|
||||||
|
* Bei "ringing" oder "offhook" wird audioService.haltAllPlayback() gerufen —
|
||||||
|
* ARIA verstummt sofort. Nach dem Auflegen passiert nichts automatisch
|
||||||
|
* (Audio kommt nicht zurueck), der User muesste die Antwort manuell
|
||||||
|
* nochmal anfordern (Play-Button auf der Nachricht).
|
||||||
|
*
|
||||||
|
* Permission READ_PHONE_STATE muss vom Nutzer einmalig erteilt werden —
|
||||||
|
* wenn nicht, failed start() leise und der Rest funktioniert wie bisher.
|
||||||
|
*/
|
||||||
|
|
||||||
|
import {
|
||||||
|
NativeEventEmitter,
|
||||||
|
NativeModules,
|
||||||
|
PermissionsAndroid,
|
||||||
|
Platform,
|
||||||
|
ToastAndroid,
|
||||||
|
} from 'react-native';
|
||||||
|
import audioService from './audio';
|
||||||
|
|
||||||
|
interface PhoneCallNative {
|
||||||
|
start(): Promise<boolean>;
|
||||||
|
stop(): Promise<boolean>;
|
||||||
|
}
|
||||||
|
|
||||||
|
const { PhoneCall } = NativeModules as { PhoneCall?: PhoneCallNative };
|
||||||
|
|
||||||
|
type PhoneState = 'idle' | 'ringing' | 'offhook';
|
||||||
|
|
||||||
|
class PhoneCallService {
|
||||||
|
private started: boolean = false;
|
||||||
|
private subscription: { remove: () => void } | null = null;
|
||||||
|
private lastState: PhoneState = 'idle';
|
||||||
|
|
||||||
|
async start(): Promise<boolean> {
|
||||||
|
if (this.started || !PhoneCall) return false;
|
||||||
|
if (Platform.OS !== 'android') return false;
|
||||||
|
|
||||||
|
// Runtime-Permission holen (nur einmal noetig)
|
||||||
|
try {
|
||||||
|
const granted = await PermissionsAndroid.request(
|
||||||
|
PermissionsAndroid.PERMISSIONS.READ_PHONE_STATE,
|
||||||
|
{
|
||||||
|
title: 'ARIA Cockpit — Anruf-Erkennung',
|
||||||
|
message: 'Damit ARIA bei einem eingehenden Anruf nicht weiterredet, '
|
||||||
|
+ 'darf die App den Anruf-Status sehen (Klingeln/Aktiv/Aufgelegt). '
|
||||||
|
+ 'Es werden keine Anrufdaten gelesen oder gespeichert.',
|
||||||
|
buttonPositive: 'Erlauben',
|
||||||
|
buttonNegative: 'Spaeter',
|
||||||
|
},
|
||||||
|
);
|
||||||
|
if (granted !== PermissionsAndroid.RESULTS.GRANTED) {
|
||||||
|
console.warn('[PhoneCall] READ_PHONE_STATE Permission abgelehnt');
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
} catch (err) {
|
||||||
|
console.warn('[PhoneCall] Permission-Anfrage gescheitert', err);
|
||||||
|
}
|
||||||
|
|
||||||
|
try {
|
||||||
|
const ok = await PhoneCall.start();
|
||||||
|
if (!ok) {
|
||||||
|
console.warn('[PhoneCall] Native start() lieferte false (Permission?)');
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
const emitter = new NativeEventEmitter(NativeModules.PhoneCall as any);
|
||||||
|
this.subscription = emitter.addListener('PhoneCallStateChanged', (e: { state: PhoneState }) => {
|
||||||
|
this._onStateChanged(e.state);
|
||||||
|
});
|
||||||
|
this.started = true;
|
||||||
|
console.log('[PhoneCall] Listener aktiv');
|
||||||
|
return true;
|
||||||
|
} catch (err: any) {
|
||||||
|
console.warn('[PhoneCall] start gescheitert:', err?.message || err);
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
async stop(): Promise<void> {
|
||||||
|
if (!this.started || !PhoneCall) return;
|
||||||
|
try {
|
||||||
|
this.subscription?.remove();
|
||||||
|
this.subscription = null;
|
||||||
|
await PhoneCall.stop();
|
||||||
|
} catch {}
|
||||||
|
this.started = false;
|
||||||
|
this.lastState = 'idle';
|
||||||
|
}
|
||||||
|
|
||||||
|
private _onStateChanged(state: PhoneState): void {
|
||||||
|
if (state === this.lastState) return;
|
||||||
|
console.log('[PhoneCall] State: %s → %s', this.lastState, state);
|
||||||
|
this.lastState = state;
|
||||||
|
if (state === 'ringing' || state === 'offhook') {
|
||||||
|
audioService.haltAllPlayback(`Telefon-State: ${state}`);
|
||||||
|
ToastAndroid.show(
|
||||||
|
state === 'ringing' ? 'Anruf — ARIA pausiert' : 'Im Gespraech — ARIA pausiert',
|
||||||
|
ToastAndroid.SHORT,
|
||||||
|
);
|
||||||
|
}
|
||||||
|
// idle: nichts automatisch — User soll nichts unbeabsichtigt re-triggern
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
const phoneCallService = new PhoneCallService();
|
||||||
|
export default phoneCallService;
|
||||||
|
|
@ -36,6 +36,7 @@ export const WAKE_KEYWORD_STORAGE = 'aria_wake_keyword';
|
||||||
* werden — Diagnostic-Upload ist Phase 2. */
|
* werden — Diagnostic-Upload ist Phase 2. */
|
||||||
export const WAKE_KEYWORDS = [
|
export const WAKE_KEYWORDS = [
|
||||||
'hey_jarvis',
|
'hey_jarvis',
|
||||||
|
'computer',
|
||||||
'alexa',
|
'alexa',
|
||||||
'hey_mycroft',
|
'hey_mycroft',
|
||||||
'hey_rhasspy',
|
'hey_rhasspy',
|
||||||
|
|
@ -46,6 +47,7 @@ export const DEFAULT_KEYWORD: WakeKeyword = 'hey_jarvis';
|
||||||
/** Hilfs-Mapping fuer die Anzeige im UI. */
|
/** Hilfs-Mapping fuer die Anzeige im UI. */
|
||||||
export const KEYWORD_LABELS: Record<WakeKeyword, string> = {
|
export const KEYWORD_LABELS: Record<WakeKeyword, string> = {
|
||||||
hey_jarvis: 'Hey Jarvis',
|
hey_jarvis: 'Hey Jarvis',
|
||||||
|
computer: 'Computer',
|
||||||
alexa: 'Alexa',
|
alexa: 'Alexa',
|
||||||
hey_mycroft: 'Hey Mycroft',
|
hey_mycroft: 'Hey Mycroft',
|
||||||
hey_rhasspy: 'Hey Rhasspy',
|
hey_rhasspy: 'Hey Rhasspy',
|
||||||
|
|
|
||||||
|
|
@ -551,6 +551,15 @@ class ARIABridge:
|
||||||
# Beeinflusst das Timeout fuer stt_request — bei "loading" warten wir laenger,
|
# Beeinflusst das Timeout fuer stt_request — bei "loading" warten wir laenger,
|
||||||
# weil das Modell beim ersten Request noch ~1-2 Min runtergeladen werden kann.
|
# weil das Modell beim ersten Request noch ~1-2 Min runtergeladen werden kann.
|
||||||
self._remote_stt_ready: bool = False
|
self._remote_stt_ready: bool = False
|
||||||
|
# Pending Files: wenn die App ein Bild + Text gleichzeitig schickt, kommen
|
||||||
|
# zwei separate RVS-Events ('file' und 'chat') — wir buffern die Files
|
||||||
|
# kurz und mergen sie mit dem nachfolgenden Chat-Text zu einer einzigen
|
||||||
|
# Anfrage an aria-core. Sonst antwortet ARIA zweimal (einmal "warte auf
|
||||||
|
# Anweisung" beim file, einmal auf den Chat-Text).
|
||||||
|
# Liste von Tuples: (file_path, name, file_type, size_kb, width, height)
|
||||||
|
self._pending_files: list[tuple[str, str, str, int, int, int]] = []
|
||||||
|
self._pending_files_flush_task: Optional[asyncio.Task] = None
|
||||||
|
self._PENDING_FILES_WINDOW_SEC: float = 0.8
|
||||||
|
|
||||||
def initialize(self) -> None:
|
def initialize(self) -> None:
|
||||||
"""Initialisiert alle Komponenten.
|
"""Initialisiert alle Komponenten.
|
||||||
|
|
@ -907,18 +916,13 @@ class ARIABridge:
|
||||||
logger.info("[core] TTS unterdrueckt (Modus: %s)", self.current_mode.config.name)
|
logger.info("[core] TTS unterdrueckt (Modus: %s)", self.current_mode.config.name)
|
||||||
return
|
return
|
||||||
|
|
||||||
# Voice bestimmen: App-Override fuer diesen Request > globale Default-Voice
|
# Voice bestimmen: App-Override (gesetzt durch letzten chat-Event) > globale
|
||||||
|
# Default-Voice. Der Override wird NICHT pro Antwort verbraucht — sonst nutzt
|
||||||
|
# eine Multi-Turn-Antwort von ARIA (Tool-Use + finale Antwort) ab dem zweiten
|
||||||
|
# TTS-Call wieder die alte Default-Stimme. Der Override bleibt gueltig bis
|
||||||
|
# zum naechsten chat-Event, wo er entweder ueberschrieben oder geloescht wird.
|
||||||
xtts_voice = self._next_voice_override or getattr(self, 'xtts_voice', '')
|
xtts_voice = self._next_voice_override or getattr(self, 'xtts_voice', '')
|
||||||
# Override verbrauchen (gilt nur fuer genau diese naechste Antwort)
|
|
||||||
if self._next_voice_override:
|
|
||||||
logger.info("[core] Nutze Voice-Override: %s", self._next_voice_override)
|
|
||||||
self._next_voice_override = None
|
|
||||||
|
|
||||||
# Speed ebenfalls aus App-Override nehmen (fallback 1.0)
|
|
||||||
xtts_speed = self._next_speed_override or 1.0
|
xtts_speed = self._next_speed_override or 1.0
|
||||||
if self._next_speed_override:
|
|
||||||
logger.info("[core] Nutze Speed-Override: %.2fx", self._next_speed_override)
|
|
||||||
self._next_speed_override = None
|
|
||||||
|
|
||||||
tts_text = tts_text_preview or text
|
tts_text = tts_text_preview or text
|
||||||
if not tts_text:
|
if not tts_text:
|
||||||
|
|
@ -1024,6 +1028,51 @@ class ARIABridge:
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.debug("[session] Diagnostic nicht erreichbar (%s) — nutze '%s'", e, self._session_key)
|
logger.debug("[session] Diagnostic nicht erreichbar (%s) — nutze '%s'", e, self._session_key)
|
||||||
|
|
||||||
|
def _build_pending_files_message(self, user_text: str) -> str:
|
||||||
|
"""Baut eine Anweisung an aria-core aus den gepufferten Files + optionalem
|
||||||
|
User-Text. user_text leer → 'warte auf Anweisung'-Variante."""
|
||||||
|
parts: list[str] = []
|
||||||
|
for fp, name, ftype, kb, w, h in self._pending_files:
|
||||||
|
dim = f" {w}x{h}px" if (w and h) else ""
|
||||||
|
kind = "Bild" if ftype.startswith("image/") else "Datei"
|
||||||
|
parts.append(f"- {kind}: {name}{dim} ({ftype}, {kb}KB) liegt unter {fp}")
|
||||||
|
files_summary = "\n".join(parts)
|
||||||
|
n = len(self._pending_files)
|
||||||
|
anhang = "Anhang" if n == 1 else "Anhaenge"
|
||||||
|
if user_text:
|
||||||
|
return (f"Stefan hat dir {n} {anhang} geschickt:\n{files_summary}\n\n"
|
||||||
|
f"Er sagt dazu: \"{user_text}\"")
|
||||||
|
return (f"Stefan hat dir {n} {anhang} geschickt:\n{files_summary}\n\n"
|
||||||
|
f"Warte auf seine Anweisung was du damit tun sollst.")
|
||||||
|
|
||||||
|
async def _flush_pending_files_after(self, delay: float) -> None:
|
||||||
|
"""Wenn nach `delay`s kein chat-Text gekommen ist: Files alleine an
|
||||||
|
aria-core senden ('warte auf Anweisung'-Variante)."""
|
||||||
|
try:
|
||||||
|
await asyncio.sleep(delay)
|
||||||
|
except asyncio.CancelledError:
|
||||||
|
return
|
||||||
|
if not self._pending_files:
|
||||||
|
return
|
||||||
|
text = self._build_pending_files_message("")
|
||||||
|
self._pending_files = []
|
||||||
|
self._pending_files_flush_task = None
|
||||||
|
await self.send_to_core(text, source="app-file")
|
||||||
|
|
||||||
|
async def _flush_pending_files_with_text(self, user_text: str) -> bool:
|
||||||
|
"""Wenn ein chat-Text reinkommt waehrend Files gepuffert sind:
|
||||||
|
Files + Text zu einer einzigen aria-core-Nachricht mergen.
|
||||||
|
Returns True wenn gemerged wurde (Caller soll dann nicht nochmal senden)."""
|
||||||
|
if not self._pending_files:
|
||||||
|
return False
|
||||||
|
if self._pending_files_flush_task and not self._pending_files_flush_task.done():
|
||||||
|
self._pending_files_flush_task.cancel()
|
||||||
|
self._pending_files_flush_task = None
|
||||||
|
text = self._build_pending_files_message(user_text)
|
||||||
|
self._pending_files = []
|
||||||
|
await self.send_to_core(text, source="app-file+chat")
|
||||||
|
return True
|
||||||
|
|
||||||
async def send_to_core(self, text: str, source: str = "bridge") -> None:
|
async def send_to_core(self, text: str, source: str = "bridge") -> None:
|
||||||
"""Sendet Text an aria-core (OpenClaw chat.send Protokoll)."""
|
"""Sendet Text an aria-core (OpenClaw chat.send Protokoll)."""
|
||||||
if self.ws_core is None:
|
if self.ws_core is None:
|
||||||
|
|
@ -1169,19 +1218,30 @@ class ARIABridge:
|
||||||
if sender in ("aria", "stt"):
|
if sender in ("aria", "stt"):
|
||||||
return
|
return
|
||||||
text = payload.get("text", "")
|
text = payload.get("text", "")
|
||||||
# Voice-Override fuer die naechste ARIA-Antwort merken
|
# Voice-Override fuer Folgenachrichten setzen — gilt bis zum naechsten
|
||||||
voice_override = payload.get("voice", "")
|
# chat-Event. Leerer String "" = explizit Default-Voice (override loeschen).
|
||||||
if voice_override:
|
# Field nicht gesendet = vorherigen Override unveraendert lassen (z.B. wenn
|
||||||
self._next_voice_override = voice_override
|
# cancel_request oder anderer Service die App umgeht).
|
||||||
logger.info("[rvs] Voice-Override fuer naechste Antwort: %s", voice_override)
|
if "voice" in payload:
|
||||||
|
voice_override = payload.get("voice", "") or ""
|
||||||
|
self._next_voice_override = voice_override or None
|
||||||
|
logger.info("[rvs] Voice fuer Antworten: %s",
|
||||||
|
self._next_voice_override or "(Default)")
|
||||||
# Speed-Override (TTS-Wiedergabegeschwindigkeit, pro Geraet)
|
# Speed-Override (TTS-Wiedergabegeschwindigkeit, pro Geraet)
|
||||||
|
if "speed" in payload:
|
||||||
try:
|
try:
|
||||||
speed = float(payload.get("speed", 0) or 0)
|
speed = float(payload.get("speed", 0) or 0)
|
||||||
if 0.1 <= speed <= 5.0:
|
self._next_speed_override = speed if 0.1 <= speed <= 5.0 else None
|
||||||
self._next_speed_override = speed
|
|
||||||
except (TypeError, ValueError):
|
except (TypeError, ValueError):
|
||||||
pass
|
self._next_speed_override = None
|
||||||
if text:
|
if text:
|
||||||
|
# Wenn Files gerade gepuffert sind (Bild + Text gleichzeitig
|
||||||
|
# gesendet), mergen wir sie zu einer einzigen Anfrage statt
|
||||||
|
# zwei separater send_to_core-Calls.
|
||||||
|
merged = await self._flush_pending_files_with_text(text)
|
||||||
|
if merged:
|
||||||
|
logger.info("[rvs] App-Chat (mit Anhaengen): '%s'", text[:80])
|
||||||
|
else:
|
||||||
logger.info("[rvs] App-Chat: '%s'", text[:80])
|
logger.info("[rvs] App-Chat: '%s'", text[:80])
|
||||||
await self.send_to_core(text, source="app")
|
await self.send_to_core(text, source="app")
|
||||||
return
|
return
|
||||||
|
|
@ -1342,59 +1402,46 @@ class ARIABridge:
|
||||||
await self.ws_core.send(raw_message)
|
await self.ws_core.send(raw_message)
|
||||||
|
|
||||||
elif msg_type == "file":
|
elif msg_type == "file":
|
||||||
# Datei von der App → als Text-Nachricht an aria-core
|
# Datei von der App: speichern + zu Pending-Queue hinzufuegen.
|
||||||
|
# Wird mit dem nachfolgenden chat-Event (innerhalb PENDING_FILES_WINDOW)
|
||||||
|
# zu einer einzigen aria-core-Anfrage gemerged. Sonst antwortet ARIA
|
||||||
|
# zweimal: einmal "warte auf Anweisung" beim file, einmal auf den Chat.
|
||||||
file_name = payload.get("name", "unbekannt")
|
file_name = payload.get("name", "unbekannt")
|
||||||
file_type = payload.get("type", "")
|
file_type = payload.get("type", "")
|
||||||
file_b64 = payload.get("base64", "")
|
file_b64 = payload.get("base64", "")
|
||||||
file_size = payload.get("size", 0)
|
|
||||||
width = payload.get("width", 0)
|
width = payload.get("width", 0)
|
||||||
height = payload.get("height", 0)
|
height = payload.get("height", 0)
|
||||||
logger.info("[rvs] Datei empfangen: %s (%s, %dKB)",
|
logger.info("[rvs] Datei empfangen: %s (%s, %dKB)",
|
||||||
file_name, file_type, len(file_b64) // 1365 if file_b64 else 0)
|
file_name, file_type, len(file_b64) // 1365 if file_b64 else 0)
|
||||||
|
|
||||||
# Shared Volume: /shared/ ist in Bridge UND aria-core gemountet
|
|
||||||
SHARED_DIR = "/shared/uploads"
|
SHARED_DIR = "/shared/uploads"
|
||||||
os.makedirs(SHARED_DIR, exist_ok=True)
|
os.makedirs(SHARED_DIR, exist_ok=True)
|
||||||
|
|
||||||
if file_b64 and file_type.startswith("image/"):
|
if not file_b64:
|
||||||
# Bild in Shared Volume speichern
|
text = f"Stefan hat eine Datei gesendet ({file_name}, {file_type}) aber die Daten sind leer angekommen."
|
||||||
|
await self.send_to_core(text, source="app-file")
|
||||||
|
return
|
||||||
|
|
||||||
|
if file_type.startswith("image/"):
|
||||||
ext = ".jpg" if "jpeg" in file_type or "jpg" in file_type else ".png"
|
ext = ".jpg" if "jpeg" in file_type or "jpg" in file_type else ".png"
|
||||||
safe_name = f"img_{int(asyncio.get_event_loop().time())}_{file_name.replace('/', '_')}"
|
safe_name = f"img_{int(asyncio.get_event_loop().time())}_{file_name.replace('/', '_')}"
|
||||||
file_path = os.path.join(SHARED_DIR, safe_name if safe_name.endswith(ext) else safe_name + ext)
|
file_path = os.path.join(SHARED_DIR, safe_name if safe_name.endswith(ext) else safe_name + ext)
|
||||||
with open(file_path, "wb") as f:
|
else:
|
||||||
f.write(base64.b64decode(file_b64))
|
|
||||||
size_kb = len(file_b64) // 1365
|
|
||||||
logger.info("[rvs] Bild gespeichert: %s (%dKB)", file_path, size_kb)
|
|
||||||
# ERST an aria-core senden (wichtigster Schritt)
|
|
||||||
text = (f"Stefan hat dir ein Bild geschickt: {file_name}"
|
|
||||||
f"{f' ({width}x{height}px)' if width else ''}"
|
|
||||||
f", {size_kb}KB."
|
|
||||||
f" Das Bild liegt unter: {file_path}"
|
|
||||||
f" Warte auf Stefans Anweisung was du damit tun sollst.")
|
|
||||||
await self.send_to_core(text, source="app-file")
|
|
||||||
# Dann App informieren (optional, darf nicht crashen)
|
|
||||||
try:
|
|
||||||
await self._send_to_rvs({
|
|
||||||
"type": "file_saved",
|
|
||||||
"payload": {"name": file_name, "serverPath": file_path, "mimeType": file_type},
|
|
||||||
"timestamp": int(asyncio.get_event_loop().time() * 1000),
|
|
||||||
})
|
|
||||||
except Exception as e:
|
|
||||||
logger.warning("[rvs] file_saved konnte nicht an App gesendet werden: %s", e)
|
|
||||||
elif file_b64:
|
|
||||||
# Andere Datei in Shared Volume speichern
|
|
||||||
safe_name = f"file_{int(asyncio.get_event_loop().time())}_{file_name.replace('/', '_')}"
|
safe_name = f"file_{int(asyncio.get_event_loop().time())}_{file_name.replace('/', '_')}"
|
||||||
file_path = os.path.join(SHARED_DIR, safe_name)
|
file_path = os.path.join(SHARED_DIR, safe_name)
|
||||||
with open(file_path, "wb") as f:
|
with open(file_path, "wb") as f:
|
||||||
f.write(base64.b64decode(file_b64))
|
f.write(base64.b64decode(file_b64))
|
||||||
size_kb = len(file_b64) // 1365
|
size_kb = len(file_b64) // 1365
|
||||||
logger.info("[rvs] Datei gespeichert: %s (%dKB)", file_path, size_kb)
|
logger.info("[rvs] Datei gespeichert: %s (%dKB)", file_path, size_kb)
|
||||||
# ERST an aria-core senden
|
|
||||||
text = (f"Stefan hat dir eine Datei geschickt: {file_name}"
|
# In Pending-Queue + Flush-Timer (anti-spam Buffering)
|
||||||
f" ({file_type}, {size_kb}KB)."
|
self._pending_files.append((file_path, file_name, file_type, size_kb, int(width or 0), int(height or 0)))
|
||||||
f" Die Datei liegt unter: {file_path}"
|
if self._pending_files_flush_task and not self._pending_files_flush_task.done():
|
||||||
f" Warte auf Stefans Anweisung was du damit tun sollst.")
|
self._pending_files_flush_task.cancel()
|
||||||
await self.send_to_core(text, source="app-file")
|
self._pending_files_flush_task = asyncio.create_task(
|
||||||
|
self._flush_pending_files_after(self._PENDING_FILES_WINDOW_SEC)
|
||||||
|
)
|
||||||
|
|
||||||
try:
|
try:
|
||||||
await self._send_to_rvs({
|
await self._send_to_rvs({
|
||||||
"type": "file_saved",
|
"type": "file_saved",
|
||||||
|
|
@ -1403,9 +1450,6 @@ class ARIABridge:
|
||||||
})
|
})
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.warning("[rvs] file_saved konnte nicht an App gesendet werden: %s", e)
|
logger.warning("[rvs] file_saved konnte nicht an App gesendet werden: %s", e)
|
||||||
else:
|
|
||||||
text = f"Stefan hat eine Datei gesendet ({file_name}, {file_type}) aber die Daten sind leer angekommen."
|
|
||||||
await self.send_to_core(text, source="app-file")
|
|
||||||
|
|
||||||
elif msg_type == "file_request":
|
elif msg_type == "file_request":
|
||||||
# App fordert eine Datei an (Re-Download nach Cache-Leerung)
|
# App fordert eine Datei an (Re-Download nach Cache-Leerung)
|
||||||
|
|
@ -1444,17 +1488,18 @@ class ARIABridge:
|
||||||
if not audio_b64:
|
if not audio_b64:
|
||||||
logger.warning("[rvs] Audio ohne Daten empfangen")
|
logger.warning("[rvs] Audio ohne Daten empfangen")
|
||||||
return
|
return
|
||||||
# Voice-Override fuer die kommende ARIA-Antwort (App-lokal gewaehlt)
|
# Voice-Override fuer Folgenachrichten — gleiche Semantik wie beim chat-Event.
|
||||||
voice_override = payload.get("voice", "")
|
if "voice" in payload:
|
||||||
if voice_override:
|
voice_override = payload.get("voice", "") or ""
|
||||||
self._next_voice_override = voice_override
|
self._next_voice_override = voice_override or None
|
||||||
logger.info("[rvs] Voice-Override (via Audio): %s", voice_override)
|
logger.info("[rvs] Voice fuer Antworten (via Audio): %s",
|
||||||
|
self._next_voice_override or "(Default)")
|
||||||
|
if "speed" in payload:
|
||||||
try:
|
try:
|
||||||
speed = float(payload.get("speed", 0) or 0)
|
speed = float(payload.get("speed", 0) or 0)
|
||||||
if 0.1 <= speed <= 5.0:
|
self._next_speed_override = speed if 0.1 <= speed <= 5.0 else None
|
||||||
self._next_speed_override = speed
|
|
||||||
except (TypeError, ValueError):
|
except (TypeError, ValueError):
|
||||||
pass
|
self._next_speed_override = None
|
||||||
logger.info("[rvs] Audio empfangen: %s, %dms, %dKB",
|
logger.info("[rvs] Audio empfangen: %s, %dms, %dKB",
|
||||||
mime_type, duration_ms, len(audio_b64) // 1365)
|
mime_type, duration_ms, len(audio_b64) // 1365)
|
||||||
asyncio.create_task(self._process_app_audio(audio_b64, mime_type))
|
asyncio.create_task(self._process_app_audio(audio_b64, mime_type))
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue