diff --git a/README.md b/README.md index 7e7b9dd..7db26df 100644 --- a/README.md +++ b/README.md @@ -406,10 +406,11 @@ mit ONNX Runtime — kein API-Key, kein Cloud-Roundtrip, kein Cent Lizenzgebuehr und das Audio verlaesst das Geraet nie. **Mitgelieferte Wake-Words** (ONNX-Dateien in `android/android/app/src/main/assets/openwakeword/`): -- `Hey Jarvis` (Default) -- `Alexa` -- `Hey Mycroft` -- `Hey Rhasspy` +- `Hey Jarvis` (Default, openWakeWord-Original) +- `Computer` (Star-Trek-Style, Community-Modell) +- `Alexa`, `Hey Mycroft`, `Hey Rhasspy` (openWakeWord-Originale) + +Community-Modelle stammen aus [fwartner/home-assistant-wakewords-collection](https://github.com/fwartner/home-assistant-wakewords-collection). **Bedienung:** - App → **Einstellungen** → **Wake-Word** → gewuenschtes Keyword waehlen → **Speichern + Aktivieren** diff --git a/android/android/app/src/main/assets/openwakeword/computer.onnx b/android/android/app/src/main/assets/openwakeword/computer.onnx new file mode 100644 index 0000000..519399e Binary files /dev/null and b/android/android/app/src/main/assets/openwakeword/computer.onnx differ diff --git a/android/android/app/src/main/java/com/ariacockpit/OpenWakeWordModule.kt b/android/android/app/src/main/java/com/ariacockpit/OpenWakeWordModule.kt index 50b456f..88897da 100644 --- a/android/android/app/src/main/java/com/ariacockpit/OpenWakeWordModule.kt +++ b/android/android/app/src/main/java/com/ariacockpit/OpenWakeWordModule.kt @@ -42,8 +42,8 @@ class OpenWakeWordModule(reactContext: ReactApplicationContext) : ReactContextBa private const val MEL_FRAMES_PER_EMBEDDING = 76 // Embedding-Fenster private const val EMBEDDING_STRIDE = 8 // Slide um 8 Mel-Frames private const val EMBEDDING_DIM = 96 - private const val WW_INPUT_FRAMES = 16 // 16 Embeddings = ~1.28s private const val MEL_BINS = 32 + private const val DEFAULT_WW_INPUT_FRAMES = 16 // Fallback wenn Modell-Metadata fehlt } private val env: OrtEnvironment = OrtEnvironment.getEnvironment() @@ -54,6 
+54,10 @@ class OpenWakeWordModule(reactContext: ReactApplicationContext) : ReactContextBa private var melInputName: String = "input" private var embInputName: String = "input_1" private var wwInputName: String = "input" + // Anzahl Embedding-Frames die der Wake-Word-Klassifikator pro Inferenz erwartet — + // hey_jarvis hat 16, andere Community-Modelle koennen abweichen (z.B. 28). + // Wird beim init() aus den Modell-Metadaten gelesen. + private var wwInputFrames: Int = DEFAULT_WW_INPUT_FRAMES // Konfiguration private var threshold: Float = 0.5f @@ -100,7 +104,13 @@ class OpenWakeWordModule(reactContext: ReactApplicationContext) : ReactContextBa embInputName = embSession!!.inputNames.first() wwInputName = wwSession!!.inputNames.first() - Log.i(TAG, "Init OK: model=$modelName threshold=$threshold patience=$patience " + + // WW-Input-Frame-Count aus dem Modell lesen — variiert pro Keyword. + // Erwartete Form: (1, N, 96), N steht in den Modell-Metadaten. + val wwInputInfo = wwSession!!.inputInfo[wwInputName] + val wwShape = (wwInputInfo?.info as? ai.onnxruntime.TensorInfo)?.shape + wwInputFrames = wwShape?.getOrNull(1)?.toInt()?.takeIf { it > 0 } ?: DEFAULT_WW_INPUT_FRAMES + + Log.i(TAG, "Init OK: model=$modelName wwFrames=$wwInputFrames threshold=$threshold patience=$patience " + "debounce=${debounceMs}ms (inputs: mel=$melInputName emb=$embInputName ww=$wwInputName)") promise.resolve(true) } catch (e: Exception) { @@ -299,11 +309,12 @@ class OpenWakeWordModule(reactContext: ReactApplicationContext) : ReactContextBa val embRes = embSession!!.run(mapOf(embInputName to embIn)) val embOut = embRes.get(0).value embIn.close() - // Erwartete Output-Form: (1, 96) → Array<FloatArray> + // Erwartete Output-Form: (1, 1, 1, 96) — rank-4, NICHT (1, 96). + // Die Google-Embedding-Pipeline behaelt extra Dimensionen. 
@Suppress("UNCHECKED_CAST") - val embArr = embOut as Array<FloatArray> - embBuffer.addLast(embArr[0].copyOf()) - while (embBuffer.size > WW_INPUT_FRAMES) embBuffer.removeFirst() + val embArr = embOut as Array<Array<Array<FloatArray>>> + embBuffer.addLast(embArr[0][0][0].copyOf()) + while (embBuffer.size > wwInputFrames) embBuffer.removeFirst() embRes.close() melProcessedIdx += EMBEDDING_STRIDE @@ -319,9 +330,10 @@ class OpenWakeWordModule(reactContext: ReactApplicationContext) : ReactContextBa } // 3) Klassifikation — sobald wir 16 Embeddings haben - if (embBuffer.size < WW_INPUT_FRAMES) return - val flatEmb = FloatArray(WW_INPUT_FRAMES * EMBEDDING_DIM) + if (embBuffer.size < wwInputFrames) return + val flatEmb = FloatArray(wwInputFrames * EMBEDDING_DIM) var p = 0 + // Letzte wwInputFrames Embeddings nehmen (embBuffer ist auf wwInputFrames begrenzt) for (e in embBuffer) { System.arraycopy(e, 0, flatEmb, p, EMBEDDING_DIM) p += EMBEDDING_DIM @@ -329,7 +341,7 @@ class OpenWakeWordModule(reactContext: ReactApplicationContext) : ReactContextBa val wwIn = OnnxTensor.createTensor( env, FloatBuffer.wrap(flatEmb), - longArrayOf(1L, WW_INPUT_FRAMES.toLong(), EMBEDDING_DIM.toLong()), + longArrayOf(1L, wwInputFrames.toLong(), EMBEDDING_DIM.toLong()), ) val wwRes = wwSession!!.run(mapOf(wwInputName to wwIn)) val wwOut = wwRes.get(0).value diff --git a/android/src/services/wakeword.ts b/android/src/services/wakeword.ts index e25d18f..548cadf 100644 --- a/android/src/services/wakeword.ts +++ b/android/src/services/wakeword.ts @@ -36,6 +36,7 @@ export const WAKE_KEYWORD_STORAGE = 'aria_wake_keyword'; * werden — Diagnostic-Upload ist Phase 2. */ export const WAKE_KEYWORDS = [ 'hey_jarvis', + 'computer', 'alexa', 'hey_mycroft', 'hey_rhasspy', @@ -46,6 +47,7 @@ export const DEFAULT_KEYWORD: WakeKeyword = 'hey_jarvis'; /** Hilfs-Mapping fuer die Anzeige im UI. 
*/ export const KEYWORD_LABELS: Record<WakeKeyword, string> = { hey_jarvis: 'Hey Jarvis', + computer: 'Computer', alexa: 'Alexa', hey_mycroft: 'Hey Mycroft', hey_rhasspy: 'Hey Rhasspy',