Compare commits
9 Commits
20e623dc37
...
v0.1.9.2
| Author | SHA1 | Date | |
|---|---|---|---|
| 095a10aaf0 | |||
| e3a224478d | |||
| 61c9183033 | |||
| e04bbef361 | |||
| e82e07e3a2 | |||
| 886b4409d2 | |||
| bcea49365d | |||
| 05eb7ed144 | |||
| ddfc4261e5 |
@@ -79,8 +79,8 @@ android {
|
||||
applicationId "com.ariacockpit"
|
||||
minSdkVersion rootProject.ext.minSdkVersion
|
||||
targetSdkVersion rootProject.ext.targetSdkVersion
|
||||
versionCode 10900
|
||||
versionName "0.1.9.0"
|
||||
versionCode 10902
|
||||
versionName "0.1.9.2"
|
||||
// Fallback fuer Libraries mit Product Flavors
|
||||
missingDimensionStrategy 'react-native-camera', 'general'
|
||||
}
|
||||
|
||||
@@ -7,11 +7,16 @@ import android.Manifest
|
||||
import android.content.Context
|
||||
import android.content.pm.PackageManager
|
||||
import android.media.AudioFormat
|
||||
import android.media.AudioManager
|
||||
import android.media.AudioRecord
|
||||
import android.media.AudioRecordingConfiguration
|
||||
import android.media.MediaRecorder
|
||||
import android.media.audiofx.AcousticEchoCanceler
|
||||
import android.media.audiofx.AutomaticGainControl
|
||||
import android.media.audiofx.NoiseSuppressor
|
||||
import android.os.Build
|
||||
import android.os.Handler
|
||||
import android.os.Looper
|
||||
import android.os.PowerManager
|
||||
import android.util.Log
|
||||
import androidx.core.content.ContextCompat
|
||||
@@ -49,6 +54,12 @@ class OpenWakeWordModule(reactContext: ReactApplicationContext) : ReactContextBa
|
||||
private const val EMBEDDING_DIM = 96
|
||||
private const val MEL_BINS = 32
|
||||
private const val DEFAULT_WW_INPUT_FRAMES = 16 // Fallback wenn Modell-Metadata fehlt
|
||||
// Nach record.startRecording() erzeugt das Mikro fuer ~1s einen Spin-up-Spike
|
||||
// (DC-Offset, AGC-Settling) der vom Wake-Word-Klassifikator faelschlich als
|
||||
// Trigger eingestuft werden kann. Folge: App pausiert beim Oeffnen die Musik,
|
||||
// weil der False-Positive die AudioFocus-Switch-Logik anwirft (Stefan-Bug 06/2026).
|
||||
// Loesung: in dieser Phase keine Detections an JS weiterleiten.
|
||||
private const val STARTUP_SUPPRESSION_MS = 1500L
|
||||
}
|
||||
|
||||
private val env: OrtEnvironment = OrtEnvironment.getEnvironment()
|
||||
@@ -95,6 +106,22 @@ class OpenWakeWordModule(reactContext: ReactApplicationContext) : ReactContextBa
|
||||
private val embBuffer: ArrayDeque<FloatArray> = ArrayDeque(32) // Ringpuffer letzter Embeddings
|
||||
private var consecutiveAboveThreshold: Int = 0
|
||||
private var lastDetectionMs: Long = 0L
|
||||
// Zeitpunkt des letzten startRecording — fuer STARTUP_SUPPRESSION_MS-Fenster
|
||||
private var recordingStartedMs: Long = 0L
|
||||
|
||||
// Audio-Sharing mit anderen Apps:
|
||||
// Wenn z.B. WhatsApp eine Sprachnachricht aufnimmt, dann hält ARIAs
|
||||
// VOICE_COMMUNICATION-Lock zwar das System nicht offiziell exklusiv,
|
||||
// aber die Foreground-App bekommt nur Stille — die WhatsApp-Aufnahme
|
||||
// ist tonlos. Loesung: AudioRecordingCallback hoeren, sobald eine andere
|
||||
// App das Mic anfordert → unsere AudioRecord freigeben (externallyPaused=true).
|
||||
// Wenn die andere App fertig ist → reaktivieren. Wakeword pausiert solange.
|
||||
private var recordingCallback: AudioManager.AudioRecordingCallback? = null
|
||||
@Volatile private var externallyPaused: Boolean = false
|
||||
private val mainHandler: Handler by lazy { Handler(Looper.getMainLooper()) }
|
||||
private val audioManager: AudioManager by lazy {
|
||||
reactApplicationContext.getSystemService(Context.AUDIO_SERVICE) as AudioManager
|
||||
}
|
||||
|
||||
/**
|
||||
* Initialisiert die ONNX-Sessions fuer ein bestimmtes Wake-Word.
|
||||
@@ -159,53 +186,7 @@ class OpenWakeWordModule(reactContext: ReactApplicationContext) : ReactContextBa
|
||||
}
|
||||
|
||||
try {
|
||||
val minBuf = AudioRecord.getMinBufferSize(
|
||||
SAMPLE_RATE,
|
||||
AudioFormat.CHANNEL_IN_MONO,
|
||||
AudioFormat.ENCODING_PCM_16BIT,
|
||||
).coerceAtLeast(CHUNK_SAMPLES * 2 * 4)
|
||||
|
||||
// VOICE_COMMUNICATION-Source: aktiviert auf den meisten Android-Geraeten
|
||||
// automatisch Echo-Cancellation + Noise-Suppression. Wichtig damit
|
||||
// ARIAs eigene Stimme nicht das Wake-Word triggert wenn parallel
|
||||
// zur TTS-Wiedergabe gelauscht wird.
|
||||
val record = AudioRecord(
|
||||
MediaRecorder.AudioSource.VOICE_COMMUNICATION,
|
||||
SAMPLE_RATE,
|
||||
AudioFormat.CHANNEL_IN_MONO,
|
||||
AudioFormat.ENCODING_PCM_16BIT,
|
||||
minBuf,
|
||||
)
|
||||
if (record.state != AudioRecord.STATE_INITIALIZED) {
|
||||
record.release()
|
||||
promise.reject("AUDIO_INIT", "AudioRecord nicht initialisiert (Mikro belegt?)")
|
||||
return
|
||||
}
|
||||
audioRecord = record
|
||||
|
||||
// Audio-Effects ZUSAETZLICH explizit aktivieren — manche Geraete
|
||||
// benoetigen das, obwohl VOICE_COMMUNICATION es eigentlich schon
|
||||
// mitbringt. Failure ist nicht kritisch (continue ohne Effects).
|
||||
try {
|
||||
if (AcousticEchoCanceler.isAvailable()) {
|
||||
aec = AcousticEchoCanceler.create(record.audioSessionId)?.apply { enabled = true }
|
||||
Log.i(TAG, "AEC aktiviert (enabled=${aec?.enabled})")
|
||||
}
|
||||
} catch (e: Exception) { Log.w(TAG, "AEC failed: ${e.message}") }
|
||||
try {
|
||||
if (NoiseSuppressor.isAvailable()) {
|
||||
ns = NoiseSuppressor.create(record.audioSessionId)?.apply { enabled = true }
|
||||
}
|
||||
} catch (e: Exception) { Log.w(TAG, "NS failed: ${e.message}") }
|
||||
try {
|
||||
if (AutomaticGainControl.isAvailable()) {
|
||||
agc = AutomaticGainControl.create(record.audioSessionId)?.apply { enabled = true }
|
||||
}
|
||||
} catch (e: Exception) { Log.w(TAG, "AGC failed: ${e.message}") }
|
||||
|
||||
resetInferenceState()
|
||||
running.set(true)
|
||||
record.startRecording()
|
||||
acquireAndStartRecording()
|
||||
|
||||
// PARTIAL_WAKE_LOCK greifen damit die CPU nicht in Doze geht und
|
||||
// die JS-Bridge die emit("WakeWordDetected")-Events live verarbeitet.
|
||||
@@ -222,10 +203,10 @@ class OpenWakeWordModule(reactContext: ReactApplicationContext) : ReactContextBa
|
||||
Log.w(TAG, "WakeLock acquire fehlgeschlagen: ${e.message}")
|
||||
}
|
||||
|
||||
captureThread = Thread({ captureLoop() }, "OpenWakeWordCapture").apply {
|
||||
isDaemon = true
|
||||
start()
|
||||
}
|
||||
// AudioRecordingCallback registrieren: andere Apps (WhatsApp-
|
||||
// Sprachnachricht, Telefonate etc.) wollen das Mic — wir geben
|
||||
// es voruebergehend frei statt sie ins Leere recorden zu lassen.
|
||||
registerRecordingCallback()
|
||||
|
||||
Log.i(TAG, "Lauschen gestartet (model=$modelName)")
|
||||
promise.resolve(true)
|
||||
@@ -238,6 +219,75 @@ class OpenWakeWordModule(reactContext: ReactApplicationContext) : ReactContextBa
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Acquires the microphone and starts the wake-word capture pipeline.
 *
 * Creates the [AudioRecord], tries to enable the optional audio effects
 * (AEC / NS / AGC), resets the inference state, starts recording and spawns
 * the capture thread. Does not touch the wake lock or the recording callback.
 *
 * If anything fails after the [AudioRecord] has been created, everything
 * acquired so far is released again and `running` is reset, so a failed call
 * never leaves a half-initialised recorder (and a locked microphone) behind.
 *
 * @throws IllegalStateException if the AudioRecord could not be initialised
 *   (e.g. microphone busy). Any other start-up exception is rethrown after
 *   cleanup; the caller is expected to catch and report it.
 */
private fun acquireAndStartRecording() {
    val minBuf = AudioRecord.getMinBufferSize(
        SAMPLE_RATE,
        AudioFormat.CHANNEL_IN_MONO,
        AudioFormat.ENCODING_PCM_16BIT,
    ).coerceAtLeast(CHUNK_SAMPLES * 2 * 4)

    // VOICE_COMMUNICATION source: per the original author this enables echo
    // cancellation + noise suppression on most devices, which keeps ARIA's own
    // TTS output from triggering the wake word while listening in parallel.
    val record = AudioRecord(
        MediaRecorder.AudioSource.VOICE_COMMUNICATION,
        SAMPLE_RATE,
        AudioFormat.CHANNEL_IN_MONO,
        AudioFormat.ENCODING_PCM_16BIT,
        minBuf,
    )
    if (record.state != AudioRecord.STATE_INITIALIZED) {
        record.release()
        throw IllegalStateException("AudioRecord nicht initialisiert (Mikro belegt?)")
    }
    audioRecord = record

    try {
        // Enable the audio effects explicitly as well. Each failure is
        // non-critical: log it and continue without that effect.
        try {
            if (AcousticEchoCanceler.isAvailable()) {
                aec = AcousticEchoCanceler.create(record.audioSessionId)?.apply { enabled = true }
                Log.i(TAG, "AEC aktiviert (enabled=${aec?.enabled})")
            }
        } catch (e: Exception) { Log.w(TAG, "AEC failed: ${e.message}") }
        try {
            if (NoiseSuppressor.isAvailable()) {
                ns = NoiseSuppressor.create(record.audioSessionId)?.apply { enabled = true }
            }
        } catch (e: Exception) { Log.w(TAG, "NS failed: ${e.message}") }
        try {
            if (AutomaticGainControl.isAvailable()) {
                agc = AutomaticGainControl.create(record.audioSessionId)?.apply { enabled = true }
            }
        } catch (e: Exception) { Log.w(TAG, "AGC failed: ${e.message}") }

        resetInferenceState()
        running.set(true)
        record.startRecording()
        // Start of the STARTUP_SUPPRESSION_MS window.
        recordingStartedMs = System.currentTimeMillis()

        captureThread = Thread({ captureLoop() }, "OpenWakeWordCapture").apply {
            isDaemon = true
            start()
        }
    } catch (e: Exception) {
        // Roll back: without this a failed start keeps the microphone locked
        // and leaves `running` / `audioRecord` claiming an active session.
        running.set(false)
        try { record.release() } catch (_: Exception) {}
        audioRecord = null
        releaseAudioEffects()
        throw e
    }
}
|
||||
|
||||
/**
 * Stops the capture thread and releases the AudioRecord together with the
 * audio effects. Every step is best-effort: exceptions are swallowed.
 * Leaves the wake lock and the recording callback untouched.
 */
private fun stopAndReleaseRecording() {
    // Signal the capture loop to exit, then wait up to 1.5 s for it.
    running.set(false)
    val worker = captureThread
    if (worker != null) {
        try {
            worker.join(1500)
        } catch (_: InterruptedException) {
            // Best-effort wait only.
        }
    }
    captureThread = null

    val recorder = audioRecord
    if (recorder != null) {
        try {
            recorder.stop()
        } catch (_: Exception) {
        }
        try {
            recorder.release()
        } catch (_: Exception) {
        }
    }
    audioRecord = null

    releaseAudioEffects()
}
|
||||
|
||||
private fun releaseAudioEffects() {
|
||||
try { aec?.release() } catch (_: Exception) {}
|
||||
try { ns?.release() } catch (_: Exception) {}
|
||||
@@ -247,15 +297,9 @@ class OpenWakeWordModule(reactContext: ReactApplicationContext) : ReactContextBa
|
||||
|
||||
@ReactMethod
|
||||
fun stop(promise: Promise) {
|
||||
running.set(false)
|
||||
try {
|
||||
captureThread?.join(1500)
|
||||
} catch (_: InterruptedException) {}
|
||||
captureThread = null
|
||||
try { audioRecord?.stop() } catch (_: Exception) {}
|
||||
try { audioRecord?.release() } catch (_: Exception) {}
|
||||
audioRecord = null
|
||||
releaseAudioEffects()
|
||||
unregisterRecordingCallback()
|
||||
externallyPaused = false
|
||||
stopAndReleaseRecording()
|
||||
releaseWakeLock()
|
||||
Log.i(TAG, "Lauschen gestoppt")
|
||||
promise.resolve(true)
|
||||
@@ -263,18 +307,94 @@ class OpenWakeWordModule(reactContext: ReactApplicationContext) : ReactContextBa
|
||||
|
||||
@ReactMethod
|
||||
fun dispose(promise: Promise) {
|
||||
running.set(false)
|
||||
try { captureThread?.join(1000) } catch (_: InterruptedException) {}
|
||||
captureThread = null
|
||||
try { audioRecord?.stop() } catch (_: Exception) {}
|
||||
try { audioRecord?.release() } catch (_: Exception) {}
|
||||
audioRecord = null
|
||||
releaseAudioEffects()
|
||||
unregisterRecordingCallback()
|
||||
externallyPaused = false
|
||||
stopAndReleaseRecording()
|
||||
releaseWakeLock()
|
||||
disposeSessions()
|
||||
promise.resolve(true)
|
||||
}
|
||||
|
||||
// ── External-Mic-Sharing (AudioRecordingCallback) ──────────────────────
|
||||
//
|
||||
// Wenn eine andere App das Mic anfordert (WhatsApp-Voicenote, Telefonie,
|
||||
// Sprach-Suche im Browser etc.), kriegt die zwar formal Audio — aber
|
||||
// unsere VOICE_COMMUNICATION-Pipeline blockiert die naively neue Aufnahme
|
||||
// mit Stille (Android-Audio-Policy). Loesung: AudioRecordingCallback
|
||||
// beobachten, andere Recorder-Sessions detecten, und unsere Pipeline
|
||||
// temporaer freigeben. Sobald die andere App fertig ist → reaktivieren.
|
||||
//
|
||||
// Effekt: Wake-Word funktioniert solange nicht — fairer Kompromiss.
|
||||
|
||||
/**
 * Registers an [AudioManager.AudioRecordingCallback] (dispatched on
 * [mainHandler]) that forwards every recording-configuration change to
 * [handleRecordingConfigChange]. No-op if a callback is already registered
 * or the platform is older than API 24. Registration failures are only logged.
 */
private fun registerRecordingCallback() {
    if (recordingCallback != null) return
    if (Build.VERSION.SDK_INT < Build.VERSION_CODES.N) {
        Log.i(TAG, "AudioRecordingCallback nicht verfuegbar (API < 24) — Mic-Sharing inaktiv")
        return
    }

    val listener = object : AudioManager.AudioRecordingCallback() {
        override fun onRecordingConfigChanged(configs: MutableList<AudioRecordingConfiguration>?) =
            handleRecordingConfigChange(configs)
    }

    try {
        audioManager.registerAudioRecordingCallback(listener, mainHandler)
        // Remember it only after a successful registration.
        recordingCallback = listener
        Log.i(TAG, "AudioRecordingCallback registriert — beobachtet andere Mic-User")
    } catch (e: Exception) {
        Log.w(TAG, "registerAudioRecordingCallback failed: ${e.message}")
    }
}
|
||||
|
||||
/**
 * Unregisters the recording callback if one is registered and clears the
 * stored reference. Errors during unregistration are ignored.
 */
private fun unregisterRecordingCallback() {
    val registered = recordingCallback
    if (registered == null) return
    try {
        audioManager.unregisterAudioRecordingCallback(registered)
    } catch (_: Exception) {
        // Best-effort.
    }
    recordingCallback = null
}
|
||||
|
||||
/**
 * Reacts to a change of the system's active recording configurations.
 *
 * - While we are running and a foreign recording session is present, the
 *   pipeline is released and [externallyPaused] is set.
 * - While we are externally paused and no foreign session is left, a resume
 *   attempt is scheduled on [mainHandler] after 300 ms.
 *
 * @param configs the currently active recording configurations; `null` is ignored.
 */
private fun handleRecordingConfigChange(configs: MutableList<AudioRecordingConfiguration>?) {
    if (configs == null) return

    // Identify our own session by its audio session id. When we hold no
    // AudioRecord (externally paused), every entry counts as foreign.
    val ownSession = audioRecord?.audioSessionId
    val foreignRecorderPresent = if (ownSession == null) {
        configs.isNotEmpty()
    } else {
        configs.any { cfg -> cfg.clientAudioSessionId != ownSession }
    }

    if (running.get() && foreignRecorderPresent) {
        Log.i(TAG, "Andere App nutzt Mic — Wake-Word pausiert (configs=${configs.size})")
        externallyPaused = true
        stopAndReleaseRecording()
        return
    }

    if (!externallyPaused || foreignRecorderPresent) return

    Log.i(TAG, "Mic wieder frei — Wake-Word reaktiviert in 300ms")
    // Short delay: the other recorder has only just let go, so give the audio
    // stack a moment before re-initialising VOICE_COMMUNICATION.
    val resumeTask = Runnable {
        if (!externallyPaused) return@Runnable // already resumed (or stopped)

        // Safety check: somebody may have grabbed the mic again meanwhile.
        if (Build.VERSION.SDK_INT >= Build.VERSION_CODES.N) {
            val active = audioManager.activeRecordingConfigurations
            if (active != null && active.isNotEmpty()) {
                Log.i(TAG, "Resume verworfen — anderer Mic-User noch da (${active.size})")
                return@Runnable
            }
        }

        externallyPaused = false
        try {
            acquireAndStartRecording()
            Log.i(TAG, "Wake-Word nach External-Pause reaktiviert")
        } catch (e: Exception) {
            Log.w(TAG, "Resume nach External-Pause failed: ${e.message}")
            // Stay paused; if the other app releases the mic after all,
            // the callback fires again and triggers another attempt.
            externallyPaused = true
        }
    }
    mainHandler.postDelayed(resumeTask, 300L)
}
|
||||
|
||||
private fun releaseWakeLock() {
|
||||
try {
|
||||
wakeLock?.takeIf { it.isHeld }?.release()
|
||||
@@ -313,6 +433,11 @@ class OpenWakeWordModule(reactContext: ReactApplicationContext) : ReactContextBa
|
||||
}
|
||||
|
||||
private fun emitDetected() {
|
||||
val sinceStart = System.currentTimeMillis() - recordingStartedMs
|
||||
if (sinceStart in 0 until STARTUP_SUPPRESSION_MS) {
|
||||
Log.i(TAG, "Wake-Word emit unterdrueckt (sinceStart=${sinceStart}ms < ${STARTUP_SUPPRESSION_MS}ms — Mikro-Spin-up-Spike)")
|
||||
return
|
||||
}
|
||||
val params = com.facebook.react.bridge.Arguments.createMap().apply {
|
||||
putString("model", modelName)
|
||||
}
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "aria-cockpit",
|
||||
"version": "0.1.9.0",
|
||||
"version": "0.1.9.2",
|
||||
"private": true,
|
||||
"scripts": {
|
||||
"android": "react-native run-android",
|
||||
|
||||
@@ -23,6 +23,7 @@ import {
|
||||
} from 'react-native';
|
||||
|
||||
import brainApi, { Trigger } from '../services/brainApi';
|
||||
import rvs from '../services/rvs';
|
||||
|
||||
const COL_ACTIVE = '#34C759';
|
||||
const COL_INACTIVE = '#555570';
|
||||
@@ -65,6 +66,17 @@ export const TriggerBrowser: React.FC = () => {
|
||||
|
||||
useEffect(() => { load(); }, [load]);
|
||||
|
||||
// Auto-Reload bei RVS-Reconnect — sonst zeigt die Liste den Fast-Fail-
|
||||
// Fehler aus brainApi ewig an obwohl die Verbindung schon wieder da ist.
|
||||
useEffect(() => {
|
||||
const unsub = rvs.onStateChange((state) => {
|
||||
if (state === 'connected') {
|
||||
load();
|
||||
}
|
||||
});
|
||||
return () => unsub();
|
||||
}, [load]);
|
||||
|
||||
const visible = items.filter(t => {
|
||||
if (filter === 'active') return t.active;
|
||||
if (filter === 'inactive') return !t.active;
|
||||
|
||||
@@ -522,8 +522,9 @@ const ChatScreen: React.FC = () => {
|
||||
const sub = AppState.addEventListener('change', (next) => {
|
||||
if (next === 'background' || next === 'inactive') {
|
||||
lastBackgroundAt = Date.now();
|
||||
wakeWordService.setBackground();
|
||||
} else if (lastState !== 'active' && next === 'active') {
|
||||
wakeWordService.setResumeCooldown(3000);
|
||||
wakeWordService.setForeground();
|
||||
const bgDur = lastBackgroundAt > 0 ? Date.now() - lastBackgroundAt : 0;
|
||||
// Bei laengerer Hintergrund-Zeit (>30s): pruefen ob ein frisches
|
||||
// Wake-Word getriggert wurde wahrend die App weg war — wenn ja,
|
||||
|
||||
@@ -21,9 +21,37 @@ import {
|
||||
PermissionsAndroid,
|
||||
useWindowDimensions,
|
||||
DeviceEventEmitter,
|
||||
NativeModules,
|
||||
} from 'react-native';
|
||||
import AsyncStorage from '@react-native-async-storage/async-storage';
|
||||
import RNFS from 'react-native-fs';
|
||||
|
||||
const { FileOpener } = NativeModules as {
|
||||
FileOpener?: { open: (filePath: string, mimeType: string) => Promise<boolean> };
|
||||
};
|
||||
|
||||
/**
 * Guesses a MIME type from a file name, for the FileOpener intent.
 * The MIME type is what Android uses to pick a matching viewer app.
 *
 * The match is case-insensitive and looks only at the text after the last
 * dot. `.mp4` maps to `video/mp4` (its registered type) so video-capable
 * apps are offered; `.m4a` stays `audio/mp4`.
 *
 * @param name file name (may include a path)
 * @returns the guessed MIME type, or `application/octet-stream` when the
 *          extension is missing or unknown
 */
function guessMimeFromName(name: string): string {
  const lower = name.toLowerCase();
  const dot = lower.lastIndexOf('.');
  if (dot < 0) return 'application/octet-stream';

  switch (lower.slice(dot + 1)) {
    case 'pdf':
      return 'application/pdf';
    case 'jpg':
    case 'jpeg':
      return 'image/jpeg';
    case 'png':
      return 'image/png';
    case 'gif':
      return 'image/gif';
    case 'webp':
      return 'image/webp';
    case 'mp3':
      return 'audio/mpeg';
    case 'wav':
      return 'audio/wav';
    case 'ogg':
    case 'opus':
      return 'audio/ogg';
    case 'm4a':
      return 'audio/mp4';
    case 'mp4':
      return 'video/mp4';
    case 'webm':
      return 'video/webm';
    case 'txt':
      return 'text/plain';
    case 'md':
      return 'text/markdown';
    case 'json':
      return 'application/json';
    case 'csv':
      return 'text/csv';
    case 'html':
    case 'htm':
      return 'text/html';
    case 'zip':
      return 'application/zip';
    default:
      return 'application/octet-stream';
  }
}
|
||||
import DocumentPicker from 'react-native-document-picker';
|
||||
import rvs, { ConnectionState, RVSMessage, ConnectionConfig, ConnectionLogEntry } from '../services/rvs';
|
||||
import {
|
||||
@@ -514,9 +542,11 @@ const SettingsScreen: React.FC = () => {
|
||||
if (message.type === ('file_response' as any)) {
|
||||
const p: any = message.payload || {};
|
||||
const reqId = (p.requestId as string) || '';
|
||||
if (!reqId.startsWith('single-')) return; // nicht unsere Anfrage
|
||||
const isDownload = reqId.startsWith('single-');
|
||||
const isOpen = reqId.startsWith('open-');
|
||||
if (!isDownload && !isOpen) return; // andere Caller (ChatScreen etc.)
|
||||
if (p.error) {
|
||||
ToastAndroid.show('Download fehlgeschlagen: ' + p.error, ToastAndroid.LONG);
|
||||
ToastAndroid.show((isOpen ? 'Öffnen' : 'Download') + ' fehlgeschlagen: ' + p.error, ToastAndroid.LONG);
|
||||
return;
|
||||
}
|
||||
const b64 = (p.base64 as string) || '';
|
||||
@@ -526,10 +556,28 @@ const SettingsScreen: React.FC = () => {
|
||||
'aria-download';
|
||||
(async () => {
|
||||
try {
|
||||
if (isOpen) {
|
||||
// Open-Pfad: nach Caches schreiben + per FileOpener mit System-
|
||||
// Viewer oeffnen. Caches damit der Speicher kein Dauer-Muell wird.
|
||||
const dir = RNFS.CachesDirectoryPath;
|
||||
const target = `${dir}/${fileName}`;
|
||||
await RNFS.writeFile(target, b64, 'base64');
|
||||
const mime = (p.mimeType as string) || guessMimeFromName(fileName);
|
||||
if (FileOpener?.open) {
|
||||
try {
|
||||
await FileOpener.open(target, mime);
|
||||
} catch (e: any) {
|
||||
ToastAndroid.show('Öffnen fehlgeschlagen: ' + (e?.message || e), ToastAndroid.LONG);
|
||||
}
|
||||
} else {
|
||||
ToastAndroid.show('FileOpener-Modul nicht verfügbar — APK neu bauen', ToastAndroid.LONG);
|
||||
}
|
||||
return;
|
||||
}
|
||||
// Download-Pfad: nach Downloads-Ordner schreiben, mit Suffix bei
|
||||
// Namens-Konflikt damit nichts ueberschrieben wird.
|
||||
const dir = RNFS.DownloadDirectoryPath;
|
||||
const filePath = `${dir}/${fileName}`;
|
||||
// Falls Datei schon existiert: Suffix anhaengen damit nichts
|
||||
// ueberschrieben wird.
|
||||
let target = filePath;
|
||||
let i = 1;
|
||||
while (await RNFS.exists(target)) {
|
||||
@@ -660,6 +708,20 @@ const SettingsScreen: React.FC = () => {
|
||||
};
|
||||
}, []);
|
||||
|
||||
// Datei-Manager: Auto-Reload bei RVS-Reconnect — sonst zeigt das offene
|
||||
// Modal den Fehler "Connection refused" ewig an, obwohl die Verbindung
|
||||
// schon wieder da ist. Triggered nur wenn das Modal gerade offen ist.
|
||||
useEffect(() => {
|
||||
const unsub = rvs.onStateChange((state) => {
|
||||
if (state === 'connected' && fileManagerOpen) {
|
||||
setFileManagerError('');
|
||||
setFileManagerLoading(true);
|
||||
rvs.send('file_list_request' as any, {});
|
||||
}
|
||||
});
|
||||
return () => unsub();
|
||||
}, [fileManagerOpen]);
|
||||
|
||||
// --- QR-Code scannen ---
|
||||
|
||||
const openQRScanner = useCallback(() => {
|
||||
@@ -1040,6 +1102,30 @@ const SettingsScreen: React.FC = () => {
|
||||
{fmtSize(f.size)} · {new Date(f.mtime).toLocaleString('de-DE')}
|
||||
</Text>
|
||||
</View>
|
||||
<TouchableOpacity
|
||||
onPress={() => {
|
||||
rvs.send('file_request' as any, {
|
||||
serverPath: f.path,
|
||||
requestId: 'open-' + Date.now(),
|
||||
});
|
||||
ToastAndroid.show('Öffne ' + f.name + '…', ToastAndroid.SHORT);
|
||||
}}
|
||||
style={{padding:8}}
|
||||
>
|
||||
<Text style={{color:'#0096FF', fontSize:18}}>👁</Text>
|
||||
</TouchableOpacity>
|
||||
<TouchableOpacity
|
||||
onPress={() => {
|
||||
rvs.send('file_request' as any, {
|
||||
serverPath: f.path,
|
||||
requestId: 'single-' + Date.now(),
|
||||
});
|
||||
ToastAndroid.show('Download läuft…', ToastAndroid.SHORT);
|
||||
}}
|
||||
style={{padding:8}}
|
||||
>
|
||||
<Text style={{color:'#34C759', fontSize:18}}>⬇</Text>
|
||||
</TouchableOpacity>
|
||||
<TouchableOpacity
|
||||
onPress={() => {
|
||||
// path-relativ-zu-uploads = nur der Dateiname,
|
||||
|
||||
@@ -77,6 +77,15 @@ interface SendOpts {
|
||||
|
||||
function _send(path: string, opts: SendOpts = {}): Promise<AnyJson> {
|
||||
_ensureListener();
|
||||
// Fast-Fail wenn RVS nicht verbunden — sonst tickt der Timeout 30s und
|
||||
// der TriggerBrowser / Dateimanager zeigt einen ewig drehenden Spinner.
|
||||
// Stefan-Bug 06/2026: "Connection refused, App haengt 30 Sekunden".
|
||||
const rvsState = rvs.getState();
|
||||
if (rvsState !== 'connected') {
|
||||
return Promise.reject(new Error(
|
||||
`Keine Verbindung zum Brain (RVS: ${rvsState}). Warte auf Reconnect...`,
|
||||
));
|
||||
}
|
||||
return new Promise((resolve, reject) => {
|
||||
const requestId = _newRequestId();
|
||||
const timer = setTimeout(() => {
|
||||
|
||||
@@ -91,6 +91,18 @@ class WakeWordService {
|
||||
* ein false-positive war (Wake-Word im Hintergrund getriggert waehrend
|
||||
* Stefan gar nicht in der App war). */
|
||||
private lastTriggerAt: number = 0;
|
||||
/** App liegt im Hintergrund — alle Detections sperren. Wird vom
|
||||
* AppState-Listener im ChatScreen via setBackground/setForeground gesetzt.
|
||||
* Hintergrund-Detections sind quasi immer false-positives (TV, Husten,
|
||||
* AudioFocus-Switch beim Wechsel zu Musik etc.). */
|
||||
private inBackground: boolean = false;
|
||||
/** Re-Entry-Guard fuer onWakeDetected: native kann mehrere
|
||||
* WakeWordDetected-Events emitten BEVOR OpenWakeWord.stop() in JS
|
||||
* resolved (Bridge-Queue + Doze-Backlog). Mit dem Flag wird das zweite
|
||||
* Event sofort verworfen. Reset beim Verlassen von 'conversing'.
|
||||
* Ausnahme: bargeListening → Barge-In ist ein legitimer neuer Trigger
|
||||
* waehrend ARIA noch redet, NICHT vom Guard blockieren. */
|
||||
private detectionInProgress: boolean = false;
|
||||
|
||||
private keyword: WakeKeyword = DEFAULT_KEYWORD;
|
||||
private nativeReady: boolean = false;
|
||||
@@ -228,14 +240,44 @@ class WakeWordService {
|
||||
console.log('[WakeWord] Cooldown aktiv fuer %dms', ms);
|
||||
}
|
||||
|
||||
/** App in den Hintergrund: alle Wake-Word-Detections sperren.
|
||||
* Im Hintergrund will Stefan praktisch nie einen neuen Dialog starten —
|
||||
* was als „Wake-Word" reinkommt ist Husten/TV/AudioFocus-Switch. */
|
||||
setBackground(): void {
|
||||
this.inBackground = true;
|
||||
console.log('[WakeWord] App im Hintergrund — Detections gesperrt');
|
||||
}
|
||||
|
||||
/** App im Vordergrund: Detections wieder freigeben, plus 3s Cooldown
|
||||
* als Schutz gegen den AudioFocus-/AudioTrack-Spike der direkt nach
|
||||
* dem Resume kommt. Ersetzt das alte setResumeCooldown(3000)-Pattern. */
|
||||
setForeground(): void {
|
||||
this.inBackground = false;
|
||||
this.cooldownUntilMs = Date.now() + 3000;
|
||||
console.log('[WakeWord] App im Vordergrund — Cooldown 3s aktiv');
|
||||
}
|
||||
|
||||
/** Wake-Word getriggert: Native-Modul pausieren, Konversation starten. */
|
||||
private async onWakeDetected(): Promise<void> {
|
||||
if (this.inBackground) {
|
||||
console.log('[WakeWord] Trigger ignoriert (App im Hintergrund)');
|
||||
import('./logger').then(m => m.reportAppDebug('wake.detect', 'ignored: app in background')).catch(()=>{});
|
||||
return;
|
||||
}
|
||||
// Re-Entry-Guard: blocken wenn ein Detection-Zyklus schon laeuft.
|
||||
// Ausnahme: Barge-In waehrend ARIA-TTS ist ein legitimer neuer Trigger.
|
||||
if (this.detectionInProgress && !this.bargeListening) {
|
||||
console.log('[WakeWord] Trigger ignoriert (Detection-Zyklus laeuft schon — Native-Doppel-Event-Race)');
|
||||
import('./logger').then(m => m.reportAppDebug('wake.detect', 'ignored: detectionInProgress')).catch(()=>{});
|
||||
return;
|
||||
}
|
||||
const now = Date.now();
|
||||
if (now < this.cooldownUntilMs) {
|
||||
const left = this.cooldownUntilMs - now;
|
||||
console.log('[WakeWord] Trigger ignoriert (Cooldown noch %dms aktiv — wahrscheinlich App-Resume-Spike)', left);
|
||||
return;
|
||||
}
|
||||
this.detectionInProgress = true;
|
||||
console.log('[WakeWord] Wake-Word "%s" erkannt! (state=%s, barge=%s)',
|
||||
this.keyword, this.state, this.bargeListening);
|
||||
import('./logger').then(m => m.reportAppDebug('wake.detect',
|
||||
@@ -503,7 +545,12 @@ class WakeWordService {
|
||||
|
||||
private setState(state: WakeWordState): void {
|
||||
if (this.state !== state) {
|
||||
const wasConversing = this.state === 'conversing';
|
||||
this.state = state;
|
||||
// Re-Entry-Guard freigeben sobald wir 'conversing' verlassen — Zyklus ist durch
|
||||
if (wasConversing && state !== 'conversing') {
|
||||
this.detectionInProgress = false;
|
||||
}
|
||||
this.stateCallbacks.forEach(cb => cb(state));
|
||||
}
|
||||
}
|
||||
|
||||
@@ -127,6 +127,25 @@ META_TOOLS = [
|
||||
"items": {"type": "object"},
|
||||
"description": "Argumente-Schema [{name, type, required, description}]",
|
||||
},
|
||||
"fast_patterns": {
|
||||
"type": "array",
|
||||
"items": {"type": "object"},
|
||||
"description": (
|
||||
"OPTIONAL — fuer 'reines Steuern'-Skills (Licht an/aus, Spotify "
|
||||
"pause/next, Rollade hoch/runter etc.) eine Liste von "
|
||||
"[{match, args, reply}] eintragen. Wenn ein User-Befehl gegen "
|
||||
"match (anchored Regex, case-insensitive) matched, ruft das "
|
||||
"Brain run_skill(name, args) DIREKT auf und gibt reply zurueck — "
|
||||
"ohne Claude (~5s Latenz gespart). Match wird gegen den "
|
||||
"normalisierten Text (lowercase, Endsatzzeichen weg) gemacht; "
|
||||
"schreibe Patterns mit ^...$ damit nur exakte Befehle matchen "
|
||||
"und nicht Teilstrings (z.B. ^pause$ statt pause). NICHT fuer "
|
||||
"Skills mit kreativem Output / parametrisierter Logik — die "
|
||||
"brauchen Claude. Beispiel: "
|
||||
"[{\"match\":\"^pause$\",\"args\":{\"path\":\"/v1/me/player/pause\",\"method\":\"PUT\"},"
|
||||
"\"reply\":\"Spotify: pausiert ⏸\"}]"
|
||||
),
|
||||
},
|
||||
},
|
||||
"required": ["name", "description", "entry_code"],
|
||||
},
|
||||
@@ -193,6 +212,16 @@ META_TOOLS = [
|
||||
"Setzt Stefan in Diagnostic; Skill bekommt CFG_<NAME> ENV."
|
||||
),
|
||||
},
|
||||
"fast_patterns": {
|
||||
"type": "array",
|
||||
"items": {"type": "object"},
|
||||
"description": (
|
||||
"Optional komplette Fast-Path-Patterns-Liste UEBERSCHREIBEN — "
|
||||
"[{match, args, reply}]. Siehe skill_create-Beschreibung fuer "
|
||||
"Format. Leere Liste = alle Fast-Paths entfernen (alles geht "
|
||||
"wieder durch Claude). Wenn nicht angegeben: bleibt unberuehrt."
|
||||
),
|
||||
},
|
||||
},
|
||||
"required": ["name"],
|
||||
},
|
||||
@@ -782,6 +811,28 @@ META_TOOLS = [
|
||||
]
|
||||
|
||||
|
||||
# ── Fast-Path (Skill-deklariert) ───────────────────────────────────────
|
||||
#
|
||||
# Skills koennen in ihrem Manifest `fast_patterns` deklarieren — eine Liste
|
||||
# von {match: regex, args: dict, reply: str}. Wenn ein User-Text gegen
|
||||
# ein Pattern matcht, ruft das Brain direkt run_skill(name, args) auf und
|
||||
# returnt `reply` an den User — Claude wird komplett uebersprungen. Spart
|
||||
# 5-10s LLM-Latenz pro "reines Steuern"-Befehl.
|
||||
#
|
||||
# Patterns sollten anchored (^...$) gegen den normalisierten Text (lower-
|
||||
# case, Endsatzzeichen weg, Whitespace gestrafft) geschrieben sein. Lieber
|
||||
# eng matchen als breit — false-positives sind teurer als ein Cache-Miss.
|
||||
#
|
||||
# Diese Logik ist generisch — ARIA deklariert die Patterns selbst beim
|
||||
# skill_create / skill_update, das Brain orchestriert nur.
|
||||
|
||||
def _normalize_for_fast_match(text: str) -> str:
|
||||
norm = (text or "").strip().lower()
|
||||
norm = re.sub(r"[.!?]+$", "", norm)
|
||||
norm = re.sub(r"\s+", " ", norm)
|
||||
return norm
|
||||
|
||||
|
||||
def _skill_to_tool(s: dict) -> dict:
|
||||
"""Mappt einen Skill auf ein OpenAI-Function-Tool."""
|
||||
args = s.get("args") or []
|
||||
@@ -849,6 +900,54 @@ class Agent:
|
||||
self._pending_events = []
|
||||
return events
|
||||
|
||||
def _try_skill_fast_path(self, user_message: str) -> Optional[str]:
    """Try to answer a user turn through a skill-declared fast path.

    Walks all active skills and tests each of their ``fast_patterns``
    against the normalized user text. The first matching pattern wins: its
    skill is run directly and the pattern's fixed reply is returned.

    Args:
        user_message: Raw user text of the current turn.

    Returns:
        The reply for the user, or ``None`` when no pattern matched or the
        skill call broke unexpectedly (exception or malformed result), so
        the caller can continue on its regular path. When the skill ran
        but reported ``ok=False`` an error reply is returned instead of
        ``None``, because a failed fast path followed by a full LLM turn
        would cost the latency twice.
    """
    norm = _normalize_for_fast_match(user_message)
    if not norm:
        return None

    active_skills = [s for s in skills_mod.list_skills(active_only=False)
                     if s.get("active", True)]
    for skill in active_skills:
        patterns = skill.get("fast_patterns") or []
        if not patterns:
            continue
        skill_name = skill.get("name") or ""
        for pat in patterns:
            # Manifests may have been edited by hand or written by an older
            # version; a malformed entry must not abort the whole turn.
            if not isinstance(pat, dict):
                continue
            rx = pat.get("match") or ""
            if not rx:
                continue
            try:
                if not re.match(rx, norm, re.IGNORECASE):
                    continue
            except (re.error, TypeError):
                # Broken or non-string regex. Should already be filtered
                # out by _normalize_fast_patterns when the manifest is
                # written.
                continue
            raw_args = pat.get("args")
            # Copy so the skill run cannot mutate the manifest data.
            args = dict(raw_args) if isinstance(raw_args, dict) else {}
            reply = pat.get("reply") or f"{skill_name}: ok"
            logger.info("[fast-path] match skill=%s pattern=%r msg=%r",
                        skill_name, rx, user_message[:60])
            try:
                res = skills_mod.run_skill(skill_name, args, timeout_sec=15)
            except Exception as exc:
                logger.warning("[fast-path] %s exception — fall through zu Claude: %s",
                               skill_name, exc)
                return None
            if not isinstance(res, dict):
                # Unexpected result shape: treat it like an exception and
                # hand the turn over instead of crashing on res.get().
                logger.warning("[fast-path] %s unerwartetes Ergebnis %r — fall through zu Claude",
                               skill_name, res)
                return None
            if not res.get("ok"):
                # Surface the last output line as a short hint for the user.
                tail = (res.get("stderr") or res.get("stdout") or "").strip().splitlines()
                hint = (tail[-1] if tail else "")[:120]
                return f"{skill_name}: {reply} — Fehler: {hint or 'siehe Brain-Log'}"
            return reply
    return None
|
||||
|
||||
# ── Hauptpfad: ein User-Turn → Tool-Loop → finaler Reply ──
|
||||
|
||||
MAX_TOOL_ITERATIONS = 8 # Schutz vor Endlos-Loops
|
||||
@@ -861,6 +960,15 @@ class Agent:
|
||||
# Events vom letzten Turn weglassen
|
||||
self._pending_events = []
|
||||
|
||||
# Fast-Path: einfache "reines Steuern"-Commands ueberspringen Claude komplett.
|
||||
# Jeder Skill kann in seinem Manifest fast_patterns deklarieren — das Brain
|
||||
# iteriert hier ueber alle aktiven Skills und matched. Spart 5-10s Latenz.
|
||||
fast_reply = self._try_skill_fast_path(user_message)
|
||||
if fast_reply is not None:
|
||||
self.conversation.add("user", user_message, source=source)
|
||||
self.conversation.add("assistant", fast_reply)
|
||||
return fast_reply
|
||||
|
||||
# 1. User-Turn an die Konversation
|
||||
self.conversation.add("user", user_message, source=source)
|
||||
|
||||
@@ -1001,6 +1109,7 @@ class Agent:
|
||||
args=arguments.get("args", []),
|
||||
pip_packages=arguments.get("pip_packages", []),
|
||||
config_schema=arguments.get("config_schema") or None,
|
||||
fast_patterns=arguments.get("fast_patterns") or None,
|
||||
author="aria",
|
||||
)
|
||||
# Side-Channel-Event: Stefan soll sehen wenn ARIA was anlegt
|
||||
@@ -1064,6 +1173,8 @@ class Agent:
|
||||
patch["pip_packages"] = arguments["pip_packages"]
|
||||
if "config_schema" in arguments and isinstance(arguments["config_schema"], list):
|
||||
patch["config_schema"] = arguments["config_schema"]
|
||||
if "fast_patterns" in arguments and isinstance(arguments["fast_patterns"], list):
|
||||
patch["fast_patterns"] = arguments["fast_patterns"]
|
||||
if not patch:
|
||||
return "FEHLER: keine Felder zum Update angegeben."
|
||||
try:
|
||||
|
||||
@@ -45,6 +45,54 @@ logger = logging.getLogger("aria-brain")
|
||||
QDRANT_HOST = os.environ.get("QDRANT_HOST", "aria-qdrant")
|
||||
QDRANT_PORT = int(os.environ.get("QDRANT_PORT", "6333"))
|
||||
|
||||
def _seed_spotify_fast_patterns() -> None:
    """One-shot migration: write default control patterns into the Spotify skill.

    Seeds the defaults only when a ``spotify`` manifest exists and has no
    ``fast_patterns`` entry at all. The skill's active state is not
    checked. After this run the patterns can be changed freely through
    ``skill_update``.

    A manifest that already carries ``fast_patterns``, even an empty list,
    is left alone: an empty list is the documented way to remove all fast
    paths, and re-seeding it on every start would silently undo that.
    """
    manifest = skills_mod.read_manifest("spotify")
    if not manifest:
        logger.info("[migrate] spotify skill nicht vorhanden — nichts zu tun")
        return
    # NOTE(review): assumes read_manifest returns the stored manifest without
    # injecting a default "fast_patterns" key for legacy skills — confirm.
    existing = manifest.get("fast_patterns")
    if existing is not None:
        logger.info("[migrate] spotify hat schon fast_patterns (%d) — skip",
                    len(existing))
        return
    default_patterns = [
        # NEXT
        {"match": r"^(naechster|nächster|naechste|nächste) (track|song|titel|lied)$",
         "args": {"path": "/v1/me/player/next", "method": "POST"},
         "reply": "Spotify: nächster Track ⏭"},
        {"match": r"^(weiter|skip|ueberspringen|überspringen|ueberspring|überspring)$",
         "args": {"path": "/v1/me/player/next", "method": "POST"},
         "reply": "Spotify: nächster Track ⏭"},
        # PREVIOUS
        {"match": r"^(vorheriger|vorheriges|letzter|letztes) (track|song|titel|lied)$",
         "args": {"path": "/v1/me/player/previous", "method": "POST"},
         "reply": "Spotify: vorheriger Track ⏮"},
        {"match": r"^(zurueck|zurück)$",
         "args": {"path": "/v1/me/player/previous", "method": "POST"},
         "reply": "Spotify: vorheriger Track ⏮"},
        # PAUSE
        {"match": r"^(pause|pausiere|pausieren|stop|stopp|halt)$",
         "args": {"path": "/v1/me/player/pause", "method": "PUT"},
         "reply": "Spotify: pausiert ⏸"},
        {"match": r"^(musik|spotify) (pause|aus|stop|stopp)$",
         "args": {"path": "/v1/me/player/pause", "method": "PUT"},
         "reply": "Spotify: pausiert ⏸"},
        # PLAY
        {"match": r"^(play|weiterspielen|weiter spielen|fortsetzen|abspielen)$",
         "args": {"path": "/v1/me/player/play", "method": "PUT"},
         "reply": "Spotify: spielt ▶"},
        {"match": r"^(musik|spotify) (an|wieder an|weiter|fortsetzen)$",
         "args": {"path": "/v1/me/player/play", "method": "PUT"},
         "reply": "Spotify: spielt ▶"},
    ]
    skills_mod.update_skill("spotify", {"fast_patterns": default_patterns})
    logger.info("[migrate] spotify fast_patterns gesetzt (%d Eintraege)",
                len(default_patterns))
|
||||
|
||||
|
||||
@asynccontextmanager
|
||||
async def lifespan(app: FastAPI):
|
||||
"""Beim Brain-Start: System-Seed-Regeln idempotent in DB schreiben,
|
||||
@@ -54,6 +102,15 @@ async def lifespan(app: FastAPI):
|
||||
logger.info("Lifespan: seed_rules angewendet (%s)", result)
|
||||
except Exception as exc:
|
||||
logger.exception("Lifespan: seed_rules fehlgeschlagen — Brain startet trotzdem (%s)", exc)
|
||||
|
||||
# Einmalige Migration: Spotify-Skill ohne fast_patterns kriegt die Standard-
|
||||
# Patterns injiziert. Idempotent — wenn schon welche da sind, nichts tun.
|
||||
# ARIA kann sie spaeter via skill_update beliebig erweitern/ersetzen.
|
||||
try:
|
||||
_seed_spotify_fast_patterns()
|
||||
except Exception as exc:
|
||||
logger.warning("Lifespan: spotify fast_patterns Migration: %s", exc)
|
||||
|
||||
task = asyncio.create_task(background_mod.run_loop(agent))
|
||||
logger.info("Lifespan: Trigger-Loop gestartet")
|
||||
try:
|
||||
|
||||
@@ -131,6 +131,54 @@ SEED_RULES: List[dict] = [
|
||||
"Skill-Friedhof und Stefan muss aufraeumen."
|
||||
),
|
||||
},
|
||||
{
|
||||
"migration_key": "seed/skill-rule/fast-patterns-for-control",
|
||||
"type": "rule",
|
||||
"title": "Skill-Regel: fast_patterns fuer reines Steuern (spart 5-10s Latenz)",
|
||||
"category": "skills",
|
||||
"content": (
|
||||
"Wenn Du einen Skill baust oder aktualisierst, der **reine Steuer-"
|
||||
"Befehle** behandelt (Licht an/aus, Spotify pause/next, Rollade "
|
||||
"hoch/runter, Heizung +1°), trag ins Manifest `fast_patterns` ein. "
|
||||
"Format pro Eintrag: `{match: \"^regex$\", args: {...}, reply: \"Text\"}`.\n"
|
||||
"\n"
|
||||
"Wirkung: das Brain matched eingehende User-Texte BEVOR Claude gerufen "
|
||||
"wird. Match → run_skill(name, args) direkt → reply zurueck → Claude "
|
||||
"uebersprungen. Stefan spart 5-10 Sekunden pro Befehl. Praktisch "
|
||||
"Pflicht im Auto, wo Latenz nervt.\n"
|
||||
"\n"
|
||||
"REGELN beim Patterns schreiben:\n"
|
||||
" - Mit `^` und `$` anchorn — sonst matched `pause` mitten in `pause "
|
||||
"die musik dann erzaehl mir nen witz` und zerschiesst den Befehl.\n"
|
||||
" - Case-insensitive (Brain matched mit re.IGNORECASE), Endsatzzeichen "
|
||||
"werden vorher entfernt — schreibe Lowercase ohne Punkt.\n"
|
||||
" - Mehrere Varianten = mehrere Eintraege (`^pause$`, `^pausiere$`, "
|
||||
"`^stop$`). Sprachlich wechselt Stefan zwischen synonymen Kurzformen.\n"
|
||||
" - reply = kurze Bestaetigung in genau einem Satz, gerne mit Emoji.\n"
|
||||
"\n"
|
||||
"NIE fast_patterns fuer:\n"
|
||||
" - Skills mit kreativem Output (zusammenfassen, generieren, raten).\n"
|
||||
" - Skills mit Parametern die aus Freitext extrahiert werden muessten "
|
||||
" ('spiele jazz' geht nicht — was ist 'jazz'? Lass Claude entscheiden).\n"
|
||||
" - Skills mit Multi-Step-Logik (z.B. Volumen +10 = erst Status holen, "
|
||||
" rechnen, setzen). Wenn unbedingt: in den Skill-Code packen und "
|
||||
" dem Skill einen `action`-Arg geben.\n"
|
||||
"\n"
|
||||
"Beispiel komplett:\n"
|
||||
"```\n"
|
||||
"fast_patterns = [\n"
|
||||
" {\"match\": \"^pause$\",\n"
|
||||
" \"args\": {\"path\": \"/v1/me/player/pause\", \"method\": \"PUT\"},\n"
|
||||
" \"reply\": \"Spotify: pausiert ⏸\"}\n"
|
||||
"]\n"
|
||||
"```\n"
|
||||
"\n"
|
||||
"Stefan-Hinweis 06/2026: das war frueher hardcoded in agent.py fuer "
|
||||
"Spotify und musste fuer jeden neuen Steuer-Skill nachgepflegt werden. "
|
||||
"Jetzt steckt's pro Skill im Manifest — dein Job, ARIA, das gleich "
|
||||
"mitzudenken wenn der Use-Case passt."
|
||||
),
|
||||
},
|
||||
{
|
||||
"migration_key": "seed/skill-rule/no-hardcoded-credentials",
|
||||
"type": "rule",
|
||||
|
||||
@@ -164,6 +164,7 @@ def create_skill(
|
||||
pip_packages: Optional[list[str]] = None,
|
||||
author: str = "aria",
|
||||
config_schema: Optional[list] = None,
|
||||
fast_patterns: Optional[list] = None,
|
||||
) -> dict:
|
||||
"""Legt einen neuen Skill an. Wirft ValueError bei ungueltigen Inputs.
|
||||
|
||||
@@ -213,6 +214,7 @@ def create_skill(
|
||||
"version": "1.0",
|
||||
"author": author,
|
||||
"config_schema": _normalize_config_schema(config_schema),
|
||||
"fast_patterns": _normalize_fast_patterns(fast_patterns),
|
||||
"version_history": [],
|
||||
}
|
||||
write_manifest(name, manifest)
|
||||
@@ -261,6 +263,38 @@ def _normalize_config_schema(schema: Optional[list]) -> list:
|
||||
return out
|
||||
|
||||
|
||||
def _normalize_fast_patterns(patterns: Optional[list]) -> list:
|
||||
"""Filter + Normalisiert fast_patterns. Erwartet Liste von Dicts mit:
|
||||
- match (str) : Regex, wird gegen normalisierten User-Text (lowercase,
|
||||
Endsatzzeichen weg, Whitespace gestrafft) gematched.
|
||||
Sollte mit ^...$ anchored sein damit keine Teilmatches
|
||||
reinrutschen. re.IGNORECASE wird automatisch gesetzt.
|
||||
- args (dict?): Args fuer run_skill — leerer Dict wenn weggelassen.
|
||||
- reply (str) : Fixe Antwort die ohne Claude an den User geht.
|
||||
|
||||
Patterns mit kaputter Regex werden ausgefiltert + geloggt — sonst wuerde
|
||||
der ganze Fast-Path-Pass jedes Mal crashen wenn ARIA mal ein Pattern
|
||||
falsch baut."""
|
||||
if not patterns:
|
||||
return []
|
||||
out = []
|
||||
for p in patterns:
|
||||
if not isinstance(p, dict):
|
||||
continue
|
||||
match = (p.get("match") or "").strip()
|
||||
reply = (p.get("reply") or "").strip()
|
||||
if not match or not reply:
|
||||
continue
|
||||
try:
|
||||
re.compile(match)
|
||||
except re.error as exc:
|
||||
logger.warning("fast_patterns: Regex %r kaputt — geskippt: %s", match, exc)
|
||||
continue
|
||||
args = p.get("args") if isinstance(p.get("args"), dict) else {}
|
||||
out.append({"match": match, "args": args, "reply": reply[:300]})
|
||||
return out
|
||||
|
||||
|
||||
def _setup_venv(skill_dir: Path, pip_packages: list[str]) -> None:
|
||||
venv = skill_dir / "venv"
|
||||
logger.info("venv erstellen: %s", venv)
|
||||
@@ -307,6 +341,8 @@ def update_skill(name: str, patch: dict) -> dict:
|
||||
manifest[k] = v
|
||||
if "config_schema" in patch:
|
||||
manifest["config_schema"] = _normalize_config_schema(patch["config_schema"])
|
||||
if "fast_patterns" in patch:
|
||||
manifest["fast_patterns"] = _normalize_fast_patterns(patch["fast_patterns"])
|
||||
|
||||
# Code austauschen
|
||||
if "entry_code" in patch and patch["entry_code"]:
|
||||
|
||||
@@ -1606,11 +1606,12 @@ class ARIABridge:
|
||||
try:
|
||||
url = f"{current_url}?token={self.rvs_token}"
|
||||
logger.info("[rvs] Verbinde: %s", current_url)
|
||||
# max_size=100MB synchron zum RVS-Server (siehe rvs/server.js).
|
||||
# max_size=1500MB synchron zum RVS-Server (siehe rvs/server.js).
|
||||
# File-Re-Download fuer Anhaenge braucht Platz fuer base64-
|
||||
# inflate (~1.33×). Groessere Files lehnt der file_request-
|
||||
# Handler proaktiv ab bevor's zur 1009-Disconnection kommt.
|
||||
async with websockets.connect(url, max_size=100 * 1024 * 1024) as ws:
|
||||
# inflate (~1.33×) — 1 GB binaer ≈ 1.34 GB base64, plus Margin.
|
||||
# Groessere Files lehnt der file_request-Handler proaktiv ab
|
||||
# bevor's zur 1009-Disconnection kommt.
|
||||
async with websockets.connect(url, max_size=1500 * 1024 * 1024) as ws:
|
||||
self.ws_rvs = ws
|
||||
retry_delay = 2
|
||||
logger.info("[rvs] Verbunden — warte auf App-Nachrichten")
|
||||
@@ -2594,7 +2595,7 @@ class ARIABridge:
|
||||
# Code 1009 (message too big) — RVS-Server droppt, Bridge crasht
|
||||
# im cleanup (websockets-Lib-Bug). Limit deckt typische Videos
|
||||
# und Bilder ab; alles drueber soll der User per SSH abholen.
|
||||
FILE_MAX_BYTES = 70 * 1024 * 1024
|
||||
FILE_MAX_BYTES = 1024 * 1024 * 1024 # 1 GB binaer
|
||||
try:
|
||||
file_size = os.path.getsize(server_path)
|
||||
except OSError as exc:
|
||||
|
||||
@@ -4038,6 +4038,7 @@
|
||||
<div style="color:#E0E0F0;font-size:12px;white-space:nowrap;overflow:hidden;text-overflow:ellipsis;">${badge}<strong>${escapeHtml(f.name)}</strong></div>
|
||||
<div style="color:#555570;font-size:10px;">${fmtSize(f.size)} · ${fmtDate(f.mtime)}</div>
|
||||
</div>
|
||||
<button class="btn secondary" onclick="openFileInline('${encodeURIComponent(f.path)}')" style="padding:2px 8px;font-size:10px;" title="Öffnen">👁</button>
|
||||
<button class="btn secondary" onclick="downloadFile('${encodeURIComponent(f.path)}')" style="padding:2px 8px;font-size:10px;" title="Herunterladen">⬇</button>
|
||||
<button class="btn secondary" onclick="showVersions('${escapeHtml(f.name)}')" style="padding:2px 8px;font-size:10px;" title="Versionen">🕒</button>
|
||||
<button class="btn secondary" onclick="deleteFile('${pathEsc}','${escapeHtml(f.name)}')" style="padding:2px 8px;font-size:10px;color:#FF6B6B;border-color:#FF6B6B;" title="Loeschen">🗑</button>
|
||||
@@ -4174,6 +4175,12 @@
|
||||
window.location.href = '/api/files-download?path=' + encPath;
|
||||
}
|
||||
|
||||
function openFileInline(encPath) {
  // Opens the inline-view endpoint for an already URL-encoded path in a new
  // tab. Intended so the browser can render PDFs, images or text directly;
  // what happens for other MIME types is decided by server and browser.
  const viewUrl = '/api/files-view?path=' + encPath;
  const target = '_blank';
  window.open(viewUrl, target, 'noopener');
}
|
||||
|
||||
async function deleteFile(p, name) {
|
||||
if (!confirm(`Datei "${name}" wirklich löschen?\n\nIn allen Chat-Bubbles wird sie als gelöscht markiert.`)) return;
|
||||
try {
|
||||
|
||||
Binary file not shown.
@@ -26,6 +26,9 @@ services:
|
||||
- ./updates:/updates # APK-Dateien fuer Auto-Update
|
||||
environment:
|
||||
- MAX_SESSIONS=10
|
||||
# 4 GB V8-Heap — sonst OOM beim Empfang von 1 GB-Files
|
||||
# (base64 inflated ~1.34 GB plus WS-Frame-Margin).
|
||||
- NODE_OPTIONS=--max-old-space-size=4096
|
||||
networks:
|
||||
- aria-rvs-net
|
||||
|
||||
|
||||
+10
-5
@@ -93,15 +93,20 @@ function cleanupRooms() {
|
||||
// als WS-Message `oauth_callback` und antwortet dem Browser mit einer
|
||||
// schoenen "Tab schliessen"-Seite.
|
||||
//
|
||||
// maxPayload 100MB: TTS-Streaming + Voice-Upload (WAV als base64) +
|
||||
// maxPayload 1500MB: TTS-Streaming + Voice-Upload (WAV als base64) +
|
||||
// audio_pcm Chunks koennen die ws-Library Default 1MB ueberschreiten.
|
||||
// Plus: file_request/file_response fuer Re-Download von Anhaengen.
|
||||
// 40 MB MP4 → ~53 MB base64 → vorher mit 50 MB Limit zerschossen
|
||||
// (Code 1009 message too big, Bridge crashed im cleanup). 100 MB
|
||||
// deckt bis ~70 MB binaer ab; groessere Files werden Bridge-seitig
|
||||
// abgewiesen (siehe file_request-Handler) bevor die WS abreisst.
|
||||
// (Code 1009 message too big, Bridge crashed im cleanup). 1500 MB
|
||||
// deckt bis ~1 GB binaer ab (mit base64 ~33% Overhead + WS-Frame-
|
||||
// Margin); groessere Files werden Bridge-seitig abgewiesen (siehe
|
||||
// file_request-Handler) bevor die WS abreisst.
|
||||
//
|
||||
// WICHTIG: Node-Default-Heap ist ~1.5 GB. Fuer 1 GB-Files muss der
|
||||
// Container mit --max-old-space-size=4096 (oder NODE_OPTIONS env var)
|
||||
// gestartet werden, sonst OOM-Crash beim Empfang.
|
||||
const httpServer = http.createServer(handleHttpRequest);
|
||||
const wss = new WebSocketServer({ noServer: true, maxPayload: 100 * 1024 * 1024 });
|
||||
const wss = new WebSocketServer({ noServer: true, maxPayload: 1500 * 1024 * 1024 });
|
||||
|
||||
// HTTP-Upgrade-Pfad → an WebSocket-Server reichen
|
||||
httpServer.on("upgrade", (req, socket, head) => {
|
||||
|
||||
+76
-1
@@ -109,7 +109,27 @@ class WhisperRunner:
|
||||
segments, info = self.model.transcribe(
|
||||
audio, language=language, beam_size=beam_size, vad_filter=vad_filter,
|
||||
)
|
||||
text = " ".join(seg.text.strip() for seg in segments)
|
||||
# Per-segment no_speech_prob auswerten: faster-whisper liefert das
|
||||
# mit. Bei Stille/Rauschen halluziniert Whisper bekannte YouTube-
|
||||
# Untertitel-Patterns ("Untertitelung des ZDF", "Vielen Dank fuer's
|
||||
# Zuschauen", ...). Segmente mit hohem no_speech_prob filtern wir
|
||||
# raus. Plus: bekannte Hallucination-Patterns explizit blacklisten.
|
||||
kept = []
|
||||
for seg in segments:
|
||||
# no_speech_prob: 1.0 = sicher Stille; 0.0 = sicher Sprache.
|
||||
# Threshold 0.6 ist nicht zu strikt (echte leise Sprache geht
|
||||
# noch durch) und nicht zu locker (Halluzinationen werden
|
||||
# zuverlaessig erwischt).
|
||||
nsp = getattr(seg, "no_speech_prob", 0.0)
|
||||
if nsp is not None and nsp >= 0.6:
|
||||
continue
|
||||
stext = (seg.text or "").strip()
|
||||
if not stext:
|
||||
continue
|
||||
if _is_known_hallucination(stext):
|
||||
continue
|
||||
kept.append(stext)
|
||||
text = " ".join(kept)
|
||||
return text, info.duration
|
||||
|
||||
loop = asyncio.get_event_loop()
|
||||
@@ -117,6 +137,61 @@ class WhisperRunner:
|
||||
return await loop.run_in_executor(None, _run)
|
||||
|
||||
|
||||
# Bekannte Whisper-Halluzinations-Patterns. Tritt typisch bei Stille oder
|
||||
# Rauschen auf — Whispers Trainings-Corpus enthaelt Stunden von YouTube-
|
||||
# Videos mit diesen Untertitel-Outros. Substring-Match (case-insensitive)
|
||||
# ueber gestrippten Text. Wenn ein Segment EXAKT (nach Normalisierung) so
|
||||
# aussieht, ist's mit ~99% Sicherheit eine Halluzination.
|
||||
_HALLUCINATION_PHRASES = (
|
||||
"untertitelung des zdf",
|
||||
"untertitel im auftrag des zdf",
|
||||
"untertitelung im auftrag des zdf",
|
||||
"untertitel der amara.org community",
|
||||
"untertitel von stephanie geiges",
|
||||
"amara.org",
|
||||
"untertitel: kerstin grass",
|
||||
"vielen dank fuers zuschauen",
|
||||
"vielen dank fürs zuschauen",
|
||||
"vielen dank für's zuschauen",
|
||||
"vielen dank fuer's zuschauen",
|
||||
"vielen dank für das zuschauen",
|
||||
"vielen dank fuer das zuschauen",
|
||||
"danke für's zuschauen",
|
||||
"danke fürs zuschauen",
|
||||
"danke fuers zuschauen",
|
||||
"subs by",
|
||||
"subtitle by",
|
||||
"subtitles by",
|
||||
"thanks for watching",
|
||||
)
|
||||
|
||||
|
||||
def _normalize_for_hallu(text: str) -> str:
|
||||
"""Lowercase + trailing-Satzzeichen/Whitespace strippen. Jahreszahlen
|
||||
(4 Ziffern am Ende) auch entfernen — 'Untertitelung des ZDF, 2020'
|
||||
matcht damit auf 'untertitelung des zdf'."""
|
||||
t = text.lower().strip()
|
||||
# Entferne trailing punctuation incl. comma+digits
|
||||
while t and t[-1] in ".,!? \t\n":
|
||||
t = t[:-1]
|
||||
# 4-stellige Jahreszahl am Ende
|
||||
import re
|
||||
t = re.sub(r"[,\s]+\d{4}$", "", t).strip()
|
||||
while t and t[-1] in ".,!? \t\n":
|
||||
t = t[:-1]
|
||||
return t
|
||||
|
||||
|
||||
def _is_known_hallucination(text: str) -> bool:
    """Tell whether a transcript segment should be dropped as a hallucination.

    Args:
        text: Raw segment text.

    Returns:
        ``True`` when the normalized text is empty or contains any entry of
        ``_HALLUCINATION_PHRASES`` as a substring, otherwise ``False``.
    """
    cleaned = _normalize_for_hallu(text)
    # Text that is empty after normalization is treated as a hit as well.
    return (not cleaned) or any(
        phrase in cleaned for phrase in _HALLUCINATION_PHRASES
    )
|
||||
|
||||
|
||||
def ffmpeg_to_float32(audio_b64: str, mime_type: str) -> np.ndarray:
|
||||
"""Dekodiert beliebiges Audio-Format → 16kHz mono float32 PCM."""
|
||||
if "mp4" in mime_type or "m4a" in mime_type or "aac" in mime_type:
|
||||
|
||||
Reference in New Issue
Block a user