Compare commits
6 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| e04bbef361 | |||
| e82e07e3a2 | |||
| 886b4409d2 | |||
| bcea49365d | |||
| 05eb7ed144 | |||
| ddfc4261e5 |
@@ -79,8 +79,8 @@ android {
|
|||||||
applicationId "com.ariacockpit"
|
applicationId "com.ariacockpit"
|
||||||
minSdkVersion rootProject.ext.minSdkVersion
|
minSdkVersion rootProject.ext.minSdkVersion
|
||||||
targetSdkVersion rootProject.ext.targetSdkVersion
|
targetSdkVersion rootProject.ext.targetSdkVersion
|
||||||
versionCode 10900
|
versionCode 10901
|
||||||
versionName "0.1.9.0"
|
versionName "0.1.9.1"
|
||||||
// Fallback fuer Libraries mit Product Flavors
|
// Fallback fuer Libraries mit Product Flavors
|
||||||
missingDimensionStrategy 'react-native-camera', 'general'
|
missingDimensionStrategy 'react-native-camera', 'general'
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -49,6 +49,12 @@ class OpenWakeWordModule(reactContext: ReactApplicationContext) : ReactContextBa
|
|||||||
private const val EMBEDDING_DIM = 96
|
private const val EMBEDDING_DIM = 96
|
||||||
private const val MEL_BINS = 32
|
private const val MEL_BINS = 32
|
||||||
private const val DEFAULT_WW_INPUT_FRAMES = 16 // Fallback wenn Modell-Metadata fehlt
|
private const val DEFAULT_WW_INPUT_FRAMES = 16 // Fallback wenn Modell-Metadata fehlt
|
||||||
|
// Nach record.startRecording() erzeugt das Mikro fuer ~1s einen Spin-up-Spike
|
||||||
|
// (DC-Offset, AGC-Settling) der vom Wake-Word-Klassifikator faelschlich als
|
||||||
|
// Trigger eingestuft werden kann. Folge: App pausiert beim Oeffnen die Musik,
|
||||||
|
// weil der False-Positive die AudioFocus-Switch-Logik anwirft (Stefan-Bug 06/2026).
|
||||||
|
// Loesung: in dieser Phase keine Detections an JS weiterleiten.
|
||||||
|
private const val STARTUP_SUPPRESSION_MS = 1500L
|
||||||
}
|
}
|
||||||
|
|
||||||
private val env: OrtEnvironment = OrtEnvironment.getEnvironment()
|
private val env: OrtEnvironment = OrtEnvironment.getEnvironment()
|
||||||
@@ -95,6 +101,8 @@ class OpenWakeWordModule(reactContext: ReactApplicationContext) : ReactContextBa
|
|||||||
private val embBuffer: ArrayDeque<FloatArray> = ArrayDeque(32) // Ringpuffer letzter Embeddings
|
private val embBuffer: ArrayDeque<FloatArray> = ArrayDeque(32) // Ringpuffer letzter Embeddings
|
||||||
private var consecutiveAboveThreshold: Int = 0
|
private var consecutiveAboveThreshold: Int = 0
|
||||||
private var lastDetectionMs: Long = 0L
|
private var lastDetectionMs: Long = 0L
|
||||||
|
// Zeitpunkt des letzten startRecording — fuer STARTUP_SUPPRESSION_MS-Fenster
|
||||||
|
private var recordingStartedMs: Long = 0L
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Initialisiert die ONNX-Sessions fuer ein bestimmtes Wake-Word.
|
* Initialisiert die ONNX-Sessions fuer ein bestimmtes Wake-Word.
|
||||||
@@ -206,6 +214,7 @@ class OpenWakeWordModule(reactContext: ReactApplicationContext) : ReactContextBa
|
|||||||
resetInferenceState()
|
resetInferenceState()
|
||||||
running.set(true)
|
running.set(true)
|
||||||
record.startRecording()
|
record.startRecording()
|
||||||
|
recordingStartedMs = System.currentTimeMillis()
|
||||||
|
|
||||||
// PARTIAL_WAKE_LOCK greifen damit die CPU nicht in Doze geht und
|
// PARTIAL_WAKE_LOCK greifen damit die CPU nicht in Doze geht und
|
||||||
// die JS-Bridge die emit("WakeWordDetected")-Events live verarbeitet.
|
// die JS-Bridge die emit("WakeWordDetected")-Events live verarbeitet.
|
||||||
@@ -313,6 +322,11 @@ class OpenWakeWordModule(reactContext: ReactApplicationContext) : ReactContextBa
|
|||||||
}
|
}
|
||||||
|
|
||||||
private fun emitDetected() {
|
private fun emitDetected() {
|
||||||
|
val sinceStart = System.currentTimeMillis() - recordingStartedMs
|
||||||
|
if (sinceStart in 0 until STARTUP_SUPPRESSION_MS) {
|
||||||
|
Log.i(TAG, "Wake-Word emit unterdrueckt (sinceStart=${sinceStart}ms < ${STARTUP_SUPPRESSION_MS}ms — Mikro-Spin-up-Spike)")
|
||||||
|
return
|
||||||
|
}
|
||||||
val params = com.facebook.react.bridge.Arguments.createMap().apply {
|
val params = com.facebook.react.bridge.Arguments.createMap().apply {
|
||||||
putString("model", modelName)
|
putString("model", modelName)
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
{
|
{
|
||||||
"name": "aria-cockpit",
|
"name": "aria-cockpit",
|
||||||
"version": "0.1.9.0",
|
"version": "0.1.9.1",
|
||||||
"private": true,
|
"private": true,
|
||||||
"scripts": {
|
"scripts": {
|
||||||
"android": "react-native run-android",
|
"android": "react-native run-android",
|
||||||
|
|||||||
@@ -23,6 +23,7 @@ import {
|
|||||||
} from 'react-native';
|
} from 'react-native';
|
||||||
|
|
||||||
import brainApi, { Trigger } from '../services/brainApi';
|
import brainApi, { Trigger } from '../services/brainApi';
|
||||||
|
import rvs from '../services/rvs';
|
||||||
|
|
||||||
const COL_ACTIVE = '#34C759';
|
const COL_ACTIVE = '#34C759';
|
||||||
const COL_INACTIVE = '#555570';
|
const COL_INACTIVE = '#555570';
|
||||||
@@ -65,6 +66,17 @@ export const TriggerBrowser: React.FC = () => {
|
|||||||
|
|
||||||
useEffect(() => { load(); }, [load]);
|
useEffect(() => { load(); }, [load]);
|
||||||
|
|
||||||
|
// Auto-Reload bei RVS-Reconnect — sonst zeigt die Liste den Fast-Fail-
|
||||||
|
// Fehler aus brainApi ewig an obwohl die Verbindung schon wieder da ist.
|
||||||
|
useEffect(() => {
|
||||||
|
const unsub = rvs.onStateChange((state) => {
|
||||||
|
if (state === 'connected') {
|
||||||
|
load();
|
||||||
|
}
|
||||||
|
});
|
||||||
|
return () => unsub();
|
||||||
|
}, [load]);
|
||||||
|
|
||||||
const visible = items.filter(t => {
|
const visible = items.filter(t => {
|
||||||
if (filter === 'active') return t.active;
|
if (filter === 'active') return t.active;
|
||||||
if (filter === 'inactive') return !t.active;
|
if (filter === 'inactive') return !t.active;
|
||||||
|
|||||||
@@ -522,8 +522,9 @@ const ChatScreen: React.FC = () => {
|
|||||||
const sub = AppState.addEventListener('change', (next) => {
|
const sub = AppState.addEventListener('change', (next) => {
|
||||||
if (next === 'background' || next === 'inactive') {
|
if (next === 'background' || next === 'inactive') {
|
||||||
lastBackgroundAt = Date.now();
|
lastBackgroundAt = Date.now();
|
||||||
|
wakeWordService.setBackground();
|
||||||
} else if (lastState !== 'active' && next === 'active') {
|
} else if (lastState !== 'active' && next === 'active') {
|
||||||
wakeWordService.setResumeCooldown(3000);
|
wakeWordService.setForeground();
|
||||||
const bgDur = lastBackgroundAt > 0 ? Date.now() - lastBackgroundAt : 0;
|
const bgDur = lastBackgroundAt > 0 ? Date.now() - lastBackgroundAt : 0;
|
||||||
// Bei laengerer Hintergrund-Zeit (>30s): pruefen ob ein frisches
|
// Bei laengerer Hintergrund-Zeit (>30s): pruefen ob ein frisches
|
||||||
// Wake-Word getriggert wurde wahrend die App weg war — wenn ja,
|
// Wake-Word getriggert wurde wahrend die App weg war — wenn ja,
|
||||||
|
|||||||
@@ -21,9 +21,37 @@ import {
|
|||||||
PermissionsAndroid,
|
PermissionsAndroid,
|
||||||
useWindowDimensions,
|
useWindowDimensions,
|
||||||
DeviceEventEmitter,
|
DeviceEventEmitter,
|
||||||
|
NativeModules,
|
||||||
} from 'react-native';
|
} from 'react-native';
|
||||||
import AsyncStorage from '@react-native-async-storage/async-storage';
|
import AsyncStorage from '@react-native-async-storage/async-storage';
|
||||||
import RNFS from 'react-native-fs';
|
import RNFS from 'react-native-fs';
|
||||||
|
|
||||||
|
const { FileOpener } = NativeModules as {
|
||||||
|
FileOpener?: { open: (filePath: string, mimeType: string) => Promise<boolean> };
|
||||||
|
};
|
||||||
|
|
||||||
|
// Extension → MIME type table used by guessMimeFromName. Keys are lowercase
// extensions without the leading dot. A Map is used (not a plain object) so
// that extensions such as "constructor" cannot hit prototype properties.
const MIME_BY_EXTENSION: ReadonlyMap<string, string> = new Map([
  ['pdf', 'application/pdf'],
  ['jpg', 'image/jpeg'],
  ['jpeg', 'image/jpeg'],
  ['png', 'image/png'],
  ['gif', 'image/gif'],
  ['webp', 'image/webp'],
  ['mp3', 'audio/mpeg'],
  ['wav', 'audio/wav'],
  ['ogg', 'audio/ogg'],
  ['opus', 'audio/ogg'],
  // .mp4 is a video container; only .m4a is the audio-only variant.
  ['mp4', 'video/mp4'],
  ['m4a', 'audio/mp4'],
  ['webm', 'video/webm'],
  ['txt', 'text/plain'],
  ['md', 'text/markdown'],
  ['json', 'application/json'],
  ['csv', 'text/csv'],
  ['html', 'text/html'],
  ['htm', 'text/html'],
  ['zip', 'application/zip'],
]);

/**
 * Guesses a MIME type from a file name, for the FileOpener intent. Android
 * uses the MIME type to pick the app that handles the file.
 *
 * The match is case-insensitive and only looks at the text after the last
 * dot. Names without a dot, or with an unknown extension, yield
 * 'application/octet-stream'.
 *
 * @param name File name (with or without a directory part).
 * @returns The guessed MIME type.
 */
function guessMimeFromName(name: string): string {
  const lower = name.toLowerCase();
  const dot = lower.lastIndexOf('.');
  if (dot < 0) {
    return 'application/octet-stream';
  }
  return MIME_BY_EXTENSION.get(lower.slice(dot + 1)) ?? 'application/octet-stream';
}
|
||||||
import DocumentPicker from 'react-native-document-picker';
|
import DocumentPicker from 'react-native-document-picker';
|
||||||
import rvs, { ConnectionState, RVSMessage, ConnectionConfig, ConnectionLogEntry } from '../services/rvs';
|
import rvs, { ConnectionState, RVSMessage, ConnectionConfig, ConnectionLogEntry } from '../services/rvs';
|
||||||
import {
|
import {
|
||||||
@@ -514,9 +542,11 @@ const SettingsScreen: React.FC = () => {
|
|||||||
if (message.type === ('file_response' as any)) {
|
if (message.type === ('file_response' as any)) {
|
||||||
const p: any = message.payload || {};
|
const p: any = message.payload || {};
|
||||||
const reqId = (p.requestId as string) || '';
|
const reqId = (p.requestId as string) || '';
|
||||||
if (!reqId.startsWith('single-')) return; // nicht unsere Anfrage
|
const isDownload = reqId.startsWith('single-');
|
||||||
|
const isOpen = reqId.startsWith('open-');
|
||||||
|
if (!isDownload && !isOpen) return; // andere Caller (ChatScreen etc.)
|
||||||
if (p.error) {
|
if (p.error) {
|
||||||
ToastAndroid.show('Download fehlgeschlagen: ' + p.error, ToastAndroid.LONG);
|
ToastAndroid.show((isOpen ? 'Öffnen' : 'Download') + ' fehlgeschlagen: ' + p.error, ToastAndroid.LONG);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
const b64 = (p.base64 as string) || '';
|
const b64 = (p.base64 as string) || '';
|
||||||
@@ -526,10 +556,28 @@ const SettingsScreen: React.FC = () => {
|
|||||||
'aria-download';
|
'aria-download';
|
||||||
(async () => {
|
(async () => {
|
||||||
try {
|
try {
|
||||||
|
if (isOpen) {
|
||||||
|
// Open-Pfad: nach Caches schreiben + per FileOpener mit System-
|
||||||
|
// Viewer oeffnen. Caches damit der Speicher kein Dauer-Muell wird.
|
||||||
|
const dir = RNFS.CachesDirectoryPath;
|
||||||
|
const target = `${dir}/${fileName}`;
|
||||||
|
await RNFS.writeFile(target, b64, 'base64');
|
||||||
|
const mime = (p.mimeType as string) || guessMimeFromName(fileName);
|
||||||
|
if (FileOpener?.open) {
|
||||||
|
try {
|
||||||
|
await FileOpener.open(target, mime);
|
||||||
|
} catch (e: any) {
|
||||||
|
ToastAndroid.show('Öffnen fehlgeschlagen: ' + (e?.message || e), ToastAndroid.LONG);
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
ToastAndroid.show('FileOpener-Modul nicht verfügbar — APK neu bauen', ToastAndroid.LONG);
|
||||||
|
}
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
// Download-Pfad: nach Downloads-Ordner schreiben, mit Suffix bei
|
||||||
|
// Namens-Konflikt damit nichts ueberschrieben wird.
|
||||||
const dir = RNFS.DownloadDirectoryPath;
|
const dir = RNFS.DownloadDirectoryPath;
|
||||||
const filePath = `${dir}/${fileName}`;
|
const filePath = `${dir}/${fileName}`;
|
||||||
// Falls Datei schon existiert: Suffix anhaengen damit nichts
|
|
||||||
// ueberschrieben wird.
|
|
||||||
let target = filePath;
|
let target = filePath;
|
||||||
let i = 1;
|
let i = 1;
|
||||||
while (await RNFS.exists(target)) {
|
while (await RNFS.exists(target)) {
|
||||||
@@ -660,6 +708,20 @@ const SettingsScreen: React.FC = () => {
|
|||||||
};
|
};
|
||||||
}, []);
|
}, []);
|
||||||
|
|
||||||
|
// Datei-Manager: Auto-Reload bei RVS-Reconnect — sonst zeigt das offene
|
||||||
|
// Modal den Fehler "Connection refused" ewig an, obwohl die Verbindung
|
||||||
|
// schon wieder da ist. Triggered nur wenn das Modal gerade offen ist.
|
||||||
|
useEffect(() => {
|
||||||
|
const unsub = rvs.onStateChange((state) => {
|
||||||
|
if (state === 'connected' && fileManagerOpen) {
|
||||||
|
setFileManagerError('');
|
||||||
|
setFileManagerLoading(true);
|
||||||
|
rvs.send('file_list_request' as any, {});
|
||||||
|
}
|
||||||
|
});
|
||||||
|
return () => unsub();
|
||||||
|
}, [fileManagerOpen]);
|
||||||
|
|
||||||
// --- QR-Code scannen ---
|
// --- QR-Code scannen ---
|
||||||
|
|
||||||
const openQRScanner = useCallback(() => {
|
const openQRScanner = useCallback(() => {
|
||||||
@@ -1040,6 +1102,30 @@ const SettingsScreen: React.FC = () => {
|
|||||||
{fmtSize(f.size)} · {new Date(f.mtime).toLocaleString('de-DE')}
|
{fmtSize(f.size)} · {new Date(f.mtime).toLocaleString('de-DE')}
|
||||||
</Text>
|
</Text>
|
||||||
</View>
|
</View>
|
||||||
|
<TouchableOpacity
|
||||||
|
onPress={() => {
|
||||||
|
rvs.send('file_request' as any, {
|
||||||
|
serverPath: f.path,
|
||||||
|
requestId: 'open-' + Date.now(),
|
||||||
|
});
|
||||||
|
ToastAndroid.show('Öffne ' + f.name + '…', ToastAndroid.SHORT);
|
||||||
|
}}
|
||||||
|
style={{padding:8}}
|
||||||
|
>
|
||||||
|
<Text style={{color:'#0096FF', fontSize:18}}>👁</Text>
|
||||||
|
</TouchableOpacity>
|
||||||
|
<TouchableOpacity
|
||||||
|
onPress={() => {
|
||||||
|
rvs.send('file_request' as any, {
|
||||||
|
serverPath: f.path,
|
||||||
|
requestId: 'single-' + Date.now(),
|
||||||
|
});
|
||||||
|
ToastAndroid.show('Download läuft…', ToastAndroid.SHORT);
|
||||||
|
}}
|
||||||
|
style={{padding:8}}
|
||||||
|
>
|
||||||
|
<Text style={{color:'#34C759', fontSize:18}}>⬇</Text>
|
||||||
|
</TouchableOpacity>
|
||||||
<TouchableOpacity
|
<TouchableOpacity
|
||||||
onPress={() => {
|
onPress={() => {
|
||||||
// path-relativ-zu-uploads = nur der Dateiname,
|
// path-relativ-zu-uploads = nur der Dateiname,
|
||||||
|
|||||||
@@ -77,6 +77,15 @@ interface SendOpts {
|
|||||||
|
|
||||||
function _send(path: string, opts: SendOpts = {}): Promise<AnyJson> {
|
function _send(path: string, opts: SendOpts = {}): Promise<AnyJson> {
|
||||||
_ensureListener();
|
_ensureListener();
|
||||||
|
// Fast-Fail wenn RVS nicht verbunden — sonst tickt der Timeout 30s und
|
||||||
|
// der TriggerBrowser / Dateimanager zeigt ne ewig drehende Spinner.
|
||||||
|
// Stefan-Bug 06/2026: "Connection refused, App haengt 30 Sekunden".
|
||||||
|
const rvsState = rvs.getState();
|
||||||
|
if (rvsState !== 'connected') {
|
||||||
|
return Promise.reject(new Error(
|
||||||
|
`Keine Verbindung zum Brain (RVS: ${rvsState}). Warte auf Reconnect...`,
|
||||||
|
));
|
||||||
|
}
|
||||||
return new Promise((resolve, reject) => {
|
return new Promise((resolve, reject) => {
|
||||||
const requestId = _newRequestId();
|
const requestId = _newRequestId();
|
||||||
const timer = setTimeout(() => {
|
const timer = setTimeout(() => {
|
||||||
|
|||||||
@@ -91,6 +91,18 @@ class WakeWordService {
|
|||||||
* ein false-positive war (Wake-Word im Hintergrund getriggert waehrend
|
* ein false-positive war (Wake-Word im Hintergrund getriggert waehrend
|
||||||
* Stefan gar nicht in der App war). */
|
* Stefan gar nicht in der App war). */
|
||||||
private lastTriggerAt: number = 0;
|
private lastTriggerAt: number = 0;
|
||||||
|
/** App liegt im Hintergrund — alle Detections sperren. Wird vom
|
||||||
|
* AppState-Listener im ChatScreen via setBackground/setForeground gesetzt.
|
||||||
|
* Hintergrund-Detections sind quasi immer false-positives (TV, Husten,
|
||||||
|
* AudioFocus-Switch beim Wechsel zu Musik etc.). */
|
||||||
|
private inBackground: boolean = false;
|
||||||
|
/** Re-Entry-Guard fuer onWakeDetected: native kann mehrere
|
||||||
|
* WakeWordDetected-Events emitten BEVOR OpenWakeWord.stop() in JS
|
||||||
|
* resolved (Bridge-Queue + Doze-Backlog). Mit dem Flag wird das zweite
|
||||||
|
* Event sofort verworfen. Reset beim Verlassen von 'conversing'.
|
||||||
|
* Ausnahme: bargeListening → Barge-In ist ein legitimer neuer Trigger
|
||||||
|
* waehrend ARIA noch redet, NICHT vom Guard blockieren. */
|
||||||
|
private detectionInProgress: boolean = false;
|
||||||
|
|
||||||
private keyword: WakeKeyword = DEFAULT_KEYWORD;
|
private keyword: WakeKeyword = DEFAULT_KEYWORD;
|
||||||
private nativeReady: boolean = false;
|
private nativeReady: boolean = false;
|
||||||
@@ -228,14 +240,44 @@ class WakeWordService {
|
|||||||
console.log('[WakeWord] Cooldown aktiv fuer %dms', ms);
|
console.log('[WakeWord] Cooldown aktiv fuer %dms', ms);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Marks the app as backgrounded so that every wake-word detection is
 * rejected. While the app is in the background the user practically never
 * wants to start a new dialog — what arrives as a "wake word" is coughing,
 * TV audio or an AudioFocus switch.
 */
setBackground(): void {
  console.log('[WakeWord] App im Hintergrund — Detections gesperrt');
  this.inBackground = true;
}
|
||||||
|
|
||||||
|
/**
 * Marks the app as foregrounded: detections are allowed again, but only
 * after a 3 s cooldown that guards against the AudioFocus / AudioTrack spike
 * arriving right after resume. Replaces the former setResumeCooldown(3000)
 * call pattern.
 */
setForeground(): void {
  const resumeCooldownMs = 3000;
  this.inBackground = false;
  this.cooldownUntilMs = Date.now() + resumeCooldownMs;
  console.log('[WakeWord] App im Vordergrund — Cooldown 3s aktiv');
}
|
||||||
|
|
||||||
/** Wake-Word getriggert: Native-Modul pausieren, Konversation starten. */
|
/** Wake-Word getriggert: Native-Modul pausieren, Konversation starten. */
|
||||||
private async onWakeDetected(): Promise<void> {
|
private async onWakeDetected(): Promise<void> {
|
||||||
|
if (this.inBackground) {
|
||||||
|
console.log('[WakeWord] Trigger ignoriert (App im Hintergrund)');
|
||||||
|
import('./logger').then(m => m.reportAppDebug('wake.detect', 'ignored: app in background')).catch(()=>{});
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
// Re-Entry-Guard: blocken wenn ein Detection-Zyklus schon laeuft.
|
||||||
|
// Ausnahme: Barge-In waehrend ARIA-TTS ist ein legitimer neuer Trigger.
|
||||||
|
if (this.detectionInProgress && !this.bargeListening) {
|
||||||
|
console.log('[WakeWord] Trigger ignoriert (Detection-Zyklus laeuft schon — Native-Doppel-Event-Race)');
|
||||||
|
import('./logger').then(m => m.reportAppDebug('wake.detect', 'ignored: detectionInProgress')).catch(()=>{});
|
||||||
|
return;
|
||||||
|
}
|
||||||
const now = Date.now();
|
const now = Date.now();
|
||||||
if (now < this.cooldownUntilMs) {
|
if (now < this.cooldownUntilMs) {
|
||||||
const left = this.cooldownUntilMs - now;
|
const left = this.cooldownUntilMs - now;
|
||||||
console.log('[WakeWord] Trigger ignoriert (Cooldown noch %dms aktiv — wahrscheinlich App-Resume-Spike)', left);
|
console.log('[WakeWord] Trigger ignoriert (Cooldown noch %dms aktiv — wahrscheinlich App-Resume-Spike)', left);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
this.detectionInProgress = true;
|
||||||
console.log('[WakeWord] Wake-Word "%s" erkannt! (state=%s, barge=%s)',
|
console.log('[WakeWord] Wake-Word "%s" erkannt! (state=%s, barge=%s)',
|
||||||
this.keyword, this.state, this.bargeListening);
|
this.keyword, this.state, this.bargeListening);
|
||||||
import('./logger').then(m => m.reportAppDebug('wake.detect',
|
import('./logger').then(m => m.reportAppDebug('wake.detect',
|
||||||
@@ -503,7 +545,12 @@ class WakeWordService {
|
|||||||
|
|
||||||
/**
 * Switches the service to `state` and notifies all state callbacks.
 * Does nothing when the state is unchanged.
 */
private setState(state: WakeWordState): void {
  if (this.state === state) {
    return;
  }
  // The state differs from the current one, so starting out in 'conversing'
  // means we are leaving it: the detection cycle is over and the re-entry
  // guard can be released.
  const leavingConversation = this.state === 'conversing';
  this.state = state;
  if (leavingConversation) {
    this.detectionInProgress = false;
  }
  this.stateCallbacks.forEach(notify => notify(state));
}
|
||||||
|
|||||||
@@ -782,6 +782,63 @@ META_TOOLS = [
|
|||||||
]
|
]
|
||||||
|
|
||||||
|
|
||||||
|
# ── Spotify Fast-Path ──────────────────────────────────────────────────
|
||||||
|
#
|
||||||
|
# Einfache Media-Commands (nächster Track, Pause, lauter, ...) gehen
|
||||||
|
# direkt aufs spotify-Skill statt durch die volle Claude-Reasoning-Pipeline.
|
||||||
|
# Latenz: ~1-1.5s statt 5-10s. Stefan-Bug 06/2026: "ARIA braucht ewig nur
|
||||||
|
# fuer 'nächster Track'". Wenn ein Pattern nicht matcht, faellt der Call
|
||||||
|
# wie bisher in die normale chat()-Loop und Claude entscheidet — keine
|
||||||
|
# Funktionalitaet geht verloren.
|
||||||
|
#
|
||||||
|
# Patterns sind anchored (^...$) gegen normalisierten Text (lowercase,
|
||||||
|
# Endsatzzeichen weg, Whitespace gestrafft). Bewusst eng gefasst: lieber
|
||||||
|
# einmal in Claude fallen als ein Kontextsatz wie "ich war kurz zurueck"
|
||||||
|
# faelschlich als "previous track" interpretieren.
|
||||||
|
_SPOTIFY_FAST_PATTERNS: list[tuple[str, str, str, Optional[int]]] = [
|
||||||
|
# (regex, action, http-method, volume-delta)
|
||||||
|
# NEXT
|
||||||
|
(r"^(naechster|nächster|naechste|nächste) (track|song|titel|lied)$", "next", "POST", None),
|
||||||
|
(r"^(weiter|skip|ueberspringen|überspringen|ueberspring|überspring)$", "next", "POST", None),
|
||||||
|
# PREVIOUS
|
||||||
|
(r"^(vorheriger|vorheriges|letzter|letztes) (track|song|titel|lied)$", "previous", "POST", None),
|
||||||
|
(r"^(zurueck|zurück)$", "previous", "POST", None),
|
||||||
|
# PAUSE
|
||||||
|
(r"^(pause|pausiere|pausieren|stop|stopp|halt)$", "pause", "PUT", None),
|
||||||
|
(r"^(musik|spotify) (pause|aus|stop|stopp)$", "pause", "PUT", None),
|
||||||
|
# PLAY / RESUME
|
||||||
|
(r"^(play|weiterspielen|weiter spielen|fortsetzen|abspielen)$", "play", "PUT", None),
|
||||||
|
(r"^(musik|spotify) (an|wieder an|weiter|fortsetzen)$", "play", "PUT", None),
|
||||||
|
# VOLUME — Delta wird auf den aktuell ermittelten Volume-Wert aufaddiert
|
||||||
|
(r"^(lauter|musik lauter|spotify lauter|volume hoch|lautstärke hoch)$", "volume", "PUT", 10),
|
||||||
|
(r"^(leiser|musik leiser|spotify leiser|volume runter|lautstärke runter)$", "volume", "PUT", -10),
|
||||||
|
(r"^(viel lauter|deutlich lauter)$", "volume", "PUT", 20),
|
||||||
|
(r"^(viel leiser|deutlich leiser)$", "volume", "PUT", -20),
|
||||||
|
]
|
||||||
|
|
||||||
|
|
||||||
|
def _spotify_fast_match(text: str) -> Optional[tuple[str, str, Optional[int]]]:
|
||||||
|
"""Returns (action, method, volume_delta) wenn ein Pattern matcht — sonst None."""
|
||||||
|
norm = (text or "").strip().lower()
|
||||||
|
norm = re.sub(r"[.!?]+$", "", norm)
|
||||||
|
norm = re.sub(r"\s+", " ", norm)
|
||||||
|
if not norm:
|
||||||
|
return None
|
||||||
|
for rx, action, method, delta in _SPOTIFY_FAST_PATTERNS:
|
||||||
|
if re.match(rx, norm):
|
||||||
|
return action, method, delta
|
||||||
|
return None
|
||||||
|
|
||||||
|
|
||||||
|
def _run_spotify_call(path: str, method: str, body: Optional[dict] = None) -> dict:
    """Invoke the "spotify" skill for a single API call.

    The skill receives ``path`` and ``method``; when ``body`` is given it is
    passed along as a JSON-encoded string under ``"body"``.

    Returns:
        Whatever ``skills_mod.run_skill`` returns for the call.
    """
    skill_args: dict = {
        "path": path,
        "method": method,
        **({"body": json.dumps(body)} if body is not None else {}),
    }
    return skills_mod.run_skill("spotify", skill_args, timeout_sec=15)
|
||||||
|
|
||||||
|
|
||||||
def _skill_to_tool(s: dict) -> dict:
|
def _skill_to_tool(s: dict) -> dict:
|
||||||
"""Mappt einen Skill auf ein OpenAI-Function-Tool."""
|
"""Mappt einen Skill auf ein OpenAI-Function-Tool."""
|
||||||
args = s.get("args") or []
|
args = s.get("args") or []
|
||||||
@@ -849,6 +906,73 @@ class Agent:
|
|||||||
self._pending_events = []
|
self._pending_events = []
|
||||||
return events
|
return events
|
||||||
|
|
||||||
|
def _try_spotify_fast_path(self, user_message: str) -> Optional[str]:
    """Handle simple media commands directly through the spotify skill.

    When the message matches one of the fast-path patterns, the matching
    Spotify call is issued and a short reply string is returned, bypassing
    the LLM entirely. Returns ``None`` when no pattern matches, when the
    spotify skill has no manifest or is inactive, or when an exception
    occurs while running the action — the caller then falls through to the
    normal chat loop.
    """
    matched = _spotify_fast_match(user_message)
    if matched is None:
        return None
    action, method, delta = matched

    # The skill has to be installed and active; otherwise fall through.
    try:
        manifest = skills_mod.read_manifest("spotify")
    except Exception:
        manifest = None
    if not manifest or not manifest.get("active", True):
        logger.info("[spotify-fast] skill nicht verfuegbar — fall through zu Claude")
        return None

    logger.info("[spotify-fast] match action=%s method=%s delta=%s msg=%r",
                action, method, delta, user_message[:60])

    def _err_reply(label: str, res: dict) -> str:
        # ok=False: the last non-empty output line (stderr preferred over
        # stdout) is used as the hint, capped at 120 characters.
        lines = (res.get("stderr") or res.get("stdout") or "").strip().splitlines()
        hint = (lines[-1] if lines else "")[:120]
        return f"Spotify: {label} fehlgeschlagen — {hint or 'siehe Brain-Log'}"

    # action -> (endpoint, reply on success, label used in the error reply)
    simple_actions = {
        "next": ("/v1/me/player/next", "Spotify: nächster Track ⏭", "Skip"),
        "previous": ("/v1/me/player/previous", "Spotify: vorheriger Track ⏮", "Zurück"),
        "pause": ("/v1/me/player/pause", "Spotify: pausiert ⏸", "Pause"),
        "play": ("/v1/me/player/play", "Spotify: spielt ▶", "Play"),
    }

    try:
        if action in simple_actions:
            endpoint, ok_reply, err_label = simple_actions[action]
            result = _run_spotify_call(endpoint, method)
            if result.get("ok"):
                return ok_reply
            return _err_reply(err_label, result)

        if action != "volume" or delta is None:
            return None

        # Volume: read the player state, apply the delta, clamp to 0..100.
        player = _run_spotify_call("/v1/me/player", "GET")
        if not player.get("ok"):
            return _err_reply("Lautstärke-Status", player)

        current = 50  # used when the state output is empty or unparsable
        try:
            raw = (player.get("stdout") or "").strip()
            if raw:
                device = json.loads(raw).get("device") or {}
                current = int(device.get("volume_percent", 50))
        except Exception as exc:
            logger.warning("[spotify-fast] volume-state parse: %s", exc)

        target = max(0, min(100, current + delta))
        result = _run_spotify_call(f"/v1/me/player/volume?volume_percent={target}", "PUT")
        if not result.get("ok"):
            return _err_reply("Lautstärke", result)
        icon = "🔊" if delta > 0 else "🔉"
        return f"Spotify: Lautstärke {target}% {icon}"
    except Exception as exc:
        logger.warning("[spotify-fast] action=%s exception — fall through zu Claude: %s",
                       action, exc)
        return None
|
||||||
|
|
||||||
# ── Hauptpfad: ein User-Turn → Tool-Loop → finaler Reply ──
|
# ── Hauptpfad: ein User-Turn → Tool-Loop → finaler Reply ──
|
||||||
|
|
||||||
MAX_TOOL_ITERATIONS = 8 # Schutz vor Endlos-Loops
|
MAX_TOOL_ITERATIONS = 8 # Schutz vor Endlos-Loops
|
||||||
@@ -861,6 +985,14 @@ class Agent:
|
|||||||
# Events vom letzten Turn weglassen
|
# Events vom letzten Turn weglassen
|
||||||
self._pending_events = []
|
self._pending_events = []
|
||||||
|
|
||||||
|
# Spotify Fast-Path: einfache Media-Commands ueberspringen Claude komplett.
|
||||||
|
# Spart 4-9s Latenz fuer 'naechster Track', 'Pause', 'lauter' etc.
|
||||||
|
fast_reply = self._try_spotify_fast_path(user_message)
|
||||||
|
if fast_reply is not None:
|
||||||
|
self.conversation.add("user", user_message, source=source)
|
||||||
|
self.conversation.add("assistant", fast_reply)
|
||||||
|
return fast_reply
|
||||||
|
|
||||||
# 1. User-Turn an die Konversation
|
# 1. User-Turn an die Konversation
|
||||||
self.conversation.add("user", user_message, source=source)
|
self.conversation.add("user", user_message, source=source)
|
||||||
|
|
||||||
|
|||||||
@@ -1606,11 +1606,12 @@ class ARIABridge:
|
|||||||
try:
|
try:
|
||||||
url = f"{current_url}?token={self.rvs_token}"
|
url = f"{current_url}?token={self.rvs_token}"
|
||||||
logger.info("[rvs] Verbinde: %s", current_url)
|
logger.info("[rvs] Verbinde: %s", current_url)
|
||||||
# max_size=100MB synchron zum RVS-Server (siehe rvs/server.js).
|
# max_size=1500MB synchron zum RVS-Server (siehe rvs/server.js).
|
||||||
# File-Re-Download fuer Anhaenge braucht Platz fuer base64-
|
# File-Re-Download fuer Anhaenge braucht Platz fuer base64-
|
||||||
# inflate (~1.33×). Groessere Files lehnt der file_request-
|
# inflate (~1.33×) — 1 GB binaer ≈ 1.34 GB base64, plus Margin.
|
||||||
# Handler proaktiv ab bevor's zur 1009-Disconnection kommt.
|
# Groessere Files lehnt der file_request-Handler proaktiv ab
|
||||||
async with websockets.connect(url, max_size=100 * 1024 * 1024) as ws:
|
# bevor's zur 1009-Disconnection kommt.
|
||||||
|
async with websockets.connect(url, max_size=1500 * 1024 * 1024) as ws:
|
||||||
self.ws_rvs = ws
|
self.ws_rvs = ws
|
||||||
retry_delay = 2
|
retry_delay = 2
|
||||||
logger.info("[rvs] Verbunden — warte auf App-Nachrichten")
|
logger.info("[rvs] Verbunden — warte auf App-Nachrichten")
|
||||||
@@ -2594,7 +2595,7 @@ class ARIABridge:
|
|||||||
# Code 1009 (message too big) — RVS-Server droppt, Bridge crasht
|
# Code 1009 (message too big) — RVS-Server droppt, Bridge crasht
|
||||||
# im cleanup (websockets-Lib-Bug). Limit deckt typische Videos
|
# im cleanup (websockets-Lib-Bug). Limit deckt typische Videos
|
||||||
# und Bilder ab; alles drueber soll der User per SSH abholen.
|
# und Bilder ab; alles drueber soll der User per SSH abholen.
|
||||||
FILE_MAX_BYTES = 70 * 1024 * 1024
|
FILE_MAX_BYTES = 1024 * 1024 * 1024 # 1 GB binaer
|
||||||
try:
|
try:
|
||||||
file_size = os.path.getsize(server_path)
|
file_size = os.path.getsize(server_path)
|
||||||
except OSError as exc:
|
except OSError as exc:
|
||||||
|
|||||||
@@ -4038,6 +4038,7 @@
|
|||||||
<div style="color:#E0E0F0;font-size:12px;white-space:nowrap;overflow:hidden;text-overflow:ellipsis;">${badge}<strong>${escapeHtml(f.name)}</strong></div>
|
<div style="color:#E0E0F0;font-size:12px;white-space:nowrap;overflow:hidden;text-overflow:ellipsis;">${badge}<strong>${escapeHtml(f.name)}</strong></div>
|
||||||
<div style="color:#555570;font-size:10px;">${fmtSize(f.size)} · ${fmtDate(f.mtime)}</div>
|
<div style="color:#555570;font-size:10px;">${fmtSize(f.size)} · ${fmtDate(f.mtime)}</div>
|
||||||
</div>
|
</div>
|
||||||
|
<button class="btn secondary" onclick="openFileInline('${encodeURIComponent(f.path)}')" style="padding:2px 8px;font-size:10px;" title="Öffnen">👁</button>
|
||||||
<button class="btn secondary" onclick="downloadFile('${encodeURIComponent(f.path)}')" style="padding:2px 8px;font-size:10px;" title="Herunterladen">⬇</button>
|
<button class="btn secondary" onclick="downloadFile('${encodeURIComponent(f.path)}')" style="padding:2px 8px;font-size:10px;" title="Herunterladen">⬇</button>
|
||||||
<button class="btn secondary" onclick="showVersions('${escapeHtml(f.name)}')" style="padding:2px 8px;font-size:10px;" title="Versionen">🕒</button>
|
<button class="btn secondary" onclick="showVersions('${escapeHtml(f.name)}')" style="padding:2px 8px;font-size:10px;" title="Versionen">🕒</button>
|
||||||
<button class="btn secondary" onclick="deleteFile('${pathEsc}','${escapeHtml(f.name)}')" style="padding:2px 8px;font-size:10px;color:#FF6B6B;border-color:#FF6B6B;" title="Loeschen">🗑</button>
|
<button class="btn secondary" onclick="deleteFile('${pathEsc}','${escapeHtml(f.name)}')" style="padding:2px 8px;font-size:10px;color:#FF6B6B;border-color:#FF6B6B;" title="Loeschen">🗑</button>
|
||||||
@@ -4174,6 +4175,12 @@
|
|||||||
window.location.href = '/api/files-download?path=' + encPath;
|
window.location.href = '/api/files-download?path=' + encPath;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
function openFileInline(encPath) {
  // Inline view: hands the file to the browser in a new tab so it can render
  // PDF / image / text directly. Per the original note, an unknown MIME type
  // ends up as a regular download instead.
  // encPath is appended as-is; presumably the caller passes it already
  // URL-encoded (the name suggests so) — verify against the call site.
  const viewUrl = '/api/files-view?path=' + encPath;
  window.open(viewUrl, '_blank', 'noopener');
}
|
||||||
|
|
||||||
async function deleteFile(p, name) {
|
async function deleteFile(p, name) {
|
||||||
if (!confirm(`Datei "${name}" wirklich löschen?\n\nIn allen Chat-Bubbles wird sie als gelöscht markiert.`)) return;
|
if (!confirm(`Datei "${name}" wirklich löschen?\n\nIn allen Chat-Bubbles wird sie als gelöscht markiert.`)) return;
|
||||||
try {
|
try {
|
||||||
|
|||||||
Binary file not shown.
@@ -26,6 +26,9 @@ services:
|
|||||||
- ./updates:/updates # APK-Dateien fuer Auto-Update
|
- ./updates:/updates # APK-Dateien fuer Auto-Update
|
||||||
environment:
|
environment:
|
||||||
- MAX_SESSIONS=10
|
- MAX_SESSIONS=10
|
||||||
|
# 4 GB V8-Heap — sonst OOM beim Empfang von 1 GB-Files
|
||||||
|
# (base64 inflated ~1.34 GB plus WS-Frame-Margin).
|
||||||
|
- NODE_OPTIONS=--max-old-space-size=4096
|
||||||
networks:
|
networks:
|
||||||
- aria-rvs-net
|
- aria-rvs-net
|
||||||
|
|
||||||
|
|||||||
+10
-5
@@ -93,15 +93,20 @@ function cleanupRooms() {
|
|||||||
// als WS-Message `oauth_callback` und antwortet dem Browser mit einer
|
// als WS-Message `oauth_callback` und antwortet dem Browser mit einer
|
||||||
// schoenen "Tab schliessen"-Seite.
|
// schoenen "Tab schliessen"-Seite.
|
||||||
//
|
//
|
||||||
// maxPayload 100MB: TTS-Streaming + Voice-Upload (WAV als base64) +
|
// maxPayload 1500MB: TTS-Streaming + Voice-Upload (WAV als base64) +
|
||||||
// audio_pcm Chunks koennen die ws-Library Default 1MB ueberschreiten.
|
// audio_pcm Chunks koennen die ws-Library Default 1MB ueberschreiten.
|
||||||
// Plus: file_request/file_response fuer Re-Download von Anhaengen.
|
// Plus: file_request/file_response fuer Re-Download von Anhaengen.
|
||||||
// 40 MB MP4 → ~53 MB base64 → vorher mit 50 MB Limit zerschossen
|
// 40 MB MP4 → ~53 MB base64 → vorher mit 50 MB Limit zerschossen
|
||||||
// (Code 1009 message too big, Bridge crashed im cleanup). 100 MB
|
// (Code 1009 message too big, Bridge crashed im cleanup). 1500 MB
|
||||||
// deckt bis ~70 MB binaer ab; groessere Files werden Bridge-seitig
|
// deckt bis ~1 GB binaer ab (mit base64 ~33% Overhead + WS-Frame-
|
||||||
// abgewiesen (siehe file_request-Handler) bevor die WS abreisst.
|
// Margin); groessere Files werden Bridge-seitig abgewiesen (siehe
|
||||||
|
// file_request-Handler) bevor die WS abreisst.
|
||||||
|
//
|
||||||
|
// WICHTIG: Node-Default-Heap ist ~1.5 GB. Fuer 1 GB-Files muss der
|
||||||
|
// Container mit --max-old-space-size=4096 (oder NODE_OPTIONS env var)
|
||||||
|
// gestartet werden, sonst OOM-Crash beim Empfang.
|
||||||
const httpServer = http.createServer(handleHttpRequest);
|
const httpServer = http.createServer(handleHttpRequest);
|
||||||
const wss = new WebSocketServer({ noServer: true, maxPayload: 100 * 1024 * 1024 });
|
const wss = new WebSocketServer({ noServer: true, maxPayload: 1500 * 1024 * 1024 });
|
||||||
|
|
||||||
// HTTP-Upgrade-Pfad → an WebSocket-Server reichen
|
// HTTP-Upgrade-Pfad → an WebSocket-Server reichen
|
||||||
httpServer.on("upgrade", (req, socket, head) => {
|
httpServer.on("upgrade", (req, socket, head) => {
|
||||||
|
|||||||
+76
-1
@@ -109,7 +109,27 @@ class WhisperRunner:
|
|||||||
segments, info = self.model.transcribe(
|
segments, info = self.model.transcribe(
|
||||||
audio, language=language, beam_size=beam_size, vad_filter=vad_filter,
|
audio, language=language, beam_size=beam_size, vad_filter=vad_filter,
|
||||||
)
|
)
|
||||||
text = " ".join(seg.text.strip() for seg in segments)
|
# Per-segment no_speech_prob auswerten: faster-whisper liefert das
|
||||||
|
# mit. Bei Stille/Rauschen halluziniert Whisper bekannte YouTube-
|
||||||
|
# Untertitel-Patterns ("Untertitelung des ZDF", "Vielen Dank fuer's
|
||||||
|
# Zuschauen", ...). Segmente mit hohem no_speech_prob filtern wir
|
||||||
|
# raus. Plus: bekannte Hallucination-Patterns explizit blacklisten.
|
||||||
|
kept = []
|
||||||
|
for seg in segments:
|
||||||
|
# no_speech_prob: 1.0 = sicher Stille; 0.0 = sicher Sprache.
|
||||||
|
# Threshold 0.6 ist nicht zu strikt (echte leise Sprache geht
|
||||||
|
# noch durch) und nicht zu locker (Halluzinationen werden
|
||||||
|
# zuverlaessig erwischt).
|
||||||
|
nsp = getattr(seg, "no_speech_prob", 0.0)
|
||||||
|
if nsp is not None and nsp >= 0.6:
|
||||||
|
continue
|
||||||
|
stext = (seg.text or "").strip()
|
||||||
|
if not stext:
|
||||||
|
continue
|
||||||
|
if _is_known_hallucination(stext):
|
||||||
|
continue
|
||||||
|
kept.append(stext)
|
||||||
|
text = " ".join(kept)
|
||||||
return text, info.duration
|
return text, info.duration
|
||||||
|
|
||||||
loop = asyncio.get_event_loop()
|
loop = asyncio.get_event_loop()
|
||||||
@@ -117,6 +137,61 @@ class WhisperRunner:
|
|||||||
return await loop.run_in_executor(None, _run)
|
return await loop.run_in_executor(None, _run)
|
||||||
|
|
||||||
|
|
||||||
|
# Known Whisper hallucination phrases. They typically show up on silence or
# noise — per the original author, Whisper's training corpus contains hours of
# YouTube videos ending in these subtitle outros.
# Entries are lowercase and are matched as substrings of the normalized
# (lowercased, stripped) segment text by _is_known_hallucination; a segment
# containing one of them is treated as a hallucination (original estimate:
# ~99% certainty).
_HALLUCINATION_PHRASES = (
    "untertitelung des zdf",
    "untertitel im auftrag des zdf",
    "untertitelung im auftrag des zdf",
    "untertitel der amara.org community",
    "untertitel von stephanie geiges",
    "amara.org",
    "untertitel: kerstin grass",
    "vielen dank fuers zuschauen",
    "vielen dank fürs zuschauen",
    "vielen dank für's zuschauen",
    "vielen dank fuer's zuschauen",
    "vielen dank für das zuschauen",
    "vielen dank fuer das zuschauen",
    "danke für's zuschauen",
    "danke fürs zuschauen",
    "danke fuers zuschauen",
    "subs by",
    "subtitle by",
    "subtitles by",
    "thanks for watching",
)
|
||||||
|
|
||||||
|
|
||||||
|
def _normalize_for_hallu(text: str) -> str:
|
||||||
|
"""Lowercase + trailing-Satzzeichen/Whitespace strippen. Jahreszahlen
|
||||||
|
(4 Ziffern am Ende) auch entfernen — 'Untertitelung des ZDF, 2020'
|
||||||
|
matcht damit auf 'untertitelung des zdf'."""
|
||||||
|
t = text.lower().strip()
|
||||||
|
# Entferne trailing punctuation incl. comma+digits
|
||||||
|
while t and t[-1] in ".,!? \t\n":
|
||||||
|
t = t[:-1]
|
||||||
|
# 4-stellige Jahreszahl am Ende
|
||||||
|
import re
|
||||||
|
t = re.sub(r"[,\s]+\d{4}$", "", t).strip()
|
||||||
|
while t and t[-1] in ".,!? \t\n":
|
||||||
|
t = t[:-1]
|
||||||
|
return t
|
||||||
|
|
||||||
|
|
||||||
|
def _is_known_hallucination(text: str) -> bool:
    """Decide whether a segment should be rejected as a known hallucination.

    The text is normalized with _normalize_for_hallu first. The segment is
    rejected when nothing is left after normalization, or when the normalized
    text contains any entry of _HALLUCINATION_PHRASES as a substring.

    Args:
        text: Raw segment text.

    Returns:
        True if the segment is empty after normalization or contains a known
        hallucination phrase, False otherwise.
    """
    normalized = _normalize_for_hallu(text)
    # NOTE(review): this is a substring match, so genuine speech that merely
    # contains a short phrase (e.g. "amara.org") is rejected as well.
    return not normalized or any(
        phrase in normalized for phrase in _HALLUCINATION_PHRASES
    )
|
||||||
|
|
||||||
|
|
||||||
def ffmpeg_to_float32(audio_b64: str, mime_type: str) -> np.ndarray:
|
def ffmpeg_to_float32(audio_b64: str, mime_type: str) -> np.ndarray:
|
||||||
"""Dekodiert beliebiges Audio-Format → 16kHz mono float32 PCM."""
|
"""Dekodiert beliebiges Audio-Format → 16kHz mono float32 PCM."""
|
||||||
if "mp4" in mime_type or "m4a" in mime_type or "aac" in mime_type:
|
if "mp4" in mime_type or "m4a" in mime_type or "aac" in mime_type:
|
||||||
|
|||||||
Reference in New Issue
Block a user