diff --git a/android/src/components/MessageText.tsx b/android/src/components/MessageText.tsx new file mode 100644 index 0000000..967f6be --- /dev/null +++ b/android/src/components/MessageText.tsx @@ -0,0 +1,90 @@ +/** + * MessageText — rendert Chat-Text mit Auto-Linkifizierung: + * - http(s)://... → tippbar, oeffnet im Browser + * - mailto: oder plain E-Mail → tippbar, oeffnet Mail-App + * - Telefonnummern → tippbar, oeffnet Android-Dialer + * + * Text ist durchgaengig markierbar/kopierbar (selectable). + */ + +import React from 'react'; +import { Text, Linking, TextStyle, StyleProp } from 'react-native'; + +// Regex kombiniert URL | Email | Telefonnummer. +// Gruppenreihenfolge ist wichtig fuer die Erkennung unten. +// +// URL: http://... oder https://... bis zum ersten Whitespace / Anfuehrungszeichen. +// Email: simpler Standard-Match (kein RFC-kompatibel aber gut genug). +// Telefon: internationale Form (+49..., 0049..., 0176...), darf Leerzeichen +// / Bindestriche / Schraegstriche / Klammern enthalten, mindestens 7 +// Ziffern insgesamt. Vermeidet banale Zahlen (Uhrzeiten, Datum). 
/**
 * Combined matcher for URL | e-mail | phone number.
 * Capture-group order (1 = URL, 2 = e-mail, 3 = phone) is relied upon by
 * tokenize(). The regex carries the `g` flag and therefore mutable
 * `lastIndex` state; tokenize() resets it before every scan.
 */
const LINK_REGEX = new RegExp(
  '(https?:\\/\\/[^\\s<>"]+)' + // 1: URL
    '|([A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\\.[A-Za-z]{2,})' + // 2: e-mail
    '|((?:\\+|00)\\d[\\d\\s()\\-\\/]{6,}\\d|0\\d{2,4}[\\s\\/\\-]?[\\d\\s\\-\\/]{5,}\\d)', // 3: phone
  'g',
);

/** Minimum number of digits a phone candidate needs before it is linkified. */
const MIN_PHONE_DIGITS = 7;

/** Characters that end a sentence rather than a URL when they trail a match. */
const URL_TRAILING_PUNCTUATION = ".,;:!?'";

/** Visual style applied to every tappable segment. */
const LINK_STYLE: TextStyle = { color: '#0096FF', textDecorationLine: 'underline' };

/** One contiguous run of message text, either plain or a tappable link. */
interface Segment {
  text: string;
  kind: 'text' | 'url' | 'email' | 'phone';
}

/** Counts non-overlapping occurrences of `needle` in `haystack`. */
function countOccurrences(haystack: string, needle: string): number {
  return haystack.split(needle).length - 1;
}

/**
 * Removes sentence punctuation and unbalanced closing brackets from the end
 * of a URL match, so that "(https://example.com)." links to
 * "https://example.com". Balanced brackets (e.g. ".../Foo_(bar)") are kept.
 */
function trimUrlTail(url: string): string {
  let end = url.length;
  while (end > 0) {
    const ch = url.charAt(end - 1);
    if (URL_TRAILING_PUNCTUATION.includes(ch)) {
      end--;
      continue;
    }
    if (ch === ')' || ch === ']') {
      const opener = ch === ')' ? '(' : '[';
      const head = url.slice(0, end);
      if (countOccurrences(head, ch) > countOccurrences(head, opener)) {
        end--;
        continue;
      }
    }
    break;
  }
  return url.slice(0, end);
}

/**
 * Splits `raw` into plain-text and link segments, in source order.
 * Concatenating the `text` of all returned segments yields `raw` again.
 *
 * A regex match is only turned into a link when it is usable:
 *  - URLs lose trailing punctuation and must still have something after "//";
 *  - phone candidates need at least MIN_PHONE_DIGITS digits.
 * Rejected matches simply remain part of the surrounding plain text.
 *
 * @param raw The message text to scan.
 * @returns The ordered segments; empty for an empty string.
 */
function tokenize(raw: string): Segment[] {
  const out: Segment[] = [];
  let lastEnd = 0;
  LINK_REGEX.lastIndex = 0;
  let m: RegExpExecArray | null;
  while ((m = LINK_REGEX.exec(raw)) !== null) {
    let link: Segment | null = null;
    if (m[1]) {
      const url = trimUrlTail(m[1]);
      if (url.length > url.indexOf('//') + 2) link = { text: url, kind: 'url' };
    } else if (m[2]) {
      link = { text: m[2], kind: 'email' };
    } else if (m[3] && m[3].replace(/\D/g, '').length >= MIN_PHONE_DIGITS) {
      link = { text: m[3], kind: 'phone' };
    }
    if (link === null) continue; // not linkable: falls into the next text slice

    if (m.index > lastEnd) {
      out.push({ text: raw.slice(lastEnd, m.index), kind: 'text' });
    }
    out.push(link);
    // Advance by the link length (not the match length) so that trimmed
    // trailing punctuation is emitted as plain text.
    lastEnd = m.index + link.text.length;
  }
  if (lastEnd < raw.length) out.push({ text: raw.slice(lastEnd), kind: 'text' });
  return out;
}

/**
 * Opens the target of a tapped segment: URLs as-is, e-mail addresses via
 * `mailto:`, phone numbers via `tel:` with whitespace, dashes, slashes and
 * parentheses removed. Plain-text segments are ignored.
 *
 * Best effort: failures are logged and never thrown to the caller.
 */
function onPress(seg: Segment): void {
  let target: string;
  switch (seg.kind) {
    case 'url':
      target = seg.text;
      break;
    case 'email':
      target = `mailto:${seg.text}`;
      break;
    case 'phone':
      target = `tel:${seg.text.replace(/[\s\-\/()]/g, '')}`;
      break;
    default:
      return;
  }
  const report = (err: unknown): void => {
    console.warn('[MessageText] openURL failed:', target, err);
  };
  try {
    // openURL reports most failures through its returned promise; a plain
    // try/catch would leave those as unhandled rejections.
    Promise.resolve(Linking.openURL(target)).catch(report);
  } catch (err: unknown) {
    report(err);
  }
}

interface Props {
  /** Raw message text to render. */
  text: string;
  /** Style applied to the outer text element. */
  style?: StyleProp<TextStyle>;
}

/**
 * Renders chat text with automatic linkification of URLs, e-mail addresses
 * and phone numbers.
 *
 * NOTE(review): the element markup and generic type arguments were not
 * visible in the reviewed text. The props below are reconstructed from the
 * surrounding code (`style`, `LINK_STYLE`, `onPress`) and the file header
 * ("selectable") — confirm against the original markup.
 */
const MessageText: React.FC<Props> = ({ text, style }) => {
  const segments = React.useMemo(() => tokenize(text), [text]);
  const children = segments.map((seg, i) =>
    seg.kind === 'text'
      ? React.createElement(Text, { key: i }, seg.text)
      : React.createElement(
          Text,
          { key: i, style: LINK_STYLE, onPress: () => onPress(seg) },
          seg.text,
        ),
  );
  return React.createElement(Text, { style, selectable: true }, children);
};

export default MessageText;
b/android/src/screens/ChatScreen.tsx @@ -29,7 +29,8 @@ import updateService from '../services/updater'; import VoiceButton from '../components/VoiceButton'; import FileUpload, { FileData } from '../components/FileUpload'; import CameraUpload, { PhotoData } from '../components/CameraUpload'; -import { RecordingResult, loadConvWindowMs } from '../services/audio'; +import MessageText from '../components/MessageText'; +import { RecordingResult, loadConvWindowMs, loadTtsSpeed, TTS_SPEED_DEFAULT } from '../services/audio'; import Geolocation from '@react-native-community/geolocation'; // --- Typen --- @@ -116,6 +117,13 @@ const ChatScreen: React.FC = () => { const [ttsMuted, setTtsMuted] = useState(false); // Gerätelokale XTTS-Voice-Wahl (bevorzugt gegenueber dem globalen Default) const localXttsVoiceRef = useRef(''); + // Geraetelokale TTS-Wiedergabegeschwindigkeit (speed-Param an F5-TTS) + const ttsSpeedRef = useRef(TTS_SPEED_DEFAULT); + // Spiegelung der TTS-Settings in einer Ref — damit die onMessage-Closure + // (useEffect mit []-deps) IMMER die aktuellen Werte sieht. Ohne Ref + // bliebe canPlay auf dem Mount-Initial-Wert haengen (mute ignoriert, + // oder AsyncStorage-Load nicht beruecksichtigt). + const ttsCanPlayRef = useRef(true); const flatListRef = useRef(null); const messageIdCounter = useRef(0); @@ -135,6 +143,7 @@ const ChatScreen: React.FC = () => { setTtsMuted(muted === 'true'); // default false const voice = await AsyncStorage.getItem('aria_xtts_voice'); localXttsVoiceRef.current = voice || ''; + ttsSpeedRef.current = await loadTtsSpeed(); }; loadTtsSettings(); // Poll alle 2s um Settings-Aenderung mitzubekommen (einfache Loesung ohne Context) @@ -147,6 +156,12 @@ const ChatScreen: React.FC = () => { wakeWordService.loadFromStorage().catch(() => {}); }, []); + // ttsCanPlayRef live aktuell halten — Closure in onMessage unten liest + // darueber statt direkt ttsDeviceEnabled/ttsMuted (sonst stale). 
+ useEffect(() => { + ttsCanPlayRef.current = ttsDeviceEnabled && !ttsMuted; + }, [ttsDeviceEnabled, ttsMuted]); + const toggleMute = useCallback(() => { setTtsMuted(prev => { const next = !prev; @@ -299,7 +314,8 @@ const ChatScreen: React.FC = () => { } // TTS-Audio abspielen wenn vorhanden — respektiert geraetelokalen Mute/Disable - const canPlay = ttsDeviceEnabled && !ttsMuted; + // WICHTIG: via Ref statt direkt state lesen, sonst ist's stale (Closure-Bug). + const canPlay = ttsCanPlayRef.current; if (message.type === 'audio' && message.payload.base64) { const b64 = message.payload.base64 as string; const refId = (message.payload.messageId as string) || ''; @@ -439,6 +455,7 @@ const ChatScreen: React.FC = () => { durationMs: result.durationMs, mimeType: result.mimeType, voice: localXttsVoiceRef.current, + speed: ttsSpeedRef.current, ...(location && { location }), }); // resume() wird durch onPlaybackFinished nach ARIAs Antwort getriggert. @@ -460,7 +477,12 @@ const ChatScreen: React.FC = () => { // Wake Word Toggle Handler const toggleWakeWord = useCallback(async () => { if (wakeWordActive) { - wakeWordService.stop(); + // Vor Porcupine-Stop: eventuelle laufende Aufnahme abbrechen. Sonst + // bleibt audioService.recordingState=='recording' haengen und der + // normale Aufnahme-Button wirkt nicht mehr (startRecording lehnt + // ab weil "Aufnahme laeuft bereits"). 
+ try { await audioService.stopRecording(); } catch {} + await wakeWordService.stop(); setWakeWordActive(false); } else { const started = await wakeWordService.start(); @@ -550,6 +572,7 @@ const ChatScreen: React.FC = () => { rvs.send('chat', { text, voice: localXttsVoiceRef.current, + speed: ttsSpeedRef.current, ...(location && { location }), }); }, [inputText, getCurrentLocation, pendingAttachments, sendPendingAttachments]); @@ -659,6 +682,7 @@ const ChatScreen: React.FC = () => { rvs.send('chat', { text: messageText, voice: localXttsVoiceRef.current, + speed: ttsSpeedRef.current, ...(location && { location }), }); } @@ -733,9 +757,10 @@ const ChatScreen: React.FC = () => { ))} {/* Text (nicht anzeigen wenn nur "Anhang empfangen" und ein Bild da ist) */} {!(item.text === 'Anhang empfangen' && item.attachments?.some(a => a.type === 'image' && a.uri)) && ( - - {item.text} - + )} {/* Play-Button fuer ARIA-Nachrichten — Cache bevorzugt, sonst Bridge-TTS mit aktueller Engine */} {!isUser && item.text.length > 0 && ( @@ -750,6 +775,7 @@ const ChatScreen: React.FC = () => { rvs.send('tts_request' as any, { text: item.text, voice: localXttsVoiceRef.current, + speed: ttsSpeedRef.current, messageId: item.messageId || '', }); } diff --git a/android/src/screens/SettingsScreen.tsx b/android/src/screens/SettingsScreen.tsx index d5bd7e3..3d91db6 100644 --- a/android/src/screens/SettingsScreen.tsx +++ b/android/src/screens/SettingsScreen.tsx @@ -35,6 +35,10 @@ import { CONV_WINDOW_MIN_SEC, CONV_WINDOW_MAX_SEC, CONV_WINDOW_STORAGE_KEY, + TTS_SPEED_DEFAULT, + TTS_SPEED_MIN, + TTS_SPEED_MAX, + TTS_SPEED_STORAGE_KEY, } from '../services/audio'; import wakeWordService, { BUILTIN_KEYWORDS, @@ -98,6 +102,7 @@ const SettingsScreen: React.FC = () => { const [ttsPrerollSec, setTtsPrerollSec] = useState(TTS_PREROLL_DEFAULT_SEC); const [vadSilenceSec, setVadSilenceSec] = useState(VAD_SILENCE_DEFAULT_SEC); const [convWindowSec, setConvWindowSec] = useState(CONV_WINDOW_DEFAULT_SEC); + const 
[ttsSpeed, setTtsSpeed] = useState(TTS_SPEED_DEFAULT); const [wakeAccessKey, setWakeAccessKey] = useState(''); const [wakeAccessKeyVisible, setWakeAccessKeyVisible] = useState(false); const [wakeKeyword, setWakeKeyword] = useState(DEFAULT_KEYWORD); @@ -153,6 +158,12 @@ const SettingsScreen: React.FC = () => { } } }); + AsyncStorage.getItem(TTS_SPEED_STORAGE_KEY).then(saved => { + if (saved != null) { + const n = parseFloat(saved); + if (isFinite(n) && n >= TTS_SPEED_MIN && n <= TTS_SPEED_MAX) setTtsSpeed(n); + } + }); AsyncStorage.getItem(WAKE_ACCESS_KEY_STORAGE).then(saved => { if (saved) setWakeAccessKey(saved); }); @@ -800,6 +811,38 @@ const SettingsScreen: React.FC = () => { +0.5 + + Sprechgeschwindigkeit + + Wie schnell ARIA spricht. 1.0 = Normal. Niedriger = langsamer, hoeher = schneller. + Wird an F5-TTS als speed-Param uebergeben und pro Geraet gespeichert. + Default: {TTS_SPEED_DEFAULT.toFixed(1)}x. + + + { + const next = Math.max(TTS_SPEED_MIN, Math.round((ttsSpeed - 0.1) * 10) / 10); + setTtsSpeed(next); + AsyncStorage.setItem(TTS_SPEED_STORAGE_KEY, String(next)); + }} + disabled={ttsSpeed <= TTS_SPEED_MIN} + > + −0.1 + + {ttsSpeed.toFixed(1)} x + { + const next = Math.min(TTS_SPEED_MAX, Math.round((ttsSpeed + 0.1) * 10) / 10); + setTtsSpeed(next); + AsyncStorage.setItem(TTS_SPEED_STORAGE_KEY, String(next)); + }} + disabled={ttsSpeed >= TTS_SPEED_MAX} + > + +0.1 + + )} diff --git a/android/src/services/audio.ts b/android/src/services/audio.ts index 605f530..be07774 100644 --- a/android/src/services/audio.ts +++ b/android/src/services/audio.ts @@ -92,6 +92,24 @@ export const CONV_WINDOW_MIN_SEC = 3.0; export const CONV_WINDOW_MAX_SEC = 20.0; export const CONV_WINDOW_STORAGE_KEY = 'aria_conv_window_sec'; +// TTS-Wiedergabegeschwindigkeit — wird pro Geraet gespeichert und an die +// Bridge mitgegeben (speed-Param im F5-TTS infer()). 1.0 = normal. 
+export const TTS_SPEED_DEFAULT = 1.0; +export const TTS_SPEED_MIN = 0.5; +export const TTS_SPEED_MAX = 2.0; +export const TTS_SPEED_STORAGE_KEY = 'aria_tts_speed'; + +export async function loadTtsSpeed(): Promise { + try { + const raw = await AsyncStorage.getItem(TTS_SPEED_STORAGE_KEY); + if (raw != null) { + const n = parseFloat(raw); + if (isFinite(n) && n >= TTS_SPEED_MIN && n <= TTS_SPEED_MAX) return n; + } + } catch {} + return TTS_SPEED_DEFAULT; +} + export async function loadConvWindowMs(): Promise { try { const raw = await AsyncStorage.getItem(CONV_WINDOW_STORAGE_KEY); diff --git a/android/src/services/wakeword.ts b/android/src/services/wakeword.ts index 886a1fe..fa9f4d7 100644 --- a/android/src/services/wakeword.ts +++ b/android/src/services/wakeword.ts @@ -90,12 +90,32 @@ class WakeWordService { if (this.initInProgress) return this.initInProgress; this.initInProgress = (async () => { try { - const { PorcupineManager } = require('@picovoice/porcupine-react-native'); - // Built-In Keyword-Identifier sind lower-case strings im SDK + const porcupineRN = require('@picovoice/porcupine-react-native'); + const { PorcupineManager, BuiltInKeywords } = porcupineRN; + // Manche Porcupine-Versionen wollen das BuiltInKeywords-Enum (Objekt + // mit keys wie JARVIS, COMPUTER, HEY_GOOGLE), andere akzeptieren + // den String direkt. Mappen mit Fallback auf String: + const enumKey = this.keyword.toUpperCase().replace(/\s+/g, '_'); + const kw = (BuiltInKeywords && BuiltInKeywords[enumKey]) || this.keyword; + console.log('[WakeWord] Porcupine init: keyword=%s (resolved=%s)', + this.keyword, typeof kw === 'string' ? 
kw : '[enum]'); this.porcupine = await PorcupineManager.fromBuiltInKeywords( this.accessKey, - [this.keyword], - (_keywordIndex: number) => this.onWakeDetected(), + [kw], + (keywordIndex: number) => { + console.log('[WakeWord] Porcupine callback fired (index=%d)', keywordIndex); + this.onWakeDetected().catch(err => + console.warn('[WakeWord] onWakeDetected crashed:', err)); + }, + // Error handler (wenn Porcupine im Background-Thread crashed, + // z.B. beim Audio-Engine-Konflikt mit audio-recorder-player) + (error: any) => { + console.warn('[WakeWord] Porcupine runtime error:', error?.message || error); + // Nicht in Loop crashen — state zurueck auf off damit der User + // mit dem Aufnahme-Button wieder normal arbeiten kann + this.setState('off'); + this.disposePorcupine().catch(() => {}); + }, ); console.log('[WakeWord] Porcupine init OK (keyword=%s)', this.keyword); return true; diff --git a/bridge/aria_bridge.py b/bridge/aria_bridge.py index 2a3686e..e824668 100644 --- a/bridge/aria_bridge.py +++ b/bridge/aria_bridge.py @@ -541,6 +541,9 @@ class ARIABridge: # Wird fuer die direkt folgende ARIA-Antwort genutzt und dann zurueckgesetzt. # So kann jedes Geraet seine bevorzugte Stimme bekommen (pro Request). self._next_voice_override: Optional[str] = None + # Gleiche Logik fuer die Wiedergabegeschwindigkeit (F5-TTS speed-Param, + # App-Setting aria_tts_speed, 1.0 = normal). + self._next_speed_override: Optional[float] = None # STT-Requests die aktuell auf Antwort von der whisper-bridge (Gamebox) warten. # requestId → Future mit dem Text (oder None bei Fehler). 
self._pending_stt: dict[str, asyncio.Future] = {} @@ -911,6 +914,12 @@ class ARIABridge: logger.info("[core] Nutze Voice-Override: %s", self._next_voice_override) self._next_voice_override = None + # Speed ebenfalls aus App-Override nehmen (fallback 1.0) + xtts_speed = self._next_speed_override or 1.0 + if self._next_speed_override: + logger.info("[core] Nutze Speed-Override: %.2fx", self._next_speed_override) + self._next_speed_override = None + tts_text = tts_text_preview or text if not tts_text: logger.info("[core] TTS-Text leer nach Cleanup — uebersprungen") @@ -926,6 +935,7 @@ class ARIABridge: "payload": { "text": tts_text, "voice": xtts_voice, + "speed": xtts_speed, "language": "de", "requestId": xtts_request_id, "messageId": message_id, @@ -1163,6 +1173,13 @@ class ARIABridge: if voice_override: self._next_voice_override = voice_override logger.info("[rvs] Voice-Override fuer naechste Antwort: %s", voice_override) + # Speed-Override (TTS-Wiedergabegeschwindigkeit, pro Geraet) + try: + speed = float(payload.get("speed", 0) or 0) + if 0.25 <= speed <= 4.0: + self._next_speed_override = speed + except (TypeError, ValueError): + pass if text: logger.info("[rvs] App-Chat: '%s'", text[:80]) await self.send_to_core(text, source="app") @@ -1215,8 +1232,14 @@ class ARIABridge: if not text: return tts_text = clean_text_for_tts(text) or text - # Voice aus App-Payload gewinnt, sonst global + # Voice + Speed aus App-Payload gewinnen, sonst global/default xtts_voice = payload.get("voice", "") or getattr(self, 'xtts_voice', '') + try: + xtts_speed = float(payload.get("speed", 0) or 0) + if not (0.25 <= xtts_speed <= 4.0): + xtts_speed = 1.0 + except (TypeError, ValueError): + xtts_speed = 1.0 try: xtts_request_id = str(uuid.uuid4()) if message_id: @@ -1226,6 +1249,7 @@ class ARIABridge: "payload": { "text": tts_text, "voice": xtts_voice, + "speed": xtts_speed, "language": "de", "requestId": xtts_request_id, "messageId": message_id, @@ -1424,6 +1448,12 @@ class ARIABridge: 
if voice_override: self._next_voice_override = voice_override logger.info("[rvs] Voice-Override (via Audio): %s", voice_override) + try: + speed = float(payload.get("speed", 0) or 0) + if 0.25 <= speed <= 4.0: + self._next_speed_override = speed + except (TypeError, ValueError): + pass logger.info("[rvs] Audio empfangen: %s, %dms, %dKB", mime_type, duration_ms, len(audio_b64) // 1365) asyncio.create_task(self._process_app_audio(audio_b64, mime_type)) diff --git a/diagnostic/index.html b/diagnostic/index.html index 4d2d868..85ff06d 100644 --- a/diagnostic/index.html +++ b/diagnostic/index.html @@ -136,6 +136,25 @@ + + +