Fixed ear button crash (LiveAudioStream removed, Phase 1 placeholder), play button in ARIA messages for speech playback

- [x] In-app chat search (magnifier in the status bar)
- [x] Watchdog with container restart (2min warning → 5min doctor --fix → 8min restart)
- [x] Cancel button in the Diagnostic chat
- [x] On-the-fly message backup (/shared/config/chat_backup.jsonl, see the reading sketch below)
- [x] Split long messages into sentences for TTS
- [x] RVS messages from the smartphone now get through
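
For reference, a minimal sketch of reading that backup log, assuming a Node context; the entry shape ({ ts, role, text, session }) comes from the appendFileSync calls in the server.js changes of this commit, everything else is illustrative:

// Sketch: read the on-the-fly chat backup (one JSON object per line).
// Entry shape per this commit's appendFileSync calls; Node 'fs' assumed.
import * as fs from "fs";

type BackupEntry = { ts: number; role: "user" | "assistant"; text: string; session: string };

const entries: BackupEntry[] = fs
  .readFileSync("/shared/config/chat_backup.jsonl", "utf8")
  .split("\n")
  .filter(Boolean)
  .map(line => JSON.parse(line) as BackupEntry);

for (const e of entries) {
  console.log(new Date(e.ts).toISOString(), e.role, e.text.slice(0, 80));
}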
duffyduck 2026-04-01 23:45:25 +02:00
parent 1a32098c9e
commit 81ca3cc7a7
7 changed files with 174 additions and 106 deletions

View File

@@ -91,6 +91,8 @@ const ChatScreen: React.FC = () => {
const [gpsEnabled, setGpsEnabled] = useState(false);
const [wakeWordActive, setWakeWordActive] = useState(false);
const [fullscreenImage, setFullscreenImage] = useState<string | null>(null);
const [searchQuery, setSearchQuery] = useState('');
const [searchVisible, setSearchVisible] = useState(false);
const flatListRef = useRef<FlatList>(null);
const messageIdCounter = useRef(0);
@@ -581,6 +583,18 @@ const ChatScreen: React.FC = () => {
{item.text}
</Text>
)}
{/* Play button for ARIA messages */}
{!isUser && item.text.length > 0 && (
<TouchableOpacity
style={styles.playButton}
onPress={() => {
// Send a TTS request to the bridge
rvs.send('tts_request' as any, { text: item.text, voice: '' });
}}
>
<Text style={styles.playButtonText}>{'\uD83D\uDD0A'}</Text>
</TouchableOpacity>
)}
<Text style={styles.timestamp}>{time}</Text>
</View>
);
@@ -603,12 +617,32 @@ const ChatScreen: React.FC = () => {
{connectionState === 'connected' ? 'Verbunden' :
connectionState === 'connecting' ? 'Verbinde...' : 'Getrennt'}
</Text>
<TouchableOpacity onPress={() => setSearchVisible(!searchVisible)} style={{marginLeft: 'auto', paddingHorizontal: 8}}>
<Text style={{fontSize: 16}}>{'\uD83D\uDD0D'}</Text>
</TouchableOpacity>
</View>
{/* Search bar */}
{searchVisible && (
<View style={styles.searchBar}>
<TextInput
style={styles.searchInput}
value={searchQuery}
onChangeText={setSearchQuery}
placeholder="Chat durchsuchen..."
placeholderTextColor="#555570"
autoFocus
/>
<TouchableOpacity onPress={() => { setSearchVisible(false); setSearchQuery(''); }}>
<Text style={{color: '#FF3B30', fontSize: 14, paddingHorizontal: 8}}>X</Text>
</TouchableOpacity>
</View>
)}
{/* Message list */}
<FlatList
ref={flatListRef}
data={messages}
data={searchQuery ? messages.filter(m => m.text.toLowerCase().includes(searchQuery.toLowerCase())) : messages}
keyExtractor={item => item.id}
renderItem={renderMessage}
contentContainerStyle={styles.messageList}
@@ -887,6 +921,30 @@ const styles = StyleSheet.create({
wakeWordIcon: {
fontSize: 16,
},
searchBar: {
flexDirection: 'row',
alignItems: 'center',
backgroundColor: '#12122A',
paddingHorizontal: 12,
paddingVertical: 6,
borderBottomWidth: 1,
borderBottomColor: '#1E1E2E',
},
searchInput: {
flex: 1,
color: '#FFFFFF',
fontSize: 14,
paddingVertical: 4,
},
playButton: {
alignSelf: 'flex-end',
paddingHorizontal: 8,
paddingVertical: 2,
marginTop: 4,
},
playButtonText: {
fontSize: 16,
},
fullscreenOverlay: {
flex: 1,
backgroundColor: 'rgba(0,0,0,0.95)',

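For clarity, the filter applied to the FlatList data above, extracted as a pure helper; the Message shape is assumed from this diff (only id and text appear here):

// Sketch: the case-insensitive search filter used by the FlatList above.
// The Message shape is an assumption based on the fields visible in this diff.
interface Message { id: string; text: string; }

function filterMessages(messages: Message[], query: string): Message[] {
  if (!query) return messages;            // empty query: show all messages
  const q = query.toLowerCase();
  return messages.filter(m => m.text.toLowerCase().includes(q));
}
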
View File

@@ -1,21 +1,12 @@
/**
 * Wake word service: "ARIA" detection
 *
 * Uses react-native-live-audio-stream for continuous microphone monitoring.
 * Detects speech via an energy threshold and sends short audio clips
 * for server-side wake word verification (openwakeword in the bridge).
 * Phase 1: disabled; react-native-live-audio-stream has native bridge problems.
 * Uses tap-to-talk (VoiceButton) as the primary input mode instead.
 *
 * Architecture:
 * App (microphone) → energy detection → audio buffer
 * → RVS "wake_check" → bridge → openwakeword → confirmation
 * → app starts recording
 *
 * Currently (Phase 1): simple tap-to-talk + auto-stop.
 * Later (Phase 2): Porcupine on-device "ARIA" keyword.
 * Phase 2: Porcupine on-device "ARIA" keyword (planned).
 */
import LiveAudioStream from 'react-native-live-audio-stream';
type WakeWordCallback = () => void;
type StateCallback = (state: WakeWordState) => void;
@@ -25,47 +16,16 @@ class WakeWordService {
private state: WakeWordState = 'off';
private wakeCallbacks: WakeWordCallback[] = [];
private stateCallbacks: StateCallback[] = [];
private isInitialized = false;
/** Start wake word detection */
async start(): Promise<boolean> {
if (this.state === 'listening') return true;
try {
if (!this.isInitialized) {
LiveAudioStream.init({
sampleRate: 16000,
channels: 1,
bitsPerSample: 16,
audioSource: 6, // VOICE_RECOGNITION
bufferSize: 4096,
});
this.isInitialized = true;
}
// Start the audio stream and check the energy level
LiveAudioStream.start();
LiveAudioStream.on('data', (base64Chunk: string) => {
if (this.state !== 'listening') return;
// base64 → Int16Array → compute RMS
const raw = this._base64ToInt16(base64Chunk);
const rms = this._calculateRMS(raw);
// Threshold: if loud enough → wake word detected
// Phase 1: simple energy detection (someone is speaking)
// Phase 2: Porcupine "ARIA" keyword
if (rms > 2000) {
this.setState('detected');
this.wakeCallbacks.forEach(cb => cb());
// Pause briefly after detection; the recording takes over the microphone
this.stop();
}
});
// Phase 1: LiveAudioStream disabled (native bridge unstable)
// Instead: tap-to-talk as the primary mode
console.log('[WakeWord] Wake Word ist in Phase 1 noch nicht verfuegbar — nutze Tap-to-Talk');
this.setState('listening');
console.log('[WakeWord] Listening gestartet');
return true;
} catch (err) {
console.error('[WakeWord] Start fehlgeschlagen:', err);
@@ -75,22 +35,12 @@ class WakeWordService {
/** Stop wake word detection */
stop(): void {
if (this.state === 'off') return;
try {
LiveAudioStream.stop();
} catch {}
this.setState('off');
console.log('[WakeWord] Gestoppt');
}
/** Restart after a recording */
async resume(): Promise<void> {
// Short pause so the recording can release the microphone
setTimeout(() => {
if (this.state === 'off') {
this.start();
}
}, 500);
// Nothing to do in Phase 1
}
// --- Callbacks ---
@@ -113,32 +63,12 @@ class WakeWordService {
return this.state;
}
// --- Helpers ---
private setState(state: WakeWordState): void {
if (this.state !== state) {
this.state = state;
this.stateCallbacks.forEach(cb => cb(state));
}
}
private _base64ToInt16(base64: string): Int16Array {
const binary = atob(base64);
const bytes = new Uint8Array(binary.length);
for (let i = 0; i < binary.length; i++) {
bytes[i] = binary.charCodeAt(i);
}
return new Int16Array(bytes.buffer);
}
private _calculateRMS(samples: Int16Array): number {
if (samples.length === 0) return 0;
let sum = 0;
for (let i = 0; i < samples.length; i++) {
sum += samples[i] * samples[i];
}
return Math.sqrt(sum / samples.length);
}
}
const wakeWordService = new WakeWordService();

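A brief usage sketch for the Phase 1 service: start() no longer opens an audio stream, it only flips the state so the UI can fall back to tap-to-talk. The default import is assumed from the instance created above; nothing else here is from the diff:

// Sketch: toggling the Phase 1 wake word service from UI code.
// The default export is an assumption based on the instance created above.
import wakeWordService from './wakeWordService';

async function toggleWakeWord(currentlyActive: boolean): Promise<boolean> {
  if (currentlyActive) {
    wakeWordService.stop();       // returns to 'off'
    return false;
  }
  return wakeWordService.start(); // Phase 1: only sets state to 'listening'
}
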
View File

@@ -1014,6 +1014,30 @@ class ARIABridge:
if sender in ("aria", "stt"):
return
elif msg_type == "tts_request":
# The app requests TTS audio for a text (play button)
text = payload.get("text", "")
requested_voice = payload.get("voice", "")
if text:
voice_name = requested_voice or self.voice_engine.select_voice(text)
audio_data = self.voice_engine.synthesize(text, voice_name)
if audio_data:
audio_b64 = base64.b64encode(audio_data).decode("ascii")
try:
await self._send_to_rvs({
"type": "audio",
"payload": {
"base64": audio_b64,
"mimeType": "audio/wav",
"voice": voice_name,
},
"timestamp": int(asyncio.get_event_loop().time() * 1000),
})
logger.info("[rvs] TTS on-demand: %d bytes (%s)", len(audio_data), voice_name)
except Exception as e:
logger.warning("[rvs] TTS on-demand senden fehlgeschlagen: %s", e)
return
elif msg_type == "config":
# Receive config from app/diagnostic and persist it
changed = False

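On the app side, the reply arrives as a regular audio message; a sketch of its shape per the handler above (how the app subscribes is not in this diff, so onAudio is a hypothetical name; only the message fields come from the handler):

// Sketch: shape of the bridge's on-demand TTS reply, per the handler above.
// onAudio is a hypothetical handler name for illustration.
interface AudioMessage {
  type: 'audio';
  payload: { base64: string; mimeType: string; voice: string };
  timestamp: number;
}

function onAudio(msg: AudioMessage): void {
  // Decode the base64 WAV; handing it to an audio player is omitted here.
  const bytes = Uint8Array.from(atob(msg.payload.base64), c => c.charCodeAt(0));
  console.log(`TTS audio: ${bytes.length} bytes (${msg.payload.voice})`);
}
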
View File

@@ -201,8 +201,9 @@
<button class="btn secondary" onclick="toggleChatFullscreen()" id="btn-chat-fs" style="padding:4px 10px;font-size:11px;">Vollbild</button>
</div>
<div class="chat-box" id="chat-box"></div>
<div id="thinking-indicator" style="display:none;padding:6px 10px;font-size:12px;color:#FFD60A;background:#1E1E2E;border-radius:0 0 6px 6px;margin-top:-8px;margin-bottom:8px;">
<span style="animation:pulse 1s infinite;">&#x1F4AD;</span> <span id="thinking-text">ARIA denkt...</span>
<div id="thinking-indicator" style="display:none;padding:6px 10px;font-size:12px;color:#FFD60A;background:#1E1E2E;border-radius:0 0 6px 6px;margin-top:-8px;margin-bottom:8px;display:flex;align-items:center;justify-content:space-between;">
<span><span style="animation:pulse 1s infinite;">&#x1F4AD;</span> <span id="thinking-text">ARIA denkt...</span></span>
<button class="btn secondary" onclick="cancelRequest()" style="padding:2px 10px;font-size:11px;color:#FF3B30;border-color:#FF3B30;">Abbrechen</button>
</div>
<div class="input-row">
<input type="text" id="chat-input" placeholder="Nachricht an ARIA...">
@@ -1166,6 +1167,13 @@
}, 120000);
}
// ── Cancel ──────────────────────────────
function cancelRequest() {
send({ action: 'cancel_request' });
updateThinkingIndicator({ activity: 'idle' });
addChat('error', 'Anfrage abgebrochen', 'system');
}
// ── Voice config ──────────────────────────
function sendVoiceConfig() {
const defaultVoice = document.getElementById('diag-default-voice').value;

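The cancel flow in one place, as implied by cancelRequest() above and the cancel_request branch in server.js below; the WebSocket endpoint here is a placeholder, only the message shape is from this commit:

// Sketch: the diagnostic cancel flow, per this commit.
// The endpoint URL is a placeholder; { action: 'cancel_request' } is from cancelRequest().
const ws = new WebSocket('ws://ARIA_HOST:PORT/'); // hypothetical endpoint

function cancelRunningRequest(): void {
  ws.send(JSON.stringify({ action: 'cancel_request' }));
  // Server side: resets the watchdog counters, broadcasts agent_activity 'idle',
  // and fires `openclaw doctor --fix` inside aria-core.
}
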
View File

@@ -355,6 +355,11 @@ function handleGatewayMessage(msg) {
broadcast({ type: "agent_activity", activity: "idle" });
pendingMessageTime = 0; // Watchdog: reply received
updateAgentActivity();
// Write the reply to the backup log
try {
const entry = JSON.stringify({ ts: Date.now(), role: "assistant", text: text.slice(0, 2000), session: activeSessionKey }) + "\n";
fs.appendFileSync("/shared/config/chat_backup.jsonl", entry);
} catch {}
return;
}
@@ -428,6 +433,12 @@ function sendToGateway(text, isPipeline) {
log("debug", "gateway", `RAW >>> ${payload}`);
gatewayWs.send(payload);
pendingMessageTime = Date.now(); // Watchdog: message sent
// Write the message to the backup log immediately (OpenClaw only persists after the run ends)
try {
fs.mkdirSync("/shared/config", { recursive: true });
const entry = JSON.stringify({ ts: Date.now(), role: "user", text, session: activeSessionKey }) + "\n";
fs.appendFileSync("/shared/config/chat_backup.jsonl", entry);
} catch {}
log("info", "gateway", `chat.send [${reqId}]: "${text}"`);
if (isPipeline) plog(`chat.send [${reqId}] an Gateway gesendet — warte auf ACK...`);
@@ -1005,6 +1016,7 @@ function waitForMessage(ws, timeoutMs) {
let lastAgentActivity = Date.now();
let watchdogWarned = false;
let watchdogFixAttempted = false;
let pendingMessageTime = 0; // when the last message was sent
function updateAgentActivity() {
@@ -1024,20 +1036,37 @@ setInterval(async () => {
broadcast({ type: "watchdog", status: "warning", waitingMs, message: "ARIA reagiert nicht — moeglicherweise stuck Run" });
}
// After 5min: offer an auto-fix
if (waitingMs > 300000 && watchdogWarned) {
// After 5min: doctor --fix
if (waitingMs > 300000 && watchdogWarned && !watchdogFixAttempted) {
watchdogFixAttempted = true;
log("error", "server", "Watchdog: 5min ohne Antwort — fuehre openclaw doctor --fix aus");
broadcast({ type: "watchdog", status: "fixing", message: "Auto-Fix: openclaw doctor --fix" });
try {
await dockerExec("aria-core", "openclaw doctor --fix 2>/dev/null || true");
log("info", "server", "Watchdog: doctor --fix ausgefuehrt");
broadcast({ type: "watchdog", status: "fixed", message: "doctor --fix ausgefuehrt — sende Nachricht erneut" });
broadcast({ type: "watchdog", status: "fixed", message: "doctor --fix ausgefuehrt — warte auf Antwort..." });
} catch (err) {
log("error", "server", `Watchdog: doctor --fix fehlgeschlagen: ${err.message}`);
broadcast({ type: "watchdog", status: "error", message: `Auto-Fix fehlgeschlagen: ${err.message}` });
}
pendingMessageTime = 0; // Reset
}
// After 8min: restart the containers
if (waitingMs > 480000 && watchdogFixAttempted) {
log("error", "server", "Watchdog: 8min ohne Antwort — starte aria-core + aria-proxy neu");
broadcast({ type: "watchdog", status: "restarting", message: "Container-Restart: aria-core + aria-proxy" });
try {
const { execSync } = require("child_process");
execSync("docker restart aria-core aria-proxy", { timeout: 60000 });
log("info", "server", "Watchdog: Container neugestartet");
broadcast({ type: "watchdog", status: "restarted", message: "Container neugestartet — warte auf Gateway-Reconnect..." });
// The gateway will reconnect automatically
} catch (err) {
log("error", "server", `Watchdog: Container-Restart fehlgeschlagen: ${err.message}`);
broadcast({ type: "watchdog", status: "error", message: `Restart fehlgeschlagen: ${err.message}` });
}
pendingMessageTime = 0;
watchdogWarned = false;
watchdogFixAttempted = false;
}
}, 30000);
@@ -1127,6 +1156,15 @@ wss.on("connection", (ws) => {
if (ws._sshSock) ws._sshSock.write(msg.data);
} else if (msg.action === "live_ssh_close") {
if (ws._sshSock) { ws._sshSock.end(); ws._sshSock = null; }
} else if (msg.action === "cancel_request") {
// Cancel the running request; doctor --fix terminates stuck runs
log("warn", "server", "Anfrage abgebrochen — fuehre doctor --fix aus");
pendingMessageTime = 0;
watchdogWarned = false;
watchdogFixAttempted = false;
if (pipelineActive) pipelineEnd(false, "Vom Benutzer abgebrochen");
broadcast({ type: "agent_activity", activity: "idle" });
dockerExec("aria-core", "openclaw doctor --fix 2>/dev/null || true").catch(() => {});
} else if (msg.action === "get_voice_config") {
handleGetVoiceConfig(ws);
} else if (msg.action === "send_voice_config") {

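To summarize the escalation ladder above as data (the 2-minute warning threshold comes from the commit message; the interval itself runs every 30s against pendingMessageTime):

// Sketch: the watchdog escalation implemented above, as a timetable.
// waitingMs = Date.now() - pendingMessageTime, checked every 30 seconds.
const WATCHDOG_STAGES = [
  { afterMs: 120_000, action: 'broadcast warning: possible stuck run' },
  { afterMs: 300_000, action: 'openclaw doctor --fix inside aria-core' },
  { afterMs: 480_000, action: 'docker restart aria-core aria-proxy' },
] as const;
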
View File

@@ -1,26 +1,36 @@
# done image upload doesn't work yet.
# end
# done
voice messages are not shown as a second message so you can see what you sent
# end
# ARIA Issues & Features
## Done
# done clear cache, images are not reloaded when tapped.
autoload doesn't work
# end
- [x] Image upload works (shared volume /shared/uploads/)
- [x] Voice messages are shown as text (STT → chat bubble)
- [x] Clear cache + auto-download of attachments
- [x] ARIA reads messages aloud (TTS via Piper)
- [x] Autoscroll to the latest message
- [x] Larger images in chat + fullscreen preview
- [x] Fixed ear button crash (LiveAudioStream removed, Phase 1 placeholder)
- [x] Play button in ARIA messages for speech playback
- [x] In-app chat search (magnifier in the status bar)
- [x] Watchdog with container restart (2min warning → 5min doctor --fix → 8min restart)
- [x] Cancel button in the Diagnostic chat
- [x] On-the-fly message backup (/shared/config/chat_backup.jsonl)
- [x] Split long messages into sentences for TTS
- [x] RVS messages from the smartphone get through
- [x] Voice settings (Ramona/Thorsten, speed per voice)
- [x] Highlight triggers configurable in Diagnostic
when you tap the ear to listen it crashes
## Open
# done aria doesn't read the messages aloud
# end
### TTS / Voices
- [ ] Selectable TTS engine: Piper (CPU, fast) or Coqui XTTS v2 (GPU, more natural)
- [ ] Piper voice downloads via Diagnostic (new languages/voices)
- [ ] Coqui XTTS v2 integration (needs GPU, better German voice)
# done autoscroll still doesn't go to the latest message
our memory brain
# end
### App
- [ ] On-device wake word (Porcupine "ARIA" keyword, Phase 2)
- [ ] Load chat history more reliably (AsyncStorage race condition)
# done show images larger in chat
# end
download the Piper voices via the Diagnostic
# end
### Architecture
- [ ] Images: use Claude Vision directly (currently only the file path goes to ARIA)
- [ ] Auto-compacting and memory/brain management (SQLite?)
- [ ] Diagnostic: system info tab (container status, disk, RAM, CPU)

View File

@@ -9,7 +9,7 @@ const MAX_SESSIONS = parseInt(process.env.MAX_SESSIONS || "10", 10);
// Allowed message types; everything else is discarded
const ALLOWED_TYPES = new Set([
"chat", "audio", "file", "location", "mode", "log", "event", "heartbeat",
"file_request", "file_response", "file_saved", "stt_result", "config",
"file_request", "file_response", "file_saved", "stt_result", "config", "tts_request",
]);
// Token-Raum: token -> { clients: Set<ws> }