Fixed ear button crash (LiveAudioStream removed, Phase 1 placeholder), play button in ARIA messages for speech playback

- [x] In-app chat search (magnifier in the status bar)
- [x] Watchdog with container restart (2min warning → 5min doctor --fix → 8min restart)
- [x] Cancel button in the Diagnostic chat
- [x] On-the-fly message backup (/shared/config/chat_backup.jsonl, see the reading sketch below)
- [x] Split long messages into sentences for TTS
- [x] RVS messages from the smartphone now get through
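
For reference, a minimal sketch of reading that backup log, assuming a Node context; the entry shape ({ ts, role, text, session }) comes from the appendFileSync calls in the server.js changes of this commit, everything else is illustrative:

// Sketch: read the on-the-fly chat backup (one JSON object per line).
// Entry shape per this commit's appendFileSync calls; Node 'fs' assumed.
import * as fs from "fs";

type BackupEntry = { ts: number; role: "user" | "assistant"; text: string; session: string };

const entries: BackupEntry[] = fs
  .readFileSync("/shared/config/chat_backup.jsonl", "utf8")
  .split("\n")
  .filter(Boolean)
  .map(line => JSON.parse(line) as BackupEntry);

for (const e of entries) {
  console.log(new Date(e.ts).toISOString(), e.role, e.text.slice(0, 80));
}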
duffyduck 2026-04-01 23:45:25 +02:00
parent 1a32098c9e
commit 81ca3cc7a7
7 changed files with 174 additions and 106 deletions

View File

@@ -91,6 +91,8 @@ const ChatScreen: React.FC = () => {
const [gpsEnabled, setGpsEnabled] = useState(false);
const [wakeWordActive, setWakeWordActive] = useState(false);
const [fullscreenImage, setFullscreenImage] = useState<string | null>(null);
const [searchQuery, setSearchQuery] = useState('');
const [searchVisible, setSearchVisible] = useState(false);
const flatListRef = useRef<FlatList>(null);
const messageIdCounter = useRef(0);
@@ -581,6 +583,18 @@ const ChatScreen: React.FC = () => {
{item.text}
</Text>
)}
{/* Play button for ARIA messages */}
{!isUser && item.text.length > 0 && (
<TouchableOpacity
style={styles.playButton}
onPress={() => {
// Send a TTS request to the bridge
rvs.send('tts_request' as any, { text: item.text, voice: '' });
}}
>
<Text style={styles.playButtonText}>{'\uD83D\uDD0A'}</Text>
</TouchableOpacity>
)}
<Text style={styles.timestamp}>{time}</Text>
</View>
);
@@ -603,12 +617,32 @@ const ChatScreen: React.FC = () => {
{connectionState === 'connected' ? 'Verbunden' :
connectionState === 'connecting' ? 'Verbinde...' : 'Getrennt'}
</Text>
<TouchableOpacity onPress={() => setSearchVisible(!searchVisible)} style={{marginLeft: 'auto', paddingHorizontal: 8}}>
<Text style={{fontSize: 16}}>{'\uD83D\uDD0D'}</Text>
</TouchableOpacity>
</View>
{/* Search bar */}
{searchVisible && (
<View style={styles.searchBar}>
<TextInput
style={styles.searchInput}
value={searchQuery}
onChangeText={setSearchQuery}
placeholder="Chat durchsuchen..."
placeholderTextColor="#555570"
autoFocus
/>
<TouchableOpacity onPress={() => { setSearchVisible(false); setSearchQuery(''); }}>
<Text style={{color: '#FF3B30', fontSize: 14, paddingHorizontal: 8}}>X</Text>
</TouchableOpacity>
</View>
)}
{/* Message list */}
<FlatList
ref={flatListRef}
data={messages}
data={searchQuery ? messages.filter(m => m.text.toLowerCase().includes(searchQuery.toLowerCase())) : messages}
keyExtractor={item => item.id}
renderItem={renderMessage}
contentContainerStyle={styles.messageList}
@@ -887,6 +921,30 @@ const styles = StyleSheet.create({
wakeWordIcon: {
fontSize: 16,
},
searchBar: {
flexDirection: 'row',
alignItems: 'center',
backgroundColor: '#12122A',
paddingHorizontal: 12,
paddingVertical: 6,
borderBottomWidth: 1,
borderBottomColor: '#1E1E2E',
},
searchInput: {
flex: 1,
color: '#FFFFFF',
fontSize: 14,
paddingVertical: 4,
},
playButton: {
alignSelf: 'flex-end',
paddingHorizontal: 8,
paddingVertical: 2,
marginTop: 4,
},
playButtonText: {
fontSize: 16,
},
fullscreenOverlay: {
flex: 1,
backgroundColor: 'rgba(0,0,0,0.95)',

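For clarity, the filter applied to the FlatList data above, extracted as a pure helper; the Message shape is assumed from this diff (only id and text appear here):

// Sketch: the case-insensitive search filter used by the FlatList above.
// The Message shape is an assumption based on the fields visible in this diff.
interface Message { id: string; text: string; }

function filterMessages(messages: Message[], query: string): Message[] {
  if (!query) return messages;            // empty query: show all messages
  const q = query.toLowerCase();
  return messages.filter(m => m.text.toLowerCase().includes(q));
}
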
View File

@@ -1,21 +1,12 @@
/**
 * Wake word service: "ARIA" detection
 *
 * Uses react-native-live-audio-stream for continuous microphone monitoring.
 * Detects speech via an energy threshold and sends short audio clips
 * for server-side wake word verification (openwakeword in the bridge).
 * Phase 1: disabled; react-native-live-audio-stream has native bridge problems.
 * Uses tap-to-talk (VoiceButton) as the primary input mode instead.
 *
 * Architecture:
 * App (microphone) → energy detection → audio buffer
 * → RVS "wake_check" → bridge → openwakeword → confirmation
 * → app starts recording
 *
 * Currently (Phase 1): simple tap-to-talk + auto-stop.
 * Later (Phase 2): Porcupine on-device "ARIA" keyword.
 * Phase 2: Porcupine on-device "ARIA" keyword (planned).
 */
import LiveAudioStream from 'react-native-live-audio-stream';
type WakeWordCallback = () => void;
type StateCallback = (state: WakeWordState) => void;
@@ -25,47 +16,16 @@ class WakeWordService {
private state: WakeWordState = 'off';
private wakeCallbacks: WakeWordCallback[] = [];
private stateCallbacks: StateCallback[] = [];
private isInitialized = false;
/** Start wake word detection */
async start(): Promise<boolean> {
if (this.state === 'listening') return true;
try {
if (!this.isInitialized) {
LiveAudioStream.init({
sampleRate: 16000,
channels: 1,
bitsPerSample: 16,
audioSource: 6, // VOICE_RECOGNITION
bufferSize: 4096,
});
this.isInitialized = true;
}
// Start the audio stream and check the energy level
LiveAudioStream.start();
LiveAudioStream.on('data', (base64Chunk: string) => {
if (this.state !== 'listening') return;
// base64 → Int16Array → compute RMS
const raw = this._base64ToInt16(base64Chunk);
const rms = this._calculateRMS(raw);
// Threshold: if loud enough → wake word detected
// Phase 1: simple energy detection (someone is speaking)
// Phase 2: Porcupine "ARIA" keyword
if (rms > 2000) {
this.setState('detected');
this.wakeCallbacks.forEach(cb => cb());
// Pause briefly after detection; the recording takes over the microphone
this.stop();
}
});
// Phase 1: LiveAudioStream disabled (native bridge unstable)
// Instead: tap-to-talk as the primary mode
console.log('[WakeWord] Wake Word ist in Phase 1 noch nicht verfuegbar — nutze Tap-to-Talk');
this.setState('listening');
console.log('[WakeWord] Listening gestartet');
return true;
} catch (err) {
console.error('[WakeWord] Start fehlgeschlagen:', err);
@@ -75,22 +35,12 @@ class WakeWordService {
/** Stop wake word detection */
stop(): void {
if (this.state === 'off') return;
try {
LiveAudioStream.stop();
} catch {}
this.setState('off');
console.log('[WakeWord] Gestoppt');
}
/** Restart after a recording */
async resume(): Promise<void> {
// Short pause so the recording can release the microphone
setTimeout(() => {
if (this.state === 'off') {
this.start();
}
}, 500);
// Nothing to do in Phase 1
}
// --- Callbacks ---
@@ -113,32 +63,12 @@ class WakeWordService {
return this.state;
}
// --- Helpers ---
private setState(state: WakeWordState): void {
if (this.state !== state) {
this.state = state;
this.stateCallbacks.forEach(cb => cb(state));
}
}
private _base64ToInt16(base64: string): Int16Array {
const binary = atob(base64);
const bytes = new Uint8Array(binary.length);
for (let i = 0; i < binary.length; i++) {
bytes[i] = binary.charCodeAt(i);
}
return new Int16Array(bytes.buffer);
}
private _calculateRMS(samples: Int16Array): number {
if (samples.length === 0) return 0;
let sum = 0;
for (let i = 0; i < samples.length; i++) {
sum += samples[i] * samples[i];
}
return Math.sqrt(sum / samples.length);
}
}
const wakeWordService = new WakeWordService();

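A brief usage sketch for the Phase 1 service: start() no longer opens an audio stream, it only flips the state so the UI can fall back to tap-to-talk. The default import is assumed from the instance created above; nothing else here is from the diff:

// Sketch: toggling the Phase 1 wake word service from UI code.
// The default export is an assumption based on the instance created above.
import wakeWordService from './wakeWordService';

async function toggleWakeWord(currentlyActive: boolean): Promise<boolean> {
  if (currentlyActive) {
    wakeWordService.stop();       // returns to 'off'
    return false;
  }
  return wakeWordService.start(); // Phase 1: only sets state to 'listening'
}
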
View File

@@ -1014,6 +1014,30 @@ class ARIABridge:
if sender in ("aria", "stt"):
return
elif msg_type == "tts_request":
# The app requests TTS audio for a text (play button)
text = payload.get("text", "")
requested_voice = payload.get("voice", "")
if text:
voice_name = requested_voice or self.voice_engine.select_voice(text)
audio_data = self.voice_engine.synthesize(text, voice_name)
if audio_data:
audio_b64 = base64.b64encode(audio_data).decode("ascii")
try:
await self._send_to_rvs({
"type": "audio",
"payload": {
"base64": audio_b64,
"mimeType": "audio/wav",
"voice": voice_name,
},
"timestamp": int(asyncio.get_event_loop().time() * 1000),
})
logger.info("[rvs] TTS on-demand: %d bytes (%s)", len(audio_data), voice_name)
except Exception as e:
logger.warning("[rvs] TTS on-demand senden fehlgeschlagen: %s", e)
return
elif msg_type == "config":
# Receive config from app/diagnostic and persist it
changed = False

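On the app side, the reply arrives as a regular audio message; a sketch of its shape per the handler above (how the app subscribes is not in this diff, so onAudio is a hypothetical name; only the message fields come from the handler):

// Sketch: shape of the bridge's on-demand TTS reply, per the handler above.
// onAudio is a hypothetical handler name for illustration.
interface AudioMessage {
  type: 'audio';
  payload: { base64: string; mimeType: string; voice: string };
  timestamp: number;
}

function onAudio(msg: AudioMessage): void {
  // Decode the base64 WAV; handing it to an audio player is omitted here.
  const bytes = Uint8Array.from(atob(msg.payload.base64), c => c.charCodeAt(0));
  console.log(`TTS audio: ${bytes.length} bytes (${msg.payload.voice})`);
}
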
View File

@@ -201,8 +201,9 @@
<button class="btn secondary" onclick="toggleChatFullscreen()" id="btn-chat-fs" style="padding:4px 10px;font-size:11px;">Vollbild</button>
</div>
<div class="chat-box" id="chat-box"></div>
<div id="thinking-indicator" style="display:none;padding:6px 10px;font-size:12px;color:#FFD60A;background:#1E1E2E;border-radius:0 0 6px 6px;margin-top:-8px;margin-bottom:8px;">
<span style="animation:pulse 1s infinite;">&#x1F4AD;</span> <span id="thinking-text">ARIA denkt...</span>
<div id="thinking-indicator" style="display:none;padding:6px 10px;font-size:12px;color:#FFD60A;background:#1E1E2E;border-radius:0 0 6px 6px;margin-top:-8px;margin-bottom:8px;display:flex;align-items:center;justify-content:space-between;">
<span><span style="animation:pulse 1s infinite;">&#x1F4AD;</span> <span id="thinking-text">ARIA denkt...</span></span>
<button class="btn secondary" onclick="cancelRequest()" style="padding:2px 10px;font-size:11px;color:#FF3B30;border-color:#FF3B30;">Abbrechen</button>
</div>
<div class="input-row">
<input type="text" id="chat-input" placeholder="Nachricht an ARIA...">
@@ -1166,6 +1167,13 @@
}, 120000);
}
// ── Cancel ──────────────────────────────
function cancelRequest() {
send({ action: 'cancel_request' });
updateThinkingIndicator({ activity: 'idle' });
addChat('error', 'Anfrage abgebrochen', 'system');
}
// ── Voice config ──────────────────────────
function sendVoiceConfig() {
const defaultVoice = document.getElementById('diag-default-voice').value;

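The cancel flow in one place, as implied by cancelRequest() above and the cancel_request branch in server.js below; the WebSocket endpoint here is a placeholder, only the message shape is from this commit:

// Sketch: the diagnostic cancel flow, per this commit.
// The endpoint URL is a placeholder; { action: 'cancel_request' } is from cancelRequest().
const ws = new WebSocket('ws://ARIA_HOST:PORT/'); // hypothetical endpoint

function cancelRunningRequest(): void {
  ws.send(JSON.stringify({ action: 'cancel_request' }));
  // Server side: resets the watchdog counters, broadcasts agent_activity 'idle',
  // and fires `openclaw doctor --fix` inside aria-core.
}
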
View File

@@ -355,6 +355,11 @@ function handleGatewayMessage(msg) {
broadcast({ type: "agent_activity", activity: "idle" });
pendingMessageTime = 0; // Watchdog: reply received
updateAgentActivity();
// Write the reply to the backup log
try {
const entry = JSON.stringify({ ts: Date.now(), role: "assistant", text: text.slice(0, 2000), session: activeSessionKey }) + "\n";
fs.appendFileSync("/shared/config/chat_backup.jsonl", entry);
} catch {}
return;
}
@@ -428,6 +433,12 @@ function sendToGateway(text, isPipeline) {
log("debug", "gateway", `RAW >>> ${payload}`);
gatewayWs.send(payload);
pendingMessageTime = Date.now(); // Watchdog: message sent
// Write the message to the backup log immediately (OpenClaw only persists after the run ends)
try {
fs.mkdirSync("/shared/config", { recursive: true });
const entry = JSON.stringify({ ts: Date.now(), role: "user", text, session: activeSessionKey }) + "\n";
fs.appendFileSync("/shared/config/chat_backup.jsonl", entry);
} catch {}
log("info", "gateway", `chat.send [${reqId}]: "${text}"`);
if (isPipeline) plog(`chat.send [${reqId}] an Gateway gesendet — warte auf ACK...`);
@@ -1005,6 +1016,7 @@ function waitForMessage(ws, timeoutMs) {
let lastAgentActivity = Date.now();
let watchdogWarned = false;
let watchdogFixAttempted = false;
let pendingMessageTime = 0; // when the last message was sent
function updateAgentActivity() {
@@ -1024,20 +1036,37 @@ setInterval(async () => {
broadcast({ type: "watchdog", status: "warning", waitingMs, message: "ARIA reagiert nicht — moeglicherweise stuck Run" });
}
// After 5min: offer an auto-fix
if (waitingMs > 300000 && watchdogWarned) {
// After 5min: doctor --fix
if (waitingMs > 300000 && watchdogWarned && !watchdogFixAttempted) {
watchdogFixAttempted = true;
log("error", "server", "Watchdog: 5min ohne Antwort — fuehre openclaw doctor --fix aus");
broadcast({ type: "watchdog", status: "fixing", message: "Auto-Fix: openclaw doctor --fix" });
try {
await dockerExec("aria-core", "openclaw doctor --fix 2>/dev/null || true");
log("info", "server", "Watchdog: doctor --fix ausgefuehrt");
broadcast({ type: "watchdog", status: "fixed", message: "doctor --fix ausgefuehrt — sende Nachricht erneut" });
broadcast({ type: "watchdog", status: "fixed", message: "doctor --fix ausgefuehrt — warte auf Antwort..." });
} catch (err) {
log("error", "server", `Watchdog: doctor --fix fehlgeschlagen: ${err.message}`);
broadcast({ type: "watchdog", status: "error", message: `Auto-Fix fehlgeschlagen: ${err.message}` });
}
pendingMessageTime = 0; // Reset
}
// After 8min: restart the containers
if (waitingMs > 480000 && watchdogFixAttempted) {
log("error", "server", "Watchdog: 8min ohne Antwort — starte aria-core + aria-proxy neu");
broadcast({ type: "watchdog", status: "restarting", message: "Container-Restart: aria-core + aria-proxy" });
try {
const { execSync } = require("child_process");
execSync("docker restart aria-core aria-proxy", { timeout: 60000 });
log("info", "server", "Watchdog: Container neugestartet");
broadcast({ type: "watchdog", status: "restarted", message: "Container neugestartet — warte auf Gateway-Reconnect..." });
// The gateway will reconnect automatically
} catch (err) {
log("error", "server", `Watchdog: Container-Restart fehlgeschlagen: ${err.message}`);
broadcast({ type: "watchdog", status: "error", message: `Restart fehlgeschlagen: ${err.message}` });
}
pendingMessageTime = 0;
watchdogWarned = false;
watchdogFixAttempted = false;
}
}, 30000);
@@ -1127,6 +1156,15 @@ wss.on("connection", (ws) => {
if (ws._sshSock) ws._sshSock.write(msg.data);
} else if (msg.action === "live_ssh_close") {
if (ws._sshSock) { ws._sshSock.end(); ws._sshSock = null; }
} else if (msg.action === "cancel_request") {
// Cancel the running request; doctor --fix terminates stuck runs
log("warn", "server", "Anfrage abgebrochen — fuehre doctor --fix aus");
pendingMessageTime = 0;
watchdogWarned = false;
watchdogFixAttempted = false;
if (pipelineActive) pipelineEnd(false, "Vom Benutzer abgebrochen");
broadcast({ type: "agent_activity", activity: "idle" });
dockerExec("aria-core", "openclaw doctor --fix 2>/dev/null || true").catch(() => {});
} else if (msg.action === "get_voice_config") {
handleGetVoiceConfig(ws);
} else if (msg.action === "send_voice_config") {

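To summarize the escalation ladder above as data (the 2-minute warning threshold comes from the commit message; the interval itself runs every 30s against pendingMessageTime):

// Sketch: the watchdog escalation implemented above, as a timetable.
// waitingMs = Date.now() - pendingMessageTime, checked every 30 seconds.
const WATCHDOG_STAGES = [
  { afterMs: 120_000, action: 'broadcast warning: possible stuck run' },
  { afterMs: 300_000, action: 'openclaw doctor --fix inside aria-core' },
  { afterMs: 480_000, action: 'docker restart aria-core aria-proxy' },
] as const;
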
View File

@@ -1,26 +1,36 @@
# done image upload doesn't work yet.
# end
# done
voice messages are not shown as a second message so you can see what you sent
# end
# ARIA Issues & Features
## Done
# done clear cache, images are not reloaded when tapped.
autoload doesn't work
# end
- [x] Image upload works (shared volume /shared/uploads/)
- [x] Voice messages are shown as text (STT → chat bubble)
- [x] Clear cache + auto-download of attachments
- [x] ARIA reads messages aloud (TTS via Piper)
- [x] Autoscroll to the latest message
- [x] Larger images in chat + fullscreen preview
- [x] Fixed ear button crash (LiveAudioStream removed, Phase 1 placeholder)
- [x] Play button in ARIA messages for speech playback
- [x] In-app chat search (magnifier in the status bar)
- [x] Watchdog with container restart (2min warning → 5min doctor --fix → 8min restart)
- [x] Cancel button in the Diagnostic chat
- [x] On-the-fly message backup (/shared/config/chat_backup.jsonl)
- [x] Split long messages into sentences for TTS
- [x] RVS messages from the smartphone get through
- [x] Voice settings (Ramona/Thorsten, speed per voice)
- [x] Highlight triggers configurable in Diagnostic
when you tap the ear to listen it crashes
## Open
# done aria doesn't read the messages aloud
# end
### TTS / Voices
- [ ] Selectable TTS engine: Piper (CPU, fast) or Coqui XTTS v2 (GPU, more natural)
- [ ] Piper voice downloads via Diagnostic (new languages/voices)
- [ ] Coqui XTTS v2 integration (needs GPU, better German voice)
# done autoscroll still doesn't go to the latest message
our memory brain
# end
### App
- [ ] On-device wake word (Porcupine "ARIA" keyword, Phase 2)
- [ ] Load chat history more reliably (AsyncStorage race condition)
# done show images larger in chat
# end
download the Piper voices via the Diagnostic
# end
### Architecture
- [ ] Images: use Claude Vision directly (currently only the file path goes to ARIA)
- [ ] Auto-compacting and memory/brain management (SQLite?)
- [ ] Diagnostic: system info tab (container status, disk, RAM, CPU)

View File

@@ -9,7 +9,7 @@ const MAX_SESSIONS = parseInt(process.env.MAX_SESSIONS || "10", 10);
// Allowed message types; everything else is discarded
const ALLOWED_TYPES = new Set([
"chat", "audio", "file", "location", "mode", "log", "event", "heartbeat",
"file_request", "file_response", "file_saved", "stt_result", "config",
"file_request", "file_response", "file_saved", "stt_result", "config", "tts_request",
]);
// Token-Raum: token -> { clients: Set<ws> }