Compare commits
113 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| 019c078393 | |||
| d411df4074 | |||
| 763e0d79ab | |||
| 47fe4ad655 | |||
| 99cb83202e | |||
| fc2438be2d | |||
| 40e48b046b | |||
| f801d99748 | |||
| 6ab6196739 | |||
| eb12281dfc | |||
| 1fb1fdef9e | |||
| 593d26e0ff | |||
| 394abb58be | |||
| fc3bee6d05 | |||
| b203503fd8 | |||
| 8b0a72dc9b | |||
| 23add7a107 | |||
| caf84196fb | |||
| 099b9651a6 | |||
| 76d72a1eef | |||
| 87deede078 | |||
| 6fec8588c1 | |||
| aafdbcd57a | |||
| 08da28f475 | |||
| 8c1014d281 | |||
| 271fc4edf6 | |||
| cd390a4115 | |||
| a65ed579d2 | |||
| 2ad1f57382 | |||
| 58e3cfd3e6 | |||
| 7de4ee8f5b | |||
| 213edac3a7 | |||
| acc13aef6b | |||
| 4bbc6f7787 | |||
| 20f2ea1829 | |||
| 2d23f0668b | |||
| d6030a06b7 | |||
| 0df76e2af6 | |||
| f80fe1df93 | |||
| cff421bc53 | |||
| bca925d385 | |||
| 9abde89805 | |||
| ea4f639fcb | |||
| 64cd5f7d52 | |||
| 843ebe1d8f | |||
| 764619f076 | |||
| e3a0cfb55a | |||
| 2929749314 | |||
| 51b9512f4e | |||
| ffcfa44eef | |||
| 6363da97b1 | |||
| 07ed2cdcf6 | |||
| 5ad68b7dfc | |||
| 8a6ee018ea | |||
| b42590ff95 | |||
| 056b579c47 | |||
| 576e612cd0 | |||
| c2faa06a15 | |||
| d3ed3556eb | |||
| d960d125c0 | |||
| 89d5d7ec0a | |||
| ea0c13936b | |||
| 773c976822 | |||
| cd05ed2379 | |||
| 054e4057d8 | |||
| 3943e79bb1 | |||
| 87f4317c15 | |||
| 50aa793910 | |||
| 5efc9865a8 | |||
| 949c573c49 | |||
| f7f450a09d | |||
| 81f7c38383 | |||
| 2c785cb37a | |||
| 57e65b061c | |||
| aa54765b03 | |||
| 8929bc99bb | |||
| 0428c06612 | |||
| a7eb3cf433 | |||
| e4e0e793a8 | |||
| b3d3b8b6bc | |||
| 06bc456221 | |||
| 3461f45207 | |||
| a17d4acc13 | |||
| 62fd9193a1 | |||
| 2329645df4 | |||
| 8a435ddf6c | |||
| 25b754ba31 | |||
| b734593bf2 | |||
| 16847ce6f7 | |||
| 6300829317 | |||
| a1e1ee31bd | |||
| 7ed70b876d | |||
| 3ca85da906 | |||
| d6a89168ef | |||
| cb33a20694 | |||
| a242693751 | |||
| 81ca3cc7a7 | |||
| 1a32098c9e | |||
| fa4c32270b | |||
| 9c43b875f4 | |||
| 63560e290b | |||
| 1ab8a6a2fe | |||
| a2c0196e05 | |||
| 680f7a64e2 | |||
| 4893616a5a | |||
| 04e8c0245d | |||
| 10cefaf1cd | |||
| adbb1fe80a | |||
| 79c50aedcc | |||
| eb72b35e23 | |||
| bbd02d46a6 | |||
| 3d3c8ce973 | |||
| 562f929056 |
+37
-7
@@ -1,20 +1,50 @@
|
||||
# ARIA Environment Configuration
|
||||
# Copy to .env and fill in values
|
||||
# ════════════════════════════════════════════════
|
||||
# ARIA — Umgebungsvariablen
|
||||
# Kopieren nach .env und Werte eintragen
|
||||
# ════════════════════════════════════════════════
|
||||
|
||||
# Auth token for ARIA Core (generate a long random string)
|
||||
# openssl rand -hex 32
|
||||
# ── ARIA Auth Token ──────────────────────────────
|
||||
# Authentifizierung fuer den OpenClaw Gateway (aria-core).
|
||||
# Wird von Diagnostic, Bridge und App genutzt um sich am Gateway anzumelden.
|
||||
# Alle Services die mit aria-core kommunizieren brauchen diesen Token.
|
||||
# Generieren: openssl rand -hex 32
|
||||
ARIA_AUTH_TOKEN=change-me-to-a-long-random-string
|
||||
|
||||
# RVS — Rendezvous-Server (Bridge + App verbinden sich hierüber)
|
||||
# ── RVS — Rendezvous-Server ─────────────────────
|
||||
# Der RVS ist ein WebSocket-Relay im Rechenzentrum.
|
||||
# App, Bridge, Diagnostic und XTTS-Bridge verbinden sich hierueber.
|
||||
# Alle muessen den gleichen Host, Port und Token nutzen.
|
||||
|
||||
# Hostname des RVS-Servers (z.B. rvs.example.de oder mobil.hacker-net.de)
|
||||
RVS_HOST=rvs.example.de
|
||||
|
||||
# Port auf dem der RVS laeuft (muss mit rvs/docker-compose.yml uebereinstimmen)
|
||||
RVS_PORT=443
|
||||
|
||||
# TLS (wss://) verwenden? true = verschluesselt, false = unverschluesselt (ws://)
|
||||
RVS_TLS=true
|
||||
|
||||
# Bei TLS-Fehler automatisch auf ws:// (ohne TLS) fallback?
|
||||
# true = Fallback erlaubt, false = nur mit TLS verbinden
|
||||
# Nuetzlich wenn kein TLS-Zertifikat vorhanden (z.B. Entwicklung)
|
||||
RVS_TLS_FALLBACK=true
|
||||
|
||||
# Pairing-Token: Wer den gleichen Token hat, landet im gleichen RVS-Room.
|
||||
# Wird von generate-token.sh automatisch generiert und hier eingetragen.
|
||||
# Die Android App bekommt den Token per QR-Code beim Pairing.
|
||||
# WICHTIG: Muss auf ARIA-VM, Gaming-PC (xtts/.env) und App identisch sein!
|
||||
# Generieren: ./generate-token.sh (traegt den Token automatisch ein)
|
||||
RVS_TOKEN=
|
||||
|
||||
# Gitea (for release.sh — Kennwort wird interaktiv abgefragt)
|
||||
# ── Gitea — Release-Verwaltung ───────────────────
|
||||
# Wird von release.sh genutzt um APKs auf Gitea zu veroeffentlichen.
|
||||
# Kennwort wird beim Release interaktiv abgefragt (nicht in .env!).
|
||||
GITEA_URL=https://git.hacker-net.de
|
||||
GITEA_REPO=Hacker-Software/ARIA-AGENT
|
||||
GITEA_USER=duffyduck
|
||||
|
||||
# ── Auto-Update — APK auf RVS-Server kopieren ───
|
||||
# SSH-Ziel fuer scp: release.sh kopiert die APK dorthin.
|
||||
# Der RVS-Server stellt sie dann per WebSocket an die App bereit.
|
||||
# Format: user@host (z.B. root@aria-rvs oder root@rvs.example.de)
|
||||
# Leer lassen = Auto-Update ueberspringen, APK manuell auf RVS kopieren.
|
||||
RVS_UPDATE_HOST=
|
||||
|
||||
@@ -36,6 +36,7 @@ android/local.properties
|
||||
android/package-lock.json
|
||||
*.apk
|
||||
*.aab
|
||||
rvs/updates/*.apk
|
||||
|
||||
# ── Tauri / Desktop Build ───────────────────────
|
||||
desktop/src-tauri/target/
|
||||
|
||||
@@ -29,11 +29,18 @@ ARIA hat zwei Rollen:
|
||||
┌─────────────────────────────────────────────────────────┐
|
||||
│ RVS — Rendezvous-Server │
|
||||
│ Node.js WebSocket Relay (Docker, Rechenzentrum) │
|
||||
│ Reiner Relay — kennt keine Tokens, leitet durch │
|
||||
│ Relay + Auto-Update (APK-Verteilung) │
|
||||
│ rvs/docker-compose.yml │
|
||||
└───────────────────────┬─────────────────────────────────┘
|
||||
│ WebSocket Tunnel
|
||||
▼
|
||||
└───────────┬───────────────────────────┬─────────────────┘
|
||||
│ WebSocket Tunnel │ WebSocket Tunnel
|
||||
▼ ▼
|
||||
┌───────────────────────────┐
|
||||
│ Gaming-PC (optional) │
|
||||
│ RTX 3060, Docker+WSL2 │
|
||||
│ XTTS v2 (natuerliche │
|
||||
│ Stimmen, Voice Cloning) │
|
||||
│ xtts/docker-compose.yml │
|
||||
└───────────────────────────┘
|
||||
┌─────────────────────────────────────────────────────────┐
|
||||
│ ARIA-VM (Proxmox, Debian 13) — ARIAs Wohnung │
|
||||
│ Basissystem + Docker. Rest richtet ARIA selbst ein. │
|
||||
@@ -50,8 +57,8 @@ ARIA hat zwei Rollen:
|
||||
│ │ Liest BOOTSTRAP.md + AGENT.md │ │
|
||||
│ │ │ │
|
||||
│ │ [bridge] ARIA Voice Bridge Container │ │
|
||||
│ │ Whisper STT · Piper TTS · Wake-Word │ │
|
||||
│ │ Ramona (weiblich) + Thorsten (tief) │ │
|
||||
│ │ Whisper STT · Wake-Word │ │
|
||||
│ │ TTS remote via XTTS v2 auf Gaming-PC │ │
|
||||
│ │ Bruecke: App <> RVS <> Bridge <> ARIA │ │
|
||||
│ │ │ │
|
||||
│ │ [diagnostic] Selbstcheck-UI + Einstellungen │ │
|
||||
@@ -66,13 +73,14 @@ ARIA hat zwei Rollen:
|
||||
└─────────────────────────────────────────────────────────┘
|
||||
```
|
||||
|
||||
**Drei separate Deployments:**
|
||||
**Vier separate Deployments:**
|
||||
|
||||
| Was | Wo | Wie |
|
||||
|-----|----|-----|
|
||||
| RVS | Rechenzentrum | `cd rvs && docker compose up -d` |
|
||||
| ARIA Core | Debian 13 VM | `docker compose up -d && ./aria-setup.sh` |
|
||||
| Android App | Stefans Handy | APK installieren, QR-Code scannen |
|
||||
| XTTS v2 (optional) | Gaming-PC (GPU) | `cd xtts && docker compose up -d` |
|
||||
| Android App | Stefans Handy | APK installieren (Auto-Update via RVS) |
|
||||
|
||||
---
|
||||
|
||||
@@ -95,16 +103,31 @@ cd ~/ARIA-AGENT
|
||||
cp .env.example .env
|
||||
```
|
||||
|
||||
`.env` Datei editieren:
|
||||
`.env` Datei editieren (Details siehe `.env.example`):
|
||||
```bash
|
||||
# Gateway-Auth: Alle Services die mit aria-core reden brauchen diesen Token
|
||||
# Diagnostic, Bridge, App nutzen ihn fuer den WebSocket-Handshake
|
||||
ARIA_AUTH_TOKEN= # openssl rand -hex 32
|
||||
|
||||
# RVS-Verbindung: Hostname + Port deines Rendezvous-Servers
|
||||
RVS_HOST= # z.B. rvs.hackersoft.de
|
||||
RVS_PORT=443
|
||||
RVS_TLS=true
|
||||
RVS_TLS_FALLBACK=true
|
||||
RVS_TOKEN= # wird von generate-token.sh automatisch gesetzt
|
||||
|
||||
# Pairing-Token: Verbindet App, Bridge, Diagnostic und XTTS im gleichen RVS-Room
|
||||
# MUSS auf allen Geraeten identisch sein (ARIA-VM, Gaming-PC, App)
|
||||
# Wird von generate-token.sh automatisch generiert und eingetragen
|
||||
RVS_TOKEN= # ./generate-token.sh
|
||||
|
||||
# Optional: SSH-Host des RVS-Servers fuer Auto-Update (z.B. root@aria-rvs)
|
||||
RVS_UPDATE_HOST=
|
||||
```
|
||||
|
||||
**Zwei Tokens, zwei Zwecke:**
|
||||
- **ARIA_AUTH_TOKEN**: Authentifizierung am OpenClaw Gateway (aria-core). Wer diesen Token hat, kann ARIA Befehle geben.
|
||||
- **RVS_TOKEN**: Pairing-Token fuer den Rendezvous-Server. Alle Geraete mit dem gleichen Token landen im gleichen "Room" und koennen kommunizieren. Die App bekommt diesen Token per QR-Code.
|
||||
|
||||
### 2. Claude CLI einloggen (Proxy-Auth)
|
||||
|
||||
Der Proxy-Container nutzt deine Claude Max Subscription. Die Credentials muessen
|
||||
@@ -120,21 +143,16 @@ claude login
|
||||
**Wichtig:** Der Ordner `~/.claude/` (nicht `~/.config/claude/`!) wird als Volume
|
||||
in den Proxy gemountet. Die Credentials ueberleben Container-Restarts.
|
||||
|
||||
### 3. Stimmen herunterladen
|
||||
|
||||
```bash
|
||||
./get-voices.sh
|
||||
# Laedt Ramona + Thorsten (Piper TTS) nach aria-data/voices/
|
||||
# Ca. 100MB, dauert ein paar Minuten
|
||||
```
|
||||
|
||||
### 4. Voice Bridge konfigurieren
|
||||
### 3. Voice Bridge konfigurieren
|
||||
|
||||
```bash
|
||||
cp aria-data/config/aria.env.example aria-data/config/aria.env
|
||||
# Bei Bedarf anpassen (Whisper-Modell, Sprache, Stimmen-Pfade)
|
||||
# Bei Bedarf anpassen (Whisper-Modell, Sprache, Wake-Word)
|
||||
```
|
||||
|
||||
TTS laeuft ausschliesslich ueber XTTS v2 auf dem Gaming-PC — siehe Abschnitt
|
||||
"XTTS v2 — High-Quality TTS" weiter unten.
|
||||
|
||||
### 5. RVS-Token generieren & Container starten
|
||||
|
||||
```bash
|
||||
@@ -230,7 +248,6 @@ Danach werden per `sed` vier Patches angewendet:
|
||||
- Sicherheitsregeln (kein ClawHub, Prompt Injection abwehren)
|
||||
- Tool-Freigaben (alle Claude Code Tools: WebFetch, Bash, etc.)
|
||||
- SSH-Zugriff auf aria-wohnung (VM)
|
||||
- Stimmen-Auswahl (Ramona vs Thorsten)
|
||||
- Gedaechtnis-System
|
||||
|
||||
### openclaw.json (via aria-setup.sh)
|
||||
@@ -276,14 +293,14 @@ Audio: App → RVS → Bridge → FFmpeg → Whisper STT → chat.send → aria
|
||||
Datei: App → RVS → Bridge → /shared/uploads/ → chat.send (mit Pfad) → aria-core
|
||||
|
||||
aria-core → Antwort → Gateway → Diagnostic → RVS → App
|
||||
→ Bridge → Piper TTS → RVS → App (Audio)
|
||||
→ Bridge → Lautsprecher (lokal)
|
||||
→ Bridge → XTTS (PCM-Stream) → RVS → App AudioTrack
|
||||
```
|
||||
|
||||
### Features
|
||||
|
||||
- **STT**: faster-whisper (lokal, offline, 16kHz mono)
|
||||
- **TTS**: Piper (Ramona + Thorsten, offline)
|
||||
- **TTS**: XTTS v2 (remote auf Gaming-PC, GPU, Voice Cloning) — Streaming ueber PCM-Chunks
|
||||
- **Text-Cleanup**: `<voice>...</voice>` Tag bevorzugt, Markdown/Code/Einheiten/URLs werden TTS-gerecht aufbereitet
|
||||
- **Wake-Word**: openwakeword (lokales Mikrofon auf der VM)
|
||||
- **App-Audio**: Base64 Audio von App → FFmpeg → Whisper STT → Text an aria-core
|
||||
- **Modi**: Normal, Nicht stoeren, Fluestern, Hangar, Gaming
|
||||
@@ -298,13 +315,6 @@ aria-core → Antwort → Gateway → Diagnostic → RVS → App
|
||||
| Hangar | `"ARIA, ich arbeite"` | Nur wichtige Meldungen |
|
||||
| Gaming | `"ARIA, Gaming-Modus"` | Nur auf direkte Fragen antworten |
|
||||
|
||||
### Stimmen
|
||||
|
||||
| Stimme | Modell | Wann |
|
||||
|--------|--------|------|
|
||||
| **Ramona** (weiblich) | `de_DE-ramona-low` | Alltag, Antworten, Gespraeche |
|
||||
| **Thorsten** (maennlich, tief) | `de_DE-thorsten-high` | Epische Momente, Alarme |
|
||||
|
||||
---
|
||||
|
||||
## Diagnostic — Selbstcheck-UI und Einstellungen
|
||||
@@ -314,13 +324,19 @@ Erreichbar unter `http://<VM-IP>:3001`. Teilt das Netzwerk mit aria-core.
|
||||
### Features
|
||||
|
||||
- **Status-Karten**: Gateway (Handshake), RVS (TLS-Fallback), Proxy (Auth)
|
||||
- **Chat-Test**: Nachrichten direkt an ARIA senden (Gateway oder via RVS)
|
||||
- **Session-Verwaltung**: Sessions auflisten, wechseln, erstellen, loeschen
|
||||
- **Chat-Test**: Nachrichten direkt an ARIA senden (Gateway oder via RVS), Vollbild-Modus
|
||||
- **"ARIA denkt..." Indikator**: Zeigt live was ARIA gerade tut (Denken, Tool, Schreiben)
|
||||
- **Abbrechen-Button**: Stoppt laufende Anfragen + doctor --fix
|
||||
- **Session-Verwaltung**: Sessions auflisten, wechseln, erstellen, loeschen, als Markdown exportieren (⬇ Button)
|
||||
- **Chat-History**: Wird beim Laden und Session-Wechsel angezeigt (read-only aus JSONL)
|
||||
- **TTS-Diagnose Tab**: Stimmen testen, Status pruefen, Fehler anzeigen
|
||||
- **Einstellungen**: TTS aktiv-Toggle, XTTS-Voice (gecloned), Betriebsmodi, Whisper-Modell (tiny…large-v3, Hot-Reload)
|
||||
- **XTTS Voice Cloning**: Audio-Samples hochladen, eigene Stimme erstellen
|
||||
- **Claude Login**: Browser-Terminal zum Einloggen in den Proxy
|
||||
- **Core Terminal**: Shell in aria-core (openclaw CLI)
|
||||
- **Container-Logs**: Echtzeit-Logs aller Container (gefiltert nach Tab)
|
||||
- **Container-Logs**: Echtzeit-Logs aller Container (gefiltert nach Tab + Pipeline)
|
||||
- **SSH Terminal**: Direkter SSH-Zugang zu aria-wohnung
|
||||
- **Watchdog**: Erkennt stuck Runs (2min Warnung → 5min doctor --fix → 8min Container-Restart)
|
||||
|
||||
### Session-Verwaltung
|
||||
|
||||
@@ -338,12 +354,19 @@ API-Endpoint fuer andere Services: `GET http://localhost:3001/api/session`
|
||||
|
||||
- Text-Chat mit ARIA
|
||||
- **Sprachaufnahme**: Push-to-Talk (halten) oder Tap-to-Talk (tippen, Auto-Stop bei Stille)
|
||||
- **Gespraechsmodus** (Ohr-Button): Nach jeder ARIA-Antwort startet automatisch die Aufnahme — wie ein natuerliches Gespraech hin und her, ohne Buttons druecken
|
||||
- **VAD (Voice Activity Detection)**: Erkennt 1.8s Stille und stoppt automatisch
|
||||
- **STT (Speech-to-Text)**: Audio wird in der Bridge per Whisper transkribiert, transkribierter Text erscheint im Chat
|
||||
- **Wake Word**: Toggle-Button (Ohr-Symbol) aktiviert kontinuierliches Mikrofon-Monitoring
|
||||
- **TTS-Wiedergabe**: ARIA antwortet per Lautsprecher (Ramona/Thorsten)
|
||||
- **Datei- und Bild-Upload**: Bilder inline im Chat, Dateien mit Icon + Name + Groesse
|
||||
- **Anhaenge**: Bridge speichert Dateien in Shared Volume (`/shared/uploads/`), ARIA kann darauf zugreifen
|
||||
- **Speech Gate**: Aufnahme wird verworfen wenn keine Sprache erkannt (kein Rauschen an Whisper)
|
||||
- **STT (Speech-to-Text)**: Audio wird als 16kHz mono aufgenommen und in der Bridge per Whisper transkribiert, transkribierter Text erscheint im Chat
|
||||
- **"ARIA denkt..." Indicator**: Zeigt live den Status vom Core (Denken, Tool, Schreiben) + Abbrechen-Button
|
||||
- **TTS-Wiedergabe**: ARIA antwortet per Lautsprecher — XTTS v2 PCM-Streaming direkt in AudioTrack, keine Wait-Gaps
|
||||
- **Play-Button**: Jede ARIA-Nachricht kann nochmal vorgelesen werden
|
||||
- **Chat-Suche**: Lupe in der Statusleiste filtert Nachrichten live
|
||||
- **Mehrere Anhaenge**: Bilder + Dateien sammeln, Text hinzufuegen, dann zusammen senden
|
||||
- **Paste-Support**: Bilder aus Zwischenablage einfuegen (Diagnostic)
|
||||
- **Anhaenge**: Bridge speichert in Shared Volume, ARIA kann darauf zugreifen, Re-Download ueber RVS
|
||||
- **Einstellungen**: TTS aktiv, XTTS-Voice, Speicherort, Auto-Download, GPS
|
||||
- **Auto-Update**: Prueft beim Start + per Button auf neue Version, Download + Installation ueber RVS (FileProvider)
|
||||
- GPS-Position (optional)
|
||||
- QR-Code Scanner fuer Token-Pairing
|
||||
|
||||
@@ -374,27 +397,50 @@ cd android
|
||||
```
|
||||
|
||||
Das Script macht alles in einem Schritt:
|
||||
1. Fragt Gitea-Kennwort ab (wird nirgends gespeichert)
|
||||
2. Baut die Release-APK
|
||||
3. Erstellt Git Tag + pusht
|
||||
4. Erstellt Gitea Release
|
||||
5. Laedt APK als Asset hoch
|
||||
1. Setzt Versionsnummern (package.json, build.gradle, SettingsScreen)
|
||||
2. Fragt Gitea-Kennwort ab (wird nirgends gespeichert)
|
||||
3. Baut die Release-APK
|
||||
4. Git Commit + Tag + Push
|
||||
5. Erstellt Gitea Release + laedt APK hoch
|
||||
6. Kopiert APK auf RVS-Server (Auto-Update, optional)
|
||||
|
||||
Voraussetzung in `.env`:
|
||||
```bash
|
||||
GITEA_URL=https://gitea.hackersoft.de
|
||||
GITEA_REPO=stefan/aria-agent
|
||||
GITEA_USER=stefan
|
||||
RVS_UPDATE_HOST=root@aria-rvs # Optional: fuer Auto-Update
|
||||
```
|
||||
|
||||
### Docker-Cleanup
|
||||
|
||||
Das Bridge-Image zieht grosse ML-Deps (faster-whisper, ctranslate2, onnxruntime,
|
||||
openwakeword) — bei jedem Rebuild waechst der Docker-Build-Cache. Wenn
|
||||
die VM voll laeuft:
|
||||
|
||||
```bash
|
||||
./cleanup.sh # sicher: Build-Cache + ungenutzte Images
|
||||
./cleanup.sh --full # aggressiv: zusaetzlich ungenutzte Volumes (mit Rueckfrage)
|
||||
```
|
||||
|
||||
### Auto-Update
|
||||
|
||||
Die App prueft beim Start ob eine neuere Version auf dem RVS liegt.
|
||||
Der Update-Flow:
|
||||
1. `./release.sh 0.0.3.0` → APK wird auf RVS kopiert (via scp)
|
||||
2. Alternativ: `git pull` auf dem RVS-Server → APK in `rvs/updates/`
|
||||
3. App sendet `update_check` mit aktueller Version
|
||||
4. RVS vergleicht → sendet `update_available`
|
||||
5. App zeigt Dialog → Download ueber WebSocket → Installation
|
||||
|
||||
### Audio-Pipeline (Spracheingabe)
|
||||
|
||||
```
|
||||
App (Mikrofon) → AAC/MP4 Aufnahme → Base64 → RVS → Bridge
|
||||
Bridge: FFmpeg (16kHz PCM) → Whisper STT → Text → aria-core
|
||||
Bridge: STT-Ergebnis → RVS → App (Placeholder wird durch transkribierten Text ersetzt)
|
||||
aria-core → Antwort → Bridge → Piper TTS (WAV) → Base64 → RVS → App
|
||||
App: Base64 → WAV → Lautsprecher
|
||||
aria-core → Antwort → Bridge → XTTS (Gaming-PC) → PCM-Stream → RVS → App
|
||||
App: AudioTrack MODE_STREAM (nahtlos), Cache als WAV pro Message
|
||||
```
|
||||
|
||||
### Datei-Pipeline (Bilder & Anhaenge)
|
||||
@@ -442,10 +488,6 @@ aria-data/
|
||||
│
|
||||
├── skills/ ← ARIAs Faehigkeiten (selbst geschrieben!)
|
||||
│
|
||||
├── voices/ ← Piper TTS Stimmen (offline)
|
||||
│ ├── de_DE-ramona-low.onnx
|
||||
│ └── de_DE-thorsten-high.onnx
|
||||
│
|
||||
├── config/
|
||||
│ ├── BOOTSTRAP.md ← System-Prompt (Identitaet, Regeln, Tools)
|
||||
│ ├── AGENT.md ← Persoenlichkeit & Arbeitsprinzipien
|
||||
@@ -454,6 +496,11 @@ aria-data/
|
||||
│ ├── aria.env ← Voice Bridge Config
|
||||
│ └── diag-state/ ← Diagnostic persistenter State
|
||||
│
|
||||
│ (im Shared Volume /shared/config/):
|
||||
│ ├── voice_config.json ← TTS-Einstellungen (Stimme, Speed, Engine)
|
||||
│ ├── highlight_triggers.json ← Highlight-Trigger Woerter
|
||||
│ └── chat_backup.jsonl ← Nachrichten-Backup (on-the-fly)
|
||||
│
|
||||
└── ssh/ ← SSH Keys fuer VM-Zugriff
|
||||
├── id_ed25519 ← Private Key (generiert von aria-setup.sh)
|
||||
├── id_ed25519.pub ← Public Key (muss in VM authorized_keys!)
|
||||
@@ -469,7 +516,7 @@ tar -czf aria-backup-$(date +%Y%m%d).tar.gz aria-data/
|
||||
|
||||
## RVS — Rendezvous-Server
|
||||
|
||||
Laeuft im Rechenzentrum. Reiner Relay — kennt keine Tokens, speichert nichts.
|
||||
Laeuft im Rechenzentrum. WebSocket Relay + Auto-Update Server.
|
||||
Wer sich mit dem gleichen Token verbindet, landet im gleichen Room.
|
||||
|
||||
```bash
|
||||
@@ -477,10 +524,90 @@ cd rvs
|
||||
docker compose up -d
|
||||
```
|
||||
|
||||
**Features:**
|
||||
- WebSocket Relay (alle Message-Types: chat, audio, file, config, xtts, update, etc.)
|
||||
- Auto-Update: APK-Verteilung an Apps ueber WebSocket
|
||||
- Heartbeat + tote Verbindungen aufraeumen
|
||||
|
||||
**Auto-Update APK bereitstellen:**
|
||||
```bash
|
||||
# APK in updates/ legen (manuell oder via release.sh)
|
||||
cp ARIA-v0.0.3.0.apk ~/ARIA-AGENT/rvs/updates/
|
||||
# RVS erkennt die Version aus dem Dateinamen
|
||||
```
|
||||
|
||||
**Multi-Instanz:** Mehrere ARIA-VMs koennen denselben RVS nutzen — jede mit eigenem Token.
|
||||
|
||||
---
|
||||
|
||||
## XTTS v2 — GPU TTS Server (optional)
|
||||
|
||||
Laeuft auf einem separaten Rechner mit NVIDIA GPU (z.B. Gaming-PC mit RTX 3060).
|
||||
Verbindet sich ueber RVS mit der ARIA-Infrastruktur — kein VPN noetig, funktioniert
|
||||
ueber verschiedene Netze hinweg.
|
||||
|
||||
### Architektur
|
||||
|
||||
```
|
||||
Gaming-PC (Windows, RTX 3060, Docker Desktop + WSL2)
|
||||
├── aria-xtts XTTS v2 GPU Server (Port 8020 intern)
|
||||
└── aria-xtts-bridge RVS-Relay (empfaengt Requests, sendet Audio)
|
||||
└── Beide teilen ./voices/ Volume fuer Voice Cloning
|
||||
|
||||
↕ RVS (Rechenzentrum, WebSocket Relay)
|
||||
|
||||
ARIA-VM
|
||||
└── aria-bridge: tts_engine="xtts" → xtts_request via RVS → wartet auf xtts_response
|
||||
```
|
||||
|
||||
### Voraussetzungen
|
||||
|
||||
- Docker Desktop mit WSL2 (Windows) oder Docker mit NVIDIA Runtime (Linux)
|
||||
- NVIDIA Container Toolkit
|
||||
- GPU mit mindestens 4GB VRAM (6GB+ empfohlen)
|
||||
- **Gleicher RVS_TOKEN wie auf der ARIA-VM!**
|
||||
|
||||
### Setup
|
||||
|
||||
```bash
|
||||
cd xtts
|
||||
cp .env.example .env
|
||||
# .env mit RVS-Verbindungsdaten fuellen (gleicher Token wie ARIA-VM!)
|
||||
docker compose up -d
|
||||
# Erster Start laedt ~2GB Model herunter (danach gecacht)
|
||||
```
|
||||
|
||||
**Wichtig:** Der XTTS-Server laeuft intern auf Port **8020** (nicht 8000).
|
||||
Das Model wird im Volume `xtts-models` gecacht und muss nur einmal geladen werden.
|
||||
|
||||
### Features
|
||||
|
||||
- **Natuerliche Stimmen**: Deutlich bessere Qualitaet als TTS der alten Generation
|
||||
- **Voice Cloning**: Eigene Stimme mit 6-10s Audio-Sample (~2s Latenz auf RTX 3060)
|
||||
- **Streaming**: PCM-Chunks alle ~170ms → App spielt ohne Warten nahtlos
|
||||
- **16 Sprachen**: Deutsch, Englisch, Franzoesisch, etc.
|
||||
|
||||
### TTS-Config
|
||||
|
||||
In der Diagnostic unter Einstellungen → Sprachausgabe:
|
||||
- **TTS aktiv**: Global An/Aus
|
||||
- **XTTS Stimme**: Default oder gecloned (Maia, etc.)
|
||||
|
||||
> XTTS ist die einzige Engine — wenn der Gaming-PC offline ist, bleibt ARIA stumm.
|
||||
> Chat-Antworten kommen weiter an (nur kein Audio).
|
||||
|
||||
### Stimme klonen
|
||||
|
||||
1. "Stimme klonen" → Audio-Dateien hochladen (WAV/MP3, 1-10 Dateien, min. 6-10s gesamt)
|
||||
2. Name vergeben → "Stimme erstellen"
|
||||
3. "Laden" klicken → neue Stimme in der Auswahl
|
||||
4. Stimme auswaehlen → Config wird automatisch gespeichert
|
||||
|
||||
> **Tipp:** Fuer beste Ergebnisse: saubere Aufnahme, eine Stimme, kein Hintergrund,
|
||||
> 10-30 Sekunden Gesamtlaenge. Mehrere kurze Dateien werden zusammengefuegt.
|
||||
|
||||
---
|
||||
|
||||
## Docker Volumes
|
||||
|
||||
| Volume | Pfad im Container | Zweck |
|
||||
@@ -491,7 +618,7 @@ docker compose up -d
|
||||
| `./aria-data/ssh` (bind) | `/root/.ssh`, `/home/node/.ssh` | SSH Keys |
|
||||
| `./aria-data/brain` (bind) | `/home/node/.openclaw/workspace/memory` | Gedaechtnis |
|
||||
| `./aria-data/skills` (bind) | `/home/node/.openclaw/workspace/skills` | Skills |
|
||||
| `aria-shared` | `/shared` (Core + Bridge) | Datei-Austausch (Uploads von App) |
|
||||
| `aria-shared` | `/shared` (Core + Bridge + Proxy + Diag) | Datei-Austausch, Config, Uploads |
|
||||
| `./aria-data/config/diag-state` (bind) | `/data` (Diagnostic) | Persistenter State (aktive Session) |
|
||||
|
||||
---
|
||||
@@ -549,6 +676,8 @@ docker exec aria-core ssh aria-wohnung hostname
|
||||
- **Wake Word nur auf VM**: Die Bridge hoert auf "ARIA" ueber das lokale Mikrofon der VM.
|
||||
In der App gibt es Energy-basierte Erkennung (Phase 1). On-device "ARIA"-Keyword (Porcupine) ist Phase 2.
|
||||
- **Audio-Format**: App nimmt AAC/MP4 auf, Bridge konvertiert via FFmpeg zu 16kHz PCM.
|
||||
- **RVS Zombie-Connections**: WebSocket-Verbindungen sterben gelegentlich ohne Fehlermeldung.
|
||||
Bridge hat Ping-Check (5s), Diagnostic nutzt frische Verbindungen pro Request.
|
||||
- **Bildanalyse eingeschraenkt**: Bilder werden in `/shared/uploads/` gespeichert. ARIA kann
|
||||
sie per Bash/Read-Tool oeffnen, aber Claude Vision (direkte Bildanalyse) ist ueber den
|
||||
Proxy-Pfad (`claude --print`) noch nicht moeglich. ARIA sieht den Dateipfad, nicht das Bild.
|
||||
@@ -569,8 +698,28 @@ docker exec aria-core ssh aria-wohnung hostname
|
||||
- [x] Android App (Chat + Sprache + Uploads)
|
||||
- [x] Tool-Permissions (alle Tools freigeschaltet)
|
||||
- [x] SSH-Zugriff auf VM (aria-wohnung)
|
||||
- [x] Diagnostic Web-UI
|
||||
- [x] Diagnostic Web-UI + Einstellungen
|
||||
- [x] Session-Verwaltung + Chat-History
|
||||
- [x] Stimmen-Einstellungen (Ramona/Thorsten, Speed, Highlight-Trigger) — durch XTTS v2 Voice Cloning ersetzt
|
||||
- [x] Piper komplett entfernt — nur noch XTTS v2 als TTS (Gaming-PC)
|
||||
- [x] Streaming TTS: PCM-Chunks direkt in AudioTrack, nahtlose Wiedergabe
|
||||
- [x] TTS satzweise fuer lange Texte
|
||||
- [x] Datei-/Bild-Upload mit Shared Volume
|
||||
- [x] Watchdog (stuck Run Erkennung + Auto-Fix + Container-Restart)
|
||||
- [x] Auto-Update System (APK via RVS)
|
||||
- [x] Chat-Suche, Play-Button, Abbrechen-Button
|
||||
- [x] XTTS v2 Integration (GPU, Voice Cloning, remote ueber RVS)
|
||||
- [x] Gespraechsmodus (Ohr-Button, automatische Aufnahme nach ARIA-Antwort)
|
||||
- [x] Mehrere Anhaenge + Text vor dem Senden + Paste-Support
|
||||
- [x] Markdown-Bereinigung fuer TTS
|
||||
- [x] Auto-Update mit FileProvider + Update-Check Button
|
||||
- [x] Inverted FlatList (zuverlaessiges Scroll-to-Bottom)
|
||||
- [x] Speech Gate (VAD verwirft Aufnahme ohne erkannte Sprache)
|
||||
- [x] Session-Persistenz ueber Container-Restarts (sessionFromFile + atomic write)
|
||||
- [x] Session-Export als Markdown-Datei (Download-Button pro Session)
|
||||
- [x] "ARIA denkt..."-Indicator + Abbrechen-Button in App (via Bridge → RVS)
|
||||
- [x] Whisper-Modell waehlbar in Diagnostic (tiny…large-v3, Hot-Reload)
|
||||
- [x] App-Aufnahme explizit 16kHz mono (optimal fuer Whisper, kein Resample)
|
||||
|
||||
### Phase 2 — ARIA wird produktiv
|
||||
|
||||
@@ -578,7 +727,8 @@ docker exec aria-core ssh aria-wohnung hostname
|
||||
- [ ] Gitea-Integration
|
||||
- [ ] VM einrichten (Desktop, Browser, Tools)
|
||||
- [ ] Heartbeat (periodische Selbst-Checks)
|
||||
- [ ] Lokales LLM als Wächter (Triage vor Claude-Call)
|
||||
- [ ] Lokales LLM als Waechter (Triage vor Claude-Call)
|
||||
- [ ] Auto-Compacting / Memory-Verwaltung
|
||||
|
||||
### Phase 3 — Erweiterungen
|
||||
|
||||
@@ -586,3 +736,4 @@ docker exec aria-core ssh aria-wohnung hostname
|
||||
- [ ] Desktop Client (Tauri)
|
||||
- [ ] bKVM Remote IT-Support
|
||||
- [ ] Porcupine Wake Word (on-device "ARIA" in der App)
|
||||
- [ ] Claude Vision direkt (Bildanalyse ohne Dateipfad-Umweg)
|
||||
|
||||
@@ -79,8 +79,8 @@ android {
|
||||
applicationId "com.ariacockpit"
|
||||
minSdkVersion rootProject.ext.minSdkVersion
|
||||
targetSdkVersion rootProject.ext.targetSdkVersion
|
||||
versionCode 109
|
||||
versionName "0.0.1.9"
|
||||
versionCode 402
|
||||
versionName "0.0.4.2"
|
||||
// Fallback fuer Libraries mit Product Flavors
|
||||
missingDimensionStrategy 'react-native-camera', 'general'
|
||||
}
|
||||
|
||||
@@ -3,6 +3,7 @@
|
||||
<uses-permission android:name="android.permission.INTERNET" />
|
||||
<uses-permission android:name="android.permission.CAMERA" />
|
||||
<uses-permission android:name="android.permission.RECORD_AUDIO" />
|
||||
<uses-permission android:name="android.permission.REQUEST_INSTALL_PACKAGES" />
|
||||
|
||||
<application
|
||||
android:name=".MainApplication"
|
||||
@@ -24,5 +25,15 @@
|
||||
<category android:name="android.intent.category.LAUNCHER" />
|
||||
</intent-filter>
|
||||
</activity>
|
||||
|
||||
<provider
|
||||
android:name="androidx.core.content.FileProvider"
|
||||
android:authorities="${applicationId}.fileprovider"
|
||||
android:exported="false"
|
||||
android:grantUriPermissions="true">
|
||||
<meta-data
|
||||
android:name="android.support.FILE_PROVIDER_PATHS"
|
||||
android:resource="@xml/file_paths" />
|
||||
</provider>
|
||||
</application>
|
||||
</manifest>
|
||||
|
||||
@@ -0,0 +1,44 @@
|
||||
package com.ariacockpit
|
||||
|
||||
import android.content.Intent
|
||||
import android.net.Uri
|
||||
import android.os.Build
|
||||
import androidx.core.content.FileProvider
|
||||
import com.facebook.react.bridge.ReactApplicationContext
|
||||
import com.facebook.react.bridge.ReactContextBaseJavaModule
|
||||
import com.facebook.react.bridge.ReactMethod
|
||||
import com.facebook.react.bridge.Promise
|
||||
import java.io.File
|
||||
|
||||
class ApkInstallerModule(reactContext: ReactApplicationContext) : ReactContextBaseJavaModule(reactContext) {
|
||||
override fun getName() = "ApkInstaller"
|
||||
|
||||
@ReactMethod
|
||||
fun install(filePath: String, promise: Promise) {
|
||||
try {
|
||||
val file = File(filePath)
|
||||
if (!file.exists()) {
|
||||
promise.reject("FILE_NOT_FOUND", "APK nicht gefunden: $filePath")
|
||||
return
|
||||
}
|
||||
|
||||
val context = reactApplicationContext
|
||||
val uri: Uri = if (Build.VERSION.SDK_INT >= Build.VERSION_CODES.N) {
|
||||
FileProvider.getUriForFile(context, "${context.packageName}.fileprovider", file)
|
||||
} else {
|
||||
Uri.fromFile(file)
|
||||
}
|
||||
|
||||
val intent = Intent(Intent.ACTION_VIEW).apply {
|
||||
setDataAndType(uri, "application/vnd.android.package-archive")
|
||||
addFlags(Intent.FLAG_ACTIVITY_NEW_TASK)
|
||||
addFlags(Intent.FLAG_GRANT_READ_URI_PERMISSION)
|
||||
}
|
||||
|
||||
context.startActivity(intent)
|
||||
promise.resolve(true)
|
||||
} catch (e: Exception) {
|
||||
promise.reject("INSTALL_ERROR", e.message, e)
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,16 @@
|
||||
package com.ariacockpit
|
||||
|
||||
import com.facebook.react.ReactPackage
|
||||
import com.facebook.react.bridge.NativeModule
|
||||
import com.facebook.react.bridge.ReactApplicationContext
|
||||
import com.facebook.react.uimanager.ViewManager
|
||||
|
||||
class ApkInstallerPackage : ReactPackage {
|
||||
override fun createNativeModules(reactContext: ReactApplicationContext): List<NativeModule> {
|
||||
return listOf(ApkInstallerModule(reactContext))
|
||||
}
|
||||
|
||||
override fun createViewManagers(reactContext: ReactApplicationContext): List<ViewManager<*, *>> {
|
||||
return emptyList()
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,93 @@
|
||||
package com.ariacockpit
|
||||
|
||||
import android.content.Context
|
||||
import android.media.AudioAttributes
|
||||
import android.media.AudioFocusRequest
|
||||
import android.media.AudioManager
|
||||
import android.os.Build
|
||||
import com.facebook.react.bridge.Promise
|
||||
import com.facebook.react.bridge.ReactApplicationContext
|
||||
import com.facebook.react.bridge.ReactContextBaseJavaModule
|
||||
import com.facebook.react.bridge.ReactMethod
|
||||
|
||||
/**
|
||||
* Steuert Audio-Focus fuer Ducking/Muten anderer Apps.
|
||||
*
|
||||
* - requestDuck() → andere Apps werden leiser (ARIA spricht TTS)
|
||||
* - requestExclusive() → andere Apps werden pausiert (Mikrofon-Aufnahme)
|
||||
* - release() → Focus abgeben, andere Apps duerfen wieder
|
||||
*/
|
||||
class AudioFocusModule(reactContext: ReactApplicationContext) : ReactContextBaseJavaModule(reactContext) {
|
||||
override fun getName() = "AudioFocus"
|
||||
|
||||
private var currentRequest: AudioFocusRequest? = null
|
||||
|
||||
private fun audioManager(): AudioManager? =
|
||||
reactApplicationContext.getSystemService(Context.AUDIO_SERVICE) as? AudioManager
|
||||
|
||||
private fun requestFocus(durationHint: Int, usage: Int, promise: Promise) {
|
||||
val am = audioManager()
|
||||
if (am == null) {
|
||||
promise.reject("NO_AUDIO_MANAGER", "AudioManager nicht verfuegbar")
|
||||
return
|
||||
}
|
||||
|
||||
release()
|
||||
|
||||
val result: Int = if (Build.VERSION.SDK_INT >= Build.VERSION_CODES.O) {
|
||||
val attrs = AudioAttributes.Builder()
|
||||
.setUsage(usage)
|
||||
.setContentType(AudioAttributes.CONTENT_TYPE_SPEECH)
|
||||
.build()
|
||||
val req = AudioFocusRequest.Builder(durationHint)
|
||||
.setAudioAttributes(attrs)
|
||||
.setOnAudioFocusChangeListener { /* kein Callback noetig */ }
|
||||
.build()
|
||||
currentRequest = req
|
||||
am.requestAudioFocus(req)
|
||||
} else {
|
||||
@Suppress("DEPRECATION")
|
||||
am.requestAudioFocus(null, AudioManager.STREAM_MUSIC, durationHint)
|
||||
}
|
||||
|
||||
promise.resolve(result == AudioManager.AUDIOFOCUS_REQUEST_GRANTED)
|
||||
}
|
||||
|
||||
/** Andere Apps werden leiser (TTS spricht). */
|
||||
@ReactMethod
|
||||
fun requestDuck(promise: Promise) {
|
||||
requestFocus(
|
||||
AudioManager.AUDIOFOCUS_GAIN_TRANSIENT_MAY_DUCK,
|
||||
AudioAttributes.USAGE_ASSISTANT,
|
||||
promise,
|
||||
)
|
||||
}
|
||||
|
||||
/** Andere Apps werden pausiert (Mikrofon-Aufnahme / Gespraech). */
|
||||
@ReactMethod
|
||||
fun requestExclusive(promise: Promise) {
|
||||
requestFocus(
|
||||
AudioManager.AUDIOFOCUS_GAIN_TRANSIENT_EXCLUSIVE,
|
||||
AudioAttributes.USAGE_VOICE_COMMUNICATION,
|
||||
promise,
|
||||
)
|
||||
}
|
||||
|
||||
/** Focus abgeben — andere Apps duerfen wieder volle Lautstaerke. */
|
||||
@ReactMethod
|
||||
fun release(promise: Promise) {
|
||||
release()
|
||||
promise.resolve(true)
|
||||
}
|
||||
|
||||
private fun release() {
|
||||
val am = audioManager() ?: return
|
||||
if (Build.VERSION.SDK_INT >= Build.VERSION_CODES.O) {
|
||||
currentRequest?.let { am.abandonAudioFocusRequest(it) }
|
||||
} else {
|
||||
@Suppress("DEPRECATION")
|
||||
am.abandonAudioFocus(null)
|
||||
}
|
||||
currentRequest = null
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,16 @@
|
||||
package com.ariacockpit
|
||||
|
||||
import com.facebook.react.ReactPackage
|
||||
import com.facebook.react.bridge.NativeModule
|
||||
import com.facebook.react.bridge.ReactApplicationContext
|
||||
import com.facebook.react.uimanager.ViewManager
|
||||
|
||||
class AudioFocusPackage : ReactPackage {
|
||||
override fun createNativeModules(reactContext: ReactApplicationContext): List<NativeModule> {
|
||||
return listOf(AudioFocusModule(reactContext))
|
||||
}
|
||||
|
||||
override fun createViewManagers(reactContext: ReactApplicationContext): List<ViewManager<*, *>> {
|
||||
return emptyList()
|
||||
}
|
||||
}
|
||||
@@ -18,8 +18,9 @@ class MainApplication : Application(), ReactApplication {
|
||||
object : DefaultReactNativeHost(this) {
|
||||
override fun getPackages(): List<ReactPackage> =
|
||||
PackageList(this).packages.apply {
|
||||
// Packages that cannot be autolinked yet can be added manually here, for example:
|
||||
// add(MyReactNativePackage())
|
||||
add(ApkInstallerPackage())
|
||||
add(AudioFocusPackage())
|
||||
add(PcmStreamPlayerPackage())
|
||||
}
|
||||
|
||||
override fun getJSMainModuleName(): String = "index"
|
||||
|
||||
@@ -0,0 +1,158 @@
|
||||
package com.ariacockpit
|
||||
|
||||
import android.media.AudioAttributes
|
||||
import android.media.AudioFormat
|
||||
import android.media.AudioManager
|
||||
import android.media.AudioTrack
|
||||
import android.util.Base64
|
||||
import android.util.Log
|
||||
import com.facebook.react.bridge.Promise
|
||||
import com.facebook.react.bridge.ReactApplicationContext
|
||||
import com.facebook.react.bridge.ReactContextBaseJavaModule
|
||||
import com.facebook.react.bridge.ReactMethod
|
||||
import java.util.concurrent.LinkedBlockingQueue
|
||||
|
||||
/**
|
||||
* Streamt PCM-s16le Audio direkt via AudioTrack MODE_STREAM.
|
||||
*
|
||||
* Flow:
|
||||
* JS: start(sampleRate, channels) → öffnet AudioTrack und startet Writer-Thread
|
||||
* JS: writeChunk(base64) → dekodiert, queued, Writer schreibt non-blocking
|
||||
* JS: end() → wartet bis Queue leer, schließt AudioTrack
|
||||
* JS: stop() → Hart stoppen, Queue leeren (Cancel)
|
||||
*
|
||||
* Vorteil gegenüber Sound-File-Queue:
|
||||
* - Keine Gap zwischen Chunks (AudioTrack puffert intern)
|
||||
* - Erste Samples beginnen zu spielen sobald der erste Chunk da ist
|
||||
* - Kein WAV-Header-Parsing pro Chunk
|
||||
*/
|
||||
class PcmStreamPlayerModule(reactContext: ReactApplicationContext) : ReactContextBaseJavaModule(reactContext) {
|
||||
companion object {
|
||||
private const val TAG = "PcmStreamPlayer"
|
||||
}
|
||||
|
||||
override fun getName() = "PcmStreamPlayer"
|
||||
|
||||
private var track: AudioTrack? = null
|
||||
private val queue = LinkedBlockingQueue<ByteArray>()
|
||||
private var writerThread: Thread? = null
|
||||
@Volatile private var writerShouldStop = false
|
||||
@Volatile private var endRequested = false
|
||||
|
||||
// ── Lifecycle ──
|
||||
|
||||
@ReactMethod
|
||||
fun start(sampleRate: Int, channels: Int, promise: Promise) {
|
||||
try {
|
||||
// Alte Session beenden falls vorhanden
|
||||
stopInternal()
|
||||
|
||||
val channelConfig = if (channels == 2) AudioFormat.CHANNEL_OUT_STEREO else AudioFormat.CHANNEL_OUT_MONO
|
||||
val encoding = AudioFormat.ENCODING_PCM_16BIT
|
||||
val minBuf = AudioTrack.getMinBufferSize(sampleRate, channelConfig, encoding)
|
||||
// Etwas grosszuegiger Buffer: 8x MinSize (ca. 200-400ms bei 24kHz) — glatt auch bei kleinen Netzwerk-Aussetzern
|
||||
val bufferSize = (minBuf * 8).coerceAtLeast(32 * 1024)
|
||||
|
||||
val newTrack = AudioTrack.Builder()
|
||||
.setAudioAttributes(
|
||||
AudioAttributes.Builder()
|
||||
.setUsage(AudioAttributes.USAGE_ASSISTANT)
|
||||
.setContentType(AudioAttributes.CONTENT_TYPE_SPEECH)
|
||||
.build(),
|
||||
)
|
||||
.setAudioFormat(
|
||||
AudioFormat.Builder()
|
||||
.setSampleRate(sampleRate)
|
||||
.setChannelMask(channelConfig)
|
||||
.setEncoding(encoding)
|
||||
.build(),
|
||||
)
|
||||
.setBufferSizeInBytes(bufferSize)
|
||||
.setTransferMode(AudioTrack.MODE_STREAM)
|
||||
.build()
|
||||
|
||||
newTrack.play()
|
||||
track = newTrack
|
||||
queue.clear()
|
||||
writerShouldStop = false
|
||||
endRequested = false
|
||||
|
||||
writerThread = Thread({
|
||||
val t = track ?: return@Thread
|
||||
try {
|
||||
while (!writerShouldStop) {
|
||||
val data = queue.poll(50, java.util.concurrent.TimeUnit.MILLISECONDS) ?: run {
|
||||
if (endRequested) return@Thread
|
||||
null
|
||||
} ?: continue
|
||||
var offset = 0
|
||||
while (offset < data.size && !writerShouldStop) {
|
||||
val written = t.write(data, offset, data.size - offset)
|
||||
if (written <= 0) break
|
||||
offset += written
|
||||
}
|
||||
}
|
||||
} catch (e: Exception) {
|
||||
Log.w(TAG, "Writer-Thread Fehler: ${e.message}")
|
||||
} finally {
|
||||
try { t.stop() } catch (_: Exception) {}
|
||||
try { t.release() } catch (_: Exception) {}
|
||||
}
|
||||
}, "PcmStreamWriter").apply { start() }
|
||||
|
||||
Log.i(TAG, "Stream gestartet: ${sampleRate}Hz ch=$channels buf=${bufferSize}B")
|
||||
promise.resolve(true)
|
||||
} catch (e: Exception) {
|
||||
Log.e(TAG, "start fehlgeschlagen", e)
|
||||
promise.reject("START_FAILED", e.message, e)
|
||||
}
|
||||
}
|
||||
|
||||
@ReactMethod
|
||||
fun writeChunk(base64Pcm: String, promise: Promise) {
|
||||
try {
|
||||
if (base64Pcm.isEmpty()) {
|
||||
promise.resolve(true)
|
||||
return
|
||||
}
|
||||
val bytes = Base64.decode(base64Pcm, Base64.DEFAULT)
|
||||
queue.put(bytes)
|
||||
promise.resolve(true)
|
||||
} catch (e: Exception) {
|
||||
promise.reject("WRITE_FAILED", e.message, e)
|
||||
}
|
||||
}
|
||||
|
||||
/** Signalisiert: keine weiteren Chunks. Writer wartet auf Queue-Abschluss, dann stoppt. */
|
||||
@ReactMethod
|
||||
fun end(promise: Promise) {
|
||||
endRequested = true
|
||||
promise.resolve(true)
|
||||
}
|
||||
|
||||
/** Harter Stop (Cancel) — Queue verwerfen. */
|
||||
@ReactMethod
|
||||
fun stop(promise: Promise) {
|
||||
stopInternal()
|
||||
promise.resolve(true)
|
||||
}
|
||||
|
||||
private fun stopInternal() {
|
||||
writerShouldStop = true
|
||||
endRequested = true
|
||||
queue.clear()
|
||||
writerThread?.interrupt()
|
||||
writerThread = null
|
||||
val t = track
|
||||
if (t != null) {
|
||||
try { t.stop() } catch (_: Exception) {}
|
||||
try { t.release() } catch (_: Exception) {}
|
||||
}
|
||||
track = null
|
||||
}
|
||||
|
||||
override fun onCatalystInstanceDestroy() {
|
||||
stopInternal()
|
||||
super.onCatalystInstanceDestroy()
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,16 @@
|
||||
package com.ariacockpit
|
||||
|
||||
import com.facebook.react.ReactPackage
|
||||
import com.facebook.react.bridge.NativeModule
|
||||
import com.facebook.react.bridge.ReactApplicationContext
|
||||
import com.facebook.react.uimanager.ViewManager
|
||||
|
||||
class PcmStreamPlayerPackage : ReactPackage {
|
||||
override fun createNativeModules(reactContext: ReactApplicationContext): List<NativeModule> {
|
||||
return listOf(PcmStreamPlayerModule(reactContext))
|
||||
}
|
||||
|
||||
override fun createViewManagers(reactContext: ReactApplicationContext): List<ViewManager<*, *>> {
|
||||
return emptyList()
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,4 @@
|
||||
<?xml version="1.0" encoding="utf-8"?>
|
||||
<paths>
|
||||
<cache-path name="cache" path="." />
|
||||
</paths>
|
||||
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "aria-cockpit",
|
||||
"version": "0.0.1.9",
|
||||
"version": "0.0.4.2",
|
||||
"private": true,
|
||||
"scripts": {
|
||||
"android": "react-native run-android",
|
||||
@@ -24,8 +24,7 @@
|
||||
"react-native-camera-kit": "^13.0.0",
|
||||
"@react-native-async-storage/async-storage": "^1.21.0",
|
||||
"react-native-fs": "^2.20.0",
|
||||
"react-native-audio-recorder-player": "^3.6.7",
|
||||
"react-native-live-audio-stream": "^1.1.1"
|
||||
"react-native-audio-recorder-player": "^3.6.7"
|
||||
},
|
||||
"devDependencies": {
|
||||
"typescript": "^5.3.3",
|
||||
|
||||
@@ -0,0 +1,362 @@
|
||||
/**
|
||||
* VoiceCloneModal — Eigene Stimme aufnehmen und an XTTS uploaden.
|
||||
*
|
||||
* Flow:
|
||||
* - Modal zeigt Vorlesetext (>30s Lesedauer) + Aufnahme-Button
|
||||
* - Bei Aufnahme: max 30s, Fortschrittsbalken, Countdown
|
||||
* - Bei Stop: Name abfragen, dann als voice_upload ueber RVS schicken
|
||||
* - XTTS-Bridge speichert /voices/<name>.wav, antwortet mit xtts_voice_saved
|
||||
*/
|
||||
|
||||
import React, { useCallback, useEffect, useRef, useState } from 'react';
|
||||
import {
|
||||
Modal,
|
||||
View,
|
||||
Text,
|
||||
TouchableOpacity,
|
||||
StyleSheet,
|
||||
Alert,
|
||||
ScrollView,
|
||||
ActivityIndicator,
|
||||
TextInput,
|
||||
} from 'react-native';
|
||||
import audioService from '../services/audio';
|
||||
import rvs from '../services/rvs';
|
||||
|
||||
interface Props {
|
||||
visible: boolean;
|
||||
onClose: () => void;
|
||||
}
|
||||
|
||||
const SAMPLE_TEXT = `Das ist meine eigene Stimme fuer ARIA. Ich lese jetzt einen laengeren Absatz laut vor, damit das Voice-Cloning eine gute Grundlage hat. Guten Tag, ich heisse Stefan und baue gerade mit grosser Begeisterung an meinem persoenlichen KI-Assistenten. Wir automatisieren Infrastruktur, managen Sessions und spielen mit Sprachsynthese. Die letzten Jahre habe ich viel gelernt, vor allem dass Geduld genauso wichtig ist wie Neugier. Hoert sich das jetzt an wie ich selbst? Wenn alles klappt, spricht ARIA bald mit dieser Stimme.`;
|
||||
|
||||
const MAX_DURATION_MS = 30000;
|
||||
const TARGET_DURATION_MS = 15000;
|
||||
|
||||
const VoiceCloneModal: React.FC<Props> = ({ visible, onClose }) => {
|
||||
const [recording, setRecording] = useState(false);
|
||||
const [durationMs, setDurationMs] = useState(0);
|
||||
const [voiceName, setVoiceName] = useState('');
|
||||
const [processing, setProcessing] = useState(false);
|
||||
const [recordingPath, setRecordingPath] = useState('');
|
||||
const timerRef = useRef<ReturnType<typeof setInterval> | null>(null);
|
||||
const startTimeRef = useRef<number>(0);
|
||||
|
||||
// Zustand zuruecksetzen wenn Modal schliesst/oeffnet
|
||||
useEffect(() => {
|
||||
if (!visible) {
|
||||
setRecording(false);
|
||||
setDurationMs(0);
|
||||
setVoiceName('');
|
||||
setProcessing(false);
|
||||
setRecordingPath('');
|
||||
if (timerRef.current) clearInterval(timerRef.current);
|
||||
}
|
||||
}, [visible]);
|
||||
|
||||
// Cleanup bei Unmount
|
||||
useEffect(() => {
|
||||
return () => {
|
||||
if (timerRef.current) clearInterval(timerRef.current);
|
||||
if (recording) audioService.stopRecording().catch(() => {});
|
||||
};
|
||||
}, [recording]);
|
||||
|
||||
const startRecording = useCallback(async () => {
|
||||
// Frische Aufnahme
|
||||
setDurationMs(0);
|
||||
setRecordingPath('');
|
||||
const ok = await audioService.startRecording(false);
|
||||
if (!ok) {
|
||||
Alert.alert('Fehler', 'Aufnahme konnte nicht gestartet werden (Mikrofon-Berechtigung?)');
|
||||
return;
|
||||
}
|
||||
setRecording(true);
|
||||
startTimeRef.current = Date.now();
|
||||
timerRef.current = setInterval(async () => {
|
||||
const elapsed = Date.now() - startTimeRef.current;
|
||||
setDurationMs(elapsed);
|
||||
if (elapsed >= MAX_DURATION_MS) {
|
||||
await stopRecording();
|
||||
}
|
||||
}, 100);
|
||||
}, []);
|
||||
|
||||
const stopRecording = useCallback(async () => {
|
||||
if (timerRef.current) {
|
||||
clearInterval(timerRef.current);
|
||||
timerRef.current = null;
|
||||
}
|
||||
if (!recording) return;
|
||||
const result = await audioService.stopRecording();
|
||||
setRecording(false);
|
||||
if (!result) {
|
||||
Alert.alert('Keine Sprache erkannt', 'Versuch es bitte nochmal — sprich bis der Timer mindestens 10 Sekunden anzeigt.');
|
||||
setDurationMs(0);
|
||||
return;
|
||||
}
|
||||
// Temp-Datei wurde schon geloescht (stopRecording cleaned up).
|
||||
// Wir brauchen aber base64 aus result direkt fuers Upload.
|
||||
// result.base64 ist bereits da.
|
||||
setRecordingPath(result.base64);
|
||||
}, [recording]);
|
||||
|
||||
const uploadVoice = useCallback(async () => {
|
||||
const name = voiceName.trim();
|
||||
if (!name) {
|
||||
Alert.alert('Name fehlt', 'Bitte gib der Stimme einen Namen (nur Buchstaben, Zahlen, _ und -).');
|
||||
return;
|
||||
}
|
||||
if (!/^[a-zA-Z0-9_-]+$/.test(name)) {
|
||||
Alert.alert('Ungueltiger Name', 'Nur Buchstaben, Zahlen, _ und - erlaubt.');
|
||||
return;
|
||||
}
|
||||
if (!recordingPath) {
|
||||
Alert.alert('Keine Aufnahme', 'Bitte zuerst aufnehmen.');
|
||||
return;
|
||||
}
|
||||
setProcessing(true);
|
||||
try {
|
||||
// voice_upload erwartet samples als Array mit base64 (aus Diagnostic-Format kopiert)
|
||||
rvs.send('voice_upload' as any, {
|
||||
name,
|
||||
samples: [{ base64: recordingPath }],
|
||||
});
|
||||
Alert.alert('Hochgeladen', `Stimme "${name}" wird vom XTTS-Server verarbeitet. Nach ein paar Sekunden in der Liste verfuegbar.`);
|
||||
onClose();
|
||||
} catch (err: any) {
|
||||
Alert.alert('Fehler', err.message);
|
||||
} finally {
|
||||
setProcessing(false);
|
||||
}
|
||||
}, [voiceName, recordingPath, onClose]);
|
||||
|
||||
const progress = Math.min(durationMs / MAX_DURATION_MS, 1);
|
||||
const sec = Math.floor(durationMs / 1000);
|
||||
const enoughRecorded = durationMs >= TARGET_DURATION_MS;
|
||||
|
||||
return (
|
||||
<Modal visible={visible} animationType="slide" onRequestClose={onClose}>
|
||||
<View style={styles.container}>
|
||||
<View style={styles.header}>
|
||||
<Text style={styles.title}>Eigene Stimme aufnehmen</Text>
|
||||
<TouchableOpacity onPress={onClose}>
|
||||
<Text style={styles.closeX}>{'\u2715'}</Text>
|
||||
</TouchableOpacity>
|
||||
</View>
|
||||
|
||||
<ScrollView style={styles.content} contentContainerStyle={{padding: 16}}>
|
||||
<Text style={styles.hint}>
|
||||
Lies den Text laut und deutlich vor. Maximal 30 Sekunden. Je mehr du sprichst
|
||||
(ziel: bis zum Ende des Textes, ca. 20-30s), desto besser wird die geklonte
|
||||
Stimme.
|
||||
</Text>
|
||||
|
||||
<View style={styles.sampleTextBox}>
|
||||
<Text style={styles.sampleText}>{SAMPLE_TEXT}</Text>
|
||||
</View>
|
||||
|
||||
{/* Timer + Fortschritt */}
|
||||
<View style={{marginTop: 20, alignItems: 'center'}}>
|
||||
<Text style={[styles.timer, recording && styles.timerActive]}>
|
||||
{sec.toString().padStart(2, '0')} / 30 s
|
||||
</Text>
|
||||
<View style={styles.progressBar}>
|
||||
<View style={[styles.progressFill, {width: `${progress * 100}%`, backgroundColor: recording ? '#FF3B30' : '#0096FF'}]} />
|
||||
</View>
|
||||
</View>
|
||||
|
||||
{/* Aufnahme-Button */}
|
||||
{!recordingPath && (
|
||||
<TouchableOpacity
|
||||
style={[styles.recordBtn, recording && styles.recordBtnActive]}
|
||||
onPress={recording ? stopRecording : startRecording}
|
||||
>
|
||||
<Text style={styles.recordIcon}>{recording ? '\u25A0' : '\u25CF'}</Text>
|
||||
<Text style={styles.recordLabel}>{recording ? 'Stop' : 'Aufnahme starten'}</Text>
|
||||
</TouchableOpacity>
|
||||
)}
|
||||
|
||||
{/* Nach Aufnahme: Name + Upload */}
|
||||
{recordingPath && (
|
||||
<View style={{marginTop: 20}}>
|
||||
<Text style={styles.hint}>
|
||||
Aufnahme ({sec}s) fertig. Vergib einen Namen und lade hoch.
|
||||
</Text>
|
||||
<TextInput
|
||||
style={styles.nameInput}
|
||||
value={voiceName}
|
||||
onChangeText={setVoiceName}
|
||||
placeholder="z.B. stefan"
|
||||
placeholderTextColor="#555570"
|
||||
autoCapitalize="none"
|
||||
autoCorrect={false}
|
||||
/>
|
||||
<View style={{flexDirection: 'row', gap: 8, marginTop: 12}}>
|
||||
<TouchableOpacity
|
||||
style={[styles.secondaryBtn, {flex: 1}]}
|
||||
onPress={() => { setRecordingPath(''); setDurationMs(0); }}
|
||||
>
|
||||
<Text style={styles.secondaryBtnText}>Nochmal aufnehmen</Text>
|
||||
</TouchableOpacity>
|
||||
<TouchableOpacity
|
||||
style={[styles.primaryBtn, {flex: 1}]}
|
||||
onPress={uploadVoice}
|
||||
disabled={processing}
|
||||
>
|
||||
{processing
|
||||
? <ActivityIndicator color="#fff" />
|
||||
: <Text style={styles.primaryBtnText}>Hochladen</Text>
|
||||
}
|
||||
</TouchableOpacity>
|
||||
</View>
|
||||
</View>
|
||||
)}
|
||||
|
||||
{recording && !enoughRecorded && (
|
||||
<Text style={[styles.hint, {marginTop: 12, color: '#FFD60A', textAlign: 'center'}]}>
|
||||
Bitte weiter lesen — mindestens 15 Sekunden
|
||||
</Text>
|
||||
)}
|
||||
|
||||
{recording && enoughRecorded && (
|
||||
<Text style={[styles.hint, {marginTop: 12, color: '#34C759', textAlign: 'center'}]}>
|
||||
Genug Audio fuer eine gute Clonung. Du kannst stoppen.
|
||||
</Text>
|
||||
)}
|
||||
</ScrollView>
|
||||
</View>
|
||||
</Modal>
|
||||
);
|
||||
};
|
||||
|
||||
const styles = StyleSheet.create({
|
||||
container: {
|
||||
flex: 1,
|
||||
backgroundColor: '#0D0D1A',
|
||||
},
|
||||
header: {
|
||||
flexDirection: 'row',
|
||||
alignItems: 'center',
|
||||
justifyContent: 'space-between',
|
||||
paddingHorizontal: 16,
|
||||
paddingTop: 48,
|
||||
paddingBottom: 16,
|
||||
borderBottomWidth: 1,
|
||||
borderBottomColor: '#1E1E2E',
|
||||
},
|
||||
title: {
|
||||
color: '#FFFFFF',
|
||||
fontSize: 18,
|
||||
fontWeight: '700',
|
||||
},
|
||||
closeX: {
|
||||
color: '#8888AA',
|
||||
fontSize: 24,
|
||||
paddingHorizontal: 8,
|
||||
},
|
||||
content: {
|
||||
flex: 1,
|
||||
},
|
||||
hint: {
|
||||
color: '#8888AA',
|
||||
fontSize: 13,
|
||||
lineHeight: 20,
|
||||
},
|
||||
sampleTextBox: {
|
||||
marginTop: 12,
|
||||
padding: 14,
|
||||
backgroundColor: '#12122A',
|
||||
borderRadius: 10,
|
||||
borderWidth: 1,
|
||||
borderColor: '#1E1E2E',
|
||||
},
|
||||
sampleText: {
|
||||
color: '#E0E0F0',
|
||||
fontSize: 15,
|
||||
lineHeight: 24,
|
||||
},
|
||||
timer: {
|
||||
color: '#666680',
|
||||
fontSize: 42,
|
||||
fontWeight: '700',
|
||||
fontVariant: ['tabular-nums'],
|
||||
},
|
||||
timerActive: {
|
||||
color: '#FF3B30',
|
||||
},
|
||||
progressBar: {
|
||||
marginTop: 8,
|
||||
width: '100%',
|
||||
height: 8,
|
||||
backgroundColor: '#1E1E2E',
|
||||
borderRadius: 4,
|
||||
overflow: 'hidden',
|
||||
},
|
||||
progressFill: {
|
||||
height: '100%',
|
||||
},
|
||||
recordBtn: {
|
||||
marginTop: 24,
|
||||
flexDirection: 'row',
|
||||
alignItems: 'center',
|
||||
justifyContent: 'center',
|
||||
gap: 12,
|
||||
backgroundColor: '#1E1E2E',
|
||||
borderRadius: 12,
|
||||
padding: 18,
|
||||
borderWidth: 2,
|
||||
borderColor: '#34C759',
|
||||
},
|
||||
recordBtnActive: {
|
||||
borderColor: '#FF3B30',
|
||||
backgroundColor: 'rgba(255,59,48,0.15)',
|
||||
},
|
||||
recordIcon: {
|
||||
color: '#FF3B30',
|
||||
fontSize: 24,
|
||||
fontWeight: '700',
|
||||
},
|
||||
recordLabel: {
|
||||
color: '#FFFFFF',
|
||||
fontSize: 17,
|
||||
fontWeight: '600',
|
||||
},
|
||||
nameInput: {
|
||||
marginTop: 10,
|
||||
backgroundColor: '#1E1E2E',
|
||||
borderRadius: 8,
|
||||
paddingHorizontal: 14,
|
||||
paddingVertical: 12,
|
||||
color: '#FFFFFF',
|
||||
fontSize: 15,
|
||||
borderWidth: 1,
|
||||
borderColor: '#2A2A3E',
|
||||
},
|
||||
primaryBtn: {
|
||||
backgroundColor: '#0096FF',
|
||||
borderRadius: 10,
|
||||
padding: 14,
|
||||
alignItems: 'center',
|
||||
},
|
||||
primaryBtnText: {
|
||||
color: '#FFFFFF',
|
||||
fontSize: 15,
|
||||
fontWeight: '700',
|
||||
},
|
||||
secondaryBtn: {
|
||||
backgroundColor: '#1E1E2E',
|
||||
borderRadius: 10,
|
||||
padding: 14,
|
||||
alignItems: 'center',
|
||||
borderWidth: 1,
|
||||
borderColor: '#2A2A3E',
|
||||
},
|
||||
secondaryBtnText: {
|
||||
color: '#8888AA',
|
||||
fontSize: 14,
|
||||
fontWeight: '600',
|
||||
},
|
||||
});
|
||||
|
||||
export default VoiceCloneModal;
|
||||
+390
-103
@@ -5,7 +5,7 @@
|
||||
* Datei- und Kamera-Upload.
|
||||
*/
|
||||
|
||||
import React, { useState, useEffect, useRef, useCallback } from 'react';
|
||||
import React, { useState, useEffect, useRef, useCallback, useMemo } from 'react';
|
||||
import {
|
||||
View,
|
||||
Text,
|
||||
@@ -16,6 +16,7 @@ import {
|
||||
Platform,
|
||||
StyleSheet,
|
||||
Image,
|
||||
ScrollView,
|
||||
Modal,
|
||||
} from 'react-native';
|
||||
import AsyncStorage from '@react-native-async-storage/async-storage';
|
||||
@@ -23,6 +24,7 @@ import RNFS from 'react-native-fs';
|
||||
import rvs, { RVSMessage, ConnectionState } from '../services/rvs';
|
||||
import audioService from '../services/audio';
|
||||
import wakeWordService from '../services/wakeword';
|
||||
import updateService from '../services/updater';
|
||||
import VoiceButton from '../components/VoiceButton';
|
||||
import FileUpload, { FileData } from '../components/FileUpload';
|
||||
import CameraUpload, { PhotoData } from '../components/CameraUpload';
|
||||
@@ -46,12 +48,22 @@ interface ChatMessage {
|
||||
text: string;
|
||||
timestamp: number;
|
||||
attachments?: Attachment[];
|
||||
/** Bridge-Message-ID zur Zuordnung von TTS-Audio */
|
||||
messageId?: string;
|
||||
/** Lokaler Pfad zur gecachten TTS-Audio-Datei (file://...) */
|
||||
audioPath?: string;
|
||||
}
|
||||
|
||||
// --- Konstanten ---
|
||||
|
||||
const CHAT_STORAGE_KEY = 'aria_chat_messages';
|
||||
const MAX_STORED_MESSAGES = 500;
|
||||
const MAX_MEMORY_MESSAGES = 500;
|
||||
|
||||
// Hilfe: Messages-Array auf Max kappen (aelteste raus) — verhindert OOM
|
||||
// im Gespraechsmodus bei sehr vielen Nachrichten.
|
||||
const capMessages = (msgs: ChatMessage[]): ChatMessage[] =>
|
||||
msgs.length > MAX_MEMORY_MESSAGES ? msgs.slice(-MAX_MEMORY_MESSAGES) : msgs;
|
||||
const DEFAULT_ATTACHMENT_DIR = `${RNFS.DocumentDirectoryPath}/chat_attachments`;
|
||||
const STORAGE_PATH_KEY = 'aria_attachment_storage_path';
|
||||
|
||||
@@ -91,6 +103,15 @@ const ChatScreen: React.FC = () => {
|
||||
const [gpsEnabled, setGpsEnabled] = useState(false);
|
||||
const [wakeWordActive, setWakeWordActive] = useState(false);
|
||||
const [fullscreenImage, setFullscreenImage] = useState<string | null>(null);
|
||||
const [searchQuery, setSearchQuery] = useState('');
|
||||
const [searchVisible, setSearchVisible] = useState(false);
|
||||
const [pendingAttachments, setPendingAttachments] = useState<{file: any, isPhoto: boolean}[]>([]);
|
||||
const [agentActivity, setAgentActivity] = useState<{activity: string, tool: string}>({activity: 'idle', tool: ''});
|
||||
// Gerätelokale TTS-Config: globaler Toggle (aus Settings) + temporäres Muten (Mund-Button)
|
||||
const [ttsDeviceEnabled, setTtsDeviceEnabled] = useState(true);
|
||||
const [ttsMuted, setTtsMuted] = useState(false);
|
||||
// Gerätelokale XTTS-Voice-Wahl (bevorzugt gegenueber dem globalen Default)
|
||||
const localXttsVoiceRef = useRef<string>('');
|
||||
|
||||
const flatListRef = useRef<FlatList>(null);
|
||||
const messageIdCounter = useRef(0);
|
||||
@@ -101,6 +122,32 @@ const ChatScreen: React.FC = () => {
|
||||
return `msg_${Date.now()}_${messageIdCounter.current}`;
|
||||
};
|
||||
|
||||
// TTS-Settings beim Mount + bei Screen-Fokus neu laden (damit Settings-Toggle sofort greift)
|
||||
useEffect(() => {
|
||||
const loadTtsSettings = async () => {
|
||||
const enabled = await AsyncStorage.getItem('aria_tts_enabled');
|
||||
setTtsDeviceEnabled(enabled !== 'false'); // default true
|
||||
const muted = await AsyncStorage.getItem('aria_tts_muted');
|
||||
setTtsMuted(muted === 'true'); // default false
|
||||
const voice = await AsyncStorage.getItem('aria_xtts_voice');
|
||||
localXttsVoiceRef.current = voice || '';
|
||||
};
|
||||
loadTtsSettings();
|
||||
// Poll alle 2s um Settings-Aenderung mitzubekommen (einfache Loesung ohne Context)
|
||||
const interval = setInterval(loadTtsSettings, 2000);
|
||||
return () => clearInterval(interval);
|
||||
}, []);
|
||||
|
||||
const toggleMute = useCallback(() => {
|
||||
setTtsMuted(prev => {
|
||||
const next = !prev;
|
||||
AsyncStorage.setItem('aria_tts_muted', String(next));
|
||||
// Bei Muten sofort laufende Wiedergabe stoppen
|
||||
if (next) audioService.stopPlayback();
|
||||
return next;
|
||||
});
|
||||
}, []);
|
||||
|
||||
// Chat-Verlauf aus AsyncStorage laden
|
||||
const isInitialLoad = useRef(true);
|
||||
useEffect(() => {
|
||||
@@ -212,12 +259,12 @@ const ChatScreen: React.FC = () => {
|
||||
if (sender === 'diagnostic') {
|
||||
const diagText = (message.payload.text as string) || '';
|
||||
if (diagText) {
|
||||
setMessages(prev => [...prev, {
|
||||
setMessages(prev => capMessages([...prev, {
|
||||
id: nextId(),
|
||||
sender: 'user',
|
||||
text: diagText,
|
||||
timestamp: message.timestamp,
|
||||
}]);
|
||||
}]));
|
||||
}
|
||||
return;
|
||||
}
|
||||
@@ -236,14 +283,47 @@ const ChatScreen: React.FC = () => {
|
||||
text,
|
||||
timestamp: ts,
|
||||
attachments: message.payload.attachments as Attachment[] | undefined,
|
||||
messageId: (message.payload.messageId as string) || undefined,
|
||||
};
|
||||
return [...prev, ariaMsg];
|
||||
return capMessages([...prev, ariaMsg]);
|
||||
});
|
||||
}
|
||||
|
||||
// TTS-Audio abspielen wenn vorhanden
|
||||
// TTS-Audio abspielen wenn vorhanden — respektiert geraetelokalen Mute/Disable
|
||||
const canPlay = ttsDeviceEnabled && !ttsMuted;
|
||||
if (message.type === 'audio' && message.payload.base64) {
|
||||
audioService.playAudio(message.payload.base64 as string);
|
||||
const b64 = message.payload.base64 as string;
|
||||
const refId = (message.payload.messageId as string) || '';
|
||||
if (canPlay) audioService.playAudio(b64);
|
||||
// Cache IMMER schreiben — Play-Button soll auch bei Mute spaeter funktionieren
|
||||
if (refId) {
|
||||
audioService.cacheAudio(b64, refId).then(audioPath => {
|
||||
if (!audioPath) return;
|
||||
setMessages(prev => prev.map(m =>
|
||||
m.messageId === refId ? { ...m, audioPath } : m
|
||||
));
|
||||
}).catch(() => {});
|
||||
}
|
||||
}
|
||||
|
||||
// XTTS PCM-Stream: Cache IMMER bauen, Playback nur wenn nicht gemutet
|
||||
if (message.type === ('audio_pcm' as any)) {
|
||||
const p = { ...(message.payload as any), silent: !canPlay };
|
||||
const refId = (p.messageId as string) || '';
|
||||
audioService.handlePcmChunk(p).then((audioPath: any) => {
|
||||
if (p.final && audioPath && refId) {
|
||||
setMessages(prev => prev.map(m =>
|
||||
m.messageId === refId ? { ...m, audioPath } : m
|
||||
));
|
||||
}
|
||||
}).catch(() => {});
|
||||
}
|
||||
|
||||
// Thinking-Indicator Status von der Bridge
|
||||
if (message.type === 'agent_activity') {
|
||||
const activity = (message.payload.activity as string) || 'idle';
|
||||
const tool = (message.payload.tool as string) || '';
|
||||
setAgentActivity({ activity, tool });
|
||||
}
|
||||
});
|
||||
|
||||
@@ -260,12 +340,30 @@ const ChatScreen: React.FC = () => {
|
||||
};
|
||||
}, []);
|
||||
|
||||
// Wake Word: "ARIA" Erkennung → Auto-Aufnahme starten
|
||||
// Auto-Update: Bei App-Start pruefen
|
||||
useEffect(() => {
|
||||
const unsubUpdate = updateService.onUpdateAvailable((info) => {
|
||||
updateService.promptUpdate(info);
|
||||
});
|
||||
// Nach 5s pruefen (RVS muss erst verbunden sein)
|
||||
const timer = setTimeout(() => updateService.checkForUpdate(), 5000);
|
||||
return () => { unsubUpdate(); clearTimeout(timer); };
|
||||
}, []);
|
||||
|
||||
// Gespraechsmodus: Nach TTS-Wiedergabe automatisch Aufnahme starten
|
||||
useEffect(() => {
|
||||
const unsubPlayback = audioService.onPlaybackFinished(() => {
|
||||
if (wakeWordService.isActive()) {
|
||||
wakeWordService.resume();
|
||||
}
|
||||
});
|
||||
return () => unsubPlayback();
|
||||
}, []);
|
||||
|
||||
// Wake Word / Gespraechsmodus: Auto-Aufnahme starten
|
||||
useEffect(() => {
|
||||
const unsubWake = wakeWordService.onWakeWord(async () => {
|
||||
console.log('[Chat] Wake Word erkannt — starte Auto-Aufnahme');
|
||||
// TTS stoppen damit ARIA sich nicht selbst hoert
|
||||
audioService.stopPlayback();
|
||||
console.log('[Chat] Gespraechsmodus — starte Auto-Aufnahme');
|
||||
// Aufnahme mit Auto-Stop (VAD) starten
|
||||
const started = await audioService.startRecording(true);
|
||||
if (!started) {
|
||||
@@ -287,11 +385,12 @@ const ChatScreen: React.FC = () => {
|
||||
timestamp: Date.now(),
|
||||
attachments: [{ type: 'audio', name: 'Sprachaufnahme' }],
|
||||
};
|
||||
setMessages(prev => [...prev, userMsg]);
|
||||
setMessages(prev => capMessages([...prev, userMsg]));
|
||||
rvs.send('audio', {
|
||||
base64: result.base64,
|
||||
durationMs: result.durationMs,
|
||||
mimeType: result.mimeType,
|
||||
voice: localXttsVoiceRef.current,
|
||||
...(location && { location }),
|
||||
});
|
||||
}
|
||||
@@ -346,22 +445,8 @@ const ChatScreen: React.FC = () => {
|
||||
return () => { if (saveTimer.current) clearTimeout(saveTimer.current); };
|
||||
}, [messages]);
|
||||
|
||||
// Auto-Scroll wird ueber onContentSizeChange der FlatList gesteuert
|
||||
const shouldAutoScroll = useRef(true);
|
||||
const handleContentSizeChange = useCallback(() => {
|
||||
if (shouldAutoScroll.current) {
|
||||
flatListRef.current?.scrollToEnd({ animated: false });
|
||||
}
|
||||
}, []);
|
||||
const handleScrollBeginDrag = useCallback(() => {
|
||||
shouldAutoScroll.current = false;
|
||||
}, []);
|
||||
const handleScrollEndDrag = useCallback((e: any) => {
|
||||
// Auto-Scroll wieder aktivieren wenn User ganz unten ist
|
||||
const { contentOffset, contentSize, layoutMeasurement } = e.nativeEvent;
|
||||
const isAtBottom = contentOffset.y + layoutMeasurement.height >= contentSize.height - 50;
|
||||
shouldAutoScroll.current = isAtBottom;
|
||||
}, []);
|
||||
// Inverted FlatList: neueste Nachrichten unten, kein manuelles Scrollen noetig
|
||||
const invertedMessages = useMemo(() => [...messages].reverse(), [messages]);
|
||||
|
||||
// GPS-Position holen (optional)
|
||||
const getCurrentLocation = useCallback((): Promise<{ lat: number; lon: number } | null> => {
|
||||
@@ -387,6 +472,13 @@ const ChatScreen: React.FC = () => {
|
||||
|
||||
const sendTextMessage = useCallback(async () => {
|
||||
const text = inputText.trim();
|
||||
|
||||
// Wenn pending Anhaenge vorhanden → Anhaenge + Text zusammen senden
|
||||
if (pendingAttachments.length > 0) {
|
||||
sendPendingAttachments(text);
|
||||
return;
|
||||
}
|
||||
|
||||
if (!text) return;
|
||||
|
||||
setInputText('');
|
||||
@@ -399,14 +491,21 @@ const ChatScreen: React.FC = () => {
|
||||
text,
|
||||
timestamp: Date.now(),
|
||||
};
|
||||
setMessages(prev => [...prev, userMsg]);
|
||||
setMessages(prev => capMessages([...prev, userMsg]));
|
||||
|
||||
// An RVS senden
|
||||
// An RVS senden — mit geraetelokaler Voice (Bridge nutzt sie fuer die Antwort)
|
||||
rvs.send('chat', {
|
||||
text,
|
||||
voice: localXttsVoiceRef.current,
|
||||
...(location && { location }),
|
||||
});
|
||||
}, [inputText, getCurrentLocation]);
|
||||
}, [inputText, getCurrentLocation, pendingAttachments, sendPendingAttachments]);
|
||||
|
||||
// Anfrage abbrechen — sofort lokalen Indicator weg, Bridge triggert doctor --fix
|
||||
const cancelRequest = useCallback(() => {
|
||||
setAgentActivity({ activity: 'idle', tool: '' });
|
||||
rvs.send('cancel_request' as any, {});
|
||||
}, []);
|
||||
|
||||
// Sprachaufnahme abgeschlossen
|
||||
const handleVoiceRecording = useCallback(async (result: RecordingResult) => {
|
||||
@@ -418,7 +517,7 @@ const ChatScreen: React.FC = () => {
|
||||
text: '🎙 Spracheingabe wird verarbeitet...',
|
||||
timestamp: Date.now(),
|
||||
};
|
||||
setMessages(prev => [...prev, userMsg]);
|
||||
setMessages(prev => capMessages([...prev, userMsg]));
|
||||
|
||||
rvs.send('audio', {
|
||||
base64: result.base64,
|
||||
@@ -428,88 +527,92 @@ const ChatScreen: React.FC = () => {
|
||||
});
|
||||
}, [getCurrentLocation]);
|
||||
|
||||
// Datei senden
|
||||
// Datei auswaehlen → zur Pending-Liste hinzufuegen
|
||||
const handleFileSelected = useCallback(async (file: FileData) => {
|
||||
setShowFileUpload(false);
|
||||
const location = await getCurrentLocation();
|
||||
setPendingAttachments(prev => [...prev, { file, isPhoto: false }]);
|
||||
}, []);
|
||||
|
||||
const isImage = file.type.startsWith('image/');
|
||||
const msgId = nextId();
|
||||
let imageUri = isImage && file.base64 ? `data:${file.type};base64,${file.base64}` : file.uri;
|
||||
|
||||
const userMsg: ChatMessage = {
|
||||
id: msgId,
|
||||
sender: 'user',
|
||||
text: 'Anhang empfangen',
|
||||
timestamp: Date.now(),
|
||||
attachments: [{
|
||||
type: isImage ? 'image' : 'file',
|
||||
name: file.name,
|
||||
size: file.size,
|
||||
uri: imageUri,
|
||||
mimeType: file.type,
|
||||
}],
|
||||
};
|
||||
setMessages(prev => [...prev, userMsg]);
|
||||
|
||||
// Anhang auf Disk speichern fuer Persistenz
|
||||
if (file.base64) {
|
||||
persistAttachment(file.base64, msgId, file.name).then(filePath => {
|
||||
setMessages(prev => prev.map(m =>
|
||||
m.id === msgId ? { ...m, attachments: m.attachments?.map(a => ({ ...a, uri: filePath })) } : m
|
||||
));
|
||||
}).catch(() => {});
|
||||
}
|
||||
|
||||
rvs.send('file', {
|
||||
name: file.name,
|
||||
type: file.type,
|
||||
size: file.size,
|
||||
base64: file.base64,
|
||||
...(location && { location }),
|
||||
});
|
||||
}, [getCurrentLocation]);
|
||||
|
||||
// Foto senden
|
||||
// Foto auswaehlen → zur Pending-Liste hinzufuegen
|
||||
const handlePhotoSelected = useCallback(async (photo: PhotoData) => {
|
||||
setShowCameraUpload(false);
|
||||
setPendingAttachments(prev => [...prev, { file: photo, isPhoto: true }]);
|
||||
}, []);
|
||||
|
||||
// Alle Pending Anhaenge + Text senden
|
||||
const sendPendingAttachments = useCallback(async (messageText: string) => {
|
||||
if (pendingAttachments.length === 0) return;
|
||||
const location = await getCurrentLocation();
|
||||
|
||||
const msgId = nextId();
|
||||
const dataUri = photo.base64 ? `data:${photo.type};base64,${photo.base64}` : undefined;
|
||||
|
||||
// Alle Attachments fuer die Chat-Nachricht sammeln
|
||||
const attachments: Attachment[] = [];
|
||||
for (const { file, isPhoto } of pendingAttachments) {
|
||||
const isImage = isPhoto || (file.type && file.type.startsWith('image/'));
|
||||
const name = isPhoto ? file.fileName : file.name;
|
||||
const base64 = file.base64 || '';
|
||||
const mimeType = file.type || '';
|
||||
const imageUri = isImage && base64 ? `data:${mimeType};base64,${base64}` : file.uri;
|
||||
|
||||
attachments.push({
|
||||
type: isImage ? 'image' : 'file',
|
||||
name,
|
||||
size: file.size,
|
||||
uri: imageUri,
|
||||
mimeType,
|
||||
});
|
||||
}
|
||||
|
||||
// Chat-Nachricht mit allen Anhaengen
|
||||
const userMsg: ChatMessage = {
|
||||
id: msgId,
|
||||
sender: 'user',
|
||||
text: 'Anhang empfangen',
|
||||
text: messageText || `${pendingAttachments.length} Anhang/Anhaenge`,
|
||||
timestamp: Date.now(),
|
||||
attachments: [{
|
||||
type: 'image',
|
||||
name: photo.fileName,
|
||||
uri: dataUri,
|
||||
mimeType: photo.type,
|
||||
}],
|
||||
attachments,
|
||||
};
|
||||
setMessages(prev => [...prev, userMsg]);
|
||||
setMessages(prev => capMessages([...prev, userMsg]));
|
||||
|
||||
// Foto auf Disk speichern fuer Persistenz
|
||||
if (photo.base64) {
|
||||
persistAttachment(photo.base64, msgId, photo.fileName).then(filePath => {
|
||||
setMessages(prev => prev.map(m =>
|
||||
m.id === msgId ? { ...m, attachments: m.attachments?.map(a => ({ ...a, uri: filePath })) } : m
|
||||
));
|
||||
}).catch(() => {});
|
||||
// Alle Dateien an RVS senden + auf Disk speichern
|
||||
for (const { file, isPhoto } of pendingAttachments) {
|
||||
const name = isPhoto ? file.fileName : file.name;
|
||||
const base64 = file.base64 || '';
|
||||
const mimeType = file.type || '';
|
||||
|
||||
// Auf Disk speichern
|
||||
if (base64) {
|
||||
persistAttachment(base64, msgId + '_' + name, name).then(filePath => {
|
||||
setMessages(prev => prev.map(m =>
|
||||
m.id === msgId ? { ...m, attachments: m.attachments?.map(a =>
|
||||
a.name === name && !a.uri?.startsWith('file://') ? { ...a, uri: filePath } : a
|
||||
)} : m
|
||||
));
|
||||
}).catch(() => {});
|
||||
}
|
||||
|
||||
// An RVS senden
|
||||
rvs.send('file', {
|
||||
name,
|
||||
type: mimeType,
|
||||
size: file.size,
|
||||
base64,
|
||||
...(isPhoto && file.width && { width: file.width, height: file.height }),
|
||||
...(location && { location }),
|
||||
});
|
||||
}
|
||||
|
||||
rvs.send('file', {
|
||||
name: photo.fileName,
|
||||
type: photo.type,
|
||||
base64: photo.base64,
|
||||
width: photo.width,
|
||||
height: photo.height,
|
||||
...(location && { location }),
|
||||
});
|
||||
}, [getCurrentLocation]);
|
||||
// Text als separate Nachricht (damit ARIA weiss was zu tun ist)
|
||||
if (messageText) {
|
||||
rvs.send('chat', {
|
||||
text: messageText,
|
||||
voice: localXttsVoiceRef.current,
|
||||
...(location && { location }),
|
||||
});
|
||||
}
|
||||
|
||||
setPendingAttachments([]);
|
||||
setInputText('');
|
||||
}, [pendingAttachments, getCurrentLocation]);
|
||||
|
||||
// --- Rendering ---
|
||||
|
||||
@@ -581,6 +684,27 @@ const ChatScreen: React.FC = () => {
|
||||
{item.text}
|
||||
</Text>
|
||||
)}
|
||||
{/* Play-Button fuer ARIA-Nachrichten — Cache bevorzugt, sonst Bridge-TTS mit aktueller Engine */}
|
||||
{!isUser && item.text.length > 0 && (
|
||||
<TouchableOpacity
|
||||
style={styles.playButton}
|
||||
onPress={() => {
|
||||
if (item.audioPath) {
|
||||
audioService.playFromPath(item.audioPath);
|
||||
} else {
|
||||
// messageId mitschicken damit die Bridge das generierte Audio
|
||||
// wieder mit der Nachricht verknuepft (fuer den naechsten Replay aus Cache)
|
||||
rvs.send('tts_request' as any, {
|
||||
text: item.text,
|
||||
voice: localXttsVoiceRef.current,
|
||||
messageId: item.messageId || '',
|
||||
});
|
||||
}
|
||||
}}
|
||||
>
|
||||
<Text style={styles.playButtonText}>{'\uD83D\uDD0A'}</Text>
|
||||
</TouchableOpacity>
|
||||
)}
|
||||
<Text style={styles.timestamp}>{time}</Text>
|
||||
</View>
|
||||
);
|
||||
@@ -603,19 +727,37 @@ const ChatScreen: React.FC = () => {
|
||||
{connectionState === 'connected' ? 'Verbunden' :
|
||||
connectionState === 'connecting' ? 'Verbinde...' : 'Getrennt'}
|
||||
</Text>
|
||||
<TouchableOpacity onPress={() => setSearchVisible(!searchVisible)} style={{marginLeft: 'auto', paddingHorizontal: 8}}>
|
||||
<Text style={{fontSize: 16}}>{'\uD83D\uDD0D'}</Text>
|
||||
</TouchableOpacity>
|
||||
</View>
|
||||
|
||||
{/* Suchleiste */}
|
||||
{searchVisible && (
|
||||
<View style={styles.searchBar}>
|
||||
<TextInput
|
||||
style={styles.searchInput}
|
||||
value={searchQuery}
|
||||
onChangeText={setSearchQuery}
|
||||
placeholder="Chat durchsuchen..."
|
||||
placeholderTextColor="#555570"
|
||||
autoFocus
|
||||
/>
|
||||
<TouchableOpacity onPress={() => { setSearchVisible(false); setSearchQuery(''); }}>
|
||||
<Text style={{color: '#FF3B30', fontSize: 14, paddingHorizontal: 8}}>X</Text>
|
||||
</TouchableOpacity>
|
||||
</View>
|
||||
)}
|
||||
|
||||
{/* Nachrichtenliste */}
|
||||
<FlatList
|
||||
ref={flatListRef}
|
||||
data={messages}
|
||||
inverted
|
||||
data={searchQuery ? messages.filter(m => m.text.toLowerCase().includes(searchQuery.toLowerCase())).reverse() : invertedMessages}
|
||||
keyExtractor={item => item.id}
|
||||
renderItem={renderMessage}
|
||||
contentContainerStyle={styles.messageList}
|
||||
showsVerticalScrollIndicator={false}
|
||||
onContentSizeChange={handleContentSizeChange}
|
||||
onScrollBeginDrag={handleScrollBeginDrag}
|
||||
onScrollEndDrag={handleScrollEndDrag}
|
||||
ListEmptyComponent={
|
||||
<View style={styles.emptyContainer}>
|
||||
<Text style={styles.emptyIcon}>{'\uD83E\uDD16'}</Text>
|
||||
@@ -625,6 +767,56 @@ const ChatScreen: React.FC = () => {
|
||||
}
|
||||
/>
|
||||
|
||||
{/* Thinking-Indicator */}
|
||||
{agentActivity.activity !== 'idle' && (
|
||||
<View style={styles.thinkingBar}>
|
||||
<Text style={styles.thinkingText}>
|
||||
{agentActivity.activity === 'tool' && agentActivity.tool
|
||||
? `\uD83D\uDD27 ${agentActivity.tool}`
|
||||
: agentActivity.activity === 'assistant'
|
||||
? '\u270D\uFE0F ARIA schreibt...'
|
||||
: '\uD83D\uDCAD ARIA denkt...'}
|
||||
</Text>
|
||||
<TouchableOpacity style={styles.thinkingCancel} onPress={cancelRequest}>
|
||||
<Text style={styles.thinkingCancelText}>Abbrechen</Text>
|
||||
</TouchableOpacity>
|
||||
</View>
|
||||
)}
|
||||
|
||||
{/* Pending Anhaenge Vorschau */}
|
||||
{pendingAttachments.length > 0 && (
|
||||
<View style={styles.pendingBar}>
|
||||
<ScrollView horizontal showsHorizontalScrollIndicator={false} style={{flex: 1}}>
|
||||
{pendingAttachments.map((att, idx) => (
|
||||
<View key={idx} style={styles.pendingItem}>
|
||||
{att.file.type?.startsWith('image/') || att.isPhoto ? (
|
||||
<Image
|
||||
source={{ uri: att.file.base64
|
||||
? `data:${att.file.type};base64,${att.file.base64}`
|
||||
: att.file.uri }}
|
||||
style={styles.pendingThumb}
|
||||
/>
|
||||
) : (
|
||||
<View style={[styles.pendingThumb, {justifyContent: 'center', alignItems: 'center'}]}>
|
||||
<Text style={{fontSize: 20}}>{'\uD83D\uDCC4'}</Text>
|
||||
</View>
|
||||
)}
|
||||
<TouchableOpacity
|
||||
style={styles.pendingRemove}
|
||||
onPress={() => setPendingAttachments(prev => prev.filter((_, i) => i !== idx))}
|
||||
>
|
||||
<Text style={{color: '#fff', fontSize: 10, fontWeight: 'bold'}}>X</Text>
|
||||
</TouchableOpacity>
|
||||
</View>
|
||||
))}
|
||||
</ScrollView>
|
||||
<Text style={{color: '#8888AA', fontSize: 11, marginLeft: 8}}>{pendingAttachments.length}</Text>
|
||||
<TouchableOpacity onPress={() => setPendingAttachments([])}>
|
||||
<Text style={{color: '#FF3B30', fontSize: 14, paddingHorizontal: 8}}>Alle X</Text>
|
||||
</TouchableOpacity>
|
||||
</View>
|
||||
)}
|
||||
|
||||
{/* Eingabebereich */}
|
||||
<View style={styles.inputContainer}>
|
||||
{/* Datei-Buttons */}
|
||||
@@ -647,7 +839,7 @@ const ChatScreen: React.FC = () => {
|
||||
style={styles.textInput}
|
||||
value={inputText}
|
||||
onChangeText={setInputText}
|
||||
placeholder="Nachricht an ARIA..."
|
||||
placeholder={pendingAttachments.length > 0 ? "Text zu den Anhaengen (optional)..." : "Nachricht an ARIA..."}
|
||||
placeholderTextColor="#555570"
|
||||
multiline
|
||||
maxLength={4000}
|
||||
@@ -656,7 +848,7 @@ const ChatScreen: React.FC = () => {
|
||||
/>
|
||||
|
||||
{/* Senden oder Sprache */}
|
||||
{inputText.trim() ? (
|
||||
{inputText.trim() || pendingAttachments.length > 0 ? (
|
||||
<TouchableOpacity style={styles.sendButton} onPress={sendTextMessage}>
|
||||
<Text style={styles.sendIcon}>{'\u2B06\uFE0F'}</Text>
|
||||
</TouchableOpacity>
|
||||
@@ -667,6 +859,17 @@ const ChatScreen: React.FC = () => {
|
||||
disabled={connectionState !== 'connected'}
|
||||
wakeWordActive={wakeWordActive}
|
||||
/>
|
||||
{/* Mund-Button: TTS auf diesem Geraet muten/aufheben.
|
||||
Nur sichtbar wenn TTS in den Settings grundsaetzlich aktiv ist. */}
|
||||
{ttsDeviceEnabled && (
|
||||
<TouchableOpacity
|
||||
style={[styles.wakeWordBtn, ttsMuted && styles.mouthBtnMuted]}
|
||||
onPress={toggleMute}
|
||||
accessibilityLabel={ttsMuted ? 'Sprachausgabe einschalten' : 'Sprachausgabe stumm schalten'}
|
||||
>
|
||||
<Text style={styles.wakeWordIcon}>{ttsMuted ? '🤐' : '👄'}</Text>
|
||||
</TouchableOpacity>
|
||||
)}
|
||||
<TouchableOpacity
|
||||
style={[styles.wakeWordBtn, wakeWordActive && styles.wakeWordBtnActive]}
|
||||
onPress={toggleWakeWord}
|
||||
@@ -884,9 +1087,93 @@ const styles = StyleSheet.create({
|
||||
wakeWordBtnActive: {
|
||||
backgroundColor: 'rgba(52, 199, 89, 0.3)',
|
||||
},
|
||||
mouthBtnMuted: {
|
||||
backgroundColor: 'rgba(255, 59, 48, 0.25)',
|
||||
},
|
||||
wakeWordIcon: {
|
||||
fontSize: 16,
|
||||
},
|
||||
thinkingBar: {
|
||||
flexDirection: 'row',
|
||||
alignItems: 'center',
|
||||
justifyContent: 'space-between',
|
||||
backgroundColor: '#1E1E2E',
|
||||
paddingHorizontal: 12,
|
||||
paddingVertical: 6,
|
||||
borderTopWidth: 1,
|
||||
borderTopColor: '#2A2A3E',
|
||||
},
|
||||
thinkingText: {
|
||||
color: '#FFD60A',
|
||||
fontSize: 12,
|
||||
flex: 1,
|
||||
},
|
||||
thinkingCancel: {
|
||||
paddingHorizontal: 10,
|
||||
paddingVertical: 4,
|
||||
borderWidth: 1,
|
||||
borderColor: '#FF3B30',
|
||||
borderRadius: 4,
|
||||
},
|
||||
thinkingCancelText: {
|
||||
color: '#FF3B30',
|
||||
fontSize: 11,
|
||||
fontWeight: 'bold',
|
||||
},
|
||||
pendingBar: {
|
||||
flexDirection: 'row',
|
||||
alignItems: 'center',
|
||||
backgroundColor: '#1E1E2E',
|
||||
paddingHorizontal: 12,
|
||||
paddingVertical: 8,
|
||||
borderTopWidth: 1,
|
||||
borderTopColor: '#2A2A3E',
|
||||
},
|
||||
pendingItem: {
|
||||
position: 'relative',
|
||||
marginRight: 8,
|
||||
},
|
||||
pendingThumb: {
|
||||
width: 50,
|
||||
height: 50,
|
||||
borderRadius: 6,
|
||||
backgroundColor: '#0D0D1A',
|
||||
},
|
||||
pendingRemove: {
|
||||
position: 'absolute',
|
||||
top: -4,
|
||||
right: -4,
|
||||
width: 18,
|
||||
height: 18,
|
||||
borderRadius: 9,
|
||||
backgroundColor: '#FF3B30',
|
||||
justifyContent: 'center',
|
||||
alignItems: 'center',
|
||||
},
|
||||
searchBar: {
|
||||
flexDirection: 'row',
|
||||
alignItems: 'center',
|
||||
backgroundColor: '#12122A',
|
||||
paddingHorizontal: 12,
|
||||
paddingVertical: 6,
|
||||
borderBottomWidth: 1,
|
||||
borderBottomColor: '#1E1E2E',
|
||||
},
|
||||
searchInput: {
|
||||
flex: 1,
|
||||
color: '#FFFFFF',
|
||||
fontSize: 14,
|
||||
paddingVertical: 4,
|
||||
},
|
||||
playButton: {
|
||||
alignSelf: 'flex-end',
|
||||
paddingHorizontal: 8,
|
||||
paddingVertical: 2,
|
||||
marginTop: 4,
|
||||
},
|
||||
playButtonText: {
|
||||
fontSize: 16,
|
||||
},
|
||||
fullscreenOverlay: {
|
||||
flex: 1,
|
||||
backgroundColor: 'rgba(0,0,0,0.95)',
|
||||
|
||||
@@ -22,6 +22,7 @@ import DocumentPicker from 'react-native-document-picker';
|
||||
import rvs, { ConnectionState, RVSMessage, ConnectionConfig, ConnectionLogEntry } from '../services/rvs';
|
||||
import ModeSelector from '../components/ModeSelector';
|
||||
import QRScanner from '../components/QRScanner';
|
||||
import VoiceCloneModal from '../components/VoiceCloneModal';
|
||||
|
||||
const STORAGE_PATH_KEY = 'aria_attachment_storage_path';
|
||||
const DEFAULT_STORAGE_PATH = `${RNFS.DocumentDirectoryPath}/chat_attachments`;
|
||||
@@ -72,9 +73,10 @@ const SettingsScreen: React.FC = () => {
|
||||
const [autoDownload, setAutoDownload] = useState(true);
|
||||
const [storageSize, setStorageSize] = useState('...');
|
||||
const [ttsEnabled, setTtsEnabled] = useState(true);
|
||||
const [defaultVoice, setDefaultVoice] = useState('ramona');
|
||||
const [highlightVoice, setHighlightVoice] = useState('thorsten');
|
||||
const [editingPath, setEditingPath] = useState(false);
|
||||
const [xttsVoice, setXttsVoice] = useState('');
|
||||
const [availableVoices, setAvailableVoices] = useState<Array<{name: string, size: number}>>([]);
|
||||
const [voiceCloneVisible, setVoiceCloneVisible] = useState(false);
|
||||
const [tempPath, setTempPath] = useState('');
|
||||
|
||||
let logIdCounter = 0;
|
||||
@@ -97,12 +99,11 @@ const SettingsScreen: React.FC = () => {
|
||||
AsyncStorage.getItem('aria_tts_enabled').then(saved => {
|
||||
if (saved !== null) setTtsEnabled(saved === 'true');
|
||||
});
|
||||
AsyncStorage.getItem('aria_default_voice').then(saved => {
|
||||
if (saved) setDefaultVoice(saved);
|
||||
});
|
||||
AsyncStorage.getItem('aria_highlight_voice').then(saved => {
|
||||
if (saved) setHighlightVoice(saved);
|
||||
AsyncStorage.getItem('aria_xtts_voice').then(saved => {
|
||||
if (saved) setXttsVoice(saved);
|
||||
});
|
||||
// Voice-Liste vom XTTS-Server holen (via RVS)
|
||||
rvs.send('xtts_list_voices' as any, {});
|
||||
}, []);
|
||||
|
||||
// Speichergroesse berechnen
|
||||
@@ -233,6 +234,22 @@ const SettingsScreen: React.FC = () => {
|
||||
const mode = message.payload.mode as string;
|
||||
if (mode) setCurrentMode(mode);
|
||||
}
|
||||
|
||||
// XTTS-Voice-Liste
|
||||
if (message.type === ('xtts_voices_list' as any)) {
|
||||
const voices = ((message.payload as any).voices || []) as Array<{name: string, size: number}>;
|
||||
setAvailableVoices(voices);
|
||||
}
|
||||
|
||||
// Voice wurde gespeichert → Liste neu laden + ggf. auswaehlen
|
||||
if (message.type === ('xtts_voice_saved' as any)) {
|
||||
const name = (message.payload as any).name as string;
|
||||
if (name) {
|
||||
setXttsVoice(name);
|
||||
AsyncStorage.setItem('aria_xtts_voice', name);
|
||||
}
|
||||
rvs.send('xtts_list_voices' as any, {});
|
||||
}
|
||||
});
|
||||
|
||||
return () => {
|
||||
@@ -296,6 +313,36 @@ const SettingsScreen: React.FC = () => {
|
||||
// In Produktion: Wert in AsyncStorage persistieren
|
||||
}, []);
|
||||
|
||||
// --- XTTS Voice ---
|
||||
|
||||
const selectVoice = useCallback((voiceName: string) => {
|
||||
setXttsVoice(voiceName);
|
||||
AsyncStorage.setItem('aria_xtts_voice', voiceName);
|
||||
}, []);
|
||||
|
||||
const deleteVoice = useCallback((name: string) => {
|
||||
Alert.alert(
|
||||
'Stimme loeschen',
|
||||
`Stimme "${name}" vom Server endgueltig loeschen?\nAlle Apps verlieren sie.`,
|
||||
[
|
||||
{ text: 'Abbrechen', style: 'cancel' },
|
||||
{
|
||||
text: 'Loeschen',
|
||||
style: 'destructive',
|
||||
onPress: () => {
|
||||
rvs.send('xtts_delete_voice' as any, { name });
|
||||
if (xttsVoice === name) {
|
||||
setXttsVoice('');
|
||||
AsyncStorage.setItem('aria_xtts_voice', '');
|
||||
}
|
||||
// Liste nach kurzer Wartezeit neu laden (XTTS-Bridge schickt eh neue Liste)
|
||||
setTimeout(() => rvs.send('xtts_list_voices' as any, {}), 500);
|
||||
},
|
||||
},
|
||||
],
|
||||
);
|
||||
}, [xttsVoice]);
|
||||
|
||||
// --- Modus aendern ---
|
||||
|
||||
const handleModeChange = useCallback((modeId: string) => {
|
||||
@@ -329,6 +376,10 @@ const SettingsScreen: React.FC = () => {
|
||||
onScan={handleQRScan}
|
||||
onClose={() => setScannerVisible(false)}
|
||||
/>
|
||||
<VoiceCloneModal
|
||||
visible={voiceCloneVisible}
|
||||
onClose={() => setVoiceCloneVisible(false)}
|
||||
/>
|
||||
<ScrollView style={styles.container} contentContainerStyle={styles.content}>
|
||||
|
||||
{/* === Verbindung === */}
|
||||
@@ -454,81 +505,86 @@ const SettingsScreen: React.FC = () => {
|
||||
</View>
|
||||
</View>
|
||||
|
||||
{/* === Sprachausgabe === */}
|
||||
{/* === Sprachausgabe (geraetelokal) === */}
|
||||
<Text style={styles.sectionTitle}>Sprachausgabe</Text>
|
||||
<View style={styles.card}>
|
||||
{/* TTS An/Aus */}
|
||||
<View style={styles.toggleRow}>
|
||||
<View style={styles.toggleInfo}>
|
||||
<Text style={styles.toggleLabel}>Sprachausgabe</Text>
|
||||
<Text style={styles.toggleHint}>ARIA antwortet per Sprache (TTS)</Text>
|
||||
<Text style={styles.toggleLabel}>Sprachausgabe auf diesem Geraet</Text>
|
||||
<Text style={styles.toggleHint}>
|
||||
Nur lokal — andere Geraete sind unabhaengig.
|
||||
Wenn aus, erscheint im Chat auch kein Mund-Button.
|
||||
</Text>
|
||||
</View>
|
||||
<Switch
|
||||
value={ttsEnabled}
|
||||
onValueChange={(val) => {
|
||||
setTtsEnabled(val);
|
||||
AsyncStorage.setItem('aria_tts_enabled', String(val));
|
||||
rvs.send('config' as any, { ttsEnabled: val });
|
||||
}}
|
||||
trackColor={{ false: '#2A2A3E', true: '#0096FF' }}
|
||||
thumbColor={ttsEnabled ? '#FFFFFF' : '#666680'}
|
||||
/>
|
||||
</View>
|
||||
|
||||
{/* Standard-Stimme */}
|
||||
<View style={{marginTop: 16}}>
|
||||
<Text style={styles.toggleLabel}>Standard-Stimme</Text>
|
||||
<Text style={styles.toggleHint}>Fuer normale Antworten und Gespraeche</Text>
|
||||
<View style={{flexDirection: 'row', gap: 8, marginTop: 8}}>
|
||||
<TouchableOpacity
|
||||
style={[styles.voiceBtn, defaultVoice === 'ramona' && styles.voiceBtnActive]}
|
||||
onPress={() => { setDefaultVoice('ramona'); AsyncStorage.setItem('aria_default_voice', 'ramona'); }}
|
||||
>
|
||||
<Text style={styles.voiceBtnIcon}>{'\uD83D\uDE4E\u200D\u2640\uFE0F'}</Text>
|
||||
<Text style={[styles.voiceBtnText, defaultVoice === 'ramona' && styles.voiceBtnTextActive]}>Ramona</Text>
|
||||
<Text style={styles.voiceBtnHint}>Weiblich, warm</Text>
|
||||
</TouchableOpacity>
|
||||
<TouchableOpacity
|
||||
style={[styles.voiceBtn, defaultVoice === 'thorsten' && styles.voiceBtnActive]}
|
||||
onPress={() => { setDefaultVoice('thorsten'); AsyncStorage.setItem('aria_default_voice', 'thorsten'); }}
|
||||
>
|
||||
<Text style={styles.voiceBtnIcon}>{'\uD83E\uDDD4'}</Text>
|
||||
<Text style={[styles.voiceBtnText, defaultVoice === 'thorsten' && styles.voiceBtnTextActive]}>Thorsten</Text>
|
||||
<Text style={styles.voiceBtnHint}>Maennlich, tief</Text>
|
||||
</TouchableOpacity>
|
||||
</View>
|
||||
</View>
|
||||
{ttsEnabled && (
|
||||
<View style={{marginTop: 20}}>
|
||||
<Text style={styles.toggleLabel}>Stimme (geraetelokal)</Text>
|
||||
<Text style={styles.toggleHint}>
|
||||
Eigene Wahl fuer dieses Geraet. Ohne Auswahl gilt der Diagnostic-Default.
|
||||
</Text>
|
||||
|
||||
{/* Highlight-Stimme */}
|
||||
<View style={{marginTop: 16}}>
|
||||
<Text style={styles.toggleLabel}>Highlight-Stimme</Text>
|
||||
<Text style={styles.toggleHint}>Fuer besondere Ereignisse (Deploy, Alarm, Erfolg)</Text>
|
||||
<View style={{flexDirection: 'row', gap: 8, marginTop: 8}}>
|
||||
{/* Default-Option */}
|
||||
<TouchableOpacity
|
||||
style={[styles.voiceBtn, highlightVoice === 'thorsten' && styles.voiceBtnActive]}
|
||||
onPress={() => { setHighlightVoice('thorsten'); AsyncStorage.setItem('aria_highlight_voice', 'thorsten'); }}
|
||||
style={[styles.voiceRow, xttsVoice === '' && styles.voiceRowActive]}
|
||||
onPress={() => selectVoice('')}
|
||||
>
|
||||
<Text style={styles.voiceBtnIcon}>{'\uD83E\uDDD4'}</Text>
|
||||
<Text style={[styles.voiceBtnText, highlightVoice === 'thorsten' && styles.voiceBtnTextActive]}>Thorsten</Text>
|
||||
<Text style={[styles.voiceRowName, xttsVoice === '' && styles.voiceRowNameActive]}>
|
||||
Standard (Diagnostic-Default)
|
||||
</Text>
|
||||
{xttsVoice === '' && <Text style={styles.voiceRowCheck}>{'\u2713'}</Text>}
|
||||
</TouchableOpacity>
|
||||
<TouchableOpacity
|
||||
style={[styles.voiceBtn, highlightVoice === 'ramona' && styles.voiceBtnActive]}
|
||||
onPress={() => { setHighlightVoice('ramona'); AsyncStorage.setItem('aria_highlight_voice', 'ramona'); }}
|
||||
>
|
||||
<Text style={styles.voiceBtnIcon}>{'\uD83D\uDE4E\u200D\u2640\uFE0F'}</Text>
|
||||
<Text style={[styles.voiceBtnText, highlightVoice === 'ramona' && styles.voiceBtnTextActive]}>Ramona</Text>
|
||||
</TouchableOpacity>
|
||||
</View>
|
||||
</View>
|
||||
|
||||
{/* Highlight-Trigger Info */}
|
||||
<View style={{marginTop: 16, padding: 10, backgroundColor: '#1E1E2E', borderRadius: 8}}>
|
||||
<Text style={styles.toggleLabel}>{'\u26A1'} Highlight-Trigger</Text>
|
||||
<Text style={[styles.toggleHint, {marginTop: 4}]}>
|
||||
Die Highlight-Stimme wird automatisch bei diesen Woertern verwendet:{'\n'}
|
||||
deploy, erfolgreich, alarm, so soll es sein, kritisch, server down, sicherheitswarnung, ticket geloest, aufgabe abgeschlossen
|
||||
</Text>
|
||||
</View>
|
||||
{availableVoices.length === 0 ? (
|
||||
<Text style={[styles.toggleHint, {marginTop: 8, textAlign: 'center'}]}>
|
||||
Keine eigenen Stimmen auf dem XTTS-Server.
|
||||
</Text>
|
||||
) : (
|
||||
availableVoices.map(v => (
|
||||
<View key={v.name} style={[styles.voiceRow, xttsVoice === v.name && styles.voiceRowActive]}>
|
||||
<TouchableOpacity
|
||||
style={{flex: 1}}
|
||||
onPress={() => selectVoice(v.name)}
|
||||
>
|
||||
<Text style={[styles.voiceRowName, xttsVoice === v.name && styles.voiceRowNameActive]}>
|
||||
{v.name}
|
||||
</Text>
|
||||
<Text style={styles.voiceRowMeta}>{(v.size / 1024).toFixed(0)} KB</Text>
|
||||
</TouchableOpacity>
|
||||
{xttsVoice === v.name && <Text style={styles.voiceRowCheck}>{'\u2713'}</Text>}
|
||||
<TouchableOpacity onPress={() => deleteVoice(v.name)} style={styles.voiceRowDelete}>
|
||||
<Text style={styles.voiceRowDeleteIcon}>X</Text>
|
||||
</TouchableOpacity>
|
||||
</View>
|
||||
))
|
||||
)}
|
||||
|
||||
<View style={{flexDirection: 'row', gap: 8, marginTop: 12}}>
|
||||
<TouchableOpacity
|
||||
style={[styles.connectButton, {flex: 1}]}
|
||||
onPress={() => setVoiceCloneVisible(true)}
|
||||
>
|
||||
<Text style={styles.connectButtonText}>{'\uD83C\uDFA4'} Eigene Stimme aufnehmen</Text>
|
||||
</TouchableOpacity>
|
||||
<TouchableOpacity
|
||||
style={[styles.clearButton, {flex: 0.4, marginTop: 0}]}
|
||||
onPress={() => rvs.send('xtts_list_voices' as any, {})}
|
||||
>
|
||||
<Text style={styles.clearButtonText}>Aktualisieren</Text>
|
||||
</TouchableOpacity>
|
||||
</View>
|
||||
</View>
|
||||
)}
|
||||
</View>
|
||||
|
||||
{/* === Speicher === */}
|
||||
@@ -690,11 +746,21 @@ const SettingsScreen: React.FC = () => {
|
||||
<Text style={styles.sectionTitle}>{'\u00DC'}ber</Text>
|
||||
<View style={styles.card}>
|
||||
<Text style={styles.aboutTitle}>ARIA Cockpit</Text>
|
||||
<Text style={styles.aboutVersion}>Version 0.0.1.9 </Text>
|
||||
<Text style={styles.aboutVersion}>Version {require('../../package.json').version}</Text>
|
||||
<Text style={styles.aboutInfo}>
|
||||
Stefans Kommandozentrale f{'\u00FC'}r ARIA.{'\n'}
|
||||
Gebaut mit React Native + TypeScript.
|
||||
</Text>
|
||||
<TouchableOpacity
|
||||
style={[styles.connectButton, {marginTop: 12}]}
|
||||
onPress={() => {
|
||||
const updateService = require('../services/updater').default;
|
||||
updateService.checkForUpdate();
|
||||
Alert.alert('Update-Check', 'Pruefe auf neue Version...');
|
||||
}}
|
||||
>
|
||||
<Text style={styles.connectButtonText}>Auf Updates pr{'\u00FC'}fen</Text>
|
||||
</TouchableOpacity>
|
||||
</View>
|
||||
|
||||
{/* Platz am Ende */}
|
||||
@@ -833,6 +899,55 @@ const styles = StyleSheet.create({
|
||||
marginTop: 2,
|
||||
},
|
||||
|
||||
// XTTS Voice List
|
||||
voiceRow: {
|
||||
flexDirection: 'row',
|
||||
alignItems: 'center',
|
||||
backgroundColor: '#1E1E2E',
|
||||
borderRadius: 8,
|
||||
padding: 10,
|
||||
marginTop: 6,
|
||||
borderWidth: 1,
|
||||
borderColor: 'transparent',
|
||||
},
|
||||
voiceRowActive: {
|
||||
borderColor: '#0096FF',
|
||||
backgroundColor: '#0D1A2E',
|
||||
},
|
||||
voiceRowName: {
|
||||
color: '#CCCCDD',
|
||||
fontSize: 14,
|
||||
fontWeight: '500',
|
||||
},
|
||||
voiceRowNameActive: {
|
||||
color: '#FFFFFF',
|
||||
},
|
||||
voiceRowMeta: {
|
||||
color: '#666680',
|
||||
fontSize: 11,
|
||||
marginTop: 2,
|
||||
},
|
||||
voiceRowCheck: {
|
||||
color: '#34C759',
|
||||
fontSize: 16,
|
||||
fontWeight: '700',
|
||||
marginHorizontal: 6,
|
||||
},
|
||||
voiceRowDelete: {
|
||||
width: 28,
|
||||
height: 28,
|
||||
borderRadius: 14,
|
||||
backgroundColor: 'rgba(255,59,48,0.2)',
|
||||
alignItems: 'center',
|
||||
justifyContent: 'center',
|
||||
marginLeft: 4,
|
||||
},
|
||||
voiceRowDeleteIcon: {
|
||||
color: '#FF3B30',
|
||||
fontSize: 12,
|
||||
fontWeight: '700',
|
||||
},
|
||||
|
||||
// Stimmen
|
||||
voiceBtn: {
|
||||
flex: 1,
|
||||
|
||||
+431
-32
@@ -6,7 +6,7 @@
|
||||
* Nutzt react-native-audio-recorder-player fuer Aufnahme.
|
||||
*/
|
||||
|
||||
import { Platform, PermissionsAndroid } from 'react-native';
|
||||
import { Platform, PermissionsAndroid, NativeModules } from 'react-native';
|
||||
import Sound from 'react-native-sound';
|
||||
import RNFS from 'react-native-fs';
|
||||
import AudioRecorderPlayer, {
|
||||
@@ -16,6 +16,38 @@ import AudioRecorderPlayer, {
|
||||
OutputFormatAndroidType,
|
||||
} from 'react-native-audio-recorder-player';
|
||||
|
||||
// Base64-Encoder fuer Binary-Strings (Header-Bytes → Base64)
|
||||
const B64_CHARS = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/';
|
||||
function btoaSafe(bin: string): string {
|
||||
let out = '';
|
||||
const len = bin.length;
|
||||
for (let i = 0; i < len; i += 3) {
|
||||
const b1 = bin.charCodeAt(i) & 0xff;
|
||||
const b2 = i + 1 < len ? bin.charCodeAt(i + 1) & 0xff : 0;
|
||||
const b3 = i + 2 < len ? bin.charCodeAt(i + 2) & 0xff : 0;
|
||||
out += B64_CHARS[b1 >> 2];
|
||||
out += B64_CHARS[((b1 & 0x03) << 4) | (b2 >> 4)];
|
||||
out += i + 1 < len ? B64_CHARS[((b2 & 0x0f) << 2) | (b3 >> 6)] : '=';
|
||||
out += i + 2 < len ? B64_CHARS[b3 & 0x3f] : '=';
|
||||
}
|
||||
return out;
|
||||
}
|
||||
|
||||
// Native Module fuer Audio-Focus (Ducking/Muten anderer Apps)
|
||||
const { AudioFocus, PcmStreamPlayer } = NativeModules as {
|
||||
AudioFocus?: {
|
||||
requestDuck: () => Promise<boolean>;
|
||||
requestExclusive: () => Promise<boolean>;
|
||||
release: () => Promise<boolean>;
|
||||
};
|
||||
PcmStreamPlayer?: {
|
||||
start: (sampleRate: number, channels: number) => Promise<boolean>;
|
||||
writeChunk: (base64Pcm: string) => Promise<boolean>;
|
||||
end: () => Promise<boolean>;
|
||||
stop: () => Promise<boolean>;
|
||||
};
|
||||
};
|
||||
|
||||
// --- Typen ---
|
||||
|
||||
export interface RecordingResult {
|
||||
@@ -42,6 +74,11 @@ const AUDIO_ENCODING = 'audio/wav';
|
||||
// VAD (Voice Activity Detection) — Stille-Erkennung
|
||||
const VAD_SILENCE_THRESHOLD_DB = -45; // dB unter dem als "Stille" gilt
|
||||
const VAD_SILENCE_DURATION_MS = 1800; // ms Stille bevor Auto-Stop
|
||||
const VAD_SPEECH_THRESHOLD_DB = -28; // dB ueber dem als "Sprache" gilt (Sprach-Gate) — hoeher = weniger Umgebungsgeraeusche
|
||||
const VAD_SPEECH_MIN_MS = 500; // ms Sprache bevor Aufnahme zaehlt — laenger = keine Huestler/Klopfer mehr
|
||||
|
||||
// Max-Dauer einer Aufnahme in Gespraechsmodus (Notbremse gegen Runaway-Loops)
|
||||
const MAX_RECORDING_MS = 30000;
|
||||
|
||||
// --- Audio-Service ---
|
||||
|
||||
@@ -55,10 +92,30 @@ class AudioService {
|
||||
private recorder: AudioRecorderPlayer;
|
||||
private recordingPath: string = '';
|
||||
|
||||
// Audio-Queue fuer sequentielle TTS-Wiedergabe
|
||||
private audioQueue: string[] = [];
|
||||
private isPlaying: boolean = false;
|
||||
private preloadedSound: Sound | null = null;
|
||||
private preloadedPath: string = '';
|
||||
|
||||
// Sprach-Gate: Aufnahme erst senden wenn tatsaechlich gesprochen wurde
|
||||
private speechDetected: boolean = false;
|
||||
private speechStartTime: number = 0;
|
||||
|
||||
// PCM-Stream (XTTS): aktive Session + Cache-Puffer pro messageId
|
||||
private pcmStreamActive: boolean = false;
|
||||
private pcmMessageId: string = '';
|
||||
private pcmSampleRate: number = 24000;
|
||||
private pcmChannels: number = 1;
|
||||
private pcmBuffer: string[] = []; // base64-chunks zum spaeteren WAV-Build
|
||||
private pcmBytesCollected: number = 0;
|
||||
private readonly PCM_MAX_CACHE_BYTES = 30 * 1024 * 1024; // 30MB
|
||||
|
||||
// VAD State
|
||||
private vadEnabled: boolean = false;
|
||||
private lastSpeechTime: number = 0;
|
||||
private vadTimer: ReturnType<typeof setInterval> | null = null;
|
||||
private maxDurationTimer: ReturnType<typeof setTimeout> | null = null;
|
||||
|
||||
constructor() {
|
||||
this.recorder = new AudioRecorderPlayer();
|
||||
@@ -108,6 +165,10 @@ class AudioService {
|
||||
// Laufende Wiedergabe stoppen (damit ARIA sich nicht selbst hoert)
|
||||
this.stopPlayback();
|
||||
|
||||
// Aufraeumen: Alte aria_recording_ und aria_tts_ Files loeschen
|
||||
// (Schutz gegen Cache-Ueberlauf im Gespraechsmodus bei vielen Zyklen)
|
||||
this._cleanupStaleCacheFiles().catch(() => {});
|
||||
|
||||
this.recordingPath = `${RNFS.CachesDirectoryPath}/aria_recording_${Date.now()}.mp4`;
|
||||
|
||||
// Aufnahme mit Metering starten
|
||||
@@ -115,6 +176,8 @@ class AudioService {
|
||||
AudioEncoderAndroid: AudioEncoderAndroidType.AAC,
|
||||
AudioSourceAndroid: AudioSourceAndroidType.MIC,
|
||||
OutputFormatAndroid: OutputFormatAndroidType.MPEG_4,
|
||||
AudioSamplingRateAndroid: 16000,
|
||||
AudioChannelsAndroid: 1,
|
||||
}, true); // meteringEnabled = true
|
||||
|
||||
// Metering-Callback
|
||||
@@ -122,7 +185,21 @@ class AudioService {
|
||||
const db = e.currentMetering ?? -160;
|
||||
this.meterListeners.forEach(cb => cb(db));
|
||||
|
||||
// VAD: Stille erkennen
|
||||
// Sprach-Gate: Erkennen ob tatsaechlich gesprochen wird
|
||||
if (db > VAD_SPEECH_THRESHOLD_DB) {
|
||||
if (!this.speechDetected && this.speechStartTime === 0) {
|
||||
this.speechStartTime = Date.now();
|
||||
}
|
||||
if (this.speechStartTime > 0 && Date.now() - this.speechStartTime >= VAD_SPEECH_MIN_MS) {
|
||||
this.speechDetected = true;
|
||||
}
|
||||
} else {
|
||||
if (!this.speechDetected) {
|
||||
this.speechStartTime = 0; // Reset wenn noch nicht als Sprache erkannt
|
||||
}
|
||||
}
|
||||
|
||||
// VAD: Stille erkennen (nur wenn Sprache erkannt wurde)
|
||||
if (this.vadEnabled) {
|
||||
if (db > VAD_SILENCE_THRESHOLD_DB) {
|
||||
this.lastSpeechTime = Date.now();
|
||||
@@ -132,8 +209,13 @@ class AudioService {
|
||||
|
||||
this.recordingStartTime = Date.now();
|
||||
this.lastSpeechTime = Date.now();
|
||||
this.speechDetected = false;
|
||||
this.speechStartTime = 0;
|
||||
this.setState('recording');
|
||||
|
||||
// Andere Apps waehrend der Aufnahme pausieren (Musik, Videos etc.)
|
||||
AudioFocus?.requestExclusive().catch(() => {});
|
||||
|
||||
// VAD aktivieren
|
||||
this.vadEnabled = autoStop;
|
||||
if (autoStop) {
|
||||
@@ -144,6 +226,11 @@ class AudioService {
|
||||
this.silenceListeners.forEach(cb => cb());
|
||||
}
|
||||
}, 200);
|
||||
// Notbremse: Nach MAX_RECORDING_MS zwangsweise stoppen
|
||||
this.maxDurationTimer = setTimeout(() => {
|
||||
console.warn(`[Audio] Max-Dauer ${MAX_RECORDING_MS}ms erreicht — Zwangs-Stop`);
|
||||
this.silenceListeners.forEach(cb => cb());
|
||||
}, MAX_RECORDING_MS);
|
||||
}
|
||||
|
||||
console.log('[Audio] Aufnahme gestartet (autoStop: %s)', autoStop);
|
||||
@@ -168,12 +255,28 @@ class AudioService {
|
||||
clearInterval(this.vadTimer);
|
||||
this.vadTimer = null;
|
||||
}
|
||||
if (this.maxDurationTimer) {
|
||||
clearTimeout(this.maxDurationTimer);
|
||||
this.maxDurationTimer = null;
|
||||
}
|
||||
|
||||
try {
|
||||
await this.recorder.stopRecorder();
|
||||
this.recorder.removeRecordBackListener();
|
||||
|
||||
// Audio-Focus freigeben — andere Apps duerfen wieder
|
||||
AudioFocus?.release().catch(() => {});
|
||||
|
||||
const durationMs = Date.now() - this.recordingStartTime;
|
||||
const hadSpeech = this.speechDetected;
|
||||
|
||||
// Sprach-Gate: Wenn keine Sprache erkannt → Aufnahme verwerfen
|
||||
if (!hadSpeech) {
|
||||
RNFS.unlink(this.recordingPath).catch(() => {});
|
||||
this.setState('idle');
|
||||
console.log('[Audio] Aufnahme verworfen — keine Sprache erkannt (nur Umgebungsgeraeusche)');
|
||||
return null;
|
||||
}
|
||||
|
||||
// Audio-Datei als Base64 lesen
|
||||
const base64Data = await RNFS.readFile(this.recordingPath, 'base64');
|
||||
@@ -182,7 +285,7 @@ class AudioService {
|
||||
RNFS.unlink(this.recordingPath).catch(() => {});
|
||||
|
||||
this.setState('idle');
|
||||
console.log(`[Audio] Aufnahme beendet (${durationMs}ms, ${Math.round(base64Data.length / 1024)}KB)`);
|
||||
console.log(`[Audio] Aufnahme beendet (${durationMs}ms, ${Math.round(base64Data.length / 1024)}KB, Sprache erkannt)`);
|
||||
|
||||
return {
|
||||
base64: base64Data,
|
||||
@@ -198,47 +301,303 @@ class AudioService {
|
||||
|
||||
// --- Wiedergabe ---
|
||||
|
||||
/** Base64-kodiertes Audio abspielen (z.B. TTS-Antwort von ARIA) */
|
||||
/** Base64-kodiertes Audio in die Queue stellen und abspielen */
|
||||
async playAudio(base64Data: string): Promise<void> {
|
||||
if (!base64Data) return;
|
||||
|
||||
// Laufende Wiedergabe stoppen
|
||||
this.stopPlayback();
|
||||
|
||||
try {
|
||||
// Base64 -> temporaere WAV-Datei -> Sound abspielen
|
||||
const tmpPath = `${RNFS.CachesDirectoryPath}/aria_tts_${Date.now()}.wav`;
|
||||
await RNFS.writeFile(tmpPath, base64Data, 'base64');
|
||||
|
||||
this.currentSound = new Sound(tmpPath, '', (error) => {
|
||||
if (error) {
|
||||
console.error('[Audio] Fehler beim Laden:', error);
|
||||
RNFS.unlink(tmpPath).catch(() => {});
|
||||
return;
|
||||
}
|
||||
this.currentSound?.play((success) => {
|
||||
if (success) {
|
||||
console.log('[Audio] Wiedergabe abgeschlossen');
|
||||
} else {
|
||||
console.warn('[Audio] Wiedergabe fehlgeschlagen');
|
||||
}
|
||||
this.currentSound?.release();
|
||||
this.currentSound = null;
|
||||
RNFS.unlink(tmpPath).catch(() => {});
|
||||
});
|
||||
});
|
||||
} catch (err) {
|
||||
console.error('[Audio] Wiedergabefehler:', err);
|
||||
this.audioQueue.push(base64Data);
|
||||
if (!this.isPlaying) {
|
||||
this._playNext();
|
||||
}
|
||||
}
|
||||
|
||||
/** Laufende Wiedergabe stoppen */
|
||||
/** Base64-Audio persistent speichern. Gibt file:// Pfad zurueck (oder leer bei Fehler). */
|
||||
async cacheAudio(base64Data: string, messageId: string): Promise<string> {
|
||||
if (!base64Data || !messageId) return '';
|
||||
try {
|
||||
const dir = `${RNFS.DocumentDirectoryPath}/tts_cache`;
|
||||
await RNFS.mkdir(dir).catch(() => {});
|
||||
const path = `${dir}/${messageId}.wav`;
|
||||
// Wenn Datei schon existiert (z.B. XTTS Chunks) → anhaengen statt ueberschreiben
|
||||
const exists = await RNFS.exists(path);
|
||||
if (exists) {
|
||||
// Bestehende + neue Base64 laden, zusammenkleben (fuer jetzt: ueberschreiben)
|
||||
// XTTS sendet mehrere Chunks — bei mehrfacher Ueberschreibung bleibt nur der letzte
|
||||
// Fuer eine echte Konkatenation muesste WAV-Header gemerged werden
|
||||
await RNFS.writeFile(path, base64Data, 'base64');
|
||||
} else {
|
||||
await RNFS.writeFile(path, base64Data, 'base64');
|
||||
}
|
||||
return `file://${path}`;
|
||||
} catch (err) {
|
||||
console.warn('[Audio] cacheAudio fehlgeschlagen:', err);
|
||||
return '';
|
||||
}
|
||||
}
|
||||
|
||||
/** Einen PCM-Chunk aus einer audio_pcm Nachricht empfangen.
|
||||
* silent=true → nur cachen, nicht abspielen (z.B. wenn TTS geraetelokal gemutet).
|
||||
* Gibt bei final=true den Cache-Pfad zurueck (file://) oder '' wenn nicht gecached. */
|
||||
async handlePcmChunk(payload: {
|
||||
base64: string;
|
||||
sampleRate?: number;
|
||||
channels?: number;
|
||||
messageId?: string;
|
||||
chunk?: number;
|
||||
final?: boolean;
|
||||
silent?: boolean;
|
||||
}): Promise<string> {
|
||||
const silent = !!payload.silent;
|
||||
if (!silent && !PcmStreamPlayer) {
|
||||
console.warn('[Audio] PcmStreamPlayer Native Module nicht verfuegbar');
|
||||
return '';
|
||||
}
|
||||
|
||||
const messageId = payload.messageId || '';
|
||||
const sampleRate = payload.sampleRate || 24000;
|
||||
const channels = payload.channels || 1;
|
||||
const base64 = payload.base64 || '';
|
||||
const isFinal = !!payload.final;
|
||||
|
||||
// Neuer Stream? (messageId Wechsel oder nicht aktiv)
|
||||
if (!this.pcmStreamActive || this.pcmMessageId !== messageId) {
|
||||
if (this.pcmStreamActive && !silent) {
|
||||
try { await PcmStreamPlayer!.stop(); } catch {}
|
||||
this.pcmBuffer = [];
|
||||
this.pcmBytesCollected = 0;
|
||||
}
|
||||
this.pcmStreamActive = true;
|
||||
this.pcmMessageId = messageId;
|
||||
this.pcmSampleRate = sampleRate;
|
||||
this.pcmChannels = channels;
|
||||
this.pcmBuffer = [];
|
||||
this.pcmBytesCollected = 0;
|
||||
if (!silent) {
|
||||
try {
|
||||
await PcmStreamPlayer!.start(sampleRate, channels);
|
||||
} catch (err) {
|
||||
console.error('[Audio] PcmStreamPlayer.start fehlgeschlagen:', err);
|
||||
this.pcmStreamActive = false;
|
||||
return '';
|
||||
}
|
||||
AudioFocus?.requestDuck().catch(() => {});
|
||||
}
|
||||
}
|
||||
|
||||
// Chunk — immer cachen, nur bei !silent auch abspielen
|
||||
if (base64) {
|
||||
if (!silent) {
|
||||
try { await PcmStreamPlayer!.writeChunk(base64); } catch (err) { console.warn('[Audio] writeChunk', err); }
|
||||
}
|
||||
if (messageId && this.pcmBytesCollected < this.PCM_MAX_CACHE_BYTES) {
|
||||
this.pcmBuffer.push(base64);
|
||||
this.pcmBytesCollected += Math.floor(base64.length * 0.75);
|
||||
}
|
||||
}
|
||||
|
||||
if (isFinal) {
|
||||
if (!silent) {
|
||||
try { await PcmStreamPlayer!.end(); } catch {}
|
||||
AudioFocus?.release().catch(() => {});
|
||||
}
|
||||
this.pcmStreamActive = false;
|
||||
|
||||
if (messageId && this.pcmBuffer.length > 0) {
|
||||
const audioPath = await this._savePcmBufferAsWav(messageId);
|
||||
this.pcmBuffer = [];
|
||||
this.pcmBytesCollected = 0;
|
||||
this.pcmMessageId = '';
|
||||
return audioPath;
|
||||
}
|
||||
this.pcmMessageId = '';
|
||||
}
|
||||
return '';
|
||||
}
|
||||
|
||||
/** Gesammelte PCM-Chunks als WAV speichern. Gibt file:// Pfad zurueck. */
|
||||
private async _savePcmBufferAsWav(messageId: string): Promise<string> {
|
||||
try {
|
||||
const dir = `${RNFS.DocumentDirectoryPath}/tts_cache`;
|
||||
await RNFS.mkdir(dir).catch(() => {});
|
||||
const path = `${dir}/${messageId}.wav`;
|
||||
|
||||
// WAV-Header fuer PCM s16le
|
||||
const sampleRate = this.pcmSampleRate;
|
||||
const channels = this.pcmChannels;
|
||||
const bitsPerSample = 16;
|
||||
const byteRate = sampleRate * channels * bitsPerSample / 8;
|
||||
const blockAlign = channels * bitsPerSample / 8;
|
||||
const dataSize = this.pcmBytesCollected;
|
||||
const fileSize = 36 + dataSize;
|
||||
|
||||
// Header als Base64 (44 bytes)
|
||||
const header = new Uint8Array(44);
|
||||
const dv = new DataView(header.buffer);
|
||||
// "RIFF"
|
||||
header[0] = 0x52; header[1] = 0x49; header[2] = 0x46; header[3] = 0x46;
|
||||
dv.setUint32(4, fileSize, true);
|
||||
// "WAVE"
|
||||
header[8] = 0x57; header[9] = 0x41; header[10] = 0x56; header[11] = 0x45;
|
||||
// "fmt "
|
||||
header[12] = 0x66; header[13] = 0x6d; header[14] = 0x74; header[15] = 0x20;
|
||||
dv.setUint32(16, 16, true); // fmt chunk size
|
||||
dv.setUint16(20, 1, true); // PCM format
|
||||
dv.setUint16(22, channels, true);
|
||||
dv.setUint32(24, sampleRate, true);
|
||||
dv.setUint32(28, byteRate, true);
|
||||
dv.setUint16(32, blockAlign, true);
|
||||
dv.setUint16(34, bitsPerSample, true);
|
||||
// "data"
|
||||
header[36] = 0x64; header[37] = 0x61; header[38] = 0x74; header[39] = 0x61;
|
||||
dv.setUint32(40, dataSize, true);
|
||||
|
||||
// Header als base64
|
||||
let headerB64 = '';
|
||||
const chunk = 1024;
|
||||
for (let i = 0; i < header.length; i += chunk) {
|
||||
headerB64 += String.fromCharCode(...Array.from(header.slice(i, i + chunk)));
|
||||
}
|
||||
headerB64 = btoaSafe(headerB64);
|
||||
|
||||
// Datei schreiben: Header + alle PCM-Chunks
|
||||
await RNFS.writeFile(path, headerB64, 'base64');
|
||||
for (const b64 of this.pcmBuffer) {
|
||||
await RNFS.appendFile(path, b64, 'base64');
|
||||
}
|
||||
console.log(`[Audio] PCM-Cache geschrieben: ${path} (${(dataSize / 1024).toFixed(0)}KB, ${this.pcmBuffer.length} chunks)`);
|
||||
return `file://${path}`;
|
||||
} catch (err) {
|
||||
console.warn('[Audio] _savePcmBufferAsWav fehlgeschlagen:', err);
|
||||
return '';
|
||||
}
|
||||
}
|
||||
|
||||
/** Audio aus lokaler Datei (file:// Pfad) in die Queue und abspielen. */
|
||||
async playFromPath(filePath: string): Promise<void> {
|
||||
if (!filePath) return;
|
||||
try {
|
||||
const cleanPath = filePath.replace(/^file:\/\//, '');
|
||||
if (!(await RNFS.exists(cleanPath))) {
|
||||
console.warn('[Audio] Cache-Datei existiert nicht mehr:', cleanPath);
|
||||
return;
|
||||
}
|
||||
const b64 = await RNFS.readFile(cleanPath, 'base64');
|
||||
this.playAudio(b64);
|
||||
} catch (err) {
|
||||
console.warn('[Audio] playFromPath fehlgeschlagen:', err);
|
||||
}
|
||||
}
|
||||
|
||||
// Callback wenn alle Audio-Teile abgespielt sind
|
||||
private playbackFinishedListeners: (() => void)[] = [];
|
||||
|
||||
onPlaybackFinished(callback: () => void): () => void {
|
||||
this.playbackFinishedListeners.push(callback);
|
||||
return () => {
|
||||
this.playbackFinishedListeners = this.playbackFinishedListeners.filter(cb => cb !== callback);
|
||||
};
|
||||
}
|
||||
|
||||
/** Naechstes Audio aus der Queue abspielen */
|
||||
private async _playNext(): Promise<void> {
|
||||
if (this.audioQueue.length === 0) {
|
||||
this.isPlaying = false;
|
||||
// Audio-Focus abgeben → andere Apps volle Lautstaerke
|
||||
AudioFocus?.release().catch(() => {});
|
||||
// Alle Audio-Teile abgespielt → Listener benachrichtigen
|
||||
this.playbackFinishedListeners.forEach(cb => cb());
|
||||
return;
|
||||
}
|
||||
|
||||
// Beim ersten Playback-Start: andere Apps ducken
|
||||
if (!this.isPlaying) {
|
||||
AudioFocus?.requestDuck().catch(() => {});
|
||||
}
|
||||
this.isPlaying = true;
|
||||
|
||||
// Preloaded Sound verwenden wenn verfuegbar, sonst neu laden
|
||||
let sound: Sound;
|
||||
let soundPath: string;
|
||||
|
||||
if (this.preloadedSound) {
|
||||
sound = this.preloadedSound;
|
||||
soundPath = this.preloadedPath;
|
||||
this.preloadedSound = null;
|
||||
this.preloadedPath = '';
|
||||
// Daten aus Queue entfernen (wurde schon preloaded)
|
||||
this.audioQueue.shift();
|
||||
} else {
|
||||
const base64Data = this.audioQueue.shift()!;
|
||||
try {
|
||||
soundPath = `${RNFS.CachesDirectoryPath}/aria_tts_${Date.now()}.wav`;
|
||||
await RNFS.writeFile(soundPath, base64Data, 'base64');
|
||||
sound = await new Promise<Sound>((resolve, reject) => {
|
||||
const s = new Sound(soundPath, '', (err) => err ? reject(err) : resolve(s));
|
||||
});
|
||||
} catch (err) {
|
||||
console.error('[Audio] Laden fehlgeschlagen:', err);
|
||||
this._playNext();
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
this.currentSound = sound;
|
||||
|
||||
// Naechstes Audio schon vorbereiten waehrend dieses abspielt
|
||||
this._preloadNext();
|
||||
|
||||
sound.play((success) => {
|
||||
if (!success) console.warn('[Audio] Wiedergabe fehlgeschlagen');
|
||||
sound.release();
|
||||
this.currentSound = null;
|
||||
RNFS.unlink(soundPath).catch(() => {});
|
||||
this._playNext();
|
||||
});
|
||||
}
|
||||
|
||||
/** Naechstes Audio im Hintergrund vorladen (verhindert Stottern) */
|
||||
private async _preloadNext(): Promise<void> {
|
||||
if (this.audioQueue.length === 0 || this.preloadedSound) return;
|
||||
|
||||
const base64Data = this.audioQueue[0]; // Nicht shift — bleibt in Queue
|
||||
try {
|
||||
const tmpPath = `${RNFS.CachesDirectoryPath}/aria_tts_pre_${Date.now()}.wav`;
|
||||
await RNFS.writeFile(tmpPath, base64Data, 'base64');
|
||||
this.preloadedSound = await new Promise<Sound>((resolve, reject) => {
|
||||
const s = new Sound(tmpPath, '', (err) => err ? reject(err) : resolve(s));
|
||||
});
|
||||
this.preloadedPath = tmpPath;
|
||||
} catch {
|
||||
this.preloadedSound = null;
|
||||
this.preloadedPath = '';
|
||||
}
|
||||
}
|
||||
|
||||
/** Laufende Wiedergabe stoppen + Queue leeren */
|
||||
stopPlayback(): void {
|
||||
this.audioQueue = [];
|
||||
this.isPlaying = false;
|
||||
if (this.currentSound) {
|
||||
this.currentSound.stop();
|
||||
this.currentSound.release();
|
||||
this.currentSound = null;
|
||||
}
|
||||
if (this.preloadedSound) {
|
||||
this.preloadedSound.release();
|
||||
this.preloadedSound = null;
|
||||
if (this.preloadedPath) RNFS.unlink(this.preloadedPath).catch(() => {});
|
||||
this.preloadedPath = '';
|
||||
}
|
||||
// PCM-Stream ebenfalls hart stoppen (Cancel/Abbruch)
|
||||
if (this.pcmStreamActive) {
|
||||
PcmStreamPlayer?.stop().catch(() => {});
|
||||
this.pcmStreamActive = false;
|
||||
this.pcmBuffer = [];
|
||||
this.pcmBytesCollected = 0;
|
||||
this.pcmMessageId = '';
|
||||
}
|
||||
// Audio-Focus freigeben
|
||||
AudioFocus?.release().catch(() => {});
|
||||
}
|
||||
|
||||
// --- Status & Callbacks ---
|
||||
@@ -277,6 +636,46 @@ class AudioService {
|
||||
this.stateListeners.forEach(cb => cb(state));
|
||||
}
|
||||
}
|
||||
|
||||
/** Alte Aufnahme- und TTS-Files aus dem Cache loeschen (>30s alt). */
|
||||
private async _cleanupStaleCacheFiles(): Promise<void> {
|
||||
try {
|
||||
const files = await RNFS.readDir(RNFS.CachesDirectoryPath);
|
||||
const now = Date.now();
|
||||
for (const f of files) {
|
||||
if (!f.isFile()) continue;
|
||||
if (!f.name.startsWith('aria_recording_') && !f.name.startsWith('aria_tts_')) continue;
|
||||
const age = now - (f.mtime ? f.mtime.getTime() : 0);
|
||||
if (age > 30000) {
|
||||
await RNFS.unlink(f.path).catch(() => {});
|
||||
}
|
||||
}
|
||||
} catch {
|
||||
// silent — cleanup ist best-effort
|
||||
}
|
||||
}
|
||||
|
||||
/** Alte TTS-Cache-Dateien loeschen die nicht mehr referenziert sind (>30 Tage). */
|
||||
async cleanupOldTTSCache(keepMessageIds: Set<string>, maxAgeDays = 30): Promise<void> {
|
||||
try {
|
||||
const dir = `${RNFS.DocumentDirectoryPath}/tts_cache`;
|
||||
if (!(await RNFS.exists(dir))) return;
|
||||
const files = await RNFS.readDir(dir);
|
||||
const maxAgeMs = maxAgeDays * 24 * 60 * 60 * 1000;
|
||||
const now = Date.now();
|
||||
for (const f of files) {
|
||||
if (!f.isFile() || !f.name.endsWith('.wav')) continue;
|
||||
const messageId = f.name.replace(/\.wav$/, '');
|
||||
const age = now - (f.mtime ? f.mtime.getTime() : 0);
|
||||
// Loeschen wenn: nicht mehr referenziert UND aelter als X Tage
|
||||
if (!keepMessageIds.has(messageId) && age > maxAgeMs) {
|
||||
await RNFS.unlink(f.path).catch(() => {});
|
||||
}
|
||||
}
|
||||
} catch {
|
||||
// silent
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Singleton
|
||||
|
||||
@@ -12,7 +12,7 @@ import AsyncStorage from '@react-native-async-storage/async-storage';
|
||||
|
||||
export type ConnectionState = 'connecting' | 'connected' | 'disconnected';
|
||||
|
||||
export type MessageType = 'chat' | 'audio' | 'file' | 'location' | 'mode' | 'log' | 'event';
|
||||
export type MessageType = 'chat' | 'audio' | 'file' | 'location' | 'mode' | 'log' | 'event' | 'update_available' | string;
|
||||
|
||||
export interface RVSMessage {
|
||||
type: MessageType;
|
||||
|
||||
@@ -0,0 +1,158 @@
|
||||
/**
|
||||
* Auto-Update Service — prueft und installiert App-Updates via RVS
|
||||
*
|
||||
* Flow:
|
||||
* 1. App sendet "update_check" mit aktueller Version an RVS
|
||||
* 2. RVS vergleicht → sendet "update_available" mit Download-URL
|
||||
* 3. App zeigt Benachrichtigung → User bestaetigt → Download + Install
|
||||
*/
|
||||
|
||||
import { Alert, Linking, Platform, NativeModules } from 'react-native';
|
||||
import RNFS from 'react-native-fs';
|
||||
import rvs, { RVSMessage } from './rvs';
|
||||
|
||||
// Version aus package.json (wird beim Build eingebettet)
|
||||
const packageJson = require('../../package.json');
|
||||
const APP_VERSION = packageJson.version || '0.0.0.0';
|
||||
|
||||
type UpdateCallback = (info: UpdateInfo) => void;
|
||||
|
||||
export interface UpdateInfo {
|
||||
version: string;
|
||||
downloadUrl: string;
|
||||
size: number;
|
||||
}
|
||||
|
||||
class UpdateService {
|
||||
private listeners: UpdateCallback[] = [];
|
||||
private checking = false;
|
||||
private downloading = false;
|
||||
|
||||
constructor() {
|
||||
// Auf update_available Nachrichten lauschen
|
||||
rvs.onMessage((msg: RVSMessage) => {
|
||||
if (msg.type === 'update_available' as any) {
|
||||
const info: UpdateInfo = {
|
||||
version: (msg.payload.version as string) || '',
|
||||
downloadUrl: (msg.payload.downloadUrl as string) || '',
|
||||
size: (msg.payload.size as number) || 0,
|
||||
};
|
||||
if (info.version && this.isNewer(info.version)) {
|
||||
console.log(`[Update] Neue Version verfuegbar: ${info.version} (aktuell: ${APP_VERSION})`);
|
||||
this.listeners.forEach(cb => cb(info));
|
||||
}
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
/** Bei App-Start Update pruefen */
|
||||
checkForUpdate(): void {
|
||||
if (this.checking) return;
|
||||
this.checking = true;
|
||||
|
||||
console.log(`[Update] Pruefe auf Updates (aktuell: ${APP_VERSION})`);
|
||||
rvs.send('update_check' as any, { version: APP_VERSION });
|
||||
|
||||
setTimeout(() => { this.checking = false; }, 10000);
|
||||
}
|
||||
|
||||
/** Callback registrieren */
|
||||
onUpdateAvailable(callback: UpdateCallback): () => void {
|
||||
this.listeners.push(callback);
|
||||
return () => {
|
||||
this.listeners = this.listeners.filter(cb => cb !== callback);
|
||||
};
|
||||
}
|
||||
|
||||
/** Update-Dialog anzeigen */
|
||||
promptUpdate(info: UpdateInfo): void {
|
||||
const sizeMB = (info.size / 1024 / 1024).toFixed(1);
|
||||
Alert.alert(
|
||||
'ARIA Update verfuegbar',
|
||||
`Version ${info.version} (${sizeMB} MB)\n\nAktuell: ${APP_VERSION}\n\nJetzt herunterladen und installieren?`,
|
||||
[
|
||||
{ text: 'Spaeter', style: 'cancel' },
|
||||
{
|
||||
text: 'Installieren',
|
||||
onPress: () => this.downloadAndInstall(info),
|
||||
},
|
||||
],
|
||||
);
|
||||
}
|
||||
|
||||
/** APK ueber WebSocket herunterladen und installieren */
|
||||
async downloadAndInstall(info: UpdateInfo): Promise<void> {
|
||||
if (this.downloading) return;
|
||||
this.downloading = true;
|
||||
|
||||
try {
|
||||
console.log(`[Update] Fordere APK v${info.version} an...`);
|
||||
Alert.alert('Download gestartet', `Version ${info.version} wird ueber RVS heruntergeladen...`);
|
||||
|
||||
// APK ueber WebSocket anfordern
|
||||
rvs.send('update_download' as any, {});
|
||||
|
||||
// Auf update_data warten (einmalig)
|
||||
const apkData = await new Promise<{base64: string, fileName: string}>((resolve, reject) => {
|
||||
const timeout = setTimeout(() => reject(new Error('Download-Timeout (60s)')), 60000);
|
||||
const unsub = rvs.onMessage((msg: RVSMessage) => {
|
||||
if ((msg.type as string) === 'update_data') {
|
||||
clearTimeout(timeout);
|
||||
unsub();
|
||||
if (msg.payload.error) {
|
||||
reject(new Error(msg.payload.error as string));
|
||||
} else {
|
||||
resolve({
|
||||
base64: msg.payload.base64 as string,
|
||||
fileName: msg.payload.fileName as string || `ARIA-${info.version}.apk`,
|
||||
});
|
||||
}
|
||||
}
|
||||
});
|
||||
});
|
||||
|
||||
// Base64 als APK-Datei speichern
|
||||
const destPath = `${RNFS.CachesDirectoryPath}/${apkData.fileName}`;
|
||||
await RNFS.writeFile(destPath, apkData.base64, 'base64');
|
||||
const fileSize = await RNFS.stat(destPath);
|
||||
console.log(`[Update] APK gespeichert: ${destPath} (${(parseInt(fileSize.size) / 1024 / 1024).toFixed(1)}MB)`);
|
||||
|
||||
// APK installieren via natives ApkInstaller Module (FileProvider + Intent)
|
||||
if (Platform.OS === 'android') {
|
||||
try {
|
||||
const { ApkInstaller } = NativeModules;
|
||||
await ApkInstaller.install(destPath);
|
||||
} catch (installErr: any) {
|
||||
Alert.alert(
|
||||
'APK heruntergeladen',
|
||||
`Version ${info.version} gespeichert.\n\nBitte manuell installieren:\nDateimanager → ${apkData.fileName} antippen.\n\n(${installErr.message})`,
|
||||
);
|
||||
}
|
||||
}
|
||||
} catch (err: any) {
|
||||
console.error(`[Update] Fehler: ${err.message}`);
|
||||
Alert.alert('Update fehlgeschlagen', err.message);
|
||||
} finally {
|
||||
this.downloading = false;
|
||||
}
|
||||
}
|
||||
|
||||
/** Versionsvergleich */
|
||||
private isNewer(remote: string): boolean {
|
||||
const r = remote.split('.').map(Number);
|
||||
const l = APP_VERSION.split('.').map(Number);
|
||||
for (let i = 0; i < Math.max(r.length, l.length); i++) {
|
||||
const diff = (r[i] || 0) - (l[i] || 0);
|
||||
if (diff > 0) return true;
|
||||
if (diff < 0) return false;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
getCurrentVersion(): string {
|
||||
return APP_VERSION;
|
||||
}
|
||||
}
|
||||
|
||||
const updateService = new UpdateService();
|
||||
export default updateService;
|
||||
@@ -1,21 +1,13 @@
|
||||
/**
|
||||
* Wake Word Service — "ARIA" Erkennung
|
||||
* Gespraechsmodus — "Ohr-Button"
|
||||
*
|
||||
* Nutzt react-native-live-audio-stream fuer kontinuierliches Mikrofon-Monitoring.
|
||||
* Erkennt Sprache per Energie-Schwellwert und sendet kurze Audio-Clips
|
||||
* zur serverseitigen Wake-Word-Pruefung (openwakeword in der Bridge).
|
||||
* Wenn aktiv: Nach jeder ARIA-Antwort (TTS fertig) startet automatisch die Aufnahme.
|
||||
* Wie ein Walkie-Talkie / natuerliches Gespraech:
|
||||
* ARIA spricht → Aufnahme startet → User spricht → VAD stoppt → ARIA antwortet → ...
|
||||
*
|
||||
* Architektur:
|
||||
* App (Mikrofon) → Energie-Erkennung → Audio-Buffer
|
||||
* → RVS "wake_check" → Bridge → openwakeword → Bestaetigung
|
||||
* → App startet Aufnahme
|
||||
*
|
||||
* Aktuell (Phase 1): Einfacher Tap-to-Talk + Auto-Stop.
|
||||
* Spaeter (Phase 2): Porcupine on-device "ARIA" Keyword.
|
||||
* Phase 2 (geplant): Porcupine "ARIA" Wake Word fuer passives Lauschen.
|
||||
*/
|
||||
|
||||
import LiveAudioStream from 'react-native-live-audio-stream';
|
||||
|
||||
type WakeWordCallback = () => void;
|
||||
type StateCallback = (state: WakeWordState) => void;
|
||||
|
||||
@@ -25,72 +17,40 @@ class WakeWordService {
|
||||
private state: WakeWordState = 'off';
|
||||
private wakeCallbacks: WakeWordCallback[] = [];
|
||||
private stateCallbacks: StateCallback[] = [];
|
||||
private isInitialized = false;
|
||||
|
||||
/** Wake Word Erkennung starten */
|
||||
/** Gespraechsmodus starten */
|
||||
async start(): Promise<boolean> {
|
||||
if (this.state === 'listening') return true;
|
||||
|
||||
try {
|
||||
if (!this.isInitialized) {
|
||||
LiveAudioStream.init({
|
||||
sampleRate: 16000,
|
||||
channels: 1,
|
||||
bitsPerSample: 16,
|
||||
audioSource: 6, // VOICE_RECOGNITION
|
||||
bufferSize: 4096,
|
||||
});
|
||||
this.isInitialized = true;
|
||||
console.log('[WakeWord] Gespraechsmodus aktiviert — starte sofort Aufnahme');
|
||||
this.setState('listening');
|
||||
// Sofort erste Aufnahme starten
|
||||
setTimeout(() => {
|
||||
if (this.state === 'listening') {
|
||||
this.wakeCallbacks.forEach(cb => cb());
|
||||
}
|
||||
}, 500);
|
||||
return true;
|
||||
}
|
||||
|
||||
// Audio-Stream starten und auf Energie pruefen
|
||||
LiveAudioStream.start();
|
||||
/** Gespraechsmodus stoppen */
|
||||
stop(): void {
|
||||
console.log('[WakeWord] Gespraechsmodus deaktiviert');
|
||||
this.setState('off');
|
||||
}
|
||||
|
||||
LiveAudioStream.on('data', (base64Chunk: string) => {
|
||||
if (this.state !== 'listening') return;
|
||||
|
||||
// Base64 → Int16 Array → RMS berechnen
|
||||
const raw = this._base64ToInt16(base64Chunk);
|
||||
const rms = this._calculateRMS(raw);
|
||||
|
||||
// Schwellwert: wenn laut genug → Wake Word erkannt
|
||||
// Phase 1: Einfache Energie-Erkennung (jemand spricht)
|
||||
// Phase 2: Porcupine "ARIA" Keyword
|
||||
if (rms > 2000) {
|
||||
this.setState('detected');
|
||||
this.wakeCallbacks.forEach(cb => cb());
|
||||
// Nach Detection kurz pausieren, Aufnahme uebernimmt das Mikrofon
|
||||
this.stop();
|
||||
}
|
||||
});
|
||||
|
||||
this.setState('listening');
|
||||
console.log('[WakeWord] Listening gestartet');
|
||||
return true;
|
||||
} catch (err) {
|
||||
console.error('[WakeWord] Start fehlgeschlagen:', err);
|
||||
return false;
|
||||
/** Nach ARIA-Antwort (TTS fertig): Aufnahme automatisch starten */
|
||||
async resume(): Promise<void> {
|
||||
if (this.state !== 'listening') return;
|
||||
// Kurze Pause damit TTS-Audio nicht ins Mikrofon geht
|
||||
await new Promise(resolve => setTimeout(resolve, 800));
|
||||
if (this.state === 'listening') {
|
||||
console.log('[WakeWord] TTS fertig — starte automatisch Aufnahme');
|
||||
this.wakeCallbacks.forEach(cb => cb());
|
||||
}
|
||||
}
|
||||
|
||||
/** Wake Word Erkennung stoppen */
|
||||
stop(): void {
|
||||
if (this.state === 'off') return;
|
||||
try {
|
||||
LiveAudioStream.stop();
|
||||
} catch {}
|
||||
this.setState('off');
|
||||
console.log('[WakeWord] Gestoppt');
|
||||
}
|
||||
|
||||
/** Nach Aufnahme erneut starten */
|
||||
async resume(): Promise<void> {
|
||||
// Kurze Pause damit Aufnahme das Mikrofon freigeben kann
|
||||
setTimeout(() => {
|
||||
if (this.state === 'off') {
|
||||
this.start();
|
||||
}
|
||||
}, 500);
|
||||
isActive(): boolean {
|
||||
return this.state === 'listening';
|
||||
}
|
||||
|
||||
// --- Callbacks ---
|
||||
@@ -113,32 +73,12 @@ class WakeWordService {
|
||||
return this.state;
|
||||
}
|
||||
|
||||
// --- Hilfsfunktionen ---
|
||||
|
||||
private setState(state: WakeWordState): void {
|
||||
if (this.state !== state) {
|
||||
this.state = state;
|
||||
this.stateCallbacks.forEach(cb => cb(state));
|
||||
}
|
||||
}
|
||||
|
||||
private _base64ToInt16(base64: string): Int16Array {
|
||||
const binary = atob(base64);
|
||||
const bytes = new Uint8Array(binary.length);
|
||||
for (let i = 0; i < binary.length; i++) {
|
||||
bytes[i] = binary.charCodeAt(i);
|
||||
}
|
||||
return new Int16Array(bytes.buffer);
|
||||
}
|
||||
|
||||
private _calculateRMS(samples: Int16Array): number {
|
||||
if (samples.length === 0) return 0;
|
||||
let sum = 0;
|
||||
for (let i = 0; i < samples.length; i++) {
|
||||
sum += samples[i] * samples[i];
|
||||
}
|
||||
return Math.sqrt(sum / samples.length);
|
||||
}
|
||||
}
|
||||
|
||||
const wakeWordService = new WakeWordService();
|
||||
|
||||
@@ -3,9 +3,12 @@
|
||||
# → localhost ist aria-core
|
||||
ARIA_CORE_WS=ws://127.0.0.1:18789
|
||||
|
||||
# Piper TTS Stimmen
|
||||
PIPER_RAMONA=/voices/de_DE-ramona-low.onnx
|
||||
PIPER_THORSTEN=/voices/de_DE-thorsten-high.onnx
|
||||
|
||||
# Wake-Word
|
||||
WAKE_WORD=aria
|
||||
|
||||
# Whisper STT — wird zur Laufzeit in der Diagnostic (Sektion "Whisper") umgeschaltet
|
||||
# und in /shared/config/voice_config.json gespeichert. Der Wert hier ist nur der
|
||||
# Initial-Default beim ersten Start.
|
||||
# Optionen: tiny | base | small | medium | large-v3
|
||||
WHISPER_MODEL=medium
|
||||
WHISPER_LANGUAGE=de
|
||||
|
||||
+1
-1
@@ -1,6 +1,6 @@
|
||||
# ════════════════════════════════════════════════
|
||||
# ARIA Voice Bridge — Dockerfile
|
||||
# Whisper STT + Piper TTS + Wake-Word
|
||||
# Whisper STT + Wake-Word (TTS via XTTS v2 remote)
|
||||
# ════════════════════════════════════════════════
|
||||
|
||||
FROM python:3.12-slim
|
||||
|
||||
+521
-184
@@ -26,7 +26,6 @@ import ssl
|
||||
import sys
|
||||
import tempfile
|
||||
import uuid
|
||||
import wave
|
||||
from pathlib import Path
|
||||
from typing import Optional
|
||||
|
||||
@@ -37,9 +36,8 @@ import sounddevice as sd
|
||||
import websockets
|
||||
from faster_whisper import WhisperModel
|
||||
from openwakeword.model import Model as WakeWordModel
|
||||
from piper import PiperVoice
|
||||
|
||||
from modes import Mode, detect_mode_switch, should_speak
|
||||
from modes import Mode, canonical_id, detect_mode_switch, mode_from_id, should_speak
|
||||
|
||||
# ── Logging ──────────────────────────────────────────────────
|
||||
|
||||
@@ -62,7 +60,7 @@ RVS_TLS = os.getenv("RVS_TLS", "true") # true = wss://, false = ws://
|
||||
RVS_TLS_FALLBACK = os.getenv("RVS_TLS_FALLBACK", "true") # Bei TLS-Fehler ws:// versuchen
|
||||
RVS_TOKEN = os.getenv("RVS_TOKEN", "") # Pairing-Token (gleich wie in der App)
|
||||
DIAGNOSTIC_URL = os.getenv("DIAGNOSTIC_URL", "http://127.0.0.1:3001") # Diagnostic API
|
||||
WHISPER_MODEL = os.getenv("WHISPER_MODEL", "small")
|
||||
WHISPER_MODEL = os.getenv("WHISPER_MODEL", "medium")
|
||||
WHISPER_LANGUAGE = os.getenv("WHISPER_LANGUAGE", "de")
|
||||
|
||||
# Audio-Parameter
|
||||
@@ -71,22 +69,15 @@ CHANNELS = 1
|
||||
BLOCK_SIZE = 1280 # 80ms bei 16kHz — gut fuer Wake-Word-Erkennung
|
||||
RECORD_SECONDS = 8 # Max. Aufnahmedauer nach Wake-Word
|
||||
|
||||
# Epische Trigger — bei diesen Woertern spricht Thorsten
|
||||
EPIC_TRIGGERS = [
|
||||
"deploy",
|
||||
"erfolgreich",
|
||||
"alarm",
|
||||
"so soll es sein",
|
||||
"kritisch",
|
||||
"server down",
|
||||
"sicherheitswarnung",
|
||||
"ticket geloest",
|
||||
"aufgabe abgeschlossen",
|
||||
]
|
||||
|
||||
|
||||
def load_config() -> dict[str, str]:
|
||||
"""Laedt Konfiguration aus /config/aria.env."""
|
||||
"""Laedt Konfiguration.
|
||||
|
||||
Reihenfolge (hoechste Prioritaet zuletzt):
|
||||
1. /config/aria.env (bind-mount)
|
||||
2. /shared/config/runtime.json (zentral gepflegt ueber Diagnostic UI)
|
||||
|
||||
Werte aus runtime.json ueberschreiben die env-Datei.
|
||||
"""
|
||||
config: dict[str, str] = {}
|
||||
if CONFIG_PATH.exists():
|
||||
for line in CONFIG_PATH.read_text().splitlines():
|
||||
@@ -99,153 +90,169 @@ def load_config() -> dict[str, str]:
|
||||
logger.info("Konfiguration geladen aus %s", CONFIG_PATH)
|
||||
else:
|
||||
logger.warning("Keine Konfiguration gefunden: %s", CONFIG_PATH)
|
||||
|
||||
# Runtime-Overrides aus zentralem Shared-Volume (Diagnostic UI)
|
||||
runtime_path = Path("/shared/config/runtime.json")
|
||||
if runtime_path.exists():
|
||||
try:
|
||||
runtime = json.loads(runtime_path.read_text())
|
||||
overrides = {k: str(v) for k, v in runtime.items() if v not in (None, "")}
|
||||
if overrides:
|
||||
config.update(overrides)
|
||||
logger.info("Runtime-Overrides geladen: %s", sorted(overrides.keys()))
|
||||
except Exception as e:
|
||||
logger.warning("runtime.json konnte nicht gelesen werden: %s", e)
|
||||
return config
|
||||
|
||||
|
||||
# ── Voice Engine ─────────────────────────────────────────────
|
||||
|
||||
|
||||
class VoiceEngine:
|
||||
"""Verwaltet Piper TTS mit zwei Stimmen: Ramona und Thorsten."""
|
||||
import re as _re_tts
|
||||
|
||||
def __init__(self, voices_dir: Path) -> None:
|
||||
self.voices_dir = voices_dir
|
||||
self.voices: dict[str, PiperVoice] = {}
|
||||
_NUM_WORDS_DE = {
|
||||
0: "null", 1: "eins", 2: "zwei", 3: "drei", 4: "vier", 5: "fuenf",
|
||||
6: "sechs", 7: "sieben", 8: "acht", 9: "neun", 10: "zehn",
|
||||
11: "elf", 12: "zwoelf", 13: "dreizehn", 14: "vierzehn", 15: "fuenfzehn",
|
||||
16: "sechzehn", 17: "siebzehn", 18: "achtzehn", 19: "neunzehn", 20: "zwanzig",
|
||||
}
|
||||
_TENS_DE = {30: "dreissig", 40: "vierzig", 50: "fuenfzig"}
|
||||
|
||||
def initialize(self) -> None:
|
||||
"""Laedt die Piper-Stimmen aus dem Voices-Verzeichnis."""
|
||||
voice_configs = {
|
||||
"ramona": "de_DE-ramona-low",
|
||||
"thorsten": "de_DE-thorsten-high",
|
||||
}
|
||||
|
||||
for name, model_name in voice_configs.items():
|
||||
model_path = self.voices_dir / f"{model_name}.onnx"
|
||||
config_path = self.voices_dir / f"{model_name}.onnx.json"
|
||||
def _num_to_words_de(n: int) -> str:
|
||||
"""Zahlen 0-59 als deutsches Wort — fuer Uhrzeiten und kleine Bereiche."""
|
||||
if n in _NUM_WORDS_DE:
|
||||
return _NUM_WORDS_DE[n]
|
||||
if 21 <= n <= 29:
|
||||
return f"{_NUM_WORDS_DE[n - 20]}undzwanzig"
|
||||
if 30 <= n <= 59:
|
||||
tens = (n // 10) * 10
|
||||
ones = n % 10
|
||||
tens_word = _TENS_DE.get(tens, str(tens))
|
||||
if ones == 0:
|
||||
return tens_word
|
||||
return f"{_NUM_WORDS_DE.get(ones, str(ones))}und{tens_word}"
|
||||
return str(n)
|
||||
|
||||
if not model_path.exists():
|
||||
logger.error("Stimme nicht gefunden: %s", model_path)
|
||||
continue
|
||||
|
||||
self.voices[name] = PiperVoice.load(
|
||||
str(model_path),
|
||||
config_path=str(config_path) if config_path.exists() else None,
|
||||
)
|
||||
logger.info("Stimme geladen: %s (%s)", name, model_name)
|
||||
def _time_range_to_words(m):
|
||||
"""'8:00-9:00 Uhr' → 'acht bis neun Uhr', '8-9 Uhr' → 'acht bis neun Uhr'."""
|
||||
h1 = int(m.group(1))
|
||||
h2 = int(m.group(3))
|
||||
return f"{_num_to_words_de(h1)} bis {_num_to_words_de(h2)} Uhr"
|
||||
|
||||
if not self.voices:
|
||||
logger.error("Keine Stimmen geladen — TTS deaktiviert")
|
||||
|
||||
def select_voice(
|
||||
self, text: str, requested_voice: Optional[str] = None
|
||||
) -> str:
|
||||
"""Waehlt die passende Stimme basierend auf Text oder Anfrage.
|
||||
def _small_range_to_words(m):
|
||||
"""'5-6' → 'fuenf bis sechs' (nur wenn beide Zahlen ≤ 24)."""
|
||||
a, b = int(m.group(1)), int(m.group(2))
|
||||
if a > 24 or b > 24 or a >= b:
|
||||
return m.group(0)
|
||||
return f"{_num_to_words_de(a)} bis {_num_to_words_de(b)}"
|
||||
|
||||
Thorsten wird bei epischen Triggern verwendet,
|
||||
sonst Ramona als Standardstimme.
|
||||
|
||||
Args:
|
||||
text: Der zu sprechende Text (fuer Epic-Trigger-Erkennung).
|
||||
requested_voice: Explizit angeforderte Stimme ("ramona" | "thorsten").
|
||||
_UNIT_WORDS = [
|
||||
(r'\bTB\b', 'Terabyte'),
|
||||
(r'\bGB\b', 'Gigabyte'),
|
||||
(r'\bMB\b', 'Megabyte'),
|
||||
(r'\bKB\b', 'Kilobyte'),
|
||||
(r'\bkB\b', 'Kilobyte'),
|
||||
(r'\bms\b', 'Millisekunden'),
|
||||
(r'\bkm/h\b', 'Kilometer pro Stunde'),
|
||||
(r'\bkm\b', 'Kilometer'),
|
||||
(r'\bm/s\b', 'Meter pro Sekunde'),
|
||||
(r'\bkg\b', 'Kilogramm'),
|
||||
(r'\b°C\b', 'Grad Celsius'),
|
||||
(r'°C', ' Grad Celsius'),
|
||||
(r'\bMbps\b', 'Megabit pro Sekunde'),
|
||||
(r'\bGbps\b', 'Gigabit pro Sekunde'),
|
||||
(r'\bMhz\b|\bMHz\b', 'Megahertz'),
|
||||
(r'\bGhz\b|\bGHz\b', 'Gigahertz'),
|
||||
(r'%', ' Prozent'),
|
||||
(r'\bCPU\b', 'C P U'),
|
||||
(r'\bGPU\b', 'G P U'),
|
||||
(r'\bRAM\b', 'R A M'),
|
||||
(r'\bSSD\b', 'S S D'),
|
||||
(r'\bHDD\b', 'H D D'),
|
||||
(r'\bURL\b', 'U R L'),
|
||||
(r'\bAPI\b', 'A P I'),
|
||||
(r'\bRVS\b', 'R V S'),
|
||||
(r'\bSSH\b', 'S S H'),
|
||||
(r'\bVM\b', 'V M'),
|
||||
(r'\bUI\b', 'U I'),
|
||||
(r'\bTTS\b', 'T T S'),
|
||||
(r'\bSTT\b', 'S T T'),
|
||||
(r'\bTLS\b', 'T L S'),
|
||||
]
|
||||
|
||||
Returns:
|
||||
Name der gewaehlten Stimme.
|
||||
"""
|
||||
if requested_voice and requested_voice in self.voices:
|
||||
return requested_voice
|
||||
|
||||
# Epische Trigger pruefen
|
||||
text_lower = text.lower()
|
||||
for trigger in EPIC_TRIGGERS:
|
||||
if trigger in text_lower:
|
||||
logger.info("Epischer Trigger erkannt: '%s' — Thorsten spricht", trigger)
|
||||
return "thorsten"
|
||||
def clean_text_for_tts(text: str) -> str:
|
||||
"""Bereitet Chat-Text fuer Sprachausgabe auf.
|
||||
|
||||
return "ramona"
|
||||
- `<voice>...</voice>` Tag: wenn vorhanden, NUR dieser Inhalt wird gelesen
|
||||
- Code-Bloecke (```...``` und `...`) werden komplett entfernt
|
||||
- Markdown (Fett, Kursiv, Links, Headings, Listen, Zitate) wird abgeraeumt
|
||||
- Einheiten und gaengige Abkuerzungen werden ausgeschrieben (22GB → 22 Gigabyte)
|
||||
- URLs werden durch "ein Link" ersetzt
|
||||
- Mehrfach-Leerzeichen/Umbrueche normalisiert
|
||||
"""
|
||||
if not text:
|
||||
return ""
|
||||
|
||||
def synthesize(self, text: str, voice_name: str = "ramona") -> Optional[bytes]:
|
||||
"""Erzeugt Audio-Daten aus Text mit der gewaehlten Stimme.
|
||||
# <voice>...</voice> wenn vorhanden → nur das nehmen
|
||||
voice_match = _re_tts.search(r'<voice>([\s\S]*?)</voice>', text, _re_tts.IGNORECASE)
|
||||
if voice_match:
|
||||
text = voice_match.group(1)
|
||||
|
||||
Args:
|
||||
text: Der zu sprechende Text.
|
||||
voice_name: Name der Stimme ("ramona" oder "thorsten").
|
||||
t = text
|
||||
|
||||
Returns:
|
||||
WAV-Audiodaten als bytes oder None bei Fehler.
|
||||
"""
|
||||
voice = self.voices.get(voice_name)
|
||||
if voice is None:
|
||||
logger.error("Stimme '%s' nicht verfuegbar", voice_name)
|
||||
return None
|
||||
# Code-Bloecke komplett raus (Zeilenumbruch statt Platzhalter — sonst bricht Satzlogik)
|
||||
t = _re_tts.sub(r'```[\s\S]*?```', '. ', t)
|
||||
t = _re_tts.sub(r'`[^`]+`', '', t)
|
||||
|
||||
try:
|
||||
# Piper gibt PCM-Samples zurueck, wir schreiben sie als WAV
|
||||
with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp:
|
||||
tmp_path = tmp.name
|
||||
# Markdown
|
||||
t = _re_tts.sub(r'\*\*([^*]+)\*\*', r'\1', t)
|
||||
t = _re_tts.sub(r'\*([^*]+)\*', r'\1', t)
|
||||
t = _re_tts.sub(r'__([^_]+)__', r'\1', t)
|
||||
t = _re_tts.sub(r'\[([^\]]+)\]\((https?://[^)]+)\)', r'\1, ein Link', t)
|
||||
t = _re_tts.sub(r'\[([^\]]+)\]\([^)]+\)', r'\1', t)
|
||||
t = _re_tts.sub(r'https?://\S+', 'ein Link', t)
|
||||
t = _re_tts.sub(r'^#{1,6}\s*', '', t, flags=_re_tts.MULTILINE)
|
||||
t = _re_tts.sub(r'^>\s*', '', t, flags=_re_tts.MULTILINE)
|
||||
t = _re_tts.sub(r'^[\-\*]\s+', '', t, flags=_re_tts.MULTILINE)
|
||||
|
||||
with wave.open(tmp_path, "wb") as wav_file:
|
||||
wav_file.setnchannels(1)
|
||||
wav_file.setsampwidth(2) # 16-bit
|
||||
wav_file.setframerate(voice.config.sample_rate)
|
||||
voice.synthesize(text, wav_file)
|
||||
# Zeitbereiche: "8:00-9:00 Uhr" / "8-9 Uhr" → "acht bis neun Uhr"
|
||||
t = _re_tts.sub(r'\b(\d{1,2})(:\d{2})?\s*[-–]\s*(\d{1,2})(:\d{2})?\s*Uhr\b', _time_range_to_words, t)
|
||||
# Uhrzeiten mit Minuten: "8:30 Uhr" → "acht Uhr dreissig", "8:00 Uhr" → "acht Uhr"
|
||||
def _single_time(m):
|
||||
h = int(m.group(1))
|
||||
mn = int(m.group(2)) if m.group(2) else 0
|
||||
words = _num_to_words_de(h) + " Uhr"
|
||||
if mn > 0:
|
||||
words += " " + _num_to_words_de(mn)
|
||||
return words
|
||||
t = _re_tts.sub(r'\b(\d{1,2}):(\d{2})\s*Uhr\b', _single_time, t)
|
||||
# Volle Uhrzeiten ohne ":" — "15 Uhr" → "fuenfzehn Uhr"
|
||||
t = _re_tts.sub(r'\b(\d{1,2})\s+Uhr\b', lambda m: f"{_num_to_words_de(int(m.group(1)))} Uhr", t)
|
||||
# Kleine Zahlen-Bereiche ohne "Uhr": "5-6" → "fuenf bis sechs"
|
||||
t = _re_tts.sub(r'\b(\d{1,2})\s*[-–]\s*(\d{1,2})\b', _small_range_to_words, t)
|
||||
|
||||
audio_data = Path(tmp_path).read_bytes()
|
||||
Path(tmp_path).unlink(missing_ok=True)
|
||||
# Zahlen + Einheit: "22GB" → "22 Gigabyte" (Leerzeichen einfuegen)
|
||||
t = _re_tts.sub(r'(\d+)([A-Za-z]{1,4})\b', r'\1 \2', t)
|
||||
|
||||
logger.info(
|
||||
"TTS: %d bytes erzeugt mit %s — '%s'",
|
||||
len(audio_data),
|
||||
voice_name,
|
||||
text[:60],
|
||||
)
|
||||
return audio_data
|
||||
# Einheiten/Abkuerzungen ausschreiben
|
||||
for pat, repl in _UNIT_WORDS:
|
||||
t = _re_tts.sub(pat, repl, t)
|
||||
|
||||
except Exception:
|
||||
logger.exception("TTS-Fehler bei Stimme '%s'", voice_name)
|
||||
return None
|
||||
# Anfuehrungszeichen
|
||||
t = _re_tts.sub(r'["""„`]', '', t)
|
||||
|
||||
def speak(self, text: str, requested_voice: Optional[str] = None) -> None:
|
||||
"""Spricht den Text ueber das Audio-Geraet.
|
||||
# Absaetze/Zeilenumbrueche normalisieren
|
||||
t = _re_tts.sub(r'\n{2,}', '. ', t)
|
||||
t = _re_tts.sub(r'\n', ', ', t)
|
||||
t = _re_tts.sub(r'\s{2,}', ' ', t)
|
||||
t = _re_tts.sub(r'\s*\.\s*\.\s*', '. ', t)
|
||||
|
||||
Waehlt automatisch die passende Stimme und gibt das Audio aus.
|
||||
|
||||
Args:
|
||||
text: Der zu sprechende Text.
|
||||
requested_voice: Optionale explizite Stimmenwahl.
|
||||
"""
|
||||
voice_name = self.select_voice(text, requested_voice)
|
||||
audio_data = self.synthesize(text, voice_name)
|
||||
|
||||
if audio_data is None:
|
||||
return
|
||||
|
||||
try:
|
||||
# WAV-Daten lesen und ueber sounddevice abspielen
|
||||
with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp:
|
||||
tmp.write(audio_data)
|
||||
tmp_path = tmp.name
|
||||
|
||||
with wave.open(tmp_path, "rb") as wf:
|
||||
frames = wf.readframes(wf.getnframes())
|
||||
sample_width = wf.getsampwidth()
|
||||
rate = wf.getframerate()
|
||||
channels = wf.getnchannels()
|
||||
|
||||
Path(tmp_path).unlink(missing_ok=True)
|
||||
|
||||
# Numpy-Array aus PCM-Daten
|
||||
dtype_map = {1: np.int8, 2: np.int16, 4: np.int32}
|
||||
dtype = dtype_map.get(sample_width, np.int16)
|
||||
audio_array = np.frombuffer(frames, dtype=dtype)
|
||||
|
||||
if channels > 1:
|
||||
audio_array = audio_array.reshape(-1, channels)
|
||||
|
||||
sd.play(audio_array, samplerate=rate)
|
||||
sd.wait() # Warten bis Wiedergabe fertig
|
||||
|
||||
except Exception:
|
||||
logger.exception("Audio-Wiedergabe fehlgeschlagen")
|
||||
return t.strip()
|
||||
|
||||
|
||||
# ── STT Engine ───────────────────────────────────────────────
|
||||
@@ -269,6 +276,25 @@ class STTEngine:
|
||||
self.model = WhisperModel(self.model_size, device="cpu", compute_type="int8")
|
||||
logger.info("Whisper-Modell geladen")
|
||||
|
||||
def reload(self, model_size: str) -> bool:
|
||||
"""Laedt ein anderes Whisper-Modell (bei Config-Aenderung)."""
|
||||
if model_size == self.model_size and self.model is not None:
|
||||
return False
|
||||
allowed = {"tiny", "base", "small", "medium", "large-v3"}
|
||||
if model_size not in allowed:
|
||||
logger.warning("Ungueltiges Whisper-Modell: %s (erlaubt: %s)", model_size, allowed)
|
||||
return False
|
||||
logger.info("Lade Whisper-Modell neu: %s -> %s", self.model_size, model_size)
|
||||
self.model_size = model_size
|
||||
self.model = None
|
||||
try:
|
||||
self.model = WhisperModel(model_size, device="cpu", compute_type="int8")
|
||||
logger.info("Whisper-Modell '%s' geladen", model_size)
|
||||
return True
|
||||
except Exception:
|
||||
logger.exception("Whisper-Modell '%s' konnte nicht geladen werden", model_size)
|
||||
return False
|
||||
|
||||
def transcribe(self, audio_data: np.ndarray) -> str:
|
||||
"""Transkribiert Audio-Daten zu Text.
|
||||
|
||||
@@ -435,13 +461,29 @@ class ARIABridge:
|
||||
else:
|
||||
self.rvs_url = ""
|
||||
self.rvs_url_fallback = ""
|
||||
self.current_mode = Mode.NORMAL
|
||||
# Mode aus Shared Config laden (persistiert ueber Container-Restarts)
|
||||
self.current_mode = self._load_persisted_mode()
|
||||
self.running = False
|
||||
|
||||
# Komponenten
|
||||
self.voice_engine = VoiceEngine(VOICES_DIR)
|
||||
# Komponenten (TTS: immer XTTS remote, Piper wurde entfernt)
|
||||
self.tts_enabled = True
|
||||
self.xtts_voice = ""
|
||||
vc: dict = {}
|
||||
# Gespeicherte Voice-Config laden
|
||||
try:
|
||||
vc_path = "/shared/config/voice_config.json"
|
||||
if os.path.exists(vc_path):
|
||||
with open(vc_path) as f:
|
||||
vc = json.load(f)
|
||||
self.tts_enabled = vc.get("ttsEnabled", True)
|
||||
self.xtts_voice = vc.get("xttsVoice", "")
|
||||
logger.info("Voice-Config geladen: tts=%s voice=%s", self.tts_enabled, self.xtts_voice or "default")
|
||||
except Exception as e:
|
||||
logger.warning("Voice-Config laden fehlgeschlagen: %s", e)
|
||||
# Whisper-Modell: Config hat Vorrang, dann env/Default (medium)
|
||||
whisper_model = vc.get("whisperModel") or self.config.get("WHISPER_MODEL", WHISPER_MODEL)
|
||||
self.stt_engine = STTEngine(
|
||||
model_size=self.config.get("WHISPER_MODEL", WHISPER_MODEL),
|
||||
model_size=whisper_model,
|
||||
language=self.config.get("WHISPER_LANGUAGE", WHISPER_LANGUAGE),
|
||||
)
|
||||
self.wake_word = WakeWordDetector()
|
||||
@@ -450,6 +492,18 @@ class ARIABridge:
|
||||
self.ws_core: Optional[websockets.WebSocketClientProtocol] = None
|
||||
self.ws_rvs: Optional[websockets.WebSocketClientProtocol] = None
|
||||
|
||||
# Letzter gesendeter agent_activity-State (zum Entduplizieren)
|
||||
self._last_activity_state: Optional[tuple] = None
|
||||
# Zeitstempel des letzten chat:final — waehrend 3s danach werden
|
||||
# trailing Agent-Events unterdrueckt (Core raeumt manchmal nach).
|
||||
self._last_chat_final_at: float = 0.0
|
||||
# requestId → messageId Map fuer XTTS-Audio-Cache (App-seitige Zuordnung)
|
||||
self._xtts_request_to_message: dict[str, str] = {}
|
||||
# Voice-Override aus letzter Chat-Nachricht einer App.
|
||||
# Wird fuer die direkt folgende ARIA-Antwort genutzt und dann zurueckgesetzt.
|
||||
# So kann jedes Geraet seine bevorzugte Stimme bekommen (pro Request).
|
||||
self._next_voice_override: Optional[str] = None
|
||||
|
||||
def initialize(self) -> None:
|
||||
"""Initialisiert alle Komponenten.
|
||||
|
||||
@@ -461,20 +515,20 @@ class ARIABridge:
|
||||
logger.info("ARIA Voice Bridge startet...")
|
||||
logger.info("=" * 50)
|
||||
|
||||
# Voice-Engine IMMER laden — rendert Audio fuer die App (auch ohne Soundkarte)
|
||||
self.voice_engine.initialize()
|
||||
# STT IMMER laden — verarbeitet Audio von der App (braucht kein Sounddevice)
|
||||
self.stt_engine.initialize()
|
||||
|
||||
# Audio-Hardware pruefen (fuer lokales Mikro/Lautsprecher)
|
||||
self.audio_available = False
|
||||
try:
|
||||
sd.query_devices()
|
||||
devices = sd.query_devices()
|
||||
sd.query_devices(kind='output')
|
||||
self.audio_available = True
|
||||
logger.info("Audio-Geraet gefunden — Wake-Word und lokale TTS aktiv")
|
||||
self.stt_engine.initialize()
|
||||
self.wake_word.initialize()
|
||||
except (sd.PortAudioError, Exception):
|
||||
logger.warning("Kein Audio-Geraet — Wake-Word und lokale TTS deaktiviert")
|
||||
logger.info("Piper TTS rendert Audio fuer die App (via RVS)")
|
||||
logger.warning("Kein Audio-Geraet — Wake-Word und lokale Wiedergabe deaktiviert")
|
||||
logger.info("TTS rendert fuer App (via RVS), STT verarbeitet App-Audio")
|
||||
|
||||
logger.info("Alle Komponenten initialisiert")
|
||||
logger.info("aria-core: %s", self.ws_url)
|
||||
@@ -651,8 +705,18 @@ class ARIABridge:
|
||||
if event_name == "agent":
|
||||
data = payload.get("data", {})
|
||||
delta = data.get("delta", "")
|
||||
if delta and payload.get("stream") == "assistant":
|
||||
stream = payload.get("stream", "")
|
||||
if delta and stream == "assistant":
|
||||
logger.debug("[core] Delta: '%s'", delta[:40])
|
||||
# Activity-Signal zur App (entdupliziert)
|
||||
tool_name = data.get("name") or data.get("tool") or payload.get("tool") or ""
|
||||
if stream == "tool_use" or data.get("type") == "tool_use":
|
||||
activity = "tool"
|
||||
elif stream == "assistant":
|
||||
activity = "assistant"
|
||||
else:
|
||||
activity = "thinking"
|
||||
await self._emit_activity(activity, tool_name)
|
||||
return
|
||||
|
||||
# ── chat Events: Snapshots mit state=delta|final|error ──
|
||||
@@ -661,6 +725,8 @@ class ARIABridge:
|
||||
|
||||
if state == "final":
|
||||
text = self._extract_chat_text(payload)
|
||||
self._last_chat_final_at = asyncio.get_event_loop().time()
|
||||
await self._emit_activity("idle", "")
|
||||
if not text:
|
||||
logger.warning("[core] chat final ohne Text: %s", json.dumps(payload)[:200])
|
||||
return
|
||||
@@ -671,6 +737,8 @@ class ARIABridge:
|
||||
if state == "error":
|
||||
error = payload.get("error", "Unbekannt")
|
||||
logger.error("[core] Chat-Fehler: %s", error)
|
||||
self._last_chat_final_at = asyncio.get_event_loop().time()
|
||||
await self._emit_activity("idle", "")
|
||||
await self._send_to_rvs({
|
||||
"type": "chat",
|
||||
"payload": {
|
||||
@@ -742,27 +810,35 @@ class ARIABridge:
|
||||
- Leitet Antwort an die App weiter (via RVS)
|
||||
- Sprachausgabe ueber TTS (wenn Modus erlaubt)
|
||||
"""
|
||||
# NO_REPLY Token: ARIA signalisiert explizit "nicht antworten"
|
||||
# → komplett verwerfen (keine Chat-Nachricht, kein TTS)
|
||||
# Toleranz fuer Variationen: "NO_REPLY", "no_reply", mit Punkt/Anfuehrungszeichen
|
||||
stripped = text.strip().strip('."\'`*').upper()
|
||||
if stripped == "NO_REPLY" or stripped.startswith("NO_REPLY"):
|
||||
logger.info("[core] NO_REPLY empfangen — Antwort still verworfen")
|
||||
return
|
||||
|
||||
metadata = payload.get("metadata", {})
|
||||
is_critical = metadata.get("critical", False)
|
||||
requested_voice = metadata.get("voice")
|
||||
|
||||
# Modus-Wechsel pruefen
|
||||
# Modus-Wechsel pruefen (Sprachbefehl im Text)
|
||||
new_mode = detect_mode_switch(text)
|
||||
if new_mode is not None:
|
||||
self.current_mode = new_mode
|
||||
self._persist_mode()
|
||||
logger.info(
|
||||
"[core] Modus → %s %s",
|
||||
self.current_mode.config.emoji,
|
||||
self.current_mode.config.name,
|
||||
)
|
||||
await self._send_to_rvs({
|
||||
"type": "mode",
|
||||
"payload": {"mode": self.current_mode.name},
|
||||
"timestamp": int(asyncio.get_event_loop().time() * 1000),
|
||||
})
|
||||
await self._broadcast_current_mode()
|
||||
|
||||
# Stimme auswaehlen
|
||||
voice_name = requested_voice or self.voice_engine.select_voice(text)
|
||||
# Eindeutige Message-ID fuer Audio-Cache-Zuordnung
|
||||
message_id = str(uuid.uuid4())
|
||||
|
||||
# TTS-aufbereitete Variante fuer Debug (Diagnostic zeigt optional)
|
||||
tts_text_preview = clean_text_for_tts(text)
|
||||
|
||||
# Antwort an die App weiterleiten (als Chat-Nachricht)
|
||||
await self._send_to_rvs({
|
||||
@@ -770,32 +846,90 @@ class ARIABridge:
|
||||
"payload": {
|
||||
"text": text,
|
||||
"sender": "aria",
|
||||
"voice": voice_name,
|
||||
"messageId": message_id,
|
||||
# Debug: aufbereiteter Text fuer TTS (App ignoriert, Diagnostic zeigt optional)
|
||||
"ttsText": tts_text_preview if tts_text_preview != text else "",
|
||||
},
|
||||
"timestamp": int(asyncio.get_event_loop().time() * 1000),
|
||||
})
|
||||
|
||||
# TTS-Audio rendern und an die App senden (wenn Modus es erlaubt)
|
||||
if should_speak(self.current_mode, is_critical):
|
||||
audio_data = self.voice_engine.synthesize(text, voice_name)
|
||||
if audio_data:
|
||||
audio_b64 = base64.b64encode(audio_data).decode("ascii")
|
||||
await self._send_to_rvs({
|
||||
"type": "audio",
|
||||
"payload": {
|
||||
"base64": audio_b64,
|
||||
"mimeType": "audio/wav",
|
||||
"voice": voice_name,
|
||||
},
|
||||
"timestamp": int(asyncio.get_event_loop().time() * 1000),
|
||||
})
|
||||
logger.info("[core] TTS-Audio gesendet: %d bytes (%s)", len(audio_data), voice_name)
|
||||
|
||||
# Lokal abspielen (nur wenn Soundkarte vorhanden)
|
||||
if self.audio_available:
|
||||
self.voice_engine.speak(text, requested_voice)
|
||||
else:
|
||||
# TTS ueber XTTS (XTTS-Bridge auf Gaming-PC)
|
||||
if not (getattr(self, 'tts_enabled', True) and should_speak(self.current_mode, is_critical)):
|
||||
logger.info("[core] TTS unterdrueckt (Modus: %s)", self.current_mode.config.name)
|
||||
return
|
||||
|
||||
# Voice bestimmen: App-Override fuer diesen Request > globale Default-Voice
|
||||
xtts_voice = self._next_voice_override or getattr(self, 'xtts_voice', '')
|
||||
# Override verbrauchen (gilt nur fuer genau diese naechste Antwort)
|
||||
if self._next_voice_override:
|
||||
logger.info("[core] Nutze Voice-Override: %s", self._next_voice_override)
|
||||
self._next_voice_override = None
|
||||
|
||||
tts_text = tts_text_preview or text
|
||||
if not tts_text:
|
||||
logger.info("[core] TTS-Text leer nach Cleanup — uebersprungen")
|
||||
return
|
||||
try:
|
||||
xtts_request_id = str(uuid.uuid4())
|
||||
self._xtts_request_to_message[xtts_request_id] = message_id
|
||||
if len(self._xtts_request_to_message) > 100:
|
||||
oldest = next(iter(self._xtts_request_to_message))
|
||||
self._xtts_request_to_message.pop(oldest, None)
|
||||
await self._send_to_rvs({
|
||||
"type": "xtts_request",
|
||||
"payload": {
|
||||
"text": tts_text,
|
||||
"voice": xtts_voice,
|
||||
"language": "de",
|
||||
"requestId": xtts_request_id,
|
||||
"messageId": message_id,
|
||||
},
|
||||
"timestamp": int(asyncio.get_event_loop().time() * 1000),
|
||||
})
|
||||
logger.info("[core] XTTS-Request gesendet (%s): '%s'", xtts_voice or "default", tts_text[:60])
|
||||
except Exception as e:
|
||||
logger.error("[core] XTTS-Request fehlgeschlagen: %s — kein Audio", e)
|
||||
|
||||
# ── Mode Persistence (global, nicht pro Geraet) ──────
|
||||
_MODE_FILE = "/shared/config/mode.json"
|
||||
|
||||
def _load_persisted_mode(self) -> Mode:
|
||||
"""Laedt den zuletzt aktiven Modus aus Shared Config oder NORMAL."""
|
||||
try:
|
||||
if os.path.exists(self._MODE_FILE):
|
||||
data = json.loads(Path(self._MODE_FILE).read_text())
|
||||
mode_name = data.get("mode", "NORMAL")
|
||||
for m in Mode:
|
||||
if m.name == mode_name:
|
||||
logger.info("[mode] Persistierter Modus geladen: %s", m.config.name)
|
||||
return m
|
||||
except Exception as e:
|
||||
logger.warning("[mode] Laden fehlgeschlagen: %s", e)
|
||||
return Mode.NORMAL
|
||||
|
||||
def _persist_mode(self) -> None:
|
||||
"""Speichert den aktuellen Modus in Shared Config."""
|
||||
try:
|
||||
os.makedirs("/shared/config", exist_ok=True)
|
||||
Path(self._MODE_FILE).write_text(json.dumps({"mode": self.current_mode.name}))
|
||||
except Exception as e:
|
||||
logger.warning("[mode] Speichern fehlgeschlagen: %s", e)
|
||||
|
||||
async def _broadcast_current_mode(self) -> None:
|
||||
"""Broadcastet den aktuellen Modus an alle RVS-Clients (App + Diagnostic)."""
|
||||
try:
|
||||
await self._send_to_rvs({
|
||||
"type": "mode",
|
||||
"payload": {
|
||||
"mode": canonical_id(self.current_mode),
|
||||
"name": self.current_mode.config.name,
|
||||
"emoji": self.current_mode.config.emoji,
|
||||
"sender": "bridge", # Filter in mode-Handler gegen Loops
|
||||
},
|
||||
"timestamp": int(asyncio.get_event_loop().time() * 1000),
|
||||
})
|
||||
except Exception as e:
|
||||
logger.debug("[mode] Broadcast fehlgeschlagen: %s", e)
|
||||
|
||||
def _fetch_active_session(self) -> None:
|
||||
"""Holt die aktive Session vom Diagnostic-Endpoint."""
|
||||
@@ -862,6 +996,10 @@ class ARIABridge:
|
||||
retry_delay = 2
|
||||
logger.info("[rvs] Verbunden — warte auf App-Nachrichten")
|
||||
|
||||
# Aktuellen Modus broadcasten damit gerade verbundene Apps/Diagnostic
|
||||
# ihren UI-State sofort syncen koennen
|
||||
await self._broadcast_current_mode()
|
||||
|
||||
# Heartbeat senden (RVS erwartet Ping alle 30s)
|
||||
heartbeat_task = asyncio.create_task(self._rvs_heartbeat())
|
||||
|
||||
@@ -896,10 +1034,22 @@ class ARIABridge:
|
||||
retry_delay = min(retry_delay * 2, 30)
|
||||
|
||||
async def _rvs_heartbeat(self) -> None:
|
||||
"""Sendet Heartbeats an den RVS damit die Verbindung offen bleibt."""
|
||||
"""Sendet Heartbeats + WebSocket Pings an den RVS damit die Verbindung offen bleibt."""
|
||||
while True:
|
||||
await asyncio.sleep(25)
|
||||
await asyncio.sleep(15)
|
||||
if self.ws_rvs:
|
||||
try:
|
||||
# WebSocket Protocol-Level Ping (haelt TCP-Verbindung am Leben)
|
||||
pong = await self.ws_rvs.ping()
|
||||
await asyncio.wait_for(pong, timeout=10)
|
||||
except Exception:
|
||||
logger.warning("[rvs] Ping fehlgeschlagen — Verbindung tot, erzwinge Reconnect")
|
||||
try:
|
||||
await self.ws_rvs.close()
|
||||
except Exception:
|
||||
pass
|
||||
self.ws_rvs = None
|
||||
break
|
||||
try:
|
||||
await self.ws_rvs.send(json.dumps({
|
||||
"type": "heartbeat",
|
||||
@@ -933,21 +1083,152 @@ class ARIABridge:
|
||||
if sender in ("aria", "stt"):
|
||||
return
|
||||
text = payload.get("text", "")
|
||||
# Voice-Override fuer die naechste ARIA-Antwort merken
|
||||
voice_override = payload.get("voice", "")
|
||||
if voice_override:
|
||||
self._next_voice_override = voice_override
|
||||
logger.info("[rvs] Voice-Override fuer naechste Antwort: %s", voice_override)
|
||||
if text:
|
||||
logger.info("[rvs] App-Chat: '%s'", text[:80])
|
||||
await self.send_to_core(text, source="app")
|
||||
return
|
||||
|
||||
if msg_type == "cancel_request":
|
||||
logger.info("[rvs] Cancel-Request von App — rufe Diagnostic /api/cancel auf")
|
||||
await self._cancel_via_diagnostic()
|
||||
await self._emit_activity("idle", "")
|
||||
return
|
||||
|
||||
elif msg_type == "audio_pcm":
|
||||
# XTTS-PCM-Stream vom Gaming-PC empfangen → durchleiten zur App.
|
||||
# Wenn in payload kein messageId (alte XTTS-Bridge), aus requestId auflösen.
|
||||
error = payload.get("error", "")
|
||||
if error:
|
||||
logger.warning("[rvs] XTTS PCM-Fehler: %s", error)
|
||||
return
|
||||
linked_message_id = payload.get("messageId", "")
|
||||
if not linked_message_id:
|
||||
req_id_full = payload.get("requestId", "")
|
||||
req_id_base = req_id_full.rsplit("_", 1)[0] if "_" in req_id_full else req_id_full
|
||||
linked_message_id = self._xtts_request_to_message.get(req_id_base, "")
|
||||
# Einfach 1:1 weiterleiten mit eingefuellter messageId
|
||||
forwarded = dict(payload)
|
||||
forwarded["messageId"] = linked_message_id
|
||||
await self._send_to_rvs({
|
||||
"type": "audio_pcm",
|
||||
"payload": forwarded,
|
||||
"timestamp": int(asyncio.get_event_loop().time() * 1000),
|
||||
})
|
||||
return
|
||||
|
||||
elif msg_type == "xtts_response":
|
||||
# Legacy-Pfad (alte XTTS-Bridge mit WAV-Response). Weiterleiten als
|
||||
# type "audio" — App nutzt den bestehenden WAV-Queue-Spieler.
|
||||
audio_b64 = payload.get("base64", "")
|
||||
error = payload.get("error", "")
|
||||
req_id_full = payload.get("requestId", "")
|
||||
req_id_base = req_id_full.rsplit("_", 1)[0] if "_" in req_id_full else req_id_full
|
||||
linked_message_id = self._xtts_request_to_message.get(req_id_base, "")
|
||||
if error:
|
||||
logger.warning("[rvs] XTTS Fehler: %s", error)
|
||||
return
|
||||
if audio_b64:
|
||||
logger.info("[rvs] XTTS-Audio legacy empfangen: %dKB", len(audio_b64) // 1365)
|
||||
await self._send_to_rvs({
|
||||
"type": "audio",
|
||||
"payload": {
|
||||
"base64": audio_b64,
|
||||
"mimeType": payload.get("mimeType", "audio/wav"),
|
||||
"voice": payload.get("voice", "xtts"),
|
||||
"messageId": linked_message_id,
|
||||
},
|
||||
"timestamp": int(asyncio.get_event_loop().time() * 1000),
|
||||
})
|
||||
return
|
||||
|
||||
elif msg_type == "tts_request":
|
||||
# App fordert TTS-Audio fuer einen Text an (Play-Button) → immer XTTS.
|
||||
text = payload.get("text", "")
|
||||
message_id = payload.get("messageId", "")
|
||||
if not text:
|
||||
return
|
||||
tts_text = clean_text_for_tts(text) or text
|
||||
# Voice aus App-Payload gewinnt, sonst global
|
||||
xtts_voice = payload.get("voice", "") or getattr(self, 'xtts_voice', '')
|
||||
try:
|
||||
xtts_request_id = str(uuid.uuid4())
|
||||
if message_id:
|
||||
self._xtts_request_to_message[xtts_request_id] = message_id
|
||||
await self._send_to_rvs({
|
||||
"type": "xtts_request",
|
||||
"payload": {
|
||||
"text": tts_text,
|
||||
"voice": xtts_voice,
|
||||
"language": "de",
|
||||
"requestId": xtts_request_id,
|
||||
"messageId": message_id,
|
||||
},
|
||||
"timestamp": int(asyncio.get_event_loop().time() * 1000),
|
||||
})
|
||||
logger.info("[rvs] TTS on-demand via XTTS: '%s'", tts_text[:60])
|
||||
except Exception as e:
|
||||
logger.warning("[rvs] TTS on-demand fehlgeschlagen: %s", e)
|
||||
return
|
||||
|
||||
elif msg_type == "config":
|
||||
# Konfiguration von App/Diagnostic empfangen + persistent speichern
|
||||
changed = False
|
||||
if "ttsEnabled" in payload:
|
||||
self.tts_enabled = bool(payload["ttsEnabled"])
|
||||
logger.info("[rvs] TTS %s", "aktiviert" if self.tts_enabled else "deaktiviert")
|
||||
changed = True
|
||||
if "xttsVoice" in payload:
|
||||
self.xtts_voice = payload["xttsVoice"]
|
||||
logger.info("[rvs] XTTS-Stimme: %s", self.xtts_voice or "default")
|
||||
changed = True
|
||||
if "whisperModel" in payload:
|
||||
new_model = payload["whisperModel"]
|
||||
if new_model and new_model != self.stt_engine.model_size:
|
||||
logger.info("[rvs] Whisper-Modell Wechsel: %s -> %s (laedt...)", self.stt_engine.model_size, new_model)
|
||||
loop = asyncio.get_event_loop()
|
||||
if await loop.run_in_executor(None, self.stt_engine.reload, new_model):
|
||||
changed = True
|
||||
# Persistent speichern in Shared Volume
|
||||
if changed:
|
||||
try:
|
||||
os.makedirs("/shared/config", exist_ok=True)
|
||||
config_data = {
|
||||
"ttsEnabled": getattr(self, "tts_enabled", True),
|
||||
"xttsVoice": getattr(self, "xtts_voice", ""),
|
||||
"whisperModel": self.stt_engine.model_size,
|
||||
}
|
||||
with open("/shared/config/voice_config.json", "w") as f:
|
||||
json.dump(config_data, f, indent=2)
|
||||
logger.info("[rvs] Voice-Config gespeichert: %s", config_data)
|
||||
except Exception as e:
|
||||
logger.warning("[rvs] Config speichern fehlgeschlagen: %s", e)
|
||||
return
|
||||
|
||||
elif msg_type == "mode":
|
||||
# Moduswechsel von der App
|
||||
# Moduswechsel von der App — ID ('normal', 'dnd', ...) ODER Aktivierungsphrase
|
||||
mode_name = payload.get("mode", "")
|
||||
new_mode = detect_mode_switch(mode_name)
|
||||
if new_mode is not None:
|
||||
# Sender kann der Broadcast der Bridge selbst sein — den ignorieren damit
|
||||
# andere Apps nicht in eine Loop geraten
|
||||
if payload.get("sender") == "bridge":
|
||||
return
|
||||
new_mode = mode_from_id(mode_name) or detect_mode_switch(mode_name)
|
||||
if new_mode is not None and new_mode != self.current_mode:
|
||||
self.current_mode = new_mode
|
||||
self._persist_mode()
|
||||
logger.info(
|
||||
"[rvs] Modus → %s %s (von App)",
|
||||
self.current_mode.config.emoji,
|
||||
self.current_mode.config.name,
|
||||
)
|
||||
# Broadcast an ALLE Clients (App + Diagnostic) damit UI ueberall sync ist
|
||||
await self._broadcast_current_mode()
|
||||
elif new_mode is None:
|
||||
logger.warning("[rvs] Unbekannter Modus: '%s'", mode_name)
|
||||
|
||||
elif msg_type == "location":
|
||||
# GPS-Daten von der App
|
||||
@@ -1062,6 +1343,11 @@ class ARIABridge:
|
||||
if not audio_b64:
|
||||
logger.warning("[rvs] Audio ohne Daten empfangen")
|
||||
return
|
||||
# Voice-Override fuer die kommende ARIA-Antwort (App-lokal gewaehlt)
|
||||
voice_override = payload.get("voice", "")
|
||||
if voice_override:
|
||||
self._next_voice_override = voice_override
|
||||
logger.info("[rvs] Voice-Override (via Audio): %s", voice_override)
|
||||
logger.info("[rvs] Audio empfangen: %s, %dms, %dKB",
|
||||
mime_type, duration_ms, len(audio_b64) // 1365)
|
||||
asyncio.create_task(self._process_app_audio(audio_b64, mime_type))
|
||||
@@ -1142,10 +1428,24 @@ class ARIABridge:
|
||||
pass
|
||||
|
||||
async def _send_to_rvs(self, message: dict) -> None:
|
||||
"""Sendet eine Nachricht an die App (via RVS)."""
|
||||
"""Sendet eine Nachricht an die App (via RVS) mit Verbindungs-Check."""
|
||||
if self.ws_rvs is None:
|
||||
return
|
||||
|
||||
# Ping-Check: Verbindung wirklich aktiv?
|
||||
try:
|
||||
pong = await self.ws_rvs.ping()
|
||||
await asyncio.wait_for(pong, timeout=5)
|
||||
except Exception:
|
||||
logger.warning("[rvs] Ping fehlgeschlagen — Verbindung tot, erzwinge Reconnect")
|
||||
try:
|
||||
await self.ws_rvs.close()
|
||||
except Exception:
|
||||
pass
|
||||
self.ws_rvs = None
|
||||
# Reconnect wird vom connect_to_rvs Loop uebernommen
|
||||
return
|
||||
|
||||
try:
|
||||
await self.ws_rvs.send(json.dumps(message))
|
||||
except Exception:
|
||||
@@ -1153,6 +1453,43 @@ class ARIABridge:
|
||||
|
||||
# ── Log-Streaming an die App ─────────────────────────────
|
||||
|
||||
async def _cancel_via_diagnostic(self) -> None:
|
||||
"""Ruft das Diagnostic /api/cancel an — dort laeuft die volle Abbruch-Logik
|
||||
(openclaw doctor --fix mit Docker-Socket)."""
|
||||
def _do_request():
|
||||
try:
|
||||
req = urllib.request.Request(
|
||||
f"{self._diagnostic_url}/api/cancel",
|
||||
method="POST",
|
||||
data=b"",
|
||||
)
|
||||
with urllib.request.urlopen(req, timeout=5) as resp:
|
||||
return resp.status
|
||||
except Exception as e:
|
||||
return f"error: {e}"
|
||||
|
||||
status = await asyncio.get_event_loop().run_in_executor(None, _do_request)
|
||||
logger.info("[cancel] Diagnostic /api/cancel: %s", status)
|
||||
|
||||
async def _emit_activity(self, activity: str, tool: str = "") -> None:
|
||||
"""Sendet agent_activity an die App — nur wenn sich der State geaendert hat.
|
||||
|
||||
Trailing Agent-Events nach chat:final werden 3s lang unterdrueckt
|
||||
(nur 'idle' kommt immer durch)."""
|
||||
if activity != "idle" and self._last_chat_final_at > 0:
|
||||
since_final = asyncio.get_event_loop().time() - self._last_chat_final_at
|
||||
if since_final < 3.0:
|
||||
return
|
||||
state = (activity, tool)
|
||||
if state == self._last_activity_state:
|
||||
return
|
||||
self._last_activity_state = state
|
||||
await self._send_to_rvs({
|
||||
"type": "agent_activity",
|
||||
"payload": {"activity": activity, "tool": tool},
|
||||
"timestamp": int(asyncio.get_event_loop().time() * 1000),
|
||||
})
|
||||
|
||||
async def send_log_to_app(self, source: str, message: str, level: str = "info") -> None:
|
||||
"""Sendet einen Log-Eintrag an die App (erscheint im Log-Viewer)."""
|
||||
await self._send_to_rvs({
|
||||
|
||||
@@ -91,6 +91,39 @@ _ACTIVATION_MAP: dict[str, Mode] = {
|
||||
mode.config.activation_phrase.lower(): mode for mode in Mode
|
||||
}
|
||||
|
||||
# ID-Mapping fuer API-Mode-Wechsel (z.B. App ModeSelector schickt 'normal')
|
||||
_ID_MAP: dict[str, Mode] = {
|
||||
"normal": Mode.NORMAL,
|
||||
"nicht_stoeren": Mode.DND,
|
||||
"dnd": Mode.DND,
|
||||
"fluester": Mode.WHISPER,
|
||||
"whisper": Mode.WHISPER,
|
||||
"hangar": Mode.HANGAR,
|
||||
"gaming": Mode.GAMING,
|
||||
}
|
||||
|
||||
|
||||
def mode_from_id(mode_id: str) -> Optional[Mode]:
|
||||
"""ID-basiertes Mapping fuer API-Mode-Wechsel (ohne Aktivierungsphrase)."""
|
||||
if not mode_id:
|
||||
return None
|
||||
return _ID_MAP.get(mode_id.strip().lower())
|
||||
|
||||
|
||||
# Kanonische IDs fuer Broadcasts (matchen die App-UI-IDs in ModeSelector)
|
||||
_CANONICAL_ID: dict[Mode, str] = {
|
||||
Mode.NORMAL: "normal",
|
||||
Mode.DND: "nicht_stoeren",
|
||||
Mode.WHISPER: "fluester",
|
||||
Mode.HANGAR: "hangar",
|
||||
Mode.GAMING: "gaming",
|
||||
}
|
||||
|
||||
|
||||
def canonical_id(mode: Mode) -> str:
|
||||
"""Kanonische ID die App + Diagnostic + Bridge gleichermassen kennen."""
|
||||
return _CANONICAL_ID.get(mode, mode.name.lower())
|
||||
|
||||
|
||||
def detect_mode_switch(text: str) -> Optional[Mode]:
|
||||
"""Erkennt ob ein Text eine Modus-Umschaltung enthaelt.
|
||||
|
||||
@@ -5,8 +5,7 @@
|
||||
# STT — Whisper (lokal, keine API noetig)
|
||||
faster-whisper
|
||||
|
||||
# TTS — Piper (offline, deutsche Stimmen)
|
||||
piper-tts
|
||||
# TTS: laeuft remote ueber XTTS v2 auf dem Gaming-PC (keine lokalen Deps noetig)
|
||||
|
||||
# WebSocket-Verbindung zu aria-core
|
||||
websockets
|
||||
|
||||
Executable
+44
@@ -0,0 +1,44 @@
|
||||
#!/bin/bash
|
||||
# ARIA Docker Cleanup
|
||||
#
|
||||
# Standard: docker builder prune + image prune (sicher, loescht keine Volumes)
|
||||
# --full: Volle Reinigung inkl. --volumes (Vorsicht bei ungenutzten Volumes!)
|
||||
#
|
||||
# Usage:
|
||||
# ./cleanup.sh # sicherer Cleanup
|
||||
# ./cleanup.sh --full # aggressiver Cleanup (inkl. Volumes)
|
||||
|
||||
set -e
|
||||
|
||||
FULL=0
|
||||
for arg in "$@"; do
|
||||
case "$arg" in
|
||||
--full|-f) FULL=1 ;;
|
||||
-h|--help)
|
||||
grep '^#' "$0" | sed 's/^# \{0,1\}//'
|
||||
exit 0
|
||||
;;
|
||||
esac
|
||||
done
|
||||
|
||||
echo "── Docker Speicher VOR Cleanup ───────────────────"
|
||||
docker system df
|
||||
echo
|
||||
|
||||
if [ "$FULL" = "1" ]; then
|
||||
echo ">>> VOLLE Reinigung (inkl. ungenutzter Volumes)"
|
||||
read -p "Wirklich? [y/N] " -n 1 -r REPLY
|
||||
echo
|
||||
[[ ! $REPLY =~ ^[Yy]$ ]] && { echo "Abgebrochen."; exit 0; }
|
||||
docker system prune -a --volumes -f
|
||||
else
|
||||
echo ">>> Sicherer Cleanup (Build-Cache + ungenutzte Images)"
|
||||
docker builder prune -a -f
|
||||
docker image prune -a -f
|
||||
fi
|
||||
|
||||
echo
|
||||
echo "── Docker Speicher NACH Cleanup ──────────────────"
|
||||
docker system df
|
||||
echo
|
||||
df -h / | head -2
|
||||
+766
-26
@@ -198,14 +198,27 @@
|
||||
<div class="card full">
|
||||
<div style="display:flex;align-items:center;justify-content:space-between;margin-bottom:8px;">
|
||||
<h2 style="margin:0;">Chat Test</h2>
|
||||
<button class="btn secondary" onclick="toggleChatFullscreen()" id="btn-chat-fs" style="padding:4px 10px;font-size:11px;">Vollbild</button>
|
||||
<div style="display:flex;align-items:center;gap:12px;">
|
||||
<label style="color:#8888AA;font-size:11px;cursor:pointer;">
|
||||
<input type="checkbox" id="tts-debug-toggle" onchange="toggleTtsDebug()" style="margin-right:4px;vertical-align:middle;">
|
||||
TTS-Text einblenden
|
||||
</label>
|
||||
<button class="btn secondary" onclick="toggleChatFullscreen()" id="btn-chat-fs" style="padding:4px 10px;font-size:11px;">Vollbild</button>
|
||||
</div>
|
||||
</div>
|
||||
<div class="chat-box" id="chat-box"></div>
|
||||
<div id="thinking-indicator" style="display:none;padding:6px 10px;font-size:12px;color:#FFD60A;background:#1E1E2E;border-radius:0 0 6px 6px;margin-top:-8px;margin-bottom:8px;">
|
||||
<span style="animation:pulse 1s infinite;">💭</span> <span id="thinking-text">ARIA denkt...</span>
|
||||
<div id="thinking-indicator" style="display:none;padding:6px 10px;font-size:12px;color:#FFD60A;background:#1E1E2E;border-radius:0 0 6px 6px;margin-top:-8px;margin-bottom:8px;align-items:center;justify-content:space-between;">
|
||||
<span><span style="animation:pulse 1s infinite;">💭</span> <span id="thinking-text">ARIA denkt...</span></span>
|
||||
<button class="btn secondary" onclick="cancelRequest()" style="padding:2px 10px;font-size:11px;color:#FF3B30;border-color:#FF3B30;">Abbrechen</button>
|
||||
</div>
|
||||
<div id="diag-pending-attachments" style="display:none;padding:6px 10px;background:#1E1E2E;border-radius:6px 6px 0 0;margin-bottom:-4px;display:flex;gap:6px;flex-wrap:wrap;align-items:center;">
|
||||
</div>
|
||||
<div class="input-row">
|
||||
<input type="text" id="chat-input" placeholder="Nachricht an ARIA...">
|
||||
<label class="btn secondary" style="padding:6px 10px;cursor:pointer;font-size:14px;" title="Datei anhaengen">
|
||||
📎
|
||||
<input type="file" id="diag-file-input" multiple accept="image/*,application/pdf,.doc,.docx,.txt" style="display:none;" onchange="handleDiagFileSelect(this.files)">
|
||||
</label>
|
||||
<input type="text" id="chat-input" placeholder="Nachricht an ARIA..." onpaste="handleDiagPaste(event)">
|
||||
<button class="btn" id="btn-gw" onclick="testGateway()">Gateway senden</button>
|
||||
<button class="btn" id="btn-rvs" onclick="testRVS()">Via RVS senden</button>
|
||||
</div>
|
||||
@@ -283,6 +296,7 @@
|
||||
<button class="tab-btn" data-tab="bridge" onclick="switchTab('bridge')">Bridge <span class="tab-count" id="count-bridge">0</span></button>
|
||||
<button class="tab-btn" data-tab="server" onclick="switchTab('server')">Server <span class="tab-count" id="count-server">0</span></button>
|
||||
<button class="tab-btn" data-tab="pipeline" onclick="switchTab('pipeline')" style="margin-left:auto;border-color:#0096FF44;color:#0096FF">Pipeline <span class="tab-count" id="count-pipeline">0</span></button>
|
||||
<button class="tab-btn" data-tab="tts" onclick="switchTab('tts')" style="border-color:#34C75944;color:#34C759">TTS</button>
|
||||
</div>
|
||||
</div>
|
||||
<div class="log-panel">
|
||||
@@ -302,6 +316,27 @@
|
||||
<div class="log-box hidden" id="log-bridge"></div>
|
||||
<div class="log-box hidden" id="log-server"></div>
|
||||
<div class="log-box hidden" id="log-pipeline"></div>
|
||||
<div class="log-box hidden" id="log-tts" style="padding:12px;">
|
||||
<h3 style="color:#34C759;margin:0 0 12px;">TTS Diagnose (XTTS)</h3>
|
||||
<div style="display:grid;grid-template-columns:1fr 1fr;gap:8px;margin-bottom:12px;">
|
||||
<div style="background:#1E1E2E;padding:8px;border-radius:6px;">
|
||||
<div style="color:#8888AA;font-size:10px;text-transform:uppercase;">Status</div>
|
||||
<div style="font-size:14px;margin-top:4px;" id="tts-status">Unbekannt</div>
|
||||
</div>
|
||||
<div style="background:#1E1E2E;padding:8px;border-radius:6px;">
|
||||
<div style="color:#8888AA;font-size:10px;text-transform:uppercase;">Letzter Fehler</div>
|
||||
<div style="color:#FF6B6B;font-size:12px;margin-top:4px;word-break:break-all;" id="tts-last-error">-</div>
|
||||
</div>
|
||||
</div>
|
||||
<div style="margin-bottom:8px;">
|
||||
<input type="text" id="tts-test-text" value="Hallo Stefan, ich bin ARIA." placeholder="Test-Text..." style="background:#1E1E2E;border:1px solid #2A2A3E;border-radius:6px;padding:8px;color:#fff;font-size:13px;width:100%;box-sizing:border-box;">
|
||||
</div>
|
||||
<div style="display:flex;gap:8px;">
|
||||
<button class="btn" onclick="testTTS('')" style="flex:1;">XTTS testen</button>
|
||||
<button class="btn secondary" onclick="checkTTSStatus()" style="flex:1;">Status pruefen</button>
|
||||
</div>
|
||||
<div id="tts-log" style="margin-top:12px;max-height:200px;overflow-y:auto;font-size:11px;font-family:monospace;color:#8888AA;"></div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
@@ -340,6 +375,184 @@
|
||||
<!-- ══════ TAB: Einstellungen ══════ -->
|
||||
<div id="tab-settings" class="main-tab">
|
||||
|
||||
<!-- Betriebsmodus -->
|
||||
<div class="settings-section">
|
||||
<h2>Betriebsmodus</h2>
|
||||
<div class="card" style="max-width:500px;">
|
||||
<div id="mode-selector" style="display:grid;grid-template-columns:1fr 1fr;gap:8px;">
|
||||
<button class="btn mode-btn" data-mode="normal" onclick="setMode('normal')" style="background:#1E1E2E;border:2px solid transparent;">
|
||||
<span style="font-size:18px;">🟢</span> Normal<br><span style="font-size:10px;color:#8888AA;">Hoert zu, antwortet, spricht</span>
|
||||
</button>
|
||||
<button class="btn mode-btn" data-mode="nicht_stoeren" onclick="setMode('nicht_stoeren')" style="background:#1E1E2E;border:2px solid transparent;">
|
||||
<span style="font-size:18px;">🔴</span> Nicht stoeren<br><span style="font-size:10px;color:#8888AA;">Nur Kritikalarme</span>
|
||||
</button>
|
||||
<button class="btn mode-btn" data-mode="fluester" onclick="setMode('fluester')" style="background:#1E1E2E;border:2px solid transparent;">
|
||||
<span style="font-size:18px;">🟡</span> Fluestern<br><span style="font-size:10px;color:#8888AA;">Nur Text, keine Sprache</span>
|
||||
</button>
|
||||
<button class="btn mode-btn" data-mode="hangar" onclick="setMode('hangar')" style="background:#1E1E2E;border:2px solid transparent;">
|
||||
<span style="font-size:18px;">✈️</span> Hangar<br><span style="font-size:10px;color:#8888AA;">Nur wichtige Meldungen</span>
|
||||
</button>
|
||||
<button class="btn mode-btn" data-mode="gaming" onclick="setMode('gaming')" style="background:#1E1E2E;border:2px solid transparent;grid-column:1/-1;">
|
||||
<span style="font-size:18px;">🎮</span> Gaming<br><span style="font-size:10px;color:#8888AA;">Nur direkte Fragen</span>
|
||||
</button>
|
||||
</div>
|
||||
<div style="margin-top:8px;font-size:11px;color:#555570;" id="mode-status">Aktueller Modus: Normal</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<!-- Stimmen -->
|
||||
<div class="settings-section">
|
||||
<h2>Sprachausgabe</h2>
|
||||
<div class="card" style="max-width:500px;">
|
||||
<!-- TTS aktiv (global) -->
|
||||
<div style="display:flex;align-items:center;gap:12px;margin-bottom:12px;">
|
||||
<label style="color:#8888AA;font-size:12px;">TTS aktiv:</label>
|
||||
<label class="toggle"><input type="checkbox" id="diag-tts-enabled" checked onchange="sendVoiceConfig()"><span class="slider"></span></label>
|
||||
</div>
|
||||
|
||||
<!-- XTTS Stimme -->
|
||||
<div style="display:flex;align-items:center;gap:12px;margin-bottom:12px;">
|
||||
<label style="color:#8888AA;font-size:12px;">XTTS Stimme:</label>
|
||||
<select id="diag-xtts-voice" onchange="sendVoiceConfig()" style="background:#1E1E2E;color:#fff;border:1px solid #2A2A3E;border-radius:6px;padding:6px 10px;font-size:13px;">
|
||||
<option value="">Standard (XTTS Default)</option>
|
||||
</select>
|
||||
<button class="btn secondary" onclick="loadXTTSVoices()" style="padding:4px 10px;font-size:11px;">Laden</button>
|
||||
</div>
|
||||
|
||||
<!-- Gecloned Stimmen — Liste mit Loeschen -->
|
||||
<div id="xtts-voice-list" style="margin-bottom:12px;"></div>
|
||||
|
||||
<!-- Voice Cloning -->
|
||||
<div style="background:#1E1E2E;border-radius:8px;padding:12px;margin-top:8px;">
|
||||
<div style="color:#0096FF;font-size:13px;font-weight:600;margin-bottom:8px;">Stimme klonen</div>
|
||||
<div style="color:#8888AA;font-size:11px;margin-bottom:8px;">
|
||||
Lade ein oder mehrere Audio-Samples hoch (WAV/MP3, min. 6-10 Sekunden).
|
||||
Mehrere Dateien werden automatisch zusammengefuegt.
|
||||
</div>
|
||||
<div style="margin-bottom:8px;">
|
||||
<input type="text" id="xtts-clone-name" placeholder="Name fuer die Stimme..." style="background:#0D0D1A;border:1px solid #2A2A3E;border-radius:6px;padding:6px 10px;color:#fff;font-size:13px;width:100%;box-sizing:border-box;">
|
||||
</div>
|
||||
<div style="margin-bottom:8px;">
|
||||
<input type="file" id="xtts-clone-files" accept="audio/*" multiple style="color:#8888AA;font-size:12px;">
|
||||
</div>
|
||||
<div style="display:flex;gap:8px;">
|
||||
<button class="btn" onclick="uploadVoiceSamples()" style="flex:1;">Stimme erstellen</button>
|
||||
</div>
|
||||
<div id="xtts-clone-status" style="font-size:11px;color:#555570;margin-top:6px;"></div>
|
||||
</div>
|
||||
|
||||
<!-- XTTS Status -->
|
||||
<div style="margin-top:8px;font-size:11px;color:#555570;" id="xtts-status">
|
||||
XTTS-Server: Nicht verbunden (starte xtts/ auf dem Gaming-PC)
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<!-- Whisper (STT) -->
|
||||
<div class="settings-section">
|
||||
<h2>Whisper (Spracherkennung)</h2>
|
||||
<div style="font-size:11px;color:#8888AA;margin-bottom:8px;">
|
||||
Aenderungen werden sofort an die Bridge gesendet und das Modell neu geladen
|
||||
(kann bei medium/large 10-30s dauern — waehrend dieser Zeit ist STT kurz pausiert).
|
||||
</div>
|
||||
<div class="card" style="max-width:500px;">
|
||||
<div style="display:flex;align-items:center;gap:12px;margin-bottom:8px;">
|
||||
<label style="color:#8888AA;font-size:12px;min-width:80px;">Modell:</label>
|
||||
<select id="diag-whisper-model" onchange="sendVoiceConfig()" style="flex:1;background:#1E1E2E;color:#fff;border:1px solid #2A2A3E;border-radius:6px;padding:6px 10px;font-size:13px;">
|
||||
<option value="tiny">tiny (39MB, schnell, niedrige Qualitaet)</option>
|
||||
<option value="base">base (74MB, schnell, ok)</option>
|
||||
<option value="small">small (244MB, mittel)</option>
|
||||
<option value="medium" selected>medium (769MB, gut — Empfehlung)</option>
|
||||
<option value="large-v3">large-v3 (1.5GB, beste Qualitaet, langsam auf CPU)</option>
|
||||
</select>
|
||||
</div>
|
||||
<div style="font-size:10px;color:#555570;">
|
||||
Tipp: <code>medium</code> ist der beste Kompromiss fuer CPU. <code>large-v3</code> nur bei GPU sinnvoll.
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<!-- Runtime-Konfiguration (migriert von .env) -->
|
||||
<div class="settings-section">
|
||||
<h2>Runtime-Konfiguration</h2>
|
||||
<div style="font-size:11px;color:#8888AA;margin-bottom:8px;">
|
||||
Werte werden in <code>/shared/config/runtime.json</code> persistiert und
|
||||
ueberschreiben die ENV-Variablen aus <code>aria.env</code>. Bridge liest
|
||||
sie beim naechsten Start — nach Aenderung <b>Bridge-Container neu starten</b>
|
||||
(Diagnostic-Container bleibt auf ENV).
|
||||
</div>
|
||||
<div class="card" style="max-width:600px;">
|
||||
<div style="display:grid;grid-template-columns:140px 1fr;gap:8px 10px;align-items:center;font-size:13px;">
|
||||
<label style="color:#8888AA;">RVS Host:</label>
|
||||
<input type="text" id="rc-rvs-host" style="width:100%;box-sizing:border-box;background:#1E1E2E;border:1px solid #2A2A3E;border-radius:4px;padding:6px;color:#fff;">
|
||||
<label style="color:#8888AA;">RVS Port:</label>
|
||||
<input type="text" id="rc-rvs-port" style="width:100%;box-sizing:border-box;background:#1E1E2E;border:1px solid #2A2A3E;border-radius:4px;padding:6px;color:#fff;">
|
||||
<label style="color:#8888AA;">RVS TLS:</label>
|
||||
<select id="rc-rvs-tls" style="width:100%;box-sizing:border-box;background:#1E1E2E;border:1px solid #2A2A3E;border-radius:4px;padding:6px;color:#fff;">
|
||||
<option value="true">true (wss://)</option>
|
||||
<option value="false">false (ws://)</option>
|
||||
</select>
|
||||
<label style="color:#8888AA;">RVS Token:</label>
|
||||
<div style="display:flex;gap:4px;min-width:0;">
|
||||
<input type="password" id="rc-rvs-token" style="flex:1;min-width:0;box-sizing:border-box;background:#1E1E2E;border:1px solid #2A2A3E;border-radius:4px;padding:6px;color:#fff;font-family:monospace;">
|
||||
<button type="button" class="btn secondary" onclick="toggleSecret('rc-rvs-token', this)" style="padding:4px 10px;flex-shrink:0;" title="Anzeigen/Verbergen">👁</button>
|
||||
</div>
|
||||
<label style="color:#8888AA;">Aria Auth Token:</label>
|
||||
<div style="display:flex;gap:4px;min-width:0;">
|
||||
<input type="password" id="rc-auth-token" style="flex:1;min-width:0;box-sizing:border-box;background:#1E1E2E;border:1px solid #2A2A3E;border-radius:4px;padding:6px;color:#fff;font-family:monospace;">
|
||||
<button type="button" class="btn secondary" onclick="toggleSecret('rc-auth-token', this)" style="padding:4px 10px;flex-shrink:0;" title="Anzeigen/Verbergen">👁</button>
|
||||
</div>
|
||||
</div>
|
||||
<div style="display:flex;gap:8px;margin-top:12px;">
|
||||
<button class="btn" onclick="saveRuntimeConfig()" style="flex:1;">Speichern</button>
|
||||
<button class="btn secondary" onclick="loadRuntimeConfig()" style="flex:1;">Neu laden</button>
|
||||
</div>
|
||||
<div id="rc-status" style="font-size:11px;color:#555570;margin-top:6px;"></div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<!-- App-Onboarding via QR-Code -->
|
||||
<div class="settings-section">
|
||||
<h2>App-Onboarding (QR-Code)</h2>
|
||||
<div style="font-size:11px;color:#8888AA;margin-bottom:8px;">
|
||||
RVS-Credentials als QR-Code — App scannt, keine manuelle Eingabe.
|
||||
Enthaelt Host, Port, TLS-Flag und Token.
|
||||
</div>
|
||||
<div class="card" style="max-width:500px;">
|
||||
<div style="display:flex;gap:12px;align-items:flex-start;">
|
||||
<div id="onboarding-qr" style="width:220px;height:220px;flex-shrink:0;background:#1E1E2E;border-radius:6px;overflow:hidden;display:flex;align-items:center;justify-content:center;color:#555570;font-size:11px;text-align:center;">
|
||||
QR-Code wird geladen...
|
||||
</div>
|
||||
<div style="flex:1;font-size:11px;color:#8888AA;line-height:1.5;">
|
||||
<div style="color:#FF9500;font-weight:bold;margin-bottom:4px;">Achtung</div>
|
||||
Dieser QR enthaelt den RVS-Token im Klartext — zeige ihn niemandem,
|
||||
speichere keine Screenshots davon in unsicheren Cloud-Diensten.
|
||||
<button class="btn" onclick="loadOnboardingQR()" style="margin-top:10px;width:100%;">
|
||||
QR neu generieren
|
||||
</button>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<!-- Highlight-Trigger -->
|
||||
<div class="settings-section">
|
||||
<h2>Highlight-Trigger</h2>
|
||||
<div style="font-size:11px;color:#8888AA;margin-bottom:8px;">
|
||||
Woerter die automatisch die Highlight-Stimme (Thorsten) ausloesen.
|
||||
Eines pro Zeile. Aenderungen werden in der Bridge gespeichert.
|
||||
</div>
|
||||
<div class="card" style="max-width:500px;">
|
||||
<textarea id="highlight-triggers" rows="8" style="width:100%;box-sizing:border-box;background:#1E1E2E;border:1px solid #2A2A3E;border-radius:6px;padding:8px;color:#fff;font-size:13px;font-family:monospace;resize:vertical;"
|
||||
placeholder="Lade..."></textarea>
|
||||
<div style="display:flex;gap:8px;margin-top:8px;">
|
||||
<button class="btn" onclick="saveHighlightTriggers()" style="flex:1;">Speichern</button>
|
||||
<button class="btn secondary" onclick="loadHighlightTriggers()" style="flex:1;">Neu laden</button>
|
||||
</div>
|
||||
<div id="trigger-status" style="font-size:11px;color:#555570;margin-top:6px;"></div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<!-- Tool-Berechtigungen -->
|
||||
<div class="settings-section">
|
||||
<h2>Tool-Berechtigungen</h2>
|
||||
@@ -420,6 +633,7 @@
|
||||
bridge: document.getElementById('log-bridge'),
|
||||
server: document.getElementById('log-server'),
|
||||
pipeline: document.getElementById('log-pipeline'),
|
||||
tts: document.getElementById('log-tts'),
|
||||
};
|
||||
|
||||
// Scroll-Pause pro aktivem Tab
|
||||
@@ -513,11 +727,106 @@
|
||||
if (msg.type === 'state') { updateState(msg.state); return; }
|
||||
if (msg.type === 'log') { addLog(msg.entry.level, msg.entry.source, msg.entry.message, msg.entry.ts); return; }
|
||||
|
||||
if (msg.type === 'tts_result') {
|
||||
if (msg.ok) {
|
||||
ttsLog(`\u2705 ${msg.voice}: ${msg.duration}ms, ${msg.size} bytes`);
|
||||
document.getElementById('tts-status').textContent = 'OK';
|
||||
document.getElementById('tts-status').style.color = '#34C759';
|
||||
} else {
|
||||
ttsLog(`\u274C Fehler: ${msg.error}`);
|
||||
document.getElementById('tts-status').textContent = 'Fehler';
|
||||
document.getElementById('tts-status').style.color = '#FF3B30';
|
||||
document.getElementById('tts-last-error').textContent = msg.error;
|
||||
}
|
||||
return;
|
||||
}
|
||||
if (msg.type === 'tts_status') {
|
||||
document.getElementById('tts-status').textContent = msg.ok ? 'OK' : 'Fehler';
|
||||
document.getElementById('tts-status').style.color = msg.ok ? '#34C759' : '#FF3B30';
|
||||
if (msg.error) { document.getElementById('tts-last-error').textContent = msg.error; ttsLog(`Fehler: ${msg.error}`); }
|
||||
else { document.getElementById('tts-last-error').textContent = '-'; ttsLog('TTS OK'); }
|
||||
return;
|
||||
}
|
||||
|
||||
if (msg.type === 'agent_activity') {
|
||||
updateThinkingIndicator(msg);
|
||||
return;
|
||||
}
|
||||
|
||||
if (msg.type === 'mode' && msg.payload) {
|
||||
// Bridge hat den Modus geaendert (evtl. von anderer App/Diagnostic) — UI syncen
|
||||
const mode = (msg.payload.mode || '').toLowerCase();
|
||||
if (MODE_LABELS[mode]) {
|
||||
updateModeUI(mode);
|
||||
log("info", "server", `Mode-Sync: ${mode}`);
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
if (msg.type === 'xtts_voices_list') {
|
||||
const select = document.getElementById('diag-xtts-voice');
|
||||
// Aktuelle Auswahl merken damit Rebuild sie nicht zerstoert
|
||||
const previouslySelected = select.value;
|
||||
while (select.options.length > 1) select.remove(1);
|
||||
const voices = msg.payload?.voices || [];
|
||||
for (const v of voices) {
|
||||
const opt = document.createElement('option');
|
||||
opt.value = v.name;
|
||||
opt.textContent = `${v.name} (${(v.size / 1024).toFixed(0)}KB)`;
|
||||
select.appendChild(opt);
|
||||
}
|
||||
// Wenn die vorherige Auswahl weiter existiert → wiederherstellen
|
||||
if (previouslySelected && voices.some(v => v.name === previouslySelected)) {
|
||||
select.value = previouslySelected;
|
||||
}
|
||||
document.getElementById('xtts-status').textContent = `XTTS: ${voices.length} Stimme(n) verfuegbar`;
|
||||
document.getElementById('xtts-status').style.color = '#34C759';
|
||||
renderVoiceList(voices);
|
||||
return;
|
||||
}
|
||||
if (msg.type === 'xtts_voice_saved') {
|
||||
document.getElementById('xtts-clone-status').textContent = `Stimme "${msg.payload?.name}" gespeichert!`;
|
||||
document.getElementById('xtts-clone-status').style.color = '#34C759';
|
||||
loadXTTSVoices(); // Liste neu laden
|
||||
return;
|
||||
}
|
||||
|
||||
if (msg.type === 'voice_config') {
|
||||
document.getElementById('diag-tts-enabled').checked = msg.ttsEnabled !== false;
|
||||
// XTTS-Voice setzen — Option hinzufuegen falls nicht vorhanden
|
||||
const xttsSelect = document.getElementById('diag-xtts-voice');
|
||||
const xttsVoice = msg.xttsVoice || '';
|
||||
if (xttsVoice && !Array.from(xttsSelect.options).some(o => o.value === xttsVoice)) {
|
||||
const opt = document.createElement('option');
|
||||
opt.value = xttsVoice;
|
||||
opt.textContent = xttsVoice;
|
||||
xttsSelect.appendChild(opt);
|
||||
}
|
||||
xttsSelect.value = xttsVoice;
|
||||
// Whisper-Modell wiederherstellen (falls gesetzt)
|
||||
if (msg.whisperModel) {
|
||||
const wSel = document.getElementById('diag-whisper-model');
|
||||
if (wSel) wSel.value = msg.whisperModel;
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
if (msg.type === 'trigger_list') {
|
||||
const textarea = document.getElementById('highlight-triggers');
|
||||
textarea.value = (msg.triggers || []).join('\n');
|
||||
document.getElementById('trigger-status').textContent = msg.triggers.length + ' Trigger geladen';
|
||||
document.getElementById('trigger-status').style.color = '#8888AA';
|
||||
return;
|
||||
}
|
||||
|
||||
if (msg.type === 'watchdog') {
|
||||
const colors = { warning: '#FFD60A', fixing: '#FF9500', fixed: '#34C759', error: '#FF3B30' };
|
||||
const color = colors[msg.status] || '#FFD60A';
|
||||
addChat('error', `\u26A0\uFE0F Watchdog: ${msg.message}`, `system — ${msg.status}`);
|
||||
addLog('warn', 'server', `Watchdog: ${msg.message}`);
|
||||
return;
|
||||
}
|
||||
|
||||
if (msg.type === 'chat_final') {
|
||||
addChat('received', msg.text, 'chat:final');
|
||||
return;
|
||||
@@ -627,6 +936,18 @@
|
||||
else alert('Loeschen fehlgeschlagen: ' + (msg.error || '?'));
|
||||
return;
|
||||
}
|
||||
if (msg.type === 'session_export') {
|
||||
if (!msg.ok) { alert('Export fehlgeschlagen: ' + (msg.error || '?')); return; }
|
||||
const blob = new Blob([msg.markdown], { type: 'text/markdown;charset=utf-8' });
|
||||
const url = URL.createObjectURL(blob);
|
||||
const a = document.createElement('a');
|
||||
a.href = url;
|
||||
a.download = msg.filename;
|
||||
document.body.appendChild(a);
|
||||
a.click();
|
||||
setTimeout(() => { URL.revokeObjectURL(url); a.remove(); }, 100);
|
||||
return;
|
||||
}
|
||||
if (msg.type === 'active_session') {
|
||||
updateActiveSessionBar(msg.sessionKey);
|
||||
loadSessions(); // Tabelle neu rendern
|
||||
@@ -681,21 +1002,39 @@
|
||||
}
|
||||
}
|
||||
|
||||
function sendDiagAttachments() {
|
||||
// Alle pending Dateien an RVS senden
|
||||
for (const f of diagPendingFiles) {
|
||||
send({ action: 'send_file', name: f.name, type: f.type, size: f.size, base64: f.base64 });
|
||||
}
|
||||
if (diagPendingFiles.length > 0) {
|
||||
addChat('sent', `${diagPendingFiles.length} Anhang/Anhaenge`, 'Datei');
|
||||
}
|
||||
diagPendingFiles = [];
|
||||
renderDiagPending();
|
||||
}
|
||||
|
||||
function testGateway() {
|
||||
const input = document.getElementById('chat-input');
|
||||
const text = input.value.trim();
|
||||
if (!text) return;
|
||||
addChat('sent', text, 'Gateway direkt');
|
||||
send({ action: 'test_gateway', text });
|
||||
if (!text && diagPendingFiles.length === 0) return;
|
||||
if (diagPendingFiles.length > 0) sendDiagAttachments();
|
||||
if (text) {
|
||||
addChat('sent', text, 'Gateway direkt');
|
||||
send({ action: 'test_gateway', text });
|
||||
}
|
||||
input.value = '';
|
||||
}
|
||||
|
||||
function testRVS() {
|
||||
const input = document.getElementById('chat-input');
|
||||
const text = input.value.trim();
|
||||
if (!text) return;
|
||||
addChat('sent', text, 'via RVS');
|
||||
send({ action: 'test_rvs', text });
|
||||
if (!text && diagPendingFiles.length === 0) return;
|
||||
if (diagPendingFiles.length > 0) sendDiagAttachments();
|
||||
if (text) {
|
||||
addChat('sent', text, 'via RVS');
|
||||
send({ action: 'test_rvs', text });
|
||||
}
|
||||
input.value = '';
|
||||
}
|
||||
|
||||
@@ -886,14 +1225,55 @@
|
||||
});
|
||||
}
|
||||
|
||||
function addChat(type, text, meta) {
|
||||
// Debug-Toggle: TTS-aufbereitete Variante unter ARIA-Nachrichten einblenden
|
||||
let showTtsDebug = localStorage.getItem('aria-show-tts-debug') === '1';
|
||||
function toggleTtsDebug() {
|
||||
showTtsDebug = !showTtsDebug;
|
||||
localStorage.setItem('aria-show-tts-debug', showTtsDebug ? '1' : '0');
|
||||
const el = document.getElementById('tts-debug-toggle');
|
||||
if (el) el.checked = showTtsDebug;
|
||||
}
|
||||
|
||||
// Minimal-JS-Port von clean_text_for_tts() (Bridge) — reine Anzeige
|
||||
function previewTtsText(text) {
|
||||
if (!text) return '';
|
||||
// <voice>...</voice>
|
||||
const vm = text.match(/<voice>([\s\S]*?)<\/voice>/i);
|
||||
if (vm) text = vm[1];
|
||||
let t = text;
|
||||
t = t.replace(/```[\s\S]*?```/g, '. ');
|
||||
t = t.replace(/`[^`]+`/g, '');
|
||||
t = t.replace(/\*\*([^*]+)\*\*/g, '$1');
|
||||
t = t.replace(/\*([^*]+)\*/g, '$1');
|
||||
t = t.replace(/\[([^\]]+)\]\([^)]+\)/g, '$1');
|
||||
t = t.replace(/https?:\/\/\S+/g, 'ein Link');
|
||||
t = t.replace(/^#{1,6}\s*/gm, '');
|
||||
t = t.replace(/^>\s*/gm, '');
|
||||
t = t.replace(/^[\-\*]\s+/gm, '');
|
||||
t = t.replace(/(\d+)GB\b/g, '$1 Gigabyte');
|
||||
t = t.replace(/(\d+)MB\b/g, '$1 Megabyte');
|
||||
t = t.replace(/%/g, ' Prozent');
|
||||
t = t.replace(/\bCPU\b/g, 'C P U').replace(/\bAPI\b/g, 'A P I').replace(/\bRAM\b/g, 'R A M');
|
||||
t = t.replace(/\n{2,}/g, '. ').replace(/\n/g, ', ').replace(/\s{2,}/g, ' ');
|
||||
return t.trim();
|
||||
}
|
||||
|
||||
function addChat(type, text, meta, options) {
|
||||
const escaped = escapeHtml(text);
|
||||
let linked = linkifyText(escaped);
|
||||
// /shared/uploads/ Pfade als Inline-Bilder anzeigen
|
||||
linked = linked.replace(/\/shared\/uploads\/[^\s<"]+\.(jpg|jpeg|png|gif)/gi, (match) => {
|
||||
return `<a href="${match}" target="_blank">${match}</a><img src="${match}" class="chat-media" onclick="openLightbox('image','${match}')" onerror="this.style.display='none'">`;
|
||||
});
|
||||
const html = `${linked}<div class="meta">${escapeHtml(meta)} — ${new Date().toLocaleTimeString('de-DE')}</div>`;
|
||||
// Optional: TTS-Variante als zusaetzliches Block unter der Nachricht
|
||||
let ttsBlock = '';
|
||||
if (showTtsDebug && type === 'received') {
|
||||
const ttsText = (options && options.ttsText) || previewTtsText(text);
|
||||
if (ttsText && ttsText !== text) {
|
||||
ttsBlock = `<div style="margin-top:6px;padding:4px 8px;background:rgba(0,150,255,0.08);border-left:2px solid #0096FF;font-size:11px;color:#88AACC;"><span style="color:#0096FF;font-weight:bold;">TTS:</span> ${escapeHtml(ttsText)}</div>`;
|
||||
}
|
||||
}
|
||||
const html = `${linked}${ttsBlock}<div class="meta">${escapeHtml(meta)} — ${new Date().toLocaleTimeString('de-DE')}</div>`;
|
||||
|
||||
// Thinking-Indikator ausblenden bei neuer Nachricht
|
||||
updateThinkingIndicator({ activity: 'idle' });
|
||||
@@ -981,7 +1361,11 @@
|
||||
label = 'ARIA schreibt...';
|
||||
}
|
||||
|
||||
indicators.forEach(el => { if (el) el.style.display = 'block'; });
|
||||
indicators.forEach((el, i) => {
|
||||
if (!el) return;
|
||||
// Haupt-Indicator ist flex (Abbrechen-Button rechts), Vollbild-Variante block
|
||||
el.style.display = i === 0 ? 'flex' : 'block';
|
||||
});
|
||||
texts.forEach(el => { if (el) el.textContent = label; });
|
||||
|
||||
// Auto-Hide nach 2min (falls idle Event verpasst wird — ARIA arbeitet max 15min)
|
||||
@@ -991,6 +1375,319 @@
|
||||
}, 120000);
|
||||
}
|
||||
|
||||
// ── XTTS Panel ─────────────────────────────
|
||||
function renderVoiceList(voices) {
|
||||
const box = document.getElementById('xtts-voice-list');
|
||||
if (!box) return;
|
||||
if (!voices || voices.length === 0) {
|
||||
box.innerHTML = '<div style="color:#555570;font-size:11px;">Noch keine eigenen Stimmen vorhanden.</div>';
|
||||
return;
|
||||
}
|
||||
let html = '<div style="color:#8888AA;font-size:11px;margin-bottom:4px;">Geclonte Stimmen:</div>';
|
||||
html += '<div style="display:flex;flex-direction:column;gap:4px;">';
|
||||
for (const v of voices) {
|
||||
const esc = (s) => String(s).replace(/[&<>"']/g, c => ({ "&":"&", "<":"<", ">":">", '"':""", "'":"'" }[c]));
|
||||
html += `<div style="display:flex;align-items:center;gap:8px;background:#1E1E2E;border-radius:4px;padding:4px 8px;font-size:12px;">`
|
||||
+ `<span style="flex:1;color:#E0E0F0;">${esc(v.name)}</span>`
|
||||
+ `<span style="color:#555570;font-size:10px;">${(v.size/1024).toFixed(0)}KB</span>`
|
||||
+ `<button class="btn secondary" onclick="deleteXttsVoice('${esc(v.name).replace(/'/g, "\\'")}')" style="padding:2px 8px;font-size:10px;color:#FF6B6B;" title="Stimme loeschen">X</button>`
|
||||
+ `</div>`;
|
||||
}
|
||||
html += '</div>';
|
||||
box.innerHTML = html;
|
||||
}
|
||||
|
||||
function deleteXttsVoice(name) {
|
||||
if (!confirm(`Stimme "${name}" endgueltig loeschen?`)) return;
|
||||
send({ action: 'xtts_delete_voice', name });
|
||||
// Bei aktueller Auswahl: auf Default zuruecksetzen
|
||||
const sel = document.getElementById('diag-xtts-voice');
|
||||
if (sel.value === name) { sel.value = ''; sendVoiceConfig(); }
|
||||
}
|
||||
|
||||
// Legacy no-op (XTTS ist jetzt die einzige Engine, kein Panel-Toggle noetig)
|
||||
function toggleXTTSPanel() {
|
||||
void 0;
|
||||
if (engine === 'xtts') loadXTTSVoices();
|
||||
}
|
||||
|
||||
function loadXTTSVoices() {
|
||||
send({ action: 'xtts_list_voices' });
|
||||
}
|
||||
|
||||
function arrayBufferToBase64(buffer) {
|
||||
const bytes = new Uint8Array(buffer);
|
||||
let binary = '';
|
||||
for (let i = 0; i < bytes.length; i += 8192) {
|
||||
binary += String.fromCharCode.apply(null, bytes.subarray(i, i + 8192));
|
||||
}
|
||||
return btoa(binary);
|
||||
}
|
||||
|
||||
async function uploadVoiceSamples() {
|
||||
const name = document.getElementById('xtts-clone-name').value.trim();
|
||||
const files = document.getElementById('xtts-clone-files').files;
|
||||
if (!name) { alert('Bitte einen Namen eingeben'); return; }
|
||||
if (!files || files.length === 0) { alert('Bitte Audio-Dateien auswaehlen'); return; }
|
||||
if (files.length > 10) { alert('Maximal 10 Dateien'); return; }
|
||||
|
||||
const status = document.getElementById('xtts-clone-status');
|
||||
status.textContent = `Lade ${files.length} Datei(en)...`;
|
||||
status.style.color = '#FFD60A';
|
||||
|
||||
try {
|
||||
const samples = [];
|
||||
for (let i = 0; i < files.length; i++) {
|
||||
status.textContent = `Lese Datei ${i + 1}/${files.length}: ${files[i].name}...`;
|
||||
const buffer = await files[i].arrayBuffer();
|
||||
const base64 = arrayBufferToBase64(buffer);
|
||||
samples.push({ base64, name: files[i].name, size: files[i].size });
|
||||
}
|
||||
|
||||
const totalSize = samples.reduce((s, f) => s + f.size, 0);
|
||||
status.textContent = `Sende ${samples.length} Sample(s) (${(totalSize / 1024).toFixed(0)}KB)...`;
|
||||
|
||||
send({ action: 'voice_upload', name, samples });
|
||||
|
||||
status.textContent = `Gesendet — warte auf Bestaetigung vom XTTS-Server...`;
|
||||
} catch (err) {
|
||||
status.textContent = `Fehler: ${err.message}`;
|
||||
status.style.color = '#FF3B30';
|
||||
}
|
||||
}
|
||||
|
||||
// ── Diagnostic Anhang-Handling ─────────────
|
||||
let diagPendingFiles = [];
|
||||
|
||||
function handleDiagFileSelect(files) {
|
||||
for (const file of files) {
|
||||
const reader = new FileReader();
|
||||
reader.onload = () => {
|
||||
const base64 = reader.result.split(',')[1];
|
||||
diagPendingFiles.push({ name: file.name, type: file.type, size: file.size, base64 });
|
||||
renderDiagPending();
|
||||
};
|
||||
reader.readAsDataURL(file);
|
||||
}
|
||||
}
|
||||
|
||||
function handleDiagPaste(event) {
|
||||
const items = event.clipboardData?.items;
|
||||
if (!items) return;
|
||||
for (const item of items) {
|
||||
if (item.kind === 'file') {
|
||||
event.preventDefault();
|
||||
const file = item.getAsFile();
|
||||
if (file) handleDiagFileSelect([file]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
function renderDiagPending() {
|
||||
const container = document.getElementById('diag-pending-attachments');
|
||||
if (diagPendingFiles.length === 0) {
|
||||
container.style.display = 'none';
|
||||
return;
|
||||
}
|
||||
container.style.display = 'flex';
|
||||
container.innerHTML = diagPendingFiles.map((f, i) => {
|
||||
const isImage = f.type.startsWith('image/');
|
||||
const preview = isImage ? `<img src="data:${f.type};base64,${f.base64}" style="width:40px;height:40px;border-radius:4px;object-fit:cover;">` : `<span style="font-size:20px;">📄</span>`;
|
||||
return `<div style="position:relative;display:inline-block;">
|
||||
${preview}
|
||||
<span onclick="removeDiagPending(${i})" style="position:absolute;top:-4px;right:-4px;width:16px;height:16px;border-radius:8px;background:#FF3B30;color:#fff;font-size:10px;cursor:pointer;display:flex;align-items:center;justify-content:center;">X</span>
|
||||
</div>`;
|
||||
}).join('') + `<span style="color:#8888AA;font-size:11px;margin-left:4px;">${diagPendingFiles.length} Datei(en)</span>
|
||||
<span onclick="diagPendingFiles=[];renderDiagPending();" style="color:#FF3B30;font-size:11px;cursor:pointer;margin-left:8px;">Alle X</span>`;
|
||||
}
|
||||
|
||||
function removeDiagPending(idx) {
|
||||
diagPendingFiles.splice(idx, 1);
|
||||
renderDiagPending();
|
||||
}
|
||||
|
||||
// ── Abbrechen ──────────────────────────────
|
||||
function cancelRequest() {
|
||||
send({ action: 'cancel_request' });
|
||||
updateThinkingIndicator({ activity: 'idle' });
|
||||
addChat('error', 'Anfrage abgebrochen', 'system');
|
||||
}
|
||||
|
||||
// ── Stimmen-Config ──────────────────────────
|
||||
function sendVoiceConfig() {
|
||||
const ttsEnabled = document.getElementById('diag-tts-enabled').checked;
|
||||
const xttsVoice = document.getElementById('diag-xtts-voice').value;
|
||||
const whisperModel = document.getElementById('diag-whisper-model').value;
|
||||
send({ action: 'send_voice_config', ttsEnabled, xttsVoice, whisperModel });
|
||||
}
|
||||
|
||||
// ── Passwort-Feld Anzeigen/Verbergen ─────────────────────
|
||||
function toggleSecret(inputId, btn) {
|
||||
const el = document.getElementById(inputId);
|
||||
if (!el) return;
|
||||
if (el.type === 'password') {
|
||||
el.type = 'text';
|
||||
btn.innerHTML = '👀'; // 👀
|
||||
btn.title = 'Verbergen';
|
||||
} else {
|
||||
el.type = 'password';
|
||||
btn.innerHTML = '👁'; // 👁
|
||||
btn.title = 'Anzeigen';
|
||||
}
|
||||
}
|
||||
|
||||
// ── Runtime-Konfiguration ─────────────────────
|
||||
async function loadRuntimeConfig() {
|
||||
const statusEl = document.getElementById('rc-status');
|
||||
statusEl.textContent = 'Lade...';
|
||||
try {
|
||||
const resp = await fetch('/api/runtime-config');
|
||||
const cfg = await resp.json();
|
||||
document.getElementById('rc-rvs-host').value = cfg.RVS_HOST || '';
|
||||
document.getElementById('rc-rvs-port').value = cfg.RVS_PORT || '443';
|
||||
document.getElementById('rc-rvs-tls').value = String(cfg.RVS_TLS) === 'false' ? 'false' : 'true';
|
||||
document.getElementById('rc-rvs-token').value = cfg.RVS_TOKEN || '';
|
||||
document.getElementById('rc-auth-token').value = cfg.ARIA_AUTH_TOKEN || '';
|
||||
statusEl.textContent = 'Geladen.';
|
||||
statusEl.style.color = '#34C759';
|
||||
loadOnboardingQR(); // QR bei Config-Wechsel neu generieren
|
||||
} catch (e) {
|
||||
statusEl.textContent = 'Fehler: ' + e.message;
|
||||
statusEl.style.color = '#FF6B6B';
|
||||
}
|
||||
}
|
||||
|
||||
async function saveRuntimeConfig() {
|
||||
const statusEl = document.getElementById('rc-status');
|
||||
statusEl.textContent = 'Speichere...';
|
||||
const patch = {
|
||||
RVS_HOST: document.getElementById('rc-rvs-host').value.trim(),
|
||||
RVS_PORT: document.getElementById('rc-rvs-port').value.trim(),
|
||||
RVS_TLS: document.getElementById('rc-rvs-tls').value,
|
||||
RVS_TOKEN: document.getElementById('rc-rvs-token').value.trim(),
|
||||
ARIA_AUTH_TOKEN: document.getElementById('rc-auth-token').value.trim(),
|
||||
};
|
||||
try {
|
||||
const resp = await fetch('/api/runtime-config', {
|
||||
method: 'POST',
|
||||
headers: { 'Content-Type': 'application/json' },
|
||||
body: JSON.stringify(patch),
|
||||
});
|
||||
const data = await resp.json();
|
||||
if (data.ok) {
|
||||
statusEl.textContent = 'Gespeichert — Bridge-Container fuer Uebernahme neu starten.';
|
||||
statusEl.style.color = '#FFD60A';
|
||||
loadOnboardingQR(); // QR mit neuem Token
|
||||
} else {
|
||||
throw new Error(data.error || 'Unbekannt');
|
||||
}
|
||||
} catch (e) {
|
||||
statusEl.textContent = 'Fehler: ' + e.message;
|
||||
statusEl.style.color = '#FF6B6B';
|
||||
}
|
||||
}
|
||||
|
||||
// ── App-Onboarding QR-Code ────────────────────
|
||||
let qrLibReady = false;
|
||||
function ensureQRLib() {
|
||||
return new Promise((resolve) => {
|
||||
if (qrLibReady || window.qrcode) { qrLibReady = true; resolve(); return; }
|
||||
const s = document.createElement('script');
|
||||
s.src = 'https://cdn.jsdelivr.net/npm/qrcode-generator@1.4.4/qrcode.min.js';
|
||||
s.onload = () => { qrLibReady = true; resolve(); };
|
||||
s.onerror = () => resolve(); // silent fail
|
||||
document.head.appendChild(s);
|
||||
});
|
||||
}
|
||||
|
||||
async function loadOnboardingQR() {
|
||||
const box = document.getElementById('onboarding-qr');
|
||||
box.textContent = 'Lade...';
|
||||
try {
|
||||
await ensureQRLib();
|
||||
if (!window.qrcode) throw new Error('QR-Library nicht geladen');
|
||||
const resp = await fetch('/api/onboarding');
|
||||
const cfg = await resp.json();
|
||||
if (!cfg.rvsHost || !cfg.rvsToken) {
|
||||
box.innerHTML = '<div style="color:#FF6B6B;">RVS nicht konfiguriert (ENV Variablen fehlen)</div>';
|
||||
return;
|
||||
}
|
||||
// Format kompatibel mit android/src/components/QRScanner.tsx parseQRData()
|
||||
const payload = JSON.stringify({
|
||||
host: cfg.rvsHost,
|
||||
port: Number(cfg.rvsPort) || 443,
|
||||
tls: cfg.rvsTLS !== false,
|
||||
token: cfg.rvsToken,
|
||||
});
|
||||
const qr = window.qrcode(0, 'M');
|
||||
qr.addData(payload);
|
||||
qr.make();
|
||||
// Als SVG rendern — skaliert sauber auf Container-Groesse
|
||||
box.innerHTML = qr.createSvgTag({ cellSize: 4, margin: 2, scalable: true });
|
||||
const svg = box.querySelector('svg');
|
||||
if (svg) {
|
||||
svg.style.cssText = 'width:100%;height:100%;background:#fff;border-radius:4px;padding:6px;box-sizing:border-box;display:block;';
|
||||
}
|
||||
} catch (e) {
|
||||
box.innerHTML = `<div style="color:#FF6B6B;">Fehler: ${e.message}</div>`;
|
||||
}
|
||||
}
|
||||
|
||||
// ── Highlight-Trigger ────────────────────────
|
||||
function loadHighlightTriggers() {
|
||||
send({ action: 'get_triggers' });
|
||||
}
|
||||
function saveHighlightTriggers() {
|
||||
const text = document.getElementById('highlight-triggers').value;
|
||||
const triggers = text.split('\n').map(t => t.trim()).filter(t => t.length > 0);
|
||||
send({ action: 'save_triggers', triggers });
|
||||
document.getElementById('trigger-status').textContent = 'Gespeichert (' + triggers.length + ' Trigger)';
|
||||
document.getElementById('trigger-status').style.color = '#34C759';
|
||||
}
|
||||
// Beim Tab-Wechsel zu Einstellungen: Trigger laden
|
||||
const origSwitchMainTab = typeof switchMainTab === 'function' ? switchMainTab : null;
|
||||
|
||||
// ── Modus-Wechsel ────────────────────────────
|
||||
// Kanonische IDs (matchen bridge/modes.py canonical_id + android ModeSelector)
|
||||
const MODE_LABELS = { normal: 'Normal', nicht_stoeren: 'Nicht stoeren', fluester: 'Fluestern', hangar: 'Hangar', gaming: 'Gaming' };
|
||||
let currentMode = 'normal';
|
||||
|
||||
function updateModeUI(mode) {
|
||||
currentMode = mode;
|
||||
document.querySelectorAll('.mode-btn').forEach(btn => {
|
||||
btn.style.borderColor = btn.dataset.mode === mode ? '#0096FF' : 'transparent';
|
||||
});
|
||||
const label = MODE_LABELS[mode] || mode;
|
||||
document.getElementById('mode-status').textContent = `Aktueller Modus: ${label}`;
|
||||
}
|
||||
|
||||
function setMode(mode) {
|
||||
// Optimistic UI-Update — Bridge bestaetigt per Broadcast
|
||||
updateModeUI(mode);
|
||||
// Sauberer Weg: type=mode via RVS an Bridge — die broadcastet an alle Clients
|
||||
send({ action: 'set_mode', mode });
|
||||
}
|
||||
|
||||
// ── TTS Diagnose ─────────────────────────────
|
||||
function ttsLog(msg) {
|
||||
const el = document.getElementById('tts-log');
|
||||
const time = new Date().toLocaleTimeString('de-DE');
|
||||
el.innerHTML += `<div>[${time}] ${escapeHtml(msg)}</div>`;
|
||||
el.scrollTop = el.scrollHeight;
|
||||
}
|
||||
|
||||
function testTTS(voice) {
|
||||
const text = document.getElementById('tts-test-text').value.trim();
|
||||
if (!text) return;
|
||||
ttsLog(`Teste ${voice}: "${text}"...`);
|
||||
send({ action: 'test_tts', voice, text });
|
||||
}
|
||||
|
||||
function checkTTSStatus() {
|
||||
ttsLog('Pruefe TTS-Status...');
|
||||
send({ action: 'check_tts' });
|
||||
}
|
||||
|
||||
function openLightbox(mediaType, url) {
|
||||
const lb = document.getElementById('lightbox');
|
||||
if (mediaType === 'video') {
|
||||
@@ -1203,32 +1900,60 @@
|
||||
: '<div style="color:#555570;padding:8px;text-align:center;">Keine Sessions gefunden</div>';
|
||||
return;
|
||||
}
|
||||
let html = '<table style="width:100%;border-collapse:collapse;">';
|
||||
html += '<tr style="color:#8888AA;font-size:10px;text-align:left;border-bottom:1px solid #1E1E2E;">'
|
||||
|
||||
const active = data.sessions.filter(s => !s.archived);
|
||||
const archives = data.sessions.filter(s => s.archived);
|
||||
|
||||
const headerRow = '<tr style="color:#8888AA;font-size:10px;text-align:left;border-bottom:1px solid #1E1E2E;">'
|
||||
+ '<th style="padding:4px 6px;">Session</th>'
|
||||
+ '<th style="padding:4px 6px;">Msgs</th>'
|
||||
+ '<th style="padding:4px 6px;">Zuletzt</th>'
|
||||
+ '<th style="padding:4px 6px;"></th></tr>';
|
||||
for (const s of data.sessions) {
|
||||
|
||||
const rowFor = (s, opts) => {
|
||||
const date = s.modified ? new Date(s.modified * 1000).toLocaleString('de-DE', {day:'2-digit',month:'2-digit',hour:'2-digit',minute:'2-digit'}) : '?';
|
||||
const key = escapeHtml(s.sessionKey || s.path.split('/').pop());
|
||||
const orphanBadge = s.orphan ? ' <span style="background:#FF3B30;color:#fff;font-size:9px;padding:1px 4px;border-radius:3px;">verwaist</span>' : '';
|
||||
const archivedBadge = s.archived ? ' <span style="background:#555570;color:#fff;font-size:9px;padding:1px 4px;border-radius:3px;">archiv</span>' : '';
|
||||
const modelBadge = s.model ? `<div style="font-size:9px;color:#555570;">${escapeHtml(s.model)}</div>` : '';
|
||||
const isActive = (s.sessionKey === currentActiveSession);
|
||||
const keyColor = isActive ? '#34C759' : (s.orphan ? '#555570' : '#E0E0F0');
|
||||
const isActive = (s.sessionKey === currentActiveSession) && !s.archived;
|
||||
const keyColor = isActive ? '#34C759' : (s.archived || s.orphan ? '#8888AA' : '#E0E0F0');
|
||||
const activeBadge = isActive ? ' <span style="background:#34C759;color:#000;font-size:9px;padding:1px 4px;border-radius:3px;">aktiv</span>' : '';
|
||||
const rowBg = isActive ? 'background:rgba(52,199,89,0.08);' : '';
|
||||
html += `<tr style="border-bottom:1px solid #0D0D1A;cursor:pointer;${rowBg}" onmouseover="this.style.background='#1E1E2E'" onmouseout="this.style.background='${isActive ? 'rgba(52,199,89,0.08)' : ''}'">`
|
||||
const rowBg = isActive ? 'background:rgba(52,199,89,0.08);' : (s.archived ? 'background:rgba(136,136,170,0.04);' : '');
|
||||
|
||||
let actions = '';
|
||||
if (s.archived) {
|
||||
// Archive: nur Export + Loeschen (kein Aktivieren — wuerde aktive Session ueberschreiben)
|
||||
actions = `<button class="btn secondary" onclick="event.stopPropagation();deleteSession('${escapeHtml(s.path)}')" style="padding:2px 6px;font-size:10px;color:#FF6B6B;margin-right:2px;" title="Archiv endgueltig loeschen">X</button>`
|
||||
+ `<button class="btn secondary" onclick="event.stopPropagation();exportSession('${escapeHtml(s.path)}','${escapeHtml(s.sessionKey)}')" style="padding:2px 6px;font-size:10px;color:#8888AA;" title="Als Markdown exportieren">⬇</button>`;
|
||||
} else {
|
||||
actions = (isActive ? '' : `<button class="btn secondary" onclick="event.stopPropagation();activateSession('${escapeHtml(s.sessionKey)}')" style="padding:2px 6px;font-size:10px;color:#34C759;margin-right:2px;" title="Aktivieren">▶</button>`)
|
||||
+ `<button class="btn secondary" onclick="event.stopPropagation();deleteSession('${escapeHtml(s.path)}')" style="padding:2px 6px;font-size:10px;color:#FF6B6B;margin-right:2px;" title="Loeschen">X</button>`
|
||||
+ `<button class="btn secondary" onclick="event.stopPropagation();exportSession('${escapeHtml(s.path)}','${escapeHtml(s.sessionKey)}')" style="padding:2px 6px;font-size:10px;color:#8888AA;" title="Als Markdown exportieren">⬇</button>`;
|
||||
}
|
||||
|
||||
return `<tr style="border-bottom:1px solid #0D0D1A;cursor:pointer;${rowBg}" onmouseover="this.style.background='#1E1E2E'" onmouseout="this.style.background='${isActive ? 'rgba(52,199,89,0.08)' : (s.archived ? 'rgba(136,136,170,0.04)' : '')}'">`
|
||||
+ `<td style="padding:4px 6px;" onclick="viewSession('${escapeHtml(s.path)}')">`
|
||||
+ `<div style="color:${keyColor};">${key}${activeBadge}${orphanBadge}</div>${modelBadge}</td>`
|
||||
+ `<div style="color:${keyColor};">${key}${activeBadge}${orphanBadge}${archivedBadge}</div>${modelBadge}</td>`
|
||||
+ `<td style="padding:4px 6px;color:#8888AA;">${s.lines}</td>`
|
||||
+ `<td style="padding:4px 6px;color:#8888AA;font-size:10px;">${date}</td>`
|
||||
+ `<td style="padding:4px 6px;white-space:nowrap;">`
|
||||
+ (isActive ? '' : `<button class="btn secondary" onclick="event.stopPropagation();activateSession('${escapeHtml(s.sessionKey)}')" style="padding:2px 6px;font-size:10px;color:#34C759;margin-right:2px;" title="Aktivieren">▶</button>`)
|
||||
+ `<button class="btn secondary" onclick="event.stopPropagation();deleteSession('${escapeHtml(s.path)}')" style="padding:2px 6px;font-size:10px;color:#FF6B6B;" title="Loeschen">X</button>`
|
||||
+ `</td></tr>`;
|
||||
}
|
||||
+ `<td style="padding:4px 6px;white-space:nowrap;">${actions}</td></tr>`;
|
||||
};
|
||||
|
||||
let html = '<table style="width:100%;border-collapse:collapse;">' + headerRow;
|
||||
for (const s of active) html += rowFor(s);
|
||||
html += '</table>';
|
||||
|
||||
if (archives.length > 0) {
|
||||
html += `<details style="margin-top:12px;" ${archives.length <= 5 ? 'open' : ''}>`
|
||||
+ `<summary style="color:#8888AA;font-size:11px;cursor:pointer;padding:4px 0;">`
|
||||
+ `Archivierte Versionen (${archives.length}) — von OpenClaw beim Session-Reset gesichert`
|
||||
+ `</summary>`
|
||||
+ `<table style="width:100%;border-collapse:collapse;margin-top:6px;">` + headerRow;
|
||||
for (const s of archives) html += rowFor(s);
|
||||
html += '</table></details>';
|
||||
}
|
||||
|
||||
container.innerHTML = html;
|
||||
}
|
||||
|
||||
@@ -1289,6 +2014,10 @@
|
||||
send({ action: 'delete_session', sessionPath: path });
|
||||
}
|
||||
|
||||
function exportSession(path, sessionKey) {
|
||||
send({ action: 'export_session', sessionPath: path, sessionKey });
|
||||
}
|
||||
|
||||
function activateSession(sessionKey) {
|
||||
send({ action: 'set_active_session', sessionKey });
|
||||
}
|
||||
@@ -1389,6 +2118,13 @@
|
||||
document.querySelectorAll('.main-nav-btn').forEach(b => {
|
||||
if (b.textContent.trim().toLowerCase().includes(tab === 'main' ? 'main' : 'einstellung')) b.classList.add('active');
|
||||
});
|
||||
// Einstellungen: Config + Trigger + QR laden
|
||||
if (tab === 'settings') {
|
||||
loadHighlightTriggers();
|
||||
send({ action: 'get_voice_config' });
|
||||
loadRuntimeConfig();
|
||||
loadOnboardingQR();
|
||||
}
|
||||
}
|
||||
|
||||
// ── Einstellungen: Tool-Berechtigungen ──────────────────
|
||||
@@ -1415,6 +2151,10 @@
|
||||
send({ action: 'get_openclaw_config' });
|
||||
}
|
||||
|
||||
// Toggle-Checkbox initial korrekt setzen
|
||||
const ttsToggleEl = document.getElementById('tts-debug-toggle');
|
||||
if (ttsToggleEl) ttsToggleEl.checked = showTtsDebug;
|
||||
|
||||
connectWS();
|
||||
</script>
|
||||
</body>
|
||||
|
||||
+509
-69
@@ -37,15 +37,76 @@ const state = {
|
||||
};
|
||||
const SESSION_KEY_FILE = "/data/active-session";
|
||||
// /data Verzeichnis sicherstellen (Volume Mount)
|
||||
try { fs.mkdirSync("/data", { recursive: true }); } catch {}
|
||||
try { fs.mkdirSync("/data", { recursive: true }); } catch (e) {
|
||||
console.error(`[startup] /data mkdir fehlgeschlagen: ${e.message}`);
|
||||
}
|
||||
// sessionFromFile zeigt an, ob der aktive Key aus der Datei kam.
|
||||
// Wenn true, darf resolveActiveSession NICHT mehr auto-picken (Wahl respektieren).
|
||||
let sessionFromFile = false;
|
||||
let activeSessionKey = (() => {
|
||||
try {
|
||||
const saved = fs.readFileSync(SESSION_KEY_FILE, "utf-8").trim();
|
||||
if (saved) { console.log(`[startup] Gespeicherte Session geladen: '${saved}'`); return saved; }
|
||||
} catch {}
|
||||
if (saved) {
|
||||
console.log(`[startup] Gespeicherte Session geladen: '${saved}'`);
|
||||
sessionFromFile = true;
|
||||
return saved;
|
||||
}
|
||||
} catch (e) {
|
||||
console.error(`[startup] SESSION_KEY_FILE read: ${e.code || e.message}`);
|
||||
}
|
||||
console.log("[startup] Keine gespeicherte Session — Fallback 'main'");
|
||||
return "main";
|
||||
})();
|
||||
|
||||
// ── Runtime-Config: /shared/config/runtime.json ─────────────
|
||||
// ENV-Werte sind Defaults; Werte aus runtime.json haben Vorrang.
|
||||
// Bridge und ggf. andere Komponenten lesen dieselbe Datei.
|
||||
const RUNTIME_CONFIG_FILE = "/shared/config/runtime.json";
|
||||
const RUNTIME_CONFIG_FIELDS = [
|
||||
"RVS_HOST", "RVS_PORT", "RVS_TLS", "RVS_TOKEN",
|
||||
"ARIA_AUTH_TOKEN", "WHISPER_MODEL", "WHISPER_LANGUAGE",
|
||||
];
|
||||
function readRuntimeConfig() {
|
||||
const envDefaults = {
|
||||
RVS_HOST, RVS_PORT, RVS_TLS, RVS_TOKEN,
|
||||
ARIA_AUTH_TOKEN: process.env.ARIA_AUTH_TOKEN || "",
|
||||
WHISPER_MODEL: process.env.WHISPER_MODEL || "medium",
|
||||
WHISPER_LANGUAGE: process.env.WHISPER_LANGUAGE || "de",
|
||||
};
|
||||
try {
|
||||
const raw = fs.readFileSync(RUNTIME_CONFIG_FILE, "utf-8");
|
||||
const parsed = JSON.parse(raw);
|
||||
return { ...envDefaults, ...parsed };
|
||||
} catch {
|
||||
return envDefaults;
|
||||
}
|
||||
}
|
||||
function writeRuntimeConfig(patch) {
|
||||
let current = {};
|
||||
try { current = JSON.parse(fs.readFileSync(RUNTIME_CONFIG_FILE, "utf-8")); } catch {}
|
||||
for (const key of Object.keys(patch)) {
|
||||
if (RUNTIME_CONFIG_FIELDS.includes(key)) current[key] = patch[key];
|
||||
}
|
||||
fs.mkdirSync("/shared/config", { recursive: true });
|
||||
const tmp = RUNTIME_CONFIG_FILE + ".tmp";
|
||||
fs.writeFileSync(tmp, JSON.stringify(current, null, 2));
|
||||
fs.renameSync(tmp, RUNTIME_CONFIG_FILE);
|
||||
}
|
||||
|
||||
// Atomic write: temp-file + rename, laute Logs bei Fehler.
|
||||
function persistActiveSession(key) {
|
||||
try {
|
||||
const tmp = SESSION_KEY_FILE + ".tmp";
|
||||
fs.writeFileSync(tmp, key);
|
||||
fs.renameSync(tmp, SESSION_KEY_FILE);
|
||||
sessionFromFile = true;
|
||||
console.log(`[session] Aktive Session persistiert: '${key}'`);
|
||||
return true;
|
||||
} catch (e) {
|
||||
console.error(`[session] FEHLER beim Persistieren von '${key}': ${e.message}`);
|
||||
return false;
|
||||
}
|
||||
}
|
||||
const logs = [];
|
||||
let gatewayWs = null;
|
||||
let rvsWs = null;
|
||||
@@ -56,6 +117,12 @@ const browserClients = new Set();
|
||||
let pipelineActive = false;
|
||||
let pipelineStartTime = 0;
|
||||
|
||||
// Nach chat:final kommen oft noch Trailing Agent-Events. Waehrend dieses
|
||||
// Fensters unterdruecken wir agent_activity-Broadcasts, damit der
|
||||
// Thinking-Indicator nicht wieder anspringt.
|
||||
let lastChatFinalAt = 0;
|
||||
const SETTLED_WINDOW_MS = 3000;
|
||||
|
||||
function plog(message, level) {
|
||||
const elapsed = pipelineActive ? `+${Date.now() - pipelineStartTime}ms` : "";
|
||||
const entry = { ts: new Date().toISOString(), level: level || "info", source: "pipeline", message: `${elapsed ? `[${elapsed}] ` : ""}${message}` };
|
||||
@@ -91,6 +158,9 @@ function pipelineEnd(ok, detail) {
|
||||
}
|
||||
plog(`━━━ Pipeline Ende ━━━`);
|
||||
pipelineActive = false;
|
||||
// Thinking-Indikator IMMER zuruecksetzen — auch bei Timeout/Fehler/Abbruch
|
||||
broadcast({ type: "agent_activity", activity: "idle" });
|
||||
pendingMessageTime = 0;
|
||||
}
|
||||
|
||||
// ── Auto-Restart bei Netzwerk-Namespace-Verlust ──────
|
||||
@@ -257,8 +327,10 @@ async function connectGateway() {
|
||||
state.gateway.handshakeOk = false;
|
||||
gatewayWs = null;
|
||||
broadcastState();
|
||||
// Stuck "ARIA denkt..." vermeiden, falls Gateway waehrend Pipeline abkackt
|
||||
if (pipelineActive) pipelineEnd(false, `Gateway-Verbindung verloren (${code})`);
|
||||
else broadcast({ type: "agent_activity", activity: "idle" });
|
||||
checkGatewayHealth();
|
||||
// Auto-Reconnect nach 5s
|
||||
setTimeout(connectGateway, 5000);
|
||||
});
|
||||
|
||||
@@ -325,17 +397,23 @@ function handleGatewayMessage(msg) {
|
||||
broadcast({ type: "chat_delta", delta, payload });
|
||||
}
|
||||
|
||||
// Nach chat:final trickeln noch Aufraeum-Events rein — unterdruecken,
|
||||
// damit der Thinking-Indicator nicht wieder anspringt.
|
||||
const settled = lastChatFinalAt && (Date.now() - lastChatFinalAt) < SETTLED_WINDOW_MS;
|
||||
|
||||
// Tool-Nutzung erkennen und broadcasten
|
||||
if (stream === "tool_use" || data.type === "tool_use") {
|
||||
const toolName = data.name || data.tool || payload.tool || "";
|
||||
if (toolName) {
|
||||
if (toolName && !settled) {
|
||||
broadcast({ type: "agent_activity", activity: "tool", tool: toolName, data });
|
||||
log("info", "gateway", `Tool: ${toolName}`);
|
||||
}
|
||||
}
|
||||
|
||||
// Genereller Activity-Heartbeat (ARIA denkt)
|
||||
broadcast({ type: "agent_activity", activity: stream || "thinking" });
|
||||
if (!settled) {
|
||||
broadcast({ type: "agent_activity", activity: stream || "thinking" });
|
||||
}
|
||||
updateAgentActivity();
|
||||
return;
|
||||
}
|
||||
|
||||
@@ -348,10 +426,31 @@ function handleGatewayMessage(msg) {
|
||||
const runId = payload.runId || "";
|
||||
if (runId && seenFinalRuns.has(runId)) return; // Duplikat
|
||||
if (runId) { seenFinalRuns.add(runId); setTimeout(() => seenFinalRuns.delete(runId), 60000); }
|
||||
|
||||
// NO_REPLY → ARIA signalisiert "nicht antworten", Pipeline beenden aber nichts zeigen
|
||||
const trimmed = (text || "").trim().replace(/^["'`*.\s]+|["'`*.\s]+$/g, "").toUpperCase();
|
||||
if (trimmed === "NO_REPLY" || trimmed.startsWith("NO_REPLY")) {
|
||||
log("info", "gateway", "NO_REPLY empfangen — still verworfen");
|
||||
lastChatFinalAt = Date.now();
|
||||
if (pipelineActive) pipelineEnd(true, "NO_REPLY (stumm)");
|
||||
broadcast({ type: "agent_activity", activity: "idle" });
|
||||
pendingMessageTime = 0;
|
||||
updateAgentActivity();
|
||||
return;
|
||||
}
|
||||
|
||||
log("info", "gateway", `ANTWORT: "${text.slice(0, 200)}"`);
|
||||
lastChatFinalAt = Date.now();
|
||||
if (pipelineActive) pipelineEnd(true, `"${text.slice(0, 120)}"`);
|
||||
broadcast({ type: "chat_final", text, payload });
|
||||
broadcast({ type: "agent_activity", activity: "idle" });
|
||||
pendingMessageTime = 0; // Watchdog: Antwort erhalten
|
||||
updateAgentActivity();
|
||||
// Antwort in Backup-Log schreiben
|
||||
try {
|
||||
const entry = JSON.stringify({ ts: Date.now(), role: "assistant", text: text.slice(0, 2000), session: activeSessionKey }) + "\n";
|
||||
fs.appendFileSync("/shared/config/chat_backup.jsonl", entry);
|
||||
} catch {}
|
||||
return;
|
||||
}
|
||||
|
||||
@@ -364,6 +463,7 @@ function handleGatewayMessage(msg) {
|
||||
const error = payload.error || text || "Unbekannt";
|
||||
log("error", "gateway", `Chat-Fehler: ${error}`);
|
||||
if (pipelineActive) pipelineEnd(false, error);
|
||||
else broadcast({ type: "agent_activity", activity: "idle" });
|
||||
broadcast({ type: "chat_error", error, payload });
|
||||
return;
|
||||
}
|
||||
@@ -384,7 +484,9 @@ function handleGatewayMessage(msg) {
|
||||
if (runId) { seenFinalRuns.add(runId); setTimeout(() => seenFinalRuns.delete(runId), 60000); }
|
||||
const text = extractChatText(payload) || payload.text || "";
|
||||
log("info", "gateway", `ANTWORT: "${text.slice(0, 200)}"`);
|
||||
lastChatFinalAt = Date.now();
|
||||
if (pipelineActive) pipelineEnd(true, `"${text.slice(0, 120)}"`);
|
||||
else broadcast({ type: "agent_activity", activity: "idle" });
|
||||
broadcast({ type: "chat_final", text, payload });
|
||||
return;
|
||||
}
|
||||
@@ -392,6 +494,7 @@ function handleGatewayMessage(msg) {
|
||||
const error = payload.error || payload.message || "Unbekannt";
|
||||
log("error", "gateway", `Chat-Fehler: ${error}`);
|
||||
if (pipelineActive) pipelineEnd(false, error);
|
||||
else broadcast({ type: "agent_activity", activity: "idle" });
|
||||
broadcast({ type: "chat_error", error, payload });
|
||||
return;
|
||||
}
|
||||
@@ -424,6 +527,13 @@ function sendToGateway(text, isPipeline) {
|
||||
const payload = JSON.stringify(msg);
|
||||
log("debug", "gateway", `RAW >>> ${payload}`);
|
||||
gatewayWs.send(payload);
|
||||
pendingMessageTime = Date.now(); // Watchdog: Nachricht gesendet
|
||||
// Nachricht sofort in Backup-Log schreiben (OpenClaw speichert erst nach Run-Ende)
|
||||
try {
|
||||
fs.mkdirSync("/shared/config", { recursive: true });
|
||||
const entry = JSON.stringify({ ts: Date.now(), role: "user", text, session: activeSessionKey }) + "\n";
|
||||
fs.appendFileSync("/shared/config/chat_backup.jsonl", entry);
|
||||
} catch {}
|
||||
log("info", "gateway", `chat.send [${reqId}]: "${text}"`);
|
||||
if (isPipeline) plog(`chat.send [${reqId}] an Gateway gesendet — warte auf ACK...`);
|
||||
|
||||
@@ -512,6 +622,10 @@ function connectRVS(forcePlain) {
|
||||
}});
|
||||
} else if (msg.type === "heartbeat") {
|
||||
// ignorieren
|
||||
} else if (msg.type === "mode") {
|
||||
// Mode-Broadcast von der Bridge → an Browser-Clients weiterreichen
|
||||
log("info", "rvs", `Mode-Broadcast: ${msg.payload?.mode} (${msg.payload?.name})`);
|
||||
broadcast({ type: "mode", payload: msg.payload });
|
||||
} else {
|
||||
log("debug", "rvs", `Nachricht: ${JSON.stringify(msg).slice(0, 150)}`);
|
||||
}
|
||||
@@ -545,55 +659,60 @@ function connectRVS(forcePlain) {
|
||||
});
|
||||
}
|
||||
|
||||
function sendToRVS(text, isPipeline) {
|
||||
if (!RVS_HOST || !RVS_TOKEN) {
|
||||
log("error", "rvs", "Nicht konfiguriert");
|
||||
if (isPipeline) pipelineEnd(false, "RVS nicht konfiguriert");
|
||||
return false;
|
||||
}
|
||||
|
||||
// Frische WebSocket-Verbindung fuer jede Nachricht (Zombie-Schutz)
|
||||
function sendToRVS_withResponse(sendType, sendPayload, expectType, clientWs) {
|
||||
if (!RVS_HOST || !RVS_TOKEN) return;
|
||||
const proto = RVS_TLS === "true" ? "wss" : "ws";
|
||||
const url = `${proto}://${RVS_HOST}:${RVS_PORT}?token=${RVS_TOKEN}`;
|
||||
const msg = JSON.stringify({
|
||||
const freshWs = new WebSocket(url);
|
||||
const timeout = setTimeout(() => {
|
||||
try { freshWs.close(); } catch (_) {}
|
||||
clientWs.send(JSON.stringify({ type: expectType, payload: { voices: [], error: "Timeout" }, timestamp: Date.now() }));
|
||||
}, 15000);
|
||||
freshWs.on("open", () => {
|
||||
freshWs.send(JSON.stringify({ type: sendType, payload: sendPayload, timestamp: Date.now() }));
|
||||
});
|
||||
freshWs.on("message", (raw) => {
|
||||
try {
|
||||
const resp = JSON.parse(raw.toString());
|
||||
if (resp.type === expectType) {
|
||||
clearTimeout(timeout);
|
||||
clientWs.send(JSON.stringify(resp));
|
||||
setTimeout(() => { try { freshWs.close(); } catch (_) {} }, 1000);
|
||||
}
|
||||
} catch {}
|
||||
});
|
||||
freshWs.on("error", () => {});
|
||||
}
|
||||
|
||||
function sendToRVS_raw(msgObj) {
|
||||
if (!RVS_HOST || !RVS_TOKEN) return;
|
||||
const proto = RVS_TLS === "true" ? "wss" : "ws";
|
||||
const url = `${proto}://${RVS_HOST}:${RVS_PORT}?token=${RVS_TOKEN}`;
|
||||
const freshWs = new WebSocket(url);
|
||||
freshWs.on("open", () => {
|
||||
freshWs.send(JSON.stringify(msgObj));
|
||||
setTimeout(() => { try { freshWs.close(); } catch (_) {} }, 5000);
|
||||
});
|
||||
freshWs.on("error", () => {});
|
||||
}
|
||||
|
||||
function sendToRVS(text, isPipeline) {
|
||||
// Ueber Gateway senden (zuverlaessig) UND an RVS fuer App-Sichtbarkeit
|
||||
// Die Bridge empfaengt RVS-Nachrichten von der App zuverlaessig,
|
||||
// aber die Diagnostic→RVS→Bridge Route hat Zombie-Probleme.
|
||||
// Deshalb: Gateway fuer ARIA, RVS nur fuer App-Anzeige.
|
||||
|
||||
// 1. An Gateway senden (damit ARIA antwortet)
|
||||
const gatewayOk = sendToGateway(text, isPipeline);
|
||||
|
||||
// 2. An RVS senden (damit die App die Nachricht sieht)
|
||||
sendToRVS_raw({
|
||||
type: "chat",
|
||||
payload: { text, sender: "diagnostic" },
|
||||
timestamp: Date.now(),
|
||||
});
|
||||
|
||||
log("info", "rvs", `Sende via frische Verbindung: ${url.split('?')[0]}`);
|
||||
|
||||
const freshWs = new WebSocket(url);
|
||||
freshWs.on("open", () => {
|
||||
freshWs.send(msg);
|
||||
log("info", "rvs", `Gesendet via RVS: "${text}"`);
|
||||
// Verbindung offen lassen fuer Antwort-Empfang, nach 5min schliessen
|
||||
setTimeout(() => { try { freshWs.close(); } catch (_) {} }, 300000);
|
||||
});
|
||||
freshWs.on("message", (raw) => {
|
||||
try {
|
||||
const resp = JSON.parse(raw.toString());
|
||||
if (resp.type === "chat" && resp.payload) {
|
||||
const sender = resp.payload.sender || "?";
|
||||
// Eigene Nachrichten und STT ignorieren (werden von persistenter Verbindung gehandelt)
|
||||
if (sender === "diagnostic" || sender === "stt") return;
|
||||
log("info", "rvs", `Chat von ${sender}: "${(resp.payload.text || "").slice(0, 100)}"`);
|
||||
if (pipelineActive && sender !== "diagnostic") {
|
||||
pipelineEnd(true, `Antwort via RVS von ${sender}: "${(resp.payload.text || "").slice(0, 120)}"`);
|
||||
}
|
||||
broadcast({ type: "rvs_chat", msg: resp });
|
||||
} else if (resp.type !== "heartbeat") {
|
||||
log("debug", "rvs", `Nachricht: ${JSON.stringify(resp).slice(0, 150)}`);
|
||||
}
|
||||
} catch {}
|
||||
});
|
||||
freshWs.on("error", (err) => {
|
||||
log("error", "rvs", `Sende-Fehler: ${err.message}`);
|
||||
if (isPipeline) pipelineEnd(false, `RVS Fehler: ${err.message}`);
|
||||
});
|
||||
|
||||
if (isPipeline) plog(`Nachricht an RVS gesendet — warte auf Antwort via RVS...`);
|
||||
return true;
|
||||
return gatewayOk;
|
||||
}
|
||||
|
||||
// ── Claude Proxy Test ────────────────────────────────────
|
||||
@@ -1017,6 +1136,64 @@ function waitForMessage(ws, timeoutMs) {
|
||||
});
|
||||
}
|
||||
|
||||
// ── Watchdog: Stuck Run Erkennung ────────────────────────
|
||||
|
||||
let lastAgentActivity = Date.now();
|
||||
let watchdogWarned = false;
|
||||
let watchdogFixAttempted = false;
|
||||
let pendingMessageTime = 0; // Wann wurde die letzte Nachricht gesendet
|
||||
|
||||
function updateAgentActivity() {
|
||||
lastAgentActivity = Date.now();
|
||||
watchdogWarned = false;
|
||||
}
|
||||
|
||||
// Watchdog prüft alle 30s ob ARIA nach einer gesendeten Nachricht reagiert
|
||||
setInterval(async () => {
|
||||
if (pendingMessageTime === 0) return; // Keine Nachricht gesendet
|
||||
const waitingMs = Date.now() - pendingMessageTime;
|
||||
|
||||
// Nach 2min ohne Agent-Activity: Warnung
|
||||
if (waitingMs > 120000 && !watchdogWarned) {
|
||||
watchdogWarned = true;
|
||||
log("warn", "server", `Watchdog: Keine ARIA-Aktivitaet seit ${Math.round(waitingMs / 1000)}s — moeglicherweise stuck`);
|
||||
broadcast({ type: "watchdog", status: "warning", waitingMs, message: "ARIA reagiert nicht — moeglicherweise stuck Run" });
|
||||
}
|
||||
|
||||
// Nach 5min: doctor --fix
|
||||
if (waitingMs > 300000 && watchdogWarned && !watchdogFixAttempted) {
|
||||
watchdogFixAttempted = true;
|
||||
log("error", "server", "Watchdog: 5min ohne Antwort — fuehre openclaw doctor --fix aus");
|
||||
broadcast({ type: "watchdog", status: "fixing", message: "Auto-Fix: openclaw doctor --fix" });
|
||||
try {
|
||||
await dockerExec("aria-core", "openclaw doctor --fix 2>/dev/null || true");
|
||||
log("info", "server", "Watchdog: doctor --fix ausgefuehrt");
|
||||
broadcast({ type: "watchdog", status: "fixed", message: "doctor --fix ausgefuehrt — warte auf Antwort..." });
|
||||
} catch (err) {
|
||||
log("error", "server", `Watchdog: doctor --fix fehlgeschlagen: ${err.message}`);
|
||||
}
|
||||
}
|
||||
|
||||
// Nach 8min: Container neustarten
|
||||
if (waitingMs > 480000 && watchdogFixAttempted) {
|
||||
log("error", "server", "Watchdog: 8min ohne Antwort — starte aria-core + aria-proxy neu");
|
||||
broadcast({ type: "watchdog", status: "restarting", message: "Container-Restart: aria-core + aria-proxy" });
|
||||
try {
|
||||
const { execSync } = require("child_process");
|
||||
execSync("docker restart aria-core aria-proxy", { timeout: 60000 });
|
||||
log("info", "server", "Watchdog: Container neugestartet");
|
||||
broadcast({ type: "watchdog", status: "restarted", message: "Container neugestartet — warte auf Gateway-Reconnect..." });
|
||||
// Gateway wird sich automatisch neu verbinden
|
||||
} catch (err) {
|
||||
log("error", "server", `Watchdog: Container-Restart fehlgeschlagen: ${err.message}`);
|
||||
broadcast({ type: "watchdog", status: "error", message: `Restart fehlgeschlagen: ${err.message}` });
|
||||
}
|
||||
pendingMessageTime = 0;
|
||||
watchdogWarned = false;
|
||||
watchdogFixAttempted = false;
|
||||
}
|
||||
}, 30000);
|
||||
|
||||
// ── HTTP Server + WebSocket fuer Browser ────────────────
|
||||
|
||||
const htmlPath = path.join(__dirname, "index.html");
|
||||
@@ -1031,6 +1208,45 @@ const server = http.createServer((req, res) => {
|
||||
} else if (req.url === "/api/session") {
|
||||
res.writeHead(200, { "Content-Type": "application/json" });
|
||||
res.end(JSON.stringify({ sessionKey: activeSessionKey }));
|
||||
} else if (req.url === "/api/runtime-config" && req.method === "GET") {
|
||||
// Zentrale Runtime-Config (ENV + Override aus /shared/config/runtime.json)
|
||||
res.writeHead(200, { "Content-Type": "application/json" });
|
||||
res.end(JSON.stringify(readRuntimeConfig()));
|
||||
} else if (req.url === "/api/runtime-config" && req.method === "POST") {
|
||||
let body = "";
|
||||
req.on("data", chunk => { body += chunk; if (body.length > 32768) req.destroy(); });
|
||||
req.on("end", () => {
|
||||
try {
|
||||
const patch = JSON.parse(body);
|
||||
writeRuntimeConfig(patch);
|
||||
res.writeHead(200, { "Content-Type": "application/json" });
|
||||
res.end(JSON.stringify({ ok: true, config: readRuntimeConfig() }));
|
||||
log("info", "server", `Runtime-Config aktualisiert: ${Object.keys(patch).join(", ")}`);
|
||||
} catch (err) {
|
||||
res.writeHead(400, { "Content-Type": "application/json" });
|
||||
res.end(JSON.stringify({ ok: false, error: err.message }));
|
||||
}
|
||||
});
|
||||
return;
|
||||
} else if (req.url === "/api/onboarding") {
|
||||
// RVS-Credentials fuer QR-Code App-Onboarding
|
||||
res.writeHead(200, { "Content-Type": "application/json" });
|
||||
res.end(JSON.stringify({
|
||||
rvsHost: RVS_HOST,
|
||||
rvsPort: RVS_PORT,
|
||||
rvsTLS: RVS_TLS === "true" || RVS_TLS === true,
|
||||
rvsToken: RVS_TOKEN,
|
||||
}));
|
||||
} else if (req.url === "/api/cancel" && req.method === "POST") {
|
||||
log("warn", "server", "HTTP /api/cancel — Cancel-Request (von Bridge)");
|
||||
pendingMessageTime = 0;
|
||||
watchdogWarned = false;
|
||||
watchdogFixAttempted = false;
|
||||
if (pipelineActive) pipelineEnd(false, "Vom Benutzer abgebrochen (App)");
|
||||
else broadcast({ type: "agent_activity", activity: "idle" });
|
||||
dockerExec("aria-core", "openclaw doctor --fix 2>/dev/null || true").catch(() => {});
|
||||
res.writeHead(200, { "Content-Type": "application/json" });
|
||||
res.end(JSON.stringify({ ok: true }));
|
||||
} else if (req.url.startsWith("/shared/")) {
|
||||
// Dateien aus Shared Volume ausliefern (Bilder, Uploads)
|
||||
const filePath = decodeURIComponent(req.url);
|
||||
@@ -1103,6 +1319,64 @@ wss.on("connection", (ws) => {
|
||||
if (ws._sshSock) ws._sshSock.write(msg.data);
|
||||
} else if (msg.action === "live_ssh_close") {
|
||||
if (ws._sshSock) { ws._sshSock.end(); ws._sshSock = null; }
|
||||
} else if (msg.action === "send_file") {
|
||||
// Datei von Diagnostic an Bridge via RVS senden
|
||||
sendToRVS_raw({
|
||||
type: "file",
|
||||
payload: { name: msg.name, type: msg.type, size: msg.size, base64: msg.base64 },
|
||||
timestamp: Date.now(),
|
||||
});
|
||||
log("info", "server", `Datei gesendet: ${msg.name} (${msg.type})`);
|
||||
} else if (msg.action === "cancel_request") {
|
||||
// Laufende Anfrage abbrechen — doctor --fix beendet stuck runs
|
||||
log("warn", "server", "Anfrage abgebrochen — fuehre doctor --fix aus");
|
||||
pendingMessageTime = 0;
|
||||
watchdogWarned = false;
|
||||
watchdogFixAttempted = false;
|
||||
if (pipelineActive) pipelineEnd(false, "Vom Benutzer abgebrochen");
|
||||
broadcast({ type: "agent_activity", activity: "idle" });
|
||||
dockerExec("aria-core", "openclaw doctor --fix 2>/dev/null || true").catch(() => {});
|
||||
} else if (msg.action === "voice_upload") {
|
||||
// Voice-Samples an XTTS-Bridge via RVS weiterleiten, auf Bestätigung warten
|
||||
log("info", "server", `Voice-Upload '${msg.name}' (${(msg.samples || []).length} Samples) sende an RVS...`);
|
||||
sendToRVS_withResponse("voice_upload", { name: msg.name, samples: msg.samples }, "xtts_voice_saved", ws);
|
||||
} else if (msg.action === "xtts_list_voices") {
|
||||
// Frische Verbindung die auf Antwort wartet
|
||||
sendToRVS_withResponse("xtts_list_voices", {}, "xtts_voices_list", ws);
|
||||
} else if (msg.action === "xtts_delete_voice") {
|
||||
// Weiterleiten an XTTS-Bridge, die antwortet mit neuer Liste
|
||||
sendToRVS_raw({ type: "xtts_delete_voice", payload: { name: msg.name }, timestamp: Date.now() });
|
||||
log("info", "server", `Voice-Delete '${msg.name}' an XTTS-Bridge gesendet`);
|
||||
} else if (msg.action === "set_mode") {
|
||||
// Mode-Wechsel → Bridge bearbeitet und broadcastet an alle Clients
|
||||
sendToRVS_raw({ type: "mode", payload: { mode: msg.mode }, timestamp: Date.now() });
|
||||
log("info", "server", `Mode-Wechsel angefordert: ${msg.mode}`);
|
||||
} else if (msg.action === "get_voice_config") {
|
||||
handleGetVoiceConfig(ws);
|
||||
} else if (msg.action === "send_voice_config") {
|
||||
// Stimmen-Config persistent speichern + an Bridge via RVS senden
|
||||
let existing = {};
|
||||
try { existing = JSON.parse(fs.readFileSync("/shared/config/voice_config.json", "utf-8")); } catch {}
|
||||
const voiceConfig = {
|
||||
...existing,
|
||||
ttsEnabled: msg.ttsEnabled !== false,
|
||||
xttsVoice: msg.xttsVoice || "",
|
||||
};
|
||||
if (msg.whisperModel !== undefined) voiceConfig.whisperModel = msg.whisperModel;
|
||||
try {
|
||||
fs.mkdirSync("/shared/config", { recursive: true });
|
||||
fs.writeFileSync("/shared/config/voice_config.json", JSON.stringify(voiceConfig, null, 2));
|
||||
} catch {}
|
||||
sendToRVS_raw({ type: "config", payload: voiceConfig, timestamp: Date.now() });
|
||||
log("info", "server", `Voice-Config gespeichert: xttsVoice=${voiceConfig.xttsVoice || "default"}, whisper=${voiceConfig.whisperModel || "-"}`);
|
||||
} else if (msg.action === "get_triggers") {
|
||||
handleGetTriggers(ws);
|
||||
} else if (msg.action === "save_triggers") {
|
||||
handleSaveTriggers(ws, msg.triggers || []);
|
||||
} else if (msg.action === "test_tts") {
|
||||
handleTestTTS(ws, msg.text || "Test");
|
||||
} else if (msg.action === "check_tts") {
|
||||
handleCheckTTS(ws);
|
||||
} else if (msg.action === "check_desktop") {
|
||||
checkDesktopAvailable(ws);
|
||||
} else if (msg.action === "load_chat_history") {
|
||||
@@ -1111,6 +1385,8 @@ wss.on("connection", (ws) => {
|
||||
handleListSessions(ws);
|
||||
} else if (msg.action === "read_session") {
|
||||
handleReadSession(ws, msg.sessionPath);
|
||||
} else if (msg.action === "export_session") {
|
||||
handleExportSession(ws, msg.sessionPath, msg.sessionKey);
|
||||
} else if (msg.action === "delete_session") {
|
||||
handleDeleteSession(ws, msg.sessionPath);
|
||||
} else if (msg.action === "set_active_session") {
|
||||
@@ -1229,6 +1505,78 @@ function startLiveSSH(clientWs) {
|
||||
createReq.end(createBody);
|
||||
}
|
||||
|
||||
// ── Voice-Config laden ────────────────────────────────
|
||||
|
||||
function handleGetVoiceConfig(clientWs) {
|
||||
try {
|
||||
const configPath = "/shared/config/voice_config.json";
|
||||
if (fs.existsSync(configPath)) {
|
||||
const config = JSON.parse(fs.readFileSync(configPath, "utf-8"));
|
||||
clientWs.send(JSON.stringify({ type: "voice_config", ...config }));
|
||||
} else {
|
||||
clientWs.send(JSON.stringify({ type: "voice_config", ttsEnabled: true, xttsVoice: "" }));
|
||||
}
|
||||
} catch (err) {
|
||||
clientWs.send(JSON.stringify({ type: "voice_config", ttsEnabled: true, xttsVoice: "" }));
|
||||
}
|
||||
}
|
||||
|
||||
// ── Highlight-Trigger (legacy UI — wird nicht mehr ausgewertet seit Piper raus) ─
|
||||
const TRIGGERS_FILE = "/shared/config/highlight_triggers.json";
|
||||
|
||||
async function handleGetTriggers(clientWs) {
|
||||
try {
|
||||
const triggers = fs.existsSync(TRIGGERS_FILE)
|
||||
? JSON.parse(fs.readFileSync(TRIGGERS_FILE, "utf-8"))
|
||||
: [];
|
||||
clientWs.send(JSON.stringify({ type: "trigger_list", triggers }));
|
||||
} catch (err) {
|
||||
clientWs.send(JSON.stringify({ type: "trigger_list", triggers: [], error: err.message }));
|
||||
}
|
||||
}
|
||||
|
||||
async function handleSaveTriggers(clientWs, triggers) {
|
||||
try {
|
||||
fs.mkdirSync("/shared/config", { recursive: true });
|
||||
fs.writeFileSync(TRIGGERS_FILE, JSON.stringify(triggers, null, 2));
|
||||
log("info", "server", `${triggers.length} Highlight-Trigger gespeichert`);
|
||||
clientWs.send(JSON.stringify({ type: "trigger_list", triggers }));
|
||||
} catch (err) {
|
||||
log("error", "server", `Trigger speichern fehlgeschlagen: ${err.message}`);
|
||||
}
|
||||
}
|
||||
|
||||
// ── TTS Diagnose (XTTS) ───────────────────────────────
|
||||
async function handleTestTTS(clientWs, text) {
|
||||
try {
|
||||
log("info", "server", `TTS-Test via XTTS: "${text}"`);
|
||||
// Via RVS an die XTTS-Bridge: xtts_request mit Test-Text
|
||||
const requestId = crypto.randomUUID();
|
||||
sendToRVS_raw({
|
||||
type: "xtts_request",
|
||||
payload: { text, language: "de", requestId, voice: "" },
|
||||
timestamp: Date.now(),
|
||||
});
|
||||
clientWs.send(JSON.stringify({ type: "tts_result", ok: true, duration: "pending", size: "?" }));
|
||||
} catch (err) {
|
||||
clientWs.send(JSON.stringify({ type: "tts_result", ok: false, error: err.message }));
|
||||
}
|
||||
}
|
||||
|
||||
async function handleCheckTTS(clientWs) {
|
||||
try {
|
||||
// XTTS-Status ueber RVS abfragen (xtts_list_voices)
|
||||
sendToRVS_raw({ type: "xtts_list_voices", payload: {}, timestamp: Date.now() });
|
||||
clientWs.send(JSON.stringify({
|
||||
type: "tts_status",
|
||||
ok: true,
|
||||
error: null,
|
||||
}));
|
||||
} catch (err) {
|
||||
clientWs.send(JSON.stringify({ type: "tts_status", ok: false, error: err.message }));
|
||||
}
|
||||
}
|
||||
|
||||
function checkDesktopAvailable(clientWs) {
|
||||
// Pruefen ob VNC auf der VM laeuft (Port 5900/5901)
|
||||
const checkSock = net.connect({ host: "host.docker.internal", port: 5901 }, () => {
|
||||
@@ -1265,17 +1613,17 @@ async function handleListSessions(clientWs) {
|
||||
try {
|
||||
log("info", "server", "Lade Sessions aus aria-core...");
|
||||
|
||||
// sessions.json als Index lesen + Datei-Details holen
|
||||
// sessions.json als Index lesen + Datei-Details holen (inkl. .reset.* Archive)
|
||||
const raw = await dockerExec("aria-core", `
|
||||
cat ${SESSIONS_DIR}/sessions.json 2>/dev/null || echo '{}' &&
|
||||
echo '===FILE_DETAILS===' &&
|
||||
for f in ${SESSIONS_DIR}/*.jsonl; do
|
||||
for f in ${SESSIONS_DIR}/*.jsonl ${SESSIONS_DIR}/*.jsonl.reset.*; do
|
||||
[ -f "$f" ] || continue
|
||||
name=$(basename "$f")
|
||||
lines=$(wc -l < "$f" 2>/dev/null || echo 0)
|
||||
msgs=$(grep -cE '"role":"(user|assistant)"' "$f" 2>/dev/null || echo 0)
|
||||
size=$(du -h "$f" 2>/dev/null | cut -f1)
|
||||
modified=$(stat -c '%Y' "$f" 2>/dev/null || echo 0)
|
||||
echo "FILE:$name|LINES:$lines|SIZE:$size|MODIFIED:$modified"
|
||||
echo "FILE:$name|LINES:$msgs|SIZE:$size|MODIFIED:$modified"
|
||||
done
|
||||
`.trim());
|
||||
|
||||
@@ -1330,8 +1678,29 @@ async function handleListSessions(clientWs) {
|
||||
delete fileDetails[filename];
|
||||
}
|
||||
|
||||
// Dateien die nicht im Index stehen (Waisen / Reset-Files)
|
||||
// Dateien die nicht im Index stehen (Waisen ODER Reset-Archive)
|
||||
for (const [filename, details] of Object.entries(fileDetails)) {
|
||||
// .jsonl.reset.<ISO-Timestamp>Z → archivierte Session (OpenClaw-Reset)
|
||||
// Format: 528f4d70-...jsonl.reset.2026-04-18T09-49-44.814Z
|
||||
const resetMatch = filename.match(/^([a-f0-9-]+)\.jsonl\.reset\.(.+Z)$/);
|
||||
if (resetMatch) {
|
||||
const id = resetMatch[1];
|
||||
// Timestamp ISO-8601 parsen: 2026-04-18T09-49-44.814Z → 2026-04-18T09:49:44.814Z
|
||||
const tsStr = resetMatch[2].replace(/T(\d{2})-(\d{2})-(\d{2})/, "T$1:$2:$3");
|
||||
const resetAt = Math.floor(new Date(tsStr).getTime() / 1000) || parseInt(details.MODIFIED) || 0;
|
||||
sessions.push({
|
||||
path: `${SESSIONS_DIR}/${filename}`,
|
||||
sessionKey: id.slice(0, 8) + "… (archiv)",
|
||||
sessionId: id,
|
||||
lines: parseInt(details.LINES) || 0,
|
||||
size: details.SIZE || "?",
|
||||
modified: resetAt,
|
||||
archived: true,
|
||||
resetAt,
|
||||
});
|
||||
continue;
|
||||
}
|
||||
// Echte Waisen (UUID.jsonl ohne Eintrag in sessions.json)
|
||||
const id = filename.replace(".jsonl", "");
|
||||
sessions.push({
|
||||
path: `${SESSIONS_DIR}/${filename}`,
|
||||
@@ -1376,6 +1745,68 @@ async function handleReadSession(clientWs, sessionPath) {
|
||||
}
|
||||
}
|
||||
|
||||
async function handleExportSession(clientWs, sessionPath, sessionKey) {
|
||||
if (!sessionPath || sessionPath.includes("..") || !sessionPath.startsWith(SESSIONS_DIR)) {
|
||||
clientWs.send(JSON.stringify({ type: "session_export", ok: false, error: "Ungueltiger Pfad" }));
|
||||
return;
|
||||
}
|
||||
try {
|
||||
const safePath = sessionPath.replace(/'/g, "");
|
||||
const raw = await dockerExec("aria-core", `cat '${safePath}'`);
|
||||
const lines = raw.split("\n").filter(l => l.trim());
|
||||
|
||||
const blocks = [];
|
||||
for (const line of lines) {
|
||||
let obj;
|
||||
try { obj = JSON.parse(line); } catch { continue; }
|
||||
if (obj.type !== "message" || !obj.message) continue;
|
||||
const role = obj.message.role;
|
||||
if (role !== "user" && role !== "assistant") continue;
|
||||
|
||||
let text = "";
|
||||
const content = obj.message.content;
|
||||
if (typeof content === "string") text = content;
|
||||
else if (Array.isArray(content)) text = content.filter(c => c.type === "text").map(c => c.text || "").join("\n");
|
||||
if (!text) continue;
|
||||
|
||||
if (role === "user") {
|
||||
text = text.replace(/^Sender \(untrusted metadata\):[\s\S]*?```[\s\S]*?```\s*\n*/m, "").trim();
|
||||
text = text.replace(/^\[.*?\]\s*/, "").trim();
|
||||
} else {
|
||||
text = text.replace(/^\[\[reply_to_\w+\]\]\s*/g, "").trim();
|
||||
}
|
||||
if (!text) continue;
|
||||
|
||||
const ts = obj.message.timestamp || obj.timestamp || 0;
|
||||
const when = ts ? new Date(ts).toISOString().replace("T", " ").slice(0, 19) : "";
|
||||
const heading = role === "user" ? "## 🧑 User" : "## 🤖 ARIA";
|
||||
blocks.push(`${heading}${when ? ` — ${when}` : ""}\n\n${text}`);
|
||||
}
|
||||
|
||||
const exportedAt = new Date().toISOString().replace("T", " ").slice(0, 19);
|
||||
const title = sessionKey || sessionPath.split("/").pop().replace(".jsonl", "");
|
||||
const markdown = [
|
||||
`# Session: ${title}`,
|
||||
``,
|
||||
`Exportiert: ${exportedAt} `,
|
||||
`Quelle: ${sessionPath}`,
|
||||
``,
|
||||
`---`,
|
||||
``,
|
||||
blocks.join("\n\n---\n\n"),
|
||||
``,
|
||||
].join("\n");
|
||||
|
||||
const safeKey = (sessionKey || "session").replace(/[^a-zA-Z0-9_-]/g, "_");
|
||||
const filename = `${exportedAt.slice(0, 10)}_${safeKey}.md`;
|
||||
clientWs.send(JSON.stringify({ type: "session_export", ok: true, filename, markdown }));
|
||||
log("info", "server", `Session exportiert: ${filename} (${blocks.length} Nachrichten)`);
|
||||
} catch (err) {
|
||||
log("error", "server", `Session-Export fehlgeschlagen: ${err.message}`);
|
||||
clientWs.send(JSON.stringify({ type: "session_export", ok: false, error: err.message }));
|
||||
}
|
||||
}
|
||||
|
||||
async function handleDeleteSession(clientWs, sessionPath) {
|
||||
if (!sessionPath || sessionPath.includes("..") || !sessionPath.startsWith(SESSIONS_DIR)) {
|
||||
clientWs.send(JSON.stringify({ type: "session_deleted", ok: false, error: "Ungueltiger Pfad" }));
|
||||
@@ -1416,13 +1847,11 @@ async function handleDeleteSession(clientWs, sessionPath) {
|
||||
}
|
||||
|
||||
// ── Session-Aufloesung: letzte aktive Session finden ────
|
||||
// Wird nach Gateway-(Re-)Connect aufgerufen. Darf die explizit gewaehlte
|
||||
// Session NIE ueberschreiben — nur beim absoluten Erststart auto-picken.
|
||||
async function resolveActiveSession() {
|
||||
// Nur bei Fallback-Key "main" automatisch aufloesen — gespeicherte Wahl respektieren
|
||||
const hasSavedSession = (() => {
|
||||
try { return !!fs.readFileSync(SESSION_KEY_FILE, "utf-8").trim(); } catch { return false; }
|
||||
})();
|
||||
if (hasSavedSession && activeSessionKey !== "main") {
|
||||
log("info", "server", `Gespeicherte Session '${activeSessionKey}' wird beibehalten`);
|
||||
if (sessionFromFile) {
|
||||
log("info", "server", `Session '${activeSessionKey}' aus /data — keine Auto-Wahl`);
|
||||
return;
|
||||
}
|
||||
|
||||
@@ -1441,10 +1870,19 @@ async function resolveActiveSession() {
|
||||
const keys = entries.map(e => (e.key || e.sessionKey || e.name || "?").replace(/^agent:main:/, ""));
|
||||
log("info", "server", `Verfuegbare Sessions: [${keys.join(", ")}]`);
|
||||
|
||||
// Neueste Session nehmen
|
||||
// Neueste Session nehmen — aber user-definierte bevorzugen.
|
||||
// aria-bridge / aria-diagnostic werden von den Services auto-erstellt;
|
||||
// bei erstem Start soll lieber eine "echte" Session gewaehlt werden,
|
||||
// falls vorhanden.
|
||||
const AUTO_KEYS = new Set(["aria-bridge", "aria-diagnostic"]);
|
||||
const normalise = (e) => (e.key || e.sessionKey || e.name || "").replace(/^agent:main:/, "");
|
||||
|
||||
const userEntries = entries.filter(e => !AUTO_KEYS.has(normalise(e)));
|
||||
const pool = userEntries.length > 0 ? userEntries : entries;
|
||||
|
||||
let newest = null;
|
||||
let newestTime = 0;
|
||||
for (const entry of entries) {
|
||||
for (const entry of pool) {
|
||||
const t = entry.updatedAt || entry.createdAt || 0;
|
||||
if (t >= newestTime) {
|
||||
newestTime = t;
|
||||
@@ -1453,12 +1891,11 @@ async function resolveActiveSession() {
|
||||
}
|
||||
|
||||
if (newest) {
|
||||
const rawKey = newest.key || newest.sessionKey || newest.name || "";
|
||||
const key = rawKey.replace(/^agent:main:/, "");
|
||||
const key = normalise(newest);
|
||||
if (key) {
|
||||
activeSessionKey = key;
|
||||
try { fs.writeFileSync(SESSION_KEY_FILE, activeSessionKey); } catch {}
|
||||
log("info", "server", `Aktive Session auf neueste gewechselt: '${activeSessionKey}'`);
|
||||
persistActiveSession(activeSessionKey);
|
||||
log("info", "server", `Auto-Wahl Erststart: '${activeSessionKey}'`);
|
||||
for (const c of browserClients) {
|
||||
c.send(JSON.stringify({ type: "active_session", sessionKey: activeSessionKey }));
|
||||
}
|
||||
@@ -1547,8 +1984,11 @@ function handleSetActiveSession(clientWs, sessionKey) {
|
||||
return;
|
||||
}
|
||||
activeSessionKey = sessionKey;
|
||||
try { fs.writeFileSync(SESSION_KEY_FILE, activeSessionKey); } catch {}
|
||||
log("info", "server", `Aktive Session: ${activeSessionKey}`);
|
||||
const ok = persistActiveSession(activeSessionKey);
|
||||
log("info", "server", `Aktive Session: ${activeSessionKey}${ok ? "" : " (WARN: nicht persistiert!)"}`);
|
||||
if (!ok) {
|
||||
clientWs.send(JSON.stringify({ type: "active_session", ok: false, sessionKey: activeSessionKey, error: "Persistierung fehlgeschlagen — /data Volume pruefen" }));
|
||||
}
|
||||
// Allen Clients mitteilen
|
||||
for (const c of browserClients) {
|
||||
c.send(JSON.stringify({ type: "active_session", sessionKey: activeSessionKey }));
|
||||
@@ -1564,7 +2004,7 @@ async function handleCreateSession(clientWs, sessionName) {
|
||||
try {
|
||||
// Session wird automatisch erstellt wenn man die erste Nachricht sendet
|
||||
activeSessionKey = sessionName;
|
||||
try { fs.writeFileSync(SESSION_KEY_FILE, activeSessionKey); } catch {}
|
||||
persistActiveSession(activeSessionKey);
|
||||
log("info", "server", `Neue Session erstellt und aktiviert: ${sessionName}`);
|
||||
// Allen Clients mitteilen
|
||||
for (const c of browserClients) {
|
||||
|
||||
+2
-3
@@ -18,7 +18,7 @@ services:
|
||||
claude-max-api"
|
||||
volumes:
|
||||
- ~/.claude:/root/.claude # Claude CLI Auth (Credentials in /root/.claude/.credentials.json)
|
||||
- ./aria-data/ssh:/root/.ssh:ro # SSH Keys fuer VM-Zugriff (aria-wohnung)
|
||||
- ./aria-data/ssh:/root/.ssh # SSH Keys fuer VM-Zugriff (aria-wohnung, rw fuer ARIA)
|
||||
- aria-shared:/shared # Shared Volume fuer Datei-Austausch (Uploads von App)
|
||||
environment:
|
||||
- HOST=0.0.0.0
|
||||
@@ -72,7 +72,6 @@ services:
|
||||
- aria
|
||||
network_mode: "service:aria" # Teilt Netzwerk mit aria-core → localhost:18789
|
||||
volumes:
|
||||
- ./aria-data/voices:/voices:ro # TTS Stimmen
|
||||
- ./aria-data/config/aria.env:/config/aria.env
|
||||
- aria-shared:/shared # Shared Volume fuer Datei-Austausch (Bridge <> Core)
|
||||
# Audio-Zugriff
|
||||
@@ -100,7 +99,7 @@ services:
|
||||
volumes:
|
||||
- /var/run/docker.sock:/var/run/docker.sock:ro
|
||||
- ./aria-data/config/diag-state:/data # Persistenter State (aktive Session etc.)
|
||||
- aria-shared:/shared:ro # Shared Volume (Uploads lesen fuer Vorschau)
|
||||
- aria-shared:/shared # Shared Volume (Uploads + Config)
|
||||
environment:
|
||||
- ARIA_AUTH_TOKEN=${ARIA_AUTH_TOKEN:-}
|
||||
- PROXY_URL=http://proxy:3456
|
||||
|
||||
@@ -1,32 +0,0 @@
|
||||
#!/bin/bash
|
||||
# ════════════════════════════════════════════════
|
||||
# ARIA — Piper Stimmen herunterladen
|
||||
# Ramona (Alltag) + Thorsten (epische Momente)
|
||||
# ════════════════════════════════════════════════
|
||||
|
||||
set -e
|
||||
|
||||
VOICES_DIR="aria-data/voices"
|
||||
BASE_URL="https://huggingface.co/rhasspy/piper-voices/resolve/main/de/de_DE"
|
||||
|
||||
mkdir -p "$VOICES_DIR"
|
||||
cd "$VOICES_DIR"
|
||||
|
||||
echo "Lade ARIA Stimmen..."
|
||||
echo ""
|
||||
|
||||
echo "[1/4] Ramona (Modell)..."
|
||||
wget -q --show-progress "$BASE_URL/ramona/low/de_DE-ramona-low.onnx"
|
||||
|
||||
echo "[2/4] Ramona (Config)..."
|
||||
wget -q --show-progress "$BASE_URL/ramona/low/de_DE-ramona-low.onnx.json"
|
||||
|
||||
echo "[3/4] Thorsten (Modell)..."
|
||||
wget -q --show-progress "$BASE_URL/thorsten/high/de_DE-thorsten-high.onnx"
|
||||
|
||||
echo "[4/4] Thorsten (Config)..."
|
||||
wget -q --show-progress "$BASE_URL/thorsten/high/de_DE-thorsten-high.onnx.json"
|
||||
|
||||
echo ""
|
||||
echo "Stimmen geladen!"
|
||||
ls -lh *.onnx
|
||||
@@ -1,25 +1,79 @@
|
||||
bildupload ghet noch nicht.
|
||||
# ARIA Issues & Features
|
||||
|
||||
#erledigt
|
||||
sprachnachrichten werden nicht als zweite nachricht dargestellt, damit man weiß was man gesendet hat
|
||||
# ende
|
||||
## Erledigt
|
||||
|
||||
- [x] Bildupload funktioniert (Shared Volume /shared/uploads/)
|
||||
- [x] Sprachnachrichten werden als Text angezeigt (STT → Chat-Bubble)
|
||||
- [x] Cache leeren + Auto-Download von Anhaengen
|
||||
- [x] ARIA liest Nachrichten vor (TTS via Piper)
|
||||
- [x] Autoscroll zur letzten Nachricht (inverted FlatList)
|
||||
- [x] Bilder im Chat groesser + Vollbild-Vorschau
|
||||
- [x] Ohr-Button → Gespraechsmodus (Auto-Aufnahme nach ARIA-Antwort)
|
||||
- [x] Play-Button in ARIA-Nachrichten fuer Sprachwiedergabe
|
||||
- [x] Chat-Suche in der App (Lupe in Statusleiste)
|
||||
- [x] Watchdog mit Container-Restart (2min Warnung → 5min doctor --fix → 8min Restart)
|
||||
- [x] Abbrechen-Button im Diagnostic Chat
|
||||
- [x] Nachrichten Backup on-the-fly (/shared/config/chat_backup.jsonl)
|
||||
- [x] Grosse Nachrichten satzweise aufteilen fuer TTS
|
||||
- [x] RVS Nachrichten vom Smartphone gehen durch
|
||||
- [x] Stimmen-Einstellungen (Ramona/Thorsten, Speed pro Stimme)
|
||||
- [x] Highlight-Trigger konfigurierbar in Diagnostic
|
||||
- [x] XTTS v2 Integration (Gaming-PC, GPU, Voice Cloning)
|
||||
- [x] XTTS Voice Cloning (Audio-Samples hochladen, eigene Stimme)
|
||||
- [x] TTS Engine waehlbar (Piper/XTTS) in Diagnostic + App
|
||||
- [x] Auto-Update System (APK via RVS WebSocket)
|
||||
- [x] Auto-Update: APK-Installation via FileProvider
|
||||
- [x] Auto-Update: "Auf Updates pruefen" Button in App-Einstellungen
|
||||
- [x] Audio-Queue (sequentielle Wiedergabe, kein Ueberlappen)
|
||||
- [x] Textnachrichten werden von ARIA beantwortet (Bridge chat handler fix)
|
||||
- [x] Mehrere Anhaenge + Text vor dem Senden (Pending-Vorschau)
|
||||
- [x] Paste-Support fuer Bilder in Diagnostic Chat
|
||||
- [x] Markdown-Bereinigung fuer TTS (fett, kursiv, code, links, etc.)
|
||||
- [x] SSH Volume read-write fuer Proxy (kein -F Workaround mehr)
|
||||
- [x] Diagnostic: Sessions als Markdown exportieren (Download-Button)
|
||||
- [x] Speech Gate: Aufnahme wird verworfen wenn keine Sprache erkannt (verhindert dass Umgebungsgeraeusche an Whisper gehen)
|
||||
- [x] Session-Persistenz: Gewaehlte Session bleibt ueber Container-Restarts erhalten (sessionFromFile-Flag, atomic write)
|
||||
- [x] Diagnostic: "ARIA denkt..." bleibt nicht mehr stehen (pipelineEnd broadcastet immer idle, auch bei Timeout/Fehler/Disconnect)
|
||||
- [x] App: "ARIA denkt..." Indicator + Abbrechen-Button (Bridge spiegelt agent_activity via RVS)
|
||||
- [x] Whisper STT: Model-Auswahl in Diagnostic (tiny/base/small/medium/large-v3), Hot-Reload in Bridge, Default auf medium
|
||||
- [x] App: Audio-Aufnahme explizit 16kHz mono (spart Resample, optimal fuer Whisper)
|
||||
- [x] Streaming TTS (Weg A): XTTS → PCM-Stream → aria-bridge → App AudioTrack MODE_STREAM, keine WAV-Gaps mehr
|
||||
- [x] Piper komplett entfernt: nur noch XTTS v2 als TTS-Engine (remote, GPU auf Gaming-PC). Wenn XTTS offline ist, ist ARIA stumm — bewusst akzeptiert.
|
||||
- [x] Gespraechsmodus: Speech-Gate strenger (-28dB / 500ms) — keine Umgebungsgeraeusche mehr
|
||||
- [x] Gespraechsmodus: Max-Dauer 30s pro Aufnahme, Cache-Cleanup alter Files, Messages-Array gekappt (500)
|
||||
- [x] Diagnostic: Archivierte Session-Versionen (.reset.*) werden angezeigt + exportierbar — OpenClaw resettet Sessions bei erster Nutzung nach Container-Restart, Inhalt ist aber in .reset.<timestamp> Dateien gesichert
|
||||
- [x] tools/export-jsonl-to-md.js: CLI-Konverter fuer beliebige Session-JSONL zu Markdown
|
||||
- [x] NO_REPLY-Filter in Bridge + Diagnostic — still verworfen (kein Chat, kein TTS)
|
||||
- [x] Audio-Ducking + Exklusiv-Focus (Kotlin AudioFocusModule): andere Apps leiser bei TTS, pausiert bei Aufnahme
|
||||
- [x] TTS-Cleanup serverseitig: Code-Bloecke raus, Einheiten ausgeschrieben (22GB → Gigabyte), Abkuerzungen buchstabiert (CPU), URLs zu "ein Link". `<voice></voice>` Tag wird bevorzugt wenn ARIA ihn liefert.
|
||||
- [x] QR-Code Onboarding: Diagnostic generiert QR, App scannt (bestehender QRScanner funktioniert out of the box)
|
||||
- [x] TTS-Audio-Cache im Filesystem: Piper-Audio wird mit messageId verknuepft, als WAV in DocumentDirectory/tts_cache gespeichert, Play-Button spielt aus Cache statt regenerieren
|
||||
- [x] Config via Diagnostic: RVS-Credentials + Aria-Auth-Token via /api/runtime-config, persistiert in /shared/config/runtime.json, Bridge liest beim Start (Overrides der ENV)
|
||||
|
||||
cache leeren, bilder werden nicht neu geladen beim antippen.
|
||||
autoload geht nicht
|
||||
## Offen
|
||||
|
||||
wenn man auf das ohr zum hören klickt stürzt ab
|
||||
### Bugs (Prioritaet)
|
||||
- [ ] App: Audioausgabe hoert ab und zu einfach auf (mitten im Satz oder zwischen Chunks)
|
||||
- [ ] NO_REPLY wird als "NO" im Chat angezeigt — sollte still verworfen werden (Token nicht gesaeubert)
|
||||
|
||||
aria liest die nachrichten nicht vor
|
||||
### App Features
|
||||
- [ ] Wake Word on-device (Porcupine "ARIA" Keyword, Phase 2 — passives Lauschen)
|
||||
- [ ] Chat-History zuverlaessiger laden (AsyncStorage Race Condition)
|
||||
- [ ] Background Audio Service (TTS auch bei minimierter App)
|
||||
- [ ] Audio-Ducking: andere App-Audio-Ausgaben leiser stellen waehrend ARIA spricht (AudioFocus API)
|
||||
- [ ] Audio-Muten waehrend Aufnahme/Ohr-Modus: andere Audio stumm (wie WhatsApp-Sprachaufnahme)
|
||||
- [ ] Spracheingabe-Timeout erhoehen fuer laengere Texte
|
||||
- [ ] Generierte TTS-Audiodaten in der Chat-Nachricht einbetten (oder lokal cachen), Play-Button spielt aus Cache statt Regenerierung via XTTS. Base64 im Tag <soundfile></soundfile> (invisible) oder lokaler Datei-Cache mit Referenz in der Message.
|
||||
- [ ] QR-Code Onboarding: Diagnostic generiert QR mit RVS-Credentials, App scannt — keine manuelle Eingabe mehr
|
||||
|
||||
### TTS / Audio
|
||||
- [ ] Audio-Normalisierung (Lautstaerke zwischen Chunks angleichen)
|
||||
|
||||
# erledigt autoscroll geht doch noch nicht zur letzten nachricht
|
||||
unserer memory brain
|
||||
# ende
|
||||
|
||||
bilder im chat größer darstellen
|
||||
# ende
|
||||
|
||||
|
||||
die viper voices downloaden über die diagnostic
|
||||
# ende
|
||||
### Architektur
|
||||
- [ ] Bilder: Claude Vision direkt nutzen (aktuell nur Dateipfad an ARIA)
|
||||
- [ ] Auto-Compacting und Memory/Brain Verwaltung (SQLite?)
|
||||
- [ ] Diagnostic: System-Info Tab (Container-Status, Disk, RAM, CPU)
|
||||
- [ ] RVS Zombie-Connections endgueltig loesen
|
||||
- [ ] Alle .env-Variablen ueber Diagnostic konfigurierbar machen (kein File-Sync mehr noetig, da alle ARIA-Container auf der gleichen VM laufen). Fallback .env bleibt fuer initialen Bootstrap.
|
||||
- [ ] XTTS-Container: kleine Web-Oberflaeche fuer Credentials/Server-Config, oder zentral aus Diagnostic per RVS push
|
||||
- [ ] Root-Cause OpenClaw Session-Reset: Herausfinden warum Sessions beim ersten chat.send nach Container-Restart verworfen werden (abortedLastRun / systemSent Theorie pruefen, ggf. Flag preemptiv patchen)
|
||||
|
||||
+23
-1
@@ -76,8 +76,11 @@ echo -e " ${GREEN}✓${NC} SettingsScreen → Version $VERSION"
|
||||
echo ""
|
||||
|
||||
# ── APK bauen ─────────────────────────────────
|
||||
echo -e "${GREEN}[2/5] APK bauen...${NC}"
|
||||
echo -e "${GREEN}[2/5] APK bauen (Cache leeren + Build)...${NC}"
|
||||
cd android
|
||||
# Metro + Gradle Cache leeren damit neue Version sauber eingebettet wird
|
||||
rm -rf node_modules/.cache 2>/dev/null
|
||||
cd android && ./gradlew clean 2>/dev/null; cd ..
|
||||
./build.sh release
|
||||
cd ..
|
||||
|
||||
@@ -170,6 +173,24 @@ else
|
||||
exit 1
|
||||
fi
|
||||
|
||||
# ── Auto-Update: APK auf RVS-Server kopieren ─
|
||||
RVS_UPDATE_HOST="${RVS_UPDATE_HOST:-}"
|
||||
if [ -n "$RVS_UPDATE_HOST" ]; then
|
||||
echo -e "${GREEN}[6/6] APK auf RVS-Server kopieren (Auto-Update)...${NC}"
|
||||
# Alte APKs auf dem RVS loeschen, dann neue hochladen
|
||||
ssh "$RVS_UPDATE_HOST" "rm -f ~/ARIA-AGENT/rvs/updates/ARIA-*.apk" 2>/dev/null
|
||||
scp "$APK_PATH" "${RVS_UPDATE_HOST}:~/ARIA-AGENT/rvs/updates/${APK_NAME}" 2>/dev/null
|
||||
if [ $? -eq 0 ]; then
|
||||
echo -e " ${GREEN}✓${NC} APK auf RVS-Server kopiert (alte Versionen geloescht)"
|
||||
else
|
||||
echo -e " ${YELLOW}APK konnte nicht auf RVS kopiert werden (RVS_UPDATE_HOST=$RVS_UPDATE_HOST)${NC}"
|
||||
echo -e " ${YELLOW}Manuell: scp $APK_PATH $RVS_UPDATE_HOST:~/ARIA-AGENT/rvs/updates/${APK_NAME}${NC}"
|
||||
fi
|
||||
else
|
||||
echo -e "${YELLOW}Auto-Update uebersprungen (RVS_UPDATE_HOST nicht gesetzt)${NC}"
|
||||
echo -e "${YELLOW}Setze RVS_UPDATE_HOST in .env fuer automatische Verteilung${NC}"
|
||||
fi
|
||||
|
||||
# ── Fertig ────────────────────────────────────
|
||||
echo ""
|
||||
echo -e "${GREEN}╔═══════════════════════════════════════════════════╗${NC}"
|
||||
@@ -177,4 +198,5 @@ echo -e "${GREEN}║ Release $TAG ist live!$(printf '%*s' $((27 - ${#TAG})) ''
|
||||
echo -e "${GREEN}╠═══════════════════════════════════════════════════╣${NC}"
|
||||
echo -e "${GREEN}║${NC} $GITEA_URL/$GITEA_REPO/releases/tag/$TAG"
|
||||
echo -e "${GREEN}║${NC} APK: $APK_NAME ($APK_SIZE)"
|
||||
echo -e "${GREEN}║${NC} Auto-Update: ${RVS_UPDATE_HOST:-nicht konfiguriert}"
|
||||
echo -e "${GREEN}╚═══════════════════════════════════════════════════╝${NC}"
|
||||
|
||||
@@ -4,5 +4,7 @@ services:
|
||||
ports:
|
||||
- "${RVS_PORT:-443}:3000"
|
||||
restart: always
|
||||
volumes:
|
||||
- ./updates:/updates # APK-Dateien fuer Auto-Update
|
||||
environment:
|
||||
- MAX_SESSIONS=10
|
||||
|
||||
+116
-1
@@ -1,15 +1,24 @@
|
||||
"use strict";
|
||||
|
||||
const { WebSocketServer } = require("ws");
|
||||
const fs = require("fs");
|
||||
const path = require("path");
|
||||
|
||||
// ── Konfiguration aus Umgebungsvariablen ────────────────────────────
|
||||
const PORT = parseInt(process.env.PORT || "3000", 10);
|
||||
const MAX_SESSIONS = parseInt(process.env.MAX_SESSIONS || "10", 10);
|
||||
const UPDATES_DIR = process.env.UPDATES_DIR || "/updates";
|
||||
// Kein Polling — APK wird manuell per git pull bereitgestellt
|
||||
|
||||
// Erlaubte Nachrichtentypen — alles andere wird verworfen
|
||||
const ALLOWED_TYPES = new Set([
|
||||
"chat", "audio", "file", "location", "mode", "log", "event", "heartbeat",
|
||||
"file_request", "file_response", "file_saved", "stt_result",
|
||||
"file_request", "file_response", "file_saved", "stt_result", "config", "tts_request",
|
||||
"xtts_request", "xtts_response", "xtts_list_voices", "xtts_voices_list", "voice_upload", "xtts_voice_saved",
|
||||
"update_check", "update_available", "update_download", "update_data",
|
||||
"agent_activity", "cancel_request",
|
||||
"audio_pcm",
|
||||
"xtts_delete_voice",
|
||||
]);
|
||||
|
||||
// Token-Raum: token -> { clients: Set<ws> }
|
||||
@@ -46,6 +55,9 @@ const wss = new WebSocketServer({ port: PORT });
|
||||
|
||||
wss.on("listening", () => {
|
||||
log(`RVS läuft auf Port ${PORT} | Max Sessions: ${MAX_SESSIONS}`);
|
||||
// Beim Start pruefen ob eine APK da ist
|
||||
const apkInfo = getLatestAPK();
|
||||
if (apkInfo) log(`APK bereit: v${apkInfo.version} (${(fs.statSync(apkInfo.path).size / 1024 / 1024).toFixed(1)}MB)`);
|
||||
});
|
||||
|
||||
wss.on("connection", (ws, req) => {
|
||||
@@ -107,6 +119,52 @@ function registerClient(ws, token) {
|
||||
return;
|
||||
}
|
||||
|
||||
// Update-Check: direkt an den anfragenden Client antworten (nicht relay'en)
|
||||
if (msg.type === "update_check") {
|
||||
const clientVersion = msg.payload?.version || "0.0.0.0";
|
||||
const apkInfo = getLatestAPK();
|
||||
if (apkInfo && compareVersions(apkInfo.version, clientVersion) > 0) {
|
||||
ws.send(JSON.stringify({
|
||||
type: "update_available",
|
||||
payload: {
|
||||
version: apkInfo.version,
|
||||
downloadUrl: `/update/latest.apk`,
|
||||
size: fs.statSync(apkInfo.path).size,
|
||||
},
|
||||
timestamp: Date.now(),
|
||||
}));
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
// Update-Download: APK als Base64 ueber WebSocket senden
|
||||
if (msg.type === "update_download") {
|
||||
const apkInfo = getLatestAPK();
|
||||
if (!apkInfo) {
|
||||
ws.send(JSON.stringify({ type: "update_data", payload: { error: "Keine APK verfuegbar" }, timestamp: Date.now() }));
|
||||
return;
|
||||
}
|
||||
try {
|
||||
const data = fs.readFileSync(apkInfo.path);
|
||||
const base64 = data.toString("base64");
|
||||
const sizeMB = (data.length / 1024 / 1024).toFixed(1);
|
||||
log(`APK sende: v${apkInfo.version} (${sizeMB}MB) an Client`);
|
||||
ws.send(JSON.stringify({
|
||||
type: "update_data",
|
||||
payload: {
|
||||
version: apkInfo.version,
|
||||
base64,
|
||||
size: data.length,
|
||||
fileName: `ARIA-v${apkInfo.version}.apk`,
|
||||
},
|
||||
timestamp: Date.now(),
|
||||
}));
|
||||
} catch (err) {
|
||||
ws.send(JSON.stringify({ type: "update_data", payload: { error: err.message }, timestamp: Date.now() }));
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
// An alle anderen Clients im Raum weiterleiten
|
||||
for (const client of room.clients) {
|
||||
if (client !== ws && client.readyState === 1) {
|
||||
@@ -167,6 +225,63 @@ wss.on("close", () => {
|
||||
clearInterval(cleanup);
|
||||
});
|
||||
|
||||
// ── Auto-Update: APK-Erkennung + Push ──────────────────────────────
|
||||
|
||||
let latestVersion = null;
|
||||
|
||||
function getLatestAPK() {
|
||||
try {
|
||||
if (!fs.existsSync(UPDATES_DIR)) return null;
|
||||
const files = fs.readdirSync(UPDATES_DIR)
|
||||
.filter(f => f.endsWith(".apk"))
|
||||
.map(f => {
|
||||
// ARIA-v0.0.2.3.apk oder ARIA-Cockpit-release.apk
|
||||
const match = f.match(/(\d+\.\d+\.\d+[\.\d]*)/);
|
||||
return { file: f, path: path.join(UPDATES_DIR, f), version: match ? match[1] : null };
|
||||
})
|
||||
.filter(f => f.version)
|
||||
.sort((a, b) => compareVersions(b.version, a.version)); // Neueste zuerst
|
||||
|
||||
return files[0] || null;
|
||||
} catch {
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
function compareVersions(a, b) {
|
||||
const pa = a.split(".").map(Number);
|
||||
const pb = b.split(".").map(Number);
|
||||
for (let i = 0; i < Math.max(pa.length, pb.length); i++) {
|
||||
const diff = (pa[i] || 0) - (pb[i] || 0);
|
||||
if (diff !== 0) return diff;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
function notifyClientsAboutUpdate(apkInfo) {
|
||||
const msg = JSON.stringify({
|
||||
type: "update_available",
|
||||
payload: {
|
||||
version: apkInfo.version,
|
||||
downloadUrl: `/update/latest.apk`,
|
||||
size: fs.statSync(apkInfo.path).size,
|
||||
},
|
||||
timestamp: Date.now(),
|
||||
});
|
||||
|
||||
// An alle Clients in allen Rooms senden
|
||||
for (const [, room] of rooms) {
|
||||
for (const client of room.clients) {
|
||||
if (client.readyState === 1) {
|
||||
client.send(msg);
|
||||
}
|
||||
}
|
||||
}
|
||||
log(`Update-Benachrichtigung gesendet: v${apkInfo.version} (${rooms.size} Raum/Raeume)`);
|
||||
}
|
||||
|
||||
// Kein Polling — Update-Check passiert on-demand (update_check Message von App)
|
||||
|
||||
// ── Sauberes Herunterfahren ─────────────────────────────────────────
|
||||
|
||||
process.on("SIGTERM", () => {
|
||||
|
||||
Executable
+74
@@ -0,0 +1,74 @@
|
||||
#!/usr/bin/env node
|
||||
/**
|
||||
* Exportiert ein OpenClaw Session-JSONL (auch .reset.*) als Markdown.
|
||||
*
|
||||
* Nutzung:
|
||||
* node export-jsonl-to-md.js <input.jsonl> [output.md]
|
||||
*
|
||||
* Oder direkt aus dem aria-core Container:
|
||||
* docker exec aria-core cat /home/node/.openclaw/agents/main/sessions/<ID>.jsonl.reset.<TS> \
|
||||
* | node export-jsonl-to-md.js - > output.md
|
||||
*/
|
||||
|
||||
const fs = require("fs");
|
||||
|
||||
const inputArg = process.argv[2];
|
||||
const outputArg = process.argv[3];
|
||||
|
||||
if (!inputArg) {
|
||||
console.error("Usage: export-jsonl-to-md.js <input.jsonl|-> [output.md]");
|
||||
process.exit(1);
|
||||
}
|
||||
|
||||
const raw = inputArg === "-" ? fs.readFileSync(0, "utf-8") : fs.readFileSync(inputArg, "utf-8");
|
||||
const lines = raw.split("\n").filter(l => l.trim());
|
||||
|
||||
const blocks = [];
|
||||
for (const line of lines) {
|
||||
let obj;
|
||||
try { obj = JSON.parse(line); } catch { continue; }
|
||||
if (obj.type !== "message" || !obj.message) continue;
|
||||
const role = obj.message.role;
|
||||
if (role !== "user" && role !== "assistant") continue;
|
||||
|
||||
let text = "";
|
||||
const content = obj.message.content;
|
||||
if (typeof content === "string") text = content;
|
||||
else if (Array.isArray(content)) text = content.filter(c => c.type === "text").map(c => c.text || "").join("\n");
|
||||
if (!text) continue;
|
||||
|
||||
if (role === "user") {
|
||||
text = text.replace(/^Sender \(untrusted metadata\):[\s\S]*?```[\s\S]*?```\s*\n*/m, "").trim();
|
||||
text = text.replace(/^\[.*?\]\s*/, "").trim();
|
||||
} else {
|
||||
text = text.replace(/^\[\[reply_to_\w+\]\]\s*/g, "").trim();
|
||||
}
|
||||
if (!text) continue;
|
||||
|
||||
const ts = obj.message.timestamp || obj.timestamp || 0;
|
||||
const when = ts ? new Date(ts).toISOString().replace("T", " ").slice(0, 19) : "";
|
||||
const heading = role === "user" ? "## 🧑 User" : "## 🤖 ARIA";
|
||||
blocks.push(`${heading}${when ? ` — ${when}` : ""}\n\n${text}`);
|
||||
}
|
||||
|
||||
const exportedAt = new Date().toISOString().replace("T", " ").slice(0, 19);
|
||||
const title = inputArg === "-" ? "Session" : inputArg.split("/").pop().replace(/\.jsonl.*/, "");
|
||||
const md = [
|
||||
`# Session: ${title}`,
|
||||
``,
|
||||
`Exportiert: ${exportedAt} `,
|
||||
`Quelle: ${inputArg === "-" ? "stdin" : inputArg}`,
|
||||
`Nachrichten: ${blocks.length}`,
|
||||
``,
|
||||
`---`,
|
||||
``,
|
||||
blocks.join("\n\n---\n\n"),
|
||||
``,
|
||||
].join("\n");
|
||||
|
||||
if (outputArg) {
|
||||
fs.writeFileSync(outputArg, md);
|
||||
console.error(`OK: ${blocks.length} Nachrichten → ${outputArg}`);
|
||||
} else {
|
||||
process.stdout.write(md);
|
||||
}
|
||||
@@ -0,0 +1,11 @@
|
||||
# ════════════════════════════════════════════════
|
||||
# ARIA XTTS v2 — Konfiguration
|
||||
# Kopieren nach .env und anpassen
|
||||
# ════════════════════════════════════════════════
|
||||
|
||||
# RVS Verbindung (gleiche Daten wie auf der ARIA-VM)
|
||||
RVS_HOST=mobil.hacker-net.de
|
||||
RVS_PORT=444
|
||||
RVS_TLS=true
|
||||
RVS_TLS_FALLBACK=true
|
||||
RVS_TOKEN=dein_token_hier
|
||||
@@ -0,0 +1,5 @@
|
||||
FROM node:22-alpine
|
||||
WORKDIR /app
|
||||
COPY bridge.js package.json ./
|
||||
RUN npm install --production
|
||||
CMD ["node", "bridge.js"]
|
||||
+418
@@ -0,0 +1,418 @@
|
||||
/**
|
||||
* ARIA XTTS Bridge — Verbindet XTTS v2 Server mit dem RVS
|
||||
*
|
||||
* Empfaengt tts_request ueber RVS → rendert Audio via XTTS API → sendet zurueck
|
||||
* Empfaengt voice_upload → speichert Voice-Sample fuer Cloning
|
||||
* Empfaengt xtts_list_voices → listet verfuegbare Stimmen
|
||||
*/
|
||||
|
||||
const WebSocket = require("ws");
|
||||
const http = require("http");
|
||||
const https = require("https");
|
||||
const fs = require("fs");
|
||||
const path = require("path");
|
||||
|
||||
const XTTS_API_URL = process.env.XTTS_API_URL || "http://xtts:8000";
|
||||
const RVS_HOST = process.env.RVS_HOST || "";
|
||||
const RVS_PORT = process.env.RVS_PORT || "443";
|
||||
const RVS_TLS = process.env.RVS_TLS || "true";
|
||||
const RVS_TLS_FALLBACK = process.env.RVS_TLS_FALLBACK || "true";
|
||||
const RVS_TOKEN = process.env.RVS_TOKEN || "";
|
||||
const VOICES_DIR = "/voices";
|
||||
|
||||
function log(msg) {
|
||||
console.log(`[${new Date().toISOString()}] ${msg}`);
|
||||
}
|
||||
|
||||
// ── RVS Verbindung ──────────────────────────────────
|
||||
|
||||
let rvsWs = null;
|
||||
let retryDelay = 2;
|
||||
|
||||
function connectRVS(forcePlain) {
|
||||
if (!RVS_HOST || !RVS_TOKEN) {
|
||||
log("RVS nicht konfiguriert — beende");
|
||||
process.exit(1);
|
||||
}
|
||||
|
||||
const useTls = RVS_TLS === "true" && !forcePlain;
|
||||
const proto = useTls ? "wss" : "ws";
|
||||
const url = `${proto}://${RVS_HOST}:${RVS_PORT}?token=${RVS_TOKEN}`;
|
||||
|
||||
log(`Verbinde zu RVS: ${proto}://${RVS_HOST}:${RVS_PORT}`);
|
||||
|
||||
const ws = new WebSocket(url);
|
||||
|
||||
ws.on("open", () => {
|
||||
log("RVS verbunden — warte auf TTS-Requests");
|
||||
rvsWs = ws;
|
||||
retryDelay = 2;
|
||||
|
||||
// Keepalive
|
||||
setInterval(() => {
|
||||
if (ws.readyState === WebSocket.OPEN) {
|
||||
ws.ping();
|
||||
ws.send(JSON.stringify({ type: "heartbeat", timestamp: Date.now() }));
|
||||
}
|
||||
}, 25000);
|
||||
});
|
||||
|
||||
ws.on("message", async (raw) => {
|
||||
try {
|
||||
const msg = JSON.parse(raw.toString());
|
||||
|
||||
if (msg.type === "xtts_request") {
|
||||
await handleTTSRequest(msg.payload);
|
||||
} else if (msg.type === "voice_upload") {
|
||||
await handleVoiceUpload(msg.payload);
|
||||
} else if (msg.type === "xtts_list_voices") {
|
||||
await handleListVoices();
|
||||
} else if (msg.type === "xtts_delete_voice") {
|
||||
await handleDeleteVoice(msg.payload);
|
||||
}
|
||||
} catch (err) {
|
||||
log(`Fehler: ${err.message}`);
|
||||
}
|
||||
});
|
||||
|
||||
ws.on("close", () => {
|
||||
log("RVS Verbindung geschlossen");
|
||||
rvsWs = null;
|
||||
setTimeout(() => connectRVS(), Math.min(retryDelay * 1000, 30000));
|
||||
retryDelay = Math.min(retryDelay * 2, 30);
|
||||
});
|
||||
|
||||
ws.on("error", (err) => {
|
||||
log(`RVS Fehler: ${err.message}`);
|
||||
if (useTls && RVS_TLS_FALLBACK === "true") {
|
||||
log("TLS fehlgeschlagen — Fallback auf ws://");
|
||||
ws.removeAllListeners();
|
||||
try { ws.close(); } catch (_) {}
|
||||
connectRVS(true);
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
// ── TTS Request Handler ─────────────────────────────
|
||||
|
||||
async function handleTTSRequest(payload) {
|
||||
const { text, voice, requestId, language, messageId } = payload;
|
||||
if (!text) return;
|
||||
|
||||
// Markdown-Cleanup (Bridge macht jetzt auch Cleanup, aber safety net)
|
||||
let cleanText = text
|
||||
.replace(/\*\*([^*]+)\*\*/g, "$1")
|
||||
.replace(/\*([^*]+)\*/g, "$1")
|
||||
.replace(/`([^`]+)`/g, "$1")
|
||||
.replace(/```[\s\S]*?```/g, "")
|
||||
.replace(/\[([^\]]+)\]\([^)]+\)/g, "$1")
|
||||
.replace(/#{1,6}\s*/g, "")
|
||||
.replace(/>\s*/g, "")
|
||||
.replace(/[-*]\s+/g, "")
|
||||
.replace(/\n{2,}/g, ". ")
|
||||
.replace(/\n/g, ", ")
|
||||
.replace(/\s{2,}/g, " ")
|
||||
.replace(/["""„]/g, "")
|
||||
.replace(/\(\)/g, "")
|
||||
.trim();
|
||||
|
||||
// Satzweise Chunks (XTTS Modell laedt Context pro Call — Saetze gruppieren)
|
||||
const sentences = cleanText.split(/(?<=[.!?])\s+/)
|
||||
.map(s => s.trim())
|
||||
.filter(s => s.length > 0)
|
||||
.map(s => s.replace(/[.]+$/, ''));
|
||||
|
||||
const MAX_CHUNK_CHARS = 150;
|
||||
const chunks = [];
|
||||
let currentChunk = '';
|
||||
for (const sentence of sentences) {
|
||||
if (currentChunk && (currentChunk.length + sentence.length + 2) > MAX_CHUNK_CHARS) {
|
||||
chunks.push(currentChunk);
|
||||
currentChunk = sentence;
|
||||
} else {
|
||||
currentChunk = currentChunk ? currentChunk + ', ' + sentence : sentence;
|
||||
}
|
||||
}
|
||||
if (currentChunk) chunks.push(currentChunk);
|
||||
if (chunks.length === 0) return;
|
||||
|
||||
log(`TTS-Request (streaming): "${cleanText.slice(0, 60)}..." (${chunks.length} Chunks, voice: ${voice || "default"})`);
|
||||
|
||||
try {
|
||||
const voiceSample = voice ? path.join(VOICES_DIR, `${voice}.wav`) : null;
|
||||
const hasCustomVoice = voiceSample && fs.existsSync(voiceSample);
|
||||
|
||||
let chunkIndex = 0;
|
||||
// Audio-Format (aus WAV-Header extrahiert, einmal pro Request)
|
||||
let pcmMeta = null;
|
||||
|
||||
for (let i = 0; i < chunks.length; i++) {
|
||||
const chunk = chunks[i];
|
||||
const isLastChunk = i === chunks.length - 1;
|
||||
try {
|
||||
// Streaming: PCM-Frames werden nacheinander an RVS gepusht,
|
||||
// sobald sie vom XTTS-Server reinkommen
|
||||
await streamXTTSAsPCM(
|
||||
chunk,
|
||||
language || "de",
|
||||
hasCustomVoice ? voiceSample : null,
|
||||
(pcmBase64, meta) => {
|
||||
if (!pcmMeta) pcmMeta = meta;
|
||||
sendToRVS({
|
||||
type: "audio_pcm",
|
||||
payload: {
|
||||
requestId: requestId || "",
|
||||
messageId: messageId || "",
|
||||
base64: pcmBase64,
|
||||
format: "pcm_s16le",
|
||||
sampleRate: meta.sampleRate,
|
||||
channels: meta.channels,
|
||||
voice: voice || "default",
|
||||
chunk: chunkIndex++,
|
||||
final: false,
|
||||
},
|
||||
timestamp: Date.now(),
|
||||
});
|
||||
},
|
||||
);
|
||||
|
||||
// Nach letztem Text-Chunk: final-Flag senden damit App weiss "fertig"
|
||||
if (isLastChunk && pcmMeta) {
|
||||
sendToRVS({
|
||||
type: "audio_pcm",
|
||||
payload: {
|
||||
requestId: requestId || "",
|
||||
messageId: messageId || "",
|
||||
base64: "",
|
||||
format: "pcm_s16le",
|
||||
sampleRate: pcmMeta.sampleRate,
|
||||
channels: pcmMeta.channels,
|
||||
voice: voice || "default",
|
||||
chunk: chunkIndex++,
|
||||
final: true,
|
||||
},
|
||||
timestamp: Date.now(),
|
||||
});
|
||||
}
|
||||
} catch (chunkErr) {
|
||||
log(`TTS [${i + 1}/${chunks.length}] Fehler: ${chunkErr.message} — ueberspringe`);
|
||||
}
|
||||
}
|
||||
|
||||
log(`TTS komplett: ${chunkIndex} PCM-Frames gestreamt (${cleanText.length} chars)`);
|
||||
} catch (err) {
|
||||
log(`TTS Fehler: ${err.message}`);
|
||||
sendToRVS({
|
||||
type: "xtts_response",
|
||||
payload: { requestId, error: err.message },
|
||||
timestamp: Date.now(),
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Ruft /tts_to_audio/ auf und streamt das resultierende WAV bereits waehrend
|
||||
* des Empfangs in PCM-Frames an den Callback. Der WAV-Header wird einmal
|
||||
* geparst, danach werden nur noch raw PCM-Samples weitergeleitet.
|
||||
*
|
||||
* Warum nicht echtes /tts_stream/? daswer123 hat den Endpoint, aber die
|
||||
* Audio-Quality ist dort niedriger und er produziert beim ersten Chunk
|
||||
* oft Artefakte. Pragmatischer Weg: /tts_to_audio/ + Response-Stream
|
||||
* chunkweise auslesen. Das ist zwar kein echtes Server-Streaming, aber
|
||||
* gibt uns deutlich kleinere Netzwerk-Haeppchen und die App kann via
|
||||
* AudioTrack MODE_STREAM sofort nahtlos abspielen.
|
||||
*/
|
||||
function streamXTTSAsPCM(text, language, speakerWav, onPcmChunk) {
|
||||
return new Promise((resolve, reject) => {
|
||||
const body = JSON.stringify({
|
||||
text,
|
||||
language,
|
||||
speaker_wav: speakerWav || "",
|
||||
});
|
||||
|
||||
const url = new URL(`${XTTS_API_URL}/tts_to_audio/`);
|
||||
const options = {
|
||||
hostname: url.hostname,
|
||||
port: url.port,
|
||||
path: url.pathname,
|
||||
method: "POST",
|
||||
headers: {
|
||||
"Content-Type": "application/json",
|
||||
"Content-Length": Buffer.byteLength(body),
|
||||
},
|
||||
timeout: 60000,
|
||||
};
|
||||
|
||||
const req = http.request(options, (res) => {
|
||||
if (res.statusCode !== 200) {
|
||||
let body = "";
|
||||
res.on("data", (d) => { body += d.toString(); });
|
||||
res.on("end", () => reject(new Error(`XTTS HTTP ${res.statusCode}: ${body.slice(0, 200)}`)));
|
||||
return;
|
||||
}
|
||||
|
||||
let headerParsed = false;
|
||||
let sampleRate = 24000;
|
||||
let channels = 1;
|
||||
let leftover = Buffer.alloc(0); // ungerade Byte-Reste fuer das naechste Chunk
|
||||
const HEADER_BYTES = 44;
|
||||
let headerBuf = Buffer.alloc(0);
|
||||
const PCM_CHUNK_BYTES = 8192; // ~170ms bei 24kHz s16 mono
|
||||
|
||||
res.on("data", (chunk) => {
|
||||
let data = chunk;
|
||||
|
||||
// WAV-Header konsumieren (44 Bytes)
|
||||
if (!headerParsed) {
|
||||
headerBuf = Buffer.concat([headerBuf, data]);
|
||||
if (headerBuf.length < HEADER_BYTES) return;
|
||||
// Header lesen
|
||||
const header = headerBuf.slice(0, HEADER_BYTES);
|
||||
try {
|
||||
channels = header.readUInt16LE(22);
|
||||
sampleRate = header.readUInt32LE(24);
|
||||
} catch (_) {}
|
||||
headerParsed = true;
|
||||
data = headerBuf.slice(HEADER_BYTES);
|
||||
}
|
||||
|
||||
// leftover aus vorherigem Chunk + neuer data
|
||||
let combined = Buffer.concat([leftover, data]);
|
||||
|
||||
// In PCM_CHUNK_BYTES-Happen zerlegen (Vielfache von 2 damit keine Sample-Splits)
|
||||
while (combined.length >= PCM_CHUNK_BYTES) {
|
||||
const slice = combined.slice(0, PCM_CHUNK_BYTES);
|
||||
combined = combined.slice(PCM_CHUNK_BYTES);
|
||||
onPcmChunk(slice.toString("base64"), { sampleRate, channels });
|
||||
}
|
||||
leftover = combined;
|
||||
});
|
||||
|
||||
res.on("end", () => {
|
||||
// Rest-Daten senden
|
||||
if (leftover.length > 0) {
|
||||
onPcmChunk(leftover.toString("base64"), { sampleRate, channels });
|
||||
}
|
||||
resolve();
|
||||
});
|
||||
|
||||
res.on("error", reject);
|
||||
});
|
||||
|
||||
req.on("error", reject);
|
||||
req.on("timeout", () => { req.destroy(); reject(new Error("XTTS API Timeout (60s)")); });
|
||||
req.write(body);
|
||||
req.end();
|
||||
});
|
||||
}
|
||||
|
||||
// ── Voice Upload Handler ────────────────────────────
|
||||
|
||||
async function handleVoiceUpload(payload) {
|
||||
const { name, samples } = payload;
|
||||
if (!name || !samples || !Array.isArray(samples) || samples.length === 0) {
|
||||
log("Voice Upload: Ungueltige Daten");
|
||||
return;
|
||||
}
|
||||
|
||||
log(`Voice Upload: "${name}" (${samples.length} Samples)`);
|
||||
|
||||
try {
|
||||
// Alle Samples zusammenfuegen
|
||||
const buffers = samples.map(s => Buffer.from(s.base64, "base64"));
|
||||
const combined = Buffer.concat(buffers);
|
||||
|
||||
// Als WAV speichern
|
||||
fs.mkdirSync(VOICES_DIR, { recursive: true });
|
||||
const filePath = path.join(VOICES_DIR, `${name.replace(/[^a-zA-Z0-9_-]/g, "_")}.wav`);
|
||||
fs.writeFileSync(filePath, combined);
|
||||
|
||||
log(`Voice gespeichert: ${filePath} (${(combined.length / 1024).toFixed(0)}KB)`);
|
||||
|
||||
sendToRVS({
|
||||
type: "xtts_voice_saved",
|
||||
payload: { name, size: combined.length, path: filePath },
|
||||
timestamp: Date.now(),
|
||||
});
|
||||
} catch (err) {
|
||||
log(`Voice Upload Fehler: ${err.message}`);
|
||||
}
|
||||
}
|
||||
|
||||
// ── Voice Delete Handler ────────────────────────────
|
||||
|
||||
async function handleDeleteVoice(payload) {
|
||||
const { name } = payload || {};
|
||||
if (!name || typeof name !== "string") {
|
||||
log("Voice Delete: ungueltiger Name");
|
||||
return;
|
||||
}
|
||||
const safe = name.replace(/[^a-zA-Z0-9_-]/g, "_");
|
||||
const filePath = path.join(VOICES_DIR, `${safe}.wav`);
|
||||
try {
|
||||
if (fs.existsSync(filePath)) {
|
||||
fs.unlinkSync(filePath);
|
||||
log(`Voice geloescht: ${filePath}`);
|
||||
} else {
|
||||
log(`Voice Delete: Datei existiert nicht (${filePath})`);
|
||||
}
|
||||
// Aktualisierte Liste an alle Clients senden
|
||||
await handleListVoices();
|
||||
} catch (err) {
|
||||
log(`Voice Delete Fehler: ${err.message}`);
|
||||
}
|
||||
}
|
||||
|
||||
// ── Voice List Handler ──────────────────────────────
|
||||
|
||||
async function handleListVoices() {
|
||||
try {
|
||||
const files = fs.existsSync(VOICES_DIR)
|
||||
? fs.readdirSync(VOICES_DIR).filter(f => f.endsWith(".wav"))
|
||||
: [];
|
||||
|
||||
const voices = files.map(f => ({
|
||||
name: path.basename(f, ".wav"),
|
||||
file: f,
|
||||
size: fs.statSync(path.join(VOICES_DIR, f)).size,
|
||||
}));
|
||||
|
||||
log(`Stimmen: ${voices.length} verfuegbar`);
|
||||
|
||||
sendToRVS({
|
||||
type: "xtts_voices_list",
|
||||
payload: { voices },
|
||||
timestamp: Date.now(),
|
||||
});
|
||||
} catch (err) {
|
||||
log(`Stimmen-Liste Fehler: ${err.message}`);
|
||||
}
|
||||
}
|
||||
|
||||
// ── RVS senden ──────────────────────────────────────
|
||||
|
||||
function sendToRVS(msg) {
|
||||
if (rvsWs && rvsWs.readyState === WebSocket.OPEN) {
|
||||
rvsWs.send(JSON.stringify(msg));
|
||||
}
|
||||
}
|
||||
|
||||
// ── Start ───────────────────────────────────────────
|
||||
|
||||
log("ARIA XTTS Bridge startet...");
|
||||
log(`XTTS API: ${XTTS_API_URL}`);
|
||||
log(`RVS: ${RVS_HOST}:${RVS_PORT}`);
|
||||
|
||||
// Warten bis XTTS API erreichbar ist
|
||||
function waitForXTTS(callback, attempts) {
|
||||
if (attempts <= 0) { log("XTTS API nicht erreichbar — starte trotzdem"); callback(); return; }
|
||||
http.get(`${XTTS_API_URL}/docs`, (res) => {
|
||||
log(`XTTS API erreichbar (HTTP ${res.statusCode})`);
|
||||
callback();
|
||||
}).on("error", () => {
|
||||
log(`XTTS API noch nicht bereit — warte (${attempts} Versuche uebrig)...`);
|
||||
setTimeout(() => waitForXTTS(callback, attempts - 1), 10000); // 10s statt 5s (Model laden dauert)
|
||||
});
|
||||
}
|
||||
|
||||
waitForXTTS(() => connectRVS(), 30); // Max 5min warten
|
||||
@@ -0,0 +1,56 @@
|
||||
# ════════════════════════════════════════════════
|
||||
# ARIA XTTS v2 — GPU TTS Server
|
||||
# Laeuft auf dem Gaming-PC (RTX 3060)
|
||||
# Verbindet sich zum RVS fuer TTS-Requests
|
||||
# ════════════════════════════════════════════════
|
||||
#
|
||||
# Voraussetzungen:
|
||||
# - Docker Desktop mit WSL2
|
||||
# - NVIDIA Container Toolkit
|
||||
# - .env mit RVS-Verbindungsdaten
|
||||
#
|
||||
# Start: docker compose up -d
|
||||
# Test: curl http://localhost:8000/docs
|
||||
# ════════════════════════════════════════════════
|
||||
|
||||
services:
|
||||
|
||||
# ─── XTTS v2 API Server (GPU) ─────────────────
|
||||
xtts:
|
||||
image: daswer123/xtts-api-server:latest
|
||||
container_name: aria-xtts
|
||||
deploy:
|
||||
resources:
|
||||
reservations:
|
||||
devices:
|
||||
- driver: nvidia
|
||||
count: 1
|
||||
capabilities: [gpu]
|
||||
ports:
|
||||
- "8000:8020"
|
||||
volumes:
|
||||
- xtts-models:/app/xtts_models # Model-Cache (~2GB)
|
||||
- ./voices:/voices # Custom Voice Samples
|
||||
environment:
|
||||
- COQUI_TOS_AGREED=1
|
||||
restart: unless-stopped
|
||||
|
||||
# ─── XTTS Bridge (verbindet zu RVS) ───────────
|
||||
xtts-bridge:
|
||||
build: .
|
||||
container_name: aria-xtts-bridge
|
||||
depends_on:
|
||||
- xtts
|
||||
volumes:
|
||||
- ./voices:/voices # Shared mit XTTS-Server
|
||||
environment:
|
||||
- XTTS_API_URL=http://xtts:8020
|
||||
- RVS_HOST=${RVS_HOST}
|
||||
- RVS_PORT=${RVS_PORT:-443}
|
||||
- RVS_TLS=${RVS_TLS:-true}
|
||||
- RVS_TLS_FALLBACK=${RVS_TLS_FALLBACK:-true}
|
||||
- RVS_TOKEN=${RVS_TOKEN}
|
||||
restart: unless-stopped
|
||||
|
||||
volumes:
|
||||
xtts-models:
|
||||
@@ -0,0 +1,8 @@
|
||||
{
|
||||
"name": "aria-xtts-bridge",
|
||||
"version": "1.0.0",
|
||||
"private": true,
|
||||
"dependencies": {
|
||||
"ws": "^8.16.0"
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user