diff --git a/android/src/screens/ChatScreen.tsx b/android/src/screens/ChatScreen.tsx
index fc85c7d..0fb589a 100644
--- a/android/src/screens/ChatScreen.tsx
+++ b/android/src/screens/ChatScreen.tsx
@@ -636,7 +636,7 @@ const ChatScreen: React.FC = () => {
{item.text}
)}
- {/* Play-Button fuer ARIA-Nachrichten — Cache bevorzugt, sonst Regenerierung */}
+ {/* Play-Button fuer ARIA-Nachrichten — Cache bevorzugt, sonst Bridge-TTS mit aktueller Engine */}
{!isUser && item.text.length > 0 && (
{
if (item.audioPath) {
audioService.playFromPath(item.audioPath);
} else {
- rvs.send('tts_request' as any, { text: item.text, voice: '' });
+ // messageId mitschicken damit die Bridge das generierte Audio
+ // wieder mit der Nachricht verknuepft (fuer den naechsten Replay aus Cache)
+ rvs.send('tts_request' as any, {
+ text: item.text,
+ voice: '',
+ messageId: item.messageId || '',
+ });
}
}}
>
- {item.audioPath ? '\uD83D\uDD0A' : '\uD83D\uDD0A'}
+ {'\uD83D\uDD0A'}
)}
{time}
diff --git a/bridge/aria_bridge.py b/bridge/aria_bridge.py
index 7d0c985..9589f09 100644
--- a/bridge/aria_bridge.py
+++ b/bridge/aria_bridge.py
@@ -145,6 +145,46 @@ def load_config() -> dict[str, str]:
import re as _re_tts
+# German number words for 0-20 (ASCII transliteration); larger values are
+# composed in _num_to_words_de().
+_NUM_WORDS_DE = {
+    0: "null", 1: "eins", 2: "zwei", 3: "drei", 4: "vier", 5: "fuenf",
+    6: "sechs", 7: "sieben", 8: "acht", 9: "neun", 10: "zehn",
+    11: "elf", 12: "zwoelf", 13: "dreizehn", 14: "vierzehn", 15: "fuenfzehn",
+    16: "sechzehn", 17: "siebzehn", 18: "achtzehn", 19: "neunzehn", 20: "zwanzig",
+}
+# Multiples of ten above 20 that _num_to_words_de() can spell out.
+_TENS_DE = {30: "dreissig", 40: "vierzig", 50: "fuenfzig"}
+
+
+def _num_to_words_de(n: int) -> str:
+    """Spell out an integer from 0 to 59 as a German word.
+
+    Meant for clock times and small ranges. Values outside 0-59 are returned
+    unchanged as their decimal string.
+
+    In compound numbers the ones digit 1 is spoken as "ein"
+    ("einundzwanzig", not "einsundzwanzig"); a standalone 1 stays "eins".
+    """
+    if n in _NUM_WORDS_DE:
+        return _NUM_WORDS_DE[n]
+    if 21 <= n <= 59:
+        ones = n % 10
+        tens = n - ones
+        tens_word = _NUM_WORDS_DE[20] if tens == 20 else _TENS_DE[tens]
+        if ones == 0:
+            return tens_word
+        # German compounds drop the trailing "s" of "eins".
+        ones_word = "ein" if ones == 1 else _NUM_WORDS_DE[ones]
+        return f"{ones_word}und{tens_word}"
+    return str(n)
+
+
+def _time_range_to_words(m):
+    """Turn a matched clock-time range into spoken German.
+
+    Expects a regex match whose groups are: 1 = start hour, 2 = optional start
+    minutes as ':MM' (colon included), 3 = end hour, 4 = optional end minutes
+    as ':MM'.
+
+    '8:00-9:00 Uhr' → 'acht bis neun Uhr', '8-9 Uhr' → 'acht bis neun Uhr',
+    '8:30-9:45 Uhr' → 'acht Uhr dreissig bis neun Uhr fuenfundvierzig'.
+    """
+    def _minutes(index):
+        # Minute groups are optional; tolerate patterns that lack them.
+        if index > m.re.groups:
+            return 0
+        raw = m.group(index)
+        return int(raw[1:]) if raw else 0
+
+    def _hour_before_uhr(hour):
+        # Directly in front of "Uhr" German says "ein", not "eins".
+        return "ein" if hour == 1 else _num_to_words_de(hour)
+
+    h1, h2 = int(m.group(1)), int(m.group(3))
+    min1, min2 = _minutes(2), _minutes(4)
+    if min1 == 0 and min2 == 0:
+        # Full hours on both sides: one trailing "Uhr" is enough.
+        return f"{_num_to_words_de(h1)} bis {_hour_before_uhr(h2)} Uhr"
+    # At least one side has minutes: keep them instead of silently dropping them.
+    start = f"{_hour_before_uhr(h1)} Uhr"
+    if min1:
+        start += f" {_num_to_words_de(min1)}"
+    end = f"{_hour_before_uhr(h2)} Uhr"
+    if min2:
+        end += f" {_num_to_words_de(min2)}"
+    return f"{start} bis {end}"
+
+
+def _small_range_to_words(m):
+    """Turn a small numeric range into words: '5-6' → 'fuenf bis sechs'.
+
+    The match is returned unchanged when either number is above 24, when the
+    range is not ascending, or when the digits are glued to a neighbouring
+    number by a separator (ISO dates such as '2024-05-06', times such as
+    '8:00-9:00', decimals, '1-2-3' sequences) — those are not ranges.
+    """
+    a, b = int(m.group(1)), int(m.group(2))
+    if a > 24 or b > 24 or a >= b:
+        return m.group(0)
+    separators = "-–.,:/"
+    source = m.string
+    before = source[max(0, m.start() - 2):m.start()]
+    after = source[m.end():m.end() + 2]
+    # "<digit><sep>" right before or "<sep><digit>" right after means the match
+    # is only a fragment of a longer numeric token.
+    if len(before) == 2 and before[0].isdigit() and before[1] in separators:
+        return m.group(0)
+    if len(after) == 2 and after[0] in separators and after[1].isdigit():
+        return m.group(0)
+    return f"{_num_to_words_de(a)} bis {_num_to_words_de(b)}"
+
+
_UNIT_WORDS = [
(r'\bTB\b', 'Terabyte'),
(r'\bGB\b', 'Gigabyte'),
@@ -215,6 +255,22 @@ def clean_text_for_tts(text: str) -> str:
t = _re_tts.sub(r'^>\s*', '', t, flags=_re_tts.MULTILINE)
t = _re_tts.sub(r'^[\-\*]\s+', '', t, flags=_re_tts.MULTILINE)
+ # Zeitbereiche: "8:00-9:00 Uhr" / "8-9 Uhr" → "acht bis neun Uhr"
+ t = _re_tts.sub(r'\b(\d{1,2})(:\d{2})?\s*[-–]\s*(\d{1,2})(:\d{2})?\s*Uhr\b', _time_range_to_words, t)
+ # Uhrzeiten mit Minuten: "8:30 Uhr" → "acht Uhr dreissig", "8:00 Uhr" → "acht Uhr"
+ def _single_time(m):
+     """Spell out one 'H:MM Uhr' match: hour, 'Uhr', then minutes if non-zero."""
+     h = int(m.group(1))
+     mn = int(m.group(2)) if m.group(2) else 0
+     # Directly in front of "Uhr" German says "ein", not "eins".
+     hour_word = "ein" if h == 1 else _num_to_words_de(h)
+     words = hour_word + " Uhr"
+     if mn > 0:
+         words += " " + _num_to_words_de(mn)
+     return words
+ t = _re_tts.sub(r'\b(\d{1,2}):(\d{2})\s*Uhr\b', _single_time, t)
+ # Volle Uhrzeiten ohne ":" — "15 Uhr" → "fuenfzehn Uhr"
+ t = _re_tts.sub(r'\b(\d{1,2})\s+Uhr\b', lambda m: f"{_num_to_words_de(int(m.group(1)))} Uhr", t)
+ # Kleine Zahlen-Bereiche ohne "Uhr": "5-6" → "fuenf bis sechs"
+ t = _re_tts.sub(r'\b(\d{1,2})\s*[-–]\s*(\d{1,2})\b', _small_range_to_words, t)
+
# Zahlen + Einheit: "22GB" → "22 Gigabyte" (Leerzeichen einfuegen)
t = _re_tts.sub(r'(\d+)([A-Za-z]{1,4})\b', r'\1 \2', t)
@@ -655,6 +711,8 @@ class ARIABridge:
# Zeitstempel des letzten chat:final — waehrend 3s danach werden
# trailing Agent-Events unterdrueckt (Core raeumt manchmal nach).
self._last_chat_final_at: float = 0.0
+ # requestId → messageId Map fuer XTTS-Audio-Cache (App-seitige Zuordnung)
+ self._xtts_request_to_message: dict[str, str] = {}
def initialize(self) -> None:
"""Initialisiert alle Komponenten.
@@ -998,6 +1056,9 @@ class ARIABridge:
# Eindeutige Message-ID fuer Audio-Cache-Zuordnung
message_id = str(uuid.uuid4())
+ # TTS-aufbereitete Variante fuer Debug (Diagnostic zeigt optional)
+ tts_text_preview = clean_text_for_tts(text)
+
# Antwort an die App weiterleiten (als Chat-Nachricht)
await self._send_to_rvs({
"type": "chat",
@@ -1006,6 +1067,8 @@ class ARIABridge:
"sender": "aria",
"voice": voice_name,
"messageId": message_id,
+ # Debug: aufbereiteter Text fuer TTS (App ignoriert, Diagnostic zeigt optional)
+ "ttsText": tts_text_preview if tts_text_preview != text else "",
},
"timestamp": int(asyncio.get_event_loop().time() * 1000),
})
@@ -1022,13 +1085,20 @@ class ARIABridge:
logger.info("[core] TTS-Text leer nach Cleanup — XTTS uebersprungen")
return
try:
+ xtts_request_id = str(uuid.uuid4())
+ # Map fuer xtts_response → App-Cache Zuordnung
+ self._xtts_request_to_message[xtts_request_id] = message_id
+ if len(self._xtts_request_to_message) > 100:
+ # Oldest entry raus damit der Dict nicht waechst
+ oldest = next(iter(self._xtts_request_to_message))
+ self._xtts_request_to_message.pop(oldest, None)
await self._send_to_rvs({
"type": "xtts_request",
"payload": {
"text": tts_text,
"voice": xtts_voice,
"language": "de",
- "requestId": str(uuid.uuid4()),
+ "requestId": xtts_request_id,
},
"timestamp": int(asyncio.get_event_loop().time() * 1000),
})
@@ -1230,6 +1300,10 @@ class ARIABridge:
# XTTS-Audio vom Gaming-PC empfangen → an App weiterleiten
audio_b64 = payload.get("base64", "")
error = payload.get("error", "")
+ req_id_full = payload.get("requestId", "")
+ # XTTS-Bridge suffixt chunkweise: "uuid_0", "uuid_1" → Basis-UUID extrahieren
+ req_id_base = req_id_full.rsplit("_", 1)[0] if "_" in req_id_full else req_id_full
+ linked_message_id = self._xtts_request_to_message.get(req_id_base, "")
if error:
logger.warning("[rvs] XTTS Fehler: %s", error)
return
@@ -1241,16 +1315,44 @@ class ARIABridge:
"base64": audio_b64,
"mimeType": payload.get("mimeType", "audio/wav"),
"voice": payload.get("voice", "xtts"),
+ "messageId": linked_message_id,
},
"timestamp": int(asyncio.get_event_loop().time() * 1000),
})
return
elif msg_type == "tts_request":
- # App fordert TTS-Audio fuer einen Text an (Play-Button)
+ # App fordert TTS-Audio fuer einen Text an (Play-Button).
+ # Nutze die aktuell konfigurierte Engine (Piper oder XTTS).
text = payload.get("text", "")
requested_voice = payload.get("voice", "")
- if text:
+ message_id = payload.get("messageId", "") # fuer Cache-Zuordnung
+ if not text:
+ return
+
+ tts_engine = getattr(self, 'tts_engine_type', 'piper')
+ tts_text = clean_text_for_tts(text) or text
+
+ if tts_engine == "xtts":
+     xtts_voice = getattr(self, 'xtts_voice', '')
+     xtts_request_id = str(uuid.uuid4())
+     # The xtts_response handler resolves the messageId it forwards to the app
+     # by looking the requestId up in self._xtts_request_to_message. Register
+     # the on-demand request there, otherwise the replayed audio is never
+     # linked back to its chat message.
+     if message_id:
+         self._xtts_request_to_message[xtts_request_id] = message_id
+         if len(self._xtts_request_to_message) > 100:
+             # Drop the oldest entry so the map stays bounded.
+             oldest = next(iter(self._xtts_request_to_message))
+             self._xtts_request_to_message.pop(oldest, None)
+     try:
+         await self._send_to_rvs({
+             "type": "xtts_request",
+             "payload": {
+                 "text": tts_text,
+                 "voice": xtts_voice,
+                 "language": "de",
+                 "requestId": xtts_request_id,
+                 "messageId": message_id,
+             },
+             "timestamp": int(asyncio.get_event_loop().time() * 1000),
+         })
+         logger.info("[rvs] TTS on-demand via XTTS: '%s'", tts_text[:60])
+     except Exception as e:
+         # The request never went out, so no response can refer to this id.
+         self._xtts_request_to_message.pop(xtts_request_id, None)
+         logger.warning("[rvs] XTTS-Request fehlgeschlagen, Fallback Piper: %s", e)
+         tts_engine = "piper"
+
+ if tts_engine == "piper":
voice_name = requested_voice or self.voice_engine.select_voice(text)
audio_data = self.voice_engine.synthesize(text, voice_name)
if audio_data:
@@ -1262,10 +1364,11 @@ class ARIABridge:
"base64": audio_b64,
"mimeType": "audio/wav",
"voice": voice_name,
+ "messageId": message_id,
},
"timestamp": int(asyncio.get_event_loop().time() * 1000),
})
- logger.info("[rvs] TTS on-demand: %d bytes (%s)", len(audio_data), voice_name)
+ logger.info("[rvs] TTS on-demand via Piper: %d bytes (%s)", len(audio_data), voice_name)
except Exception as e:
logger.warning("[rvs] TTS on-demand senden fehlgeschlagen: %s", e)
return
diff --git a/diagnostic/index.html b/diagnostic/index.html
index 9affa49..3e0f9ec 100644
--- a/diagnostic/index.html
+++ b/diagnostic/index.html
@@ -198,7 +198,13 @@
Chat Test
-
+
+
+
+
@@ -1272,14 +1278,55 @@
});
}
- function addChat(type, text, meta) {
+ // Debug toggle: show the TTS-prepared variant below ARIA messages.
+ // The state is persisted in localStorage under 'aria-show-tts-debug' as '1' or '0'.
+ let showTtsDebug = localStorage.getItem('aria-show-tts-debug') === '1';
+ /** Flips the debug flag, stores it, and syncs the '#tts-debug-toggle' checkbox if it exists. */
+ function toggleTtsDebug() {
+     showTtsDebug = !showTtsDebug;
+     const stored = showTtsDebug ? '1' : '0';
+     localStorage.setItem('aria-show-tts-debug', stored);
+     const checkbox = document.getElementById('tts-debug-toggle');
+     if (checkbox) {
+         checkbox.checked = showTtsDebug;
+     }
+ }
+
+ // Minimal JS port of the bridge's clean_text_for_tts() — for display only.
+ // Strips Markdown (code fences, inline code, emphasis, links, headings,
+ // block quotes, list bullets), replaces bare URLs with 'ein Link', expands
+ // GB/MB and '%', spells out CPU/API/RAM letter by letter and flattens
+ // newlines. It contains no time or number-range conversion.
+ // Returns '' for empty input, otherwise the trimmed result.
+ function previewTtsText(text) {
+ if (!text) return '';
+ // NOTE(review): the comment and the regex on the next lines look truncated in
+ // this view (markup seems to have been stripped). Presumably they match an
+ // opening voice tag up to the closing one — confirm against the real file.
+ //
...
+ const vm = text.match(/
([\s\S]*?)<\/voice>/i);
+ // If the match succeeded, continue with the captured inner text only.
+ if (vm) text = vm[1];
+ let t = text;
+ // Fenced code blocks become a sentence break; inline code is dropped.
+ t = t.replace(/```[\s\S]*?```/g, '. ');
+ t = t.replace(/`[^`]+`/g, '');
+ // Bold / italic markers are removed, the wrapped text stays.
+ t = t.replace(/\*\*([^*]+)\*\*/g, '$1');
+ t = t.replace(/\*([^*]+)\*/g, '$1');
+ // Markdown links keep their label; remaining bare URLs are replaced.
+ t = t.replace(/\[([^\]]+)\]\([^)]+\)/g, '$1');
+ t = t.replace(/https?:\/\/\S+/g, 'ein Link');
+ // Line-leading heading, quote and bullet markers.
+ t = t.replace(/^#{1,6}\s*/gm, '');
+ t = t.replace(/^>\s*/gm, '');
+ t = t.replace(/^[\-\*]\s+/gm, '');
+ // Units and symbols spelled out.
+ t = t.replace(/(\d+)GB\b/g, '$1 Gigabyte');
+ t = t.replace(/(\d+)MB\b/g, '$1 Megabyte');
+ t = t.replace(/%/g, ' Prozent');
+ // Abbreviations are spaced out letter by letter.
+ t = t.replace(/\bCPU\b/g, 'C P U').replace(/\bAPI\b/g, 'A P I').replace(/\bRAM\b/g, 'R A M');
+ // Paragraph breaks → '. ', single newlines → ', ', runs of whitespace → one space.
+ t = t.replace(/\n{2,}/g, '. ').replace(/\n/g, ', ').replace(/\s{2,}/g, ' ');
+ return t.trim();
+ }
+
+ function addChat(type, text, meta, options) {
const escaped = escapeHtml(text);
let linked = linkifyText(escaped);
// /shared/uploads/ Pfade als Inline-Bilder anzeigen
linked = linked.replace(/\/shared\/uploads\/[^\s<"]+\.(jpg|jpeg|png|gif)/gi, (match) => {
return `${match}
`;
});
- const html = `${linked}${escapeHtml(meta)} — ${new Date().toLocaleTimeString('de-DE')}
`;
+ // Optional: TTS-Variante als zusaetzliches Block unter der Nachricht
+ let ttsBlock = '';
+ if (showTtsDebug && type === 'received') {
+ const ttsText = (options && options.ttsText) || previewTtsText(text);
+ if (ttsText && ttsText !== text) {
+ ttsBlock = `TTS: ${escapeHtml(ttsText)}
`;
+ }
+ }
+ const html = `${linked}${ttsBlock}${escapeHtml(meta)} — ${new Date().toLocaleTimeString('de-DE')}
`;
// Thinking-Indikator ausblenden bei neuer Nachricht
updateThinkingIndicator({ activity: 'idle' });
@@ -2129,6 +2176,10 @@
send({ action: 'get_openclaw_config' });
}
+ // Toggle-Checkbox initial korrekt setzen
+ const ttsToggleEl = document.getElementById('tts-debug-toggle');
+ if (ttsToggleEl) ttsToggleEl.checked = showTtsDebug;
+
connectWS();