feat: TTS-Zeitbereiche + Diagnostic-Debug-Toggle + Play-Button respektiert Engine
TTS-Cleanup erweitert:
- Zeitbereiche: '8:00-9:00 Uhr' / '8-9 Uhr' → 'acht bis neun Uhr'
- Uhrzeiten: '8:30 Uhr' → 'acht Uhr dreissig', '15 Uhr' → 'fuenfzehn Uhr'
- Kleine Zahlen-Bereiche: '5-6' → 'fuenf bis sechs' (nur ≤24)
- Zahlen 0-59 als deutsche Woerter (inkl. 'einundzwanzig', 'fuenfundvierzig')

Diagnostic: TTS-Debug Einblenden
- Checkbox 'TTS-Text einblenden' in der Chat-Test Kopfzeile
- Unter ARIA-Nachrichten erscheint die aufbereitete Variante (blauer Border + Label 'TTS:')
- Nur in Diagnostic, nicht in der App
- LocalStorage persistiert den Toggle-Zustand
- Minimaler JS-Port von clean_text_for_tts als Fallback

Play-Button respektiert Engine:
- Bridge: tts_request nutzt jetzt die aktive TTS-Engine (Piper/XTTS), Text wird durch clean_text_for_tts aufbereitet
- messageId wird vom Play-Button mitgeschickt → Bridge verknuepft generiertes Audio mit der urspruenglichen Message
- XTTS-Chunks: requestId → messageId Map (LRU 100 Eintraege), beim xtts_response wird die Basis-UUID extrahiert und die messageId dem audio-Frame angehaengt
- App cached auch XTTS-Audio jetzt (letzter Satz pro Message — echte Chunk-Konkatenation bleibt TODO)

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
parent
1fb1fdef9e
commit
eb12281dfc
|
|
@ -636,7 +636,7 @@ const ChatScreen: React.FC = () => {
|
|||
{item.text}
|
||||
</Text>
|
||||
)}
|
||||
{/* Play-Button fuer ARIA-Nachrichten — Cache bevorzugt, sonst Regenerierung */}
|
||||
{/* Play-Button fuer ARIA-Nachrichten — Cache bevorzugt, sonst Bridge-TTS mit aktueller Engine */}
|
||||
{!isUser && item.text.length > 0 && (
|
||||
<TouchableOpacity
|
||||
style={styles.playButton}
|
||||
|
|
@ -644,11 +644,17 @@ const ChatScreen: React.FC = () => {
|
|||
if (item.audioPath) {
|
||||
audioService.playFromPath(item.audioPath);
|
||||
} else {
|
||||
rvs.send('tts_request' as any, { text: item.text, voice: '' });
|
||||
// messageId mitschicken damit die Bridge das generierte Audio
|
||||
// wieder mit der Nachricht verknuepft (fuer den naechsten Replay aus Cache)
|
||||
rvs.send('tts_request' as any, {
|
||||
text: item.text,
|
||||
voice: '',
|
||||
messageId: item.messageId || '',
|
||||
});
|
||||
}
|
||||
}}
|
||||
>
|
||||
<Text style={styles.playButtonText}>{item.audioPath ? '\uD83D\uDD0A' : '\uD83D\uDD0A'}</Text>
|
||||
<Text style={styles.playButtonText}>{'\uD83D\uDD0A'}</Text>
|
||||
</TouchableOpacity>
|
||||
)}
|
||||
<Text style={styles.timestamp}>{time}</Text>
|
||||
|
|
|
|||
|
|
@ -145,6 +145,46 @@ def load_config() -> dict[str, str]:
|
|||
|
||||
import re as _re_tts
|
||||
|
||||
_NUM_WORDS_DE = {
|
||||
0: "null", 1: "eins", 2: "zwei", 3: "drei", 4: "vier", 5: "fuenf",
|
||||
6: "sechs", 7: "sieben", 8: "acht", 9: "neun", 10: "zehn",
|
||||
11: "elf", 12: "zwoelf", 13: "dreizehn", 14: "vierzehn", 15: "fuenfzehn",
|
||||
16: "sechzehn", 17: "siebzehn", 18: "achtzehn", 19: "neunzehn", 20: "zwanzig",
|
||||
}
|
||||
_TENS_DE = {30: "dreissig", 40: "vierzig", 50: "fuenfzig"}
|
||||
|
||||
|
||||
def _num_to_words_de(n: int) -> str:
|
||||
"""Zahlen 0-59 als deutsches Wort — fuer Uhrzeiten und kleine Bereiche."""
|
||||
if n in _NUM_WORDS_DE:
|
||||
return _NUM_WORDS_DE[n]
|
||||
if 21 <= n <= 29:
|
||||
return f"{_NUM_WORDS_DE[n - 20]}undzwanzig"
|
||||
if 30 <= n <= 59:
|
||||
tens = (n // 10) * 10
|
||||
ones = n % 10
|
||||
tens_word = _TENS_DE.get(tens, str(tens))
|
||||
if ones == 0:
|
||||
return tens_word
|
||||
return f"{_NUM_WORDS_DE.get(ones, str(ones))}und{tens_word}"
|
||||
return str(n)
|
||||
|
||||
|
||||
def _time_range_to_words(m):
    """Render a matched clock-time range as spoken German.

    *m* is a regex match with four groups: start hour, optional start
    minutes (":MM"), end hour, optional end minutes (":MM").

    Full-hour ranges keep the compact form:
        '8:00-9:00 Uhr' / '8-9 Uhr'  ->  'acht bis neun Uhr'
    When either side has non-zero minutes they are spoken instead of being
    silently dropped:
        '8:30-9:45 Uhr'  ->  'acht Uhr dreissig bis neun Uhr fuenfundvierzig'
    """
    def _minutes(group_text):
        # The optional group is None when absent, otherwise ":MM".
        return int(group_text[1:]) if group_text else 0

    def _clock(hour, minute):
        # Directly in front of "Uhr" the hour 1 is spoken "ein", not "eins".
        hour_word = "ein" if hour == 1 else _num_to_words_de(hour)
        if minute:
            return f"{hour_word} Uhr {_num_to_words_de(minute)}"
        return f"{hour_word} Uhr"

    h1, h2 = int(m.group(1)), int(m.group(3))
    mn1, mn2 = _minutes(m.group(2)), _minutes(m.group(4))

    if mn1 == 0 and mn2 == 0:
        return f"{_num_to_words_de(h1)} bis {_num_to_words_de(h2)} Uhr"
    return f"{_clock(h1, mn1)} bis {_clock(h2, mn2)}"
|
||||
|
||||
|
||||
def _small_range_to_words(m):
    """Spell out a small ascending number range, e.g. '5-6' -> 'fuenf bis sechs'.

    *m* is a regex match whose groups 1 and 2 are the two numbers. The
    matched text is returned untouched unless both numbers are at most 24
    and the first is strictly smaller than the second.
    """
    low = int(m.group(1))
    high = int(m.group(2))
    is_small_ascending = low <= 24 and high <= 24 and low < high
    if not is_small_ascending:
        return m.group(0)
    return " bis ".join(_num_to_words_de(value) for value in (low, high))
|
||||
|
||||
|
||||
_UNIT_WORDS = [
|
||||
(r'\bTB\b', 'Terabyte'),
|
||||
(r'\bGB\b', 'Gigabyte'),
|
||||
|
|
@ -215,6 +255,22 @@ def clean_text_for_tts(text: str) -> str:
|
|||
t = _re_tts.sub(r'^>\s*', '', t, flags=_re_tts.MULTILINE)
|
||||
t = _re_tts.sub(r'^[\-\*]\s+', '', t, flags=_re_tts.MULTILINE)
|
||||
|
||||
# Zeitbereiche: "8:00-9:00 Uhr" / "8-9 Uhr" → "acht bis neun Uhr"
|
||||
t = _re_tts.sub(r'\b(\d{1,2})(:\d{2})?\s*[-–]\s*(\d{1,2})(:\d{2})?\s*Uhr\b', _time_range_to_words, t)
|
||||
# Uhrzeiten mit Minuten: "8:30 Uhr" → "acht Uhr dreissig", "8:00 Uhr" → "acht Uhr"
|
||||
def _single_time(m):
|
||||
h = int(m.group(1))
|
||||
mn = int(m.group(2)) if m.group(2) else 0
|
||||
words = _num_to_words_de(h) + " Uhr"
|
||||
if mn > 0:
|
||||
words += " " + _num_to_words_de(mn)
|
||||
return words
|
||||
t = _re_tts.sub(r'\b(\d{1,2}):(\d{2})\s*Uhr\b', _single_time, t)
|
||||
# Volle Uhrzeiten ohne ":" — "15 Uhr" → "fuenfzehn Uhr"
|
||||
t = _re_tts.sub(r'\b(\d{1,2})\s+Uhr\b', lambda m: f"{_num_to_words_de(int(m.group(1)))} Uhr", t)
|
||||
# Kleine Zahlen-Bereiche ohne "Uhr": "5-6" → "fuenf bis sechs"
|
||||
t = _re_tts.sub(r'\b(\d{1,2})\s*[-–]\s*(\d{1,2})\b', _small_range_to_words, t)
|
||||
|
||||
# Zahlen + Einheit: "22GB" → "22 Gigabyte" (Leerzeichen einfuegen)
|
||||
t = _re_tts.sub(r'(\d+)([A-Za-z]{1,4})\b', r'\1 \2', t)
|
||||
|
||||
|
|
@ -655,6 +711,8 @@ class ARIABridge:
|
|||
# Zeitstempel des letzten chat:final — waehrend 3s danach werden
|
||||
# trailing Agent-Events unterdrueckt (Core raeumt manchmal nach).
|
||||
self._last_chat_final_at: float = 0.0
|
||||
# requestId → messageId Map fuer XTTS-Audio-Cache (App-seitige Zuordnung)
|
||||
self._xtts_request_to_message: dict[str, str] = {}
|
||||
|
||||
def initialize(self) -> None:
|
||||
"""Initialisiert alle Komponenten.
|
||||
|
|
@ -998,6 +1056,9 @@ class ARIABridge:
|
|||
# Eindeutige Message-ID fuer Audio-Cache-Zuordnung
|
||||
message_id = str(uuid.uuid4())
|
||||
|
||||
# TTS-aufbereitete Variante fuer Debug (Diagnostic zeigt optional)
|
||||
tts_text_preview = clean_text_for_tts(text)
|
||||
|
||||
# Antwort an die App weiterleiten (als Chat-Nachricht)
|
||||
await self._send_to_rvs({
|
||||
"type": "chat",
|
||||
|
|
@ -1006,6 +1067,8 @@ class ARIABridge:
|
|||
"sender": "aria",
|
||||
"voice": voice_name,
|
||||
"messageId": message_id,
|
||||
# Debug: aufbereiteter Text fuer TTS (App ignoriert, Diagnostic zeigt optional)
|
||||
"ttsText": tts_text_preview if tts_text_preview != text else "",
|
||||
},
|
||||
"timestamp": int(asyncio.get_event_loop().time() * 1000),
|
||||
})
|
||||
|
|
@ -1022,13 +1085,20 @@ class ARIABridge:
|
|||
logger.info("[core] TTS-Text leer nach Cleanup — XTTS uebersprungen")
|
||||
return
|
||||
try:
|
||||
xtts_request_id = str(uuid.uuid4())
|
||||
# Map fuer xtts_response → App-Cache Zuordnung
|
||||
self._xtts_request_to_message[xtts_request_id] = message_id
|
||||
if len(self._xtts_request_to_message) > 100:
|
||||
# Oldest entry raus damit der Dict nicht waechst
|
||||
oldest = next(iter(self._xtts_request_to_message))
|
||||
self._xtts_request_to_message.pop(oldest, None)
|
||||
await self._send_to_rvs({
|
||||
"type": "xtts_request",
|
||||
"payload": {
|
||||
"text": tts_text,
|
||||
"voice": xtts_voice,
|
||||
"language": "de",
|
||||
"requestId": str(uuid.uuid4()),
|
||||
"requestId": xtts_request_id,
|
||||
},
|
||||
"timestamp": int(asyncio.get_event_loop().time() * 1000),
|
||||
})
|
||||
|
|
@ -1230,6 +1300,10 @@ class ARIABridge:
|
|||
# XTTS-Audio vom Gaming-PC empfangen → an App weiterleiten
|
||||
audio_b64 = payload.get("base64", "")
|
||||
error = payload.get("error", "")
|
||||
req_id_full = payload.get("requestId", "")
|
||||
# XTTS-Bridge suffixt chunkweise: "uuid_0", "uuid_1" → Basis-UUID extrahieren
|
||||
req_id_base = req_id_full.rsplit("_", 1)[0] if "_" in req_id_full else req_id_full
|
||||
linked_message_id = self._xtts_request_to_message.get(req_id_base, "")
|
||||
if error:
|
||||
logger.warning("[rvs] XTTS Fehler: %s", error)
|
||||
return
|
||||
|
|
@ -1241,16 +1315,44 @@ class ARIABridge:
|
|||
"base64": audio_b64,
|
||||
"mimeType": payload.get("mimeType", "audio/wav"),
|
||||
"voice": payload.get("voice", "xtts"),
|
||||
"messageId": linked_message_id,
|
||||
},
|
||||
"timestamp": int(asyncio.get_event_loop().time() * 1000),
|
||||
})
|
||||
return
|
||||
|
||||
elif msg_type == "tts_request":
|
||||
# App fordert TTS-Audio fuer einen Text an (Play-Button)
|
||||
# App fordert TTS-Audio fuer einen Text an (Play-Button).
|
||||
# Nutze die aktuell konfigurierte Engine (Piper oder XTTS).
|
||||
text = payload.get("text", "")
|
||||
requested_voice = payload.get("voice", "")
|
||||
if text:
|
||||
message_id = payload.get("messageId", "") # fuer Cache-Zuordnung
|
||||
if not text:
|
||||
return
|
||||
|
||||
tts_engine = getattr(self, 'tts_engine_type', 'piper')
|
||||
tts_text = clean_text_for_tts(text) or text
|
||||
|
||||
if tts_engine == "xtts":
|
||||
xtts_voice = getattr(self, 'xtts_voice', '')
|
||||
try:
|
||||
await self._send_to_rvs({
|
||||
"type": "xtts_request",
|
||||
"payload": {
|
||||
"text": tts_text,
|
||||
"voice": xtts_voice,
|
||||
"language": "de",
|
||||
"requestId": str(uuid.uuid4()),
|
||||
"messageId": message_id,
|
||||
},
|
||||
"timestamp": int(asyncio.get_event_loop().time() * 1000),
|
||||
})
|
||||
logger.info("[rvs] TTS on-demand via XTTS: '%s'", tts_text[:60])
|
||||
except Exception as e:
|
||||
logger.warning("[rvs] XTTS-Request fehlgeschlagen, Fallback Piper: %s", e)
|
||||
tts_engine = "piper"
|
||||
|
||||
if tts_engine == "piper":
|
||||
voice_name = requested_voice or self.voice_engine.select_voice(text)
|
||||
audio_data = self.voice_engine.synthesize(text, voice_name)
|
||||
if audio_data:
|
||||
|
|
@ -1262,10 +1364,11 @@ class ARIABridge:
|
|||
"base64": audio_b64,
|
||||
"mimeType": "audio/wav",
|
||||
"voice": voice_name,
|
||||
"messageId": message_id,
|
||||
},
|
||||
"timestamp": int(asyncio.get_event_loop().time() * 1000),
|
||||
})
|
||||
logger.info("[rvs] TTS on-demand: %d bytes (%s)", len(audio_data), voice_name)
|
||||
logger.info("[rvs] TTS on-demand via Piper: %d bytes (%s)", len(audio_data), voice_name)
|
||||
except Exception as e:
|
||||
logger.warning("[rvs] TTS on-demand senden fehlgeschlagen: %s", e)
|
||||
return
|
||||
|
|
|
|||
|
|
@ -198,7 +198,13 @@
|
|||
<div class="card full">
|
||||
<div style="display:flex;align-items:center;justify-content:space-between;margin-bottom:8px;">
|
||||
<h2 style="margin:0;">Chat Test</h2>
|
||||
<button class="btn secondary" onclick="toggleChatFullscreen()" id="btn-chat-fs" style="padding:4px 10px;font-size:11px;">Vollbild</button>
|
||||
<div style="display:flex;align-items:center;gap:12px;">
|
||||
<label style="color:#8888AA;font-size:11px;cursor:pointer;">
|
||||
<input type="checkbox" id="tts-debug-toggle" onchange="toggleTtsDebug()" style="margin-right:4px;vertical-align:middle;">
|
||||
TTS-Text einblenden
|
||||
</label>
|
||||
<button class="btn secondary" onclick="toggleChatFullscreen()" id="btn-chat-fs" style="padding:4px 10px;font-size:11px;">Vollbild</button>
|
||||
</div>
|
||||
</div>
|
||||
<div class="chat-box" id="chat-box"></div>
|
||||
<div id="thinking-indicator" style="display:none;padding:6px 10px;font-size:12px;color:#FFD60A;background:#1E1E2E;border-radius:0 0 6px 6px;margin-top:-8px;margin-bottom:8px;align-items:center;justify-content:space-between;">
|
||||
|
|
@ -1272,14 +1278,55 @@
|
|||
});
|
||||
}
|
||||
|
||||
function addChat(type, text, meta) {
|
||||
// Debug-Toggle: TTS-aufbereitete Variante unter ARIA-Nachrichten einblenden
|
||||
let showTtsDebug = localStorage.getItem('aria-show-tts-debug') === '1';
|
||||
// Flip the TTS debug flag, persist it in localStorage and mirror the new
// state onto the toggle checkbox (if it is present in the DOM).
function toggleTtsDebug() {
  const enabled = !showTtsDebug;
  showTtsDebug = enabled;
  localStorage.setItem('aria-show-tts-debug', enabled ? '1' : '0');
  const checkbox = document.getElementById('tts-debug-toggle');
  if (checkbox) {
    checkbox.checked = enabled;
  }
}
|
||||
|
||||
// Minimal-JS-Port von clean_text_for_tts() (Bridge) — reine Anzeige
|
||||
// Display-only approximation of the bridge-side clean_text_for_tts():
// returns the text roughly as it would be handed to the TTS engine.
// An empty/falsy input yields ''. If a <voice>...</voice> section exists,
// only the content of the first one is processed.
function previewTtsText(text) {
  if (!text) return '';

  const voiceMatch = text.match(/<voice>([\s\S]*?)<\/voice>/i);
  const source = voiceMatch ? voiceMatch[1] : text;

  // Applied strictly in this order; later rules see the output of earlier ones.
  const rules = [
    [/```[\s\S]*?```/g, '. '],              // fenced code blocks
    [/`[^`]+`/g, ''],                       // inline code
    [/\*\*([^*]+)\*\*/g, '$1'],             // bold
    [/\*([^*]+)\*/g, '$1'],                 // italic
    [/\[([^\]]+)\]\([^)]+\)/g, '$1'],       // markdown links -> label
    [/https?:\/\/\S+/g, 'ein Link'],        // bare URLs
    [/^#{1,6}\s*/gm, ''],                   // headings
    [/^>\s*/gm, ''],                        // block quotes
    [/^[\-\*]\s+/gm, ''],                   // list bullets
    [/(\d+)GB\b/g, '$1 Gigabyte'],
    [/(\d+)MB\b/g, '$1 Megabyte'],
    [/%/g, ' Prozent'],
    [/\bCPU\b/g, 'C P U'],
    [/\bAPI\b/g, 'A P I'],
    [/\bRAM\b/g, 'R A M'],
    [/\n{2,}/g, '. '],                      // paragraph breaks
    [/\n/g, ', '],                          // single line breaks
    [/\s{2,}/g, ' '],                       // collapse whitespace runs
  ];

  const cleaned = rules.reduce(
    (acc, [pattern, replacement]) => acc.replace(pattern, replacement),
    source
  );
  return cleaned.trim();
}
|
||||
|
||||
function addChat(type, text, meta, options) {
|
||||
const escaped = escapeHtml(text);
|
||||
let linked = linkifyText(escaped);
|
||||
// /shared/uploads/ Pfade als Inline-Bilder anzeigen
|
||||
linked = linked.replace(/\/shared\/uploads\/[^\s<"]+\.(jpg|jpeg|png|gif)/gi, (match) => {
|
||||
return `<a href="${match}" target="_blank">${match}</a><img src="${match}" class="chat-media" onclick="openLightbox('image','${match}')" onerror="this.style.display='none'">`;
|
||||
});
|
||||
const html = `${linked}<div class="meta">${escapeHtml(meta)} — ${new Date().toLocaleTimeString('de-DE')}</div>`;
|
||||
// Optional: TTS-Variante als zusaetzliches Block unter der Nachricht
|
||||
let ttsBlock = '';
|
||||
if (showTtsDebug && type === 'received') {
|
||||
const ttsText = (options && options.ttsText) || previewTtsText(text);
|
||||
if (ttsText && ttsText !== text) {
|
||||
ttsBlock = `<div style="margin-top:6px;padding:4px 8px;background:rgba(0,150,255,0.08);border-left:2px solid #0096FF;font-size:11px;color:#88AACC;"><span style="color:#0096FF;font-weight:bold;">TTS:</span> ${escapeHtml(ttsText)}</div>`;
|
||||
}
|
||||
}
|
||||
const html = `${linked}${ttsBlock}<div class="meta">${escapeHtml(meta)} — ${new Date().toLocaleTimeString('de-DE')}</div>`;
|
||||
|
||||
// Thinking-Indikator ausblenden bei neuer Nachricht
|
||||
updateThinkingIndicator({ activity: 'idle' });
|
||||
|
|
@ -2129,6 +2176,10 @@
|
|||
send({ action: 'get_openclaw_config' });
|
||||
}
|
||||
|
||||
// Toggle-Checkbox initial korrekt setzen
|
||||
const ttsToggleEl = document.getElementById('tts-debug-toggle');
|
||||
if (ttsToggleEl) ttsToggleEl.checked = showTtsDebug;
|
||||
|
||||
connectWS();
|
||||
</script>
|
||||
</body>
|
||||
|
|
|
|||
Loading…
Reference in New Issue