feat: Piper komplett entfernt — nur noch XTTS v2 als TTS
Breaking Change: wenn XTTS-Bridge (Gaming-PC) offline ist, bleibt ARIA stumm. Chat-Antworten kommen weiter an, aber kein Audio. Das ist bewusst akzeptiert — XTTS klingt einfach grauenhaft viel besser. Bridge (aria_bridge.py): - from piper import ... raus - VoiceEngine-Klasse komplett entfernt (synthesize, speak, select_voice) - EPIC_TRIGGERS + load_epic_triggers raus (Highlight-Voice-Feature ohne Piper sinnlos) - self.voice_engine, voice_name, requested_voice Aufrufe weg - _process_core_response: immer XTTS, kein Fallback - tts_request Handler: immer XTTS - config Handler: nur ttsEnabled + xttsVoice + whisperModel - import wave raus bridge/requirements.txt: piper-tts raus bridge/Dockerfile: Kommentar aktualisiert docker-compose.yml: ./aria-data/voices Mount raus aria-data/config/aria.env.example: PIPER_RAMONA/PIPER_THORSTEN raus get-voices.sh: komplett geloescht (war nur Piper-Downloader) Diagnostic UI (index.html): - Piper Panel (Standard-Stimme / Highlight-Stimme / Speed-Sliders) weg - TTS Engine Dropdown weg (immer XTTS) - TTS Diagnose Tab zeigt nur noch XTTS-Status + Test-Button - sendVoiceConfig sendet nur noch ttsEnabled/xttsVoice/whisperModel - toggleXTTSPanel als no-op Legacy-Stub (JS-Calls bleiben safe) Diagnostic Server (server.js): - handleSendVoiceConfig: nur noch ttsEnabled + xttsVoice + whisperModel - handleTestTTS: via xtts_request (nicht mehr Piper subprocess) - handleCheckTTS: via xtts_list_voices ueber RVS - handleGetVoiceConfig/Defaults bereinigt - Highlight-Trigger UI bleibt, wird aber von Bridge nicht mehr ausgewertet (dead-code im UI, spaeter ggf. fuer XTTS-Voice-Switch) README + issue.md aktualisiert. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
+1
-1
@@ -1,6 +1,6 @@
|
||||
# ════════════════════════════════════════════════
|
||||
# ARIA Voice Bridge — Dockerfile
|
||||
# Whisper STT + Piper TTS + Wake-Word
|
||||
# Whisper STT + Wake-Word (TTS via XTTS v2 remote)
|
||||
# ════════════════════════════════════════════════
|
||||
|
||||
FROM python:3.12-slim
|
||||
|
||||
+56
-369
@@ -26,7 +26,6 @@ import ssl
|
||||
import sys
|
||||
import tempfile
|
||||
import uuid
|
||||
import wave
|
||||
from pathlib import Path
|
||||
from typing import Optional
|
||||
|
||||
@@ -37,8 +36,6 @@ import sounddevice as sd
|
||||
import websockets
|
||||
from faster_whisper import WhisperModel
|
||||
from openwakeword.model import Model as WakeWordModel
|
||||
from piper import PiperVoice
|
||||
from piper.config import SynthesisConfig
|
||||
|
||||
from modes import Mode, detect_mode_switch, should_speak
|
||||
|
||||
@@ -72,38 +69,6 @@ CHANNELS = 1
|
||||
BLOCK_SIZE = 1280 # 80ms bei 16kHz — gut fuer Wake-Word-Erkennung
|
||||
RECORD_SECONDS = 8 # Max. Aufnahmedauer nach Wake-Word
|
||||
|
||||
# Epische Trigger — bei diesen Woertern spricht Thorsten
|
||||
EPIC_TRIGGERS_DEFAULT = [
|
||||
"deploy",
|
||||
"erfolgreich",
|
||||
"alarm",
|
||||
"so soll es sein",
|
||||
"kritisch",
|
||||
"server down",
|
||||
"sicherheitswarnung",
|
||||
"ticket geloest",
|
||||
"aufgabe abgeschlossen",
|
||||
]
|
||||
|
||||
# Trigger aus Shared-Config laden (von Diagnostic gespeichert)
|
||||
TRIGGERS_FILE = "/shared/config/highlight_triggers.json"
|
||||
|
||||
def load_epic_triggers():
|
||||
"""Laedt Highlight-Trigger aus Shared-Config oder nutzt Defaults."""
|
||||
try:
|
||||
if os.path.exists(TRIGGERS_FILE):
|
||||
with open(TRIGGERS_FILE) as f:
|
||||
triggers = json.load(f)
|
||||
if isinstance(triggers, list) and len(triggers) > 0:
|
||||
logger.info("Highlight-Trigger geladen: %d aus %s", len(triggers), TRIGGERS_FILE)
|
||||
return triggers
|
||||
except Exception as e:
|
||||
logger.warning("Highlight-Trigger laden fehlgeschlagen: %s — nutze Defaults", e)
|
||||
return EPIC_TRIGGERS_DEFAULT
|
||||
|
||||
EPIC_TRIGGERS = load_epic_triggers()
|
||||
|
||||
|
||||
def load_config() -> dict[str, str]:
|
||||
"""Laedt Konfiguration.
|
||||
|
||||
@@ -290,179 +255,6 @@ def clean_text_for_tts(text: str) -> str:
|
||||
return t.strip()
|
||||
|
||||
|
||||
class VoiceEngine:
|
||||
"""Verwaltet Piper TTS mit zwei Stimmen: Ramona und Thorsten."""
|
||||
|
||||
def __init__(self, voices_dir: Path) -> None:
|
||||
self.voices_dir = voices_dir
|
||||
self.voices: dict[str, PiperVoice] = {}
|
||||
self.default_voice = "ramona"
|
||||
self.highlight_voice = "thorsten"
|
||||
self.speech_speed = {"ramona": 1.0, "thorsten": 1.0}
|
||||
|
||||
def initialize(self) -> None:
|
||||
"""Laedt die Piper-Stimmen aus dem Voices-Verzeichnis."""
|
||||
voice_configs = {
|
||||
"ramona": "de_DE-ramona-low",
|
||||
"thorsten": "de_DE-thorsten-high",
|
||||
}
|
||||
|
||||
for name, model_name in voice_configs.items():
|
||||
model_path = self.voices_dir / f"{model_name}.onnx"
|
||||
config_path = self.voices_dir / f"{model_name}.onnx.json"
|
||||
|
||||
if not model_path.exists():
|
||||
logger.error("Stimme nicht gefunden: %s", model_path)
|
||||
continue
|
||||
|
||||
self.voices[name] = PiperVoice.load(
|
||||
str(model_path),
|
||||
config_path=str(config_path) if config_path.exists() else None,
|
||||
)
|
||||
logger.info("Stimme geladen: %s (%s)", name, model_name)
|
||||
|
||||
if not self.voices:
|
||||
logger.error("Keine Stimmen geladen — TTS deaktiviert")
|
||||
|
||||
def select_voice(
|
||||
self, text: str, requested_voice: Optional[str] = None
|
||||
) -> str:
|
||||
"""Waehlt die passende Stimme basierend auf Text oder Anfrage.
|
||||
|
||||
Thorsten wird bei epischen Triggern verwendet,
|
||||
sonst Ramona als Standardstimme.
|
||||
|
||||
Args:
|
||||
text: Der zu sprechende Text (fuer Epic-Trigger-Erkennung).
|
||||
requested_voice: Explizit angeforderte Stimme ("ramona" | "thorsten").
|
||||
|
||||
Returns:
|
||||
Name der gewaehlten Stimme.
|
||||
"""
|
||||
if requested_voice and requested_voice in self.voices:
|
||||
return requested_voice
|
||||
|
||||
# Highlight-Trigger pruefen
|
||||
text_lower = text.lower()
|
||||
for trigger in EPIC_TRIGGERS:
|
||||
if trigger in text_lower:
|
||||
logger.info("Highlight-Trigger erkannt: '%s' — %s spricht", trigger, self.highlight_voice)
|
||||
return self.highlight_voice
|
||||
|
||||
return self.default_voice
|
||||
|
||||
def synthesize(self, text: str, voice_name: str = "ramona") -> Optional[bytes]:
|
||||
"""Erzeugt Audio-Daten aus Text mit der gewaehlten Stimme.
|
||||
|
||||
Args:
|
||||
text: Der zu sprechende Text.
|
||||
voice_name: Name der Stimme ("ramona" oder "thorsten").
|
||||
|
||||
Returns:
|
||||
WAV-Audiodaten als bytes oder None bei Fehler.
|
||||
"""
|
||||
voice = self.voices.get(voice_name)
|
||||
if voice is None:
|
||||
logger.error("Stimme '%s' nicht verfuegbar", voice_name)
|
||||
return None
|
||||
|
||||
try:
|
||||
# Zentraler TTS-Cleanup (Markdown, Code, Einheiten, URLs)
|
||||
import re
|
||||
clean = clean_text_for_tts(text)
|
||||
sentences = re.split(r'(?<=[.!?])\s+', clean)
|
||||
sentences = [s.strip() for s in sentences if s.strip()]
|
||||
|
||||
if not sentences:
|
||||
return None
|
||||
|
||||
# Jeden Satz einzeln synthetisieren und WAVs zusammenfuegen
|
||||
all_audio = b""
|
||||
sample_rate = None
|
||||
for sentence in sentences:
|
||||
if not sentence:
|
||||
continue
|
||||
with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp:
|
||||
tmp_path = tmp.name
|
||||
speed = self.speech_speed.get(voice_name, 1.0)
|
||||
syn_config = SynthesisConfig(length_scale=1.0 / max(0.3, speed))
|
||||
with wave.open(tmp_path, "wb") as wav_file:
|
||||
voice.synthesize_wav(sentence, wav_file, syn_config=syn_config)
|
||||
with wave.open(tmp_path, "rb") as wav_file:
|
||||
if sample_rate is None:
|
||||
sample_rate = wav_file.getframerate()
|
||||
all_audio += wav_file.readframes(wav_file.getnframes())
|
||||
Path(tmp_path).unlink(missing_ok=True)
|
||||
|
||||
# Zusammengefuegtes WAV erstellen
|
||||
with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp:
|
||||
final_path = tmp.name
|
||||
with wave.open(final_path, "wb") as wav_file:
|
||||
wav_file.setnchannels(1)
|
||||
wav_file.setsampwidth(2)
|
||||
wav_file.setframerate(sample_rate or 22050)
|
||||
wav_file.writeframes(all_audio)
|
||||
|
||||
audio_data = Path(final_path).read_bytes()
|
||||
Path(final_path).unlink(missing_ok=True)
|
||||
|
||||
logger.info(
|
||||
"TTS: %d bytes erzeugt mit %s (%d Saetze) — '%s'",
|
||||
len(audio_data),
|
||||
voice_name,
|
||||
len(sentences),
|
||||
text[:60],
|
||||
)
|
||||
return audio_data
|
||||
|
||||
except Exception:
|
||||
logger.exception("TTS-Fehler bei Stimme '%s'", voice_name)
|
||||
return None
|
||||
|
||||
def speak(self, text: str, requested_voice: Optional[str] = None) -> None:
|
||||
"""Spricht den Text ueber das Audio-Geraet.
|
||||
|
||||
Waehlt automatisch die passende Stimme und gibt das Audio aus.
|
||||
|
||||
Args:
|
||||
text: Der zu sprechende Text.
|
||||
requested_voice: Optionale explizite Stimmenwahl.
|
||||
"""
|
||||
voice_name = self.select_voice(text, requested_voice)
|
||||
audio_data = self.synthesize(text, voice_name)
|
||||
|
||||
if audio_data is None:
|
||||
return
|
||||
|
||||
try:
|
||||
# WAV-Daten lesen und ueber sounddevice abspielen
|
||||
with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp:
|
||||
tmp.write(audio_data)
|
||||
tmp_path = tmp.name
|
||||
|
||||
with wave.open(tmp_path, "rb") as wf:
|
||||
frames = wf.readframes(wf.getnframes())
|
||||
sample_width = wf.getsampwidth()
|
||||
rate = wf.getframerate()
|
||||
channels = wf.getnchannels()
|
||||
|
||||
Path(tmp_path).unlink(missing_ok=True)
|
||||
|
||||
# Numpy-Array aus PCM-Daten
|
||||
dtype_map = {1: np.int8, 2: np.int16, 4: np.int32}
|
||||
dtype = dtype_map.get(sample_width, np.int16)
|
||||
audio_array = np.frombuffer(frames, dtype=dtype)
|
||||
|
||||
if channels > 1:
|
||||
audio_array = audio_array.reshape(-1, channels)
|
||||
|
||||
sd.play(audio_array, samplerate=rate)
|
||||
sd.wait() # Warten bis Wiedergabe fertig
|
||||
|
||||
except Exception:
|
||||
logger.exception("Audio-Wiedergabe fehlgeschlagen")
|
||||
|
||||
|
||||
# ── STT Engine ───────────────────────────────────────────────
|
||||
|
||||
|
||||
@@ -672,9 +464,9 @@ class ARIABridge:
|
||||
self.current_mode = Mode.NORMAL
|
||||
self.running = False
|
||||
|
||||
# Komponenten
|
||||
self.voice_engine = VoiceEngine(VOICES_DIR)
|
||||
# Komponenten (TTS: immer XTTS remote, Piper wurde entfernt)
|
||||
self.tts_enabled = True
|
||||
self.xtts_voice = ""
|
||||
vc: dict = {}
|
||||
# Gespeicherte Voice-Config laden
|
||||
try:
|
||||
@@ -682,16 +474,9 @@ class ARIABridge:
|
||||
if os.path.exists(vc_path):
|
||||
with open(vc_path) as f:
|
||||
vc = json.load(f)
|
||||
self.voice_engine.default_voice = vc.get("defaultVoice", "ramona")
|
||||
self.voice_engine.highlight_voice = vc.get("highlightVoice", "thorsten")
|
||||
self.voice_engine.speech_speed = {
|
||||
"ramona": vc.get("speedRamona", 1.0),
|
||||
"thorsten": vc.get("speedThorsten", 1.0),
|
||||
}
|
||||
self.tts_enabled = vc.get("ttsEnabled", True)
|
||||
self.tts_engine_type = vc.get("ttsEngine", "piper")
|
||||
self.xtts_voice = vc.get("xttsVoice", "")
|
||||
logger.info("Voice-Config geladen: %s", vc)
|
||||
logger.info("Voice-Config geladen: tts=%s voice=%s", self.tts_enabled, self.xtts_voice or "default")
|
||||
except Exception as e:
|
||||
logger.warning("Voice-Config laden fehlgeschlagen: %s", e)
|
||||
# Whisper-Modell: Config hat Vorrang, dann env/Default (medium)
|
||||
@@ -725,9 +510,6 @@ class ARIABridge:
|
||||
logger.info("ARIA Voice Bridge startet...")
|
||||
logger.info("=" * 50)
|
||||
|
||||
# Voice-Engine IMMER laden — rendert Audio fuer die App (auch ohne Soundkarte)
|
||||
self.voice_engine.initialize()
|
||||
|
||||
# STT IMMER laden — verarbeitet Audio von der App (braucht kein Sounddevice)
|
||||
self.stt_engine.initialize()
|
||||
|
||||
@@ -1050,9 +832,6 @@ class ARIABridge:
|
||||
"timestamp": int(asyncio.get_event_loop().time() * 1000),
|
||||
})
|
||||
|
||||
# Stimme auswaehlen
|
||||
voice_name = requested_voice or self.voice_engine.select_voice(text)
|
||||
|
||||
# Eindeutige Message-ID fuer Audio-Cache-Zuordnung
|
||||
message_id = str(uuid.uuid4())
|
||||
|
||||
@@ -1065,7 +844,6 @@ class ARIABridge:
|
||||
"payload": {
|
||||
"text": text,
|
||||
"sender": "aria",
|
||||
"voice": voice_name,
|
||||
"messageId": message_id,
|
||||
# Debug: aufbereiteter Text fuer TTS (App ignoriert, Diagnostic zeigt optional)
|
||||
"ttsText": tts_text_preview if tts_text_preview != text else "",
|
||||
@@ -1073,69 +851,37 @@ class ARIABridge:
|
||||
"timestamp": int(asyncio.get_event_loop().time() * 1000),
|
||||
})
|
||||
|
||||
# TTS-Audio rendern und an die App senden (wenn Modus es erlaubt)
|
||||
if getattr(self, 'tts_enabled', True) and should_speak(self.current_mode, is_critical):
|
||||
tts_engine = getattr(self, 'tts_engine_type', 'piper')
|
||||
|
||||
if tts_engine == "xtts":
|
||||
# XTTS: aufbereiteter Text (Code-Bloecke raus, Einheiten ausgeschrieben)
|
||||
xtts_voice = getattr(self, 'xtts_voice', '')
|
||||
tts_text = clean_text_for_tts(text)
|
||||
if not tts_text:
|
||||
logger.info("[core] TTS-Text leer nach Cleanup — XTTS uebersprungen")
|
||||
return
|
||||
try:
|
||||
xtts_request_id = str(uuid.uuid4())
|
||||
# Map fuer xtts_response → App-Cache Zuordnung
|
||||
self._xtts_request_to_message[xtts_request_id] = message_id
|
||||
if len(self._xtts_request_to_message) > 100:
|
||||
# Oldest entry raus damit der Dict nicht waechst
|
||||
oldest = next(iter(self._xtts_request_to_message))
|
||||
self._xtts_request_to_message.pop(oldest, None)
|
||||
await self._send_to_rvs({
|
||||
"type": "xtts_request",
|
||||
"payload": {
|
||||
"text": tts_text,
|
||||
"voice": xtts_voice,
|
||||
"language": "de",
|
||||
"requestId": xtts_request_id,
|
||||
},
|
||||
"timestamp": int(asyncio.get_event_loop().time() * 1000),
|
||||
})
|
||||
logger.info("[core] XTTS-Request gesendet (%s): '%s'", xtts_voice or "default", tts_text[:60])
|
||||
except Exception as e:
|
||||
logger.warning("[core] XTTS-Request fehlgeschlagen: %s — Fallback auf Piper", e)
|
||||
# Fallback auf Piper
|
||||
audio_data = self.voice_engine.synthesize(text, voice_name)
|
||||
if audio_data:
|
||||
audio_b64 = base64.b64encode(audio_data).decode("ascii")
|
||||
await self._send_to_rvs({
|
||||
"type": "audio",
|
||||
"payload": {"base64": audio_b64, "mimeType": "audio/wav", "voice": voice_name, "messageId": message_id},
|
||||
"timestamp": int(asyncio.get_event_loop().time() * 1000),
|
||||
})
|
||||
else:
|
||||
# Piper: Lokal rendern
|
||||
audio_data = self.voice_engine.synthesize(text, voice_name)
|
||||
if audio_data:
|
||||
audio_b64 = base64.b64encode(audio_data).decode("ascii")
|
||||
await self._send_to_rvs({
|
||||
"type": "audio",
|
||||
"payload": {
|
||||
"base64": audio_b64,
|
||||
"mimeType": "audio/wav",
|
||||
"voice": voice_name,
|
||||
"messageId": message_id,
|
||||
},
|
||||
"timestamp": int(asyncio.get_event_loop().time() * 1000),
|
||||
})
|
||||
logger.info("[core] TTS-Audio gesendet: %d bytes (%s)", len(audio_data), voice_name)
|
||||
|
||||
# Lokal abspielen (nur wenn Soundkarte vorhanden)
|
||||
if self.audio_available:
|
||||
self.voice_engine.speak(text, requested_voice)
|
||||
else:
|
||||
# TTS ueber XTTS (XTTS-Bridge auf Gaming-PC)
|
||||
if not (getattr(self, 'tts_enabled', True) and should_speak(self.current_mode, is_critical)):
|
||||
logger.info("[core] TTS unterdrueckt (Modus: %s)", self.current_mode.config.name)
|
||||
return
|
||||
|
||||
xtts_voice = getattr(self, 'xtts_voice', '')
|
||||
tts_text = tts_text_preview or text
|
||||
if not tts_text:
|
||||
logger.info("[core] TTS-Text leer nach Cleanup — uebersprungen")
|
||||
return
|
||||
try:
|
||||
xtts_request_id = str(uuid.uuid4())
|
||||
# Map fuer audio_pcm/xtts_response → App-Cache Zuordnung
|
||||
self._xtts_request_to_message[xtts_request_id] = message_id
|
||||
if len(self._xtts_request_to_message) > 100:
|
||||
oldest = next(iter(self._xtts_request_to_message))
|
||||
self._xtts_request_to_message.pop(oldest, None)
|
||||
await self._send_to_rvs({
|
||||
"type": "xtts_request",
|
||||
"payload": {
|
||||
"text": tts_text,
|
||||
"voice": xtts_voice,
|
||||
"language": "de",
|
||||
"requestId": xtts_request_id,
|
||||
"messageId": message_id,
|
||||
},
|
||||
"timestamp": int(asyncio.get_event_loop().time() * 1000),
|
||||
})
|
||||
logger.info("[core] XTTS-Request gesendet (%s): '%s'", xtts_voice or "default", tts_text[:60])
|
||||
except Exception as e:
|
||||
logger.error("[core] XTTS-Request fehlgeschlagen: %s — kein Audio", e)
|
||||
|
||||
def _fetch_active_session(self) -> None:
|
||||
"""Holt die aktive Session vom Diagnostic-Endpoint."""
|
||||
@@ -1344,113 +1090,58 @@ class ARIABridge:
|
||||
return
|
||||
|
||||
elif msg_type == "tts_request":
|
||||
# App fordert TTS-Audio fuer einen Text an (Play-Button).
|
||||
# Nutze die aktuell konfigurierte Engine (Piper oder XTTS).
|
||||
# App fordert TTS-Audio fuer einen Text an (Play-Button) → immer XTTS.
|
||||
text = payload.get("text", "")
|
||||
requested_voice = payload.get("voice", "")
|
||||
message_id = payload.get("messageId", "") # fuer Cache-Zuordnung
|
||||
message_id = payload.get("messageId", "")
|
||||
if not text:
|
||||
return
|
||||
|
||||
tts_engine = getattr(self, 'tts_engine_type', 'piper')
|
||||
tts_text = clean_text_for_tts(text) or text
|
||||
|
||||
if tts_engine == "xtts":
|
||||
xtts_voice = getattr(self, 'xtts_voice', '')
|
||||
try:
|
||||
await self._send_to_rvs({
|
||||
"type": "xtts_request",
|
||||
"payload": {
|
||||
"text": tts_text,
|
||||
"voice": xtts_voice,
|
||||
"language": "de",
|
||||
"requestId": str(uuid.uuid4()),
|
||||
"messageId": message_id,
|
||||
},
|
||||
"timestamp": int(asyncio.get_event_loop().time() * 1000),
|
||||
})
|
||||
logger.info("[rvs] TTS on-demand via XTTS: '%s'", tts_text[:60])
|
||||
except Exception as e:
|
||||
logger.warning("[rvs] XTTS-Request fehlgeschlagen, Fallback Piper: %s", e)
|
||||
tts_engine = "piper"
|
||||
|
||||
if tts_engine == "piper":
|
||||
voice_name = requested_voice or self.voice_engine.select_voice(text)
|
||||
audio_data = self.voice_engine.synthesize(text, voice_name)
|
||||
if audio_data:
|
||||
audio_b64 = base64.b64encode(audio_data).decode("ascii")
|
||||
try:
|
||||
await self._send_to_rvs({
|
||||
"type": "audio",
|
||||
"payload": {
|
||||
"base64": audio_b64,
|
||||
"mimeType": "audio/wav",
|
||||
"voice": voice_name,
|
||||
"messageId": message_id,
|
||||
},
|
||||
"timestamp": int(asyncio.get_event_loop().time() * 1000),
|
||||
})
|
||||
logger.info("[rvs] TTS on-demand via Piper: %d bytes (%s)", len(audio_data), voice_name)
|
||||
except Exception as e:
|
||||
logger.warning("[rvs] TTS on-demand senden fehlgeschlagen: %s", e)
|
||||
xtts_voice = getattr(self, 'xtts_voice', '')
|
||||
try:
|
||||
xtts_request_id = str(uuid.uuid4())
|
||||
if message_id:
|
||||
self._xtts_request_to_message[xtts_request_id] = message_id
|
||||
await self._send_to_rvs({
|
||||
"type": "xtts_request",
|
||||
"payload": {
|
||||
"text": tts_text,
|
||||
"voice": xtts_voice,
|
||||
"language": "de",
|
||||
"requestId": xtts_request_id,
|
||||
"messageId": message_id,
|
||||
},
|
||||
"timestamp": int(asyncio.get_event_loop().time() * 1000),
|
||||
})
|
||||
logger.info("[rvs] TTS on-demand via XTTS: '%s'", tts_text[:60])
|
||||
except Exception as e:
|
||||
logger.warning("[rvs] TTS on-demand fehlgeschlagen: %s", e)
|
||||
return
|
||||
|
||||
elif msg_type == "config":
|
||||
# Konfiguration von App/Diagnostic empfangen + persistent speichern
|
||||
changed = False
|
||||
if "defaultVoice" in payload:
|
||||
new_voice = payload["defaultVoice"]
|
||||
if new_voice in self.voice_engine.voices:
|
||||
self.voice_engine.default_voice = new_voice
|
||||
logger.info("[rvs] Standard-Stimme gewechselt: %s", new_voice)
|
||||
changed = True
|
||||
if "highlightVoice" in payload:
|
||||
new_voice = payload["highlightVoice"]
|
||||
if new_voice in self.voice_engine.voices:
|
||||
self.voice_engine.highlight_voice = new_voice
|
||||
logger.info("[rvs] Highlight-Stimme gewechselt: %s", new_voice)
|
||||
changed = True
|
||||
if "ttsEnabled" in payload:
|
||||
self.tts_enabled = bool(payload["ttsEnabled"])
|
||||
logger.info("[rvs] TTS %s", "aktiviert" if self.tts_enabled else "deaktiviert")
|
||||
changed = True
|
||||
if "ttsEngine" in payload:
|
||||
self.tts_engine_type = payload["ttsEngine"]
|
||||
logger.info("[rvs] TTS-Engine: %s", self.tts_engine_type)
|
||||
changed = True
|
||||
if "xttsVoice" in payload:
|
||||
self.xtts_voice = payload["xttsVoice"]
|
||||
logger.info("[rvs] XTTS-Stimme: %s", self.xtts_voice)
|
||||
logger.info("[rvs] XTTS-Stimme: %s", self.xtts_voice or "default")
|
||||
changed = True
|
||||
if "speedRamona" in payload:
|
||||
self.voice_engine.speech_speed["ramona"] = max(0.3, min(2.0, float(payload["speedRamona"])))
|
||||
logger.info("[rvs] Speed Ramona: %.1f", self.voice_engine.speech_speed["ramona"])
|
||||
changed = True
|
||||
if "speedThorsten" in payload:
|
||||
self.voice_engine.speech_speed["thorsten"] = max(0.3, min(2.0, float(payload["speedThorsten"])))
|
||||
logger.info("[rvs] Speed Thorsten: %.1f", self.voice_engine.speech_speed["thorsten"])
|
||||
changed = True
|
||||
whisper_reloaded = False
|
||||
if "whisperModel" in payload:
|
||||
new_model = payload["whisperModel"]
|
||||
if new_model and new_model != self.stt_engine.model_size:
|
||||
logger.info("[rvs] Whisper-Modell Wechsel: %s -> %s (laedt...)", self.stt_engine.model_size, new_model)
|
||||
loop = asyncio.get_event_loop()
|
||||
whisper_reloaded = await loop.run_in_executor(None, self.stt_engine.reload, new_model)
|
||||
if whisper_reloaded:
|
||||
if await loop.run_in_executor(None, self.stt_engine.reload, new_model):
|
||||
changed = True
|
||||
# Persistent speichern in Shared Volume
|
||||
if changed:
|
||||
try:
|
||||
os.makedirs("/shared/config", exist_ok=True)
|
||||
config_data = {
|
||||
"defaultVoice": self.voice_engine.default_voice,
|
||||
"highlightVoice": self.voice_engine.highlight_voice,
|
||||
"ttsEnabled": getattr(self, "tts_enabled", True),
|
||||
"ttsEngine": getattr(self, "tts_engine_type", "piper"),
|
||||
"xttsVoice": getattr(self, "xtts_voice", ""),
|
||||
"speedRamona": self.voice_engine.speech_speed.get("ramona", 1.0),
|
||||
"speedThorsten": self.voice_engine.speech_speed.get("thorsten", 1.0),
|
||||
"whisperModel": self.stt_engine.model_size,
|
||||
}
|
||||
with open("/shared/config/voice_config.json", "w") as f:
|
||||
@@ -1459,10 +1150,6 @@ class ARIABridge:
|
||||
except Exception as e:
|
||||
logger.warning("[rvs] Config speichern fehlgeschlagen: %s", e)
|
||||
return
|
||||
text = payload.get("text", "")
|
||||
if text:
|
||||
logger.info("[rvs] App-Chat: '%s'", text[:80])
|
||||
await self.send_to_core(text, source="app")
|
||||
|
||||
elif msg_type == "mode":
|
||||
# Moduswechsel von der App
|
||||
|
||||
@@ -5,8 +5,7 @@
|
||||
# STT — Whisper (lokal, keine API noetig)
|
||||
faster-whisper
|
||||
|
||||
# TTS — Piper (offline, deutsche Stimmen)
|
||||
piper-tts
|
||||
# TTS: laeuft remote ueber XTTS v2 auf dem Gaming-PC (keine lokalen Deps noetig)
|
||||
|
||||
# WebSocket-Verbindung zu aria-core
|
||||
websockets
|
||||
|
||||
Reference in New Issue
Block a user