819 lines
29 KiB
Python
819 lines
29 KiB
Python
"""
|
|
ARIA Voice Bridge — Hauptmodul.
|
|
|
|
Verbindet die Android App (via RVS) mit ARIA-Core und bietet
|
|
lokale Spracheingabe (Wake-Word + Whisper STT) und Sprachausgabe (Piper TTS).
|
|
|
|
Nachrichtenfluss:
|
|
App → RVS → Bridge → aria-core
|
|
aria-core → Bridge → RVS → App
|
|
→ Lautsprecher (TTS)
|
|
|
|
Stimmen:
|
|
- Ramona (de_DE-ramona-low) — Alltag, Gespraeche
|
|
- Thorsten (de_DE-thorsten-high) — epische Momente, Alarme
|
|
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
import asyncio
import io
import json
import logging
import os
import signal
import sys
import tempfile
import time
import wave
from pathlib import Path
from typing import Optional
|
|
|
|
import numpy as np
|
|
import sounddevice as sd
|
|
import websockets
|
|
from faster_whisper import WhisperModel
|
|
from openwakeword.model import Model as WakeWordModel
|
|
from piper import PiperVoice
|
|
|
|
from modes import Mode, detect_mode_switch, should_speak
|
|
|
|
# ── Logging ──────────────────────────────────────────────────
|
|
|
|
# Root logging goes to stdout at INFO level; every record carries the
# timestamp, the level and the name of the emitting logger.
logging.basicConfig(
    stream=sys.stdout,
    format="%(asctime)s [%(levelname)s] %(name)s: %(message)s",
    level=logging.INFO,
)

# Module-wide logger used by every component in this file.
logger = logging.getLogger("aria-bridge")
|
|
|
|
# ── Konfiguration ───────────────────────────────────────────
|
|
|
|
# Fixed filesystem locations for the env-style config file and the voice models.
CONFIG_PATH = Path("/config/aria.env")
VOICES_DIR = Path("/voices")

# Defaults taken from the process environment once, at import time.
CORE_WS_URL = os.environ.get("ARIA_CORE_WS", "ws://aria:8080")
RVS_HOST = os.environ.get("RVS_HOST", "")      # e.g. rvs.hackersoft.de
RVS_PORT = os.environ.get("RVS_PORT", "443")   # port of the RVS
RVS_TLS = os.environ.get("RVS_TLS", "true")    # true = wss://, false = ws://
RVS_TOKEN = os.environ.get("RVS_TOKEN", "")    # pairing token (same as in the app)
WHISPER_MODEL = os.environ.get("WHISPER_MODEL", "small")
WHISPER_LANGUAGE = os.environ.get("WHISPER_LANGUAGE", "de")

# Audio parameters
SAMPLE_RATE = 16000
CHANNELS = 1
BLOCK_SIZE = 1280      # 80 ms at 16 kHz — chunk size fed to wake-word detection
RECORD_SECONDS = 8     # maximum recording length after the wake word

# Epic triggers — when a text contains one of these (lower-cased) phrases,
# the "thorsten" voice is selected.
EPIC_TRIGGERS = [
    "deploy", "erfolgreich", "alarm",
    "so soll es sein", "kritisch", "server down",
    "sicherheitswarnung", "ticket geloest", "aufgabe abgeschlossen",
]
|
|
|
|
|
|
def load_config() -> dict[str, str]:
    """Load ``KEY=VALUE`` settings from the file at ``CONFIG_PATH``.

    Blank lines, lines starting with ``#``, lines without ``=`` and lines
    with an empty key are skipped. Keys and values are stripped of
    surrounding whitespace; everything after the first ``=`` is the value.

    Returns:
        The parsed settings. Empty when the file is missing or cannot be
        read — a broken config file must not prevent the bridge from
        starting with its environment defaults.
    """
    config: dict[str, str] = {}

    try:
        # Decode explicitly as UTF-8 so the result does not depend on the
        # locale of the process.
        content = CONFIG_PATH.read_text(encoding="utf-8")
    except FileNotFoundError:
        logger.warning("Keine Konfiguration gefunden: %s", CONFIG_PATH)
        return config
    except (OSError, UnicodeDecodeError):
        # Permission problems, a directory at that path, invalid bytes, ...
        logger.exception("Konfiguration nicht lesbar: %s", CONFIG_PATH)
        return config

    for raw_line in content.splitlines():
        line = raw_line.strip()
        if not line or line.startswith("#"):
            continue
        key, separator, value = line.partition("=")
        key = key.strip()
        if not separator or not key:
            continue
        config[key] = value.strip()

    logger.info("Konfiguration geladen aus %s", CONFIG_PATH)
    return config
|
|
|
|
|
|
# ── Voice Engine ─────────────────────────────────────────────
|
|
|
|
|
|
class VoiceEngine:
    """Manage Piper TTS with two voices: Ramona and Thorsten."""

    # Bytes-per-sample → NumPy dtype for WAV PCM data.
    # 8-bit WAV samples are unsigned; 16- and 32-bit samples are signed.
    _WAV_DTYPES = {1: np.uint8, 2: np.int16, 4: np.int32}

    def __init__(self, voices_dir: Path) -> None:
        """Remember the voices directory; no model is loaded yet.

        Args:
            voices_dir: Directory containing the ``*.onnx`` voice models.
        """
        self.voices_dir = voices_dir
        # Loaded voices keyed by short name ("ramona" / "thorsten").
        self.voices: dict[str, PiperVoice] = {}

    def initialize(self) -> None:
        """Load the Piper voices found in the voices directory.

        A missing model file is logged and skipped. If no voice could be
        loaded an error is logged and ``self.voices`` stays empty.
        """
        voice_configs = {
            "ramona": "de_DE-ramona-low",
            "thorsten": "de_DE-thorsten-high",
        }

        for name, model_name in voice_configs.items():
            model_path = self.voices_dir / f"{model_name}.onnx"
            config_path = self.voices_dir / f"{model_name}.onnx.json"

            if not model_path.exists():
                logger.error("Stimme nicht gefunden: %s", model_path)
                continue

            self.voices[name] = PiperVoice.load(
                str(model_path),
                # The JSON config is optional; pass None when it is absent.
                config_path=str(config_path) if config_path.exists() else None,
            )
            logger.info("Stimme geladen: %s (%s)", name, model_name)

        if not self.voices:
            logger.error("Keine Stimmen geladen — TTS deaktiviert")

    def select_voice(
        self, text: str, requested_voice: Optional[str] = None
    ) -> str:
        """Choose a voice name from an explicit request or from the text.

        An explicitly requested voice wins when it is loaded. Otherwise
        "thorsten" is chosen if the lower-cased text contains one of
        ``EPIC_TRIGGERS``, and "ramona" in every other case.

        Args:
            text: The text to be spoken (scanned for epic triggers).
            requested_voice: Explicitly requested voice name, if any.

        Returns:
            Name of the chosen voice. The name is not checked against the
            loaded voices unless it came from ``requested_voice``.
        """
        if requested_voice and requested_voice in self.voices:
            return requested_voice

        text_lower = text.lower()
        for trigger in EPIC_TRIGGERS:
            if trigger in text_lower:
                logger.info("Epischer Trigger erkannt: '%s' — Thorsten spricht", trigger)
                return "thorsten"

        return "ramona"

    def synthesize(self, text: str, voice_name: str = "ramona") -> Optional[bytes]:
        """Render text to WAV data with the given voice.

        Args:
            text: The text to be spoken.
            voice_name: Name of a loaded voice.

        Returns:
            WAV file content as bytes, or None when the voice is not
            loaded or synthesis fails (the failure is logged).
        """
        voice = self.voices.get(voice_name)
        if voice is None:
            logger.error("Stimme '%s' nicht verfuegbar", voice_name)
            return None

        try:
            # Let Piper write into an in-memory WAV container. Nothing is
            # written to disk, so a failing synthesis cannot leave temp
            # files behind.
            buffer = io.BytesIO()
            with wave.open(buffer, "wb") as wav_file:
                voice.synthesize(text, wav_file)
            audio_data = buffer.getvalue()
        except Exception:
            logger.exception("TTS-Fehler bei Stimme '%s'", voice_name)
            return None

        logger.info(
            "TTS: %d bytes erzeugt mit %s — '%s'",
            len(audio_data),
            voice_name,
            text[:60],
        )
        return audio_data

    @staticmethod
    def _pcm_to_array(frames: bytes, sample_width: int, channels: int) -> np.ndarray:
        """Convert raw WAV PCM frames into a NumPy array.

        Args:
            frames: Interleaved PCM bytes as returned by ``readframes``.
            sample_width: Bytes per sample (1, 2 or 4).
            channels: Number of interleaved channels.

        Returns:
            1-D array for mono, ``(frames, channels)`` array otherwise.

        Raises:
            ValueError: If the sample width has no matching dtype.
        """
        dtype = VoiceEngine._WAV_DTYPES.get(sample_width)
        if dtype is None:
            raise ValueError(f"unsupported WAV sample width: {sample_width} bytes")

        samples = np.frombuffer(frames, dtype=dtype)
        if channels > 1:
            samples = samples.reshape(-1, channels)
        return samples

    def speak(self, text: str, requested_voice: Optional[str] = None) -> None:
        """Speak the text on the audio device and block until playback ends.

        The voice is picked with ``select_voice``. When that voice is not
        loaded but another one is, the other one is used so the message is
        still spoken. All playback errors are logged, never raised.

        Args:
            text: The text to be spoken.
            requested_voice: Optional explicit voice choice.
        """
        voice_name = self.select_voice(text, requested_voice)
        if voice_name not in self.voices and self.voices:
            fallback = next(iter(self.voices))
            logger.warning(
                "Stimme '%s' nicht geladen — nutze '%s'", voice_name, fallback
            )
            voice_name = fallback

        audio_data = self.synthesize(text, voice_name)
        if audio_data is None:
            return

        try:
            # Parse the WAV container directly from memory.
            with wave.open(io.BytesIO(audio_data), "rb") as wf:
                frames = wf.readframes(wf.getnframes())
                sample_width = wf.getsampwidth()
                rate = wf.getframerate()
                channels = wf.getnchannels()

            audio_array = self._pcm_to_array(frames, sample_width, channels)

            sd.play(audio_array, samplerate=rate)
            sd.wait()  # block until playback has finished
        except Exception:
            logger.exception("Audio-Wiedergabe fehlgeschlagen")
|
|
|
|
|
|
# ── STT Engine ───────────────────────────────────────────────
|
|
|
|
|
|
class STTEngine:
    """Whisper speech-to-text via faster-whisper, loaded on the CPU."""

    def __init__(self, model_size: str = "small", language: str = "de") -> None:
        """Store the settings; the model is loaded by ``initialize``.

        Args:
            model_size: Whisper model name passed to ``WhisperModel``.
            language: Language code passed to every transcription.
        """
        self.model_size = model_size
        self.language = language
        self.model: Optional[WhisperModel] = None

    def initialize(self) -> None:
        """Load the Whisper model (CPU, int8 compute type)."""
        logger.info(
            "Lade Whisper-Modell '%s' (Sprache: %s)...",
            self.model_size,
            self.language,
        )
        self.model = WhisperModel(self.model_size, device="cpu", compute_type="int8")
        logger.info("Whisper-Modell geladen")

    @staticmethod
    def _to_float32(audio_data: np.ndarray) -> np.ndarray:
        """Return the samples as float32.

        Signed integer PCM is scaled by the full-scale value of its own
        dtype (32768 for int16), which maps it to [-1.0, 1.0). Floating
        point input is only cast: dividing it by 32768 as well would
        attenuate it to near silence.

        Raises:
            TypeError: For dtypes that are neither float nor signed integer.
        """
        dtype = audio_data.dtype
        if dtype == np.float32:
            return audio_data
        if np.issubdtype(dtype, np.floating):
            return audio_data.astype(np.float32)
        if np.issubdtype(dtype, np.signedinteger):
            full_scale = float(np.iinfo(dtype).max) + 1.0
            return audio_data.astype(np.float32) / full_scale
        raise TypeError(f"unsupported audio dtype: {dtype}")

    def transcribe(self, audio_data: np.ndarray) -> str:
        """Transcribe audio samples to text.

        Args:
            audio_data: Mono samples, either float or signed-integer PCM.
                The sample rate is not checked here — presumably 16 kHz as
                produced by the recorder in this file; verify for other
                callers.

        Returns:
            The recognised text (segments joined by spaces), or an empty
            string when the model is not loaded, the input is empty or
            transcription fails.
        """
        if self.model is None:
            logger.error("Whisper-Modell nicht initialisiert")
            return ""

        if audio_data.size == 0:
            # Nothing to transcribe; skip the model call entirely.
            return ""

        try:
            samples = self._to_float32(audio_data)

            segments, info = self.model.transcribe(
                samples,
                language=self.language,
                beam_size=5,
                vad_filter=True,
            )

            text = " ".join(segment.text.strip() for segment in segments)
            logger.info(
                "STT: '%s' (Sprache: %s, Dauer: %.1fs)",
                text,
                info.language,
                info.duration,
            )
            return text
        except Exception:
            logger.exception("STT-Fehler")
            return ""
|
|
|
|
|
|
# ── Wake-Word Erkennung ──────────────────────────────────────
|
|
|
|
|
|
class WakeWordDetector:
    """Detect the wake word in chunks of an audio stream.

    A custom model at ``CUSTOM_MODEL_PATH`` is used when that file exists;
    otherwise the detector is created without explicit models and the
    score stored under ``FALLBACK_MODEL`` is evaluated.
    """

    CUSTOM_MODEL_PATH = "/voices/wake_aria.onnx"
    FALLBACK_MODEL = "hey_jarvis"
    THRESHOLD = 0.5

    def __init__(self) -> None:
        """Create an uninitialised detector (no model, empty score key)."""
        self.model: Optional[WakeWordModel] = None
        # Key under which the relevant score appears in the prediction dict.
        self.wake_word_key: str = ""

    def initialize(self) -> None:
        """Load the wake-word model and remember which score key to read."""
        logger.info("Lade Wake-Word-Modell...")

        model_file = Path(self.CUSTOM_MODEL_PATH)
        if not model_file.exists():
            # No custom model on disk: fall back to the built-in models.
            self.model = WakeWordModel()
            self.wake_word_key = self.FALLBACK_MODEL
            logger.warning(
                "Kein Custom-Modell (%s) — nutze Fallback '%s'",
                self.CUSTOM_MODEL_PATH,
                self.FALLBACK_MODEL,
            )
            logger.info(
                "Tipp: Custom Wake-Word trainieren → "
                "https://github.com/dscripka/openWakeWord#training-new-models"
            )
            return

        # Custom model present: its score key is the file name without suffix.
        self.model = WakeWordModel(wakeword_models=[str(model_file)])
        self.wake_word_key = model_file.stem
        logger.info("Custom Wake-Word-Modell geladen: %s", model_file)

    def detect(self, audio_chunk: np.ndarray) -> bool:
        """Report whether the chunk's wake-word score exceeds ``THRESHOLD``.

        Args:
            audio_chunk: Audio samples handed unchanged to the model's
                ``predict``.

        Returns:
            True when the score stored under ``wake_word_key`` is strictly
            greater than ``THRESHOLD``; False otherwise, including when no
            model is loaded or the key is absent from the prediction.
        """
        detector = self.model
        if detector is None:
            return False

        # The model returns one score per loaded wake-word model.
        scores = detector.predict(audio_chunk)
        score = scores.get(self.wake_word_key, 0)

        if not (score > self.THRESHOLD):
            return False

        logger.info("Wake-Word erkannt! (Score: %.2f)", score)
        return True
|
|
|
|
|
|
# ── Audio-Aufnahme ───────────────────────────────────────────
|
|
|
|
|
|
def record_audio(duration: float = RECORD_SECONDS) -> np.ndarray:
    """Record from the default input device and block until finished.

    Args:
        duration: Recording length in seconds.

    Returns:
        The recorded int16 samples, captured at ``SAMPLE_RATE`` with
        ``CHANNELS`` channels and flattened to one dimension.
    """
    logger.info("Aufnahme laeuft... (%d Sekunden)", duration)

    frame_count = int(duration * SAMPLE_RATE)
    recording = sd.rec(
        frame_count,
        dtype="int16",
        channels=CHANNELS,
        samplerate=SAMPLE_RATE,
    )
    # sd.rec() returns immediately; wait until the buffer has been filled.
    sd.wait()

    logger.info("Aufnahme beendet")
    return recording.flatten()
|
|
|
|
|
|
# ── Bridge Hauptklasse ───────────────────────────────────────
|
|
|
|
|
|
class ARIABridge:
    """ARIA Voice Bridge — connects the app (via RVS) and local speech to ARIA-Core.

    ``run()`` drives three tasks concurrently:
        1. connect_to_core() — WebSocket to aria-core
        2. connect_to_rvs()  — WebSocket to the RVS (relay towards the app)
        3. audio_loop()      — wake word + STT on the local microphone

    Blocking audio work (recording, playback) runs in the default executor
    so the event loop stays responsive. Recording and playback are
    serialised with a lock because the sounddevice convenience functions
    ``play``/``rec`` stop whatever the previous call started.
    """

    def __init__(self) -> None:
        """Read the configuration and create all components (nothing is loaded yet)."""
        self.config = load_config()
        self.ws_url = self.config.get("ARIA_CORE_WS", CORE_WS_URL)

        # RVS connection info: config file first, environment defaults second.
        rvs_host = self.config.get("RVS_HOST", RVS_HOST)
        rvs_port = self.config.get("RVS_PORT", RVS_PORT)
        rvs_tls = self.config.get("RVS_TLS", RVS_TLS).lower() == "true"
        self.rvs_token = self.config.get("RVS_TOKEN", RVS_TOKEN)

        # Assemble the URL; an empty URL means "RVS not configured".
        if rvs_host:
            proto = "wss" if rvs_tls else "ws"
            self.rvs_url = f"{proto}://{rvs_host}:{rvs_port}"
        else:
            self.rvs_url = ""

        self.current_mode = Mode.NORMAL
        self.running = False

        # Components
        self.voice_engine = VoiceEngine(VOICES_DIR)
        self.stt_engine = STTEngine(
            model_size=self.config.get("WHISPER_MODEL", WHISPER_MODEL),
            language=self.config.get("WHISPER_LANGUAGE", WHISPER_LANGUAGE),
        )
        self.wake_word = WakeWordDetector()

        # WebSocket connections (None while disconnected)
        self.ws_core: Optional[websockets.WebSocketClientProtocol] = None
        self.ws_rvs: Optional[websockets.WebSocketClientProtocol] = None

        # Runtime state owned by run(): the loop and tasks that shutdown()
        # has to stop, and the lock serialising access to the audio device.
        self._loop: Optional[asyncio.AbstractEventLoop] = None
        self._tasks: list[asyncio.Task] = []
        self._audio_lock: Optional[asyncio.Lock] = None

    def initialize(self) -> None:
        """Load voices, the Whisper model and the wake-word model; log the setup."""
        logger.info("=" * 50)
        logger.info("ARIA Voice Bridge startet...")
        logger.info("=" * 50)

        # Report which PulseAudio server is configured, if any.
        pulse_server = os.getenv("PULSE_SERVER")
        if pulse_server:
            logger.info("PulseAudio Server: %s", pulse_server)
        else:
            logger.warning("Kein PULSE_SERVER gesetzt — verwende Standard-Audio")

        self.voice_engine.initialize()
        self.stt_engine.initialize()
        self.wake_word.initialize()

        logger.info("Alle Komponenten initialisiert")
        logger.info("aria-core: %s", self.ws_url)
        if self.rvs_url and self.rvs_token:
            logger.info("RVS: %s (Token: %s...)", self.rvs_url, self.rvs_token[:8])
        else:
            logger.warning("RVS nicht konfiguriert — App-Verbindung deaktiviert")
            logger.warning("  Setze RVS_HOST, RVS_PORT, RVS_TOKEN in /config/aria.env")
        logger.info("Modus: %s %s", self.current_mode.config.emoji, self.current_mode.config.name)

    # ── Helpers ──────────────────────────────────────────────

    @staticmethod
    def _now_ms() -> int:
        """Return the current wall-clock time in milliseconds since the epoch.

        The event loop clock is monotonic with an arbitrary origin and is
        therefore not usable as a message timestamp.
        NOTE(review): assumes the app expects epoch milliseconds — confirm
        against the RVS/app protocol.
        """
        return int(time.time() * 1000)

    @staticmethod
    def _as_float(value: object) -> float:
        """Best-effort float conversion for log output; 0.0 when not convertible."""
        try:
            return float(value)  # type: ignore[arg-type]
        except (TypeError, ValueError):
            return 0.0

    @staticmethod
    def _parse_json_object(raw_message, origin: str) -> Optional[dict]:
        """Decode a WebSocket frame that must contain a JSON object.

        Args:
            raw_message: The received text or bytes frame.
            origin: Short tag for log messages ("core" or "rvs").

        Returns:
            The decoded dict, or None (after logging) when the frame is
            not valid JSON or is valid JSON of another type.
        """
        try:
            message = json.loads(raw_message)
        except ValueError:
            # Covers JSONDecodeError and undecodable bytes frames.
            logger.error("[%s] Ungueltige JSON: %s", origin, raw_message[:100])
            return None

        if not isinstance(message, dict):
            logger.error(
                "[%s] Unerwartetes Nachrichtenformat: %s", origin, raw_message[:100]
            )
            return None
        return message

    def _audio_device(self) -> asyncio.Lock:
        """Return the lock guarding the audio device.

        Created lazily from inside the running loop, because on older
        Python versions an ``asyncio.Lock`` binds to the loop that is
        current when it is constructed.
        """
        if self._audio_lock is None:
            self._audio_lock = asyncio.Lock()
        return self._audio_lock

    @staticmethod
    def _capture_chunk() -> np.ndarray:
        """Record one wake-word chunk (blocking; meant for the executor)."""
        chunk = sd.rec(
            BLOCK_SIZE,
            samplerate=SAMPLE_RATE,
            channels=CHANNELS,
            dtype="int16",
        )
        sd.wait()
        return chunk.flatten()

    async def _speak(self, text: str, requested_voice: Optional[str]) -> None:
        """Run the blocking TTS playback in the executor, holding the audio lock."""
        loop = asyncio.get_running_loop()
        async with self._audio_device():
            await loop.run_in_executor(
                None, self.voice_engine.speak, text, requested_voice
            )

    async def _forward_raw_to_core(self, raw_message) -> None:
        """Pass an app message through to aria-core unchanged.

        Send errors are logged here. Letting them propagate would make the
        RVS receive loop treat a core failure as an RVS disconnect.
        """
        ws = self.ws_core
        if ws is None:
            return
        try:
            await ws.send(raw_message)
        except Exception:
            logger.exception("[core] Sendefehler")

    # ── aria-core connection ─────────────────────────────────

    async def connect_to_core(self) -> None:
        """Keep a WebSocket to aria-core open, reconnecting with backoff.

        The retry delay starts at 2 s, doubles after each failed attempt
        up to 30 s, and is reset after a successful connect.
        """
        retry_delay = 2

        while self.running:
            try:
                logger.info("[core] Verbinde: %s", self.ws_url)
                async with websockets.connect(self.ws_url) as ws:
                    self.ws_core = ws
                    retry_delay = 2
                    logger.info("[core] Verbunden")

                    async for message in ws:
                        await self._handle_core_message(message)

            except websockets.ConnectionClosed:
                logger.warning("[core] Verbindung verloren")
            except ConnectionRefusedError:
                logger.warning("[core] Nicht erreichbar")
            except Exception:
                logger.exception("[core] WebSocket-Fehler")
            finally:
                self.ws_core = None

            if self.running:
                logger.info("[core] Reconnect in %ds...", retry_delay)
                await asyncio.sleep(retry_delay)
                retry_delay = min(retry_delay * 2, 30)

    async def _handle_core_message(self, raw_message: str) -> None:
        """Process one message from aria-core.

        - applies a mode switch detected in the text and tells the app,
        - forwards the text to the app as a chat message (via RVS),
        - speaks the text locally when the current mode allows it.

        Malformed messages are logged and ignored.
        """
        message = self._parse_json_object(raw_message, "core")
        if message is None:
            return

        # Normalise the fields so unexpected JSON types cannot raise below.
        text = message.get("text") or ""
        if not isinstance(text, str):
            text = str(text)
        metadata = message.get("metadata")
        if not isinstance(metadata, dict):
            metadata = {}
        is_critical = metadata.get("critical", False)
        requested_voice = metadata.get("voice")

        logger.info("[core] Nachricht: '%s'", text[:80])

        # Check for a mode switch
        new_mode = detect_mode_switch(text)
        if new_mode is not None:
            self.current_mode = new_mode
            logger.info(
                "[core] Modus → %s %s",
                self.current_mode.config.emoji,
                self.current_mode.config.name,
            )
            # Tell the app about the mode change as well
            await self._send_to_rvs({
                "type": "mode",
                "payload": {"mode": self.current_mode.name},
                "timestamp": self._now_ms(),
            })

        # Forward the reply to the app (as a chat message)
        await self._send_to_rvs({
            "type": "chat",
            "payload": {
                "text": text,
                "sender": "aria",
                "voice": requested_voice or self.voice_engine.select_voice(text),
            },
            "timestamp": self._now_ms(),
        })

        if not text.strip():
            # Nothing to say; do not start the TTS pipeline for empty text.
            return

        # Local speech output (when the mode allows it)
        if should_speak(self.current_mode, is_critical):
            await self._speak(text, requested_voice)
        else:
            logger.info("[core] TTS unterdrueckt (Modus: %s)", self.current_mode.config.name)

    async def send_to_core(self, text: str, source: str = "bridge") -> None:
        """Send text to aria-core.

        Args:
            text: The user's text.
            source: "bridge" is sent as type ``voice_input``; any other
                value is sent as ``chat_input``.
        """
        if self.ws_core is None:
            logger.error("[core] Nicht verbunden — Nachricht verworfen: '%s'", text[:60])
            return

        message = json.dumps({
            "type": "voice_input" if source == "bridge" else "chat_input",
            "text": text,
            "mode": self.current_mode.name,
            "source": source,
        })

        try:
            await self.ws_core.send(message)
            logger.info("[core] Gesendet (%s): '%s'", source, text[:80])
        except Exception:
            logger.exception("[core] Sendefehler")

    # ── RVS connection (app relay) ───────────────────────────

    async def connect_to_rvs(self) -> None:
        """Keep a WebSocket to the RVS open, reconnecting with backoff.

        The token is passed as the ``token`` query parameter. Returns
        immediately when no RVS URL or token is configured. Messages
        received here are handled by ``_handle_rvs_message``.
        """
        if not self.rvs_url or not self.rvs_token:
            logger.info("[rvs] Nicht konfiguriert — ueberspringe")
            return

        retry_delay = 2
        url = f"{self.rvs_url}?token={self.rvs_token}"

        while self.running:
            try:
                logger.info("[rvs] Verbinde: %s", self.rvs_url)
                async with websockets.connect(url) as ws:
                    self.ws_rvs = ws
                    retry_delay = 2
                    logger.info("[rvs] Verbunden — warte auf App-Nachrichten")

                    # Heartbeats run next to the receive loop and are
                    # stopped as soon as the receive loop ends.
                    heartbeat_task = asyncio.create_task(self._rvs_heartbeat())

                    try:
                        async for raw_message in ws:
                            await self._handle_rvs_message(raw_message)
                    finally:
                        heartbeat_task.cancel()

            except websockets.ConnectionClosed:
                logger.warning("[rvs] Verbindung verloren")
            except ConnectionRefusedError:
                logger.warning("[rvs] Nicht erreichbar")
            except Exception:
                logger.exception("[rvs] WebSocket-Fehler")
            finally:
                self.ws_rvs = None

            if self.running:
                logger.info("[rvs] Reconnect in %ds...", retry_delay)
                await asyncio.sleep(retry_delay)
                retry_delay = min(retry_delay * 2, 30)

    async def _rvs_heartbeat(self) -> None:
        """Send a heartbeat message to the RVS every 25 seconds.

        Ends on the first failed send; otherwise runs until cancelled.
        """
        while True:
            await asyncio.sleep(25)
            if self.ws_rvs and self.ws_rvs.open:
                try:
                    await self.ws_rvs.send(json.dumps({
                        "type": "heartbeat",
                        "timestamp": self._now_ms(),
                    }))
                except Exception:
                    # The receive loop notices the broken connection and
                    # reconnects; just record why the heartbeat stopped.
                    logger.debug("[rvs] Heartbeat fehlgeschlagen", exc_info=True)
                    break

    async def _handle_rvs_message(self, raw_message: str) -> None:
        """Process one message from the app (via RVS).

        Handled types:
            - chat:     text message → forwarded to aria-core
            - mode:     mode switch
            - location: GPS data → logged and forwarded to aria-core
            - file:     file upload → forwarded to aria-core
            - audio:    only logged (STT for app audio is not implemented)

        Malformed messages are logged and ignored.
        """
        message = self._parse_json_object(raw_message, "rvs")
        if message is None:
            return

        msg_type = message.get("type", "")
        payload = message.get("payload")
        if not isinstance(payload, dict):
            payload = {}

        if msg_type == "chat":
            # Text from the app → aria-core
            text = payload.get("text", "")
            if isinstance(text, str) and text:
                logger.info("[rvs] App-Chat: '%s'", text[:80])
                await self.send_to_core(text, source="app")

        elif msg_type == "mode":
            # Mode switch requested by the app
            mode_name = payload.get("mode", "")
            new_mode = detect_mode_switch(mode_name) if isinstance(mode_name, str) else None
            if new_mode is not None:
                self.current_mode = new_mode
                logger.info(
                    "[rvs] Modus → %s %s (von App)",
                    self.current_mode.config.emoji,
                    self.current_mode.config.name,
                )

        elif msg_type == "location":
            # GPS data from the app
            lat = payload.get("lat")
            lng = payload.get("lng")
            speed = payload.get("speed")
            logger.info(
                "[rvs] GPS: lat=%.4f lng=%.4f speed=%s",
                self._as_float(lat),
                self._as_float(lng),
                speed,
            )
            # Forward to aria-core unchanged
            await self._forward_raw_to_core(raw_message)

        elif msg_type == "file":
            # File from the app → aria-core
            logger.info("[rvs] Datei empfangen: %s", payload.get("name", "?"))
            await self._forward_raw_to_core(raw_message)

        elif msg_type == "audio":
            # TODO: decode the audio, run it through Whisper, send the text to core
            logger.info("[rvs] Audio empfangen — TODO: STT")

        else:
            logger.debug("[rvs] Unbekannter Typ: %s", msg_type)

    async def _send_to_rvs(self, message: dict) -> None:
        """Send a message to the app (via RVS); silently a no-op while disconnected."""
        if self.ws_rvs is None or not self.ws_rvs.open:
            return

        try:
            await self.ws_rvs.send(json.dumps(message))
        except Exception:
            logger.exception("[rvs] Sendefehler")

    # ── Log streaming to the app ─────────────────────────────

    async def send_log_to_app(self, source: str, message: str, level: str = "info") -> None:
        """Send a ``log`` message with source, message and level to the app."""
        await self._send_to_rvs({
            "type": "log",
            "payload": {
                "source": source,
                "message": message,
                "level": level,
            },
            "timestamp": self._now_ms(),
        })

    async def send_event_to_app(self, title: str, description: str) -> None:
        """Send an ``event`` message with title and description to the app."""
        await self._send_to_rvs({
            "type": "event",
            "payload": {
                "title": title,
                "description": description,
            },
            "timestamp": self._now_ms(),
        })

    # ── Audio loop (local microphone) ────────────────────────

    async def audio_loop(self) -> None:
        """Detect the wake word, record, transcribe and send the text to aria-core.

        All blocking steps (chunk capture, detection, recording,
        transcription) run in the default executor. Capture and recording
        hold the audio lock so they never overlap with TTS playback.
        """
        logger.info("Audio-Schleife gestartet — warte auf Wake-Word '%s'...", self.wake_word.wake_word_key)

        loop = asyncio.get_running_loop()
        audio_device = self._audio_device()

        while self.running:
            try:
                async with audio_device:
                    audio_chunk = await loop.run_in_executor(None, self._capture_chunk)

                detected = await loop.run_in_executor(
                    None, self.wake_word.detect, audio_chunk
                )
                if not detected:
                    continue

                logger.info("Wake-Word erkannt — starte Aufnahme")
                await self.send_event_to_app(
                    "Wake-Word erkannt",
                    "ARIA hoert zu...",
                )

                async with audio_device:
                    audio_data = await loop.run_in_executor(None, record_audio)

                text = await loop.run_in_executor(
                    None, self.stt_engine.transcribe, audio_data
                )

                if text.strip():
                    new_mode = detect_mode_switch(text)
                    if new_mode is not None:
                        self.current_mode = new_mode

                    await self.send_to_core(text, source="bridge")
                else:
                    logger.info("Keine Sprache erkannt — ignoriert")

            except sd.PortAudioError:
                logger.error("Audio-Geraet nicht verfuegbar — warte 5 Sekunden")
                await asyncio.sleep(5)
            except Exception:
                logger.exception("Fehler in der Audio-Schleife")
                await asyncio.sleep(1)

    # ── Run & shutdown ───────────────────────────────────────

    async def run(self) -> None:
        """Run the three bridge tasks until they finish or are cancelled."""
        self.running = True
        self._loop = asyncio.get_running_loop()
        self._audio_lock = None  # recreated lazily for this loop

        self._tasks = [
            asyncio.create_task(self.connect_to_core()),
            asyncio.create_task(self.connect_to_rvs()),
            asyncio.create_task(self.audio_loop()),
        ]

        try:
            await asyncio.gather(*self._tasks)
        except asyncio.CancelledError:
            logger.info("Bridge-Tasks abgebrochen")
        finally:
            # Make sure no task outlives run(), whatever ended the gather.
            self.running = False
            for task in self._tasks:
                task.cancel()
            await asyncio.gather(*self._tasks, return_exceptions=True)
            self._tasks = []
            self._loop = None

    def _cancel_tasks(self) -> None:
        """Cancel every task started by run(); must execute on the loop thread."""
        for task in self._tasks:
            task.cancel()

    def shutdown(self) -> None:
        """Stop the bridge.

        Clearing ``running`` alone is not enough: the connection tasks are
        usually parked in ``async for ... in ws`` and never re-check the
        flag. The tasks are therefore cancelled via
        ``call_soon_threadsafe``, which also wakes the loop when this
        method is called from a signal handler or another thread.
        """
        logger.info("Bridge wird heruntergefahren...")
        self.running = False

        loop = self._loop
        if loop is None or loop.is_closed():
            return
        try:
            loop.call_soon_threadsafe(self._cancel_tasks)
        except RuntimeError:
            # The loop was closed between the check and the call; there is
            # nothing left to cancel.
            logger.debug("Event-Loop bereits geschlossen")
|
|
|
|
|
|
# ── Hauptprogramm ────────────────────────────────────────────
|
|
|
|
|
|
def main() -> None:
    """Start the ARIA Voice Bridge.

    Installs handlers for SIGTERM and SIGINT that call
    ``bridge.shutdown()``, initialises the bridge synchronously (exiting
    with status 1 on failure) and then runs it inside a new event loop.
    """
    bridge = ARIABridge()

    def handle_signal(signum: int, _frame: object) -> None:
        # Log the symbolic signal name, then ask the bridge to stop.
        logger.info("Signal %s empfangen — fahre herunter", signal.Signals(signum).name)
        bridge.shutdown()

    for sig in (signal.SIGTERM, signal.SIGINT):
        signal.signal(sig, handle_signal)

    # Initialisation is synchronous and happens before the event loop starts.
    try:
        bridge.initialize()
    except Exception:
        logger.exception("Initialisierung fehlgeschlagen")
        sys.exit(1)

    # Run the event loop until the bridge returns.
    try:
        asyncio.run(bridge.run())
    except KeyboardInterrupt:
        logger.info("Keyboard Interrupt — Bridge beendet")
    finally:
        logger.info("ARIA Voice Bridge beendet")


if __name__ == "__main__":
    main()
|