""" Claude's Eyes - Speech-to-Text Engine Converts Stefan's speech to text for Claude """ import logging import threading import queue from typing import Optional, Callable from dataclasses import dataclass logger = logging.getLogger(__name__) @dataclass class SpeechResult: """Result from speech recognition""" text: str confidence: float is_final: bool class STTEngine: """Speech-to-Text engine using SpeechRecognition library""" def __init__( self, energy_threshold: int = 300, pause_threshold: float = 0.8, phrase_time_limit: int = 120, service: str = "google", language: str = "de-DE" ): import speech_recognition as sr self.recognizer = sr.Recognizer() self.microphone = sr.Microphone() # Configure recognizer self.recognizer.energy_threshold = energy_threshold self.recognizer.pause_threshold = pause_threshold self.recognizer.phrase_time_limit = phrase_time_limit self.service = service self.language = language self._listening = False self._callback: Optional[Callable[[SpeechResult], None]] = None self._stop_flag = False self._thread: Optional[threading.Thread] = None self._results_queue = queue.Queue() # Calibrate microphone logger.info("Calibrating microphone...") with self.microphone as source: self.recognizer.adjust_for_ambient_noise(source, duration=1) logger.info(f"Energy threshold set to {self.recognizer.energy_threshold}") logger.info(f"STT engine initialized (service: {service}, language: {language})") def listen_once(self, timeout: Optional[float] = None) -> Optional[SpeechResult]: """ Listen for a single phrase (blocking) Args: timeout: Maximum time to wait for speech start Returns: SpeechResult or None if nothing recognized """ import speech_recognition as sr try: with self.microphone as source: logger.debug("Listening...") audio = self.recognizer.listen(source, timeout=timeout) return self._recognize(audio) except sr.WaitTimeoutError: logger.debug("Listen timeout") return None except Exception as e: logger.error(f"Listen error: {e}") return None def _recognize(self, audio) -> Optional[SpeechResult]: """Recognize speech from audio data""" import speech_recognition as sr try: if self.service == "google": text = self.recognizer.recognize_google(audio, language=self.language) return SpeechResult(text=text, confidence=0.9, is_final=True) elif self.service == "sphinx": # Offline recognition (needs pocketsphinx) text = self.recognizer.recognize_sphinx(audio) return SpeechResult(text=text, confidence=0.7, is_final=True) else: logger.error(f"Unknown service: {self.service}") return None except sr.UnknownValueError: logger.debug("Could not understand audio") return None except sr.RequestError as e: logger.error(f"Recognition service error: {e}") return None def start_continuous(self, callback: Callable[[SpeechResult], None]) -> None: """ Start continuous listening in background Args: callback: Function called with each recognized phrase """ if self._listening: logger.warning("Already listening") return self._callback = callback self._stop_flag = False self._listening = True self._thread = threading.Thread(target=self._listen_loop, daemon=True) self._thread.start() logger.info("Continuous listening started") def stop_continuous(self) -> None: """Stop continuous listening""" self._stop_flag = True self._listening = False if self._thread: self._thread.join(timeout=2) self._thread = None logger.info("Continuous listening stopped") def _listen_loop(self): """Background thread for continuous listening""" import speech_recognition as sr while not self._stop_flag: try: with self.microphone as source: # Short timeout to allow stop checks try: audio = self.recognizer.listen(source, timeout=1, phrase_time_limit=self.recognizer.phrase_time_limit) except sr.WaitTimeoutError: continue result = self._recognize(audio) if result and self._callback: self._callback(result) except Exception as e: if not self._stop_flag: logger.error(f"Listen loop error: {e}") def is_listening(self) -> bool: return self._listening def get_result_nonblocking(self) -> Optional[SpeechResult]: """Get result without blocking (for use with async callback)""" try: return self._results_queue.get_nowait() except queue.Empty: return None class TermuxSTTEngine: """ STT via Termux:API für Android Benötigt: - Termux App - Termux:API App - pkg install termux-api """ def __init__(self, language: str = "de-DE", timeout: int = 10): self.language = language self.timeout = timeout self._listening = False self._stop_flag = False self._thread: Optional[threading.Thread] = None self._callback: Optional[Callable[[SpeechResult], None]] = None # Teste ob termux-speech-to-text verfügbar ist import shutil if not shutil.which("termux-speech-to-text"): raise RuntimeError( "termux-speech-to-text nicht gefunden! " "Installiere mit: pkg install termux-api" ) logger.info(f"Termux STT engine initialized (language: {language})") def listen_once(self, timeout: Optional[float] = None) -> Optional[SpeechResult]: """ Listen for a single phrase via Termux API Args: timeout: Maximum time to wait (uses class timeout if None) Returns: SpeechResult or None if nothing recognized """ import subprocess import json actual_timeout = timeout if timeout else self.timeout try: # termux-speech-to-text gibt JSON zurück result = subprocess.run( ["termux-speech-to-text"], capture_output=True, text=True, timeout=actual_timeout + 5 # Extra Zeit für API ) if result.returncode != 0: logger.error(f"Termux STT error: {result.stderr}") return None # Output ist ein String (kein JSON bei Termux) text = result.stdout.strip() if text: return SpeechResult( text=text, confidence=0.8, # Termux gibt keine Konfidenz is_final=True ) return None except subprocess.TimeoutExpired: logger.debug("Termux STT timeout") return None except Exception as e: logger.error(f"Termux STT error: {e}") return None def start_continuous(self, callback: Callable[[SpeechResult], None]) -> None: """Start continuous listening in background""" if self._listening: logger.warning("Already listening") return self._callback = callback self._stop_flag = False self._listening = True self._thread = threading.Thread(target=self._listen_loop, daemon=True) self._thread.start() logger.info("Termux continuous listening started") def stop_continuous(self) -> None: """Stop continuous listening""" self._stop_flag = True self._listening = False if self._thread: self._thread.join(timeout=2) self._thread = None logger.info("Termux continuous listening stopped") def _listen_loop(self): """Background thread for continuous listening""" while not self._stop_flag: try: result = self.listen_once(timeout=5) if result and self._callback: self._callback(result) except Exception as e: if not self._stop_flag: logger.error(f"Termux listen loop error: {e}") # Kleine Pause zwischen Aufnahmen import time time.sleep(0.5) def is_listening(self) -> bool: return self._listening def create_stt_engine(engine_type: str = "standard", **kwargs): """ Factory function to create STT engine Args: engine_type: "standard" or "termux" **kwargs: Engine-specific options """ if engine_type == "termux": return TermuxSTTEngine( language=kwargs.get("language", "de-DE"), timeout=kwargs.get("phrase_time_limit", 15) ) else: # Standard SpeechRecognition engine return STTEngine( energy_threshold=kwargs.get("energy_threshold", 300), pause_threshold=kwargs.get("pause_threshold", 0.8), phrase_time_limit=kwargs.get("phrase_time_limit", 15), service=kwargs.get("service", "google"), language=kwargs.get("language", "de-DE") ) # Test when run directly if __name__ == "__main__": logging.basicConfig(level=logging.DEBUG) print("Speech-to-Text Test") print("=" * 40) engine = create_stt_engine(language="de-DE") print("\nSag etwas (du hast 10 Sekunden)...") result = engine.listen_once(timeout=10) if result: print(f"\nErkannt: '{result.text}'") print(f"Konfidenz: {result.confidence:.0%}") else: print("\nNichts erkannt.") print("\n\nKontinuierlicher Modus (5 Sekunden)...") def on_speech(result: SpeechResult): print(f" -> {result.text}") engine.start_continuous(on_speech) import time time.sleep(5) engine.stop_continuous() print("\nDone!")