diff --git a/python_bridge/chat_audio_bridge.py b/python_bridge/chat_audio_bridge.py index 8b8bf97..7dfb46f 100755 --- a/python_bridge/chat_audio_bridge.py +++ b/python_bridge/chat_audio_bridge.py @@ -614,12 +614,18 @@ Erst dann starten die automatischen TICKs mit Bildern!""" # Hole neue Nachrichten messages = self.chat.get_new_messages(since_id=self.last_assistant_message_id) + if messages: + logger.debug(f"TTS: {len(messages)} neue Nachrichten gefunden") + for msg in messages: + logger.debug(f"TTS: Nachricht - assistant={msg.is_from_assistant}, id={msg.id[:20]}..., text={msg.text[:50]}...") + if msg.is_from_assistant: self.last_assistant_message_id = msg.id # Text für Sprache aufbereiten speech_text = self._clean_for_speech(msg.text) + logger.debug(f"TTS: Nach Bereinigung: {len(speech_text) if speech_text else 0} Zeichen") if speech_text and len(speech_text) > 5: # In Konsole anzeigen @@ -628,8 +634,14 @@ Erst dann starten die automatischen TICKs mit Bildern!""" console.print(f"[dim]...({len(speech_text)} Zeichen)[/dim]") # Vorlesen + logger.info(f"TTS: Spreche {len(speech_text)} Zeichen...") self.tts.speak(speech_text) self.stats.messages_spoken += 1 + logger.debug("TTS: Sprechen beendet") + else: + logger.debug(f"TTS: Text zu kurz oder leer, übersprungen") + else: + logger.debug(f"TTS: Nachricht ist nicht von Claude, übersprungen") except Exception as e: logger.error(f"TTS-Loop-Fehler: {e}") diff --git a/python_bridge/chat_web_interface.py b/python_bridge/chat_web_interface.py index 635ddf3..7b2f2f3 100644 --- a/python_bridge/chat_web_interface.py +++ b/python_bridge/chat_web_interface.py @@ -559,11 +559,28 @@ class ClaudeChatInterface: continue # Bestimme ob Human oder Assistant + # Mehrere Methoden probieren: class_name = elem.get_attribute("class") or "" + data_role = elem.get_attribute("data-role") or "" + outer_html = "" + try: + # Prüfe Parent-Element auf Hinweise + parent = elem.find_element(By.XPATH, "..") + parent_class = parent.get_attribute("class") or "" + outer_html = parent_class + except: + pass + + # Kombiniere alle Hinweise + all_hints = (class_name + " " + data_role + " " + outer_html).lower() + is_assistant = ( - "assistant" in class_name.lower() or - "claude" in class_name.lower() or - "ai" in class_name.lower() + "assistant" in all_hints or + "claude" in all_hints or + "ai-message" in all_hints or + "response" in all_hints or + # Claude.ai spezifisch: Nachrichten ohne "human" sind von Claude + ("message" in all_hints and "human" not in all_hints and "user" not in all_hints) ) # Generiere ID diff --git a/python_bridge/config.yaml b/python_bridge/config.yaml index 0e05e20..3dad378 100644 --- a/python_bridge/config.yaml +++ b/python_bridge/config.yaml @@ -98,8 +98,8 @@ stt: pause_threshold: 0.8 # Maximale Aufnahmelänge pro Phrase in Sekunden - # Bei langen Sätzen höher setzen (Google STT unterstützt bis ~60s) - phrase_time_limit: 60 + # Bei langen Sätzen höher setzen (max 2 Minuten = 120s) + phrase_time_limit: 120 # ============================================================================ # Termux (Android) Einstellungen diff --git a/python_bridge/stt_engine.py b/python_bridge/stt_engine.py index 09d3345..a789039 100644 --- a/python_bridge/stt_engine.py +++ b/python_bridge/stt_engine.py @@ -28,7 +28,7 @@ class STTEngine: self, energy_threshold: int = 300, pause_threshold: float = 0.8, - phrase_time_limit: int = 60, + phrase_time_limit: int = 120, service: str = "google", language: str = "de-DE" ):