Bridge with ready check and state machine system

This commit is contained in:
2025-12-27 01:31:45 +01:00
parent ed2964bbbf
commit 90707055ce
36 changed files with 2791 additions and 355 deletions
+424
View File
@@ -0,0 +1,424 @@
#!/usr/bin/env python3
"""
Claude's Eyes - Main Bridge Script
Connects the ESP32 robot with Claude AI for autonomous exploration.
Usage:
python bridge.py # Use config.yaml
python bridge.py --config my.yaml # Use custom config
python bridge.py --simulate # Simulate without hardware
"""
import os
import sys
import time
import logging
import threading
import signal
from pathlib import Path
from typing import Optional
from dataclasses import dataclass
import yaml
import click
from rich.console import Console
from rich.panel import Panel
from rich.live import Live
from rich.table import Table
from rich.text import Text
from esp32_client import ESP32Client, RobotStatus
from tts_engine import create_tts_engine, TTSEngine
from stt_engine import create_stt_engine, STTEngine, SpeechResult
from chat_interface import create_chat_interface, ChatInterface, ChatResponse
# Setup logging
logging.basicConfig(
level=logging.INFO,
format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
)
logger = logging.getLogger(__name__)
# Rich console for pretty output
console = Console()
@dataclass
class BridgeState:
    """Current state of the bridge (shared between main loop and callbacks)."""
    connected: bool = False  # robot (or simulation) connection established
    exploring: bool = False  # main exploration loop is active
    last_image_time: float = 0  # time.time() of the last capture/analyze cycle
    last_status: Optional[RobotStatus] = None  # most recent sensor snapshot
    last_claude_response: str = ""  # full text of Claude's last reply
    stefan_input: str = ""  # pending speech input; consumed once per cycle
    error_message: str = ""  # last error text, kept for display
class ClaudesEyesBridge:
    """Main bridge class connecting robot and Claude.

    Orchestrates the ESP32 robot client, the Claude chat interface and the
    TTS/STT engines in a periodic capture -> analyze -> act loop.  In
    ``simulate`` mode all hardware access is skipped.
    """

    def __init__(self, config_path: str, simulate: bool = False):
        self.config = self._load_config(config_path)
        self.simulate = simulate
        self.state = BridgeState()
        self.running = False
        # Components (created in initialize(); None until then)
        self.robot: Optional[ESP32Client] = None
        self.chat: Optional[ChatInterface] = None
        self.tts: Optional[TTSEngine] = None
        self.stt: Optional[STTEngine] = None
        # Threading
        # NOTE(review): speech_thread is declared but never started in this
        # class — speech runs via stt.start_continuous(); confirm it is needed.
        self.speech_thread: Optional[threading.Thread] = None
        self._stop_event = threading.Event()

    def _load_config(self, config_path: str) -> dict:
        """Load configuration from YAML file.

        A sibling "<stem>.local<suffix>" file takes precedence over the given
        path, allowing machine-local overrides.  Exits the process when no
        config file exists at all.
        """
        path = Path(config_path)
        # Try local config first
        local_path = path.parent / f"{path.stem}.local{path.suffix}"
        if local_path.exists():
            path = local_path
            logger.info(f"Using local config: {path}")
        if not path.exists():
            logger.error(f"Config file not found: {path}")
            sys.exit(1)
        with open(path) as f:
            config = yaml.safe_load(f)
        # NOTE(review): safe_load returns None for an empty file while callers
        # expect a dict — confirm configs are never empty.
        return config

    def initialize(self) -> bool:
        """Initialize all components.

        Returns True when the bridge is ready, False when the robot cannot be
        reached.  TTS/STT failures are non-fatal: the engine is set to None
        and that feature is simply skipped.
        """
        console.print(Panel.fit(
            "[bold cyan]Claude's Eyes[/bold cyan]\n"
            "[dim]Autonomous Exploration Robot[/dim]",
            border_style="cyan"
        ))
        # Initialize robot client (skipped entirely in simulation mode)
        if not self.simulate:
            console.print("\n[yellow]Connecting to robot...[/yellow]")
            esp_config = self.config.get("esp32", {})
            self.robot = ESP32Client(
                host=esp_config.get("host", "192.168.178.100"),
                port=esp_config.get("port", 80),
                api_key=esp_config.get("api_key", ""),
                timeout=esp_config.get("timeout", 10)
            )
            if not self.robot.is_connected():
                console.print("[red]Could not connect to robot![/red]")
                self.state.error_message = "Robot connection failed"
                return False
            self.state.connected = True
            console.print("[green]Robot connected![/green]")
        else:
            console.print("[yellow]Simulation mode - no robot connection[/yellow]")
            self.state.connected = True
        # Initialize Claude interface.  The config key takes precedence over
        # the ANTHROPIC_API_KEY environment variable; without any key the
        # factory falls back to a simulated interface.
        console.print("\n[yellow]Initializing Claude interface...[/yellow]")
        claude_config = self.config.get("claude", {})
        api_key = claude_config.get("api_key") or os.environ.get("ANTHROPIC_API_KEY", "")
        self.chat = create_chat_interface(
            use_api=claude_config.get("use_api", True) and bool(api_key),
            api_key=api_key,
            model=claude_config.get("model", "claude-sonnet-4-20250514"),
            system_prompt=claude_config.get("system_prompt", ""),
            max_tokens=claude_config.get("max_tokens", 1024)
        )
        console.print(f"[green]Chat interface ready ({type(self.chat).__name__})[/green]")
        # Initialize TTS (optional — the bridge runs silently without it)
        console.print("\n[yellow]Initializing Text-to-Speech...[/yellow]")
        tts_config = self.config.get("tts", {})
        try:
            self.tts = create_tts_engine(
                engine_type=tts_config.get("engine", "pyttsx3"),
                voice=tts_config.get("voice"),
                rate=tts_config.get("rate", 150),
                volume=tts_config.get("volume", 0.9),
                language=tts_config.get("language", "de")
            )
            console.print("[green]TTS ready![/green]")
        except Exception as e:
            console.print(f"[red]TTS init failed: {e}[/red]")
            self.tts = None
        # Initialize STT (optional — exploration continues without voice input)
        console.print("\n[yellow]Initializing Speech-to-Text...[/yellow]")
        stt_config = self.config.get("stt", {})
        try:
            self.stt = create_stt_engine(
                energy_threshold=stt_config.get("energy_threshold", 300),
                pause_threshold=stt_config.get("pause_threshold", 0.8),
                phrase_time_limit=stt_config.get("phrase_time_limit", 15),
                service=stt_config.get("service", "google"),
                language=stt_config.get("language", "de-DE")
            )
            console.print("[green]STT ready![/green]")
        except Exception as e:
            console.print(f"[red]STT init failed: {e}[/red]")
            self.stt = None
        console.print("\n[bold green]All systems initialized![/bold green]\n")
        return True

    def start(self):
        """Start the main exploration loop (blocks until stopped)."""
        self.running = True
        self.state.exploring = True
        # Start speech recognition in the background; recognized phrases
        # arrive via _on_speech_detected.
        if self.stt:
            self.stt.start_continuous(self._on_speech_detected)
        # Welcome message (German, spoken to the operator)
        welcome = "Hallo Stefan! Ich bin online und bereit zum Erkunden. Was soll ich mir anschauen?"
        self._speak(welcome)
        self.state.last_claude_response = welcome
        try:
            self._main_loop()
        except KeyboardInterrupt:
            console.print("\n[yellow]Stopping...[/yellow]")
        finally:
            # Always release STT/TTS/robot resources, even on errors.
            self.stop()

    def stop(self):
        """Stop the bridge and shut down all components."""
        self.running = False
        self.state.exploring = False
        self._stop_event.set()
        if self.stt:
            self.stt.stop_continuous()
        if self.tts:
            self.tts.stop()
        # Make sure the robot is not left driving.
        if self.robot and not self.simulate:
            self.robot.stop()
        console.print("[yellow]Bridge stopped[/yellow]")

    def _main_loop(self):
        """Main exploration loop: run one exploration step every
        ``camera.capture_interval`` seconds until ``running`` is cleared."""
        camera_config = self.config.get("camera", {})
        capture_interval = camera_config.get("capture_interval", 5)
        while self.running:
            try:
                current_time = time.time()
                # Capture and process image periodically
                if current_time - self.state.last_image_time >= capture_interval:
                    self._exploration_step()
                    self.state.last_image_time = current_time
                # Update status display
                self._update_display()
                # Small delay to avoid busy-waiting
                time.sleep(0.1)
            except Exception as e:
                # Keep the loop alive on transient errors; back off briefly.
                logger.error(f"Loop error: {e}")
                self.state.error_message = str(e)
                time.sleep(1)

    def _exploration_step(self):
        """Single exploration step: read status, capture an image, send both
        to Claude, then speak the reply and execute its commands."""
        # Get robot status (best effort; keeps the previous snapshot on error)
        if self.robot and not self.simulate:
            try:
                self.state.last_status = self.robot.get_status()
            except Exception as e:
                logger.error(f"Status error: {e}")
        # Capture image (best effort; Claude gets text-only context on failure)
        image_data = None
        if self.robot and not self.simulate:
            try:
                camera_config = self.config.get("camera", {})
                image_data = self.robot.capture_image(
                    resolution=camera_config.get("resolution", "VGA"),
                    quality=camera_config.get("quality", 12)
                )
            except Exception as e:
                logger.error(f"Capture error: {e}")
        # Build context message with current sensor readings
        context = self._build_context_message()
        # Append and consume any pending operator speech input.
        # NOTE(review): stefan_input is written from the STT callback thread
        # and cleared here without a lock — a phrase arriving between the read
        # and the clear could be lost; verify whether that matters.
        if self.state.stefan_input:
            context += f"\n\nStefan sagt: {self.state.stefan_input}"
            self.state.stefan_input = ""
        # Send to Claude
        try:
            response = self.chat.send_message(context, image=image_data)
            self.state.last_claude_response = response.text
            # Speak response
            self._speak(response.text)
            # Execute commands embedded in the reply
            self._execute_commands(response.commands)
            # Update robot display
            if self.robot and not self.simulate:
                # Send short version to robot display (small screen)
                short_text = response.text[:100] + "..." if len(response.text) > 100 else response.text
                self.robot.set_claude_text(short_text)
        except Exception as e:
            logger.error(f"Chat error: {e}")
            self.state.error_message = str(e)

    def _build_context_message(self) -> str:
        """Build the (German) context message with current sensor data that is
        sent to Claude alongside the camera image."""
        parts = ["Hier ist was ich gerade sehe und meine Sensordaten:"]
        if self.state.last_status:
            status = self.state.last_status
            parts.append(f"\n- Abstand zum nächsten Hindernis: {status.distance_cm:.0f} cm")
            parts.append(f"- Aktuelle Aktion: {status.current_action}")
            parts.append(f"- Batterie: {status.battery_percent}%")
            if status.obstacle_danger:
                parts.append("- WARNUNG: Hindernis sehr nah!")
            elif status.obstacle_warning:
                parts.append("- Hinweis: Hindernis in der Nähe")
            if status.is_tilted:
                parts.append("- WARNUNG: Ich bin schief!")
        parts.append("\nWas siehst du auf dem Bild? Was möchtest du als nächstes tun?")
        return "\n".join(parts)

    def _execute_commands(self, commands: list):
        """Execute movement commands from Claude, applying the configured
        safety limits (max speed, minimum obstacle distance)."""
        if not commands:
            return
        if self.simulate:
            console.print(f"[dim]Simulated commands: {commands}[/dim]")
            return
        if not self.robot:
            return
        safety = self.config.get("safety", {})
        max_speed = safety.get("max_speed", 70)
        min_distance = safety.get("min_obstacle_distance", 20)
        for cmd in commands:
            # Safety check: refuse to drive forward into a close obstacle
            if self.state.last_status and self.state.last_status.distance_cm < min_distance:
                if cmd == "FORWARD":
                    console.print("[red]Blocked: Obstacle too close![/red]")
                    continue
            try:
                if cmd == "FORWARD":
                    self.robot.forward(speed=max_speed, duration_ms=800)
                elif cmd == "BACKWARD":
                    self.robot.backward(speed=max_speed, duration_ms=800)
                elif cmd == "LEFT":
                    self.robot.left(speed=max_speed, duration_ms=400)
                elif cmd == "RIGHT":
                    self.robot.right(speed=max_speed, duration_ms=400)
                elif cmd == "STOP":
                    self.robot.stop()
                elif cmd == "LOOK_LEFT":
                    self.robot.look_left()
                elif cmd == "LOOK_RIGHT":
                    self.robot.look_right()
                elif cmd == "LOOK_UP":
                    self.robot.look_up()
                elif cmd == "LOOK_DOWN":
                    self.robot.look_down()
                elif cmd == "LOOK_CENTER":
                    self.robot.look_center()
                # Small delay between commands
                time.sleep(0.3)
            except Exception as e:
                # One failing command must not abort the remaining ones.
                logger.error(f"Command error ({cmd}): {e}")

    def _speak(self, text: str):
        """Speak text using TTS, stripping [COMMAND] markers first."""
        if self.tts:
            # Remove command brackets from speech
            import re
            clean_text = re.sub(r'\[[A-Z_]+\]', '', text).strip()
            if clean_text:
                self.tts.speak_async(clean_text)

    def _on_speech_detected(self, result: SpeechResult):
        """STT callback: store the recognized phrase for the next
        exploration step (runs on the STT background thread)."""
        console.print(f"\n[bold blue]Stefan:[/bold blue] {result.text}")
        self.state.stefan_input = result.text

    def _update_display(self):
        """Update console display (placeholder)."""
        # This could be enhanced with rich.live for real-time updates
        pass
def signal_handler(sig_no, stack_frame):
    """Terminate cleanly when SIGINT/SIGTERM arrives (Ctrl+C or service stop)."""
    console.print("\n[yellow]Received stop signal...[/yellow]")
    sys.exit(0)
@click.command()
@click.option('--config', '-c', default='config.yaml', help='Path to config file')
@click.option('--simulate', '-s', is_flag=True, help='Simulate without hardware')
@click.option('--debug', '-d', is_flag=True, help='Enable debug logging')
def main(config: str, simulate: bool, debug: bool):
    """Claude's Eyes - Autonomous Exploration Robot Bridge"""
    if debug:
        logging.getLogger().setLevel(logging.DEBUG)

    # Install graceful-shutdown handlers for both interactive and service use.
    for sig in (signal.SIGINT, signal.SIGTERM):
        signal.signal(sig, signal_handler)

    # Resolve a relative config path against the script directory when a
    # matching file exists there; otherwise use it as given.
    cfg_path = Path(config)
    if not cfg_path.is_absolute():
        here = Path(__file__).parent
        if (here / config).exists():
            cfg_path = here / config

    # Create the bridge and bail out early when initialization fails.
    bridge = ClaudesEyesBridge(str(cfg_path), simulate=simulate)
    if not bridge.initialize():
        console.print("[red]Initialization failed![/red]")
        sys.exit(1)

    console.print("\n[bold cyan]Starting exploration...[/bold cyan]")
    console.print("[dim]Press Ctrl+C to stop[/dim]\n")
    bridge.start()
# Script entry point; click parses the command-line arguments.
if __name__ == "__main__":
    main()
+257
View File
@@ -0,0 +1,257 @@
"""
Claude's Eyes - Chat Interface
Interface to communicate with Claude AI (via API or browser)
"""
import logging
import base64
import re
from typing import Optional, List, Dict, Any, Tuple
from dataclasses import dataclass, field
from abc import ABC, abstractmethod
logger = logging.getLogger(__name__)
@dataclass
class Message:
    """A single chat message in a conversation."""
    role: str  # "user" or "assistant"
    content: str  # message text
    image_data: Optional[bytes] = None  # optional attached JPEG image data
@dataclass
class ChatResponse:
    """Response from Claude: the reply text plus any parsed commands."""
    text: str  # full reply text, may still contain [COMMAND] markers
    commands: List[str] = field(default_factory=list)  # extracted movement commands, in order
class ChatInterface(ABC):
    """Abstract base class for chat interfaces (real API or simulated)."""

    @abstractmethod
    def send_message(self, text: str, image: Optional[bytes] = None) -> ChatResponse:
        """Send a message (optionally with JPEG image bytes) to Claude and
        return the parsed response."""
        pass

    @abstractmethod
    def reset_conversation(self) -> None:
        """Reset/clear conversation history."""
        pass
class AnthropicAPIInterface(ChatInterface):
    """Direct Claude API interface using the anthropic library.

    Keeps the full multi-turn conversation (including image turns) in
    ``conversation_history`` and replays it on every request.
    """

    # Closed set of commands Claude may embed in square brackets.
    _VALID_COMMANDS = frozenset([
        "FORWARD", "BACKWARD", "LEFT", "RIGHT", "STOP",
        "LOOK_LEFT", "LOOK_RIGHT", "LOOK_UP", "LOOK_DOWN", "LOOK_CENTER",
    ])

    def __init__(
        self,
        api_key: str,
        model: str = "claude-sonnet-4-20250514",
        system_prompt: str = "",
        max_tokens: int = 1024
    ):
        """
        Args:
            api_key: Anthropic API key.
            model: Claude model identifier.
            system_prompt: System prompt sent with every request.
            max_tokens: Maximum response tokens per reply.
        """
        # Imported lazily so the module loads without the package installed.
        import anthropic
        self.client = anthropic.Anthropic(api_key=api_key)
        self.model = model
        self.system_prompt = system_prompt
        self.max_tokens = max_tokens
        self.conversation_history: List[Dict[str, Any]] = []
        logger.info(f"Anthropic API interface initialized (model: {model})")

    def send_message(self, text: str, image: Optional[bytes] = None) -> ChatResponse:
        """Send a message (optionally with a JPEG image) to the Claude API.

        Raises whatever the API client raises; on failure the just-added user
        turn is rolled back so history stays consistent for a retry.
        """
        # Build the user-turn content: optional image block first, then text.
        content: List[Dict[str, Any]] = []
        if image:
            image_base64 = base64.standard_b64encode(image).decode("utf-8")
            content.append({
                "type": "image",
                "source": {
                    "type": "base64",
                    "media_type": "image/jpeg",
                    "data": image_base64
                }
            })
        content.append({
            "type": "text",
            "text": text
        })
        self.conversation_history.append({
            "role": "user",
            "content": content
        })
        try:
            response = self.client.messages.create(
                model=self.model,
                max_tokens=self.max_tokens,
                system=self.system_prompt,
                messages=self.conversation_history
            )
        except Exception as e:
            # BUGFIX: remove the failed user turn; previously it stayed in
            # history, so a retry sent a duplicated turn for the same input.
            self.conversation_history.pop()
            logger.error(f"API error: {e}")
            raise
        # Concatenate all text blocks of the response.
        response_text = "".join(
            block.text for block in response.content if block.type == "text"
        )
        # Record the assistant turn so the next request has full context.
        self.conversation_history.append({
            "role": "assistant",
            "content": response_text
        })
        commands = self._extract_commands(response_text)
        logger.debug(f"Claude response: {response_text[:100]}...")
        logger.debug(f"Extracted commands: {commands}")
        return ChatResponse(text=response_text, commands=commands)

    def reset_conversation(self) -> None:
        """Reset conversation history."""
        self.conversation_history = []
        logger.info("Conversation history cleared")

    def _extract_commands(self, text: str) -> List[str]:
        """Extract known movement commands like ``[FORWARD]`` from *text*,
        preserving order and duplicates; unknown bracket tokens are ignored."""
        matches = re.findall(r'\[([A-Z_]+)\]', text)
        return [cmd for cmd in matches if cmd in self._VALID_COMMANDS]
class SimulatedInterface(ChatInterface):
    """Simulated chat interface for testing without API access.

    Cycles deterministically through a fixed set of canned replies.
    """

    # Canned (reply, commands) pairs, served round-robin.
    _CANNED = (
        ("Oh interessant! Ich sehe etwas vor mir. Lass mich näher hinfahren. [FORWARD]",
         ["FORWARD"]),
        ("Hmm, was ist das links? Ich schaue mal nach. [LOOK_LEFT]",
         ["LOOK_LEFT"]),
        ("Das sieht aus wie ein Bücherregal! Ich fahre mal hin. [FORWARD] [FORWARD]",
         ["FORWARD", "FORWARD"]),
        ("Stefan, was ist das für ein Gegenstand? Kannst du mir das erklären?",
         []),
        ("Ich drehe mich um und schaue was hinter mir ist. [RIGHT] [RIGHT]",
         ["RIGHT", "RIGHT"]),
    )

    def __init__(self):
        self.message_count = 0
        logger.info("Simulated chat interface initialized")

    def send_message(self, text: str, image: Optional[bytes] = None) -> ChatResponse:
        """Return the next canned response; input text/image are ignored."""
        self.message_count += 1
        reply, cmds = self._CANNED[(self.message_count - 1) % len(self._CANNED)]
        # Hand out a fresh list so callers can mutate it safely.
        return ChatResponse(text=reply, commands=list(cmds))

    def reset_conversation(self) -> None:
        """Restart the canned-response cycle from the beginning."""
        self.message_count = 0
def create_chat_interface(
    use_api: bool = True,
    api_key: str = "",
    model: str = "claude-sonnet-4-20250514",
    system_prompt: str = "",
    max_tokens: int = 1024
) -> ChatInterface:
    """
    Factory function to create a chat interface.

    Args:
        use_api: Use Anthropic API (True) or simulated (False)
        api_key: Anthropic API key
        model: Claude model to use
        system_prompt: System prompt for Claude
        max_tokens: Maximum response tokens
    """
    # Explicit opt-out: simulated interface requested.
    if not use_api:
        return SimulatedInterface()
    # Fall back to the environment when no key was passed in.
    if not api_key:
        import os
        api_key = os.environ.get("ANTHROPIC_API_KEY", "")
    # Still no key -> degrade gracefully to the simulated interface.
    if not api_key:
        logger.warning("No API key provided, using simulated interface")
        return SimulatedInterface()
    return AnthropicAPIInterface(
        api_key=api_key,
        model=model,
        system_prompt=system_prompt,
        max_tokens=max_tokens
    )
# Test when run directly: exercises the factory and a short conversation.
if __name__ == "__main__":
    import os
    logging.basicConfig(level=logging.DEBUG)
    print("Chat Interface Test")
    print("=" * 40)
    # Try API first, fall back to simulated when no key is set
    api_key = os.environ.get("ANTHROPIC_API_KEY", "")
    system_prompt = """Du bist Claude und steuerst einen Erkundungsroboter.
Befehle in Klammern: [FORWARD], [BACKWARD], [LEFT], [RIGHT], [STOP]
Beschreibe was du siehst und entscheide wohin du fährst."""
    interface = create_chat_interface(
        use_api=bool(api_key),
        api_key=api_key,
        system_prompt=system_prompt
    )
    print(f"Using: {type(interface).__name__}")
    print()
    # Test conversation (German prompts, text-only — no images)
    test_messages = [
        "Hallo Claude! Du bist jetzt online. Was siehst du?",
        "Vor dir ist ein Flur mit einer Tür am Ende.",
        "Die Tür ist offen und dahinter ist ein helles Zimmer."
    ]
    for msg in test_messages:
        print(f"User: {msg}")
        response = interface.send_message(msg)
        print(f"Claude: {response.text}")
        if response.commands:
            print(f" Commands: {response.commands}")
        print()
    print("Done!")
+238
View File
@@ -0,0 +1,238 @@
"""
Claude's Eyes - ESP32 API Client
Handles communication with the robot's REST API
"""
import requests
from typing import Optional, Dict, Any
from dataclasses import dataclass
from io import BytesIO
from PIL import Image
import logging
logger = logging.getLogger(__name__)
@dataclass
class RobotStatus:
    """Current robot status from sensors (one /api/status snapshot)."""
    distance_cm: float  # ultrasonic distance to the nearest obstacle
    battery_percent: int  # battery charge, 0-100
    current_action: str  # action the robot reports it is performing
    wifi_rssi: int  # WiFi signal strength in dBm
    uptime_seconds: int  # seconds since the robot booted
    servo_pan: int  # camera pan servo angle
    servo_tilt: int  # camera tilt servo angle
    obstacle_warning: bool  # obstacle nearby
    obstacle_danger: bool  # obstacle dangerously close
    is_tilted: bool  # robot is not level
    is_moving: bool  # motors currently active
    imu: Dict[str, float]  # raw IMU readings keyed by axis/field name
class ESP32Client:
    """Client for communicating with the ESP32 robot's REST API.

    All requests carry the API key as the ``key`` query parameter and raise
    ``requests.HTTPError`` on non-2xx responses.
    """

    def __init__(self, host: str, port: int = 80, api_key: str = "", timeout: int = 10):
        """
        Args:
            host: Robot hostname or IP address.
            port: HTTP port (default 80).
            api_key: API key appended to every request.
            timeout: Per-request timeout in seconds.
        """
        self.base_url = f"http://{host}:{port}"
        self.api_key = api_key
        self.timeout = timeout
        # One Session reuses the TCP connection across requests.
        self._session = requests.Session()

    def _get(self, endpoint: str, params: Optional[Dict] = None) -> requests.Response:
        """Make a GET request with the API key attached.

        Raises:
            requests.HTTPError: on non-2xx status.
        """
        # BUGFIX: copy the params dict so the caller's dict is not mutated
        # by inserting the "key" entry.
        params = dict(params) if params else {}
        params["key"] = self.api_key
        url = f"{self.base_url}{endpoint}"
        logger.debug(f"GET {url}")
        response = self._session.get(url, params=params, timeout=self.timeout)
        response.raise_for_status()
        return response

    def _post(self, endpoint: str, data: Dict) -> requests.Response:
        """Make a POST request with the API key attached.

        The key is sent via ``params`` so it is URL-encoded correctly and no
        longer interpolated (and thus logged) in the URL string.

        Raises:
            requests.HTTPError: on non-2xx status.
        """
        url = f"{self.base_url}{endpoint}"
        logger.debug(f"POST {url} with {data}")
        response = self._session.post(
            url, params={"key": self.api_key}, json=data, timeout=self.timeout
        )
        response.raise_for_status()
        return response

    def capture_image(self, resolution: str = "VGA", quality: int = 12) -> bytes:
        """
        Capture an image from the robot camera.

        Args:
            resolution: QVGA, VGA, SVGA, XGA, SXGA, UXGA
            quality: 10-63 (lower = better)
        Returns:
            JPEG image data as bytes.
        """
        params = {
            "resolution": resolution,
            "quality": quality
        }
        response = self._get("/api/capture", params)
        logger.info(f"Captured image: {len(response.content)} bytes")
        return response.content

    def capture_image_pil(self, resolution: str = "VGA", quality: int = 12) -> Image.Image:
        """Capture an image and return it as a PIL Image."""
        image_data = self.capture_image(resolution, quality)
        return Image.open(BytesIO(image_data))

    def get_status(self) -> RobotStatus:
        """Fetch the current sensor snapshot from /api/status.

        Missing fields fall back to benign defaults so a partial payload
        still yields a usable RobotStatus.
        """
        response = self._get("/api/status")
        data = response.json()
        return RobotStatus(
            distance_cm=data.get("distance_cm", 0),
            battery_percent=data.get("battery_percent", 100),
            current_action=data.get("current_action", "unknown"),
            wifi_rssi=data.get("wifi_rssi", 0),
            uptime_seconds=data.get("uptime_seconds", 0),
            servo_pan=data.get("servo_pan", 90),
            servo_tilt=data.get("servo_tilt", 90),
            obstacle_warning=data.get("obstacle_warning", False),
            obstacle_danger=data.get("obstacle_danger", False),
            is_tilted=data.get("is_tilted", False),
            is_moving=data.get("is_moving", False),
            imu=data.get("imu", {})
        )

    def send_command(self, action: str, speed: int = 50, duration_ms: int = 500,
                     pan: Optional[int] = None, tilt: Optional[int] = None) -> Dict[str, Any]:
        """
        Send a movement command to the robot.

        Args:
            action: forward, backward, left, right, stop,
                    look_left, look_right, look_up, look_down, look_center, look_custom
            speed: 0-100 percent
            duration_ms: Duration in milliseconds
            pan: Custom pan angle (for look_custom)
            tilt: Custom tilt angle (for look_custom)
        Returns:
            Parsed JSON response from the robot.
        """
        data = {
            "action": action,
            "speed": speed,
            "duration_ms": duration_ms
        }
        # pan/tilt are only meaningful for look_custom; omit them otherwise.
        if pan is not None:
            data["pan"] = pan
        if tilt is not None:
            data["tilt"] = tilt
        response = self._post("/api/command", data)
        result = response.json()
        logger.info(f"Command {action}: {result.get('message', 'OK')}")
        return result

    # Convenience methods for common actions
    def forward(self, speed: int = 50, duration_ms: int = 500) -> Dict:
        return self.send_command("forward", speed, duration_ms)

    def backward(self, speed: int = 50, duration_ms: int = 500) -> Dict:
        return self.send_command("backward", speed, duration_ms)

    def left(self, speed: int = 50, duration_ms: int = 500) -> Dict:
        return self.send_command("left", speed, duration_ms)

    def right(self, speed: int = 50, duration_ms: int = 500) -> Dict:
        return self.send_command("right", speed, duration_ms)

    def stop(self) -> Dict:
        return self.send_command("stop")

    def look_left(self) -> Dict:
        return self.send_command("look_left")

    def look_right(self) -> Dict:
        return self.send_command("look_right")

    def look_up(self) -> Dict:
        return self.send_command("look_up")

    def look_down(self) -> Dict:
        return self.send_command("look_down")

    def look_center(self) -> Dict:
        return self.send_command("look_center")

    def look_custom(self, pan: int, tilt: int) -> Dict:
        return self.send_command("look_custom", pan=pan, tilt=tilt)

    def set_claude_text(self, text: str) -> Dict:
        """Set the text that Claude wants to say/display on the robot."""
        response = self._post("/api/claude_text", {"text": text})
        return response.json()

    def get_claude_text(self) -> Dict[str, Any]:
        """Get the last Claude text (for TTS)."""
        response = self._get("/api/claude_text")
        return response.json()

    def set_display(self, mode: str, content: str = "") -> Dict:
        """
        Control the robot display.

        Args:
            mode: "text", "emoji", "status"
            content: Text to show or emoji name (happy, thinking, surprised, sleepy, curious, confused)
        """
        response = self._post("/api/display", {"mode": mode, "content": content})
        return response.json()

    def is_connected(self) -> bool:
        """Return True when the robot answers a status request."""
        try:
            self.get_status()
            return True
        except Exception as e:
            # Best-effort probe: any failure (network, HTTP, JSON) means
            # "not reachable" rather than an error for the caller.
            logger.warning(f"Connection check failed: {e}")
            return False
# Test when run directly: connect to a robot, print status, grab one image.
if __name__ == "__main__":
    import sys
    logging.basicConfig(level=logging.DEBUG)
    if len(sys.argv) < 2:
        print("Usage: python esp32_client.py <robot_ip>")
        sys.exit(1)
    host = sys.argv[1]
    # NOTE(review): hard-coded API key for manual testing — must match the
    # key flashed on the robot firmware.
    api_key = "claudes_eyes_secret_2025"
    client = ESP32Client(host, api_key=api_key)
    print(f"Connecting to {host}...")
    if client.is_connected():
        print("Connected!")
        status = client.get_status()
        print(f"\nStatus:")
        print(f" Distance: {status.distance_cm} cm")
        print(f" Battery: {status.battery_percent}%")
        print(f" Action: {status.current_action}")
        print(f" WiFi RSSI: {status.wifi_rssi} dBm")
        print("\nCapturing image...")
        img = client.capture_image_pil()
        print(f" Size: {img.size}")
        img.save("test_capture.jpg")
        print(" Saved to test_capture.jpg")
    else:
        print("Could not connect to robot!")