esp32-claude-robbie/claudes_eyes/python_bridge/chat_interface.py

258 lines
7.4 KiB
Python

"""
Claude's Eyes - Chat Interface
Interface to communicate with Claude AI (via API or browser)
"""
import logging
import base64
import re
from typing import Optional, List, Dict, Any, Tuple
from dataclasses import dataclass, field
from abc import ABC, abstractmethod
logger = logging.getLogger(__name__)
@dataclass
class Message:
    """One turn of a chat conversation.

    Attributes:
        role: Speaker of the turn, either "user" or "assistant".
        content: Text of the turn.
        image_data: Optional JPEG image bytes attached to the turn.
    """

    # "user" or "assistant"
    role: str
    content: str
    # JPEG image data; None when the message carries no image
    image_data: Optional[bytes] = None
@dataclass
class ChatResponse:
    """Reply returned by a chat interface.

    Attributes:
        text: Full reply text.
        commands: Movement commands extracted from the reply; empty by default.
    """

    text: str
    # Extracted movement commands; default_factory gives each instance its own list
    commands: List[str] = field(default_factory=list)
class ChatInterface(ABC):
    """Abstract contract that every chat backend must implement."""

    @abstractmethod
    def send_message(self, text: str, image: Optional[bytes] = None) -> ChatResponse:
        """Send a message to Claude and return its response.

        Args:
            text: Message text to send.
            image: Optional image bytes to send along with the text.

        Returns:
            The reply as a ChatResponse.
        """

    @abstractmethod
    def reset_conversation(self) -> None:
        """Reset/clear the conversation history."""
class AnthropicAPIInterface(ChatInterface):
    """Direct Claude API interface using the anthropic library.

    The running conversation is kept in ``conversation_history`` and the whole
    history is sent with every request. A turn is recorded in the history only
    after the API call for it has succeeded.
    """

    # Bracketed tokens accepted as robot commands; any other bracketed token
    # found in a reply is ignored.
    _VALID_COMMANDS = frozenset({
        "FORWARD", "BACKWARD", "LEFT", "RIGHT", "STOP",
        "LOOK_LEFT", "LOOK_RIGHT", "LOOK_UP", "LOOK_DOWN", "LOOK_CENTER",
    })
    # Commands are in brackets like [FORWARD], [LEFT], etc.
    _COMMAND_PATTERN = re.compile(r'\[([A-Z_]+)\]')

    def __init__(
        self,
        api_key: str,
        model: str = "claude-sonnet-4-20250514",
        system_prompt: str = "",
        max_tokens: int = 1024
    ):
        """Create the API client and start with an empty conversation.

        Args:
            api_key: Anthropic API key passed to the client.
            model: Model name sent with every request.
            system_prompt: System prompt sent with every request.
            max_tokens: Maximum response tokens requested per call.
        """
        # Imported here rather than at module level, so importing this module
        # does not require the anthropic package.
        import anthropic

        self.client = anthropic.Anthropic(api_key=api_key)
        self.model = model
        self.system_prompt = system_prompt
        self.max_tokens = max_tokens
        self.conversation_history: List[Dict[str, Any]] = []
        logger.info(f"Anthropic API interface initialized (model: {model})")

    def send_message(self, text: str, image: Optional[bytes] = None) -> ChatResponse:
        """Send a message (and optional image) to the Claude API.

        Args:
            text: Message text for the user turn.
            image: Optional image bytes; sent base64-encoded and labelled as
                image/jpeg, placed before the text in the turn.

        Returns:
            ChatResponse holding the concatenated text blocks of the reply and
            the valid commands found in it.

        Raises:
            Exception: Whatever the API call raises is logged and re-raised.
                The conversation history is left unchanged in that case.
        """
        # Build message content: image first (if any), then the text
        content: List[Dict[str, Any]] = []
        if image:
            image_base64 = base64.standard_b64encode(image).decode("utf-8")
            content.append({
                "type": "image",
                "source": {
                    "type": "base64",
                    "media_type": "image/jpeg",
                    "data": image_base64
                }
            })
        content.append({
            "type": "text",
            "text": text
        })
        user_message = {
            "role": "user",
            "content": content
        }

        try:
            # Send history plus the pending turn without mutating the stored
            # history yet: a failed request must not leave an unanswered user
            # turn behind that would be re-sent (and duplicated) on retry.
            response = self.client.messages.create(
                model=self.model,
                max_tokens=self.max_tokens,
                system=self.system_prompt,
                messages=[*self.conversation_history, user_message]
            )
            # Only text blocks contribute to the reply text
            response_text = "".join(
                block.text for block in response.content if block.type == "text"
            )
        except Exception as e:
            logger.error("API error: %s", e)
            raise

        # The exchange succeeded: commit both turns to the history
        self.conversation_history.append(user_message)
        self.conversation_history.append({
            "role": "assistant",
            "content": response_text
        })

        commands = self._extract_commands(response_text)
        logger.debug("Claude response: %s...", response_text[:100])
        logger.debug("Extracted commands: %s", commands)
        return ChatResponse(text=response_text, commands=commands)

    def reset_conversation(self) -> None:
        """Reset conversation history."""
        self.conversation_history = []
        logger.info("Conversation history cleared")

    def _extract_commands(self, text: str) -> List[str]:
        """Extract movement commands from Claude's response.

        Args:
            text: Reply text to scan.

        Returns:
            Valid command names in order of appearance, duplicates kept.
        """
        return [
            cmd
            for cmd in self._COMMAND_PATTERN.findall(text)
            if cmd in self._VALID_COMMANDS
        ]
class SimulatedInterface(ChatInterface):
    """Chat interface that replays canned replies, for testing without the API."""

    # (reply text, commands) pairs, replayed in order and wrapping around
    _SCRIPT = (
        ("Oh interessant! Ich sehe etwas vor mir. Lass mich näher hinfahren. [FORWARD]",
         ("FORWARD",)),
        ("Hmm, was ist das links? Ich schaue mal nach. [LOOK_LEFT]",
         ("LOOK_LEFT",)),
        ("Das sieht aus wie ein Bücherregal! Ich fahre mal hin. [FORWARD] [FORWARD]",
         ("FORWARD", "FORWARD")),
        ("Stefan, was ist das für ein Gegenstand? Kannst du mir das erklären?",
         ()),
        ("Ich drehe mich um und schaue was hinter mir ist. [RIGHT] [RIGHT]",
         ("RIGHT", "RIGHT")),
    )

    def __init__(self):
        """Start with a message counter of zero."""
        self.message_count = 0
        logger.info("Simulated chat interface initialized")

    def send_message(self, text: str, image: Optional[bytes] = None) -> ChatResponse:
        """Return the next canned reply; ``text`` and ``image`` are ignored."""
        self.message_count += 1
        position = (self.message_count - 1) % len(self._SCRIPT)
        reply, scripted_commands = self._SCRIPT[position]
        # Hand out a fresh list so callers cannot alter the script
        return ChatResponse(text=reply, commands=list(scripted_commands))

    def reset_conversation(self) -> None:
        """Restart the canned replies from the first one."""
        self.message_count = 0
def create_chat_interface(
    use_api: bool = True,
    api_key: str = "",
    model: str = "claude-sonnet-4-20250514",
    system_prompt: str = "",
    max_tokens: int = 1024
) -> ChatInterface:
    """
    Factory function to create a chat interface.

    Args:
        use_api: Use Anthropic API (True) or simulated (False)
        api_key: Anthropic API key; when empty, the ANTHROPIC_API_KEY
            environment variable is consulted instead
        model: Claude model to use
        system_prompt: System prompt for Claude
        max_tokens: Maximum response tokens

    Returns:
        An AnthropicAPIInterface when the API is requested and a key is
        available, otherwise a SimulatedInterface.
    """
    if not use_api:
        return SimulatedInterface()

    import os

    # An explicit key wins; the environment is only a fallback
    resolved_key = api_key or os.environ.get("ANTHROPIC_API_KEY", "")
    if not resolved_key:
        logger.warning("No API key provided, using simulated interface")
        return SimulatedInterface()

    return AnthropicAPIInterface(
        api_key=resolved_key,
        model=model,
        system_prompt=system_prompt,
        max_tokens=max_tokens
    )
# Manual smoke test, executed only when this file is run as a script
if __name__ == "__main__":
    import os

    logging.basicConfig(level=logging.DEBUG)
    print("Chat Interface Test")
    print("=" * 40)

    # Use the real API only when a key is present in the environment
    env_key = os.environ.get("ANTHROPIC_API_KEY", "")
    robot_prompt = """Du bist Claude und steuerst einen Erkundungsroboter.
Befehle in Klammern: [FORWARD], [BACKWARD], [LEFT], [RIGHT], [STOP]
Beschreibe was du siehst und entscheide wohin du fährst."""
    chat = create_chat_interface(
        use_api=bool(env_key),
        api_key=env_key,
        system_prompt=robot_prompt
    )
    print(f"Using: {type(chat).__name__}")
    print()

    # Short scripted conversation
    prompts = (
        "Hallo Claude! Du bist jetzt online. Was siehst du?",
        "Vor dir ist ein Flur mit einer Tür am Ende.",
        "Die Tür ist offen und dahinter ist ein helles Zimmer.",
    )
    for prompt in prompts:
        print(f"User: {prompt}")
        reply = chat.send_message(prompt)
        print(f"Claude: {reply.text}")
        if reply.commands:
            print(f" Commands: {reply.commands}")
        print()

    print("Done!")