# esp32-claude-robbie/claudes_eyes/python_bridge/chat_interface.py

"""
Claude's Eyes - Chat Interface

Interface to communicate with Claude AI (via API or browser)
"""

import logging
import base64
import re
from typing import Optional, List, Dict, Any, Tuple
from dataclasses import dataclass, field
from abc import ABC, abstractmethod

logger = logging.getLogger(__name__)


@dataclass
class Message:
    """A single chat message, optionally accompanied by image data"""

    # Author of the message: "user" or "assistant"
    role: str
    # Text of the message
    content: str
    # JPEG image data attached to the message; None when there is no image
    image_data: Optional[bytes] = field(default=None)


@dataclass
class ChatResponse:
    """Response from Claude

    Holds the reply text together with the movement commands that belong
    to it.
    """
    text: str
    # Movement commands for this reply; every instance gets its own new
    # empty list when none are passed in
    commands: List[str] = field(default_factory=list)  # Extracted movement commands


class ChatInterface(ABC):
    """Contract that every chat interface implementation has to fulfil"""

    @abstractmethod
    def send_message(self, text: str, image: Optional[bytes] = None) -> ChatResponse:
        """Pass a message (and optionally an image) to Claude and return the reply"""
        ...

    @abstractmethod
    def reset_conversation(self) -> None:
        """Clear the conversation history"""
        ...


class AnthropicAPIInterface(ChatInterface):
    """Direct Claude API interface using anthropic library

    The running conversation is kept in ``conversation_history`` and is sent
    along with every request. A turn is only recorded in the history once the
    API call for it has succeeded.
    """

    # Commands are upper-case words in brackets like [FORWARD], [LOOK_LEFT]
    _COMMAND_PATTERN = re.compile(r'\[([A-Z_]+)\]')

    # Bracketed words that are not listed here are not treated as commands
    _VALID_COMMANDS = frozenset({
        "FORWARD", "BACKWARD", "LEFT", "RIGHT", "STOP",
        "LOOK_LEFT", "LOOK_RIGHT", "LOOK_UP", "LOOK_DOWN", "LOOK_CENTER",
    })

    def __init__(
        self,
        api_key: str,
        model: str = "claude-sonnet-4-20250514",
        system_prompt: str = "",
        max_tokens: int = 1024
    ):
        """
        Create the API client and start with an empty conversation

        Args:
            api_key: Anthropic API key
            model: Claude model to use
            system_prompt: System prompt sent with every request
            max_tokens: Maximum response tokens
        """
        # Imported here rather than at module level, so importing this module
        # does not require the anthropic package
        import anthropic

        self.client = anthropic.Anthropic(api_key=api_key)
        self.model = model
        self.system_prompt = system_prompt
        self.max_tokens = max_tokens
        self.conversation_history: List[Dict[str, Any]] = []

        logger.info(f"Anthropic API interface initialized (model: {model})")

    def send_message(self, text: str, image: Optional[bytes] = None) -> ChatResponse:
        """
        Send message to Claude API

        Args:
            text: Text of the user message
            image: Optional image bytes; sent base64-encoded and declared as
                image/jpeg

        Returns:
            ChatResponse with the concatenated text blocks of the reply and
            the commands extracted from that text

        Raises:
            Exception: Any error raised by the API call is logged and
                re-raised; the conversation history is left unchanged then
        """
        # Build message content; the image (if any) goes before the text
        content: List[Dict[str, Any]] = []

        if image:
            image_base64 = base64.standard_b64encode(image).decode("utf-8")
            content.append({
                "type": "image",
                "source": {
                    "type": "base64",
                    "media_type": "image/jpeg",
                    "data": image_base64
                }
            })

        content.append({
            "type": "text",
            "text": text
        })

        user_message: Dict[str, Any] = {"role": "user", "content": content}

        try:
            # Send history plus the new turn without touching the stored
            # history yet: a failed request must not leave an unanswered user
            # turn behind that would be re-sent (or duplicated on a retry)
            response = self.client.messages.create(
                model=self.model,
                max_tokens=self.max_tokens,
                system=self.system_prompt,
                messages=[*self.conversation_history, user_message]
            )
        except Exception as e:
            logger.error(f"API error: {e}")
            raise

        # Only text blocks contribute to the reply text
        response_text = "".join(
            block.text for block in response.content if block.type == "text"
        )

        # The call succeeded: record both turns of the exchange
        self.conversation_history.append(user_message)
        self.conversation_history.append({
            "role": "assistant",
            "content": response_text
        })

        commands = self._extract_commands(response_text)

        logger.debug(f"Claude response: {response_text[:100]}...")
        logger.debug(f"Extracted commands: {commands}")

        return ChatResponse(text=response_text, commands=commands)

    def reset_conversation(self) -> None:
        """Reset conversation history"""
        self.conversation_history = []
        logger.info("Conversation history cleared")

    def _extract_commands(self, text: str) -> List[str]:
        """
        Extract movement commands from Claude's response

        Args:
            text: Reply text to scan

        Returns:
            The valid commands in order of appearance; repeated commands are
            kept, unknown bracketed words are dropped
        """
        return [
            cmd
            for cmd in self._COMMAND_PATTERN.findall(text)
            if cmd in self._VALID_COMMANDS
        ]


class SimulatedInterface(ChatInterface):
    """Stand-in chat interface that answers with canned replies, no API needed"""

    # (reply text, commands) pairs; handed out in order, starting over after
    # the last one. Stored as tuples so the shared table cannot be mutated.
    _CANNED_REPLIES = (
        ("Oh interessant! Ich sehe etwas vor mir. Lass mich näher hinfahren. [FORWARD]",
         ("FORWARD",)),
        ("Hmm, was ist das links? Ich schaue mal nach. [LOOK_LEFT]",
         ("LOOK_LEFT",)),
        ("Das sieht aus wie ein Bücherregal! Ich fahre mal hin. [FORWARD] [FORWARD]",
         ("FORWARD", "FORWARD")),
        ("Stefan, was ist das für ein Gegenstand? Kannst du mir das erklären?",
         ()),
        ("Ich drehe mich um und schaue was hinter mir ist. [RIGHT] [RIGHT]",
         ("RIGHT", "RIGHT")),
    )

    def __init__(self):
        self.message_count = 0
        logger.info("Simulated chat interface initialized")

    def send_message(self, text: str, image: Optional[bytes] = None) -> ChatResponse:
        """Answer with the next canned reply; text and image are ignored"""
        # Pick the slot from the number of messages seen before this one
        slot = self.message_count % len(self._CANNED_REPLIES)
        self.message_count += 1

        reply_text, reply_commands = self._CANNED_REPLIES[slot]

        # A new list per response, so callers never share command lists
        return ChatResponse(text=reply_text, commands=list(reply_commands))

    def reset_conversation(self) -> None:
        """Start over with the first canned reply"""
        self.message_count = 0


def create_chat_interface(
    use_api: bool = True,
    api_key: str = "",
    model: str = "claude-sonnet-4-20250514",
    system_prompt: str = "",
    max_tokens: int = 1024
) -> ChatInterface:
    """
    Build the chat interface that matches the given settings

    When the API is requested but no key is passed, the ANTHROPIC_API_KEY
    environment variable is consulted; if that is empty too, a warning is
    logged and the simulated interface is returned instead.

    Args:
        use_api: Use Anthropic API (True) or simulated (False)
        api_key: Anthropic API key
        model: Claude model to use
        system_prompt: System prompt for Claude
        max_tokens: Maximum response tokens

    Returns:
        An AnthropicAPIInterface when the API is requested and a key is
        available, otherwise a SimulatedInterface
    """
    # Simulation was asked for explicitly
    if not use_api:
        return SimulatedInterface()

    import os

    # An explicitly passed key wins; the environment is only the fallback
    effective_key = api_key or os.environ.get("ANTHROPIC_API_KEY", "")

    if effective_key:
        return AnthropicAPIInterface(
            api_key=effective_key,
            model=model,
            system_prompt=system_prompt,
            max_tokens=max_tokens
        )

    logger.warning("No API key provided, using simulated interface")
    return SimulatedInterface()


# Manual smoke test, executed only when this file is run as a script
if __name__ == "__main__":
    import os

    logging.basicConfig(level=logging.DEBUG)

    print("Chat Interface Test")
    print("=" * 40)

    # With a key in the environment the real API is used, otherwise the
    # simulated interface
    env_key = os.environ.get("ANTHROPIC_API_KEY", "")

    robot_prompt = """Du bist Claude und steuerst einen Erkundungsroboter.
    Befehle in Klammern: [FORWARD], [BACKWARD], [LEFT], [RIGHT], [STOP]
    Beschreibe was du siehst und entscheide wohin du fährst."""

    chat = create_chat_interface(
        use_api=bool(env_key),
        api_key=env_key,
        system_prompt=robot_prompt
    )

    print(f"Using: {type(chat).__name__}")
    print()

    # Scripted user turns for a short test conversation
    for user_line in (
        "Hallo Claude! Du bist jetzt online. Was siehst du?",
        "Vor dir ist ein Flur mit einer Tür am Ende.",
        "Die Tür ist offen und dahinter ist ein helles Zimmer.",
    ):
        print(f"User: {user_line}")
        reply = chat.send_message(user_line)
        print(f"Claude: {reply.text}")
        if reply.commands:
            print(f"  Commands: {reply.commands}")
        print()

    print("Done!")