Compare commits
15 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| 8ad3e39453 | |||
| afa96b1d44 | |||
| 0407c5bc3c | |||
| 2d348aeec7 | |||
| 7e53dcfed3 | |||
| 33d5be781f | |||
| 785f5d0805 | |||
| fac87474ec | |||
| 8227266aea | |||
| 5d24e01d4b | |||
| 4fe72cc4a8 | |||
| eeeb1d43f5 | |||
| 0044e222db | |||
| 048d231b60 | |||
| 2bac9c26ca |
@@ -79,8 +79,8 @@ android {
|
|||||||
applicationId "com.ariacockpit"
|
applicationId "com.ariacockpit"
|
||||||
minSdkVersion rootProject.ext.minSdkVersion
|
minSdkVersion rootProject.ext.minSdkVersion
|
||||||
targetSdkVersion rootProject.ext.targetSdkVersion
|
targetSdkVersion rootProject.ext.targetSdkVersion
|
||||||
versionCode 10505
|
versionCode 10601
|
||||||
versionName "0.1.5.5"
|
versionName "0.1.6.1"
|
||||||
// Fallback fuer Libraries mit Product Flavors
|
// Fallback fuer Libraries mit Product Flavors
|
||||||
missingDimensionStrategy 'react-native-camera', 'general'
|
missingDimensionStrategy 'react-native-camera', 'general'
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -15,6 +15,7 @@ import com.facebook.react.bridge.ReactApplicationContext
|
|||||||
import com.facebook.react.bridge.ReactContextBaseJavaModule
|
import com.facebook.react.bridge.ReactContextBaseJavaModule
|
||||||
import com.facebook.react.bridge.ReactMethod
|
import com.facebook.react.bridge.ReactMethod
|
||||||
import com.facebook.react.modules.core.DeviceEventManagerModule
|
import com.facebook.react.modules.core.DeviceEventManagerModule
|
||||||
|
import java.util.concurrent.Executors
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Lauscht auf Anruf-Statusaenderungen — wenn das Telefon klingelt oder ein
|
* Lauscht auf Anruf-Statusaenderungen — wenn das Telefon klingelt oder ein
|
||||||
@@ -35,6 +36,11 @@ class PhoneCallModule(reactContext: ReactApplicationContext) : ReactContextBaseJ
|
|||||||
private var legacyListener: PhoneStateListener? = null
|
private var legacyListener: PhoneStateListener? = null
|
||||||
private var modernCallback: Any? = null // TelephonyCallback ab API 31
|
private var modernCallback: Any? = null // TelephonyCallback ab API 31
|
||||||
private var lastState: Int = TelephonyManager.CALL_STATE_IDLE
|
private var lastState: Int = TelephonyManager.CALL_STATE_IDLE
|
||||||
|
// Eigener Single-Thread-Executor statt mainExecutor — der wird bei
|
||||||
|
// pausierter Activity verzoegert oder gar nicht abgearbeitet, der eigene
|
||||||
|
// Thread laeuft unabhaengig solange der App-Prozess lebt (was er ja tut,
|
||||||
|
// wir haben einen Foreground-Service der das garantiert).
|
||||||
|
private val callbackExecutor = Executors.newSingleThreadExecutor()
|
||||||
|
|
||||||
@ReactMethod
|
@ReactMethod
|
||||||
fun start(promise: Promise) {
|
fun start(promise: Promise) {
|
||||||
@@ -59,7 +65,7 @@ class PhoneCallModule(reactContext: ReactApplicationContext) : ReactContextBaseJ
|
|||||||
handleStateChange(state)
|
handleStateChange(state)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
tm.registerTelephonyCallback(reactApplicationContext.mainExecutor, cb)
|
tm.registerTelephonyCallback(callbackExecutor, cb)
|
||||||
modernCallback = cb
|
modernCallback = cb
|
||||||
} else {
|
} else {
|
||||||
@Suppress("DEPRECATION")
|
@Suppress("DEPRECATION")
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
{
|
{
|
||||||
"name": "aria-cockpit",
|
"name": "aria-cockpit",
|
||||||
"version": "0.1.5.5",
|
"version": "0.1.6.1",
|
||||||
"private": true,
|
"private": true,
|
||||||
"scripts": {
|
"scripts": {
|
||||||
"android": "react-native run-android",
|
"android": "react-native run-android",
|
||||||
|
|||||||
@@ -149,6 +149,22 @@ const MAX_THOUGHTS = 500;
|
|||||||
// im Gespraechsmodus bei sehr vielen Nachrichten.
|
// im Gespraechsmodus bei sehr vielen Nachrichten.
|
||||||
const capMessages = (msgs: ChatMessage[]): ChatMessage[] =>
|
const capMessages = (msgs: ChatMessage[]): ChatMessage[] =>
|
||||||
msgs.length > MAX_MEMORY_MESSAGES ? msgs.slice(-MAX_MEMORY_MESSAGES) : msgs;
|
msgs.length > MAX_MEMORY_MESSAGES ? msgs.slice(-MAX_MEMORY_MESSAGES) : msgs;
|
||||||
|
|
||||||
|
// The bridge prepends bracketed prefixes to user texts so that the Brain has
// context (GPS position, barge-in hint, ...). They are not meant to show up in
// the chat bubble — only the Brain sees them. Removes every consecutive
// [...] block at the start of the text, including the whitespace around it.
function stripSystemHints(text: string): string {
  // Empty / falsy input is handed back untouched.
  if (!text) return text;
  // A single anchored pattern swallows the whole run of leading hints:
  // "[A] [B] Hallo" → "Hallo". Text without a leading hint stays as it is,
  // leading whitespace included.
  return text.replace(/^(?:\s*\[[^\]]*\]\s*)+/, '');
}
|
||||||
const DEFAULT_ATTACHMENT_DIR = `${RNFS.DocumentDirectoryPath}/chat_attachments`;
|
const DEFAULT_ATTACHMENT_DIR = `${RNFS.DocumentDirectoryPath}/chat_attachments`;
|
||||||
const STORAGE_PATH_KEY = 'aria_attachment_storage_path';
|
const STORAGE_PATH_KEY = 'aria_attachment_storage_path';
|
||||||
|
|
||||||
@@ -274,11 +290,17 @@ const ChatScreen: React.FC = () => {
|
|||||||
// Stream zumuellen. Eigentlich seltener Fall, aber billig zu pruefen.
|
// Stream zumuellen. Eigentlich seltener Fall, aber billig zu pruefen.
|
||||||
const lastThoughtKeyRef = useRef<string>('');
|
const lastThoughtKeyRef = useRef<string>('');
|
||||||
// Service-Status (Gamebox: F5-TTS / Whisper Lade-Status) + Banner-Sichtbarkeit
|
// Service-Status (Gamebox: F5-TTS / Whisper Lade-Status) + Banner-Sichtbarkeit
|
||||||
const [serviceStatus, setServiceStatus] = useState<Record<string, {state: string, model?: string, loadSeconds?: number, error?: string}>>({});
|
const [serviceStatus, setServiceStatus] = useState<Record<string, {state: string, model?: string, loadSeconds?: number, error?: string, downloading?: boolean, freshlyDownloaded?: boolean}>>({});
|
||||||
const [serviceBannerDismissed, setServiceBannerDismissed] = useState(false);
|
const [serviceBannerDismissed, setServiceBannerDismissed] = useState(false);
|
||||||
// Gerätelokale TTS-Config: globaler Toggle (aus Settings) + temporäres Muten (Mund-Button)
|
// Gerätelokale TTS-Config: globaler Toggle (aus Settings) + temporäres Muten (Mund-Button)
|
||||||
const [ttsDeviceEnabled, setTtsDeviceEnabled] = useState(true);
|
const [ttsDeviceEnabled, setTtsDeviceEnabled] = useState(true);
|
||||||
const [ttsMuted, setTtsMuted] = useState(false);
|
const [ttsMuted, setTtsMuted] = useState(false);
|
||||||
|
// System-Hints in Bubble: Bridge fuegt User-Text Praefixe wie
|
||||||
|
// "[Stefans aktuelle GPS-Position: ...]" oder "[Hinweis: Stefan hat
|
||||||
|
// dich gerade unterbrochen...]" hinzu damit Brain Kontext hat. Die
|
||||||
|
// App soll sie standardmaessig NICHT anzeigen — Stefan sieht sonst
|
||||||
|
// jeden Hint mit. Toggle in Settings.
|
||||||
|
const [showSystemHints, setShowSystemHints] = useState(false);
|
||||||
// Gerätelokale XTTS-Voice-Wahl (bevorzugt gegenueber dem globalen Default)
|
// Gerätelokale XTTS-Voice-Wahl (bevorzugt gegenueber dem globalen Default)
|
||||||
const localXttsVoiceRef = useRef<string>('');
|
const localXttsVoiceRef = useRef<string>('');
|
||||||
// Geraetelokale TTS-Wiedergabegeschwindigkeit (speed-Param an F5-TTS)
|
// Geraetelokale TTS-Wiedergabegeschwindigkeit (speed-Param an F5-TTS)
|
||||||
@@ -446,6 +468,8 @@ const ChatScreen: React.FC = () => {
|
|||||||
ttsSpeedRef.current = await loadTtsSpeed();
|
ttsSpeedRef.current = await loadTtsSpeed();
|
||||||
const gps = await AsyncStorage.getItem('aria_gps_enabled');
|
const gps = await AsyncStorage.getItem('aria_gps_enabled');
|
||||||
setGpsEnabled(gps === 'true');
|
setGpsEnabled(gps === 'true');
|
||||||
|
const hints = await AsyncStorage.getItem('aria_show_hints');
|
||||||
|
setShowSystemHints(hints === 'true'); // default false
|
||||||
};
|
};
|
||||||
loadSettings();
|
loadSettings();
|
||||||
const interval = setInterval(loadSettings, 2000);
|
const interval = setInterval(loadSettings, 2000);
|
||||||
@@ -480,14 +504,40 @@ const ChatScreen: React.FC = () => {
|
|||||||
return () => { phoneCallService.stop().catch(() => {}); };
|
return () => { phoneCallService.stop().catch(() => {}); };
|
||||||
}, []);
|
}, []);
|
||||||
|
|
||||||
// App-Resume: kurzer Wake-Word-Cooldown — beim Wechsel Background→Foreground
|
// App-Resume: drei Schutzmaßnahmen gegen verirrte Wake-Word-Trigger
|
||||||
// gibt's haeufig Audio-Pegel-Spikes (AudioFocus-Switch, AudioTrack re-route)
|
// beim Wechsel Background→Foreground:
|
||||||
// die openWakeWord sonst faelschlich als Wake-Word interpretiert.
|
// (a) Cooldown 3s — Audio-Pegel-Spikes (AudioFocus-Switch, AudioTrack
|
||||||
|
// re-route) sollen openWakeWord nicht faelschlich triggern
|
||||||
|
// (b) Wenn die App laenger im Hintergrund war und in 'conversing'
|
||||||
|
// zurueckkommt: vermutlich false-positive durch ein Hintergrund-
|
||||||
|
// Geraeusch (TV, Husten etc.) waehrend Stefan gar nicht da war.
|
||||||
|
// Wir verwerfen den Trigger und gehen zurueck zu 'armed'.
|
||||||
|
// (c) Aktuelle Aufnahme abbrechen falls sie aus dem false-positive
|
||||||
|
// gerade gestartet wurde.
|
||||||
useEffect(() => {
|
useEffect(() => {
|
||||||
let lastState: string = AppState.currentState;
|
let lastState: string = AppState.currentState;
|
||||||
|
let lastBackgroundAt = 0;
|
||||||
const sub = AppState.addEventListener('change', (next) => {
|
const sub = AppState.addEventListener('change', (next) => {
|
||||||
if (lastState !== 'active' && next === 'active') {
|
if (next === 'background' || next === 'inactive') {
|
||||||
wakeWordService.setResumeCooldown(1500);
|
lastBackgroundAt = Date.now();
|
||||||
|
} else if (lastState !== 'active' && next === 'active') {
|
||||||
|
wakeWordService.setResumeCooldown(3000);
|
||||||
|
const bgDur = lastBackgroundAt > 0 ? Date.now() - lastBackgroundAt : 0;
|
||||||
|
// Bei laengerer Hintergrund-Zeit (>30s): pruefen ob ein frisches
|
||||||
|
// Wake-Word getriggert wurde waehrend die App weg war — wenn ja,
|
||||||
|
// verwerfen + laufende Aufnahme stoppen.
|
||||||
|
if (bgDur > 30_000) {
|
||||||
|
wakeWordService.discardIfFreshlyTriggered(15_000).then(discarded => {
|
||||||
|
if (discarded) {
|
||||||
|
try { audioService.cancelRecording(); } catch {}
|
||||||
|
}
|
||||||
|
}).catch(() => {});
|
||||||
|
}
|
||||||
|
// PhoneCall-Listener pruefen: kann passieren dass der nach laengerer
|
||||||
|
// Hintergrund-Zeit verloren geht (Bridge-Context recreated). Refresh
|
||||||
|
// versucht ihn neu zu attachen falls noetig — sonst kriegt die App
|
||||||
|
// bei display-aus / minimized keine Anruf-Events mit.
|
||||||
|
phoneCallService.refresh().catch(() => {});
|
||||||
}
|
}
|
||||||
lastState = next;
|
lastState = next;
|
||||||
});
|
});
|
||||||
@@ -838,6 +888,16 @@ const ChatScreen: React.FC = () => {
|
|||||||
const b64 = (message.payload.base64 as string) || '';
|
const b64 = (message.payload.base64 as string) || '';
|
||||||
const serverPath = (message.payload.serverPath as string) || '';
|
const serverPath = (message.payload.serverPath as string) || '';
|
||||||
const mimeType = (message.payload.mimeType as string) || '';
|
const mimeType = (message.payload.mimeType as string) || '';
|
||||||
|
// Fehler-Response (z.B. Datei zu gross, nicht gefunden) → Toast,
|
||||||
|
// kein erneuter Versuch. Hauptverdacht: 40+ MB Videos die ueber
|
||||||
|
// den 70 MB Bridge-Limit gehen.
|
||||||
|
const fileErr = (message.payload as any).error as string | undefined;
|
||||||
|
if (fileErr) {
|
||||||
|
const fname = (message.payload.name as string) || serverPath.split('/').pop() || 'Datei';
|
||||||
|
console.warn('[Chat] file_response Fehler fuer %s: %s', fname, fileErr);
|
||||||
|
ToastAndroid.show(`${fname}: ${fileErr}`, ToastAndroid.LONG);
|
||||||
|
return;
|
||||||
|
}
|
||||||
if (b64 && reqId) {
|
if (b64 && reqId) {
|
||||||
const fileName = (message.payload.name as string) || 'download';
|
const fileName = (message.payload.name as string) || 'download';
|
||||||
persistAttachment(b64, reqId, fileName).then(filePath => {
|
persistAttachment(b64, reqId, fileName).then(filePath => {
|
||||||
@@ -1111,22 +1171,39 @@ const ChatScreen: React.FC = () => {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Gamebox-Bridges (f5tts/whisper) melden Lade-Status — Banner oben
|
// Gamebox-Bridges (f5tts/whisper/flux) melden Lade-Status — Banner oben.
|
||||||
|
// Toast bei Download-Ende: erstmaliger HF-Download (mehrere GB) → User
|
||||||
|
// soll wissen dass er Bilder/Stimmen jetzt nutzen kann ohne in den
|
||||||
|
// Banner gucken zu muessen.
|
||||||
if (message.type === ('service_status' as any)) {
|
if (message.type === ('service_status' as any)) {
|
||||||
const p = message.payload as any;
|
const p = message.payload as any;
|
||||||
const svc = (p?.service as string) || '';
|
const svc = (p?.service as string) || '';
|
||||||
if (!svc) return;
|
if (!svc) return;
|
||||||
|
const newState = (p?.state as string) || 'unknown';
|
||||||
|
const freshlyDownloaded = p?.freshlyDownloaded === true;
|
||||||
setServiceStatus(prev => ({
|
setServiceStatus(prev => ({
|
||||||
...prev,
|
...prev,
|
||||||
[svc]: {
|
[svc]: {
|
||||||
state: (p?.state as string) || 'unknown',
|
state: newState,
|
||||||
model: p?.model as string | undefined,
|
model: p?.model as string | undefined,
|
||||||
loadSeconds: p?.loadSeconds as number | undefined,
|
loadSeconds: p?.loadSeconds as number | undefined,
|
||||||
error: p?.error as string | undefined,
|
error: p?.error as string | undefined,
|
||||||
|
downloading: p?.downloading === true,
|
||||||
|
freshlyDownloaded,
|
||||||
},
|
},
|
||||||
}));
|
}));
|
||||||
// Bei neuer Loading-Phase Banner wieder aktivieren
|
// Bei neuer Loading-Phase Banner wieder aktivieren
|
||||||
if (p?.state === 'loading') setServiceBannerDismissed(false);
|
if (newState === 'loading') setServiceBannerDismissed(false);
|
||||||
|
// Download-Fertig-Toast: Bridge setzt freshlyDownloaded=true bei dem
|
||||||
|
// 'ready'-Broadcast direkt nach einem Cache-Miss-Load. Ein einziger
|
||||||
|
// Toast pro Modell-Download, kein State-Tracking auf App-Seite noetig.
|
||||||
|
if (newState === 'ready' && freshlyDownloaded) {
|
||||||
|
const niceName = svc === 'flux' ? 'FLUX' : svc === 'f5tts' ? 'F5-TTS' : svc === 'whisper' ? 'Whisper' : svc;
|
||||||
|
const model = p?.model ? ` (${p.model})` : '';
|
||||||
|
try {
|
||||||
|
ToastAndroid.show(`${niceName}-Modell heruntergeladen${model} — jetzt einsatzbereit`, ToastAndroid.LONG);
|
||||||
|
} catch {}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
|
|
||||||
@@ -2006,7 +2083,7 @@ const ChatScreen: React.FC = () => {
|
|||||||
{/* Text (nicht anzeigen wenn nur "Anhang empfangen" und ein Bild da ist) */}
|
{/* Text (nicht anzeigen wenn nur "Anhang empfangen" und ein Bild da ist) */}
|
||||||
{!(item.text === 'Anhang empfangen' && item.attachments?.some(a => a.type === 'image' && a.uri)) && (
|
{!(item.text === 'Anhang empfangen' && item.attachments?.some(a => a.type === 'image' && a.uri)) && (
|
||||||
<MessageText
|
<MessageText
|
||||||
text={item.text}
|
text={showSystemHints ? item.text : stripSystemHints(item.text)}
|
||||||
style={[styles.messageText, isUser ? styles.userText : styles.ariaText]}
|
style={[styles.messageText, isUser ? styles.userText : styles.ariaText]}
|
||||||
/>
|
/>
|
||||||
)}
|
)}
|
||||||
@@ -2136,7 +2213,7 @@ const ChatScreen: React.FC = () => {
|
|||||||
const allReady = !anyLoading && !anyError && entries.every(([, v]) => v.state === 'ready');
|
const allReady = !anyLoading && !anyError && entries.every(([, v]) => v.state === 'ready');
|
||||||
const bg = anyError ? '#3A1F1F' : anyLoading ? '#3A331F' : '#1F3A2A';
|
const bg = anyError ? '#3A1F1F' : anyLoading ? '#3A331F' : '#1F3A2A';
|
||||||
const border = anyError ? '#FF3B30' : anyLoading ? '#FFD60A' : '#34C759';
|
const border = anyError ? '#FF3B30' : anyLoading ? '#FFD60A' : '#34C759';
|
||||||
const labels: Record<string, string> = { f5tts: 'F5-TTS', whisper: 'Whisper STT' };
|
const labels: Record<string, string> = { f5tts: 'F5-TTS', whisper: 'Whisper STT', flux: 'FLUX Image-Gen' };
|
||||||
return (
|
return (
|
||||||
<TouchableOpacity
|
<TouchableOpacity
|
||||||
activeOpacity={allReady ? 0.6 : 1.0}
|
activeOpacity={allReady ? 0.6 : 1.0}
|
||||||
@@ -2146,11 +2223,16 @@ const ChatScreen: React.FC = () => {
|
|||||||
{entries.map(([svc, info]) => {
|
{entries.map(([svc, info]) => {
|
||||||
let icon = '\u23F3', text = '';
|
let icon = '\u23F3', text = '';
|
||||||
if (info.state === 'loading') {
|
if (info.state === 'loading') {
|
||||||
text = `${labels[svc] || svc}: laedt${info.model ? ' ' + info.model : ''}...`;
|
icon = info.downloading ? '\u2B07' : '\u23F3'; // \u2B07 vs \u23F3
|
||||||
|
const action = info.downloading
|
||||||
|
? 'laedt erstmalig runter (mehrere GB, kann dauern)'
|
||||||
|
: 'laedt';
|
||||||
|
text = `${labels[svc] || svc}: ${action}${info.model ? ' ' + info.model : ''}...`;
|
||||||
} else if (info.state === 'ready') {
|
} else if (info.state === 'ready') {
|
||||||
icon = '\u2705';
|
icon = info.freshlyDownloaded ? '\uD83C\uDF89' : '\u2705'; // \uD83C\uDF89 vs \u2705
|
||||||
const sec = info.loadSeconds ? ` (${info.loadSeconds.toFixed(1)}s)` : '';
|
const sec = info.loadSeconds ? ` (${info.loadSeconds.toFixed(1)}s)` : '';
|
||||||
text = `${labels[svc] || svc}: bereit${info.model ? ' ' + info.model : ''}${sec}`;
|
const dl = info.freshlyDownloaded ? ' \u2014 Download fertig!' : '';
|
||||||
|
text = `${labels[svc] || svc}: bereit${info.model ? ' ' + info.model : ''}${sec}${dl}`;
|
||||||
} else if (info.state === 'error') {
|
} else if (info.state === 'error') {
|
||||||
icon = '\u274C';
|
icon = '\u274C';
|
||||||
text = `${labels[svc] || svc}: Fehler ${info.error || ''}`;
|
text = `${labels[svc] || svc}: Fehler ${info.error || ''}`;
|
||||||
|
|||||||
@@ -131,6 +131,7 @@ const SettingsScreen: React.FC = () => {
|
|||||||
const [gpsEnabled, setGpsEnabled] = useState(false);
|
const [gpsEnabled, setGpsEnabled] = useState(false);
|
||||||
const [gpsTracking, setGpsTracking] = useState(gpsTrackingService.isActive());
|
const [gpsTracking, setGpsTracking] = useState(gpsTrackingService.isActive());
|
||||||
const [backgroundMode, setBackgroundMode] = useState(true); // Default an
|
const [backgroundMode, setBackgroundMode] = useState(true); // Default an
|
||||||
|
const [showSystemHints, setShowSystemHints] = useState(false); // Default aus
|
||||||
const [scannerVisible, setScannerVisible] = useState(false);
|
const [scannerVisible, setScannerVisible] = useState(false);
|
||||||
const [logTab, setLogTab] = useState<LogTab>('live');
|
const [logTab, setLogTab] = useState<LogTab>('live');
|
||||||
const [logs, setLogs] = useState<LogEntry[]>([]);
|
const [logs, setLogs] = useState<LogEntry[]>([]);
|
||||||
@@ -202,6 +203,10 @@ const SettingsScreen: React.FC = () => {
|
|||||||
// Default ist an — nur explicit 'false' deaktiviert
|
// Default ist an — nur explicit 'false' deaktiviert
|
||||||
setBackgroundMode(saved !== 'false');
|
setBackgroundMode(saved !== 'false');
|
||||||
});
|
});
|
||||||
|
AsyncStorage.getItem('aria_show_hints').then(saved => {
|
||||||
|
// Default ist aus — nur explicit 'true' aktiviert
|
||||||
|
setShowSystemHints(saved === 'true');
|
||||||
|
});
|
||||||
// gpsTrackingService status syncen + auf Aenderungen lauschen
|
// gpsTrackingService status syncen + auf Aenderungen lauschen
|
||||||
setGpsTracking(gpsTrackingService.isActive());
|
setGpsTracking(gpsTrackingService.isActive());
|
||||||
const offGps = gpsTrackingService.onChange(setGpsTracking);
|
const offGps = gpsTrackingService.onChange(setGpsTracking);
|
||||||
@@ -616,6 +621,13 @@ const SettingsScreen: React.FC = () => {
|
|||||||
}
|
}
|
||||||
}, []);
|
}, []);
|
||||||
|
|
||||||
|
// --- System-Hints Toggle ---
|
||||||
|
|
||||||
|
const handleShowSystemHintsToggle = useCallback((enabled: boolean) => {
  // Stored as the string 'true' / 'false' under 'aria_show_hints'.
  const persisted = enabled ? 'true' : 'false';
  setShowSystemHints(enabled);
  // Best effort: a failed write is swallowed, the in-memory state still applies.
  AsyncStorage.setItem('aria_show_hints', persisted).catch(() => {});
}, []);
|
||||||
|
|
||||||
// --- XTTS Voice ---
|
// --- XTTS Voice ---
|
||||||
|
|
||||||
const selectVoice = useCallback((voiceName: string) => {
|
const selectVoice = useCallback((voiceName: string) => {
|
||||||
@@ -1103,6 +1115,28 @@ const SettingsScreen: React.FC = () => {
|
|||||||
</View>
|
</View>
|
||||||
</View>
|
</View>
|
||||||
|
|
||||||
|
{/* === Bubble-Anzeige === */}
|
||||||
|
<Text style={styles.sectionTitle}>Chat-Bubbles</Text>
|
||||||
|
<View style={styles.card}>
|
||||||
|
<View style={styles.toggleRow}>
|
||||||
|
<View style={styles.toggleInfo}>
|
||||||
|
<Text style={styles.toggleLabel}>System-Hints in Bubbles anzeigen</Text>
|
||||||
|
<Text style={styles.toggleHint}>
|
||||||
|
Wenn aktiviert: GPS-Position, Barge-In-Hinweise und andere
|
||||||
|
System-Praefixe in eckigen Klammern bleiben in der User-Bubble
|
||||||
|
sichtbar (Debug). Standardmaessig versteckt — Brain bekommt sie
|
||||||
|
trotzdem, sie sind nur fuer dich nicht relevant.
|
||||||
|
</Text>
|
||||||
|
</View>
|
||||||
|
<Switch
|
||||||
|
value={showSystemHints}
|
||||||
|
onValueChange={handleShowSystemHintsToggle}
|
||||||
|
trackColor={{ false: '#2A2A3E', true: '#0096FF' }}
|
||||||
|
thumbColor={showSystemHints ? '#FFFFFF' : '#666680'}
|
||||||
|
/>
|
||||||
|
</View>
|
||||||
|
</View>
|
||||||
|
|
||||||
{/* === Hintergrund-Modus === */}
|
{/* === Hintergrund-Modus === */}
|
||||||
<Text style={styles.sectionTitle}>Hintergrund-Modus</Text>
|
<Text style={styles.sectionTitle}>Hintergrund-Modus</Text>
|
||||||
<View style={styles.card}>
|
<View style={styles.card}>
|
||||||
|
|||||||
@@ -727,6 +727,31 @@ class AudioService {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Cancels the running recording without emitting a RecordingResult — e.g.
 * after a wake-word false positive when the app resumes from a longer
 * background phase. The recorded file is discarded immediately.
 *
 * Does nothing unless the current recording state is 'recording'.
 * Errors from stopping the recorder are logged, not thrown; the state is
 * set to 'idle' either way.
 */
async cancelRecording(): Promise<void> {
  if (this.recordingState !== 'recording') return;
  console.log('[Audio] Aufnahme abgebrochen (cancel)');

  // Disable VAD and clear the VAD / max-duration / no-speech timers.
  this.vadEnabled = false;
  if (this.vadTimer) { clearInterval(this.vadTimer); this.vadTimer = null; }
  if (this.maxDurationTimer) { clearTimeout(this.maxDurationTimer); this.maxDurationTimer = null; }
  if (this.noSpeechTimer) { clearTimeout(this.noSpeechTimer); this.noSpeechTimer = null; }

  let path: string | undefined;
  try {
    path = await this.recorder.stopRecorder();
  } catch (err) {
    console.warn('[Audio] cancelRecording stop fehlgeschlagen:', err);
  }

  // Detach the record-back listener even when stopRecorder() rejected —
  // otherwise it would stay registered after a failed stop.
  try {
    this.recorder.removeRecordBackListener();
  } catch (err) {
    console.warn('[Audio] cancelRecording removeRecordBackListener fehlgeschlagen:', err);
  }

  // Delete the file if the recorder reported one.
  if (path && path !== 'Already stopped') {
    const local = path.replace(/^file:\/\//, '');
    try { await RNFS.unlink(local); } catch {}
  }

  this._releaseFocusDeferred();
  this.setState('idle');
}
|
||||||
|
|
||||||
/** Aufnahme stoppen und Ergebnis zurueckgeben */
|
/** Aufnahme stoppen und Ergebnis zurueckgeben */
|
||||||
async stopRecording(): Promise<RecordingResult | null> {
|
async stopRecording(): Promise<RecordingResult | null> {
|
||||||
if (this.recordingState !== 'recording') {
|
if (this.recordingState !== 'recording') {
|
||||||
|
|||||||
@@ -43,6 +43,42 @@ class PhoneCallService {
|
|||||||
/** Damit Resume nach VoIP-Loss nicht doppelt feuert wenn auch
|
/** Damit Resume nach VoIP-Loss nicht doppelt feuert wenn auch
|
||||||
* TelephonyManager-IDLE-Event kommt. */
|
* TelephonyManager-IDLE-Event kommt. */
|
||||||
private interruptedByFocus: boolean = false;
|
private interruptedByFocus: boolean = false;
|
||||||
|
/** True wenn der TelephonyManager-Listener (Pfad 1) wirklich registriert
|
||||||
|
* ist. False wenn READ_PHONE_STATE abgelehnt wurde oder Native nicht ging. */
|
||||||
|
private telephonyAttached: boolean = false;
|
||||||
|
|
||||||
|
/** Diagnostic status: is call detection actually running? */
status(): { focusAttached: boolean; telephonyAttached: boolean } {
  // The focus path counts as attached while a focus subscription is held.
  const focusAttached = this.focusSubscription !== null;
  const { telephonyAttached } = this;
  return { focusAttached, telephonyAttached };
}
|
||||||
|
|
||||||
|
/**
 * After app resume: check whether the listeners are still alive. If the
 * TelephonyManager listener got lost (can happen when the React bridge
 * context was recreated), attach it again.
 *
 * NOTE(review): in the code visible here `telephonyAttached` is only
 * cleared by stop() — confirm something resets it when the native listener
 * is actually lost, otherwise this returns early in that case.
 */
async refresh(): Promise<void> {
  // Nothing to do when not started, already attached, or no native module.
  if (!this.started || this.telephonyAttached || !PhoneCall) return;

  try {
    const ok = await PhoneCall.start();
    if (!ok) return;

    // Subscribe to the JS event only once; an existing subscription is reused.
    if (!this.subscription) {
      const emitter = new NativeEventEmitter(NativeModules.PhoneCall as any);
      this.subscription = emitter.addListener(
        'PhoneCallStateChanged',
        (e: { state: PhoneState }) => this._onStateChanged(e.state),
      );
    }
    this.telephonyAttached = true;
    console.log('[PhoneCall] refresh: TelephonyManager-Listener re-attached');
  } catch (err: any) {
    console.warn('[PhoneCall] refresh fehlgeschlagen:', err?.message || err);
  }
}
|
||||||
|
|
||||||
async start(): Promise<boolean> {
|
async start(): Promise<boolean> {
|
||||||
if (this.started || Platform.OS !== 'android') return false;
|
if (this.started || Platform.OS !== 'android') return false;
|
||||||
@@ -82,7 +118,10 @@ class PhoneCallService {
|
|||||||
'PhoneCallStateChanged',
|
'PhoneCallStateChanged',
|
||||||
(e: { state: PhoneState }) => this._onStateChanged(e.state),
|
(e: { state: PhoneState }) => this._onStateChanged(e.state),
|
||||||
);
|
);
|
||||||
|
this.telephonyAttached = true;
|
||||||
console.log('[PhoneCall] TelephonyManager-Listener aktiv');
|
console.log('[PhoneCall] TelephonyManager-Listener aktiv');
|
||||||
|
} else {
|
||||||
|
console.warn('[PhoneCall] PhoneCall.start() lieferte false — Native-Listener nicht aktiv');
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
console.warn('[PhoneCall] READ_PHONE_STATE abgelehnt — VoIP-Calls werden trotzdem ueber AudioFocus erkannt');
|
console.warn('[PhoneCall] READ_PHONE_STATE abgelehnt — VoIP-Calls werden trotzdem ueber AudioFocus erkannt');
|
||||||
@@ -108,6 +147,7 @@ class PhoneCallService {
|
|||||||
this.started = false;
|
this.started = false;
|
||||||
this.lastState = 'idle';
|
this.lastState = 'idle';
|
||||||
this.interruptedByFocus = false;
|
this.interruptedByFocus = false;
|
||||||
|
this.telephonyAttached = false;
|
||||||
}
|
}
|
||||||
|
|
||||||
private _onStateChanged(state: PhoneState): void {
|
private _onStateChanged(state: PhoneState): void {
|
||||||
|
|||||||
@@ -86,6 +86,11 @@ class WakeWordService {
|
|||||||
* oft einen Audio-Pegel-Spike (AudioFocus-Switch, AudioTrack re-route),
|
* oft einen Audio-Pegel-Spike (AudioFocus-Switch, AudioTrack re-route),
|
||||||
* der openWakeWord faelschlich triggern kann. */
|
* der openWakeWord faelschlich triggern kann. */
|
||||||
private cooldownUntilMs: number = 0;
|
private cooldownUntilMs: number = 0;
|
||||||
|
/** Zeitpunkt des letzten echten Wake-Word-Triggers — gebraucht damit
|
||||||
|
* ChatScreen entscheiden kann ob ein 'conversing'-State bei App-Resume
|
||||||
|
* ein false-positive war (Wake-Word im Hintergrund getriggert waehrend
|
||||||
|
* Stefan gar nicht in der App war). */
|
||||||
|
private lastTriggerAt: number = 0;
|
||||||
|
|
||||||
private keyword: WakeKeyword = DEFAULT_KEYWORD;
|
private keyword: WakeKeyword = DEFAULT_KEYWORD;
|
||||||
private nativeReady: boolean = false;
|
private nativeReady: boolean = false;
|
||||||
@@ -231,6 +236,7 @@ class WakeWordService {
|
|||||||
}
|
}
|
||||||
console.log('[WakeWord] Wake-Word "%s" erkannt! (state=%s, barge=%s)',
|
console.log('[WakeWord] Wake-Word "%s" erkannt! (state=%s, barge=%s)',
|
||||||
this.keyword, this.state, this.bargeListening);
|
this.keyword, this.state, this.bargeListening);
|
||||||
|
this.lastTriggerAt = now;
|
||||||
if (this.nativeReady && OpenWakeWord) {
|
if (this.nativeReady && OpenWakeWord) {
|
||||||
try { await OpenWakeWord.stop(); } catch {}
|
try { await OpenWakeWord.stop(); } catch {}
|
||||||
}
|
}
|
||||||
@@ -341,6 +347,33 @@ class WakeWordService {
|
|||||||
this.setState('off');
|
this.setState('off');
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Discards a 'conversing' state that is based on a wake-word trigger no
 * older than `maxAgeMs` and goes back to 'armed'. Called by ChatScreen when
 * the app returns from a longer background phase — a "just triggered" wake
 * word is then very likely a TV spike, a cough, ARIA's own TTS output etc.
 *
 * If the native detector is not ready or cannot be restarted, the state
 * becomes 'off' instead of 'armed'.
 *
 * @param maxAgeMs Maximum age of the last trigger (ms) to count as fresh.
 * @returns true if the trigger was discarded, false if nothing was done.
 */
async discardIfFreshlyTriggered(maxAgeMs: number = 10_000): Promise<boolean> {
  if (this.state !== 'conversing' || this.lastTriggerAt === 0) return false;

  const age = Date.now() - this.lastTriggerAt;
  if (age > maxAgeMs) return false;

  console.log('[WakeWord] Resume: verwerfe verdaechtiges conversing (age=%dms)', age);
  this.lastTriggerAt = 0;

  // Without a usable native detector there is nothing to re-arm.
  if (!this.nativeReady || !OpenWakeWord) {
    this.setState('off');
    return true;
  }

  try {
    await OpenWakeWord.start();
    ToastAndroid.show('Hintergrund-Trigger verworfen — lausche wieder', ToastAndroid.SHORT);
    this.setState('armed');
    return true;
  } catch (err) {
    console.warn('[WakeWord] re-arm nach discard fehlgeschlagen:', err);
  }

  // Re-arm failed: fall back to 'off', the trigger still counts as discarded.
  this.setState('off');
  return true;
}
|
||||||
|
|
||||||
/** Nach ARIA-Antwort (TTS fertig): naechste Aufnahme im Conversation-Window starten */
|
/** Nach ARIA-Antwort (TTS fertig): naechste Aufnahme im Conversation-Window starten */
|
||||||
async resume(): Promise<void> {
|
async resume(): Promise<void> {
|
||||||
if (this.state !== 'conversing') return;
|
if (this.state !== 'conversing') return;
|
||||||
|
|||||||
+165
-1
@@ -18,6 +18,9 @@ from __future__ import annotations
|
|||||||
|
|
||||||
import json
|
import json
|
||||||
import logging
|
import logging
|
||||||
|
import os
|
||||||
|
import urllib.error
|
||||||
|
import urllib.request
|
||||||
from typing import Optional
|
from typing import Optional
|
||||||
|
|
||||||
from conversation import Conversation, Turn
|
from conversation import Conversation, Turn
|
||||||
@@ -28,6 +31,33 @@ import skills as skills_mod
|
|||||||
import triggers as triggers_mod
|
import triggers as triggers_mod
|
||||||
import watcher as watcher_mod
|
import watcher as watcher_mod
|
||||||
|
|
||||||
|
BRIDGE_URL = os.environ.get("BRIDGE_URL", "http://aria-bridge:8090")
|
||||||
|
# FLUX-Render kann bis ~90s dauern, beim ersten Render nach Container-Start
|
||||||
|
# laedt die flux-bridge zudem ~24 GB Modell von HF (~5-10 min). Brain wartet
|
||||||
|
# synchron — Stefan kuendigt es vorher an wenn er weiss dass es feuert.
|
||||||
|
FLUX_HTTP_TIMEOUT_SEC = 1200
|
||||||
|
# Diagnostic-Settings fuer FLUX (Default-Modell + User-Keywords) liegen im
|
||||||
|
# selben File wie F5-TTS/Whisper Config — von der aria-bridge geschrieben.
|
||||||
|
VOICE_CONFIG_PATH = "/shared/config/voice_config.json"
|
||||||
|
|
||||||
|
|
||||||
|
def _load_flux_config() -> dict:
    """Read the FLUX settings from the shared voice-config file.

    Returns:
        A dict with exactly the keys ``fluxDefaultModel``, ``fluxKeywordRaw``
        and ``fluxKeywordSwitch``. Each value comes from the JSON file at
        ``VOICE_CONFIG_PATH``; the defaults ``"dev"``, ``"flux"`` and ``"fix"``
        are used when the file is missing, unreadable, not valid JSON, not a
        JSON object, or when the individual key is absent or ``null``.

    Never raises: every read/parse problem degrades to the defaults.
    """
    defaults = {
        "fluxDefaultModel": "dev",
        "fluxKeywordRaw": "flux",
        "fluxKeywordSwitch": "fix",
    }
    try:
        with open(VOICE_CONFIG_PATH, encoding="utf-8") as f:
            data = json.load(f)
    except (FileNotFoundError, json.JSONDecodeError):
        # Nothing persisted yet or a half-written file: silently use defaults.
        data = {}
    except Exception as exc:
        logger.debug("Voice-Config lesen fehlgeschlagen: %s", exc)
        data = {}
    # Valid JSON is not necessarily an object (could be a list, string, null).
    # Anything without key lookup is treated like an empty config instead of
    # crashing on `.get` below.
    if not isinstance(data, dict):
        data = {}
    return {
        key: (fallback if data.get(key) is None else data[key])
        for key, fallback in defaults.items()
    }
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
@@ -215,6 +245,78 @@ META_TOOLS = [
|
|||||||
},
|
},
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
"type": "function",
|
||||||
|
"function": {
|
||||||
|
"name": "flux_generate",
|
||||||
|
"description": (
|
||||||
|
"Generiere ein Bild aus einem Text-Prompt via FLUX auf der Gamebox-GPU. "
|
||||||
|
"Brauchbar fuer 'mal mir ein X', 'wie sieht ein Y aus?', Mockups, "
|
||||||
|
"Konzept-Skizzen, Memes. Render dauert 20-90s — kuendige es Stefan "
|
||||||
|
"kurz an, dann ist er nicht ueberrascht.\n\n"
|
||||||
|
"**Schreibe deine Antwort wie immer auf Deutsch**, und referenziere das "
|
||||||
|
"fertige Bild MIT dem `[FILE: ...]`-Marker, GENAU im Pfad-Format das das "
|
||||||
|
"Tool zurueckgibt. Beispiel:\n"
|
||||||
|
" 'Hier dein Aquarell:\\n[FILE: /shared/uploads/aria_generated_1234.png]'\n\n"
|
||||||
|
"Der Marker wird beim App-Renderer ausgeblendet und das Bild stattdessen "
|
||||||
|
"inline als Anhang gezeigt.\n\n"
|
||||||
|
"**Prompt-Sprache: bevorzugt Englisch.** FLUX versteht zwar Deutsch, "
|
||||||
|
"liefert aber mit englischen Prompts deutlich konsistentere Ergebnisse. "
|
||||||
|
"Uebersetze Stefans deutsche Beschreibung selbststaendig — AUSSER `raw=true`.\n\n"
|
||||||
|
"**Modus `raw=true` (Pipe-Modus):** Wenn Stefan das Raw-Keyword aus dem "
|
||||||
|
"FLUX-Settings-Block im System-Prompt nutzt (typischerweise `flux`), "
|
||||||
|
"leite seinen Text 1:1 als prompt durch — KEIN Uebersetzen, KEIN "
|
||||||
|
"Beautify, KEINE Qualitaets-Keywords. Stefan formuliert dann selbst und "
|
||||||
|
"der Prompt geht roh an FLUX. Brauchbar wenn er den vollen Output ohne "
|
||||||
|
"ARIAs Filter haben will.\n\n"
|
||||||
|
"**Modell-Wahl (`model`):** \n"
|
||||||
|
"- `default` (oder weglassen): das in den Diagnostic-Settings eingestellte "
|
||||||
|
"Default-Modell (steht im FLUX-Block im System-Prompt).\n"
|
||||||
|
"- `dev`: hochqualitatives FLUX.1-dev, 20-90s, ~28 steps.\n"
|
||||||
|
"- `schnell`: FLUX.1-schnell, 4-step distillation, ~5-15s.\n"
|
||||||
|
"Wenn Stefan das Switch-Keyword (steht ebenfalls im FLUX-Block) im Prompt "
|
||||||
|
"verwendet → setze `model` auf das ANDERE Modell als das Default. Bei "
|
||||||
|
"'in hoher Qualitaet'/'detailliert' → `dev`. Bei 'schnell mal'/'fix' → `schnell`.\n\n"
|
||||||
|
"Modell-Switch kostet einmalig 15-30s (Pipeline-Reload aus HF-Cache). "
|
||||||
|
"Stefan sieht den Status im Diagnostic-Banner.\n\n"
|
||||||
|
"Caps:\n"
|
||||||
|
"- `width`/`height`: 256-1536, wird auf Vielfache von 64 gesnappt (Default 1024)\n"
|
||||||
|
"- `steps`: 1-50 (Default 28 fuer dev, 4 fuer schnell)\n"
|
||||||
|
"- `guidance_scale`: 0.0-20.0 (Default 3.5)\n"
|
||||||
|
"- `seed`: optional, gleicher seed + gleicher prompt → gleiches Bild"
|
||||||
|
),
|
||||||
|
"parameters": {
|
||||||
|
"type": "object",
|
||||||
|
"properties": {
|
||||||
|
"prompt": {
|
||||||
|
"type": "string",
|
||||||
|
"description": (
|
||||||
|
"Bei raw=false (Default): englischer Bild-Prompt, von dir aus Stefans Worten gebaut, "
|
||||||
|
"mit Stil/Licht/Kamera-Stichworten. Bei raw=true: Stefans Text 1:1 ohne Aenderung."
|
||||||
|
),
|
||||||
|
},
|
||||||
|
"raw": {
|
||||||
|
"type": "boolean",
|
||||||
|
"description": (
|
||||||
|
"true = Pipe-Modus, kein Rewriting. Setzen wenn Stefan das Raw-Keyword "
|
||||||
|
"(siehe FLUX-Block im System-Prompt) am Anfang seiner Nachricht verwendet."
|
||||||
|
),
|
||||||
|
},
|
||||||
|
"model": {
|
||||||
|
"type": "string",
|
||||||
|
"enum": ["default", "dev", "schnell"],
|
||||||
|
"description": "Default-Modell oder explizit dev/schnell. Default = Diagnostic-Setting.",
|
||||||
|
},
|
||||||
|
"width": {"type": "integer", "description": "Breite in px (Default 1024, max 1536)"},
|
||||||
|
"height": {"type": "integer", "description": "Hoehe in px (Default 1024, max 1536)"},
|
||||||
|
"steps": {"type": "integer", "description": "Inference-Steps (Default 28, max 50). Mehr = besser+langsamer."},
|
||||||
|
"guidance_scale": {"type": "number", "description": "Wie strikt am Prompt kleben (Default 3.5)"},
|
||||||
|
"seed": {"type": "integer", "description": "Reproduzierbarkeits-Seed (optional)"},
|
||||||
|
},
|
||||||
|
"required": ["prompt"],
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
{
|
{
|
||||||
"type": "function",
|
"type": "function",
|
||||||
"function": {
|
"function": {
|
||||||
@@ -437,10 +539,12 @@ class Agent:
|
|||||||
condition_funcs = watcher_mod.describe_functions()
|
condition_funcs = watcher_mod.describe_functions()
|
||||||
|
|
||||||
# 5. System-Prompt + Window-Messages
|
# 5. System-Prompt + Window-Messages
|
||||||
|
flux_config = _load_flux_config()
|
||||||
system_prompt = build_system_prompt(hot, cold, skills=all_skills,
|
system_prompt = build_system_prompt(hot, cold, skills=all_skills,
|
||||||
triggers=all_triggers,
|
triggers=all_triggers,
|
||||||
condition_vars=condition_vars,
|
condition_vars=condition_vars,
|
||||||
condition_funcs=condition_funcs)
|
condition_funcs=condition_funcs,
|
||||||
|
flux_config=flux_config)
|
||||||
messages = [ProxyMessage(role="system", content=system_prompt)]
|
messages = [ProxyMessage(role="system", content=system_prompt)]
|
||||||
for t in self.conversation.window():
|
for t in self.conversation.window():
|
||||||
messages.append(ProxyMessage(role=t.role, content=t.content))
|
messages.append(ProxyMessage(role=t.role, content=t.content))
|
||||||
@@ -607,6 +711,66 @@ class Agent:
|
|||||||
else:
|
else:
|
||||||
lines.append(f"- {t['name']} ({t['type']}, {state})")
|
lines.append(f"- {t['name']} ({t['type']}, {state})")
|
||||||
return "\n".join(lines)
|
return "\n".join(lines)
|
||||||
|
if name == "flux_generate":
|
||||||
|
prompt = (arguments.get("prompt") or "").strip()
|
||||||
|
if not prompt:
|
||||||
|
return "FEHLER: prompt ist Pflicht."
|
||||||
|
req: dict = {"prompt": prompt}
|
||||||
|
for key in ("width", "height", "steps", "seed"):
|
||||||
|
if key in arguments and arguments[key] is not None:
|
||||||
|
try:
|
||||||
|
req[key] = int(arguments[key])
|
||||||
|
except (TypeError, ValueError):
|
||||||
|
pass
|
||||||
|
if arguments.get("guidance_scale") is not None:
|
||||||
|
try:
|
||||||
|
req["guidance_scale"] = float(arguments["guidance_scale"])
|
||||||
|
except (TypeError, ValueError):
|
||||||
|
pass
|
||||||
|
# Modell-Wahl: 'default' (oder weglassen) → flux-bridge nimmt Diagnostic-Default.
|
||||||
|
# 'dev' / 'schnell' → expliziter Override.
|
||||||
|
model_arg = (arguments.get("model") or "").strip().lower()
|
||||||
|
if model_arg in ("dev", "schnell"):
|
||||||
|
req["model"] = model_arg
|
||||||
|
# `raw` ist Brain-Domain (kein Rewriting des prompt) und wird hier
|
||||||
|
# nicht durchgereicht — der prompt enthaelt bei raw=true bereits
|
||||||
|
# Stefans Originaltext.
|
||||||
|
try:
|
||||||
|
body = json.dumps(req).encode("utf-8")
|
||||||
|
http_req = urllib.request.Request(
|
||||||
|
f"{BRIDGE_URL}/internal/flux-generate", data=body, method="POST",
|
||||||
|
headers={"Content-Type": "application/json"},
|
||||||
|
)
|
||||||
|
with urllib.request.urlopen(http_req, timeout=FLUX_HTTP_TIMEOUT_SEC) as resp:
|
||||||
|
raw = resp.read()
|
||||||
|
result = json.loads(raw.decode("utf-8", "ignore"))
|
||||||
|
except urllib.error.HTTPError as exc:
|
||||||
|
try:
|
||||||
|
err_body = exc.read().decode("utf-8", "ignore")
|
||||||
|
err_data = json.loads(err_body)
|
||||||
|
err = err_data.get("error") or err_body
|
||||||
|
except Exception:
|
||||||
|
err = str(exc)
|
||||||
|
return f"FEHLER (flux-bridge): {err}"
|
||||||
|
except Exception as exc:
|
||||||
|
logger.exception("flux_generate HTTP-Call fehlgeschlagen")
|
||||||
|
return f"FEHLER: flux-bridge nicht erreichbar ({exc})"
|
||||||
|
|
||||||
|
if not result.get("ok"):
|
||||||
|
return f"FEHLER (flux-bridge): {result.get('error', 'unbekannt')}"
|
||||||
|
# Kompakte Rueckmeldung: Pfad + Render-Stats. Brain bettet den
|
||||||
|
# Pfad in ihre Antwort als [FILE: ...]-Marker ein (siehe Tool-Beschreibung).
|
||||||
|
return (
|
||||||
|
f"OK — Bild generiert.\n"
|
||||||
|
f"path: {result['path']}\n"
|
||||||
|
f"size: {result.get('width','?')}x{result.get('height','?')} "
|
||||||
|
f"({result.get('sizeBytes',0)//1024} KB)\n"
|
||||||
|
f"steps={result.get('steps','?')} guidance={result.get('guidance','?')} "
|
||||||
|
f"seed={result.get('seed','?')} model={result.get('model','?')}\n"
|
||||||
|
f"renderSeconds={result.get('renderSeconds','?')}\n\n"
|
||||||
|
f"WICHTIG: Schreibe in deiner Antwort an Stefan den Pfad EXAKT als "
|
||||||
|
f"Marker: [FILE: {result['path']}] — dann zeigt die App das Bild inline."
|
||||||
|
)
|
||||||
if name == "memory_search":
|
if name == "memory_search":
|
||||||
query = (arguments.get("query") or "").strip()
|
query = (arguments.get("query") or "").strip()
|
||||||
if not query:
|
if not query:
|
||||||
|
|||||||
+36
-1
@@ -240,6 +240,37 @@ def build_triggers_section(
|
|||||||
return "\n".join(lines)
|
return "\n".join(lines)
|
||||||
|
|
||||||
|
|
||||||
|
def build_flux_section(flux_config: dict) -> str:
    """Render the system-prompt block describing the image-generation settings.

    Args:
        flux_config: Mapping that may contain
            ``fluxDefaultModel`` ("dev" or "schnell", default "dev"),
            ``fluxKeywordRaw`` (keyword for pipe mode, default "flux") and
            ``fluxKeywordSwitch`` (keyword selecting the non-default model,
            default "fix"). ``None``, an empty mapping, a non-mapping value,
            or keys whose value is ``None`` all fall back to the defaults.

    Returns:
        A Markdown section (heading plus bullet list) joined with newlines.
    """
    cfg = flux_config if isinstance(flux_config, dict) else {}

    def _setting(key: str, fallback: str):
        # A key that is present but null must not leak a literal "None"
        # into the prompt text.
        value = cfg.get(key)
        return fallback if value is None else value

    default_model = _setting("fluxDefaultModel", "dev")
    kw_raw = _setting("fluxKeywordRaw", "flux")
    kw_switch = _setting("fluxKeywordSwitch", "fix")
    # The switch keyword always selects "the other" of the two known models.
    other_model = "schnell" if default_model == "dev" else "dev"
    lines = [
        "## FLUX Bildgenerierung",
        f"- Default-Modell: `{default_model}` (alternativ: `{other_model}`).",
        f"- Raw-Keyword: `{kw_raw}` — wenn Stefans Nachricht damit beginnt "
        f"oder das Wort als ersten echten Wortteil enthaelt, ruf "
        f"`flux_generate(..., raw=true)` und leite seinen Text 1:1 als prompt "
        f"durch. KEIN Uebersetzen, KEIN Beautify, KEINE Stil-Adds.",
        f"- Switch-Keyword: `{kw_switch}` — taucht's in der Nachricht auf, "
        f"setze `model=\"{other_model}\"` (das ANDERE Modell als das Default).",
        "- Natuerliche Sprache funktioniert auch: 'mal eben fix' / 'schnell' → schnell, "
        "'in hoher Qualitaet' / 'detailliert' → dev.",
        "- Whisper-Erkennung des Raw-Keywords ist nicht perfekt — wenn Stefans "
        "Sprachnachricht z.B. mit 'fluks', 'flocks', 'fluxx' anfaengt, behandle "
        "das auch als Raw-Keyword.",
    ]
    return "\n".join(lines)
|
||||||
|
|
||||||
|
|
||||||
def build_system_prompt(
|
def build_system_prompt(
|
||||||
pinned: List[MemoryPoint],
|
pinned: List[MemoryPoint],
|
||||||
cold: List[MemoryPoint] | None = None,
|
cold: List[MemoryPoint] | None = None,
|
||||||
@@ -247,8 +278,9 @@ def build_system_prompt(
|
|||||||
triggers: List[dict] | None = None,
|
triggers: List[dict] | None = None,
|
||||||
condition_vars: List[dict] | None = None,
|
condition_vars: List[dict] | None = None,
|
||||||
condition_funcs: List[dict] | None = None,
|
condition_funcs: List[dict] | None = None,
|
||||||
|
flux_config: dict | None = None,
|
||||||
) -> str:
|
) -> str:
|
||||||
"""Kompletter System-Prompt: Hot + Cold + Skills + Triggers."""
|
"""Kompletter System-Prompt: Hot + Cold + Skills + Triggers + FLUX."""
|
||||||
parts = [build_hot_memory_section(pinned), "", build_time_section()]
|
parts = [build_hot_memory_section(pinned), "", build_time_section()]
|
||||||
if skills:
|
if skills:
|
||||||
parts.append("")
|
parts.append("")
|
||||||
@@ -256,6 +288,9 @@ def build_system_prompt(
|
|||||||
if condition_vars:
|
if condition_vars:
|
||||||
parts.append("")
|
parts.append("")
|
||||||
parts.append(build_triggers_section(triggers or [], condition_vars, condition_funcs))
|
parts.append(build_triggers_section(triggers or [], condition_vars, condition_funcs))
|
||||||
|
if flux_config is not None:
|
||||||
|
parts.append("")
|
||||||
|
parts.append(build_flux_section(flux_config))
|
||||||
if cold:
|
if cold:
|
||||||
parts.append("")
|
parts.append("")
|
||||||
parts.append(build_cold_memory_section(cold))
|
parts.append(build_cold_memory_section(cold))
|
||||||
|
|||||||
+227
-5
@@ -487,6 +487,7 @@ class ARIABridge:
|
|||||||
self.tts_enabled = True
|
self.tts_enabled = True
|
||||||
self.xtts_voice = ""
|
self.xtts_voice = ""
|
||||||
self._f5tts_config: dict = {}
|
self._f5tts_config: dict = {}
|
||||||
|
self._flux_config: dict = {}
|
||||||
vc: dict = {}
|
vc: dict = {}
|
||||||
# Gespeicherte Voice-Config laden
|
# Gespeicherte Voice-Config laden
|
||||||
try:
|
try:
|
||||||
@@ -503,9 +504,14 @@ class ARIABridge:
|
|||||||
"f5ttsCfgStrength", "f5ttsNfeStep"):
|
"f5ttsCfgStrength", "f5ttsNfeStep"):
|
||||||
if k in vc:
|
if k in vc:
|
||||||
self._f5tts_config[k] = vc[k]
|
self._f5tts_config[k] = vc[k]
|
||||||
logger.info("Voice-Config geladen: tts=%s voice=%s f5tts=%s",
|
# FLUX-Felder (Default-Modell + Keywords) gleicher Mechanismus
|
||||||
|
for k in ("fluxDefaultModel", "fluxKeywordRaw", "fluxKeywordSwitch", "huggingfaceToken"):
|
||||||
|
if k in vc:
|
||||||
|
self._flux_config[k] = vc[k]
|
||||||
|
logger.info("Voice-Config geladen: tts=%s voice=%s f5tts=%s flux=%s",
|
||||||
self.tts_enabled, self.xtts_voice or "default",
|
self.tts_enabled, self.xtts_voice or "default",
|
||||||
self._f5tts_config or "defaults")
|
self._f5tts_config or "defaults",
|
||||||
|
self._flux_config or "defaults")
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.warning("Voice-Config laden fehlgeschlagen: %s", e)
|
logger.warning("Voice-Config laden fehlgeschlagen: %s", e)
|
||||||
# Whisper-Modell: Config hat Vorrang, dann env/Default (medium)
|
# Whisper-Modell: Config hat Vorrang, dann env/Default (medium)
|
||||||
@@ -541,6 +547,12 @@ class ARIABridge:
|
|||||||
# Beeinflusst das Timeout fuer stt_request — bei "loading" warten wir laenger,
|
# Beeinflusst das Timeout fuer stt_request — bei "loading" warten wir laenger,
|
||||||
# weil das Modell beim ersten Request noch ~1-2 Min runtergeladen werden kann.
|
# weil das Modell beim ersten Request noch ~1-2 Min runtergeladen werden kann.
|
||||||
self._remote_stt_ready: bool = False
|
self._remote_stt_ready: bool = False
|
||||||
|
# FLUX-Render-Requests die aktuell auf Antwort der flux-bridge (Gamebox) warten.
|
||||||
|
# requestId → Future mit dem flux_response-Payload (oder None bei Fehler).
|
||||||
|
self._pending_flux: dict[str, asyncio.Future] = {}
|
||||||
|
# flux-bridge service_status: True wenn ready. Render-Timeouts werden
|
||||||
|
# bei 'loading' deutlich grosszuegiger gesetzt (Modell-Download ~24 GB).
|
||||||
|
self._remote_flux_ready: bool = False
|
||||||
# User-Message-Counter fuer Auto-Compact. Bei zu langer Konversation
|
# User-Message-Counter fuer Auto-Compact. Bei zu langer Konversation
|
||||||
# sprengt die argv-Liste beim Claude-Subprocess-Spawn (E2BIG). Bei
|
# sprengt die argv-Liste beim Claude-Subprocess-Spawn (E2BIG). Bei
|
||||||
# COMPACT_AFTER erreicht → Sessions reset + Container restart.
|
# COMPACT_AFTER erreicht → Sessions reset + Container restart.
|
||||||
@@ -1232,6 +1244,7 @@ class ARIABridge:
|
|||||||
"whisperModel": self.stt_engine.model_size,
|
"whisperModel": self.stt_engine.model_size,
|
||||||
}
|
}
|
||||||
payload.update(getattr(self, "_f5tts_config", {}) or {})
|
payload.update(getattr(self, "_f5tts_config", {}) or {})
|
||||||
|
payload.update(getattr(self, "_flux_config", {}) or {})
|
||||||
await self._send_to_rvs({
|
await self._send_to_rvs({
|
||||||
"type": "config",
|
"type": "config",
|
||||||
"payload": payload,
|
"payload": payload,
|
||||||
@@ -1478,8 +1491,11 @@ class ARIABridge:
|
|||||||
try:
|
try:
|
||||||
url = f"{current_url}?token={self.rvs_token}"
|
url = f"{current_url}?token={self.rvs_token}"
|
||||||
logger.info("[rvs] Verbinde: %s", current_url)
|
logger.info("[rvs] Verbinde: %s", current_url)
|
||||||
# max_size=50MB (siehe core-Connect oben — gleicher Grund).
|
# max_size=100MB synchron zum RVS-Server (siehe rvs/server.js).
|
||||||
async with websockets.connect(url, max_size=50 * 1024 * 1024) as ws:
|
# File-Re-Download fuer Anhaenge braucht Platz fuer base64-
|
||||||
|
# inflate (~1.33×). Groessere Files lehnt der file_request-
|
||||||
|
# Handler proaktiv ab bevor's zur 1009-Disconnection kommt.
|
||||||
|
async with websockets.connect(url, max_size=100 * 1024 * 1024) as ws:
|
||||||
self.ws_rvs = ws
|
self.ws_rvs = ws
|
||||||
retry_delay = 2
|
retry_delay = 2
|
||||||
logger.info("[rvs] Verbunden — warte auf App-Nachrichten")
|
logger.info("[rvs] Verbunden — warte auf App-Nachrichten")
|
||||||
@@ -1767,6 +1783,15 @@ class ARIABridge:
|
|||||||
self._f5tts_config = {}
|
self._f5tts_config = {}
|
||||||
self._f5tts_config[k] = payload[k]
|
self._f5tts_config[k] = payload[k]
|
||||||
changed = True
|
changed = True
|
||||||
|
# FLUX-Felder: gleiche Logik wie F5-TTS. flux-bridge applied
|
||||||
|
# fluxDefaultModel selbst (Pipeline-Swap). Keywords nutzt Brain
|
||||||
|
# via /shared/config/voice_config.json.
|
||||||
|
for k in ("fluxDefaultModel", "fluxKeywordRaw", "fluxKeywordSwitch", "huggingfaceToken"):
|
||||||
|
if k in payload:
|
||||||
|
if not hasattr(self, "_flux_config"):
|
||||||
|
self._flux_config = {}
|
||||||
|
self._flux_config[k] = payload[k]
|
||||||
|
changed = True
|
||||||
# Persistent speichern in Shared Volume
|
# Persistent speichern in Shared Volume
|
||||||
if changed:
|
if changed:
|
||||||
try:
|
try:
|
||||||
@@ -1777,6 +1802,7 @@ class ARIABridge:
|
|||||||
"whisperModel": self.stt_engine.model_size,
|
"whisperModel": self.stt_engine.model_size,
|
||||||
}
|
}
|
||||||
config_data.update(getattr(self, "_f5tts_config", {}))
|
config_data.update(getattr(self, "_f5tts_config", {}))
|
||||||
|
config_data.update(getattr(self, "_flux_config", {}))
|
||||||
with open("/shared/config/voice_config.json", "w") as f:
|
with open("/shared/config/voice_config.json", "w") as f:
|
||||||
json.dump(config_data, f, indent=2)
|
json.dump(config_data, f, indent=2)
|
||||||
logger.info("[rvs] Voice-Config gespeichert: %s", config_data)
|
logger.info("[rvs] Voice-Config gespeichert: %s", config_data)
|
||||||
@@ -2204,6 +2230,33 @@ class ARIABridge:
|
|||||||
"timestamp": int(asyncio.get_event_loop().time() * 1000),
|
"timestamp": int(asyncio.get_event_loop().time() * 1000),
|
||||||
})
|
})
|
||||||
return
|
return
|
||||||
|
# Groessen-Check VOR base64-Encode + Send. Sonst zerreisst's bei
|
||||||
|
# grossen Files (>~70 MB binaer) die WebSocket-Verbindung mit
|
||||||
|
# Code 1009 (message too big) — RVS-Server droppt, Bridge crasht
|
||||||
|
# im cleanup (websockets-Lib-Bug). Limit deckt typische Videos
|
||||||
|
# und Bilder ab; alles drueber soll der User per SSH abholen.
|
||||||
|
FILE_MAX_BYTES = 70 * 1024 * 1024
|
||||||
|
try:
|
||||||
|
file_size = os.path.getsize(server_path)
|
||||||
|
except OSError as exc:
|
||||||
|
logger.warning("[rvs] getsize fehlgeschlagen: %s", exc)
|
||||||
|
file_size = 0
|
||||||
|
if file_size > FILE_MAX_BYTES:
|
||||||
|
logger.warning("[rvs] Re-Download abgelehnt: %s zu gross (%dMB > %dMB)",
|
||||||
|
server_path, file_size // (1024 * 1024),
|
||||||
|
FILE_MAX_BYTES // (1024 * 1024))
|
||||||
|
await self._send_to_rvs({
|
||||||
|
"type": "file_response",
|
||||||
|
"payload": {
|
||||||
|
"requestId": req_id,
|
||||||
|
"serverPath": server_path,
|
||||||
|
"name": os.path.basename(server_path),
|
||||||
|
"error": f"Datei zu gross fuer Transfer ({file_size // (1024 * 1024)} MB, Limit {FILE_MAX_BYTES // (1024 * 1024)} MB)",
|
||||||
|
"sizeBytes": file_size,
|
||||||
|
},
|
||||||
|
"timestamp": int(asyncio.get_event_loop().time() * 1000),
|
||||||
|
})
|
||||||
|
return
|
||||||
with open(server_path, "rb") as f:
|
with open(server_path, "rb") as f:
|
||||||
file_b64 = base64.b64encode(f.read()).decode("ascii")
|
file_b64 = base64.b64encode(f.read()).decode("ascii")
|
||||||
mime, _ = mimetypes.guess_type(server_path)
|
mime, _ = mimetypes.guess_type(server_path)
|
||||||
@@ -2279,8 +2332,36 @@ class ARIABridge:
|
|||||||
future.set_result(text)
|
future.set_result(text)
|
||||||
return
|
return
|
||||||
|
|
||||||
|
elif msg_type == "flux_response":
|
||||||
|
# Antwort der flux-bridge auf unseren flux_request. Erste Nachricht
|
||||||
|
# mit state='rendering' ist nur Progress-Ping — die echte Antwort
|
||||||
|
# kommt mit state='done' (oder error).
|
||||||
|
request_id = payload.get("requestId", "")
|
||||||
|
future = self._pending_flux.get(request_id)
|
||||||
|
if future is None or future.done():
|
||||||
|
return
|
||||||
|
error = payload.get("error", "")
|
||||||
|
if error:
|
||||||
|
logger.warning("[rvs] flux_response Fehler: %s", error)
|
||||||
|
future.set_result({"error": error})
|
||||||
|
return
|
||||||
|
state = payload.get("state", "")
|
||||||
|
if state == "rendering":
|
||||||
|
# Nur Progress-Info, future bleibt offen
|
||||||
|
logger.info("[rvs] flux: rendering %dx%d steps=%d ...",
|
||||||
|
payload.get("width", 0), payload.get("height", 0),
|
||||||
|
payload.get("steps", 0))
|
||||||
|
return
|
||||||
|
# state == "done" oder fehlt → final
|
||||||
|
logger.info("[rvs] flux fertig: %dx%d, %.1fs, %d KB",
|
||||||
|
payload.get("width", 0), payload.get("height", 0),
|
||||||
|
payload.get("renderSeconds", 0),
|
||||||
|
(payload.get("sizeBytes", 0)) // 1024)
|
||||||
|
future.set_result(payload)
|
||||||
|
return
|
||||||
|
|
||||||
elif msg_type == "service_status":
|
elif msg_type == "service_status":
|
||||||
# Gamebox-Bridges (whisper / f5tts) melden ihren Lade-Status.
|
# Gamebox-Bridges (whisper / f5tts / flux) melden ihren Lade-Status.
|
||||||
# Wir nutzen das fuer den dynamischen STT-Timeout: solange whisper
|
# Wir nutzen das fuer den dynamischen STT-Timeout: solange whisper
|
||||||
# im 'loading' steckt, geben wir der Bridge mehr Zeit (Modell-Download
|
# im 'loading' steckt, geben wir der Bridge mehr Zeit (Modell-Download
|
||||||
# kann 1-2 Min dauern), statt nach 45s lokal zu fallbacken.
|
# kann 1-2 Min dauern), statt nach 45s lokal zu fallbacken.
|
||||||
@@ -2291,6 +2372,11 @@ class ARIABridge:
|
|||||||
self._remote_stt_ready = (state == "ready")
|
self._remote_stt_ready = (state == "ready")
|
||||||
if self._remote_stt_ready != was_ready:
|
if self._remote_stt_ready != was_ready:
|
||||||
logger.info("[rvs] whisper-bridge -> %s", state)
|
logger.info("[rvs] whisper-bridge -> %s", state)
|
||||||
|
elif svc == "flux":
|
||||||
|
was_ready = self._remote_flux_ready
|
||||||
|
self._remote_flux_ready = (state == "ready")
|
||||||
|
if self._remote_flux_ready != was_ready:
|
||||||
|
logger.info("[rvs] flux-bridge -> %s", state)
|
||||||
return
|
return
|
||||||
|
|
||||||
elif msg_type == "config_request":
|
elif msg_type == "config_request":
|
||||||
@@ -2475,6 +2561,105 @@ class ARIABridge:
|
|||||||
except OSError:
|
except OSError:
|
||||||
pass
|
pass
|
||||||
|
|
||||||
|
# ── Flux-Roundtrip: Brain → Bridge → RVS → flux-bridge → zurueck ──
|
||||||
|
# FLUX-Render auf der 3060 dauert je nach Aufloesung/Steps 20-90 s.
|
||||||
|
# Beim 1. Render frisch nach Container-Start muss zudem das ~24 GB
|
||||||
|
# Modell von HF geladen werden — daher der grosse Loading-Timeout.
|
||||||
|
_FLUX_TIMEOUT_READY_S = 240.0 # 4 min nach erstem Render
|
||||||
|
_FLUX_TIMEOUT_LOADING_S = 900.0 # 15 min beim allerersten Mal (Modell-Download)
|
||||||
|
|
||||||
|
async def _flux_generate(self, prompt: str, width: int, height: int,
|
||||||
|
steps: Optional[int], guidance: Optional[float],
|
||||||
|
seed: Optional[int], model: Optional[str] = None) -> dict:
|
||||||
|
"""Schickt einen flux_request an die flux-bridge, wartet auf das fertige
|
||||||
|
PNG, speichert es nach /shared/uploads/aria_generated_<ts>.png.
|
||||||
|
|
||||||
|
Rueckgabe:
|
||||||
|
{ok: True, path, sizeBytes, width, height, steps, guidance, seed, model, renderSeconds}
|
||||||
|
{ok: False, error}
|
||||||
|
"""
|
||||||
|
if self.ws_rvs is None:
|
||||||
|
return {"ok": False, "error": "RVS-Verbindung nicht aktiv"}
|
||||||
|
|
||||||
|
request_id = str(uuid.uuid4())
|
||||||
|
loop = asyncio.get_event_loop()
|
||||||
|
future: asyncio.Future = loop.create_future()
|
||||||
|
self._pending_flux[request_id] = future
|
||||||
|
|
||||||
|
try:
|
||||||
|
req_payload: dict = {"requestId": request_id, "prompt": prompt,
|
||||||
|
"width": width, "height": height}
|
||||||
|
if steps is not None:
|
||||||
|
req_payload["steps"] = steps
|
||||||
|
if guidance is not None:
|
||||||
|
req_payload["guidance_scale"] = guidance
|
||||||
|
if seed is not None:
|
||||||
|
req_payload["seed"] = seed
|
||||||
|
if model:
|
||||||
|
# 'dev' | 'schnell' — flux-bridge mappt das auf HF-IDs.
|
||||||
|
# Ohne Angabe nimmt die flux-bridge ihren konfigurierten Default.
|
||||||
|
req_payload["model"] = model
|
||||||
|
|
||||||
|
logger.info("[rvs] flux_request → flux-bridge (id=%s, %dx%d, steps=%s, model=%s, prompt=%r)",
|
||||||
|
request_id[:8], width, height, steps, model or "default", prompt[:60])
|
||||||
|
ok = await self._send_to_rvs({
|
||||||
|
"type": "flux_request",
|
||||||
|
"payload": req_payload,
|
||||||
|
"timestamp": int(time.time() * 1000),
|
||||||
|
})
|
||||||
|
if not ok:
|
||||||
|
return {"ok": False, "error": "flux_request konnte nicht gesendet werden"}
|
||||||
|
|
||||||
|
timeout_s = (self._FLUX_TIMEOUT_READY_S
|
||||||
|
if self._remote_flux_ready
|
||||||
|
else self._FLUX_TIMEOUT_LOADING_S)
|
||||||
|
result = await asyncio.wait_for(future, timeout=timeout_s)
|
||||||
|
|
||||||
|
if not isinstance(result, dict) or result.get("error"):
|
||||||
|
err = (result or {}).get("error") if isinstance(result, dict) else "leeres Resultat"
|
||||||
|
return {"ok": False, "error": err or "flux-bridge Fehler"}
|
||||||
|
|
||||||
|
b64 = result.get("base64") or ""
|
||||||
|
if not b64:
|
||||||
|
return {"ok": False, "error": "flux_response ohne Bilddaten"}
|
||||||
|
|
||||||
|
try:
|
||||||
|
png_bytes = base64.b64decode(b64)
|
||||||
|
except Exception as e:
|
||||||
|
return {"ok": False, "error": f"PNG-Decode fehlgeschlagen: {e}"}
|
||||||
|
|
||||||
|
SHARED_DIR = "/shared/uploads"
|
||||||
|
os.makedirs(SHARED_DIR, exist_ok=True)
|
||||||
|
ts_ms = int(time.time() * 1000)
|
||||||
|
file_name = f"aria_generated_{ts_ms}.png"
|
||||||
|
path = os.path.join(SHARED_DIR, file_name)
|
||||||
|
try:
|
||||||
|
with open(path, "wb") as f:
|
||||||
|
f.write(png_bytes)
|
||||||
|
except Exception as e:
|
||||||
|
return {"ok": False, "error": f"Speichern fehlgeschlagen: {e}"}
|
||||||
|
|
||||||
|
logger.info("[rvs] flux PNG gespeichert: %s (%d KB)", path, len(png_bytes) // 1024)
|
||||||
|
return {
|
||||||
|
"ok": True,
|
||||||
|
"path": path,
|
||||||
|
"sizeBytes": len(png_bytes),
|
||||||
|
"width": result.get("width", width),
|
||||||
|
"height": result.get("height", height),
|
||||||
|
"steps": result.get("steps"),
|
||||||
|
"guidance": result.get("guidance"),
|
||||||
|
"seed": result.get("seed"),
|
||||||
|
"model": result.get("model", ""),
|
||||||
|
"renderSeconds": result.get("renderSeconds", 0),
|
||||||
|
}
|
||||||
|
except asyncio.TimeoutError:
|
||||||
|
return {"ok": False, "error": f"Render-Timeout ({int(timeout_s)}s) — flux-bridge offline?"}
|
||||||
|
except Exception as e:
|
||||||
|
logger.exception("[rvs] _flux_generate Fehler")
|
||||||
|
return {"ok": False, "error": str(e)[:200]}
|
||||||
|
finally:
|
||||||
|
self._pending_flux.pop(request_id, None)
|
||||||
|
|
||||||
async def _send_to_rvs(self, message: dict) -> bool:
|
async def _send_to_rvs(self, message: dict) -> bool:
|
||||||
"""Sendet eine Nachricht an die App (via RVS) mit Verbindungs-Check.
|
"""Sendet eine Nachricht an die App (via RVS) mit Verbindungs-Check.
|
||||||
|
|
||||||
@@ -2705,6 +2890,43 @@ class ARIABridge:
|
|||||||
# selbst wenn derselbe Name zweimal in Folge kommt.
|
# selbst wenn derselbe Name zweimal in Folge kommt.
|
||||||
asyncio.create_task(self._emit_activity("tool", tool, force=True))
|
asyncio.create_task(self._emit_activity("tool", tool, force=True))
|
||||||
await _send_response(writer, 200, {"ok": True})
|
await _send_response(writer, 200, {"ok": True})
|
||||||
|
elif method == "POST" and path == "/internal/flux-generate":
|
||||||
|
# Vom Brain (flux_generate-Tool) gefeuert. Wir routen den
|
||||||
|
# Render-Request via RVS an die flux-bridge (Gamebox),
|
||||||
|
# warten synchron auf die PNG-Antwort, speichern sie nach
|
||||||
|
# /shared/uploads/ und melden Pfad + Render-Stats zurueck.
|
||||||
|
# Brain referenziert das Bild dann mit [FILE:]-Marker in
|
||||||
|
# seiner Antwort, die Bridge broadcastet daraufhin
|
||||||
|
# automatisch ein file_from_aria-Event an App+Diagnostic.
|
||||||
|
try:
|
||||||
|
data = json.loads(body.decode("utf-8", "ignore"))
|
||||||
|
except Exception as exc:
|
||||||
|
await _send_response(writer, 400, {"error": f"bad json: {exc}"})
|
||||||
|
return
|
||||||
|
prompt = (data.get("prompt") or "").strip()
|
||||||
|
if not prompt:
|
||||||
|
await _send_response(writer, 400, {"error": "prompt erforderlich"})
|
||||||
|
return
|
||||||
|
try:
|
||||||
|
width = int(data.get("width") or 1024)
|
||||||
|
height = int(data.get("height") or 1024)
|
||||||
|
except (TypeError, ValueError):
|
||||||
|
width, height = 1024, 1024
|
||||||
|
steps_raw = data.get("steps")
|
||||||
|
guidance_raw = data.get("guidance_scale")
|
||||||
|
seed_raw = data.get("seed")
|
||||||
|
steps = int(steps_raw) if isinstance(steps_raw, (int, float)) else None
|
||||||
|
guidance = float(guidance_raw) if isinstance(guidance_raw, (int, float)) else None
|
||||||
|
seed = int(seed_raw) if isinstance(seed_raw, (int, float)) else None
|
||||||
|
model_raw = data.get("model")
|
||||||
|
model = model_raw.strip() if isinstance(model_raw, str) and model_raw.strip() in ("dev", "schnell") else None
|
||||||
|
|
||||||
|
result = await self._flux_generate(
|
||||||
|
prompt=prompt, width=width, height=height,
|
||||||
|
steps=steps, guidance=guidance, seed=seed, model=model,
|
||||||
|
)
|
||||||
|
status = 200 if result.get("ok") else 502
|
||||||
|
await _send_response(writer, status, result)
|
||||||
elif method == "POST" and path == "/internal/delete-chat-message":
|
elif method == "POST" and path == "/internal/delete-chat-message":
|
||||||
try:
|
try:
|
||||||
data = json.loads(body.decode("utf-8", "ignore"))
|
data = json.loads(body.decode("utf-8", "ignore"))
|
||||||
|
|||||||
+88
-37
@@ -320,8 +320,7 @@
|
|||||||
<input type="file" id="diag-file-input" multiple accept="image/*,application/pdf,.doc,.docx,.txt" style="display:none;" onchange="handleDiagFileSelect(this.files)">
|
<input type="file" id="diag-file-input" multiple accept="image/*,application/pdf,.doc,.docx,.txt" style="display:none;" onchange="handleDiagFileSelect(this.files)">
|
||||||
</label>
|
</label>
|
||||||
<textarea id="chat-input" placeholder="Nachricht an ARIA... (Enter sendet, Shift+Enter neue Zeile)" rows="2" onpaste="handleDiagPaste(event)" oninput="autoResizeTextarea(this)"></textarea>
|
<textarea id="chat-input" placeholder="Nachricht an ARIA... (Enter sendet, Shift+Enter neue Zeile)" rows="2" onpaste="handleDiagPaste(event)" oninput="autoResizeTextarea(this)"></textarea>
|
||||||
<button class="btn" id="btn-gw" onclick="testGateway()">Gateway senden</button>
|
<button class="btn" id="btn-rvs" onclick="testRVS()">Senden</button>
|
||||||
<button class="btn" id="btn-rvs" onclick="testRVS()">Via RVS senden</button>
|
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
@@ -338,8 +337,7 @@
|
|||||||
</div>
|
</div>
|
||||||
<div class="input-row" style="margin-top:8px;">
|
<div class="input-row" style="margin-top:8px;">
|
||||||
<textarea id="chat-input-fs" placeholder="Nachricht an ARIA... (Enter sendet, Shift+Enter neue Zeile)" rows="2" oninput="autoResizeTextarea(this)"></textarea>
|
<textarea id="chat-input-fs" placeholder="Nachricht an ARIA... (Enter sendet, Shift+Enter neue Zeile)" rows="2" oninput="autoResizeTextarea(this)"></textarea>
|
||||||
<button class="btn" onclick="testGatewayFS()">Gateway senden</button>
|
<button class="btn" onclick="testRVSFS()">Senden</button>
|
||||||
<button class="btn" onclick="testRVSFS()">Via RVS senden</button>
|
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
@@ -367,7 +365,6 @@
|
|||||||
<div style="padding: 0 12px;">
|
<div style="padding: 0 12px;">
|
||||||
<div class="tab-bar">
|
<div class="tab-bar">
|
||||||
<button class="tab-btn active" data-tab="all" onclick="switchTab('all')">Alle <span class="tab-count" id="count-all">0</span></button>
|
<button class="tab-btn active" data-tab="all" onclick="switchTab('all')">Alle <span class="tab-count" id="count-all">0</span></button>
|
||||||
<button class="tab-btn" data-tab="gateway" onclick="switchTab('gateway')">Gateway <span class="tab-count" id="count-gateway">0</span></button>
|
|
||||||
<button class="tab-btn" data-tab="rvs" onclick="switchTab('rvs')">RVS <span class="tab-count" id="count-rvs">0</span></button>
|
<button class="tab-btn" data-tab="rvs" onclick="switchTab('rvs')">RVS <span class="tab-count" id="count-rvs">0</span></button>
|
||||||
<button class="tab-btn" data-tab="proxy" onclick="switchTab('proxy')">Proxy <span class="tab-count" id="count-proxy">0</span></button>
|
<button class="tab-btn" data-tab="proxy" onclick="switchTab('proxy')">Proxy <span class="tab-count" id="count-proxy">0</span></button>
|
||||||
<button class="tab-btn" data-tab="bridge" onclick="switchTab('bridge')">Bridge <span class="tab-count" id="count-bridge">0</span></button>
|
<button class="tab-btn" data-tab="bridge" onclick="switchTab('bridge')">Bridge <span class="tab-count" id="count-bridge">0</span></button>
|
||||||
@@ -386,7 +383,6 @@
|
|||||||
</span>
|
</span>
|
||||||
</div>
|
</div>
|
||||||
<div class="log-box" id="log-all"></div>
|
<div class="log-box" id="log-all"></div>
|
||||||
<div class="log-box hidden" id="log-gateway"></div>
|
|
||||||
<div class="log-box hidden" id="log-rvs"></div>
|
<div class="log-box hidden" id="log-rvs"></div>
|
||||||
<div class="log-box hidden" id="log-proxy"></div>
|
<div class="log-box hidden" id="log-proxy"></div>
|
||||||
<div class="log-box hidden" id="log-bridge"></div>
|
<div class="log-box hidden" id="log-bridge"></div>
|
||||||
@@ -613,6 +609,66 @@
|
|||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
|
<!-- FLUX Bildgenerierung -->
|
||||||
|
<div class="settings-section">
|
||||||
|
<h2>FLUX Bildgenerierung</h2>
|
||||||
|
<div style="font-size:11px;color:#8888AA;margin-bottom:8px;">
|
||||||
|
Steuerung der Image-Generation (flux-bridge auf der Gamebox).
|
||||||
|
Default-Modell wird via RVS gepusht — Wechsel triggert Pipeline-Reload (15-30s
|
||||||
|
aus HF-Cache, mehrere Minuten beim Erst-Download). Keywords nutzt ARIAs Brain
|
||||||
|
im System-Prompt.
|
||||||
|
</div>
|
||||||
|
<div class="card" style="max-width:500px;">
|
||||||
|
<div style="display:flex;flex-direction:column;gap:8px;">
|
||||||
|
|
||||||
|
<label style="color:#8888AA;font-size:12px;">Default-Modell:</label>
|
||||||
|
<select id="diag-flux-default-model" onchange="sendVoiceConfig()"
|
||||||
|
style="background:#1E1E2E;color:#fff;border:1px solid #2A2A3E;border-radius:6px;padding:6px 10px;font-size:13px;">
|
||||||
|
<option value="dev">FLUX.1-dev (hoechste Qualitaet, 20-90s)</option>
|
||||||
|
<option value="schnell">FLUX.1-schnell (4-step, 5-15s)</option>
|
||||||
|
</select>
|
||||||
|
|
||||||
|
<label style="color:#8888AA;font-size:12px;">
|
||||||
|
Raw-Keyword — Pipe-Modus, ARIA leitet den Prompt 1:1 durch (kein Rewriting):
|
||||||
|
</label>
|
||||||
|
<input type="text" id="diag-flux-keyword-raw"
|
||||||
|
placeholder="flux"
|
||||||
|
style="background:#1E1E2E;color:#fff;border:1px solid #2A2A3E;border-radius:6px;padding:6px 10px;font-size:13px;">
|
||||||
|
|
||||||
|
<label style="color:#8888AA;font-size:12px;">
|
||||||
|
Switch-Keyword — zwingt das ANDERE Modell als das Default fuer diesen Request:
|
||||||
|
</label>
|
||||||
|
<input type="text" id="diag-flux-keyword-switch"
|
||||||
|
placeholder="fix"
|
||||||
|
style="background:#1E1E2E;color:#fff;border:1px solid #2A2A3E;border-radius:6px;padding:6px 10px;font-size:13px;">
|
||||||
|
|
||||||
|
<label style="color:#8888AA;font-size:12px;margin-top:4px;">
|
||||||
|
HuggingFace-Token (nur fuer FLUX.1-dev — gated Modell, Lizenz-Bestaetigung).
|
||||||
|
Wird per RVS an die flux-bridge gepusht. Leer = kein Token (Schnell-Modell laeuft auch ohne).
|
||||||
|
</label>
|
||||||
|
<div style="display:flex;gap:4px;">
|
||||||
|
<input type="password" id="diag-flux-hf-token"
|
||||||
|
placeholder="hf_..."
|
||||||
|
style="flex:1;min-width:0;box-sizing:border-box;background:#1E1E2E;color:#fff;border:1px solid #2A2A3E;border-radius:6px;padding:6px 10px;font-size:13px;font-family:monospace;">
|
||||||
|
<button type="button" class="btn secondary" onclick="toggleSecret('diag-flux-hf-token', this)" style="padding:4px 10px;flex-shrink:0;" title="Anzeigen/Verbergen">👁</button>
|
||||||
|
</div>
|
||||||
|
<div style="color:#666680;font-size:10px;">
|
||||||
|
Erst auf <a href="https://huggingface.co/black-forest-labs/FLUX.1-dev" target="_blank" style="color:#0096FF;">huggingface.co/.../FLUX.1-dev</a> "Agree" klicken,
|
||||||
|
dann unter <a href="https://huggingface.co/settings/tokens" target="_blank" style="color:#0096FF;">Settings → Tokens</a> einen Read-Token erzeugen.
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<div style="display:flex;gap:8px;align-items:center;margin-top:6px;">
|
||||||
|
<button class="btn primary" onclick="sendVoiceConfig()" style="padding:6px 14px;font-size:12px;">
|
||||||
|
Anwenden
|
||||||
|
</button>
|
||||||
|
<div style="color:#666680;font-size:10px;">
|
||||||
|
Beide Modelle = volle Qualitaet, schnell ist nur ein 4-Step-Distillat (Apache-2.0).
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
|
||||||
<!-- Whisper (STT) -->
|
<!-- Whisper (STT) -->
|
||||||
<div class="settings-section">
|
<div class="settings-section">
|
||||||
<h2>Whisper (Spracherkennung)</h2>
|
<h2>Whisper (Spracherkennung)</h2>
|
||||||
@@ -1118,13 +1174,12 @@
|
|||||||
const btnScroll = document.getElementById('btn-scroll');
|
const btnScroll = document.getElementById('btn-scroll');
|
||||||
let ws;
|
let ws;
|
||||||
let activeTab = 'all';
|
let activeTab = 'all';
|
||||||
const DOCKER_TABS = ['gateway', 'proxy', 'bridge'];
|
const DOCKER_TABS = ['proxy', 'bridge'];
|
||||||
const autoScroll = { all: true, gateway: true, rvs: true, proxy: true, bridge: true, server: true, trace: true };
|
const autoScroll = { all: true, rvs: true, proxy: true, bridge: true, server: true, trace: true };
|
||||||
const logCounts = { all: 0, gateway: 0, rvs: 0, proxy: 0, bridge: 0, server: 0, trace: 0 };
|
const logCounts = { all: 0, rvs: 0, proxy: 0, bridge: 0, server: 0, trace: 0 };
|
||||||
|
|
||||||
const logBoxes = {
|
const logBoxes = {
|
||||||
all: document.getElementById('log-all'),
|
all: document.getElementById('log-all'),
|
||||||
gateway: document.getElementById('log-gateway'),
|
|
||||||
rvs: document.getElementById('log-rvs'),
|
rvs: document.getElementById('log-rvs'),
|
||||||
proxy: document.getElementById('log-proxy'),
|
proxy: document.getElementById('log-proxy'),
|
||||||
bridge: document.getElementById('log-bridge'),
|
bridge: document.getElementById('log-bridge'),
|
||||||
@@ -1178,7 +1233,9 @@
|
|||||||
}
|
}
|
||||||
|
|
||||||
function mapSourceToTab(source) {
|
function mapSourceToTab(source) {
|
||||||
if (source === 'gateway') return 'gateway';
|
// Gateway-Source: deprecated — falls noch was reinkommt zeigen wir's
|
||||||
|
// einfach unter 'server'. Spart einen toten Tab.
|
||||||
|
if (source === 'gateway') return 'server';
|
||||||
if (source === 'rvs') return 'rvs';
|
if (source === 'rvs') return 'rvs';
|
||||||
if (source === 'proxy') return 'proxy';
|
if (source === 'proxy') return 'proxy';
|
||||||
if (source === 'bridge') return 'bridge';
|
if (source === 'bridge') return 'bridge';
|
||||||
@@ -1342,6 +1399,11 @@
|
|||||||
setIfPresent('diag-f5tts-vocab', msg.f5ttsVocabFile);
|
setIfPresent('diag-f5tts-vocab', msg.f5ttsVocabFile);
|
||||||
setIfPresent('diag-f5tts-cfg', msg.f5ttsCfgStrength);
|
setIfPresent('diag-f5tts-cfg', msg.f5ttsCfgStrength);
|
||||||
setIfPresent('diag-f5tts-nfe', msg.f5ttsNfeStep);
|
setIfPresent('diag-f5tts-nfe', msg.f5ttsNfeStep);
|
||||||
|
// FLUX-Settings wiederherstellen
|
||||||
|
setIfPresent('diag-flux-default-model', msg.fluxDefaultModel);
|
||||||
|
setIfPresent('diag-flux-keyword-raw', msg.fluxKeywordRaw);
|
||||||
|
setIfPresent('diag-flux-keyword-switch', msg.fluxKeywordSwitch);
|
||||||
|
setIfPresent('diag-flux-hf-token', msg.huggingfaceToken);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -1620,18 +1682,6 @@
|
|||||||
renderDiagPending();
|
renderDiagPending();
|
||||||
}
|
}
|
||||||
|
|
||||||
function testGateway() {
|
|
||||||
const input = document.getElementById('chat-input');
|
|
||||||
const text = input.value.trim();
|
|
||||||
if (!text && diagPendingFiles.length === 0) return;
|
|
||||||
if (diagPendingFiles.length > 0) sendDiagAttachments();
|
|
||||||
if (text) {
|
|
||||||
addChat('sent', text, 'Gateway direkt');
|
|
||||||
send({ action: 'test_gateway', text });
|
|
||||||
}
|
|
||||||
input.value = '';
|
|
||||||
}
|
|
||||||
|
|
||||||
function testRVS() {
|
function testRVS() {
|
||||||
const input = document.getElementById('chat-input');
|
const input = document.getElementById('chat-input');
|
||||||
const text = input.value.trim();
|
const text = input.value.trim();
|
||||||
@@ -1771,7 +1821,6 @@
|
|||||||
if (proxy.models && proxy.models.length) showProxyModels(proxy.models);
|
if (proxy.models && proxy.models.length) showProxyModels(proxy.models);
|
||||||
|
|
||||||
// Buttons
|
// Buttons
|
||||||
document.getElementById('btn-gw').disabled = gw.status !== 'connected';
|
|
||||||
document.getElementById('btn-rvs').disabled = rvs.status !== 'connected';
|
document.getElementById('btn-rvs').disabled = rvs.status !== 'connected';
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -2094,14 +2143,6 @@
|
|||||||
modal.style.display = 'none';
|
modal.style.display = 'none';
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
function testGatewayFS() {
|
|
||||||
const input = document.getElementById('chat-input-fs');
|
|
||||||
const text = input.value.trim();
|
|
||||||
if (!text) return;
|
|
||||||
addChat('sent', text, 'Gateway direkt');
|
|
||||||
send({ action: 'test_gateway', text });
|
|
||||||
input.value = '';
|
|
||||||
}
|
|
||||||
function testRVSFS() {
|
function testRVSFS() {
|
||||||
const input = document.getElementById('chat-input-fs');
|
const input = document.getElementById('chat-input-fs');
|
||||||
const text = input.value.trim();
|
const text = input.value.trim();
|
||||||
@@ -2147,18 +2188,23 @@
|
|||||||
// Liste neu aufbauen
|
// Liste neu aufbauen
|
||||||
list.innerHTML = '';
|
list.innerHTML = '';
|
||||||
let anyLoading = false, anyError = false;
|
let anyLoading = false, anyError = false;
|
||||||
const labels = { f5tts: 'F5-TTS', whisper: 'Whisper STT' };
|
const labels = { f5tts: 'F5-TTS', whisper: 'Whisper STT', flux: 'FLUX Image-Gen' };
|
||||||
for (const [s, info] of Object.entries(_serviceState)) {
|
for (const [s, info] of Object.entries(_serviceState)) {
|
||||||
const row = document.createElement('div');
|
const row = document.createElement('div');
|
||||||
row.style.cssText = 'display:flex;align-items:center;gap:6px;';
|
row.style.cssText = 'display:flex;align-items:center;gap:6px;';
|
||||||
let dot = '⚫', color = '#666680', text = '';
|
let dot = '⚫', color = '#666680', text = '';
|
||||||
if (info.state === 'loading') {
|
if (info.state === 'loading') {
|
||||||
dot = '⏳'; color = '#FFD60A'; anyLoading = true;
|
dot = info.downloading ? '⬇' : '⏳';
|
||||||
text = `${labels[s] || s}: laedt${info.model ? ' ' + info.model : ''}...`;
|
color = '#FFD60A'; anyLoading = true;
|
||||||
|
const action = info.downloading
|
||||||
|
? 'laedt erstmalig runter (mehrere GB, kann dauern)'
|
||||||
|
: 'laedt';
|
||||||
|
text = `${labels[s] || s}: ${action}${info.model ? ' ' + info.model : ''}...`;
|
||||||
} else if (info.state === 'ready') {
|
} else if (info.state === 'ready') {
|
||||||
dot = '✅'; color = '#34C759';
|
dot = info.freshlyDownloaded ? '🎉' : '✅'; color = '#34C759';
|
||||||
const sec = info.loadSeconds ? ` (${info.loadSeconds.toFixed(1)}s)` : '';
|
const sec = info.loadSeconds ? ` (${info.loadSeconds.toFixed(1)}s)` : '';
|
||||||
text = `${labels[s] || s}: bereit${info.model ? ' ' + info.model : ''}${sec}`;
|
const downloadedHint = info.freshlyDownloaded ? ' — Download fertig!' : '';
|
||||||
|
text = `${labels[s] || s}: bereit${info.model ? ' ' + info.model : ''}${sec}${downloadedHint}`;
|
||||||
} else if (info.state === 'error') {
|
} else if (info.state === 'error') {
|
||||||
dot = '❌'; color = '#FF3B30'; anyError = true;
|
dot = '❌'; color = '#FF3B30'; anyError = true;
|
||||||
text = `${labels[s] || s}: Fehler ${info.error || ''}`;
|
text = `${labels[s] || s}: Fehler ${info.error || ''}`;
|
||||||
@@ -2673,11 +2719,16 @@
|
|||||||
const f5ttsNfeRaw = document.getElementById('diag-f5tts-nfe')?.value || '';
|
const f5ttsNfeRaw = document.getElementById('diag-f5tts-nfe')?.value || '';
|
||||||
const f5ttsCfgStrength = f5ttsCfgRaw ? parseFloat(f5ttsCfgRaw) : undefined;
|
const f5ttsCfgStrength = f5ttsCfgRaw ? parseFloat(f5ttsCfgRaw) : undefined;
|
||||||
const f5ttsNfeStep = f5ttsNfeRaw ? parseInt(f5ttsNfeRaw, 10) : undefined;
|
const f5ttsNfeStep = f5ttsNfeRaw ? parseInt(f5ttsNfeRaw, 10) : undefined;
|
||||||
|
const fluxDefaultModel = document.getElementById('diag-flux-default-model')?.value || undefined;
|
||||||
|
const fluxKeywordRaw = document.getElementById('diag-flux-keyword-raw')?.value;
|
||||||
|
const fluxKeywordSwitch = document.getElementById('diag-flux-keyword-switch')?.value;
|
||||||
|
const huggingfaceToken = document.getElementById('diag-flux-hf-token')?.value;
|
||||||
send({
|
send({
|
||||||
action: 'send_voice_config',
|
action: 'send_voice_config',
|
||||||
ttsEnabled, xttsVoice, whisperModel,
|
ttsEnabled, xttsVoice, whisperModel,
|
||||||
f5ttsModel, f5ttsCkptFile, f5ttsVocabFile,
|
f5ttsModel, f5ttsCkptFile, f5ttsVocabFile,
|
||||||
f5ttsCfgStrength, f5ttsNfeStep,
|
f5ttsCfgStrength, f5ttsNfeStep,
|
||||||
|
fluxDefaultModel, fluxKeywordRaw, fluxKeywordSwitch, huggingfaceToken,
|
||||||
});
|
});
|
||||||
const statusEl = document.getElementById('voice-status');
|
const statusEl = document.getElementById('voice-status');
|
||||||
if (statusEl && xttsVoice) {
|
if (statusEl && xttsVoice) {
|
||||||
|
|||||||
+37
-15
@@ -492,9 +492,10 @@ function handleGatewayMessage(msg) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
function sendToGateway(text, isTrace) {
|
function sendToGateway(text, isTrace) {
|
||||||
|
// OpenClaw-Gateway ist raus — Brain via Bridge via RVS ist die einzige
|
||||||
|
// Route. Wir loggen nichts mehr; alte Trace-Aufrufe schliessen wir clean.
|
||||||
if (!gatewayWs || gatewayWs.readyState !== WebSocket.OPEN) {
|
if (!gatewayWs || gatewayWs.readyState !== WebSocket.OPEN) {
|
||||||
log("error", "gateway", "Nicht verbunden — kann nicht senden");
|
if (isTrace) traceEnd(false, "Gateway deprecated — nutze RVS");
|
||||||
if (isTrace) traceEnd(false, "Gateway nicht verbunden");
|
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -757,22 +758,20 @@ function sendToRVS_raw(msgObj) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
function sendToRVS(text, isTrace) {
|
function sendToRVS(text, isTrace) {
|
||||||
// Ueber Gateway senden (zuverlaessig) UND an RVS fuer App-Sichtbarkeit
|
// Brain-Pipeline: Diagnostic → RVS → Bridge → Brain (HTTP). OpenClaw-
|
||||||
// Die Bridge empfaengt RVS-Nachrichten von der App zuverlaessig,
|
// Gateway-Pfad ist abgeschaltet. Sender 'diagnostic' damit die Bridge
|
||||||
// aber die Diagnostic→RVS→Bridge Route hat Zombie-Probleme.
|
// den Text als User-Nachricht ans Brain weiterleitet und die App +
|
||||||
// Deshalb: Gateway fuer ARIA, RVS nur fuer App-Anzeige.
|
// Diagnostic die Bubble live spiegeln koennen.
|
||||||
|
if (!rvsWs || rvsWs.readyState !== WebSocket.OPEN) {
|
||||||
// 1. An Gateway senden (damit ARIA antwortet)
|
if (isTrace) traceEnd(false, "RVS nicht verbunden");
|
||||||
const gatewayOk = sendToGateway(text, isTrace);
|
return false;
|
||||||
|
}
|
||||||
// 2. An RVS senden (damit die App die Nachricht sieht)
|
|
||||||
sendToRVS_raw({
|
sendToRVS_raw({
|
||||||
type: "chat",
|
type: "chat",
|
||||||
payload: { text, sender: "diagnostic" },
|
payload: { text, sender: "diagnostic" },
|
||||||
timestamp: Date.now(),
|
timestamp: Date.now(),
|
||||||
});
|
});
|
||||||
|
return true;
|
||||||
return gatewayOk;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// ── Claude Proxy Test ────────────────────────────────────
|
// ── Claude Proxy Test ────────────────────────────────────
|
||||||
@@ -1836,8 +1835,11 @@ wss.on("connection", (ws) => {
|
|||||||
const msg = JSON.parse(raw.toString());
|
const msg = JSON.parse(raw.toString());
|
||||||
|
|
||||||
if (msg.action === "test_gateway") {
|
if (msg.action === "test_gateway") {
|
||||||
traceStart("Gateway", msg.text || "aria lebst du noch?");
|
// Deprecated — Gateway-Pfad ist raus. Wir leiten an RVS um damit
|
||||||
sendToGateway(msg.text || "aria lebst du noch?", true);
|
// alte Browser-Sessions die noch den Button anzeigen nicht stumm
|
||||||
|
// ins Leere klicken. Neue Versionen kennen den Button nicht mehr.
|
||||||
|
traceStart("RVS", msg.text || "aria lebst du noch?");
|
||||||
|
sendToRVS(msg.text || "aria lebst du noch?", true);
|
||||||
} else if (msg.action === "test_rvs") {
|
} else if (msg.action === "test_rvs") {
|
||||||
traceStart("RVS", msg.text || "aria lebst du noch?");
|
traceStart("RVS", msg.text || "aria lebst du noch?");
|
||||||
sendToRVS(msg.text || "aria lebst du noch?", true);
|
sendToRVS(msg.text || "aria lebst du noch?", true);
|
||||||
@@ -1943,6 +1945,26 @@ wss.on("connection", (ws) => {
|
|||||||
if (msg.f5ttsNfeStep !== undefined && !isNaN(msg.f5ttsNfeStep)) {
|
if (msg.f5ttsNfeStep !== undefined && !isNaN(msg.f5ttsNfeStep)) {
|
||||||
voiceConfig.f5ttsNfeStep = msg.f5ttsNfeStep;
|
voiceConfig.f5ttsNfeStep = msg.f5ttsNfeStep;
|
||||||
}
|
}
|
||||||
|
// FLUX-Settings (Default-Modell + User-Keywords). flux-bridge nutzt
|
||||||
|
// fluxDefaultModel zum Hot-Swap, Brain liest die Keywords direkt aus
|
||||||
|
// /shared/config/voice_config.json fuer den System-Prompt.
|
||||||
|
if (msg.fluxDefaultModel !== undefined) {
|
||||||
|
voiceConfig.fluxDefaultModel = (msg.fluxDefaultModel === "schnell") ? "schnell" : "dev";
|
||||||
|
}
|
||||||
|
if (msg.fluxKeywordRaw !== undefined) {
|
||||||
|
voiceConfig.fluxKeywordRaw = String(msg.fluxKeywordRaw || "").trim().toLowerCase() || "flux";
|
||||||
|
}
|
||||||
|
if (msg.fluxKeywordSwitch !== undefined) {
|
||||||
|
voiceConfig.fluxKeywordSwitch = String(msg.fluxKeywordSwitch || "").trim().toLowerCase() || "fix";
|
||||||
|
}
|
||||||
|
// HuggingFace-Token fuer gated FLUX.1-dev. Wird per RVS an die
|
||||||
|
// flux-bridge gepusht, dort als HF_TOKEN env gesetzt vor dem
|
||||||
|
// naechsten from_pretrained. Leerer String = "kein Token" (statt
|
||||||
|
// 'behalt was du hattest'), damit Stefan ihn auch wieder loeschen
|
||||||
|
// kann.
|
||||||
|
if (msg.huggingfaceToken !== undefined) {
|
||||||
|
voiceConfig.huggingfaceToken = String(msg.huggingfaceToken || "").trim();
|
||||||
|
}
|
||||||
try {
|
try {
|
||||||
fs.mkdirSync("/shared/config", { recursive: true });
|
fs.mkdirSync("/shared/config", { recursive: true });
|
||||||
fs.writeFileSync("/shared/config/voice_config.json", JSON.stringify(voiceConfig, null, 2));
|
fs.writeFileSync("/shared/config/voice_config.json", JSON.stringify(voiceConfig, null, 2));
|
||||||
|
|||||||
@@ -0,0 +1,180 @@
|
|||||||
|
# FLUX.1-dev Bildgenerierung — Architektur & Stand
|
||||||
|
|
||||||
|
Ergaenzung des ARIA-Agent-Stacks um native Text-to-Image-Generierung via
|
||||||
|
FLUX.1-dev auf der Gamebox. Folgt dem **gleichen Pattern wie f5tts / whisper**:
|
||||||
|
ein eigener Container auf dem Gaming-PC, der sich selbst per WebSocket zum
|
||||||
|
RVS verbindet und auf seinen Request-Typ lauscht.
|
||||||
|
|
||||||
|
## Pipeline
|
||||||
|
|
||||||
|
```
|
||||||
|
Stefan / App
|
||||||
|
│ Chat-Nachricht ("mal mir einen Sonnenuntergang ueberm Hangar")
|
||||||
|
▼
|
||||||
|
aria-bridge ── send_to_core ──▶ aria-brain
|
||||||
|
│ chooses tool: flux_generate(prompt=..., width=..., ...)
|
||||||
|
│ POST /internal/flux-generate
|
||||||
|
▼
|
||||||
|
aria-bridge (VM)
|
||||||
|
│ pushes {type: "flux_request",
|
||||||
|
│ payload: {requestId, prompt, ...}}
|
||||||
|
│ via RVS-Broadcast
|
||||||
|
▼
|
||||||
|
RVS
|
||||||
|
│ fanout
|
||||||
|
▼
|
||||||
|
flux-bridge (Gamebox)
|
||||||
|
│ FluxPipeline.from_pretrained(...)
|
||||||
|
│ pipeline(prompt, width, height, steps, guidance).images[0]
|
||||||
|
│ PIL → PNG → base64
|
||||||
|
│ {type: "flux_response", payload: {state:"done",
|
||||||
|
│ requestId, base64, mimeType, ...}}
|
||||||
|
▼
|
||||||
|
RVS
|
||||||
|
│
|
||||||
|
▼
|
||||||
|
aria-bridge (VM)
|
||||||
|
│ _pending_flux[requestId].set_result(payload)
|
||||||
|
│ base64-decode → /shared/uploads/aria_generated_<ts>.png
|
||||||
|
│ HTTP 200 zurueck an Brain mit {path, sizeBytes, ...}
|
||||||
|
▼
|
||||||
|
aria-brain
|
||||||
|
│ Tool-Result + Hint: "schreib [FILE: {path}] in deine Antwort"
|
||||||
|
│ Final-Reply: "Hier dein Bild:\n[FILE: /shared/uploads/aria_generated_<ts>.png]"
|
||||||
|
▼
|
||||||
|
aria-bridge
|
||||||
|
│ _FILE_MARKER_RE → file_from_aria-Event
|
||||||
|
│ Marker bleibt im Chat-Text fuer Hist; App rendert das Bild inline
|
||||||
|
▼
|
||||||
|
App + Diagnostic
|
||||||
|
```
|
||||||
|
|
||||||
|
## Komponenten
|
||||||
|
|
||||||
|
### 1. `flux/bridge.py` (neu) — flux-bridge Container
|
||||||
|
|
||||||
|
- `FluxPipeline` (diffusers) mit `enable_model_cpu_offload()` als Default,
|
||||||
|
damit FLUX.1-dev (~24 GB on disk, ~12 B params) auf einer RTX 3060
|
||||||
|
(12 GB VRAM) ueberhaupt laeuft.
|
||||||
|
- Lazy-Load: Modell wird beim ersten `flux_request` (oder im Initial-Load)
|
||||||
|
geladen, `service_status: "flux", state: "loading" | "ready" | "error"`
|
||||||
|
wird via RVS broadcastet → Diagnostic-Badge zeigt's an.
|
||||||
|
- Single-Worker-Queue (`_flux_queue`) — GPU darf nicht parallel rendern,
|
||||||
|
sonst OOM oder Crash.
|
||||||
|
- Progress-Ping: `flux_response {state: "rendering"}` direkt nach
|
||||||
|
Queue-Pickup, damit die aria-bridge weiss "Auftrag angekommen", auch
|
||||||
|
wenn der eigentliche Render 60s braucht.
|
||||||
|
- Caps:
|
||||||
|
- `width`/`height`: 256 .. `FLUX_MAX_DIM` (Default 1536), gesnappt auf
|
||||||
|
Vielfache von 64.
|
||||||
|
- `steps`: 1 .. `FLUX_MAX_STEPS` (Default 50).
|
||||||
|
- `guidance_scale`: 0.0 .. 20.0.
|
||||||
|
- `prompt`: max 2000 chars.
|
||||||
|
- Env-Switches:
|
||||||
|
- `FLUX_MODEL` — Default `black-forest-labs/FLUX.1-dev` (non-commercial).
|
||||||
|
Alt: `FLUX.1-schnell` (Apache-2.0, 4 Steps, deutlich schneller).
|
||||||
|
- `FLUX_OFFLOAD` — `model` (default), `sequential` (sparsamer, langsamer)
|
||||||
|
oder `none` (alles auf GPU; nur fuer >=24 GB VRAM-Karten).
|
||||||
|
- `FLUX_DTYPE` — `bfloat16` (default) oder `float16`.
|
||||||
|
- `HF_TOKEN` — FLUX.1-dev braucht HuggingFace-Login.
|
||||||
|
|
||||||
|
### 2. `flux/docker-compose.yml` — eigener Stack
|
||||||
|
|
||||||
|
Bewusst NICHT mit in `xtts/docker-compose.yml` gepackt: FLUX kann auch
|
||||||
|
separat laufen (z.B. spaeter auf einer 4090, waehrend die 3060 weiter
|
||||||
|
TTS+STT bedient). Eigener Compose, eigene `.env.example`, eigenes
|
||||||
|
`hf-cache/`-Volume.
|
||||||
|
|
||||||
|
- GPU-Reservation analog zu f5tts/whisper.
|
||||||
|
- Volume `./hf-cache:/root/.cache/huggingface` — wenn flux auf der
|
||||||
|
gleichen Maschine wie xtts laeuft kann man `../xtts/hf-cache`
|
||||||
|
symlinken, dann ist der Modell-Cache geteilt.
|
||||||
|
- Restart `unless-stopped`.
|
||||||
|
|
||||||
|
### 3. `rvs/server.js` — Allowlist erweitert
|
||||||
|
|
||||||
|
Neue Typen: `flux_request`, `flux_response`. Der beim Initial-Load gesendete
|
||||||
|
`service_status`-Broadcast war bereits zugelassen.
|
||||||
|
|
||||||
|
### 4. `bridge/aria_bridge.py`
|
||||||
|
|
||||||
|
- `self._pending_flux: dict[str, asyncio.Future]` — request_id → future.
|
||||||
|
- `self._remote_flux_ready: bool` — wird von `service_status` Updates
|
||||||
|
gefuellt; steuert den HTTP-Timeout (240 s wenn ready, 900 s waehrend
|
||||||
|
des allerersten Modell-Downloads).
|
||||||
|
- `flux_response`-Handler: Progress-Ping (`state == "rendering"`) bleibt
|
||||||
|
no-op auf der Future; `state == "done"` setzt die Future, Error setzt
|
||||||
|
`{"error": ...}`.
|
||||||
|
- `_flux_generate(prompt, width, height, steps, guidance, seed, model)` — Helper:
|
||||||
|
1. UUID + Future
|
||||||
|
2. `flux_request` broadcasten
|
||||||
|
3. `asyncio.wait_for(future, timeout=...)`
|
||||||
|
4. base64 → `/shared/uploads/aria_generated_<ts>.png`
|
||||||
|
5. dict mit `{ok, path, sizeBytes, width, height, steps, guidance, seed, model, renderSeconds}`
|
||||||
|
- HTTP-Endpoint `POST /internal/flux-generate` im internen Listener
|
||||||
|
(Port 8090). Validiert prompt + clamps, ruft `_flux_generate`, gibt
|
||||||
|
Result als JSON zurueck.
|
||||||
|
|
||||||
|
### 5. `aria-brain/agent.py` — META-Tool `flux_generate`
|
||||||
|
|
||||||
|
```jsonc
|
||||||
|
{
|
||||||
|
"name": "flux_generate",
|
||||||
|
"parameters": {
|
||||||
|
"prompt": "string (englischer Prompt — FLUX liefert auf EN besser)",
|
||||||
|
"width": "integer (256..1536, default 1024)",
|
||||||
|
"height": "integer (256..1536, default 1024)",
|
||||||
|
"steps": "integer (1..50, default 28)",
|
||||||
|
"guidance_scale": "number (default 3.5)",
|
||||||
|
"seed": "integer (optional)"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
Dispatcher:
|
||||||
|
- POSTet `{prompt, width, height, steps, guidance_scale, seed}` an
|
||||||
|
`http://aria-bridge:8090/internal/flux-generate` (urllib, 1200 s Timeout
|
||||||
|
— der erste Render kann den 24 GB Modell-Download triggern).
|
||||||
|
- Bei `ok=true` gibt das Tool den **Pfad** + Render-Stats zurueck und
|
||||||
|
weist Claude explizit an: *"Schreibe `[FILE: <path>]` in deine
|
||||||
|
Antwort an Stefan, dann zeigt die App das Bild inline."*
|
||||||
|
- Brain ueberlegt sich den Begleittext selber und packt den Marker an
|
||||||
|
passende Stelle.
|
||||||
|
|
||||||
|
### 6. `diagnostic/index.html` — Status-Badge
|
||||||
|
|
||||||
|
Label `flux: 'FLUX Image-Gen'` zum bestehenden `updateServiceStatus()`-Switch
|
||||||
|
hinzugefuegt — kein neuer Code, gleicher Banner-Mechanismus wie F5-TTS /
|
||||||
|
Whisper.
|
||||||
|
|
||||||
|
## File-Lifecycle
|
||||||
|
|
||||||
|
Generierte Bilder leben unter `/shared/uploads/aria_generated_<ts>.png`
|
||||||
|
(gleicher Folder wie User-Uploads). Damit:
|
||||||
|
- `[FILE: ...]`-Marker funktioniert (Bridge erlaubt nur Pfade unter
|
||||||
|
`/shared/uploads/`).
|
||||||
|
- File-Manager-Endpoints in Diagnostic (Liste/Loeschen/Zip) sehen sie
|
||||||
|
ohne Sonderbehandlung.
|
||||||
|
- Memory-Anhaenge: ARIA kann ein generiertes Bild im selben Turn an
|
||||||
|
einen Memory-Eintrag haengen (`memory_save(attach_paths=[path])`).
|
||||||
|
|
||||||
|
## Bekannte Stolpersteine
|
||||||
|
|
||||||
|
- **HF-Login**: FLUX.1-dev ist gated. Vor dem ersten dev-Render den Token in Diagnostic
|
||||||
|
(Section "FLUX Bildgenerierung") eintragen, alternativ `HF_TOKEN` im `.env` setzen oder im Container `huggingface-cli login` machen,
|
||||||
|
sonst 403 beim ersten Download.
|
||||||
|
- **Erster Render dauert lang**: 24 GB Modell laden + CUDA-Warmup → 5-10
|
||||||
|
min realistisch. Brain-HTTP-Timeout ist 1200 s, RVS-Future-Timeout
|
||||||
|
900 s (loading-Modus). Stefan sollte beim ersten "Mal mir was"-Request
|
||||||
|
ein bisschen Geduld haben — danach sind Renders ~30-90 s.
|
||||||
|
- **Lizenz**: FLUX.1-dev ist *non-commercial* (FLUX.1 Dev Non-Commercial
|
||||||
|
License). Fuer kommerzielle Nutzung muss man auf `FLUX.1-schnell`
|
||||||
|
(Apache-2.0) oder `FLUX.1-pro` (API only) wechseln. Stefan kann das
|
||||||
|
ueber das Default-Modell in Diagnostic (Section "FLUX Bildgenerierung") oder ueber `FLUX_MODEL` in der `.env` umstellen.
|
||||||
|
- **VRAM**: 12 GB (3060) reichen NUR mit `enable_model_cpu_offload`. Bei
|
||||||
|
Out-of-Memory in den Logs auf `FLUX_OFFLOAD=sequential` switchen
|
||||||
|
(deutlich langsamer, aber peak-VRAM ~6 GB).
|
||||||
|
- **Parallele Calls**: Single-Worker-Queue in der flux-bridge — ein
|
||||||
|
zweiter `flux_generate`-Tool-Call von Brain wartet, bis der erste fertig
|
||||||
|
ist. In der Praxis kein Problem, weil Stefan eh nicht zwei Bilder
|
||||||
|
gleichzeitig macht.
|
||||||
@@ -0,0 +1,36 @@
|
|||||||
|
# ════════════════════════════════════════════════
|
||||||
|
# ARIA FLUX-Bridge — Konfiguration
|
||||||
|
# Kopieren nach .env und anpassen
|
||||||
|
# ════════════════════════════════════════════════
|
||||||
|
|
||||||
|
# RVS Verbindung (gleiche Daten wie auf der ARIA-VM / xtts/.env)
|
||||||
|
RVS_HOST=mobil.hacker-net.de
|
||||||
|
RVS_PORT=444
|
||||||
|
RVS_TLS=true
|
||||||
|
RVS_TLS_FALLBACK=true
|
||||||
|
RVS_TOKEN=dein_token_hier
|
||||||
|
|
||||||
|
# HuggingFace-Token + Default-Modell werden in ARIA Diagnostic verwaltet
|
||||||
|
# (Section "FLUX Bildgenerierung") und per RVS an die flux-bridge gepusht.
|
||||||
|
# Hier nichts noetig.
|
||||||
|
#
|
||||||
|
# Token-Pflicht NUR fuer FLUX.1-dev (gated). Workflow falls Du dev nutzen
|
||||||
|
# willst:
|
||||||
|
# 1) https://huggingface.co/black-forest-labs/FLUX.1-dev → "Agree"
|
||||||
|
# 2) https://huggingface.co/settings/tokens → "Read"-Token erzeugen
|
||||||
|
# 3) Token in Diagnostic > FLUX Bildgenerierung > HuggingFace-Token
|
||||||
|
# FLUX.1-schnell (Apache-2.0) laeuft ohne Token.
|
||||||
|
|
||||||
|
# Offloading-Strategie (VRAM-Steuerung):
|
||||||
|
# model — Default. Komponentenweise CPU-Offload, gut fuer 12 GB Karten.
|
||||||
|
# sequential — sparsamer (Peak ~6 GB), aber 2-3x langsamer.
|
||||||
|
# none — alles auf GPU. Nur fuer >= 24 GB VRAM-Karten.
|
||||||
|
FLUX_OFFLOAD=model
|
||||||
|
|
||||||
|
# Float-Type. bfloat16 ist FLUX-native; auf alten Karten ohne BF16-Support
|
||||||
|
# auf float16 wechseln.
|
||||||
|
FLUX_DTYPE=bfloat16
|
||||||
|
|
||||||
|
# Hard-Caps gegen versehentlich teure Renders
|
||||||
|
FLUX_MAX_STEPS=50
|
||||||
|
FLUX_MAX_DIM=1536
|
||||||
@@ -0,0 +1,5 @@
|
|||||||
|
# HuggingFace Model-Cache (FLUX.1-dev ~24 GB on disk)
|
||||||
|
hf-cache/
|
||||||
|
|
||||||
|
# Docker .env
|
||||||
|
.env
|
||||||
@@ -0,0 +1,23 @@
|
|||||||
|
FROM nvidia/cuda:12.2.2-cudnn8-runtime-ubuntu22.04
|
||||||
|
|
||||||
|
ENV DEBIAN_FRONTEND=noninteractive
|
||||||
|
ENV PYTHONUNBUFFERED=1
|
||||||
|
|
||||||
|
RUN apt-get update && apt-get install -y --no-install-recommends \
|
||||||
|
python3 python3-pip git \
|
||||||
|
&& rm -rf /var/lib/apt/lists/*
|
||||||
|
|
||||||
|
WORKDIR /app
|
||||||
|
|
||||||
|
# PyTorch CUDA-Wheels zuerst, damit diffusers nicht CPU-Torch zieht.
|
||||||
|
# Versionsmatrix wie bei f5tts gehalten (cu121, Torch 2.3.1) — gleicher
|
||||||
|
# Treiber-Footprint, gleicher HF-Cache-Pfad.
|
||||||
|
RUN pip3 install --no-cache-dir torch==2.3.1 \
|
||||||
|
--index-url https://download.pytorch.org/whl/cu121
|
||||||
|
|
||||||
|
COPY requirements.txt .
|
||||||
|
RUN pip3 install --no-cache-dir -r requirements.txt
|
||||||
|
|
||||||
|
COPY bridge.py .
|
||||||
|
|
||||||
|
CMD ["python3", "bridge.py"]
|
||||||
+557
@@ -0,0 +1,557 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
"""
|
||||||
|
ARIA FLUX-Bridge — laeuft auf der Gamebox (RTX 3060).
|
||||||
|
|
||||||
|
Empfaengt flux_request via RVS → FLUX.1-dev/-schnell auf GPU → sendet
|
||||||
|
flux_response mit base64-PNG zurueck an die aria-bridge. Diese speichert
|
||||||
|
die Datei nach /shared/uploads/ und ARIA referenziert sie mit
|
||||||
|
[FILE: ...]-Marker in ihrer Antwort.
|
||||||
|
|
||||||
|
12 GB VRAM auf der 3060 reichen fuer FLUX.1-dev nur mit
|
||||||
|
`enable_model_cpu_offload()` — sonst OOM. Setze FLUX_OFFLOAD=sequential
|
||||||
|
fuer Maximal-Sparsamkeit (langsamer) oder FLUX_OFFLOAD=none wenn die
|
||||||
|
GPU genug VRAM hat (z.B. spaeter 4090).
|
||||||
|
|
||||||
|
Env:
|
||||||
|
RVS_HOST, RVS_PORT, RVS_TLS, RVS_TLS_FALLBACK, RVS_TOKEN
|
||||||
|
FLUX_MODEL Default: black-forest-labs/FLUX.1-schnell (4-Step, Apache-2.0, kein HF-Token noetig)
|
||||||
|
Alt: black-forest-labs/FLUX.1-dev (gated, HF-Token noetig)
|
||||||
|
FLUX_DEVICE Default: cuda
|
||||||
|
FLUX_DTYPE Default: bfloat16 (alt: float16)
|
||||||
|
FLUX_OFFLOAD Default: model (alt: sequential | none)
|
||||||
|
FLUX_MAX_STEPS Default: 50
|
||||||
|
FLUX_MAX_DIM Default: 1536
|
||||||
|
"""
|
||||||
|
import asyncio
|
||||||
|
import base64
|
||||||
|
import io
|
||||||
|
import json
|
||||||
|
import logging
|
||||||
|
import os
|
||||||
|
import sys
|
||||||
|
import time
|
||||||
|
import uuid
|
||||||
|
from typing import Optional
|
||||||
|
|
||||||
|
import websockets
|
||||||
|
|
||||||
|
# ── Logging ─────────────────────────────────────────────────
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s [%(levelname)s] %(message)s",
    datefmt="%H:%M:%S",
)
logger = logging.getLogger("flux-bridge")
# Quieten the HTTP client loggers (HuggingFace/Torch download chatter).
for _noisy_logger in ("httpx", "urllib3"):
    logging.getLogger(_noisy_logger).setLevel(logging.WARNING)

# ── RVS connection settings (from the environment) ──────────
RVS_HOST = os.environ.get("RVS_HOST", "").strip()
RVS_PORT = int(os.environ.get("RVS_PORT", "443"))
RVS_TLS = os.environ.get("RVS_TLS", "true").lower() == "true"
RVS_TLS_FALLBACK = os.environ.get("RVS_TLS_FALLBACK", "true").lower() == "true"
RVS_TOKEN = os.environ.get("RVS_TOKEN", "").strip()

# Bootstrap fallback: only relevant when no Diagnostic config broadcast
# arrives on the very first start AND the first render request carries no
# 'model' either. Defaults to 'schnell' because it is Apache-2.0 (no HF
# token needed); the preferred default is configured via Diagnostic, so this
# env var is intentionally no longer documented in .env.example.
FLUX_MODEL = os.environ.get("FLUX_MODEL", "black-forest-labs/FLUX.1-schnell").strip()
FLUX_DEVICE = os.environ.get("FLUX_DEVICE", "cuda").strip()
FLUX_DTYPE = os.environ.get("FLUX_DTYPE", "bfloat16").strip().lower()
FLUX_OFFLOAD = os.environ.get("FLUX_OFFLOAD", "model").strip().lower()
FLUX_MAX_STEPS = int(os.environ.get("FLUX_MAX_STEPS", "50"))
FLUX_MAX_DIM = int(os.environ.get("FLUX_MAX_DIM", "1536"))

# Per-model defaults. FLUX-dev: guidance=3.5, steps=28.
# FLUX-schnell: guidance=0.0, steps=4.
DEFAULT_STEPS_DEV = 28
DEFAULT_STEPS_SCHNELL = 4
DEFAULT_GUIDANCE_DEV = 3.5
DEFAULT_GUIDANCE_SCHNELL = 0.0

# User-facing tag → HF model ID. Diagnostic only offers 'dev' / 'schnell';
# FLUX_MODEL from the env may be a custom ID (bootstrap), but is normally
# overridden by the Diagnostic choice on the first config broadcast.
MODEL_TAGS: dict[str, str] = {
    "dev": "black-forest-labs/FLUX.1-dev",
    "schnell": "black-forest-labs/FLUX.1-schnell",
}
|
||||||
|
|
||||||
|
|
||||||
|
def _tag_to_model_id(tag: str) -> str:
    """Map a user-facing model tag to a HuggingFace model ID.

    'dev' / 'schnell' are looked up in MODEL_TAGS. Any other non-empty string
    is passed through unchanged (custom IDs). Empty, None or whitespace-only
    input falls back to FLUX_MODEL.
    """
    # Strip BEFORE the emptiness check so that "   " takes the fallback
    # instead of being returned as an empty model ID.
    cleaned = (tag or "").strip()
    if not cleaned:
        return FLUX_MODEL
    return MODEL_TAGS.get(cleaned, cleaned)
|
||||||
|
|
||||||
|
|
||||||
|
def _is_schnell(model_id: str) -> bool:
|
||||||
|
return "schnell" in model_id.lower()
|
||||||
|
|
||||||
|
|
||||||
|
def _is_model_cached(model_id: str) -> bool:
|
||||||
|
"""Prueft ob ein HF-Modell-Snapshot lokal im hf-cache vorhanden ist.
|
||||||
|
|
||||||
|
HF speichert unter ~/.cache/huggingface/hub/models--{org}--{name}/snapshots/{rev}/.
|
||||||
|
Wenn das snapshots-Verzeichnis nicht existiert oder leer ist → Erst-Download
|
||||||
|
steht an (24+ GB fuer FLUX.1-dev, 24+ GB fuer FLUX.1-schnell — Stefan kriegt
|
||||||
|
dann nen Hinweis im Banner).
|
||||||
|
"""
|
||||||
|
if not model_id:
|
||||||
|
return False
|
||||||
|
cache_root = os.environ.get("HF_HOME") or os.path.expanduser("~/.cache/huggingface")
|
||||||
|
safe = "models--" + model_id.replace("/", "--")
|
||||||
|
snapshots = os.path.join(cache_root, "hub", safe, "snapshots")
|
||||||
|
if not os.path.isdir(snapshots):
|
||||||
|
return False
|
||||||
|
try:
|
||||||
|
for rev in os.listdir(snapshots):
|
||||||
|
rev_dir = os.path.join(snapshots, rev)
|
||||||
|
if os.path.isdir(rev_dir) and any(os.scandir(rev_dir)):
|
||||||
|
return True
|
||||||
|
except OSError:
|
||||||
|
return False
|
||||||
|
return False
|
||||||
|
|
||||||
|
|
||||||
|
def _torch_dtype():
    """Resolve FLUX_DTYPE to a torch dtype, defaulting to bfloat16.

    torch is imported lazily so that it is only loaded once a model is
    actually about to be loaded.
    """
    import torch

    known = {
        "bfloat16": torch.bfloat16,
        "float16": torch.float16,
        "float32": torch.float32,
    }
    if FLUX_DTYPE in known:
        return known[FLUX_DTYPE]
    return torch.bfloat16
|
||||||
|
|
||||||
|
|
||||||
|
def _snap_dim(v: int, default: int = 1024) -> int:
    """Clamp an image dimension and snap it down to a multiple of 64.

    The original author notes that FLUX needs multiples of 16 and that 64 is
    the safe choice. Values that cannot be converted to an int fall back to
    ``default``. The result is clamped to [256, FLUX_MAX_DIM] before snapping
    and is never below 256.
    """
    try:
        n = int(v)
    except (TypeError, ValueError, OverflowError):
        # OverflowError: int(float("inf")) — JSON such as 1e999 parses to inf.
        n = default
    n = max(256, min(FLUX_MAX_DIM, n))
    # Round DOWN to the nearest multiple of 64.
    n = (n // 64) * 64
    return max(256, n)
|
||||||
|
|
||||||
|
|
||||||
|
class FluxRunner:
    """Holds exactly ONE FLUX pipeline.

    On a model change the old pipeline is discarded and the new one is loaded.
    A request may carry a 'dev'/'schnell' tag; without one, ``default_model_id``
    is used. That attribute starts as FLUX_MODEL and is updated by the config
    broadcast handling in ``run_loop``.

    Loading and rendering are serialized through one asyncio lock, so a model
    swap can never run while a render is still using the pipeline.
    """

    def __init__(self) -> None:
        self.pipe = None
        self._lock = asyncio.Lock()
        # Currently loaded model — empty while nothing is loaded.
        self.model_id: str = ""
        # Used for requests WITHOUT an explicit model. Set from the Diagnostic
        # config; FLUX_MODEL is only the fallback when neither config nor
        # request names a model.
        self.default_model_id: str = FLUX_MODEL
        self.last_load_seconds: float = 0.0
        # True when the last _load_blocking found the model missing from the
        # HF cache (i.e. a fresh download was needed). Callers report this as
        # 'freshlyDownloaded' in the 'ready' service_status.
        self.last_load_was_download: bool = False

    def _load_blocking(self, model_id: str) -> None:
        """Load ``model_id`` synchronously, replacing any current pipeline.

        Blocking; meant to run in an executor thread. Applies the offload
        strategy selected by FLUX_OFFLOAD and enables VAE tiling when the
        pipeline supports it.
        """
        import gc

        import torch
        from diffusers import FluxPipeline

        # Release the old pipeline first so the loader gets VRAM/RAM.
        if self.pipe is not None:
            logger.info("Verwerfe alte Pipeline '%s'", self.model_id)
            self.pipe = None
            # Keep state consistent: if the load below fails, nothing is loaded.
            self.model_id = ""
            # Collect garbage BEFORE emptying the CUDA cache, so memory held by
            # unreachable objects is freed and can actually be returned.
            gc.collect()
            try:
                torch.cuda.empty_cache()
            except Exception:
                pass  # best-effort cleanup only

        was_cached = _is_model_cached(model_id)
        self.last_load_was_download = not was_cached
        if not was_cached:
            logger.warning("FLUX '%s' nicht im HF-Cache — Erst-Download steht bevor (kann 5-10 min dauern).",
                           model_id)
        logger.info("Lade FLUX '%s' (dtype=%s, offload=%s, cached=%s)...",
                    model_id, FLUX_DTYPE, FLUX_OFFLOAD, was_cached)
        t0 = time.time()
        pipe = FluxPipeline.from_pretrained(model_id, torch_dtype=_torch_dtype())

        if FLUX_OFFLOAD == "sequential":
            pipe.enable_sequential_cpu_offload()
        elif FLUX_OFFLOAD == "none":
            pipe.to(FLUX_DEVICE)
        else:  # "model" — the default; per the author the sweet spot for 12 GB cards
            pipe.enable_model_cpu_offload()

        # VAE tiling saves VRAM for large images (>1024).
        try:
            pipe.vae.enable_tiling()
        except Exception:
            pass  # optional optimisation; ignore if unsupported

        self.pipe = pipe
        self.model_id = model_id
        self.last_load_seconds = time.time() - t0
        logger.info("FLUX '%s' geladen in %.1fs", model_id, self.last_load_seconds)
        try:
            torch.cuda.empty_cache()
        except Exception:
            pass  # best-effort cleanup only

    async def _ensure_loaded_locked(self, target: str) -> bool:
        """Load ``target`` unless it is already active. Caller must hold ``self._lock``."""
        if self.pipe is not None and self.model_id == target:
            return False
        loop = asyncio.get_running_loop()
        await loop.run_in_executor(None, self._load_blocking, target)
        return True

    async def ensure_loaded(self, model_id: Optional[str] = None) -> bool:
        """Make sure the right pipeline is loaded, swapping if necessary.

        Without ``model_id`` the target is ``default_model_id``, falling back
        to FLUX_MODEL. Returns True when a load/swap actually happened.
        """
        target = model_id or self.default_model_id or FLUX_MODEL
        async with self._lock:
            return await self._ensure_loaded_locked(target)

    def _generate_blocking(self, prompt: str, width: int, height: int,
                           steps: int, guidance: float, seed: Optional[int]) -> bytes:
        """Render one image with the loaded pipeline and return it as PNG bytes.

        Blocking; meant to run in an executor thread. A ``seed`` of None or a
        negative seed means "no fixed seed".
        """
        import torch

        gen = None
        if seed is not None and seed >= 0:
            gen = torch.Generator(device=FLUX_DEVICE).manual_seed(int(seed))

        logger.info("Render (%s): %dx%d, steps=%d, guidance=%.2f, seed=%s, prompt=%r",
                    self.model_id, width, height, steps, guidance, seed, prompt[:80])
        out = self.pipe(
            prompt=prompt,
            width=width,
            height=height,
            num_inference_steps=steps,
            guidance_scale=guidance,
            generator=gen,
        )
        image = out.images[0]
        buf = io.BytesIO()
        image.save(buf, format="PNG", optimize=True)
        png_bytes = buf.getvalue()
        # Hand VRAM back for the next render.
        try:
            torch.cuda.empty_cache()
        except Exception:
            pass  # best-effort cleanup only
        return png_bytes

    async def generate(self, prompt: str, width: int, height: int,
                       steps: int, guidance: float, seed: Optional[int],
                       model_id: Optional[str] = None) -> bytes:
        """Ensure the requested model is loaded, then render and return PNG bytes.

        The lock is held across load AND render. Otherwise a concurrent
        ``ensure_loaded()`` (e.g. triggered by a config broadcast) could
        discard or replace the pipeline while the render thread is using it.
        """
        target = model_id or self.default_model_id or FLUX_MODEL
        async with self._lock:
            await self._ensure_loaded_locked(target)
            loop = asyncio.get_running_loop()
            return await loop.run_in_executor(
                None, self._generate_blocking, prompt, width, height, steps, guidance, seed,
            )
|
||||||
|
|
||||||
|
|
||||||
|
# ── Helpers ─────────────────────────────────────────────────
|
||||||
|
|
||||||
|
|
||||||
|
async def _send(ws, mtype: str, payload: dict) -> None:
|
||||||
|
try:
|
||||||
|
await ws.send(json.dumps({
|
||||||
|
"type": mtype,
|
||||||
|
"payload": payload,
|
||||||
|
"timestamp": int(time.time() * 1000),
|
||||||
|
}))
|
||||||
|
except Exception as e:
|
||||||
|
logger.warning("Send fehlgeschlagen (%s): %s", mtype, e)
|
||||||
|
|
||||||
|
|
||||||
|
async def _broadcast_status(ws, state: str, **extra) -> None:
    """Send a ``service_status`` message for the flux service.

    ``state`` is one of 'loading' | 'ready' | 'error'. Additional keyword
    arguments are merged into the payload.
    """
    status = {"service": "flux", "state": state, **extra}
    await _send(ws, "service_status", status)
|
||||||
|
|
||||||
|
|
||||||
|
# ── Flux-Request Queue ──────────────────────────────────────
|
||||||
|
|
||||||
|
# Eine GPU, ein Render gleichzeitig. Parallele Requests OOM-en sonst.
|
||||||
|
_flux_queue: "asyncio.Queue[tuple]" = asyncio.Queue()
|
||||||
|
|
||||||
|
|
||||||
|
def _resolve_request(payload: dict, runner: FluxRunner) -> tuple[str, int, int, int, float, Optional[int], str]:
    """Read the fields of a flux_request payload and clamp them to the caps.

    Returns ``(prompt, width, height, steps, guidance, seed, resolved_model_id)``.

    Raises:
        ValueError: if the prompt is missing, empty, or not a string.
    """
    raw_prompt = payload.get("prompt")
    if raw_prompt is not None and not isinstance(raw_prompt, str):
        # Calling .strip() on a non-string would raise AttributeError and end
        # up as an opaque "internal error" — reject it explicitly instead.
        raise ValueError("prompt muss ein String sein")
    prompt = (raw_prompt or "").strip()
    if not prompt:
        raise ValueError("prompt fehlt")
    if len(prompt) > 2000:
        prompt = prompt[:2000]

    width = _snap_dim(payload.get("width", 1024))
    height = _snap_dim(payload.get("height", 1024))

    # Model choice: explicit request value > runner.default_model_id > FLUX_MODEL.
    # A non-string 'model' is treated as "not given".
    raw_model = payload.get("model")
    req_model = raw_model.strip() if isinstance(raw_model, str) else ""
    resolved_model_id = _tag_to_model_id(req_model) if req_model else (runner.default_model_id or FLUX_MODEL)

    schnell = _is_schnell(resolved_model_id)
    default_steps = DEFAULT_STEPS_SCHNELL if schnell else DEFAULT_STEPS_DEV
    default_guidance = DEFAULT_GUIDANCE_SCHNELL if schnell else DEFAULT_GUIDANCE_DEV

    # OverflowError covers int(inf) and float(<huge int>), both of which can
    # come out of JSON parsing.
    try:
        steps = int(payload.get("steps", default_steps))
    except (TypeError, ValueError, OverflowError):
        steps = default_steps
    steps = max(1, min(FLUX_MAX_STEPS, steps))

    try:
        guidance = float(payload.get("guidance_scale", default_guidance))
    except (TypeError, ValueError, OverflowError):
        guidance = default_guidance
    # The range check also rejects NaN, since every comparison with NaN is False.
    if not (0.0 <= guidance <= 20.0):
        guidance = default_guidance

    seed = payload.get("seed")
    if seed is not None:
        try:
            seed = int(seed)
        except (TypeError, ValueError, OverflowError):
            seed = None

    return prompt, width, height, steps, guidance, seed, resolved_model_id
|
||||||
|
|
||||||
|
|
||||||
|
async def _flux_worker(ws, runner: FluxRunner) -> None:
    """Process queued flux requests one at a time (one GPU, one image at a time).

    Runs until cancelled. An unexpected error in a job is logged and reported
    to the requester as a generic "internal error" response.
    """
    while True:
        job = await _flux_queue.get()
        # Requests without an ID get a generated one so responses stay correlatable.
        rid = job.get("requestId") or str(uuid.uuid4())
        try:
            await _do_render(ws, runner, job, rid)
        except Exception:
            logger.exception("Flux-Worker Fehler")
            failure = {"requestId": rid, "error": "internal error"}
            await _send(ws, "flux_response", failure)
        finally:
            _flux_queue.task_done()
|
||||||
|
|
||||||
|
|
||||||
|
async def _do_render(ws, runner: FluxRunner, payload: dict, request_id: str) -> None:
    """Handle a single flux_request: validate, (maybe) swap model, render, reply.

    Sends ``flux_response`` progress messages ('switching_model', 'rendering',
    'done') or an error response, plus ``service_status`` broadcasts when a
    model load/swap is involved.
    """
    started = time.time()

    async def reply(**fields) -> None:
        # Every flux_response carries the requestId as its first key.
        await _send(ws, "flux_response", {"requestId": request_id, **fields})

    try:
        resolved = _resolve_request(payload, runner)
    except ValueError as e:
        logger.warning("flux_request invalid: %s", e)
        await reply(error=str(e))
        return
    prompt, width, height, steps, guidance, seed, target_model_id = resolved

    # Is a model swap needed? Broadcast status so the Diagnostic banner shows it.
    swap_needed = runner.pipe is None or runner.model_id != target_model_id
    will_download = swap_needed and not _is_model_cached(target_model_id)
    if swap_needed:
        await _broadcast_status(ws, "loading", model=target_model_id,
                                downloading=will_download)
        await reply(state="switching_model", model=target_model_id,
                    downloading=will_download)

    # Progress ping so the user sees something is happening (renders can take >30s).
    await reply(state="rendering", width=width, height=height, steps=steps,
                model=target_model_id)

    try:
        png = await runner.generate(prompt, width, height, steps, guidance, seed,
                                    model_id=target_model_id)
    except Exception as e:
        logger.exception("FLUX Render-Fehler")
        reason = str(e)[:200]
        await reply(error=reason)
        if swap_needed:
            await _broadcast_status(ws, "error", error=reason)
        return

    if swap_needed:
        await _broadcast_status(ws, "ready",
                                model=runner.model_id,
                                loadSeconds=runner.last_load_seconds,
                                freshlyDownloaded=runner.last_load_was_download)

    elapsed = time.time() - started
    encoded = base64.b64encode(png).decode("ascii")
    logger.info("Render fertig: %dx%d, %d KB PNG, %.1fs (%s)",
                width, height, len(png) // 1024, elapsed, runner.model_id)

    await reply(
        state="done",
        base64=encoded,
        mimeType="image/png",
        width=width,
        height=height,
        steps=steps,
        guidance=guidance,
        seed=seed,
        model=runner.model_id,
        renderSeconds=round(elapsed, 2),
        sizeBytes=len(png),
    )
|
||||||
|
|
||||||
|
|
||||||
|
# ── Haupt-Loop ──────────────────────────────────────────────
|
||||||
|
|
||||||
|
|
||||||
|
async def run_loop(runner: FluxRunner) -> None:
    """Connect to the RVS, dispatch incoming messages, and reconnect forever.

    Handled message types:
      * ``flux_request`` — queued for the single render worker.
      * ``config``       — optional HuggingFace token and default-model change.

    Reconnect policy: exponential backoff (2 s doubling up to 30 s). When
    RVS_TLS_FALLBACK is enabled, a FAILED TLS connection attempt is retried
    once over plain ``ws://``. A drop of an already established connection
    never triggers the downgrade, and a failed plaintext attempt returns to
    TLS for the next round.
    """
    use_tls = RVS_TLS
    retry_s = 2
    tls_fallback_tried = False

    # Strong references to fire-and-forget tasks: the event loop only keeps
    # weak references, so an unreferenced task may be garbage-collected
    # mid-execution.
    background_tasks: set = set()

    def _spawn(coro):
        task = asyncio.create_task(coro)
        background_tasks.add(task)
        task.add_done_callback(background_tasks.discard)
        return task

    while True:
        scheme = "wss" if use_tls else "ws"
        url = f"{scheme}://{RVS_HOST}:{RVS_PORT}/ws?token={RVS_TOKEN}"
        masked = url.replace(RVS_TOKEN, "***") if RVS_TOKEN else url
        # Distinguishes "could not connect" from "connection dropped later".
        connected = False

        try:
            logger.info("Verbinde zu RVS: %s", masked)
            # max_size 100 MB so a 4 MP PNG (~5-10 MB → ~13 MB base64) fits
            # comfortably. Consistent with the RVS limit (100 MB).
            async with websockets.connect(url, ping_interval=20, ping_timeout=10,
                                          max_size=100 * 1024 * 1024) as ws:
                connected = True
                logger.info("RVS verbunden")
                retry_s = 2
                tls_fallback_tried = False

                async def _load_with_status():
                    """No eager load on connect — ask for the Diagnostic config first.

                    Which model actually gets loaded is decided either by the
                    config broadcast or by the first flux_request. Until then
                    no 'loading' status is sent; only an already loaded
                    pipeline (reconnect case) is announced as ready.
                    """
                    try:
                        if runner.pipe is not None:
                            # The pipeline only lives as long as the process, so
                            # this branch is only hit on a reconnect.
                            await _broadcast_status(ws, "ready",
                                                    model=runner.model_id,
                                                    loadSeconds=runner.last_load_seconds)
                        logger.info("Initial: sende config_request an aria-bridge "
                                    "(kein Eager-Load, warte auf Diagnostic-Wahl)")
                        await _send(ws, "config_request", {"service": "flux"})
                    except Exception as e:
                        logger.exception("Initial-Setup crashed: %s", e)
                        try:
                            await _broadcast_status(ws, "error", error=str(e)[:200])
                        except Exception:
                            pass

                _spawn(_load_with_status())

                worker = asyncio.create_task(_flux_worker(ws, runner))

                async def _apply_default_change(new_tag: str):
                    """Switch the default model.

                    If a different model than the active one is requested it is
                    loaded eagerly, so the next render has no swap delay.
                    """
                    new_model_id = _tag_to_model_id(new_tag)
                    runner.default_model_id = new_model_id
                    if runner.model_id == new_model_id:
                        logger.info("[config] Default-Modell bleibt: %s", new_model_id)
                        return
                    will_download = not _is_model_cached(new_model_id)
                    logger.info("[config] Default-Modell wechselt: %s → %s (download=%s)",
                                runner.model_id or "(none)", new_model_id, will_download)
                    try:
                        await _broadcast_status(ws, "loading", model=new_model_id,
                                                downloading=will_download)
                        await runner.ensure_loaded(new_model_id)
                        await _broadcast_status(ws, "ready",
                                                model=runner.model_id,
                                                loadSeconds=runner.last_load_seconds,
                                                freshlyDownloaded=runner.last_load_was_download)
                    except Exception as e:
                        logger.exception("Modell-Swap fehlgeschlagen")
                        try:
                            await _broadcast_status(ws, "error", error=str(e)[:200])
                        except Exception:
                            pass

                try:
                    async for raw in ws:
                        try:
                            msg = json.loads(raw)
                        except Exception:
                            continue
                        if not isinstance(msg, dict):
                            # Valid JSON but not an envelope — ignore it rather
                            # than crash on .get() and drop the connection.
                            continue
                        mtype = msg.get("type", "")
                        payload = msg.get("payload", {}) or {}
                        if not isinstance(payload, dict):
                            # The worker calls payload.get(); a non-dict would
                            # kill the worker task. Treat it as empty.
                            payload = {}

                        if mtype == "flux_request":
                            await _flux_queue.put(payload)
                        elif mtype == "config":
                            # Config broadcast. The HuggingFace token is exported
                            # to the environment BEFORE the model swap is
                            # scheduled, so the token is already in place when
                            # the load starts.
                            if "huggingfaceToken" in payload:
                                tok = (payload.get("huggingfaceToken") or "").strip()
                                if tok:
                                    os.environ["HF_TOKEN"] = tok
                                    os.environ["HUGGING_FACE_HUB_TOKEN"] = tok
                                    logger.info("[config] HF-Token gesetzt (len=%d)", len(tok))
                                else:
                                    os.environ.pop("HF_TOKEN", None)
                                    os.environ.pop("HUGGING_FACE_HUB_TOKEN", None)
                                    logger.info("[config] HF-Token entfernt (leerer Wert)")
                            tag = (payload.get("fluxDefaultModel") or "").strip()
                            if tag:
                                _spawn(_apply_default_change(tag))
                finally:
                    worker.cancel()
                    try:
                        await worker
                    except asyncio.CancelledError:
                        pass
        except Exception as e:
            logger.warning("Verbindung verloren: %s", e)
            if not connected:
                if use_tls and RVS_TLS_FALLBACK and not tls_fallback_tried:
                    logger.info("TLS fehlgeschlagen — Fallback auf ws://")
                    use_tls = False
                    tls_fallback_tried = True
                    continue
                if RVS_TLS and not use_tls:
                    # The plaintext fallback attempt failed as well: go back to
                    # TLS after the backoff instead of staying on ws:// forever.
                    use_tls = True
                    tls_fallback_tried = False
            await asyncio.sleep(min(retry_s, 30))
            retry_s = min(retry_s * 2, 30)
|
||||||
|
|
||||||
|
|
||||||
|
async def main() -> None:
    """Entry point: require RVS_HOST, then run the reconnect loop with a fresh runner."""
    if RVS_HOST:
        await run_loop(FluxRunner())
        return
    logger.error("RVS_HOST nicht gesetzt — Abbruch")
    sys.exit(1)
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
    # Script entry: run the bridge; Ctrl+C exits with status 0.
    try:
        asyncio.run(main())
    except KeyboardInterrupt:
        raise SystemExit(0)
|
||||||
@@ -0,0 +1,57 @@
|
|||||||
|
# ════════════════════════════════════════════════
|
||||||
|
# ARIA FLUX-Bridge — Text-to-Image (GPU)
|
||||||
|
# Eigener Stack, weil FLUX auch auf einer anderen
|
||||||
|
# Maschine als f5tts/whisper laufen kann (z.B. 4090
|
||||||
|
# separat vom Gaming-PC). Verbindet sich selbst per
|
||||||
|
# WebSocket zum RVS und lauscht auf flux_request.
|
||||||
|
# ════════════════════════════════════════════════
|
||||||
|
#
|
||||||
|
# Voraussetzungen:
|
||||||
|
# - NVIDIA-GPU mit >= 12 GB VRAM (3060 reicht mit
|
||||||
|
# enable_model_cpu_offload). Bei < 12 GB:
|
||||||
|
# FLUX_OFFLOAD=sequential setzen, sonst OOM.
|
||||||
|
# - Docker mit NVIDIA Container Toolkit
|
||||||
|
# - HuggingFace-Token nur fuer FLUX.1-dev (gated) — wird in ARIA Diagnostic hinterlegt, nicht in .env
|
||||||
|
# - .env mit RVS-Verbindungsdaten (gleiche wie xtts!)
|
||||||
|
#
|
||||||
|
# Start: docker compose up -d
|
||||||
|
# ════════════════════════════════════════════════
|
||||||
|
|
||||||
|
services:
|
||||||
|
|
||||||
|
# ─── FLUX Bildgenerierung (GPU) ─────────
|
||||||
|
# Empfaengt flux_request via RVS, rendert PNG mit FLUX (12B Params)
|
||||||
|
# und broadcastet flux_response mit base64-PNG zurueck. aria-bridge speichert
|
||||||
|
# die Datei nach /shared/uploads/ und ARIA referenziert sie via [FILE:]-Marker.
|
||||||
|
#
|
||||||
|
# Modell-Wahl + HuggingFace-Token werden in ARIA Diagnostic eingestellt
|
||||||
|
# ("FLUX Bildgenerierung") und per RVS gepusht — hier nichts noetig.
|
||||||
|
flux-bridge:
|
||||||
|
build: .
|
||||||
|
container_name: aria-flux-bridge
|
||||||
|
deploy:
|
||||||
|
resources:
|
||||||
|
reservations:
|
||||||
|
devices:
|
||||||
|
- driver: nvidia
|
||||||
|
count: 1
|
||||||
|
capabilities: [gpu]
|
||||||
|
environment:
|
||||||
|
- RVS_HOST=${RVS_HOST}
|
||||||
|
- RVS_PORT=${RVS_PORT:-443}
|
||||||
|
- RVS_TLS=${RVS_TLS:-true}
|
||||||
|
- RVS_TLS_FALLBACK=${RVS_TLS_FALLBACK:-true}
|
||||||
|
- RVS_TOKEN=${RVS_TOKEN}
|
||||||
|
# Hardware-Bootstrap (Diagnostic-Settings uebersteuern alles andere
|
||||||
|
# zur Laufzeit — diese envs sind nur Edge-Case-Fallbacks).
|
||||||
|
- FLUX_DEVICE=${FLUX_DEVICE:-cuda}
|
||||||
|
- FLUX_DTYPE=${FLUX_DTYPE:-bfloat16}
|
||||||
|
- FLUX_OFFLOAD=${FLUX_OFFLOAD:-model}
|
||||||
|
- FLUX_MAX_STEPS=${FLUX_MAX_STEPS:-50}
|
||||||
|
- FLUX_MAX_DIM=${FLUX_MAX_DIM:-1536}
|
||||||
|
volumes:
|
||||||
|
- ./hf-cache:/root/.cache/huggingface # Bind-Mount. FLUX.1-dev ~24 GB on disk!
|
||||||
|
# Wenn flux auf der gleichen Maschine
|
||||||
|
# wie xtts laeuft: ../xtts/hf-cache
|
||||||
|
# symlinken um den Cache zu teilen.
|
||||||
|
restart: unless-stopped
|
||||||
@@ -0,0 +1,9 @@
|
|||||||
|
diffusers>=0.30.0
|
||||||
|
transformers>=4.43.0
|
||||||
|
accelerate>=0.33.0
|
||||||
|
sentencepiece>=0.2.0
|
||||||
|
protobuf>=4.25.0
|
||||||
|
pillow>=10.0.0
|
||||||
|
huggingface_hub>=0.24.0
|
||||||
|
websockets>=12.0
|
||||||
|
numpy>=1.24
|
||||||
+8
-3
@@ -39,6 +39,7 @@ const ALLOWED_TYPES = new Set([
|
|||||||
"stt_request", "stt_response",
|
"stt_request", "stt_response",
|
||||||
"service_status",
|
"service_status",
|
||||||
"config_request",
|
"config_request",
|
||||||
|
"flux_request", "flux_response",
|
||||||
]);
|
]);
|
||||||
|
|
||||||
// Token-Raum: token -> { clients: Set<ws> }
|
// Token-Raum: token -> { clients: Set<ws> }
|
||||||
@@ -71,10 +72,14 @@ function cleanupRooms() {
|
|||||||
|
|
||||||
// ── WebSocket-Server starten ────────────────────────────────────────
|
// ── WebSocket-Server starten ────────────────────────────────────────
|
||||||
|
|
||||||
// maxPayload 50MB: TTS-Streaming + Voice-Upload (WAV als base64) +
|
// maxPayload 100MB: TTS-Streaming + Voice-Upload (WAV als base64) +
|
||||||
// audio_pcm Chunks koennen die ws-Library Default 1MB ueberschreiten.
|
// audio_pcm Chunks koennen die ws-Library Default 1MB ueberschreiten.
|
||||||
// Default-Limit war der Killer fuer die voice_upload Pipeline.
|
// Plus: file_request/file_response fuer Re-Download von Anhaengen.
|
||||||
const wss = new WebSocketServer({ port: PORT, maxPayload: 50 * 1024 * 1024 });
|
// 40 MB MP4 → ~53 MB base64 → vorher mit 50 MB Limit zerschossen
|
||||||
|
// (Code 1009 message too big, Bridge crashed im cleanup). 100 MB
|
||||||
|
// deckt bis ~70 MB binaer ab; groessere Files werden Bridge-seitig
|
||||||
|
// abgewiesen (siehe file_request-Handler) bevor die WS abreisst.
|
||||||
|
const wss = new WebSocketServer({ port: PORT, maxPayload: 100 * 1024 * 1024 });
|
||||||
|
|
||||||
wss.on("listening", () => {
|
wss.on("listening", () => {
|
||||||
log(`RVS läuft auf Port ${PORT} | Max Sessions: ${MAX_SESSIONS}`);
|
log(`RVS läuft auf Port ${PORT} | Max Sessions: ${MAX_SESSIONS}`);
|
||||||
|
|||||||
@@ -2,6 +2,9 @@
|
|||||||
# ARIA Gamebox Stack — GPU F5-TTS + Whisper STT
|
# ARIA Gamebox Stack — GPU F5-TTS + Whisper STT
|
||||||
# Laeuft auf dem Gaming-PC (RTX 3060)
|
# Laeuft auf dem Gaming-PC (RTX 3060)
|
||||||
# Verbindet sich zum RVS fuer TTS/STT-Requests
|
# Verbindet sich zum RVS fuer TTS/STT-Requests
|
||||||
|
#
|
||||||
|
# FLUX-Bildgenerierung liegt im /flux Verzeichnis im Repo-Root —
|
||||||
|
# eigener Compose-Stack, kann auch auf einer anderen Maschine laufen.
|
||||||
# ════════════════════════════════════════════════
|
# ════════════════════════════════════════════════
|
||||||
#
|
#
|
||||||
# Voraussetzungen:
|
# Voraussetzungen:
|
||||||
|
|||||||
Reference in New Issue
Block a user