feat(flux): Modell-Wahl per Diagnostic + raw/switch-Keywords + Download-Hinweis
Diagnostic-Einstellungen fuer FLUX:
- Default-Modell (dev | schnell) — wird via RVS gepusht, flux-bridge hot-swappt die Pipeline aus dem HF-Cache (~15-30s)
- Raw-Keyword (Default 'flux') — Pipe-Modus, Brain leitet Stefans Text 1:1 als prompt durch, kein Rewriting/Beautify
- Switch-Keyword (Default 'fix') — zwingt das ANDERE Modell als Default

Brain-Tool flux_generate um model + raw erweitert, System-Prompt-Block mit den aktuellen Diagnostic-Settings + Whisper-Toleranz-Hinweis.

Kein eager Bootstrap-Load: flux-bridge wartet auf config oder ersten Request. Bei erstem HF-Download zeigt Banner "laedt erstmalig runter" mit Pfeil-Icon, Toast in der App wenn fertig.

FLUX_MODEL aus der .env entfernt (Steuerung jetzt komplett ueber Diagnostic). HF_TOKEN-Kommentar erklaert warum trotz lokaler Inference noetig (HF Gate-Mechanismus fuer FLUX.1-dev).

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -290,7 +290,7 @@ const ChatScreen: React.FC = () => {
|
|||||||
// Stream zumuellen. Eigentlich seltener Fall, aber billig zu pruefen.
|
// Stream zumuellen. Eigentlich seltener Fall, aber billig zu pruefen.
|
||||||
const lastThoughtKeyRef = useRef<string>('');
|
const lastThoughtKeyRef = useRef<string>('');
|
||||||
// Service-Status (Gamebox: F5-TTS / Whisper Lade-Status) + Banner-Sichtbarkeit
|
// Service-Status (Gamebox: F5-TTS / Whisper Lade-Status) + Banner-Sichtbarkeit
|
||||||
const [serviceStatus, setServiceStatus] = useState<Record<string, {state: string, model?: string, loadSeconds?: number, error?: string}>>({});
|
const [serviceStatus, setServiceStatus] = useState<Record<string, {state: string, model?: string, loadSeconds?: number, error?: string, downloading?: boolean, freshlyDownloaded?: boolean}>>({});
|
||||||
const [serviceBannerDismissed, setServiceBannerDismissed] = useState(false);
|
const [serviceBannerDismissed, setServiceBannerDismissed] = useState(false);
|
||||||
// Gerätelokale TTS-Config: globaler Toggle (aus Settings) + temporäres Muten (Mund-Button)
|
// Gerätelokale TTS-Config: globaler Toggle (aus Settings) + temporäres Muten (Mund-Button)
|
||||||
const [ttsDeviceEnabled, setTtsDeviceEnabled] = useState(true);
|
const [ttsDeviceEnabled, setTtsDeviceEnabled] = useState(true);
|
||||||
@@ -1171,22 +1171,39 @@ const ChatScreen: React.FC = () => {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Gamebox-Bridges (f5tts/whisper) melden Lade-Status — Banner oben
|
// Gamebox-Bridges (f5tts/whisper/flux) melden Lade-Status — Banner oben.
|
||||||
|
// Toast bei Download-Ende: erstmaliger HF-Download (mehrere GB) → User
|
||||||
|
// soll wissen dass er Bilder/Stimmen jetzt nutzen kann ohne in den
|
||||||
|
// Banner gucken zu muessen.
|
||||||
if (message.type === ('service_status' as any)) {
|
if (message.type === ('service_status' as any)) {
|
||||||
const p = message.payload as any;
|
const p = message.payload as any;
|
||||||
const svc = (p?.service as string) || '';
|
const svc = (p?.service as string) || '';
|
||||||
if (!svc) return;
|
if (!svc) return;
|
||||||
|
const newState = (p?.state as string) || 'unknown';
|
||||||
|
const freshlyDownloaded = p?.freshlyDownloaded === true;
|
||||||
setServiceStatus(prev => ({
|
setServiceStatus(prev => ({
|
||||||
...prev,
|
...prev,
|
||||||
[svc]: {
|
[svc]: {
|
||||||
state: (p?.state as string) || 'unknown',
|
state: newState,
|
||||||
model: p?.model as string | undefined,
|
model: p?.model as string | undefined,
|
||||||
loadSeconds: p?.loadSeconds as number | undefined,
|
loadSeconds: p?.loadSeconds as number | undefined,
|
||||||
error: p?.error as string | undefined,
|
error: p?.error as string | undefined,
|
||||||
|
downloading: p?.downloading === true,
|
||||||
|
freshlyDownloaded,
|
||||||
},
|
},
|
||||||
}));
|
}));
|
||||||
// Bei neuer Loading-Phase Banner wieder aktivieren
|
// Bei neuer Loading-Phase Banner wieder aktivieren
|
||||||
if (p?.state === 'loading') setServiceBannerDismissed(false);
|
if (newState === 'loading') setServiceBannerDismissed(false);
|
||||||
|
// Download-Fertig-Toast: Bridge setzt freshlyDownloaded=true bei dem
|
||||||
|
// 'ready'-Broadcast direkt nach einem Cache-Miss-Load. Ein einziger
|
||||||
|
// Toast pro Modell-Download, kein State-Tracking auf App-Seite noetig.
|
||||||
|
if (newState === 'ready' && freshlyDownloaded) {
|
||||||
|
const niceName = svc === 'flux' ? 'FLUX' : svc === 'f5tts' ? 'F5-TTS' : svc === 'whisper' ? 'Whisper' : svc;
|
||||||
|
const model = p?.model ? ` (${p.model})` : '';
|
||||||
|
try {
|
||||||
|
ToastAndroid.show(`${niceName}-Modell heruntergeladen${model} — jetzt einsatzbereit`, ToastAndroid.LONG);
|
||||||
|
} catch {}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
|
|
||||||
@@ -2196,7 +2213,7 @@ const ChatScreen: React.FC = () => {
|
|||||||
const allReady = !anyLoading && !anyError && entries.every(([, v]) => v.state === 'ready');
|
const allReady = !anyLoading && !anyError && entries.every(([, v]) => v.state === 'ready');
|
||||||
const bg = anyError ? '#3A1F1F' : anyLoading ? '#3A331F' : '#1F3A2A';
|
const bg = anyError ? '#3A1F1F' : anyLoading ? '#3A331F' : '#1F3A2A';
|
||||||
const border = anyError ? '#FF3B30' : anyLoading ? '#FFD60A' : '#34C759';
|
const border = anyError ? '#FF3B30' : anyLoading ? '#FFD60A' : '#34C759';
|
||||||
const labels: Record<string, string> = { f5tts: 'F5-TTS', whisper: 'Whisper STT' };
|
const labels: Record<string, string> = { f5tts: 'F5-TTS', whisper: 'Whisper STT', flux: 'FLUX Image-Gen' };
|
||||||
return (
|
return (
|
||||||
<TouchableOpacity
|
<TouchableOpacity
|
||||||
activeOpacity={allReady ? 0.6 : 1.0}
|
activeOpacity={allReady ? 0.6 : 1.0}
|
||||||
@@ -2206,11 +2223,16 @@ const ChatScreen: React.FC = () => {
|
|||||||
{entries.map(([svc, info]) => {
|
{entries.map(([svc, info]) => {
|
||||||
let icon = '\u23F3', text = '';
|
let icon = '\u23F3', text = '';
|
||||||
if (info.state === 'loading') {
|
if (info.state === 'loading') {
|
||||||
text = `${labels[svc] || svc}: laedt${info.model ? ' ' + info.model : ''}...`;
|
icon = info.downloading ? '\u2B07' : '\u23F3'; // \u2B07 vs \u23F3
|
||||||
|
const action = info.downloading
|
||||||
|
? 'laedt erstmalig runter (mehrere GB, kann dauern)'
|
||||||
|
: 'laedt';
|
||||||
|
text = `${labels[svc] || svc}: ${action}${info.model ? ' ' + info.model : ''}...`;
|
||||||
} else if (info.state === 'ready') {
|
} else if (info.state === 'ready') {
|
||||||
icon = '\u2705';
|
icon = info.freshlyDownloaded ? '\uD83C\uDF89' : '\u2705'; // \uD83C\uDF89 vs \u2705
|
||||||
const sec = info.loadSeconds ? ` (${info.loadSeconds.toFixed(1)}s)` : '';
|
const sec = info.loadSeconds ? ` (${info.loadSeconds.toFixed(1)}s)` : '';
|
||||||
text = `${labels[svc] || svc}: bereit${info.model ? ' ' + info.model : ''}${sec}`;
|
const dl = info.freshlyDownloaded ? ' \u2014 Download fertig!' : '';
|
||||||
|
text = `${labels[svc] || svc}: bereit${info.model ? ' ' + info.model : ''}${sec}${dl}`;
|
||||||
} else if (info.state === 'error') {
|
} else if (info.state === 'error') {
|
||||||
icon = '\u274C';
|
icon = '\u274C';
|
||||||
text = `${labels[svc] || svc}: Fehler ${info.error || ''}`;
|
text = `${labels[svc] || svc}: Fehler ${info.error || ''}`;
|
||||||
|
|||||||
+70
-8
@@ -36,6 +36,27 @@ BRIDGE_URL = os.environ.get("BRIDGE_URL", "http://aria-bridge:8090")
|
|||||||
# laedt die flux-bridge zudem ~24 GB Modell von HF (~5-10 min). Brain wartet
|
# laedt die flux-bridge zudem ~24 GB Modell von HF (~5-10 min). Brain wartet
|
||||||
# synchron — Stefan kuendigt es vorher an wenn er weiss dass es feuert.
|
# synchron — Stefan kuendigt es vorher an wenn er weiss dass es feuert.
|
||||||
FLUX_HTTP_TIMEOUT_SEC = 1200
|
FLUX_HTTP_TIMEOUT_SEC = 1200
|
||||||
|
# Diagnostic settings for FLUX (default model + user keywords) live in the
# same file as the F5-TTS/Whisper config — written by the aria-bridge.
VOICE_CONFIG_PATH = "/shared/config/voice_config.json"


def _load_flux_config() -> dict:
    """Read the flux* fields from the shared voice config file.

    Returns:
        A dict with exactly the keys ``fluxDefaultModel``, ``fluxKeywordRaw``
        and ``fluxKeywordSwitch``. A key missing from the file gets its
        default (``"dev"``, ``"flux"``, ``"fix"``).

    Never raises: a missing file, invalid JSON, any other read error, or a
    JSON document whose top level is not an object all yield the defaults.
    """
    loaded = None
    try:
        with open(VOICE_CONFIG_PATH, encoding="utf-8") as f:
            loaded = json.load(f)
    except (FileNotFoundError, json.JSONDecodeError):
        # Nothing persisted yet, or the file is not valid JSON: use defaults.
        loaded = None
    except Exception as exc:
        # Same logger the module uses; resolved here so this function does not
        # depend on the module-level `logger` assignment further down the file.
        logging.getLogger(__name__).debug("Voice-Config lesen fehlgeschlagen: %s", exc)
        loaded = None

    # Valid JSON is not necessarily an object (could be a list, string, number);
    # only a dict supports the .get() lookups below.
    data = loaded if isinstance(loaded, dict) else {}

    return {
        "fluxDefaultModel": data.get("fluxDefaultModel", "dev"),
        "fluxKeywordRaw": data.get("fluxKeywordRaw", "flux"),
        "fluxKeywordSwitch": data.get("fluxKeywordSwitch", "fix"),
    }
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
@@ -229,10 +250,10 @@ META_TOOLS = [
|
|||||||
"function": {
|
"function": {
|
||||||
"name": "flux_generate",
|
"name": "flux_generate",
|
||||||
"description": (
|
"description": (
|
||||||
"Generiere ein Bild aus einem Text-Prompt via FLUX.1-dev auf der Gamebox-"
|
"Generiere ein Bild aus einem Text-Prompt via FLUX auf der Gamebox-GPU. "
|
||||||
"GPU. Brauchbar fuer 'mal mir ein X', 'wie sieht ein Y aus?', "
|
"Brauchbar fuer 'mal mir ein X', 'wie sieht ein Y aus?', Mockups, "
|
||||||
"Mockups, Konzept-Skizzen. Render dauert 20-90s — Stefan kuendigt "
|
"Konzept-Skizzen, Memes. Render dauert 20-90s — kuendige es Stefan "
|
||||||
"es an wenn er weiss dass es laeuft.\n\n"
|
"kurz an, dann ist er nicht ueberrascht.\n\n"
|
||||||
"**Schreibe deine Antwort wie immer auf Deutsch**, und referenziere das "
|
"**Schreibe deine Antwort wie immer auf Deutsch**, und referenziere das "
|
||||||
"fertige Bild MIT dem `[FILE: ...]`-Marker, GENAU im Pfad-Format das das "
|
"fertige Bild MIT dem `[FILE: ...]`-Marker, GENAU im Pfad-Format das das "
|
||||||
"Tool zurueckgibt. Beispiel:\n"
|
"Tool zurueckgibt. Beispiel:\n"
|
||||||
@@ -241,10 +262,26 @@ META_TOOLS = [
|
|||||||
"inline als Anhang gezeigt.\n\n"
|
"inline als Anhang gezeigt.\n\n"
|
||||||
"**Prompt-Sprache: bevorzugt Englisch.** FLUX versteht zwar Deutsch, "
|
"**Prompt-Sprache: bevorzugt Englisch.** FLUX versteht zwar Deutsch, "
|
||||||
"liefert aber mit englischen Prompts deutlich konsistentere Ergebnisse. "
|
"liefert aber mit englischen Prompts deutlich konsistentere Ergebnisse. "
|
||||||
"Uebersetze Stefans deutsche Beschreibung selbststaendig.\n\n"
|
"Uebersetze Stefans deutsche Beschreibung selbststaendig — AUSSER `raw=true`.\n\n"
|
||||||
|
"**Modus `raw=true` (Pipe-Modus):** Wenn Stefan das Raw-Keyword aus dem "
|
||||||
|
"FLUX-Settings-Block im System-Prompt nutzt (typischerweise `flux`), "
|
||||||
|
"leite seinen Text 1:1 als prompt durch — KEIN Uebersetzen, KEIN "
|
||||||
|
"Beautify, KEINE Qualitaets-Keywords. Stefan formuliert dann selbst und "
|
||||||
|
"der Prompt geht roh an FLUX. Brauchbar wenn er den vollen Output ohne "
|
||||||
|
"ARIAs Filter haben will.\n\n"
|
||||||
|
"**Modell-Wahl (`model`):** \n"
|
||||||
|
"- `default` (oder weglassen): das in den Diagnostic-Settings eingestellte "
|
||||||
|
"Default-Modell (steht im FLUX-Block im System-Prompt).\n"
|
||||||
|
"- `dev`: hochqualitatives FLUX.1-dev, 20-90s, ~28 steps.\n"
|
||||||
|
"- `schnell`: FLUX.1-schnell, 4-step distillation, ~5-15s.\n"
|
||||||
|
"Wenn Stefan das Switch-Keyword (steht ebenfalls im FLUX-Block) im Prompt "
|
||||||
|
"verwendet → setze `model` auf das ANDERE Modell als das Default. Bei "
|
||||||
|
"'in hoher Qualitaet'/'detailliert' → `dev`. Bei 'schnell mal'/'fix' → `schnell`.\n\n"
|
||||||
|
"Modell-Switch kostet einmalig 15-30s (Pipeline-Reload aus HF-Cache). "
|
||||||
|
"Stefan sieht den Status im Diagnostic-Banner.\n\n"
|
||||||
"Caps:\n"
|
"Caps:\n"
|
||||||
"- `width`/`height`: 256-1536, wird auf Vielfache von 64 gesnappt (Default 1024)\n"
|
"- `width`/`height`: 256-1536, wird auf Vielfache von 64 gesnappt (Default 1024)\n"
|
||||||
"- `steps`: 1-50 (Default 28 fuer FLUX.1-dev, 4 fuer schnell)\n"
|
"- `steps`: 1-50 (Default 28 fuer dev, 4 fuer schnell)\n"
|
||||||
"- `guidance_scale`: 0.0-20.0 (Default 3.5)\n"
|
"- `guidance_scale`: 0.0-20.0 (Default 3.5)\n"
|
||||||
"- `seed`: optional, gleicher seed + gleicher prompt → gleiches Bild"
|
"- `seed`: optional, gleicher seed + gleicher prompt → gleiches Bild"
|
||||||
),
|
),
|
||||||
@@ -253,7 +290,22 @@ META_TOOLS = [
|
|||||||
"properties": {
|
"properties": {
|
||||||
"prompt": {
|
"prompt": {
|
||||||
"type": "string",
|
"type": "string",
|
||||||
"description": "Englischer Bild-Prompt. So konkret wie moeglich (Motiv, Stil, Licht, Kamera).",
|
"description": (
|
||||||
|
"Bei raw=false (Default): englischer Bild-Prompt, von dir aus Stefans Worten gebaut, "
|
||||||
|
"mit Stil/Licht/Kamera-Stichworten. Bei raw=true: Stefans Text 1:1 ohne Aenderung."
|
||||||
|
),
|
||||||
|
},
|
||||||
|
"raw": {
|
||||||
|
"type": "boolean",
|
||||||
|
"description": (
|
||||||
|
"true = Pipe-Modus, kein Rewriting. Setzen wenn Stefan das Raw-Keyword "
|
||||||
|
"(siehe FLUX-Block im System-Prompt) am Anfang seiner Nachricht verwendet."
|
||||||
|
),
|
||||||
|
},
|
||||||
|
"model": {
|
||||||
|
"type": "string",
|
||||||
|
"enum": ["default", "dev", "schnell"],
|
||||||
|
"description": "Default-Modell oder explizit dev/schnell. Default = Diagnostic-Setting.",
|
||||||
},
|
},
|
||||||
"width": {"type": "integer", "description": "Breite in px (Default 1024, max 1536)"},
|
"width": {"type": "integer", "description": "Breite in px (Default 1024, max 1536)"},
|
||||||
"height": {"type": "integer", "description": "Hoehe in px (Default 1024, max 1536)"},
|
"height": {"type": "integer", "description": "Hoehe in px (Default 1024, max 1536)"},
|
||||||
@@ -487,10 +539,12 @@ class Agent:
|
|||||||
condition_funcs = watcher_mod.describe_functions()
|
condition_funcs = watcher_mod.describe_functions()
|
||||||
|
|
||||||
# 5. System-Prompt + Window-Messages
|
# 5. System-Prompt + Window-Messages
|
||||||
|
flux_config = _load_flux_config()
|
||||||
system_prompt = build_system_prompt(hot, cold, skills=all_skills,
|
system_prompt = build_system_prompt(hot, cold, skills=all_skills,
|
||||||
triggers=all_triggers,
|
triggers=all_triggers,
|
||||||
condition_vars=condition_vars,
|
condition_vars=condition_vars,
|
||||||
condition_funcs=condition_funcs)
|
condition_funcs=condition_funcs,
|
||||||
|
flux_config=flux_config)
|
||||||
messages = [ProxyMessage(role="system", content=system_prompt)]
|
messages = [ProxyMessage(role="system", content=system_prompt)]
|
||||||
for t in self.conversation.window():
|
for t in self.conversation.window():
|
||||||
messages.append(ProxyMessage(role=t.role, content=t.content))
|
messages.append(ProxyMessage(role=t.role, content=t.content))
|
||||||
@@ -673,6 +727,14 @@ class Agent:
|
|||||||
req["guidance_scale"] = float(arguments["guidance_scale"])
|
req["guidance_scale"] = float(arguments["guidance_scale"])
|
||||||
except (TypeError, ValueError):
|
except (TypeError, ValueError):
|
||||||
pass
|
pass
|
||||||
|
# Modell-Wahl: 'default' (oder weglassen) → flux-bridge nimmt Diagnostic-Default.
|
||||||
|
# 'dev' / 'schnell' → expliziter Override.
|
||||||
|
model_arg = (arguments.get("model") or "").strip().lower()
|
||||||
|
if model_arg in ("dev", "schnell"):
|
||||||
|
req["model"] = model_arg
|
||||||
|
# `raw` ist Brain-Domain (kein Rewriting des prompt) und wird hier
|
||||||
|
# nicht durchgereicht — der prompt enthaelt bei raw=true bereits
|
||||||
|
# Stefans Originaltext.
|
||||||
try:
|
try:
|
||||||
body = json.dumps(req).encode("utf-8")
|
body = json.dumps(req).encode("utf-8")
|
||||||
http_req = urllib.request.Request(
|
http_req = urllib.request.Request(
|
||||||
|
|||||||
+36
-1
@@ -240,6 +240,37 @@ def build_triggers_section(
|
|||||||
return "\n".join(lines)
|
return "\n".join(lines)
|
||||||
|
|
||||||
|
|
||||||
|
def build_flux_section(flux_config: dict) -> str:
    """Build the system-prompt block describing the current FLUX settings.

    Args:
        flux_config: Settings dict with the optional keys
            ``fluxDefaultModel`` ("dev" | "schnell", default "dev"),
            ``fluxKeywordRaw`` (e.g. "flux": pipe mode, no rewriting) and
            ``fluxKeywordSwitch`` (e.g. "fix": use the non-default model).
            ``None`` or a non-dict is treated as an empty dict.

    Returns:
        The block as newline-joined Markdown lines.

    Values that are missing, not strings, or blank fall back to their
    defaults, and an unknown model name is treated as "dev", so the prompt
    never contains ``None``, an empty keyword, or an unknown model.
    """
    cfg = flux_config if isinstance(flux_config, dict) else {}

    def _setting(key: str, fallback: str) -> str:
        # Only a non-blank string is usable inside the prompt text.
        value = cfg.get(key)
        if isinstance(value, str) and value.strip():
            return value.strip()
        return fallback

    default_model = _setting("fluxDefaultModel", "dev").lower()
    if default_model not in ("dev", "schnell"):
        # Anything that is not "schnell" counts as "dev".
        default_model = "dev"
    kw_raw = _setting("fluxKeywordRaw", "flux")
    kw_switch = _setting("fluxKeywordSwitch", "fix")
    other_model = "schnell" if default_model == "dev" else "dev"

    lines = [
        "## FLUX Bildgenerierung",
        f"- Default-Modell: `{default_model}` (alternativ: `{other_model}`).",
        f"- Raw-Keyword: `{kw_raw}` — wenn Stefans Nachricht damit beginnt "
        f"oder das Wort als ersten echten Wortteil enthaelt, ruf "
        f"`flux_generate(..., raw=true)` und leite seinen Text 1:1 als prompt "
        f"durch. KEIN Uebersetzen, KEIN Beautify, KEINE Stil-Adds.",
        f"- Switch-Keyword: `{kw_switch}` — taucht's in der Nachricht auf, "
        f"setze `model=\"{other_model}\"` (das ANDERE Modell als das Default).",
        "- Natuerliche Sprache funktioniert auch: 'mal eben fix' / 'schnell' → schnell, "
        "'in hoher Qualitaet' / 'detailliert' → dev.",
        "- Whisper-Erkennung des Raw-Keywords ist nicht perfekt — wenn Stefans "
        "Sprachnachricht z.B. mit 'fluks', 'flocks', 'fluxx' anfaengt, behandle "
        "das auch als Raw-Keyword.",
    ]
    return "\n".join(lines)
|
||||||
|
|
||||||
|
|
||||||
def build_system_prompt(
|
def build_system_prompt(
|
||||||
pinned: List[MemoryPoint],
|
pinned: List[MemoryPoint],
|
||||||
cold: List[MemoryPoint] | None = None,
|
cold: List[MemoryPoint] | None = None,
|
||||||
@@ -247,8 +278,9 @@ def build_system_prompt(
|
|||||||
triggers: List[dict] | None = None,
|
triggers: List[dict] | None = None,
|
||||||
condition_vars: List[dict] | None = None,
|
condition_vars: List[dict] | None = None,
|
||||||
condition_funcs: List[dict] | None = None,
|
condition_funcs: List[dict] | None = None,
|
||||||
|
flux_config: dict | None = None,
|
||||||
) -> str:
|
) -> str:
|
||||||
"""Kompletter System-Prompt: Hot + Cold + Skills + Triggers."""
|
"""Kompletter System-Prompt: Hot + Cold + Skills + Triggers + FLUX."""
|
||||||
parts = [build_hot_memory_section(pinned), "", build_time_section()]
|
parts = [build_hot_memory_section(pinned), "", build_time_section()]
|
||||||
if skills:
|
if skills:
|
||||||
parts.append("")
|
parts.append("")
|
||||||
@@ -256,6 +288,9 @@ def build_system_prompt(
|
|||||||
if condition_vars:
|
if condition_vars:
|
||||||
parts.append("")
|
parts.append("")
|
||||||
parts.append(build_triggers_section(triggers or [], condition_vars, condition_funcs))
|
parts.append(build_triggers_section(triggers or [], condition_vars, condition_funcs))
|
||||||
|
if flux_config is not None:
|
||||||
|
parts.append("")
|
||||||
|
parts.append(build_flux_section(flux_config))
|
||||||
if cold:
|
if cold:
|
||||||
parts.append("")
|
parts.append("")
|
||||||
parts.append(build_cold_memory_section(cold))
|
parts.append(build_cold_memory_section(cold))
|
||||||
|
|||||||
+29
-6
@@ -487,6 +487,7 @@ class ARIABridge:
|
|||||||
self.tts_enabled = True
|
self.tts_enabled = True
|
||||||
self.xtts_voice = ""
|
self.xtts_voice = ""
|
||||||
self._f5tts_config: dict = {}
|
self._f5tts_config: dict = {}
|
||||||
|
self._flux_config: dict = {}
|
||||||
vc: dict = {}
|
vc: dict = {}
|
||||||
# Gespeicherte Voice-Config laden
|
# Gespeicherte Voice-Config laden
|
||||||
try:
|
try:
|
||||||
@@ -503,9 +504,14 @@ class ARIABridge:
|
|||||||
"f5ttsCfgStrength", "f5ttsNfeStep"):
|
"f5ttsCfgStrength", "f5ttsNfeStep"):
|
||||||
if k in vc:
|
if k in vc:
|
||||||
self._f5tts_config[k] = vc[k]
|
self._f5tts_config[k] = vc[k]
|
||||||
logger.info("Voice-Config geladen: tts=%s voice=%s f5tts=%s",
|
# FLUX-Felder (Default-Modell + Keywords) gleicher Mechanismus
|
||||||
|
for k in ("fluxDefaultModel", "fluxKeywordRaw", "fluxKeywordSwitch"):
|
||||||
|
if k in vc:
|
||||||
|
self._flux_config[k] = vc[k]
|
||||||
|
logger.info("Voice-Config geladen: tts=%s voice=%s f5tts=%s flux=%s",
|
||||||
self.tts_enabled, self.xtts_voice or "default",
|
self.tts_enabled, self.xtts_voice or "default",
|
||||||
self._f5tts_config or "defaults")
|
self._f5tts_config or "defaults",
|
||||||
|
self._flux_config or "defaults")
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.warning("Voice-Config laden fehlgeschlagen: %s", e)
|
logger.warning("Voice-Config laden fehlgeschlagen: %s", e)
|
||||||
# Whisper-Modell: Config hat Vorrang, dann env/Default (medium)
|
# Whisper-Modell: Config hat Vorrang, dann env/Default (medium)
|
||||||
@@ -1238,6 +1244,7 @@ class ARIABridge:
|
|||||||
"whisperModel": self.stt_engine.model_size,
|
"whisperModel": self.stt_engine.model_size,
|
||||||
}
|
}
|
||||||
payload.update(getattr(self, "_f5tts_config", {}) or {})
|
payload.update(getattr(self, "_f5tts_config", {}) or {})
|
||||||
|
payload.update(getattr(self, "_flux_config", {}) or {})
|
||||||
await self._send_to_rvs({
|
await self._send_to_rvs({
|
||||||
"type": "config",
|
"type": "config",
|
||||||
"payload": payload,
|
"payload": payload,
|
||||||
@@ -1776,6 +1783,15 @@ class ARIABridge:
|
|||||||
self._f5tts_config = {}
|
self._f5tts_config = {}
|
||||||
self._f5tts_config[k] = payload[k]
|
self._f5tts_config[k] = payload[k]
|
||||||
changed = True
|
changed = True
|
||||||
|
# FLUX-Felder: gleiche Logik wie F5-TTS. flux-bridge applied
|
||||||
|
# fluxDefaultModel selbst (Pipeline-Swap). Keywords nutzt Brain
|
||||||
|
# via /shared/config/voice_config.json.
|
||||||
|
for k in ("fluxDefaultModel", "fluxKeywordRaw", "fluxKeywordSwitch"):
|
||||||
|
if k in payload:
|
||||||
|
if not hasattr(self, "_flux_config"):
|
||||||
|
self._flux_config = {}
|
||||||
|
self._flux_config[k] = payload[k]
|
||||||
|
changed = True
|
||||||
# Persistent speichern in Shared Volume
|
# Persistent speichern in Shared Volume
|
||||||
if changed:
|
if changed:
|
||||||
try:
|
try:
|
||||||
@@ -1786,6 +1802,7 @@ class ARIABridge:
|
|||||||
"whisperModel": self.stt_engine.model_size,
|
"whisperModel": self.stt_engine.model_size,
|
||||||
}
|
}
|
||||||
config_data.update(getattr(self, "_f5tts_config", {}))
|
config_data.update(getattr(self, "_f5tts_config", {}))
|
||||||
|
config_data.update(getattr(self, "_flux_config", {}))
|
||||||
with open("/shared/config/voice_config.json", "w") as f:
|
with open("/shared/config/voice_config.json", "w") as f:
|
||||||
json.dump(config_data, f, indent=2)
|
json.dump(config_data, f, indent=2)
|
||||||
logger.info("[rvs] Voice-Config gespeichert: %s", config_data)
|
logger.info("[rvs] Voice-Config gespeichert: %s", config_data)
|
||||||
@@ -2553,7 +2570,7 @@ class ARIABridge:
|
|||||||
|
|
||||||
async def _flux_generate(self, prompt: str, width: int, height: int,
|
async def _flux_generate(self, prompt: str, width: int, height: int,
|
||||||
steps: Optional[int], guidance: Optional[float],
|
steps: Optional[int], guidance: Optional[float],
|
||||||
seed: Optional[int]) -> dict:
|
seed: Optional[int], model: Optional[str] = None) -> dict:
|
||||||
"""Schickt einen flux_request an die flux-bridge, wartet auf das fertige
|
"""Schickt einen flux_request an die flux-bridge, wartet auf das fertige
|
||||||
PNG, speichert es nach /shared/uploads/aria_generated_<ts>.png.
|
PNG, speichert es nach /shared/uploads/aria_generated_<ts>.png.
|
||||||
|
|
||||||
@@ -2578,9 +2595,13 @@ class ARIABridge:
|
|||||||
req_payload["guidance_scale"] = guidance
|
req_payload["guidance_scale"] = guidance
|
||||||
if seed is not None:
|
if seed is not None:
|
||||||
req_payload["seed"] = seed
|
req_payload["seed"] = seed
|
||||||
|
if model:
|
||||||
|
# 'dev' | 'schnell' — flux-bridge mappt das auf HF-IDs.
|
||||||
|
# Ohne Angabe nimmt die flux-bridge ihren konfigurierten Default.
|
||||||
|
req_payload["model"] = model
|
||||||
|
|
||||||
logger.info("[rvs] flux_request → flux-bridge (id=%s, %dx%d, steps=%s, prompt=%r)",
|
logger.info("[rvs] flux_request → flux-bridge (id=%s, %dx%d, steps=%s, model=%s, prompt=%r)",
|
||||||
request_id[:8], width, height, steps, prompt[:60])
|
request_id[:8], width, height, steps, model or "default", prompt[:60])
|
||||||
ok = await self._send_to_rvs({
|
ok = await self._send_to_rvs({
|
||||||
"type": "flux_request",
|
"type": "flux_request",
|
||||||
"payload": req_payload,
|
"payload": req_payload,
|
||||||
@@ -2897,10 +2918,12 @@ class ARIABridge:
|
|||||||
steps = int(steps_raw) if isinstance(steps_raw, (int, float)) else None
|
steps = int(steps_raw) if isinstance(steps_raw, (int, float)) else None
|
||||||
guidance = float(guidance_raw) if isinstance(guidance_raw, (int, float)) else None
|
guidance = float(guidance_raw) if isinstance(guidance_raw, (int, float)) else None
|
||||||
seed = int(seed_raw) if isinstance(seed_raw, (int, float)) else None
|
seed = int(seed_raw) if isinstance(seed_raw, (int, float)) else None
|
||||||
|
model_raw = data.get("model")
|
||||||
|
model = model_raw.strip() if isinstance(model_raw, str) and model_raw.strip() in ("dev", "schnell") else None
|
||||||
|
|
||||||
result = await self._flux_generate(
|
result = await self._flux_generate(
|
||||||
prompt=prompt, width=width, height=height,
|
prompt=prompt, width=width, height=height,
|
||||||
steps=steps, guidance=guidance, seed=seed,
|
steps=steps, guidance=guidance, seed=seed, model=model,
|
||||||
)
|
)
|
||||||
status = 200 if result.get("ok") else 502
|
status = 200 if result.get("ok") else 502
|
||||||
await _send_response(writer, status, result)
|
await _send_response(writer, status, result)
|
||||||
|
|||||||
+59
-4
@@ -583,6 +583,48 @@
|
|||||||
</div>
|
</div>
|
||||||
</details>
|
</details>
|
||||||
|
|
||||||
|
<!-- FLUX Bildgenerierung -->
|
||||||
|
<details style="background:#0D0D1A;border:1px solid #2A2A3E;border-radius:6px;padding:10px 12px;margin-bottom:12px;">
|
||||||
|
<summary style="color:#8888AA;font-size:12px;cursor:pointer;">FLUX Bildgenerierung (Modell + Keywords)</summary>
|
||||||
|
<div style="margin-top:10px;display:flex;flex-direction:column;gap:8px;">
|
||||||
|
<div style="color:#8888AA;font-size:11px;">
|
||||||
|
Steuerung der Image-Generation (flux-bridge auf der Gamebox).
|
||||||
|
Default-Modell wird via RVS gepusht — Wechsel triggert Pipeline-Reload (15-30s).
|
||||||
|
Keywords nutzt ARIAs Brain im System-Prompt.
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<label style="color:#8888AA;font-size:12px;">Default-Modell:</label>
|
||||||
|
<select id="diag-flux-default-model" onchange="sendVoiceConfig()"
|
||||||
|
style="background:#1E1E2E;color:#fff;border:1px solid #2A2A3E;border-radius:6px;padding:6px 10px;font-size:13px;">
|
||||||
|
<option value="dev">FLUX.1-dev (hoechste Qualitaet, 20-90s)</option>
|
||||||
|
<option value="schnell">FLUX.1-schnell (4-step, 5-15s)</option>
|
||||||
|
</select>
|
||||||
|
|
||||||
|
<label style="color:#8888AA;font-size:12px;">
|
||||||
|
Raw-Keyword — Pipe-Modus, ARIA leitet den Prompt 1:1 durch (kein Rewriting):
|
||||||
|
</label>
|
||||||
|
<input type="text" id="diag-flux-keyword-raw"
|
||||||
|
placeholder="flux"
|
||||||
|
style="background:#1E1E2E;color:#fff;border:1px solid #2A2A3E;border-radius:6px;padding:6px 10px;font-size:13px;">
|
||||||
|
|
||||||
|
<label style="color:#8888AA;font-size:12px;">
|
||||||
|
Switch-Keyword — zwingt das ANDERE Modell als das Default fuer diesen Request:
|
||||||
|
</label>
|
||||||
|
<input type="text" id="diag-flux-keyword-switch"
|
||||||
|
placeholder="fix"
|
||||||
|
style="background:#1E1E2E;color:#fff;border:1px solid #2A2A3E;border-radius:6px;padding:6px 10px;font-size:13px;">
|
||||||
|
|
||||||
|
<div style="display:flex;gap:8px;align-items:center;margin-top:6px;">
|
||||||
|
<button class="btn primary" onclick="sendVoiceConfig()" style="padding:6px 14px;font-size:12px;">
|
||||||
|
Anwenden
|
||||||
|
</button>
|
||||||
|
<div style="color:#666680;font-size:10px;">
|
||||||
|
Beide Modelle = volle Qualitaet, schnell ist nur ein 4-Step-Distillat (Apache-2.0).
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
</details>
|
||||||
|
|
||||||
<!-- Voice Cloning -->
|
<!-- Voice Cloning -->
|
||||||
<div style="background:#1E1E2E;border-radius:8px;padding:12px;margin-top:8px;">
|
<div style="background:#1E1E2E;border-radius:8px;padding:12px;margin-top:8px;">
|
||||||
<div style="color:#0096FF;font-size:13px;font-weight:600;margin-bottom:8px;">Stimme klonen</div>
|
<div style="color:#0096FF;font-size:13px;font-weight:600;margin-bottom:8px;">Stimme klonen</div>
|
||||||
@@ -1339,6 +1381,10 @@
|
|||||||
setIfPresent('diag-f5tts-vocab', msg.f5ttsVocabFile);
|
setIfPresent('diag-f5tts-vocab', msg.f5ttsVocabFile);
|
||||||
setIfPresent('diag-f5tts-cfg', msg.f5ttsCfgStrength);
|
setIfPresent('diag-f5tts-cfg', msg.f5ttsCfgStrength);
|
||||||
setIfPresent('diag-f5tts-nfe', msg.f5ttsNfeStep);
|
setIfPresent('diag-f5tts-nfe', msg.f5ttsNfeStep);
|
||||||
|
// FLUX-Settings wiederherstellen
|
||||||
|
setIfPresent('diag-flux-default-model', msg.fluxDefaultModel);
|
||||||
|
setIfPresent('diag-flux-keyword-raw', msg.fluxKeywordRaw);
|
||||||
|
setIfPresent('diag-flux-keyword-switch', msg.fluxKeywordSwitch);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -2129,12 +2175,17 @@
|
|||||||
row.style.cssText = 'display:flex;align-items:center;gap:6px;';
|
row.style.cssText = 'display:flex;align-items:center;gap:6px;';
|
||||||
let dot = '⚫', color = '#666680', text = '';
|
let dot = '⚫', color = '#666680', text = '';
|
||||||
if (info.state === 'loading') {
|
if (info.state === 'loading') {
|
||||||
dot = '⏳'; color = '#FFD60A'; anyLoading = true;
|
dot = info.downloading ? '⬇' : '⏳';
|
||||||
text = `${labels[s] || s}: laedt${info.model ? ' ' + info.model : ''}...`;
|
color = '#FFD60A'; anyLoading = true;
|
||||||
|
const action = info.downloading
|
||||||
|
? 'laedt erstmalig runter (mehrere GB, kann dauern)'
|
||||||
|
: 'laedt';
|
||||||
|
text = `${labels[s] || s}: ${action}${info.model ? ' ' + info.model : ''}...`;
|
||||||
} else if (info.state === 'ready') {
|
} else if (info.state === 'ready') {
|
||||||
dot = '✅'; color = '#34C759';
|
dot = info.freshlyDownloaded ? '🎉' : '✅'; color = '#34C759';
|
||||||
const sec = info.loadSeconds ? ` (${info.loadSeconds.toFixed(1)}s)` : '';
|
const sec = info.loadSeconds ? ` (${info.loadSeconds.toFixed(1)}s)` : '';
|
||||||
text = `${labels[s] || s}: bereit${info.model ? ' ' + info.model : ''}${sec}`;
|
const downloadedHint = info.freshlyDownloaded ? ' — Download fertig!' : '';
|
||||||
|
text = `${labels[s] || s}: bereit${info.model ? ' ' + info.model : ''}${sec}${downloadedHint}`;
|
||||||
} else if (info.state === 'error') {
|
} else if (info.state === 'error') {
|
||||||
dot = '❌'; color = '#FF3B30'; anyError = true;
|
dot = '❌'; color = '#FF3B30'; anyError = true;
|
||||||
text = `${labels[s] || s}: Fehler ${info.error || ''}`;
|
text = `${labels[s] || s}: Fehler ${info.error || ''}`;
|
||||||
@@ -2649,11 +2700,15 @@
|
|||||||
const f5ttsNfeRaw = document.getElementById('diag-f5tts-nfe')?.value || '';
|
const f5ttsNfeRaw = document.getElementById('diag-f5tts-nfe')?.value || '';
|
||||||
const f5ttsCfgStrength = f5ttsCfgRaw ? parseFloat(f5ttsCfgRaw) : undefined;
|
const f5ttsCfgStrength = f5ttsCfgRaw ? parseFloat(f5ttsCfgRaw) : undefined;
|
||||||
const f5ttsNfeStep = f5ttsNfeRaw ? parseInt(f5ttsNfeRaw, 10) : undefined;
|
const f5ttsNfeStep = f5ttsNfeRaw ? parseInt(f5ttsNfeRaw, 10) : undefined;
|
||||||
|
const fluxDefaultModel = document.getElementById('diag-flux-default-model')?.value || undefined;
|
||||||
|
const fluxKeywordRaw = document.getElementById('diag-flux-keyword-raw')?.value;
|
||||||
|
const fluxKeywordSwitch = document.getElementById('diag-flux-keyword-switch')?.value;
|
||||||
send({
|
send({
|
||||||
action: 'send_voice_config',
|
action: 'send_voice_config',
|
||||||
ttsEnabled, xttsVoice, whisperModel,
|
ttsEnabled, xttsVoice, whisperModel,
|
||||||
f5ttsModel, f5ttsCkptFile, f5ttsVocabFile,
|
f5ttsModel, f5ttsCkptFile, f5ttsVocabFile,
|
||||||
f5ttsCfgStrength, f5ttsNfeStep,
|
f5ttsCfgStrength, f5ttsNfeStep,
|
||||||
|
fluxDefaultModel, fluxKeywordRaw, fluxKeywordSwitch,
|
||||||
});
|
});
|
||||||
const statusEl = document.getElementById('voice-status');
|
const statusEl = document.getElementById('voice-status');
|
||||||
if (statusEl && xttsVoice) {
|
if (statusEl && xttsVoice) {
|
||||||
|
|||||||
@@ -1945,6 +1945,18 @@ wss.on("connection", (ws) => {
|
|||||||
if (msg.f5ttsNfeStep !== undefined && !isNaN(msg.f5ttsNfeStep)) {
|
if (msg.f5ttsNfeStep !== undefined && !isNaN(msg.f5ttsNfeStep)) {
|
||||||
voiceConfig.f5ttsNfeStep = msg.f5ttsNfeStep;
|
voiceConfig.f5ttsNfeStep = msg.f5ttsNfeStep;
|
||||||
}
|
}
|
||||||
|
// FLUX-Settings (Default-Modell + User-Keywords). flux-bridge nutzt
|
||||||
|
// fluxDefaultModel zum Hot-Swap, Brain liest die Keywords direkt aus
|
||||||
|
// /shared/config/voice_config.json fuer den System-Prompt.
|
||||||
|
if (msg.fluxDefaultModel !== undefined) {
|
||||||
|
voiceConfig.fluxDefaultModel = (msg.fluxDefaultModel === "schnell") ? "schnell" : "dev";
|
||||||
|
}
|
||||||
|
if (msg.fluxKeywordRaw !== undefined) {
|
||||||
|
voiceConfig.fluxKeywordRaw = String(msg.fluxKeywordRaw || "").trim().toLowerCase() || "flux";
|
||||||
|
}
|
||||||
|
if (msg.fluxKeywordSwitch !== undefined) {
|
||||||
|
voiceConfig.fluxKeywordSwitch = String(msg.fluxKeywordSwitch || "").trim().toLowerCase() || "fix";
|
||||||
|
}
|
||||||
try {
|
try {
|
||||||
fs.mkdirSync("/shared/config", { recursive: true });
|
fs.mkdirSync("/shared/config", { recursive: true });
|
||||||
fs.writeFileSync("/shared/config/voice_config.json", JSON.stringify(voiceConfig, null, 2));
|
fs.writeFileSync("/shared/config/voice_config.json", JSON.stringify(voiceConfig, null, 2));
|
||||||
|
|||||||
+15
-8
@@ -10,16 +10,23 @@ RVS_TLS=true
|
|||||||
RVS_TLS_FALLBACK=true
|
RVS_TLS_FALLBACK=true
|
||||||
RVS_TOKEN=dein_token_hier
|
RVS_TOKEN=dein_token_hier
|
||||||
|
|
||||||
# HuggingFace-Token — FLUX.1-dev ist gated (auf
|
# HuggingFace-Token — NUR noetig wenn Du FLUX.1-dev nutzen willst.
|
||||||
# https://huggingface.co/black-forest-labs/FLUX.1-dev "Agree" klicken,
|
# FLUX.1-dev ist auf HuggingFace 'gated' (= Du musst dort auf der
|
||||||
# dann unter https://huggingface.co/settings/tokens ein "Read"-Token
|
# Modell-Seite einmalig "Agree" klicken und beim Download wird Dein
|
||||||
# erzeugen). Fuer FLUX.1-schnell nicht noetig.
|
# Login geprueft). Reine Lizenz-Sache — die Inference laeuft danach
|
||||||
|
# komplett lokal, kein Netz, keine Telemetrie.
|
||||||
|
#
|
||||||
|
# Wenn Du nur FLUX.1-schnell nutzt (Apache-2.0, kein Gate), kannst Du
|
||||||
|
# das Feld leer lassen. Falls Du spaeter dev aktivierst:
|
||||||
|
# 1) https://huggingface.co/black-forest-labs/FLUX.1-dev → "Agree"
|
||||||
|
# 2) https://huggingface.co/settings/tokens → "Read"-Token erzeugen
|
||||||
|
# 3) Hier eintragen + docker compose restart flux-bridge
|
||||||
HF_TOKEN=
|
HF_TOKEN=
|
||||||
|
|
||||||
# Modell:
|
# Welches Modell beim Startup geladen wird, steuert ARIA selbst:
|
||||||
# black-forest-labs/FLUX.1-dev (Default, ~24 GB, non-commercial)
|
# Diagnostic > FLUX > Default-Modell. Solange dort nichts gewaehlt ist,
|
||||||
# black-forest-labs/FLUX.1-schnell (4 Steps, Apache-2.0, schneller)
|
# laedt die flux-bridge erstmal nichts — sie wartet auf den ersten
|
||||||
FLUX_MODEL=black-forest-labs/FLUX.1-dev
|
# Render-Request oder einen Diagnostic-config-Broadcast.
|
||||||
|
|
||||||
# Offloading-Strategie (VRAM-Steuerung):
|
# Offloading-Strategie (VRAM-Steuerung):
|
||||||
# model — Default. Komponentenweise CPU-Offload, gut fuer 12 GB Karten.
|
# model — Default. Komponentenweise CPU-Offload, gut fuer 12 GB Karten.
|
||||||
|
|||||||
+189
-37
@@ -51,7 +51,13 @@ RVS_TLS = os.getenv("RVS_TLS", "true").lower() == "true"
|
|||||||
RVS_TLS_FALLBACK = os.getenv("RVS_TLS_FALLBACK", "true").lower() == "true"
|
RVS_TLS_FALLBACK = os.getenv("RVS_TLS_FALLBACK", "true").lower() == "true"
|
||||||
RVS_TOKEN = os.getenv("RVS_TOKEN", "").strip()
|
RVS_TOKEN = os.getenv("RVS_TOKEN", "").strip()
|
||||||
|
|
||||||
FLUX_MODEL = os.getenv("FLUX_MODEL", "black-forest-labs/FLUX.1-dev").strip()
|
# Bootstrap-Fallback: nur relevant wenn beim allerersten Start KEIN
|
||||||
|
# Diagnostic-config-Broadcast eintrifft UND der erste Render-Request
|
||||||
|
# auch kein 'model' enthaelt. Default 'schnell', weil Apache-2.0
|
||||||
|
# (kein HF-Token noetig) — Stefan stellt sein gewuenschtes Default ueber
|
||||||
|
# Diagnostic ein. ENV ist also nur fuer den extremen Edge-Case da, in
|
||||||
|
# der .env.example absichtlich nicht mehr dokumentiert.
|
||||||
|
FLUX_MODEL = os.getenv("FLUX_MODEL", "black-forest-labs/FLUX.1-schnell").strip()
|
||||||
FLUX_DEVICE = os.getenv("FLUX_DEVICE", "cuda").strip()
|
FLUX_DEVICE = os.getenv("FLUX_DEVICE", "cuda").strip()
|
||||||
FLUX_DTYPE = os.getenv("FLUX_DTYPE", "bfloat16").strip().lower()
|
FLUX_DTYPE = os.getenv("FLUX_DTYPE", "bfloat16").strip().lower()
|
||||||
FLUX_OFFLOAD = os.getenv("FLUX_OFFLOAD", "model").strip().lower()
|
FLUX_OFFLOAD = os.getenv("FLUX_OFFLOAD", "model").strip().lower()
|
||||||
@@ -64,11 +70,54 @@ DEFAULT_STEPS_SCHNELL = 4
|
|||||||
DEFAULT_GUIDANCE_DEV = 3.5
|
DEFAULT_GUIDANCE_DEV = 3.5
|
||||||
DEFAULT_GUIDANCE_SCHNELL = 0.0
|
DEFAULT_GUIDANCE_SCHNELL = 0.0
|
||||||
|
|
||||||
|
# Mapping fuer das User-facing Tag → HF-Modell-ID. Stefan stellt in Diagnostic
|
||||||
|
# nur 'dev' / 'schnell' ein; FLUX_MODEL aus der env kann zwar eine custom-ID
|
||||||
|
# sein (Bootstrap), wird aber beim ersten config-Broadcast normalerweise
|
||||||
|
# durch die Diagnostic-Wahl uebersteuert.
|
||||||
|
MODEL_TAGS: dict[str, str] = {
|
||||||
|
"dev": "black-forest-labs/FLUX.1-dev",
|
||||||
|
"schnell": "black-forest-labs/FLUX.1-schnell",
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
def _tag_to_model_id(tag: str) -> str:
|
||||||
|
"""Mappt 'dev'/'schnell' auf HF-ID. Andere Strings werden 1:1 durchgereicht
|
||||||
|
(custom-IDs aus FLUX_MODEL env). Leere/ungueltige Werte → FLUX_MODEL Default."""
|
||||||
|
if not tag:
|
||||||
|
return FLUX_MODEL
|
||||||
|
t = tag.strip()
|
||||||
|
return MODEL_TAGS.get(t, t)
|
||||||
|
|
||||||
|
|
||||||
def _is_schnell(model_id: str) -> bool:
|
def _is_schnell(model_id: str) -> bool:
|
||||||
return "schnell" in model_id.lower()
|
return "schnell" in model_id.lower()
|
||||||
|
|
||||||
|
|
||||||
|
def _is_model_cached(model_id: str) -> bool:
|
||||||
|
"""Prueft ob ein HF-Modell-Snapshot lokal im hf-cache vorhanden ist.
|
||||||
|
|
||||||
|
HF speichert unter ~/.cache/huggingface/hub/models--{org}--{name}/snapshots/{rev}/.
|
||||||
|
Wenn das snapshots-Verzeichnis nicht existiert oder leer ist → Erst-Download
|
||||||
|
steht an (24+ GB fuer FLUX.1-dev, 24+ GB fuer FLUX.1-schnell — Stefan kriegt
|
||||||
|
dann nen Hinweis im Banner).
|
||||||
|
"""
|
||||||
|
if not model_id:
|
||||||
|
return False
|
||||||
|
cache_root = os.environ.get("HF_HOME") or os.path.expanduser("~/.cache/huggingface")
|
||||||
|
safe = "models--" + model_id.replace("/", "--")
|
||||||
|
snapshots = os.path.join(cache_root, "hub", safe, "snapshots")
|
||||||
|
if not os.path.isdir(snapshots):
|
||||||
|
return False
|
||||||
|
try:
|
||||||
|
for rev in os.listdir(snapshots):
|
||||||
|
rev_dir = os.path.join(snapshots, rev)
|
||||||
|
if os.path.isdir(rev_dir) and any(os.scandir(rev_dir)):
|
||||||
|
return True
|
||||||
|
except OSError:
|
||||||
|
return False
|
||||||
|
return False
|
||||||
|
|
||||||
|
|
||||||
def _torch_dtype():
|
def _torch_dtype():
|
||||||
"""Lazy-resolve damit Torch erst beim Modell-Laden importiert wird."""
|
"""Lazy-resolve damit Torch erst beim Modell-Laden importiert wird."""
|
||||||
import torch
|
import torch
|
||||||
@@ -89,27 +138,58 @@ def _snap_dim(v: int, default: int = 1024) -> int:
|
|||||||
|
|
||||||
|
|
||||||
class FluxRunner:
|
class FluxRunner:
|
||||||
"""Haelt die FLUX-Pipeline. Synthese laeuft im Executor (blocking).
|
"""Haelt EINE FLUX-Pipeline. Bei Modell-Wechsel wird die alte verworfen
|
||||||
|
und die neue geladen (~15-30 s aus HF-Cache, keine Re-Downloads).
|
||||||
|
|
||||||
GPU ist die Engstelle — wir serialisieren via Queue im Caller, hier
|
Pro Request kann ein 'dev'/'schnell'-Tag mitkommen; ohne Angabe wird
|
||||||
nur Single-Lock fuer load. Ein Render auf der 3060 dauert je nach
|
`default_model_id` genommen (steht beim Bootstrap auf FLUX_MODEL, wird beim
|
||||||
Steps/Aufloesung 20-90 s.
|
ersten config-Broadcast von der aria-bridge auf die Diagnostic-Wahl
|
||||||
|
aktualisiert).
|
||||||
"""
|
"""
|
||||||
|
|
||||||
def __init__(self) -> None:
|
def __init__(self) -> None:
|
||||||
self.pipe = None
|
self.pipe = None
|
||||||
self._lock = asyncio.Lock()
|
self._lock = asyncio.Lock()
|
||||||
self.model_id: str = FLUX_MODEL
|
# Aktuell geladenes Modell — leer solange noch nix geladen wurde.
|
||||||
|
self.model_id: str = ""
|
||||||
|
# Was bei einem Request OHNE explizite model-Angabe benutzt wird.
|
||||||
|
# Wird durch Diagnostic-config gesetzt; FLUX_MODEL bleibt nur als
|
||||||
|
# Edge-Case-Fallback wenn weder Config noch Request einen Wert nennen.
|
||||||
|
self.default_model_id: str = FLUX_MODEL
|
||||||
self.last_load_seconds: float = 0.0
|
self.last_load_seconds: float = 0.0
|
||||||
|
# True wenn der letzte _load_blocking einen Fresh-Download triggern
|
||||||
|
# musste (Modell war nicht im HF-Cache). Wird vom Caller geprueft
|
||||||
|
# und in den 'ready'-service_status als freshlyDownloaded gesetzt.
|
||||||
|
self.last_load_was_download: bool = False
|
||||||
|
|
||||||
def _load_blocking(self) -> None:
|
def _load_blocking(self, model_id: str) -> None:
|
||||||
import torch
|
import torch
|
||||||
from diffusers import FluxPipeline
|
from diffusers import FluxPipeline
|
||||||
|
|
||||||
logger.info("Lade FLUX '%s' (dtype=%s, offload=%s)...",
|
# Alte Pipeline freigeben damit der HF-Loader VRAM/RAM kriegt
|
||||||
self.model_id, FLUX_DTYPE, FLUX_OFFLOAD)
|
if self.pipe is not None:
|
||||||
|
logger.info("Verwerfe alte Pipeline '%s'", self.model_id)
|
||||||
|
try:
|
||||||
|
del self.pipe
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
|
self.pipe = None
|
||||||
|
try:
|
||||||
|
torch.cuda.empty_cache()
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
|
import gc
|
||||||
|
gc.collect()
|
||||||
|
|
||||||
|
was_cached = _is_model_cached(model_id)
|
||||||
|
self.last_load_was_download = not was_cached
|
||||||
|
if not was_cached:
|
||||||
|
logger.warning("FLUX '%s' nicht im HF-Cache — Erst-Download steht bevor (kann 5-10 min dauern).",
|
||||||
|
model_id)
|
||||||
|
logger.info("Lade FLUX '%s' (dtype=%s, offload=%s, cached=%s)...",
|
||||||
|
model_id, FLUX_DTYPE, FLUX_OFFLOAD, was_cached)
|
||||||
t0 = time.time()
|
t0 = time.time()
|
||||||
pipe = FluxPipeline.from_pretrained(self.model_id, torch_dtype=_torch_dtype())
|
pipe = FluxPipeline.from_pretrained(model_id, torch_dtype=_torch_dtype())
|
||||||
|
|
||||||
if FLUX_OFFLOAD == "sequential":
|
if FLUX_OFFLOAD == "sequential":
|
||||||
pipe.enable_sequential_cpu_offload()
|
pipe.enable_sequential_cpu_offload()
|
||||||
@@ -125,20 +205,25 @@ class FluxRunner:
|
|||||||
pass
|
pass
|
||||||
|
|
||||||
self.pipe = pipe
|
self.pipe = pipe
|
||||||
|
self.model_id = model_id
|
||||||
self.last_load_seconds = time.time() - t0
|
self.last_load_seconds = time.time() - t0
|
||||||
logger.info("FLUX geladen in %.1fs", self.last_load_seconds)
|
logger.info("FLUX '%s' geladen in %.1fs", model_id, self.last_load_seconds)
|
||||||
# CUDA-Cache nach dem Load aufraeumen
|
|
||||||
try:
|
try:
|
||||||
torch.cuda.empty_cache()
|
torch.cuda.empty_cache()
|
||||||
except Exception:
|
except Exception:
|
||||||
pass
|
pass
|
||||||
|
|
||||||
async def ensure_loaded(self) -> None:
|
async def ensure_loaded(self, model_id: Optional[str] = None) -> bool:
|
||||||
|
"""Stellt sicher dass die richtige Pipeline geladen ist. Wenn ein
|
||||||
|
anderes Modell gewuenscht ist als gerade aktiv, wird geswappt.
|
||||||
|
Returns True wenn ein Swap/Load stattgefunden hat."""
|
||||||
|
target = model_id or self.default_model_id or FLUX_MODEL
|
||||||
async with self._lock:
|
async with self._lock:
|
||||||
if self.pipe is not None:
|
if self.pipe is not None and self.model_id == target:
|
||||||
return
|
return False
|
||||||
loop = asyncio.get_event_loop()
|
loop = asyncio.get_event_loop()
|
||||||
await loop.run_in_executor(None, self._load_blocking)
|
await loop.run_in_executor(None, self._load_blocking, target)
|
||||||
|
return True
|
||||||
|
|
||||||
def _generate_blocking(self, prompt: str, width: int, height: int,
|
def _generate_blocking(self, prompt: str, width: int, height: int,
|
||||||
steps: int, guidance: float, seed: Optional[int]) -> bytes:
|
steps: int, guidance: float, seed: Optional[int]) -> bytes:
|
||||||
@@ -147,8 +232,8 @@ class FluxRunner:
|
|||||||
if seed is not None and seed >= 0:
|
if seed is not None and seed >= 0:
|
||||||
gen = torch.Generator(device=FLUX_DEVICE).manual_seed(int(seed))
|
gen = torch.Generator(device=FLUX_DEVICE).manual_seed(int(seed))
|
||||||
|
|
||||||
logger.info("Render: %dx%d, steps=%d, guidance=%.2f, seed=%s, prompt=%r",
|
logger.info("Render (%s): %dx%d, steps=%d, guidance=%.2f, seed=%s, prompt=%r",
|
||||||
width, height, steps, guidance, seed, prompt[:80])
|
self.model_id, width, height, steps, guidance, seed, prompt[:80])
|
||||||
out = self.pipe(
|
out = self.pipe(
|
||||||
prompt=prompt,
|
prompt=prompt,
|
||||||
width=width,
|
width=width,
|
||||||
@@ -169,8 +254,9 @@ class FluxRunner:
|
|||||||
return png_bytes
|
return png_bytes
|
||||||
|
|
||||||
async def generate(self, prompt: str, width: int, height: int,
|
async def generate(self, prompt: str, width: int, height: int,
|
||||||
steps: int, guidance: float, seed: Optional[int]) -> bytes:
|
steps: int, guidance: float, seed: Optional[int],
|
||||||
await self.ensure_loaded()
|
model_id: Optional[str] = None) -> bytes:
|
||||||
|
await self.ensure_loaded(model_id)
|
||||||
loop = asyncio.get_event_loop()
|
loop = asyncio.get_event_loop()
|
||||||
return await loop.run_in_executor(
|
return await loop.run_in_executor(
|
||||||
None, self._generate_blocking, prompt, width, height, steps, guidance, seed,
|
None, self._generate_blocking, prompt, width, height, steps, guidance, seed,
|
||||||
@@ -205,8 +291,10 @@ async def _broadcast_status(ws, state: str, **extra) -> None:
|
|||||||
_flux_queue: "asyncio.Queue[tuple]" = asyncio.Queue()
|
_flux_queue: "asyncio.Queue[tuple]" = asyncio.Queue()
|
||||||
|
|
||||||
|
|
||||||
def _resolve_request(payload: dict, runner: FluxRunner) -> tuple[str, int, int, int, float, Optional[int]]:
|
def _resolve_request(payload: dict, runner: FluxRunner) -> tuple[str, int, int, int, float, Optional[int], str]:
|
||||||
"""Liest Felder aus dem flux_request payload + clampt auf Caps."""
|
"""Liest Felder aus dem flux_request payload + clampt auf Caps.
|
||||||
|
Returns (prompt, width, height, steps, guidance, seed, resolved_model_id).
|
||||||
|
"""
|
||||||
prompt = (payload.get("prompt") or "").strip()
|
prompt = (payload.get("prompt") or "").strip()
|
||||||
if not prompt:
|
if not prompt:
|
||||||
raise ValueError("prompt fehlt")
|
raise ValueError("prompt fehlt")
|
||||||
@@ -216,7 +304,11 @@ def _resolve_request(payload: dict, runner: FluxRunner) -> tuple[str, int, int,
|
|||||||
width = _snap_dim(payload.get("width", 1024))
|
width = _snap_dim(payload.get("width", 1024))
|
||||||
height = _snap_dim(payload.get("height", 1024))
|
height = _snap_dim(payload.get("height", 1024))
|
||||||
|
|
||||||
schnell = _is_schnell(runner.model_id)
|
# Modell-Wahl: explizit per Request > runner.default_model_id > FLUX_MODEL.
|
||||||
|
req_model = (payload.get("model") or "").strip()
|
||||||
|
resolved_model_id = _tag_to_model_id(req_model) if req_model else (runner.default_model_id or FLUX_MODEL)
|
||||||
|
|
||||||
|
schnell = _is_schnell(resolved_model_id)
|
||||||
default_steps = DEFAULT_STEPS_SCHNELL if schnell else DEFAULT_STEPS_DEV
|
default_steps = DEFAULT_STEPS_SCHNELL if schnell else DEFAULT_STEPS_DEV
|
||||||
default_guidance = DEFAULT_GUIDANCE_SCHNELL if schnell else DEFAULT_GUIDANCE_DEV
|
default_guidance = DEFAULT_GUIDANCE_SCHNELL if schnell else DEFAULT_GUIDANCE_DEV
|
||||||
|
|
||||||
@@ -240,7 +332,7 @@ def _resolve_request(payload: dict, runner: FluxRunner) -> tuple[str, int, int,
|
|||||||
except (TypeError, ValueError):
|
except (TypeError, ValueError):
|
||||||
seed = None
|
seed = None
|
||||||
|
|
||||||
return prompt, width, height, steps, guidance, seed
|
return prompt, width, height, steps, guidance, seed, resolved_model_id
|
||||||
|
|
||||||
|
|
||||||
async def _flux_worker(ws, runner: FluxRunner) -> None:
|
async def _flux_worker(ws, runner: FluxRunner) -> None:
|
||||||
@@ -263,29 +355,53 @@ async def _flux_worker(ws, runner: FluxRunner) -> None:
|
|||||||
async def _do_render(ws, runner: FluxRunner, payload: dict, request_id: str) -> None:
|
async def _do_render(ws, runner: FluxRunner, payload: dict, request_id: str) -> None:
|
||||||
t0 = time.time()
|
t0 = time.time()
|
||||||
try:
|
try:
|
||||||
prompt, width, height, steps, guidance, seed = _resolve_request(payload, runner)
|
prompt, width, height, steps, guidance, seed, target_model_id = _resolve_request(payload, runner)
|
||||||
except ValueError as e:
|
except ValueError as e:
|
||||||
logger.warning("flux_request invalid: %s", e)
|
logger.warning("flux_request invalid: %s", e)
|
||||||
await _send(ws, "flux_response", {"requestId": request_id, "error": str(e)})
|
await _send(ws, "flux_response", {"requestId": request_id, "error": str(e)})
|
||||||
return
|
return
|
||||||
|
|
||||||
|
# Modell-Swap noetig? Status broadcasten damit Diagnostic-Banner es zeigt.
|
||||||
|
swap_needed = (runner.pipe is None or runner.model_id != target_model_id)
|
||||||
|
will_download = swap_needed and not _is_model_cached(target_model_id)
|
||||||
|
if swap_needed:
|
||||||
|
await _broadcast_status(ws, "loading", model=target_model_id,
|
||||||
|
downloading=will_download)
|
||||||
|
await _send(ws, "flux_response", {
|
||||||
|
"requestId": request_id,
|
||||||
|
"state": "switching_model",
|
||||||
|
"model": target_model_id,
|
||||||
|
"downloading": will_download,
|
||||||
|
})
|
||||||
|
|
||||||
# Progress-Ping: User soll sehen dass was passiert (Render >30s realistisch)
|
# Progress-Ping: User soll sehen dass was passiert (Render >30s realistisch)
|
||||||
await _send(ws, "flux_response", {
|
await _send(ws, "flux_response", {
|
||||||
"requestId": request_id,
|
"requestId": request_id,
|
||||||
"state": "rendering",
|
"state": "rendering",
|
||||||
"width": width, "height": height, "steps": steps,
|
"width": width, "height": height, "steps": steps,
|
||||||
|
"model": target_model_id,
|
||||||
})
|
})
|
||||||
|
|
||||||
try:
|
try:
|
||||||
png = await runner.generate(prompt, width, height, steps, guidance, seed)
|
png = await runner.generate(prompt, width, height, steps, guidance, seed,
|
||||||
|
model_id=target_model_id)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.exception("FLUX Render-Fehler")
|
logger.exception("FLUX Render-Fehler")
|
||||||
await _send(ws, "flux_response", {"requestId": request_id, "error": str(e)[:200]})
|
await _send(ws, "flux_response", {"requestId": request_id, "error": str(e)[:200]})
|
||||||
|
if swap_needed:
|
||||||
|
await _broadcast_status(ws, "error", error=str(e)[:200])
|
||||||
return
|
return
|
||||||
|
|
||||||
|
if swap_needed:
|
||||||
|
await _broadcast_status(ws, "ready",
|
||||||
|
model=runner.model_id,
|
||||||
|
loadSeconds=runner.last_load_seconds,
|
||||||
|
freshlyDownloaded=runner.last_load_was_download)
|
||||||
|
|
||||||
dt = time.time() - t0
|
dt = time.time() - t0
|
||||||
b64 = base64.b64encode(png).decode("ascii")
|
b64 = base64.b64encode(png).decode("ascii")
|
||||||
logger.info("Render fertig: %dx%d, %d KB PNG, %.1fs", width, height, len(png) // 1024, dt)
|
logger.info("Render fertig: %dx%d, %d KB PNG, %.1fs (%s)",
|
||||||
|
width, height, len(png) // 1024, dt, runner.model_id)
|
||||||
|
|
||||||
await _send(ws, "flux_response", {
|
await _send(ws, "flux_response", {
|
||||||
"requestId": request_id,
|
"requestId": request_id,
|
||||||
@@ -327,22 +443,24 @@ async def run_loop(runner: FluxRunner) -> None:
|
|||||||
tls_fallback_tried = False
|
tls_fallback_tried = False
|
||||||
|
|
||||||
async def _load_with_status():
|
async def _load_with_status():
|
||||||
|
"""Bei Connect KEIN Eager-Load — wir fragen erst die
|
||||||
|
Diagnostic-Config ab. Welches Modell tatsaechlich geladen
|
||||||
|
wird entscheidet sich entweder durch den config-Broadcast
|
||||||
|
(kommt direkt danach) oder durch den ersten flux_request.
|
||||||
|
Bis dahin gibt's keinen service_status, das Banner taucht
|
||||||
|
erst auf wenn wir wirklich was laden."""
|
||||||
try:
|
try:
|
||||||
if runner.pipe is not None:
|
if runner.pipe is not None:
|
||||||
logger.info("Initial: broadcaste ready (Pipeline schon im RAM: %s)",
|
# Pipeline ueberlebt nur Container-Lifetime; hier
|
||||||
runner.model_id)
|
# also nur falls schon ein Modell aktiv ist (Reconnect).
|
||||||
await _broadcast_status(ws, "ready",
|
|
||||||
model=runner.model_id,
|
|
||||||
loadSeconds=runner.last_load_seconds)
|
|
||||||
else:
|
|
||||||
logger.info("Initial: broadcaste loading + lade '%s'", runner.model_id)
|
|
||||||
await _broadcast_status(ws, "loading", model=runner.model_id)
|
|
||||||
await runner.ensure_loaded()
|
|
||||||
await _broadcast_status(ws, "ready",
|
await _broadcast_status(ws, "ready",
|
||||||
model=runner.model_id,
|
model=runner.model_id,
|
||||||
loadSeconds=runner.last_load_seconds)
|
loadSeconds=runner.last_load_seconds)
|
||||||
|
logger.info("Initial: sende config_request an aria-bridge "
|
||||||
|
"(kein Eager-Load, warte auf Diagnostic-Wahl)")
|
||||||
|
await _send(ws, "config_request", {"service": "flux"})
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.exception("Initial-Load crashed: %s", e)
|
logger.exception("Initial-Setup crashed: %s", e)
|
||||||
try:
|
try:
|
||||||
await _broadcast_status(ws, "error", error=str(e)[:200])
|
await _broadcast_status(ws, "error", error=str(e)[:200])
|
||||||
except Exception:
|
except Exception:
|
||||||
@@ -351,6 +469,33 @@ async def run_loop(runner: FluxRunner) -> None:
|
|||||||
|
|
||||||
worker = asyncio.create_task(_flux_worker(ws, runner))
|
worker = asyncio.create_task(_flux_worker(ws, runner))
|
||||||
|
|
||||||
|
async def _apply_default_change(new_tag: str):
    """Apply a new default model and eagerly load it if it is not active.

    Resolves ``new_tag`` via ``_tag_to_model_id``, stores the result as
    ``runner.default_model_id`` and, unless that model is already loaded,
    broadcasts 'loading' → loads → broadcasts 'ready' so the next render
    needs no swap. Load failures are logged and broadcast as 'error'; they
    are never raised to the caller.

    Args:
        new_tag: Model tag or model ID taken from the config broadcast.
    """
    new_model_id = _tag_to_model_id(new_tag)
    runner.default_model_id = new_model_id

    # "Already active" requires a live pipeline. runner.model_id alone is not
    # enough: _load_blocking discards the old pipeline before loading and only
    # updates model_id on success, so after a failed swap model_id still names
    # a model that is no longer in memory.
    if runner.pipe is not None and runner.model_id == new_model_id:
        logger.info("[config] Default-Modell bleibt: %s", new_model_id)
        return

    will_download = not _is_model_cached(new_model_id)
    logger.info("[config] Default-Modell wechselt: %s → %s (download=%s)",
                runner.model_id or "(none)", new_model_id, will_download)
    try:
        await _broadcast_status(ws, "loading", model=new_model_id,
                                downloading=will_download)
        await runner.ensure_loaded(new_model_id)
        await _broadcast_status(ws, "ready",
                                model=runner.model_id,
                                loadSeconds=runner.last_load_seconds,
                                freshlyDownloaded=runner.last_load_was_download)
    except Exception as e:
        logger.exception("Modell-Swap fehlgeschlagen")
        try:
            await _broadcast_status(ws, "error", error=str(e)[:200])
        except Exception:
            # Best effort: the socket may already be gone. Keep a trace
            # instead of swallowing the failure silently.
            logger.debug("error-Status konnte nicht gesendet werden", exc_info=True)
|
||||||
|
|
||||||
try:
|
try:
|
||||||
async for raw in ws:
|
async for raw in ws:
|
||||||
try:
|
try:
|
||||||
@@ -362,6 +507,13 @@ async def run_loop(runner: FluxRunner) -> None:
|
|||||||
|
|
||||||
if mtype == "flux_request":
|
if mtype == "flux_request":
|
||||||
await _flux_queue.put(payload)
|
await _flux_queue.put(payload)
|
||||||
|
elif mtype == "config":
|
||||||
|
# Diagnostic-Broadcast (oder aria-bridge nach Reconnect).
|
||||||
|
# Wir interessieren uns nur fuer fluxDefaultModel — die
|
||||||
|
# Keywords nutzt das Brain, nicht wir.
|
||||||
|
tag = (payload.get("fluxDefaultModel") or "").strip()
|
||||||
|
if tag:
|
||||||
|
asyncio.create_task(_apply_default_change(tag))
|
||||||
finally:
|
finally:
|
||||||
worker.cancel()
|
worker.cancel()
|
||||||
try:
|
try:
|
||||||
|
|||||||
Reference in New Issue
Block a user