feat(flux): Bildgenerierung via FLUX.1-dev — flux-bridge auf Gamebox
Eigener Compose-Stack im /flux Verzeichnis (kann auf separater Maschine laufen). aria-bridge routet flux_request via RVS, ARIA referenziert das fertige PNG im Reply mit [FILE: ...]-Marker. Brain-Tool flux_generate mit Caps fuer steps/dimension. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -0,0 +1,36 @@
|
||||
# ════════════════════════════════════════════════
|
||||
# ARIA FLUX-Bridge — Konfiguration
|
||||
# Kopieren nach .env und anpassen
|
||||
# ════════════════════════════════════════════════
|
||||
|
||||
# RVS Verbindung (gleiche Daten wie auf der ARIA-VM / xtts/.env)
|
||||
RVS_HOST=mobil.hacker-net.de
|
||||
RVS_PORT=444
|
||||
RVS_TLS=true
|
||||
RVS_TLS_FALLBACK=true
|
||||
RVS_TOKEN=dein_token_hier
|
||||
|
||||
# HuggingFace-Token — FLUX.1-dev ist gated (auf
|
||||
# https://huggingface.co/black-forest-labs/FLUX.1-dev "Agree" klicken,
|
||||
# dann unter https://huggingface.co/settings/tokens ein "Read"-Token
|
||||
# erzeugen). Fuer FLUX.1-schnell nicht noetig.
|
||||
HF_TOKEN=
|
||||
|
||||
# Modell:
|
||||
# black-forest-labs/FLUX.1-dev (Default, ~24 GB, non-commercial)
|
||||
# black-forest-labs/FLUX.1-schnell (4 Steps, Apache-2.0, schneller)
|
||||
FLUX_MODEL=black-forest-labs/FLUX.1-dev
|
||||
|
||||
# Offloading-Strategie (VRAM-Steuerung):
|
||||
# model — Default. Komponentenweise CPU-Offload, gut fuer 12 GB Karten.
|
||||
# sequential — sparsamer (Peak ~6 GB), aber 2-3x langsamer.
|
||||
# none — alles auf GPU. Nur fuer >= 24 GB VRAM-Karten.
|
||||
FLUX_OFFLOAD=model
|
||||
|
||||
# Float-Type. bfloat16 ist FLUX-native; auf alten Karten ohne BF16-Support
|
||||
# auf float16 wechseln.
|
||||
FLUX_DTYPE=bfloat16
|
||||
|
||||
# Hard-Caps gegen versehentlich teure Renders
|
||||
FLUX_MAX_STEPS=50
|
||||
FLUX_MAX_DIM=1536
|
||||
@@ -0,0 +1,5 @@
|
||||
# HuggingFace Model-Cache (FLUX.1-dev ~24 GB on disk)
|
||||
hf-cache/
|
||||
|
||||
# Docker .env
|
||||
.env
|
||||
@@ -0,0 +1,23 @@
|
||||
FROM nvidia/cuda:12.2.2-cudnn8-runtime-ubuntu22.04
|
||||
|
||||
ENV DEBIAN_FRONTEND=noninteractive
|
||||
ENV PYTHONUNBUFFERED=1
|
||||
|
||||
RUN apt-get update && apt-get install -y --no-install-recommends \
|
||||
python3 python3-pip git \
|
||||
&& rm -rf /var/lib/apt/lists/*
|
||||
|
||||
WORKDIR /app
|
||||
|
||||
# PyTorch CUDA-Wheels zuerst, damit diffusers nicht CPU-Torch zieht.
|
||||
# Versionsmatrix wie bei f5tts gehalten (cu121, Torch 2.3.1) — gleicher
|
||||
# Treiber-Footprint, gleicher HF-Cache-Pfad.
|
||||
RUN pip3 install --no-cache-dir torch==2.3.1 \
|
||||
--index-url https://download.pytorch.org/whl/cu121
|
||||
|
||||
COPY requirements.txt .
|
||||
RUN pip3 install --no-cache-dir -r requirements.txt
|
||||
|
||||
COPY bridge.py .
|
||||
|
||||
CMD ["python3", "bridge.py"]
|
||||
+394
@@ -0,0 +1,394 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
ARIA FLUX-Bridge — laeuft auf der Gamebox (RTX 3060).
|
||||
|
||||
Empfaengt flux_request via RVS → FLUX.1-dev/-schnell auf GPU → sendet
|
||||
flux_response mit base64-PNG zurueck an die aria-bridge. Diese speichert
|
||||
die Datei nach /shared/uploads/ und ARIA referenziert sie mit
|
||||
[FILE: ...]-Marker in ihrer Antwort.
|
||||
|
||||
12 GB VRAM auf der 3060 reichen fuer FLUX.1-dev nur mit
|
||||
`enable_model_cpu_offload()` — sonst OOM. Setze FLUX_OFFLOAD=sequential
|
||||
fuer Maximal-Sparsamkeit (langsamer) oder FLUX_OFFLOAD=none wenn die
|
||||
GPU genug VRAM hat (z.B. spaeter 4090).
|
||||
|
||||
Env:
|
||||
RVS_HOST, RVS_PORT, RVS_TLS, RVS_TLS_FALLBACK, RVS_TOKEN
|
||||
FLUX_MODEL Default: black-forest-labs/FLUX.1-dev
|
||||
Alt: black-forest-labs/FLUX.1-schnell (4-Step, Apache-2.0)
|
||||
FLUX_DEVICE Default: cuda
|
||||
FLUX_DTYPE Default: bfloat16 (alt: float16)
|
||||
FLUX_OFFLOAD Default: model (alt: sequential | none)
|
||||
FLUX_MAX_STEPS Default: 50
|
||||
FLUX_MAX_DIM Default: 1536
|
||||
"""
|
||||
import asyncio
|
||||
import base64
|
||||
import io
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
import sys
|
||||
import time
|
||||
import uuid
|
||||
from typing import Optional
|
||||
|
||||
import websockets
|
||||
|
||||
logging.basicConfig(
|
||||
level=logging.INFO,
|
||||
format="%(asctime)s [%(levelname)s] %(message)s",
|
||||
datefmt="%H:%M:%S",
|
||||
)
|
||||
logger = logging.getLogger("flux-bridge")
|
||||
# HuggingFace/Torch download-Logs daempfen
|
||||
logging.getLogger("httpx").setLevel(logging.WARNING)
|
||||
logging.getLogger("urllib3").setLevel(logging.WARNING)
|
||||
|
||||
RVS_HOST = os.getenv("RVS_HOST", "").strip()
|
||||
RVS_PORT = int(os.getenv("RVS_PORT", "443"))
|
||||
RVS_TLS = os.getenv("RVS_TLS", "true").lower() == "true"
|
||||
RVS_TLS_FALLBACK = os.getenv("RVS_TLS_FALLBACK", "true").lower() == "true"
|
||||
RVS_TOKEN = os.getenv("RVS_TOKEN", "").strip()
|
||||
|
||||
FLUX_MODEL = os.getenv("FLUX_MODEL", "black-forest-labs/FLUX.1-dev").strip()
|
||||
FLUX_DEVICE = os.getenv("FLUX_DEVICE", "cuda").strip()
|
||||
FLUX_DTYPE = os.getenv("FLUX_DTYPE", "bfloat16").strip().lower()
|
||||
FLUX_OFFLOAD = os.getenv("FLUX_OFFLOAD", "model").strip().lower()
|
||||
FLUX_MAX_STEPS = int(os.getenv("FLUX_MAX_STEPS", "50"))
|
||||
FLUX_MAX_DIM = int(os.getenv("FLUX_MAX_DIM", "1536"))
|
||||
|
||||
# FLUX-dev native: guidance=3.5, steps=28. FLUX-schnell: guidance=0.0, steps=4.
|
||||
DEFAULT_STEPS_DEV = 28
|
||||
DEFAULT_STEPS_SCHNELL = 4
|
||||
DEFAULT_GUIDANCE_DEV = 3.5
|
||||
DEFAULT_GUIDANCE_SCHNELL = 0.0
|
||||
|
||||
|
||||
def _is_schnell(model_id: str) -> bool:
|
||||
return "schnell" in model_id.lower()
|
||||
|
||||
|
||||
def _torch_dtype():
|
||||
"""Lazy-resolve damit Torch erst beim Modell-Laden importiert wird."""
|
||||
import torch
|
||||
return {"bfloat16": torch.bfloat16, "float16": torch.float16, "float32": torch.float32}\
|
||||
.get(FLUX_DTYPE, torch.bfloat16)
|
||||
|
||||
|
||||
def _snap_dim(v: int, default: int = 1024) -> int:
|
||||
"""FLUX braucht Multiples von 16 (sicher: 64). Clamp + Snap."""
|
||||
try:
|
||||
n = int(v)
|
||||
except (TypeError, ValueError):
|
||||
n = default
|
||||
n = max(256, min(FLUX_MAX_DIM, n))
|
||||
# Auf naechstes Vielfaches von 64 abrunden
|
||||
n = (n // 64) * 64
|
||||
return max(256, n)
|
||||
|
||||
|
||||
class FluxRunner:
|
||||
"""Haelt die FLUX-Pipeline. Synthese laeuft im Executor (blocking).
|
||||
|
||||
GPU ist die Engstelle — wir serialisieren via Queue im Caller, hier
|
||||
nur Single-Lock fuer load. Ein Render auf der 3060 dauert je nach
|
||||
Steps/Aufloesung 20-90 s.
|
||||
"""
|
||||
|
||||
def __init__(self) -> None:
|
||||
self.pipe = None
|
||||
self._lock = asyncio.Lock()
|
||||
self.model_id: str = FLUX_MODEL
|
||||
self.last_load_seconds: float = 0.0
|
||||
|
||||
def _load_blocking(self) -> None:
|
||||
import torch
|
||||
from diffusers import FluxPipeline
|
||||
|
||||
logger.info("Lade FLUX '%s' (dtype=%s, offload=%s)...",
|
||||
self.model_id, FLUX_DTYPE, FLUX_OFFLOAD)
|
||||
t0 = time.time()
|
||||
pipe = FluxPipeline.from_pretrained(self.model_id, torch_dtype=_torch_dtype())
|
||||
|
||||
if FLUX_OFFLOAD == "sequential":
|
||||
pipe.enable_sequential_cpu_offload()
|
||||
elif FLUX_OFFLOAD == "none":
|
||||
pipe.to(FLUX_DEVICE)
|
||||
else: # "model" — default, Sweet-Spot fuer 12 GB Karten
|
||||
pipe.enable_model_cpu_offload()
|
||||
|
||||
# VAE-Tiling spart VRAM bei grossen Bildern (>1024)
|
||||
try:
|
||||
pipe.vae.enable_tiling()
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
self.pipe = pipe
|
||||
self.last_load_seconds = time.time() - t0
|
||||
logger.info("FLUX geladen in %.1fs", self.last_load_seconds)
|
||||
# CUDA-Cache nach dem Load aufraeumen
|
||||
try:
|
||||
torch.cuda.empty_cache()
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
async def ensure_loaded(self) -> None:
|
||||
async with self._lock:
|
||||
if self.pipe is not None:
|
||||
return
|
||||
loop = asyncio.get_event_loop()
|
||||
await loop.run_in_executor(None, self._load_blocking)
|
||||
|
||||
def _generate_blocking(self, prompt: str, width: int, height: int,
|
||||
steps: int, guidance: float, seed: Optional[int]) -> bytes:
|
||||
import torch
|
||||
gen = None
|
||||
if seed is not None and seed >= 0:
|
||||
gen = torch.Generator(device=FLUX_DEVICE).manual_seed(int(seed))
|
||||
|
||||
logger.info("Render: %dx%d, steps=%d, guidance=%.2f, seed=%s, prompt=%r",
|
||||
width, height, steps, guidance, seed, prompt[:80])
|
||||
out = self.pipe(
|
||||
prompt=prompt,
|
||||
width=width,
|
||||
height=height,
|
||||
num_inference_steps=steps,
|
||||
guidance_scale=guidance,
|
||||
generator=gen,
|
||||
)
|
||||
image = out.images[0]
|
||||
buf = io.BytesIO()
|
||||
image.save(buf, format="PNG", optimize=True)
|
||||
png_bytes = buf.getvalue()
|
||||
# VRAM zurueckgeben fuer den naechsten Render
|
||||
try:
|
||||
torch.cuda.empty_cache()
|
||||
except Exception:
|
||||
pass
|
||||
return png_bytes
|
||||
|
||||
async def generate(self, prompt: str, width: int, height: int,
|
||||
steps: int, guidance: float, seed: Optional[int]) -> bytes:
|
||||
await self.ensure_loaded()
|
||||
loop = asyncio.get_event_loop()
|
||||
return await loop.run_in_executor(
|
||||
None, self._generate_blocking, prompt, width, height, steps, guidance, seed,
|
||||
)
|
||||
|
||||
|
||||
# ── Helpers ─────────────────────────────────────────────────
|
||||
|
||||
|
||||
async def _send(ws, mtype: str, payload: dict) -> None:
|
||||
try:
|
||||
await ws.send(json.dumps({
|
||||
"type": mtype,
|
||||
"payload": payload,
|
||||
"timestamp": int(time.time() * 1000),
|
||||
}))
|
||||
except Exception as e:
|
||||
logger.warning("Send fehlgeschlagen (%s): %s", mtype, e)
|
||||
|
||||
|
||||
async def _broadcast_status(ws, state: str, **extra) -> None:
|
||||
"""Sendet service_status fuer das Flux-Modul.
|
||||
state: 'loading' | 'ready' | 'error'."""
|
||||
payload = {"service": "flux", "state": state}
|
||||
payload.update(extra)
|
||||
await _send(ws, "service_status", payload)
|
||||
|
||||
|
||||
# ── Flux-Request Queue ──────────────────────────────────────
|
||||
|
||||
# Eine GPU, ein Render gleichzeitig. Parallele Requests OOM-en sonst.
|
||||
_flux_queue: "asyncio.Queue[tuple]" = asyncio.Queue()
|
||||
|
||||
|
||||
def _resolve_request(payload: dict, runner: FluxRunner) -> tuple[str, int, int, int, float, Optional[int]]:
|
||||
"""Liest Felder aus dem flux_request payload + clampt auf Caps."""
|
||||
prompt = (payload.get("prompt") or "").strip()
|
||||
if not prompt:
|
||||
raise ValueError("prompt fehlt")
|
||||
if len(prompt) > 2000:
|
||||
prompt = prompt[:2000]
|
||||
|
||||
width = _snap_dim(payload.get("width", 1024))
|
||||
height = _snap_dim(payload.get("height", 1024))
|
||||
|
||||
schnell = _is_schnell(runner.model_id)
|
||||
default_steps = DEFAULT_STEPS_SCHNELL if schnell else DEFAULT_STEPS_DEV
|
||||
default_guidance = DEFAULT_GUIDANCE_SCHNELL if schnell else DEFAULT_GUIDANCE_DEV
|
||||
|
||||
try:
|
||||
steps = int(payload.get("steps", default_steps))
|
||||
except (TypeError, ValueError):
|
||||
steps = default_steps
|
||||
steps = max(1, min(FLUX_MAX_STEPS, steps))
|
||||
|
||||
try:
|
||||
guidance = float(payload.get("guidance_scale", default_guidance))
|
||||
except (TypeError, ValueError):
|
||||
guidance = default_guidance
|
||||
if not (0.0 <= guidance <= 20.0):
|
||||
guidance = default_guidance
|
||||
|
||||
seed = payload.get("seed")
|
||||
if seed is not None:
|
||||
try:
|
||||
seed = int(seed)
|
||||
except (TypeError, ValueError):
|
||||
seed = None
|
||||
|
||||
return prompt, width, height, steps, guidance, seed
|
||||
|
||||
|
||||
async def _flux_worker(ws, runner: FluxRunner) -> None:
|
||||
"""Serialisiert Renders — eine GPU, ein Bild gleichzeitig."""
|
||||
while True:
|
||||
payload = await _flux_queue.get()
|
||||
request_id = payload.get("requestId") or str(uuid.uuid4())
|
||||
try:
|
||||
await _do_render(ws, runner, payload, request_id)
|
||||
except Exception:
|
||||
logger.exception("Flux-Worker Fehler")
|
||||
await _send(ws, "flux_response", {
|
||||
"requestId": request_id,
|
||||
"error": "internal error",
|
||||
})
|
||||
finally:
|
||||
_flux_queue.task_done()
|
||||
|
||||
|
||||
async def _do_render(ws, runner: FluxRunner, payload: dict, request_id: str) -> None:
|
||||
t0 = time.time()
|
||||
try:
|
||||
prompt, width, height, steps, guidance, seed = _resolve_request(payload, runner)
|
||||
except ValueError as e:
|
||||
logger.warning("flux_request invalid: %s", e)
|
||||
await _send(ws, "flux_response", {"requestId": request_id, "error": str(e)})
|
||||
return
|
||||
|
||||
# Progress-Ping: User soll sehen dass was passiert (Render >30s realistisch)
|
||||
await _send(ws, "flux_response", {
|
||||
"requestId": request_id,
|
||||
"state": "rendering",
|
||||
"width": width, "height": height, "steps": steps,
|
||||
})
|
||||
|
||||
try:
|
||||
png = await runner.generate(prompt, width, height, steps, guidance, seed)
|
||||
except Exception as e:
|
||||
logger.exception("FLUX Render-Fehler")
|
||||
await _send(ws, "flux_response", {"requestId": request_id, "error": str(e)[:200]})
|
||||
return
|
||||
|
||||
dt = time.time() - t0
|
||||
b64 = base64.b64encode(png).decode("ascii")
|
||||
logger.info("Render fertig: %dx%d, %d KB PNG, %.1fs", width, height, len(png) // 1024, dt)
|
||||
|
||||
await _send(ws, "flux_response", {
|
||||
"requestId": request_id,
|
||||
"state": "done",
|
||||
"base64": b64,
|
||||
"mimeType": "image/png",
|
||||
"width": width,
|
||||
"height": height,
|
||||
"steps": steps,
|
||||
"guidance": guidance,
|
||||
"seed": seed,
|
||||
"model": runner.model_id,
|
||||
"renderSeconds": round(dt, 2),
|
||||
"sizeBytes": len(png),
|
||||
})
|
||||
|
||||
|
||||
# ── Haupt-Loop ──────────────────────────────────────────────
|
||||
|
||||
|
||||
async def run_loop(runner: FluxRunner) -> None:
|
||||
use_tls = RVS_TLS
|
||||
retry_s = 2
|
||||
tls_fallback_tried = False
|
||||
|
||||
while True:
|
||||
scheme = "wss" if use_tls else "ws"
|
||||
url = f"{scheme}://{RVS_HOST}:{RVS_PORT}/ws?token={RVS_TOKEN}"
|
||||
masked = url.replace(RVS_TOKEN, "***") if RVS_TOKEN else url
|
||||
|
||||
try:
|
||||
logger.info("Verbinde zu RVS: %s", masked)
|
||||
# max_size 100 MB damit ein 4 MP PNG (~5-10 MB → ~13 MB base64)
|
||||
# locker reinpasst. Mit dem RVS-Limit (100 MB) konsistent.
|
||||
async with websockets.connect(url, ping_interval=20, ping_timeout=10,
|
||||
max_size=100 * 1024 * 1024) as ws:
|
||||
logger.info("RVS verbunden")
|
||||
retry_s = 2
|
||||
tls_fallback_tried = False
|
||||
|
||||
async def _load_with_status():
|
||||
try:
|
||||
if runner.pipe is not None:
|
||||
logger.info("Initial: broadcaste ready (Pipeline schon im RAM: %s)",
|
||||
runner.model_id)
|
||||
await _broadcast_status(ws, "ready",
|
||||
model=runner.model_id,
|
||||
loadSeconds=runner.last_load_seconds)
|
||||
else:
|
||||
logger.info("Initial: broadcaste loading + lade '%s'", runner.model_id)
|
||||
await _broadcast_status(ws, "loading", model=runner.model_id)
|
||||
await runner.ensure_loaded()
|
||||
await _broadcast_status(ws, "ready",
|
||||
model=runner.model_id,
|
||||
loadSeconds=runner.last_load_seconds)
|
||||
except Exception as e:
|
||||
logger.exception("Initial-Load crashed: %s", e)
|
||||
try:
|
||||
await _broadcast_status(ws, "error", error=str(e)[:200])
|
||||
except Exception:
|
||||
pass
|
||||
asyncio.create_task(_load_with_status())
|
||||
|
||||
worker = asyncio.create_task(_flux_worker(ws, runner))
|
||||
|
||||
try:
|
||||
async for raw in ws:
|
||||
try:
|
||||
msg = json.loads(raw)
|
||||
except Exception:
|
||||
continue
|
||||
mtype = msg.get("type", "")
|
||||
payload = msg.get("payload", {}) or {}
|
||||
|
||||
if mtype == "flux_request":
|
||||
await _flux_queue.put(payload)
|
||||
finally:
|
||||
worker.cancel()
|
||||
try:
|
||||
await worker
|
||||
except asyncio.CancelledError:
|
||||
pass
|
||||
except Exception as e:
|
||||
logger.warning("Verbindung verloren: %s", e)
|
||||
if use_tls and RVS_TLS_FALLBACK and not tls_fallback_tried:
|
||||
logger.info("TLS fehlgeschlagen — Fallback auf ws://")
|
||||
use_tls = False
|
||||
tls_fallback_tried = True
|
||||
continue
|
||||
await asyncio.sleep(min(retry_s, 30))
|
||||
retry_s = min(retry_s * 2, 30)
|
||||
|
||||
|
||||
async def main() -> None:
|
||||
if not RVS_HOST:
|
||||
logger.error("RVS_HOST nicht gesetzt — Abbruch")
|
||||
sys.exit(1)
|
||||
runner = FluxRunner()
|
||||
await run_loop(runner)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
try:
|
||||
asyncio.run(main())
|
||||
except KeyboardInterrupt:
|
||||
sys.exit(0)
|
||||
@@ -0,0 +1,60 @@
|
||||
# ════════════════════════════════════════════════
|
||||
# ARIA FLUX-Bridge — Text-to-Image (GPU)
|
||||
# Eigener Stack, weil FLUX auch auf einer anderen
|
||||
# Maschine als f5tts/whisper laufen kann (z.B. 4090
|
||||
# separat vom Gaming-PC). Verbindet sich selbst per
|
||||
# WebSocket zum RVS und lauscht auf flux_request.
|
||||
# ════════════════════════════════════════════════
|
||||
#
|
||||
# Voraussetzungen:
|
||||
# - NVIDIA-GPU mit >= 12 GB VRAM (3060 reicht mit
|
||||
# enable_model_cpu_offload). Bei < 12 GB:
|
||||
# FLUX_OFFLOAD=sequential setzen, sonst OOM.
|
||||
# - Docker mit NVIDIA Container Toolkit
|
||||
# - HuggingFace-Token in .env (FLUX.1-dev ist gated)
|
||||
# - .env mit RVS-Verbindungsdaten (gleiche wie xtts!)
|
||||
#
|
||||
# Start: docker compose up -d
|
||||
# ════════════════════════════════════════════════
|
||||
|
||||
services:
|
||||
|
||||
# ─── FLUX.1-dev Bildgenerierung (GPU) ─────────
|
||||
# Empfaengt flux_request via RVS, rendert PNG mit FLUX.1-dev (12B Params)
|
||||
# und broadcastet flux_response mit base64-PNG zurueck. aria-bridge speichert
|
||||
# die Datei nach /shared/uploads/ und ARIA referenziert sie via [FILE:]-Marker.
|
||||
#
|
||||
# Modell-Wahl per FLUX_MODEL:
|
||||
# - black-forest-labs/FLUX.1-dev (Default, 28 Steps, non-commercial)
|
||||
# - black-forest-labs/FLUX.1-schnell (4 Steps, Apache-2.0, schneller)
|
||||
# HuggingFace-Token noetig fuer FLUX.1-dev — vorher `huggingface-cli login`
|
||||
# oder HF_TOKEN in .env setzen, sonst 403 beim ersten Download.
|
||||
flux-bridge:
|
||||
build: .
|
||||
container_name: aria-flux-bridge
|
||||
deploy:
|
||||
resources:
|
||||
reservations:
|
||||
devices:
|
||||
- driver: nvidia
|
||||
count: 1
|
||||
capabilities: [gpu]
|
||||
environment:
|
||||
- RVS_HOST=${RVS_HOST}
|
||||
- RVS_PORT=${RVS_PORT:-443}
|
||||
- RVS_TLS=${RVS_TLS:-true}
|
||||
- RVS_TLS_FALLBACK=${RVS_TLS_FALLBACK:-true}
|
||||
- RVS_TOKEN=${RVS_TOKEN}
|
||||
- FLUX_MODEL=${FLUX_MODEL:-black-forest-labs/FLUX.1-dev}
|
||||
- FLUX_DEVICE=${FLUX_DEVICE:-cuda}
|
||||
- FLUX_DTYPE=${FLUX_DTYPE:-bfloat16}
|
||||
- FLUX_OFFLOAD=${FLUX_OFFLOAD:-model}
|
||||
- FLUX_MAX_STEPS=${FLUX_MAX_STEPS:-50}
|
||||
- FLUX_MAX_DIM=${FLUX_MAX_DIM:-1536}
|
||||
- HF_TOKEN=${HF_TOKEN:-} # FLUX.1-dev braucht Login-Token
|
||||
volumes:
|
||||
- ./hf-cache:/root/.cache/huggingface # Bind-Mount. FLUX.1-dev ~24 GB on disk!
|
||||
# Wenn flux auf der gleichen Maschine
|
||||
# wie xtts laeuft: ../xtts/hf-cache
|
||||
# symlinken um den Cache zu teilen.
|
||||
restart: unless-stopped
|
||||
@@ -0,0 +1,9 @@
|
||||
diffusers>=0.30.0
|
||||
transformers>=4.43.0
|
||||
accelerate>=0.33.0
|
||||
sentencepiece>=0.2.0
|
||||
protobuf>=4.25.0
|
||||
pillow>=10.0.0
|
||||
huggingface_hub>=0.24.0
|
||||
websockets>=12.0
|
||||
numpy>=1.24
|
||||
Reference in New Issue
Block a user