From a08efb37954128394309ab2765ff551284443ab8 Mon Sep 17 00:00:00 2001 From: Stefan Hacker Date: Tue, 21 Apr 2026 13:27:57 +0200 Subject: [PATCH] Chunked upload for large OVAs The Convert tab now slices the OVA into 16 MiB chunks and PUTs them with an offset parameter instead of one multipart POST. Avoids the temp-file double-copy and tmpfs exhaustion that broke uploads of very large (>50 GiB) appliances. - POST /api/uploads creates an upload session, GET/PUT/DELETE manage a partial file written directly in upload_dir (seek+write, no copy) - POST /api/jobs now accepts JSON {upload_id, vmid, storage} to finalize the chunked upload; multipart form path kept for CLI/curl - Client: File.slice() loop with per-chunk XHR, progress bar driven by bytes sent / total, partial files resumable via upload_id Co-Authored-By: Claude Opus 4.7 (1M context) --- src/ova2vzdump/templates/index.html | 82 +++++++++++++++++-- src/ova2vzdump/web.py | 120 +++++++++++++++++++++++++--- 2 files changed, 186 insertions(+), 16 deletions(-) diff --git a/src/ova2vzdump/templates/index.html b/src/ova2vzdump/templates/index.html index 395503b..fca0497 100644 --- a/src/ova2vzdump/templates/index.html +++ b/src/ova2vzdump/templates/index.html @@ -139,6 +139,48 @@ const bar = document.getElementById('bar'); const msg = document.getElementById('msg'); const result = document.getElementById('result'); +const CHUNK_SIZE = 16 * 1024 * 1024; // 16 MiB per PUT + +function putChunk(uploadId, offset, blob) { + // XHR (not fetch) so we can abort and — if we ever want — observe + // intra-chunk upload progress. One chunk = one HTTP request. 
+ return new Promise((resolve, reject) => { + const xhr = new XMLHttpRequest(); + xhr.open('PUT', '/api/uploads/' + uploadId + '?offset=' + offset); + xhr.onload = () => { + if (xhr.status >= 200 && xhr.status < 300) { + try { resolve(JSON.parse(xhr.responseText)); } + catch (e) { reject(new Error('bad server response')); } + } else { + reject(new Error('chunk @' + offset + ' failed: ' + + xhr.status + ' ' + xhr.statusText)); + } + }; + xhr.onerror = () => reject(new Error('network error @' + offset)); + xhr.send(blob); + }); +} + +async function uploadFileChunked(file, onProgress) { + const initResp = await fetch('/api/uploads', { + method: 'POST', + headers: { 'Content-Type': 'application/json' }, + body: JSON.stringify({ filename: file.name }), + }); + if (!initResp.ok) throw new Error('failed to start upload'); + const { upload_id } = await initResp.json(); + + let offset = 0; + while (offset < file.size) { + const end = Math.min(offset + CHUNK_SIZE, file.size); + const chunk = file.slice(offset, end); + await putChunk(upload_id, offset, chunk); + offset = end; + onProgress(offset, file.size); + } + return upload_id; +} + convertForm.addEventListener('submit', async (e) => { e.preventDefault(); goConvert.disabled = true; @@ -148,17 +190,47 @@ convertForm.addEventListener('submit', async (e) => { msg.textContent = ''; result.innerHTML = ''; - const fd = new FormData(convertForm); + const fileInput = convertForm.querySelector('input[name="ova"]'); + const file = fileInput.files[0]; + if (!file) { + msg.innerHTML = 'no file selected'; + goConvert.disabled = false; + return; + } + const vmid = convertForm.querySelector('[name="vmid"]').value; + const storage = convertForm.querySelector('[name="storage"]').value; + + let upload_id; + try { + upload_id = await uploadFileChunked(file, (sent, total) => { + bar.value = (sent / total) * 100; + stageEl.textContent = 'uploading ' + formatBytes(sent) + + ' / ' + formatBytes(total); + }); + } catch (err) { + msg.innerHTML = 
'upload failed: ' + err.message + ''; + goConvert.disabled = false; + return; + } + + stageEl.textContent = 'queued'; + bar.value = 0; + let resp; - try { resp = await fetch('/api/jobs', { method: 'POST', body: fd }); } - catch (err) { - msg.innerHTML = 'upload failed: ' + err + ''; + try { + resp = await fetch('/api/jobs', { + method: 'POST', + headers: { 'Content-Type': 'application/json' }, + body: JSON.stringify({ upload_id, vmid: Number(vmid), storage }), + }); + } catch (err) { + msg.innerHTML = 'failed to start job: ' + err + ''; goConvert.disabled = false; return; } if (!resp.ok) { const body = await resp.text(); - msg.innerHTML = 'upload failed: ' + body + ''; + msg.innerHTML = 'failed to start job: ' + body + ''; goConvert.disabled = false; return; } diff --git a/src/ova2vzdump/web.py b/src/ova2vzdump/web.py index 34865cb..2928fd3 100644 --- a/src/ova2vzdump/web.py +++ b/src/ova2vzdump/web.py @@ -48,6 +48,16 @@ class Job: _JOBS: dict[str, Job] = {} _JOBS_LOCK = threading.Lock() +# In-memory registry of chunked upload sessions. `path` is the partial +# file on disk; `received` is the highest byte offset actually written +# (so clients can resume after a dropped connection). +_UPLOADS: dict[str, dict] = {} +_UPLOADS_LOCK = threading.Lock() + +# How much we pull from the WSGI input stream per sub-read. Bounds +# per-request memory during a chunk upload. +_STREAM_SUBCHUNK = 4 * 1024 * 1024 + def _run_job(job: Job, ova_path: Path, output_dir: Path) -> None: def progress(stage: str, pct: float) -> None: @@ -93,19 +103,107 @@ def create_app(upload_dir: Path, output_dir: Path) -> Flask: def index() -> str: return render_template("index.html") + @app.post("/api/uploads") + def upload_init(): + """Create a chunked-upload session. 
Client then PUTs chunks to
+        /api/uploads/<upload_id>?offset=N until the file is complete."""
+        data = request.get_json(silent=True) or {}
+        filename = secure_filename(data.get("filename", "")) or "input.ova"
+        upload_id = uuid.uuid4().hex
+        path = upload_dir / f"partial-{upload_id}-{filename}"
+        path.touch()
+        with _UPLOADS_LOCK:
+            _UPLOADS[upload_id] = {
+                "filename": filename, "path": path, "received": 0,
+            }
+        return jsonify(upload_id=upload_id, filename=filename, received=0)
+
+    @app.get("/api/uploads/<upload_id>")
+    def upload_status(upload_id: str):
+        with _UPLOADS_LOCK:
+            info = _UPLOADS.get(upload_id)
+        if not info:
+            return jsonify(error="not found"), 404
+        return jsonify(
+            filename=info["filename"], received=info["received"],
+        )
+
+    @app.put("/api/uploads/<upload_id>")
+    def upload_chunk(upload_id: str):
+        with _UPLOADS_LOCK:
+            info = _UPLOADS.get(upload_id)
+        if not info:
+            return jsonify(error="not found"), 404
+        try:
+            offset = int(request.args.get("offset", "0"))
+        except ValueError:
+            return jsonify(error="bad offset"), 400
+        if offset < 0:
+            return jsonify(error="negative offset"), 400
+
+        written = 0
+        with open(info["path"], "r+b") as f:
+            f.seek(offset)
+            while True:
+                buf = request.stream.read(_STREAM_SUBCHUNK)
+                if not buf:
+                    break
+                f.write(buf)
+                written += len(buf)
+        # Track the highest byte actually persisted so resume can query. 
+        new_high = offset + written
+        if new_high > info["received"]:
+            info["received"] = new_high
+        return jsonify(received=info["received"])
+
+    @app.delete("/api/uploads/<upload_id>")
+    def upload_abort(upload_id: str):
+        with _UPLOADS_LOCK:
+            info = _UPLOADS.pop(upload_id, None)
+        if info:
+            try:
+                info["path"].unlink(missing_ok=True)
+            except OSError:
+                pass
+        return jsonify(ok=True)
+
     @app.post("/api/jobs")
     def create_job():
-        if "ova" not in request.files:
-            return jsonify(error="no file 'ova' in multipart form"), 400
-        f = request.files["ova"]
-        if not f.filename:
-            return jsonify(error="empty filename"), 400
-        vmid = int(request.form.get("vmid", "100"))
-        storage = request.form.get("storage", "local-lvm")
-        filename = secure_filename(f.filename) or "input.ova"
-        job_id = uuid.uuid4().hex
-        ova_path = upload_dir / f"{job_id}-{filename}"
-        f.save(ova_path)
+        # Two input paths, decided by Content-Type:
+        #   - application/json + {upload_id, ...} → finalize a chunked upload
+        #   - multipart/form-data → legacy single-request upload (CLI/curl)
+        if request.is_json:
+            data = request.get_json() or {}
+            upload_id = data.get("upload_id")
+            if not upload_id:
+                return jsonify(error="upload_id required"), 400
+            with _UPLOADS_LOCK:
+                info = _UPLOADS.pop(upload_id, None)
+            if not info:
+                return jsonify(error="upload session not found"), 404
+
+            try:
+                vmid = int(data.get("vmid", 100))
+            except (TypeError, ValueError):
+                return jsonify(error="vmid must be an integer"), 400
+            storage = data.get("storage", "local-lvm")
+
+            job_id = uuid.uuid4().hex
+            ova_path = upload_dir / f"{job_id}-{info['filename']}"
+            info["path"].rename(ova_path)
+            filename = info["filename"]
+        else:
+            if "ova" not in request.files:
+                return jsonify(error="no file 'ova' in multipart form"), 400
+            f = request.files["ova"]
+            if not f.filename:
+                return jsonify(error="empty filename"), 400
+            vmid = int(request.form.get("vmid", "100"))
+            storage = request.form.get("storage", "local-lvm")
+            filename = 
secure_filename(f.filename) or "input.ova"
+            job_id = uuid.uuid4().hex
+            ova_path = upload_dir / f"{job_id}-{filename}"
+            f.save(ova_path)
 
         job = Job(id=job_id, ova_name=filename, vmid=vmid, storage=storage)
         with _JOBS_LOCK: