Chunked upload for large OVAs

The Convert tab now slices the OVA into 16 MiB chunks and PUTs them
with an offset parameter instead of one multipart POST. Avoids the
temp-file double-copy and tmpfs exhaustion that broke uploads of very
large (>50 GiB) appliances.

- POST /api/uploads creates an upload session, GET/PUT/DELETE manage
  a partial file written directly in upload_dir (seek+write, no copy)
- POST /api/jobs now accepts JSON {upload_id, vmid, storage} to
  finalize the chunked upload; multipart form path kept for CLI/curl
- Client: File.slice() loop with per-chunk XHR, progress bar driven by
  bytes sent / total, partial files resumable via upload_id (a standalone
  client sketch of the same flow follows below)
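
For orientation, a minimal standalone client sketch of the flow described in
the bullets above. It is not part of this commit: the base URL, the chunk-size
constant, the `requests` dependency and the `upload_ova` helper name are
assumptions for illustration; only the endpoint paths and JSON fields match
the API described here.

# upload_ova.py -- illustrative client for the chunked-upload API above.
# Base URL, chunk size and the requests dependency are assumptions.
from pathlib import Path
import sys
import requests

BASE = "http://localhost:5000"      # assumed dev address of the Flask app
CHUNK_SIZE = 16 * 1024 * 1024       # mirrors CHUNK_SIZE in the web UI


def upload_ova(path: Path, vmid: int, storage: str) -> dict:
    # 1. create an upload session
    init = requests.post(f"{BASE}/api/uploads", json={"filename": path.name})
    init.raise_for_status()
    upload_id = init.json()["upload_id"]

    # 2. PUT the file in fixed-size slices, each tagged with its byte offset
    offset = 0
    with path.open("rb") as f:
        while chunk := f.read(CHUNK_SIZE):
            r = requests.put(
                f"{BASE}/api/uploads/{upload_id}",
                params={"offset": offset},
                data=chunk,
            )
            r.raise_for_status()
            offset += len(chunk)

    # 3. finalize: hand the completed upload over to a conversion job
    job = requests.post(
        f"{BASE}/api/jobs",
        json={"upload_id": upload_id, "vmid": vmid, "storage": storage},
    )
    job.raise_for_status()
    return job.json()


if __name__ == "__main__":
    print(upload_ova(Path(sys.argv[1]), int(sys.argv[2]), sys.argv[3]))

The legacy multipart path is unchanged, so a single-request POST of an `ova`
form field to /api/jobs keeps working for curl and existing scripts.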

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
Stefan Hacker 2026-04-21 13:27:57 +02:00
parent dcbc957334
commit a08efb3795
2 changed files with 186 additions and 16 deletions

@@ -139,6 +139,48 @@ const bar = document.getElementById('bar');
const msg = document.getElementById('msg');
const result = document.getElementById('result');
const CHUNK_SIZE = 16 * 1024 * 1024; // 16 MiB per PUT
function putChunk(uploadId, offset, blob) {
  // XHR (not fetch) so we can abort and — if we ever want — observe
  // intra-chunk upload progress. One chunk = one HTTP request.
  return new Promise((resolve, reject) => {
    const xhr = new XMLHttpRequest();
    xhr.open('PUT', '/api/uploads/' + uploadId + '?offset=' + offset);
    xhr.onload = () => {
      if (xhr.status >= 200 && xhr.status < 300) {
        try { resolve(JSON.parse(xhr.responseText)); }
        catch (e) { reject(new Error('bad server response')); }
      } else {
        reject(new Error('chunk @' + offset + ' failed: ' +
          xhr.status + ' ' + xhr.statusText));
      }
    };
    xhr.onerror = () => reject(new Error('network error @' + offset));
    xhr.send(blob);
  });
}

async function uploadFileChunked(file, onProgress) {
  const initResp = await fetch('/api/uploads', {
    method: 'POST',
    headers: { 'Content-Type': 'application/json' },
    body: JSON.stringify({ filename: file.name }),
  });
  if (!initResp.ok) throw new Error('failed to start upload');
  const { upload_id } = await initResp.json();
  let offset = 0;
  while (offset < file.size) {
    const end = Math.min(offset + CHUNK_SIZE, file.size);
    const chunk = file.slice(offset, end);
    await putChunk(upload_id, offset, chunk);
    offset = end;
    onProgress(offset, file.size);
  }
  return upload_id;
}

convertForm.addEventListener('submit', async (e) => {
  e.preventDefault();
  goConvert.disabled = true;
@@ -148,17 +190,47 @@ convertForm.addEventListener('submit', async (e) => {
  msg.textContent = '';
  result.innerHTML = '';
  const fileInput = convertForm.querySelector('input[name="ova"]');
  const file = fileInput.files[0];
  if (!file) {
    msg.innerHTML = '<span class="err">no file selected</span>';
    goConvert.disabled = false;
    return;
  }
  const vmid = convertForm.querySelector('[name="vmid"]').value;
  const storage = convertForm.querySelector('[name="storage"]').value;
  let upload_id;
  try {
    upload_id = await uploadFileChunked(file, (sent, total) => {
      bar.value = (sent / total) * 100;
      stageEl.textContent = 'uploading ' + formatBytes(sent) +
        ' / ' + formatBytes(total);
    });
  } catch (err) {
    msg.innerHTML = '<span class="err">upload failed: ' + err.message + '</span>';
    goConvert.disabled = false;
    return;
  }
  stageEl.textContent = 'queued';
  bar.value = 0;
  let resp;
  try {
    resp = await fetch('/api/jobs', {
      method: 'POST',
      headers: { 'Content-Type': 'application/json' },
      body: JSON.stringify({ upload_id, vmid: Number(vmid), storage }),
    });
  } catch (err) {
    msg.innerHTML = '<span class="err">failed to start job: ' + err + '</span>';
    goConvert.disabled = false;
    return;
  }
  if (!resp.ok) {
    const body = await resp.text();
    msg.innerHTML = '<span class="err">failed to start job: ' + body + '</span>';
    goConvert.disabled = false;
    return;
  }
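
The web client above does not resume on its own after a failure, but the
upload_id plus the status endpoint in the server file below are enough to
re-send only the missing tail of a partial file. A rough sketch of that resume
path, under the same assumptions as the client sketch in the commit message
(local dev server, `requests` available); again, not code from this commit.

# resume_upload.py -- illustrative resume helper: ask the server how many
# bytes of the partial file were persisted, then continue from that offset.
from pathlib import Path
import requests

BASE = "http://localhost:5000"      # assumed dev address, as above
CHUNK_SIZE = 16 * 1024 * 1024


def resume_upload(path: Path, upload_id: str) -> int:
    # GET /api/uploads/<id> reports the highest byte offset already written
    status = requests.get(f"{BASE}/api/uploads/{upload_id}")
    status.raise_for_status()
    offset = status.json()["received"]

    # continue PUTting chunks from that offset to the end of the file
    with path.open("rb") as f:
        f.seek(offset)
        while chunk := f.read(CHUNK_SIZE):
            r = requests.put(
                f"{BASE}/api/uploads/{upload_id}",
                params={"offset": offset},
                data=chunk,
            )
            r.raise_for_status()
            offset += len(chunk)
    return offset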

@@ -48,6 +48,16 @@ class Job:
_JOBS: dict[str, Job] = {}
_JOBS_LOCK = threading.Lock()
# In-memory registry of chunked upload sessions. `path` is the partial
# file on disk; `received` is the highest byte offset actually written
# (so clients can resume after a dropped connection).
_UPLOADS: dict[str, dict] = {}
_UPLOADS_LOCK = threading.Lock()
# How much we pull from the WSGI input stream per sub-read. Bounds
# per-request memory during a chunk upload.
_STREAM_SUBCHUNK = 4 * 1024 * 1024
def _run_job(job: Job, ova_path: Path, output_dir: Path) -> None:
    def progress(stage: str, pct: float) -> None:
@@ -93,19 +103,107 @@ def create_app(upload_dir: Path, output_dir: Path) -> Flask:
    def index() -> str:
        return render_template("index.html")

    @app.post("/api/uploads")
    def upload_init():
        """Create a chunked-upload session. Client then PUTs chunks to
        /api/uploads/<upload_id>?offset=N until the file is complete."""
        data = request.get_json(silent=True) or {}
        filename = secure_filename(data.get("filename", "")) or "input.ova"
        upload_id = uuid.uuid4().hex
        path = upload_dir / f"partial-{upload_id}-{filename}"
        path.touch()
        with _UPLOADS_LOCK:
            _UPLOADS[upload_id] = {
                "filename": filename, "path": path, "received": 0,
            }
        return jsonify(upload_id=upload_id, filename=filename, received=0)

    @app.get("/api/uploads/<upload_id>")
    def upload_status(upload_id: str):
        with _UPLOADS_LOCK:
            info = _UPLOADS.get(upload_id)
        if not info:
            return jsonify(error="not found"), 404
        return jsonify(
            filename=info["filename"], received=info["received"],
        )

    @app.put("/api/uploads/<upload_id>")
    def upload_chunk(upload_id: str):
        with _UPLOADS_LOCK:
            info = _UPLOADS.get(upload_id)
        if not info:
            return jsonify(error="not found"), 404
        try:
            offset = int(request.args.get("offset", "0"))
        except ValueError:
            return jsonify(error="bad offset"), 400
        if offset < 0:
            return jsonify(error="negative offset"), 400
        written = 0
        with open(info["path"], "r+b") as f:
            f.seek(offset)
            while True:
                buf = request.stream.read(_STREAM_SUBCHUNK)
                if not buf:
                    break
                f.write(buf)
                written += len(buf)
        # Track the highest byte actually persisted so resume can query.
        new_high = offset + written
        if new_high > info["received"]:
            info["received"] = new_high
        return jsonify(received=info["received"])

    @app.delete("/api/uploads/<upload_id>")
    def upload_abort(upload_id: str):
        with _UPLOADS_LOCK:
            info = _UPLOADS.pop(upload_id, None)
        if info:
            try:
                info["path"].unlink(missing_ok=True)
            except OSError:
                pass
        return jsonify(ok=True)
@app.post("/api/jobs")
def create_job():
if "ova" not in request.files:
return jsonify(error="no file 'ova' in multipart form"), 400
f = request.files["ova"]
if not f.filename:
return jsonify(error="empty filename"), 400
vmid = int(request.form.get("vmid", "100"))
storage = request.form.get("storage", "local-lvm")
filename = secure_filename(f.filename) or "input.ova"
job_id = uuid.uuid4().hex
ova_path = upload_dir / f"{job_id}-{filename}"
f.save(ova_path)
# Two input paths, decided by Content-Type:
# - application/json + {upload_id, ...} → finalize a chunked upload
# - multipart/form-data → legacy single-request upload (CLI/curl)
if request.is_json:
data = request.get_json() or {}
upload_id = data.get("upload_id")
if not upload_id:
return jsonify(error="upload_id required"), 400
with _UPLOADS_LOCK:
info = _UPLOADS.pop(upload_id, None)
if not info:
return jsonify(error="upload session not found"), 404
try:
vmid = int(data.get("vmid", 100))
except (TypeError, ValueError):
return jsonify(error="vmid must be an integer"), 400
storage = data.get("storage", "local-lvm")
job_id = uuid.uuid4().hex
ova_path = upload_dir / f"{job_id}-{info['filename']}"
info["path"].rename(ova_path)
filename = info["filename"]
else:
if "ova" not in request.files:
return jsonify(error="no file 'ova' in multipart form"), 400
f = request.files["ova"]
if not f.filename:
return jsonify(error="empty filename"), 400
vmid = int(request.form.get("vmid", "100"))
storage = request.form.get("storage", "local-lvm")
filename = secure_filename(f.filename) or "input.ova"
job_id = uuid.uuid4().hex
ova_path = upload_dir / f"{job_id}-{filename}"
f.save(ova_path)
job = Job(id=job_id, ova_name=filename, vmid=vmid, storage=storage)
with _JOBS_LOCK: