Files
minmal-file-cloud-email-pim…/backend/app/api/backup.py
T
Stefan Hacker c6fe2c590f feat: Backup & Restore mit Chunked Upload fuer grosse Dateien
Backup:
- Erstellt streaming ZIP mit SQLite-DB (via sqlite3.backup API) +
  allen hochgeladenen Dateien + metadata.json
- Download als ZIP direkt aus dem Admin-Panel

Restore:
- Kleine Backups (<100MB): Direkter Upload
- Grosse Backups (>100MB bis TB+): Chunked Upload in 10MB-Stuecken
  mit Fortschrittsanzeige
- DB-Merge: INSERT OR REPLACE auf gemeinsame Spalten, so dass neue
  Schema-Aenderungen erhalten bleiben und Backup-Daten eingefuegt werden
- Dateien werden in data/files/ wiederhergestellt
- Restore-Anleitung direkt in der UI mit Hinweis auf SECRET_KEY/JWT_SECRET_KEY

Backend: /admin/backup, /admin/restore/init, /admin/restore/chunk,
         /admin/restore/finalize, /admin/restore/direct

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-04-11 17:55:39 +02:00

356 lines
12 KiB
Python

import io
import json
import os
import shutil
import sqlite3
import tempfile
import uuid
import zipfile
from datetime import datetime, timezone
from pathlib import Path
from flask import request, jsonify, current_app, Response
from app.api import api_bp
from app.api.auth import admin_required
from app.extensions import db
# Store active chunked restore uploads in memory (upload_id -> metadata).
# NOTE(review): this is per-process, unsynchronized state and is lost on
# restart — presumably acceptable for a single-worker admin flow; confirm
# before deploying with multiple workers.
_active_uploads = {}
# --- Backup ---
@api_bp.route('/admin/backup', methods=['POST'])
@admin_required
def create_backup():
    """Create a full backup as a streaming ZIP download.

    The ZIP contains:
    - metadata.json (version, timestamp, stats)
    - database.sqlite3 (consistent copy of the SQLite DB via the backup API)
    - files/ (all uploaded user files)

    Returns:
        flask.Response: ``application/zip`` response streamed in 1 MB chunks.
    """
    db_uri = current_app.config['SQLALCHEMY_DATABASE_URI']
    db_path = db_uri.replace('sqlite:///', '')
    upload_path = Path(current_app.config['UPLOAD_PATH'])

    # Gather stats for metadata.json (imported here to avoid import cycles).
    from app.models.user import User
    from app.models.file import File
    user_count = User.query.count()
    file_count = File.query.filter_by(is_folder=False).count()

    metadata = {
        'version': '1.0',
        'created_at': datetime.now(timezone.utc).isoformat(),
        'user_count': user_count,
        'file_count': file_count,
        'description': 'Mini-Cloud Full Backup',
    }

    def generate_zip():
        """Build the ZIP in a temp file, then stream it in 1 MB chunks."""
        # A temp file (not memory) so arbitrarily large backups work.
        with tempfile.NamedTemporaryFile(delete=False, suffix='.zip') as tmp:
            tmp_path = tmp.name
        try:
            with zipfile.ZipFile(tmp_path, 'w', zipfile.ZIP_DEFLATED,
                                 allowZip64=True) as zf:
                # 1. Metadata
                zf.writestr('metadata.json', json.dumps(metadata, indent=2))

                # 2. SQLite database — sqlite3's backup API yields a
                # consistent snapshot even while the app is serving requests.
                db_backup_path = tmp_path + '.db'
                source = None
                dest = None
                try:
                    source = sqlite3.connect(db_path)
                    dest = sqlite3.connect(db_backup_path)
                    source.backup(dest)
                finally:
                    # BUG FIX: previously the connections were only closed on
                    # the success path and leaked if backup() raised.
                    if source is not None:
                        source.close()
                    if dest is not None:
                        dest.close()
                try:
                    zf.write(db_backup_path, 'database.sqlite3')
                finally:
                    if os.path.exists(db_backup_path):
                        os.unlink(db_backup_path)

                # 3. User files
                if upload_path.exists():
                    for file_path in upload_path.rglob('*'):
                        if file_path.is_file():
                            arcname = 'files/' + str(file_path.relative_to(upload_path))
                            zf.write(str(file_path), arcname)

            # Stream the finished ZIP in 1 MB chunks.
            with open(tmp_path, 'rb') as f:
                while True:
                    chunk = f.read(1024 * 1024)
                    if not chunk:
                        break
                    yield chunk
        finally:
            if os.path.exists(tmp_path):
                os.unlink(tmp_path)

    timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
    filename = f'minicloud_backup_{timestamp}.zip'
    return Response(
        generate_zip(),
        mimetype='application/zip',
        headers={
            # BUG FIX: the header previously contained the literal placeholder
            # "(unknown)" instead of the generated filename.
            'Content-Disposition': f'attachment; filename="{filename}"',
            # Disable reverse-proxy buffering so the download streams.
            'X-Accel-Buffering': 'no',
        },
    )
# --- Chunked Restore Upload ---
@api_bp.route('/admin/restore/init', methods=['POST'])
@admin_required
def restore_init():
    """Initialize a chunked restore upload.

    Creates a staging directory under the system temp dir and registers the
    upload in the in-memory table. Returns an upload_id for subsequent
    chunk uploads plus the recommended chunk size.
    """
    payload = request.get_json() or {}
    upload_id = str(uuid.uuid4())

    staging_dir = Path(tempfile.gettempdir()) / f'minicloud_restore_{upload_id}'
    staging_dir.mkdir(parents=True, exist_ok=True)

    _active_uploads[upload_id] = {
        'dir': str(staging_dir),
        'total_size': payload.get('total_size', 0),
        'total_chunks': payload.get('total_chunks', 0),
        'received_chunks': set(),
        'filename': payload.get('filename', 'backup.zip'),
        'created_at': datetime.now(timezone.utc).isoformat(),
    }

    return jsonify({
        'upload_id': upload_id,
        'chunk_size': 10 * 1024 * 1024,  # 10 MB recommended chunk size
    }), 200
@api_bp.route('/admin/restore/chunk', methods=['POST'])
@admin_required
def restore_chunk():
    """Upload a single chunk of the backup file.

    Expects form fields ``upload_id`` and ``chunk_number`` plus a file part
    named ``chunk``. Chunks are written to the upload's staging directory
    with zero-padded names so finalize can assemble them in order.
    """
    upload_id = request.form.get('upload_id', '')
    # BUG FIX: a non-numeric chunk_number previously raised ValueError and
    # produced an unhandled 500; negative numbers were accepted and could
    # never be assembled. Reject both cleanly.
    try:
        chunk_number = int(request.form.get('chunk_number', 0))
    except (TypeError, ValueError):
        return jsonify({'error': 'Ungueltige Chunk-Nummer'}), 400
    if chunk_number < 0:
        return jsonify({'error': 'Ungueltige Chunk-Nummer'}), 400
    if upload_id not in _active_uploads:
        return jsonify({'error': 'Upload-ID unbekannt. Bitte neu starten.'}), 404
    if 'chunk' not in request.files:
        return jsonify({'error': 'Kein Chunk gesendet'}), 400

    upload_info = _active_uploads[upload_id]
    upload_dir = Path(upload_info['dir'])
    chunk_file = request.files['chunk']
    chunk_path = upload_dir / f'chunk_{chunk_number:06d}'
    chunk_file.save(str(chunk_path))
    upload_info['received_chunks'].add(chunk_number)

    return jsonify({
        'chunk_number': chunk_number,
        'received': len(upload_info['received_chunks']),
        'total': upload_info['total_chunks'],
    }), 200
@api_bp.route('/admin/restore/finalize', methods=['POST'])
@admin_required
def restore_finalize():
    """Assemble the uploaded chunks into a ZIP and perform the restore.

    The staging directory and the in-memory upload entry are always cleaned
    up, whether the restore succeeds or fails.
    """
    data = request.get_json() or {}
    upload_id = data.get('upload_id', '')
    if upload_id not in _active_uploads:
        return jsonify({'error': 'Upload-ID unbekannt'}), 404
    upload_info = _active_uploads[upload_id]
    upload_dir = Path(upload_info['dir'])
    try:
        # BUG FIX: the assembly loop below stops at the first missing chunk,
        # so a gap used to silently truncate the ZIP. Verify completeness
        # against the count announced at init time first.
        received = upload_info['received_chunks']
        expected = upload_info['total_chunks']
        if expected and len(received) < expected:
            return jsonify({
                'error': f'Unvollstaendiger Upload: {len(received)}/{expected} Chunks'
            }), 400

        # Assemble chunks into a single ZIP, in order.
        zip_path = upload_dir / 'backup.zip'
        with open(str(zip_path), 'wb') as outfile:
            chunk_num = 0
            while True:
                chunk_path = upload_dir / f'chunk_{chunk_num:06d}'
                if not chunk_path.exists():
                    break
                with open(str(chunk_path), 'rb') as cf:
                    shutil.copyfileobj(cf, outfile)
                chunk_num += 1
        if chunk_num == 0:
            return jsonify({'error': 'Keine Chunks gefunden'}), 400

        # Verify it's a valid ZIP before touching the live data.
        if not zipfile.is_zipfile(str(zip_path)):
            return jsonify({'error': 'Ungueltige ZIP-Datei'}), 400

        result = _perform_restore(str(zip_path))
        return jsonify(result), 200
    except Exception as e:
        return jsonify({'error': f'Restore fehlgeschlagen: {str(e)}'}), 500
    finally:
        # Cleanup staging area and registration regardless of outcome.
        shutil.rmtree(str(upload_dir), ignore_errors=True)
        _active_uploads.pop(upload_id, None)
@api_bp.route('/admin/restore/direct', methods=['POST'])
@admin_required
def restore_direct():
    """Direct restore from a small backup file (non-chunked, for files < 500MB)."""
    if 'file' not in request.files:
        return jsonify({'error': 'Keine Datei gesendet'}), 400
    uploaded = request.files['file']

    # Persist the upload to a temp file; deleted in the finally below.
    tmp = tempfile.NamedTemporaryFile(delete=False, suffix='.zip')
    tmp_path = tmp.name
    tmp.close()
    uploaded.save(tmp_path)
    try:
        if not zipfile.is_zipfile(tmp_path):
            return jsonify({'error': 'Ungueltige ZIP-Datei'}), 400
        return jsonify(_perform_restore(tmp_path)), 200
    except Exception as e:
        return jsonify({'error': f'Restore fehlgeschlagen: {str(e)}'}), 500
    finally:
        os.unlink(tmp_path)
def _perform_restore(zip_path: str) -> dict:
    """Perform the actual restore from a validated ZIP file.

    Strategy for the DB merge:
    - Open the backup SQLite file as a separate connection.
    - For each table in the backup that also exists in the live DB, read
      all rows and ``INSERT OR REPLACE`` them into the live database using
      only the columns common to both schemas.
    - This preserves any new tables/columns in the current schema; existing
      rows with the same primary keys are overwritten by backup data.

    Args:
        zip_path: Path to a ZIP already verified with ``zipfile.is_zipfile``.

    Returns:
        dict: Restore statistics (per-table row counts, file count,
        backup metadata echoes, success flag and message).
    """
    upload_path = Path(current_app.config['UPLOAD_PATH'])
    # 'users'/'files_db' stay 0 in this implementation; per-table row counts
    # are reported via stats['tables'] instead.
    stats = {'users': 0, 'files_db': 0, 'files_disk': 0, 'tables': []}
    with tempfile.TemporaryDirectory() as extract_dir:
        extract_path = Path(extract_dir)
        # Extract ZIP.
        # NOTE(review): relies on zipfile's built-in sanitization of absolute
        # paths and '..' components to prevent path traversal — confirm for
        # the deployed Python version.
        with zipfile.ZipFile(zip_path, 'r') as zf:
            zf.extractall(str(extract_path))
        # Read metadata (optional; the restore proceeds without it).
        metadata_path = extract_path / 'metadata.json'
        metadata = {}
        if metadata_path.exists():
            metadata = json.loads(metadata_path.read_text())
        stats['backup_date'] = metadata.get('created_at', 'Unbekannt')
        stats['backup_users'] = metadata.get('user_count', '?')
        stats['backup_files'] = metadata.get('file_count', '?')
        # Restore database via merge.
        backup_db_path = extract_path / 'database.sqlite3'
        if backup_db_path.exists():
            live_db_uri = current_app.config['SQLALCHEMY_DATABASE_URI']
            live_db_path = live_db_uri.replace('sqlite:///', '')
            backup_conn = sqlite3.connect(str(backup_db_path))
            backup_conn.row_factory = sqlite3.Row
            live_conn = sqlite3.connect(live_db_path)
            try:
                # List user tables in the backup (sqlite internals excluded).
                backup_tables = [row[0] for row in
                                 backup_conn.execute(
                                     "SELECT name FROM sqlite_master WHERE type='table' AND name NOT LIKE 'sqlite_%'"
                                 ).fetchall()]
                # List user tables in the live DB.
                live_tables = [row[0] for row in
                               live_conn.execute(
                                   "SELECT name FROM sqlite_master WHERE type='table' AND name NOT LIKE 'sqlite_%'"
                               ).fetchall()]
                for table in backup_tables:
                    # Never overwrite the migration bookkeeping table.
                    if table == 'alembic_version':
                        continue
                    # Skip tables that no longer exist in the live schema.
                    if table not in live_tables:
                        continue
                    # Column names from both schemas.
                    live_cols = [col[1] for col in
                                 live_conn.execute(f'PRAGMA table_info("{table}")').fetchall()]
                    backup_cols = [col[1] for col in
                                   backup_conn.execute(f'PRAGMA table_info("{table}")').fetchall()]
                    # Use only columns that exist in both, so rows from an
                    # older backup still insert into a newer schema.
                    common_cols = [c for c in backup_cols if c in live_cols]
                    if not common_cols:
                        continue
                    # NOTE(review): table/column names are interpolated into
                    # SQL, but they come from sqlite_master/PRAGMA and must
                    # also exist in the live schema, which bounds the
                    # injection surface — row values are bound parameters.
                    cols_str = ', '.join(f'"{c}"' for c in common_cols)
                    placeholders = ', '.join('?' for _ in common_cols)
                    rows = backup_conn.execute(
                        f'SELECT {cols_str} FROM "{table}"'
                    ).fetchall()
                    row_count = 0
                    for row in rows:
                        try:
                            live_conn.execute(
                                f'INSERT OR REPLACE INTO "{table}" ({cols_str}) VALUES ({placeholders})',
                                tuple(row)
                            )
                            row_count += 1
                        except Exception:
                            # Best-effort merge: skip rows that violate
                            # constraints of the live schema.
                            continue
                    if row_count > 0:
                        stats['tables'].append({'name': table, 'rows': row_count})
                # Single commit after all tables have been merged.
                live_conn.commit()
            finally:
                backup_conn.close()
                live_conn.close()
        # Restore uploaded files into the configured upload directory.
        backup_files_dir = extract_path / 'files'
        if backup_files_dir.exists():
            upload_path.mkdir(parents=True, exist_ok=True)
            file_count = 0
            for src_file in backup_files_dir.rglob('*'):
                if src_file.is_file():
                    rel_path = src_file.relative_to(backup_files_dir)
                    dest = upload_path / rel_path
                    dest.parent.mkdir(parents=True, exist_ok=True)
                    shutil.copy2(str(src_file), str(dest))
                    file_count += 1
            stats['files_disk'] = file_count
    stats['success'] = True
    stats['message'] = 'Restore erfolgreich abgeschlossen'
    return stats