Files
minmal-file-cloud-email-pim…/backend/app/api/backup.py
T
Stefan Hacker c6fe2c590f feat: Backup & Restore mit Chunked Upload fuer grosse Dateien
Backup:
- Erstellt streaming ZIP mit SQLite-DB (via sqlite3.backup API) +
  allen hochgeladenen Dateien + metadata.json
- Download als ZIP direkt aus dem Admin-Panel

Restore:
- Kleine Backups (<100MB): Direkter Upload
- Grosse Backups (>100MB bis TB+): Chunked Upload in 10MB-Stuecken
  mit Fortschrittsanzeige
- DB-Merge: INSERT OR REPLACE auf gemeinsame Spalten, so dass neue
  Schema-Aenderungen erhalten bleiben und Backup-Daten eingefuegt werden
- Dateien werden in data/files/ wiederhergestellt
- Restore-Anleitung direkt in der UI mit Hinweis auf SECRET_KEY/JWT_SECRET_KEY

Backend: /admin/backup, /admin/restore/init, /admin/restore/chunk,
         /admin/restore/finalize, /admin/restore/direct

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-04-11 17:55:39 +02:00

356 lines
12 KiB
Python

import io
import json
import os
import shutil
import sqlite3
import tempfile
import uuid
import zipfile
from datetime import datetime, timezone
from pathlib import Path
from flask import request, jsonify, current_app, Response
from app.api import api_bp
from app.api.auth import admin_required
from app.extensions import db
# Store active chunked restore uploads in memory (upload_id -> metadata).
# NOTE(review): this is per-process, unsynchronized state and is lost on
# restart — presumably acceptable for a single-worker admin flow; confirm
# before deploying with multiple workers.
_active_uploads = {}
# --- Backup ---
@api_bp.route('/admin/backup', methods=['POST'])
@admin_required
def create_backup():
    """Create a full backup as a streaming ZIP download.

    The ZIP contains:
    - metadata.json (version, timestamp, stats)
    - database.sqlite3 (consistent copy of the SQLite DB via the backup API)
    - files/ (all uploaded user files)

    Returns:
        flask.Response: ``application/zip`` response streamed in 1 MB chunks.
    """
    db_uri = current_app.config['SQLALCHEMY_DATABASE_URI']
    db_path = db_uri.replace('sqlite:///', '')
    upload_path = Path(current_app.config['UPLOAD_PATH'])

    # Gather stats for metadata.json (imported here to avoid import cycles).
    from app.models.user import User
    from app.models.file import File
    user_count = User.query.count()
    file_count = File.query.filter_by(is_folder=False).count()

    metadata = {
        'version': '1.0',
        'created_at': datetime.now(timezone.utc).isoformat(),
        'user_count': user_count,
        'file_count': file_count,
        'description': 'Mini-Cloud Full Backup',
    }

    def generate_zip():
        """Build the ZIP in a temp file, then stream it in 1 MB chunks."""
        # A temp file (not memory) so arbitrarily large backups work.
        with tempfile.NamedTemporaryFile(delete=False, suffix='.zip') as tmp:
            tmp_path = tmp.name
        try:
            with zipfile.ZipFile(tmp_path, 'w', zipfile.ZIP_DEFLATED,
                                 allowZip64=True) as zf:
                # 1. Metadata
                zf.writestr('metadata.json', json.dumps(metadata, indent=2))

                # 2. SQLite database — sqlite3's backup API yields a
                # consistent snapshot even while the app is serving requests.
                db_backup_path = tmp_path + '.db'
                source = None
                dest = None
                try:
                    source = sqlite3.connect(db_path)
                    dest = sqlite3.connect(db_backup_path)
                    source.backup(dest)
                finally:
                    # BUG FIX: previously the connections were only closed on
                    # the success path and leaked if backup() raised.
                    if source is not None:
                        source.close()
                    if dest is not None:
                        dest.close()
                try:
                    zf.write(db_backup_path, 'database.sqlite3')
                finally:
                    if os.path.exists(db_backup_path):
                        os.unlink(db_backup_path)

                # 3. User files
                if upload_path.exists():
                    for file_path in upload_path.rglob('*'):
                        if file_path.is_file():
                            arcname = 'files/' + str(file_path.relative_to(upload_path))
                            zf.write(str(file_path), arcname)

            # Stream the finished ZIP in 1 MB chunks.
            with open(tmp_path, 'rb') as f:
                while True:
                    chunk = f.read(1024 * 1024)
                    if not chunk:
                        break
                    yield chunk
        finally:
            if os.path.exists(tmp_path):
                os.unlink(tmp_path)

    timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
    filename = f'minicloud_backup_{timestamp}.zip'
    return Response(
        generate_zip(),
        mimetype='application/zip',
        headers={
            # BUG FIX: the header previously contained the literal placeholder
            # "(unknown)" instead of the generated filename.
            'Content-Disposition': f'attachment; filename="{filename}"',
            # Disable reverse-proxy buffering so the download streams.
            'X-Accel-Buffering': 'no',
        },
    )
# --- Chunked Restore Upload ---
@api_bp.route('/admin/restore/init', methods=['POST'])
@admin_required
def restore_init():
    """Initialize a chunked restore upload.

    Creates a staging directory under the system temp dir and registers the
    upload in the in-memory table. Returns an upload_id for subsequent
    chunk uploads plus the recommended chunk size.
    """
    payload = request.get_json() or {}
    upload_id = str(uuid.uuid4())

    staging_dir = Path(tempfile.gettempdir()) / f'minicloud_restore_{upload_id}'
    staging_dir.mkdir(parents=True, exist_ok=True)

    _active_uploads[upload_id] = {
        'dir': str(staging_dir),
        'total_size': payload.get('total_size', 0),
        'total_chunks': payload.get('total_chunks', 0),
        'received_chunks': set(),
        'filename': payload.get('filename', 'backup.zip'),
        'created_at': datetime.now(timezone.utc).isoformat(),
    }

    return jsonify({
        'upload_id': upload_id,
        'chunk_size': 10 * 1024 * 1024,  # 10 MB recommended chunk size
    }), 200
@api_bp.route('/admin/restore/chunk', methods=['POST'])
@admin_required
def restore_chunk():
    """Upload a single chunk of the backup file.

    Expects form fields ``upload_id`` and ``chunk_number`` plus a file part
    named ``chunk``. Chunks are written to the upload's staging directory
    with zero-padded names so finalize can assemble them in order.
    """
    upload_id = request.form.get('upload_id', '')
    # BUG FIX: a non-numeric chunk_number previously raised ValueError and
    # produced an unhandled 500; negative numbers were accepted and could
    # never be assembled. Reject both cleanly.
    try:
        chunk_number = int(request.form.get('chunk_number', 0))
    except (TypeError, ValueError):
        return jsonify({'error': 'Ungueltige Chunk-Nummer'}), 400
    if chunk_number < 0:
        return jsonify({'error': 'Ungueltige Chunk-Nummer'}), 400
    if upload_id not in _active_uploads:
        return jsonify({'error': 'Upload-ID unbekannt. Bitte neu starten.'}), 404
    if 'chunk' not in request.files:
        return jsonify({'error': 'Kein Chunk gesendet'}), 400

    upload_info = _active_uploads[upload_id]
    upload_dir = Path(upload_info['dir'])
    chunk_file = request.files['chunk']
    chunk_path = upload_dir / f'chunk_{chunk_number:06d}'
    chunk_file.save(str(chunk_path))
    upload_info['received_chunks'].add(chunk_number)

    return jsonify({
        'chunk_number': chunk_number,
        'received': len(upload_info['received_chunks']),
        'total': upload_info['total_chunks'],
    }), 200
@api_bp.route('/admin/restore/finalize', methods=['POST'])
@admin_required
def restore_finalize():
    """Assemble the uploaded chunks into a ZIP and perform the restore.

    The staging directory and the in-memory upload entry are always cleaned
    up, whether the restore succeeds or fails.
    """
    data = request.get_json() or {}
    upload_id = data.get('upload_id', '')
    if upload_id not in _active_uploads:
        return jsonify({'error': 'Upload-ID unbekannt'}), 404
    upload_info = _active_uploads[upload_id]
    upload_dir = Path(upload_info['dir'])
    try:
        # BUG FIX: the assembly loop below stops at the first missing chunk,
        # so a gap used to silently truncate the ZIP. Verify completeness
        # against the count announced at init time first.
        received = upload_info['received_chunks']
        expected = upload_info['total_chunks']
        if expected and len(received) < expected:
            return jsonify({
                'error': f'Unvollstaendiger Upload: {len(received)}/{expected} Chunks'
            }), 400

        # Assemble chunks into a single ZIP, in order.
        zip_path = upload_dir / 'backup.zip'
        with open(str(zip_path), 'wb') as outfile:
            chunk_num = 0
            while True:
                chunk_path = upload_dir / f'chunk_{chunk_num:06d}'
                if not chunk_path.exists():
                    break
                with open(str(chunk_path), 'rb') as cf:
                    shutil.copyfileobj(cf, outfile)
                chunk_num += 1
        if chunk_num == 0:
            return jsonify({'error': 'Keine Chunks gefunden'}), 400

        # Verify it's a valid ZIP before touching the live data.
        if not zipfile.is_zipfile(str(zip_path)):
            return jsonify({'error': 'Ungueltige ZIP-Datei'}), 400

        result = _perform_restore(str(zip_path))
        return jsonify(result), 200
    except Exception as e:
        return jsonify({'error': f'Restore fehlgeschlagen: {str(e)}'}), 500
    finally:
        # Cleanup staging area and registration regardless of outcome.
        shutil.rmtree(str(upload_dir), ignore_errors=True)
        _active_uploads.pop(upload_id, None)
@api_bp.route('/admin/restore/direct', methods=['POST'])
@admin_required
def restore_direct():
    """Direct restore from a small backup file (non-chunked, for files < 500MB)."""
    if 'file' not in request.files:
        return jsonify({'error': 'Keine Datei gesendet'}), 400
    uploaded = request.files['file']

    # Persist the upload to a temp file; deleted in the finally below.
    tmp = tempfile.NamedTemporaryFile(delete=False, suffix='.zip')
    tmp_path = tmp.name
    tmp.close()
    uploaded.save(tmp_path)
    try:
        if not zipfile.is_zipfile(tmp_path):
            return jsonify({'error': 'Ungueltige ZIP-Datei'}), 400
        return jsonify(_perform_restore(tmp_path)), 200
    except Exception as e:
        return jsonify({'error': f'Restore fehlgeschlagen: {str(e)}'}), 500
    finally:
        os.unlink(tmp_path)
def _perform_restore(zip_path: str) -> dict:
    """Perform the actual restore from a validated ZIP file.

    Strategy for the DB merge:
    - Open the backup SQLite file as a separate connection.
    - For each table in the backup that also exists in the live DB, read
      all rows and ``INSERT OR REPLACE`` them into the live database using
      only the columns common to both schemas.
    - This preserves any new tables/columns in the current schema; existing
      rows with the same primary keys are overwritten by backup data.

    Args:
        zip_path: Path to a ZIP already verified with ``zipfile.is_zipfile``.

    Returns:
        dict: Restore statistics (per-table row counts, file count,
        backup metadata echoes, success flag and message).
    """
    upload_path = Path(current_app.config['UPLOAD_PATH'])
    # 'users'/'files_db' stay 0 in this implementation; per-table row counts
    # are reported via stats['tables'] instead.
    stats = {'users': 0, 'files_db': 0, 'files_disk': 0, 'tables': []}
    with tempfile.TemporaryDirectory() as extract_dir:
        extract_path = Path(extract_dir)
        # Extract ZIP.
        # NOTE(review): relies on zipfile's built-in sanitization of absolute
        # paths and '..' components to prevent path traversal — confirm for
        # the deployed Python version.
        with zipfile.ZipFile(zip_path, 'r') as zf:
            zf.extractall(str(extract_path))
        # Read metadata (optional; the restore proceeds without it).
        metadata_path = extract_path / 'metadata.json'
        metadata = {}
        if metadata_path.exists():
            metadata = json.loads(metadata_path.read_text())
        stats['backup_date'] = metadata.get('created_at', 'Unbekannt')
        stats['backup_users'] = metadata.get('user_count', '?')
        stats['backup_files'] = metadata.get('file_count', '?')
        # Restore database via merge.
        backup_db_path = extract_path / 'database.sqlite3'
        if backup_db_path.exists():
            live_db_uri = current_app.config['SQLALCHEMY_DATABASE_URI']
            live_db_path = live_db_uri.replace('sqlite:///', '')
            backup_conn = sqlite3.connect(str(backup_db_path))
            backup_conn.row_factory = sqlite3.Row
            live_conn = sqlite3.connect(live_db_path)
            try:
                # List user tables in the backup (sqlite internals excluded).
                backup_tables = [row[0] for row in
                                 backup_conn.execute(
                                     "SELECT name FROM sqlite_master WHERE type='table' AND name NOT LIKE 'sqlite_%'"
                                 ).fetchall()]
                # List user tables in the live DB.
                live_tables = [row[0] for row in
                               live_conn.execute(
                                   "SELECT name FROM sqlite_master WHERE type='table' AND name NOT LIKE 'sqlite_%'"
                               ).fetchall()]
                for table in backup_tables:
                    # Never overwrite the migration bookkeeping table.
                    if table == 'alembic_version':
                        continue
                    # Skip tables that no longer exist in the live schema.
                    if table not in live_tables:
                        continue
                    # Column names from both schemas.
                    live_cols = [col[1] for col in
                                 live_conn.execute(f'PRAGMA table_info("{table}")').fetchall()]
                    backup_cols = [col[1] for col in
                                   backup_conn.execute(f'PRAGMA table_info("{table}")').fetchall()]
                    # Use only columns that exist in both, so rows from an
                    # older backup still insert into a newer schema.
                    common_cols = [c for c in backup_cols if c in live_cols]
                    if not common_cols:
                        continue
                    # NOTE(review): table/column names are interpolated into
                    # SQL, but they come from sqlite_master/PRAGMA and must
                    # also exist in the live schema, which bounds the
                    # injection surface — row values are bound parameters.
                    cols_str = ', '.join(f'"{c}"' for c in common_cols)
                    placeholders = ', '.join('?' for _ in common_cols)
                    rows = backup_conn.execute(
                        f'SELECT {cols_str} FROM "{table}"'
                    ).fetchall()
                    row_count = 0
                    for row in rows:
                        try:
                            live_conn.execute(
                                f'INSERT OR REPLACE INTO "{table}" ({cols_str}) VALUES ({placeholders})',
                                tuple(row)
                            )
                            row_count += 1
                        except Exception:
                            # Best-effort merge: skip rows that violate
                            # constraints of the live schema.
                            continue
                    if row_count > 0:
                        stats['tables'].append({'name': table, 'rows': row_count})
                # Single commit after all tables have been merged.
                live_conn.commit()
            finally:
                backup_conn.close()
                live_conn.close()
        # Restore uploaded files into the configured upload directory.
        backup_files_dir = extract_path / 'files'
        if backup_files_dir.exists():
            upload_path.mkdir(parents=True, exist_ok=True)
            file_count = 0
            for src_file in backup_files_dir.rglob('*'):
                if src_file.is_file():
                    rel_path = src_file.relative_to(backup_files_dir)
                    dest = upload_path / rel_path
                    dest.parent.mkdir(parents=True, exist_ok=True)
                    shutil.copy2(str(src_file), str(dest))
                    file_count += 1
            stats['files_disk'] = file_count
    stats['success'] = True
    stats['message'] = 'Restore erfolgreich abgeschlossen'
    return stats