minmal-file-cloud-email-pim.../backend/app/api/office.py

import io
from pathlib import Path

from flask import request, jsonify, current_app, send_file

from app.api import api_bp
from app.api.auth import token_required
from app.api.files import _get_file_or_403
from app.extensions import db


# Lower-case file-name suffixes that are previewed as plain text even when the
# stored MIME type is not ``text/*``.
_TEXT_PREVIEW_EXTENSIONS = ('.txt', '.md', '.json', '.xml', '.csv',
                            '.py', '.js', '.html', '.css', '.yml', '.yaml')
# Upper bound on the number of characters returned for a text preview.
_TEXT_PREVIEW_MAX_CHARS = 100000


@api_bp.route('/files/<int:file_id>/preview', methods=['GET'])
@token_required
def preview_file(file_id):
    """Return a JSON preview descriptor for the stored file ``file_id``.

    The response ``type`` tells the client how to render the preview:

    * ``pdf`` / ``image`` -- only a download URL is returned.
    * ``html`` -- DOCX converted to an HTML fragment (``content``).
    * ``spreadsheet`` -- XLSX converted to a list of sheets (``sheets``).
    * ``slides`` -- PPTX converted to per-slide HTML (``slides``).
    * ``text`` -- the first ``_TEXT_PREVIEW_MAX_CHARS`` characters (``content``).
    * ``unsupported`` -- nothing matched, or the text file could not be read.

    File-name extensions are compared case-insensitively.

    Args:
        file_id: Numeric id taken from the URL.

    Returns:
        A ``(response, status)`` tuple, or whatever error value
        ``_get_file_or_403`` produced. Status is 400 for folders, 404 when the
        file is missing on disk and 500 when an office conversion fails.
    """
    user = request.current_user
    # NOTE(review): 'read' is presumably the required permission level --
    # confirm against _get_file_or_403.
    f, err = _get_file_or_403(file_id, user, 'read')
    if err:
        return err

    if f.is_folder:
        return jsonify({'error': 'Ordner haben keine Vorschau'}), 400

    mime = f.mime_type or ''
    # Compare extensions on a lower-cased name so "REPORT.DOCX" is recognised.
    name_lower = f.name.lower()
    filepath = Path(current_app.config['UPLOAD_PATH']) / str(f.owner_id) / f.storage_path

    if not filepath.exists():
        return jsonify({'error': 'Datei nicht gefunden'}), 404

    # PDF -> just return URL for PDF.js to load
    if 'pdf' in mime:
        return jsonify({
            'type': 'pdf',
            'url': f'/api/files/{file_id}/download',
            'name': f.name,
        }), 200

    # DOCX
    # NOTE(review): the legacy MIME types in this and the next two branches
    # (.doc/.xls/.ppt) are routed to the OOXML converters, which likely cannot
    # parse the binary formats; such files end up in the 500 path -- confirm.
    if mime in ('application/vnd.openxmlformats-officedocument.wordprocessingml.document',
                'application/msword') or name_lower.endswith('.docx'):
        try:
            html = _convert_docx(filepath)
            return jsonify({'type': 'html', 'content': html, 'name': f.name}), 200
        except Exception as e:
            current_app.logger.exception('DOCX preview failed for file %s', file_id)
            return jsonify({'error': f'DOCX-Vorschau fehlgeschlagen: {str(e)}'}), 500

    # XLSX
    if mime in ('application/vnd.openxmlformats-officedocument.spreadsheetml.sheet',
                'application/vnd.ms-excel') or name_lower.endswith('.xlsx'):
        try:
            data = _convert_xlsx(filepath)
            return jsonify({'type': 'spreadsheet', 'sheets': data, 'name': f.name}), 200
        except Exception as e:
            current_app.logger.exception('XLSX preview failed for file %s', file_id)
            return jsonify({'error': f'XLSX-Vorschau fehlgeschlagen: {str(e)}'}), 500

    # PPTX
    if mime in ('application/vnd.openxmlformats-officedocument.presentationml.presentation',
                'application/vnd.ms-powerpoint') or name_lower.endswith('.pptx'):
        try:
            slides = _convert_pptx(filepath)
            return jsonify({'type': 'slides', 'slides': slides, 'name': f.name}), 200
        except Exception as e:
            current_app.logger.exception('PPTX preview failed for file %s', file_id)
            return jsonify({'error': f'PPTX-Vorschau fehlgeschlagen: {str(e)}'}), 500

    # Images
    if mime.startswith('image/'):
        return jsonify({
            'type': 'image',
            'url': f'/api/files/{file_id}/download',
            'name': f.name,
        }), 200

    # Text files
    if mime.startswith('text/') or name_lower.endswith(_TEXT_PREVIEW_EXTENSIONS):
        try:
            # Read only the previewed prefix instead of loading the whole file
            # and slicing afterwards; undecodable bytes are replaced, not fatal.
            with filepath.open(encoding='utf-8', errors='replace') as fh:
                content = fh.read(_TEXT_PREVIEW_MAX_CHARS)
        except OSError:
            # Best effort: an unreadable file is reported as unsupported below,
            # but the failure is logged instead of being silently dropped.
            current_app.logger.warning(
                'Text preview failed for file %s', file_id, exc_info=True)
        else:
            return jsonify({'type': 'text', 'content': content, 'name': f.name}), 200

    return jsonify({'type': 'unsupported', 'name': f.name, 'mime_type': mime}), 200


def _convert_docx(filepath):
    """Render the paragraphs and tables of a .docx file as an HTML fragment.

    Paragraphs whose style name contains ``Heading 1``/``2``/``3`` become
    ``<h1>``-``<h3>``; whitespace-only paragraphs become ``<br/>``; every other
    paragraph becomes a ``<p>`` built from its runs, with bold, italic and
    underlined runs wrapped in ``<strong>``, ``<em>`` and ``<u>``.

    All tables are appended after all paragraphs, so the relative order of
    text and tables in the document is not preserved. The first row of each
    table is emitted as header cells.

    All text taken from the document is HTML-escaped, because the file content
    is user-supplied and the result is meant to be rendered as HTML.

    Args:
        filepath: Path (or path string) of the .docx file.

    Returns:
        The HTML fragment, one element per line.
    """
    from html import escape

    from docx import Document

    doc = Document(str(filepath))
    html_parts = []
    for para in doc.paragraphs:
        style = (para.style.name or '') if para.style else ''
        text = para.text
        if not text.strip():
            html_parts.append('<br/>')
            continue

        for level in (1, 2, 3):
            if f'Heading {level}' in style:
                html_parts.append(f'<h{level}>{escape(text)}</h{level}>')
                break
        else:
            # Not a heading: rebuild the paragraph run by run so inline
            # bold/italic/underline formatting survives.
            run_parts = []
            for run in para.runs:
                piece = escape(run.text)
                if run.bold:
                    piece = f'<strong>{piece}</strong>'
                if run.italic:
                    piece = f'<em>{piece}</em>'
                if run.underline:
                    piece = f'<u>{piece}</u>'
                run_parts.append(piece)
            html_parts.append(f"<p>{''.join(run_parts)}</p>")

    # Tables
    for table in doc.tables:
        html_parts.append('<table border="1" cellpadding="4" cellspacing="0" style="border-collapse: collapse; width: 100%">')
        for i, row in enumerate(table.rows):
            html_parts.append('<tr>')
            tag = 'th' if i == 0 else 'td'
            html_parts.extend(
                f'<{tag}>{escape(cell.text)}</{tag}>' for cell in row.cells
            )
            html_parts.append('</tr>')
        html_parts.append('</table>')

    return '\n'.join(html_parts)


def _convert_xlsx(filepath, max_rows=500):
    """Extract the cell values of every worksheet of an .xlsx file.

    The workbook is opened read-only with ``data_only=True`` and each cell
    value is converted with ``str``; empty cells (``None``) become ``''``.

    Args:
        filepath: Path (or path string) of the .xlsx file.
        max_rows: Maximum number of rows read per worksheet.

    Returns:
        A list with one ``{'name': <sheet title>, 'rows': [[str, ...], ...]}``
        dict per worksheet, in workbook order.
    """
    from openpyxl import load_workbook

    wb = load_workbook(str(filepath), read_only=True, data_only=True)
    try:
        sheets = []
        for ws in wb.worksheets:
            rows = [
                ['' if cell is None else str(cell) for cell in row]
                for row in ws.iter_rows(max_row=max_rows, values_only=True)
            ]
            sheets.append({
                'name': ws.title,
                'rows': rows,
            })
        return sheets
    finally:
        # Close even when reading a sheet fails, so the workbook opened in
        # read-only mode does not keep its file handle open.
        wb.close()


def _convert_pptx(filepath):
    """Extract the text and tables of each slide of a .pptx file as HTML.

    For every shape with a text frame, each non-blank paragraph becomes a
    ``<p>``; for every shape holding a table, the table becomes a ``<table>``
    with one ``<td>`` per cell. A slide without any such content gets the
    placeholder ``<p>(Leere Folie)</p>``.

    All text taken from the presentation is HTML-escaped, because the file
    content is user-supplied and the result is meant to be rendered as HTML.

    Args:
        filepath: Path (or path string) of the .pptx file.

    Returns:
        A list with one ``{'index': <0-based slide number>, 'html': <str>}``
        dict per slide.
    """
    from html import escape

    from pptx import Presentation

    prs = Presentation(str(filepath))
    slides = []
    for i, slide in enumerate(prs.slides):
        content_parts = []
        for shape in slide.shapes:
            if shape.has_text_frame:
                for para in shape.text_frame.paragraphs:
                    text = para.text.strip()
                    if text:
                        content_parts.append(f'<p>{escape(text)}</p>')
            # NOTE(review): has_table appears to exist only on graphic-frame
            # shapes; getattr keeps other shape types from raising
            # AttributeError -- confirm against the python-pptx version in use.
            if getattr(shape, 'has_table', False):
                table_parts = ['<table border="1" cellpadding="4" style="border-collapse: collapse">']
                for row in shape.table.rows:
                    table_parts.append('<tr>')
                    table_parts.extend(
                        f'<td>{escape(cell.text)}</td>' for cell in row.cells
                    )
                    table_parts.append('</tr>')
                table_parts.append('</table>')
                content_parts.append(''.join(table_parts))

        slides.append({
            'index': i,
            'html': '\n'.join(content_parts) if content_parts else '<p>(Leere Folie)</p>',
        })
    return slides