"""File preview, in-browser save and OnlyOffice integration endpoints."""
import contextlib
import hashlib
import io
import os
import re
import secrets
import shutil
import tempfile
import urllib.request
from datetime import datetime, timezone, timedelta
from html import escape, unescape
from pathlib import Path
from urllib.parse import urlparse

from flask import after_this_request, current_app, jsonify, request, send_file

from app.api import api_bp
from app.api.auth import token_required
from app.api.files import _get_file_or_403
from app.extensions import db
from app.models.settings import AppSettings

# NOTE(review): the HTML tag literals emitted/parsed in this module and the
# URL converters in the route rules are assumed to be plain ``<p>``/``<h1>``/
# ``<table>`` tags and ``<int:file_id>`` / ``<access_key>`` -- confirm against
# the frontend and the sibling routes in app.api.files.

_DOCX_MIMES = (
    'application/vnd.openxmlformats-officedocument.wordprocessingml.document',
    'application/msword',
)
_XLSX_MIMES = (
    'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet',
    'application/vnd.ms-excel',
)
_PPTX_MIMES = (
    'application/vnd.openxmlformats-officedocument.presentationml.presentation',
    'application/vnd.ms-powerpoint',
)
_TEXT_EXTENSIONS = (
    '.txt', '.md', '.json', '.xml', '.csv', '.py', '.js', '.html', '.css',
    '.yml', '.yaml',
)
_TEXT_PREVIEW_CHARS = 100000
_XLSX_PREVIEW_ROWS = 500

# Block-level tags understood by the HTML -> DOCX writer. The single capturing
# group makes ``re.split`` return text and tags interleaved (tags at odd
# indices), so a tag can never get out of step with the text that follows it.
_BLOCK_TAG_RE = re.compile(
    r'(</?(?:p|h[1-3]|br\s*/?)(?:\s[^>]*)?>)', re.IGNORECASE)
_HEADING_OPEN_RE = re.compile(r'<(h[1-3])', re.IGNORECASE)
_INLINE_TAG_RE = re.compile(r'(</?(?:b|strong|i|em|u)>)')
_ANY_TAG_RE = re.compile(r'<[^>]+>')
_INLINE_TAG_EFFECT = {
    '<b>': ('bold', True), '<strong>': ('bold', True),
    '</b>': ('bold', False), '</strong>': ('bold', False),
    '<i>': ('italic', True), '<em>': ('italic', True),
    '</i>': ('italic', False), '</em>': ('italic', False),
    '<u>': ('underline', True),
    '</u>': ('underline', False),
}


def _storage_path(f):
    """Return the on-disk path of file record *f* below ``UPLOAD_PATH``."""
    return (Path(current_app.config['UPLOAD_PATH'])
            / str(f.owner_id) / f.storage_path)


def _refresh_file_metadata(f, filepath):
    """Recompute size, SHA-256 checksum and ``updated_at`` of *f*, then commit."""
    digest = hashlib.sha256()
    with open(str(filepath), 'rb') as fh:
        for chunk in iter(lambda: fh.read(8192), b''):
            digest.update(chunk)
    f.size = os.path.getsize(str(filepath))
    f.checksum = digest.hexdigest()
    f.updated_at = datetime.now(timezone.utc)
    db.session.commit()


@api_bp.route('/files/<int:file_id>/preview', methods=['GET'])
@token_required
def preview_file(file_id):
    """Return a JSON preview description for a file.

    Depending on the MIME type / file extension the response has ``type``
    ``pdf`` or ``image`` (download URL only), ``html`` (DOCX), ``spreadsheet``
    (XLSX), ``slides`` (PPTX), ``text`` or ``unsupported``. Responses are
    marked as non-cacheable.
    """
    @after_this_request
    def add_no_cache(response):
        response.headers['Cache-Control'] = 'no-cache, no-store, must-revalidate'
        response.headers['Pragma'] = 'no-cache'
        return response

    user = request.current_user
    f, err = _get_file_or_403(file_id, user, 'read')
    if err:
        return err
    if f.is_folder:
        return jsonify({'error': 'Ordner haben keine Vorschau'}), 400

    mime = f.mime_type or ''
    # Extension checks are case-insensitive so ``REPORT.DOCX`` previews too.
    name_lower = f.name.lower()
    filepath = _storage_path(f)
    if not filepath.exists():
        return jsonify({'error': 'Datei nicht gefunden'}), 404

    # PDF -> just return URL for PDF.js to load
    if 'pdf' in mime:
        return jsonify({
            'type': 'pdf',
            'url': f'/api/files/{file_id}/download',
            'name': f.name,
        }), 200

    # DOCX
    if mime in _DOCX_MIMES or name_lower.endswith('.docx'):
        try:
            content_html = _convert_docx(filepath)
        except Exception as e:  # third-party parser: anything can go wrong
            current_app.logger.exception('DOCX preview failed for file %s', file_id)
            return jsonify({'error': f'DOCX-Vorschau fehlgeschlagen: {str(e)}'}), 500
        return jsonify({'type': 'html', 'content': content_html, 'name': f.name}), 200

    # XLSX
    if mime in _XLSX_MIMES or name_lower.endswith('.xlsx'):
        try:
            sheets = _convert_xlsx(filepath)
        except Exception as e:
            current_app.logger.exception('XLSX preview failed for file %s', file_id)
            return jsonify({'error': f'XLSX-Vorschau fehlgeschlagen: {str(e)}'}), 500
        return jsonify({'type': 'spreadsheet', 'sheets': sheets, 'name': f.name}), 200

    # PPTX
    if mime in _PPTX_MIMES or name_lower.endswith('.pptx'):
        try:
            slides = _convert_pptx(filepath)
        except Exception as e:
            current_app.logger.exception('PPTX preview failed for file %s', file_id)
            return jsonify({'error': f'PPTX-Vorschau fehlgeschlagen: {str(e)}'}), 500
        return jsonify({'type': 'slides', 'slides': slides, 'name': f.name}), 200

    # Images
    if mime.startswith('image/'):
        return jsonify({
            'type': 'image',
            'url': f'/api/files/{file_id}/download',
            'name': f.name,
        }), 200

    # Text files
    if mime.startswith('text/') or name_lower.endswith(_TEXT_EXTENSIONS):
        try:
            # Read only the previewed prefix instead of loading the whole file.
            with filepath.open(encoding='utf-8', errors='replace') as fh:
                content = fh.read(_TEXT_PREVIEW_CHARS)
        except OSError:
            # Best effort: fall through to the "unsupported" answer below.
            current_app.logger.exception('Text preview failed for file %s', file_id)
        else:
            return jsonify({'type': 'text', 'content': content, 'name': f.name}), 200

    return jsonify({'type': 'unsupported', 'name': f.name, 'mime_type': mime}), 200


def _convert_docx(filepath):
    """Render a DOCX file as a simple HTML string.

    Paragraphs become ``<h1>``-``<h3>`` or ``<p>`` (with ``<b>``/``<i>``/``<u>``
    for run formatting); empty paragraphs become ``<br>``. All tables are
    appended after the paragraphs. Document text is HTML-escaped so that
    content such as ``<script>`` cannot inject markup into the preview.
    """
    from docx import Document

    doc = Document(str(filepath))
    html_parts = []

    for para in doc.paragraphs:
        style = (para.style.name if para.style else '') or ''
        text = para.text
        if not text.strip():
            html_parts.append('<br>')
            continue

        level = next((n for n in (1, 2, 3) if f'Heading {n}' in style), None)
        if level is not None:
            html_parts.append(f'<h{level}>{escape(text)}</h{level}>')
            continue

        # Regular paragraph: keep bold/italic/underline per run.
        run_parts = []
        for run in para.runs:
            t = escape(run.text)
            if run.bold:
                t = f'<b>{t}</b>'
            if run.italic:
                t = f'<i>{t}</i>'
            if run.underline:
                t = f'<u>{t}</u>'
            run_parts.append(t)
        run_html = ''.join(run_parts)
        html_parts.append(f'<p>{run_html}</p>')

    # Tables (first row is rendered as header cells)
    for table in doc.tables:
        html_parts.append('<table>')
        for i, row in enumerate(table.rows):
            html_parts.append('<tr>')
            tag = 'th' if i == 0 else 'td'
            for cell in row.cells:
                html_parts.append(f'<{tag}>{escape(cell.text)}</{tag}>')
            html_parts.append('</tr>')
        html_parts.append('</table>')

    return '\n'.join(html_parts)


def _convert_xlsx(filepath):
    """Return the first rows of every worksheet as lists of strings.

    Returns a list of ``{'name': title, 'rows': [[str, ...], ...]}`` dicts;
    empty cells become ``''``. Only cached values are read (``data_only``)
    and at most ``_XLSX_PREVIEW_ROWS`` rows per sheet.
    """
    from openpyxl import load_workbook

    wb = load_workbook(str(filepath), read_only=True, data_only=True)
    try:
        return [
            {
                'name': ws.title,
                'rows': [
                    ['' if cell is None else str(cell) for cell in row]
                    for row in ws.iter_rows(max_row=_XLSX_PREVIEW_ROWS,
                                            values_only=True)
                ],
            }
            for ws in wb.worksheets
        ]
    finally:
        # Read-only workbooks keep the file handle open until closed.
        wb.close()


def _convert_pptx(filepath):
    """Return one ``{'index': i, 'html': ...}`` dict per slide of a PPTX file.

    Text frames are rendered as ``<p>`` paragraphs and tables as ``<table>``;
    slide text is HTML-escaped. Slides without content get a placeholder.
    """
    from pptx import Presentation

    prs = Presentation(str(filepath))
    slides = []
    for i, slide in enumerate(prs.slides):
        content_parts = []
        for shape in slide.shapes:
            if shape.has_text_frame:
                for para in shape.text_frame.paragraphs:
                    text = para.text.strip()
                    if text:
                        content_parts.append(f'<p>{escape(text)}</p>')
            if shape.has_table:
                table_parts = ['<table>']
                for row in shape.table.rows:
                    table_parts.append('<tr>')
                    table_parts.extend(
                        f'<td>{escape(cell.text)}</td>' for cell in row.cells)
                    table_parts.append('</tr>')
                table_parts.append('</table>')
                content_parts.append(''.join(table_parts))
        slides.append({
            'index': i,
            'html': '\n'.join(content_parts) if content_parts else '<p>(Leere Folie)</p>',
        })
    return slides


# ========== Save (write back edited documents) ==========

@api_bp.route('/files/<int:file_id>/save', methods=['POST'])
@token_required
def save_file(file_id):
    """Save edited content back to the original file format.

    Expects a JSON body with ``type`` (``html``, ``spreadsheet`` or ``text``)
    and the matching ``content`` / ``sheets`` payload. A missing or non-object
    body is answered with 400 instead of raising.
    """
    user = request.current_user
    f, err = _get_file_or_403(file_id, user, 'write')
    if err:
        return err
    if f.is_folder:
        return jsonify({'error': 'Ordner koennen nicht gespeichert werden'}), 400

    data = request.get_json(silent=True)
    if not isinstance(data, dict):
        data = {}
    save_type = data.get('type', '')
    name_lower = f.name.lower()
    filepath = _storage_path(f)

    # NOTE(review): spreadsheets are rewritten from the preview payload only,
    # which appears to drop formulas/formatting and any rows beyond the
    # preview limit -- confirm this is acceptable for callers.
    try:
        if save_type == 'html' and name_lower.endswith('.docx'):
            _save_html_to_docx(filepath, data.get('content', ''))
        elif save_type == 'spreadsheet' and name_lower.endswith(('.xlsx', '.xls')):
            _save_sheets_to_xlsx(filepath, data.get('sheets', []))
        elif save_type == 'text':
            filepath.write_text(data.get('content', ''), encoding='utf-8')
        else:
            return jsonify({'error': 'Speichern fuer diesen Typ nicht unterstuetzt'}), 400

        # Update file metadata
        _refresh_file_metadata(f, filepath)
        return jsonify({'message': 'Gespeichert', 'size': f.size}), 200
    except Exception as e:
        # Leave the session usable for the rest of the request/teardown.
        db.session.rollback()
        current_app.logger.exception('Saving file %s failed', file_id)
        return jsonify({'error': f'Speichern fehlgeschlagen: {str(e)}'}), 500


def _save_html_to_docx(filepath, html_content):
    """Convert simple HTML back to a DOCX file written to *filepath*.

    Recognises ``<p>``, ``<h1>``-``<h3>`` and ``<br>`` as block separators and
    ``<b>/<strong>``, ``<i>/<em>``, ``<u>`` as inline formatting; every other
    tag is dropped. HTML entities are decoded so ``&amp;`` is stored as ``&``.
    """
    from docx import Document

    doc = Document()
    html = html_content.replace('\r\n', '\n').replace('\r', '\n')

    current_tag = 'p'
    for idx, piece in enumerate(_BLOCK_TAG_RE.split(html)):
        if idx % 2:
            # Odd indices are the block tags themselves: an opening heading
            # tag selects that heading level for the text that follows;
            # every other tag (closing tags, <p>, <br>) resets to paragraph.
            opened = _HEADING_OPEN_RE.match(piece)
            current_tag = opened.group(1).lower() if opened else 'p'
            continue

        text = unescape(_ANY_TAG_RE.sub('', piece)).strip()
        if not text:
            continue

        if current_tag in ('h1', 'h2', 'h3'):
            doc.add_heading(text, level=int(current_tag[1]))
            continue

        # Paragraph: walk the inline tags and emit one run per text span.
        para = doc.add_paragraph()
        fmt = {'bold': False, 'italic': False, 'underline': False}
        for part in _INLINE_TAG_RE.split(piece):
            effect = _INLINE_TAG_EFFECT.get(part)
            if effect is not None:
                fmt[effect[0]] = effect[1]
                continue
            clean = unescape(_ANY_TAG_RE.sub('', part))
            if clean:
                run = para.add_run(clean)
                run.bold = fmt['bold']
                run.italic = fmt['italic']
                run.underline = fmt['underline']

    doc.save(str(filepath))


def _coerce_cell_value(value):
    """Turn numeric-looking input into ``int``/``float``; ``''`` becomes ``None``."""
    if isinstance(value, bool):
        return value  # int(True) would silently turn booleans into 1/0
    try:
        value = float(value) if '.' in str(value) else int(value)
    except (ValueError, TypeError):
        pass
    return value if value != '' else None


def _save_sheets_to_xlsx(filepath, sheets_data):
    """Write ``[{'name': ..., 'rows': [[...], ...]}, ...]`` to an XLSX file."""
    from openpyxl import Workbook

    wb = Workbook()
    # Remove default sheet
    wb.remove(wb.active)

    for sheet_data in sheets_data:
        ws = wb.create_sheet(title=sheet_data.get('name') or 'Sheet')
        for ri, row in enumerate(sheet_data.get('rows', []), 1):
            for ci, cell_value in enumerate(row, 1):
                ws.cell(row=ri, column=ci, value=_coerce_cell_value(cell_value))

    if not wb.worksheets:
        # A workbook without any sheet cannot be saved by openpyxl.
        wb.create_sheet(title='Sheet')

    wb.save(str(filepath))


# ========== OnlyOffice Integration ==========

@api_bp.route('/files/<int:file_id>/onlyoffice-config', methods=['GET'])
@token_required
def onlyoffice_config(file_id):
    """Generate OnlyOffice editor config for a file.

    Answers ``{'available': False, 'error': ...}`` (HTTP 200) when
    ``ONLYOFFICE_URL`` is unset or the extension is not supported. Otherwise
    stores a callback key and a file access key in ``AppSettings`` and returns
    the editor config, signed with ``JWT_SECRET_KEY`` when that is set.
    """
    user = request.current_user
    f, err = _get_file_or_403(file_id, user, 'read')
    if err:
        return err

    oo_url = os.environ.get('ONLYOFFICE_URL', '')
    if not oo_url:
        return jsonify({'error': 'OnlyOffice nicht konfiguriert', 'available': False}), 200

    # Determine document type
    ext = f.name.rsplit('.', 1)[-1].lower() if '.' in f.name else ''
    doc_type_map = {
        'docx': 'word', 'doc': 'word', 'odt': 'word', 'rtf': 'word', 'txt': 'word',
        'xlsx': 'cell', 'xls': 'cell', 'ods': 'cell', 'csv': 'cell',
        'pptx': 'slide', 'ppt': 'slide', 'odp': 'slide',
    }
    doc_type = doc_type_map.get(ext)
    if not doc_type:
        return jsonify({'error': 'Dateityp nicht von OnlyOffice unterstuetzt', 'available': False}), 200

    # Check write permission
    can_write = _get_file_or_403(file_id, user, 'write')[1] is None

    # Generate a callback key for this editing session
    callback_key = secrets.token_urlsafe(16)
    AppSettings.set(f'oo_callback_{callback_key}', str(file_id))

    internal_url = os.environ.get('ONLYOFFICE_INTERNAL_URL', 'http://minicloud:5000')

    # Generate a file access key (no JWT needed, simpler for OnlyOffice)
    file_access_key = secrets.token_urlsafe(32)
    AppSettings.set(f'oo_file_{file_access_key}', f'{file_id}:{user.id}')

    timestamp = int(datetime.now(timezone.utc).timestamp())
    config = {
        'available': True,
        'onlyoffice_url': oo_url.rstrip('/'),
        'config': {
            'document': {
                'fileType': ext,
                'key': f'{file_id}_{timestamp}_{callback_key[:8]}',
                'title': f.name,
                'url': f'{internal_url}/api/files/oo-download/{file_access_key}',
            },
            'documentType': doc_type,
            'editorConfig': {
                'callbackUrl': f'{internal_url}/api/files/onlyoffice-callback?key={callback_key}',
                'mode': 'edit' if can_write else 'view',
                'forcesavetype': 0,
                'lang': 'de',
                'user': {
                    'id': str(user.id),
                    'name': user.username,
                },
            },
        },
    }

    # Sign config with JWT for OnlyOffice validation
    jwt_secret = os.environ.get('JWT_SECRET_KEY', '')
    if jwt_secret:
        import jwt as pyjwt
        config['config']['token'] = pyjwt.encode(config['config'], jwt_secret, algorithm='HS256')

    return jsonify(config), 200


@api_bp.route('/files/oo-download/<access_key>', methods=['GET'])
def oo_download(access_key):
    """Dedicated download endpoint for OnlyOffice - no JWT auth, uses an access key.

    The key is looked up in ``AppSettings`` (``oo_file_<key>`` ->
    ``"<file_id>:<user_id>"``). It is not invalidated here, so it stays valid
    until removed elsewhere.
    """
    data = AppSettings.get(f'oo_file_{access_key}', '')
    if not data:
        return jsonify({'error': 'Ungueltiger Zugangsschluessel'}), 403

    parts = data.split(':')
    if len(parts) != 2:
        return jsonify({'error': 'Ungueltiger Zugangsschluessel'}), 403

    try:
        file_id = int(parts[0])
    except ValueError:
        # Malformed stored value: treat like an unknown key, not a 500.
        return jsonify({'error': 'Ungueltiger Zugangsschluessel'}), 403

    from app.models.file import File
    f = db.session.get(File, file_id)
    if not f:
        return jsonify({'error': 'Datei nicht gefunden'}), 404

    filepath = _storage_path(f)
    if not filepath.exists():
        return jsonify({'error': 'Datei nicht auf Datentraeger'}), 404

    return send_file(str(filepath),
                     mimetype=f.mime_type or 'application/octet-stream',
                     as_attachment=False,
                     download_name=f.name)


def _decode_callback_payload(data, jwt_secret):
    """Return the JWT-wrapped callback payload, or *data* if it cannot be decoded."""
    if 'token' not in data or not jwt_secret:
        return data
    import jwt as pyjwt
    try:
        return pyjwt.decode(data['token'], jwt_secret, algorithms=['HS256'])
    except Exception as e:
        # NOTE(review): falling back to the unsigned body keeps the previous
        # behaviour but means the signature is not enforced -- confirm intent.
        current_app.logger.warning(
            '[OnlyOffice Callback] Body JWT decode failed (using raw data): %s', e)
        return data


def _download_atomically(download_url, filepath):
    """Fetch *download_url* (http/https only) and atomically replace *filepath*.

    The URL comes from the request body, so other schemes are refused
    (``urlopen`` would otherwise happily read ``file://`` URLs). The data is
    written to a temporary file in the target directory first, so a failed
    transfer never truncates the existing document.
    """
    scheme = urlparse(download_url).scheme.lower()
    if scheme not in ('http', 'https'):
        raise ValueError(f'unsupported download URL scheme: {scheme!r}')

    fd, tmp_name = tempfile.mkstemp(dir=str(filepath.parent), prefix='.oo-', suffix='.tmp')
    try:
        with os.fdopen(fd, 'wb') as out:
            req = urllib.request.Request(download_url)
            with urllib.request.urlopen(req, timeout=30) as resp:
                shutil.copyfileobj(resp, out)
        if filepath.exists():
            # mkstemp creates the file 0600; keep the original permissions.
            shutil.copymode(str(filepath), tmp_name)
        os.replace(tmp_name, str(filepath))
    except BaseException:
        with contextlib.suppress(OSError):
            os.unlink(tmp_name)
        raise


def _store_onlyoffice_result(callback_key, download_url):
    """Download the edited document for *callback_key* and update its metadata."""
    file_id_str = AppSettings.get(f'oo_callback_{callback_key}', '')
    if not file_id_str or not download_url:
        return

    from app.models.file import File
    f = db.session.get(File, int(file_id_str))
    if not f or not f.storage_path:
        return

    filepath = _storage_path(f)
    current_app.logger.info(
        '[OnlyOffice Callback] Saving file %s from %s', f.name, download_url)
    _download_atomically(download_url, filepath)
    _refresh_file_metadata(f, filepath)
    current_app.logger.info(
        '[OnlyOffice Callback] File saved: %s (%s bytes)', f.name, f.size)


def _forget_callback_key(callback_key):
    """Best-effort removal of the stored callback key."""
    try:
        setting = db.session.get(AppSettings, f'oo_callback_{callback_key}')
        if setting:
            db.session.delete(setting)
            db.session.commit()
    except Exception:
        db.session.rollback()
        current_app.logger.exception('[OnlyOffice Callback] Cleanup failed')


def _process_onlyoffice_callback():
    """Handle one callback request; may raise, the route swallows errors."""
    jwt_secret = os.environ.get('JWT_SECRET_KEY', '')
    callback_key = request.args.get('key', '')

    # Get callback data - may be JWT-wrapped
    data = request.get_json(silent=True)
    if not isinstance(data, dict):
        data = {}
    # Only a prefix of the key is logged: the full key authorises saving.
    current_app.logger.info(
        '[OnlyOffice Callback] Raw status=%s, key=%s...',
        data.get('status'), callback_key[:8])

    data = _decode_callback_payload(data, jwt_secret)
    status = data.get('status', 0)

    # Status 2 or 6: save the document
    if status in (2, 6):
        _store_onlyoffice_result(callback_key, data.get('url', ''))

    # Status 2, 4, 6: cleanup
    if status in (2, 4, 6):
        _forget_callback_key(callback_key)


@api_bp.route('/files/onlyoffice-callback', methods=['POST'])
def onlyoffice_callback():
    """Callback from OnlyOffice when document is saved.

    OnlyOffice sends status codes:
    1 = editing, 2 = ready to save, 4 = closed no changes, 6 = force save
    Must always return {"error": 0} for success.
    """
    try:
        _process_onlyoffice_callback()
    except Exception:
        # Still return error: 0 so OnlyOffice doesn't retry endlessly
        current_app.logger.exception('[OnlyOffice Callback] ERROR')
        db.session.rollback()
    return jsonify({'error': 0}), 200


@api_bp.route('/files/onlyoffice-status', methods=['GET'])
@token_required
def onlyoffice_status():
    """Check if OnlyOffice is available (i.e. ``ONLYOFFICE_URL`` is set)."""
    oo_url = os.environ.get('ONLYOFFICE_URL', '')
    return jsonify({
        'available': bool(oo_url),
        'url': oo_url,
    }), 200