import io
import os
import hashlib
from datetime import datetime, timezone
from pathlib import Path

from flask import request, jsonify, current_app, send_file

from app.api import api_bp
from app.api.auth import token_required
from app.api.files import _get_file_or_403
from app.extensions import db

# File extensions previewed as plain text even when the MIME type is not text/*.
_TEXT_SUFFIXES = ('.txt', '.md', '.json', '.xml', '.csv', '.py', '.js',
                  '.html', '.css', '.yml', '.yaml')
# Upper bound on the number of characters returned for a text preview.
_TEXT_PREVIEW_CHARS = 100000
# Upper bound on the number of rows read per worksheet for a preview.
_XLSX_PREVIEW_ROWS = 500


def _converted_preview(converter, filepath, name, preview_type, payload_key, label):
    """Run an office-document converter and wrap its result as a JSON response.

    Returns a ``(response, status)`` pair: 200 with ``{'type', payload_key,
    'name'}`` on success, 500 with an ``error`` message when the converter
    raises.
    """
    try:
        payload = converter(filepath)
    except Exception as e:
        # Boundary handler: a converter failure becomes a 500 for this request.
        return jsonify({'error': f'{label}-Vorschau fehlgeschlagen: {str(e)}'}), 500
    return jsonify({'type': preview_type, payload_key: payload, 'name': name}), 200


# NOTE(review): the ``int`` converter assumes integer file IDs - confirm
# against the other /files routes of this blueprint.
@api_bp.route('/files/<int:file_id>/preview', methods=['GET'])
@token_required
def preview_file(file_id):
    """Return a JSON description of how to preview the given file.

    The response ``type`` is one of ``pdf``/``image`` (with a download URL),
    ``html`` (DOCX), ``spreadsheet`` (XLSX), ``slides`` (PPTX), ``text`` or
    ``unsupported``. Folders yield 400, a missing file on disk yields 404 and
    a failed office conversion yields 500.
    """
    user = request.current_user
    f, err = _get_file_or_403(file_id, user, 'read')
    if err:
        return err
    if f.is_folder:
        return jsonify({'error': 'Ordner haben keine Vorschau'}), 400

    mime = f.mime_type or ''
    # Compare extensions case-insensitively so "REPORT.DOCX" is previewed too.
    lowered = f.name.lower()
    filepath = Path(current_app.config['UPLOAD_PATH']) / str(f.owner_id) / f.storage_path
    if not filepath.exists():
        return jsonify({'error': 'Datei nicht gefunden'}), 404

    # PDF -> just return URL for PDF.js to load
    if 'pdf' in mime:
        return jsonify({
            'type': 'pdf',
            'url': f'/api/files/{file_id}/download',
            'name': f.name,
        }), 200

    # DOCX
    if mime in ('application/vnd.openxmlformats-officedocument.wordprocessingml.document',
                'application/msword') or lowered.endswith('.docx'):
        return _converted_preview(_convert_docx, filepath, f.name, 'html', 'content', 'DOCX')

    # XLSX
    if mime in ('application/vnd.openxmlformats-officedocument.spreadsheetml.sheet',
                'application/vnd.ms-excel') or lowered.endswith('.xlsx'):
        return _converted_preview(_convert_xlsx, filepath, f.name, 'spreadsheet', 'sheets', 'XLSX')

    # PPTX
    if mime in ('application/vnd.openxmlformats-officedocument.presentationml.presentation',
                'application/vnd.ms-powerpoint') or lowered.endswith('.pptx'):
        return _converted_preview(_convert_pptx, filepath, f.name, 'slides', 'slides', 'PPTX')

    # Images
    if mime.startswith('image/'):
        return jsonify({
            'type': 'image',
            'url': f'/api/files/{file_id}/download',
            'name': f.name,
        }), 200

    # Text files
    if mime.startswith('text/') or lowered.endswith(_TEXT_SUFFIXES):
        try:
            # Bounded read: only the previewed prefix is loaded into memory.
            with filepath.open(encoding='utf-8', errors='replace') as fh:
                content = fh.read(_TEXT_PREVIEW_CHARS)
        except OSError:
            # Best effort: an unreadable file falls through to "unsupported".
            current_app.logger.warning('Text preview failed for file %s', file_id,
                                       exc_info=True)
        else:
            return jsonify({'type': 'text', 'content': content, 'name': f.name}), 200

    return jsonify({'type': 'unsupported', 'name': f.name, 'mime_type': mime}), 200


def _convert_docx(filepath):
    """Render a DOCX file as a simple HTML string.

    Paragraphs come first (headings 1-3 as ``<h1>``-``<h3>``, empty paragraphs
    as ``<br>``, other paragraphs as ``<p>`` with bold/italic/underline runs),
    followed by all tables. Document text is HTML-escaped because the result
    is meant to be inserted into a page.
    """
    from html import escape
    from docx import Document

    doc = Document(str(filepath))
    html_parts = []

    for para in doc.paragraphs:
        style = para.style.name if para.style else ''
        text = para.text
        if not text.strip():
            html_parts.append('<br>')
            continue

        level = next((lvl for lvl in (1, 2, 3) if f'Heading {lvl}' in style), None)
        if level is not None:
            html_parts.append(f'<h{level}>{escape(text)}</h{level}>')
            continue

        # Regular paragraph: keep bold/italic/underline formatting per run.
        run_parts = []
        for run in para.runs:
            t = escape(run.text)
            if run.bold:
                t = f'<b>{t}</b>'
            if run.italic:
                t = f'<i>{t}</i>'
            if run.underline:
                t = f'<u>{t}</u>'
            run_parts.append(t)
        html_parts.append(f'<p>{"".join(run_parts)}</p>')

    # Tables (appended after all paragraphs; the first row is the header row)
    for table in doc.tables:
        html_parts.append('<table>')
        for i, row in enumerate(table.rows):
            html_parts.append('<tr>')
            tag = 'th' if i == 0 else 'td'
            for cell in row.cells:
                html_parts.append(f'<{tag}>{escape(cell.text)}</{tag}>')
            html_parts.append('</tr>')
        html_parts.append('</table>')

    return '\n'.join(html_parts)


def _convert_xlsx(filepath):
    """Read an XLSX workbook into ``[{'name': title, 'rows': [[str, ...]]}]``.

    At most ``_XLSX_PREVIEW_ROWS`` rows are read per sheet. Cell values are
    stringified and empty cells become ``''``. The workbook is opened with
    ``data_only=True``, so stored values are read rather than formulas.
    """
    from openpyxl import load_workbook

    wb = load_workbook(str(filepath), read_only=True, data_only=True)
    try:
        sheets = []
        for ws in wb.worksheets:
            rows = [
                [str(cell) if cell is not None else '' for cell in row]
                for row in ws.iter_rows(max_row=_XLSX_PREVIEW_ROWS, values_only=True)
            ]
            sheets.append({
                'name': ws.title,
                'rows': rows,
            })
        return sheets
    finally:
        # Close the workbook even when reading a sheet fails.
        wb.close()


def _convert_pptx(filepath):
    """Render each slide of a PPTX file as ``{'index': i, 'html': str}``.

    Text-frame paragraphs become ``<p>`` elements and tables become
    ``<table>`` markup. Slide text is HTML-escaped. Slides without any text
    or table get a placeholder paragraph.
    """
    from html import escape
    from pptx import Presentation

    prs = Presentation(str(filepath))
    slides = []
    for i, slide in enumerate(prs.slides):
        content_parts = []
        for shape in slide.shapes:
            if shape.has_text_frame:
                for para in shape.text_frame.paragraphs:
                    text = para.text.strip()
                    if text:
                        content_parts.append(f'<p>{escape(text)}</p>')
            if shape.has_table:
                table_parts = ['<table>']
                for row in shape.table.rows:
                    table_parts.append('<tr>')
                    table_parts.extend(
                        f'<td>{escape(cell.text)}</td>' for cell in row.cells
                    )
                    table_parts.append('</tr>')
                table_parts.append('</table>')
                content_parts.append(''.join(table_parts))
        slides.append({
            'index': i,
            'html': '\n'.join(content_parts) if content_parts else '<p>(Leere Folie)</p>',
        })
    return slides


# ========== Save (write back edited documents) ==========

# NOTE(review): the ``int`` converter assumes integer file IDs - confirm
# against the other /files routes of this blueprint.
@api_bp.route('/files/<int:file_id>/save', methods=['POST'])
@token_required
def save_file(file_id):
    """Save edited content back to the original file format.

    Expects a JSON object with ``type`` (``html``, ``spreadsheet`` or
    ``text``) plus ``content`` or ``sheets``. After writing, the file's size,
    SHA-256 checksum and ``updated_at`` are refreshed and committed.
    Returns 400 for folders, a missing/invalid JSON body or an unsupported
    type, and 500 when writing or committing fails.
    """
    user = request.current_user
    f, err = _get_file_or_403(file_id, user, 'write')
    if err:
        return err
    if f.is_folder:
        return jsonify({'error': 'Ordner koennen nicht gespeichert werden'}), 400

    # A missing or non-object body must not crash on ``data.get`` below.
    data = request.get_json(silent=True)
    if not isinstance(data, dict):
        return jsonify({'error': 'Ungueltige Anfrage'}), 400

    save_type = data.get('type', '')
    filepath = Path(current_app.config['UPLOAD_PATH']) / str(f.owner_id) / f.storage_path

    try:
        if save_type == 'html' and f.name.endswith('.docx'):
            _save_html_to_docx(filepath, data.get('content', ''))
        elif save_type == 'spreadsheet' and f.name.endswith(('.xlsx', '.xls')):
            _save_sheets_to_xlsx(filepath, data.get('sheets', []))
        elif save_type == 'text':
            filepath.write_text(data.get('content', ''), encoding='utf-8')
        else:
            return jsonify({'error': 'Speichern fuer diesen Typ nicht unterstuetzt'}), 400

        # Update file metadata
        f.size = os.path.getsize(str(filepath))
        h = hashlib.sha256()
        with open(str(filepath), 'rb') as fh:
            for chunk in iter(lambda: fh.read(8192), b''):
                h.update(chunk)
        f.checksum = h.hexdigest()
        f.updated_at = datetime.now(timezone.utc)
        db.session.commit()

        return jsonify({'message': 'Gespeichert', 'size': f.size}), 200
    except Exception as e:
        # Leave the session usable after a failed write or commit.
        db.session.rollback()
        return jsonify({'error': f'Speichern fehlgeschlagen: {str(e)}'}), 500


def _save_html_to_docx(filepath, html_content):
    """Convert HTML content back to DOCX.

    Supports ``<h1>``-``<h3>`` headings and paragraphs with ``<b>``/
    ``<strong>``, ``<i>``/``<em>`` and ``<u>`` runs; every other tag is
    stripped and HTML entities are decoded. Fragments without text are
    skipped.

    NOTE: a brand-new document is written to ``filepath``; anything in the
    previous file that is not part of ``html_content`` is not carried over.
    """
    import re
    from html import unescape
    from docx import Document

    block_tag = re.compile(r'<(/?)(p|h[1-3]|br)(?:\s[^>]*)?\s*/?>', re.IGNORECASE)
    inline_split = re.compile(r'(</?(?:b|strong|i|em|u)>)', re.IGNORECASE)
    inline_tag = re.compile(r'<(/?)(b|strong|i|em|u)>', re.IGNORECASE)
    any_tag = re.compile(r'<[^>]+>')
    run_attr = {'b': 'bold', 'strong': 'bold', 'i': 'italic', 'em': 'italic',
                'u': 'underline'}

    doc = Document()
    markup = html_content.replace('\r\n', '\n').replace('\r', '\n')

    # Pair every fragment with the block tag that opened it. Text following a
    # closing tag or a <br> is treated as a plain paragraph.
    segments = []
    kind, pos = 'p', 0
    for match in block_tag.finditer(markup):
        segments.append((kind, markup[pos:match.start()]))
        closing, name = match.group(1), match.group(2).lower()
        kind = name if not closing and name.startswith('h') else 'p'
        pos = match.end()
    segments.append((kind, markup[pos:]))

    for kind, raw in segments:
        # Strip tags first, then decode entities, so "&lt;b&gt;" stays text.
        text = unescape(any_tag.sub('', raw)).strip()
        if not text:
            continue

        if kind != 'p':
            doc.add_heading(text, level=int(kind[1]))
            continue

        # Simple inline parsing: toggle run formatting on <b>/<i>/<u> tags.
        para = doc.add_paragraph()
        state = {'bold': False, 'italic': False, 'underline': False}
        for part in inline_split.split(raw):
            tag = inline_tag.fullmatch(part)
            if tag:
                state[run_attr[tag.group(2).lower()]] = not tag.group(1)
                continue
            clean = unescape(any_tag.sub('', part))
            if clean:
                run = para.add_run(clean)
                run.bold = state['bold']
                run.italic = state['italic']
                run.underline = state['underline']

    doc.save(str(filepath))


def _coerce_cell_value(value):
    """Turn a submitted cell value into what is written to the worksheet.

    ``''`` and ``None`` become ``None`` (empty cell). Strings that parse as a
    number become ``float`` (when they contain a ``.``) or ``int``; all other
    strings and all non-string values are returned unchanged.
    """
    if value is None or value == '':
        return None
    if not isinstance(value, str):
        return value
    try:
        return float(value) if '.' in value else int(value)
    except ValueError:
        return value


def _save_sheets_to_xlsx(filepath, sheets_data):
    """Save spreadsheet data back to XLSX.

    ``sheets_data`` is a list of ``{'name': str, 'rows': [[value, ...]]}``.
    An empty or missing list yields a workbook with one empty default sheet,
    because a workbook without sheets cannot be saved.

    NOTE: a brand-new workbook is written to ``filepath``; only the submitted
    rows and values are stored.
    """
    from openpyxl import Workbook

    wb = Workbook()
    sheets_data = sheets_data or []
    if sheets_data:
        # Remove default sheet; the submitted sheets replace it.
        wb.remove(wb.active)

    for sheet_data in sheets_data:
        ws = wb.create_sheet(title=sheet_data.get('name', 'Sheet'))
        for ri, row in enumerate(sheet_data.get('rows', []), 1):
            for ci, cell_value in enumerate(row, 1):
                ws.cell(row=ri, column=ci, value=_coerce_cell_value(cell_value))

    wb.save(str(filepath))