From a4e39332c7d368618298053e7f2e8f62392bc3fa Mon Sep 17 00:00:00 2001 From: duffyduck Date: Fri, 20 Mar 2026 16:22:38 +0100 Subject: [PATCH] added amazon importer and logging smtp --- Dockerfile | 28 +- app/amazon_processor.py | 1384 ++++++++++++++++++++++++++++++++++ app/database.py | 166 +++- app/mail_processor.py | 294 +++++--- app/main.py | 153 +++- app/scanner.py | 18 +- app/scheduler.py | 12 + app/smb_processor.py | 247 +++--- app/static/style.css | 15 + app/templates/base.html | 7 +- app/templates/log.html | 77 +- app/templates/platforms.html | 363 +++++++++ app/templates/scan.html | 9 +- app/templates/settings.html | 98 ++- docker-compose.yml | 1 + requirements.txt | 2 + 16 files changed, 2619 insertions(+), 255 deletions(-) create mode 100644 app/amazon_processor.py create mode 100644 app/templates/platforms.html diff --git a/Dockerfile b/Dockerfile index c3a24c9..77acb20 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,15 +1,39 @@ FROM python:3.12-slim -RUN apt-get update && apt-get install -y --no-install-recommends libzbar0 && rm -rf /var/lib/apt/lists/* +RUN apt-get update && apt-get install -y --no-install-recommends \ + libzbar0 \ + # Chromium dependencies for Playwright + libglib2.0-0t64 \ + libnss3 \ + libnspr4 \ + libatk1.0-0t64 \ + libatk-bridge2.0-0t64 \ + libcups2t64 \ + libdrm2 \ + libexpat1 \ + libxcomposite1 \ + libxdamage1 \ + libxext6 \ + libxfixes3 \ + libxrandr2 \ + libgbm1 \ + libxkbcommon0 \ + libpango-1.0-0 \ + libcairo2 \ + libasound2t64 \ + libatspi2.0-0t64 \ + fonts-liberation \ + && rm -rf /var/lib/apt/lists/* WORKDIR /app COPY requirements.txt . RUN pip install --no-cache-dir -r requirements.txt +RUN playwright install chromium COPY app/ ./app/ -RUN mkdir -p /data/uploads +RUN mkdir -p /data/uploads /data/amazon_session EXPOSE 8000 diff --git a/app/amazon_processor.py b/app/amazon_processor.py new file mode 100644 index 0000000..b188b59 --- /dev/null +++ b/app/amazon_processor.py @@ -0,0 +1,1384 @@ +import asyncio +import logging +import os +import random +import re +import tempfile +from datetime import datetime, timedelta +from pathlib import Path + +from app.database import get_settings, save_settings, add_log_entry, is_invoice_downloaded, mark_invoice_downloaded +from app.mail_processor import _connect_smtp, _build_forward_email, _send_with_log + +logger = logging.getLogger(__name__) + +SESSION_DIR = Path(os.environ.get("AMAZON_SESSION_DIR", "/data/amazon_session")) +DEBUG_DIR = Path(os.environ.get("UPLOAD_DIR", "/data/uploads")) / "amazon_debug" + +# Login state machine +_login_state = {"status": "idle", "message": ""} +_login_lock = asyncio.Lock() +_otp_future: asyncio.Future | None = None +_browser_context = None +_playwright_instance = None + +# Process lock to prevent concurrent runs +_process_lock = asyncio.Lock() +# Flag: True while process_amazon is actively working (page consumed but session valid) +_processing_active = False + +# Interactive login session (browser page kept alive for user interaction) +_interactive_page = None + + +async def _human_delay(min_s: float = 1.0, max_s: float = 3.0): + """Random delay to mimic human behavior.""" + await asyncio.sleep(random.uniform(min_s, max_s)) + + +async def _apply_stealth_to_context(context): + """Apply stealth measures to the browser context (all pages).""" + try: + from playwright_stealth import Stealth + stealth = Stealth() + await stealth.apply_stealth_async(context) + logger.info("Stealth erfolgreich auf Browser-Kontext angewendet") + except ImportError: + logger.warning("playwright-stealth nicht installiert, überspringe") + except Exception as e: + logger.warning(f"Stealth konnte nicht angewendet werden: {e}") + + +async def _add_virtual_authenticator(page): + """Add virtual WebAuthn authenticator to prevent passkey dialogs.""" + try: + client = await page.context.new_cdp_session(page) + await client.send("WebAuthn.enable") + await client.send("WebAuthn.addVirtualAuthenticator", { + "options": { + "protocol": "ctap2", + "transport": "internal", + "hasResidentKey": True, + "hasUserVerification": True, + "isUserVerified": True, + "automaticPresenceSimulation": True, + } + }) + logger.debug("Virtueller WebAuthn-Authenticator hinzugefügt") + except Exception as e: + logger.debug(f"Virtueller Authenticator fehlgeschlagen: {e}") + + +async def _get_browser_context(): + """Get or create persistent Chromium browser context.""" + global _browser_context, _playwright_instance + + if _browser_context is not None: + try: + # Check if context is still alive + pages = _browser_context.pages + return _browser_context + except Exception: + _browser_context = None + + from playwright.async_api import async_playwright + + SESSION_DIR.mkdir(parents=True, exist_ok=True) + + # Clean up stale Chromium lock files from previous container runs + for lock_file in ["SingletonLock", "SingletonSocket", "SingletonCookie"]: + lock_path = SESSION_DIR / lock_file + if lock_path.exists(): + try: + lock_path.unlink() + logger.info(f"Stale Lock-File entfernt: {lock_file}") + except Exception: + pass + + if _playwright_instance is None: + _playwright_instance = await async_playwright().start() + + _browser_context = await _playwright_instance.chromium.launch_persistent_context( + user_data_dir=str(SESSION_DIR), + headless=True, + locale="de-DE", + user_agent=( + "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 " + "(KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36" + ), + viewport={"width": 1280, "height": 800}, + args=[ + "--disable-blink-features=AutomationControlled", + "--disable-gpu", + "--disable-dev-shm-usage", + "--disable-extensions", + "--disable-background-networking", + "--disable-translate", + "--no-first-run", + "--no-sandbox", + ], + ) + await _apply_stealth_to_context(_browser_context) + return _browser_context + + +async def close_browser_context(): + """Close browser context and playwright instance.""" + global _browser_context, _playwright_instance + if _browser_context is not None: + try: + await _browser_context.close() + except Exception: + pass + _browser_context = None + if _playwright_instance is not None: + try: + await _playwright_instance.stop() + except Exception: + pass + _playwright_instance = None + + +def get_login_state() -> dict: + """Return current login state for polling.""" + return dict(_login_state) + + +async def _save_debug(page, name: str): + """Save screenshot and HTML dump for debugging (max 50 files).""" + ts = datetime.now().strftime('%Y%m%d_%H%M%S') + try: + DEBUG_DIR.mkdir(parents=True, exist_ok=True) + # Limit to 50 files - delete oldest if over limit + existing = sorted(DEBUG_DIR.iterdir(), key=lambda p: p.stat().st_mtime) + while len(existing) > 48: # leave room for 2 new files + existing.pop(0).unlink() + except Exception as e: + logger.error(f"Amazon Debug-Verzeichnis Fehler: {e}") + return + # Save HTML (most reliable) + try: + html_path = DEBUG_DIR / f"{name}_{ts}.html" + content = await page.content() + html_path.write_text(content, encoding="utf-8") + logger.info(f"Amazon Debug-HTML gespeichert: {html_path} ({len(content)} Bytes)") + except Exception as e: + logger.error(f"Amazon Debug-HTML fehlgeschlagen: {e}") + # Save screenshot + try: + png_path = DEBUG_DIR / f"{name}_{ts}.png" + await page.screenshot(path=str(png_path), full_page=True) + logger.info(f"Amazon Debug-Screenshot gespeichert: {png_path}") + except Exception as e: + logger.error(f"Amazon Debug-Screenshot fehlgeschlagen: {e}") + + +async def check_session_valid() -> bool: + """Check if an active Amazon session exists. + + Returns True if we have a live interactive page, or if processing is active + (page consumed but still working), or if login is in progress. + """ + has_page = _interactive_page is not None + is_logging_in = _login_state.get("status") in ("interactive", "logging_in") + logger.info(f"Amazon Session-Check: has_page={has_page}, login_active={is_logging_in}, processing={_processing_active}") + return has_page or is_logging_in or _processing_active + + +def is_interactive_login_active() -> bool: + """Check if interactive login modal is currently open (browser in use by user). + + Also returns True if user has logged in but hasn't closed the modal yet. + """ + status = _login_state.get("status", "idle") + # Active if login dialog is open (interactive, logging_in, or logged_in but page still held) + if status in ("interactive", "logging_in"): + return True + if status == "logged_in" and _interactive_page is not None: + return True + return False + + +async def clear_session(): + """Clear browser session data.""" + global _login_state, _interactive_page + await close_interactive_login(force_close=True) + await close_browser_context() + # Remove session files + if SESSION_DIR.exists(): + import shutil + try: + shutil.rmtree(SESSION_DIR) + except Exception as e: + logger.warning(f"Session-Verzeichnis konnte nicht gelöscht werden: {e}") + SESSION_DIR.mkdir(parents=True, exist_ok=True) + _login_state = {"status": "idle", "message": ""} + + +# --- Interactive Login (user solves CAPTCHAs via screenshot/click/type) --- + + +async def start_interactive_login(): + """Open browser page to Amazon login and keep it alive for user interaction.""" + global _login_state, _interactive_page + + if _interactive_page is not None: + # Already have an interactive session + return + + if _process_lock.locked(): + _login_state = {"status": "login_failed", "message": "Amazon-Abruf läuft gerade. Bitte warten bis der Abruf fertig ist."} + return + + _login_state = {"status": "interactive", "message": "Browser wird gestartet..."} + + try: + settings = await get_settings() + domain = settings.get("amazon_domain", "amazon.de") + ctx = await _get_browser_context() + page = await ctx.new_page() + # Stealth is applied at context level + await _add_virtual_authenticator(page) + + # Navigate to order history - Amazon redirects to login if not authenticated + await page.goto( + f"https://www.{domain}/gp/css/order-history", + wait_until="domcontentloaded", + timeout=60000, + ) + # Wait a bit for page to settle + await asyncio.sleep(2) + + _interactive_page = page + + # Check if already logged in (not on a login/auth page) + url = page.url + is_login = "signin" in url or "/ap/" in url or "/auth/" in url + if not is_login and "amazon." in url: + _login_state = {"status": "logged_in", "message": "Bereits angemeldet"} + else: + _login_state = {"status": "interactive", "message": "Bitte im Browser anmelden"} + + logger.info(f"Interaktive Login-Session gestartet, URL: {url}") + + except Exception as e: + logger.error(f"Interaktive Login-Session fehlgeschlagen: {e}") + _login_state = {"status": "login_failed", "message": f"Browser konnte nicht gestartet werden: {e}"} + if _interactive_page: + try: + await _interactive_page.close() + except Exception: + pass + _interactive_page = None + + +async def get_browser_screenshot() -> bytes | None: + """Take a screenshot of the interactive login page.""" + if _interactive_page is None: + return None + try: + return await _interactive_page.screenshot(type="png") + except Exception as e: + logger.error(f"Screenshot fehlgeschlagen: {e}") + return None + + +async def send_browser_click(x: int, y: int): + """Forward a mouse click to the interactive browser page.""" + global _login_state + if _interactive_page is None: + return + try: + await _interactive_page.mouse.click(x, y) + await asyncio.sleep(0.3) + # Check if login completed after click + await _check_interactive_login_complete() + except Exception as e: + logger.error(f"Browser-Klick fehlgeschlagen: {e}") + + +async def send_browser_type(text: str): + """Type text into the currently focused element in the browser.""" + global _login_state + if _interactive_page is None: + return + try: + await _interactive_page.keyboard.type(text, delay=50) + await asyncio.sleep(0.2) + except Exception as e: + logger.error(f"Browser-Texteingabe fehlgeschlagen: {e}") + + +async def send_browser_key(key: str): + """Send a special key (Enter, Tab, Backspace, Escape) to the browser.""" + global _login_state + if _interactive_page is None: + return + try: + await _interactive_page.keyboard.press(key) + await asyncio.sleep(0.5) + # Check if login completed after key press (e.g. Enter on password) + await _check_interactive_login_complete() + except Exception as e: + logger.error(f"Browser-Taste fehlgeschlagen: {e}") + + +async def _check_interactive_login_complete(): + """Check if the interactive login page has left the login flow.""" + global _login_state + if _interactive_page is None: + return + try: + url = _interactive_page.url + is_login = "signin" in url or "/ap/" in url or "/auth/" in url + is_captcha = "captcha" in url.lower() + + # Check page content for error indicators + try: + title = await _interactive_page.title() + except Exception: + title = "" + is_error = any(t in title.lower() for t in [ + "tut uns leid", "sorry", "fehler", "error", + "problem", "bot", "automated", "unusual", + ]) + is_blocked = "errors" in url or "/hz/approvalrequest" in url + + if is_error or is_blocked: + _login_state = {"status": "interactive", "message": "Amazon blockiert den Zugriff. Versuchen Sie es erneut oder lösen Sie die Sicherheitsabfrage."} + logger.warning(f"Interaktiver Login: Error-Seite erkannt. URL: {url}, Titel: {title}") + elif is_captcha: + _login_state = {"status": "interactive", "message": "Bitte CAPTCHA lösen"} + elif not is_login and "amazon." in url: + _login_state = {"status": "logged_in", "message": "Erfolgreich angemeldet"} + logger.info(f"Interaktiver Login erfolgreich! URL: {url}") + except Exception: + pass + + +async def close_interactive_login(force_close: bool = False): + """Close the interactive login modal. Page stays alive for reuse by process_amazon. + + Args: + force_close: If True, actually close the page (e.g. on error or explicit logout). + """ + global _interactive_page, _login_state + if force_close and _interactive_page is not None: + try: + await _interactive_page.close() + except Exception: + pass + _interactive_page = None + # Reset login state (page stays alive for process_amazon to consume) + _login_state = {"status": "idle", "message": ""} + logger.info(f"Interactive Login Modal geschlossen (page={'behalten' if _interactive_page else 'geschlossen'})") + + +async def start_login(): + """Start interactive Amazon login as background task.""" + global _login_state, _otp_future + + if _login_lock.locked(): + return + + async with _login_lock: + _login_state = {"status": "logging_in", "message": "Browser wird gestartet..."} + _otp_future = None + + try: + settings = await get_settings() + amazon_email = settings.get("amazon_email", "") + amazon_password = settings.get("amazon_password", "") + domain = settings.get("amazon_domain", "amazon.de") + + if not amazon_email or not amazon_password: + _login_state = { + "status": "login_failed", + "message": "Amazon E-Mail oder Passwort nicht konfiguriert", + } + return + + ctx = await _get_browser_context() + page = await ctx.new_page() + + try: + await _do_login(page, domain, amazon_email, amazon_password) + finally: + await page.close() + + except Exception as e: + logger.error(f"Amazon-Login fehlgeschlagen: {e}") + _login_state = { + "status": "login_failed", + "message": f"Login fehlgeschlagen: {e}", + } + + +async def _do_login(page, domain, email, password): + """Execute the login flow using semantic Playwright locators.""" + global _login_state, _otp_future + + _login_state = {"status": "logging_in", "message": "Navigiere zu Amazon..."} + + # Apply stealth and virtual authenticator to avoid bot detection + # Stealth is applied at context level + await _add_virtual_authenticator(page) + + # Navigate to order history - Amazon will redirect to login if not authenticated + await page.goto( + f"https://www.{domain}/gp/css/order-history", + wait_until="networkidle", + timeout=60000, + ) + + await _save_debug(page, "login_start") + + # Check if we're already logged in (no redirect to login page) + url = page.url + if ("order-history" in url or "your-orders" in url) and "signin" not in url and "/ap/" not in url: + logger.info("Amazon Login: Bereits eingeloggt!") + _login_state = {"status": "logged_in", "message": "Bereits angemeldet"} + return + + await _human_delay() + + # --- Step 1: Enter email --- + _login_state = {"status": "logging_in", "message": "E-Mail wird eingegeben..."} + + email_field = None + for locator in [ + page.locator("#ap_email_login"), + page.locator("#ap_email"), + page.locator("input[name='email']"), + page.locator("input[type='email']"), + page.get_by_label("Mobiltelefonnummer oder E-Mail-Adresse eingeben"), + page.get_by_label("E-Mail"), + page.get_by_label("E-Mail-Adresse"), + page.get_by_label("Email"), + ]: + try: + if await locator.count() > 0 and await locator.first.is_visible(): + email_field = locator.first + logger.info("Amazon Login: Email-Feld gefunden") + break + except Exception: + continue + + if not email_field: + await _save_debug(page, "login_no_email_field") + _login_state = {"status": "login_failed", "message": "Email-Feld nicht gefunden"} + return + + await email_field.fill(email) + await _human_delay(0.5, 1.5) + + # Click continue button + continue_btn = None + for locator in [ + page.get_by_role("button", name="Weiter"), + page.get_by_role("button", name="Continue"), + page.locator("#continue"), + page.locator("input[type='submit']"), + ]: + try: + if await locator.count() > 0 and await locator.first.is_visible(): + continue_btn = locator.first + break + except Exception: + continue + + if continue_btn: + logger.info("Amazon Login: Weiter-Button geklickt") + await continue_btn.click() + await page.wait_for_load_state("networkidle") + await _human_delay() + + await _save_debug(page, "login_after_email") + + # Check for CAPTCHA + if await page.locator("#auth-captcha-image, #captchacharacters, #cvf-aamation-container, #captcha-container, #aa-challenge-whole-page-iframe").count() > 0 or "captcha" in (await page.title()).lower() or "bestätige deine Identität" in (await page.title()): + _login_state = {"status": "login_failed", "message": "CAPTCHA/Sicherheitsabfrage erkannt. Bitte über den interaktiven Browser anmelden."} + await _save_debug(page, "login_captcha") + return + + # --- Step 2: Enter password --- + _login_state = {"status": "logging_in", "message": "Passwort wird eingegeben..."} + + pw_field = None + for locator in [ + page.get_by_label("Passwort"), + page.get_by_label("Password"), + page.locator("#ap_password"), + page.locator("input[name='password']"), + page.locator("input[type='password']"), + ]: + try: + if await locator.count() > 0 and await locator.first.is_visible(): + pw_field = locator.first + logger.info("Amazon Login: Passwort-Feld gefunden") + break + except Exception: + continue + + if not pw_field: + logger.info("Amazon Login: Kein Passwort-Feld sichtbar, prüfe ob bereits eingeloggt...") + await _save_debug(page, "login_no_password_field") + else: + await pw_field.fill(password) + await _human_delay(0.5, 1.5) + + # Click sign-in button + signin_btn = None + for locator in [ + page.get_by_role("button", name="Anmelden"), + page.get_by_role("button", name="Sign in"), + page.locator("#signInSubmit"), + page.locator("#auth-signin-button"), + page.locator("input[type='submit']"), + ]: + try: + if await locator.count() > 0 and await locator.first.is_visible(): + signin_btn = locator.first + break + except Exception: + continue + + if signin_btn: + logger.info("Amazon Login: Anmelden-Button geklickt") + await signin_btn.click() + await page.wait_for_load_state("networkidle") + await _human_delay(1.5, 3.0) + + await _save_debug(page, "login_after_password") + + # Check for CAPTCHA again + if await page.locator("#auth-captcha-image, #captchacharacters, #cvf-aamation-container, #captcha-container, #aa-challenge-whole-page-iframe").count() > 0 or "captcha" in (await page.title()).lower() or "bestätige deine Identität" in (await page.title()): + _login_state = {"status": "login_failed", "message": "CAPTCHA/Sicherheitsabfrage erkannt. Bitte über den interaktiven Browser anmelden."} + await _save_debug(page, "login_captcha") + return + + # --- Step 3: Handle 2FA/OTP --- + otp_field = page.locator("#auth-mfa-otpcode, input[name='otpCode'], #ap_dcq_hint") + if await otp_field.count() > 0: + _login_state = { + "status": "awaiting_otp", + "message": "Bitte geben Sie den Bestätigungscode ein", + } + loop = asyncio.get_event_loop() + _otp_future = loop.create_future() + try: + otp_code = await asyncio.wait_for(_otp_future, timeout=300) + except asyncio.TimeoutError: + _login_state = {"status": "login_failed", "message": "OTP-Zeitüberschreitung (5 Minuten)"} + return + finally: + _otp_future = None + + _login_state = {"status": "logging_in", "message": "OTP wird eingegeben..."} + + for sel in ["#auth-mfa-otpcode", "input[name='otpCode']"]: + field = page.locator(sel) + if await field.count() > 0: + await field.first.fill(otp_code) + break + + for sel in ["#auth-signin-button", "input[type='submit']", "#submitButton"]: + btn = page.locator(sel) + if await btn.count() > 0: + await btn.first.click() + break + + await page.wait_for_load_state("networkidle") + await _human_delay(1.5, 3.0) + + # --- Step 4: Handle device approval --- + approval = page.locator("#auth-approve-form, .cvf-widget-form-approve") + if await approval.count() > 0: + _login_state = { + "status": "awaiting_otp", + "message": "Bitte bestätigen Sie die Anmeldung auf Ihrem Gerät", + } + for _ in range(60): + await asyncio.sleep(2) + url = page.url + if ("signin" not in url and "/ap/" not in url) or domain + "/?ref" in url: + break + if await approval.count() == 0: + break + + # --- Verify login success --- + url = page.url + is_login_page = "signin" in url or "/ap/" in url + page_content = await page.content() + content_len = len(page_content) + is_error_page = "Suchen Sie etwas" in page_content or "Seite wurde nicht gefunden" in page_content + is_order_page = "order-history" in url or "your-orders" in url or "Meine Bestellungen" in page_content + is_success = not is_login_page and not is_error_page and domain in url and (is_order_page or content_len > 10000) + logger.info(f"Amazon Login: URL={url}, is_login_page={is_login_page}, is_error_page={is_error_page}, is_order_page={is_order_page}, content_len={content_len}, success={is_success}") + await _save_debug(page, "login_result") + + if is_success: + _login_state = {"status": "logged_in", "message": "Erfolgreich angemeldet"} + logger.info("Amazon-Login erfolgreich") + else: + error_el = page.locator("#auth-error-message-box, .a-alert-content") + error_msg = "" + if await error_el.count() > 0: + error_msg = await error_el.first.inner_text() + if is_error_page: + error_msg = "Amazon hat den Zugriff blockiert (Fehlerseite). Bitte später erneut versuchen." + _login_state = { + "status": "login_failed", + "message": f"Login fehlgeschlagen. {error_msg}".strip(), + } + + +async def submit_otp(code: str) -> bool: + """Submit OTP code from web UI.""" + global _otp_future + if _otp_future is not None and not _otp_future.done(): + _otp_future.set_result(code) + return True + return False + + +async def process_amazon() -> dict: + """Main function: fetch Amazon invoices and forward via email.""" + if _process_lock.locked(): + logger.info("Amazon-Import: Läuft bereits, überspringe") + return {"processed": 0, "errors": 0, "error": "Amazon-Abruf läuft bereits"} + + # Don't start processing while user is logging in (would freeze the browser) + if is_interactive_login_active(): + logger.info("Amazon-Import: Interaktiver Login läuft, überspringe") + return {"processed": 0, "errors": 0, "error": "Bitte zuerst den Login abschließen"} + + async with _process_lock: + return await _process_amazon_inner() + + +async def _process_amazon_inner() -> dict: + """Inner processing function (protected by _process_lock).""" + global _interactive_page, _processing_active + + settings = await get_settings() + + if settings.get("amazon_enabled") != "true": + return {"processed": 0, "errors": 0} + + # Check prerequisites + if not settings.get("smtp_server") or not settings.get("import_email"): + logger.warning("Amazon-Import: SMTP oder Import-Email nicht konfiguriert") + return {"processed": 0, "errors": 0, "error": "SMTP/Import-Email nicht konfiguriert"} + + if not settings.get("amazon_email") or not settings.get("amazon_password"): + logger.warning("Amazon-Import: Zugangsdaten nicht konfiguriert") + return {"processed": 0, "errors": 0, "error": "Amazon-Zugangsdaten nicht konfiguriert"} + + # Without interactive login page, new pages can't authenticate (session bound to page) + if _interactive_page is None: + logger.info("Amazon-Import: Keine aktive Login-Session, überspringe (bitte zuerst manuell anmelden)") + return {"processed": 0, "errors": 0, "error": "Bitte zuerst unter Plattformen bei Amazon anmelden"} + + domain = settings.get("amazon_domain", "amazon.de") + since_str = settings.get("amazon_since_date", "") + if since_str: + try: + since_date = datetime.strptime(since_str, "%Y-%m-%d") + except ValueError: + logger.warning(f"Amazon: Ungültiges Startdatum: {since_str}") + since_date = datetime.now() - timedelta(days=30) + else: + since_date = datetime.now() - timedelta(days=30) + + logger.info(f"Amazon-Import gestartet: domain={domain}, seit={since_date.strftime('%Y-%m-%d')}") + + processed = 0 + skipped = 0 + errors = 0 + + # Reuse interactive login page if available (session is bound to the page) + reused_page = False + if _interactive_page is not None: + page = _interactive_page + _interactive_page = None # Take ownership + _processing_active = True # Signal that session is still valid while processing + _login_state = {"status": "idle", "message": ""} # Reset login state + reused_page = True + logger.info("Amazon: Verwende interaktive Login-Page für Abruf") + else: + ctx = await _get_browser_context() + page = await ctx.new_page() + await _add_virtual_authenticator(page) + + smtp_conn = None + + try: + logger.info("Amazon: SMTP-Verbindung wird hergestellt...") + smtp_conn = _connect_smtp(settings) + logger.info("Amazon: SMTP-Verbindung OK, verarbeite Bestellungen seitenweise...") + import_email = settings.get("import_email_eingang") or settings.get("import_email", "") + + # Process orders PAGE BY PAGE (collect + process on same page so buttons are visible) + result = await _collect_and_process_orders( + page, domain, since_date, smtp_conn, settings, import_email + ) + if result is None: + error_detail = "Amazon-Sitzung abgelaufen. Bitte manuell unter Plattformen neu anmelden." + logger.warning(f"Amazon-Import: {error_detail}") + await add_log_entry( + email_subject="Amazon-Import", + email_from="Amazon", + attachments_count=0, + status="error", + error_message=error_detail, + ) + return {"processed": 0, "errors": 0, "error": error_detail} + + processed, skipped, errors = result["processed"], result["skipped"], result["errors"] + + # Update last sync date + await save_settings({"amazon_last_sync": datetime.now().strftime("%Y-%m-%d %H:%M")}) + + # Log summary if nothing was processed + if processed == 0 and errors == 0: + if skipped > 0: + summary = f"Alle Rechnungen bereits importiert ({skipped} übersprungen)" + else: + summary = "Keine neuen Rechnungen gefunden" + await add_log_entry( + email_subject="Amazon-Import (Zusammenfassung)", + email_from=f"Amazon ({domain})", + attachments_count=0, + status="success", + error_message=summary, + sent_to="", + ) + + except Exception as e: + logger.error(f"Amazon-Import Fehler: {e}") + await add_log_entry( + email_subject="Amazon-Import", + email_from=f"Amazon ({domain})", + attachments_count=0, + status="error", + error_message=str(e), + ) + return {"processed": processed, "skipped": skipped, "errors": errors + 1, "error": str(e)} + + finally: + _processing_active = False + # Keep page alive for next run instead of closing it (preserves session) + if reused_page and page: + _interactive_page = page # Return page for reuse + logger.info("Amazon: Page zurück in Session-Pool (Session bleibt erhalten)") + else: + await page.close() + if smtp_conn: + try: + smtp_conn.quit() + except Exception: + pass + + logger.info(f"Amazon-Import fertig: {processed} verarbeitet, {skipped} übersprungen, {errors} Fehler") + return {"processed": processed, "skipped": skipped, "errors": errors} + + +async def _collect_and_process_orders(page, domain, since_date, smtp_conn, settings, import_email) -> dict | None: + """Collect orders AND process invoices page by page. + + This ensures invoice buttons are visible when we try to click them, + because we process each page's orders before navigating to the next page. + Returns None if session is invalid, otherwise dict with processed/skipped/errors counts. + """ + processed = 0 + skipped = 0 + errors = 0 + + # Navigate to orders page if needed + actual_url = page.url + if "order-history" not in actual_url and "your-orders" not in actual_url: + if "signin" in actual_url or "/ap/" in actual_url: + return None + logger.info("Amazon: Nicht auf Bestellseite, versuche Navigation über Link...") + orders_link = page.locator("a[href*='order-history'], a[href*='your-orders']") + if await orders_link.count() > 0: + await orders_link.first.click() + await asyncio.sleep(3) + try: + await page.wait_for_load_state("networkidle", timeout=15000) + except Exception: + pass + actual_url = page.url + if "order-history" not in actual_url and "your-orders" not in actual_url: + return None + + # Try to set time filter + now = datetime.now() + days_back = (now - since_date).days + if days_back <= 30: + desired_filter = "last30" + elif days_back <= 90: + desired_filter = "months-3" + else: + desired_filter = f"year-{since_date.year}" + + logger.info(f"Amazon: Setze Zeitfilter: {desired_filter}") + try: + filter_dropdown = page.locator("select[name='orderFilter'], select#orderFilter, select#time-filter") + if await filter_dropdown.count() > 0: + await filter_dropdown.first.select_option(desired_filter) + await asyncio.sleep(3) + try: + await page.wait_for_load_state("networkidle", timeout=15000) + except Exception: + pass + else: + logger.info("Amazon: Kein Filter-Dropdown gefunden, verwende aktuelle Ansicht") + except Exception as e: + logger.warning(f"Amazon: Filter setzen fehlgeschlagen: {e}") + + await asyncio.sleep(2) + + seen_ids = set() + page_num = 1 + total_orders = 0 + + while True: + logger.info(f"Amazon: Verarbeite Seite {page_num}...") + + # Check for login redirect + if "signin" in page.url or "/ap/" in page.url: + if total_orders > 0: + logger.warning(f"Amazon: Login-Redirect auf Seite {page_num}, breche ab") + break + return None + + # Extract orders from current page + page_orders = await _extract_orders_from_page(page, since_date) + new_orders = [o for o in page_orders if o["id"] not in seen_ids] + for o in new_orders: + seen_ids.add(o["id"]) + logger.info(f"Amazon: Seite {page_num}: {len(page_orders)} gefunden, {len(new_orders)} neu") + total_orders += len(new_orders) + + # Process invoices for THIS page's orders immediately (buttons are visible now) + for order in new_orders: + order_id = order.get("id", "?") + try: + if await is_invoice_downloaded(order_id, order_id): + skipped += 1 + logger.debug(f"Amazon: Bestellung {order_id} bereits importiert") + continue + + pdf_list = await _download_order_invoices(page, domain, order_id) + if not pdf_list: + logger.debug(f"Amazon: Keine Rechnung für Bestellung {order_id}") + continue + + for inv_idx, pdf_bytes in enumerate(pdf_list): + suffix = f"_{inv_idx+1}" if len(pdf_list) > 1 else "" + try: + filename = f"Amazon_Rechnung_{order_id}{suffix}.pdf" + + if settings.get("debug_save_amazon_pdfs") == "true": + try: + tmp_dir = Path(os.environ.get("UPLOAD_DIR", "/data/uploads")) / "amazon_invoices" + tmp_dir.mkdir(parents=True, exist_ok=True) + (tmp_dir / filename).write_bytes(pdf_bytes) + logger.info(f"Amazon: Debug-PDF gespeichert: {tmp_dir / filename} ({len(pdf_bytes)} Bytes)") + except Exception as e: + logger.warning(f"Amazon: Debug-PDF speichern fehlgeschlagen: {e}") + + forward_msg = _build_forward_email( + from_addr=settings.get("smtp_username", ""), + to_addr=import_email, + original_subject=f"Amazon Rechnung - Bestellung {order_id}{suffix}", + original_from=f"Amazon ({domain})", + attachments=[(filename, pdf_bytes)], + ) + smtp_log = _send_with_log(smtp_conn, forward_msg) + processed += 1 + logger.info(f"Amazon: Rechnung {inv_idx+1}/{len(pdf_list)} für {order_id} gesendet") + await add_log_entry( + email_subject=f"Amazon Rechnung - {order_id}{suffix}", + email_from=f"Amazon ({domain})", + attachments_count=1, + status="success", + sent_to=import_email, + smtp_log=smtp_log, + ) + except Exception as e: + errors += 1 + logger.error(f"Amazon: Fehler bei Rechnung {inv_idx+1} für {order_id}: {e}") + await add_log_entry( + email_subject=f"Amazon Rechnung - {order_id}{suffix}", + email_from=f"Amazon ({domain})", + attachments_count=0, + status="error", + error_message=str(e), + ) + + await mark_invoice_downloaded(order_id, order_id) + await _human_delay(2.0, 4.0) + + except Exception as e: + errors += 1 + logger.error(f"Amazon: Fehler bei Bestellung {order_id}: {e}") + await add_log_entry( + email_subject=f"Amazon Rechnung - {order_id}", + email_from=f"Amazon ({domain})", + attachments_count=0, + status="error", + error_message=str(e), + ) + + # Navigate to next page + has_next = await page.evaluate("""() => { + const nextLink = document.querySelector('.a-pagination .a-last:not(.a-disabled) a'); + if (nextLink) { + nextLink.scrollIntoView({behavior: 'smooth', block: 'center'}); + return true; + } + return false; + }""") + + if has_next and page_orders: + logger.info("Amazon: Klicke auf nächste Seite (JS)...") + await asyncio.sleep(0.5) + await page.evaluate("""() => { + const nextLink = document.querySelector('.a-pagination .a-last:not(.a-disabled) a'); + if (nextLink) nextLink.click(); + }""") + await asyncio.sleep(3) + try: + await page.wait_for_load_state("networkidle", timeout=30000) + except Exception: + pass + page_num += 1 + await _human_delay(1.0, 2.0) + else: + break + + logger.info(f"Amazon: Gesamt {total_orders} Bestellungen auf {page_num} Seite(n)") + return {"processed": processed, "skipped": skipped, "errors": errors} + + +async def _collect_orders(page, domain: str, since_date: datetime) -> list[dict] | None: + """Collect order IDs from Amazon order history using SPA navigation (no page.goto). + + The page must already be on the Amazon orders page (from interactive login). + Uses dropdown/click navigation to avoid losing the session. + """ + orders = [] + + actual_url = page.url + logger.info(f"Amazon: Aktuelle Seite: {actual_url}") + + # Check if we're on the orders page or need to navigate there + if "order-history" not in actual_url and "your-orders" not in actual_url: + if "signin" in actual_url or "/ap/" in actual_url: + logger.error("Amazon: Seite ist Login-Seite - Session ungültig!") + await _save_debug(page, "orders_not_on_orders_page") + return None + # Try clicking the orders link within Amazon's SPA + logger.info("Amazon: Nicht auf Bestellseite, versuche Navigation über Link...") + orders_link = page.locator("a[href*='order-history'], a[href*='your-orders'], a:has-text('Bestellungen'), a:has-text('Meine Bestellungen')") + if await orders_link.count() > 0: + await orders_link.first.click() + await asyncio.sleep(3) + try: + await page.wait_for_load_state("networkidle", timeout=15000) + except Exception: + pass + actual_url = page.url + if "order-history" not in actual_url and "your-orders" not in actual_url: + logger.error(f"Amazon: Konnte nicht zur Bestellseite navigieren. URL: {actual_url}") + await _save_debug(page, "orders_navigation_failed") + return None + + # Determine desired time filter + now = datetime.now() + days_back = (now - since_date).days + if days_back <= 30: + desired_filter = "last30" + elif days_back <= 90: + desired_filter = "months-3" + else: + desired_filter = f"year-{since_date.year}" + + # Try to set the time filter via the dropdown + logger.info(f"Amazon: Setze Zeitfilter: {desired_filter}") + try: + filter_dropdown = page.locator("select[name='orderFilter'], select#orderFilter, select#time-filter") + if await filter_dropdown.count() > 0: + await filter_dropdown.first.select_option(desired_filter) + await asyncio.sleep(3) + try: + await page.wait_for_load_state("networkidle", timeout=15000) + except Exception: + pass + logger.info(f"Amazon: Zeitfilter '{desired_filter}' gesetzt") + else: + logger.info("Amazon: Kein Filter-Dropdown gefunden, verwende aktuelle Ansicht") + except Exception as e: + logger.warning(f"Amazon: Filter setzen fehlgeschlagen: {e}") + + # Wait for content to load + await asyncio.sleep(2) + + seen_ids = set() + page_num = 1 + + while True: + logger.info(f"Amazon: Verarbeite Seite {page_num}...") + + # Check for login redirect + if "signin" in page.url or "/ap/" in page.url: + if orders: + logger.warning(f"Amazon: Login-Redirect auf Seite {page_num}, verwende {len(orders)} bereits gefundene Bestellung(en)") + return orders + logger.error("Amazon: Session ungültig!") + await _save_debug(page, "orders_redirect_login") + return None + + page_orders = await _extract_orders_from_page(page, since_date) + new_orders = [o for o in page_orders if o["id"] not in seen_ids] + for o in new_orders: + seen_ids.add(o["id"]) + logger.info(f"Amazon: Seite {page_num}: {len(page_orders)} gefunden, {len(new_orders)} neu") + orders.extend(new_orders) + + # Try to click "Next" button for pagination via JavaScript (avoids visibility issues) + has_next = await page.evaluate("""() => { + const nextLink = document.querySelector('.a-pagination .a-last:not(.a-disabled) a'); + if (nextLink) { + nextLink.scrollIntoView({behavior: 'smooth', block: 'center'}); + return true; + } + return false; + }""") + + if has_next and page_orders: + logger.info("Amazon: Klicke auf nächste Seite (JS)...") + await asyncio.sleep(0.5) # Wait for scroll + # Use JavaScript click to bypass Playwright visibility checks + await page.evaluate("""() => { + const nextLink = document.querySelector('.a-pagination .a-last:not(.a-disabled) a'); + if (nextLink) nextLink.click(); + }""") + await asyncio.sleep(3) + try: + await page.wait_for_load_state("networkidle", timeout=30000) + except Exception: + pass + page_num += 1 + await _human_delay(1.0, 2.0) + else: + break + + if not orders: + logger.warning("Amazon: Keine Bestellungen gefunden!") + await _save_debug(page, "no_orders_found") + + return orders + + +async def _extract_orders_from_page(page, since_date: datetime) -> list[dict]: + """Extract order data from the current page.""" + orders = [] + + title = await page.title() + logger.info(f"Amazon: Seite analysieren: Titel='{title}', URL={page.url}") + + await _save_debug(page, "order_page") + + # Use JavaScript to extract only VISIBLE order cards (Amazon loads all in DOM, shows ~10 per page) + visible_orders = await page.evaluate("""() => { + const results = []; + // Try multiple selectors + const selectors = [ + '.order-card.js-order-card', + '.order-card', + '.order-info', + '.a-box-group.order', + '.order', + ]; + const seen = new Set(); + for (const sel of selectors) { + for (const el of document.querySelectorAll(sel)) { + // Only process visible elements (offsetParent !== null or check display) + if (el.offsetParent === null && getComputedStyle(el).position !== 'fixed') continue; + const text = el.innerText || ''; + const idMatch = text.match(/(\d{3}-\d{7}-\d{7})/); + if (idMatch && !seen.has(idMatch[1])) { + seen.add(idMatch[1]); + results.push({id: idMatch[1], text: text.substring(0, 500)}); + } + } + if (results.length > 0) break; + } + return results; + }""") + + logger.info(f"Amazon: Sichtbare Order-Cards gefunden: {len(visible_orders)}") + + if not visible_orders: + # Last resort: regex fallback on visible page text + visible_text = await page.evaluate("() => document.body.innerText") + order_ids = re.findall(r"\b(\d{3}-\d{7}-\d{7})\b", visible_text) + unique_ids = {oid for oid in set(order_ids) if not oid.startswith("000-")} + logger.info(f"Amazon: Keine Order-Cards, Fallback-Regex: {len(unique_ids)} Bestell-ID(s) im sichtbaren Text") + if not unique_ids: + logger.warning(f"Amazon: Seite hat keine Bestell-IDs. Titel: '{title}', URL: {page.url}") + for oid in unique_ids: + orders.append({"id": oid, "date": None}) + return orders + + for vo in visible_orders: + order_id = vo["id"] + if order_id.startswith("000-"): + continue + + order_date = _parse_german_date(vo["text"]) + if order_date and order_date < since_date: + logger.debug(f"Amazon: Bestellung {order_id} übersprungen (Datum {order_date.strftime('%Y-%m-%d')} < {since_date.strftime('%Y-%m-%d')})") + continue + + logger.debug(f"Amazon: Bestellung gefunden: {order_id}, Datum: {order_date}") + orders.append({"id": order_id, "date": order_date}) + + return orders + + +def _parse_german_date(text: str) -> datetime | None: + """Parse German date formats from order text.""" + months_de = { + "Januar": 1, "Februar": 2, "März": 3, "April": 4, + "Mai": 5, "Juni": 6, "Juli": 7, "August": 8, + "September": 9, "Oktober": 10, "November": 11, "Dezember": 12, + } + pattern = r"(\d{1,2})\.\s*(" + "|".join(months_de.keys()) + r")\s+(\d{4})" + match = re.search(pattern, text) + if match: + day = int(match.group(1)) + month = months_de[match.group(2)] + year = int(match.group(3)) + try: + return datetime(year, month, day) + except ValueError: + pass + + match = re.search(r"(\d{2})\.(\d{2})\.(\d{4})", text) + if match: + try: + return datetime(int(match.group(3)), int(match.group(2)), int(match.group(1))) + except ValueError: + pass + + return None + + +async def _close_all_popovers(page): + """Close all open Amazon popovers reliably. + + IMPORTANT: Do NOT set display:none - Amazon recycles popover containers, + so hiding them prevents future popovers from appearing. + """ + try: + await page.evaluate("""() => { + // Close via close buttons + document.querySelectorAll('.a-popover-footer button, .a-popover .a-button-close, .a-popover-close').forEach(b => { + try { b.click(); } catch(e) {} + }); + // Use Amazon's own popover API to close if available + if (window.P && window.P.when) { + try { + window.P.when('A').execute(function(A) { + if (A && A.popover) { + document.querySelectorAll('.a-popover:not(.a-popover-hidden)').forEach(p => { + const id = p.getAttribute('data-a-popover-id'); + if (id) try { A.popover.close(id); } catch(e) {} + }); + } + }); + } catch(e) {} + } + // Click outside to dismiss any remaining popovers + document.body.click(); + }""") + await asyncio.sleep(0.5) + except Exception: + pass + + +async def _download_order_invoices(page, domain: str, order_id: str) -> list[bytes]: + """Download invoice PDFs for an order. + + Strategy: Extract popover AJAX URL from data-a-popover attribute, + then use XMLHttpRequest with proper Amazon headers (anti-CSRF token, + X-Requested-With) to load the invoice popover HTML. + This is exactly what Amazon's own JavaScript does internally. + """ + import base64 + pdfs = [] + + logger.info(f"Amazon: Hole Rechnungs-Links für {order_id}") + + # Step 1: Extract the popover AJAX URL and download links via XMLHttpRequest + invoice_result = await page.evaluate(f"""async () => {{ + // Find the order card containing this order ID + const cards = document.querySelectorAll('.order-card, .order-info, .a-box-group, div'); + let popoverUrl = null; + + for (const card of cards) {{ + if (!card.innerText || !card.innerText.includes('{order_id}')) continue; + + // Find the popover trigger with invoice URL + const triggers = card.querySelectorAll('[data-a-popover*="invoice"]'); + for (const trigger of triggers) {{ + try {{ + const config = JSON.parse(trigger.getAttribute('data-a-popover')); + if (config && config.url && config.url.includes('{order_id}')) {{ + popoverUrl = config.url; + break; + }} + }} catch(e) {{}} + }} + if (popoverUrl) break; + }} + + if (!popoverUrl) {{ + return {{ found: false, error: 'Kein Popover-URL gefunden' }}; + }} + + // Step 2: Make XMLHttpRequest with proper Amazon headers + try {{ + const response = await new Promise((resolve, reject) => {{ + const xhr = new XMLHttpRequest(); + xhr.open('GET', popoverUrl, true); + xhr.setRequestHeader('X-Requested-With', 'XMLHttpRequest'); + xhr.setRequestHeader('Accept', 'text/html,*/*'); + xhr.onload = function() {{ + resolve({{ status: xhr.status, html: xhr.responseText }}); + }}; + xhr.onerror = function() {{ + reject(new Error('XHR failed')); + }}; + xhr.send(); + }}); + + if (response.status !== 200) {{ + return {{ found: false, error: 'HTTP ' + response.status, url: popoverUrl }}; + }} + + const html = response.html; + + // Check if response is a login page + if (html.includes('ap_signin') || html.includes('ap_error') || html.includes('/ap/')) {{ + return {{ found: false, error: 'Login-Seite erhalten', url: popoverUrl, isLogin: true }}; + }} + + // Extract PDF download links from the response HTML + const parser = new DOMParser(); + const doc = parser.parseFromString(html, 'text/html'); + const links = doc.querySelectorAll('a[href]'); + const pdfLinks = []; + + for (const link of links) {{ + const href = link.getAttribute('href') || ''; + const text = (link.innerText || '').trim(); + if (href.includes('/ap/') || href.includes('openid')) continue; + if (href.includes('contact.html') || href.includes('help/contact')) continue; + if (text.toLowerCase().includes('anfordern')) continue; + if ( + href.includes('.pdf') || + href.includes('documents/download') || + href.includes('/document/') || + href.includes('invoice/download') || + href.includes('generated_invoices') + ) {{ + pdfLinks.push({{ href: href, text: text.substring(0, 100) }}); + }} + }} + + return {{ found: true, url: popoverUrl, links: pdfLinks, htmlSize: html.length }}; + + }} catch(e) {{ + return {{ found: false, error: e.message, url: popoverUrl }}; + }} + }}""") + + logger.info(f"Amazon: Invoice-Ergebnis für {order_id}: found={invoice_result.get('found')}, " + f"links={invoice_result.get('links', [])}, error={invoice_result.get('error', '')}") + + if not invoice_result.get("found") or not invoice_result.get("links"): + if invoice_result.get("isLogin"): + logger.warning(f"Amazon: Session abgelaufen beim Rechnungsabruf für {order_id}") + return pdfs + + # Step 3: Download each PDF via XMLHttpRequest as base64 + for link_info in invoice_result["links"]: + href = link_info["href"] + text = link_info.get("text", "") + + # Make href absolute if relative + if href.startswith("/"): + fetch_href = href + elif href.startswith("http"): + from urllib.parse import urlparse + parsed = urlparse(href) + fetch_href = parsed.path + ("?" + parsed.query if parsed.query else "") + else: + fetch_href = "/" + href + + logger.info(f"Amazon: Lade PDF '{text}' -> {fetch_href[:100]}") + + try: + pdf_result = await page.evaluate(f"""async () => {{ + try {{ + const resp = await new Promise((resolve, reject) => {{ + const xhr = new XMLHttpRequest(); + xhr.open('GET', '{fetch_href}', true); + xhr.responseType = 'arraybuffer'; + xhr.onload = function() {{ + const bytes = new Uint8Array(xhr.response); + let binary = ''; + for (let i = 0; i < bytes.length; i++) {{ + binary += String.fromCharCode(bytes[i]); + }} + resolve({{ + ok: xhr.status === 200, + status: xhr.status, + data: btoa(binary), + size: bytes.length, + contentType: xhr.getResponseHeader('content-type') || '' + }}); + }}; + xhr.onerror = function() {{ reject(new Error('XHR failed')); }}; + xhr.send(); + }}); + return resp; + }} catch(e) {{ + return {{ ok: false, error: e.message }}; + }} + }}""") + + if pdf_result and pdf_result.get("ok") and pdf_result.get("size", 0) > 500: + pdf_bytes = base64.b64decode(pdf_result["data"]) + content_type = pdf_result.get("contentType", "") + if pdf_bytes[:5] == b"%PDF-" or "pdf" in content_type.lower(): + logger.info(f"Amazon: PDF heruntergeladen für {order_id}: {len(pdf_bytes)} Bytes") + pdfs.append(pdf_bytes) + else: + logger.debug(f"Amazon: Download kein PDF für {order_id} (type: {content_type}, size: {len(pdf_bytes)})") + elif pdf_result: + logger.debug(f"Amazon: PDF-Download fehlgeschlagen für {order_id}: {pdf_result.get('error', 'status=' + str(pdf_result.get('status')))}") + + except Exception as e: + logger.warning(f"Amazon: PDF-Download Exception für {order_id}: {e}") + + if not pdfs: + logger.info(f"Amazon: Keine PDFs für {order_id}") + + return pdfs diff --git a/app/database.py b/app/database.py index 6960de8..75fe50b 100644 --- a/app/database.py +++ b/app/database.py @@ -4,13 +4,13 @@ import aiosqlite from cryptography.fernet import Fernet DB_PATH = os.environ.get("DB_PATH", "/data/belegimport.db") -SCHEMA_VERSION = 2 +SCHEMA_VERSION = 8 logger = logging.getLogger(__name__) _fernet = None -ENCRYPTED_KEYS = {"imap_password", "smtp_password", "smb_password"} +ENCRYPTED_KEYS = {"imap_password", "smtp_password", "smb_password", "amazon_password"} DEFAULT_SETTINGS = { "imap_server": "", @@ -24,8 +24,12 @@ DEFAULT_SETTINGS = { "smtp_username": "", "smtp_password": "", "import_email": "", + "import_email_eingang": "", + "import_email_ausgang": "", "source_folder": "Rechnungen", "processed_folder": "Rechnungen/Verarbeitet", + "source_folder_ausgang": "", + "processed_folder_ausgang": "", "interval_minutes": "5", "scheduler_enabled": "false", "fetch_since_date": "", @@ -39,7 +43,18 @@ DEFAULT_SETTINGS = { "smb_share": "", "smb_source_path": "", "smb_processed_path": "Verarbeitet", + "smb_source_path_ausgang": "", + "smb_processed_path_ausgang": "", "smb_mode": "forward", + # Amazon + "amazon_enabled": "false", + "amazon_email": "", + "amazon_password": "", + "amazon_domain": "amazon.de", + "amazon_last_sync": "", + "amazon_since_date": "", + # Debug + "debug_save_amazon_pdfs": "false", } @@ -121,7 +136,10 @@ async def _run_migrations(db: aiosqlite.Connection, current_version: int): email_from TEXT, attachments_count INTEGER DEFAULT 0, status TEXT NOT NULL, - error_message TEXT + error_message TEXT, + sent_to TEXT DEFAULT '', + smtp_log TEXT DEFAULT '', + beleg_type TEXT DEFAULT 'eingang' ) """) await db.commit() @@ -150,10 +168,72 @@ async def _run_migrations(db: aiosqlite.Connection, current_version: int): await db.commit() await _set_schema_version(db, 2) - # --- Future migrations go here --- - # if current_version < 3: - # logger.info("Migration v3: ...") - # await _set_schema_version(db, 3) + if current_version < 3: + logger.info("Migration v3: Amazon-Plattform hinzugefügt") + await db.execute(""" + CREATE TABLE IF NOT EXISTS amazon_downloaded ( + order_id TEXT PRIMARY KEY, + downloaded_at TEXT NOT NULL DEFAULT (datetime('now', 'localtime')) + ) + """) + await db.commit() + await _set_schema_version(db, 3) + + if current_version < 4: + logger.info("Migration v4: sent_to Spalte im Verarbeitungslog") + await db.execute(""" + ALTER TABLE processing_log ADD COLUMN sent_to TEXT DEFAULT '' + """) + await db.commit() + await _set_schema_version(db, 4) + + if current_version < 5: + logger.info("Migration v5: SMTP-Protokoll im Verarbeitungslog") + await db.execute(""" + ALTER TABLE processing_log ADD COLUMN smtp_log TEXT DEFAULT '' + """) + await db.commit() + await _set_schema_version(db, 5) + + if current_version < 6: + logger.info("Migration v6: Per-Invoice Tracking statt per-Order") + try: + await db.execute(""" + ALTER TABLE amazon_downloaded ADD COLUMN invoice_url TEXT DEFAULT '' + """) + except Exception: + pass # column already exists + await db.commit() + await _set_schema_version(db, 6) + + if current_version < 8: + logger.info("Migration v7/8: Eingangs-/Ausgangsbelege Unterscheidung") + # Add beleg_type column to processing_log (check if it exists first) + cursor = await db.execute("PRAGMA table_info(processing_log)") + columns = [row[1] for row in await cursor.fetchall()] + if "beleg_type" not in columns: + await db.execute(""" + ALTER TABLE processing_log ADD COLUMN beleg_type TEXT DEFAULT 'eingang' + """) + logger.info(" beleg_type Spalte hinzugefügt") + # Migrate import_email -> import_email_eingang + cursor = await db.execute( + "SELECT value FROM settings WHERE key = 'import_email'" + ) + row = await cursor.fetchone() + if row and row[0]: + cursor2 = await db.execute( + "SELECT value FROM settings WHERE key = 'import_email_eingang'" + ) + row2 = await cursor2.fetchone() + if not row2 or not row2[0]: + await db.execute( + "INSERT OR REPLACE INTO settings (key, value) VALUES ('import_email_eingang', ?)", + (row[0],), + ) + logger.info(" import_email nach import_email_eingang übertragen") + await db.commit() + await _set_schema_version(db, 8) await db.commit() @@ -176,7 +256,18 @@ async def init_db(): email_from TEXT, attachments_count INTEGER DEFAULT 0, status TEXT NOT NULL, - error_message TEXT + error_message TEXT, + sent_to TEXT DEFAULT '', + smtp_log TEXT DEFAULT '', + beleg_type TEXT DEFAULT 'eingang' + ) + """) + await db.execute(""" + CREATE TABLE IF NOT EXISTS amazon_downloaded ( + order_id TEXT NOT NULL, + downloaded_at TEXT NOT NULL DEFAULT (datetime('now', 'localtime')), + invoice_url TEXT DEFAULT '', + PRIMARY KEY (order_id, invoice_url) ) """) await db.commit() @@ -235,19 +326,29 @@ async def save_settings(data: dict): await db.commit() +def get_import_email(settings: dict, beleg_type: str = "eingang") -> str: + """Resolve the correct import email address based on document type.""" + if beleg_type == "ausgang": + return settings.get("import_email_ausgang", "") + return settings.get("import_email_eingang", "") or settings.get("import_email", "") + + async def add_log_entry( email_subject: str, email_from: str, attachments_count: int, status: str, error_message: str = "", + sent_to: str = "", + smtp_log: str = "", + beleg_type: str = "eingang", ): async with aiosqlite.connect(DB_PATH) as db: await db.execute( """INSERT INTO processing_log - (email_subject, email_from, attachments_count, status, error_message) - VALUES (?, ?, ?, ?, ?)""", - (email_subject, email_from, attachments_count, status, error_message), + (email_subject, email_from, attachments_count, status, error_message, sent_to, smtp_log, beleg_type) + VALUES (?, ?, ?, ?, ?, ?, ?, ?)""", + (email_subject, email_from, attachments_count, status, error_message, sent_to, smtp_log, beleg_type), ) await db.commit() @@ -260,3 +361,46 @@ async def get_log_entries(limit: int = 100) -> list[dict]: ) rows = await cursor.fetchall() return [dict(row) for row in rows] + + +async def clear_log_entries() -> int: + async with aiosqlite.connect(DB_PATH) as db: + cursor = await db.execute("SELECT COUNT(*) FROM processing_log") + count = (await cursor.fetchone())[0] + await db.execute("DELETE FROM processing_log") + await db.commit() + return count + + +async def is_invoice_downloaded(order_id: str, invoice_url: str = "") -> bool: + """Check if a specific invoice has been downloaded. + If invoice_url is given, check per-URL. Otherwise check per order_id.""" + async with aiosqlite.connect(DB_PATH) as db: + if invoice_url: + cursor = await db.execute( + "SELECT 1 FROM amazon_downloaded WHERE order_id = ? AND invoice_url = ?", + (order_id, invoice_url), + ) + else: + cursor = await db.execute( + "SELECT 1 FROM amazon_downloaded WHERE order_id = ?", (order_id,) + ) + return await cursor.fetchone() is not None + + +async def mark_invoice_downloaded(order_id: str, invoice_url: str = ""): + async with aiosqlite.connect(DB_PATH) as db: + await db.execute( + "INSERT OR IGNORE INTO amazon_downloaded (order_id, invoice_url) VALUES (?, ?)", + (order_id, invoice_url), + ) + await db.commit() + + +async def reset_downloaded_invoices() -> int: + async with aiosqlite.connect(DB_PATH) as db: + cursor = await db.execute("SELECT COUNT(*) FROM amazon_downloaded") + count = (await cursor.fetchone())[0] + await db.execute("DELETE FROM amazon_downloaded") + await db.commit() + return count diff --git a/app/mail_processor.py b/app/mail_processor.py index af89dd2..5aa55a3 100644 --- a/app/mail_processor.py +++ b/app/mail_processor.py @@ -9,11 +9,30 @@ from email.mime.text import MIMEText from email import encoders import logging -from app.database import get_settings, add_log_entry +from app.database import get_settings, add_log_entry, get_import_email logger = logging.getLogger(__name__) +def _send_with_log(smtp_conn: smtplib.SMTP, msg) -> str: + """Send email and capture SMTP protocol exchange.""" + log_lines = [] + original_print_debug = smtp_conn._print_debug + + def _capture(*args): + log_lines.append(" ".join(str(a) for a in args)) + + smtp_conn._print_debug = _capture + old_level = smtp_conn.debuglevel + smtp_conn.set_debuglevel(1) + try: + smtp_conn.send_message(msg) + finally: + smtp_conn.set_debuglevel(old_level) + smtp_conn._print_debug = original_print_debug + return "\n".join(log_lines) + + def _connect_imap(settings: dict) -> imaplib.IMAP4_SSL | imaplib.IMAP4: server = settings["imap_server"] port = int(settings.get("imap_port", 993)) @@ -118,21 +137,117 @@ def _move_email(conn: imaplib.IMAP4, msg_uid: bytes, dest_folder: str): conn.expunge() -async def process_mailbox() -> dict: - settings = await get_settings() - - if not settings.get("imap_server") or not settings.get("import_email"): - logger.warning("IMAP oder Import-Email nicht konfiguriert") - return {"processed": 0, "skipped": 0, "errors": 0, "error": "Nicht konfiguriert"} - - source_folder = settings.get("source_folder", "INBOX") - processed_folder = settings.get("processed_folder", "INBOX/Verarbeitet") - import_email = settings["import_email"] +async def _process_folder( + imap_conn, smtp_conn, settings: dict, + source_folder: str, processed_folder: str, + import_email: str, beleg_type: str, fetch_since: str, +) -> dict: + """Process one IMAP folder pair. Returns counts dict.""" smtp_from = settings.get("smtp_username", "") - processed = 0 skipped = 0 errors = 0 + + _ensure_folder_exists(imap_conn, processed_folder) + + status, _ = imap_conn.select(f'"{source_folder}"') + if status != "OK": + logger.warning(f"Ordner '{source_folder}' konnte nicht geöffnet werden") + return {"processed": 0, "skipped": 0, "errors": 0} + + search_criteria = "ALL" + if fetch_since: + try: + from datetime import datetime + dt = datetime.strptime(fetch_since, "%Y-%m-%d") + imap_date = dt.strftime("%d-%b-%Y") + search_criteria = f'(SINCE {imap_date})' + except ValueError: + logger.warning(f"Ungültiges Datum: {fetch_since}, verwende ALL") + + status, data = imap_conn.uid("SEARCH", None, search_criteria) + if status != "OK" or not data[0]: + logger.info(f"Keine Emails im Ordner '{source_folder}' ({beleg_type})") + return {"processed": 0, "skipped": 0, "errors": 0} + + msg_uids = data[0].split() + logger.info(f"{len(msg_uids)} Email(s) im Ordner '{source_folder}' ({beleg_type})") + + for msg_uid in msg_uids: + subject = "?" + from_addr = "?" + try: + status, msg_data = imap_conn.uid("FETCH", msg_uid, "(RFC822)") + if status != "OK": + continue + + raw_email = msg_data[0][1] + msg = email.message_from_bytes(raw_email, policy=policy.default) + + subject = str(msg.get("Subject", "(Kein Betreff)")) + from_addr = str(msg.get("From", "(Unbekannt)")) + + attachments = _extract_attachments(msg) + + if not attachments: + skipped += 1 + logger.debug(f"Übersprungen (keine Anhänge): {subject}") + continue + + forward_msg = _build_forward_email( + from_addr=smtp_from, + to_addr=import_email, + original_subject=subject, + original_from=from_addr, + attachments=attachments, + ) + + smtp_log = _send_with_log(smtp_conn, forward_msg) + + imap_conn.select(f'"{source_folder}"') + _move_email(imap_conn, msg_uid, processed_folder) + imap_conn.select(f'"{source_folder}"') + + processed += 1 + logger.info(f"Verarbeitet ({beleg_type}): {subject} ({len(attachments)} Anhänge)") + await add_log_entry( + email_subject=subject, + email_from=from_addr, + attachments_count=len(attachments), + status="success", + sent_to=import_email, + smtp_log=smtp_log, + beleg_type=beleg_type, + ) + + except Exception as e: + errors += 1 + logger.error(f"Fehler bei Email UID {msg_uid}: {e}") + try: + await add_log_entry( + email_subject=subject, + email_from=from_addr, + attachments_count=0, + status="error", + error_message=str(e), + beleg_type=beleg_type, + ) + except Exception: + pass + + return {"processed": processed, "skipped": skipped, "errors": errors} + + +async def process_mailbox() -> dict: + settings = await get_settings() + + import_email_eingang = get_import_email(settings, "eingang") + if not settings.get("imap_server") or not import_email_eingang: + logger.warning("IMAP oder Import-Email nicht konfiguriert") + return {"processed": 0, "skipped": 0, "errors": 0, "error": "Nicht konfiguriert"} + + fetch_since = settings.get("fetch_since_date", "") + total = {"processed": 0, "skipped": 0, "errors": 0} imap_conn = None smtp_conn = None @@ -140,92 +255,31 @@ async def process_mailbox() -> dict: imap_conn = _connect_imap(settings) smtp_conn = _connect_smtp(settings) - _ensure_folder_exists(imap_conn, processed_folder) + # Eingangsbelege + source = settings.get("source_folder", "INBOX") + processed_folder = settings.get("processed_folder", "INBOX/Verarbeitet") + result = await _process_folder( + imap_conn, smtp_conn, settings, + source, processed_folder, + import_email_eingang, "eingang", fetch_since, + ) + for k in total: + total[k] += result[k] - status, _ = imap_conn.select(f'"{source_folder}"') - if status != "OK": - raise Exception(f"Ordner '{source_folder}' konnte nicht geöffnet werden") - - # Build IMAP search criteria - search_criteria = "ALL" - fetch_since = settings.get("fetch_since_date", "") - if fetch_since: - try: - from datetime import datetime - dt = datetime.strptime(fetch_since, "%Y-%m-%d") - imap_date = dt.strftime("%d-%b-%Y") - search_criteria = f'(SINCE {imap_date})' - except ValueError: - logger.warning(f"Ungültiges Datum: {fetch_since}, verwende ALL") - - status, data = imap_conn.uid("SEARCH", None, search_criteria) - if status != "OK" or not data[0]: - logger.info("Keine Emails im Ordner gefunden") - return {"processed": 0, "skipped": 0, "errors": 0} - - msg_uids = data[0].split() - logger.info(f"{len(msg_uids)} Email(s) im Ordner '{source_folder}' gefunden") - - for msg_uid in msg_uids: - try: - status, msg_data = imap_conn.uid("FETCH", msg_uid, "(RFC822)") - if status != "OK": - continue - - raw_email = msg_data[0][1] - msg = email.message_from_bytes(raw_email, policy=policy.default) - - subject = str(msg.get("Subject", "(Kein Betreff)")) - from_addr = str(msg.get("From", "(Unbekannt)")) - - attachments = _extract_attachments(msg) - - if not attachments: - skipped += 1 - logger.debug(f"Übersprungen (keine Anhänge): {subject}") - continue - - forward_msg = _build_forward_email( - from_addr=smtp_from, - to_addr=import_email, - original_subject=subject, - original_from=from_addr, - attachments=attachments, - ) - - smtp_conn.send_message(forward_msg) - - # Re-select source folder before move (in case _ensure_folder changed it) - imap_conn.select(f'"{source_folder}"') - _move_email(imap_conn, msg_uid, processed_folder) - - # Re-select after expunge to keep UIDs valid - imap_conn.select(f'"{source_folder}"') - - processed += 1 - logger.info( - f"Verarbeitet: {subject} ({len(attachments)} Anhänge)" - ) - await add_log_entry( - email_subject=subject, - email_from=from_addr, - attachments_count=len(attachments), - status="success", - ) - - except Exception as e: - errors += 1 - logger.error(f"Fehler bei Email UID {msg_uid}: {e}") - try: - await add_log_entry( - email_subject=subject if "subject" in dir() else "?", - email_from=from_addr if "from_addr" in dir() else "?", - attachments_count=0, - status="error", - error_message=str(e), - ) - except Exception: - pass + # Ausgangsbelege (optional) + import_email_ausgang = get_import_email(settings, "ausgang") + source_ausgang = settings.get("source_folder_ausgang", "") + processed_ausgang = settings.get("processed_folder_ausgang", "") + if import_email_ausgang and source_ausgang: + if not processed_ausgang: + processed_ausgang = source_ausgang + "/Verarbeitet" + result = await _process_folder( + imap_conn, smtp_conn, settings, + source_ausgang, processed_ausgang, + import_email_ausgang, "ausgang", fetch_since, + ) + for k in total: + total[k] += result[k] except Exception as e: logger.error(f"Verbindungsfehler: {e}") @@ -236,7 +290,7 @@ async def process_mailbox() -> dict: status="error", error_message=f"Verbindungsfehler: {e}", ) - return {"processed": processed, "skipped": skipped, "errors": errors + 1, "error": str(e)} + return {**total, "errors": total["errors"] + 1, "error": str(e)} finally: if imap_conn: @@ -250,36 +304,52 @@ async def process_mailbox() -> dict: except Exception: pass - logger.info( - f"Fertig: {processed} verarbeitet, {skipped} übersprungen, {errors} Fehler" - ) - return {"processed": processed, "skipped": skipped, "errors": errors} + logger.info(f"Fertig: {total['processed']} verarbeitet, {total['skipped']} übersprungen, {total['errors']} Fehler") + return total async def send_test_email() -> dict: settings = await get_settings() - if not settings.get("smtp_server") or not settings.get("import_email"): - return {"success": False, "error": "SMTP oder Import-Email nicht konfiguriert"} + import_email_eingang = get_import_email(settings, "eingang") + import_email_ausgang = get_import_email(settings, "ausgang") + + if not settings.get("smtp_server") or not import_email_eingang: + return {"success": False, "error": "SMTP oder Import-Email (Eingang) nicht konfiguriert"} try: smtp_conn = _connect_smtp(settings) + smtp_logs = [] + # Test Eingangsbelege msg = MIMEMultipart() msg["From"] = settings["smtp_username"] - msg["To"] = settings["import_email"] - msg["Subject"] = "Belegimport - Test-Email" + msg["To"] = import_email_eingang + msg["Subject"] = "Belegimport - Test-Email (Eingangsbelege)" msg.attach(MIMEText( "Dies ist eine Test-Email vom Belegimport Service.\n" - "Wenn Sie diese Email erhalten, funktioniert die SMTP-Verbindung.", - "plain", - "utf-8", + "Ziel: Eingangsbelege", + "plain", "utf-8", )) + smtp_logs.append("=== Eingangsbelege ===") + smtp_logs.append(_send_with_log(smtp_conn, msg)) + + # Test Ausgangsbelege (if configured) + if import_email_ausgang: + msg2 = MIMEMultipart() + msg2["From"] = settings["smtp_username"] + msg2["To"] = import_email_ausgang + msg2["Subject"] = "Belegimport - Test-Email (Ausgangsbelege)" + msg2.attach(MIMEText( + "Dies ist eine Test-Email vom Belegimport Service.\n" + "Ziel: Ausgangsbelege", + "plain", "utf-8", + )) + smtp_logs.append("=== Ausgangsbelege ===") + smtp_logs.append(_send_with_log(smtp_conn, msg2)) - smtp_conn.send_message(msg) smtp_conn.quit() - - return {"success": True} + return {"success": True, "smtp_log": "\n".join(smtp_logs)} except Exception as e: logger.error(f"Test-Email fehlgeschlagen: {e}") diff --git a/app/main.py b/app/main.py index f714aca..c370ef4 100644 --- a/app/main.py +++ b/app/main.py @@ -6,19 +6,34 @@ from contextlib import asynccontextmanager from pathlib import Path from fastapi import FastAPI, Request, Form, UploadFile -from fastapi.responses import HTMLResponse, JSONResponse, Response, StreamingResponse +from fastapi.responses import HTMLResponse, JSONResponse, Response from fastapi.staticfiles import StaticFiles from fastapi.templating import Jinja2Templates from sse_starlette.sse import EventSourceResponse -from app.database import init_db, get_settings, save_settings, get_log_entries +from app.database import init_db, get_settings, save_settings, get_log_entries, clear_log_entries, reset_downloaded_invoices from app.mail_processor import process_mailbox, send_test_email, test_imap_connection, create_imap_folder from app.scheduler import start_scheduler, configure_job, get_scheduler_status from app.scanner import process_scanned_pdf, generate_separator_pdf, UPLOAD_DIR from app.smb_processor import process_smb_share, test_smb_connection, create_smb_folder, list_smb_folders +from app.amazon_processor import ( + start_login as amazon_start_login, + submit_otp as amazon_submit_otp, + get_login_state as amazon_get_login_state, + check_session_valid as amazon_check_session, + clear_session as amazon_clear_session, + process_amazon, + start_interactive_login as amazon_start_interactive, + get_browser_screenshot as amazon_get_screenshot, + send_browser_click as amazon_browser_click, + send_browser_type as amazon_browser_type, + send_browser_key as amazon_browser_key, + close_interactive_login as amazon_close_interactive, + is_interactive_login_active as amazon_login_active, +) logging.basicConfig( - level=logging.INFO, + level=getattr(logging, os.environ.get("LOG_LEVEL", "INFO").upper(), logging.INFO), format="%(asctime)s [%(levelname)s] %(name)s: %(message)s", ) logger = logging.getLogger(__name__) @@ -44,6 +59,11 @@ templates = Jinja2Templates(directory="app/templates") @app.get("/", response_class=HTMLResponse) async def index(request: Request): + return templates.TemplateResponse("scan.html", {"request": request}) + + +@app.get("/settings", response_class=HTMLResponse) +async def settings_page(request: Request): settings = await get_settings() logs = await get_log_entries(limit=20) status = get_scheduler_status() @@ -74,8 +94,12 @@ async def _save_form_settings(request: Request) -> dict: "smtp_username": form.get("smtp_username", ""), "smtp_password": form.get("smtp_password") or current.get("smtp_password", ""), "import_email": form.get("import_email", ""), + "import_email_eingang": form.get("import_email_eingang", ""), + "import_email_ausgang": form.get("import_email_ausgang", ""), "source_folder": form.get("source_folder", "Rechnungen"), "processed_folder": form.get("processed_folder", "Rechnungen/Verarbeitet"), + "source_folder_ausgang": form.get("source_folder_ausgang", ""), + "processed_folder_ausgang": form.get("processed_folder_ausgang", ""), "interval_minutes": form.get("interval_minutes", "5"), "scheduler_enabled": form.get("scheduler_enabled", "false"), "fetch_since_date": form.get("fetch_since_date", ""), @@ -89,7 +113,11 @@ async def _save_form_settings(request: Request) -> dict: "smb_share": form.get("smb_share", ""), "smb_source_path": form.get("smb_source_path", ""), "smb_processed_path": form.get("smb_processed_path", "Verarbeitet"), + "smb_source_path_ausgang": form.get("smb_source_path_ausgang", ""), + "smb_processed_path_ausgang": form.get("smb_processed_path_ausgang", ""), "smb_mode": form.get("smb_mode", "forward"), + # Debug + "debug_save_amazon_pdfs": form.get("debug_save_amazon_pdfs", "false"), } await save_settings(data) @@ -194,6 +222,12 @@ async def log_page(request: Request): }) +@app.post("/api/clear-log") +async def api_clear_log(): + count = await clear_log_entries() + return JSONResponse({"success": True, "count": count}) + + @app.get("/api/status") async def api_status(): return get_scheduler_status() @@ -247,6 +281,7 @@ async def scan_upload_chunk( async def scan_process(request: Request): body = await request.json() upload_id = body.get("upload_id", "") + beleg_type = body.get("beleg_type", "eingang") try: uuid.UUID(upload_id) @@ -269,7 +304,7 @@ async def scan_process(request: Request): # Process in background task async def _process(): try: - result = await process_scanned_pdf(str(pdf_path), progress_callback) + result = await process_scanned_pdf(str(pdf_path), progress_callback, beleg_type=beleg_type) _scan_progress.setdefault(upload_id, []).append({ "stage": "done", "result": result }) @@ -325,3 +360,113 @@ async def separator_pdf(): media_type="application/pdf", headers={"Content-Disposition": "attachment; filename=Trennseite.pdf"}, ) + + +# --- Plattformen (Amazon) --- + +@app.get("/platforms", response_class=HTMLResponse) +async def platforms_page(request: Request): + settings = await get_settings() + status = get_scheduler_status() + return templates.TemplateResponse("platforms.html", { + "request": request, + "settings": settings, + "status": status, + "message": None, + "message_type": None, + }) + + +@app.post("/api/amazon-settings") +async def api_amazon_settings(request: Request): + body = await request.json() + current = await get_settings() + + data = { + "amazon_enabled": body.get("amazon_enabled", "false"), + "amazon_domain": body.get("amazon_domain", "amazon.de"), + "amazon_email": body.get("amazon_email", ""), + "amazon_password": body.get("amazon_password") or current.get("amazon_password", ""), + "amazon_since_date": body.get("amazon_since_date", ""), + } + await save_settings(data) + return JSONResponse({"success": True}) + + +@app.get("/api/amazon-status") +async def api_amazon_status(): + valid = await amazon_check_session() + login_active = amazon_login_active() + return JSONResponse({"session_valid": valid, "login_active": login_active}) + + +@app.post("/api/amazon-login") +async def api_amazon_login(): + """Start interactive browser login.""" + await amazon_start_interactive() + return JSONResponse({"success": True}) + + +@app.get("/api/amazon-login-state") +async def api_amazon_login_state(): + return JSONResponse(amazon_get_login_state()) + + +@app.get("/api/amazon-browser-screenshot") +async def api_amazon_browser_screenshot(): + img = await amazon_get_screenshot() + if img is None: + return JSONResponse({"error": "Kein Browser offen"}, status_code=404) + return Response(content=img, media_type="image/png") + + +@app.post("/api/amazon-browser-click") +async def api_amazon_browser_click(request: Request): + body = await request.json() + await amazon_browser_click(int(body["x"]), int(body["y"])) + return JSONResponse({"success": True}) + + +@app.post("/api/amazon-browser-type") +async def api_amazon_browser_type(request: Request): + body = await request.json() + await amazon_browser_type(body["text"]) + return JSONResponse({"success": True}) + + +@app.post("/api/amazon-browser-key") +async def api_amazon_browser_key(request: Request): + body = await request.json() + await amazon_browser_key(body["key"]) + return JSONResponse({"success": True}) + + +@app.post("/api/amazon-login-close") +async def api_amazon_login_close(): + await amazon_close_interactive() + return JSONResponse({"success": True}) + + +@app.post("/api/amazon-otp") +async def api_amazon_otp(request: Request): + body = await request.json() + ok = await amazon_submit_otp(body.get("code", "")) + return JSONResponse({"success": ok}) + + +@app.post("/api/amazon-logout") +async def api_amazon_logout(): + await amazon_clear_session() + return JSONResponse({"success": True}) + + +@app.post("/api/amazon-process") +async def api_amazon_process(): + result = await process_amazon() + return JSONResponse(result) + + +@app.post("/api/amazon-reset") +async def api_amazon_reset(): + count = await reset_downloaded_invoices() + return JSONResponse({"success": True, "count": count}) diff --git a/app/scanner.py b/app/scanner.py index b12de99..6ee485f 100644 --- a/app/scanner.py +++ b/app/scanner.py @@ -11,8 +11,8 @@ from pypdf import PdfReader, PdfWriter import qrcode from qrcode.constants import ERROR_CORRECT_H -from app.database import get_settings, add_log_entry -from app.mail_processor import _connect_smtp, _build_forward_email +from app.database import get_settings, add_log_entry, get_import_email +from app.mail_processor import _connect_smtp, _build_forward_email, _send_with_log logger = logging.getLogger(__name__) @@ -84,11 +84,12 @@ def split_pdf(pdf_path: str, separator_pages: list[int]) -> list[bytes]: return documents -async def process_scanned_pdf(pdf_path: str, progress_callback=None) -> dict: +async def process_scanned_pdf(pdf_path: str, progress_callback=None, beleg_type: str = "eingang") -> dict: """Full pipeline: detect separators, split, send each document via email.""" settings = await get_settings() - if not settings.get("smtp_server") or not settings.get("import_email"): + import_email = get_import_email(settings, beleg_type) + if not settings.get("smtp_server") or not import_email: return {"error": "SMTP oder Import-Email nicht konfiguriert", "total_pages": 0, "documents": 0, "sent": 0, "errors": 1} # Step 1: Detect separator pages (CPU-bound, run in thread) @@ -135,12 +136,12 @@ async def process_scanned_pdf(pdf_path: str, progress_callback=None) -> dict: filename = f"Scan_Dokument_{i + 1}.pdf" msg = _build_forward_email( from_addr=settings["smtp_username"], - to_addr=settings["import_email"], + to_addr=import_email, original_subject=f"Scan-Upload Dokument {i + 1}/{len(documents)}", original_from="Scan-Upload", attachments=[(filename, doc_bytes)], ) - smtp_conn.send_message(msg) + smtp_log = _send_with_log(smtp_conn, msg) sent += 1 await add_log_entry( @@ -148,6 +149,9 @@ async def process_scanned_pdf(pdf_path: str, progress_callback=None) -> dict: email_from="Scan-Upload", attachments_count=1, status="success", + sent_to=import_email, + smtp_log=smtp_log, + beleg_type=beleg_type, ) except Exception as e: @@ -159,6 +163,8 @@ async def process_scanned_pdf(pdf_path: str, progress_callback=None) -> dict: attachments_count=1, status="error", error_message=str(e), + sent_to=import_email, + beleg_type=beleg_type, ) except Exception as e: diff --git a/app/scheduler.py b/app/scheduler.py index 1806af6..e22e09e 100644 --- a/app/scheduler.py +++ b/app/scheduler.py @@ -5,6 +5,7 @@ from apscheduler.triggers.interval import IntervalTrigger from app.mail_processor import process_mailbox from app.smb_processor import process_smb_share +from app.amazon_processor import process_amazon logger = logging.getLogger(__name__) @@ -21,6 +22,7 @@ async def _run_processor(): return _is_processing = True try: + # Email and SMB first - these are fast and must not be blocked by Amazon logger.info("Starte automatische Email-Verarbeitung...") result = await process_mailbox() logger.info(f"Email-Verarbeitung abgeschlossen: {result}") @@ -28,6 +30,16 @@ async def _run_processor(): logger.info("Starte automatische SMB-Verarbeitung...") smb_result = await process_smb_share() logger.info(f"SMB-Verarbeitung abgeschlossen: {smb_result}") + + # Amazon separately with timeout - must not block next scheduler runs + logger.info("Starte automatische Amazon-Verarbeitung...") + try: + amazon_result = await asyncio.wait_for(process_amazon(), timeout=300) + logger.info(f"Amazon-Verarbeitung abgeschlossen: {amazon_result}") + except asyncio.TimeoutError: + logger.error("Amazon-Verarbeitung nach 5 Minuten abgebrochen (Timeout)") + except Exception as e: + logger.error(f"Fehler bei Amazon-Verarbeitung: {e}") except Exception as e: logger.error(f"Fehler bei automatischer Verarbeitung: {e}") finally: diff --git a/app/smb_processor.py b/app/smb_processor.py index 9d21048..323603a 100644 --- a/app/smb_processor.py +++ b/app/smb_processor.py @@ -5,8 +5,8 @@ import tempfile import smbclient -from app.database import get_settings, add_log_entry -from app.mail_processor import _connect_smtp, _build_forward_email +from app.database import get_settings, add_log_entry, get_import_email +from app.mail_processor import _connect_smtp, _build_forward_email, _send_with_log from app.scanner import detect_separator_pages, split_pdf logger = logging.getLogger(__name__) @@ -114,6 +114,119 @@ def _list_smb_folders_recursive( return folders +async def _process_smb_folder( + smtp_conn, settings: dict, base_path: str, + source_rel: str, processed_rel: str, + import_email: str, beleg_type: str, mode: str, +) -> dict: + """Process one SMB folder pair. Returns counts dict.""" + smtp_from = settings.get("smtp_username", "") + processed = 0 + skipped = 0 + errors = 0 + + source_path = _smb_unc_path(base_path, source_rel) + processed_path = _smb_unc_path(base_path, processed_rel) + + await asyncio.to_thread(_ensure_smb_folder, processed_path) + + pdf_files = await asyncio.to_thread(_list_pdf_files, source_path) + if not pdf_files: + logger.info(f"Keine PDF-Dateien im SMB-Ordner '{source_rel}' ({beleg_type})") + return {"processed": 0, "skipped": 0, "errors": 0} + + logger.info(f"{len(pdf_files)} PDF-Datei(en) im SMB-Ordner '{source_rel}' ({beleg_type})") + + for filename in pdf_files: + file_path = _smb_unc_path(source_path, filename) + try: + pdf_data = await asyncio.to_thread(_read_smb_file, file_path) + + if mode == "separator": + with tempfile.NamedTemporaryFile(suffix=".pdf", delete=False) as tmp: + tmp.write(pdf_data) + tmp_path = tmp.name + try: + separator_pages = await asyncio.to_thread( + detect_separator_pages, tmp_path, None + ) + documents = await asyncio.to_thread( + split_pdf, tmp_path, separator_pages + ) + finally: + os.unlink(tmp_path) + + if not documents: + skipped += 1 + continue + + smtp_log_parts = [] + for i, doc_bytes in enumerate(documents): + doc_filename = f"{os.path.splitext(filename)[0]}_Teil_{i + 1}.pdf" + subject = f"SMB-Import: {filename} (Dokument {i + 1}/{len(documents)})" + msg = _build_forward_email( + from_addr=smtp_from, + to_addr=import_email, + original_subject=subject, + original_from="SMB-Import", + attachments=[(doc_filename, doc_bytes)], + ) + smtp_log_parts.append(_send_with_log(smtp_conn, msg)) + + await add_log_entry( + email_subject=f"SMB: {filename}", + email_from="SMB-Import", + attachments_count=len(documents), + status="success", + sent_to=import_email, + smtp_log="\n---\n".join(smtp_log_parts), + beleg_type=beleg_type, + ) + logger.info( + f"SMB verarbeitet ({beleg_type}): {filename} -> {len(documents)} Dokument(e)" + ) + else: + msg = _build_forward_email( + from_addr=smtp_from, + to_addr=import_email, + original_subject=f"SMB-Import: {filename}", + original_from="SMB-Import", + attachments=[(filename, pdf_data)], + ) + smtp_log = _send_with_log(smtp_conn, msg) + + await add_log_entry( + email_subject=f"SMB: {filename}", + email_from="SMB-Import", + attachments_count=1, + status="success", + sent_to=import_email, + smtp_log=smtp_log, + beleg_type=beleg_type, + ) + logger.info(f"SMB verarbeitet ({beleg_type}): {filename}") + + await asyncio.to_thread(_move_smb_file, file_path, processed_path, filename) + processed += 1 + + except Exception as e: + errors += 1 + logger.error(f"Fehler bei SMB-Datei {filename}: {e}") + try: + await add_log_entry( + email_subject=f"SMB: {filename}", + email_from="SMB-Import", + attachments_count=0, + status="error", + error_message=str(e), + beleg_type=beleg_type, + ) + except Exception: + pass + + return {"processed": processed, "skipped": skipped, "errors": errors} + + async def process_smb_share() -> dict: """Process PDF files from SMB share - main pipeline.""" settings = await get_settings() @@ -124,113 +237,43 @@ async def process_smb_share() -> dict: if not settings.get("smb_server") or not settings.get("smb_share"): return {"processed": 0, "skipped": 0, "errors": 0, "error": "SMB nicht konfiguriert"} - if not settings.get("import_email"): + import_email_eingang = get_import_email(settings, "eingang") + if not import_email_eingang: return {"processed": 0, "skipped": 0, "errors": 0, "error": "Import-Email nicht konfiguriert"} mode = settings.get("smb_mode", "forward") - smtp_from = settings.get("smtp_username", "") - import_email = settings["import_email"] - - processed = 0 - skipped = 0 - errors = 0 + total = {"processed": 0, "skipped": 0, "errors": 0} smtp_conn = None try: base_path = await asyncio.to_thread(_smb_register_session, settings) - source_path = _smb_unc_path(base_path, settings.get("smb_source_path", "")) - processed_path = _smb_unc_path(base_path, settings.get("smb_processed_path", "Verarbeitet")) - - await asyncio.to_thread(_ensure_smb_folder, processed_path) - - pdf_files = await asyncio.to_thread(_list_pdf_files, source_path) - if not pdf_files: - logger.info("Keine PDF-Dateien im SMB-Ordner gefunden") - return {"processed": 0, "skipped": 0, "errors": 0} - - logger.info(f"{len(pdf_files)} PDF-Datei(en) im SMB-Ordner gefunden") - smtp_conn = _connect_smtp(settings) - for filename in pdf_files: - file_path = _smb_unc_path(source_path, filename) - try: - pdf_data = await asyncio.to_thread(_read_smb_file, file_path) + # Eingangsbelege + source = settings.get("smb_source_path", "") + processed_rel = settings.get("smb_processed_path", "Verarbeitet") + result = await _process_smb_folder( + smtp_conn, settings, base_path, + source, processed_rel, + import_email_eingang, "eingang", mode, + ) + for k in total: + total[k] += result[k] - if mode == "separator": - with tempfile.NamedTemporaryFile(suffix=".pdf", delete=False) as tmp: - tmp.write(pdf_data) - tmp_path = tmp.name - try: - separator_pages = await asyncio.to_thread( - detect_separator_pages, tmp_path, None - ) - documents = await asyncio.to_thread( - split_pdf, tmp_path, separator_pages - ) - finally: - os.unlink(tmp_path) - - if not documents: - skipped += 1 - continue - - for i, doc_bytes in enumerate(documents): - doc_filename = f"{os.path.splitext(filename)[0]}_Teil_{i + 1}.pdf" - subject = f"SMB-Import: {filename} (Dokument {i + 1}/{len(documents)})" - msg = _build_forward_email( - from_addr=smtp_from, - to_addr=import_email, - original_subject=subject, - original_from="SMB-Import", - attachments=[(doc_filename, doc_bytes)], - ) - smtp_conn.send_message(msg) - - await add_log_entry( - email_subject=f"SMB: {filename}", - email_from="SMB-Import", - attachments_count=len(documents), - status="success", - ) - logger.info( - f"SMB verarbeitet: {filename} -> {len(documents)} Dokument(e) " - f"({len(separator_pages)} Trennseite(n))" - ) - else: - msg = _build_forward_email( - from_addr=smtp_from, - to_addr=import_email, - original_subject=f"SMB-Import: {filename}", - original_from="SMB-Import", - attachments=[(filename, pdf_data)], - ) - smtp_conn.send_message(msg) - - await add_log_entry( - email_subject=f"SMB: {filename}", - email_from="SMB-Import", - attachments_count=1, - status="success", - ) - logger.info(f"SMB verarbeitet: {filename}") - - await asyncio.to_thread(_move_smb_file, file_path, processed_path, filename) - processed += 1 - - except Exception as e: - errors += 1 - logger.error(f"Fehler bei SMB-Datei {filename}: {e}") - try: - await add_log_entry( - email_subject=f"SMB: {filename}", - email_from="SMB-Import", - attachments_count=0, - status="error", - error_message=str(e), - ) - except Exception: - pass + # Ausgangsbelege (optional) + import_email_ausgang = get_import_email(settings, "ausgang") + source_ausgang = settings.get("smb_source_path_ausgang", "") + processed_ausgang = settings.get("smb_processed_path_ausgang", "") + if import_email_ausgang and source_ausgang: + if not processed_ausgang: + processed_ausgang = source_ausgang + "/Verarbeitet" + result = await _process_smb_folder( + smtp_conn, settings, base_path, + source_ausgang, processed_ausgang, + import_email_ausgang, "ausgang", mode, + ) + for k in total: + total[k] += result[k] except Exception as e: logger.error(f"SMB-Verbindungsfehler: {e}") @@ -244,7 +287,7 @@ async def process_smb_share() -> dict: ) except Exception: pass - return {"processed": processed, "skipped": skipped, "errors": errors + 1, "error": str(e)} + return {**total, "errors": total["errors"] + 1, "error": str(e)} finally: if smtp_conn: @@ -253,8 +296,8 @@ async def process_smb_share() -> dict: except Exception: pass - logger.info(f"SMB fertig: {processed} verarbeitet, {skipped} übersprungen, {errors} Fehler") - return {"processed": processed, "skipped": skipped, "errors": errors} + logger.info(f"SMB fertig: {total['processed']} verarbeitet, {total['skipped']} übersprungen, {total['errors']} Fehler") + return total async def test_smb_connection() -> dict: diff --git a/app/static/style.css b/app/static/style.css index 1c9a4fc..1bdc0c8 100644 --- a/app/static/style.css +++ b/app/static/style.css @@ -200,6 +200,11 @@ main { color: #856404; } +.badge-info { + background: #d1ecf1; + color: #0c5460; +} + .badge-inactive { background: rgba(255, 255, 255, 0.2); color: rgba(255, 255, 255, 0.7); @@ -236,6 +241,16 @@ main { border: 1px solid #bee5eb; } +/* Allow cards with tables to scroll horizontally */ +.card-table { + overflow-x: auto; +} + +/* Wider main container for pages with large tables */ +.main-wide { + max-width: 95%; +} + table { width: 100%; border-collapse: collapse; diff --git a/app/templates/base.html b/app/templates/base.html index 103d448..b1e6f3f 100644 --- a/app/templates/base.html +++ b/app/templates/base.html @@ -10,8 +10,9 @@ -
+
{% if message %}
{{ message }} diff --git a/app/templates/log.html b/app/templates/log.html index 422adb1..d45229e 100644 --- a/app/templates/log.html +++ b/app/templates/log.html @@ -1,21 +1,30 @@ {% extends "base.html" %} {% set active_page = "log" %} +{% set main_class = "main-wide" %} {% set message = None %} {% block content %} -
-

Verarbeitungslog

+
+
+

Verarbeitungslog

+ {% if logs %} + + {% endif %} +
{% if logs %} + + + @@ -23,9 +32,17 @@ + + + {% endfor %} @@ -42,4 +65,54 @@

Noch keine Einträge vorhanden.

{% endif %} + + + + + + + {% endblock %} diff --git a/app/templates/platforms.html b/app/templates/platforms.html new file mode 100644 index 0000000..17c074b --- /dev/null +++ b/app/templates/platforms.html @@ -0,0 +1,363 @@ +{% extends "base.html" %} +{% set active_page = "platforms" %} + +{% block content %} + +{% if message %} +
{{ message }}
+{% endif %} + +
+

Amazon Business - Einstellungen

+
+
+ + +
+
+ + +
+
+ + +
+
+ + +
+
+ + + Leer = letzte 30 Tage +
+
+ {% if settings.get('amazon_last_sync') %} + Letzter Abruf: {{ settings.get('amazon_last_sync') }} + {% endif %} +
+
+
+ +
+
+
+ +
+

Anmeldung & Abruf

+
+ Session: + Wird geprüft... +
+ +
+ + + + +
+
+
+ + + + + +{% endblock %} diff --git a/app/templates/scan.html b/app/templates/scan.html index a73f735..dc0625c 100644 --- a/app/templates/scan.html +++ b/app/templates/scan.html @@ -8,6 +8,13 @@ Mehrseitige PDF hochladen. Trennseiten mit QR-Code werden automatisch erkannt und die einzelnen Dokumente gesendet.

+ +
+ + + +
+
📄
@@ -175,7 +182,7 @@ async function startProcessing(uploadId) { const resp = await fetch('/api/scan-process', { method: 'POST', headers: { 'Content-Type': 'application/json' }, - body: JSON.stringify({ upload_id: uploadId }), + body: JSON.stringify({ upload_id: uploadId, beleg_type: document.querySelector('input[name="beleg_type"]:checked').value }), }); if (!resp.ok) { diff --git a/app/templates/settings.html b/app/templates/settings.html index 1db4123..2359b42 100644 --- a/app/templates/settings.html +++ b/app/templates/settings.html @@ -77,13 +77,14 @@
-

Import & Ordner

+

Import - Eingangsbelege

- - + +
+
@@ -101,6 +102,34 @@
+
+ +
+

Import - Ausgangsbelege (optional)

+
+
+ + + Leer lassen wenn keine Ausgangsbelege importiert werden sollen +
+
+ +
+ + +
+
+
+ +
+ + +
+
+
- +
@@ -166,13 +195,29 @@
- +
+
+ +
+ + +
+
+
+ +
+ + +
+
+
+

Debug

+
+
+ + + Speichert heruntergeladene Amazon-Rechnungen in /data/uploads/amazon_invoices/ +
+
+
+
+ @@ -238,6 +298,13 @@ +
ID ZeitpunktArt Betreff Absender AnhängeGesendet an Status FehlermeldungSMTP
{{ log.id }} {{ log.timestamp }} + {% if log.get('beleg_type', 'eingang') == 'ausgang' %} + Ausgang + {% else %} + Eingang + {% endif %} + {{ log.email_subject or '-' }} {{ log.email_from or '-' }} {{ log.attachments_count }}{{ log.sent_to or '-' }} {% if log.status == 'success' %} OK @@ -34,6 +51,12 @@ {% endif %} {{ log.error_message or '-' }} + {% if log.smtp_log %} + + + {% else %}-{% endif %} +
Betreff Absender AnhängeArt Status
{{ log.email_subject or '-' }} {{ log.email_from or '-' }} {{ log.attachments_count }} + {% if log.get('beleg_type', 'eingang') == 'ausgang' %} + Ausgang + {% else %} + Eingang + {% endif %} + {% if log.status == 'success' %} OK @@ -343,8 +410,11 @@ async function testEmail() { const resp = await fetch('/api/test-email', { method: 'POST', body: getFormData() }); const data = await resp.json(); if (data.success) { - const addr = document.getElementById('import_email').value; - showAlert('Test-Email erfolgreich an ' + addr + ' gesendet! Einstellungen gespeichert.', 'success'); + const eingang = document.getElementById('import_email_eingang').value; + const ausgang = document.getElementById('import_email_ausgang').value; + let targets = eingang; + if (ausgang) targets += ' + ' + ausgang; + showAlert('Test-Email erfolgreich an ' + targets + ' gesendet! Einstellungen gespeichert.', 'success'); } else { showAlert('Test-Email fehlgeschlagen: ' + data.error, 'error'); } @@ -447,8 +517,10 @@ function showFolderModal(targetField) { const currentValue = folderTargetField ? document.getElementById(folderTargetField).value : ''; let html = '
'; - html += ''; - html += ''; + html += ''; + html += ''; + html += ''; + html += ''; html += '
'; html += '
'; if (cachedFolders && cachedFolders.length > 0) { @@ -651,8 +723,10 @@ function showSmbFolderModal(targetField) { const currentValue = smbFolderTargetField ? document.getElementById(smbFolderTargetField).value : ''; let html = '
'; - html += ''; - html += ''; + html += ''; + html += ''; + html += ''; + html += ''; html += '
'; html += '
'; diff --git a/docker-compose.yml b/docker-compose.yml index 0badfc3..8d754d3 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -9,4 +9,5 @@ services: environment: - DB_PATH=/data/belegimport.db - TZ=Europe/Berlin + - LOG_LEVEL=DEBUG restart: unless-stopped diff --git a/requirements.txt b/requirements.txt index 058fd2d..2018afa 100644 --- a/requirements.txt +++ b/requirements.txt @@ -12,3 +12,5 @@ PyMuPDF==1.25.3 qrcode==8.0 sse-starlette==2.2.1 smbprotocol==1.14.0 +playwright==1.49.1 +playwright-stealth==2.0.2