Amazon Business API integration replacing browser automation

- Add amazon_api.py with Reconciliation + Document API client
- OAuth flow with manual code exchange for local installations
- Dual mode: API (recommended) or Browser automation (fallback)
- New settings: amazon_app_id, amazon_client_id, amazon_client_secret, amazon_refresh_token
- Platform UI with mode switcher, API credential fields, OAuth button
- Scheduler supports both API and browser modes
- README with full Amazon API setup guide
- httpx added for async HTTP requests

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-04-05 18:08:35 +02:00
parent a4e39332c7
commit 337e0e99a5
9 changed files with 1130 additions and 50 deletions
@@ -736,12 +736,23 @@ async def _process_amazon_inner() -> dict:
            return {"processed": 0, "errors": 0, "error": error_detail}

        processed, skipped, errors = result["processed"], result["skipped"], result["errors"]
+        batch_done = result.get("batch_done", False)

        # Update last sync date
        await save_settings({"amazon_last_sync": datetime.now().strftime("%Y-%m-%d %H:%M")})

-        # Log summary if nothing was processed
-        if processed == 0 and errors == 0:
+        # Log summary
+        if processed > 0 and batch_done:
+            summary = f"{processed} Rechnung(en) importiert. Weitere beim nächsten Abruf."
+            await add_log_entry(
+                email_subject="Amazon-Import (Batch)",
+                email_from=f"Amazon ({domain})",
+                attachments_count=processed,
+                status="success",
+                error_message=summary,
+                sent_to=import_email,
+            )
+        elif processed == 0 and errors == 0:
            if skipped > 0:
                summary = f"Alle Rechnungen bereits importiert ({skipped} übersprungen)"
            else:
@@ -787,13 +798,18 @@ async def _process_amazon_inner() -> dict:
 async def _collect_and_process_orders(page, domain, since_date, smtp_conn, settings, import_email) -> dict | None:
    """Collect orders AND process invoices page by page.

-    This ensures invoice buttons are visible when we try to click them,
-    because we process each page's orders before navigating to the next page.
+    Uses BATCH processing: only processes a limited number of invoices per run
+    to avoid Amazon session degradation. The scheduler will pick up remaining
+    orders in subsequent runs (already-imported orders are skipped automatically).
+
    Returns None if session is invalid, otherwise dict with processed/skipped/errors counts.
    """
+    MAX_INVOICES_PER_RUN = 2  # Limit to avoid Amazon session issues
+
    processed = 0
    skipped = 0
    errors = 0
+    batch_done = False  # Flag: batch limit reached, stop processing

    # Navigate to orders page if needed
    actual_url = page.url
@@ -813,6 +829,50 @@ async def _collect_and_process_orders(page, domain, since_date, smtp_conn, setti
        if "order-history" not in actual_url and "your-orders" not in actual_url:
            return None

+    # Reset to page 1 via SPA navigation (NOT page.reload() which kills session!)
+    # Click the "Bestellungen" tab or use the time filter to refresh the order list
+    logger.info(f"Amazon: Refreshe Bestellliste via SPA (aktuelle URL: {actual_url})...")
+    try:
+        refreshed = await page.evaluate("""() => {
+            // Strategy 1: Click the "Bestellungen" tab to reset to page 1
+            const tabs = document.querySelectorAll('a[href*="your-orders"], a[href*="order-history"]');
+            for (const tab of tabs) {
+                const text = (tab.innerText || '').trim();
+                if ((text === 'Bestellungen' || text === 'Orders') && tab.offsetParent !== null) {
+                    tab.click();
+                    return 'tab';
+                }
+            }
+            // Strategy 2: Click pagination page 1 link
+            const page1Links = document.querySelectorAll('.a-pagination a[href*="pagination/1"], .a-pagination li:first-child a');
+            for (const link of page1Links) {
+                if (link.offsetParent !== null) {
+                    link.click();
+                    return 'pagination';
+                }
+            }
+            // Strategy 3: Click the time filter to trigger a refresh
+            const filterSelect = document.querySelector('select[name="orderFilter"], select#orderFilter, select#time-filter');
+            if (filterSelect) {
+                // Re-select the current value to trigger change event
+                const event = new Event('change', {bubbles: true});
+                filterSelect.dispatchEvent(event);
+                return 'filter';
+            }
+            return null;
+        }""")
+        if refreshed:
+            logger.info(f"Amazon: Bestellliste refreshed via {refreshed}")
+            await asyncio.sleep(3)
+            try:
+                await page.wait_for_load_state("networkidle", timeout=15000)
+            except Exception:
+                pass
+        else:
+            logger.info("Amazon: Kein SPA-Refresh möglich, verwende aktuelle Ansicht")
+    except Exception as e:
+        logger.warning(f"Amazon: SPA-Refresh fehlgeschlagen: {e}")
+
    # Try to set time filter
    now = datetime.now()
    days_back = (now - since_date).days
@@ -862,8 +922,14 @@ async def _collect_and_process_orders(page, domain, since_date, smtp_conn, setti
        logger.info(f"Amazon: Seite {page_num}: {len(page_orders)} gefunden, {len(new_orders)} neu")
        total_orders += len(new_orders)

-        # Process invoices for THIS page's orders immediately (buttons are visible now)
+        # Process invoices for THIS page's orders immediately
        for order in new_orders:
+            # Check batch limit
+            if processed >= MAX_INVOICES_PER_RUN:
+                batch_done = True
+                logger.info(f"Amazon: Batch-Limit erreicht ({MAX_INVOICES_PER_RUN} Rechnungen). Rest beim nächsten Abruf.")
+                break
+
            order_id = order.get("id", "?")
            try:
                if await is_invoice_downloaded(order_id, order_id):
@@ -920,7 +986,8 @@ async def _collect_and_process_orders(page, domain, since_date, smtp_conn, setti
                        )

                await mark_invoice_downloaded(order_id, order_id)
-                await _human_delay(2.0, 4.0)
+                # Long delay between orders to avoid Amazon rate-limiting
+                await _human_delay(8.0, 15.0)

            except Exception as e:
                errors += 1
@@ -933,6 +1000,10 @@ async def _collect_and_process_orders(page, domain, since_date, smtp_conn, setti
                    error_message=str(e),
                )

+        # Stop if batch limit reached
+        if batch_done:
+            break
+
        # Navigate to next page
        has_next = await page.evaluate("""() => {
            const nextLink = document.querySelector('.a-pagination .a-last:not(.a-disabled) a');
@@ -960,8 +1031,9 @@ async def _collect_and_process_orders(page, domain, since_date, smtp_conn, setti
        else:
            break

-    logger.info(f"Amazon: Gesamt {total_orders} Bestellungen auf {page_num} Seite(n)")
-    return {"processed": processed, "skipped": skipped, "errors": errors}
+    status = "Batch-Limit" if batch_done else "komplett"
+    logger.info(f"Amazon: Gesamt {total_orders} Bestellungen auf {page_num} Seite(n), Status: {status}")
+    return {"processed": processed, "skipped": skipped, "errors": errors, "batch_done": batch_done}


 async def _collect_orders(page, domain: str, since_date: datetime) -> list[dict] | None: