Amazon Business API integration as the recommended alternative to browser automation

- Add amazon_api.py with Reconciliation + Document API client
- OAuth flow with manual code exchange for local installations
- Dual mode: API (recommended) or browser automation (fallback)
- New settings: amazon_app_id, amazon_client_id, amazon_client_secret, amazon_refresh_token
- Platform UI with mode switcher, API credential fields, OAuth button
- Scheduler supports both API and browser modes
- README with full Amazon API setup guide
- httpx added for async HTTP requests

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-04-05 18:08:35 +02:00
parent a4e39332c7
commit 337e0e99a5
9 changed files with 1130 additions and 50 deletions
+80 -8
View File
@@ -736,12 +736,23 @@ async def _process_amazon_inner() -> dict:
return {"processed": 0, "errors": 0, "error": error_detail}
processed, skipped, errors = result["processed"], result["skipped"], result["errors"]
batch_done = result.get("batch_done", False)
# Update last sync date
await save_settings({"amazon_last_sync": datetime.now().strftime("%Y-%m-%d %H:%M")})
# Log summary if nothing was processed
if processed == 0 and errors == 0:
# Log summary
if processed > 0 and batch_done:
summary = f"{processed} Rechnung(en) importiert. Weitere beim nächsten Abruf."
await add_log_entry(
email_subject="Amazon-Import (Batch)",
email_from=f"Amazon ({domain})",
attachments_count=processed,
status="success",
error_message=summary,
sent_to=import_email,
)
elif processed == 0 and errors == 0:
if skipped > 0:
summary = f"Alle Rechnungen bereits importiert ({skipped} übersprungen)"
else:
@@ -787,13 +798,18 @@ async def _process_amazon_inner() -> dict:
async def _collect_and_process_orders(page, domain, since_date, smtp_conn, settings, import_email) -> dict | None:
"""Collect orders AND process invoices page by page.
This ensures invoice buttons are visible when we try to click them,
because we process each page's orders before navigating to the next page.
Uses BATCH processing: only processes a limited number of invoices per run
to avoid Amazon session degradation. The scheduler will pick up remaining
orders in subsequent runs (already-imported orders are skipped automatically).
Returns None if session is invalid, otherwise dict with processed/skipped/errors counts.
"""
MAX_INVOICES_PER_RUN = 2 # Limit to avoid Amazon session issues
processed = 0
skipped = 0
errors = 0
batch_done = False # Flag: batch limit reached, stop processing
# Navigate to orders page if needed
actual_url = page.url
@@ -813,6 +829,50 @@ async def _collect_and_process_orders(page, domain, since_date, smtp_conn, setti
if "order-history" not in actual_url and "your-orders" not in actual_url:
return None
# Reset to page 1 via SPA navigation (NOT page.reload() which kills session!)
# Click the "Bestellungen" tab or use the time filter to refresh the order list
logger.info(f"Amazon: Refreshe Bestellliste via SPA (aktuelle URL: {actual_url})...")
try:
refreshed = await page.evaluate("""() => {
// Strategy 1: Click the "Bestellungen" tab to reset to page 1
const tabs = document.querySelectorAll('a[href*="your-orders"], a[href*="order-history"]');
for (const tab of tabs) {
const text = (tab.innerText || '').trim();
if ((text === 'Bestellungen' || text === 'Orders') && tab.offsetParent !== null) {
tab.click();
return 'tab';
}
}
// Strategy 2: Click pagination page 1 link
const page1Links = document.querySelectorAll('.a-pagination a[href*="pagination/1"], .a-pagination li:first-child a');
for (const link of page1Links) {
if (link.offsetParent !== null) {
link.click();
return 'pagination';
}
}
// Strategy 3: Click the time filter to trigger a refresh
const filterSelect = document.querySelector('select[name="orderFilter"], select#orderFilter, select#time-filter');
if (filterSelect) {
// Re-select the current value to trigger change event
const event = new Event('change', {bubbles: true});
filterSelect.dispatchEvent(event);
return 'filter';
}
return null;
}""")
if refreshed:
logger.info(f"Amazon: Bestellliste refreshed via {refreshed}")
await asyncio.sleep(3)
try:
await page.wait_for_load_state("networkidle", timeout=15000)
except Exception:
pass
else:
logger.info("Amazon: Kein SPA-Refresh möglich, verwende aktuelle Ansicht")
except Exception as e:
logger.warning(f"Amazon: SPA-Refresh fehlgeschlagen: {e}")
# Try to set time filter
now = datetime.now()
days_back = (now - since_date).days
@@ -862,8 +922,14 @@ async def _collect_and_process_orders(page, domain, since_date, smtp_conn, setti
logger.info(f"Amazon: Seite {page_num}: {len(page_orders)} gefunden, {len(new_orders)} neu")
total_orders += len(new_orders)
# Process invoices for THIS page's orders immediately (buttons are visible now)
# Process invoices for THIS page's orders immediately
for order in new_orders:
# Check batch limit
if processed >= MAX_INVOICES_PER_RUN:
batch_done = True
logger.info(f"Amazon: Batch-Limit erreicht ({MAX_INVOICES_PER_RUN} Rechnungen). Rest beim nächsten Abruf.")
break
order_id = order.get("id", "?")
try:
if await is_invoice_downloaded(order_id, order_id):
@@ -920,7 +986,8 @@ async def _collect_and_process_orders(page, domain, since_date, smtp_conn, setti
)
await mark_invoice_downloaded(order_id, order_id)
await _human_delay(2.0, 4.0)
# Long delay between orders to avoid Amazon rate-limiting
await _human_delay(8.0, 15.0)
except Exception as e:
errors += 1
@@ -933,6 +1000,10 @@ async def _collect_and_process_orders(page, domain, since_date, smtp_conn, setti
error_message=str(e),
)
# Stop if batch limit reached
if batch_done:
break
# Navigate to next page
has_next = await page.evaluate("""() => {
const nextLink = document.querySelector('.a-pagination .a-last:not(.a-disabled) a');
@@ -960,8 +1031,9 @@ async def _collect_and_process_orders(page, domain, since_date, smtp_conn, setti
else:
break
logger.info(f"Amazon: Gesamt {total_orders} Bestellungen auf {page_num} Seite(n)")
return {"processed": processed, "skipped": skipped, "errors": errors}
status = "Batch-Limit" if batch_done else "komplett"
logger.info(f"Amazon: Gesamt {total_orders} Bestellungen auf {page_num} Seite(n), Status: {status}")
return {"processed": processed, "skipped": skipped, "errors": errors, "batch_done": batch_done}
async def _collect_orders(page, domain: str, since_date: datetime) -> list[dict] | None: