belege-import/app/smb_processor.py

311 lines
11 KiB
Python

import os
import asyncio
import logging
import tempfile
import smbclient
from app.database import get_settings, add_log_entry
from app.mail_processor import _connect_smtp, _build_forward_email
from app.scanner import detect_separator_pages, split_pdf
# Module-level logger, named after this module via __name__.
logger = logging.getLogger(__name__)
def _smb_register_session(settings: dict) -> str:
    r"""Register an SMB session and return the UNC base path of the share.

    Args:
        settings: Settings mapping. ``smb_server`` and ``smb_share`` are
            required; ``smb_port``, ``smb_username``, ``smb_password`` and
            ``smb_domain`` are optional.

    Returns:
        The share root in the form ``\\server\share``.

    Raises:
        KeyError: If ``smb_server`` or ``smb_share`` is missing.
        ValueError: If ``smb_port`` is set to a non-numeric value.
    """
    server = settings["smb_server"]
    # Read the share up front so a missing key fails before any network I/O.
    share = settings["smb_share"]

    # A stored empty string or None must fall back to the default port
    # instead of blowing up inside int().
    port = int(settings.get("smb_port") or 445)

    username = settings.get("smb_username", "")
    password = settings.get("smb_password", "")
    domain = settings.get("smb_domain", "")

    # With a domain configured, authenticate as DOMAIN\user.
    auth_username = f"{domain}\\{username}" if domain else username

    smbclient.register_session(
        server,
        port=port,
        username=auth_username,
        password=password,
        connection_timeout=10,
    )
    return f"\\\\{server}\\{share}"
def _smb_unc_path(base: str, *parts: str) -> str:
"""Join UNC path segments using backslash."""
result = base
for p in parts:
if p:
result = result.rstrip("\\") + "\\" + p.replace("/", "\\").strip("\\")
return result
def _ensure_smb_folder(path: str):
    """Make sure the SMB directory at ``path`` exists.

    The path is probed with ``smbclient.stat``; only when that raises
    ``OSError`` is the directory (including parents) created.
    """
    try:
        smbclient.stat(path)
    except OSError:
        missing = True
    else:
        missing = False

    if missing:
        smbclient.makedirs(path, exist_ok=True)
def _list_pdf_files(source_path: str) -> list[str]:
    """Return the sorted ``.pdf`` filenames in an SMB directory.

    Names starting with a dot are ignored; the extension check is
    case-insensitive.

    Args:
        source_path: UNC path of the directory to list.

    Returns:
        Sorted list of matching filenames, or an empty list when the
        directory cannot be listed. The listing failure is logged so an
        unreachable folder is distinguishable from an empty one.
    """
    try:
        entries = smbclient.listdir(source_path)
    except OSError as exc:
        # Still best-effort (callers expect a list), but no longer silent.
        logger.warning(
            "SMB-Quellordner %s konnte nicht gelesen werden: %s", source_path, exc
        )
        return []
    return sorted(
        name
        for name in entries
        if name.lower().endswith(".pdf") and not name.startswith(".")
    )
def _read_smb_file(filepath: str) -> bytes:
    """Return the complete contents of the SMB file at ``filepath`` as bytes."""
    with smbclient.open_file(filepath, mode="rb") as remote:
        content = remote.read()
    return content
def _move_smb_file(source: str, dest_dir: str, filename: str):
    """Rename ``source`` into ``dest_dir`` without overwriting an existing file.

    If ``filename`` is already present in ``dest_dir``, the suffixes ``_1``,
    ``_2``, ... are tried (inserted before the extension) until a name is
    found for which ``smbclient.stat`` raises ``OSError``.
    """

    def _taken(candidate: str) -> bool:
        # A path counts as taken when stat succeeds on it.
        try:
            smbclient.stat(candidate)
        except OSError:
            return False
        return True

    target = _smb_unc_path(dest_dir, filename)
    if _taken(target):
        stem, suffix = os.path.splitext(filename)
        index = 1
        target = _smb_unc_path(dest_dir, f"{stem}_{index}{suffix}")
        while _taken(target):
            index += 1
            target = _smb_unc_path(dest_dir, f"{stem}_{index}{suffix}")

    smbclient.rename(source, target)
def _list_smb_folders_recursive(
    base_path: str, max_depth: int = 3, _current_depth: int = 0, _prefix: str = ""
) -> list[str]:
    """Collect sub-folder paths below ``base_path``, relative to the start.

    Relative paths use ``/`` as separator. Entries whose name starts with a
    dot are skipped. Recursion continues while ``_current_depth`` is below
    ``max_depth - 1``. An ``OSError`` while scanning a directory ends the
    scan of that directory; whatever was collected so far is returned.
    """
    collected: list[str] = []
    may_descend = _current_depth < max_depth - 1

    try:
        for item in smbclient.scandir(base_path):
            if not item.is_dir() or item.name.startswith("."):
                continue

            if _prefix:
                relative = f"{_prefix}/{item.name}"
            else:
                relative = item.name
            collected.append(relative)

            if may_descend:
                child_path = _smb_unc_path(base_path, item.name)
                collected += _list_smb_folders_recursive(
                    child_path, max_depth, _current_depth + 1, relative
                )
    except OSError:
        pass

    return collected
def _split_smb_pdf(pdf_data: bytes) -> tuple:
    """Split PDF bytes at separator pages using a temporary file.

    Blocking helper; meant to be run in a worker thread.

    Returns:
        ``(separator_pages, documents)`` exactly as produced by
        ``detect_separator_pages`` and ``split_pdf``.
    """
    tmp = tempfile.NamedTemporaryFile(suffix=".pdf", delete=False)
    try:
        # Close the handle before the scanner opens the file by name.
        with tmp:
            tmp.write(pdf_data)
        separator_pages = detect_separator_pages(tmp.name, None)
        documents = split_pdf(tmp.name, separator_pages)
    finally:
        # delete=False puts cleanup on us, including when the write fails.
        os.unlink(tmp.name)
    return separator_pages, documents


async def process_smb_share() -> dict:
    """Forward PDF files from the SMB source folder by e-mail.

    For every ``.pdf`` in the source folder the file is read, optionally
    split at separator pages (``smb_mode == "separator"``), sent to the
    configured ``lexoffice_email`` address, logged via ``add_log_entry`` and
    finally moved to the processed folder.

    All blocking SMB, PDF and SMTP work runs in worker threads so the event
    loop stays responsive.

    Returns:
        Dict with the counters ``processed``, ``skipped`` and ``errors``.
        An additional ``error`` key carries a message when the feature is
        not configured or when setup (SMB session, folder listing, SMTP
        connect) fails.
    """
    settings = await get_settings()
    if settings.get("smb_enabled") != "true":
        return {"processed": 0, "skipped": 0, "errors": 0}
    if not settings.get("smb_server") or not settings.get("smb_share"):
        return {"processed": 0, "skipped": 0, "errors": 0, "error": "SMB nicht konfiguriert"}
    if not settings.get("lexoffice_email"):
        return {"processed": 0, "skipped": 0, "errors": 0, "error": "LexOffice-Email nicht konfiguriert"}

    mode = settings.get("smb_mode", "forward")
    smtp_from = settings.get("smtp_username", "")
    lexoffice_email = settings["lexoffice_email"]
    processed = 0
    skipped = 0
    errors = 0
    smtp_conn = None

    try:
        base_path = await asyncio.to_thread(_smb_register_session, settings)
        source_path = _smb_unc_path(base_path, settings.get("smb_source_path", ""))
        # "or" (not a .get default) so a stored empty value cannot make the
        # processed folder collapse onto the share root.
        processed_path = _smb_unc_path(
            base_path, settings.get("smb_processed_path") or "Verarbeitet"
        )
        await asyncio.to_thread(_ensure_smb_folder, processed_path)

        pdf_files = await asyncio.to_thread(_list_pdf_files, source_path)
        if not pdf_files:
            logger.info("Keine PDF-Dateien im SMB-Ordner gefunden")
            return {"processed": 0, "skipped": 0, "errors": 0}
        logger.info(f"{len(pdf_files)} PDF-Datei(en) im SMB-Ordner gefunden")

        # SMTP is blocking network I/O as well; keep it off the event loop.
        smtp_conn = await asyncio.to_thread(_connect_smtp, settings)

        for filename in pdf_files:
            file_path = _smb_unc_path(source_path, filename)
            try:
                pdf_data = await asyncio.to_thread(_read_smb_file, file_path)

                if mode == "separator":
                    separator_pages, documents = await asyncio.to_thread(
                        _split_smb_pdf, pdf_data
                    )
                    if not documents:
                        # Nothing to send; the file stays in the source folder.
                        skipped += 1
                        continue

                    base_name = os.path.splitext(filename)[0]
                    for part_no, doc_bytes in enumerate(documents, start=1):
                        doc_filename = f"{base_name}_Teil_{part_no}.pdf"
                        subject = f"SMB-Import: {filename} (Dokument {part_no}/{len(documents)})"
                        msg = _build_forward_email(
                            from_addr=smtp_from,
                            to_addr=lexoffice_email,
                            original_subject=subject,
                            original_from="SMB-Import",
                            attachments=[(doc_filename, doc_bytes)],
                        )
                        await asyncio.to_thread(smtp_conn.send_message, msg)

                    await add_log_entry(
                        email_subject=f"SMB: {filename}",
                        email_from="SMB-Import",
                        attachments_count=len(documents),
                        status="success",
                    )
                    logger.info(
                        f"SMB verarbeitet: {filename} -> {len(documents)} Dokument(e) "
                        f"({len(separator_pages)} Trennseite(n))"
                    )
                else:
                    msg = _build_forward_email(
                        from_addr=smtp_from,
                        to_addr=lexoffice_email,
                        original_subject=f"SMB-Import: {filename}",
                        original_from="SMB-Import",
                        attachments=[(filename, pdf_data)],
                    )
                    await asyncio.to_thread(smtp_conn.send_message, msg)
                    await add_log_entry(
                        email_subject=f"SMB: {filename}",
                        email_from="SMB-Import",
                        attachments_count=1,
                        status="success",
                    )
                    logger.info(f"SMB verarbeitet: {filename}")

                # NOTE(review): if this move fails after a successful send,
                # the file remains in the source folder and will be sent
                # again on the next run.
                await asyncio.to_thread(_move_smb_file, file_path, processed_path, filename)
                processed += 1
            except Exception as e:
                # Per-file boundary: record the failure and go on with the
                # next file.
                errors += 1
                logger.error(f"Fehler bei SMB-Datei {filename}: {e}")
                try:
                    await add_log_entry(
                        email_subject=f"SMB: {filename}",
                        email_from="SMB-Import",
                        attachments_count=0,
                        status="error",
                        error_message=str(e),
                    )
                except Exception:
                    # Writing the log entry is best-effort.
                    pass
    except Exception as e:
        # Setup failed (SMB session, folder access or SMTP connect).
        logger.error(f"SMB-Verbindungsfehler: {e}")
        try:
            await add_log_entry(
                email_subject="",
                email_from="SMB-Import",
                attachments_count=0,
                status="error",
                error_message=f"SMB-Verbindungsfehler: {e}",
            )
        except Exception:
            pass
        return {"processed": processed, "skipped": skipped, "errors": errors + 1, "error": str(e)}
    finally:
        if smtp_conn is not None:
            try:
                await asyncio.to_thread(smtp_conn.quit)
            except Exception:
                # The connection may already be gone; nothing left to do.
                pass

    logger.info(f"SMB fertig: {processed} verarbeitet, {skipped} übersprungen, {errors} Fehler")
    return {"processed": processed, "skipped": skipped, "errors": errors}
async def test_smb_connection() -> dict:
    """Try to open an SMB session and list the share's folders.

    Returns:
        ``{"success": True, "folders": [...]}`` with the sorted folder list
        (up to three levels deep), or ``{"success": False, "error": ...,
        "folders": []}`` when the share is not configured or the attempt
        raises.
    """
    cfg = await get_settings()
    if not (cfg.get("smb_server") and cfg.get("smb_share")):
        return {"success": False, "error": "SMB-Server oder Freigabe nicht konfiguriert", "folders": []}

    try:
        unc_root = await asyncio.to_thread(_smb_register_session, cfg)
        found = await asyncio.to_thread(_list_smb_folders_recursive, unc_root, 3)
        listing = sorted(found)
    except Exception as exc:
        logger.error(f"SMB-Test fehlgeschlagen: {exc}")
        return {"success": False, "error": str(exc), "folders": []}

    return {"success": True, "folders": listing}
async def create_smb_folder(folder_path: str) -> dict:
    """Create ``folder_path`` (relative to the share root) on the SMB share.

    Surrounding whitespace is stripped from ``folder_path``. Existing
    folders are not treated as an error.

    Returns:
        ``{"success": True}`` on success, otherwise
        ``{"success": False, "error": <message>}``.
    """
    cfg = await get_settings()
    if not (cfg.get("smb_server") and cfg.get("smb_share")):
        return {"success": False, "error": "SMB nicht konfiguriert"}

    requested = folder_path.strip() if folder_path else ""
    if not requested:
        return {"success": False, "error": "Ordnername darf nicht leer sein"}

    try:
        unc_root = await asyncio.to_thread(_smb_register_session, cfg)
        target = _smb_unc_path(unc_root, requested)
        await asyncio.to_thread(smbclient.makedirs, target, exist_ok=True)
    except Exception as exc:
        logger.error(f"SMB-Ordner erstellen fehlgeschlagen: {exc}")
        return {"success": False, "error": str(exc)}

    return {"success": True}
async def list_smb_folders() -> dict:
    """Return the current folder list of the SMB share.

    Returns:
        ``{"folders": [...]}`` with the sorted relative folder paths (up to
        three levels deep). The list is empty when the share is not
        configured or when listing fails; a failure is logged so it is not
        mistaken for an empty share.
    """
    settings = await get_settings()
    if not settings.get("smb_server") or not settings.get("smb_share"):
        return {"folders": []}
    try:
        base_path = await asyncio.to_thread(_smb_register_session, settings)
        folders = await asyncio.to_thread(_list_smb_folders_recursive, base_path, 3)
        return {"folders": sorted(folders)}
    except Exception as e:
        # Callers still get an empty list, but the cause is now visible in
        # the log, as it is for test_smb_connection.
        logger.error(f"SMB-Ordnerliste konnte nicht geladen werden: {e}")
        return {"folders": []}