import json
import csv
import re
from pathlib import Path
from datetime import datetime
from typing import Optional, Tuple, List

# Base directory that every data path below is derived from.
BASE_PATH = Path("/usr/local/var/www/webapp-businessplan")
# Directory that is scanned for export folders and leftover *.download artifacts.
IMPORT_PATH = BASE_PATH / "data/taschengeld/imports/zip_eingang"
# JSON file that load_json() / save_json() read and write.
DB_PATH = BASE_PATH / "data/taschengeld/db/taschengeld.json"

# NOTE: import-time side effect - both directories are created (including
# missing parents) as soon as this module is imported.
IMPORT_PATH.mkdir(parents=True, exist_ok=True)
DB_PATH.parent.mkdir(parents=True, exist_ok=True)

# --- Helpers: cleanup ---------------------------------------------------------

def _remove_tree_best_effort(path: Path) -> None:
    """Delete *path* (file, symlink or whole directory tree), ignoring failures.

    Symlinks are removed as links and never followed, so nothing outside the
    tree rooted at *path* is touched. Entries that cannot be removed are
    skipped silently; the cleanup is deliberately best effort.
    """
    try:
        # Test for a link first: is_dir() follows symlinks, so a link that
        # points at a directory would otherwise have its *target* emptied.
        if path.is_symlink() or not path.is_dir():
            path.unlink()
            return

        # Reverse-sorted paths list children before their parent directory,
        # so every directory is already empty when rmdir() reaches it.
        for sub in sorted(path.rglob("*"), reverse=True):
            try:
                if sub.is_symlink() or sub.is_file():
                    sub.unlink()
                elif sub.is_dir():
                    sub.rmdir()
            except OSError:
                pass
        path.rmdir()
    except OSError:
        pass


def cleanup_download_artifacts(import_path: Optional[Path] = None) -> None:
    """Remove everything in the import directory that looks like a download artifact.

    An artifact is any file or folder whose name ends in ".download"
    (case-insensitive); this also covers "*.zip.download" style names.

    Args:
        import_path: Directory to clean. Defaults to IMPORT_PATH.
    """
    root = IMPORT_PATH if import_path is None else import_path
    if not root.exists():
        return

    for entry in root.iterdir():
        if entry.name.lower().endswith(".download"):
            _remove_tree_best_effort(entry)


def cleanup_keep_newest_folders(keep: int = 5) -> None:
    """Keep only the *keep* newest export folders and delete the older ones.

    "Newest" follows the ordering of _list_export_folders() (newest first).
    Deletion is best effort: folders that cannot be removed are left behind.

    Args:
        keep: Number of folders to retain; expected to be >= 0.
    """
    # Slicing past the end yields an empty list, so no length guard is needed.
    for old in _list_export_folders()[keep:]:
        _remove_tree_best_effort(old)

# --- Helpers: latest folder selection ----------------------------------------

_DATE_RE = re.compile(r"Finom_statement&docs_(\d{8})(?:-(\d+))?", re.IGNORECASE)

def _parse_folder_key(folder_name: str) -> Tuple[int, int]:
    """
    Liefert Sort-Key (date_yyyymmdd_int, suffix_int).
    - Datum kommt aus Name: ..._DDMMYYYY
    - Suffix kommt aus '-N' (mehrere Exporte am gleichen Tag)
    """
    m = _DATE_RE.search(folder_name)
    if not m:
        return (0, 0)
    ddmmyyyy = m.group(1)  # DDMMYYYY
    suffix = m.group(2)
    try:
        dt = datetime.strptime(ddmmyyyy, "%d%m%Y")
        date_key = int(dt.strftime("%Y%m%d"))
    except Exception:
        date_key = 0
    try:
        suffix_key = int(suffix) if suffix is not None else 0
    except Exception:
        suffix_key = 0
    return (date_key, suffix_key)

def _list_export_folders():
    """Return the real export folders inside IMPORT_PATH, newest first.

    Only directories whose name matches _DATE_RE and does not end in
    ".download" are listed. Ordering (all descending): date from the name,
    then the "-N" suffix, then the modification time in whole seconds.
    """
    if not IMPORT_PATH.exists():
        return []

    def _sort_key(folder):
        date_key, suffix_key = _parse_folder_key(folder.name)
        return (date_key, suffix_key, int(folder.stat().st_mtime))

    candidates = [
        entry
        for entry in IMPORT_PATH.iterdir()
        if entry.is_dir()
        and not entry.name.lower().endswith(".download")
        and _DATE_RE.search(entry.name)
    ]
    return sorted(candidates, key=_sort_key, reverse=True)

def get_latest_folder() -> Optional[Path]:
    """Return the newest export folder, or None when there is none.

    Download artifacts are cleaned up first; the newest folder is the first
    entry of _list_export_folders().
    """
    cleanup_download_artifacts()
    candidates = _list_export_folders()
    if not candidates:
        return None
    return candidates[0]

def find_csv_files(folder: Path) -> Tuple[Optional[Path], Optional[Path]]:
    """Locate the statement CSV and the documents CSV inside an export folder.

    The folder is searched recursively. Only regular files whose name ends in
    ".csv" (case-insensitive) are considered. Candidates are visited in
    sorted path order so the result is deterministic when several files
    match; the first file whose lower-cased name contains "statement" /
    "documents" wins.

    Args:
        folder: Export folder to search.

    Returns:
        (statement_csv, documents_csv); either element is None if not found.
    """
    statement_csv: Optional[Path] = None
    documents_csv: Optional[Path] = None

    # rglob() yields entries in filesystem order, which is not stable across
    # machines - sort to always pick the same file.
    for candidate in sorted(folder.rglob("*")):
        name = candidate.name.lower()
        if not name.endswith(".csv") or not candidate.is_file():
            continue
        if "statement" in name and statement_csv is None:
            statement_csv = candidate
        elif "documents" in name and documents_csv is None:
            documents_csv = candidate
        if statement_csv is not None and documents_csv is not None:
            break

    return statement_csv, documents_csv

def _find_all_pdfs(folder: Path) -> List[Path]:
    """
    Findet PDFs im Exportordner (typisch: Belege/, Rechnungen/).
    """
    pdfs = []
    try:
        for p in folder.rglob("*.pdf"):
            if p.is_file():
                pdfs.append(p)
    except Exception:
        pass
    pdfs.sort(key=lambda p: int(p.stat().st_mtime), reverse=True)
    return pdfs

# --- DB ----------------------------------------------------------------------

def load_json():
    """Read the JSON database stored at DB_PATH.

    Returns the decoded file content, or a fresh skeleton with empty "meta",
    "imports" and "buchungen" entries when the file does not exist yet.
    Invalid JSON propagates as json.JSONDecodeError.
    """
    if DB_PATH.exists():
        with DB_PATH.open("r", encoding="utf-8") as handle:
            return json.load(handle)
    return {"meta": {}, "imports": [], "buchungen": []}

def save_json(data):
    """Write *data* to DB_PATH as indented UTF-8 JSON (non-ASCII kept as is).

    The payload is serialized completely before the file is opened. Opening
    with "w" truncates the existing database, so serializing first ensures
    that a value json cannot encode raises before the old content is lost.

    Raises:
        TypeError / ValueError: if *data* is not JSON-serializable.
        OSError: if the file cannot be written.
    """
    payload = json.dumps(data, indent=2, ensure_ascii=False)
    # NOTE(review): the write itself is still not crash-atomic; a temp file
    # plus os.replace() would be, but would also change file owner/mode.
    with open(DB_PATH, "w", encoding="utf-8") as f:
        f.write(payload)

def build_document_mapping(doc_csv_path: Path) -> dict:
    """Map document file stems to file names as listed in the documents CSV.

    Reads the "Dateiname" column; rows without a value are ignored. The key
    is the file name without directory and extension (Path.stem), the value
    is the cell content unchanged. Later rows overwrite earlier ones that
    share the same stem.

    Args:
        doc_csv_path: Path to the documents CSV.

    Returns:
        Dict of stem -> file name.
    """
    mapping = {}
    # "utf-8-sig" strips a leading byte-order mark if present and reads plain
    # UTF-8 otherwise. With "utf-8" a BOM would turn the first header into
    # "\ufeffDateiname" and every row lookup would silently come back empty.
    with open(doc_csv_path, newline="", encoding="utf-8-sig") as csvfile:
        for row in csv.DictReader(csvfile):
            filename = row.get("Dateiname")
            if not filename:
                continue
            mapping[Path(filename).stem] = filename
    return mapping

def next_tg_id(data, year: str):
    """Return the next free booking id of the form "TG-<year>-NNNN".

    Scans data["buchungen"] for ids starting with "TG-<year>-", takes the
    highest number found after the last "-" and adds one. Ids whose trailing
    part is not an integer are ignored. Without any usable id the sequence
    starts at 0001. The number is zero-padded to at least four digits.
    """
    prefix = f"TG-{year}-"
    highest = None

    for booking in data.get("buchungen", []):
        booking_id = booking.get("id")
        if not booking_id:
            continue
        text = str(booking_id)
        if not text.startswith(prefix):
            continue
        try:
            number = int(text.rpartition("-")[2])
        except Exception:
            continue
        if highest is None or number > highest:
            highest = number

    following = 1 if highest is None else highest + 1
    return f"TG-{year}-{following:04d}"

# --- Import ------------------------------------------------------------------

def _parse_csv_amount(raw: Optional[str]) -> float:
    """Convert an amount cell from the statement CSV into a float.

    Missing or empty values count as 0. A lone comma is the decimal separator
    ("12,50" -> 12.5). When both "." and "," occur, the one appearing last is
    the decimal separator and the other is dropped as a thousands separator
    ("1.234,56" and "1,234.56" -> 1234.56).

    Raises:
        ValueError: if the text is still not a valid number.
    """
    text = (raw or "0").strip()
    if "," in text and "." in text:
        if text.rfind(",") > text.rfind("."):
            text = text.replace(".", "")
        else:
            text = text.replace(",", "")
    return float(text.replace(",", "."))


def _utc_timestamp() -> str:
    """Return the current UTC time as a naive ISO-8601 string (no offset)."""
    from datetime import timezone

    # Same text format as the deprecated datetime.utcnow().isoformat().
    return datetime.now(timezone.utc).replace(tzinfo=None).isoformat()


def import_latest_folder():
    """Import bookings from the newest export folder in the import directory.

    Steps:
    - remove *.download artifacts and keep only the 5 newest export folders
    - read the statement CSV; only rows with Status "Completed" and a
      Transaktions-ID are considered
    - transactions not yet in the database are appended with a fresh TG id
    - for transactions already known, the document is updated when a new,
      different one was determined, and "import_ordner" is refreshed
    - rows whose date or amounts cannot be parsed are skipped and counted
    - "last_folder", "last_folder_mtime" and "last_imported_at" are stored
      in data["meta"], then the database is saved

    Returns:
        A result dict. On success: "ok" True plus "imported",
        "updated_existing", "skipped_invalid", "folder", "folder_mtime" and
        "message". On failure: "ok" False plus "error" and "message" (and
        folder details when the CSV files are incomplete). Exceptions are
        never raised to the caller.
    """
    try:
        cleanup_download_artifacts()
        cleanup_keep_newest_folders(keep=5)

        folder = get_latest_folder()
        if not folder:
            return {"ok": False, "error": "Kein Export-Ordner gefunden", "message": "Kein Export-Ordner gefunden"}

        folder_mtime = int(folder.stat().st_mtime)

        statement_csv, documents_csv = find_csv_files(folder)
        if not statement_csv or not documents_csv:
            return {
                "ok": False,
                "error": "CSV-Dateien nicht vollständig gefunden",
                "message": "CSV-Dateien nicht vollständig gefunden",
                "folder": folder.name,
                "folder_mtime": folder_mtime,
            }

        data = load_json()
        data.setdefault("meta", {})
        data.setdefault("imports", [])
        data.setdefault("buchungen", [])

        # Index bookings by transaction id once, instead of rescanning the
        # whole list for every CSV row.
        bookings_by_tx = {}
        for booking in data["buchungen"]:
            known_tx = booking.get("transaktions_id")
            if known_tx:
                bookings_by_tx.setdefault(known_tx, []).append(booking)

        doc_mapping = build_document_mapping(documents_csv)

        # Fallback source: used only when the folder holds exactly one PDF,
        # so that no arbitrary PDF gets assigned.
        # NOTE(review): that single PDF is then attached to *every* row
        # without a mapping match - confirm this is intended.
        pdfs_in_folder = _find_all_pdfs(folder)
        fallback_doc = pdfs_in_folder[0].name if len(pdfs_in_folder) == 1 else None

        new_count = 0
        updated_existing = 0
        skipped_invalid = 0

        # "utf-8-sig" also accepts files that start with a byte-order mark;
        # otherwise the first column header would be read with a BOM prefix.
        with open(statement_csv, newline="", encoding="utf-8-sig") as csvfile:
            for row in csv.DictReader(csvfile):
                if row.get("Status") != "Completed":
                    continue

                tx_id = row.get("Transaktions-ID")
                if not tx_id:
                    continue

                # 1) Primary: first documents.csv stem contained in the
                #    row's document reference.
                doc_ref = row.get("Begleitende Dokumente")
                matched_doc = None
                if doc_ref and doc_ref != "N/A":
                    matched_doc = next(
                        (filename for stem, filename in doc_mapping.items() if stem in doc_ref),
                        None,
                    )

                # 2) Fallback: the single PDF of the folder, if there is one.
                if not matched_doc:
                    matched_doc = fallback_doc

                # Known transaction: refresh document and import folder only.
                existing = bookings_by_tx.get(tx_id)
                if existing:
                    for booking in existing:
                        if matched_doc and booking.get("dokument") != matched_doc:
                            booking["dokument"] = matched_doc
                            updated_existing += 1
                        booking["import_ordner"] = folder.name
                    continue

                date_str = row.get("Buchungsdatum") or ""
                try:
                    year = datetime.strptime(date_str, "%d.%m.%Y").strftime("%Y")
                    betrag = _parse_csv_amount(row.get("Zahlungsbetrag"))
                    saldo = _parse_csv_amount(row.get("Wallet-Saldo nach Transaktion"))
                except ValueError:
                    # Unusable row: skip it, but report it in the result.
                    skipped_invalid += 1
                    continue

                booking = {
                    "id": next_tg_id(data, year),
                    "transaktions_id": tx_id,
                    "datum": date_str,
                    "betrag": betrag,
                    "saldo": saldo,
                    "empfaenger": row.get("Auftraggeber/Empfänger"),
                    "verwendungszweck": row.get("Verwendungszweck"),
                    "tags": row.get("Tags"),
                    "transaktionsart": row.get("Transaktionsart"),
                    "dokument": matched_doc,
                    "import_ordner": folder.name,
                    "imported_at": _utc_timestamp(),
                }
                data["buchungen"].append(booking)
                # Register the id right away so a repeated row in the same
                # CSV is treated as known instead of being imported twice.
                bookings_by_tx[tx_id] = [booking]
                new_count += 1

        data["meta"]["last_folder"] = folder.name
        data["meta"]["last_folder_mtime"] = folder_mtime
        data["meta"]["last_imported_at"] = _utc_timestamp()
        save_json(data)

        # Note: "imported 0" does NOT mean nothing happened -
        # updated_existing may still be > 0.
        return {
            "ok": True,
            "imported": new_count,
            "updated_existing": updated_existing,
            "skipped_invalid": skipped_invalid,
            "folder": folder.name,
            "folder_mtime": folder_mtime,
            "message": f"Importiert: {new_count} | Aktualisiert: {updated_existing}"
        }

    except json.JSONDecodeError:
        return {"ok": False, "error": "DB JSON ist ungültig", "message": "DB JSON ist ungültig"}
    except Exception as e:
        # Top-level boundary: callers get an error dict, never an exception.
        return {"ok": False, "error": str(e), "message": str(e)}
