# common-cents-tui/src/common_cents/csv_import.py

import csv
import io
from dataclasses import dataclass, field
from datetime import date
from pathlib import Path

from common_cents.money import parse_cents_csv

# Column headers (compared after stripping whitespace and upper-casing) that
# must all be present; parse_csv reports an error and returns no rows when
# any of them is missing.
REQUIRED_COLS = {"DATE", "CENTS", "CATEGORY", "MERCHANT"}
# Optional column headers; parse_csv only records a warning when they are
# absent and still imports the rows.
SUGGESTED_COLS = {"NOTES", "TAGS"}


@dataclass
class ImportRow:
    """One spending record produced by ``parse_csv`` from a CSV data row."""

    # ISO-format date string; parse_csv validates it with date.fromisoformat.
    date: str
    # Amount as an integer; parse_csv only emits values greater than zero.
    cents: int
    # Category text; parse_csv rejects rows where it is blank.
    category: str
    # Merchant text; parse_csv does not reject a blank value here.
    merchant: str
    # Free-text note, or None when the NOTES column is absent or the cell is blank.
    notes: str | None
    # Tags split from the comma-separated TAGS cell; empty when absent or blank.
    tags: list[str]


@dataclass
class ParseResult:
    """Outcome of ``parse_csv``: the imported rows plus collected messages."""

    # Rows that passed every check, in file order.
    rows: list[ImportRow] = field(default_factory=list)
    # Non-fatal notices (e.g. optional columns missing, unknown columns ignored).
    warnings: list[str] = field(default_factory=list)
    # File-level failures and per-row problems; a row that produced an error
    # is not included in ``rows``.
    errors: list[str] = field(default_factory=list)


def _cell(raw_row: dict, key: str | None) -> str:
    """Return the whitespace-stripped text of column *key*, or "" if unavailable.

    *key* is the original header name, or None when the column is not in the
    file at all.
    """
    if key is None:
        return ""
    # csv.DictReader fills cells missing from a short row with None (its
    # default ``restval``), so a bare ``.strip()`` would raise AttributeError.
    return (raw_row.get(key) or "").strip()


def _parse_row(raw_row: dict, header_map: dict[str, str]) -> ImportRow:
    """Validate one DictReader row and convert it to an ``ImportRow``.

    Raises:
        ValueError: with a user-facing message (without the "Row N: " prefix)
            when a required cell is blank, the date is invalid, or the amount
            is not positive. Errors raised by ``parse_cents_csv`` propagate
            unchanged.
    """
    date_val = _cell(raw_row, header_map["DATE"])
    cents_raw = _cell(raw_row, header_map["CENTS"])
    category = _cell(raw_row, header_map["CATEGORY"])
    merchant = _cell(raw_row, header_map["MERCHANT"])

    if not (date_val and cents_raw and category):
        raise ValueError("date, cents, and category are required.")

    try:
        parsed_date = date.fromisoformat(date_val)
    except ValueError:
        raise ValueError(
            f"invalid date '{date_val}' — use YYYY-MM-DD."
        ) from None

    cents = parse_cents_csv(cents_raw)
    if cents <= 0:
        raise ValueError("cents must be a positive number.")

    # A blank or absent NOTES cell becomes None rather than "".
    notes = _cell(raw_row, header_map.get("NOTES")) or None

    tags_raw = _cell(raw_row, header_map.get("TAGS"))
    tags = [t.strip() for t in tags_raw.split(",") if t.strip()]

    return ImportRow(
        # Newer Python versions accept ISO forms other than YYYY-MM-DD
        # (e.g. "20240131"); store the canonical form so every row uses the
        # same representation. Canonical input is unchanged.
        date=parsed_date.isoformat(),
        cents=cents,
        category=category,
        merchant=merchant,
        notes=notes,
        tags=tags,
    )


def parse_csv(path: Path) -> ParseResult:
    """Parse a spending CSV file into validated rows, warnings and errors.

    The file is read as UTF-8 (a leading BOM is tolerated). Header names are
    matched case-insensitively after stripping whitespace. All of
    ``REQUIRED_COLS`` must be present; missing ``SUGGESTED_COLS`` and unknown
    columns only produce warnings.

    Args:
        path: Location of the CSV file.

    Returns:
        A ``ParseResult``. File-level problems (unreadable file, missing
        header, missing required columns) yield a single error and no rows.
        Otherwise each valid data row becomes an ``ImportRow`` and each
        invalid one adds a ``"Row N: ..."`` error, where N counts records
        with the header as row 1 (blank lines are skipped by the csv reader
        and not counted).

    This function does not raise for bad input; problems are reported through
    ``ParseResult.errors``.
    """
    result = ParseResult()

    try:
        text = path.read_text(encoding="utf-8-sig")
    except UnicodeDecodeError as e:
        # UnicodeDecodeError is a ValueError, not an OSError, so it needs its
        # own handler (e.g. a spreadsheet export in a legacy code page).
        result.errors.append(f"File is not valid UTF-8 text: {e}")
        return result
    except OSError as e:
        result.errors.append(str(e))
        return result

    # Feed the csv module a file-like object with newline="" so that line
    # breaks inside quoted fields survive; str.splitlines() would cut such
    # fields apart and also splits on characters like \x0b, \x0c and \u2028.
    reader = csv.DictReader(io.StringIO(text, newline=""))

    try:
        fieldnames = reader.fieldnames
    except csv.Error as e:
        result.errors.append(f"Malformed CSV header: {e}")
        return result

    if fieldnames is None:
        result.errors.append("File is empty or has no header row.")
        return result

    # Build normalised header map: upper-stripped → original fieldname
    header_map = {h.strip().upper(): h for h in fieldnames}
    headers = set(header_map)

    missing_required = REQUIRED_COLS - headers
    if missing_required:
        result.errors.append(
            f"Missing required columns: {', '.join(sorted(missing_required))}"
        )
        return result

    missing_suggested = SUGGESTED_COLS - headers
    if missing_suggested:
        result.warnings.append(
            f"Optional columns not found: {', '.join(sorted(missing_suggested))}. "
            "Adding notes and tags to your spending records is recommended."
        )

    extra_cols = headers - REQUIRED_COLS - SUGGESTED_COLS
    if extra_cols:
        result.warnings.append(
            f"Unknown columns will be ignored: {', '.join(sorted(extra_cols))}."
        )

    try:
        for line_num, raw_row in enumerate(reader, start=2):
            try:
                result.rows.append(_parse_row(raw_row, header_map))
            except (ValueError, KeyError) as e:
                result.errors.append(f"Row {line_num}: {e}")
    except csv.Error as e:
        # Raised by the reader itself (e.g. a field exceeding the csv field
        # size limit after an unclosed quote); rows parsed so far are kept.
        result.errors.append(f"Malformed CSV near line {reader.line_num}: {e}")

    return result