"""Parse spending-record CSV files into validated import rows."""

import csv
import io
from dataclasses import dataclass, field
from datetime import date
from pathlib import Path

from common_cents.money import parse_cents_csv

# Header columns that must be present; a file missing any of them is rejected.
REQUIRED_COLS = {"DATE", "CENTS", "CATEGORY", "MERCHANT"}
# Header columns that are optional; a missing one only produces a warning.
SUGGESTED_COLS = {"NOTES", "TAGS"}


@dataclass
class ImportRow:
    """One data row that passed validation in ``parse_csv``."""

    date: str  # ISO calendar date, stored as YYYY-MM-DD
    cents: int  # strictly positive value returned by parse_cents_csv
    category: str  # never empty
    merchant: str  # may be empty: only the column itself is mandatory
    notes: str | None  # None when the cell is blank or the column is absent
    tags: list[str]  # TAGS cell split on commas, stripped, blanks dropped


@dataclass
class ParseResult:
    """Outcome of ``parse_csv``: accepted rows plus collected diagnostics."""

    rows: list[ImportRow] = field(default_factory=list)
    warnings: list[str] = field(default_factory=list)
    errors: list[str] = field(default_factory=list)


def _cell(raw_row: dict[str, str], key: str | None) -> str:
    """Return the stripped text of a cell, or "" if the column/cell is absent."""
    if key is None:
        return ""
    return raw_row.get(key, "").strip()


def _build_row(raw_row: dict[str, str], header_map: dict[str, str]) -> ImportRow:
    """Validate one raw CSV record and convert it to an ``ImportRow``.

    Raises:
        ValueError: if a mandatory value is blank, the date is not an ISO
            date, or the amount is not positive. ``ValueError``/``KeyError``
            raised by ``parse_cents_csv`` propagate unchanged.
    """
    date_val = _cell(raw_row, header_map["DATE"])
    cents_raw = _cell(raw_row, header_map["CENTS"])
    category = _cell(raw_row, header_map["CATEGORY"])
    merchant = _cell(raw_row, header_map["MERCHANT"])

    if not date_val or not cents_raw or not category:
        raise ValueError("date, cents, and category are required.")

    try:
        parsed_date = date.fromisoformat(date_val)
    except ValueError:
        raise ValueError(
            f"invalid date '{date_val}' — use YYYY-MM-DD."
        ) from None

    cents = parse_cents_csv(cents_raw)
    if cents <= 0:
        raise ValueError("cents must be a positive number.")

    notes = _cell(raw_row, header_map.get("NOTES")) or None
    tags_raw = _cell(raw_row, header_map.get("TAGS"))
    tags = [t.strip() for t in tags_raw.split(",") if t.strip()]

    return ImportRow(
        # fromisoformat accepts other ISO spellings on newer Pythons; store
        # the canonical form (a no-op for input already in YYYY-MM-DD).
        date=parsed_date.isoformat(),
        cents=cents,
        category=category,
        merchant=merchant,
        notes=notes,
        tags=tags,
    )


def parse_csv(path: Path) -> ParseResult:
    """Read a CSV file of spending records and validate it row by row.

    The file is decoded as UTF-8 (a leading BOM is tolerated). Header names
    are matched case-insensitively after stripping whitespace. Problems never
    raise: they are collected in the returned result.

    Args:
        path: Location of the CSV file to import.

    Returns:
        A ``ParseResult`` whose ``rows`` holds every valid record, whose
        ``warnings`` notes missing optional or unknown columns, and whose
        ``errors`` lists file-level failures and rejected rows. Row numbers
        in messages count records from 1, the header being row 1.
    """
    result = ParseResult()

    try:
        text = path.read_text(encoding="utf-8-sig")
    except OSError as e:
        result.errors.append(str(e))
        return result
    except UnicodeDecodeError as e:
        # Not an OSError, so it would otherwise escape to the caller.
        result.errors.append(f"File is not valid UTF-8 text: {e}")
        return result

    # Let the csv module find record boundaries itself: str.splitlines() would
    # cut quoted multi-line cells and split on characters such as U+2028.
    reader = csv.reader(io.StringIO(text, newline=""))

    try:
        fieldnames = next(reader, None)
    except csv.Error as e:
        result.errors.append(f"Could not read header row: {e}")
        return result
    if not fieldnames:
        result.errors.append("File is empty or has no header row.")
        return result

    # Build normalised header map: upper-stripped → original fieldname
    header_map = {h.strip().upper(): h for h in fieldnames}
    headers = set(header_map)

    missing_required = REQUIRED_COLS - headers
    if missing_required:
        result.errors.append(
            f"Missing required columns: {', '.join(sorted(missing_required))}"
        )
        return result

    missing_suggested = SUGGESTED_COLS - headers
    if missing_suggested:
        result.warnings.append(
            f"Optional columns not found: {', '.join(sorted(missing_suggested))}. "
            "Adding notes and tags to your spending records is recommended."
        )

    extra_cols = headers - REQUIRED_COLS - SUGGESTED_COLS
    if extra_cols:
        result.warnings.append(
            f"Unknown columns will be ignored: {', '.join(sorted(extra_cols))}."
        )

    try:
        # Enumerate every record, blank ones included, so the reported row
        # number stays aligned with the file even after empty lines.
        for row_num, cells in enumerate(reader, start=2):
            if not cells:
                continue
            # zip() stops at the shorter side: short rows simply lack the
            # trailing keys and surplus cells are ignored.
            raw_row = dict(zip(fieldnames, cells))
            try:
                result.rows.append(_build_row(raw_row, header_map))
            except (ValueError, KeyError) as e:
                result.errors.append(f"Row {row_num}: {e}")
    except csv.Error as e:
        # The parser cannot continue past a structural error; keep the rows
        # accepted so far and report where reading stopped.
        result.errors.append(f"Line {reader.line_num}: malformed CSV: {e}")

    return result