#!/usr/bin/env python3
"""
email_rules.py - Sender classification based on EMAIL-RULES.md

Tiers:
  IMPORTANT              -- always notify, flagged at top
  PRIORITY               -- notify once per msg_id
  CORRESPONDENCE         -- notify
  COMMERCE_NOTIFY        -- notify (delivery/invoice/action emails)
  COMMERCE_SILENT        -- discard silently
  CORRESPONDENCE_SILENT  -- discard silently
  SPAM                   -- discard silently
  UNKNOWN                -- ping EMAILS group once per sender, then silent

Accounts: 'gmail' (covers adalsey + krspamgang) | 'proton'

Global override: "[action needed]" in subject -> always notify, any tier
"""

import re

# Tier identifiers returned by classify(). They are plain strings whose value
# equals their name; see the module docstring for what each tier means.
IMPORTANT             = 'IMPORTANT'
PRIORITY              = 'PRIORITY'
CORRESPONDENCE        = 'CORRESPONDENCE'
COMMERCE_NOTIFY       = 'COMMERCE_NOTIFY'
COMMERCE_SILENT       = 'COMMERCE_SILENT'
CORRESPONDENCE_SILENT = 'CORRESPONDENCE_SILENT'
SPAM                  = 'SPAM'
UNKNOWN               = 'UNKNOWN'

# Groupings of the tiers above: SILENT_TIERS are the "discard silently" tiers
# and NOTIFY_TIERS the "notify" tiers from the module docstring. UNKNOWN is
# deliberately in neither set.
SILENT_TIERS = {SPAM, COMMERCE_SILENT, CORRESPONDENCE_SILENT}
NOTIFY_TIERS = {IMPORTANT, PRIORITY, CORRESPONDENCE, COMMERCE_NOTIFY}


def _has_any(text, keywords):
    """Return True if any keyword occurs in *text*, ignoring the case of *text*.

    Matching is plain substring containment against the lower-cased text, so
    entries in *keywords* must already be lower-case and may also match inside
    longer words.

    Args:
        text:     string to search; None is treated as an empty string.
        keywords: iterable of lower-case substrings to look for.

    Returns:
        True if at least one keyword is contained in the text, else False.
    """
    # A message without a Subject/body may reach us as None; treat that as
    # "nothing to match" rather than raising AttributeError.
    haystack = (text or '').lower()
    return any(kw in haystack for kw in keywords)

# Keyword tables for the conditional sender rules below. _has_any() does a
# plain substring test against lower-cased text, so every entry must be
# lower-case, and short entries also match inside longer words.

# Used by _cond_delivery_or_invoice: shipment / delivery status wording.
DELIVERY_KEYWORDS = [
    'tracking', 'has shipped', 'out for delivery', 'delivered', 'delivery',
    'shipping confirmation', 'your package', 'package has been', 'estimated delivery',
    'will be delivered', 'shipment',
]
# Used by _cond_delivery_or_invoice: billing / payment document wording.
INVOICE_KEYWORDS = [
    'invoice', 'payment due', 'payment is due', 'amount due', 'balance due', 'receipt',
    'refund', 'charge of', 'payment required', 'bill is ready', 'statement',
]
# Used by _cond_order_or_shipping: order and shipping wording.
ORDER_KEYWORDS = [
    'order', 'shipping', 'shipped', 'delivery', 'tracking', 'confirmed', 'receipt',
    'purchase',
]
# Used by _cond_spam_but_alert: account-security wording.
ALERT_KEYWORDS = [
    'alert', 'unusual activity', 'sign-in attempt', 'login attempt', 'security',
    'verify your account', 'suspicious', 'unauthorized',
]
# Used by _cond_spam_but_payment_due: wording for a bill that is due or late.
PAYMENT_DUE_KEYWORDS = [
    'payment due', 'payment is due', 'amount owed', 'past due', 'overdue', 'balance due',
    'bill due',
]


def _cond_delivery_or_invoice(subject, body):
    """Conditional rule: notify only for delivery or invoice mail.

    Args:
        subject: subject line; None is treated as empty.
        body:    plain text body; None is treated as empty.

    Returns:
        COMMERCE_NOTIFY if the subject or body contains any entry of
        DELIVERY_KEYWORDS or INVOICE_KEYWORDS, otherwise COMMERCE_SILENT.
    """
    # Guard the subject the same way as the body so a message without a
    # subject does not fail on the string concatenation.
    text = (subject or '') + ' ' + (body or '')
    if _has_any(text, DELIVERY_KEYWORDS) or _has_any(text, INVOICE_KEYWORDS):
        return COMMERCE_NOTIFY
    return COMMERCE_SILENT

def _cond_order_or_shipping(subject, body):
    """Conditional rule: notify only for order or shipping mail.

    Args:
        subject: subject line; None is treated as empty.
        body:    plain text body; None is treated as empty.

    Returns:
        COMMERCE_NOTIFY if the subject or body contains any entry of
        ORDER_KEYWORDS, otherwise COMMERCE_SILENT.
    """
    # Guard the subject the same way as the body so a message without a
    # subject does not fail on the string concatenation.
    text = (subject or '') + ' ' + (body or '')
    if _has_any(text, ORDER_KEYWORDS):
        return COMMERCE_NOTIFY
    return COMMERCE_SILENT

def _cond_spam_but_alert(subject, body):
    """Conditional rule: treat the sender as spam unless the mail is an alert.

    Args:
        subject: subject line; None is treated as empty.
        body:    plain text body; None is treated as empty.

    Returns:
        COMMERCE_NOTIFY if the subject or body contains any entry of
        ALERT_KEYWORDS, otherwise SPAM.
    """
    # Guard the subject the same way as the body so a message without a
    # subject does not fail on the string concatenation.
    text = (subject or '') + ' ' + (body or '')
    if _has_any(text, ALERT_KEYWORDS):
        return COMMERCE_NOTIFY
    return SPAM

def _cond_spam_but_payment_due(subject, body):
    """Conditional rule: treat the sender as spam unless a payment is due.

    Only the subject is inspected; *body* is accepted so the signature matches
    the other conditional rules but is not read.
    NOTE(review): unlike the sibling rules the body is ignored here -- confirm
    that this is intentional.

    Args:
        subject: subject line; None is treated as empty.
        body:    plain text body (unused).

    Returns:
        COMMERCE_NOTIFY if the subject contains any entry of
        PAYMENT_DUE_KEYWORDS, otherwise SPAM.
    """
    # Normalise here so a missing subject yields SPAM instead of an exception.
    if _has_any(subject or '', PAYMENT_DUE_KEYWORDS):
        return COMMERCE_NOTIFY
    return SPAM


# Exact-address rules, one row per (email, tier_or_fn, account).
#   email:      full sender address, lower-case (lookups use the lower-cased
#               sender, so a mixed-case entry would not match as written)
#   tier_or_fn: a tier constant, or a callable (subject, body) -> tier for
#               senders whose tier depends on the message content
#   account:    None=both, 'gmail'=gmail only, 'proton'=proton only
# An address may appear on several rows (one per account); rows are tried in
# the order listed and the first whose account applies is used. The account
# column is authoritative -- the section comments are only a reading aid.
_EXACT_LIST = [
    # IMPORTANT (gmail, plus one address that applies to both accounts)
    ('david.r.art@gmail.com',                                   IMPORTANT,                  'gmail'),
    ('jklimek@harrisbeachmurtha.com',                           IMPORTANT,                  'gmail'),
    ('mah.investor@gmail.com',                                  IMPORTANT,                  None),
    # PROTON IMPORTANT
    ('vtrendano@gmail.com',                                     IMPORTANT,                  'proton'),
    ('search-api@brave.com',                                    IMPORTANT,                  'proton'),
    # GMAIL PRIORITY
    ('chrissydags@gmail.com',                                   PRIORITY,                   'gmail'),
    # GMAIL COMMERCE_NOTIFY (search-api@brave.com is IMPORTANT on proton, see above)
    ('search-api@brave.com',                                    COMMERCE_NOTIFY,            'gmail'),
    ('info@em.aspendental.com',                                 COMMERCE_NOTIFY,            'gmail'),
    # Pam Cole - conditional, both accounts
    ('pam.cole@peaktech.com',                                   _cond_delivery_or_invoice,  None),
    # Disturbed Dimensions ProtonMail specific address
    ('store+45652181148@t.shopifyemail.com',                    _cond_order_or_shipping,    'proton'),
    # GMAIL CORRESPONDENCE
    ('ehuth@brilliantearth.com',                                CORRESPONDENCE,             'gmail'),
    ('rajesh.bhawnani@brilliantearth.com',                      CORRESPONDENCE,             'gmail'),
    # CORRESPONDENCE_SILENT both
    ('victoriastull@pm.me',                                     CORRESPONDENCE_SILENT,      None),
    # COMMERCE_SILENT (both accounts unless the account column says otherwise)
    ('hello@1password.com',                                     COMMERCE_SILENT,            None),
    ('ea-donotreply@anthem.com',                                COMMERCE_SILENT,            None),
    ('coastalpayroll@myisolved.com',                            COMMERCE_SILENT,            None),
    ('specials@e.uline.com',                                    COMMERCE_SILENT,            None),
    ('invoice+statements@mail.anthropic.com',                   COMMERCE_SILENT,            'gmail'),
    # SPAM exact addresses (gmail or both accounts). The first row is
    # conditional: it is SPAM only when no alert keyword matches.
    ('onlinebanking@ealerts.bankofamerica.com',                 _cond_spam_but_alert,       'gmail'),
    ('bankofamericatransfers@mail.transfers.bankofamerica.com', SPAM,                       None),
    ('calendar-notification@google.com',                        SPAM,                       'gmail'),
    ('verify@x.com',                                            SPAM,                       'gmail'),
    ('no_reply@monday.com',                                     SPAM,                       'gmail'),
    ('sales@trueterpenes.com',                                  SPAM,                       None),
    ('marketing@udio.com',                                      SPAM,                       None),
    ('email@email.shopify.com',                                 SPAM,                       None),
    ('applecreatorstudio@insideapple.apple.com',                SPAM,                       None),
    ('duthie_orthodontics.sr@e.smilereminder.com',              SPAM,                       'gmail'),
    ('replies@mail.public.com',                                 SPAM,                       'gmail'),
    # PROTON SPAM exact. The nyseg row is conditional: it is SPAM only when
    # the subject has no payment-due keyword.
    ('will@insureinct.com',                                     SPAM,                       'proton'),
    ('evan.mcmenamin@adp.com',                                  SPAM,                       'proton'),
    ('rundonotreply@adp.com',                                   SPAM,                       'proton'),
    ('noreply@email.openai.com',                                SPAM,                       'proton'),
    ('noreply@billing.nyseg.com',                               _cond_spam_but_payment_due, 'proton'),
    ('support@info.printful.com',                               SPAM,                       'proton'),
    ('vzwmail@ecrmemail.verizonwireless.com',                   SPAM,                       'proton'),
    ('evolvingai@mail.beehiiv.com',                             SPAM,                       'proton'),
    ('noreply@h5.hilton.com',                                   SPAM,                       'proton'),
]

# Domain rules, one row per (domain, tier_or_fn, account), with the same
# column meanings as the exact-address table (account None = both accounts).
# classify() consults these only after the exact-address rules, and a row
# applies to the listed domain itself as well as to any of its subdomains.
# The account column is authoritative -- the section comments are only a
# reading aid.
_DOMAIN_LIST = [
    # SPAM domains (gmail, or both accounts where the account column is None)
    ('mroutsidepro.com',             SPAM,                       'gmail'),
    ('pioneervalleystorage.com',     SPAM,                       'gmail'),
    ('mail.petsmart.com',            SPAM,                       'gmail'),
    ('mail.traxnyc.com',             SPAM,                       'gmail'),
    ('imktg.menswearhouse.com',      SPAM,                       'gmail'),
    ('talkingbass.net',              SPAM,                       'gmail'),
    ('uber.com',                     SPAM,                       'gmail'),
    ('engage.canva.com',             SPAM,                       'gmail'),
    ('eff.org',                      SPAM,                       'gmail'),
    ('mail.public.com',              SPAM,                       'gmail'),
    ('quora.com',                    SPAM,                       'gmail'),
    ('learn.mail.monday.com',        SPAM,                       None),
    ('shared1.ccsend.com',           SPAM,                       'gmail'),
    ('e.smilereminder.com',          SPAM,                       'gmail'),
    ('beehiiv.com',                  SPAM,                       'gmail'),
    ('insideapple.apple.com',        SPAM,                       None),
    # Hilton spam both
    ('h5.hilton.com',                SPAM,                       None),
    ('hiltongrandvacations.com',     SPAM,                       None),
    # Verizon B2B both
    ('b2binfo.verizonwireless.com',  SPAM,                       None),
    ('insureinct.com',               SPAM,                       None),
    # SPAM domains (proton, or both accounts where the account column is None)
    ('message.globalindustrial.com', SPAM,                       'proton'),
    ('ccsend.com',                   SPAM,                       'proton'),
    ('unsub.beehiiv.com',            SPAM,                       'proton'),
    ('jiffylubespecials.com',        SPAM,                       None),
    # Disturbed Dimensions gmail (shopify domain, order/shipping only)
    ('t.shopifyemail.com',           _cond_order_or_shipping,    'gmail'),
    # Anthropic mail domain gmail only (global [action needed] override applies separately)
    ('mail.anthropic.com',           COMMERCE_SILENT,            'gmail'),
]

def _build_rule_map(rules):
    """Group (key, tier_or_fn, account) rows by key, preserving row order.

    Keys are lower-cased because every lookup is done with a lower-cased
    sender address or domain; without this a mixed-case table entry would
    silently never match.

    Returns:
        dict mapping key -> list of (tier_or_fn, account) in table order.
    """
    grouped = {}
    for key, tier_or_fn, account in rules:
        grouped.setdefault(key.lower(), []).append((tier_or_fn, account))
    return grouped


# Lookup maps built once at import time from the rule tables.
_EXACT_MAP = _build_rule_map(_EXACT_LIST)
_DOMAIN_MAP = _build_rule_map(_DOMAIN_LIST)

# Domain-ending rules, one row per (suffix, tier, account); account None = both.
# classify() checks these first, before the exact-address and domain tables,
# so a sender whose domain ends in .gov or .edu is always IMPORTANT.
_SUFFIX_RULES = [
    ('.gov', IMPORTANT, None),
    ('.edu', IMPORTANT, None),
]


def extract_email_address(sender):
    """Return the lower-cased, stripped email address from a raw From: value.

    Handles both the 'Display Name <addr@host>' form and a bare address.

    Args:
        sender: raw From: header value; None is treated as an empty string.

    Returns:
        The text inside the last '<...>' group if there is one, otherwise the
        whole value; in both cases lower-cased and stripped of surrounding
        whitespace.
    """
    raw = sender or ''
    # In 'name <addr>' the address is the final angle-bracket group. Taking
    # the first group instead would let a display name such as
    # '"<trusted@corp.com>" <someone@else.biz>' masquerade as the trusted
    # address and inherit its tier.
    bracketed = re.findall(r'<([^<>]+)>', raw)
    if bracketed:
        return bracketed[-1].lower().strip()
    return raw.lower().strip()


def get_domain(email_addr):
    """Return the part after '@' in *email_addr*.

    An empty string is returned unless the address contains exactly one '@'.
    """
    _local, at_sign, host = email_addr.partition('@')
    # No '@' at all, or a second '@' left over in the remainder: not a plain
    # local@domain address.
    if not at_sign or '@' in host:
        return ''
    return host


def _resolve(tier_or_fn, subject, body):
    """Turn a rule value into a tier.

    A callable rule is invoked as rule(subject, body) and its result is
    returned; any other value is returned unchanged.
    """
    return tier_or_fn(subject, body) if callable(tier_or_fn) else tier_or_fn


def _pick(candidates, account, subject, body):
    """Resolve the first candidate rule that applies to *account*.

    *candidates* is a sequence of (tier_or_fn, rule_account) pairs. A pair
    applies when its rule_account is None or equals *account*. The first
    applicable rule is resolved via _resolve(); None is returned when no pair
    applies.
    """
    applicable = (
        rule
        for rule, scope in candidates
        if scope is None or scope == account
    )
    # Private sentinel so "nothing applied" is distinguishable from any rule value.
    nothing = object()
    chosen = next(applicable, nothing)
    if chosen is nothing:
        return None
    return _resolve(chosen, subject, body)


def classify(sender_raw, subject, body, account):
    """
    Classify an email for the given account.

    Rules are consulted in this order and the first one that applies to
    *account* decides the tier:

      1. domain-ending rules (_SUFFIX_RULES, i.e. .gov / .edu)
      2. exact sender address (_EXACT_MAP)
      3. sender domain, then each parent domain from most to least specific
         (_DOMAIN_MAP): 'a.b.example.com' tries 'a.b.example.com',
         'b.example.com', 'example.com', 'com'

    The "[action needed]" subject override is not applied here; callers check
    it separately with subject_action_override().

    Args:
        sender_raw: raw From: header (None is treated as empty)
        subject:    subject line (None is treated as empty)
        body:       plain text body
        account:    'gmail' or 'proton'

    Returns:
        tier string (IMPORTANT, PRIORITY, CORRESPONDENCE, COMMERCE_NOTIFY,
                     COMMERCE_SILENT, CORRESPONDENCE_SILENT, SPAM, UNKNOWN)
    """
    # Normalise missing headers once so none of the rule helpers has to cope
    # with None.
    sender_raw = sender_raw or ''
    subject = subject or ''

    email_addr = extract_email_address(sender_raw)
    domain = get_domain(email_addr)

    # 1. Domain-ending rules (.gov, .edu)
    for suffix, tier, rule_acct in _SUFFIX_RULES:
        if domain.endswith(suffix):
            if rule_acct is None or rule_acct == account:
                return tier

    # 2. Exact email match
    candidates = _EXACT_MAP.get(email_addr)
    if candidates is not None:
        result = _pick(candidates, account, subject, body)
        if result is not None:
            return result

    # 3. Domain match, most specific first. Index 0 is the full domain; each
    #    further step drops the left-most label (sub.spam.com -> spam.com).
    #    Walking the labels keeps the outcome independent of table order and
    #    avoids scanning every domain rule.
    labels = domain.split('.')
    for start in range(len(labels)):
        candidates = _DOMAIN_MAP.get('.'.join(labels[start:]))
        if candidates is None:
            continue
        result = _pick(candidates, account, subject, body)
        if result is not None:
            return result

    return UNKNOWN


def get_display_name(sender_raw):
    """Return the display name from a raw From: header value.

    For 'Name <addr>' or '"Name" <addr>' the name is returned without the
    optional surrounding quotes and without surrounding whitespace. When no
    display name precedes the '<', the result of extract_email_address() is
    returned instead.
    """
    header = sender_raw.strip()
    name_match = re.match(r'^"?([^"<]+?)"?\s*<', header)
    if name_match is None:
        return extract_email_address(sender_raw)
    return name_match.group(1).strip()


def subject_action_override(subject):
    """Return True if subject contains [action needed] -- global notify override.

    The check is case-insensitive. A missing subject (None) or an empty one
    never triggers the override.
    """
    return '[action needed]' in (subject or '').lower()