"""Compute self-citation ratio for the paper.

Reads:  argv[1] (refs.bib path) or paper/bib/refs.bib
Prints: ratio = self_cites / total_refs; exits 0 if < 0.20, non-zero if >= 0.20
"""
import re
import sys
import unicodedata
from pathlib import Path

# Surnames of the submission's authors, stored lowercase and ASCII-only because
# is_self_cite() reduces every cited surname to the letters a-z before lookup.
AUTHORS_OF_SUBMISSION = {"osterrieder", "chan", "chu", "zhang", "mare"}

# Self-citation ratio at or above which main() reports BLOCKED and exits 1.
HARD_GATE = 0.20


def _delimited_end(text: str, start: int) -> int:
    """Return the index of the delimiter that closes the value opened at ``text[start]``.

    Supports ``{...}`` and ``"..."`` values with braces nested to any depth.
    Returns -1 when ``text[start]`` is not an opening delimiter or the value
    is never closed.
    """
    if start >= len(text) or text[start] not in '{"':
        return -1
    quoted = text[start] == '"'
    depth = 0
    # For a braced value the scan starts on the opening brace itself so that it
    # is counted; for a quoted value it starts just after the opening quote.
    for idx in range(start + 1 if quoted else start, len(text)):
        ch = text[idx]
        if ch == "{":
            depth += 1
        elif ch == "}":
            depth -= 1
            if not quoted and depth == 0:
                return idx
        elif ch == '"' and quoted and depth == 0:
            # A quote inside braces (e.g. {\"u}) is an accent, not the terminator.
            return idx
    return -1


def parse_bib(path: Path) -> list[str]:
    """Return the lowercased author field of every entry that has one.

    Args:
        path: Location of a UTF-8 encoded BibTeX file.

    Returns:
        One string per entry with a parsable ``author`` field, in file order.
        Each value is lowercased and has runs of whitespace (including line
        breaks inside wrapped author lists) collapsed to single spaces.
        Entries without an author field contribute nothing.

    Raises:
        OSError: If the file cannot be read.
        UnicodeDecodeError: If the file is not valid UTF-8.
    """
    text = path.read_text(encoding="utf-8")
    authors = []
    # An entry starts at an "@" that begins a line (optionally indented).
    for entry in re.split(r"(?m)^[ \t]*@", text):
        # \b keeps fields such as "shortauthor" or "coauthor" from matching.
        for field in re.finditer(r"\bauthor\s*=\s*", entry, re.IGNORECASE):
            end = _delimited_end(entry, field.end())
            if end == -1:
                continue
            # Balanced-brace scan: a regex like [^}]* would stop at the first
            # "}" and truncate names such as "M{\"u}ller, Hans and Chan, Bob".
            value = entry[field.end() + 1:end]
            authors.append(" ".join(value.split()).lower())
            break
    return authors


def is_self_cite(author_field: str, *, authors: "set[str] | None" = None) -> bool:
    """Return True if any author in a BibTeX author field is a submission author.

    Args:
        author_field: Content of a BibTeX ``author`` field, i.e. names joined
            by the word "and". Each name may be "Surname, Given" or
            "Given Surname"; in the latter form the last whitespace-separated
            token is taken as the surname.
        authors: Lowercase ASCII surnames to match against. Defaults to the
            module-level ``AUTHORS_OF_SUBMISSION``.

    Returns:
        True when at least one surname, reduced to the letters a-z after
        folding case and accents, is in ``authors``.

    Matching is by surname only, so an unrelated author who shares a surname
    with a submission author is counted as a self-citation.
    """
    known = AUTHORS_OF_SUBMISSION if authors is None else authors
    # BibTeX separates names with "and" surrounded by any whitespace, which
    # includes the line breaks of a wrapped author list.
    for name in re.split(r"\s+and\s+", author_field.strip(), flags=re.IGNORECASE):
        # Drop control words that take a braced argument (\c{s}, \v{z}, ...)
        # so their letters do not leak into the surname ("marecs").
        name = re.sub(r"\\[A-Za-z]+\s*(?=\{)", "", name)
        if "," in name:
            surname = name.split(",", 1)[0]
        else:
            tokens = name.split()
            surname = tokens[-1] if tokens else ""
        # NFKD splits accented letters into base letter + combining mark, so
        # the a-z filter keeps the base letter ("Ö" -> "o", "ş" -> "s")
        # instead of deleting the whole character.
        folded = unicodedata.normalize("NFKD", surname.casefold())
        key = re.sub(r"[^a-z]", "", folded)
        if key and key in known:
            return True
    return False


def main() -> None:
    """Evaluate the self-citation gate and terminate with its exit status.

    The bibliography path is taken from the first command-line argument when
    one is given; otherwise it is ``paper/bib/refs.bib`` under the parent of
    this script's directory.

    Exit status:
        0 -- ratio is below ``HARD_GATE``
        1 -- ratio is at or above ``HARD_GATE``
        2 -- the file does not exist, or no author fields were parsed
    """
    cli_args = sys.argv[1:]
    if cli_args:
        bib_path = Path(cli_args[0])
    else:
        bib_path = Path(__file__).parent.parent / "paper" / "bib" / "refs.bib"

    if not bib_path.exists():
        print(f"ERROR: {bib_path} not found")
        sys.exit(2)

    author_fields = parse_bib(bib_path)
    total = len(author_fields)
    if not total:
        # Also guards the division below against a zero denominator.
        print("ERROR: no @entries with author fields parsed")
        sys.exit(2)

    self_count = 0
    for author_field in author_fields:
        if is_self_cite(author_field):
            self_count += 1
    ratio = self_count / total

    print(f"Self-citations: {self_count}/{total} = {ratio:.3f}")
    print(f"Hard gate: {HARD_GATE}")

    if ratio < HARD_GATE:
        print("GATE 6: PASS")
        sys.exit(0)
    print("GATE 6: BLOCKED")
    sys.exit(1)


# Run the gate only when executed as a script, so importing this module
# does not trigger sys.exit().
if __name__ == "__main__":
    main()
