"""Extract body content from pandoc-generated LaTeX for use with main.tex preamble."""
import re
from pathlib import Path

ROOT = Path(__file__).parent.parent
SRC = ROOT / "paper" / "main_generated.tex"
DST = ROOT / "paper" / "body.tex"

_BEGIN_DOCUMENT = r"\begin{document}"
_END_DOCUMENT = r"\end{document}"
_MARKERS_MISSING = "document markers not found"
_OUTPUT_HEADER = "% Body content generated from revised.md via pandoc\n"
_INTRO_HEADING = r"\section{Introduction}"

# pandoc's \bibliography{...} line; main.tex has its own and two conflict in bibtex.
_BIBLIOGRAPHY = re.compile(r"\\bibliography\{[^}]*\}\s*")

# Title/author/maketitle artifacts pandoc may have added to the body.
_TITLE_ARTIFACTS = tuple(
    re.compile(pattern)
    for pattern in (
        r"\\title\{[^}]*\}\n?",
        r"\\author\{[^}]*\}\n?",
        r"\\date\{[^}]*\}\n?",
        r"\\maketitle\n?",
    )
)

# The "Case studies" heading followed by whatever label pandoc generated for it.
_CASE_STUDIES_HEADING = re.compile(
    r"(\\section\{Case studies of anomaly and fraud detection in blockchain networks\})"
    r"\\label\{[^}]*\}"
)
_CASE_STUDIES_LABEL = r"\label{sec:ch5}"


def extract_body(text: str) -> str:
    """Return the cleaned document body of a pandoc-generated LaTeX file.

    The text between ``\\begin{document}`` and the first ``\\end{document}``
    that follows it is taken, stripped of surrounding whitespace, and then:

    * every ``\\bibliography{...}`` line is removed;
    * ``\\title{...}``, ``\\author{...}``, ``\\date{...}`` and ``\\maketitle``
      are removed;
    * everything before ``\\section{Introduction}`` is dropped (when that
      heading is present);
    * the label of the "Case studies ..." section is replaced by ``sec:ch5``.

    Args:
        text: Full contents of the generated ``.tex`` file.

    Returns:
        The body text, without the header comment that ``main`` prepends.

    Raises:
        ValueError: If ``\\begin{document}`` is missing, or no
            ``\\end{document}`` follows it.
    """
    begin = text.find(_BEGIN_DOCUMENT)
    if begin < 0:
        raise ValueError(_MARKERS_MISSING)
    body_start = begin + len(_BEGIN_DOCUMENT)
    # Search only after \begin{document}: an earlier occurrence (e.g. inside a
    # preamble comment) would otherwise produce an empty slice without any error.
    body_end = text.find(_END_DOCUMENT, body_start)
    if body_end < 0:
        raise ValueError(_MARKERS_MISSING)

    body = text[body_start:body_end].strip()
    body = _BIBLIOGRAPHY.sub("", body)
    for artifact in _TITLE_ARTIFACTS:
        body = artifact.sub("", body)

    # Drop the duplicate title + author block + abstract that main.tex already
    # provides. If the heading is absent the body is kept whole.
    intro = body.find(_INTRO_HEADING)
    if intro > 0:
        body = body[intro:]

    # Tag the Case studies chapter for cross-referencing. A callable replacement
    # keeps the backslashes literal (no escape processing of the replacement).
    return _CASE_STUDIES_HEADING.sub(
        lambda match: match.group(1) + _CASE_STUDIES_LABEL, body
    )


def main() -> None:
    """Read ``SRC``, write the extracted body to ``DST`` and report its size."""
    source = SRC.read_text(encoding="utf-8")
    try:
        body = extract_body(source)
    except ValueError as err:
        # Preserve the script's original failure mode: message on stderr, exit 1.
        raise SystemExit(str(err)) from None
    DST.write_text(_OUTPUT_HEADER + body, encoding="utf-8")
    print(f"body.tex: {DST.stat().st_size} bytes")


if __name__ == "__main__":
    main()