#!/usr/bin/env python3
"""Deterministic pre-pass for sanctum architecture scanner.

Extracts structural metadata from a memory agent's sanctum architecture
that the LLM scanner can use instead of reading all files itself. Covers:
- SKILL.md content line count (non-blank, non-frontmatter)
- Template file inventory (which of the 6 standard templates exist)
- CREED template section inventory
- BOND template section inventory
- Capability reference frontmatter fields
- Init script parameter extraction (SKILL_NAME, TEMPLATE_FILES, SKILL_ONLY_FILES, EVOLVABLE)
- First-breath.md section inventory
- PULSE template presence and sections

The full analysis only runs for memory agents (agents with assets/ containing
template files); any other skill gets a short not-a-memory-agent result.
"""

# /// script
# requires-python = ">=3.9"
# dependencies = []
# ///

from __future__ import annotations

import argparse
import json
import re
import sys
from datetime import datetime, timezone
from pathlib import Path


# Template files looked up under assets/. main() records a "critical"
# finding for each one that is missing.
STANDARD_TEMPLATES = [
    "INDEX-template.md",
    "PERSONA-template.md",
    "CREED-template.md",
    "BOND-template.md",
    "MEMORY-template.md",
    "CAPABILITIES-template.md",
]

# Templates that are inventoried (flagged "optional": True in the report)
# but produce no finding when absent.
OPTIONAL_TEMPLATES = [
    "PULSE-template.md",
]

# Headings expected in assets/CREED-template.md. Each is matched as a
# case-insensitive substring of the file's H2/H3 headings; a miss yields a
# "high" finding.
CREED_REQUIRED_SECTIONS = [
    "The Sacred Truth",
    "Mission",
    "Core Values",
    "Standing Orders",
    "Philosophy",
    "Boundaries",
    "Anti-Patterns",
    "Dominion",
]

# Headings expected in references/first-breath.md when main() classifies it
# as calibration style (some heading contains "Absorb").
FIRST_BREATH_CALIBRATION_SECTIONS = [
    "Save As You Go",
    "Pacing",
    "Chase What Catches",
    "Absorb Their Voice",
    "Show Your Work",
    "Hear the Silence",
    "The Territories",
    "Wrapping Up",
]

# Headings expected in references/first-breath.md in every other case
# (configuration style, and also when the style is reported as "unknown").
FIRST_BREATH_CONFIG_SECTIONS = [
    "Save As You Go",
    "Discovery",
    "Urgency",
    "Wrapping Up",
]


def count_content_lines(file_path: Path) -> int:
    """Count non-blank, non-frontmatter lines in a markdown file.

    A leading ``---`` ... ``---`` frontmatter block is removed before
    counting, including when the closing delimiter is the very last line of
    the file and has no trailing newline.

    Args:
        file_path: Markdown file to read. Decoded as UTF-8; a leading byte
            order mark is ignored.

    Returns:
        Number of lines that contain at least one non-whitespace character.
    """
    # "utf-8-sig" decodes plain UTF-8 too, and drops a BOM that would
    # otherwise stop the frontmatter pattern from matching at position 0.
    content = file_path.read_text(encoding="utf-8-sig")

    # The closing delimiter may be followed by a newline or by end-of-file.
    body = re.sub(
        r"^---\s*\n.*?\n---\s*(?:\n|\Z)",
        "",
        content,
        count=1,
        flags=re.DOTALL,
    )

    return sum(1 for line in body.split("\n") if line.strip())


def extract_h2_h3_sections(file_path: Path) -> list[str]:
    """Extract H2 and H3 headings from a markdown file.

    Lines inside fenced code blocks (backtick or tilde fences) are skipped,
    so a ``## comment`` in a code sample is not reported as a section.

    Args:
        file_path: Markdown file to scan. Decoded as UTF-8; a leading byte
            order mark is ignored.

    Returns:
        Heading texts in document order, without the leading ``#`` markers.
        Empty when ``file_path`` is not an existing regular file.
    """
    sections: list[str] = []
    if not file_path.is_file():
        return sections

    fence_re = re.compile(r"^ {0,3}(`{3,}|~{3,})(.*)$")
    heading_re = re.compile(r"^#{2,3}\s+(.+)")
    open_fence = None  # marker string of the fence we are inside, if any

    for line in file_path.read_text(encoding="utf-8-sig").split("\n"):
        fence = fence_re.match(line)
        if fence:
            marker, info = fence.group(1), fence.group(2)
            if open_fence is None:
                # A backtick run followed by more backticks on the same line
                # is inline code, not a fence opener.
                if not (marker[0] == "`" and "`" in info):
                    open_fence = marker
                    continue
            elif (
                marker[0] == open_fence[0]
                and len(marker) >= len(open_fence)
                and not info.strip()
            ):
                # Closers use the same character, are at least as long as
                # the opener, and carry no info string.
                open_fence = None
                continue
        if open_fence is not None:
            continue

        match = heading_re.match(line)
        if match:
            sections.append(match.group(1).strip())
    return sections


def parse_frontmatter(file_path: Path) -> dict:
    """Extract top-level ``key: value`` pairs from a file's YAML frontmatter.

    This is a deliberately small line-based reader, not a YAML parser. It
    returns only the top-level keys: comment lines and lines indented deeper
    than the first key (block-scalar bodies, nested mappings) are ignored so
    their contents cannot masquerade as top-level fields.

    Args:
        file_path: Markdown file to read. Decoded as UTF-8; a leading byte
            order mark is ignored.

    Returns:
        Mapping of key to value with surrounding whitespace and quote
        characters removed. Empty when the file has no frontmatter block.
    """
    meta: dict = {}
    content = file_path.read_text(encoding="utf-8-sig")
    # Require the closing delimiter to be a line of its own (or end the file).
    match = re.match(r"^---\s*\n(.*?)\n---\s*(?:\n|\Z)", content, re.DOTALL)
    if not match:
        return meta

    base_indent = None
    for line in match.group(1).split("\n"):
        stripped = line.strip()
        if not stripped or stripped.startswith("#"):
            continue
        indent = len(line) - len(line.lstrip(" \t"))
        if base_indent is None:
            # Indentation of the first real line defines the top level.
            base_indent = indent
        if indent > base_indent:
            continue
        key, sep, value = line.partition(":")
        if sep:
            meta[key.strip()] = value.strip().strip("'\"")
    return meta


def extract_init_script_params(script_path: Path) -> dict:
    """Extract agent-specific configuration from init-sanctum.py.

    The script is scanned textually (never imported or executed) for the
    assignments ``SKILL_NAME``, ``TEMPLATE_FILES``, ``SKILL_ONLY_FILES`` and
    ``EVOLVABLE``. Only real assignments count: the name must start its line
    (optionally indented, optionally type-annotated), so commented-out
    assignments and longer identifiers that merely end with the same name
    are ignored, as are commented-out entries inside a list/set literal.

    Args:
        script_path: Expected location of the init script.

    Returns:
        Dict with keys ``exists`` (bool), ``skill_name`` (str or None),
        ``template_files`` (list of str), ``skill_only_files`` (list of str)
        and ``evolvable`` (bool or None). Values keep their defaults when
        the script is missing or an assignment is not found.
    """
    params = {
        "exists": script_path.is_file(),
        "skill_name": None,
        "template_files": [],
        "skill_only_files": [],
        "evolvable": None,
    }
    if not params["exists"]:
        return params

    content = script_path.read_text(encoding="utf-8-sig")

    def find_assignment(name: str, value_pattern: str, flags: int = 0):
        # Anchor at line start so "# NAME = ..." and "OTHER_NAME = ..." do
        # not match; tolerate an annotation such as "NAME: str = ...".
        return re.search(
            rf"^[ \t]*{name}[ \t]*(?::[^=\n]*)?=\s*{value_pattern}",
            content,
            re.MULTILINE | flags,
        )

    def quoted_strings(literal_body: str) -> list:
        # Drop whole-line comments so disabled entries are not reported.
        live_lines = [
            ln for ln in literal_body.split("\n")
            if not ln.lstrip().startswith("#")
        ]
        return re.findall(r'["\']([^"\']+)["\']', "\n".join(live_lines))

    name_match = find_assignment("SKILL_NAME", r'["\']([^"\']+)["\']')
    if name_match:
        params["skill_name"] = name_match.group(1)

    tmpl_match = find_assignment("TEMPLATE_FILES", r"\[(.*?)\]", re.DOTALL)
    if tmpl_match:
        params["template_files"] = quoted_strings(tmpl_match.group(1))

    only_match = find_assignment("SKILL_ONLY_FILES", r"\{(.*?)\}", re.DOTALL)
    if only_match:
        params["skill_only_files"] = quoted_strings(only_match.group(1))

    ev_match = find_assignment("EVOLVABLE", r"(True|False)\b")
    if ev_match:
        params["evolvable"] = ev_match.group(1) == "True"

    return params


def check_section_present(sections: list[str], keyword: str) -> bool:
    """Report whether ``keyword`` occurs inside any heading, ignoring case.

    The comparison is a substring test, so a heading only needs to contain
    the keyword somewhere; it does not have to equal it.
    """
    needle = keyword.lower()
    for heading in sections:
        if needle in heading.lower():
            return True
    return False


def main():
    """Run the pre-pass from the command line and emit a JSON report.

    Parses the positional ``skill_path`` and the optional ``-o/--output``
    argument, inspects the skill directory and hands the result to
    ``output_json``. Exits with status 2 when ``skill_path`` is not a
    directory. When ``assets/`` holds no ``*-template.md`` file, a short
    result with ``"is_memory_agent": False`` is emitted instead of the full
    analysis.
    """
    parser = argparse.ArgumentParser(
        description="Pre-pass for sanctum architecture scanner"
    )
    parser.add_argument("skill_path", help="Path to the agent skill directory")
    parser.add_argument(
        "-o", "--output", help="Output JSON file path (default: stdout)"
    )
    args = parser.parse_args()

    skill_path = Path(args.skill_path).resolve()
    if not skill_path.is_dir():
        print(f"Error: {skill_path} is not a directory", file=sys.stderr)
        sys.exit(2)

    assets_dir = skill_path / "assets"
    references_dir = skill_path / "references"
    scripts_dir = skill_path / "scripts"
    skill_md = skill_path / "SKILL.md"

    # List the template files once and reuse the result for both the
    # memory-agent check and the TEMPLATE_FILES cross-check, so the two can
    # never disagree. is_dir()/is_file() keep a stray file named "assets" or
    # a directory named "*-template.md" from crashing or skewing the scan.
    if assets_dir.is_dir():
        actual_templates = sorted(
            entry.name
            for entry in assets_dir.iterdir()
            if entry.is_file() and entry.name.endswith("-template.md")
        )
    else:
        actual_templates = []
    is_memory_agent = bool(actual_templates)

    if not is_memory_agent:
        result = {
            "timestamp": datetime.now(timezone.utc).isoformat(),
            "skill_path": str(skill_path),
            "is_memory_agent": False,
            "message": "Not a memory agent — no sanctum templates found in assets/",
        }
        output_json(result, args.output)
        return

    # SKILL.md analysis
    skill_md_present = skill_md.is_file()
    skill_analysis = {
        "exists": skill_md_present,
        "content_lines": count_content_lines(skill_md) if skill_md_present else 0,
        "sections": extract_h2_h3_sections(skill_md) if skill_md_present else [],
    }

    # Template inventory (standard first, then optional, as in the report)
    template_inventory = {}
    for tmpl in STANDARD_TEMPLATES + OPTIONAL_TEMPLATES:
        tmpl_path = assets_dir / tmpl
        present = tmpl_path.is_file()
        entry = {"exists": present}
        if tmpl in OPTIONAL_TEMPLATES:
            entry["optional"] = True
        entry["sections"] = extract_h2_h3_sections(tmpl_path) if present else []
        entry["content_lines"] = count_content_lines(tmpl_path) if present else 0
        template_inventory[tmpl] = entry

    # CREED section check
    creed_path = assets_dir / "CREED-template.md"
    creed_sections = extract_h2_h3_sections(creed_path) if creed_path.is_file() else []
    creed_check = {
        section: check_section_present(creed_sections, section)
        for section in CREED_REQUIRED_SECTIONS
    }

    # First-breath analysis
    first_breath_path = references_dir / "first-breath.md"
    first_breath_present = first_breath_path.is_file()
    fb_sections = (
        extract_h2_h3_sections(first_breath_path) if first_breath_present else []
    )

    # Detect style: calibration has "Absorb Their Voice", configuration has "Discovery"
    is_calibration = check_section_present(fb_sections, "Absorb")
    is_configuration = (
        check_section_present(fb_sections, "Discovery") and not is_calibration
    )
    if is_calibration:
        fb_style = "calibration"
    elif is_configuration:
        fb_style = "configuration"
    else:
        fb_style = "unknown"

    # An "unknown" style is checked against the configuration section list.
    expected_sections = (
        FIRST_BREATH_CALIBRATION_SECTIONS
        if is_calibration
        else FIRST_BREATH_CONFIG_SECTIONS
    )
    fb_check = {
        section: check_section_present(fb_sections, section)
        for section in expected_sections
    }

    first_breath_analysis = {
        "exists": first_breath_present,
        "style": fb_style,
        "sections": fb_sections,
        "section_checks": fb_check,
    }

    # Capability frontmatter scan: every references/*.md file except
    # first-breath.md that has a non-empty frontmatter block.
    capabilities = []
    if references_dir.is_dir():
        for md_file in sorted(references_dir.glob("*.md")):
            if md_file.name == "first-breath.md" or not md_file.is_file():
                continue
            meta = parse_frontmatter(md_file)
            if not meta:
                continue
            cap_sections = extract_h2_h3_sections(md_file)
            capabilities.append({
                "file": md_file.name,
                "has_name": "name" in meta,
                "has_code": "code" in meta,
                "has_description": "description" in meta,
                "sections": cap_sections,
                # Memory agent patterns
                "has_memory_integration": check_section_present(
                    cap_sections, "Memory Integration"
                ),
                "has_after_session": check_section_present(cap_sections, "After"),
                "has_success": check_section_present(cap_sections, "Success"),
            })

    # Init script analysis
    init_script_path = scripts_dir / "init-sanctum.py"
    init_params = extract_init_script_params(init_script_path)

    # Cross-checks are None (not applicable) when the init script is absent.
    if init_params["exists"]:
        # init TEMPLATE_FILES vs actual templates in assets/
        init_template_match = (
            set(init_params.get("template_files", [])) == set(actual_templates)
        )
        # init SKILL_NAME vs folder name
        skill_name_match = init_params.get("skill_name") == skill_path.name
    else:
        init_template_match = None
        skill_name_match = None

    # Findings
    findings = []

    if skill_analysis["content_lines"] > 40:
        findings.append({
            "severity": "high",
            "file": "SKILL.md",
            "message": f"Bootloader has {skill_analysis['content_lines']} content lines (target: ~30, max: 40)",
        })

    for tmpl in STANDARD_TEMPLATES:
        if not template_inventory[tmpl]["exists"]:
            findings.append({
                "severity": "critical",
                "file": f"assets/{tmpl}",
                "message": f"Missing standard template: {tmpl}",
            })

    for section, present in creed_check.items():
        if not present:
            findings.append({
                "severity": "high",
                "file": "assets/CREED-template.md",
                "message": f"Missing required CREED section: {section}",
            })

    if not first_breath_analysis["exists"]:
        findings.append({
            "severity": "critical",
            "file": "references/first-breath.md",
            "message": "Missing first-breath.md",
        })
    else:
        for section, present in first_breath_analysis["section_checks"].items():
            if not present:
                findings.append({
                    "severity": "high",
                    "file": "references/first-breath.md",
                    "message": f"Missing First Breath section: {section}",
                })

    if not init_params["exists"]:
        findings.append({
            "severity": "critical",
            "file": "scripts/init-sanctum.py",
            "message": "Missing init-sanctum.py",
        })
    else:
        if skill_name_match is False:
            findings.append({
                "severity": "critical",
                "file": "scripts/init-sanctum.py",
                "message": f"SKILL_NAME mismatch: script has '{init_params['skill_name']}', folder is '{skill_path.name}'",
            })
        if init_template_match is False:
            findings.append({
                "severity": "high",
                "file": "scripts/init-sanctum.py",
                "message": "TEMPLATE_FILES does not match actual templates in assets/",
            })

    result = {
        "timestamp": datetime.now(timezone.utc).isoformat(),
        "skill_path": str(skill_path),
        "is_memory_agent": True,
        "skill_md": skill_analysis,
        "template_inventory": template_inventory,
        "creed_sections": creed_check,
        "first_breath": first_breath_analysis,
        "capabilities": capabilities,
        "init_script": init_params,
        "cross_checks": {
            "skill_name_match": skill_name_match,
            "template_files_match": init_template_match,
        },
        "findings": findings,
        "finding_count": len(findings),
        "critical_count": sum(1 for f in findings if f["severity"] == "critical"),
        "high_count": sum(1 for f in findings if f["severity"] == "high"),
    }

    output_json(result, args.output)


def output_json(data: dict, output_path: str | None) -> None:
    """Serialize ``data`` as indented JSON to a file or to stdout.

    With a truthy ``output_path`` the JSON (plus a trailing newline) is
    written there, creating missing parent directories, and a confirmation
    line goes to stderr. Otherwise the JSON is printed to stdout.
    """
    rendered = json.dumps(data, indent=2)

    if not output_path:
        print(rendered)
        return

    target = Path(output_path)
    target.parent.mkdir(parents=True, exist_ok=True)
    target.write_text(rendered + "\n")
    print(f"Wrote: {output_path}", file=sys.stderr)


if __name__ == "__main__":
    main()