#!/usr/bin/env python3
"""Deterministic path standards scanner for BMad skills.

Validates all .md and .json files against BMad path conventions:
1. {project-root} only valid before /_bmad
2. Bare _bmad references must have {project-root} prefix
3. Config variables used directly (no double-prefix) — listed for completeness; this script has no automated check for it yet
4. Skill-internal paths must use ./ prefix (references/, scripts/, assets/)
5. No ../ parent directory references
6. No absolute paths
7. Frontmatter allows only name and description
8. No .md files at skill root except SKILL.md
"""

# /// script
# requires-python = ">=3.9"
# ///

from __future__ import annotations

import argparse
import json
import re
import sys
from datetime import datetime, timezone
from pathlib import Path


# Patterns to detect.
# Several patterns below begin with a non-capturing delimiter alternative
# (start of line, whitespace, a quote, a backtick or "(") and capture the path
# itself in group 1. For those, match.start() may point at the delimiter —
# possibly the newline ending the previous line — while match.start(1) is the
# first character of the path.
# `{project-root}/` whose next characters are NOT `_bmad`
PROJECT_ROOT_NOT_BMAD_RE = re.compile(r'\{project-root\}/(?!_bmad)')
# Bare _bmad without {project-root} prefix — matches `_bmad` followed by `/`
# or whitespace, unless immediately preceded by `{project-root}/`.
# NOTE(review): there is no word-boundary assertion, so `_bmad` at the end of
# a longer identifier (e.g. `my_bmad/`) also matches — confirm that is intended.
BARE_BMAD_RE = re.compile(r'(?<!\{project-root\}/)_bmad[/\s]')
# Absolute paths under a fixed set of common root directories (group 1 = path)
ABSOLUTE_PATH_RE = re.compile(r'(?:^|[\s"`\'(])(/(?:Users|home|opt|var|tmp|etc|usr)/\S+)', re.MULTILINE)
# Home-directory paths starting with ~/ (group 1 = path)
HOME_PATH_RE = re.compile(r'(?:^|[\s"`\'(])(~/\S+)', re.MULTILINE)
# Parent directory reference (still invalid) (group 1 = path)
RELATIVE_DOT_RE = re.compile(r'(?:^|[\s"`\'(])(\.\./\S+)', re.MULTILINE)
# Bare skill-internal paths without ./ prefix
# Match references/, scripts/, assets/ when NOT preceded by ./ (group 1 = path).
# The delimiter alternative already guarantees the preceding character is not
# `/`, so `./references/...` never matches; the (?<!\./) lookbehind is a
# redundant extra guard.
BARE_INTERNAL_RE = re.compile(r'(?:^|[\s"`\'(])(?<!\./)((?:references|scripts|assets)/\S+)', re.MULTILINE)

# Fenced code block detection (to skip examples showing wrong patterns).
# Matches three backticks at the start of a line; opening and closing fences
# look the same to this pattern.
FENCE_RE = re.compile(r'^```', re.MULTILINE)

# Valid frontmatter keys (any other key in SKILL.md frontmatter is reported)
VALID_FRONTMATTER_KEYS = {'name', 'description'}


def is_in_fenced_block(content: str, pos: int) -> bool:
    """Report whether offset *pos* of *content* lies inside a fenced code block.

    Counts the FENCE_RE matches in the text preceding *pos*. Fences toggle
    between "outside" and "inside", so an odd count means a block has been
    opened but not yet closed at *pos*.
    """
    fence_count = len(FENCE_RE.findall(content[:pos]))
    return fence_count % 2 != 0


def get_line_number(content: str, pos: int) -> int:
    """Return the 1-based number of the line containing offset *pos*."""
    # Every newline strictly before pos pushes the position one line down.
    newlines_before = content.count('\n', 0, pos)
    return newlines_before + 1


def check_frontmatter(content: str, filepath: Path) -> list[dict]:
    """Validate that SKILL.md frontmatter contains only allowed top-level keys.

    Args:
        content: Full text of the file.
        filepath: Path of the file; anything not named ``SKILL.md`` is skipped.

    Returns:
        A list of finding dicts. A missing or unclosed frontmatter block yields
        one ``critical`` finding; each top-level key outside
        VALID_FRONTMATTER_KEYS yields one ``high`` finding carrying its
        1-based line number.
    """
    findings = []
    if filepath.name != 'SKILL.md':
        return findings

    if not content.startswith('---'):
        findings.append({
            'file': filepath.name,
            'line': 1,
            'severity': 'critical',
            'category': 'frontmatter',
            'title': 'SKILL.md missing frontmatter block',
            'detail': 'SKILL.md must start with --- frontmatter containing name and description',
            'action': 'Add frontmatter with name and description fields',
        })
        return findings

    # Find closing --- (search starts after the opening delimiter)
    end = content.find('\n---', 3)
    if end == -1:
        findings.append({
            'file': filepath.name,
            'line': 1,
            'severity': 'critical',
            'category': 'frontmatter',
            'title': 'SKILL.md frontmatter block not closed',
            'detail': 'Missing closing --- for frontmatter',
            'action': 'Add closing --- after frontmatter fields',
        })
        return findings

    # Skip the opening "---" plus its newline; the first frontmatter line is
    # therefore line 2 of the file.
    frontmatter = content[4:end]
    for line_no, raw_line in enumerate(frontmatter.split('\n'), start=2):
        # Indented lines are YAML continuation lines (e.g. the body of a
        # multi-line ``description: >`` scalar) or nested values, not
        # top-level keys. A colon inside them must not be read as a key.
        if raw_line.startswith((' ', '\t')):
            continue
        line = raw_line.strip()
        if not line or line.startswith('#'):
            continue
        if ':' not in line:
            continue
        key = line.split(':', 1)[0].strip()
        if key not in VALID_FRONTMATTER_KEYS:
            findings.append({
                'file': filepath.name,
                'line': line_no,
                'severity': 'high',
                'category': 'frontmatter',
                'title': f'Invalid frontmatter key: {key}',
                'detail': f'Only {", ".join(sorted(VALID_FRONTMATTER_KEYS))} are allowed in frontmatter',
                'action': f'Remove {key} from frontmatter — use as content field in SKILL.md body instead',
            })

    return findings


def check_root_md_files(skill_path: Path) -> list[dict]:
    """Check that no .md files exist at skill root except SKILL.md.

    Args:
        skill_path: Skill directory whose immediate children are inspected
            (subdirectories are not descended into).

    Returns:
        One ``high`` / ``structure`` finding per offending file, ordered by
        path so repeated runs produce identical output.
    """
    findings = []
    # glob() order depends on the filesystem; sort to keep output deterministic.
    for md_file in sorted(skill_path.glob('*.md')):
        # Only regular files count; a directory that happens to end in .md is
        # not a prompt file.
        if md_file.name == 'SKILL.md' or not md_file.is_file():
            continue
        findings.append({
            'file': md_file.name,
            'line': 0,
            'severity': 'high',
            'category': 'structure',
            'title': f'Prompt file at skill root: {md_file.name}',
            'detail': 'All progressive disclosure content must be in ./references/ — only SKILL.md belongs at root',
            'action': f'Move {md_file.name} to references/{md_file.name}',
        })
    return findings


def scan_file(filepath: Path, skip_fenced: bool = True) -> list[dict]:
    """Scan a single file for path standard violations.

    Args:
        filepath: File to scan; it is read as UTF-8 text.
        skip_fenced: When true, matches located inside fenced code blocks
            are ignored.

    Returns:
        One finding dict per violation with the keys ``file`` (bare file
        name), ``line`` (1-based), ``severity``, ``category``, ``title``,
        ``detail`` (the offending line, stripped and cut to 120 characters)
        and ``action`` (always empty here). Findings from the table-driven
        checks come first, in table order, followed by bare ``_bmad`` findings.
    """
    findings = []
    content = filepath.read_text(encoding='utf-8')
    rel_path = filepath.name
    # Split once up front rather than once per match.
    lines = content.split('\n')

    def add_finding(pos: int, severity: str, category: str, title: str) -> None:
        """Append a finding for the violation located at offset *pos*."""
        line_num = get_line_number(content, pos)
        findings.append({
            'file': rel_path,
            'line': line_num,
            'severity': severity,
            'category': category,
            'title': title,
            'detail': lines[line_num - 1].strip()[:120],
            'action': '',
        })

    checks = [
        (PROJECT_ROOT_NOT_BMAD_RE, 'project-root-not-bmad', 'critical',
         '{project-root} used for non-_bmad path — only valid use is {project-root}/_bmad/...'),
        (ABSOLUTE_PATH_RE, 'absolute-path', 'high',
         'Absolute path found — not portable across machines'),
        (HOME_PATH_RE, 'absolute-path', 'high',
         'Home directory path (~/) found — environment-specific'),
        (RELATIVE_DOT_RE, 'relative-prefix', 'high',
         'Parent directory reference (../) found — fragile, breaks with reorganization'),
        (BARE_INTERNAL_RE, 'bare-internal-path', 'high',
         'Bare skill-internal path without ./ prefix — use ./references/, ./scripts/, ./assets/ to distinguish from {project-root} paths'),
    ]

    for pattern, category, severity, message in checks:
        for match in pattern.finditer(content):
            # Most patterns consume a leading delimiter, which may be the
            # newline that ends the PREVIOUS line. Anchor on the captured path
            # (group 1) when the pattern has one so the reported line is the
            # line the path is actually on; patterns without a group fall
            # back to the whole match.
            pos = match.start(match.lastindex or 0)
            if skip_fenced and is_in_fenced_block(content, pos):
                continue
            add_finding(pos, severity, category, message)

    # Bare _bmad check — more nuanced, need to avoid false positives
    # inside {project-root}/_bmad which is correct
    for match in BARE_BMAD_RE.finditer(content):
        pos = match.start()
        if skip_fenced and is_in_fenced_block(content, pos):
            continue
        # Besides the regex's own lookbehind, tolerate a {project-root}/
        # prefix anywhere in the 30 characters before the match (covers
        # _bmad appearing deeper inside an already-rooted path).
        before = content[max(0, pos - 30):pos]
        if '{project-root}/' in before:
            continue
        add_finding(pos, 'high', 'bare-bmad',
                    'Bare _bmad reference without {project-root} prefix')

    return findings


def scan_skill(skill_path: Path, skip_fenced: bool = True) -> dict:
    """Scan all .md and .json files in a skill directory.

    Args:
        skill_path: Root directory of the skill.
        skip_fenced: Forwarded to scan_file; when true, matches inside fenced
            code blocks are ignored.

    Returns:
        A JSON-serialisable report dict: scanner metadata, the list of
        scanned files (relative to *skill_path*), every finding, a
        ``status`` of ``'pass'`` when there are no findings and ``'fail'``
        otherwise, and a summary with counts by severity and by category.
    """
    all_findings = []

    # Check for .md files at root that aren't SKILL.md
    all_findings.extend(check_root_md_files(skill_path))

    # Check SKILL.md frontmatter (is_file: a directory of that name cannot
    # be read and must not crash the scan)
    skill_md = skill_path / 'SKILL.md'
    if skill_md.is_file():
        content = skill_md.read_text(encoding='utf-8')
        all_findings.extend(check_frontmatter(content, skill_md))

    # Find all .md and .json files. rglob() also yields directories whose
    # name matches the pattern; reading those would raise, so keep regular
    # files only.
    target_files = sorted(
        path
        for pattern in ('*.md', '*.json')
        for path in skill_path.rglob(pattern)
        if path.is_file()
    )
    if not target_files:
        print(f"Warning: No .md or .json files found in {skill_path}", file=sys.stderr)

    files_scanned = []
    for target in target_files:
        rel = target.relative_to(skill_path)
        files_scanned.append(str(rel))
        file_findings = scan_file(target, skip_fenced)
        # scan_file reports the bare file name; replace it with the path
        # relative to the skill root so nested files are unambiguous.
        for finding in file_findings:
            finding['file'] = str(rel)
        all_findings.extend(file_findings)

    # Build summary
    by_severity = {'critical': 0, 'high': 0, 'medium': 0, 'low': 0}
    # NOTE(review): no check in this script currently emits 'double-prefix',
    # so that counter stays at 0.
    by_category = {
        'project_root_not_bmad': 0,
        'bare_bmad': 0,
        'double_prefix': 0,
        'absolute_path': 0,
        'relative_prefix': 0,
        'bare_internal_path': 0,
        'frontmatter': 0,
        'structure': 0,
    }

    for finding in all_findings:
        sev = finding['severity']
        if sev in by_severity:
            by_severity[sev] += 1
        # Finding categories are hyphenated; summary keys use underscores.
        cat = finding['category'].replace('-', '_')
        if cat in by_category:
            by_category[cat] += 1

    return {
        'scanner': 'path-standards',
        'script': 'scan-path-standards.py',
        'version': '2.0.0',
        'skill_path': str(skill_path),
        'timestamp': datetime.now(timezone.utc).isoformat(),
        'files_scanned': files_scanned,
        'status': 'pass' if not all_findings else 'fail',
        'findings': all_findings,
        'assessments': {},
        'summary': {
            'total_findings': len(all_findings),
            'by_severity': by_severity,
            'by_category': by_category,
            'assessment': 'Path standards scan complete',
        },
    }


def main() -> int:
    """Command-line entry point.

    Parses the arguments, scans the given skill directory and emits the JSON
    report to stdout or to the ``--output`` file.

    Returns:
        2 when the skill path is not a directory, otherwise 0 if the scan
        status is ``'pass'`` and 1 if it is not.
    """
    cli = argparse.ArgumentParser(description='Scan BMad skill for path standard violations')
    cli.add_argument('skill_path', type=Path, help='Path to the skill directory to scan')
    cli.add_argument('--output', '-o', type=Path, help='Write JSON output to file instead of stdout')
    cli.add_argument(
        '--include-fenced',
        action='store_true',
        help='Also check inside fenced code blocks (by default they are skipped)',
    )
    opts = cli.parse_args()

    target = opts.skill_path
    if not target.is_dir():
        print(f"Error: {target} is not a directory", file=sys.stderr)
        return 2

    report = scan_skill(target, skip_fenced=not opts.include_fenced)
    rendered = json.dumps(report, indent=2)

    destination = opts.output
    if not destination:
        print(rendered)
    else:
        # Create missing parent directories so any output location works.
        destination.parent.mkdir(parents=True, exist_ok=True)
        destination.write_text(rendered)
        print(f"Results written to {destination}", file=sys.stderr)

    return 1 if report['status'] != 'pass' else 0


if __name__ == '__main__':
    # Use main()'s return value as the process exit status.
    raise SystemExit(main())