386 lines
13 KiB
Python
386 lines
13 KiB
Python
#!/usr/bin/env python3
|
|
"""Deterministic pre-pass for sanctum architecture scanner.
|
|
|
|
Extracts structural metadata from a memory agent's sanctum architecture
|
|
that the LLM scanner can use instead of reading all files itself. Covers:
|
|
- SKILL.md content line count (non-blank, non-frontmatter)
|
|
- Template file inventory (which of the 6 standard templates exist)
|
|
- CREED template section inventory
|
|
- BOND template section inventory
|
|
- Capability reference frontmatter fields
|
|
- Init script parameter extraction (SKILL_NAME, TEMPLATE_FILES, SKILL_ONLY_FILES, EVOLVABLE)
|
|
- First-breath.md section inventory
|
|
- PULSE template presence and sections
|
|
|
|
Only runs for memory agents (agents with assets/ containing template files).
|
|
"""
|
|
|
|
# /// script
|
|
# requires-python = ">=3.9"
|
|
# dependencies = []
|
|
# ///
|
|
|
|
from __future__ import annotations
|
|
|
|
import argparse
|
|
import json
|
|
import re
|
|
import sys
|
|
from datetime import datetime, timezone
|
|
from pathlib import Path
|
|
|
|
|
|
# Templates every memory agent must ship in assets/; main() reports a
# "critical" finding for each one that is missing.
STANDARD_TEMPLATES = [
    "INDEX-template.md",
    "PERSONA-template.md",
    "CREED-template.md",
    "BOND-template.md",
    "MEMORY-template.md",
    "CAPABILITIES-template.md",
]

# Templates that are inventoried (flagged "optional": True) but whose absence
# produces no finding.
OPTIONAL_TEMPLATES = ["PULSE-template.md"]

# Headings CREED-template.md is expected to contain. Each entry is matched as
# a case-insensitive substring of the file's H2/H3 headings.
CREED_REQUIRED_SECTIONS = [
    "The Sacred Truth",
    "Mission",
    "Core Values",
    "Standing Orders",
    "Philosophy",
    "Boundaries",
    "Anti-Patterns",
    "Dominion",
]

# Headings expected in a calibration-style first-breath.md (the style is
# detected by the presence of an "Absorb" heading).
FIRST_BREATH_CALIBRATION_SECTIONS = [
    "Save As You Go",
    "Pacing",
    "Chase What Catches",
    "Absorb Their Voice",
    "Show Your Work",
    "Hear the Silence",
    "The Territories",
    "Wrapping Up",
]

# Headings expected in a configuration-style first-breath.md; also the
# expectation used when the style cannot be determined.
FIRST_BREATH_CONFIG_SECTIONS = [
    "Save As You Go",
    "Discovery",
    "Urgency",
    "Wrapping Up",
]
|
|
|
|
|
|
def count_content_lines(file_path: Path) -> int:
    """Count non-blank, non-frontmatter lines in a markdown file.

    A YAML frontmatter block is recognised only at the very start of the
    file: an opening ``---`` line, an optional body, and a closing ``---``
    line. It is removed before counting. Lines that are empty or contain
    only whitespace are not counted.

    Args:
        file_path: Markdown file to read. It must exist; the caller is
            responsible for checking.

    Returns:
        The number of lines carrying at least one non-whitespace character
        once the frontmatter has been removed.
    """
    # Decode explicitly as UTF-8 so the result does not depend on the
    # platform's locale encoding.
    content = file_path.read_text(encoding="utf-8")

    # The body group is optional *and lazy* so that an empty frontmatter block
    # ("---\n---\n") is stripped without swallowing later "---" rules, and the
    # closing delimiter may be the last line of the file (no trailing newline).
    stripped = re.sub(
        r"\A---[ \t]*\r?\n(?:.*?\r?\n)??---[ \t]*(?:\r?\n|\Z)",
        "",
        content,
        count=1,
        flags=re.DOTALL,
    )

    return sum(1 for line in stripped.split("\n") if line.strip())
|
|
|
|
|
|
def extract_h2_h3_sections(file_path: Path) -> list[str]:
    """Extract H2 and H3 heading titles from a markdown file.

    Only ATX headings that start in column 0 with exactly two or three ``#``
    characters followed by whitespace are reported. Lines inside fenced code
    blocks (``` or ~~~) are ignored, so a ``## comment`` in a code sample is
    not mistaken for a section.

    Args:
        file_path: Markdown file to scan.

    Returns:
        Heading titles in document order, with surrounding whitespace
        removed. An empty list when ``file_path`` is not an existing file.
    """
    sections = []
    if not file_path.is_file():
        return sections

    open_fence = ""  # marker of the code fence we are inside; "" when outside
    # Decode explicitly as UTF-8 so results do not depend on the locale.
    for line in file_path.read_text(encoding="utf-8").split("\n"):
        fence = re.match(r"^ {0,3}(`{3,}|~{3,})(.*)$", line)
        if fence:
            marker, rest = fence.group(1), fence.group(2)
            if not open_fence:
                open_fence = marker
                continue
            # A fence closes only on the same character, at least as long as
            # the opener, with nothing but whitespace after it.
            if (
                marker[0] == open_fence[0]
                and len(marker) >= len(open_fence)
                and not rest.strip()
            ):
                open_fence = ""
                continue
        if open_fence:
            continue

        heading = re.match(r"^#{2,3}\s+(.+)", line)
        if heading:
            sections.append(heading.group(1).strip())
    return sections
|
|
|
|
|
|
def parse_frontmatter(file_path: Path) -> dict:
    """Extract top-of-file frontmatter from a markdown file as a flat dict.

    This is a deliberately small ``key: value`` reader, not a YAML parser:
    each frontmatter line is split at its first colon, both sides are
    stripped, and surrounding single/double quotes are removed from the
    value. Indented (nested) lines are treated like any other line. Blank
    lines, ``#`` comment lines and lines without a colon are skipped.

    Args:
        file_path: Markdown file to read. It must exist.

    Returns:
        Mapping of frontmatter keys to string values; empty when the file
        does not start with a ``---`` delimited block.
    """
    # Decode explicitly as UTF-8 so results do not depend on the locale.
    content = file_path.read_text(encoding="utf-8")

    # Both delimiters must be whole lines. The closing one may be the last
    # line of the file, and the body is optional (lazy) so "---\n---" is an
    # empty block rather than running on to a later "---" rule.
    match = re.match(
        r"\A---[ \t]*\r?\n(?:(.*?)\r?\n)??---[ \t]*(?:\r?\n|\Z)",
        content,
        re.DOTALL,
    )
    if not match:
        return {}

    meta = {}
    for raw_line in (match.group(1) or "").split("\n"):
        line = raw_line.strip()
        if not line or line.startswith("#"):
            continue  # blank line or YAML comment, not a field
        key, sep, value = line.partition(":")
        if not sep:
            continue
        meta[key.strip()] = value.strip().strip("'\"")
    return meta
|
|
|
|
|
|
def _assignment_pattern(name: str) -> str:
    """Return a regex prefix matching ``NAME =`` or ``NAME: <annotation> =``."""
    # \b keeps e.g. OLD_SKILL_NAME from matching SKILL_NAME; the optional
    # group tolerates a type annotation between the name and the "=".
    return rf"\b{name}(?:\s*:[^=\n]*)?\s*=\s*"


def extract_init_script_params(script_path: Path) -> dict:
    """Extract agent-specific configuration from init-sanctum.py.

    The script is scanned with regular expressions; it is never imported or
    executed. For each setting the first matching assignment in the file
    wins.

    Args:
        script_path: Expected location of the init script.

    Returns:
        A dict with these keys:
        ``exists`` (bool) - whether ``script_path`` is an existing file;
        ``skill_name`` (str | None) - quoted value assigned to SKILL_NAME;
        ``template_files`` (list[str]) - quoted strings inside the
        TEMPLATE_FILES ``[...]`` literal;
        ``skill_only_files`` (list[str]) - quoted strings inside the
        SKILL_ONLY_FILES ``{...}`` literal;
        ``evolvable`` (bool | None) - literal True/False assigned to
        EVOLVABLE.
        Settings that are not found keep their None / empty-list default.
    """
    exists = script_path.is_file()
    params = {
        "exists": exists,
        "skill_name": None,
        "template_files": [],
        "skill_only_files": [],
        "evolvable": None,
    }
    if not exists:
        return params

    # Decode explicitly as UTF-8 so results do not depend on the locale.
    content = script_path.read_text(encoding="utf-8")
    quoted = r'["\']([^"\']+)["\']'

    # SKILL_NAME
    match = re.search(_assignment_pattern("SKILL_NAME") + quoted, content)
    if match:
        params["skill_name"] = match.group(1)

    # TEMPLATE_FILES
    tmpl_match = re.search(
        _assignment_pattern("TEMPLATE_FILES") + r"\[(.*?)\]", content, re.DOTALL
    )
    if tmpl_match:
        params["template_files"] = re.findall(quoted, tmpl_match.group(1))

    # SKILL_ONLY_FILES
    only_match = re.search(
        _assignment_pattern("SKILL_ONLY_FILES") + r"\{(.*?)\}", content, re.DOTALL
    )
    if only_match:
        params["skill_only_files"] = re.findall(quoted, only_match.group(1))

    # EVOLVABLE
    ev_match = re.search(_assignment_pattern("EVOLVABLE") + r"(True|False)\b", content)
    if ev_match:
        params["evolvable"] = ev_match.group(1) == "True"

    return params
|
|
|
|
|
|
def check_section_present(sections: list[str], keyword: str) -> bool:
    """Report whether ``keyword`` occurs in any heading, ignoring case.

    The comparison is a plain substring test on lower-cased text, so the
    keyword only has to appear somewhere inside a heading.
    """
    needle = keyword.lower()
    for heading in sections:
        if needle in heading.lower():
            return True
    return False
|
|
|
|
|
|
def _template_entry(tmpl_path: Path, optional: bool = False) -> dict:
    """Describe one template file: existence, H2/H3 headings, content lines."""
    exists = tmpl_path.exists()
    entry = {"exists": exists}
    if optional:
        entry["optional"] = True
    entry["sections"] = extract_h2_h3_sections(tmpl_path) if exists else []
    entry["content_lines"] = count_content_lines(tmpl_path) if exists else 0
    return entry


def main():
    """Run the sanctum pre-pass for one skill directory and emit JSON.

    Command line: ``skill_path`` (required) and ``-o/--output`` (optional
    file; stdout when omitted).

    The process exits with status 2 when ``skill_path`` is not a directory.
    When ``assets/`` holds no ``*-template.md`` file the skill is reported as
    a non-memory agent and nothing else is analysed. Otherwise the report
    contains the SKILL.md analysis, the template inventory, CREED and
    first-breath section checks, capability frontmatter flags, init-script
    parameters, cross-checks and a list of findings with severity counts.
    """
    parser = argparse.ArgumentParser(
        description="Pre-pass for sanctum architecture scanner"
    )
    parser.add_argument("skill_path", help="Path to the agent skill directory")
    parser.add_argument(
        "-o", "--output", help="Output JSON file path (default: stdout)"
    )
    args = parser.parse_args()

    skill_path = Path(args.skill_path).resolve()
    if not skill_path.is_dir():
        print(f"Error: {skill_path} is not a directory", file=sys.stderr)
        sys.exit(2)

    assets_dir = skill_path / "assets"
    references_dir = skill_path / "references"
    scripts_dir = skill_path / "scripts"
    skill_md = skill_path / "SKILL.md"

    # One listing serves both memory-agent detection and the TEMPLATE_FILES
    # cross-check, so the two always agree. is_dir() (rather than exists())
    # avoids NotADirectoryError when "assets" is a regular file, and only
    # regular files count as templates.
    actual_templates = (
        [
            f.name
            for f in assets_dir.iterdir()
            if f.is_file() and f.name.endswith("-template.md")
        ]
        if assets_dir.is_dir()
        else []
    )

    # A memory agent is one that ships at least one template in assets/.
    if not actual_templates:
        result = {
            "timestamp": datetime.now(timezone.utc).isoformat(),
            "skill_path": str(skill_path),
            "is_memory_agent": False,
            "message": "Not a memory agent — no sanctum templates found in assets/",
        }
        output_json(result, args.output)
        return

    # SKILL.md analysis
    skill_md_exists = skill_md.exists()
    skill_analysis = {
        "exists": skill_md_exists,
        "content_lines": count_content_lines(skill_md) if skill_md_exists else 0,
        "sections": extract_h2_h3_sections(skill_md) if skill_md_exists else [],
    }

    # Template inventory (standard first, then optional)
    template_inventory = {}
    for tmpl in STANDARD_TEMPLATES:
        template_inventory[tmpl] = _template_entry(assets_dir / tmpl)
    for tmpl in OPTIONAL_TEMPLATES:
        template_inventory[tmpl] = _template_entry(assets_dir / tmpl, optional=True)

    # CREED section check
    creed_path = assets_dir / "CREED-template.md"
    creed_sections = extract_h2_h3_sections(creed_path) if creed_path.exists() else []
    creed_check = {
        section: check_section_present(creed_sections, section)
        for section in CREED_REQUIRED_SECTIONS
    }

    # First-breath analysis
    first_breath_path = references_dir / "first-breath.md"
    first_breath_exists = first_breath_path.exists()
    fb_sections = extract_h2_h3_sections(first_breath_path) if first_breath_exists else []

    # Detect style: calibration has "Absorb Their Voice", configuration has "Discovery"
    is_calibration = check_section_present(fb_sections, "Absorb")
    is_configuration = check_section_present(fb_sections, "Discovery") and not is_calibration
    if is_calibration:
        fb_style = "calibration"
    elif is_configuration:
        fb_style = "configuration"
    else:
        fb_style = "unknown"

    # An undetected style is checked against the configuration section list.
    expected_sections = (
        FIRST_BREATH_CALIBRATION_SECTIONS if is_calibration else FIRST_BREATH_CONFIG_SECTIONS
    )
    fb_check = {
        section: check_section_present(fb_sections, section)
        for section in expected_sections
    }

    first_breath_analysis = {
        "exists": first_breath_exists,
        "style": fb_style,
        "sections": fb_sections,
        "section_checks": fb_check,
    }

    # Capability frontmatter scan: every references/*.md except first-breath.md
    # that carries frontmatter is treated as a capability reference.
    capabilities = []
    if references_dir.is_dir():
        for md_file in sorted(references_dir.glob("*.md")):
            if md_file.name == "first-breath.md":
                continue
            meta = parse_frontmatter(md_file)
            if not meta:
                continue
            cap_sections = extract_h2_h3_sections(md_file)
            capabilities.append({
                "file": md_file.name,
                "has_name": "name" in meta,
                "has_code": "code" in meta,
                "has_description": "description" in meta,
                "sections": cap_sections,
                # Memory agent patterns
                "has_memory_integration": check_section_present(
                    cap_sections, "Memory Integration"
                ),
                "has_after_session": check_section_present(cap_sections, "After"),
                "has_success": check_section_present(cap_sections, "Success"),
            })

    # Init script analysis
    init_script_path = scripts_dir / "init-sanctum.py"
    init_params = extract_init_script_params(init_script_path)

    # Cross-checks are None (not applicable) when there is no init script.
    if init_params["exists"]:
        # init TEMPLATE_FILES vs actual templates
        init_template_match = set(init_params.get("template_files", [])) == set(actual_templates)
        # init SKILL_NAME vs folder name
        skill_name_match = init_params.get("skill_name") == skill_path.name
    else:
        init_template_match = None
        skill_name_match = None

    # Findings
    findings = []

    if skill_analysis["content_lines"] > 40:
        findings.append({
            "severity": "high",
            "file": "SKILL.md",
            "message": f"Bootloader has {skill_analysis['content_lines']} content lines (target: ~30, max: 40)",
        })

    for tmpl in STANDARD_TEMPLATES:
        if not template_inventory[tmpl]["exists"]:
            findings.append({
                "severity": "critical",
                "file": f"assets/{tmpl}",
                "message": f"Missing standard template: {tmpl}",
            })

    for section, present in creed_check.items():
        if not present:
            findings.append({
                "severity": "high",
                "file": "assets/CREED-template.md",
                "message": f"Missing required CREED section: {section}",
            })

    if not first_breath_analysis["exists"]:
        findings.append({
            "severity": "critical",
            "file": "references/first-breath.md",
            "message": "Missing first-breath.md",
        })
    else:
        for section, present in first_breath_analysis["section_checks"].items():
            if not present:
                findings.append({
                    "severity": "high",
                    "file": "references/first-breath.md",
                    "message": f"Missing First Breath section: {section}",
                })

    if not init_params["exists"]:
        findings.append({
            "severity": "critical",
            "file": "scripts/init-sanctum.py",
            "message": "Missing init-sanctum.py",
        })
    else:
        if skill_name_match is False:
            findings.append({
                "severity": "critical",
                "file": "scripts/init-sanctum.py",
                "message": f"SKILL_NAME mismatch: script has '{init_params['skill_name']}', folder is '{skill_path.name}'",
            })
        if init_template_match is False:
            findings.append({
                "severity": "high",
                "file": "scripts/init-sanctum.py",
                "message": "TEMPLATE_FILES does not match actual templates in assets/",
            })

    result = {
        "timestamp": datetime.now(timezone.utc).isoformat(),
        "skill_path": str(skill_path),
        "is_memory_agent": True,
        "skill_md": skill_analysis,
        "template_inventory": template_inventory,
        "creed_sections": creed_check,
        "first_breath": first_breath_analysis,
        "capabilities": capabilities,
        "init_script": init_params,
        "cross_checks": {
            "skill_name_match": skill_name_match,
            "template_files_match": init_template_match,
        },
        "findings": findings,
        "finding_count": len(findings),
        "critical_count": sum(1 for f in findings if f["severity"] == "critical"),
        "high_count": sum(1 for f in findings if f["severity"] == "high"),
    }

    output_json(result, args.output)
|
|
|
|
|
|
def output_json(data: dict, output_path: str | None) -> None:
    """Serialise ``data`` as 2-space-indented JSON to a file or to stdout.

    With a non-empty ``output_path`` the JSON (plus a trailing newline) is
    written there, creating parent directories as needed, and a confirmation
    line goes to stderr. Otherwise the JSON is printed to stdout.
    """
    rendered = json.dumps(data, indent=2)
    if not output_path:
        print(rendered)
        return

    destination = Path(output_path)
    destination.parent.mkdir(parents=True, exist_ok=True)
    destination.write_text(rendered + "\n")
    print(f"Wrote: {output_path}", file=sys.stderr)
|
|
|
|
|
|
# Script entry point: run the pre-pass only when executed directly, so the
# module can be imported without side effects.
if __name__ == "__main__":
    main()
|