Files
Momento/.claude/skills/suno-lyric-transformer/scripts/validate-lyrics.py
Antigravity bd495be965
All checks were successful
Deploy to Production / Build and Deploy (push) Successful in 12s
feat: design system overhaul — sidebar, AI chats, settings, brainstorm, color cleanup
- Sidebar: dynamic brand-accent colors, brainstorm section restyled
- AI chat general: popup panel with expand/collapse, hides when contextual AI open
- AI chat contextual: tabs reordered (Actions first), X close button, height fix
- Settings: all tabs restyled, 6 new color presets (sage, terracotta, iron, etc.)
- Global color cleanup: emerald/orange hardcoded → brand-accent dynamic
- Brainstorm page: orange → brand-accent throughout
- PageEntry animation component added to key pages
- Floating AI button: bg-brand-accent instead of hardcoded black
- i18n: all 15 locales updated with new AI/billing keys
- Billing: freemium quota tracking, BYOK, stripe subscription scaffolding
- Admin: integrated into new design
- AGENTS.md + CLAUDE.md project rules added
2026-05-16 12:59:30 +00:00

428 lines
16 KiB
Python

#!/usr/bin/env python3
# /// script
# requires-python = ">=3.10"
# dependencies = []
# ///
"""Validate transformed lyrics structure for Suno compatibility.

Checks metatag formatting, section structure, blank line separators,
style cue contamination, and reasonable song length.

Usage:
    python validate-lyrics.py <lyrics-file> [options]

    # Validate lyrics from a file
    python validate-lyrics.py lyrics.txt

    # Validate lyrics from stdin (printf emits real newlines; --stdin input
    # is read verbatim, with no "\\n" unescaping)
    printf '[Verse 1]\\nHello world\\n' | python validate-lyrics.py --stdin

    # Validate with a text argument (a literal "\\n" is converted to a newline)
    python validate-lyrics.py --text "[Verse 1]\\nHello world"

    # Output to file
    python validate-lyrics.py lyrics.txt -o results.json
"""
import argparse
import json
import re
import sys
from datetime import datetime, timezone
from pathlib import Path
sys.path.insert(0, str(Path(__file__).resolve().parent.parent.parent / "_shared"))
from suno_constants import SUNO_LYRICS_HARD_LIMIT, SUNO_LYRICS_QUALITY_BUDGET
SCRIPT_NAME = "validate-lyrics"
VERSION = "1.1.0"

# Section metatags accepted verbatim. Tags are lower-cased before lookup,
# so matching is case-insensitive.
VALID_SECTIONS = {
    # Core song sections
    "intro",
    "verse",
    "verse 1",
    "verse 2",
    "verse 3",
    "verse 4",
    "pre-chorus",
    "chorus",
    "bridge",
    "breakdown",
    "build-up",
    "buildup",
    "final chorus",
    "outro",
    "hook",
    "refrain",
    "interlude",
    "post-chorus",
    "solo",
    # Instrumental / solo variants
    "guitar solo",
    "piano solo",
    "sax solo",
    "saxophone solo",
    "drum solo",
    "bass solo",
    "instrumental",
    # Structural tags
    "build",
    "drop",
    "break",
    "end",
    "fade out",
    "fade in",
}

# Vocal delivery cues (inline metatags, not section tags).
VALID_VOCAL_CUES = {
    "harmonized",
    "hummed",
    "humming",
    "whistled",
    "whistling",
    "crooning",
    "scat",
    "call and response",
}

# Prefixes allowed before the colon in a descriptor metatag.
VALID_DESCRIPTORS = {
    "mood",
    "energy",
    "vocal style",
    "instrument",
    "tempo",
    "key",
}

# HIGH-confidence standalone bare-bracket tags from metatag-reference.md.
# Keep these three sets in sync with that document's "Standalone Mood/Energy
# Tags" and "Timing & Rhythm Tags" sections.
VALID_STANDALONE_MOODS = {
    "uplifting",
    "haunting",
    "dark",
    "nostalgic",
    "somber",
    "romantic",
    "dreamy",
    "peaceful",
    "anxious",
    "euphoric",
    "mysterious",
    "playful",
    "epic",
    "intimate",
    "bittersweet",
    "triumphant",
}
VALID_STANDALONE_ENERGY = {
    "high energy",
    "medium energy",
    "low energy",
    "chill",
    "driving",
    "explosive",
    "building",
    "relaxed",
    "frantic",
    "steady",
}
VALID_TIMING_RHYTHM = {
    "half-time",
    "swung feel",
    "shuffle",
    "triplet feel",
    "syncopated",
    "straight",
    "four on the floor",
    "polyrhythmic",
    "breakbeat",
}

# Regexes for production/style vocabulary that should NOT appear in lyrics.
STYLE_CONTAMINATION_PATTERNS = [
    r'\b(?:BPM|bpm)\b',
    r'\b(?:stereo|mono)\s+(?:field|mix)\b',
    r'\b(?:radio[- ]ready|lo[- ]fi|hi[- ]fi)\b',
    r'\b(?:punchy|warm|crisp)\s+(?:drums|bass|mix|production)\b',
]

# Reasonable song length bounds, counted in non-empty, non-tag lines.
MIN_LYRIC_LINES = 8
MAX_LYRIC_LINES = 80
RECOMMENDED_MAX_SECTIONS = 12
def parse_lyrics(text: str) -> dict:
    """Parse lyrics into structured sections with line data.

    A line is treated as a metatag only when, after stripping surrounding
    whitespace, it consists of a single ``[...]`` group with non-empty content
    and no inner ``]``. Every such tag is recorded in ``all_tags``. Tags that
    name a section additionally open a new section, and each following
    non-empty, non-tag line is attached to the most recently opened section.
    Lyric lines that appear before the first section tag are not attached to
    any section.

    Args:
        text: Raw lyrics text. Lines are split on the newline character and
            numbered from 1.

    Returns:
        A dict with the keys:
            ``sections``: list of dicts with ``tag`` (tag text as written,
                brackets removed), ``line`` (1-based line of the tag),
                ``lyric_lines`` (stripped lyric lines) and
                ``lyric_line_numbers`` (their 1-based line numbers).
            ``all_tags``: list of ``{"text", "line"}`` dicts, one per tag line.
            ``total_lines``: number of lines produced by the split.
            ``raw_text``: the input text, unchanged.
    """
    # Compiled once up front instead of being re-evaluated for every line.
    tag_line_re = re.compile(r'^\[([^\]]+)\]$')
    # Base section names optionally followed by a number ("verse 12").
    numbered_section_re = re.compile(
        r'^(verse|chorus|bridge|breakdown|build-up|buildup|pre-chorus|post-chorus|hook|refrain|interlude|solo|instrumental|break|drop|build|end|fade\s*(?:out|in))\s*\d*$'
    )

    lines = text.split('\n')
    sections = []
    current_section = None
    all_tags = []

    for i, line in enumerate(lines, 1):
        stripped = line.strip()
        tag_match = tag_line_re.match(stripped)

        if tag_match is None:
            # Non-tag line: keep it only if it has content and a section is open.
            if stripped and current_section is not None:
                current_section["lyric_lines"].append(stripped)
                current_section["lyric_line_numbers"].append(i)
            continue

        tag_content = tag_match.group(1).strip()
        all_tags.append({"text": tag_content, "line": i})

        # Descriptor tags ("Mood: dark") are recorded above but never open a
        # section, whether they appear before the first section or inside one.
        if ':' in tag_content:
            prefix = tag_content.split(':')[0].strip().lower()
            if prefix in VALID_DESCRIPTORS:
                continue

        tag_lower = tag_content.lower()
        # NOTE(review): vocal delivery cues open a section here, whereas
        # VALID_VOCAL_CUES is described as "inline metatags, not section
        # tags" — behaviour preserved; confirm this is intended.
        is_section = (
            tag_lower in VALID_SECTIONS
            or tag_lower in VALID_VOCAL_CUES
            or numbered_section_re.match(tag_lower)
        )
        if is_section:
            current_section = {
                "tag": tag_content,
                "line": i,
                "lyric_lines": [],
                "lyric_line_numbers": []
            }
            sections.append(current_section)
        # Any other tag is recorded in all_tags only; it is never a lyric line.

    return {
        "sections": sections,
        "all_tags": all_tags,
        "total_lines": len(lines),
        "raw_text": text
    }
def validate_lyrics(text: str) -> list[dict]:
    """Validate lyrics text and return findings.

    Checks run in a fixed order and findings are appended in that order:
    empty input (returns immediately), missing section tags, missing blank
    line before a section tag, style-cue contamination, asterisks, too few
    lyric lines, character-count limits, too many lyric lines, too many
    sections, unrecognized metatags, punctuation density, and empty sections.

    Args:
        text: Raw lyrics text. Lines are split on the newline character and
            reported with 1-based line numbers.

    Returns:
        A list of finding dicts. Each has ``severity``, ``category``,
        ``issue`` and ``fix``; findings tied to a specific line also carry
        ``location`` as ``{"line": n}``.
    """
    findings = []
    lines = text.split('\n')

    if not text.strip():
        findings.append({
            "severity": "critical",
            "category": "structure",
            "issue": "Lyrics text is empty.",
            "fix": "Provide lyrics with at least one section and content."
        })
        return findings

    # Patterns are compiled once rather than once per line.
    tag_line_re = re.compile(r'^\[.*\]$')
    numbered_section_re = re.compile(
        r'^(verse|chorus|bridge|breakdown|build-up|buildup|pre-chorus|post-chorus|hook|refrain|interlude|solo|instrumental|break|drop|build|end|fade\s*(?:out|in))\s*\d*$'
    )
    style_cue_res = [re.compile(p, re.IGNORECASE) for p in STYLE_CONTAMINATION_PATTERNS]

    # (1-based line number, stripped text) of every non-empty, non-tag line;
    # shared by the style-cue, length and punctuation checks.
    lyric_rows = [
        (i, stripped)
        for i, stripped in enumerate((raw.strip() for raw in lines), 1)
        if stripped and not tag_line_re.match(stripped)
    ]

    parsed = parse_lyrics(text)
    sections = parsed["sections"]

    # Check for at least one section tag
    if not sections:
        findings.append({
            "severity": "high",
            "category": "structure",
            "issue": "No section metatags found. Suno uses tags like [Verse], [Chorus] to structure songs.",
            "fix": "Add section tags to define song structure."
        })

    # Check for blank lines between sections
    for section in sections:
        line_num = section["line"]
        if line_num > 1:
            # line_num is a 1-based index into `lines`, so the previous line
            # always exists here. A preceding bracketed line (e.g. a
            # descriptor tag) is accepted in place of a blank line.
            prev_line = lines[line_num - 2].strip()
            if prev_line and not prev_line.startswith('['):
                findings.append({
                    "severity": "medium",
                    "category": "structure",
                    "location": {"line": line_num},
                    "issue": f"No blank line before section tag [{section['tag']}] at line {line_num}.",
                    "fix": "Add a blank line before each section tag for cleaner Suno parsing."
                })

    # Check for style cues in lyrics (at most one finding per line)
    for i, stripped in lyric_rows:
        if any(cue.search(stripped) for cue in style_cue_res):
            # Only mark the excerpt as truncated when it really was.
            excerpt = stripped[:60] + ("..." if len(stripped) > 60 else "")
            findings.append({
                "severity": "high",
                "category": "structure",
                "location": {"line": i},
                "issue": f"Possible style cue in lyrics at line {i}: '{excerpt}'",
                "fix": "Style descriptions belong in the style prompt, not in lyrics."
            })

    # Check for asterisks (every line, tag lines included)
    for i, line in enumerate(lines, 1):
        if '*' in line:
            findings.append({
                "severity": "medium",
                "category": "structure",
                "location": {"line": i},
                "issue": f"Asterisk found in lyrics at line {i}. Suno doesn't use markdown.",
                "fix": "Remove asterisks from lyrics."
            })

    # Count actual lyric lines (non-empty, non-tag)
    lyric_count = len(lyric_rows)
    if lyric_count < MIN_LYRIC_LINES:
        findings.append({
            "severity": "low",
            "category": "structure",
            "issue": f"Very short lyrics ({lyric_count} lines). May produce a very short song.",
            "fix": "Consider adding more content or sections for a full-length song."
        })

    # Character count check (Suno counts everything including metatags).
    # The advice text is derived from the same constants as the thresholds
    # so the two can never disagree.
    char_count = len(text)
    if char_count > SUNO_LYRICS_HARD_LIMIT:
        findings.append({
            "severity": "high",
            "category": "structure",
            "issue": f"Total character count ({char_count}) exceeds Suno's {SUNO_LYRICS_HARD_LIMIT}-character limit. Suno will truncate your lyrics.",
            "fix": f"Trim lyrics to stay under {SUNO_LYRICS_HARD_LIMIT:,} characters (hard limit). For best quality, aim for ~{SUNO_LYRICS_QUALITY_BUDGET:,} characters."
        })
    elif char_count > SUNO_LYRICS_QUALITY_BUDGET:
        findings.append({
            "severity": "medium",
            "category": "structure",
            "issue": f"Total character count ({char_count}) is approaching Suno's {SUNO_LYRICS_HARD_LIMIT}-character limit.",
            "fix": f"Consider trimming — quality degrades above ~{SUNO_LYRICS_QUALITY_BUDGET:,} characters. Hard limit is {SUNO_LYRICS_HARD_LIMIT:,}."
        })

    if lyric_count > MAX_LYRIC_LINES:
        findings.append({
            "severity": "medium",
            "category": "structure",
            "issue": f"Very long lyrics ({lyric_count} lines). Suno may not render all content.",
            "fix": "Consider trimming to a more standard song length (20-50 lyric lines)."
        })

    # Check section count
    if len(sections) > RECOMMENDED_MAX_SECTIONS:
        findings.append({
            "severity": "low",
            "category": "structure",
            "issue": f"High section count ({len(sections)}). Songs typically have 6-10 sections.",
            "fix": "Consider consolidating sections for a cleaner structure."
        })

    # Check for invalid metatags
    for tag_info in parsed["all_tags"]:
        tag_text = tag_info["text"]
        tag_lower = tag_text.lower()

        is_section = bool(
            tag_lower in VALID_SECTIONS or numbered_section_re.match(tag_lower)
        )
        is_vocal_cue = tag_lower in VALID_VOCAL_CUES
        is_descriptor = (
            ':' in tag_text
            and tag_text.split(':')[0].strip().lower() in VALID_DESCRIPTORS
        )
        # HIGH-confidence standalone mood/energy/rhythm tag
        is_standalone = (
            tag_lower in VALID_STANDALONE_MOODS
            or tag_lower in VALID_STANDALONE_ENERGY
            or tag_lower in VALID_TIMING_RHYTHM
        )

        if not (is_section or is_vocal_cue or is_descriptor or is_standalone):
            findings.append({
                "severity": "low",
                "category": "consistency",
                "location": {"line": tag_info["line"]},
                "issue": f"Unrecognized metatag [{tag_text}] at line {tag_info['line']}. May not be interpreted by Suno.",
                "fix": "Use standard section tags or descriptor tags (Mood/Energy/Vocal Style/Instrument)."
            })

    # Punctuation density check
    for i, stripped in lyric_rows:
        # A non-empty stripped line always splits into at least one word,
        # so the division below cannot hit zero.
        word_count = len(stripped.split())
        # Count commas, dashes, semicolons, colons, ellipses
        punct_count = (
            stripped.count(',') + stripped.count('-') + stripped.count(';')
            + stripped.count(':') + stripped.count('...')
        )
        density = punct_count / word_count
        if density > 0.5:
            findings.append({
                "severity": "low",
                "category": "rhythm",
                "location": {"line": i},
                "issue": f"Heavy punctuation density ({density:.2f}) at line {i}: '{stripped[:60]}'. Heavy punctuation can confuse Suno's cadence.",
                "fix": "Simplify punctuation to let Suno interpret natural phrasing."
            })

    # Check for empty sections
    for section in sections:
        if not section["lyric_lines"]:
            findings.append({
                "severity": "low",
                "category": "structure",
                "location": {"line": section["line"]},
                "issue": f"Empty section [{section['tag']}] at line {section['line']}.",
                "fix": "Add lyrics to this section or remove the tag if it's meant to be instrumental."
            })

    return findings
def build_report(findings: list, text: str, skill_path: str = "") -> dict:
    """Build the standard output report.

    Args:
        findings: Finding dicts as produced by ``validate_lyrics``; each must
            have a ``severity`` key. The list and its dicts are not modified.
        text: The lyrics text the findings refer to; used for the metrics.
        skill_path: Optional skill path echoed back in the report.

    Returns:
        A dict with ``script``, ``version``, ``skill_path``, ``timestamp``
        (current UTC time, ISO 8601), ``status``, ``metrics``, ``findings``
        and ``summary``. ``status`` is ``"fail"`` if any finding is critical,
        ``"warning"`` if any is high, otherwise ``"pass"``.
    """
    # Give every finding a location without touching the caller's dicts:
    # findings that lack one get a shallow copy with a file-level default.
    reported = [
        finding if "location" in finding
        else {**finding, "location": {"file": "lyrics"}}
        for finding in findings
    ]

    # Pre-seeded so the summary always lists every standard severity;
    # any non-standard severity is added as an extra key.
    severity_counts = {"critical": 0, "high": 0, "medium": 0, "low": 0, "info": 0}
    for finding in reported:
        severity = finding["severity"]
        severity_counts[severity] = severity_counts.get(severity, 0) + 1

    if severity_counts["critical"] > 0:
        status = "fail"
    elif severity_counts["high"] > 0:
        status = "warning"
    else:
        status = "pass"

    parsed = parse_lyrics(text)
    # Lyric lines are the non-empty lines that are not a whole-line [tag].
    tag_line_re = re.compile(r'^\[.*\]$')
    lyric_line_count = sum(
        1
        for stripped in (raw.strip() for raw in text.split('\n'))
        if stripped and not tag_line_re.match(stripped)
    )

    return {
        "script": SCRIPT_NAME,
        "version": VERSION,
        "skill_path": skill_path,
        "timestamp": datetime.now(timezone.utc).isoformat(),
        "status": status,
        "metrics": {
            "total_lines": parsed["total_lines"],
            "lyric_lines": lyric_line_count,
            "character_count": len(text),
            "section_count": len(parsed["sections"]),
            "sections": [s["tag"] for s in parsed["sections"]]
        },
        "findings": reported,
        "summary": {
            "total": len(reported),
            **severity_counts
        }
    }
def main():
    """Command-line entry point: read lyrics, validate them, emit a JSON report.

    The lyrics come from ``--text`` (literal ``\\n`` sequences are turned into
    newlines), from stdin when ``--stdin`` is given, or from the positional
    file path, in that order of precedence. The report is printed to stdout
    or written to the ``--output`` path.

    Exits with 0 when the report status is "pass", 1 when it is "fail" or
    "warning", and 2 for usage or I/O errors (no input given, missing or
    unreadable input file, unwritable output file).
    """
    parser = argparse.ArgumentParser(
        description="Validate transformed lyrics structure for Suno compatibility.",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
%(prog)s lyrics.txt
%(prog)s --text "[Verse 1]\\nHello world"
%(prog)s --stdin < lyrics.txt
%(prog)s lyrics.txt -o results.json --verbose
Exit codes: 0=pass, 1=fail/warning, 2=error
"""
    )
    parser.add_argument("file", nargs="?", help="Path to lyrics text file")
    parser.add_argument("--text", help="Lyrics text to validate directly")
    parser.add_argument("--stdin", action="store_true", help="Read lyrics from stdin")
    parser.add_argument("-o", "--output", help="Output file path (defaults to stdout)")
    parser.add_argument("--verbose", action="store_true", help="Print diagnostics to stderr")
    parser.add_argument("--skill-path", default="", help="Skill path for report context")
    args = parser.parse_args()

    text = ""
    if args.text is not None:
        text = args.text.replace('\\n', '\n')
    elif args.stdin:
        text = sys.stdin.read()
    elif args.file:
        file_path = Path(args.file)
        if not file_path.exists():
            print(f"Error: File not found: {args.file}", file=sys.stderr)
            sys.exit(2)
        try:
            # Decode explicitly instead of relying on the platform default;
            # "utf-8-sig" also accepts files that start with a UTF-8 BOM.
            text = file_path.read_text(encoding="utf-8-sig")
        except (OSError, UnicodeDecodeError) as exc:
            # Directories, permission problems and undecodable bytes must
            # exit with 2: an uncaught traceback would exit with 1, which
            # is reserved for "fail/warning".
            print(f"Error: Could not read {args.file}: {exc}", file=sys.stderr)
            sys.exit(2)
    else:
        parser.print_help()
        sys.exit(2)

    if args.verbose:
        print(f"Validating lyrics ({len(text)} chars, {len(text.splitlines())} lines)...", file=sys.stderr)

    findings = validate_lyrics(text)
    report = build_report(findings, text, args.skill_path)
    output_json = json.dumps(report, indent=2)

    if args.output:
        try:
            Path(args.output).write_text(output_json, encoding="utf-8")
        except OSError as exc:
            print(f"Error: Could not write {args.output}: {exc}", file=sys.stderr)
            sys.exit(2)
        if args.verbose:
            print(f"Report written to {args.output}", file=sys.stderr)
    else:
        print(output_json)

    sys.exit(0 if report["status"] == "pass" else 1)


if __name__ == "__main__":
    main()