All checks were successful
Deploy to Production / Build and Deploy (push) Successful in 12s
- Sidebar: dynamic brand-accent colors, brainstorm section restyled - AI chat general: popup panel with expand/collapse, hides when contextual AI open - AI chat contextual: tabs reordered (Actions first), X close button, height fix - Settings: all tabs restyled, 6 new color presets (sage, terracotta, iron, etc.) - Global color cleanup: emerald/orange hardcoded → brand-accent dynamic - Brainstorm page: orange → brand-accent throughout - PageEntry animation component added to key pages - Floating AI button: bg-brand-accent instead of hardcoded black - i18n: all 15 locales updated with new AI/billing keys - Billing: freemium quota tracking, BYOK, stripe subscription scaffolding - Admin: integrated into new design - AGENTS.md + CLAUDE.md project rules added
361 lines
11 KiB
Python
361 lines
11 KiB
Python
#!/usr/bin/env python3
# /// script
# requires-python = ">=3.10"
# dependencies = ["librosa>=0.10", "numpy>=1.24"]
# ///
"""Deep audio analysis -- chord progression, energy over time, spectral features,
section boundaries, and harmonic/percussive separation analysis.

Usage:
    python audio-deep-analysis.py <audio-file> [options]

    # Analyze a single track
    python audio-deep-analysis.py track.mp3

    # JSON output to file
    python audio-deep-analysis.py track.mp3 --format json -o results.json

Exit codes:
    0 = success
    1 = invalid arguments or runtime error
    2 = missing dependencies
"""
|
|
|
|
import argparse
|
|
import json
|
|
import os
|
|
import sys
|
|
from datetime import datetime, timezone
|
|
from pathlib import Path
|
|
|
|
sys.path.insert(0, str(Path(__file__).resolve().parent.parent.parent / "_shared"))
|
|
from audio_deps import require_audio_deps
|
|
from json_archiver import resolve_archive_arg, write_archive
|
|
|
|
SCRIPT_NAME = "audio-deep-analysis"
|
|
VERSION = "1.0.0"
|
|
|
|
|
|
def format_time(seconds):
    """Render a duration in seconds as ``M:SS.t``.

    Minutes are not zero-padded, seconds are padded to two digits, and the
    final digit is tenths of a second, truncated rather than rounded.
    """
    minutes, remainder = divmod(seconds, 60)
    tenths = int((seconds % 1) * 10)
    return f"{int(minutes)}:{int(remainder):02d}.{tenths}"
|
|
|
|
|
|
def analyze_chords(y, sr, *, collect=False):
    """Estimate the musical key of each 10-second window from chroma features.

    The mean chroma vector of every window is correlated against all twelve
    rotations of a major and a minor key profile; the rotation/mode with the
    highest correlation is reported together with the three strongest pitch
    classes. Audio after the last complete window is not analysed.

    Args:
        y: Audio samples, passed straight to ``librosa.feature.chroma_cqt``.
        sr: Sample rate of ``y`` in Hz.
        collect: When True, print nothing and return the per-window data.

    Returns:
        When ``collect`` is True, a list with one dict per window holding
        ``time_start``, ``time_end``, ``key``, ``confidence`` and
        ``dominant_notes``. Otherwise an empty list (the table is printed).
    """
    import numpy as np

    pitch_classes = ['C', 'C#', 'D', 'D#', 'E', 'F', 'F#', 'G', 'G#', 'A', 'A#', 'B']
    # Key profiles laid out relative to the tonic: index 0 is the tonic itself.
    major_profile = np.array([6.35, 2.23, 3.48, 2.33, 4.38, 4.09, 2.52, 5.19, 2.39, 3.66, 2.29, 2.88])
    minor_profile = np.array([6.33, 2.68, 3.52, 5.38, 2.60, 3.53, 2.54, 4.75, 3.98, 2.69, 3.34, 3.17])

    hop_length = 512
    window_seconds = 10

    # Pass hop_length explicitly so the frames-per-window arithmetic below is
    # guaranteed to match the frame rate of the chroma matrix.
    chroma = librosa.feature.chroma_cqt(y=y, sr=sr, hop_length=hop_length)

    frames_per_window = int(window_seconds * sr / hop_length)
    num_windows = chroma.shape[1] // frames_per_window

    results = []

    if not collect:
        print("\n=== KEY/CHORD PROGRESSION ===")
        print(f"{'Time':<15} {'Estimated Key':<15} {'Confidence':>10} {'Dominant Notes'}")
        print("-" * 65)

    for i in range(num_windows):
        start_frame = i * frames_per_window
        end_frame = start_frame + frames_per_window
        avg = np.mean(chroma[:, start_frame:end_frame], axis=1)

        best_corr = -1
        best_key = "Unknown"

        # A flat chroma vector (e.g. a silent window) has zero variance, so its
        # correlation with any profile is undefined. Skip the scoring instead
        # of letting np.corrcoef emit RuntimeWarnings and NaNs; the window is
        # reported as "Unknown" with confidence -1, exactly as before.
        if np.max(avg) > np.min(avg):
            for j, tonic in enumerate(pitch_classes):
                # Rotate so that the candidate tonic lands on index 0.
                rolled = np.roll(avg, -j)
                maj_corr = np.corrcoef(rolled, major_profile)[0, 1]
                min_corr = np.corrcoef(rolled, minor_profile)[0, 1]
                if maj_corr > best_corr:
                    best_corr = maj_corr
                    best_key = f"{tonic} major"
                if min_corr > best_corr:
                    best_corr = min_corr
                    best_key = f"{tonic} minor"

        # Three strongest pitch classes, loudest first.
        top_3 = np.argsort(avg)[-3:][::-1]
        dominant_notes = [pitch_classes[p] for p in top_3]

        start_time = i * window_seconds
        end_time = (i + 1) * window_seconds

        if collect:
            results.append({
                "time_start": start_time,
                "time_end": end_time,
                "key": best_key,
                "confidence": round(best_corr, 3),
                "dominant_notes": dominant_notes,
            })
        else:
            dominant = ", ".join(dominant_notes)
            print(f"{format_time(start_time)}-{format_time(end_time):<8} {best_key:<15} {best_corr:>10.3f} {dominant}")

    return results
|
|
|
|
|
|
def analyze_energy(y, sr, *, collect=False):
    """Summarise RMS loudness in 5-second windows and flag large jumps.

    Each window's mean RMS is expressed as an integer percentage of the
    loudest RMS frame in the track. A change of more than 20 percentage
    points between consecutive windows is reported as an energy shift.

    When collect=True, returns data instead of printing.
    """
    import numpy as np

    window_seconds = 5
    hop_length = 512
    verbose = not collect

    rms = librosa.feature.rms(y=y)[0]
    frames_per_window = int(window_seconds * sr / hop_length)

    # Normalise against the loudest frame; fall back to 1 for an all-zero
    # signal so the division below stays defined.
    peak = np.max(rms)
    if peak == 0:
        peak = 1

    window_count = len(rms) // frames_per_window

    if verbose:
        print("\n=== ENERGY / LOUDNESS ARC ===")
        print(f"{'Time':<15} {'Energy':>7} {'Bar (visual)'}")
        print("-" * 60)

    windows = []
    energies = []
    for idx in range(window_count):
        first = idx * frames_per_window
        segment = rms[first:first + frames_per_window]
        pct = int((np.mean(segment) / peak) * 100)
        energies.append(pct)

        t0 = idx * window_seconds
        if verbose:
            bar = "\u2588" * (pct // 2)
            print(f"{format_time(t0):<15} {pct:>5}% {bar}")
        else:
            windows.append({
                "time": t0,
                "energy_pct": pct,
            })

    # Compare every window with its predecessor to find abrupt changes.
    if verbose:
        print("\n--- Energy Shifts (>20% change) ---")

    shifts = []
    any_shift = False
    for idx, (previous, current) in enumerate(zip(energies, energies[1:]), start=1):
        delta = current - previous
        if abs(delta) <= 20:
            continue

        any_shift = True
        when = idx * window_seconds
        direction = "UP" if delta > 0 else "DOWN"
        if verbose:
            print(f" {format_time(when)} \u2014 energy {direction} {abs(delta)}% ({previous}% \u2192 {current}%)")
        else:
            shifts.append({
                "time": when,
                "direction": direction,
                "change_pct": abs(delta),
                "from_pct": previous,
                "to_pct": current,
            })

    if verbose and not any_shift:
        print(" No dramatic energy shifts detected (all changes < 20%)")

    return {"windows": windows, "shifts": shifts}
|
|
|
|
|
|
def analyze_sections(y, sr, *, collect=False):
    """Detect section boundaries by clustering MFCC frames into segments.

    Thirteen MFCCs are computed for the track and handed to
    ``librosa.segment.agglomerative``, asking for up to eight segments. The
    returned boundary frames are converted to seconds; boundaries within the
    first half second are dropped so the start of the track is not listed.

    Args:
        y: Audio samples, passed straight to ``librosa.feature.mfcc``.
        sr: Sample rate of ``y`` in Hz.
        collect: When True, print nothing and return the boundary data.

    Returns:
        When ``collect`` is True, a list of dicts with ``section`` (1-based
        position of the boundary in the detected list) and ``time`` (seconds,
        rounded to two decimals). Otherwise an empty list.
    """
    max_sections = 8

    mfcc = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=13)

    # The clusterer cannot be asked for more segments than there are frames,
    # and has nothing to split with fewer than two frames. Clamp the request
    # so very short clips yield "no boundaries" instead of an exception.
    n_frames = mfcc.shape[-1]
    if n_frames < 2:
        bound_times = []
    else:
        bounds = librosa.segment.agglomerative(mfcc, k=min(max_sections, n_frames))
        bound_times = librosa.frames_to_time(bounds, sr=sr)

    results = []

    if not collect:
        print("\n=== SECTION BOUNDARIES (spectral novelty) ===")
        print("Detected section changes at:")

    for i, t in enumerate(bound_times):
        if t <= 0.5:  # Skip very start
            continue
        if collect:
            results.append({
                "section": i + 1,
                "time": round(float(t), 2),
            })
        else:
            print(f" Section {i+1}: {format_time(t)}")

    return results
|
|
|
|
|
|
def analyze_spectral_balance(y, sr, *, collect=False):
    """Report the low/mid/high split of spectral magnitude per 10-second window.

    The STFT magnitude is averaged over three bands (below 250 Hz, 250 Hz up
    to 2 kHz, 2 kHz and above) and each band mean is expressed as an integer
    percentage of the three means combined.

    When collect=True, returns data instead of printing.
    """
    import numpy as np

    magnitudes = np.abs(librosa.stft(y))
    bin_freqs = librosa.fft_frequencies(sr=sr)

    # Boolean selectors over the frequency bins; mid is whatever is neither
    # low nor high.
    low_band = bin_freqs < 250
    high_band = bin_freqs >= 2000
    mid_band = ~(low_band | high_band)

    window_seconds = 10
    hop_length = 512
    frames_per_window = int(window_seconds * sr / hop_length)
    window_count = magnitudes.shape[1] // frames_per_window

    if not collect:
        print("\n=== SPECTRAL BALANCE (low/mid/high) ===")
        print(f"{'Time':<15} {'Low(<250Hz)':>12} {'Mid(250-2k)':>12} {'High(>2kHz)':>12} {'Balance'}")
        print("-" * 70)

    results = []
    for w in range(window_count):
        first = w * frames_per_window
        window = magnitudes[:, first:first + frames_per_window]

        low = np.mean(window[low_band, :])
        mid = np.mean(window[mid_band, :])
        high = np.mean(window[high_band, :])

        # Avoid dividing by zero on a completely silent window.
        denom = low + mid + high
        if denom == 0:
            denom = 1

        l_pct = int(low / denom * 100)
        m_pct = int(mid / denom * 100)
        h_pct = int(high / denom * 100)

        if l_pct > 45:
            label = "BASS-heavy"
        elif m_pct > 50:
            label = "MID-heavy"
        else:
            label = "balanced"

        t0 = w * window_seconds
        if not collect:
            print(f"{format_time(t0):<15} {l_pct:>10}% {m_pct:>10}% {h_pct:>10}% {label}")
            continue

        results.append({
            "time": t0,
            "low_pct": l_pct,
            "mid_pct": m_pct,
            "high_pct": h_pct,
            "balance": label,
        })

    return results
|
|
|
|
|
|
def format_json_output(filepath, duration, energy_data, chord_data, section_data, spectral_data):
    """Assemble the report envelope that is later serialised to JSON.

    The envelope carries the script name and version, a UTC ISO-8601
    timestamp, a fixed "pass" status, the analysis results under "metrics",
    and an empty findings list with a zero total.
    """
    generated_at = datetime.now(timezone.utc).isoformat()

    metrics = {
        "file": os.path.basename(filepath),
        "duration_seconds": round(duration, 2),
        "energy_arc": energy_data,
        "chord_progression": chord_data,
        "section_boundaries": section_data,
        "spectral_balance": spectral_data,
    }

    report = {
        "script": SCRIPT_NAME,
        "version": VERSION,
        "timestamp": generated_at,
        "status": "pass",
    }
    report["metrics"] = metrics
    report["findings"] = []
    report["summary"] = {"total": 0}
    return report
|
|
|
|
|
|
def main():
    """CLI entry point: parse arguments, analyse one audio file, emit results.

    In text mode the four analyses print human-readable tables to stdout and
    nothing else is written (``--output`` and the archive apply to JSON mode
    only, because only that mode builds the report dict). In JSON mode, the
    default, the report goes to ``--output`` or stdout and is then passed to
    the per-song archive unless ``--no-archive`` was given.

    Exits with status 1 and a message on stderr when the input file does not
    exist or cannot be decoded.
    """
    require_audio_deps()

    import librosa as _librosa  # noqa: E402
    import numpy as np  # noqa: E402, F401

    # The analyze_* helpers reference a module-level ``librosa`` name; publish
    # it here so the import only happens after the dependency check above.
    globals()["librosa"] = _librosa

    parser = argparse.ArgumentParser(
        description="Deep single-track audio analysis — energy, chords, sections, spectral balance.",
    )
    parser.add_argument(
        "audio_file",
        help="Path to the audio file to analyze",
    )
    parser.add_argument(
        "--format",
        choices=["json", "text"],
        default="json",
        dest="output_format",
        help="Output format (default: json)",
    )
    parser.add_argument(
        "-o", "--output",
        default=None,
        help="Output file path (default: stdout)",
    )
    parser.add_argument(
        "--archive", nargs="?", const="", default="",
        help=(
            "Persist full JSON output to a per-song archive. "
            "With no path: writes to docs/audio-analysis/songs/<song-slug>.json. "
            "Pass an explicit path to override. Default: ON."
        ),
    )
    parser.add_argument(
        "--no-archive", dest="archive", action="store_const", const=None,
        help="Skip writing the JSON archive.",
    )
    args = parser.parse_args()

    filepath = args.audio_file
    if not os.path.exists(filepath):
        print(f"ERROR: audio file not found: {filepath}", file=sys.stderr)
        sys.exit(1)

    try:
        y, sr = _librosa.load(filepath, sr=22050)
    except Exception as exc:
        # CLI boundary: decoders raise backend-specific exception types, so
        # report any load failure as a clean runtime error, not a traceback.
        print(f"ERROR: could not load audio file {filepath}: {exc}", file=sys.stderr)
        sys.exit(1)
    duration = _librosa.get_duration(y=y, sr=sr)

    if args.output_format == "text":
        print(f"Loading: {os.path.basename(filepath)}")
        print(f"Duration: {int(duration//60)}:{int(duration%60):02d}\n")
        analyze_energy(y, sr)
        analyze_chords(y, sr)
        analyze_sections(y, sr)
        analyze_spectral_balance(y, sr)
        # No report dict exists in text mode, so there is nothing to write to
        # --output or to the archive.
        return

    energy_data = analyze_energy(y, sr, collect=True)
    chord_data = analyze_chords(y, sr, collect=True)
    section_data = analyze_sections(y, sr, collect=True)
    spectral_data = analyze_spectral_balance(y, sr, collect=True)

    result = format_json_output(filepath, duration, energy_data, chord_data, section_data, spectral_data)
    output = json.dumps(result, indent=2)

    if args.output:
        Path(args.output).write_text(output + "\n", encoding="utf-8")
    else:
        print(output)

    # Per-song JSON archive (default ON unless --no-archive)
    song_slug = os.path.splitext(os.path.basename(filepath))[0]
    archive_target = resolve_archive_arg("songs", song_slug, args.archive)
    if archive_target is not None:
        res = write_archive(archive_target, result)
        print(f" ARCHIVED: {res['path']} ({res['bytes_written']} bytes)", file=sys.stderr)


if __name__ == "__main__":
    main()
|