#!/bin/bash
# bmalph-version: 2.11.0

# Claude Code Ralph Loop with Rate Limiting and Documentation
# Adaptation of the Ralph technique for Claude Code with usage management

set -e  # Exit on any error

# Note: CLAUDE_CODE_ENABLE_DANGEROUS_PERMISSIONS_IN_SANDBOX and IS_SANDBOX
# environment variables are NOT exported here. Tool restrictions are handled
# via --allowedTools flag in CLAUDE_CMD_ARGS, which is the proper approach.
# Exporting sandbox variables without a verified sandbox would be misleading.

# Library components live in lib/ next to this script; they are sourced in a
# fixed order.
SCRIPT_DIR="$(dirname "${BASH_SOURCE[0]}")"
for _ralph_lib in date_utils timeout_utils response_analyzer circuit_breaker; do
    # shellcheck source=/dev/null
    source "$SCRIPT_DIR/lib/${_ralph_lib}.sh"
done
unset _ralph_lib

# ---------------------------------------------------------------------------
# Configuration
# Ralph-specific files live in the .ralph/ subfolder (override with RALPH_DIR).
# ---------------------------------------------------------------------------
RALPH_DIR="${RALPH_DIR:-.ralph}"

# Files and directories derived from RALPH_DIR
PROMPT_FILE="$RALPH_DIR/PROMPT.md"
LOG_DIR="$RALPH_DIR/logs"
DOCS_DIR="$RALPH_DIR/docs/generated"
STATUS_FILE="$RALPH_DIR/status.json"
PROGRESS_FILE="$RALPH_DIR/progress.json"
LIVE_LOG_FILE="$RALPH_DIR/live.log"       # Fixed file for live output monitoring
CALL_COUNT_FILE="$RALPH_DIR/.call_count"
TIMESTAMP_FILE="$RALPH_DIR/.last_reset"

# Driver identity and loop behaviour
CLAUDE_CODE_CMD="claude"
DRIVER_DISPLAY_NAME="Claude Code"
SLEEP_DURATION=3600                       # 1 hour in seconds
LIVE_OUTPUT=false                         # Show Claude Code output in real-time (streaming)
USE_TMUX=false
PENDING_EXIT_REASON=""

# Save environment variable state BEFORE setting defaults.
# These _env_* copies are used by load_ralphrc() to determine which values
# came from the environment. Each copy is the variable's current value, or
# empty when the variable is unset or empty.
for _ralph_var in \
    MAX_CALLS_PER_HOUR \
    CLAUDE_TIMEOUT_MINUTES \
    CLAUDE_OUTPUT_FORMAT \
    CLAUDE_ALLOWED_TOOLS \
    CLAUDE_PERMISSION_MODE \
    CLAUDE_USE_CONTINUE \
    CLAUDE_SESSION_EXPIRY_HOURS; do
    printf -v "_env_${_ralph_var}" '%s' "${!_ralph_var:-}"
done
unset _ralph_var

# "x" when CLAUDE_PERMISSION_MODE is set at all (even to an empty string),
# empty when it is unset.
_env_has_CLAUDE_PERMISSION_MODE="${CLAUDE_PERMISSION_MODE+x}"
# Snapshot the remaining configuration variables exactly as they arrived from
# the environment. Each _env_<NAME> copy holds the current value of <NAME>,
# or an empty string when <NAME> is unset or empty.
for _ralph_var in \
    ALLOWED_TOOLS \
    SESSION_CONTINUITY \
    SESSION_EXPIRY_HOURS \
    PERMISSION_DENIAL_MODE \
    RALPH_VERBOSE \
    VERBOSE_PROGRESS \
    CB_COOLDOWN_MINUTES \
    CB_AUTO_RESET \
    TEST_COMMAND \
    QUALITY_GATES \
    QUALITY_GATE_MODE \
    QUALITY_GATE_TIMEOUT \
    QUALITY_GATE_ON_COMPLETION_ONLY \
    REVIEW_ENABLED \
    REVIEW_INTERVAL \
    REVIEW_MODE; do
    printf -v "_env_${_ralph_var}" '%s' "${!_ralph_var:-}"
done

# CLI flags are parsed before main() runs, so capture explicit values separately.
# The _CLI_* markers keep any value they already have and otherwise start empty.
: "${_CLI_MAX_CALLS_PER_HOUR:=}"
: "${_CLI_CLAUDE_TIMEOUT_MINUTES:=}"
: "${_CLI_CLAUDE_OUTPUT_FORMAT:=}"
: "${_CLI_ALLOWED_TOOLS:=}"
: "${_CLI_SESSION_CONTINUITY:=}"
: "${_CLI_SESSION_EXPIRY_HOURS:=}"
: "${_CLI_VERBOSE_PROGRESS:=}"

# The _cli_* value holders start out as copies of the current values.
for _ralph_var in \
    MAX_CALLS_PER_HOUR \
    CLAUDE_TIMEOUT_MINUTES \
    CLAUDE_OUTPUT_FORMAT \
    CLAUDE_ALLOWED_TOOLS \
    CLAUDE_USE_CONTINUE \
    CLAUDE_SESSION_EXPIRY_HOURS \
    VERBOSE_PROGRESS; do
    printf -v "_cli_${_ralph_var}" '%s' "${!_ralph_var:-}"
done
unset _ralph_var

# Now set defaults (only if not already set by environment)
: "${MAX_CALLS_PER_HOUR:=100}"
: "${VERBOSE_PROGRESS:=false}"
: "${CLAUDE_TIMEOUT_MINUTES:=15}"
DEFAULT_CLAUDE_ALLOWED_TOOLS="Write,Read,Edit,MultiEdit,Glob,Grep,Task,TodoWrite,WebFetch,WebSearch,EnterPlanMode,ExitPlanMode,NotebookEdit,Bash"
DEFAULT_PERMISSION_DENIAL_MODE="continue"

# Modern Claude CLI configuration (Phase 1.1)
# Each setting keeps a non-empty value it already has and otherwise falls back
# to the default shown.
: "${CLAUDE_OUTPUT_FORMAT:=json}"
: "${CLAUDE_ALLOWED_TOOLS:=$DEFAULT_CLAUDE_ALLOWED_TOOLS}"
: "${CLAUDE_PERMISSION_MODE:=bypassPermissions}"
: "${CLAUDE_USE_CONTINUE:=true}"
: "${PERMISSION_DENIAL_MODE:=$DEFAULT_PERMISSION_DENIAL_MODE}"
CLAUDE_SESSION_FILE="$RALPH_DIR/.claude_session_id"  # Session ID persistence file
CLAUDE_MIN_VERSION="2.0.76"                          # Minimum required Claude CLI version

# Session management configuration (Phase 1.2)
# Note: SESSION_EXPIRATION_SECONDS is defined in lib/response_analyzer.sh (86400 = 24 hours)
RALPH_SESSION_FILE="$RALPH_DIR/.ralph_session"                  # Ralph-specific session tracking (lifecycle)
RALPH_SESSION_HISTORY_FILE="$RALPH_DIR/.ralph_session_history"  # Session transition history
# Session expiration: 24 hours default balances project continuity with fresh context
# Too short = frequent context loss; Too long = stale context causes unpredictable behavior
: "${CLAUDE_SESSION_EXPIRY_HOURS:=24}"

# Quality gates configuration
: "${TEST_COMMAND:=}"
: "${QUALITY_GATES:=}"
: "${QUALITY_GATE_MODE:=warn}"
: "${QUALITY_GATE_TIMEOUT:=120}"
: "${QUALITY_GATE_ON_COMPLETION_ONLY:=false}"
QUALITY_GATE_RESULTS_FILE="$RALPH_DIR/.quality_gate_results"

# Periodic code review configuration
: "${REVIEW_ENABLED:=false}"
: "${REVIEW_INTERVAL:=5}"
REVIEW_FINDINGS_FILE="$RALPH_DIR/.review_findings.json"
REVIEW_PROMPT_FILE="$RALPH_DIR/REVIEW_PROMPT.md"
REVIEW_LAST_SHA_FILE="$RALPH_DIR/.review_last_sha"

# REVIEW_MODE is derived in initialize_runtime_context() after .ralphrc is loaded.
# This ensures backwards compat: old .ralphrc files with only REVIEW_ENABLED=true
# still map to enhanced mode. Env vars always win via the snapshot/restore mechanism.
REVIEW_MODE="${REVIEW_MODE:-off}"

# Valid tool patterns for --allowed-tools validation
# Default: Claude Code tools. Platform driver overwrites via driver_valid_tools() in main().
# Validation runs in main() after load_platform_driver so the correct patterns are in effect.
VALID_TOOL_PATTERNS=(
    "Write"
    "Read"
    "Edit"
    "MultiEdit"
    "Glob"
    "Grep"
    "Task"
    "TodoWrite"
    "WebFetch"
    "WebSearch"
    "AskUserQuestion"
    "EnterPlanMode"
    "ExitPlanMode"
    "Bash"
    "Bash(git *)"
    "Bash(npm *)"
    "Bash(bats *)"
    "Bash(python *)"
    "Bash(node *)"
    "NotebookEdit"
)
ALLOWED_TOOLS_IGNORED_WARNED=false
PERMISSION_DENIAL_ACTION=""

# Exit detection configuration
EXIT_SIGNALS_FILE="$RALPH_DIR/.exit_signals"
RESPONSE_ANALYSIS_FILE="$RALPH_DIR/.response_analysis"
MAX_CONSECUTIVE_TEST_LOOPS=3
MAX_CONSECUTIVE_DONE_SIGNALS=2
TEST_PERCENTAGE_THRESHOLD=30  # If more than 30% of recent loops are test-only, flag it

# Ralph configuration file
# bmalph installs .ralph/.ralphrc. Fall back to a project-root .ralphrc for
# older standalone Ralph layouts.
RALPHRC_FILE="${RALPHRC_FILE:-$RALPH_DIR/.ralphrc}"
RALPHRC_LOADED=false

# Platform driver (set from .ralphrc or environment)
PLATFORM_DRIVER="${PLATFORM_DRIVER:-claude-code}"
RUNTIME_CONTEXT_LOADED=false

# resolve_ralphrc_file - Resolve the Ralph config path
#
# Globals:  RALPHRC_FILE (read)
# Outputs:  one path on stdout, chosen in this order:
#             1. $RALPHRC_FILE when that file exists
#             2. ".ralphrc" in the current directory when it exists and
#                RALPHRC_FILE is not already ".ralphrc"
#             3. $RALPHRC_FILE unchanged (the file may not exist; callers
#                check for that themselves)
# Returns:  0 always
resolve_ralphrc_file() {
    local resolved="$RALPHRC_FILE"

    if [[ ! -f "$RALPHRC_FILE" && "$RALPHRC_FILE" != ".ralphrc" && -f ".ralphrc" ]]; then
        resolved=".ralphrc"
    fi

    # printf rather than echo: echo would treat a path such as "-n" or "-e"
    # as an option and print nothing useful.
    printf '%s\n' "$resolved"
    return 0
}

# load_ralphrc - Load project-specific configuration from .ralph/.ralphrc
#
# This function sources the bundled .ralph/.ralphrc file when present, falling
# back to a project-root .ralphrc for older standalone Ralph layouts.
# Environment variables take precedence over config values.
#
# Configuration values that can be overridden:
#   - MAX_CALLS_PER_HOUR
#   - CLAUDE_TIMEOUT_MINUTES
#   - CLAUDE_OUTPUT_FORMAT
#   - CLAUDE_PERMISSION_MODE
#   - ALLOWED_TOOLS (mapped to CLAUDE_ALLOWED_TOOLS for Claude Code only)
#   - PERMISSION_DENIAL_MODE
#   - SESSION_CONTINUITY (mapped to CLAUDE_USE_CONTINUE)
#   - SESSION_EXPIRY_HOURS (mapped to CLAUDE_SESSION_EXPIRY_HOURS)
#   - CB_NO_PROGRESS_THRESHOLD
#   - CB_SAME_ERROR_THRESHOLD
#   - CB_OUTPUT_DECLINE_THRESHOLD
#   - RALPH_VERBOSE
#
# Precedence applied after sourcing (highest first):
#   CLI flags > public env aliases > internal env vars > config file.
# Globals written: the settings listed above, RALPHRC_FILE, RALPHRC_LOADED.
# Returns: 0 always (a missing config file is not an error).
load_ralphrc() {
    local config_file
    config_file="$(resolve_ralphrc_file)"

    if [[ ! -f "$config_file" ]]; then
        return 0
    fi

    # Source config (this may override default values)
    # shellcheck source=/dev/null
    source "$config_file"

    # Map config variable names to internal names.
    # PERMISSION_DENIAL_MODE needs no mapping: the config name and the
    # internal name are the same variable.
    if [[ -n "${ALLOWED_TOOLS:-}" ]]; then
        CLAUDE_ALLOWED_TOOLS="$ALLOWED_TOOLS"
    fi
    if [[ -n "${SESSION_CONTINUITY:-}" ]]; then
        CLAUDE_USE_CONTINUE="$SESSION_CONTINUITY"
    fi
    if [[ -n "${SESSION_EXPIRY_HOURS:-}" ]]; then
        CLAUDE_SESSION_EXPIRY_HOURS="$SESSION_EXPIRY_HOURS"
    fi
    if [[ -n "${RALPH_VERBOSE:-}" ]]; then
        VERBOSE_PROGRESS="$RALPH_VERBOSE"
    fi

    # Restore ONLY values that were explicitly set via environment variables
    # (not script defaults). The _env_* variables were captured BEFORE defaults were set.
    # Internal CLAUDE_* variables are kept for backward compatibility.
    [[ -n "$_env_MAX_CALLS_PER_HOUR" ]] && MAX_CALLS_PER_HOUR="$_env_MAX_CALLS_PER_HOUR"
    [[ -n "$_env_CLAUDE_TIMEOUT_MINUTES" ]] && CLAUDE_TIMEOUT_MINUTES="$_env_CLAUDE_TIMEOUT_MINUTES"
    [[ -n "$_env_CLAUDE_OUTPUT_FORMAT" ]] && CLAUDE_OUTPUT_FORMAT="$_env_CLAUDE_OUTPUT_FORMAT"
    [[ -n "$_env_CLAUDE_ALLOWED_TOOLS" ]] && CLAUDE_ALLOWED_TOOLS="$_env_CLAUDE_ALLOWED_TOOLS"
    # Permission mode is restored whenever the variable was set at all, even
    # to an empty string; normalize_claude_permission_mode fills in empties.
    if [[ "$_env_has_CLAUDE_PERMISSION_MODE" == "x" ]]; then
        CLAUDE_PERMISSION_MODE="$_env_CLAUDE_PERMISSION_MODE"
    fi
    [[ -n "$_env_CLAUDE_USE_CONTINUE" ]] && CLAUDE_USE_CONTINUE="$_env_CLAUDE_USE_CONTINUE"
    [[ -n "$_env_CLAUDE_SESSION_EXPIRY_HOURS" ]] && CLAUDE_SESSION_EXPIRY_HOURS="$_env_CLAUDE_SESSION_EXPIRY_HOURS"
    [[ -n "$_env_PERMISSION_DENIAL_MODE" ]] && PERMISSION_DENIAL_MODE="$_env_PERMISSION_DENIAL_MODE"
    [[ -n "$_env_VERBOSE_PROGRESS" ]] && VERBOSE_PROGRESS="$_env_VERBOSE_PROGRESS"

    # Public aliases are the preferred external interface and win over the
    # legacy internal environment variables when both are explicitly set.
    [[ -n "$_env_ALLOWED_TOOLS" ]] && CLAUDE_ALLOWED_TOOLS="$_env_ALLOWED_TOOLS"
    [[ -n "$_env_SESSION_CONTINUITY" ]] && CLAUDE_USE_CONTINUE="$_env_SESSION_CONTINUITY"
    [[ -n "$_env_SESSION_EXPIRY_HOURS" ]] && CLAUDE_SESSION_EXPIRY_HOURS="$_env_SESSION_EXPIRY_HOURS"
    [[ -n "$_env_RALPH_VERBOSE" ]] && VERBOSE_PROGRESS="$_env_RALPH_VERBOSE"

    # CLI flags are the highest-priority runtime inputs because they are
    # parsed before main() and would otherwise be overwritten by .ralphrc.
    # Keep every config-backed CLI flag here so the precedence contract stays
    # consistent: CLI > public env aliases > internal env vars > config.
    [[ "$_CLI_MAX_CALLS_PER_HOUR" == "true" ]] && MAX_CALLS_PER_HOUR="$_cli_MAX_CALLS_PER_HOUR"
    [[ "$_CLI_CLAUDE_TIMEOUT_MINUTES" == "true" ]] && CLAUDE_TIMEOUT_MINUTES="$_cli_CLAUDE_TIMEOUT_MINUTES"
    [[ "$_CLI_CLAUDE_OUTPUT_FORMAT" == "true" ]] && CLAUDE_OUTPUT_FORMAT="$_cli_CLAUDE_OUTPUT_FORMAT"
    [[ "$_CLI_ALLOWED_TOOLS" == "true" ]] && CLAUDE_ALLOWED_TOOLS="$_cli_CLAUDE_ALLOWED_TOOLS"
    [[ "$_CLI_SESSION_CONTINUITY" == "true" ]] && CLAUDE_USE_CONTINUE="$_cli_CLAUDE_USE_CONTINUE"
    [[ "$_CLI_SESSION_EXPIRY_HOURS" == "true" ]] && CLAUDE_SESSION_EXPIRY_HOURS="$_cli_CLAUDE_SESSION_EXPIRY_HOURS"
    [[ "$_CLI_VERBOSE_PROGRESS" == "true" ]] && VERBOSE_PROGRESS="$_cli_VERBOSE_PROGRESS"

    [[ -n "$_env_CB_COOLDOWN_MINUTES" ]] && CB_COOLDOWN_MINUTES="$_env_CB_COOLDOWN_MINUTES"
    [[ -n "$_env_CB_AUTO_RESET" ]] && CB_AUTO_RESET="$_env_CB_AUTO_RESET"
    [[ -n "$_env_TEST_COMMAND" ]] && TEST_COMMAND="$_env_TEST_COMMAND"
    [[ -n "$_env_QUALITY_GATES" ]] && QUALITY_GATES="$_env_QUALITY_GATES"
    [[ -n "$_env_QUALITY_GATE_MODE" ]] && QUALITY_GATE_MODE="$_env_QUALITY_GATE_MODE"
    [[ -n "$_env_QUALITY_GATE_TIMEOUT" ]] && QUALITY_GATE_TIMEOUT="$_env_QUALITY_GATE_TIMEOUT"
    [[ -n "$_env_QUALITY_GATE_ON_COMPLETION_ONLY" ]] && QUALITY_GATE_ON_COMPLETION_ONLY="$_env_QUALITY_GATE_ON_COMPLETION_ONLY"
    [[ -n "$_env_REVIEW_ENABLED" ]] && REVIEW_ENABLED="$_env_REVIEW_ENABLED"
    [[ -n "$_env_REVIEW_INTERVAL" ]] && REVIEW_INTERVAL="$_env_REVIEW_INTERVAL"
    [[ -n "$_env_REVIEW_MODE" ]] && REVIEW_MODE="$_env_REVIEW_MODE"

    normalize_claude_permission_mode
    RALPHRC_FILE="$config_file"
    RALPHRC_LOADED=true
    return 0
}

# Default driver capability: no tool-allowlist support (returns 1).
# load_platform_driver sources a driver file afterwards, which may redefine it.
driver_supports_tool_allowlist() {
    return 1
}

# Default remediation hints printed when a loop halts on a permission denial.
# Outputs: four hint lines on stdout.
driver_permission_denial_help() {
    echo " - Review the active driver's permission or approval settings."
    echo " - ALLOWED_TOOLS in $RALPHRC_FILE only applies to the Claude Code driver."
    echo " - Keep CLAUDE_PERMISSION_MODE=bypassPermissions for unattended Claude Code loops."
    echo " - After updating permissions, reset the session and restart the loop."
}

# Source platform driver
#
# Globals:  SCRIPT_DIR, PLATFORM_DRIVER (read);
#           CLAUDE_CODE_CMD, DRIVER_DISPLAY_NAME (written)
# Exits 1 (after logging the drivers that do exist) when
# $SCRIPT_DIR/drivers/$PLATFORM_DRIVER.sh is missing.
load_platform_driver() {
    local driver_file="$SCRIPT_DIR/drivers/${PLATFORM_DRIVER}.sh"

    if [[ ! -f "$driver_file" ]]; then
        # Build the list from a glob instead of parsing ls output, so odd
        # file names and an empty drivers directory are both handled cleanly.
        local available_drivers=""
        local candidate
        for candidate in "$SCRIPT_DIR/drivers/"*.sh; do
            [[ -e "$candidate" ]] || continue   # unmatched glob stays literal
            candidate="${candidate##*/}"
            available_drivers+="${candidate%.sh} "
        done

        log_status "ERROR" "Platform driver not found: $driver_file"
        log_status "ERROR" "Available drivers: $available_drivers"
        exit 1
    fi

    # shellcheck source=/dev/null
    source "$driver_file"

    # Initialize driver-specific tool patterns
    driver_valid_tools

    # Set CLI binary from driver
    CLAUDE_CODE_CMD="$(driver_cli_binary)"
    DRIVER_DISPLAY_NAME="$(driver_display_name)"

    log_status "INFO" "Platform driver: $DRIVER_DISPLAY_NAME ($CLAUDE_CODE_CMD)"
}

# Load config and platform driver once; later calls return immediately.
# Globals written: REVIEW_MODE, REVIEW_ENABLED, RUNTIME_CONTEXT_LOADED
# (plus everything load_ralphrc and load_platform_driver set).
initialize_runtime_context() {
    if [[ "$RUNTIME_CONTEXT_LOADED" == "true" ]]; then
        return 0
    fi

    if load_ralphrc; then
        if [[ "$RALPHRC_LOADED" == "true" ]]; then
            log_status "INFO" "Loaded configuration from $RALPHRC_FILE"
        fi
    fi

    # Derive REVIEW_MODE after .ralphrc load so backwards-compat works:
    # old .ralphrc files with only REVIEW_ENABLED=true map to enhanced mode.
    if [[ "$REVIEW_MODE" == "off" && "$REVIEW_ENABLED" == "true" ]]; then
        REVIEW_MODE="enhanced"
    fi

    # Keep REVIEW_ENABLED in sync for any code that checks it
    if [[ "$REVIEW_MODE" != "off" ]]; then
        REVIEW_ENABLED="true"
    else
        REVIEW_ENABLED="false"
    fi

    # Load platform driver after config so PLATFORM_DRIVER can be overridden.
    load_platform_driver

    RUNTIME_CONTEXT_LOADED=true
}

# Colors for terminal output
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
BLUE='\033[0;34m'
PURPLE='\033[0;35m'
NC='\033[0m' # No Color

# Initialize directories
mkdir -p "$LOG_DIR" "$DOCS_DIR"

# Check if tmux is available
# Exits 1 with install hints when the tmux binary is not on PATH.
check_tmux_available() {
    if ! command -v tmux &> /dev/null; then
        log_status "ERROR" "tmux is not installed. Please install tmux or run without --monitor flag."
        echo "Install tmux:"
        echo " Ubuntu/Debian: sudo apt-get install tmux"
        echo " macOS: brew install tmux"
        echo " CentOS/RHEL: sudo yum install tmux"
        exit 1
    fi
}

# Get the tmux base-index for windows (handles custom tmux configurations)
# Returns: the base window index (typically 0 or 1)
get_tmux_base_index() {
    local base_index
    # Guard the assignment: a failing tmux call must fall through to the
    # default below instead of tripping errexit.
    base_index=$(tmux show-options -gv base-index 2>/dev/null) || base_index=""
    # Default to 0 if not set or tmux command fails
    echo "${base_index:-0}"
}

# Setup tmux session with monitor
#
# Creates a detached three-pane session (loop | live output / status), starts
# the loop in the left pane with the user's settings forwarded as CLI flags,
# then attaches. Exits 0 once the attach returns.
setup_tmux_session() {
    local session_name
    session_name="ralph-$(date +%s)"
    local ralph_home="${RALPH_HOME:-$SCRIPT_DIR}"
    local project_dir
    project_dir="$(pwd)"

    initialize_runtime_context

    # Get the tmux base-index to handle custom configurations (e.g., base-index 1)
    local base_win
    base_win=$(get_tmux_base_index)

    log_status "INFO" "Setting up tmux session: $session_name"

    # Initialize live.log file
    echo "=== Ralph Live Output - Waiting for first loop... ===" > "$LIVE_LOG_FILE"

    # Create new tmux session detached (left pane - Ralph loop)
    tmux new-session -d -s "$session_name" -c "$project_dir"

    # Split window vertically (right side)
    tmux split-window -h -t "$session_name" -c "$project_dir"

    # Split right pane horizontally (top: Claude output, bottom: status)
    tmux split-window -v -t "$session_name:${base_win}.1" -c "$project_dir"

    # Right-top pane (pane 1): Live driver output
    tmux send-keys -t "$session_name:${base_win}.1" "tail -f '$project_dir/$LIVE_LOG_FILE'" Enter

    # Right-bottom pane (pane 2): Ralph status monitor
    # Prefer bmalph watch (TypeScript, fully tested) over legacy ralph_monitor.sh
    if command -v bmalph &> /dev/null; then
        tmux send-keys -t "$session_name:${base_win}.2" "bmalph watch" Enter
    elif command -v ralph-monitor &> /dev/null; then
        tmux send-keys -t "$session_name:${base_win}.2" "ralph-monitor" Enter
    else
        tmux send-keys -t "$session_name:${base_win}.2" "'$ralph_home/ralph_monitor.sh'" Enter
    fi

    # Start ralph loop in the left pane (exclude tmux flag to avoid recursion)
    # Forward all CLI parameters that were set by the user
    local ralph_cmd
    if command -v ralph &> /dev/null; then
        ralph_cmd="ralph"
    else
        ralph_cmd="'$ralph_home/ralph_loop.sh'"
    fi

    # Always use --live mode in tmux for real-time streaming
    ralph_cmd="$ralph_cmd --live"

    # Forward --calls if non-default
    if [[ "$MAX_CALLS_PER_HOUR" != "100" ]]; then
        ralph_cmd="$ralph_cmd --calls $MAX_CALLS_PER_HOUR"
    fi
    # Forward --prompt if non-default
    if [[ "$PROMPT_FILE" != "$RALPH_DIR/PROMPT.md" ]]; then
        ralph_cmd="$ralph_cmd --prompt '$PROMPT_FILE'"
    fi
    # Forward --output-format if non-default (default is json)
    if [[ "$CLAUDE_OUTPUT_FORMAT" != "json" ]]; then
        ralph_cmd="$ralph_cmd --output-format $CLAUDE_OUTPUT_FORMAT"
    fi
    # Forward --verbose if enabled
    if [[ "$VERBOSE_PROGRESS" == "true" ]]; then
        ralph_cmd="$ralph_cmd --verbose"
    fi
    # Forward --timeout if non-default (default is 15)
    if [[ "$CLAUDE_TIMEOUT_MINUTES" != "15" ]]; then
        ralph_cmd="$ralph_cmd --timeout $CLAUDE_TIMEOUT_MINUTES"
    fi
    # Forward --allowed-tools only for drivers that support tool allowlists
    if driver_supports_tool_allowlist && [[ "$CLAUDE_ALLOWED_TOOLS" != "$DEFAULT_CLAUDE_ALLOWED_TOOLS" ]]; then
        ralph_cmd="$ralph_cmd --allowed-tools '$CLAUDE_ALLOWED_TOOLS'"
    fi
    # Forward --no-continue if session continuity disabled
    if [[ "$CLAUDE_USE_CONTINUE" == "false" ]]; then
        ralph_cmd="$ralph_cmd --no-continue"
    fi
    # Forward --session-expiry if non-default (default is 24)
    if [[ "$CLAUDE_SESSION_EXPIRY_HOURS" != "24" ]]; then
        ralph_cmd="$ralph_cmd --session-expiry $CLAUDE_SESSION_EXPIRY_HOURS"
    fi
    # Forward --auto-reset-circuit if enabled
    if [[ "$CB_AUTO_RESET" == "true" ]]; then
        ralph_cmd="$ralph_cmd --auto-reset-circuit"
    fi

    tmux send-keys -t "$session_name:${base_win}.0" "$ralph_cmd" Enter

    # Focus on left pane (main ralph loop)
    tmux select-pane -t "$session_name:${base_win}.0"

    # Set pane titles (requires tmux 2.6+)
    tmux select-pane -t "$session_name:${base_win}.0" -T "Ralph Loop"
    tmux select-pane -t "$session_name:${base_win}.1" -T "$DRIVER_DISPLAY_NAME Output"
    tmux select-pane -t "$session_name:${base_win}.2" -T "Status"

    # Set window title
    tmux rename-window -t "$session_name:${base_win}" "Ralph: Loop | Output | Status"

    log_status "SUCCESS" "Tmux session created with 3 panes:"
    log_status "INFO" " Left: Ralph loop"
    log_status "INFO" " Right-top: $DRIVER_DISPLAY_NAME live output"
    log_status "INFO" " Right-bottom: Status monitor"
    log_status "INFO" ""
    log_status "INFO" "Use Ctrl+B then D to detach from session"
    log_status "INFO" "Use 'tmux attach -t $session_name' to reattach"

    # Attach to session (this will block until session ends)
    tmux attach-session -t "$session_name"

    exit 0
}

# Initialize call tracking
#
# Resets the hourly call counter when the stored hour stamp differs from the
# current hour, seeds the exit-signals file if it is missing, then initializes
# the circuit breaker.
init_call_tracking() {
    # Debug logging removed for cleaner output
    local current_hour
    current_hour=$(date +%Y%m%d%H)
    local last_reset_hour=""

    if [[ -f "$TIMESTAMP_FILE" ]]; then
        last_reset_hour=$(cat "$TIMESTAMP_FILE")
    fi

    # Reset counter if it's a new hour
    if [[ "$current_hour" != "$last_reset_hour" ]]; then
        echo "0" > "$CALL_COUNT_FILE"
        echo "$current_hour" > "$TIMESTAMP_FILE"
        log_status "INFO" "Call counter reset for new hour: $current_hour"
    fi

    # Initialize exit signals tracking if it doesn't exist
    if [[ ! -f "$EXIT_SIGNALS_FILE" ]]; then
        echo '{"test_only_loops": [], "done_signals": [], "completion_indicators": []}' > "$EXIT_SIGNALS_FILE"
    fi

    # Initialize circuit breaker
    init_circuit_breaker
}

# Log function with timestamps and colors
# Args: $1 = level (INFO, WARN, ERROR, SUCCESS, LOOP), $2 = message
# Outputs: colored line on stderr; plain line appended to $LOG_DIR/ralph.log
log_status() {
    local level=$1
    local message=$2
    local timestamp
    timestamp=$(date '+%Y-%m-%d %H:%M:%S')
    local color=""

    case "$level" in
        "INFO")    color=$BLUE ;;
        "WARN")    color=$YELLOW ;;
        "ERROR")   color=$RED ;;
        "SUCCESS") color=$GREEN ;;
        "LOOP")    color=$PURPLE ;;
    esac

    # Write to stderr so log messages don't interfere with function return values
    echo -e "${color}[$timestamp] [$level] $message${NC}" >&2
    echo "[$timestamp] [$level] $message" >> "$LOG_DIR/ralph.log"
}

# Human-readable label for a process exit code
# Args: $1 = exit code
describe_exit_code() {
    local code=$1
    case "$code" in
        0)   echo "completed" ;;
        1)   echo "error" ;;
        124) echo "timed out" ;;
        130) echo "interrupted (SIGINT)" ;;
        137) echo "killed (OOM or SIGKILL)" ;;
        143) echo "terminated (SIGTERM)" ;;
        *)   echo "error (exit $code)" ;;
    esac
}

# Update status JSON for external monitoring
#
# Args: $1 = loop count, $2 = calls made this hour, $3 = last action,
#       $4 = status, $5 = exit reason (optional), $6 = driver exit code (optional)
# The document is built in a temp file, merged with the quality-gate summary
# when a results file exists, and published with a single mv so a reader of
# STATUS_FILE never sees an empty or half-written file.
# Returns: non-zero when the status JSON could not be generated (STATUS_FILE
#          is then left untouched).
update_status() {
    local loop_count=$1
    local calls_made=$2
    local last_action=$3
    local status=$4
    local exit_reason=${5:-""}
    local driver_exit_code=${6:-""}
    local status_tmp="$STATUS_FILE.tmp"

    if ! jq -n \
        --arg timestamp "$(get_iso_timestamp)" \
        --argjson loop_count "$loop_count" \
        --argjson calls_made "$calls_made" \
        --argjson max_calls "$MAX_CALLS_PER_HOUR" \
        --arg last_action "$last_action" \
        --arg status "$status" \
        --arg exit_reason "$exit_reason" \
        --arg next_reset "$(get_next_hour_time)" \
        --arg driver_exit_code "$driver_exit_code" \
        '{
            timestamp: $timestamp,
            loop_count: $loop_count,
            calls_made_this_hour: $calls_made,
            max_calls_per_hour: $max_calls,
            last_action: $last_action,
            status: $status,
            exit_reason: $exit_reason,
            next_reset: $next_reset,
            driver_exit_code: (if $driver_exit_code != "" then ($driver_exit_code | tonumber) else null end)
        }' > "$status_tmp"; then
        rm -f "$status_tmp" 2>/dev/null
        return 1
    fi

    # Merge quality gate status if results exist (best effort: on failure the
    # unmerged status is published)
    if [[ -f "$QUALITY_GATE_RESULTS_FILE" ]]; then
        local qg_tmp="$STATUS_FILE.qg_tmp"
        if jq -s '.[0] * {quality_gates: {overall_status: .[1].overall_status, mode: .[1].mode}}' \
            "$status_tmp" "$QUALITY_GATE_RESULTS_FILE" > "$qg_tmp" 2>/dev/null; then
            mv "$qg_tmp" "$status_tmp"
        else
            rm -f "$qg_tmp" 2>/dev/null
        fi
    fi

    mv "$status_tmp" "$STATUS_FILE"
}

# Returns 0 when $1 is one of: continue, halt, threshold.
# Otherwise prints an error and the valid modes, and returns 1.
validate_permission_denial_mode() {
    local mode=$1
    case "$mode" in
        continue|halt|threshold)
            return 0
            ;;
        *)
            echo "Error: Invalid PERMISSION_DENIAL_MODE: '$mode'"
            echo "Valid modes: continue halt threshold"
            return 1
            ;;
    esac
}

# Returns 0 when $1 is one of: warn, block, circuit-breaker.
# Otherwise prints an error and the valid modes, and returns 1.
validate_quality_gate_mode() {
    local mode=$1
    case "$mode" in
        warn|block|circuit-breaker)
            return 0
            ;;
        *)
            echo "Error: Invalid QUALITY_GATE_MODE: '$mode'"
            echo "Valid modes: warn block circuit-breaker"
            return 1
            ;;
    esac
}

# Returns 0 when $1 consists only of digits and is not zero; otherwise prints
# an error and returns 1.
validate_quality_gate_timeout() {
    local timeout=$1
    if [[ ! "$timeout" =~ ^[0-9]+$ ]] || [[ "$timeout" -eq 0 ]]; then
        echo "Error: QUALITY_GATE_TIMEOUT must be a positive integer, got: '$timeout'"
        return 1
    fi
    return 0
}

# Replace an unset or empty CLAUDE_PERMISSION_MODE with "bypassPermissions".
normalize_claude_permission_mode() {
    if [[ -z "${CLAUDE_PERMISSION_MODE:-}" ]]; then
        CLAUDE_PERMISSION_MODE="bypassPermissions"
    fi
}

# Returns 0 when $1 is one of: auto, acceptEdits, bypassPermissions, default,
# dontAsk, plan. Otherwise prints an error and the valid modes, and returns 1.
validate_claude_permission_mode() {
    local mode=$1
    case "$mode" in
        auto|acceptEdits|bypassPermissions|default|dontAsk|plan)
            return 0
            ;;
        *)
            echo "Error: Invalid CLAUDE_PERMISSION_MODE: '$mode'"
            echo "Valid modes: auto acceptEdits bypassPermissions default dontAsk plan"
            return 1
            ;;
    esac
}

# Verify the three git preconditions in order: git is on PATH, the current
# directory is inside a repository, and the repository has at least one commit.
# Returns: 0 when all hold; 1 (after printing remediation steps) otherwise.
validate_git_repo() {
    if ! command -v git &>/dev/null; then
        log_status "ERROR" "git is not installed or not on PATH."
        echo ""
        echo "Ralph requires git for progress detection."
        echo ""
        echo "Install git:"
        echo " macOS: brew install git (or: xcode-select --install)"
        echo " Ubuntu: sudo apt-get install git"
        echo " Windows: https://git-scm.com/downloads"
        echo ""
        echo "After installing, run this command again."
        return 1
    fi

    if ! git rev-parse --git-dir &>/dev/null; then
        log_status "ERROR" "No git repository found in $(pwd)."
        echo ""
        echo "Ralph requires a git repository for progress detection."
        echo ""
        echo "To fix this, run:"
        echo " git init && git add -A && git commit -m 'initial commit'"
        return 1
    fi

    if ! git rev-parse HEAD &>/dev/null; then
        log_status "ERROR" "Git repository has no commits."
        echo ""
        echo "Ralph requires at least one commit for progress detection."
        echo ""
        echo "To fix this, run:"
        echo " git add -A && git commit -m 'initial commit'"
        return 1
    fi

    return 0
}

# Warn (at most once per run) that a custom tool allowlist has no effect when
# the active driver does not support allowlists. Returns 0 always.
warn_if_allowed_tools_ignored() {
    if driver_supports_tool_allowlist; then
        return 0
    fi

    if [[ "$ALLOWED_TOOLS_IGNORED_WARNED" == "true" ]]; then
        return 0
    fi

    if [[ "${_CLI_ALLOWED_TOOLS:-}" == "true" || "$CLAUDE_ALLOWED_TOOLS" != "$DEFAULT_CLAUDE_ALLOWED_TOOLS" ]]; then
        log_status "WARN" "ALLOWED_TOOLS/--allowed-tools is ignored by $DRIVER_DISPLAY_NAME."
        ALLOWED_TOOLS_IGNORED_WARNED=true
    fi

    return 0
}

# Print the ALLOWED_TOOLS value found in the config file, for drivers that
# support allowlists. Prints nothing when the driver lacks allowlist support,
# the file is missing, or no value is set. Returns 0 always.
show_current_allowed_tools() {
    if ! driver_supports_tool_allowlist; then
        return 0
    fi

    if [[ -f "$RALPHRC_FILE" ]]; then
        local current_tools
        current_tools=$(grep "^ALLOWED_TOOLS=" "$RALPHRC_FILE" 2>/dev/null | cut -d= -f2- | tr -d '"')
        if [[ -n "$current_tools" ]]; then
            echo -e "${BLUE}Current ALLOWED_TOOLS:${NC} $current_tools"
            echo ""
        fi
    fi

    return 0
}

# Returns 0 when the response analysis file exists and flags permission
# denials (.analysis.has_permission_denials == true); 1 otherwise.
response_analysis_has_permission_denials() {
    if [[ ! -f "$RESPONSE_ANALYSIS_FILE" ]]; then
        return 1
    fi

    local has_permission_denials
    has_permission_denials=$(jq -r '.analysis.has_permission_denials // false' "$RESPONSE_ANALYSIS_FILE" 2>/dev/null || echo "false")

    [[ "$has_permission_denials" == "true" ]]
}

# Print the denied commands from the response analysis as a comma-separated
# list, or "unknown" when the file is missing or jq fails.
get_response_analysis_denied_commands() {
    if [[ ! -f "$RESPONSE_ANALYSIS_FILE" ]]; then
        echo "unknown"
        return 0
    fi

    jq -r '.analysis.denied_commands | join(", ")' "$RESPONSE_ANALYSIS_FILE" 2>/dev/null || echo "unknown"
}

# Reset the permission-denial markers (and the completion/exit signals) in the
# response analysis file, rewriting it via a temp file.
# Returns: 0 on success or when the file is missing; 1 when jq fails.
clear_response_analysis_permission_denials() {
    if [[ ! -f "$RESPONSE_ANALYSIS_FILE" ]]; then
        return 0
    fi

    local tmp_file="$RESPONSE_ANALYSIS_FILE.tmp"
    if jq '
        (.analysis //= {}) |
        .analysis.has_completion_signal = false |
        .analysis.exit_signal = false |
        .analysis.has_permission_denials = false |
        .analysis.permission_denial_count = 0 |
        .analysis.denied_commands = []
    ' "$RESPONSE_ANALYSIS_FILE" > "$tmp_file" 2>/dev/null; then
        mv "$tmp_file" "$RESPONSE_ANALYSIS_FILE"
        return 0
    fi

    rm -f "$tmp_file" 2>/dev/null
    return 1
}

# React to a permission denial according to PERMISSION_DENIAL_MODE.
#
# Args: $1 = loop count, $2 = denied commands (optional, default "unknown")
# Sets PERMISSION_DENIAL_ACTION to "continue" (modes continue/threshold) or
# "halt" (mode halt, after resetting the session and printing guidance).
# Returns: 0 for a recognised mode; 1 otherwise (action left empty).
handle_permission_denial() {
    local loop_count=$1
    local denied_cmds=${2:-unknown}
    local calls_made
    calls_made=$(cat "$CALL_COUNT_FILE" 2>/dev/null || echo "0")
    PERMISSION_DENIAL_ACTION=""

    case "$PERMISSION_DENIAL_MODE" in
        continue|threshold)
            log_status "WARN" "🚫 Permission denied in loop #$loop_count: $denied_cmds"
            log_status "WARN" "PERMISSION_DENIAL_MODE=$PERMISSION_DENIAL_MODE - continuing execution"
            update_status "$loop_count" "$calls_made" "permission_denied" "running"
            PERMISSION_DENIAL_ACTION="continue"
            return 0
            ;;
        halt)
            log_status "ERROR" "🚫 Permission denied - halting loop"
            reset_session "permission_denied"
            update_status "$loop_count" "$calls_made" "permission_denied" "halted" "permission_denied"

            echo ""
            echo -e "${RED}╔════════════════════════════════════════════════════════════╗${NC}"
            echo -e "${RED}║ PERMISSION DENIED - Loop Halted ║${NC}"
            echo -e "${RED}╚════════════════════════════════════════════════════════════╝${NC}"
            echo ""
            echo -e "${YELLOW}$DRIVER_DISPLAY_NAME was denied permission to execute commands.${NC}"
            echo ""
            echo -e "${YELLOW}To fix this:${NC}"
            driver_permission_denial_help
            echo ""
            show_current_allowed_tools
            PERMISSION_DENIAL_ACTION="halt"
            return 0
            ;;
    esac

    return 1
}

# Detect, clear and handle a permission denial recorded for the current loop.
# Args: $1 = loop count
# Returns: 1 when no denial is recorded; 0 after one has been handled
#          (PERMISSION_DENIAL_ACTION then carries the outcome).
consume_current_loop_permission_denial() {
    local loop_count=$1
    PERMISSION_DENIAL_ACTION=""

    if ! response_analysis_has_permission_denials; then
        return 1
    fi

    # Read the command list before the markers are cleared.
    local denied_cmds
    denied_cmds=$(get_response_analysis_denied_commands)

    if ! clear_response_analysis_permission_denials; then
        log_status "WARN" "Failed to clear permission denial markers from response analysis"
    fi

    handle_permission_denial "$loop_count" "$denied_cmds"
    return 0
}

# Check if we can make another call
# Returns: 0 while the stored call count is below MAX_CALLS_PER_HOUR, else 1.
can_make_call() {
    local calls_made=0
    if [[ -f "$CALL_COUNT_FILE" ]]; then
        calls_made=$(cat "$CALL_COUNT_FILE")
    fi

    if [[ $calls_made -ge $MAX_CALLS_PER_HOUR ]]; then
        return 1  # Cannot make call
    else
        return 0  # Can make call
    fi
}

# Seconds remaining until the top of the next hour.
# Args: $1 = minute, $2 = second (two-digit strings as printed by date;
#       each defaults to the current time)
# The 10# prefix forces base 10: without it "08" and "09" are read as invalid
# octal constants and the arithmetic expansion fails.
calc_seconds_until_next_hour() {
    local minute=${1:-$(date +%M)}
    local second=${2:-$(date +%S)}
    echo $(( (60 - 10#$minute - 1) * 60 + (60 - 10#$second) ))
}

# Wait for rate limit reset with countdown
#
# Sleeps until the next hour boundary while printing a one-line countdown,
# then zeroes the call counter and stamps the new hour.
wait_for_reset() {
    local calls_made
    calls_made=$(cat "$CALL_COUNT_FILE" 2>/dev/null || echo "0")
    log_status "WARN" "Rate limit reached ($calls_made/$MAX_CALLS_PER_HOUR). Waiting for reset..."

    # Calculate time until next hour. Minute and second come from a single
    # date call so they describe the same instant.
    local current_minute current_second
    read -r current_minute current_second <<< "$(date '+%M %S')"
    local wait_time
    wait_time=$(calc_seconds_until_next_hour "$current_minute" "$current_second")

    log_status "INFO" "Sleeping for $wait_time seconds until next hour..."

    # Countdown display
    local hours minutes seconds
    while [[ $wait_time -gt 0 ]]; do
        hours=$((wait_time / 3600))
        minutes=$(((wait_time % 3600) / 60))
        seconds=$((wait_time % 60))

        # The color variables hold backslash escapes, so they belong in the
        # printf format string.
        # shellcheck disable=SC2059
        printf "\r${YELLOW}Time until reset: %02d:%02d:%02d${NC}" "$hours" "$minutes" "$seconds"
        sleep 1
        wait_time=$((wait_time - 1))
    done
    printf "\n"

    # Reset counter
    echo "0" > "$CALL_COUNT_FILE"
    date +%Y%m%d%H > "$TIMESTAMP_FILE"
    log_status "SUCCESS" "Rate limit reset! Ready for new calls."
}

# Count checkbox items in a fix plan.
# Args: $1 = path to @fix_plan.md (optional, defaults to $RALPH_DIR/@fix_plan.md)
# Outputs: "<completed> <uncompleted> <total>" on one line; all zeros when
#          the file is missing.
count_fix_plan_checkboxes() {
    local fix_plan_file="${1:-$RALPH_DIR/@fix_plan.md}"
    local completed_items=0
    local uncompleted_items=0
    local total_items=0

    if [[ -f "$fix_plan_file" ]]; then
        # grep -c prints 0 but exits 1 when nothing matches; "|| true" keeps
        # that from being treated as a failure.
        uncompleted_items=$(grep -cE "^[[:space:]]*- \[ \]" "$fix_plan_file" 2>/dev/null || true)
        [[ -z "$uncompleted_items" ]] && uncompleted_items=0
        completed_items=$(grep -cE "^[[:space:]]*- \[[xX]\]" "$fix_plan_file" 2>/dev/null || true)
        [[ -z "$completed_items" ]] && completed_items=0
    fi

    total_items=$((completed_items + uncompleted_items))
    printf '%s %s %s\n' "$completed_items" "$uncompleted_items" "$total_items"
}

# Extract the first unchecked task line from @fix_plan.md.
# Returns the raw checkbox line trimmed of leading and trailing whitespace,
# capped at 100 chars.
# Outputs empty string if no unchecked tasks exist or file is missing.
# Args: $1 = path to @fix_plan.md (optional, defaults to $RALPH_DIR/@fix_plan.md)
extract_next_fix_plan_task() {
    local fix_plan_file="${1:-$RALPH_DIR/@fix_plan.md}"
    [[ -f "$fix_plan_file" ]] || return 0

    local line
    line=$(grep -m 1 -E "^[[:space:]]*- \[ \]" "$fix_plan_file" 2>/dev/null || true)

    # Trim leading whitespace
    line="${line#"${line%%[![:space:]]*}"}"
    # Trim trailing whitespace
    line="${line%"${line##*[![:space:]]}"}"

    printf '%s' "${line:0:100}"
}

# Collapse completed story detail lines in @fix_plan.md.
# For each [x]/[X] story line, strips subsequent indented blockquote lines ( > ...).
# Incomplete stories keep their detail lines intact.
# Args: $1 = path to @fix_plan.md (modifies in place via atomic write) collapse_completed_stories() { local fix_plan_file="${1:-$RALPH_DIR/@fix_plan.md}" [[ -f "$fix_plan_file" ]] || return 0 local tmp_file="${fix_plan_file}.collapse_tmp" local skipping=false while IFS= read -r line || [[ -n "$line" ]]; do if [[ "$line" =~ ^[[:space:]]*-[[:space:]]*\[[xX]\][[:space:]]*Story[[:space:]]+[0-9] ]]; then skipping=true printf '%s\n' "$line" continue fi if $skipping && [[ "$line" =~ ^[[:space:]]+\> ]]; then continue fi skipping=false printf '%s\n' "$line" done < "$fix_plan_file" > "$tmp_file" mv "$tmp_file" "$fix_plan_file" } enforce_fix_plan_progress_tracking() { local analysis_file=$1 local completed_before=$2 local completed_after=$3 if [[ ! -f "$analysis_file" ]]; then return 0 fi local claimed_tasks claimed_tasks=$(jq -r '.analysis.tasks_completed_this_loop // 0' "$analysis_file" 2>/dev/null || echo "0") if [[ ! "$claimed_tasks" =~ ^-?[0-9]+$ ]]; then claimed_tasks=0 fi local fix_plan_completed_delta=$((completed_after - completed_before)) local has_progress_tracking_mismatch=false if [[ $claimed_tasks -ne $fix_plan_completed_delta || $claimed_tasks -gt 1 || $fix_plan_completed_delta -gt 1 || $fix_plan_completed_delta -lt 0 ]]; then has_progress_tracking_mismatch=true fi local tmp_file="$analysis_file.tmp" if jq \ --argjson claimed_tasks "$claimed_tasks" \ --argjson fix_plan_completed_delta "$fix_plan_completed_delta" \ --argjson has_progress_tracking_mismatch "$has_progress_tracking_mismatch" \ ' (.analysis //= {}) | .analysis.tasks_completed_this_loop = $claimed_tasks | .analysis.fix_plan_completed_delta = $fix_plan_completed_delta | .analysis.has_progress_tracking_mismatch = $has_progress_tracking_mismatch | if $has_progress_tracking_mismatch then .analysis.has_completion_signal = false | .analysis.exit_signal = false else . 
end ' "$analysis_file" > "$tmp_file" 2>/dev/null; then mv "$tmp_file" "$analysis_file" else rm -f "$tmp_file" 2>/dev/null return 0 fi if [[ "$has_progress_tracking_mismatch" == "true" ]]; then log_status "WARN" "Progress tracking mismatch: claimed $claimed_tasks completed task(s) but checkbox delta was $fix_plan_completed_delta. Completion signals suppressed for this loop." fi return 0 } # Run the built-in test gate # Reads tests_status from .response_analysis. If FAILING and TEST_COMMAND is set, # runs the command to verify. Returns JSON with status/verified/output on stdout. run_test_gate() { local analysis_file=$1 if [[ ! -f "$analysis_file" ]]; then echo '{"status":"skip","tests_status_reported":"","verified":false,"output":""}' return 0 fi local tests_status tests_status=$(jq -r '.analysis.tests_status // "UNKNOWN"' "$analysis_file" 2>/dev/null || echo "UNKNOWN") if [[ "$tests_status" == "UNKNOWN" && -z "$TEST_COMMAND" ]]; then echo '{"status":"skip","tests_status_reported":"UNKNOWN","verified":false,"output":""}' return 0 fi if [[ "$tests_status" == "PASSING" && -z "$TEST_COMMAND" ]]; then jq -n --arg ts "$tests_status" '{"status":"pass","tests_status_reported":$ts,"verified":false,"output":""}' return 0 fi if [[ -n "$TEST_COMMAND" ]]; then local cmd_output="" local cmd_exit=0 cmd_output=$(portable_timeout "${QUALITY_GATE_TIMEOUT}s" bash -c "$TEST_COMMAND" 2>&1) || cmd_exit=$? 
# Run user-defined quality gate commands.
# Splits QUALITY_GATES on semicolons, trims each entry, skips blank entries and
# runs the rest with `portable_timeout <QUALITY_GATE_TIMEOUT>s bash -c`.
# Outputs (stdout): a JSON array with one object per executed gate holding
# command, status (pass|fail), exit_code, output (first 500 characters),
# duration_seconds and timed_out (true when the exit code is 124).
run_custom_gates() {
    if [[ -z "$QUALITY_GATES" ]]; then
        echo "[]"
        return 0
    fi

    local results="[]"
    local -a gates=()
    # Every per-gate variable is local, including the loop variable, so
    # nothing leaks into the caller's scope.
    local gate_cmd cmd_output cmd_exit start_time end_time duration timed_out gate_status

    IFS=";" read -ra gates <<< "$QUALITY_GATES"

    for gate_cmd in "${gates[@]}"; do
        # Trim leading/trailing whitespace with parameter expansion (no sed fork)
        gate_cmd="${gate_cmd#"${gate_cmd%%[![:space:]]*}"}"
        gate_cmd="${gate_cmd%"${gate_cmd##*[![:space:]]}"}"
        [[ -z "$gate_cmd" ]] && continue

        cmd_output=""
        cmd_exit=0
        start_time=$(date +%s)
        cmd_output=$(portable_timeout "${QUALITY_GATE_TIMEOUT}s" bash -c "$gate_cmd" 2>&1) || cmd_exit=$?
        end_time=$(date +%s)
        duration=$((end_time - start_time))

        # portable_timeout returns 124 on timeout
        timed_out="false"
        if [[ $cmd_exit -eq 124 ]]; then
            timed_out="true"
        fi

        cmd_output="${cmd_output:0:500}"

        gate_status="pass"
        if [[ $cmd_exit -ne 0 ]]; then
            gate_status="fail"
        fi

        results=$(jq \
            --arg cmd "$gate_cmd" \
            --arg status "$gate_status" \
            --argjson exit_code "$cmd_exit" \
            --arg out "$cmd_output" \
            --argjson dur "$duration" \
            --argjson timed_out "$timed_out" \
            '. += [{"command":$cmd,"status":$status,"exit_code":$exit_code,"output":$out,"duration_seconds":$dur,"timed_out":$timed_out}]' \
            <<< "$results")
    done

    echo "$results"
}
# Orchestrator: run all quality gates and write results file
# Args: loop_number exit_signal_active
# Returns (on stdout): 0=pass/warn, 1=block failure, 2=circuit-breaker failure
# The shell return code is 0 on every path; only the printed digit carries
# the verdict. Results are stored in QUALITY_GATE_RESULTS_FILE via tmp+mv.
run_quality_gates() {
    local loop_number=$1
    local exit_signal_active=${2:-"false"}

    # Nothing configured, nothing to run
    if [[ -z "$TEST_COMMAND" && -z "$QUALITY_GATES" ]]; then
        echo "0"
        return 0
    fi

    # Completion-only mode runs the gates solely when an exit signal is active
    if [[ "$QUALITY_GATE_ON_COMPLETION_ONLY" == "true" && "$exit_signal_active" != "true" ]]; then
        echo "0"
        return 0
    fi

    local test_gate_json custom_gates_json
    test_gate_json=$(run_test_gate "$RESPONSE_ANALYSIS_FILE")
    custom_gates_json=$(run_custom_gates)

    local test_gate_status
    test_gate_status=$(jq -r '.status' <<< "$test_gate_json" 2>/dev/null || echo "skip")

    local custom_fail_count
    custom_fail_count=$(jq '[.[] | select(.status == "fail")] | length' <<< "$custom_gates_json" 2>/dev/null || echo "0")

    # One failing gate of either kind fails the whole run
    local overall_status="pass"
    if [[ "$test_gate_status" == "fail" ]]; then
        overall_status="fail"
    fi
    if [[ $custom_fail_count -gt 0 ]]; then
        overall_status="fail"
    fi

    # Write results file atomically (tmp+mv to avoid truncation on jq failure)
    local qg_tmp="$QUALITY_GATE_RESULTS_FILE.tmp"
    if jq -n \
        --arg timestamp "$(get_iso_timestamp)" \
        --argjson loop_number "$loop_number" \
        --argjson test_gate "$test_gate_json" \
        --argjson custom_gates "$custom_gates_json" \
        --arg overall_status "$overall_status" \
        --arg mode "$QUALITY_GATE_MODE" \
        '{
            timestamp: $timestamp,
            loop_number: $loop_number,
            test_gate: $test_gate,
            custom_gates: $custom_gates,
            overall_status: $overall_status,
            mode: $mode
        }' > "$qg_tmp" 2>/dev/null; then
        mv "$qg_tmp" "$QUALITY_GATE_RESULTS_FILE"
    else
        rm -f "$qg_tmp" 2>/dev/null
    fi

    if [[ "$overall_status" == "fail" ]]; then
        log_status "WARN" "Quality gate failure (mode=$QUALITY_GATE_MODE): test_gate=$test_gate_status, custom_failures=$custom_fail_count"
    fi

    # Translate status + mode into the digit the caller reads from stdout.
    # Any mode other than block / circuit-breaker behaves as warn (0).
    local verdict="0"
    if [[ "$overall_status" != "pass" ]]; then
        case "$QUALITY_GATE_MODE" in
            block) verdict="1" ;;
            circuit-breaker) verdict="2" ;;
        esac
    fi

    echo "$verdict"
    return 0
}
# Check if we should gracefully exit
# Outputs (stdout): the exit reason (test_saturation, completion_signals,
# safety_circuit_breaker, project_complete or plan_complete) or an empty line
# when the loop should continue.
# Returns: 1 when EXIT_SIGNALS_FILE does not exist (nothing is printed),
# 0 otherwise.
# Every log line is sent to stderr so stdout carries nothing but the reason.
should_exit_gracefully() {
    if [[ ! -f "$EXIT_SIGNALS_FILE" ]]; then
        return 1  # Don't exit, file doesn't exist
    fi

    local signals
    signals=$(cat "$EXIT_SIGNALS_FILE" 2>/dev/null) || signals=""

    # Count recent signals (last 5 loops) - with error handling
    local recent_test_loops
    local recent_done_signals
    local recent_completion_indicators
    recent_test_loops=$(jq '.test_only_loops | length' <<< "$signals" 2>/dev/null || echo "0")
    recent_done_signals=$(jq '.done_signals | length' <<< "$signals" 2>/dev/null || echo "0")
    recent_completion_indicators=$(jq '.completion_indicators | length' <<< "$signals" 2>/dev/null || echo "0")

    # jq prints nothing (and still succeeds) on empty input; normalise so the
    # numeric comparisons and log messages below always see an integer.
    [[ "$recent_test_loops" =~ ^[0-9]+$ ]] || recent_test_loops=0
    [[ "$recent_done_signals" =~ ^[0-9]+$ ]] || recent_done_signals=0
    [[ "$recent_completion_indicators" =~ ^[0-9]+$ ]] || recent_completion_indicators=0

    # 1. Too many consecutive test-only loops
    if [[ $recent_test_loops -ge $MAX_CONSECUTIVE_TEST_LOOPS ]]; then
        log_status "WARN" "Exit condition: Too many test-focused loops ($recent_test_loops >= $MAX_CONSECUTIVE_TEST_LOOPS)" >&2
        echo "test_saturation"
        return 0
    fi

    # 2. Multiple "done" signals
    if [[ $recent_done_signals -ge $MAX_CONSECUTIVE_DONE_SIGNALS ]]; then
        log_status "WARN" "Exit condition: Multiple completion signals ($recent_done_signals >= $MAX_CONSECUTIVE_DONE_SIGNALS)" >&2
        echo "completion_signals"
        return 0
    fi

    # 3. Safety circuit breaker - force exit after 5 consecutive EXIT_SIGNAL=true responses
    # Note: completion_indicators only accumulates when Claude explicitly sets EXIT_SIGNAL=true
    # (not based on confidence score). This safety breaker catches cases where Claude signals
    # completion 5+ times but the normal exit path (completion_indicators >= 2 + EXIT_SIGNAL=true)
    # didn't trigger for some reason. Threshold of 5 prevents API waste while being higher than
    # the normal threshold (2) to avoid false positives.
    if [[ $recent_completion_indicators -ge 5 ]]; then
        log_status "WARN" "🚨 SAFETY CIRCUIT BREAKER: Force exit after 5 consecutive EXIT_SIGNAL=true responses ($recent_completion_indicators)" >&2
        echo "safety_circuit_breaker"
        return 0
    fi

    # 4. Strong completion indicators (only if Claude's EXIT_SIGNAL is true)
    # This prevents premature exits when heuristics detect completion patterns
    # but Claude explicitly indicates work is still in progress via RALPH_STATUS block.
    # The exit_signal in .response_analysis represents Claude's explicit intent.
    local claude_exit_signal="false"
    if [[ -f "$RESPONSE_ANALYSIS_FILE" ]]; then
        claude_exit_signal=$(jq -r '.analysis.exit_signal // false' "$RESPONSE_ANALYSIS_FILE" 2>/dev/null || echo "false")
    fi

    if [[ $recent_completion_indicators -ge 2 ]] && [[ "$claude_exit_signal" == "true" ]]; then
        log_status "WARN" "Exit condition: Strong completion indicators ($recent_completion_indicators) with EXIT_SIGNAL=true" >&2
        echo "project_complete"
        return 0
    fi

    # 5. Check @fix_plan.md for completion
    # Fix #144: Only match valid markdown checkboxes, not date entries like [2026-01-29]
    # Valid patterns: "- [ ]" (uncompleted) and "- [x]" or "- [X]" (completed)
    if [[ -f "$RALPH_DIR/@fix_plan.md" ]]; then
        local completed_items=0
        local uncompleted_items=0
        local total_items=0
        read -r completed_items uncompleted_items total_items < <(count_fix_plan_checkboxes "$RALPH_DIR/@fix_plan.md")

        if [[ $total_items -gt 0 ]] && [[ $completed_items -eq $total_items ]]; then
            log_status "WARN" "Exit condition: All @fix_plan.md items completed ($completed_items/$total_items)" >&2
            echo "plan_complete"
            return 0
        fi
    fi

    echo ""  # Return empty string instead of using return code
}
# =============================================================================
# MODERN CLI HELPER FUNCTIONS (Phase 1.1)
# =============================================================================

# Check Claude CLI version for compatibility with modern flags
# Compares the first x.y.z found in `$CLAUDE_CODE_CMD --version` against
# CLAUDE_MIN_VERSION one component at a time (major, then minor, then patch),
# so a component may be 100 or larger without skewing the result.
# Returns: 0 when the version is >= the minimum or cannot be detected,
#          1 when it is older.
check_claude_version() {
    local version
    # CLAUDE_CODE_CMD is left unquoted on purpose so it may carry arguments.
    # shellcheck disable=SC2086
    version=$($CLAUDE_CODE_CMD --version 2>/dev/null | grep -oE '[0-9]+\.[0-9]+\.[0-9]+' | head -1) || version=""

    if [[ -z "$version" ]]; then
        log_status "WARN" "Cannot detect Claude CLI version, assuming compatible"
        return 0
    fi

    local required="$CLAUDE_MIN_VERSION"
    local -a ver_parts=() req_parts=()
    IFS='.' read -r -a ver_parts <<< "$version"
    IFS='.' read -r -a req_parts <<< "$required"

    local is_older=false
    local idx have want
    for idx in 0 1 2; do
        have=${ver_parts[idx]:-0}
        want=${req_parts[idx]:-0}
        # Missing or non-numeric components count as 0; 10# forces decimal so
        # "08" is not rejected as invalid octal.
        [[ "$have" =~ ^[0-9]+$ ]] || have=0
        [[ "$want" =~ ^[0-9]+$ ]] || want=0
        if ((10#$have < 10#$want)); then
            is_older=true
            break
        fi
        if ((10#$have > 10#$want)); then
            break
        fi
    done

    if [[ "$is_older" == "true" ]]; then
        log_status "WARN" "Claude CLI version $version < $required. Some modern features may not work."
        log_status "WARN" "Consider upgrading: npm update -g @anthropic-ai/claude-code"
        return 1
    fi

    log_status "INFO" "Claude CLI version $version (>= $required) - modern features enabled"
    return 0
}
# Validate allowed tools against whitelist
# Args: $1 = comma-separated tool list (empty is valid: defaults are used)
# A tool is accepted when it equals an entry of VALID_TOOL_PATTERNS or has the
# form Bash(<anything>); the Bash(...) rule does not depend on the whitelist.
# Returns 0 if valid, 1 if invalid with error message (printed on stdout)
validate_allowed_tools() {
    local tools_input=$1

    if [[ -z "$tools_input" ]]; then
        return 0  # Empty is valid (uses defaults)
    fi

    local -a tools=()
    local tool pattern valid patterns_csv

    # Split by comma; IFS is changed for this read only
    IFS=',' read -ra tools <<< "$tools_input"
    # Comma-joined whitelist for the error message
    patterns_csv=$(IFS=','; echo "${VALID_TOOL_PATTERNS[*]}")

    for tool in "${tools[@]}"; do
        # Trim whitespace
        tool="${tool#"${tool%%[![:space:]]*}"}"
        tool="${tool%"${tool##*[![:space:]]}"}"
        if [[ -z "$tool" ]]; then
            continue
        fi

        valid=false
        if [[ "$tool" =~ ^Bash\(.+\)$ ]]; then
            # Bash(*) pattern - any Bash with parentheses is allowed
            valid=true
        else
            for pattern in "${VALID_TOOL_PATTERNS[@]}"; do
                if [[ "$tool" == "$pattern" ]]; then
                    valid=true
                    break
                fi
            done
        fi

        if [[ "$valid" == "false" ]]; then
            echo "Error: Invalid tool in --allowed-tools: '$tool'"
            echo "Valid tools: $patterns_csv"
            echo "Note: Bash(...) patterns with any content are allowed (e.g., 'Bash(git *)')"
            return 1
        fi
    done

    return 0
}

# Build loop context for Claude Code session
# Provides loop-specific context via --append-system-prompt
# Args: $1 = loop number, $2 = session id (optional; non-empty means a session
#       is being resumed)
# Outputs (stdout): a single line capped at 500 characters made of, in order:
# loop number, session-continued hint, remaining task count and next task,
# circuit breaker state (when not CLOSED), previous work summary, quality gate
# failures (non-warn modes only) and the previous loop's diff summary.
build_loop_context() {
    local loop_count=$1
    local session_id="${2:-}"
    local context=""

    # Add loop number
    context="Loop #${loop_count}. "

    # Signal session continuity when resuming a valid session
    if [[ -n "$session_id" ]]; then
        context+="Session continued — do NOT re-read spec files. Resume implementation. "
    fi

    # Extract incomplete tasks from @fix_plan.md
    # Bug #3 Fix: Support indented markdown checkboxes with [[:space:]]* pattern
    if [[ -f "$RALPH_DIR/@fix_plan.md" ]]; then
        local completed_tasks=0
        local incomplete_tasks=0
        local total_tasks=0
        read -r completed_tasks incomplete_tasks total_tasks < <(count_fix_plan_checkboxes "$RALPH_DIR/@fix_plan.md")
        context+="Remaining tasks: ${incomplete_tasks}. "

        # Inject the next unchecked task to give the AI a clear directive
        local next_task
        next_task=$(extract_next_fix_plan_task "$RALPH_DIR/@fix_plan.md")
        if [[ -n "$next_task" ]]; then
            context+="Next: ${next_task}. "
        fi
    fi

    # Add circuit breaker state
    if [[ -f "$RALPH_DIR/.circuit_breaker_state" ]]; then
        local cb_state
        cb_state=$(jq -r '.state // "UNKNOWN"' "$RALPH_DIR/.circuit_breaker_state" 2>/dev/null) || cb_state=""
        if [[ "$cb_state" != "CLOSED" && "$cb_state" != "null" && -n "$cb_state" ]]; then
            context+="Circuit breaker: ${cb_state}. "
        fi
    fi

    # Add previous loop summary (truncated)
    if [[ -f "$RESPONSE_ANALYSIS_FILE" ]]; then
        local prev_summary
        prev_summary=$(jq -r '.analysis.work_summary // ""' "$RESPONSE_ANALYSIS_FILE" 2>/dev/null | head -c 200)
        if [[ -n "$prev_summary" && "$prev_summary" != "null" ]]; then
            context+="Previous: ${prev_summary}. "
        fi
    fi

    # Add quality gate failure feedback (block and circuit-breaker modes only)
    if [[ -f "$QUALITY_GATE_RESULTS_FILE" ]]; then
        local qg_status qg_mode
        # An unreadable results file simply contributes nothing
        qg_status=$(jq -r '.overall_status // "pass"' "$QUALITY_GATE_RESULTS_FILE" 2>/dev/null) || qg_status=""
        qg_mode=$(jq -r '.mode // "warn"' "$QUALITY_GATE_RESULTS_FILE" 2>/dev/null) || qg_mode=""
        if [[ "$qg_status" == "fail" && "$qg_mode" != "warn" ]]; then
            local test_gate_status
            test_gate_status=$(jq -r '.test_gate.status // "skip"' "$QUALITY_GATE_RESULTS_FILE" 2>/dev/null) || test_gate_status=""
            if [[ "$test_gate_status" == "fail" ]]; then
                context+="TESTS FAILING. "
            fi
            # First two words of each failed gate command, comma-separated
            local failed_gates
            failed_gates=$(jq -r '[.custom_gates[] | select(.status == "fail") | .command | split(" ")[0:2] | join(" ")] | join(", ")' "$QUALITY_GATE_RESULTS_FILE" 2>/dev/null) || failed_gates=""
            if [[ -n "$failed_gates" ]]; then
                context+="QG fail: ${failed_gates}. "
            fi
        fi
    fi

    # Add git diff summary from previous loop (last segment — truncated first if over budget)
    if [[ -f "$RALPH_DIR/.loop_diff_summary" ]]; then
        local diff_summary
        diff_summary=$(head -c 150 "$RALPH_DIR/.loop_diff_summary" 2>/dev/null)
        if [[ -n "$diff_summary" ]]; then
            context+="${diff_summary}. "
        fi
    fi

    # Limit total length to ~500 chars
    echo "${context:0:500}"
}

# Capture a compact git diff summary after each loop iteration.
# Writes to $RALPH_DIR/.loop_diff_summary for the next loop's build_loop_context().
# Args: $1 = loop_start_sha (git HEAD at loop start)
# The previous summary is always removed first; no file is written when git is
# unavailable, the directory is not a repository, or only binary files changed.
capture_loop_diff_summary() {
    local loop_start_sha="${1:-}"
    local summary_file="$RALPH_DIR/.loop_diff_summary"

    # Clear previous summary
    rm -f "$summary_file"

    # Require git and a valid repo
    if ! command -v git &>/dev/null || ! git rev-parse --git-dir &>/dev/null; then
        return 0
    fi

    local current_sha
    current_sha=$(git rev-parse HEAD 2>/dev/null || echo "")
    local numstat_output=""

    if [[ -n "$loop_start_sha" && -n "$current_sha" && "$loop_start_sha" != "$current_sha" ]]; then
        # Commits exist: union of committed + working tree changes, deduplicated by filename
        numstat_output=$(
            {
                git diff --numstat "$loop_start_sha" HEAD 2>/dev/null
                git diff --numstat HEAD 2>/dev/null
                git diff --numstat --cached 2>/dev/null
            } | awk -F'\t' '!seen[$3]++'
        )
    else
        # No commits: staged + unstaged only
        numstat_output=$(
            {
                git diff --numstat 2>/dev/null
                git diff --numstat --cached 2>/dev/null
            } | awk -F'\t' '!seen[$3]++'
        )
    fi

    [[ -z "$numstat_output" ]] && return 0

    # Format: Changed: file (+add/-del), file2 (+add/-del)
    # Skip binary files (numstat shows - - for binary)
    # Use tab separator — numstat output is tab-delimited (handles filenames with spaces)
    local formatted
    formatted=$(echo "$numstat_output" | awk -F'\t' '
        $1 != "-" {
            if (n++) printf ", "
            printf "%s (+%s/-%s)", $3, $1, $2
        }
    ')

    [[ -z "$formatted" ]] && return 0

    local result="Changed: ${formatted}"
    # Self-truncate to ~150 chars (144 content + "...")
    if [[ ${#result} -gt 147 ]]; then
        result="${result:0:144}..."
    fi

    echo "$result" > "$summary_file"
}
# Check if a code review should run this iteration
# Returns 0 (true) when review is due, 1 (false) otherwise
# Args: $1 = loop_count, $2 = fix_plan_completed_delta (optional, for ultimate mode)
# Decision order: REVIEW_MODE off -> no; loop_count < 1 -> no; circuit breaker
# not CLOSED -> no; enhanced mode -> only every REVIEW_INTERVAL loops; ultimate
# mode -> only when the fix plan delta is >= 1; finally, inside a git repo,
# no review when HEAD equals the last reviewed SHA and the tree is clean.
should_run_review() {
    [[ "$REVIEW_MODE" == "off" ]] && return 1

    local loop_count=${1:-0}
    local fix_plan_delta=${2:-0}

    # Normalise numeric inputs (10# avoids octal parsing of leading zeros)
    if [[ "$loop_count" =~ ^[0-9]+$ ]]; then
        loop_count=$((10#$loop_count))
    else
        loop_count=0
    fi
    [[ "$fix_plan_delta" =~ ^-?[0-9]+$ ]] || fix_plan_delta=0

    # Never review before at least one loop has been counted
    if ((loop_count < 1)); then
        return 1
    fi

    # Skip if circuit breaker is not CLOSED
    if [[ -f "$RALPH_DIR/.circuit_breaker_state" ]]; then
        local cb_state
        cb_state=$(jq -r '.state // "CLOSED"' "$RALPH_DIR/.circuit_breaker_state" 2>/dev/null)
        [[ "$cb_state" != "CLOSED" ]] && return 1
    fi

    # Mode-specific trigger
    case "$REVIEW_MODE" in
        enhanced)
            # A missing, zero or non-numeric interval cannot be scheduled.
            # Without this guard the modulo raises a division-by-zero error
            # whose failure status would be read as "review is due".
            if [[ ! "${REVIEW_INTERVAL:-}" =~ ^[0-9]+$ ]] || ((10#$REVIEW_INTERVAL == 0)); then
                return 1
            fi
            if ((loop_count % 10#$REVIEW_INTERVAL != 0)); then
                return 1
            fi
            ;;
        ultimate)
            if ((fix_plan_delta < 1)); then
                return 1
            fi
            ;;
        *)
            # Unknown mode — treat as off
            return 1
            ;;
    esac

    # Skip if no changes since last review (committed or uncommitted)
    if command -v git &>/dev/null && git rev-parse --git-dir &>/dev/null; then
        local current_sha last_sha
        current_sha=$(git rev-parse HEAD 2>/dev/null || echo "unknown")
        last_sha=""
        [[ -f "$REVIEW_LAST_SHA_FILE" ]] && last_sha=$(cat "$REVIEW_LAST_SHA_FILE" 2>/dev/null)
        local has_uncommitted
        has_uncommitted=$(git status --porcelain 2>/dev/null | head -1)
        if [[ "$current_sha" == "$last_sha" && -z "$has_uncommitted" ]]; then
            return 1
        fi
    fi

    return 0
}

# Build review findings context for injection into the next implementation loop
# Returns a compact string (max 500-700 chars) with unresolved findings
# HIGH/CRITICAL findings get a PRIORITY prefix and a higher char cap (700)
# Prints an empty line when REVIEW_FINDINGS_FILE is missing, reports zero
# issues, or carries no severity.
build_review_context() {
    if [[ ! -f "$REVIEW_FINDINGS_FILE" ]]; then
        echo ""
        return
    fi

    local severity issues_found summary
    severity=$(jq -r '.severity // ""' "$REVIEW_FINDINGS_FILE" 2>/dev/null)
    issues_found=$(jq -r '.issues_found // 0' "$REVIEW_FINDINGS_FILE" 2>/dev/null)
    summary=$(jq -r '.summary // ""' "$REVIEW_FINDINGS_FILE" 2>/dev/null | head -c 300)

    if [[ "$issues_found" == "0" || -z "$severity" || "$severity" == "null" ]]; then
        echo ""
        return
    fi

    # HIGH/CRITICAL findings: instruct the AI to fix them before picking a new story
    local context=""
    local max_len=500
    if [[ "$severity" == "HIGH" || "$severity" == "CRITICAL" ]]; then
        context="PRIORITY: Fix these code review findings BEFORE picking a new story. "
        max_len=700
    fi

    context+="REVIEW FINDINGS ($severity, $issues_found issues): $summary"

    # Include top details if space allows
    local top_details
    top_details=$(jq -r '(.details[:2] // []) | map("- [\(.severity)] \(.file): \(.issue)") | join("; ")' "$REVIEW_FINDINGS_FILE" 2>/dev/null | head -c 150)
    if [[ -n "$top_details" && "$top_details" != "null" ]]; then
        context+=" Details: $top_details"
    fi

    echo "${context:0:$max_len}"
}
# Execute a periodic code review loop (read-only, no file modifications)
# Uses a fresh ephemeral session with restricted tool permissions
# Args: $1 = loop_count (used in log messages and the review log file name)
# Side effects:
#   - writes $LOG_DIR/review_loop_<loop_count>.log
#   - replaces REVIEW_FINDINGS_FILE when the output contains valid JSON between
#     the ---REVIEW_FINDINGS--- / ---END_REVIEW_FINDINGS--- markers
#   - records HEAD in REVIEW_LAST_SHA_FILE, but only when the review command
#     exited with status 0, so a failed or timed-out review is not counted as
#     "this revision has been reviewed"
# CLAUDE_ALLOWED_TOOLS is restricted to Read,Glob,Grep while the command is
# built and run, then restored.
run_review_loop() {
    local loop_count=$1

    log_status "INFO" "Starting periodic code review (loop #$loop_count)"

    # Check review prompt exists
    if [[ ! -f "$REVIEW_PROMPT_FILE" ]]; then
        log_status "WARN" "Review prompt file not found: $REVIEW_PROMPT_FILE — skipping review"
        return 0
    fi

    # Get diff context (committed + uncommitted changes)
    local last_sha=""
    if [[ -f "$REVIEW_LAST_SHA_FILE" ]]; then
        last_sha=$(cat "$REVIEW_LAST_SHA_FILE" 2>/dev/null) || last_sha=""
    fi

    local diff_context=""
    if command -v git &>/dev/null && git rev-parse --git-dir &>/dev/null; then
        local committed_diff="" uncommitted_diff=""
        if [[ -n "$last_sha" ]]; then
            committed_diff=$(git diff "$last_sha"..HEAD --stat 2>/dev/null | head -20 || true)
        else
            committed_diff=$(git diff HEAD~5..HEAD --stat 2>/dev/null | head -20 || true)
        fi
        uncommitted_diff=$(git diff --stat 2>/dev/null | head -10 || true)
        diff_context="${committed_diff}"
        if [[ -n "$uncommitted_diff" ]]; then
            diff_context+=$'\nUncommitted:\n'"${uncommitted_diff}"
        fi
        if [[ -z "$diff_context" ]]; then
            diff_context="No recent changes"
        fi
    fi

    # Build review-specific context
    local review_context="CODE REVIEW LOOP (read-only). Analyze changes since last review. Recent changes: $diff_context"

    # Save and override CLAUDE_ALLOWED_TOOLS for read-only mode
    local saved_tools="$CLAUDE_ALLOWED_TOOLS"
    CLAUDE_ALLOWED_TOOLS="Read,Glob,Grep"

    local timeout_seconds=$((CLAUDE_TIMEOUT_MINUTES * 60))
    local review_output_file="$LOG_DIR/review_loop_${loop_count}.log"
    local review_ran=false
    local review_exit=0

    # Build command with review prompt and NO session resume (ephemeral)
    if driver_build_command "$REVIEW_PROMPT_FILE" "$review_context" ""; then
        review_ran=true
        # Execute review (capture output); a failure is recorded, not fatal
        portable_timeout "${timeout_seconds}s" "${CLAUDE_CMD_ARGS[@]}" \
            < /dev/null > "$review_output_file" 2>&1 || review_exit=$?
    fi

    # Restore CLAUDE_ALLOWED_TOOLS
    CLAUDE_ALLOWED_TOOLS="$saved_tools"

    # Nothing to parse when the command was never run (a log file left over
    # from an earlier run with the same loop number must not be reused)
    if [[ "$review_ran" != "true" || ! -f "$review_output_file" ]]; then
        return 0
    fi

    if [[ $review_exit -eq 0 ]]; then
        # Review ran successfully — save SHA so we don't re-review the same state.
        # Resolve HEAD first so the file is never truncated outside a repo.
        local head_sha
        head_sha=$(git rev-parse HEAD 2>/dev/null || true)
        if [[ -n "$head_sha" ]]; then
            echo "$head_sha" > "$REVIEW_LAST_SHA_FILE"
        fi
    else
        log_status "WARN" "Review command exited with status $review_exit — current revision not marked as reviewed"
    fi

    # Parse review findings from output
    local findings_json=""
    # Extract JSON between ---REVIEW_FINDINGS--- and ---END_REVIEW_FINDINGS--- markers
    findings_json=$(sed -n '/---REVIEW_FINDINGS---/,/---END_REVIEW_FINDINGS---/{//!p;}' "$review_output_file" 2>/dev/null | tr -d '\n' | head -c 5000)

    # If output is JSON format, try extracting from result field first
    if [[ -z "$findings_json" ]]; then
        local raw_text
        raw_text=$(jq -r '.result // .content // ""' "$review_output_file" 2>/dev/null || cat "$review_output_file" 2>/dev/null)
        findings_json=$(echo "$raw_text" | sed -n '/---REVIEW_FINDINGS---/,/---END_REVIEW_FINDINGS---/{//!p;}' 2>/dev/null | tr -d '\n' | head -c 5000)
    fi

    if [[ -z "$findings_json" ]]; then
        log_status "INFO" "Code review complete. No structured findings extracted."
        return 0
    fi

    # Validate it's valid JSON before writing
    if jq . <<< "$findings_json" > /dev/null 2>&1; then
        local tmp_findings="$REVIEW_FINDINGS_FILE.tmp"
        echo "$findings_json" > "$tmp_findings"
        mv "$tmp_findings" "$REVIEW_FINDINGS_FILE"
        local issue_count
        issue_count=$(jq -r '.issues_found // 0' <<< "$findings_json" 2>/dev/null)
        log_status "INFO" "Code review complete. $issue_count issue(s) found."
    else
        log_status "WARN" "Review findings JSON is malformed — skipping"
    fi
}
# Get session file age in seconds (cross-platform)
# Args: $1 = file path
# Returns (stdout): age in seconds; "0" when the file does not exist; "-1"
# when no modification time could be determined (or it was reported as 0).
get_session_file_age_seconds() {
    local file=$1

    if [[ ! -f "$file" ]]; then
        echo "0"
        return
    fi

    # Probe in order: GNU stat (Linux, Homebrew coreutils), BSD stat (native
    # macOS), then `date -r`. A probe only counts when it yields pure digits.
    local mtime=""
    {
        mtime=$(stat -c %Y "$file" 2>/dev/null) && [[ "$mtime" =~ ^[0-9]+$ ]]
    } || {
        mtime=$(stat -f %m "$file" 2>/dev/null) && [[ "$mtime" =~ ^[0-9]+$ ]]
    } || {
        mtime=$(date -r "$file" +%s 2>/dev/null) && [[ "$mtime" =~ ^[0-9]+$ ]]
    } || mtime=""

    # Report -1 instead of computing a bogus (huge) age: this prevents false
    # expiration when the modification time is unknown.
    if [[ -z "$mtime" || "$mtime" == "0" ]]; then
        echo "-1"
        return
    fi

    local now
    now=$(date +%s)
    echo "$((now - mtime))"
}
# Initialize or resume persisted driver session (with expiration check)
#
# Session Expiration Strategy:
# - Default expiration: 24 hours (configurable via CLAUDE_SESSION_EXPIRY_HOURS)
# - 24 hours chosen because: long enough for multi-day projects, short enough
#   to prevent stale context from causing unpredictable behavior
# - Sessions auto-expire to ensure Claude starts fresh periodically
#
# Returns (stdout):
#   - Session ID string: when resuming a valid, non-expired session
#   - Empty string: when starting new session (no file, expired, or stat error)
#
# Return codes:
#   - 0: Always returns success (caller should check stdout for session ID)
#
init_claude_session() {
    if [[ -f "$CLAUDE_SESSION_FILE" ]]; then
        local age_seconds
        age_seconds=$(get_session_file_age_seconds "$CLAUDE_SESSION_FILE")

        # -1 means the age is unknown: the stored session cannot be judged,
        # so it is dropped and a new one is started.
        if [[ $age_seconds -eq -1 ]]; then
            log_status "WARN" "Could not determine session age, starting new session"
            rm -f "$CLAUDE_SESSION_FILE"
            echo ""
            return 0
        fi

        local age_hours=$((age_seconds / 3600))
        local expiry_seconds=$((CLAUDE_SESSION_EXPIRY_HOURS * 3600))

        # Too old: discard and start over
        if [[ $age_seconds -ge $expiry_seconds ]]; then
            log_status "INFO" "Session expired (${age_hours}h old, max ${CLAUDE_SESSION_EXPIRY_HOURS}h), starting new session"
            rm -f "$CLAUDE_SESSION_FILE"
            echo ""
            return 0
        fi

        # Still fresh: resume if an ID can actually be read back
        local session_id
        session_id=$(get_last_session_id)
        if [[ -n "$session_id" ]]; then
            log_status "INFO" "Resuming session: ${session_id:0:20}... (${age_hours}h old)"
            echo "$session_id"
            return 0
        fi
    fi

    # No file, or a file without a usable ID
    log_status "INFO" "Starting new session"
    echo ""
}
# Save session ID after successful execution
# Args: $1 = driver output file
# Tries, in order: driver_extract_session_id_from_output (if the driver defines
# it), extract_session_id_from_output, driver_fallback_session_id (if defined).
# A non-empty, non-"null" ID is written to CLAUDE_SESSION_FILE and mirrored
# into the Ralph session file.
save_claude_session() {
    local output_file=$1

    # Try to extract session ID from structured output
    if [[ -f "$output_file" ]]; then
        local session_id=""
        if declare -F driver_extract_session_id_from_output >/dev/null; then
            session_id=$(driver_extract_session_id_from_output "$output_file" 2>/dev/null || echo "")
        fi
        if [[ -z "$session_id" || "$session_id" == "null" ]]; then
            session_id=$(extract_session_id_from_output "$output_file" 2>/dev/null || echo "")
        fi
        if [[ -z "$session_id" || "$session_id" == "null" ]] && declare -F driver_fallback_session_id >/dev/null; then
            session_id=$(driver_fallback_session_id "$output_file" 2>/dev/null || echo "")
        fi
        if [[ -n "$session_id" && "$session_id" != "null" ]]; then
            echo "$session_id" > "$CLAUDE_SESSION_FILE"
            sync_ralph_session_with_driver "$session_id"
            log_status "INFO" "Saved session: ${session_id:0:20}..."
        fi
    fi
}

# =============================================================================
# SESSION LIFECYCLE MANAGEMENT FUNCTIONS (Phase 1.2)
# =============================================================================

# Write an active session record to RALPH_SESSION_FILE.
# Args: $1 = session id, $2 = created_at, $3 = last_used (defaults to $2)
# The JSON is produced into a temp file and moved into place, so a failing jq
# leaves the previous session file intact. Returns non-zero on failure.
write_active_ralph_session() {
    local session_id=$1
    local created_at=$2
    local last_used=${3:-$created_at}
    local tmp_file="$RALPH_SESSION_FILE.tmp"

    if jq -n \
        --arg session_id "$session_id" \
        --arg created_at "$created_at" \
        --arg last_used "$last_used" \
        '{
            session_id: $session_id,
            created_at: $created_at,
            last_used: $last_used
        }' > "$tmp_file" && mv "$tmp_file" "$RALPH_SESSION_FILE"; then
        return 0
    fi

    rm -f "$tmp_file" 2>/dev/null
    return 1
}

# Write an inactive (reset) session record to RALPH_SESSION_FILE.
# Args: $1 = reset timestamp, $2 = reset reason
# session_id is stored as an empty string, which get_ralph_session_state
# reports as "inactive". Same tmp+mv write and return convention as
# write_active_ralph_session.
write_inactive_ralph_session() {
    local reset_at=$1
    local reset_reason=$2
    local tmp_file="$RALPH_SESSION_FILE.tmp"

    if jq -n \
        --arg session_id "" \
        --arg reset_at "$reset_at" \
        --arg reset_reason "$reset_reason" \
        '{
            session_id: $session_id,
            reset_at: $reset_at,
            reset_reason: $reset_reason
        }' > "$tmp_file" && mv "$tmp_file" "$RALPH_SESSION_FILE"; then
        return 0
    fi

    rm -f "$tmp_file" 2>/dev/null
    return 1
}

# Classify RALPH_SESSION_FILE.
# Outputs (stdout) exactly one of:
#   missing  - the file does not exist
#   invalid  - not JSON, session_id/created_at absent or not strings, or
#              created_at rejected by is_usable_ralph_session_created_at
#   inactive - session_id is an empty string
#   active   - session_id is non-empty and created_at is usable
# Always returns 0.
get_ralph_session_state() {
    if [[ ! -f "$RALPH_SESSION_FILE" ]]; then
        echo "missing"
        return 0
    fi

    if ! jq empty "$RALPH_SESSION_FILE" 2>/dev/null; then
        echo "invalid"
        return 0
    fi

    local session_id_type
    session_id_type=$(
        jq -r 'if has("session_id") then (.session_id | type) else "missing" end' \
            "$RALPH_SESSION_FILE" 2>/dev/null
    ) || {
        echo "invalid"
        return 0
    }

    if [[ "$session_id_type" != "string" ]]; then
        echo "invalid"
        return 0
    fi

    local session_id
    session_id=$(jq -r '.session_id' "$RALPH_SESSION_FILE" 2>/dev/null) || {
        echo "invalid"
        return 0
    }

    if [[ "$session_id" == "" ]]; then
        echo "inactive"
        return 0
    fi

    local created_at_type
    created_at_type=$(
        jq -r 'if has("created_at") then (.created_at | type) else "missing" end' \
            "$RALPH_SESSION_FILE" 2>/dev/null
    ) || {
        echo "invalid"
        return 0
    }

    if [[ "$created_at_type" != "string" ]]; then
        echo "invalid"
        return 0
    fi

    local created_at
    created_at=$(jq -r '.created_at' "$RALPH_SESSION_FILE" 2>/dev/null) || {
        echo "invalid"
        return 0
    }

    if ! is_usable_ralph_session_created_at "$created_at"; then
        echo "invalid"
        return 0
    fi

    echo "active"
}
# Get current session ID from Ralph session file
# Returns: session ID string or empty if not found
# "Not found" covers a missing file, a jq failure and a null/empty session_id.
# The return code is always 0.
get_session_id() {
    local resolved=""

    if [[ -f "$RALPH_SESSION_FILE" ]]; then
        # Assignment kept apart from `local` so jq's status is not masked (SC2155)
        local raw
        raw=$(jq -r '.session_id // ""' "$RALPH_SESSION_FILE" 2>/dev/null)
        local jq_rc=$?

        if [[ $jq_rc -eq 0 && -n "$raw" && "$raw" != "null" ]]; then
            resolved="$raw"
        fi
    fi

    echo "$resolved"
    return 0
}
# Decide whether a created_at value can be used for an active session.
# Args: $1 = timestamp string
# Returns: 0 when parse_iso_to_epoch_strict accepts it and it is not later
# than get_epoch_seconds; 1 for empty, "null", unparsable or future values.
is_usable_ralph_session_created_at() {
    local created_at=$1

    case "$created_at" in
        "" | null) return 1 ;;
    esac

    local created_epoch
    created_epoch=$(parse_iso_to_epoch_strict "$created_at") || return 1

    local current_epoch
    current_epoch=$(get_epoch_seconds)

    [[ "$created_epoch" -le "$current_epoch" ]]
}

# Print created_at of the active Ralph session.
# Outputs (stdout): the timestamp, or an empty line when the session is not
# active or its created_at is unusable. Always returns 0.
get_active_session_created_at() {
    local state
    state=$(get_ralph_session_state)
    if [[ "$state" != "active" ]]; then
        echo ""
        return 0
    fi

    local stamp
    stamp=$(jq -r '.created_at // ""' "$RALPH_SESSION_FILE" 2>/dev/null)
    [[ "$stamp" == "null" ]] && stamp=""

    if is_usable_ralph_session_created_at "$stamp"; then
        echo "$stamp"
    else
        echo ""
    fi
    return 0
}

# Mirror the driver's session ID into the Ralph session file.
# Args: $1 = driver session id (empty or "null" is ignored)
# When the active Ralph session already carries this ID, its created_at is
# kept and only last_used is refreshed; otherwise a new record is written
# with created_at = last_used = now.
sync_ralph_session_with_driver() {
    local driver_session_id=$1

    if [[ -z "$driver_session_id" || "$driver_session_id" == "null" ]]; then
        return 0
    fi

    local now
    now=$(get_iso_timestamp)

    if [[ "$(get_ralph_session_state)" == "active" ]]; then
        local known_id
        known_id=$(get_session_id)
        local known_created_at
        known_created_at=$(get_active_session_created_at)

        if [[ "$known_id" == "$driver_session_id" && -n "$known_created_at" ]]; then
            write_active_ralph_session "$driver_session_id" "$known_created_at" "$now"
            return 0
        fi
    fi

    write_active_ralph_session "$driver_session_id" "$now" "$now"
}

# Reset session with reason logging
# Usage: reset_session "reason_for_reset"
# Marks the Ralph session inactive, removes the driver session file and the
# response analysis file, empties the exit-signal lists and records the
# transition in the session history.
reset_session() {
    local reason=${1:-"manual_reset"}

    local reset_timestamp
    reset_timestamp=$(get_iso_timestamp)

    write_inactive_ralph_session "$reset_timestamp" "$reason"

    # Also clear the Claude session file for consistency
    rm -f "$CLAUDE_SESSION_FILE" 2>/dev/null

    # Clear exit signals to prevent stale completion indicators from causing premature exit (issue #91)
    # This ensures a fresh start without leftover state from previous sessions
    if [[ -f "$EXIT_SIGNALS_FILE" ]]; then
        echo '{"test_only_loops": [], "done_signals": [], "completion_indicators": []}' > "$EXIT_SIGNALS_FILE"
        if [[ "${VERBOSE_PROGRESS:-}" == "true" ]]; then
            log_status "INFO" "Cleared exit signals file"
        fi
    fi

    # Clear response analysis to prevent stale EXIT_SIGNAL from previous session
    rm -f "$RESPONSE_ANALYSIS_FILE" 2>/dev/null

    # Log the session transition (non-fatal to prevent script exit under set -e)
    log_session_transition "active" "reset" "$reason" "${loop_count:-0}" || true

    log_status "INFO" "Session reset: $reason"
}
# Log session state transitions to history file
# Usage: log_session_transition from_state to_state reason loop_number
# Appends {timestamp, from_state, to_state, reason, loop_number} to the JSON
# array in RALPH_SESSION_HISTORY_FILE and keeps only the last 50 entries.
# A missing, corrupt or non-array history is treated as empty.
# Best-effort: if jq cannot produce the updated history, the existing file is
# left untouched. Always returns 0.
log_session_transition() {
    local from_state=$1
    local to_state=$2
    local reason=$3
    local loop_number=${4:-0}

    # --argjson needs a valid JSON number; anything else is recorded as 0
    # (10# keeps leading zeros from producing invalid JSON / octal parsing).
    if [[ "$loop_number" =~ ^[0-9]+$ ]]; then
        loop_number=$((10#$loop_number))
    else
        loop_number=0
    fi

    # Get timestamp once (SC2155: separate declare from assign)
    local ts
    ts=$(get_iso_timestamp)

    # Read history file defensively - fallback to empty array on any failure
    local history='[]'
    if [[ -f "$RALPH_SESSION_HISTORY_FILE" ]]; then
        history=$(cat "$RALPH_SESSION_HISTORY_FILE" 2>/dev/null) || history='[]'
        if ! jq -e 'type == "array"' <<< "$history" > /dev/null 2>&1; then
            history='[]'
        fi
    fi

    # Build the entry and append it in one jq call; values travel as jq
    # variables instead of being spliced into the program text.
    local updated_history=""
    updated_history=$(jq \
        --arg timestamp "$ts" \
        --arg from_state "$from_state" \
        --arg to_state "$to_state" \
        --arg reason "$reason" \
        --argjson loop_number "$loop_number" \
        '. + [{
            timestamp: $timestamp,
            from_state: $from_state,
            to_state: $to_state,
            reason: $reason,
            loop_number: $loop_number
        }] | .[-50:]' <<< "$history" 2>/dev/null) || updated_history=""

    # Only write if jq succeeded; tmp+mv keeps readers from seeing a partial file
    if [[ -n "$updated_history" ]]; then
        local tmp_file="$RALPH_SESSION_HISTORY_FILE.tmp"
        if echo "$updated_history" > "$tmp_file" && mv "$tmp_file" "$RALPH_SESSION_HISTORY_FILE"; then
            return 0
        fi
        rm -f "$tmp_file" 2>/dev/null
    fi

    return 0
}
# Generate a unique session ID using timestamp and random component
# Outputs (stdout): ralph-<epoch seconds>-<$RANDOM>
generate_session_id() {
    printf 'ralph-%s-%s\n' "$(date +%s)" "$RANDOM"
}

# Initialize session tracking (called at loop start)
# Leaves an active session alone; for any other state (missing, inactive,
# invalid) a new session record is written. An invalid file is reported first.
init_session_tracking() {
    local now
    now=$(get_iso_timestamp)

    local state
    state=$(get_ralph_session_state)

    case "$state" in
        active)
            return 0
            ;;
        invalid)
            log_status "WARN" "Corrupted session file detected, recreating..."
            ;;
    esac

    local fresh_id
    fresh_id=$(generate_session_id)

    write_active_ralph_session "$fresh_id" "$now" "$now"

    log_status "INFO" "Initialized session tracking (session: $fresh_id)"
}

# Update last_used timestamp in session file (called on each loop iteration)
# Does nothing unless the session is active and both its ID and created_at
# can be read back.
update_session_last_used() {
    [[ "$(get_ralph_session_state)" == "active" ]] || return 0

    local now
    now=$(get_iso_timestamp)

    local current_id
    current_id=$(get_session_id)
    local current_created_at
    current_created_at=$(get_active_session_created_at)

    if [[ -z "$current_id" || -z "$current_created_at" ]]; then
        return 0
    fi

    write_active_ralph_session "$current_id" "$current_created_at" "$now"
}

# Global array for Claude command arguments (avoids shell injection)
declare -a CLAUDE_CMD_ARGS=()
declare -a LIVE_CMD_ARGS=()

# Build CLI command with platform driver (shell-injection safe)
# Delegates to the active platform driver's driver_build_command()
# Populates global CLAUDE_CMD_ARGS array for direct execution
# All arguments are forwarded unchanged; the driver's status is returned.
build_claude_command() {
    driver_build_command "$@"
}

# Report whether the active driver supports session resume.
# Returns the status of driver_supports_sessions when the driver defines it,
# otherwise 0 (supported).
supports_driver_sessions() {
    declare -F driver_supports_sessions >/dev/null || return 0
    driver_supports_sessions
}
fi
return 0
}

# Live-streaming capability probe: defers to driver_supports_live_output when
# the driver defines it, otherwise reports support (status 0).
supports_live_output() {
    if declare -F driver_supports_live_output >/dev/null; then
        driver_supports_live_output
        return $?
    fi
    return 0
}

# Copy CLAUDE_CMD_ARGS into LIVE_CMD_ARGS and let the driver adjust the copy
# through driver_prepare_live_command when that hook exists.
# Returns the hook's status, or 0 when there is no hook.
prepare_live_command_args() {
    LIVE_CMD_ARGS=("${CLAUDE_CMD_ARGS[@]}")

    if declare -F driver_prepare_live_command >/dev/null; then
        driver_prepare_live_command
        return $?
    fi

    return 0
}

# Print the jq filter used to render the live stream.
# With a driver_stream_filter hook: prints its output and returns 0.
# Without one: prints the jq program "empty" and returns 1.
get_live_stream_filter() {
    if declare -F driver_stream_filter >/dev/null; then
        driver_stream_filter
        return 0
    fi

    echo "empty"
    return 1
}

# Main execution function
#
# Runs one driver invocation (live-streaming or background mode), then analyses
# the response and records the result in the circuit breaker.
#
# Arguments:
#   $1 - current loop number
# Globals written:
#   LAST_DRIVER_EXIT_CODE - raw exit code of the driver / timeout wrapper
#   PENDING_EXIT_REASON   - output of should_exit_gracefully after a successful run
#   LIVE_OUTPUT           - set to false when live mode cannot be used
#   CLAUDE_OUTPUT_FORMAT  - forced to "json" when live mode is requested with "text"
#   CLAUDE_CMD_ARGS       - extended with --append-system-prompt for review context
# Returns:
#   0 - driver succeeded and the circuit breaker stayed closed
#   1 - driver failed
#   2 - driver failed and its output matches the API usage-limit patterns
#   3 - driver succeeded but record_loop_result reported a circuit breaker trip
#
# The script runs with `set -e`, so every status this function needs to inspect
# is captured with `cmd || rc=$?` instead of `cmd; rc=$?`.
execute_claude_code() {
    local timestamp
    timestamp=$(date '+%Y-%m-%d_%H-%M-%S')
    local output_file="$LOG_DIR/claude_output_${timestamp}.log"
    local stderr_file="$LOG_DIR/claude_stderr_${timestamp}.log"
    local loop_count=$1
    local calls_made
    calls_made=$(cat "$CALL_COUNT_FILE" 2>/dev/null || echo "0")
    calls_made=$((calls_made + 1))
    local fix_plan_completed_before=0
    # `read` returns non-zero on EOF; never let that abort the loop.
    read -r fix_plan_completed_before _ _ < <(count_fix_plan_checkboxes "$RALPH_DIR/@fix_plan.md") || true
    fix_plan_completed_before=${fix_plan_completed_before:-0}

    # Clear previous diff summary to prevent stale context on early exit (#117)
    rm -f "$RALPH_DIR/.loop_diff_summary"

    # Fix #141: Capture git HEAD SHA at loop start to detect commits as progress
    # Store in file for access by progress detection after Claude execution
    local loop_start_sha=""
    if command -v git &>/dev/null && git rev-parse --git-dir &>/dev/null 2>&1; then
        loop_start_sha=$(git rev-parse HEAD 2>/dev/null || echo "")
    fi
    echo "$loop_start_sha" > "$RALPH_DIR/.loop_start_sha"

    log_status "LOOP" "Executing $DRIVER_DISPLAY_NAME (Call $calls_made/$MAX_CALLS_PER_HOUR)"
    local timeout_seconds=$((CLAUDE_TIMEOUT_MINUTES * 60))
    log_status "INFO" "⏳ Starting $DRIVER_DISPLAY_NAME execution... (timeout: ${CLAUDE_TIMEOUT_MINUTES}m)"

    # Initialize or resume session (must happen before build_loop_context
    # so the session_id can gate the "session continued" signal)
    local session_id=""
    if [[ "$CLAUDE_USE_CONTINUE" == "true" ]] && supports_driver_sessions; then
        session_id=$(init_claude_session)
    fi

    # Build loop context for session continuity
    local loop_context=""
    if [[ "$CLAUDE_USE_CONTINUE" == "true" ]]; then
        loop_context=$(build_loop_context "$loop_count" "$session_id")
        if [[ -n "$loop_context" && "$VERBOSE_PROGRESS" == "true" ]]; then
            log_status "INFO" "Loop context: $loop_context"
        fi
    fi

    # Live mode requires JSON output (stream-json) — override text format
    if [[ "$LIVE_OUTPUT" == "true" && "$CLAUDE_OUTPUT_FORMAT" == "text" ]]; then
        log_status "WARN" "Live mode requires JSON output format. Overriding text → json for this session."
        CLAUDE_OUTPUT_FORMAT="json"
    fi

    # Build the Claude CLI command with modern flags
    local use_modern_cli=false

    if build_claude_command "$PROMPT_FILE" "$loop_context" "$session_id"; then
        use_modern_cli=true
        log_status "INFO" "Using modern CLI mode (${CLAUDE_OUTPUT_FORMAT} output)"

        # Build review findings context (separate from loop context)
        local review_context=""
        review_context=$(build_review_context)
        if [[ -n "$review_context" ]]; then
            CLAUDE_CMD_ARGS+=("--append-system-prompt" "$review_context")
        fi
    else
        log_status "WARN" "Failed to build modern CLI command, falling back to legacy mode"
        if [[ "$LIVE_OUTPUT" == "true" ]]; then
            log_status "ERROR" "Live mode requires a built Claude command. Falling back to background mode."
            LIVE_OUTPUT=false
        fi
    fi

    # Execute Claude Code
    local exit_code=0

    # Initialize live.log for this execution
    echo -e "\n\n=== Loop #$loop_count - $(date '+%Y-%m-%d %H:%M:%S') ===" > "$LIVE_LOG_FILE"

    if [[ "$LIVE_OUTPUT" == "true" ]]; then
        # LIVE MODE: Show streaming output in real-time using stream-json + jq
        # Based on: https://www.ytyng.com/en/blog/claude-stream-json-jq/
        #
        # Uses CLAUDE_CMD_ARGS from build_claude_command() to preserve:
        # - --allowedTools (tool permissions)
        # - --append-system-prompt (loop context)
        # - --continue (session continuity)
        # - -p (prompt content)

        if ! supports_live_output; then
            log_status "WARN" "$DRIVER_DISPLAY_NAME does not support structured live streaming. Falling back to background mode."
            LIVE_OUTPUT=false
        fi

        # Check dependencies for live mode
        if [[ "$LIVE_OUTPUT" == "true" ]] && ! command -v jq &> /dev/null; then
            log_status "ERROR" "Live mode requires 'jq' but it's not installed. Falling back to background mode."
            LIVE_OUTPUT=false
        elif [[ "$LIVE_OUTPUT" == "true" ]] && ! command -v stdbuf &> /dev/null; then
            log_status "ERROR" "Live mode requires 'stdbuf' (from coreutils) but it's not installed. Falling back to background mode."
            LIVE_OUTPUT=false
        fi
    fi

    if [[ "$LIVE_OUTPUT" == "true" ]]; then
        # Safety check: live mode requires a successfully built modern command
        if [[ "$use_modern_cli" != "true" || ${#CLAUDE_CMD_ARGS[@]} -eq 0 ]]; then
            log_status "ERROR" "Live mode requires a built Claude command. Falling back to background mode."
            LIVE_OUTPUT=false
        fi
    fi

    if [[ "$LIVE_OUTPUT" == "true" ]]; then
        log_status "INFO" "📺 Live output mode enabled - showing $DRIVER_DISPLAY_NAME streaming..."
        echo -e "${PURPLE}━━━━━━━━━━━━━━━━ ${DRIVER_DISPLAY_NAME} Output ━━━━━━━━━━━━━━━━${NC}"

        if ! prepare_live_command_args; then
            log_status "ERROR" "Failed to prepare live streaming command. Falling back to background mode."
            LIVE_OUTPUT=false
        fi
    fi

    if [[ "$LIVE_OUTPUT" == "true" ]]; then
        # get_live_stream_filter returns 1 when it falls back to "empty";
        # the printed filter is still usable, so the status is ignored.
        local jq_filter
        jq_filter=$(get_live_stream_filter) || true

        # Execute with streaming, preserving all flags from build_claude_command()
        # Use stdbuf to disable buffering for real-time output
        # Use portable_timeout for consistent timeout protection (Issue: missing timeout)
        # Capture all pipeline exit codes for proper error handling
        # stdin must be redirected from /dev/null because newer Claude CLI versions
        # read from stdin even in -p (print) mode, causing the process to hang
        #
        # With pipefail the pipeline only succeeds when all four stages return 0,
        # so the all-zero default is exact for the success case. On failure the
        # `||` branch snapshots PIPESTATUS before any other command can reset it,
        # and keeps `set -e` from aborting on a driver timeout or crash.
        local -a pipe_status=(0 0 0 0)
        set -o pipefail
        portable_timeout "${timeout_seconds}s" stdbuf -oL "${LIVE_CMD_ARGS[@]}" \
            < /dev/null 2>"$stderr_file" \
            | stdbuf -oL tee "$output_file" \
            | stdbuf -oL jq --unbuffered -j "$jq_filter" 2>/dev/null \
            | tee "$LIVE_LOG_FILE" \
            || pipe_status=("${PIPESTATUS[@]}")
        set +o pipefail

        # Primary exit code is from Claude/timeout (first command in pipeline)
        exit_code=${pipe_status[0]}

        # Check for tee failures (second command) - could break logging/session
        if [[ ${pipe_status[1]} -ne 0 ]]; then
            log_status "WARN" "Failed to write stream output to log file (exit code ${pipe_status[1]})"
        fi

        # Check for jq failures (third command) - warn but don't fail
        if [[ ${pipe_status[2]} -ne 0 ]]; then
            log_status "WARN" "jq filter had issues parsing some stream events (exit code ${pipe_status[2]})"
        fi

        echo ""
        echo -e "${PURPLE}━━━━━━━━━━━━━━━━ End of Output ━━━━━━━━━━━━━━━━━━━${NC}"

        # Preserve full stream output for downstream analysis and session extraction.
        # Claude-style stream_json can be collapsed to the final result record,
        # while Codex JSONL should remain as event output for the shared parser.
        if [[ "$CLAUDE_USE_CONTINUE" == "true" && -f "$output_file" ]]; then
            local stream_output_file="${output_file%.log}_stream.log"
            cp "$output_file" "$stream_output_file"

            # Collapse Claude-style stream_json to the final result object when present.
            local result_line
            result_line=$(grep -E '"type"[[:space:]]*:[[:space:]]*"result"' "$output_file" 2>/dev/null | tail -1)

            if [[ -n "$result_line" ]]; then
                if echo "$result_line" | jq -e . >/dev/null 2>&1; then
                    echo "$result_line" > "$output_file"
                    log_status "INFO" "Collapsed streamed response to the final result record"
                else
                    cp "$stream_output_file" "$output_file"
                    log_status "WARN" "Final result record was invalid JSON, keeping full stream output"
                fi
            else
                log_status "INFO" "Keeping full stream output for shared response analysis"
            fi
        fi
    else
        # BACKGROUND MODE: Original behavior with progress monitoring
        #
        # NOTE(review): a command started with `&` always yields status 0 to the
        # surrounding `if`, so the `else` branches below cannot detect a failed
        # start; real failures surface through `wait` further down.
        if [[ "$use_modern_cli" == "true" ]]; then
            # Modern execution with command array (shell-injection safe)
            # Execute array directly without bash -c to prevent shell metacharacter interpretation
            # stdin must be redirected from /dev/null because newer Claude CLI versions
            # read from stdin even in -p (print) mode, causing SIGTTIN suspension
            # when the process is backgrounded
            if portable_timeout "${timeout_seconds}s" "${CLAUDE_CMD_ARGS[@]}" < /dev/null > "$output_file" 2>"$stderr_file" &
            then
                :  # Continue to wait loop
            else
                log_status "ERROR" "❌ Failed to start $DRIVER_DISPLAY_NAME process (modern mode)"
                # Fall back to legacy mode
                log_status "INFO" "Falling back to legacy mode..."
                use_modern_cli=false
            fi
        fi

        # Fall back to legacy stdin piping if modern mode failed or not enabled
        # Note: Legacy mode doesn't use --allowedTools, so tool permissions
        # will be handled by Claude Code's default permission system
        if [[ "$use_modern_cli" == "false" ]]; then
            # $CLAUDE_CODE_CMD is left unquoted as in the original so that a
            # value carrying extra words still splits into command + arguments.
            # shellcheck disable=SC2086
            if portable_timeout "${timeout_seconds}s" $CLAUDE_CODE_CMD < "$PROMPT_FILE" > "$output_file" 2>"$stderr_file" &
            then
                :  # Continue to wait loop
            else
                log_status "ERROR" "❌ Failed to start $DRIVER_DISPLAY_NAME process"
                return 1
            fi
        fi

        # Get PID and monitor progress
        local claude_pid=$!
        local progress_counter=0
        local progress_indicator=""
        local last_line=""

        # Show progress while Claude Code is running
        while kill -0 "$claude_pid" 2>/dev/null; do
            progress_counter=$((progress_counter + 1))
            case $((progress_counter % 4)) in
                1) progress_indicator="⠋" ;;
                2) progress_indicator="⠙" ;;
                3) progress_indicator="⠹" ;;
                0) progress_indicator="⠸" ;;
            esac

            # Get last line from output if available
            last_line=""
            if [[ -f "$output_file" && -s "$output_file" ]]; then
                last_line=$(tail -1 "$output_file" 2>/dev/null | head -c 80)
                # Copy to live.log for tmux monitoring (best effort)
                cp "$output_file" "$LIVE_LOG_FILE" 2>/dev/null || true
            fi

            # Update progress file for monitor. The last output line is arbitrary
            # driver text (often JSON, i.e. full of quotes), so jq does the
            # escaping; pasting it into a heredoc produced invalid JSON.
            jq -n \
                --arg indicator "$progress_indicator" \
                --argjson elapsed "$((progress_counter * 10))" \
                --arg last_output "$last_line" \
                --arg timestamp "$(date '+%Y-%m-%d %H:%M:%S')" \
                '{
                    status: "executing",
                    indicator: $indicator,
                    elapsed_seconds: $elapsed,
                    last_output: $last_output,
                    timestamp: $timestamp
                }' > "$PROGRESS_FILE" 2>/dev/null || true

            # Only log if verbose mode is enabled
            if [[ "$VERBOSE_PROGRESS" == "true" ]]; then
                if [[ -n "$last_line" ]]; then
                    log_status "INFO" "$progress_indicator $DRIVER_DISPLAY_NAME: $last_line... (${progress_counter}0s)"
                else
                    log_status "INFO" "$progress_indicator $DRIVER_DISPLAY_NAME working... (${progress_counter}0s elapsed)"
                fi
            fi

            sleep 10
        done

        # Wait for the process to finish and get exit code
        # (`||` form: a failing driver must not trip `set -e` before we record it)
        wait "$claude_pid" || exit_code=$?
    fi

    # Expose the raw driver exit code to the main loop for status reporting
    LAST_DRIVER_EXIT_CODE=$exit_code

    if [[ $exit_code -eq 0 ]]; then
        # Only increment counter on successful execution
        echo "$calls_made" > "$CALL_COUNT_FILE"

        # Clear progress file
        printf '{"status": "completed", "timestamp": "%s"}\n' "$(date '+%Y-%m-%d %H:%M:%S')" > "$PROGRESS_FILE"

        log_status "SUCCESS" "✅ $DRIVER_DISPLAY_NAME execution completed successfully"

        # Save session ID from JSON output (Phase 1.1)
        if [[ "$CLAUDE_USE_CONTINUE" == "true" ]] && supports_driver_sessions; then
            save_claude_session "$output_file"
        fi

        # Analyze the response
        # The status of analyze_response is not consulted; the code below reads
        # its findings from $RESPONSE_ANALYSIS_FILE instead.
        log_status "INFO" "🔍 Analyzing $DRIVER_DISPLAY_NAME response..."
        analyze_response "$output_file" "$loop_count" || true

        local fix_plan_completed_after=0
        read -r fix_plan_completed_after _ _ < <(count_fix_plan_checkboxes "$RALPH_DIR/@fix_plan.md") || true
        fix_plan_completed_after=${fix_plan_completed_after:-0}
        enforce_fix_plan_progress_tracking "$RESPONSE_ANALYSIS_FILE" "$fix_plan_completed_before" "$fix_plan_completed_after"

        # Collapse completed story details so the agent doesn't re-read them
        if [[ $fix_plan_completed_after -gt $fix_plan_completed_before ]]; then
            collapse_completed_stories "$RALPH_DIR/@fix_plan.md"
        fi

        # Run quality gates
        local exit_signal_for_gates
        exit_signal_for_gates=$(jq -r '.analysis.exit_signal // false' "$RESPONSE_ANALYSIS_FILE" 2>/dev/null || echo "false")
        local qg_result
        qg_result=$(run_quality_gates "$loop_count" "$exit_signal_for_gates")

        # Block mode: suppress exit signals so the loop keeps running
        if [[ "$qg_result" == "1" ]]; then
            log_status "WARN" "Quality gate block: suppressing completion signals"
            local qg_tmp="$RESPONSE_ANALYSIS_FILE.qg_tmp"
            if jq '.analysis.has_completion_signal = false | .analysis.exit_signal = false' \
                "$RESPONSE_ANALYSIS_FILE" > "$qg_tmp" 2>/dev/null; then
                mv "$qg_tmp" "$RESPONSE_ANALYSIS_FILE"
            else
                rm -f "$qg_tmp" 2>/dev/null
            fi
        fi

        # Update exit signals based on analysis
        update_exit_signals

        # Log analysis summary
        log_analysis_summary

        PENDING_EXIT_REASON=$(should_exit_gracefully)

        # Get file change count for circuit breaker
        # Fix #141: Detect both uncommitted changes AND committed changes
        local files_changed=0
        local loop_start_sha=""
        local current_sha=""

        if [[ -f "$RALPH_DIR/.loop_start_sha" ]]; then
            loop_start_sha=$(cat "$RALPH_DIR/.loop_start_sha" 2>/dev/null || echo "")
        fi

        if command -v git &>/dev/null && git rev-parse --git-dir &>/dev/null 2>&1; then
            current_sha=$(git rev-parse HEAD 2>/dev/null || echo "")

            # Check if commits were made (HEAD changed)
            if [[ -n "$loop_start_sha" && -n "$current_sha" && "$loop_start_sha" != "$current_sha" ]]; then
                # Commits were made - count union of committed files AND working tree changes
                # This catches cases where Claude commits some files but still has other modified files
                files_changed=$(
                    {
                        git diff --name-only "$loop_start_sha" "$current_sha" 2>/dev/null
                        git diff --name-only HEAD 2>/dev/null            # unstaged changes
                        git diff --name-only --cached 2>/dev/null        # staged changes
                    } | sort -u | wc -l
                )
                # BSD wc pads its output with blanks; keep only the number.
                files_changed=${files_changed//[[:space:]]/}
                [[ "$VERBOSE_PROGRESS" == "true" ]] && log_status "DEBUG" "Detected $files_changed unique files changed (commits + working tree) since loop start"
            else
                # No commits - check for uncommitted changes (staged + unstaged)
                files_changed=$(
                    {
                        git diff --name-only 2>/dev/null                 # unstaged changes
                        git diff --name-only --cached 2>/dev/null        # staged changes
                    } | sort -u | wc -l
                )
                files_changed=${files_changed//[[:space:]]/}
            fi
        fi

        # Capture diff summary for next loop's context (#117)
        capture_loop_diff_summary "$loop_start_sha"

        local has_errors="false"

        # Two-stage error detection to avoid JSON field false positives
        # Stage 1: Filter out JSON field patterns like "is_error": false
        # Stage 2: Look for actual error messages in specific contexts
        # Avoid type annotations like "error: Error" by requiring lowercase after ": error"
        local json_error_field='"[^"]*error[^"]*":'
        local error_pattern='(^Error:|^ERROR:|^error:|\]: error|Link: error|Error occurred|failed with error|[Ee]xception|Fatal|FATAL)'
        if grep -v "$json_error_field" "$output_file" 2>/dev/null | grep -qE "$error_pattern"; then
            has_errors="true"

            # Debug logging: show what triggered error detection
            if [[ "$VERBOSE_PROGRESS" == "true" ]]; then
                log_status "DEBUG" "Error patterns found:"
                grep -v "$json_error_field" "$output_file" 2>/dev/null \
                    | grep -nE "$error_pattern" \
                    | head -3 \
                    | while IFS= read -r line; do
                        log_status "DEBUG" " $line"
                    done
            fi

            log_status "WARN" "Errors detected in output, check: $output_file"
        fi

        local output_length
        output_length=$(wc -c < "$output_file" 2>/dev/null || echo 0)
        output_length=${output_length//[[:space:]]/}

        # Circuit-breaker mode: override progress signals so circuit breaker sees no-progress
        if [[ "$qg_result" == "2" ]]; then
            log_status "WARN" "Quality gate circuit-breaker: overriding progress signals"
            files_changed=0
            has_errors="true"
        fi

        # Record result in circuit breaker
        # (`||` form so a non-zero status reaches the check below under `set -e`)
        local circuit_result=0
        record_loop_result "$loop_count" "$files_changed" "$has_errors" "$output_length" || circuit_result=$?

        if [[ $circuit_result -ne 0 ]]; then
            log_status "WARN" "Circuit breaker opened - halting execution"
            return 3  # Special code for circuit breaker trip
        fi

        return 0
    else
        # Clear progress file on failure
        printf '{"status": "failed", "timestamp": "%s"}\n' "$(date '+%Y-%m-%d %H:%M:%S')" > "$PROGRESS_FILE"

        # Check if the failure is due to API 5-hour limit
        if grep -qi "5.*hour.*limit\|limit.*reached.*try.*back\|usage.*limit.*reached" "$output_file" 2>/dev/null; then
            log_status "ERROR" "🚫 Claude API 5-hour usage limit reached"
            return 2  # Special return code for API limit
        else
            local exit_desc
            exit_desc=$(describe_exit_code "$exit_code")
            log_status "ERROR" "❌ $DRIVER_DISPLAY_NAME exited: $exit_desc (code $exit_code)"
            if [[ -f "$stderr_file" && -s "$stderr_file" ]]; then
                log_status "ERROR" " stderr (last 3 lines): $(tail -3 "$stderr_file")"
                log_status "ERROR" " full stderr log: $stderr_file"
            fi
            return 1
        fi
    fi
}

# Guard against double cleanup (EXIT fires after signal handler exits)
_CLEANUP_DONE=false

# EXIT trap — catches set -e failures and other unexpected exits
# Does nothing when cleanup already ran or the exit status is 0; otherwise logs
# the described exit code and records an "unexpected_exit" status.
_on_exit() {
    local code=$?
    [[ "$_CLEANUP_DONE" == "true" ]] && return
    _CLEANUP_DONE=true
    if [[ "$code" -ne 0 ]]; then
        local desc
        desc=$(describe_exit_code "$code")
        log_status "ERROR" "Ralph loop exiting unexpectedly: $desc (code $code)"
        update_status "$loop_count" "$(cat "$CALL_COUNT_FILE" 2>/dev/null || echo "0")" "unexpected_exit" "stopped" "$desc" "$code"
    fi
}

# Signal handler — preserves signal identity in exit code
# $1 is the signal name passed by the trap (SIGINT or SIGTERM).
_on_signal() {
    local sig=$1
    log_status "INFO" "Ralph loop interrupted by $sig. Cleaning up..."
reset_session "manual_interrupt" update_status "$loop_count" "$(cat "$CALL_COUNT_FILE" 2>/dev/null || echo "0")" "interrupted" "stopped" "$sig" _CLEANUP_DONE=true [[ "$sig" == "SIGINT" ]] && exit 130 exit 143 } trap _on_exit EXIT trap '_on_signal SIGINT' SIGINT trap '_on_signal SIGTERM' SIGTERM # Global variable for loop count (needed by trap handlers) loop_count=0 # Main loop main() { initialize_runtime_context if ! validate_permission_denial_mode "$PERMISSION_DENIAL_MODE"; then exit 1 fi if [[ -n "$QUALITY_GATES" || -n "$TEST_COMMAND" ]]; then if ! validate_quality_gate_mode "$QUALITY_GATE_MODE"; then exit 1 fi if ! validate_quality_gate_timeout "$QUALITY_GATE_TIMEOUT"; then exit 1 fi if ! has_timeout_command; then log_status "WARN" "No timeout command available. Quality gate and test commands will fail. Install coreutils to enable timeout support." fi fi if [[ "$(driver_name)" == "claude-code" ]]; then normalize_claude_permission_mode if ! validate_claude_permission_mode "$CLAUDE_PERMISSION_MODE"; then exit 1 fi fi if driver_supports_tool_allowlist; then # Validate --allowed-tools now that platform-specific VALID_TOOL_PATTERNS are loaded if [[ "${_CLI_ALLOWED_TOOLS:-}" == "true" ]] && ! validate_allowed_tools "$CLAUDE_ALLOWED_TOOLS"; then exit 1 fi else warn_if_allowed_tools_ignored fi if [[ "${_CLI_ALLOWED_TOOLS:-}" == "true" ]] && ! driver_supports_tool_allowlist; then _CLI_ALLOWED_TOOLS="" fi log_status "SUCCESS" "🚀 Ralph loop starting with $DRIVER_DISPLAY_NAME" log_status "INFO" "Max calls per hour: $MAX_CALLS_PER_HOUR" log_status "INFO" "Logs: $LOG_DIR/ | Docs: $DOCS_DIR/ | Status: $STATUS_FILE" # Check if project uses old flat structure and needs migration if [[ -f "PROMPT.md" ]] && [[ ! -d ".ralph" ]]; then log_status "ERROR" "This project uses the old flat structure." echo "" echo "Ralph v0.10.0+ uses a .ralph/ subfolder to keep your project root clean." 
echo "" echo "To upgrade your project, run:" echo " ralph-migrate" echo "" echo "This will move Ralph-specific files to .ralph/ while preserving src/ at root." echo "A backup will be created before migration." exit 1 fi # Check if this is a Ralph project directory if [[ ! -f "$PROMPT_FILE" ]]; then log_status "ERROR" "Prompt file '$PROMPT_FILE' not found!" echo "" # Check if this looks like a partial Ralph project if [[ -f "$RALPH_DIR/@fix_plan.md" ]] || [[ -d "$RALPH_DIR/specs" ]] || [[ -f "$RALPH_DIR/@AGENT.md" ]]; then echo "This appears to be a bmalph/Ralph project but is missing .ralph/PROMPT.md." echo "You may need to create or restore the PROMPT.md file." else echo "This directory is not a bmalph/Ralph project." fi echo "" echo "To fix this:" echo " 1. Initialize bmalph in this project: bmalph init" echo " 2. Restore bundled Ralph files in an existing project: bmalph upgrade" echo " 3. Generate Ralph task files after planning: bmalph implement" echo " 4. Navigate to an existing bmalph/Ralph project directory" echo " 5. Or create .ralph/PROMPT.md manually in this directory" echo "" echo "Ralph projects should contain: .ralph/PROMPT.md, .ralph/@fix_plan.md, .ralph/specs/, src/, etc." exit 1 fi # Check required dependencies if ! command -v jq &> /dev/null; then log_status "ERROR" "Required dependency 'jq' is not installed." echo "" echo "jq is required for JSON processing in the Ralph loop." echo "" echo "Install jq:" echo " macOS: brew install jq" echo " Ubuntu: sudo apt-get install jq" echo " Windows: choco install jq (or: winget install jqlang.jq)" echo "" echo "After installing, run this command again." exit 1 fi # Check for git repository (required for progress detection) if ! validate_git_repo; then exit 1 fi # Initialize session tracking before entering the loop init_session_tracking log_status "INFO" "Starting main loop..." 
while true; do loop_count=$((loop_count + 1)) # Update session last_used timestamp update_session_last_used log_status "INFO" "Loop #$loop_count - calling init_call_tracking..." init_call_tracking log_status "LOOP" "=== Starting Loop #$loop_count ===" # Check circuit breaker before attempting execution if should_halt_execution; then reset_session "circuit_breaker_open" update_status "$loop_count" "$(cat "$CALL_COUNT_FILE")" "circuit_breaker_open" "halted" "stagnation_detected" log_status "ERROR" "🛑 Circuit breaker has opened - execution halted" break fi # Check rate limits if ! can_make_call; then wait_for_reset continue fi # Check for graceful exit conditions local exit_reason=$(should_exit_gracefully) if [[ "$exit_reason" != "" ]]; then log_status "SUCCESS" "🏁 Graceful exit triggered: $exit_reason" reset_session "project_complete" update_status "$loop_count" "$(cat "$CALL_COUNT_FILE")" "graceful_exit" "completed" "$exit_reason" log_status "SUCCESS" "🎉 Ralph has completed the project! Final stats:" log_status "INFO" " - Total loops: $loop_count" log_status "INFO" " - API calls used: $(cat "$CALL_COUNT_FILE")" log_status "INFO" " - Exit reason: $exit_reason" break fi # Update status local calls_made=$(cat "$CALL_COUNT_FILE" 2>/dev/null || echo "0") update_status "$loop_count" "$calls_made" "executing" "running" # Execute Claude Code execute_claude_code "$loop_count" local exec_result=$? if [ $exec_result -eq 0 ]; then if consume_current_loop_permission_denial "$loop_count"; then if [[ "$PERMISSION_DENIAL_ACTION" == "halt" ]]; then break fi # Brief pause between loops when the denial was recorded but # policy allows Ralph to continue. 
sleep 5 continue fi if [[ -n "$PENDING_EXIT_REASON" ]]; then local exit_reason="$PENDING_EXIT_REASON" PENDING_EXIT_REASON="" log_status "SUCCESS" "🏁 Graceful exit triggered: $exit_reason" reset_session "project_complete" update_status "$loop_count" "$(cat "$CALL_COUNT_FILE")" "graceful_exit" "completed" "$exit_reason" log_status "SUCCESS" "🎉 Ralph has completed the project! Final stats:" log_status "INFO" " - Total loops: $loop_count" log_status "INFO" " - API calls used: $(cat "$CALL_COUNT_FILE")" log_status "INFO" " - Exit reason: $exit_reason" break fi update_status "$loop_count" "$(cat "$CALL_COUNT_FILE")" "completed" "success" # Consume review findings after successful execution — the AI has received # the context via --append-system-prompt. Deleting here (not in # build_review_context) ensures findings survive transient loop failures. rm -f "$REVIEW_FINDINGS_FILE" # Code review check local fix_plan_delta=0 if [[ -f "$RESPONSE_ANALYSIS_FILE" ]]; then fix_plan_delta=$(jq -r '.analysis.fix_plan_completed_delta // 0' "$RESPONSE_ANALYSIS_FILE" 2>/dev/null || echo "0") [[ ! "$fix_plan_delta" =~ ^-?[0-9]+$ ]] && fix_plan_delta=0 fi if should_run_review "$loop_count" "$fix_plan_delta"; then run_review_loop "$loop_count" fi # Brief pause between successful executions sleep 5 elif [ $exec_result -eq 3 ]; then # Circuit breaker opened reset_session "circuit_breaker_trip" update_status "$loop_count" "$(cat "$CALL_COUNT_FILE")" "circuit_breaker_open" "halted" "stagnation_detected" log_status "ERROR" "🛑 Circuit breaker has opened - halting loop" log_status "INFO" "Run 'bash .ralph/ralph_loop.sh --reset-circuit' to reset the circuit breaker after addressing issues" break elif [ $exec_result -eq 2 ]; then # API 5-hour limit reached - handle specially update_status "$loop_count" "$(cat "$CALL_COUNT_FILE")" "api_limit" "paused" log_status "WARN" "🛑 Claude API 5-hour limit reached!" 
# Ask user whether to wait or exit echo -e "\n${YELLOW}The Claude API 5-hour usage limit has been reached.${NC}" echo -e "${YELLOW}You can either:${NC}" echo -e " ${GREEN}1)${NC} Wait for the limit to reset (usually within an hour)" echo -e " ${GREEN}2)${NC} Exit the loop and try again later" echo -e "\n${BLUE}Choose an option (1 or 2):${NC} " # Read user input with timeout read -t 30 -n 1 user_choice echo # New line after input if [[ "$user_choice" == "2" ]] || [[ -z "$user_choice" ]]; then log_status "INFO" "User chose to exit (or timed out). Exiting loop..." update_status "$loop_count" "$(cat "$CALL_COUNT_FILE")" "api_limit_exit" "stopped" "api_5hour_limit" break else log_status "INFO" "User chose to wait. Waiting for API limit reset..." # Wait for longer period when API limit is hit local wait_minutes=60 log_status "INFO" "Waiting $wait_minutes minutes before retrying..." # Countdown display local wait_seconds=$((wait_minutes * 60)) while [[ $wait_seconds -gt 0 ]]; do local minutes=$((wait_seconds / 60)) local seconds=$((wait_seconds % 60)) printf "\r${YELLOW}Time until retry: %02d:%02d${NC}" $minutes $seconds sleep 1 ((wait_seconds--)) done printf "\n" fi else # Infrastructure failures (timeout, crash, OOM) intentionally bypass # record_loop_result to avoid counting as agent stagnation. The circuit # breaker only tracks progress during successful executions. (Issue #145) local exit_desc exit_desc=$(describe_exit_code "${LAST_DRIVER_EXIT_CODE:-1}") update_status "$loop_count" "$(cat "$CALL_COUNT_FILE")" "failed" "error" "$exit_desc" "${LAST_DRIVER_EXIT_CODE:-}" log_status "WARN" "Execution failed, waiting 30 seconds before retry..." sleep 30 fi log_status "LOOP" "=== Completed Loop #$loop_count ===" done } # Help function show_help() { cat << HELPEOF Ralph Loop Usage: $0 [OPTIONS] IMPORTANT: This command must be run from a bmalph/Ralph project directory. Use 'bmalph init' in your project first. 
Options: -h, --help Show this help message -c, --calls NUM Set max calls per hour (default: $MAX_CALLS_PER_HOUR) -p, --prompt FILE Set prompt file (default: $PROMPT_FILE) -s, --status Show current status and exit -m, --monitor Start with tmux session and live monitor (requires tmux) -v, --verbose Show detailed progress updates during execution -l, --live Show live driver output in real-time (auto-switches to JSON output) -t, --timeout MIN Set driver execution timeout in minutes (default: $CLAUDE_TIMEOUT_MINUTES) --reset-circuit Reset circuit breaker to CLOSED state --circuit-status Show circuit breaker status and exit --auto-reset-circuit Auto-reset circuit breaker on startup (bypasses cooldown) --reset-session Reset session state and exit (clears session continuity) Modern CLI Options (Phase 1.1): --output-format FORMAT Set driver output format: json or text (default: $CLAUDE_OUTPUT_FORMAT) Note: --live mode requires JSON and will auto-switch --allowed-tools TOOLS Claude Code only. Ignored by codex, opencode, cursor, and copilot --no-continue Disable session continuity across loops --session-expiry HOURS Set session expiration time in hours (default: $CLAUDE_SESSION_EXPIRY_HOURS) Files created: - $LOG_DIR/: All execution logs - $DOCS_DIR/: Generated documentation - $STATUS_FILE: Current status (JSON) - .ralph/.ralph_session: Session lifecycle tracking - .ralph/.ralph_session_history: Session transition history (last 50) - .ralph/.call_count: API call counter for rate limiting - .ralph/.last_reset: Timestamp of last rate limit reset Example workflow: cd my-project # Enter project directory bmalph init # Install bmalph + Ralph files bmalph implement # Generate Ralph task files $0 --monitor # Start Ralph with monitoring Examples: bmalph run # Start Ralph via the bmalph CLI $0 --calls 50 --prompt my_prompt.md $0 --monitor # Start with integrated tmux monitoring $0 --live # Show live driver output in real-time (streaming) $0 --live --verbose # Live streaming + verbose 
logging $0 --monitor --timeout 30 # 30-minute timeout for complex tasks $0 --verbose --timeout 5 # 5-minute timeout with detailed progress $0 --output-format text # Use legacy text output format $0 --no-continue # Disable session continuity $0 --session-expiry 48 # 48-hour session expiration HELPEOF } # Only parse arguments and run main when executed directly, not when sourced if [[ "${BASH_SOURCE[0]}" == "${0}" ]]; then # Parse command line arguments while [[ $# -gt 0 ]]; do case $1 in -h|--help) show_help exit 0 ;; -c|--calls) MAX_CALLS_PER_HOUR="$2" _cli_MAX_CALLS_PER_HOUR="$MAX_CALLS_PER_HOUR" _CLI_MAX_CALLS_PER_HOUR=true shift 2 ;; -p|--prompt) PROMPT_FILE="$2" shift 2 ;; -s|--status) if [[ -f "$STATUS_FILE" ]]; then echo "Current Status:" cat "$STATUS_FILE" | jq . 2>/dev/null || cat "$STATUS_FILE" else echo "No status file found. Ralph may not be running." fi exit 0 ;; -m|--monitor) USE_TMUX=true shift ;; -v|--verbose) VERBOSE_PROGRESS=true _cli_VERBOSE_PROGRESS="$VERBOSE_PROGRESS" _CLI_VERBOSE_PROGRESS=true shift ;; -l|--live) LIVE_OUTPUT=true shift ;; -t|--timeout) if [[ "$2" =~ ^[1-9][0-9]*$ ]] && [[ "$2" -le 120 ]]; then CLAUDE_TIMEOUT_MINUTES="$2" _cli_CLAUDE_TIMEOUT_MINUTES="$CLAUDE_TIMEOUT_MINUTES" _CLI_CLAUDE_TIMEOUT_MINUTES=true else echo "Error: Timeout must be a positive integer between 1 and 120 minutes" exit 1 fi shift 2 ;; --reset-circuit) # Source the circuit breaker library SCRIPT_DIR="$(dirname "${BASH_SOURCE[0]}")" source "$SCRIPT_DIR/lib/circuit_breaker.sh" source "$SCRIPT_DIR/lib/date_utils.sh" reset_circuit_breaker "Manual reset via command line" reset_session "manual_circuit_reset" exit 0 ;; --reset-session) # Reset session state only SCRIPT_DIR="$(dirname "${BASH_SOURCE[0]}")" source "$SCRIPT_DIR/lib/date_utils.sh" reset_session "manual_reset_flag" echo -e "\033[0;32m✅ Session state reset successfully\033[0m" exit 0 ;; --circuit-status) # Source the circuit breaker library SCRIPT_DIR="$(dirname "${BASH_SOURCE[0]}")" source 
"$SCRIPT_DIR/lib/circuit_breaker.sh" show_circuit_status exit 0 ;; --output-format) if [[ "$2" == "json" || "$2" == "text" ]]; then CLAUDE_OUTPUT_FORMAT="$2" _cli_CLAUDE_OUTPUT_FORMAT="$CLAUDE_OUTPUT_FORMAT" _CLI_CLAUDE_OUTPUT_FORMAT=true else echo "Error: --output-format must be 'json' or 'text'" exit 1 fi shift 2 ;; --allowed-tools) CLAUDE_ALLOWED_TOOLS="$2" _cli_CLAUDE_ALLOWED_TOOLS="$2" _CLI_ALLOWED_TOOLS=true shift 2 ;; --no-continue) CLAUDE_USE_CONTINUE=false _cli_CLAUDE_USE_CONTINUE="$CLAUDE_USE_CONTINUE" _CLI_SESSION_CONTINUITY=true shift ;; --session-expiry) if [[ -z "$2" || ! "$2" =~ ^[1-9][0-9]*$ ]]; then echo "Error: --session-expiry requires a positive integer (hours)" exit 1 fi CLAUDE_SESSION_EXPIRY_HOURS="$2" _cli_CLAUDE_SESSION_EXPIRY_HOURS="$2" _CLI_SESSION_EXPIRY_HOURS=true shift 2 ;; --auto-reset-circuit) CB_AUTO_RESET=true shift ;; *) echo "Unknown option: $1" show_help exit 1 ;; esac done # If tmux mode requested, set it up if [[ "$USE_TMUX" == "true" ]]; then check_tmux_available setup_tmux_session fi # Start the main loop main fi # end: BASH_SOURCE[0] == $0