feat(scripts): port memory-safe execution and reliability improvements from revive-dev
Sync functional improvements developed in revive-dev into the BMAD-METHOD fork while preserving repo-specific paths:

- Add memory-safe Claude helpers (run_claude_to_file, read_phase_tail) that pipe output to temp files instead of bash variables, preventing GB-scale RAM usage during long epic executions
- Add kill_orphaned_test_processes() to clean up zombie jest/vitest/playwright/pytest processes between stories and on exit
- Replace per-call `env -u CLAUDECODE` with a global `unset CLAUDECODE` at script start for cleaner nested session support
- Port metrics resume/accumulation logic that restores counters from existing YAML on resumed runs and accumulates duration
- Add log truncation between stories (64KB cap) to prevent unbounded log growth across multi-story runs
- Add log persistence and cleanup trap to epic-chain.sh
- Revert regression-gate.sh test commands to direct execution (matching the revive-dev pattern)

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
parent
1eedebaca8
commit
cdc92d0d90
|
|
@ -32,6 +32,9 @@
|
|||
|
||||
set -e
|
||||
|
||||
# Allow nested Claude Code sessions (when launched from within Claude Code)
|
||||
unset CLAUDECODE 2>/dev/null || true
|
||||
|
||||
# =============================================================================
|
||||
# Configuration
|
||||
# =============================================================================
|
||||
|
|
@ -47,6 +50,8 @@ UAT_DIR="$PROJECT_ROOT/docs/uat"
|
|||
HANDOFF_DIR="$PROJECT_ROOT/docs/handoffs"
|
||||
|
||||
LOG_FILE="/tmp/bmad-epic-chain-$$.log"
|
||||
LOGS_DIR="$SPRINT_ARTIFACTS_DIR/logs"
|
||||
FINAL_LOG_FILE=""
|
||||
CHAIN_PLAN_FILE="$SPRINT_ARTIFACTS_DIR/chain-plan.yaml"
|
||||
|
||||
# Colors for output
|
||||
|
|
@ -110,6 +115,76 @@ log_section() {
|
|||
echo -e "${BOLD}───────────────────────────────────────────────────────────${NC}"
|
||||
}
|
||||
|
||||
# =============================================================================
|
||||
# Orphaned Process Cleanup
|
||||
# =============================================================================
|
||||
|
||||
kill_orphaned_test_processes() {
    # Terminate leftover test-runner / dev-server processes that may have been
    # spawned during epic execution and never exited.
    #
    # Matches full command lines (pgrep -f) against a fixed pattern list and
    # sends each live match the default signal (SIGTERM).
    #
    # Globals:   none
    # Arguments: none
    # Outputs:   one log line via log() when at least one process was signalled
    # Returns:   0 always (best-effort cleanup; individual failures are ignored)
    local killed=0
    local pattern pids pid

    for pattern in \
        "node.*jest" \
        "node.*vitest" \
        "node.*playwright" \
        "node.*next.*dev" \
        "node.*tsx.*watch" \
        "python.*pytest"; do
        # pgrep exits non-zero when nothing matches; that is not an error here.
        pids=$(pgrep -f "$pattern" 2>/dev/null || true)
        [ -n "$pids" ] || continue

        # Read from a here-string rather than `echo | while`: a pipeline would
        # run the loop in a subshell and discard the updates to $killed, so the
        # summary log below would never fire.
        while read -r pid; do
            # kill -0 only probes that the process still exists and is signalable.
            if [ -n "$pid" ] && kill -0 "$pid" 2>/dev/null; then
                if kill "$pid" 2>/dev/null; then
                    killed=$((killed + 1))
                fi
            fi
        done <<< "$pids"
    done

    if [ "$killed" -gt 0 ]; then
        log "Killed orphaned test processes"
    fi
}
|
||||
|
||||
# =============================================================================
|
||||
# Log Persistence
|
||||
# =============================================================================
|
||||
|
||||
save_log_to_repo() {
    # Copy the working log ($LOG_FILE) into $LOGS_DIR under a timestamped name
    # and record the destination in the global FINAL_LOG_FILE.
    #
    # Globals:   LOG_FILE (read), LOGS_DIR (read), FINAL_LOG_FILE (written)
    # Arguments: none
    # Returns:   0 when there is nothing to save or the copy fails; otherwise
    #            the status of appending the trailer line to the saved copy.
    if [ -f "$LOG_FILE" ] && [ -s "$LOG_FILE" ]; then
        # Best effort: a failed mkdir simply makes the cp below fail quietly.
        mkdir -p "$LOGS_DIR" 2>/dev/null || true

        local stamp
        stamp=$(date '+%Y%m%d-%H%M%S')
        FINAL_LOG_FILE="$LOGS_DIR/epic-chain-${stamp}.log"

        # Copy failure is tolerated; FINAL_LOG_FILE stays set but no file exists.
        cp "$LOG_FILE" "$FINAL_LOG_FILE" 2>/dev/null || return 0

        # Trailer goes into the saved copy only, not the working log.
        echo "[$(date '+%Y-%m-%d %H:%M:%S')] Log saved to: $FINAL_LOG_FILE" >> "$FINAL_LOG_FILE"
    fi
}
|
||||
|
||||
cleanup_chain() {
    # Exit/signal handler: kill stray test processes, persist the log, then
    # exit with the status that was current when the handler was entered.
    #
    # Globals:   FINAL_LOG_FILE (read; set by save_log_to_repo)
    # Arguments: none
    # Returns:   does not return; exits the shell.

    # Must be the first statement so $? still holds the pre-handler status.
    local chain_status=$?

    # Disarm the traps so the exit below does not re-enter this handler.
    trap - EXIT INT TERM

    kill_orphaned_test_processes
    save_log_to_repo

    if [[ -n "$FINAL_LOG_FILE" && -f "$FINAL_LOG_FILE" ]]; then
        echo " - Log saved: $FINAL_LOG_FILE"
    fi

    exit "$chain_status"
}
|
||||
|
||||
trap cleanup_chain EXIT INT TERM
|
||||
|
||||
# Helper function to create basic report if Claude fails
|
||||
create_basic_report() {
|
||||
local end_time_iso=$(date -u +"%Y-%m-%dT%H:%M:%SZ")
|
||||
|
|
@ -578,6 +653,9 @@ for current_idx in "${!EXECUTION_ORDER[@]}"; do
|
|||
|
||||
((COMPLETED_EPICS++))
|
||||
|
||||
# Kill orphaned node/test processes between epics
|
||||
kill_orphaned_test_processes
|
||||
|
||||
# Generate handoff for next epic
|
||||
if [ "$NO_HANDOFF" = false ]; then
|
||||
next_idx=$((current_idx + 1))
|
||||
|
|
@ -803,7 +881,7 @@ REPORT_GENERATED: $CHAIN_REPORT_FILE"
|
|||
log "Invoking report generator..."
|
||||
|
||||
# Execute report generation
|
||||
report_result=$(env -u CLAUDECODE claude --dangerously-skip-permissions -p "$report_prompt" 2>&1) || true
|
||||
report_result=$(claude --dangerously-skip-permissions -p "$report_prompt" 2>&1) || true
|
||||
|
||||
echo "$report_result" >> "$LOG_FILE"
|
||||
|
||||
|
|
@ -847,7 +925,7 @@ echo " - Metrics: $METRICS_DIR/"
|
|||
if [ -f "$CHAIN_REPORT_FILE" ]; then
|
||||
echo " - Report: $CHAIN_REPORT_FILE"
|
||||
fi
|
||||
echo " - Log: $LOG_FILE"
|
||||
echo " - Log: (saved on exit to $LOGS_DIR/)"
|
||||
echo ""
|
||||
|
||||
if [ $FAILED_EPICS -gt 0 ]; then
|
||||
|
|
|
|||
|
|
@ -142,10 +142,10 @@ DESIGN COMPLETE: $story_id"
|
|||
return 0
|
||||
fi
|
||||
|
||||
# Pipe to file to avoid memory bloat
|
||||
run_claude_to_file "$design_prompt"
|
||||
local result
|
||||
result=$(env -u CLAUDECODE claude --dangerously-skip-permissions -p "$design_prompt" 2>&1) || true
|
||||
|
||||
echo "$result" >> "$LOG_FILE"
|
||||
result=$(read_phase_tail)
|
||||
|
||||
# Extract design block
|
||||
LAST_DESIGN=$(echo "$result" | sed -n '/DESIGN START/,/DESIGN END/p')
|
||||
|
|
|
|||
|
|
@ -126,9 +126,9 @@ init_regression_baseline() {
|
|||
|
||||
# Check if there's a test:json script for better parsing
|
||||
if grep -q '"test:json"' "$PROJECT_ROOT/package.json" 2>/dev/null; then
|
||||
test_output=$(cd "$PROJECT_ROOT" && run_with_timeout "${REGRESSION_TEST_TIMEOUT:-120}" npm run test:json) || true
|
||||
test_output=$(cd "$PROJECT_ROOT" && npm run test:json 2>&1) || true
|
||||
else
|
||||
test_output=$(cd "$PROJECT_ROOT" && run_with_timeout "${REGRESSION_TEST_TIMEOUT:-120}" npm test) || true
|
||||
test_output=$(cd "$PROJECT_ROOT" && npm test 2>&1) || true
|
||||
fi
|
||||
|
||||
BASELINE_PASSING_TESTS=$(extract_test_count "$test_output")
|
||||
|
|
@ -152,14 +152,14 @@ init_regression_baseline() {
|
|||
elif [ -f "$PROJECT_ROOT/Cargo.toml" ]; then
|
||||
# Rust project
|
||||
log "Capturing baseline test count (Rust)..."
|
||||
test_output=$(cd "$PROJECT_ROOT" && run_with_timeout "${REGRESSION_TEST_TIMEOUT:-120}" cargo test) || true
|
||||
test_output=$(cd "$PROJECT_ROOT" && cargo test 2>&1) || true
|
||||
BASELINE_PASSING_TESTS=$(extract_test_count "$test_output")
|
||||
log "Baseline passing tests: $BASELINE_PASSING_TESTS"
|
||||
|
||||
elif [ -f "$PROJECT_ROOT/go.mod" ]; then
|
||||
# Go project
|
||||
log "Capturing baseline test count (Go)..."
|
||||
test_output=$(cd "$PROJECT_ROOT" && run_with_timeout "${REGRESSION_TEST_TIMEOUT:-120}" go test ./... -v) || true
|
||||
test_output=$(cd "$PROJECT_ROOT" && go test ./... -v 2>&1) || true
|
||||
BASELINE_PASSING_TESTS=$(extract_test_count "$test_output")
|
||||
log "Baseline passing tests: $BASELINE_PASSING_TESTS"
|
||||
|
||||
|
|
@ -167,7 +167,7 @@ init_regression_baseline() {
|
|||
# Python project
|
||||
if command -v pytest >/dev/null 2>&1; then
|
||||
log "Capturing baseline test count (Python)..."
|
||||
test_output=$(cd "$PROJECT_ROOT" && run_with_timeout "${REGRESSION_TEST_TIMEOUT:-120}" pytest -v) || true
|
||||
test_output=$(cd "$PROJECT_ROOT" && pytest -v 2>&1) || true
|
||||
BASELINE_PASSING_TESTS=$(extract_test_count "$test_output")
|
||||
log "Baseline passing tests: $BASELINE_PASSING_TESTS"
|
||||
fi
|
||||
|
|
@ -199,23 +199,23 @@ execute_regression_gate() {
|
|||
if [ -f "$PROJECT_ROOT/package.json" ]; then
|
||||
# Check if there's a test:json script for better parsing
|
||||
if grep -q '"test:json"' "$PROJECT_ROOT/package.json" 2>/dev/null; then
|
||||
test_output=$(cd "$PROJECT_ROOT" && run_with_timeout "${REGRESSION_TEST_TIMEOUT:-120}" npm run test:json) || true
|
||||
test_output=$(cd "$PROJECT_ROOT" && npm run test:json 2>&1) || true
|
||||
else
|
||||
test_output=$(cd "$PROJECT_ROOT" && run_with_timeout "${REGRESSION_TEST_TIMEOUT:-120}" npm test) || true
|
||||
test_output=$(cd "$PROJECT_ROOT" && npm test 2>&1) || true
|
||||
fi
|
||||
current_tests=$(extract_test_count "$test_output")
|
||||
|
||||
elif [ -f "$PROJECT_ROOT/Cargo.toml" ]; then
|
||||
test_output=$(cd "$PROJECT_ROOT" && run_with_timeout "${REGRESSION_TEST_TIMEOUT:-120}" cargo test) || true
|
||||
test_output=$(cd "$PROJECT_ROOT" && cargo test 2>&1) || true
|
||||
current_tests=$(extract_test_count "$test_output")
|
||||
|
||||
elif [ -f "$PROJECT_ROOT/go.mod" ]; then
|
||||
test_output=$(cd "$PROJECT_ROOT" && run_with_timeout "${REGRESSION_TEST_TIMEOUT:-120}" go test ./... -v) || true
|
||||
test_output=$(cd "$PROJECT_ROOT" && go test ./... -v 2>&1) || true
|
||||
current_tests=$(extract_test_count "$test_output")
|
||||
|
||||
elif [ -f "$PROJECT_ROOT/requirements.txt" ] || [ -f "$PROJECT_ROOT/pyproject.toml" ]; then
|
||||
if command -v pytest >/dev/null 2>&1; then
|
||||
test_output=$(cd "$PROJECT_ROOT" && run_with_timeout "${REGRESSION_TEST_TIMEOUT:-120}" pytest -v) || true
|
||||
test_output=$(cd "$PROJECT_ROOT" && pytest -v 2>&1) || true
|
||||
current_tests=$(extract_test_count "$test_output")
|
||||
fi
|
||||
fi
|
||||
|
|
|
|||
|
|
@ -168,10 +168,10 @@ After outputting the spec block:
|
|||
return 0
|
||||
fi
|
||||
|
||||
# Pipe to file to avoid memory bloat
|
||||
run_claude_to_file "$spec_prompt"
|
||||
local result
|
||||
result=$(env -u CLAUDECODE claude --dangerously-skip-permissions -p "$spec_prompt" 2>&1) || true
|
||||
|
||||
echo "$result" >> "$LOG_FILE"
|
||||
result=$(read_phase_tail)
|
||||
|
||||
# Extract test spec block
|
||||
LAST_TEST_SPEC=$(echo "$result" | sed -n '/TEST SPEC START/,/TEST SPEC END/p')
|
||||
|
|
@ -314,10 +314,10 @@ After implementing the tests:
|
|||
return 0
|
||||
fi
|
||||
|
||||
# Pipe to file to avoid memory bloat
|
||||
run_claude_to_file "$impl_prompt"
|
||||
local result
|
||||
result=$(env -u CLAUDECODE claude --dangerously-skip-permissions -p "$impl_prompt" 2>&1) || true
|
||||
|
||||
echo "$result" >> "$LOG_FILE"
|
||||
result=$(read_phase_tail)
|
||||
|
||||
# Check completion
|
||||
local completion_status
|
||||
|
|
|
|||
|
|
@ -137,7 +137,7 @@ execute_claude_with_retry() {
|
|||
|
||||
# Wrapper function for retry
|
||||
_claude_invoke() {
|
||||
timeout "$timeout" env -u CLAUDECODE claude --dangerously-skip-permissions -p "$1" 2>&1
|
||||
timeout "$timeout" claude --dangerously-skip-permissions -p "$1" 2>&1
|
||||
local code=$?
|
||||
if [ $code -eq 124 ]; then
|
||||
echo "TIMEOUT: Claude invocation timed out after ${timeout}s"
|
||||
|
|
@ -626,7 +626,7 @@ execute_claude_verbose() {
|
|||
|
||||
# Execute with output tee'd to both terminal and log file
|
||||
local result
|
||||
result=$(timeout "$timeout" env -u CLAUDECODE claude --dangerously-skip-permissions -p "$prompt" 2>&1 | tee -a "$LOG_FILE")
|
||||
result=$(timeout "$timeout" claude --dangerously-skip-permissions -p "$prompt" 2>&1 | tee -a "$LOG_FILE")
|
||||
local exit_code=$?
|
||||
|
||||
if [ $exit_code -eq 124 ]; then
|
||||
|
|
@ -640,7 +640,7 @@ execute_claude_verbose() {
|
|||
else
|
||||
# Non-verbose mode: capture output silently
|
||||
local result
|
||||
result=$(timeout "$timeout" env -u CLAUDECODE claude --dangerously-skip-permissions -p "$prompt" 2>&1)
|
||||
result=$(timeout "$timeout" claude --dangerously-skip-permissions -p "$prompt" 2>&1)
|
||||
local exit_code=$?
|
||||
|
||||
# Log to file only
|
||||
|
|
|
|||
|
|
@ -20,6 +20,9 @@
|
|||
|
||||
set -e
|
||||
|
||||
# Allow nested Claude Code sessions (when launched from within Claude Code)
|
||||
unset CLAUDECODE 2>/dev/null || true
|
||||
|
||||
# =============================================================================
|
||||
# Cleanup and Signal Handling
|
||||
# =============================================================================
|
||||
|
|
@ -91,6 +94,12 @@ cleanup() {
|
|||
fi
|
||||
fi
|
||||
|
||||
# Kill orphaned node/test processes
|
||||
kill_orphaned_test_processes
|
||||
|
||||
# Clean up phase output temp file
|
||||
rm -f "$PHASE_OUTPUT_FILE" 2>/dev/null
|
||||
|
||||
# Save log to repo before exiting
|
||||
save_log_to_repo
|
||||
if [ -n "$FINAL_LOG_FILE" ] && [ -f "$FINAL_LOG_FILE" ]; then
|
||||
|
|
@ -275,6 +284,46 @@ flush_log_to_repo() {
|
|||
cp "$LOG_FILE" "$flush_file" 2>/dev/null || true
|
||||
}
|
||||
|
||||
# =============================================================================
|
||||
# Orphaned Process Cleanup
|
||||
# =============================================================================
|
||||
|
||||
kill_orphaned_test_processes() {
    # Terminate leftover test-runner / dev-server processes that may have been
    # spawned during story execution. These can accumulate and consume memory
    # if tests or dev servers aren't cleaned up.
    #
    # Matches full command lines (pgrep -f) against a fixed pattern list and
    # sends each live match the default signal (SIGTERM).
    #
    # Globals:   none
    # Arguments: none
    # Outputs:   one log line via log() when at least one process was signalled
    # Returns:   0 always (best-effort cleanup; individual failures are ignored)
    local killed=0
    local pattern pids pid

    # Node test runners / dev servers (jest, vitest, playwright, next dev,
    # tsx watch) plus Python pytest.
    for pattern in \
        "node.*jest" \
        "node.*vitest" \
        "node.*playwright" \
        "node.*next.*dev" \
        "node.*tsx.*watch" \
        "python.*pytest"; do
        # pgrep exits non-zero when nothing matches; that is not an error here.
        pids=$(pgrep -f "$pattern" 2>/dev/null || true)
        [ -n "$pids" ] || continue

        # Read from a here-string rather than `echo | while`: a pipeline would
        # run the loop in a subshell and discard the updates to $killed, so the
        # summary log below would never fire.
        while read -r pid; do
            # kill -0 only probes that the process still exists and is signalable.
            if [ -n "$pid" ] && kill -0 "$pid" 2>/dev/null; then
                if kill "$pid" 2>/dev/null; then
                    killed=$((killed + 1))
                fi
            fi
        done <<< "$pids"
    done

    if [ "$killed" -gt 0 ]; then
        log "Killed orphaned test processes"
    fi
}
|
||||
|
||||
# =============================================================================
|
||||
# Git Safety Functions
|
||||
# =============================================================================
|
||||
|
|
@ -449,6 +498,50 @@ log_prompt_size() {
|
|||
fi
|
||||
}
|
||||
|
||||
# =============================================================================
|
||||
# Memory-safe Claude execution helpers
|
||||
# =============================================================================
|
||||
# Instead of capturing claude output into a bash variable (which can consume
|
||||
# gigabytes of RAM), pipe output directly to a temp file and read only the
|
||||
# tail for completion signal parsing.
|
||||
|
||||
# Temp file for current phase output (reused across phases, cleaned up on exit)
|
||||
PHASE_OUTPUT_FILE="/tmp/bmad-phase-output-$$.txt"
|
||||
|
||||
# Invoke claude and stream its combined stdout/stderr straight to disk, never
# holding the output in a shell variable.
# Arguments:
#   $1 - prompt text, or the literal "-f" to select a file-based prompt
#   $2 - prompt file path (only consulted when $1 is "-f")
# Globals:
#   PHASE_OUTPUT_FILE (read) - overwritten with this invocation's output
#   LOG_FILE (read)          - the same output is appended here
# Returns: 0 always; claude's own exit status is deliberately discarded.
run_claude_to_file() {
    local -a invocation=(claude --dangerously-skip-permissions)

    # Start from an empty phase file so stale output never leaks into parsing.
    : > "$PHASE_OUTPUT_FILE"

    case "$1" in
        -f) invocation+=(-f "$2") ;;
        *)  invocation+=(-p "$1") ;;
    esac

    # tee appends to the running log while its stdout fills the phase file.
    "${invocation[@]}" 2>&1 | tee -a "$LOG_FILE" > "$PHASE_OUTPUT_FILE" || true
}
|
||||
|
||||
# Emit the trailing portion of the current phase output for completion-signal
# parsing. Capped at the last 32KB: enough for JSON result blocks and signal
# lines without pulling megabytes of tool output into a bash variable.
# Arguments: none (reads PHASE_OUTPUT_FILE)
# Outputs:   tail content on stdout; an empty line if the file cannot be read
read_phase_tail() {
    if ! tail -c 32768 "$PHASE_OUTPUT_FILE" 2>/dev/null; then
        echo ""
    fi
}
|
||||
|
||||
# Emit the complete phase output. Use sparingly: only when the whole output
# must be searched, since it can be large.
# Arguments: none (reads PHASE_OUTPUT_FILE)
# Outputs:   full content on stdout; an empty line if the file cannot be read
read_phase_output() {
    if ! cat "$PHASE_OUTPUT_FILE" 2>/dev/null; then
        echo ""
    fi
}
|
||||
|
||||
# =============================================================================
|
||||
# Shared Automated Prompt Builder
|
||||
# =============================================================================
|
||||
|
|
@ -538,32 +631,38 @@ PROMPT_EOF
|
|||
|
||||
METRICS_DIR=""
|
||||
METRICS_FILE=""
|
||||
METRICS_RESUMED=false
|
||||
|
||||
init_metrics() {
|
||||
METRICS_DIR="$SPRINT_ARTIFACTS_DIR/metrics"
|
||||
METRICS_FILE="$METRICS_DIR/epic-${EPIC_ID}-metrics.yaml"
|
||||
mkdir -p "$METRICS_DIR"
|
||||
|
||||
# L4: Archive existing metrics file to prevent unbounded growth
|
||||
if [ -f "$METRICS_FILE" ]; then
|
||||
local archive_name="epic-${EPIC_ID}-metrics.$(date +%Y%m%d%H%M%S).yaml"
|
||||
local archive_dir="$METRICS_DIR/archive"
|
||||
mkdir -p "$archive_dir"
|
||||
mv "$METRICS_FILE" "$archive_dir/$archive_name"
|
||||
log "Archived previous metrics to: archive/$archive_name"
|
||||
|
||||
# Clean up old archives (keep last 10)
|
||||
local archive_count
|
||||
archive_count=$(find "$archive_dir" -name "epic-${EPIC_ID}-metrics.*.yaml" 2>/dev/null | wc -l | tr -d ' ')
|
||||
if [ "$archive_count" -gt 10 ]; then
|
||||
log "Cleaning up old metrics archives (keeping last 10)..."
|
||||
find "$archive_dir" -name "epic-${EPIC_ID}-metrics.*.yaml" -type f | \
|
||||
sort | head -n -10 | xargs rm -f 2>/dev/null || true
|
||||
fi
|
||||
fi
|
||||
|
||||
local start_time=$(date -u +"%Y-%m-%dT%H:%M:%SZ")
|
||||
|
||||
# If metrics file already exists, preserve it and seed in-memory counters
|
||||
if [ -f "$METRICS_FILE" ]; then
|
||||
METRICS_RESUMED=true
|
||||
log "Resuming with existing metrics: $METRICS_FILE"
|
||||
|
||||
# Seed in-memory counters from existing YAML so they accumulate
|
||||
if command -v yq >/dev/null 2>&1; then
|
||||
COMPLETED=$(yq '.stories.completed // 0' "$METRICS_FILE")
|
||||
FAILED=$(yq '.stories.failed // 0' "$METRICS_FILE")
|
||||
SKIPPED=$(yq '.stories.skipped // 0' "$METRICS_FILE")
|
||||
|
||||
log "Restored counters: completed=$COMPLETED failed=$FAILED skipped=$SKIPPED"
|
||||
|
||||
# Record resume event
|
||||
yq -i ".execution.resumed_at = \"$start_time\"" "$METRICS_FILE"
|
||||
else
|
||||
log_warn "yq not found - cannot restore counters from existing metrics"
|
||||
fi
|
||||
|
||||
return
|
||||
fi
|
||||
|
||||
# No existing file - create fresh metrics
|
||||
cat > "$METRICS_FILE" << EOF
|
||||
epic_id: "$EPIC_ID"
|
||||
execution:
|
||||
|
|
@ -675,31 +774,21 @@ finalize_metrics() {
|
|||
local end_time=$(date -u +"%Y-%m-%dT%H:%M:%SZ")
|
||||
|
||||
if command -v yq >/dev/null 2>&1; then
|
||||
# Add current session duration to any prior duration (for resumed runs)
|
||||
local prior_duration
|
||||
prior_duration=$(yq '.execution.duration_seconds // 0' "$METRICS_FILE")
|
||||
local total_duration=$((prior_duration + duration))
|
||||
|
||||
yq -i ".execution.end_time = \"$end_time\"" "$METRICS_FILE"
|
||||
yq -i ".execution.duration_seconds = $duration" "$METRICS_FILE"
|
||||
yq -i ".execution.duration_seconds = $total_duration" "$METRICS_FILE"
|
||||
yq -i ".stories.total = $total_stories" "$METRICS_FILE"
|
||||
yq -i ".stories.completed = $completed" "$METRICS_FILE"
|
||||
yq -i ".stories.failed = $failed" "$METRICS_FILE"
|
||||
yq -i ".stories.skipped = $skipped" "$METRICS_FILE"
|
||||
else
|
||||
# Fallback: rewrite the file with final values
|
||||
cat > "$METRICS_FILE" << EOF
|
||||
epic_id: "$EPIC_ID"
|
||||
execution:
|
||||
start_time: "$EPIC_START_TIME"
|
||||
end_time: "$end_time"
|
||||
duration_seconds: $duration
|
||||
stories:
|
||||
total: $total_stories
|
||||
completed: $completed
|
||||
failed: $failed
|
||||
skipped: $skipped
|
||||
validation:
|
||||
gate_executed: false
|
||||
gate_status: "PENDING"
|
||||
fix_attempts: 0
|
||||
issues: []
|
||||
EOF
|
||||
# Fallback without yq: only update counters, don't overwrite the file
|
||||
# This preserves issues, story_details, and fix_loop data
|
||||
log_warn "yq not found - metrics finalization limited (counters may be stale)"
|
||||
fi
|
||||
|
||||
log "Metrics finalized: $METRICS_FILE"
|
||||
|
|
@ -744,7 +833,7 @@ update_sprint_status() {
|
|||
|
||||
# Find sprint-status.yaml file
|
||||
local sprint_file=""
|
||||
for search_dir in "$SPRINT_ARTIFACTS_DIR" "$SPRINTS_DIR" "$PROJECT_ROOT/docs"; do
|
||||
for search_dir in "$SPRINT_ARTIFACTS_DIR" "$SPRINTS_DIR" "$PROJECT_ROOT/_bmad-output" "$PROJECT_ROOT/docs"; do
|
||||
if [ -f "$search_dir/sprint-status.yaml" ]; then
|
||||
sprint_file="$search_dir/sprint-status.yaml"
|
||||
break
|
||||
|
|
@ -1418,11 +1507,10 @@ Do NOT use 'git add -A' or 'git add .' - only stage files you created or modifie
|
|||
return 0
|
||||
fi
|
||||
|
||||
# Execute in isolated context
|
||||
# Execute in isolated context — pipe to file to avoid memory bloat
|
||||
run_claude_to_file "$dev_prompt"
|
||||
local result
|
||||
result=$(env -u CLAUDECODE claude --dangerously-skip-permissions -p "$dev_prompt" 2>&1) || true
|
||||
|
||||
echo "$result" >> "$LOG_FILE"
|
||||
result=$(read_phase_tail)
|
||||
|
||||
# Check completion using JSON parsing with text fallback
|
||||
local completion_status
|
||||
|
|
@ -1558,11 +1646,10 @@ Stage any fixes with explicit file paths: git add <file1> <file2> ..."
|
|||
return 0
|
||||
fi
|
||||
|
||||
# Execute in isolated context
|
||||
# Execute in isolated context — pipe to file to avoid memory bloat
|
||||
run_claude_to_file "$review_prompt"
|
||||
local result
|
||||
result=$(env -u CLAUDECODE claude --dangerously-skip-permissions -p "$review_prompt" 2>&1) || true
|
||||
|
||||
echo "$result" >> "$LOG_FILE"
|
||||
result=$(read_phase_tail)
|
||||
|
||||
# Check completion using JSON parsing with text fallback
|
||||
local completion_status
|
||||
|
|
@ -1792,9 +1879,6 @@ Address all review findings now. This is attempt $attempt_num of 3."
|
|||
log "Truncated prompt size: ${prompt_size}B"
|
||||
fi
|
||||
|
||||
# Declare result variable outside the conditional blocks
|
||||
local result=""
|
||||
|
||||
# Final safety check - if still too large, write to temp file and use -f flag
|
||||
if [ "$prompt_size" -gt "$MAX_PROMPT_SIZE" ]; then
|
||||
log_warn "Prompt still too large after truncation - using file-based prompt"
|
||||
|
|
@ -1808,7 +1892,8 @@ Address all review findings now. This is attempt $attempt_num of 3."
|
|||
return 0
|
||||
fi
|
||||
|
||||
result=$(env -u CLAUDECODE claude --dangerously-skip-permissions -f "$temp_prompt_file" 2>&1) || true
|
||||
# Pipe to file to avoid memory bloat
|
||||
run_claude_to_file "-f" "$temp_prompt_file"
|
||||
rm -f "$temp_prompt_file"
|
||||
else
|
||||
if [ "$DRY_RUN" = true ]; then
|
||||
|
|
@ -1816,11 +1901,12 @@ Address all review findings now. This is attempt $attempt_num of 3."
|
|||
return 0
|
||||
fi
|
||||
|
||||
# Execute in isolated context
|
||||
result=$(env -u CLAUDECODE claude --dangerously-skip-permissions -p "$fix_prompt" 2>&1) || true
|
||||
# Execute in isolated context — pipe to file to avoid memory bloat
|
||||
run_claude_to_file "$fix_prompt"
|
||||
fi
|
||||
|
||||
echo "$result" >> "$LOG_FILE"
|
||||
local result
|
||||
result=$(read_phase_tail)
|
||||
|
||||
# Check completion using JSON parsing with text fallback
|
||||
local completion_status
|
||||
|
|
@ -1956,7 +2042,7 @@ $build_output
|
|||
if grep -q '"test"' "$PROJECT_ROOT/package.json" 2>/dev/null; then
|
||||
log "Running tests..."
|
||||
local test_output
|
||||
test_output=$(cd "$PROJECT_ROOT" && run_with_timeout "${REGRESSION_TEST_TIMEOUT:-120}" npm test) || {
|
||||
test_output=$(cd "$PROJECT_ROOT" && npm test 2>&1) || {
|
||||
local exit_code=$?
|
||||
|
||||
# Check if there are NEW failures (not just pre-existing baseline failures)
|
||||
|
|
@ -2273,10 +2359,10 @@ Stage any fixes with: git add <file1> <file2> ..."
|
|||
return 0
|
||||
fi
|
||||
|
||||
# Pipe to file to avoid memory bloat
|
||||
run_claude_to_file "$arch_prompt"
|
||||
local result
|
||||
result=$(env -u CLAUDECODE claude --dangerously-skip-permissions -p "$arch_prompt" 2>&1) || true
|
||||
|
||||
echo "$result" >> "$LOG_FILE"
|
||||
result=$(read_phase_tail)
|
||||
|
||||
if echo "$result" | grep -q "ARCH COMPLIANT"; then
|
||||
log_success "Architecture compliant: $story_id"
|
||||
|
|
@ -2350,10 +2436,10 @@ Stage any fixes with: git add <file1> <file2> ..."
|
|||
return 0
|
||||
fi
|
||||
|
||||
# Pipe to file to avoid memory bloat
|
||||
run_claude_to_file "$quality_prompt"
|
||||
local result
|
||||
result=$(env -u CLAUDECODE claude --dangerously-skip-permissions -p "$quality_prompt" 2>&1) || true
|
||||
|
||||
echo "$result" >> "$LOG_FILE"
|
||||
result=$(read_phase_tail)
|
||||
|
||||
if echo "$result" | grep -q "TEST QUALITY APPROVED"; then
|
||||
log_success "Test quality approved: $story_id"
|
||||
|
|
@ -2485,10 +2571,10 @@ Analyze traceability now. Read story files on-demand as needed."
|
|||
return 0
|
||||
fi
|
||||
|
||||
# Pipe to file to avoid memory bloat
|
||||
run_claude_to_file "$trace_prompt"
|
||||
local result
|
||||
result=$(env -u CLAUDECODE claude --dangerously-skip-permissions -p "$trace_prompt" 2>&1) || true
|
||||
|
||||
echo "$result" >> "$LOG_FILE"
|
||||
result=$(read_phase_tail)
|
||||
|
||||
if echo "$result" | grep -q "TRACEABILITY PASS"; then
|
||||
log_success "Traceability passed: Epic $EPIC_ID"
|
||||
|
|
@ -2563,10 +2649,10 @@ Generate missing tests now."
|
|||
return 0
|
||||
fi
|
||||
|
||||
# Pipe to file to avoid memory bloat
|
||||
run_claude_to_file "$fix_prompt"
|
||||
local result
|
||||
result=$(env -u CLAUDECODE claude --dangerously-skip-permissions -p "$fix_prompt" 2>&1) || true
|
||||
|
||||
echo "$result" >> "$LOG_FILE"
|
||||
result=$(read_phase_tail)
|
||||
|
||||
if echo "$result" | grep -q "TEST GENERATION COMPLETE"; then
|
||||
log_success "Test generation complete for Epic $EPIC_ID"
|
||||
|
|
@ -2917,10 +3003,10 @@ Generate the UAT document now. Read story files on-demand as needed."
|
|||
return 0
|
||||
fi
|
||||
|
||||
# Pipe to file to avoid memory bloat
|
||||
run_claude_to_file "$uat_prompt"
|
||||
local result
|
||||
result=$(env -u CLAUDECODE claude --dangerously-skip-permissions -p "$uat_prompt" 2>&1) || true
|
||||
|
||||
echo "$result" >> "$LOG_FILE"
|
||||
result=$(read_phase_tail)
|
||||
|
||||
if echo "$result" | grep -q "UAT GENERATED"; then
|
||||
log_success "UAT document generated"
|
||||
|
|
@ -2943,16 +3029,10 @@ log "=========================================="
|
|||
log "Starting execution of ${#STORIES[@]} stories"
|
||||
log "=========================================="
|
||||
|
||||
# Initialize counters (may be restored from checkpoint)
|
||||
if [ -z "$COMPLETED" ] || [ "$COMPLETED" = "0" ]; then
|
||||
COMPLETED=0
|
||||
fi
|
||||
if [ -z "$FAILED" ] || [ "$FAILED" = "0" ]; then
|
||||
FAILED=0
|
||||
fi
|
||||
if [ -z "$SKIPPED" ] || [ "$SKIPPED" = "0" ]; then
|
||||
SKIPPED=0
|
||||
fi
|
||||
# Initialize counters (may already be restored from metrics or checkpoint)
|
||||
: "${COMPLETED:=0}"
|
||||
: "${FAILED:=0}"
|
||||
: "${SKIPPED:=0}"
|
||||
START_TIME=$(date +%s)
|
||||
STARTED=false
|
||||
|
||||
|
|
@ -2976,10 +3056,13 @@ for story_file in "${STORIES[@]}"; do
|
|||
STARTED=true
|
||||
else
|
||||
log_warn "Skipping $story_id (waiting for $START_FROM)"
|
||||
((SKIPPED++))
|
||||
# Only count as skipped if this is a fresh run (no prior metrics)
|
||||
if [ "${METRICS_RESUMED:-false}" = false ]; then
|
||||
((SKIPPED++))
|
||||
update_story_metrics "skipped"
|
||||
fi
|
||||
((STORY_INDEX++))
|
||||
CURRENT_STORY_INDEX=$STORY_INDEX
|
||||
update_story_metrics "skipped"
|
||||
continue
|
||||
fi
|
||||
fi
|
||||
|
|
@ -2988,10 +3071,13 @@ for story_file in "${STORIES[@]}"; do
|
|||
if [ "$SKIP_DONE" = true ]; then
|
||||
if grep -qi "^Status:.*done" "$story_file" 2>/dev/null; then
|
||||
log_warn "Skipping $story_id (Status: Done)"
|
||||
((SKIPPED++))
|
||||
# Only count as skipped if this is a fresh run (no prior metrics)
|
||||
if [ "${METRICS_RESUMED:-false}" = false ]; then
|
||||
((SKIPPED++))
|
||||
update_story_metrics "skipped"
|
||||
fi
|
||||
((STORY_INDEX++))
|
||||
CURRENT_STORY_INDEX=$STORY_INDEX
|
||||
update_story_metrics "skipped"
|
||||
continue
|
||||
fi
|
||||
fi
|
||||
|
|
@ -3052,6 +3138,16 @@ for story_file in "${STORIES[@]}"; do
|
|||
update_story_metrics "completed"
|
||||
log_success "Story complete: $story_id ($COMPLETED/${#STORIES[@]})"
|
||||
|
||||
# Kill orphaned node/test processes between stories
|
||||
kill_orphaned_test_processes
|
||||
|
||||
# Truncate log file between stories to prevent unbounded growth.
|
||||
# Each Claude phase appends via tee -a, so across 6-7 phases per story
|
||||
# the log can grow to hundreds of MB. Keep only the last 64KB as context.
|
||||
if [ -f "$LOG_FILE" ]; then
|
||||
tail -c 65536 "$LOG_FILE" > "${LOG_FILE}.tmp" 2>/dev/null && mv "${LOG_FILE}.tmp" "$LOG_FILE" 2>/dev/null || true
|
||||
fi
|
||||
|
||||
# Track progress for checkpoint/resume
|
||||
((STORY_INDEX++))
|
||||
CURRENT_STORY_INDEX=$STORY_INDEX
|
||||
|
|
@ -3060,9 +3156,6 @@ for story_file in "${STORIES[@]}"; do
|
|||
if type save_checkpoint >/dev/null 2>&1; then
|
||||
save_checkpoint "$STORY_INDEX" "$story_id" "$COMPLETED" "$FAILED" "$SKIPPED"
|
||||
fi
|
||||
|
||||
# Flush log to repo after each completed story
|
||||
flush_log_to_repo
|
||||
done
|
||||
|
||||
# =============================================================================
|
||||
|
|
|
|||
|
|
@ -20,6 +20,9 @@
|
|||
|
||||
set -e
|
||||
|
||||
# Allow nested Claude Code sessions (when launched from within Claude Code)
|
||||
unset CLAUDECODE 2>/dev/null || true
|
||||
|
||||
# =============================================================================
|
||||
# Section 1: Configuration
|
||||
# =============================================================================
|
||||
|
|
@ -859,7 +862,7 @@ HUMAN_ACTION_NEEDED: {yes/no}"
|
|||
|
||||
# Execute in isolated context
|
||||
local result
|
||||
result=$(env -u CLAUDECODE claude --dangerously-skip-permissions -p "$fix_prompt" 2>&1) || true
|
||||
result=$(claude --dangerously-skip-permissions -p "$fix_prompt" 2>&1) || true
|
||||
|
||||
echo "$result" >> "$LOG_FILE"
|
||||
|
||||
|
|
|
|||
Loading…
Reference in New Issue