#!/bin/bash
#
# BMAD Epic Execute - Test Failure Filter Module
#
# Provides functions to:
#   1. Extract only failure details from test output (not passing tests)
#   2. Capture baseline failures before story execution
#   3. Compare to identify new failures introduced by current story
#
# This prevents prompt size explosion and focuses fix phases on relevant failures.
#
# Usage: Sourced by epic-execute.sh
#
# Because this file is sourced, it deliberately does not change shell options
# (no `set -e` / `set -u` here); that would alter the caller's shell.
# capture_failure_baseline and prepare_test_failures_for_fix call `log` and
# `log_warn`, which are not defined in this file and must come from the caller.
#

# =============================================================================
# Test Failure Filter Variables
# =============================================================================

BASELINE_TEST_FAILURES=""
BASELINE_FAILURE_COUNT=0
TEST_FILTER_INITIALIZED=false

# Maximum size for test failure output in fix prompts (in bytes)
MAX_TEST_FAILURE_SIZE="${MAX_TEST_FAILURE_SIZE:-50000}"  # 50KB default

# =============================================================================
# Internal Helpers
# =============================================================================

# Count the non-empty lines of a string.
# Arguments:
#   $1 - text (may be empty)
# Returns: the count as a bare integer on stdout (0 for empty input)
_count_nonempty_lines() {
    local text="$1"
    local n

    if [ -z "$text" ]; then
        echo "0"
        return 0
    fi

    # grep -c prints the count even when it is 0 (and then exits 1), so the
    # exit status is ignored instead of appending a second "0" via `|| echo`.
    n=$(printf '%s\n' "$text" | grep -c . 2>/dev/null) || true
    n=${n//[[:space:]]/}
    printf '%s\n' "${n:-0}"
}

# List the failure signatures that are present now but absent from the baseline.
# Globals:
#   BASELINE_TEST_FAILURES (read)
# Arguments:
#   $1 - current failure signatures, one per line
# Returns: new signatures, one per line (echoed)
_new_failure_signatures() {
    local current="$1"

    # comm needs both inputs sorted the same way, so both are re-sorted here.
    # If comm itself fails, be conservative and treat every current failure
    # as new rather than reporting none.
    comm -13 \
        <(printf '%s\n' "$BASELINE_TEST_FAILURES" | sort) \
        <(printf '%s\n' "$current" | sort) \
        2>/dev/null || printf '%s\n' "$current"
}

# =============================================================================
# Test Output Filtering Functions
# =============================================================================

# Extract only failure-related output from test results
# Filters out passing test lines, keeps:
#   - FAIL lines and their details
#   - Error messages and stack traces
#   - Summary lines
# Arguments:
#   $1 - full test output
# Returns: filtered output (echoed)
extract_test_failures() {
    local test_output="$1"
    local filtered=""

    # For Vitest/Jest/turbo output, use a more targeted extraction
    # We want:
    #   1. Lines containing "FAIL " (test failures)
    #   2. Lines with AssertionError or expected/received blocks
    #   3. Error location lines (file:line references)
    #   4. The final summary line
    #
    # We DO NOT want:
    #   - Passing test lines (✓)
    #   - stderr warnings (React warnings, etc.)
    #   - Full stack traces from passing tests

    # Extract actual FAIL test blocks with their assertion errors.
    # The block-scoped rules below are anchored on the turbo prefix
    # ("@pkg:test:"); the assertion, location and summary rules match anywhere.
    filtered=$(printf '%s\n' "$test_output" | awk '
        BEGIN { in_fail_block = 0; fail_count = 0 }

        # Start of a FAIL block - the actual failure report, not stderr
        /^@.*:test:[[:space:]]+FAIL[[:space:]]/ {
            in_fail_block = 1
            fail_count++
            print
            next
        }

        # Assertion details (expected vs received)
        /AssertionError:|expected.*to be|Expected|Received|expected.*to equal/ {
            print
            next
        }

        # Error location with line numbers
        /❯.*:[0-9]+:[0-9]+/ {
            print
            next
        }

        # Source code context (numbered lines around error)
        /^@.*:test:[[:space:]]+[0-9]+\|/ {
            if (in_fail_block) print
            next
        }

        # Keep the comparison markers
        /^@.*:test:[[:space:]]+-[[:space:]]/ { if (in_fail_block) print; next }
        /^@.*:test:[[:space:]]+\+[[:space:]]/ { if (in_fail_block) print; next }

        # End of fail block indicators
        /^@.*:test:[[:space:]]+⎯⎯⎯/ {
            if (in_fail_block) print
            in_fail_block = 0
            next
        }

        # Summary lines - always keep
        /Test Files.*failed|Tests.*failed/ {
            print
            next
        }

        # Blank line ends a fail block context
        /^[[:space:]]*$/ {
            if (in_fail_block && fail_count > 0) {
                in_fail_block = 0
            }
        }
    ')

    # If awk filtering produced too little, fall back to grep
    local line_count
    line_count=$(printf '%s\n' "$filtered" | wc -l | tr -d '[:space:]')

    if [ "$line_count" -lt 5 ]; then
        # Minimal grep fallback - just get FAIL lines and summary
        filtered=$(printf '%s\n' "$test_output" | grep -E \
            "^@.*FAIL[[:space:]]|Test Files.*failed|Tests.*failed|AssertionError" \
            2>/dev/null || echo "")
    fi

    # Always include the final summary line if present
    local summary
    summary=$(printf '%s\n' "$test_output" \
        | grep -E "Test Files.*[0-9]+ failed.*Tests.*[0-9]+ failed" \
        | tail -n 1)

    if [ -n "$summary" ]; then
        # Append only when the summary is not already in the filtered output.
        # "--" keeps a summary that starts with "-" from being read as an option.
        if ! printf '%s\n' "$filtered" | grep -qF -- "$summary"; then
            filtered="$filtered"$'\n\n'"$summary"
        fi
    fi

    printf '%s\n' "$filtered"
}

# Extract failure signatures for comparison
# Returns a sorted, deduplicated list of failing test identifiers
# Arguments:
#   $1 - test output
# Returns: sorted failure signatures (one per line)
extract_failure_signatures() {
    local test_output="$1"

    # Extract test identifiers from FAIL lines
    # Handles formats like:
    #   FAIL src/path/file.test.ts > Suite > Test Name
    #   FAIL src/path/file.test.ts
    #   @revive/web:test:  FAIL  src/path/file.test.ts (turbo output)
    #
    # FAIL must be a standalone word: at the start of the line or preceded by
    # whitespace, and followed by whitespace. The start-of-line alternative is
    # what lets an unindented "FAIL src/..." line produce a signature.
    #
    # NOTE: the sed match is greedy, so if "FAIL" occurs again later on the
    # line (e.g. inside a test title) the signature starts after the last one.
    printf '%s\n' "$test_output" \
        | grep -E "(^|[[:space:]])FAIL[[:space:]]+" \
        | sed 's/^.*FAIL[[:space:]]*//' \
        | sort -u
}

# =============================================================================
# Baseline Management Functions
# =============================================================================

# Capture current test failure state as baseline before story execution
# Should be called at the start of each story's dev phase
# Globals:
#   PROJECT_ROOT (read)
#   BASELINE_TEST_FAILURES, BASELINE_FAILURE_COUNT, TEST_FILTER_INITIALIZED (written)
# Arguments:
#   $1 - story_id (for logging)
# Returns: 1 if PROJECT_ROOT is not set, 0 otherwise
capture_failure_baseline() {
    local story_id="${1:-unknown}"

    if [ -z "${PROJECT_ROOT:-}" ]; then
        log_warn "Cannot capture failure baseline: PROJECT_ROOT not set"
        return 1
    fi

    log "Capturing test failure baseline for $story_id..."

    local test_output=""

    # Run the first test runner whose marker file exists. A failing test run is
    # expected here, hence the `|| true` on each invocation.
    if [ -f "$PROJECT_ROOT/package.json" ]; then
        if grep -q '"test"' "$PROJECT_ROOT/package.json" 2>/dev/null; then
            test_output=$(cd "$PROJECT_ROOT" && npm test 2>&1) || true
        fi
    elif [ -f "$PROJECT_ROOT/Cargo.toml" ]; then
        test_output=$(cd "$PROJECT_ROOT" && cargo test 2>&1) || true
    elif [ -f "$PROJECT_ROOT/go.mod" ]; then
        test_output=$(cd "$PROJECT_ROOT" && go test ./... 2>&1) || true
    elif [ -f "$PROJECT_ROOT/requirements.txt" ] || [ -f "$PROJECT_ROOT/pyproject.toml" ]; then
        if command -v pytest >/dev/null 2>&1; then
            test_output=$(cd "$PROJECT_ROOT" && pytest 2>&1) || true
        fi
    fi

    # Extract and store baseline failures
    BASELINE_TEST_FAILURES=$(extract_failure_signatures "$test_output")
    BASELINE_FAILURE_COUNT=$(_count_nonempty_lines "$BASELINE_TEST_FAILURES")

    # Marked initialized even when no runner was found; the baseline is then empty.
    TEST_FILTER_INITIALIZED=true

    if [ "$BASELINE_FAILURE_COUNT" -gt 0 ]; then
        log_warn "Baseline has $BASELINE_FAILURE_COUNT pre-existing test failures"
    else
        log "Baseline captured: no pre-existing failures"
    fi

    return 0
}

# Compare current failures against baseline and return only NEW failures
# Globals:
#   TEST_FILTER_INITIALIZED, BASELINE_TEST_FAILURES, BASELINE_FAILURE_COUNT (read)
# Arguments:
#   $1 - current test output
# Returns: filtered output containing only new failures
get_new_failures_only() {
    local current_output="$1"

    if [ "$TEST_FILTER_INITIALIZED" != true ]; then
        # No baseline - return all failures (filtered for size)
        extract_test_failures "$current_output"
        return 0
    fi

    # Signatures failing now, and the subset that the baseline did not have
    local current_signatures new_signatures new_count
    current_signatures=$(extract_failure_signatures "$current_output")
    new_signatures=$(_new_failure_signatures "$current_signatures")
    new_count=$(_count_nonempty_lines "$new_signatures")

    if [ "$new_count" -eq 0 ]; then
        # No new failures - all failures are pre-existing
        echo "[INFO] All $BASELINE_FAILURE_COUNT failures are pre-existing from baseline."
        echo "No new failures introduced by this story."
        return 0
    fi

    # Extract full failure details for only the new failures
    local filtered_output=""
    local full_failures
    full_failures=$(extract_test_failures "$current_output")

    # For each new failure signature, include its full output
    local sig block
    while IFS= read -r sig; do
        [ -z "$sig" ] && continue

        # Match the signature as a fixed string: test titles routinely contain
        # characters such as ( ) + ? | that a regex would interpret.
        block=$(printf '%s\n' "$full_failures" \
            | grep -F -A 50 -- "$sig" \
            | head -n 60)

        if [ -z "$block" ]; then
            # The filtered output had no line for this failure; still name it
            # so a new failure is never silently omitted.
            block="FAIL $sig"$'\n'"(no detailed output was captured for this failure)"
        fi

        filtered_output+="$block"$'\n\n'
    done <<< "$new_signatures"

    # Add summary
    local total_current
    total_current=$(_count_nonempty_lines "$current_signatures")

    filtered_output+="
---
**Failure Summary:**
- New failures (this story): $new_count
- Pre-existing failures (baseline): $BASELINE_FAILURE_COUNT
- Total current failures: $total_current

Only the $new_count NEW failures above need to be fixed by this story.
Pre-existing failures from the baseline have been filtered out.
"

    printf '%s\n' "$filtered_output"
}

# =============================================================================
# Truncation Functions
# =============================================================================

# Truncate test failure output to fit within size limits
# Keeps the head of the output plus its last 20 lines (where the summary is)
# Arguments:
#   $1 - failure output
#   $2 - max size in bytes (optional, defaults to MAX_TEST_FAILURE_SIZE)
# Returns: truncated output
truncate_test_failures() {
    local failures="$1"
    local max_size="${2:-$MAX_TEST_FAILURE_SIZE}"

    local current_size
    current_size=$(printf '%s' "$failures" | wc -c | tr -d '[:space:]')

    if [ "$current_size" -le "$max_size" ]; then
        printf '%s' "$failures"
        return 0
    fi

    # Bytes left for head + tail once ~200 bytes are reserved for the notice.
    local budget=$((max_size - 200))
    if [ "$budget" -lt 0 ]; then
        budget=0
    fi

    # Preserve the tail of the output, measured in bytes (not characters) so
    # multi-byte output is accounted for against the byte limit.
    local summary summary_size
    summary=$(printf '%s\n' "$failures" | tail -n 20)
    summary_size=$(printf '%s' "$summary" | wc -c | tr -d '[:space:]')

    if [ "$summary_size" -gt "$budget" ]; then
        # The tail alone does not fit: keep only its last $budget bytes.
        if [ "$budget" -gt 0 ]; then
            summary=$(printf '%s' "$summary" | tail -c "$budget")
        else
            summary=""
        fi
        summary_size=$(printf '%s' "$summary" | wc -c | tr -d '[:space:]')
    fi

    # Never negative: a negative count changes the meaning of `head -c`
    # (GNU: "all but the last N bytes") or is rejected outright.
    local available=$((budget - summary_size))
    if [ "$available" -lt 0 ]; then
        available=0
    fi

    local truncated
    truncated=$(printf '%s' "$failures" | head -c "$available")

    printf '%s\n\n... [TEST OUTPUT TRUNCATED: %sB total, showing first %sB + summary] ...\n\n%s' \
        "$truncated" "$current_size" "$available" "$summary"
}

# =============================================================================
# Main Filter Function (Used by Static Analysis Gate)
# =============================================================================

# Filter and prepare test failures for fix-phase prompt
# Combines all filtering: extracts failures, compares to baseline, truncates
# Globals:
#   VERBOSE, MAX_TEST_FAILURE_SIZE (read)
# Arguments:
#   $1 - full test output
#   $2 - story_id (for logging)
# Returns: filtered, truncated failure output suitable for fix prompt
prepare_test_failures_for_fix() {
    local test_output="$1"
    local story_id="${2:-unknown}"

    # Step 1: Get only new failures (if baseline exists)
    local new_failures
    new_failures=$(get_new_failures_only "$test_output")

    # Step 2: Truncate if still too large
    local final_output
    final_output=$(truncate_test_failures "$new_failures")

    local final_size
    final_size=$(printf '%s' "$final_output" | wc -c | tr -d '[:space:]')

    # NOTE(review): assumes `log` does not write to stdout, otherwise its text
    # would be mixed into the value this function returns - confirm in caller.
    if [ "${VERBOSE:-}" = true ]; then
        log "Test failure output for $story_id: ${final_size}B (limit: ${MAX_TEST_FAILURE_SIZE}B)"
    fi

    printf '%s' "$final_output"
}

# Count NEW test failures (not in baseline)
# Used by static analysis gate to decide pass/fail
# Globals:
#   TEST_FILTER_INITIALIZED, BASELINE_TEST_FAILURES (read)
# Arguments:
#   $1 - full test output
# Returns: count of NEW failures (0 if all failures are pre-existing)
count_new_test_failures() {
    local test_output="$1"

    local current_signatures
    current_signatures=$(extract_failure_signatures "$test_output")

    if [ "$TEST_FILTER_INITIALIZED" != true ]; then
        # No baseline - count all failures
        _count_nonempty_lines "$current_signatures"
        return 0
    fi

    # Signatures that are in current but not in baseline (new failures)
    local new_signatures
    new_signatures=$(_new_failure_signatures "$current_signatures")

    _count_nonempty_lines "$new_signatures"
}