feat(epic-execute): add test failure filtering and sync improvements from revive-dev

Port improvements developed in revive-dev: new test-failure-filter module for baseline-aware failure detection and prompt size management, broken pipe fixes in regression-gate, and log persistence in epic-execute. Paths adapted to BMAD-METHOD repo structure. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-10 13:49:55 -06:00 · 2026-02-10 13:49:55 -06:00 · 87223692d3
parent efc0bdd56f
commit 87223692d3
3 changed files with 972 additions and 568 deletions
--- a/scripts/epic-execute-lib/regression-gate.sh
+++ b/scripts/epic-execute-lib/regression-gate.sh
@ -31,16 +31,17 @@ extract_test_count() {

    # Method 1: Try JSON output first (most reliable)
    # Jest with --json, Vitest with --reporter=json
+    # Note: Use printf and redirect stderr to avoid broken pipe warnings when jq exits early
    if command -v jq >/dev/null 2>&1; then
        # Jest JSON format
-        count=$(echo "$test_output" | jq -r '.numPassedTests // empty' 2>/dev/null)
+        count=$(printf '%s' "$test_output" 2>/dev/null | jq -r '.numPassedTests // empty' 2>/dev/null) || true
        if [ -n "$count" ] && [ "$count" != "null" ] && [ "$count" -gt 0 ] 2>/dev/null; then
            echo "$count"
            return 0
        fi

        # Vitest JSON format (aggregate from testResults)
-        count=$(echo "$test_output" | jq -r '[.testResults[]?.assertionResults[]? | select(.status == "passed")] | length // empty' 2>/dev/null)
+        count=$(printf '%s' "$test_output" 2>/dev/null | jq -r '[.testResults[]?.assertionResults[]? | select(.status == "passed")] | length // empty' 2>/dev/null) || true
        if [ -n "$count" ] && [ "$count" != "null" ] && [ "$count" -gt 0 ] 2>/dev/null; then
            echo "$count"
            return 0
--- a/scripts/epic-execute-lib/test-failure-filter.sh
+++ b/scripts/epic-execute-lib/test-failure-filter.sh
@ -0,0 +1,391 @@
+#!/bin/bash
+#
+# BMAD Epic Execute - Test Failure Filter Module
+#
+# Provides functions to:
+# 1. Extract only failure details from test output (not passing tests)
+# 2. Capture baseline failures before story execution
+# 3. Compare to identify new failures introduced by current story
+#
+# This prevents prompt size explosion and focuses fix phases on relevant failures.
+#
+# Usage: Sourced by epic-execute.sh
+#
+
+# =============================================================================
+# Test Failure Filter Variables
+# =============================================================================
+
+# Failure signatures (one per line) recorded by capture_failure_baseline();
+# stays empty until a baseline has been captured.
+BASELINE_TEST_FAILURES=""
+# Number of non-empty lines in BASELINE_TEST_FAILURES.
+BASELINE_FAILURE_COUNT=0
+# Set to true by capture_failure_baseline(). While it is not "true",
+# get_new_failures_only() and count_new_test_failures() treat every failure
+# as new.
+TEST_FILTER_INITIALIZED=false
+
+# Maximum size for test failure output in fix prompts (in bytes)
+# An existing non-empty value (e.g. from the environment) is kept; the default
+# applies only when the variable is unset or empty.
+MAX_TEST_FAILURE_SIZE="${MAX_TEST_FAILURE_SIZE:-50000}"  # 50KB default
+
+# =============================================================================
+# Test Output Filtering Functions
+# =============================================================================
+
+# Extract only failure-related output from test results
+# Filters out passing test lines, keeps:
+#   - FAIL lines and their details
+#   - Error messages and stack traces
+#   - Summary lines
+# Works in three stages: an awk pass that keeps failure-related lines, a grep
+# fallback used when that pass yields fewer than 5 lines, and a final step that
+# appends the last "Test Files ... failed ... Tests ... failed" line if missing.
+# Arguments:
+#   $1 - full test output
+# Returns: filtered output (echoed)
+extract_test_failures() {
+    local test_output="$1"
+    local filtered=""
+
+    # For Vitest/Jest/turbo output, use a more targeted extraction
+    # We want:
+    # 1. Lines containing "FAIL " (test failures)
+    # 2. Lines with AssertionError or expected/received blocks
+    # 3. Error location lines (file:line references)
+    # 4. The final summary line
+    #
+    # We DO NOT want:
+    # - Passing test lines (✓)
+    # - stderr warnings (React warnings, etc.)
+    # - Full stack traces from passing tests
+
+    # Extract actual FAIL test blocks with their assertion errors
+    # Use awk to capture FAIL blocks more intelligently
+    # NOTE: the FAIL, source-context, diff-marker and separator rules only match
+    # lines that start with "@" and contain ":test:" (a turbo-style prefix); the
+    # assertion, error-location and summary rules match anywhere in a line.
+    # Lines matching no rule are dropped, since no rule prints them.
+    filtered=$(echo "$test_output" | awk '
+        BEGIN { in_fail_block = 0; fail_count = 0 }
+
+        # Start of a FAIL block - the actual failure report, not stderr
+        /^@.*:test:[[:space:]]+FAIL[[:space:]]/ {
+            in_fail_block = 1
+            fail_count++
+            print
+            next
+        }
+
+        # Assertion details (expected vs received)
+        /AssertionError:|expected.*to be|Expected|Received|expected.*to equal/ {
+            print
+            next
+        }
+
+        # Error location with line numbers
+        /❯.*:[0-9]+:[0-9]+/ {
+            print
+            next
+        }
+
+        # Source code context (numbered lines around error)
+        /^@.*:test:[[:space:]]+[0-9]+\|/ {
+            if (in_fail_block) print
+            next
+        }
+
+        # Keep the comparison markers
+        /^@.*:test:[[:space:]]+-[[:space:]]/ { if (in_fail_block) print; next }
+        /^@.*:test:[[:space:]]+\+[[:space:]]/ { if (in_fail_block) print; next }
+
+        # End of fail block indicators
+        /^@.*:test:[[:space:]]+⎯⎯⎯/ {
+            if (in_fail_block) print
+            in_fail_block = 0
+            next
+        }
+
+        # Summary lines - always keep
+        /Test Files.*failed|Tests.*failed/ {
+            print
+            next
+        }
+
+        # Blank line ends a fail block context
+        /^[[:space:]]*$/ {
+            if (in_fail_block && fail_count > 0) {
+                in_fail_block = 0
+            }
+        }
+    ')
+
+    # If awk filtering produced too little, fall back to grep
+    # (echo always emits a trailing newline, so an empty result counts as 1 line)
+    local line_count
+    line_count=$(echo "$filtered" | wc -l | tr -d ' ')
+
+    if [ "$line_count" -lt 5 ]; then
+        # Minimal grep fallback - just get FAIL lines and summary
+        # This replaces the awk result rather than adding to it; the "|| echo"
+        # keeps the assignment successful when grep finds nothing.
+        filtered=$(echo "$test_output" | grep -E \
+            "^@.*FAIL[[:space:]]|Test Files.*failed|Tests.*failed|AssertionError" \
+            2>/dev/null || echo "")
+    fi
+
+    # Always include the final summary line if present
+    # (only the last line that mentions both failed test files and failed tests)
+    local summary
+    summary=$(echo "$test_output" | grep -E "Test Files.*[0-9]+ failed.*Tests.*[0-9]+ failed" | tail -1)
+    if [ -n "$summary" ]; then
+        # Check if summary is already in filtered output
+        if ! echo "$filtered" | grep -qF "$summary"; then
+            filtered="$filtered"$'\n\n'"$summary"
+        fi
+    fi
+
+    echo "$filtered"
+}
+
+# Extract failure signatures for comparison
+# A signature is the text following the first standalone "FAIL" token on a
+# line, i.e. the failing file plus any "> Suite > Test Name" suffix.
+# Arguments:
+#   $1 - test output
+# Returns: sorted, deduplicated failure signatures (one per line, echoed);
+#          nothing when the output contains no FAIL lines
+extract_failure_signatures() {
+    local test_output="$1"
+
+    # Extract test identifiers from FAIL lines
+    # Handles formats like:
+    #   FAIL  src/path/file.test.ts > Suite > Test Name
+    #   FAIL  src/path/file.test.ts
+    #   @revive/web:test:  FAIL  src/path/file.test.ts (turbo output)
+    #
+    # FAIL must sit at the start of the line or after whitespace, and be
+    # followed by whitespace. awk's match() reports the leftmost occurrence, so
+    # a test name that itself contains "FAIL " no longer truncates the
+    # signature. Empty signatures (FAIL followed only by whitespace) are
+    # dropped because they identify nothing.
+    printf '%s\n' "$test_output" | awk '
+        match($0, /(^|[[:space:]])FAIL[[:space:]]+/) {
+            sig = substr($0, RSTART + RLENGTH)
+            if (sig != "") print sig
+        }
+    ' | sort -u
+}
+
+# =============================================================================
+# Baseline Management Functions
+# =============================================================================
+
+# Capture current test failure state as baseline before story execution
+# Should be called at the start of each story's dev phase
+# Arguments:
+#   $1 - story_id (for logging)
+capture_failure_baseline() {
+    local story_id="${1:-unknown}"
+
+    if [ -z "$PROJECT_ROOT" ]; then
+        log_warn "Cannot capture failure baseline: PROJECT_ROOT not set"
+        return 1
+    fi
+
+    log "Capturing test failure baseline for $story_id..."
+
+    local test_output=""
+
+    # Run tests and capture output
+    if [ -f "$PROJECT_ROOT/package.json" ]; then
+        if grep -q '"test"' "$PROJECT_ROOT/package.json" 2>/dev/null; then
+            test_output=$(cd "$PROJECT_ROOT" && npm test 2>&1) || true
+        fi
+    elif [ -f "$PROJECT_ROOT/Cargo.toml" ]; then
+        test_output=$(cd "$PROJECT_ROOT" && cargo test 2>&1) || true
+    elif [ -f "$PROJECT_ROOT/go.mod" ]; then
+        test_output=$(cd "$PROJECT_ROOT" && go test ./... 2>&1) || true
+    elif [ -f "$PROJECT_ROOT/requirements.txt" ] || [ -f "$PROJECT_ROOT/pyproject.toml" ]; then
+        if command -v pytest >/dev/null 2>&1; then
+            test_output=$(cd "$PROJECT_ROOT" && pytest 2>&1) || true
+        fi
+    fi
+
+    # Extract and store baseline failures
+    BASELINE_TEST_FAILURES=$(extract_failure_signatures "$test_output")
+    # Count non-empty lines - use wc -l and trim whitespace for clean integer
+    if [ -z "$BASELINE_TEST_FAILURES" ]; then
+        BASELINE_FAILURE_COUNT=0
+    else
+        BASELINE_FAILURE_COUNT=$(printf '%s\n' "$BASELINE_TEST_FAILURES" | grep -c . 2>/dev/null || echo "0")
+        BASELINE_FAILURE_COUNT=$(echo "$BASELINE_FAILURE_COUNT" | tr -d '[:space:]')
+    fi
+    TEST_FILTER_INITIALIZED=true
+
+    if [ "$BASELINE_FAILURE_COUNT" -gt 0 ]; then
+        log_warn "Baseline has $BASELINE_FAILURE_COUNT pre-existing test failures"
+    else
+        log "Baseline captured: no pre-existing failures"
+    fi
+
+    return 0
+}
+
+# Compare current failures against baseline and return only NEW failures
+# Arguments:
+#   $1 - current test output
+# Returns: filtered output containing only new failures
+get_new_failures_only() {
+    local current_output="$1"
+
+    if [ "$TEST_FILTER_INITIALIZED" != true ]; then
+        # No baseline - return all failures (filtered for size)
+        extract_test_failures "$current_output"
+        return 0
+    fi
+
+    # Get current failure signatures
+    local current_signatures
+    current_signatures=$(extract_failure_signatures "$current_output")
+
+    # Find signatures that are in current but not in baseline (new failures)
+    local new_signatures
+    new_signatures=$(comm -13 \
+        <(echo "$BASELINE_TEST_FAILURES" | sort) \
+        <(echo "$current_signatures" | sort) \
+    2>/dev/null || echo "$current_signatures")
+
+    local new_count
+    if [ -z "$new_signatures" ]; then
+        new_count=0
+    else
+        new_count=$(printf '%s\n' "$new_signatures" | grep -c . 2>/dev/null || echo "0")
+        new_count=$(echo "$new_count" | tr -d '[:space:]')
+    fi
+
+    if [ "$new_count" -eq 0 ]; then
+        # No new failures - all failures are pre-existing
+        echo "[INFO] All $BASELINE_FAILURE_COUNT failures are pre-existing from baseline."
+        echo "No new failures introduced by this story."
+        return 0
+    fi
+
+    # Extract full failure details for only the new failures
+    local filtered_output=""
+    local full_failures
+    full_failures=$(extract_test_failures "$current_output")
+
+    # For each new failure signature, include its full output
+    while IFS= read -r sig; do
+        [ -z "$sig" ] && continue
+        # Escape special regex characters in signature
+        local escaped_sig
+        escaped_sig=$(printf '%s' "$sig" | sed 's/[[\.*^$()+?{|]/\\&/g')
+        # Extract the block for this failure
+        local block
+        block=$(echo "$full_failures" | grep -A 50 "$escaped_sig" | head -60)
+        if [ -n "$block" ]; then
+            filtered_output+="$block"$'\n\n'
+        fi
+    done <<< "$new_signatures"
+
+    # Add summary
+    local total_current
+    if [ -z "$current_signatures" ]; then
+        total_current=0
+    else
+        total_current=$(printf '%s\n' "$current_signatures" | grep -c . 2>/dev/null || echo "0")
+        total_current=$(echo "$total_current" | tr -d '[:space:]')
+    fi
+    filtered_output+="
+---
+**Failure Summary:**
+- New failures (this story): $new_count
+- Pre-existing failures (baseline): $BASELINE_FAILURE_COUNT
+- Total current failures: $total_current
+
+Only the $new_count NEW failures above need to be fixed by this story.
+Pre-existing failures from the baseline have been filtered out.
+"
+
+    echo "$filtered_output"
+}
+
+# =============================================================================
+# Truncation Functions
+# =============================================================================
+
+# Truncate test failure output to fit within size limits
+# Keeps the head of the output plus its last 20 lines (where the summary
+# normally sits), joined by a notice stating how much was cut.
+# Arguments:
+#   $1 - failure output
+#   $2 - max size in bytes (optional, defaults to MAX_TEST_FAILURE_SIZE)
+# Returns: the input unchanged when it already fits, otherwise the truncated
+#          form (echoed without a trailing newline)
+truncate_test_failures() {
+    local failures="$1"
+    local max_size="${2:-$MAX_TEST_FAILURE_SIZE}"
+
+    local current_size
+    current_size=$(printf '%s' "$failures" | wc -c | tr -d '[:space:]')
+
+    if [ "$current_size" -le "$max_size" ]; then
+        printf '%s' "$failures"
+        return 0
+    fi
+
+    # 200 bytes are set aside for the truncation notice; the rest is shared
+    # between the summary tail and the head of the output.
+    local budget=$((max_size - 200))
+    if [ "$budget" -lt 0 ]; then
+        budget=0
+    fi
+
+    # Truncate but preserve summary at the end
+    local summary
+    summary=$(printf '%s\n' "$failures" | tail -20)
+
+    # Measure in bytes (not characters) so multi-byte output is sized the same
+    # way as current_size and max_size.
+    local summary_size
+    summary_size=$(printf '%s' "$summary" | wc -c | tr -d '[:space:]')
+
+    # Very long lines can make the 20-line tail exceed the budget on its own;
+    # keep only its final bytes so the size limit still holds.
+    if [ "$summary_size" -gt "$budget" ]; then
+        if [ "$budget" -gt 0 ]; then
+            summary=$(printf '%s' "$summary" | tail -c "$budget")
+        else
+            summary=""
+        fi
+        summary_size=$(printf '%s' "$summary" | wc -c | tr -d '[:space:]')
+    fi
+
+    local available=$((budget - summary_size))
+    if [ "$available" -lt 0 ]; then
+        available=0
+    fi
+
+    # head -c is skipped for a zero count: some implementations reject it, and
+    # a negative count would mean "all but the last N bytes" to GNU head.
+    local truncated=""
+    if [ "$available" -gt 0 ]; then
+        truncated=$(printf '%s' "$failures" | head -c "$available")
+    fi
+
+    printf '%s\n\n... [TEST OUTPUT TRUNCATED: %sB total, showing first %sB + summary] ...\n\n%s' \
+        "$truncated" "$current_size" "$available" "$summary"
+}
+
+# =============================================================================
+# Main Filter Function (Used by Static Analysis Gate)
+# =============================================================================
+
+# Build the failure text that goes into a fix-phase prompt.
+# Chains the other helpers: get_new_failures_only() narrows the output to
+# failures absent from the baseline, then truncate_test_failures() caps its size.
+# Globals read: VERBOSE, MAX_TEST_FAILURE_SIZE
+# Arguments:
+#   $1 - full test output
+#   $2 - story_id (for logging; defaults to "unknown")
+# Returns: filtered, truncated failure output (echoed without trailing newline)
+prepare_test_failures_for_fix() {
+    local test_output="$1"
+    local story_id="${2:-unknown}"
+    local new_failures final_output final_size
+
+    # Narrow to new failures first, then enforce the size cap on the result.
+    new_failures=$(get_new_failures_only "$test_output")
+    final_output=$(truncate_test_failures "$new_failures")
+
+    # Byte size of what will be returned, reported only in verbose mode.
+    final_size=$(printf '%s' "$final_output" | wc -c | tr -d ' ')
+    if [ "$VERBOSE" = true ]; then
+        log "Test failure output for $story_id: ${final_size}B (limit: ${MAX_TEST_FAILURE_SIZE}B)"
+    fi
+
+    printf '%s' "$final_output"
+}
+
+# Count the test failures that are not part of the baseline.
+# The comment in this module says the static analysis gate uses it to decide
+# pass/fail.
+# Globals read: TEST_FILTER_INITIALIZED, BASELINE_TEST_FAILURES
+# Arguments:
+#   $1 - full test output
+# Returns: count of NEW failures (echoed); every failure counts as new when no
+#          baseline has been captured, and 0 means all failures are pre-existing
+count_new_test_failures() {
+    local test_output="$1"
+
+    # Signatures of everything failing right now; both paths below need them.
+    local current_signatures
+    current_signatures=$(extract_failure_signatures "$test_output")
+
+    if [ "$TEST_FILTER_INITIALIZED" != true ]; then
+        # No baseline - count all failures
+        if [ -n "$current_signatures" ]; then
+            printf '%s\n' "$current_signatures" | grep -c . 2>/dev/null || echo "0"
+        else
+            echo "0"
+        fi
+        return 0
+    fi
+
+    # comm -13 keeps only lines unique to its second input, i.e. signatures
+    # failing now that the baseline does not list.
+    local fresh_signatures
+    fresh_signatures=$(comm -13 \
+        <(printf '%s\n' "$BASELINE_TEST_FAILURES" | sort) \
+        <(printf '%s\n' "$current_signatures" | sort) \
+    2>/dev/null || echo "")
+
+    if [ -n "$fresh_signatures" ]; then
+        local fresh_total
+        fresh_total=$(printf '%s\n' "$fresh_signatures" | grep -c . 2>/dev/null || echo "0")
+        # Strip all whitespace so callers get a bare integer
+        echo "$fresh_total" | tr -d '[:space:]'
+    else
+        echo "0"
+    fi
+}
--- a/scripts/epic-execute.sh
+++ b/scripts/epic-execute.sh