feat(epic-execute): add test failure filtering and sync improvements from revive-dev

Port improvements developed in revive-dev: new test-failure-filter module for
baseline-aware failure detection and prompt size management, broken pipe fixes
in regression-gate, and log persistence in epic-execute. Paths adapted to
BMAD-METHOD repo structure.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
Caleb 2026-02-10 13:49:55 -06:00
parent efc0bdd56f
commit 87223692d3
3 changed files with 972 additions and 568 deletions

View File

@ -31,16 +31,17 @@ extract_test_count() {
# Method 1: Try JSON output first (most reliable)
# Jest with --json, Vitest with --reporter=json
# Note: Use printf and redirect stderr to avoid broken pipe warnings when jq exits early
if command -v jq >/dev/null 2>&1; then
# Jest JSON format
count=$(echo "$test_output" | jq -r '.numPassedTests // empty' 2>/dev/null)
count=$(printf '%s' "$test_output" 2>/dev/null | jq -r '.numPassedTests // empty' 2>/dev/null) || true
if [ -n "$count" ] && [ "$count" != "null" ] && [ "$count" -gt 0 ] 2>/dev/null; then
echo "$count"
return 0
fi
# Vitest JSON format (aggregate from testResults)
count=$(echo "$test_output" | jq -r '[.testResults[]?.assertionResults[]? | select(.status == "passed")] | length // empty' 2>/dev/null)
count=$(printf '%s' "$test_output" 2>/dev/null | jq -r '[.testResults[]?.assertionResults[]? | select(.status == "passed")] | length // empty' 2>/dev/null) || true
if [ -n "$count" ] && [ "$count" != "null" ] && [ "$count" -gt 0 ] 2>/dev/null; then
echo "$count"
return 0

View File

@ -0,0 +1,391 @@
#!/bin/bash
#
# BMAD Epic Execute - Test Failure Filter Module
#
# Provides functions to:
# 1. Extract only failure details from test output (not passing tests)
# 2. Capture baseline failures before story execution
# 3. Compare to identify new failures introduced by current story
#
# This prevents prompt size explosion and focuses fix phases on relevant failures.
#
# Usage: Sourced by epic-execute.sh
#
# =============================================================================
# Test Failure Filter Variables
# =============================================================================
# Failure signatures recorded by capture_failure_baseline (one per line).
BASELINE_TEST_FAILURES=""
# Number of signatures currently held in BASELINE_TEST_FAILURES.
BASELINE_FAILURE_COUNT=0
# Set to true once capture_failure_baseline has completed.
TEST_FILTER_INITIALIZED=false
# Byte budget for failure text embedded in fix prompts. A non-empty value
# already present in the environment wins; otherwise 50000 (50KB) is used.
: "${MAX_TEST_FAILURE_SIZE:=50000}"
# =============================================================================
# Test Output Filtering Functions
# =============================================================================
# Extract only failure-related output from test results.
# Passing-test lines are dropped; the following are kept:
#   - FAIL lines, with or without a turbo "@pkg:test:" prefix
#   - assertion messages (AssertionError, expected/received text)
#   - any line carrying a file:line:column style location
#   - numbered source context, -/+ diff markers and the closing rule line,
#     but only while inside a FAIL block
#   - "Test Files ... failed" / "Tests ... failed" summary lines
# When fewer than 5 lines survive the awk pass, the result is replaced by a
# simpler grep pass (FAIL lines, summary lines, AssertionError lines).
# Arguments:
#   $1 - full test output
# Returns: filtered output (written to stdout)
extract_test_failures() {
  local test_output="$1"
  local filtered=""

  # printf instead of echo: test output is arbitrary text and must not be
  # interpreted as echo options (e.g. a first line of "-n").
  filtered=$(printf '%s\n' "$test_output" | awk '
    BEGIN { in_fail_block = 0; fail_count = 0 }

    # Start of a FAIL block. The turbo prefix is optional so that plain
    # Vitest/Jest output ("FAIL src/x.test.ts") is recognised as well.
    /^(@.*:test:)?[[:space:]]*FAIL[[:space:]]/ {
      in_fail_block = 1
      fail_count++
      print
      next
    }

    # Assertion details (expected vs received) - kept everywhere
    /AssertionError:|expected.*to be|Expected|Received|expected.*to equal/ {
      print
      next
    }

    # Error location with line and column numbers - kept everywhere
    /.*:[0-9]+:[0-9]+/ {
      print
      next
    }

    # Source code context (numbered lines around the error)
    /^(@.*:test:)?[[:space:]]*[0-9]+\|/ {
      if (in_fail_block) print
      next
    }

    # Comparison markers of the expected/received diff
    /^(@.*:test:)?[[:space:]]*-[[:space:]]/ { if (in_fail_block) print; next }
    /^(@.*:test:)?[[:space:]]*\+[[:space:]]/ { if (in_fail_block) print; next }

    # Horizontal rule that closes a failure report
    /^(@.*:test:)?[[:space:]]*⎯⎯⎯/ {
      if (in_fail_block) print
      in_fail_block = 0
      next
    }

    # Summary lines - always keep
    /Test Files.*failed|Tests.*failed/ {
      print
      next
    }

    # A blank line ends the current fail block context
    /^[[:space:]]*$/ {
      if (in_fail_block && fail_count > 0) {
        in_fail_block = 0
      }
    }
  ')

  # If awk filtering produced too little, fall back to a minimal grep that
  # keeps FAIL lines (prefixed or not), summary lines and AssertionErrors.
  local line_count
  line_count=$(printf '%s\n' "$filtered" | wc -l | tr -d '[:space:]')
  if [ "$line_count" -lt 5 ]; then
    filtered=$(printf '%s\n' "$test_output" | grep -E \
      "(^@.*|^[[:space:]]*)FAIL[[:space:]]|Test Files.*failed|Tests.*failed|AssertionError" \
      2>/dev/null || echo "")
  fi

  # Always include the final combined summary line if one is present.
  local summary
  summary=$(printf '%s\n' "$test_output" \
    | grep -E "Test Files.*[0-9]+ failed.*Tests.*[0-9]+ failed" \
    | tail -1)
  # Here-string (not a pipe) so grep -q exiting early cannot raise SIGPIPE;
  # -F -- treats the summary as a literal even if it starts with a dash.
  if [ -n "$summary" ] && ! grep -qF -- "$summary" <<<"$filtered"; then
    filtered="$filtered"$'\n\n'"$summary"
  fi

  printf '%s\n' "$filtered"
}
# Extract failure signatures for comparison.
# A signature is everything that follows the first standalone "FAIL" token
# on a line (the token must be at line start or preceded by whitespace, and
# followed by whitespace). Handles formats like:
#   FAIL src/path/file.test.ts > Suite > Test Name
#   FAIL src/path/file.test.ts
#   @revive/web:test: FAIL src/path/file.test.ts (turbo output)
# Words that merely contain "FAIL" (e.g. "FAILURE" in a test name) are not
# treated as the marker, and lines with nothing after the marker are skipped.
# Arguments:
#   $1 - test output
# Returns: sorted, deduplicated failure signatures (one per line)
extract_failure_signatures() {
  local test_output="$1"

  # A single awk pass replaces grep+sed: it cannot exit non-zero on "no
  # match", and match() is leftmost, so the first FAIL token is the cut point.
  # A space is prepended so that a FAIL at column 0 is matched by the same
  # "whitespace before FAIL" rule as every other position.
  printf '%s\n' "$test_output" | awk '
    {
      line = " " $0
      if (match(line, /[[:space:]]FAIL[[:space:]]+/)) {
        signature = substr(line, RSTART + RLENGTH)
        if (signature != "") print signature
      }
    }
  ' | sort -u
}
# =============================================================================
# Baseline Management Functions
# =============================================================================
# Record the project's current failing tests as the baseline.
# Intended to run before a story's dev phase so that later runs can tell
# pre-existing failures from new ones.
# Globals read:    PROJECT_ROOT
# Globals written: BASELINE_TEST_FAILURES, BASELINE_FAILURE_COUNT,
#                  TEST_FILTER_INITIALIZED
# Arguments:
#   $1 - story_id (used in log messages only; defaults to "unknown")
# Returns: 1 when PROJECT_ROOT is empty, 0 otherwise
capture_failure_baseline() {
  local story_id="${1:-unknown}"

  if [ -z "$PROJECT_ROOT" ]; then
    log_warn "Cannot capture failure baseline: PROJECT_ROOT not set"
    return 1
  fi

  log "Capturing test failure baseline for $story_id..."

  # Pick the runner from the first manifest found. A failing test run is
  # expected here, hence "|| true" on every invocation.
  local root="$PROJECT_ROOT"
  local raw_output=""
  if [ -f "$root/package.json" ]; then
    # Only run npm when the manifest mentions a "test" entry.
    if grep -q '"test"' "$root/package.json" 2>/dev/null; then
      raw_output=$(cd "$root" && npm test 2>&1) || true
    fi
  elif [ -f "$root/Cargo.toml" ]; then
    raw_output=$(cd "$root" && cargo test 2>&1) || true
  elif [ -f "$root/go.mod" ]; then
    raw_output=$(cd "$root" && go test ./... 2>&1) || true
  elif [ -f "$root/requirements.txt" ] || [ -f "$root/pyproject.toml" ]; then
    if command -v pytest >/dev/null 2>&1; then
      raw_output=$(cd "$root" && pytest 2>&1) || true
    fi
  fi

  # Reduce the run to its failure signatures and remember them.
  BASELINE_TEST_FAILURES=$(extract_failure_signatures "$raw_output")

  # Count the non-empty signature lines with grep -c, then strip any
  # whitespace so the result is a clean integer.
  if [ -n "$BASELINE_TEST_FAILURES" ]; then
    BASELINE_FAILURE_COUNT=$(
      {
        printf '%s\n' "$BASELINE_TEST_FAILURES" | grep -c . 2>/dev/null || echo "0"
      } | tr -d '[:space:]'
    )
  else
    BASELINE_FAILURE_COUNT=0
  fi

  TEST_FILTER_INITIALIZED=true

  if [ "$BASELINE_FAILURE_COUNT" -gt 0 ]; then
    log_warn "Baseline has $BASELINE_FAILURE_COUNT pre-existing test failures"
  else
    log "Baseline captured: no pre-existing failures"
  fi

  return 0
}
# Compare current failures against the baseline and return only NEW failures.
# Without a captured baseline (TEST_FILTER_INITIALIZED != true) the output of
# extract_test_failures is returned unchanged.
# Globals read: TEST_FILTER_INITIALIZED, BASELINE_TEST_FAILURES,
#               BASELINE_FAILURE_COUNT
# Arguments:
#   $1 - current test output
# Returns: on stdout, either an "[INFO] ... pre-existing" notice when nothing
#          new failed, or the detail block of every new failure followed by a
#          "Failure Summary" section
get_new_failures_only() {
  local current_output="$1"

  if [ "$TEST_FILTER_INITIALIZED" != true ]; then
    # No baseline - return all failures (filtered for size)
    extract_test_failures "$current_output"
    return 0
  fi

  # Get current failure signatures
  local current_signatures
  current_signatures=$(extract_failure_signatures "$current_output")

  # Signatures present now but absent from the baseline are the new failures.
  # Both inputs are re-sorted here because comm requires sorted input; if comm
  # fails, every current signature is treated as new.
  local new_signatures
  new_signatures=$(comm -13 \
    <(printf '%s\n' "$BASELINE_TEST_FAILURES" | sort) \
    <(printf '%s\n' "$current_signatures" | sort) \
    2>/dev/null || printf '%s\n' "$current_signatures")

  local new_count=0
  if [ -n "$new_signatures" ]; then
    new_count=$(printf '%s\n' "$new_signatures" | grep -c . | tr -d '[:space:]') || true
  fi

  if [ "$new_count" -eq 0 ]; then
    # No new failures - all failures are pre-existing
    echo "[INFO] All $BASELINE_FAILURE_COUNT failures are pre-existing from baseline."
    echo "No new failures introduced by this story."
    return 0
  fi

  # Extract full failure details for only the new failures
  local filtered_output=""
  local full_failures
  full_failures=$(extract_test_failures "$current_output")

  local sig block
  while IFS= read -r sig; do
    [ -z "$sig" ] && continue
    # Match the signature as a fixed string. Test paths and names routinely
    # contain regex metacharacters ("(auth)", "[id]", "+", "?"), and
    # backslash-escaping them for a basic-regex grep turns "\(" and "\)" into
    # grouping operators, so the literal text would never match.
    # "--" protects signatures that begin with a dash.
    block=$(printf '%s\n' "$full_failures" \
      | grep -F -A 50 -- "$sig" \
      | head -60) || true
    if [ -n "$block" ]; then
      filtered_output+="$block"$'\n\n'
    fi
  done <<< "$new_signatures"

  # Add summary
  local total_current=0
  if [ -n "$current_signatures" ]; then
    total_current=$(printf '%s\n' "$current_signatures" | grep -c . | tr -d '[:space:]') || true
  fi

  filtered_output+="
---
**Failure Summary:**
- New failures (this story): $new_count
- Pre-existing failures (baseline): $BASELINE_FAILURE_COUNT
- Total current failures: $total_current
Only the $new_count NEW failures above need to be fixed by this story.
Pre-existing failures from the baseline have been filtered out.
"
  printf '%s\n' "$filtered_output"
}
# =============================================================================
# Truncation Functions
# =============================================================================
# Truncate test failure output to fit within a size limit.
# Output at or under the limit is returned unchanged. Larger output becomes:
#   <head of the text> + truncation notice + <last 20 lines as summary>
# 200 bytes are reserved for the notice. If the 20-line tail alone does not
# fit in (limit - 200) bytes, only the end of that tail is kept and no head
# is emitted; with a limit below 200 bytes only the notice is emitted.
# Globals read: MAX_TEST_FAILURE_SIZE (default limit; 50000 if unset)
# Arguments:
#   $1 - failure output
#   $2 - max size in bytes (optional, defaults to MAX_TEST_FAILURE_SIZE)
# Returns: possibly truncated output on stdout
truncate_test_failures() {
  local failures="$1"
  local max_size="${2:-${MAX_TEST_FAILURE_SIZE:-50000}}"
  local notice_reserve=200

  # A missing or non-numeric limit would break the integer comparisons
  # below; fall back to the 50000-byte default instead.
  case "$max_size" in
    '' | *[!0-9]*) max_size=50000 ;;
  esac
  max_size=$((10#$max_size)) # force base 10 so "0500" is not read as octal

  local current_size
  current_size=$(printf '%s' "$failures" | wc -c | tr -d '[:space:]')
  if [ "$current_size" -le "$max_size" ]; then
    printf '%s' "$failures"
    return 0
  fi

  # Keep the tail as a summary, measured in bytes like everything else here.
  local summary summary_size
  summary=$(printf '%s\n' "$failures" | tail -20)
  summary_size=$(printf '%s' "$summary" | wc -c | tr -d '[:space:]')

  local summary_budget=$((max_size - notice_reserve))
  if [ "$summary_budget" -le 0 ]; then
    summary=""
    summary_size=0
  elif [ "$summary_size" -gt "$summary_budget" ]; then
    # The tail alone is over budget: keep only its final bytes.
    summary=$(printf '%s' "$summary" | tail -c "$summary_budget")
    summary_size=$(printf '%s' "$summary" | wc -c | tr -d '[:space:]')
  fi

  # Clamp at zero: a negative count would make GNU head print "all but the
  # last N bytes" (and BSD head fail), defeating the size limit entirely.
  local available=$((max_size - summary_size - notice_reserve))
  if [ "$available" -lt 0 ]; then
    available=0
  fi

  local truncated=""
  if [ "$available" -gt 0 ]; then
    # head exits early by design; "|| true" keeps a resulting SIGPIPE on the
    # writer from failing the assignment under pipefail/errexit.
    truncated=$(printf '%s' "$failures" | head -c "$available") || true
  fi

  printf '%s\n\n... [TEST OUTPUT TRUNCATED: %sB total, showing first %sB + summary] ...\n\n%s' \
    "$truncated" "$current_size" "$available" "$summary"
}
# =============================================================================
# Main Filter Function (Used by Static Analysis Gate)
# =============================================================================
# Build the failure text that goes into a fix-phase prompt.
# Pipeline: get_new_failures_only (baseline-aware filtering) followed by
# truncate_test_failures (size cap at its default limit).
# Globals read: VERBOSE, MAX_TEST_FAILURE_SIZE (both only for the log line)
# Arguments:
#   $1 - full test output
#   $2 - story_id (used in the verbose log line; defaults to "unknown")
# Returns: filtered, truncated failure output on stdout (no trailing newline
#          is added)
prepare_test_failures_for_fix() {
  local test_output="$1"
  local story_id="${2:-unknown}"

  # Stage 1: drop everything that is not a new failure.
  local relevant
  relevant=$(get_new_failures_only "$test_output")

  # Stage 2: enforce the prompt size budget.
  local capped
  capped=$(truncate_test_failures "$relevant")

  # Report the resulting byte size only when verbose logging is on.
  if [ "$VERBOSE" = true ]; then
    local final_size
    final_size=$(printf '%s' "$capped" | wc -c | tr -d ' ')
    log "Test failure output for $story_id: ${final_size}B (limit: ${MAX_TEST_FAILURE_SIZE}B)"
  fi

  printf '%s' "$capped"
}
# Count test failures that are not part of the baseline.
# When no baseline has been captured (TEST_FILTER_INITIALIZED != true),
# every current failure signature is counted.
# Globals read: TEST_FILTER_INITIALIZED, BASELINE_TEST_FAILURES
# Arguments:
#   $1 - full test output
# Returns: the count on stdout (0 if all failures are pre-existing)
count_new_test_failures() {
  local test_output="$1"

  # Both paths start from the signatures of the current run.
  local observed
  observed=$(extract_failure_signatures "$test_output")

  if [ "$TEST_FILTER_INITIALIZED" != true ]; then
    # No baseline - every observed failure counts.
    if [ -n "$observed" ]; then
      printf '%s\n' "$observed" | grep -c . 2>/dev/null || echo "0"
    else
      echo "0"
    fi
    return 0
  fi

  # Keep only signatures unique to the current run (column 3 of comm is
  # suppressed along with column 1); comm needs both inputs sorted.
  local fresh
  fresh=$(comm -13 \
    <(printf '%s\n' "$BASELINE_TEST_FAILURES" | sort) \
    <(printf '%s\n' "$observed" | sort) \
    2>/dev/null || echo "")

  if [ -z "$fresh" ]; then
    echo "0"
    return
  fi

  local tally
  tally=$(printf '%s\n' "$fresh" | grep -c . 2>/dev/null || echo "0")
  echo "$tally" | tr -d '[:space:]'
}

File diff suppressed because it is too large Load Diff