BMAD-METHOD/scripts/epic-execute-lib/test-failure-filter.sh

392 lines
13 KiB
Bash
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#!/bin/bash
#
# BMAD Epic Execute - Test Failure Filter Module
#
# Provides functions to:
# 1. Extract only failure details from test output (not passing tests)
# 2. Capture baseline failures before story execution
# 3. Compare to identify new failures introduced by current story
#
# This prevents prompt size explosion and focuses fix phases on relevant failures.
#
# Usage: Sourced by epic-execute.sh
#
# =============================================================================
# Test Failure Filter Variables
# =============================================================================
# Failure signatures stored by capture_failure_baseline (empty until it runs).
BASELINE_TEST_FAILURES=
# Number of non-empty lines in BASELINE_TEST_FAILURES.
BASELINE_FAILURE_COUNT=0
# Set to "true" by capture_failure_baseline once a baseline has been stored.
TEST_FILTER_INITIALIZED=false
# Upper bound, in bytes, for test failure output placed in fix prompts.
# A non-empty value already present in the environment is kept; otherwise 50000 (50KB).
: "${MAX_TEST_FAILURE_SIZE:=50000}"
# =============================================================================
# Test Output Filtering Functions
# =============================================================================
# Reduce raw test-runner output to the lines that describe failures.
#
# Stage 1 (awk) targets turbo-prefixed Vitest/Jest output ("@pkg:test: ...") and keeps:
#   - FAIL header lines (each one opens a "fail block")
#   - assertion text (AssertionError: / expected ... to be / Expected / Received)
#   - any line containing a ":<digits>:<digits>" reference (file:line:column)
#   - numbered source-context lines, -/+ comparison markers and the rule line,
#     but only while inside a fail block
#   - "Test Files ... failed" / "Tests ... failed" summary lines
#   Passing-test lines and other noise match none of the rules and are dropped.
# Stage 2 (grep) replaces the stage-1 result when it has fewer than 5 lines.
# Stage 3 appends the last one-line combined summary if it is not already present.
#
# Arguments:
#   $1 - full test output
# Returns: filtered output (written to stdout)
extract_test_failures() {
  local raw="$1"
  local kept

  kept=$(awk '
    BEGIN { inside = 0; seen = 0 }

    # FAIL header from the test task itself (not stderr noise): opens a block.
    /^@.*:test:[[:space:]]+FAIL[[:space:]]/ { inside = 1; seen++; print; next }

    # Assertion details (expected vs received) are kept wherever they appear.
    /AssertionError:|expected.*to be|Expected|Received|expected.*to equal/ { print; next }

    # Error locations carrying line and column numbers.
    /.*:[0-9]+:[0-9]+/ { print; next }

    # Numbered source lines around the error, only within a block.
    /^@.*:test:[[:space:]]+[0-9]+\|/ { if (inside) print; next }

    # Comparison markers, only within a block.
    /^@.*:test:[[:space:]]+-[[:space:]]/ { if (inside) print; next }
    /^@.*:test:[[:space:]]+\+[[:space:]]/ { if (inside) print; next }

    # Rule line: printed when inside a block, and always closes the block.
    /^@.*:test:[[:space:]]+⎯⎯⎯/ { if (inside) print; inside = 0; next }

    # Summary lines are always kept.
    /Test Files.*failed|Tests.*failed/ { print; next }

    # A blank line closes the current block.
    /^[[:space:]]*$/ { if (inside && seen > 0) inside = 0 }
  ' <<< "$raw")

  # Stage 2: when stage 1 kept too little, use a minimal grep instead
  # (FAIL lines, summary lines and AssertionError lines only).
  local kept_lines
  kept_lines=$(wc -l <<< "$kept" | tr -d ' ')
  if [ "$kept_lines" -lt 5 ]; then
    kept=$(grep -E \
      "^@.*FAIL[[:space:]]|Test Files.*failed|Tests.*failed|AssertionError" \
      <<< "$raw" 2>/dev/null) || true
  fi

  # Stage 3: make sure the final combined summary line is part of the result.
  local overall
  overall=$(grep -E "Test Files.*[0-9]+ failed.*Tests.*[0-9]+ failed" <<< "$raw" | tail -1)
  if [ -n "$overall" ] && ! grep -qF "$overall" <<< "$kept"; then
    kept+=$'\n\n'"$overall"
  fi

  printf '%s\n' "$kept"
}
# Extract failure signatures for baseline comparison.
#
# A signature is the text following the FIRST standalone FAIL token on a line;
# any prefix (e.g. a turbo task prefix) and the token itself are dropped:
#   FAIL src/path/file.test.ts > Suite > Test Name  -> src/path/file.test.ts > Suite > Test Name
#   FAIL src/path/file.test.ts                      -> src/path/file.test.ts
#   @revive/web:test:  FAIL  src/path/file.test.ts  -> src/path/file.test.ts
#
# FAIL is "standalone" when it sits at the start of the line or after
# whitespace, and is followed by whitespace. Anchoring on the first such token
# (rather than the last "FAIL" substring) keeps test names that themselves
# contain "FAIL"/"FAILED" intact. Lines with nothing after the token are skipped.
#
# Arguments:
#   $1 - test output
# Returns: sorted, de-duplicated failure signatures, one per line
#          (no output when there are none)
extract_failure_signatures() {
  local test_output="$1"

  # awk exits 0 whether or not anything matched, so callers that run with
  # pipefail/errexit are not tripped by "no failures found".
  printf '%s\n' "$test_output" \
    | awk '
        match($0, /(^|[[:space:]])FAIL[[:space:]]+/) {
          rest = substr($0, RSTART + RLENGTH)
          if (rest != "") print rest
        }
      ' \
    | sort -u
}
# =============================================================================
# Baseline Management Functions
# =============================================================================
# Record the project's current failing tests as the baseline.
# Intended to run at the start of each story's dev phase, before code changes.
#
# The test command is chosen from the files found in $PROJECT_ROOT:
#   package.json                      -> npm test   (only if the file mentions "test")
#   Cargo.toml                        -> cargo test
#   go.mod                            -> go test ./...
#   requirements.txt / pyproject.toml -> pytest     (only if pytest is on PATH)
# The first matching entry wins; a package.json without "test" does not fall
# through to the other toolchains. When nothing runs, the baseline is empty.
#
# Globals written: BASELINE_TEST_FAILURES, BASELINE_FAILURE_COUNT,
#                  TEST_FILTER_INITIALIZED
# Uses log / log_warn, which are not defined in this module (expected from the
# script that sources it).
#
# Arguments:
#   $1 - story_id (for logging; defaults to "unknown")
# Returns: 1 if PROJECT_ROOT is empty, otherwise 0
capture_failure_baseline() {
  local story_id="${1:-unknown}"

  [ -n "$PROJECT_ROOT" ] || {
    log_warn "Cannot capture failure baseline: PROJECT_ROOT not set"
    return 1
  }

  log "Capturing test failure baseline for $story_id..."

  # Pick the test command for this project type (may stay empty).
  local root="$PROJECT_ROOT"
  local -a runner=()
  if [ -f "$root/package.json" ]; then
    if grep -q '"test"' "$root/package.json" 2>/dev/null; then
      runner=(npm test)
    fi
  elif [ -f "$root/Cargo.toml" ]; then
    runner=(cargo test)
  elif [ -f "$root/go.mod" ]; then
    runner=(go test './...')
  elif [ -f "$root/requirements.txt" ] || [ -f "$root/pyproject.toml" ]; then
    if command -v pytest >/dev/null 2>&1; then
      runner=(pytest)
    fi
  fi

  # Run it inside the project directory; a failing test run is expected here,
  # so its exit status is deliberately ignored.
  local run_log=""
  if [ "${#runner[@]}" -gt 0 ]; then
    run_log=$(cd "$root" && "${runner[@]}" 2>&1) || true
  fi

  # Store the signatures and count the non-empty ones.
  BASELINE_TEST_FAILURES=$(extract_failure_signatures "$run_log")
  BASELINE_FAILURE_COUNT=0
  if [ -n "$BASELINE_TEST_FAILURES" ]; then
    BASELINE_FAILURE_COUNT=$(
      { printf '%s\n' "$BASELINE_TEST_FAILURES" | grep -c . 2>/dev/null || echo "0"; } \
        | tr -d '[:space:]'
    )
  fi

  TEST_FILTER_INITIALIZED=true

  if [ "$BASELINE_FAILURE_COUNT" -gt 0 ]; then
    log_warn "Baseline has $BASELINE_FAILURE_COUNT pre-existing test failures"
  else
    log "Baseline captured: no pre-existing failures"
  fi
  return 0
}
# Compare current failures against the baseline and return only NEW failures.
#
# Without a baseline (TEST_FILTER_INITIALIZED is not "true") this simply
# returns extract_test_failures for the given output.
# With a baseline:
#   - signatures present now but absent from BASELINE_TEST_FAILURES are "new"
#   - if none are new, a two-line informational message is returned
#   - otherwise, for each new signature the matching line of the filtered
#     failure output plus up to 50 following lines (capped at 60 lines) is
#     included, followed by a summary of new / baseline / total counts
#
# Signatures are matched as fixed strings: test names routinely contain
# characters such as ( ) + { | that are regex operators once backslash-escaped
# in grep's default (basic) syntax. A new failure whose details cannot be
# located in the filtered output is still listed as "FAIL <signature>" so it
# never disappears from the result.
#
# Globals read: TEST_FILTER_INITIALIZED, BASELINE_TEST_FAILURES,
#               BASELINE_FAILURE_COUNT
# Arguments:
#   $1 - current test output
# Returns: filtered output containing only new failures (written to stdout)
get_new_failures_only() {
  local current_output="$1"

  if [ "$TEST_FILTER_INITIALIZED" != true ]; then
    # No baseline - return all failures (filtered for size)
    extract_test_failures "$current_output"
    return 0
  fi

  local current_signatures
  current_signatures=$(extract_failure_signatures "$current_output")

  # Lines only in the second input = failures that were not in the baseline.
  # Both sides are re-sorted here so comm sees consistently ordered input.
  # If comm itself fails, be conservative and treat every failure as new.
  local new_signatures
  new_signatures=$(comm -13 \
    <(printf '%s\n' "$BASELINE_TEST_FAILURES" | sort) \
    <(printf '%s\n' "$current_signatures" | sort) \
    2>/dev/null || printf '%s\n' "$current_signatures")

  local new_count=0
  if [ -n "$new_signatures" ]; then
    new_count=$(printf '%s\n' "$new_signatures" | grep -c . || true)
    new_count=$(printf '%s' "$new_count" | tr -d '[:space:]')
  fi

  if [ "$new_count" -eq 0 ]; then
    # No new failures - all failures are pre-existing
    printf '%s\n' "[INFO] All $BASELINE_FAILURE_COUNT failures are pre-existing from baseline."
    printf '%s\n' "No new failures introduced by this story."
    return 0
  fi

  # Collect the detail block for each new failure.
  local filtered_output=""
  local full_failures
  full_failures=$(extract_test_failures "$current_output")

  local sig block
  while IFS= read -r sig; do
    if [ -z "$sig" ]; then
      continue
    fi
    # -F: literal match; --: a signature starting with "-" is not an option.
    # "|| true": no match is a normal outcome, not an error.
    block=$(printf '%s\n' "$full_failures" | grep -F -A 50 -- "$sig" | head -n 60) || true
    if [ -z "$block" ]; then
      # Details not found in the filtered output: at least name the failure.
      block="FAIL $sig"
    fi
    filtered_output+="$block"$'\n\n'
  done <<< "$new_signatures"

  local total_current=0
  if [ -n "$current_signatures" ]; then
    total_current=$(printf '%s\n' "$current_signatures" | grep -c . || true)
    total_current=$(printf '%s' "$total_current" | tr -d '[:space:]')
  fi

  filtered_output+="
---
**Failure Summary:**
- New failures (this story): $new_count
- Pre-existing failures (baseline): $BASELINE_FAILURE_COUNT
- Total current failures: $total_current
Only the $new_count NEW failures above need to be fixed by this story.
Pre-existing failures from the baseline have been filtered out.
"
  printf '%s\n' "$filtered_output"
}
# =============================================================================
# Truncation Functions
# =============================================================================
# Truncate test failure output so it fits within a byte limit.
#
# Output at or under the limit is returned unchanged. Larger output becomes:
#   <head of the output> + truncation notice + <last 20 lines ("summary")>
# 200 bytes are reserved for the notice. All sizes are measured in bytes.
# If the 20-line summary alone does not fit, the head is dropped and only the
# trailing part of the summary that fits is kept, so the result stays within
# the limit instead of the byte budget going negative.
#
# Arguments:
#   $1 - failure output
#   $2 - max size in bytes (optional, defaults to MAX_TEST_FAILURE_SIZE,
#        or 50000 if that is unset)
# Returns: possibly truncated output (written to stdout, no trailing newline added)
truncate_test_failures() {
  local failures="$1"
  local max_size="${2:-${MAX_TEST_FAILURE_SIZE:-50000}}"
  local notice_reserve=200

  local current_size
  current_size=$(printf '%s' "$failures" | wc -c | tr -d '[:space:]')
  if [ "$current_size" -le "$max_size" ]; then
    printf '%s' "$failures"
    return 0
  fi

  # The tail of the output is where runners print their totals.
  local summary
  summary=$(printf '%s\n' "$failures" | tail -n 20)
  local summary_size
  summary_size=$(printf '%s' "$summary" | wc -c | tr -d '[:space:]')

  local available=$((max_size - summary_size - notice_reserve))
  if [ "$available" -lt 0 ]; then
    # Summary + notice already exceed the limit: no room for a head section.
    # (A negative count must never reach head -c, where GNU head treats it as
    # "all but the last N bytes".)
    available=0
    local summary_budget=$((max_size - notice_reserve))
    if [ "$summary_budget" -gt 0 ]; then
      summary=$(printf '%s' "$summary" | tail -c "$summary_budget")
    else
      summary=""
    fi
  fi

  local truncated=""
  if [ "$available" -gt 0 ]; then
    # head may close the pipe early; that is expected, not an error.
    truncated=$(printf '%s' "$failures" | head -c "$available") || true
  fi

  printf '%s\n\n... [TEST OUTPUT TRUNCATED: %sB total, showing first %sB + summary] ...\n\n%s' \
    "$truncated" "$current_size" "$available" "$summary"
}
# =============================================================================
# Main Filter Function (Used by Static Analysis Gate)
# =============================================================================
# Build the failure text handed to the fix-phase prompt.
# Pipeline: get_new_failures_only (baseline filtering) -> truncate_test_failures
# (size cap using the default limit). When VERBOSE is "true" the resulting
# byte size is reported through log.
# NOTE(review): log is defined outside this module; if it writes to stdout its
# line would become part of the captured result - confirm it logs elsewhere.
#
# Arguments:
#   $1 - full test output
#   $2 - story_id (for logging; defaults to "unknown")
# Returns: filtered, truncated failure output (written to stdout without a
#          trailing newline)
prepare_test_failures_for_fix() {
  local test_output="$1" story_id="${2:-unknown}"
  local relevant prompt_text

  # Step 1: drop failures that were already present in the baseline.
  relevant=$(get_new_failures_only "$test_output")

  # Step 2: cap the size.
  prompt_text=$(truncate_test_failures "$relevant")

  if [ "$VERBOSE" = true ]; then
    local byte_len
    byte_len=$(printf '%s' "$prompt_text" | wc -c | tr -d ' ')
    log "Test failure output for $story_id: ${byte_len}B (limit: ${MAX_TEST_FAILURE_SIZE}B)"
  fi

  printf '%s' "$prompt_text"
}
# Count test failures that are not part of the baseline.
# Per the module notes this is what the static analysis gate uses for pass/fail.
#
# Without a baseline (TEST_FILTER_INITIALIZED is not "true") every failure
# signature in the output counts. With a baseline, only signatures absent from
# BASELINE_TEST_FAILURES count.
#
# Globals read: TEST_FILTER_INITIALIZED, BASELINE_TEST_FAILURES
# Arguments:
#   $1 - full test output
# Returns: count of NEW failures on stdout (0 if all failures are pre-existing)
count_new_test_failures() {
  local test_output="$1"

  # Signatures of everything failing right now; needed on both paths.
  local observed
  observed=$(extract_failure_signatures "$test_output")

  if [ "$TEST_FILTER_INITIALIZED" != true ]; then
    # No baseline - count all failures
    if [ -n "$observed" ]; then
      printf '%s\n' "$observed" | grep -c . 2>/dev/null || echo "0"
    else
      echo "0"
    fi
    return 0
  fi

  # Keep only lines unique to the current run (second input of comm).
  local fresh
  fresh=$(comm -13 \
    <(sort <<< "$BASELINE_TEST_FAILURES") \
    <(sort <<< "$observed") \
    2>/dev/null || echo "")

  if [ -z "$fresh" ]; then
    echo "0"
    return
  fi

  local tally
  tally=$(printf '%s\n' "$fresh" | grep -c . 2>/dev/null || echo "0")
  tr -d '[:space:]' <<< "$tally"
}