feat(epic-execute): add test failure filtering and sync improvements from revive-dev
Port improvements developed in revive-dev: new test-failure-filter module for baseline-aware failure detection and prompt size management, broken pipe fixes in regression-gate, and log persistence in epic-execute. Paths adapted to BMAD-METHOD repo structure. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
parent
efc0bdd56f
commit
87223692d3
|
|
@ -31,16 +31,17 @@ extract_test_count() {
|
|||
|
||||
# Method 1: Try JSON output first (most reliable)
|
||||
# Jest with --json, Vitest with --reporter=json
|
||||
# Note: Use printf and redirect stderr to avoid broken pipe warnings when jq exits early
|
||||
if command -v jq >/dev/null 2>&1; then
|
||||
# Jest JSON format
|
||||
count=$(echo "$test_output" | jq -r '.numPassedTests // empty' 2>/dev/null)
|
||||
count=$(printf '%s' "$test_output" 2>/dev/null | jq -r '.numPassedTests // empty' 2>/dev/null) || true
|
||||
if [ -n "$count" ] && [ "$count" != "null" ] && [ "$count" -gt 0 ] 2>/dev/null; then
|
||||
echo "$count"
|
||||
return 0
|
||||
fi
|
||||
|
||||
# Vitest JSON format (aggregate from testResults)
|
||||
count=$(echo "$test_output" | jq -r '[.testResults[]?.assertionResults[]? | select(.status == "passed")] | length // empty' 2>/dev/null)
|
||||
count=$(printf '%s' "$test_output" 2>/dev/null | jq -r '[.testResults[]?.assertionResults[]? | select(.status == "passed")] | length // empty' 2>/dev/null) || true
|
||||
if [ -n "$count" ] && [ "$count" != "null" ] && [ "$count" -gt 0 ] 2>/dev/null; then
|
||||
echo "$count"
|
||||
return 0
|
||||
|
|
|
|||
|
|
@ -0,0 +1,391 @@
|
|||
#!/bin/bash
|
||||
#
|
||||
# BMAD Epic Execute - Test Failure Filter Module
|
||||
#
|
||||
# Provides functions to:
|
||||
# 1. Extract only failure details from test output (not passing tests)
|
||||
# 2. Capture baseline failures before story execution
|
||||
# 3. Compare to identify new failures introduced by current story
|
||||
#
|
||||
# This prevents prompt size explosion and focuses fix phases on relevant failures.
|
||||
#
|
||||
# Usage: Sourced by epic-execute.sh
|
||||
#
|
||||
|
||||
# =============================================================================
|
||||
# Test Failure Filter Variables
|
||||
# =============================================================================
|
||||
|
||||
# Newline-separated failure signatures stored by capture_failure_baseline.
BASELINE_TEST_FAILURES=""
# Number of entries in BASELINE_TEST_FAILURES.
BASELINE_FAILURE_COUNT=0
# Flipped to true by capture_failure_baseline once a baseline has been stored.
TEST_FILTER_INITIALIZED=false

# Maximum size for test failure output in fix prompts (in bytes).
# May be overridden from the environment; defaults to 50KB.
MAX_TEST_FAILURE_SIZE="${MAX_TEST_FAILURE_SIZE:-50000}"

# The limit is later used in integer comparisons ([ ... -le ... ]), which
# error out on non-numeric operands, so reject a malformed override up front.
case "$MAX_TEST_FAILURE_SIZE" in
  *[!0-9]*)
    printf 'test-failure-filter: ignoring non-numeric MAX_TEST_FAILURE_SIZE=%s (using 50000)\n' \
      "$MAX_TEST_FAILURE_SIZE" >&2
    MAX_TEST_FAILURE_SIZE=50000
    ;;
esac
|
||||
|
||||
# =============================================================================
|
||||
# Test Output Filtering Functions
|
||||
# =============================================================================
|
||||
|
||||
# Extract only failure-related output from test results.
#
# Keeps:
#   - FAIL lines, either turbo-prefixed ("@pkg:test: FAIL ...") or plain
#   - assertion messages and Expected/Received lines
#   - error locations ("❯ file:line:col")
#   - code-frame ("12| ...") and -/+ diff lines while inside a FAIL block
#   - "Test Files ... failed" / "Tests ... failed" summary lines
# Everything else (passing tests, warnings, unrelated noise) is dropped.
#
# Arguments:
#   $1 - full test output
# Returns: filtered output (echoed)
extract_test_failures() {
  local test_output="$1"
  local filtered=""

  # printf instead of echo: test output is arbitrary data and must not be
  # interpreted as echo options/escapes.
  filtered=$(printf '%s\n' "$test_output" | awk '
    BEGIN { in_fail_block = 0; prefixed = 0 }

    # Start of a FAIL block. The turbo task prefix is optional so that
    # plain Vitest/Jest output is recognised as well.
    /^(@.*:test:)?[[:space:]]*FAIL[[:space:]]/ {
      in_fail_block = 1
      prefixed = ($0 ~ /^@/)
      print
      next
    }

    # Assertion details (expected vs received) - kept wherever they occur
    /AssertionError:|expected.*to be|Expected|Received|expected.*to equal/ {
      print
      next
    }

    # Error location with line:column numbers
    /❯.*:[0-9]+:[0-9]+/ {
      print
      next
    }

    # Source code context (numbered lines around the error)
    /^(@.*:test:)?[[:space:]]*[0-9]+[|]/ {
      if (in_fail_block) print
      next
    }

    # Diff comparison markers
    /^(@.*:test:)?[[:space:]]*-[[:space:]]/ { if (in_fail_block) print; next }
    /^(@.*:test:)?[[:space:]]*[+][[:space:]]/ { if (in_fail_block) print; next }

    # Separator rule closes the current FAIL block
    /^(@.*:test:)?[[:space:]]*⎯⎯⎯/ {
      if (in_fail_block) print
      in_fail_block = 0
      next
    }

    # Summary lines - always keep
    /Test Files.*failed|Tests.*failed/ {
      print
      next
    }

    # A truly blank line ends a turbo-prefixed block (inside such a block
    # every line carries the prefix). Plain output has blank lines inside
    # a failure report, so there the block stays open until the separator.
    /^[[:space:]]*$/ {
      if (prefixed) in_fail_block = 0
    }
  ') || true

  # Count lines of the awk result; an empty result counts as zero lines.
  local line_count=0
  if [ -n "$filtered" ]; then
    line_count=$(printf '%s\n' "$filtered" | wc -l | tr -d '[:space:]')
  fi

  # If awk filtering produced too little, try a minimal grep. Adopt it only
  # when it actually found more lines, so it can never discard detail
  # (locations, code frames) that awk already captured.
  if [ "$line_count" -lt 5 ]; then
    local fallback=""
    local fallback_count=0
    fallback=$(printf '%s\n' "$test_output" | grep -E \
      '^@.*FAIL[[:space:]]|(^|[[:space:]])FAIL[[:space:]]|Test Files.*failed|Tests.*failed|AssertionError' \
      2>/dev/null) || fallback=""
    if [ -n "$fallback" ]; then
      fallback_count=$(printf '%s\n' "$fallback" | wc -l | tr -d '[:space:]')
    fi
    if [ "$fallback_count" -gt "$line_count" ]; then
      filtered="$fallback"
    fi
  fi

  # Always include the final one-line summary if present. "|| true" keeps a
  # no-match grep from failing the assignment under pipefail/errexit.
  local summary=""
  summary=$(printf '%s\n' "$test_output" \
    | grep -E "Test Files.*[0-9]+ failed.*Tests.*[0-9]+ failed" \
    | tail -n 1) || true
  if [ -n "$summary" ]; then
    # Append only if the summary is not already part of the filtered output
    if ! printf '%s\n' "$filtered" | grep -qF -- "$summary"; then
      filtered="$filtered"$'\n\n'"$summary"
    fi
  fi

  printf '%s\n' "$filtered"
}
|
||||
|
||||
# Extract failure signatures for comparison.
# Returns a sorted, deduplicated list of failing test identifiers: for every
# line containing a standalone FAIL marker, the text that follows the marker.
#
# Handles formats like:
#   FAIL src/path/file.test.ts                     (marker at column 0)
#    FAIL  src/path/file.test.ts > Suite > Test    (indented marker)
#   @revive/web:test: FAIL src/path/file.test.ts   (turbo-prefixed output)
#
# Arguments:
#   $1 - test output
# Returns: sorted failure signatures (one per line); nothing if there are none
extract_failure_signatures() {
  local test_output="$1"

  # - "(^|[[:space:]])" accepts FAIL at the start of the line as well as
  #   after whitespace/prefix.
  # - "|| true" keeps the no-failure case (grep exit status 1) from failing
  #   the pipeline when the caller runs with "set -o pipefail".
  # - The sed expression requires whitespace on both sides of the marker, so
  #   a path that merely contains "FAIL" (e.g. FAILURES.test.ts) is kept whole.
  printf '%s\n' "$test_output" \
    | { grep -E '(^|[[:space:]])FAIL[[:space:]]+' || true; } \
    | sed -E 's/^(.*[[:space:]])?FAIL[[:space:]]+//' \
    | sort -u
}
|
||||
|
||||
# =============================================================================
|
||||
# Baseline Management Functions
|
||||
# =============================================================================
|
||||
|
||||
# Capture current test failure state as baseline before story execution.
# Should be called at the start of each story's dev phase.
#
# Picks a test command from the marker files found in PROJECT_ROOT
# (package.json with a "test" entry, Cargo.toml, go.mod, requirements.txt /
# pyproject.toml with pytest installed), runs it, and stores the failure
# signatures of that run.
#
# Globals:
#   PROJECT_ROOT             (read)
#   BASELINE_TEST_FAILURES   (written)
#   BASELINE_FAILURE_COUNT   (written)
#   TEST_FILTER_INITIALIZED  (written, set to true)
# Arguments:
#   $1 - story_id (for logging)
# Returns: 1 if PROJECT_ROOT is not set, 0 otherwise
capture_failure_baseline() {
  local story_id="${1:-unknown}"

  # ${...:-} so the check itself is safe when the caller uses "set -u"
  if [ -z "${PROJECT_ROOT:-}" ]; then
    log_warn "Cannot capture failure baseline: PROJECT_ROOT not set"
    return 1
  fi

  log "Capturing test failure baseline for $story_id..."

  local test_output=""
  local ran_tests=false

  # Run tests and capture output. A failing test run is expected here, so the
  # exit status of the test command is deliberately ignored.
  if [ -f "$PROJECT_ROOT/package.json" ]; then
    if grep -q '"test"' "$PROJECT_ROOT/package.json" 2>/dev/null; then
      test_output=$(cd "$PROJECT_ROOT" && npm test 2>&1) || true
      ran_tests=true
    fi
  elif [ -f "$PROJECT_ROOT/Cargo.toml" ]; then
    test_output=$(cd "$PROJECT_ROOT" && cargo test 2>&1) || true
    ran_tests=true
  elif [ -f "$PROJECT_ROOT/go.mod" ]; then
    test_output=$(cd "$PROJECT_ROOT" && go test ./... 2>&1) || true
    ran_tests=true
  elif [ -f "$PROJECT_ROOT/requirements.txt" ] || [ -f "$PROJECT_ROOT/pyproject.toml" ]; then
    if command -v pytest >/dev/null 2>&1; then
      test_output=$(cd "$PROJECT_ROOT" && pytest 2>&1) || true
      ran_tests=true
    fi
  fi

  # Make "nothing was run" distinguishable from "ran and found no failures";
  # the baseline is still recorded (as empty) exactly as before.
  if [ "$ran_tests" != true ]; then
    log_warn "No test command detected for $story_id; baseline will be empty"
  fi

  # Extract and store baseline failures
  BASELINE_TEST_FAILURES=$(extract_failure_signatures "$test_output") || true

  # Count non-empty signature lines. grep -c prints the count itself even
  # when it is 0, so no extra fallback output is needed.
  BASELINE_FAILURE_COUNT=0
  if [ -n "$BASELINE_TEST_FAILURES" ]; then
    BASELINE_FAILURE_COUNT=$(printf '%s\n' "$BASELINE_TEST_FAILURES" | grep -c . || true)
    BASELINE_FAILURE_COUNT=${BASELINE_FAILURE_COUNT//[[:space:]]/}
    BASELINE_FAILURE_COUNT=${BASELINE_FAILURE_COUNT:-0}
  fi
  TEST_FILTER_INITIALIZED=true

  if [ "$BASELINE_FAILURE_COUNT" -gt 0 ]; then
    log_warn "Baseline has $BASELINE_FAILURE_COUNT pre-existing test failures"
  else
    log "Baseline captured: no pre-existing failures"
  fi

  return 0
}
|
||||
|
||||
# Compare current failures against baseline and return only NEW failures.
#
# Without a captured baseline (TEST_FILTER_INITIALIZED != true) this simply
# returns extract_test_failures of the input. With a baseline it prints, for
# every signature that is not in BASELINE_TEST_FAILURES, the matching part of
# the filtered failure output, followed by a summary of the counts.
#
# Globals:
#   TEST_FILTER_INITIALIZED, BASELINE_TEST_FAILURES, BASELINE_FAILURE_COUNT (read)
# Arguments:
#   $1 - current test output
# Returns: filtered output containing only new failures (echoed)
get_new_failures_only() {
  local current_output="$1"

  if [ "${TEST_FILTER_INITIALIZED:-false}" != true ]; then
    # No baseline - return all failures (filtered for size)
    extract_test_failures "$current_output"
    return 0
  fi

  # Get current failure signatures
  local current_signatures
  current_signatures=$(extract_failure_signatures "$current_output") || true

  # Signatures present now but absent from the baseline are the new failures.
  # If comm cannot be run, treat every current failure as new.
  local new_signatures
  new_signatures=$(comm -13 \
    <(printf '%s\n' "$BASELINE_TEST_FAILURES" | sort) \
    <(printf '%s\n' "$current_signatures" | sort) \
    2>/dev/null) || new_signatures="$current_signatures"

  local new_count=0
  if [ -n "$new_signatures" ]; then
    new_count=$(printf '%s\n' "$new_signatures" | grep -c . || true)
    new_count=${new_count//[[:space:]]/}
    new_count=${new_count:-0}
  fi

  if [ "$new_count" -eq 0 ]; then
    # No new failures - all failures are pre-existing
    echo "[INFO] All $BASELINE_FAILURE_COUNT failures are pre-existing from baseline."
    echo "No new failures introduced by this story."
    return 0
  fi

  # Extract full failure details for only the new failures
  local filtered_output=""
  local full_failures
  full_failures=$(extract_test_failures "$current_output")

  # For each new failure signature, include its block of output.
  local sig block
  while IFS= read -r sig; do
    [ -z "$sig" ] && continue
    # Fixed-string match (-F): signatures routinely contain regex
    # metacharacters such as ".", "(", "|" or "+". Escaping them and feeding
    # the result to plain (basic-regex) grep turns "\(", "\|", "\+" into
    # operators, so the literal text would not match. "--" protects a
    # signature that starts with "-".
    block=$(printf '%s\n' "$full_failures" | grep -F -A 50 -- "$sig" | head -n 60) || true
    if [ -n "$block" ]; then
      filtered_output+="$block"$'\n\n'
    fi
  done <<< "$new_signatures"

  # Add summary
  local total_current=0
  if [ -n "$current_signatures" ]; then
    total_current=$(printf '%s\n' "$current_signatures" | grep -c . || true)
    total_current=${total_current//[[:space:]]/}
    total_current=${total_current:-0}
  fi

  # The literal below is emitted verbatim, so it is intentionally unindented.
  filtered_output+="
---
**Failure Summary:**
- New failures (this story): $new_count
- Pre-existing failures (baseline): $BASELINE_FAILURE_COUNT
- Total current failures: $total_current

Only the $new_count NEW failures above need to be fixed by this story.
Pre-existing failures from the baseline have been filtered out.
"

  printf '%s\n' "$filtered_output"
}
|
||||
|
||||
# =============================================================================
|
||||
# Truncation Functions
|
||||
# =============================================================================
|
||||
|
||||
# Truncate test failure output to fit within size limits.
# Preserves the most relevant information: the beginning of the output (first
# failures) and its last 20 lines (summary), joined by a truncation notice.
#
# All sizes are measured in bytes. 200 bytes of the limit are reserved for
# the notice; the summary takes what it needs from the remainder (and is
# itself cut down to its trailing bytes if it alone would not fit), the head
# of the output gets whatever is left.
#
# Arguments:
#   $1 - failure output
#   $2 - max size in bytes (optional, defaults to MAX_TEST_FAILURE_SIZE)
# Returns: truncated output (echoed without a trailing newline)
truncate_test_failures() {
  local failures="$1"
  local max_size="${2:-${MAX_TEST_FAILURE_SIZE:-50000}}"

  local current_size
  current_size=$(printf '%s' "$failures" | wc -c | tr -d '[:space:]')

  if [ "$current_size" -le "$max_size" ]; then
    printf '%s' "$failures"
    return 0
  fi

  # Space left for content once the notice is accounted for; never negative.
  local budget=$((max_size - 200))
  if [ "$budget" -lt 0 ]; then
    budget=0
  fi

  # Summary = last 20 lines. Its size is taken with wc -c because ${#var}
  # counts characters, which under-reports multi-byte text such as the
  # "✓" / "❯" / "⎯" glyphs found in test-runner output.
  local summary=""
  local summary_size=0
  if [ "$budget" -gt 0 ]; then
    summary=$(printf '%s\n' "$failures" 2>/dev/null | tail -n 20) || true
    summary_size=$(printf '%s' "$summary" | wc -c | tr -d '[:space:]')
    if [ "$summary_size" -gt "$budget" ]; then
      # A few very long lines: keep only the trailing bytes that fit.
      summary=$(printf '%s' "$summary" 2>/dev/null | tail -c "$budget") || true
      summary_size=$(printf '%s' "$summary" | wc -c | tr -d '[:space:]')
    fi
  fi

  # A negative/zero count must never reach head -c: GNU head treats -N as
  # "all but the last N bytes" and BSD head rejects it.
  local available=$((budget - summary_size))
  if [ "$available" -lt 0 ]; then
    available=0
  fi

  local truncated=""
  if [ "$available" -gt 0 ]; then
    # head exits early; silence the writer's broken-pipe complaint.
    truncated=$(printf '%s' "$failures" 2>/dev/null | head -c "$available") || true
  fi

  printf '%s\n\n... [TEST OUTPUT TRUNCATED: %sB total, showing first %sB + summary] ...\n\n%s' \
    "$truncated" "$current_size" "$available" "$summary"
}
|
||||
|
||||
# =============================================================================
|
||||
# Main Filter Function (Used by Static Analysis Gate)
|
||||
# =============================================================================
|
||||
|
||||
# Filter and prepare test failures for fix-phase prompt.
# Chains the module's steps: get_new_failures_only (baseline comparison and
# failure extraction) followed by truncate_test_failures (size limit).
#
# Globals:
#   VERBOSE                (read; "true" enables the size log line)
#   MAX_TEST_FAILURE_SIZE  (read, for the log line only)
# Arguments:
#   $1 - full test output
#   $2 - story_id (for logging)
# Returns: filtered, truncated failure output suitable for fix prompt
#          (echoed without a trailing newline)
prepare_test_failures_for_fix() {
  local test_output="$1"
  local story_id="${2:-unknown}"

  # Step 1: Get only new failures (if baseline exists)
  local new_failures
  new_failures=$(get_new_failures_only "$test_output")

  # Step 2: Truncate if still too large
  local final_output
  final_output=$(truncate_test_failures "$new_failures")

  # The size is only needed for the log line, so compute it only when logging.
  # ${VERBOSE:-} keeps the check safe when the caller uses "set -u".
  if [ "${VERBOSE:-}" = true ]; then
    local final_size
    final_size=$(printf '%s' "$final_output" | wc -c | tr -d '[:space:]')
    # This function's stdout IS its return value, so the log line is sent to
    # stderr; otherwise it could end up inside the captured result.
    # NOTE(review): log is defined outside this file - confirm where it writes.
    log "Test failure output for $story_id: ${final_size}B (limit: ${MAX_TEST_FAILURE_SIZE:-}B)" >&2
  fi

  printf '%s' "$final_output"
}
|
||||
|
||||
# Count NEW test failures (not in baseline).
# Used by static analysis gate to decide pass/fail.
#
# Without a captured baseline (TEST_FILTER_INITIALIZED != true) every current
# failure counts as new. With a baseline, only signatures that are absent
# from BASELINE_TEST_FAILURES are counted.
#
# Globals:
#   TEST_FILTER_INITIALIZED, BASELINE_TEST_FAILURES (read)
# Arguments:
#   $1 - full test output
# Returns: count of NEW failures (0 if all failures are pre-existing), echoed
count_new_test_failures() {
  local test_output="$1"

  # Get current failure signatures
  local current_signatures
  current_signatures=$(extract_failure_signatures "$test_output") || true

  # Work out which signatures have to be counted.
  local new_signatures
  if [ "${TEST_FILTER_INITIALIZED:-false}" != true ]; then
    # No baseline - count all failures
    new_signatures="$current_signatures"
  else
    # Signatures in current but not in baseline. If the comparison itself
    # cannot be performed, count every current failure as new: this result
    # is a pass/fail input, so it must not report "0 new failures" just
    # because comm failed (same fallback as get_new_failures_only).
    new_signatures=$(comm -13 \
      <(printf '%s\n' "$BASELINE_TEST_FAILURES" | sort) \
      <(printf '%s\n' "$current_signatures" | sort) \
      2>/dev/null) || new_signatures="$current_signatures"
  fi

  if [ -z "$new_signatures" ]; then
    echo "0"
    return 0
  fi

  # grep -c prints the number itself (also when it is 0), so its exit status
  # is ignored instead of echoing a second "0".
  local count
  count=$(printf '%s\n' "$new_signatures" | grep -c . || true)
  count=${count//[[:space:]]/}
  printf '%s\n' "${count:-0}"
  return 0
}
|
||||
File diff suppressed because it is too large
Load Diff
Loading…
Reference in New Issue