BMAD-METHOD/scripts/uat-validate.sh

1306 lines
43 KiB
Bash
Executable File

#!/bin/bash
#
# BMAD UAT Validate - Automated UAT Scenario Execution with Self-Healing Fix Loop
#
# Usage: ./uat-validate.sh <epic-id> [options]
#
# Options:
# --gate-mode=MODE Validation mode: quick|full|skip (default: quick)
# --max-retries=N Max fix attempts before halt (default: 2)
# --skip-manual Skip manual-only scenarios (this is the default)
# --include-manual Include manual scenarios in checklist
# --verbose Show detailed output
# --dry-run Show what would be executed without running
# --timeout=SECONDS Timeout per scenario (default: 30)
#
# Exit Codes:
# 0 - UAT PASS (all automatable scenarios passed)
# 1 - UAT FAIL (fixable, retries remain or self-heal succeeded)
# 2 - UAT FAIL (max retries exceeded)
#
# Exit code 1 is also used for usage errors: missing epic id, unknown option,
# or an invalid --gate-mode value.
#
# errexit is active for the whole script: any unguarded command or function
# that returns non-zero terminates the run.
set -e
# Allow nested Claude Code sessions (when launched from within Claude Code)
unset CLAUDECODE 2>/dev/null || true
# =============================================================================
# Section 1: Configuration
# =============================================================================
# --- Filesystem layout -------------------------------------------------------
# The project root is resolved as two directories above this script.
SCRIPT_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
PROJECT_ROOT=$(cd "${SCRIPT_DIR}/../.." && pwd)
BMAD_DIR="${PROJECT_ROOT}/.bmad"
STORIES_DIR="${PROJECT_ROOT}/docs/stories"
UAT_DIR="${PROJECT_ROOT}/docs/uat"
SPRINT_ARTIFACTS_DIR="${PROJECT_ROOT}/docs/sprint-artifacts"
FIX_DIR="${SPRINT_ARTIFACTS_DIR}/uat-fixes"
METRICS_DIR="${SPRINT_ARTIFACTS_DIR}/metrics"
# One log file per process, keyed by PID.
LOG_FILE="/tmp/bmad-uat-validate-$$.log"

# --- Defaults (each can be overridden by a command-line option) --------------
UAT_GATE_MODE=quick
MAX_RETRIES=2
TIMEOUT_SECONDS=30
SKIP_MANUAL=true
VERBOSE=false
DRY_RUN=false

# --- ANSI colour sequences (kept as literal escapes; expanded by the loggers) -
NC='\033[0m' # No Color
BOLD='\033[1m'
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
BLUE='\033[0;34m'
CYAN='\033[0;36m'
# =============================================================================
# Section 2: Helper Functions
# =============================================================================
# Print an informational message tagged [UAT] and append a timestamped copy
# to $LOG_FILE.
#   $1 - message text (backslash escapes are expanded on the console copy only)
log() {
  local message="$1"
  local stamp
  printf '%b\n' "${BLUE}[UAT]${NC} ${message}"
  stamp=$(date '+%Y-%m-%d %H:%M:%S')
  printf '%s\n' "[${stamp}] ${message}" >> "$LOG_FILE"
}
# Print a success message tagged [PASS] and append a timestamped copy to
# $LOG_FILE.
#   $1 - message text
log_success() {
  local message="$1"
  local stamp
  printf '%b\n' "${GREEN}[PASS]${NC} ${message}"
  stamp=$(date '+%Y-%m-%d %H:%M:%S')
  printf '%s\n' "[${stamp}] [PASS] ${message}" >> "$LOG_FILE"
}
# Print a failure message tagged [FAIL] and append a timestamped copy to
# $LOG_FILE.
#   $1 - message text
log_error() {
  local message="$1"
  local stamp
  printf '%b\n' "${RED}[FAIL]${NC} ${message}"
  stamp=$(date '+%Y-%m-%d %H:%M:%S')
  printf '%s\n' "[${stamp}] [FAIL] ${message}" >> "$LOG_FILE"
}
# Print a warning tagged [!] on the console; the log-file copy is tagged [WARN].
#   $1 - message text
log_warn() {
  local message="$1"
  local stamp
  printf '%b\n' "${YELLOW}[!]${NC} ${message}"
  stamp=$(date '+%Y-%m-%d %H:%M:%S')
  printf '%s\n' "[${stamp}] [WARN] ${message}" >> "$LOG_FILE"
}
# Print a bold section title framed by two light horizontal rules, preceded
# by a blank line. Console only; nothing is written to $LOG_FILE.
#   $1 - section title
log_section() {
  local rule="───────────────────────────────────────────────────────────"
  printf '\n'
  printf '%b\n' "${BOLD}${rule}${NC}"
  printf '%b\n' "${BOLD} $1${NC}"
  printf '%b\n' "${BOLD}${rule}${NC}"
}
# Print a cyan/bold banner title framed by two double horizontal rules, with a
# blank line before and after. Console only; nothing is written to $LOG_FILE.
#   $1 - banner title
log_header() {
  local rule="═══════════════════════════════════════════════════════════"
  printf '\n'
  printf '%b\n' "${CYAN}${BOLD}${rule}${NC}"
  printf '%b\n' "${CYAN}${BOLD} $1${NC}"
  printf '%b\n' "${CYAN}${BOLD}${rule}${NC}"
  printf '\n'
}
# =============================================================================
# Section 3: Argument Parsing
# =============================================================================
EPIC_ID=""

# Print the command-line synopsis (options and exit codes) on stdout.
_uat_usage() {
  echo "Usage: $0 <epic-id> [options]"
  echo ""
  echo "Options:"
  echo " --gate-mode=MODE Validation mode: quick|full|skip (default: quick)"
  echo " --max-retries=N Max fix attempts before halt (default: 2)"
  echo " --skip-manual Skip manual-only scenarios (default)"
  echo " --include-manual Include manual scenarios in checklist"
  echo " --verbose Detailed output"
  echo " --dry-run Show what would be executed"
  echo " --timeout=SECONDS Timeout per scenario (default: 30)"
  echo ""
  echo "Exit Codes:"
  echo " 0 - UAT PASS"
  echo " 1 - UAT FAIL (fixable)"
  echo " 2 - UAT FAIL (max retries exceeded)"
}

# Exit with status 1 unless the value is a non-negative integer.
#   $1 - option name (for the error message)
#   $2 - value supplied by the user
# These values are later used in integer comparisons and handed to the
# timeout wrapper, so anything non-numeric is rejected up front.
_uat_require_uint() {
  if [[ ! "$2" =~ ^[0-9]+$ ]]; then
    echo "Invalid value for $1: '$2' (expected a non-negative integer)" >&2
    exit 1
  fi
}

# Options may appear in any order; the last bare word wins as the epic id.
while [[ $# -gt 0 ]]; do
  case "$1" in
    --gate-mode=*)
      UAT_GATE_MODE="${1#*=}"
      ;;
    --max-retries=*)
      _uat_require_uint "--max-retries" "${1#*=}"
      MAX_RETRIES="${1#*=}"
      ;;
    --skip-manual)
      SKIP_MANUAL=true
      ;;
    --include-manual)
      SKIP_MANUAL=false
      ;;
    --verbose)
      VERBOSE=true
      ;;
    --dry-run)
      DRY_RUN=true
      ;;
    --timeout=*)
      _uat_require_uint "--timeout" "${1#*=}"
      TIMEOUT_SECONDS="${1#*=}"
      ;;
    -h|--help)
      _uat_usage
      exit 0
      ;;
    -*)
      echo "Unknown option: $1" >&2
      exit 1
      ;;
    *)
      EPIC_ID="$1"
      ;;
  esac
  shift
done

# The epic id is the only mandatory argument.
if [ -z "$EPIC_ID" ]; then
  _uat_usage >&2
  exit 1
fi

# Validate gate mode
if [[ ! "$UAT_GATE_MODE" =~ ^(quick|full|skip)$ ]]; then
  echo "Invalid gate mode: $UAT_GATE_MODE" >&2
  echo "Valid modes: quick, full, skip" >&2
  exit 1
fi
# =============================================================================
# Section 4: UAT Document Loading
# =============================================================================
# Locate the UAT markdown document for an epic and do a light sanity check.
#
# Arguments:
#   $1 - epic id
# Globals written:
#   UAT_FILE        path of the document found ("" when none)
#   SCENARIO_COUNT  number of lines that look like scenario entries
# Returns:
#   0 when a document was found, 1 otherwise.
load_uat_document() {
  local epic_id="$1"
  local pattern found
  # Find UAT document (try multiple patterns)
  UAT_FILE=""
  for pattern in "epic-${epic_id}-uat.md" "epic-0${epic_id}-uat.md" "${epic_id}-uat.md"; do
    found=$(find "$UAT_DIR" -name "$pattern" 2>/dev/null | head -1)
    if [ -n "$found" ]; then
      UAT_FILE="$found"
      break
    fi
  done
  if [ -z "$UAT_FILE" ] || [ ! -f "$UAT_FILE" ]; then
    log_error "UAT document not found for Epic $epic_id"
    log_error "Searched in: $UAT_DIR"
    log_error "Expected: epic-${epic_id}-uat.md"
    return 1
  fi
  log "Found UAT document: $UAT_FILE"
  # Validate structure - check for scenarios section
  if ! grep -qE "^##.*[Ss]cenario|^##.*[Tt]est|^##.*[Cc]riteria" "$UAT_FILE"; then
    log_warn "UAT document may not have standard scenario sections"
  fi
  # Count scenario blocks (### headers or numbered items). grep -c already
  # prints "0" when nothing matches but exits 1, so only the status is
  # neutralised here; echoing a second "0" would produce a two-line value.
  SCENARIO_COUNT=$(grep -cE "^###|^[0-9]+\." "$UAT_FILE" 2>/dev/null) || true
  SCENARIO_COUNT="${SCENARIO_COUNT:-0}"
  log "Found approximately $SCENARIO_COUNT scenario entries"
  return 0
}
# =============================================================================
# Section 5: Scenario Classification
# =============================================================================
# Arrays to store classified scenarios
# Classification buckets; each entry has the form "id|name|command".
declare -a AUTOMATABLE_SCENARIOS SEMI_AUTO_SCENARIOS MANUAL_SCENARIOS
# Split a UAT document into scenario blocks and classify each one.
#
# A block starts at a "###" header or a numbered list item ("1. ...") and
# extends up to the next such line. Each block is handed to
# classify_single_scenario with a running number, the header text and the
# full block content.
#
# Arguments:
#   $1 - path to the UAT markdown file
# Globals written:
#   AUTOMATABLE_SCENARIOS, SEMI_AUTO_SCENARIOS, MANUAL_SCENARIOS (reset here,
#   filled by classify_single_scenario)
classify_scenarios() {
  local uat_file="$1"
  local line next_name
  local current_scenario=""
  local current_name=""
  local scenario_num=0
  local header_re='^###[[:space:]]*(.*)'
  local numbered_re='^[0-9]+\.[[:space:]]+(.*)'
  # Reset arrays
  AUTOMATABLE_SCENARIOS=()
  SEMI_AUTO_SCENARIOS=()
  MANUAL_SCENARIOS=()
  # "|| [ -n "$line" ]" keeps a final line that has no trailing newline.
  while IFS= read -r line || [ -n "$line" ]; do
    if [[ "$line" =~ $header_re ]] || [[ "$line" =~ $numbered_re ]]; then
      # Capture the title before anything else can overwrite BASH_REMATCH.
      next_name="${BASH_REMATCH[1]}"
      # Flush the block collected so far. The classifier's exit status carries
      # no information for this loop, so it must not trip errexit.
      if [ -n "$current_scenario" ]; then
        classify_single_scenario "$scenario_num" "$current_name" "$current_scenario" || true
      fi
      # Plain assignment: ((scenario_num++)) evaluates to 0 on the first
      # increment, which is a failing status under `set -e`.
      scenario_num=$((scenario_num + 1))
      current_name="$next_name"
      current_scenario="$line"
    elif [ -n "$current_scenario" ]; then
      # Continue accumulating scenario content
      current_scenario+=$'\n'"$line"
    fi
  done < "$uat_file"
  # Handle last scenario
  if [ -n "$current_scenario" ]; then
    classify_single_scenario "$scenario_num" "$current_name" "$current_scenario" || true
  fi
  log "Classification complete:"
  log " Automatable: ${#AUTOMATABLE_SCENARIOS[@]}"
  log " Semi-auto: ${#SEMI_AUTO_SCENARIOS[@]}"
  log " Manual: ${#MANUAL_SCENARIOS[@]}"
}
# Classify one scenario block as automatable, semi-automated or manual and
# append it to the matching global array as "id|name|command".
#
# Arguments:
#   $1 - scenario number
#   $2 - scenario title
#   $3 - full scenario text (header plus body)
# Returns:
#   Always 0. The original ended on `[ "$VERBOSE" = true ] && log ...`, which
#   made the function return 1 whenever VERBOSE was false and aborted callers
#   running under `set -e`.
classify_single_scenario() {
  local id="$1"
  local name="$2"
  local content="$3"
  local cmd=""
  local auto_re='npx|npm run|yarn|node |curl |wget |pytest|jest|vitest|--version|/health|/api/|exit code|returns [0-9]|\.sh |bash '
  local semi_re='test-send|email|inbox|check your|verify.*manually|setup.*first|start.*server'
  # Records are '|'-delimited, so a literal '|' in the title would shift the
  # fields when the record is split again later.
  name="${name//|/-}"
  if grep -qiE -- "$auto_re" <<< "$content"; then
    # Prefer the first inline code span; otherwise fall back to the first
    # thing that looks like an npx / npm run / curl invocation.
    cmd=$(grep -oE '`[^`]+`' <<< "$content" | head -1 | tr -d '`')
    if [ -z "$cmd" ]; then
      cmd=$(grep -oE 'npx [a-zA-Z0-9_-]+.*|npm run [a-zA-Z0-9_:-]+.*|curl [^[:space:]]+.*' <<< "$content" | head -1)
    fi
    AUTOMATABLE_SCENARIOS+=("$id|$name|$cmd")
    if [ "$VERBOSE" = true ]; then
      log " [AUTO] Scenario $id: $name"
    fi
  elif grep -qiE -- "$semi_re" <<< "$content"; then
    SEMI_AUTO_SCENARIOS+=("$id|$name|")
    if [ "$VERBOSE" = true ]; then
      log " [SEMI] Scenario $id: $name"
    fi
  else
    # Everything else is manual
    MANUAL_SCENARIOS+=("$id|$name|")
    if [ "$VERBOSE" = true ]; then
      log " [MANUAL] Scenario $id: $name"
    fi
  fi
  return 0
}
# =============================================================================
# Section 5.5: Human Intervention Detection
# =============================================================================
# Arrays to store human intervention items
# Findings collected while scenarios run; each entry is
# "scenario_id|scenario_name|matched output line".
declare -a HUMAN_INTERVENTION_BLOCKING HUMAN_INTERVENTION_WARNING

# Extended regexes (matched case-insensitively) that are recorded as BLOCKING.
# NOTE(review): alternation binds loosest, so the third entry also matches a
# bare "missing" or "invalid" anywhere in the output - confirm that is intended.
BLOCKING_PATTERNS=()
BLOCKING_PATTERNS+=('EACCES|permission denied')
BLOCKING_PATTERNS+=('\.env|environment variable|not set|undefined|not defined')
BLOCKING_PATTERNS+=('API[_-]?KEY|SECRET|TOKEN.*required|missing|invalid')
BLOCKING_PATTERNS+=('authentication failed|unauthorized|401|403')
BLOCKING_PATTERNS+=('license|subscription|quota exceeded')
BLOCKING_PATTERNS+=('EPERM|operation not permitted')
BLOCKING_PATTERNS+=('credentials.*required|credentials.*missing')

# Extended regexes (matched case-insensitively) that are recorded as WARNING.
WARNING_PATTERNS=()
WARNING_PATTERNS+=('connection refused|ECONNREFUSED|ETIMEDOUT|timeout')
WARNING_PATTERNS+=('check.*inbox|verify.*email|manual.*verification')
WARNING_PATTERNS+=('relation.*does not exist|migration|table.*not found')
WARNING_PATTERNS+=('deprecated|warning:')
WARNING_PATTERNS+=('rate limit|throttl')
WARNING_PATTERNS+=('service unavailable|503')
WARNING_PATTERNS+=('could not connect|connection failed')
# Scan a scenario's combined output for signs that a human has to step in.
#
# For every entry of BLOCKING_PATTERNS / WARNING_PATTERNS that matches
# (case-insensitive extended regex), the first matching output line is
# appended to HUMAN_INTERVENTION_BLOCKING / HUMAN_INTERVENTION_WARNING as
# "scenario_id|scenario_name|line".
#
# Arguments:
#   $1 - command output to scan (stdout and stderr concatenated by the caller)
#   $2 - scenario id
#   $3 - scenario name
# Returns:
#   Always 0. Previously the status of a trailing `[ ... ] && log_warn` leaked
#   out as 1 when the last pattern matched with VERBOSE=false, which aborted
#   callers running under `set -e`.
detect_human_intervention() {
  local error_output="$1"
  local scenario_id="$2"
  local scenario_name="$3"
  local pattern matched_line
  # Check for BLOCKING patterns
  for pattern in "${BLOCKING_PATTERNS[@]}"; do
    # -m 1 stops at the first hit, so one grep both tests and extracts.
    if matched_line=$(grep -iE -m 1 -- "$pattern" <<< "$error_output"); then
      HUMAN_INTERVENTION_BLOCKING+=("$scenario_id|$scenario_name|$matched_line")
      if [ "$VERBOSE" = true ]; then
        log_warn " [BLOCKING] Detected: $matched_line"
      fi
    fi
  done
  # Check for WARNING patterns
  for pattern in "${WARNING_PATTERNS[@]}"; do
    if matched_line=$(grep -iE -m 1 -- "$pattern" <<< "$error_output"); then
      HUMAN_INTERVENTION_WARNING+=("$scenario_id|$scenario_name|$matched_line")
      if [ "$VERBOSE" = true ]; then
        log_warn " [WARNING] Detected: $matched_line"
      fi
    fi
  done
  return 0
}
# Turn a failure's output and exit code into a one-line hint on stdout.
#
# The output is tested against an ordered list of case-insensitive regexes;
# the first hit decides the hint. When nothing matches, the exit code decides
# (1, then 124), and anything else yields a generic message.
#
# Arguments:
#   $1 - combined command output
#   $2 - exit code of the command
analyze_root_cause() {
  local error_output="$1"
  local exit_code="$2"
  # Flat list of (regex, hint) pairs, checked in order.
  local -a rules=(
    '\.env|environment variable|not set'
    'Missing environment configuration. Check .env file or .env.example for required variables.'
    'API[_-]?KEY|SECRET|TOKEN'
    'Missing or invalid API credentials. Verify API keys are correctly configured.'
    'connection refused|ECONNREFUSED'
    'Service connection failed. Ensure the required service is running (database, redis, etc.).'
    'relation.*does not exist|table.*not found'
    'Database schema issue. Run migrations or check database setup.'
    'permission denied|EACCES|EPERM'
    'Permission issue. Check file/directory permissions or run with appropriate privileges.'
    'timeout|ETIMEDOUT'
    'Operation timed out. Check network connectivity or increase timeout.'
  )
  local idx
  for ((idx = 0; idx < ${#rules[@]}; idx += 2)); do
    if echo "$error_output" | grep -qiE "${rules[idx]}"; then
      echo "${rules[idx + 1]}"
      return 0
    fi
  done
  # No textual clue: fall back to the exit code.
  if [ "$exit_code" -eq 1 ]; then
    echo "Command failed with exit code 1. Check the error output for specific details."
    return 0
  fi
  if [ "$exit_code" -eq 124 ]; then
    echo "Command timed out. Consider increasing timeout or checking for blocking operations."
    return 0
  fi
  echo "Analyze error output above. Exit code: $exit_code"
}
# =============================================================================
# Section 5.6: Story Context Extraction
# =============================================================================
# Print one section of a markdown file on stdout.
#
# The section opens at a heading ("##" or deeper) matching one of the given
# regexes and closes at the next level-2 ("## ") heading. The opening heading
# is replaced by the supplied title line.
#
# Arguments:
#   $1 - markdown file
#   $2 - title line to print in place of the opening heading
#   $3 - extended regex for the opening heading, matched case-insensitively
#   $4 - optional extended regex for the opening heading, matched case-sensitively
# Returns:
#   0 when the section was found and printed, 1 when it is absent.
_uat_print_story_section() {
  local file="$1"
  local title="$2"
  local ci_regex="$3"
  local cs_regex="${4:-}"
  local level2_re='^##[^#]'
  local line
  local content=""
  local in_section=false
  local is_start
  while IFS= read -r line || [ -n "$line" ]; do
    # Only heading lines can open or close a section, so grep runs on those only.
    if [[ "$line" == '##'* ]]; then
      is_start=false
      if grep -qiE -- "$ci_regex" <<< "$line"; then
        is_start=true
      elif [ -n "$cs_regex" ] && grep -qE -- "$cs_regex" <<< "$line"; then
        is_start=true
      fi
      if [ "$is_start" = true ] && [ "$in_section" = false ]; then
        in_section=true
        if [ -z "$content" ]; then
          content="${title}"$'\n'
        fi
        continue
      fi
      # A different level-2 heading ends the section. Sub-headings, and further
      # opening-style headings inside the section, are kept as content.
      if [ "$is_start" = false ] && [[ "$line" =~ $level2_re ]]; then
        in_section=false
      fi
    fi
    if [ "$in_section" = true ]; then
      content+="${line}"$'\n'
    fi
  done < "$file"
  [ -n "$content" ] || return 1
  printf '%s\n' "$content"
}

# Append acceptance criteria and dev-agent notes of an epic's stories to a file.
#
# Story files are looked up under $STORIES_DIR by the name prefixes
# "<epic>-", "epic-<epic>-" and "0<epic>-".
#
# Arguments:
#   $1 - epic id
#   $2 - file to append to
# Returns:
#   0 on success, 1 when no story file exists (a note is still appended).
extract_story_context() {
  local epic_id="$1"
  local output_file="$2"
  local -a story_files=()
  local pattern file story_file story_name
  log "Extracting story context for Epic $epic_id..."
  # Find story files for this epic
  for pattern in "${epic_id}-" "epic-${epic_id}-" "0${epic_id}-"; do
    while IFS= read -r -d '' file; do
      story_files+=("$file")
    done < <(find "$STORIES_DIR" -name "${pattern}*.md" -print0 2>/dev/null)
  done
  if [ ${#story_files[@]} -eq 0 ]; then
    log_warn "No story files found for Epic $epic_id in $STORIES_DIR"
    echo "No story files found for this epic." >> "$output_file"
    return 1
  fi
  log "Found ${#story_files[@]} story file(s)"
  echo "## Story Context" >> "$output_file"
  echo "" >> "$output_file"
  for story_file in "${story_files[@]}"; do
    story_name=$(basename "$story_file" .md)
    {
      echo "### $story_name"
      echo ""
      # "AC" is matched case-sensitively as a whole word (optionally "ACs" or
      # "AC<digits>"). The previous case-insensitive "^##.*AC" also fired on
      # headings such as "## Rollback" or "## Feedback" and replaced the real
      # criteria with that section's text.
      _uat_print_story_section "$story_file" "**Acceptance Criteria:**" \
        '^##.*acceptance criteria' \
        '^##(.*[^[:alnum:]])?ACs?[0-9]*([^[:alnum:]]|$)' \
        || echo "*No acceptance criteria section found in this story.*"
      echo ""
      # The implementation notes are optional; nothing is written when absent.
      _uat_print_story_section "$story_file" "**Dev Agent Record (Implementation Notes):**" \
        '^##.*dev agent record|^##.*implementation notes' \
        || true
      echo ""
      echo "---"
      echo ""
    } >> "$output_file"
  done
  return 0
}
# =============================================================================
# Section 6: Scenario Execution
# =============================================================================
# Arrays to store results
# Outcome of the latest execution pass: ids of passed / failed scenarios, plus
# one '|'-delimited detail record per failure.
declare -a PASSED_SCENARIOS FAILED_SCENARIOS FAILED_DETAILS
# Run the scenarios selected by the gate mode and report how many passed.
#
# Arguments:
#   $1 - gate mode: "skip" (run nothing, pass), "quick" (automatable only) or
#        "full" (automatable plus semi-automated)
# Globals written:
#   PASSED_SCENARIOS, FAILED_SCENARIOS, FAILED_DETAILS (reset, then filled by
#   execute_single_scenario)
# Returns:
#   0 when nothing failed (including "nothing to run"), 1 otherwise.
execute_scenarios() {
  local gate_mode="$1"
  local -a scenarios_to_run=()
  local scenario_entry scenario_id scenario_name scenario_cmd
  local total passed failed
  # Reset results
  PASSED_SCENARIOS=()
  FAILED_SCENARIOS=()
  FAILED_DETAILS=()
  # Skip mode - pass automatically
  if [ "$gate_mode" = "skip" ]; then
    log "Gate mode: skip - bypassing scenario execution"
    echo "UAT_GATE_RESULT: PASS"
    echo "UAT_SCENARIOS_PASSED: 0/0 (skipped)"
    return 0
  fi
  # Select scenarios based on gate mode
  if [ "$gate_mode" = "quick" ]; then
    scenarios_to_run=("${AUTOMATABLE_SCENARIOS[@]}")
  elif [ "$gate_mode" = "full" ]; then
    scenarios_to_run=("${AUTOMATABLE_SCENARIOS[@]}" "${SEMI_AUTO_SCENARIOS[@]}")
  fi
  if [ ${#scenarios_to_run[@]} -eq 0 ]; then
    log_warn "No automatable scenarios found - gate passes by default"
    echo "UAT_GATE_RESULT: PASS"
    echo "UAT_SCENARIOS_PASSED: 0/0 (none automatable)"
    return 0
  fi
  log_section "Executing ${#scenarios_to_run[@]} Scenarios"
  for scenario_entry in "${scenarios_to_run[@]}"; do
    # Entries are "id|name|command"; the command is the last field, so any
    # '|' inside it (shell pipes) stays intact.
    IFS='|' read -r scenario_id scenario_name scenario_cmd <<< "$scenario_entry"
    # A failing scenario is recorded in FAILED_SCENARIOS by the callee. Its
    # non-zero status must not abort the loop under `set -e`, otherwise the
    # remaining scenarios never run.
    execute_single_scenario "$scenario_id" "$scenario_name" "$scenario_cmd" || true
  done
  # Report results
  total=${#scenarios_to_run[@]}
  passed=${#PASSED_SCENARIOS[@]}
  failed=${#FAILED_SCENARIOS[@]}
  echo ""
  log "Results: $passed/$total passed"
  if [ "$failed" -eq 0 ]; then
    return 0
  fi
  return 1
}
# Print the current time in milliseconds since the epoch.
# `date +%s%N` only yields digits where %N is supported (GNU date); elsewhere
# the result is rejected and whole-second resolution is used instead, so the
# duration arithmetic never sees a non-numeric value.
_uat_now_ms() {
  local ns
  ns=$(date +%s%N 2>/dev/null) || ns=""
  if [[ "$ns" =~ ^[0-9]{16,}$ ]]; then
    echo $(( ns / 1000000 ))
  else
    echo $(( $(date +%s) * 1000 ))
  fi
}

# Run one scenario command under a timeout and record the outcome.
#
# Arguments:
#   $1 - scenario id
#   $2 - scenario name
#   $3 - shell command to run (may be empty)
# Globals read:
#   VERBOSE, DRY_RUN, TIMEOUT_SECONDS, LOG_FILE
# Globals written:
#   PASSED_SCENARIOS / FAILED_SCENARIOS get the id; FAILED_DETAILS gets
#   "id|name|cmd|kind|exit_code|stdout|stderr" ("id|name|reason|manual|1" when
#   there is no command).
#   NOTE(review): the record is '|'-delimited but cmd/stdout/stderr are stored
#   verbatim, so a '|' inside them shifts fields for whoever splits the record.
# Returns:
#   The command's exit status (124 = timed out); 1 when no command was given;
#   0 in dry-run mode.
execute_single_scenario() {
  local scenario_id="$1"
  local scenario_name="$2"
  local scenario_cmd="$3"
  local start_ms end_ms duration_ms
  local output=""
  local stderr=""
  local exit_code=0
  local stderr_file
  local failure_kind
  echo ""
  log "Scenario $scenario_id: $scenario_name"
  # Without a command there is nothing to automate.
  if [ -z "$scenario_cmd" ]; then
    log_warn " No command detected - marking as manual verification needed"
    FAILED_SCENARIOS+=("$scenario_id")
    FAILED_DETAILS+=("$scenario_id|$scenario_name|No automatable command found|manual|1")
    return 1
  fi
  if [ "$VERBOSE" = true ]; then
    log " Command: $scenario_cmd"
  fi
  if [ "$DRY_RUN" = true ]; then
    echo " [DRY RUN] Would execute: $scenario_cmd"
    PASSED_SCENARIOS+=("$scenario_id")
    return 0
  fi
  # Private, unpredictable file for stderr; fall back to the PID-based name
  # only when mktemp is unavailable.
  stderr_file=$(mktemp "${TMPDIR:-/tmp}/uat-stderr.XXXXXX" 2>/dev/null) \
    || stderr_file="/tmp/uat-stderr-$$.txt"
  start_ms=$(_uat_now_ms)
  # "|| exit_code=$?" captures the status without toggling errexit, so the
  # caller's shell options are left exactly as they were.
  if command -v timeout >/dev/null 2>&1; then
    # timeout exits with 124 when the time limit is hit.
    output=$(timeout "$TIMEOUT_SECONDS" bash -c "$scenario_cmd" 2>"$stderr_file") || exit_code=$?
  else
    # macOS fallback using perl
    output=$(perl -e 'alarm shift @ARGV; exec @ARGV' "$TIMEOUT_SECONDS" bash -c "$scenario_cmd" 2>"$stderr_file") || exit_code=$?
    # A process killed by SIGALRM reports 128+14; normalise it to 124 so the
    # timeout branch below is taken.
    if [ "$exit_code" -eq 142 ]; then
      exit_code=124
    fi
  fi
  end_ms=$(_uat_now_ms)
  duration_ms=$(( end_ms - start_ms ))
  if [ -f "$stderr_file" ]; then
    stderr=$(cat "$stderr_file")
  fi
  rm -f "$stderr_file"
  # Evaluate result
  if [ "$exit_code" -eq 0 ]; then
    log_success " Scenario $scenario_id: PASS (${duration_ms}ms)"
    PASSED_SCENARIOS+=("$scenario_id")
    echo "[$(date '+%Y-%m-%d %H:%M:%S')] Scenario $scenario_id PASS: $scenario_cmd" >> "$LOG_FILE"
  else
    if [ "$exit_code" -eq 124 ]; then
      failure_kind="timeout"
      log_error " Scenario $scenario_id: FAIL (timeout after ${TIMEOUT_SECONDS}s)"
    else
      failure_kind="error"
      log_error " Scenario $scenario_id: FAIL (exit code $exit_code)"
      if [ -n "$stderr" ] && [ "$VERBOSE" = true ]; then
        echo " Error: $stderr"
      fi
    fi
    FAILED_SCENARIOS+=("$scenario_id")
    FAILED_DETAILS+=("$scenario_id|$scenario_name|$scenario_cmd|$failure_kind|$exit_code|$output|$stderr")
    # Detection is best-effort; its status must not replace the scenario's.
    detect_human_intervention "$output$stderr" "$scenario_id" "$scenario_name" || true
  fi
  return "$exit_code"
}
# =============================================================================
# Section 7: Gate Evaluation
# =============================================================================
# Decide the gate from the recorded failures.
# Globals read: FAILED_SCENARIOS
# Returns: 0 when no scenario failed, 1 otherwise.
evaluate_gate() {
  local failure_count=${#FAILED_SCENARIOS[@]}
  log_section "Gate Evaluation"
  if [ "$failure_count" -ne 0 ]; then
    log_error "$failure_count scenario(s) failed"
    return 1
  fi
  log_success "All automatable scenarios passed"
  return 0
}
# =============================================================================
# Section 8: Self-Healing Loop
# =============================================================================
# Write the markdown "fix context" for one self-healing attempt.
#
# The document is rendered from the workflow template when it exists
# (otherwise from a minimal built-in header), followed by the human
# intervention items, one section per failed scenario, the story context and
# fixed instructions.
#
# Arguments:
#   $1 - epic id
#   $2 - attempt number
# Globals read:
#   FIX_DIR, PROJECT_ROOT, MAX_RETRIES, UAT_FILE, STORIES_DIR, METRICS_DIR,
#   PASSED_SCENARIOS, FAILED_SCENARIOS, FAILED_DETAILS, AUTOMATABLE_SCENARIOS,
#   HUMAN_INTERVENTION_BLOCKING, HUMAN_INTERVENTION_WARNING
# Outputs:
#   stdout - ONLY the path of the generated file, so `path=$(generate_fix_context ...)`
#            yields a usable path; progress messages go to stderr.
generate_fix_context() {
  local epic_id="$1"
  local attempt="$2"
  local fix_file timestamp template
  local item detail
  local scenario_id scenario_name matched_line
  local cmd error_type exit_code output stderr root_cause_hint
  mkdir -p "$FIX_DIR"
  fix_file="$FIX_DIR/epic-${epic_id}-fix-context-${attempt}.md"
  timestamp=$(date -u +"%Y-%m-%dT%H:%M:%SZ")
  # Find template
  template="$PROJECT_ROOT/src/bmm/workflows/5-validation/uat-validate/uat-fix-context-template.md"
  if [ -f "$template" ]; then
    # Render template with basic variable substitution
    sed -e "s/{epic_id}/$epic_id/g" \
      -e "s/{attempt}/$attempt/g" \
      -e "s/{timestamp}/$timestamp/g" \
      -e "s/{max_retries}/$MAX_RETRIES/g" \
      -e "s/{next_attempt}/$((attempt + 1))/g" \
      -e "s/{failure_count}/${#FAILED_SCENARIOS[@]}/g" \
      -e "s|{uat_doc_path}|$UAT_FILE|g" \
      "$template" > "$fix_file"
  else
    # Create minimal fix context without template
    cat > "$fix_file" << EOF
# UAT Fix Context - Epic $epic_id (Attempt $attempt)
**Generated:** $timestamp
**Epic:** $epic_id
**Gate Result:** FAIL (${#PASSED_SCENARIOS[@]}/${#AUTOMATABLE_SCENARIOS[@]} scenarios passed)
---
## Summary
This document contains the context needed to fix UAT failures for Epic $epic_id.
**Failures to fix:** ${#FAILED_SCENARIOS[@]}
**Fix attempt:** $attempt of $MAX_RETRIES
---
EOF
  fi
  # Add human intervention section
  {
    echo ""
    echo "## Human Intervention Items"
    echo ""
    if [ ${#HUMAN_INTERVENTION_BLOCKING[@]} -gt 0 ]; then
      echo "### BLOCKING (likely requires human action)"
      echo ""
      for item in "${HUMAN_INTERVENTION_BLOCKING[@]}"; do
        IFS='|' read -r scenario_id scenario_name matched_line <<< "$item"
        echo "- [ ] **Scenario $scenario_id ($scenario_name):** $matched_line"
      done
      echo ""
    fi
    if [ ${#HUMAN_INTERVENTION_WARNING[@]} -gt 0 ]; then
      echo "### WARNING (may need attention)"
      echo ""
      for item in "${HUMAN_INTERVENTION_WARNING[@]}"; do
        IFS='|' read -r scenario_id scenario_name matched_line <<< "$item"
        echo "- [ ] **Scenario $scenario_id ($scenario_name):** $matched_line"
      done
      echo ""
    fi
    if [ ${#HUMAN_INTERVENTION_BLOCKING[@]} -eq 0 ] && [ ${#HUMAN_INTERVENTION_WARNING[@]} -eq 0 ]; then
      echo "*No human intervention items detected. All failures appear to be code-fixable.*"
      echo ""
    fi
    echo "**Instructions for Barry:** Attempt to fix what you can. For items you cannot resolve programmatically, document them clearly in the fix commit message and update the human-actions file."
    echo ""
    echo "---"
    # Append failed scenarios details with root cause hints
    echo ""
    echo "## Failed Scenarios"
    echo ""
  } >> "$fix_file"
  for detail in "${FAILED_DETAILS[@]}"; do
    # Read the WHOLE record (-d ''), not just its first line: stdout and
    # stderr are usually multi-line and stderr is the last field, so a
    # line-based read would drop it. read returns non-zero at end of input
    # even though the variables are assigned, hence "|| true".
    IFS='|' read -r -d '' scenario_id scenario_name cmd error_type exit_code output stderr \
      < <(printf '%s' "$detail") || true
    # Generate root cause hint for this failure
    root_cause_hint=$(analyze_root_cause "$output$stderr" "$exit_code")
    cat >> "$fix_file" << EOF
### Scenario $scenario_id: $scenario_name
**Command Executed:**
\`\`\`bash
$cmd
\`\`\`
**Error Type:** $error_type
**Exit Code:** $exit_code
**Output:**
\`\`\`
$output
\`\`\`
**Error Output:**
\`\`\`
$stderr
\`\`\`
**Root Cause Hint:** $root_cause_hint
---
EOF
  done
  # Extract and append story context (acceptance criteria + dev agent record).
  # Its progress messages are diverted to stderr, and "no stories found"
  # (status 1) is not an error for this document.
  echo "" >> "$fix_file"
  extract_story_context "$epic_id" "$fix_file" >&2 || true
  # Add context references section
  cat >> "$fix_file" << EOF
## Context References
The following files provide additional context for fixing these failures:
| File | Purpose |
|------|---------|
| \`$UAT_FILE\` | Full UAT document with all scenarios |
| \`$STORIES_DIR/${epic_id}-*\` | Story files with acceptance criteria |
| \`$METRICS_DIR/epic-${epic_id}-metrics.yaml\` | Execution metrics |
## Fix Instructions
Address the failures above in priority order. For each fix:
1. **Analyze** - Understand why the scenario failed
2. **Locate** - Find the relevant code files
3. **Fix** - Implement the minimum change to resolve the failure
4. **Verify** - Run the scenario command locally to confirm fix
5. **Commit** - Use message format: \`fix(epic-$epic_id): {description}\`
### Constraints
- Only fix the identified failures - do not refactor unrelated code
- Run the specific failing commands to verify each fix
- Run project tests after all fixes: \`npm test\`
- If a fix requires changes that would break other scenarios, document the tradeoff
## After Fixing
Once all fixes are committed, the UAT validation will automatically re-run.
- **If all pass:** Epic continues to next phase
- **If failures remain:** Another fix context will be generated (attempt $((attempt + 1)))
- **If max retries exceeded:** Chain halts for human intervention
---
*Generated by UAT Validate Workflow*
*BMAD Method - Epic Chain Self-Healing*
*Fix Context: epic-${epic_id}-fix-context-${attempt}.md*
EOF
  # The console copy goes to stderr; stdout is reserved for the path.
  log "Fix context generated: $fix_file" >&2
  echo "$fix_file"
}
# Hand the fix context to a non-interactive `claude` session and judge the
# outcome from the markers it prints.
#
# Arguments:
#   $1 - path of the fix context document
#   $2 - epic id
#   $3 - attempt number
# Globals read:
#   MAX_RETRIES, DRY_RUN, LOG_FILE, FAILED_SCENARIOS,
#   HUMAN_INTERVENTION_BLOCKING, HUMAN_INTERVENTION_WARNING
# Returns:
#   0 when the session output contains "FIX_COMPLETE" (or in dry-run mode),
#   1 otherwise.
run_quick_dev_fix() {
  local fix_context_file="$1"
  local epic_id="$2"
  local attempt="$3"
  local human_intervention_note=""
  local fix_prompt
  local result
  local note_line
  log "Spawning quick-dev fix session (attempt $attempt/$MAX_RETRIES)"
  # The extra guidance is only added when something was flagged.
  if (( ${#HUMAN_INTERVENTION_BLOCKING[@]} + ${#HUMAN_INTERVENTION_WARNING[@]} > 0 )); then
    for note_line in \
      "IMPORTANT: Some failures may require human intervention (marked in the fix context)." \
      "- For items you CANNOT fix programmatically (missing API keys, .env configuration, etc.):" \
      "Document them clearly and proceed with what you CAN fix." \
      "- Do NOT attempt to create fake credentials or placeholder values." \
      "- Focus on code-level fixes that don't require external configuration."; do
      human_intervention_note+=$'\n'"${note_line}"
    done
  fi
  # Assemble the prompt from a here-document. read hits end-of-input without
  # finding its NUL delimiter and therefore reports failure even though the
  # variable is filled - hence "|| true".
  IFS= read -r -d '' fix_prompt << EOF || true
You are Barry, the Quick Flow Solo Dev.
FIRST: Read the fix context document at:
$fix_context_file
This document contains:
1. Human Intervention Items - issues that may require human action
2. Failed Scenarios - with commands, errors, and root cause hints
3. Story Context - acceptance criteria and implementation notes from the original stories
Your task:
1. Read the fix context document completely before starting
2. Review the Human Intervention Items section - note which issues you CAN vs CANNOT fix
3. For each failed scenario:
a. Check the root cause hint
b. Review the related acceptance criteria
c. Implement targeted fixes for code-level issues
4. Run the failing commands to verify your fixes work
5. Stage changes: git add -A
6. Commit with message: fix(epic-${epic_id}): UAT fix #${attempt} - {brief description}
$human_intervention_note
Constraints:
- Only fix the identified failures - do not refactor unrelated code
- Run the specific failing commands to verify each fix
- Run project tests after all fixes: npm test
- If a fix requires external configuration (API keys, .env), document it but don't block on it
When done, output exactly:
FIX_COMPLETE: {number_fixed}/${#FAILED_SCENARIOS[@]}
HUMAN_ACTION_NEEDED: {yes/no}
EOF
  # The here-document ends with a newline the prompt itself does not carry.
  fix_prompt="${fix_prompt%$'\n'}"
  if [ "$DRY_RUN" = true ]; then
    echo "[DRY RUN] Would spawn Claude for fixes with prompt:"
    echo " Fix context: $fix_context_file"
    return 0
  fi
  # Execute in isolated context; a failing session is judged by its output.
  result=$(claude --dangerously-skip-permissions -p "$fix_prompt" 2>&1) || true
  echo "$result" >> "$LOG_FILE"
  if [[ "$result" != *"FIX_COMPLETE"* ]]; then
    log_warn "Quick-dev fix session may not have completed cleanly"
    return 1
  fi
  log_success "Quick-dev fix session completed"
  # Check if human action was flagged
  if [[ "$result" == *"HUMAN_ACTION_NEEDED: yes"* ]]; then
    log_warn "Barry indicated human action is needed for some issues"
  fi
  return 0
}
# Write a checklist of issues that need a human, one numbered entry per
# recorded BLOCKING and WARNING item, each with a suggested action.
#
# Arguments:
#   $1 - epic id
#   $2 - number of the last fix attempt
# Globals read:
#   FIX_DIR, MAX_RETRIES, UAT_GATE_MODE, PASSED_SCENARIOS,
#   AUTOMATABLE_SCENARIOS, HUMAN_INTERVENTION_BLOCKING, HUMAN_INTERVENTION_WARNING
# Outputs:
#   Prints the path of the generated file as the last line on stdout.
#   Nothing is written when no items were recorded.
# Returns:
#   0
generate_human_actions_file() {
  local epic_id="$1"
  local final_attempt="$2"
  local human_actions_file="$FIX_DIR/epic-${epic_id}-human-actions.md"
  local timestamp
  local action_num=0
  local item scenario_id scenario_name matched_line
  timestamp=$(date -u +"%Y-%m-%dT%H:%M:%SZ")
  # Only generate if there are human intervention items
  if [ ${#HUMAN_INTERVENTION_BLOCKING[@]} -eq 0 ] && [ ${#HUMAN_INTERVENTION_WARNING[@]} -eq 0 ]; then
    log "No human actions required"
    return 0
  fi
  log "Generating human actions file: $human_actions_file"
  # The directory may not exist yet when this is the first file written there.
  mkdir -p "$FIX_DIR"
  cat > "$human_actions_file" << EOF
# Human Actions Required - Epic $epic_id
**Generated:** $timestamp
**After:** Fix attempt $final_attempt of $MAX_RETRIES
**UAT Result:** FAIL (${#PASSED_SCENARIOS[@]}/${#AUTOMATABLE_SCENARIOS[@]} scenarios passed)
---
## Required Actions
The following items could not be automatically fixed and require human intervention.
EOF
  # Add BLOCKING items
  if [ ${#HUMAN_INTERVENTION_BLOCKING[@]} -gt 0 ]; then
    for item in "${HUMAN_INTERVENTION_BLOCKING[@]}"; do
      # Plain assignment: ((action_num++)) evaluates to 0 on the first
      # increment, which is a failing status under `set -e`.
      action_num=$((action_num + 1))
      IFS='|' read -r scenario_id scenario_name matched_line <<< "$item"
      cat >> "$human_actions_file" << EOF
### $action_num. $scenario_name (Scenario $scenario_id)
**Priority:** High (BLOCKING)
**Issue:** $matched_line
**Suggested Action:**
EOF
      # Add specific guidance based on pattern
      if grep -qiE "\.env|environment variable" <<< "$matched_line"; then
        cat >> "$human_actions_file" << EOF
Check your \`.env\` file and ensure all required environment variables are set.
Reference \`.env.example\` if available.
EOF
      elif grep -qiE "API[_-]?KEY|SECRET|TOKEN" <<< "$matched_line"; then
        cat >> "$human_actions_file" << EOF
Verify your API credentials are correctly configured.
Check the service dashboard for valid keys.
EOF
      elif grep -qiE "permission denied|EACCES" <<< "$matched_line"; then
        cat >> "$human_actions_file" << EOF
Check file/directory permissions. You may need to run with elevated privileges
or adjust ownership/permissions on the affected files.
EOF
      else
        echo "Review the error message and take appropriate action." >> "$human_actions_file"
        echo "" >> "$human_actions_file"
      fi
    done
  fi
  # Add WARNING items (numbering continues after the BLOCKING entries)
  if [ ${#HUMAN_INTERVENTION_WARNING[@]} -gt 0 ]; then
    for item in "${HUMAN_INTERVENTION_WARNING[@]}"; do
      action_num=$((action_num + 1))
      IFS='|' read -r scenario_id scenario_name matched_line <<< "$item"
      cat >> "$human_actions_file" << EOF
### $action_num. $scenario_name (Scenario $scenario_id)
**Priority:** Medium (WARNING)
**Issue:** $matched_line
**Suggested Action:**
EOF
      # Add specific guidance based on pattern
      if grep -qiE "connection refused|ECONNREFUSED" <<< "$matched_line"; then
        cat >> "$human_actions_file" << EOF
Ensure the required service is running (database, Redis, etc.).
Check service logs for startup errors.
EOF
      elif grep -qiE "inbox|email|manual.*verification" <<< "$matched_line"; then
        cat >> "$human_actions_file" << EOF
Manual verification required. Check the relevant inbox or UI to confirm
the expected behavior.
EOF
      elif grep -qiE "migration|relation.*does not exist" <<< "$matched_line"; then
        cat >> "$human_actions_file" << EOF
Database schema may need updating. Run migrations:
\`\`\`bash
npm run db:migrate # or your project's migration command
\`\`\`
EOF
      else
        echo "Review the warning and take appropriate action if needed." >> "$human_actions_file"
        echo "" >> "$human_actions_file"
      fi
    done
  fi
  cat >> "$human_actions_file" << EOF
---
## After Completing Actions
Re-run UAT validation:
\`\`\`bash
./scripts/uat-validate.sh $epic_id --gate-mode=$UAT_GATE_MODE
\`\`\`
---
*Generated by UAT Validate Workflow*
*BMAD Method - Epic Chain Self-Healing*
EOF
  echo "$human_actions_file"
}
# Repeatedly generate a fix context, spawn a fix session and re-run the
# scenarios until they pass or MAX_RETRIES attempts have been used.
#
# Arguments:
#   $1 - epic id
# Globals read:
#   MAX_RETRIES, UAT_GATE_MODE
# Globals written:
#   PASSED_SCENARIOS, FAILED_SCENARIOS, FAILED_DETAILS,
#   HUMAN_INTERVENTION_BLOCKING, HUMAN_INTERVENTION_WARNING
# Returns:
#   0 when a re-validation passes, 2 when all attempts are exhausted.
self_healing_loop() {
  local epic_id="$1"
  local attempt=0
  local fix_file
  local nl=$'\n'
  while [ "$attempt" -lt "$MAX_RETRIES" ]; do
    # Plain assignment: ((attempt++)) evaluates to 0 on the first increment,
    # which is a failing status under `set -e`.
    attempt=$((attempt + 1))
    log_section "Self-Healing Fix Loop (Attempt $attempt/$MAX_RETRIES)"
    # The intervention items recorded by the previous execution pass are still
    # in place here, so the fix context and the fix prompt can refer to them.
    fix_file=$(generate_fix_context "$epic_id" "$attempt")
    # The path is the last line printed; drop anything printed before it.
    fix_file="${fix_file##*"$nl"}"
    # Log human intervention summary
    if [ ${#HUMAN_INTERVENTION_BLOCKING[@]} -gt 0 ]; then
      log_warn "Detected ${#HUMAN_INTERVENTION_BLOCKING[@]} BLOCKING human intervention item(s)"
    fi
    if [ ${#HUMAN_INTERVENTION_WARNING[@]} -gt 0 ]; then
      log_warn "Detected ${#HUMAN_INTERVENTION_WARNING[@]} WARNING human intervention item(s)"
    fi
    # Run quick-dev fix
    if ! run_quick_dev_fix "$fix_file" "$epic_id" "$attempt"; then
      log_warn "Fix attempt $attempt may have issues"
    fi
    # Re-run validation
    log "Re-validating after fix attempt $attempt..."
    # Start the new pass from a clean slate, including the intervention items,
    # so they always describe the most recent run without duplicates.
    PASSED_SCENARIOS=()
    FAILED_SCENARIOS=()
    FAILED_DETAILS=()
    HUMAN_INTERVENTION_BLOCKING=()
    HUMAN_INTERVENTION_WARNING=()
    if execute_scenarios "$UAT_GATE_MODE"; then
      log_success "UAT passed after fix attempt $attempt"
      return 0
    fi
    log_warn "UAT still failing after attempt $attempt"
  done
  # Generate human actions file for remaining issues; a problem writing it
  # must not mask the "max retries" result.
  generate_human_actions_file "$epic_id" "$MAX_RETRIES" || true
  log_error "Max retries ($MAX_RETRIES) exceeded"
  return 2
}
# =============================================================================
# Section 9: Output Signals and Metrics
# =============================================================================
# Print a fresh metrics document (YAML) on stdout.
# The nesting mirrors the paths the yq branch of update_metrics writes
# (.validation.* and .validation.human_intervention.*).
#
# Arguments:
#   $1 epic id          $2 gate status        $3 fix attempts
#   $4 end time         $5 duration (seconds) $6 blocking count
#   $7 warning count
_uat_render_metrics_yaml() {
  cat << EOF
epic_id: "$1"
validation:
  gate_executed: true
  gate_status: "$2"
  fix_attempts: $3
  scenarios_passed: ${#PASSED_SCENARIOS[@]}
  scenarios_failed: ${#FAILED_SCENARIOS[@]}
  start_time: "${UAT_START_TIME:-}"
  end_time: "$4"
  duration_seconds: $5
  human_intervention:
    blocking: $6
    warning: $7
EOF
}

# Record the validation outcome in the epic's metrics file.
#
# A missing file is created from scratch. An existing file is updated in
# place with yq; without yq an existing file is left untouched and a warning
# is logged.
#
# Arguments:
#   $1 - epic id
#   $2 - gate status
#   $3 - number of fix attempts
# Globals read:
#   METRICS_DIR, UAT_START_EPOCH, UAT_START_TIME, PASSED_SCENARIOS,
#   FAILED_SCENARIOS, HUMAN_INTERVENTION_BLOCKING, HUMAN_INTERVENTION_WARNING
update_metrics() {
  local epic_id="$1"
  local gate_status="$2"
  local fix_attempts="$3"
  local metrics_file end_time end_epoch duration_seconds
  local blocking_count warning_count
  local written=true
  local expr=""
  local part
  local -a assignments
  mkdir -p "$METRICS_DIR"
  metrics_file="$METRICS_DIR/epic-${epic_id}-metrics.yaml"
  # Calculate timing. Without a recorded start the duration is reported as 0
  # instead of "seconds since 1970".
  end_time=$(date -u +"%Y-%m-%dT%H:%M:%SZ")
  end_epoch=$(date +%s)
  duration_seconds=$(( end_epoch - ${UAT_START_EPOCH:-$end_epoch} ))
  # Calculate human intervention counts
  blocking_count=${#HUMAN_INTERVENTION_BLOCKING[@]}
  warning_count=${#HUMAN_INTERVENTION_WARNING[@]}
  if [ ! -f "$metrics_file" ]; then
    # Create new metrics file
    _uat_render_metrics_yaml "$epic_id" "$gate_status" "$fix_attempts" \
      "$end_time" "$duration_seconds" "$blocking_count" "$warning_count" > "$metrics_file"
  elif command -v yq >/dev/null 2>&1; then
    # All assignments are applied by a single yq call, so the file is either
    # fully updated or left as it was.
    assignments=(
      ".validation.gate_executed = true"
      ".validation.gate_status = \"$gate_status\""
      ".validation.fix_attempts = $fix_attempts"
      ".validation.scenarios_passed = ${#PASSED_SCENARIOS[@]}"
      ".validation.scenarios_failed = ${#FAILED_SCENARIOS[@]}"
      ".validation.start_time = \"${UAT_START_TIME:-}\""
      ".validation.end_time = \"$end_time\""
      ".validation.duration_seconds = $duration_seconds"
      ".validation.human_intervention.blocking = $blocking_count"
      ".validation.human_intervention.warning = $warning_count"
    )
    for part in "${assignments[@]}"; do
      expr+="${expr:+ | }${part}"
    done
    yq -i "$expr" "$metrics_file"
  else
    # An existing YAML file cannot be edited safely without yq.
    log_warn "yq not found - metrics update may be incomplete"
    written=false
  fi
  if [ "$written" = true ]; then
    log "Metrics updated: $metrics_file"
  fi
  log " Duration: ${duration_seconds}s"
}
#######################################
# Print the UAT_* result lines on stdout.
#
# Globals:
#   AUTOMATABLE_SCENARIOS, PASSED_SCENARIOS                 (read, counts only)
#   HUMAN_INTERVENTION_BLOCKING, HUMAN_INTERVENTION_WARNING (read, counts only)
#   FIX_DIR, EPIC_ID (read, only when human action is required)
# Arguments:
#   $1 - gate status printed after UAT_GATE_RESULT
#   $2 - fix attempts printed after UAT_FIX_ATTEMPTS
# Outputs:
#   A blank line followed by one "KEY: value" line per signal.
#######################################
output_signals() {
  local gate_status="$1"
  local fix_attempts="$2"

  local scenario_total=${#AUTOMATABLE_SCENARIOS[@]}
  local scenario_passed=${#PASSED_SCENARIOS[@]}
  local blocking_items=${#HUMAN_INTERVENTION_BLOCKING[@]}
  local warning_items=${#HUMAN_INTERVENTION_WARNING[@]}
  local human_action_count=$((blocking_items + warning_items))

  # Any blocking or warning item means a human has to look at the result.
  local human_action_required
  if (( human_action_count > 0 )); then
    human_action_required="true"
  else
    human_action_required="false"
  fi

  printf '\n'
  printf 'UAT_GATE_RESULT: %s\n' "$gate_status"
  printf 'UAT_FIX_ATTEMPTS: %s\n' "$fix_attempts"
  printf 'UAT_SCENARIOS_PASSED: %s/%s\n' "$scenario_passed" "$scenario_total"
  printf 'UAT_HUMAN_ACTION_REQUIRED: %s\n' "$human_action_required"
  printf 'UAT_HUMAN_ACTION_COUNT: %s\n' "$human_action_count"

  # The actions file path is only advertised when there is something in it.
  if [[ "$human_action_required" == "true" ]]; then
    printf 'UAT_HUMAN_ACTION_FILE: %s\n' "$FIX_DIR/epic-${EPIC_ID}-human-actions.md"
  fi
}
#######################################
# Print one "- Scenario <id>: <matched line>" row per intervention record.
# Arguments:
#   $@ - records shaped "scenario_id|scenario_name|matched_line"
# Outputs:
#   One line per record on stdout.
#######################################
_print_intervention_items() {
  local record scenario_id matched_line
  for record in "$@"; do
    # The middle field (scenario name) is not shown; the last variable
    # receives the rest of the record, including any further '|'.
    IFS='|' read -r scenario_id _ matched_line <<< "$record"
    echo " - Scenario $scenario_id: $matched_line"
  done
}

#######################################
# Print the human-readable end-of-run summary.
#
# Globals (all read-only here):
#   EPIC_ID, UAT_GATE_MODE, UAT_START_EPOCH, LOG_FILE, UAT_FILE, FIX_DIR
#   AUTOMATABLE_SCENARIOS, SEMI_AUTO_SCENARIOS, MANUAL_SCENARIOS
#   PASSED_SCENARIOS, FAILED_SCENARIOS
#   HUMAN_INTERVENTION_BLOCKING, HUMAN_INTERVENTION_WARNING
#   RED, YELLOW, BOLD, NC
# Arguments:
#   $1 - gate status shown as "Gate Result"
#   $2 - number of fix attempts shown as "Fix Attempts"
# Outputs:
#   Summary text on stdout (header via log_header).
#######################################
print_summary() {
  local gate_status="$1"
  local fix_attempts="$2"
  local blocking_count=${#HUMAN_INTERVENTION_BLOCKING[@]}
  local warning_count=${#HUMAN_INTERVENTION_WARNING[@]}
  local human_action_count=$((blocking_count + warning_count))

  # Calculate duration; shown as "Nm Ns" once it reaches a minute.
  local end_epoch duration_seconds duration_display
  end_epoch=$(date +%s)
  duration_seconds=$((end_epoch - UAT_START_EPOCH))
  duration_display="${duration_seconds}s"
  if [ "$duration_seconds" -ge 60 ]; then
    local minutes=$((duration_seconds / 60))
    local seconds=$((duration_seconds % 60))
    duration_display="${minutes}m ${seconds}s"
  fi

  log_header "UAT VALIDATION COMPLETE"
  echo " Epic: $EPIC_ID"
  echo " Gate Mode: $UAT_GATE_MODE"
  echo " Gate Result: $gate_status"
  echo " Duration: $duration_display"
  echo ""
  echo " Scenarios:"
  echo " Automatable: ${#AUTOMATABLE_SCENARIOS[@]}"
  echo " Semi-automated: ${#SEMI_AUTO_SCENARIOS[@]}"
  echo " Manual: ${#MANUAL_SCENARIOS[@]}"
  echo ""
  echo " Results:"
  echo " Passed: ${#PASSED_SCENARIOS[@]}"
  echo " Failed: ${#FAILED_SCENARIOS[@]}"
  echo " Fix Attempts: $fix_attempts"
  echo ""
  echo " Human Intervention:"
  echo " Blocking Items: $blocking_count"
  echo " Warning Items: $warning_count"
  echo ""
  echo " Artifacts:"
  echo " Log: $LOG_FILE"
  echo " UAT Document: $UAT_FILE"
  # Fix contexts are only listed when something failed and the dir exists.
  if [ "${#FAILED_SCENARIOS[@]}" -gt 0 ] && [ -d "$FIX_DIR" ]; then
    echo " Fix Contexts: $FIX_DIR/"
  fi
  if [ "$human_action_count" -gt 0 ]; then
    echo " Human Actions: $FIX_DIR/epic-${EPIC_ID}-human-actions.md"
  fi
  echo ""

  # Print human intervention summary if any
  if [ "$human_action_count" -gt 0 ]; then
    echo -e "${YELLOW}${BOLD} ⚠ Human Action Required:${NC}"
    if [ "$blocking_count" -gt 0 ]; then
      echo -e " ${RED}BLOCKING:${NC}"
      _print_intervention_items "${HUMAN_INTERVENTION_BLOCKING[@]}"
    fi
    if [ "$warning_count" -gt 0 ]; then
      echo -e " ${YELLOW}WARNING:${NC}"
      _print_intervention_items "${HUMAN_INTERVENTION_WARNING[@]}"
    fi
    echo ""
    echo " See $FIX_DIR/epic-${EPIC_ID}-human-actions.md for details."
    echo ""
  fi
}
# =============================================================================
# Main Execution
# =============================================================================
# Capture UAT evaluation start time (ISO-8601 UTC for reports, epoch seconds
# for duration arithmetic in update_metrics/print_summary).
UAT_START_TIME=$(date -u +"%Y-%m-%dT%H:%M:%SZ")
UAT_START_EPOCH=$(date +%s)

log_header "UAT VALIDATION: Epic $EPIC_ID"
log "Gate mode: $UAT_GATE_MODE"
log "Max retries: $MAX_RETRIES"
log "Timeout: ${TIMEOUT_SECONDS}s"
log "Started: $UAT_START_TIME"

# Ensure directories exist
mkdir -p "$METRICS_DIR"
mkdir -p "$FIX_DIR"

# Step 1: Load UAT document
log_section "Loading UAT Document"
if ! load_uat_document "$EPIC_ID"; then
  # Nothing could be evaluated: emit the minimal failure signals and stop.
  echo "UAT_GATE_RESULT: FAIL"
  echo "UAT_FIX_ATTEMPTS: 0"
  echo "UAT_SCENARIOS_PASSED: 0/0"
  exit 1
fi

# Step 2: Classify scenarios
log_section "Classifying Scenarios"
classify_scenarios "$UAT_FILE"

# Step 3: Execute scenarios
if ! execute_scenarios "$UAT_GATE_MODE"; then
  # Gate failed - check if we should try self-healing
  if [ "$DRY_RUN" = false ] && [ "$MAX_RETRIES" -gt 0 ]; then
    if ! self_healing_loop "$EPIC_ID"; then
      # Max retries exceeded
      update_metrics "$EPIC_ID" "FAIL" "$MAX_RETRIES"
      output_signals "FAIL" "$MAX_RETRIES"
      print_summary "FAIL" "$MAX_RETRIES"
      exit 2
    fi
  else
    # No self-healing or dry-run
    update_metrics "$EPIC_ID" "FAIL" "0"
    output_signals "FAIL" "0"
    print_summary "FAIL" "0"
    exit 1
  fi
fi

# Step 4: Gate passed
FINAL_ATTEMPTS=0
if [ "${#FAILED_SCENARIOS[@]}" -gt 0 ]; then
  # Passed after retries. Count this epic's fix-context files with a glob
  # instead of parsing `ls` output, so unusual file names cannot skew the
  # count and EPIC_ID is not subject to word splitting.
  FIX_CONTEXT_FILES=("$FIX_DIR"/epic-"${EPIC_ID}"-fix-context-*.md)
  # Without nullglob an unmatched pattern stays as the literal string;
  # treat that case as "no files".
  [ -e "${FIX_CONTEXT_FILES[0]}" ] || FIX_CONTEXT_FILES=()
  # NOTE(review): the formula is kept as-is (MAX_RETRIES - files + 1); it
  # decreases as more fix-context files exist - confirm this is the intended
  # attempt count.
  FINAL_ATTEMPTS=$((MAX_RETRIES - ${#FIX_CONTEXT_FILES[@]} + 1))
fi

update_metrics "$EPIC_ID" "PASS" "$FINAL_ATTEMPTS"
output_signals "PASS" "$FINAL_ATTEMPTS"
print_summary "PASS" "$FINAL_ATTEMPTS"
log_success "UAT validation passed for Epic $EPIC_ID"
exit 0