Merge pull request #3 from rotationalphysics495/feat/design-phase-hardening

Design-phase hardening: domain-aware lenses + contract harness preflight
This commit is contained in:
Caleb 2026-06-03 09:17:36 -05:00 committed by GitHub
commit 8ad1792ed6
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
4 changed files with 703 additions and 10 deletions

View File

@ -0,0 +1,380 @@
#!/bin/bash
#
# BMAD Epic Execute - Contract Harness Preflight Module
#
# A project may declare a contract-validation harness (contract-harness.yaml)
# that describes how to bring up a SAMPLE/TEST environment and verify that the
# API and database contracts hold (the proper API is called and data lands in
# the right place).
#
# This module does NOT execute the per-story contract checks. It validates at
# STARTUP that the system has everything it needs to run them - credentials,
# commands, and files - so a misconfigured harness fails fast (or, in a dry
# run, produces an exit-code-honest readiness report) instead of blowing up
# mid-epic.
#
# The user never hand-maintains a checklist: prerequisites are inferred from the
# harness commands themselves (env var references, executables, file paths),
# with an optional `requires:` block for anything inference cannot see.
#
# Usage: sourced by epic-execute.sh
#
# Module-level readiness flag, initialised to "false" each time this file is
# sourced.
# Set true by contract_preflight when a required prerequisite is missing.
# epic-execute uses this to fail the run / dry-run exit code.
PREFLIGHT_FAILED=false
# =============================================================================
# Harness Discovery
# =============================================================================
# Find the project's contract harness file.
# Search order: $PROJECT_ROOT first, then $PROJECT_ROOT/docs; within each
# directory the .yaml spelling wins over .yml.
# Globals:  PROJECT_ROOT (read)
# Outputs:  the first existing path, or an empty line when none exists
# Returns:  always 0
find_contract_harness() {
  local dir ext path
  for dir in "$PROJECT_ROOT" "$PROJECT_ROOT/docs"; do
    for ext in yaml yml; do
      path="$dir/contract-harness.$ext"
      if [ -f "$path" ]; then
        printf '%s\n' "$path"
        return 0
      fi
    done
  done
  printf '\n'
  return 0
}
# =============================================================================
# Prerequisite Inference
# =============================================================================
# Print every command string the harness declares, one per line, in this
# order: environment.setup entries, environment.start.command,
# environment.teardown entries, datastore.verify_command.
# yq errors are discarded, so a missing key simply contributes nothing (or an
# empty line for the scalar lookups).
# Arguments: $1 - path to the harness file
_harness_commands() {
  local harness_path="$1"
  local expr
  for expr in \
    '(.environment.setup // [])[]' \
    '.environment.start.command // ""' \
    '(.environment.teardown // [])[]' \
    '.datastore.verify_command // ""'; do
    yq "$expr" "$harness_path" 2>/dev/null
  done
}
# Derive the environment variables the harness requires.
# Sources:
#   - $VAR / ${VAR...} references inside the harness command strings
#   - datastore.url_env (its value is itself a variable name)
#   - explicit requires.env entries
# References that carry their own fallback or are conditional
# (${VAR:-x}, ${VAR-x}, ${VAR:=x}, ${VAR=x}, ${VAR:+x}, ${VAR+x}) are NOT
# reported: the command works without the variable being set, so flagging it
# would fail the preflight for a harness that is actually runnable.
# ${VAR:?msg} and plain references are still reported.
# Arguments: $1 - path to the harness file
# Outputs:   unique variable names, one per line, sorted
_derive_env_vars() {
  local h="$1"
  {
    # The braced alternative also captures a trailing default/alternate
    # operator so the second grep can drop those optional references.
    _harness_commands "$h" \
      | grep -oE '\$(\{[A-Za-z_][A-Za-z0-9_]*(:?[-=+])?|[A-Za-z_][A-Za-z0-9_]*)' \
      | grep -vE '[-=+]$' \
      | tr -d '${'
    yq '.datastore.url_env // ""' "$h" 2>/dev/null
    yq '(.requires.env // [])[]' "$h" 2>/dev/null
  } | sed '/^$/d' | sort -u
}
# Derive the executables the harness requires: the first token of each harness
# command that is not a leading VAR=value assignment, plus any explicit
# requires.commands entries.
# Splitting is done with `read -a` on spaces/tabs, so tokens are never
# glob-expanded against the current directory and the result does not depend
# on the caller's IFS. Quoting inside the command string is not interpreted
# (a quoted value containing spaces is still split) - this stays best effort.
# Arguments: $1 - path to the harness file
# Outputs:   unique executable names, one per line, sorted
_derive_commands() {
  local h="$1"
  # Only NAME= / NAME+= prefixes count as assignments; other tokens that merely
  # contain '=' (e.g. a path) are treated as the command.
  local assign_re='^[A-Za-z_][A-Za-z0-9_]*\+?='
  local cmd word
  local -a words
  {
    _harness_commands "$h" | while IFS= read -r cmd; do
      [ -z "$cmd" ] && continue
      words=()
      IFS=$' \t' read -r -a words <<< "$cmd"
      [ "${#words[@]}" -gt 0 ] || continue
      for word in "${words[@]}"; do
        if [[ "$word" =~ $assign_re ]]; then
          continue # skip leading VAR=value assignments
        fi
        printf '%s\n' "$word"
        break
      done
    done
    yq '(.requires.commands // [])[]' "$h" 2>/dev/null
  } | sed '/^$/d' | sort -u
}
# Derive the files the harness references (best effort): path-like tokens in
# the harness command strings plus any explicit requires.files entries.
# A token is path-like when it contains '/' or ends in a known config/script
# extension. Before that test each token is cleaned up so that common shell
# syntax is not mistaken for a project file:
#   - quote characters are removed            ("compose.yml" -> compose.yml)
#   - a leading NAME= / --opt= prefix is cut  (--config=a/b.json -> a/b.json)
# and afterwards URLs (scheme://...) and redirections (2>/dev/null, >out/log)
# are discarded, because they are not inputs that must exist beforehand.
# Arguments: $1 - path to the harness file
# Outputs:   unique paths, one per line, sorted
_derive_files() {
  local h="$1"
  {
    _harness_commands "$h" \
      | tr -s ' \t' '\n\n' \
      | tr -d "\"'" \
      | sed 's/^[-A-Za-z_][-A-Za-z0-9_]*=//' \
      | grep -E '/|\.(ya?ml|json|toml|sh|env)$' 2>/dev/null \
      | grep -vE '://|[<>]' 2>/dev/null
    yq '(.requires.files // [])[]' "$h" 2>/dev/null
  } | sed '/^$/d' | sort -u
}
# =============================================================================
# Safety Guard
# =============================================================================
# Warn if the declared datastore connection looks like a real/production target.
# Contract validation writes data, so it must point at a throwaway/test store.
# Purely advisory: it only emits warnings through log_warn.
# Arguments: $1 - path to the harness file
# Returns:   always 0
_check_datastore_safety() {
  local h="$1"
  local url_env
  url_env=$(yq '.datastore.url_env // ""' "$h" 2>/dev/null)
  [ -z "$url_env" ] && return 0

  # url_env comes straight from YAML. Indirect expansion (${!name}) on
  # something that is not an identifier - e.g. a connection URL pasted into
  # the wrong field - is an expansion error that aborts the calling command,
  # so validate first. The value is deliberately not echoed: if it is a URL it
  # may embed credentials.
  local name_re='^[A-Za-z_][A-Za-z0-9_]*$'
  if [[ ! "$url_env" =~ $name_re ]]; then
    log_warn " ! datastore.url_env must be the NAME of an environment variable, not a value"
    return 0
  fi

  # Compare case-insensitively so e.g. prod_db_url is caught as well.
  # (*PROD* also covers *PRODUCTION*.)
  local upper_name
  upper_name=$(printf '%s' "$url_env" | tr '[:lower:]' '[:upper:]')
  case "$upper_name" in
    DATABASE_URL|*PROD*)
      log_warn " ! datastore.url_env='$url_env' looks production-scoped - use a TEST-only variable" ;;
  esac

  local val="${!url_env:-}"
  if [ -n "$val" ]; then
    local lower_val
    lower_val=$(printf '%s' "$val" | tr '[:upper:]' '[:lower:]')
    case "$lower_val" in
      *localhost*|*127.0.0.1*|*test*) : ;;
      *) log_warn " ! $url_env does not look local/test-scoped - contract validation must never run against a real database" ;;
    esac
  fi
  return 0
}
# =============================================================================
# Preflight (presence checks + readiness report)
# =============================================================================
# Validate that everything needed to run the harness is present and print a
# readiness report.
# Checks, in order: yq availability, harness readability, required environment
# variables, required executables, referenced files, datastore safety
# (advisory), and - only when PREFLIGHT_DEEP=true and nothing is missing - the
# deep connectivity smoke.
# Globals:
#   PROJECT_ROOT     (read)    base for relative file paths
#   PREFLIGHT_DEEP   (read)    "true" enables contract_preflight_deep
#   PREFLIGHT_FAILED (written) set to "true" on any failure
# Arguments:
#   $1 - path to the harness file (empty = no harness, nothing to check)
# Returns: 0 if ready, 1 if a required prerequisite is missing.
contract_preflight() {
  local h="$1"
  [ -z "$h" ] && return 0

  log "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"
  log "Contract Harness Preflight"
  log "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"
  log "Harness: $h"

  local check="✓" cross="✗"
  local missing=0

  # yq is required to parse the harness
  if ! command -v yq >/dev/null 2>&1; then
    log_error " $cross yq is required to parse the contract harness (install yq)"
    PREFLIGHT_FAILED=true
    return 1
  fi

  # Every later lookup discards yq errors, so an unreadable or malformed
  # harness would otherwise yield empty lists and a false "passed".
  if ! yq '.' "$h" >/dev/null 2>&1; then
    log_error " $cross harness could not be read or parsed as YAML: $h"
    PREFLIGHT_FAILED=true
    return 1
  fi

  # Basic schema sanity (advisory)
  local start_cmd
  start_cmd=$(yq '.environment.start.command // ""' "$h" 2>/dev/null)
  [ -z "$start_cmd" ] && log_warn " ! harness declares no environment.start.command"

  # 1. Credentials / environment variables
  local envs v
  local name_re='^[A-Za-z_][A-Za-z0-9_]*$'
  envs=$(_derive_env_vars "$h")
  if [ -n "$envs" ]; then
    log "Credentials / environment variables:"
    while IFS= read -r v; do
      [ -z "$v" ] && continue
      if [[ ! "$v" =~ $name_re ]]; then
        # ${!v} on a non-identifier is an expansion error that would abort
        # the whole preflight; report it as an ordinary failed check instead.
        echo " $cross $v (not a valid environment variable name)"
        missing=$((missing + 1))
      elif [ -n "${!v:-}" ]; then
        echo " $check $v"
      else
        echo " $cross $v (not set)"
        missing=$((missing + 1))
      fi
    done <<< "$envs"
  fi

  # 2. Required executables
  local cmds c
  cmds=$(_derive_commands "$h")
  if [ -n "$cmds" ]; then
    log "Required commands:"
    while IFS= read -r c; do
      [ -z "$c" ] && continue
      if command -v "$c" >/dev/null 2>&1; then
        echo " $check $c"
      else
        echo " $cross $c (not on PATH)"
        missing=$((missing + 1))
      fi
    done <<< "$cmds"
  fi

  # 3. Referenced files (relative paths are resolved against PROJECT_ROOT)
  local files f path
  files=$(_derive_files "$h")
  if [ -n "$files" ]; then
    log "Referenced files:"
    while IFS= read -r f; do
      [ -z "$f" ] && continue
      case "$f" in
        /*) path="$f" ;;
        *) path="$PROJECT_ROOT/$f" ;;
      esac
      if [ -e "$path" ]; then
        echo " $check $f"
      else
        echo " $cross $f (not found)"
        missing=$((missing + 1))
      fi
    done <<< "$files"
  fi

  # 4. Safety guard on datastore connection
  _check_datastore_safety "$h"

  # 5. Optional deep connectivity smoke (boots the sample environment)
  if [ "${PREFLIGHT_DEEP:-false}" = true ]; then
    if [ "$missing" -gt 0 ]; then
      log_warn "Skipping deep connectivity smoke - presence checks failed first"
    elif ! contract_preflight_deep "$h"; then
      missing=$((missing + 1))
    fi
  fi

  if [ "$missing" -gt 0 ]; then
    log_error "Contract preflight: $missing required prerequisite(s) missing"
    PREFLIGHT_FAILED=true
    return 1
  fi

  log_success "Contract preflight passed - ready to validate contracts"
  return 0
}
# =============================================================================
# Deep Connectivity Smoke (opt-in: --preflight-deep)
# =============================================================================
# Run the harness teardown commands (best effort, always safe to call).
# Each command is eval'd in a subshell rooted at PROJECT_ROOT; failures are
# ignored so one broken command cannot stop the remaining cleanup.
# Globals:
#   PROJECT_ROOT (read)
#   LOG_FILE     (read) command output is appended here; when it is unset or
#                empty, output goes to /dev/null. Without that fallback the
#                redirection itself fails and the command is never run.
# Arguments: $1 - path to the harness file
# Returns:   always 0
_harness_teardown() {
  local h="$1"
  local log_target="${LOG_FILE:-/dev/null}"
  local cmd
  while IFS= read -r cmd; do
    [ -z "$cmd" ] && continue
    log " teardown: $cmd"
    # stdin is detached so a command that reads input cannot consume the
    # remaining teardown lines this loop is reading.
    ( cd "$PROJECT_ROOT" && eval "$cmd" ) </dev/null >>"$log_target" 2>&1 || true
  done < <(yq '(.environment.teardown // [])[]' "$h" 2>/dev/null)
  return 0
}
# Actually bring the sample environment up, check readiness, then tear it down.
# Executes the project's own commands (same trust level as package.json scripts).
# Steps: run environment.setup commands (stop at the first failure), start
# environment.start.command in the background if declared, poll
# environment.start.ready.url with curl every 2 seconds until it answers or
# the timeout elapses, then stop the started process and run teardown.
# Globals:
#   PROJECT_ROOT (read)
#   LOG_FILE     (read) command output is appended here; /dev/null when unset
# Arguments:
#   $1 - path to the harness file
# Returns: 0 on success, 1 on any failure.
contract_preflight_deep() {
  local h="$1"
  # An empty LOG_FILE would make every redirection fail, which the setup loop
  # would misreport as "setup failed".
  local log_target="${LOG_FILE:-/dev/null}"
  log "Deep connectivity smoke (booting the sample environment)..."

  # Run setup commands
  local rc=0 cmd
  while IFS= read -r cmd; do
    [ -z "$cmd" ] && continue
    log " setup: $cmd"
    # stdin is detached so a command that reads input cannot consume the
    # remaining setup lines this loop is reading.
    if ! ( cd "$PROJECT_ROOT" && eval "$cmd" ) </dev/null >>"$log_target" 2>&1; then
      log_error " setup failed: $cmd"
      rc=1
      break
    fi
  done < <(yq '(.environment.setup // [])[]' "$h" 2>/dev/null)
  if [ "$rc" -ne 0 ]; then
    _harness_teardown "$h"
    return 1
  fi

  # Start the app in the background (if a start command is declared)
  local start_cmd start_pid=""
  start_cmd=$(yq '.environment.start.command // ""' "$h" 2>/dev/null)
  if [ -n "$start_cmd" ]; then
    log " start: $start_cmd"
    ( cd "$PROJECT_ROOT" && eval "$start_cmd" ) >>"$log_target" 2>&1 &
    start_pid=$!
  fi

  # Poll the readiness URL (if declared)
  local ready_url timeout ok=1
  ready_url=$(yq '.environment.start.ready.url // ""' "$h" 2>/dev/null)
  timeout=$(yq '.environment.start.ready.timeout_seconds // 30' "$h" 2>/dev/null)
  # A non-numeric timeout would break the -lt comparison below and report
  # "not ready" without polling at all.
  case "$timeout" in
    ''|*[!0-9]*)
      log_warn " ! ready.timeout_seconds='$timeout' is not a whole number - using 30"
      timeout=30
      ;;
  esac
  if [ -n "$ready_url" ]; then
    ok=0
    if ! command -v curl >/dev/null 2>&1; then
      # Report the real cause instead of polling for the full timeout.
      log_error " curl is required to poll the readiness URL: $ready_url"
    else
      local waited=0
      while [ "$waited" -lt "$timeout" ]; do
        if curl -sf -o /dev/null "$ready_url" 2>/dev/null; then
          ok=1
          break
        fi
        sleep 2
        waited=$((waited + 2))
      done
      if [ "$ok" -eq 1 ]; then
        log_success " ready: $ready_url"
      else
        log_error " not ready after ${timeout}s: $ready_url"
      fi
    fi
  fi

  # Stop the app and tear down.
  # NOTE(review): kill targets only the PID of the background subshell;
  # whether processes spawned by the start command also exit is not visible
  # here - confirm the harness teardown stops them.
  if [ -n "$start_pid" ]; then
    kill "$start_pid" 2>/dev/null || true
  fi
  _harness_teardown "$h"

  if [ "$ok" -eq 1 ]; then
    return 0
  fi
  return 1
}
# =============================================================================
# Scaffolder (--init-harness)
# =============================================================================
# Write a commented contract-harness.yaml template to the project root.
# An existing file is never overwritten. The template's nesting matches the
# paths this module queries with yq (.environment.setup,
# .environment.start.command, .environment.start.ready.url,
# .environment.teardown, .datastore.verify_command, .requires.*).
# Globals:  PROJECT_ROOT (read)
# Returns:  0 when the file was created or already existed,
#           1 when the template could not be written.
init_contract_harness() {
  local target="$PROJECT_ROOT/contract-harness.yaml"
  if [ -e "$target" ]; then
    log_warn "Harness already exists: $target (not overwriting)"
    return 0
  fi

  # Do not claim success when the write fails (missing or read-only directory).
  cat > "$target" <<'YAML' || { log_error "Could not write harness template: $target"; return 1; }
# Contract validation harness for epic-execute.
#
# Declares how to bring up a SAMPLE/TEST environment and verify that the API and
# database contracts hold. epic-execute validates at startup that it has
# everything needed to run this - run `epic-execute <epic> --dry-run` to get an
# exit-code-honest readiness report (great as a CI gate).
version: 1
environment:
  # Commands to provision the sample environment (DB, migrations, seed data).
  setup:
    - docker compose -f docker-compose.test.yml up -d db
    # - npm run migrate:test
    # - npm run seed:test
  # How to start the app under test.
  start:
    command: npm run start:test
    ready:
      url: http://localhost:3000/health
      timeout_seconds: 60
  # Always run to clean up.
  teardown:
    - docker compose -f docker-compose.test.yml down -v
api:
  base_url: http://localhost:3000
# How to verify data landed "in the right place". Prefer a command (no DB
# credentials needed); the system passes the table/where as arguments.
datastore:
  verify_command: "npm run db:assert --"
  # OR direct query via a TEST-scoped env var (never a real DATABASE_URL):
  # url_env: TEST_DATABASE_URL
# Optional explicit prerequisites. The system also INFERS these from the
# commands above (env var references, executables, and file paths), so you only
# need to list things inference cannot see (e.g. an API token used at call time).
requires:
  env: []
  commands: []
  files: []
# Contract cases: call the API and assert the response + persistence.
# (Validated by preflight now; executed by the per-story contract gate.)
cases:
  - name: "example: create persists a row"
    request: { method: POST, path: /api/example, body: { name: "x" } }
    expect: { status: 201, body_contains: { name: "x" } }
    verify_persistence: { table: example, where: { name: "x" }, exists: true }
YAML

  log_success "Created harness template: $target"
  log "Edit it, then run a dry run to validate readiness."
  return 0
}

View File

@ -78,6 +78,117 @@ build_repo_map() {
"$lang_label" "${top:-(none)}" "${sources:-(none detected)}" "$lang_label" "${top:-(none)}" "${sources:-(none detected)}"
} }
# =============================================================================
# Feature Domain Classification (frontend / backend / fullstack)
# =============================================================================
# Auto-detect the feature domain for a story so the design phase can apply the
# right planning lens. Resolution order (all automatic):
#   1. An explicit Type:/Domain:/Feature-Type: field in the story file
#   2. Heuristic keyword scoring of the story content
#   3. Default to "fullstack" (the superset) when ambiguous - fail safe so we
#      never under-plan a story.
# Arguments: $1 - path to the story file
# Outputs:   one of: frontend | backend | fullstack
classify_feature_domain() {
  local story_file="$1"

  # 1. Explicit metadata field in the story (highest confidence, still auto)
  local meta
  meta=$(grep -iE '^(Type|Domain|Feature[ _-]?Type)[[:space:]]*:' "$story_file" 2>/dev/null | head -1 | tr '[:upper:]' '[:lower:]')

  # Match keywords as whole words of the field VALUE only. Plain substring
  # globs misfire on ordinary values ("build" contains "ui", "linux" contains
  # "ux", "rapid" contains "api"). Every run of non-alphanumerics becomes one
  # space, so "full-stack", "full_stack" and "full stack" all normalise to
  # the same words.
  local value
  value=" $(printf '%s' "${meta#*:}" | tr -cs 'a-z0-9' ' ') "
  case "$value" in
    *" fullstack "*|*" full stack "*) echo "fullstack"; return ;;
    *" frontend "*|*" front end "*|*" ui "*|*" ux "*) echo "frontend"; return ;;
    *" backend "*|*" back end "*|*" api "*|*" server "*) echo "backend"; return ;;
  esac

  # 2. Heuristic keyword scoring on story content
  local content
  content=$(cat "$story_file" 2>/dev/null)
  local fe be
  fe=$(printf '%s' "$content" | grep -ioE '\b(component|components|page|pages|screen|view|button|form|modal|dialog|css|tailwind|stylesheet|layout|responsive|render|UI|UX|accessibility|a11y|frontend|front-end|click|hover|route|router|navigation|nav)\b' 2>/dev/null | wc -l | tr -d ' ')
  be=$(printf '%s' "$content" | grep -ioE '\b(endpoint|endpoints|API|REST|GraphQL|controller|service|repository|schema|migration|migrations|database|query|queries|SQL|model|models|auth|authentication|authorization|token|queue|job|cron|webhook|backend|back-end|server)\b' 2>/dev/null | wc -l | tr -d ' ')
  fe=${fe:-0}; be=${be:-0}

  local threshold=2
  # One side clearly dominant -> that domain; otherwise fail safe to fullstack
  if [ "$fe" -ge "$threshold" ] && [ "$be" -lt "$threshold" ]; then
    echo "frontend"
  elif [ "$be" -ge "$threshold" ] && [ "$fe" -lt "$threshold" ]; then
    echo "backend"
  else
    echo "fullstack"
  fi
}
# Print the planning-lens prompt text for a feature domain. The lens lists the
# domain-specific points the planner MUST cover. "frontend" and "backend" print
# their own lens; any other value (including "fullstack") prints an intro line
# followed by both lenses.
# Arguments:
#   $1 - domain (frontend | backend | fullstack)
build_lens_block() {
  local domain="$1"

  local frontend_text="## Frontend Planning Lens
This story involves UI. Your plan MUST address these in the \"frontend\" object:
- Component breakdown: which components are new vs reused from the design system
- Every interactive component's states: loading, empty, error, success, disabled
- Accessibility: keyboard navigation, ARIA, focus management, color contrast
- Responsive behavior across breakpoints
- Which existing design-system components/tokens to reuse (do not reinvent)"

  local backend_text="## Backend Planning Lens
This story involves backend logic. Your plan MUST address these in the \"backend\" object:
- API contract: method, path, request/response shape, and status codes
- Data model and any migrations (and whether they are reversible)
- Error handling and failure modes for each state-changing operation
- Concurrency / idempotency / transactions where state changes
- Observability: what to log and which metrics to emit
- Backward compatibility / versioning"

  if [ "$domain" = "frontend" ]; then
    printf '%s\n' "$frontend_text"
  elif [ "$domain" = "backend" ]; then
    printf '%s\n' "$backend_text"
  else
    printf '%s\n\n%s\n\n%s\n' \
      "This story spans BOTH the UI and backend tiers - address both lenses." \
      "$frontend_text" "$backend_text"
  fi
}
# Print the JSON schema fragment that is spliced into the plan schema for a
# feature domain. "frontend" and "backend" print their own object; any other
# value (including "fullstack") prints the frontend object followed by the
# backend object. Each fragment ends with a trailing comma.
# Arguments:
#   $1 - domain (frontend | backend | fullstack)
build_domain_schema() {
  local domain="$1"

  local frontend_fragment=" \"frontend\": {
\"components\": [{\"name\": \"...\", \"new_or_existing\": \"new|existing\", \"states\": [\"loading\",\"empty\",\"error\",\"success\",\"disabled\"]}],
\"user_flows\": [\"...\"],
\"accessibility\": [\"...\"],
\"responsive\": [\"...\"],
\"design_system_usage\": [\"...\"]
},"

  local backend_fragment=" \"backend\": {
\"api_contract\": [{\"method\": \"...\", \"path\": \"...\", \"request\": \"...\", \"response\": \"...\", \"status_codes\": [\"...\"]}],
\"data_model\": [\"...\"],
\"migrations\": [\"...\"],
\"error_handling\": [\"...\"],
\"concurrency\": [\"...\"],
\"observability\": [\"...\"],
\"backward_compatibility\": [\"...\"]
},"

  if [ "$domain" = "frontend" ]; then
    printf '%s\n' "$frontend_fragment"
  elif [ "$domain" = "backend" ]; then
    printf '%s\n' "$backend_fragment"
  else
    printf '%s\n%s\n' "$frontend_fragment" "$backend_fragment"
  fi
}
# ============================================================================= # =============================================================================
# Design Phase Functions # Design Phase Functions
# ============================================================================= # =============================================================================
@ -127,8 +238,18 @@ execute_design_phase() {
repo_map=$(build_repo_map) repo_map=$(build_repo_map)
fi fi
# Auto-detect the feature domain and build the matching planning lens +
# schema fragment (frontend / backend / fullstack). Fails safe to fullstack.
local domain
domain=$(classify_feature_domain "$story_file")
log "Design domain for $story_id: $domain"
local lens_block
lens_block=$(build_lens_block "$domain")
local domain_schema
domain_schema=$(build_domain_schema "$domain")
if [ "$DRY_RUN" = true ]; then if [ "$DRY_RUN" = true ]; then
echo "[DRY RUN] Would execute design phase for $story_id" echo "[DRY RUN] Would execute design phase for $story_id (domain: $domain)"
return 0 return 0
fi fi
@ -198,17 +319,24 @@ Follow existing patterns rather than introducing new ones.
$repo_map $repo_map
</repo-map> </repo-map>
## Feature Domain: $domain
$lens_block
${revision_block}## Required Output ${revision_block}## Required Output
Output your implementation plan as a single JSON result block. Map EVERY Output your implementation plan as a single JSON result block. Map EVERY
acceptance criterion in the story to the files/functions that will implement acceptance criterion in the story to the files/functions that will implement
it - the \"ac\" field must use the exact AC identifier from the story (e.g. it - the \"ac\" field must use the exact AC identifier from the story (e.g.
\"AC1\", \"AC2\"). \"AC1\", \"AC2\"). Set \"feature_type\" to \"$domain\" (correct it only if the
story clearly belongs to a different domain) and fill the matching domain
object(s).
\`\`\`json \`\`\`json
{ {
\"status\": \"COMPLETE\", \"status\": \"COMPLETE\",
\"story_id\": \"$story_id\", \"story_id\": \"$story_id\",
\"feature_type\": \"$domain\",
\"summary\": \"<one-line description of the planned approach>\", \"summary\": \"<one-line description of the planned approach>\",
\"files_to_modify\": [ \"files_to_modify\": [
{\"path\": \"<file path>\", \"action\": \"create|modify\", \"purpose\": \"<why>\"} {\"path\": \"<file path>\", \"action\": \"create|modify\", \"purpose\": \"<why>\"}
@ -222,6 +350,7 @@ it - the \"ac\" field must use the exact AC identifier from the story (e.g.
\"acceptance_criteria_mapping\": [ \"acceptance_criteria_mapping\": [
{\"ac\": \"AC1\", \"covered_by\": \"<files/functions implementing this AC>\"} {\"ac\": \"AC1\", \"covered_by\": \"<files/functions implementing this AC>\"}
], ],
$domain_schema
\"risks\": [ \"risks\": [
{\"risk\": \"<potential issue>\", \"mitigation\": \"<how to mitigate>\"} {\"risk\": \"<potential issue>\", \"mitigation\": \"<how to mitigate>\"}
], ],
@ -278,13 +407,24 @@ DESIGN COMPLETE: $story_id"
return 1 return 1
fi fi
# Prefer the model's emitted feature_type (it has seen the code) over
# the heuristic; fall back to the heuristic domain.
local effective_domain="$domain"
if [ -n "$json" ] && type get_result_feature_type >/dev/null 2>&1; then
local model_ft
model_ft=$(get_result_feature_type "$json" | tr '[:upper:]' '[:lower:]')
case "$model_ft" in
frontend|backend|fullstack) effective_domain="$model_ft" ;;
esac
fi
# Critic disabled or no attempts budgeted - accept the first plan # Critic disabled or no attempts budgeted - accept the first plan
if [ "${SKIP_DESIGN_CRITIC:-false}" = true ] || [ "$max_attempts" -le 0 ]; then if [ "${SKIP_DESIGN_CRITIC:-false}" = true ] || [ "$max_attempts" -le 0 ]; then
break break
fi fi
# Run the critic against the plan # Run the critic against the plan (domain-aware)
run_design_critic "$story_file" "$story_id" "$arch_file" "$LAST_DESIGN" run_design_critic "$story_file" "$story_id" "$arch_file" "$LAST_DESIGN" "$effective_domain"
local verdict=$? local verdict=$?
if [ "$verdict" -ne 1 ]; then if [ "$verdict" -ne 1 ]; then
@ -323,6 +463,9 @@ DESIGN COMPLETE: $story_id"
# Validate that every acceptance criterion is mapped (advisory warning). # Validate that every acceptance criterion is mapped (advisory warning).
validate_design_coverage "$story_file" "$story_id" "$json" validate_design_coverage "$story_file" "$story_id" "$json"
# Validate domain-specific completeness (advisory; the critic enforces).
validate_domain_completeness "$story_id" "$effective_domain" "$json"
# Save to decision log # Save to decision log
if type append_to_decision_log >/dev/null 2>&1; then if type append_to_decision_log >/dev/null 2>&1; then
append_to_decision_log "DESIGN" "$story_id" "$LAST_DESIGN" append_to_decision_log "DESIGN" "$story_id" "$LAST_DESIGN"
@ -333,36 +476,52 @@ DESIGN COMPLETE: $story_id"
} }
# Run a fresh-context critic pass over a proposed design plan (#4). # Run a fresh-context critic pass over a proposed design plan (#4).
# The critic checks two things: (a) does the plan map every acceptance # The critic checks: (a) does the plan map every acceptance criterion, (b) does
# criterion, and (b) does it conform to the architecture. Gaps are stored in # it conform to the architecture, and (c) is it complete for its feature domain.
# DESIGN_CRITIC_GAPS for feedback into a regeneration pass. # Gaps are stored in DESIGN_CRITIC_GAPS for feedback into a regeneration pass.
# Arguments: # Arguments:
# $1 - story_file path # $1 - story_file path
# $2 - story_id # $2 - story_id
# $3 - architecture file path (may be empty) # $3 - architecture file path (may be empty)
# $4 - the proposed plan (JSON or text) # $4 - the proposed plan (JSON or text)
# $5 - feature domain (frontend | backend | fullstack)
# Returns: 0 approved, 1 needs revision, 2 unclear # Returns: 0 approved, 1 needs revision, 2 unclear
run_design_critic() { run_design_critic() {
local story_file="$1" local story_file="$1"
local story_id="$2" local story_id="$2"
local arch_file="$3" local arch_file="$3"
local plan="$4" local plan="$4"
local domain="${5:-fullstack}"
DESIGN_CRITIC_GAPS="" DESIGN_CRITIC_GAPS=""
local story_contents local story_contents
story_contents=$(cat "$story_file") story_contents=$(cat "$story_file")
# Domain-specific completeness checks the critic must enforce
local domain_checks=""
case "$domain" in
frontend) domain_checks="- Every interactive component enumerates ALL of its states (loading, empty, error, success, disabled)
- Accessibility is addressed (keyboard navigation, ARIA, focus management, contrast)
- Responsive behavior is specified" ;;
backend) domain_checks="- Every state-changing operation has an explicit error path AND defined status codes
- Data-model / migration impact is covered (and migration reversibility noted)
- Concurrency / idempotency is addressed where state changes" ;;
*) domain_checks="- (Frontend) every interactive component enumerates ALL states (loading/empty/error/success/disabled); accessibility and responsive behavior are addressed
- (Backend) every state-changing operation has an explicit error path and defined status codes; data-model/migration impact is covered" ;;
esac
local critic_prompt="You are a skeptical senior engineer reviewing an implementation PLAN before any code is written. local critic_prompt="You are a skeptical senior engineer reviewing an implementation PLAN before any code is written.
## Your Task ## Your Task
Critique the proposed plan for story: $story_id Critique the proposed plan for story: $story_id (feature domain: $domain)
You are reviewing a PLAN, not code. Be rigorous. Decide whether the plan: You are reviewing a PLAN, not code. Be rigorous. Decide whether the plan:
1. Maps EVERY acceptance criterion in the story to concrete files/functions 1. Maps EVERY acceptance criterion in the story to concrete files/functions
2. Conforms to the project's architecture 2. Conforms to the project's architecture
3. Is concrete and actionable (no vague hand-waving) 3. Is concrete and actionable (no vague hand-waving)
4. Is COMPLETE for its feature domain (see Domain Completeness below)
## Story ## Story
@ -380,6 +539,11 @@ $story_contents
$plan $plan
</plan> </plan>
## Domain Completeness (feature domain: $domain)
Treat any of the following that is missing as a NEEDS_REVISION gap:
$domain_checks
## Required Output ## Required Output
Output a single JSON result block: Output a single JSON result block:
@ -394,8 +558,9 @@ Output a single JSON result block:
} }
\`\`\` \`\`\`
Use APPROVED only if the plan covers every acceptance criterion and conforms to Use APPROVED only if the plan covers every acceptance criterion, conforms to the
the architecture. Otherwise use NEEDS_REVISION and list specific, actionable gaps. architecture, AND is complete for its feature domain. Otherwise use
NEEDS_REVISION and list specific, actionable gaps.
## Completion Signal ## Completion Signal
@ -480,6 +645,54 @@ validate_design_coverage() {
fi fi
} }
# Count the items a jq filter yields for a JSON plan.
# Prints the filter's output with all whitespace removed, or 0 when jq fails
# or prints nothing.
# Arguments: $1 - JSON text, $2 - jq filter producing a number
_plan_item_count() {
  local n
  n=$(echo "$1" | jq "$2" 2>/dev/null || echo 0)
  n=$(echo "$n" | tr -d '[:space:]')
  [ -z "$n" ] && n=0
  echo "$n"
}

# Advisory domain-completeness check for a design plan (the critic remains the
# enforcing gate). It warns, and records a metric when add_metrics_issue is
# defined, for two omissions:
#   - frontend components whose "states" list is empty or absent
#   - a backend api_contract accompanied by no error_handling entries
# Does nothing when the plan is empty or jq is not installed.
# Arguments:
#   $1 - story_id
#   $2 - feature domain (frontend | backend | fullstack)
#   $3 - JSON plan (may be empty)
# Returns: always 0
validate_domain_completeness() {
  local story_id="$1"
  local domain="$2"
  local json="$3"

  [ -n "$json" ] || return 0
  command -v jq >/dev/null 2>&1 || return 0

  # Frontend: every interactive component should enumerate its states
  case "$domain" in
    frontend|fullstack)
      local comp_count states_missing
      comp_count=$(_plan_item_count "$json" '[.frontend.components[]?] | length')
      if [ "$comp_count" -gt 0 ]; then
        states_missing=$(_plan_item_count "$json" '[.frontend.components[]? | select((.states | length) == 0)] | length')
        if [ "$states_missing" -gt 0 ]; then
          log_warn "Design: $states_missing frontend component(s) missing states for $story_id"
          if type add_metrics_issue >/dev/null 2>&1; then
            add_metrics_issue "$story_id" "design_domain_incomplete" "$states_missing FE component(s) missing states"
          fi
        fi
      fi
      ;;
  esac

  # Backend: an API contract without any error handling is a red flag
  case "$domain" in
    backend|fullstack)
      local api_count err_count
      api_count=$(_plan_item_count "$json" '[.backend.api_contract[]?] | length')
      err_count=$(_plan_item_count "$json" '[.backend.error_handling[]?] | length')
      if [ "$api_count" -gt 0 ] && [ "$err_count" -eq 0 ]; then
        log_warn "Design: backend API planned without error handling for $story_id"
        if type add_metrics_issue >/dev/null 2>&1; then
          add_metrics_issue "$story_id" "design_domain_incomplete" "Backend API without error_handling"
        fi
      fi
      ;;
  esac

  return 0
}
# Persist a design plan to a per-story file under DESIGN_DIR. # Persist a design plan to a per-story file under DESIGN_DIR.
# Arguments: # Arguments:
# $1 - story_id # $1 - story_id
@ -535,6 +748,15 @@ build_planned_test_files_context() {
return return
fi fi
# Domain-aware hint on which kinds of tests to emphasize (#7 + domain)
local feature_type test_hint=""
feature_type=$(echo "$design" | jq -r '.feature_type // empty' 2>/dev/null || echo "")
case "$feature_type" in
frontend) test_hint="This is a frontend feature: emphasize component, interaction, and accessibility tests (plus visual regression where applicable)." ;;
backend) test_hint="This is a backend feature: emphasize unit, integration, contract, and migration tests." ;;
fullstack) test_hint="This is a fullstack feature: cover both UI (component/interaction/a11y) and backend (unit/integration/contract) tests." ;;
esac
cat << EOF cat << EOF
## Planned Test Files (from design phase) ## Planned Test Files (from design phase)
@ -542,6 +764,8 @@ build_planned_test_files_context() {
The design phase already identified the intended test files below. Align your The design phase already identified the intended test files below. Align your
specifications with these paths and reuse them; only introduce a new test file specifications with these paths and reuse them; only introduce a new test file
when a scenario genuinely isn't covered here, and call out any deviation. when a scenario genuinely isn't covered here, and call out any deviation.
${test_hint:+
$test_hint}
<planned-test-files> <planned-test-files>
$files $files

View File

@ -133,6 +133,25 @@ get_result_story_id() {
fi fi
} }
# Get the feature_type field from a JSON result (design phase).
# jq is used when it is installed and can parse the input. When jq is missing,
# or the input is not valid JSON, a regex fallback extracts the first
# "feature_type": "..." pair instead of emitting a jq error and returning
# nothing.
# Globals:
#   LAST_JSON_RESULT (read) used when $1 is not provided
# Arguments:
#   $1 - JSON string (optional, uses LAST_JSON_RESULT if not provided)
# Outputs: the feature_type value (frontend | backend | fullstack expected),
#          or nothing if the field is not present
# Returns: 1 when there is no JSON to inspect, 0 otherwise
get_result_feature_type() {
  local json="${1:-${LAST_JSON_RESULT:-}}"

  if [ -z "$json" ]; then
    echo ""
    return 1
  fi

  local value
  if command -v jq >/dev/null 2>&1; then
    # Only trust jq when it parsed the document; a parse failure falls
    # through to the regex extraction below.
    if value=$(printf '%s\n' "$json" | jq -r '.feature_type // empty' 2>/dev/null); then
      [ -n "$value" ] && printf '%s\n' "$value"
      return 0
    fi
  fi

  # [[:space:]] instead of \s keeps the pattern portable across grep flavours;
  # head limits the result to the first occurrence.
  printf '%s\n' "$json" \
    | grep -oE '"feature_type"[[:space:]]*:[[:space:]]*"[^"]+"' \
    | head -n 1 \
    | sed 's/.*"\([^"]*\)"$/\1/'
  return 0
}
# Get the summary field from a JSON result # Get the summary field from a JSON result
# Arguments: # Arguments:
# $1 - JSON string (optional, uses LAST_JSON_RESULT if not provided) # $1 - JSON string (optional, uses LAST_JSON_RESULT if not provided)

View File

@ -132,6 +132,7 @@ LIB_DIR="$SCRIPT_DIR/epic-execute-lib"
[ -f "$LIB_DIR/design-phase.sh" ] && source "$LIB_DIR/design-phase.sh" [ -f "$LIB_DIR/design-phase.sh" ] && source "$LIB_DIR/design-phase.sh"
[ -f "$LIB_DIR/json-output.sh" ] && source "$LIB_DIR/json-output.sh" [ -f "$LIB_DIR/json-output.sh" ] && source "$LIB_DIR/json-output.sh"
[ -f "$LIB_DIR/tdd-flow.sh" ] && source "$LIB_DIR/tdd-flow.sh" [ -f "$LIB_DIR/tdd-flow.sh" ] && source "$LIB_DIR/tdd-flow.sh"
[ -f "$LIB_DIR/contract-harness.sh" ] && source "$LIB_DIR/contract-harness.sh"
STORIES_DIR="$PROJECT_ROOT/docs/stories" STORIES_DIR="$PROJECT_ROOT/docs/stories"
SPRINT_ARTIFACTS_DIR="$PROJECT_ROOT/docs/sprint-artifacts" SPRINT_ARTIFACTS_DIR="$PROJECT_ROOT/docs/sprint-artifacts"
@ -941,6 +942,11 @@ OPTIONS:
--skip-test-spec Skip test specification phase only --skip-test-spec Skip test specification phase only
--skip-test-impl Skip test implementation phase only --skip-test-impl Skip test implementation phase only
Contract Validation:
--init-harness Scaffold a contract-harness.yaml template and exit
--preflight-deep Also run a connectivity smoke (boots the sample env)
--skip-contract-validation Skip the contract harness preflight
Commit Control: Commit Control:
--no-commit Stage changes but don't commit --no-commit Stage changes but don't commit
--skip-done Skip stories with Status: Done --skip-done Skip stories with Status: Done
@ -983,6 +989,16 @@ FILES:
Logs: docs/sprint-artifacts/logs/epic-<id>-<timestamp>.log Logs: docs/sprint-artifacts/logs/epic-<id>-<timestamp>.log
Metrics: docs/sprint-artifacts/metrics/epic-<id>-metrics.yaml Metrics: docs/sprint-artifacts/metrics/epic-<id>-metrics.yaml
Checkpoint: docs/sprint-artifacts/.epic-<id>-checkpoint Checkpoint: docs/sprint-artifacts/.epic-<id>-checkpoint
Harness: contract-harness.yaml (project root or docs/) - optional
CONTRACT VALIDATION:
If a contract-harness.yaml is present, startup runs a preflight that checks
every credential, command, and file the harness needs (inferred from the
harness itself). A dry run prints a readiness report and exits non-zero when
anything required is missing, so it works as a CI readiness gate:
./epic-execute.sh <id> --dry-run # presence checks only
./epic-execute.sh <id> --dry-run --preflight-deep # + connectivity smoke
./epic-execute.sh --init-harness # scaffold a starter harness
For more information, see: docs/bmad_improvements_v2_fixes.md For more information, see: docs/bmad_improvements_v2_fixes.md
EOF EOF
@ -1009,6 +1025,9 @@ SKIP_STATIC_ANALYSIS=false
SKIP_DESIGN=false SKIP_DESIGN=false
SKIP_DESIGN_CRITIC=false SKIP_DESIGN_CRITIC=false
SKIP_REGRESSION=false SKIP_REGRESSION=false
SKIP_CONTRACT_VALIDATION=false
PREFLIGHT_DEEP=false
INIT_HARNESS=false
SKIP_TDD=false SKIP_TDD=false
SKIP_TEST_SPEC=false SKIP_TEST_SPEC=false
SKIP_TEST_IMPL=false SKIP_TEST_IMPL=false
@ -1084,6 +1103,18 @@ while [[ $# -gt 0 ]]; do
SKIP_REGRESSION=true SKIP_REGRESSION=true
shift shift
;; ;;
--skip-contract-validation)
SKIP_CONTRACT_VALIDATION=true
shift
;;
--preflight-deep)
PREFLIGHT_DEEP=true
shift
;;
--init-harness)
INIT_HARNESS=true
shift
;;
--skip-tdd) --skip-tdd)
SKIP_TDD=true SKIP_TDD=true
shift shift
@ -1111,6 +1142,17 @@ while [[ $# -gt 0 ]]; do
esac esac
done done
# --init-harness short-circuits the run: scaffold a contract-harness.yaml
# template and exit. This happens before the epic-id check, so no epic is needed.
if [ "$INIT_HARNESS" = true ]; then
    # Guard: the scaffolder only exists when the contract-harness module was sourced.
    if ! type init_contract_harness >/dev/null 2>&1; then
        echo "Contract harness module not available (scripts/epic-execute-lib/contract-harness.sh)"
        exit 1
    fi
    init_contract_harness
    # Propagate the scaffolder's own status as the script's exit code.
    exit $?
fi
if [ -z "$EPIC_ID" ]; then if [ -z "$EPIC_ID" ]; then
echo "Usage: $0 <epic-id> [options]" echo "Usage: $0 <epic-id> [options]"
echo "" echo ""
@ -1210,6 +1252,26 @@ if [ "$NO_COMMIT" != true ] && type check_branch_protection >/dev/null 2>&1; the
fi fi
fi fi
# Contract harness preflight - validate readiness to run contract validation.
# Opt-in by presence of a contract-harness.yaml (located via find_contract_harness).
#   - Real run: fail-fast gate; abort before story 1 if the preflight fails.
#   - Dry run:  report readiness, keep going, and make the run exit non-zero at
#               the end (a CI readiness gate).
# Skipped entirely with --skip-contract-validation or when the contract-harness
# module (contract_preflight) was not sourced.
if [ "$SKIP_CONTRACT_VALIDATION" != true ] && type contract_preflight >/dev/null 2>&1; then
    CONTRACT_HARNESS_FILE=$(find_contract_harness)
    if [ -n "$CONTRACT_HARNESS_FILE" ]; then
        if ! contract_preflight "$CONTRACT_HARNESS_FILE"; then
            if [ "$DRY_RUN" = true ]; then
                # Record the failure here rather than relying on contract_preflight
                # having set the flag as a side effect, so the promise in the
                # warning below holds for any non-zero return.
                PREFLIGHT_FAILED=true
                log_warn "Preflight found missing prerequisites - dry run will exit non-zero"
            else
                log_error "Contract harness preflight failed - aborting before execution"
                exit 1
            fi
        fi
    elif [ "$VERBOSE" = true ]; then
        # No harness declared: contract validation is simply not configured.
        log "No contract-harness.yaml found - contract validation not configured"
    fi
fi
# Ensure directories exist # Ensure directories exist
mkdir -p "$UAT_DIR" mkdir -p "$UAT_DIR"
mkdir -p "$SPRINTS_DIR" mkdir -p "$SPRINTS_DIR"
@ -3216,6 +3278,14 @@ echo " - Metrics: $METRICS_FILE"
echo " - Log: $LOGS_DIR/epic-${EPIC_ID}-<timestamp>.log (saved on exit)" echo " - Log: $LOGS_DIR/epic-${EPIC_ID}-<timestamp>.log (saved on exit)"
echo "" echo ""
# Exit-code-honest contract gate: when PREFLIGHT_FAILED is true (a declared
# harness was missing prerequisites), the run must fail. This is what lets
# --dry-run double as a CI readiness check. An unset flag counts as "false".
case "${PREFLIGHT_FAILED:-false}" in
    true)
        log_warn "Contract preflight reported missing prerequisites - see the readiness report above"
        exit 1
        ;;
esac
if [ $FAILED -gt 0 ]; then if [ $FAILED -gt 0 ]; then
log_warn "$FAILED stories failed - check log for details" log_warn "$FAILED stories failed - check log for details"
log "Checkpoint preserved for resume capability" log "Checkpoint preserved for resume capability"