diff --git a/scripts/epic-execute-lib/design-phase.sh b/scripts/epic-execute-lib/design-phase.sh
index 578e9d32e..37bbc39ec 100644
--- a/scripts/epic-execute-lib/design-phase.sh
+++ b/scripts/epic-execute-lib/design-phase.sh
@@ -15,6 +15,69 @@
# Stores the last design output for passing to dev phase
LAST_DESIGN=""
+# Stores the gaps reported by the most recent design critic pass
+DESIGN_CRITIC_GAPS=""
+
+# =============================================================================
+# Codebase Exploration (deterministic, language-aware)
+# =============================================================================
+
+# Detect the project's primary language/toolchain from marker files.
+# Mirrors the detection used by the static-analysis gate.
+# Returns one of: node | rust | go | python | unknown
+detect_project_type() {
+ if [ -f "$PROJECT_ROOT/package.json" ]; then
+ echo "node"
+ elif [ -f "$PROJECT_ROOT/Cargo.toml" ]; then
+ echo "rust"
+ elif [ -f "$PROJECT_ROOT/go.mod" ]; then
+ echo "go"
+ elif [ -f "$PROJECT_ROOT/requirements.txt" ] || [ -f "$PROJECT_ROOT/pyproject.toml" ]; then
+ echo "python"
+ else
+ echo "unknown"
+ fi
+}
+
+# Build a deterministic, bounded repository map for the planner to start from,
+# tailored to the detected language. This replaces the old hardcoded JS/TS
+# find commands and the "hope the model explores" approach with concrete,
+# pre-computed context.
+build_repo_map() {
+ local ptype
+ ptype=$(detect_project_type)
+
+ local lang_label=""
+ local find_expr=()
+ case "$ptype" in
+ node) lang_label="Node.js / TypeScript"; find_expr=(-name '*.ts' -o -name '*.tsx' -o -name '*.js' -o -name '*.jsx') ;;
+ rust) lang_label="Rust"; find_expr=(-name '*.rs') ;;
+ go) lang_label="Go"; find_expr=(-name '*.go') ;;
+ python) lang_label="Python"; find_expr=(-name '*.py') ;;
+ *) lang_label="Unknown" ;;
+ esac
+
+ # Top-level directory structure (excluding noise dirs)
+ local top
+ top=$(cd "$PROJECT_ROOT" 2>/dev/null && ls -d */ 2>/dev/null \
+ | grep -vE '^(node_modules|\.git|dist|build|target|vendor|__pycache__|\.venv|coverage)/' \
+ | head -30)
+
+ # Representative source files for the detected language (bounded)
+ local sources=""
+ if [ "${#find_expr[@]}" -gt 0 ]; then
+ sources=$(cd "$PROJECT_ROOT" 2>/dev/null && find . \( "${find_expr[@]}" \) \
+ -not -path '*/node_modules/*' -not -path '*/.git/*' \
+ -not -path '*/dist/*' -not -path '*/build/*' \
+ -not -path '*/target/*' -not -path '*/vendor/*' \
+ -not -path '*/__pycache__/*' -not -path '*/.venv/*' \
+ 2>/dev/null | sed 's|^\./||' | head -40)
+ fi
+
+ printf 'Detected project type: %s\n\nTop-level structure:\n%s\n\nRepresentative source files:\n%s\n' \
+ "$lang_label" "${top:-(none)}" "${sources:-(none detected)}"
+}
+
# =============================================================================
# Design Phase Functions
# =============================================================================
@@ -58,7 +121,41 @@ execute_design_phase() {
fi
fi
- local design_prompt="You are a senior developer planning the implementation of a story.
+ # Pre-compute a deterministic, language-aware repository map (#5)
+ local repo_map=""
+ if type build_repo_map >/dev/null 2>&1; then
+ repo_map=$(build_repo_map)
+ fi
+
+ if [ "$DRY_RUN" = true ]; then
+ echo "[DRY RUN] Would execute design phase for $story_id"
+ return 0
+ fi
+
+ # Critic loop (#4): generate a plan, have a fresh-context critic check it
+ # against the ACs and architecture, and regenerate with feedback if gaps
+ # remain. Design is advisory, so we always proceed with the best plan.
+ local max_attempts="${MAX_DESIGN_CRITIC_ATTEMPTS:-2}"
+ local attempt=1
+ local feedback=""
+ local json=""
+
+ while true; do
+ # Revision block is empty on the first pass, populated by the critic
+ local revision_block=""
+ if [ -n "$feedback" ]; then
+ revision_block="## Revision Required
+
+A previous version of this plan was reviewed and found incomplete. Produce an
+improved plan that resolves ALL of the following gaps:
+
+
+$feedback
+
+"
+ fi
+
+ local design_prompt="You are a senior developer planning the implementation of a story.
## Your Task
@@ -91,14 +188,17 @@ $story_contents
$decision_context
-## Exploration
+## Repository Map
-First, explore the codebase to understand existing patterns and conventions
-before planning. Use the repository's own structure and language to guide you
-(e.g. inspect the relevant source directories, find files similar to what this
-story touches, and follow existing patterns rather than introducing new ones).
+Use this pre-computed map of the codebase as your starting point, then explore
+further (read the listed files, find similar implementations) before planning.
+Follow existing patterns rather than introducing new ones.
-## Required Output
+
+$repo_map
+
+
+${revision_block}## Required Output
Output your implementation plan as a single JSON result block. Map EVERY
acceptance criterion in the story to the files/functions that will implement
@@ -143,45 +243,78 @@ explaining why.
After outputting the JSON block, output exactly:
DESIGN COMPLETE: $story_id"
- # Log prompt size in verbose mode (consistent with other phases)
- log_prompt_size "$design_prompt" "design-phase"
+ # Log prompt size in verbose mode (consistent with other phases)
+ log_prompt_size "$design_prompt" "design-phase"
- if [ "$DRY_RUN" = true ]; then
- echo "[DRY RUN] Would execute design phase for $story_id"
- return 0
- fi
+ # Pipe to file to avoid memory bloat
+ run_claude_to_file "$design_prompt"
+ local result
+ result=$(read_phase_tail)
- # Pipe to file to avoid memory bloat
- run_claude_to_file "$design_prompt"
- local result
- result=$(read_phase_tail)
+ # Extract the JSON plan using the shared parser (falls back to legacy
+ # text scraping if no JSON block is present, e.g. older models).
+ json=""
+ if type extract_json_result >/dev/null 2>&1; then
+ json=$(extract_json_result "$result")
+ fi
- # Extract the JSON plan using the shared parser (falls back to legacy
- # text scraping if no JSON block is present, e.g. older models).
- local json=""
- if type extract_json_result >/dev/null 2>&1; then
- json=$(extract_json_result "$result")
- fi
+ # Determine completion using the shared JSON-first checker
+ local completion_status=0
+ if type check_phase_completion >/dev/null 2>&1; then
+ check_phase_completion "$result" "design" "$story_id"
+ completion_status=$?
+ fi
- # Determine completion using the shared JSON-first checker
- local completion_status=0
- if type check_phase_completion >/dev/null 2>&1; then
- check_phase_completion "$result" "design" "$story_id"
- completion_status=$?
- fi
+ if [ -n "$json" ] && [ "$completion_status" -ne 1 ]; then
+ # Valid JSON plan
+ LAST_DESIGN="$json"
+ else
+ # Fall back to legacy DESIGN START/END text block (backward compat)
+ LAST_DESIGN=$(echo "$result" | sed -n '/DESIGN START/,/DESIGN END/p')
+ fi
- if [ -n "$json" ] && [ "$completion_status" -ne 1 ]; then
- # Valid JSON plan
- LAST_DESIGN="$json"
- else
- # Fall back to legacy DESIGN START/END text block (backward compat)
- LAST_DESIGN=$(echo "$result" | sed -n '/DESIGN START/,/DESIGN END/p')
- fi
+ if [ -z "$LAST_DESIGN" ] || [ "$completion_status" -eq 1 ]; then
+ log_error "Design phase did not produce a valid plan"
+ return 1
+ fi
- if [ -z "$LAST_DESIGN" ] || [ "$completion_status" -eq 1 ]; then
- log_error "Design phase did not produce a valid plan"
- return 1
- fi
+ # Critic disabled or no attempts budgeted - accept the first plan
+ if [ "${SKIP_DESIGN_CRITIC:-false}" = true ] || [ "$max_attempts" -le 0 ]; then
+ break
+ fi
+
+ # Run the critic against the plan
+ run_design_critic "$story_file" "$story_id" "$arch_file" "$LAST_DESIGN"
+ local verdict=$?
+
+ if [ "$verdict" -ne 1 ]; then
+ # Approved (0) or unclear (2) - accept the current plan
+ if [ "$verdict" -eq 2 ]; then
+ log_warn "Design critic result unclear for $story_id - accepting current plan"
+ elif [ "$VERBOSE" = true ]; then
+ log "Design critic approved the plan: $story_id"
+ fi
+ break
+ fi
+
+ # verdict == 1: revision requested
+ if [ -z "$DESIGN_CRITIC_GAPS" ]; then
+ log_warn "Design critic requested revision but listed no actionable gaps - proceeding"
+ break
+ fi
+
+ if [ "$attempt" -ge "$max_attempts" ]; then
+ log_warn "Design still has gaps after $attempt critic attempt(s) for $story_id - proceeding with documented gaps"
+ if type add_metrics_issue >/dev/null 2>&1; then
+ add_metrics_issue "$story_id" "design_critic_gaps" "Unresolved design gaps after $attempt critic attempt(s)"
+ fi
+ break
+ fi
+
+ log_warn "Design critic requested revisions (attempt $attempt of $max_attempts) for $story_id - regenerating plan"
+ feedback="$DESIGN_CRITIC_GAPS"
+ attempt=$((attempt + 1))
+ done
# Persist the plan to a per-story file so the dev phase can read it
# even after a resume (when the in-memory LAST_DESIGN is empty).
@@ -199,6 +332,111 @@ DESIGN COMPLETE: $story_id"
return 0
}
+# Run a fresh-context critic pass over a proposed design plan (#4).
+# The critic checks two things: (a) does the plan map every acceptance
+# criterion, and (b) does it conform to the architecture. Gaps are stored in
+# DESIGN_CRITIC_GAPS for feedback into a regeneration pass.
+# Arguments:
+# $1 - story_file path
+# $2 - story_id
+# $3 - architecture file path (may be empty)
+# $4 - the proposed plan (JSON or text)
+# Returns: 0 approved, 1 needs revision, 2 unclear
+run_design_critic() {
+ local story_file="$1"
+ local story_id="$2"
+ local arch_file="$3"
+ local plan="$4"
+
+ DESIGN_CRITIC_GAPS=""
+
+ local story_contents
+ story_contents=$(cat "$story_file")
+
+ local critic_prompt="You are a skeptical senior engineer reviewing an implementation PLAN before any code is written.
+
+## Your Task
+
+Critique the proposed plan for story: $story_id
+
+You are reviewing a PLAN, not code. Be rigorous. Decide whether the plan:
+1. Maps EVERY acceptance criterion in the story to concrete files/functions
+2. Conforms to the project's architecture
+3. Is concrete and actionable (no vague hand-waving)
+
+## Story
+
+
+$story_contents
+
+
+## Architecture Reference
+
+**Read the architecture document at:** ${arch_file:-"(none found)"}
+
+## Proposed Plan
+
+
+$plan
+
+
+## Required Output
+
+Output a single JSON result block:
+
+\`\`\`json
+{
+ \"status\": \"APPROVED\" | \"NEEDS_REVISION\",
+ \"story_id\": \"$story_id\",
+ \"gaps\": [
+ {\"issue\": \"\", \"recommendation\": \"\"}
+ ]
+}
+\`\`\`
+
+Use APPROVED only if the plan covers every acceptance criterion and conforms to
+the architecture. Otherwise use NEEDS_REVISION and list specific, actionable gaps.
+
+## Completion Signal
+
+After the JSON block, output exactly one of:
+DESIGN CRITIC APPROVED: $story_id
+DESIGN CRITIC NEEDS_REVISION: $story_id"
+
+ log_prompt_size "$critic_prompt" "design-critic"
+
+ run_claude_to_file "$critic_prompt"
+ local result
+ result=$(read_phase_tail)
+
+ # Parse verdict + gaps (JSON first, text fallback)
+ local status=""
+ local cjson=""
+ if type extract_json_result >/dev/null 2>&1; then
+ cjson=$(extract_json_result "$result")
+ fi
+
+ if [ -n "$cjson" ] && command -v jq >/dev/null 2>&1; then
+ status=$(echo "$cjson" | jq -r '.status // empty' | tr '[:lower:]' '[:upper:]')
+ DESIGN_CRITIC_GAPS=$(echo "$cjson" | jq -r '.gaps[]? | "- \(.issue) -> \(.recommendation)"' 2>/dev/null || echo "")
+ fi
+
+ # Text fallback if JSON missing/unparseable
+ if [ -z "$status" ]; then
+ if echo "$result" | grep -q "DESIGN CRITIC APPROVED"; then
+ status="APPROVED"
+ elif echo "$result" | grep -q "DESIGN CRITIC NEEDS_REVISION"; then
+ status="NEEDS_REVISION"
+ fi
+ fi
+
+ case "$status" in
+ APPROVED) return 0 ;;
+ NEEDS_REVISION) return 1 ;;
+ *) return 2 ;;
+ esac
+}
+
# Validate that the design plan maps every acceptance criterion in the story.
# This is advisory: it warns and records a metric but never fails the story
# (design is a non-blocking phase). AC extraction is heuristic since story
diff --git a/scripts/epic-execute.sh b/scripts/epic-execute.sh
index a52a4f8f3..892db3cca 100755
--- a/scripts/epic-execute.sh
+++ b/scripts/epic-execute.sh
@@ -936,6 +936,7 @@ OPTIONS:
TDD/Testing Options:
--skip-design Skip pre-implementation design phase
+ --skip-design-critic Skip the design plan critic/revision loop
--skip-tdd Skip all test-first development phases
--skip-test-spec Skip test specification phase only
--skip-test-impl Skip test implementation phase only
@@ -976,6 +977,7 @@ ENVIRONMENT VARIABLES:
MAX_PROMPT_SIZE Maximum prompt size in bytes (default: 150000)
RETRY_MAX_ATTEMPTS Max retry attempts for transient failures (default: 3)
RETRY_INITIAL_DELAY Initial retry delay in seconds (default: 5)
+ MAX_DESIGN_CRITIC_ATTEMPTS Max design plan revision rounds (default: 2)
FILES:
Logs: docs/sprint-artifacts/logs/epic--.log
@@ -1005,6 +1007,7 @@ SKIP_TEST_QUALITY=false
SKIP_TRACEABILITY=false
SKIP_STATIC_ANALYSIS=false
SKIP_DESIGN=false
+SKIP_DESIGN_CRITIC=false
SKIP_REGRESSION=false
SKIP_TDD=false
SKIP_TEST_SPEC=false
@@ -1073,6 +1076,10 @@ while [[ $# -gt 0 ]]; do
SKIP_DESIGN=true
shift
;;
+ --skip-design-critic)
+ SKIP_DESIGN_CRITIC=true
+ shift
+ ;;
--skip-regression)
SKIP_REGRESSION=true
shift
@@ -1120,6 +1127,7 @@ if [ -z "$EPIC_ID" ]; then
echo " --skip-traceability Skip traceability check (not recommended)"
echo " --skip-static-analysis Skip static analysis gate (runs real tooling)"
echo " --skip-design Skip pre-implementation design phase"
+ echo " --skip-design-critic Skip the design plan critic/revision loop"
echo " --skip-regression Skip regression test gate"
echo " --skip-tdd Skip test-first development phases"
echo " --skip-test-spec Skip test specification phase only"