feat(epic-execute): deterministic repo map + design critic loop

Design phase improvements #5 and #4: #5 Deterministic, language-aware exploration: - Add detect_project_type (node/rust/go/python) and build_repo_map, which pre-computes a bounded repository map (project type, top-level structure, representative source files) tailored to the detected language - Inject the map into the design prompt instead of hardcoded JS/TS find commands and "hope the model explores" guidance #4 Critic loop: - Add run_design_critic: a fresh-context skeptic that checks whether the plan maps every acceptance criterion and conforms to the architecture, emitting structured gaps - execute_design_phase now generates -> critiques -> regenerates with gap feedback, bounded by MAX_DESIGN_CRITIC_ATTEMPTS (default 2). Design stays advisory: it always proceeds with the best plan and records a metric when gaps remain - Add --skip-design-critic flag and document MAX_DESIGN_CRITIC_ATTEMPTS Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
2026-06-03 06:04:52 -05:00 · 2026-06-03 06:04:52 -05:00 · 33d55f902c
parent 0e038f2a54
commit 33d55f902c
2 changed files with 286 additions and 40 deletions
--- a/scripts/epic-execute-lib/design-phase.sh
+++ b/scripts/epic-execute-lib/design-phase.sh
@ -15,6 +15,69 @@
 # Stores the last design output for passing to dev phase
 LAST_DESIGN=""
 # Stores the gaps reported by the most recent design critic pass
 DESIGN_CRITIC_GAPS=""
 # =============================================================================
 # Codebase Exploration (deterministic, language-aware)
 # =============================================================================
 # Detect the project's primary language/toolchain from marker files.
 # Mirrors the detection used by the static-analysis gate.
 # Returns one of: node | rust | go | python | unknown
 detect_project_type() {
    if [ -f "$PROJECT_ROOT/package.json" ]; then
        echo "node"
    elif [ -f "$PROJECT_ROOT/Cargo.toml" ]; then
        echo "rust"
    elif [ -f "$PROJECT_ROOT/go.mod" ]; then
        echo "go"
    elif [ -f "$PROJECT_ROOT/requirements.txt" ] || [ -f "$PROJECT_ROOT/pyproject.toml" ]; then
        echo "python"
    else
        echo "unknown"
    fi
 }
 # Build a deterministic, bounded repository map for the planner to start from,
 # tailored to the detected language. This replaces the old hardcoded JS/TS
 # find commands and the "hope the model explores" approach with concrete,
 # pre-computed context.
 build_repo_map() {
    local ptype
    ptype=$(detect_project_type)
    local lang_label=""
    local find_expr=()
    case "$ptype" in
        node)   lang_label="Node.js / TypeScript"; find_expr=(-name '*.ts' -o -name '*.tsx' -o -name '*.js' -o -name '*.jsx') ;;
        rust)   lang_label="Rust";                 find_expr=(-name '*.rs') ;;
        go)     lang_label="Go";                   find_expr=(-name '*.go') ;;
        python) lang_label="Python";               find_expr=(-name '*.py') ;;
        *)      lang_label="Unknown" ;;
    esac
    # Top-level directory structure (excluding noise dirs)
    local top
    top=$(cd "$PROJECT_ROOT" 2>/dev/null && ls -d */ 2>/dev/null \
        | grep -vE '^(node_modules|\.git|dist|build|target|vendor|__pycache__|\.venv|coverage)/' \
        | head -30)
    # Representative source files for the detected language (bounded)
    local sources=""
    if [ "${#find_expr[@]}" -gt 0 ]; then
        sources=$(cd "$PROJECT_ROOT" 2>/dev/null && find . \( "${find_expr[@]}" \) \
            -not -path '*/node_modules/*' -not -path '*/.git/*' \
            -not -path '*/dist/*' -not -path '*/build/*' \
            -not -path '*/target/*' -not -path '*/vendor/*' \
            -not -path '*/__pycache__/*' -not -path '*/.venv/*' \
            2>/dev/null | sed 's|^\./||' | head -40)
    fi
    printf 'Detected project type: %s\n\nTop-level structure:\n%s\n\nRepresentative source files:\n%s\n' \
        "$lang_label" "${top:-(none)}" "${sources:-(none detected)}"
 }
 # =============================================================================
 # Design Phase Functions
 # =============================================================================
@ -58,6 +121,40 @@ execute_design_phase() {
        fi
    fi
    # Pre-compute a deterministic, language-aware repository map (#5)
    local repo_map=""
    if type build_repo_map >/dev/null 2>&1; then
        repo_map=$(build_repo_map)
    fi
    if [ "$DRY_RUN" = true ]; then
        echo "[DRY RUN] Would execute design phase for $story_id"
        return 0
    fi
    # Critic loop (#4): generate a plan, have a fresh-context critic check it
    # against the ACs and architecture, and regenerate with feedback if gaps
    # remain. Design is advisory, so we always proceed with the best plan.
    local max_attempts="${MAX_DESIGN_CRITIC_ATTEMPTS:-2}"
    local attempt=1
    local feedback=""
    local json=""
    while true; do
        # Revision block is empty on the first pass, populated by the critic
        local revision_block=""
        if [ -n "$feedback" ]; then
            revision_block="## Revision Required
 A previous version of this plan was reviewed and found incomplete. Produce an
 improved plan that resolves ALL of the following gaps:
 <gaps>
 $feedback
 </gaps>
 "
        fi
        local design_prompt="You are a senior developer planning the implementation of a story.
 ## Your Task
@ -91,14 +188,17 @@ $story_contents
 $decision_context
 </decision-context>
-## Exploration
+## Repository Map
-First, explore the codebase to understand existing patterns and conventions
+Use this pre-computed map of the codebase as your starting point, then explore
-before planning. Use the repository's own structure and language to guide you
+further (read the listed files, find similar implementations) before planning.
-(e.g. inspect the relevant source directories, find files similar to what this
+Follow existing patterns rather than introducing new ones.
 story touches, and follow existing patterns rather than introducing new ones).
-## Required Output
+<repo-map>
 $repo_map
 </repo-map>
 ${revision_block}## Required Output
 Output your implementation plan as a single JSON result block. Map EVERY
 acceptance criterion in the story to the files/functions that will implement
@ -146,11 +246,6 @@ DESIGN COMPLETE: $story_id"
        # Log prompt size in verbose mode (consistent with other phases)
        log_prompt_size "$design_prompt" "design-phase"
    if [ "$DRY_RUN" = true ]; then
        echo "[DRY RUN] Would execute design phase for $story_id"
        return 0
    fi
        # Pipe to file to avoid memory bloat
        run_claude_to_file "$design_prompt"
        local result
@ -158,7 +253,7 @@ DESIGN COMPLETE: $story_id"
        # Extract the JSON plan using the shared parser (falls back to legacy
        # text scraping if no JSON block is present, e.g. older models).
-    local json=""
+        json=""
        if type extract_json_result >/dev/null 2>&1; then
            json=$(extract_json_result "$result")
        fi
@ -183,6 +278,44 @@ DESIGN COMPLETE: $story_id"
            return 1
        fi
        # Critic disabled or no attempts budgeted - accept the first plan
        if [ "${SKIP_DESIGN_CRITIC:-false}" = true ] || [ "$max_attempts" -le 0 ]; then
            break
        fi
        # Run the critic against the plan
        run_design_critic "$story_file" "$story_id" "$arch_file" "$LAST_DESIGN"
        local verdict=$?
        if [ "$verdict" -ne 1 ]; then
            # Approved (0) or unclear (2) - accept the current plan
            if [ "$verdict" -eq 2 ]; then
                log_warn "Design critic result unclear for $story_id - accepting current plan"
            elif [ "$VERBOSE" = true ]; then
                log "Design critic approved the plan: $story_id"
            fi
            break
        fi
        # verdict == 1: revision requested
        if [ -z "$DESIGN_CRITIC_GAPS" ]; then
            log_warn "Design critic requested revision but listed no actionable gaps - proceeding"
            break
        fi
        if [ "$attempt" -ge "$max_attempts" ]; then
            log_warn "Design still has gaps after $attempt critic attempt(s) for $story_id - proceeding with documented gaps"
            if type add_metrics_issue >/dev/null 2>&1; then
                add_metrics_issue "$story_id" "design_critic_gaps" "Unresolved design gaps after $attempt critic attempt(s)"
            fi
            break
        fi
        log_warn "Design critic requested revisions (attempt $attempt of $max_attempts) for $story_id - regenerating plan"
        feedback="$DESIGN_CRITIC_GAPS"
        attempt=$((attempt + 1))
    done
    # Persist the plan to a per-story file so the dev phase can read it
    # even after a resume (when the in-memory LAST_DESIGN is empty).
    persist_design "$story_id" "$LAST_DESIGN"
@ -199,6 +332,111 @@ DESIGN COMPLETE: $story_id"
    return 0
 }
 # Run a fresh-context critic pass over a proposed design plan (#4).
 # The critic checks two things: (a) does the plan map every acceptance
 # criterion, and (b) does it conform to the architecture. Gaps are stored in
 # DESIGN_CRITIC_GAPS for feedback into a regeneration pass.
 # Arguments:
 #   $1 - story_file path
 #   $2 - story_id
 #   $3 - architecture file path (may be empty)
 #   $4 - the proposed plan (JSON or text)
 # Returns: 0 approved, 1 needs revision, 2 unclear
 run_design_critic() {
    local story_file="$1"
    local story_id="$2"
    local arch_file="$3"
    local plan="$4"
    DESIGN_CRITIC_GAPS=""
    local story_contents
    story_contents=$(cat "$story_file")
    local critic_prompt="You are a skeptical senior engineer reviewing an implementation PLAN before any code is written.
 ## Your Task
 Critique the proposed plan for story: $story_id
 You are reviewing a PLAN, not code. Be rigorous. Decide whether the plan:
 1. Maps EVERY acceptance criterion in the story to concrete files/functions
 2. Conforms to the project's architecture
 3. Is concrete and actionable (no vague hand-waving)
 ## Story
 <story>
 $story_contents
 </story>
 ## Architecture Reference
 **Read the architecture document at:** ${arch_file:-"(none found)"}
 ## Proposed Plan
 <plan>
 $plan
 </plan>
 ## Required Output
 Output a single JSON result block:
 \`\`\`json
 {
  \"status\": \"APPROVED\" | \"NEEDS_REVISION\",
  \"story_id\": \"$story_id\",
  \"gaps\": [
    {\"issue\": \"<what is missing or wrong>\", \"recommendation\": \"<how to fix it>\"}
  ]
 }
 \`\`\`
 Use APPROVED only if the plan covers every acceptance criterion and conforms to
 the architecture. Otherwise use NEEDS_REVISION and list specific, actionable gaps.
 ## Completion Signal
 After the JSON block, output exactly one of:
 DESIGN CRITIC APPROVED: $story_id
 DESIGN CRITIC NEEDS_REVISION: $story_id"
    log_prompt_size "$critic_prompt" "design-critic"
    run_claude_to_file "$critic_prompt"
    local result
    result=$(read_phase_tail)
    # Parse verdict + gaps (JSON first, text fallback)
    local status=""
    local cjson=""
    if type extract_json_result >/dev/null 2>&1; then
        cjson=$(extract_json_result "$result")
    fi
    if [ -n "$cjson" ] && command -v jq >/dev/null 2>&1; then
        status=$(echo "$cjson" | jq -r '.status // empty' | tr '[:lower:]' '[:upper:]')
        DESIGN_CRITIC_GAPS=$(echo "$cjson" | jq -r '.gaps[]? | "- \(.issue) -> \(.recommendation)"' 2>/dev/null || echo "")
    fi
    # Text fallback if JSON missing/unparseable
    if [ -z "$status" ]; then
        if echo "$result" | grep -q "DESIGN CRITIC APPROVED"; then
            status="APPROVED"
        elif echo "$result" | grep -q "DESIGN CRITIC NEEDS_REVISION"; then
            status="NEEDS_REVISION"
        fi
    fi
    case "$status" in
        APPROVED)       return 0 ;;
        NEEDS_REVISION) return 1 ;;
        *)              return 2 ;;
    esac
 }
 # Validate that the design plan maps every acceptance criterion in the story.
 # This is advisory: it warns and records a metric but never fails the story
 # (design is a non-blocking phase). AC extraction is heuristic since story
--- a/scripts/epic-execute.sh
+++ b/scripts/epic-execute.sh
@ -936,6 +936,7 @@ OPTIONS:
    TDD/Testing Options:
      --skip-design         Skip pre-implementation design phase
      --skip-design-critic  Skip the design plan critic/revision loop
      --skip-tdd            Skip all test-first development phases
      --skip-test-spec      Skip test specification phase only
      --skip-test-impl      Skip test implementation phase only
@ -976,6 +977,7 @@ ENVIRONMENT VARIABLES:
    MAX_PROMPT_SIZE         Maximum prompt size in bytes (default: 150000)
    RETRY_MAX_ATTEMPTS      Max retry attempts for transient failures (default: 3)
    RETRY_INITIAL_DELAY     Initial retry delay in seconds (default: 5)
    MAX_DESIGN_CRITIC_ATTEMPTS  Max design plan revision rounds (default: 2)
 FILES:
    Logs:       docs/sprint-artifacts/logs/epic-<id>-<timestamp>.log
@ -1005,6 +1007,7 @@ SKIP_TEST_QUALITY=false
 SKIP_TRACEABILITY=false
 SKIP_STATIC_ANALYSIS=false
 SKIP_DESIGN=false
 SKIP_DESIGN_CRITIC=false
 SKIP_REGRESSION=false
 SKIP_TDD=false
 SKIP_TEST_SPEC=false
@ -1073,6 +1076,10 @@ while [[ $# -gt 0 ]]; do
            SKIP_DESIGN=true
            shift
            ;;
        --skip-design-critic)
            SKIP_DESIGN_CRITIC=true
            shift
            ;;
        --skip-regression)
            SKIP_REGRESSION=true
            shift
@ -1120,6 +1127,7 @@ if [ -z "$EPIC_ID" ]; then
    echo "  --skip-traceability  Skip traceability check (not recommended)"
    echo "  --skip-static-analysis  Skip static analysis gate (runs real tooling)"
    echo "  --skip-design     Skip pre-implementation design phase"
    echo "  --skip-design-critic  Skip the design plan critic/revision loop"
    echo "  --skip-regression Skip regression test gate"
    echo "  --skip-tdd        Skip test-first development phases"
    echo "  --skip-test-spec  Skip test specification phase only"