feat(epic-execute): deterministic repo map + design critic loop

Design phase improvements #5 and #4:

#5 Deterministic, language-aware exploration:
- Add detect_project_type (node/rust/go/python) and build_repo_map, which
  pre-computes a bounded repository map (project type, top-level structure,
  representative source files) tailored to the detected language
- Inject the map into the design prompt instead of hardcoded JS/TS find
  commands and "hope the model explores" guidance

#4 Critic loop:
- Add run_design_critic: a fresh-context skeptic that checks whether the plan
  maps every acceptance criterion and conforms to the architecture, emitting
  structured gaps
- execute_design_phase now generates -> critiques -> regenerates with gap
  feedback, bounded by MAX_DESIGN_CRITIC_ATTEMPTS (default 2). Design stays
  advisory: it always proceeds with the best plan and records a metric when
  gaps remain
- Add --skip-design-critic flag and document MAX_DESIGN_CRITIC_ATTEMPTS

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
Caleb 2026-06-03 06:04:52 -05:00
parent 0e038f2a54
commit 33d55f902c
2 changed files with 286 additions and 40 deletions

View File

@ -15,6 +15,69 @@
# Stores the last design output for passing to dev phase
LAST_DESIGN=""
# Stores the gaps reported by the most recent design critic pass
DESIGN_CRITIC_GAPS=""
# =============================================================================
# Codebase Exploration (deterministic, language-aware)
# =============================================================================
# Detect the project's primary language/toolchain from marker files.
# Mirrors the detection used by the static-analysis gate.
# Returns one of: node | rust | go | python | unknown
detect_project_type() {
if [ -f "$PROJECT_ROOT/package.json" ]; then
echo "node"
elif [ -f "$PROJECT_ROOT/Cargo.toml" ]; then
echo "rust"
elif [ -f "$PROJECT_ROOT/go.mod" ]; then
echo "go"
elif [ -f "$PROJECT_ROOT/requirements.txt" ] || [ -f "$PROJECT_ROOT/pyproject.toml" ]; then
echo "python"
else
echo "unknown"
fi
}
# Build a deterministic, bounded repository map for the planner to start from,
# tailored to the detected language. This replaces the old hardcoded JS/TS
# find commands and the "hope the model explores" approach with concrete,
# pre-computed context.
build_repo_map() {
local ptype
ptype=$(detect_project_type)
local lang_label=""
local find_expr=()
case "$ptype" in
node) lang_label="Node.js / TypeScript"; find_expr=(-name '*.ts' -o -name '*.tsx' -o -name '*.js' -o -name '*.jsx') ;;
rust) lang_label="Rust"; find_expr=(-name '*.rs') ;;
go) lang_label="Go"; find_expr=(-name '*.go') ;;
python) lang_label="Python"; find_expr=(-name '*.py') ;;
*) lang_label="Unknown" ;;
esac
# Top-level directory structure (excluding noise dirs)
local top
top=$(cd "$PROJECT_ROOT" 2>/dev/null && ls -d */ 2>/dev/null \
| grep -vE '^(node_modules|\.git|dist|build|target|vendor|__pycache__|\.venv|coverage)/' \
| head -30)
# Representative source files for the detected language (bounded)
local sources=""
if [ "${#find_expr[@]}" -gt 0 ]; then
sources=$(cd "$PROJECT_ROOT" 2>/dev/null && find . \( "${find_expr[@]}" \) \
-not -path '*/node_modules/*' -not -path '*/.git/*' \
-not -path '*/dist/*' -not -path '*/build/*' \
-not -path '*/target/*' -not -path '*/vendor/*' \
-not -path '*/__pycache__/*' -not -path '*/.venv/*' \
2>/dev/null | sed 's|^\./||' | head -40)
fi
printf 'Detected project type: %s\n\nTop-level structure:\n%s\n\nRepresentative source files:\n%s\n' \
"$lang_label" "${top:-(none)}" "${sources:-(none detected)}"
}
# =============================================================================
# Design Phase Functions
# =============================================================================
@ -58,7 +121,41 @@ execute_design_phase() {
fi
fi
local design_prompt="You are a senior developer planning the implementation of a story.
# Pre-compute a deterministic, language-aware repository map (#5)
local repo_map=""
if type build_repo_map >/dev/null 2>&1; then
repo_map=$(build_repo_map)
fi
if [ "$DRY_RUN" = true ]; then
echo "[DRY RUN] Would execute design phase for $story_id"
return 0
fi
# Critic loop (#4): generate a plan, have a fresh-context critic check it
# against the ACs and architecture, and regenerate with feedback if gaps
# remain. Design is advisory, so we always proceed with the best plan.
local max_attempts="${MAX_DESIGN_CRITIC_ATTEMPTS:-2}"
local attempt=1
local feedback=""
local json=""
while true; do
# Revision block is empty on the first pass, populated by the critic
local revision_block=""
if [ -n "$feedback" ]; then
revision_block="## Revision Required
A previous version of this plan was reviewed and found incomplete. Produce an
improved plan that resolves ALL of the following gaps:
<gaps>
$feedback
</gaps>
"
fi
local design_prompt="You are a senior developer planning the implementation of a story.
## Your Task
@ -91,14 +188,17 @@ $story_contents
$decision_context
</decision-context>
## Exploration
## Repository Map
First, explore the codebase to understand existing patterns and conventions
before planning. Use the repository's own structure and language to guide you
(e.g. inspect the relevant source directories, find files similar to what this
story touches, and follow existing patterns rather than introducing new ones).
Use this pre-computed map of the codebase as your starting point, then explore
further (read the listed files, find similar implementations) before planning.
Follow existing patterns rather than introducing new ones.
## Required Output
<repo-map>
$repo_map
</repo-map>
${revision_block}## Required Output
Output your implementation plan as a single JSON result block. Map EVERY
acceptance criterion in the story to the files/functions that will implement
@ -143,45 +243,78 @@ explaining why.
After outputting the JSON block, output exactly:
DESIGN COMPLETE: $story_id"
# Log prompt size in verbose mode (consistent with other phases)
log_prompt_size "$design_prompt" "design-phase"
# Log prompt size in verbose mode (consistent with other phases)
log_prompt_size "$design_prompt" "design-phase"
if [ "$DRY_RUN" = true ]; then
echo "[DRY RUN] Would execute design phase for $story_id"
return 0
fi
# Pipe to file to avoid memory bloat
run_claude_to_file "$design_prompt"
local result
result=$(read_phase_tail)
# Pipe to file to avoid memory bloat
run_claude_to_file "$design_prompt"
local result
result=$(read_phase_tail)
# Extract the JSON plan using the shared parser (falls back to legacy
# text scraping if no JSON block is present, e.g. older models).
json=""
if type extract_json_result >/dev/null 2>&1; then
json=$(extract_json_result "$result")
fi
# Extract the JSON plan using the shared parser (falls back to legacy
# text scraping if no JSON block is present, e.g. older models).
local json=""
if type extract_json_result >/dev/null 2>&1; then
json=$(extract_json_result "$result")
fi
# Determine completion using the shared JSON-first checker
local completion_status=0
if type check_phase_completion >/dev/null 2>&1; then
check_phase_completion "$result" "design" "$story_id"
completion_status=$?
fi
# Determine completion using the shared JSON-first checker
local completion_status=0
if type check_phase_completion >/dev/null 2>&1; then
check_phase_completion "$result" "design" "$story_id"
completion_status=$?
fi
if [ -n "$json" ] && [ "$completion_status" -ne 1 ]; then
# Valid JSON plan
LAST_DESIGN="$json"
else
# Fall back to legacy DESIGN START/END text block (backward compat)
LAST_DESIGN=$(echo "$result" | sed -n '/DESIGN START/,/DESIGN END/p')
fi
if [ -n "$json" ] && [ "$completion_status" -ne 1 ]; then
# Valid JSON plan
LAST_DESIGN="$json"
else
# Fall back to legacy DESIGN START/END text block (backward compat)
LAST_DESIGN=$(echo "$result" | sed -n '/DESIGN START/,/DESIGN END/p')
fi
if [ -z "$LAST_DESIGN" ] || [ "$completion_status" -eq 1 ]; then
log_error "Design phase did not produce a valid plan"
return 1
fi
if [ -z "$LAST_DESIGN" ] || [ "$completion_status" -eq 1 ]; then
log_error "Design phase did not produce a valid plan"
return 1
fi
# Critic disabled or no attempts budgeted - accept the first plan
if [ "${SKIP_DESIGN_CRITIC:-false}" = true ] || [ "$max_attempts" -le 0 ]; then
break
fi
# Run the critic against the plan
run_design_critic "$story_file" "$story_id" "$arch_file" "$LAST_DESIGN"
local verdict=$?
if [ "$verdict" -ne 1 ]; then
# Approved (0) or unclear (2) - accept the current plan
if [ "$verdict" -eq 2 ]; then
log_warn "Design critic result unclear for $story_id - accepting current plan"
elif [ "$VERBOSE" = true ]; then
log "Design critic approved the plan: $story_id"
fi
break
fi
# verdict == 1: revision requested
if [ -z "$DESIGN_CRITIC_GAPS" ]; then
log_warn "Design critic requested revision but listed no actionable gaps - proceeding"
break
fi
if [ "$attempt" -ge "$max_attempts" ]; then
log_warn "Design still has gaps after $attempt critic attempt(s) for $story_id - proceeding with documented gaps"
if type add_metrics_issue >/dev/null 2>&1; then
add_metrics_issue "$story_id" "design_critic_gaps" "Unresolved design gaps after $attempt critic attempt(s)"
fi
break
fi
log_warn "Design critic requested revisions (attempt $attempt of $max_attempts) for $story_id - regenerating plan"
feedback="$DESIGN_CRITIC_GAPS"
attempt=$((attempt + 1))
done
# Persist the plan to a per-story file so the dev phase can read it
# even after a resume (when the in-memory LAST_DESIGN is empty).
@ -199,6 +332,111 @@ DESIGN COMPLETE: $story_id"
return 0
}
# Run a fresh-context critic pass over a proposed design plan (#4).
# The critic checks two things: (a) does the plan map every acceptance
# criterion, and (b) does it conform to the architecture. Gaps are stored in
# DESIGN_CRITIC_GAPS for feedback into a regeneration pass.
# Arguments:
# $1 - story_file path
# $2 - story_id
# $3 - architecture file path (may be empty)
# $4 - the proposed plan (JSON or text)
# Returns: 0 approved, 1 needs revision, 2 unclear
run_design_critic() {
local story_file="$1"
local story_id="$2"
local arch_file="$3"
local plan="$4"
DESIGN_CRITIC_GAPS=""
local story_contents
story_contents=$(cat "$story_file")
local critic_prompt="You are a skeptical senior engineer reviewing an implementation PLAN before any code is written.
## Your Task
Critique the proposed plan for story: $story_id
You are reviewing a PLAN, not code. Be rigorous. Decide whether the plan:
1. Maps EVERY acceptance criterion in the story to concrete files/functions
2. Conforms to the project's architecture
3. Is concrete and actionable (no vague hand-waving)
## Story
<story>
$story_contents
</story>
## Architecture Reference
**Read the architecture document at:** ${arch_file:-"(none found)"}
## Proposed Plan
<plan>
$plan
</plan>
## Required Output
Output a single JSON result block:
\`\`\`json
{
\"status\": \"APPROVED\" | \"NEEDS_REVISION\",
\"story_id\": \"$story_id\",
\"gaps\": [
{\"issue\": \"<what is missing or wrong>\", \"recommendation\": \"<how to fix it>\"}
]
}
\`\`\`
Use APPROVED only if the plan covers every acceptance criterion and conforms to
the architecture. Otherwise use NEEDS_REVISION and list specific, actionable gaps.
## Completion Signal
After the JSON block, output exactly one of:
DESIGN CRITIC APPROVED: $story_id
DESIGN CRITIC NEEDS_REVISION: $story_id"
log_prompt_size "$critic_prompt" "design-critic"
run_claude_to_file "$critic_prompt"
local result
result=$(read_phase_tail)
# Parse verdict + gaps (JSON first, text fallback)
local status=""
local cjson=""
if type extract_json_result >/dev/null 2>&1; then
cjson=$(extract_json_result "$result")
fi
if [ -n "$cjson" ] && command -v jq >/dev/null 2>&1; then
status=$(echo "$cjson" | jq -r '.status // empty' | tr '[:lower:]' '[:upper:]')
DESIGN_CRITIC_GAPS=$(echo "$cjson" | jq -r '.gaps[]? | "- \(.issue) -> \(.recommendation)"' 2>/dev/null || echo "")
fi
# Text fallback if JSON missing/unparseable
if [ -z "$status" ]; then
if echo "$result" | grep -q "DESIGN CRITIC APPROVED"; then
status="APPROVED"
elif echo "$result" | grep -q "DESIGN CRITIC NEEDS_REVISION"; then
status="NEEDS_REVISION"
fi
fi
case "$status" in
APPROVED) return 0 ;;
NEEDS_REVISION) return 1 ;;
*) return 2 ;;
esac
}
# Validate that the design plan maps every acceptance criterion in the story.
# This is advisory: it warns and records a metric but never fails the story
# (design is a non-blocking phase). AC extraction is heuristic since story

View File

@ -936,6 +936,7 @@ OPTIONS:
TDD/Testing Options:
--skip-design Skip pre-implementation design phase
--skip-design-critic Skip the design plan critic/revision loop
--skip-tdd Skip all test-first development phases
--skip-test-spec Skip test specification phase only
--skip-test-impl Skip test implementation phase only
@ -976,6 +977,7 @@ ENVIRONMENT VARIABLES:
MAX_PROMPT_SIZE Maximum prompt size in bytes (default: 150000)
RETRY_MAX_ATTEMPTS Max retry attempts for transient failures (default: 3)
RETRY_INITIAL_DELAY Initial retry delay in seconds (default: 5)
MAX_DESIGN_CRITIC_ATTEMPTS Max design plan revision rounds (default: 2)
FILES:
Logs: docs/sprint-artifacts/logs/epic-<id>-<timestamp>.log
@ -1005,6 +1007,7 @@ SKIP_TEST_QUALITY=false
SKIP_TRACEABILITY=false
SKIP_STATIC_ANALYSIS=false
SKIP_DESIGN=false
SKIP_DESIGN_CRITIC=false
SKIP_REGRESSION=false
SKIP_TDD=false
SKIP_TEST_SPEC=false
@ -1073,6 +1076,10 @@ while [[ $# -gt 0 ]]; do
SKIP_DESIGN=true
shift
;;
--skip-design-critic)
SKIP_DESIGN_CRITIC=true
shift
;;
--skip-regression)
SKIP_REGRESSION=true
shift
@ -1120,6 +1127,7 @@ if [ -z "$EPIC_ID" ]; then
echo " --skip-traceability Skip traceability check (not recommended)"
echo " --skip-static-analysis Skip static analysis gate (runs real tooling)"
echo " --skip-design Skip pre-implementation design phase"
echo " --skip-design-critic Skip the design plan critic/revision loop"
echo " --skip-regression Skip regression test gate"
echo " --skip-tdd Skip test-first development phases"
echo " --skip-test-spec Skip test specification phase only"