feat(epic-execute): deterministic repo map + design critic loop
Design phase improvements #5 and #4:

#5 Deterministic, language-aware exploration:
- Add detect_project_type (node/rust/go/python) and build_repo_map, which pre-computes a bounded repository map (project type, top-level structure, representative source files) tailored to the detected language
- Inject the map into the design prompt instead of hardcoded JS/TS find commands and "hope the model explores" guidance

#4 Critic loop:
- Add run_design_critic: a fresh-context skeptic that checks whether the plan maps every acceptance criterion and conforms to the architecture, emitting structured gaps
- execute_design_phase now generates -> critiques -> regenerates with gap feedback, bounded by MAX_DESIGN_CRITIC_ATTEMPTS (default 2). Design stays advisory: it always proceeds with the best plan and records a metric when gaps remain
- Add --skip-design-critic flag and document MAX_DESIGN_CRITIC_ATTEMPTS

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
parent
0e038f2a54
commit
33d55f902c
|
|
@ -15,6 +15,69 @@
|
||||||
# Stores the last design output for passing to dev phase
|
# Stores the last design output for passing to dev phase
|
||||||
LAST_DESIGN=""
|
LAST_DESIGN=""
|
||||||
|
|
||||||
|
# Stores the gaps reported by the most recent design critic pass
|
||||||
|
DESIGN_CRITIC_GAPS=""
|
||||||
|
|
||||||
|
# =============================================================================
|
||||||
|
# Codebase Exploration (deterministic, language-aware)
|
||||||
|
# =============================================================================
|
||||||
|
|
||||||
|
# Detect the project's primary language/toolchain from marker files.
# Mirrors the detection used by the static-analysis gate.
# Globals:
#   PROJECT_ROOT (read) - directory to probe; falls back to the current
#                         directory when unset or empty
# Outputs:
#   Writes one of: node | rust | go | python | unknown  to stdout
detect_project_type() {
  # Without this fallback an unset/empty PROJECT_ROOT would probe
  # "/package.json" etc. at the filesystem root instead of the project.
  local root="${PROJECT_ROOT:-.}"

  # Order matters: in a mixed-language repo the first marker found wins.
  if [ -f "$root/package.json" ]; then
    echo "node"
  elif [ -f "$root/Cargo.toml" ]; then
    echo "rust"
  elif [ -f "$root/go.mod" ]; then
    echo "go"
  elif [ -f "$root/requirements.txt" ] || [ -f "$root/pyproject.toml" ]; then
    echo "python"
  else
    echo "unknown"
  fi
}
|
||||||
|
|
||||||
|
# Build a deterministic, bounded repository map for the planner to start from,
# tailored to the detected language. This replaces the old hardcoded JS/TS
# find commands and the "hope the model explores" approach with concrete,
# pre-computed context.
# Globals:
#   PROJECT_ROOT (read) - repository root; falls back to the current
#                         directory when unset or empty
# Outputs:
#   Writes a three-section text report to stdout: the detected project type,
#   up to 30 top-level directories, and up to 40 source files matching the
#   detected language. Both lists are sorted byte-wise (LC_ALL=C) so the same
#   tree always yields the same map. Empty sections are rendered as "(none)"
#   and "(none detected)".
build_repo_map() {
  local root="${PROJECT_ROOT:-.}"

  local ptype
  ptype=$(detect_project_type)

  # Human-readable label plus the find(1) name tests for the language.
  # name_expr stays empty for unknown projects, which skips the file search.
  local lang_label="Unknown"
  local -a name_expr=()
  case "$ptype" in
    node)
      lang_label="Node.js / TypeScript"
      name_expr=(-name '*.ts' -o -name '*.tsx' -o -name '*.js' -o -name '*.jsx')
      ;;
    rust)
      lang_label="Rust"
      name_expr=(-name '*.rs')
      ;;
    go)
      lang_label="Go"
      name_expr=(-name '*.go')
      ;;
    python)
      lang_label="Python"
      name_expr=(-name '*.py')
      ;;
  esac

  # Top-level directory structure (excluding noise dirs). Globbing instead of
  # parsing `ls` keeps unusual directory names intact and yields nothing when
  # there are no directories. Runs in a subshell so the cd does not leak.
  local top
  top=$(
    cd "$root" 2>/dev/null || exit 0
    for dir in */; do
      [ -d "$dir" ] || continue
      case "${dir%/}" in
        (node_modules | .git | dist | build | target | vendor | __pycache__ | .venv | coverage)
          continue
          ;;
      esac
      printf '%s\n' "$dir"
    done | LC_ALL=C sort | head -30
  )

  # Representative source files for the detected language (bounded).
  # -prune stops find from descending into dependency/build trees at all
  # (rather than walking them and discarding every hit), -type f keeps
  # directories that happen to match the name tests out of the list, and the
  # sort makes the head -40 selection independent of filesystem order.
  local sources=""
  if [ "${#name_expr[@]}" -gt 0 ]; then
    sources=$(
      cd "$root" 2>/dev/null || exit 0
      find . \
        -type d \( -name node_modules -o -name .git -o -name dist \
          -o -name build -o -name target -o -name vendor \
          -o -name __pycache__ -o -name .venv -o -name coverage \) -prune \
        -o -type f \( "${name_expr[@]}" \) -print 2>/dev/null \
        | sed 's|^\./||' | LC_ALL=C sort | head -40
    )
  fi

  printf 'Detected project type: %s\n\nTop-level structure:\n%s\n\nRepresentative source files:\n%s\n' \
    "$lang_label" "${top:-(none)}" "${sources:-(none detected)}"
}
|
||||||
|
|
||||||
# =============================================================================
|
# =============================================================================
|
||||||
# Design Phase Functions
|
# Design Phase Functions
|
||||||
# =============================================================================
|
# =============================================================================
|
||||||
|
|
@ -58,6 +121,40 @@ execute_design_phase() {
|
||||||
fi
|
fi
|
||||||
fi
|
fi
|
||||||
|
|
||||||
|
# Pre-compute a deterministic, language-aware repository map (#5)
|
||||||
|
local repo_map=""
|
||||||
|
if type build_repo_map >/dev/null 2>&1; then
|
||||||
|
repo_map=$(build_repo_map)
|
||||||
|
fi
|
||||||
|
|
||||||
|
if [ "$DRY_RUN" = true ]; then
|
||||||
|
echo "[DRY RUN] Would execute design phase for $story_id"
|
||||||
|
return 0
|
||||||
|
fi
|
||||||
|
|
||||||
|
# Critic loop (#4): generate a plan, have a fresh-context critic check it
|
||||||
|
# against the ACs and architecture, and regenerate with feedback if gaps
|
||||||
|
# remain. Design is advisory, so we always proceed with the best plan.
|
||||||
|
local max_attempts="${MAX_DESIGN_CRITIC_ATTEMPTS:-2}"
|
||||||
|
local attempt=1
|
||||||
|
local feedback=""
|
||||||
|
local json=""
|
||||||
|
|
||||||
|
while true; do
|
||||||
|
# Revision block is empty on the first pass, populated by the critic
|
||||||
|
local revision_block=""
|
||||||
|
if [ -n "$feedback" ]; then
|
||||||
|
revision_block="## Revision Required
|
||||||
|
|
||||||
|
A previous version of this plan was reviewed and found incomplete. Produce an
|
||||||
|
improved plan that resolves ALL of the following gaps:
|
||||||
|
|
||||||
|
<gaps>
|
||||||
|
$feedback
|
||||||
|
</gaps>
|
||||||
|
"
|
||||||
|
fi
|
||||||
|
|
||||||
local design_prompt="You are a senior developer planning the implementation of a story.
|
local design_prompt="You are a senior developer planning the implementation of a story.
|
||||||
|
|
||||||
## Your Task
|
## Your Task
|
||||||
|
|
@ -91,14 +188,17 @@ $story_contents
|
||||||
$decision_context
|
$decision_context
|
||||||
</decision-context>
|
</decision-context>
|
||||||
|
|
||||||
## Exploration
|
## Repository Map
|
||||||
|
|
||||||
First, explore the codebase to understand existing patterns and conventions
|
Use this pre-computed map of the codebase as your starting point, then explore
|
||||||
before planning. Use the repository's own structure and language to guide you
|
further (read the listed files, find similar implementations) before planning.
|
||||||
(e.g. inspect the relevant source directories, find files similar to what this
|
Follow existing patterns rather than introducing new ones.
|
||||||
story touches, and follow existing patterns rather than introducing new ones).
|
|
||||||
|
|
||||||
## Required Output
|
<repo-map>
|
||||||
|
$repo_map
|
||||||
|
</repo-map>
|
||||||
|
|
||||||
|
${revision_block}## Required Output
|
||||||
|
|
||||||
Output your implementation plan as a single JSON result block. Map EVERY
|
Output your implementation plan as a single JSON result block. Map EVERY
|
||||||
acceptance criterion in the story to the files/functions that will implement
|
acceptance criterion in the story to the files/functions that will implement
|
||||||
|
|
@ -146,11 +246,6 @@ DESIGN COMPLETE: $story_id"
|
||||||
# Log prompt size in verbose mode (consistent with other phases)
|
# Log prompt size in verbose mode (consistent with other phases)
|
||||||
log_prompt_size "$design_prompt" "design-phase"
|
log_prompt_size "$design_prompt" "design-phase"
|
||||||
|
|
||||||
if [ "$DRY_RUN" = true ]; then
|
|
||||||
echo "[DRY RUN] Would execute design phase for $story_id"
|
|
||||||
return 0
|
|
||||||
fi
|
|
||||||
|
|
||||||
# Pipe to file to avoid memory bloat
|
# Pipe to file to avoid memory bloat
|
||||||
run_claude_to_file "$design_prompt"
|
run_claude_to_file "$design_prompt"
|
||||||
local result
|
local result
|
||||||
|
|
@ -158,7 +253,7 @@ DESIGN COMPLETE: $story_id"
|
||||||
|
|
||||||
# Extract the JSON plan using the shared parser (falls back to legacy
|
# Extract the JSON plan using the shared parser (falls back to legacy
|
||||||
# text scraping if no JSON block is present, e.g. older models).
|
# text scraping if no JSON block is present, e.g. older models).
|
||||||
local json=""
|
json=""
|
||||||
if type extract_json_result >/dev/null 2>&1; then
|
if type extract_json_result >/dev/null 2>&1; then
|
||||||
json=$(extract_json_result "$result")
|
json=$(extract_json_result "$result")
|
||||||
fi
|
fi
|
||||||
|
|
@ -183,6 +278,44 @@ DESIGN COMPLETE: $story_id"
|
||||||
return 1
|
return 1
|
||||||
fi
|
fi
|
||||||
|
|
||||||
|
# Critic disabled or no attempts budgeted - accept the first plan
|
||||||
|
if [ "${SKIP_DESIGN_CRITIC:-false}" = true ] || [ "$max_attempts" -le 0 ]; then
|
||||||
|
break
|
||||||
|
fi
|
||||||
|
|
||||||
|
# Run the critic against the plan
|
||||||
|
run_design_critic "$story_file" "$story_id" "$arch_file" "$LAST_DESIGN"
|
||||||
|
local verdict=$?
|
||||||
|
|
||||||
|
if [ "$verdict" -ne 1 ]; then
|
||||||
|
# Approved (0) or unclear (2) - accept the current plan
|
||||||
|
if [ "$verdict" -eq 2 ]; then
|
||||||
|
log_warn "Design critic result unclear for $story_id - accepting current plan"
|
||||||
|
elif [ "$VERBOSE" = true ]; then
|
||||||
|
log "Design critic approved the plan: $story_id"
|
||||||
|
fi
|
||||||
|
break
|
||||||
|
fi
|
||||||
|
|
||||||
|
# verdict == 1: revision requested
|
||||||
|
if [ -z "$DESIGN_CRITIC_GAPS" ]; then
|
||||||
|
log_warn "Design critic requested revision but listed no actionable gaps - proceeding"
|
||||||
|
break
|
||||||
|
fi
|
||||||
|
|
||||||
|
if [ "$attempt" -ge "$max_attempts" ]; then
|
||||||
|
log_warn "Design still has gaps after $attempt critic attempt(s) for $story_id - proceeding with documented gaps"
|
||||||
|
if type add_metrics_issue >/dev/null 2>&1; then
|
||||||
|
add_metrics_issue "$story_id" "design_critic_gaps" "Unresolved design gaps after $attempt critic attempt(s)"
|
||||||
|
fi
|
||||||
|
break
|
||||||
|
fi
|
||||||
|
|
||||||
|
log_warn "Design critic requested revisions (attempt $attempt of $max_attempts) for $story_id - regenerating plan"
|
||||||
|
feedback="$DESIGN_CRITIC_GAPS"
|
||||||
|
attempt=$((attempt + 1))
|
||||||
|
done
|
||||||
|
|
||||||
# Persist the plan to a per-story file so the dev phase can read it
|
# Persist the plan to a per-story file so the dev phase can read it
|
||||||
# even after a resume (when the in-memory LAST_DESIGN is empty).
|
# even after a resume (when the in-memory LAST_DESIGN is empty).
|
||||||
persist_design "$story_id" "$LAST_DESIGN"
|
persist_design "$story_id" "$LAST_DESIGN"
|
||||||
|
|
@ -199,6 +332,111 @@ DESIGN COMPLETE: $story_id"
|
||||||
return 0
|
return 0
|
||||||
}
|
}
|
||||||
|
|
||||||
|
# Run a fresh-context critic pass over a proposed design plan (#4).
# The critic checks two things: (a) does the plan map every acceptance
# criterion, and (b) does it conform to the architecture. Gaps are stored in
# DESIGN_CRITIC_GAPS for feedback into a regeneration pass.
# Globals:
#   DESIGN_CRITIC_GAPS (written) - reset to "" on entry; set to one
#                                  "- <issue> -> <recommendation>" line per gap
#                                  when the critic's JSON could be parsed
# Arguments:
#   $1 - story_file path
#   $2 - story_id
#   $3 - architecture file path (may be empty)
#   $4 - the proposed plan (JSON or text)
# Returns: 0 approved, 1 needs revision, 2 unclear (no verdict, conflicting
#          verdicts, or the story file could not be read)
run_design_critic() {
  local story_file="$1"
  local story_id="$2"
  local arch_file="$3"
  local plan="$4"

  DESIGN_CRITIC_GAPS=""

  # A critic without the story cannot judge AC coverage; report "unclear"
  # rather than spending a model call on an empty <story> block.
  local story_contents
  if ! story_contents=$(cat -- "$story_file" 2>/dev/null); then
    log_warn "Design critic could not read story file: $story_file"
    return 2
  fi

  local arch_ref="${arch_file:-(none found)}"

  local critic_prompt="You are a skeptical senior engineer reviewing an implementation PLAN before any code is written.

## Your Task

Critique the proposed plan for story: $story_id

You are reviewing a PLAN, not code. Be rigorous. Decide whether the plan:
1. Maps EVERY acceptance criterion in the story to concrete files/functions
2. Conforms to the project's architecture
3. Is concrete and actionable (no vague hand-waving)

## Story

<story>
$story_contents
</story>

## Architecture Reference

**Read the architecture document at:** $arch_ref

## Proposed Plan

<plan>
$plan
</plan>

## Required Output

Output a single JSON result block:

\`\`\`json
{
  \"status\": \"APPROVED\" | \"NEEDS_REVISION\",
  \"story_id\": \"$story_id\",
  \"gaps\": [
    {\"issue\": \"<what is missing or wrong>\", \"recommendation\": \"<how to fix it>\"}
  ]
}
\`\`\`

Use APPROVED only if the plan covers every acceptance criterion and conforms to
the architecture. Otherwise use NEEDS_REVISION and list specific, actionable gaps.

## Completion Signal

After the JSON block, output exactly one of:
DESIGN CRITIC APPROVED: $story_id
DESIGN CRITIC NEEDS_REVISION: $story_id"

  log_prompt_size "$critic_prompt" "design-critic"

  # NOTE(review): presumably run_claude_to_file writes the model output to the
  # phase output file that read_phase_tail then reads back - confirm.
  run_claude_to_file "$critic_prompt"
  local result
  result=$(read_phase_tail)

  # Parse verdict + gaps (JSON first, text fallback)
  local status=""
  local cjson=""
  if type extract_json_result >/dev/null 2>&1; then
    cjson=$(extract_json_result "$result")
  fi

  if [ -n "$cjson" ] && command -v jq >/dev/null 2>&1; then
    # jq errors (malformed or non-object JSON) are silenced on purpose: an
    # empty status simply drops through to the text fallback below.
    status=$(printf '%s\n' "$cjson" \
      | jq -r '.status // empty' 2>/dev/null \
      | tr '[:lower:]' '[:upper:]')

    # Accept gaps given as objects or as plain strings, and substitute
    # placeholders for missing fields instead of printing the literal "null".
    DESIGN_CRITIC_GAPS=$(printf '%s\n' "$cjson" | jq -r '
      .gaps[]?
      | if type == "object" then
          "- \(.issue // "unspecified issue") -> \(.recommendation // "no recommendation given")"
        else
          "- \(tostring)"
        end' 2>/dev/null) || DESIGN_CRITIC_GAPS=""
  fi

  # Text fallback if JSON missing/unparseable. When both completion signals
  # appear the output is ambiguous, so status stays empty and the result is
  # reported as unclear instead of silently counting as an approval.
  if [ -z "$status" ]; then
    local saw_approved=false
    local saw_revision=false
    if grep -qF -- "DESIGN CRITIC APPROVED" <<<"$result"; then
      saw_approved=true
    fi
    if grep -qF -- "DESIGN CRITIC NEEDS_REVISION" <<<"$result"; then
      saw_revision=true
    fi

    if [ "$saw_approved" = true ] && [ "$saw_revision" = false ]; then
      status="APPROVED"
    elif [ "$saw_revision" = true ] && [ "$saw_approved" = false ]; then
      status="NEEDS_REVISION"
    fi
  fi

  case "$status" in
    APPROVED) return 0 ;;
    NEEDS_REVISION) return 1 ;;
    *) return 2 ;;
  esac
}
|
||||||
|
|
||||||
# Validate that the design plan maps every acceptance criterion in the story.
|
# Validate that the design plan maps every acceptance criterion in the story.
|
||||||
# This is advisory: it warns and records a metric but never fails the story
|
# This is advisory: it warns and records a metric but never fails the story
|
||||||
# (design is a non-blocking phase). AC extraction is heuristic since story
|
# (design is a non-blocking phase). AC extraction is heuristic since story
|
||||||
|
|
|
||||||
|
|
@ -936,6 +936,7 @@ OPTIONS:
|
||||||
|
|
||||||
TDD/Testing Options:
|
TDD/Testing Options:
|
||||||
--skip-design Skip pre-implementation design phase
|
--skip-design Skip pre-implementation design phase
|
||||||
|
--skip-design-critic Skip the design plan critic/revision loop
|
||||||
--skip-tdd Skip all test-first development phases
|
--skip-tdd Skip all test-first development phases
|
||||||
--skip-test-spec Skip test specification phase only
|
--skip-test-spec Skip test specification phase only
|
||||||
--skip-test-impl Skip test implementation phase only
|
--skip-test-impl Skip test implementation phase only
|
||||||
|
|
@ -976,6 +977,7 @@ ENVIRONMENT VARIABLES:
|
||||||
MAX_PROMPT_SIZE Maximum prompt size in bytes (default: 150000)
|
MAX_PROMPT_SIZE Maximum prompt size in bytes (default: 150000)
|
||||||
RETRY_MAX_ATTEMPTS Max retry attempts for transient failures (default: 3)
|
RETRY_MAX_ATTEMPTS Max retry attempts for transient failures (default: 3)
|
||||||
RETRY_INITIAL_DELAY Initial retry delay in seconds (default: 5)
|
RETRY_INITIAL_DELAY Initial retry delay in seconds (default: 5)
|
||||||
|
MAX_DESIGN_CRITIC_ATTEMPTS Max design plan revision rounds (default: 2)
|
||||||
|
|
||||||
FILES:
|
FILES:
|
||||||
Logs: docs/sprint-artifacts/logs/epic-<id>-<timestamp>.log
|
Logs: docs/sprint-artifacts/logs/epic-<id>-<timestamp>.log
|
||||||
|
|
@ -1005,6 +1007,7 @@ SKIP_TEST_QUALITY=false
|
||||||
SKIP_TRACEABILITY=false
|
SKIP_TRACEABILITY=false
|
||||||
SKIP_STATIC_ANALYSIS=false
|
SKIP_STATIC_ANALYSIS=false
|
||||||
SKIP_DESIGN=false
|
SKIP_DESIGN=false
|
||||||
|
SKIP_DESIGN_CRITIC=false
|
||||||
SKIP_REGRESSION=false
|
SKIP_REGRESSION=false
|
||||||
SKIP_TDD=false
|
SKIP_TDD=false
|
||||||
SKIP_TEST_SPEC=false
|
SKIP_TEST_SPEC=false
|
||||||
|
|
@ -1073,6 +1076,10 @@ while [[ $# -gt 0 ]]; do
|
||||||
SKIP_DESIGN=true
|
SKIP_DESIGN=true
|
||||||
shift
|
shift
|
||||||
;;
|
;;
|
||||||
|
--skip-design-critic)
|
||||||
|
SKIP_DESIGN_CRITIC=true
|
||||||
|
shift
|
||||||
|
;;
|
||||||
--skip-regression)
|
--skip-regression)
|
||||||
SKIP_REGRESSION=true
|
SKIP_REGRESSION=true
|
||||||
shift
|
shift
|
||||||
|
|
@ -1120,6 +1127,7 @@ if [ -z "$EPIC_ID" ]; then
|
||||||
echo " --skip-traceability Skip traceability check (not recommended)"
|
echo " --skip-traceability Skip traceability check (not recommended)"
|
||||||
echo " --skip-static-analysis Skip static analysis gate (runs real tooling)"
|
echo " --skip-static-analysis Skip static analysis gate (runs real tooling)"
|
||||||
echo " --skip-design Skip pre-implementation design phase"
|
echo " --skip-design Skip pre-implementation design phase"
|
||||||
|
echo " --skip-design-critic Skip the design plan critic/revision loop"
|
||||||
echo " --skip-regression Skip regression test gate"
|
echo " --skip-regression Skip regression test gate"
|
||||||
echo " --skip-tdd Skip test-first development phases"
|
echo " --skip-tdd Skip test-first development phases"
|
||||||
echo " --skip-test-spec Skip test specification phase only"
|
echo " --skip-test-spec Skip test specification phase only"
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue