diff --git a/src/core/tasks/workflow.xml b/src/core/tasks/workflow.xml index 402678fc..dc9c3dd8 100644 --- a/src/core/tasks/workflow.xml +++ b/src/core/tasks/workflow.xml @@ -63,6 +63,8 @@ invoke-workflow xml tag → Execute another workflow with given inputs and the workflow.xml runner invoke-task xml tag → Execute specified task invoke-protocol name="protocol_name" xml tag → Execute reusable protocol from protocols section + invoke-bash cmd="command" → Execute shell command, capture stdout/stderr, set {{bash_exit_code}}, {{bash_stdout}}, {{bash_stderr}} + set-var name="varname" value="..." → Set runtime variable {{varname}} to specified value (supports expressions) goto step="x" → Jump to specified step @@ -126,6 +128,8 @@ invoke-workflow - Call another workflow invoke-task - Call a task invoke-protocol - Execute a reusable protocol (e.g., discover_inputs) + invoke-bash cmd="..." - Execute shell command, results in {{bash_exit_code}}, {{bash_stdout}}, {{bash_stderr}} + set-var name="..." value="..." - Set runtime variable dynamically template-output - Save content checkpoint diff --git a/src/modules/bmm/module.yaml b/src/modules/bmm/module.yaml index ed988217..22fa7bfd 100644 --- a/src/modules/bmm/module.yaml +++ b/src/modules/bmm/module.yaml @@ -51,3 +51,23 @@ tea_use_playwright_utils: - "You must install packages yourself, or use test architect's *framework command." default: false result: "{value}" + +# External Code Review Agent Selection +# Allows delegating code reviews to an external AI agent CLI for independent, unbiased reviews +# Useful when using a different AI as primary IDE agent (e.g., Codex/Gemini users can use Claude for reviews) +external_review_agent: + prompt: + - "Which external agent should perform code reviews?" + - "External agents provide independent, unbiased reviews separate from your primary IDE agent." + - "The selected CLI must be installed and configured on your system." 
+ default: "none" + result: "{value}" + single-select: + - value: "codex" + label: "Codex (OpenAI) - Code review using OpenAI Codex CLI" + - value: "gemini" + label: "Gemini (Google) - Code review using Google Gemini CLI" + - value: "claude" + label: "Claude Code (Anthropic) - Code review using Claude Code CLI" + - value: "none" + label: "None - Use built-in review (no external agent)" diff --git a/src/modules/bmm/workflows/4-implementation/code-review/checklist.md b/src/modules/bmm/workflows/4-implementation/code-review/checklist.md index f213a6b9..ea84a99d 100644 --- a/src/modules/bmm/workflows/4-implementation/code-review/checklist.md +++ b/src/modules/bmm/workflows/4-implementation/code-review/checklist.md @@ -1,5 +1,7 @@ # Senior Developer Review - Validation Checklist +## Story Setup + - [ ] Story file loaded from `{{story_path}}` - [ ] Story Status verified as reviewable (review) - [ ] Epic and Story IDs resolved ({{epic_num}}.{{story_num}}) @@ -7,12 +9,33 @@ - [ ] Epic Tech Spec located or warning recorded - [ ] Architecture/standards docs loaded (as available) - [ ] Tech stack detected and documented -- [ ] MCP doc search performed (or web fallback) and references captured + +## External Agent Detection (Runtime) + +- [ ] `invoke-bash cmd="command -v codex"` executed → {{codex_available}} +- [ ] `invoke-bash cmd="command -v gemini"` executed → {{gemini_available}} +- [ ] `invoke-bash cmd="command -v claude"` executed → {{claude_available}} +- [ ] Review method determined: {{use_external_agent}} = true/false +- [ ] If external: {{external_agent_cmd}} = codex OR gemini OR claude +- [ ] Config updated with detection results and timestamp + +## Code Review Execution + +- [ ] Git vs Story discrepancies identified ({{git_findings}}) +- [ ] If external agent available: Prompt written to /tmp/code-review-prompt.txt +- [ ] If external agent available: CLI invoked via `invoke-bash` (MANDATORY - NO EXCEPTIONS) +- [ ] External agent output captured in {{bash_stdout}} 
+- [ ] If external agent CLI failed (non-zero exit): Fall back to built-in review +- [ ] ⚠️ VIOLATION CHECK: Did you skip the external agent with a rationalization? If yes, RE-RUN with the external agent. - [ ] Acceptance Criteria cross-checked against implementation - [ ] File List reviewed and validated for completeness - [ ] Tests identified and mapped to ACs; gaps noted -- [ ] Code quality review performed on changed files -- [ ] Security review performed on changed files and dependencies +- [ ] Code quality review performed (security, performance, maintainability) +- [ ] Minimum 3 issues found (adversarial review requirement) + +## Finalization + +- [ ] Findings categorized: HIGH/MEDIUM/LOW severity - [ ] Outcome decided (Approve/Changes Requested/Blocked) - [ ] Review notes appended under "Senior Developer Review (AI)" - [ ] Change Log updated with review entry @@ -21,3 +44,4 @@ - [ ] Story saved successfully _Reviewer: {{user_name}} on {{date}}_ +_External Agent: {{external_agent_cmd}} (codex:{{codex_available}} / gemini:{{gemini_available}} / claude:{{claude_available}})_ diff --git a/src/modules/bmm/workflows/4-implementation/code-review/external-agent-prompt.md b/src/modules/bmm/workflows/4-implementation/code-review/external-agent-prompt.md new file mode 100644 index 00000000..1e3ace7e --- /dev/null +++ b/src/modules/bmm/workflows/4-implementation/code-review/external-agent-prompt.md @@ -0,0 +1,35 @@ +You are an ADVERSARIAL code reviewer. Your job is to find problems, not approve code. + +VERY IMPORTANT! + +- This is a READ ONLY operation. You are not to change anything in this code. +- You are FORBIDDEN to write to any files. +- You are FORBIDDEN to change any files. +- You are FORBIDDEN to delete any files. 
+ +REQUIREMENTS: + +- Find at least 3 specific issues (aim for 3-10) - no lazy "looks good" reviews +- Categorize as HIGH (must fix), MEDIUM (should fix), LOW (nice to fix) +- For each issue: specify file:line, describe problem, suggest fix +- Check: Security vulnerabilities, performance issues, error handling, test quality +- Verify: Tasks marked [x] are actually done, ACs are actually implemented + +STORY CONTEXT: {{story_path}} +FILES TO REVIEW: {{comprehensive_file_list}} +ACCEPTANCE CRITERIA: {{acceptance_criteria_list}} +TASKS: {{task_list}} + +OUTPUT FORMAT: + +## HIGH SEVERITY + +- [file:line] Issue description | Suggested fix + +## MEDIUM SEVERITY + +- [file:line] Issue description | Suggested fix + +## LOW SEVERITY + +- [file:line] Issue description | Suggested fix diff --git a/src/modules/bmm/workflows/4-implementation/code-review/instructions.xml b/src/modules/bmm/workflows/4-implementation/code-review/instructions.xml index 3c5a69e6..5436efe0 100644 --- a/src/modules/bmm/workflows/4-implementation/code-review/instructions.xml +++ b/src/modules/bmm/workflows/4-implementation/code-review/instructions.xml @@ -4,16 +4,56 @@ Communicate all responses in {communication_language} and language MUST be tailored to {user_skill_level} Generate all documents in {document_output_language} - 🔥 YOU ARE AN ADVERSARIAL CODE REVIEWER - Find what's wrong or missing! 🔥 + + + + + 🚨 FIRST ACTION REQUIRED: Kill all background shells 🚨 + Before ANY other workflow step, you MUST terminate all background processes. + DO NOT use BashOutput to read from stale shells - this causes context pollution. + DO NOT try to "check what they're doing" - just kill them. 
+ + + IMMEDIATELY use KillShell to terminate ALL background shells - no exceptions + Do NOT read output from these shells first - kill them blind + Do NOT rationalize keeping them alive - they contain stale data + If multiple shells exist, kill ALL of them one by one + 🔪 Killed all background shells to prevent stale data contamination + + Only proceed to Step 1 AFTER all background shells are terminated. + + + + + 🚨 MANDATORY EXTERNAL AGENT RULE - NO EXCEPTIONS 🚨 + If an external agent CLI (codex, gemini, or claude) is detected as available, you MUST delegate the code review to that agent. + You are FORBIDDEN from performing your own code review analysis if an external agent is available. + The external agent provides an independent, unbiased review. Your job is to INVOKE it, not replace it. + Only perform built-in review if ALL external agents fail detection OR the CLI invocation actually fails with a non-zero exit code. + DO NOT SKIP the invoke-bash commands for detection and invocation - they are MANDATORY. + + + 🚫 PROHIBITED RATIONALIZATIONS - You may NOT skip the external agent for ANY of these reasons: + ❌ "The prompt is too long" - Long prompts are expected and supported. Invoke anyway. + ❌ "CLI is meant for simple operations" - FALSE. The CLI handles complex prompts. Invoke anyway. + ❌ "This is a re-review" - Re-reviews MUST use external agent. No exception. + ❌ "I can do this myself" - You are FORBIDDEN from self-review when external agent is available. + ❌ "It would be faster/better if I do it" - Irrelevant. External agent is MANDATORY. + ❌ "The context is too complex" - The external agent handles complexity. Invoke anyway. + If you find yourself rationalizing why to skip the external agent, STOP and invoke it anyway. + + 🔥 ADVERSARIAL CODE REVIEW REQUIREMENTS 🔥 Your purpose: Validate story file claims against actual implementation Challenge everything: Are tasks marked [x] actually done? Are ACs really implemented? 
- Find 3-10 specific issues in every review minimum - no lazy "looks good" reviews - YOU are so much better than the dev agent - that wrote this slop + Find 3-10 specific issues in every review minimum - no lazy "looks good" reviews Read EVERY file in the File List - verify implementation against story requirements Tasks marked complete but not done = CRITICAL finding Acceptance Criteria not implemented = HIGH severity finding - + Use provided {{story_path}} or ask user which story file to review Read COMPLETE story file Set {{story_key}} = extracted key from filename (e.g., "1-2-user-authentication.md" → "1-2-user-authentication") or story metadata @@ -38,6 +78,86 @@ Load {project_context} for coding standards (if exists) + + + + + + + + + + + + + + + 📋 External agent disabled in config - will use built-in adversarial review + + + + + 🔍 Detecting external agent availability... + + + + + + ✓ Codex CLI detected + + + + + + + ✓ Gemini CLI detected + + + + + + + ✓ Claude CLI detected + + + + + + + + + + + + + + + + + + + + + ⚠️ Preferred agent ({{preferred_agent}}) not available, falling back to Codex + + + + + ⚠️ Preferred agent ({{preferred_agent}}) not available, falling back to Gemini + + + + + ⚠️ Preferred agent ({{preferred_agent}}) not available, falling back to Claude + + + + 🤖 External agent selected: {{external_agent_cmd}} - will delegate code review + + + 📋 No external agent available - will use built-in adversarial review + + @@ -56,41 +176,105 @@ VALIDATE EVERY CLAIM - Check git reality vs story claims - + Review git vs story File List discrepancies: 1. **Files changed but not in story File List** → MEDIUM finding (incomplete documentation) 2. **Story lists files but no git changes** → HIGH finding (false claims) 3. 
**Uncommitted changes not documented** → MEDIUM finding (transparency issue) - - Create comprehensive review file list from story File List and git changes + Store git discrepancy findings in {{git_findings}} - - For EACH Acceptance Criterion: - 1. Read the AC requirement - 2. Search implementation files for evidence - 3. Determine: IMPLEMENTED, PARTIAL, or MISSING - 4. If MISSING/PARTIAL → HIGH SEVERITY finding - + + + + If {{use_external_agent}} == true, you MUST invoke the external agent via CLI. + DO NOT perform your own code review - delegate to the external agent. - - For EACH task marked [x]: - 1. Read the task description - 2. Search files for evidence it was actually done - 3. **CRITICAL**: If marked [x] but NOT DONE → CRITICAL finding - 4. Record specific proof (file:line) - + + 🔄 Invoking {{external_agent_cmd}} CLI for adversarial code review... - - For EACH file in comprehensive review list: - 1. **Security**: Look for injection risks, missing validation, auth issues - 2. **Performance**: N+1 queries, inefficient loops, missing caching - 3. **Error Handling**: Missing try/catch, poor error messages - 4. **Code Quality**: Complex functions, magic numbers, poor naming - 5. **Test Quality**: Are tests real assertions or placeholders? - + + + + 🚨 USE EXACT COMMAND SYNTAX - DO NOT MODIFY OR SIMPLIFY 🚨 + Copy the invoke-bash cmd attribute EXACTLY as written below. + DO NOT remove flags, reorder arguments, or "improve" the command. 
+ + + Load {{external_prompt_file}} content into {{external_prompt}} + + + CODEX: Use codex exec with read-only sandbox and full-auto + + + + GEMINI: Use gemini -p with prompt from file and --yolo + + + + CLAUDE: Use claude -p with prompt from file + + + + + ⚠️ External agent CLI failed (exit code: {{bash_exit_code}}), falling back to built-in review + Error: {{bash_stderr}} + + + + + + + Parse {{external_findings}} into structured HIGH/MEDIUM/LOW lists + Merge {{git_findings}} with {{external_findings}} into {{all_findings}} + ✅ External review complete - {{external_agent_cmd}} CLI findings received + + + + + + + + + + + + + This section should ONLY execute if ALL external agents failed detection or invocation. + If you are here but an external agent was available, you have violated the workflow rules. + ⚠️ No external agent available - performing built-in adversarial review + + + For EACH Acceptance Criterion: + 1. Read the AC requirement + 2. Search implementation files for evidence + 3. Determine: IMPLEMENTED, PARTIAL, or MISSING + 4. If MISSING/PARTIAL → HIGH SEVERITY finding + + + + For EACH task marked [x]: + 1. Read the task description + 2. Search files for evidence it was actually done + 3. **CRITICAL**: If marked [x] but NOT DONE → CRITICAL finding + 4. Record specific proof (file:line) + + + + For EACH file in comprehensive review list: + 1. **Security**: Look for injection risks, missing validation, auth issues + 2. **Performance**: N+1 queries, inefficient loops, missing caching + 3. **Error Handling**: Missing try/catch, poor error messages + 4. **Code Quality**: Complex functions, magic numbers, poor naming + 5. **Test Quality**: Are tests real assertions or placeholders? + + + Merge {{git_findings}} with built-in findings into {{all_findings}} + + + NOT LOOKING HARD ENOUGH - Find more problems! 
Re-examine code for: @@ -113,6 +297,7 @@ **🔥 CODE REVIEW FINDINGS, {user_name}!** **Story:** {{story_file}} + **Review Method:** {{external_agent_cmd}} OR built-in **Git vs Story Discrepancies:** {{git_discrepancy_count}} found **Issues Found:** {{high_count}} High, {{medium_count}} Medium, {{low_count}} Low @@ -185,7 +370,7 @@ Set {{current_sprint_status}} = "no-sprint-tracking" - + Load the FULL file: {sprint_status} Find development_status key matching {{story_key}} @@ -221,4 +406,4 @@ - \ No newline at end of file + diff --git a/src/modules/bmm/workflows/4-implementation/code-review/workflow.yaml b/src/modules/bmm/workflows/4-implementation/code-review/workflow.yaml index c148ef89..8cdc0fe1 100644 --- a/src/modules/bmm/workflows/4-implementation/code-review/workflow.yaml +++ b/src/modules/bmm/workflows/4-implementation/code-review/workflow.yaml @@ -18,6 +18,7 @@ sprint_status: "{sprint_artifacts}/sprint-status.yaml || {output_folder}/sprint- installed_path: "{project-root}/.bmad/bmm/workflows/4-implementation/code-review" instructions: "{installed_path}/instructions.xml" validation: "{installed_path}/checklist.md" +external_agent_prompt: "{installed_path}/external-agent-prompt.md" template: false variables: @@ -25,6 +26,11 @@ variables: project_context: "**/project-context.md" story_dir: "{sprint_artifacts}" + # External code review agent configuration + # User selects preferred agent during install; detection verifies availability at runtime + # Supported values: codex, gemini, claude, none + external_review_agent: "{config_source}:external_review_agent || 'none'" + # Smart input file references - handles both whole docs and sharded docs # Priority: Whole document first, then sharded version # Strategy: SELECTIVE LOAD - only load the specific epic needed for this story review @@ -51,4 +57,3 @@ input_file_patterns: load_strategy: "INDEX_GUIDED" standalone: true -web_bundle: false