From 7509b0cbc2f8bed420b70e92d476078ddcb5c3a1 Mon Sep 17 00:00:00 2001 From: Scott Jennings Date: Sun, 7 Dec 2025 19:19:03 -0600 Subject: [PATCH] feat: add external agent support for code reviews Adds support for delegating adversarial code reviews to external CLI agents (Codex, Gemini, or Claude) when available. This provides independent, unbiased code reviews from a different AI model. Changes: - Add invoke-bash and set-var tags to workflow.xml execution engine - Add external_review_agents configuration to install-config.yaml - Rewrite code-review workflow to detect and invoke external agents - Cache agent detection in config.yaml to avoid repeated CLI checks - Add fallback to built-in review if external agents unavailable/fail - Update checklist to reflect new external agent workflow External agent invocation: - Codex: codex exec --full-auto "prompt" - Gemini: gemini -p "prompt" --yolo - Claude: claude -p "prompt" --dangerously-skip-permissions --- src/core/tasks/workflow.xml | 4 + src/modules/bmm/module.yaml | 32 ++ .../4-implementation/code-review/checklist.md | 30 +- .../code-review/instructions.xml | 316 ++++++++++++++++-- .../code-review/workflow.yaml | 11 +- 5 files changed, 356 insertions(+), 37 deletions(-) diff --git a/src/core/tasks/workflow.xml b/src/core/tasks/workflow.xml index 69f94e5a..04c3cc94 100644 --- a/src/core/tasks/workflow.xml +++ b/src/core/tasks/workflow.xml @@ -63,6 +63,8 @@ invoke-workflow xml tag → Execute another workflow with given inputs and the workflow.xml runner invoke-task xml tag → Execute specified task invoke-protocol name="protocol_name" xml tag → Execute reusable protocol from protocols section + invoke-bash cmd="command" → Execute shell command, capture stdout/stderr, set {{bash_exit_code}}, {{bash_stdout}}, {{bash_stderr}} + set-var name="varname" value="..." 
→ Set runtime variable {{varname}} to specified value (supports expressions) goto step="x" → Jump to specified step @@ -126,6 +128,8 @@ invoke-workflow - Call another workflow invoke-task - Call a task invoke-protocol - Execute a reusable protocol (e.g., discover_inputs) + invoke-bash cmd="..." - Execute shell command, results in {{bash_exit_code}}, {{bash_stdout}}, {{bash_stderr}} + set-var name="..." value="..." - Set runtime variable dynamically template-output - Save content checkpoint diff --git a/src/modules/bmm/module.yaml b/src/modules/bmm/module.yaml index 5803e965..9ac9f606 100644 --- a/src/modules/bmm/module.yaml +++ b/src/modules/bmm/module.yaml @@ -52,3 +52,35 @@ tea_use_playwright_utils: - "You must install packages yourself, or use test architect's *framework command." default: false result: "{value}" + +# External Code Review Agents Configuration +# These are auto-detected at runtime, but user can set preference here +# Useful when using a different AI as primary IDE agent (e.g., Codex/Gemini users can use Claude for reviews) +external_review_agents: + codex_available: + prompt: false # Auto-detected at runtime + default: false + result: "{value}" + gemini_available: + prompt: false # Auto-detected at runtime + default: false + result: "{value}" + claude_available: + prompt: false # Auto-detected at runtime + default: false + result: "{value}" + preferred_agent: + prompt: "Which external code review agent do you prefer (if multiple are available)?" 
+ default: "codex" + result: "{value}" + single-select: + - value: "codex" + label: "Codex (OpenAI) - Fast code review with OpenAI models" + - value: "gemini" + label: "Gemini (Google) - Code review with Google models" + - value: "claude" + label: "Claude (Anthropic) - Code review with Claude models (good for Codex/Gemini users)" + last_checked: + prompt: false # System-managed timestamp + default: null + result: "{value}" diff --git a/src/modules/bmm/workflows/4-implementation/code-review/checklist.md b/src/modules/bmm/workflows/4-implementation/code-review/checklist.md index f213a6b9..ea84a99d 100644 --- a/src/modules/bmm/workflows/4-implementation/code-review/checklist.md +++ b/src/modules/bmm/workflows/4-implementation/code-review/checklist.md @@ -1,5 +1,7 @@ # Senior Developer Review - Validation Checklist +## Story Setup + - [ ] Story file loaded from `{{story_path}}` - [ ] Story Status verified as reviewable (review) - [ ] Epic and Story IDs resolved ({{epic_num}}.{{story_num}}) @@ -7,12 +9,33 @@ - [ ] Epic Tech Spec located or warning recorded - [ ] Architecture/standards docs loaded (as available) - [ ] Tech stack detected and documented -- [ ] MCP doc search performed (or web fallback) and references captured + +## External Agent Detection (Runtime) + +- [ ] `invoke-bash cmd="command -v codex"` executed → {{codex_available}} +- [ ] `invoke-bash cmd="command -v gemini"` executed → {{gemini_available}} +- [ ] `invoke-bash cmd="command -v claude"` executed → {{claude_available}} +- [ ] Review method determined: {{use_external_agent}} = true/false +- [ ] If external: {{external_agent_cmd}} = codex OR gemini OR claude +- [ ] Config updated with detection results and timestamp + +## Code Review Execution + +- [ ] Git vs Story discrepancies identified ({{git_findings}}) +- [ ] If external agent available: Review prompt composed and passed inline to the agent CLI +- [ ] If external agent available: CLI invoked via `invoke-bash` (MANDATORY - NO EXCEPTIONS) +- [ ] External 
agent output captured in {{bash_stdout}} +- [ ] If external agent CLI failed (non-zero exit): Fallback to built-in review +- [ ] ⚠️ VIOLATION CHECK: Did you skip external agent with a rationalization? If yes, RE-RUN with external agent. - [ ] Acceptance Criteria cross-checked against implementation - [ ] File List reviewed and validated for completeness - [ ] Tests identified and mapped to ACs; gaps noted -- [ ] Code quality review performed on changed files -- [ ] Security review performed on changed files and dependencies +- [ ] Code quality review performed (security, performance, maintainability) +- [ ] Minimum 3 issues found (adversarial review requirement) + +## Finalization + +- [ ] Findings categorized: HIGH/MEDIUM/LOW severity - [ ] Outcome decided (Approve/Changes Requested/Blocked) - [ ] Review notes appended under "Senior Developer Review (AI)" - [ ] Change Log updated with review entry @@ -21,3 +44,4 @@ - [ ] Story saved successfully _Reviewer: {{user_name}} on {{date}}_ +_External Agent: {{external_agent_cmd}} (codex:{{codex_available}} / gemini:{{gemini_available}} / claude:{{claude_available}})_ diff --git a/src/modules/bmm/workflows/4-implementation/code-review/instructions.xml b/src/modules/bmm/workflows/4-implementation/code-review/instructions.xml index bf8b7d69..a3d624a4 100644 --- a/src/modules/bmm/workflows/4-implementation/code-review/instructions.xml +++ b/src/modules/bmm/workflows/4-implementation/code-review/instructions.xml @@ -4,16 +4,35 @@ Communicate all responses in {communication_language} and language MUST be tailored to {user_skill_level} Generate all documents in {document_output_language} - 🔥 YOU ARE AN ADVERSARIAL CODE REVIEWER - Find what's wrong or missing! 🔥 + + + + 🚨 MANDATORY EXTERNAL AGENT RULE - NO EXCEPTIONS 🚨 + If an external agent CLI (codex, gemini, or claude) is detected as available, you MUST delegate the code review to that agent. 
+ You are FORBIDDEN from performing your own code review analysis if an external agent is available. + The external agent provides an independent, unbiased review. Your job is to INVOKE it, not replace it. + Only perform built-in review if ALL external agents fail detection OR the CLI invocation actually fails with a non-zero exit code. + DO NOT SKIP the invoke-bash commands for detection and invocation - they are MANDATORY. + + + 🚫 PROHIBITED RATIONALIZATIONS - You may NOT skip the external agent for ANY of these reasons: + ❌ "The prompt is too long" - Long prompts are expected and supported. Invoke anyway. + ❌ "CLI is meant for simple operations" - FALSE. The CLI handles complex prompts. Invoke anyway. + ❌ "This is a re-review" - Re-reviews MUST use external agent. No exception. + ❌ "I can do this myself" - You are FORBIDDEN from self-review when external agent is available. + ❌ "It would be faster/better if I do it" - Irrelevant. External agent is MANDATORY. + ❌ "The context is too complex" - The external agent handles complexity. Invoke anyway. + If you find yourself rationalizing why to skip the external agent, STOP and invoke it anyway. + + 🔥 ADVERSARIAL CODE REVIEW REQUIREMENTS 🔥 Your purpose: Validate story file claims against actual implementation Challenge everything: Are tasks marked [x] actually done? Are ACs really implemented? 
- Find 3-10 specific issues in every review minimum - no lazy "looks good" reviews - YOU are so much better than the dev agent - that wrote this slop + Find 3-10 specific issues in every review minimum - no lazy "looks good" reviews Read EVERY file in the File List - verify implementation against story requirements Tasks marked complete but not done = CRITICAL finding Acceptance Criteria not implemented = HIGH severity finding - + Use provided {{story_path}} or ask user which story file to review Read COMPLETE story file Set {{story_key}} = extracted key from filename (e.g., "1-2-user-authentication.md" → "1-2-user-authentication") or story metadata @@ -38,6 +57,114 @@ Load {project_context} for coding standards (if exists) + + + + + + + + + + + + + + + + + + + 📋 Using cached agent detection from config.yaml + Codex: {{codex_available}}, Gemini: {{gemini_available}}, Claude: {{claude_available}} + + + + + 🔍 No cached detection found - detecting available agents... + + + + + + ✓ Codex CLI detected + + + + + + + ✓ Gemini CLI detected + + + + + + + ✓ Claude CLI detected + + + + + 📝 Config updated with detection results + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 🤖 External agent selected: {{external_agent_cmd}} - will delegate code review + + + 📋 No external agent available - will use built-in adversarial review + @@ -56,41 +183,167 @@ VALIDATE EVERY CLAIM - Check git reality vs story claims - + Review git vs story File List discrepancies: 1. **Files changed but not in story File List** → MEDIUM finding (incomplete documentation) 2. **Story lists files but no git changes** → HIGH finding (false claims) 3. **Uncommitted changes not documented** → MEDIUM finding (transparency issue) - - Create comprehensive review file list from story File List and git changes + Store git discrepancy findings in {{git_findings}} - - For EACH Acceptance Criterion: - 1. Read the AC requirement - 2. Search implementation files for evidence - 3. 
Determine: IMPLEMENTED, PARTIAL, or MISSING - 4. If MISSING/PARTIAL → HIGH SEVERITY finding - + + + + If {{use_external_agent}} == true, you MUST invoke the external agent via CLI. + DO NOT perform your own code review - delegate to the external agent. - - For EACH task marked [x]: - 1. Read the task description - 2. Search files for evidence it was actually done - 3. **CRITICAL**: If marked [x] but NOT DONE → CRITICAL finding - 4. Record specific proof (file:line) - + + 🔄 Invoking {{external_agent_cmd}} CLI for adversarial code review... - - For EACH file in comprehensive review list: - 1. **Security**: Look for injection risks, missing validation, auth issues - 2. **Performance**: N+1 queries, inefficient loops, missing caching - 3. **Error Handling**: Missing try/catch, poor error messages - 4. **Code Quality**: Complex functions, magic numbers, poor naming - 5. **Test Quality**: Are tests real assertions or placeholders? - + + + + 🚨 USE EXACT COMMAND SYNTAX - DO NOT MODIFY OR SIMPLIFY 🚨 + Copy the invoke-bash cmd attribute EXACTLY as written below. + DO NOT remove flags, reorder arguments, or "improve" the command. + + CODEX: Use codex exec --full-auto with inline prompt + + + + GEMINI: Use gemini -p with inline prompt and --yolo + + + + CLAUDE: Use claude -p with inline prompt + + + + + ⚠️ External agent CLI failed (exit code: {{bash_exit_code}}), falling back to built-in review + Error: {{bash_stderr}} + + + + + + + Parse {{external_findings}} into structured HIGH/MEDIUM/LOW lists + Merge {{git_findings}} with {{external_findings}} into {{all_findings}} + ✅ External review complete - {{external_agent_cmd}} CLI findings received + + + + + + + + + + + + + This section should ONLY execute if ALL external agents failed detection or invocation. + If you are here but an external agent was available, you have violated the workflow rules. + ⚠️ No external agent available - performing built-in adversarial review + + + For EACH Acceptance Criterion: + 1. 
Read the AC requirement + 2. Search implementation files for evidence + 3. Determine: IMPLEMENTED, PARTIAL, or MISSING + 4. If MISSING/PARTIAL → HIGH SEVERITY finding + + + + For EACH task marked [x]: + 1. Read the task description + 2. Search files for evidence it was actually done + 3. **CRITICAL**: If marked [x] but NOT DONE → CRITICAL finding + 4. Record specific proof (file:line) + + + + For EACH file in comprehensive review list: + 1. **Security**: Look for injection risks, missing validation, auth issues + 2. **Performance**: N+1 queries, inefficient loops, missing caching + 3. **Error Handling**: Missing try/catch, poor error messages + 4. **Code Quality**: Complex functions, magic numbers, poor naming + 5. **Test Quality**: Are tests real assertions or placeholders? + + + Merge {{git_findings}} with built-in findings into {{all_findings}} + + + NOT LOOKING HARD ENOUGH - Find more problems! Re-examine code for: @@ -113,6 +366,7 @@ **🔥 CODE REVIEW FINDINGS, {user_name}!** **Story:** {{story_file}} + **Review Method:** {{external_agent_cmd}} OR built-in **Git vs Story Discrepancies:** {{git_discrepancy_count}} found **Issues Found:** {{high_count}} High, {{medium_count}} Medium, {{low_count}} Low @@ -185,7 +439,7 @@ Set {{current_sprint_status}} = "no-sprint-tracking" - + Load the FULL file: {sprint_status} Find development_status key matching {{story_key}} @@ -221,4 +475,4 @@ - \ No newline at end of file + diff --git a/src/modules/bmm/workflows/4-implementation/code-review/workflow.yaml b/src/modules/bmm/workflows/4-implementation/code-review/workflow.yaml index c055db20..522b7f39 100644 --- a/src/modules/bmm/workflows/4-implementation/code-review/workflow.yaml +++ b/src/modules/bmm/workflows/4-implementation/code-review/workflow.yaml @@ -4,7 +4,7 @@ description: "Perform an ADVERSARIAL Senior Developer code review that finds 3-1 author: "BMad" # Critical variables from config -config_source: "{project-root}/{bmad_folder}/bmm/config.yaml" +config_source: 
"{project-root}/.bmad/bmm/config.yaml" output_folder: "{config_source}:output_folder" user_name: "{config_source}:user_name" communication_language: "{config_source}:communication_language" @@ -15,7 +15,7 @@ sprint_artifacts: "{config_source}:sprint_artifacts" sprint_status: "{sprint_artifacts}/sprint-status.yaml || {output_folder}/sprint-status.yaml" # Workflow components -installed_path: "{project-root}/{bmad_folder}/bmm/workflows/4-implementation/code-review" +installed_path: "{project-root}/.bmad/bmm/workflows/4-implementation/code-review" instructions: "{installed_path}/instructions.xml" validation: "{installed_path}/checklist.md" template: false @@ -25,6 +25,12 @@ variables: project_context: "**/project-context.md" story_dir: "{sprint_artifacts}" + # External code review agents configuration + # Note: codex_available, gemini_available, and claude_available are auto-detected at runtime via invoke-bash + # The workflow uses runtime variables {{codex_available}}, {{gemini_available}}, {{claude_available}}, {{use_external_agent}}, {{external_agent_cmd}} + external_review_agents: + preferred_agent: "{config_source}:external_review_agents.preferred_agent || 'codex'" + # Smart input file references - handles both whole docs and sharded docs # Priority: Whole document first, then sharded version # Strategy: SELECTIVE LOAD - only load the specific epic needed for this story review @@ -51,4 +57,3 @@ input_file_patterns: load_strategy: "INDEX_GUIDED" standalone: true -web_bundle: false