From 7509b0cbc2f8bed420b70e92d476078ddcb5c3a1 Mon Sep 17 00:00:00 2001
From: Scott Jennings <scott.jennings+CIGINT@cloudimperiumgames.com>
Date: Sun, 7 Dec 2025 19:19:03 -0600
Subject: [PATCH 1/7] feat: add external agent support for code reviews

Adds support for delegating adversarial code reviews to external CLI agents
(Codex, Gemini, or Claude) when available. This provides independent, unbiased
code reviews from a different AI model.

Changes:
- Add invoke-bash and set-var tags to workflow.xml execution engine
- Add external_review_agents configuration to install-config.yaml
- Rewrite code-review workflow to detect and invoke external agents
- Cache agent detection in config.yaml to avoid repeated CLI checks
- Add fallback to built-in review if external agents unavailable/fail
- Update checklist to reflect new external agent workflow

External agent invocation:
- Codex: codex exec --full-auto "prompt"
- Gemini: gemini -p "prompt" --yolo
- Claude: claude -p "prompt" --dangerously-skip-permissions
---
 src/core/tasks/workflow.xml                   |   4 +
 src/modules/bmm/module.yaml                   |  32 ++
 .../4-implementation/code-review/checklist.md |  30 +-
 .../code-review/instructions.xml              | 316 ++++++++++++++++--
 .../code-review/workflow.yaml                 |  11 +-
 5 files changed, 356 insertions(+), 37 deletions(-)
diff --git a/src/core/tasks/workflow.xml b/src/core/tasks/workflow.xml
index 69f94e5a..04c3cc94 100644
--- a/src/core/tasks/workflow.xml
+++ b/src/core/tasks/workflow.xml
@@ -63,6 +63,8 @@
           <tag>invoke-workflow xml tag → Execute another workflow with given inputs and the workflow.xml runner</tag>
           <tag>invoke-task xml tag → Execute specified task</tag>
           <tag>invoke-protocol name="protocol_name" xml tag → Execute reusable protocol from protocols section</tag>
+          <tag>invoke-bash cmd="command" → Execute shell command, capture stdout/stderr, set {{bash_exit_code}}, {{bash_stdout}}, {{bash_stderr}}</tag>
+          <tag>set-var name="varname" value="..." → Set runtime variable {{varname}} to specified value (supports expressions)</tag>
           <tag>goto step="x" → Jump to specified step</tag>
         </execute-tags>
       </substep>
@@ -126,6 +128,8 @@
       <tag>invoke-workflow - Call another workflow</tag>
       <tag>invoke-task - Call a task</tag>
       <tag>invoke-protocol - Execute a reusable protocol (e.g., discover_inputs)</tag>
+      <tag>invoke-bash cmd="..." - Execute shell command, results in {{bash_exit_code}}, {{bash_stdout}}, {{bash_stderr}}</tag>
+      <tag>set-var name="..." value="..." - Set runtime variable dynamically</tag>
     </execution>
     <output>
       <tag>template-output - Save content checkpoint</tag>
diff --git a/src/modules/bmm/module.yaml b/src/modules/bmm/module.yaml
index 5803e965..9ac9f606 100644
--- a/src/modules/bmm/module.yaml
+++ b/src/modules/bmm/module.yaml
@@ -52,3 +52,35 @@ tea_use_playwright_utils:
     - "You must install packages yourself, or use test architect's *framework command."
   default: false
   result: "{value}"
+
+# External Code Review Agents Configuration
+# These are auto-detected at runtime, but user can set preference here
+# Useful when using a different AI as primary IDE agent (e.g., Codex/Gemini users can use Claude for reviews)
+external_review_agents:
+  codex_available:
+    prompt: false # Auto-detected at runtime
+    default: false
+    result: "{value}"
+  gemini_available:
+    prompt: false # Auto-detected at runtime
+    default: false
+    result: "{value}"
+  claude_available:
+    prompt: false # Auto-detected at runtime
+    default: false
+    result: "{value}"
+  preferred_agent:
+    prompt: "Which external code review agent do you prefer (if multiple are available)?"
+    default: "codex"
+    result: "{value}"
+    single-select:
+      - value: "codex"
+        label: "Codex (OpenAI) - Fast code review with OpenAI models"
+      - value: "gemini"
+        label: "Gemini (Google) - Code review with Google models"
+      - value: "claude"
+        label: "Claude (Anthropic) - Code review with Claude models (good for Codex/Gemini users)"
+  last_checked:
+    prompt: false # System-managed timestamp
+    default: null
+    result: "{value}"
diff --git a/src/modules/bmm/workflows/4-implementation/code-review/checklist.md b/src/modules/bmm/workflows/4-implementation/code-review/checklist.md
index f213a6b9..ea84a99d 100644
--- a/src/modules/bmm/workflows/4-implementation/code-review/checklist.md
+++ b/src/modules/bmm/workflows/4-implementation/code-review/checklist.md
@@ -1,5 +1,7 @@
 # Senior Developer Review - Validation Checklist
 
+## Story Setup
+
 - [ ] Story file loaded from `{{story_path}}`
 - [ ] Story Status verified as reviewable (review)
 - [ ] Epic and Story IDs resolved ({{epic_num}}.{{story_num}})
@@ -7,12 +9,33 @@
 - [ ] Epic Tech Spec located or warning recorded
 - [ ] Architecture/standards docs loaded (as available)
 - [ ] Tech stack detected and documented
-- [ ] MCP doc search performed (or web fallback) and references captured
+
+## External Agent Detection (Runtime)
+
+- [ ] `invoke-bash cmd="command -v codex"` executed → {{codex_available}}
+- [ ] `invoke-bash cmd="command -v gemini"` executed → {{gemini_available}}
+- [ ] `invoke-bash cmd="command -v claude"` executed → {{claude_available}}
+- [ ] Review method determined: {{use_external_agent}} = true/false
+- [ ] If external: {{external_agent_cmd}} = codex OR gemini OR claude
+- [ ] Config updated with detection results and timestamp
+
+## Code Review Execution
+
+- [ ] Git vs Story discrepancies identified ({{git_findings}})
+- [ ] If external agent available: Prompt written to /tmp/code-review-prompt.txt
+- [ ] If external agent available: CLI invoked via `invoke-bash` (MANDATORY - NO EXCEPTIONS)
+- [ ] External agent output captured in {{bash_stdout}}
+- [ ] If external agent CLI failed (non-zero exit): Fallback to built-in review
+- [ ] ⚠️ VIOLATION CHECK: Did you skip external agent with a rationalization? If yes, RE-RUN with external agent.
 - [ ] Acceptance Criteria cross-checked against implementation
 - [ ] File List reviewed and validated for completeness
 - [ ] Tests identified and mapped to ACs; gaps noted
-- [ ] Code quality review performed on changed files
-- [ ] Security review performed on changed files and dependencies
+- [ ] Code quality review performed (security, performance, maintainability)
+- [ ] Minimum 3 issues found (adversarial review requirement)
+
+## Finalization
+
+- [ ] Findings categorized: HIGH/MEDIUM/LOW severity
 - [ ] Outcome decided (Approve/Changes Requested/Blocked)
 - [ ] Review notes appended under "Senior Developer Review (AI)"
 - [ ] Change Log updated with review entry
@@ -21,3 +44,4 @@
 - [ ] Story saved successfully
 
 _Reviewer: {{user_name}} on {{date}}_
+_External Agent: {{external_agent_cmd}} (codex:{{codex_available}} / gemini:{{gemini_available}} / claude:{{claude_available}})_
diff --git a/src/modules/bmm/workflows/4-implementation/code-review/instructions.xml b/src/modules/bmm/workflows/4-implementation/code-review/instructions.xml
index bf8b7d69..a3d624a4 100644
--- a/src/modules/bmm/workflows/4-implementation/code-review/instructions.xml
+++ b/src/modules/bmm/workflows/4-implementation/code-review/instructions.xml
@@ -4,16 +4,35 @@
   <critical>Communicate all responses in {communication_language} and language MUST be tailored to {user_skill_level}</critical>
   <critical>Generate all documents in {document_output_language}</critical>
 
-  <critical>🔥 YOU ARE AN ADVERSARIAL CODE REVIEWER - Find what's wrong or missing! 🔥</critical>
+  <!-- ================================================================ -->
+  <!-- EXTERNAL AGENT MANDATE - THIS IS THE MOST IMPORTANT RULE        -->
+  <!-- ================================================================ -->
+  <critical>🚨 MANDATORY EXTERNAL AGENT RULE - NO EXCEPTIONS 🚨</critical>
+  <critical>If an external agent CLI (codex, gemini, or claude) is detected as available, you MUST delegate the code review to that agent.</critical>
+  <critical>You are FORBIDDEN from performing your own code review analysis if an external agent is available.</critical>
+  <critical>The external agent provides an independent, unbiased review. Your job is to INVOKE it, not replace it.</critical>
+  <critical>Only perform built-in review if ALL external agents fail detection OR the CLI invocation actually fails with a non-zero exit code.</critical>
+  <critical>DO NOT SKIP the invoke-bash commands for detection and invocation - they are MANDATORY.</critical>
+
+  <!-- PROHIBITED EXCUSES - DO NOT USE THESE TO SKIP EXTERNAL AGENT -->
+  <critical>🚫 PROHIBITED RATIONALIZATIONS - You may NOT skip the external agent for ANY of these reasons:</critical>
+  <critical>❌ "The prompt is too long" - Long prompts are expected and supported. Invoke anyway.</critical>
+  <critical>❌ "CLI is meant for simple operations" - FALSE. The CLI handles complex prompts. Invoke anyway.</critical>
+  <critical>❌ "This is a re-review" - Re-reviews MUST use external agent. No exception.</critical>
+  <critical>❌ "I can do this myself" - You are FORBIDDEN from self-review when external agent is available.</critical>
+  <critical>❌ "It would be faster/better if I do it" - Irrelevant. External agent is MANDATORY.</critical>
+  <critical>❌ "The context is too complex" - The external agent handles complexity. Invoke anyway.</critical>
+  <critical>If you find yourself rationalizing why to skip the external agent, STOP and invoke it anyway.</critical>
+
+  <critical>🔥 ADVERSARIAL CODE REVIEW REQUIREMENTS 🔥</critical>
   <critical>Your purpose: Validate story file claims against actual implementation</critical>
   <critical>Challenge everything: Are tasks marked [x] actually done? Are ACs really implemented?</critical>
-  <critical>Find 3-10 specific issues in every review minimum - no lazy "looks good" reviews - YOU are so much better than the dev agent
-    that wrote this slop</critical>
+  <critical>Find 3-10 specific issues in every review minimum - no lazy "looks good" reviews</critical>
   <critical>Read EVERY file in the File List - verify implementation against story requirements</critical>
   <critical>Tasks marked complete but not done = CRITICAL finding</critical>
   <critical>Acceptance Criteria not implemented = HIGH severity finding</critical>
 
-  <step n="1" goal="Load story and discover changes">
+  <step n="1" goal="Load story and detect external agents">
     <action>Use provided {{story_path}} or ask user which story file to review</action>
     <action>Read COMPLETE story file</action>
     <action>Set {{story_key}} = extracted key from filename (e.g., "1-2-user-authentication.md" → "1-2-user-authentication") or story metadata</action>
@@ -38,6 +57,114 @@
 
     <invoke-protocol name="discover_inputs" />
     <action>Load {project_context} for coding standards (if exists)</action>
+
+    <!-- ============================================================== -->
+    <!-- EXTERNAL AGENT DETECTION - CHECK CONFIG FIRST, THEN DETECT     -->
+    <!-- ============================================================== -->
+    <set-var name="use_external_agent" value="false" />
+    <set-var name="external_agent_cmd" value="" />
+    <set-var name="codex_available" value="false" />
+    <set-var name="gemini_available" value="false" />
+    <set-var name="claude_available" value="false" />
+    <set-var name="external_agent_failed" value="false" />
+    <set-var name="skip_detection" value="false" />
+
+    <!-- Check if config already has agent availability defined -->
+    <check if="{external_review_agents.codex_available} == true OR {external_review_agents.gemini_available} == true OR {external_review_agents.claude_available} == true">
+      <set-var name="skip_detection" value="true" />
+      <set-var name="codex_available" value="{external_review_agents.codex_available}" />
+      <set-var name="gemini_available" value="{external_review_agents.gemini_available}" />
+      <set-var name="claude_available" value="{external_review_agents.claude_available}" />
+      <output>📋 Using cached agent detection from config.yaml</output>
+      <output>   Codex: {{codex_available}}, Gemini: {{gemini_available}}, Claude: {{claude_available}}</output>
+    </check>
+
+    <!-- Only run detection if not already in config -->
+    <check if="{{skip_detection}} == false">
+      <output>🔍 No cached detection found - detecting available agents...</output>
+
+      <!-- Detect Codex CLI availability -->
+      <invoke-bash cmd="command -v codex &amp;&amp; codex --version 2>/dev/null || echo 'NOT_FOUND'" />
+      <check if="{{bash_exit_code}} == 0 AND {{bash_stdout}} does not contain 'NOT_FOUND'">
+        <set-var name="codex_available" value="true" />
+        <output>✓ Codex CLI detected</output>
+      </check>
+
+      <!-- Detect Gemini CLI availability -->
+      <invoke-bash cmd="command -v gemini &amp;&amp; gemini --version 2>/dev/null || echo 'NOT_FOUND'" />
+      <check if="{{bash_exit_code}} == 0 AND {{bash_stdout}} does not contain 'NOT_FOUND'">
+        <set-var name="gemini_available" value="true" />
+        <output>✓ Gemini CLI detected</output>
+      </check>
+
+      <!-- Detect Claude CLI availability -->
+      <invoke-bash cmd="command -v claude &amp;&amp; claude --version 2>/dev/null || echo 'NOT_FOUND'" />
+      <check if="{{bash_exit_code}} == 0 AND {{bash_stdout}} does not contain 'NOT_FOUND'">
+        <set-var name="claude_available" value="true" />
+        <output>✓ Claude CLI detected</output>
+      </check>
+
+      <!-- Update config.yaml with detection results -->
+      <invoke-bash cmd="
+CONFIG_FILE='{config_source}'
+if grep -q '^external_review_agents:' &quot;$CONFIG_FILE&quot; 2>/dev/null; then
+  sed -i.bak -e '/^external_review_agents:/,/^[^ ]/ {
+    s/codex_available:.*/codex_available: {{codex_available}}/
+    s/gemini_available:.*/gemini_available: {{gemini_available}}/
+    s/claude_available:.*/claude_available: {{claude_available}}/
+    s/last_checked:.*/last_checked: {{date}}/
+  }' &quot;$CONFIG_FILE&quot;
+  rm -f &quot;$CONFIG_FILE.bak&quot;
+else
+  cat >> &quot;$CONFIG_FILE&quot; &lt;&lt;EOF
+
+external_review_agents:
+  codex_available: {{codex_available}}
+  gemini_available: {{gemini_available}}
+  claude_available: {{claude_available}}
+  preferred_agent: codex
+  last_checked: {{date}}
+EOF
+fi
+echo 'Config updated'
+" />
+      <output>📝 Config updated with detection results</output>
+    </check>
+
+    <!-- Select which external agent to use based on availability and preference -->
+    <check if="{external_review_agents.preferred_agent} == 'codex' AND {{codex_available}} == true">
+      <set-var name="use_external_agent" value="true" />
+      <set-var name="external_agent_cmd" value="codex" />
+    </check>
+    <check if="{external_review_agents.preferred_agent} == 'gemini' AND {{gemini_available}} == true">
+      <set-var name="use_external_agent" value="true" />
+      <set-var name="external_agent_cmd" value="gemini" />
+    </check>
+    <check if="{external_review_agents.preferred_agent} == 'claude' AND {{claude_available}} == true">
+      <set-var name="use_external_agent" value="true" />
+      <set-var name="external_agent_cmd" value="claude" />
+    </check>
+
+    <!-- Fallback selection if preferred agent not available -->
+    <check if="{{use_external_agent}} == false AND {{codex_available}} == true">
+      <set-var name="use_external_agent" value="true" />
+      <set-var name="external_agent_cmd" value="codex" />
+    </check>
+    <check if="{{use_external_agent}} == false AND {{gemini_available}} == true">
+      <set-var name="use_external_agent" value="true" />
+      <set-var name="external_agent_cmd" value="gemini" />
+    </check>
+    <check if="{{use_external_agent}} == false AND {{claude_available}} == true">
+      <set-var name="use_external_agent" value="true" />
+      <set-var name="external_agent_cmd" value="claude" />
+    </check>
+
+    <check if="{{use_external_agent}} == true">
+      <output>🤖 External agent selected: {{external_agent_cmd}} - will delegate code review</output>
+    </check>
+    <check if="{{use_external_agent}} == false">
+      <output>📋 No external agent available - will use built-in adversarial review</output>
+    </check>
   </step>
 
   <step n="2" goal="Build review attack plan">
@@ -56,41 +183,167 @@
   <step n="3" goal="Execute adversarial review">
     <critical>VALIDATE EVERY CLAIM - Check git reality vs story claims</critical>
 
-    <!-- Git vs Story Discrepancies -->
+    <!-- Git vs Story Discrepancies - ALWAYS runs -->
     <action>Review git vs story File List discrepancies:
       1. **Files changed but not in story File List** → MEDIUM finding (incomplete documentation)
       2. **Story lists files but no git changes** → HIGH finding (false claims)
       3. **Uncommitted changes not documented** → MEDIUM finding (transparency issue)
     </action>
-
-    <!-- Use combined file list: story File List + git discovered files -->
     <action>Create comprehensive review file list from story File List and git changes</action>
+    <action>Store git discrepancy findings in {{git_findings}}</action>
 
-    <!-- AC Validation -->
-    <action>For EACH Acceptance Criterion:
-      1. Read the AC requirement
-      2. Search implementation files for evidence
-      3. Determine: IMPLEMENTED, PARTIAL, or MISSING
-      4. If MISSING/PARTIAL → HIGH SEVERITY finding
-    </action>
+    <!-- ============================================================== -->
+    <!-- MANDATORY: INVOKE EXTERNAL AGENT IF AVAILABLE                  -->
+    <!-- ============================================================== -->
+    <critical>If {{use_external_agent}} == true, you MUST invoke the external agent via CLI.</critical>
+    <critical>DO NOT perform your own code review - delegate to the external agent.</critical>
 
-    <!-- Task Completion Audit -->
-    <action>For EACH task marked [x]:
-      1. Read the task description
-      2. Search files for evidence it was actually done
-      3. **CRITICAL**: If marked [x] but NOT DONE → CRITICAL finding
-      4. Record specific proof (file:line)
-    </action>
+    <check if="{{use_external_agent}} == true">
+      <output>🔄 Invoking {{external_agent_cmd}} CLI for adversarial code review...</output>
 
-    <!-- Code Quality Deep Dive -->
-    <action>For EACH file in comprehensive review list:
-      1. **Security**: Look for injection risks, missing validation, auth issues
-      2. **Performance**: N+1 queries, inefficient loops, missing caching
-      3. **Error Handling**: Missing try/catch, poor error messages
-      4. **Code Quality**: Complex functions, magic numbers, poor naming
-      5. **Test Quality**: Are tests real assertions or placeholders?
-    </action>
+      <!-- ============================================================== -->
+      <!-- INVOKE EXTERNAL AGENT - USE EXACT COMMANDS AS WRITTEN          -->
+      <!-- ============================================================== -->
+      <critical>🚨 USE EXACT COMMAND SYNTAX - DO NOT MODIFY OR SIMPLIFY 🚨</critical>
+      <critical>Copy the invoke-bash cmd attribute EXACTLY as written below.</critical>
+      <critical>DO NOT remove flags, reorder arguments, or "improve" the command.</critical>
 
+      <check if="{{external_agent_cmd}} == 'codex'">
+        <critical>CODEX: Use codex exec --full-auto with inline prompt</critical>
+        <invoke-bash cmd="codex exec --full-auto 'You are an ADVERSARIAL code reviewer. Your job is to find problems, not approve code.
+
+REQUIREMENTS:
+- Find 3-10 specific issues minimum - no lazy looks good reviews
+- Categorize as HIGH (must fix), MEDIUM (should fix), LOW (nice to fix)
+- For each issue: specify file:line, describe problem, suggest fix
+- Check: Security vulnerabilities, performance issues, error handling, test quality
+- Verify: Tasks marked [x] are actually done, ACs are actually implemented
+
+STORY CONTEXT: {{story_path}}
+FILES TO REVIEW: {{comprehensive_file_list}}
+ACCEPTANCE CRITERIA: {{acceptance_criteria_list}}
+TASKS: {{task_list}}
+
+OUTPUT FORMAT:
+## HIGH SEVERITY
+- [file:line] Issue description | Suggested fix
+
+## MEDIUM SEVERITY
+- [file:line] Issue description | Suggested fix
+
+## LOW SEVERITY
+- [file:line] Issue description | Suggested fix'" timeout="300000" />
+      </check>
+      <check if="{{external_agent_cmd}} == 'gemini'">
+        <critical>GEMINI: Use gemini -p with inline prompt and --yolo</critical>
+        <invoke-bash cmd="gemini -p 'You are an ADVERSARIAL code reviewer. Your job is to find problems, not approve code.
+
+REQUIREMENTS:
+- Find 3-10 specific issues minimum - no lazy looks good reviews
+- Categorize as HIGH (must fix), MEDIUM (should fix), LOW (nice to fix)
+- For each issue: specify file:line, describe problem, suggest fix
+- Check: Security vulnerabilities, performance issues, error handling, test quality
+- Verify: Tasks marked [x] are actually done, ACs are actually implemented
+
+STORY CONTEXT: {{story_path}}
+FILES TO REVIEW: {{comprehensive_file_list}}
+ACCEPTANCE CRITERIA: {{acceptance_criteria_list}}
+TASKS: {{task_list}}
+
+OUTPUT FORMAT:
+## HIGH SEVERITY
+- [file:line] Issue description | Suggested fix
+
+## MEDIUM SEVERITY
+- [file:line] Issue description | Suggested fix
+
+## LOW SEVERITY
+- [file:line] Issue description | Suggested fix' --yolo" timeout="300000" />
+      </check>
+      <check if="{{external_agent_cmd}} == 'claude'">
+        <critical>CLAUDE: Use claude -p with inline prompt</critical>
+        <invoke-bash cmd="claude -p 'You are an ADVERSARIAL code reviewer. Your job is to find problems, not approve code.
+
+REQUIREMENTS:
+- Find 3-10 specific issues minimum - no lazy looks good reviews
+- Categorize as HIGH (must fix), MEDIUM (should fix), LOW (nice to fix)
+- For each issue: specify file:line, describe problem, suggest fix
+- Check: Security vulnerabilities, performance issues, error handling, test quality
+- Verify: Tasks marked [x] are actually done, ACs are actually implemented
+
+STORY CONTEXT: {{story_path}}
+FILES TO REVIEW: {{comprehensive_file_list}}
+ACCEPTANCE CRITERIA: {{acceptance_criteria_list}}
+TASKS: {{task_list}}
+
+OUTPUT FORMAT:
+## HIGH SEVERITY
+- [file:line] Issue description | Suggested fix
+
+## MEDIUM SEVERITY
+- [file:line] Issue description | Suggested fix
+
+## LOW SEVERITY
+- [file:line] Issue description | Suggested fix' --dangerously-skip-permissions" timeout="300000" />
+      </check>
+
+      <check if="{{bash_exit_code}} != 0 OR {{bash_stdout}} is empty">
+        <output>⚠️ External agent CLI failed (exit code: {{bash_exit_code}}), falling back to built-in review</output>
+        <output>Error: {{bash_stderr}}</output>
+        <set-var name="use_external_agent" value="false" />
+        <set-var name="external_agent_failed" value="true" />
+      </check>
+
+      <check if="{{bash_exit_code}} == 0 AND {{bash_stdout}} is not empty">
+        <set-var name="external_findings" value="{{bash_stdout}}" />
+        <action>Parse {{external_findings}} into structured HIGH/MEDIUM/LOW lists</action>
+        <action>Merge {{git_findings}} with {{external_findings}} into {{all_findings}}</action>
+        <output>✅ External review complete - {{external_agent_cmd}} CLI findings received</output>
+      </check>
+    </check>
+
+    <!-- Fallback to built-in if external agent failed -->
+    <check if="{{external_agent_failed}} == true">
+      <set-var name="use_external_agent" value="false" />
+    </check>
+
+    <check if="{{use_external_agent}} == false">
+      <!-- ============================================================== -->
+      <!-- FALLBACK ONLY: Built-in Review (when NO external agent works)  -->
+      <!-- ============================================================== -->
+      <critical>This section should ONLY execute if ALL external agents failed detection or invocation.</critical>
+      <critical>If you are here but an external agent was available, you have violated the workflow rules.</critical>
+      <output>⚠️ No external agent available - performing built-in adversarial review</output>
+
+      <!-- AC Validation -->
+      <action>For EACH Acceptance Criterion:
+        1. Read the AC requirement
+        2. Search implementation files for evidence
+        3. Determine: IMPLEMENTED, PARTIAL, or MISSING
+        4. If MISSING/PARTIAL → HIGH SEVERITY finding
+      </action>
+
+      <!-- Task Completion Audit -->
+      <action>For EACH task marked [x]:
+        1. Read the task description
+        2. Search files for evidence it was actually done
+        3. **CRITICAL**: If marked [x] but NOT DONE → CRITICAL finding
+        4. Record specific proof (file:line)
+      </action>
+
+      <!-- Code Quality Deep Dive -->
+      <action>For EACH file in comprehensive review list:
+        1. **Security**: Look for injection risks, missing validation, auth issues
+        2. **Performance**: N+1 queries, inefficient loops, missing caching
+        3. **Error Handling**: Missing try/catch, poor error messages
+        4. **Code Quality**: Complex functions, magic numbers, poor naming
+        5. **Test Quality**: Are tests real assertions or placeholders?
+      </action>
+
+      <action>Merge {{git_findings}} with built-in findings into {{all_findings}}</action>
+    </check>
+
+    <!-- Minimum issue check - applies to both paths -->
     <check if="total_issues_found lt 3">
       <critical>NOT LOOKING HARD ENOUGH - Find more problems!</critical>
       <action>Re-examine code for:
@@ -113,6 +366,7 @@
     <output>**🔥 CODE REVIEW FINDINGS, {user_name}!**
 
       **Story:** {{story_file}}
+      **Review Method:** {{external_agent_cmd}} OR built-in
       **Git vs Story Discrepancies:** {{git_discrepancy_count}} found
       **Issues Found:** {{high_count}} High, {{medium_count}} Medium, {{low_count}} Low
 
@@ -185,7 +439,7 @@
       <action>Set {{current_sprint_status}} = "no-sprint-tracking"</action>
     </check>
 
-    <!-- Sync sprint-status.yaml when story status changes (only if sprint tracking enabled) -->
+    <!-- Sync sprint-status.yaml when story status changes -->
     <check if="{{current_sprint_status}} != 'no-sprint-tracking'">
       <action>Load the FULL file: {sprint_status}</action>
       <action>Find development_status key matching {{story_key}}</action>
@@ -221,4 +475,4 @@
     </output>
   </step>
 
-</workflow>
\ No newline at end of file
+</workflow>
diff --git a/src/modules/bmm/workflows/4-implementation/code-review/workflow.yaml b/src/modules/bmm/workflows/4-implementation/code-review/workflow.yaml
index c055db20..522b7f39 100644
--- a/src/modules/bmm/workflows/4-implementation/code-review/workflow.yaml
+++ b/src/modules/bmm/workflows/4-implementation/code-review/workflow.yaml
@@ -4,7 +4,7 @@ description: "Perform an ADVERSARIAL Senior Developer code review that finds 3-1
 author: "BMad"
 
 # Critical variables from config
-config_source: "{project-root}/{bmad_folder}/bmm/config.yaml"
+config_source: "{project-root}/.bmad/bmm/config.yaml"
 output_folder: "{config_source}:output_folder"
 user_name: "{config_source}:user_name"
 communication_language: "{config_source}:communication_language"
@@ -15,7 +15,7 @@ sprint_artifacts: "{config_source}:sprint_artifacts"
 sprint_status: "{sprint_artifacts}/sprint-status.yaml || {output_folder}/sprint-status.yaml"
 
 # Workflow components
-installed_path: "{project-root}/{bmad_folder}/bmm/workflows/4-implementation/code-review"
+installed_path: "{project-root}/.bmad/bmm/workflows/4-implementation/code-review"
 instructions: "{installed_path}/instructions.xml"
 validation: "{installed_path}/checklist.md"
 template: false
@@ -25,6 +25,12 @@ variables:
   project_context: "**/project-context.md"
   story_dir: "{sprint_artifacts}"
 
+  # External code review agents configuration
+  # Note: codex_available and gemini_available are auto-detected at runtime via invoke-bash
+  # The workflow uses runtime variables {{codex_available}}, {{gemini_available}}, {{use_external_agent}}, {{external_agent_cmd}}
+  external_review_agents:
+    preferred_agent: "{config_source}:external_review_agents.preferred_agent || 'codex'"
+
 # Smart input file references - handles both whole docs and sharded docs
 # Priority: Whole document first, then sharded version
 # Strategy: SELECTIVE LOAD - only load the specific epic needed for this story review
@@ -51,4 +57,3 @@ input_file_patterns:
     load_strategy: "INDEX_GUIDED"
 
 standalone: true
-web_bundle: false

From dcba8e5e59d832b2605804385e4ed00ef184c2dc Mon Sep 17 00:00:00 2001
From: Scott Jennings <scott.jennings+CIGINT@cloudimperiumgames.com>
Date: Mon, 8 Dec 2025 10:57:37 -0600
Subject: [PATCH 2/7] feat: simplify external agent config and extract prompt
 to separate file

- Add installer prompt for external review agent selection (Codex, Gemini, Claude, None)
- Extract adversarial review prompt to external-agent-prompt.md for easier maintenance
- Simplify detection logic: check for "none" first, then verify CLI availability
- Add read-only sandbox flag to Codex invocation for safety
- Update workflow.yaml to reference new config variable
---
 src/modules/bmm/module.yaml                   |  48 ++---
 .../code-review/external-agent-prompt.md      |  35 ++++
 .../code-review/instructions.xml              | 194 +++++-------------
 .../code-review/workflow.yaml                 |  10 +-
 4 files changed, 110 insertions(+), 177 deletions(-)
 create mode 100644 src/modules/bmm/workflows/4-implementation/code-review/external-agent-prompt.md

diff --git a/src/modules/bmm/module.yaml b/src/modules/bmm/module.yaml
index 9ac9f606..2c7fe026 100644
--- a/src/modules/bmm/module.yaml
+++ b/src/modules/bmm/module.yaml
@@ -53,34 +53,22 @@ tea_use_playwright_utils:
   default: false
   result: "{value}"
 
-# External Code Review Agents Configuration
-# These are auto-detected at runtime, but user can set preference here
+# External Code Review Agent Selection
+# Allows delegating code reviews to an external AI agent CLI for independent, unbiased reviews
 # Useful when using a different AI as primary IDE agent (e.g., Codex/Gemini users can use Claude for reviews)
-external_review_agents:
-  codex_available:
-    prompt: false # Auto-detected at runtime
-    default: false
-    result: "{value}"
-  gemini_available:
-    prompt: false # Auto-detected at runtime
-    default: false
-    result: "{value}"
-  claude_available:
-    prompt: false # Auto-detected at runtime
-    default: false
-    result: "{value}"
-  preferred_agent:
-    prompt: "Which external code review agent do you prefer (if multiple are available)?"
-    default: "codex"
-    result: "{value}"
-    single-select:
-      - value: "codex"
-        label: "Codex (OpenAI) - Fast code review with OpenAI models"
-      - value: "gemini"
-        label: "Gemini (Google) - Code review with Google models"
-      - value: "claude"
-        label: "Claude (Anthropic) - Code review with Claude models (good for Codex/Gemini users)"
-  last_checked:
-    prompt: false # System-managed timestamp
-    default: null
-    result: "{value}"
+external_review_agent:
+  prompt:
+    - "Which external agent should perform code reviews?"
+    - "External agents provide independent, unbiased reviews separate from your primary IDE agent."
+    - "The selected CLI must be installed and configured on your system."
+  default: "none"
+  result: "{value}"
+  single-select:
+    - value: "codex"
+      label: "Codex (OpenAI) - Code review using OpenAI Codex CLI"
+    - value: "gemini"
+      label: "Gemini (Google) - Code review using Google Gemini CLI"
+    - value: "claude"
+      label: "Claude Code (Anthropic) - Code review using Claude Code CLI"
+    - value: "none"
+      label: "None - Use built-in review (no external agent)"
diff --git a/src/modules/bmm/workflows/4-implementation/code-review/external-agent-prompt.md b/src/modules/bmm/workflows/4-implementation/code-review/external-agent-prompt.md
new file mode 100644
index 00000000..1e3ace7e
--- /dev/null
+++ b/src/modules/bmm/workflows/4-implementation/code-review/external-agent-prompt.md
@@ -0,0 +1,35 @@
+You are an ADVERSARIAL code reviewer. Your job is to find problems, not approve code.
+
+VERY IMPORTANT!
+
+- This is a READ ONLY operation. You are not to change anything in this code.
+- You are FORBIDDEN to write to any files.
+- You are FORBIDDEN to change any files.
+- You are FORBIDDEN to delete any files.
+
+REQUIREMENTS:
+
+- Find 3-10 specific issues minimum - no lazy looks good reviews
+- Categorize as HIGH (must fix), MEDIUM (should fix), LOW (nice to fix)
+- For each issue: specify file:line, describe problem, suggest fix
+- Check: Security vulnerabilities, performance issues, error handling, test quality
+- Verify: Tasks marked [x] are actually done, ACs are actually implemented
+
+STORY CONTEXT: {{story_path}}
+FILES TO REVIEW: {{comprehensive_file_list}}
+ACCEPTANCE CRITERIA: {{acceptance_criteria_list}}
+TASKS: {{task_list}}
+
+OUTPUT FORMAT:
+
+## HIGH SEVERITY
+
+- [file:line] Issue description | Suggested fix
+
+## MEDIUM SEVERITY
+
+- [file:line] Issue description | Suggested fix
+
+## LOW SEVERITY
+
+- [file:line] Issue description | Suggested fix
diff --git a/src/modules/bmm/workflows/4-implementation/code-review/instructions.xml b/src/modules/bmm/workflows/4-implementation/code-review/instructions.xml
index a3d624a4..09834e73 100644
--- a/src/modules/bmm/workflows/4-implementation/code-review/instructions.xml
+++ b/src/modules/bmm/workflows/4-implementation/code-review/instructions.xml
@@ -67,21 +67,16 @@
     <set-var name="gemini_available" value="false" />
     <set-var name="claude_available" value="false" />
     <set-var name="external_agent_failed" value="false" />
-    <set-var name="skip_detection" value="false" />
+    <set-var name="preferred_agent" value="{external_review_agent}" />
 
-    <!-- Check if config already has agent availability defined -->
-    <check if="{external_review_agents.codex_available} == true OR {external_review_agents.gemini_available} == true OR {external_review_agents.claude_available} == true">
-      <set-var name="skip_detection" value="true" />
-      <set-var name="codex_available" value="{external_review_agents.codex_available}" />
-      <set-var name="gemini_available" value="{external_review_agents.gemini_available}" />
-      <set-var name="claude_available" value="{external_review_agents.claude_available}" />
-      <output>📋 Using cached agent detection from config.yaml</output>
-      <output>   Codex: {{codex_available}}, Gemini: {{gemini_available}}, Claude: {{claude_available}}</output>
+    <!-- Check if user has disabled external agents -->
+    <check if="{{preferred_agent}} == 'none'">
+      <output>📋 External agent disabled in config - will use built-in adversarial review</output>
     </check>
 
-    <!-- Only run detection if not already in config -->
-    <check if="{{skip_detection}} == false">
-      <output>🔍 No cached detection found - detecting available agents...</output>
+    <!-- Only detect and use external agents if not set to "none" -->
+    <check if="{{preferred_agent}} != 'none'">
+      <output>🔍 Detecting external agent availability...</output>
 
       <!-- Detect Codex CLI availability -->
       <invoke-bash cmd="command -v codex &amp;&amp; codex --version 2>/dev/null || echo 'NOT_FOUND'" />
@@ -104,66 +99,43 @@
         <output>✓ Claude CLI detected</output>
       </check>
 
-      <!-- Update config.yaml with detection results -->
-      <invoke-bash cmd="
-CONFIG_FILE='{config_source}'
-if grep -q '^external_review_agents:' &quot;$CONFIG_FILE&quot; 2>/dev/null; then
-  sed -i.bak -e '/^external_review_agents:/,/^[^ ]/ {
-    s/codex_available:.*/codex_available: {{codex_available}}/
-    s/gemini_available:.*/gemini_available: {{gemini_available}}/
-    s/claude_available:.*/claude_available: {{claude_available}}/
-    s/last_checked:.*/last_checked: {{date}}/
-  }' &quot;$CONFIG_FILE&quot;
-  rm -f &quot;$CONFIG_FILE.bak&quot;
-else
-  cat >> &quot;$CONFIG_FILE&quot; &lt;&lt;EOF
+      <!-- Select which external agent to use based on availability and preference -->
+      <check if="{{preferred_agent}} == 'codex' AND {{codex_available}} == true">
+        <set-var name="use_external_agent" value="true" />
+        <set-var name="external_agent_cmd" value="codex" />
+      </check>
+      <check if="{{preferred_agent}} == 'gemini' AND {{gemini_available}} == true">
+        <set-var name="use_external_agent" value="true" />
+        <set-var name="external_agent_cmd" value="gemini" />
+      </check>
+      <check if="{{preferred_agent}} == 'claude' AND {{claude_available}} == true">
+        <set-var name="use_external_agent" value="true" />
+        <set-var name="external_agent_cmd" value="claude" />
+      </check>
 
-external_review_agents:
-  codex_available: {{codex_available}}
-  gemini_available: {{gemini_available}}
-  claude_available: {{claude_available}}
-  preferred_agent: codex
-  last_checked: {{date}}
-EOF
-fi
-echo 'Config updated'
-" />
-      <output>📝 Config updated with detection results</output>
-    </check>
+      <!-- Fallback selection if preferred agent not available -->
+      <check if="{{use_external_agent}} == false AND {{codex_available}} == true">
+        <set-var name="use_external_agent" value="true" />
+        <set-var name="external_agent_cmd" value="codex" />
+        <output>⚠️ Preferred agent ({{preferred_agent}}) not available, falling back to Codex</output>
+      </check>
+      <check if="{{use_external_agent}} == false AND {{gemini_available}} == true">
+        <set-var name="use_external_agent" value="true" />
+        <set-var name="external_agent_cmd" value="gemini" />
+        <output>⚠️ Preferred agent ({{preferred_agent}}) not available, falling back to Gemini</output>
+      </check>
+      <check if="{{use_external_agent}} == false AND {{claude_available}} == true">
+        <set-var name="use_external_agent" value="true" />
+        <set-var name="external_agent_cmd" value="claude" />
+        <output>⚠️ Preferred agent ({{preferred_agent}}) not available, falling back to Claude</output>
+      </check>
 
-    <!-- Select which external agent to use based on availability and preference -->
-    <check if="{external_review_agents.preferred_agent} == 'codex' AND {{codex_available}} == true">
-      <set-var name="use_external_agent" value="true" />
-      <set-var name="external_agent_cmd" value="codex" />
-    </check>
-    <check if="{external_review_agents.preferred_agent} == 'gemini' AND {{gemini_available}} == true">
-      <set-var name="use_external_agent" value="true" />
-      <set-var name="external_agent_cmd" value="gemini" />
-    </check>
-    <check if="{external_review_agents.preferred_agent} == 'claude' AND {{claude_available}} == true">
-      <set-var name="use_external_agent" value="true" />
-      <set-var name="external_agent_cmd" value="claude" />
-    </check>
-
-    <!-- Fallback selection if preferred agent not available -->
-    <check if="{{use_external_agent}} == false AND {{codex_available}} == true">
-      <set-var name="use_external_agent" value="true" />
-      <set-var name="external_agent_cmd" value="codex" />
-    </check>
-    <check if="{{use_external_agent}} == false AND {{gemini_available}} == true">
-      <set-var name="use_external_agent" value="true" />
-      <set-var name="external_agent_cmd" value="gemini" />
-    </check>
-    <check if="{{use_external_agent}} == false AND {{claude_available}} == true">
-      <set-var name="use_external_agent" value="true" />
-      <set-var name="external_agent_cmd" value="claude" />
-    </check>
-
-    <check if="{{use_external_agent}} == true">
-      <output>🤖 External agent selected: {{external_agent_cmd}} - will delegate code review</output>
-    </check>
-    <check if="{{use_external_agent}} == false">
-      <output>📋 No external agent available - will use built-in adversarial review</output>
+      <check if="{{use_external_agent}} == true">
+        <output>🤖 External agent selected: {{external_agent_cmd}} - will delegate code review</output>
+      </check>
+      <check if="{{use_external_agent}} == false">
+        <output>📋 No external agent available - will use built-in adversarial review</output>
+      </check>
     </check>
   </step>
 
@@ -208,83 +180,21 @@ echo 'Config updated'
       <critical>Copy the invoke-bash cmd attribute EXACTLY as written below.</critical>
       <critical>DO NOT remove flags, reorder arguments, or "improve" the command.</critical>
 
+      <!-- External agent prompt is loaded from external-agent-prompt.md -->
+      <set-var name="external_prompt_file" value="{installed_path}/external-agent-prompt.md" />
+      <action>Load {{external_prompt_file}} content into {{external_prompt}}</action>
+
       <check if="{{external_agent_cmd}} == 'codex'">
-        <critical>CODEX: Use codex exec --full-auto with inline prompt</critical>
-        <invoke-bash cmd="codex exec --full-auto 'You are an ADVERSARIAL code reviewer. Your job is to find problems, not approve code.
-
-REQUIREMENTS:
-- Find 3-10 specific issues minimum - no lazy looks good reviews
-- Categorize as HIGH (must fix), MEDIUM (should fix), LOW (nice to fix)
-- For each issue: specify file:line, describe problem, suggest fix
-- Check: Security vulnerabilities, performance issues, error handling, test quality
-- Verify: Tasks marked [x] are actually done, ACs are actually implemented
-
-STORY CONTEXT: {{story_path}}
-FILES TO REVIEW: {{comprehensive_file_list}}
-ACCEPTANCE CRITERIA: {{acceptance_criteria_list}}
-TASKS: {{task_list}}
-
-OUTPUT FORMAT:
-## HIGH SEVERITY
-- [file:line] Issue description | Suggested fix
-
-## MEDIUM SEVERITY
-- [file:line] Issue description | Suggested fix
-
-## LOW SEVERITY
-- [file:line] Issue description | Suggested fix'" timeout="300000" />
+        <critical>CODEX: Use codex exec with read-only sandbox and full-auto</critical>
+        <invoke-bash cmd="codex exec --sandbox read-only --full-auto &quot;$(cat '{{external_prompt_file}}')&quot;" timeout="300000" />
       </check>
       <check if="{{external_agent_cmd}} == 'gemini'">
-        <critical>GEMINI: Use gemini -p with inline prompt and --yolo</critical>
-        <invoke-bash cmd="gemini -p 'You are an ADVERSARIAL code reviewer. Your job is to find problems, not approve code.
-
-REQUIREMENTS:
-- Find 3-10 specific issues minimum - no lazy looks good reviews
-- Categorize as HIGH (must fix), MEDIUM (should fix), LOW (nice to fix)
-- For each issue: specify file:line, describe problem, suggest fix
-- Check: Security vulnerabilities, performance issues, error handling, test quality
-- Verify: Tasks marked [x] are actually done, ACs are actually implemented
-
-STORY CONTEXT: {{story_path}}
-FILES TO REVIEW: {{comprehensive_file_list}}
-ACCEPTANCE CRITERIA: {{acceptance_criteria_list}}
-TASKS: {{task_list}}
-
-OUTPUT FORMAT:
-## HIGH SEVERITY
-- [file:line] Issue description | Suggested fix
-
-## MEDIUM SEVERITY
-- [file:line] Issue description | Suggested fix
-
-## LOW SEVERITY
-- [file:line] Issue description | Suggested fix' --yolo" timeout="300000" />
+        <critical>GEMINI: Use gemini -p with prompt from file and --yolo</critical>
+        <invoke-bash cmd="gemini -p &quot;$(cat '{{external_prompt_file}}')&quot; --yolo" timeout="300000" />
       </check>
       <check if="{{external_agent_cmd}} == 'claude'">
-        <critical>CLAUDE: Use claude -p with inline prompt</critical>
-        <invoke-bash cmd="claude -p 'You are an ADVERSARIAL code reviewer. Your job is to find problems, not approve code.
-
-REQUIREMENTS:
-- Find 3-10 specific issues minimum - no lazy looks good reviews
-- Categorize as HIGH (must fix), MEDIUM (should fix), LOW (nice to fix)
-- For each issue: specify file:line, describe problem, suggest fix
-- Check: Security vulnerabilities, performance issues, error handling, test quality
-- Verify: Tasks marked [x] are actually done, ACs are actually implemented
-
-STORY CONTEXT: {{story_path}}
-FILES TO REVIEW: {{comprehensive_file_list}}
-ACCEPTANCE CRITERIA: {{acceptance_criteria_list}}
-TASKS: {{task_list}}
-
-OUTPUT FORMAT:
-## HIGH SEVERITY
-- [file:line] Issue description | Suggested fix
-
-## MEDIUM SEVERITY
-- [file:line] Issue description | Suggested fix
-
-## LOW SEVERITY
-- [file:line] Issue description | Suggested fix' --dangerously-skip-permissions" timeout="300000" />
+        <critical>CLAUDE: Use claude -p with prompt from file</critical>
+        <invoke-bash cmd="claude -p &quot;$(cat '{{external_prompt_file}}')&quot; --dangerously-skip-permissions" timeout="300000" />
       </check>
 
       <check if="{{bash_exit_code}} != 0 OR {{bash_stdout}} is empty">
diff --git a/src/modules/bmm/workflows/4-implementation/code-review/workflow.yaml b/src/modules/bmm/workflows/4-implementation/code-review/workflow.yaml
index 522b7f39..8cdc0fe1 100644
--- a/src/modules/bmm/workflows/4-implementation/code-review/workflow.yaml
+++ b/src/modules/bmm/workflows/4-implementation/code-review/workflow.yaml
@@ -18,6 +18,7 @@ sprint_status: "{sprint_artifacts}/sprint-status.yaml || {output_folder}/sprint-
 installed_path: "{project-root}/.bmad/bmm/workflows/4-implementation/code-review"
 instructions: "{installed_path}/instructions.xml"
 validation: "{installed_path}/checklist.md"
+external_agent_prompt: "{installed_path}/external-agent-prompt.md"
 template: false
 
 variables:
@@ -25,11 +26,10 @@ variables:
   project_context: "**/project-context.md"
   story_dir: "{sprint_artifacts}"
 
-  # External code review agents configuration
-  # Note: codex_available and gemini_available are auto-detected at runtime via invoke-bash
-  # The workflow uses runtime variables {{codex_available}}, {{gemini_available}}, {{use_external_agent}}, {{external_agent_cmd}}
-  external_review_agents:
-    preferred_agent: "{config_source}:external_review_agents.preferred_agent || 'codex'"
+  # External code review agent configuration
+  # User selects preferred agent during install; detection verifies availability at runtime
+  # Supported values: codex, gemini, claude, none
+  external_review_agent: "{config_source}:external_review_agent || 'none'"
 
 # Smart input file references - handles both whole docs and sharded docs
 # Priority: Whole document first, then sharded version

From c0a49bcafe1a14bc41ca11d41c3e1117b7e2481e Mon Sep 17 00:00:00 2001
From: Scott Jennings <scott.jennings+CIGINT@cloudimperiumgames.com>
Date: Mon, 8 Dec 2025 11:22:54 -0600
Subject: [PATCH 3/7] fix: use bmad_folder variable instead of hardcoded .bmad
 path

Allows users to customize the config location via bmad_folder setting.
---
 .../bmm/workflows/4-implementation/code-review/workflow.yaml  | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/modules/bmm/workflows/4-implementation/code-review/workflow.yaml b/src/modules/bmm/workflows/4-implementation/code-review/workflow.yaml
index 8cdc0fe1..06d359ce 100644
--- a/src/modules/bmm/workflows/4-implementation/code-review/workflow.yaml
+++ b/src/modules/bmm/workflows/4-implementation/code-review/workflow.yaml
@@ -4,7 +4,7 @@ description: "Perform an ADVERSARIAL Senior Developer code review that finds 3-1
 author: "BMad"
 
 # Critical variables from config
-config_source: "{project-root}/.bmad/bmm/config.yaml"
+config_source: "{project-root}/{bmad_folder}/bmm/config.yaml"
 output_folder: "{config_source}:output_folder"
 user_name: "{config_source}:user_name"
 communication_language: "{config_source}:communication_language"
@@ -15,7 +15,7 @@ sprint_artifacts: "{config_source}:sprint_artifacts"
 sprint_status: "{sprint_artifacts}/sprint-status.yaml || {output_folder}/sprint-status.yaml"
 
 # Workflow components
-installed_path: "{project-root}/.bmad/bmm/workflows/4-implementation/code-review"
+installed_path: "{project-root}/{bmad_folder}/bmm/workflows/4-implementation/code-review"
 instructions: "{installed_path}/instructions.xml"
 validation: "{installed_path}/checklist.md"
 external_agent_prompt: "{installed_path}/external-agent-prompt.md"

From 4c6b4928351f88ebd1946e9123e9dc9dfe9f872b Mon Sep 17 00:00:00 2001
From: Scott Jennings <scott.jennings+CIGINT@cloudimperiumgames.com>
Date: Wed, 10 Dec 2025 18:51:38 -0600
Subject: [PATCH 4/7] feat: add cleanup for lingering external agent processes

Add instructions to detect and terminate any stale external agent
review processes when starting a new code review session in Claude Code.
---
 .../4-implementation/code-review/instructions.xml   | 13 +++++++++++++
 1 file changed, 13 insertions(+)

diff --git a/src/modules/bmm/workflows/4-implementation/code-review/instructions.xml b/src/modules/bmm/workflows/4-implementation/code-review/instructions.xml
index 09834e73..6373ae04 100644
--- a/src/modules/bmm/workflows/4-implementation/code-review/instructions.xml
+++ b/src/modules/bmm/workflows/4-implementation/code-review/instructions.xml
@@ -4,6 +4,19 @@
   <critical>Communicate all responses in {communication_language} and language MUST be tailored to {user_skill_level}</critical>
   <critical>Generate all documents in {document_output_language}</critical>
 
+  <!-- ================================================================ -->
+  <!-- CLAUDE CODE CLEANUP - KILL LINGERING EXTERNAL AGENT PROCESSES   -->
+  <!-- ================================================================ -->
+  <!-- If BMAD is running as Claude Code (not the external agent), clean up
+       any terminal processes from previous external agent code reviews.
+       This prevents context pollution from stale review sessions. -->
+  <check if="BMAD is running as Claude Code (detected via KillShell tool availability or CLAUDE_CODE env)">
+    <action>Check for any running background shells/terminals from previous external agent reviews</action>
+    <action>If any `claude -p` or similar external agent processes are found running, use KillShell to terminate them</action>
+    <action>This ensures a clean slate for the new code review session</action>
+    <output>🧹 Cleaned up previous external agent review processes (if any)</output>
+  </check>
+
   <!-- ================================================================ -->
   <!-- EXTERNAL AGENT MANDATE - THIS IS THE MOST IMPORTANT RULE        -->
   <!-- ================================================================ -->

From 81817672af2f24897443406f12783032d89cd4cd Mon Sep 17 00:00:00 2001
From: Scott Jennings <scott.jennings+CIGINT@cloudimperiumgames.com>
Date: Wed, 10 Dec 2025 19:06:24 -0600
Subject: [PATCH 5/7] fix: mandate killing background shells before code review

Change cleanup logic from "check and maybe kill" to "kill unconditionally".
The previous approach allowed agents to read stale output from lingering
external agent processes, causing context pollution and reuse of old data.

New behavior:
- Kill ALL background shells as the FIRST action
- Do NOT read output from stale shells (BashOutput forbidden)
- Do NOT check what processes are doing - just terminate them
- Only proceed to Step 1 after all shells are dead
---
 .../code-review/instructions.xml              | 26 ++++++++++++-------
 1 file changed, 17 insertions(+), 9 deletions(-)

diff --git a/src/modules/bmm/workflows/4-implementation/code-review/instructions.xml b/src/modules/bmm/workflows/4-implementation/code-review/instructions.xml
index 6373ae04..28453411 100644
--- a/src/modules/bmm/workflows/4-implementation/code-review/instructions.xml
+++ b/src/modules/bmm/workflows/4-implementation/code-review/instructions.xml
@@ -5,17 +5,25 @@
   <critical>Generate all documents in {document_output_language}</critical>
 
   <!-- ================================================================ -->
-  <!-- CLAUDE CODE CLEANUP - KILL LINGERING EXTERNAL AGENT PROCESSES   -->
+  <!-- CLAUDE CODE CLEANUP - MANDATORY KILL OF ALL BACKGROUND SHELLS   -->
   <!-- ================================================================ -->
-  <!-- If BMAD is running as Claude Code (not the external agent), clean up
-       any terminal processes from previous external agent code reviews.
-       This prevents context pollution from stale review sessions. -->
-  <check if="BMAD is running as Claude Code (detected via KillShell tool availability or CLAUDE_CODE env)">
-    <action>Check for any running background shells/terminals from previous external agent reviews</action>
-    <action>If any `claude -p` or similar external agent processes are found running, use KillShell to terminate them</action>
-    <action>This ensures a clean slate for the new code review session</action>
-    <output>🧹 Cleaned up previous external agent review processes (if any)</output>
+  <!-- CRITICAL: Stale external agent processes cause context pollution.
+       You MUST kill ALL background shells BEFORE doing ANYTHING else.
+       DO NOT read their output. DO NOT check what they're doing.
+       Just KILL them unconditionally. -->
+  <critical>🚨 FIRST ACTION REQUIRED: Kill all background shells 🚨</critical>
+  <critical>Before ANY other workflow step, you MUST terminate all background processes.</critical>
+  <critical>DO NOT use BashOutput to read from stale shells - this causes context pollution.</critical>
+  <critical>DO NOT try to "check what they're doing" - just kill them.</critical>
+
+  <check if="BMAD is running as Claude Code (KillShell tool is available)">
+    <action>IMMEDIATELY use KillShell to terminate ALL background shells - no exceptions</action>
+    <action>Do NOT read output from these shells first - kill them blind</action>
+    <action>Do NOT rationalize keeping them alive - they contain stale data</action>
+    <action>If multiple shells exist, kill ALL of them one by one</action>
+    <output>🔪 Killed all background shells to prevent stale data contamination</output>
   </check>
+  <critical>Only proceed to Step 1 AFTER all background shells are terminated.</critical>
 
   <!-- ================================================================ -->
   <!-- EXTERNAL AGENT MANDATE - THIS IS THE MOST IMPORTANT RULE        -->

From 57f59b2e5ca4918b4184fe23a45362b570b63c7e Mon Sep 17 00:00:00 2001
From: Scott Jennings <scott.jennings+CIGINT@cloudimperiumgames.com>
Date: Sat, 13 Dec 2025 07:17:28 -0600
Subject: [PATCH 6/7] fix: address Copilot review feedback for external agent
 code review

- Rename misleading "CLAUDE CODE CLEANUP" comment to generic "SHELL CLEANUP"
- Remove unused external_prompt variable loading action
- Increase external agent timeout from 5 to 10 minutes for complex reviews
- Fix Review Method output to show actual method used instead of "OR"
- Restore web_bundle: false setting needed by web bundler
---
 .../4-implementation/code-review/instructions.xml     | 11 +++++------
 .../4-implementation/code-review/workflow.yaml        |  1 +
 2 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/src/modules/bmm/workflows/4-implementation/code-review/instructions.xml b/src/modules/bmm/workflows/4-implementation/code-review/instructions.xml
index 5436efe0..87100fef 100644
--- a/src/modules/bmm/workflows/4-implementation/code-review/instructions.xml
+++ b/src/modules/bmm/workflows/4-implementation/code-review/instructions.xml
@@ -5,7 +5,7 @@
   <critical>Generate all documents in {document_output_language}</critical>
 
   <!-- ================================================================ -->
-  <!-- CLAUDE CODE CLEANUP - MANDATORY KILL OF ALL BACKGROUND SHELLS   -->
+  <!-- SHELL CLEANUP - MANDATORY KILL OF ALL BACKGROUND SHELLS         -->
   <!-- ================================================================ -->
   <!-- CRITICAL: Stale external agent processes cause context pollution.
        You MUST kill ALL background shells BEFORE doing ANYTHING else.
@@ -203,19 +203,18 @@
 
       <!-- External agent prompt is loaded from external-agent-prompt.md -->
       <set-var name="external_prompt_file" value="{installed_path}/external-agent-prompt.md" />
-      <action>Load {{external_prompt_file}} content into {{external_prompt}}</action>
 
       <check if="{{external_agent_cmd}} == 'codex'">
         <critical>CODEX: Use codex exec with read-only sandbox and full-auto</critical>
-        <invoke-bash cmd="codex exec --sandbox read-only --full-auto &quot;$(cat '{{external_prompt_file}}')&quot;" timeout="300000" />
+        <invoke-bash cmd="codex exec --sandbox read-only --full-auto &quot;$(cat '{{external_prompt_file}}')&quot;" timeout="600000" />
       </check>
       <check if="{{external_agent_cmd}} == 'gemini'">
         <critical>GEMINI: Use gemini -p with prompt from file and --yolo</critical>
-        <invoke-bash cmd="gemini -p &quot;$(cat '{{external_prompt_file}}')&quot; --yolo" timeout="300000" />
+        <invoke-bash cmd="gemini -p &quot;$(cat '{{external_prompt_file}}')&quot; --yolo" timeout="600000" />
       </check>
       <check if="{{external_agent_cmd}} == 'claude'">
         <critical>CLAUDE: Use claude -p with prompt from file</critical>
-        <invoke-bash cmd="claude -p &quot;$(cat '{{external_prompt_file}}')&quot; --dangerously-skip-permissions" timeout="300000" />
+        <invoke-bash cmd="claude -p &quot;$(cat '{{external_prompt_file}}')&quot; --dangerously-skip-permissions" timeout="600000" />
       </check>
 
       <check if="{{bash_exit_code}} != 0 OR {{bash_stdout}} is empty">
@@ -297,7 +296,7 @@
     <output>**🔥 CODE REVIEW FINDINGS, {user_name}!**
 
       **Story:** {{story_file}}
-      **Review Method:** {{external_agent_cmd}} OR built-in
+      **Review Method:** {{#if external_agent_cmd}}{{external_agent_cmd}} CLI{{else}}built-in{{/if}}
       **Git vs Story Discrepancies:** {{git_discrepancy_count}} found
       **Issues Found:** {{high_count}} High, {{medium_count}} Medium, {{low_count}} Low
 
diff --git a/src/modules/bmm/workflows/4-implementation/code-review/workflow.yaml b/src/modules/bmm/workflows/4-implementation/code-review/workflow.yaml
index 8cdc0fe1..9808e43e 100644
--- a/src/modules/bmm/workflows/4-implementation/code-review/workflow.yaml
+++ b/src/modules/bmm/workflows/4-implementation/code-review/workflow.yaml
@@ -57,3 +57,4 @@ input_file_patterns:
     load_strategy: "INDEX_GUIDED"
 
 standalone: true
+web_bundle: false

From 195664029d9dd5bdc06b19602cfbf787aea457ce Mon Sep 17 00:00:00 2001
From: Scott Jennings <scott.jennings+CIGINT@cloudimperiumgames.com>
Date: Sat, 13 Dec 2025 20:29:11 -0600
Subject: [PATCH 7/7] feat: add triage step to code review workflow

External agents find ALL possible issues, but not all are relevant.
This new step 4 evaluates findings against project context:
- Actually Important: AC violations, bugs, real security issues
- Context-Dependent: Performance, validation strictness (asks user)
- Theoretical/Nitpicking: Micro-optimizations, style preferences (skips)

This enables pragmatic code review that focuses on what matters.
---
 .../code-review/instructions.xml              | 145 +++++++++++++++---
 1 file changed, 121 insertions(+), 24 deletions(-)

diff --git a/src/modules/bmm/workflows/4-implementation/code-review/instructions.xml b/src/modules/bmm/workflows/4-implementation/code-review/instructions.xml
index 87100fef..eddb61eb 100644
--- a/src/modules/bmm/workflows/4-implementation/code-review/instructions.xml
+++ b/src/modules/bmm/workflows/4-implementation/code-review/instructions.xml
@@ -288,38 +288,135 @@
     </check>
   </step>
 
-  <step n="4" goal="Present findings and fix them">
-    <action>Categorize findings: HIGH (must fix), MEDIUM (should fix), LOW (nice to fix)</action>
+  <!-- ================================================================ -->
+  <!-- STEP 4: TRIAGE FINDINGS - SEPARATE SIGNAL FROM NOISE            -->
+  <!-- ================================================================ -->
+  <!-- This step evaluates external agent findings with project context.
+       External agents find ALL possible issues - but not all matter.
+       A symlink bypass is critical for a web API, irrelevant for a game.
+       The orchestrating agent applies pragmatic judgment here. -->
+
+  <step n="4" goal="Triage findings - separate signal from noise">
+    <critical>External agents are adversarial by design - they find EVERYTHING.</critical>
+    <critical>Your job: Apply project context to determine what ACTUALLY matters.</critical>
+    <critical>Do NOT blindly accept all findings. Do NOT dismiss valid concerns.</critical>
+
+    <action>For EACH finding from {{all_findings}}, evaluate against these criteria:</action>
+
+    <!-- Evaluation Framework -->
+    <action>**ACTUALLY IMPORTANT** (always fix):
+      - AC violations: Story claims something works but it doesn't
+      - Task fraud: Checkbox marked [x] but code doesn't exist
+      - Contract violations: Method signature doesn't match documented behavior
+      - Real bugs: Code will fail at runtime in normal usage
+      - Security issues for the ACTUAL threat model (game loading own files ≠ public API)
+    </action>
+
+    <action>**CONTEXT-DEPENDENT** (ask user):
+      - Performance: Does it matter at this scale? Game data vs. million-record DB
+      - Validation strictness: Nice-to-have vs. actually needed
+      - Edge cases: Will they ever happen in practice?
+      - Thread safety: Is this actually multi-threaded?
+    </action>
+
+    <action>**THEORETICAL/NITPICKING** (skip unless user insists):
+      - "Could be exploited if attacker controls X" when attacker never controls X
+      - Micro-optimizations that save nanoseconds
+      - Style preferences disguised as bugs
+      - "Best practice" violations that don't cause problems
+      - DoS concerns for trusted internal data
+    </action>
+
+    <!-- Build categorized lists -->
+    <set-var name="important_findings" value="[]" />
+    <set-var name="contextual_findings" value="[]" />
+    <set-var name="theoretical_findings" value="[]" />
+
+    <action>Categorize each finding into one of the three lists with brief reasoning</action>
+
+    <!-- Present triage to user via AskUserQuestion -->
+    <output>**🔍 FINDINGS TRIAGE**
+
+I've reviewed {{external_agent_cmd}}'s findings against your project context.
+Here's what I think actually matters vs. theoretical concerns:
+    </output>
+
+    <!-- Important findings - recommend fixing -->
+    <check if="{{important_findings}} is not empty">
+      <output>
+## ✅ ACTUALLY IMPORTANT (Recommend Fix)
+These are real issues that affect correctness or violate documented contracts:
+
+{{#each important_findings}}
+- **{{this.id}}**: {{this.summary}}
+  - *Why it matters*: {{this.reasoning}}
+{{/each}}
+      </output>
+    </check>
+
+    <!-- Context-dependent - ask user -->
+    <check if="{{contextual_findings}} is not empty">
+      <ask questions="[
+        {
+          'question': 'Which context-dependent issues should we address?',
+          'header': 'Fix these?',
+          'multiSelect': true,
+          'options': [
+            {{#each contextual_findings}}
+            {
+              'label': '{{this.id}}: {{this.short_summary}}',
+              'description': '{{this.reasoning}}'
+            },
+            {{/each}}
+            {
+              'label': 'None of these',
+              'description': 'Skip all context-dependent issues'
+            }
+          ]
+        }
+      ]" />
+      <action>Add user-selected contextual findings to {{important_findings}}</action>
+    </check>
+
+    <!-- Theoretical findings - inform user but don't push -->
+    <check if="{{theoretical_findings}} is not empty">
+      <output>
+## ⏭️ SKIPPING (Theoretical/Nitpicking)
+These findings are technically valid but don't matter for your use case:
+
+{{#each theoretical_findings}}
+- **{{this.id}}**: {{this.summary}} — *{{this.reasoning}}*
+{{/each}}
+
+*(Say "fix all" if you want these addressed anyway)*
+      </output>
+    </check>
+
+    <!-- Final confirmation -->
+    <set-var name="final_fix_list" value="{{important_findings}}" />
+    <output>
+**📋 FINAL FIX LIST: {{final_fix_list.length}} issues**
+    </output>
+  </step>
+
+  <step n="5" goal="Present findings and fix them">
     <action>Set {{fixed_count}} = 0</action>
     <action>Set {{action_count}} = 0</action>
 
-    <output>**🔥 CODE REVIEW FINDINGS, {user_name}!**
+    <output>**🔥 CODE REVIEW SUMMARY, {user_name}!**
 
       **Story:** {{story_file}}
       **Review Method:** {{#if external_agent_cmd}}{{external_agent_cmd}} CLI{{else}}built-in{{/if}}
-      **Git vs Story Discrepancies:** {{git_discrepancy_count}} found
-      **Issues Found:** {{high_count}} High, {{medium_count}} Medium, {{low_count}} Low
+      **Raw Findings:** {{all_findings.length}} total
+      **After Triage:** {{final_fix_list.length}} to address, {{theoretical_findings.length}} skipped
 
-      ## 🔴 CRITICAL ISSUES
-      - Tasks marked [x] but not actually implemented
-      - Acceptance Criteria not implemented
-      - Story claims files changed but no git evidence
-      - Security vulnerabilities
-
-      ## 🟡 MEDIUM ISSUES
-      - Files changed but not documented in story File List
-      - Uncommitted changes not tracked
-      - Performance problems
-      - Poor test coverage/quality
-      - Code maintainability issues
-
-      ## 🟢 LOW ISSUES
-      - Code style improvements
-      - Documentation gaps
-      - Git commit message quality
+      ## Issues to Fix
+      {{#each final_fix_list}}
+      - [{{this.severity}}] {{this.id}}: {{this.summary}}
+      {{/each}}
     </output>
 
-    <ask>What should I do with these issues?
+    <ask>What should I do with these {{final_fix_list.length}} issues?
 
       1. **Fix them automatically** - I'll update the code and tests
       2. **Create action items** - Add to story Tasks/Subtasks for later
@@ -349,7 +446,7 @@
     </check>
   </step>
 
-  <step n="5" goal="Update story status and sync sprint tracking">
+  <step n="6" goal="Update story status and sync sprint tracking">
     <!-- Determine new status based on review outcome -->
     <check if="all HIGH and MEDIUM issues fixed AND all ACs implemented">
       <action>Set {{new_status}} = "done"</action>