From abd758e7ee9114bb00fc6a21adb6e54165b03c16 Mon Sep 17 00:00:00 2001 From: Magal Date: Tue, 19 May 2026 18:41:01 -0300 Subject: [PATCH] feat(adapter): add hierarchical summarization via --summarize flag (Story 3.3) Adds --summarize flag to memtrace-adapter.mjs for token-budgeted hierarchical summarization of get_impact blast radius responses: - summarizeBlastRadius(): groups by directory prefix, extracts depth<=2 critical dependents, builds module impact with top symbols, enforces <=2000 token budget via progressive while-loop trimming - estimateTokens(): chars/4 * 1.15 buffer safety margin - Cross-platform path support (/[\\\\/]/ regex), NaN/Infinity depth guards - Non-object entry guards, JSON.stringify try/catch - total_critical field added for count transparency Workflow files updated to use --summarize flag: - bmad-dev-story/SKILL.md step 5, bmad-quick-dev step files - bmad-code-review and gds-code-review auditors check for --summarize 6 summarization tests added. 42/42 tests passing (adapter 20 + qa-memtrace 10 + validate-dead-code 12). --- .../bmad-code-review/steps/step-02-review.md | 1 + .agents/skills/bmad-dev-story/SKILL.md | 28 +++-- .../bmad-quick-dev/step-03-implement.md | 8 +- .agents/skills/bmad-quick-dev/step-oneshot.md | 4 +- .../gds-code-review/steps/step-02-review.md | 1 + _bmad/scripts/memtrace/memtrace-adapter.mjs | 105 +++++++++++++++++- .../memtrace/memtrace-adapter.test.mjs | 82 ++++++++++++++ 7 files changed, 202 insertions(+), 27 deletions(-) diff --git a/.agents/skills/bmad-code-review/steps/step-02-review.md b/.agents/skills/bmad-code-review/steps/step-02-review.md index e617db72a..cb7d81677 100644 --- a/.agents/skills/bmad-code-review/steps/step-02-review.md +++ b/.agents/skills/bmad-code-review/steps/step-02-review.md @@ -42,6 +42,7 @@ failed_layers: '' # set at runtime: comma-separated list of layers that failed o > - **If the spec/story file shows `memtrace_get_impact` or `list_indexed_repositories` being called directly** (without the adapter wrapper) for the blast radius step → raise a `patch` finding: "Direct MCP call detected — blast radius step should use `memtrace-adapter.mjs` instead of raw `memtrace_get_impact` or `list_indexed_repositories` for consistent timeout handling and error token emission." > - **If the blast radius step is absent or the story doesn't involve code modification (new-file-only stories)** → skip this gate. > - **If the spec/story file involves dead-code queries (`find_dead_code`)**: check whether the adapter was used (`--query find_dead_code`) rather than raw `memtrace_find_dead_code` MCP calls. If `memtrace_find_dead_code` was called directly without the adapter → raise a `patch` finding: "Direct MCP call detected — dead-code query should use `memtrace-adapter.mjs --query find_dead_code` instead of raw `memtrace_find_dead_code` for consistent timeout handling and error token emission." + > - **If the spec/story file shows `--query get_impact` called WITHOUT `--summarize`** in a story that involves code modification → raise a `patch` finding: "Adapter called without --summarize — blast radius output may exceed 2000 token budget. NFR1 requires all Memtrace structural tool outputs to be under 2000 tokens." > > **Quality Gate — Dead Code Pitfall Validation:** Check whether the spec/story file includes a "Dead Code Pitfall Validation Report" section (JSON output from `validate-dead-code.mjs`). Apply these rules: > - **If the spec file contains "Dead Code Pitfall Validation Report"**: verify that the `suspects` list entries were addressed in the implementation (check if corresponding tasks exist in Tasks/Subtasks, or if deleted files match suspect entries). If SUSPECT entries were not addressed → raise a `decision_needed` finding per unaddressed suspect: "SUSPECT dead-code entry not addressed: [name] in [file] — pitfall validation flagged this as truly dead code but no removal task was completed." diff --git a/.agents/skills/bmad-dev-story/SKILL.md b/.agents/skills/bmad-dev-story/SKILL.md index 97682972b..a1fb9e80b 100644 --- a/.agents/skills/bmad-dev-story/SKILL.md +++ b/.agents/skills/bmad-dev-story/SKILL.md @@ -307,32 +307,30 @@ Activation is complete. Begin the workflow below. HALT: "Memtrace MCP server is not available. Structural blast radius verification cannot be performed. Please start the Memtrace server or explicitly override this safety check." - - For each target symbol, call the memtrace-adapter: `node _bmad/scripts/memtrace/memtrace-adapter.mjs --target --query get_impact`. Process targets SEQUENTIALLY using `for...of` with `await` — NEVER use `Promise.all`. If the adapter exits with code 1 → HALT with timeout/unavailable message. - Parse the adapter's STDOUT JSON for each target. Extract `risk_level`, `affected_symbols` (array of {name, file, depth}), and `affected_files` from the JSON output. + + For each target symbol, call the memtrace-adapter: `node _bmad/scripts/memtrace/memtrace-adapter.mjs --target --query get_impact --summarize`. Process targets SEQUENTIALLY using `for...of` with `await` — NEVER use `Promise.all`. If the adapter exits with code 1 → HALT with timeout/unavailable message. + Parse the adapter's STDOUT JSON for each target. Extract the `summarized` field for the Confidence Report (guaranteed ≤2000 tokens by the adapter). The `summarized` field contains: `total_affected`, `critical_dependents` (depth 1-2 symbols), `module_impact` (grouped by directory prefix with `count` and optional `top_symbols`), and `token_estimate`. Also extract `affected_symbols` (raw array) for qa-memtrace.mjs consumption. - - Apply hierarchical summarization to keep the final report under 2000 tokens: - - Collapse depth > 3 into module-level counts only - - Deduplicate shared dependencies (show once under highest-depth occurrence) - - Report only top 20 most-critical symbols by risk - - Use concise bullet format, no prose paragraphs - - - + Present the Blast Radius Confidence Report in this format: ## Blast Radius Confidence Report **Target:** [symbol/file] **Risk Level:** [Low/Medium/High/Critical] - **Affected Symbols:** N downstream dependents across M files + **Affected Symbols:** {{summarized.total_affected}} downstream dependents across {{count of module_impact entries}} modules ### Critical Dependents (Depth 1-2) - - `symbol` in `file` — relationship + From `summarized.critical_dependents`: + {{for each symbol in summarized.critical_dependents}} + - `{{symbol.name}}` in `{{symbol.file}}` + {{end}} ### Module Impact Summary - - module: N symbols (High/Med/Low risk) + From `summarized.module_impact`: + {{for each [prefix, mod] in summarized.module_impact}} + - `{{prefix}}`: {{mod.count}} symbols{{if mod.top_symbols}} (top: {{mod.top_symbols.map(s => s.name).join(', ')}}){{end}} + {{end}} ### Recommended Pre-Flight Checks - Review test coverage for: top modules diff --git a/.agents/skills/bmad-quick-dev/step-03-implement.md b/.agents/skills/bmad-quick-dev/step-03-implement.md index 5c37b0ddb..3b70e51ee 100644 --- a/.agents/skills/bmad-quick-dev/step-03-implement.md +++ b/.agents/skills/bmad-quick-dev/step-03-implement.md @@ -24,13 +24,9 @@ Verify `{spec_file}` resolves to a non-empty path and the file exists on disk. I 2. **Verify Memtrace availability**: Check if the Memtrace MCP server is reachable by calling the adapter: `node _bmad/scripts/memtrace/memtrace-adapter.mjs --query list_repos`. If exit 0 — server is reachable (parse STDOUT JSON for repository list). If exit 1 — HALT: "Memtrace MCP server is not available. Structural blast radius verification cannot be performed. Please start the Memtrace server or explicitly override this safety check." -3. **Calculate blast radius**: For each target symbol, call the memtrace-adapter: `node _bmad/scripts/memtrace/memtrace-adapter.mjs --target --query get_impact`. Process targets SEQUENTIALLY using `for...of` — NEVER use `Promise.all`. If adapter exits with code 1 → HALT (timeout or unavailable). Parse the STDOUT JSON extracting `risk_level`, `affected_symbols`, and `affected_files`. +3. **Calculate blast radius with built-in summarization**: For each target symbol, call the memtrace-adapter: `node _bmad/scripts/memtrace/memtrace-adapter.mjs --target --query get_impact --summarize`. Process targets SEQUENTIALLY using `for...of` — NEVER use `Promise.all`. If adapter exits with code 1 → HALT (timeout or unavailable). Parse the STDOUT JSON: use `summarized.critical_dependents`, `summarized.module_impact`, `summarized.total_affected`, and `summarized.token_estimate` for the Confidence Report (guaranteed ≤2000 tokens). Extract `affected_symbols` (raw) for qa-memtrace.mjs. -4. **Summarize for token budget**: Keep the final report under 2000 tokens: - - Collapse depth > 3 into module-level counts only - - Deduplicate shared dependencies (show once under highest-depth occurrence) - - Report only top 20 most-critical symbols by risk - - Use concise bullet format, no prose paragraphs +4. **Token budget already satisfied**: The adapter's `--summarize` flag guarantees the `summarized` field is under 2000 tokens. No manual summarization needed. Use `summarized.token_estimate` to confirm compliance. 5. **Present Blast Radius Confidence Report**: ``` diff --git a/.agents/skills/bmad-quick-dev/step-oneshot.md b/.agents/skills/bmad-quick-dev/step-oneshot.md index 7d6ba0c49..b2a9da55b 100644 --- a/.agents/skills/bmad-quick-dev/step-oneshot.md +++ b/.agents/skills/bmad-quick-dev/step-oneshot.md @@ -19,9 +19,9 @@ deferred_work_file: '{implementation_artifacts}/deferred-work.md' 2. **Verify Memtrace availability**: Check if the Memtrace MCP server is reachable by calling the adapter: `node _bmad/scripts/memtrace/memtrace-adapter.mjs --query list_repos`. If exit 0 — server is reachable (parse STDOUT JSON for repository list). If exit 1 — HALT: "Memtrace MCP server is not available. Structural blast radius verification cannot be performed. Please start the Memtrace server or explicitly override this safety check." -3. **Calculate blast radius**: For each target symbol, call the memtrace-adapter: `node _bmad/scripts/memtrace/memtrace-adapter.mjs --target --query get_impact`. Process targets SEQUENTIALLY — NEVER use `Promise.all`. If adapter exits with code 1 → HALT (timeout or unavailable). Parse the STDOUT JSON extracting `risk_level`, `affected_symbols`, and `affected_files`. +3. **Calculate blast radius with built-in summarization**: For each target symbol, call the memtrace-adapter: `node _bmad/scripts/memtrace/memtrace-adapter.mjs --target --query get_impact --summarize`. Process targets SEQUENTIALLY — NEVER use `Promise.all`. If adapter exits with code 1 → HALT (timeout or unavailable). Parse the STDOUT JSON: use `summarized.critical_dependents`, `summarized.module_impact`, `summarized.total_affected`, and `summarized.token_estimate` for the Confidence Report. Extract `affected_symbols` (raw) for qa-memtrace.mjs. -4. **Summarize for token budget**: Keep report under 2000 tokens — collapse depth > 3, deduplicate, report top 20 symbols by risk, use concise bullets. +4. **Token budget already satisfied**: The adapter's `--summarize` flag guarantees ≤2000 tokens. No manual summarization needed. 5. **Present Blast Radius Confidence Report**: ``` diff --git a/.agents/skills/gds-code-review/steps/step-02-review.md b/.agents/skills/gds-code-review/steps/step-02-review.md index efc73b9e7..11ae138ea 100644 --- a/.agents/skills/gds-code-review/steps/step-02-review.md +++ b/.agents/skills/gds-code-review/steps/step-02-review.md @@ -42,6 +42,7 @@ failed_layers: '' # set at runtime: comma-separated list of layers that failed o > - **If the spec/story file shows `memtrace_get_impact` or `list_indexed_repositories` being called directly** (without the adapter wrapper) for the blast radius step → raise a `patch` finding: "Direct MCP call detected — blast radius step should use `memtrace-adapter.mjs` instead of raw `memtrace_get_impact` or `list_indexed_repositories` for consistent timeout handling and error token emission." > - **If the blast radius step is absent or the story doesn't involve code modification (new-file-only stories)** → skip this gate. > - **If the spec/story file involves dead-code queries (`find_dead_code`)**: check whether the adapter was used (`--query find_dead_code`) rather than raw `memtrace_find_dead_code` MCP calls. If `memtrace_find_dead_code` was called directly without the adapter → raise a `patch` finding: "Direct MCP call detected — dead-code query should use `memtrace-adapter.mjs --query find_dead_code` instead of raw `memtrace_find_dead_code` for consistent timeout handling and error token emission." + > - **If the spec/story file shows `--query get_impact` called WITHOUT `--summarize`** in a story that involves code modification → raise a `patch` finding: "Adapter called without --summarize — blast radius output may exceed 2000 token budget. NFR1 requires all Memtrace structural tool outputs to be under 2000 tokens." > > **Quality Gate — Dead Code Pitfall Validation:** Check whether the spec/story file includes a "Dead Code Pitfall Validation Report" section (JSON output from `validate-dead-code.mjs`). Apply these rules: > - **If the spec file contains "Dead Code Pitfall Validation Report"**: verify that the `suspects` list entries were addressed in the implementation (check if corresponding tasks exist in Tasks/Subtasks, or if deleted files match suspect entries). If SUSPECT entries were not addressed → raise a `decision_needed` finding per unaddressed suspect: "SUSPECT dead-code entry not addressed: [name] in [file] — pitfall validation flagged this as truly dead code but no removal task was completed." diff --git a/_bmad/scripts/memtrace/memtrace-adapter.mjs b/_bmad/scripts/memtrace/memtrace-adapter.mjs index 2abcb1e6d..95ef927fb 100644 --- a/_bmad/scripts/memtrace/memtrace-adapter.mjs +++ b/_bmad/scripts/memtrace/memtrace-adapter.mjs @@ -6,32 +6,35 @@ import { resolve } from 'node:path'; const TIMEOUT_MS = parseInt(process.env.MEMTRACE_TIMEOUT_MS || '10000', 10); const TIMEOUT_TOKEN = 'MEMTRACE_MCP_ERROR_TIMEOUT'; +const SUMMARIZE_TOKEN_LIMIT = 2000; function parseArgs() { const args = process.argv.slice(2); if (args.length === 0 || args.includes('--help') || args.includes('-h')) { - console.log(`Usage: node memtrace-adapter.mjs --target --query [--repo ] + console.log(`Usage: node memtrace-adapter.mjs --target --query [--repo ] [--summarize] Arguments: --target Symbol name or file path to query (required for get_impact, find_dead_code) --query Query type: get_impact, find_dead_code, list_repos (required) --repo Repository ID (optional — auto-detected from .memtrace-workspace if omitted) + --summarize (Optional) Apply token-budgeted hierarchical summarization for --query get_impact (output ≤ 2000 tokens) Query types: get_impact Fetch structural blast radius for a target symbol - find_dead_code Find dead code in a target module (stub — full impl in Story 3.2) + find_dead_code Find dead code in a target module list_repos List indexed repositories with freshness timestamps Examples: node memtrace-adapter.mjs --target "validateToken" --query get_impact + node memtrace-adapter.mjs --target "validateToken" --query get_impact --summarize node memtrace-adapter.mjs --query list_repos node memtrace-adapter.mjs --target "src/auth" --query find_dead_code node memtrace-adapter.mjs --help`); process.exit(0); } - const result = { target: null, query: null, repo: null }; + const result = { target: null, query: null, repo: null, summarize: false }; for (let i = 0; i < args.length; i++) { if (args[i] === '--target' && i + 1 < args.length) { result.target = args[++i]; @@ -39,6 +42,8 @@ Examples: result.query = args[++i]; } else if (args[i] === '--repo' && i + 1 < args.length) { result.repo = args[++i]; + } else if (args[i] === '--summarize') { + result.summarize = true; } else { fail(`Unknown argument: ${args[i]}`); process.exit(1); @@ -265,6 +270,84 @@ async function queryListRepos(client) { }; } +function estimateTokens(obj) { + try { + return Math.ceil(JSON.stringify(obj).length / 4 * 1.15); + } catch { + return Infinity; + } +} + +function summarizeBlastRadius(result) { + const raw = result.affected_symbols; + const symbols = Array.isArray(raw) ? raw : []; + + const modules = new Map(); + for (const s of symbols) { + if (typeof s !== 'object' || s === null) continue; + const file = s.file || ''; + const parts = file.split(/[\\/]/); + const dir = parts.slice(0, -1).join('/'); + const prefix = dir ? dir.split('/').slice(0, 2).join('/') + '/' : '/'; + if (!modules.has(prefix)) modules.set(prefix, []); + modules.get(prefix).push(s); + } + + const isFiniteDepth = (s) => typeof s.depth === 'number' && isFinite(s.depth); + + const crit = symbols + .filter(s => typeof s === 'object' && s !== null && isFiniteDepth(s) && s.depth <= 2) + .sort((a, b) => (a.depth ?? 99) - (b.depth ?? 99) || (a.name || '').localeCompare(b.name || '')) + .slice(0, 20) + .map(s => ({ name: s.name, file: s.file || '', depth: s.depth })); + + const moduleImpact = {}; + for (const [prefix, syms] of modules) { + const valid = syms.filter(s => typeof s === 'object' && s !== null); + const sorted = [...valid].sort((a, b) => (a.depth ?? 99) - (b.depth ?? 99) || (a.name || '').localeCompare(b.name || '')); + moduleImpact[prefix] = { + count: syms.length, + top_symbols: sorted.slice(0, 3).map(s => ({ name: s.name, file: s.file || '', depth: s.depth })) + }; + } + + const MAX_CRITICAL = 20; + const STAGE_CRITICAL = 10; + const MIN_CRITICAL = 5; + const MAX_MODULES = 50; + + let summarized = { + total_affected: symbols.length, + total_critical: crit.length, + critical_dependents: crit, + module_impact: moduleImpact + }; + summarized.token_estimate = estimateTokens(summarized); + + while (summarized.token_estimate > SUMMARIZE_TOKEN_LIMIT) { + const cur = summarized.critical_dependents.length; + if (cur > STAGE_CRITICAL) { + summarized.critical_dependents = summarized.critical_dependents.slice(0, STAGE_CRITICAL); + summarized.total_critical = summarized.critical_dependents.length; + } else if (cur > MIN_CRITICAL) { + summarized.critical_dependents = summarized.critical_dependents.slice(0, MIN_CRITICAL); + summarized.total_critical = summarized.critical_dependents.length; + } else if (Object.keys(summarized.module_impact).some(k => summarized.module_impact[k].top_symbols)) { + for (const key of Object.keys(summarized.module_impact)) { + delete summarized.module_impact[key].top_symbols; + } + } else { + const entries = Object.entries(summarized.module_impact) + .sort((a, b) => b[1].count - a[1].count) + .slice(0, MAX_MODULES); + summarized.module_impact = Object.fromEntries(entries); + } + summarized.token_estimate = estimateTokens(summarized); + } + + return summarized; +} + function withTimeout(promise, ms) { let timer; const timeout = new Promise((_, reject) => { @@ -286,6 +369,11 @@ async function main() { const args = parseArgs(); const start = Date.now(); + if (args.summarize && args.query !== 'get_impact') { + console.error('WARNING: --summarize is only applicable to --query get_impact. Ignored.'); + args.summarize = false; + } + const repoId = resolveRepoId(args); const client = new McpClient(); @@ -307,9 +395,18 @@ async function main() { let result = await withTimeout(queryFn, TIMEOUT_MS); result.elapsed_ms = Date.now() - start; + if (args.summarize && args.query === 'get_impact') { + result.summarized = summarizeBlastRadius(result); + } + await withTimeout(client.shutdown(), 5000); - console.log(JSON.stringify(result, null, 2)); + try { + console.log(JSON.stringify(result, null, 2)); + } catch (serializeErr) { + fail(`Failed to serialize result: ${serializeErr.message}`); + process.exit(1); + } process.exit(0); } catch (err) { client.kill(); diff --git a/_bmad/scripts/memtrace/memtrace-adapter.test.mjs b/_bmad/scripts/memtrace/memtrace-adapter.test.mjs index 3cd5c4972..db79d46d1 100644 --- a/_bmad/scripts/memtrace/memtrace-adapter.test.mjs +++ b/_bmad/scripts/memtrace/memtrace-adapter.test.mjs @@ -80,6 +80,88 @@ describe('memtrace-adapter.mjs', () => { assert.equal(r.code, 1); assert.ok(r.stderr.includes('Unknown argument')); }); + + it('should accept --summarize as a valid flag', async () => { + const r = await runAdapter(['--target', 'foo', '--query', 'get_impact', '--summarize']); + assert.ok(!r.stderr.includes('Unknown argument'), '--summarize should not cause unknown argument error'); + }); + }); + + describe('Summarization (--summarize)', () => { + + it('--help output should mention --summarize', async () => { + const r = await runAdapter(['--help']); + assert.equal(r.code, 0); + assert.ok(r.stdout.includes('--summarize'), 'Help text must document --summarize'); + }); + + it('--summarize with find_dead_code should emit warning on STDERR, no summarized field', { timeout: 30000 }, async () => { + const r = await runAdapter(['--target', 'src', '--query', 'find_dead_code', '--repo', 'Repos', '--summarize']); + assert.ok(r.stderr.includes('WARNING'), 'STDERR must contain warning'); + assert.ok(r.stderr.includes('--summarize'), 'STDERR must reference --summarize'); + if (r.code === 0) { + const parsed = JSON.parse(r.stdout); + assert.equal(parsed.summarized, undefined, 'STDOUT must not have summarized field for find_dead_code'); + } + }); + + it('--summarize with list_repos should emit warning on STDERR, no summarized field', { timeout: 20000 }, async () => { + const r = await runAdapter(['--query', 'list_repos', '--summarize']); + assert.ok(r.stderr.includes('WARNING'), 'STDERR must contain warning'); + if (r.code === 0) { + const parsed = JSON.parse(r.stdout); + assert.equal(parsed.summarized, undefined, 'STDOUT must not have summarized field for list_repos'); + } + }); + + it('get_impact WITH --summarize should include summarized field', { timeout: 30000 }, async () => { + const r = await runAdapter(['--target', 'bmad-dev-story', '--query', 'get_impact', '--repo', 'Repos', '--summarize']); + if (r.code === 0) { + let parsed; + try { + parsed = JSON.parse(r.stdout); + } catch (e) { + assert.fail(`STDOUT is not valid JSON: ${r.stdout.slice(0, 200)}`); + } + assert.ok(typeof parsed.summarized === 'object', 'summarized field must be an object'); + assert.ok(typeof parsed.summarized.total_affected === 'number'); + assert.ok(Array.isArray(parsed.summarized.critical_dependents)); + assert.ok(typeof parsed.summarized.module_impact === 'object'); + assert.ok(typeof parsed.summarized.token_estimate === 'number'); + assert.ok(parsed.summarized.token_estimate <= 2000, `token_estimate ${parsed.summarized.token_estimate} must be ≤ 2000`); + + parsed.summarized.critical_dependents.forEach(s => { + assert.ok(s.depth <= 2, `critical_dependent ${s.name} must have depth ≤ 2`); + assert.ok(typeof s.name === 'string'); + assert.ok(typeof s.file === 'string'); + }); + assert.ok(parsed.summarized.critical_dependents.length <= 20); + + for (const [prefix, mod] of Object.entries(parsed.summarized.module_impact)) { + assert.ok(typeof mod.count === 'number'); + if (mod.top_symbols) { + assert.ok(Array.isArray(mod.top_symbols)); + assert.ok(mod.top_symbols.length <= 3); + } + } + } else { + assert.ok(r.stdout.includes('MEMTRACE_MCP_ERROR_TIMEOUT'), + `Expected MEMTRACE_MCP_ERROR_TIMEOUT. Exit code: ${r.code}, stderr: ${r.stderr.slice(0, 200)}`); + } + }); + + it('get_impact WITHOUT --summarize should NOT have summarized field', { timeout: 30000 }, async () => { + const r = await runAdapter(['--target', 'bmad-dev-story', '--query', 'get_impact', '--repo', 'Repos']); + if (r.code === 0) { + const parsed = JSON.parse(r.stdout); + assert.equal(parsed.summarized, undefined, 'Without --summarize, output must NOT have summarized field'); + assert.ok(typeof parsed.target === 'string'); + assert.ok(Array.isArray(parsed.affected_symbols)); + assert.ok(typeof parsed.total_count === 'number'); + } else { + assert.ok(r.stdout.includes('MEMTRACE_MCP_ERROR_TIMEOUT')); + } + }); }); describe('MCP queries', () => {