BMAD-METHOD/core/tools/context-extractor.xml

172 lines
6.9 KiB
XML

<?xml version="1.0" encoding="UTF-8"?>
<tool id="context-extractor" name="Context Extractor" standalone="true">
<description>Extract key context from codebase for AI agent consumption (patterns, conventions, critical files)</description>
<!--
  Input parameters accepted by the context-extractor tool.
  Every parameter is optional; the default attribute holds the value used when it is omitted.
-->
<parameters>
  <param name="path" required="false" default="." description="Project root path"/>
  <!-- focus: narrows extraction to a single area. Only the values listed below are accepted. -->
  <param name="focus" required="false" default="all" description="Focus area for extraction">
    <allowed_values>
      <value name="all" description="Extract all context areas"/>
      <value name="patterns" description="Focus on coding patterns only"/>
      <value name="dependencies" description="Focus on dependencies only"/>
      <value name="structure" description="Focus on project structure only"/>
      <value name="conventions" description="Focus on coding conventions only"/>
    </allowed_values>
    <constraint>Invalid values MUST produce a validation error before extraction begins</constraint>
  </param>
  <!--
    exclude: the default covers build artifacts, dependency directories and every
    secret-like file glob named in the security section (.env files, key and
    certificate stores, credentials and secrets files), so those files are skipped
    before they are read rather than redacted afterwards.
  -->
  <param name="exclude"
         required="false"
         default=".git,.env*,*.env,node_modules,dist,build,_bmad-output,.venv,coverage,*.key,*.pem,*.p12,*.pfx,credentials*,secrets*,*_secret*"
         description="Comma-separated glob patterns for files/directories to skip during extraction (e.g., '.git,node_modules,dist'); used to exclude build artifacts, dependencies, and sensitive files"/>
  <!-- max_tokens: upper bound on the size of the generated context document. -->
  <param name="max_tokens" required="false" default="4000" description="Maximum token budget for output">
    <constraint>Value MUST be a positive integer; invalid values MUST produce a validation error before extraction begins</constraint>
  </param>
</parameters>
<!--
  Redaction policy for the extractor.
  Patterns with type="file" are file-name globs, one glob per element.
  Patterns with type="content" describe sensitive material that can appear inside any file.
-->
<security>
  <note>Secret-like files MUST be redacted from any output. This includes but is not limited to:</note>
  <redact_patterns>
    <!-- .env* covers .env itself as well as suffixed variants such as .env.local -->
    <pattern type="file">.env*</pattern>
    <pattern type="file">*.env</pattern>
    <pattern type="file">*.key</pattern>
    <pattern type="file">*.pem</pattern>
    <pattern type="file">*.p12</pattern>
    <pattern type="file">*.pfx</pattern>
    <!-- No dot before the wildcard, so extension-less files named "credentials" or "secrets" also match -->
    <pattern type="file">credentials*</pattern>
    <pattern type="file">secrets*</pattern>
    <pattern type="file">*_secret*</pattern>
    <pattern type="content">API keys, tokens, passwords in any file</pattern>
  </redact_patterns>
  <requirement>Implementations MUST perform redaction checks before returning any content</requirement>
  <requirement>If sensitive content is detected, either redact inline (replace with [REDACTED]) or exclude the file entirely</requirement>
</security>
<!--
  Catalogue of the context areas the tool can report on.
  Each area has a short description and the sources it is derived from.
-->
<extraction_areas>
  <area name="project_structure">
    <description>Key directories and their purposes</description>
    <sources>
      <source>Directory listing (depth 3)</source>
      <source>README.md</source>
      <source>package.json / setup.py / Cargo.toml</source>
    </sources>
  </area>
  <area name="coding_patterns">
    <description>Common patterns used in the codebase</description>
    <sources>
      <source>Import patterns</source>
      <source>Naming conventions</source>
      <source>Error handling patterns</source>
      <source>Async patterns</source>
    </sources>
  </area>
  <area name="dependencies">
    <description>Key dependencies and their usage</description>
    <sources>
      <!-- Same manifest set as project_structure, so non-JavaScript projects are covered too -->
      <source>package.json / setup.py / Cargo.toml dependencies</source>
      <source>Import frequency analysis</source>
    </sources>
  </area>
  <area name="conventions">
    <description>Coding conventions and style</description>
    <sources>
      <source>.eslintrc / .prettierrc</source>
      <source>tsconfig.json / jsconfig.json</source>
      <!-- The EditorConfig file name starts with a dot -->
      <source>.editorconfig</source>
      <source>Observed patterns in code</source>
    </sources>
  </area>
  <area name="critical_files">
    <description>Most important files to understand</description>
    <sources>
      <source>Entry points (index, main, app)</source>
      <source>Configuration files</source>
      <source>Type definitions</source>
      <source>Shared utilities</source>
    </sources>
  </area>
</extraction_areas>
<!--
  Ordered extraction procedure.
  Step 0 validates input and builds the exclusion filter before anything is read.
  Steps 1 to 4 gather material, step 5 redacts it, and step 6 assembles the output.
-->
<execution>
  <step n="0" goal="Validate parameters and apply exclusions">
    <action>Validate focus parameter against allowed_values (all, patterns, dependencies, structure, conventions)</action>
    <action if="focus not in allowed_values">
      HALT with error: "Invalid focus value '{focus}'. Allowed values: all, patterns, dependencies, structure, conventions"
    </action>
    <action>Validate max_tokens is a positive integer</action>
    <action if="max_tokens is not a positive integer">
      HALT with error: "Invalid max_tokens value '{max_tokens}'. Expected a positive integer"
    </action>
    <!-- focus values are shorter than the area names, so the mapping is spelled out here -->
    <action>Resolve focus to extraction areas: all selects every area; structure selects project_structure; patterns selects coding_patterns; dependencies selects dependencies; conventions selects conventions</action>
    <action>Parse exclude patterns from comma-separated list</action>
    <action>Build exclusion filter to skip matching files/directories BEFORE any read operations</action>
    <note>Exclusions MUST be applied prior to reading any files to prevent accidental exposure</note>
    <note>When focus is not "all", later steps only need to gather what the selected area requires</note>
  </step>
  <step n="1" goal="Scan project structure">
    <action>List directories up to depth 3 (applying exclude patterns)</action>
    <action>Identify key directory patterns (src, lib, tests, etc.)</action>
    <action>Note technology indicators (package.json, Cargo.toml, etc.)</action>
    <action>Skip any paths matching exclude patterns</action>
  </step>
  <step n="2" goal="Analyze entry points">
    <action>Find entry files (index.ts, main.py, main.go, etc.)</action>
    <action>Extract high-level architecture from imports</action>
    <action>Skip files matching exclude patterns</action>
  </step>
  <step n="3" goal="Extract conventions">
    <action>Parse linter/formatter configs (skip if in exclude list)</action>
    <action>Sample 10 representative files for pattern analysis (exclude sensitive files)</action>
    <action>Identify naming conventions (camelCase, snake_case, etc.)</action>
  </step>
  <step n="4" goal="Analyze dependencies">
    <action>Extract key dependencies from package manager files</action>
    <action>Identify most-imported modules</action>
    <action>Note framework/library choices</action>
    <action>Skip files matching exclude patterns</action>
  </step>
  <step n="5" goal="Redaction check (mandatory pre-return)">
    <action>Scan all collected content for sensitive patterns:
      - API keys (patterns like sk-*, AKIA*, etc.)
      - Tokens (JWT, bearer tokens, auth tokens)
      - Passwords or secrets in config
      - Private keys or certificates
    </action>
    <action if="sensitive content detected">
      Either replace with [REDACTED] or exclude the content entirely
    </action>
    <note>This step is MANDATORY and must complete before any output is returned</note>
  </step>
  <step n="6" goal="Generate context document">
    <action>Limit findings to the extraction areas selected by focus</action>
    <action>Compile findings within max_tokens budget</action>
    <action>Prioritize most critical information</action>
    <action>Format for AI agent consumption</action>
    <action>Final verification: ensure no excluded or sensitive content in output</action>
  </step>
</execution>
<!--
  Markdown template for the generated context document; {placeholders} are filled
  from the extraction results. The outer fence uses four backticks so the inner
  three-backtick fence around the directory tree does not close it early.
  Text inside the output element is kept at column 0 because its whitespace is data.
-->
<output>
````markdown
# Project Context
## Technology Stack
- Language: {language}
- Framework: {framework}
- Key Libraries: {libraries}
## Project Structure
```
{directory_tree}
```
## Entry Points
{entry_points}
## Coding Conventions
- Naming: {naming_convention}
- Formatting: {formatting_rules}
- Imports: {import_pattern}
## Key Patterns
{patterns}
## Critical Files
{critical_files}
## Dependencies
{key_dependencies}
````
</output>
</tool>