diff --git a/.github/instructions/*.instructions.md b/.github/instructions/*.instructions.md new file mode 100644 index 000000000..ca19d7fd6 --- /dev/null +++ b/.github/instructions/*.instructions.md @@ -0,0 +1,3 @@ +# Rules +* Never create PRs for altering code after review. Always offer a fix and the option to commit. +* Qualify the severity of the change requested: NORMAL | IMPROVEMENT | FIX | CRITICAL diff --git a/src/bmm/agents/analyst.agent.yaml b/src/bmm/agents/analyst.agent.yaml index 28120d098..de0b6a940 100644 --- a/src/bmm/agents/analyst.agent.yaml +++ b/src/bmm/agents/analyst.agent.yaml @@ -41,3 +41,7 @@ agent: - trigger: DP or fuzzy match on document-project workflow: "{project-root}/_bmad/bmm/workflows/document-project/workflow.yaml" description: "[DP] Document Project: Analyze an existing project to produce useful documentation for both human and LLM" + + - trigger: KS or fuzzy match on knowledge-sync + exec: "{project-root}/_bmad/bmm/workflows/4-implementation/genai-knowledge-sync/workflow.md" + description: "[KS] Knowledge Sync: Build a RAG-ready knowledge index from project artifacts for optimized AI agent retrieval" diff --git a/src/bmm/workflows/4-implementation/genai-knowledge-sync/knowledge-index-template.md b/src/bmm/workflows/4-implementation/genai-knowledge-sync/knowledge-index-template.md new file mode 100644 index 000000000..da3492d4c --- /dev/null +++ b/src/bmm/workflows/4-implementation/genai-knowledge-sync/knowledge-index-template.md @@ -0,0 +1,86 @@ +--- +project_name: '' +user_name: '' +date: '' +total_chunks: 0 +sources_indexed: 0 +tag_vocabulary_size: 0 +retrieval_tested: false +status: 'draft' +--- + +# Knowledge Index for {{project_name}} + +_RAG-optimized knowledge base for AI agent retrieval. 
Each chunk is self-contained and tagged for semantic search._ + +--- + +## Index Summary + +- **Total Chunks:** {{total_count}} +- **Critical:** {{critical_count}} | **High:** {{high_count}} | **Standard:** {{standard_count}} | **Reference:** {{ref_count}} +- **Sources Indexed:** {{source_count}} +- **Last Synced:** {{date}} + +--- + +## Critical Knowledge + + + +--- + +## Architecture Knowledge + + + +--- + +## Requirements Knowledge + + + +--- + +## Implementation Knowledge + + + +--- + +## Domain Knowledge + + + +--- + +## Operations Knowledge + + + +--- + +## Quality Knowledge + + + +--- + +## Retrieval Configuration + +### Query Mapping + +| Query Pattern | Target Categories | Priority Filter | Expected Chunks | +|---|---|---|---| +| "how to implement \*" | implementation, architecture | critical, high | 3-5 | +| "testing requirements for \*" | quality, implementation | critical, high | 2-4 | +| "business rules for \*" | requirements, domain | all | 2-3 | +| "architecture decision for \*" | architecture | all | 1-3 | +| "deployment process for \*" | operations | all | 1-2 | + +### Embedding Recommendations + +- **Model:** Use an embedding model that handles technical content well +- **Chunk Overlap:** 50-100 characters overlap between adjacent chunks from the same source +- **Metadata Filters:** Always filter by category and priority for focused retrieval +- **Top-K:** Retrieve 3-5 chunks per query for optimal context balance diff --git a/src/bmm/workflows/4-implementation/genai-knowledge-sync/steps/step-01-discover.md b/src/bmm/workflows/4-implementation/genai-knowledge-sync/steps/step-01-discover.md new file mode 100644 index 000000000..067d736ab --- /dev/null +++ b/src/bmm/workflows/4-implementation/genai-knowledge-sync/steps/step-01-discover.md @@ -0,0 +1,179 @@ +# Step 1: Artifact Discovery & Catalog + +## MANDATORY EXECUTION RULES (READ FIRST): + +- 🛑 NEVER generate content without user input +- ✅ ALWAYS treat this as collaborative discovery between 
technical peers +- 📋 YOU ARE A FACILITATOR, not a content generator +- 💬 FOCUS on discovering and cataloging all relevant project artifacts +- 🎯 IDENTIFY sources that provide high-value knowledge for RAG retrieval +- ⚠️ ABSOLUTELY NO TIME ESTIMATES - AI development speed has fundamentally changed +- ✅ YOU MUST ALWAYS SPEAK OUTPUT in your Agent communication style with the config `{communication_language}` + +## EXECUTION PROTOCOLS: + +- 🎯 Show your analysis before taking any action +- 📖 Read existing project files to catalog available artifacts +- 💾 Initialize document and update frontmatter +- 🚫 FORBIDDEN to load next step until discovery is complete + +## CONTEXT BOUNDARIES: + +- Variables from workflow.md are available in memory +- Focus on existing project artifacts and documentation +- Identify documents that contain reusable knowledge for AI agents +- Prioritize artifacts that prevent implementation mistakes and provide domain context + +## YOUR TASK: + +Discover, catalog, and classify all project artifacts that should be indexed for RAG retrieval by AI agents. + +## DISCOVERY SEQUENCE: + +### 1. Check for Existing Knowledge Index + +First, check if a knowledge index already exists: + +- Look for file at `{project_knowledge}/knowledge-index.md` or `{project-root}/**/knowledge-index.md` +- If exists: Read complete file to understand existing index +- Present to user: "Found existing knowledge index with {{chunk_count}} chunks across {{source_count}} sources. Would you like to update this or create a new one?" + +### 2. 
Scan Planning Artifacts + +Search `{planning_artifacts}` for documents containing project knowledge: + +**Product Requirements:** + +- Look for PRD files (`*prd*`, `*requirements*`) +- Extract key decisions, constraints, and acceptance criteria +- Note sections with high reuse value for agents + +**Architecture Documents:** + +- Look for architecture files (`*architecture*`, `*design*`) +- Extract technology decisions, patterns, and trade-offs +- Identify integration points and system boundaries + +**Epic and Story Files:** + +- Look for epic/story definitions (`*epic*`, `*stories*`) +- Extract acceptance criteria, implementation notes, and dependencies +- Identify cross-cutting concerns that appear across stories + +### 3. Scan Implementation Artifacts + +Search `{implementation_artifacts}` for implementation knowledge: + +**Sprint and Status Files:** + +- Look for sprint status, retrospectives, and course corrections +- Extract lessons learned and pattern changes +- Identify recurring issues and their resolutions + +**Code Review Findings:** + +- Look for code review artifacts +- Extract quality patterns and anti-patterns discovered +- Note corrections that should inform future implementation + +### 4. Scan Project Knowledge + +Search `{project_knowledge}` for existing knowledge assets: + +**Project Context:** + +- Look for `project-context.md` and similar files +- Extract implementation rules and coding conventions +- These are high-priority sources for RAG retrieval + +**Research Documents:** + +- Look for research outputs (market, domain, technical) +- Extract findings that inform implementation decisions +- Identify domain terminology and definitions + +### 5. 
Scan Source Code for Patterns + +Identify key code patterns worth indexing: + +**Configuration Files:** + +- Package manifests, build configs, linting rules +- Extract version constraints and tool configurations +- These provide critical context for code generation + +**Key Source Files:** + +- Identify entry points, shared utilities, and core modules +- Extract patterns that define the project's coding style +- Note any non-obvious conventions visible only in code + +### 6. Classify and Prioritize Sources + +For each discovered artifact, assign: + +**Knowledge Category:** + +- `architecture` - System design decisions and patterns +- `requirements` - Business rules and acceptance criteria +- `implementation` - Coding patterns and conventions +- `domain` - Business domain concepts and terminology +- `operations` - Deployment, monitoring, and workflow rules +- `quality` - Testing patterns, review standards, and anti-patterns + +**Retrieval Priority:** + +- `critical` - Must be retrieved for every implementation task +- `high` - Should be retrieved for related implementation tasks +- `standard` - Available when specifically relevant +- `reference` - Background context when explicitly needed + +### 7. Present Discovery Summary + +Report findings to user: + +"Welcome {{user_name}}! I've scanned your project {{project_name}} to catalog artifacts for your RAG knowledge base. 
+ +**Artifacts Discovered:** + +| Category | Count | Priority Breakdown (critical/high/standard/reference) | +|---|---|---| +| Architecture | {{count}} | {{critical}}/{{high}}/{{standard}}/{{reference}} | +| Requirements | {{count}} | {{critical}}/{{high}}/{{standard}}/{{reference}} | +| Implementation | {{count}} | {{critical}}/{{high}}/{{standard}}/{{reference}} | +| Domain | {{count}} | {{critical}}/{{high}}/{{standard}}/{{reference}} | +| Operations | {{count}} | {{critical}}/{{high}}/{{standard}}/{{reference}} | +| Quality | {{count}} | {{critical}}/{{high}}/{{standard}}/{{reference}} | + +**Source Files Cataloged:** {{total_files}} + +**Recommended Chunking Strategy:** +Based on your artifact types, I recommend {{strategy}} chunking: +- {{strategy_rationale}} + +Ready to index and chunk your project knowledge for RAG retrieval. + +[C] Continue to knowledge indexing" + +## SUCCESS METRICS: + +✅ All relevant project artifacts discovered and cataloged +✅ Each artifact classified by category and retrieval priority +✅ Source file paths accurately recorded +✅ Chunking strategy recommended based on artifact analysis +✅ Discovery findings clearly presented to user +✅ User ready to proceed with indexing + +## FAILURE MODES: + +❌ Missing critical artifacts in planning or implementation directories +❌ Not checking for existing knowledge index before creating new one +❌ Incorrect classification of artifact categories or priorities +❌ Not scanning source code for pattern-level knowledge +❌ Not presenting clear discovery summary to user + +## NEXT STEP: + +After user selects [C] to continue, load `{project-root}/_bmad/bmm/workflows/4-implementation/genai-knowledge-sync/steps/step-02-index.md` to index and chunk the discovered artifacts. + +Remember: Do NOT proceed to step-02 until user explicitly selects [C] from the menu and discovery catalog is confirmed! 
diff --git a/src/bmm/workflows/4-implementation/genai-knowledge-sync/steps/step-02-index.md b/src/bmm/workflows/4-implementation/genai-knowledge-sync/steps/step-02-index.md new file mode 100644 index 000000000..65c2b8fcc --- /dev/null +++ b/src/bmm/workflows/4-implementation/genai-knowledge-sync/steps/step-02-index.md @@ -0,0 +1,243 @@ +# Step 2: Knowledge Indexing & Chunking + +## MANDATORY EXECUTION RULES (READ FIRST): + +- 🛑 NEVER generate content without user input +- ✅ ALWAYS treat this as collaborative indexing between technical peers +- 📋 YOU ARE A FACILITATOR, not a content generator +- 💬 FOCUS on creating self-contained, retrievable knowledge chunks +- 🎯 EACH CHUNK must be independently useful without requiring full document context +- ⚠️ ABSOLUTELY NO TIME ESTIMATES - AI development speed has fundamentally changed +- ✅ YOU MUST ALWAYS SPEAK OUTPUT in your Agent communication style with the config `{communication_language}` + +## EXECUTION PROTOCOLS: + +- 🎯 Show your analysis before taking any action +- 📝 Focus on creating atomic, self-contained knowledge chunks +- ⚠️ Present A/P/C menu after each major category +- 💾 ONLY save when user chooses C (Continue) +- 📖 Update frontmatter with completed categories +- 🚫 FORBIDDEN to load next step until all categories are indexed + +## COLLABORATION MENUS (A/P/C): + +This step will generate content and present choices for each knowledge category: + +- **A (Advanced Elicitation)**: Use discovery protocols to explore nuanced knowledge relationships +- **P (Party Mode)**: Bring multiple perspectives to identify missing knowledge connections +- **C (Continue)**: Save the current chunks and proceed to next category + +## PROTOCOL INTEGRATION: + +- When 'A' selected: Execute {project-root}/_bmad/core/workflows/advanced-elicitation/workflow.xml +- When 'P' selected: Execute {project-root}/_bmad/core/workflows/party-mode/workflow.md +- PROTOCOLS always return to display this step's A/P/C menu after the A or P has 
completed +- User accepts/rejects protocol changes before proceeding + +## CONTEXT BOUNDARIES: + +- Discovery catalog from step-1 is available +- All artifact paths and classifications are identified +- Focus on creating chunks optimized for embedding and retrieval +- Each chunk must carry enough context to be useful in isolation + +## YOUR TASK: + +Index each discovered artifact into self-contained knowledge chunks with metadata tags, source tracing, and retrieval-optimized formatting. + +## CHUNKING PRINCIPLES: + +### Chunk Design Rules + +1. **Self-Contained**: Each chunk must be understandable without reading the source document +2. **Tagged**: Every chunk has category, priority, source path, and semantic tags +3. **Atomic**: One concept or decision per chunk - no compound knowledge +4. **Traceable**: Every chunk links back to its source artifact and section +5. **Contextual**: Include enough surrounding context for accurate retrieval +6. **Deduplicated**: Avoid redundant chunks across different source artifacts + +### Chunk Format + +Each chunk follows this standard format: + +```markdown +### [CHUNK-ID] Chunk Title + +- **Source:** `{relative_path_to_source_file}` +- **Category:** architecture | requirements | implementation | domain | operations | quality +- **Priority:** critical | high | standard | reference +- **Tags:** comma-separated semantic tags for retrieval matching + +**Context:** One-line description of when this knowledge is relevant. + +**Content:** +The actual knowledge content - specific, actionable, self-contained. +``` + +## INDEXING SEQUENCE: + +### 1. 
Index Critical-Priority Artifacts + +Process all artifacts marked as `critical` priority first: + +**For each critical artifact:** + +- Read the complete source file +- Identify distinct knowledge units (decisions, rules, constraints) +- Create one chunk per knowledge unit +- Apply semantic tags for retrieval matching +- Present chunks to user for validation + +**Present results:** +"I've created {{chunk_count}} critical-priority chunks from {{source_count}} sources: + +{{list_of_chunk_titles_with_tags}} + +These chunks will be prioritized in every retrieval query. + +[A] Advanced Elicitation - Explore deeper knowledge connections +[P] Party Mode - Review from multiple implementation perspectives +[C] Continue - Save these chunks and proceed" + +### 2. Index High-Priority Artifacts + +Process all `high` priority artifacts: + +**For each high-priority artifact:** + +- Read source file and identify knowledge units +- Create chunks with appropriate tags +- Cross-reference with critical chunks for consistency +- Identify any overlaps and deduplicate + +### 3. Index Standard-Priority Artifacts + +Process `standard` priority artifacts: + +**For each standard artifact:** + +- Read source file for domain-specific knowledge +- Create chunks focused on contextual information +- Tag for specific retrieval scenarios + +### 4. Index Reference-Priority Artifacts + +Process `reference` priority artifacts: + +**For each reference artifact:** + +- Extract background context and terminology +- Create lighter-weight chunks for supplementary retrieval +- Tag for broad topic matching + +### 5. 
Cross-Reference and Deduplicate + +After all categories are indexed: + +**Deduplication Analysis:** + +- Identify chunks with overlapping content across sources +- Merge or consolidate redundant chunks +- Ensure cross-references between related chunks are tagged +- Present deduplication summary to user + +**Relationship Mapping:** + +- Identify chunks that frequently co-occur in implementation contexts +- Tag related chunks for retrieval grouping +- Create chunk clusters for common query patterns + +### 6. Generate Knowledge Index Document + +Compile all validated chunks into the knowledge index file: + +**Document Structure:** + +```markdown +# Knowledge Index for {{project_name}} + +_RAG-optimized knowledge base for AI agent retrieval. Each chunk is self-contained and tagged for semantic search._ + +--- + +## Index Summary + +- **Total Chunks:** {{total_count}} +- **Critical:** {{critical_count}} | **High:** {{high_count}} | **Standard:** {{standard_count}} | **Reference:** {{ref_count}} +- **Sources Indexed:** {{source_count}} +- **Last Synced:** {{date}} + +--- + +## Critical Knowledge + +{{critical_chunks}} + +## Architecture Knowledge + +{{architecture_chunks}} + +## Requirements Knowledge + +{{requirements_chunks}} + +## Implementation Knowledge + +{{implementation_chunks}} + +## Domain Knowledge + +{{domain_chunks}} + +## Operations Knowledge + +{{operations_chunks}} + +## Quality Knowledge + +{{quality_chunks}} +``` + +### 7. Present Indexing Summary + +"Knowledge indexing complete for {{project_name}}! 
+ +**Chunks Created:** + +| Category | Critical | High | Standard | Reference | Total | +|---|---|---|---|---|---| +| Architecture | {{n}} | {{n}} | {{n}} | {{n}} | {{n}} | +| Requirements | {{n}} | {{n}} | {{n}} | {{n}} | {{n}} | +| Implementation | {{n}} | {{n}} | {{n}} | {{n}} | {{n}} | +| Domain | {{n}} | {{n}} | {{n}} | {{n}} | {{n}} | +| Operations | {{n}} | {{n}} | {{n}} | {{n}} | {{n}} | +| Quality | {{n}} | {{n}} | {{n}} | {{n}} | {{n}} | + +**Deduplication:** Removed {{removed_count}} redundant chunks +**Cross-References:** {{xref_count}} chunk relationships mapped + +[C] Continue to optimization" + +## SUCCESS METRICS: + +✅ All discovered artifacts indexed into self-contained chunks +✅ Each chunk has proper metadata tags and source tracing +✅ No redundant or overlapping chunks remain +✅ Cross-references between related chunks are mapped +✅ A/P/C menu presented and handled correctly for each category +✅ Knowledge index document properly structured + +## FAILURE MODES: + +❌ Creating chunks that require reading the full source document +❌ Missing semantic tags that prevent accurate retrieval +❌ Not deduplicating overlapping chunks from different sources +❌ Not cross-referencing related knowledge units +❌ Not getting user validation for each category +❌ Creating overly large chunks that reduce retrieval precision + +## NEXT STEP: + +After completing all categories and user selects [C], load `{project-root}/_bmad/bmm/workflows/4-implementation/genai-knowledge-sync/steps/step-03-optimize.md` to optimize the knowledge base for retrieval quality. + +Remember: Do NOT proceed to step-03 until all categories are indexed and user explicitly selects [C]! 
diff --git a/src/bmm/workflows/4-implementation/genai-knowledge-sync/steps/step-03-optimize.md b/src/bmm/workflows/4-implementation/genai-knowledge-sync/steps/step-03-optimize.md new file mode 100644 index 000000000..fb0a43723 --- /dev/null +++ b/src/bmm/workflows/4-implementation/genai-knowledge-sync/steps/step-03-optimize.md @@ -0,0 +1,289 @@ +# Step 3: Knowledge Base Optimization & Completion + +## MANDATORY EXECUTION RULES (READ FIRST): + +- 🛑 NEVER generate content without user input +- ✅ ALWAYS treat this as collaborative optimization between technical peers +- 📋 YOU ARE A FACILITATOR, not a content generator +- 💬 FOCUS on optimizing chunks for retrieval quality and accuracy +- 🎯 ENSURE every chunk is retrieval-ready and well-tagged +- ⚠️ ABSOLUTELY NO TIME ESTIMATES - AI development speed has fundamentally changed +- ✅ YOU MUST ALWAYS SPEAK OUTPUT in your Agent communication style with the config `{communication_language}` + +## EXECUTION PROTOCOLS: + +- 🎯 Show your analysis before taking any action +- 📝 Review and optimize chunks for retrieval precision +- 📖 Update frontmatter with completion status +- 🚫 NO MORE STEPS - this is the final step + +## CONTEXT BOUNDARIES: + +- All knowledge chunks from step-2 are indexed +- Cross-references and deduplication are complete +- Focus on retrieval quality optimization and finalization +- Ensure the knowledge index is ready for RAG pipeline integration + +## YOUR TASK: + +Optimize the knowledge index for retrieval quality, validate chunk completeness, and finalize the knowledge base for AI agent consumption. + +## OPTIMIZATION SEQUENCE: + +### 1. 
Retrieval Quality Analysis + +Analyze the indexed chunks for retrieval effectiveness: + +**Tag Coverage Analysis:** + +- Review semantic tags across all chunks +- Identify gaps where common queries would miss relevant chunks +- Suggest additional tags for better retrieval matching +- Present tag coverage report to user + +**Chunk Size Analysis:** + +- Identify chunks that are too large (reduce retrieval precision) +- Identify chunks that are too small (lack sufficient context) +- Recommend splits or merges for optimal retrieval size +- Target: Each chunk should be 100-500 words for optimal embedding + +**Context Sufficiency Check:** + +- Verify each chunk is understandable without its source document +- Add missing context where chunks reference undefined terms +- Ensure technical terms are defined or tagged for glossary lookup + +### 2. Semantic Tag Optimization + +Optimize tags for retrieval accuracy: + +**Tag Standardization:** + +- Normalize similar tags (e.g., "api-design" and "api-patterns" → single standard) +- Create a tag vocabulary for the project +- Apply consistent tag format across all chunks + +**Tag Enrichment:** + +- Add technology-specific tags (framework names, library names) +- Add pattern-type tags (e.g., "error-handling", "state-management") +- Add lifecycle tags (e.g., "setup", "implementation", "testing", "deployment") + +**Present Tag Summary:** +"I've optimized the semantic tags across {{chunk_count}} chunks: + +**Tag Vocabulary:** {{unique_tag_count}} standardized tags +**Most Connected Tags:** {{top_tags_by_frequency}} +**Coverage Gaps Fixed:** {{gaps_fixed}} + +Would you like to review the tag vocabulary? (y/n)" + +### 3. Retrieval Scenario Testing + +Validate retrieval quality with common query scenarios: + +**Test Queries:** + +Simulate these common developer queries against the knowledge index: + +1. "How should I structure a new feature?" → Should retrieve: architecture + implementation chunks +2. "What are the testing requirements?" 
→ Should retrieve: quality + implementation chunks +3. "What technology versions are we using?" → Should retrieve: critical implementation chunks +4. "How do I handle errors in this project?" → Should retrieve: implementation + quality chunks +5. "What are the business rules for {{core_feature}}?" → Should retrieve: requirements + domain chunks + +**For each query, report:** + +- Chunks that would be retrieved (by tag matching) +- Missing chunks that should be retrieved but aren't +- False positive chunks that would be retrieved incorrectly +- Recommended tag adjustments + +### 4. Generate Retrieval Configuration + +Create a retrieval configuration section in the knowledge index: + +```markdown +## Retrieval Configuration + +### Query Mapping + +| Query Pattern | Target Categories | Priority Filter | Expected Chunks | +|---|---|---|---| +| "how to implement *" | implementation, architecture | critical, high | 3-5 | +| "testing requirements for *" | quality, implementation | critical, high | 2-4 | +| "business rules for *" | requirements, domain | all | 2-3 | +| "architecture decision for *" | architecture | all | 1-3 | +| "deployment process for *" | operations | all | 1-2 | + +### Embedding Recommendations + +- **Model:** Use an embedding model that handles technical content well +- **Chunk Overlap:** 50-100 characters overlap between adjacent chunks from the same source +- **Metadata Filters:** Always filter by category and priority for focused retrieval +- **Top-K:** Retrieve 3-5 chunks per query for optimal context balance +``` + +### 5. 
Finalize Knowledge Index + +Complete the knowledge index with optimization results: + +**Update Frontmatter:** + +```yaml +--- +project_name: '{{project_name}}' +user_name: '{{user_name}}' +date: '{{date}}' +total_chunks: {{total_count}} +sources_indexed: {{source_count}} +tag_vocabulary_size: {{tag_count}} +retrieval_tested: true +status: 'complete' +--- +``` + +**Append Usage Guidelines:** + +```markdown +--- + +## Usage Guidelines + +**For AI Agents (RAG Retrieval):** + +- Query this index using semantic search against chunk tags and content +- Always include critical-priority chunks in implementation context +- Filter by category when the task type is known +- Cross-reference related chunks using shared tags + +**For Humans (Maintenance):** + +- Re-run this workflow when new artifacts are created or significantly updated +- Add new chunks manually using the standard chunk format above +- Review and prune quarterly to remove outdated knowledge +- Update tags when new patterns or technologies are adopted + +**For RAG Pipeline Integration:** + +- Parse chunks by the `### [CHUNK-ID]` delimiter +- Extract metadata from the bullet-point headers (Source, Category, Priority, Tags) +- Use Tags field for semantic search indexing +- Use Priority field for retrieval ranking +``` + +### 6. Present Completion Summary + +Based on user skill level, present the completion: + +**Expert Mode:** +"Knowledge index complete. {{chunk_count}} chunks across {{source_count}} sources, {{tag_count}} semantic tags. Retrieval-tested and RAG-ready. + +File saved to: `{project_knowledge}/knowledge-index.md`" + +**Intermediate Mode:** +"Your project knowledge base is indexed and retrieval-ready! 
+ +**What we created:** + +- {{chunk_count}} self-contained knowledge chunks +- {{source_count}} source artifacts indexed +- {{tag_count}} semantic tags for retrieval matching +- Retrieval configuration for RAG pipeline integration + +**How it works:** +AI agents can now search this index to find exactly the project knowledge they need for any implementation task, instead of loading entire documents. + +**Next steps:** + +- Integrate with your RAG pipeline using the retrieval configuration +- Re-run this workflow when artifacts change significantly +- Review quarterly to keep knowledge current" + +**Beginner Mode:** +"Your project knowledge base is ready! 🎉 + +**What this does:** +Think of this as a smart library catalog for your project. Instead of AI agents reading every document from start to finish, they can now search for exactly the knowledge they need. + +**What's included:** + +- {{chunk_count}} bite-sized knowledge pieces from your project documents +- Smart tags so agents can find the right knowledge quickly +- Priority labels so the most important knowledge comes first + +**How AI agents use it:** +When an agent needs to implement something, it searches this index for relevant knowledge chunks instead of reading entire documents. This makes them faster and more accurate!" + +### 7. Completion Validation + +Final checks before completion: + +**Content Validation:** +✅ All discovered artifacts indexed into chunks +✅ Each chunk has proper metadata and source tracing +✅ Semantic tags are standardized and comprehensive +✅ No redundant chunks remain after deduplication +✅ Retrieval scenarios tested successfully +✅ Retrieval configuration generated + +**Format Validation:** +✅ Consistent chunk format throughout +✅ Frontmatter properly updated +✅ Tag vocabulary is standardized +✅ Document is well-structured and scannable + +### 8. 
Completion Message + +"✅ **GenAI Knowledge Sync Complete!** + +Your retrieval-optimized knowledge index is ready at: +`{project_knowledge}/knowledge-index.md` + +**📊 Knowledge Base Summary:** + +- {{chunk_count}} indexed knowledge chunks +- {{source_count}} source artifacts cataloged +- {{tag_count}} semantic tags for retrieval +- {{category_count}} knowledge categories covered +- Retrieval-tested with {{test_count}} query scenarios + +**🎯 RAG Integration Ready:** + +- Self-contained chunks with metadata headers +- Standardized tag vocabulary for semantic search +- Priority-based retrieval ranking +- Query mapping configuration included + +**📋 Maintenance:** + +1. Re-sync when artifacts change significantly: run this workflow again +2. Add individual chunks manually using the standard format +3. Review quarterly to prune outdated knowledge +4. Update tags when new patterns emerge + +Your AI agents can now retrieve precisely the project knowledge they need for any task!" + +## SUCCESS METRICS: + +✅ Knowledge index fully optimized for retrieval quality +✅ Semantic tags standardized and comprehensive +✅ Retrieval scenarios tested with good coverage +✅ Retrieval configuration generated for RAG pipeline +✅ Usage guidelines included for agents, humans, and pipelines +✅ Frontmatter properly updated with completion status +✅ User provided with clear maintenance guidance + +## FAILURE MODES: + +❌ Chunks too large or too small for effective retrieval +❌ Semantic tags inconsistent or too sparse +❌ Not testing retrieval scenarios before finalizing +❌ Missing retrieval configuration for pipeline integration +❌ Not providing maintenance and usage guidelines +❌ Frontmatter not properly updated + +## WORKFLOW COMPLETE: + +This is the final step of the GenAI Knowledge Sync workflow. 
The user now has a retrieval-optimized knowledge index that enables AI agents to find and use exactly the project knowledge they need for any implementation task, improving both speed and accuracy of AI-assisted development. diff --git a/src/bmm/workflows/4-implementation/genai-knowledge-sync/workflow.md b/src/bmm/workflows/4-implementation/genai-knowledge-sync/workflow.md new file mode 100644 index 000000000..ce4e00e49 --- /dev/null +++ b/src/bmm/workflows/4-implementation/genai-knowledge-sync/workflow.md @@ -0,0 +1,50 @@ +--- +name: genai-knowledge-sync +description: 'Build and maintain a RAG-ready knowledge base from project artifacts. Use when the user says "build knowledge base", "sync knowledge", or "create RAG context"' +--- + +# GenAI Knowledge Sync Workflow + +**Goal:** Create a structured, chunked knowledge index (`knowledge-index.md`) from project artifacts that is optimized for Retrieval-Augmented Generation (RAG) pipelines and AI agent context loading. This enables AI agents to retrieve the most relevant project knowledge at inference time rather than loading entire documents. + +**Your Role:** You are a technical knowledge architect working with a peer to catalog, chunk, and index project artifacts into a retrieval-optimized format. You ensure every knowledge chunk is self-contained, well-tagged, and traceable to its source. + +--- + +## WORKFLOW ARCHITECTURE + +This uses **micro-file architecture** for disciplined execution: + +- Each step is a self-contained file with embedded rules +- Sequential progression with user control at each step +- Document state tracked in frontmatter +- Focus on lean, retrieval-optimized content generation +- You NEVER proceed to a step file if the current step file indicates the user must approve and indicate continuation. 
+ +--- + +## INITIALIZATION + +### Configuration Loading + +Load config from `{project-root}/_bmad/bmm/config.yaml` and resolve: + +- `project_name`, `output_folder`, `user_name` +- `communication_language`, `document_output_language`, `user_skill_level` +- `planning_artifacts`, `implementation_artifacts`, `project_knowledge` +- `date` as system-generated current datetime +- ✅ YOU MUST ALWAYS SPEAK OUTPUT in your Agent communication style with the config `{communication_language}` + +### Paths + +- `installed_path` = `{project-root}/_bmad/bmm/workflows/4-implementation/genai-knowledge-sync` +- `template_path` = `{installed_path}/knowledge-index-template.md` +- `output_file` = `{project_knowledge}/knowledge-index.md` + +--- + +## EXECUTION + +Load and execute `{project-root}/_bmad/bmm/workflows/4-implementation/genai-knowledge-sync/steps/step-01-discover.md` to begin the workflow. + +**Note:** Artifact discovery, source cataloging, and chunking strategy selection are handled in step-01-discover.md. diff --git a/tools/cli/commands/status.js b/tools/cli/commands/status.js index ec931fe46..ff597d384 100644 --- a/tools/cli/commands/status.js +++ b/tools/cli/commands/status.js @@ -11,7 +11,7 @@ const ui = new UI(); module.exports = { command: 'status', description: 'Display BMAD installation status and module versions', - options: [], + options: [['-v, --verbose', 'Show detailed status including agent and workflow counts']], action: async (options) => { try { // Find the bmad directory @@ -53,6 +53,23 @@ module.exports = { bmadDir, }); + // Verbose mode: show agent and workflow counts per module + if (options.verbose) { + const { glob } = require('glob'); + for (const mod of modules) { + const moduleName = typeof mod === 'string' ? 
mod : (mod.id || mod.name || ''); + if (!moduleName) continue; + + const modDir = path.join(bmadDir, moduleName); + if (!(await fs.pathExists(modDir))) continue; + + const agents = await glob('agents/**/*.agent.yaml', { cwd: modDir }); + const workflows = await glob('workflows/**/*.{yaml,yml,md}', { cwd: modDir }); + + await prompts.log.info(`Module "${moduleName}": ${agents.length} agent(s), ${workflows.length} workflow(s)`); + } + } + process.exit(0); } catch (error) { await prompts.log.error(`Status check failed: ${error.message}`); diff --git a/tools/cli/lib/config.js b/tools/cli/lib/config.js index a78250305..ce6262875 100644 --- a/tools/cli/lib/config.js +++ b/tools/cli/lib/config.js @@ -7,8 +7,14 @@ const packageJson = require('../../../package.json'); * Configuration utility class */ class Config { + /** @type {Map<string, { data: Object, mtime: number }>} */ + #cache = new Map(); + /** - * Load a YAML configuration file + * Load a YAML configuration file with in-memory caching. + * Cached entries are automatically invalidated when the file's + * modification time changes, so callers always receive fresh data + * after a file is written. * @param {string} configPath - Path to config file * @returns {Object} Parsed configuration */ @@ -17,8 +23,26 @@ class Config { throw new Error(`Configuration file not found: ${configPath}`); } - const content = await fs.readFile(configPath, 'utf8'); - return yaml.parse(content); + const resolved = path.resolve(configPath); + const stat = await fs.stat(resolved); + const mtime = stat.mtimeMs; + + const cached = this.#cache.get(resolved); + if (cached && cached.mtime === mtime) { + return cached.data; + } + + const content = await fs.readFile(resolved, 'utf8'); + const data = yaml.parse(content); + this.#cache.set(resolved, { data, mtime }); + return data; + } + + /** + * Clear the in-memory YAML cache. + */ + clearCache() { + this.#cache.clear(); } /**