Merge 2e1949df76 into 259e8a11ba

Add contribution rules to instructions
2026-03-05 06:36:04 +01:00 · 2026-02-26 14:30:28 -03:00
9 changed files with 899 additions and 4 deletions
--- a/.github/instructions/*.instructions.md
+++ b/.github/instructions/*.instructions.md
@ -0,0 +1,3 @@
+# Rules
+* Never create PRs to alter code after review. Always offer a fix and the option to commit.
+* Qualify the severity of each requested change as one of: NORMAL | IMPROVEMENT | FIX | CRITICAL
--- a/src/bmm/agents/analyst.agent.yaml
+++ b/src/bmm/agents/analyst.agent.yaml
@ -41,3 +41,7 @@ agent:
    - trigger: DP or fuzzy match on document-project
      workflow: "{project-root}/_bmad/bmm/workflows/document-project/workflow.yaml"
      description: "[DP] Document Project: Analyze an existing project to produce useful documentation for both human and LLM"
+
+    - trigger: KS or fuzzy match on knowledge-sync
+      exec: "{project-root}/_bmad/bmm/workflows/4-implementation/genai-knowledge-sync/workflow.md"
+      description: "[KS] Knowledge Sync: Build a RAG-ready knowledge index from project artifacts for optimized AI agent retrieval"
--- a/src/bmm/workflows/4-implementation/genai-knowledge-sync/knowledge-index-template.md
+++ b/src/bmm/workflows/4-implementation/genai-knowledge-sync/knowledge-index-template.md
@ -0,0 +1,86 @@
+---
+project_name: ''
+user_name: ''
+date: ''
+total_chunks: 0
+sources_indexed: 0
+tag_vocabulary_size: 0
+retrieval_tested: false
+status: 'draft'
+---
+
+# Knowledge Index for {{project_name}}
+
+_RAG-optimized knowledge base for AI agent retrieval. Each chunk is self-contained and tagged for semantic search._
+
+---
+
+## Index Summary
+
+- **Total Chunks:** {{total_count}}
+- **Critical:** {{critical_count}} | **High:** {{high_count}} | **Standard:** {{standard_count}} | **Reference:** {{ref_count}}
+- **Sources Indexed:** {{source_count}}
+- **Last Synced:** {{date}}
+
+---
+
+## Critical Knowledge
+
+<!-- Critical-priority chunks go here. These are retrieved for every implementation task. -->
+
+---
+
+## Architecture Knowledge
+
+<!-- Architecture decisions, system design patterns, and technology choices. -->
+
+---
+
+## Requirements Knowledge
+
+<!-- Business rules, acceptance criteria, and constraints. -->
+
+---
+
+## Implementation Knowledge
+
+<!-- Coding patterns, conventions, and implementation rules. -->
+
+---
+
+## Domain Knowledge
+
+<!-- Business domain concepts, terminology, and definitions. -->
+
+---
+
+## Operations Knowledge
+
+<!-- Deployment, monitoring, and workflow rules. -->
+
+---
+
+## Quality Knowledge
+
+<!-- Testing patterns, review standards, and anti-patterns. -->
+
+---
+
+## Retrieval Configuration
+
+### Query Mapping
+
+| Query Pattern | Target Categories | Priority Filter | Expected Chunks |
+|---|---|---|---|
+| "how to implement \*" | implementation, architecture | critical, high | 3-5 |
+| "testing requirements for \*" | quality, implementation | critical, high | 2-4 |
+| "business rules for \*" | requirements, domain | all | 2-3 |
+| "architecture decision for \*" | architecture | all | 1-3 |
+| "deployment process for \*" | operations | all | 1-2 |
+
+### Embedding Recommendations
+
+- **Model:** Use an embedding model that handles technical content well
+- **Chunk Overlap:** 50-100 characters overlap between adjacent chunks from the same source
+- **Metadata Filters:** Always filter by category and priority for focused retrieval
+- **Top-K:** Retrieve 3-5 chunks per query for optimal context balance
--- a/src/bmm/workflows/4-implementation/genai-knowledge-sync/steps/step-01-discover.md
+++ b/src/bmm/workflows/4-implementation/genai-knowledge-sync/steps/step-01-discover.md
@ -0,0 +1,179 @@
+# Step 1: Artifact Discovery & Catalog
+
+## MANDATORY EXECUTION RULES (READ FIRST):
+
+- 🛑 NEVER generate content without user input
+- ✅ ALWAYS treat this as collaborative discovery between technical peers
+- 📋 YOU ARE A FACILITATOR, not a content generator
+- 💬 FOCUS on discovering and cataloging all relevant project artifacts
+- 🎯 IDENTIFY sources that provide high-value knowledge for RAG retrieval
+- ⚠️ ABSOLUTELY NO TIME ESTIMATES - AI development speed has fundamentally changed
+- ✅ YOU MUST ALWAYS SPEAK OUTPUT in your Agent communication style with the config `{communication_language}`
+
+## EXECUTION PROTOCOLS:
+
+- 🎯 Show your analysis before taking any action
+- 📖 Read existing project files to catalog available artifacts
+- 💾 Initialize document and update frontmatter
+- 🚫 FORBIDDEN to load next step until discovery is complete
+
+## CONTEXT BOUNDARIES:
+
+- Variables from workflow.md are available in memory
+- Focus on existing project artifacts and documentation
+- Identify documents that contain reusable knowledge for AI agents
+- Prioritize artifacts that prevent implementation mistakes and provide domain context
+
+## YOUR TASK:
+
+Discover, catalog, and classify all project artifacts that should be indexed for RAG retrieval by AI agents.
+
+## DISCOVERY SEQUENCE:
+
+### 1. Check for Existing Knowledge Index
+
+First, check if a knowledge index already exists:
+
+- Look for a file at `{project_knowledge}/knowledge-index.md` or `{project-root}/**/knowledge-index.md`
+- If exists: Read complete file to understand existing index
+- Present to user: "Found existing knowledge index with {{chunk_count}} chunks across {{source_count}} sources. Would you like to update this or create a new one?"
+
+### 2. Scan Planning Artifacts
+
+Search `{planning_artifacts}` for documents containing project knowledge:
+
+**Product Requirements:**
+
+- Look for PRD files (`*prd*`, `*requirements*`)
+- Extract key decisions, constraints, and acceptance criteria
+- Note sections with high reuse value for agents
+
+**Architecture Documents:**
+
+- Look for architecture files (`*architecture*`, `*design*`)
+- Extract technology decisions, patterns, and trade-offs
+- Identify integration points and system boundaries
+
+**Epic and Story Files:**
+
+- Look for epic/story definitions (`*epic*`, `*stories*`)
+- Extract acceptance criteria, implementation notes, and dependencies
+- Identify cross-cutting concerns that appear across stories
+
+### 3. Scan Implementation Artifacts
+
+Search `{implementation_artifacts}` for implementation knowledge:
+
+**Sprint and Status Files:**
+
+- Look for sprint status, retrospectives, and course corrections
+- Extract lessons learned and pattern changes
+- Identify recurring issues and their resolutions
+
+**Code Review Findings:**
+
+- Look for code review artifacts
+- Extract quality patterns and anti-patterns discovered
+- Note corrections that should inform future implementation
+
+### 4. Scan Project Knowledge
+
+Search `{project_knowledge}` for existing knowledge assets:
+
+**Project Context:**
+
+- Look for `project-context.md` and similar files
+- Extract implementation rules and coding conventions
+- These are high-priority sources for RAG retrieval
+
+**Research Documents:**
+
+- Look for research outputs (market, domain, technical)
+- Extract findings that inform implementation decisions
+- Identify domain terminology and definitions
+
+### 5. Scan Source Code for Patterns
+
+Identify key code patterns worth indexing:
+
+**Configuration Files:**
+
+- Package manifests, build configs, linting rules
+- Extract version constraints and tool configurations
+- These provide critical context for code generation
+
+**Key Source Files:**
+
+- Identify entry points, shared utilities, and core modules
+- Extract patterns that define the project's coding style
+- Note any non-obvious conventions visible only in code
+
+### 6. Classify and Prioritize Sources
+
+For each discovered artifact, assign:
+
+**Knowledge Category:**
+
+- `architecture` - System design decisions and patterns
+- `requirements` - Business rules and acceptance criteria
+- `implementation` - Coding patterns and conventions
+- `domain` - Business domain concepts and terminology
+- `operations` - Deployment, monitoring, and workflow rules
+- `quality` - Testing patterns, review standards, and anti-patterns
+
+**Retrieval Priority:**
+
+- `critical` - Must be retrieved for every implementation task
+- `high` - Should be retrieved for related implementation tasks
+- `standard` - Available when specifically relevant
+- `reference` - Background context when explicitly needed
+
+### 7. Present Discovery Summary
+
+Report findings to user:
+
+"Welcome {{user_name}}! I've scanned your project {{project_name}} to catalog artifacts for your RAG knowledge base.
+
+**Artifacts Discovered:**
+
+| Category | Count | Priority Breakdown (critical/high/standard/reference) |
+|---|---|---|
+| Architecture | {{count}} | {{critical}}/{{high}}/{{standard}}/{{reference}} |
+| Requirements | {{count}} | {{critical}}/{{high}}/{{standard}}/{{reference}} |
+| Implementation | {{count}} | {{critical}}/{{high}}/{{standard}}/{{reference}} |
+| Domain | {{count}} | {{critical}}/{{high}}/{{standard}}/{{reference}} |
+| Operations | {{count}} | {{critical}}/{{high}}/{{standard}}/{{reference}} |
+| Quality | {{count}} | {{critical}}/{{high}}/{{standard}}/{{reference}} |
+
+**Source Files Cataloged:** {{total_files}}
+
+**Recommended Chunking Strategy:**
+Based on your artifact types, I recommend {{strategy}} chunking:
+- {{strategy_rationale}}
+
+Ready to index and chunk your project knowledge for RAG retrieval.
+
+[C] Continue to knowledge indexing"
+
+## SUCCESS METRICS:
+
+✅ All relevant project artifacts discovered and cataloged
+✅ Each artifact classified by category and retrieval priority
+✅ Source file paths accurately recorded
+✅ Chunking strategy recommended based on artifact analysis
+✅ Discovery findings clearly presented to user
+✅ User ready to proceed with indexing
+
+## FAILURE MODES:
+
+❌ Missing critical artifacts in planning or implementation directories
+❌ Not checking for existing knowledge index before creating new one
+❌ Incorrect classification of artifact categories or priorities
+❌ Not scanning source code for pattern-level knowledge
+❌ Not presenting clear discovery summary to user
+
+## NEXT STEP:
+
+After user selects [C] to continue, load `{project-root}/_bmad/bmm/workflows/4-implementation/genai-knowledge-sync/steps/step-02-index.md` to index and chunk the discovered artifacts.
+
+Remember: Do NOT proceed to step-02 until user explicitly selects [C] from the menu and discovery catalog is confirmed!
--- a/src/bmm/workflows/4-implementation/genai-knowledge-sync/steps/step-02-index.md
+++ b/src/bmm/workflows/4-implementation/genai-knowledge-sync/steps/step-02-index.md
@ -0,0 +1,243 @@
+# Step 2: Knowledge Indexing & Chunking
+
+## MANDATORY EXECUTION RULES (READ FIRST):
+
+- 🛑 NEVER generate content without user input
+- ✅ ALWAYS treat this as collaborative indexing between technical peers
+- 📋 YOU ARE A FACILITATOR, not a content generator
+- 💬 FOCUS on creating self-contained, retrievable knowledge chunks
+- 🎯 EACH CHUNK must be independently useful without requiring full document context
+- ⚠️ ABSOLUTELY NO TIME ESTIMATES - AI development speed has fundamentally changed
+- ✅ YOU MUST ALWAYS SPEAK OUTPUT in your Agent communication style with the config `{communication_language}`
+
+## EXECUTION PROTOCOLS:
+
+- 🎯 Show your analysis before taking any action
+- 📝 Focus on creating atomic, self-contained knowledge chunks
+- ⚠️ Present A/P/C menu after each major category
+- 💾 ONLY save when user chooses C (Continue)
+- 📖 Update frontmatter with completed categories
+- 🚫 FORBIDDEN to load next step until all categories are indexed
+
+## COLLABORATION MENUS (A/P/C):
+
+This step will generate content and present choices for each knowledge category:
+
+- **A (Advanced Elicitation)**: Use discovery protocols to explore nuanced knowledge relationships
+- **P (Party Mode)**: Bring multiple perspectives to identify missing knowledge connections
+- **C (Continue)**: Save the current chunks and proceed to next category
+
+## PROTOCOL INTEGRATION:
+
+- When 'A' selected: Execute {project-root}/_bmad/core/workflows/advanced-elicitation/workflow.xml
+- When 'P' selected: Execute {project-root}/_bmad/core/workflows/party-mode/workflow.md
+- PROTOCOLS always return to display this step's A/P/C menu after the A or P protocol has completed
+- User accepts/rejects protocol changes before proceeding
+
+## CONTEXT BOUNDARIES:
+
+- Discovery catalog from step-1 is available
+- All artifact paths and classifications are identified
+- Focus on creating chunks optimized for embedding and retrieval
+- Each chunk must carry enough context to be useful in isolation
+
+## YOUR TASK:
+
+Index each discovered artifact into self-contained knowledge chunks with metadata tags, source tracing, and retrieval-optimized formatting.
+
+## CHUNKING PRINCIPLES:
+
+### Chunk Design Rules
+
+1. **Self-Contained**: Each chunk must be understandable without reading the source document
+2. **Tagged**: Every chunk has category, priority, source path, and semantic tags
+3. **Atomic**: One concept or decision per chunk - no compound knowledge
+4. **Traceable**: Every chunk links back to its source artifact and section
+5. **Contextual**: Include enough surrounding context for accurate retrieval
+6. **Deduplicated**: Avoid redundant chunks across different source artifacts
+
+### Chunk Format
+
+Each chunk follows this standard format:
+
+```markdown
+### [CHUNK-ID] Chunk Title
+
+- **Source:** `{relative_path_to_source_file}`
+- **Category:** architecture | requirements | implementation | domain | operations | quality
+- **Priority:** critical | high | standard | reference
+- **Tags:** comma-separated semantic tags for retrieval matching
+
+**Context:** One-line description of when this knowledge is relevant.
+
+**Content:**
+The actual knowledge content - specific, actionable, self-contained.
+```
+
+## INDEXING SEQUENCE:
+
+### 1. Index Critical-Priority Artifacts
+
+Process all artifacts marked as `critical` priority first:
+
+**For each critical artifact:**
+
+- Read the complete source file
+- Identify distinct knowledge units (decisions, rules, constraints)
+- Create one chunk per knowledge unit
+- Apply semantic tags for retrieval matching
+- Present chunks to user for validation
+
+**Present results:**
+"I've created {{chunk_count}} critical-priority chunks from {{source_count}} sources:
+
+{{list_of_chunk_titles_with_tags}}
+
+These chunks will be prioritized in every retrieval query.
+
+[A] Advanced Elicitation - Explore deeper knowledge connections
+[P] Party Mode - Review from multiple implementation perspectives
+[C] Continue - Save these chunks and proceed"
+
+### 2. Index High-Priority Artifacts
+
+Process all `high` priority artifacts:
+
+**For each high-priority artifact:**
+
+- Read source file and identify knowledge units
+- Create chunks with appropriate tags
+- Cross-reference with critical chunks for consistency
+- Identify any overlaps and deduplicate
+
+### 3. Index Standard-Priority Artifacts
+
+Process `standard` priority artifacts:
+
+**For each standard artifact:**
+
+- Read source file for domain-specific knowledge
+- Create chunks focused on contextual information
+- Tag for specific retrieval scenarios
+
+### 4. Index Reference-Priority Artifacts
+
+Process `reference` priority artifacts:
+
+**For each reference artifact:**
+
+- Extract background context and terminology
+- Create lighter-weight chunks for supplementary retrieval
+- Tag for broad topic matching
+
+### 5. Cross-Reference and Deduplicate
+
+After all categories are indexed:
+
+**Deduplication Analysis:**
+
+- Identify chunks with overlapping content across sources
+- Merge or consolidate redundant chunks
+- Ensure cross-references between related chunks are tagged
+- Present deduplication summary to user
+
+**Relationship Mapping:**
+
+- Identify chunks that frequently co-occur in implementation contexts
+- Tag related chunks for retrieval grouping
+- Create chunk clusters for common query patterns
+
+### 6. Generate Knowledge Index Document
+
+Compile all validated chunks into the knowledge index file:
+
+**Document Structure:**
+
+```markdown
+# Knowledge Index for {{project_name}}
+
+_RAG-optimized knowledge base for AI agent retrieval. Each chunk is self-contained and tagged for semantic search._
+
+---
+
+## Index Summary
+
+- **Total Chunks:** {{total_count}}
+- **Critical:** {{critical_count}} | **High:** {{high_count}} | **Standard:** {{standard_count}} | **Reference:** {{ref_count}}
+- **Sources Indexed:** {{source_count}}
+- **Last Synced:** {{date}}
+
+---
+
+## Critical Knowledge
+
+{{critical_chunks}}
+
+## Architecture Knowledge
+
+{{architecture_chunks}}
+
+## Requirements Knowledge
+
+{{requirements_chunks}}
+
+## Implementation Knowledge
+
+{{implementation_chunks}}
+
+## Domain Knowledge
+
+{{domain_chunks}}
+
+## Operations Knowledge
+
+{{operations_chunks}}
+
+## Quality Knowledge
+
+{{quality_chunks}}
+```
+
+### 7. Present Indexing Summary
+
+"Knowledge indexing complete for {{project_name}}!
+
+**Chunks Created:**
+
+| Category | Critical | High | Standard | Reference | Total |
+|---|---|---|---|---|---|
+| Architecture | {{n}} | {{n}} | {{n}} | {{n}} | {{n}} |
+| Requirements | {{n}} | {{n}} | {{n}} | {{n}} | {{n}} |
+| Implementation | {{n}} | {{n}} | {{n}} | {{n}} | {{n}} |
+| Domain | {{n}} | {{n}} | {{n}} | {{n}} | {{n}} |
+| Operations | {{n}} | {{n}} | {{n}} | {{n}} | {{n}} |
+| Quality | {{n}} | {{n}} | {{n}} | {{n}} | {{n}} |
+
+**Deduplication:** Removed {{removed_count}} redundant chunks
+**Cross-References:** {{xref_count}} chunk relationships mapped
+
+[C] Continue to optimization"
+
+## SUCCESS METRICS:
+
+✅ All discovered artifacts indexed into self-contained chunks
+✅ Each chunk has proper metadata tags and source tracing
+✅ No redundant or overlapping chunks remain
+✅ Cross-references between related chunks are mapped
+✅ A/P/C menu presented and handled correctly for each category
+✅ Knowledge index document properly structured
+
+## FAILURE MODES:
+
+❌ Creating chunks that require reading the full source document
+❌ Missing semantic tags that prevent accurate retrieval
+❌ Not deduplicating overlapping chunks from different sources
+❌ Not cross-referencing related knowledge units
+❌ Not getting user validation for each category
+❌ Creating overly large chunks that reduce retrieval precision
+
+## NEXT STEP:
+
+After completing all categories and user selects [C], load `{project-root}/_bmad/bmm/workflows/4-implementation/genai-knowledge-sync/steps/step-03-optimize.md` to optimize the knowledge base for retrieval quality.
+
+Remember: Do NOT proceed to step-03 until all categories are indexed and user explicitly selects [C]!
--- a/src/bmm/workflows/4-implementation/genai-knowledge-sync/steps/step-03-optimize.md
+++ b/src/bmm/workflows/4-implementation/genai-knowledge-sync/steps/step-03-optimize.md
@ -0,0 +1,289 @@
+# Step 3: Knowledge Base Optimization & Completion
+
+## MANDATORY EXECUTION RULES (READ FIRST):
+
+- 🛑 NEVER generate content without user input
+- ✅ ALWAYS treat this as collaborative optimization between technical peers
+- 📋 YOU ARE A FACILITATOR, not a content generator
+- 💬 FOCUS on optimizing chunks for retrieval quality and accuracy
+- 🎯 ENSURE every chunk is retrieval-ready and well-tagged
+- ⚠️ ABSOLUTELY NO TIME ESTIMATES - AI development speed has fundamentally changed
+- ✅ YOU MUST ALWAYS SPEAK OUTPUT in your Agent communication style with the config `{communication_language}`
+
+## EXECUTION PROTOCOLS:
+
+- 🎯 Show your analysis before taking any action
+- 📝 Review and optimize chunks for retrieval precision
+- 📖 Update frontmatter with completion status
+- 🚫 NO MORE STEPS - this is the final step
+
+## CONTEXT BOUNDARIES:
+
+- All knowledge chunks from step-2 are indexed
+- Cross-references and deduplication are complete
+- Focus on retrieval quality optimization and finalization
+- Ensure the knowledge index is ready for RAG pipeline integration
+
+## YOUR TASK:
+
+Optimize the knowledge index for retrieval quality, validate chunk completeness, and finalize the knowledge base for AI agent consumption.
+
+## OPTIMIZATION SEQUENCE:
+
+### 1. Retrieval Quality Analysis
+
+Analyze the indexed chunks for retrieval effectiveness:
+
+**Tag Coverage Analysis:**
+
+- Review semantic tags across all chunks
+- Identify gaps where common queries would miss relevant chunks
+- Suggest additional tags for better retrieval matching
+- Present tag coverage report to user
+
+**Chunk Size Analysis:**
+
+- Identify chunks that are too large (reduce retrieval precision)
+- Identify chunks that are too small (lack sufficient context)
+- Recommend splits or merges for optimal retrieval size
+- Target: Each chunk should be 100-500 words for optimal embedding
+
+**Context Sufficiency Check:**
+
+- Verify each chunk is understandable without its source document
+- Add missing context where chunks reference undefined terms
+- Ensure technical terms are defined or tagged for glossary lookup
+
+### 2. Semantic Tag Optimization
+
+Optimize tags for retrieval accuracy:
+
+**Tag Standardization:**
+
+- Normalize similar tags (e.g., "api-design" and "api-patterns" → single standard)
+- Create a tag vocabulary for the project
+- Apply consistent tag format across all chunks
+
+**Tag Enrichment:**
+
+- Add technology-specific tags (framework names, library names)
+- Add pattern-type tags (e.g., "error-handling", "state-management")
+- Add lifecycle tags (e.g., "setup", "implementation", "testing", "deployment")
+
+**Present Tag Summary:**
+"I've optimized the semantic tags across {{chunk_count}} chunks:
+
+**Tag Vocabulary:** {{unique_tag_count}} standardized tags
+**Most Connected Tags:** {{top_tags_by_frequency}}
+**Coverage Gaps Fixed:** {{gaps_fixed}}
+
+Would you like to review the tag vocabulary? (y/n)"
+
+### 3. Retrieval Scenario Testing
+
+Validate retrieval quality with common query scenarios:
+
+**Test Queries:**
+
+Simulate these common developer queries against the knowledge index:
+
+1. "How should I structure a new feature?" → Should retrieve: architecture + implementation chunks
+2. "What are the testing requirements?" → Should retrieve: quality + implementation chunks
+3. "What technology versions are we using?" → Should retrieve: critical implementation chunks
+4. "How do I handle errors in this project?" → Should retrieve: implementation + quality chunks
+5. "What are the business rules for {{core_feature}}?" → Should retrieve: requirements + domain chunks
+
+**For each query, report:**
+
+- Chunks that would be retrieved (by tag matching)
+- Missing chunks that should be retrieved but aren't
+- False positive chunks that would be retrieved incorrectly
+- Recommended tag adjustments
+
+### 4. Generate Retrieval Configuration
+
+Create a retrieval configuration section in the knowledge index:
+
+```markdown
+## Retrieval Configuration
+
+### Query Mapping
+
+| Query Pattern | Target Categories | Priority Filter | Expected Chunks |
+|---|---|---|---|
+| "how to implement *" | implementation, architecture | critical, high | 3-5 |
+| "testing requirements for *" | quality, implementation | critical, high | 2-4 |
+| "business rules for *" | requirements, domain | all | 2-3 |
+| "architecture decision for *" | architecture | all | 1-3 |
+| "deployment process for *" | operations | all | 1-2 |
+
+### Embedding Recommendations
+
+- **Model:** Use an embedding model that handles technical content well
+- **Chunk Overlap:** 50-100 characters overlap between adjacent chunks from the same source
+- **Metadata Filters:** Always filter by category and priority for focused retrieval
+- **Top-K:** Retrieve 3-5 chunks per query for optimal context balance
+```
+
+### 5. Finalize Knowledge Index
+
+Complete the knowledge index with optimization results:
+
+**Update Frontmatter:**
+
+```yaml
+---
+project_name: '{{project_name}}'
+user_name: '{{user_name}}'
+date: '{{date}}'
+total_chunks: {{total_count}}
+sources_indexed: {{source_count}}
+tag_vocabulary_size: {{tag_count}}
+retrieval_tested: true
+status: 'complete'
+---
+```
+
+**Append Usage Guidelines:**
+
+```markdown
+---
+
+## Usage Guidelines
+
+**For AI Agents (RAG Retrieval):**
+
+- Query this index using semantic search against chunk tags and content
+- Always include critical-priority chunks in implementation context
+- Filter by category when the task type is known
+- Cross-reference related chunks using shared tags
+
+**For Humans (Maintenance):**
+
+- Re-run this workflow when new artifacts are created or significantly updated
+- Add new chunks manually using the standard chunk format above
+- Review and prune quarterly to remove outdated knowledge
+- Update tags when new patterns or technologies are adopted
+
+**For RAG Pipeline Integration:**
+
+- Parse chunks by the `### [CHUNK-ID]` delimiter
+- Extract metadata from the bullet-point headers (Source, Category, Priority, Tags)
+- Use Tags field for semantic search indexing
+- Use Priority field for retrieval ranking
+```
+
+### 6. Present Completion Summary
+
+Based on the configured `{user_skill_level}`, present the matching completion summary:
+
+**Expert Mode:**
+"Knowledge index complete. {{chunk_count}} chunks across {{source_count}} sources, {{tag_count}} semantic tags. Retrieval-tested and RAG-ready.
+
+File saved to: `{project_knowledge}/knowledge-index.md`"
+
+**Intermediate Mode:**
+"Your project knowledge base is indexed and retrieval-ready!
+
+**What we created:**
+
+- {{chunk_count}} self-contained knowledge chunks
+- {{source_count}} source artifacts indexed
+- {{tag_count}} semantic tags for retrieval matching
+- Retrieval configuration for RAG pipeline integration
+
+**How it works:**
+AI agents can now search this index to find exactly the project knowledge they need for any implementation task, instead of loading entire documents.
+
+**Next steps:**
+
+- Integrate with your RAG pipeline using the retrieval configuration
+- Re-run this workflow when artifacts change significantly
+- Review quarterly to keep knowledge current"
+
+**Beginner Mode:**
+"Your project knowledge base is ready! 🎉
+
+**What this does:**
+Think of this as a smart library catalog for your project. Instead of AI agents reading every document from start to finish, they can now search for exactly the knowledge they need.
+
+**What's included:**
+
+- {{chunk_count}} bite-sized knowledge pieces from your project documents
+- Smart tags so agents can find the right knowledge quickly
+- Priority labels so the most important knowledge comes first
+
+**How AI agents use it:**
+When an agent needs to implement something, it searches this index for relevant knowledge chunks instead of reading entire documents. This makes them faster and more accurate!"
+
+### 7. Completion Validation
+
+Final checks before completion:
+
+**Content Validation:**
+✅ All discovered artifacts indexed into chunks
+✅ Each chunk has proper metadata and source tracing
+✅ Semantic tags are standardized and comprehensive
+✅ No redundant chunks remain after deduplication
+✅ Retrieval scenarios tested successfully
+✅ Retrieval configuration generated
+
+**Format Validation:**
+✅ Consistent chunk format throughout
+✅ Frontmatter properly updated
+✅ Tag vocabulary is standardized
+✅ Document is well-structured and scannable
+
+### 8. Completion Message
+
+"✅ **GenAI Knowledge Sync Complete!**
+
+Your retrieval-optimized knowledge index is ready at:
+`{project_knowledge}/knowledge-index.md`
+
+**📊 Knowledge Base Summary:**
+
+- {{chunk_count}} indexed knowledge chunks
+- {{source_count}} source artifacts cataloged
+- {{tag_count}} semantic tags for retrieval
+- {{category_count}} knowledge categories covered
+- Retrieval-tested with {{test_count}} query scenarios
+
+**🎯 RAG Integration Ready:**
+
+- Self-contained chunks with metadata headers
+- Standardized tag vocabulary for semantic search
+- Priority-based retrieval ranking
+- Query mapping configuration included
+
+**📋 Maintenance:**
+
+1. Re-sync when artifacts change significantly: run this workflow again
+2. Add individual chunks manually using the standard format
+3. Review quarterly to prune outdated knowledge
+4. Update tags when new patterns emerge
+
+Your AI agents can now retrieve precisely the project knowledge they need for any task!"
+
+## SUCCESS METRICS:
+
+✅ Knowledge index fully optimized for retrieval quality
+✅ Semantic tags standardized and comprehensive
+✅ Retrieval scenarios tested with good coverage
+✅ Retrieval configuration generated for RAG pipeline
+✅ Usage guidelines included for agents, humans, and pipelines
+✅ Frontmatter properly updated with completion status
+✅ User provided with clear maintenance guidance
+
+## FAILURE MODES:
+
+❌ Chunks too large or too small for effective retrieval
+❌ Semantic tags inconsistent or too sparse
+❌ Not testing retrieval scenarios before finalizing
+❌ Missing retrieval configuration for pipeline integration
+❌ Not providing maintenance and usage guidelines
+❌ Frontmatter not properly updated
+
+## WORKFLOW COMPLETE:
+
+This is the final step of the GenAI Knowledge Sync workflow. The user now has a retrieval-optimized knowledge index that enables AI agents to find and use exactly the project knowledge they need for any implementation task, improving both speed and accuracy of AI-assisted development.
--- a/src/bmm/workflows/4-implementation/genai-knowledge-sync/workflow.md
+++ b/src/bmm/workflows/4-implementation/genai-knowledge-sync/workflow.md
@ -0,0 +1,50 @@
+---
+name: genai-knowledge-sync
+description: 'Build and maintain a RAG-ready knowledge base from project artifacts. Use when the user says "build knowledge base", "sync knowledge", or "create RAG context"'
+---
+
+# GenAI Knowledge Sync Workflow
+
+**Goal:** Create a structured, chunked knowledge index (`knowledge-index.md`) from project artifacts that is optimized for Retrieval-Augmented Generation (RAG) pipelines and AI agent context loading. This enables AI agents to retrieve the most relevant project knowledge at inference time rather than loading entire documents.
+
+**Your Role:** You are a technical knowledge architect working with a peer to catalog, chunk, and index project artifacts into a retrieval-optimized format. You ensure every knowledge chunk is self-contained, well-tagged, and traceable to its source.
+
+---
+
+## WORKFLOW ARCHITECTURE
+
+This uses **micro-file architecture** for disciplined execution:
+
+- Each step is a self-contained file with embedded rules
+- Sequential progression with user control at each step
+- Document state tracked in frontmatter
+- Focus on lean, retrieval-optimized content generation
+- NEVER proceed to the next step file while the current step file requires the user to approve and explicitly choose to continue.
+
+---
+
+## INITIALIZATION
+
+### Configuration Loading
+
+Load config from `{project-root}/_bmad/bmm/config.yaml` and resolve:
+
+- `project_name`, `output_folder`, `user_name`
+- `communication_language`, `document_output_language`, `user_skill_level`
+- `planning_artifacts`, `implementation_artifacts`, `project_knowledge`
+- `date` as system-generated current datetime
+- ✅ YOU MUST ALWAYS SPEAK OUTPUT in your Agent communication style with the config `{communication_language}`
+
+### Paths
+
+- `installed_path` = `{project-root}/_bmad/bmm/workflows/4-implementation/genai-knowledge-sync`
+- `template_path` = `{installed_path}/knowledge-index-template.md`
+- `output_file` = `{project_knowledge}/knowledge-index.md`
+
+---
+
+## EXECUTION
+
+Load and execute `{project-root}/_bmad/bmm/workflows/4-implementation/genai-knowledge-sync/steps/step-01-discover.md` to begin the workflow.
+
+**Note:** Artifact discovery, source cataloging, and chunking strategy selection are handled in step-01-discover.md.
--- a/tools/cli/commands/status.js
+++ b/tools/cli/commands/status.js
@ -11,7 +11,7 @@ const ui = new UI();
 module.exports = {
  command: 'status',
  description: 'Display BMAD installation status and module versions',
-  options: [],
+  options: [['-v, --verbose', 'Show detailed status including agent and workflow counts']],
  action: async (options) => {
    try {
      // Find the bmad directory
@ -53,6 +53,23 @@ module.exports = {
        bmadDir,
      });

+      // Verbose mode: show agent and workflow counts per module
+      if (options.verbose) {
+        const { glob } = require('glob');
+        for (const mod of modules) {
+          const moduleName = typeof mod === 'string' ? mod : (mod.id || mod.name || '');
+          if (!moduleName) continue;
+
+          const modDir = path.join(bmadDir, moduleName);
+          if (!(await fs.pathExists(modDir))) continue;
+
+          const agents = await glob('agents/**/*.agent.yaml', { cwd: modDir });
+          const workflows = await glob('workflows/**/*.{yaml,yml,md}', { cwd: modDir });
+
+          await prompts.log.info(`Module "${moduleName}": ${agents.length} agent(s), ${workflows.length} workflow(s)`);
+        }
+      }
+
      process.exit(0);
    } catch (error) {
      await prompts.log.error(`Status check failed: ${error.message}`);
--- a/tools/cli/lib/config.js
+++ b/tools/cli/lib/config.js
@ -7,8 +7,14 @@ const packageJson = require('../../../package.json');
 * Configuration utility class
 */
 class Config {
+  /** @type {Map<string, { data: Object, mtime: number }>} */
+  #cache = new Map();
+
  /**
-   * Load a YAML configuration file
+   * Load a YAML configuration file with in-memory caching.
+   * Cached entries are automatically invalidated when the file's
+   * modification time changes, so callers always receive fresh data
+   * after a file is written.
   * @param {string} configPath - Path to config file
   * @returns {Object} Parsed configuration
   */
@ -17,8 +23,26 @@ class Config {
      throw new Error(`Configuration file not found: ${configPath}`);
    }

-    const content = await fs.readFile(configPath, 'utf8');
-    return yaml.parse(content);
+    const resolved = path.resolve(configPath);
+    const stat = await fs.stat(resolved);
+    const mtime = stat.mtimeMs;
+
+    const cached = this.#cache.get(resolved);
+    if (cached && cached.mtime === mtime) {
+      return cached.data;
+    }
+
+    const content = await fs.readFile(resolved, 'utf8');
+    const data = yaml.parse(content);
+    this.#cache.set(resolved, { data, mtime });
+    return data;
+  }
+
+  /**
+   * Clear the in-memory YAML cache.
+   */
+  clearCache() {
+    this.#cache.clear();
  }

  /**
Author	SHA1	Message	Date
Marcus Bergo	ba1d1ed829	Merge `2e1949df76` into `259e8a11ba`	2026-03-05 06:36:04 +01:00
Marcus Bergo	2e1949df76	Add contribution rules to instructions	2026-02-26 14:30:28 -03:00