feat(flattener): enhance codebase flattener with content aggregation

Add binary file detection and content aggregation to generate detailed XML output. The flattener now: (1) identifies binary files by extension and content; (2) aggregates text file contents with metadata; (3) generates comprehensive XML with file statistics; and (4) handles errors gracefully with detailed reporting. Also update documentation formatting and add biome.json to .gitignore.
2025-07-19 11:33:16 -05:00 · 2025-07-19 11:33:16 -05:00 · 6aa7539aea
parent 61b6c3b2ec
commit 6aa7539aea
3 changed files with 255 additions and 25 deletions
--- a/.gitignore
+++ b/.gitignore
@ -36,3 +36,4 @@ docs/project-architecture.md
 tests/
 custom-output.xml
 flattened-codebase.xml
+biome.json
--- a/docs/bmad-workflow-guide.md
+++ b/docs/bmad-workflow-guide.md
@ -40,12 +40,13 @@ Use Google's Gemini for collaborative planning with the full team:
   - Copy contents of: `dist/teams/team-fullstack.txt` from your project
   - Paste this content into the Gem setup to configure the team
 4. **Collaborate with the team**:
+
   - Business Analyst: Requirements gathering
   - Product Manager: Feature prioritization
   - Solution Architect: Technical design
   - UX Expert: User experience design

-### Example Gemini Sessions:
+   **Example Gemini Sessions**

   ```text
   "I want to build a [type] application that [core purpose].
@ -65,11 +66,14 @@ Switch back to your IDE for document management:

 1. **Load bmad-master agent** (syntax varies by IDE)
 2. **Shard the PRD**:
-   ```
+
+   ```text
   *shard-doc docs/prd.md prd
   ```
+
 3. **Shard the architecture**:
-   ```
+
+   ```text
   *shard-doc docs/architecture.md architecture
   ```

@ -94,20 +98,26 @@ Follow the SM → Dev cycle for systematic story development:

 1. **Start new chat/conversation**
 2. **Load Dev agent**
-3. **Agent asks**: Which story to implement
-4. **Follow development tasks**
-5. **Complete implementation**
-6. **Update status**: Change to "Done"
+3. **Execute**: `{selected-story}` (runs execute-checklist task)
+4. **Review generated report** in `{selected-story}`
+
+#### Story Review (Quality Assurance)
+
+1. **Start new chat/conversation**
+2. **Load QA agent**
+3. **Execute**: `*review {selected-story}` (runs review-story task)
+4. **Review generated report** in `{selected-story}`

 #### Repeat Until Complete

 - **SM**: Create next story → Review → Approve
- **Dev**: Implement story → Complete → Mark done
+- **Dev**: Implement story → Complete → Mark Ready for Review
+- **QA**: Review story → Mark done
 - **Continue**: Until all features implemented

 ## IDE-Specific Syntax

-### Agent Loading Syntax by IDE:
+### Agent Loading Syntax by IDE

 - **Claude Code**: `/agent-name` (e.g., `/bmad-master`)
 - **Cursor**: `@agent-name` (e.g., `@bmad-master`)
@ -116,21 +126,21 @@ Follow the SM → Dev cycle for systematic story development:
 - **Roo Code**: Select mode from mode selector (e.g., `bmad-bmad-master`)
 - **GitHub Copilot**: Open the Chat view (`⌃⌘I` on Mac, `Ctrl+Alt+I` on Windows/Linux) and select **Agent** from the chat mode selector.

-### Chat Management:
+### Chat Management

 - **Claude Code, Cursor, Windsurf, Trae**: Start new chats when switching agents
 - **Roo Code**: Switch modes within the same conversation

 ## Available Agents

-### Core Development Agents:
+### Core Development Agents

 - **bmad-master**: Universal task executor, document management
 - **sm**: Scrum Master for story creation and agile process
 - **dev**: Full-stack developer for implementation
 - **architect**: Solution architect for technical design

-### Specialized Agents:
+### Specialized Agents

 - **pm**: Product manager for planning and prioritization
 - **analyst**: Business analyst for requirements
--- a/tools/flattener/main.js
+++ b/tools/flattener/main.js
@ -5,6 +5,9 @@ const fs = require('fs-extra');
 const path = require('path');
 const { glob } = require('glob');
 const { minimatch } = require('minimatch');
+const { promisify } = require('util');
+const { exec } = require('child_process');
+const execAsync = promisify(exec);

 /**
 * Recursively discover all files in a directory
@ -72,6 +75,215 @@ async function parseGitignore(gitignorePath) {
  }
 }

+/**
+ * Check if a file is binary using file command and heuristics
+ * @param {string} filePath - Path to the file
+ * @returns {Promise<boolean>} True if file is binary
+ */
+async function isBinaryFile(filePath) {
+  try {
+    // First check by file extension
+    const binaryExtensions = [
+      '.jpg', '.jpeg', '.png', '.gif', '.bmp', '.ico', '.svg',
+      '.pdf', '.doc', '.docx', '.xls', '.xlsx', '.ppt', '.pptx',
+      '.zip', '.tar', '.gz', '.rar', '.7z',
+      '.exe', '.dll', '.so', '.dylib',
+      '.mp3', '.mp4', '.avi', '.mov', '.wav',
+      '.ttf', '.otf', '.woff', '.woff2',
+      '.bin', '.dat', '.db', '.sqlite'
+    ];
+    
+    const ext = path.extname(filePath).toLowerCase();
+    if (binaryExtensions.includes(ext)) {
+      return true;
+    }
+    
+    // For files without clear extensions, try to read a small sample
+    const stats = await fs.stat(filePath);
+    if (stats.size === 0) {
+      return false; // Empty files are considered text
+    }
+    
+    // Read first 1024 bytes to check for null bytes
+    const sampleSize = Math.min(1024, stats.size);
+    const buffer = await fs.readFile(filePath, { encoding: null, flag: 'r' });
+    const sample = buffer.slice(0, sampleSize);
+    // If we find null bytes, it's likely binary
+    return sample.includes(0);
+  } catch (error) {
+    console.warn(`Warning: Could not determine if file is binary: ${filePath} - ${error.message}`);
+    return false; // Default to text if we can't determine
+  }
+}
+
+/**
+ * Read and aggregate content from text files
+ * @param {string[]} files - Array of file paths
+ * @param {string} rootDir - The root directory
+ * @returns {Promise<Object>} Object containing file contents and metadata
+ */
+async function aggregateFileContents(files, rootDir) {
+  const results = {
+    textFiles: [],
+    binaryFiles: [],
+    errors: [],
+    totalFiles: files.length,
+    processedFiles: 0
+  };
+  
+  for (const filePath of files) {
+    try {
+      const relativePath = path.relative(rootDir, filePath);
+      const isBinary = await isBinaryFile(filePath);
+      
+      if (isBinary) {
+        results.binaryFiles.push({
+          path: relativePath,
+          absolutePath: filePath,
+          size: (await fs.stat(filePath)).size
+        });
+      } else {
+        // Read text file content
+        const content = await fs.readFile(filePath, 'utf8');
+        results.textFiles.push({
+          path: relativePath,
+          absolutePath: filePath,
+          content: content,
+          size: content.length,
+          lines: content.split('\n').length
+        });
+      }
+      
+      results.processedFiles++;
+    } catch (error) {
+      const relativePath = path.relative(rootDir, filePath);
+      const errorInfo = {
+        path: relativePath,
+        absolutePath: filePath,
+        error: error.message
+      };
+      
+      results.errors.push(errorInfo);
+      console.warn(`Warning: Could not read file ${relativePath}: ${error.message}`);
+      results.processedFiles++;
+    }
+  }
+  
+  return results;
+}
+
+/**
+ * Generate XML output with aggregated file contents
+ * @param {Object} aggregatedContent - The aggregated content object
+ * @param {string} projectRoot - The project root directory
+ * @returns {string} XML content
+ */
+function generateXMLOutput(aggregatedContent, projectRoot) {
+  const { textFiles, binaryFiles, errors, totalFiles, processedFiles } = aggregatedContent;
+  const timestamp = new Date().toISOString();
+  
+  let xml = `<?xml version="1.0" encoding="UTF-8"?>
+`;
+  xml += `<codebase>
+`;
+  xml += `  <metadata>
+`;
+  xml += `    <generated>${timestamp}</generated>
+`;
+  xml += `    <project_root>${escapeXml(projectRoot)}</project_root>
+`;
+  xml += `    <total_files>${totalFiles}</total_files>
+`;
+  xml += `    <processed_files>${processedFiles}</processed_files>
+`;
+  xml += `    <text_files>${textFiles.length}</text_files>
+`;
+  xml += `    <binary_files>${binaryFiles.length}</binary_files>
+`;
+  xml += `    <errors>${errors.length}</errors>
+`;
+  xml += `  </metadata>
+`;
+  
+  // Add text files with content
+  if (textFiles.length > 0) {
+    xml += `  <text_files>
+`;
+    for (const file of textFiles) {
+      xml += `    <file>
+`;
+      xml += `      <path>${escapeXml(file.path)}</path>
+`;
+      xml += `      <size>${file.size}</size>
+`;
+      xml += `      <lines>${file.lines}</lines>
+`;
+      xml += `      <content><![CDATA[${file.content}]]></content>
+`;
+      xml += `    </file>
+`;
+    }
+    xml += `  </text_files>
+`;
+  }
+  
+  // Add binary files (paths only)
+  if (binaryFiles.length > 0) {
+    xml += `  <binary_files>
+`;
+    for (const file of binaryFiles) {
+      xml += `    <file>
+`;
+      xml += `      <path>${escapeXml(file.path)}</path>
+`;
+      xml += `      <size>${file.size}</size>
+`;
+      xml += `    </file>
+`;
+    }
+    xml += `  </binary_files>
+`;
+  }
+  
+  // Add errors if any
+  if (errors.length > 0) {
+    xml += `  <errors>
+`;
+    for (const error of errors) {
+      xml += `    <error>
+`;
+      xml += `      <path>${escapeXml(error.path)}</path>
+`;
+      xml += `      <message>${escapeXml(error.error)}</message>
+`;
+      xml += `    </error>
+`;
+    }
+    xml += `  </errors>
+`;
+  }
+  
+  xml += `</codebase>`;
+  return xml;
+}
+
+/**
+ * Escape XML special characters
+ * @param {string} str - String to escape
+ * @returns {string} Escaped string
+ */
+function escapeXml(str) {
+  if (typeof str !== 'string') {
+    return String(str);
+  }
+  return str
+    .replace(/&/g, '&amp;')
+    .replace(/</g, '&lt;')
+    .replace(/>/g, '&gt;')
+    .replace(/"/g, '&quot;')
+    .replace(/'/g, '&apos;');
+}
+
 /**
 * Filter files based on .gitignore patterns
 * @param {string[]} files - Array of file paths
@ -154,12 +366,19 @@ program
        });
      }
      
-      // Create basic XML structure for now
-      const xmlContent = `<?xml version="1.0" encoding="UTF-8"?>
-<codebase>
-  <!-- Flattened codebase will be generated here -->
-  <!-- Files discovered: ${filteredFiles.length} -->
-</codebase>`;
+      // Aggregate file contents
+      console.log('Reading file contents...');
+      const aggregatedContent = await aggregateFileContents(filteredFiles, projectRoot);
+      
+      console.log(`Processed ${aggregatedContent.processedFiles}/${aggregatedContent.totalFiles} files`);
+      console.log(`Text files: ${aggregatedContent.textFiles.length}`);
+      console.log(`Binary files: ${aggregatedContent.binaryFiles.length}`);
+      if (aggregatedContent.errors.length > 0) {
+        console.log(`Errors: ${aggregatedContent.errors.length}`);
+      }
+      
+      // Generate XML content with file contents
+      const xmlContent = generateXMLOutput(aggregatedContent, projectRoot);
      
      await fs.writeFile(outputPath, xmlContent);
      console.log(`Codebase flattened successfully to: ${outputPath}`);