From 6a5a12599ee99c64f850ee38e739b3cd59acd055 Mon Sep 17 00:00:00 2001
From: manjaroblack <devin.stagner@outlook.com>
Date: Sat, 19 Jul 2025 18:08:20 -0500
Subject: [PATCH] refactor(flattener): improve xml generation and file
 discovery

- Simplify file discovery by using gitignore patterns
- Enhance XML generation with proper CDATA handling and indentation
- Remove unused dependencies and clean up code
---
 .gitignore              |  6 +--
 tools/flattener/main.js | 91 +++++++++++++++++++++++++++++------------
 2 files changed, 66 insertions(+), 31 deletions(-)
diff --git a/.gitignore b/.gitignore
index cf5aab6d..854a86cf 100644
--- a/.gitignore
+++ b/.gitignore
@@ -33,8 +33,4 @@ docs/architecture/
 docs/prd/
 docs/stories/
 docs/project-architecture.md
-tests/
-custom-output.xml
-flattened-codebase.xml
-biome.json
-__tests__/
\ No newline at end of file
+biome.json
\ No newline at end of file
diff --git a/tools/flattener/main.js b/tools/flattener/main.js
index 70a9411f..22d355c6 100644
--- a/tools/flattener/main.js
+++ b/tools/flattener/main.js
@@ -2,12 +2,9 @@
 
 const { Command } = require('commander');
 const fs = require('fs-extra');
-const path = require('path');
+const path = require('node:path');
 const { glob } = require('glob');
 const { minimatch } = require('minimatch');
-const { promisify } = require('util');
-const { exec } = require('child_process');
-const execAsync = promisify(exec);
 
 /**
  * Recursively discover all files in a directory
@@ -16,27 +13,23 @@ const execAsync = promisify(exec);
  */
 async function discoverFiles(rootDir) {
   try {
+    const gitignorePath = path.join(rootDir, '.gitignore');
+    const gitignorePatterns = await parseGitignore(gitignorePath);
+
+    const combinedIgnores = [
+    ...gitignorePatterns,
+    '.git/**',
+    'flattened-codebase.xml',
+    'repomix-output.xml'
+  ];
+
     // Use glob to recursively find all files, excluding common ignore patterns
     const files = await glob('**/*', {
       cwd: rootDir,
       nodir: true, // Only files, not directories
       dot: true,   // Include hidden files
       follow: false, // Don't follow symbolic links
-      ignore: [
-        // Standard ignore patterns
-        'node_modules/**',
-        '.git/**',
-        'build/**',
-        'dist/**',
-        '.next/**',
-        'coverage/**',
-        '.nyc_output/**',
-        'tmp/**',
-        'temp/**',
-        '.gitignore',
-        '.gitattributes',
-        '.gitmodules'
-      ]
+      ignore: combinedIgnores
     });
     
     return files.map(file => path.resolve(rootDir, file));
@@ -192,7 +185,7 @@ async function aggregateFileContents(files, rootDir, spinner = null) {
  * @param {string} projectRoot - The project root directory
  * @returns {string} XML content
  */
-function generateXMLOutput(aggregatedContent, projectRoot) {
+function generateXMLOutput(aggregatedContent) {
   const { textFiles } = aggregatedContent;
   
   let xml = `<?xml version="1.0" encoding="UTF-8"?>
@@ -204,21 +197,36 @@ function generateXMLOutput(aggregatedContent, projectRoot) {
   for (const file of textFiles) {
     xml += `  <file path="${escapeXml(file.path)}">`;
     
-    // Use CDATA for code content to preserve formatting and handle special characters
-    if (file.content.trim()) {
-      xml += `<![CDATA[${file.content}]]>`;
+    // Use CDATA for code content, handling CDATA end sequences properly
+    if (file.content?.trim()) {
+      const indentedContent = indentFileContent(file.content);
+      if (file.content.includes(']]>')) {
+        // If content contains ]]>, split it and wrap each part in CDATA
+        xml += splitAndWrapCDATA(indentedContent);
+      } else {
+        xml += `<![CDATA[
+${indentedContent}
+    ]]>`;
+      }
+    } else if (file.content) {
+      // Handle empty or whitespace-only content
+      const indentedContent = indentFileContent(file.content);
+      xml += `<![CDATA[
+${indentedContent}
+    ]]>`;
     }
     
     xml += `</file>
 `;
   }
   
-  xml += `</files>`;
+  xml += `</files>
+`;
   return xml;
 }
 
 /**
- * Escape XML special characters
+ * Escape XML special characters for attributes
  * @param {string} str - String to escape
  * @returns {string} Escaped string
  */
@@ -234,6 +242,37 @@ function escapeXml(str) {
     .replace(/'/g, '&apos;');
 }
 
+/**
+ * Indent file content with 4 spaces for each line
+ * @param {string} content - Content to indent
+ * @returns {string} Indented content
+ */
+function indentFileContent(content) {
+  if (typeof content !== 'string') {
+    return String(content);
+  }
+  
+  // Split content into lines and add 4 spaces of indentation to each line
+  return content.split('\n').map(line => `    ${line}`).join('\n');
+}
+
+/**
+ * Split content containing ]]> and wrap each part in CDATA
+ * @param {string} content - Content to process
+ * @returns {string} Content with properly wrapped CDATA sections
+ */
+function splitAndWrapCDATA(content) {
+  if (typeof content !== 'string') {
+    return String(content);
+  }
+  
+  // Replace ]]> with ]]]]><![CDATA[> to escape it within CDATA
+  const escapedContent = content.replace(/]]>/g, ']]]]><![CDATA[>');
+  return `<![CDATA[
+${escapedContent}
+    ]]>`;
+}
+
 /**
  * Calculate statistics for the processed files
  * @param {Object} aggregatedContent - The aggregated content object
@@ -365,7 +404,7 @@ program
       
       // Generate XML output
       const xmlSpinner = ora('🔧 Generating XML output...').start();
-      const xmlOutput = generateXMLOutput(aggregatedContent, process.cwd());
+      const xmlOutput = generateXMLOutput(aggregatedContent);
       await fs.writeFile(options.output, xmlOutput);
       xmlSpinner.succeed('📝 XML generation completed');