From 6a5a12599ee99c64f850ee38e739b3cd59acd055 Mon Sep 17 00:00:00 2001 From: manjaroblack Date: Sat, 19 Jul 2025 18:08:20 -0500 Subject: [PATCH] refactor(flattener): improve xml generation and file discovery - Simplify file discovery by using gitignore patterns - Enhance XML generation with proper CDATA handling and indentation - Remove unused dependencies and clean up code --- .gitignore | 6 +-- tools/flattener/main.js | 91 +++++++++++++++++++++++++++++------------ 2 files changed, 66 insertions(+), 31 deletions(-) diff --git a/.gitignore b/.gitignore index cf5aab6d..854a86cf 100644 --- a/.gitignore +++ b/.gitignore @@ -33,8 +33,4 @@ docs/architecture/ docs/prd/ docs/stories/ docs/project-architecture.md -tests/ -custom-output.xml -flattened-codebase.xml -biome.json -__tests__/ \ No newline at end of file +biome.json \ No newline at end of file diff --git a/tools/flattener/main.js b/tools/flattener/main.js index 70a9411f..22d355c6 100644 --- a/tools/flattener/main.js +++ b/tools/flattener/main.js @@ -2,12 +2,9 @@ const { Command } = require('commander'); const fs = require('fs-extra'); -const path = require('path'); +const path = require('node:path'); const { glob } = require('glob'); const { minimatch } = require('minimatch'); -const { promisify } = require('util'); -const { exec } = require('child_process'); -const execAsync = promisify(exec); /** * Recursively discover all files in a directory @@ -16,27 +13,23 @@ const execAsync = promisify(exec); */ async function discoverFiles(rootDir) { try { + const gitignorePath = path.join(rootDir, '.gitignore'); + const gitignorePatterns = await parseGitignore(gitignorePath); + + const combinedIgnores = [ + ...gitignorePatterns, + '.git/**', + 'flattened-codebase.xml', + 'repomix-output.xml' + ]; + // Use glob to recursively find all files, excluding common ignore patterns const files = await glob('**/*', { cwd: rootDir, nodir: true, // Only files, not directories dot: true, // Include hidden files follow: false, // Don't follow symbolic links - ignore: [ - // Standard ignore patterns - 'node_modules/**', - '.git/**', - 'build/**', - 'dist/**', - '.next/**', - 'coverage/**', - '.nyc_output/**', - 'tmp/**', - 'temp/**', - '.gitignore', - '.gitattributes', - '.gitmodules' - ] + ignore: combinedIgnores }); return files.map(file => path.resolve(rootDir, file)); @@ -192,7 +185,7 @@ async function aggregateFileContents(files, rootDir, spinner = null) { * @param {string} projectRoot - The project root directory * @returns {string} XML content */ -function generateXMLOutput(aggregatedContent, projectRoot) { +function generateXMLOutput(aggregatedContent) { const { textFiles } = aggregatedContent; let xml = ` @@ -204,21 +197,36 @@ function generateXMLOutput(aggregatedContent, projectRoot) { for (const file of textFiles) { xml += ` `; - // Use CDATA for code content to preserve formatting and handle special characters - if (file.content.trim()) { - xml += ``; + // Use CDATA for code content, handling CDATA end sequences properly + if (file.content?.trim()) { + const indentedContent = indentFileContent(file.content); + if (file.content.includes(']]>')) { + // If content contains ]]>, split it and wrap each part in CDATA + xml += splitAndWrapCDATA(indentedContent); + } else { + xml += ``; + } + } else if (file.content) { + // Handle empty or whitespace-only content + const indentedContent = indentFileContent(file.content); + xml += ``; } xml += ` `; } - xml += ``; + xml += ` +`; return xml; } /** - * Escape XML special characters + * Escape XML special characters for attributes * @param {string} str - String to escape * @returns {string} Escaped string */ @@ -234,6 +242,37 @@ function escapeXml(str) { .replace(/'/g, '''); } +/** + * Indent file content with 4 spaces for each line + * @param {string} content - Content to indent + * @returns {string} Indented content + */ +function indentFileContent(content) { + if (typeof content !== 'string') { + return String(content); + } + + // Split content into lines and add 4 spaces of indentation to each line + return content.split('\n').map(line => ` ${line}`).join('\n'); +} + +/** + * Split content containing ]]> and wrap each part in CDATA + * @param {string} content - Content to process + * @returns {string} Content with properly wrapped CDATA sections + */ +function splitAndWrapCDATA(content) { + if (typeof content !== 'string') { + return String(content); + } + + // Replace ]]> with ]]]]> to escape it within CDATA + const escapedContent = content.replace(/]]>/g, ']]]]>'); + return ``; +} + /** * Calculate statistics for the processed files * @param {Object} aggregatedContent - The aggregated content object @@ -365,7 +404,7 @@ program // Generate XML output const xmlSpinner = ora('🔧 Generating XML output...').start(); - const xmlOutput = generateXMLOutput(aggregatedContent, process.cwd()); + const xmlOutput = generateXMLOutput(aggregatedContent); await fs.writeFile(options.output, xmlOutput); xmlSpinner.succeed('📝 XML generation completed');