refactor(flattener): improve xml generation and file discovery

- Simplify file discovery by using gitignore patterns
- Enhance XML generation with proper CDATA handling and indentation
- Remove unused dependencies and clean up code
This commit is contained in:
manjaroblack 2025-07-19 18:08:20 -05:00
parent d1823fb083
commit 6a5a12599e
2 changed files with 66 additions and 31 deletions

4
.gitignore vendored
View File

@ -33,8 +33,4 @@ docs/architecture/
docs/prd/ docs/prd/
docs/stories/ docs/stories/
docs/project-architecture.md docs/project-architecture.md
tests/
custom-output.xml
flattened-codebase.xml
biome.json biome.json
__tests__/

View File

@ -2,12 +2,9 @@
const { Command } = require('commander'); const { Command } = require('commander');
const fs = require('fs-extra'); const fs = require('fs-extra');
const path = require('path'); const path = require('node:path');
const { glob } = require('glob'); const { glob } = require('glob');
const { minimatch } = require('minimatch'); const { minimatch } = require('minimatch');
const { promisify } = require('util');
const { exec } = require('child_process');
const execAsync = promisify(exec);
/** /**
* Recursively discover all files in a directory * Recursively discover all files in a directory
@ -16,27 +13,23 @@ const execAsync = promisify(exec);
*/ */
async function discoverFiles(rootDir) { async function discoverFiles(rootDir) {
try { try {
const gitignorePath = path.join(rootDir, '.gitignore');
const gitignorePatterns = await parseGitignore(gitignorePath);
const combinedIgnores = [
...gitignorePatterns,
'.git/**',
'flattened-codebase.xml',
'repomix-output.xml'
];
// Use glob to recursively find all files, excluding common ignore patterns // Use glob to recursively find all files, excluding common ignore patterns
const files = await glob('**/*', { const files = await glob('**/*', {
cwd: rootDir, cwd: rootDir,
nodir: true, // Only files, not directories nodir: true, // Only files, not directories
dot: true, // Include hidden files dot: true, // Include hidden files
follow: false, // Don't follow symbolic links follow: false, // Don't follow symbolic links
ignore: [ ignore: combinedIgnores
// Standard ignore patterns
'node_modules/**',
'.git/**',
'build/**',
'dist/**',
'.next/**',
'coverage/**',
'.nyc_output/**',
'tmp/**',
'temp/**',
'.gitignore',
'.gitattributes',
'.gitmodules'
]
}); });
return files.map(file => path.resolve(rootDir, file)); return files.map(file => path.resolve(rootDir, file));
@ -192,7 +185,7 @@ async function aggregateFileContents(files, rootDir, spinner = null) {
* @param {string} projectRoot - The project root directory * @param {string} projectRoot - The project root directory
* @returns {string} XML content * @returns {string} XML content
*/ */
function generateXMLOutput(aggregatedContent, projectRoot) { function generateXMLOutput(aggregatedContent) {
const { textFiles } = aggregatedContent; const { textFiles } = aggregatedContent;
let xml = `<?xml version="1.0" encoding="UTF-8"?> let xml = `<?xml version="1.0" encoding="UTF-8"?>
@ -204,21 +197,36 @@ function generateXMLOutput(aggregatedContent, projectRoot) {
for (const file of textFiles) { for (const file of textFiles) {
xml += ` <file path="${escapeXml(file.path)}">`; xml += ` <file path="${escapeXml(file.path)}">`;
// Use CDATA for code content to preserve formatting and handle special characters // Use CDATA for code content, handling CDATA end sequences properly
if (file.content.trim()) { if (file.content?.trim()) {
xml += `<![CDATA[${file.content}]]>`; const indentedContent = indentFileContent(file.content);
if (file.content.includes(']]>')) {
// If content contains ]]>, split it and wrap each part in CDATA
xml += splitAndWrapCDATA(indentedContent);
} else {
xml += `<![CDATA[
${indentedContent}
]]>`;
}
} else if (file.content) {
// Handle empty or whitespace-only content
const indentedContent = indentFileContent(file.content);
xml += `<![CDATA[
${indentedContent}
]]>`;
} }
xml += `</file> xml += `</file>
`; `;
} }
xml += `</files>`; xml += `</files>
`;
return xml; return xml;
} }
/** /**
* Escape XML special characters * Escape XML special characters for attributes
* @param {string} str - String to escape * @param {string} str - String to escape
* @returns {string} Escaped string * @returns {string} Escaped string
*/ */
@ -234,6 +242,37 @@ function escapeXml(str) {
.replace(/'/g, '&apos;'); .replace(/'/g, '&apos;');
} }
/**
* Indent file content with 4 spaces for each line
* @param {string} content - Content to indent
* @returns {string} Indented content
*/
function indentFileContent(content) {
if (typeof content !== 'string') {
return String(content);
}
// Split content into lines and add 4 spaces of indentation to each line
return content.split('\n').map(line => ` ${line}`).join('\n');
}
/**
* Split content containing ]]> and wrap each part in CDATA
* @param {string} content - Content to process
* @returns {string} Content with properly wrapped CDATA sections
*/
function splitAndWrapCDATA(content) {
if (typeof content !== 'string') {
return String(content);
}
// Replace ]]> with ]]]]><![CDATA[> to escape it within CDATA
const escapedContent = content.replace(/]]>/g, ']]]]><![CDATA[>');
return `<![CDATA[
${escapedContent}
]]>`;
}
/** /**
* Calculate statistics for the processed files * Calculate statistics for the processed files
* @param {Object} aggregatedContent - The aggregated content object * @param {Object} aggregatedContent - The aggregated content object
@ -365,7 +404,7 @@ program
// Generate XML output // Generate XML output
const xmlSpinner = ora('🔧 Generating XML output...').start(); const xmlSpinner = ora('🔧 Generating XML output...').start();
const xmlOutput = generateXMLOutput(aggregatedContent, process.cwd()); const xmlOutput = generateXMLOutput(aggregatedContent);
await fs.writeFile(options.output, xmlOutput); await fs.writeFile(options.output, xmlOutput);
xmlSpinner.succeed('📝 XML generation completed'); xmlSpinner.succeed('📝 XML generation completed');