refactor(flattener): improve xml generation and file discovery
- Simplify file discovery by using gitignore patterns
- Enhance XML generation with proper CDATA handling and indentation
- Remove unused dependencies and clean up code
This commit is contained in:
parent
d1823fb083
commit
6a5a12599e
|
|
@ -33,8 +33,4 @@ docs/architecture/
|
||||||
docs/prd/
|
docs/prd/
|
||||||
docs/stories/
|
docs/stories/
|
||||||
docs/project-architecture.md
|
docs/project-architecture.md
|
||||||
tests/
|
biome.json
|
||||||
custom-output.xml
|
|
||||||
flattened-codebase.xml
|
|
||||||
biome.json
|
|
||||||
__tests__/
|
|
||||||
|
|
@ -2,12 +2,9 @@
|
||||||
|
|
||||||
const { Command } = require('commander');
|
const { Command } = require('commander');
|
||||||
const fs = require('fs-extra');
|
const fs = require('fs-extra');
|
||||||
const path = require('path');
|
const path = require('node:path');
|
||||||
const { glob } = require('glob');
|
const { glob } = require('glob');
|
||||||
const { minimatch } = require('minimatch');
|
const { minimatch } = require('minimatch');
|
||||||
const { promisify } = require('util');
|
|
||||||
const { exec } = require('child_process');
|
|
||||||
const execAsync = promisify(exec);
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Recursively discover all files in a directory
|
* Recursively discover all files in a directory
|
||||||
|
|
@ -16,27 +13,23 @@ const execAsync = promisify(exec);
|
||||||
*/
|
*/
|
||||||
async function discoverFiles(rootDir) {
|
async function discoverFiles(rootDir) {
|
||||||
try {
|
try {
|
||||||
|
const gitignorePath = path.join(rootDir, '.gitignore');
|
||||||
|
const gitignorePatterns = await parseGitignore(gitignorePath);
|
||||||
|
|
||||||
|
const combinedIgnores = [
|
||||||
|
...gitignorePatterns,
|
||||||
|
'.git/**',
|
||||||
|
'flattened-codebase.xml',
|
||||||
|
'repomix-output.xml'
|
||||||
|
];
|
||||||
|
|
||||||
// Use glob to recursively find all files, excluding common ignore patterns
|
// Use glob to recursively find all files, excluding common ignore patterns
|
||||||
const files = await glob('**/*', {
|
const files = await glob('**/*', {
|
||||||
cwd: rootDir,
|
cwd: rootDir,
|
||||||
nodir: true, // Only files, not directories
|
nodir: true, // Only files, not directories
|
||||||
dot: true, // Include hidden files
|
dot: true, // Include hidden files
|
||||||
follow: false, // Don't follow symbolic links
|
follow: false, // Don't follow symbolic links
|
||||||
ignore: [
|
ignore: combinedIgnores
|
||||||
// Standard ignore patterns
|
|
||||||
'node_modules/**',
|
|
||||||
'.git/**',
|
|
||||||
'build/**',
|
|
||||||
'dist/**',
|
|
||||||
'.next/**',
|
|
||||||
'coverage/**',
|
|
||||||
'.nyc_output/**',
|
|
||||||
'tmp/**',
|
|
||||||
'temp/**',
|
|
||||||
'.gitignore',
|
|
||||||
'.gitattributes',
|
|
||||||
'.gitmodules'
|
|
||||||
]
|
|
||||||
});
|
});
|
||||||
|
|
||||||
return files.map(file => path.resolve(rootDir, file));
|
return files.map(file => path.resolve(rootDir, file));
|
||||||
|
|
@ -192,7 +185,7 @@ async function aggregateFileContents(files, rootDir, spinner = null) {
|
||||||
* @param {string} projectRoot - The project root directory
|
* @param {string} projectRoot - The project root directory
|
||||||
* @returns {string} XML content
|
* @returns {string} XML content
|
||||||
*/
|
*/
|
||||||
function generateXMLOutput(aggregatedContent, projectRoot) {
|
function generateXMLOutput(aggregatedContent) {
|
||||||
const { textFiles } = aggregatedContent;
|
const { textFiles } = aggregatedContent;
|
||||||
|
|
||||||
let xml = `<?xml version="1.0" encoding="UTF-8"?>
|
let xml = `<?xml version="1.0" encoding="UTF-8"?>
|
||||||
|
|
@ -204,21 +197,36 @@ function generateXMLOutput(aggregatedContent, projectRoot) {
|
||||||
for (const file of textFiles) {
|
for (const file of textFiles) {
|
||||||
xml += ` <file path="${escapeXml(file.path)}">`;
|
xml += ` <file path="${escapeXml(file.path)}">`;
|
||||||
|
|
||||||
// Use CDATA for code content to preserve formatting and handle special characters
|
// Use CDATA for code content, handling CDATA end sequences properly
|
||||||
if (file.content.trim()) {
|
if (file.content?.trim()) {
|
||||||
xml += `<![CDATA[${file.content}]]>`;
|
const indentedContent = indentFileContent(file.content);
|
||||||
|
if (file.content.includes(']]>')) {
|
||||||
|
// If content contains ]]>, split it and wrap each part in CDATA
|
||||||
|
xml += splitAndWrapCDATA(indentedContent);
|
||||||
|
} else {
|
||||||
|
xml += `<![CDATA[
|
||||||
|
${indentedContent}
|
||||||
|
]]>`;
|
||||||
|
}
|
||||||
|
} else if (file.content) {
|
||||||
|
// Handle empty or whitespace-only content
|
||||||
|
const indentedContent = indentFileContent(file.content);
|
||||||
|
xml += `<![CDATA[
|
||||||
|
${indentedContent}
|
||||||
|
]]>`;
|
||||||
}
|
}
|
||||||
|
|
||||||
xml += `</file>
|
xml += `</file>
|
||||||
`;
|
`;
|
||||||
}
|
}
|
||||||
|
|
||||||
xml += `</files>`;
|
xml += `</files>
|
||||||
|
`;
|
||||||
return xml;
|
return xml;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Escape XML special characters
|
* Escape XML special characters for attributes
|
||||||
* @param {string} str - String to escape
|
* @param {string} str - String to escape
|
||||||
* @returns {string} Escaped string
|
* @returns {string} Escaped string
|
||||||
*/
|
*/
|
||||||
|
|
@ -234,6 +242,37 @@ function escapeXml(str) {
|
||||||
.replace(/'/g, '&apos;');
|
.replace(/'/g, '&apos;');
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Indent file content with 4 spaces for each line
|
||||||
|
* @param {string} content - Content to indent
|
||||||
|
* @returns {string} Indented content
|
||||||
|
*/
|
||||||
|
function indentFileContent(content) {
|
||||||
|
if (typeof content !== 'string') {
|
||||||
|
return String(content);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Split content into lines and add 4 spaces of indentation to each line
|
||||||
|
return content.split('\n').map(line => `    ${line}`).join('\n');
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Split content containing ]]> and wrap each part in CDATA
|
||||||
|
* @param {string} content - Content to process
|
||||||
|
* @returns {string} Content with properly wrapped CDATA sections
|
||||||
|
*/
|
||||||
|
function splitAndWrapCDATA(content) {
|
||||||
|
if (typeof content !== 'string') {
|
||||||
|
return String(content);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Replace ]]> with ]]]]><![CDATA[> to escape it within CDATA
|
||||||
|
const escapedContent = content.replace(/]]>/g, ']]]]><![CDATA[>');
|
||||||
|
return `<![CDATA[
|
||||||
|
${escapedContent}
|
||||||
|
]]>`;
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Calculate statistics for the processed files
|
* Calculate statistics for the processed files
|
||||||
* @param {Object} aggregatedContent - The aggregated content object
|
* @param {Object} aggregatedContent - The aggregated content object
|
||||||
|
|
@ -365,7 +404,7 @@ program
|
||||||
|
|
||||||
// Generate XML output
|
// Generate XML output
|
||||||
const xmlSpinner = ora('🔧 Generating XML output...').start();
|
const xmlSpinner = ora('🔧 Generating XML output...').start();
|
||||||
const xmlOutput = generateXMLOutput(aggregatedContent, process.cwd());
|
const xmlOutput = generateXMLOutput(aggregatedContent);
|
||||||
await fs.writeFile(options.output, xmlOutput);
|
await fs.writeFile(options.output, xmlOutput);
|
||||||
xmlSpinner.succeed('📝 XML generation completed');
|
xmlSpinner.succeed('📝 XML generation completed');
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue