perf(flattener): improve memory efficiency by streaming xml output
- Replace in-memory XML generation with a streaming approach
- Add a comprehensive list of common ignore patterns
- Update the statistics calculation to use the output file size instead of the content length
This commit is contained in:
parent
3d27ebdf18
commit
855960318c
|
|
@ -210,10 +210,12 @@ The PO ensures:
|
|||
docs/brownfield-architecture.md
|
||||
2. Shard your docs:
|
||||
In your IDE
|
||||
|
||||
```bash
|
||||
@po
|
||||
shard docs/brownfield-prd.md
|
||||
```
|
||||
|
||||
```bash
|
||||
@po
|
||||
shard docs/brownfield-architecture.md
|
||||
|
|
|
|||
|
|
@ -16,12 +16,118 @@ async function discoverFiles(rootDir) {
|
|||
const gitignorePath = path.join(rootDir, '.gitignore');
|
||||
const gitignorePatterns = await parseGitignore(gitignorePath);
|
||||
|
||||
// Common gitignore patterns that should always be ignored
|
||||
const commonIgnorePatterns = [
|
||||
// Version control
|
||||
'.git/**',
|
||||
'.svn/**',
|
||||
'.hg/**',
|
||||
'.bzr/**',
|
||||
|
||||
// Dependencies
|
||||
'node_modules/**',
|
||||
'bower_components/**',
|
||||
'vendor/**',
|
||||
'packages/**',
|
||||
|
||||
// Build outputs
|
||||
'build/**',
|
||||
'dist/**',
|
||||
'out/**',
|
||||
'target/**',
|
||||
'bin/**',
|
||||
'obj/**',
|
||||
'release/**',
|
||||
'debug/**',
|
||||
|
||||
// Environment and config
|
||||
'.env',
|
||||
'.env.*',
|
||||
'*.env',
|
||||
'.config',
|
||||
|
||||
// Logs
|
||||
'logs/**',
|
||||
'*.log',
|
||||
'npm-debug.log*',
|
||||
'yarn-debug.log*',
|
||||
'yarn-error.log*',
|
||||
'lerna-debug.log*',
|
||||
|
||||
// Coverage and testing
|
||||
'coverage/**',
|
||||
'.nyc_output/**',
|
||||
'.coverage/**',
|
||||
'test-results/**',
|
||||
'junit.xml',
|
||||
|
||||
// Cache directories
|
||||
'.cache/**',
|
||||
'.tmp/**',
|
||||
'.temp/**',
|
||||
'tmp/**',
|
||||
'temp/**',
|
||||
'.sass-cache/**',
|
||||
'.eslintcache',
|
||||
'.stylelintcache',
|
||||
|
||||
// OS generated files
|
||||
'.DS_Store',
|
||||
'.DS_Store?',
|
||||
'._*',
|
||||
'.Spotlight-V100',
|
||||
'.Trashes',
|
||||
'ehthumbs.db',
|
||||
'Thumbs.db',
|
||||
'desktop.ini',
|
||||
|
||||
// IDE and editor files
|
||||
'.vscode/**',
|
||||
'.idea/**',
|
||||
'*.swp',
|
||||
'*.swo',
|
||||
'*~',
|
||||
'.project',
|
||||
'.classpath',
|
||||
'.settings/**',
|
||||
'*.sublime-project',
|
||||
'*.sublime-workspace',
|
||||
|
||||
// Package manager files
|
||||
'package-lock.json',
|
||||
'yarn.lock',
|
||||
'pnpm-lock.yaml',
|
||||
'composer.lock',
|
||||
'Pipfile.lock',
|
||||
|
||||
// Runtime and compiled files
|
||||
'*.pyc',
|
||||
'*.pyo',
|
||||
'*.pyd',
|
||||
'__pycache__/**',
|
||||
'*.class',
|
||||
'*.jar',
|
||||
'*.war',
|
||||
'*.ear',
|
||||
'*.o',
|
||||
'*.so',
|
||||
'*.dll',
|
||||
'*.exe',
|
||||
|
||||
// Documentation build
|
||||
'_site/**',
|
||||
'.jekyll-cache/**',
|
||||
'.jekyll-metadata',
|
||||
|
||||
// Flattener specific outputs
|
||||
'flattened-codebase.xml',
|
||||
'repomix-output.xml'
|
||||
];
|
||||
|
||||
const combinedIgnores = [
|
||||
...gitignorePatterns,
|
||||
'.git/**',
|
||||
'flattened-codebase.xml',
|
||||
'repomix-output.xml'
|
||||
];
|
||||
...gitignorePatterns,
|
||||
...commonIgnorePatterns
|
||||
];
|
||||
|
||||
// Use glob to recursively find all files, excluding common ignore patterns
|
||||
const files = await glob('**/*', {
|
||||
|
|
@ -180,49 +286,67 @@ async function aggregateFileContents(files, rootDir, spinner = null) {
|
|||
}
|
||||
|
||||
/**
|
||||
* Generate XML output with aggregated file contents
|
||||
* Generate XML output with aggregated file contents using streaming
|
||||
* @param {Object} aggregatedContent - The aggregated content object
|
||||
* @param {string} projectRoot - The project root directory
|
||||
* @returns {string} XML content
|
||||
* @param {string} outputPath - The output file path
|
||||
* @returns {Promise<void>} Promise that resolves when writing is complete
|
||||
*/
|
||||
function generateXMLOutput(aggregatedContent) {
|
||||
async function generateXMLOutput(aggregatedContent, outputPath) {
|
||||
const { textFiles } = aggregatedContent;
|
||||
|
||||
let xml = `<?xml version="1.0" encoding="UTF-8"?>
|
||||
`;
|
||||
xml += `<files>
|
||||
`;
|
||||
// Create write stream for efficient memory usage
|
||||
const writeStream = fs.createWriteStream(outputPath, { encoding: 'utf8' });
|
||||
|
||||
// Add text files with content (only text files as per story requirements)
|
||||
for (const file of textFiles) {
|
||||
xml += ` <file path="${escapeXml(file.path)}">`;
|
||||
return new Promise((resolve, reject) => {
|
||||
writeStream.on('error', reject);
|
||||
writeStream.on('finish', resolve);
|
||||
|
||||
// Use CDATA for code content, handling CDATA end sequences properly
|
||||
if (file.content?.trim()) {
|
||||
const indentedContent = indentFileContent(file.content);
|
||||
if (file.content.includes(']]>')) {
|
||||
// If content contains ]]>, split it and wrap each part in CDATA
|
||||
xml += splitAndWrapCDATA(indentedContent);
|
||||
} else {
|
||||
xml += `<![CDATA[
|
||||
${indentedContent}
|
||||
]]>`;
|
||||
// Write XML header
|
||||
writeStream.write('<?xml version="1.0" encoding="UTF-8"?>\n');
|
||||
writeStream.write('<files>\n');
|
||||
|
||||
// Process files one by one to minimize memory usage
|
||||
let fileIndex = 0;
|
||||
|
||||
const writeNextFile = () => {
|
||||
if (fileIndex >= textFiles.length) {
|
||||
// All files processed, close XML and stream
|
||||
writeStream.write('</files>\n');
|
||||
writeStream.end();
|
||||
return;
|
||||
}
|
||||
} else if (file.content) {
|
||||
// Handle empty or whitespace-only content
|
||||
const indentedContent = indentFileContent(file.content);
|
||||
xml += `<![CDATA[
|
||||
${indentedContent}
|
||||
]]>`;
|
||||
}
|
||||
|
||||
xml += `</file>
|
||||
`;
|
||||
}
|
||||
const file = textFiles[fileIndex];
|
||||
fileIndex++;
|
||||
|
||||
xml += `</files>
|
||||
`;
|
||||
return xml;
|
||||
// Write file opening tag
|
||||
writeStream.write(` <file path="${escapeXml(file.path)}">`);
|
||||
|
||||
// Use CDATA for code content, handling CDATA end sequences properly
|
||||
if (file.content?.trim()) {
|
||||
const indentedContent = indentFileContent(file.content);
|
||||
if (file.content.includes(']]>')) {
|
||||
// If content contains ]]>, split it and wrap each part in CDATA
|
||||
writeStream.write(splitAndWrapCDATA(indentedContent));
|
||||
} else {
|
||||
writeStream.write(`<![CDATA[\n${indentedContent}\n ]]>`);
|
||||
}
|
||||
} else if (file.content) {
|
||||
// Handle whitespace-only content (an empty string is falsy and gets no CDATA section)
|
||||
const indentedContent = indentFileContent(file.content);
|
||||
writeStream.write(`<![CDATA[\n${indentedContent}\n ]]>`);
|
||||
}
|
||||
|
||||
// Write file closing tag
|
||||
writeStream.write('</file>\n');
|
||||
|
||||
// Continue with next file on next tick to avoid stack overflow
|
||||
setImmediate(writeNextFile);
|
||||
};
|
||||
|
||||
// Start processing files
|
||||
writeNextFile();
|
||||
});
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -276,10 +400,10 @@ ${escapedContent}
|
|||
/**
|
||||
* Calculate statistics for the processed files
|
||||
* @param {Object} aggregatedContent - The aggregated content object
|
||||
* @param {string} xmlContent - The generated XML content
|
||||
* @param {number} xmlFileSize - The size of the generated XML file in bytes
|
||||
* @returns {Object} Statistics object
|
||||
*/
|
||||
function calculateStatistics(aggregatedContent, xmlContent) {
|
||||
function calculateStatistics(aggregatedContent, xmlFileSize) {
|
||||
const { textFiles, binaryFiles, errors } = aggregatedContent;
|
||||
|
||||
// Calculate total file size in bytes
|
||||
|
|
@ -291,7 +415,7 @@ function calculateStatistics(aggregatedContent, xmlContent) {
|
|||
const totalLines = textFiles.reduce((sum, file) => sum + file.lines, 0);
|
||||
|
||||
// Estimate token count (rough approximation: 1 token ≈ 4 characters)
|
||||
const estimatedTokens = Math.ceil(xmlContent.length / 4);
|
||||
const estimatedTokens = Math.ceil(xmlFileSize / 4);
|
||||
|
||||
// Format file size
|
||||
const formatSize = (bytes) => {
|
||||
|
|
@ -306,7 +430,7 @@ function calculateStatistics(aggregatedContent, xmlContent) {
|
|||
binaryFiles: binaryFiles.length,
|
||||
errorFiles: errors.length,
|
||||
totalSize: formatSize(totalSize),
|
||||
xmlSize: formatSize(xmlContent.length),
|
||||
xmlSize: formatSize(xmlFileSize),
|
||||
totalLines,
|
||||
estimatedTokens: estimatedTokens.toLocaleString()
|
||||
};
|
||||
|
|
@ -402,14 +526,14 @@ program
|
|||
console.log(`Binary files: ${aggregatedContent.binaryFiles.length}`);
|
||||
}
|
||||
|
||||
// Generate XML output
|
||||
// Generate XML output using streaming
|
||||
const xmlSpinner = ora('🔧 Generating XML output...').start();
|
||||
const xmlOutput = generateXMLOutput(aggregatedContent);
|
||||
await fs.writeFile(options.output, xmlOutput);
|
||||
await generateXMLOutput(aggregatedContent, options.output);
|
||||
xmlSpinner.succeed('📝 XML generation completed');
|
||||
|
||||
// Calculate and display statistics
|
||||
const stats = calculateStatistics(aggregatedContent, xmlOutput);
|
||||
const outputStats = await fs.stat(options.output);
|
||||
const stats = calculateStatistics(aggregatedContent, outputStats.size);
|
||||
|
||||
// Display completion summary
|
||||
console.log('\n📊 Completion Summary:');
|
||||
|
|
|
|||
Loading…
Reference in New Issue