BMAD-METHOD/expansion-packs/bmad-javascript-fullstack/tools/token-counter.js

401 lines
11 KiB
JavaScript
Raw Blame History

This file contains invisible Unicode characters

This file contains invisible Unicode characters that are indistinguishable to humans but may be processed differently by a computer. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#!/usr/bin/env node
/**
* Automated Token Counter for BMAD Context Management
* Estimates token count for files and provides real-time context budget tracking
*
* Usage:
* node token-counter.js <file-or-directory>
* node token-counter.js --watch <directory>
* node token-counter.js --budget 5000 <files...>
* node token-counter.js --verbose <file-or-directory>
*/
const fs = require('fs');
const path = require('path');
// Token estimation constants. These are rough per-unit averages (loosely based
// on GPT/Claude tokenization patterns), not exact tokenizer output.
const TOKENS_PER_WORD = 1.3;
const TOKENS_PER_CODE_LINE = 20;
const TOKENS_PER_MARKDOWN_LINE = 15;

/**
 * Heuristic token estimator for files and directories.
 *
 * Accumulates per-file statistics in `fileStats` and a running total in
 * `totalTokens`, and can render a console report with an optional budget gauge.
 */
class TokenCounter {
  /**
   * @param {object} [options]
   * @param {number} [options.budget] - Token budget; anything that is not a
   *   finite positive number is treated as "no budget" (null).
   * @param {boolean} [options.verbose] - Print per-file details in the report.
   * @param {boolean} [options.watch] - Stored flag; watching itself is started
   *   through watchDirectory().
   */
  constructor(options = {}) {
    // Reject NaN, zero and negative budgets: they would produce meaningless
    // (NaN/Infinity/negative) percentages in getBudgetStatus().
    const budget = Number(options.budget);
    this.budget = Number.isFinite(budget) && budget > 0 ? budget : null;
    this.verbose = options.verbose || false;
    this.watch = options.watch || false;
    this.totalTokens = 0;
    this.fileStats = [];
  }

  /**
   * Estimate tokens for content, choosing a strategy from the file extension.
   *
   * @param {string} content - Text to estimate; null/undefined count as empty,
   *   other non-string values are converted with String().
   * @param {string} [filename] - Name or path used only for its extension.
   * @returns {number} Estimated token count (integer).
   */
  estimateTokens(content, filename = '') {
    let text = content;
    if (typeof text !== 'string') {
      text = text == null ? '' : String(text);
    }
    const ext = path.extname(filename).toLowerCase();
    if (['.js', '.ts', '.jsx', '.tsx', '.py', '.java'].includes(ext)) {
      return this.estimateCodeTokens(text);
    }
    if (['.md', '.mdx'].includes(ext)) {
      return this.estimateMarkdownTokens(text);
    }
    if (['.json', '.yaml', '.yml'].includes(ext)) {
      return this.estimateStructuredTokens(text);
    }
    return this.estimateTextTokens(text);
  }

  /**
   * Estimate tokens for plain text: a per-word cost plus 5% of the character
   * count as overhead for punctuation and formatting.
   *
   * @param {string} content
   * @returns {number}
   */
  estimateTextTokens(content) {
    const words = content.match(/\b\w+\b/g) || [];
    const wordTokens = words.length * TOKENS_PER_WORD;
    const overhead = content.length * 0.05;
    return Math.ceil(wordTokens + overhead);
  }

  /**
   * Estimate tokens for source code, line by line.
   *
   * @param {string} content
   * @returns {number}
   */
  estimateCodeTokens(content) {
    // `import` must be a whole word so identifiers such as `importantValue`
    // are not mistaken for imports; `require(` is matched anywhere on the line
    // so the common `const x = require('y')` form is recognised too.
    const importLine = /^(?:import\b|from\s+\S+\s+import\b)|\brequire\s*\(/;
    let tokens = 0;
    for (const line of content.split('\n')) {
      const trimmed = line.trim();
      if (trimmed.length <= 1) {
        // Blank lines and lone braces cost a single token.
        tokens += 1;
      } else if (trimmed.startsWith('//') || trimmed.startsWith('#')) {
        // Line comments are cheaper than code.
        tokens += trimmed.length * 0.3;
      } else if (importLine.test(trimmed)) {
        // Import statements are compact.
        tokens += 10;
      } else {
        tokens += TOKENS_PER_CODE_LINE;
      }
    }
    return Math.ceil(tokens);
  }

  /**
   * Estimate tokens for Markdown, line by line, tracking fenced code blocks.
   *
   * @param {string} content
   * @returns {number}
   */
  estimateMarkdownTokens(content) {
    let tokens = 0;
    let inCodeBlock = false;
    for (const line of content.split('\n')) {
      const trimmed = line.trim();
      if (trimmed.startsWith('```')) {
        // A fence line toggles code-block state and costs a few tokens itself.
        inCodeBlock = !inCodeBlock;
        tokens += 3;
        continue;
      }
      if (trimmed === '') {
        // Blank lines cost one token, matching estimateCodeTokens, instead of
        // being billed as a full line of prose or code.
        tokens += 1;
      } else if (inCodeBlock) {
        tokens += TOKENS_PER_CODE_LINE;
      } else if (/^#+\s/.test(line)) {
        // Headers are scaled by their length.
        tokens += line.length * 0.8;
      } else if (/^[*\-+]\s/.test(line) || /^\d+\.\s/.test(line)) {
        // Bullet and numbered list items are scaled by their length.
        tokens += line.length * 0.9;
      } else {
        tokens += TOKENS_PER_MARKDOWN_LINE;
      }
    }
    return Math.ceil(tokens);
  }

  /**
   * Estimate tokens for structured data (JSON/YAML): the larger of a
   * per-character estimate and a per-line estimate.
   *
   * @param {string} content
   * @returns {number}
   */
  estimateStructuredTokens(content) {
    const charTokens = content.length * 0.2;
    const lineTokens = content.split('\n').length * 3;
    return Math.ceil(Math.max(charTokens, lineTokens));
  }

  /**
   * Read one file, estimate its tokens and record the result in `fileStats`
   * and `totalTokens`.
   *
   * @param {string} filepath
   * @returns {Promise<object|null>} The stats record, or null when the file
   *   could not be read (the error is logged, not thrown).
   */
  async countFile(filepath) {
    try {
      const content = fs.readFileSync(filepath, 'utf-8');
      const tokens = this.estimateTokens(content, filepath);
      const lineCount = content.split('\n').length;
      const stats = {
        file: path.basename(filepath),
        path: filepath,
        // The report labels this value "bytes", so measure encoded bytes
        // rather than UTF-16 code units.
        size: Buffer.byteLength(content, 'utf-8'),
        lines: lineCount,
        tokens: tokens,
        tokensPerLine: Math.round(tokens / lineCount)
      };
      this.fileStats.push(stats);
      this.totalTokens += tokens;
      return stats;
    } catch (error) {
      console.error(`Error reading ${filepath}:`, error.message);
      return null;
    }
  }

  /**
   * Count tokens for every matching file below a directory.
   *
   * @param {string} dirpath
   * @param {string} [pattern] - See getFiles().
   * @returns {Promise<object[]>} The accumulated `fileStats` array.
   */
  async countDirectory(dirpath, pattern = '*') {
    for (const file of this.getFiles(dirpath, pattern)) {
      await this.countFile(file);
    }
    return this.fileStats;
  }

  /**
   * Recursively list files below a directory, skipping `node_modules` and
   * directories whose name starts with a dot.
   *
   * @param {string} dirpath - Root directory; failure to read it is thrown.
   * @param {string} [pattern] - '*' matches everything; any other value is a
   *   plain substring test against the full path.
   * @returns {string[]} Matching file paths in traversal order.
   */
  getFiles(dirpath, pattern = '*') {
    const files = [];
    // Real paths of directories already walked, so symlink cycles terminate
    // and a directory reachable twice is not counted twice.
    const visited = new Set();

    const walk = (dir, isRoot) => {
      let items;
      try {
        const realDir = fs.realpathSync(dir);
        if (visited.has(realDir)) {
          return;
        }
        visited.add(realDir);
        items = fs.readdirSync(dir);
      } catch (error) {
        if (isRoot) {
          throw error;
        }
        console.error(`Skipping ${dir}:`, error.message);
        return;
      }
      for (const item of items) {
        const fullPath = path.join(dir, item);
        let stat;
        try {
          stat = fs.statSync(fullPath);
        } catch (error) {
          // Dangling symlinks and entries removed mid-scan must not abort
          // the whole walk.
          console.error(`Skipping ${fullPath}:`, error.message);
          continue;
        }
        if (stat.isDirectory()) {
          if (!item.startsWith('.') && item !== 'node_modules') {
            walk(fullPath, false);
          }
        } else if (stat.isFile()) {
          if (pattern === '*' || fullPath.includes(pattern)) {
            files.push(fullPath);
          }
        }
      }
    };

    walk(dirpath, true);
    return files;
  }

  /**
   * Build a coloured one-line budget gauge from `totalTokens` and `budget`.
   *
   * @returns {string} Empty string when no budget is set; otherwise an
   *   ANSI-coloured status (GREEN <50%, YELLOW <75%, ORANGE <90%, else RED).
   */
  getBudgetStatus() {
    if (!this.budget) return '';
    const percentage = (this.totalTokens / this.budget) * 100;
    let status, color, emoji;
    if (percentage < 50) {
      status = 'GREEN';
      color = '\x1b[32m'; // Green
      emoji = '🟢';
    } else if (percentage < 75) {
      status = 'YELLOW';
      color = '\x1b[33m'; // Yellow
      emoji = '🟡';
    } else if (percentage < 90) {
      status = 'ORANGE';
      color = '\x1b[38;5;208m'; // Orange
      emoji = '🟠';
    } else {
      status = 'RED';
      color = '\x1b[31m'; // Red
      emoji = '🔴';
    }
    const reset = '\x1b[0m';
    return `${color}${emoji} ${status} (${percentage.toFixed(1)}% of budget)${reset}`;
  }

  /**
   * Print the report to stdout: per-file details (verbose) or the ten largest
   * files, followed by a summary, budget status and recommendations.
   */
  printReport() {
    const rule = '='.repeat(80);
    const thinRule = '-'.repeat(80);
    // Longest bar drawn in the top-10 view; longer bars are truncated and
    // marked with '+' so very large files cannot flood the terminal.
    const maxBar = 25;
    const sorted = [...this.fileStats].sort((a, b) => b.tokens - a.tokens);

    console.log('\n' + rule);
    console.log('TOKEN COUNT REPORT');
    console.log(rule);
    if (this.verbose) {
      console.log('\nDETAILED FILE ANALYSIS:');
      console.log(thinRule);
      for (const stat of sorted) {
        console.log(`\n📄 ${stat.file}`);
        console.log(` Path: ${stat.path}`);
        console.log(` Lines: ${stat.lines} | Size: ${stat.size} bytes`);
        console.log(` Tokens: ~${stat.tokens} (${stat.tokensPerLine} per line avg)`);
      }
    } else {
      console.log('\nTOP 10 FILES BY TOKEN COUNT:');
      console.log(thinRule);
      for (const stat of sorted.slice(0, 10)) {
        const units = Math.floor(stat.tokens / 100);
        const bar = units > maxBar ? '█'.repeat(maxBar) + '+' : '█'.repeat(units);
        console.log(`${stat.file.padEnd(40)} ${String(stat.tokens).padStart(6)} tokens ${bar}`);
      }
    }
    console.log('\n' + rule);
    console.log('SUMMARY:');
    console.log(thinRule);
    console.log(`Total Files: ${this.fileStats.length}`);
    console.log(`Total Tokens: ~${this.totalTokens}`);
    if (this.budget) {
      console.log(`Token Budget: ${this.budget}`);
      console.log(`Budget Status: ${this.getBudgetStatus()}`);
      console.log(`Remaining: ${Math.max(0, this.budget - this.totalTokens)} tokens`);
    }
    console.log('\nRECOMMENDATIONS:');
    if (this.totalTokens > 5000) {
      console.log('⚠️ Consider creating checkpoints to compress context');
    }
    if (this.fileStats.some(f => f.tokens > 1500)) {
      console.log('⚠️ Some files exceed 1500 tokens - consider splitting or section loading');
    }
    console.log(rule);
  }

  /**
   * Watch mode: print a report now and again after every change under
   * `dirpath` (changes whose path contains `node_modules` are ignored).
   *
   * @param {string} dirpath
   * @returns {fs.FSWatcher} The watcher, so callers can close() it.
   */
  watchDirectory(dirpath) {
    console.log(`Watching ${dirpath} for changes...`);

    const recalculate = () => {
      this.totalTokens = 0;
      this.fileStats = [];
      return this.countDirectory(dirpath)
        .then(() => {
          console.clear();
          this.printReport();
          console.log('\n👁 Watching for changes... (Ctrl+C to exit)');
        })
        .catch((error) => {
          // A failed scan must not kill the watcher with an unhandled
          // rejection; the next change triggers a fresh attempt.
          console.error(`Error scanning ${dirpath}:`, error.message);
        });
    };
    recalculate();

    // One save often fires several events; restart the timer on each so a
    // burst of events leads to a single rescan.
    let pending = null;
    const onChange = (eventType, filename) => {
      if (filename && !filename.includes('node_modules')) {
        console.log(`Change detected in ${filename}`);
        clearTimeout(pending);
        pending = setTimeout(recalculate, 100);
      }
    };

    try {
      return fs.watch(dirpath, { recursive: true }, onChange);
    } catch (error) {
      if (error.code !== 'ERR_FEATURE_UNAVAILABLE_ON_PLATFORM') {
        throw error;
      }
      // Recursive watching is not available on every platform/Node version;
      // degrade to watching the top-level directory only.
      console.error('Recursive watching is not supported on this platform; watching the top-level directory only.');
      return fs.watch(dirpath, onChange);
    }
  }
}
// CLI Interface
/**
 * CLI entry point: parse process.argv, count the given files/directories and
 * print a single combined report, or start watch mode for a directory.
 *
 * Recognised flags: --help, --verbose, --watch, --budget <N>.
 * Sets process.exitCode to 1 on invalid arguments, missing paths or
 * unexpected errors; it never rejects.
 *
 * @returns {Promise<void>}
 */
async function main() {
  const args = process.argv.slice(2);
  if (args.length === 0 || args.includes('--help')) {
    console.log(`
Token Counter - Estimate token usage for BMAD context management
Usage:
node token-counter.js <file-or-directory> Count tokens
node token-counter.js --budget <N> <files...> Count with budget
node token-counter.js --watch <directory> Watch mode
node token-counter.js --verbose <file/dir> Detailed output
Examples:
node token-counter.js agents/ Count all agent files
node token-counter.js --budget 5000 data/ Check data files against budget
node token-counter.js --watch . Monitor current directory
Token Estimation:
- Plain text: ~1.3 tokens per word
- Code: ~20 tokens per line
- Markdown: ~15 tokens per line
- JSON/YAML: ~0.2 tokens per character
`);
    return;
  }

  const options = {
    budget: null,
    verbose: args.includes('--verbose'),
    watch: args.includes('--watch')
  };

  // Parse budget. A missing or non-positive value is reported instead of
  // silently swallowing the following argument (which is often a path).
  const budgetIndex = args.indexOf('--budget');
  if (budgetIndex !== -1) {
    const rawBudget = args[budgetIndex + 1];
    const budget = Number.parseInt(rawBudget, 10);
    if (!(budget > 0)) {
      console.error(`Invalid or missing value for --budget: ${rawBudget}`);
      process.exitCode = 1;
      return;
    }
    options.budget = budget;
    args.splice(budgetIndex, 2);
  }

  // Everything that is not a flag is a path to process.
  const paths = args.filter(arg => !arg.startsWith('--'));
  if (paths.length === 0) {
    console.error('No file or directory specified. Run with --help for usage.');
    process.exitCode = 1;
    return;
  }

  const counter = new TokenCounter(options);
  let counted = false;
  try {
    for (const p of paths) {
      const fullPath = path.resolve(p);
      if (!fs.existsSync(fullPath)) {
        console.error(`Path not found: ${p}`);
        process.exitCode = 1;
        continue;
      }
      const stat = fs.statSync(fullPath);
      if (options.watch && stat.isDirectory()) {
        counter.watchDirectory(fullPath);
        return; // Watch mode runs indefinitely
      }
      // Await each path in turn so all paths feed one counter and the report
      // is printed once, after everything has been counted.
      if (stat.isDirectory()) {
        await counter.countDirectory(fullPath);
      } else {
        await counter.countFile(fullPath);
      }
      counted = true;
    }
  } catch (error) {
    console.error(`Error: ${error.message}`);
    process.exitCode = 1;
  }

  if (counted) {
    counter.printReport();
  }
}
// Make the class available to code that loads this file as a module.
module.exports = TokenCounter;

// Launch the CLI only when this file is the process entry point, so that
// requiring it from another module has no side effects.
const isEntryPoint = require.main === module;
if (isEntryPoint) {
  main();
}