#!/usr/bin/env node

/**
 * Automated Token Counter for BMAD Context Management
 * Estimates token counts for files and provides real-time context budget tracking.
 *
 * Usage:
 *   node token-counter.js <file-or-directory>
 *   node token-counter.js --watch <directory>
 *   node token-counter.js --budget 5000 <files...>
 */

const fs = require('fs');
const path = require('path');

// Token estimation constants (rough heuristics for GPT/Claude-style tokenizers)
const TOKENS_PER_WORD = 1.3;
const TOKENS_PER_CODE_LINE = 20;
const TOKENS_PER_MARKDOWN_LINE = 15;

// A blank line costs about one token, whatever kind of file it appears in.
const TOKENS_PER_BLANK_LINE = 1;

// Widest bar drawn in the compact report, chosen so a row stays within the
// 80-column layout used by the report separators.
const MAX_BAR_WIDTH = 24;

// Lines that only pull in a dependency: ES `import ...`, Python
// `from x import y`, and CommonJS `require(...)` with or without an assignment.
// The word boundary keeps identifiers such as `importantCall()` out.
const IMPORT_LINE = /^(?:import\b|from\s+\S+\s+import\b|(?:(?:const|let|var)\s+[^=]+=\s*)?require\s*\()/;

/**
 * Count the lines in a text. A trailing newline terminates the last line
 * instead of starting a new (empty) one, and an empty text has zero lines.
 *
 * @param {string} content - Text to measure.
 * @returns {number} Number of lines.
 */
function countLines(content) {
  if (content.length === 0) {
    return 0;
  }
  const segments = content.split('\n').length;
  return content.endsWith('\n') ? segments - 1 : segments;
}

/**
 * Heuristic token estimator with optional budget tracking.
 *
 * Totals accumulate across calls to `countFile` / `countDirectory` in
 * `totalTokens` and `fileStats`; `printReport` renders them.
 */
class TokenCounter {
  /**
   * @param {object} [options]
   * @param {?number} [options.budget] - Token budget to report against (falsy disables budget output).
   * @param {boolean} [options.verbose] - Print per-file details instead of the top-10 list.
   * @param {boolean} [options.watch] - Stored flag; the class itself does not act on it.
   */
  constructor(options = {}) {
    this.budget = options.budget || null;
    this.verbose = options.verbose || false;
    this.watch = options.watch || false;
    this.totalTokens = 0;
    this.fileStats = [];
  }

  /**
   * Estimate tokens for text content, picking a strategy from the file extension.
   *
   * @param {string} content - File contents.
   * @param {string} [filename] - Name or path used only for its extension.
   * @returns {number} Estimated token count (integer).
   */
  estimateTokens(content, filename = '') {
    const ext = path.extname(filename).toLowerCase();

    if (['.js', '.ts', '.jsx', '.tsx', '.py', '.java'].includes(ext)) {
      return this.estimateCodeTokens(content);
    }
    if (['.md', '.mdx'].includes(ext)) {
      return this.estimateMarkdownTokens(content);
    }
    if (['.json', '.yaml', '.yml'].includes(ext)) {
      return this.estimateStructuredTokens(content);
    }
    return this.estimateTextTokens(content);
  }

  /**
   * Estimate tokens for plain text: a per-word cost plus a small per-character
   * overhead for punctuation and formatting.
   *
   * @param {string} content
   * @returns {number} Estimated token count (integer).
   */
  estimateTextTokens(content) {
    const words = content.match(/\b\w+\b/g) || [];
    const wordTokens = words.length * TOKENS_PER_WORD;
    const overhead = content.length * 0.05;

    return Math.ceil(wordTokens + overhead);
  }

  /**
   * Estimate tokens for code files, line by line.
   *
   * Blank and single-character lines cost one token, `//` and `#` comment
   * lines cost 0.3 tokens per character, import/require lines cost a flat 10,
   * and every other line costs TOKENS_PER_CODE_LINE.
   *
   * @param {string} content
   * @returns {number} Estimated token count (integer).
   */
  estimateCodeTokens(content) {
    let tokens = 0;

    for (const line of content.split('\n')) {
      const trimmed = line.trim();

      if (trimmed.length <= 1) {
        tokens += TOKENS_PER_BLANK_LINE;
      } else if (trimmed.startsWith('//') || trimmed.startsWith('#')) {
        tokens += trimmed.length * 0.3;
      } else if (IMPORT_LINE.test(trimmed)) {
        tokens += 10;
      } else {
        tokens += TOKENS_PER_CODE_LINE;
      }
    }

    return Math.ceil(tokens);
  }

  /**
   * Estimate tokens for Markdown files, line by line.
   *
   * Fence lines (```) cost 3 tokens and toggle code-block mode. Blank lines
   * cost one token inside or outside a code block. Other lines inside a code
   * block cost TOKENS_PER_CODE_LINE; outside, headers and list items are
   * costed by length and everything else costs TOKENS_PER_MARKDOWN_LINE.
   *
   * @param {string} content
   * @returns {number} Estimated token count (integer).
   */
  estimateMarkdownTokens(content) {
    let tokens = 0;
    let inCodeBlock = false;

    for (const line of content.split('\n')) {
      const trimmed = line.trim();

      if (trimmed.startsWith('```')) {
        inCodeBlock = !inCodeBlock;
        tokens += 3;
        continue;
      }

      // Without this, every empty separator line would be billed as a full line.
      if (trimmed.length === 0) {
        tokens += TOKENS_PER_BLANK_LINE;
        continue;
      }

      if (inCodeBlock) {
        tokens += TOKENS_PER_CODE_LINE;
      } else if (/^#+\s/.test(line)) {
        tokens += line.length * 0.8;
      } else if (/^[*\-+]\s/.test(line) || /^\d+\.\s/.test(line)) {
        tokens += line.length * 0.9;
      } else {
        tokens += TOKENS_PER_MARKDOWN_LINE;
      }
    }

    return Math.ceil(tokens);
  }

  /**
   * Estimate tokens for structured data (JSON/YAML): the larger of a
   * per-character estimate and a per-line estimate.
   *
   * @param {string} content
   * @returns {number} Estimated token count (integer).
   */
  estimateStructuredTokens(content) {
    const charTokens = content.length * 0.2;
    const lineTokens = content.split('\n').length * 3;

    return Math.ceil(Math.max(charTokens, lineTokens));
  }

  /**
   * Count tokens in a single file and add the result to the running totals.
   *
   * Read errors are logged and reported as `null` rather than thrown.
   *
   * @param {string} filepath - Path of the file to read (decoded as UTF-8).
   * @returns {Promise<?{file: string, path: string, size: number, lines: number,
   *   tokens: number, tokensPerLine: number}>} Stats for the file, or `null` on error.
   */
  async countFile(filepath) {
    try {
      // Read raw bytes once so `size` is the real byte count, not the number
      // of UTF-16 code units in the decoded string.
      const raw = fs.readFileSync(filepath);
      const content = raw.toString('utf-8');
      const tokens = this.estimateTokens(content, filepath);
      const lines = countLines(content);

      const stats = {
        file: path.basename(filepath),
        path: filepath,
        size: raw.length,
        lines: lines,
        tokens: tokens,
        // An empty file has zero lines; avoid dividing by zero.
        tokensPerLine: lines > 0 ? Math.round(tokens / lines) : 0
      };

      this.fileStats.push(stats);
      this.totalTokens += tokens;

      return stats;
    } catch (error) {
      console.error(`Error reading ${filepath}:`, error.message);
      return null;
    }
  }

  /**
   * Count tokens for every matching file under a directory (recursively).
   *
   * @param {string} dirpath - Directory to scan.
   * @param {string} [pattern] - '*' for all files, otherwise a substring the path must contain.
   * @returns {Promise<Array<object>>} The accumulated `fileStats` array.
   */
  async countDirectory(dirpath, pattern = '*') {
    for (const file of this.getFiles(dirpath, pattern)) {
      await this.countFile(file);
    }

    return this.fileStats;
  }

  /**
   * Collect files under a directory, skipping `node_modules` and directories
   * whose name starts with a dot.
   *
   * Entries that cannot be inspected (dangling symlinks, permission errors)
   * and unreadable subdirectories are logged and skipped. A directory that was
   * already visited under another name (symlink cycle) is not walked again.
   * Failure to read `dirpath` itself still throws.
   *
   * @param {string} dirpath - Root directory.
   * @param {string} [pattern] - '*' for all files, otherwise a substring the full path must contain.
   * @returns {string[]} Paths of matching files.
   */
  getFiles(dirpath, pattern = '*') {
    const files = [];
    const visited = new Set();

    const walk = (dir, isRoot) => {
      let items;
      try {
        const realDir = fs.realpathSync(dir);
        if (visited.has(realDir)) {
          return;
        }
        visited.add(realDir);
        items = fs.readdirSync(dir);
      } catch (error) {
        if (isRoot) {
          throw error;
        }
        console.error(`Error reading ${dir}:`, error.message);
        return;
      }

      for (const item of items) {
        const fullPath = path.join(dir, item);

        let stat;
        try {
          stat = fs.statSync(fullPath);
        } catch (error) {
          console.error(`Error reading ${fullPath}:`, error.message);
          continue;
        }

        if (stat.isDirectory()) {
          if (!item.startsWith('.') && item !== 'node_modules') {
            walk(fullPath, false);
          }
        } else if (stat.isFile()) {
          // Simple pattern matching
          if (pattern === '*' || fullPath.includes(pattern)) {
            files.push(fullPath);
          }
        }
      }
    };

    walk(dirpath, true);
    return files;
  }

  /**
   * Build a colour-coded budget status line from the running total.
   *
   * @returns {string} ANSI-coloured status, or '' when no budget is set.
   */
  getBudgetStatus() {
    if (!this.budget) return '';

    const percentage = (this.totalTokens / this.budget) * 100;
    let status, color, emoji;

    if (percentage < 50) {
      status = 'GREEN';
      color = '\x1b[32m'; // Green
      emoji = '🟢';
    } else if (percentage < 75) {
      status = 'YELLOW';
      color = '\x1b[33m'; // Yellow
      emoji = '🟡';
    } else if (percentage < 90) {
      status = 'ORANGE';
      color = '\x1b[38;5;208m'; // Orange
      emoji = '🟠';
    } else {
      status = 'RED';
      color = '\x1b[31m'; // Red
      emoji = '🔴';
    }

    const reset = '\x1b[0m';
    return `${color}${emoji} ${status} (${percentage.toFixed(1)}% of budget)${reset}`;
  }

  /**
   * Print the report to stdout: per-file details (verbose) or the ten largest
   * files with a bar chart, followed by a summary, budget status and
   * recommendations.
   */
  printReport() {
    console.log('\n' + '='.repeat(80));
    console.log('TOKEN COUNT REPORT');
    console.log('='.repeat(80));

    // Sort a copy so the order of `fileStats` itself is left untouched.
    const sorted = [...this.fileStats].sort((a, b) => b.tokens - a.tokens);

    if (this.verbose) {
      console.log('\nDETAILED FILE ANALYSIS:');
      console.log('-'.repeat(80));

      for (const stat of sorted) {
        console.log(`\n📄 ${stat.file}`);
        console.log(` Path: ${stat.path}`);
        console.log(` Lines: ${stat.lines} | Size: ${stat.size} bytes`);
        console.log(` Tokens: ~${stat.tokens} (${stat.tokensPerLine} per line avg)`);
      }
    } else {
      console.log('\nTOP 10 FILES BY TOKEN COUNT:');
      console.log('-'.repeat(80));

      for (const stat of sorted.slice(0, 10)) {
        // One block per 100 tokens, capped so huge files do not wrap the
        // terminal; a trailing '+' marks a truncated bar.
        const blocks = Math.floor(stat.tokens / 100);
        const bar = blocks > MAX_BAR_WIDTH
          ? '█'.repeat(MAX_BAR_WIDTH) + '+'
          : '█'.repeat(blocks);
        console.log(`${stat.file.padEnd(40)} ${String(stat.tokens).padStart(6)} tokens ${bar}`);
      }
    }

    console.log('\n' + '='.repeat(80));
    console.log('SUMMARY:');
    console.log('-'.repeat(80));
    console.log(`Total Files: ${this.fileStats.length}`);
    console.log(`Total Tokens: ~${this.totalTokens}`);

    if (this.budget) {
      console.log(`Token Budget: ${this.budget}`);
      console.log(`Budget Status: ${this.getBudgetStatus()}`);
      console.log(`Remaining: ${Math.max(0, this.budget - this.totalTokens)} tokens`);
    }

    console.log('\nRECOMMENDATIONS:');
    if (this.totalTokens > 5000) {
      console.log('⚠️ Consider creating checkpoints to compress context');
    }
    if (this.fileStats.some(f => f.tokens > 1500)) {
      console.log('⚠️ Some files exceed 1500 tokens - consider splitting or section loading');
    }

    console.log('='.repeat(80));
  }

  /**
   * Watch mode - print a report now and again after each change under `dirpath`.
   *
   * Bursts of change events are collapsed into one recount, and scan failures
   * are logged instead of becoming unhandled rejections. Where recursive
   * watching is unavailable, only the top-level directory is watched.
   *
   * @param {string} dirpath - Directory to monitor.
   * @returns {fs.FSWatcher} The watcher, so callers can `close()` it.
   */
  watchDirectory(dirpath) {
    console.log(`Watching ${dirpath} for changes...`);

    const recalculate = () => {
      this.totalTokens = 0;
      this.fileStats = [];
      this.countDirectory(dirpath)
        .then(() => {
          console.clear();
          this.printReport();
          console.log('\n👁️ Watching for changes... (Ctrl+C to exit)');
        })
        .catch((error) => {
          console.error(`Error scanning ${dirpath}:`, error.message);
        });
    };

    recalculate();

    let pending = null;
    const onChange = (eventType, filename) => {
      if (!filename || filename.includes('node_modules')) {
        return;
      }
      console.log(`Change detected in ${filename}`);
      // Debounce: editors often emit several events for a single save.
      clearTimeout(pending);
      pending = setTimeout(recalculate, 100);
    };

    try {
      return fs.watch(dirpath, { recursive: true }, onChange);
    } catch (error) {
      if (error.code !== 'ERR_FEATURE_UNAVAILABLE_ON_PLATFORM') {
        throw error;
      }
      console.error('Recursive watching is not supported on this platform; watching the top-level directory only.');
      return fs.watch(dirpath, onChange);
    }
  }
}

// CLI Interface

/**
 * Command-line entry point.
 *
 * Parses `--help`, `--verbose`, `--watch` and `--budget <N>` from
 * `process.argv`, counts every file/directory argument and prints a single
 * combined report. With `--watch`, the first directory argument is watched
 * instead and the function returns while the watcher keeps the process alive.
 *
 * Failures are logged and reflected in `process.exitCode`; the returned
 * promise does not reject.
 *
 * @returns {Promise<void>}
 */
async function main() {
  const args = process.argv.slice(2);

  if (args.length === 0 || args.includes('--help')) {
    console.log(`
Token Counter - Estimate token usage for BMAD context management

Usage:
node token-counter.js <file-or-directory> Count tokens
node token-counter.js --budget <N> <files...> Count with budget
node token-counter.js --watch <directory> Watch mode
node token-counter.js --verbose <file/dir> Detailed output

Examples:
node token-counter.js agents/ Count all agent files
node token-counter.js --budget 5000 data/ Check data files against budget
node token-counter.js --watch . Monitor current directory

Token Estimation:
- Plain text: ~1.3 tokens per word
- Code: ~20 tokens per line
- Markdown: ~15 tokens per line
- JSON/YAML: ~0.2 tokens per character
`);
    return;
  }

  const options = {
    budget: null,
    verbose: args.includes('--verbose'),
    watch: args.includes('--watch')
  };

  // Parse budget. A missing or non-numeric value is rejected up front so the
  // following argument (possibly a path) is never silently swallowed.
  const budgetIndex = args.indexOf('--budget');
  if (budgetIndex !== -1) {
    const budget = Number.parseInt(args[budgetIndex + 1], 10);
    if (!(budget > 0)) {
      console.error('Invalid --budget value: expected a positive integer');
      process.exitCode = 1;
      return;
    }
    options.budget = budget;
    args.splice(budgetIndex, 2);
  }

  // Remove flags
  const paths = args.filter((arg) => !arg.startsWith('--'));
  if (paths.length === 0) {
    console.error('No file or directory given (see --help)');
    process.exitCode = 1;
    return;
  }

  const counter = new TokenCounter(options);
  let counted = 0;

  try {
    for (const p of paths) {
      const fullPath = path.resolve(p);

      if (!fs.existsSync(fullPath)) {
        console.error(`Path not found: ${p}`);
        process.exitCode = 1;
        continue;
      }

      if (fs.statSync(fullPath).isDirectory()) {
        if (options.watch) {
          counter.watchDirectory(fullPath);
          return; // Watch mode runs indefinitely
        }
        await counter.countDirectory(fullPath);
      } else {
        await counter.countFile(fullPath);
      }
      counted += 1;
    }

    // One report covering every path, instead of one per argument.
    if (counted > 0) {
      counter.printReport();
    }
  } catch (error) {
    console.error(`Token counting failed: ${error.message}`);
    process.exitCode = 1;
  }
}

// Run the CLI only when this file is executed directly, not when it is
// loaded with require() as a library.
if (require.main === module) {
  main();
}

module.exports = TokenCounter;