#!/usr/bin/env node

const { spawn } = require('child_process');
const path = require('path');
const fs = require('fs').promises;
// Not referenced anywhere in this file at present; kept so the module's
// dependency list is unchanged.
const readline = require('readline');

/** Horizontal rule used by every console banner. */
const RULE = '='.repeat(60);

/**
 * Console glyphs, written as Unicode escapes so the source stays ASCII-only
 * and survives being re-saved in a non-UTF-8 encoding.
 */
const GLYPH = Object.freeze({
  pass: '\u2705', // white heavy check mark
  fail: '\u274C', // cross mark
  branch: '\u2514\u2500', // box-drawing "up and right" + "horizontal"
  testTube: '\u{1F9EA}',
  barChart: '\u{1F4CA}',
  folder: '\u{1F4C1}',
});

/** Icons whose presence in the output marks a response as agent-identified. */
const AGENT_ICONS = Object.freeze([
  '\u{1F4CB}', // clipboard
  '\u{1F3D7}\uFE0F', // building construction (with variation selector)
  '\u{1F4BB}', // laptop
  '\u{1F41B}', // bug
  '\u{1F3A8}', // artist palette
  '\u{1F3C3}', // runner
  '\u{1F9D9}', // mage
  '\u{1F3AD}', // performing arts
]);

/**
 * Interactive test harness for BMAD-METHOD Claude Code integration.
 * Tests Claude Code as a real user would through the TUI: it spawns the CLI,
 * types scenario commands into stdin and inspects what comes back on stdout.
 */
class ClaudeInteractiveTest {
  /**
   * @param {object} [options]
   * @param {string} [options.claudePath='claude'] Executable to spawn.
   * @param {string} [options.testDir] Workspace directory; defaults to
   *   `test-workspace` under the current working directory.
   */
  constructor(options = {}) {
    this.claudePath = options.claudePath || 'claude';
    this.testDir = options.testDir || path.join(process.cwd(), 'test-workspace');
    this.scenarios = [];
    this.results = [];
    this.currentTest = null;
  }

  /**
   * Creates the workspace, writes the fixture files and loads the scenarios.
   * @returns {Promise<void>}
   */
  async initialize() {
    await fs.mkdir(this.testDir, { recursive: true });
    await this.createTestFiles();
    await this.loadScenarios();
  }

  /**
   * Writes the sample files the scenarios refer to into the workspace.
   * @returns {Promise<void>}
   */
  async createTestFiles() {
    const requirements = [
      '# E-Commerce Platform Requirements',
      '- Support 100k concurrent users',
      '- Payment processing with PCI compliance',
      '- Mobile-responsive design',
      '- Real-time inventory tracking',
    ].join('\n');

    // Built line by line: YAML is indentation-sensitive, so the nesting must
    // be explicit rather than depend on how this source file is indented.
    const existingApi = [
      'openapi: 3.0.0',
      'info:',
      '  title: Legacy API',
      '  version: 1.0.0',
      'paths:',
      '  /users:',
      '    get:',
      '      summary: Get users (slow, needs optimization)',
    ].join('\n');

    const packageJson = JSON.stringify(
      {
        name: 'test-project',
        version: '1.0.0',
        dependencies: {
          express: '^4.18.0',
          react: '^18.0.0',
        },
      },
      null,
      2,
    );

    const files = {
      'requirements.md': requirements,
      'existing-api.yaml': existingApi,
      'package.json': packageJson,
    };

    // The files are independent of each other, so write them concurrently.
    await Promise.all(
      Object.entries(files).map(([filename, content]) =>
        fs.writeFile(path.join(this.testDir, filename), content),
      ),
    );
  }

  /**
   * Populates `this.scenarios` with the built-in scenario definitions.
   * Each scenario has a `name`, the `commands` typed into the CLI in order,
   * and the `expectations` handed to `validateExpectations`.
   * @returns {Promise<void>}
   */
  async loadScenarios() {
    this.scenarios = [
      {
        name: 'Basic PM Agent Routing',
        commands: [
          'Create user stories for a login feature with OAuth support',
          'bmad-respond: Google, GitHub, and traditional email/password',
          'bmad-respond: Yes, with remember me for 30 days',
          'bmad-respond: Standard security, 2FA optional',
        ],
        expectations: {
          agentRouting: 'pm',
          elicitationCount: 3,
          outputContains: ['As a user', 'login', 'OAuth'],
          sessionCreated: true,
        },
      },
      {
        name: 'Multi-Agent Workflow',
        commands: [
          'Design an e-commerce platform architecture',
          'bmad-respond: B2C marketplace',
          'bmad-respond: 100k users, $1M GMV/month',
          'Now create user stories for the MVP',
          '/bmad-sessions',
          '/switch 1',
        ],
        expectations: {
          multipleAgents: ['architect', 'pm'],
          sessionCount: 2,
          contextPreserved: ['100k users', 'marketplace'],
          sessionSwitching: true,
        },
      },
      {
        name: 'Direct Agent Invocation',
        commands: [
          '/bmad-architect Review the existing-api.yaml and suggest improvements',
          'bmad-respond: Yes, we need to support 10x growth',
          'Create stories for the optimization work',
        ],
        expectations: {
          directInvocation: true,
          fileAnalysis: 'existing-api.yaml',
          agentHandoff: ['architect', 'pm'],
        },
      },
      {
        name: 'Concurrent Sessions',
        commands: [
          'Help me plan a sprint for next week',
          'bmad-respond: 5 developers, 2-week sprint',
          'In parallel, create a technical spec for the payment service',
          '/bmad-sessions',
          'Continue with the sprint planning',
          '/switch 2',
        ],
        expectations: {
          concurrentSessions: true,
          clearAgentIdentification: true,
          sessionManagement: ['list', 'switch'],
        },
      },
      {
        name: 'Error Recovery',
        commands: [
          'Create a story for', // Incomplete command
          '/bmad-unknown-command', // Invalid command
          'Help me with the user story for login', // Recovery
          'bmad-respond: Social login with Google',
        ],
        expectations: {
          errorHandling: true,
          gracefulRecovery: true,
          validOutput: true,
        },
      },
    ];
  }

  /**
   * Runs one scenario against a freshly spawned CLI process and appends the
   * outcome to `this.results`.
   *
   * @param {{name: string, commands: string[], expectations: object}} scenario
   * @returns {Promise<{name: string, success: boolean, details: object, errors: string[]}>}
   */
  async runScenario(scenario) {
    console.log(`\n${RULE}`);
    console.log(`Running: ${scenario.name}`);
    console.log(`${RULE}\n`);

    const result = {
      name: scenario.name,
      success: true,
      details: {},
      errors: [],
    };

    let claude = null;

    try {
      // NOTE(review): the workspace path is passed as the value following
      // `-p`; confirm this matches the flag semantics of the CLI under test.
      claude = spawn(this.claudePath, ['-p', this.testDir], {
        cwd: this.testDir,
        env: { ...process.env, BMAD_TEST_MODE: 'true' },
      });

      // A failed spawn (e.g. missing executable) and a broken stdin pipe are
      // both reported through 'error' events. Without handlers they would
      // crash the harness; remember the first one and fail the scenario.
      let processError = null;
      const rememberError = (err) => {
        if (processError === null) {
          processError = err;
        }
      };
      claude.on('error', rememberError);
      claude.stdin.on('error', rememberError);

      let output = '';
      claude.stdout.on('data', (data) => {
        const text = data.toString();
        output += text;
        this.parseOutput(text, result);
      });
      claude.stderr.on('data', (data) => {
        result.errors.push(data.toString());
      });

      for (const command of scenario.commands) {
        await this.delay(1000); // Give the CLI time to become ready
        if (processError) {
          throw processError;
        }
        console.log(`> ${command}`);
        claude.stdin.write(`${command}\n`);
        await this.waitForResponse(claude, command);
      }
      if (processError) {
        throw processError;
      }

      await this.validateExpectations(scenario.expectations, result, output);
    } catch (error) {
      result.success = false;
      result.errors.push(error.message);
    } finally {
      // Always reap the child, including when a command or validation threw.
      if (claude) {
        claude.kill();
        await this.waitForExit(claude);
      }
    }

    this.results.push(result);
    return result;
  }

  /**
   * Scans one chunk of CLI output and records what it finds on
   * `result.details` (agent routing, elicitation count, session creation,
   * agent icons, error mentions).
   *
   * @param {string} text Chunk of stdout text.
   * @param {{details: object}} result Scenario result being accumulated.
   * @returns {void}
   */
  parseOutput(text, result) {
    const { details } = result;

    // Agent routing, e.g. "Routes to pm agent" / "Invoking architect agent"
    const agentMatch = text.match(/(?:Routes? to|Invoking) (\w+) agent/i);
    if (agentMatch) {
      details.agentRouted = agentMatch[1].toLowerCase();
    }

    // Elicitation: counted once per chunk that contains a marker
    if (text.includes('bmad-respond:') || text.includes('Question:')) {
      details.elicitationCount = (details.elicitationCount || 0) + 1;
    }

    // Session creation
    if (text.includes('Session created:') || text.includes('session-')) {
      details.sessionCreated = true;
      const sessionMatch = text.match(/session-[\w-]+/);
      if (sessionMatch) {
        details.sessionId = sessionMatch[0];
      }
    }

    // Agent identification
    if (AGENT_ICONS.some((icon) => text.includes(icon))) {
      details.agentIconFound = true;
    }

    // Errors
    if (text.includes('Error:') || text.includes('error')) {
      details.errorDetected = true;
    }
  }

  /**
   * Resolves once stdout emits a chunk containing a response indicator
   * ('>', 'bmad-respond:' or 'Session'), or after `timeout` ms, whichever
   * comes first. The stdout listener is removed before resolving so that
   * repeated calls do not pile listeners up on the stream.
   *
   * @param {import('child_process').ChildProcess} claude Spawned CLI process.
   * @param {string} command Command that was just sent (currently unused).
   * @param {number} [timeout=5000] Maximum wait in milliseconds.
   * @returns {Promise<void>}
   */
  async waitForResponse(claude, command, timeout = 5000) {
    return new Promise((resolve) => {
      let timer = null;

      const onData = (data) => {
        const text = data.toString();
        if (text.includes('>') || text.includes('bmad-respond:') || text.includes('Session')) {
          finish();
        }
      };

      const finish = () => {
        clearTimeout(timer);
        claude.stdout.removeListener('data', onData);
        resolve();
      };

      timer = setTimeout(finish, timeout);
      claude.stdout.on('data', onData);
    });
  }

  /**
   * Checks the scenario expectations this harness knows how to verify and
   * records failures on `result`. Keys without a check are listed in
   * `result.details.uncheckedExpectations` instead of being silently dropped.
   *
   * @param {object} expectations Expectation map from the scenario.
   * @param {{success: boolean, details: object, errors: string[]}} result
   * @param {string} output Everything the CLI wrote to stdout.
   * @returns {Promise<void>}
   */
  async validateExpectations(expectations, result, output) {
    const fail = (message) => {
      result.success = false;
      result.errors.push(message);
    };
    const lowerOutput = output.toLowerCase();
    const unchecked = [];

    for (const [key, expected] of Object.entries(expectations)) {
      switch (key) {
        case 'agentRouting':
          if (result.details.agentRouted !== expected) {
            fail(`Expected agent ${expected}, got ${result.details.agentRouted}`);
          }
          break;

        case 'elicitationCount':
          if (result.details.elicitationCount !== expected) {
            fail(`Expected ${expected} elicitations, got ${result.details.elicitationCount}`);
          }
          break;

        case 'outputContains':
          for (const phrase of expected) {
            if (!output.includes(phrase)) {
              fail(`Output missing expected phrase: ${phrase}`);
            }
          }
          break;

        case 'sessionCreated':
          if (!result.details.sessionCreated) {
            fail('No session created');
          }
          break;

        case 'multipleAgents':
          // An agent counts as invoked when its name appears in the output
          for (const agent of expected) {
            if (!lowerOutput.includes(agent)) {
              fail(`Agent ${agent} not invoked`);
            }
          }
          break;

        case 'contextPreserved':
          for (const context of expected) {
            if (!output.includes(context)) {
              fail(`Context not preserved: ${context}`);
            }
          }
          break;

        default:
          unchecked.push(key);
          break;
      }
    }

    if (unchecked.length > 0) {
      result.details.uncheckedExpectations = unchecked;
    }
  }

  /**
   * Waits for the child process to exit, giving up after `timeout` ms.
   * Returns immediately when the process has already exited.
   *
   * @param {import('child_process').ChildProcess} claude
   * @param {number} [timeout=1000] Fallback timeout in milliseconds.
   * @returns {Promise<number|null|undefined>} Exit code if one was observed.
   */
  async waitForExit(claude, timeout = 1000) {
    if (claude.exitCode != null || claude.signalCode != null) {
      return claude.exitCode;
    }

    return new Promise((resolve) => {
      let timer = null;
      const done = (code) => {
        clearTimeout(timer);
        claude.removeListener('exit', done);
        resolve(code);
      };
      timer = setTimeout(done, timeout); // Timeout fallback
      claude.once('exit', done);
    });
  }

  /**
   * @param {number} ms Milliseconds to wait.
   * @returns {Promise<void>}
   */
  delay(ms) {
    return new Promise((resolve) => setTimeout(resolve, ms));
  }

  /**
   * Initializes the workspace, runs every scenario in order and writes the
   * report (console summary plus JSON results file).
   * @returns {Promise<void>}
   */
  async runAllScenarios() {
    await this.initialize();

    console.log(`${GLYPH.testTube} BMAD-METHOD Claude Code Interactive Testing`);
    console.log(`Testing ${this.scenarios.length} scenarios...\n`);

    for (const scenario of this.scenarios) {
      await this.runScenario(scenario);
    }

    await this.generateReport();
  }

  /**
   * Prints the pass/fail summary and metric evaluation, then saves the
   * detailed results. The returned promise settles once the results file has
   * been written, so callers can safely exit afterwards.
   * @returns {Promise<void>}
   */
  async generateReport() {
    console.log(`\n${RULE}`);
    console.log(`${GLYPH.barChart} Test Results Summary`);
    console.log(`${RULE}\n`);

    const total = this.results.length;
    const passed = this.results.filter((r) => r.success).length;
    // Guard the division so an empty run reports 0.0% rather than NaN%.
    const passRate = total > 0 ? ((passed / total) * 100).toFixed(1) : '0.0';

    console.log(`Overall: ${passed}/${total} passed (${passRate}%)\n`);

    for (const result of this.results) {
      const status = result.success ? GLYPH.pass : GLYPH.fail;
      console.log(`${status} ${result.name}`);
      if (!result.success && result.errors.length > 0) {
        for (const error of result.errors) {
          console.log(`   ${GLYPH.branch} ${error}`);
        }
      }
    }

    console.log(`\n${RULE}`);
    console.log('Success Criteria Evaluation');
    console.log(`${RULE}\n`);

    for (const [metric, value] of Object.entries(this.evaluateMetrics())) {
      const status = value.pass ? GLYPH.pass : GLYPH.fail;
      console.log(`${status} ${metric}: ${value.score}% (target: ${value.target}%)`);
    }

    await this.saveResults();
  }

  /**
   * Scores each success criterion against its target percentage.
   * @returns {Object<string, {score: number, target: number, pass: boolean}>}
   */
  evaluateMetrics() {
    const grade = (score, target) => ({ score, target, pass: score >= target });

    return {
      'Agent Routing Accuracy': grade(this.calculateRoutingAccuracy(), 95),
      'Elicitation Flow': grade(this.calculateElicitationSuccess(), 100),
      'Session Management': grade(this.calculateSessionSuccess(), 100),
      'Error Recovery': grade(this.calculateErrorRecovery(), 100),
    };
  }

  /**
   * Percentage of results with a detected agent routing that succeeded
   * without an "Expected agent" error; 0 when no result recorded a routing.
   * @returns {number}
   */
  calculateRoutingAccuracy() {
    const routingTests = this.results.filter((r) => r.details.agentRouted);
    if (routingTests.length === 0) {
      return 0;
    }
    const correct = routingTests.filter(
      (r) => r.success && !r.errors.some((e) => e.includes('Expected agent')),
    );
    return (correct.length / routingTests.length) * 100;
  }

  /**
   * Percentage of results with at least one elicitation that succeeded;
   * 0 when there are none.
   * @returns {number}
   */
  calculateElicitationSuccess() {
    const elicitationTests = this.results.filter((r) => r.details.elicitationCount > 0);
    if (elicitationTests.length === 0) {
      return 0;
    }
    const correct = elicitationTests.filter((r) => r.success);
    return (correct.length / elicitationTests.length) * 100;
  }

  /**
   * Percentage of results with a created session that succeeded; 0 when
   * there are none.
   * @returns {number}
   */
  calculateSessionSuccess() {
    const sessionTests = this.results.filter((r) => r.details.sessionCreated);
    if (sessionTests.length === 0) {
      return 0;
    }
    const correct = sessionTests.filter((r) => r.success);
    return (correct.length / sessionTests.length) * 100;
  }

  /**
   * Percentage of results whose name contains "Error" that either succeeded
   * or have `details.validOutput` set; 0 when there are none.
   * @returns {number}
   */
  calculateErrorRecovery() {
    const errorTests = this.results.filter((r) => r.name.includes('Error'));
    if (errorTests.length === 0) {
      return 0;
    }
    const recovered = errorTests.filter((r) => r.success || r.details.validOutput);
    return (recovered.length / errorTests.length) * 100;
  }

  /**
   * Writes results and metrics to a timestamped JSON file in the workspace.
   * @returns {Promise<void>}
   */
  async saveResults() {
    const timestamp = new Date().toISOString().replace(/[:.]/g, '-');
    const resultsPath = path.join(this.testDir, `test-results-${timestamp}.json`);

    const report = {
      timestamp: new Date().toISOString(),
      scenarios: this.scenarios.length,
      results: this.results,
      metrics: this.evaluateMetrics(),
    };

    await fs.writeFile(resultsPath, JSON.stringify(report, null, 2));

    console.log(`\n${GLYPH.folder} Detailed results saved to: ${resultsPath}`);
  }

  /**
   * Removes the test workspace and everything in it, including any saved
   * results files.
   * @returns {Promise<void>}
   */
  async cleanup() {
    await fs.rm(this.testDir, { recursive: true, force: true });
  }
}

/** Prints the command-line usage text. */
function printUsage() {
  console.log('Usage: claude-interactive-test.js COMMAND [NAME]');
  console.log('Commands:');
  console.log('  run              Run all test scenarios');
  console.log('  scenario NAME    Run specific scenario');
}

/**
 * Command-line entry point.
 * @param {string[]} argv Arguments after the script name.
 * @returns {Promise<number>} Process exit code.
 */
async function main(argv) {
  const [command, scenarioName] = argv;
  const tester = new ClaudeInteractiveTest();

  switch (command) {
    case 'run':
      await tester.runAllScenarios();
      return 0;

    case 'scenario': {
      if (!scenarioName) {
        printUsage();
        return 1;
      }
      await tester.initialize();
      const scenario = tester.scenarios.find((s) => s.name.includes(scenarioName));
      if (!scenario) {
        throw new Error(`Scenario not found: ${scenarioName}`);
      }
      const result = await tester.runScenario(scenario);
      console.log('\nResult:', result);
      return result.success ? 0 : 1;
    }

    default:
      printUsage();
      return 1;
  }
}

// CLI interface
if (require.main === module) {
  main(process.argv.slice(2)).then(
    (code) => process.exit(code),
    (err) => {
      console.error('Test failed:', err);
      process.exit(1);
    },
  );
}

module.exports = ClaudeInteractiveTest;