BMAD-METHOD/bmad-claude-integration/tests/harness/claude-interactive-test.js

#!/usr/bin/env node

const { spawn } = require('child_process');
const path = require('path');
const fs = require('fs').promises;
const readline = require('readline');

/**
 * Interactive test harness for BMAD-METHOD Claude Code integration.
 * Tests Claude Code as a real user would through the TUI.
 *
 * Each scenario spawns the Claude executable, writes the scenario's commands
 * to its stdin one at a time, scans stdout for known markers, and then checks
 * the collected details against the scenario's expectations.
 */
class ClaudeInteractiveTest {
  /**
   * @param {object} [options]
   * @param {string} [options.claudePath='claude'] Executable to spawn.
   * @param {string} [options.testDir] Workspace directory; defaults to
   *   `<cwd>/test-workspace`.
   */
  constructor(options = {}) {
    this.claudePath = options.claudePath || 'claude';
    this.testDir = options.testDir || path.join(process.cwd(), 'test-workspace');
    this.scenarios = [];
    this.results = [];
    this.currentTest = null;
  }

  /**
   * Express `part / whole` as a percentage, yielding 0 when `whole` is 0 so
   * callers never see NaN.
   *
   * @param {number} part
   * @param {number} whole
   * @returns {number}
   */
  static percentage(part, whole) {
    return whole > 0 ? (part / whole * 100) : 0;
  }

  /**
   * Create the workspace directory, write the fixture files and load the
   * built-in scenarios.
   *
   * @returns {Promise<void>}
   */
  async initialize() {
    // Create test workspace
    await fs.mkdir(this.testDir, { recursive: true });

    // Create test files for scenarios
    await this.createTestFiles();

    // Load test scenarios
    await this.loadScenarios();
  }

  /**
   * Write the sample files that the scenarios refer to into the workspace.
   *
   * @returns {Promise<void>}
   */
  async createTestFiles() {
    // Create sample files for testing
    const files = {
      'requirements.md': `# E-Commerce Platform Requirements
- Support 100k concurrent users
- Payment processing with PCI compliance
- Mobile-responsive design
- Real-time inventory tracking`,

      'existing-api.yaml': `openapi: 3.0.0
info:
  title: Legacy API
  version: 1.0.0
paths:
  /users:
    get:
      summary: Get users (slow, needs optimization)`,

      'package.json': `{
  "name": "test-project",
  "version": "1.0.0",
  "dependencies": {
    "express": "^4.18.0",
    "react": "^18.0.0"
  }
}`
    };

    // The files are independent of each other, so write them concurrently.
    await Promise.all(
      Object.entries(files).map(([filename, content]) =>
        fs.writeFile(path.join(this.testDir, filename), content))
    );
  }

  /**
   * Populate `this.scenarios` with the built-in scenario definitions. Each
   * scenario has a `name`, the `commands` written to stdin in order, and the
   * `expectations` handed to {@link validateExpectations}.
   *
   * @returns {Promise<void>}
   */
  async loadScenarios() {
    this.scenarios = [
      {
        name: 'Basic PM Agent Routing',
        commands: [
          'Create user stories for a login feature with OAuth support',
          'bmad-respond: Google, GitHub, and traditional email/password',
          'bmad-respond: Yes, with remember me for 30 days',
          'bmad-respond: Standard security, 2FA optional'
        ],
        expectations: {
          agentRouting: 'pm',
          elicitationCount: 3,
          outputContains: ['As a user', 'login', 'OAuth'],
          sessionCreated: true
        }
      },
      {
        name: 'Multi-Agent Workflow',
        commands: [
          'Design an e-commerce platform architecture',
          'bmad-respond: B2C marketplace',
          'bmad-respond: 100k users, $1M GMV/month',
          'Now create user stories for the MVP',
          '/bmad-sessions',
          '/switch 1'
        ],
        expectations: {
          multipleAgents: ['architect', 'pm'],
          sessionCount: 2,
          contextPreserved: ['100k users', 'marketplace'],
          sessionSwitching: true
        }
      },
      {
        name: 'Direct Agent Invocation',
        commands: [
          '/bmad-architect Review the existing-api.yaml and suggest improvements',
          'bmad-respond: Yes, we need to support 10x growth',
          'Create stories for the optimization work'
        ],
        expectations: {
          directInvocation: true,
          fileAnalysis: 'existing-api.yaml',
          agentHandoff: ['architect', 'pm']
        }
      },
      {
        name: 'Concurrent Sessions',
        commands: [
          'Help me plan a sprint for next week',
          'bmad-respond: 5 developers, 2-week sprint',
          'In parallel, create a technical spec for the payment service',
          '/bmad-sessions',
          'Continue with the sprint planning',
          '/switch 2'
        ],
        expectations: {
          concurrentSessions: true,
          clearAgentIdentification: true,
          sessionManagement: ['list', 'switch']
        }
      },
      {
        name: 'Error Recovery',
        commands: [
          'Create a story for', // Incomplete command
          '/bmad-unknown-command', // Invalid command
          'Help me with the user story for login', // Recovery
          'bmad-respond: Social login with Google'
        ],
        expectations: {
          errorHandling: true,
          gracefulRecovery: true,
          validOutput: true
        }
      }
    ];
  }

  /**
   * Run one scenario against a freshly spawned Claude process.
   *
   * Process-level failures (spawn errors, stdin write errors) are recorded on
   * the result instead of surfacing as unhandled `'error'` events, and the
   * child is always cleaned up, even when a step throws.
   *
   * @param {{name: string, commands: string[], expectations: object}} scenario
   * @returns {Promise<{name: string, success: boolean, details: object, errors: string[]}>}
   *   The result, which is also appended to `this.results`.
   */
  async runScenario(scenario) {
    console.log(`\n${'='.repeat(60)}`);
    console.log(`Running: ${scenario.name}`);
    console.log(`${'='.repeat(60)}\n`);

    const result = {
      name: scenario.name,
      success: true,
      details: {},
      errors: []
    };

    let claude = null;
    try {
      // Start Claude process
      // NOTE(review): the workspace path is passed as the value of `-p`;
      // confirm against the Claude CLI reference that this is the intended flag.
      claude = spawn(this.claudePath, ['-p', this.testDir], {
        cwd: this.testDir,
        env: { ...process.env, BMAD_TEST_MODE: 'true' }
      });

      // Without these listeners a missing executable or a write to a closed
      // stdin is emitted as an unhandled 'error' event and crashes the harness.
      let processFailed = false;
      const recordFailure = (label) => (err) => {
        processFailed = true;
        result.success = false;
        result.errors.push(`${label}: ${err.message}`);
      };
      claude.on('error', recordFailure('Claude process error'));
      claude.stdin.on('error', recordFailure('Claude stdin error'));

      // Set up output capture
      let output = '';
      claude.stdout.on('data', (data) => {
        const text = data.toString();
        output += text;

        // Parse output for test validation
        this.parseOutput(text, result);
      });

      claude.stderr.on('data', (data) => {
        result.errors.push(data.toString());
      });

      // Execute commands
      for (const command of scenario.commands) {
        await this.delay(1000); // Wait for Claude to be ready

        // Stop feeding commands once the process is known to be unusable.
        if (processFailed) {
          break;
        }

        console.log(`> ${command}`);
        claude.stdin.write(`${command}\n`);

        // Wait for response
        await this.waitForResponse(claude, command);
      }

      // Validate expectations (pointless when the process itself failed)
      if (!processFailed) {
        await this.validateExpectations(scenario.expectations, result, output);
      }
    } catch (error) {
      result.success = false;
      result.errors.push(error.message);
    } finally {
      // Clean up on every path so a failed step does not leak the child.
      if (claude) {
        try {
          if (claude.exitCode === null && claude.signalCode === null) {
            claude.kill();
          }
          await this.waitForExit(claude);
        } catch (cleanupError) {
          result.errors.push(`Cleanup failed: ${cleanupError.message}`);
        }
      }
    }

    this.results.push(result);
    return result;
  }

  /**
   * Scan one chunk of stdout text for known markers and record what was found
   * on `result.details`.
   *
   * @param {string} text Output chunk.
   * @param {{details: object}} result Result object; its `details` is mutated.
   * @returns {void}
   */
  parseOutput(text, result) {
    const { details } = result;

    // Detect agent routing
    const agentMatch = text.match(/(?:Routes? to|Invoking) (\w+) agent/i);
    if (agentMatch) {
      details.agentRouted = agentMatch[1].toLowerCase();
    }

    // Detect elicitation (counted once per chunk, not once per occurrence)
    if (text.includes('bmad-respond:') || text.includes('Question:')) {
      details.elicitationCount = (details.elicitationCount || 0) + 1;
    }

    // Detect session creation
    if (text.includes('Session created:') || text.includes('session-')) {
      details.sessionCreated = true;
      const sessionMatch = text.match(/session-[\w-]+/);
      if (sessionMatch) {
        details.sessionId = sessionMatch[0];
      }
    }

    // Detect agent identification
    const agentIcons = ['📋', '🏗️', '💻', '🐛', '🎨', '🏃', '🧙', '🎭'];
    if (agentIcons.some((icon) => text.includes(icon))) {
      details.agentIconFound = true;
    }

    // Detect errors
    if (text.includes('Error:') || text.includes('error')) {
      details.errorDetected = true;
    }
  }

  /**
   * Resolve once a stdout chunk contains a response indicator (`>`,
   * `bmad-respond:` or `Session`), or once `timeout` milliseconds have passed,
   * whichever happens first. Never rejects.
   *
   * The data listener and the timer are both released when the promise
   * settles, so repeated calls do not pile listeners up on stdout.
   *
   * @param {{stdout: import('events').EventEmitter}} claude Child process.
   * @param {string} command The command just sent (not inspected).
   * @param {number} [timeout=5000] Maximum wait in milliseconds.
   * @returns {Promise<void>}
   */
  async waitForResponse(claude, command, timeout = 5000) {
    return new Promise((resolve) => {
      let timer = null;

      const finish = () => {
        clearTimeout(timer);
        claude.stdout.removeListener('data', onData);
        resolve();
      };

      // Listen for response indicators
      const onData = (data) => {
        const text = data.toString();
        if (text.includes('>') || text.includes('bmad-respond:') || text.includes('Session')) {
          finish();
        }
      };

      timer = setTimeout(finish, timeout);
      claude.stdout.on('data', onData);
    });
  }

  /**
   * Check the collected details and output against a scenario's expectations,
   * marking `result` as failed and appending a message for every mismatch.
   *
   * Expectation keys that have no check implemented are listed in
   * `result.details.unvalidatedExpectations` rather than ignored silently.
   *
   * @param {object} expectations Expectation key/value pairs.
   * @param {{success: boolean, details: object, errors: string[]}} result
   * @param {string} output All stdout text captured for the scenario.
   * @returns {Promise<void>}
   */
  async validateExpectations(expectations, result, output) {
    const fail = (message) => {
      result.success = false;
      result.errors.push(message);
    };

    for (const [key, expected] of Object.entries(expectations)) {
      switch (key) {
        case 'agentRouting':
          if (result.details.agentRouted !== expected) {
            fail(`Expected agent ${expected}, got ${result.details.agentRouted}`);
          }
          break;

        case 'elicitationCount': {
          // The counter is only created on the first match, so absent means 0.
          const actual = result.details.elicitationCount ?? 0;
          if (actual !== expected) {
            fail(`Expected ${expected} elicitations, got ${actual}`);
          }
          break;
        }

        case 'outputContains':
          for (const phrase of expected) {
            if (!output.includes(phrase)) {
              fail(`Output missing expected phrase: ${phrase}`);
            }
          }
          break;

        case 'sessionCreated':
          if (!result.details.sessionCreated) {
            fail('No session created');
          }
          break;

        case 'multipleAgents': {
          // Check if multiple agents were invoked
          const lowered = output.toLowerCase();
          for (const agent of expected) {
            if (!lowered.includes(agent)) {
              fail(`Agent ${agent} not invoked`);
            }
          }
          break;
        }

        case 'contextPreserved':
          for (const context of expected) {
            if (!output.includes(context)) {
              fail(`Context not preserved: ${context}`);
            }
          }
          break;

        default:
          // No check exists for this key; record it so a passing result is
          // not mistaken for the expectation having been verified.
          if (!result.details.unvalidatedExpectations) {
            result.details.unvalidatedExpectations = [];
          }
          result.details.unvalidatedExpectations.push(key);
          break;
      }
    }
  }

  /**
   * Wait for the child to emit `'exit'`, giving up after one second.
   * Returns immediately when the child has already exited.
   *
   * @param {import('child_process').ChildProcess} claude
   * @returns {Promise<number|null|undefined>} The exit code when `'exit'` was
   *   observed, otherwise `undefined`.
   */
  async waitForExit(claude) {
    // 'exit' is not emitted again for a process that is already gone.
    if (claude.exitCode != null || claude.signalCode != null) {
      return undefined;
    }

    return new Promise((resolve) => {
      let timer = null;

      const settle = (code) => {
        clearTimeout(timer);
        claude.removeListener('exit', settle);
        resolve(code);
      };

      timer = setTimeout(settle, 1000); // Timeout fallback
      claude.once('exit', settle);
    });
  }

  /**
   * @param {number} ms Milliseconds to wait.
   * @returns {Promise<void>}
   */
  delay(ms) {
    return new Promise(resolve => setTimeout(resolve, ms));
  }

  /**
   * Initialize the workspace, run every scenario in order, then print the
   * report and wait for the results file to be written.
   *
   * @returns {Promise<void>}
   */
  async runAllScenarios() {
    await this.initialize();

    console.log('🧪 BMAD-METHOD Claude Code Interactive Testing');
    console.log(`Testing ${this.scenarios.length} scenarios...\n`);

    for (const scenario of this.scenarios) {
      await this.runScenario(scenario);
    }

    // Awaited so callers that exit the process afterwards do not cut the
    // results file write short.
    await this.generateReport();
  }

  /**
   * Print the per-scenario summary and the success-criteria table, then save
   * the detailed results.
   *
   * @returns {Promise<void>} Settles once the results file has been written.
   */
  async generateReport() {
    console.log('\n' + '='.repeat(60));
    console.log('📊 Test Results Summary');
    console.log('='.repeat(60) + '\n');

    const passed = this.results.filter(r => r.success).length;
    const total = this.results.length;
    // Guarded so an empty run prints 0.0% instead of NaN%.
    const passRate = ClaudeInteractiveTest.percentage(passed, total).toFixed(1);

    console.log(`Overall: ${passed}/${total} passed (${passRate}%)\n`);

    for (const result of this.results) {
      const status = result.success ? '✅' : '❌';
      console.log(`${status} ${result.name}`);

      if (!result.success && result.errors.length > 0) {
        for (const error of result.errors) {
          console.log(`   └─ ${error}`);
        }
      }
    }

    // Success criteria evaluation
    console.log('\n' + '='.repeat(60));
    console.log('Success Criteria Evaluation');
    console.log('='.repeat(60) + '\n');

    const metrics = this.evaluateMetrics();
    for (const [metric, value] of Object.entries(metrics)) {
      const status = value.pass ? '✅' : '❌';
      console.log(`${status} ${metric}: ${value.score}% (target: ${value.target}%)`);
    }

    // Save detailed results
    await this.saveResults();
  }

  /**
   * Compute each success metric once and compare it with its target.
   *
   * @returns {Object<string, {score: number, target: number, pass: boolean}>}
   */
  evaluateMetrics() {
    const metric = (score, target) => ({ score, target, pass: score >= target });

    return {
      'Agent Routing Accuracy': metric(this.calculateRoutingAccuracy(), 95),
      'Elicitation Flow': metric(this.calculateElicitationSuccess(), 100),
      'Session Management': metric(this.calculateSessionSuccess(), 100),
      'Error Recovery': metric(this.calculateErrorRecovery(), 100)
    };
  }

  /**
   * Percentage of results with a detected agent route that succeeded without
   * an "Expected agent" error.
   *
   * @returns {number}
   */
  calculateRoutingAccuracy() {
    const routingTests = this.results.filter(r => r.details.agentRouted);
    const correct = routingTests.filter(r => r.success && !r.errors.some(e => e.includes('Expected agent')));
    return ClaudeInteractiveTest.percentage(correct.length, routingTests.length);
  }

  /**
   * Percentage of results with at least one detected elicitation that succeeded.
   *
   * @returns {number}
   */
  calculateElicitationSuccess() {
    const elicitationTests = this.results.filter(r => r.details.elicitationCount > 0);
    const correct = elicitationTests.filter(r => r.success);
    return ClaudeInteractiveTest.percentage(correct.length, elicitationTests.length);
  }

  /**
   * Percentage of results with a detected session that succeeded.
   *
   * @returns {number}
   */
  calculateSessionSuccess() {
    const sessionTests = this.results.filter(r => r.details.sessionCreated);
    const correct = sessionTests.filter(r => r.success);
    return ClaudeInteractiveTest.percentage(correct.length, sessionTests.length);
  }

  /**
   * Percentage of results whose name contains "Error" that either succeeded or
   * have `details.validOutput` set.
   *
   * @returns {number}
   */
  calculateErrorRecovery() {
    const errorTests = this.results.filter(r => r.name.includes('Error'));
    const recovered = errorTests.filter(r => r.success || r.details.validOutput);
    return ClaudeInteractiveTest.percentage(recovered.length, errorTests.length);
  }

  /**
   * Write the results and metrics as JSON to a timestamped file inside the
   * workspace directory and print its path.
   *
   * @returns {Promise<void>}
   */
  async saveResults() {
    // ':' and '.' are replaced to keep the timestamp filename-friendly.
    const timestamp = new Date().toISOString().replace(/[:.]/g, '-');
    const resultsPath = path.join(this.testDir, `test-results-${timestamp}.json`);

    await fs.writeFile(resultsPath, JSON.stringify({
      timestamp: new Date().toISOString(),
      scenarios: this.scenarios.length,
      results: this.results,
      metrics: this.evaluateMetrics()
    }, null, 2));

    console.log(`\n📁 Detailed results saved to: ${resultsPath}`);
  }

  /**
   * Remove the workspace directory and everything in it, including any saved
   * result files.
   *
   * @returns {Promise<void>}
   */
  async cleanup() {
    // Clean up test workspace
    await fs.rm(this.testDir, { recursive: true, force: true });
  }
}

// CLI interface
if (require.main === module) {
  /** Print the command-line usage summary. */
  const printUsage = () => {
    console.log('Usage: claude-interactive-test.js <command>');
    console.log('Commands:');
    console.log('  run            Run all test scenarios');
    console.log('  scenario NAME  Run specific scenario');
  };

  /**
   * Execute the requested command and return the process exit code:
   * 0 when everything that ran succeeded, 1 otherwise.
   *
   * @param {string[]} argv Arguments after the script name.
   * @returns {Promise<number>}
   */
  const main = async ([command, scenarioName]) => {
    const tester = new ClaudeInteractiveTest();

    switch (command) {
      case 'run':
        await tester.runAllScenarios();
        // Mirror the `scenario` command: a failed scenario is a failed run.
        return tester.results.every(r => r.success) ? 0 : 1;

      case 'scenario': {
        // Without a name there is nothing to look up; show usage rather than
        // creating the workspace and failing with "Scenario not found: undefined".
        if (!scenarioName) {
          printUsage();
          return 1;
        }

        await tester.initialize();
        const scenario = tester.scenarios.find(s => s.name.includes(scenarioName));
        if (!scenario) {
          throw new Error(`Scenario not found: ${scenarioName}`);
        }

        const result = await tester.runScenario(scenario);
        console.log('\nResult:', result);
        return result.success ? 0 : 1;
      }

      default:
        printUsage();
        return 1;
    }
  };

  main(process.argv.slice(2))
    .then(code => process.exit(code))
    .catch(err => {
      console.error('Test failed:', err);
      process.exit(1);
    });
}

module.exports = ClaudeInteractiveTest;