feat(core): add utility tools for codebase analysis

- Add dependency-check.xml for vulnerability scanning
- Add schema-validator.xml for JSON/YAML/OpenAPI validation
- Add code-metrics.xml for size and complexity analysis
- Add context-extractor.xml for AI-optimized context extraction
This commit is contained in:
Ibrahim Elsahafy 2025-12-31 21:07:00 +04:00
parent 1abc60e068
commit 2a746a6fc4
4 changed files with 383 additions and 0 deletions

View File

@ -0,0 +1,111 @@
<?xml version="1.0" encoding="UTF-8"?>
<!--
  Code Metrics Analyzer tool definition.

  Sections:
    parameters : inputs accepted by the tool
    metrics    : every value the tool computes, grouped by category
    execution  : ordered steps that produce those values
    output     : report template; metric placeholders use the names declared
                 under metrics so each reported value maps to one definition

  NOTE(review): the "output" parameter accepts summary, detailed and json, but
  only one report template is defined here. How the other two formats differ
  is not specified in this file.
-->
<tool id="code-metrics" name="Code Metrics Analyzer" standalone="true">
  <description>Analyze codebase for size, complexity, and quality metrics</description>

  <parameters>
    <param name="path" required="false" default="." description="Path to analyze"/>
    <param name="include" required="false" description="File patterns to include (e.g., '*.ts,*.tsx')"/>
    <param name="exclude" required="false" default="node_modules,dist,build,.git" description="Directories to exclude"/>
    <param name="output" required="false" default="summary" description="Output format: summary, detailed, json"/>
  </parameters>

  <metrics>
    <category name="Size">
      <metric name="total_files" description="Total number of source files"/>
      <metric name="total_lines" description="Total lines of code"/>
      <metric name="lines_of_code" description="Lines of actual code (excluding blanks/comments)"/>
      <metric name="blank_lines" description="Number of blank lines"/>
      <metric name="comment_lines" description="Number of comment lines"/>
      <!-- Derived value that the report already printed but that was never declared. -->
      <metric name="comment_ratio" description="Comment lines as a percentage of total lines (0 when there are no lines)"/>
    </category>
    <category name="Complexity">
      <metric name="cyclomatic_complexity" description="Average cyclomatic complexity"/>
      <metric name="max_complexity" description="Highest complexity in single function"/>
      <metric name="deep_nesting" description="Functions with nesting > 4 levels"/>
    </category>
    <category name="Quality">
      <metric name="duplicate_blocks" description="Detected code duplication"/>
      <metric name="long_functions" description="Functions > 50 lines"/>
      <metric name="large_files" description="Files > 500 lines"/>
      <metric name="todo_count" description="TODO/FIXME comments"/>
    </category>
    <category name="Structure">
      <metric name="avg_file_size" description="Average lines per file"/>
      <metric name="avg_function_size" description="Average lines per function"/>
      <metric name="dependency_depth" description="Maximum import depth"/>
    </category>
  </metrics>

  <execution>
    <step n="1" goal="Scan codebase">
      <action>Find all files matching include patterns</action>
      <!-- include has no default, so say what happens when it is omitted. -->
      <action if="include not provided">Treat every recognized source file under {path} as included</action>
      <action>Exclude directories from exclude list</action>
      <action>Build file list for analysis</action>
      <!-- Avoids dividing by zero in the averages and ratios below. -->
      <action if="file list is empty">Report: "No source files found in {path}" and stop</action>
    </step>
    <step n="2" goal="Count lines">
      <action>For each file, count total, code, blank, comment lines</action>
      <action>Aggregate totals by file type</action>
      <action>Compute comment_ratio as comment_lines / total_lines * 100 (use 0 when total_lines is 0)</action>
    </step>
    <step n="3" goal="Analyze complexity">
      <action>Parse functions/methods in each file</action>
      <action>Calculate cyclomatic complexity per function</action>
      <action>Record the file that contains the most complex function (max_complexity_file)</action>
      <action>Identify deeply nested code blocks</action>
      <!-- dependency_depth was declared as a metric but no step produced it. -->
      <action>Follow imports between analyzed files to determine the maximum import depth (dependency_depth)</action>
    </step>
    <step n="4" goal="Quality checks">
      <action>Detect duplicate code blocks</action>
      <action>Find long functions (> 50 lines)</action>
      <action>Find large files (> 500 lines)</action>
      <action>Count TODO/FIXME comments</action>
    </step>
    <step n="5" goal="Generate report">
      <action>Format metrics according to output parameter</action>
      <action>Include recommendations for concerning metrics</action>
    </step>
  </execution>

  <!-- Report text is kept at column 0: whitespace inside this element is part of the template. -->
  <output>
```
Code Metrics Report
===================
Project: {project_name}
Path: {path}
Date: {date}
Size Metrics:
- Total Files: {total_files}
- Total Lines: {total_lines}
- Lines of Code: {lines_of_code}
- Blank Lines: {blank_lines}
- Comment Lines: {comment_lines}
- Comment Ratio: {comment_ratio}%
Complexity Metrics:
- Avg Cyclomatic Complexity: {cyclomatic_complexity}
- Max Complexity: {max_complexity} ({max_complexity_file})
- Deep Nesting Issues: {deep_nesting}
Quality Metrics:
- Duplicate Code Blocks: {duplicate_blocks}
- Long Functions (>50 lines): {long_functions}
- Large Files (>500 lines): {large_files}
- TODO/FIXME Count: {todo_count}
Structure:
- Avg File Size: {avg_file_size} lines
- Avg Function Size: {avg_function_size} lines
- Max Import Depth: {dependency_depth}
Breakdown by Language:
{for each language}
- {language}: {file_count} files, {line_count} lines
{end for}
Recommendations:
{recommendations}
```
  </output>
</tool>

View File

@ -0,0 +1,123 @@
<?xml version="1.0" encoding="UTF-8"?>
<!--
  Context Extractor tool definition.

  Sections:
    parameters       : project path, optional focus area, and output token budget
    extraction_areas : what is extracted and from which sources
    execution        : ordered steps that gather the findings
    output           : markdown template of the generated context document
-->
<tool id="context-extractor" name="Context Extractor" standalone="true">
  <description>Extract key context from codebase for AI agent consumption (patterns, conventions, critical files)</description>

  <parameters>
    <param name="path" required="false" default="." description="Project root path"/>
    <param name="focus" required="false" default="all" description="Focus area: all, patterns, dependencies, structure, conventions"/>
    <param name="max_tokens" required="false" default="4000" description="Maximum token budget for output"/>
  </parameters>

  <extraction_areas>
    <area name="project_structure">
      <description>Key directories and their purposes</description>
      <sources>
        <source>Directory listing (depth 3)</source>
        <source>README.md</source>
        <source>package.json / setup.py / Cargo.toml</source>
      </sources>
    </area>
    <area name="coding_patterns">
      <description>Common patterns used in the codebase</description>
      <sources>
        <source>Import patterns</source>
        <source>Naming conventions</source>
        <source>Error handling patterns</source>
        <source>Async patterns</source>
      </sources>
    </area>
    <area name="dependencies">
      <description>Key dependencies and their usage</description>
      <sources>
        <!-- Matches step 4, which reads whichever package manager files the project has. -->
        <source>package.json dependencies (or the equivalent manifest for the detected ecosystem)</source>
        <source>Import frequency analysis</source>
      </sources>
    </area>
    <area name="conventions">
      <description>Coding conventions and style</description>
      <sources>
        <source>.eslintrc / .prettierrc</source>
        <source>tsconfig.json / jsconfig.json</source>
        <!-- The file name starts with a dot. -->
        <source>.editorconfig</source>
        <source>Observed patterns in code</source>
      </sources>
    </area>
    <area name="critical_files">
      <description>Most important files to understand</description>
      <sources>
        <source>Entry points (index, main, app)</source>
        <source>Configuration files</source>
        <source>Type definitions</source>
        <source>Shared utilities</source>
      </sources>
    </area>
  </extraction_areas>

  <execution>
    <step n="1" goal="Scan project structure">
      <action>List directories up to depth 3</action>
      <action>Identify key directory patterns (src, lib, tests, etc.)</action>
      <action>Note technology indicators (package.json, Cargo.toml, etc.)</action>
    </step>
    <step n="2" goal="Analyze entry points">
      <action>Find entry files (index.ts, main.py, main.go, etc.)</action>
      <action>Extract high-level architecture from imports</action>
    </step>
    <step n="3" goal="Extract conventions">
      <action>Parse linter/formatter configs</action>
      <!-- "up to" keeps the step well-defined for projects with fewer than 10 files. -->
      <action>Sample up to 10 representative files for pattern analysis</action>
      <action>Identify naming conventions (camelCase, snake_case, etc.)</action>
    </step>
    <step n="4" goal="Analyze dependencies">
      <action>Extract key dependencies from package manager files</action>
      <action>Identify most-imported modules</action>
      <action>Note framework/library choices</action>
    </step>
    <step n="5" goal="Generate context document">
      <!-- The focus parameter was declared but no step consumed it. -->
      <action if="focus != 'all'">Include only the sections that correspond to the selected focus area</action>
      <action>Compile findings within max_tokens budget</action>
      <action>Prioritize most critical information</action>
      <action>Format for AI agent consumption</action>
    </step>
  </execution>

  <!--
    The outer fence uses four backticks so that the three-backtick fence around
    {directory_tree} nests inside it instead of closing the template early.
    Template text is kept at column 0: whitespace inside this element is data.
  -->
  <output>
````markdown
# Project Context
## Technology Stack
- Language: {language}
- Framework: {framework}
- Key Libraries: {libraries}
## Project Structure
```
{directory_tree}
```
## Entry Points
{entry_points}
## Coding Conventions
- Naming: {naming_convention}
- Formatting: {formatting_rules}
- Imports: {import_pattern}
## Key Patterns
{patterns}
## Critical Files
{critical_files}
## Dependencies
{key_dependencies}
````
  </output>
</tool>

View File

@ -0,0 +1,68 @@
<?xml version="1.0" encoding="UTF-8"?>
<!--
  Dependency Checker tool definition.

  Sections:
    parameters    : project path, report format, minimum severity to report
    detection     : supported package managers, the files that identify them,
                    and the audit command to run for each
    execution     : ordered steps (detect, audit, outdated check, report)
    output_format : report template for the "summary" format

  NOTE(review): only the "summary" template is defined; the "detailed" and
  "json" formats accepted by output_format are not specified in this file.
-->
<tool id="dependency-check" name="Dependency Checker" standalone="true">
  <description>Scan project dependencies for outdated packages and known vulnerabilities</description>

  <parameters>
    <param name="path" required="false" default="." description="Path to project root"/>
    <param name="output_format" required="false" default="summary" description="Output format: summary, detailed, json"/>
    <param name="severity_threshold" required="false" default="low" description="Minimum severity to report: low, medium, high, critical"/>
  </parameters>

  <!--
    NOTE(review): "poetry audit" does not appear to be a core Poetry command;
    it seems to be provided by the separate poetry-audit-plugin. Confirm before
    relying on it. Step 2 reports a missing audit tool explicitly.
  -->
  <detection>
    <package_manager name="npm" files="['package.json', 'package-lock.json']" command="npm audit"/>
    <package_manager name="yarn" files="['package.json', 'yarn.lock']" command="yarn audit"/>
    <package_manager name="pnpm" files="['package.json', 'pnpm-lock.yaml']" command="pnpm audit"/>
    <package_manager name="pip" files="['requirements.txt', 'Pipfile', 'pyproject.toml']" command="pip-audit"/>
    <package_manager name="poetry" files="['pyproject.toml', 'poetry.lock']" command="poetry audit"/>
    <package_manager name="go" files="['go.mod', 'go.sum']" command="govulncheck ./..."/>
    <package_manager name="cargo" files="['Cargo.toml', 'Cargo.lock']" command="cargo audit"/>
    <package_manager name="composer" files="['composer.json', 'composer.lock']" command="composer audit"/>
  </detection>

  <execution>
    <step n="1" goal="Detect package manager">
      <action>Scan {path} for package manager files</action>
      <action>Identify primary package manager from detected files</action>
      <!-- package.json and pyproject.toml are shared by several managers; the lock file decides. -->
      <action if="manifest matches more than one package manager">Choose the manager whose lock file is present (package-lock.json: npm, yarn.lock: yarn, pnpm-lock.yaml: pnpm, poetry.lock: poetry)</action>
      <action if="no package manager found">Report: "No supported package manager detected"</action>
    </step>
    <step n="2" goal="Run dependency audit">
      <action>Execute audit command for detected package manager</action>
      <!-- A missing tool must not be reported as "0 vulnerabilities". -->
      <action if="audit command not installed">Report: "Audit tool not available: {command}" and mark vulnerability counts as unknown</action>
      <action>Capture stdout and stderr</action>
      <!-- Audit tools commonly exit non-zero when they find vulnerabilities. -->
      <action>Treat a non-zero exit code with parseable output as findings present, not as a failed run</action>
      <action>Parse output for vulnerabilities</action>
    </step>
    <step n="3" goal="Check for outdated packages">
      <action>Run outdated check command (e.g., npm outdated, pip list --outdated)</action>
      <action>Parse output for package versions</action>
    </step>
    <step n="4" goal="Generate report">
      <action>Filter by severity_threshold</action>
      <action>Format output according to output_format</action>
    </step>
  </execution>

  <!-- Report text is kept at column 0: whitespace inside this element is part of the template. -->
  <output_format name="summary">
```
Dependency Check Report
=======================
Project: {project_name}
Package Manager: {package_manager}
Date: {date}
Vulnerabilities:
- Critical: {critical_count}
- High: {high_count}
- Medium: {medium_count}
- Low: {low_count}
Outdated Packages: {outdated_count}
Top Issues:
1. {top_issue_1}
2. {top_issue_2}
3. {top_issue_3}
```
  </output_format>
</tool>

View File

@ -0,0 +1,81 @@
<?xml version="1.0" encoding="UTF-8"?>
<!--
  Schema Validator tool definition.

  Sections:
    parameters        : file to validate, optional schema path, schema type
    supported_schemas : schema types (and versions) the tool understands;
                        "json" and "yaml" are syntax-only checks
    execution         : ordered steps (parse, detect type, load schema,
                        validate, report)
    output            : report template
-->
<tool id="schema-validator" name="Schema Validator" standalone="true">
  <description>Validate JSON/YAML files against schemas (JSON Schema, OpenAPI, etc.)</description>

  <parameters>
    <param name="file" required="true" description="Path to file to validate"/>
    <param name="schema" required="false" description="Path to schema file (auto-detect if not provided)"/>
    <!-- json and yaml are listed under supported_schemas, so they are valid values here too. -->
    <param name="schema_type" required="false" default="auto" description="Schema type: json-schema, openapi, asyncapi, json, yaml, auto"/>
  </parameters>

  <supported_schemas>
    <schema type="json-schema" versions="['draft-04', 'draft-06', 'draft-07', '2019-09', '2020-12']"/>
    <schema type="openapi" versions="['3.0', '3.1']"/>
    <schema type="asyncapi" versions="['2.0', '2.1', '2.2', '2.3', '2.4', '2.5', '2.6']"/>
    <schema type="yaml" description="YAML syntax validation"/>
    <schema type="json" description="JSON syntax validation"/>
  </supported_schemas>

  <execution>
    <step n="1" goal="Load and parse file">
      <action>Read file content</action>
      <action if="file missing or unreadable">Return: "Cannot read file: {file}"</action>
      <action>Detect file format (JSON or YAML)</action>
      <action>Parse content into object</action>
      <action if="parse error">Return: "Syntax error: {error_message}"</action>
    </step>
    <step n="2" goal="Detect schema type">
      <check if="schema_type == 'auto'">
        <action>Check for $schema property (JSON Schema)</action>
        <action>Check for openapi property (OpenAPI)</action>
        <action>Check for asyncapi property (AsyncAPI)</action>
        <!--
          Replaces "Default to json-schema if detected", which did not say what
          happens when nothing is detected. A user-supplied schema implies
          json-schema validation; otherwise only the syntax can be checked.
        -->
        <action if="no marker property found AND schema provided">Default to json-schema</action>
        <action if="no marker property found AND schema not provided">Fall back to syntax-only validation (json or yaml, matching the file format)</action>
      </check>
    </step>
    <step n="3" goal="Load schema">
      <check if="schema provided">
        <action>Load schema from {schema} path</action>
      </check>
      <check if="schema not provided AND type detected">
        <action>Use built-in schema for detected type</action>
      </check>
      <!-- Syntax-only types have no schema; parsing in step 1 already validated them. -->
      <check if="schema not provided AND type is json or yaml">
        <action>Skip schema loading and schema validation; report the syntax check result</action>
      </check>
    </step>
    <step n="4" goal="Validate">
      <action>Run validation against schema</action>
      <!-- The report has a Warnings section, so warnings must be collected as well. -->
      <action>Collect all validation errors and warnings</action>
      <action>Format error messages with line numbers (if possible)</action>
    </step>
    <step n="5" goal="Report">
      <action if="valid">Return: "Valid {schema_type} document"</action>
      <action if="invalid">Return: "Validation errors: {errors}"</action>
    </step>
  </execution>

  <!-- Report text is kept at column 0: whitespace inside this element is part of the template. -->
  <output>
```
Schema Validation Report
========================
File: {file}
Schema Type: {schema_type}
Status: {valid|invalid}
{if errors}
Errors ({error_count}):
{for each error}
- Line {line}: {path}
{message}
{end for}
{end if}
{if warnings}
Warnings ({warning_count}):
{for each warning}
- {path}: {message}
{end for}
{end if}
```
  </output>
</tool>