feat(core): add utility tools for codebase analysis

- Add dependency-check.xml for vulnerability scanning
- Add schema-validator.xml for JSON/YAML/OpenAPI validation
- Add code-metrics.xml for size and complexity analysis
- Add context-extractor.xml for AI-optimized context extraction
2025-12-31 21:07:00 +04:00 · 2025-12-31 21:07:00 +04:00 · 2a746a6fc4
parent 1abc60e068
commit 2a746a6fc4
4 changed files with 383 additions and 0 deletions
--- a/src/core/tools/code-metrics.xml
+++ b/src/core/tools/code-metrics.xml
@ -0,0 +1,111 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<tool id="code-metrics" name="Code Metrics Analyzer" standalone="true">
+  <description>Analyze codebase for size, complexity, and quality metrics</description>
+  <!-- Inputs: every parameter is optional; omitting include selects all source files (see step 1). -->
+  <parameters>
+    <param name="path" required="false" default="." description="Path to analyze"/>
+    <param name="include" required="false" description="File patterns to include (e.g., '*.ts,*.tsx')"/>
+    <param name="exclude" required="false" default="node_modules,dist,build,.git" description="Directories to exclude"/>
+    <param name="output" required="false" default="summary" description="Output format: summary, detailed, json"/>
+  </parameters>
+  <!-- Metric catalogue: the report template refers to each metric by the name declared here. -->
+  <metrics>
+    <category name="Size">
+      <metric name="total_files" description="Total number of source files"/>
+      <metric name="total_lines" description="Total lines of code"/>
+      <metric name="lines_of_code" description="Lines of actual code (excluding blanks/comments)"/>
+      <metric name="blank_lines" description="Number of blank lines"/>
+      <metric name="comment_lines" description="Number of comment lines"/>
+      <metric name="comment_ratio" description="Comment lines as a percentage of total lines"/>
+    </category>
+
+    <category name="Complexity">
+      <metric name="cyclomatic_complexity" description="Average cyclomatic complexity"/>
+      <metric name="max_complexity" description="Highest complexity in single function"/>
+      <metric name="deep_nesting" description="Functions with nesting > 4 levels"/>
+    </category>
+
+    <category name="Quality">
+      <metric name="duplicate_blocks" description="Detected code duplication"/>
+      <metric name="long_functions" description="Functions > 50 lines"/>
+      <metric name="large_files" description="Files > 500 lines"/>
+      <metric name="todo_count" description="TODO/FIXME comments"/>
+    </category>
+
+    <category name="Structure">
+      <metric name="avg_file_size" description="Average lines per file"/>
+      <metric name="avg_function_size" description="Average lines per function"/>
+      <metric name="dependency_depth" description="Maximum import depth"/>
+    </category>
+  </metrics>
+  <!-- Steps run in order: steps 2 to 4 compute the metrics, step 5 renders the report. -->
+  <execution>
+    <step n="1" goal="Scan codebase">
+      <action>Find all files matching include patterns (all source files when include is omitted)</action>
+      <action>Exclude directories from exclude list</action>
+      <action>Build file list for analysis</action>
+    </step>
+    <step n="2" goal="Count lines">
+      <action>For each file, count total, code, blank, comment lines</action>
+      <action>Aggregate totals by file type</action>
+    </step>
+    <step n="3" goal="Analyze complexity">
+      <action>Parse functions/methods in each file</action>
+      <action>Calculate cyclomatic complexity per function</action>
+      <action>Identify deeply nested code blocks</action>
+      <action>Derive structure metrics: average file size, average function size, maximum import depth</action>
+    </step>
+    <step n="4" goal="Quality checks">
+      <action>Detect duplicate code blocks</action>
+      <action>Find long functions (> 50 lines)</action>
+      <action>Find large files (> 500 lines)</action>
+      <action>Count TODO/FIXME comments</action>
+    </step>
+    <step n="5" goal="Generate report">
+      <action>Format metrics according to output parameter</action>
+      <action>Include recommendations for concerning metrics</action>
+    </step>
+  </execution>
+  <!-- Summary report template: a placeholder named after a metric takes that metric's value. -->
+  <output>
+    ```
+    Code Metrics Report
+    ===================
+    Project: {project_name}
+    Path: {path}
+    Date: {date}
+
+    Size Metrics:
+    - Total Files: {total_files}
+    - Total Lines: {total_lines}
+    - Lines of Code: {lines_of_code}
+    - Blank Lines: {blank_lines}
+    - Comment Lines: {comment_lines}
+    - Comment Ratio: {comment_ratio}%
+
+    Complexity Metrics:
+    - Avg Cyclomatic Complexity: {cyclomatic_complexity}
+    - Max Complexity: {max_complexity} ({max_complexity_file})
+    - Deep Nesting Issues: {deep_nesting}
+
+    Quality Metrics:
+    - Duplicate Code Blocks: {duplicate_blocks}
+    - Long Functions (>50 lines): {long_functions}
+    - Large Files (>500 lines): {large_files}
+    - TODO/FIXME Count: {todo_count}
+
+    Structure:
+    - Avg File Size: {avg_file_size} lines
+    - Avg Function Size: {avg_function_size} lines
+    - Max Import Depth: {dependency_depth}
+
+    Breakdown by Language:
+    {for each language}
+    - {language}: {file_count} files, {line_count} lines
+    {end for}
+
+    Recommendations:
+    {recommendations}
+    ```
+  </output>
+</tool>
--- a/src/core/tools/context-extractor.xml
+++ b/src/core/tools/context-extractor.xml
@ -0,0 +1,123 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!-- Tool definition: builds a token-bounded Markdown summary of a project for AI agents. -->
+<tool id="context-extractor" name="Context Extractor" standalone="true">
+  <description>Extract key context from codebase for AI agent consumption (patterns, conventions, critical files)</description>
+  <!-- Inputs: all optional. -->
+  <parameters>
+    <param name="path" required="false" default="." description="Project root path"/>
+    <param name="focus" required="false" default="all" description="Focus area: all, patterns, dependencies, structure, conventions"/>
+    <param name="max_tokens" required="false" default="4000" description="Maximum token budget for output"/>
+  </parameters>
+  <!-- Areas of context to collect; each lists the sources it draws on.
+       NOTE(review): focus has no value that selects critical_files; confirm whether that is intended. -->
+  <extraction_areas>
+    <area name="project_structure">
+      <description>Key directories and their purposes</description>
+      <sources>
+        <source>Directory listing (depth 3)</source>
+        <source>README.md</source>
+        <source>package.json / setup.py / Cargo.toml</source>
+      </sources>
+    </area>
+
+    <area name="coding_patterns">
+      <description>Common patterns used in the codebase</description>
+      <sources>
+        <source>Import patterns</source>
+        <source>Naming conventions</source>
+        <source>Error handling patterns</source>
+        <source>Async patterns</source>
+      </sources>
+    </area>
+
+    <area name="dependencies">
+      <description>Key dependencies and their usage</description>
+      <sources>
+        <source>package.json dependencies</source>
+        <source>Import frequency analysis</source>
+      </sources>
+    </area>
+
+    <area name="conventions">
+      <description>Coding conventions and style</description>
+      <sources>
+        <source>.eslintrc / .prettierrc</source>
+        <source>tsconfig.json / jsconfig.json</source>
+        <source>.editorconfig</source>
+        <source>Observed patterns in code</source>
+      </sources>
+    </area>
+
+    <area name="critical_files">
+      <description>Most important files to understand</description>
+      <sources>
+        <source>Entry points (index, main, app)</source>
+        <source>Configuration files</source>
+        <source>Type definitions</source>
+        <source>Shared utilities</source>
+      </sources>
+    </area>
+  </extraction_areas>
+  <!-- Steps run in order; step 5 applies focus and max_tokens to what steps 1 to 4 collected. -->
+  <execution>
+    <step n="1" goal="Scan project structure">
+      <action>List directories up to depth 3</action>
+      <action>Identify key directory patterns (src, lib, tests, etc.)</action>
+      <action>Note technology indicators (package.json, Cargo.toml, etc.)</action>
+    </step>
+    <step n="2" goal="Analyze entry points">
+      <action>Find entry files (index.ts, main.py, main.go, etc.)</action>
+      <action>Extract high-level architecture from imports</action>
+      <action>Locate configuration files, type definitions, and shared utilities</action>
+    </step>
+    <step n="3" goal="Extract conventions">
+      <action>Parse linter/formatter configs</action>
+      <action>Sample 10 representative files for pattern analysis</action>
+      <action>Identify naming conventions (camelCase, snake_case, etc.)</action>
+    </step>
+    <step n="4" goal="Analyze dependencies">
+      <action>Extract key dependencies from package manager files</action>
+      <action>Identify most-imported modules</action>
+      <action>Note framework/library choices</action>
+    </step>
+    <step n="5" goal="Generate context document">
+      <action>Keep only the sections selected by focus (all = every section)</action>
+      <action>Compile findings within max_tokens budget</action>
+      <action>Prioritize most critical information</action>
+      <action>Format for AI agent consumption</action>
+    </step>
+  </execution>
+  <!-- Markdown template. The outer fence uses four backticks so the inner three-backtick fence around {directory_tree} does not close it. -->
+  <output>
+    ````markdown
+    # Project Context
+
+    ## Technology Stack
+    - Language: {language}
+    - Framework: {framework}
+    - Key Libraries: {libraries}
+
+    ## Project Structure
+    ```
+    {directory_tree}
+    ```
+
+    ## Entry Points
+    {entry_points}
+
+    ## Coding Conventions
+    - Naming: {naming_convention}
+    - Formatting: {formatting_rules}
+    - Imports: {import_pattern}
+
+    ## Key Patterns
+    {patterns}
+
+    ## Critical Files
+    {critical_files}
+
+    ## Dependencies
+    {key_dependencies}
+    ````
+  </output>
+</tool>
--- a/src/core/tools/dependency-check.xml
+++ b/src/core/tools/dependency-check.xml
@ -0,0 +1,68 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<tool id="dependency-check" name="Dependency Checker" standalone="true">
+  <description>Scan project dependencies for outdated packages and known vulnerabilities</description>
+  <!-- Inputs: all optional; severity_threshold is applied in step 4. -->
+  <parameters>
+    <param name="path" required="false" default="." description="Path to project root"/>
+    <param name="output_format" required="false" default="summary" description="Output format: summary, detailed, json"/>
+    <param name="severity_threshold" required="false" default="low" description="Minimum severity to report: low, medium, high, critical"/>
+  </parameters>
+  <!-- Manifest/lock files that identify each package manager, and the audit command to run for it. -->
+  <detection>
+    <!-- NOTE(review): "yarn audit" is Yarn 1 syntax (Yarn 2+ uses "yarn npm audit"); "poetry audit" appears to need the poetry-audit-plugin. Confirm. -->
+    <package_manager name="npm" files="['package.json', 'package-lock.json']" command="npm audit"/>
+    <package_manager name="yarn" files="['package.json', 'yarn.lock']" command="yarn audit"/>
+    <package_manager name="pnpm" files="['package.json', 'pnpm-lock.yaml']" command="pnpm audit"/>
+    <package_manager name="pip" files="['requirements.txt', 'Pipfile', 'pyproject.toml']" command="pip-audit"/>
+    <package_manager name="poetry" files="['pyproject.toml', 'poetry.lock']" command="poetry audit"/>
+    <package_manager name="go" files="['go.mod', 'go.sum']" command="govulncheck ./..."/>
+    <package_manager name="cargo" files="['Cargo.toml', 'Cargo.lock']" command="cargo audit"/>
+    <package_manager name="composer" files="['composer.json', 'composer.lock']" command="composer audit"/>
+  </detection>
+  <!-- Steps run in order; step 1 stops the run when no package manager is detected. -->
+  <execution>
+    <step n="1" goal="Detect package manager">
+      <action>Scan {path} for package manager files</action>
+      <action>Identify primary package manager from detected files, preferring the one whose lock file is present (package.json and pyproject.toml alone are ambiguous)</action>
+      <action if="no package manager found">Report: "No supported package manager detected" and stop</action>
+    </step>
+    <step n="2" goal="Run dependency audit">
+      <action>Execute audit command for detected package manager</action>
+      <action if="audit command not installed">Report the missing tool and continue with step 3</action>
+      <action>Capture stdout and stderr</action>
+      <action>Treat a non-zero exit code as "findings present", not as a failed run (e.g., npm audit exits non-zero when vulnerabilities exist)</action>
+      <action>Parse output for vulnerabilities</action>
+    </step>
+    <step n="3" goal="Check for outdated packages">
+      <action>Run outdated check command (e.g., npm outdated, pip list --outdated)</action>
+      <action>Parse output for package versions</action>
+    </step>
+    <step n="4" goal="Generate report">
+      <action>Normalize tool severities (e.g., npm "moderate" = medium) and drop findings below severity_threshold</action>
+      <action>Format output according to output_format</action>
+    </step>
+  </execution>
+  <!-- Report template for output_format="summary". -->
+  <output_format name="summary">
+    ```
+    Dependency Check Report
+    =======================
+    Project: {project_name}
+    Package Manager: {package_manager}
+    Date: {date}
+
+    Vulnerabilities:
+    - Critical: {critical_count}
+    - High: {high_count}
+    - Medium: {medium_count}
+    - Low: {low_count}
+
+    Outdated Packages: {outdated_count}
+
+    Top Issues:
+    1. {top_issue_1}
+    2. {top_issue_2}
+    3. {top_issue_3}
+    ```
+  </output_format>
+</tool>
--- a/src/core/tools/schema-validator.xml
+++ b/src/core/tools/schema-validator.xml
@ -0,0 +1,81 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!-- Tool definition: checks a JSON or YAML file for syntax errors and, when a schema is available, for schema conformance. -->
+<tool id="schema-validator" name="Schema Validator" standalone="true">
+  <description>Validate JSON/YAML files against schemas (JSON Schema, OpenAPI, etc.)</description>
+  <!-- Inputs: only file is required. -->
+  <parameters>
+    <param name="file" required="true" description="Path to file to validate"/>
+    <param name="schema" required="false" description="Path to schema file (auto-detect if not provided)"/>
+    <param name="schema_type" required="false" default="auto" description="Schema type: json-schema, openapi, asyncapi, json, yaml, auto"/>
+  </parameters>
+  <!-- Schema types and versions the tool accepts; json and yaml are syntax-only checks. -->
+  <supported_schemas>
+    <schema type="json-schema" versions="['draft-04', 'draft-06', 'draft-07', '2019-09', '2020-12']"/>
+    <schema type="openapi" versions="['3.0', '3.1']"/>
+    <schema type="asyncapi" versions="['2.0', '2.1', '2.2', '2.3', '2.4', '2.5', '2.6']"/>
+    <schema type="yaml" description="YAML syntax validation"/>
+    <schema type="json" description="JSON syntax validation"/>
+  </supported_schemas>
+  <!-- Steps run in order; a parse error in step 1 returns immediately. -->
+  <execution>
+    <step n="1" goal="Load and parse file">
+      <action>Read file content</action>
+      <action>Detect file format (JSON or YAML)</action>
+      <action>Parse content into object</action>
+      <action if="parse error">Return: "Syntax error: {error_message}"</action>
+    </step>
+    <step n="2" goal="Detect schema type">
+      <check if="schema_type == 'auto'">
+        <action>Check for $schema property (JSON Schema)</action>
+        <action>Check for openapi property (OpenAPI)</action>
+        <action>Check for asyncapi property (AsyncAPI)</action>
+        <action>If no marker is found: assume json-schema when a schema file was provided, otherwise use syntax-only validation (json or yaml)</action>
+      </check>
+    </step>
+    <step n="3" goal="Load schema">
+      <check if="schema provided">
+        <action>Load schema from {schema} path</action>
+      </check>
+      <check if="schema not provided AND type detected">
+        <action>Use built-in schema for detected type</action>
+      </check>
+      <check if="schema not provided AND type is json or yaml">
+        <action>Load no schema; the successful parse in step 1 is the validation result</action>
+      </check>
+    </step>
+    <step n="4" goal="Validate">
+      <action>Run validation against the loaded schema (skip when no schema was loaded)</action>
+      <action>Collect all validation errors and warnings</action>
+      <action>Format error messages with line numbers (if possible)</action>
+    </step>
+    <step n="5" goal="Report">
+      <action if="valid">Return: "Valid {schema_type} document"</action>
+      <action if="invalid">Return: "Validation errors: {errors}"</action>
+    </step>
+  </execution>
+  <!-- Report template; the errors and warnings sections are emitted only when non-empty. -->
+  <output>
+    ```
+    Schema Validation Report
+    ========================
+    File: {file}
+    Schema Type: {schema_type}
+    Status: {valid|invalid}
+
+    {if errors}
+    Errors ({error_count}):
+    {for each error}
+    - Line {line}: {path}
+      {message}
+    {end for}
+    {end if}
+
+    {if warnings}
+    Warnings ({warning_count}):
+    {for each warning}
+    - {path}: {message}
+    {end for}
+    {end if}
+    ```
+  </output>
+</tool>