PR #830 - Markdown Tooling (#839)

* PR #830 - Markdown Tooling

New Tools:
- check-md-conformance.js - CommonMark checker (lists, tables, fences, bullets)
- fix-fence-languages.js - Auto-adds languages to code fences with heuristics

Features:
- Exit codes for CI/CD integration
- Dry-run mode for safe preview
- Handles nested fences (3+ backticks)
- Language detection: yaml, json, bash, javascript, xml, markdown, text

* Fix code quality issues from Copilot review

- Remove duplicated code block (lines 292-304) in check-md-conformance.js
- Remove unused variable fenceStartLine in check-md-conformance.js
- Remove unused variable hasLanguage in fix-fence-languages.js
- Rename fixOpenTicks to fixOpenLine to store full original line
This commit is contained in:
Keimpe de Jong 2025-11-01 22:16:26 +00:00 committed by GitHub
parent 6d2b6810c2
commit 01a1752ccc
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 576 additions and 0 deletions

View File

@ -0,0 +1,288 @@
/**
* MD Conformance Checker (CommonMark-oriented)
*
* Checks .md files for:
* 1) Blank line before/after bullet and numbered lists
* 2) Blank line before/after tables
* 3) Blank line before/after fenced code blocks
* 4) Bullet marker normalization: "-" only (not "*" or "+")
* 5) Code fence language present (fallback should be specified by author)
*
* Usage:
* node tools/markdown/check-md-conformance.js [paths...]
* - If a path is a directory, scans recursively for .md files
* - If a path is a file and ends with .md, scans that file
*
* Exit codes:
* 0 -> No violations
* 1 -> Violations found
* 2 -> Invalid usage (no paths given)
*/
const fs = require('node:fs');
const path = require('node:path');
/**
 * Collect every markdown file reachable from a path.
 *
 * Directories are descended depth-first in the order reported by
 * `fs.readdirSync`; entries named `node_modules` or starting with `.git`
 * are skipped. A plain file is returned only when its name ends with `.md`
 * (case-insensitive).
 *
 * @param {string} targetPath - File or directory to scan.
 * @returns {string[]} Paths of the markdown files found.
 */
function listMarkdownFiles(targetPath) {
  const collect = (current) => {
    const info = fs.statSync(current);
    if (info.isDirectory()) {
      return fs
        .readdirSync(current)
        .filter((name) => name !== 'node_modules' && !name.startsWith('.git'))
        .flatMap((name) => collect(path.join(current, name)));
    }
    const isMarkdown = info.isFile() && current.toLowerCase().endsWith('.md');
    return isMarkdown ? [current] : [];
  };
  return collect(targetPath);
}
/**
 * Tell whether a line starts a list item: a `-`, `*` or `+` bullet, or a
 * number followed by a dot, each followed by whitespace.
 *
 * @param {string} line - A single line of markdown.
 * @returns {boolean} True for bullet or numbered list items.
 */
function isListLine(line) {
  const bulletItem = /^\s*([-*+])\s+/;
  const numberedItem = /^\s*\d+\.\s+/;
  return [bulletItem, numberedItem].some((pattern) => pattern.test(line));
}
/**
 * Tell whether a line is a bullet list item (`-`, `*` or `+` followed by
 * whitespace, optionally indented).
 *
 * @param {string} line - A single line of markdown.
 * @returns {boolean} True for bullet items, false otherwise.
 */
function isBulletLine(line) {
  const bulletItem = /^\s*([-*+])\s+/;
  return line.match(bulletItem) !== null;
}
/**
 * Extract the bullet character of a bullet list item.
 *
 * @param {string} line - A single line of markdown.
 * @returns {string|null} `-`, `*` or `+`, or null when the line is not a bullet item.
 */
function bulletMarker(line) {
  const found = /^\s*([-*+])\s+/.exec(line);
  if (found === null) {
    return null;
  }
  return found[1];
}
/**
 * Heuristic table-row detector: a line counts as a table line when it
 * contains a pipe, is not a code-fence line, and is not just a lone pipe
 * surrounded by whitespace. Consecutive such lines are treated as one table.
 *
 * @param {string} line - A single line of markdown.
 * @returns {boolean} True when the line looks like part of a table.
 */
function isTableLine(line) {
  if (line.trim().startsWith('```')) {
    return false;
  }
  if (!line.includes('|')) {
    return false;
  }
  const loneBar = /^\s*\|\s*$/;
  return !loneBar.test(line);
}
/**
 * Tell whether a line begins (after optional leading whitespace) with at
 * least three backticks, i.e. looks like a code-fence delimiter.
 *
 * @param {string} line - A single line of markdown.
 * @returns {boolean} True when the line starts with a backtick fence.
 */
function isFenceStart(line) {
  return line.trimStart().startsWith('```');
}
/**
 * Extract the language identifier from a code-fence opening line.
 *
 * The fence may be any run of three or more backticks, so openers such as
 * "````markdown" (used to wrap examples that themselves contain ``` fences)
 * report their language instead of being treated as language-less.
 *
 * @param {string} line - A single line of markdown.
 * @returns {string} The identifier (letters, digits, `_`, `+`, `-`), or ''
 *   when the line is not a fence or carries no identifier.
 */
function fenceLanguage(line) {
  // `{3,}` consumes the whole backtick run; matching exactly three would
  // leave a backtick in front of the identifier and hide it.
  const m = line.match(/^\s*`{3,}\s*([a-zA-Z0-9_+-]+)?/);
  return m?.[1] ?? '';
}
/**
 * Tell whether a line is empty or consists only of whitespace.
 *
 * @param {string} line - A single line of text.
 * @returns {boolean} True when nothing but whitespace is present.
 */
function isBlank(line) {
  return line.trim().length === 0;
}
/**
 * Check one markdown file against the conformance rules.
 *
 * Reported violation types:
 * - fence-language-missing / fence-blank-before / fence-blank-after
 * - list-blank-before / list-blank-after / bullet-marker
 * - table-blank-before / table-blank-after
 *
 * Fence violations are listed first, then list violations, then table
 * violations; each group is in line order.
 *
 * @param {string} filePath - Path of the markdown file (read as UTF-8).
 * @returns {Array<{type: string, line: number, message: string}>} Violations
 *   with 1-based line numbers; empty when the file conforms.
 */
function checkFile(filePath) {
  const content = fs.readFileSync(filePath, 'utf8');
  const lines = content.split(/\r?\n/);
  const violations = [];
  const report = (type, index, message) => {
    violations.push({ type, line: index + 1, message });
  };

  // Pass 1: fences. Marks fence delimiters and their content as excluded so
  // list/table checks never look inside code, and reports fence violations.
  const excluded = Array.from({ length: lines.length }, () => false);
  let openFenceLen = 0; // 0 means "not inside a fence"
  for (const [i, line] of lines.entries()) {
    const fence = line.match(/^\s*(`{3,})/);
    if (openFenceLen === 0) {
      if (fence === null) continue;
      // Opening fence
      excluded[i] = true;
      openFenceLen = fence[1].length;
      if (!fenceLanguage(line)) {
        report('fence-language-missing', i, 'Code fence missing language identifier (e.g., ```bash)');
      }
      if (i > 0 && !isBlank(lines[i - 1])) {
        report('fence-blank-before', i, 'Missing blank line before code fence');
      }
      continue;
    }
    // Inside a fence: everything is excluded. Only a bare backtick run at
    // least as long as the opener closes it, so shorter or info-string
    // fences shown inside an example stay part of the content.
    excluded[i] = true;
    const closes = fence !== null && fence[1].length >= openFenceLen && isBlank(line.slice(fence[0].length));
    if (!closes) continue;
    openFenceLen = 0;
    if (i + 1 < lines.length && !isBlank(lines[i + 1])) {
      report('fence-blank-after', i, 'Missing blank line after code fence');
    }
  }

  // Pass 2: lists. A block is a run of contiguous list lines; the lines
  // immediately before and after it must be blank, and bullets must use '-'.
  let i = 0;
  while (i < lines.length) {
    if (excluded[i] || !isListLine(lines[i])) {
      i++;
      continue;
    }
    const start = i;
    if (start > 0 && !isBlank(lines[start - 1])) {
      report('list-blank-before', start, 'Missing blank line before list');
    }
    // Walk the block once, so each item (including the first) is checked
    // for its bullet marker exactly one time.
    let end = start;
    while (end < lines.length && !excluded[end] && isListLine(lines[end])) {
      if (isBulletLine(lines[end])) {
        const marker = bulletMarker(lines[end]);
        if (marker && marker !== '-') {
          report('bullet-marker', end, `Use '-' for bullets, found '${marker}'`);
        }
      }
      end++;
    }
    if (end < lines.length && !isBlank(lines[end])) {
      // Attributed to the last line of the list block
      report('list-blank-after', end - 1, 'Missing blank line after list');
    }
    i = end;
  }

  // Pass 3: tables. A block is a run of contiguous lines that look like
  // table rows; the lines immediately before and after it must be blank.
  i = 0;
  while (i < lines.length) {
    if (excluded[i] || !isTableLine(lines[i])) {
      i++;
      continue;
    }
    const start = i;
    let end = start;
    while (end < lines.length && !excluded[end] && isTableLine(lines[end])) end++;
    if (start > 0 && !isBlank(lines[start - 1])) {
      report('table-blank-before', start, 'Missing blank line before table');
    }
    if (end < lines.length && !isBlank(lines[end])) {
      // Attributed to the last line of the table block
      report('table-blank-after', end - 1, 'Missing blank line after table');
    }
    i = end;
  }

  return violations;
}
/**
 * Command-line entry point of the conformance checker.
 *
 * Resolves every argument to markdown files (directories are scanned
 * recursively, missing paths are reported on stderr and skipped), checks
 * each file, prints a report and terminates the process:
 * exit 2 when no arguments were given, 0 when nothing was violated,
 * 1 when at least one violation was found.
 */
function main() {
  const inputs = process.argv.slice(2);
  if (inputs.length === 0) {
    console.error('Usage: node tools/markdown/check-md-conformance.js [paths...]');
    process.exit(2);
  }

  // Expand the inputs into a flat list of markdown files
  const files = [];
  for (const input of inputs) {
    const resolved = path.resolve(input);
    if (!fs.existsSync(resolved)) {
      console.error(`Path not found: ${resolved}`);
      continue;
    }
    const info = fs.statSync(resolved);
    if (info.isDirectory()) {
      files.push(...listMarkdownFiles(resolved));
      continue;
    }
    if (info.isFile() && resolved.toLowerCase().endsWith('.md')) {
      files.push(resolved);
    }
  }

  // Keep only the files that actually have violations
  const summary = files
    .map((file) => ({ file, violations: checkFile(file) }))
    .filter((entry) => entry.violations.length > 0);
  const total = summary.reduce((count, entry) => count + entry.violations.length, 0);

  if (summary.length === 0) {
    console.log('MD Conformance: PASS (no violations)');
    process.exit(0);
  }

  // Report, grouped per file
  console.log(`MD Conformance: FAIL (${total} violation(s) in ${summary.length} file(s))`);
  summary.forEach(({ file, violations }) => {
    console.log(`\n- ${path.relative(process.cwd(), file)}`);
    violations.forEach((v) => {
      console.log(` L${v.line.toString().padStart(4, ' ')} ${v.type} ${v.message}`);
    });
  });
  process.exit(1);
}
// Run the CLI only when this file is executed directly (not when required).
if (require.main === module) {
main();
}
// Expose checkFile so other scripts can call the checker programmatically.
module.exports = { checkFile };

View File

@ -0,0 +1,288 @@
/**
* Fix Fence Languages - Add language identifiers to code fences
*
* This script detects fenced code blocks without language identifiers
* and adds appropriate languages based on content heuristics.
*
* Usage:
* node tools/markdown/fix-fence-languages.js [--dry-run] <file1> [file2...]
*
* Options:
* --dry-run Show what would be fixed without modifying files
*
* Exit codes:
* 0 -> No issues found or all fixed successfully
* 1 -> Issues found (dry-run mode) or errors during fix
* 2 -> Invalid usage (missing file arguments)
*/
const fs = require('node:fs');
const path = require('node:path');
// True when --dry-run appears anywhere on the command line; main() then
// reports the fixes it would make without writing any file.
const DRY_RUN = process.argv.includes('--dry-run');
/**
 * Detect language from fence content using simple heuristics.
 *
 * Checks run in this order and the first match wins: empty -> text, YAML,
 * JSON (must parse), ES module `export` statements, shell, JavaScript,
 * XML/HTML, markdown. Anything else, including arrow/box-drawing diagrams,
 * is reported as text.
 *
 * @param {string} content - Body of the code fence (without the fence lines).
 * @returns {string} One of 'yaml', 'json', 'bash', 'javascript', 'xml',
 *   'markdown' or 'text'.
 */
function detectLanguage(content) {
  const trimmed = content.trim();

  // Empty fence
  if (!trimmed) return 'text';

  // YAML detection: a leading "key:" must be followed by whitespace or end of
  // input, so "https://..." and "C:\..." are not mistaken for mapping keys.
  // A document separator line ("---") anywhere also counts.
  if (/^[a-zA-Z_][a-zA-Z0-9_-]*:(?:\s|$)/.test(trimmed) || /^---\s*$/m.test(trimmed)) {
    return 'yaml';
  }

  // JSON detection: only when the whole content parses
  const looksLikeObject = trimmed.startsWith('{') && trimmed.endsWith('}');
  const looksLikeArray = trimmed.startsWith('[') && trimmed.endsWith(']');
  if (looksLikeObject || looksLikeArray) {
    try {
      JSON.parse(trimmed);
      return 'json';
    } catch {
      // Not valid JSON, continue with the remaining heuristics
    }
  }

  // ES module exports must be recognised before the shell check, because
  // "export" is also a shell builtin ("export FOO=bar").
  if (/^export\s+(?:(?:default|const|let|var|function\*?|class|async)\s|[{*])/.test(trimmed)) {
    return 'javascript';
  }

  // Shell/Bash detection: common commands, "$" prompts, or a sh/bash shebang
  if (
    /^(?:npm|yarn|pnpm|git|node|npx|cd|mkdir|rm|cp|mv|ls|cat|echo|export|source)\s/.test(trimmed) ||
    /^\$/.test(trimmed) ||
    /^#!\/bin\/(ba)?sh/.test(trimmed)
  ) {
    return 'bash';
  }

  // JavaScript/TypeScript detection: leading keyword or comment
  if (/^(import|const|let|var|function|class|async|await)\s/.test(trimmed) || /^\/\//.test(trimmed) || /^\/\*/.test(trimmed)) {
    return 'javascript';
  }

  // XML/HTML detection
  if (/^<[a-zA-Z][^>]*>/.test(trimmed)) {
    return 'xml';
  }

  // Markdown detection (for nested examples): heading or link at the start
  if (/^#{1,6}\s/.test(trimmed) || /^\[.*\]\(.*\)/.test(trimmed)) {
    return 'markdown';
  }

  // Flow/box diagrams and any other unknown content default to text
  return 'text';
}
/**
 * Fix a single file: add a detected language to every top-level code fence
 * that has none. Nothing is written to disk; the caller decides whether to
 * save `newContent`.
 *
 * Only the opening line of a fixed fence changes. Its indentation and its
 * backtick count are kept, and every original line terminator (LF or CRLF)
 * and the end-of-file state are preserved. Fences that already have an info
 * string, and anything nested inside them, are left untouched. If a
 * language-less fence is never closed, the file is reported as unmodified.
 *
 * @param {string} filePath - Path of the markdown file (read as UTF-8).
 * @returns {{filePath: string, fixes: Array<{line: number, original: string,
 *   fixed: string, detectedLanguage: string, contentPreview: string}>,
 *   modified: boolean, newContent: string}} Fix report; `line` is 1-based.
 */
function fixFile(filePath) {
  const content = fs.readFileSync(filePath, 'utf8');

  // Split with a capture group so terminators are kept:
  // [line0, term0, line1, term1, ..., lineN]
  const parts = content.split(/(\r?\n)/);
  const lines = parts.filter((_, idx) => idx % 2 === 0);
  const terminators = parts.filter((_, idx) => idx % 2 === 1);

  // A fix replaces exactly one line, so the output has the same line count
  const newLines = [...lines];
  const fixes = [];

  // Fences that already carry an info string (and fences nested in them);
  // while this stack is non-empty nothing is fixed.
  const fenceStack = [];

  // The language-less fence currently being collected, or null
  let target = null;
  let fenceContent = [];

  for (const [i, line] of lines.entries()) {
    if (target !== null) {
      // Collect content until a bare backtick run at least as long as the opener
      const closeMatch = line.match(/^\s*(`+)\s*$/);
      if (closeMatch === null || closeMatch[1].length < target.ticksLen) {
        fenceContent.push(line);
        continue;
      }
      const language = detectLanguage(fenceContent.join('\n'));
      // Keep the opener's own backtick count: shortening a longer fence
      // would let a nested ``` line close it prematurely.
      const fenceTicks = '`'.repeat(target.ticksLen);
      const fixedOpenLine = `${target.indent}${fenceTicks}${language}`;
      newLines[target.index] = fixedOpenLine;
      fixes.push({
        line: target.index + 1,
        original: target.line,
        fixed: fixedOpenLine,
        detectedLanguage: language,
        contentPreview: fenceContent.slice(0, 2).join('\n').slice(0, 60) + '...',
      });
      target = null;
      fenceContent = [];
      continue;
    }

    const fenceLineMatch = line.match(/^(\s*)(`{3,})(.*)$/);
    if (fenceLineMatch === null) continue;
    const [, indent, ticks, rest] = fenceLineMatch;
    const isBare = rest.trim() === '';

    if (fenceStack.length > 0) {
      // Inside an outer fence: a bare, long-enough run closes the innermost
      // scope; any other fence line opens a nested scope.
      if (isBare && ticks.length >= fenceStack.at(-1).ticks.length) {
        fenceStack.pop();
      } else {
        fenceStack.push({ ticks });
      }
      continue;
    }

    if (isBare) {
      // Top-level opening fence without language: start collecting
      target = { index: i, indent, line, ticksLen: ticks.length };
      fenceContent = [];
      continue;
    }

    // Top-level fence that already has an info string
    fenceStack.push({ ticks });
  }

  // Ended inside a language-less fence that never closed: abort for safety
  if (target !== null) {
    return {
      filePath,
      fixes: [],
      modified: false,
      newContent: content,
    };
  }

  return {
    filePath,
    fixes,
    modified: fixes.length > 0,
    // Re-attach each line's original terminator; the last line has none
    newContent: newLines.map((text, idx) => text + (terminators[idx] ?? '')).join(''),
  };
}
/**
 * Main execution: command-line entry point of the fence-language fixer.
 *
 * Every argument except `--dry-run` is treated as a markdown file path.
 * Files that need fixes are listed and, unless DRY_RUN is set, rewritten.
 *
 * Exit codes:
 * 0 -> nothing to fix, or all fixes applied and every input was found
 * 1 -> dry run found fences to fix, or at least one input file was missing
 * 2 -> no file arguments given
 */
function main() {
  const args = process.argv.slice(2).filter((arg) => arg !== '--dry-run');
  if (args.length === 0) {
    console.error('Usage: node tools/markdown/fix-fence-languages.js [--dry-run] <file1> [file2...]');
    process.exit(2);
  }

  const results = [];
  let totalFixes = 0;
  // Missing inputs are errors: remember them so the run cannot end with a
  // success status after silently skipping a file.
  let missingFiles = 0;
  for (const filePath of args) {
    const absPath = path.resolve(filePath);
    if (!fs.existsSync(absPath)) {
      console.error(`File not found: ${absPath}`);
      missingFiles++;
      continue;
    }
    if (!absPath.toLowerCase().endsWith('.md')) {
      console.error(`Skipping non-markdown file: ${absPath}`);
      continue;
    }
    const result = fixFile(absPath);
    if (result.fixes.length > 0) {
      results.push(result);
      totalFixes += result.fixes.length;
    }
  }

  // Print results
  if (results.length === 0) {
    if (missingFiles > 0) {
      console.error(`✗ ${missingFiles} input file(s) not found`);
      process.exit(1);
    }
    console.log('✓ No fence language issues found');
    process.exit(0);
  }

  if (DRY_RUN) {
    console.log(`\n🔍 DRY RUN: Found ${totalFixes} fence(s) without language in ${results.length} file(s)\n`);
  } else {
    console.log(`\n🔧 Fixing ${totalFixes} fence(s) in ${results.length} file(s)\n`);
  }

  for (const result of results) {
    console.log(`📄 ${path.relative(process.cwd(), result.filePath)}`);
    for (const fix of result.fixes) {
      console.log(` L${fix.line.toString().padStart(4, ' ')} ${fix.original.trim() || '```'}`);
      // Show the opener exactly as it will be written (without indentation)
      console.log(fix.fixed.trim());
      console.log(` Content: ${fix.contentPreview}`);
    }
    console.log('');

    // Apply fixes if not dry-run
    if (!DRY_RUN) {
      fs.writeFileSync(result.filePath, result.newContent, 'utf8');
      console.log(` ✓ Fixed and saved\n`);
    }
  }

  if (DRY_RUN) {
    console.log('💡 Run without --dry-run to apply these fixes\n');
    process.exit(1);
  }
  if (missingFiles > 0) {
    console.error(`✗ ${missingFiles} input file(s) not found`);
    process.exit(1);
  }
  console.log('✓ All fixes applied successfully\n');
  process.exit(0);
}
// Run the CLI only when this file is executed directly (not when required).
if (require.main === module) {
main();
}
// Expose the detection and fixing functions for programmatic use.
module.exports = { detectLanguage, fixFile };