feat: enhance metadata extraction and implement Levenshtein distance for improved name matching

This commit is contained in:
Your Name 2026-02-07 21:32:00 +08:00
parent 2936bf8a4e
commit 474aaf5428
2 changed files with 51 additions and 10 deletions

View File

@ -96,6 +96,16 @@ export function detectBmadRoot(): string | undefined {
/**
* Lightweight extraction of metadata from YAML agent files.
* Reads plain text and uses regex — no YAML parser dependency.
*
* LIMITATION: The regexes below match any indented `key: value` line in the
* file, not only keys under specific YAML blocks (e.g. `metadata:` or
* `persona:`). This means a `title:` nested under an unrelated section
* could be picked up. This is a deliberate trade-off:
* - Pro: zero external dependencies, fast, simple.
* - Con: may over-match in unusual YAML structures.
* A full YAML parser (e.g. `yaml` or `js-yaml`) would eliminate the
* ambiguity but add a dependency and complexity not justified for
* display-only metadata hints.
*/
function extractAgentMeta(content: string): { title?: string; description?: string; icon?: string; module?: string; role?: string } {
const meta: { title?: string; description?: string; icon?: string; module?: string; role?: string } = {};
@ -279,13 +289,20 @@ export function startWatching(ctx: vscode.ExtensionContext): void {
const pattern = new vscode.RelativePattern(wsRoot, '**/*.{yaml,md}');
_watcher = vscode.workspace.createFileSystemWatcher(pattern);
// Debounce: the glob matches all yaml/md files so unrelated edits may
// fire frequently. Collapse rapid bursts into a single rebuild.
let debounceTimer: ReturnType<typeof setTimeout> | undefined;
const rebuild = () => {
logInfo('File change detected — rebuilding index');
refreshIndex();
if (debounceTimer) { clearTimeout(debounceTimer); }
debounceTimer = setTimeout(() => {
logInfo('File change detected — rebuilding index');
refreshIndex();
}, 500);
};
_watcher.onDidCreate(rebuild);
_watcher.onDidDelete(rebuild);
_watcher.onDidChange(rebuild);
ctx.subscriptions.push(_watcher);
ctx.subscriptions.push({ dispose: () => { if (debounceTimer) { clearTimeout(debounceTimer); } } });
}

View File

@ -171,16 +171,40 @@ export function findClosestName(input: string, index: BmadIndex): string | undef
// Substring match
const sub = all.find(n => n.toLowerCase().includes(lower));
if (sub) { return sub; }
// Levenshtein-like: best character overlap
// Fall back to true Levenshtein distance — O(n*m) per candidate but
// the candidate list is small (tens of items) so this is fine for a
// hint-only code path.
let best = all[0];
let bestScore = 0;
let bestDist = Infinity;
for (const n of all) {
let score = 0;
const nl = n.toLowerCase();
for (let i = 0; i < Math.min(lower.length, nl.length); i++) {
if (lower[i] === nl[i]) { score++; }
}
if (score > bestScore) { bestScore = score; best = n; }
const d = levenshtein(lower, n.toLowerCase());
if (d < bestDist) { bestDist = d; best = n; }
}
return best;
}
/**
 * Computes the Levenshtein (edit) distance between two strings: the minimum
 * number of single-character insertions, deletions and substitutions needed
 * to turn one string into the other.
 *
 * Characters are compared via plain string indexing (UTF-16 code units).
 * Only one DP row is kept, sized by the shorter input, so the extra memory
 * is O(min(a.length, b.length)).
 *
 * @param a - First string.
 * @param b - Second string.
 * @returns The edit distance; 0 when the strings are identical.
 */
function levenshtein(a: string, b: string): number {
  // Trivial cases: identical strings, or one side empty (the distance is
  // then simply the length of the other side).
  if (a === b) {
    return 0;
  }
  if (a.length === 0 || b.length === 0) {
    return a.length + b.length;
  }

  // The distance is symmetric, so lay the shorter string along the row.
  const [shorter, longer] = a.length <= b.length ? [a, b] : [b, a];
  const width = shorter.length;

  // costs[k] = distance between shorter.slice(0, k) and the prefix of
  // `longer` consumed so far. Initially that prefix is empty, so costs[k] = k.
  const costs: number[] = [];
  for (let k = 0; k <= width; k++) {
    costs.push(k);
  }

  for (let r = 0; r < longer.length; r++) {
    const target = longer[r];
    // `diagonal` carries the previous row's value from one column to the left.
    let diagonal = costs[0];
    costs[0] = r + 1;
    for (let c = 1; c <= width; c++) {
      const above = costs[c]; // previous row, same column
      const left = costs[c - 1]; // current row, already updated
      if (shorter[c - 1] === target) {
        // Matching characters cost nothing extra.
        costs[c] = diagonal;
      } else {
        // Cheapest of substitution (diagonal), and the two single-character
        // insert/delete moves (above, left), plus one edit.
        costs[c] = 1 + Math.min(diagonal, above, left);
      }
      diagonal = above;
    }
  }

  return costs[width];
}