feat: enhance task purpose extraction from markdown files with improved parsing and cleanup logic
This commit is contained in:
parent
81f8020613
commit
392d5f7852
|
|
@ -225,31 +225,93 @@ class IdeSetup extends BaseIdeSetup {
|
||||||
return null;
|
return null;
|
||||||
};
|
};
|
||||||
|
|
||||||
// Helper: extract Purpose string from a task markdown file's YAML
|
// Helper: extract Purpose string from a task file (YAML fenced block, Markdown heading, or inline 'Purpose:')
|
||||||
const extractTaskPurposeFromFile = async (absPath) => {
|
const extractTaskPurposeFromFile = async (absPath) => {
|
||||||
|
/**
 * Reduce raw "Purpose" text to a single-line, plain-text summary.
 *
 * Steps: remove fenced code blocks and HTML comments, normalize line endings,
 * keep the first non-empty paragraph, strip leading quote/list/heading markers
 * and simple Markdown emphasis/inline-code markup, then collapse whitespace.
 *
 * Summaries longer than 320 characters are shortened. The cut is made at the
 * LAST sentence end (`.`, `!` or `?` followed by a space or the end of the
 * text) found within the first 360 characters, so a short leading sentence
 * such as "Note." no longer swallows the rest of the summary. When no sentence
 * end exists in that window the text is cut at the last word boundary within
 * 320 characters, and only as a last resort in the middle of a word.
 *
 * @param {*} text - Raw text; falsy values yield null, anything else is stringified.
 * @returns {string|null} Cleaned summary (at most 360 characters), or null when nothing usable remains.
 */
const cleanupAndSummarize = (text) => {
  if (!text) return null;

  const maxLen = 320; // soft cap on the summary length
  const slack = 40; // extra characters allowed in order to finish a sentence

  const normalized = String(text)
    // Drop code fences and HTML comments
    .replaceAll(/```[\s\S]*?```/g, '')
    .replaceAll(/<!--[\s\S]*?-->/g, '')
    // Normalize line endings
    .replaceAll(/\r\n?/g, '\n');

  // Take the first non-empty paragraph (paragraphs are separated by blank lines)
  const firstParagraph =
    normalized
      .split(/\n\s*\n/)
      .map((p) => p.trim())
      .find((p) => p.length > 0) ?? '';

  const summary = firstParagraph
    // Remove leading list markers, quotes, and heading remnants
    .replaceAll(/^\s*[>*-]\s+/gm, '')
    .replaceAll(/^#{1,6}\s+/gm, '')
    // Strip simple Markdown formatting (bold, italic, inline code)
    .replaceAll(/\*\*([^*]+)\*\*/g, '$1')
    .replaceAll(/\*([^*]+)\*/g, '$1')
    .replaceAll(/`([^`]+)`/g, '$1')
    // Collapse whitespace: every whitespace run becomes one plain space
    .replaceAll(/\s+/g, ' ')
    .trim();

  if (!summary) return null;
  if (summary.length <= maxLen) return summary;

  // Prefer the last sentence end inside the window. Whitespace is already
  // collapsed, so a sentence end is punctuation followed by ' ' or end of text.
  // Checking the following character in the full summary (not in a slice)
  // avoids treating the window edge as an end of text.
  const windowEnd = Math.min(summary.length, maxLen + slack);
  for (let i = windowEnd - 1; i >= 0; i -= 1) {
    const isTerminator = '.!?'.includes(summary[i]);
    const isFollowedByBreak = i + 1 === summary.length || summary[i + 1] === ' ';
    if (isTerminator && isFollowedByBreak) return summary.slice(0, i + 1);
  }

  // No sentence end available: fall back to the last word boundary. Looking at
  // maxLen + 1 characters keeps a word that ends exactly at the cap.
  const lastSpace = summary.slice(0, maxLen + 1).lastIndexOf(' ');
  if (lastSpace > 0) return summary.slice(0, lastSpace);

  // A single unbroken token: hard cut, but never leave half of a surrogate pair.
  let hardCut = summary.slice(0, maxLen);
  const lastCode = hardCut.charCodeAt(hardCut.length - 1);
  if (lastCode >= 0xd800 && lastCode <= 0xdbff) hardCut = hardCut.slice(0, -1);
  return hardCut;
};
|
||||||
|
|
||||||
try {
|
try {
|
||||||
const raw = await fileManager.readFile(absPath);
|
const raw = await fileManager.readFile(absPath);
|
||||||
|
// 1) YAML fenced block: look for Purpose fields
|
||||||
const yamlMatch = raw.match(/```ya?ml\r?\n([\s\S]*?)```/);
|
const yamlMatch = raw.match(/```ya?ml\r?\n([\s\S]*?)```/);
|
||||||
const yamlBlock = yamlMatch ? yamlMatch[1].trim() : null;
|
const yamlBlock = yamlMatch ? yamlMatch[1].trim() : null;
|
||||||
if (!yamlBlock) return null;
|
if (yamlBlock) {
|
||||||
// Try parsing YAML for better robustness
|
try {
|
||||||
try {
|
const data = yaml.load(yamlBlock);
|
||||||
const data = yaml.load(yamlBlock);
|
if (data) {
|
||||||
if (data) {
|
let val = data.Purpose ?? data.purpose;
|
||||||
let val = data.Purpose ?? data.purpose;
|
if (!val && data.task && (data.task.Purpose || data.task.purpose)) {
|
||||||
if (!val && data.task && (data.task.Purpose || data.task.purpose)) {
|
val = data.task.Purpose ?? data.task.purpose;
|
||||||
val = data.task.Purpose ?? data.task.purpose;
|
}
|
||||||
|
if (typeof val === 'string') {
|
||||||
|
const cleaned = cleanupAndSummarize(val);
|
||||||
|
if (cleaned) return cleaned;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
if (typeof val === 'string') return val.trim();
|
} catch {
|
||||||
|
// ignore YAML parse errors
|
||||||
|
}
|
||||||
|
// Fallback regex inside YAML block
|
||||||
|
const quoted = yamlBlock.match(/(?:^|\n)\s*(?:Purpose|purpose):\s*"([^"]+)"/);
|
||||||
|
if (quoted && quoted[1]) {
|
||||||
|
const cleaned = cleanupAndSummarize(quoted[1]);
|
||||||
|
if (cleaned) return cleaned;
|
||||||
|
}
|
||||||
|
const unquoted = yamlBlock.match(/(?:^|\n)\s*(?:Purpose|purpose):\s*([^\n\r]+)/);
|
||||||
|
if (unquoted && unquoted[1]) {
|
||||||
|
const cleaned = cleanupAndSummarize(unquoted[1]);
|
||||||
|
if (cleaned) return cleaned;
|
||||||
}
|
}
|
||||||
} catch {
|
|
||||||
// ignore YAML parse errors
|
|
||||||
}
|
}
|
||||||
// Fallback regex
|
|
||||||
const quoted = yamlBlock.match(/(?:^|\n)\s*(?:Purpose|purpose):\s*"([^"]+)"/);
|
// 2) Markdown heading section: ## Purpose (any level >= 2)
|
||||||
if (quoted && quoted[1]) return quoted[1].trim();
|
const headingRe = /^(#{2,6})\s*Purpose\s*$/im;
|
||||||
const unquoted = yamlBlock.match(/(?:^|\n)\s*(?:Purpose|purpose):\s*([^\n\r]+)/);
|
const headingMatch = headingRe.exec(raw);
|
||||||
if (unquoted && unquoted[1]) return unquoted[1].trim();
|
if (headingMatch) {
|
||||||
|
const headingLevel = headingMatch[1].length;
|
||||||
|
const sectionStart = headingMatch.index + headingMatch[0].length;
|
||||||
|
const rest = raw.slice(sectionStart);
|
||||||
|
// Next heading of same or higher level ends the section
|
||||||
|
const nextHeadingRe = new RegExp(`^#{1,${headingLevel}}\\s+[^\n]+`, 'im');
|
||||||
|
const nextMatch = nextHeadingRe.exec(rest);
|
||||||
|
const section = nextMatch ? rest.slice(0, nextMatch.index) : rest;
|
||||||
|
const cleaned = cleanupAndSummarize(section);
|
||||||
|
if (cleaned) return cleaned;
|
||||||
|
}
|
||||||
|
|
||||||
|
// 3) Inline single-line fallback: Purpose: ...
|
||||||
|
const inline = raw.match(/(?:^|\n)\s*Purpose\s*:\s*([^\n\r]+)/i);
|
||||||
|
if (inline && inline[1]) {
|
||||||
|
const cleaned = cleanupAndSummarize(inline[1]);
|
||||||
|
if (cleaned) return cleaned;
|
||||||
|
}
|
||||||
} catch {
|
} catch {
|
||||||
// ignore
|
// ignore
|
||||||
}
|
}
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue