feat: enhance task purpose extraction from markdown files with improved parsing and cleanup logic

This commit is contained in:
Javier Gomez 2025-09-10 13:01:49 +02:00
parent 81f8020613
commit 392d5f7852
1 changed files with 79 additions and 17 deletions

View File

@ -225,14 +225,44 @@ class IdeSetup extends BaseIdeSetup {
return null; return null;
}; };
// Helper: extract Purpose string from a task markdown file's YAML // Helper: extract Purpose string from a task file (YAML fenced block, Markdown heading, or inline 'Purpose:')
const extractTaskPurposeFromFile = async (absPath) => { const extractTaskPurposeFromFile = async (absPath) => {
/**
 * Reduce a raw Purpose value (YAML scalar, Markdown section, or inline text)
 * to a short, single-line plain-text summary.
 *
 * Steps: strip fenced code blocks and HTML comments, keep the first non-empty
 * paragraph, remove list/quote/heading markers and simple Markdown emphasis,
 * collapse whitespace, and finally shorten overly long text.
 *
 * @param {*} text - Raw text; any falsy value yields `null`.
 * @returns {string|null} The cleaned summary, or `null` when nothing usable remains.
 */
const cleanupAndSummarize = (text) => {
  if (!text) return null;
  let t = String(text);
  // Drop code fences and HTML comments
  t = t.replaceAll(/```[\s\S]*?```/g, '');
  t = t.replaceAll(/<!--([\s\S]*?)-->/g, '');
  // Normalize line endings
  t = t.replaceAll(/\r\n?/g, '\n');
  // Take the first non-empty paragraph
  const paragraphs = t.split(/\n\s*\n/g).map((p) => p.trim());
  let first = paragraphs.find((p) => p.length > 0) || '';
  // Remove leading list markers, quotes, and headings remnants
  first = first.replaceAll(/^\s*[>*-]\s+/gm, '');
  first = first.replaceAll(/^#{1,6}\s+/gm, '');
  // Strip simple Markdown formatting
  first = first.replaceAll(/\*\*([^*]+)\*\*/g, '$1').replaceAll(/\*([^*]+)\*/g, '$1');
  first = first.replaceAll(/`([^`]+)`/g, '$1');
  // Collapse whitespace
  first = first.replaceAll(/\s+/g, ' ').trim();
  if (!first) return null;

  const maxLen = 320;
  // Allow a sentence to overrun the soft limit by this many characters.
  const overrun = 40;
  if (first.length <= maxLen) return first;

  // Prefer ending at the first sentence boundary if long. The search runs on
  // the full text (not a truncated slice) so that `$` only matches the real
  // end of the text; a '.' that merely happens to be the last character of
  // the truncation window (e.g. inside "3.14") is not treated as a boundary.
  const boundary = /[.!?](?=\s|$)/.exec(first);
  if (boundary && boundary.index < maxLen + overrun) {
    return first.slice(0, boundary.index + 1).trim();
  }

  // Hard cut. Step back one code unit when the cut would land between the two
  // halves of a surrogate pair, so the result never ends in a lone surrogate.
  let end = maxLen;
  const lastUnit = first.charCodeAt(end - 1);
  if (lastUnit >= 0xd800 && lastUnit <= 0xdbff) end -= 1;
  return first.slice(0, end).trim();
};
try { try {
const raw = await fileManager.readFile(absPath); const raw = await fileManager.readFile(absPath);
// 1) YAML fenced block: look for Purpose fields
const yamlMatch = raw.match(/```ya?ml\r?\n([\s\S]*?)```/); const yamlMatch = raw.match(/```ya?ml\r?\n([\s\S]*?)```/);
const yamlBlock = yamlMatch ? yamlMatch[1].trim() : null; const yamlBlock = yamlMatch ? yamlMatch[1].trim() : null;
if (!yamlBlock) return null; if (yamlBlock) {
// Try parsing YAML for better robustness
try { try {
const data = yaml.load(yamlBlock); const data = yaml.load(yamlBlock);
if (data) { if (data) {
@ -240,16 +270,48 @@ class IdeSetup extends BaseIdeSetup {
if (!val && data.task && (data.task.Purpose || data.task.purpose)) { if (!val && data.task && (data.task.Purpose || data.task.purpose)) {
val = data.task.Purpose ?? data.task.purpose; val = data.task.Purpose ?? data.task.purpose;
} }
if (typeof val === 'string') return val.trim(); if (typeof val === 'string') {
const cleaned = cleanupAndSummarize(val);
if (cleaned) return cleaned;
}
} }
} catch { } catch {
// ignore YAML parse errors // ignore YAML parse errors
} }
// Fallback regex // Fallback regex inside YAML block
const quoted = yamlBlock.match(/(?:^|\n)\s*(?:Purpose|purpose):\s*"([^"]+)"/); const quoted = yamlBlock.match(/(?:^|\n)\s*(?:Purpose|purpose):\s*"([^"]+)"/);
if (quoted && quoted[1]) return quoted[1].trim(); if (quoted && quoted[1]) {
const cleaned = cleanupAndSummarize(quoted[1]);
if (cleaned) return cleaned;
}
const unquoted = yamlBlock.match(/(?:^|\n)\s*(?:Purpose|purpose):\s*([^\n\r]+)/); const unquoted = yamlBlock.match(/(?:^|\n)\s*(?:Purpose|purpose):\s*([^\n\r]+)/);
if (unquoted && unquoted[1]) return unquoted[1].trim(); if (unquoted && unquoted[1]) {
const cleaned = cleanupAndSummarize(unquoted[1]);
if (cleaned) return cleaned;
}
}
// 2) Markdown heading section: ## Purpose (any level >= 2)
const headingRe = /^(#{2,6})\s*Purpose\s*$/im;
const headingMatch = headingRe.exec(raw);
if (headingMatch) {
const headingLevel = headingMatch[1].length;
const sectionStart = headingMatch.index + headingMatch[0].length;
const rest = raw.slice(sectionStart);
// Next heading of same or higher level ends the section
const nextHeadingRe = new RegExp(`^#{1,${headingLevel}}\\s+[^\n]+`, 'im');
const nextMatch = nextHeadingRe.exec(rest);
const section = nextMatch ? rest.slice(0, nextMatch.index) : rest;
const cleaned = cleanupAndSummarize(section);
if (cleaned) return cleaned;
}
// 3) Inline single-line fallback: Purpose: ...
const inline = raw.match(/(?:^|\n)\s*Purpose\s*:\s*([^\n\r]+)/i);
if (inline && inline[1]) {
const cleaned = cleanupAndSummarize(inline[1]);
if (cleaned) return cleaned;
}
} catch { } catch {
// ignore // ignore
} }