From b3ea262589532aa8bffa78983d45518e074a05c0 Mon Sep 17 00:00:00 2001 From: Brian Madison Date: Sat, 13 Jun 2026 23:18:17 -0500 Subject: [PATCH] =?UTF-8?q?bmad-architecture:=20review=20fixes=20=E2=80=94?= =?UTF-8?q?=20shared=20memlog,=20epic=20altitude,=20lint=20hardening?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Pre-PR review fixes for the new architecture-spine skill: - Adopt the shared canonical memlog (#2462); drop the vendored copy and the status-flag lifecycle in favor of `event` entries - Wire epic-altitude inheritance: parent-spine load+bind, Inherited Invariants template section, epic-scoped run folder, parent-contradiction lens; state that per-story detail is deferred to bmad-create-story - Add the Update intent; honor a forwarded-activation handoff from the deprecated bmad-create-architecture shim - Rename modes to Coaching path / Fast path (suite-consistent with prd/brief) and sequence the offer as an activation step - Fix the brownfield project-context.md default path ({output_folder} no-op) - lint_spine: line-exact frontmatter, fence-blanking (AD-in-fence + accurate line numbers), map-form key_deps, AD-0 fix; soften template-token severity; add 8 tests - Carve the Reviewer Gate to references/ to stay under the SKILL.md token budget - Template: entities-only ERD, scale-down guidance, softened seed framing; gitignore .memlog.md --- .gitignore | 3 + .../3-solutioning/bmad-architecture/SKILL.md | 58 ++-- .../assets/spine-template.md | 36 ++- .../bmad-architecture/customize.toml | 16 +- .../bmad-architecture/references/headless.md | 2 +- .../references/reviewer-gate.md | 13 + .../bmad-architecture/scripts/lint_spine.py | 101 ++++--- .../bmad-architecture/scripts/memlog.py | 202 ------------- .../scripts/tests/test_lint_spine.py | 67 +++++ .../scripts/tests/test_memlog.py | 265 ------------------ 10 files changed, 217 insertions(+), 546 deletions(-) create mode 100644 
src/bmm-skills/3-solutioning/bmad-architecture/references/reviewer-gate.md delete mode 100644 src/bmm-skills/3-solutioning/bmad-architecture/scripts/memlog.py delete mode 100644 src/bmm-skills/3-solutioning/bmad-architecture/scripts/tests/test_memlog.py diff --git a/.gitignore b/.gitignore index 1483c0538..99e48d9ab 100644 --- a/.gitignore +++ b/.gitignore @@ -26,6 +26,9 @@ design-artifacts/ __pycache__/ .pytest_cache/ +# BMad run artifacts (memlogs are per-run working memory, never committed) +.memlog.md + # System files .DS_Store Thumbs.db diff --git a/src/bmm-skills/3-solutioning/bmad-architecture/SKILL.md b/src/bmm-skills/3-solutioning/bmad-architecture/SKILL.md index 238eb78f7..70843baee 100644 --- a/src/bmm-skills/3-solutioning/bmad-architecture/SKILL.md +++ b/src/bmm-skills/3-solutioning/bmad-architecture/SKILL.md @@ -18,62 +18,68 @@ Record decisions, not rationale (rationale lives in the memlog). Carry shape in ## How you work -You're a coach, and **Guided is the default** — this runs against the model's instinct to just produce an architecture, so hold the line on it. Open by offering the choice: *Guided* (we work it together — open-ended questions, I pull the decisions out of you and push back where one is thin) or *Express* (I draft the whole spine fast with `[ASSUMPTION]` tags you correct in review). Unless the user clearly wants speed, **coach; don't silently draft.** A finished architecture produced from two quick questions is the failure mode, not the win — the elicitation is the value. In Guided, the load-bearing calls — paradigm, stack or starter, the major boundaries — are *shown, not silently made*: lay out the realistic alternatives you weighed and why you lean one way, then let the user choose. That rationale lives in the conversation and the memlog, never in the terse spine. +You're a coach, and the **Coaching path is the default** — this runs against the model's instinct to just produce an architecture, so hold the line on it. 
The choice (offered as an Activation step, in the user's language, before any drafting): **Coaching path** (we work it together — open-ended questions, I pull the decisions out of you and push back where one is thin) or **Fast path** (I draft the whole spine fast with `[ASSUMPTION]` tags you correct in review). Unless the user clearly wants speed, **coach; don't silently draft.** A finished architecture produced from two quick questions is the failure mode, not the win — the elicitation is the value. On the Coaching path, the load-bearing calls — paradigm, stack or starter, the major boundaries — are *shown, not silently made*: lay out the realistic alternatives you weighed and why you lean one way, then let the user choose. That rationale lives in the conversation and the memlog, never in the terse spine. -Elicit, don't quiz: open-ended "how are you thinking about X?" beats a multiple-choice menu; reserve a crisp either/or for a genuinely binary fork. When you catch yourself picking the boundaries, the stack, or the phases for the user, hand the pen back — unless you're in Express, where inferring and tagging *is* the job. +Elicit, don't quiz: open-ended "how are you thinking about X?" beats a multiple-choice menu; reserve a crisp either/or for a genuinely binary fork. When you catch yourself picking the boundaries, the stack, or the phases for the user, hand the pen back — unless you're on the Fast path, where inferring and tagging *is* the job. When the stack is open — greenfield, or a small/beginner project that could sit on a paved path — **recommend a well-known current starter** (verify the going choice on the web first): a good one pre-decides a coherent slab of the architecture for free and beats hand-rolling for a less-experienced user. 
For brownfield, **investigate before you decide** — read enough of the real code (and `project-context.md`; offer `bmad-document-project` if there is none) to ratify the conventions already there rather than invent new ones. ## Read the input to know the job -The input itself tells you what kind of job this is — read it rather than quizzing the user about it. A spec package (`SPEC.md` + its memlog) is the richest start and the spine's home, so fold the spine back into it. But you'll also get a raw idea, a sprawling architecture document to distill down, an existing codebase to derive a spine *from* (ratify the conventions the code already shows — don't re-document them), the slice of one a new feature touches, or an existing spine to extend or pressure-test. Prefer a `.memlog.md` over re-reading the source it came from. Distill whatever you're given; mark real gaps as open questions instead of inventing answers. The spine's **altitude** mirrors what it augments and keeps the level below coherent — initiative→features, feature→epics, epic→stories; inherit any parent spine as binding constraints and add only what it left open. +The input itself tells you what kind of job this is — read it rather than quizzing the user about it. A spec package (`SPEC.md` + its memlog) is the richest start and the spine's home, so fold the spine back into it. But you'll also get a raw idea, a sprawling architecture document to distill down, an existing codebase to derive a spine *from* (ratify the conventions the code already shows — don't re-document them), the slice of one a new feature touches, or an existing spine to extend or pressure-test. Prefer a `.memlog.md` over re-reading the source it came from. Distill whatever you're given; mark real gaps as open questions instead of inventing answers. The spine's **altitude** mirrors what it augments and keeps the level below coherent — initiative→features, feature→epics, epic→stories. + +**Inheriting a parent spine** (e.g. 
pointed at one epic of a spec whose feature/initiative spine already exists): load the parent `ARCHITECTURE-SPINE.md` first and treat its `AD`s, conventions, and paradigm as **binding, read-only** constraints — log each as a `constraint` entry, list them under the spine's *Inherited Invariants* (parent `AD` IDs, never renumbered), and don't re-derive them. Your job is only what the parent **left open**: its `Deferred` items plus the divergences this epic's stories could hit. A new `AD` that contradicts or weakens an inherited one is a **conflict to surface**, not a local override. An epic spine fixes the invariants the epic's stories must share — it does **not** expand per-story detail; that's deferred to `bmad-create-story` at story time. ## How a run works -The **memlog** (`.memlog.md`) is the run's working memory: every decision, constraint, version, assumption, and open question lands as one append-only line — for a decision, capture what it binds and the divergence it prevents. The spine is **distilled from the memlog at the end**, not written as you go. Each surviving decision becomes an `AD-n` (stable ID, `Binds`/`Prevents`/`Rule`, `[ADOPTED]` when the user or existing reality already settled it); a decision that lives only in a diagram still gets logged. Resume a prior run by reloading its memlog. +The **memlog** (`.memlog.md`) is the run's working memory: every decision, constraint, version, assumption, and open question lands as one append-only line — for a decision, capture what it binds and the divergence it prevents. It is the shared canonical memlog (the same `{project-root}/_bmad/scripts/memlog.py` bmad-spec writes through), so it carries no lifecycle status — terminal moments are logged as `event` entries, not a frontmatter flag. The spine is **distilled from the memlog at the end**, not written as you go. 
Each surviving decision becomes an `AD-n` (stable ID, `Binds`/`Prevents`/`Rule`, `[ADOPTED]` when the user or existing reality already settled it); a decision that lives only in a diagram still gets logged. Resume a prior run by reloading its memlog. -Writes go through the script (don't read the file back except on resume): +Writes go through the shared script (don't read the file back except on resume): -- `python3 {skill-root}/scripts/memlog.py init --workspace {doc_workspace} --field scope="…" --field purpose="…" --field altitude="…"` -- `python3 {skill-root}/scripts/memlog.py append --workspace {doc_workspace} --type <type> --text "…"` -- `python3 {skill-root}/scripts/memlog.py set --workspace {doc_workspace} --key status --value complete` +- `python3 {project-root}/_bmad/scripts/memlog.py init --workspace {doc_workspace} --field scope="…" --field purpose="…" --field altitude="…"` +- `python3 {project-root}/_bmad/scripts/memlog.py append --workspace {doc_workspace} --type <type> --text "…"` +- A terminal moment (spine finalized, a validation verdict) is an `append --type event` entry — there is no status field to set. -Paths: bare paths resolve from the skill root; `{skill-root}` is the install dir, `{project-root}` the project dir, `{workflow.<name>}` a merged `customize.toml` field, `{doc_workspace}` the bound run folder. +## Resolution rules + +- Bare paths and `{skill-root}` (e.g. `references/headless.md`) resolve from this skill's installed directory. +- `{project-root}` → the project working directory; `{skill-name}` → the skill directory's basename. +- `{workflow.<name>}` → a merged `customize.toml` field; `{doc_workspace}` → the bound run folder. +- Forward slashes only. Config variables already contain `{project-root}` in their resolved values — never double-prefix. ## On Activation +**Forwarded activation:** if a caller (e.g. 
the `bmad-create-architecture` shim) invoked you with a stated intent and pre-resolved customization fields, honor them verbatim — skip your own intent inference, use the supplied values for those named fields, and resolve only the remaining fields from your own `customize.toml`. So a legacy per-project override still reaches the run. + 1. Resolve customization: `python3 {project-root}/_bmad/scripts/resolve_customization.py --skill {skill-root} --key workflow` (on failure read `{skill-root}/customize.toml`, use defaults). Run `{workflow.activation_steps_prepend}`, then `{workflow.activation_steps_append}`. Hold `{workflow.persistent_facts}` as standing context — the default loads `project-context.md`, load-bearing for brownfield — and consult `{workflow.external_sources}` on demand. 2. Load `{project-root}/_bmad/bmm/config.yaml` (+ `config.user.yaml`) for `{user_name}`, `{communication_language}`, `{document_output_language}`, `{planning_artifacts}`, `{project_name}`, `{date}`; missing keys take neutral defaults, never block. -3. Headless (no interactive user) → follow `references/headless.md` for the whole run. Otherwise greet `{user_name}` in `{communication_language}`. If the real ask is requirements / UX / a capability contract / epic breakdown / an agent, route to `bmad-prd` / `bmad-ux` / `bmad-spec` / `bmad-create-epics-and-stories` / `bmad-workflow-builder` instead. +3. Headless (no interactive user) → follow `references/headless.md` for the whole run. Otherwise greet `{user_name}` in `{communication_language}`. Detect the intent from the conversation and input — **create** (the default), **update** an existing spine, or **validate** one (see those sections). If the real ask is requirements / UX / a capability contract / epic breakdown / an agent, route to `bmad-prd` / `bmad-ux` / `bmad-spec` / `bmad-create-epics-and-stories` / `bmad-workflow-builder` (if the BMad Builder module is installed) instead. 4. 
If a run folder for this target already exists under `{workflow.spine_output_path}`, offer to resume from its memlog rather than restart. +5. Interactive create: offer the working mode in `{communication_language}` — **Coaching path** (default) or **Fast path** (see *How you work*) — before any drafting; default to Coaching unless the user asks for speed. -For a new spine, bind `{doc_workspace}` to `{workflow.spine_output_path}/{workflow.run_folder_pattern}/`, seed `ARCHITECTURE-SPINE.md` from `{workflow.spine_template}`, `memlog.py init`, and tell the user the path. +For a new spine, bind `{doc_workspace}` to `{workflow.spine_output_path}/{workflow.run_folder_pattern}/`, seed `ARCHITECTURE-SPINE.md` from `{workflow.spine_template}`, run `memlog.py init`, and tell the user the path. **At epic altitude, scope the folder to the epic** (set `run_folder_pattern` per `customize.toml`) so per-epic runs don't collide. ## Reviewer Gate -Used by the Validate intent and at Finalize (step 3). Cheap deterministic pass first: `python3 {skill-root}/scripts/lint_spine.py --workspace {doc_workspace}` settles the mechanical misses (placeholders, duplicate `AD` IDs, missing Binds/Prevents/Rule, unpinned deps), so reviewers spend judgment on the semantic half. - -Assemble the menu: a **rubric walker** that judges the spine against the good-spine checklist below, **+ every entry in `{workflow.finalize_reviewers}` (always run)**, + ad-hoc lenses you invent or offer as the spine's rigor, altitude, and criticality warrant — a security/compliance lens for regulated stakes, a seam reviewer cross-team, a data-integrity lens for a heavy data model. Scale the set to the stakes: a throwaway prototype may run it quietly or skip; a high-criticality or platform-altitude spine earns more lenses and the explicit all / subset / skip menu. - -Dispatch every entry as a **parallel subagent against `ARCHITECTURE-SPINE.md`** (prefix convention: `skill:` / `file:` / plain text). 
Each writes its full review to `{doc_workspace}/review-{slug}.md` and returns ONLY a compact summary (verdict, top 2–5 findings, file path) — the parent never holds full review text. An inline self-check does not count: the independent context is the point, because a fresh reviewer finds the divergences the author talks past. If subagents are unavailable, run sequentially — write the file first, then flush it from context. - -**Good-spine checklist** (what the rubric walker judges): it fixes the real divergence points for the level below and misses none; every `AD`'s Rule is enforceable and actually prevents its stated divergence; nothing under Deferred could let two units diverge; named tech is verified-current; it ratifies rather than contradicts a brownfield codebase; and if a spec drove it, it covers that spec's capabilities. - -Surface findings tiered, never dumped: a one-sentence gate verdict, then critical + high; medium/low roll into a tail ("plus N more in {file}"). Per finding: autofix, discuss, defer to Deferred / open items, or ignore. **At Finalize this is your own gate — apply the clear fixes rather than handing over a list; surface only what genuinely needs the user.** Under the **Validate intent**, fold every reviewer's output into one bespoke HTML + markdown report and open the HTML. +The spine's pre-handoff review — full mechanics in `references/reviewer-gate.md`. Load it when finalizing or validating: a deterministic `lint_spine.py` pass, then a rubric walker (good-spine checklist) + every `{workflow.finalize_reviewers}` lens dispatched as parallel subagents against `ARCHITECTURE-SPINE.md`, scaled to stakes. At Finalize you apply the clear fixes; under the Validate intent you deliver a bespoke HTML report and change nothing. ## Finalize -Tell the user the sequence in a sentence, then walk it; reviewer fixes land before polish. +Walk the sequence; reviewer fixes land before polish. -1. 
**Distill.** Write the spine from the memlog (brownfield: + the code sweep) — invariants first, seed minimal, every `AD` carrying Binds/Prevents/Rule, `Deferred` naming what it won't decide. No placeholders; never invent to fill a gap. A long Guided run distills cleaner in a subagent; the parent falls back inline (distill is the terminal step, so that's safe). +1. **Distill.** Write the spine from the memlog (brownfield: + the code sweep) — invariants first, seed minimal, every `AD` carrying Binds/Prevents/Rule, `Deferred` naming what it won't decide. No placeholders; never invent to fill a gap. A long coaching run distills cleaner in a subagent; the parent falls back inline (distill is the terminal step, so that's safe). 2. **Reconcile inputs.** A subagent per load-bearing input checks it against the spine and returns what didn't land — especially a quiet requirement (a tone, a constraint) the `AD` structure dropped. Before the gate. -3. **Reviewer pass.** Run `## Reviewer Gate`. Resolve before polish. +3. **Reviewer pass.** Run the Reviewer Gate (`references/reviewer-gate.md`). Resolve before polish. 4. **Triage.** Open questions and `[ASSUMPTION]` tags: blockers (unsafe for what's next) resolved one at a time; the rest deferred with a revisit condition in the memlog. -5. **Renderings & polish.** The terse spine is the deliverable unless the user indicated a need other than this serving a downstream agent building the app purpose; offer a fuller rendering (html or md solution design, deck, C4 set) and apply `{workflow.doc_standards}` polish only to such prose, never to the spine. +5. **Renderings & polish.** The terse spine is the deliverable when the consumer is a downstream build agent. If the user needs a human-facing artifact instead, offer a fuller rendering (html or md solution design, deck, C4 set) and apply `{workflow.doc_standards}` polish only to that prose, never to the spine. 6. 
**External handoffs.** Run `{workflow.external_handoffs}`; surface returned URLs/IDs. Offer to hand the spine to `bmad-spec` as a companion, keeping `AD` IDs stable so downstream can cite them. -7. **Close.** Set `status: final`, `updated: {date}`; `memlog.py set status complete`. Share paths. Next: `bmad-spec`, `bmad-create-epics-and-stories`, or — epic altitude — `bmad-create-story`; `bmad-help` to route. +7. **Close.** Set the spine's own frontmatter `status: final`, `updated: {date}`; log a `memlog.py append --type event --text "spine finalized"` (the memlog has no status field). Share paths. Next: `bmad-spec`, `bmad-create-epics-and-stories`, or — epic altitude — `bmad-create-story`; `bmad-help` to route. 8. Run `{workflow.on_complete}`. +## Update + +Amend an existing spine. Resume from its `.memlog.md` (the authority on what was decided), not the rendered spine. Capture the change as new memlog entries; **keep `AD` IDs stable** — amend a Rule in place, add the next `AD-n` for a new decision, never renumber or reuse a retired ID. Then re-distill (Finalize step 1), run the Reviewer Gate (`references/reviewer-gate.md`), and close as in Finalize. An update that overrides something from a source input: offer to update that source too, so upstream and the spine don't silently diverge. + ## Validate -The standalone intent — critique an existing spine without changing it. Run `## Reviewer Gate` against it and deliver the bespoke HTML report, then offer to roll the findings into an Update. (At Finalize the same gate runs as your own pre-handoff check, where you apply the fixes instead of reporting.) +The standalone intent — critique an existing spine without changing it. Run the Reviewer Gate (`references/reviewer-gate.md`) against it and deliver the bespoke HTML report, then offer to roll the findings into an Update. (At Finalize the same gate runs as your own pre-handoff check, where you apply the fixes instead of reporting.) 
diff --git a/src/bmm-skills/3-solutioning/bmad-architecture/assets/spine-template.md b/src/bmm-skills/3-solutioning/bmad-architecture/assets/spine-template.md index 0e1a83263..aecd70986 100644 --- a/src/bmm-skills/3-solutioning/bmad-architecture/assets/spine-template.md +++ b/src/bmm-skills/3-solutioning/bmad-architecture/assets/spine-template.md @@ -12,7 +12,7 @@ stack: # SEED — verified current at authoring; the code o languages: [] frameworks: [] key_deps: [] # name@version -binds: [] # capability / unit IDs governed (from the driving spec) +binds: [] # capability / unit IDs governed (from the driving spec; at epic altitude, also the parent AD IDs inherited) sources: [] companions: [] --- @@ -23,12 +23,28 @@ companions: [] > independently-built level below ({features | epics | stories}) coherent — the durable rules a > clean codebase can't reveal. Structure is **seed**: the code owns the detail, the spine keeps the shape. > Decisions, not rationale (that lives in the memlog). Diagrams over prose. +> +> **Scale to the job — drop any section a project doesn't need.** A small intent may be just a +> paradigm + a few `AD`s + conventions, seed omitted; a platform earns the full set. An inherited +> epic spine is usually mostly Inherited Invariants + a thin Deferred. Empty sections are cut, not left as headers. ## Design Paradigm Name the pattern — a known one loads a whole model for free — and map its layers to namespaces / directories. The smallest, most durable thing in the file. +## Inherited Invariants + +Present only when this spine inherits a parent at a higher altitude (e.g. an epic spine under a +feature/initiative spine). The parent's `AD`s, conventions, and paradigm that bind here, listed by +their original parent IDs — **read-only, never renumbered, not re-derived**. This spine adds only +what the parent left open; anything here that a local decision would contradict is a conflict to +surface, not override. 
+ +| Inherited | From parent | Binds here | +| --- | --- | --- | +| {AD-n / convention} | {parent spine} | {what it constrains in this scope} | + ## Invariants & Rules The durable heart: the calls a future builder can't read from compliant code. Each `AD-n` has a @@ -62,13 +78,16 @@ don't apply. ## Structural Seed -Cold-start scaffolding, kept minimal. The code owns the **detail** (every file, every column) — don't -mirror it here. But this stays the living source of truth for **shape**: evolve it when the shape -itself changes — a new container, a new core entity, a stack bump — and let the memlog keep the history. +Cold-start scaffolding, kept minimal — include an item only where its shape is non-obvious at this +altitude (at epic altitude the parent usually already fixed it, so the seed is often empty). The code +owns the **detail** (every file, every column); once code exists it becomes the source of truth for +detail, and this seed is a starting scaffold, not a mirror to maintain against it. Evolve a seed item +only when the **shape** itself changes — a new container, a new core entity, a stack bump — and let +the memlog keep the history. - **Stack & Versions** — the substrate (mirrors frontmatter `stack`). -- **System Shape** — a container/context view. Use `flowchart` with a `subgraph` per boundary; C4 mermaid is experimental and won't render in most viewers. -- **Data Model** — an ERD of entities and relationships, one attribute per line (ownership/mutation rules live above). +- **System Shape** — a container/context view (at epic altitude, the slice of the parent system this scope touches). Use `flowchart` with a `subgraph` per boundary; C4 mermaid is experimental and won't render in most viewers. +- **Core Entities** — an ERD of entities and their relationships. Names and relationships only; attributes belong to the code unless one is itself an invariant (then it's an `AD`, not seed). 
- **Project Structure** — a minimal source tree, only as deep as consistency needs. ```mermaid @@ -87,10 +106,7 @@ flowchart TD ```mermaid erDiagram ENTITY_A ||--o{ ENTITY_B : "{relationship}" - ENTITY_A { - uuid id PK - string name - } + ENTITY_B ||--o| ENTITY_C : "{relationship}" ``` ```text diff --git a/src/bmm-skills/3-solutioning/bmad-architecture/customize.toml b/src/bmm-skills/3-solutioning/bmad-architecture/customize.toml index a30999fd5..c0353174b 100644 --- a/src/bmm-skills/3-solutioning/bmad-architecture/customize.toml +++ b/src/bmm-skills/3-solutioning/bmad-architecture/customize.toml @@ -30,7 +30,7 @@ activation_steps_append = [] # "Our org is AWS-only -- do not propose GCP or Azure." # "file:{project-root}/docs/engineering-standards.md" persistent_facts = [ - "file:{output_folder}/project-context.md", + "file:{project-root}/**/project-context.md", ] # Executed when the workflow completes (after the spine is final and the user has been told). @@ -42,11 +42,15 @@ on_complete = "" # need. Override the path in team/user TOML to enforce a different spine shape. spine_template = "assets/spine-template.md" -# Run folder location. ARCHITECTURE-SPINE.md, its .memlog.md, the optional architecture-diagrams.md -# companion, and the optional narrative doc all land inside -# `{spine_output_path}/{run_folder_pattern}/`. Resume-check scans `{spine_output_path}` for prior -# unfinished runs. At epic altitude, override run_folder_pattern (team/user TOML) to nest under -# the epic. +# Run folder location. ARCHITECTURE-SPINE.md, its .memlog.md, and any fuller rendering the run +# produces all land inside `{spine_output_path}/{run_folder_pattern}/`. Resume-check scans +# `{spine_output_path}` for prior unfinished runs. +# +# The default pattern fits the common case (one spine per project, at the altitude above epics). +# At EPIC altitude, override run_folder_pattern to carry the epic identity so per-epic runs don't +# collide on the same day — e.g. 
set it (team/user TOML) to "architecture-epic-{epic_id}", binding +# {epic_id} from the driving spec / the activating payload. Headless callers may instead pass an +# explicit doc_workspace and bypass the pattern entirely. spine_output_path = "{planning_artifacts}/architecture" run_folder_pattern = "architecture-{project_name}-{date}" diff --git a/src/bmm-skills/3-solutioning/bmad-architecture/references/headless.md b/src/bmm-skills/3-solutioning/bmad-architecture/references/headless.md index c4e58a024..5fa17552c 100644 --- a/src/bmm-skills/3-solutioning/bmad-architecture/references/headless.md +++ b/src/bmm-skills/3-solutioning/bmad-architecture/references/headless.md @@ -2,7 +2,7 @@ No interactive user: infer everything, ask nothing, but never invent — record inferences as `assumptions[]` and gaps that need a human as `open_questions[]`. Detect headless from a `headless: true` flag, a non-interactive / no-TTY invocation, an activation hook that declares it, or a first message that pre-supplies all inputs and asks for an artifact path back; when ambiguous, default to interactive. -Drive the run from the payload in the first message — `intent`, `altitude`, `purpose`, the driving input (spec package / PRD / raw intent / brownfield path), a parent spine path at lower altitude, and `doc_workspace` if a specific folder is required. Infer anything absent from the inputs or workspace; don't invent stack, constraints, or scope to fill a gap. You still verify named tech on the web (you can't ask, but you can check) and still drive every write through `scripts/memlog.py`. Run the full Reviewer Gate non-interactively: `scripts/lint_spine.py` plus **every `{workflow.finalize_reviewers}` lens as a parallel subagent** (and any ad-hoc lens the spine's criticality warrants). Headless skips only the human picking from the menu — never the reviewers themselves; apply the clear fixes and record anything unresolved in `open_questions[]`. 
For a true authority collision, list it in `conflicts_with_prior_decisions[]`. For the Validate intent, always write the report to `{doc_workspace}` and add `"offer_to_update": true`. If intent stays ambiguous after inference, halt blocked. +Drive the run from the payload in the first message — `intent`, `altitude`, `purpose`, the driving input (spec package / PRD / raw intent / brownfield path), a parent spine path at lower altitude, and `doc_workspace` if a specific folder is required. Infer anything absent from the inputs or workspace; don't invent stack, constraints, or scope to fill a gap. You still verify named tech on the web (you can't ask, but you can check) and still drive every write through the shared `{project-root}/_bmad/scripts/memlog.py`. Run the full Reviewer Gate (`references/reviewer-gate.md`) non-interactively: `scripts/lint_spine.py` plus **every `{workflow.finalize_reviewers}` lens as a parallel subagent** (and any ad-hoc lens the spine's criticality warrants). Headless skips only the human picking from the menu — never the reviewers themselves; apply the clear fixes and record anything unresolved in `open_questions[]`. For a true authority collision, list it in `conflicts_with_prior_decisions[]`. For the Validate intent, always write the report to `{doc_workspace}` and add `"offer_to_update": true`. If intent stays ambiguous after inference, halt blocked. End with JSON only, omitting keys for artifacts not produced: diff --git a/src/bmm-skills/3-solutioning/bmad-architecture/references/reviewer-gate.md b/src/bmm-skills/3-solutioning/bmad-architecture/references/reviewer-gate.md new file mode 100644 index 000000000..67b612c81 --- /dev/null +++ b/src/bmm-skills/3-solutioning/bmad-architecture/references/reviewer-gate.md @@ -0,0 +1,13 @@ +# Reviewer Gate + +The spine's pre-handoff review. Runs at Finalize (after distill + reconcile) and *is* the Validate intent. 
The difference is the ending: at Finalize you apply the clear fixes yourself; under Validate you report and don't change the spine. + +Cheap deterministic pass first: `python3 {skill-root}/scripts/lint_spine.py --workspace {doc_workspace}` settles the mechanical misses (placeholders, duplicate `AD` IDs, missing Binds/Prevents/Rule, unpinned deps), so reviewers spend judgment on the semantic half. + +Assemble the menu: a **rubric walker** that judges the spine against the good-spine checklist below, **+ every entry in `{workflow.finalize_reviewers}` (always run)**, + ad-hoc lenses you invent or offer as the spine's rigor, altitude, and criticality warrant — a security/compliance lens for regulated stakes, a seam reviewer cross-team, a data-integrity lens for a heavy data model. Scale the set to the stakes: a throwaway prototype may run it quietly or skip; a high-criticality or platform-altitude spine earns more lenses and the explicit all / subset / skip menu. + +Dispatch every entry as a **parallel subagent against `ARCHITECTURE-SPINE.md`** (prefix convention: `skill:` / `file:` / plain text). Each writes its full review to `{doc_workspace}/review-{slug}.md` and returns ONLY a compact summary (verdict, top 2–5 findings, file path) — the parent never holds full review text. An inline self-check does not count: the independent context is the point, because a fresh reviewer finds the divergences the author talks past. If subagents are unavailable, run sequentially — write the file first, then flush it from context. 
+ +**Good-spine checklist** (what the rubric walker judges): it fixes the real divergence points for the level below and misses none; every `AD`'s Rule is enforceable and actually prevents its stated divergence; nothing under Deferred could let two units diverge; named tech is verified-current; it ratifies rather than contradicts a brownfield codebase; if a spec drove it, it covers that spec's capabilities; and if a parent spine is inherited, no new `AD` weakens or contradicts an inherited one. + +Surface findings tiered, never dumped: a one-sentence gate verdict, then critical + high; medium/low roll into a tail ("plus N more in {file}"). Per finding: autofix, discuss, defer to Deferred / open items, or ignore. **At Finalize this is your own gate — apply the clear fixes rather than handing over a list; surface only what genuinely needs the user.** Under the **Validate intent**, fold every reviewer's output into one bespoke HTML + markdown report and open the HTML. diff --git a/src/bmm-skills/3-solutioning/bmad-architecture/scripts/lint_spine.py b/src/bmm-skills/3-solutioning/bmad-architecture/scripts/lint_spine.py index 472e365ee..ec034f22c 100644 --- a/src/bmm-skills/3-solutioning/bmad-architecture/scripts/lint_spine.py +++ b/src/bmm-skills/3-solutioning/bmad-architecture/scripts/lint_spine.py @@ -15,9 +15,11 @@ It reads ARCHITECTURE-SPINE.md from a workspace and reports, as compact JSON on - ad_fields an AD-n block missing Binds / Prevents / Rule - version_pin a frontmatter key_deps entry with no @version -Fenced code blocks are skipped for placeholder/brace scanning so mermaid and source -trees don't trip false positives. Exit code is always 0 — findings travel in the JSON; -the caller (Reviewer Gate / rubric walker) decides what to do with them. +Fenced code blocks are blanked (replaced with equal-count blank lines) before scanning, so +mermaid and source trees don't trip false positives AND reported line numbers still line up +with the real file. 
Reported lines are absolute file lines (frontmatter offset added). Exit +code is always 0 — findings travel in the JSON; the caller (Reviewer Gate / rubric walker) +decides what to do with them. """ from __future__ import annotations @@ -37,52 +39,65 @@ SIMILAR_TO = re.compile(r"similar to AD-\d+", re.IGNORECASE) TEMPLATE_TOKEN = re.compile(r"\{[a-z_][a-z0-9_ /.-]*\}") -def split_frontmatter(text: str) -> tuple[str, str]: - """Return (frontmatter, body). Frontmatter is the content between the first two - `---` fences, or '' when absent.""" - if text.startswith("---"): - end = text.find("\n---", 3) - if end != -1: - fm = text[3:end].lstrip("\n") - body = text[end + 4:] - return fm, body - return "", text +def split_frontmatter(text: str) -> tuple[str, str, int]: + """Return (frontmatter, body, body_line_offset). + + Frontmatter is the content between the first two lines that are *exactly* `---` + (line-exact, like memlog.split — a `---` inside a value or a body thematic break never + truncates it). body_line_offset is the number of file lines before the body begins, so a + body-relative line number plus the offset gives the absolute file line. 
Absent frontmatter + → ('', text, 0).""" + lines = text.split("\n") + if lines and lines[0] == "---": + for i in range(1, len(lines)): + if lines[i] == "---": + fm = "\n".join(lines[1:i]) + body = "\n".join(lines[i + 1:]) + return fm, body, i + 1 + return "", text, 0 -def strip_fences(text: str) -> str: - return FENCE.sub("", text) +def blank_fences(text: str) -> str: + """Replace each fenced block with the same number of newlines, so scanning skips fenced + content while every line number outside the fence stays put.""" + return FENCE.sub(lambda m: "\n" * m.group(0).count("\n"), text) def line_of(text: str, idx: int) -> int: return text.count("\n", 0, idx) + 1 -def find_placeholders(body: str) -> list[dict]: +def find_placeholders(body: str, offset: int) -> list[dict]: findings: list[dict] = [] - scan = strip_fences(body) - for rx, label in ( - (PLACEHOLDER_WORD, "placeholder marker"), - (SIMILAR_TO, "unresolved cross-reference"), - (TEMPLATE_TOKEN, "unfilled template token"), + scan = blank_fences(body) + # (regex, label, severity) — TBD/TODO and dangling cross-refs are unambiguous; a bare + # {template-token} can be legitimate brace prose, so it is flagged low ("possible") to keep + # the mechanical pass near-zero false-positive rather than train reviewers to ignore it. 
+ for rx, label, severity in ( + (PLACEHOLDER_WORD, "placeholder marker", "high"), + (SIMILAR_TO, "unresolved cross-reference", "high"), + (TEMPLATE_TOKEN, "possible unfilled template token (verify)", "low"), ): for m in rx.finditer(scan): findings.append({ "category": "placeholder", - "severity": "high", + "severity": severity, "detail": f"{label}: {m.group(0)!r}", - "location": f"{SPINE} (approx line {line_of(scan, m.start())})", + "location": f"{SPINE} (line {offset + line_of(scan, m.start())})", }) return findings -def find_ad_issues(body: str) -> list[dict]: +def find_ad_issues(body: str, offset: int) -> list[dict]: findings: list[dict] = [] - matches = list(AD_HEADING.finditer(body)) + scan = blank_fences(body) # AD headings shown inside a code fence are not live ADs + matches = list(AD_HEADING.finditer(scan)) seen: dict[int, int] = {} - prev = 0 - for i, m in enumerate(matches): + prev: int | None = None + for m in matches: num = int(m.group(1)) - loc = f"{SPINE} AD-{num} (line {line_of(body, m.start())})" + file_line = offset + line_of(scan, m.start()) + loc = f"{SPINE} AD-{num} (line {file_line})" if num in seen: findings.append({ "category": "ad_id", @@ -91,20 +106,20 @@ def find_ad_issues(body: str) -> list[dict]: "location": loc, }) else: - seen[num] = line_of(body, m.start()) - if num <= prev: + seen[num] = file_line + if prev is not None and num <= prev: findings.append({ "category": "ad_id", "severity": "high", "detail": f"AD-{num} is non-monotonic (follows AD-{prev}); ids must ascend and never renumber", "location": loc, }) - prev = max(prev, num) + prev = num if prev is None else max(prev, num) # block text = from this heading to the next heading of any level start = m.end() - nxt = HEADING.search(body, start) - block = body[start:nxt.start()] if nxt else body[start:] + nxt = HEADING.search(scan, start) + block = scan[start:nxt.start()] if nxt else scan[start:] low = block.lower() missing = [f for f in ("binds", "prevents", "rule") if f not in low] 
if missing: @@ -143,7 +158,21 @@ def find_unpinned_deps(frontmatter: str) -> list[dict]: in_key_deps = False continue if stripped.startswith("-"): + # block-sequence form: `- name@version` _check_dep(_strip_comment(stripped[1:]).strip().strip("'\""), findings) + else: + # map form: `name: version` — pinned iff a non-empty value is present + mm = re.match(r"([^:]+):\s*(.*)$", stripped) + if mm: + name = mm.group(1).strip().strip("'\"") + val = _strip_comment(mm.group(2)).strip().strip("'\"") + if name and not val: + findings.append({ + "category": "version_pin", + "severity": "medium", + "detail": f"key_deps entry {name!r} has no version pin", + "location": f"{SPINE} frontmatter stack.key_deps", + }) return findings @@ -165,10 +194,10 @@ def _check_dep(item: str, findings: list[dict]) -> None: def lint(text: str) -> dict: - frontmatter, body = split_frontmatter(text) + frontmatter, body, offset = split_frontmatter(text) findings: list[dict] = [] - findings += find_placeholders(body) - findings += find_ad_issues(body) + findings += find_placeholders(body, offset) + findings += find_ad_issues(body, offset) findings += find_unpinned_deps(frontmatter) counts: dict[str, int] = {} for f in findings: diff --git a/src/bmm-skills/3-solutioning/bmad-architecture/scripts/memlog.py b/src/bmm-skills/3-solutioning/bmad-architecture/scripts/memlog.py deleted file mode 100644 index 99ec13225..000000000 --- a/src/bmm-skills/3-solutioning/bmad-architecture/scripts/memlog.py +++ /dev/null @@ -1,202 +0,0 @@ -#!/usr/bin/env python3 -# /// script -# requires-python = ">=3.10" -# /// -"""memlog — an append-only memory log: LLM-optimal working memory for a skill. - -A memlog is the dense, chronological record of everything that mattered in a piece of -work — every item the user generated or accepted — kept minimal like human memory: only -what's important, never bloated. It persists ACROSS sessions, so a fresh session can -load it and continue. 
It is NOT a deliverable; downstream artifacts (a brief, a PRD, a -deck, a report) are *derived* from it on demand. The host skill supplies the vocabulary -by how it calls `append` — the tool stays neutral. - -It is a FLAT log: there are no sections or grouping. Every entry is one line, recorded -at the END in the order it happened. The chronology itself is the structure — an event -like "started technique X" is just another entry, same as an idea or an insight. - -Two invariants make it trustworthy: - - 1. Append-only, chronological. Entries land at the end, in the order they happen. - Nothing is ever inserted backward, reordered, or grouped. - 2. Write-only / blind. Every command is an atomic, context-free write and echoes the - new state as JSON, so the caller never re-reads the file mid-session. The one time - the file is read is on resume — and the caller reads it itself, not via this script. - -The file shape (.memlog.md): - - --- - topic: Onboarding flow for a budgeting app - goal: lift week-1 retention - status: active - updated: 2026-05-30T14:22 - --- - - - (note) user picked techniques: SCAMPER, then Six Thinking Hats - - (technique) started SCAMPER - - (idea) skip the signup wall: let people try with sample data first - - (idea) auto-import one bank account so the first screen shows real numbers - - (question) is open-banking consent too heavy for step one? - - (technique) started Six Thinking Hats - - (idea) black-hat: imported transactions look scary before they're categorized - - (insight) the "scary numbers" risk and the "real numbers" idea are one lever: show real data, pre-categorized - - (direction) user wants to optimize for the anxious first-timer, not the power user - - (decision) lead with one pre-categorized account; defer multi-account import - -Each entry may carry an optional `--type` — what KIND it is (idea, insight, question, -decision, technique, …) — and an optional `--by` naming who it came from (e.g. 
`user`, -`coach`), for sessions where authorship matters. Both render into one short inline tag: -`(idea)`, `(idea by user)`, `(by coach)`. Omit them for a plain note. The host skill -names the vocabulary; the script does not. - -Commands: - init --workspace DIR [--field k=v ...] create the memlog (errors if it exists) - append --workspace DIR --text STR [--type T] [--by W] append one entry at the end - set --workspace DIR --key K --value V set/replace a frontmatter field - -The workspace is the run folder; the memlog is always {workspace}/.memlog.md. -""" -import argparse -import json -import os -import sys -from datetime import datetime -from pathlib import Path - -MEMLOG = ".memlog.md" - - -def now() -> str: - return datetime.now().strftime("%Y-%m-%dT%H:%M") - - -def memlog_path(workspace: str) -> Path: - return Path(workspace) / MEMLOG - - -def split(text: str) -> tuple[dict, str]: - """Return (frontmatter dict in source order, body str). Frontmatter is plain key: value. - - The closing fence is the first line that is *exactly* `---`, so a `---` inside a - field value (topic/goal are free user text) never truncates the frontmatter. - """ - lines = text.splitlines() - if not lines or lines[0] != "---": - raise ValueError(".memlog.md has no frontmatter") - end = next((i for i in range(1, len(lines)) if lines[i] == "---"), None) - if end is None: - raise ValueError(".memlog.md frontmatter is not terminated") - meta: dict[str, str] = {} - for line in lines[1:end]: - if ":" in line: - k, v = line.split(":", 1) - meta[k.strip()] = v.strip() - return meta, "\n".join(lines[end + 1:]).lstrip("\n") - - -def render(meta: dict, body: str) -> str: - # Neutralize newlines in values so a multi-line field can't break the fence on re-read. 
- fm = "\n".join(f"{k}: {' '.join(str(v).splitlines())}" for k, v in meta.items()) - return "---\n" + fm + "\n---\n\n" + body.rstrip("\n") + "\n" - - -def touch(meta: dict) -> None: - """Stamp `updated` and keep it last so the field order stays predictable.""" - meta.pop("updated", None) - meta["updated"] = now() - - -def write_atomic(path: Path, text: str) -> None: - tmp = path.with_suffix(path.suffix + ".tmp") - tmp.write_text(text, encoding="utf-8") - os.replace(tmp, path) - - -def entry_count(body: str) -> int: - return sum(1 for ln in body.splitlines() if ln.startswith("- ")) - - -def ack(path: Path, meta: dict, body: str) -> None: - """Echo new state so the caller never re-reads the file to know where it stands.""" - print(json.dumps({ - "ok": True, - "memlog": str(path), - "status": meta.get("status", ""), - "entries": entry_count(body), - })) - - -def cmd_init(args) -> int: - path = memlog_path(args.workspace) - if path.exists(): - print(f"error: {path} already exists; use append/set to update it", file=sys.stderr) - return 2 - path.parent.mkdir(parents=True, exist_ok=True) - meta: dict[str, str] = {} - for pair in args.field or []: - if "=" not in pair: - print(f"error: --field expects key=value, got {pair!r}", file=sys.stderr) - return 2 - k, v = pair.split("=", 1) - meta[k.strip()] = v.strip() - meta.setdefault("status", "active") - touch(meta) - write_atomic(path, render(meta, "")) - ack(path, meta, "") - return 0 - - -def cmd_append(args) -> int: - path = memlog_path(args.workspace) - meta, body = split(path.read_text(encoding="utf-8")) - text = " ".join(args.text.split()) # collapse newlines/runs → one-line entry, no prose bloat - label = args.type or "" - if args.by: - label = f"{label} by {args.by}".strip() # attribution: "(idea by user)" / "(by coach)" - tag = f"({label}) " if label else "" - entry = f"- {tag}{text}" - body = (body.rstrip("\n") + "\n" + entry) if body.strip() else entry # always at the end - touch(meta) - write_atomic(path, 
render(meta, body)) - ack(path, meta, body) - return 0 - - -def cmd_set(args) -> int: - path = memlog_path(args.workspace) - meta, body = split(path.read_text(encoding="utf-8")) - meta[args.key] = args.value - touch(meta) - write_atomic(path, render(meta, body)) - ack(path, meta, body) - return 0 - - -def main(argv: list[str] | None = None) -> int: - p = argparse.ArgumentParser(description=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter) - sub = p.add_subparsers(dest="cmd", required=True) - - pi = sub.add_parser("init", help="create the memlog") - pi.add_argument("--workspace", required=True) - pi.add_argument("--field", action="append", metavar="KEY=VALUE", help="frontmatter field (repeatable)") - pi.set_defaults(func=cmd_init) - - pa = sub.add_parser("append", help="append one entry at the end") - pa.add_argument("--workspace", required=True) - pa.add_argument("--text", required=True) - pa.add_argument("--type", help="entry kind, rendered as an inline tag") - pa.add_argument("--by", help="who the entry came from (e.g. 
user, coach); rendered into the tag") - pa.set_defaults(func=cmd_append) - - pset = sub.add_parser("set", help="set a frontmatter field") - pset.add_argument("--workspace", required=True) - pset.add_argument("--key", required=True) - pset.add_argument("--value", required=True) - pset.set_defaults(func=cmd_set) - - args = p.parse_args(argv) - return args.func(args) - - -if __name__ == "__main__": - sys.exit(main()) diff --git a/src/bmm-skills/3-solutioning/bmad-architecture/scripts/tests/test_lint_spine.py b/src/bmm-skills/3-solutioning/bmad-architecture/scripts/tests/test_lint_spine.py index 8a8120a62..82fd28a85 100644 --- a/src/bmm-skills/3-solutioning/bmad-architecture/scripts/tests/test_lint_spine.py +++ b/src/bmm-skills/3-solutioning/bmad-architecture/scripts/tests/test_lint_spine.py @@ -9,6 +9,8 @@ mechanical defects a prompt is unreliable at — literal placeholders, AD-n id b AD-n blocks missing required fields, and unpinned dependency versions. """ import importlib.util +import json +import re import sys from pathlib import Path @@ -134,5 +136,70 @@ def test_yaml_comments_not_parsed_as_deps(): assert "version_pin" not in cats(result) +def test_template_token_is_low_severity(): + # a bare {token} can be legitimate brace prose; it is flagged, but low (not high) so the + # mechanical pass stays near-zero false-positive + text = CLEAN.replace("single write path", "{decision}") + result = lint_spine.lint(text) + toks = [f for f in result["findings"] if f["category"] == "placeholder" and "template token" in f["detail"]] + assert toks and all(f["severity"] == "low" for f in toks) + + +def test_no_frontmatter_body_still_scanned(): + text = "## Invariants\n\n### AD-1 — x\n\n- **Binds:** all\n- **Prevents:** drift\n- **Rule:** TBD\n" + result = lint_spine.lint(text) + assert "placeholder" in cats(result) # TBD caught even with no frontmatter + + +def test_frontmatter_value_with_dashes_not_truncated(): + # a value containing '---' must not be read as the closing fence 
(line-exact close) + text = "---\nscope: 'phase 1 --- phase 2'\nstack:\n key_deps:\n - fastapi\n---\n\n## Invariants\n" + result = lint_spine.lint(text) + assert any(f["category"] == "version_pin" for f in result["findings"]) # read past the inline --- + + +def test_ad_heading_in_fence_not_counted(): + text = ( + "---\nname: 'x'\n---\n\n" + "### AD-1 — real\n\n- **Binds:** all\n- **Prevents:** drift\n- **Rule:** do x\n\n" + "## Docs\n\n```text\n### AD-2 — illustrative only, no fields\n```\n" + ) + result = lint_spine.lint(text) + assert result["ok"] is True # the fenced AD-2 is not a live AD → no ad_fields/ad_id finding + + +def test_map_form_key_deps_unpinned_caught(): + text = "---\nstack:\n key_deps:\n fastapi: '0.115'\n redis:\n---\n\n## Invariants\n" + result = lint_spine.lint(text) + pins = [f for f in result["findings"] if f["category"] == "version_pin"] + assert len(pins) == 1 and "redis" in pins[0]["detail"] + + +def test_map_form_key_deps_pinned_ok(): + text = "---\nstack:\n key_deps:\n fastapi: '0.115'\n---\n\n## Invariants\n" + result = lint_spine.lint(text) + assert "version_pin" not in cats(result) + + +def test_placeholder_line_number_is_absolute(): + # a TBD after a multi-line fence reports its real file line (fence blanked, not collapsed) + text = ( + "---\nname: 'x'\n---\n\n" + "## A\n\n" + "```text\nf1\nf2\nf3\n```\n\n" + "TBD here\n" + ) + result = lint_spine.lint(text) + ph = [f for f in result["findings"] if "TBD" in f["detail"]][0] + n = int(re.search(r"line (\d+)", ph["location"]).group(1)) + assert n == 13 + + +def test_missing_spine_file_reports_error(tmp_path, capsys): + rc = lint_spine.main(["--workspace", str(tmp_path)]) + out = json.loads(capsys.readouterr().out) + assert rc == 0 and out["ok"] is False and "not found" in out["error"] + + if __name__ == "__main__": sys.exit(pytest.main([__file__, "-q"])) diff --git a/src/bmm-skills/3-solutioning/bmad-architecture/scripts/tests/test_memlog.py 
b/src/bmm-skills/3-solutioning/bmad-architecture/scripts/tests/test_memlog.py deleted file mode 100644 index 5e7813829..000000000 --- a/src/bmm-skills/3-solutioning/bmad-architecture/scripts/tests/test_memlog.py +++ /dev/null @@ -1,265 +0,0 @@ -# /// script -# requires-python = ">=3.10" -# dependencies = ["pytest>=8.0"] -# /// -"""Tests for memlog.py. Run: uv run --with pytest pytest scripts/tests/test_memlog.py - -The spine under test is the flat, append-only, chronological invariant: every entry is -one line recorded at the end in the order it happened — no sections, no grouping. -""" -import json -import sys -from pathlib import Path - -import pytest - -sys.path.insert(0, str(Path(__file__).resolve().parent.parent)) -import memlog # noqa: E402 - -MEMLOG = ".memlog.md" - - -@pytest.fixture -def ws(tmp_path): - return str(tmp_path) - - -def read(ws): - return (Path(ws) / MEMLOG).read_text(encoding="utf-8") - - -def body_of(ws): - return memlog.split(read(ws))[1] - - -def entries(ws): - return [ln for ln in body_of(ws).splitlines() if ln.startswith("- ")] - - -def init(ws, **fields): - fields = fields or {"topic": "Reinvent the lunchbox", "goal": "ideas for a pitch"} - argv = ["init", "--workspace", ws] - for k, v in fields.items(): - argv += ["--field", f"{k}={v}"] - assert memlog.main(argv) == 0 - - -def append(ws, text, entry_type=None, by=None): - argv = ["append", "--workspace", ws, "--text", text] - if entry_type: - argv += ["--type", entry_type] - if by: - argv += ["--by", by] - assert memlog.main(argv) == 0 - - -# --- init --------------------------------------------------------------- - -def test_init_writes_frontmatter_fields(ws): - init(ws) - meta, body = memlog.split(read(ws)) - assert meta["topic"] == "Reinvent the lunchbox" - assert meta["goal"] == "ideas for a pitch" - assert meta["status"] == "active" - assert "updated" in meta - assert body.strip() == "" - - -def test_init_arbitrary_fields(ws): - init(ws, topic="T", audience="board") - meta, _ = 
memlog.split(read(ws)) - assert meta["audience"] == "board" - - -def test_init_refuses_overwrite(ws): - init(ws) - assert memlog.main(["init", "--workspace", ws, "--field", "topic=other"]) == 2 - - -def test_init_creates_missing_workspace(tmp_path): - nested = str(tmp_path / "a" / "b") - assert memlog.main(["init", "--workspace", nested, "--field", "topic=T"]) == 0 - assert (Path(nested) / MEMLOG).is_file() - - -def test_init_rejects_malformed_field(ws): - assert memlog.main(["init", "--workspace", ws, "--field", "noequals"]) == 2 - - -# --- append: flat chronological order is the whole point ----------------- - -def test_append_lands_at_end_in_order(ws): - init(ws) - append(ws, "first") - append(ws, "second") - append(ws, "third") - assert entries(ws) == ["- first", "- second", "- third"] - - -def test_no_sections_or_headings_ever(ws): - init(ws) - append(ws, "started foo", entry_type="technique") - append(ws, "an idea", entry_type="idea") - append(ws, "started bar", entry_type="technique") - assert "## " not in body_of(ws) # the flat log never grows headings - - -def test_type_renders_as_inline_tag(ws): - init(ws) - append(ws, "the earth revolves around the sun", entry_type="idea") - append(ws, "how do we handle stampede?", entry_type="question") - body = body_of(ws) - assert "- (idea) the earth revolves around the sun" in body - assert "- (question) how do we handle stampede?" 
in body - - -def test_append_without_type_is_plain_note(ws): - init(ws) - append(ws, "bare entry") - assert entries(ws) == ["- bare entry"] - - -def test_append_collapses_newlines_into_one_line(ws): - init(ws) - append(ws, "line one\nline two\n spaced out") - assert entries(ws) == ["- line one line two spaced out"] - - -def test_revisited_technique_is_just_a_later_entry(ws): - # the user's model: switching techniques is an entry, not a section to return to - init(ws) - append(ws, "started SCAMPER", entry_type="technique") - append(ws, "magnetic latch", entry_type="idea") - append(ws, "started Six Hats", entry_type="technique") - append(ws, "stale data risk", entry_type="idea") - append(ws, "started SCAMPER", entry_type="technique") # back to SCAMPER — just appended again - append(ws, "stackable tiers", entry_type="idea") - assert entries(ws) == [ - "- (technique) started SCAMPER", - "- (idea) magnetic latch", - "- (technique) started Six Hats", - "- (idea) stale data risk", - "- (technique) started SCAMPER", - "- (idea) stackable tiers", - ] - - -def test_by_renders_attribution_in_tag(ws): - # Creative Partner mode must record whose idea each one was - init(ws) - append(ws, "magnetic latch lid", entry_type="idea", by="user") - append(ws, "lid doubles as a plate", entry_type="idea", by="coach") - body = body_of(ws) - assert "- (idea by user) magnetic latch lid" in body - assert "- (idea by coach) lid doubles as a plate" in body - - -def test_by_without_type_renders_alone(ws): - init(ws) - append(ws, "off-the-cuff thought", by="coach") - assert entries(ws) == ["- (by coach) off-the-cuff thought"] - - -def test_heterogeneous_entry_types_coexist(ws): - init(ws) - append(ws, "an idea", entry_type="idea") - append(ws, "an open question", entry_type="question") - append(ws, "a decision we made", entry_type="decision") - append(ws, "user wants mobile-first", entry_type="direction") - body = body_of(ws) - for tag in ("(idea)", "(question)", "(decision)", "(direction)"): - 
assert tag in body - - -# --- set ---------------------------------------------------------------- - -def test_set_flips_status(ws): - init(ws) - memlog.main(["set", "--workspace", ws, "--key", "status", "--value", "complete"]) - assert memlog.split(read(ws))[0]["status"] == "complete" - - -def test_set_preserves_body(ws): - init(ws) - append(ws, "keep me", entry_type="idea") - memlog.main(["set", "--workspace", ws, "--key", "status", "--value", "complete"]) - meta, body = memlog.split(read(ws)) - assert meta["status"] == "complete" - assert "- (idea) keep me" in body - - -def test_set_can_add_new_field(ws): - init(ws) - memlog.main(["set", "--workspace", ws, "--key", "owner", "--value", "BMad"]) - assert memlog.split(read(ws))[0]["owner"] == "BMad" - - -def test_updated_stays_last(ws): - init(ws) - memlog.main(["set", "--workspace", ws, "--key", "owner", "--value", "BMad"]) - meta = memlog.split(read(ws))[0] - assert list(meta)[-1] == "updated" - - -# --- robustness --------------------------------------------------------- - -def test_roundtrip_render_is_stable(ws): - init(ws) - append(ws, "one", entry_type="idea") - first = read(ws) - meta, body = memlog.split(first) - assert memlog.render(meta, body) == first - - -def test_commas_in_field_survive(ws): - init(ws, topic="cars, trains, and planes") - append(ws, "z", entry_type="idea") - meta, _ = memlog.split(read(ws)) - assert meta["topic"] == "cars, trains, and planes" - - -def test_triple_dash_in_field_does_not_corrupt_frontmatter(ws): - # A `---` inside a value must NOT be read as the closing fence: topic stays intact, - # status survives, and the body never leaks frontmatter text. 
- init(ws, topic="Pricing --- tiers --- and add-ons") - append(ws, "an idea", entry_type="idea") - meta, body = memlog.split(read(ws)) - assert meta["topic"] == "Pricing --- tiers --- and add-ons" - assert meta["status"] == "active" - assert entries(ws) == ["- (idea) an idea"] - assert "status:" not in body # frontmatter never bled into the body - - -def test_triple_dash_status_survives_in_ack(ws, capsys): - init(ws, topic="a --- b") - append(ws, "x", entry_type="idea") - out = json.loads(capsys.readouterr().out.strip().splitlines()[-1]) - assert out["status"] == "active" # not "" — frontmatter recovered cleanly - - -def test_newline_in_field_is_neutralized(ws): - # A value carrying a newline can't break the fence on the next round-trip. - memlog.main(["init", "--workspace", ws, "--field", "topic=line one\nline two"]) - append(ws, "x", entry_type="idea") - meta, _ = memlog.split(read(ws)) - assert "\n" not in meta["topic"] - assert meta["status"] == "active" - - -def test_append_emits_json_ack(ws, capsys): - init(ws) - append(ws, "x", entry_type="idea") - out = json.loads(capsys.readouterr().out.strip().splitlines()[-1]) - assert out["ok"] is True - assert out["status"] == "active" - assert out["entries"] == 1 - assert out["memlog"].endswith(MEMLOG) - assert "section" not in out # sections are gone - - -def test_ack_entry_count_climbs(ws, capsys): - init(ws) - append(ws, "a") - append(ws, "b") - out = json.loads(capsys.readouterr().out.strip().splitlines()[-1]) - assert out["entries"] == 2