solution-erp/.claude/agent-memory/memory-budget.json

{
  "_note": "Harness-9 (S70, 2026-06-17) memory budget. Caps SEEDED BY MEASUREMENT (scripts/measure-agent-memory.ps1), NOT imagined headroom. Budget-audit @session-start (session-start.md §2.1.2): if curate-to-fit is dropping important markers, BUMP the relevant cap rather than cut markers. Re-measure with the script; never hand-edit measured_bytes.",
  "seeded_date": "2026-06-18 (S71 re-measure after G1 curate — reviewer 36.7KB + investigator 29.8KB over-cap from S71 same-role race, curated L1->L2 back under auto-inject cap)",
  "last_sleep_at": "2026-06-18",
  "_last_sleep_at_note": "Harness-10b sleep-recovery (S72): timestamp of the most recent /sleep-recovery-memory-l2 run. null = never run. session-start §2.1.2 + session-end §L.b(c) read this field -> INFORM (suggest running the L2 sleep-recovery) if it is null or today - last_sleep_at >= 7 days. Lead = single-writer (only the sleep command sets this field).",
  "tiers": {
    "l1_hot": {
      "file": "MEMORY.md",
      "injected": "auto (harness injects ~first 200 lines / 25KB on spawn)",
      "autoinject_cap_bytes": 25600,
      "soft_cap_bytes": 30720,
      "rule": "keep MEMORY.md < autoinject_cap so the WHOLE hot file injects; over soft_cap => curate L1->L2"
    },
    "l2_index": {
      "file": "archive/_INDEX.md",
      "injected": "read-on-demand map (inject the map, not the territory)",
      "cap_bytes": 20480,
      "seeded_from": "max measured _INDEX = cicd-monitor 16779B (+ ~22% headroom). cicd-monitor at ~82% = WATCH-agent (grows with each run); when it nears cap -> gist-of-index or split, do NOT drop markers",
      "pointer_style": "substring (git-sha / Run#NNN / unique-phrase keyed), fallback Ctrl-F. NO line-hints (additive appends shift lines)"
    },
    "l2_verbatim": {
      "file": "archive/<period>.md",
      "injected": "read-on-demand content (frozen, additive-only). NO inject cap",
      "rule": "NEVER rewrite existing bytes; curated L1 entries APPEND to end only"
    },
    "l2_gist": {
      "file": "archive/<period>.gist.md",
      "injected": "read-on-demand 4-field distill (distill-gen counter guards re-distill). NO inject cap",
      "rule": "coverage-diff GATE: every surprise/guard/file:line/root-cause/gotcha# in verbatim must survive in gist (or marked N/A)"
    }
  },
  "archive_gate": {
    "_note": "Harness-11 PART-A (S73, 2026-06-18) params for scripts/memory-archive-gate.ps1 (DRY-RUN planner, NO-API, grep+measure only). ADDITIVE — does not touch measured/tiers/last_sleep_at. autoinject_cap_bytes here repeats tiers.l1_hot.autoinject_cap_bytes (both 25600) -- update both together. A4 hysteresis = drain to BELOW the low watermark (low_watermark_ratio), not just down to the cap line. A5 = never auto-drain below the keep_floor_entries newest entries (WARN instead). A6 = only PROPOSE archive after strike_threshold consecutive over-cap runs (the script itself keeps no state between runs; strike counts are persisted in .claude/agent-memory/.archive-strikes.json).",
    "autoinject_cap_bytes": 25600,
    "low_watermark_ratio": 0.85,
    "keep_floor_entries": 5,
    "strike_threshold": 2,
    "value_protect": {
      "_note": "Harness-15 B(b) value-gated archival (S81, 2026-06-20): keep_floor_entries protects NEWEST-n (recency/age axis); value_protect protects HIGH-VALUE entries regardless of age (value axis, orthogonal). Recurring-bug / anti-pattern / gotcha / root-cause entries STAY in L1-hot even when old -- archival cuts LOW-VALUE, NOT FIFO-by-date. This is mark RC-...10-29-11 (time/age=false-proxy) applied to the memory layer. Mechanism = CONVENTION (em-main judgement when condensing L1->L2); the patterns below are an advisory grep-hint for the DRY-RUN planner to FLAG protected entries, NOT an enforced auto-exclude (no overclaim: archive-gate stays DRY-RUN, em-main decides).",
      "patterns": ["gotcha #", "anti-pattern", "recurring", "lost-update", "race", "bai hoc", "lesson", "guard", "root-cause", "silent-fail"]
    }
  },
  "harness_floor": {
    "_note": "Harness-15 A1/A3 (S81, 2026-06-20): harness FLOOR ('san' = floor) = fixed per-spawn cost (NOT tunable) = tool-schema + framing + own persona/role file + lead-pasted base-doc slice + task prompt. SEPARATE HOUSE (A3 anti-double-count): persona + lead-pasted-docs belong HERE (floor), NOT counted in token_governor.tier1_hotfeed_tokens (the Tier-1 budget this note originally called l1_always; it = own agent-memory + archive index + work-state block only). MEASURED-ESTIMATE not exact (H15 honest-note b): persona = directly measured bytes (.claude/agents/<name>.md 4.3KB-13.3KB => ~1.3K-4.0K tok via /3.3); tool-schema + framing = harness-injected (cannot byte-count locally), estimated comparable to AI_INFRA same-toolset-family (Read/Write/Edit/Bash/Grep/Glob/Skill/RAG).",
    "measured_token_estimate": 21000,
    "rounded_up_tokens": 30000,
    "components_note": "persona(measured 1.3K-4.0K, DIRECTLY via Get-ChildItem byte-count of .claude/agents/*.md, NOT via measure-agent-memory.ps1 which only sizes agent-memory tiers) + tool-schema(est ~5K) + framing(est ~2K) + lead-pasted base-slice(est ~5-9K; em-main injects CLAUDE.md/README-slice + task ctx) + prompt(est ~1K). README decision-tree corpus = 32704B but em-main pastes a SLICE, not whole. The ~21K sum is SE's OWN reasoning over SE's OWN agents (persona byte-measured + each harness-injected sub-component estimated independently); it lands NEAR AI_INFRA's ~21K because the toolset family is identical (Read/Write/Edit/Bash/Grep/Glob/Skill/RAG), NOT because borrowed. SE's governing cap = 30K (SE's own round-up), independent of AI_INFRA's figure."
  },
  "token_governor": {
    "_note": "Harness-15-v2 (S82, 2026-06-21): UPDATED by delta broadcast 2026-06-20-Governance-harness-15-v2-hot-feed-update (supersedes_scope = tier-1-sizing + L2/L3-caps ONLY; rest of H15 unchanged). TWO CHANGES vs S81: (1) Tier-1 = HOT-FEED LARGE per-role (was flat 12K -- too thin, caused lead to forget work across sessions); (2) L2/L3 caps REMOVED (on-demand, no artificial tier-limit, bounded only by model context window). Still the SECOND governor (token) ORTHOGONAL to the BYTE governor (tiers/archive_gate above) -- keep BOTH (B(e)); byte measures file-size-on-disk, token measures context-loaded; VN text ~3.0-3.5 byte/tok, so byte/4 UNDER-counts tokens (a lower bound, not an upper bound) -- estimate with byte/3.3 as harness_floor does => real token load is LARGER than byte/4 suggests. Budget = MINIMUM-to-USE floor (FILL Tier-1 with real work-state up to the number; under-fill ONLY when high-value content exhausted; NEVER garbage-stuff -- token-saving = forgetting work).",
    "role_boundary_note": "v2 §6 ROLE BOUNDARY (🔴): the budget numbers (Tier-1 per-role cap + per-bucket allocation) are ANH's (project-owner / chu-du-an) RIGHT to set -- NOT the AI-lead's. em-main's job is exactly two parts: (1) EXECUTE the config faithfully (load Tier-1 to the number, no-truncate, pull each bucket to target) + (2) REPORT %-composition at session-start (§2.1.6) and session-end (§L.b(c)) so anh decides. em-main self-measures + proposes numbers; em-main does NOT auto-tune them down. This corrects the S81 'LEAD-AUTHORITY' framing which conflated AI-lead with project-owner.",
    "tier1_hotfeed_tokens": {
      "_note": "Tier-1 always-loaded HOT-FEED, PER-ROLE (v3 S83 2026-06-22: FULL AI_INFRA parity, owner-set). FILL with the 4 work buckets: (1) WIP work-state, (2) recurring-bugs/anti-patterns/gotcha (value_protect, kept regardless of age), (3) backlog, (4) pending-decisions. anh-set: lead 220K / mem-sub 60K / wf-sub 50K = EXACT AI_INFRA parity. This SUPERSEDES the S82 'SE numbers are SMALLER (lead 60K; subs stay 20K/16K because the byte-cap binds first)' self-justification -- that was the AI under-shrinking, forbidden by role_boundary_note + mark RC-...01-58-01 (token-saving = forgetting work). KEY CORRECTION on subs: the sub MEMORY.md byte-cap (30720B ~9.3K tok) is ONLY ONE SLICE of a sub's Tier-1 -- the SPAWN PROMPT (relevant gotchas + state + full task-context + related docs/memory, written by em-main) fills the REST up to the token budget. So 60K/50K is NOT 'unusable headroom'; it is the target em-main fills via a RICH spawn prompt (see spawn_fill_directive). %-print at the two session ends shows the REAL composition.",
      "lead_tokens": 220000,
      "lead_note": "ANH-SET 220K (S83 2026-06-22 owner-directive, full AI_INFRA parity; raised from the 200K interim earlier in S83 and the 60K S82 self-shrink). DO NOT auto-reduce (role_boundary_note + mark RC-...01-58-01). Hot-feed = STATUS full current-state + 4-bucket work-state block + ACTIVE-MARKS + recent-3-session HANDOFF + active roadmap (migration-todos) + roster-slice + task-relevant gotchas + active-task files + task-relevant docs/code -- read GENEROUSLY ('dau phien nap them tier1_lead cho du' = at session start, load extra into tier1_lead until it is enough), NOT 'on-demand-deferred', but HIGHEST-VALUE distilled only, NEVER garbage-stuff. Opus 4.8 1M window => 220K is ~22% of window, ample. anh-adjustable (owner authority).",
      "memory_sub_tokens": 60000,
      "memory_sub_note": "memory-bearing sub (agent-chinh), anh-set 60K (S83 full parity): own MEMORY.md (soft-cap 30720B ~9.3K tok; only the first 25600B ~7.8K tok auto-injects) + archive _INDEX map + work-state slice + THE RICH SPAWN PROMPT em-main writes (relevant gotchas + current state + full task-context + related docs/memory) = fills toward 60K. The byte-cap on MEMORY.md is NOT the binding limit on the sub's Tier-1; the spawn prompt is. em-main MUST write a context-rich brief, NOT a thin 8K brief (that old anti-truncation heuristic guarded RETURN-truncation #53, mitigated by lean memoryDelta RETURN -- not by starving INPUT).",
      "workflow_sub_tokens": 50000,
      "workflow_sub_note": "agent-in-workflow, anh-set 50K (S83 full parity): MEMORY-PACK slice (hmw.js:124 args inject) + RICH task context (relevant gotchas + state + full task-context + related docs/memory passed via the workflow agent() prompt). Same rule: fill to budget with high-value content, lean structured return."
    },
    "spawn_fill_directive": {
      "_note": "Harness-15-v3 (S83 2026-06-22 owner-directive): when em-main SPAWNS any sub-agent or workflow-agent, FILL its context toward its token budget (mem-sub 60K / wf-sub 50K) via a RICH prompt -- relevant gotchas + current state + full task-context + related docs/memory. The sub's MEMORY.md byte-cap is only one slice; the spawn prompt supplies the rest. RULE: highest-value distilled content ONLY (the hot-load tokens must be the most valuable, filtered through many stages), never garbage-stuff to hit the number. RECONCILES the agents/README anti-truncation '<=8K brief' heuristic: that guarded against RETURN-truncation (#53), now mitigated by return-delta-only (memoryDelta) + em-main recover-disk -- so the INPUT prompt is no longer starved; it is filled rich.",
      "applies_to": ["Agent tool spawn", "Workflow hmw.js agent() calls"],
      "quality_gate": "highest-value distilled, NOT padding"
    },
    "l2_ondemand": "NO-CAP (v2: removed the 6K cap). On-demand: archive verbatim/gist sections + skill sections; pulled per-need, no artificial tier-limit; bounded only by model context window. On-demand => no permanent context-cost when unused.",
    "l3_rag": "NO-CAP (v2: removed the 4K cap). On-demand: RAG search_memory/search_code per query; bounded only by model context window.",
    "pct_print": {
      "_note": "v2 §6: %-print Tier-1 composition at BOTH ends of the session (start + end) so anh sees what Tier-1 holds, which bucket is thin, headroom left. Estimate-by-ratio is enough (no exact measure). Headroom > 0 WHILE high-value content still unloaded = under-fill (WRONG) -> load more; leave headroom ONLY when high-value content truly exhausted. Headroom = a FLAG, NOT a saving target.",
      "session_start": "session-start.md §2.1.6 (composition by % per bucket)",
      "session_end": "session-end.md §L.b(c) (% after load + Headroom remaining)"
    },
    "honest_caveat": "v2 §5: large Tier-1 = HIGHER context-rot on the always-loaded part -- an accepted, deliberate trade-off (forgetting-work judged worse than rot), NOT 'rot disappears'. Small-context-window / light-workload projects may optimally pick a SMALLER Tier-1 -- the FLOOR is the ARCHITECTURE (hot-feed large + L2/L3 on-demand no-cap), not the numbers. 'No-cap' L2/L3 = no artificial tier-limit, still within the model context window."
  },
  "measured": {
    "_note": "S81 2026-06-20 FRESH re-measure (scripts/measure-agent-memory.ps1) post-S80 curate. Supersedes stale S71-seeded values. byte-governor snapshot (l1_hot = file-size-on-disk); cross-check token_governor for the orthogonal token-thread.",
    "cicd-monitor":          { "l1_hot": 12227, "l2_verbatim": 194964, "l2_index": 17626, "l2_gist": 29737, "rollout": "done (re-curated S80: keep-floor-hit manual condense, ~3 huge run-records -> archive)" },
    "investigator-codebase": { "l1_hot": 10281, "l2_verbatim": 102446, "l2_index": 12308, "l2_gist": 27297, "rollout": "done (re-curated S80: 15 recon entries -> archive)" },
    "reviewer":              { "l1_hot": 10242, "l2_verbatim": 92316,  "l2_index": 10067, "l2_gist": 19114, "rollout": "done (re-curated S80: 13 war-story entries -> archive)" },
    "implementer-backend":   { "l1_hot": 19394, "l2_verbatim": 59233,  "l2_index": 10105, "l2_gist": 23079, "rollout": "done" },
    "frontend-designer":     { "l1_hot": 26129, "l2_verbatim": 0, "l2_index": 0, "l2_gist": 0, "rollout": "n/a (no archive); OVER autoinject-cap 25600 (under soft-cap 30720) -- WATCH strike-1, first-overflow needs _INDEX/gist build" },
    "test-specialist":       { "l1_hot": 27723, "l2_verbatim": 5546, "l2_index": 0, "l2_gist": 0, "rollout": "n/a (tiny archive, no _INDEX); OVER autoinject-cap 25600 (under soft-cap) -- WATCH strike-1; value-protect FLAG fired S81 (gotcha#/guard in move-set)" },
    "harvest-curator":       { "l1_hot": 18952, "l2_verbatim": 0, "l2_index": 0, "l2_gist": 0, "rollout": "n/a (no archive)" },
    "tooling-auditor":       { "l1_hot": 18431, "l2_verbatim": 0, "l2_index": 0, "l2_gist": 0, "rollout": "n/a (no archive)" },
    "implementer-frontend":  { "l1_hot": 17386, "l2_verbatim": 0, "l2_index": 0, "l2_gist": 0, "rollout": "n/a (empty archive)" },
    "investigator-api":      { "l1_hot": 8510,  "l2_verbatim": 0, "l2_index": 0, "l2_gist": 0, "rollout": "n/a (empty archive)" },
    "database-agent":        { "l1_hot": 5917,  "l2_verbatim": 0, "l2_index": 0, "l2_gist": 0, "rollout": "n/a (no archive)" }
  }
}