solution-erp/eval/runs/2026-05-26-baseline-v1.1-pending.json

{
  "run_date": "2026-05-26",
  "golden_set_version": "v1.1",
  "spec": "A",
  "status": "PENDING_RELOAD",
  "note": "v1.1 baseline attempted after re-bootstrap (2949 chunks, correct SOLUTION_ERP root_path). Results unexpectedly worse than v1.0 — MCP server likely needs CLI restart to reload Qdrant/BM25 cache after bootstrap. Re-run needed.",
  "recall_at_5_tentative": 0.3636,
  "hits_tentative": 4,
  "positive_queries": 11,
  "pass_gate": false,
  "results_tentative": [
    {"id":"q01","hit":true,"top1_source":"docs/architecture.md","top1_rerank":0.887},
    {"id":"q02","hit":true,"top1_source":"docs/architecture.md","top1_rerank":0.910},
    {"id":"q03","hit":true,"top1_source":"docs/changelog/sessions/s18","top1_rerank":0.859},
    {"id":"q04","hit":false,"note":"0 results — pending reload verify"},
    {"id":"q05","hit":false,"note":"0 results — pending reload verify"},
    {"id":"q06","hit":false,"note":"0 results — pending reload verify"},
    {"id":"q07","hit":false,"note":"0 results — pending reload verify"},
    {"id":"q08","hit":true,"top1_source":".claude/agent-memory/investigator/MEMORY.md","top1_rerank":0.824},
    {"id":"q09","hit":false,"note":"0 results — pending reload verify"},
    {"id":"q10","hit":false,"note":"0 results — pending reload verify"},
    {"id":"q11","hit":false,"note":"0 results — pending reload verify BUT BM25 direct search returns 3 hits investigator MEMORY.md — pipeline issue"},
    {"id":"q12","hit":true,"note":"CORRECT EXCLUSION"},
    {"id":"q13","hit":true,"note":"CORRECT EXCLUSION"},
    {"id":"q14","hit":true,"note":"CORRECT EXCLUSION"}
  ],
  "_diagnosis": {
    "bm25_confirmed": "BM25 search 'ApprovalWorkflow V1 V2' → 3 hits investigator MEMORY.md (direct SQLite query). Data IS indexed.",
    "qdrant_confirmed": "Qdrant 2949 points green. Source paths all SOLUTION_ERP correct.",
    "likely_cause": "MCP server caches Qdrant collection discovery or vector index. After bootstrap.py cleared+replaced collection, MCP server may use stale embedding cache or connection. CLI restart needed.",
    "action": "After CLI restart, re-run 14 queries as v1.1 official baseline."
  }
}