refactor: remove adapt.md and its references

2026-04-01 10:37:48 -07:00
parent c8a25a0287
commit f3fefe0cbc
9 changed files with 28 additions and 184 deletions
@@ -1,9 +1,8 @@
 """Queen global cross-session memory.

-Three-tier memory architecture:
+Two-tier memory architecture:
  ~/.hive/queen/MEMORY.md                            — semantic (who, what, why)
  ~/.hive/queen/memories/MEMORY-YYYY-MM-DD.md        — episodic (daily journals)
-  ~/.hive/queen/session/{id}/data/adapt.md           — working (session-scoped)

 Semantic and episodic files are injected at queen session start.

@@ -197,21 +196,14 @@ no preamble, no code fences.


 def read_session_context(session_dir: Path, max_messages: int = 80) -> str:
-    """Extract a readable transcript from conversation parts + adapt.md.
+    """Extract a readable transcript from conversation parts.

-    Reads the last ``max_messages`` conversation parts and the session's
-    adapt.md (working memory). Tool results are omitted — only user and
-    assistant turns (with tool-call names noted) are included.
+    Reads the last ``max_messages`` conversation parts. Tool results are
+    omitted — only user and assistant turns (with tool-call names noted)
+    are included.
    """
    parts: list[str] = []

-    # Working notes
-    adapt_path = session_dir / "data" / "adapt.md"
-    if adapt_path.exists():
-        text = adapt_path.read_text(encoding="utf-8").strip()
-        if text:
-            parts.append(f"## Session Working Notes (adapt.md)\n\n{text}")
-
    # Conversation transcript
    parts_dir = session_dir / "conversations" / "parts"
    if parts_dir.exists():
@@ -306,12 +298,12 @@ async def consolidate_queen_memory(
 ) -> None:
    """Update MEMORY.md and append a diary entry based on the current session.

-    Reads conversation parts and adapt.md from session_dir. Called
-    periodically in the background and once at session end. Failures are
-    logged and silently swallowed so they never block teardown.
+    Reads conversation parts from session_dir. Called periodically in
+    the background and once at session end. Failures are logged and
+    silently swallowed so they never block teardown.

    Args:
-        session_id: The session ID (used for the adapt.md path reference).
+        session_id: The session ID (echoed in the prompt's "Session Reference" section).
        session_dir: Path to the session directory (~/.hive/queen/session/{id}).
        llm: LLMProvider instance (must support acomplete()).
    """
@@ -337,7 +329,6 @@ async def consolidate_queen_memory(
        today_journal = read_episodic_memory()
        today = date.today()
        today_str = format_memory_date(today)
-        adapt_path = session_dir / "data" / "adapt.md"

        user_msg = (
            f"## Existing Semantic Memory (MEMORY.md)\n\n"
@@ -347,7 +338,7 @@ async def consolidate_queen_memory(
            f"{session_context}\n\n"
            f"## Session Reference\n\n"
            f"Session ID: {session_id}\n"
-            f"Session path: {adapt_path}\n"
+            f"Session dir: {session_dir}\n"
        )

        logger.debug(
@@ -20,7 +20,6 @@
 │           │       ├── conversation_2.md
 │           │       └── ...
 │           └── data/
-│               ├── adapt.md              ← Working memory (session-scoped)
 │               ├── web_search_1.txt      ← Spillover: large tool results
 │               ├── web_search_2.txt
 │               └── ...
@@ -28,13 +27,12 @@

 ---

-## The three memory tiers
+## The two memory tiers

 | File | Tier | Written by | Read at |
 |---|---|---|---|
 | `MEMORY.md` | Semantic | Consolidation LLM (auto, post-session) | Session start (injected into system prompt) |
 | `memories/MEMORY-YYYY-MM-DD.md` | Episodic | Queen via `write_to_diary` tool + consolidation LLM | Session start (today's file injected) |
-| `data/adapt.md` | Working | Queen via `update_session_notes` tool | Every turn (inlined in system prompt) |

 ---

@@ -52,7 +50,6 @@ in the original directory rather than fragmenting across multiple folders.
 end. It reads:

 1. `conversations/parts/*.json` — full message history (user + assistant turns; tool results skipped)
-2. `data/adapt.md` — current working notes

 It then makes two LLM writes:

@@ -580,8 +580,6 @@ def build_emergency_summary(

    # 5. Spillover files — list actual files so the LLM can load
    # them immediately instead of having to call list_data_files first.
-    # Inline adapt.md (agent memory) directly — it contains user rules
-    # and identity preferences that must survive emergency compaction.
    spillover_dir = config.spillover_dir if config else None
    if spillover_dir:
        try:
@@ -589,15 +587,8 @@ def build_emergency_summary(

            data_dir = Path(spillover_dir)
            if data_dir.is_dir():
-                # Inline adapt.md content directly
-                adapt_path = data_dir / "adapt.md"
-                if adapt_path.is_file():
-                    adapt_text = adapt_path.read_text(encoding="utf-8").strip()
-                    if adapt_text:
-                        parts.append(f"AGENT MEMORY (adapt.md):\n{adapt_text}")
-
                all_files = sorted(
-                    f.name for f in data_dir.iterdir() if f.is_file() and f.name != "adapt.md"
+                    f.name for f in data_dir.iterdir() if f.is_file()
                )
                # Separate conversation history files from regular data files
                conv_files = [f for f in all_files if re.match(r"conversation_\d+\.md$", f)]
@@ -476,49 +476,6 @@ async def execute_tool(
    return result


-def record_learning(key: str, value: Any, spillover_dir: str | None) -> None:
-    """Append a set_output value to adapt.md as a learning entry.
-
-    Called at set_output time — the moment knowledge is produced — so that
-    adapt.md accumulates the agent's outputs across the session.  Since
-    adapt.md is injected into the system prompt, these persist through
-    any compaction.
-    """
-    if not spillover_dir:
-        return
-    try:
-        adapt_path = Path(spillover_dir) / "adapt.md"
-        adapt_path.parent.mkdir(parents=True, exist_ok=True)
-        content = adapt_path.read_text(encoding="utf-8") if adapt_path.exists() else ""
-
-        if "## Outputs" not in content:
-            content += "\n\n## Outputs\n"
-
-        # Truncate long values for memory (full value is in shared memory)
-        v_str = str(value)
-        if len(v_str) > 500:
-            v_str = v_str[:500] + "…"
-
-        entry = f"- {key}: {v_str}\n"
-
-        # Replace existing entry for same key (update, not duplicate)
-        lines = content.splitlines(keepends=True)
-        replaced = False
-        for i, line in enumerate(lines):
-            if line.startswith(f"- {key}:"):
-                lines[i] = entry
-                replaced = True
-                break
-        if replaced:
-            content = "".join(lines)
-        else:
-            content += entry
-
-        adapt_path.write_text(content, encoding="utf-8")
-    except Exception as e:
-        logger.warning("Failed to record learning for key=%s: %s", key, e)
-
-
 def next_spill_filename(tool_name: str, counter: int) -> str:
    """Return a short, monotonic filename for a tool result spill."""
    # Shorten common tool name prefixes to save tokens
@@ -80,7 +80,6 @@ from framework.graph.event_loop.tool_result_handler import (
    execute_tool,
    extract_json_metadata,
    is_transient_error,
-    record_learning,
    restore_spill_counter,
    truncate_tool_result,
 )
@@ -477,38 +476,6 @@ class EventLoopNode(NodeProtocol):
                        system_prompt = f"{system_prompt}\n\n{ctx.default_skill_batch_nudge}"
                        logger.info("[%s] DS-12: batch scenario detected, nudge injected", node_id)

-                # Inject agent working memory (adapt.md).
-                # If it doesn't exist yet, seed it with available context.
-                if self._config.spillover_dir:
-                    _adapt_path = Path(self._config.spillover_dir) / "adapt.md"
-                    if not _adapt_path.exists():
-                        _adapt_path.parent.mkdir(parents=True, exist_ok=True)
-                        seed = (
-                            f"## Identity\n{ctx.accounts_prompt}\n"
-                            if ctx.accounts_prompt
-                            else "# Session Working Memory\n"
-                        )
-                        _adapt_path.write_text(seed, encoding="utf-8")
-                    if _adapt_path.exists():
-                        _adapt_text = _adapt_path.read_text(encoding="utf-8").strip()
-                        if _adapt_text:
-                            system_prompt = (
-                                f"{system_prompt}\n\n"
-                                "--- Session Working Memory ---\n"
-                                f"{_adapt_text}\n"
-                                "--- End Session Working Memory ---\n\n"
-                                "Maintain your session working memory by calling "
-                                'save_data("adapt.md", ...) or edit_data("adapt.md", ...)'
-                                " as you work.\n"
-                                "This is session-scoped scratch space. "
-                                "IMMEDIATELY save: account/identity rules, "
-                                "behavioral constraints, and preferences specific to "
-                                "this session. Also record current task state, "
-                                "decisions, and working notes. "
-                                "For lasting knowledge about the user, use "
-                                "update_queen_memory() and append_queen_journal() instead."
-                            )
-
                conversation = NodeConversation(
                    system_prompt=system_prompt,
                    max_context_tokens=self._config.max_context_tokens,
@@ -2215,7 +2182,6 @@ class EventLoopNode(NodeProtocol):
                                ),
                                is_error=False,
                            )
-                        self._record_learning(key, stored)
                        outputs_set_this_turn.append(key)
                        await self._publish_output_key_set(stream_id, node_id, key, execution_id)
                    logged_tool_calls.append(
@@ -2979,20 +2945,6 @@ class EventLoopNode(NodeProtocol):
            skill_dirs=getattr(self, "_skill_dirs", []),
        )

-    def _record_learning(self, key: str, value: Any) -> None:
-        """Append a set_output value to adapt.md as a learning entry.
-
-        Called at set_output time — the moment knowledge is produced — so that
-        adapt.md accumulates the agent's outputs across the session.  Since
-        adapt.md is injected into the system prompt, these persist through
-        any compaction.
-        """
-        return record_learning(
-            key=key,
-            value=value,
-            spillover_dir=self._config.spillover_dir,
-        )
-
    def _next_spill_filename(self, tool_name: str) -> str:
        """Return a short, monotonic filename for a tool result spill."""
        self._spill_counter += 1
@@ -1439,23 +1439,6 @@ class GraphExecutor:
                        # Build Layer 2 (narrative) from current state
                        narrative = build_narrative(memory, path, graph)

-                        # Read agent working memory (adapt.md) once for both
-                        # system prompt and transition marker.
-                        _adapt_text: str | None = None
-                        if self._storage_path:
-                            _adapt_path = self._storage_path / "data" / "adapt.md"
-                            if _adapt_path.exists():
-                                _raw = _adapt_path.read_text(encoding="utf-8").strip()
-                                _adapt_text = _raw or None
-
-                        # Merge adapt.md into narrative for system prompt
-                        if _adapt_text:
-                            narrative = (
-                                f"{narrative}\n\n--- Agent Memory ---\n{_adapt_text}"
-                                if narrative
-                                else _adapt_text
-                            )
-
                        # Build per-node accounts prompt for the next node
                        _node_accounts = self.accounts_prompt or None
                        if self.accounts_data and self.tool_provider_map:
@@ -1490,7 +1473,6 @@ class GraphExecutor:
                            memory=memory,
                            cumulative_tool_names=sorted(cumulative_tool_names),
                            data_dir=data_dir,
-                            adapt_content=_adapt_text,
                        )
                        await continuous_conversation.add_user_message(
                            marker,
@@ -264,7 +264,6 @@ def build_transition_marker(
    memory: SharedMemory,
    cumulative_tool_names: list[str],
    data_dir: Path | str | None = None,
-    adapt_content: str | None = None,
 ) -> str:
    """Build a 'State of the World' transition marker.

@@ -278,7 +277,6 @@ def build_transition_marker(
        memory: Current shared memory state.
        cumulative_tool_names: All tools available (cumulative set).
        data_dir: Path to spillover data directory.
-        adapt_content: Agent working memory (adapt.md) content.

    Returns:
        Transition marker message text.
@@ -344,10 +342,6 @@ def build_transition_marker(
                        "\nData files (use load_data to access):\n" + "\n".join(file_lines)
                    )

-    # Agent working memory
-    if adapt_content:
-        sections.append(f"\n--- Agent Memory ---\n{adapt_content}")
-
    # Available tools
    if cumulative_tool_names:
        sections.append("\nAvailable tools: " + ", ".join(sorted(cumulative_tool_names)))
@@ -231,7 +231,6 @@ flowchart LR
        File1["web_search_1.txt"]
        File2["web_scrape_2.txt"]
        Conv1["conversation_1.md"]
-        Adapt["adapt.md"]
    end

    SaveFile --> SpilloverDir
@@ -256,7 +255,6 @@ flowchart LR
    subgraph SysPrompt [System Prompt Injection]
        FileList["DATA FILES:<br/>  - web_search_1.txt<br/>  - web_scrape_2.txt"]
        ConvList["CONVERSATION HISTORY:<br/>  - conversation_1.md"]
-        AdaptInline["AGENT MEMORY:<br/>(adapt.md inlined)"]
    end

    SpilloverDir -->|"Listed on<br/>every turn"| SysPrompt
@@ -277,7 +275,7 @@ flowchart LR

 **4. File pointers survive compaction.** When the conversation exceeds the context budget, structure-preserving compaction (`compact_preserving_structure`) keeps tool-call messages (which are already tiny pointers) and spills freeform text (user/assistant prose) to numbered `conversation_N.md` files. A reference message replaces the removed text: `"[Previous conversation saved to 'conversation_1.md'. Use load_data('conversation_1.md') to review if needed.]"`. This means the agent retains exact knowledge of every tool it called and where each result is stored.

-**5. The system prompt lists all files** in the spillover directory on every turn. Data files (spilled tool results) and conversation history files are listed separately. `adapt.md` (agent memory / learned preferences) is inlined directly into the system prompt rather than listed — it survives even emergency compaction.
+**5. The system prompt lists all files** in the spillover directory on every turn. Data files (spilled tool results) and conversation history files are listed separately.

 ### Why This Pattern

@@ -291,7 +289,7 @@ flowchart LR

 ## Memory Reflection Logic

-Agents in Hive maintain memory through four interconnected mechanisms: a durable working memory file (`adapt.md`), the conversation history itself, a structured output accumulator, and a three-layer prompt composition system. Together they form a reflection loop where outputs, judge feedback, and execution state are continuously folded back into the agent's context.
+Agents in Hive maintain memory through three interconnected mechanisms: the conversation history itself, a structured output accumulator, and a three-layer prompt composition system. Together they form a reflection loop where outputs, judge feedback, and execution state are continuously folded back into the agent's context.

 ```mermaid
 flowchart TB
@@ -318,18 +316,6 @@ flowchart TB
    SetOutput --> OA_Mem
    OA_Mem --> OA_Cursor

-    %% =========================================
-    %% ADAPT.MD (AGENT WORKING MEMORY)
-    %% =========================================
-    subgraph AdaptMD [adapt.md — Agent Working Memory]
-        Seed["Seeded with<br/>identity + accounts"]
-        RecordLearning["_record_learning():<br/>append output entry<br/>(truncated to 500 chars)"]
-        AgentEdit["Agent calls<br/>save_data / edit_data<br/>to write rules,<br/>preferences, notes"]
-    end
-
-    SetOutput -->|"triggers"| RecordLearning
-    Seed -.->|"first run"| AdaptMD
-
    %% =========================================
    %% JUDGE EVALUATION PIPELINE
    %% =========================================
@@ -394,11 +380,9 @@ flowchart TB
        Layer1["Layer 1 — Identity<br/>(static, never changes)"]
        Layer2["Layer 2 — Narrative<br/>(auto-built from<br/>SharedMemory +<br/>execution path)"]
        Layer3["Layer 3 — Focus<br/>(current node's<br/>system_prompt)"]
-        InlinedAdapt["adapt.md inlined<br/>(survives compaction)"]
    end

    SharedMem -->|"read_all()"| Layer2
-    AdaptMD -->|"inlined every turn"| InlinedAdapt

    %% =========================================
    %% NEXT ITERATION
@@ -417,7 +401,6 @@ flowchart TB
    %% =========================================
    %% STYLING
    %% =========================================
-    style AdaptMD fill:#e8f5e9
    style PromptOnion fill:#e3f2fd
    style JudgePipeline fill:#fff3e0
    style ConvHistory fill:#f3e5f5
@@ -425,17 +408,15 @@ flowchart TB

 ### How It Works

-**1. Outputs trigger dual persistence.** When the LLM calls `set_output(key, value)`, two things happen simultaneously: the `OutputAccumulator` stores the value in memory and writes through to the `ConversationStore` cursor (for crash recovery), and `_record_learning()` appends a truncated entry (≤500 chars) to `adapt.md` under an `## Outputs` section. Duplicate keys are updated in-place, not appended.
+**1. Outputs are persisted via the accumulator.** When the LLM calls `set_output(key, value)`, the `OutputAccumulator` stores the value in memory and writes through to the `ConversationStore` cursor (for crash recovery).

-**2. adapt.md is the agent's durable working memory.** It is seeded on first run with identity and account info. The agent can also write to it directly via `save_data("adapt.md", ...)` or `edit_data("adapt.md", ...)` — storing user rules, behavioral constraints, preferences, and working notes. Unlike conversation history, `adapt.md` is inlined directly into the system prompt every turn, so it survives all compaction tiers including emergency compaction. It is the last thing standing when context is tight.
+**2. Judge feedback becomes conversation memory.** When the judge issues a RETRY verdict with feedback, that feedback is injected as a `[Judge feedback]: ...` user message into the conversation. On the next LLM turn, the agent sees its prior attempt, the judge's critique, and can adjust. This is the core reflexion mechanism — in-context learning without model retraining.

-**3. Judge feedback becomes conversation memory.** When the judge issues a RETRY verdict with feedback, that feedback is injected as a `[Judge feedback]: ...` user message into the conversation. On the next LLM turn, the agent sees its prior attempt, the judge's critique, and can adjust. This is the core reflexion mechanism — in-context learning without model retraining.
+**3. The three-layer prompt onion refreshes each turn.** Layer 1 (identity) is static. Layer 2 (narrative) is rebuilt deterministically from `SharedMemory.read_all()` and the execution path — listing completed phases and current state values. Layer 3 (focus) is the current node's `system_prompt`. At phase transitions in continuous mode, Layer 3 swaps while Layers 1-2 and the full conversation history carry forward.

-**4. The three-layer prompt onion refreshes each turn.** Layer 1 (identity) is static. Layer 2 (narrative) is rebuilt deterministically from `SharedMemory.read_all()` and the execution path — listing completed phases and current state values. Layer 3 (focus) is the current node's `system_prompt`. At phase transitions in continuous mode, Layer 3 swaps while Layers 1-2 and the full conversation history carry forward.
+**4. Phase transitions inject structured reflection.** When execution moves between nodes, a transition marker is inserted into the conversation containing: what phase completed, all outputs in memory, available data files, available tools, and an explicit reflection prompt: *"Before proceeding, briefly reflect: what went well in the previous phase? Are there any gaps or surprises worth noting?"* This engineered metacognition surfaces issues before they compound.

-**5. Phase transitions inject structured reflection.** When execution moves between nodes, a transition marker is inserted into the conversation containing: what phase completed, all outputs in memory, available data files, agent memory content, available tools, and an explicit reflection prompt: *"Before proceeding, briefly reflect: what went well in the previous phase? Are there any gaps or surprises worth noting?"* This engineered metacognition surfaces issues before they compound.
-
-**6. Shared memory connects phases.** On ACCEPT, the accumulator's outputs are written to `SharedMemory`. The narrative layer reads these values to describe progress. In continuous mode, subsequent nodes see both the conversation history (what was discussed) and the structured memory (what was decided). In isolated mode, a `ContextHandoff` summarizes the prior node's conversation for the next node's input.
+**5. Shared memory connects phases.** On ACCEPT, the accumulator's outputs are written to `SharedMemory`. The narrative layer reads these values to describe progress. In continuous mode, subsequent nodes see both the conversation history (what was discussed) and the structured memory (what was decided). In isolated mode, a `ContextHandoff` summarizes the prior node's conversation for the next node's input.

 ### The Judge Evaluation Pipeline

@@ -1078,7 +1059,7 @@ class SignalWeights:
 | **Rule Generation**           | Research               | Transforming human decisions into deterministic rules (closing the loop)     |
 | **HybridJudge**               | Engineering            | Implementation of triangulation with priority-ordered evaluation             |
 | **Reflexion Loop**            | Engineering            | Worker-Judge architecture with RETRY/REPLAN/ESCALATE                         |
-| **Memory Reflection**         | Engineering            | adapt.md durable memory, 3-layer prompt onion, judge feedback injection      |
+| **Memory Reflection**         | Engineering            | 3-layer prompt onion, judge feedback injection, shared memory                |
 | **Graph Execution**           | Engineering            | Node composition, shared memory, edge traversal, sub-agent delegation        |
 | **HITL Protocol**             | Engineering            | Pause/resume, approval workflows, escalation handling                        |

@@ -1096,7 +1077,7 @@ The Hive Agent Framework addresses the fundamental reliability crisis in agentic

 4. **The Foundation**: Goal-driven architecture ensures we're optimizing for user intent, not metric gaming. The reflexion loop between Worker Bees and Judge enables learning from failure without expensive search.

-5. **The Memory System**: Agents reflect through four mechanisms — `adapt.md` (durable working memory inlined into the system prompt, surviving all compaction), the conversation history (carrying judge feedback as injected user messages), the three-layer prompt onion (identity → narrative → focus, rebuilt each turn from shared memory), and structured phase transition markers with explicit reflection prompts at node boundaries.
+5. **The Memory System**: Agents reflect through three mechanisms — the conversation history (carrying judge feedback as injected user messages), the three-layer prompt onion (identity → narrative → focus, rebuilt each turn from shared memory), and structured phase transition markers with explicit reflection prompts at node boundaries.

 6. **The Learning Path**: Human escalations aren't just fallbacks—they're training signals. Confidence calibration tunes thresholds automatically. Rule generation transforms repeated human decisions into deterministic automation.

@@ -884,14 +884,13 @@ Phase 0 and Phase 1 can proceed in parallel — default skills depend on the pro
 | Q1  | Should the registry repo live under `aden-hive` org or a shared `agentskills` org?                                                     | Platform            | Open   |
 | Q2  | Should default skill protocols be adaptive (e.g., `hive.batch-ledger` adjusts checkpoint frequency based on item size)?                | Engineering         | Open   |
 | Q3  | Should default skills be tunable per-node (not just per-agent)?                                                                        | Engineering         | Open   |
-| Q4  | How should default skill protocols interact with existing `adapt.md` working memory? Should `_working_notes` replace or supplement it? | Engineering         | Open   |
-| Q5  | Should `hive.quality-monitor` self-assessments feed into judge decisions (auto-trigger RETRY on self-reported degradation)?            | Engineering         | Open   |
-| Q6  | What is the right combined token budget for default skill prompts? 2000 tokens proposed — configurable or fixed?                       | Engineering         | Open   |
-| Q7  | Should Hive support subagent delegation for skill execution (run skill in isolated session, return summary)?                           | Engineering         | Open   |
-| Q8  | Should Hive also scan `.claude/skills/` for pragmatic compatibility with Claude Code's native skill location?                          | Engineering         | Open   |
-| Q9  | What is the process for promoting a `community` skill to `verified`?                                                                   | Platform + Security | Open   |
-| Q10 | Should the registry support private/enterprise skill indexes (`hive skill config --index-url`)?                                        | Platform            | Open   |
-| Q11 | Should `hive skill test` use the official `skills-ref` library or a Hive-native implementation?                                        | Engineering         | Open   |
+| Q4  | Should `hive.quality-monitor` self-assessments feed into judge decisions (auto-trigger RETRY on self-reported degradation)?            | Engineering         | Open   |
+| Q5  | What is the right combined token budget for default skill prompts? 2000 tokens proposed — configurable or fixed?                       | Engineering         | Open   |
+| Q6  | Should Hive support subagent delegation for skill execution (run skill in isolated session, return summary)?                           | Engineering         | Open   |
+| Q7  | Should Hive also scan `.claude/skills/` for pragmatic compatibility with Claude Code's native skill location?                          | Engineering         | Open   |
+| Q8  | What is the process for promoting a `community` skill to `verified`?                                                                   | Platform + Security | Open   |
+| Q9  | Should the registry support private/enterprise skill indexes (`hive skill config --index-url`)?                                        | Platform            | Open   |
+| Q10 | Should `hive skill test` use the official `skills-ref` library or a Hive-native implementation?                                        | Engineering         | Open   |
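-| Q12 | How should skill-level telemetry (activation counts, eval pass rates) be collected without compromising privacy?                       | Product + Privacy   | Open   |
+| Q11 | How should skill-level telemetry (activation counts, eval pass rates) be collected without compromising privacy?                       | Product + Privacy   | Open   |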

 ---