wip: inbox management agent setup and gmail tool updates

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
bryan
2026-02-12 13:22:19 -08:00
parent 930f417729
commit 7bceab3cea
13 changed files with 203 additions and 149 deletions
+4
View File
@@ -68,6 +68,7 @@ from .storage import (
)
from .store import CredentialStore
from .template import TemplateResolver
from .validation import ensure_credential_key_env, validate_agent_credentials
# Aden sync components (lazy import to avoid httpx dependency when not needed)
# Usage: from core.framework.credentials.aden import AdenSyncProvider
@@ -111,6 +112,9 @@ __all__ = [
"CredentialRefreshError",
"CredentialValidationError",
"CredentialDecryptionError",
# Validation
"ensure_credential_key_env",
"validate_agent_credentials",
# Aden sync (optional - requires httpx)
"AdenSyncProvider",
"AdenCredentialClient",
+133
View File
@@ -0,0 +1,133 @@
"""Credential validation utilities.
Provides reusable credential validation for agents, whether run through
the AgentRunner or directly via GraphExecutor.
"""
from __future__ import annotations
import logging
import os
logger = logging.getLogger(__name__)
def ensure_credential_key_env() -> None:
"""Load HIVE_CREDENTIAL_KEY from shell config if not already in environment.
The setup-credentials skill writes the encryption key to ~/.zshrc or ~/.bashrc.
If the user hasn't sourced their config in the current shell, this reads it
directly so the runner (and any MCP subprocesses it spawns) can unlock the
encrypted credential store.
Only HIVE_CREDENTIAL_KEY is loaded this way; all other secrets (API keys, etc.)
come from the credential store itself.
"""
if os.environ.get("HIVE_CREDENTIAL_KEY"):
return
try:
from aden_tools.credentials.shell_config import check_env_var_in_shell_config
found, value = check_env_var_in_shell_config("HIVE_CREDENTIAL_KEY")
if found and value:
os.environ["HIVE_CREDENTIAL_KEY"] = value
logger.debug("Loaded HIVE_CREDENTIAL_KEY from shell config")
except ImportError:
pass
def validate_agent_credentials(nodes: list) -> None:
"""Check that required credentials are available before running an agent.
Scans node specs for required tools and node types, then checks whether
the corresponding credentials exist in the credential store.
Raises CredentialError with actionable guidance if any are missing.
Args:
nodes: List of NodeSpec objects from the agent graph.
"""
required_tools: set[str] = set()
for node in nodes:
if node.tools:
required_tools.update(node.tools)
node_types: set[str] = {node.node_type for node in nodes}
try:
from aden_tools.credentials import CREDENTIAL_SPECS
from framework.credentials import CredentialStore
from framework.credentials.storage import (
CompositeStorage,
EncryptedFileStorage,
EnvVarStorage,
)
except ImportError:
return # aden_tools not installed, skip check
# Build credential store
env_mapping = {
(spec.credential_id or name): spec.env_var for name, spec in CREDENTIAL_SPECS.items()
}
storages: list = [EnvVarStorage(env_mapping=env_mapping)]
if os.environ.get("HIVE_CREDENTIAL_KEY"):
storages.insert(0, EncryptedFileStorage())
if len(storages) == 1:
storage = storages[0]
else:
storage = CompositeStorage(primary=storages[0], fallbacks=storages[1:])
store = CredentialStore(storage=storage)
# Build reverse mappings
tool_to_cred: dict[str, str] = {}
node_type_to_cred: dict[str, str] = {}
for cred_name, spec in CREDENTIAL_SPECS.items():
for tool_name in spec.tools:
tool_to_cred[tool_name] = cred_name
for nt in spec.node_types:
node_type_to_cred[nt] = cred_name
missing: list[str] = []
checked: set[str] = set()
# Check tool credentials
for tool_name in sorted(required_tools):
cred_name = tool_to_cred.get(tool_name)
if cred_name is None or cred_name in checked:
continue
checked.add(cred_name)
spec = CREDENTIAL_SPECS[cred_name]
cred_id = spec.credential_id or cred_name
if spec.required and not store.is_available(cred_id):
affected = sorted(t for t in required_tools if t in spec.tools)
entry = f" {spec.env_var} for {', '.join(affected)}"
if spec.help_url:
entry += f"\n Get it at: {spec.help_url}"
missing.append(entry)
# Check node type credentials (e.g., ANTHROPIC_API_KEY for LLM nodes)
for nt in sorted(node_types):
cred_name = node_type_to_cred.get(nt)
if cred_name is None or cred_name in checked:
continue
checked.add(cred_name)
spec = CREDENTIAL_SPECS[cred_name]
cred_id = spec.credential_id or cred_name
if spec.required and not store.is_available(cred_id):
affected_types = sorted(t for t in node_types if t in spec.node_types)
entry = f" {spec.env_var} for {', '.join(affected_types)} nodes"
if spec.help_url:
entry += f"\n Get it at: {spec.help_url}"
missing.append(entry)
if missing:
from framework.credentials.models import CredentialError
lines = ["Missing required credentials:\n"]
lines.extend(missing)
lines.append(
"\nTo fix: run /hive-credentials in Claude Code."
"\nIf you've already set up credentials, restart your terminal to load them."
)
raise CredentialError("\n".join(lines))
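A minimal usage sketch for the new helpers, mirroring the DeepResearchAgent wiring later in this commit (the surrounding setup is illustrative):

```python
from framework.credentials.validation import (
    ensure_credential_key_env,
    validate_agent_credentials,
)

def setup_executor(nodes: list) -> None:
    # Pull HIVE_CREDENTIAL_KEY from ~/.zshrc / ~/.bashrc if the current
    # shell never sourced it, so the encrypted store can be unlocked.
    ensure_credential_key_env()
    # Raises CredentialError with actionable guidance if any required
    # credential (per CREDENTIAL_SPECS) is missing from the store.
    validate_agent_credentials(nodes)
    # ...continue with GraphExecutor / AgentRunner construction.
```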
+6 -107
View File
@@ -9,6 +9,10 @@ from pathlib import Path
from typing import TYPE_CHECKING, Any
from framework.config import get_hive_config, get_preferred_model
from framework.credentials.validation import (
ensure_credential_key_env as _ensure_credential_key_env,
validate_agent_credentials,
)
from framework.graph import Goal
from framework.graph.edge import (
DEFAULT_MAX_TOKENS,
@@ -31,32 +35,6 @@ if TYPE_CHECKING:
logger = logging.getLogger(__name__)
def _ensure_credential_key_env() -> None:
"""Load HIVE_CREDENTIAL_KEY from shell config if not already in environment.
The setup-credentials skill writes the encryption key to ~/.zshrc or ~/.bashrc.
If the user hasn't sourced their config in the current shell, this reads it
directly so the runner (and any MCP subprocesses it spawns) can unlock the
encrypted credential store.
Only HIVE_CREDENTIAL_KEY is loaded this way; all other secrets (API keys, etc.)
come from the credential store itself.
"""
if os.environ.get("HIVE_CREDENTIAL_KEY"):
return
try:
from aden_tools.credentials.shell_config import check_env_var_in_shell_config
found, value = check_env_var_in_shell_config("HIVE_CREDENTIAL_KEY")
if found and value:
os.environ["HIVE_CREDENTIAL_KEY"] = value
logger.debug("Loaded HIVE_CREDENTIAL_KEY from shell config")
except ImportError:
pass
CLAUDE_CREDENTIALS_FILE = Path.home() / ".claude" / ".credentials.json"
@@ -331,88 +309,8 @@ class AgentRunner:
"""Check that required credentials are available before spawning MCP servers.
Raises CredentialError with actionable guidance if any are missing.
Uses graph node specs + CREDENTIAL_SPECS; no tool registry needed.
"""
required_tools: set[str] = set()
for node in self.graph.nodes:
if node.tools:
required_tools.update(node.tools)
node_types: set[str] = {node.node_type for node in self.graph.nodes}
try:
from aden_tools.credentials import CREDENTIAL_SPECS
from framework.credentials import CredentialStore
from framework.credentials.storage import (
CompositeStorage,
EncryptedFileStorage,
EnvVarStorage,
)
except ImportError:
return # aden_tools not installed, skip check
# Build credential store (same logic as validate())
env_mapping = {
(spec.credential_id or name): spec.env_var for name, spec in CREDENTIAL_SPECS.items()
}
storages: list = [EnvVarStorage(env_mapping=env_mapping)]
if os.environ.get("HIVE_CREDENTIAL_KEY"):
storages.insert(0, EncryptedFileStorage())
if len(storages) == 1:
storage = storages[0]
else:
storage = CompositeStorage(primary=storages[0], fallbacks=storages[1:])
store = CredentialStore(storage=storage)
# Build reverse mappings
tool_to_cred: dict[str, str] = {}
node_type_to_cred: dict[str, str] = {}
for cred_name, spec in CREDENTIAL_SPECS.items():
for tool_name in spec.tools:
tool_to_cred[tool_name] = cred_name
for nt in spec.node_types:
node_type_to_cred[nt] = cred_name
missing: list[str] = []
checked: set[str] = set()
# Check tool credentials
for tool_name in sorted(required_tools):
cred_name = tool_to_cred.get(tool_name)
if cred_name is None or cred_name in checked:
continue
checked.add(cred_name)
spec = CREDENTIAL_SPECS[cred_name]
cred_id = spec.credential_id or cred_name
if spec.required and not store.is_available(cred_id):
affected = sorted(t for t in required_tools if t in spec.tools)
entry = f" {spec.env_var} for {', '.join(affected)}"
if spec.help_url:
entry += f"\n Get it at: {spec.help_url}"
missing.append(entry)
# Check node type credentials (e.g., ANTHROPIC_API_KEY for LLM nodes)
for nt in sorted(node_types):
cred_name = node_type_to_cred.get(nt)
if cred_name is None or cred_name in checked:
continue
checked.add(cred_name)
spec = CREDENTIAL_SPECS[cred_name]
cred_id = spec.credential_id or cred_name
if spec.required and not store.is_available(cred_id):
affected_types = sorted(t for t in node_types if t in spec.node_types)
entry = f" {spec.env_var} for {', '.join(affected_types)} nodes"
if spec.help_url:
entry += f"\n Get it at: {spec.help_url}"
missing.append(entry)
if missing:
from framework.credentials.models import CredentialError
lines = ["Missing required credentials:\n"]
lines.extend(missing)
lines.append("\nTo fix: run /hive-credentials in Claude Code.")
raise CredentialError("\n".join(lines))
validate_agent_credentials(self.graph.nodes)
@staticmethod
def _import_agent_module(agent_path: Path):
@@ -521,6 +419,7 @@ class AgentRunner:
nodes=nodes,
edges=edges,
max_tokens=max_tokens,
loop_config=getattr(agent_module, "loop_config", {}),
)
return cls(
@@ -651,6 +651,7 @@ class ExecutionStream:
max_tokens=self.graph.max_tokens,
max_steps=self.graph.max_steps,
cleanup_llm_model=self.graph.cleanup_llm_model,
loop_config=self.graph.loop_config,
)
async def wait_for_completion(
@@ -177,6 +177,14 @@ class DeepResearchAgent:
"""Set up the executor with all components."""
from pathlib import Path
from framework.credentials.validation import (
ensure_credential_key_env,
validate_agent_credentials,
)
ensure_credential_key_env()
validate_agent_credentials(self.nodes)
storage_path = Path.home() / ".hive" / "agents" / "deep_research_agent"
storage_path.mkdir(parents=True, exist_ok=True)
+19 -17
View File
@@ -6,7 +6,7 @@
## Overview
Automatically triage unread Gmail emails using user-defined free-text rules. Fetch unread emails (configurable batch size, default 100), classify each by urgency and type, then take appropriate actions — trash spam, archive low-priority messages, mark important emails, and categorize the rest as Action Needed, FYI, or Waiting On.
Automatically manage Gmail inbox emails using user-defined free-text rules. Fetch emails from the inbox (configurable batch size, default 100, supports pagination for any count), then take appropriate actions — trash junk, mark spam, mark important, mark as unread/read, archive, star, and categorize for reporting.
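For example (illustrative only), the free-text rules might read: "Star anything from my manager and mark it important. Archive newsletters, mark promotional mail as read, and move obvious junk to trash."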
## Architecture
@@ -19,22 +19,22 @@ intake → fetch-emails → classify-and-act → report
### Nodes (4 total)
1. **intake** (event_loop)
- Receive and validate input parameters: triage rules and max_emails. Present the interpreted rules back to the user for confirmation before proceeding.
- Receive and validate input parameters: rules and max_emails. Present the interpreted rules back to the user for confirmation.
- Reads: `rules, max_emails`
- Writes: `triage_rules, max_emails`
- Writes: `rules, max_emails`
- Client-facing: Yes (blocks for user input)
2. **fetch-emails** (event_loop)
- Fetch unread emails from Gmail up to the configured batch limit. Only retrieves emails with the UNREAD label.
- Reads: `triage_rules, max_emails`
- Fetch emails from the Gmail inbox up to the configured batch limit. Processes emails in small batches across multiple iterations.
- Reads: `rules, max_emails`
- Writes: `emails`
- Tools: `gmail_list_messages, gmail_get_message`
3. **classify-and-act** (event_loop)
- Classify each email against the user's triage rules, then execute the appropriate Gmail actions (trash, archive, mark important, add labels).
- Reads: `triage_rules, emails`
- Execute the user's rules on each email using the appropriate Gmail actions (trash, spam, mark important, mark unread/read, archive, star).
- Reads: `rules, emails`
- Writes: `actions_taken`
- Tools: `gmail_trash_message, gmail_modify_message, gmail_batch_modify_messages`
4. **report** (event_loop)
- Generate a summary report of all triage actions taken, organized by category.
- Generate a summary report of all actions taken, organized by action type.
- Reads: `actions_taken`
- Writes: `summary_report`
@@ -49,30 +49,32 @@ intake → fetch-emails → classify-and-act → report
### Success Criteria
**Each unread email is classified according to the user's free-text rules with appropriate urgency category (action needed, FYI, waiting on) and type (spam, newsletter, important, etc.)** (weight 0.3)
**Each email is acted upon according to the user's free-text rules** (weight 0.3)
- Metric: classification_match_rate
- Target: >=90%
**Trash, archive, mark-important, and label actions are applied correctly to the right emails based on classification** (weight 0.25)
**Trash, spam, mark-important, mark-unread, mark-read, archive, and star actions are applied correctly using only valid Gmail system labels** (weight 0.25)
- Metric: action_correctness
- Target: >=95%
**Only unread emails are fetched and processed; read emails are never modified** (weight 0.2)
- Metric: read_email_modifications
- Target: 0
**Produces a summary report showing what was done: how many trashed, archived, marked important, and categorized, with email subjects listed per category** (weight 0.15)
**Only inbox emails are fetched and processed (label:INBOX scope)** (weight 0.2)
- Metric: inbox_scope_accuracy
- Target: 100%
**Produces a summary report showing what was done, with email subjects listed per action** (weight 0.15)
- Metric: report_completeness
- Target: 100%
**All fetched emails up to the configured max are classified and acted upon; none are silently skipped** (weight 0.1)
**All fetched emails up to the configured max are processed; none are silently skipped** (weight 0.1)
- Metric: emails_processed_ratio
- Target: 100%
### Constraints
**Must never modify, trash, or relabel emails that are already read** (hard)
**Must only fetch and process emails from the inbox (label:INBOX)** (hard)
- Category: safety
**Must not process more emails than the configured max_emails parameter** (hard)
- Category: operational
**Archiving removes from inbox but preserves the email; only explicit trash rules move emails to trash** (hard)
**Marking as spam moves the email to the spam folder but preserves it; only explicit trash rules move emails to trash** (hard)
- Category: safety
**Must only use valid Gmail system labels; custom labels like 'FYI' or 'Action Needed' must NOT be applied via the Gmail API** (hard)
- Category: operational
## Required Tools
@@ -5,7 +5,7 @@ Triage unread Gmail emails by trashing spam, archiving low-priority,
marking important, and categorizing by urgency (Action Needed, FYI, Waiting On).
"""
from .agent import InboxManagementAgent, default_agent, goal, nodes, edges
from .agent import InboxManagementAgent, default_agent, goal, nodes, edges, loop_config
from .config import RuntimeConfig, AgentMetadata, default_config, metadata
__version__ = "1.0.0"
@@ -16,6 +16,7 @@ __all__ = [
"goal",
"nodes",
"edges",
"loop_config",
"RuntimeConfig",
"AgentMetadata",
"default_config",
@@ -34,7 +34,7 @@ def cli():
@cli.command()
@click.option("--rules", "-r", type=str, required=True, help="Free-text triage rules")
@click.option("--max-emails", "-m", type=int, default=100, help="Max emails to process (default: 100)")
@click.option("--max-emails", "-m", type=int, default=100, help="Max emails to process, supports any count via pagination (default: 100)")
@click.option("--mock", is_flag=True, help="Run in mock mode")
@click.option("--quiet", "-q", is_flag=True, help="Only output result JSON")
@click.option("--verbose", "-v", is_flag=True, help="Show execution details")
+16 -12
View File
@@ -21,14 +21,14 @@
{
"id": "intake",
"name": "Intake",
"description": "Receive and validate input parameters: triage rules and max_emails. Present the interpreted rules back to the user for confirmation before proceeding.",
"description": "Receive and validate input parameters: rules and max_emails. Present the interpreted rules back to the user for confirmation before proceeding.",
"node_type": "event_loop",
"input_keys": [
"rules",
"max_emails"
],
"output_keys": [
"triage_rules",
"rules",
"max_emails"
],
"nullable_output_keys": [],
@@ -49,10 +49,10 @@
{
"id": "fetch-emails",
"name": "Fetch Emails",
"description": "Fetch unread emails from Gmail up to the configured batch limit. Only retrieves emails with the UNREAD label.",
"description": "Fetch emails from the Gmail inbox up to the configured batch limit. Processes emails in small batches across multiple iterations to stay within tool call limits. Saves results to a file.",
"node_type": "event_loop",
"input_keys": [
"triage_rules",
"rules",
"max_emails"
],
"output_keys": [
@@ -61,10 +61,11 @@
"nullable_output_keys": [],
"input_schema": {},
"output_schema": {},
"system_prompt": "You are an inbox management assistant. Your job is to fetch unread emails from Gmail.\n\n**IMPORTANT CONSTRAINTS:**\n- ONLY fetch emails that are UNREAD. Use the query \"is:unread\" with gmail_list_messages.\n- Fetch at most the number specified in max_emails (from context).\n- For each email returned by gmail_list_messages, use gmail_get_message to get its full details (subject, from, snippet, body, labels).\n\n**PROCESS:**\n1. Call gmail_list_messages with query \"is:unread\" and max_results set to the max_emails value.\n2. For each message in the results, call gmail_get_message to get full details.\n3. Collect all email data into a structured list.\n4. Call set_output(\"emails\", <JSON string of the email list>).\n\nEach email in the list should include: id, subject, from, date, snippet, body (or body preview), and current labels.\n\nIf there are no unread emails, set_output(\"emails\", \"[]\") \u2014 an empty list is valid.",
"system_prompt": "You are an inbox management assistant. Your job is to fetch emails from the Gmail inbox.\n\n**CRITICAL \u2014 WORK IN SMALL BATCHES:**\nYou MUST limit yourself to at most 25 tool calls per iteration. This means:\n- First iteration: call gmail_list_messages ONCE, then call gmail_get_message for UP TO 24 of the returned messages. If there are more messages remaining, do NOT call set_output yet \u2014 just stop and wait for the next iteration.\n- Subsequent iterations: continue calling gmail_get_message for remaining messages from the current page (up to 25 calls per iteration). When the current page is done and you need more emails, call gmail_list_messages with the page_token to get the next page.\n- Final iteration: once you have fetched enough emails (up to max_emails) or there are no more pages, save to file and set output.\n\n**IMPORTANT CONSTRAINTS:**\n- Fetch emails from the inbox using the query \"label:INBOX\" with gmail_list_messages.\n- Fetch at most the number specified in max_emails (from context).\n- Use max_results of 50 per gmail_list_messages call (to keep pages manageable).\n\n**CRITICAL \u2014 Message IDs:**\n- gmail_list_messages returns objects with BOTH \"id\" and \"threadId\". These are DIFFERENT values.\n- You MUST use the \"id\" field (NOT \"threadId\") when calling gmail_get_message.\n- If gmail_get_message returns {\"error\": \"Message not found\"}, SKIP that message and continue.\n\n**PROCESS (repeat across iterations):**\n1. If this is your first iteration, call gmail_list_messages with query \"label:INBOX\" and max_results=50.\n2. Call gmail_get_message for up to 24 messages (using the \"id\" field, NOT \"threadId\").\n3. Keep a running count of all emails fetched so far across all iterations.\n4. If you still need more emails AND there are more messages on the current page or a next_page_token exists:\n - Do NOT call set_output yet. Stop and wait for the next iteration to continue.\n5. Once you have fetched enough emails (reached max_emails) or exhausted all pages:\n a. Build a JSON array of all collected emails.\n b. Call save_data(filename=\"emails.json\", data=<the JSON array as a string>) to write the emails to a file.\n c. Call set_output(\"emails\", \"emails.json\") \u2014 pass ONLY the filename, NOT the email data.\n\n**CRITICAL \u2014 DO NOT pass email data directly to set_output. The data is too large. Always save to file first with save_data, then pass the filename.**\n\nEach email should include: id, subject, from, date, snippet, and current labels.\n\nIf there are no emails, set_output(\"emails\", \"[]\").",
"tools": [
"gmail_list_messages",
"gmail_get_message"
"gmail_get_message",
"save_data"
],
"model": null,
"function": null,
@@ -79,10 +80,10 @@
{
"id": "classify-and-act",
"name": "Classify and Act",
"description": "Classify each email against the user's triage rules, then execute the appropriate Gmail actions (trash, archive, mark important, add labels).",
"description": "Execute the user's rules on each email using the appropriate Gmail actions, with batch operations where possible. Loads emails from a file saved by the fetch-emails node.",
"node_type": "event_loop",
"input_keys": [
"triage_rules",
"rules",
"emails"
],
"output_keys": [
@@ -91,11 +92,12 @@
"nullable_output_keys": [],
"input_schema": {},
"output_schema": {},
"system_prompt": "You are an inbox management assistant. Your job is to classify emails and take action based on the user's triage rules.\n\n**TRIAGE RULES** are provided in the context as \"triage_rules\". Apply these rules to each email.\n\n**AVAILABLE ACTIONS:**\n1. **TRASH** \u2014 For spam, unwanted emails. Use gmail_trash_message(message_id).\n2. **ARCHIVE** \u2014 For low-priority, newsletters. Use gmail_modify_message(message_id, remove_labels=[\"INBOX\"]) to remove from inbox but keep in All Mail.\n3. **MARK IMPORTANT** \u2014 For urgent, action-needed emails. Use gmail_modify_message(message_id, add_labels=[\"IMPORTANT\"]).\n4. **CATEGORIZE** \u2014 Add urgency labels. Use gmail_modify_message(message_id, add_labels=[<category>]) where category is one of: \"Action Needed\", \"FYI\", \"Waiting On\".\n\n**IMPORTANT CONSTRAINTS:**\n- NEVER modify read emails. The emails list from context contains ONLY unread emails, so you are safe to act on all of them.\n- Apply the MOST appropriate action to each email based on the rules.\n- An email can have BOTH an action (trash/archive/mark important) AND a category (Action Needed/FYI/Waiting On) if appropriate \u2014 but trashed emails don't need a category.\n\n**PROCESS:**\n1. Read the emails list from context.\n2. For each email, classify it against the triage rules.\n3. Execute the appropriate Gmail action(s) for each email.\n4. Track every action taken: {email_id, subject, from, classification, action, category}.\n5. After processing ALL emails, call set_output(\"actions_taken\", <JSON string of the actions list>).\n\nIf the emails list is empty, set_output(\"actions_taken\", \"[]\").",
"system_prompt": "You are an inbox management assistant. Your job is to execute the user's rules on their emails.\n\n**STEP 1 \u2014 LOAD EMAILS FROM FILE:**\nThe \"emails\" key in context contains a FILENAME (e.g. \"emails.json\"), NOT the actual email data.\nYou MUST call load_data(filename=<the filename from context>) to retrieve the email list.\nUse pagination (offset/limit) if the file is large \u2014 load_data returns 50 lines at a time by default.\n\n**RULES** are provided in the context as \"rules\". Read them carefully \u2014 they tell you exactly what to do.\n\nIf the rules say to apply ONE action to ALL emails (e.g. \"mark all as unread\"), just do that. Do NOT classify or categorize emails unless the rules specifically ask for it.\n\n**AVAILABLE ACTIONS (use ONLY these Gmail system labels):**\n1. **TRASH** \u2014 Use gmail_trash_message(message_id) individually.\n2. **SPAM** \u2014 Use gmail_batch_modify_messages(message_ids=[...], add_labels=[\"SPAM\"], remove_labels=[\"INBOX\"]).\n3. **MARK IMPORTANT** \u2014 Use gmail_batch_modify_messages(message_ids=[...], add_labels=[\"IMPORTANT\"]).\n4. **MARK AS UNREAD** \u2014 Use gmail_batch_modify_messages(message_ids=[...], add_labels=[\"UNREAD\"]).\n5. **MARK AS READ** \u2014 Use gmail_batch_modify_messages(message_ids=[...], remove_labels=[\"UNREAD\"]).\n6. **ARCHIVE** \u2014 Use gmail_batch_modify_messages(message_ids=[...], remove_labels=[\"INBOX\"]).\n7. **STAR** \u2014 Use gmail_batch_modify_messages(message_ids=[...], add_labels=[\"STARRED\"]).\n\n**CRITICAL \u2014 VALID GMAIL LABELS ONLY:**\nYou can ONLY use these system label IDs: INBOX, UNREAD, IMPORTANT, STARRED, SPAM, TRASH, SENT, DRAFT, CATEGORY_PERSONAL, CATEGORY_SOCIAL, CATEGORY_PROMOTIONS, CATEGORY_UPDATES, CATEGORY_FORUMS.\nDo NOT use custom labels like \"FYI\", \"Action Needed\", or \"Waiting On\" \u2014 these will cause API errors.\n\n**IMPORTANT \u2014 USE BATCH OPERATIONS TO MINIMIZE TOOL CALLS:**\n- Group emails by action and execute in bulk using gmail_batch_modify_messages \u2014 one call per action group.\n- Only use gmail_trash_message individually for emails that need trashing.\n- Process in batches of up to 25 tool calls per iteration. If you have more actions, stop and continue in the next iteration.\n\n**PROCESS:**\n1. Load emails from the file using load_data(filename=<emails filename from context>). Page through with offset/limit if needed.\n2. Read the rules from context.\n3. For each email, determine what action the rules require.\n4. Group emails by action and execute batch operations.\n5. Track every action: {email_id, subject, from, action}.\n6. After processing ALL emails, call set_output(\"actions_taken\", <JSON string of the actions list>).\n\nIf the emails list is empty, set_output(\"actions_taken\", \"[]\").",
"tools": [
"gmail_trash_message",
"gmail_modify_message",
"gmail_batch_modify_messages"
"gmail_batch_modify_messages",
"load_data"
],
"model": null,
"function": null,
@@ -110,7 +112,7 @@
{
"id": "report",
"name": "Report",
"description": "Generate a summary report of all triage actions taken, organized by category.",
"description": "Generate a summary report of all actions taken, organized by action type.",
"node_type": "event_loop",
"input_keys": [
"actions_taken"
@@ -253,7 +255,9 @@
"gmail_list_messages",
"gmail_get_message",
"gmail_trash_message",
"gmail_batch_modify_messages"
"gmail_batch_modify_messages",
"save_data",
"load_data"
],
"metadata": {
"created_at": "2026-02-11T19:09:55.831691",
+6 -5
View File
@@ -143,6 +143,11 @@ entry_node = "intake"
entry_points = {"start": "intake"}
pause_nodes = []
terminal_nodes = ["report"]
loop_config = {
"max_iterations": 100,
"max_tool_calls_per_turn": 20,
"max_history_tokens": 32000,
}
class InboxManagementAgent:
@@ -180,11 +185,7 @@ class InboxManagementAgent:
edges=self.edges,
default_model=self.config.model,
max_tokens=self.config.max_tokens,
loop_config={
"max_iterations": 100,
"max_tool_calls_per_turn": 20,
"max_history_tokens": 32000,
},
loop_config=loop_config,
)
def _setup(self, mock_mode=False) -> GraphExecutor:
+3 -2
View File
@@ -36,7 +36,8 @@ EMAIL_CREDENTIALS = {
"google": CredentialSpec(
env_var="GOOGLE_ACCESS_TOKEN",
tools=[
"send_email",
# send_email is excluded: it's a multi-provider tool that checks
# credentials at runtime based on the provider parameter.
"gmail_reply_email",
"gmail_list_messages",
"gmail_get_message",
@@ -45,7 +46,7 @@ EMAIL_CREDENTIALS = {
"gmail_batch_modify_messages",
],
node_types=[],
required=False,
required=True,
startup_required=False,
help_url="https://hive.adenhq.com",
description="Google OAuth2 access token (via Aden) - used for Gmail",
@@ -97,7 +97,7 @@ def register_tools(
@mcp.tool()
def gmail_list_messages(
query: str = "is:unread",
max_results: int = 20,
max_results: int = 100,
page_token: str | None = None,
) -> dict:
"""
@@ -109,7 +109,7 @@ def register_tools(
Args:
query: Gmail search query (default: "is:unread").
max_results: Maximum messages to return (1-100, default 20).
max_results: Maximum messages to return (1-500, default 100).
page_token: Token for fetching the next page of results.
Returns:
@@ -121,7 +121,7 @@ def register_tools(
if isinstance(token, dict):
return token
max_results = max(1, min(100, max_results))
max_results = max(1, min(500, max_results))
params: dict[str, str | int] = {"q": query, "maxResults": max_results}
if page_token:
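With the cap raised to 500, callers needing more than one page still rely on page_token; a minimal client-side loop, assuming the response dict carries `messages` and `next_page_token` as the fetch-emails prompt suggests:

```python
def collect_inbox_ids(list_messages, max_emails: int) -> list[str]:
    """Page through gmail_list_messages until max_emails IDs are gathered."""
    ids: list[str] = []
    page_token: str | None = None
    while len(ids) < max_emails:
        resp = list_messages(query="label:INBOX", max_results=100, page_token=page_token)
        # Use the "id" field, never "threadId", for follow-up gmail_get_message calls.
        ids.extend(m["id"] for m in resp.get("messages", []))
        page_token = resp.get("next_page_token")
        if not page_token:
            break
    return ids[:max_emails]
```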
+1 -1
View File
@@ -152,7 +152,7 @@ class TestListMessages:
with patch(HTTPX_MODULE, return_value=mock_resp) as mock_req:
list_fn(max_results=999)
assert mock_req.call_args[1]["params"]["maxResults"] == 100
assert mock_req.call_args[1]["params"]["maxResults"] == 500
def test_list_token_expired(self, list_fn, monkeypatch):
monkeypatch.setenv("GOOGLE_ACCESS_TOKEN", "expired")