From e76b6cb5752dcb0a8d098538e02d6c8970e95755 Mon Sep 17 00:00:00 2001 From: Antiarin Date: Wed, 11 Mar 2026 02:45:12 +0530 Subject: [PATCH 01/45] feat(notion): enhance Notion tool functionality with new block types and improved page creation - Added BlockType enum for various Notion block types. - Updated notion_create_page to allow specifying parent_page_id and title_property. - Enhanced notion_query_database to support sorting and pagination. - Introduced notion_create_database for creating databases under a parent page. - Improved error handling for required parameters in page and database creation. --- tools/src/aden_tools/credentials/notion.py | 9 +- .../tools/notion_tool/notion_tool.py | 465 +++++++++++++++--- 2 files changed, 403 insertions(+), 71 deletions(-) diff --git a/tools/src/aden_tools/credentials/notion.py b/tools/src/aden_tools/credentials/notion.py index 089ba879..2da70cb7 100644 --- a/tools/src/aden_tools/credentials/notion.py +++ b/tools/src/aden_tools/credentials/notion.py @@ -14,10 +14,15 @@ NOTION_CREDENTIALS = { "notion_search", "notion_get_page", "notion_create_page", + "notion_update_page", "notion_query_database", "notion_get_database", - "notion_update_page", - "notion_archive_page", + "notion_create_database", + "notion_update_database", + "notion_get_block_children", + "notion_get_block", + "notion_update_block", + "notion_delete_block", "notion_append_blocks", ], required=True, diff --git a/tools/src/aden_tools/tools/notion_tool/notion_tool.py b/tools/src/aden_tools/tools/notion_tool/notion_tool.py index 316cbe0a..e70dc2ca 100644 --- a/tools/src/aden_tools/tools/notion_tool/notion_tool.py +++ b/tools/src/aden_tools/tools/notion_tool/notion_tool.py @@ -11,6 +11,7 @@ API Reference: https://developers.notion.com/reference from __future__ import annotations import os +from enum import StrEnum from typing import TYPE_CHECKING, Any import httpx @@ -23,6 +24,18 @@ API_BASE = "https://api.notion.com/v1" NOTION_VERSION = "2022-06-28" +class 
BlockType(StrEnum): + PARAGRAPH = "paragraph" + HEADING_1 = "heading_1" + HEADING_2 = "heading_2" + HEADING_3 = "heading_3" + BULLETED_LIST_ITEM = "bulleted_list_item" + NUMBERED_LIST_ITEM = "numbered_list_item" + TO_DO = "to_do" + QUOTE = "quote" + CALLOUT = "callout" + + def _get_credentials(credentials: CredentialStoreAdapter | None) -> str | None: """Return the Notion integration token.""" if credentials is not None: @@ -201,20 +214,29 @@ def register_tools( @mcp.tool() def notion_create_page( - parent_database_id: str, title: str, + parent_database_id: str = "", + parent_page_id: str = "", + title_property: str = "", properties_json: str = "", content: str = "", ) -> dict[str, Any]: """ - Create a new page in a Notion database. + Create a new page in a Notion database or as a child of another page. + + Provide exactly one of parent_database_id or parent_page_id. Args: - parent_database_id: ID of the parent database (required) title: Page title (required) + parent_database_id: ID of the parent database (optional) + parent_page_id: ID of the parent page (optional) + title_property: Name of the title column in the database + (required when using parent_database_id). Use + notion_get_database to find the correct property name. + Ignored when parent_page_id is used. properties_json: Additional properties as JSON string e.g. '{"Status": {"select": {"name": "Done"}}}' - (optional) + Ignored when parent_page_id is used. 
(optional) content: Plain text content for the page body (optional) Returns: @@ -225,22 +247,37 @@ def register_tools( token = _get_credentials(credentials) if not token: return _auth_error() - if not parent_database_id or not title: - return {"error": "parent_database_id and title are required"} + if not title: + return {"error": "title is required"} + if not parent_database_id and not parent_page_id: + return {"error": "Provide parent_database_id or parent_page_id"} + if parent_database_id and parent_page_id: + return {"error": "Provide only one of parent_database_id or parent_page_id, not both"} - body: dict[str, Any] = { - "parent": {"database_id": parent_database_id}, - "properties": { - "Name": {"title": [{"text": {"content": title}}]}, - }, - } + body: dict[str, Any] = {} - if properties_json: - try: - extra = json_mod.loads(properties_json) - body["properties"].update(extra) - except json_mod.JSONDecodeError: - return {"error": "properties_json is not valid JSON"} + match (bool(parent_database_id), bool(parent_page_id)): + case (True, False): + if not title_property: + return { + "error": "title_property is required when using parent_database_id. 
" + "Use notion_get_database to find the title column name.", + } + body["parent"] = {"database_id": parent_database_id} + body["properties"] = { + title_property: {"title": [{"text": {"content": title}}]}, + } + if properties_json: + try: + extra = json_mod.loads(properties_json) + body["properties"].update(extra) + except json_mod.JSONDecodeError: + return {"error": "properties_json is not valid JSON"} + case (False, True): + body["parent"] = {"page_id": parent_page_id} + body["properties"] = { + "title": {"title": [{"text": {"content": title}}]}, + } if content: body["children"] = [ @@ -265,6 +302,8 @@ def register_tools( def notion_query_database( database_id: str, filter_json: str = "", + sorts_json: str = "", + start_cursor: str = "", page_size: int = 50, ) -> dict[str, Any]: """ @@ -273,10 +312,16 @@ def register_tools( Args: database_id: Notion database ID (required) filter_json: Notion filter object as JSON string (optional) + e.g. '{"property": "Status", "select": {"equals": "Done"}}' + sorts_json: Sort order as JSON array string (optional) + e.g. 
'[{"property": "Created", "direction": "descending"}]' + or '[{"timestamp": "last_edited_time", "direction": "ascending"}]' + start_cursor: Pagination cursor from a previous response's + next_cursor field (optional) page_size: Max results (1-100, default 50) Returns: - Dict with matching pages and their properties + Dict with matching pages, count, has_more, and next_cursor """ import json as json_mod @@ -296,6 +341,15 @@ def register_tools( except json_mod.JSONDecodeError: return {"error": "filter_json is not valid JSON"} + if sorts_json: + try: + body["sorts"] = json_mod.loads(sorts_json) + except json_mod.JSONDecodeError: + return {"error": "sorts_json is not valid JSON"} + + if start_cursor: + body["start_cursor"] = start_cursor + data = _request("post", f"/databases/{database_id}/query", token, json=body) if "error" in data: return data @@ -312,7 +366,12 @@ def register_tools( "last_edited_time": item.get("last_edited_time", ""), } ) - return {"pages": pages, "count": len(pages), "has_more": data.get("has_more", False)} + return { + "pages": pages, + "count": len(pages), + "has_more": data.get("has_more", False), + "next_cursor": data.get("next_cursor"), + } @mcp.tool() def notion_get_database(database_id: str) -> dict[str, Any]: @@ -352,36 +411,109 @@ def register_tools( } @mcp.tool() - def notion_update_page( - page_id: str, - properties_json: str, + def notion_create_database( + parent_page_id: str, + title: str, + properties_json: str = "", ) -> dict[str, Any]: """ - Update properties on an existing Notion page. + Create a new database as a child of an existing page. Args: - page_id: Notion page ID (required) - properties_json: Properties to update as JSON string. - e.g. '{"Status": {"select": {"name": "Done"}}}' - or '{"Priority": {"number": 1}}' + parent_page_id: ID of the parent page (required) + title: Database title (required) + properties_json: Property definitions as JSON string (optional). 
+ If omitted, creates a database with a single "Name" title + column. Example with extra columns: + '{"Status": {"select": {"options": [{"name": "To Do"}, + {"name": "Done"}]}}, "Priority": {"number": {}}}' Returns: - Dict with updated page (id, url) or error + Dict with created database (id, url) """ import json as json_mod token = _get_credentials(credentials) if not token: return _auth_error() - if not page_id or not properties_json: - return {"error": "page_id and properties_json are required"} + if not parent_page_id or not title: + return {"error": "parent_page_id and title are required"} - try: - props = json_mod.loads(properties_json) - except json_mod.JSONDecodeError: - return {"error": "properties_json is not valid JSON"} + properties: dict[str, Any] = { + "Name": {"title": {}}, + } - data = _request("patch", f"/pages/{page_id}", token, json={"properties": props}) + if properties_json: + try: + extra = json_mod.loads(properties_json) + properties.update(extra) + except json_mod.JSONDecodeError: + return {"error": "properties_json is not valid JSON"} + + body: dict[str, Any] = { + "parent": {"type": "page_id", "page_id": parent_page_id}, + "title": [{"type": "text", "text": {"content": title}}], + "properties": properties, + } + + data = _request("post", "/databases", token, json=body) + if "error" in data: + return data + + return { + "id": data.get("id", ""), + "url": data.get("url", ""), + "status": "created", + } + + @mcp.tool() + def notion_update_database( + database_id: str, + title: str = "", + properties_json: str = "", + archived: bool | None = None, + ) -> dict[str, Any]: + """ + Update a database's title, properties, or archive it. + + Args: + database_id: Notion database ID (required) + title: New database title (optional) + properties_json: Property schema changes as JSON string (optional). + Add new columns, rename, or change types. + e.g. 
'{"Priority": {"number": {}}}' + archived: Set to true to archive (delete), false to restore + (optional) + + Returns: + Dict with updated database (id, url, status) + """ + import json as json_mod + + token = _get_credentials(credentials) + if not token: + return _auth_error() + if not database_id: + return {"error": "database_id is required"} + + body: dict[str, Any] = {} + + if title: + body["title"] = [{"type": "text", "text": {"content": title}}] + + if properties_json: + try: + body["properties"] = json_mod.loads(properties_json) + except json_mod.JSONDecodeError: + return {"error": "properties_json is not valid JSON"} + + if archived is not None: + body["archived"] = archived + + if not body: + return {"error": "No updates provided. Set title, properties_json, or archived."} + + data = _request("patch", f"/databases/{database_id}", token, json=body) if "error" in data: return data @@ -392,49 +524,247 @@ def register_tools( } @mcp.tool() - def notion_archive_page( + def notion_update_page( page_id: str, - archived: bool = True, + properties_json: str = "", + archived: bool | None = None, ) -> dict[str, Any]: """ - Archive or unarchive a Notion page. + Update a Notion page's properties. Args: page_id: Notion page ID (required) - archived: True to archive, False to restore (default True) + properties_json: Properties to update as JSON string + e.g. 
'{"Status": {"select": {"name": "Done"}}}' + (optional) + archived: Set to true to archive, false to unarchive (optional) Returns: - Dict with page status or error + Dict with updated page (id, url, status) """ + import json as json_mod + token = _get_credentials(credentials) if not token: return _auth_error() if not page_id: return {"error": "page_id is required"} - data = _request("patch", f"/pages/{page_id}", token, json={"archived": archived}) + body: dict[str, Any] = {} + + if properties_json: + try: + body["properties"] = json_mod.loads(properties_json) + except json_mod.JSONDecodeError: + return {"error": "properties_json is not valid JSON"} + + if archived is not None: + body["archived"] = archived + + if not body: + return {"error": "No updates provided. Set properties_json or archived."} + + data = _request("patch", f"/pages/{page_id}", token, json=body) if "error" in data: return data return { "id": data.get("id", ""), - "archived": data.get("archived", archived), - "status": "archived" if archived else "restored", + "url": data.get("url", ""), + "status": "updated", + } + + @mcp.tool() + def notion_get_block_children( + block_id: str, + page_size: int = 50, + ) -> dict[str, Any]: + """ + Get child blocks (content) of a page or block. 
+ + Args: + block_id: Page ID or block ID (required) + page_size: Max results (1-100, default 50) + + Returns: + Dict with block content (type, text, children indicator) + """ + token = _get_credentials(credentials) + if not token: + return _auth_error() + if not block_id: + return {"error": "block_id is required"} + + params = {"page_size": max(1, min(page_size, 100))} + data = _request("get", f"/blocks/{block_id}/children", token, params=params) + if "error" in data: + return data + + blocks = [] + for item in data.get("results", []): + block_type = item.get("type", "") + block_data: dict[str, Any] = { + "id": item.get("id", ""), + "type": block_type, + "has_children": item.get("has_children", False), + } + + # Extract text content from common block types + type_data = item.get(block_type, {}) + rich_text = type_data.get("rich_text", []) + if rich_text: + block_data["text"] = "".join( + p.get("text", {}).get("content", "") for p in rich_text + ) + + blocks.append(block_data) + + return { + "blocks": blocks, + "count": len(blocks), + "has_more": data.get("has_more", False), + } + + @mcp.tool() + def notion_get_block(block_id: str) -> dict[str, Any]: + """ + Retrieve a single block by ID. 
+ + Args: + block_id: Notion block ID (required) + + Returns: + Dict with block details (id, type, text, has_children) + """ + token = _get_credentials(credentials) + if not token: + return _auth_error() + if not block_id: + return {"error": "block_id is required"} + + data = _request("get", f"/blocks/{block_id}", token) + if "error" in data: + return data + + block_type = data.get("type", "") + result: dict[str, Any] = { + "id": data.get("id", ""), + "type": block_type, + "has_children": data.get("has_children", False), + "archived": data.get("archived", False), + "created_time": data.get("created_time", ""), + "last_edited_time": data.get("last_edited_time", ""), + } + + type_data = data.get(block_type, {}) + rich_text = type_data.get("rich_text", []) + if rich_text: + result["text"] = "".join(p.get("text", {}).get("content", "") for p in rich_text) + + return result + + @mcp.tool() + def notion_update_block( + block_id: str, + content: str = "", + block_type: str = "", + archived: bool | None = None, + ) -> dict[str, Any]: + """ + Update a block's content or archive it. + + Args: + block_id: Notion block ID (required) + content: New text content for the block (optional). + Only works for text-based blocks (paragraph, heading, etc.) + block_type: The block's current type (required when setting content). + Use notion_get_block to find the type first. + archived: Set to true to archive (soft-delete), false to restore + (optional) + + Returns: + Dict with updated block info (id, type, status) + """ + token = _get_credentials(credentials) + if not token: + return _auth_error() + if not block_id: + return {"error": "block_id is required"} + + body: dict[str, Any] = {} + + if content: + if not block_type: + return { + "error": "block_type is required when setting content. 
" + "Use notion_get_block to find the type.", + } + try: + validated = BlockType(block_type) + except ValueError: + return { + "error": f"Invalid block_type: {block_type!r}", + "help": f"Must be one of: {', '.join(sorted(BlockType))}", + } + body[validated] = { + "rich_text": [{"type": "text", "text": {"content": content}}], + } + + if archived is not None: + body["archived"] = archived + + if not body: + return {"error": "No updates provided. Set content or archived."} + + data = _request("patch", f"/blocks/{block_id}", token, json=body) + if "error" in data: + return data + + return { + "id": data.get("id", ""), + "type": data.get("type", ""), + "status": "updated", + } + + @mcp.tool() + def notion_delete_block(block_id: str) -> dict[str, Any]: + """ + Delete a block (moves to trash). + + Args: + block_id: Notion block ID to delete (required) + + Returns: + Dict with deleted block info (id, status) + """ + token = _get_credentials(credentials) + if not token: + return _auth_error() + if not block_id: + return {"error": "block_id is required"} + + data = _request("delete", f"/blocks/{block_id}", token) + if "error" in data: + return data + + return { + "id": data.get("id", ""), + "status": "deleted", } @mcp.tool() def notion_append_blocks( - page_id: str, + block_id: str, content: str, block_type: str = "paragraph", ) -> dict[str, Any]: """ - Append content blocks to an existing Notion page. + Append content blocks to a page or block. Args: - page_id: Notion page ID to append to (required) + block_id: Page ID or parent block ID to append to (required) content: Text content to append (required). For multiple blocks, - separate with newlines. + separate with newlines. Max 100 blocks per request. 
block_type: Block type to create: "paragraph", "heading_1", "heading_2", "heading_3", "bulleted_list_item", "numbered_list_item", "to_do", "quote", "callout" @@ -446,43 +776,40 @@ def register_tools( token = _get_credentials(credentials) if not token: return _auth_error() - if not page_id or not content: - return {"error": "page_id and content are required"} + if not block_id or not content: + return {"error": "block_id and content are required"} - valid_types = { - "paragraph", - "heading_1", - "heading_2", - "heading_3", - "bulleted_list_item", - "numbered_list_item", - "to_do", - "quote", - "callout", - } - if block_type not in valid_types: + try: + validated = BlockType(block_type) + except ValueError: return { "error": f"Invalid block_type: {block_type!r}", - "help": f"Must be one of: {', '.join(sorted(valid_types))}", + "help": f"Must be one of: {', '.join(sorted(BlockType))}", } lines = [line for line in content.split("\n") if line.strip()] + if not lines: + return {"error": "content is empty after stripping blank lines"} + if len(lines) > 100: + return {"error": "Too many blocks. 
Notion API allows max 100 per request."} + children = [] for line in lines: block: dict[str, Any] = { "object": "block", - "type": block_type, - block_type: { + "type": validated, + validated: { "rich_text": [{"type": "text", "text": {"content": line}}], }, } - if block_type == "to_do": - block[block_type]["checked"] = False + match validated: + case BlockType.TO_DO: + block[validated]["checked"] = False children.append(block) data = _request( "patch", - f"/blocks/{page_id}/children", + f"/blocks/{block_id}/children", token, json={"children": children}, ) @@ -490,7 +817,7 @@ def register_tools( return data return { - "page_id": page_id, + "block_id": block_id, "blocks_added": len(children), "status": "appended", } From d00f3216275c99c2d300053ddf986f09098606b0 Mon Sep 17 00:00:00 2001 From: Antiarin Date: Wed, 11 Mar 2026 02:45:30 +0530 Subject: [PATCH 02/45] test(notion): add comprehensive tests for error handling and credential store in Notion tool - Implemented tests for HTTP error codes, timeouts, and generic exceptions in _request. - Added tests to verify the use of credential store when provided. - Enhanced tests for notion_search to include filter types and page size clamping. - Updated test assertions for successful responses from notion_get_page. 
--- tools/tests/tools/test_notion_tool.py | 1015 ++++++++++++++++++++++++- 1 file changed, 996 insertions(+), 19 deletions(-) diff --git a/tools/tests/tools/test_notion_tool.py b/tools/tests/tools/test_notion_tool.py index d2d11715..5f1bb171 100644 --- a/tools/tests/tools/test_notion_tool.py +++ b/tools/tests/tools/test_notion_tool.py @@ -2,12 +2,14 @@ from unittest.mock import MagicMock, patch +import httpx import pytest from fastmcp import FastMCP from aden_tools.tools.notion_tool.notion_tool import register_tools ENV = {"NOTION_API_TOKEN": "test-token"} +PATCH_BASE = "aden_tools.tools.notion_tool.notion_tool" def _mock_resp(data, status_code=200): @@ -25,6 +27,92 @@ def tool_fns(mcp: FastMCP): return {name: tools[name].fn for name in tools} +# --------------------------------------------------------------------------- +# _request error handling (applies to all tools via shared helper) +# --------------------------------------------------------------------------- + + +class TestRequestErrors: + """Test HTTP error codes, timeouts, and exceptions in _request.""" + + @pytest.mark.parametrize( + ("status_code", "expected_fragment"), + [ + (401, "Unauthorized"), + (403, "Forbidden"), + (404, "Not found"), + (429, "Rate limited"), + (500, "Notion API error 500"), + ], + ) + def test_http_error_codes(self, tool_fns, status_code, expected_fragment): + with ( + patch.dict("os.environ", ENV), + patch( + f"{PATCH_BASE}.httpx.post", + return_value=_mock_resp({}, status_code), + ), + ): + result = tool_fns["notion_search"](query="test") + assert "error" in result + assert expected_fragment in result["error"] + + def test_timeout_exception(self, tool_fns): + with ( + patch.dict("os.environ", ENV), + patch( + f"{PATCH_BASE}.httpx.post", + side_effect=httpx.TimeoutException("timed out"), + ), + ): + result = tool_fns["notion_search"](query="test") + assert "error" in result + assert "timed out" in result["error"] + + def test_generic_exception(self, tool_fns): + with ( + 
patch.dict("os.environ", ENV), + patch( + f"{PATCH_BASE}.httpx.post", + side_effect=ConnectionError("connection refused"), + ), + ): + result = tool_fns["notion_search"](query="test") + assert "error" in result + assert "connection refused" in result["error"] + + +# --------------------------------------------------------------------------- +# Credential store adapter +# --------------------------------------------------------------------------- + + +class TestCredentialStoreAdapter: + def test_credential_store_used_when_provided(self, mcp: FastMCP): + mock_creds = MagicMock() + mock_creds.get.return_value = "store-token" + register_tools(mcp, credentials=mock_creds) + tools = mcp._tool_manager._tools + fn = tools["notion_search"].fn + + data = {"results": [], "has_more": False} + with patch(f"{PATCH_BASE}.httpx.post", return_value=_mock_resp(data)) as mock_post: + result = fn(query="test") + + mock_creds.get.assert_called_with("notion_token") + assert result["count"] == 0 + # Verify the token from the store was used in the Authorization header + call_kwargs = mock_post.call_args + assert "Bearer store-token" in call_kwargs.kwargs.get("headers", {}).get( + "Authorization", call_kwargs[1].get("headers", {}).get("Authorization", "") + ) + + +# --------------------------------------------------------------------------- +# notion_search +# --------------------------------------------------------------------------- + + class TestNotionSearch: def test_missing_credentials(self, tool_fns): with patch.dict("os.environ", {}, clear=True): @@ -52,15 +140,79 @@ class TestNotionSearch: } with ( patch.dict("os.environ", ENV), - patch( - "aden_tools.tools.notion_tool.notion_tool.httpx.post", return_value=_mock_resp(data) - ), + patch(f"{PATCH_BASE}.httpx.post", return_value=_mock_resp(data)), ): result = tool_fns["notion_search"](query="My Page") assert result["count"] == 1 assert result["results"][0]["title"] == "My Page" + def test_filter_type_page(self, tool_fns): + data = 
{"results": [], "has_more": False} + with ( + patch.dict("os.environ", ENV), + patch(f"{PATCH_BASE}.httpx.post", return_value=_mock_resp(data)) as mock_post, + ): + tool_fns["notion_search"](filter_type="page") + + body = mock_post.call_args.kwargs["json"] + assert body["filter"] == {"property": "object", "value": "page"} + + def test_filter_type_database(self, tool_fns): + data = { + "results": [ + { + "object": "database", + "id": "db-1", + "url": "https://notion.so/db-1", + "created_time": "2024-01-01T00:00:00Z", + "last_edited_time": "2024-01-15T00:00:00Z", + "title": [{"text": {"content": "My DB"}}], + } + ], + "has_more": True, + } + with ( + patch.dict("os.environ", ENV), + patch(f"{PATCH_BASE}.httpx.post", return_value=_mock_resp(data)), + ): + result = tool_fns["notion_search"](filter_type="database") + + assert result["results"][0]["title"] == "My DB" + assert result["has_more"] is True + + def test_filter_type_invalid_ignored(self, tool_fns): + data = {"results": [], "has_more": False} + with ( + patch.dict("os.environ", ENV), + patch(f"{PATCH_BASE}.httpx.post", return_value=_mock_resp(data)) as mock_post, + ): + tool_fns["notion_search"](filter_type="invalid") + + body = mock_post.call_args.kwargs["json"] + assert "filter" not in body + + def test_page_size_clamped(self, tool_fns): + data = {"results": [], "has_more": False} + with ( + patch.dict("os.environ", ENV), + patch(f"{PATCH_BASE}.httpx.post", return_value=_mock_resp(data)) as mock_post, + ): + tool_fns["notion_search"](page_size=0) + assert mock_post.call_args.kwargs["json"]["page_size"] == 1 + + with ( + patch.dict("os.environ", ENV), + patch(f"{PATCH_BASE}.httpx.post", return_value=_mock_resp(data)) as mock_post, + ): + tool_fns["notion_search"](page_size=200) + assert mock_post.call_args.kwargs["json"]["page_size"] == 100 + + +# --------------------------------------------------------------------------- +# notion_get_page +# 
--------------------------------------------------------------------------- + class TestNotionGetPage: def test_missing_page_id(self, tool_fns): @@ -88,36 +240,318 @@ class TestNotionGetPage: } with ( patch.dict("os.environ", ENV), - patch( - "aden_tools.tools.notion_tool.notion_tool.httpx.get", return_value=_mock_resp(data) - ), + patch(f"{PATCH_BASE}.httpx.get", return_value=_mock_resp(data)), ): result = tool_fns["notion_get_page"](page_id="page-1") assert result["title"] == "Test Page" assert result["properties"]["Status"] == "Done" + def test_all_property_types(self, tool_fns): + data = { + "id": "page-1", + "url": "https://notion.so/page-1", + "archived": False, + "created_time": "2024-01-01T00:00:00Z", + "last_edited_time": "2024-01-15T00:00:00Z", + "properties": { + "Name": { + "type": "title", + "title": [{"text": {"content": "Test"}}], + }, + "Description": { + "type": "rich_text", + "rich_text": [ + {"text": {"content": "Hello "}}, + {"text": {"content": "World"}}, + ], + }, + "Tags": { + "type": "multi_select", + "multi_select": [{"name": "bug"}, {"name": "urgent"}], + }, + "Priority": { + "type": "number", + "number": 5, + }, + "Done": { + "type": "checkbox", + "checkbox": True, + }, + "Due": { + "type": "date", + "date": {"start": "2024-06-01"}, + }, + "Progress": { + "type": "status", + "status": {"name": "In Progress"}, + }, + "EmptySelect": { + "type": "select", + "select": None, + }, + "EmptyDate": { + "type": "date", + "date": None, + }, + "EmptyStatus": { + "type": "status", + "status": None, + }, + }, + } + with ( + patch.dict("os.environ", ENV), + patch(f"{PATCH_BASE}.httpx.get", return_value=_mock_resp(data)), + ): + result = tool_fns["notion_get_page"](page_id="page-1") + + props = result["properties"] + assert props["Description"] == "Hello World" + assert props["Tags"] == ["bug", "urgent"] + assert props["Priority"] == 5 + assert props["Done"] is True + assert props["Due"] == "2024-06-01" + assert props["Progress"] == "In Progress" + 
assert props["EmptySelect"] == "" + assert props["EmptyDate"] == "" + assert props["EmptyStatus"] == "" + + +# --------------------------------------------------------------------------- +# notion_create_page +# --------------------------------------------------------------------------- + class TestNotionCreatePage: - def test_missing_params(self, tool_fns): + def test_missing_title(self, tool_fns): with patch.dict("os.environ", ENV): - result = tool_fns["notion_create_page"](parent_database_id="", title="") + result = tool_fns["notion_create_page"](title="") assert "error" in result + assert "title is required" in result["error"] + + def test_missing_parent(self, tool_fns): + with patch.dict("os.environ", ENV): + result = tool_fns["notion_create_page"](title="Test") + assert "error" in result + assert "parent_database_id or parent_page_id" in result["error"] + + def test_both_parents(self, tool_fns): + with patch.dict("os.environ", ENV): + result = tool_fns["notion_create_page"]( + title="Test", + parent_database_id="db-1", + parent_page_id="page-1", + ) + assert "error" in result + assert "not both" in result["error"] def test_successful_create(self, tool_fns): data = {"id": "new-page", "url": "https://notion.so/new-page"} with ( patch.dict("os.environ", ENV), - patch( - "aden_tools.tools.notion_tool.notion_tool.httpx.post", - return_value=_mock_resp(data, 201), - ), + patch(f"{PATCH_BASE}.httpx.post", return_value=_mock_resp(data, 201)), ): - result = tool_fns["notion_create_page"](parent_database_id="db-1", title="New Page") + result = tool_fns["notion_create_page"]( + parent_database_id="db-1", + title="New Page", + title_property="Name", + ) assert result["status"] == "created" assert result["id"] == "new-page" + def test_missing_title_property_for_database(self, tool_fns): + with patch.dict("os.environ", ENV): + result = tool_fns["notion_create_page"]( + parent_database_id="db-1", + title="New Page", + ) + + assert "error" in result + assert "title_property 
is required" in result["error"] + + def test_with_properties_json(self, tool_fns): + data = {"id": "new-page", "url": "https://notion.so/new-page"} + with ( + patch.dict("os.environ", ENV), + patch( + f"{PATCH_BASE}.httpx.post", + return_value=_mock_resp(data, 201), + ) as mock_post, + ): + result = tool_fns["notion_create_page"]( + parent_database_id="db-1", + title="New Page", + title_property="Name", + properties_json='{"Status": {"select": {"name": "Open"}}}', + ) + + assert result["status"] == "created" + body = mock_post.call_args.kwargs["json"] + assert body["properties"]["Status"] == {"select": {"name": "Open"}} + + def test_with_content(self, tool_fns): + data = {"id": "new-page", "url": "https://notion.so/new-page"} + with ( + patch.dict("os.environ", ENV), + patch( + f"{PATCH_BASE}.httpx.post", + return_value=_mock_resp(data, 201), + ) as mock_post, + ): + result = tool_fns["notion_create_page"]( + parent_database_id="db-1", + title="New Page", + title_property="Name", + content="Some body text", + ) + + assert result["status"] == "created" + body = mock_post.call_args.kwargs["json"] + assert len(body["children"]) == 1 + assert body["children"][0]["type"] == "paragraph" + + def test_custom_title_property(self, tool_fns): + data = {"id": "new-page", "url": "https://notion.so/new-page"} + with ( + patch.dict("os.environ", ENV), + patch( + f"{PATCH_BASE}.httpx.post", + return_value=_mock_resp(data, 201), + ) as mock_post, + ): + result = tool_fns["notion_create_page"]( + parent_database_id="db-1", + title="My Task", + title_property="Task name", + ) + + assert result["status"] == "created" + body = mock_post.call_args.kwargs["json"] + assert "Task name" in body["properties"] + assert "Name" not in body["properties"] + + def test_invalid_properties_json(self, tool_fns): + with patch.dict("os.environ", ENV): + result = tool_fns["notion_create_page"]( + parent_database_id="db-1", + title="New Page", + title_property="Name", + properties_json="not valid 
json{{{", + ) + assert "error" in result + assert "not valid JSON" in result["error"] + + def test_create_under_parent_page(self, tool_fns): + data = {"id": "child-page", "url": "https://notion.so/child-page"} + with ( + patch.dict("os.environ", ENV), + patch( + f"{PATCH_BASE}.httpx.post", + return_value=_mock_resp(data, 201), + ) as mock_post, + ): + result = tool_fns["notion_create_page"]( + parent_page_id="parent-page-1", + title="Child Page", + content="Some content", + ) + + assert result["status"] == "created" + assert result["id"] == "child-page" + body = mock_post.call_args.kwargs["json"] + assert body["parent"] == {"page_id": "parent-page-1"} + assert body["properties"]["title"]["title"][0]["text"]["content"] == "Child Page" + assert len(body["children"]) == 1 + + def test_create_under_parent_page_ignores_properties_json(self, tool_fns): + data = {"id": "child-page", "url": "https://notion.so/child-page"} + with ( + patch.dict("os.environ", ENV), + patch( + f"{PATCH_BASE}.httpx.post", + return_value=_mock_resp(data, 201), + ) as mock_post, + ): + result = tool_fns["notion_create_page"]( + parent_page_id="parent-page-1", + title="Child Page", + properties_json='{"Status": {"select": {"name": "Open"}}}', + ) + + assert result["status"] == "created" + body = mock_post.call_args.kwargs["json"] + # properties_json is ignored for page parents + assert "Status" not in body.get("properties", {}) + + +# --------------------------------------------------------------------------- +# notion_update_page +# --------------------------------------------------------------------------- + + +class TestNotionUpdatePage: + def test_missing_page_id(self, tool_fns): + with patch.dict("os.environ", ENV): + result = tool_fns["notion_update_page"](page_id="") + assert "error" in result + + def test_no_updates_provided(self, tool_fns): + with patch.dict("os.environ", ENV): + result = tool_fns["notion_update_page"](page_id="page-1") + assert "error" in result + assert "No updates" in 
result["error"] + + def test_successful_update_properties(self, tool_fns): + data = {"id": "page-1", "url": "https://notion.so/page-1"} + with ( + patch.dict("os.environ", ENV), + patch(f"{PATCH_BASE}.httpx.patch", return_value=_mock_resp(data)) as mock_patch, + ): + result = tool_fns["notion_update_page"]( + page_id="page-1", + properties_json='{"Status": {"select": {"name": "Done"}}}', + ) + + assert result["status"] == "updated" + body = mock_patch.call_args.kwargs["json"] + assert body["properties"]["Status"] == {"select": {"name": "Done"}} + + def test_archive_page(self, tool_fns): + data = {"id": "page-1", "url": "https://notion.so/page-1"} + with ( + patch.dict("os.environ", ENV), + patch(f"{PATCH_BASE}.httpx.patch", return_value=_mock_resp(data)) as mock_patch, + ): + result = tool_fns["notion_update_page"](page_id="page-1", archived=True) + + assert result["status"] == "updated" + body = mock_patch.call_args.kwargs["json"] + assert body["archived"] is True + + def test_invalid_properties_json(self, tool_fns): + with patch.dict("os.environ", ENV): + result = tool_fns["notion_update_page"]( + page_id="page-1", + properties_json="{bad json", + ) + assert "error" in result + assert "not valid JSON" in result["error"] + + def test_missing_credentials(self, tool_fns): + with patch.dict("os.environ", {}, clear=True): + result = tool_fns["notion_update_page"]( + page_id="page-1", + properties_json='{"Status": {"select": {"name": "Done"}}}', + ) + assert "error" in result + + +# --------------------------------------------------------------------------- +# notion_query_database +# --------------------------------------------------------------------------- + class TestNotionQueryDatabase: def test_missing_database_id(self, tool_fns): @@ -145,15 +579,99 @@ class TestNotionQueryDatabase: } with ( patch.dict("os.environ", ENV), - patch( - "aden_tools.tools.notion_tool.notion_tool.httpx.post", return_value=_mock_resp(data) - ), + patch(f"{PATCH_BASE}.httpx.post", 
return_value=_mock_resp(data)), ): result = tool_fns["notion_query_database"](database_id="db-1") assert result["count"] == 1 assert result["pages"][0]["title"] == "Task 1" + def test_with_filter_json(self, tool_fns): + data = {"results": [], "has_more": False} + with ( + patch.dict("os.environ", ENV), + patch(f"{PATCH_BASE}.httpx.post", return_value=_mock_resp(data)) as mock_post, + ): + tool_fns["notion_query_database"]( + database_id="db-1", + filter_json='{"property": "Status", "select": {"equals": "Done"}}', + ) + + body = mock_post.call_args.kwargs["json"] + assert body["filter"]["property"] == "Status" + + def test_invalid_filter_json(self, tool_fns): + with patch.dict("os.environ", ENV): + result = tool_fns["notion_query_database"]( + database_id="db-1", + filter_json="not json!!!", + ) + assert "error" in result + assert "not valid JSON" in result["error"] + + def test_page_size_clamped(self, tool_fns): + data = {"results": [], "has_more": False} + with ( + patch.dict("os.environ", ENV), + patch(f"{PATCH_BASE}.httpx.post", return_value=_mock_resp(data)) as mock_post, + ): + tool_fns["notion_query_database"](database_id="db-1", page_size=0) + assert mock_post.call_args.kwargs["json"]["page_size"] == 1 + + def test_with_sorts_json(self, tool_fns): + data = {"results": [], "has_more": False} + with ( + patch.dict("os.environ", ENV), + patch(f"{PATCH_BASE}.httpx.post", return_value=_mock_resp(data)) as mock_post, + ): + tool_fns["notion_query_database"]( + database_id="db-1", + sorts_json='[{"property": "Created", "direction": "descending"}]', + ) + + body = mock_post.call_args.kwargs["json"] + assert body["sorts"][0]["property"] == "Created" + assert body["sorts"][0]["direction"] == "descending" + + def test_invalid_sorts_json(self, tool_fns): + with patch.dict("os.environ", ENV): + result = tool_fns["notion_query_database"]( + database_id="db-1", + sorts_json="not json!!!", + ) + assert "error" in result + assert "not valid JSON" in result["error"] + + def 
test_with_start_cursor(self, tool_fns): + data = {"results": [], "has_more": False} + with ( + patch.dict("os.environ", ENV), + patch(f"{PATCH_BASE}.httpx.post", return_value=_mock_resp(data)) as mock_post, + ): + tool_fns["notion_query_database"]( + database_id="db-1", + start_cursor="cursor-abc-123", + ) + + body = mock_post.call_args.kwargs["json"] + assert body["start_cursor"] == "cursor-abc-123" + + def test_next_cursor_returned(self, tool_fns): + data = {"results": [], "has_more": True, "next_cursor": "cursor-next-456"} + with ( + patch.dict("os.environ", ENV), + patch(f"{PATCH_BASE}.httpx.post", return_value=_mock_resp(data)), + ): + result = tool_fns["notion_query_database"](database_id="db-1") + + assert result["has_more"] is True + assert result["next_cursor"] == "cursor-next-456" + + +# --------------------------------------------------------------------------- +# notion_get_database +# --------------------------------------------------------------------------- + class TestNotionGetDatabase: def test_missing_database_id(self, tool_fns): @@ -161,6 +679,11 @@ class TestNotionGetDatabase: result = tool_fns["notion_get_database"](database_id="") assert "error" in result + def test_missing_credentials(self, tool_fns): + with patch.dict("os.environ", {}, clear=True): + result = tool_fns["notion_get_database"](database_id="db-1") + assert "error" in result + def test_successful_get(self, tool_fns): data = { "id": "db-1", @@ -175,11 +698,465 @@ class TestNotionGetDatabase: } with ( patch.dict("os.environ", ENV), - patch( - "aden_tools.tools.notion_tool.notion_tool.httpx.get", return_value=_mock_resp(data) - ), + patch(f"{PATCH_BASE}.httpx.get", return_value=_mock_resp(data)), ): result = tool_fns["notion_get_database"](database_id="db-1") assert result["title"] == "Tasks" assert "Name" in result["properties"] + + +# --------------------------------------------------------------------------- +# notion_create_database +# 
--------------------------------------------------------------------------- + + +class TestNotionCreateDatabase: + def test_missing_params(self, tool_fns): + with patch.dict("os.environ", ENV): + result = tool_fns["notion_create_database"](parent_page_id="", title="") + assert "error" in result + + def test_missing_credentials(self, tool_fns): + with patch.dict("os.environ", {}, clear=True): + result = tool_fns["notion_create_database"](parent_page_id="page-1", title="My DB") + assert "error" in result + + def test_successful_create_default_properties(self, tool_fns): + data = {"id": "db-new", "url": "https://notion.so/db-new"} + with ( + patch.dict("os.environ", ENV), + patch( + f"{PATCH_BASE}.httpx.post", + return_value=_mock_resp(data, 201), + ) as mock_post, + ): + result = tool_fns["notion_create_database"](parent_page_id="page-1", title="Tasks") + + assert result["status"] == "created" + assert result["id"] == "db-new" + body = mock_post.call_args.kwargs["json"] + assert body["parent"]["page_id"] == "page-1" + assert "Name" in body["properties"] + assert body["properties"]["Name"] == {"title": {}} + + def test_with_extra_properties(self, tool_fns): + data = {"id": "db-new", "url": "https://notion.so/db-new"} + with ( + patch.dict("os.environ", ENV), + patch( + f"{PATCH_BASE}.httpx.post", + return_value=_mock_resp(data, 201), + ) as mock_post, + ): + result = tool_fns["notion_create_database"]( + parent_page_id="page-1", + title="Tasks", + properties_json='{"Priority": {"number": {}}}', + ) + + assert result["status"] == "created" + body = mock_post.call_args.kwargs["json"] + assert "Priority" in body["properties"] + assert "Name" in body["properties"] + + def test_invalid_properties_json(self, tool_fns): + with patch.dict("os.environ", ENV): + result = tool_fns["notion_create_database"]( + parent_page_id="page-1", + title="Tasks", + properties_json="{bad", + ) + assert "error" in result + assert "not valid JSON" in result["error"] + + +# 
--------------------------------------------------------------------------- +# notion_update_database +# --------------------------------------------------------------------------- + + +class TestNotionUpdateDatabase: + def test_missing_database_id(self, tool_fns): + with patch.dict("os.environ", ENV): + result = tool_fns["notion_update_database"](database_id="") + assert "error" in result + + def test_missing_credentials(self, tool_fns): + with patch.dict("os.environ", {}, clear=True): + result = tool_fns["notion_update_database"](database_id="db-1", title="New Title") + assert "error" in result + + def test_no_updates_provided(self, tool_fns): + with patch.dict("os.environ", ENV): + result = tool_fns["notion_update_database"](database_id="db-1") + assert "error" in result + assert "No updates" in result["error"] + + def test_update_title(self, tool_fns): + data = {"id": "db-1", "url": "https://notion.so/db-1"} + with ( + patch.dict("os.environ", ENV), + patch(f"{PATCH_BASE}.httpx.patch", return_value=_mock_resp(data)) as mock_patch, + ): + result = tool_fns["notion_update_database"](database_id="db-1", title="Renamed DB") + + assert result["status"] == "updated" + body = mock_patch.call_args.kwargs["json"] + assert body["title"][0]["text"]["content"] == "Renamed DB" + + def test_update_properties(self, tool_fns): + data = {"id": "db-1", "url": "https://notion.so/db-1"} + with ( + patch.dict("os.environ", ENV), + patch(f"{PATCH_BASE}.httpx.patch", return_value=_mock_resp(data)) as mock_patch, + ): + result = tool_fns["notion_update_database"]( + database_id="db-1", + properties_json='{"Priority": {"number": {}}}', + ) + + assert result["status"] == "updated" + body = mock_patch.call_args.kwargs["json"] + assert body["properties"]["Priority"] == {"number": {}} + + def test_archive_database(self, tool_fns): + data = {"id": "db-1", "url": "https://notion.so/db-1"} + with ( + patch.dict("os.environ", ENV), + patch(f"{PATCH_BASE}.httpx.patch", 
return_value=_mock_resp(data)) as mock_patch, + ): + result = tool_fns["notion_update_database"](database_id="db-1", archived=True) + + assert result["status"] == "updated" + body = mock_patch.call_args.kwargs["json"] + assert body["archived"] is True + + def test_invalid_properties_json(self, tool_fns): + with patch.dict("os.environ", ENV): + result = tool_fns["notion_update_database"]( + database_id="db-1", + properties_json="not json", + ) + assert "error" in result + assert "not valid JSON" in result["error"] + + +# --------------------------------------------------------------------------- +# notion_get_block_children +# --------------------------------------------------------------------------- + + +class TestNotionGetBlockChildren: + def test_missing_block_id(self, tool_fns): + with patch.dict("os.environ", ENV): + result = tool_fns["notion_get_block_children"](block_id="") + assert "error" in result + + def test_missing_credentials(self, tool_fns): + with patch.dict("os.environ", {}, clear=True): + result = tool_fns["notion_get_block_children"](block_id="page-1") + assert "error" in result + + def test_successful_get(self, tool_fns): + data = { + "results": [ + { + "id": "block-1", + "type": "paragraph", + "has_children": False, + "paragraph": { + "rich_text": [{"text": {"content": "Hello world"}}], + }, + }, + { + "id": "block-2", + "type": "heading_2", + "has_children": False, + "heading_2": { + "rich_text": [{"text": {"content": "Section"}}], + }, + }, + { + "id": "block-3", + "type": "divider", + "has_children": False, + "divider": {}, + }, + ], + "has_more": False, + } + with ( + patch.dict("os.environ", ENV), + patch(f"{PATCH_BASE}.httpx.get", return_value=_mock_resp(data)), + ): + result = tool_fns["notion_get_block_children"](block_id="page-1") + + assert result["count"] == 3 + assert result["blocks"][0]["text"] == "Hello world" + assert result["blocks"][1]["text"] == "Section" + # divider has no rich_text, so no "text" key + assert "text" not in 
result["blocks"][2] + + +# --------------------------------------------------------------------------- +# notion_get_block +# --------------------------------------------------------------------------- + + +class TestNotionGetBlock: + def test_missing_block_id(self, tool_fns): + with patch.dict("os.environ", ENV): + result = tool_fns["notion_get_block"](block_id="") + assert "error" in result + + def test_missing_credentials(self, tool_fns): + with patch.dict("os.environ", {}, clear=True): + result = tool_fns["notion_get_block"](block_id="block-1") + assert "error" in result + + def test_successful_get_paragraph(self, tool_fns): + data = { + "id": "block-1", + "type": "paragraph", + "has_children": False, + "archived": False, + "created_time": "2024-01-01T00:00:00Z", + "last_edited_time": "2024-01-15T00:00:00Z", + "paragraph": { + "rich_text": [{"text": {"content": "Hello world"}}], + }, + } + with ( + patch.dict("os.environ", ENV), + patch(f"{PATCH_BASE}.httpx.get", return_value=_mock_resp(data)), + ): + result = tool_fns["notion_get_block"](block_id="block-1") + + assert result["id"] == "block-1" + assert result["type"] == "paragraph" + assert result["text"] == "Hello world" + assert result["archived"] is False + + def test_block_without_text(self, tool_fns): + data = { + "id": "block-2", + "type": "divider", + "has_children": False, + "archived": False, + "created_time": "2024-01-01T00:00:00Z", + "last_edited_time": "2024-01-15T00:00:00Z", + "divider": {}, + } + with ( + patch.dict("os.environ", ENV), + patch(f"{PATCH_BASE}.httpx.get", return_value=_mock_resp(data)), + ): + result = tool_fns["notion_get_block"](block_id="block-2") + + assert result["type"] == "divider" + assert "text" not in result + + +# --------------------------------------------------------------------------- +# notion_update_block +# --------------------------------------------------------------------------- + + +class TestNotionUpdateBlock: + def test_missing_block_id(self, tool_fns): + 
with patch.dict("os.environ", ENV): + result = tool_fns["notion_update_block"](block_id="") + assert "error" in result + + def test_missing_credentials(self, tool_fns): + with patch.dict("os.environ", {}, clear=True): + result = tool_fns["notion_update_block"]( + block_id="block-1", content="text", block_type="paragraph" + ) + assert "error" in result + + def test_no_updates_provided(self, tool_fns): + with patch.dict("os.environ", ENV): + result = tool_fns["notion_update_block"](block_id="block-1") + assert "error" in result + assert "No updates" in result["error"] + + def test_content_without_block_type(self, tool_fns): + with patch.dict("os.environ", ENV): + result = tool_fns["notion_update_block"](block_id="block-1", content="new text") + assert "error" in result + assert "block_type is required" in result["error"] + + def test_successful_content_update(self, tool_fns): + data = {"id": "block-1", "type": "paragraph"} + with ( + patch.dict("os.environ", ENV), + patch(f"{PATCH_BASE}.httpx.patch", return_value=_mock_resp(data)) as mock_patch, + ): + result = tool_fns["notion_update_block"]( + block_id="block-1", content="Updated text", block_type="paragraph" + ) + + assert result["status"] == "updated" + body = mock_patch.call_args.kwargs["json"] + assert body["paragraph"]["rich_text"][0]["text"]["content"] == "Updated text" + + def test_invalid_block_type(self, tool_fns): + with patch.dict("os.environ", ENV): + result = tool_fns["notion_update_block"]( + block_id="block-1", content="text", block_type="invalid_type" + ) + assert "error" in result + assert "Invalid block_type" in result["error"] + + def test_archive_block(self, tool_fns): + data = {"id": "block-1", "type": "paragraph"} + with ( + patch.dict("os.environ", ENV), + patch(f"{PATCH_BASE}.httpx.patch", return_value=_mock_resp(data)) as mock_patch, + ): + result = tool_fns["notion_update_block"](block_id="block-1", archived=True) + + assert result["status"] == "updated" + body = 
mock_patch.call_args.kwargs["json"] + assert body["archived"] is True + + +# --------------------------------------------------------------------------- +# notion_delete_block +# --------------------------------------------------------------------------- + + +class TestNotionDeleteBlock: + def test_missing_block_id(self, tool_fns): + with patch.dict("os.environ", ENV): + result = tool_fns["notion_delete_block"](block_id="") + assert "error" in result + + def test_missing_credentials(self, tool_fns): + with patch.dict("os.environ", {}, clear=True): + result = tool_fns["notion_delete_block"](block_id="block-1") + assert "error" in result + + def test_successful_delete(self, tool_fns): + data = {"id": "block-1", "archived": True} + with ( + patch.dict("os.environ", ENV), + patch(f"{PATCH_BASE}.httpx.delete", return_value=_mock_resp(data)), + ): + result = tool_fns["notion_delete_block"](block_id="block-1") + + assert result["status"] == "deleted" + assert result["id"] == "block-1" + + +# --------------------------------------------------------------------------- +# notion_append_blocks +# --------------------------------------------------------------------------- + + +class TestNotionAppendBlocks: + def test_missing_params(self, tool_fns): + with patch.dict("os.environ", ENV): + result = tool_fns["notion_append_blocks"](block_id="", content="") + assert "error" in result + + def test_missing_credentials(self, tool_fns): + with patch.dict("os.environ", {}, clear=True): + result = tool_fns["notion_append_blocks"](block_id="page-1", content="text") + assert "error" in result + + def test_successful_append(self, tool_fns): + data = {"results": []} + with ( + patch.dict("os.environ", ENV), + patch(f"{PATCH_BASE}.httpx.patch", return_value=_mock_resp(data)) as mock_patch, + ): + result = tool_fns["notion_append_blocks"]( + block_id="page-1", + content="First paragraph\nSecond paragraph", + ) + + assert result["status"] == "appended" + assert result["blocks_added"] == 2 + 
assert result["block_id"] == "page-1" + body = mock_patch.call_args.kwargs["json"] + assert len(body["children"]) == 2 + assert body["children"][0]["type"] == "paragraph" + + def test_blank_lines_stripped(self, tool_fns): + data = {"results": []} + with ( + patch.dict("os.environ", ENV), + patch(f"{PATCH_BASE}.httpx.patch", return_value=_mock_resp(data)) as mock_patch, + ): + result = tool_fns["notion_append_blocks"]( + block_id="page-1", + content="Line one\n\n\nLine two", + ) + + assert result["blocks_added"] == 2 + body = mock_patch.call_args.kwargs["json"] + assert len(body["children"]) == 2 + + def test_only_blank_lines(self, tool_fns): + with patch.dict("os.environ", ENV): + result = tool_fns["notion_append_blocks"]( + block_id="page-1", + content="\n\n\n", + ) + assert "error" in result + assert "empty" in result["error"] + + def test_block_type_heading(self, tool_fns): + data = {"results": []} + with ( + patch.dict("os.environ", ENV), + patch(f"{PATCH_BASE}.httpx.patch", return_value=_mock_resp(data)) as mock_patch, + ): + result = tool_fns["notion_append_blocks"]( + block_id="page-1", + content="Section Title", + block_type="heading_1", + ) + + assert result["blocks_added"] == 1 + body = mock_patch.call_args.kwargs["json"] + assert body["children"][0]["type"] == "heading_1" + + def test_block_type_to_do(self, tool_fns): + data = {"results": []} + with ( + patch.dict("os.environ", ENV), + patch(f"{PATCH_BASE}.httpx.patch", return_value=_mock_resp(data)) as mock_patch, + ): + result = tool_fns["notion_append_blocks"]( + block_id="page-1", + content="Buy milk\nWalk the dog", + block_type="to_do", + ) + + assert result["blocks_added"] == 2 + body = mock_patch.call_args.kwargs["json"] + assert body["children"][0]["type"] == "to_do" + assert body["children"][0]["to_do"]["checked"] is False + + def test_invalid_block_type(self, tool_fns): + with patch.dict("os.environ", ENV): + result = tool_fns["notion_append_blocks"]( + block_id="page-1", + content="text", + 
block_type="invalid_type", + ) + assert "error" in result + assert "Invalid block_type" in result["error"] + + def test_exceeds_100_block_limit(self, tool_fns): + with patch.dict("os.environ", ENV): + result = tool_fns["notion_append_blocks"]( + block_id="page-1", + content="\n".join(f"line {i}" for i in range(101)), + ) + assert "error" in result + assert "100" in result["error"] From 2398e04e112486fd1c55061da2d4f90aebabdb2d Mon Sep 17 00:00:00 2001 From: Antiarin Date: Wed, 11 Mar 2026 02:45:41 +0530 Subject: [PATCH 03/45] docs(notion): add README for Notion tool with setup instructions and usage examples - Introduced a comprehensive README.md for the Notion tool. - Included setup instructions for the Notion API token and credential store configuration. - Documented available tools and their functionalities. - Provided usage examples for searching, creating, updating, and managing pages and databases. --- .../aden_tools/tools/notion_tool/README.md | 270 ++++++++++++++++++ 1 file changed, 270 insertions(+) create mode 100644 tools/src/aden_tools/tools/notion_tool/README.md diff --git a/tools/src/aden_tools/tools/notion_tool/README.md b/tools/src/aden_tools/tools/notion_tool/README.md new file mode 100644 index 00000000..61aba700 --- /dev/null +++ b/tools/src/aden_tools/tools/notion_tool/README.md @@ -0,0 +1,270 @@ +# Notion Tool + +Search pages, retrieve and update page content, create pages, manage databases, and manipulate blocks via the Notion API. + +## Setup + +```bash +# Required - Internal Integration Token +export NOTION_API_TOKEN=your-notion-integration-token +``` + +**Get your token:** +1. Go to https://www.notion.so/my-integrations +2. Click "New integration" and give it a name +3. Copy the "Internal Integration Secret" +4. Set `NOTION_API_TOKEN` environment variable + +**Important:** You must share each page or database with your integration. Open the page in Notion, click the `...` menu, select "Connections", and add your integration. 
+ +Alternatively, configure via the credential store (`CredentialStoreAdapter`) using the key `notion_token`. + +## Tools (13) + +| Tool | Description | +|------|-------------| +| `notion_search` | Search Notion pages and databases by title | +| `notion_get_page` | Get a page by ID with simplified properties | +| `notion_create_page` | Create a new page in a database or as a child of another page | +| `notion_update_page` | Update a page's properties or archive/unarchive it | +| `notion_query_database` | Query rows/pages from a database with filters, sorts, and pagination | +| `notion_get_database` | Get a database schema (property names and types) | +| `notion_create_database` | Create a new database as a child of a page | +| `notion_update_database` | Update a database's title, properties, or archive it | +| `notion_get_block_children` | Get child blocks (content) of a page or block | +| `notion_get_block` | Retrieve a single block by ID | +| `notion_update_block` | Update a block's content or archive it | +| `notion_delete_block` | Delete a block (moves to trash) | +| `notion_append_blocks` | Append content blocks (paragraphs, headings, lists, todos, quotes, callouts) to a page or block | + +## Usage + +### Search pages and databases + +```python +# Search by title text +result = notion_search(query="Meeting Notes") + +# Filter to only databases +result = notion_search(query="Tasks", filter_type="database") + +# List all accessible pages (empty query) +result = notion_search(page_size=50) +``` + +### Get a page + +```python +# Retrieve page details with simplified properties +result = notion_get_page(page_id="a1b2c3d4-e5f6-7890-abcd-ef1234567890") +# Returns id, title, url, properties (title, rich_text, select, multi_select, +# number, checkbox, date, status) +``` + +### Create a page + +When creating a page in a database, you must provide `title_property` (the +name of the database's title column). Use `notion_get_database` to find it +first.
The `title_property` parameter is ignored when using `parent_page_id`. + +```python +# Step 1: Find the database's title property name +schema = notion_get_database(database_id="a1b2c3d4-e5f6-7890-abcd-ef1234567890") +# schema["properties"] -> {"Task name": {"type": "title"}, "Status": {"type": "status"}, ...} + +# Step 2: Create a page using the correct title property +result = notion_create_page( + title="Weekly Standup Notes", + parent_database_id="a1b2c3d4-e5f6-7890-abcd-ef1234567890", + title_property="Task name", +) + +# Create with additional properties and body content +result = notion_create_page( + title="Bug Report: Login Timeout", + parent_database_id="a1b2c3d4-e5f6-7890-abcd-ef1234567890", + title_property="Task name", + properties_json='{"Status": {"select": {"name": "Open"}}}', + content="Users are experiencing timeouts when logging in during peak hours.", +) + +# Create a page as a child of another page (no title_property needed) +result = notion_create_page( + title="Meeting Notes - March 10", + parent_page_id="a1b2c3d4-e5f6-7890-abcd-ef1234567890", + content="Discussion points and action items.", +) +``` + +### Update a page + +```python +# Update properties +result = notion_update_page( + page_id="a1b2c3d4-e5f6-7890-abcd-ef1234567890", + properties_json='{"Status": {"select": {"name": "Done"}}}' +) + +# Archive a page +result = notion_update_page( + page_id="a1b2c3d4-e5f6-7890-abcd-ef1234567890", + archived=True +) +``` + +### Query a database + +```python +# Get all rows from a database +result = notion_query_database( + database_id="a1b2c3d4-e5f6-7890-abcd-ef1234567890" +) + +# Query with a filter +result = notion_query_database( + database_id="a1b2c3d4-e5f6-7890-abcd-ef1234567890", + filter_json='{"property": "Status", "select": {"equals": "In Progress"}}', + page_size=25 +) + +# Sort results +result = notion_query_database( + database_id="a1b2c3d4-e5f6-7890-abcd-ef1234567890", + sorts_json='[{"property": "Created", "direction": 
"descending"}]' +) + +# Paginate through results +result = notion_query_database( + database_id="a1b2c3d4-e5f6-7890-abcd-ef1234567890", + start_cursor=previous_result["next_cursor"] +) +``` + +### Get a database schema + +```python +# Retrieve property names and types for a database +result = notion_get_database( + database_id="a1b2c3d4-e5f6-7890-abcd-ef1234567890" +) +# Returns id, title, url, properties (each with type and id) +``` + +### Create a database + +```python +# Create a database with default Name column +result = notion_create_database( + parent_page_id="a1b2c3d4-e5f6-7890-abcd-ef1234567890", + title="Project Tasks" +) + +# Create with custom columns +result = notion_create_database( + parent_page_id="a1b2c3d4-e5f6-7890-abcd-ef1234567890", + title="Bug Tracker", + properties_json='{"Status": {"select": {"options": [{"name": "Open"}, {"name": "Closed"}]}}, "Priority": {"number": {}}}' +) +``` + +### Update or delete a database + +```python +# Rename a database +result = notion_update_database( + database_id="a1b2c3d4-e5f6-7890-abcd-ef1234567890", + title="Renamed Database" +) + +# Add a new column +result = notion_update_database( + database_id="a1b2c3d4-e5f6-7890-abcd-ef1234567890", + properties_json='{"Priority": {"number": {}}}' +) + +# Archive (delete) a database +result = notion_update_database( + database_id="a1b2c3d4-e5f6-7890-abcd-ef1234567890", + archived=True +) +``` + +### Read page content (block tree) + +```python +# Get the body content (blocks) of a page +result = notion_get_block_children( + block_id="a1b2c3d4-e5f6-7890-abcd-ef1234567890" +) +# Returns blocks with type, text content, and has_children indicator +``` + +### Get, update, or delete a block + +```python +# Get a single block +result = notion_get_block(block_id="a1b2c3d4-e5f6-7890-abcd-ef1234567890") +# Returns id, type, text, has_children, archived + +# Update block content (must specify the block's type) +result = notion_update_block( + 
block_id="a1b2c3d4-e5f6-7890-abcd-ef1234567890", + content="Updated paragraph text", + block_type="paragraph" +) + +# Archive a block (soft-delete) +result = notion_update_block( + block_id="a1b2c3d4-e5f6-7890-abcd-ef1234567890", + archived=True +) + +# Delete a block (moves to trash) +result = notion_delete_block(block_id="a1b2c3d4-e5f6-7890-abcd-ef1234567890") +``` + +### Append content to a page + +```python +# Add paragraphs to a page (newlines create separate blocks) +result = notion_append_blocks( + block_id="a1b2c3d4-e5f6-7890-abcd-ef1234567890", + content="First paragraph\nSecond paragraph" +) + +# Add a heading +result = notion_append_blocks( + block_id="a1b2c3d4-e5f6-7890-abcd-ef1234567890", + content="Section Title", + block_type="heading_1" +) + +# Add a to-do list +result = notion_append_blocks( + block_id="a1b2c3d4-e5f6-7890-abcd-ef1234567890", + content="Buy groceries\nClean the house\nWalk the dog", + block_type="to_do" +) + +# Supported block types: paragraph, heading_1, heading_2, heading_3, +# bulleted_list_item, numbered_list_item, to_do, quote, callout +# Max 100 blocks per request +``` + +## Error Handling + +| Error | Cause | +|-------|-------| +| `Unauthorized` | Invalid or missing integration token | +| `Forbidden` | Page/database not shared with the integration | +| `Not found` | Page/database does not exist or is not shared | +| `Rate limited` | Too many requests, retry after a short wait | +| `Request timed out` | Request exceeded the 30-second timeout | + +## Rate Limits + +The Notion API enforces rate limits of approximately 3 requests per second per integration. When rate limited, the tool returns `{"error": "Rate limited. Try again shortly."}`. Callers should wait a few seconds before retrying. 
+ +## API Reference + +- [Notion API Docs](https://developers.notion.com/reference) From 5076278dcb45c53656f90a55aa89493e8f7577cf Mon Sep 17 00:00:00 2001 From: Antiarin Date: Wed, 11 Mar 2026 02:45:51 +0530 Subject: [PATCH 04/45] feat(notion): register Notion tool in verified and unverified registration functions - Added the Notion tool registration to the _register_verified function. - Removed the Notion tool registration from the _register_unverified function to ensure proper handling. --- tools/src/aden_tools/tools/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/src/aden_tools/tools/__init__.py b/tools/src/aden_tools/tools/__init__.py index bedbe238..68ec6090 100644 --- a/tools/src/aden_tools/tools/__init__.py +++ b/tools/src/aden_tools/tools/__init__.py @@ -223,6 +223,7 @@ def _register_verified( register_telegram(mcp, credentials=credentials) register_google_docs(mcp, credentials=credentials) register_google_maps(mcp, credentials=credentials) + register_notion(mcp, credentials=credentials) register_account_info(mcp, credentials=credentials) @@ -272,7 +273,6 @@ def _register_unverified( register_microsoft_graph(mcp, credentials=credentials) register_mongodb(mcp, credentials=credentials) register_n8n(mcp, credentials=credentials) - register_notion(mcp, credentials=credentials) register_obsidian(mcp, credentials=credentials) register_pagerduty(mcp, credentials=credentials) register_pinecone(mcp, credentials=credentials) From cdaec8a83790935af57869f59eba38938cbd824c Mon Sep 17 00:00:00 2001 From: Timothy Date: Fri, 13 Mar 2026 18:56:34 -0700 Subject: [PATCH 05/45] feat: agent skills --- core/framework/graph/event_loop_node.py | 8 + core/framework/graph/executor.py | 33 ++- core/framework/graph/node.py | 4 + core/framework/graph/prompt_composer.py | 14 +- core/framework/runner/runner.py | 61 ++++- core/framework/runtime/agent_runtime.py | 28 ++- core/framework/runtime/execution_stream.py | 8 + core/framework/skills/__init__.py | 
23 ++ .../_default_skills/batch-ledger/SKILL.md | 24 ++ .../context-preservation/SKILL.md | 22 ++ .../_default_skills/error-recovery/SKILL.md | 18 ++ .../_default_skills/note-taking/SKILL.md | 27 +++ .../_default_skills/quality-monitor/SKILL.md | 20 ++ .../task-decomposition/SKILL.md | 17 ++ core/framework/skills/catalog.py | 109 +++++++++ core/framework/skills/config.py | 99 ++++++++ core/framework/skills/defaults.py | 151 ++++++++++++ core/framework/skills/discovery.py | 182 +++++++++++++++ core/framework/skills/parser.py | 160 +++++++++++++ core/framework/tools/queen_lifecycle_tools.py | 6 +- core/tests/test_default_skills.py | 190 +++++++++++++++ core/tests/test_skill_catalog.py | 172 ++++++++++++++ core/tests/test_skill_discovery.py | 145 ++++++++++++ core/tests/test_skill_integration.py | 218 ++++++++++++++++++ core/tests/test_skill_parser.py | 180 +++++++++++++++ tools/src/aden_tools/credentials/slack.py | 2 +- 26 files changed, 1907 insertions(+), 14 deletions(-) create mode 100644 core/framework/skills/__init__.py create mode 100644 core/framework/skills/_default_skills/batch-ledger/SKILL.md create mode 100644 core/framework/skills/_default_skills/context-preservation/SKILL.md create mode 100644 core/framework/skills/_default_skills/error-recovery/SKILL.md create mode 100644 core/framework/skills/_default_skills/note-taking/SKILL.md create mode 100644 core/framework/skills/_default_skills/quality-monitor/SKILL.md create mode 100644 core/framework/skills/_default_skills/task-decomposition/SKILL.md create mode 100644 core/framework/skills/catalog.py create mode 100644 core/framework/skills/config.py create mode 100644 core/framework/skills/defaults.py create mode 100644 core/framework/skills/discovery.py create mode 100644 core/framework/skills/parser.py create mode 100644 core/tests/test_default_skills.py create mode 100644 core/tests/test_skill_catalog.py create mode 100644 core/tests/test_skill_discovery.py create mode 100644 
core/tests/test_skill_integration.py create mode 100644 core/tests/test_skill_parser.py diff --git a/core/framework/graph/event_loop_node.py b/core/framework/graph/event_loop_node.py index 8df47104..b4ff9571 100644 --- a/core/framework/graph/event_loop_node.py +++ b/core/framework/graph/event_loop_node.py @@ -473,6 +473,8 @@ class EventLoopNode(NodeProtocol): focus_prompt=ctx.node_spec.system_prompt, narrative=ctx.narrative or None, accounts_prompt=ctx.accounts_prompt or None, + skills_catalog_prompt=ctx.skills_catalog_prompt or None, + protocols_prompt=ctx.protocols_prompt or None, ) if conversation.system_prompt != _current_prompt: conversation.update_system_prompt(_current_prompt) @@ -494,6 +496,12 @@ class EventLoopNode(NodeProtocol): if ctx.accounts_prompt: system_prompt = f"{system_prompt}\n\n{ctx.accounts_prompt}" + # Append skill catalog and operational protocols + if ctx.skills_catalog_prompt: + system_prompt = f"{system_prompt}\n\n{ctx.skills_catalog_prompt}" + if ctx.protocols_prompt: + system_prompt = f"{system_prompt}\n\n{ctx.protocols_prompt}" + # Inject agent working memory (adapt.md). # If it doesn't exist yet, seed it with available context. if self._config.spillover_dir: diff --git a/core/framework/graph/executor.py b/core/framework/graph/executor.py index d87b50cf..681bec2b 100644 --- a/core/framework/graph/executor.py +++ b/core/framework/graph/executor.py @@ -149,6 +149,8 @@ class GraphExecutor: dynamic_tools_provider: Callable | None = None, dynamic_prompt_provider: Callable | None = None, iteration_metadata_provider: Callable | None = None, + skills_catalog_prompt: str = "", + protocols_prompt: str = "", ): """ Initialize the executor. 
@@ -174,6 +176,8 @@ class GraphExecutor: tool list (for mode switching) dynamic_prompt_provider: Optional callback returning current system prompt (for phase switching) + skills_catalog_prompt: Available skills catalog for system prompt + protocols_prompt: Default skill operational protocols for system prompt """ self.runtime = runtime self.llm = llm @@ -195,6 +199,8 @@ class GraphExecutor: self.dynamic_tools_provider = dynamic_tools_provider self.dynamic_prompt_provider = dynamic_prompt_provider self.iteration_metadata_provider = iteration_metadata_provider + self.skills_catalog_prompt = skills_catalog_prompt + self.protocols_prompt = protocols_prompt # Parallel execution settings self.enable_parallel_execution = enable_parallel_execution @@ -1765,10 +1771,29 @@ class GraphExecutor: if node_spec.tools: available_tools = [t for t in self.tools if t.name in node_spec.tools] - # Create scoped memory view + # Create scoped memory view. + # When permissions are restricted (non-empty key lists), auto-include + # _-prefixed keys used by default skill protocols so agents can read/write + # operational state (e.g. _working_notes, _batch_ledger) regardless of + # what the node declares. When key lists are empty (unrestricted), leave + # unchanged — empty means "allow all". 
+ read_keys = list(node_spec.input_keys) + write_keys = list(node_spec.output_keys) + if read_keys or write_keys: + from framework.skills.defaults import SHARED_MEMORY_KEYS as _skill_keys + + # Also include any _-prefixed keys already written to memory + existing_underscore = [k for k in memory._data if k.startswith("_")] + extra_keys = set(_skill_keys) | set(existing_underscore) + for k in extra_keys: + if k not in read_keys: + read_keys.append(k) + if k not in write_keys: + write_keys.append(k) + scoped_memory = memory.with_permissions( - read_keys=node_spec.input_keys, - write_keys=node_spec.output_keys, + read_keys=read_keys, + write_keys=write_keys, ) # Build per-node accounts prompt (filtered to this node's tools) @@ -1812,6 +1837,8 @@ class GraphExecutor: dynamic_tools_provider=self.dynamic_tools_provider, dynamic_prompt_provider=self.dynamic_prompt_provider, iteration_metadata_provider=self.iteration_metadata_provider, + skills_catalog_prompt=self.skills_catalog_prompt, + protocols_prompt=self.protocols_prompt, ) VALID_NODE_TYPES = { diff --git a/core/framework/graph/node.py b/core/framework/graph/node.py index ea2182b8..46695be9 100644 --- a/core/framework/graph/node.py +++ b/core/framework/graph/node.py @@ -565,6 +565,10 @@ class NodeContext: # staging / running) without restarting the conversation. dynamic_prompt_provider: Any = None # Callable[[], str] | None + # Skill system prompts — injected by the skill discovery pipeline + skills_catalog_prompt: str = "" # Available skills XML catalog + protocols_prompt: str = "" # Default skill operational protocols + # Per-iteration metadata provider — when set, EventLoopNode merges # the returned dict into node_loop_iteration event data. Used by # the queen to record the current phase per iteration. 
diff --git a/core/framework/graph/prompt_composer.py b/core/framework/graph/prompt_composer.py index 12b6f177..bdf37f31 100644 --- a/core/framework/graph/prompt_composer.py +++ b/core/framework/graph/prompt_composer.py @@ -140,14 +140,18 @@ def compose_system_prompt( focus_prompt: str | None, narrative: str | None = None, accounts_prompt: str | None = None, + skills_catalog_prompt: str | None = None, + protocols_prompt: str | None = None, ) -> str: - """Compose the three-layer system prompt. + """Compose the multi-layer system prompt. Args: identity_prompt: Layer 1 — static agent identity (from GraphSpec). focus_prompt: Layer 3 — per-node focus directive (from NodeSpec.system_prompt). narrative: Layer 2 — auto-generated from conversation state. accounts_prompt: Connected accounts block (sits between identity and narrative). + skills_catalog_prompt: Available skills catalog XML (Agent Skills standard). + protocols_prompt: Default skill operational protocols section. Returns: Composed system prompt with all layers present, plus current datetime. 
@@ -162,6 +166,14 @@ def compose_system_prompt( if accounts_prompt: parts.append(f"\n{accounts_prompt}") + # Skills catalog (discovered skills available for activation) + if skills_catalog_prompt: + parts.append(f"\n{skills_catalog_prompt}") + + # Operational protocols (default skill behavioral guidance) + if protocols_prompt: + parts.append(f"\n{protocols_prompt}") + # Layer 2: Narrative (what's happened so far) if narrative: parts.append(f"\n--- Context (what has happened so far) ---\n{narrative}") diff --git a/core/framework/runner/runner.py b/core/framework/runner/runner.py index 6a5cc2d4..b8792c17 100644 --- a/core/framework/runner/runner.py +++ b/core/framework/runner/runner.py @@ -959,6 +959,10 @@ class AgentRunner: graph = GraphSpec(**graph_kwargs) + # Read skill configuration from agent module + agent_default_skills = getattr(agent_module, "default_skills", None) + agent_skills = getattr(agent_module, "skills", None) + # Read runtime config (webhook settings, etc.) if defined agent_runtime_config = getattr(agent_module, "runtime_config", None) @@ -970,7 +974,7 @@ class AgentRunner: configure_fn = getattr(agent_module, "configure_for_account", None) list_accts_fn = getattr(agent_module, "list_connected_accounts", None) - return cls( + runner = cls( agent_path=agent_path, graph=graph, goal=goal, @@ -986,6 +990,10 @@ class AgentRunner: list_accounts=list_accts_fn, credential_store=credential_store, ) + # Stash skill config for use in _setup() + runner._agent_default_skills = agent_default_skills + runner._agent_skills = agent_skills + return runner # Fallback: load from agent.json (legacy JSON-based agents) agent_json_path = agent_path / "agent.json" @@ -1003,7 +1011,7 @@ class AgentRunner: except json.JSONDecodeError as exc: raise ValueError(f"Invalid JSON in agent export file: {agent_json_path}") from exc - return cls( + runner = cls( agent_path=agent_path, graph=graph, goal=goal, @@ -1014,6 +1022,9 @@ class AgentRunner: 
skip_credential_validation=skip_credential_validation or False, credential_store=credential_store, ) + runner._agent_default_skills = None + runner._agent_skills = None + return runner def register_tool( self, @@ -1323,6 +1334,46 @@ class AgentRunner: except Exception: pass # Best-effort — agent works without account info + # Skill discovery and default skill loading + skills_catalog_prompt = "" + protocols_prompt = "" + try: + from framework.skills.config import SkillsConfig + from framework.skills.catalog import SkillCatalog + from framework.skills.defaults import DefaultSkillManager + from framework.skills.discovery import DiscoveryConfig, SkillDiscovery + + # Build skills config from agent module vars + skills_config = SkillsConfig.from_agent_vars( + default_skills=getattr(self, "_agent_default_skills", None), + skills=getattr(self, "_agent_skills", None), + ) + + # Discover community skills + discovery = SkillDiscovery(DiscoveryConfig(project_root=self.agent_path)) + discovered = discovery.discover() + + # Build catalog (community skills only — defaults handled separately) + catalog = SkillCatalog(discovered) + skills_catalog_prompt = catalog.to_prompt() + + # Handle pre-activated skills + if skills_config.skills: + pre_activated = catalog.build_pre_activated_prompt(skills_config.skills) + if pre_activated: + if skills_catalog_prompt: + skills_catalog_prompt = f"{skills_catalog_prompt}\n\n{pre_activated}" + else: + skills_catalog_prompt = pre_activated + + # Load and configure default skills + default_mgr = DefaultSkillManager(config=skills_config) + default_mgr.load() + default_mgr.log_active_skills() + protocols_prompt = default_mgr.build_protocols_prompt() + except Exception: + logger.debug("Skill system init failed (non-fatal)", exc_info=True) + self._setup_agent_runtime( tools, tool_executor, @@ -1330,6 +1381,8 @@ class AgentRunner: accounts_data=accounts_data, tool_provider_map=tool_provider_map, event_bus=event_bus, + 
skills_catalog_prompt=skills_catalog_prompt, + protocols_prompt=protocols_prompt, ) def _get_api_key_env_var(self, model: str) -> str | None: @@ -1425,6 +1478,8 @@ class AgentRunner: accounts_data: list[dict] | None = None, tool_provider_map: dict[str, str] | None = None, event_bus=None, + skills_catalog_prompt: str = "", + protocols_prompt: str = "", ) -> None: """Set up multi-entry-point execution using AgentRuntime.""" entry_points = [] @@ -1484,6 +1539,8 @@ class AgentRunner: accounts_data=accounts_data, tool_provider_map=tool_provider_map, event_bus=event_bus, + skills_catalog_prompt=skills_catalog_prompt, + protocols_prompt=protocols_prompt, ) # Pass intro_message through for TUI display diff --git a/core/framework/runtime/agent_runtime.py b/core/framework/runtime/agent_runtime.py index 7caeecd4..9714c33e 100644 --- a/core/framework/runtime/agent_runtime.py +++ b/core/framework/runtime/agent_runtime.py @@ -132,6 +132,8 @@ class AgentRuntime: accounts_data: list[dict] | None = None, tool_provider_map: dict[str, str] | None = None, event_bus: "EventBus | None" = None, + skills_catalog_prompt: str = "", + protocols_prompt: str = "", ): """ Initialize agent runtime. @@ -153,6 +155,8 @@ class AgentRuntime: event_bus: Optional external EventBus. If provided, the runtime shares this bus instead of creating its own. Used by SessionManager to share a single bus between queen, worker, and judge. 
+ skills_catalog_prompt: Available skills catalog for system prompt + protocols_prompt: Default skill operational protocols for system prompt """ self.graph = graph self.goal = goal @@ -160,6 +164,8 @@ class AgentRuntime: self._runtime_log_store = runtime_log_store self._checkpoint_config = checkpoint_config self.accounts_prompt = accounts_prompt + self.skills_catalog_prompt = skills_catalog_prompt + self.protocols_prompt = protocols_prompt # Primary graph identity self._graph_id: str = graph_id or "primary" @@ -293,6 +299,8 @@ class AgentRuntime: accounts_prompt=self._accounts_prompt, accounts_data=self._accounts_data, tool_provider_map=self._tool_provider_map, + skills_catalog_prompt=self.skills_catalog_prompt, + protocols_prompt=self.protocols_prompt, ) await stream.start() self._streams[ep_id] = stream @@ -393,7 +401,8 @@ class AgentRuntime: tc = spec.trigger_config cron_expr = tc.get("cron") - interval = tc.get("interval_minutes") + _raw_interval = tc.get("interval_minutes") + interval = float(_raw_interval) if _raw_interval is not None else None run_immediately = tc.get("run_immediately", False) if cron_expr: @@ -544,7 +553,7 @@ class AgentRuntime: ep_id, cron_expr, run_immediately, - idle_timeout=tc.get("idle_timeout_seconds", 300), + idle_timeout=float(tc.get("idle_timeout_seconds", 300)), )() ) self._timer_tasks.append(task) @@ -674,7 +683,7 @@ class AgentRuntime: ep_id, interval, run_immediately, - idle_timeout=tc.get("idle_timeout_seconds", 300), + idle_timeout=float(tc.get("idle_timeout_seconds", 300)), )() ) self._timer_tasks.append(task) @@ -921,6 +930,8 @@ class AgentRuntime: accounts_prompt=self._accounts_prompt, accounts_data=self._accounts_data, tool_provider_map=self._tool_provider_map, + skills_catalog_prompt=self.skills_catalog_prompt, + protocols_prompt=self.protocols_prompt, ) if self._running: await stream.start() @@ -999,7 +1010,8 @@ class AgentRuntime: if spec.trigger_type != "timer": continue tc = spec.trigger_config - interval = 
tc.get("interval_minutes") + _raw_interval = tc.get("interval_minutes") + interval = float(_raw_interval) if _raw_interval is not None else None run_immediately = tc.get("run_immediately", False) if interval and interval > 0 and self._running: @@ -1144,7 +1156,7 @@ class AgentRuntime: ep_id, interval, run_immediately, - idle_timeout=tc.get("idle_timeout_seconds", 300), + idle_timeout=float(tc.get("idle_timeout_seconds", 300)), )() ) timer_tasks.append(task) @@ -1699,6 +1711,8 @@ def create_agent_runtime( accounts_data: list[dict] | None = None, tool_provider_map: dict[str, str] | None = None, event_bus: "EventBus | None" = None, + skills_catalog_prompt: str = "", + protocols_prompt: str = "", ) -> AgentRuntime: """ Create and configure an AgentRuntime with entry points. @@ -1725,6 +1739,8 @@ def create_agent_runtime( accounts_data: Raw account data for per-node prompt generation. tool_provider_map: Tool name to provider name mapping for account routing. event_bus: Optional external EventBus to share with other components. + skills_catalog_prompt: Available skills catalog for system prompt. + protocols_prompt: Default skill operational protocols for system prompt. Returns: Configured AgentRuntime (not yet started) @@ -1751,6 +1767,8 @@ def create_agent_runtime( accounts_data=accounts_data, tool_provider_map=tool_provider_map, event_bus=event_bus, + skills_catalog_prompt=skills_catalog_prompt, + protocols_prompt=protocols_prompt, ) for spec in entry_points: diff --git a/core/framework/runtime/execution_stream.py b/core/framework/runtime/execution_stream.py index f485e3db..2af9188d 100644 --- a/core/framework/runtime/execution_stream.py +++ b/core/framework/runtime/execution_stream.py @@ -186,6 +186,8 @@ class ExecutionStream: accounts_prompt: str = "", accounts_data: list[dict] | None = None, tool_provider_map: dict[str, str] | None = None, + skills_catalog_prompt: str = "", + protocols_prompt: str = "", ): """ Initialize execution stream. 
@@ -209,6 +211,8 @@ class ExecutionStream: accounts_prompt: Connected accounts block for system prompt injection accounts_data: Raw account data for per-node prompt generation tool_provider_map: Tool name to provider name mapping for account routing + skills_catalog_prompt: Available skills catalog for system prompt + protocols_prompt: Default skill operational protocols for system prompt """ self.stream_id = stream_id self.entry_spec = entry_spec @@ -230,6 +234,8 @@ class ExecutionStream: self._accounts_prompt = accounts_prompt self._accounts_data = accounts_data self._tool_provider_map = tool_provider_map + self._skills_catalog_prompt = skills_catalog_prompt + self._protocols_prompt = protocols_prompt # Create stream-scoped runtime self._runtime = StreamRuntime( @@ -675,6 +681,8 @@ class ExecutionStream: accounts_prompt=self._accounts_prompt, accounts_data=self._accounts_data, tool_provider_map=self._tool_provider_map, + skills_catalog_prompt=self._skills_catalog_prompt, + protocols_prompt=self._protocols_prompt, ) # Track executor so inject_input() can reach EventLoopNode instances self._active_executors[execution_id] = executor diff --git a/core/framework/skills/__init__.py b/core/framework/skills/__init__.py new file mode 100644 index 00000000..4727085e --- /dev/null +++ b/core/framework/skills/__init__.py @@ -0,0 +1,23 @@ +"""Hive Agent Skills — discovery, parsing, and injection of SKILL.md packages. + +Implements the open Agent Skills standard (agentskills.io) for portable +skill discovery and activation, plus built-in default skills for runtime +operational discipline. 
+""" + +from framework.skills.catalog import SkillCatalog +from framework.skills.config import DefaultSkillConfig, SkillsConfig +from framework.skills.defaults import DefaultSkillManager +from framework.skills.discovery import DiscoveryConfig, SkillDiscovery +from framework.skills.parser import ParsedSkill, parse_skill_md + +__all__ = [ + "DefaultSkillConfig", + "DefaultSkillManager", + "DiscoveryConfig", + "ParsedSkill", + "SkillCatalog", + "SkillDiscovery", + "SkillsConfig", + "parse_skill_md", +] diff --git a/core/framework/skills/_default_skills/batch-ledger/SKILL.md b/core/framework/skills/_default_skills/batch-ledger/SKILL.md new file mode 100644 index 00000000..967088e0 --- /dev/null +++ b/core/framework/skills/_default_skills/batch-ledger/SKILL.md @@ -0,0 +1,24 @@ +--- +name: hive.batch-ledger +description: Track per-item status when processing collections to prevent skipped or duplicated items. +metadata: + author: hive + type: default-skill +--- + +## Operational Protocol: Batch Progress Ledger + +When processing a collection of items, maintain a batch ledger in `_batch_ledger`. 
+ +Initialize when you identify the batch: +- `_batch_total`: total item count +- `_batch_ledger`: JSON with per-item status + +Per-item statuses: pending → in_progress → completed|failed|skipped + +- Set `in_progress` BEFORE processing +- Set final status AFTER processing with 1-line result_summary +- Include error reason for failed/skipped items +- Update aggregate counts after each item +- NEVER remove items from the ledger +- If resuming, skip items already marked completed diff --git a/core/framework/skills/_default_skills/context-preservation/SKILL.md b/core/framework/skills/_default_skills/context-preservation/SKILL.md new file mode 100644 index 00000000..753889fa --- /dev/null +++ b/core/framework/skills/_default_skills/context-preservation/SKILL.md @@ -0,0 +1,22 @@ +--- +name: hive.context-preservation +description: Proactively preserve critical information before automatic context pruning destroys it. +metadata: + author: hive + type: default-skill +--- + +## Operational Protocol: Context Preservation + +You operate under a finite context window. Important information WILL be pruned. + +Save-As-You-Go: After any tool call producing information you'll need later, +immediately extract key data into `_working_notes` or `_preserved_data`. +Do NOT rely on referring back to old tool results. + +What to extract: URLs and key snippets (not full pages), relevant API fields +(not raw JSON), specific lines/values (not entire files), analysis results +(not raw data). + +Before transitioning to the next phase/node, write a handoff summary to +`_handoff_context` with everything the next phase needs to know. 
diff --git a/core/framework/skills/_default_skills/error-recovery/SKILL.md b/core/framework/skills/_default_skills/error-recovery/SKILL.md new file mode 100644 index 00000000..4cc2458c --- /dev/null +++ b/core/framework/skills/_default_skills/error-recovery/SKILL.md @@ -0,0 +1,18 @@ +--- +name: hive.error-recovery +description: Follow a structured recovery protocol when tool calls fail instead of blindly retrying or giving up. +metadata: + author: hive + type: default-skill +--- + +## Operational Protocol: Error Recovery + +When a tool call fails: + +1. Diagnose — record error in notes, classify as transient or structural +2. Decide — transient: retry once. Structural fixable: fix and retry. + Structural unfixable: record as failed, move to next item. + Blocking all progress: record escalation note. +3. Adapt — if same tool failed 3+ times, stop using it and find alternative. + Update plan in notes. Never silently drop the failed item. diff --git a/core/framework/skills/_default_skills/note-taking/SKILL.md b/core/framework/skills/_default_skills/note-taking/SKILL.md new file mode 100644 index 00000000..d5f7e492 --- /dev/null +++ b/core/framework/skills/_default_skills/note-taking/SKILL.md @@ -0,0 +1,27 @@ +--- +name: hive.note-taking +description: Maintain structured working notes throughout execution to prevent information loss during context pruning. +metadata: + author: hive + type: default-skill +--- + +## Operational Protocol: Structured Note-Taking + +Maintain structured working notes in shared memory key `_working_notes`. 
+Update at these checkpoints: + +- After completing each discrete subtask or batch item +- After receiving new information that changes your plan +- Before any tool call that will produce substantial output + +Structure: + +### Objective — restate the goal +### Current Plan — numbered steps, mark completed with ✓ +### Key Decisions — decisions made and WHY +### Working Data — intermediate results, extracted values +### Open Questions — uncertainties to verify +### Blockers — anything preventing progress + +Update incrementally — do not rewrite from scratch each time. diff --git a/core/framework/skills/_default_skills/quality-monitor/SKILL.md b/core/framework/skills/_default_skills/quality-monitor/SKILL.md new file mode 100644 index 00000000..8ae2ade6 --- /dev/null +++ b/core/framework/skills/_default_skills/quality-monitor/SKILL.md @@ -0,0 +1,20 @@ +--- +name: hive.quality-monitor +description: Periodically self-assess output quality to catch degradation before the judge does. +metadata: + author: hive + type: default-skill +--- + +## Operational Protocol: Quality Self-Assessment + +Every 5 iterations, self-assess: + +1. On-task? Still working toward the stated objective? +2. Thorough? Cutting corners compared to earlier? +3. Non-repetitive? Producing new value or rehashing? +4. Consistent? Latest output contradict earlier decisions? +5. Complete? Tracking all items, or silently dropped some? + +If degrading: write assessment to `_quality_log`, re-read `_working_notes`, +change approach explicitly. If acceptable: brief note in `_quality_log`. diff --git a/core/framework/skills/_default_skills/task-decomposition/SKILL.md b/core/framework/skills/_default_skills/task-decomposition/SKILL.md new file mode 100644 index 00000000..60bc6ee8 --- /dev/null +++ b/core/framework/skills/_default_skills/task-decomposition/SKILL.md @@ -0,0 +1,17 @@ +--- +name: hive.task-decomposition +description: Decompose complex tasks into explicit subtasks before diving in. 
+metadata: + author: hive + type: default-skill +--- + +## Operational Protocol: Task Decomposition + +Before starting a complex task: + +1. Decompose — break into numbered subtasks in `_working_notes` Current Plan +2. Estimate — relative effort per subtask (small/medium/large) +3. Execute — work through in order, mark ✓ when complete +4. Budget — if running low on iterations, prioritize by impact +5. Verify — before declaring done, every subtask must be ✓, skipped (with reason), or blocked diff --git a/core/framework/skills/catalog.py b/core/framework/skills/catalog.py new file mode 100644 index 00000000..e0029df3 --- /dev/null +++ b/core/framework/skills/catalog.py @@ -0,0 +1,109 @@ +"""Skill catalog — in-memory index with system prompt generation. + +Builds the XML catalog injected into the system prompt for model-driven +skill activation per the Agent Skills standard. +""" + +from __future__ import annotations + +import logging +from xml.sax.saxutils import escape + +from framework.skills.parser import ParsedSkill + +logger = logging.getLogger(__name__) + +_BEHAVIORAL_INSTRUCTION = ( + "The following skills provide specialized instructions for specific tasks.\n" + "When a task matches a skill's description, read the SKILL.md at the listed\n" + "location to load the full instructions before proceeding.\n" + "When a skill references relative paths, resolve them against the skill's\n" + "directory (the parent of SKILL.md) and use absolute paths in tool calls." 
+) + + +class SkillCatalog: + """In-memory catalog of discovered skills.""" + + def __init__(self, skills: list[ParsedSkill] | None = None): + self._skills: dict[str, ParsedSkill] = {} + self._activated: set[str] = set() + if skills: + for skill in skills: + self.add(skill) + + def add(self, skill: ParsedSkill) -> None: + """Add a skill to the catalog.""" + self._skills[skill.name] = skill + + def get(self, name: str) -> ParsedSkill | None: + """Look up a skill by name.""" + return self._skills.get(name) + + def mark_activated(self, name: str) -> None: + """Mark a skill as activated in the current session.""" + self._activated.add(name) + + def is_activated(self, name: str) -> bool: + """Check if a skill has been activated.""" + return name in self._activated + + @property + def skill_count(self) -> int: + return len(self._skills) + + @property + def allowlisted_dirs(self) -> list[str]: + """All skill base directories for file access allowlisting.""" + return [skill.base_dir for skill in self._skills.values()] + + def to_prompt(self) -> str: + """Generate the catalog prompt for system prompt injection. + + Returns empty string if no community/user skills are discovered + (default skills are handled separately by DefaultSkillManager). + """ + # Filter out framework-scope skills (default skills) — they're + # injected via the protocols prompt, not the catalog + community_skills = [ + s for s in self._skills.values() if s.source_scope != "framework" + ] + + if not community_skills: + return "" + + lines = ["<available_skills>"] + for skill in sorted(community_skills, key=lambda s: s.name): + lines.append(" <skill>") + lines.append(f" <name>{escape(skill.name)}</name>") + lines.append(f" <description>{escape(skill.description)}</description>") + lines.append(f" <location>{escape(skill.location)}</location>") + lines.append(" </skill>") + lines.append("</available_skills>") + + xml_block = "\n".join(lines) + return f"{_BEHAVIORAL_INSTRUCTION}\n\n{xml_block}" + + def build_pre_activated_prompt(self, skill_names: list[str]) -> str: + """Build prompt content for pre-activated skills.
+ + Pre-activated skills get their full SKILL.md body loaded into + the system prompt at startup (tier 2), bypassing model-driven + activation. + + Returns empty string if no skills match. + """ + parts: list[str] = [] + + for name in skill_names: + skill = self.get(name) + if skill is None: + logger.warning("Pre-activated skill '%s' not found in catalog", name) + continue + if self.is_activated(name): + continue # Already activated, skip duplicate + + self.mark_activated(name) + parts.append(f"--- Pre-Activated Skill: {skill.name} ---\n{skill.body}") + + return "\n\n".join(parts) diff --git a/core/framework/skills/config.py b/core/framework/skills/config.py new file mode 100644 index 00000000..12fece71 --- /dev/null +++ b/core/framework/skills/config.py @@ -0,0 +1,99 @@ +"""Skill configuration dataclasses. + +Handles agent-level skill configuration from module-level variables +(``default_skills`` and ``skills``). +""" + +from __future__ import annotations + +from dataclasses import dataclass, field +from typing import Any + + +@dataclass +class DefaultSkillConfig: + """Configuration for a single default skill.""" + + enabled: bool = True + overrides: dict[str, Any] = field(default_factory=dict) + + @classmethod + def from_dict(cls, data: dict[str, Any]) -> DefaultSkillConfig: + enabled = data.get("enabled", True) + overrides = {k: v for k, v in data.items() if k != "enabled"} + return cls(enabled=enabled, overrides=overrides) + + +@dataclass +class SkillsConfig: + """Agent-level skill configuration. + + Built from module-level variables in agent.py:: + + # Pre-activated community skills + skills = ["deep-research", "code-review"] + + # Default skill configuration + default_skills = { + "hive.note-taking": {"enabled": True}, + "hive.batch-ledger": {"enabled": True, "checkpoint_every_n": 10}, + "hive.quality-monitor": {"enabled": False}, + } + """ + + # Per-default-skill config, keyed by skill name (e.g. 
"hive.note-taking") + default_skills: dict[str, DefaultSkillConfig] = field(default_factory=dict) + + # Pre-activated community skills (by name) + skills: list[str] = field(default_factory=list) + + # Master switch: disable all default skills at once + all_defaults_disabled: bool = False + + def is_default_enabled(self, skill_name: str) -> bool: + """Check if a specific default skill is enabled.""" + if self.all_defaults_disabled: + return False + config = self.default_skills.get(skill_name) + if config is None: + return True # enabled by default + return config.enabled + + def get_default_overrides(self, skill_name: str) -> dict[str, Any]: + """Get skill-specific configuration overrides.""" + config = self.default_skills.get(skill_name) + if config is None: + return {} + return config.overrides + + @classmethod + def from_agent_vars( + cls, + default_skills: dict[str, Any] | None = None, + skills: list[str] | None = None, + ) -> SkillsConfig: + """Build config from agent module-level variables. + + Args: + default_skills: Dict from agent module (e.g. 
``{"hive.note-taking": {"enabled": True}}``) + skills: List of pre-activated skill names from agent module + """ + all_disabled = False + parsed_defaults: dict[str, DefaultSkillConfig] = {} + + if default_skills: + for name, config_dict in default_skills.items(): + if name == "_all": + if isinstance(config_dict, dict) and not config_dict.get("enabled", True): + all_disabled = True + continue + if isinstance(config_dict, dict): + parsed_defaults[name] = DefaultSkillConfig.from_dict(config_dict) + elif isinstance(config_dict, bool): + parsed_defaults[name] = DefaultSkillConfig(enabled=config_dict) + + return cls( + default_skills=parsed_defaults, + skills=list(skills or []), + all_defaults_disabled=all_disabled, + ) diff --git a/core/framework/skills/defaults.py b/core/framework/skills/defaults.py new file mode 100644 index 00000000..a902e4e0 --- /dev/null +++ b/core/framework/skills/defaults.py @@ -0,0 +1,151 @@ +"""DefaultSkillManager — load, configure, and inject built-in default skills. + +Default skills are SKILL.md packages shipped with the framework that provide +runtime operational protocols (note-taking, batch tracking, error recovery, etc.). 
class DefaultSkillManager:
    """Load the framework's built-in skills and render them for the system prompt.

    Each enabled entry of ``SKILL_REGISTRY`` is read from the bundled
    ``_default_skills`` directory, kept in memory keyed by skill name, and
    can be combined into a single prompt section.
    """

    def __init__(self, config: SkillsConfig | None = None):
        # A missing (or falsy) config falls back to a stock SkillsConfig.
        self._config = config or SkillsConfig()
        self._skills: dict[str, ParsedSkill] = {}
        self._loaded = False

    def load(self) -> None:
        """Parse every enabled default skill once; repeated calls do nothing.

        A skill that is disabled by config, missing on disk, or rejected by
        the parser is logged and left out of the active set.
        """
        if self._loaded:
            return

        for name, folder in SKILL_REGISTRY.items():
            if not self._config.is_default_enabled(name):
                logger.info("Default skill '%s' disabled by config", name)
                continue

            md_file = _DEFAULT_SKILLS_DIR / folder / "SKILL.md"
            if not md_file.is_file():
                logger.error("Default skill SKILL.md not found: %s", md_file)
                continue

            skill = parse_skill_md(md_file, source_scope="framework")
            if skill is None:
                logger.error("Failed to parse default skill: %s", md_file)
            else:
                self._skills[name] = skill

        self._loaded = True

    def build_protocols_prompt(self) -> str:
        """Return the combined ``## Operational Protocols`` prompt section.

        The full body of every active skill is appended under the heading,
        in ``SKILL_REGISTRY`` order, separated by blank lines. A warning is
        logged when the result exceeds the approximate 2000-token budget.

        Returns:
            The combined section, or an empty string when no default skill
            is active.
        """
        # Registry order, not load order, decides the order of the sections.
        in_registry_order = (self._skills.get(name) for name in SKILL_REGISTRY)
        bodies = [skill.body for skill in in_registry_order if skill is not None]
        if not bodies:
            return ""

        prompt = "\n\n".join(["## Operational Protocols\n", *bodies])

        # Token budget warning (approximate: 1 token ≈ 4 chars)
        char_count = len(prompt)
        token_estimate = char_count // 4
        if token_estimate > 2000:
            logger.warning(
                "Default skill protocols exceed 2000 token budget "
                "(~%d tokens, %d chars). Consider trimming.",
                token_estimate,
                char_count,
            )

        return prompt

    def log_active_skills(self) -> None:
        """Log the active default skills, with per-skill overrides if any."""
        if not self._skills:
            logger.info("Default skills: all disabled")
            return

        labels = []
        for name in SKILL_REGISTRY:
            if name not in self._skills:
                continue
            overrides = self._config.get_default_overrides(name)
            labels.append(f"{name} ({overrides})" if overrides else name)

        logger.info("Default skills active: %s", ", ".join(labels))

    @property
    def active_skill_names(self) -> list[str]:
        """Names of the default skills that were loaded successfully."""
        return list(self._skills)

    @property
    def active_skills(self) -> dict[str, ParsedSkill]:
        """Shallow copy of the loaded default skills, keyed by name."""
        return self._skills.copy()
def discover(self) -> list[ParsedSkill]:
    """Collect skills from every enabled scope and drop shadowed duplicates.

    Directories are visited from lowest to highest precedence:

    1. Framework defaults (``_default_skills`` next to this module)
    2. User cross-client (``~/.agents/skills/``)
    3. User Hive-specific (``~/.hive/skills/``)
    4. Project cross-client (``<project_root>/.agents/skills/``)
    5. Project Hive-specific (``<project_root>/.hive/skills/``)

    A skill found later replaces an earlier one that has the same name.

    Returns:
        The skills that remain after name-collision resolution.
    """
    settings = self._config

    def _candidate_dirs():
        # Lazily yield (directory, scope) pairs, lowest precedence first, so
        # that Path.home() is only consulted when the user scope is enabled.
        if not settings.skip_framework_scope:
            yield Path(__file__).parent / "_default_skills", "framework"

        if not settings.skip_user_scope:
            home_dir = Path.home()
            yield home_dir / ".agents" / "skills", "user"
            yield home_dir / ".hive" / "skills", "user"

        if settings.project_root:
            project_dir = settings.project_root
            yield project_dir / ".agents" / "skills", "project"
            yield project_dir / ".hive" / "skills", "project"

    found: list[ParsedSkill] = []
    for directory, scope in _candidate_dirs():
        # Locations that do not exist are simply skipped.
        if directory.is_dir():
            found.extend(self._scan_scope(directory, scope))

    unique = self._resolve_collisions(found)

    logger.info(
        "Skill discovery: found %d skills (%d after dedup) across all scopes",
        len(found),
        len(unique),
    )
    return unique
# A "key: value" line; group 1 is the key with its separator, group 2 the value.
_KEY_VALUE_LINE = re.compile(r"^(\s*\w[\w-]*:\s*)(.+)$")

# A frontmatter fence: three dashes alone on a line (trailing blanks allowed).
_FRONTMATTER_FENCE = re.compile(r"^---[ \t]*$", re.MULTILINE)


def _try_fix_yaml(raw: str) -> str:
    """Best-effort repair of unquoted values that contain a colon.

    Some SKILL.md files written for other clients contain values such as
    ``description: Use for: research tasks``. Every ``key: value`` line whose
    value contains a colon and does not already start with a quote is
    rewritten with the value wrapped in double quotes. Backslashes and double
    quotes inside the value are escaped so the result stays a valid
    double-quoted YAML scalar with the same text.

    Args:
        raw: The raw frontmatter text.

    Returns:
        The frontmatter with offending values quoted; other lines unchanged.
    """
    repaired: list[str] = []
    for line in raw.split("\n"):
        match = _KEY_VALUE_LINE.match(line)
        if match is None:
            repaired.append(line)
            continue

        key_part, value_part = match.groups()
        if ":" in value_part and not value_part.startswith(('"', "'")):
            # Escape backslashes first so the escapes added for quotes are
            # not themselves doubled.
            escaped = value_part.replace("\\", "\\\\").replace('"', '\\"')
            value_part = f'"{escaped}"'
        repaired.append(f"{key_part}{value_part}")
    return "\n".join(repaired)


def parse_skill_md(path: Path, source_scope: str = "project") -> ParsedSkill | None:
    """Parse a SKILL.md file into a ParsedSkill record.

    The file must contain YAML frontmatter between two ``---`` fences,
    followed by the markdown body. Validation is lenient: a missing ``name``
    falls back to the parent directory name, and an over-long or mismatched
    name only produces a warning.

    Args:
        path: Path to the SKILL.md file.
        source_scope: One of "project", "user", or "framework".

    Returns:
        ParsedSkill on success, None if the file cannot be read or decoded,
        is empty, has no frontmatter, has unparseable or non-mapping YAML,
        or lacks a non-empty ``description``.
    """
    try:
        content = path.read_text(encoding="utf-8")
    except (OSError, UnicodeDecodeError) as exc:
        # Undecodable bytes raise UnicodeDecodeError (a ValueError, not an
        # OSError); treat them like any other unreadable file.
        logger.error("Failed to read %s: %s", path, exc)
        return None

    if not content.strip():
        logger.error("Empty SKILL.md: %s", path)
        return None

    # Prefer fences that stand alone on a line, so a "---" inside a
    # frontmatter value or the body is not mistaken for a delimiter. A
    # leading BOM is dropped so the first fence still starts its line.
    parts = _FRONTMATTER_FENCE.split(content.lstrip("\ufeff"), maxsplit=2)
    if len(parts) < 3:
        # Lenient fallback: split on the first two "---" substrings.
        parts = content.split("---", 2)
    if len(parts) < 3:
        logger.error("SKILL.md missing YAML frontmatter delimiters (---): %s", path)
        return None

    # parts[0] is content before the first fence (should be empty or whitespace)
    # parts[1] is the YAML frontmatter
    # parts[2] is the markdown body
    raw_yaml = parts[1].strip()
    body = parts[2].strip()

    if not raw_yaml:
        logger.error("Empty YAML frontmatter in %s", path)
        return None

    # Imported here, as in the original, so the module itself loads without it.
    import yaml

    frontmatter: dict[str, Any] | None = None
    try:
        frontmatter = yaml.safe_load(raw_yaml)
    except yaml.YAMLError:
        # Fallback: try fixing unquoted colon values
        try:
            fixed = _try_fix_yaml(raw_yaml)
            frontmatter = yaml.safe_load(fixed)
            logger.warning("Fixed YAML parse issues in %s (unquoted colons)", path)
        except yaml.YAMLError as exc:
            logger.error("Unparseable YAML in %s: %s", path, exc)
            return None

    if not isinstance(frontmatter, dict):
        logger.error("YAML frontmatter is not a mapping in %s", path)
        return None

    # Required: description
    description = frontmatter.get("description")
    if not description or not str(description).strip():
        logger.error("Missing or empty 'description' in %s — skipping skill", path)
        return None

    # Required: name (fallback to parent directory name)
    name = frontmatter.get("name")
    parent_dir_name = path.parent.name
    if not name or not str(name).strip():
        name = parent_dir_name
        logger.warning("Missing 'name' in %s — using directory name '%s'", path, name)
    else:
        name = str(name).strip()

    # Lenient warnings
    if len(name) > _MAX_NAME_LENGTH:
        logger.warning("Skill name exceeds %d chars in %s: '%s'", _MAX_NAME_LENGTH, path, name)

    # A dotted name such as "hive.note-taking" matches directory "note-taking".
    if name != parent_dir_name and not name.endswith(f".{parent_dir_name}"):
        logger.warning(
            "Skill name '%s' doesn't match parent directory '%s' in %s",
            name,
            parent_dir_name,
            path,
        )

    return ParsedSkill(
        name=name,
        description=str(description).strip(),
        location=str(path.resolve()),
        base_dir=str(path.parent.resolve()),
        source_scope=source_scope,
        body=body,
        license=frontmatter.get("license"),
        compatibility=frontmatter.get("compatibility"),
        metadata=frontmatter.get("metadata"),
        allowed_tools=frontmatter.get("allowed-tools"),
    )
class TestDefaultSkillFiles:
    """Check that each bundled default SKILL.md exists and parses cleanly."""

    def test_all_six_skills_exist(self):
        """The registry lists exactly six default skills."""
        registered = len(SKILL_REGISTRY)
        assert registered == 6

    @pytest.mark.parametrize("skill_name,dir_name", list(SKILL_REGISTRY.items()))
    def test_skill_parses(self, skill_name, dir_name):
        """Each registry entry points at a SKILL.md that parses as expected."""
        skill_file = _DEFAULT_SKILLS_DIR / dir_name / "SKILL.md"
        assert skill_file.is_file(), f"Missing SKILL.md at {skill_file}"

        result = parse_skill_md(skill_file, source_scope="framework")
        assert result is not None, f"Failed to parse {skill_file}"
        assert result.name == skill_name
        assert result.description
        assert result.body
        assert result.source_scope == "framework"

    def test_combined_token_budget(self):
        """All default skill bodies combined should be under 2000 tokens (~8000 chars)."""
        body_lengths = []
        for folder in SKILL_REGISTRY.values():
            skill_file = _DEFAULT_SKILLS_DIR / folder / "SKILL.md"
            result = parse_skill_md(skill_file, source_scope="framework")
            assert result is not None
            body_lengths.append(len(result.body))

        total_chars = sum(body_lengths)
        # Same approximation as the manager: 1 token ≈ 4 chars.
        approx_tokens = total_chars // 4
        assert approx_tokens < 2000, (
            f"Combined default skill bodies are ~{approx_tokens} tokens "
            f"({total_chars} chars), exceeding the 2000 token budget"
        )

    def test_shared_memory_keys_all_prefixed(self):
        """All shared memory keys must start with underscore."""
        for key in SHARED_MEMORY_KEYS:
            assert key.startswith("_"), f"Shared memory key missing _ prefix: {key}"
class TestSkillsConfig:
    """Exercise SkillsConfig defaults, explicit flags, and from_agent_vars."""

    def test_default_is_enabled(self):
        """With no configuration a default skill reports as enabled."""
        cfg = SkillsConfig()
        assert cfg.is_default_enabled("hive.note-taking") is True

    def test_explicit_disable(self):
        """Disabling one skill leaves the others enabled."""
        per_skill = {"hive.note-taking": DefaultSkillConfig(enabled=False)}
        cfg = SkillsConfig(default_skills=per_skill)
        assert cfg.is_default_enabled("hive.note-taking") is False
        assert cfg.is_default_enabled("hive.batch-ledger") is True

    def test_all_disabled_flag(self):
        """The global flag disables every name, known or not."""
        cfg = SkillsConfig(all_defaults_disabled=True)
        assert cfg.is_default_enabled("hive.note-taking") is False
        assert cfg.is_default_enabled("anything") is False

    def test_from_agent_vars_basic(self):
        """Dict-style entries and the skills list are carried over."""
        agent_defaults = {
            "hive.note-taking": {"enabled": True},
            "hive.quality-monitor": {"enabled": False},
        }
        cfg = SkillsConfig.from_agent_vars(
            default_skills=agent_defaults,
            skills=["deep-research"],
        )
        assert cfg.is_default_enabled("hive.note-taking") is True
        assert cfg.is_default_enabled("hive.quality-monitor") is False
        assert cfg.skills == ["deep-research"]

    def test_from_agent_vars_bool_shorthand(self):
        """A bare ``False`` value disables the skill."""
        cfg = SkillsConfig.from_agent_vars(default_skills={"hive.note-taking": False})
        assert cfg.is_default_enabled("hive.note-taking") is False

    def test_from_agent_vars_all_disabled(self):
        """The ``_all`` key sets the global disable flag."""
        cfg = SkillsConfig.from_agent_vars(default_skills={"_all": {"enabled": False}})
        assert cfg.all_defaults_disabled is True

    def test_get_default_overrides(self):
        """Keys other than ``enabled`` are returned as overrides."""
        agent_defaults = {
            "hive.batch-ledger": {"enabled": True, "checkpoint_every_n": 10},
        }
        cfg = SkillsConfig.from_agent_vars(default_skills=agent_defaults)
        assert cfg.get_default_overrides("hive.batch-ledger") == {"checkpoint_every_n": 10}

    def test_get_default_overrides_empty(self):
        """An unconfigured skill has no overrides."""
        cfg = SkillsConfig()
        assert cfg.get_default_overrides("hive.note-taking") == {}

    def test_from_agent_vars_none_inputs(self):
        """``None`` inputs produce an empty, all-enabled configuration."""
        cfg = SkillsConfig.from_agent_vars(default_skills=None, skills=None)
        assert cfg.skills == []
        assert cfg.default_skills == {}
        assert cfg.all_defaults_disabled is False
def test_to_prompt_xml_escaping(self):
    """Markup-significant characters in a description must be XML-escaped."""
    # NOTE(review): the garbled source had the description without
    # "<special>" and asserted on the raw "<special>" / "&" strings, which
    # cannot verify escaping. The literals below are a reconstruction that
    # assumes standard entity escaping (&lt; &gt; &amp;) — confirm against
    # SkillCatalog.to_prompt.
    skill = _make_skill("test", 'Has <special> & "chars"', "project")
    catalog = SkillCatalog([skill])
    prompt = catalog.to_prompt()

    assert "&lt;special&gt;" in prompt
    assert "&amp;" in prompt
def _write_skill(base: Path, name: str, description: str = "A test skill.") -> Path:
    """Create ``base/name`` containing a minimal SKILL.md and return that directory.

    The written file has ``name`` and ``description`` frontmatter followed by
    a one-line body. Missing parent directories are created; an existing
    directory is reused.
    """
    target = base.joinpath(name)
    target.mkdir(parents=True, exist_ok=True)

    text = f"---\nname: {name}\ndescription: {description}\n---\n\nInstructions.\n"
    target.joinpath("SKILL.md").write_text(text, encoding="utf-8")
    return target
def test_collision_project_overrides_user(self, tmp_path, monkeypatch): + # User-level skill + user_skills = tmp_path / "home" / ".agents" / "skills" + _write_skill(user_skills, "shared-skill", "User version") + + # Project-level skill with same name + project_skills = tmp_path / "project" / ".agents" / "skills" + _write_skill(project_skills, "shared-skill", "Project version") + + monkeypatch.setattr(Path, "home", lambda: tmp_path / "home") + + discovery = SkillDiscovery(DiscoveryConfig( + project_root=tmp_path / "project", + skip_framework_scope=True, + )) + skills = discovery.discover() + + matching = [s for s in skills if s.name == "shared-skill"] + assert len(matching) == 1 + assert matching[0].description == "Project version" + + def test_collision_hive_overrides_agents(self, tmp_path): + # Cross-client path + agents_skills = tmp_path / ".agents" / "skills" + _write_skill(agents_skills, "override-test", "Agents version") + + # Hive-specific path (higher precedence) + hive_skills = tmp_path / ".hive" / "skills" + _write_skill(hive_skills, "override-test", "Hive version") + + discovery = SkillDiscovery(DiscoveryConfig( + project_root=tmp_path, + skip_user_scope=True, + skip_framework_scope=True, + )) + skills = discovery.discover() + + matching = [s for s in skills if s.name == "override-test"] + assert len(matching) == 1 + assert matching[0].description == "Hive version" + + def test_skips_git_and_node_modules(self, tmp_path): + skills_dir = tmp_path / ".agents" / "skills" + _write_skill(skills_dir / ".git", "git-skill") + _write_skill(skills_dir / "node_modules", "npm-skill") + _write_skill(skills_dir, "real-skill") + + discovery = SkillDiscovery(DiscoveryConfig( + project_root=tmp_path, + skip_user_scope=True, + skip_framework_scope=True, + )) + skills = discovery.discover() + + names = {s.name for s in skills} + assert "real-skill" in names + assert "git-skill" not in names + assert "npm-skill" not in names + + def test_empty_scan(self, tmp_path): + 
discovery = SkillDiscovery(DiscoveryConfig( + project_root=tmp_path, + skip_user_scope=True, + skip_framework_scope=True, + )) + skills = discovery.discover() + assert skills == [] + + def test_framework_scope_loads_defaults(self): + """Framework scope should find the built-in default skills.""" + discovery = SkillDiscovery(DiscoveryConfig( + skip_user_scope=True, + )) + skills = discovery.discover() + + framework_skills = [s for s in skills if s.source_scope == "framework"] + names = {s.name for s in framework_skills} + assert "hive.note-taking" in names + assert "hive.batch-ledger" in names + + def test_max_depth_limit(self, tmp_path): + # Create a skill nested beyond max_depth + deep = tmp_path / ".agents" / "skills" / "a" / "b" / "c" / "d" / "e" + _write_skill(deep, "too-deep") + + discovery = SkillDiscovery(DiscoveryConfig( + project_root=tmp_path, + skip_user_scope=True, + skip_framework_scope=True, + max_depth=2, + )) + skills = discovery.discover() + assert not any(s.name == "too-deep" for s in skills) diff --git a/core/tests/test_skill_integration.py b/core/tests/test_skill_integration.py new file mode 100644 index 00000000..14bad861 --- /dev/null +++ b/core/tests/test_skill_integration.py @@ -0,0 +1,218 @@ +"""Integration tests for the skill system — prompt composition and backward compatibility.""" + +import pytest + +from framework.graph.prompt_composer import compose_system_prompt +from framework.skills.catalog import SkillCatalog +from framework.skills.config import SkillsConfig +from framework.skills.defaults import DefaultSkillManager +from framework.skills.discovery import DiscoveryConfig, SkillDiscovery +from framework.skills.parser import ParsedSkill + + +def _make_skill( + name: str = "test-skill", + description: str = "A test skill.", + source_scope: str = "project", + body: str = "Skill instructions.", + location: str = "/tmp/skills/test-skill/SKILL.md", + base_dir: str = "/tmp/skills/test-skill", +) -> ParsedSkill: + return ParsedSkill( + 
name=name, + description=description, + location=location, + base_dir=base_dir, + source_scope=source_scope, + body=body, + ) + + +class TestPromptComposition: + """Test that skill prompts integrate correctly with compose_system_prompt.""" + + def test_backward_compat_no_skill_params(self): + """compose_system_prompt works without skill params (backward compat).""" + prompt = compose_system_prompt( + identity_prompt="You are a helpful agent.", + focus_prompt="Focus on the task.", + ) + assert "You are a helpful agent." in prompt + assert "Focus on the task." in prompt + assert "Current date and time" in prompt + + def test_skills_catalog_in_prompt(self): + catalog = SkillCatalog([_make_skill(source_scope="project")]) + catalog_prompt = catalog.to_prompt() + + prompt = compose_system_prompt( + identity_prompt="You are an agent.", + focus_prompt=None, + skills_catalog_prompt=catalog_prompt, + ) + assert "<available_skills>" in prompt + assert "test-skill" in prompt + + def test_protocols_in_prompt(self): + manager = DefaultSkillManager() + manager.load() + protocols_prompt = manager.build_protocols_prompt() + + prompt = compose_system_prompt( + identity_prompt="You are an agent.", + focus_prompt=None, + protocols_prompt=protocols_prompt, + ) + assert "## Operational Protocols" in prompt + + def test_full_prompt_ordering(self): + """Verify the three-layer onion ordering with all sections present.""" + catalog = SkillCatalog([_make_skill(source_scope="project")]) + + prompt = compose_system_prompt( + identity_prompt="IDENTITY_SECTION", + focus_prompt="FOCUS_SECTION", + narrative="NARRATIVE_SECTION", + accounts_prompt="ACCOUNTS_SECTION", + skills_catalog_prompt=catalog.to_prompt(), + protocols_prompt="PROTOCOLS_SECTION", + ) + + identity_pos = prompt.index("IDENTITY_SECTION") + accounts_pos = prompt.index("ACCOUNTS_SECTION") + skills_pos = prompt.index("available_skills") + protocols_pos = prompt.index("PROTOCOLS_SECTION") + narrative_pos = prompt.index("NARRATIVE_SECTION") + focus_pos = 
prompt.index("FOCUS_SECTION") + + # Identity → Accounts → Skills → Protocols → Narrative → Focus + assert identity_pos < accounts_pos + assert accounts_pos < skills_pos + assert skills_pos < protocols_pos + assert protocols_pos < narrative_pos + assert narrative_pos < focus_pos + + def test_none_skill_prompts_excluded(self): + """None values for skill prompts should not add content.""" + prompt = compose_system_prompt( + identity_prompt="Hello", + focus_prompt=None, + skills_catalog_prompt=None, + protocols_prompt=None, + ) + assert "available_skills" not in prompt + assert "Operational Protocols" not in prompt + + def test_empty_skill_prompts_excluded(self): + """Empty string skill prompts should not add content.""" + prompt = compose_system_prompt( + identity_prompt="Hello", + focus_prompt=None, + skills_catalog_prompt="", + protocols_prompt="", + ) + assert "available_skills" not in prompt + assert "Operational Protocols" not in prompt + + +class TestEndToEndPipeline: + """Test the full discovery → catalog → prompt pipeline.""" + + def test_discovery_to_catalog_to_prompt(self, tmp_path): + # Create a project skill + skill_dir = tmp_path / ".agents" / "skills" / "my-tool" + skill_dir.mkdir(parents=True) + (skill_dir / "SKILL.md").write_text( + "---\nname: my-tool\ndescription: Tool for testing.\n---\n\n" + "## Usage\nUse this tool when testing.\n", + encoding="utf-8", + ) + + # Discovery + discovery = SkillDiscovery(DiscoveryConfig( + project_root=tmp_path, + skip_user_scope=True, + skip_framework_scope=True, + )) + skills = discovery.discover() + assert len(skills) == 1 + + # Catalog + catalog = SkillCatalog(skills) + assert catalog.skill_count == 1 + + # Prompt generation + prompt = catalog.to_prompt() + assert "my-tool" in prompt + assert "Tool for testing." 
in prompt + + # Pre-activation + activated = catalog.build_pre_activated_prompt(["my-tool"]) + assert "## Usage" in activated + assert catalog.is_activated("my-tool") + + def test_defaults_plus_community_skills(self, tmp_path): + """Default skills and community skills produce separate prompt sections.""" + # Create a community skill + skill_dir = tmp_path / ".agents" / "skills" / "community-skill" + skill_dir.mkdir(parents=True) + (skill_dir / "SKILL.md").write_text( + "---\nname: community-skill\ndescription: A community skill.\n---\n\nDo stuff.\n", + encoding="utf-8", + ) + + # Discover community skills + discovery = SkillDiscovery(DiscoveryConfig( + project_root=tmp_path, + skip_user_scope=True, + skip_framework_scope=True, + )) + community_skills = discovery.discover() + catalog = SkillCatalog(community_skills) + catalog_prompt = catalog.to_prompt() + + # Load default skills + manager = DefaultSkillManager() + manager.load() + protocols_prompt = manager.build_protocols_prompt() + + # Compose + prompt = compose_system_prompt( + identity_prompt="Agent identity.", + focus_prompt=None, + skills_catalog_prompt=catalog_prompt, + protocols_prompt=protocols_prompt, + ) + + # Both sections present + assert "<available_skills>" in prompt + assert "community-skill" in prompt + assert "## Operational Protocols" in prompt + + def test_config_disables_defaults_keeps_community(self, tmp_path): + """Disabling all defaults should still allow community skills.""" + skill_dir = tmp_path / ".agents" / "skills" / "still-here" + skill_dir.mkdir(parents=True) + (skill_dir / "SKILL.md").write_text( + "---\nname: still-here\ndescription: Survives config.\n---\n\nBody.\n", + encoding="utf-8", + ) + + # Community skills + discovery = SkillDiscovery(DiscoveryConfig( + project_root=tmp_path, + skip_user_scope=True, + skip_framework_scope=True, + )) + catalog = SkillCatalog(discovery.discover()) + + # Disabled defaults + config = SkillsConfig(all_defaults_disabled=True) + manager = 
DefaultSkillManager(config) + manager.load() + + catalog_prompt = catalog.to_prompt() + protocols_prompt = manager.build_protocols_prompt() + + assert "still-here" in catalog_prompt + assert protocols_prompt == "" diff --git a/core/tests/test_skill_parser.py b/core/tests/test_skill_parser.py new file mode 100644 index 00000000..b39570a7 --- /dev/null +++ b/core/tests/test_skill_parser.py @@ -0,0 +1,180 @@ +"""Tests for SKILL.md parser.""" + +import pytest +from pathlib import Path + +from framework.skills.parser import parse_skill_md, ParsedSkill + + +@pytest.fixture +def tmp_skill(tmp_path): + """Helper to create a SKILL.md file and return its path.""" + def _create(content: str, dir_name: str = "my-skill") -> Path: + skill_dir = tmp_path / dir_name + skill_dir.mkdir(parents=True, exist_ok=True) + skill_md = skill_dir / "SKILL.md" + skill_md.write_text(content, encoding="utf-8") + return skill_md + return _create + + +class TestParseSkillMd: + def test_happy_path(self, tmp_skill): + content = """--- +name: my-skill +description: A test skill for unit testing. +license: MIT +--- + +## Instructions + +Do the thing. +""" + result = parse_skill_md(tmp_skill(content), source_scope="project") + assert result is not None + assert result.name == "my-skill" + assert result.description == "A test skill for unit testing." + assert result.license == "MIT" + assert result.source_scope == "project" + assert "Do the thing." in result.body + + def test_missing_description_returns_none(self, tmp_skill): + content = """--- +name: no-desc +--- + +Body here. +""" + result = parse_skill_md(tmp_skill(content, "no-desc")) + assert result is None + + def test_missing_name_uses_directory(self, tmp_skill): + content = """--- +description: Skill without a name field. +--- + +Body. 
+""" + result = parse_skill_md(tmp_skill(content, "fallback-dir")) + assert result is not None + assert result.name == "fallback-dir" + + def test_empty_file_returns_none(self, tmp_skill): + result = parse_skill_md(tmp_skill("", "empty")) + assert result is None + + def test_no_frontmatter_delimiters_returns_none(self, tmp_skill): + content = "Just plain text without YAML frontmatter." + result = parse_skill_md(tmp_skill(content, "no-yaml")) + assert result is None + + def test_unparseable_yaml_returns_none(self, tmp_skill): + content = """--- +name: [invalid yaml + - broken: {{ +--- + +Body. +""" + result = parse_skill_md(tmp_skill(content, "bad-yaml")) + assert result is None + + def test_unquoted_colon_fixup(self, tmp_skill): + content = """--- +name: colon-test +description: Use for: research tasks +--- + +Body. +""" + result = parse_skill_md(tmp_skill(content, "colon-test")) + assert result is not None + assert "research tasks" in result.description + + def test_long_name_warns_but_loads(self, tmp_skill): + long_name = "a" * 100 + content = f"""--- +name: {long_name} +description: A skill with an excessively long name. +--- + +Body. +""" + result = parse_skill_md(tmp_skill(content, "long-name")) + assert result is not None + assert result.name == long_name + + def test_name_mismatch_warns_but_loads(self, tmp_skill): + content = """--- +name: different-name +description: Name doesn't match directory. +--- + +Body. +""" + result = parse_skill_md(tmp_skill(content, "actual-dir")) + assert result is not None + assert result.name == "different-name" + + def test_optional_fields(self, tmp_skill): + content = """--- +name: full-skill +description: Skill with all optional fields. +license: Apache-2.0 +compatibility: + - claude-code + - cursor +metadata: + author: tester + version: "1.0" +allowed-tools: + - web_search + - read_file +--- + +Instructions here. 
+""" + result = parse_skill_md(tmp_skill(content, "full-skill")) + assert result is not None + assert result.license == "Apache-2.0" + assert result.compatibility == ["claude-code", "cursor"] + assert result.metadata == {"author": "tester", "version": "1.0"} + assert result.allowed_tools == ["web_search", "read_file"] + + def test_body_extraction(self, tmp_skill): + content = """--- +name: body-test +description: Test body extraction. +--- + +## Step 1 + +Do this first. + +## Step 2 + +Then do this. +""" + result = parse_skill_md(tmp_skill(content, "body-test")) + assert result is not None + assert "## Step 1" in result.body + assert "## Step 2" in result.body + assert "Do this first." in result.body + + def test_location_is_absolute(self, tmp_skill): + content = """--- +name: abs-path +description: Check absolute path. +--- + +Body. +""" + path = tmp_skill(content, "abs-path") + result = parse_skill_md(path) + assert result is not None + assert Path(result.location).is_absolute() + assert Path(result.base_dir).is_absolute() + + def test_nonexistent_file_returns_none(self, tmp_path): + result = parse_skill_md(tmp_path / "nonexistent" / "SKILL.md") + assert result is None diff --git a/tools/src/aden_tools/credentials/slack.py b/tools/src/aden_tools/credentials/slack.py index 82864e06..5347ead5 100644 --- a/tools/src/aden_tools/credentials/slack.py +++ b/tools/src/aden_tools/credentials/slack.py @@ -67,7 +67,7 @@ SLACK_CREDENTIALS = { help_url="https://api.slack.com/apps", description="Slack Bot Token (starts with xoxb-)", # Auth method support - aden_supported=True, + aden_supported=False, aden_provider_name="slack", direct_api_key_supported=True, api_key_instructions="""To get a Slack Bot Token: From f2ddd1051d958946aa4492c54283495713b201df Mon Sep 17 00:00:00 2001 From: Waryjustice Date: Sat, 14 Mar 2026 18:52:25 +0530 Subject: [PATCH 06/45] fix: make state.json progress writes atomic Use atomic_write for GraphExecutor._write_progress and log persistence failures 
instead of silently swallowing exceptions. Add regression tests for atomic write usage and warning logs on write failure. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- core/framework/graph/executor.py | 12 ++++-- core/tests/test_graph_executor.py | 70 +++++++++++++++++++++++++++++++ 2 files changed, 79 insertions(+), 3 deletions(-) diff --git a/core/framework/graph/executor.py b/core/framework/graph/executor.py index 04d2a14a..9c0f9118 100644 --- a/core/framework/graph/executor.py +++ b/core/framework/graph/executor.py @@ -32,6 +32,7 @@ from framework.observability import set_trace_context from framework.runtime.core import Runtime from framework.schemas.checkpoint import Checkpoint from framework.storage.checkpoint_store import CheckpointStore +from framework.utils.io import atomic_write logger = logging.getLogger(__name__) @@ -226,11 +227,11 @@ class GraphExecutor: """ if not self._storage_path: return + state_path = self._storage_path / "state.json" try: import json as _json from datetime import datetime - state_path = self._storage_path / "state.json" if state_path.exists(): state_data = _json.loads(state_path.read_text(encoding="utf-8")) else: @@ -253,9 +254,14 @@ class GraphExecutor: state_data["memory"] = memory_snapshot state_data["memory_keys"] = list(memory_snapshot.keys()) - state_path.write_text(_json.dumps(state_data, indent=2), encoding="utf-8") + with atomic_write(state_path, encoding="utf-8") as f: + _json.dump(state_data, f, indent=2) except Exception: - pass # Best-effort — never block execution + logger.warning( + "Failed to persist progress state to %s", + state_path, + exc_info=True, + ) def _validate_tools(self, graph: GraphSpec) -> list[str]: """ diff --git a/core/tests/test_graph_executor.py b/core/tests/test_graph_executor.py index 3d30d36b..5eb0ad3d 100644 --- a/core/tests/test_graph_executor.py +++ b/core/tests/test_graph_executor.py @@ -3,12 +3,16 @@ Tests for core GraphExecutor execution paths. 
Focused on minimal success and failure scenarios. """ +import json +import logging + import pytest from framework.graph.edge import GraphSpec from framework.graph.executor import GraphExecutor from framework.graph.goal import Goal from framework.graph.node import NodeResult, NodeSpec +from framework.utils.io import atomic_write # ---- Dummy runtime (no real logging) ---- @@ -25,6 +29,14 @@ class DummyRuntime: pass +class DummyMemory: + def __init__(self, data): + self._data = data + + def read_all(self): + return self._data + + # ---- Fake node that always succeeds ---- class SuccessNode: def validate_input(self, ctx): @@ -245,3 +257,61 @@ async def test_executor_no_events_without_event_bus(): result = await executor.execute(graph=graph, goal=goal) assert result.success is True + + +def test_write_progress_uses_atomic_write_and_updates_state(tmp_path, monkeypatch): + runtime = DummyRuntime() + executor = GraphExecutor(runtime=runtime, storage_path=tmp_path) + state_path = tmp_path / "state.json" + state_path.write_text(json.dumps({"entry_point": "primary"}), encoding="utf-8") + memory = DummyMemory({"foo": "bar"}) + + called = {} + + def recording_atomic_write(path, *args, **kwargs): + called["path"] = path + return atomic_write(path, *args, **kwargs) + + monkeypatch.setattr("framework.graph.executor.atomic_write", recording_atomic_write) + + executor._write_progress( + current_node="node-b", + path=["node-a", "node-b"], + memory=memory, + node_visit_counts={"node-a": 1, "node-b": 1}, + ) + + state = json.loads(state_path.read_text(encoding="utf-8")) + assert called["path"] == state_path + assert state["entry_point"] == "primary" + assert state["progress"]["current_node"] == "node-b" + assert state["progress"]["path"] == ["node-a", "node-b"] + assert state["progress"]["node_visit_counts"] == {"node-a": 1, "node-b": 1} + assert state["progress"]["steps_executed"] == 2 + assert state["memory"] == {"foo": "bar"} + assert state["memory_keys"] == ["foo"] + assert 
"updated_at" in state["timestamps"] + + +def test_write_progress_logs_warning_on_atomic_write_failure(tmp_path, monkeypatch, caplog): + runtime = DummyRuntime() + executor = GraphExecutor(runtime=runtime, storage_path=tmp_path) + state_path = tmp_path / "state.json" + state_path.write_text(json.dumps({"entry_point": "primary"}), encoding="utf-8") + memory = DummyMemory({"foo": "bar"}) + + def failing_atomic_write(*args, **kwargs): + raise OSError("disk full") + + monkeypatch.setattr("framework.graph.executor.atomic_write", failing_atomic_write) + + with caplog.at_level(logging.WARNING): + executor._write_progress( + current_node="node-b", + path=["node-a", "node-b"], + memory=memory, + node_visit_counts={"node-a": 1, "node-b": 1}, + ) + + assert "Failed to persist progress state to" in caplog.text + assert str(state_path) in caplog.text From 5e9f74744a7d1c9e5c740588eaeb04f24b499fc5 Mon Sep 17 00:00:00 2001 From: Timothy Date: Mon, 16 Mar 2026 10:14:05 -0700 Subject: [PATCH 07/45] fix: google sheet tools account param --- .../tools/google_sheets_tool/google_sheets_tool.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/tools/src/aden_tools/tools/google_sheets_tool/google_sheets_tool.py b/tools/src/aden_tools/tools/google_sheets_tool/google_sheets_tool.py index 4799ce4e..4435b5db 100644 --- a/tools/src/aden_tools/tools/google_sheets_tool/google_sheets_tool.py +++ b/tools/src/aden_tools/tools/google_sheets_tool/google_sheets_tool.py @@ -296,6 +296,7 @@ def register_tools( include_grid_data: bool = False, # Tracking parameters (injected by framework, ignored by tool) workspace_id: str | None = None, + account: str | None = None, agent_id: str | None = None, session_id: str | None = None, ) -> dict: @@ -325,6 +326,7 @@ def register_tools( sheet_titles: list[str] | None = None, # Tracking parameters (injected by framework, ignored by tool) workspace_id: str | None = None, + account: str | None = None, agent_id: str | None = None, session_id: str | None = 
None, ) -> dict: @@ -357,6 +359,7 @@ def register_tools( value_render_option: str = "FORMATTED_VALUE", # Tracking parameters (injected by framework, ignored by tool) workspace_id: str | None = None, + account: str | None = None, agent_id: str | None = None, session_id: str | None = None, ) -> dict: @@ -392,6 +395,7 @@ def register_tools( value_input_option: str = "USER_ENTERED", # Tracking parameters (injected by framework, ignored by tool) workspace_id: str | None = None, + account: str | None = None, agent_id: str | None = None, session_id: str | None = None, ) -> dict: @@ -426,6 +430,7 @@ def register_tools( value_input_option: str = "USER_ENTERED", # Tracking parameters (injected by framework, ignored by tool) workspace_id: str | None = None, + account: str | None = None, agent_id: str | None = None, session_id: str | None = None, ) -> dict: @@ -458,6 +463,7 @@ def register_tools( range_name: str, # Tracking parameters (injected by framework, ignored by tool) workspace_id: str | None = None, + account: str | None = None, agent_id: str | None = None, session_id: str | None = None, ) -> dict: @@ -490,6 +496,7 @@ def register_tools( value_input_option: str = "USER_ENTERED", # Tracking parameters (injected by framework, ignored by tool) workspace_id: str | None = None, + account: str | None = None, agent_id: str | None = None, session_id: str | None = None, ) -> dict: @@ -521,6 +528,7 @@ def register_tools( ranges: list[str], # Tracking parameters (injected by framework, ignored by tool) workspace_id: str | None = None, + account: str | None = None, agent_id: str | None = None, session_id: str | None = None, ) -> dict: @@ -554,6 +562,7 @@ def register_tools( column_count: int = 26, # Tracking parameters (injected by framework, ignored by tool) workspace_id: str | None = None, + account: str | None = None, agent_id: str | None = None, session_id: str | None = None, ) -> dict: @@ -585,6 +594,7 @@ def register_tools( sheet_id: int, # Tracking parameters (injected by 
framework, ignored by tool) workspace_id: str | None = None, + account: str | None = None, agent_id: str | None = None, session_id: str | None = None, ) -> dict: From 19f7ae862e9b2c6a6628b01426a6c288b2827f74 Mon Sep 17 00:00:00 2001 From: Timothy Date: Mon, 16 Mar 2026 10:14:33 -0700 Subject: [PATCH 08/45] fix: skill loading log --- core/framework/graph/event_loop_node.py | 8 ++++++++ core/framework/runner/runner.py | 11 ++++++++++- 2 files changed, 18 insertions(+), 1 deletion(-) diff --git a/core/framework/graph/event_loop_node.py b/core/framework/graph/event_loop_node.py index 08a37a3e..f6b752c6 100644 --- a/core/framework/graph/event_loop_node.py +++ b/core/framework/graph/event_loop_node.py @@ -499,8 +499,16 @@ class EventLoopNode(NodeProtocol): # Append skill catalog and operational protocols if ctx.skills_catalog_prompt: system_prompt = f"{system_prompt}\n\n{ctx.skills_catalog_prompt}" + logger.info( + "[%s] Injected skills catalog (%d chars)", + node_id, len(ctx.skills_catalog_prompt), + ) if ctx.protocols_prompt: system_prompt = f"{system_prompt}\n\n{ctx.protocols_prompt}" + logger.info( + "[%s] Injected operational protocols (%d chars)", + node_id, len(ctx.protocols_prompt), + ) # Inject agent working memory (adapt.md). # If it doesn't exist yet, seed it with available context. 
diff --git a/core/framework/runner/runner.py b/core/framework/runner/runner.py index b8792c17..aeca0625 100644 --- a/core/framework/runner/runner.py +++ b/core/framework/runner/runner.py @@ -1372,7 +1372,16 @@ class AgentRunner: default_mgr.log_active_skills() protocols_prompt = default_mgr.build_protocols_prompt() except Exception: - logger.debug("Skill system init failed (non-fatal)", exc_info=True) + logger.warning("Skill system init failed (non-fatal)", exc_info=True) + + if protocols_prompt: + logger.info( + "Skill system ready: protocols=%d chars, catalog=%d chars", + len(protocols_prompt), + len(skills_catalog_prompt), + ) + else: + logger.warning("Skill system produced empty protocols_prompt") self._setup_agent_runtime( tools, From c2e560fc07355ccaecbdd70a7687ca9b37b2f755 Mon Sep 17 00:00:00 2001 From: Timothy Date: Mon, 16 Mar 2026 10:30:05 -0700 Subject: [PATCH 09/45] fix: queen message display --- core/frontend/src/pages/workspace.tsx | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/core/frontend/src/pages/workspace.tsx b/core/frontend/src/pages/workspace.tsx index 0516a16f..635e4bcc 100644 --- a/core/frontend/src/pages/workspace.tsx +++ b/core/frontend/src/pages/workspace.tsx @@ -1584,12 +1584,15 @@ export default function Workspace() { const chatMsg = sseEventToChatMessage(event, agentType, displayName, currentTurn); if (isQueen) console.log('[QUEEN] chatMsg:', chatMsg?.id, chatMsg?.content?.slice(0, 50), 'turn:', currentTurn); if (chatMsg && !suppressQueenMessages) { - // Queen may emit multiple client_output_delta / llm_text_delta snapshots - // for a single execution as it iterates internally. Use a stable ID so - // those snapshots collapse into a single bubble instead of rendering as - // multiple independent replies to the same user message. + // Queen emits multiple client_output_delta / llm_text_delta snapshots + // across iterations and inner tool-loop turns. 
Build a stable ID that + // groups streaming deltas for the *same* output (same execution + + // iteration + inner_turn) into one bubble, while keeping distinct + // outputs as separate bubbles so earlier text isn't overwritten. if (isQueen && (event.type === "client_output_delta" || event.type === "llm_text_delta") && event.execution_id) { - chatMsg.id = `queen-stream-${event.execution_id}`; + const iter = event.data?.iteration ?? 0; + const inner = event.data?.inner_turn ?? 0; + chatMsg.id = `queen-stream-${event.execution_id}-${iter}-${inner}`; } if (isQueen) { chatMsg.role = role; From 2ced873fb5c17b496d9321475cc8b4a1d3626e6c Mon Sep 17 00:00:00 2001 From: bryan Date: Mon, 16 Mar 2026 12:12:17 -0700 Subject: [PATCH 10/45] refactor: extract flowchart utils into dedicated module with fallback generation --- core/framework/runner/runner.py | 7 + core/framework/tools/flowchart_utils.py | 379 ++++++++++++++++ core/framework/tools/queen_lifecycle_tools.py | 412 ++---------------- 3 files changed, 412 insertions(+), 386 deletions(-) create mode 100644 core/framework/tools/flowchart_utils.py diff --git a/core/framework/runner/runner.py b/core/framework/runner/runner.py index 6a5cc2d4..3ac3cc1d 100644 --- a/core/framework/runner/runner.py +++ b/core/framework/runner/runner.py @@ -28,6 +28,7 @@ from framework.runner.tool_registry import ToolRegistry from framework.runtime.agent_runtime import AgentRuntime, AgentRuntimeConfig, create_agent_runtime from framework.runtime.execution_stream import EntryPointSpec from framework.runtime.runtime_log_store import RuntimeLogStore +from framework.tools.flowchart_utils import generate_fallback_flowchart if TYPE_CHECKING: from framework.runner.protocol import AgentMessage, CapabilityResponse @@ -959,6 +960,9 @@ class AgentRunner: graph = GraphSpec(**graph_kwargs) + # Generate flowchart.json if missing (for template/legacy agents) + generate_fallback_flowchart(graph, goal, agent_path) + # Read runtime config (webhook settings, etc.) 
if defined agent_runtime_config = getattr(agent_module, "runtime_config", None) @@ -1003,6 +1007,9 @@ class AgentRunner: except json.JSONDecodeError as exc: raise ValueError(f"Invalid JSON in agent export file: {agent_json_path}") from exc + # Generate flowchart.json if missing (for legacy JSON-based agents) + generate_fallback_flowchart(graph, goal, agent_path) + return cls( agent_path=agent_path, graph=graph, diff --git a/core/framework/tools/flowchart_utils.py b/core/framework/tools/flowchart_utils.py new file mode 100644 index 00000000..3ff52bd7 --- /dev/null +++ b/core/framework/tools/flowchart_utils.py @@ -0,0 +1,379 @@ +"""Flowchart utilities for generating and persisting flowchart.json files. + +Extracted from queen_lifecycle_tools so that non-Queen code paths +(e.g., AgentRunner.load) can generate flowcharts for legacy agents +that lack a flowchart.json. +""" + +from __future__ import annotations + +import json +import logging +from pathlib import Path +from typing import Any + +logger = logging.getLogger(__name__) + +FLOWCHART_FILENAME = "flowchart.json" + +# ── Flowchart type catalogue (9 types) ─────────────────────────────────────── +_FLOWCHART_TYPES = { + "start": {"shape": "stadium", "color": "#3fa66a"}, # sage green + "terminal": {"shape": "stadium", "color": "#a04444"}, # dusty red + "process": {"shape": "rectangle", "color": "#616d83"}, # blue-gray + "decision": {"shape": "diamond", "color": "#d89d26"}, # warm amber + "io": {"shape": "parallelogram", "color": "#7a4fa5"}, # dusty purple + "document": {"shape": "document", "color": "#507485"}, # steel blue + "database": {"shape": "cylinder", "color": "#459077"}, # muted teal + "subprocess": {"shape": "subroutine", "color": "#4c7f7f"}, # dark cyan + "browser": {"shape": "hexagon", "color": "#3a4a9b"}, # deep blue +} + +# Backward-compat remap: old type names → canonical type +_FLOWCHART_REMAP: dict[str, str] = { + "delay": "process", + "manual_operation": "process", + "preparation": "process", + 
"merge": "process", + "alternate_process": "process", + "connector": "process", + "offpage_connector": "process", + "extract": "process", + "sort": "process", + "collate": "process", + "summing_junction": "process", + "or": "process", + "comment": "process", + "display": "io", + "manual_input": "io", + "multi_document": "document", + "stored_data": "database", + "internal_storage": "database", +} + + +# ── File persistence ───────────────────────────────────────────────────────── + + +def save_flowchart_file( + agent_path: Path | str | None, + original_draft: dict, + flowchart_map: dict[str, list[str]] | None, +) -> None: + """Persist the flowchart to the agent's folder.""" + if agent_path is None: + return + p = Path(agent_path) + if not p.is_dir(): + return + try: + target = p / FLOWCHART_FILENAME + target.write_text( + json.dumps( + {"original_draft": original_draft, "flowchart_map": flowchart_map}, + indent=2, + ), + encoding="utf-8", + ) + logger.debug("Flowchart saved to %s", target) + except Exception: + logger.warning("Failed to save flowchart to %s", p, exc_info=True) + + +def load_flowchart_file( + agent_path: Path | str | None, +) -> tuple[dict | None, dict[str, list[str]] | None]: + """Load flowchart from the agent's folder. Returns (original_draft, flowchart_map).""" + if agent_path is None: + return None, None + target = Path(agent_path) / FLOWCHART_FILENAME + if not target.is_file(): + return None, None + try: + data = json.loads(target.read_text(encoding="utf-8")) + return data.get("original_draft"), data.get("flowchart_map") + except Exception: + logger.warning("Failed to load flowchart from %s", target, exc_info=True) + return None, None + + +# ── Node classification ────────────────────────────────────────────────────── + + +def classify_flowchart_node( + node: dict, + index: int, + total: int, + edges: list[dict], + terminal_ids: set[str], +) -> str: + """Auto-detect the ISO 5807 flowchart type for a draft node. 
+ + Priority: explicit override > structural detection > heuristic > default. + """ + # Explicit override from the queen + explicit = node.get("flowchart_type", "").strip() + if explicit and explicit in _FLOWCHART_TYPES: + return explicit + if explicit and explicit in _FLOWCHART_REMAP: + return _FLOWCHART_REMAP[explicit] + + node_id = node["id"] + node_type = node.get("node_type", "event_loop") + node_tools = set(node.get("tools") or []) + desc = (node.get("description") or "").lower() + name = (node.get("name") or "").lower() + + # GCU / browser automation nodes → hexagon + if node_type == "gcu": + return "browser" + + # Entry node (first node or no incoming edges) → start terminator + incoming = {e["target"] for e in edges} + if index == 0 or (node_id not in incoming and index == 0): + return "start" + + # Terminal node → end terminator + if node_id in terminal_ids: + return "terminal" + + # Decision node: has outgoing edges with branching conditions → diamond + outgoing = [e for e in edges if e["source"] == node_id] + if len(outgoing) >= 2: + conditions = {e.get("condition", "on_success") for e in outgoing} + if len(conditions) > 1 or conditions - {"on_success"}: + return "decision" + + # Sub-agent / subprocess nodes → subroutine (double-bordered rect) + if node.get("sub_agents"): + return "subprocess" + + # Database / data store nodes → cylinder + db_tool_hints = { + "query_database", + "sql_query", + "read_table", + "write_table", + "save_data", + "load_data", + } + db_desc_hints = {"database", "data store", "storage", "persist", "cache"} + if node_tools & db_tool_hints or any(h in desc for h in db_desc_hints): + return "database" + + # Document generation nodes → document shape + doc_tool_hints = { + "generate_report", + "create_document", + "write_report", + "render_template", + "export_pdf", + } + doc_desc_hints = {"report", "document", "summary", "write up", "writeup"} + if node_tools & doc_tool_hints or any(h in desc for h in doc_desc_hints): + return 
"document" + + # I/O nodes: external data ingestion or delivery → parallelogram + io_tool_hints = { + "serve_file_to_user", + "send_email", + "post_message", + "upload_file", + "download_file", + "fetch_url", + "post_to_slack", + "send_notification", + "display_results", + } + io_desc_hints = {"deliver", "send", "output", "notify", "publish"} + if node_tools & io_tool_hints or any(h in desc for h in io_desc_hints): + return "io" + + # Default: process (rectangle) + return "process" + + +# ── Draft synthesis from runtime graph ─────────────────────────────────────── + + +def synthesize_draft_from_runtime( + runtime_nodes: list, + runtime_edges: list, + agent_name: str = "", + goal_name: str = "", +) -> tuple[dict, dict[str, list[str]]]: + """Generate a flowchart draft from a loaded runtime graph. + + Used for agents that were never planned through the draft workflow + (e.g., hand-coded or loaded from "my agents"). Produces a valid + DraftGraph structure with auto-classified flowchart types. 
+ """ + nodes: list[dict] = [] + edges: list[dict] = [] + node_ids = {n.id for n in runtime_nodes} + + # Build edge dicts first (needed for classification) + for i, re in enumerate(runtime_edges): + edges.append( + { + "id": f"edge-{i}", + "source": re.source, + "target": re.target, + "condition": str(re.condition.value) + if hasattr(re.condition, "value") + else str(re.condition), + "description": getattr(re, "description", "") or "", + "label": "", + } + ) + + # Terminal detection — exclude sub-agent nodes (they are leaf helpers, not endpoints) + sub_agent_ids: set[str] = set() + for rn in runtime_nodes: + for sa_id in getattr(rn, "sub_agents", None) or []: + sub_agent_ids.add(sa_id) + sources = {e["source"] for e in edges} + terminal_ids = node_ids - sources - sub_agent_ids + if not terminal_ids and runtime_nodes: + terminal_ids = {runtime_nodes[-1].id} + + # Build node dicts with classification + total = len(runtime_nodes) + for i, rn in enumerate(runtime_nodes): + node: dict = { + "id": rn.id, + "name": rn.name, + "description": rn.description or "", + "node_type": getattr(rn, "node_type", "event_loop") or "event_loop", + "tools": list(rn.tools) if rn.tools else [], + "input_keys": list(rn.input_keys) if rn.input_keys else [], + "output_keys": list(rn.output_keys) if rn.output_keys else [], + "success_criteria": getattr(rn, "success_criteria", "") or "", + "sub_agents": list(rn.sub_agents) if getattr(rn, "sub_agents", None) else [], + } + fc_type = classify_flowchart_node(node, i, total, edges, terminal_ids) + fc_meta = _FLOWCHART_TYPES[fc_type] + node["flowchart_type"] = fc_type + node["flowchart_shape"] = fc_meta["shape"] + node["flowchart_color"] = fc_meta["color"] + nodes.append(node) + + # Add visual edges from parent nodes to their sub_agents. + # Sub-agents are connected via the sub_agents field, not via EdgeSpec, + # so they'd appear as disconnected islands without this. + # Two edges per sub-agent: delegate (parent→sub) and report (sub→parent). 
+ edge_counter = len(edges) + for node in nodes: + for sa_id in node.get("sub_agents") or []: + if sa_id in node_ids: + edges.append( + { + "id": f"edge-subagent-{edge_counter}", + "source": node["id"], + "target": sa_id, + "condition": "always", + "description": "sub-agent delegation", + "label": "delegate", + } + ) + edge_counter += 1 + edges.append( + { + "id": f"edge-subagent-{edge_counter}", + "source": sa_id, + "target": node["id"], + "condition": "always", + "description": "sub-agent report back", + "label": "report", + } + ) + edge_counter += 1 + + # Group sub-agent nodes under their parent in the flowchart map + # (mirrors what _dissolve_planning_nodes does for planned drafts) + sub_agent_ids_final: set[str] = set() + for node in nodes: + for sa_id in node.get("sub_agents") or []: + if sa_id in node_ids: + sub_agent_ids_final.add(sa_id) + + fmap: dict[str, list[str]] = {} + for node in nodes: + nid = node["id"] + if nid in sub_agent_ids_final: + continue # skip — will be included via parent + absorbed = [nid] + for sa_id in node.get("sub_agents") or []: + if sa_id in node_ids: + absorbed.append(sa_id) + fmap[nid] = absorbed + + draft = { + "agent_name": agent_name, + "goal": goal_name, + "description": "", + "success_criteria": [], + "constraints": [], + "nodes": nodes, + "edges": edges, + "entry_node": nodes[0]["id"] if nodes else "", + "terminal_nodes": sorted(terminal_ids), + "flowchart_legend": { + fc_type: {"shape": meta["shape"], "color": meta["color"]} + for fc_type, meta in _FLOWCHART_TYPES.items() + }, + } + + return draft, fmap + + +# ── Fallback generation entry point ────────────────────────────────────────── + + +def generate_fallback_flowchart( + graph: Any, + goal: Any, + agent_path: Path, +) -> None: + """Generate flowchart.json from a runtime GraphSpec if none exists. + + This is a no-op if flowchart.json already exists. On failure, logs a + warning but never raises — agent loading must not be blocked by + flowchart generation. 
+ """ + try: + existing_draft, _ = load_flowchart_file(agent_path) + if existing_draft is not None: + return # already have one + + draft, fmap = synthesize_draft_from_runtime( + runtime_nodes=list(graph.nodes), + runtime_edges=list(graph.edges), + agent_name=agent_path.name, + goal_name=goal.name if goal else "", + ) + + # Enrich with Goal metadata + if goal: + draft["goal"] = goal.description or goal.name or "" + draft["success_criteria"] = [ + sc.description for sc in (goal.success_criteria or []) + ] + draft["constraints"] = [ + c.description for c in (goal.constraints or []) + ] + + # Use entry_node/terminal_nodes from GraphSpec if available + if graph.entry_node: + draft["entry_node"] = graph.entry_node + if graph.terminal_nodes: + draft["terminal_nodes"] = list(graph.terminal_nodes) + + save_flowchart_file(agent_path, draft, fmap) + logger.info("Generated fallback flowchart.json for %s", agent_path.name) + except Exception: + logger.warning( + "Failed to generate fallback flowchart for %s", + agent_path, + exc_info=True, + ) diff --git a/core/framework/tools/queen_lifecycle_tools.py b/core/framework/tools/queen_lifecycle_tools.py index ed88dd66..6a596c7f 100644 --- a/core/framework/tools/queen_lifecycle_tools.py +++ b/core/framework/tools/queen_lifecycle_tools.py @@ -45,6 +45,14 @@ from framework.credentials.models import CredentialError from framework.runner.preload_validation import credential_errors_to_json, validate_credentials from framework.runtime.event_bus import AgentEvent, EventType from framework.server.app import validate_agent_path +from framework.tools.flowchart_utils import ( + FLOWCHART_FILENAME, + _FLOWCHART_TYPES, + classify_flowchart_node, + load_flowchart_file, + save_flowchart_file, + synthesize_draft_from_runtime, +) if TYPE_CHECKING: from framework.runner.tool_registry import ToolRegistry @@ -285,66 +293,8 @@ def build_worker_profile(runtime: AgentRuntime, agent_path: Path | str | None = return "\n".join(lines) -_FLOWCHART_TYPES = { - 
# ── Core symbols (ISO 5807 §4) ────────────────────────── - # Terminator — rounded rectangle (stadium shape) - "start": {"shape": "stadium", "color": "#4CAF50"}, # green - "terminal": {"shape": "stadium", "color": "#F44336"}, # red - # Process — rectangle - "process": {"shape": "rectangle", "color": "#2196F3"}, # blue - # Decision — diamond - "decision": {"shape": "diamond", "color": "#FF9800"}, # amber - # Data (Input/Output) — parallelogram - "io": {"shape": "parallelogram", "color": "#9C27B0"}, # purple - # Document — rectangle with wavy bottom - "document": {"shape": "document", "color": "#607D8B"}, # blue-grey - # Multi-document — stacked documents - "multi_document": {"shape": "multi_document", "color": "#78909C"}, # blue-grey light - # Predefined process / subroutine — rectangle with double vertical bars - "subprocess": {"shape": "subroutine", "color": "#009688"}, # teal - # Preparation — hexagon - "preparation": {"shape": "hexagon", "color": "#795548"}, # brown - # Manual input — trapezoid with slanted top - "manual_input": {"shape": "manual_input", "color": "#E91E63"}, # pink - # Manual operation — inverted trapezoid - "manual_operation": {"shape": "trapezoid", "color": "#AD1457"}, # dark pink - # Delay — half-rounded rectangle (D-shape) - "delay": {"shape": "delay", "color": "#FF5722"}, # deep orange - # Display — rounded rectangle with pointed left - "display": {"shape": "display", "color": "#00BCD4"}, # cyan - # ── Data storage symbols ──────────────────────────────── - # Database / direct access storage — cylinder - "database": {"shape": "cylinder", "color": "#8BC34A"}, # light green - # Stored data — generic data store - "stored_data": {"shape": "stored_data", "color": "#CDDC39"}, # lime - # Internal storage — rectangle with cross-hatch - "internal_storage": {"shape": "internal_storage", "color": "#FFC107"}, # amber light - # ── Connectors ────────────────────────────────────────── - # On-page connector — small circle - "connector": {"shape": 
"circle", "color": "#9E9E9E"}, # grey - # Off-page connector — pentagon / home-plate - "offpage_connector": {"shape": "pentagon", "color": "#757575"}, # dark grey - # ── Flow operations ───────────────────────────────────── - # Merge — inverted triangle - "merge": {"shape": "triangle_inv", "color": "#3F51B5"}, # indigo - # Extract — upward triangle - "extract": {"shape": "triangle", "color": "#5C6BC0"}, # indigo light - # Sort — hourglass / double triangle - "sort": {"shape": "hourglass", "color": "#7986CB"}, # indigo lighter - # Collate — merged hourglass - "collate": {"shape": "hourglass_inv", "color": "#9FA8DA"}, # indigo lightest - # Summing junction — circle with cross - "summing_junction": {"shape": "circle_cross", "color": "#F06292"}, # pink light - # Or — circle with horizontal bar - "or": {"shape": "circle_bar", "color": "#CE93D8"}, # purple light - # ── Domain-specific (Hive agent context) ──────────────── - # Browser automation (GCU) — mapped to preparation/hexagon - "browser": {"shape": "hexagon", "color": "#1A237E"}, # dark indigo - # Comment / annotation — flag shape - "comment": {"shape": "flag", "color": "#BDBDBD"}, # light grey - # Alternate process — rounded rectangle - "alternate_process": {"shape": "rounded_rect", "color": "#42A5F5"}, # light blue -} + +# _FLOWCHART_TYPES is imported from framework.tools.flowchart_utils def _read_agent_triggers_json(agent_path: Path) -> list[dict]: @@ -1147,309 +1097,20 @@ def register_queen_lifecycle_tools( registry.register("replan_agent", _replan_tool, lambda inputs: replan_agent()) tools_registered += 1 - # --- Flowchart file persistence ------------------------------------------- - # The flowchart is saved as flowchart.json in the agent's folder so it - # survives restarts and is available when loading any agent. 
- - FLOWCHART_FILENAME = "flowchart.json" - - def _save_flowchart_file( - agent_path: Path | str | None, - original_draft: dict, - flowchart_map: dict[str, list[str]] | None, - ) -> None: - """Persist the flowchart to the agent's folder.""" - if agent_path is None: - return - p = Path(agent_path) - if not p.is_dir(): - return - try: - target = p / FLOWCHART_FILENAME - target.write_text( - json.dumps( - {"original_draft": original_draft, "flowchart_map": flowchart_map}, - indent=2, - ), - encoding="utf-8", - ) - logger.debug("Flowchart saved to %s", target) - except Exception: - logger.warning("Failed to save flowchart to %s", p, exc_info=True) - - def _load_flowchart_file( - agent_path: Path | str | None, - ) -> tuple[dict | None, dict[str, list[str]] | None]: - """Load flowchart from the agent's folder. Returns (original_draft, flowchart_map).""" - if agent_path is None: - return None, None - target = Path(agent_path) / FLOWCHART_FILENAME - if not target.is_file(): - return None, None - try: - data = json.loads(target.read_text(encoding="utf-8")) - return data.get("original_draft"), data.get("flowchart_map") - except Exception: - logger.warning("Failed to load flowchart from %s", target, exc_info=True) - return None, None - - def _synthesize_draft_from_runtime( - runtime_nodes: list, - runtime_edges: list, - agent_name: str = "", - goal_name: str = "", - ) -> tuple[dict, dict[str, list[str]]]: - """Generate a flowchart draft from a loaded runtime graph. - - Used for agents that were never planned through the draft workflow - (e.g., hand-coded or loaded from "my agents"). Produces a valid - DraftGraph structure with auto-classified flowchart types. 
- """ - nodes: list[dict] = [] - edges: list[dict] = [] - node_ids = {n.id for n in runtime_nodes} - - # Build edge dicts first (needed for classification) - for i, re in enumerate(runtime_edges): - edges.append( - { - "id": f"edge-{i}", - "source": re.source, - "target": re.target, - "condition": str(re.condition.value) - if hasattr(re.condition, "value") - else str(re.condition), - "description": getattr(re, "description", "") or "", - "label": "", - } - ) - - # Terminal detection — exclude sub-agent nodes (they are leaf helpers, not endpoints) - sub_agent_ids: set[str] = set() - for rn in runtime_nodes: - for sa_id in getattr(rn, "sub_agents", None) or []: - sub_agent_ids.add(sa_id) - sources = {e["source"] for e in edges} - terminal_ids = node_ids - sources - sub_agent_ids - if not terminal_ids and runtime_nodes: - terminal_ids = {runtime_nodes[-1].id} - - # Build node dicts with classification - total = len(runtime_nodes) - for i, rn in enumerate(runtime_nodes): - node: dict = { - "id": rn.id, - "name": rn.name, - "description": rn.description or "", - "node_type": getattr(rn, "node_type", "event_loop") or "event_loop", - "tools": list(rn.tools) if rn.tools else [], - "input_keys": list(rn.input_keys) if rn.input_keys else [], - "output_keys": list(rn.output_keys) if rn.output_keys else [], - "success_criteria": getattr(rn, "success_criteria", "") or "", - "sub_agents": list(rn.sub_agents) if getattr(rn, "sub_agents", None) else [], - } - fc_type = _classify_flowchart_node(node, i, total, edges, terminal_ids) - fc_meta = _FLOWCHART_TYPES[fc_type] - node["flowchart_type"] = fc_type - node["flowchart_shape"] = fc_meta["shape"] - node["flowchart_color"] = fc_meta["color"] - nodes.append(node) - - # Add visual edges from parent nodes to their sub_agents. - # Sub-agents are connected via the sub_agents field, not via EdgeSpec, - # so they'd appear as disconnected islands without this. - # Two edges per sub-agent: delegate (parent→sub) and report (sub→parent). 
- edge_counter = len(edges) - for node in nodes: - for sa_id in node.get("sub_agents") or []: - if sa_id in node_ids: - edges.append( - { - "id": f"edge-subagent-{edge_counter}", - "source": node["id"], - "target": sa_id, - "condition": "always", - "description": "sub-agent delegation", - "label": "delegate", - } - ) - edge_counter += 1 - edges.append( - { - "id": f"edge-subagent-{edge_counter}", - "source": sa_id, - "target": node["id"], - "condition": "always", - "description": "sub-agent report back", - "label": "report", - } - ) - edge_counter += 1 - - # Group sub-agent nodes under their parent in the flowchart map - # (mirrors what _dissolve_planning_nodes does for planned drafts) - sub_agent_ids: set[str] = set() - for node in nodes: - for sa_id in node.get("sub_agents") or []: - if sa_id in node_ids: - sub_agent_ids.add(sa_id) - - fmap: dict[str, list[str]] = {} - for node in nodes: - nid = node["id"] - if nid in sub_agent_ids: - continue # skip — will be included via parent - absorbed = [nid] - for sa_id in node.get("sub_agents") or []: - if sa_id in node_ids: - absorbed.append(sa_id) - fmap[nid] = absorbed - - draft = { - "agent_name": agent_name, - "goal": goal_name, - "description": "", - "success_criteria": [], - "constraints": [], - "nodes": nodes, - "edges": edges, - "entry_node": nodes[0]["id"] if nodes else "", - "terminal_nodes": sorted(terminal_ids), - "flowchart_legend": { - fc_type: {"shape": meta["shape"], "color": meta["color"]} - for fc_type, meta in _FLOWCHART_TYPES.items() - }, - } - - return draft, fmap + # --- Flowchart utilities --------------------------------------------------- + # Flowchart persistence, classification, and synthesis functions are now in + # framework.tools.flowchart_utils. 
Local aliases for backward compatibility + # within this closure: + _save_flowchart_file = save_flowchart_file + _load_flowchart_file = load_flowchart_file + _synthesize_draft_from_runtime = synthesize_draft_from_runtime + _classify_flowchart_node = classify_flowchart_node # --- save_agent_draft (Planning phase — declarative graph preview) --------- # Creates a lightweight draft graph with nodes, edges, and business metadata. # Loose validation: only requires names and descriptions. Emits an event # so the frontend can render the graph during planning (before any code). - def _classify_flowchart_node( - node: dict, - index: int, - total: int, - edges: list[dict], - terminal_ids: set[str], - ) -> str: - """Auto-detect the ISO 5807 flowchart type for a draft node. - - Priority: explicit override > structural detection > heuristic > default. - """ - # Explicit override from the queen - explicit = node.get("flowchart_type", "").strip() - if explicit and explicit in _FLOWCHART_TYPES: - return explicit - - node_id = node["id"] - node_type = node.get("node_type", "event_loop") - node_tools = set(node.get("tools") or []) - desc = (node.get("description") or "").lower() - name = (node.get("name") or "").lower() - - # GCU / browser automation nodes → hexagon - if node_type == "gcu": - return "browser" - - # Entry node (first node or no incoming edges) → start terminator - incoming = {e["target"] for e in edges} - if index == 0 or (node_id not in incoming and index == 0): - return "start" - - # Terminal node → end terminator - if node_id in terminal_ids: - return "terminal" - - # Decision node: has outgoing edges with branching conditions → diamond - outgoing = [e for e in edges if e["source"] == node_id] - if len(outgoing) >= 2: - conditions = {e.get("condition", "on_success") for e in outgoing} - if len(conditions) > 1 or conditions - {"on_success"}: - return "decision" - - # Sub-agent / subprocess nodes → subroutine (double-bordered rect) - if node.get("sub_agents"): - 
return "subprocess" - - # Database / data store nodes → cylinder - db_tool_hints = { - "query_database", - "sql_query", - "read_table", - "write_table", - "save_data", - "load_data", - } - db_desc_hints = {"database", "data store", "storage", "persist", "cache"} - if node_tools & db_tool_hints or any(h in desc for h in db_desc_hints): - return "database" - - # Document generation nodes → document shape - doc_tool_hints = { - "generate_report", - "create_document", - "write_report", - "render_template", - "export_pdf", - } - doc_desc_hints = {"report", "document", "summary", "write up", "writeup"} - if node_tools & doc_tool_hints or any(h in desc for h in doc_desc_hints): - return "document" - - # I/O nodes: external data ingestion or delivery → parallelogram - io_tool_hints = { - "serve_file_to_user", - "send_email", - "post_message", - "upload_file", - "download_file", - "fetch_url", - "post_to_slack", - "send_notification", - } - io_desc_hints = {"deliver", "send", "output", "notify", "publish"} - if node_tools & io_tool_hints or any(h in desc for h in io_desc_hints): - return "io" - - # Manual / human-in-the-loop nodes → trapezoid - manual_desc_hints = { - "human review", - "manual", - "approval", - "human-in-the-loop", - "user review", - "manual check", - } - if any(h in desc for h in manual_desc_hints) or any(h in name for h in manual_desc_hints): - return "manual_operation" - - # Preparation / setup nodes → hexagon - prep_desc_hints = {"setup", "initialize", "prepare", "configure", "provision"} - if any(h in desc for h in prep_desc_hints) or any(h in name for h in prep_desc_hints): - return "preparation" - - # Delay / wait nodes → D-shape - delay_desc_hints = {"wait", "delay", "pause", "cooldown", "throttle", "sleep"} - if any(h in desc for h in delay_desc_hints): - return "delay" - - # Merge nodes → inverted triangle - merge_desc_hints = {"merge", "combine", "aggregate", "consolidate"} - if any(h in desc for h in merge_desc_hints) or any(h in name for h in 
merge_desc_hints): - return "merge" - - # Display nodes → display shape - display_desc_hints = {"display", "show", "present", "render", "visualize"} - display_tool_hints = {"serve_file_to_user", "display_results"} - if node_tools & display_tool_hints or any(h in name for h in display_desc_hints): - return "display" - - # Default: process (rectangle) - return "process" - def _dissolve_planning_nodes( draft: dict, ) -> tuple[dict, dict[str, list[str]]]: @@ -2276,39 +1937,18 @@ def register_queen_lifecycle_tools( "decision", "io", "document", - "multi_document", - "subprocess", - "preparation", - "manual_input", - "manual_operation", - "delay", - "display", "database", - "stored_data", - "internal_storage", - "connector", - "offpage_connector", - "merge", - "extract", - "sort", - "collate", - "summing_junction", - "or", + "subprocess", "browser", - "comment", - "alternate_process", ], "description": ( - "ISO 5807 flowchart symbol type. Auto-detected if omitted. " - "Core: start (green stadium), terminal (red stadium), " - "process (blue rect), decision (amber diamond), " - "io (purple parallelogram), document (grey wavy rect), " - "subprocess (teal subroutine), preparation (brown hexagon), " - "manual_operation (pink trapezoid), delay (orange D-shape), " - "display (cyan), database (green cylinder), " - "merge (indigo triangle), browser (dark indigo hexagon — " - "for GCU/browser sub-agents; must be a leaf node connected " - "only to its managing parent)" + "Flowchart symbol type. Auto-detected if omitted. 
" + "start (sage green stadium), terminal (dusty red stadium), " + "process (blue-gray rect), decision (amber diamond), " + "io (purple parallelogram), document (steel blue wavy rect), " + "database (teal cylinder), subprocess (cyan subroutine), " + "browser (deep blue hexagon — for GCU/browser " + "sub-agents; must be a leaf node)" ), }, "tools": { From 3c6a30fcae37560e591c58c9b8eae2a091dd303e Mon Sep 17 00:00:00 2001 From: bryan Date: Mon, 16 Mar 2026 12:12:35 -0700 Subject: [PATCH 11/45] refactor: trim queen prompt to 9 flowchart types with dark theme colors --- core/framework/agents/queen/nodes/__init__.py | 46 ++++++------------- 1 file changed, 15 insertions(+), 31 deletions(-) diff --git a/core/framework/agents/queen/nodes/__init__.py b/core/framework/agents/queen/nodes/__init__.py index 960b7078..821d2d69 100644 --- a/core/framework/agents/queen/nodes/__init__.py +++ b/core/framework/agents/queen/nodes/__init__.py @@ -287,44 +287,28 @@ visible to the user immediately. The draft captures business logic \ Include in each node: id, name, description, planned tools, \ input/output keys, and success criteria as high-level hints. -Each node is auto-classified into an ISO 5807 flowchart symbol type \ -with a unique color. You can override auto-detection by setting \ -`flowchart_type` explicitly on a node. Common types: +Each node is auto-classified into a flowchart symbol type with a unique \ +color. You can override auto-detection by setting `flowchart_type` \ +explicitly on a node. 
Available types: -**Core symbols:** -- **start** (green, stadium): Entry point / trigger -- **terminal** (red, stadium): End of flow -- **process** (blue, rectangle): Standard processing step -- **decision** (amber, diamond): Conditional branching -- **io** (purple, parallelogram): External data input/output -- **document** (blue-grey, wavy rect): Report or document generation -- **subprocess** (teal, subroutine): Delegated sub-agent / predefined process -- **preparation** (brown, hexagon): Setup / initialization step -- **manual_operation** (pink, trapezoid): Human-in-the-loop / manual review -- **delay** (orange, D-shape): Wait / throttle / cooldown -- **display** (cyan): Present results to user - -**Data storage:** -- **database** (light green, cylinder): Database or data store -- **stored_data** (lime): Generic persistent data -- **internal_storage** (amber): In-memory / cache - -**Flow operations:** -- **merge** (indigo, inv. triangle): Combine multiple inputs -- **extract** (indigo, triangle): Split or filter data -- **connector** (grey, circle): On-page link -- **offpage_connector** (dark grey, pentagon): Cross-page link - -**Domain-specific:** -- **browser** (dark indigo, hexagon): GCU browser automation / sub-agent \ +- **start** (sage green, stadium): Entry point / trigger +- **terminal** (dusty red, stadium): End of flow +- **process** (blue-gray, rectangle): Standard processing step +- **decision** (warm amber, diamond): Conditional branching +- **io** (dusty purple, parallelogram): External data input/output +- **document** (steel blue, wavy rect): Report or document generation +- **database** (muted teal, cylinder): Database or data store +- **subprocess** (dark cyan, subroutine): Delegated sub-agent / predefined process +- **browser** (deep blue, hexagon): GCU browser automation / sub-agent \ delegation. At build time, browser nodes are dissolved into the parent \ node's sub_agents list. Use for any GCU or sub-agent leaf node. 
Auto-detection works well for most cases: first node → start, nodes with \ no outgoing edges → terminal, nodes with multiple conditional outgoing \ edges → decision, GCU nodes → browser, nodes mentioning "database" → \ -database, nodes mentioning "report/document" → document, etc. Set \ -flowchart_type explicitly only when auto-detection would be wrong. +database, nodes mentioning "report/document" → document, I/O tools like \ +send_email → io. Everything else defaults to process. Set flowchart_type \ +explicitly only when auto-detection would be wrong. ## Decision Nodes — Planning-Only Conditional Branching From ac3fe38b3370370979fe1d7c0723ffba21e7d0ea Mon Sep 17 00:00:00 2001 From: bryan Date: Mon, 16 Mar 2026 12:12:50 -0700 Subject: [PATCH 12/45] refactor: remove dead shape cases and update imports --- core/frontend/src/components/DraftGraph.tsx | 132 +------------------- 1 file changed, 3 insertions(+), 129 deletions(-) diff --git a/core/frontend/src/components/DraftGraph.tsx b/core/frontend/src/components/DraftGraph.tsx index 1d45bb6d..d4f1fd4a 100644 --- a/core/frontend/src/components/DraftGraph.tsx +++ b/core/frontend/src/components/DraftGraph.tsx @@ -1,8 +1,8 @@ import { useEffect, useMemo, useRef, useState, useCallback } from "react"; import { Loader2 } from "lucide-react"; import type { DraftGraph as DraftGraphData, DraftNode } from "@/api/types"; -import { RunButton } from "./AgentGraph"; -import type { GraphNode, RunState } from "./AgentGraph"; +import { RunButton } from "./RunButton"; +import type { GraphNode, RunState } from "./graph-types"; // Read a CSS custom property value (space-separated HSL components) function cssVar(name: string): string { @@ -144,13 +144,9 @@ function FlowchartShape({ case "rectangle": return ; - case "rounded_rect": - return ; - case "diamond": { const cx = x + w / 2; const cy = y + h / 2; - // Keep diamond within bounding box return ( ; } - case "multi_document": { - const off = 3; - const d = `M ${x} ${y + 4 + off} Q ${x} 
${y + off}, ${x + 8} ${y + off} L ${x + w - 8 - off} ${y + off} Q ${x + w - off} ${y + off}, ${x + w - off} ${y + 4 + off} L ${x + w - off} ${y + h - 8} C ${x + (w - off) * 0.75} ${y + h + 2}, ${x + (w - off) * 0.25} ${y + h - 10}, ${x} ${y + h - 4} Z`; - return ( - - - - - - ); - } - case "subroutine": { const inset = 7; return ( @@ -207,34 +191,6 @@ function FlowchartShape({ ); } - case "manual_input": - return ( - - ); - - case "trapezoid": { - const inset = 12; - return ( - - ); - } - - case "delay": { - const d = `M ${x} ${y + 4} Q ${x} ${y}, ${x + 4} ${y} L ${x + w * 0.65} ${y} A ${w * 0.35} ${h / 2} 0 0 1 ${x + w * 0.65} ${y + h} L ${x + 4} ${y + h} Q ${x} ${y + h}, ${x} ${y + h - 4} Z`; - return ; - } - - case "display": { - const d = `M ${x + 16} ${y} L ${x + w * 0.65} ${y} A ${w * 0.35} ${h / 2} 0 0 1 ${x + w * 0.65} ${y + h} L ${x + 16} ${y + h} L ${x} ${y + h / 2} Z`; - return ; - } - case "cylinder": { const ry = 7; return ( @@ -249,88 +205,6 @@ function FlowchartShape({ ); } - case "stored_data": { - const d = `M ${x + 14} ${y} L ${x + w} ${y} A 10 ${h / 2} 0 0 0 ${x + w} ${y + h} L ${x + 14} ${y + h} A 10 ${h / 2} 0 0 1 ${x + 14} ${y} Z`; - return ; - } - - case "internal_storage": - return ( - - - - - - ); - - case "circle": { - const r = Math.min(w, h) / 2 - 2; - return ; - } - - case "pentagon": - return ( - - ); - - case "triangle_inv": - return ( - - ); - - case "triangle": - return ( - - ); - - case "hourglass": - return ( - - ); - - case "circle_cross": { - const r = Math.min(w, h) / 2 - 2; - const cx = x + w / 2; - const cy = y + h / 2; - return ( - - - - - - ); - } - - case "circle_bar": { - const r = Math.min(w, h) / 2 - 2; - const cx = x + w / 2; - const cy = y + h / 2; - return ( - - - - - - ); - } - - case "flag": { - const d = `M ${x} ${y} L ${x + w} ${y} L ${x + w - 8} ${y + h / 2} L ${x + w} ${y + h} L ${x} ${y + h} Z`; - return ; - } - default: return ; } @@ -977,7 +851,7 @@ export default function DraftGraph({ draft, onNodeClick, 
flowchartMap, runtimeNo {loading || !draft ? ( <> -

Designing flowchart…

+

Loading flowchart…

) : (

From b51852541819d12a7ebb3dafb5bd147adf0baa61 Mon Sep 17 00:00:00 2001 From: bryan Date: Mon, 16 Mar 2026 12:13:06 -0700 Subject: [PATCH 13/45] docs: update flowchart schema for 9 types with new color palette --- docs/draft-flowchart-schema.md | 87 +++++++--------------------------- 1 file changed, 17 insertions(+), 70 deletions(-) diff --git a/docs/draft-flowchart-schema.md b/docs/draft-flowchart-schema.md index 699e5fc1..5ce81579 100644 --- a/docs/draft-flowchart-schema.md +++ b/docs/draft-flowchart-schema.md @@ -27,7 +27,8 @@ decisions │ │ ``` **Key files:** -- Backend: `core/framework/tools/queen_lifecycle_tools.py` — draft creation, classification, dissolution +- Backend: `core/framework/tools/queen_lifecycle_tools.py` — draft creation, dissolution +- Backend: `core/framework/tools/flowchart_utils.py` — type definitions, classification, persistence - Backend: `core/framework/server/routes_graphs.py` — REST endpoints - Frontend: `core/frontend/src/components/DraftGraph.tsx` — SVG flowchart renderer - Frontend: `core/frontend/src/api/types.ts` — TypeScript interfaces @@ -114,17 +115,9 @@ decisions │ │ "type": "string", "enum": [ "start", "terminal", "process", "decision", - "io", "document", "multi_document", - "subprocess", "preparation", - "manual_input", "manual_operation", - "delay", "display", - "database", "stored_data", "internal_storage", - "connector", "offpage_connector", - "merge", "extract", "sort", "collate", - "summing_junction", "or", - "browser", "comment", "alternate_process" + "io", "document", "database", "subprocess", "browser" ], - "description": "ISO 5807 flowchart symbol. Auto-detected if omitted." + "description": "Flowchart symbol type. Auto-detected if omitted." }, "tools": { "type": "array", @@ -290,67 +283,27 @@ Returned by `GET /api/sessions/{id}/flowchart-map` after `confirm_and_build()` d --- -## 2. ISO 5807 Flowchart Types - -### Core Symbols +## 2. 
Flowchart Types | Type | Shape | Color | SVG Primitive | Description | |---|---|---|---|---| -| `start` | stadium | `#4CAF50` green | `` | Entry point / start terminator | -| `terminal` | stadium | `#F44336` red | `` | End point / stop terminator | -| `process` | rectangle | `#2196F3` blue | `` | General processing step | -| `decision` | diamond | `#FF9800` amber | `` 4-point | Branching / conditional logic | -| `io` | parallelogram | `#9C27B0` purple | `` skewed | Data input or output | -| `document` | document | `#607D8B` blue-grey | `` wavy bottom | Single document output | -| `multi_document` | multi_document | `#78909C` blue-grey | stacked `` + `` | Multiple documents | -| `subprocess` | subroutine | `#009688` teal | `` + inner `` | Predefined process / sub-agent | -| `preparation` | hexagon | `#795548` brown | `` 6-point | Setup / initialization step | -| `manual_input` | manual_input | `#E91E63` pink | `` sloped top | Manual data entry | -| `manual_operation` | trapezoid | `#AD1457` dark pink | `` tapered bottom | Human-in-the-loop / approval | -| `delay` | delay | `#FF5722` deep orange | `` D-shape | Wait / pause / cooldown | -| `display` | display | `#00BCD4` cyan | `` pointed left | Display / render output | - -### Data Storage Symbols - -| Type | Shape | Color | SVG Primitive | Description | -|---|---|---|---|---| -| `database` | cylinder | `#8BC34A` light green | `` + `` top/bottom | Database / direct access storage | -| `stored_data` | stored_data | `#CDDC39` lime | `` curved left | Generic data store | -| `internal_storage` | internal_storage | `#FFC107` amber | `` + internal `` grid | Internal memory / cache | - -### Connectors - -| Type | Shape | Color | SVG Primitive | Description | -|---|---|---|---|---| -| `connector` | circle | `#9E9E9E` grey | `` | On-page connector | -| `offpage_connector` | pentagon | `#757575` dark grey | `` 5-point | Off-page connector | - -### Flow Operations - -| Type | Shape | Color | SVG Primitive | Description | 
-|---|---|---|---|---| -| `merge` | triangle_inv | `#3F51B5` indigo | `` inverted | Merge multiple flows | -| `extract` | triangle | `#5C6BC0` indigo light | `` upward | Extract / split flow | -| `sort` | hourglass | `#7986CB` indigo lighter | `` X-shape | Sort operation | -| `collate` | hourglass_inv | `#9FA8DA` indigo lightest | `` X-shape inv | Collate operation | -| `summing_junction` | circle_cross | `#F06292` pink light | `` + cross `` | Summing junction | -| `or` | circle_bar | `#CE93D8` purple light | `` + plus `` | Logical OR | - -### Domain-Specific (Hive) - -| Type | Shape | Color | SVG Primitive | Description | -|---|---|---|---|---| -| `browser` | hexagon | `#1A237E` dark indigo | `` 6-point | Browser automation (GCU node) | -| `comment` | flag | `#BDBDBD` light grey | `` notched right | Annotation / comment | -| `alternate_process` | rounded_rect | `#42A5F5` light blue | `` | Alternate process variant | +| `start` | stadium | `#3fa66a` sage green | `` | Entry point / start terminator | +| `terminal` | stadium | `#a04444` dusty red | `` | End point / stop terminator | +| `process` | rectangle | `#616d83` blue-gray | `` | General processing step (default) | +| `decision` | diamond | `#d89d26` warm amber | `` 4-point | Branching / conditional logic | +| `io` | parallelogram | `#7a4fa5` dusty purple | `` skewed | Data input or output | +| `document` | document | `#507485` steel blue | `` wavy bottom | Document / report generation | +| `database` | cylinder | `#459077` muted teal | `` + `` | Database / data store | +| `subprocess` | subroutine | `#4c7f7f` dark cyan | `` + inner `` | Predefined process / sub-agent | +| `browser` | hexagon | `#3a4a9b` deep blue | `` 6-point | Browser automation (GCU node) | --- ## 3. 
Auto-Classification Priority -When `flowchart_type` is omitted from a node, the backend classifies it automatically using this priority (function `_classify_flowchart_node` in `queen_lifecycle_tools.py`): +When `flowchart_type` is omitted from a node, the backend classifies it automatically using this priority (function `classify_flowchart_node` in `flowchart_utils.py`): -1. **Explicit override** — if `flowchart_type` is set and valid, use it +1. **Explicit override** — if `flowchart_type` is set and valid, use it (old type names are remapped automatically) 2. **Node type** — `gcu` nodes become `browser` 3. **Position** — first node becomes `start` 4. **Terminal detection** — nodes in `terminal_nodes` (or with no outgoing edges) become `terminal` @@ -359,14 +312,8 @@ When `flowchart_type` is omitted from a node, the backend classifies it automati 7. **Tool heuristics** — tool names match known patterns: - DB tools (`query_database`, `sql_query`, `read_table`, etc.) → `database` - Doc tools (`generate_report`, `create_document`, etc.) → `document` - - I/O tools (`send_email`, `post_to_slack`, `fetch_url`, etc.) → `io` - - Display tools (`serve_file_to_user`, `display_results`) → `display` + - I/O tools (`send_email`, `post_to_slack`, `fetch_url`, `display_results`, etc.) → `io` 8. 
**Description keyword heuristics**: - - `"manual"`, `"approval"`, `"human review"` → `manual_operation` - - `"setup"`, `"prepare"`, `"configure"` → `preparation` - - `"wait"`, `"delay"`, `"pause"` → `delay` - - `"merge"`, `"combine"`, `"aggregate"` → `merge` - - `"display"`, `"show"`, `"render"` → `display` - `"database"`, `"data store"`, `"persist"` → `database` - `"report"`, `"document"`, `"summary"` → `document` - `"deliver"`, `"send"`, `"notify"` → `io` From ae19121802de9a7fb3ab7aa8831e2bfabb4ac8bf Mon Sep 17 00:00:00 2001 From: bryan Date: Mon, 16 Mar 2026 12:13:16 -0700 Subject: [PATCH 14/45] test: add tests for flowchart_utils classification and remap --- core/tests/test_flowchart_utils.py | 264 +++++++++++++++++++++++++++++ 1 file changed, 264 insertions(+) create mode 100644 core/tests/test_flowchart_utils.py diff --git a/core/tests/test_flowchart_utils.py b/core/tests/test_flowchart_utils.py new file mode 100644 index 00000000..985cc7a9 --- /dev/null +++ b/core/tests/test_flowchart_utils.py @@ -0,0 +1,264 @@ +"""Tests for framework/tools/flowchart_utils.py.""" + +import json +from pathlib import Path +from types import SimpleNamespace + +from framework.tools.flowchart_utils import ( + FLOWCHART_FILENAME, + _FLOWCHART_TYPES, + classify_flowchart_node, + generate_fallback_flowchart, + load_flowchart_file, + save_flowchart_file, + synthesize_draft_from_runtime, +) + + +def _make_node(id, name="Node", description="", node_type="event_loop", + tools=None, input_keys=None, output_keys=None, + success_criteria="", sub_agents=None): + """Create a minimal node-like object matching NodeSpec interface.""" + return SimpleNamespace( + id=id, name=name, description=description, node_type=node_type, + tools=tools or [], input_keys=input_keys or [], + output_keys=output_keys or [], success_criteria=success_criteria, + sub_agents=sub_agents or [], + ) + + +def _make_edge(source, target, condition="on_success", description=""): + """Create a minimal edge-like object 
matching EdgeSpec interface.""" + return SimpleNamespace( + source=source, target=target, + condition=SimpleNamespace(value=condition), + description=description, + ) + + +def _make_goal(name="Test Goal", description="A test goal", + success_criteria=None, constraints=None): + """Create a minimal goal-like object matching Goal interface.""" + return SimpleNamespace( + name=name, description=description, + success_criteria=success_criteria or [], + constraints=constraints or [], + ) + + +def _make_graph(nodes, edges, entry_node=None, terminal_nodes=None): + """Create a minimal graph-like object matching GraphSpec interface.""" + return SimpleNamespace( + nodes=nodes, edges=edges, + entry_node=entry_node or (nodes[0].id if nodes else ""), + terminal_nodes=terminal_nodes or [], + ) + + +class TestClassifyFlowchartNode: + """Test flowchart node classification logic.""" + + def test_first_node_is_start(self): + node = {"id": "n1", "node_type": "event_loop", "tools": []} + result = classify_flowchart_node(node, 0, 3, [], set()) + assert result == "start" + + def test_terminal_node(self): + node = {"id": "n3", "node_type": "event_loop", "tools": []} + edges = [{"source": "n1", "target": "n3"}] + result = classify_flowchart_node(node, 2, 3, edges, {"n3"}) + assert result == "terminal" + + def test_gcu_node_is_browser(self): + node = {"id": "n2", "node_type": "gcu", "tools": []} + edges = [{"source": "n1", "target": "n2"}] + result = classify_flowchart_node(node, 1, 3, edges, set()) + assert result == "browser" + + def test_subprocess_node(self): + node = {"id": "n2", "node_type": "event_loop", "tools": [], + "sub_agents": ["sub1"]} + edges = [{"source": "n1", "target": "n2"}, {"source": "n2", "target": "n3"}] + result = classify_flowchart_node(node, 1, 3, edges, set()) + assert result == "subprocess" + + def test_default_is_process(self): + node = {"id": "n2", "node_type": "event_loop", "tools": [], + "description": "do stuff"} + edges = [{"source": "n1", "target": "n2"}, 
{"source": "n2", "target": "n3"}] + result = classify_flowchart_node(node, 1, 3, edges, set()) + assert result == "process" + + def test_explicit_override(self): + node = {"id": "n2", "node_type": "event_loop", "tools": [], + "flowchart_type": "database"} + edges = [{"source": "n1", "target": "n2"}] + result = classify_flowchart_node(node, 1, 3, edges, set()) + assert result == "database" + + def test_decision_node_with_branching(self): + node = {"id": "n2", "node_type": "event_loop", "tools": []} + edges = [ + {"source": "n1", "target": "n2"}, + {"source": "n2", "target": "n3", "condition": "on_success"}, + {"source": "n2", "target": "n4", "condition": "on_failure"}, + ] + result = classify_flowchart_node(node, 1, 4, edges, set()) + assert result == "decision" + + +class TestSynthesizeDraftFromRuntime: + """Test runtime graph to DraftGraph conversion.""" + + def test_basic_linear_graph(self): + nodes = [ + _make_node("intake", "Intake"), + _make_node("process", "Process"), + _make_node("deliver", "Deliver"), + ] + edges = [ + _make_edge("intake", "process"), + _make_edge("process", "deliver"), + ] + draft, fmap = synthesize_draft_from_runtime( + nodes, edges, agent_name="test_agent", goal_name="Test" + ) + + assert draft["agent_name"] == "test_agent" + assert draft["goal"] == "Test" + assert len(draft["nodes"]) == 3 + assert len(draft["edges"]) == 2 + assert draft["entry_node"] == "intake" + assert "deliver" in draft["terminal_nodes"] + + # First node should be start type + assert draft["nodes"][0]["flowchart_type"] == "start" + # Last node (terminal) should be terminal type + assert draft["nodes"][2]["flowchart_type"] == "terminal" + # Middle node should be process + assert draft["nodes"][1]["flowchart_type"] == "process" + + # All nodes should have shape and color + for node in draft["nodes"]: + assert "flowchart_shape" in node + assert "flowchart_color" in node + + # Flowchart map should be identity + assert fmap == {"intake": ["intake"], "process": 
["process"], "deliver": ["deliver"]} + + # Legend should contain all types + assert draft["flowchart_legend"] == { + k: {"shape": v["shape"], "color": v["color"]} + for k, v in _FLOWCHART_TYPES.items() + } + + def test_graph_with_sub_agents(self): + nodes = [ + _make_node("main", "Main", sub_agents=["helper"]), + _make_node("helper", "Helper"), + ] + edges = [_make_edge("main", "helper")] + draft, fmap = synthesize_draft_from_runtime(nodes, edges) + + # Sub-agent edges should be added + assert len(draft["edges"]) > 1 + + # Helper should be grouped under main in the flowchart map + assert "helper" not in fmap + assert fmap["main"] == ["main", "helper"] + + +class TestFlowchartFilePersistence: + """Test save/load of flowchart.json.""" + + def test_save_and_load(self, tmp_path): + draft = {"agent_name": "test", "nodes": [], "edges": []} + fmap = {"n1": ["n1"]} + + save_flowchart_file(tmp_path, draft, fmap) + loaded_draft, loaded_map = load_flowchart_file(tmp_path) + + assert loaded_draft == draft + assert loaded_map == fmap + + def test_load_missing_file(self, tmp_path): + draft, fmap = load_flowchart_file(tmp_path) + assert draft is None + assert fmap is None + + def test_load_none_path(self): + draft, fmap = load_flowchart_file(None) + assert draft is None + assert fmap is None + + def test_save_none_path(self): + # Should not raise + save_flowchart_file(None, {}, {}) + + +class TestGenerateFallbackFlowchart: + """Test the main entry point for fallback generation.""" + + def test_generates_file_when_missing(self, tmp_path): + nodes = [ + _make_node("n1", "Start Node"), + _make_node("n2", "End Node"), + ] + edges = [_make_edge("n1", "n2")] + graph = _make_graph(nodes, edges, entry_node="n1", terminal_nodes=["n2"]) + goal = _make_goal() + + generate_fallback_flowchart(graph, goal, tmp_path) + + flowchart_path = tmp_path / FLOWCHART_FILENAME + assert flowchart_path.exists() + + data = json.loads(flowchart_path.read_text()) + assert data["original_draft"]["agent_name"] 
== tmp_path.name + assert data["original_draft"]["goal"] == "A test goal" + assert data["flowchart_map"] is not None + # Entry/terminal from GraphSpec should be used + assert data["original_draft"]["entry_node"] == "n1" + assert "n2" in data["original_draft"]["terminal_nodes"] + + def test_skips_when_file_exists(self, tmp_path): + # Pre-create a flowchart.json + existing = {"original_draft": {"agent_name": "existing"}, "flowchart_map": {}} + (tmp_path / FLOWCHART_FILENAME).write_text(json.dumps(existing)) + + nodes = [_make_node("n1", "Node")] + graph = _make_graph(nodes, [], entry_node="n1") + goal = _make_goal() + + generate_fallback_flowchart(graph, goal, tmp_path) + + # Should not have been overwritten + data = json.loads((tmp_path / FLOWCHART_FILENAME).read_text()) + assert data["original_draft"]["agent_name"] == "existing" + + def test_handles_errors_gracefully(self, tmp_path): + # Pass an invalid path (file, not directory) + fake_path = tmp_path / "not_a_dir.txt" + fake_path.write_text("hello") + + graph = _make_graph([], []) + goal = _make_goal() + + # Should not raise + generate_fallback_flowchart(graph, goal, fake_path) + + def test_enriches_with_goal_metadata(self, tmp_path): + nodes = [_make_node("n1", "Node")] + graph = _make_graph(nodes, [], entry_node="n1") + goal = _make_goal( + description="Find bugs", + success_criteria=[SimpleNamespace(description="All bugs found")], + constraints=[SimpleNamespace(description="No false positives")], + ) + + generate_fallback_flowchart(graph, goal, tmp_path) + + data = json.loads((tmp_path / FLOWCHART_FILENAME).read_text()) + assert data["original_draft"]["goal"] == "Find bugs" + assert data["original_draft"]["success_criteria"] == ["All bugs found"] + assert data["original_draft"]["constraints"] == ["No false positives"] From 4ab33a39d68bb00bd9f170b7e6f9682890c4abd0 Mon Sep 17 00:00:00 2001 From: bryan Date: Mon, 16 Mar 2026 12:13:29 -0700 Subject: [PATCH 15/45] chore: add generated flowchart.json for template 
agents --- .../competitive_intel_agent/flowchart.json | 307 ++++++++++++++++++ .../deep_research_agent/flowchart.json | 221 +++++++++++++ .../email_inbox_management/flowchart.json | 218 +++++++++++++ .../email_reply_agent/flowchart.json | 168 ++++++++++ examples/templates/job_hunter/flowchart.json | 186 +++++++++++ .../local_business_extractor/flowchart.json | 165 ++++++++++ .../meeting_scheduler/flowchart.json | 172 ++++++++++ .../tech_news_reporter/flowchart.json | 150 +++++++++ .../twitter_news_agent/flowchart.json | 172 ++++++++++ .../vulnerability_assessment/flowchart.json | 237 ++++++++++++++ 10 files changed, 1996 insertions(+) create mode 100644 examples/templates/competitive_intel_agent/flowchart.json create mode 100644 examples/templates/deep_research_agent/flowchart.json create mode 100644 examples/templates/email_inbox_management/flowchart.json create mode 100644 examples/templates/email_reply_agent/flowchart.json create mode 100644 examples/templates/job_hunter/flowchart.json create mode 100644 examples/templates/local_business_extractor/flowchart.json create mode 100644 examples/templates/meeting_scheduler/flowchart.json create mode 100644 examples/templates/tech_news_reporter/flowchart.json create mode 100644 examples/templates/twitter_news_agent/flowchart.json create mode 100644 examples/templates/vulnerability_assessment/flowchart.json diff --git a/examples/templates/competitive_intel_agent/flowchart.json b/examples/templates/competitive_intel_agent/flowchart.json new file mode 100644 index 00000000..f0e1111b --- /dev/null +++ b/examples/templates/competitive_intel_agent/flowchart.json @@ -0,0 +1,307 @@ +{ + "original_draft": { + "agent_name": "competitive_intel_agent", + "goal": "Monitor competitor websites, news sources, and GitHub repositories to produce a structured weekly digest with key insights, detailed findings per competitor, and 30-day trend analysis.", + "description": "", + "success_criteria": [ + "Check multiple source types per 
competitor (website, news, GitHub)", + "All findings structured with competitor, category, update, source, and date", + "Uses stored data to compare with previous reports for trend analysis", + "User receives a formatted, readable competitive intelligence digest" + ], + "constraints": [ + "Never fabricate findings, news, or data \u2014 only report what was found", + "Every finding must include a source URL", + "Prioritize findings from the past 7 days; include up to 30 days" + ], + "nodes": [ + { + "id": "intake", + "name": "Competitor Intake", + "description": "Collect competitor list, focus areas, and report preferences from the user", + "node_type": "event_loop", + "tools": [], + "input_keys": [ + "competitors_input" + ], + "output_keys": [ + "competitors", + "focus_areas", + "report_frequency", + "has_github_competitors" + ], + "success_criteria": "", + "sub_agents": [], + "flowchart_type": "start", + "flowchart_shape": "stadium", + "flowchart_color": "#3fa66a" + }, + { + "id": "web-scraper", + "name": "Website Monitor", + "description": "Scrape competitor websites for pricing, features, and announcements", + "node_type": "event_loop", + "tools": [ + "web_search", + "web_scrape" + ], + "input_keys": [ + "competitors", + "focus_areas" + ], + "output_keys": [ + "web_findings" + ], + "success_criteria": "", + "sub_agents": [], + "flowchart_type": "process", + "flowchart_shape": "rectangle", + "flowchart_color": "#616d83" + }, + { + "id": "news-search", + "name": "News & Press Monitor", + "description": "Search for competitor mentions in news, press releases, and industry publications", + "node_type": "event_loop", + "tools": [ + "web_search", + "web_scrape" + ], + "input_keys": [ + "competitors", + "focus_areas" + ], + "output_keys": [ + "news_findings" + ], + "success_criteria": "", + "sub_agents": [], + "flowchart_type": "decision", + "flowchart_shape": "diamond", + "flowchart_color": "#d89d26" + }, + { + "id": "github-monitor", + "name": "GitHub Activity 
Monitor", + "description": "Track public GitHub repository activity for competitors with GitHub presence", + "node_type": "event_loop", + "tools": [ + "github_list_repos", + "github_get_repo", + "github_search_repos" + ], + "input_keys": [ + "competitors" + ], + "output_keys": [ + "github_findings" + ], + "success_criteria": "", + "sub_agents": [], + "flowchart_type": "process", + "flowchart_shape": "rectangle", + "flowchart_color": "#616d83" + }, + { + "id": "aggregator", + "name": "Data Aggregator", + "description": "Combine findings from all sources, deduplicate, and structure for analysis", + "node_type": "event_loop", + "tools": [ + "save_data", + "load_data", + "list_data_files" + ], + "input_keys": [ + "competitors", + "web_findings", + "news_findings", + "github_findings" + ], + "output_keys": [ + "aggregated_findings" + ], + "success_criteria": "", + "sub_agents": [], + "flowchart_type": "database", + "flowchart_shape": "cylinder", + "flowchart_color": "#459077" + }, + { + "id": "analysis", + "name": "Insight Analysis", + "description": "Extract key insights, detect trends, and compare with historical data", + "node_type": "event_loop", + "tools": [ + "load_data", + "save_data", + "list_data_files" + ], + "input_keys": [ + "aggregated_findings", + "competitors", + "focus_areas" + ], + "output_keys": [ + "key_highlights", + "trend_analysis", + "detailed_findings" + ], + "success_criteria": "", + "sub_agents": [], + "flowchart_type": "database", + "flowchart_shape": "cylinder", + "flowchart_color": "#459077" + }, + { + "id": "report", + "name": "Report Generator", + "description": "Generate and deliver the competitive intelligence digest as an HTML report", + "node_type": "event_loop", + "tools": [ + "save_data", + "load_data", + "serve_file_to_user", + "list_data_files" + ], + "input_keys": [ + "key_highlights", + "trend_analysis", + "detailed_findings", + "competitors" + ], + "output_keys": [ + "delivery_status" + ], + "success_criteria": "", + 
"sub_agents": [], + "flowchart_type": "terminal", + "flowchart_shape": "stadium", + "flowchart_color": "#a04444" + } + ], + "edges": [ + { + "id": "edge-0", + "source": "intake", + "target": "web-scraper", + "condition": "on_success", + "description": "", + "label": "" + }, + { + "id": "edge-1", + "source": "web-scraper", + "target": "news-search", + "condition": "on_success", + "description": "", + "label": "" + }, + { + "id": "edge-2", + "source": "news-search", + "target": "github-monitor", + "condition": "conditional", + "description": "", + "label": "" + }, + { + "id": "edge-3", + "source": "news-search", + "target": "aggregator", + "condition": "conditional", + "description": "", + "label": "" + }, + { + "id": "edge-4", + "source": "github-monitor", + "target": "aggregator", + "condition": "on_success", + "description": "", + "label": "" + }, + { + "id": "edge-5", + "source": "aggregator", + "target": "analysis", + "condition": "on_success", + "description": "", + "label": "" + }, + { + "id": "edge-6", + "source": "analysis", + "target": "report", + "condition": "on_success", + "description": "", + "label": "" + } + ], + "entry_node": "intake", + "terminal_nodes": [ + "report" + ], + "flowchart_legend": { + "start": { + "shape": "stadium", + "color": "#3fa66a" + }, + "terminal": { + "shape": "stadium", + "color": "#a04444" + }, + "process": { + "shape": "rectangle", + "color": "#616d83" + }, + "decision": { + "shape": "diamond", + "color": "#d89d26" + }, + "io": { + "shape": "parallelogram", + "color": "#7a4fa5" + }, + "document": { + "shape": "document", + "color": "#507485" + }, + "database": { + "shape": "cylinder", + "color": "#459077" + }, + "subprocess": { + "shape": "subroutine", + "color": "#4c7f7f" + }, + "browser": { + "shape": "hexagon", + "color": "#3a4a9b" + } + } + }, + "flowchart_map": { + "intake": [ + "intake" + ], + "web-scraper": [ + "web-scraper" + ], + "news-search": [ + "news-search" + ], + "github-monitor": [ + "github-monitor" + ], + 
"aggregator": [ + "aggregator" + ], + "analysis": [ + "analysis" + ], + "report": [ + "report" + ] + } +} \ No newline at end of file diff --git a/examples/templates/deep_research_agent/flowchart.json b/examples/templates/deep_research_agent/flowchart.json new file mode 100644 index 00000000..d3daef83 --- /dev/null +++ b/examples/templates/deep_research_agent/flowchart.json @@ -0,0 +1,221 @@ +{ + "original_draft": { + "agent_name": "deep_research_agent", + "goal": "Research any topic by searching diverse sources, analyzing findings, and producing a cited report \u2014 with user checkpoints to guide direction.", + "description": "", + "success_criteria": [ + "Use multiple diverse, authoritative sources", + "Every factual claim in the report cites its source", + "User reviews findings before report generation", + "Final report answers the original research questions" + ], + "constraints": [ + "Only include information found in fetched sources", + "Every claim must cite its source with a numbered reference", + "Present findings to the user before writing the final report" + ], + "nodes": [ + { + "id": "intake", + "name": "Research Intake", + "description": "Discuss the research topic with the user, clarify scope, and confirm direction", + "node_type": "event_loop", + "tools": [], + "input_keys": [ + "user_request" + ], + "output_keys": [ + "research_brief" + ], + "success_criteria": "The research brief is specific and actionable: it states the topic, the key questions to answer, the desired scope, and depth.", + "sub_agents": [], + "flowchart_type": "start", + "flowchart_shape": "stadium", + "flowchart_color": "#3fa66a" + }, + { + "id": "research", + "name": "Research", + "description": "Search the web, fetch source content, and compile findings", + "node_type": "event_loop", + "tools": [ + "web_search", + "web_scrape", + "load_data", + "save_data", + "append_data", + "list_data_files" + ], + "input_keys": [ + "research_brief", + "feedback" + ], + "output_keys": [ + 
"findings", + "sources", + "gaps" + ], + "success_criteria": "Findings reference at least 3 distinct sources with URLs. Key claims are substantiated by fetched content, not generated.", + "sub_agents": [], + "flowchart_type": "database", + "flowchart_shape": "cylinder", + "flowchart_color": "#459077" + }, + { + "id": "review", + "name": "Review Findings", + "description": "Present findings to user and decide whether to research more or write the report", + "node_type": "event_loop", + "tools": [], + "input_keys": [ + "findings", + "sources", + "gaps", + "research_brief" + ], + "output_keys": [ + "needs_more_research", + "feedback" + ], + "success_criteria": "The user has been presented with findings and has explicitly indicated whether they want more research or are ready for the report.", + "sub_agents": [], + "flowchart_type": "decision", + "flowchart_shape": "diamond", + "flowchart_color": "#d89d26" + }, + { + "id": "report", + "name": "Write & Deliver Report", + "description": "Write a cited HTML report from the findings and present it to the user", + "node_type": "event_loop", + "tools": [ + "save_data", + "append_data", + "serve_file_to_user", + "load_data", + "list_data_files" + ], + "input_keys": [ + "findings", + "sources", + "research_brief" + ], + "output_keys": [ + "delivery_status", + "next_action" + ], + "success_criteria": "An HTML report has been saved, the file link has been presented to the user, and the user has indicated what they want to do next.", + "sub_agents": [], + "flowchart_type": "terminal", + "flowchart_shape": "stadium", + "flowchart_color": "#a04444" + } + ], + "edges": [ + { + "id": "edge-0", + "source": "intake", + "target": "research", + "condition": "on_success", + "description": "", + "label": "" + }, + { + "id": "edge-1", + "source": "research", + "target": "review", + "condition": "on_success", + "description": "", + "label": "" + }, + { + "id": "edge-2", + "source": "review", + "target": "research", + "condition": 
"conditional", + "description": "", + "label": "" + }, + { + "id": "edge-3", + "source": "review", + "target": "report", + "condition": "conditional", + "description": "", + "label": "" + }, + { + "id": "edge-4", + "source": "report", + "target": "research", + "condition": "conditional", + "description": "", + "label": "" + }, + { + "id": "edge-5", + "source": "report", + "target": "intake", + "condition": "conditional", + "description": "", + "label": "" + } + ], + "entry_node": "intake", + "terminal_nodes": [ + "report" + ], + "flowchart_legend": { + "start": { + "shape": "stadium", + "color": "#3fa66a" + }, + "terminal": { + "shape": "stadium", + "color": "#a04444" + }, + "process": { + "shape": "rectangle", + "color": "#616d83" + }, + "decision": { + "shape": "diamond", + "color": "#d89d26" + }, + "io": { + "shape": "parallelogram", + "color": "#7a4fa5" + }, + "document": { + "shape": "document", + "color": "#507485" + }, + "database": { + "shape": "cylinder", + "color": "#459077" + }, + "subprocess": { + "shape": "subroutine", + "color": "#4c7f7f" + }, + "browser": { + "shape": "hexagon", + "color": "#3a4a9b" + } + } + }, + "flowchart_map": { + "intake": [ + "intake" + ], + "research": [ + "research" + ], + "review": [ + "review" + ], + "report": [ + "report" + ] + } +} \ No newline at end of file diff --git a/examples/templates/email_inbox_management/flowchart.json b/examples/templates/email_inbox_management/flowchart.json new file mode 100644 index 00000000..20ceda15 --- /dev/null +++ b/examples/templates/email_inbox_management/flowchart.json @@ -0,0 +1,218 @@ +{ + "original_draft": { + "agent_name": "email_inbox_management", + "goal": "Manage Gmail inbox emails autonomously using user-defined free-text rules. 
For every five minutes, fetch inbox emails (configurable batch size, default 100), apply the user's rules to each email, and execute the appropriate Gmail actions \u2014 trash, mark as spam, mark important, mark read/unread, star, draft replies, create/apply custom labels, and more.", + "description": "", + "success_criteria": [ + "Gmail actions are applied correctly to the right emails based on the user's rules", + "Produces a summary report showing what was done: how many emails were affected by each action type, with email subjects listed", + "All fetched emails up to the configured max are processed and acted upon; none are silently skipped", + "Custom labels are created and applied correctly when rules require them" + ], + "constraints": [ + "Must loop through all inbox emails by paginating with max_emails as page size; no emails should be silently skipped", + "Archiving removes from inbox but preserves the email; only explicit trash rules move emails to trash", + "Agent creates draft replies but NEVER sends them automatically" + ], + "nodes": [ + { + "id": "intake", + "name": "Intake", + "description": "Receive and validate input parameters: rules and max_emails. Present the interpreted rules back to the user for confirmation.", + "node_type": "event_loop", + "tools": [ + "gmail_list_labels" + ], + "input_keys": [ + "rules", + "max_emails" + ], + "output_keys": [ + "rules", + "max_emails", + "query" + ], + "success_criteria": "", + "sub_agents": [], + "flowchart_type": "start", + "flowchart_shape": "stadium", + "flowchart_color": "#3fa66a" + }, + { + "id": "fetch-emails", + "name": "Fetch Emails", + "description": "Fetch one page of emails from Gmail inbox. Returns emails filename and next_page_token for pagination. 
The graph loops back here if more pages remain.", + "node_type": "event_loop", + "tools": [ + "bulk_fetch_emails" + ], + "input_keys": [ + "rules", + "max_emails", + "next_page_token", + "last_processed_timestamp", + "query" + ], + "output_keys": [ + "emails", + "next_page_token" + ], + "success_criteria": "", + "sub_agents": [], + "flowchart_type": "process", + "flowchart_shape": "rectangle", + "flowchart_color": "#616d83" + }, + { + "id": "classify-and-act", + "name": "Classify and Act", + "description": "Apply the user's rules to each email and execute the appropriate Gmail actions.", + "node_type": "event_loop", + "tools": [ + "gmail_trash_message", + "gmail_modify_message", + "gmail_batch_modify_messages", + "gmail_create_draft", + "gmail_create_label", + "gmail_list_labels", + "load_data", + "append_data" + ], + "input_keys": [ + "rules", + "emails" + ], + "output_keys": [ + "actions_taken" + ], + "success_criteria": "", + "sub_agents": [], + "flowchart_type": "decision", + "flowchart_shape": "diamond", + "flowchart_color": "#d89d26" + }, + { + "id": "report", + "name": "Report", + "description": "Generate a summary report of all actions taken on the emails and present it to the user.", + "node_type": "event_loop", + "tools": [ + "load_data", + "get_current_timestamp" + ], + "input_keys": [ + "actions_taken", + "rules" + ], + "output_keys": [ + "summary_report", + "rules", + "last_processed_timestamp" + ], + "success_criteria": "", + "sub_agents": [], + "flowchart_type": "terminal", + "flowchart_shape": "stadium", + "flowchart_color": "#a04444" + } + ], + "edges": [ + { + "id": "edge-0", + "source": "intake", + "target": "fetch-emails", + "condition": "on_success", + "description": "", + "label": "" + }, + { + "id": "edge-1", + "source": "fetch-emails", + "target": "classify-and-act", + "condition": "on_success", + "description": "", + "label": "" + }, + { + "id": "edge-2", + "source": "classify-and-act", + "target": "fetch-emails", + "condition": 
"conditional", + "description": "", + "label": "" + }, + { + "id": "edge-3", + "source": "classify-and-act", + "target": "report", + "condition": "conditional", + "description": "", + "label": "" + }, + { + "id": "edge-4", + "source": "report", + "target": "intake", + "condition": "on_success", + "description": "", + "label": "" + } + ], + "entry_node": "intake", + "terminal_nodes": [ + "report" + ], + "flowchart_legend": { + "start": { + "shape": "stadium", + "color": "#3fa66a" + }, + "terminal": { + "shape": "stadium", + "color": "#a04444" + }, + "process": { + "shape": "rectangle", + "color": "#616d83" + }, + "decision": { + "shape": "diamond", + "color": "#d89d26" + }, + "io": { + "shape": "parallelogram", + "color": "#7a4fa5" + }, + "document": { + "shape": "document", + "color": "#507485" + }, + "database": { + "shape": "cylinder", + "color": "#459077" + }, + "subprocess": { + "shape": "subroutine", + "color": "#4c7f7f" + }, + "browser": { + "shape": "hexagon", + "color": "#3a4a9b" + } + } + }, + "flowchart_map": { + "intake": [ + "intake" + ], + "fetch-emails": [ + "fetch-emails" + ], + "classify-and-act": [ + "classify-and-act" + ], + "report": [ + "report" + ] + } +} \ No newline at end of file diff --git a/examples/templates/email_reply_agent/flowchart.json b/examples/templates/email_reply_agent/flowchart.json new file mode 100644 index 00000000..0405ce83 --- /dev/null +++ b/examples/templates/email_reply_agent/flowchart.json @@ -0,0 +1,168 @@ +{ + "original_draft": { + "agent_name": "email_reply_agent", + "goal": "Filter unreplied emails by user criteria, confirm recipients, send personalized replies.", + "description": "", + "success_criteria": [ + "Accurately finds unreplied emails matching user criteria", + "User confirms recipient list before sending", + "Replies are personalized based on email content and tone guidance" + ], + "constraints": [ + "Never send emails without explicit user confirmation; always present recipient list and get approval 
first", + "Process up to 50 emails per batch" + ], + "nodes": [ + { + "id": "intake", + "name": "Intake", + "description": "Gather email filter criteria from user", + "node_type": "event_loop", + "tools": [], + "input_keys": [ + "batch_complete", + "restart" + ], + "output_keys": [ + "filter_criteria" + ], + "success_criteria": "Filter criteria is specific enough to search Gmail (sender, subject, date range, or keywords).", + "sub_agents": [], + "flowchart_type": "start", + "flowchart_shape": "stadium", + "flowchart_color": "#3fa66a" + }, + { + "id": "search", + "name": "Search Emails", + "description": "Search Gmail for unreplied emails matching filter criteria", + "node_type": "event_loop", + "tools": [ + "gmail_list_messages", + "gmail_get_message", + "gmail_batch_get_messages" + ], + "input_keys": [ + "filter_criteria" + ], + "output_keys": [ + "email_list" + ], + "success_criteria": "Found unreplied emails matching criteria with sender, subject, snippet, message_id.", + "sub_agents": [], + "flowchart_type": "process", + "flowchart_shape": "rectangle", + "flowchart_color": "#616d83" + }, + { + "id": "confirm-draft", + "name": "Confirm & Reply", + "description": "Present emails for confirmation, send personalized replies", + "node_type": "event_loop", + "tools": [ + "gmail_reply_email" + ], + "input_keys": [ + "email_list", + "filter_criteria" + ], + "output_keys": [ + "batch_complete", + "restart" + ], + "success_criteria": "User confirmed recipients and personalized replies sent for each.", + "sub_agents": [], + "flowchart_type": "terminal", + "flowchart_shape": "stadium", + "flowchart_color": "#a04444" + } + ], + "edges": [ + { + "id": "edge-0", + "source": "intake", + "target": "search", + "condition": "on_success", + "description": "", + "label": "" + }, + { + "id": "edge-1", + "source": "search", + "target": "confirm-draft", + "condition": "on_success", + "description": "", + "label": "" + }, + { + "id": "edge-2", + "source": "confirm-draft", + "target": 
"intake", + "condition": "conditional", + "description": "", + "label": "" + }, + { + "id": "edge-3", + "source": "confirm-draft", + "target": "intake", + "condition": "conditional", + "description": "", + "label": "" + } + ], + "entry_node": "intake", + "terminal_nodes": [ + "confirm-draft" + ], + "flowchart_legend": { + "start": { + "shape": "stadium", + "color": "#3fa66a" + }, + "terminal": { + "shape": "stadium", + "color": "#a04444" + }, + "process": { + "shape": "rectangle", + "color": "#616d83" + }, + "decision": { + "shape": "diamond", + "color": "#d89d26" + }, + "io": { + "shape": "parallelogram", + "color": "#7a4fa5" + }, + "document": { + "shape": "document", + "color": "#507485" + }, + "database": { + "shape": "cylinder", + "color": "#459077" + }, + "subprocess": { + "shape": "subroutine", + "color": "#4c7f7f" + }, + "browser": { + "shape": "hexagon", + "color": "#3a4a9b" + } + } + }, + "flowchart_map": { + "intake": [ + "intake" + ], + "search": [ + "search" + ], + "confirm-draft": [ + "confirm-draft" + ] + } +} \ No newline at end of file diff --git a/examples/templates/job_hunter/flowchart.json b/examples/templates/job_hunter/flowchart.json new file mode 100644 index 00000000..56a15d9a --- /dev/null +++ b/examples/templates/job_hunter/flowchart.json @@ -0,0 +1,186 @@ +{ + "original_draft": { + "agent_name": "job_hunter", + "goal": "Analyze a user's resume to identify their strongest role fits, find 10 matching job opportunities, let the user select which to pursue, then generate a resume customization list and cold outreach email for each selected job.", + "description": "", + "success_criteria": [ + "Identifies 2-3 role types that genuinely match the user's experience", + "Found jobs align with identified roles and user's background", + "Resume changes are specific, actionable, and tailored to each job posting", + "Cold emails are personalized, professional, and reference specific company/role details", + "User approves outputs without major 
revisions needed" + ], + "constraints": [ + "Only suggest roles the user is realistically qualified for - no aspirational stretch roles", + "Resume customizations must be truthful - enhance presentation, never fabricate experience", + "Cold emails must be professional and not spammy", + "Only customize for jobs the user explicitly selects" + ], + "nodes": [ + { + "id": "intake", + "name": "Intake", + "description": "Analyze resume and identify 3-5 strongest role types", + "node_type": "event_loop", + "tools": [], + "input_keys": [ + "resume_text" + ], + "output_keys": [ + "resume_text", + "role_analysis" + ], + "success_criteria": "The user's resume has been analyzed and 3-5 target roles identified based on their actual experience.", + "sub_agents": [], + "flowchart_type": "start", + "flowchart_shape": "stadium", + "flowchart_color": "#3fa66a" + }, + { + "id": "job-search", + "name": "Job Search", + "description": "Search for 10 jobs matching identified roles by scraping job board sites directly", + "node_type": "event_loop", + "tools": [ + "web_scrape" + ], + "input_keys": [ + "role_analysis" + ], + "output_keys": [ + "job_listings" + ], + "success_criteria": "10 relevant job listings have been found with complete details including title, company, location, description, and URL.", + "sub_agents": [], + "flowchart_type": "process", + "flowchart_shape": "rectangle", + "flowchart_color": "#616d83" + }, + { + "id": "job-review", + "name": "Job Review", + "description": "Present all 10 jobs to the user, let them select which to pursue", + "node_type": "event_loop", + "tools": [], + "input_keys": [ + "job_listings", + "resume_text" + ], + "output_keys": [ + "selected_jobs" + ], + "success_criteria": "User has reviewed all job listings and explicitly selected which jobs they want to apply to.", + "sub_agents": [], + "flowchart_type": "process", + "flowchart_shape": "rectangle", + "flowchart_color": "#616d83" + }, + { + "id": "customize", + "name": "Customize", + 
"description": "For each selected job, generate resume customization list and cold outreach email, create Gmail drafts", + "node_type": "event_loop", + "tools": [ + "save_data", + "append_data", + "serve_file_to_user", + "gmail_create_draft" + ], + "input_keys": [ + "selected_jobs", + "resume_text" + ], + "output_keys": [ + "application_materials" + ], + "success_criteria": "Resume customization list and cold outreach email generated for each selected job, saved as HTML, and Gmail drafts created in user's inbox.", + "sub_agents": [], + "flowchart_type": "terminal", + "flowchart_shape": "stadium", + "flowchart_color": "#a04444" + } + ], + "edges": [ + { + "id": "edge-0", + "source": "intake", + "target": "job-search", + "condition": "on_success", + "description": "", + "label": "" + }, + { + "id": "edge-1", + "source": "job-search", + "target": "job-review", + "condition": "on_success", + "description": "", + "label": "" + }, + { + "id": "edge-2", + "source": "job-review", + "target": "customize", + "condition": "on_success", + "description": "", + "label": "" + } + ], + "entry_node": "intake", + "terminal_nodes": [ + "customize" + ], + "flowchart_legend": { + "start": { + "shape": "stadium", + "color": "#3fa66a" + }, + "terminal": { + "shape": "stadium", + "color": "#a04444" + }, + "process": { + "shape": "rectangle", + "color": "#616d83" + }, + "decision": { + "shape": "diamond", + "color": "#d89d26" + }, + "io": { + "shape": "parallelogram", + "color": "#7a4fa5" + }, + "document": { + "shape": "document", + "color": "#507485" + }, + "database": { + "shape": "cylinder", + "color": "#459077" + }, + "subprocess": { + "shape": "subroutine", + "color": "#4c7f7f" + }, + "browser": { + "shape": "hexagon", + "color": "#3a4a9b" + } + } + }, + "flowchart_map": { + "intake": [ + "intake" + ], + "job-search": [ + "job-search" + ], + "job-review": [ + "job-review" + ], + "customize": [ + "customize" + ] + } +} \ No newline at end of file diff --git 
a/examples/templates/local_business_extractor/flowchart.json b/examples/templates/local_business_extractor/flowchart.json new file mode 100644 index 00000000..f4df5155 --- /dev/null +++ b/examples/templates/local_business_extractor/flowchart.json @@ -0,0 +1,165 @@ +{ + "original_draft": { + "agent_name": "local_business_extractor", + "goal": "Find local businesses on Maps, extract contacts, and sync to Google Sheets.", + "description": "", + "success_criteria": [ + "Extract business details from Maps", + "Sync data to Google Sheets" + ], + "constraints": [ + "Must verify website presence before scraping" + ], + "nodes": [ + { + "id": "map-search-worker", + "name": "Maps Browser Worker", + "description": "Browser subagent that searches Google Maps and extracts business links.", + "node_type": "gcu", + "tools": [], + "input_keys": [ + "query" + ], + "output_keys": [ + "business_list" + ], + "success_criteria": "", + "sub_agents": [], + "flowchart_type": "browser", + "flowchart_shape": "hexagon", + "flowchart_color": "#3a4a9b" + }, + { + "id": "extract-contacts", + "name": "Extract Business Details", + "description": "Scrapes business websites and Maps for comprehensive business details.", + "node_type": "event_loop", + "tools": [ + "exa_get_contents", + "exa_search" + ], + "input_keys": [ + "user_request" + ], + "output_keys": [ + "business_data" + ], + "success_criteria": "Comprehensive business details (reviews, hours, contacts) extracted.", + "sub_agents": [ + "map-search-worker" + ], + "flowchart_type": "subprocess", + "flowchart_shape": "subroutine", + "flowchart_color": "#4c7f7f" + }, + { + "id": "sheets-sync", + "name": "Google Sheets Sync", + "description": "Appends the extracted business data to a Google Sheets spreadsheet.", + "node_type": "event_loop", + "tools": [ + "google_sheets_create_spreadsheet", + "google_sheets_update_values", + "google_sheets_append_values", + "google_sheets_get_values" + ], + "input_keys": [ + "business_data" + ], + 
"output_keys": [ + "spreadsheet_id" + ], + "success_criteria": "Data successfully synced to Google Sheets.", + "sub_agents": [], + "flowchart_type": "terminal", + "flowchart_shape": "stadium", + "flowchart_color": "#a04444" + } + ], + "edges": [ + { + "id": "edge-0", + "source": "extract-contacts", + "target": "sheets-sync", + "condition": "on_success", + "description": "", + "label": "" + }, + { + "id": "edge-1", + "source": "sheets-sync", + "target": "extract-contacts", + "condition": "always", + "description": "", + "label": "" + }, + { + "id": "edge-subagent-2", + "source": "extract-contacts", + "target": "map-search-worker", + "condition": "always", + "description": "sub-agent delegation", + "label": "delegate" + }, + { + "id": "edge-subagent-3", + "source": "map-search-worker", + "target": "extract-contacts", + "condition": "always", + "description": "sub-agent report back", + "label": "report" + } + ], + "entry_node": "extract-contacts", + "terminal_nodes": [ + "sheets-sync" + ], + "flowchart_legend": { + "start": { + "shape": "stadium", + "color": "#3fa66a" + }, + "terminal": { + "shape": "stadium", + "color": "#a04444" + }, + "process": { + "shape": "rectangle", + "color": "#616d83" + }, + "decision": { + "shape": "diamond", + "color": "#d89d26" + }, + "io": { + "shape": "parallelogram", + "color": "#7a4fa5" + }, + "document": { + "shape": "document", + "color": "#507485" + }, + "database": { + "shape": "cylinder", + "color": "#459077" + }, + "subprocess": { + "shape": "subroutine", + "color": "#4c7f7f" + }, + "browser": { + "shape": "hexagon", + "color": "#3a4a9b" + } + } + }, + "flowchart_map": { + "extract-contacts": [ + "extract-contacts", + "map-search-worker" + ], + "sheets-sync": [ + "sheets-sync" + ] + } +} \ No newline at end of file diff --git a/examples/templates/meeting_scheduler/flowchart.json b/examples/templates/meeting_scheduler/flowchart.json new file mode 100644 index 00000000..753967e5 --- /dev/null +++ 
b/examples/templates/meeting_scheduler/flowchart.json @@ -0,0 +1,172 @@ +{ + "original_draft": { + "agent_name": "meeting_scheduler", + "goal": "Check calendar availability, find optimal meeting times, record meetings, and send reminders.", + "description": "", + "success_criteria": [ + "Meeting time found within requested duration", + "Meeting recorded in spreadsheet accurately", + "Attendee email reminder sent", + "User confirms meeting details" + ], + "constraints": [ + "Must use Google Calendar API for availability check", + "Meeting duration must match requested time", + "Spreadsheet record must include date, time, attendee, title" + ], + "nodes": [ + { + "id": "intake", + "name": "Intake", + "description": "Gather meeting details from the user", + "node_type": "event_loop", + "tools": [], + "input_keys": [ + "attendee_email", + "meeting_duration_minutes" + ], + "output_keys": [ + "attendee_email", + "meeting_duration_minutes", + "meeting_title" + ], + "success_criteria": "User has provided attendee email, meeting duration, and title.", + "sub_agents": [], + "flowchart_type": "start", + "flowchart_shape": "stadium", + "flowchart_color": "#3fa66a" + }, + { + "id": "schedule", + "name": "Schedule", + "description": "Find available time on calendar, book meeting with Google Meet, and log to Google Sheet", + "node_type": "event_loop", + "tools": [ + "calendar_check_availability", + "calendar_create_event", + "calendar_list_events", + "google_sheets_create_spreadsheet", + "google_sheets_get_spreadsheet", + "google_sheets_append_values", + "send_email" + ], + "input_keys": [ + "attendee_email", + "meeting_duration_minutes", + "meeting_title" + ], + "output_keys": [ + "meeting_time", + "booking_confirmed", + "spreadsheet_recorded", + "email_sent", + "meet_link" + ], + "success_criteria": "Meeting time found, Google Meet created, Google Sheet 'Meeting Scheduler' updated with date/time/attendee/title/meet_link, and confirmation email sent.", + "sub_agents": [], + 
"flowchart_type": "io", + "flowchart_shape": "parallelogram", + "flowchart_color": "#7a4fa5" + }, + { + "id": "confirm", + "name": "Confirm", + "description": "Present booking confirmation to user with Google Meet link", + "node_type": "event_loop", + "tools": [], + "input_keys": [ + "meeting_time", + "booking_confirmed", + "meet_link" + ], + "output_keys": [ + "next_action" + ], + "success_criteria": "User has acknowledged the booking and received the Google Meet link.", + "sub_agents": [], + "flowchart_type": "terminal", + "flowchart_shape": "stadium", + "flowchart_color": "#a04444" + } + ], + "edges": [ + { + "id": "edge-0", + "source": "intake", + "target": "schedule", + "condition": "on_success", + "description": "", + "label": "" + }, + { + "id": "edge-1", + "source": "schedule", + "target": "confirm", + "condition": "on_success", + "description": "", + "label": "" + }, + { + "id": "edge-2", + "source": "confirm", + "target": "intake", + "condition": "conditional", + "description": "", + "label": "" + } + ], + "entry_node": "intake", + "terminal_nodes": [ + "confirm" + ], + "flowchart_legend": { + "start": { + "shape": "stadium", + "color": "#3fa66a" + }, + "terminal": { + "shape": "stadium", + "color": "#a04444" + }, + "process": { + "shape": "rectangle", + "color": "#616d83" + }, + "decision": { + "shape": "diamond", + "color": "#d89d26" + }, + "io": { + "shape": "parallelogram", + "color": "#7a4fa5" + }, + "document": { + "shape": "document", + "color": "#507485" + }, + "database": { + "shape": "cylinder", + "color": "#459077" + }, + "subprocess": { + "shape": "subroutine", + "color": "#4c7f7f" + }, + "browser": { + "shape": "hexagon", + "color": "#3a4a9b" + } + } + }, + "flowchart_map": { + "intake": [ + "intake" + ], + "schedule": [ + "schedule" + ], + "confirm": [ + "confirm" + ] + } +} \ No newline at end of file diff --git a/examples/templates/tech_news_reporter/flowchart.json b/examples/templates/tech_news_reporter/flowchart.json new file mode 100644 
index 00000000..f36662e0 --- /dev/null +++ b/examples/templates/tech_news_reporter/flowchart.json @@ -0,0 +1,150 @@ +{ + "original_draft": { + "agent_name": "tech_news_reporter", + "goal": "Research the latest technology and AI news from the web, summarize key stories, and produce a well-organized report for the user to read.", + "description": "", + "success_criteria": [ + "Finds recent, relevant tech/AI news articles", + "Covers diverse topics, not just one story", + "Produces a structured, readable report with sections, summaries, and links", + "Includes source attribution with URLs for every story", + "Delivers the report to the user in a viewable format" + ], + "constraints": [ + "Never fabricate news stories or URLs", + "Always attribute sources with links", + "Only include news from the past week" + ], + "nodes": [ + { + "id": "intake", + "name": "Intake", + "description": "Greet the user and ask if they have specific tech/AI topics to focus on, or if they want a general news roundup.", + "node_type": "event_loop", + "tools": [], + "input_keys": [], + "output_keys": [ + "research_brief" + ], + "success_criteria": "", + "sub_agents": [], + "flowchart_type": "start", + "flowchart_shape": "stadium", + "flowchart_color": "#3fa66a" + }, + { + "id": "research", + "name": "Research", + "description": "Scrape well-known tech news sites for recent articles and extract key information including titles, summaries, sources, and topics.", + "node_type": "event_loop", + "tools": [ + "web_scrape" + ], + "input_keys": [ + "research_brief" + ], + "output_keys": [ + "articles_data" + ], + "success_criteria": "", + "sub_agents": [], + "flowchart_type": "process", + "flowchart_shape": "rectangle", + "flowchart_color": "#616d83" + }, + { + "id": "compile-report", + "name": "Compile Report", + "description": "Organize the researched articles into a structured HTML report, save it, and deliver a clickable link to the user.", + "node_type": "event_loop", + "tools": [ + "save_data", 
+ "append_data", + "serve_file_to_user" + ], + "input_keys": [ + "articles_data" + ], + "output_keys": [ + "report_file" + ], + "success_criteria": "", + "sub_agents": [], + "flowchart_type": "terminal", + "flowchart_shape": "stadium", + "flowchart_color": "#a04444" + } + ], + "edges": [ + { + "id": "edge-0", + "source": "intake", + "target": "research", + "condition": "on_success", + "description": "", + "label": "" + }, + { + "id": "edge-1", + "source": "research", + "target": "compile-report", + "condition": "on_success", + "description": "", + "label": "" + } + ], + "entry_node": "intake", + "terminal_nodes": [ + "compile-report" + ], + "flowchart_legend": { + "start": { + "shape": "stadium", + "color": "#3fa66a" + }, + "terminal": { + "shape": "stadium", + "color": "#a04444" + }, + "process": { + "shape": "rectangle", + "color": "#616d83" + }, + "decision": { + "shape": "diamond", + "color": "#d89d26" + }, + "io": { + "shape": "parallelogram", + "color": "#7a4fa5" + }, + "document": { + "shape": "document", + "color": "#507485" + }, + "database": { + "shape": "cylinder", + "color": "#459077" + }, + "subprocess": { + "shape": "subroutine", + "color": "#4c7f7f" + }, + "browser": { + "shape": "hexagon", + "color": "#3a4a9b" + } + } + }, + "flowchart_map": { + "intake": [ + "intake" + ], + "research": [ + "research" + ], + "compile-report": [ + "compile-report" + ] + } +} \ No newline at end of file diff --git a/examples/templates/twitter_news_agent/flowchart.json b/examples/templates/twitter_news_agent/flowchart.json new file mode 100644 index 00000000..95e495ee --- /dev/null +++ b/examples/templates/twitter_news_agent/flowchart.json @@ -0,0 +1,172 @@ +{ + "original_draft": { + "agent_name": "twitter_news_agent", + "goal": "Achieve an accurate and concise daily news digest based on Twitter feed monitoring.", + "description": "", + "success_criteria": [ + "Navigate and extract tweets from at least 3 handles.", + "Provide a summary of the most important stories.", 
+ "Maintain a persistent log of daily digests." + ], + "constraints": [ + "Respect rate limits and ethical web usage." + ], + "nodes": [ + { + "id": "fetch-tweets", + "name": "Fetch Tech Tweets", + "description": "Browser subagent to navigate to tech news Twitter profiles and extract latest tweets.", + "node_type": "gcu", + "tools": [], + "input_keys": [ + "twitter_handles" + ], + "output_keys": [ + "raw_tweets" + ], + "success_criteria": "", + "sub_agents": [], + "flowchart_type": "browser", + "flowchart_shape": "hexagon", + "flowchart_color": "#3a4a9b" + }, + { + "id": "process-news", + "name": "Process Tech News", + "description": "Analyze and summarize the raw tweets into a daily tech digest.", + "node_type": "event_loop", + "tools": [ + "save_data", + "load_data" + ], + "input_keys": [ + "user_request", + "feedback", + "raw_tweets" + ], + "output_keys": [ + "daily_digest" + ], + "success_criteria": "A high-quality, tech-focused news summary.", + "sub_agents": [ + "fetch-tweets" + ], + "flowchart_type": "subprocess", + "flowchart_shape": "subroutine", + "flowchart_color": "#4c7f7f" + }, + { + "id": "review-digest", + "name": "Review Digest", + "description": "Present the news digest for user review and approval.", + "node_type": "event_loop", + "tools": [], + "input_keys": [ + "daily_digest" + ], + "output_keys": [ + "status", + "feedback" + ], + "success_criteria": "User has reviewed the digest and provided feedback or approval.", + "sub_agents": [], + "flowchart_type": "terminal", + "flowchart_shape": "stadium", + "flowchart_color": "#a04444" + } + ], + "edges": [ + { + "id": "edge-0", + "source": "process-news", + "target": "review-digest", + "condition": "on_success", + "description": "", + "label": "" + }, + { + "id": "edge-1", + "source": "review-digest", + "target": "process-news", + "condition": "conditional", + "description": "", + "label": "" + }, + { + "id": "edge-2", + "source": "review-digest", + "target": "process-news", + "condition": 
"conditional", + "description": "", + "label": "" + }, + { + "id": "edge-subagent-3", + "source": "process-news", + "target": "fetch-tweets", + "condition": "always", + "description": "sub-agent delegation", + "label": "delegate" + }, + { + "id": "edge-subagent-4", + "source": "fetch-tweets", + "target": "process-news", + "condition": "always", + "description": "sub-agent report back", + "label": "report" + } + ], + "entry_node": "process-news", + "terminal_nodes": [ + "review-digest" + ], + "flowchart_legend": { + "start": { + "shape": "stadium", + "color": "#3fa66a" + }, + "terminal": { + "shape": "stadium", + "color": "#a04444" + }, + "process": { + "shape": "rectangle", + "color": "#616d83" + }, + "decision": { + "shape": "diamond", + "color": "#d89d26" + }, + "io": { + "shape": "parallelogram", + "color": "#7a4fa5" + }, + "document": { + "shape": "document", + "color": "#507485" + }, + "database": { + "shape": "cylinder", + "color": "#459077" + }, + "subprocess": { + "shape": "subroutine", + "color": "#4c7f7f" + }, + "browser": { + "shape": "hexagon", + "color": "#3a4a9b" + } + } + }, + "flowchart_map": { + "process-news": [ + "process-news", + "fetch-tweets" + ], + "review-digest": [ + "review-digest" + ] + } +} \ No newline at end of file diff --git a/examples/templates/vulnerability_assessment/flowchart.json b/examples/templates/vulnerability_assessment/flowchart.json new file mode 100644 index 00000000..169096e6 --- /dev/null +++ b/examples/templates/vulnerability_assessment/flowchart.json @@ -0,0 +1,237 @@ +{ + "original_draft": { + "agent_name": "vulnerability_assessment", + "goal": "A passive, OSINT-based website vulnerability assessment agent that accepts a website domain, performs non-intrusive security scanning using purpose-built Python tools, produces letter-grade risk scores (A-F) per category, and delivers a structured vulnerability report with remediation guidance. 
The user is consulted after scanning to decide whether to investigate further or generate the final report.", + "description": "", + "success_criteria": [ + "Overall risk grade (A-F) generated from combined scan results", + "At least 5 of 6 security categories scored (SSL/TLS, HTTP Headers, DNS, Network, Technology, Attack Surface)", + "At least 3 security findings identified across different categories", + "Every finding includes clear, actionable remediation steps a developer can follow", + "User is presented findings with risk grades and given checkpoint to continue deeper scanning or generate report" + ], + "constraints": [ + "Never execute active attacks, send exploit payloads, or perform actions that could trigger WAF/IDS systems. Passive and OSINT-based scanning only \u2014 no nmap, sqlmap, or attack payloads.", + "All findings and remediation steps must be written for developers using clear language, not security jargon" + ], + "nodes": [ + { + "id": "intake", + "name": "Intake", + "description": "Collect the target website domain from the user and confirm the scanning scope", + "node_type": "event_loop", + "tools": [], + "input_keys": [], + "output_keys": [ + "target_domain" + ], + "success_criteria": "", + "sub_agents": [], + "flowchart_type": "start", + "flowchart_shape": "stadium", + "flowchart_color": "#3fa66a" + }, + { + "id": "passive-recon", + "name": "Passive Reconnaissance", + "description": "Run all 6 passive scanning tools against the target domain: SSL/TLS, HTTP headers, DNS security, port scanning, tech stack detection, and subdomain enumeration", + "node_type": "event_loop", + "tools": [ + "ssl_tls_scan", + "http_headers_scan", + "dns_security_scan", + "port_scan", + "tech_stack_detect", + "subdomain_enumerate" + ], + "input_keys": [ + "target_domain", + "feedback" + ], + "output_keys": [ + "scan_results" + ], + "success_criteria": "", + "sub_agents": [], + "flowchart_type": "process", + "flowchart_shape": "rectangle", + "flowchart_color": 
"#616d83" + }, + { + "id": "risk-scoring", + "name": "Risk Scoring", + "description": "Calculate weighted letter grades (A-F) per security category and overall risk score from scan results", + "node_type": "event_loop", + "tools": [ + "risk_score" + ], + "input_keys": [ + "scan_results" + ], + "output_keys": [ + "risk_report" + ], + "success_criteria": "", + "sub_agents": [], + "flowchart_type": "process", + "flowchart_shape": "rectangle", + "flowchart_color": "#616d83" + }, + { + "id": "findings-review", + "name": "Findings Review", + "description": "Present risk grades and security findings to the user, ask whether to continue deeper scanning or generate the final report", + "node_type": "event_loop", + "tools": [], + "input_keys": [ + "scan_results", + "risk_report", + "target_domain" + ], + "output_keys": [ + "continue_scanning", + "feedback", + "all_findings" + ], + "success_criteria": "", + "sub_agents": [], + "flowchart_type": "decision", + "flowchart_shape": "diamond", + "flowchart_color": "#d89d26" + }, + { + "id": "final-report", + "name": "Risk Dashboard Report", + "description": "Generate an HTML risk dashboard with color-coded grades, category breakdown, detailed findings, and remediation steps", + "node_type": "event_loop", + "tools": [ + "save_data", + "append_data", + "serve_file_to_user" + ], + "input_keys": [ + "all_findings", + "risk_report", + "target_domain" + ], + "output_keys": [ + "report_status" + ], + "success_criteria": "", + "sub_agents": [], + "flowchart_type": "terminal", + "flowchart_shape": "stadium", + "flowchart_color": "#a04444" + } + ], + "edges": [ + { + "id": "edge-0", + "source": "intake", + "target": "passive-recon", + "condition": "on_success", + "description": "", + "label": "" + }, + { + "id": "edge-1", + "source": "passive-recon", + "target": "risk-scoring", + "condition": "on_success", + "description": "", + "label": "" + }, + { + "id": "edge-2", + "source": "risk-scoring", + "target": "findings-review", + "condition": 
"on_success", + "description": "", + "label": "" + }, + { + "id": "edge-3", + "source": "findings-review", + "target": "passive-recon", + "condition": "conditional", + "description": "", + "label": "" + }, + { + "id": "edge-4", + "source": "findings-review", + "target": "final-report", + "condition": "conditional", + "description": "", + "label": "" + }, + { + "id": "edge-5", + "source": "final-report", + "target": "intake", + "condition": "on_success", + "description": "", + "label": "" + } + ], + "entry_node": "intake", + "terminal_nodes": [ + "final-report" + ], + "flowchart_legend": { + "start": { + "shape": "stadium", + "color": "#3fa66a" + }, + "terminal": { + "shape": "stadium", + "color": "#a04444" + }, + "process": { + "shape": "rectangle", + "color": "#616d83" + }, + "decision": { + "shape": "diamond", + "color": "#d89d26" + }, + "io": { + "shape": "parallelogram", + "color": "#7a4fa5" + }, + "document": { + "shape": "document", + "color": "#507485" + }, + "database": { + "shape": "cylinder", + "color": "#459077" + }, + "subprocess": { + "shape": "subroutine", + "color": "#4c7f7f" + }, + "browser": { + "shape": "hexagon", + "color": "#3a4a9b" + } + } + }, + "flowchart_map": { + "intake": [ + "intake" + ], + "passive-recon": [ + "passive-recon" + ], + "risk-scoring": [ + "risk-scoring" + ], + "findings-review": [ + "findings-review" + ], + "final-report": [ + "final-report" + ] + } +} \ No newline at end of file From cf44ee1d9be571a03dd6f7a5f4ea32916a8531e8 Mon Sep 17 00:00:00 2001 From: bryan Date: Mon, 16 Mar 2026 12:13:56 -0700 Subject: [PATCH 16/45] refactor: remove AgentGraph, extract shared types, add resizable graph panel --- core/frontend/src/components/AgentGraph.tsx | 770 ------------------ .../src/components/NodeDetailPanel.tsx | 2 +- core/frontend/src/components/RunButton.tsx | 40 + core/frontend/src/components/graph-types.ts | 28 + core/frontend/src/lib/graph-converter.ts | 2 +- core/frontend/src/lib/tab-persistence.ts | 2 +- 
core/frontend/src/pages/workspace.tsx | 77 +- 7 files changed, 117 insertions(+), 804 deletions(-) delete mode 100644 core/frontend/src/components/AgentGraph.tsx create mode 100644 core/frontend/src/components/RunButton.tsx create mode 100644 core/frontend/src/components/graph-types.ts diff --git a/core/frontend/src/components/AgentGraph.tsx b/core/frontend/src/components/AgentGraph.tsx deleted file mode 100644 index 80b30064..00000000 --- a/core/frontend/src/components/AgentGraph.tsx +++ /dev/null @@ -1,770 +0,0 @@ -import { memo, useMemo, useState, useRef, useEffect, useCallback } from "react"; -import { Play, Pause, Loader2, CheckCircle2 } from "lucide-react"; - -export type NodeStatus = "running" | "complete" | "pending" | "error" | "looping"; - -export type NodeType = "execution" | "trigger"; - -export interface GraphNode { - id: string; - label: string; - status: NodeStatus; - nodeType?: NodeType; - triggerType?: string; - triggerConfig?: Record; - next?: string[]; - backEdges?: string[]; - iterations?: number; - maxIterations?: number; - statusLabel?: string; - edgeLabels?: Record; -} - -export type RunState = "idle" | "deploying" | "running"; - -interface AgentGraphProps { - nodes: GraphNode[]; - title: string; - onNodeClick?: (node: GraphNode) => void; - onRun?: () => void; - onPause?: () => void; - version?: string; - runState?: RunState; - building?: boolean; - queenPhase?: "planning" | "building" | "staging" | "running"; -} - -// --- Extracted RunButton so hover state survives parent re-renders --- -export interface RunButtonProps { - runState: RunState; - disabled: boolean; - onRun: () => void; - onPause: () => void; - btnRef: React.Ref; -} - -export const RunButton = memo(function RunButton({ runState, disabled, onRun, onPause, btnRef }: RunButtonProps) { - const [hovered, setHovered] = useState(false); - const showPause = runState === "running" && hovered; - - return ( - - ); -}); - -const NODE_W_MAX = 180; -const NODE_H = 44; -const GAP_Y = 48; 
-const TOP_Y = 30; -const MARGIN_LEFT = 20; -const MARGIN_RIGHT = 50; // space for back-edge arcs -const SVG_BASE_W = 320; -const GAP_X = 12; - -// Read a CSS custom property value (space-separated HSL components) -function cssVar(name: string): string { - return getComputedStyle(document.documentElement).getPropertyValue(name).trim(); -} - -type StatusColorSet = Record; -type TriggerColorSet = { bg: string; border: string; text: string; icon: string }; - -function buildStatusColors(): StatusColorSet { - const running = cssVar("--node-running") || "45 95% 58%"; - const looping = cssVar("--node-looping") || "38 90% 55%"; - const complete = cssVar("--node-complete") || "43 70% 45%"; - const pending = cssVar("--node-pending") || "35 15% 28%"; - const pendingBg = cssVar("--node-pending-bg") || "35 10% 12%"; - const pendingBorder = cssVar("--node-pending-border") || "35 10% 20%"; - const error = cssVar("--node-error") || "0 65% 55%"; - - return { - running: { - dot: `hsl(${running})`, - bg: `hsl(${running} / 0.08)`, - border: `hsl(${running} / 0.5)`, - glow: `hsl(${running} / 0.15)`, - }, - looping: { - dot: `hsl(${looping})`, - bg: `hsl(${looping} / 0.08)`, - border: `hsl(${looping} / 0.5)`, - glow: `hsl(${looping} / 0.15)`, - }, - complete: { - dot: `hsl(${complete})`, - bg: `hsl(${complete} / 0.05)`, - border: `hsl(${complete} / 0.25)`, - glow: "none", - }, - pending: { - dot: `hsl(${pending})`, - bg: `hsl(${pendingBg})`, - border: `hsl(${pendingBorder})`, - glow: "none", - }, - error: { - dot: `hsl(${error})`, - bg: `hsl(${error} / 0.06)`, - border: `hsl(${error} / 0.3)`, - glow: `hsl(${error} / 0.1)`, - }, - }; -} - -function buildTriggerColors(): TriggerColorSet { - const bg = cssVar("--trigger-bg") || "210 25% 14%"; - const border = cssVar("--trigger-border") || "210 30% 30%"; - const text = cssVar("--trigger-text") || "210 30% 65%"; - const icon = cssVar("--trigger-icon") || "210 40% 55%"; - return { - bg: `hsl(${bg})`, - border: `hsl(${border})`, - text: 
`hsl(${text})`, - icon: `hsl(${icon})`, - }; -} - -/** Hook that reads node/trigger colors from CSS vars and updates on theme changes. */ -function useThemeColors() { - const [statusColors, setStatusColors] = useState(buildStatusColors); - const [triggerColors, setTriggerColors] = useState(buildTriggerColors); - - useEffect(() => { - const rebuild = () => { - setStatusColors(buildStatusColors()); - setTriggerColors(buildTriggerColors()); - }; - const obs = new MutationObserver(rebuild); - obs.observe(document.documentElement, { attributes: true, attributeFilter: ["class", "style"] }); - return () => obs.disconnect(); - }, []); - - return { statusColors, triggerColors }; -} - -// Active trigger — brighter, more saturated blue -const activeTriggerColors = { - bg: "hsl(210,30%,18%)", - border: "hsl(210,50%,50%)", - text: "hsl(210,40%,75%)", - icon: "hsl(210,60%,65%)", -}; - -const triggerIcons: Record = { - webhook: "\u26A1", // lightning bolt - timer: "\u23F1", // stopwatch - api: "\u2192", // right arrow - event: "\u223F", // sine wave -}; - -/** Truncate label to fit within `availablePx` at the given fontSize. */ -function truncateLabel(label: string, availablePx: number, fontSize: number): string { - const avgCharW = fontSize * 0.58; - const maxChars = Math.floor(availablePx / avgCharW); - if (label.length <= maxChars) return label; - return label.slice(0, Math.max(maxChars - 1, 1)) + "\u2026"; -} - -// ─── Pan & Zoom wrapper ─── -function PanZoomSvg({ svgW, svgH, className, children }: { svgW: number; svgH: number; className?: string; children: React.ReactNode }) { - const [zoom, setZoom] = useState(1); - const [pan, setPan] = useState({ x: 0, y: 0 }); - const [dragging, setDragging] = useState(false); - const dragStart = useRef({ x: 0, y: 0, panX: 0, panY: 0 }); - - const MIN_ZOOM = 0.4; - const MAX_ZOOM = 3; - - const handleWheel = useCallback((e: React.WheelEvent) => { - e.preventDefault(); - const delta = e.deltaY > 0 ? 
0.9 : 1.1; - setZoom(z => Math.min(MAX_ZOOM, Math.max(MIN_ZOOM, z * delta))); - }, []); - - const handleMouseDown = useCallback((e: React.MouseEvent) => { - if (e.button !== 0) return; - setDragging(true); - dragStart.current = { x: e.clientX, y: e.clientY, panX: pan.x, panY: pan.y }; - }, [pan]); - - const handleMouseMove = useCallback((e: React.MouseEvent) => { - if (!dragging) return; - setPan({ - x: dragStart.current.panX + (e.clientX - dragStart.current.x), - y: dragStart.current.panY + (e.clientY - dragStart.current.y), - }); - }, [dragging]); - - const handleMouseUp = useCallback(() => setDragging(false), []); - - const resetView = useCallback(() => { - setZoom(1); - setPan({ x: 0, y: 0 }); - }, []); - - return ( -

-
- - {children} - -
- - {/* Zoom controls */} -
- - - -
-
- ); -} - -export default function AgentGraph({ nodes, title: _title, onNodeClick, onRun, onPause, version, runState: externalRunState, building, queenPhase }: AgentGraphProps) { - const [localRunState, setLocalRunState] = useState("idle"); - const runState = externalRunState ?? localRunState; - const runBtnRef = useRef(null); - const { statusColors, triggerColors } = useThemeColors(); - - const handleRun = () => { - if (runState !== "idle") return; - if (onRun) { - onRun(); - } else { - setLocalRunState("deploying"); - setTimeout(() => setLocalRunState("running"), 1800); - setTimeout(() => setLocalRunState("idle"), 5000); - } - }; - - const idxMap = useMemo(() => Object.fromEntries(nodes.map((n, i) => [n.id, i])), [nodes]); - - const backEdges = useMemo(() => { - const edges: { fromIdx: number; toIdx: number }[] = []; - nodes.forEach((n, i) => { - (n.next || []).forEach((toId) => { - const toIdx = idxMap[toId]; - if (toIdx !== undefined && toIdx <= i) edges.push({ fromIdx: i, toIdx }); - }); - (n.backEdges || []).forEach((toId) => { - const toIdx = idxMap[toId]; - if (toIdx !== undefined) edges.push({ fromIdx: i, toIdx }); - }); - }); - return edges; - }, [nodes, idxMap]); - - const forwardEdges = useMemo(() => { - const edges: { fromIdx: number; toIdx: number; fanCount: number; fanIndex: number; label?: string }[] = []; - nodes.forEach((n, i) => { - const targets = (n.next || []) - .map((toId) => ({ toId, toIdx: idxMap[toId] })) - .filter((t): t is { toId: string; toIdx: number } => t.toIdx !== undefined && t.toIdx > i); - targets.forEach(({ toId, toIdx }, fi) => { - edges.push({ - fromIdx: i, - toIdx, - fanCount: targets.length, - fanIndex: fi, - label: n.edgeLabels?.[toId], - }); - }); - }); - return edges; - }, [nodes, idxMap]); - - // --- Layer-based layout computation --- - const layout = useMemo(() => { - if (nodes.length === 0) { - return { layers: [] as number[], cols: [] as number[], maxCols: 1, nodeW: NODE_W_MAX, colSpacing: 0, firstColX: MARGIN_LEFT }; 
- } - - // 1. Build reverse adjacency from forward edges (who are the parents of each node) - const parents = new Map(); - nodes.forEach((_, i) => parents.set(i, [])); - forwardEdges.forEach((e) => { - parents.get(e.toIdx)!.push(e.fromIdx); - }); - - // 2. Assign layers via longest-path from entry - const layers = new Array(nodes.length).fill(0); - for (let i = 0; i < nodes.length; i++) { - const pars = parents.get(i) || []; - if (pars.length > 0) { - layers[i] = Math.max(...pars.map((p) => layers[p])) + 1; - } - } - - // 3. Group nodes by layer - const layerGroups = new Map(); - layers.forEach((l, i) => { - const group = layerGroups.get(l) || []; - group.push(i); - layerGroups.set(l, group); - }); - - // 4. Compute max columns and dynamic node width - let maxCols = 1; - layerGroups.forEach((group) => { - maxCols = Math.max(maxCols, group.length); - }); - - const usableW = SVG_BASE_W - MARGIN_LEFT - MARGIN_RIGHT; - const nodeW = Math.min(NODE_W_MAX, Math.floor((usableW - (maxCols - 1) * GAP_X) / maxCols)); - const colSpacing = nodeW + GAP_X; - const totalNodesW = maxCols * nodeW + (maxCols - 1) * GAP_X; - const firstColX = MARGIN_LEFT + (usableW - totalNodesW) / 2; - - // 5. Assign columns within each layer (centered, ordered by parent column) - const cols = new Array(nodes.length).fill(0); - layerGroups.forEach((group) => { - if (group.length === 1) { - // Center single node: place at middle column - cols[group[0]] = (maxCols - 1) / 2; - } else { - // Sort group by average parent column to reduce crossings - const sorted = [...group].sort((a, b) => { - const aParents = parents.get(a) || []; - const bParents = parents.get(b) || []; - const aAvg = aParents.length > 0 ? aParents.reduce((s, p) => s + cols[p], 0) / aParents.length : 0; - const bAvg = bParents.length > 0 ? 
bParents.reduce((s, p) => s + cols[p], 0) / bParents.length : 0; - return aAvg - bAvg; - }); - // Spread evenly, centered within maxCols - const offset = (maxCols - group.length) / 2; - sorted.forEach((nodeIdx, i) => { - cols[nodeIdx] = offset + i; - }); - } - }); - - return { layers, cols, maxCols, nodeW, colSpacing, firstColX }; - }, [nodes, forwardEdges]); - - if (nodes.length === 0) { - return ( -
-
-
-

Pipeline

- {version && ( - - {version} - - )} -
- {})} btnRef={runBtnRef} /> -
-
- {building ? ( -
- -

Building agent...

-
- ) : ( -

No pipeline configured yet.
Chat with the Queen to get started.

- )} -
-
- ); - } - - const { layers, cols, nodeW, colSpacing, firstColX } = layout; - - const nodePos = (i: number) => ({ - x: firstColX + cols[i] * colSpacing, - y: TOP_Y + layers[i] * (NODE_H + GAP_Y), - }); - - const maxLayer = nodes.length > 0 ? Math.max(...layers) : 0; - const svgHeight = TOP_Y * 2 + (maxLayer + 1) * NODE_H + maxLayer * GAP_Y + 10; - const backEdgeSpace = backEdges.length > 0 ? MARGIN_RIGHT + backEdges.length * 18 : 20; - const svgWidth = Math.max(SVG_BASE_W, firstColX + layout.maxCols * nodeW + (layout.maxCols - 1) * GAP_X + backEdgeSpace); - - // Check if a skip-level forward edge would collide with intermediate nodes - const hasCollision = (fromLayer: number, toLayer: number, fromX: number, toX: number): boolean => { - const minX = Math.min(fromX, toX); - const maxX = Math.max(fromX, toX) + nodeW; - for (let i = 0; i < nodes.length; i++) { - const l = layers[i]; - if (l > fromLayer && l < toLayer) { - const nx = firstColX + cols[i] * colSpacing; - // Check horizontal overlap - if (nx < maxX && nx + nodeW > minX) return true; - } - } - return false; - }; - - const renderForwardEdge = (edge: { fromIdx: number; toIdx: number; fanCount: number; fanIndex: number; label?: string }, i: number) => { - const from = nodePos(edge.fromIdx); - const to = nodePos(edge.toIdx); - const fromCenterX = from.x + nodeW / 2; - const toCenterX = to.x + nodeW / 2; - const y1 = from.y + NODE_H; - const y2 = to.y; - - // Fan-out: spread exit points across the source node's bottom - let startX = fromCenterX; - if (edge.fanCount > 1) { - const spread = nodeW * 0.5; - const step = edge.fanCount > 1 ? 
spread / (edge.fanCount - 1) : 0; - startX = fromCenterX - spread / 2 + edge.fanIndex * step; - } - - const midY = (y1 + y2) / 2; - const fromLayer = layers[edge.fromIdx]; - const toLayer = layers[edge.toIdx]; - const skipsLayers = toLayer - fromLayer > 1; - - let d: string; - if (skipsLayers && hasCollision(fromLayer, toLayer, from.x, to.x)) { - // Route around intermediate nodes: orthogonal detour to the left - const detourX = Math.min(from.x, to.x) - nodeW * 0.4; - d = `M ${startX} ${y1} L ${startX} ${midY} L ${detourX} ${midY} L ${detourX} ${y2 - 10} L ${toCenterX} ${y2 - 10} L ${toCenterX} ${y2}`; - } else if (Math.abs(startX - toCenterX) < 2) { - // Straight vertical line when aligned - d = `M ${startX} ${y1} L ${toCenterX} ${y2}`; - } else { - // Orthogonal: down, across, down - d = `M ${startX} ${y1} L ${startX} ${midY} L ${toCenterX} ${midY} L ${toCenterX} ${y2}`; - } - - const fromNode = nodes[edge.fromIdx]; - const isActive = fromNode.status === "complete" || fromNode.status === "running" || fromNode.status === "looping"; - const strokeColor = isActive ? statusColors.complete.border : statusColors.pending.border; - const arrowColor = isActive ? statusColors.complete.dot : statusColors.pending.border; - - return ( - - - - {edge.label && ( - - {edge.label} - - )} - - ); - }; - - const renderBackEdge = (edge: { fromIdx: number; toIdx: number }, i: number) => { - const from = nodePos(edge.fromIdx); - const to = nodePos(edge.toIdx); - - const rightX = Math.max(from.x, to.x) + nodeW; - const rightOffset = 28 + i * 18; - const startX = from.x + nodeW; - const startY = from.y + NODE_H / 2; - const endX = to.x + nodeW; - const endY = to.y + NODE_H / 2; - const curveX = rightX + rightOffset; - const r = 12; - - const fromNode = nodes[edge.fromIdx]; - const isActive = fromNode.status === "complete" || fromNode.status === "running" || fromNode.status === "looping"; - const color = isActive ? 
statusColors.looping.border : statusColors.pending.border; - - // Bezier curve with rounded corners (kept as curves for back edges) - const path = `M ${startX} ${startY} C ${startX + r} ${startY}, ${curveX} ${startY}, ${curveX} ${startY - r} L ${curveX} ${endY + r} C ${curveX} ${endY}, ${endX + r} ${endY}, ${endX + 6} ${endY}`; - - return ( - - - - - ); - }; - - const renderTriggerNode = (node: GraphNode, i: number) => { - const pos = nodePos(i); - const icon = triggerIcons[node.triggerType || ""] || "\u26A1"; - const triggerFontSize = nodeW < 140 ? 10.5 : 11.5; - const triggerAvailW = nodeW - 38; - const triggerDisplayLabel = truncateLabel(node.label, triggerAvailW, triggerFontSize); - const nextFireIn = node.triggerConfig?.next_fire_in as number | undefined; - const isActive = node.status === "running" || node.status === "complete"; - const colors = isActive ? activeTriggerColors : triggerColors; - - // Format countdown for display below node - let countdownLabel: string | null = null; - if (isActive && nextFireIn != null && nextFireIn > 0) { - const h = Math.floor(nextFireIn / 3600); - const m = Math.floor((nextFireIn % 3600) / 60); - const s = Math.floor(nextFireIn % 60); - countdownLabel = h > 0 - ? `next in ${h}h ${String(m).padStart(2, "0")}m` - : `next in ${m}m ${String(s).padStart(2, "0")}s`; - } - - // Status label below countdown - const statusLabel = isActive ? "active" : "inactive"; - const statusColor = isActive ? "hsl(140,40%,50%)" : "hsl(210,20%,40%)"; - - return ( - onNodeClick?.(node)} style={{ cursor: onNodeClick ? 
"pointer" : "default" }}> - {node.label} - {/* Pill-shaped background — solid border when active, dashed when inactive */} - - - {/* Trigger type icon */} - - {icon} - - - {/* Label */} - - {triggerDisplayLabel} - - - {/* Countdown label below node */} - {countdownLabel && ( - - {countdownLabel} - - )} - - {/* Status label */} - - {statusLabel} - - - ); - }; - - const renderNode = (node: GraphNode, i: number) => { - if (node.nodeType === "trigger") return renderTriggerNode(node, i); - - const pos = nodePos(i); - const isActive = node.status === "running" || node.status === "looping"; - const isDone = node.status === "complete"; - const colors = statusColors[node.status]; - - const fontSize = nodeW < 140 ? 10.5 : 12.5; - const labelAvailW = nodeW - 38; - const displayLabel = truncateLabel(node.label, labelAvailW, fontSize); - - return ( - onNodeClick?.(node)} style={{ cursor: onNodeClick ? "pointer" : "default" }}> - {node.label} - {/* Ambient glow for active nodes */} - {isActive && ( - <> - - - - )} - - {/* Node background */} - - - {/* Status dot */} - - {isActive && ( - - - - - )} - - {/* Check mark for complete */} - {isDone && ( - - ✓ - - )} - - {/* Label -- truncated with ellipsis for narrow nodes */} - - {displayLabel} - - - {/* Status label for active nodes */} - {node.statusLabel && isActive && ( - - {node.statusLabel} - - )} - - {/* Iteration badge */} - {node.iterations !== undefined && node.iterations > 0 && ( - - - - {node.iterations}{node.maxIterations ? `/${node.maxIterations}` : "\u00d7"} - - - )} - - ); - }; - - return ( -
- {/* Compact sub-label */} -
-
-

Pipeline

- {version && ( - - {version} - - )} -
- {})} btnRef={runBtnRef} /> -
- - {/* Graph */} - - {forwardEdges.map((e, i) => renderForwardEdge(e, i))} - {backEdges.map((e, i) => renderBackEdge(e, i))} - {nodes.map((n, i) => renderNode(n, i))} - - {building && ( -
-
- -

Rebuilding agent...

-
-
- )} -
- ); -} diff --git a/core/frontend/src/components/NodeDetailPanel.tsx b/core/frontend/src/components/NodeDetailPanel.tsx index a487e37c..addee038 100644 --- a/core/frontend/src/components/NodeDetailPanel.tsx +++ b/core/frontend/src/components/NodeDetailPanel.tsx @@ -1,6 +1,6 @@ import { useState, useEffect, useRef } from "react"; import { X, Cpu, Zap, Clock, RotateCcw, CheckCircle2, AlertCircle, Loader2, ChevronDown, ChevronRight, Copy, Check, Terminal, Wrench, BookOpen, GitBranch, Bot } from "lucide-react"; -import type { GraphNode, NodeStatus } from "./AgentGraph"; +import type { GraphNode, NodeStatus } from "./graph-types"; import type { NodeSpec, ToolInfo, NodeCriteria } from "../api/types"; import { graphsApi } from "../api/graphs"; import { logsApi } from "../api/logs"; diff --git a/core/frontend/src/components/RunButton.tsx b/core/frontend/src/components/RunButton.tsx new file mode 100644 index 00000000..3a6a24b3 --- /dev/null +++ b/core/frontend/src/components/RunButton.tsx @@ -0,0 +1,40 @@ +import { memo, useState } from "react"; +import { Play, Pause, Loader2, CheckCircle2 } from "lucide-react"; +import type { RunButtonProps } from "./graph-types"; + +export const RunButton = memo(function RunButton({ runState, disabled, onRun, onPause, btnRef }: RunButtonProps) { + const [hovered, setHovered] = useState(false); + const showPause = runState === "running" && hovered; + + return ( + + ); +}); diff --git a/core/frontend/src/components/graph-types.ts b/core/frontend/src/components/graph-types.ts new file mode 100644 index 00000000..c5e95b13 --- /dev/null +++ b/core/frontend/src/components/graph-types.ts @@ -0,0 +1,28 @@ +export type NodeStatus = "running" | "complete" | "pending" | "error" | "looping"; + +export type NodeType = "execution" | "trigger"; + +export interface GraphNode { + id: string; + label: string; + status: NodeStatus; + nodeType?: NodeType; + triggerType?: string; + triggerConfig?: Record; + next?: string[]; + backEdges?: string[]; + 
iterations?: number; + maxIterations?: number; + statusLabel?: string; + edgeLabels?: Record; +} + +export type RunState = "idle" | "deploying" | "running"; + +export interface RunButtonProps { + runState: RunState; + disabled: boolean; + onRun: () => void; + onPause: () => void; + btnRef: React.Ref; +} diff --git a/core/frontend/src/lib/graph-converter.ts b/core/frontend/src/lib/graph-converter.ts index 1e6cc4fc..78ba24b9 100644 --- a/core/frontend/src/lib/graph-converter.ts +++ b/core/frontend/src/lib/graph-converter.ts @@ -1,5 +1,5 @@ import type { GraphTopology, NodeSpec } from "@/api/types"; -import type { GraphNode, NodeStatus } from "@/components/AgentGraph"; +import type { GraphNode, NodeStatus } from "@/components/graph-types"; /** * Convert a backend GraphTopology (nodes + edges + entry_node) into diff --git a/core/frontend/src/lib/tab-persistence.ts b/core/frontend/src/lib/tab-persistence.ts index 6dd9c6d8..fc2f5eb7 100644 --- a/core/frontend/src/lib/tab-persistence.ts +++ b/core/frontend/src/lib/tab-persistence.ts @@ -4,7 +4,7 @@ */ import type { ChatMessage } from "@/components/ChatPanel"; -import type { GraphNode } from "@/components/AgentGraph"; +import type { GraphNode } from "@/components/graph-types"; export const TAB_STORAGE_KEY = "hive:workspace-tabs"; diff --git a/core/frontend/src/pages/workspace.tsx b/core/frontend/src/pages/workspace.tsx index 635e4bcc..26bd08b7 100644 --- a/core/frontend/src/pages/workspace.tsx +++ b/core/frontend/src/pages/workspace.tsx @@ -2,7 +2,7 @@ import { useState, useCallback, useRef, useEffect, useMemo } from "react"; import ReactDOM from "react-dom"; import { useSearchParams, useNavigate } from "react-router-dom"; import { Plus, KeyRound, Sparkles, Layers, ChevronLeft, Bot, Loader2, WifiOff, X } from "lucide-react"; -import AgentGraph, { type GraphNode, type NodeStatus } from "@/components/AgentGraph"; +import type { GraphNode, NodeStatus } from "@/components/graph-types"; import DraftGraph from 
"@/components/DraftGraph"; import ChatPanel, { type ChatMessage } from "@/components/ChatPanel"; import TopBar from "@/components/TopBar"; @@ -557,6 +557,27 @@ export default function Workspace() { const [triggerTaskSaving, setTriggerTaskSaving] = useState(false); const [newTabOpen, setNewTabOpen] = useState(false); const newTabBtnRef = useRef(null); + const [graphPanelPct, setGraphPanelPct] = useState(30); + const resizing = useRef(false); + + // Drag-to-resize the graph panel + useEffect(() => { + const onMouseMove = (e: MouseEvent) => { + if (!resizing.current) return; + const pct = (e.clientX / window.innerWidth) * 100; + setGraphPanelPct(Math.max(15, Math.min(50, pct))); + }; + const onMouseUp = () => { + resizing.current = false; + document.body.style.cursor = ""; + }; + window.addEventListener("mousemove", onMouseMove); + window.addEventListener("mouseup", onMouseUp); + return () => { + window.removeEventListener("mousemove", onMouseMove); + window.removeEventListener("mouseup", onMouseUp); + }; + }, []); // Ref mirror of sessionsByAgent so SSE callback can read current graph // state without adding sessionsByAgent to its dependency array. @@ -2827,38 +2848,32 @@ export default function Workspace() { {/* Main content area */}
- {/* ── Pipeline graph + chat ──────────────────────────────────── */} -
+ {/* ── Draft flowchart + chat ─────────────────────────────────── */} +
- {activeAgentState?.queenPhase === "planning" || activeAgentState?.queenPhase === "building" ? ( - - ) : activeAgentState?.originalDraft ? ( - { - const node = currentGraph.nodes.find(n => n.id === runtimeNodeId); - if (node) setSelectedNode(prev => prev?.id === node.id ? null : node); - }} - /> - ) : ( - setSelectedNode(prev => prev?.id === node.id ? null : node)} - onRun={handleRun} - onPause={handlePause} - runState={activeAgentState?.workerRunState ?? "idle"} - building={activeAgentState?.queenBuilding ?? false} - queenPhase={activeAgentState?.queenPhase ?? "building"} - /> - )} + { + const node = currentGraph.nodes.find(n => n.id === runtimeNodeId); + if (node) setSelectedNode(prev => prev?.id === node.id ? null : node); + }} + />
+ {/* Resize handle */} +
{ resizing.current = true; document.body.style.cursor = "col-resize"; }} + />
From 9fad385ddf1c5cfac0c27cae01ab86ed25784326 Mon Sep 17 00:00:00 2001 From: bryan Date: Mon, 16 Mar 2026 12:14:20 -0700 Subject: [PATCH 17/45] fix: return staging phase for disk-loaded agents to prevent false planning loader --- core/framework/server/routes_sessions.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/framework/server/routes_sessions.py b/core/framework/server/routes_sessions.py index 957106f0..2a872664 100644 --- a/core/framework/server/routes_sessions.py +++ b/core/framework/server/routes_sessions.py @@ -64,7 +64,7 @@ def _session_to_live_dict(session) -> dict: "loaded_at": session.loaded_at, "uptime_seconds": round(time.time() - session.loaded_at, 1), "intro_message": getattr(session.runner, "intro_message", "") or "", - "queen_phase": phase_state.phase if phase_state else "planning", + "queen_phase": phase_state.phase if phase_state else ("staging" if session.worker_runtime else "planning"), } From 5477408d402430010006874fe29cab1dad2a5caf Mon Sep 17 00:00:00 2001 From: bryan Date: Mon, 16 Mar 2026 12:18:46 -0700 Subject: [PATCH 18/45] chore: code quality updates --- core/framework/tools/flowchart_utils.py | 15 +++++++-------- core/framework/tools/queen_lifecycle_tools.py | 12 ++++++------ core/frontend/src/lib/graph-converter.ts | 2 +- core/tests/test_flowchart_utils.py | 4 ++-- 4 files changed, 16 insertions(+), 17 deletions(-) diff --git a/core/framework/tools/flowchart_utils.py b/core/framework/tools/flowchart_utils.py index 3ff52bd7..bb797562 100644 --- a/core/framework/tools/flowchart_utils.py +++ b/core/framework/tools/flowchart_utils.py @@ -17,7 +17,7 @@ logger = logging.getLogger(__name__) FLOWCHART_FILENAME = "flowchart.json" # ── Flowchart type catalogue (9 types) ─────────────────────────────────────── -_FLOWCHART_TYPES = { +FLOWCHART_TYPES = { "start": {"shape": "stadium", "color": "#3fa66a"}, # sage green "terminal": {"shape": "stadium", "color": "#a04444"}, # dusty red "process": {"shape": "rectangle", 
"color": "#616d83"}, # blue-gray @@ -30,7 +30,7 @@ _FLOWCHART_TYPES = { } # Backward-compat remap: old type names → canonical type -_FLOWCHART_REMAP: dict[str, str] = { +FLOWCHART_REMAP: dict[str, str] = { "delay": "process", "manual_operation": "process", "preparation": "process", @@ -113,16 +113,15 @@ def classify_flowchart_node( """ # Explicit override from the queen explicit = node.get("flowchart_type", "").strip() - if explicit and explicit in _FLOWCHART_TYPES: + if explicit and explicit in FLOWCHART_TYPES: return explicit - if explicit and explicit in _FLOWCHART_REMAP: - return _FLOWCHART_REMAP[explicit] + if explicit and explicit in FLOWCHART_REMAP: + return FLOWCHART_REMAP[explicit] node_id = node["id"] node_type = node.get("node_type", "event_loop") node_tools = set(node.get("tools") or []) desc = (node.get("description") or "").lower() - name = (node.get("name") or "").lower() # GCU / browser automation nodes → hexagon if node_type == "gcu": @@ -252,7 +251,7 @@ def synthesize_draft_from_runtime( "sub_agents": list(rn.sub_agents) if getattr(rn, "sub_agents", None) else [], } fc_type = classify_flowchart_node(node, i, total, edges, terminal_ids) - fc_meta = _FLOWCHART_TYPES[fc_type] + fc_meta = FLOWCHART_TYPES[fc_type] node["flowchart_type"] = fc_type node["flowchart_shape"] = fc_meta["shape"] node["flowchart_color"] = fc_meta["color"] @@ -320,7 +319,7 @@ def synthesize_draft_from_runtime( "terminal_nodes": sorted(terminal_ids), "flowchart_legend": { fc_type: {"shape": meta["shape"], "color": meta["color"]} - for fc_type, meta in _FLOWCHART_TYPES.items() + for fc_type, meta in FLOWCHART_TYPES.items() }, } diff --git a/core/framework/tools/queen_lifecycle_tools.py b/core/framework/tools/queen_lifecycle_tools.py index 6a596c7f..6bace20b 100644 --- a/core/framework/tools/queen_lifecycle_tools.py +++ b/core/framework/tools/queen_lifecycle_tools.py @@ -47,7 +47,7 @@ from framework.runtime.event_bus import AgentEvent, EventType from framework.server.app import 
validate_agent_path from framework.tools.flowchart_utils import ( FLOWCHART_FILENAME, - _FLOWCHART_TYPES, + FLOWCHART_TYPES, classify_flowchart_node, load_flowchart_file, save_flowchart_file, @@ -294,7 +294,7 @@ def build_worker_profile(runtime: AgentRuntime, agent_path: Path | str | None = -# _FLOWCHART_TYPES is imported from framework.tools.flowchart_utils +# FLOWCHART_TYPES is imported from framework.tools.flowchart_utils def _read_agent_triggers_json(agent_path: Path) -> list[dict]: @@ -585,7 +585,7 @@ def _dissolve_planning_nodes( if not predecessors: # Decision at start: convert to regular process node d_node["flowchart_type"] = "process" - fc_meta = _FLOWCHART_TYPES["process"] + fc_meta = FLOWCHART_TYPES["process"] d_node["flowchart_shape"] = fc_meta["shape"] d_node["flowchart_color"] = fc_meta["color"] if not d_node.get("success_criteria"): @@ -1196,7 +1196,7 @@ def register_queen_lifecycle_tools( if not predecessors: # Decision at start: convert to regular process node d_node["flowchart_type"] = "process" - fc_meta = _FLOWCHART_TYPES["process"] + fc_meta = FLOWCHART_TYPES["process"] d_node["flowchart_shape"] = fc_meta["shape"] d_node["flowchart_color"] = fc_meta["color"] if not d_node.get("success_criteria"): @@ -1748,7 +1748,7 @@ def register_queen_lifecycle_tools( validated_edges, terminal_ids, ) - fc_meta = _FLOWCHART_TYPES[fc_type] + fc_meta = FLOWCHART_TYPES[fc_type] node["flowchart_type"] = fc_type node["flowchart_shape"] = fc_meta["shape"] node["flowchart_color"] = fc_meta["color"] @@ -1766,7 +1766,7 @@ def register_queen_lifecycle_tools( # Color legend for the frontend "flowchart_legend": { fc_type: {"shape": meta["shape"], "color": meta["color"]} - for fc_type, meta in _FLOWCHART_TYPES.items() + for fc_type, meta in FLOWCHART_TYPES.items() }, } diff --git a/core/frontend/src/lib/graph-converter.ts b/core/frontend/src/lib/graph-converter.ts index 78ba24b9..ab1fc828 100644 --- a/core/frontend/src/lib/graph-converter.ts +++ 
b/core/frontend/src/lib/graph-converter.ts @@ -3,7 +3,7 @@ import type { GraphNode, NodeStatus } from "@/components/graph-types"; /** * Convert a backend GraphTopology (nodes + edges + entry_node) into - * the GraphNode[] shape that AgentGraph renders. + * the GraphNode[] shape that DraftGraph renders. * * Four jobs: * 1. Synthesize trigger nodes from non-manual entry_points diff --git a/core/tests/test_flowchart_utils.py b/core/tests/test_flowchart_utils.py index 985cc7a9..0344d5a8 100644 --- a/core/tests/test_flowchart_utils.py +++ b/core/tests/test_flowchart_utils.py @@ -6,7 +6,7 @@ from types import SimpleNamespace from framework.tools.flowchart_utils import ( FLOWCHART_FILENAME, - _FLOWCHART_TYPES, + FLOWCHART_TYPES, classify_flowchart_node, generate_fallback_flowchart, load_flowchart_file, @@ -149,7 +149,7 @@ class TestSynthesizeDraftFromRuntime: # Legend should contain all types assert draft["flowchart_legend"] == { k: {"shape": v["shape"], "color": v["color"]} - for k, v in _FLOWCHART_TYPES.items() + for k, v in FLOWCHART_TYPES.items() } def test_graph_with_sub_agents(self): From c3f13c50ebe0b50bc20c41beb3b55ae6da81a000 Mon Sep 17 00:00:00 2001 From: bryan Date: Mon, 16 Mar 2026 12:22:01 -0700 Subject: [PATCH 19/45] docs: remove stale iso 5807 references --- docs/draft-flowchart-schema.md | 41 +++++++++++++++++++--------------- 1 file changed, 23 insertions(+), 18 deletions(-) diff --git a/docs/draft-flowchart-schema.md b/docs/draft-flowchart-schema.md index 5ce81579..8fd8c982 100644 --- a/docs/draft-flowchart-schema.md +++ b/docs/draft-flowchart-schema.md @@ -1,6 +1,6 @@ # Draft Flowchart System — Complete Reference -The draft flowchart system bridges user-facing workflow design (planning phase) and the runtime agent graph (execution phase). During planning, the queen agent creates an ISO 5807 flowchart that the user reviews. 
On approval, decision nodes are dissolved into runtime-compatible structures, and the original flowchart is preserved for live status overlay during execution. +The draft flowchart system bridges user-facing workflow design (planning phase) and the runtime agent graph (execution phase). During planning, the queen agent creates a flowchart that the user reviews. On approval, decision nodes are dissolved into runtime-compatible structures, and the original flowchart is preserved for live status overlay during execution. --- @@ -20,7 +20,7 @@ DraftGraph (SSE) ────► │ Decision diamonds │ │ │ merged into │ Flowchart Map ▼ │ predecessor criteria │ inverts to Frontend renders │ │ overlay status -ISO 5807 flowchart │ Original draft │ on original +Flowchart with │ Original draft │ on original with diamond │ preserved │ flowchart decisions │ │ └──────────────────────┘ @@ -206,7 +206,7 @@ After `save_agent_draft` processes the input, it stores and emits an enriched dr "sub_agents": [], "flowchart_type": "start", "flowchart_shape": "stadium", - "flowchart_color": "#4CAF50" + "flowchart_color": "#3fa66a" }, { "id": "check-tier", @@ -216,7 +216,7 @@ After `save_agent_draft` processes the input, it stores and emits an enriched dr "decision_clause": "Is lead score > 80?", "flowchart_type": "decision", "flowchart_shape": "diamond", - "flowchart_color": "#FF9800" + "flowchart_color": "#d89d26" } ], "edges": [ @@ -246,10 +246,10 @@ After `save_agent_draft` processes the input, it stores and emits an enriched dr } ], "flowchart_legend": { - "start": { "shape": "stadium", "color": "#4CAF50" }, - "terminal": { "shape": "stadium", "color": "#F44336" }, - "process": { "shape": "rectangle", "color": "#2196F3" }, - "decision": { "shape": "diamond", "color": "#FF9800" } + "start": { "shape": "stadium", "color": "#3fa66a" }, + "terminal": { "shape": "stadium", "color": "#a04444" }, + "process": { "shape": "rectangle", "color": "#616d83" }, + "decision": { "shape": "diamond", "color": 
"#d89d26" } } } ``` @@ -258,7 +258,7 @@ After `save_agent_draft` processes the input, it stores and emits an enriched dr | Field | Type | Description | |---|---|---| -| `flowchart_type` | `string` | The resolved ISO 5807 symbol type | +| `flowchart_type` | `string` | The resolved flowchart symbol type | | `flowchart_shape` | `string` | SVG shape identifier for the frontend renderer | | `flowchart_color` | `string` | Hex color code for the symbol | @@ -388,7 +388,7 @@ The runtime Level 2 judge evaluates the decision clause against the node's conve An SVG-based flowchart renderer that operates in two modes: -1. **Planning mode** — renders the draft graph with ISO 5807 shapes during the planning phase +1. **Planning mode** — renders the draft graph with flowchart shapes during the planning phase 2. **Runtime overlay mode** — renders the original (pre-dissolution) draft with live execution status when `flowchartMap` and `runtimeNodes` props are provided #### Props @@ -422,7 +422,7 @@ Constants: #### Shape Rendering -The `FlowchartShape` component renders each ISO 5807 shape as SVG primitives. Each shape receives: +The `FlowchartShape` component renders each flowchart shape as SVG primitives. 
Each shape receives: - `x, y, w, h` — bounding box in SVG units - `color` — the hex color from the flowchart type - `selected` — hover state (increases fill opacity from 18% to 28%, brightens stroke) @@ -482,17 +482,22 @@ const STATUS_COLORS = { ### Workspace Integration (`workspace.tsx`) -The workspace conditionally renders `DraftGraph` in three scenarios: +The workspace always renders a single `` component, selecting the best available draft: -| Condition | Renders | Panel Width | -|---|---|---| -| `queenPhase === "planning"` and `draftGraph` exists | `` | 500px | -| `originalDraft` exists (post-planning) | `` | 500px | -| Neither | `` (runtime pipeline view) | 300px | +```tsx + +``` + +The graph panel is user-resizable (drag handle on the right edge, 15%–50% of viewport width, default 30%). **State management:** - `draftGraph`: Set by `draft_graph_updated` SSE event during planning; cleared on phase change -- `originalDraft` + `flowchartMap`: Fetched from `GET /api/sessions/{id}/flowchart-map` when phase transitions away from planning +- `originalDraft` + `flowchartMap`: Fetched from `GET /api/sessions/{id}/flowchart-map` when phase transitions away from planning. For template/legacy agents, `originalDraft` is generated at load time via `generate_fallback_flowchart()`. 
--- From 69f0ff7ac952a5b3a04e58724d751455125a4bc6 Mon Sep 17 00:00:00 2001 From: bryan Date: Mon, 16 Mar 2026 12:22:29 -0700 Subject: [PATCH 20/45] chore: linter update --- core/framework/server/routes_sessions.py | 4 +- core/framework/tools/flowchart_utils.py | 8 +-- core/framework/tools/queen_lifecycle_tools.py | 2 - core/tests/test_flowchart_utils.py | 52 ++++++++++++------- 4 files changed, 37 insertions(+), 29 deletions(-) diff --git a/core/framework/server/routes_sessions.py b/core/framework/server/routes_sessions.py index 2a872664..ce1af646 100644 --- a/core/framework/server/routes_sessions.py +++ b/core/framework/server/routes_sessions.py @@ -64,7 +64,9 @@ def _session_to_live_dict(session) -> dict: "loaded_at": session.loaded_at, "uptime_seconds": round(time.time() - session.loaded_at, 1), "intro_message": getattr(session.runner, "intro_message", "") or "", - "queen_phase": phase_state.phase if phase_state else ("staging" if session.worker_runtime else "planning"), + "queen_phase": phase_state.phase + if phase_state + else ("staging" if session.worker_runtime else "planning"), } diff --git a/core/framework/tools/flowchart_utils.py b/core/framework/tools/flowchart_utils.py index bb797562..2f5b09a1 100644 --- a/core/framework/tools/flowchart_utils.py +++ b/core/framework/tools/flowchart_utils.py @@ -355,12 +355,8 @@ def generate_fallback_flowchart( # Enrich with Goal metadata if goal: draft["goal"] = goal.description or goal.name or "" - draft["success_criteria"] = [ - sc.description for sc in (goal.success_criteria or []) - ] - draft["constraints"] = [ - c.description for c in (goal.constraints or []) - ] + draft["success_criteria"] = [sc.description for sc in (goal.success_criteria or [])] + draft["constraints"] = [c.description for c in (goal.constraints or [])] # Use entry_node/terminal_nodes from GraphSpec if available if graph.entry_node: diff --git a/core/framework/tools/queen_lifecycle_tools.py b/core/framework/tools/queen_lifecycle_tools.py index 
6bace20b..a9c88362 100644 --- a/core/framework/tools/queen_lifecycle_tools.py +++ b/core/framework/tools/queen_lifecycle_tools.py @@ -46,7 +46,6 @@ from framework.runner.preload_validation import credential_errors_to_json, valid from framework.runtime.event_bus import AgentEvent, EventType from framework.server.app import validate_agent_path from framework.tools.flowchart_utils import ( - FLOWCHART_FILENAME, FLOWCHART_TYPES, classify_flowchart_node, load_flowchart_file, @@ -293,7 +292,6 @@ def build_worker_profile(runtime: AgentRuntime, agent_path: Path | str | None = return "\n".join(lines) - # FLOWCHART_TYPES is imported from framework.tools.flowchart_utils diff --git a/core/tests/test_flowchart_utils.py b/core/tests/test_flowchart_utils.py index 0344d5a8..f1a43738 100644 --- a/core/tests/test_flowchart_utils.py +++ b/core/tests/test_flowchart_utils.py @@ -1,7 +1,6 @@ """Tests for framework/tools/flowchart_utils.py.""" import json -from pathlib import Path from types import SimpleNamespace from framework.tools.flowchart_utils import ( @@ -15,14 +14,27 @@ from framework.tools.flowchart_utils import ( ) -def _make_node(id, name="Node", description="", node_type="event_loop", - tools=None, input_keys=None, output_keys=None, - success_criteria="", sub_agents=None): +def _make_node( + id, + name="Node", + description="", + node_type="event_loop", + tools=None, + input_keys=None, + output_keys=None, + success_criteria="", + sub_agents=None, +): """Create a minimal node-like object matching NodeSpec interface.""" return SimpleNamespace( - id=id, name=name, description=description, node_type=node_type, - tools=tools or [], input_keys=input_keys or [], - output_keys=output_keys or [], success_criteria=success_criteria, + id=id, + name=name, + description=description, + node_type=node_type, + tools=tools or [], + input_keys=input_keys or [], + output_keys=output_keys or [], + success_criteria=success_criteria, sub_agents=sub_agents or [], ) @@ -30,17 +42,20 @@ def 
_make_node(id, name="Node", description="", node_type="event_loop", def _make_edge(source, target, condition="on_success", description=""): """Create a minimal edge-like object matching EdgeSpec interface.""" return SimpleNamespace( - source=source, target=target, + source=source, + target=target, condition=SimpleNamespace(value=condition), description=description, ) -def _make_goal(name="Test Goal", description="A test goal", - success_criteria=None, constraints=None): +def _make_goal( + name="Test Goal", description="A test goal", success_criteria=None, constraints=None +): """Create a minimal goal-like object matching Goal interface.""" return SimpleNamespace( - name=name, description=description, + name=name, + description=description, success_criteria=success_criteria or [], constraints=constraints or [], ) @@ -49,7 +64,8 @@ def _make_goal(name="Test Goal", description="A test goal", def _make_graph(nodes, edges, entry_node=None, terminal_nodes=None): """Create a minimal graph-like object matching GraphSpec interface.""" return SimpleNamespace( - nodes=nodes, edges=edges, + nodes=nodes, + edges=edges, entry_node=entry_node or (nodes[0].id if nodes else ""), terminal_nodes=terminal_nodes or [], ) @@ -76,22 +92,19 @@ class TestClassifyFlowchartNode: assert result == "browser" def test_subprocess_node(self): - node = {"id": "n2", "node_type": "event_loop", "tools": [], - "sub_agents": ["sub1"]} + node = {"id": "n2", "node_type": "event_loop", "tools": [], "sub_agents": ["sub1"]} edges = [{"source": "n1", "target": "n2"}, {"source": "n2", "target": "n3"}] result = classify_flowchart_node(node, 1, 3, edges, set()) assert result == "subprocess" def test_default_is_process(self): - node = {"id": "n2", "node_type": "event_loop", "tools": [], - "description": "do stuff"} + node = {"id": "n2", "node_type": "event_loop", "tools": [], "description": "do stuff"} edges = [{"source": "n1", "target": "n2"}, {"source": "n2", "target": "n3"}] result = 
classify_flowchart_node(node, 1, 3, edges, set()) assert result == "process" def test_explicit_override(self): - node = {"id": "n2", "node_type": "event_loop", "tools": [], - "flowchart_type": "database"} + node = {"id": "n2", "node_type": "event_loop", "tools": [], "flowchart_type": "database"} edges = [{"source": "n1", "target": "n2"}] result = classify_flowchart_node(node, 1, 3, edges, set()) assert result == "database" @@ -148,8 +161,7 @@ class TestSynthesizeDraftFromRuntime: # Legend should contain all types assert draft["flowchart_legend"] == { - k: {"shape": v["shape"], "color": v["color"]} - for k, v in FLOWCHART_TYPES.items() + k: {"shape": v["shape"], "color": v["color"]} for k, v in FLOWCHART_TYPES.items() } def test_graph_with_sub_agents(self): From 6f23a30eedc065f643c4ade81b49f94e6cc1293b Mon Sep 17 00:00:00 2001 From: Timothy Date: Mon, 16 Mar 2026 13:46:49 -0700 Subject: [PATCH 21/45] fix: skill lifecycle to runtime --- core/framework/graph/event_loop_node.py | 71 ++++++++-- core/framework/runner/runner.py | 63 ++------- core/framework/runtime/agent_runtime.py | 57 +++++++- core/framework/skills/__init__.py | 3 + core/framework/skills/manager.py | 172 ++++++++++++++++++++++++ core/tests/test_subagent.py | 60 +++++++++ 6 files changed, 360 insertions(+), 66 deletions(-) create mode 100644 core/framework/skills/manager.py diff --git a/core/framework/graph/event_loop_node.py b/core/framework/graph/event_loop_node.py index f6b752c6..dd66e3e8 100644 --- a/core/framework/graph/event_loop_node.py +++ b/core/framework/graph/event_loop_node.py @@ -225,6 +225,12 @@ class LoopConfig: cf_grace_turns: int = 1 tool_doom_loop_enabled: bool = True + # --- Per-tool-call timeout --- + # Maximum seconds a single tool call may take before being killed. + # Prevents hung MCP servers (especially browser/GCU tools) from + # blocking the entire event loop indefinitely. 0 = no timeout. 
+ tool_call_timeout_seconds: float = 120.0 + # --- Lifecycle hooks --- # Hooks are async callables keyed by event name. Supported events: # "session_start" — fires once after the first user message is added, @@ -3356,7 +3362,14 @@ class EventLoopNode(NodeProtocol): return False, "" async def _execute_tool(self, tc: ToolCallEvent) -> ToolResult: - """Execute a tool call, handling both sync and async executors.""" + """Execute a tool call, handling both sync and async executors. + + Applies ``tool_call_timeout_seconds`` from LoopConfig to prevent + hung MCP servers from blocking the event loop indefinitely. + The initial executor call is offloaded to a thread pool so that + sync executors (MCP STDIO tools that block on ``future.result()``) + don't freeze the event loop. + """ if self._tool_executor is None: return ToolResult( tool_use_id=tc.tool_use_id, @@ -3364,9 +3377,39 @@ class EventLoopNode(NodeProtocol): is_error=True, ) tool_use = ToolUse(id=tc.tool_use_id, name=tc.tool_name, input=tc.tool_input) - result = self._tool_executor(tool_use) - if asyncio.iscoroutine(result) or asyncio.isfuture(result): - result = await result + timeout = self._config.tool_call_timeout_seconds + + async def _run() -> ToolResult: + # Offload the executor call to a thread. Sync MCP executors + # block on future.result() — running in a thread keeps the + # event loop free so asyncio.wait_for can fire the timeout. 
+ loop = asyncio.get_running_loop() + result = await loop.run_in_executor( + None, self._tool_executor, tool_use + ) + # Async executors return a coroutine — await it on the loop + if asyncio.iscoroutine(result) or asyncio.isfuture(result): + result = await result + return result + + try: + if timeout > 0: + result = await asyncio.wait_for(_run(), timeout=timeout) + else: + result = await _run() + except TimeoutError: + logger.warning( + "Tool '%s' timed out after %.0fs", tc.tool_name, timeout + ) + return ToolResult( + tool_use_id=tc.tool_use_id, + content=( + f"Tool '{tc.tool_name}' timed out after {timeout:.0f}s. " + "The operation took too long and was cancelled. " + "Try a simpler request or a different approach." + ), + is_error=True, + ) return result def _record_learning(self, key: str, value: Any) -> None: @@ -4619,11 +4662,21 @@ class EventLoopNode(NodeProtocol): subagent_tool_names = set(subagent_spec.tools or []) tool_source = ctx.all_tools if ctx.all_tools else ctx.available_tools - subagent_tools = [ - t - for t in tool_source - if t.name in subagent_tool_names and t.name != "delegate_to_sub_agent" - ] + # GCU auto-population: GCU nodes declare tools=[] because the runner + # auto-populates them at setup time. But that expansion doesn't reach + # subagents invoked via delegate_to_sub_agent — the subagent spec still + # has the original empty list. When a GCU subagent has no declared + # tools, include all catalog tools so browser tools are available. 
+ if subagent_spec.node_type == "gcu" and not subagent_tool_names: + subagent_tools = [ + t for t in tool_source if t.name != "delegate_to_sub_agent" + ] + else: + subagent_tools = [ + t + for t in tool_source + if t.name in subagent_tool_names and t.name != "delegate_to_sub_agent" + ] missing = subagent_tool_names - {t.name for t in subagent_tools} if missing: diff --git a/core/framework/runner/runner.py b/core/framework/runner/runner.py index aeca0625..ea27b0b3 100644 --- a/core/framework/runner/runner.py +++ b/core/framework/runner/runner.py @@ -1334,54 +1334,18 @@ class AgentRunner: except Exception: pass # Best-effort — agent works without account info - # Skill discovery and default skill loading - skills_catalog_prompt = "" - protocols_prompt = "" - try: - from framework.skills.config import SkillsConfig - from framework.skills.catalog import SkillCatalog - from framework.skills.defaults import DefaultSkillManager - from framework.skills.discovery import DiscoveryConfig, SkillDiscovery + # Skill configuration — the runtime handles discovery, loading, and + # prompt rendering. The runner just builds the config. 
+ from framework.skills.config import SkillsConfig + from framework.skills.manager import SkillsManagerConfig - # Build skills config from agent module vars - skills_config = SkillsConfig.from_agent_vars( + skills_manager_config = SkillsManagerConfig( + skills_config=SkillsConfig.from_agent_vars( default_skills=getattr(self, "_agent_default_skills", None), skills=getattr(self, "_agent_skills", None), - ) - - # Discover community skills - discovery = SkillDiscovery(DiscoveryConfig(project_root=self.agent_path)) - discovered = discovery.discover() - - # Build catalog (community skills only — defaults handled separately) - catalog = SkillCatalog(discovered) - skills_catalog_prompt = catalog.to_prompt() - - # Handle pre-activated skills - if skills_config.skills: - pre_activated = catalog.build_pre_activated_prompt(skills_config.skills) - if pre_activated: - if skills_catalog_prompt: - skills_catalog_prompt = f"{skills_catalog_prompt}\n\n{pre_activated}" - else: - skills_catalog_prompt = pre_activated - - # Load and configure default skills - default_mgr = DefaultSkillManager(config=skills_config) - default_mgr.load() - default_mgr.log_active_skills() - protocols_prompt = default_mgr.build_protocols_prompt() - except Exception: - logger.warning("Skill system init failed (non-fatal)", exc_info=True) - - if protocols_prompt: - logger.info( - "Skill system ready: protocols=%d chars, catalog=%d chars", - len(protocols_prompt), - len(skills_catalog_prompt), - ) - else: - logger.warning("Skill system produced empty protocols_prompt") + ), + project_root=self.agent_path, + ) self._setup_agent_runtime( tools, @@ -1390,8 +1354,7 @@ class AgentRunner: accounts_data=accounts_data, tool_provider_map=tool_provider_map, event_bus=event_bus, - skills_catalog_prompt=skills_catalog_prompt, - protocols_prompt=protocols_prompt, + skills_manager_config=skills_manager_config, ) def _get_api_key_env_var(self, model: str) -> str | None: @@ -1487,8 +1450,7 @@ class AgentRunner: accounts_data: 
list[dict] | None = None, tool_provider_map: dict[str, str] | None = None, event_bus=None, - skills_catalog_prompt: str = "", - protocols_prompt: str = "", + skills_manager_config=None, ) -> None: """Set up multi-entry-point execution using AgentRuntime.""" entry_points = [] @@ -1548,8 +1510,7 @@ class AgentRunner: accounts_data=accounts_data, tool_provider_map=tool_provider_map, event_bus=event_bus, - skills_catalog_prompt=skills_catalog_prompt, - protocols_prompt=protocols_prompt, + skills_manager_config=skills_manager_config, ) # Pass intro_message through for TUI display diff --git a/core/framework/runtime/agent_runtime.py b/core/framework/runtime/agent_runtime.py index bc2c4290..d7e2ed16 100644 --- a/core/framework/runtime/agent_runtime.py +++ b/core/framework/runtime/agent_runtime.py @@ -29,6 +29,7 @@ if TYPE_CHECKING: from framework.graph.edge import GraphSpec from framework.graph.goal import Goal from framework.llm.provider import LLMProvider, Tool + from framework.skills.manager import SkillsManagerConfig logger = logging.getLogger(__name__) @@ -132,6 +133,8 @@ class AgentRuntime: accounts_data: list[dict] | None = None, tool_provider_map: dict[str, str] | None = None, event_bus: "EventBus | None" = None, + skills_manager_config: "SkillsManagerConfig | None" = None, + # Deprecated — pass skills_manager_config instead. skills_catalog_prompt: str = "", protocols_prompt: str = "", ): @@ -155,17 +158,42 @@ class AgentRuntime: event_bus: Optional external EventBus. If provided, the runtime shares this bus instead of creating its own. Used by SessionManager to share a single bus between queen, worker, and judge. - skills_catalog_prompt: Available skills catalog for system prompt - protocols_prompt: Default skill operational protocols for system prompt + skills_manager_config: Skill configuration — the runtime owns + discovery, loading, and prompt rendering internally. + skills_catalog_prompt: Deprecated. Pre-rendered skills catalog. 
+ protocols_prompt: Deprecated. Pre-rendered operational protocols. """ + from framework.skills.manager import SkillsManager + self.graph = graph self.goal = goal self._config = config or AgentRuntimeConfig() self._runtime_log_store = runtime_log_store self._checkpoint_config = checkpoint_config self.accounts_prompt = accounts_prompt - self.skills_catalog_prompt = skills_catalog_prompt - self.protocols_prompt = protocols_prompt + + # --- Skill lifecycle: runtime owns the SkillsManager --- + if skills_manager_config is not None: + # New path: config-driven, runtime handles loading + self._skills_manager = SkillsManager(skills_manager_config) + self._skills_manager.load() + elif skills_catalog_prompt or protocols_prompt: + # Legacy path: caller passed pre-rendered strings + import warnings + + warnings.warn( + "Passing pre-rendered skills_catalog_prompt/protocols_prompt " + "is deprecated. Pass skills_manager_config instead.", + DeprecationWarning, + stacklevel=2, + ) + self._skills_manager = SkillsManager.from_precomputed( + skills_catalog_prompt, protocols_prompt + ) + else: + # Bare constructor: auto-load defaults + self._skills_manager = SkillsManager() + self._skills_manager.load() # Primary graph identity self._graph_id: str = graph_id or "primary" @@ -222,6 +250,18 @@ class AgentRuntime: # Optional greeting shown to user on TUI load (set by AgentRunner) self.intro_message: str = "" + # ------------------------------------------------------------------ + # Skill prompt accessors (read by ExecutionStream constructors) + # ------------------------------------------------------------------ + + @property + def skills_catalog_prompt(self) -> str: + return self._skills_manager.skills_catalog_prompt + + @property + def protocols_prompt(self) -> str: + return self._skills_manager.protocols_prompt + def register_entry_point(self, spec: EntryPointSpec) -> None: """ Register a named entry point for the agent. 
@@ -1716,6 +1756,8 @@ def create_agent_runtime( accounts_data: list[dict] | None = None, tool_provider_map: dict[str, str] | None = None, event_bus: "EventBus | None" = None, + skills_manager_config: "SkillsManagerConfig | None" = None, + # Deprecated — pass skills_manager_config instead. skills_catalog_prompt: str = "", protocols_prompt: str = "", ) -> AgentRuntime: @@ -1744,8 +1786,10 @@ def create_agent_runtime( accounts_data: Raw account data for per-node prompt generation. tool_provider_map: Tool name to provider name mapping for account routing. event_bus: Optional external EventBus to share with other components. - skills_catalog_prompt: Available skills catalog for system prompt. - protocols_prompt: Default skill operational protocols for system prompt. + skills_manager_config: Skill configuration — the runtime owns + discovery, loading, and prompt rendering internally. + skills_catalog_prompt: Deprecated. Pre-rendered skills catalog. + protocols_prompt: Deprecated. Pre-rendered operational protocols. 
Returns: Configured AgentRuntime (not yet started) @@ -1772,6 +1816,7 @@ def create_agent_runtime( accounts_data=accounts_data, tool_provider_map=tool_provider_map, event_bus=event_bus, + skills_manager_config=skills_manager_config, skills_catalog_prompt=skills_catalog_prompt, protocols_prompt=protocols_prompt, ) diff --git a/core/framework/skills/__init__.py b/core/framework/skills/__init__.py index 4727085e..d3e39ca9 100644 --- a/core/framework/skills/__init__.py +++ b/core/framework/skills/__init__.py @@ -9,6 +9,7 @@ from framework.skills.catalog import SkillCatalog from framework.skills.config import DefaultSkillConfig, SkillsConfig from framework.skills.defaults import DefaultSkillManager from framework.skills.discovery import DiscoveryConfig, SkillDiscovery +from framework.skills.manager import SkillsManager, SkillsManagerConfig from framework.skills.parser import ParsedSkill, parse_skill_md __all__ = [ @@ -19,5 +20,7 @@ __all__ = [ "SkillCatalog", "SkillDiscovery", "SkillsConfig", + "SkillsManager", + "SkillsManagerConfig", "parse_skill_md", ] diff --git a/core/framework/skills/manager.py b/core/framework/skills/manager.py new file mode 100644 index 00000000..562cd52a --- /dev/null +++ b/core/framework/skills/manager.py @@ -0,0 +1,172 @@ +"""Unified skill lifecycle manager. + +``SkillsManager`` is the single facade that owns skill discovery, loading, +and prompt rendering. The runtime creates one at startup and downstream +layers read the cached prompt strings. 
+ +Typical usage — **config-driven** (runner passes configuration):: + + config = SkillsManagerConfig( + skills_config=SkillsConfig.from_agent_vars(...), + project_root=agent_path, + ) + mgr = SkillsManager(config) + mgr.load() + print(mgr.protocols_prompt) # default skill protocols + print(mgr.skills_catalog_prompt) # community skills XML + +Typical usage — **bare** (exported agents, SDK users):: + + mgr = SkillsManager() # default config + mgr.load() # loads all 6 default skills, no community discovery +""" + +from __future__ import annotations + +import logging +from dataclasses import dataclass, field +from pathlib import Path + +from framework.skills.config import SkillsConfig + +logger = logging.getLogger(__name__) + + +@dataclass +class SkillsManagerConfig: + """Everything the runtime needs to configure skills. + + Attributes: + skills_config: Per-skill enable/disable and overrides. + project_root: Agent directory for community skill discovery. + When ``None``, community discovery is skipped. + skip_community_discovery: Explicitly skip community scanning + even when ``project_root`` is set. + """ + + skills_config: SkillsConfig = field(default_factory=SkillsConfig) + project_root: Path | None = None + skip_community_discovery: bool = False + + +class SkillsManager: + """Unified skill lifecycle: discovery → loading → prompt rendering. + + The runtime creates one instance during init and owns it for the + lifetime of the process. Downstream layers (``ExecutionStream``, + ``GraphExecutor``, ``NodeContext``, ``EventLoopNode``) receive the + cached prompt strings via property accessors. 
+ """ + + def __init__(self, config: SkillsManagerConfig | None = None) -> None: + self._config = config or SkillsManagerConfig() + self._loaded = False + self._catalog_prompt: str = "" + self._protocols_prompt: str = "" + + # ------------------------------------------------------------------ + # Factory for backwards-compat bridge + # ------------------------------------------------------------------ + + @classmethod + def from_precomputed( + cls, + skills_catalog_prompt: str = "", + protocols_prompt: str = "", + ) -> SkillsManager: + """Wrap pre-rendered prompt strings (legacy callers). + + Returns a manager that skips discovery/loading and just returns + the provided strings. Used by the deprecation bridge in + ``AgentRuntime`` when callers pass raw prompt strings. + """ + mgr = cls.__new__(cls) + mgr._config = SkillsManagerConfig() + mgr._loaded = True # skip load() + mgr._catalog_prompt = skills_catalog_prompt + mgr._protocols_prompt = protocols_prompt + return mgr + + # ------------------------------------------------------------------ + # Lifecycle + # ------------------------------------------------------------------ + + def load(self) -> None: + """Discover, load, and cache skill prompts. Idempotent.""" + if self._loaded: + return + self._loaded = True + + try: + self._do_load() + except Exception: + logger.warning("Skill system init failed (non-fatal)", exc_info=True) + + def _do_load(self) -> None: + """Internal load — may raise; caller catches.""" + from framework.skills.catalog import SkillCatalog + from framework.skills.defaults import DefaultSkillManager + from framework.skills.discovery import DiscoveryConfig, SkillDiscovery + + skills_config = self._config.skills_config + + # 1. 
Community skill discovery (when project_root is available) + catalog_prompt = "" + if ( + self._config.project_root is not None + and not self._config.skip_community_discovery + ): + discovery = SkillDiscovery( + DiscoveryConfig(project_root=self._config.project_root) + ) + discovered = discovery.discover() + catalog = SkillCatalog(discovered) + catalog_prompt = catalog.to_prompt() + + # Pre-activated community skills + if skills_config.skills: + pre_activated = catalog.build_pre_activated_prompt( + skills_config.skills + ) + if pre_activated: + if catalog_prompt: + catalog_prompt = f"{catalog_prompt}\n\n{pre_activated}" + else: + catalog_prompt = pre_activated + + # 2. Default skills (always loaded unless explicitly disabled) + default_mgr = DefaultSkillManager(config=skills_config) + default_mgr.load() + default_mgr.log_active_skills() + protocols_prompt = default_mgr.build_protocols_prompt() + + # 3. Cache + self._catalog_prompt = catalog_prompt + self._protocols_prompt = protocols_prompt + + if protocols_prompt: + logger.info( + "Skill system ready: protocols=%d chars, catalog=%d chars", + len(protocols_prompt), + len(catalog_prompt), + ) + else: + logger.warning("Skill system produced empty protocols_prompt") + + # ------------------------------------------------------------------ + # Prompt accessors (consumed by downstream layers) + # ------------------------------------------------------------------ + + @property + def skills_catalog_prompt(self) -> str: + """Community skills XML catalog for system prompt injection.""" + return self._catalog_prompt + + @property + def protocols_prompt(self) -> str: + """Default skill operational protocols for system prompt injection.""" + return self._protocols_prompt + + @property + def is_loaded(self) -> bool: + return self._loaded diff --git a/core/tests/test_subagent.py b/core/tests/test_subagent.py index e87c5967..07297c1b 100644 --- a/core/tests/test_subagent.py +++ b/core/tests/test_subagent.py @@ -299,6 +299,66 @@ 
class TestSubagentExecution: assert "metadata" in result_data assert result_data["metadata"]["agent_id"] == "researcher" + @pytest.mark.asyncio + async def test_gcu_subagent_auto_populates_tools_from_catalog(self, runtime): + """GCU subagent with tools=[] should receive all catalog tools (auto-populate). + + GCU nodes declare tools=[] because the runner expands them at setup time. + But _execute_subagent filters by subagent_spec.tools, which is still empty. + The fix: when subagent is GCU with no declared tools, include all catalog tools. + """ + gcu_spec = NodeSpec( + id="browser_worker", + name="Browser Worker", + description="GCU browser subagent", + node_type="gcu", + output_keys=["result"], + tools=[], # Empty — expects auto-population + ) + + parent_spec = NodeSpec( + id="parent", + name="Parent", + description="Orchestrator", + node_type="event_loop", + output_keys=["result"], + sub_agents=["browser_worker"], + ) + + spy_llm = MockStreamingLLM( + [set_output_scenario("result", "scraped"), text_finish_scenario()] + ) + + browser_tool = Tool(name="browser_snapshot", description="Snapshot") + + node = EventLoopNode(config=LoopConfig(max_iterations=5)) + memory = SharedMemory() + scoped = memory.with_permissions(read_keys=[], write_keys=["result"]) + + ctx = NodeContext( + runtime=runtime, + node_id="parent", + node_spec=parent_spec, + memory=scoped, + input_data={}, + llm=spy_llm, + available_tools=[], + all_tools=[browser_tool], + goal_context="", + goal=None, + node_registry={"browser_worker": gcu_spec}, + ) + + result = await node._execute_subagent(ctx, "browser_worker", "Scrape example.com") + assert result.is_error is False + + # Verify subagent LLM received browser tools from catalog + assert spy_llm.stream_calls, "LLM should have been called" + first_call_tools = spy_llm.stream_calls[0]["tools"] + tool_names = {t.name for t in first_call_tools} if first_call_tools else set() + assert "browser_snapshot" in tool_names + assert "delegate_to_sub_agent" not 
in tool_names + # --------------------------------------------------------------------------- # Tests for nested subagent prevention From b47175d1df0a4570aca94848d25c58d272dc674d Mon Sep 17 00:00:00 2001 From: Richard Tang Date: Mon, 16 Mar 2026 14:10:30 -0700 Subject: [PATCH 22/45] feat: add hive llm spec in the quickstart --- core/framework/llm/litellm.py | 10 +++ core/framework/runner/runner.py | 4 + core/tests/dummy_agents/run_all.py | 1 + quickstart.ps1 | 121 +++++++++++++++++++++++++---- quickstart.sh | 67 +++++++++++----- scripts/check_llm_key.py | 8 ++ 6 files changed, 178 insertions(+), 33 deletions(-) diff --git a/core/framework/llm/litellm.py b/core/framework/llm/litellm.py index 4340f6b1..08f23e53 100644 --- a/core/framework/llm/litellm.py +++ b/core/framework/llm/litellm.py @@ -158,6 +158,10 @@ def _model_supports_cache_control(model: str) -> bool: # enforces a coding-agent whitelist that blocks unknown User-Agents. KIMI_API_BASE = "https://api.kimi.com/coding" +# Hive LLM uses an Anthropic-compatible endpoint at api.adenhq.com. +# litellm's Anthropic handler appends /v1/messages, so the base is just the host. +HIVE_API_BASE = "https://api.adenhq.com" + # Empty-stream retries use a short fixed delay, not the rate-limit backoff. # Conversation-structure issues are deterministic — long waits don't help. EMPTY_STREAM_MAX_RETRIES = 3 @@ -399,6 +403,10 @@ class LiteLLMProvider(LLMProvider): # Strip a trailing /v1 in case the user's saved config has the old value. 
if api_base and api_base.rstrip("/").endswith("/v1"): api_base = api_base.rstrip("/")[:-3] + elif model.lower().startswith("hive/"): + model = "anthropic/" + model[len("hive/"):] + if api_base and api_base.rstrip("/").endswith("/v1"): + api_base = api_base.rstrip("/")[:-3] self.model = model self.api_key = api_key self.api_base = api_base or self._default_api_base_for_model(_original_model) @@ -428,6 +436,8 @@ class LiteLLMProvider(LLMProvider): return MINIMAX_API_BASE if model_lower.startswith("kimi/"): return KIMI_API_BASE + if model_lower.startswith("hive/"): + return HIVE_API_BASE return None def _completion_with_rate_limit_retry( diff --git a/core/framework/runner/runner.py b/core/framework/runner/runner.py index 6a5cc2d4..71dfe559 100644 --- a/core/framework/runner/runner.py +++ b/core/framework/runner/runner.py @@ -1364,6 +1364,8 @@ class AgentRunner: return "MINIMAX_API_KEY" elif model_lower.startswith("kimi/"): return "KIMI_API_KEY" + elif model_lower.startswith("hive/"): + return "HIVE_API_KEY" else: # Default: assume OpenAI-compatible return "OPENAI_API_KEY" @@ -1386,6 +1388,8 @@ class AgentRunner: cred_id = "minimax" elif model_lower.startswith("kimi/"): cred_id = "kimi" + elif model_lower.startswith("hive/"): + cred_id = "hive" # Add more mappings as providers are added to LLM_CREDENTIALS if cred_id is None: diff --git a/core/tests/dummy_agents/run_all.py b/core/tests/dummy_agents/run_all.py index d5ca4ea5..120f53c8 100644 --- a/core/tests/dummy_agents/run_all.py +++ b/core/tests/dummy_agents/run_all.py @@ -33,6 +33,7 @@ API_KEY_PROVIDERS = [ ("TOGETHER_API_KEY", "Together AI", "together_ai/meta-llama/Llama-3.3-70B-Instruct-Turbo"), ("DEEPSEEK_API_KEY", "DeepSeek", "deepseek-chat"), ("MINIMAX_API_KEY", "MiniMax", "MiniMax-M2.5"), + ("HIVE_API_KEY", "Hive LLM", "hive/kimi-2.5"), ] diff --git a/quickstart.ps1 b/quickstart.ps1 index 354b9164..c6f28ec6 100644 --- a/quickstart.ps1 +++ b/quickstart.ps1 @@ -903,6 +903,11 @@ $kimiKey = 
[System.Environment]::GetEnvironmentVariable("KIMI_API_KEY", "User") if (-not $kimiKey) { $kimiKey = $env:KIMI_API_KEY } if ($kimiKey) { $KimiCredDetected = $true } +$HiveCredDetected = $false +$hiveKey = [System.Environment]::GetEnvironmentVariable("HIVE_API_KEY", "User") +if (-not $hiveKey) { $hiveKey = $env:HIVE_API_KEY } +if ($hiveKey) { $HiveCredDetected = $true } + # Detect API key providers $ProviderMenuEnvVars = @("ANTHROPIC_API_KEY", "OPENAI_API_KEY", "GEMINI_API_KEY", "GROQ_API_KEY", "CEREBRAS_API_KEY") $ProviderMenuNames = @("Anthropic (Claude) - Recommended", "OpenAI (GPT)", "Google Gemini - Free tier available", "Groq - Fast, free tier", "Cerebras - Fast, free tier") @@ -933,6 +938,7 @@ if (Test-Path $HiveConfigFile) { elseif ($prevLlm.use_kimi_code_subscription) { $PrevSubMode = "kimi_code" } elseif ($prevLlm.api_base -and $prevLlm.api_base -like "*api.z.ai*") { $PrevSubMode = "zai_code" } elseif ($prevLlm.api_base -and $prevLlm.api_base -like "*api.kimi.com*") { $PrevSubMode = "kimi_code" } + elseif ($prevLlm.provider -eq "hive" -or ($prevLlm.api_base -and $prevLlm.api_base -like "*api.adenhq.com*")) { $PrevSubMode = "hive_llm" } } } catch { } } @@ -946,6 +952,7 @@ if ($PrevSubMode -or $PrevProvider) { "zai_code" { if ($ZaiCredDetected) { $prevCredValid = $true } } "codex" { if ($CodexCredDetected) { $prevCredValid = $true } } "kimi_code" { if ($KimiCredDetected) { $prevCredValid = $true } } + "hive_llm" { if ($HiveCredDetected) { $prevCredValid = $true } } default { if ($PrevEnvVar) { $envVal = [System.Environment]::GetEnvironmentVariable($PrevEnvVar, "Process") @@ -960,14 +967,15 @@ if ($PrevSubMode -or $PrevProvider) { "zai_code" { $DefaultChoice = "2" } "codex" { $DefaultChoice = "3" } "kimi_code" { $DefaultChoice = "4" } + "hive_llm" { $DefaultChoice = "5" } } if (-not $DefaultChoice) { switch ($PrevProvider) { - "anthropic" { $DefaultChoice = "5" } - "openai" { $DefaultChoice = "6" } - "gemini" { $DefaultChoice = "7" } - "groq" { $DefaultChoice 
= "8" } - "cerebras" { $DefaultChoice = "9" } + "anthropic" { $DefaultChoice = "6" } + "openai" { $DefaultChoice = "7" } + "gemini" { $DefaultChoice = "8" } + "groq" { $DefaultChoice = "9" } + "cerebras" { $DefaultChoice = "10" } "kimi" { $DefaultChoice = "4" } } } @@ -1007,12 +1015,19 @@ Write-Host ") Kimi Code Subscription " -NoNewline Write-Color -Text "(use your Kimi Code plan)" -Color DarkGray -NoNewline if ($KimiCredDetected) { Write-Color -Text " (credential detected)" -Color Green } else { Write-Host "" } +# 5) Hive LLM +Write-Host " " -NoNewline +Write-Color -Text "5" -Color Cyan -NoNewline +Write-Host ") Hive LLM " -NoNewline +Write-Color -Text "(use your Hive API key)" -Color DarkGray -NoNewline +if ($HiveCredDetected) { Write-Color -Text " (credential detected)" -Color Green } else { Write-Host "" } + Write-Host "" Write-Color -Text " API key providers:" -Color Cyan -# 5-9) API key providers +# 6-10) API key providers for ($idx = 0; $idx -lt $ProviderMenuEnvVars.Count; $idx++) { - $num = $idx + 5 + $num = $idx + 6 $envVal = [System.Environment]::GetEnvironmentVariable($ProviderMenuEnvVars[$idx], "Process") if (-not $envVal) { $envVal = [System.Environment]::GetEnvironmentVariable($ProviderMenuEnvVars[$idx], "User") } Write-Host " " -NoNewline @@ -1022,7 +1037,7 @@ for ($idx = 0; $idx -lt $ProviderMenuEnvVars.Count; $idx++) { } Write-Host " " -NoNewline -Write-Color -Text "10" -Color Cyan -NoNewline +Write-Color -Text "11" -Color Cyan -NoNewline Write-Host ") Skip for now" Write-Host "" @@ -1033,16 +1048,16 @@ if ($DefaultChoice) { while ($true) { if ($DefaultChoice) { - $raw = Read-Host "Enter choice (1-10) [$DefaultChoice]" + $raw = Read-Host "Enter choice (1-11) [$DefaultChoice]" if ([string]::IsNullOrWhiteSpace($raw)) { $raw = $DefaultChoice } } else { - $raw = Read-Host "Enter choice (1-10)" + $raw = Read-Host "Enter choice (1-11)" } if ($raw -match '^\d+$') { $num = [int]$raw - if ($num -ge 1 -and $num -le 10) { break } + if ($num -ge 1 -and $num 
-le 11) { break } } - Write-Color -Text "Invalid choice. Please enter 1-10" -Color Red + Write-Color -Text "Invalid choice. Please enter 1-11" -Color Red } switch ($num) { @@ -1121,9 +1136,21 @@ switch ($num) { Write-Ok "Using Kimi Code subscription" Write-Color -Text " Model: kimi-k2.5 | API: api.kimi.com/coding" -Color DarkGray } - { $_ -ge 5 -and $_ -le 9 } { + 5 { + # Hive LLM + $SubscriptionMode = "hive_llm" + $SelectedProviderId = "hive" + $SelectedEnvVar = "HIVE_API_KEY" + $SelectedModel = "kimi-2.5" + $SelectedMaxTokens = 32768 + $SelectedMaxContextTokens = 120000 + Write-Host "" + Write-Ok "Using Hive LLM" + Write-Color -Text " Model: kimi-2.5 | API: api.adenhq.com" -Color DarkGray + } + { $_ -ge 6 -and $_ -le 10 } { # API key providers - $provIdx = $num - 5 + $provIdx = $num - 6 $SelectedEnvVar = $ProviderMenuEnvVars[$provIdx] $SelectedProviderId = $ProviderMenuIds[$provIdx] $providerName = $ProviderMenuNames[$provIdx] -replace ' - .*', '' # strip description @@ -1194,7 +1221,7 @@ switch ($num) { } } } - 10 { + 11 { Write-Host "" Write-Warn "Skipped. An LLM API key is required to test and use worker agents." Write-Host " Add your API key later by running:" @@ -1335,6 +1362,67 @@ if ($SubscriptionMode -eq "kimi_code") { } } +# For Hive LLM: prompt for API key with verification + retry +if ($SubscriptionMode -eq "hive_llm") { + while ($true) { + $existingHive = [System.Environment]::GetEnvironmentVariable("HIVE_API_KEY", "User") + if (-not $existingHive) { $existingHive = $env:HIVE_API_KEY } + + if ($existingHive) { + $masked = $existingHive.Substring(0, [Math]::Min(4, $existingHive.Length)) + "..." 
+ $existingHive.Substring([Math]::Max(0, $existingHive.Length - 4)) + Write-Host "" + Write-Color -Text " $([char]0x2B22) Current Hive key: $masked" -Color Green + Write-Host "" + $apiKey = Read-Host "Paste a new Hive API key (or press Enter to keep current)" + } else { + Write-Host "" + $apiKey = Read-Host "Paste your Hive API key (or press Enter to skip)" + } + + if ($apiKey) { + [System.Environment]::SetEnvironmentVariable("HIVE_API_KEY", $apiKey, "User") + $env:HIVE_API_KEY = $apiKey + Write-Host "" + Write-Ok "Hive API key saved as User environment variable" + + # Health check the new key + Write-Host " Verifying Hive API key... " -NoNewline + try { + $hcOutput = & $PythonCmd scripts/check_llm_key.py hive $apiKey "https://api.adenhq.com" 2>&1 + $hcJson = $hcOutput | ConvertFrom-Json + if ($hcJson.valid -eq $true) { + Write-Color -Text "ok" -Color Green + break + } elseif ($hcJson.valid -eq $false) { + Write-Color -Text "failed" -Color Red + Write-Warn $hcJson.message + [System.Environment]::SetEnvironmentVariable("HIVE_API_KEY", $null, "User") + Remove-Item -Path "Env:\HIVE_API_KEY" -ErrorAction SilentlyContinue + Write-Host "" + Read-Host " Press Enter to try again" + } else { + Write-Color -Text "--" -Color Yellow + Write-Color -Text " Could not verify key (network issue). The key has been saved." -Color DarkGray + break + } + } catch { + Write-Color -Text "--" -Color Yellow + break + } + } elseif (-not $existingHive) { + Write-Host "" + Write-Warn "Skipped. 
Add your Hive API key later:" + Write-Color -Text " [System.Environment]::SetEnvironmentVariable('HIVE_API_KEY', 'your-key', 'User')" -Color Cyan + $SelectedEnvVar = "" + $SelectedProviderId = "" + $SubscriptionMode = "" + break + } else { + break + } + } +} + # Prompt for model if not already selected (manual provider path) if ($SelectedProviderId -and -not $SelectedModel) { $modelSel = Get-ModelSelection $SelectedProviderId @@ -1375,6 +1463,9 @@ if ($SelectedProviderId) { } elseif ($SubscriptionMode -eq "kimi_code") { $config.llm["api_base"] = "https://api.kimi.com/coding" $config.llm["api_key_env_var"] = $SelectedEnvVar + } elseif ($SubscriptionMode -eq "hive_llm") { + $config.llm["api_base"] = "https://api.adenhq.com" + $config.llm["api_key_env_var"] = $SelectedEnvVar } else { $config.llm["api_key_env_var"] = $SelectedEnvVar } diff --git a/quickstart.sh b/quickstart.sh index 7b76fa9a..a38b3d83 100755 --- a/quickstart.sh +++ b/quickstart.sh @@ -864,6 +864,11 @@ elif [ -n "${KIMI_API_KEY:-}" ]; then KIMI_CRED_DETECTED=true fi +HIVE_CRED_DETECTED=false +if [ -n "${HIVE_API_KEY:-}" ]; then + HIVE_CRED_DETECTED=true +fi + # Detect API key providers if [ "$USE_ASSOC_ARRAYS" = true ]; then for env_var in "${!PROVIDER_NAMES[@]}"; do @@ -901,6 +906,7 @@ try: elif llm.get('use_codex_subscription'): sub = 'codex' elif llm.get('use_kimi_code_subscription'): sub = 'kimi_code' elif llm.get('provider', '') == 'minimax' or 'api.minimax.io' in llm.get('api_base', ''): sub = 'minimax_code' + elif llm.get('provider', '') == 'hive' or 'api.adenhq.com' in llm.get('api_base', ''): sub = 'hive_llm' elif 'api.z.ai' in llm.get('api_base', ''): sub = 'zai_code' print(f'PREV_SUB_MODE={sub}') except Exception: @@ -917,6 +923,7 @@ if [ -n "$PREV_SUB_MODE" ] || [ -n "$PREV_PROVIDER" ]; then zai_code) [ "$ZAI_CRED_DETECTED" = true ] && PREV_CRED_VALID=true ;; codex) [ "$CODEX_CRED_DETECTED" = true ] && PREV_CRED_VALID=true ;; kimi_code) [ "$KIMI_CRED_DETECTED" = true ] && 
PREV_CRED_VALID=true ;; + hive_llm) [ "$HIVE_CRED_DETECTED" = true ] && PREV_CRED_VALID=true ;; *) # API key provider — check if the env var is set if [ -n "$PREV_ENV_VAR" ] && [ -n "${!PREV_ENV_VAR}" ]; then @@ -932,16 +939,18 @@ if [ -n "$PREV_SUB_MODE" ] || [ -n "$PREV_PROVIDER" ]; then codex) DEFAULT_CHOICE=3 ;; minimax_code) DEFAULT_CHOICE=4 ;; kimi_code) DEFAULT_CHOICE=5 ;; + hive_llm) DEFAULT_CHOICE=6 ;; esac if [ -z "$DEFAULT_CHOICE" ]; then case "$PREV_PROVIDER" in - anthropic) DEFAULT_CHOICE=6 ;; - openai) DEFAULT_CHOICE=7 ;; - gemini) DEFAULT_CHOICE=8 ;; - groq) DEFAULT_CHOICE=9 ;; - cerebras) DEFAULT_CHOICE=10 ;; + anthropic) DEFAULT_CHOICE=7 ;; + openai) DEFAULT_CHOICE=8 ;; + gemini) DEFAULT_CHOICE=9 ;; + groq) DEFAULT_CHOICE=10 ;; + cerebras) DEFAULT_CHOICE=11 ;; minimax) DEFAULT_CHOICE=4 ;; kimi) DEFAULT_CHOICE=5 ;; + hive) DEFAULT_CHOICE=6 ;; esac fi fi @@ -987,14 +996,21 @@ else echo -e " ${CYAN}5)${NC} Kimi Code Subscription ${DIM}(use your Kimi Code plan)${NC}" fi +# 6) Hive LLM +if [ "$HIVE_CRED_DETECTED" = true ]; then + echo -e " ${CYAN}6)${NC} Hive LLM ${DIM}(use your Hive API key)${NC} ${GREEN}(credential detected)${NC}" +else + echo -e " ${CYAN}6)${NC} Hive LLM ${DIM}(use your Hive API key)${NC}" +fi + echo "" echo -e " ${CYAN}${BOLD}API key providers:${NC}" -# 6-10) API key providers — show (credential detected) if key already set +# 7-11) API key providers — show (credential detected) if key already set PROVIDER_MENU_ENVS=(ANTHROPIC_API_KEY OPENAI_API_KEY GEMINI_API_KEY GROQ_API_KEY CEREBRAS_API_KEY) PROVIDER_MENU_NAMES=("Anthropic (Claude) - Recommended" "OpenAI (GPT)" "Google Gemini - Free tier available" "Groq - Fast, free tier" "Cerebras - Fast, free tier") for idx in 0 1 2 3 4; do - num=$((idx + 6)) + num=$((idx + 7)) env_var="${PROVIDER_MENU_ENVS[$idx]}" if [ -n "${!env_var}" ]; then echo -e " ${CYAN}$num)${NC} ${PROVIDER_MENU_NAMES[$idx]} ${GREEN}(credential detected)${NC}" @@ -1003,7 +1019,7 @@ for idx in 0 1 2 3 4; do fi done 
-echo -e " ${CYAN}11)${NC} Skip for now" +echo -e " ${CYAN}12)${NC} Skip for now" echo "" if [ -n "$DEFAULT_CHOICE" ]; then @@ -1013,15 +1029,15 @@ fi while true; do if [ -n "$DEFAULT_CHOICE" ]; then - read -r -p "Enter choice (1-11) [$DEFAULT_CHOICE]: " choice || true + read -r -p "Enter choice (1-12) [$DEFAULT_CHOICE]: " choice || true choice="${choice:-$DEFAULT_CHOICE}" else - read -r -p "Enter choice (1-11): " choice || true + read -r -p "Enter choice (1-12): " choice || true fi - if [[ "$choice" =~ ^[0-9]+$ ]] && [ "$choice" -ge 1 ] && [ "$choice" -le 11 ]; then + if [[ "$choice" =~ ^[0-9]+$ ]] && [ "$choice" -ge 1 ] && [ "$choice" -le 12 ]; then break fi - echo -e "${RED}Invalid choice. Please enter 1-11${NC}" + echo -e "${RED}Invalid choice. Please enter 1-12${NC}" done case $choice in @@ -1118,36 +1134,51 @@ case $choice in echo -e " ${DIM}Model: kimi-k2.5 | API: api.kimi.com/coding${NC}" ;; 6) + # Hive LLM + SUBSCRIPTION_MODE="hive_llm" + SELECTED_PROVIDER_ID="hive" + SELECTED_ENV_VAR="HIVE_API_KEY" + SELECTED_MODEL="kimi-2.5" + SELECTED_MAX_TOKENS=32768 + SELECTED_MAX_CONTEXT_TOKENS=120000 + SELECTED_API_BASE="https://api.adenhq.com" + PROVIDER_NAME="Hive" + SIGNUP_URL="https://adenhq.com" + echo "" + echo -e "${GREEN}⬢${NC} Using Hive LLM" + echo -e " ${DIM}Model: kimi-2.5 | API: api.adenhq.com${NC}" + ;; + 7) SELECTED_ENV_VAR="ANTHROPIC_API_KEY" SELECTED_PROVIDER_ID="anthropic" PROVIDER_NAME="Anthropic" SIGNUP_URL="https://console.anthropic.com/settings/keys" ;; - 7) + 8) SELECTED_ENV_VAR="OPENAI_API_KEY" SELECTED_PROVIDER_ID="openai" PROVIDER_NAME="OpenAI" SIGNUP_URL="https://platform.openai.com/api-keys" ;; - 8) + 9) SELECTED_ENV_VAR="GEMINI_API_KEY" SELECTED_PROVIDER_ID="gemini" PROVIDER_NAME="Google Gemini" SIGNUP_URL="https://aistudio.google.com/apikey" ;; - 9) + 10) SELECTED_ENV_VAR="GROQ_API_KEY" SELECTED_PROVIDER_ID="groq" PROVIDER_NAME="Groq" SIGNUP_URL="https://console.groq.com/keys" ;; - 10) + 11) SELECTED_ENV_VAR="CEREBRAS_API_KEY" 
SELECTED_PROVIDER_ID="cerebras" PROVIDER_NAME="Cerebras" SIGNUP_URL="https://cloud.cerebras.ai/" ;; - 11) + 12) echo "" echo -e "${YELLOW}Skipped.${NC} An LLM API key is required to test and use worker agents." echo -e "Add your API key later by running:" @@ -1160,7 +1191,7 @@ case $choice in esac # For API-key providers: prompt for key (allow replacement if already set) -if { [ -z "$SUBSCRIPTION_MODE" ] || [ "$SUBSCRIPTION_MODE" = "minimax_code" ] || [ "$SUBSCRIPTION_MODE" = "kimi_code" ]; } && [ -n "$SELECTED_ENV_VAR" ]; then +if { [ -z "$SUBSCRIPTION_MODE" ] || [ "$SUBSCRIPTION_MODE" = "minimax_code" ] || [ "$SUBSCRIPTION_MODE" = "kimi_code" ] || [ "$SUBSCRIPTION_MODE" = "hive_llm" ]; } && [ -n "$SELECTED_ENV_VAR" ]; then while true; do CURRENT_KEY="${!SELECTED_ENV_VAR}" if [ -n "$CURRENT_KEY" ]; then diff --git a/scripts/check_llm_key.py b/scripts/check_llm_key.py index 3e6e07d0..ba1a076a 100644 --- a/scripts/check_llm_key.py +++ b/scripts/check_llm_key.py @@ -135,6 +135,10 @@ PROVIDERS = { "kimi": lambda key, **kw: check_anthropic_compatible( key, "https://api.kimi.com/coding/v1/messages", "Kimi" ), + # Hive LLM uses an Anthropic-compatible endpoint + "hive": lambda key, **kw: check_anthropic_compatible( + key, "https://api.adenhq.com/v1/messages", "Hive" + ), } @@ -162,6 +166,10 @@ def main() -> None: result = check_anthropic_compatible( api_key, api_base.rstrip("/") + "/v1/messages", "Kimi" ) + elif api_base and provider_id == "hive": + result = check_anthropic_compatible( + api_key, api_base.rstrip("/") + "/v1/messages", "Hive" + ) elif api_base: # Custom API base (ZAI or other OpenAI-compatible) endpoint = api_base.rstrip("/") + "/models" From 0d6267bcf19ed4e7eb03f8fd2b14b238743c5d6d Mon Sep 17 00:00:00 2001 From: Timothy Date: Mon, 16 Mar 2026 14:49:33 -0700 Subject: [PATCH 23/45] fix: add delegation notice --- core/framework/graph/event_loop_node.py | 24 ++++++++++++++++++++---- 1 file changed, 20 insertions(+), 4 deletions(-) diff --git 
a/core/framework/graph/event_loop_node.py b/core/framework/graph/event_loop_node.py index dd66e3e8..ee78934a 100644 --- a/core/framework/graph/event_loop_node.py +++ b/core/framework/graph/event_loop_node.py @@ -597,10 +597,26 @@ class EventLoopNode(NodeProtocol): # - Node has sub_agents defined # - We are NOT in subagent mode (prevents nested delegation) if not ctx.is_subagent_mode: - sub_agents = getattr(ctx.node_spec, "sub_agents", []) - delegate_tool = self._build_delegate_tool(sub_agents, ctx.node_registry) - if delegate_tool: - tools.append(delegate_tool) + sub_agents = getattr(ctx.node_spec, "sub_agents", None) or [] + if sub_agents: + delegate_tool = self._build_delegate_tool(sub_agents, ctx.node_registry) + if delegate_tool: + tools.append(delegate_tool) + logger.info( + "[%s] delegate_to_sub_agent injected (sub_agents=%s)", + node_id, + sub_agents, + ) + else: + logger.error( + "[%s] _build_delegate_tool returned None for sub_agents=%s", + node_id, + sub_agents, + ) + else: + logger.debug( + "[%s] Skipped delegate tool (is_subagent_mode=True)", node_id + ) # Add report_to_parent tool for sub-agents with a report callback if ctx.is_subagent_mode and ctx.report_callback is not None: From 0a91b494171caf2a91d2b5876d9417ea6f1142ed Mon Sep 17 00:00:00 2001 From: Richard Tang Date: Mon, 16 Mar 2026 16:07:13 -0700 Subject: [PATCH 24/45] feat: add validation and config for baseURL --- core/demos/org_demo.py | 1 + core/framework/config.py | 4 ++++ core/framework/llm/litellm.py | 4 +--- core/tests/dummy_agents/run_all.py | 2 +- quickstart.ps1 | 25 ++++++++++++++++++++----- quickstart.sh | 23 +++++++++++++++++++---- scripts/check_llm_key.py | 4 +++- 7 files changed, 49 insertions(+), 14 deletions(-) diff --git a/core/demos/org_demo.py b/core/demos/org_demo.py index 2e910488..cbc75a1d 100644 --- a/core/demos/org_demo.py +++ b/core/demos/org_demo.py @@ -1374,3 +1374,4 @@ async def main(): if __name__ == "__main__": asyncio.run(main()) +postman request \ No newline at 
end of file diff --git a/core/framework/config.py b/core/framework/config.py index 1132411c..54c7b42a 100644 --- a/core/framework/config.py +++ b/core/framework/config.py @@ -19,6 +19,10 @@ from framework.graph.edge import DEFAULT_MAX_TOKENS # --------------------------------------------------------------------------- HIVE_CONFIG_FILE = Path.home() / ".hive" / "configuration.json" + +# Hive LLM router endpoint (Anthropic-compatible). +# litellm's Anthropic handler appends /v1/messages, so this is just the base host. +HIVE_LLM_ENDPOINT = "https://api.adenhq.com" logger = logging.getLogger(__name__) diff --git a/core/framework/llm/litellm.py b/core/framework/llm/litellm.py index 08f23e53..bff21e57 100644 --- a/core/framework/llm/litellm.py +++ b/core/framework/llm/litellm.py @@ -158,9 +158,7 @@ def _model_supports_cache_control(model: str) -> bool: # enforces a coding-agent whitelist that blocks unknown User-Agents. KIMI_API_BASE = "https://api.kimi.com/coding" -# Hive LLM uses an Anthropic-compatible endpoint at api.adenhq.com. -# litellm's Anthropic handler appends /v1/messages, so the base is just the host. -HIVE_API_BASE = "https://api.adenhq.com" +from framework.config import HIVE_LLM_ENDPOINT as HIVE_API_BASE # Empty-stream retries use a short fixed delay, not the rate-limit backoff. # Conversation-structure issues are deterministic — long waits don't help. 
diff --git a/core/tests/dummy_agents/run_all.py b/core/tests/dummy_agents/run_all.py index 120f53c8..0d447949 100644 --- a/core/tests/dummy_agents/run_all.py +++ b/core/tests/dummy_agents/run_all.py @@ -33,7 +33,7 @@ API_KEY_PROVIDERS = [ ("TOGETHER_API_KEY", "Together AI", "together_ai/meta-llama/Llama-3.3-70B-Instruct-Turbo"), ("DEEPSEEK_API_KEY", "DeepSeek", "deepseek-chat"), ("MINIMAX_API_KEY", "MiniMax", "MiniMax-M2.5"), - ("HIVE_API_KEY", "Hive LLM", "hive/kimi-2.5"), + ("HIVE_API_KEY", "Hive LLM", "hive/queen"), ] diff --git a/quickstart.ps1 b/quickstart.ps1 index c6f28ec6..1b86299d 100644 --- a/quickstart.ps1 +++ b/quickstart.ps1 @@ -21,6 +21,9 @@ $ErrorActionPreference = "Continue" $ScriptDir = Split-Path -Parent $MyInvocation.MyCommand.Definition $UvHelperPath = Join-Path $ScriptDir "scripts\uv-discovery.ps1" +# Hive LLM router endpoint +$HiveLlmEndpoint = "https://api.adenhq.com" + . $UvHelperPath # ============================================================ @@ -938,7 +941,7 @@ if (Test-Path $HiveConfigFile) { elseif ($prevLlm.use_kimi_code_subscription) { $PrevSubMode = "kimi_code" } elseif ($prevLlm.api_base -and $prevLlm.api_base -like "*api.z.ai*") { $PrevSubMode = "zai_code" } elseif ($prevLlm.api_base -and $prevLlm.api_base -like "*api.kimi.com*") { $PrevSubMode = "kimi_code" } - elseif ($prevLlm.provider -eq "hive" -or ($prevLlm.api_base -and $prevLlm.api_base -like "*api.adenhq.com*")) { $PrevSubMode = "hive_llm" } + elseif ($prevLlm.provider -eq "hive" -or ($prevLlm.api_base -and $prevLlm.api_base -like "*adenhq.com*")) { $PrevSubMode = "hive_llm" } } } catch { } } @@ -1141,12 +1144,24 @@ switch ($num) { $SubscriptionMode = "hive_llm" $SelectedProviderId = "hive" $SelectedEnvVar = "HIVE_API_KEY" - $SelectedModel = "kimi-2.5" $SelectedMaxTokens = 32768 $SelectedMaxContextTokens = 120000 Write-Host "" Write-Ok "Using Hive LLM" - Write-Color -Text " Model: kimi-2.5 | API: api.adenhq.com" -Color DarkGray + Write-Host "" + Write-Host " Select a 
model:" + Write-Host " " -NoNewline; Write-Color -Text "1)" -Color Cyan -NoNewline; Write-Host " queen " -NoNewline; Write-Color -Text "(default - Hive flagship)" -Color DarkGray + Write-Host " " -NoNewline; Write-Color -Text "2)" -Color Cyan -NoNewline; Write-Host " kimi-2.5" + Write-Host " " -NoNewline; Write-Color -Text "3)" -Color Cyan -NoNewline; Write-Host " GLM-5" + Write-Host "" + $hiveModelChoice = Read-Host " Enter model choice (1-3) [1]" + if (-not $hiveModelChoice) { $hiveModelChoice = "1" } + switch ($hiveModelChoice) { + "2" { $SelectedModel = "kimi-2.5" } + "3" { $SelectedModel = "GLM-5" } + default { $SelectedModel = "queen" } + } + Write-Color -Text " Model: $SelectedModel | API: $HiveLlmEndpoint" -Color DarkGray } { $_ -ge 6 -and $_ -le 10 } { # API key providers @@ -1388,7 +1403,7 @@ if ($SubscriptionMode -eq "hive_llm") { # Health check the new key Write-Host " Verifying Hive API key... " -NoNewline try { - $hcOutput = & $PythonCmd scripts/check_llm_key.py hive $apiKey "https://api.adenhq.com" 2>&1 + $hcOutput = & $PythonCmd scripts/check_llm_key.py hive $apiKey "$HiveLlmEndpoint" 2>&1 $hcJson = $hcOutput | ConvertFrom-Json if ($hcJson.valid -eq $true) { Write-Color -Text "ok" -Color Green @@ -1464,7 +1479,7 @@ if ($SelectedProviderId) { $config.llm["api_base"] = "https://api.kimi.com/coding" $config.llm["api_key_env_var"] = $SelectedEnvVar } elseif ($SubscriptionMode -eq "hive_llm") { - $config.llm["api_base"] = "https://api.adenhq.com" + $config.llm["api_base"] = $HiveLlmEndpoint $config.llm["api_key_env_var"] = $SelectedEnvVar } else { $config.llm["api_key_env_var"] = $SelectedEnvVar diff --git a/quickstart.sh b/quickstart.sh index a38b3d83..e32c8c51 100755 --- a/quickstart.sh +++ b/quickstart.sh @@ -32,6 +32,9 @@ NC='\033[0m' # No Color # Get the directory where this script is located SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" +# Hive LLM router endpoint +HIVE_LLM_ENDPOINT="https://api.adenhq.com" + # Helper function for 
prompts prompt_yes_no() { local prompt="$1" @@ -906,7 +909,7 @@ try: elif llm.get('use_codex_subscription'): sub = 'codex' elif llm.get('use_kimi_code_subscription'): sub = 'kimi_code' elif llm.get('provider', '') == 'minimax' or 'api.minimax.io' in llm.get('api_base', ''): sub = 'minimax_code' - elif llm.get('provider', '') == 'hive' or 'api.adenhq.com' in llm.get('api_base', ''): sub = 'hive_llm' + elif llm.get('provider', '') == 'hive' or 'adenhq.com' in llm.get('api_base', ''): sub = 'hive_llm' elif 'api.z.ai' in llm.get('api_base', ''): sub = 'zai_code' print(f'PREV_SUB_MODE={sub}') except Exception: @@ -1138,15 +1141,27 @@ case $choice in SUBSCRIPTION_MODE="hive_llm" SELECTED_PROVIDER_ID="hive" SELECTED_ENV_VAR="HIVE_API_KEY" - SELECTED_MODEL="kimi-2.5" SELECTED_MAX_TOKENS=32768 SELECTED_MAX_CONTEXT_TOKENS=120000 - SELECTED_API_BASE="https://api.adenhq.com" + SELECTED_API_BASE="$HIVE_LLM_ENDPOINT" PROVIDER_NAME="Hive" SIGNUP_URL="https://adenhq.com" echo "" echo -e "${GREEN}⬢${NC} Using Hive LLM" - echo -e " ${DIM}Model: kimi-2.5 | API: api.adenhq.com${NC}" + echo "" + echo -e " Select a model:" + echo -e " ${CYAN}1)${NC} queen ${DIM}(default — Hive flagship)${NC}" + echo -e " ${CYAN}2)${NC} kimi-2.5" + echo -e " ${CYAN}3)${NC} GLM-5" + echo "" + read -r -p " Enter model choice (1-3) [1]: " hive_model_choice || true + hive_model_choice="${hive_model_choice:-1}" + case "$hive_model_choice" in + 2) SELECTED_MODEL="kimi-2.5" ;; + 3) SELECTED_MODEL="GLM-5" ;; + *) SELECTED_MODEL="queen" ;; + esac + echo -e " ${DIM}Model: $SELECTED_MODEL | API: ${HIVE_LLM_ENDPOINT}${NC}" ;; 7) SELECTED_ENV_VAR="ANTHROPIC_API_KEY" diff --git a/scripts/check_llm_key.py b/scripts/check_llm_key.py index ba1a076a..957bdd55 100644 --- a/scripts/check_llm_key.py +++ b/scripts/check_llm_key.py @@ -16,6 +16,8 @@ import sys import httpx +from framework.config import HIVE_LLM_ENDPOINT + TIMEOUT = 10.0 @@ -137,7 +139,7 @@ PROVIDERS = { ), # Hive LLM uses an Anthropic-compatible endpoint 
"hive": lambda key, **kw: check_anthropic_compatible( - key, "https://api.adenhq.com/v1/messages", "Hive" + key, f"{HIVE_LLM_ENDPOINT}/v1/messages", "Hive" ), } From d59f8e99cbe5aa38074c9192ce108c2460492fec Mon Sep 17 00:00:00 2001 From: Richard Tang Date: Mon, 16 Mar 2026 16:09:47 -0700 Subject: [PATCH 25/45] chore: prompt users to go to discord for hive key --- quickstart.ps1 | 3 +++ quickstart.sh | 2 +- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/quickstart.ps1 b/quickstart.ps1 index 1b86299d..af4a275c 100644 --- a/quickstart.ps1 +++ b/quickstart.ps1 @@ -1390,6 +1390,9 @@ if ($SubscriptionMode -eq "hive_llm") { Write-Host "" $apiKey = Read-Host "Paste a new Hive API key (or press Enter to keep current)" } else { + Write-Host "" + Write-Host " Get your API key from: " -NoNewline + Write-Color -Text "https://discord.com/invite/hQdU7QDkgR" -Color Cyan Write-Host "" $apiKey = Read-Host "Paste your Hive API key (or press Enter to skip)" } diff --git a/quickstart.sh b/quickstart.sh index e32c8c51..e7dc10b5 100755 --- a/quickstart.sh +++ b/quickstart.sh @@ -1145,7 +1145,7 @@ case $choice in SELECTED_MAX_CONTEXT_TOKENS=120000 SELECTED_API_BASE="$HIVE_LLM_ENDPOINT" PROVIDER_NAME="Hive" - SIGNUP_URL="https://adenhq.com" + SIGNUP_URL="https://discord.com/invite/hQdU7QDkgR" echo "" echo -e "${GREEN}⬢${NC} Using Hive LLM" echo "" From c7d70e0fb1d75beacbfa0927e210422b378137c4 Mon Sep 17 00:00:00 2001 From: Timothy Date: Mon, 16 Mar 2026 16:26:16 -0700 Subject: [PATCH 26/45] fix: skill injection, tool call timeout --- core/framework/graph/event_loop_node.py | 2 +- core/framework/graph/executor.py | 11 +++++++++++ core/framework/runtime/execution_stream.py | 12 ++++++++++++ core/framework/server/queen_orchestrator.py | 10 ++++++++++ core/framework/tools/queen_lifecycle_tools.py | 10 +++++++++- 5 files changed, 43 insertions(+), 2 deletions(-) diff --git a/core/framework/graph/event_loop_node.py b/core/framework/graph/event_loop_node.py index ee78934a..5961be91 
100644 --- a/core/framework/graph/event_loop_node.py +++ b/core/framework/graph/event_loop_node.py @@ -229,7 +229,7 @@ class LoopConfig: # Maximum seconds a single tool call may take before being killed. # Prevents hung MCP servers (especially browser/GCU tools) from # blocking the entire event loop indefinitely. 0 = no timeout. - tool_call_timeout_seconds: float = 120.0 + tool_call_timeout_seconds: float = 60.0 # --- Lifecycle hooks --- # Hooks are async callables keyed by event name. Supported events: diff --git a/core/framework/graph/executor.py b/core/framework/graph/executor.py index 7d682d9d..6b512a58 100644 --- a/core/framework/graph/executor.py +++ b/core/framework/graph/executor.py @@ -205,6 +205,17 @@ class GraphExecutor: self.skills_catalog_prompt = skills_catalog_prompt self.protocols_prompt = protocols_prompt + if protocols_prompt: + self.logger.info( + "GraphExecutor[%s] received protocols_prompt (%d chars)", + stream_id, len(protocols_prompt), + ) + else: + self.logger.warning( + "GraphExecutor[%s] received EMPTY protocols_prompt", + stream_id, + ) + # Parallel execution settings self.enable_parallel_execution = enable_parallel_execution self._parallel_config = parallel_config or ParallelExecutionConfig() diff --git a/core/framework/runtime/execution_stream.py b/core/framework/runtime/execution_stream.py index 2af9188d..132359a8 100644 --- a/core/framework/runtime/execution_stream.py +++ b/core/framework/runtime/execution_stream.py @@ -237,6 +237,18 @@ class ExecutionStream: self._skills_catalog_prompt = skills_catalog_prompt self._protocols_prompt = protocols_prompt + _es_logger = logging.getLogger(__name__) + if protocols_prompt: + _es_logger.info( + "ExecutionStream[%s] received protocols_prompt (%d chars)", + stream_id, len(protocols_prompt), + ) + else: + _es_logger.warning( + "ExecutionStream[%s] received EMPTY protocols_prompt", + stream_id, + ) + # Create stream-scoped runtime self._runtime = StreamRuntime( stream_id=stream_id, diff --git 
a/core/framework/server/queen_orchestrator.py b/core/framework/server/queen_orchestrator.py index f011f541..5989cbcb 100644 --- a/core/framework/server/queen_orchestrator.py +++ b/core/framework/server/queen_orchestrator.py @@ -216,6 +216,16 @@ async def create_queen( + worker_identity ) + # ---- Default skill protocols ------------------------------------- + try: + from framework.skills.manager import SkillsManager + + _queen_skills_mgr = SkillsManager() + _queen_skills_mgr.load() + phase_state.protocols_prompt = _queen_skills_mgr.protocols_prompt + except Exception: + logger.debug("Queen skill loading failed (non-fatal)", exc_info=True) + # ---- Persona hook ------------------------------------------------ _session_llm = session.llm _session_event_bus = session.event_bus diff --git a/core/framework/tools/queen_lifecycle_tools.py b/core/framework/tools/queen_lifecycle_tools.py index 799e103a..e1b7722f 100644 --- a/core/framework/tools/queen_lifecycle_tools.py +++ b/core/framework/tools/queen_lifecycle_tools.py @@ -109,6 +109,9 @@ class QueenPhaseState: prompt_staging: str = "" prompt_running: str = "" + # Default skill operational protocols — appended to every phase prompt + protocols_prompt: str = "" + def get_current_tools(self) -> list: """Return tools for the current phase.""" if self.phase == "planning": @@ -133,7 +136,12 @@ class QueenPhaseState: from framework.agents.queen.queen_memory import format_for_injection memory = format_for_injection() - return base + ("\n\n" + memory if memory else "") + parts = [base] + if self.protocols_prompt: + parts.append(self.protocols_prompt) + if memory: + parts.append(memory) + return "\n\n".join(parts) async def _emit_phase_event(self) -> None: """Publish a QUEEN_PHASE_CHANGED event so the frontend updates the tag.""" From 491a3f24da8a0f63fe0d23860daf16e72486351b Mon Sep 17 00:00:00 2001 From: Richard Tang Date: Mon, 16 Mar 2026 16:45:23 -0700 Subject: [PATCH 27/45] chore: Suppress noisy LiteLLM INFO logs --- 
core/framework/observability/logging.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/core/framework/observability/logging.py b/core/framework/observability/logging.py index 21e7c332..15ac1a95 100644 --- a/core/framework/observability/logging.py +++ b/core/framework/observability/logging.py @@ -206,6 +206,10 @@ def configure_logging( root_logger.addHandler(handler) root_logger.setLevel(level.upper()) + # Suppress noisy LiteLLM INFO logs (model/provider line + Provider List URL + # printed on every single completion call). Warnings and errors still show. + logging.getLogger("LiteLLM").setLevel(logging.WARNING) + # When in JSON mode, configure known third-party loggers to use JSON formatter # This ensures libraries like LiteLLM, httpcore also output clean JSON if format == "json": From c10eea04ec32b69397a60977d19bcd538130adaa Mon Sep 17 00:00:00 2001 From: bryan Date: Mon, 16 Mar 2026 17:26:57 -0700 Subject: [PATCH 28/45] refactor: update graph node colors --- core/framework/tools/flowchart_utils.py | 18 ++++++++--------- docs/draft-flowchart-schema.md | 26 ++++++++++++------------- 2 files changed, 22 insertions(+), 22 deletions(-) diff --git a/core/framework/tools/flowchart_utils.py b/core/framework/tools/flowchart_utils.py index 2f5b09a1..f436ceb8 100644 --- a/core/framework/tools/flowchart_utils.py +++ b/core/framework/tools/flowchart_utils.py @@ -18,15 +18,15 @@ FLOWCHART_FILENAME = "flowchart.json" # ── Flowchart type catalogue (9 types) ─────────────────────────────────────── FLOWCHART_TYPES = { - "start": {"shape": "stadium", "color": "#3fa66a"}, # sage green - "terminal": {"shape": "stadium", "color": "#a04444"}, # dusty red - "process": {"shape": "rectangle", "color": "#616d83"}, # blue-gray - "decision": {"shape": "diamond", "color": "#d89d26"}, # warm amber - "io": {"shape": "parallelogram", "color": "#7a4fa5"}, # dusty purple - "document": {"shape": "document", "color": "#507485"}, # steel blue - "database": {"shape": "cylinder", "color": 
"#459077"}, # muted teal - "subprocess": {"shape": "subroutine", "color": "#4c7f7f"}, # dark cyan - "browser": {"shape": "hexagon", "color": "#3a4a9b"}, # deep blue + "start": {"shape": "stadium", "color": "#8aad3f"}, # spring pollen + "terminal": {"shape": "stadium", "color": "#b5453a"}, # propolis red + "process": {"shape": "rectangle", "color": "#b5a575"}, # warm wheat + "decision": {"shape": "diamond", "color": "#d89d26"}, # royal honey + "io": {"shape": "parallelogram", "color": "#d06818"}, # burnt orange + "document": {"shape": "document", "color": "#c4b830"}, # goldenrod + "database": {"shape": "cylinder", "color": "#508878"}, # sage teal + "subprocess": {"shape": "subroutine", "color": "#887a48"}, # propolis gold + "browser": {"shape": "hexagon", "color": "#cc8850"}, # honey copper } # Backward-compat remap: old type names → canonical type diff --git a/docs/draft-flowchart-schema.md b/docs/draft-flowchart-schema.md index 8fd8c982..f3f84566 100644 --- a/docs/draft-flowchart-schema.md +++ b/docs/draft-flowchart-schema.md @@ -206,7 +206,7 @@ After `save_agent_draft` processes the input, it stores and emits an enriched dr "sub_agents": [], "flowchart_type": "start", "flowchart_shape": "stadium", - "flowchart_color": "#3fa66a" + "flowchart_color": "#8aad3f" }, { "id": "check-tier", @@ -246,9 +246,9 @@ After `save_agent_draft` processes the input, it stores and emits an enriched dr } ], "flowchart_legend": { - "start": { "shape": "stadium", "color": "#3fa66a" }, - "terminal": { "shape": "stadium", "color": "#a04444" }, - "process": { "shape": "rectangle", "color": "#616d83" }, + "start": { "shape": "stadium", "color": "#8aad3f" }, + "terminal": { "shape": "stadium", "color": "#b5453a" }, + "process": { "shape": "rectangle", "color": "#b5a575" }, "decision": { "shape": "diamond", "color": "#d89d26" } } } @@ -287,15 +287,15 @@ Returned by `GET /api/sessions/{id}/flowchart-map` after `confirm_and_build()` d | Type | Shape | Color | SVG Primitive | Description | 
|---|---|---|---|---| -| `start` | stadium | `#3fa66a` sage green | `` | Entry point / start terminator | -| `terminal` | stadium | `#a04444` dusty red | `` | End point / stop terminator | -| `process` | rectangle | `#616d83` blue-gray | `` | General processing step (default) | -| `decision` | diamond | `#d89d26` warm amber | `` 4-point | Branching / conditional logic | -| `io` | parallelogram | `#7a4fa5` dusty purple | `` skewed | Data input or output | -| `document` | document | `#507485` steel blue | `` wavy bottom | Document / report generation | -| `database` | cylinder | `#459077` muted teal | `` + `` | Database / data store | -| `subprocess` | subroutine | `#4c7f7f` dark cyan | `` + inner `` | Predefined process / sub-agent | -| `browser` | hexagon | `#3a4a9b` deep blue | `` 6-point | Browser automation (GCU node) | +| `start` | stadium | `#8aad3f` spring pollen | `` | Entry point / start terminator | +| `terminal` | stadium | `#b5453a` propolis red | `` | End point / stop terminator | +| `process` | rectangle | `#b5a575` warm wheat | `` | General processing step (default) | +| `decision` | diamond | `#d89d26` royal honey | `` 4-point | Branching / conditional logic | +| `io` | parallelogram | `#d06818` burnt orange | `` skewed | Data input or output | +| `document` | document | `#c4b830` goldenrod | `` wavy bottom | Document / report generation | +| `database` | cylinder | `#508878` sage teal | `` + `` | Database / data store | +| `subprocess` | subroutine | `#887a48` propolis gold | `` + inner `` | Predefined process / sub-agent | +| `browser` | hexagon | `#cc8850` honey copper | `` 6-point | Browser automation (GCU node) | --- From b77a3031fe887cca8793c66c099f9aa754e2dbbe Mon Sep 17 00:00:00 2001 From: bryan Date: Mon, 16 Mar 2026 17:27:28 -0700 Subject: [PATCH 29/45] refactor: update flowchart.json for templates --- .../competitive_intel_agent/flowchart.json | 28 +++++++++---------- .../deep_research_agent/flowchart.json | 22 +++++++-------- 
.../email_inbox_management/flowchart.json | 22 +++++++-------- .../email_reply_agent/flowchart.json | 22 +++++++-------- examples/templates/job_hunter/flowchart.json | 24 ++++++++-------- .../local_business_extractor/flowchart.json | 22 +++++++-------- .../meeting_scheduler/flowchart.json | 22 +++++++-------- .../tech_news_reporter/flowchart.json | 22 +++++++-------- .../twitter_news_agent/flowchart.json | 22 +++++++-------- .../vulnerability_assessment/flowchart.json | 24 ++++++++-------- 10 files changed, 115 insertions(+), 115 deletions(-) diff --git a/examples/templates/competitive_intel_agent/flowchart.json b/examples/templates/competitive_intel_agent/flowchart.json index f0e1111b..a2188935 100644 --- a/examples/templates/competitive_intel_agent/flowchart.json +++ b/examples/templates/competitive_intel_agent/flowchart.json @@ -34,7 +34,7 @@ "sub_agents": [], "flowchart_type": "start", "flowchart_shape": "stadium", - "flowchart_color": "#3fa66a" + "flowchart_color": "#8aad3f" }, { "id": "web-scraper", @@ -56,7 +56,7 @@ "sub_agents": [], "flowchart_type": "process", "flowchart_shape": "rectangle", - "flowchart_color": "#616d83" + "flowchart_color": "#b5a575" }, { "id": "news-search", @@ -100,7 +100,7 @@ "sub_agents": [], "flowchart_type": "process", "flowchart_shape": "rectangle", - "flowchart_color": "#616d83" + "flowchart_color": "#b5a575" }, { "id": "aggregator", @@ -125,7 +125,7 @@ "sub_agents": [], "flowchart_type": "database", "flowchart_shape": "cylinder", - "flowchart_color": "#459077" + "flowchart_color": "#508878" }, { "id": "analysis", @@ -151,7 +151,7 @@ "sub_agents": [], "flowchart_type": "database", "flowchart_shape": "cylinder", - "flowchart_color": "#459077" + "flowchart_color": "#508878" }, { "id": "report", @@ -177,7 +177,7 @@ "sub_agents": [], "flowchart_type": "terminal", "flowchart_shape": "stadium", - "flowchart_color": "#a04444" + "flowchart_color": "#b5453a" } ], "edges": [ @@ -245,15 +245,15 @@ "flowchart_legend": { "start": { "shape": 
"stadium", - "color": "#3fa66a" + "color": "#8aad3f" }, "terminal": { "shape": "stadium", - "color": "#a04444" + "color": "#b5453a" }, "process": { "shape": "rectangle", - "color": "#616d83" + "color": "#b5a575" }, "decision": { "shape": "diamond", @@ -261,23 +261,23 @@ }, "io": { "shape": "parallelogram", - "color": "#7a4fa5" + "color": "#d06818" }, "document": { "shape": "document", - "color": "#507485" + "color": "#c4b830" }, "database": { "shape": "cylinder", - "color": "#459077" + "color": "#508878" }, "subprocess": { "shape": "subroutine", - "color": "#4c7f7f" + "color": "#887a48" }, "browser": { "shape": "hexagon", - "color": "#3a4a9b" + "color": "#cc8850" } } }, diff --git a/examples/templates/deep_research_agent/flowchart.json b/examples/templates/deep_research_agent/flowchart.json index d3daef83..086dddfd 100644 --- a/examples/templates/deep_research_agent/flowchart.json +++ b/examples/templates/deep_research_agent/flowchart.json @@ -31,7 +31,7 @@ "sub_agents": [], "flowchart_type": "start", "flowchart_shape": "stadium", - "flowchart_color": "#3fa66a" + "flowchart_color": "#8aad3f" }, { "id": "research", @@ -59,7 +59,7 @@ "sub_agents": [], "flowchart_type": "database", "flowchart_shape": "cylinder", - "flowchart_color": "#459077" + "flowchart_color": "#508878" }, { "id": "review", @@ -108,7 +108,7 @@ "sub_agents": [], "flowchart_type": "terminal", "flowchart_shape": "stadium", - "flowchart_color": "#a04444" + "flowchart_color": "#b5453a" } ], "edges": [ @@ -168,15 +168,15 @@ "flowchart_legend": { "start": { "shape": "stadium", - "color": "#3fa66a" + "color": "#8aad3f" }, "terminal": { "shape": "stadium", - "color": "#a04444" + "color": "#b5453a" }, "process": { "shape": "rectangle", - "color": "#616d83" + "color": "#b5a575" }, "decision": { "shape": "diamond", @@ -184,23 +184,23 @@ }, "io": { "shape": "parallelogram", - "color": "#7a4fa5" + "color": "#d06818" }, "document": { "shape": "document", - "color": "#507485" + "color": "#c4b830" }, "database": { 
"shape": "cylinder", - "color": "#459077" + "color": "#508878" }, "subprocess": { "shape": "subroutine", - "color": "#4c7f7f" + "color": "#887a48" }, "browser": { "shape": "hexagon", - "color": "#3a4a9b" + "color": "#cc8850" } } }, diff --git a/examples/templates/email_inbox_management/flowchart.json b/examples/templates/email_inbox_management/flowchart.json index 20ceda15..a05e3442 100644 --- a/examples/templates/email_inbox_management/flowchart.json +++ b/examples/templates/email_inbox_management/flowchart.json @@ -36,7 +36,7 @@ "sub_agents": [], "flowchart_type": "start", "flowchart_shape": "stadium", - "flowchart_color": "#3fa66a" + "flowchart_color": "#8aad3f" }, { "id": "fetch-emails", @@ -61,7 +61,7 @@ "sub_agents": [], "flowchart_type": "process", "flowchart_shape": "rectangle", - "flowchart_color": "#616d83" + "flowchart_color": "#b5a575" }, { "id": "classify-and-act", @@ -113,7 +113,7 @@ "sub_agents": [], "flowchart_type": "terminal", "flowchart_shape": "stadium", - "flowchart_color": "#a04444" + "flowchart_color": "#b5453a" } ], "edges": [ @@ -165,15 +165,15 @@ "flowchart_legend": { "start": { "shape": "stadium", - "color": "#3fa66a" + "color": "#8aad3f" }, "terminal": { "shape": "stadium", - "color": "#a04444" + "color": "#b5453a" }, "process": { "shape": "rectangle", - "color": "#616d83" + "color": "#b5a575" }, "decision": { "shape": "diamond", @@ -181,23 +181,23 @@ }, "io": { "shape": "parallelogram", - "color": "#7a4fa5" + "color": "#d06818" }, "document": { "shape": "document", - "color": "#507485" + "color": "#c4b830" }, "database": { "shape": "cylinder", - "color": "#459077" + "color": "#508878" }, "subprocess": { "shape": "subroutine", - "color": "#4c7f7f" + "color": "#887a48" }, "browser": { "shape": "hexagon", - "color": "#3a4a9b" + "color": "#cc8850" } } }, diff --git a/examples/templates/email_reply_agent/flowchart.json b/examples/templates/email_reply_agent/flowchart.json index 0405ce83..a4e90082 100644 --- 
a/examples/templates/email_reply_agent/flowchart.json +++ b/examples/templates/email_reply_agent/flowchart.json @@ -30,7 +30,7 @@ "sub_agents": [], "flowchart_type": "start", "flowchart_shape": "stadium", - "flowchart_color": "#3fa66a" + "flowchart_color": "#8aad3f" }, { "id": "search", @@ -52,7 +52,7 @@ "sub_agents": [], "flowchart_type": "process", "flowchart_shape": "rectangle", - "flowchart_color": "#616d83" + "flowchart_color": "#b5a575" }, { "id": "confirm-draft", @@ -74,7 +74,7 @@ "sub_agents": [], "flowchart_type": "terminal", "flowchart_shape": "stadium", - "flowchart_color": "#a04444" + "flowchart_color": "#b5453a" } ], "edges": [ @@ -118,15 +118,15 @@ "flowchart_legend": { "start": { "shape": "stadium", - "color": "#3fa66a" + "color": "#8aad3f" }, "terminal": { "shape": "stadium", - "color": "#a04444" + "color": "#b5453a" }, "process": { "shape": "rectangle", - "color": "#616d83" + "color": "#b5a575" }, "decision": { "shape": "diamond", @@ -134,23 +134,23 @@ }, "io": { "shape": "parallelogram", - "color": "#7a4fa5" + "color": "#d06818" }, "document": { "shape": "document", - "color": "#507485" + "color": "#c4b830" }, "database": { "shape": "cylinder", - "color": "#459077" + "color": "#508878" }, "subprocess": { "shape": "subroutine", - "color": "#4c7f7f" + "color": "#887a48" }, "browser": { "shape": "hexagon", - "color": "#3a4a9b" + "color": "#cc8850" } } }, diff --git a/examples/templates/job_hunter/flowchart.json b/examples/templates/job_hunter/flowchart.json index 56a15d9a..1144f496 100644 --- a/examples/templates/job_hunter/flowchart.json +++ b/examples/templates/job_hunter/flowchart.json @@ -34,7 +34,7 @@ "sub_agents": [], "flowchart_type": "start", "flowchart_shape": "stadium", - "flowchart_color": "#3fa66a" + "flowchart_color": "#8aad3f" }, { "id": "job-search", @@ -54,7 +54,7 @@ "sub_agents": [], "flowchart_type": "process", "flowchart_shape": "rectangle", - "flowchart_color": "#616d83" + "flowchart_color": "#b5a575" }, { "id": "job-review", @@ 
-73,7 +73,7 @@ "sub_agents": [], "flowchart_type": "process", "flowchart_shape": "rectangle", - "flowchart_color": "#616d83" + "flowchart_color": "#b5a575" }, { "id": "customize", @@ -97,7 +97,7 @@ "sub_agents": [], "flowchart_type": "terminal", "flowchart_shape": "stadium", - "flowchart_color": "#a04444" + "flowchart_color": "#b5453a" } ], "edges": [ @@ -133,15 +133,15 @@ "flowchart_legend": { "start": { "shape": "stadium", - "color": "#3fa66a" + "color": "#8aad3f" }, "terminal": { "shape": "stadium", - "color": "#a04444" + "color": "#b5453a" }, "process": { "shape": "rectangle", - "color": "#616d83" + "color": "#b5a575" }, "decision": { "shape": "diamond", @@ -149,23 +149,23 @@ }, "io": { "shape": "parallelogram", - "color": "#7a4fa5" + "color": "#d06818" }, "document": { "shape": "document", - "color": "#507485" + "color": "#c4b830" }, "database": { "shape": "cylinder", - "color": "#459077" + "color": "#508878" }, "subprocess": { "shape": "subroutine", - "color": "#4c7f7f" + "color": "#887a48" }, "browser": { "shape": "hexagon", - "color": "#3a4a9b" + "color": "#cc8850" } } }, diff --git a/examples/templates/local_business_extractor/flowchart.json b/examples/templates/local_business_extractor/flowchart.json index f4df5155..e525c744 100644 --- a/examples/templates/local_business_extractor/flowchart.json +++ b/examples/templates/local_business_extractor/flowchart.json @@ -27,7 +27,7 @@ "sub_agents": [], "flowchart_type": "browser", "flowchart_shape": "hexagon", - "flowchart_color": "#3a4a9b" + "flowchart_color": "#cc8850" }, { "id": "extract-contacts", @@ -50,7 +50,7 @@ ], "flowchart_type": "subprocess", "flowchart_shape": "subroutine", - "flowchart_color": "#4c7f7f" + "flowchart_color": "#887a48" }, { "id": "sheets-sync", @@ -73,7 +73,7 @@ "sub_agents": [], "flowchart_type": "terminal", "flowchart_shape": "stadium", - "flowchart_color": "#a04444" + "flowchart_color": "#b5453a" } ], "edges": [ @@ -117,15 +117,15 @@ "flowchart_legend": { "start": { "shape": 
"stadium", - "color": "#3fa66a" + "color": "#8aad3f" }, "terminal": { "shape": "stadium", - "color": "#a04444" + "color": "#b5453a" }, "process": { "shape": "rectangle", - "color": "#616d83" + "color": "#b5a575" }, "decision": { "shape": "diamond", @@ -133,23 +133,23 @@ }, "io": { "shape": "parallelogram", - "color": "#7a4fa5" + "color": "#d06818" }, "document": { "shape": "document", - "color": "#507485" + "color": "#c4b830" }, "database": { "shape": "cylinder", - "color": "#459077" + "color": "#508878" }, "subprocess": { "shape": "subroutine", - "color": "#4c7f7f" + "color": "#887a48" }, "browser": { "shape": "hexagon", - "color": "#3a4a9b" + "color": "#cc8850" } } }, diff --git a/examples/templates/meeting_scheduler/flowchart.json b/examples/templates/meeting_scheduler/flowchart.json index 753967e5..c3c381d3 100644 --- a/examples/templates/meeting_scheduler/flowchart.json +++ b/examples/templates/meeting_scheduler/flowchart.json @@ -34,7 +34,7 @@ "sub_agents": [], "flowchart_type": "start", "flowchart_shape": "stadium", - "flowchart_color": "#3fa66a" + "flowchart_color": "#8aad3f" }, { "id": "schedule", @@ -66,7 +66,7 @@ "sub_agents": [], "flowchart_type": "io", "flowchart_shape": "parallelogram", - "flowchart_color": "#7a4fa5" + "flowchart_color": "#d06818" }, { "id": "confirm", @@ -86,7 +86,7 @@ "sub_agents": [], "flowchart_type": "terminal", "flowchart_shape": "stadium", - "flowchart_color": "#a04444" + "flowchart_color": "#b5453a" } ], "edges": [ @@ -122,15 +122,15 @@ "flowchart_legend": { "start": { "shape": "stadium", - "color": "#3fa66a" + "color": "#8aad3f" }, "terminal": { "shape": "stadium", - "color": "#a04444" + "color": "#b5453a" }, "process": { "shape": "rectangle", - "color": "#616d83" + "color": "#b5a575" }, "decision": { "shape": "diamond", @@ -138,23 +138,23 @@ }, "io": { "shape": "parallelogram", - "color": "#7a4fa5" + "color": "#d06818" }, "document": { "shape": "document", - "color": "#507485" + "color": "#c4b830" }, "database": { "shape": 
"cylinder", - "color": "#459077" + "color": "#508878" }, "subprocess": { "shape": "subroutine", - "color": "#4c7f7f" + "color": "#887a48" }, "browser": { "shape": "hexagon", - "color": "#3a4a9b" + "color": "#cc8850" } } }, diff --git a/examples/templates/tech_news_reporter/flowchart.json b/examples/templates/tech_news_reporter/flowchart.json index f36662e0..9a0d78a8 100644 --- a/examples/templates/tech_news_reporter/flowchart.json +++ b/examples/templates/tech_news_reporter/flowchart.json @@ -30,7 +30,7 @@ "sub_agents": [], "flowchart_type": "start", "flowchart_shape": "stadium", - "flowchart_color": "#3fa66a" + "flowchart_color": "#8aad3f" }, { "id": "research", @@ -50,7 +50,7 @@ "sub_agents": [], "flowchart_type": "process", "flowchart_shape": "rectangle", - "flowchart_color": "#616d83" + "flowchart_color": "#b5a575" }, { "id": "compile-report", @@ -72,7 +72,7 @@ "sub_agents": [], "flowchart_type": "terminal", "flowchart_shape": "stadium", - "flowchart_color": "#a04444" + "flowchart_color": "#b5453a" } ], "edges": [ @@ -100,15 +100,15 @@ "flowchart_legend": { "start": { "shape": "stadium", - "color": "#3fa66a" + "color": "#8aad3f" }, "terminal": { "shape": "stadium", - "color": "#a04444" + "color": "#b5453a" }, "process": { "shape": "rectangle", - "color": "#616d83" + "color": "#b5a575" }, "decision": { "shape": "diamond", @@ -116,23 +116,23 @@ }, "io": { "shape": "parallelogram", - "color": "#7a4fa5" + "color": "#d06818" }, "document": { "shape": "document", - "color": "#507485" + "color": "#c4b830" }, "database": { "shape": "cylinder", - "color": "#459077" + "color": "#508878" }, "subprocess": { "shape": "subroutine", - "color": "#4c7f7f" + "color": "#887a48" }, "browser": { "shape": "hexagon", - "color": "#3a4a9b" + "color": "#cc8850" } } }, diff --git a/examples/templates/twitter_news_agent/flowchart.json b/examples/templates/twitter_news_agent/flowchart.json index 95e495ee..422d5fb4 100644 --- a/examples/templates/twitter_news_agent/flowchart.json +++ 
b/examples/templates/twitter_news_agent/flowchart.json @@ -28,7 +28,7 @@ "sub_agents": [], "flowchart_type": "browser", "flowchart_shape": "hexagon", - "flowchart_color": "#3a4a9b" + "flowchart_color": "#cc8850" }, { "id": "process-news", @@ -53,7 +53,7 @@ ], "flowchart_type": "subprocess", "flowchart_shape": "subroutine", - "flowchart_color": "#4c7f7f" + "flowchart_color": "#887a48" }, { "id": "review-digest", @@ -72,7 +72,7 @@ "sub_agents": [], "flowchart_type": "terminal", "flowchart_shape": "stadium", - "flowchart_color": "#a04444" + "flowchart_color": "#b5453a" } ], "edges": [ @@ -124,15 +124,15 @@ "flowchart_legend": { "start": { "shape": "stadium", - "color": "#3fa66a" + "color": "#8aad3f" }, "terminal": { "shape": "stadium", - "color": "#a04444" + "color": "#b5453a" }, "process": { "shape": "rectangle", - "color": "#616d83" + "color": "#b5a575" }, "decision": { "shape": "diamond", @@ -140,23 +140,23 @@ }, "io": { "shape": "parallelogram", - "color": "#7a4fa5" + "color": "#d06818" }, "document": { "shape": "document", - "color": "#507485" + "color": "#c4b830" }, "database": { "shape": "cylinder", - "color": "#459077" + "color": "#508878" }, "subprocess": { "shape": "subroutine", - "color": "#4c7f7f" + "color": "#887a48" }, "browser": { "shape": "hexagon", - "color": "#3a4a9b" + "color": "#cc8850" } } }, diff --git a/examples/templates/vulnerability_assessment/flowchart.json b/examples/templates/vulnerability_assessment/flowchart.json index 169096e6..49007f5b 100644 --- a/examples/templates/vulnerability_assessment/flowchart.json +++ b/examples/templates/vulnerability_assessment/flowchart.json @@ -29,7 +29,7 @@ "sub_agents": [], "flowchart_type": "start", "flowchart_shape": "stadium", - "flowchart_color": "#3fa66a" + "flowchart_color": "#8aad3f" }, { "id": "passive-recon", @@ -55,7 +55,7 @@ "sub_agents": [], "flowchart_type": "process", "flowchart_shape": "rectangle", - "flowchart_color": "#616d83" + "flowchart_color": "#b5a575" }, { "id": "risk-scoring", @@ 
-75,7 +75,7 @@ "sub_agents": [], "flowchart_type": "process", "flowchart_shape": "rectangle", - "flowchart_color": "#616d83" + "flowchart_color": "#b5a575" }, { "id": "findings-review", @@ -121,7 +121,7 @@ "sub_agents": [], "flowchart_type": "terminal", "flowchart_shape": "stadium", - "flowchart_color": "#a04444" + "flowchart_color": "#b5453a" } ], "edges": [ @@ -181,15 +181,15 @@ "flowchart_legend": { "start": { "shape": "stadium", - "color": "#3fa66a" + "color": "#8aad3f" }, "terminal": { "shape": "stadium", - "color": "#a04444" + "color": "#b5453a" }, "process": { "shape": "rectangle", - "color": "#616d83" + "color": "#b5a575" }, "decision": { "shape": "diamond", @@ -197,23 +197,23 @@ }, "io": { "shape": "parallelogram", - "color": "#7a4fa5" + "color": "#d06818" }, "document": { "shape": "document", - "color": "#507485" + "color": "#c4b830" }, "database": { "shape": "cylinder", - "color": "#459077" + "color": "#508878" }, "subprocess": { "shape": "subroutine", - "color": "#4c7f7f" + "color": "#887a48" }, "browser": { "shape": "hexagon", - "color": "#3a4a9b" + "color": "#cc8850" } } }, From 379d3df46b0d2c915c34839f6bdfe2be74e152b7 Mon Sep 17 00:00:00 2001 From: Timothy Date: Mon, 16 Mar 2026 19:14:45 -0700 Subject: [PATCH 30/45] feat: file path first data passing --- core/framework/graph/event_loop_node.py | 345 ++++++++++++++++++++++-- 1 file changed, 318 insertions(+), 27 deletions(-) diff --git a/core/framework/graph/event_loop_node.py b/core/framework/graph/event_loop_node.py index 5961be91..27185419 100644 --- a/core/framework/graph/event_loop_node.py +++ b/core/framework/graph/event_loop_node.py @@ -202,6 +202,14 @@ class LoopConfig: max_tool_result_chars: int = 30_000 spillover_dir: str | None = None # Path string; created on first use + # --- set_output value spilling --- + # When a set_output value exceeds this character count it is auto-saved + # to a file in *spillover_dir* and the stored value is replaced with a + # lightweight file reference. 
This keeps shared memory / adapt.md / + # transition markers small and forces the next node to load the full + # data from the file. Set to 0 to disable. + max_output_value_chars: int = 2_000 + # --- Stream retry (transient error recovery within EventLoopNode) --- # When _run_single_turn() raises a transient error (network, rate limit, # server error), retry up to this many times with exponential backoff @@ -231,6 +239,12 @@ class LoopConfig: # blocking the entire event loop indefinitely. 0 = no timeout. tool_call_timeout_seconds: float = 60.0 + # --- Subagent delegation timeout --- + # Maximum seconds a delegate_to_sub_agent call may run before being + # killed. Subagents run a full event-loop so they naturally take + # longer than a single tool call — default is 5 minutes (300 s). 0 = no timeout. + subagent_timeout_seconds: float = 300.0 + # --- Lifecycle hooks --- # Hooks are async callables keyed by event name. Supported events: # "session_start" — fires once after the first user message is added, @@ -2182,6 +2196,59 @@ class EventLoopNode(NodeProtocol): except (json.JSONDecodeError, TypeError): pass key = tc.tool_input.get("key", "") + + # Auto-spill: save large values to data files and + # replace with a lightweight file reference so shared + # memory / adapt.md / transition markers stay small.
+ spill_dir = self._config.spillover_dir + max_val = self._config.max_output_value_chars + if max_val > 0 and spill_dir: + val_str = ( + json.dumps(value, ensure_ascii=False) + if not isinstance(value, str) + else value + ) + if len(val_str) > max_val: + spill_path = Path(spill_dir) + spill_path.mkdir(parents=True, exist_ok=True) + ext = ".json" if isinstance(value, (dict, list)) else ".txt" + filename = f"output_{key}{ext}" + write_content = ( + json.dumps(value, indent=2, ensure_ascii=False) + if isinstance(value, (dict, list)) + else str(value) + ) + (spill_path / filename).write_text( + write_content, encoding="utf-8" + ) + file_size = (spill_path / filename).stat().st_size + logger.info( + "set_output value auto-spilled: key=%s, " + "%d chars → %s (%d bytes)", + key, + len(val_str), + filename, + file_size, + ) + # Replace value with reference + value = ( + f"[Saved to '{filename}' ({file_size:,} bytes). " + f"Use load_data(filename='{filename}') " + f"to access full data.]" + ) + # Update tool result to inform the LLM + result = ToolResult( + tool_use_id=tc.tool_use_id, + content=( + f"Output '{key}' was large " + f"({len(val_str):,} chars) — data saved " + f"to '{filename}' ({file_size:,} bytes). " + f"The next phase will see the file " + f"reference and can load full data." + ), + is_error=False, + ) + await accumulator.set(key, value) self._record_learning(key, value) outputs_set_this_turn.append(key) @@ -2486,20 +2553,44 @@ class EventLoopNode(NodeProtocol): # Phase 2b: execute subagent delegations in parallel. 
if pending_subagent: + _subagent_timeout = self._config.subagent_timeout_seconds + async def _timed_subagent( _ctx: NodeContext, _tc: ToolCallEvent, _acc: OutputAccumulator = accumulator, + _timeout: float = _subagent_timeout, ) -> tuple[ToolResult | BaseException, str, float]: _s = time.time() _iso = datetime.now(UTC).isoformat() try: - _r = await self._execute_subagent( + _coro = self._execute_subagent( _ctx, _tc.tool_input.get("agent_id", ""), _tc.tool_input.get("task", ""), accumulator=_acc, ) + if _timeout > 0: + _r = await asyncio.wait_for(_coro, timeout=_timeout) + else: + _r = await _coro + except TimeoutError: + _agent_id = _tc.tool_input.get("agent_id", "unknown") + logger.warning( + "Subagent '%s' timed out after %.0fs", + _agent_id, + _timeout, + ) + _r = ToolResult( + tool_use_id=_tc.tool_use_id, + content=( + f"Subagent '{_agent_id}' timed out after " + f"{_timeout:.0f}s. The delegation took " + "too long and was cancelled. Try a simpler task " + "or break it into smaller pieces." + ), + is_error=True, + ) except BaseException as _exc: _r = _exc _dur = round(time.time() - _s, 3) @@ -2842,6 +2933,12 @@ class EventLoopNode(NodeProtocol): name="set_output", description=( "Set an output value for this node. Call once per output key. " + "Use this for brief notes, counts, status, and file references — " + "NOT for large data payloads. When a tool result was saved to a " + "data file, pass the filename as the value " + "(e.g. 'google_sheets_get_values_1.txt') so the next phase can " + "load the full data. Values exceeding ~2000 characters are " + "auto-saved to data files. " f"Valid keys: {output_keys}" ), parameters={ @@ -2854,7 +2951,10 @@ class EventLoopNode(NodeProtocol): }, "value": { "type": "string", - "description": "The output value to store.", + "description": ( + "The output value — a brief note, count, status, " + "or data filename reference." 
+ ), }, }, "required": ["key", "value"], @@ -3496,6 +3596,127 @@ class EventLoopNode(NodeProtocol): self._spill_counter = max_n logger.info("Restored spill counter to %d from existing files", max_n) + # ------------------------------------------------------------------ + # JSON metadata / smart preview helpers for truncation + # ------------------------------------------------------------------ + + @staticmethod + def _extract_json_metadata( + parsed: Any, *, _depth: int = 0, _max_depth: int = 3 + ) -> str: + """Return a concise structural summary of parsed JSON. + + Reports key names, value types, and — crucially — array lengths so + the LLM knows how much data exists beyond the preview. + + Returns an empty string for simple scalars. + """ + if _depth >= _max_depth: + if isinstance(parsed, dict): + return f"dict with {len(parsed)} keys" + if isinstance(parsed, list): + return f"list of {len(parsed)} items" + return type(parsed).__name__ + + if isinstance(parsed, dict): + if not parsed: + return "empty dict" + lines: list[str] = [] + indent = " " * (_depth + 1) + for key, value in list(parsed.items())[:20]: + if isinstance(value, list): + line = f'{indent}"{key}": list of {len(value)} items' + if value: + first = value[0] + if isinstance(first, dict): + sample_keys = list(first.keys())[:10] + line += f" (each item: dict with keys {sample_keys})" + elif isinstance(first, list): + line += f" (each item: list of {len(first)} elements)" + lines.append(line) + elif isinstance(value, dict): + child = EventLoopNode._extract_json_metadata( + value, _depth=_depth + 1, _max_depth=_max_depth + ) + lines.append(f'{indent}"{key}": {child}') + else: + lines.append(f'{indent}"{key}": {type(value).__name__}') + if len(parsed) > 20: + lines.append(f"{indent}... 
and {len(parsed) - 20} more keys") + return "\n".join(lines) + + if isinstance(parsed, list): + if not parsed: + return "empty list" + desc = f"list of {len(parsed)} items" + first = parsed[0] + if isinstance(first, dict): + sample_keys = list(first.keys())[:10] + desc += f" (each item: dict with keys {sample_keys})" + elif isinstance(first, list): + desc += f" (each item: list of {len(first)} elements)" + return desc + + return "" + + @staticmethod + def _build_json_preview(parsed: Any, *, max_chars: int = 5000) -> str | None: + """Build a smart preview of parsed JSON, truncating large arrays. + + Shows first 3 + last 1 items of large arrays with explicit count + markers so the LLM cannot mistake the preview for the full dataset. + + Returns ``None`` if no truncation was needed (no large arrays). + """ + _LARGE_ARRAY_THRESHOLD = 10 + + def _truncate_arrays(obj: Any) -> tuple[Any, bool]: + """Return (truncated_copy, was_truncated).""" + if isinstance(obj, list) and len(obj) > _LARGE_ARRAY_THRESHOLD: + n = len(obj) + head = obj[:3] + tail = obj[-1:] + marker = f"... ({n - 4} more items omitted, {n} total) ..." + return head + [marker] + tail, True + if isinstance(obj, dict): + changed = False + out: dict[str, Any] = {} + for k, v in obj.items(): + new_v, did = _truncate_arrays(v) + out[k] = new_v + changed = changed or did + return (out, True) if changed else (obj, False) + return obj, False + + preview_obj, was_truncated = _truncate_arrays(parsed) + if not was_truncated: + return None # No large arrays — caller should use raw slicing + + try: + result = json.dumps(preview_obj, indent=2, ensure_ascii=False) + except (TypeError, ValueError): + return None + + if len(result) > max_chars: + # Even 3+1 items too big — try just 1 item + def _minimal_arrays(obj: Any) -> Any: + if isinstance(obj, list) and len(obj) > _LARGE_ARRAY_THRESHOLD: + n = len(obj) + return obj[:1] + [f"... 
({n - 1} more items omitted, {n} total) ..."] + if isinstance(obj, dict): + return {k: _minimal_arrays(v) for k, v in obj.items()} + return obj + + preview_obj = _minimal_arrays(parsed) + try: + result = json.dumps(preview_obj, indent=2, ensure_ascii=False) + except (TypeError, ValueError): + return None + if len(result) > max_chars: + result = result[:max_chars] + "…" + + return result + def _truncate_tool_result( self, result: ToolResult, @@ -3524,15 +3745,38 @@ class EventLoopNode(NodeProtocol): if tool_name == "load_data": if limit <= 0 or len(result.content) <= limit: return result # Small load_data result — pass through as-is - # Large load_data result — truncate with pagination hint - preview_chars = max(limit - 300, limit // 2) - preview = result.content[:preview_chars] - truncated = ( - f"[{tool_name} result: {len(result.content)} chars — " - f"too large for context. Use offset/limit parameters " - f"to read smaller chunks.]\n\n" - f"Preview:\n{preview}…" + # Large load_data result — truncate with smart preview + PREVIEW_CAP = min(5000, max(limit - 500, limit // 2)) + + metadata_str = "" + smart_preview: str | None = None + try: + parsed_ld = json.loads(result.content) + metadata_str = self._extract_json_metadata(parsed_ld) + smart_preview = self._build_json_preview( + parsed_ld, max_chars=PREVIEW_CAP + ) + except (json.JSONDecodeError, TypeError, ValueError): + pass + + if smart_preview is not None: + preview_block = smart_preview + else: + preview_block = result.content[:PREVIEW_CAP] + "…" + + header = ( + f"[{tool_name} result: {len(result.content):,} chars — " + f"too large for context. Use offset_bytes/limit_bytes " + f"parameters to read smaller chunks.]" ) + if metadata_str: + header += f"\n\nData structure:\n{metadata_str}" + header += ( + "\n\nWARNING: This is an INCOMPLETE preview. " + "Do NOT draw conclusions or counts from it." 
+ ) + + truncated = f"{header}\n\nPreview (small sample only):\n{preview_block}" logger.info( "%s result truncated: %d → %d chars (use offset/limit to paginate)", tool_name, @@ -3554,25 +3798,49 @@ class EventLoopNode(NodeProtocol): # Pretty-print JSON content so load_data's line-based # pagination works correctly. write_content = result.content + parsed_json: Any = None # track for metadata extraction try: - parsed = json.loads(result.content) - write_content = json.dumps(parsed, indent=2, ensure_ascii=False) + parsed_json = json.loads(result.content) + write_content = json.dumps(parsed_json, indent=2, ensure_ascii=False) except (json.JSONDecodeError, TypeError, ValueError): pass # Not JSON — write as-is (spill_path / filename).write_text(write_content, encoding="utf-8") if limit > 0 and len(result.content) > limit: - # Large result: preview + file reference - preview_chars = max(limit - 300, limit // 2) - preview = result.content[:preview_chars] - content = ( - f"[Result from {tool_name}: {len(result.content)} chars — " - f"too large for context, saved to '{filename}'. " - f"Use load_data(filename='{filename}') " - f"to read the full result.]\n\n" - f"Preview:\n{preview}…" + # Large result: build a small, metadata-rich preview so the + # LLM cannot mistake it for the complete dataset. 
+ PREVIEW_CAP = 5000 + + # Extract structural metadata (array lengths, key names) + metadata_str = "" + smart_preview: str | None = None + if parsed_json is not None: + metadata_str = self._extract_json_metadata(parsed_json) + smart_preview = self._build_json_preview( + parsed_json, max_chars=PREVIEW_CAP + ) + + if smart_preview is not None: + preview_block = smart_preview + else: + preview_block = result.content[:PREVIEW_CAP] + "…" + + # Assemble header with structural info + warning + header = ( + f"[Result from {tool_name}: {len(result.content):,} chars — " + f"too large for context, saved to '{filename}'.]" ) + if metadata_str: + header += f"\n\nData structure:\n{metadata_str}" + header += ( + f"\n\nWARNING: The preview below is INCOMPLETE. " + f"Do NOT draw conclusions or counts from it. " + f"Use load_data(filename='{filename}') to read the " + f"full data before analysis." + ) + + content = f"{header}\n\nPreview (small sample only):\n{preview_block}" logger.info( "Tool result spilled to file: %s (%d chars → %s)", tool_name, @@ -3597,13 +3865,36 @@ class EventLoopNode(NodeProtocol): # No spillover_dir — truncate in-place if needed if limit > 0 and len(result.content) > limit: - preview_chars = max(limit - 300, limit // 2) - preview = result.content[:preview_chars] - truncated = ( - f"[Result from {tool_name}: {len(result.content)} chars — " - f"truncated to fit context budget. 
Only the first " - f"{preview_chars} chars are shown.]\n\n{preview}…" + PREVIEW_CAP = min(5000, max(limit - 500, limit // 2)) + + metadata_str = "" + smart_preview: str | None = None + try: + parsed_inline = json.loads(result.content) + metadata_str = self._extract_json_metadata(parsed_inline) + smart_preview = self._build_json_preview( + parsed_inline, max_chars=PREVIEW_CAP + ) + except (json.JSONDecodeError, TypeError, ValueError): + pass + + if smart_preview is not None: + preview_block = smart_preview + else: + preview_block = result.content[:PREVIEW_CAP] + "…" + + header = ( + f"[Result from {tool_name}: {len(result.content):,} chars — " + f"truncated to fit context budget.]" ) + if metadata_str: + header += f"\n\nData structure:\n{metadata_str}" + header += ( + "\n\nWARNING: This is an INCOMPLETE preview. " + "Do NOT draw conclusions or counts from the preview alone." + ) + + truncated = f"{header}\n\n{preview_block}" logger.info( "Tool result truncated in-place: %s (%d → %d chars)", tool_name, From b51e688d1aa0c1265ef088fed52df82c31306df9 Mon Sep 17 00:00:00 2001 From: bryan Date: Mon, 16 Mar 2026 19:17:16 -0700 Subject: [PATCH 31/45] feat: transition when loading --- core/framework/runner/tool_registry.py | 22 +++- core/frontend/src/components/DraftGraph.tsx | 133 +++++++++++++++++--- core/frontend/src/pages/workspace.tsx | 84 +++++++++++-- 3 files changed, 205 insertions(+), 34 deletions(-) diff --git a/core/framework/runner/tool_registry.py b/core/framework/runner/tool_registry.py index 04db6917..542099c6 100644 --- a/core/framework/runner/tool_registry.py +++ b/core/framework/runner/tool_registry.py @@ -455,11 +455,23 @@ class ToolRegistry: for server_config in server_list: server_config = self._resolve_mcp_server_config(server_config, base_dir) - try: - self.register_mcp_server(server_config) - except Exception as e: - name = server_config.get("name", "unknown") - logger.warning(f"Failed to register MCP server '{name}': {e}") + for _attempt in range(2): 
+ try: + self.register_mcp_server(server_config) + break + except Exception as e: + name = server_config.get("name", "unknown") + if _attempt == 0: + logger.warning( + "MCP server '%s' failed to register, retrying in 2s: %s", + name, e, + ) + import time + time.sleep(2) + else: + logger.warning( + "MCP server '%s' failed after retry: %s", name, e + ) # Snapshot credential files and ADEN_API_KEY so we can detect mid-session changes self._mcp_cred_snapshot = self._snapshot_credentials() diff --git a/core/frontend/src/components/DraftGraph.tsx b/core/frontend/src/components/DraftGraph.tsx index d4f1fd4a..c70107fb 100644 --- a/core/frontend/src/components/DraftGraph.tsx +++ b/core/frontend/src/components/DraftGraph.tsx @@ -1,4 +1,4 @@ -import { useEffect, useMemo, useRef, useState, useCallback } from "react"; +import { useEffect, useLayoutEffect, useMemo, useRef, useState, useCallback } from "react"; import { Loader2 } from "lucide-react"; import type { DraftGraph as DraftGraphData, DraftNode } from "@/api/types"; import { RunButton } from "./RunButton"; @@ -74,6 +74,8 @@ type DraftNodeStatus = "pending" | "running" | "complete" | "error"; interface DraftGraphProps { draft: DraftGraphData | null; + /** The post-build originalDraft — animation fires when this changes to a new non-null value. */ + originalDraft?: DraftGraphData | null; onNodeClick?: (node: DraftNode) => void; /** Runtime node ID → list of original draft node IDs (post-dissolution mapping). */ flowchartMap?: Record; @@ -83,8 +85,8 @@ interface DraftGraphProps { onRuntimeNodeClick?: (runtimeNodeId: string) => void; /** True while the queen is building the agent from the draft. */ building?: boolean; - /** True while the queen is designing the draft (no draft yet). Shows a spinner. */ - loading?: boolean; + /** Message to show with a spinner while loading/designing. Null = no spinner. */ + loadingMessage?: string | null; /** Called when the user clicks Run. 
*/ onRun?: () => void; /** Called when the user clicks Pause. */ @@ -231,7 +233,7 @@ function Tooltip({ node, style }: { node: DraftNode; style: React.CSSProperties ); } -export default function DraftGraph({ draft, onNodeClick, flowchartMap, runtimeNodes, onRuntimeNodeClick, building, loading, onRun, onPause, runState = "idle" }: DraftGraphProps) { +export default function DraftGraph({ draft, originalDraft, onNodeClick, flowchartMap, runtimeNodes, onRuntimeNodeClick, building, loadingMessage, onRun, onPause, runState = "idle" }: DraftGraphProps) { const [hoveredNode, setHoveredNode] = useState(null); const [mousePos, setMousePos] = useState<{ x: number; y: number } | null>(null); const containerRef = useRef(null); @@ -239,6 +241,37 @@ export default function DraftGraph({ draft, onNodeClick, flowchartMap, runtimeNo const [containerW, setContainerW] = useState(484); const chrome = useDraftChromeColors(); + // ── Entrance animation — fires when originalDraft becomes a new non-null value ── + // This covers: agent loaded, build finished, queen modifies flowchart. + // Tab switches remount via React key={activeWorker}, resetting all refs. + const prevOriginalDraft = useRef(null); + const pendingAnimation = useRef(false); + const [entrancePhase, setEntrancePhase] = useState<"idle" | "hidden" | "visible">("idle"); + + const nodes = draft?.nodes ?? []; + + useLayoutEffect(() => { + const prev = prevOriginalDraft.current; + prevOriginalDraft.current = originalDraft ?? 
null; + + // Detect a new non-null originalDraft (object identity — each API/SSE response is a fresh object) + if (originalDraft && originalDraft !== prev) { + pendingAnimation.current = true; + } + + // Fire when we have a pending animation, nodes are ready, and not mid-build + if (pendingAnimation.current && nodes.length > 0 && !building) { + pendingAnimation.current = false; + setEntrancePhase("hidden"); + let raf1 = 0, raf2 = 0; + raf1 = requestAnimationFrame(() => { + raf2 = requestAnimationFrame(() => setEntrancePhase("visible")); + }); + const t = setTimeout(() => setEntrancePhase("idle"), nodes.length * 120 + 1000); + return () => { clearTimeout(t); cancelAnimationFrame(raf1); cancelAnimationFrame(raf2); }; + } + }, [originalDraft, nodes.length, building]); + // Shift-to-pin tooltip const shiftHeld = useRef(false); useEffect(() => { @@ -339,7 +372,6 @@ export default function DraftGraph({ draft, onNodeClick, flowchartMap, runtimeNo const hasStatusOverlay = Object.keys(nodeStatuses).length > 0; - const nodes = draft?.nodes ?? []; const edges = draft?.edges ?? []; const idxMap = useMemo( @@ -413,6 +445,11 @@ export default function DraftGraph({ draft, onNodeClick, flowchartMap, runtimeNo layerGroups.forEach((group) => { maxCols = Math.max(maxCols, group.length); }); + // Ensure maxCols accommodates any parent's children fan-out + // (prevents fan-out scaling from collapsing to zero) + children.forEach((kids) => { + maxCols = Math.max(maxCols, kids.length); + }); // Compute node width — keep back-edge overflow out of node sizing so nodes // get full width. The viewBox is expanded later to fit back-edge curves. 
@@ -518,6 +555,17 @@ export default function DraftGraph({ draft, onNodeClick, flowchartMap, runtimeNo } } + // Post-process: enforce minimum spacing within each layer + for (const [, group] of layerGroups) { + if (group.length <= 1) continue; + const sorted = [...group].sort((a, b) => colPos[a] - colPos[b]); + for (let j = 1; j < sorted.length; j++) { + if (colPos[sorted[j]] < colPos[sorted[j - 1]] + 1) { + colPos[sorted[j]] = colPos[sorted[j - 1]] + 1; + } + } + } + // Convert fractional column positions to pixel X positions const colSpacing = nodeW + GAP_X; const usedMin = Math.min(...colPos); @@ -670,13 +718,13 @@ export default function DraftGraph({ draft, onNodeClick, flowchartMap, runtimeNo // Compute group areas for runtime node boundaries on the draft const groupAreas = useMemo(() => { - if (!flowchartMap || !runtimeNodes?.length) return []; + if (!flowchartMap) return []; const groups: { runtimeId: string; label: string; draftIds: string[] }[] = []; for (const [runtimeId, draftIds] of Object.entries(flowchartMap)) { groups.push({ runtimeId, label: formatNodeId(runtimeId), draftIds }); } return groups; - }, [flowchartMap, runtimeNodes]); + }, [flowchartMap]); // Legend const usedTypes = (() => { @@ -714,12 +762,27 @@ export default function DraftGraph({ draft, onNodeClick, flowchartMap, runtimeNo ? `M ${startX} ${y1} L ${toCenterX} ${y2}` : `M ${startX} ${y1} L ${startX} ${midY} L ${toCenterX} ${midY} L ${toCenterX} ${y2}`; + // Edge draw-in animation (stroke-dashoffset) + const isAnimating = entrancePhase !== "idle"; + const pathLength = Math.abs(y2 - y1) + Math.abs(startX - toCenterX) + 1; + const edgeDelay = 200 + i * 80; + const edgeStyle: React.CSSProperties | undefined = isAnimating ? { + strokeDasharray: pathLength, + strokeDashoffset: entrancePhase === "hidden" ? pathLength : 0, + transition: `stroke-dashoffset 400ms ease-in-out ${edgeDelay}ms`, + } : undefined; + const edgeEndStyle: React.CSSProperties | undefined = isAnimating ? 
{ + opacity: entrancePhase === "hidden" ? 0 : 1, + transition: `opacity 100ms ease-out ${edgeDelay + 350}ms`, + } : undefined; + return ( - + {edge.label && ( {truncateLabel(edge.label, 80, 9)} @@ -751,12 +815,26 @@ export default function DraftGraph({ draft, onNodeClick, flowchartMap, runtimeNo const path = `M ${startX} ${startY} C ${startX + r} ${startY}, ${curveX} ${startY}, ${curveX} ${startY - r} L ${curveX} ${endY + r} C ${curveX} ${endY}, ${endX + r} ${endY}, ${endX + 5} ${endY}`; + // Back-edge draw-in animation (starts after forward edges) + const isAnimating = entrancePhase !== "idle"; + const backPathLength = Math.abs(curveX - startX) + Math.abs(startY - endY) + Math.abs(curveX - endX) + 20; + const backDelay = nodes.length * 120 + 300 + i * 80; + const backEdgeStyle: React.CSSProperties | undefined = isAnimating ? { + strokeDashoffset: entrancePhase === "hidden" ? backPathLength : 0, + transition: `stroke-dashoffset 400ms ease-in-out ${backDelay}ms`, + } : undefined; + const backEndStyle: React.CSSProperties | undefined = isAnimating ? { + opacity: entrancePhase === "hidden" ? 0 : 1, + transition: `opacity 100ms ease-out ${backDelay + 350}ms`, + } : undefined; + return ( - + ); @@ -800,7 +878,13 @@ export default function DraftGraph({ draft, onNodeClick, flowchartMap, runtimeNo if (rect) setMousePos({ x: e.clientX - rect.left, y: e.clientY - rect.top }); }} onMouseLeave={() => { if (!shiftHeld.current) { setHoveredNode(null); setMousePos(null); } }} - style={{ cursor: "pointer" }} + style={{ + cursor: "pointer", + ...(entrancePhase !== "idle" ? { + opacity: entrancePhase === "hidden" ? 0 : 1, + transition: `opacity 300ms ease-out ${i * 120}ms`, + } : {}), + }} >

Draft

- planning
- {loading || !draft ? ( + {loadingMessage ? ( <> -

Loading flowchart…

+

{loadingMessage}

) : (

@@ -878,6 +961,11 @@ export default function DraftGraph({ draft, onNodeClick, flowchartMap, runtimeNo building + ) : loadingMessage ? ( + + + updating + ) : ( {hasStatusOverlay ? "live" : "planning"} @@ -897,8 +985,12 @@ export default function DraftGraph({ draft, onNodeClick, flowchartMap, runtimeNo onMouseMove={handleMouseMove} onMouseUp={handleMouseUp} onMouseLeave={handleMouseUp} - className={`w-full h-full${building ? " opacity-30" : ""}`} - style={{ cursor: dragging ? "grabbing" : "grab" }} + className="w-full h-full" + style={{ + opacity: building || loadingMessage ? 0.3 : 1, + transition: building || loadingMessage ? "none" : "opacity 300ms ease-out", + cursor: dragging ? "grabbing" : "grab", + }} > )} + {!building && loadingMessage && ( +

+
+ +

{loadingMessage}

+
+
+ )} + {/* Zoom controls */}
+ )} + + )}
) : null; })()} @@ -3100,14 +3217,17 @@ export default function Workspace() { if (!sessionId) return; setTriggerTaskSaving(true); try { - await sessionsApi.updateTriggerTask(sessionId, triggerId, triggerTaskDraft); + await sessionsApi.updateTrigger(sessionId, triggerId, { task: triggerTaskDraft }); + patchTriggerNode(activeWorker, resolvedSelectedNode.id, { task: triggerTaskDraft }); + setTriggerTaskSaved(true); + setTimeout(() => setTriggerTaskSaved(false), 2000); } finally { setTriggerTaskSaving(false); } }} className="mt-1.5 w-full text-[11px] px-3 py-1.5 rounded-lg border border-primary/30 text-primary hover:bg-primary/10 transition-colors disabled:opacity-50" > - {triggerTaskSaving ? "Saving..." : "Save Task"} + {triggerTaskSaving ? "Saving..." : triggerTaskSaved ? "Saved" : "Save Task"} ); })()} From 22f5534f083e7ba4f7429434b7518fa29756f66c Mon Sep 17 00:00:00 2001 From: Antiarin Date: Tue, 17 Mar 2026 04:01:31 +0530 Subject: [PATCH 41/45] fix: ensure Queen calls remove_trigger when user asks to remove scheduler Added explicit prompt guidance requiring the Queen to call the remove_trigger tool instead of just saying "it's removed." --- core/framework/agents/queen/nodes/__init__.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/core/framework/agents/queen/nodes/__init__.py b/core/framework/agents/queen/nodes/__init__.py index 821d2d69..9b9d3e4f 100644 --- a/core/framework/agents/queen/nodes/__init__.py +++ b/core/framework/agents/queen/nodes/__init__.py @@ -1144,6 +1144,8 @@ Batch your response — do not call run_agent_with_input() once per trigger. config since last run), skip it and inform the user. - Never disable a trigger without telling the user. Use remove_trigger() only \ when explicitly asked or when the trigger is clearly obsolete. +- When the user asks to remove or disable a trigger, you MUST call remove_trigger(trigger_id). \ +Never just say "it's removed" without actually calling the tool. 
""" # -- Backward-compatible composed versions (used by queen_node.system_prompt default) -- From 738641d35f5b6e16a403d173f6cb123758a8862b Mon Sep 17 00:00:00 2001 From: Antiarin Date: Tue, 17 Mar 2026 05:42:20 +0530 Subject: [PATCH 42/45] fix: correct trigger target, label, and SSE event data - Add name and entry_node to all trigger SSE events (TRIGGER_AVAILABLE, TRIGGER_ACTIVATED, TRIGGER_DEACTIVATED) so frontend gets correct data immediately instead of guessing - Use ep.entry_node from backend in polling instead of guessing first non-trigger node - Compute cronToLabel from trigger config during polling so pill labels show human-readable schedule - Fix AsyncMock for event_bus.publish in tests --- core/framework/server/routes_sessions.py | 6 ++ core/framework/server/session_manager.py | 6 ++ core/framework/server/tests/test_api.py | 1 + core/framework/tools/queen_lifecycle_tools.py | 13 +++- core/frontend/src/lib/graphUtils.ts | 2 +- core/frontend/src/pages/workspace.tsx | 69 ++++++++++++++----- 6 files changed, 79 insertions(+), 18 deletions(-) diff --git a/core/framework/server/routes_sessions.py b/core/framework/server/routes_sessions.py index ea5927a0..8a2b3775 100644 --- a/core/framework/server/routes_sessions.py +++ b/core/framework/server/routes_sessions.py @@ -544,6 +544,12 @@ async def handle_update_trigger_task(request: web.Request) -> web.Response: "task": tdef.task, "trigger_config": tdef.trigger_config, "trigger_type": tdef.trigger_type, + "name": tdef.description or trigger_id, + "entry_node": getattr( + getattr(getattr(session, "runner", None), "graph", None), + "entry_node", + None, + ), }, ) ) diff --git a/core/framework/server/session_manager.py b/core/framework/server/session_manager.py index 40834719..ffd61121 100644 --- a/core/framework/server/session_manager.py +++ b/core/framework/server/session_manager.py @@ -868,6 +868,10 @@ class SessionManager: event_type = ( EventType.TRIGGER_AVAILABLE if kind == "available" else 
EventType.TRIGGER_REMOVED ) + # Resolve graph entry node for trigger target + runner = getattr(session, "runner", None) + graph_entry = runner.graph.entry_node if runner else None + for t in triggers.values(): await session.event_bus.publish( AgentEvent( @@ -877,6 +881,8 @@ class SessionManager: "trigger_id": t.id, "trigger_type": t.trigger_type, "trigger_config": t.trigger_config, + "name": t.description or t.id, + **({"entry_node": graph_entry} if graph_entry else {}), }, ) ) diff --git a/core/framework/server/tests/test_api.py b/core/framework/server/tests/test_api.py index cef10483..4815192f 100644 --- a/core/framework/server/tests/test_api.py +++ b/core/framework/server/tests/test_api.py @@ -174,6 +174,7 @@ def _make_session( runner.intro_message = "Test intro" mock_event_bus = MagicMock() + mock_event_bus.publish = AsyncMock() mock_llm = MagicMock() queen_executor = _make_queen_executor() if with_queen else None diff --git a/core/framework/tools/queen_lifecycle_tools.py b/core/framework/tools/queen_lifecycle_tools.py index a9c88362..e350ad8a 100644 --- a/core/framework/tools/queen_lifecycle_tools.py +++ b/core/framework/tools/queen_lifecycle_tools.py @@ -3692,6 +3692,8 @@ def register_queen_lifecycle_tools( _save_trigger_to_agent(session, trigger_id, tdef) bus = getattr(session, "event_bus", None) if bus: + _runner = getattr(session, "runner", None) + _graph_entry = _runner.graph.entry_node if _runner else None await bus.publish( AgentEvent( type=EventType.TRIGGER_ACTIVATED, @@ -3700,6 +3702,8 @@ def register_queen_lifecycle_tools( "trigger_id": trigger_id, "trigger_type": t_type, "trigger_config": t_config, + "name": tdef.description or trigger_id, + **({"entry_node": _graph_entry} if _graph_entry else {}), }, ) ) @@ -3752,6 +3756,8 @@ def register_queen_lifecycle_tools( # Emit event bus = getattr(session, "event_bus", None) if bus: + _runner = getattr(session, "runner", None) + _graph_entry = _runner.graph.entry_node if _runner else None await bus.publish( 
AgentEvent( type=EventType.TRIGGER_ACTIVATED, @@ -3760,6 +3766,8 @@ def register_queen_lifecycle_tools( "trigger_id": trigger_id, "trigger_type": t_type, "trigger_config": t_config, + "name": tdef.description or trigger_id, + **({"entry_node": _graph_entry} if _graph_entry else {}), }, ) ) @@ -3858,7 +3866,10 @@ def register_queen_lifecycle_tools( AgentEvent( type=EventType.TRIGGER_DEACTIVATED, stream_id="queen", - data={"trigger_id": trigger_id}, + data={ + "trigger_id": trigger_id, + "name": tdef.description or trigger_id if tdef else trigger_id, + }, ) ) diff --git a/core/frontend/src/lib/graphUtils.ts b/core/frontend/src/lib/graphUtils.ts index 3171cf09..44e9f741 100644 --- a/core/frontend/src/lib/graphUtils.ts +++ b/core/frontend/src/lib/graphUtils.ts @@ -51,7 +51,7 @@ export const TRIGGER_ICONS: Record = { /** Format a cron expression into a human-readable schedule label. */ export function cronToLabel(cron: string): string { const parts = cron.trim().split(/\s+/); - if (parts.length < 5) return cron; + if (parts.length !== 5) return cron; const [min, hour, dom, mon, dow] = parts; // */N * * * * -> "Every Nm" diff --git a/core/frontend/src/pages/workspace.tsx b/core/frontend/src/pages/workspace.tsx index a21c24fb..c68e4b7b 100644 --- a/core/frontend/src/pages/workspace.tsx +++ b/core/frontend/src/pages/workspace.tsx @@ -1265,12 +1265,28 @@ export default function Workspace() { const fireMap = new Map(); const taskMap = new Map(); + const labelMap = new Map(); + const targetMap = new Map(); for (const ep of triggerEps) { + const nodeId = `__trigger_${ep.id}`; if (ep.next_fire_in != null) { - fireMap.set(`__trigger_${ep.id}`, ep.next_fire_in); + fireMap.set(nodeId, ep.next_fire_in); } if (ep.task != null) { - taskMap.set(`__trigger_${ep.id}`, ep.task); + taskMap.set(nodeId, ep.task); + } + const cron = ep.trigger_config?.cron as string | undefined; + const interval = ep.trigger_config?.interval_minutes as number | undefined; + const epLabel = cron + ? 
cronToLabel(cron) + : interval + ? `Every ${interval >= 60 ? `${interval / 60}h` : `${interval}m`}` + : ep.name || undefined; + if (epLabel) { + labelMap.set(nodeId, epLabel); + } + if (ep.entry_node) { + targetMap.set(nodeId, ep.entry_node); } } @@ -1279,14 +1295,18 @@ export default function Workspace() { if (!ss?.length) return prev; const existingIds = new Set(ss[0].graphNodes.map(n => n.id)); - // Update existing trigger nodes + // Update existing trigger nodes (countdown, task, label, target) let updated = ss[0].graphNodes.map((n) => { if (n.nodeType !== "trigger") return n; const nfi = fireMap.get(n.id); const task = taskMap.get(n.id); - if (nfi == null && task == null) return n; + const label = labelMap.get(n.id); + const target = targetMap.get(n.id); + if (nfi == null && task == null && !label && !target) return n; return { ...n, + ...(label && label !== n.label ? { label } : {}), + ...(target ? { next: [target] } : {}), triggerConfig: { ...n.triggerConfig, ...(nfi != null ? { next_fire_in: nfi } : {}), @@ -1296,14 +1316,15 @@ export default function Workspace() { }); // Discover new triggers not yet in the graph - const entryNode = ss[0].graphNodes.find(n => n.nodeType !== "trigger")?.id; + const fallbackEntry = ss[0].graphNodes.find(n => n.nodeType !== "trigger")?.id; const newNodes: GraphNode[] = []; for (const ep of triggerEps) { const nodeId = `__trigger_${ep.id}`; if (existingIds.has(nodeId)) continue; + const target = ep.entry_node || fallbackEntry; newNodes.push({ id: nodeId, - label: ep.name || ep.id, + label: labelMap.get(nodeId) || ep.name || ep.id, status: "pending", nodeType: "trigger", triggerType: ep.trigger_type, @@ -1312,7 +1333,7 @@ export default function Workspace() { ...(ep.next_fire_in != null ? { next_fire_in: ep.next_fire_in } : {}), ...(ep.task ? { task: ep.task } : {}), }, - ...(entryNode ? { next: [entryNode] } : {}), + ...(target ? 
{ next: [target] } : {}), }); } if (newNodes.length > 0) { @@ -2242,10 +2263,18 @@ export default function Workspace() { // Synthesize new trigger node at the front of the graph const triggerType = (event.data?.trigger_type as string) || "timer"; const triggerConfig = (event.data?.trigger_config as Record) || {}; - const entryNode = s.graphNodes.find(n => n.nodeType !== "trigger")?.id; + const entryNode = (event.data?.entry_node as string) || s.graphNodes.find(n => n.nodeType !== "trigger")?.id; + const triggerName = (event.data?.name as string) || triggerId; + const _cron = triggerConfig.cron as string | undefined; + const _interval = triggerConfig.interval_minutes as number | undefined; + const computedLabel = _cron + ? cronToLabel(_cron) + : _interval + ? `Every ${_interval >= 60 ? `${_interval / 60}h` : `${_interval}m`}` + : triggerName; const newNode: GraphNode = { id: nodeId, - label: triggerId, + label: computedLabel, status: "running", nodeType: "trigger", triggerType, @@ -2310,10 +2339,18 @@ export default function Workspace() { if (s.graphNodes.some(n => n.id === nodeId)) return s; const triggerType = (event.data?.trigger_type as string) || "timer"; const triggerConfig = (event.data?.trigger_config as Record) || {}; - const entryNode = s.graphNodes.find(n => n.nodeType !== "trigger")?.id; + const entryNode = (event.data?.entry_node as string) || s.graphNodes.find(n => n.nodeType !== "trigger")?.id; + const triggerName = (event.data?.name as string) || triggerId; + const _cron2 = triggerConfig.cron as string | undefined; + const _interval2 = triggerConfig.interval_minutes as number | undefined; + const computedLabel2 = _cron2 + ? cronToLabel(_cron2) + : _interval2 + ? `Every ${_interval2 >= 60 ? 
`${_interval2 / 60}h` : `${_interval2}m`}` + : triggerName; const newNode: GraphNode = { id: nodeId, - label: triggerId, + label: computedLabel2, status: "pending", nodeType: "trigger", triggerType, @@ -2441,7 +2478,7 @@ export default function Workspace() { if (n.id !== triggerNodeId) return n; return { ...n, - ...(patch.label ? { label: patch.label } : {}), + ...(patch.label !== undefined ? { label: patch.label } : {}), triggerConfig: { ...n.triggerConfig, ...(patch.trigger_config || {}), @@ -3150,9 +3187,9 @@ export default function Workspace() {

Edit the cron expression for this timer trigger.

- {cronChanged && ( + {(cronChanged || triggerCronSaved) && ( - -
- - - -""" -) - - -# ------------------------------------------------------------------------- -# WebSocket handler -# ------------------------------------------------------------------------- - - -async def handle_ws(websocket): - """Persistent WebSocket: long-lived EventLoopNode with client_facing blocking.""" - global STORE - - # -- Event forwarding (WebSocket ← EventBus) ---------------------------- - bus = EventBus() - - async def forward_event(event): - try: - payload = {"type": event.type.value, **event.data} - if event.node_id: - payload["node_id"] = event.node_id - await websocket.send(json.dumps(payload)) - except Exception: - pass - - bus.subscribe( - event_types=[ - EventType.NODE_LOOP_STARTED, - EventType.NODE_LOOP_ITERATION, - EventType.NODE_LOOP_COMPLETED, - EventType.LLM_TEXT_DELTA, - EventType.TOOL_CALL_STARTED, - EventType.TOOL_CALL_COMPLETED, - EventType.NODE_STALLED, - ], - handler=forward_event, - ) - - # -- Per-connection state ----------------------------------------------- - node = None - loop_task = None - - tools = list(TOOL_REGISTRY.get_tools().values()) - tool_executor = TOOL_REGISTRY.get_executor() - - node_spec = NodeSpec( - id="assistant", - name="Chat Assistant", - description="A conversational assistant that remembers context across messages", - node_type="event_loop", - client_facing=True, - system_prompt=( - "You are a helpful assistant with access to tools. " - "You can search the web, scrape webpages, and query HubSpot CRM. " - "Use tools when the user asks for current information or external data. " - "You have full conversation history, so you can reference previous messages." 
- ), - ) - - # -- Ready callback: subscribe to CLIENT_INPUT_REQUESTED on the bus --- - async def on_input_requested(event): - try: - await websocket.send(json.dumps({"type": "ready"})) - except Exception: - pass - - bus.subscribe( - event_types=[EventType.CLIENT_INPUT_REQUESTED], - handler=on_input_requested, - ) - - async def start_loop(first_message: str): - """Create an EventLoopNode and run it as a background task.""" - nonlocal node, loop_task - - memory = SharedMemory() - ctx = NodeContext( - runtime=RUNTIME, - node_id="assistant", - node_spec=node_spec, - memory=memory, - input_data={}, - llm=LLM, - available_tools=tools, - ) - node = EventLoopNode( - event_bus=bus, - config=LoopConfig(max_iterations=10_000, max_context_tokens=32_000), - conversation_store=STORE, - tool_executor=tool_executor, - ) - await node.inject_event(first_message) - - async def _run(): - try: - result = await node.execute(ctx) - try: - await websocket.send( - json.dumps( - { - "type": "result", - "success": result.success, - "output": result.output, - "error": result.error, - "tokens": result.tokens_used, - } - ) - ) - except Exception: - pass - logger.info(f"Loop ended: success={result.success}, tokens={result.tokens_used}") - except websockets.exceptions.ConnectionClosed: - logger.info("Loop stopped: WebSocket closed") - except Exception as e: - logger.exception("Loop error") - try: - await websocket.send( - json.dumps( - { - "type": "result", - "success": False, - "error": str(e), - "output": {}, - } - ) - ) - except Exception: - pass - - loop_task = asyncio.create_task(_run()) - - async def stop_loop(): - """Signal the node and wait for the loop task to finish.""" - nonlocal node, loop_task - if loop_task and not loop_task.done(): - if node: - node.signal_shutdown() - try: - await asyncio.wait_for(loop_task, timeout=5.0) - except (TimeoutError, asyncio.CancelledError): - loop_task.cancel() - node = None - loop_task = None - - # -- Message loop (runs for the lifetime of this 
WebSocket) ------------- - try: - async for raw in websocket: - try: - msg = json.loads(raw) - except Exception: - continue - - # Clear command - if msg.get("command") == "clear": - import shutil - - await stop_loop() - await STORE.close() - conv_dir = STORE_DIR / "conversation" - if conv_dir.exists(): - shutil.rmtree(conv_dir) - STORE = FileConversationStore(conv_dir) - await websocket.send(json.dumps({"type": "cleared"})) - logger.info("Conversation cleared") - continue - - topic = msg.get("topic", "") - if not topic: - continue - - if node is None: - # First message — spin up the loop - logger.info(f"Starting persistent loop: {topic}") - await start_loop(topic) - else: - # Subsequent message — inject into the running loop - logger.info(f"Injecting message: {topic}") - await node.inject_event(topic) - - except websockets.exceptions.ConnectionClosed: - pass - finally: - await stop_loop() - logger.info("WebSocket closed, loop stopped") - - -# ------------------------------------------------------------------------- -# HTTP handler for serving the HTML page -# ------------------------------------------------------------------------- - - -async def process_request(connection, request: Request): - """Serve HTML on GET /, upgrade to WebSocket on /ws.""" - if request.path == "/ws": - return None # let websockets handle the upgrade - # Serve the HTML page for any other path - return Response( - HTTPStatus.OK, - "OK", - websockets.Headers({"Content-Type": "text/html; charset=utf-8"}), - HTML_PAGE.encode(), - ) - - -# ------------------------------------------------------------------------- -# Main -# ------------------------------------------------------------------------- - - -async def main(): - port = 8765 - async with websockets.serve( - handle_ws, - "0.0.0.0", - port, - process_request=process_request, - ): - logger.info(f"Demo running at http://localhost:{port}") - logger.info("Open in your browser and enter a topic to research.") - await asyncio.Future() # run 
forever - - -if __name__ == "__main__": - asyncio.run(main()) diff --git a/core/demos/github_outreach_demo.py b/core/demos/github_outreach_demo.py deleted file mode 100644 index 7d6d9cf3..00000000 --- a/core/demos/github_outreach_demo.py +++ /dev/null @@ -1,2000 +0,0 @@ -#!/usr/bin/env python3 -""" -GitHub Outreach Pipeline Demo - -Demonstrates the full GraphExecutor framework: sequential pipeline, -fan-out/fan-in parallel execution, feedback/callback edges, and -client-facing HITL checkpoints — using a realistic GitHub outreach workflow. - -Pipeline: - Intake (HITL) → Scanner → [Profiler || Scorer] → Extractor - → Review (HITL) ⇄ Extractor (feedback) - → Campaign Builder → Approval (HITL) ⇄ Campaign Builder (feedback) - → Sender (terminal) - -Features demonstrated: -- Sequential pipeline (Intake → Scanner → ...) -- Fan-out / fan-in (Scanner → [Profiler, Scorer] → Extractor) -- Feedback edges (Review → Extractor, Approval → Campaign Builder) -- Client-facing HITL checkpoints (Intake, Review, Approval) -- SchemaJudge for output validation + native client_facing blocking for HITL -- max_node_visits for feedback loop control - -Usage: - cd /home/timothy/oss/hive/core - python demos/github_outreach_demo.py - - Then open http://localhost:8768 in your browser. 
-""" - -from __future__ import annotations - -import asyncio -import json -import logging -import os -import sys -import tempfile -from collections.abc import Callable -from http import HTTPStatus -from pathlib import Path -from typing import Any - -import httpx -import websockets -from pydantic import BaseModel, ValidationError -from websockets.http11 import Request, Response - -# Add core, tools, and hive root to path -_CORE_DIR = Path(__file__).resolve().parent.parent -_HIVE_DIR = _CORE_DIR.parent -sys.path.insert(0, str(_CORE_DIR)) -sys.path.insert(0, str(_HIVE_DIR / "tools" / "src")) -sys.path.insert(0, str(_HIVE_DIR)) - -from aden_tools.credentials import CREDENTIAL_SPECS, CredentialStoreAdapter # noqa: E402 -from core.framework.credentials import CredentialStore # noqa: E402 - -from framework.credentials.storage import ( # noqa: E402 - CompositeStorage, - EncryptedFileStorage, - EnvVarStorage, -) -from framework.graph.edge import EdgeCondition, EdgeSpec, GraphSpec # noqa: E402 -from framework.graph.event_loop_node import ( # noqa: E402 - EventLoopNode, - JudgeVerdict, - LoopConfig, -) -from framework.graph.executor import GraphExecutor # noqa: E402 -from framework.graph.goal import Goal # noqa: E402 -from framework.graph.node import NodeContext, NodeProtocol, NodeResult, NodeSpec # noqa: E402 -from framework.llm.litellm import LiteLLMProvider # noqa: E402 -from framework.runner.tool_registry import ToolRegistry # noqa: E402 -from framework.runtime.core import Runtime # noqa: E402 -from framework.runtime.event_bus import ( # noqa: E402 - AgentEvent, - EventBus, - EventType, -) - -logging.basicConfig(level=logging.INFO, format="%(asctime)s %(name)s %(message)s") -logger = logging.getLogger("github_outreach_demo") - -# ------------------------------------------------------------------------- -# Persistent state -# ------------------------------------------------------------------------- - -STORE_DIR = Path(tempfile.mkdtemp(prefix="hive_outreach_")) -RUNTIME = 
Runtime(STORE_DIR / "runtime") -LLM = LiteLLMProvider(model="claude-haiku-4-5-20251001") - -# ------------------------------------------------------------------------- -# Credentials -# ------------------------------------------------------------------------- - -_env_mapping = {name: spec.env_var for name, spec in CREDENTIAL_SPECS.items()} -_local_storage = CompositeStorage( - primary=EncryptedFileStorage(), - fallbacks=[EnvVarStorage(env_mapping=_env_mapping)], -) - -if os.environ.get("ADEN_API_KEY"): - try: - from framework.credentials.aden import ( # noqa: E402 - AdenCachedStorage, - AdenClientConfig, - AdenCredentialClient, - AdenSyncProvider, - ) - - _client = AdenCredentialClient(AdenClientConfig(base_url="https://api.adenhq.com")) - _provider = AdenSyncProvider(client=_client) - _storage = AdenCachedStorage( - local_storage=_local_storage, - aden_provider=_provider, - ) - _cred_store = CredentialStore(storage=_storage, providers=[_provider], auto_refresh=True) - _synced = _provider.sync_all(_cred_store) - logger.info("Synced %d credentials from Aden", _synced) - except Exception as e: - logger.warning("Aden sync unavailable: %s", e) - _cred_store = CredentialStore(storage=_local_storage) -else: - logger.info("ADEN_API_KEY not set, using local credential storage") - _cred_store = CredentialStore(storage=_local_storage) - -CREDENTIALS = CredentialStoreAdapter(_cred_store) - - -# ========================================================================= -# Pydantic Output Models (structural validation for hybrid judges) -# ========================================================================= - - -class IntakeOutput(BaseModel): - repo_url: str - project_url: str - scan_config: str - min_leads: str # Minimum number of leads to collect (numeric string) - - -class GitHubUser(BaseModel): - username: str - user_type: str # stargazer | contributor | issue_author - - -class ScannerOutput(BaseModel): - github_users: str # Filename of JSON file containing 
list[GitHubUser] - - -class UserProfile(BaseModel): - username: str - name: str | None = None - bio: str | None = None - company: str | None = None - languages: list[str] = [] - - -class ProfilerOutput(BaseModel): - user_profiles: str # Filename of JSON file containing list[UserProfile] - - -class RelevanceScore(BaseModel): - username: str - score: float - reasoning: str - - -class ScorerOutput(BaseModel): - relevance_scores: str # Filename of JSON file containing list[RelevanceScore] - - -class Contact(BaseModel): - username: str - name: str | None = None - email: str | None = None - twitter: str | None = None - relevance_score: float - - -class ExtractorOutput(BaseModel): - contact_list: str # Filename of JSON file containing list[Contact] - - -class DraftEmail(BaseModel): - recipient: str - subject: str - body: str - - -class CampaignOutput(BaseModel): - draft_emails: str # Filename of JSON file containing list[DraftEmail] - - -# ========================================================================= -# Tool Registration -# ========================================================================= - -TOOL_REGISTRY = ToolRegistry() - -# ---- MCP server tools (GitHub, web search, web scrape, email) ---- -# The aden-tools MCP server provides the official tool implementations. -# Tools are auto-discovered via register_mcp_server() and available to -# nodes through their NodeSpec.tools lists. 
- -_MCP_SERVER_PATH = str(_HIVE_DIR / "tools" / "mcp_server.py") - -_mcp_tool_count = TOOL_REGISTRY.register_mcp_server( - { - "name": "aden-tools", - "transport": "stdio", - "command": sys.executable, - "args": [_MCP_SERVER_PATH, "--stdio"], - "description": "Aden tools MCP server (GitHub, web search, email, etc.)", - } -) -logger.info("Registered %d tools from MCP server", _mcp_tool_count) - - -# ---- Demo-specific tools (context management for large data) ---- -# Agents use these to write large intermediate results to disk and pass -# filenames between nodes, keeping the LLM conversation context small. - -_DATA_DIR = STORE_DIR / "data" - - -def load_campaign_template() -> dict: - """Load the outreach campaign email template with placeholders.""" - return { - "template": ( - "Subject: {project_name} - Collaboration Opportunity\n\n" - "Hi {name},\n\n" - "I noticed your work on {user_repo_highlights} and thought " - "you might be interested in {project_name}. " - "{personalized_hook}\n\n" - "We'd love to have you involved. " - "Would you be open to a quick chat?\n\n" - "Best,\nThe {project_name} Team" - ), - } - - -def save_data(filename: str, data: str) -> dict: - """Save data to a file for later retrieval by this or downstream nodes. - - Use this to store large results (user lists, profiles, scores, emails) - instead of passing them inline through set_output. Returns a brief - summary with the filename to reference in set_output calls. - - Args: - filename: Simple filename like 'github_users.json'. No paths or '..'. - data: The string data to write (typically JSON). - """ - if not filename or ".." in filename or "/" in filename: - return {"error": "Invalid filename. 
Use simple names like 'users.json'"} - _DATA_DIR.mkdir(parents=True, exist_ok=True) - path = _DATA_DIR / filename - path.write_text(data, encoding="utf-8") - lines = data.count("\n") + 1 - return { - "success": True, - "filename": filename, - "size_bytes": len(data.encode("utf-8")), - "lines": lines, - "preview": data[:200] + ("..." if len(data) > 200 else ""), - } - - -def load_data(filename: str, offset: int = 0, limit: int = 50) -> dict: - """Load data from a previously saved file with pagination. - - Always returns a page of lines with metadata about the full file, - so you know the shape of the data without loading it all into context. - - Args: - filename: The filename that was used with save_data. - offset: 0-based line number to start reading from. Default 0. - limit: Max number of lines to return. Default 50. - - Examples: - load_data('users.json') # first 50 lines - load_data('users.json', offset=50, limit=50) # next 50 lines - load_data('users.json', limit=200) # first 200 lines - """ - if not filename or ".." in filename or "/" in filename: - return {"error": "Invalid filename"} - offset = int(offset) - limit = int(limit) - path = _DATA_DIR / filename - if not path.exists(): - return {"error": f"File not found: {filename}"} - content = path.read_text(encoding="utf-8") - size_bytes = len(content.encode("utf-8")) - - # If content is a single long line, try to pretty-print JSON so - # line-based pagination actually works. Handles spillover files - # written before the pretty-print fix in _truncate_tool_result(). 
- all_lines = content.split("\n") - if len(all_lines) <= 2 and size_bytes > 500: - try: - parsed = json.loads(content) - content = json.dumps(parsed, indent=2, ensure_ascii=False) - all_lines = content.split("\n") - except (json.JSONDecodeError, TypeError, ValueError): - pass # Not JSON — keep original lines - - total = len(all_lines) - - start = min(offset, total) - end = min(start + limit, total) - sliced = all_lines[start:end] - - return { - "success": True, - "filename": filename, - "content": "\n".join(sliced), - "total_lines": total, - "size_bytes": size_bytes, - "offset": start, - "lines_returned": len(sliced), - "has_more": end < total, - } - - -def list_data_files() -> dict: - """List all data files saved during this pipeline run. - - Use this to discover what intermediate results are available - from previous nodes in the pipeline. - """ - if not _DATA_DIR.exists(): - return {"files": []} - files = [] - for f in sorted(_DATA_DIR.iterdir()): - if f.is_file(): - files.append( - { - "filename": f.name, - "size_bytes": f.stat().st_size, - } - ) - return {"files": files} - - -TOOL_REGISTRY.register_function(load_campaign_template) -TOOL_REGISTRY.register_function(save_data) -TOOL_REGISTRY.register_function(load_data) -TOOL_REGISTRY.register_function(list_data_files) - -logger.info("Tools registered: %s", ", ".join(TOOL_REGISTRY.get_registered_names())) - - -# ========================================================================= -# Node Specifications -# ========================================================================= - -NODE_SPECS = { - "intake": NodeSpec( - id="intake", - name="Intake", - description="Gather repo URL, project URL, and scan configuration from the operator", - node_type="event_loop", - client_facing=True, - input_keys=[], - output_keys=["repo_url", "project_url", "scan_config", "min_leads"], - system_prompt=( - "You are the Intake agent for a GitHub outreach pipeline. 
" - "Your job is to collect four pieces of information from the operator:\n\n" - "1. **repo_url** — The GitHub repository URL to scan for potential contacts " - "(e.g., https://github.com/anthropics/claude-code)\n" - "2. **project_url** — The project URL we're promoting " - "(e.g., https://github.com/our-org/our-project)\n" - "3. **scan_config** — Scan parameters as a brief description " - "(e.g., 'stargazers and contributors, last 6 months')\n" - "4. **min_leads** — Minimum number of leads to collect (a number). " - "Ask the operator: 'How many leads do you need at minimum?' " - "If they say something vague like 'as many as possible', default to 50.\n\n" - "CRITICAL: Once you have all four, you MUST call the set_output tool for EACH " - "key. Call set_output(key='repo_url', value='...'), then " - "set_output(key='project_url', value='...'), etc. for all four keys.\n" - "Do NOT just say 'ready to proceed' or 'handing off' — the pipeline ONLY " - "advances when you make actual set_output tool calls.\n" - "For min_leads, set it as a plain number string (e.g., '100').\n" - "Be conversational but efficient. Ask for missing information if the operator " - "doesn't provide everything at once." - ), - ), - "scanner": NodeSpec( - id="scanner", - name="Scanner", - description="Scan a GitHub repository to find contributors, issue authors, and stargazers", - node_type="event_loop", - input_keys=["repo_url", "scan_config"], - output_keys=["github_users"], - tools=[ - "github_get_repo", - "github_list_issues", - "github_list_pull_requests", - "github_list_stargazers", - "execute_command_tool", - "save_data", - "load_data", - "list_data_files", - ], - system_prompt=( - "You are a GitHub Scanner agent. You receive a repository URL and scan config.\n\n" - "Parse the repository URL to extract the owner and repo name.\n" - "Then use the available GitHub tools to discover users:\n" - "1. Use github_get_repo to get repo metadata (stars, language, description)\n" - "2. 
Use github_list_issues to find issue authors (look at each issue's user.login)\n" - "3. Use github_list_pull_requests to find PR contributors " - "(look at each PR's user.login)\n" - "4. Use github_list_stargazers to find stargazers (each entry has a login field)\n\n" - "PAGINATION — CRITICAL:\n" - "Each GitHub list tool returns up to 100 items per page. You MUST paginate:\n" - " - Always call with limit=100 for maximum results per page.\n" - " - If a page returns 100 items, call again with page=2, page=3, etc.\n" - " - Stop when a page returns fewer than 100 items (that's the last page).\n" - " - Example: github_list_stargazers(owner, repo, page=1, limit=100)\n" - " github_list_stargazers(owner, repo, page=2, limit=100)\n\n" - "SPILLOVER RECOVERY:\n" - "Tool results larger than 8KB are truncated. When you see a message like\n" - " 'saved to tool_xxx.txt — Use load_data(filename) to read the full result'\n" - "you MUST call load_data(filename) to retrieve the full data. The truncated\n" - "preview only shows a fraction of the users. Use load_data with offset/limit\n" - "to page through the file: load_data(filename, offset=0, limit=50), then\n" - "offset=50, offset=100, etc. until has_more is false.\n\n" - "USER COLLECTION:\n" - "Collect unique usernames from all sources. Classify each user:\n" - "- 'contributor' if they authored a pull request\n" - "- 'issue_author' if they authored an issue\n" - "- 'stargazer' if they starred the repo\n" - "If a user appears in multiple categories, use the highest-value type " - "(contributor > issue_author > stargazer).\n\n" - "Respect the scan_config for any limits or filtering preferences.\n\n" - "SHELL TOOL:\n" - "You have execute_command_tool for running shell commands. Use it for file " - "management tasks like merging JSON files, deduplicating data, or processing " - "saved data with jq/python one-liners. 
Example:\n" - " execute_command_tool(command='cat data/file.json | python3 -c \"import sys,json; " - "d=json.load(sys.stdin); print(len(d))\"')\n\n" - "CONTEXT MANAGEMENT:\n" - "Format the result as a JSON array of objects with 'username' and 'user_type'.\n" - "Use save_data('github_users.json', ) to write the list to a file.\n" - "Then call set_output(key='github_users', value='github_users.json')\n" - "This passes a filename — not the raw data — to downstream nodes, " - "keeping the conversation context small." - ), - ), - "profiler": NodeSpec( - id="profiler", - name="Profiler", - description="Fetch detailed profiles for each discovered user", - node_type="event_loop", - input_keys=["github_users"], - output_keys=["user_profiles"], - tools=[ - "github_get_user_profile", - "github_list_repos", - "execute_command_tool", - "load_data", - "save_data", - ], - system_prompt=( - "You are a GitHub Profiler agent. Your input 'github_users' is a filename.\n\n" - "WORKFLOW:\n" - "1. Use load_data to read the user list from the input filename\n" - "2. For each user, call github_get_user_profile to get their profile, and " - "github_list_repos(username=) to list their public repos\n" - "3. Compile profiles into a JSON array with: username, name, bio, company, " - "languages (from repos)\n" - "4. Use save_data('user_profiles.json', ) to write results to a file\n" - "5. Call set_output(key='user_profiles', value='user_profiles.json')\n\n" - "SHELL TOOL:\n" - "You have execute_command_tool for running shell commands. Use it for file " - "management tasks like filtering JSON, extracting fields, or merging data. " - "Example: execute_command_tool(command='python3 -c \"import json; ...')\n\n" - "CONTEXT MANAGEMENT:\n" - "Start by loading the user list file. For large lists, use load_data with " - "offset and limit to page through users in batches " - "(e.g. load_data('github_users.json', offset=0, limit=20), then offset=20, etc.). 
" - "When done, save all profiles to a file and pass the filename — not the raw data." - ), - ), - "scorer": NodeSpec( - id="scorer", - name="Scorer", - description="Score each user's relevance to our project", - node_type="event_loop", - input_keys=["github_users", "project_url"], - output_keys=["relevance_scores"], - tools=["load_data", "save_data"], - system_prompt=( - "You are a Relevance Scorer agent. Your input 'github_users' is a filename " - "and 'project_url' is a URL string.\n\n" - "WORKFLOW:\n" - "1. Use load_data to read the user list from the github_users filename\n" - "2. For each user, assess relevance to the project based on:\n" - " - Their user_type (contributor > issue_author > stargazer)\n" - " - Assumed technical overlap with the project\n" - "3. Output a JSON array of objects with: username, score (0.0-1.0), reasoning\n" - "4. Use save_data('relevance_scores.json', ) to write results\n" - "5. Call set_output(key='relevance_scores', value='relevance_scores.json')\n\n" - "Score generously — this is a demo." - ), - ), - "extractor": NodeSpec( - id="extractor", - name="Extractor", - description="Build a curated contact list from profiles and scores", - node_type="event_loop", - input_keys=["user_profiles", "relevance_scores"], - output_keys=["contact_list"], - tools=[ - "github_get_user_emails", - "web_search", - "web_scrape", - "execute_command_tool", - "load_data", - "save_data", - ], - max_node_visits=3, - system_prompt=( - "You are a Contact Extractor agent. Your inputs 'user_profiles' and " - "'relevance_scores' are filenames pointing to JSON data files.\n\n" - "WORKFLOW:\n" - "1. Use load_data to read both files (profiles and scores). For large files, " - "use offset and limit to page through them incrementally " - "(e.g. load_data('file.json', offset=0, limit=30))\n" - "2. 
For each user with relevance score >= 0.3, enrich their contact info:\n" - " - Use github_get_user_emails(username) to find emails from their " - "public commits and profile (this is the BEST source for GitHub emails)\n" - " - If no email found, try web_search ('{username} github email')\n" - " - If results include personal sites, use web_scrape to extract details\n" - " - Look for: email addresses, Twitter/X handles, LinkedIn profiles\n" - "3. Compile a JSON array of contacts with: username, name, email, twitter, " - "relevance_score\n" - "4. Use save_data('contact_list.json', ) to write results\n" - "5. Call set_output(key='contact_list', value='contact_list.json')\n\n" - "SHELL TOOL:\n" - "You have execute_command_tool for running shell commands. Use it for file " - "management tasks like merging profiles and scores, deduplicating contacts, " - "or batch-processing data. Example:\n" - " execute_command_tool(command='python3 -c \"import json; profiles=json.load(" - 'open(\\"data/user_profiles.json\\")); print(len(profiles))"\')\n\n' - "Include all users who have at least one contact method." - ), - ), - "review": NodeSpec( - id="review", - name="Review", - description="Human operator reviews and approves the contact list", - node_type="event_loop", - client_facing=True, - input_keys=["contact_list"], - output_keys=["approved_contacts", "redo_extraction"], - nullable_output_keys=["approved_contacts", "redo_extraction"], - max_node_visits=3, - tools=["load_data", "save_data"], - system_prompt=( - "You are the Review agent at a human checkpoint. Your input 'contact_list' " - "is a filename pointing to a JSON data file.\n\n" - "WORKFLOW:\n" - "1. Use load_data to read the contact list from the file\n" - "2. Present the contacts to the operator in a clear, readable format\n" - " Show each contact with: name, username, email, twitter, relevance score\n" - "3. Ask the operator to either approve or request a redo\n" - "4. 
Based on the operator's response, call set_output EXACTLY ONCE:\n\n" - " IF APPROVED: save the approved list with save_data, then call:\n" - " set_output(key='approved_contacts', value='approved_contacts.json')\n\n" - " IF REDO REQUESTED: call:\n" - " set_output(key='redo_extraction', value='true')\n\n" - "CRITICAL RULE: Call set_output EXACTLY ONCE with EXACTLY ONE key.\n" - "NEVER call set_output twice. NEVER set both keys.\n" - "The two output keys are mutually exclusive — setting both will cause an error." - ), - ), - "campaign_builder": NodeSpec( - id="campaign_builder", - name="Campaign Builder", - description="Iteratively build personalized outreach emails in batches", - node_type="event_loop", - client_facing=True, - input_keys=["approved_contacts", "project_url"], - output_keys=["draft_emails"], - nullable_output_keys=["draft_emails"], - tools=["load_campaign_template", "load_data", "save_data"], - max_node_visits=5, - system_prompt=( - "You are the Campaign Builder agent. Your input 'approved_contacts' is a " - "filename and 'project_url' is a URL string.\n\n" - "ITERATIVE BATCH WORKFLOW:\n" - "You build draft emails in batches of up to 10 contacts at a time, " - "presenting each batch to the operator for review before continuing.\n\n" - "STEP 1 — SETUP:\n" - "- Use load_data to read the approved contacts from the file\n" - "- Load the campaign template using load_campaign_template\n" - "- Count total contacts and plan batches of up to 10\n\n" - "STEP 2 — DRAFT A BATCH (max 10 emails):\n" - "- For each contact in the current batch, write a personalized email:\n" - " * Fill in their name and relevant details from their profile\n" - " * Add a personalized hook based on their interests/contributions\n" - " * Format: {recipient, subject, body}\n" - "- Present the drafted emails to the operator clearly, showing:\n" - " * Batch number (e.g. 
'Batch 1 of 4')\n" - " * Each email with recipient, subject, and body preview\n" - " * How many contacts remain\n\n" - "STEP 3 — ASK THE OPERATOR:\n" - "After presenting each batch, ask:\n" - " 'Create more drafts for the next batch, or submit all drafts " - "for outbound email?'\n\n" - " IF 'create more' → go back to STEP 2 for the next batch of contacts\n" - " IF 'submit' → go to STEP 4\n\n" - "STEP 4 — FINALIZE:\n" - "- Combine ALL drafted batches into a single JSON array\n" - "- Use save_data('draft_emails.json', ) to write them\n" - "- Call set_output(key='draft_emails', value='draft_emails.json')\n\n" - "RULES:\n" - "- Never draft more than 10 emails at once — always pause for operator input\n" - "- Keep a running total of emails drafted across batches\n" - "- Make each email feel personal and relevant\n" - "- Do NOT call set_output until the operator says to submit" - ), - ), - "approval": NodeSpec( - id="approval", - name="Approval", - description="Human operator reviews and approves campaign emails", - node_type="event_loop", - client_facing=True, - input_keys=["draft_emails"], - output_keys=["approved_emails", "revise_campaigns"], - nullable_output_keys=["approved_emails", "revise_campaigns"], - max_node_visits=3, - tools=["load_data", "save_data"], - system_prompt=( - "You are the Approval agent at the final human checkpoint. Your input " - "'draft_emails' is a filename pointing to a JSON data file.\n\n" - "WORKFLOW:\n" - "1. Use load_data to read the draft emails from the file\n" - "2. Present each email to the operator with: recipient, subject, and body\n" - "3. Ask the operator to either approve or request revision\n" - "4. 
Based on the operator's response, call set_output EXACTLY ONCE:\n\n" - " IF APPROVED: save the approved list with save_data, then call:\n" - " set_output(key='approved_emails', value='approved_emails.json')\n\n" - " IF REVISION REQUESTED: call:\n" - " set_output(key='revise_campaigns', value='true')\n\n" - "CRITICAL RULE: Call set_output EXACTLY ONCE with EXACTLY ONE key.\n" - "NEVER call set_output twice. NEVER set both keys.\n" - "The two output keys are mutually exclusive — setting both will cause an error." - ), - ), - "sender": NodeSpec( - id="sender", - name="Sender", - description="Send approved campaign emails", - node_type="event_loop", - input_keys=["approved_emails"], - output_keys=["send_results"], - ), -} - - -# ========================================================================= -# Edge + Graph Definitions -# ========================================================================= - -EDGES = [ - EdgeSpec( - id="intake_to_scanner", - source="intake", - target="scanner", - condition=EdgeCondition.ON_SUCCESS, - ), - # Fan-out: scanner → profiler AND scorer (both ON_SUCCESS) - EdgeSpec( - id="scanner_to_profiler", - source="scanner", - target="profiler", - condition=EdgeCondition.ON_SUCCESS, - ), - EdgeSpec( - id="scanner_to_scorer", - source="scanner", - target="scorer", - condition=EdgeCondition.ON_SUCCESS, - ), - # Fan-in: profiler → extractor AND scorer → extractor - EdgeSpec( - id="profiler_to_extractor", - source="profiler", - target="extractor", - condition=EdgeCondition.ON_SUCCESS, - ), - EdgeSpec( - id="scorer_to_extractor", - source="scorer", - target="extractor", - condition=EdgeCondition.ON_SUCCESS, - ), - # Extractor → Review - EdgeSpec( - id="extractor_to_review", - source="extractor", - target="review", - condition=EdgeCondition.ON_SUCCESS, - ), - # Review: forward to campaign_builder OR feedback to extractor - EdgeSpec( - id="review_to_campaign", - source="review", - target="campaign_builder", - condition=EdgeCondition.CONDITIONAL, - 
condition_expr="output.get('approved_contacts') is not None", - priority=1, - ), - EdgeSpec( - id="review_feedback", - source="review", - target="extractor", - condition=EdgeCondition.CONDITIONAL, - condition_expr="output.get('redo_extraction') is not None", - priority=-1, - ), - # Campaign Builder → Approval - EdgeSpec( - id="campaign_to_approval", - source="campaign_builder", - target="approval", - condition=EdgeCondition.ON_SUCCESS, - ), - # Approval: forward to sender OR feedback to campaign_builder - EdgeSpec( - id="approval_to_sender", - source="approval", - target="sender", - condition=EdgeCondition.CONDITIONAL, - condition_expr="output.get('approved_emails') is not None", - priority=1, - ), - EdgeSpec( - id="approval_feedback", - source="approval", - target="campaign_builder", - condition=EdgeCondition.CONDITIONAL, - condition_expr="output.get('revise_campaigns') is not None", - priority=-1, - ), -] - -GRAPH = GraphSpec( - id="github_outreach_pipeline", - goal_id="outreach_goal", - name="GitHub Outreach Pipeline", - entry_node="intake", - nodes=list(NODE_SPECS.values()), - edges=EDGES, - terminal_nodes=["sender"], - max_steps=30, - max_tokens=64000, -) - -GOAL = Goal( - id="outreach_goal", - name="GitHub Outreach Campaign", - description=( - "Scan a GitHub repository to identify potential collaborators, " - "profile and score them, build a curated contact list, " - "create personalized outreach emails, and send approved campaigns." 
- ), -) - - -# ========================================================================= -# Sender Function (terminal node) -# ========================================================================= - - -def _send_email_via_resend( - to: str, - subject: str, - html: str, - from_email: str, -) -> dict: - """Send a single email via the Resend REST API.""" - api_key = CREDENTIALS.get("resend") or os.getenv("RESEND_API_KEY") - if not api_key: - return {"error": "Resend API key not configured"} - - # Testing override: redirect all recipients to a single address - override_to = os.getenv("EMAIL_OVERRIDE_TO") - if override_to: - subject = f"[TEST -> {to}] {subject}" - to = override_to - - try: - resp = httpx.post( - "https://api.resend.com/emails", - headers={ - "Authorization": f"Bearer {api_key}", - "Content-Type": "application/json", - }, - json={ - "from": from_email, - "to": [to] if isinstance(to, str) else to, - "subject": subject, - "html": html, - }, - timeout=30.0, - ) - if resp.status_code == 200: - data = resp.json() - return { - "success": True, - "id": data.get("id", ""), - } - return { - "error": (f"Resend API ({resp.status_code}): {resp.text[:200]}"), - } - except httpx.TimeoutException: - return {"error": "Email send timed out"} - except httpx.RequestError as e: - return {"error": f"Network error: {e}"} - - -class SenderNode(NodeProtocol): - """Node wrapper for send_emails function.""" - - async def execute(self, ctx: NodeContext) -> NodeResult: - approved = ctx.input_data.get("approved_emails", "") - result_str = send_emails(approved_emails=approved) - ctx.memory.write("send_results", result_str) - return NodeResult(success=True, output={"send_results": result_str}) - - -def send_emails(approved_emails: str = "") -> str: - """Send approved campaign emails via Resend, or log if unconfigured. - - Returns a JSON string. 
- """ - approved = approved_emails - if not approved: - return json.dumps({"error": "No approved emails to send"}) - - # Load from file if the value is a filename (file-ops pattern) - if isinstance(approved, str) and not approved.strip().startswith("["): - data_path = _DATA_DIR / approved - if data_path.exists(): - approved = data_path.read_text(encoding="utf-8") - - try: - emails = json.loads(approved) if isinstance(approved, str) else approved - except (json.JSONDecodeError, TypeError): - emails = [{"recipient": "unknown", "status": "parse_error"}] - - from_email = os.getenv("EMAIL_FROM", "noreply@example.com") - has_resend = bool(CREDENTIALS.get("resend") or os.getenv("RESEND_API_KEY")) - - results = [] - for email in emails: - recipient = email.get("recipient", "unknown") - subject = email.get("subject", "") - body = email.get("body", "") - html_body = "
" + body.replace("\n", "
") + "
" - - if has_resend: - result = _send_email_via_resend( - to=recipient, - subject=subject, - html=html_body, - from_email=from_email, - ) - sent_ok = result.get("success", False) - status = "sent" if sent_ok else "failed" - msg_id = result.get("id", "") - error = result.get("error", "") - else: - status = "logged" - msg_id = f"mock_{len(results) + 1:03d}" - error = "" - logger.info( - "(mock) Would send to %s: %s", - recipient, - subject, - ) - - results.append( - { - "recipient": recipient, - "subject": subject, - "status": status, - "message_id": msg_id, - "error": error, - } - ) - if status == "sent": - logger.info( - "Sent to %s: %s", - recipient, - subject, - ) - - return json.dumps(results) - - -# ========================================================================= -# Judges (SchemaJudge for output validation) -# ========================================================================= - - -class SchemaJudge: - """Judge that validates event loop output against a Pydantic model. - - For internal (non-client-facing) nodes: - 1. Check if required output keys are set - 2. Validate accumulated values against Pydantic model - 3. Optionally load referenced files and check minimum item counts - 4. RETRY with structural feedback on validation failure - 5. ACCEPT on valid output - """ - - def __init__( - self, - output_model: type[BaseModel], - min_items: dict[str, int | Callable[[], int]] | None = None, - data_dir: Path | None = None, - ): - self._model = output_model - self._min_items = min_items or {} - self._data_dir = data_dir - - async def evaluate(self, context: dict) -> JudgeVerdict: - accumulator = context.get("output_accumulator", {}) - missing = context.get("missing_keys", []) - - if missing: - return JudgeVerdict( - action="RETRY", - feedback=f"Missing output keys: {missing}. 
Use set_output to provide them.", - ) - - # Try to validate against schema - try: - parsed = {} - for key, value in accumulator.items(): - if value is None: - continue - if isinstance(value, str): - stripped = value.strip() - if stripped and stripped[0] in ("{", "["): - try: - parsed[key] = json.loads(value) - except json.JSONDecodeError: - parsed[key] = value - else: - parsed[key] = value - else: - parsed[key] = value - self._model.model_validate(parsed) - except ValidationError as e: - errors = "; ".join( - f"{'.'.join(str(x) for x in err['loc'])}: {err['msg']}" for err in e.errors() - ) - return JudgeVerdict( - action="RETRY", - feedback=f"Output schema validation failed: {errors}. Fix and re-set outputs.", - ) - - # Check minimum item counts for file-based outputs - for key, min_count_or_fn in self._min_items.items(): - min_count = min_count_or_fn() if callable(min_count_or_fn) else min_count_or_fn - value = accumulator.get(key) - if not value or not self._data_dir: - continue - fpath = self._data_dir / value - if not fpath.exists(): - return JudgeVerdict( - action="RETRY", - feedback=f"Output file '{value}' not found. Use save_data to create it.", - ) - try: - content = fpath.read_text(encoding="utf-8") - data = json.loads(content) - if isinstance(data, list) and len(data) < min_count: - return JudgeVerdict( - action="RETRY", - feedback=( - f"Insufficient results: '{value}' contains {len(data)} items " - f"but at least {min_count} are required. " - f"Use pagination (page=1, page=2, ...) on the GitHub tools " - f"and load_data on spillover files to collect more users." - ), - ) - except (json.JSONDecodeError, OSError): - pass # File exists but can't be parsed — let it through - - return JudgeVerdict(action="ACCEPT") - - -# ========================================================================= -# HTML Page (embedded) -# ========================================================================= - -HTML_PAGE = """ - - - - -GitHub Outreach Pipeline - - - -
-

GitHub Outreach Pipeline

- Intake - Scanner - Profiler - Scorer - Extractor - Review - Campaign - Approval - Sender - Ready - -
- -
-
-
-
Pipeline Graph
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - Intake - - - - Scanner - - - - Profiler - - - - Scorer - - - - Extractor - - - - Review - - - - Campaign Builder - - - - Approval - - - - Sender - - - - - - - - - - - - -
- Client-facing (HITL) - Internal - Parallel - Terminal -
-
-
- -
- - -
- - - -""" - - -# ========================================================================= -# WebSocket Handler — Pipeline Orchestrator -# ========================================================================= - - -async def handle_ws(websocket): - """Handle WebSocket connections for the outreach pipeline demo. - - Uses a single recv() for the start message instead of ``async for`` - so the websocket's recv lock is released before _run_pipeline creates - its own reader task. (Two concurrent recv calls raise ConcurrencyError.) - """ - try: - raw = await websocket.recv() - try: - msg = json.loads(raw) - except Exception: - return - - if msg.get("type") != "start": - return - - try: - await _run_pipeline(websocket, msg.get("message", "")) - except websockets.exceptions.ConnectionClosed: - logger.info("WebSocket closed during pipeline") - except Exception as e: - logger.exception("Pipeline error") - try: - await websocket.send(json.dumps({"type": "error", "message": str(e)})) - except Exception: - pass - except websockets.exceptions.ConnectionClosed: - pass - - -async def _run_pipeline(websocket, initial_message: str): - """Execute the GitHub outreach pipeline through GraphExecutor.""" - bus = EventBus() - - # State for routing user messages to active client-facing node - active_node: EventLoopNode | None = None - pending_messages: list[str] = [] - - # --- Shared pipeline config (updated by Intake, read by judges) --- - - pipeline_config: dict[str, Any] = {"min_leads": 50} # default - - # --- Build judges --- - # Client-facing nodes (intake, review, approval) have client_facing=True - # in their NodeSpec — EventLoopNode blocks for user input natively. - # Only nodes that need output schema validation get a judge. 
- - # Intake needs schema validation for its structured outputs - client_judges: dict[str, SchemaJudge] = { - "intake": SchemaJudge(IntakeOutput), - } - # review & approval: no judge — implicit judge checks output keys, - # and client_facing blocking is handled by the node itself. - - # Internal: SchemaJudge (validates output structure) - schema_judges: dict[str, SchemaJudge] = { - "scanner": SchemaJudge( - ScannerOutput, - min_items={"github_users": lambda: pipeline_config["min_leads"]}, - data_dir=_DATA_DIR, - ), - "profiler": SchemaJudge(ProfilerOutput), - "scorer": SchemaJudge(ScorerOutput), - "extractor": SchemaJudge(ExtractorOutput), - # campaign_builder is now client_facing — implicit judge + native - # blocking handle termination (same pattern as review/approval). - } - - all_judges: dict = {**client_judges, **schema_judges} - - # --- Build EventLoopNode for each event_loop node --- - - tool_executor = TOOL_REGISTRY.get_executor() - all_tools = list(TOOL_REGISTRY.get_tools().values()) - nodes: dict[str, EventLoopNode] = {} - - for nid, spec in NODE_SPECS.items(): - if spec.node_type != "event_loop": - continue - judge = all_judges.get(nid) - node = EventLoopNode( - event_bus=bus, - judge=judge, - config=LoopConfig( - max_iterations=30, - max_tool_calls_per_turn=30, - max_context_tokens=64000, - max_tool_result_chars=8_000, - spillover_dir=str(_DATA_DIR), - ), - conversation_store=None, - tool_executor=tool_executor if spec.tools else None, - ) - nodes[nid] = node - - # --- Build GraphExecutor and register all nodes --- - - executor = GraphExecutor( - runtime=RUNTIME, - llm=LLM, - tools=all_tools, - tool_executor=tool_executor, - enable_parallel_execution=True, - ) - for nid, impl in nodes.items(): - executor.register_node(nid, impl) - executor.register_node("sender", SenderNode()) - - # --- Event forwarding: bus → WebSocket --- - - async def forward_event(event: AgentEvent): - try: - payload = {"type": event.type.value, **event.data} - if event.node_id: - 
payload["node_id"] = event.node_id - - # Remap CUSTOM events to their custom_type - if event.type == EventType.CUSTOM and "custom_type" in event.data: - payload["type"] = event.data["custom_type"] - - # Remap CLIENT_INPUT_REQUESTED to awaiting_input for JS compatibility - if event.type == EventType.CLIENT_INPUT_REQUESTED: - payload["type"] = "awaiting_input" - - await websocket.send(json.dumps(payload)) - except Exception: - pass - - bus.subscribe( - event_types=[ - EventType.NODE_LOOP_STARTED, - EventType.NODE_LOOP_ITERATION, - EventType.NODE_LOOP_COMPLETED, - EventType.LLM_TEXT_DELTA, - EventType.TOOL_CALL_STARTED, - EventType.TOOL_CALL_COMPLETED, - EventType.CLIENT_OUTPUT_DELTA, - EventType.CLIENT_INPUT_REQUESTED, - EventType.NODE_STALLED, - EventType.CUSTOM, - ], - handler=forward_event, - ) - - # --- Track active client-facing node for message routing --- - # EventLoopNode publishes CLIENT_INPUT_REQUESTED when a client_facing - # node blocks for user input (native blocking, no judge needed). 
- - CLIENT_FACING_NODES = {"intake", "review", "approval"} - - async def on_awaiting_input(event: AgentEvent): - nonlocal active_node - nid = event.node_id or "" - if nid not in CLIENT_FACING_NODES: - return - active_node = nodes.get(nid) - logger.info("Active HITL node: %s", nid) - # Deliver any pending messages - while pending_messages: - msg_text = pending_messages.pop(0) - if active_node: - await active_node.inject_event(msg_text) - - bus.subscribe(event_types=[EventType.CLIENT_INPUT_REQUESTED], handler=on_awaiting_input) - - # --- Capture min_leads from Intake's set_output tool call --- - - async def on_tool_started(event: AgentEvent): - if event.node_id != "intake": - return - if event.data.get("tool_name") != "set_output": - return - tool_input = event.data.get("tool_input", {}) - if tool_input.get("key") != "min_leads": - return - raw = tool_input.get("value", "") - try: - val = int(raw) - if val > 0: - pipeline_config["min_leads"] = val - logger.info("Scanner min_leads set to %d (from user)", val) - except (ValueError, TypeError): - logger.info( - "No valid min_leads from user, using default %d", - pipeline_config["min_leads"], - ) - - bus.subscribe(event_types=[EventType.TOOL_CALL_STARTED], handler=on_tool_started) - - # --- Inject initial user message into intake node --- - - if initial_message: - await nodes["intake"].inject_event(initial_message) - - # --- Run pipeline as background task --- - - pipeline_task = asyncio.create_task(executor.execute(GRAPH, GOAL, input_data={})) - - # --- WS message loop: route incoming messages to active node --- - - async def ws_reader(): - try: - async for raw in websocket: - try: - msg = json.loads(raw) - except Exception: - continue - text = msg.get("message", "") - if not text: - continue - - # Forward to browser as user bubble - await websocket.send( - json.dumps( - { - "type": "user_message", - "content": text, - } - ) - ) - - if active_node: - await active_node.inject_event(text) - else: - 
pending_messages.append(text) - except websockets.exceptions.ConnectionClosed: - pass - - reader_task = asyncio.create_task(ws_reader()) - - # --- Wait for pipeline to complete --- - - try: - result = await asyncio.wait_for(pipeline_task, timeout=1800) - except TimeoutError: - for nid in CLIENT_FACING_NODES: - if nid in nodes: - nodes[nid].signal_shutdown() - reader_task.cancel() - await websocket.send( - json.dumps({"type": "error", "message": "Pipeline timed out (10 min)"}) - ) - return - except Exception as e: - reader_task.cancel() - await websocket.send(json.dumps({"type": "error", "message": str(e)})) - return - - reader_task.cancel() - - # --- Send final result --- - - send_results = result.output.get("send_results", "") - await websocket.send( - json.dumps( - { - "type": "pipeline_done", - "success": result.success, - "send_results": send_results, - "total_tokens": result.total_tokens, - "steps": result.steps_executed, - "path": result.path, - "node_visit_counts": result.node_visit_counts, - "error": result.error, - } - ) - ) - - logger.info( - "Pipeline complete: success=%s, steps=%d, tokens=%d, path=%s, error=%s", - result.success, - result.steps_executed, - result.total_tokens, - " -> ".join(result.path), - result.error, - ) - - -# ========================================================================= -# HTTP Handler -# ========================================================================= - - -async def process_request(connection, request: Request): - """Serve HTML on GET /, upgrade to WebSocket on /ws.""" - if request.path == "/ws": - return None - return Response( - HTTPStatus.OK, - "OK", - websockets.Headers({"Content-Type": "text/html; charset=utf-8"}), - HTML_PAGE.encode(), - ) - - -# ========================================================================= -# Main -# ========================================================================= - - -async def main(): - port = 8768 - async with websockets.serve( - handle_ws, - "0.0.0.0", - port, - 
process_request=process_request, - ): - logger.info(f"GitHub Outreach Pipeline demo running at http://localhost:{port}") - logger.info("Open in your browser to start the pipeline.") - await asyncio.Future() - - -if __name__ == "__main__": - asyncio.run(main()) diff --git a/core/demos/handoff_demo.py b/core/demos/handoff_demo.py deleted file mode 100644 index 68e04624..00000000 --- a/core/demos/handoff_demo.py +++ /dev/null @@ -1,930 +0,0 @@ -#!/usr/bin/env python3 -""" -Two-Node ContextHandoff Demo - -Demonstrates ContextHandoff between two EventLoopNode instances: - Node A (Researcher) → ContextHandoff → Node B (Analyst) - -Real LLM, real FileConversationStore, real EventBus. -Streams both nodes to a browser via WebSocket. - -Usage: - cd /home/timothy/oss/hive/core - python demos/handoff_demo.py - - Then open http://localhost:8766 in your browser. -""" - -import asyncio -import json -import logging -import sys -import tempfile -from http import HTTPStatus -from pathlib import Path - -import httpx -import websockets -from bs4 import BeautifulSoup -from websockets.http11 import Request, Response - -# Add core, tools, and hive root to path -_CORE_DIR = Path(__file__).resolve().parent.parent -_HIVE_DIR = _CORE_DIR.parent -sys.path.insert(0, str(_CORE_DIR)) # framework.* -sys.path.insert(0, str(_HIVE_DIR / "tools" / "src")) # aden_tools.* -sys.path.insert(0, str(_HIVE_DIR)) # core.framework.* (for aden_tools imports) - -from aden_tools.credentials import CREDENTIAL_SPECS, CredentialStoreAdapter # noqa: E402 -from core.framework.credentials import CredentialStore # noqa: E402 - -from framework.credentials.storage import ( # noqa: E402 - CompositeStorage, - EncryptedFileStorage, - EnvVarStorage, -) -from framework.graph.context_handoff import ContextHandoff # noqa: E402 -from framework.graph.conversation import NodeConversation # noqa: E402 -from framework.graph.event_loop_node import EventLoopNode, LoopConfig # noqa: E402 -from framework.graph.node import NodeContext, 
NodeSpec, SharedMemory # noqa: E402 -from framework.llm.litellm import LiteLLMProvider # noqa: E402 -from framework.llm.provider import Tool # noqa: E402 -from framework.runner.tool_registry import ToolRegistry # noqa: E402 -from framework.runtime.core import Runtime # noqa: E402 -from framework.runtime.event_bus import EventBus, EventType # noqa: E402 -from framework.storage.conversation_store import FileConversationStore # noqa: E402 - -logging.basicConfig(level=logging.INFO, format="%(asctime)s %(name)s %(message)s") -logger = logging.getLogger("handoff_demo") - -# ------------------------------------------------------------------------- -# Persistent state -# ------------------------------------------------------------------------- - -STORE_DIR = Path(tempfile.mkdtemp(prefix="hive_handoff_")) -RUNTIME = Runtime(STORE_DIR / "runtime") -LLM = LiteLLMProvider(model="claude-sonnet-4-5-20250929") - -# ------------------------------------------------------------------------- -# Credentials -# ------------------------------------------------------------------------- - -# Composite credential store: encrypted files (primary) + env vars (fallback) -_env_mapping = {name: spec.env_var for name, spec in CREDENTIAL_SPECS.items()} -_composite = CompositeStorage( - primary=EncryptedFileStorage(), - fallbacks=[EnvVarStorage(env_mapping=_env_mapping)], -) -CREDENTIALS = CredentialStoreAdapter(CredentialStore(storage=_composite)) - -for _name in ["brave_search", "hubspot"]: - _val = CREDENTIALS.get(_name) - if _val: - logger.debug("credential %s: OK (len=%d)", _name, len(_val)) - else: - logger.debug("credential %s: not found", _name) - -# ------------------------------------------------------------------------- -# Tool Registry — web_search + web_scrape for Node A (Researcher) -# ------------------------------------------------------------------------- - -TOOL_REGISTRY = ToolRegistry() - - -def _exec_web_search(inputs: dict) -> dict: - api_key = CREDENTIALS.get("brave_search") 
- if not api_key: - return {"error": "brave_search credential not configured"} - query = inputs.get("query", "") - num_results = min(inputs.get("num_results", 10), 20) - resp = httpx.get( - "https://api.search.brave.com/res/v1/web/search", - params={"q": query, "count": num_results}, - headers={ - "X-Subscription-Token": api_key, - "Accept": "application/json", - }, - timeout=30.0, - ) - if resp.status_code != 200: - return {"error": f"Brave API HTTP {resp.status_code}"} - data = resp.json() - results = [ - { - "title": item.get("title", ""), - "url": item.get("url", ""), - "snippet": item.get("description", ""), - } - for item in data.get("web", {}).get("results", [])[:num_results] - ] - return {"query": query, "results": results, "total": len(results)} - - -TOOL_REGISTRY.register( - name="web_search", - tool=Tool( - name="web_search", - description=( - "Search the web for current information. " - "Returns titles, URLs, and snippets from search results." - ), - parameters={ - "type": "object", - "properties": { - "query": { - "type": "string", - "description": "The search query (1-500 characters)", - }, - "num_results": { - "type": "integer", - "description": "Number of results (1-20, default 10)", - }, - }, - "required": ["query"], - }, - ), - executor=lambda inputs: _exec_web_search(inputs), -) - -_SCRAPE_HEADERS = { - "User-Agent": ( - "Mozilla/5.0 (Windows NT 10.0; Win64; x64) " - "AppleWebKit/537.36 (KHTML, like Gecko) " - "Chrome/131.0.0.0 Safari/537.36" - ), - "Accept": "text/html,application/xhtml+xml", -} - - -def _exec_web_scrape(inputs: dict) -> dict: - url = inputs.get("url", "") - max_length = max(1000, min(inputs.get("max_length", 50000), 500000)) - if not url.startswith(("http://", "https://")): - url = "https://" + url - try: - resp = httpx.get( - url, - timeout=30.0, - follow_redirects=True, - headers=_SCRAPE_HEADERS, - ) - if resp.status_code != 200: - return {"error": f"HTTP {resp.status_code}"} - soup = BeautifulSoup(resp.text, "html.parser") - 
for tag in soup(["script", "style", "nav", "footer", "header", "aside", "noscript"]): - tag.decompose() - title = soup.title.get_text(strip=True) if soup.title else "" - main = ( - soup.find("article") - or soup.find("main") - or soup.find(attrs={"role": "main"}) - or soup.find("body") - ) - text = main.get_text(separator=" ", strip=True) if main else "" - text = " ".join(text.split()) - if len(text) > max_length: - text = text[:max_length] + "..." - return { - "url": url, - "title": title, - "content": text, - "length": len(text), - } - except httpx.TimeoutException: - return {"error": "Request timed out"} - except Exception as e: - return {"error": f"Scrape failed: {e}"} - - -TOOL_REGISTRY.register( - name="web_scrape", - tool=Tool( - name="web_scrape", - description=( - "Scrape and extract text content from a webpage URL. " - "Returns the page title and main text content." - ), - parameters={ - "type": "object", - "properties": { - "url": { - "type": "string", - "description": "URL of the webpage to scrape", - }, - "max_length": { - "type": "integer", - "description": "Maximum text length (default 50000)", - }, - }, - "required": ["url"], - }, - ), - executor=lambda inputs: _exec_web_scrape(inputs), -) - -logger.info( - "ToolRegistry loaded: %s", - ", ".join(TOOL_REGISTRY.get_registered_names()), -) - -# ------------------------------------------------------------------------- -# Node Specs -# ------------------------------------------------------------------------- - -RESEARCHER_SPEC = NodeSpec( - id="researcher", - name="Researcher", - description="Researches a topic using web search and scraping tools", - node_type="event_loop", - input_keys=["topic"], - output_keys=["research_summary"], - system_prompt=( - "You are a thorough research assistant. Your job is to research " - "the given topic using the web_search and web_scrape tools.\n\n" - "1. Search for relevant information on the topic\n" - "2. Scrape 1-2 of the most promising URLs for details\n" - "3. 
Synthesize your findings into a comprehensive summary\n" - "4. Use set_output with key='research_summary' to save your " - "findings\n\n" - "Be thorough but efficient. Aim for 2-4 search/scrape calls, " - "then summarize and set_output." - ), -) - -ANALYST_SPEC = NodeSpec( - id="analyst", - name="Analyst", - description="Analyzes research findings and provides insights", - node_type="event_loop", - input_keys=["context"], - output_keys=["analysis"], - system_prompt=( - "You are a strategic analyst. You receive research findings from " - "a previous researcher and must:\n\n" - "1. Identify key themes and patterns\n" - "2. Assess the reliability and significance of the findings\n" - "3. Provide actionable insights and recommendations\n" - "4. Use set_output with key='analysis' to save your analysis\n\n" - "Be concise but insightful. Focus on what matters most." - ), -) - - -# ------------------------------------------------------------------------- -# HTML page -# ------------------------------------------------------------------------- - -HTML_PAGE = ( # noqa: E501 - """ - - - - -ContextHandoff Demo - - - -
-

ContextHandoff Demo

- Idle - -
-
-
- - -
- - - -""" -) - - -# ------------------------------------------------------------------------- -# WebSocket handler — sequential Node A → Handoff → Node B -# ------------------------------------------------------------------------- - - -async def handle_ws(websocket): - """Run the two-node handoff pipeline per user message.""" - try: - async for raw in websocket: - try: - msg = json.loads(raw) - except Exception: - continue - - topic = msg.get("topic", "") - if not topic: - continue - - logger.info(f"Starting handoff pipeline for: {topic}") - - try: - await _run_pipeline(websocket, topic) - except websockets.exceptions.ConnectionClosed: - logger.info("WebSocket closed during pipeline") - return - except Exception as e: - logger.exception("Pipeline error") - try: - await websocket.send(json.dumps({"type": "error", "message": str(e)})) - except Exception: - pass - - except websockets.exceptions.ConnectionClosed: - pass - - -async def _run_pipeline(websocket, topic: str): - """Execute: Node A (research) → ContextHandoff → Node B (analysis).""" - import shutil - - # Fresh stores for each run - run_dir = Path(tempfile.mkdtemp(prefix="hive_run_", dir=STORE_DIR)) - store_a = FileConversationStore(run_dir / "node_a") - store_b = FileConversationStore(run_dir / "node_b") - - # Shared event bus - bus = EventBus() - - async def forward_event(event): - try: - payload = {"type": event.type.value, **event.data} - if event.node_id: - payload["node_id"] = event.node_id - await websocket.send(json.dumps(payload)) - except Exception: - pass - - bus.subscribe( - event_types=[ - EventType.NODE_LOOP_STARTED, - EventType.NODE_LOOP_ITERATION, - EventType.NODE_LOOP_COMPLETED, - EventType.LLM_TEXT_DELTA, - EventType.TOOL_CALL_STARTED, - EventType.TOOL_CALL_COMPLETED, - EventType.NODE_STALLED, - ], - handler=forward_event, - ) - - tools = list(TOOL_REGISTRY.get_tools().values()) - tool_executor = TOOL_REGISTRY.get_executor() - - # ---- Phase 1: Researcher 
------------------------------------------------ - await websocket.send(json.dumps({"type": "phase", "phase": "researcher"})) - - node_a = EventLoopNode( - event_bus=bus, - judge=None, # implicit judge: accept when output_keys filled - config=LoopConfig( - max_iterations=20, - max_tool_calls_per_turn=30, - max_context_tokens=32_000, - ), - conversation_store=store_a, - tool_executor=tool_executor, - ) - - ctx_a = NodeContext( - runtime=RUNTIME, - node_id="researcher", - node_spec=RESEARCHER_SPEC, - memory=SharedMemory(), - input_data={"topic": topic}, - llm=LLM, - available_tools=tools, - ) - - result_a = await node_a.execute(ctx_a) - logger.info( - "Researcher done: success=%s, tokens=%s", - result_a.success, - result_a.tokens_used, - ) - - await websocket.send( - json.dumps( - { - "type": "node_result", - "node_id": "researcher", - "success": result_a.success, - "output": result_a.output, - } - ) - ) - - if not result_a.success: - await websocket.send( - json.dumps( - { - "type": "error", - "message": f"Researcher failed: {result_a.error}", - } - ) - ) - return - - # ---- Phase 2: Context Handoff ------------------------------------------- - await websocket.send(json.dumps({"type": "phase", "phase": "handoff"})) - - # Restore the researcher's conversation from store - conversation_a = await NodeConversation.restore(store_a) - if conversation_a is None: - await websocket.send( - json.dumps( - { - "type": "error", - "message": "Failed to restore researcher conversation", - } - ) - ) - return - - handoff_engine = ContextHandoff(llm=LLM) - handoff_context = handoff_engine.summarize_conversation( - conversation=conversation_a, - node_id="researcher", - output_keys=["research_summary"], - ) - - formatted_handoff = ContextHandoff.format_as_input(handoff_context) - logger.info( - "Handoff: %d turns, ~%d tokens, keys=%s", - handoff_context.turn_count, - handoff_context.total_tokens_used, - list(handoff_context.key_outputs.keys()), - ) - - # Send handoff context to browser 
- await websocket.send( - json.dumps( - { - "type": "handoff_context", - "summary": handoff_context.summary[:500], - "turn_count": handoff_context.turn_count, - "tokens": handoff_context.total_tokens_used, - "key_outputs": handoff_context.key_outputs, - } - ) - ) - - # ---- Phase 3: Analyst --------------------------------------------------- - await websocket.send(json.dumps({"type": "phase", "phase": "analyst"})) - - node_b = EventLoopNode( - event_bus=bus, - judge=None, # implicit judge - config=LoopConfig( - max_iterations=10, - max_tool_calls_per_turn=30, - max_context_tokens=32_000, - ), - conversation_store=store_b, - ) - - ctx_b = NodeContext( - runtime=RUNTIME, - node_id="analyst", - node_spec=ANALYST_SPEC, - memory=SharedMemory(), - input_data={"context": formatted_handoff}, - llm=LLM, - available_tools=[], - ) - - result_b = await node_b.execute(ctx_b) - logger.info( - "Analyst done: success=%s, tokens=%s", - result_b.success, - result_b.tokens_used, - ) - - # ---- Done --------------------------------------------------------------- - await websocket.send( - json.dumps( - { - "type": "done", - "researcher": result_a.output, - "analyst": result_b.output, - "total_tokens": ((result_a.tokens_used or 0) + (result_b.tokens_used or 0)), - } - ) - ) - - # Clean up temp stores - try: - shutil.rmtree(run_dir) - except Exception: - pass - - -# ------------------------------------------------------------------------- -# HTTP handler -# ------------------------------------------------------------------------- - - -async def process_request(connection, request: Request): - """Serve HTML on GET /, upgrade to WebSocket on /ws.""" - if request.path == "/ws": - return None - return Response( - HTTPStatus.OK, - "OK", - websockets.Headers({"Content-Type": "text/html; charset=utf-8"}), - HTML_PAGE.encode(), - ) - - -# ------------------------------------------------------------------------- -# Main -# ------------------------------------------------------------------------- 
- - -async def main(): - port = 8766 - async with websockets.serve( - handle_ws, - "0.0.0.0", - port, - process_request=process_request, - ): - logger.info(f"Handoff demo at http://localhost:{port}") - logger.info("Enter a research topic to start the pipeline.") - await asyncio.Future() - - -if __name__ == "__main__": - asyncio.run(main()) diff --git a/core/demos/org_demo.py b/core/demos/org_demo.py deleted file mode 100644 index cbc75a1d..00000000 --- a/core/demos/org_demo.py +++ /dev/null @@ -1,1377 +0,0 @@ -#!/usr/bin/env python3 -""" -Multi-Agent Organization Demo - -Demonstrates multiple EventLoopNode agents communicating in arbitrary -directions, simulating a research consultancy organization. - -Four agents (Director, Researcher, Analyst, Writer) collaborate via -a send_message tool backed by EventBus + inject_event(). A split-panel -UI shows the chat stream alongside a real-time SVG graph with -active-node glow and message-edge animation. - -Usage: - cd /home/timothy/oss/hive/core - python demos/org_demo.py - - Then open http://localhost:8767 in your browser. 
-""" - -import asyncio -import json -import logging -import sys -import tempfile -from http import HTTPStatus -from pathlib import Path - -import httpx -import websockets -from bs4 import BeautifulSoup -from websockets.http11 import Request, Response - -# Add core, tools, and hive root to path -_CORE_DIR = Path(__file__).resolve().parent.parent -_HIVE_DIR = _CORE_DIR.parent -sys.path.insert(0, str(_CORE_DIR)) -sys.path.insert(0, str(_HIVE_DIR / "tools" / "src")) -sys.path.insert(0, str(_HIVE_DIR)) - -import os # noqa: E402 - -from aden_tools.credentials import CREDENTIAL_SPECS, CredentialStoreAdapter # noqa: E402 -from core.framework.credentials import CredentialStore # noqa: E402 - -from framework.credentials.storage import ( # noqa: E402 - CompositeStorage, - EncryptedFileStorage, - EnvVarStorage, -) -from framework.graph.event_loop_node import ( # noqa: E402 - EventLoopNode, - JudgeVerdict, - LoopConfig, -) -from framework.graph.node import NodeContext, NodeSpec, SharedMemory # noqa: E402 -from framework.llm.litellm import LiteLLMProvider # noqa: E402 -from framework.llm.provider import Tool, ToolResult, ToolUse # noqa: E402 -from framework.runner.tool_registry import ToolRegistry # noqa: E402 -from framework.runtime.core import Runtime # noqa: E402 -from framework.runtime.event_bus import ( # noqa: E402 - AgentEvent, - EventBus, - EventType, -) -from framework.storage.conversation_store import FileConversationStore # noqa: E402 - -logging.basicConfig(level=logging.INFO, format="%(asctime)s %(name)s %(message)s") -logger = logging.getLogger("org_demo") - -# ------------------------------------------------------------------------- -# Persistent state -# ------------------------------------------------------------------------- - -STORE_DIR = Path(tempfile.mkdtemp(prefix="hive_org_")) -RUNTIME = Runtime(STORE_DIR / "runtime") -LLM = LiteLLMProvider(model="claude-haiku-4-5-20251001") - -# ------------------------------------------------------------------------- -# 
Credentials -# ------------------------------------------------------------------------- - -_env_mapping = {name: spec.env_var for name, spec in CREDENTIAL_SPECS.items()} -_local_storage = CompositeStorage( - primary=EncryptedFileStorage(), - fallbacks=[EnvVarStorage(env_mapping=_env_mapping)], -) - -if os.environ.get("ADEN_API_KEY"): - try: - from framework.credentials.aden import ( # noqa: E402 - AdenCachedStorage, - AdenClientConfig, - AdenCredentialClient, - AdenSyncProvider, - ) - - _client = AdenCredentialClient(AdenClientConfig(base_url="https://api.adenhq.com")) - _provider = AdenSyncProvider(client=_client) - _storage = AdenCachedStorage( - local_storage=_local_storage, - aden_provider=_provider, - ) - _cred_store = CredentialStore(storage=_storage, providers=[_provider], auto_refresh=True) - _synced = _provider.sync_all(_cred_store) - logger.info("Synced %d credentials from Aden", _synced) - except Exception as e: - logger.warning("Aden sync unavailable: %s", e) - _cred_store = CredentialStore(storage=_local_storage) -else: - logger.info("ADEN_API_KEY not set, using local credential storage") - _cred_store = CredentialStore(storage=_local_storage) - -CREDENTIALS = CredentialStoreAdapter(_cred_store) - -# ------------------------------------------------------------------------- -# Tool Registry — web_search + web_scrape (for Researcher) -# ------------------------------------------------------------------------- - -TOOL_REGISTRY = ToolRegistry() - - -def _exec_web_search(inputs: dict) -> dict: - api_key = CREDENTIALS.get("brave_search") - if not api_key: - return {"error": "brave_search credential not configured"} - query = inputs.get("query", "") - num_results = min(inputs.get("num_results", 5), 20) - resp = httpx.get( - "https://api.search.brave.com/res/v1/web/search", - params={"q": query, "count": num_results}, - headers={"X-Subscription-Token": api_key, "Accept": "application/json"}, - timeout=30.0, - ) - if resp.status_code != 200: - return {"error": 
f"Brave API HTTP {resp.status_code}"} - data = resp.json() - results = [ - { - "title": item.get("title", ""), - "url": item.get("url", ""), - "snippet": item.get("description", ""), - } - for item in data.get("web", {}).get("results", [])[:num_results] - ] - return {"query": query, "results": results, "total": len(results)} - - -TOOL_REGISTRY.register( - name="web_search", - tool=Tool( - name="web_search", - description="Search the web for current information. Returns titles, URLs, and snippets.", - parameters={ - "type": "object", - "properties": { - "query": {"type": "string", "description": "Search query"}, - "num_results": {"type": "integer", "description": "Results (1-20, default 5)"}, - }, - "required": ["query"], - }, - ), - executor=lambda inputs: _exec_web_search(inputs), -) - -_SCRAPE_HEADERS = { - "User-Agent": ( - "Mozilla/5.0 (Windows NT 10.0; Win64; x64) " - "AppleWebKit/537.36 (KHTML, like Gecko) " - "Chrome/131.0.0.0 Safari/537.36" - ), - "Accept": "text/html,application/xhtml+xml", -} - - -def _exec_web_scrape(inputs: dict) -> dict: - url = inputs.get("url", "") - max_length = max(1000, min(inputs.get("max_length", 50000), 500000)) - if not url.startswith(("http://", "https://")): - url = "https://" + url - try: - resp = httpx.get(url, timeout=30.0, follow_redirects=True, headers=_SCRAPE_HEADERS) - if resp.status_code != 200: - return {"error": f"HTTP {resp.status_code}"} - soup = BeautifulSoup(resp.text, "html.parser") - for tag in soup(["script", "style", "nav", "footer", "header", "aside", "noscript"]): - tag.decompose() - title = soup.title.get_text(strip=True) if soup.title else "" - main = ( - soup.find("article") - or soup.find("main") - or soup.find(attrs={"role": "main"}) - or soup.find("body") - ) - text = main.get_text(separator=" ", strip=True) if main else "" - text = " ".join(text.split()) - if len(text) > max_length: - text = text[:max_length] + "..." 
- return {"url": url, "title": title, "content": text, "length": len(text)} - except httpx.TimeoutException: - return {"error": "Request timed out"} - except Exception as e: - return {"error": f"Scrape failed: {e}"} - - -TOOL_REGISTRY.register( - name="web_scrape", - tool=Tool( - name="web_scrape", - description="Scrape text content from a webpage URL.", - parameters={ - "type": "object", - "properties": { - "url": {"type": "string", "description": "URL to scrape"}, - "max_length": {"type": "integer", "description": "Max text length (default 50000)"}, - }, - "required": ["url"], - }, - ), - executor=lambda inputs: _exec_web_scrape(inputs), -) - -logger.info("Tools loaded: %s", ", ".join(TOOL_REGISTRY.get_registered_names())) - -# ------------------------------------------------------------------------- -# Node Specs -# ------------------------------------------------------------------------- - -ROLES = ["director", "researcher", "analyst", "writer"] - -ROLE_SPECS = { - "director": NodeSpec( - id="director", - name="Director", - description="Coordinates the team and synthesizes the final report", - node_type="event_loop", - input_keys=["topic"], - output_keys=["final_report"], - system_prompt=( - "You are the Director of a research consultancy team. " - "You receive research topics and coordinate your team.\n\n" - "Your team:\n" - "- researcher: Web research specialist (has web_search/web_scrape)\n" - "- analyst: Data analysis and pattern recognition\n" - "- writer: Technical writer for polished deliverables\n\n" - "Workflow:\n" - "1. Break the topic into specific research tasks\n" - "2. Send tasks to researcher AND analyst via send_message\n" - "3. Wait for their responses (they will message you back)\n" - "4. Send all material to writer for drafting\n" - "5. When writer returns the draft, review it\n" - "6. Call set_output(key='final_report', value=)\n\n" - "IMPORTANT: Delegate, don't do research or writing yourself." 
- ), - ), - "researcher": NodeSpec( - id="researcher", - name="Researcher", - description="Researches topics using web tools", - node_type="event_loop", - system_prompt=( - "You are a Research Specialist. You receive tasks from the " - "team and use web_search and web_scrape to gather info.\n\n" - "When you receive a task:\n" - "1. Search for relevant information (2-3 searches)\n" - "2. Scrape 1-2 promising URLs for detail\n" - "3. Synthesize findings into a clear summary\n" - "4. Send findings back to whoever asked via send_message\n\n" - "Be thorough but efficient. Focus on facts and data." - ), - ), - "analyst": NodeSpec( - id="analyst", - name="Analyst", - description="Analyzes data and identifies patterns", - node_type="event_loop", - system_prompt=( - "You are a Data Analyst. You receive data and context from " - "team members and provide analytical insights.\n\n" - "When you receive a request:\n" - "1. Analyze the provided information\n" - "2. Identify key themes, patterns, and trends\n" - "3. Assess reliability and significance\n" - "4. Send analysis back via send_message\n\n" - "Be concise but insightful." - ), - ), - "writer": NodeSpec( - id="writer", - name="Writer", - description="Drafts polished deliverables", - node_type="event_loop", - system_prompt=( - "You are a Technical Writer. You receive research findings " - "and analysis from team members and draft polished reports.\n\n" - "When you receive material:\n" - "1. Organize information into a logical structure\n" - "2. Write a clear, professional report with sections\n" - "3. Include findings, analysis, and recommendations\n" - "4. Send the draft back to director via send_message\n\n" - "Write professionally but accessibly." - ), - ), -} - - -def _build_send_tool(role: str) -> Tool: - """Build a send_message tool with 'to' enum excluding the node itself.""" - targets = [r for r in ROLES if r != role] - return Tool( - name="send_message", - description=( - "Send a message to another team member. 
" - "Use this to delegate tasks, share findings, or return work." - ), - parameters={ - "type": "object", - "properties": { - "to": { - "type": "string", - "enum": targets, - "description": f"Team member: {', '.join(targets)}", - }, - "message": { - "type": "string", - "description": "The message content", - }, - }, - "required": ["to", "message"], - }, - ) - - -# Per-role tool lists -_web_tools = list(TOOL_REGISTRY.get_tools().values()) -ROLE_TOOLS: dict[str, list[Tool]] = {} -for _role in ROLES: - _tools = [_build_send_tool(_role)] - if _role == "researcher": - _tools = _web_tools + _tools - ROLE_TOOLS[_role] = _tools - - -# ------------------------------------------------------------------------- -# OrgJudge — blocks between messages, manages node lifecycle -# ------------------------------------------------------------------------- - - -class OrgJudge: - """Judge for org demo nodes. - - - Director: blocks until message arrives, ACCEPTs when output_keys filled - - Specialists: block until message arrives, ACCEPT on done signal - """ - - # Director gets a longer window (waiting for multiple specialist - # replies); specialists only need to wait for follow-ups. 
- _DIRECTOR_TIMEOUT = 120 - _SPECIALIST_TIMEOUT = 30 - - def __init__( - self, - is_director: bool = False, - bus: EventBus | None = None, - node_id: str = "", - ): - self._is_director = is_director - self._message_queue: asyncio.Queue = asyncio.Queue() - self._done = asyncio.Event() - self._bus = bus - self._node_id = node_id - self._timeout = self._DIRECTOR_TIMEOUT if is_director else self._SPECIALIST_TIMEOUT - - async def evaluate(self, context: dict) -> JudgeVerdict: - if self._done.is_set(): - return JudgeVerdict(action="ACCEPT") - - # Director: accept when final_report is set - if self._is_director: - missing = context.get("missing_keys", []) - if not missing: - return JudgeVerdict(action="ACCEPT") - - # Signal UI that this node is waiting for a message - if self._bus: - await self._bus.publish( - AgentEvent( - type=EventType.CUSTOM, - stream_id="org", - node_id=self._node_id, - data={"custom_type": "node_waiting", "node_id": self._node_id}, - ) - ) - - # Block until next message or done - try: - await asyncio.wait_for(self._wait_signal(), timeout=self._timeout) - except TimeoutError: - logger.info("OrgJudge %s idle timeout (%ds)", self._node_id, self._timeout) - return JudgeVerdict(action="ACCEPT") - - if self._done.is_set(): - return JudgeVerdict(action="ACCEPT") - - return JudgeVerdict(action="RETRY") - - async def _wait_signal(self): - """Wait for either a message or done signal.""" - msg_task = asyncio.create_task(self._message_queue.get()) - done_task = asyncio.create_task(self._done.wait()) - try: - _done, pending = await asyncio.wait( - {msg_task, done_task}, - return_when=asyncio.FIRST_COMPLETED, - ) - for t in pending: - t.cancel() - except Exception: - msg_task.cancel() - done_task.cancel() - - def signal_message(self): - """Signal that a new message has been injected.""" - self._message_queue.put_nowait(True) - - def signal_done(self): - """Signal global shutdown.""" - self._done.set() - try: - self._message_queue.put_nowait(None) - except 
asyncio.QueueFull: - pass - - -# ------------------------------------------------------------------------- -# MessageRouter — routes inter-node messages with lazy start -# ------------------------------------------------------------------------- - - -class MessageRouter: - """Routes messages between nodes via inject_event + judge signaling.""" - - def __init__(self, bus: EventBus): - self._bus = bus - self._nodes: dict[str, EventLoopNode] = {} - self._judges: dict[str, OrgJudge] = {} - self._contexts: dict[str, NodeContext] = {} - self._tasks: dict[str, asyncio.Task] = {} - - def register( - self, - role: str, - node: EventLoopNode, - judge: OrgJudge, - context: NodeContext, - ): - self._nodes[role] = node - self._judges[role] = judge - self._contexts[role] = context - - def start(self, role: str): - """Start a node's event loop as a background task.""" - if role not in self._tasks: - self._tasks[role] = asyncio.create_task(self._nodes[role].execute(self._contexts[role])) - logger.info(f"Started node: {role}") - - async def send(self, from_id: str, to_id: str, message: str): - """Send a message from one node to another (lazy start).""" - if to_id not in self._nodes: - raise ValueError(f"Unknown target node: {to_id}") - - # Lazy start the target node if not running - first_start = to_id not in self._tasks - if first_start: - self.start(to_id) - await self._bus.publish( - AgentEvent( - type=EventType.CUSTOM, - stream_id="org", - node_id=to_id, - data={"custom_type": "node_started", "node_id": to_id}, - ) - ) - - # Inject message into target's queue - formatted = f"[Message from {from_id}]: {message}" - await self._nodes[to_id].inject_event(formatted) - # Only signal existing nodes — newly started nodes will drain the - # injection queue on their first iteration, so the signal would be - # stale by the time the judge sees it (causing a spurious RETRY - # that leads to an LLM call with no new content → empty stream). 
- if not first_start: - self._judges[to_id].signal_message() - - logger.info(f"Message: {from_id} -> {to_id} ({len(message)} chars)") - - # Emit event for UI edge animation - await self._bus.publish( - AgentEvent( - type=EventType.CUSTOM, - stream_id="org", - data={ - "custom_type": "message_sent", - "from": from_id, - "to": to_id, - "preview": message[:150], - }, - ) - ) - - def shutdown_all(self): - """Signal all judges to accept and shut down.""" - for judge in self._judges.values(): - judge.signal_done() - - async def wait_all(self, exclude: str = "", timeout: float = 10.0): - """Wait for all running tasks (except exclude) to finish.""" - remaining = [t for r, t in self._tasks.items() if r != exclude and not t.done()] - if remaining: - _done, pending = await asyncio.wait(remaining, timeout=timeout) - for t in pending: - t.cancel() - - def total_tokens(self) -> int: - """Sum tokens across all completed tasks.""" - total = 0 - for t in self._tasks.values(): - if t.done() and not t.cancelled(): - try: - total += t.result().tokens_used or 0 - except Exception: - pass - return total - - -# ------------------------------------------------------------------------- -# Tool executor factory -# ------------------------------------------------------------------------- - - -def _recover_send_args(raw: str) -> tuple[str, str]: - """Try to extract 'to' and 'message' from a malformed JSON string. - - When the LLM produces a very long message value with unescaped - characters, json.loads fails and we get {"_raw": "..."}. Regex - extraction is a best-effort fallback. 
- """ - import re - - to = "" - message = "" - to_match = re.search(r'"to"\s*:\s*"(\w+)"', raw) - if to_match: - to = to_match.group(1) - # message is typically the longest field; grab everything after the key - msg_match = re.search(r'"message"\s*:\s*"', raw) - if msg_match: - # Take from after the opening quote to the end, strip trailing "} - start = msg_match.end() - message = raw[start:].rstrip() - # Strip trailing close-quote + brace(s) if present - for suffix in ('"}\n', '"}', '"'): - if message.endswith(suffix): - message = message[: -len(suffix)] - break - return to, message - - -def make_executor(role: str, router: MessageRouter, base_executor): - """Build a tool executor that handles send_message + delegates rest.""" - - async def _send_message(tool_use: ToolUse) -> ToolResult: - to = tool_use.input.get("to", "") - message = tool_use.input.get("message", "") - - # Recover from malformed JSON (long messages break json.loads) - if not to and "_raw" in tool_use.input: - raw = tool_use.input["_raw"] - to, message = _recover_send_args(raw) - if to: - logger.info("Recovered send_message args from raw string: to=%s", to) - - if to == role: - return ToolResult( - tool_use_id=tool_use.id, - content="Cannot send message to yourself.", - is_error=True, - ) - if to not in router._nodes: - valid = [r for r in ROLES if r != role] - return ToolResult( - tool_use_id=tool_use.id, - content=( - f"Unknown team member: '{to}'. " - f"Valid targets: {', '.join(valid)}. " - f"Use send_message with {{'to': '', 'message': ''}}." 
- ), - is_error=True, - ) - await router.send(role, to, message) - return ToolResult( - tool_use_id=tool_use.id, - content=f"Message delivered to {to}.", - ) - - def executor(tool_use: ToolUse): - if tool_use.name == "send_message": - return _send_message(tool_use) # coroutine, awaited by EventLoopNode - return base_executor(tool_use) - - return executor - - -# ------------------------------------------------------------------------- -# HTML page (embedded) -# ------------------------------------------------------------------------- - -HTML_PAGE = """ - - - - -Multi-Agent Org Demo - - - -
-

Multi-Agent Org

- Director - Researcher - Analyst - Writer - Idle -
- -
-
-
-
Organization Graph
- - - - - - - - - - - - - - - - - Director - - idle - - - - - Researcher - - idle - - - - - Analyst - - idle - - - - - Writer - - idle - -
- Director - Researcher - Analyst - Writer -
-
-
- -
- - -
- - - -""" - - -# ------------------------------------------------------------------------- -# WebSocket handler — org pipeline -# ------------------------------------------------------------------------- - - -async def handle_ws(websocket): - """Handle WebSocket connections for the org demo.""" - try: - async for raw in websocket: - try: - msg = json.loads(raw) - except Exception: - continue - - topic = msg.get("topic", "") - if not topic: - continue - - logger.info(f"Starting org pipeline for: {topic}") - - try: - await _run_org_pipeline(websocket, topic) - except websockets.exceptions.ConnectionClosed: - logger.info("WebSocket closed during pipeline") - return - except Exception as e: - logger.exception("Pipeline error") - try: - await websocket.send(json.dumps({"type": "error", "message": str(e)})) - except Exception: - pass - - except websockets.exceptions.ConnectionClosed: - pass - - -async def _run_org_pipeline(websocket, topic: str): - """Execute the multi-agent org pipeline.""" - import shutil - - run_dir = Path(tempfile.mkdtemp(prefix="hive_run_", dir=STORE_DIR)) - bus = EventBus() - - # Forward bus events to WebSocket - async def forward_event(event): - try: - payload = {"type": event.type.value, **event.data} - if event.node_id: - payload["node_id"] = event.node_id - # Remap CUSTOM events to their custom_type - if event.type == EventType.CUSTOM and "custom_type" in event.data: - payload["type"] = event.data["custom_type"] - await websocket.send(json.dumps(payload)) - except Exception: - pass - - bus.subscribe( - event_types=[ - EventType.NODE_LOOP_STARTED, - EventType.NODE_LOOP_ITERATION, - EventType.NODE_LOOP_COMPLETED, - EventType.LLM_TEXT_DELTA, - EventType.TOOL_CALL_STARTED, - EventType.TOOL_CALL_COMPLETED, - EventType.NODE_STALLED, - EventType.CUSTOM, - ], - handler=forward_event, - ) - - # Build router with all nodes - router = MessageRouter(bus=bus) - base_executor = TOOL_REGISTRY.get_executor() - - for role in ROLES: - store = 
FileConversationStore(run_dir / role) - judge = OrgJudge( - is_director=(role == "director"), - bus=bus, - node_id=role, - ) - executor = make_executor(role, router, base_executor) - - node = EventLoopNode( - event_bus=bus, - judge=judge, - config=LoopConfig( - max_iterations=30, - max_tool_calls_per_turn=30, - max_context_tokens=32_000, - ), - conversation_store=store, - tool_executor=executor, - ) - - input_data = {"topic": topic} if role == "director" else {} - ctx = NodeContext( - runtime=RUNTIME, - node_id=role, - node_spec=ROLE_SPECS[role], - memory=SharedMemory(), - input_data=input_data, - llm=LLM, - available_tools=ROLE_TOOLS[role], - max_tokens=64000, - ) - - router.register(role, node, judge, ctx) - - # Start director (specialists start lazily via MessageRouter.send) - router.start("director") - - # Wait for director to complete (with global timeout) - try: - director_result = await asyncio.wait_for( - router._tasks["director"], - timeout=600, - ) - except TimeoutError: - router.shutdown_all() - await router.wait_all(timeout=5.0) - msg = {"type": "error", "message": "Pipeline timed out (10 min)"} - await websocket.send(json.dumps(msg)) - shutil.rmtree(run_dir, ignore_errors=True) - return - - logger.info( - "Director done: success=%s, tokens=%s", - director_result.success, - director_result.tokens_used, - ) - - # Shut down all specialist nodes - router.shutdown_all() - await router.wait_all(exclude="director", timeout=10.0) - - total_tokens = router.total_tokens() - - # Extract final report - final_report = director_result.output.get("final_report", "") - if not final_report and director_result.output: - final_report = json.dumps(director_result.output, indent=2) - - # Send result to browser - if director_result.success: - await websocket.send( - json.dumps( - { - "type": "org_done", - "final_report": final_report, - "total_tokens": total_tokens, - } - ) - ) - else: - await websocket.send( - json.dumps( - { - "type": "error", - "message": f"Director 
failed: {director_result.error}", - } - ) - ) - - # Clean up - shutil.rmtree(run_dir, ignore_errors=True) - - -# ------------------------------------------------------------------------- -# HTTP handler -# ------------------------------------------------------------------------- - - -async def process_request(connection, request: Request): - """Serve HTML on GET /, upgrade to WebSocket on /ws.""" - if request.path == "/ws": - return None - return Response( - HTTPStatus.OK, - "OK", - websockets.Headers({"Content-Type": "text/html; charset=utf-8"}), - HTML_PAGE.encode(), - ) - - -# ------------------------------------------------------------------------- -# Main -# ------------------------------------------------------------------------- - - -async def main(): - port = 8767 - async with websockets.serve( - handle_ws, - "0.0.0.0", - port, - process_request=process_request, - ): - logger.info(f"Org demo running at http://localhost:{port}") - logger.info("Open in your browser and enter a research topic.") - await asyncio.Future() - - -if __name__ == "__main__": - asyncio.run(main()) -postman request \ No newline at end of file diff --git a/core/framework/llm/litellm.py b/core/framework/llm/litellm.py index bff21e57..a5d59ff9 100644 --- a/core/framework/llm/litellm.py +++ b/core/framework/llm/litellm.py @@ -23,6 +23,7 @@ except ImportError: litellm = None # type: ignore[assignment] RateLimitError = Exception # type: ignore[assignment, misc] +from framework.config import HIVE_LLM_ENDPOINT as HIVE_API_BASE from framework.llm.provider import LLMProvider, LLMResponse, Tool from framework.llm.stream_events import StreamEvent @@ -158,8 +159,6 @@ def _model_supports_cache_control(model: str) -> bool: # enforces a coding-agent whitelist that blocks unknown User-Agents. KIMI_API_BASE = "https://api.kimi.com/coding" -from framework.config import HIVE_LLM_ENDPOINT as HIVE_API_BASE - # Empty-stream retries use a short fixed delay, not the rate-limit backoff. 
# Conversation-structure issues are deterministic — long waits don't help. EMPTY_STREAM_MAX_RETRIES = 3 @@ -402,7 +401,7 @@ class LiteLLMProvider(LLMProvider): if api_base and api_base.rstrip("/").endswith("/v1"): api_base = api_base.rstrip("/")[:-3] elif model.lower().startswith("hive/"): - model = "anthropic/" + model[len("hive/"):] + model = "anthropic/" + model[len("hive/") :] if api_base and api_base.rstrip("/").endswith("/v1"): api_base = api_base.rstrip("/")[:-3] self.model = model From bf39bcdec91991756f6cf9306df78f68ea56b264 Mon Sep 17 00:00:00 2001 From: mma2027 Date: Tue, 17 Mar 2026 12:36:54 -0400 Subject: [PATCH 44/45] fixed race condition deadlock, missing short-circuit eval, unhandled format exceptions (#4012) --- core/framework/graph/safe_eval.py | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/core/framework/graph/safe_eval.py b/core/framework/graph/safe_eval.py index 01a12728..a2aa5a63 100644 --- a/core/framework/graph/safe_eval.py +++ b/core/framework/graph/safe_eval.py @@ -115,11 +115,23 @@ class SafeEvalVisitor(ast.NodeVisitor): return True def visit_BoolOp(self, node: ast.BoolOp) -> Any: - values = [self.visit(v) for v in node.values] + # Short-circuit evaluation to match Python semantics. 
+ # Previously all operands were eagerly evaluated, which broke + # guard patterns like: ``x is not None and x.get("key")`` if isinstance(node.op, ast.And): - return all(values) + result = True + for v in node.values: + result = self.visit(v) + if not result: + return result + return result elif isinstance(node.op, ast.Or): - return any(values) + result = False + for v in node.values: + result = self.visit(v) + if result: + return result + return result raise ValueError(f"Boolean operator {type(node.op).__name__} is not allowed") def visit_IfExp(self, node: ast.IfExp) -> Any: From 23a7b080eba0a4a8a7300c39d7b800f94f1e44b3 Mon Sep 17 00:00:00 2001 From: mma2027 Date: Tue, 17 Mar 2026 13:01:31 -0400 Subject: [PATCH 45/45] test: add comprehensive test suite for safe_eval (#4015) * test: add comprehensive test suite for safe_eval sandboxed evaluator Adds 113 tests across 14 test classes covering the full surface area of the safe_eval expression evaluator used by edge conditions: - Literals, data structures, arithmetic, unary/binary/boolean operators - Short-circuit semantics for `and`/`or` (including guard patterns) - Ternary expressions, variable lookup, subscript/attribute access - Whitelisted function and method calls - Security boundaries (private attrs, disallowed AST nodes, blocked builtins) - Real-world EdgeSpec.condition_expr patterns from graph executor usage * style: fix import sort order --------- Co-authored-by: mma2027 Co-authored-by: hundao --- core/tests/test_safe_eval.py | 520 +++++++++++++++++++++++++++++++++++ 1 file changed, 520 insertions(+) create mode 100644 core/tests/test_safe_eval.py diff --git a/core/tests/test_safe_eval.py b/core/tests/test_safe_eval.py new file mode 100644 index 00000000..8bc13416 --- /dev/null +++ b/core/tests/test_safe_eval.py @@ -0,0 +1,520 @@ +"""Tests for safe_eval — the sandboxed expression evaluator used by edge conditions. 
+ +Covers: literals, data structures, arithmetic, comparisons, boolean logic +(including short-circuit semantics), variable lookup, subscript/attribute +access, whitelisted function calls, method calls, ternary expressions, +chained comparisons, and security boundaries (private attrs, disallowed +AST nodes, disallowed function calls). +""" + +import pytest + +from framework.graph.safe_eval import safe_eval + +# --------------------------------------------------------------------------- +# Literals and constants +# --------------------------------------------------------------------------- + + +class TestLiterals: + def test_integer(self): + assert safe_eval("42") == 42 + + def test_negative_integer(self): + assert safe_eval("-1") == -1 + + def test_float(self): + assert safe_eval("3.14") == pytest.approx(3.14) + + def test_string(self): + assert safe_eval("'hello'") == "hello" + + def test_double_quoted_string(self): + assert safe_eval('"world"') == "world" + + def test_boolean_true(self): + assert safe_eval("True") is True + + def test_boolean_false(self): + assert safe_eval("False") is False + + def test_none(self): + assert safe_eval("None") is None + + +# --------------------------------------------------------------------------- +# Data structures +# --------------------------------------------------------------------------- + + +class TestDataStructures: + def test_list(self): + assert safe_eval("[1, 2, 3]") == [1, 2, 3] + + def test_empty_list(self): + assert safe_eval("[]") == [] + + def test_nested_list(self): + assert safe_eval("[[1, 2], [3, 4]]") == [[1, 2], [3, 4]] + + def test_tuple(self): + assert safe_eval("(1, 2, 3)") == (1, 2, 3) + + def test_dict(self): + assert safe_eval("{'a': 1, 'b': 2}") == {"a": 1, "b": 2} + + def test_empty_dict(self): + assert safe_eval("{}") == {} + + +# --------------------------------------------------------------------------- +# Arithmetic and binary operators +# 
--------------------------------------------------------------------------- + + +class TestArithmetic: + def test_addition(self): + assert safe_eval("2 + 3") == 5 + + def test_subtraction(self): + assert safe_eval("10 - 4") == 6 + + def test_multiplication(self): + assert safe_eval("3 * 7") == 21 + + def test_division(self): + assert safe_eval("10 / 4") == 2.5 + + def test_floor_division(self): + assert safe_eval("10 // 3") == 3 + + def test_modulo(self): + assert safe_eval("10 % 3") == 1 + + def test_power(self): + assert safe_eval("2 ** 10") == 1024 + + def test_complex_expression(self): + assert safe_eval("(2 + 3) * 4 - 1") == 19 + + +# --------------------------------------------------------------------------- +# Unary operators +# --------------------------------------------------------------------------- + + +class TestUnaryOps: + def test_negation(self): + assert safe_eval("-5") == -5 + + def test_positive(self): + assert safe_eval("+5") == 5 + + def test_not_true(self): + assert safe_eval("not True") is False + + def test_not_false(self): + assert safe_eval("not False") is True + + def test_bitwise_invert(self): + assert safe_eval("~0") == -1 + + +# --------------------------------------------------------------------------- +# Comparisons +# --------------------------------------------------------------------------- + + +class TestComparisons: + def test_equal(self): + assert safe_eval("1 == 1") is True + + def test_not_equal(self): + assert safe_eval("1 != 2") is True + + def test_less_than(self): + assert safe_eval("1 < 2") is True + + def test_greater_than(self): + assert safe_eval("2 > 1") is True + + def test_less_equal(self): + assert safe_eval("2 <= 2") is True + + def test_greater_equal(self): + assert safe_eval("3 >= 2") is True + + def test_is_none(self): + assert safe_eval("x is None", {"x": None}) is True + + def test_is_not_none(self): + assert safe_eval("x is not None", {"x": 42}) is True + + def test_in_list(self): + assert safe_eval("'a' in 
x", {"x": ["a", "b", "c"]}) is True + + def test_not_in_list(self): + assert safe_eval("'z' not in x", {"x": ["a", "b"]}) is True + + def test_chained_comparison(self): + """Chained comparisons like 1 < x < 10 should work.""" + assert safe_eval("1 < x < 10", {"x": 5}) is True + + def test_chained_comparison_false(self): + assert safe_eval("1 < x < 3", {"x": 5}) is False + + def test_chained_three_way(self): + assert safe_eval("0 <= x <= 100", {"x": 50}) is True + + +# --------------------------------------------------------------------------- +# Boolean operators (with short-circuit semantics) +# --------------------------------------------------------------------------- + + +class TestBooleanOps: + def test_and_true(self): + assert safe_eval("True and True") is True + + def test_and_false(self): + assert safe_eval("True and False") is False + + def test_or_true(self): + assert safe_eval("False or True") is True + + def test_or_false(self): + assert safe_eval("False or False") is False + + def test_and_returns_last_truthy(self): + """Python `and` returns the last value if all truthy.""" + assert safe_eval("1 and 2 and 3") == 3 + + def test_and_returns_first_falsy(self): + """Python `and` returns the first falsy value.""" + assert safe_eval("1 and 0 and 3") == 0 + + def test_or_returns_first_truthy(self): + """Python `or` returns the first truthy value.""" + assert safe_eval("0 or '' or 42") == 42 + + def test_or_returns_last_falsy(self): + """Python `or` returns the last value if all falsy.""" + assert safe_eval("0 or '' or None") is None + + def test_and_short_circuits(self): + """and should NOT evaluate the right side if left is falsy. + + This is the bug we fixed — previously this would crash with + TypeError because all operands were eagerly evaluated. 
+ """ + # x is None, so `x.get("key")` would crash if evaluated + assert safe_eval("x is not None and x.get('key')", {"x": None}) is False + + def test_or_short_circuits(self): + """or should NOT evaluate the right side if left is truthy.""" + # x is truthy, so the crash-prone right side should never run + assert safe_eval("x or y.get('missing')", {"x": "found", "y": {}}) == "found" + + def test_and_guard_pattern_truthy(self): + """Guard pattern: check not None, then access — when value exists.""" + ctx = {"x": {"key": "value"}} + assert safe_eval("x is not None and x.get('key')", ctx) == "value" + + def test_multi_and(self): + assert safe_eval("True and True and True") is True + + def test_multi_or(self): + assert safe_eval("False or False or True") is True + + def test_mixed_and_or(self): + assert safe_eval("True or False and False") is True + + +# --------------------------------------------------------------------------- +# Ternary (if/else) expressions +# --------------------------------------------------------------------------- + + +class TestTernary: + def test_ternary_true_branch(self): + assert safe_eval("'yes' if True else 'no'") == "yes" + + def test_ternary_false_branch(self): + assert safe_eval("'yes' if False else 'no'") == "no" + + def test_ternary_with_context(self): + assert safe_eval("x * 2 if x > 0 else -x", {"x": 5}) == 10 + + def test_ternary_false_with_context(self): + assert safe_eval("x * 2 if x > 0 else -x", {"x": -3}) == 3 + + +# --------------------------------------------------------------------------- +# Variable lookup +# --------------------------------------------------------------------------- + + +class TestVariables: + def test_simple_variable(self): + assert safe_eval("x", {"x": 42}) == 42 + + def test_string_variable(self): + assert safe_eval("name", {"name": "Alice"}) == "Alice" + + def test_dict_variable(self): + ctx = {"output": {"status": "ok"}} + assert safe_eval("output", ctx) == {"status": "ok"} + + def 
test_undefined_variable_raises(self): + with pytest.raises(NameError, match="not defined"): + safe_eval("undefined_var") + + def test_multiple_variables(self): + assert safe_eval("x + y", {"x": 10, "y": 20}) == 30 + + +# --------------------------------------------------------------------------- +# Subscript access (indexing) +# --------------------------------------------------------------------------- + + +class TestSubscript: + def test_dict_subscript(self): + assert safe_eval("d['key']", {"d": {"key": "value"}}) == "value" + + def test_list_subscript(self): + assert safe_eval("items[0]", {"items": [10, 20, 30]}) == 10 + + def test_nested_subscript(self): + ctx = {"data": {"users": [{"name": "Alice"}]}} + assert safe_eval("data['users'][0]['name']", ctx) == "Alice" + + def test_missing_key_raises(self): + with pytest.raises(KeyError): + safe_eval("d['missing']", {"d": {}}) + + +# --------------------------------------------------------------------------- +# Attribute access +# --------------------------------------------------------------------------- + + +class TestAttributeAccess: + def test_private_attr_blocked(self): + """Attributes starting with _ must be blocked for security.""" + with pytest.raises(ValueError, match="private attribute"): + safe_eval("x.__class__", {"x": 42}) + + def test_dunder_blocked(self): + with pytest.raises(ValueError, match="private attribute"): + safe_eval("x.__dict__", {"x": {}}) + + def test_single_underscore_blocked(self): + with pytest.raises(ValueError, match="private attribute"): + safe_eval("x._internal", {"x": {}}) + + +# --------------------------------------------------------------------------- +# Whitelisted function calls +# --------------------------------------------------------------------------- + + +class TestFunctionCalls: + def test_len(self): + assert safe_eval("len(x)", {"x": [1, 2, 3]}) == 3 + + def test_int_conversion(self): + assert safe_eval("int('42')") == 42 + + def test_float_conversion(self): + assert 
safe_eval("float('3.14')") == pytest.approx(3.14) + + def test_str_conversion(self): + assert safe_eval("str(42)") == "42" + + def test_bool_conversion(self): + assert safe_eval("bool(1)") is True + + def test_abs(self): + assert safe_eval("abs(-5)") == 5 + + def test_min(self): + assert safe_eval("min(3, 1, 2)") == 1 + + def test_max(self): + assert safe_eval("max(3, 1, 2)") == 3 + + def test_sum(self): + assert safe_eval("sum(x)", {"x": [1, 2, 3]}) == 6 + + def test_round(self): + assert safe_eval("round(3.7)") == 4 + + def test_all(self): + assert safe_eval("all([True, True, True])") is True + + def test_any(self): + assert safe_eval("any([False, False, True])") is True + + def test_list_constructor(self): + assert safe_eval("list(x)", {"x": (1, 2, 3)}) == [1, 2, 3] + + def test_dict_constructor(self): + assert safe_eval("dict(a=1, b=2)") == {"a": 1, "b": 2} + + def test_tuple_constructor(self): + assert safe_eval("tuple(x)", {"x": [1, 2]}) == (1, 2) + + def test_set_constructor(self): + assert safe_eval("set(x)", {"x": [1, 2, 2, 3]}) == {1, 2, 3} + + +# --------------------------------------------------------------------------- +# Whitelisted method calls +# --------------------------------------------------------------------------- + + +class TestMethodCalls: + def test_dict_get(self): + assert safe_eval("d.get('key', 'default')", {"d": {"key": "val"}}) == "val" + + def test_dict_get_missing(self): + assert safe_eval("d.get('missing', 'default')", {"d": {}}) == "default" + + def test_dict_keys(self): + result = safe_eval("list(d.keys())", {"d": {"a": 1, "b": 2}}) + assert sorted(result) == ["a", "b"] + + def test_dict_values(self): + result = safe_eval("list(d.values())", {"d": {"a": 1, "b": 2}}) + assert sorted(result) == [1, 2] + + def test_string_lower(self): + assert safe_eval("s.lower()", {"s": "HELLO"}) == "hello" + + def test_string_upper(self): + assert safe_eval("s.upper()", {"s": "hello"}) == "HELLO" + + def test_string_strip(self): + assert 
safe_eval("s.strip()", {"s": " hi "}) == "hi" + + def test_string_split(self): + assert safe_eval("s.split(',')", {"s": "a,b,c"}) == ["a", "b", "c"] + + +# --------------------------------------------------------------------------- +# Security: disallowed operations +# --------------------------------------------------------------------------- + + +class TestSecurity: + def test_import_blocked(self): + """__import__ is not in context, so NameError is raised.""" + with pytest.raises(NameError, match="not defined"): + safe_eval("__import__('os')") + + def test_lambda_blocked(self): + with pytest.raises(ValueError, match="not allowed"): + safe_eval("(lambda: 1)()") + + def test_comprehension_blocked(self): + with pytest.raises(ValueError, match="not allowed"): + safe_eval("[x for x in range(10)]") + + def test_assignment_blocked(self): + """Assignment expressions should not parse in eval mode.""" + with pytest.raises(SyntaxError): + safe_eval("x = 5") + + def test_disallowed_function_blocked(self): + """eval is not in safe functions, so NameError is raised.""" + with pytest.raises(NameError, match="not defined"): + safe_eval("eval('1+1')") + + def test_exec_blocked(self): + """exec is not in safe functions, so NameError is raised.""" + with pytest.raises(NameError, match="not defined"): + safe_eval("exec('x=1')") + + def test_type_call_blocked(self): + """type is not in safe functions, so NameError is raised.""" + with pytest.raises(NameError, match="not defined"): + safe_eval("type(42)") + + def test_getattr_builtin_blocked(self): + """getattr is not in safe functions, so NameError is raised.""" + with pytest.raises(NameError, match="not defined"): + safe_eval("getattr(x, '__class__')", {"x": 42}) + + def test_empty_expression_raises(self): + with pytest.raises(SyntaxError): + safe_eval("") + + +# --------------------------------------------------------------------------- +# Real-world edge condition patterns (from graph executor usage) +# 
--------------------------------------------------------------------------- + + +class TestEdgeConditionPatterns: + """Patterns commonly used in EdgeSpec.condition_expr.""" + + def test_output_key_exists_and_not_none(self): + ctx = {"output": {"approved_contacts": ["alice@example.com"]}} + assert safe_eval("output.get('approved_contacts') is not None", ctx) is True + + def test_output_key_missing(self): + ctx = {"output": {}} + assert safe_eval("output.get('approved_contacts') is not None", ctx) is False + + def test_output_key_check_with_fallback(self): + ctx = {"output": {"redo_extraction": True}} + assert safe_eval("output.get('redo_extraction') is not None", ctx) is True + + def test_guard_then_length_check(self): + """Guard pattern: check key exists, then check length.""" + ctx = {"output": {"results": [1, 2, 3]}} + assert ( + safe_eval( + "output.get('results') is not None and len(output['results']) > 0", + ctx, + ) + is True + ) + + def test_guard_short_circuits_on_none(self): + """Guard pattern: short-circuit prevents crash on None.""" + ctx = {"output": {}} + assert ( + safe_eval( + "output.get('results') is not None and len(output['results']) > 0", + ctx, + ) + is False + ) + + def test_success_flag_check(self): + ctx = {"output": {"success": True}, "memory": {"attempts": 2}} + assert safe_eval("output.get('success') == True", ctx) is True + + def test_memory_threshold(self): + ctx = {"memory": {"score": 0.85}} + assert safe_eval("memory.get('score', 0) >= 0.8", ctx) is True + + def test_string_contains_check(self): + ctx = {"output": {"status": "completed_with_warnings"}} + assert safe_eval("'completed' in output.get('status', '')", ctx) is True + + def test_fallback_chain(self): + """or-chain for fallback values.""" + ctx = {"output": {}} + result = safe_eval( + "output.get('primary') or output.get('secondary') or 'default'", + ctx, + ) + assert result == "default" + + def test_no_context_needed(self): + """Some edges use constant expressions.""" + 
assert safe_eval("True") is True + assert safe_eval("1 == 1") is True