Compare commits

...

695 Commits

Author SHA1 Message Date
Timothy b1f3d931cd fix: remove output cleaner 2026-03-05 19:48:13 -08:00
Richard Tang 8fd66a12c5 Merge remote-tracking branch 'origin/feat/queen-responsibility' into feat/queen-responsibility 2026-03-05 19:03:27 -08:00
Richard Tang f23d5a3ff5 feat: add terminal node back in graph 2026-03-05 19:02:41 -08:00
Timothy be8ec867e5 fix: better stall detection 2026-03-05 18:51:36 -08:00
Timothy b2ba42e541 Merge branch 'feat/queen-responsibility' into feat/worker-progressive-disclosure 2026-03-05 15:26:09 -08:00
Richard Tang 94d0038e03 chore: remove duplicates in anti-patterns 2026-03-05 14:54:46 -08:00
Timothy e1bf300e3c feat: progressive disclosure of runtime data 2026-03-05 14:44:02 -08:00
Richard Tang c6b922e831 feat: condense framework guide 2026-03-05 14:26:03 -08:00
Richard Tang 71d12a7904 feat: condensed queen building prompts 2026-03-05 14:18:03 -08:00
Richard Tang 24c25d408c feat: remove unused prompts 2026-03-05 14:13:02 -08:00
Richard Tang 2e99fc9fe5 feat: change graph guidelines 2026-03-05 14:08:52 -08:00
Richard Tang c1f066b8ba feat: add gcu and validation in initialize_agent_package 2026-03-05 14:01:43 -08:00
Richard Tang e7a6074800 fix: prevent duplicate session creation when starting from home 2026-03-05 13:44:39 -08:00
Richard Tang 719942d29a fix: bug for multiple session calls 2026-03-05 13:04:17 -08:00
Richard Tang 190450a2b2 refactor: skip judge logic improvement 2026-03-05 12:38:18 -08:00
Richard Tang 44d609b719 feat: allow judge to wait queen input 2026-03-05 12:33:27 -08:00
Richard Tang 8c9892f9f6 feat: re-organized the tools for list mcp tools 2026-03-05 12:06:57 -08:00
Richard Tang 6ade844722 feat: escalation implementation 2026-03-04 19:59:02 -08:00
Richard Tang b9a3c67fea feat: dynamically load the system prompt in the context 2026-03-04 19:40:20 -08:00
Richard Tang 219bbe00fc feat: move guardrail to validation 2026-03-04 19:11:37 -08:00
Richard Tang ef6af5404f refactor: new builder flow 2026-03-04 18:42:20 -08:00
Richard Tang b7d57f3d49 feat: fix run_command and remove agent search 2026-03-04 18:04:42 -08:00
Richard Tang 58c892babb Merge remote-tracking branch 'origin/main' into feat/queen-responsibility 2026-03-04 18:03:03 -08:00
Timothy @aden 4b5ec796bc Merge pull request #5829 from aden-hive/feat/remove-old-session-status-tools
fix: remove the reference in the coder agent init
2026-03-04 17:42:34 -08:00
Richard Tang 24df4729ca fix: remove the reference in the coder agent init 2026-03-04 17:40:28 -08:00
Richard Tang 9e2004e33b Merge remote-tracking branch 'origin/main' into feat/queen-responsibility 2026-03-04 17:30:09 -08:00
Timothy @aden 1e6538efac Merge pull request #5828 from aden-hive/feat/remove-old-session-status-tools
Remove deprecated get_agent_session_state and get_agent_session_memory tools
2026-03-04 17:29:35 -08:00
Richard Tang f9e53f58af refactor: remove old get_agent_session_state and get_agent_session_memory tools 2026-03-04 17:23:10 -08:00
Timothy 41388efc31 fix: Windows compat — guard os.fchmod and remove deleted LLM_CREDENTIALS import
os.fchmod does not exist on Windows; guard with hasattr check.
Remove LLM_CREDENTIALS reference from test (module deleted in e1db3a4).
2026-03-04 17:22:21 -08:00
Timothy @aden fab5ce6fd0 Merge pull request #5824 from aden-hive/chore/fix-tool-tests
chore(micro-fix): fix test
2026-03-04 17:16:10 -08:00
Richard Tang b8be3056ed feat: list agent tool instruction 2026-03-04 17:03:45 -08:00
Timothy 207d6baee5 chore: fix test 2026-03-04 16:49:39 -08:00
Timothy @aden fec72bb2b6 Merge pull request #5294 from Antiarin/feat/hashline-edit-tool
[Integration]feat: add hashline anchor-based file editing tool
2026-03-04 16:38:13 -08:00
Richard Tang 39029b82d6 refactor: remove the coding agent check in quickstart 2026-03-04 16:27:33 -08:00
Richard Tang 232890b970 docs: remove the reference of the old agent skills 2026-03-04 16:23:30 -08:00
Timothy c4c4c24c59 Merge branch 'main' into feat/hashline-edit-tool 2026-03-04 16:23:07 -08:00
Richard Tang 13a8e28ae2 refactor: remove all old unused skills 2026-03-04 16:18:28 -08:00
Timothy @aden 2005ba2dca Merge pull request #5823 from aden-hive/micro-fix/lint
chore(micro-fix): lint
2026-03-04 16:11:51 -08:00
Richard Tang 34a44aa83c chore: update remaining reference for queen mode 2026-03-04 16:11:28 -08:00
Timothy 557d5fd6e5 chore: lint 2026-03-04 16:10:35 -08:00
Richard Tang 8468c45dc2 refactor: rename the queen mode to queen phase for clarity 2026-03-04 16:10:15 -08:00
Timothy @aden 79d2a15f95 Merge pull request #5814 from fermano/feature/windows-filesysten
Feature/windows filesystem
2026-03-04 16:07:33 -08:00
Richard Tang d2c3649566 refactor: re-organize the agent initialization tools 2026-03-04 16:06:00 -08:00
Timothy ab32e44128 style: ruff format fixes
Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-04 16:00:13 -08:00
Bryan @ Aden 047059f85f Merge pull request #5774 from kostasuser01gr/docs/4780-roadmap-updates
docs: update roadmap to reflect completed features (refs #4780)
2026-03-04 23:59:56 +00:00
Timothy e8364f616d Merge remote-tracking branch 'origin/main' into feature/windows-filesysten 2026-03-04 15:59:49 -08:00
Bryan @ Aden 9098c9b6c6 Merge pull request #5785 from code-Miracle49/fix/remove-duplicate-execute-subagent
micro-fix: remove duplicate _execute_subagent method in EventLoopNode
2026-03-04 23:55:36 +00:00
Timothy 84fd9ebac8 style: fix E501 line-too-long lint errors
Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-04 15:53:45 -08:00
Timothy @aden 23d5d76d56 Merge pull request #5822 from aden-hive/fix/anthropic-vendor-issue
fix: remove hardcoded Anthropic dependencies from core framework
2026-03-04 15:46:16 -08:00
Timothy b0c86588b6 chore: lint 2026-03-04 15:44:11 -08:00
Timothy 5aff1f9489 chore: lint 2026-03-04 15:43:59 -08:00
Timothy Zhang 199cb3d8cc fix: stdin conflicts 2026-03-04 14:45:02 -08:00
Fernando Mano a98a4ca0b6 feature(WindowsFilesystemSupport): #5677 - Windows File System Support and Testing -- fixing lint issues 2026-03-04 21:22:11 -03:00
Fernando Mano c4f49aadfa feature(WindowsFilesystemSupport): #5677 - Windows File System Support and Testing -- fixing lint issues 2026-03-04 21:20:33 -03:00
Fernando Mano ca5ac389cf feature(WindowsFilesystemSupport): #5677 - Windows File System Support and Testing -- fixing lint issues 2026-03-04 21:15:08 -03:00
Fernando Mano 7a658f7953 feature(WindowsFilesystemSupport): #5677 - Windows File System Support and Testing -- fixing lint issues 2026-03-04 21:05:49 -03:00
Timothy e05fc99da7 Merge branch 'main' into fix/anthropic-vendor-issue 2026-03-04 14:43:27 -08:00
Bryan @ Aden 787090667e Merge pull request #5816 from aden-hive/fix/pause-stop-worker
(micro-fix): update pause in pipeline uses stop_worker like queen
2026-03-04 21:50:23 +00:00
Antiarin 80b36b4052 fix: CRLF double-conversion in hashline edit and add large file skip reporting
- Replace joined.replace("\n", "\r\n") with re.sub(r"(?<!\r)\n", "\r\n", joined to prevent \r\n in replace op new_content from becoming \r\r\n (fixed in both hashline_edit.py and file_ops.py)
 - Track and report skipped large files in grep_search instead of silently skipping them
 - Extract HASHLINE_MAX_FILE_BYTES constant to hashline.py as single source of truth, imported by view_file, grep_search, hashline_edit, and file_ops
 - Add tests for CRLF replace op (both copies) and large file skip reporting
2026-03-05 03:12:35 +05:30
bryan 0b8ed521c0 fix:update pause in pipeline uses stop_worker like queen 2026-03-04 13:42:20 -08:00
Timothy 1c58ccb0c1 chore: lint 2026-03-04 12:45:27 -08:00
Timothy 79b80fe817 feat: coder tools to also support hashline editing 2026-03-04 12:41:07 -08:00
Antiarin c0f3841af7 feat: add file size check in grep_search to skip large files and switch case in hashline edit
- Implemented a check to skip files larger than 10MB in the grep_search function to optimize memory usage.
2026-03-04 12:41:07 -08:00
Antiarin 2b7d9bc471 feat:Updating the docs 2026-03-04 12:41:07 -08:00
Antiarin 98dc493a39 feat: Add cross-tool hashing anchor, grep search, and all viewfile 2026-03-04 12:41:07 -08:00
Antiarin cfaa57b28d feat:Add hashing tool 2026-03-04 12:40:25 -08:00
Timothy @aden 219e603de6 Merge pull request #5813 from aden-hive/refactor/quickstart
Refactor/quickstart
2026-03-04 12:27:45 -08:00
Timothy @aden 7663a5bce8 Merge pull request #5797 from Waryjustice/fix/windows-browser-auto-open
fix: browser auto-open after quickstart does not work on Windows
2026-03-04 12:27:35 -08:00
Timothy f2841b945d chore: lint 2026-03-04 12:24:08 -08:00
bryan faff64c413 chore: agents.md update 2026-03-04 12:12:27 -08:00
Timothy 6fbcdc1d87 fix: auto install node 20 2026-03-04 12:11:29 -08:00
bryan 69a11af949 chore: best effort alignment of windows quickstart 2026-03-04 11:43:50 -08:00
bryan 9ef272020e chore: added llm key health check 2026-03-04 11:35:12 -08:00
Richard Tang 71226d9625 fix: logger schema mismatch 2026-03-04 10:55:28 -08:00
bryan 258cfe7de5 chore: added easy way to update llm provider key 2026-03-04 10:42:57 -08:00
bryan 0d53b21133 chore: doc updates about hive open 2026-03-04 10:33:34 -08:00
Richard Tang 9102328d1c feat: make LLM logger by default on 2026-03-04 10:30:17 -08:00
Fernando Mano 704a0fd63a feature(WindowsFilesystemSupport): #5677 - Windows File System Support and Testing -- remove testing codeand prepare for PR 2026-03-04 15:09:06 -03:00
bryan 0ccb28ffab fix: enter to use previously configured 2026-03-04 10:05:59 -08:00
Fernando Mano bf4101ac38 feature(WindowsFilesystemSupport): #5677 - Windows File System Support and Testing -- remove testing codeand prepare for PR 2026-03-04 15:03:02 -03:00
bryan b30b571b44 chore: update recommended models 2026-03-04 09:54:29 -08:00
bryan bc44c3a401 chore: make gcu enabled by default 2026-03-04 09:52:42 -08:00
bryan 7fbf57cbb7 fix: linter update 2026-03-04 09:52:16 -08:00
Fernando Mano bc349e8fde feature(WindowsFilesystemSupport): #5677 - Windows File System Support and Testing -- remove testing codeand prepare for PR 2026-03-04 14:41:42 -03:00
bryan 67d094f51a fix: tool tests 2026-03-04 09:22:34 -08:00
bryan 873af04c6e fix: utilize mac keychain for claude code subscription 2026-03-04 09:22:12 -08:00
Shaurya Singh 2f0439dca8 Merge branch 'main' into fix/windows-browser-auto-open 2026-03-04 22:50:39 +05:30
Fernando Mano 8470c6a980 feature(WindowsFilesystemSupport): #5677 - Windows File System Support and Testing 2026-03-04 14:16:48 -03:00
bryan 1920192656 feat: hive open cmd 2026-03-04 08:55:18 -08:00
Waryjustice f56feaf821 fix: browser auto-open after quickstart does not work on Windows 2026-03-04 22:12:53 +05:30
Timothy @aden 4cbd5a4c6c Merge pull request #5786 from osb910/fix/charmap-decode-error
fix(core): add utf-8 encoding to backend open calls (micro-fix)
2026-03-04 08:39:10 -08:00
Timothy 65aa5629e8 chore: fix lint 2026-03-04 08:34:01 -08:00
Omar Shareef 7193d09bed formatting warning fix 2026-03-04 16:43:46 +02:00
Omar Shareef 49f8fae0b4 fix: systematically enforce UTF-8 encoding across tools and core to fix Windows charmap decode errors 2026-03-04 16:04:53 +02:00
Omar Shareef e1a490756e fix: systematically enforce UTF-8 encoding across tools and core to fix Windows charmap decode errors 2026-03-04 15:58:03 +02:00
code-Miracle49 c313ea7ee2 micro-fix: remove duplicate _execute_subagent method in EventLoopNode 2026-03-04 12:54:43 +01:00
Omar Shareef 91bfaf36e3 fix(core): add utf-8 encoding to backend open calls
This fixes a charmap decoding error on Windows when opening agent files without explicitly specifying the encoding.
2026-03-04 13:32:59 +02:00
kostasuser01gr 99d41d8cc6 docs: update roadmap to reflect completed features (refs #4780) 2026-03-04 08:37:14 +02:00
Timothy @aden 465adf5b1f Merge pull request #5767 from aden-hive/feat/integrations
Feat/integrations
2026-03-03 22:04:08 -08:00
RichardTang-Aden 132d00d166 Merge pull request #5769 from aden-hive/queen-mode-separation
Release / Create Release (push) Waiting to run
Queen mode separation: building, staging, and running modes
2026-03-03 21:31:23 -08:00
Richard Tang a604fee3aa chore: mode label update 2026-03-03 20:47:35 -08:00
Timothy 8018325923 style: fix all ruff lint errors (E501, E722, E741, F841)
- Break long lines (E501) across 25+ files
- Replace bare except with except Exception (E722)
- Rename ambiguous variable `l` to `item` (E741)
- Prefix unused variables with underscore (F841)
2026-03-03 20:42:30 -08:00
Richard Tang 3f86bd4009 chore: lint fix 2026-03-03 20:39:04 -08:00
Timothy b4cf10214b chore: lint issues 2026-03-03 20:38:30 -08:00
Bryan @ Aden c7818c2c33 Merge pull request #5766 from aden-hive/fix/credential-modal-delete
(micro-fix): Fix/credential modal delete
2026-03-04 04:38:23 +00:00
Timothy e421bcc326 chore: lint issues 2026-03-03 20:36:28 -08:00
Richard Tang 09e5a4dcc0 chore: frontend verbrige 2026-03-03 20:31:26 -08:00
Richard Tang ce08c44235 feat: improve ui indicator 2026-03-03 20:28:32 -08:00
Richard Tang e743234324 fix: strenghthen prompt to collect user intent 2026-03-03 20:23:53 -08:00
Timothy 9b76ac48b7 chore: new depedency 2026-03-03 20:23:10 -08:00
Richard Tang 6ae16345a8 fix: reference err from merging 2026-03-03 20:15:37 -08:00
Richard Tang 8daaf000b1 Merge remote-tracking branch 'origin/feat/question-widget' into queen-mode-separation 2026-03-03 20:09:10 -08:00
Richard Tang 273f411eee feat: replace the reload agent to stop worker 2026-03-03 20:01:27 -08:00
Richard Tang 6929cecf8a fix: tag for frontend 2026-03-03 19:53:18 -08:00
Richard Tang 9221a7ff03 Merge remote-tracking branch 'origin/queen-mode-separation' into queen-mode-separation 2026-03-03 19:43:33 -08:00
Richard Tang a6089c5b3b feat: returning queen bee status when starting session 2026-03-03 19:43:04 -08:00
Richard Tang a7ee972b32 feat: enable the frontend to cancel the current queen run and sync queen mode 2026-03-03 19:30:55 -08:00
Richard Tang c817989b99 feat: allow frontend change to control mode 2026-03-03 19:29:33 -08:00
Richard Tang 2272a6854c refactor: consolidate discorver_mcp_tools and list_agent_tools 2026-03-03 19:08:58 -08:00
Timothy 040fc1ee8d feat: corrected agent generation guidelines 2026-03-03 18:53:40 -08:00
Richard Tang f00b8d7b8c fix: update the initial state condition 2026-03-03 18:35:24 -08:00
Timothy @aden 6c8c6d7048 Merge pull request #5234 from Antiarin/fix/guardian-self-trigger-loop
fix(tui): fix pause/stop to cancel all running tasks across all graphs
2026-03-03 18:17:15 -08:00
Richard Tang f27ef52c7a feat: update queen initial state 2026-03-03 18:15:51 -08:00
Richard Tang 0a2ff1db97 feat: new queen stages and tools 2026-03-03 18:07:47 -08:00
Timothy 6da48eac6f feat: split tool loading into verified and unverified tiers
register_all_tools() now only loads verified (stable) tools by default.
Pass include_unverified=True to also load new/community integrations.
This prevents unverified tools from being loaded in production.

Also fixes duplicate register_brevo and register_pushover calls.
2026-03-03 17:54:45 -08:00
Timothy 638ff04e24 fix: remove duplicate community tool directories and fix credential wiring
- Remove s3_tool (duplicate of aws_s3_tool), power_bi_tool (duplicate of
  powerbi_tool), x_tool (duplicate of twitter_tool)
- Remove integrations/plaid (duplicate of plaid_tool), integrations/sap_s4hana
  (duplicate of sap_tool), stray tools/mssql.py
- Add help key to credential error responses across 14 tool modules
- Fix health checker registry keys (calendly -> calendly_pat, lusha -> lusha_api_key)
- Add health_check_endpoint to calendly and lusha credential specs
- Fix Trello env var (TRELLO_TOKEN -> TRELLO_API_TOKEN) and remove duplicate
  Trello specs from hubspot.py
- Add credential_group="aws" to AWS S3 and Redshift specs sharing env vars
- Update conftest UNREGISTERED_COMMUNITY_MODULES to only contain mssql_tool
2026-03-03 17:46:28 -08:00
Timothy d7075b459b fix: cleanse llm conversations 2026-03-03 17:44:21 -08:00
bryan d0e7aa14b6 fix: hide delete button for Aden-managed credentials 2026-03-03 17:36:04 -08:00
bryan 59fee56c54 fix: share server credential store with runner to avoid redundant Aden syncs 2026-03-03 17:35:24 -08:00
bryan 2207306169 fix: resolve MCP server cwd from repo root instead of agent path 2026-03-03 17:34:51 -08:00
Richard Tang 8ff2e91f2d feat: add queen agent building and running mode switching 2026-03-03 16:01:41 -08:00
Richard Tang 61afaa4c8b feat: add uv instruction to agents 2026-03-03 14:51:58 -08:00
Richard Tang 0de47dbc3f feat: agents.md for agent collaboration 2026-03-03 14:51:58 -08:00
Richard Tang 676ef56134 fix: mcp path 2026-03-03 14:51:58 -08:00
Richard Tang f0899bb35d feat: use send instead of draft for email reply agent 2026-03-03 14:51:58 -08:00
Richard Tang f490038e36 chore: move the email reply sample agent 2026-03-03 14:51:58 -08:00
Richard Tang cbf220eb00 feat: email reply sample agent 2026-03-03 14:51:58 -08:00
Richard Tang bf0d80ea20 docs: reorder section in documentation 2026-03-03 14:51:58 -08:00
Richard Tang 3ae889a6f8 docs: add running screenshot and update the coding agent instruction 2026-03-03 14:51:58 -08:00
Richard Tang 03ca1067ac docs: sync all i18n READMEs with primary README 2026-03-03 14:51:58 -08:00
Richard Tang 3cda30a40a docs: update the latest features from recent changes 2026-03-03 14:51:58 -08:00
Richard Tang 26934527b9 docs: update readme instructions 2026-03-03 14:51:58 -08:00
Richard Tang 2619acde22 docs: remove TUI in the readme 2026-03-03 14:51:58 -08:00
Richard Tang b983d3cfd2 chore: ignore local dev skills 2026-03-03 14:51:58 -08:00
Richard Tang 87a9dd15fe fix: load-new-session from home 2026-03-03 14:51:58 -08:00
RichardTang-Aden 4066962ade Merge pull request #5751 from aden-hive/load-new-session-from-home
Fix new session from home and add email reply agent template
2026-03-03 14:48:17 -08:00
Richard Tang 0f26e34f09 fix: improve the reply template 2026-03-03 14:45:07 -08:00
Richard Tang d76e436e3d fix: new session should have their own id 2026-03-03 14:44:51 -08:00
Timothy 4ff531dec7 fix: update expected health checkers set (add calendly, zoho_crm) 2026-03-03 14:10:34 -08:00
Timothy 4f8b3d7aff fix: update credential specs for community Linear/Trello tools, skip unregistered community modules 2026-03-03 14:09:04 -08:00
Timothy 210fa9c474 fix: use community Brevo implementation (6 tools), remove orphaned x_tool test 2026-03-03 14:06:00 -08:00
Timothy 25361cac8c fix: align tests with community implementations, revert Reddit to httpx (praw unavailable) 2026-03-03 14:02:33 -08:00
Timothy 28defebd6d fix: remove community youtube_transcript tool.py requiring uninstalled SDK 2026-03-03 13:58:45 -08:00
Timothy c74381619e Merge branch 'feature/queen-worker-comm' into feat/question-widget 2026-03-03 13:57:52 -08:00
Timothy d58f3103dd fix: guard register_tools for s3_tool and mssql_tool when SDK not available 2026-03-03 13:54:46 -08:00
Timothy 5d1ed35660 fix: remove shell heredoc artifacts from community power_bi_tool 2026-03-03 13:52:20 -08:00
Timothy 1f3e305534 fix: guard optional SDK imports (boto3, pyodbc) and remove s3_tool registration 2026-03-03 13:51:04 -08:00
Timothy 7d8fdd279c fix: revert Asana to httpx-based implementation (asana SDK not available) 2026-03-03 13:33:35 -08:00
Timothy cacae9f290 fix: compaction logics 2026-03-03 13:33:01 -08:00
Timothy bb061b770f merge: incorporate QuickBooks community PR #4158
# Conflicts:
#	examples/templates/deep_research_agent/config.py
#	examples/templates/tech_news_reporter/config.py
#	tools/README.md
#	tools/src/aden_tools/credentials/__init__.py
#	tools/src/aden_tools/credentials/quickbooks.py
#	tools/src/aden_tools/tools/__init__.py
#	tools/src/aden_tools/tools/quickbooks_tool/__init__.py
#	tools/src/aden_tools/tools/quickbooks_tool/quickbooks_tool.py
#	tools/tests/tools/test_quickbooks_tool.py
2026-03-03 13:27:04 -08:00
Timothy a8768b9ed6 merge: incorporate MSSQL community PR #4200
# Conflicts:
#	tools/pyproject.toml
#	tools/src/aden_tools/credentials/integrations.py
#	tools/src/aden_tools/tools/__init__.py
2026-03-03 13:26:36 -08:00
Timothy b437aa5f6c merge: incorporate Linear community PR #3585
# Conflicts:
#	.claude/skills/hive-credentials/SKILL.md
#	tools/README.md
#	tools/src/aden_tools/tools/__init__.py
#	tools/src/aden_tools/tools/linear_tool/__init__.py
#	tools/src/aden_tools/tools/linear_tool/linear_tool.py
2026-03-03 13:24:57 -08:00
Timothy 9248182570 merge: incorporate Trello community PR #3376
# Conflicts:
#	tools/README.md
#	tools/src/aden_tools/tools/__init__.py
#	tools/src/aden_tools/tools/trello_tool/__init__.py
#	tools/src/aden_tools/tools/trello_tool/trello_tool.py
#	tools/tests/tools/test_trello_tool.py
2026-03-03 13:24:23 -08:00
bryan 511c1a6ed5 fix: update queen prompt around ask_user 2026-03-03 13:22:59 -08:00
Timothy 7c77c7170f merge: incorporate YouTube Transcript community PR #3520
# Conflicts:
#	tools/pyproject.toml
#	tools/src/aden_tools/tools/__init__.py
2026-03-03 13:22:46 -08:00
Timothy 85fcb6516c merge: incorporate Redshift community PR #3533
# Conflicts:
#	tools/pyproject.toml
#	tools/src/aden_tools/tools/__init__.py
#	tools/src/aden_tools/tools/redshift_tool/__init__.py
#	tools/src/aden_tools/tools/redshift_tool/redshift_tool.py
#	tools/tests/tools/test_redshift_tool.py
2026-03-03 13:17:41 -08:00
Timothy e8e76d85f7 merge: incorporate Pushover community PR #5424
# Conflicts:
#	tools/src/aden_tools/tools/pushover_tool/__init__.py
#	tools/src/aden_tools/tools/pushover_tool/pushover_tool.py
2026-03-03 13:17:18 -08:00
Timothy 5aaa5ae4d5 merge: incorporate Twitter/X community PR #3807
# Conflicts:
#	tools/src/aden_tools/credentials/__init__.py
#	tools/src/aden_tools/tools/__init__.py
#	tools/tests/test_credentials.py
2026-03-03 13:16:45 -08:00
Timothy c3a8ee9c7b merge: incorporate Calendly community PR #3947
# Conflicts:
#	tools/src/aden_tools/credentials/__init__.py
#	tools/src/aden_tools/credentials/calendly.py
#	tools/src/aden_tools/tools/__init__.py
#	tools/src/aden_tools/tools/calendly_tool/__init__.py
#	tools/src/aden_tools/tools/calendly_tool/calendly_tool.py
#	tools/tests/test_health_checks.py
#	tools/tests/tools/test_calendly_tool.py
2026-03-03 13:14:20 -08:00
Timothy 5d07a8aba5 merge: incorporate Airtable community PR #3953
# Conflicts:
#	tools/src/aden_tools/credentials/__init__.py
#	tools/src/aden_tools/credentials/airtable.py
#	tools/src/aden_tools/credentials/health_check.py
#	tools/src/aden_tools/tools/__init__.py
#	tools/src/aden_tools/tools/airtable_tool/__init__.py
#	tools/src/aden_tools/tools/airtable_tool/airtable_tool.py
#	tools/tests/test_health_checks.py
#	tools/tests/tools/test_airtable_tool.py
2026-03-03 13:13:47 -08:00
Timothy d18e0594b8 merge: incorporate Reddit community PR #3963
# Conflicts:
#	tools/pyproject.toml
#	tools/src/aden_tools/credentials/__init__.py
#	tools/src/aden_tools/credentials/health_check.py
#	tools/src/aden_tools/credentials/reddit.py
#	tools/src/aden_tools/tools/__init__.py
#	tools/src/aden_tools/tools/reddit_tool/__init__.py
#	tools/src/aden_tools/tools/reddit_tool/reddit_tool.py
#	tools/tests/tools/test_reddit_tool.py
#	uv.lock
2026-03-03 13:12:55 -08:00
Timothy 26dcc86a24 merge: incorporate Zoho CRM community PR #4713
# Conflicts:
#	tools/src/aden_tools/credentials/__init__.py
#	tools/src/aden_tools/tools/__init__.py
#	tools/src/aden_tools/tools/zoho_crm_tool/__init__.py
#	tools/src/aden_tools/tools/zoho_crm_tool/zoho_crm_tool.py
#	tools/tests/test_health_checks.py
2026-03-03 13:11:51 -08:00
Timothy e928ad19e5 merge: incorporate Lusha community PR #4714
# Conflicts:
#	tools/src/aden_tools/credentials/__init__.py
#	tools/src/aden_tools/credentials/lusha.py
#	tools/src/aden_tools/tools/__init__.py
#	tools/src/aden_tools/tools/lusha_tool/__init__.py
#	tools/src/aden_tools/tools/lusha_tool/lusha_tool.py
#	tools/tests/tools/test_lusha_tool.py
2026-03-03 13:11:33 -08:00
Timothy 6768aaa575 merge: incorporate Apify community PR #4770
# Conflicts:
#	tools/src/aden_tools/credentials/__init__.py
#	tools/src/aden_tools/credentials/apify.py
#	tools/src/aden_tools/tools/__init__.py
#	tools/src/aden_tools/tools/apify_tool/__init__.py
#	tools/src/aden_tools/tools/apify_tool/apify_tool.py
#	tools/tests/tools/test_apify_tool.py
2026-03-03 13:10:45 -08:00
Timothy f561aacbfc merge: incorporate Attio community PR #4832
# Conflicts:
#	tools/src/aden_tools/credentials/__init__.py
#	tools/src/aden_tools/credentials/attio.py
#	tools/src/aden_tools/tools/__init__.py
#	tools/src/aden_tools/tools/attio_tool/__init__.py
#	tools/src/aden_tools/tools/attio_tool/attio_tool.py
2026-03-03 13:10:09 -08:00
RichardTang-Aden af1ece40c2 Merge pull request #5742 from aden-hive/load-new-session-from-home
Load new session from home
2026-03-03 13:09:44 -08:00
Timothy d9edd7adf7 merge: incorporate Asana community PR #4857
# Conflicts:
#	tools/src/aden_tools/credentials/__init__.py
#	tools/src/aden_tools/credentials/asana.py
#	tools/src/aden_tools/tools/__init__.py
#	tools/src/aden_tools/tools/asana_tool/__init__.py
#	tools/tests/tools/test_asana_tool.py
2026-03-03 13:08:30 -08:00
Richard Tang 3541fab363 feat: add uv instruction to agents 2026-03-03 13:06:50 -08:00
Richard Tang 1160dceeff feat: agents.md for agent collaboration 2026-03-03 13:06:09 -08:00
bryan bbe8efeba2 fix: prevent queen auto-block from overwriting pending worker questions 2026-03-03 13:04:33 -08:00
Timothy b4a5323009 merge: incorporate Brevo community PR #5136
# Conflicts:
#	tools/src/aden_tools/credentials/__init__.py
#	tools/src/aden_tools/credentials/brevo.py
#	tools/src/aden_tools/tools/brevo_tool/__init__.py
#	tools/src/aden_tools/tools/brevo_tool/brevo_tool.py
2026-03-03 13:04:29 -08:00
Timothy ade8b5b9a7 merge: incorporate Databricks community PR #5428
# Conflicts:
#	tools/src/aden_tools/credentials/__init__.py
#	tools/src/aden_tools/credentials/databricks.py
#	tools/src/aden_tools/tools/__init__.py
#	tools/src/aden_tools/tools/databricks_tool/__init__.py
#	tools/src/aden_tools/tools/databricks_tool/databricks_tool.py
#	tools/tests/tools/test_databricks_tool.py
2026-03-03 13:02:30 -08:00
Timothy e4ace3d484 merge: incorporate YouTube community PR #5673 (resolve conflicts, preserve README) 2026-03-03 12:29:32 -08:00
Timothy f3dd25adc5 merge: incorporate Power BI community PR #4341 2026-03-03 12:27:06 -08:00
Timothy ec251f8168 merge: incorporate SAP S/4HANA community PR #5519 2026-03-03 12:27:02 -08:00
Timothy 1bb9579dc5 merge: incorporate Plaid community PR #5518 2026-03-03 12:26:56 -08:00
Timothy 7ebf4146ce merge: incorporate AWS S3 community PR #5521 2026-03-03 12:26:50 -08:00
Richard Tang a8db4cb2f5 fix: mcp path 2026-03-03 12:19:32 -08:00
Richard Tang 24433396dd feat: use send instead of draft for email reply agent 2026-03-03 12:04:44 -08:00
Richard Tang 02bdf17641 chore: move the email reply sample agent 2026-03-03 11:59:14 -08:00
Timothy e0e05f3488 chore: register Obsidian tool in tool/credential registries 2026-03-03 11:55:12 -08:00
Timothy c92f2510c8 test: add Obsidian tool unit tests (read, write, append, search, list, active) 2026-03-03 11:55:12 -08:00
Timothy ea1fbe9ee1 chore: add Obsidian credential spec (REST API key) 2026-03-03 11:55:11 -08:00
Timothy 84a0be0179 feat: add Obsidian knowledge management integration (#3741)
6 tools: obsidian_read_note, obsidian_write_note, obsidian_append_note,
obsidian_search, obsidian_list_files, obsidian_get_active.
Uses Local REST API plugin with Bearer token auth. Supports vault
browsing, full-text search, and note CRUD with frontmatter metadata.
2026-03-03 11:55:04 -08:00
RichardTang-Aden 54f5c0dc91 Merge pull request #5735 from aden-hive/docs/readme/v6
docs: reorder section in documentation
2026-03-03 11:54:09 -08:00
Richard Tang adf1a10318 docs: reorder section in documentation 2026-03-03 11:53:05 -08:00
RichardTang-Aden e2a679a265 Merge pull request #5734 from aden-hive/docs/readme/v6
docs: add running screenshot and update the coding agent instruction
2026-03-03 11:50:56 -08:00
Richard Tang a3916a6932 docs: add running screenshot and update the coding agent instruction 2026-03-03 11:49:19 -08:00
Timothy 1b5780461e chore: register Langfuse tool in tool/credential registries 2026-03-03 11:42:49 -08:00
Timothy c8d35b63a4 test: add Langfuse tool unit tests (traces, scores, prompts) 2026-03-03 11:42:49 -08:00
Timothy feb1ebae04 chore: add Langfuse credential specs (public key, secret key) 2026-03-03 11:42:48 -08:00
Timothy efe49d0a5b feat: add Langfuse LLM observability integration (#5322)
6 tools: langfuse_list_traces, langfuse_get_trace, langfuse_list_scores,
langfuse_create_score, langfuse_list_prompts, langfuse_get_prompt.
Uses HTTP Basic Auth with public/secret key pair. Supports cloud and
self-hosted instances with offset-based pagination.
2026-03-03 11:41:11 -08:00
Timothy e50a5ea22a chore: register Zoom and n8n tools in tool/credential registries 2026-03-03 11:31:25 -08:00
Timothy 6382c94d0a test: add n8n tool unit tests (workflows, executions, activate/deactivate) 2026-03-03 11:31:21 -08:00
Timothy 58ce84c9cc chore: add n8n credential specs (API key, base URL) 2026-03-03 11:31:20 -08:00
Timothy 08fd6ff765 feat: add n8n workflow automation integration (#2931)
6 tools: n8n_list_workflows, n8n_get_workflow, n8n_activate_workflow,
n8n_deactivate_workflow, n8n_list_executions, n8n_get_execution.
Uses X-N8N-API-KEY header auth with configurable base URL.
Supports cursor-based pagination and execution status filtering.
2026-03-03 11:31:15 -08:00
Timothy a9cb79909c test: add Zoom tool unit tests (user, meetings, recordings) 2026-03-03 11:31:07 -08:00
Timothy 852f8ccd94 chore: add Zoom credential spec (Server-to-Server OAuth token) 2026-03-03 11:31:07 -08:00
Timothy 9388ef3e99 feat: add Zoom meeting management integration (#2867)
6 tools: zoom_get_user, zoom_list_meetings, zoom_get_meeting,
zoom_create_meeting, zoom_delete_meeting, zoom_list_recordings.
Uses Server-to-Server OAuth Bearer token. Supports token-based
pagination and cloud recording retrieval by date range.
2026-03-03 11:31:00 -08:00
Timothy 04afb0c4bb chore: register Salesforce and Shopify tools in tool/credential registries 2026-03-03 11:22:40 -08:00
Timothy a07fd44de3 test: add Shopify tool unit tests (orders, products, customers, search) 2026-03-03 11:22:35 -08:00
Timothy f6c1b13846 chore: add Shopify credential specs (access token, store name) 2026-03-03 11:22:35 -08:00
Timothy 654fa3dd1f feat: add Shopify Admin REST API integration - orders, products, customers (#2984)
6 tools: shopify_list_orders, shopify_get_order, shopify_list_products,
shopify_get_product, shopify_list_customers, shopify_search_customers.
Uses X-Shopify-Access-Token header auth with store subdomain.
2026-03-03 11:22:29 -08:00
Timothy 8183449d27 test: add Salesforce CRM tool unit tests (SOQL, CRUD, describe, list objects) 2026-03-03 11:22:16 -08:00
Timothy a9acfb86ad chore: add Salesforce credential specs (access token, instance URL) 2026-03-03 11:22:15 -08:00
Timothy d7d070ac5f feat: add Salesforce CRM integration - SOQL, records, and metadata (#2916)
6 tools: salesforce_soql_query, salesforce_get_record, salesforce_create_record,
salesforce_update_record, salesforce_describe_object, salesforce_list_objects.
Uses OAuth2 Bearer token auth with instance URL. Supports pagination via
nextRecordsUrl and field-level describe with picklist values.
2026-03-03 11:22:08 -08:00
RichardTang-Aden ead51f1eb6 Merge pull request #5732 from aden-hive/docs/readme/v6
docs: update README and sync all i18n translations
2026-03-03 11:19:06 -08:00
Timothy 8c01b573ce chore: register Redshift and SAP S/4HANA in tool/credential registries 2026-03-03 11:11:12 -08:00
Timothy 7744f21b9d test: add SAP S/4HANA tool unit tests (POs, partners, products, sales orders) 2026-03-03 11:11:08 -08:00
Timothy 9ed23a235f chore: add SAP S/4HANA credential specs (base URL, username, password) 2026-03-03 11:11:07 -08:00
Timothy e88328321f feat: add SAP S/4HANA Cloud read-only procurement integration (#3182) 2026-03-03 11:11:06 -08:00
Timothy a4c516bea1 test: add Redshift tool unit tests (execute, describe, results, databases, tables) 2026-03-03 11:11:00 -08:00
Timothy 1c932a04ef chore: add Redshift credential specs (AWS access key, secret key) 2026-03-03 11:11:00 -08:00
Timothy 76d34be4c2 feat: add Amazon Redshift Data API integration - SQL and schema browsing (#3267) 2026-03-03 11:10:59 -08:00
bryan cb0e9ff9ec chore: fixing tests 2026-03-03 11:07:49 -08:00
Timothy d6e8afe316 chore: register Azure SQL and Kafka in tool/credential registries 2026-03-03 11:03:31 -08:00
Timothy a04f2bcf99 test: add Kafka tool unit tests (topics, produce, consumer groups) 2026-03-03 11:03:27 -08:00
Timothy c138e7c638 chore: add Kafka credential specs (REST URL, cluster ID) 2026-03-03 11:03:27 -08:00
Timothy fc08c7007f feat: add Apache Kafka integration via Confluent REST Proxy (#4774) 2026-03-03 11:03:26 -08:00
Timothy d559bb3446 test: add Azure SQL tool unit tests (servers, databases, firewall rules) 2026-03-03 11:03:18 -08:00
Timothy 55a8c39e4b chore: add Azure SQL credential specs (token, subscription ID) 2026-03-03 11:03:17 -08:00
Timothy 02d6f10e5f feat: add Azure SQL Database management integration (#3377) 2026-03-03 11:03:16 -08:00
Timothy 77428a91cc chore: register Power BI and Snowflake in tool/credential registries 2026-03-03 10:56:46 -08:00
Timothy 51403dc276 test: add Snowflake tool unit tests (execute, status, cancel) 2026-03-03 10:56:43 -08:00
Timothy 914a07a35d chore: add Snowflake credential specs (account, token) 2026-03-03 10:56:42 -08:00
Timothy 3c70d7b424 feat: add Snowflake SQL REST API integration (#3230) 2026-03-03 10:56:41 -08:00
Timothy ce1ee4ff17 test: add Power BI tool unit tests (workspaces, datasets, reports, refresh) 2026-03-03 10:56:35 -08:00
Timothy fca41d9bda chore: add Power BI credential spec (POWERBI_ACCESS_TOKEN) 2026-03-03 10:56:34 -08:00
Timothy ff889e02f7 feat: add Power BI integration - workspaces, datasets, reports (#3973) 2026-03-03 10:56:34 -08:00
Richard Tang cbd2c86bbf docs: sync all i18n READMEs with primary README 2026-03-03 10:53:11 -08:00
Timothy 43ab460462 chore: register Terraform Cloud and Lusha in tool/credential registries 2026-03-03 10:49:21 -08:00
Timothy caa06e266b test: add Lusha tool unit tests (enrich, search, usage) 2026-03-03 10:49:17 -08:00
Timothy 3622ca78ee chore: add Lusha credential spec (LUSHA_API_KEY) 2026-03-03 10:49:17 -08:00
Timothy 019e3f9659 feat: add Lusha B2B contact and company enrichment integration (#3461) 2026-03-03 10:49:16 -08:00
Timothy 208cb579a2 test: add Terraform Cloud tool unit tests (workspaces, runs) 2026-03-03 10:49:09 -08:00
Timothy 17de7e4485 chore: add Terraform Cloud credential spec (TFC_TOKEN) 2026-03-03 10:49:08 -08:00
Timothy 810616eee1 feat: add Terraform Cloud integration - workspaces and runs (#4773) 2026-03-03 10:48:41 -08:00
Timothy 191f583669 chore: register Twitter/X and Tines in tool/credential registries 2026-03-03 10:35:46 -08:00
Timothy 1d638cc18e test: add Tines tool unit tests (stories, actions, logs) 2026-03-03 10:35:42 -08:00
Timothy 3efa1f3b88 chore: add Tines credential specs (domain, api_key) 2026-03-03 10:35:42 -08:00
Timothy 4daa33db09 feat: add Tines integration - security automation stories and actions
Implements 5 tools via Tines REST API:
- tines_list_stories: List workflow stories with search/filter
- tines_get_story: Get story details including entry/exit agents
- tines_list_actions: List actions (agents) in stories
- tines_get_action: Get action details with sources/receivers
- tines_get_action_logs: Get action execution logs by level

Uses Bearer token auth with tenant domain.
2026-03-03 10:35:37 -08:00
Timothy fab2fb0056 test: add Twitter/X tool unit tests (search, user, timeline, tweet) 2026-03-03 10:35:29 -08:00
Timothy ce885c120e chore: add Twitter/X credential spec (bearer_token) 2026-03-03 10:35:28 -08:00
Timothy 75b53c47ff feat: add Twitter/X integration - tweet search and user lookup via API v2
Implements 4 tools via X API v2:
- twitter_search_tweets: Search recent tweets with query operators
- twitter_get_user: Get user profile by username
- twitter_get_user_tweets: Get user timeline
- twitter_get_tweet: Get tweet details by ID

Uses Bearer token auth (app-only, read access).
2026-03-03 10:35:21 -08:00
Timothy 2936f73707 chore: register AWS S3 and QuickBooks in tool/credential registries 2026-03-03 10:22:46 -08:00
Timothy e26426b138 test: add QuickBooks tool unit tests (query, entities, invoices) 2026-03-03 10:22:42 -08:00
Timothy 62cacb8e28 chore: add QuickBooks credential specs (access_token, realm_id) 2026-03-03 10:22:42 -08:00
Timothy f3e37190ce feat: add QuickBooks Online integration - accounting API
Implements 5 tools via QuickBooks Online API v3:
- quickbooks_query: Query entities with SQL-like syntax
- quickbooks_get_entity: Get entity by type and ID
- quickbooks_create_customer: Create customers
- quickbooks_create_invoice: Create invoices with line items
- quickbooks_get_company_info: Get company details

Uses OAuth 2.0 Bearer token auth. Supports sandbox mode.
2026-03-03 10:22:35 -08:00
Timothy 0863bbbd2f test: add AWS S3 tool unit tests (buckets, objects, get, put, delete) 2026-03-03 10:22:25 -08:00
Timothy b23fa1daad chore: add AWS S3 credential specs (access_key_id, secret_access_key) 2026-03-03 10:22:24 -08:00
Timothy 05cc1ce599 feat: add AWS S3 integration - object storage via REST API with SigV4
Implements 5 tools via AWS S3 REST API:
- s3_list_buckets: List all buckets in the account
- s3_list_objects: List objects with prefix/delimiter filtering
- s3_get_object: Get object content and metadata
- s3_put_object: Upload text objects
- s3_delete_object: Delete objects

Uses AWS Signature V4 signing (no boto3 dependency).
2026-03-03 10:22:16 -08:00
RichardTang-Aden a1c045fd91 Merge pull request #5727 from aden-hive/docs/readme/v6
Docs: Remove TUI references from README
2026-03-03 10:14:13 -08:00
Timothy e6939f8d51 chore: register PagerDuty and Calendly in tool/credential registries 2026-03-03 10:13:18 -08:00
Timothy 801fef12e1 test: add Calendly tool unit tests (user, events, invitees) 2026-03-03 10:13:14 -08:00
Timothy 5845629175 chore: add Calendly credential spec (personal_access_token) 2026-03-03 10:13:13 -08:00
Timothy 11b916301a feat: add Calendly integration - scheduling events and invitees
Implements 5 tools via Calendly API v2:
- calendly_get_current_user: Get user URI and profile info
- calendly_list_event_types: List meeting templates
- calendly_list_scheduled_events: List booked meetings with date filters
- calendly_get_scheduled_event: Get event details by URI
- calendly_list_invitees: List invitees for an event

Uses Bearer token auth (Personal Access Token).
2026-03-03 10:13:07 -08:00
Timothy aa5d80b1d2 test: add PagerDuty tool unit tests (incidents, services) 2026-03-03 10:13:02 -08:00
Timothy aa5f990acd chore: add PagerDuty credential specs (api_key, from_email) 2026-03-03 10:13:01 -08:00
Timothy 9764c82c2a feat: add PagerDuty integration - incident management and services
Implements 5 tools via PagerDuty REST API v2:
- pagerduty_list_incidents: List incidents with status/urgency/date filters
- pagerduty_get_incident: Get incident details by ID
- pagerduty_create_incident: Create incidents on a service
- pagerduty_update_incident: Acknowledge or resolve incidents
- pagerduty_list_services: List services with name search

Uses Token auth header, From header for write operations.
2026-03-03 10:12:55 -08:00
Richard Tang f921846879 docs: update the latest features from recent changes 2026-03-03 10:12:43 -08:00
Richard Tang a370403b16 docs: update readme instructions 2026-03-03 10:06:13 -08:00
Timothy 543a71eb6c chore: register MongoDB and Airtable in tool/credential registries 2026-03-03 10:06:12 -08:00
Timothy 8285593c13 test: add Airtable tool unit tests (records, bases, schema) 2026-03-03 10:06:08 -08:00
Timothy 6fbfe773fb chore: add Airtable credential spec (personal_access_token) 2026-03-03 10:06:07 -08:00
Timothy a8c54b1e5f feat: add Airtable integration - record CRUD and base metadata
Implements 6 tools via Airtable Web API:
- airtable_list_records: List records with filters, sort, field selection
- airtable_get_record: Get a single record by ID
- airtable_create_records: Create up to 10 records per request
- airtable_update_records: Partial update up to 10 records per request
- airtable_list_bases: List accessible bases
- airtable_get_base_schema: Get table and field schema for a base

Uses Bearer token auth (Personal Access Token).
2026-03-03 10:06:03 -08:00
Timothy a5323abfca test: add MongoDB tool unit tests (find, insert, update, delete, aggregate) 2026-03-03 10:05:53 -08:00
Timothy ba4df2d2c4 chore: add MongoDB credential specs (data_api_url, api_key, data_source) 2026-03-03 10:05:52 -08:00
Timothy 6510633a8c feat: add MongoDB Atlas Data API integration - document CRUD and aggregation
Implements 6 tools via MongoDB Atlas Data API:
- mongodb_find: Find documents with filters, projection, sort, limit
- mongodb_find_one: Find a single document
- mongodb_insert_one: Insert a document
- mongodb_update_one: Update a document with MongoDB operators
- mongodb_delete_one: Delete a document
- mongodb_aggregate: Run aggregation pipelines

Uses API key auth header. All endpoints are POST.
2026-03-03 10:05:42 -08:00
Timothy 9172e5f46b chore: register Twilio and Zendesk in tool/credential registries 2026-03-03 09:56:14 -08:00
Timothy ed3e3848c0 test: add Zendesk tool unit tests (list, get, create, update, search) 2026-03-03 09:56:10 -08:00
Timothy ee90185d5c chore: add Zendesk credential specs (subdomain, email, api_token) 2026-03-03 09:56:09 -08:00
Timothy 6eb2633677 feat: add Zendesk integration - ticket management and search
Implements 5 tools via Zendesk Support API v2:
- zendesk_list_tickets: List tickets with status/sort filters
- zendesk_get_ticket: Get ticket details by ID
- zendesk_create_ticket: Create tickets with priority/type/tags
- zendesk_update_ticket: Update ticket fields and add comments
- zendesk_search_tickets: Search tickets with Zendesk query syntax

Uses Basic auth (email/token:api_token).
2026-03-03 09:56:00 -08:00
Timothy c1f215dcf2 test: add Twilio tool unit tests (SMS, WhatsApp, list, get) 2026-03-03 09:55:50 -08:00
Timothy 97cc9a1045 chore: add Twilio credential specs (account_sid, auth_token) 2026-03-03 09:55:49 -08:00
Timothy 5f7b02a4b7 feat: add Twilio integration - SMS and WhatsApp messaging
Implements 4 tools via Twilio REST API:
- twilio_send_sms: Send SMS messages
- twilio_send_whatsapp: Send WhatsApp messages
- twilio_list_messages: List message history with filters
- twilio_get_message: Get message details by SID

Uses Basic auth (AccountSID:AuthToken), form-urlencoded POST.
2026-03-03 09:55:43 -08:00
Richard Tang ad6d504ea4 docs: remove TUI in the readme 2026-03-03 09:52:06 -08:00
Timothy e696b41a0e chore: register GitLab and Google Sheets in tool/credential registries 2026-03-03 09:49:23 -08:00
Timothy 1f9acc6135 test: add Google Sheets tool unit tests (metadata, read, batch read) 2026-03-03 09:49:23 -08:00
Timothy 7e8699cb4b chore: add Google Sheets credential spec (api_key) 2026-03-03 09:49:22 -08:00
Timothy fd4fc657d6 feat: add Google Sheets integration - read spreadsheet data via API v4
3 tools: sheets_get_spreadsheet, sheets_read_range, sheets_batch_read.
Uses API key auth for read-only access to public spreadsheets.
2026-03-03 09:49:21 -08:00
Timothy 34403648b9 test: add GitLab tool unit tests (projects, issues, MRs) 2026-03-03 09:49:15 -08:00
Timothy 3795d50eb9 chore: add GitLab credential spec (personal access token) 2026-03-03 09:49:14 -08:00
Timothy 80515dde5a feat: add GitLab integration - projects, issues, merge requests
6 tools: gitlab_list_projects, gitlab_get_project, gitlab_list_issues,
gitlab_get_issue, gitlab_create_issue, gitlab_list_merge_requests.
Supports GitLab.com and self-hosted via configurable base URL.
2026-03-03 09:49:13 -08:00
Timothy b59094d35f fix: queen should not return on empty stream 2026-03-03 09:44:15 -08:00
Timothy efcd296d83 chore: register Notion and Jira tools in tool/credential registries 2026-03-03 09:43:32 -08:00
Timothy 802cb292b0 test: add Jira tool unit tests (issues, projects, comments) 2026-03-03 09:43:32 -08:00
Timothy 8e55f74d73 chore: add Jira credential specs (domain, email, api_token) 2026-03-03 09:43:31 -08:00
Timothy 3d810485a0 feat: add Jira integration - issues, projects, comments via REST API v3
6 tools: jira_search_issues, jira_get_issue, jira_create_issue,
jira_list_projects, jira_get_project, jira_add_comment. Uses Basic auth
with email + API token and Atlassian Document Format for text fields.
2026-03-03 09:43:30 -08:00
Timothy 94cfd48661 test: add Notion tool unit tests (search, pages, databases) 2026-03-03 09:43:16 -08:00
Timothy 87c8e741f3 chore: add Notion credential spec (api_token) 2026-03-03 09:43:15 -08:00
Timothy d0e92ed18d feat: add Notion integration - pages, databases, and search
5 tools: notion_search, notion_get_page, notion_create_page,
notion_query_database, notion_get_database. Uses Bearer auth
with Notion internal integration token.
2026-03-03 09:43:14 -08:00
Richard Tang 88640f9222 feat: email reply sample agent 2026-03-03 09:41:20 -08:00
Timothy 1927045519 chore: register Greenhouse and YouTube Transcript in tool/credential registries 2026-03-03 09:36:47 -08:00
Timothy 68cffb86c9 test: add YouTube Transcript tool unit tests (get, list transcripts) 2026-03-03 09:36:47 -08:00
Timothy 5bec989647 feat: add YouTube Transcript integration - captions and transcript retrieval
2 tools: youtube_get_transcript, youtube_list_transcripts.
Uses youtube-transcript-api library, no API key required.
2026-03-03 09:36:46 -08:00
Timothy 66f5d2f36c test: add Greenhouse tool unit tests (jobs, candidates, applications) 2026-03-03 09:36:40 -08:00
Timothy 941f815254 chore: add Greenhouse credential spec (api_token) 2026-03-03 09:36:39 -08:00
Timothy 42afd10518 feat: add Greenhouse integration - ATS jobs, candidates, applications
6 tools: greenhouse_list_jobs, greenhouse_get_job, greenhouse_list_candidates,
greenhouse_get_candidate, greenhouse_list_applications, greenhouse_get_application.
Uses Harvest API v1 with Basic auth (API token).
2026-03-03 09:36:38 -08:00
Timothy 3efa285a59 chore: register Cloudinary and Reddit tools in tool/credential registries 2026-03-03 09:31:22 -08:00
Timothy 4f2b4172b4 test: add Reddit tool unit tests (search, posts, comments, user) 2026-03-03 09:31:18 -08:00
Timothy 0d7de71b94 chore: add Reddit credential specs (client_id, client_secret) 2026-03-03 09:31:17 -08:00
Timothy f0f5b4bede feat: add Reddit integration - search, posts, comments, user info
4 tools: reddit_search, reddit_get_posts, reddit_get_comments, reddit_get_user.
Uses OAuth2 client_credentials flow for app-only access.
2026-03-03 09:31:17 -08:00
Timothy bfd27e97d3 test: add Cloudinary tool unit tests (upload, list, get, delete, search) 2026-03-03 09:31:10 -08:00
Timothy f2def27390 chore: add Cloudinary credential specs (cloud_name, api_key, api_secret) 2026-03-03 09:31:10 -08:00
Timothy b3f7bd6cc0 feat: add Cloudinary integration - upload, manage, search media assets
5 tools: cloudinary_upload, cloudinary_list_resources, cloudinary_get_resource,
cloudinary_delete_resource, cloudinary_search. Uses Basic auth with
API key/secret and supports image, video, and raw resource types.
2026-03-03 09:31:09 -08:00
Timothy 0e8e78dc5b chore: register Trello and Confluence tools in tool/credential registries 2026-03-03 09:22:03 -08:00
Timothy b259d85776 test: add Confluence tool tests (9 tests) 2026-03-03 09:22:02 -08:00
Timothy 175d9c3b7c feat: add Confluence credential spec with Basic auth (email + API token) 2026-03-03 09:21:55 -08:00
Timothy a2a810aabf feat: add Confluence integration - spaces, pages, content search via CQL 2026-03-03 09:21:54 -08:00
Timothy 175c7cfd51 test: add Trello tool tests (12 tests) 2026-03-03 09:21:47 -08:00
Timothy 5ada973d38 feat: add Trello credential spec with API key and token auth 2026-03-03 09:21:39 -08:00
Timothy 0103276136 feat: add Trello integration - boards, lists, cards management 2026-03-03 09:21:37 -08:00
Timothy 1d9e8ec138 chore: register HuggingFace tool in tool/credential registries 2026-03-03 09:11:59 -08:00
Timothy 83ac2e71bb test: add HuggingFace tool tests (10 tests) 2026-03-03 09:11:56 -08:00
Timothy 0b35a729a7 feat: add HuggingFace credential spec with token auth 2026-03-03 09:11:55 -08:00
Timothy 56723a519a feat: add HuggingFace Hub integration - models, datasets, spaces search 2026-03-03 09:11:49 -08:00
Timothy ebff394c76 chore: register Plaid tool in tool/credential registries 2026-03-03 09:08:44 -08:00
Timothy ceecc97bc8 test: add Plaid tool tests (13 tests) 2026-03-03 09:08:40 -08:00
Timothy 313154f880 feat: add Plaid credential spec with client_id and secret auth 2026-03-03 09:08:38 -08:00
Timothy 3eb6417cdc feat: add Plaid integration - accounts, balances, transactions, institutions 2026-03-03 09:08:29 -08:00
Timothy 1b35d6ca0a chore: register Pinecone tool in tool/credential registries 2026-03-03 09:05:20 -08:00
Timothy 1d89f0ba9d test: add Pinecone tool tests (18 tests) 2026-03-03 09:05:16 -08:00
Timothy 864df0e21a feat: add Pinecone credential spec with API key auth 2026-03-03 09:05:14 -08:00
Timothy 3f626decc4 feat: add Pinecone vector database integration - indexes, vectors, queries 2026-03-03 09:05:06 -08:00
Timothy bf1760b1a9 chore: register DuckDuckGo tool in tool registry 2026-03-03 08:56:06 -08:00
Timothy 8a58ea6344 test: add DuckDuckGo tool tests (6 tests) 2026-03-03 08:56:06 -08:00
Timothy 662ff4c35f feat: add DuckDuckGo search integration - web search, news, images 2026-03-03 08:56:01 -08:00
Timothy af02352b49 chore: register Linear tool in tool/credential registries 2026-03-03 08:43:41 -08:00
Timothy db9f987d46 test: add Linear tool tests (10 tests) 2026-03-03 08:43:41 -08:00
Timothy 8490ce1389 feat: add Linear credential spec with API key auth 2026-03-03 08:43:41 -08:00
Timothy 55ea9a56a4 feat: add Linear integration - issues, projects, teams, search via GraphQL 2026-03-03 08:43:41 -08:00
Timothy bd2381b10d chore: register Asana tool in tool/credential registries 2026-03-03 08:40:02 -08:00
Timothy 443de755bd test: add Asana tool tests (12 tests) 2026-03-03 08:40:02 -08:00
Timothy 55ec5f14ee feat: add Asana credential spec with PAT auth 2026-03-03 08:40:02 -08:00
Timothy 2e019302c9 feat: add Asana integration - tasks, projects, workspaces, search 2026-03-03 08:40:02 -08:00
Timothy b1e829644b chore: register Yahoo Finance tool in tool registry 2026-03-03 08:36:20 -08:00
Timothy 18f773e91b test: add Yahoo Finance tool tests (8 tests) 2026-03-03 08:36:19 -08:00
Timothy 987cfee930 feat: add Yahoo Finance integration - quotes, history, financials, company info 2026-03-03 08:36:19 -08:00
Timothy 57f6b8498a chore: register Google Search Console tool in tool/credential registries 2026-03-03 08:34:30 -08:00
Timothy 9f0d35977c test: add Google Search Console tool tests (10 tests) 2026-03-03 08:34:30 -08:00
Timothy e5910bbf2f feat: add Google Search Console credential spec with OAuth2 auth 2026-03-03 08:34:30 -08:00
Timothy 0015bf7b38 feat: add Google Search Console integration - analytics, sitemaps, URL inspection 2026-03-03 08:34:30 -08:00
Timothy a6b9234abb chore: register Zoho CRM tool in tool/credential registries 2026-03-03 08:32:13 -08:00
Timothy 086f3942b8 test: add Zoho CRM tool tests (12 tests) 2026-03-03 08:32:13 -08:00
Timothy 924f4abede feat: add Zoho CRM credential spec with OAuth token auth 2026-03-03 08:32:13 -08:00
Timothy 02be91cb08 feat: add Zoho CRM integration - leads, contacts, deals, accounts, notes 2026-03-03 08:32:13 -08:00
Timothy c2298393ab chore: register Apify tool in tool/credential registries 2026-03-03 08:29:33 -08:00
Timothy 4b8c63bf6e test: add Apify tool tests (11 tests) 2026-03-03 08:29:33 -08:00
Timothy e089c3b72c feat: add Apify credential spec with API token auth 2026-03-03 08:29:33 -08:00
Timothy a93983b5db feat: add Apify integration - actors, runs, datasets, key-value stores 2026-03-03 08:29:27 -08:00
Timothy 20f6329004 chore: register Attio tool in tool/credential registries 2026-03-03 08:25:12 -08:00
Timothy 3c2cf71c47 test: add Attio tool tests (14 tests) 2026-03-03 08:25:08 -08:00
Timothy 56288c3137 feat: add Attio credential spec with API key auth 2026-03-03 08:25:04 -08:00
Timothy 79188921a5 feat: add Attio CRM integration - records, lists, notes, tasks 2026-03-03 08:24:58 -08:00
RichardTang-Aden 65962ddf58 Merge pull request #5709 from aden-hive/load-new-session-from-home
Fix new session creation when submitting prompt from home page
2026-03-03 08:20:20 -08:00
Timothy 5ab66008ae chore: register Pipedrive tool in tool/credential registries 2026-03-03 08:18:45 -08:00
Timothy f38c9ee049 test: add Pipedrive tool tests (16 tests) 2026-03-03 08:18:41 -08:00
Timothy 86f5e71ec2 feat: add Pipedrive credential spec with API token auth 2026-03-03 08:18:29 -08:00
Timothy 1e15cc8495 feat: add Pipedrive CRM integration - deals, contacts, orgs, activities, pipelines 2026-03-03 08:18:24 -08:00
Richard Tang bba44430c4 chore: ignore local dev skills 2026-03-03 08:17:32 -08:00
Timothy 077d82ad82 chore: register Docker Hub tool in tool/credential registries 2026-03-03 08:14:27 -08:00
Timothy e4cf7f3da2 test: add Docker Hub tool tests (9 tests) 2026-03-03 08:14:24 -08:00
Timothy e3bdc9e8d7 feat: add Docker Hub credential spec with PAT auth 2026-03-03 08:14:20 -08:00
Timothy f1c1c9aab3 feat: add Docker Hub integration - search, repos, tags, image details 2026-03-03 08:14:15 -08:00
Timothy 97cbcf7658 fix: adapt path guarantee 2026-03-03 08:11:49 -08:00
Richard Tang 69c71d77fb fix: load-new-session from home 2026-03-03 08:09:22 -08:00
Timothy 4860739a2f chore: register Vercel in tool/credential registries (#5044) 2026-03-03 08:08:16 -08:00
Timothy 791ee40cd6 test: add Vercel tool unit tests (#5044) 2026-03-03 08:08:12 -08:00
Timothy e0191ac52b feat: add Vercel credential spec (#5044) 2026-03-03 08:08:07 -08:00
Timothy e0724df196 feat: add Vercel tool - deployments, projects, domains, env vars (#5044) 2026-03-03 08:08:00 -08:00
Timothy 2a56294638 chore: register Databricks in tool/credential registries (#5167) 2026-03-03 08:05:25 -08:00
Timothy d5cd557013 test: add Databricks tool unit tests (#5167) 2026-03-03 08:05:21 -08:00
Timothy 2a43f23a3d feat: add Databricks credential spec (#5167) 2026-03-03 08:05:03 -08:00
Timothy 69af8f569a feat: add Databricks tool - SQL, jobs, clusters, workspace (#5167) 2026-03-03 08:04:34 -08:00
bryan dcc11c9ea3 chore: move test deps to testing extra and dev group 2026-03-03 08:03:02 -08:00
Timothy 4b4abb47b0 Merge branch 'feature/queen-worker-comm' into fix/queen-recovery 2026-03-03 08:02:59 -08:00
Timothy 0e86dbcc9b chore: register Redis tool in tool/credential registries (#5370) 2026-03-03 08:01:43 -08:00
Timothy 92c75aa6f5 test: add Redis tool unit tests (#5370) 2026-03-03 08:01:37 -08:00
Timothy be41d848e5 feat: add Redis credential spec (#5370) 2026-03-03 08:01:32 -08:00
Timothy f7c299f6f0 feat: add Redis tool implementation - KV, hash, list, pub/sub (#5370) 2026-03-03 08:01:25 -08:00
Timothy b6a0f65a09 feat: add Pushover push notification integration (#5415)
4 tools: pushover_send, pushover_validate_user, pushover_list_sounds,
pushover_check_receipt. Supports priority levels, HTML, sounds, TTL.
All 12 unit tests and 13 conformance tests passing.
2026-03-03 07:58:29 -08:00
Timothy 1e7b0068ed chore: register Supabase tool in tool/credential registries 2026-03-03 07:54:34 -08:00
bryan 207d2fb911 feat: wire QuestionWidget into ChatPanel and workspace 2026-03-03 07:54:32 -08:00
Timothy de5105f313 feat: add Supabase integration - DB, Auth, Edge Functions (#5489)
7 tools: supabase_select, supabase_insert, supabase_update, supabase_delete,
supabase_auth_signup, supabase_auth_signin, supabase_edge_invoke.
All 19 unit tests and 13 conformance tests passing.
2026-03-03 07:54:27 -08:00
bryan c65a99c87d feat: add QuestionWidget component 2026-03-03 07:54:21 -08:00
bryan b4d7e57250 feat: update queen prompt for structured ask_user 2026-03-03 07:53:35 -08:00
bryan 63845a07aa feat: add queen-context endpoint and SSE replay 2026-03-03 07:53:22 -08:00
bryan 68ac73aa55 feat: add options support to ask_user tool 2026-03-03 07:53:05 -08:00
Timothy 6d32f1bb36 chore: register YouTube and Microsoft Graph tools in tool/credential registries 2026-03-03 07:51:33 -08:00
Timothy 9c316cee28 feat: add Microsoft Graph integration - Outlook, Teams, OneDrive (#5601)
11 tools: outlook_list_messages, outlook_get_message, outlook_send_mail,
teams_list_teams, teams_list_channels, teams_send_channel_message,
teams_get_channel_messages, onedrive_search_files, onedrive_list_files,
onedrive_download_file, onedrive_upload_file.
All 15 unit tests and 13 conformance tests passing.
2026-03-03 07:47:49 -08:00
Timothy 6af4f2d6e6 feat: add YouTube Data API integration (#5603)
8 tools: search_videos, get_video_details, get_channel, list_channel_videos,
get_playlist, search_channels, get_video_comments, get_video_categories.
All 17 unit tests and 13 conformance tests passing.
2026-03-03 07:47:34 -08:00
Timothy bc9a43d5a9 fix: execution recovery 2026-03-03 07:43:05 -08:00
Amdev-5 57651900f1 Merge remote-tracking branch 'origin/main' into lusha 2026-03-03 18:46:12 +05:30
Amdev-5 46b0617018 Merge remote-tracking branch 'origin/main' into lusha
# Conflicts:
#	tools/src/aden_tools/credentials/health_check.py
#	tools/src/aden_tools/tools/__init__.py
#	tools/tests/test_health_checks.py
2026-03-03 18:34:54 +05:30
RichardTang-Aden 7b98a6613a Merge pull request #5656 from aden-hive/feature/queen-worker-comm
Release / Create Release (push) Waiting to run
Feature/queen worker comm
2026-03-02 22:50:13 -08:00
Richard Tang 26481e27a6 fix: fix tests and lint 2026-03-02 22:46:38 -08:00
Aaryann Chandola 87a26db779 Merge branch 'aden-hive:main' into fix/guardian-self-trigger-loop 2026-03-03 11:56:15 +05:30
Richard Tang bb227b3d73 chore: ruff lint 2026-03-02 21:30:07 -08:00
Richard Tang 8a0cf5e0ae Merge remote-tracking branch 'origin/feature/queen-worker-comm' into feature/queen-worker-comm 2026-03-02 21:27:22 -08:00
Timothy 69218d5699 chore: lint codes 2026-03-02 20:16:34 -08:00
Timothy 7d1433af21 fix: queen agent flakiness 2026-03-02 19:57:18 -08:00
Richard Tang 0bfbf1e9c5 fix: unused /hive-credentials prompts in the validation 2026-03-02 19:53:57 -08:00
Richard Tang 1ca4f5b22b refactor: update the preload_validation logics 2026-03-02 19:46:50 -08:00
Richard Tang 0984e4c1e8 feat: add gcu subagent validation and refactor the prestart validation steps 2026-03-02 18:35:25 -08:00
P Gokul Sree Chandra 7d9bd2e86b feat(tools): add YouTube Data API integration
- Implement 6 YouTube API tools (search videos, get video/channel details, list channel videos, get playlist items, search channels)
- Add YOUTUBE_API_KEY credential spec with help_url and description
- Register YouTube tool in tools/__init__.py
- Add comprehensive test coverage (18 tests) with mocking
- Add detailed README with setup instructions and examples
- Use httpx for HTTP requests to YouTube Data API v3
- Verified with real API integration testing

Implements #5603
2026-03-03 07:35:04 +05:30
Sarthak Karode 4cbf5a7434 feat(core): add pytest framework testing integration with helpful error messages (#5485) 2026-03-03 10:01:33 +08:00
Hundao b33178c5be fix(graph): move auto-block grace period check before _await_user_input (#5672)
The grace period logic for client-facing auto-blocks was placed after
_await_user_input(), which blocks forever since no inject_event is
scheduled for text-only turns. This caused test_text_after_user_input
_goes_to_judge to hang indefinitely, blocking CI framework tests.

Move the grace period check before the blocking call so that within
the grace window, auto-blocks with missing outputs skip blocking
entirely and continue to the next LLM turn for judge RETRY pressure.

Also adds an _auto_missing check: nodes with no missing outputs
(e.g. queen monitoring with output_keys=[]) should still block
as their text-only output is legitimate conversation.

Fixes #5633
2026-03-03 09:39:14 +08:00
Richard Tang dc6a336c60 fix: removed the unused build_capability_summary 2026-03-02 16:26:47 -08:00
Antiarin 20ef5cb14f test(runtime): add async test for canceling multiple tasks across streams 2026-03-03 05:54:42 +05:30
Antiarin 2c3ec7e74c fix(tui): fix pause/stop to cancel all running tasks across all graphs 2026-03-03 05:30:20 +05:30
Richard Tang b855336448 chore: ruff format issue 2026-03-02 15:47:30 -08:00
Richard Tang de021977fd Merge remote-tracking branch 'origin/main' into feature/queen-worker-comm 2026-03-02 15:39:15 -08:00
Timothy cd2b3fcd16 Merge branch 'feature/new-inbox-management-agent' into feature/queen-worker-comm 2026-03-02 14:46:14 -08:00
Timothy b64024ede5 fix: gcu error log throwing 2026-03-02 14:45:57 -08:00
bryan a280d23113 fix: removing escalate to coder from worker tools 2026-03-02 12:02:35 -08:00
Timothy 41785abdba fix: rephrasing 2026-03-02 11:54:22 -08:00
Timothy de494c7e55 Merge branch 'feature/queen-worker-comm' into feature/new-inbox-management-agent 2026-03-02 11:44:08 -08:00
Timothy 5fa0903ea8 fix: teach email agent to search emails 2026-03-02 11:43:40 -08:00
Timothy 7bd99fe074 fix: email inbox management agent 2026-03-02 11:01:21 -08:00
bryan c838e1ca6d feat: agent building animation 2026-03-02 10:54:57 -08:00
bryan f475923353 feat: subagents populate node panel 2026-03-02 09:59:24 -08:00
Timothy 43f43c92e3 Merge branch 'feature/queen-worker-comm' into feature/new-inbox-management-agent 2026-03-02 09:40:55 -08:00
Timothy 5463134322 fix: inbox management template v2 2026-03-02 09:40:36 -08:00
Timothy 3fbb392103 fix: add credentials to queen lifecycle tools 2026-03-02 09:39:38 -08:00
RichardTang-Aden a162da17e1 Merge pull request #5639 from RichardTang-Aden/main
feat: support Gemini 3.1 pro
2026-03-02 09:24:27 -08:00
Richard Tang b565134d57 chore: fix the ruff lint 2026-03-02 09:23:02 -08:00
Richard Tang 3aafc89912 feat: support Gemini 3.1 pro 2026-03-02 09:20:48 -08:00
bryan 93449f92fe fix: clear build cache in quickstart 2026-03-02 09:00:48 -08:00
Bryan @ Aden d766e68d42 Merge pull request #5494 from Antiarin/security/harden-validate-agent-path
[Bug][Security]: agent_path accepts arbitrary filesystem paths with no validation
2026-03-02 16:57:51 +00:00
Hundao 1d8b1f9774 fix: enforce 0600 permissions on OAuth token files (#5631)
* fix: enforce 0600 permissions on OAuth token files

Credential files were written with default umask permissions.
Use os.open with explicit 0o600 mode to ensure token files
are always owner-read/write only, regardless of umask.

Fixes #5530

* style: fix line too long in checkpoint_store.py
2026-03-02 18:30:40 +08:00
Rajneesh Chaudhary 5ea9abae83 fix(core): prevent sse critical event queue from blocking event bus (#5533) (#5536)
Disconnects slow clients instead of blocking the publisher task.

Signed-off-by: Rajneesh180 <rajneeshrehsaan48@gmail.com>
2026-03-02 17:57:52 +08:00
ArshpreetSingh04 15957499c5 docs(core): fix outdated goal-agent path reference in README (#5629)
Update the MCP client configuration example in core/README.md to replace the outdated `goal-agent` path with the correct `hive/core` path.

Fixes #5628
2026-03-02 17:07:25 +08:00
Timothy 0b50d9e874 fix: block idle event 2026-03-01 21:01:59 -08:00
Amdev-5 cce073dbdb fix(lusha): add pagination and empty filter validation
- Expose page parameter on search_people and search_companies
  (client + MCP tool) enabling access beyond the first 50 results
- Add guard requiring at least one filter on both search endpoints
  to prevent broad requests that burn API credits
- Add unit tests for pagination and empty filter validation
2026-03-02 10:20:08 +05:30
Timothy a1e54922bd fix: timer count down update 2026-03-01 20:22:46 -08:00
Timothy 63c0ca34ea Merge branch 'feature/agent-runtime-idling' into feature/queen-worker-comm 2026-03-01 20:14:46 -08:00
Timothy 135477e516 feat: agent idling detection 2026-03-01 20:14:35 -08:00
Timothy 8cac49cd91 feat: frontend display of scheduler count down 2026-03-01 20:13:21 -08:00
Timothy 28dce63682 fix: conversation ordering 2026-03-01 18:56:41 -08:00
Timothy 313ac952e0 Merge branch 'feature/tool-pill-v2' into feature/queen-worker-comm 2026-03-01 18:33:54 -08:00
Timothy 0633d5130b fix: command line refresh frontend build 2026-03-01 18:33:43 -08:00
Timothy 995e487b49 Merge branch 'feature/tool-pill-v2' into feature/queen-worker-comm 2026-03-01 18:26:49 -08:00
Timothy 64b58b57e0 fix: remove reddish color 2026-03-01 18:26:27 -08:00
Timothy c6465908df feat: colorful tool pills 2026-03-01 18:11:57 -08:00
Timothy ca96bcc09f fix: add pending question content to worker status 2026-03-01 18:11:15 -08:00
Timothy 65ee628fae fix: tool pill turn id 2026-03-01 17:58:31 -08:00
Timothy 02043614e5 feat: consolidate worker status report, fix conversation order 2026-03-01 17:56:27 -08:00
Timothy 212b9bf9d4 fix: load agent 2026-03-01 16:26:55 -08:00
Timothy 6070c30a88 Merge branch 'feat/open-hive' into feature/queen-worker-comm 2026-03-01 16:06:43 -08:00
Timothy 8a653e51bc feat: separate worker and queen input 2026-03-01 15:50:28 -08:00
Vasu Bansal 6a92588264 fix(plaid): update v0.6 credential compatibility and stabilize tests 2026-03-01 01:16:16 +05:30
Vasu Bansal 276aad6f0d feat: add Plaid banking integration
- Implement Plaid connector for account balances
- Add transaction history retrieval
- Include GL reconciliation functionality
- Add institution metadata lookup
- Include comprehensive tests and documentation

Closes #4016
2026-03-01 01:16:16 +05:30
Vasu Bansal 10620bda4f fix(sap): update credential-store compatibility and test imports 2026-03-01 01:07:00 +05:30
Vasu Bansal c214401a00 feat(integration): add SAP S/4HANA connector
Add complete SAP S/4HANA integration with:
- Connector for OData API access
- Credential management following Hive patterns
- Unit tests with mocked responses
- Documentation and usage examples

Refs #3182
2026-03-01 01:07:00 +05:30
Vasu Bansal 260ac33324 fix(s3): support v0.6 credential refs and register S3 tools 2026-03-01 00:56:22 +05:30
Vasu Bansal d4cd643860 feat: add AWS S3 integration for cloud object storage
- Add S3Storage class with upload, download, list, delete operations
- Support IAM roles, environment variables, and credential store
- Implement retry logic with adaptive backoff
- Add MCP tools: s3_upload, s3_download, s3_list, s3_delete, s3_check_credentials
- Include comprehensive tests with moto mocking
- Add documentation for setup and IAM permissions

Closes #3012
2026-03-01 00:54:57 +05:30
IamSayeed dc16cfda21 Merge branch 'main' into feature/add-asana-integration 2026-02-28 11:28:43 +05:30
RichardTang-Aden d562670425 Merge pull request #5501 from aden-hive/feat/open-hive
Feat: v6 windows compatibility support
2026-02-27 19:58:48 -08:00
Timothy Zhang 677bee6fe5 Merge branch 'feat/open-hive' of https://github.com/adenhq/hive into feat/open-hive 2026-02-27 19:55:54 -08:00
Timothy Zhang de27bfe76f fix: windows competibility 2026-02-27 19:55:48 -08:00
Timothy 1c1dcb9c33 chore: new architecture 2026-02-27 19:55:05 -08:00
RichardTang-Aden 4ba950f155 Merge pull request #5499 from aden-hive/feat/open-hive
Release / Create Release (push) Waiting to run
feat: tool call revamp, Intercom & GA integrations, credential improvements
2026-02-27 19:41:11 -08:00
bryan 9c3a11d7bb chore: remove load agent 2026-02-27 19:14:35 -08:00
Richard Tang b7d357aea2 Merge remote-tracking branch 'upstream/feat/open-hive' into feat/sub-agent-framework 2026-02-27 19:07:45 -08:00
bryan b2fed68346 chore: fix linter 2026-02-27 18:57:52 -08:00
bryan 0e996928be fix: load credentials check into new agent session 2026-02-27 18:50:03 -08:00
Timothy 6ff4ec3643 Merge branch 'feature/tool-call-revamp' into feat/open-hive 2026-02-27 18:45:35 -08:00
Timothy a0eda3e492 fix: event loop iterations 2026-02-27 18:41:13 -08:00
bryan 099f9514ef Merge branch 'main' into feat/open-hive 2026-02-27 18:10:42 -08:00
Timothy b2096e4a55 Merge branch 'feat/open-hive' into feature/tool-call-revamp 2026-02-27 18:10:32 -08:00
Timothy 1bf2164745 fix: spamming session update 2026-02-27 18:10:09 -08:00
bryan 48205bbde7 fix: dismiss credential banner 2026-02-27 18:09:51 -08:00
Bryan @ Aden 296aab6ecb Merge pull request #5171 from Ttian18/feat/tina/intercom-tool-4256
feat(tools): add Intercom tool integration (#4256)
2026-02-28 02:01:57 +00:00
Richard Tang 14182c45fc refactor: reorganized file tools 2026-02-27 17:52:21 -08:00
Richard Tang 2fa8f4283c Merge remote-tracking branch 'upstream/feat/open-hive' into feat/sub-agent-framework 2026-02-27 17:51:43 -08:00
Bryan @ Aden ad3cec2361 Merge pull request #4239 from Ttian18/feat/tina-google-analytics-tool
[Integration]: Google Analytics - Website Traffic & Marketing Performance #3727
2026-02-28 01:50:07 +00:00
bryan eddb628298 fix: remove mock_mode from queen/coder system prompt templates 2026-02-27 17:38:03 -08:00
bryan f63b226d8d fix: pipeline visual update 2026-02-27 17:32:59 -08:00
Timothy cc5bd61d86 feature: new tool calling logic 2026-02-27 17:29:00 -08:00
bryan 8bd14fb16f fix: graph summary for intake 2026-02-27 17:08:34 -08:00
bryan 30b5472e33 fix: center text and open hive 2026-02-27 16:47:20 -08:00
Adam Albarghouthi bc836db0f9 micro-fix: fix incorrect CLI commands and docstring in core docs (#5457)
- Replace non-existent CLI commands (calculate, interactive, analyze)
  with actual commands (run, shell, info) in core/README.md
- Fix test-list argument from <goal_id> to <agent_path> in core/README.md
- Fix misleading docstring on MockProvider.complete_with_tools()

Co-authored-by: Claude Opus 4.6 <noreply@anthropic.com>
Co-authored-by: hundao <alchemy_wimp@hotmail.com>
2026-02-28 08:40:58 +08:00
bryan bd3b0fb8eb chore: windows quickstart update 2026-02-27 16:13:36 -08:00
Adam Albarghouthi 7f28474967 micro-fix: fix wrong credential path and env var in docs (#5458)
* micro-fix: fix wrong credential path and env var in docs

Both docs/configuration.md and docs/environment-setup.md reference a
non-existent ADEN_CREDENTIALS_PATH env var and wrong default path
(~/.aden/credentials). The actual env var is HIVE_CREDENTIAL_KEY and
the default path is ~/.hive/credentials (see storage.py:119,125).

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

* micro-fix: clarify HIVE_CREDENTIAL_KEY comment wording

Reword comment to avoid implying the env var controls the path.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

---------

Co-authored-by: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-28 08:01:16 +08:00
bryan 09460b28bc refactor: move credentials from shell config to ~/.hive 2026-02-27 15:55:08 -08:00
wlkjyy 5d8ba1e49c micro-fix: tests: use unified session_* run IDs in runtime logging tests (#5480)
* tests: use session_* run IDs in runtime logging tests

* refactor: extract _sid() helper for session IDs in runtime logger tests
2026-02-28 07:54:59 +08:00
Richard Tang ccb394675b Merge remote-tracking branch 'upstream/feat/open-hive' into feat/sub-agent-framework 2026-02-27 14:48:47 -08:00
Richard Tang 931487a7d4 feat: clean the options for browser open tools that should not be used by LLM 2026-02-27 14:48:31 -08:00
bryan 3654c57f66 Merge branch 'main' into feat/open-hive 2026-02-27 14:48:10 -08:00
Richard Tang fb28280ced feat: human-friendly LLM and tool calls logs 2026-02-27 14:45:12 -08:00
bryan 6215441b58 fix: SSE reconnect on session change, tool pill per-call tracking, cancel/pause event emission 2026-02-27 14:37:54 -08:00
Richard Tang 52f16d5bb6 Merge remote-tracking branch 'upstream/feat/open-hive' into feat/sub-agent-framework 2026-02-27 13:49:14 -08:00
Antiarin e5b6c8581a feat: implement agent path validation and restrict loading to allowed directories 2026-02-28 02:56:31 +05:30
Timothy e1db3a4af9 fix: remove hardcoded anthropic logics 2026-02-27 10:23:59 -08:00
bryan 5dcca99913 fix: credential modal updates 2026-02-26 20:54:11 -08:00
Zhang 890b906f15 fix(tools): address review feedback on Google Analytics tool
- Use Credentials.from_service_account_file() instead of mutating os.environ
- Remove unused dimensions param from _format_report_response
- Remove unused metrics param from _format_realtime_response
- Extract duplicated property_id/limit validation into _validate_inputs helper
- Add credential_group="google_cloud" to GA and BigQuery specs
- Update tests to mock Credentials class
2026-02-26 20:46:20 -08:00
Timothy @aden 6a8286d4cf Merge pull request #5462 from aden-hive/feat/open-hive
Release / Create Release (push) Waiting to run
Feat/open hive
2026-02-26 20:41:58 -08:00
Timothy 680024f790 fix: cancel worker logic 2026-02-26 20:35:17 -08:00
Timothy 6f7bfb92a8 fix: patch the errorneous skip judge logic 2026-02-26 20:31:45 -08:00
Zhang 335a9603e8 feat(tools): add Google Analytics 4 integration (#3727)
Add read-only GA4 Data API v1 tools: ga_run_report, ga_get_realtime,
ga_get_top_pages, and ga_get_traffic_sources. Includes credential spec,
unit tests, and README.
2026-02-26 20:22:12 -08:00
Zhang 5e8a6202e7 fix(credentials): add Intercom health checker to registry (#4256)
Add IntercomHealthChecker (subclass of OAuthBearerHealthChecker) and
register it in HEALTH_CHECKERS so the credential registry completeness
test passes in CI.
2026-02-26 20:01:43 -08:00
Zhang 55a4cdefd7 fix(tools): pass assignee_type through to Intercom API and add README (#4256)
- Pass assignee_type from intercom_assign_conversation tool function
  through to _IntercomClient.assign_conversation() and into the API payload
- Add tests for assignee_type="team" passthrough at client and tool levels
- Add tool README with setup, usage examples, and error handling

Addresses PR #5171 review feedback from @bryanadenhq
2026-02-26 19:56:36 -08:00
Richard Tang 2b63135afb Merge remote-tracking branch 'upstream/feat/open-hive' into feat/sub-agent-framework 2026-02-26 19:33:24 -08:00
Timothy 49d8c3572d fix: stalled agent stop tools 2026-02-26 19:09:01 -08:00
bryan 4b40962186 feat: agent loading after change 2026-02-26 19:08:28 -08:00
Richard Tang 779b376c6e Merge remote-tracking branch 'upstream/feat/open-hive' into feat/sub-agent-framework 2026-02-26 19:02:35 -08:00
bryan 4e2a9a247a patch: credentials modal blocking incorrectly 2026-02-26 18:34:51 -08:00
Richard Tang b1f3d6b155 Merge remote-tracking branch 'upstream/feat/open-hive' into feat/sub-agent-framework 2026-02-26 17:59:15 -08:00
Timothy ea28a9d3c3 fix: turn off judge for now 2026-02-26 17:57:49 -08:00
bryan 69a03e463f cancel + queue msg 2026-02-26 17:57:21 -08:00
Richard Tang e7da62e61c Merge remote-tracking branch 'upstream/feat/open-hive' into feat/sub-agent-framework 2026-02-26 17:17:37 -08:00
Richard Tang 7176745e1c feat: GCU enabled in the quickstart menu 2026-02-26 17:15:37 -08:00
Timothy cce0e26f5c Merge branch 'feature/system-prompt-v2-worker-path' into feat/open-hive 2026-02-26 17:13:46 -08:00
Timothy 641af16dfc fix: nuanced preference tweaking 2026-02-26 17:09:47 -08:00
Timothy a335c427ef fix: worker file path fix 2026-02-26 17:05:51 -08:00
bryan 9ea6c959ae feat: mid-session credential management and MCP resync 2026-02-26 17:03:06 -08:00
Richard Tang 20efd523c9 Merge remote-tracking branch 'upstream/feature/llm-turn-logging' into feat/sub-agent-framework 2026-02-26 16:16:37 -08:00
Timothy 8fc7fff496 feature: log llm turn stop reasons 2026-02-26 16:14:51 -08:00
Richard Tang edf51e6996 feat: prompts for GCU 2026-02-26 15:45:03 -08:00
Richard Tang 6b867883ce chore: ruff lint 2026-02-26 15:03:06 -08:00
Richard Tang 35a05f4120 Merge remote-tracking branch 'upstream/feat/open-hive' into feat/sub-agent-framework 2026-02-26 14:59:48 -08:00
Richard Tang e0e78a97ce refactor: re-organize all the broswer tool and make them built-in for the gcu node type 2026-02-26 12:51:10 -08:00
Navya Bijoy ddd30a950d Integration: add Databricks MCP tool integration
Implements the Databricks MCP tool integration for the Hive agent framework
2026-02-26 21:01:59 +05:30
Richard Tang e4e476f463 Merge remote-tracking branch 'origin/feat/open-hive' into fix/codex-and-litellm-improvement 2026-02-26 07:24:16 -08:00
KRYSTALM7 3ca0e63d54 feat(tools): add Pushover push notification integration
Closes #5415
2026-02-26 13:54:34 +00:00
hundao c4c8917ecb fix: skip auto-block when weak models output text instead of calling tools
Client-facing nodes auto-block on text-only turns (wait for user input).
This breaks weak models (Codex) that output text like "Understood" instead
of calling tools after user responds.

Add _cf_expecting_work state: after user input, text-only turns with
missing output keys skip auto-block and go to judge, which pushes the
LLM to call set_output. Tool calls reset the state back to presenting
mode (auto-block on next text-only).

No behavioral change for strong models (they always call tools after
user input, so the new code path is never triggered).
2026-02-26 20:58:33 +08:00
hundao 1524d2ef00 fix: remove implementation hints from judge feedback for weak models
Judge feedback was saying "Use set_output tool to provide them" which
caused Codex to skip all work and call set_output directly. Changed to
"Follow your system prompt instructions to complete the work."
2026-02-26 20:56:14 +08:00
bryan 5032834034 fix: exit quickstart if claude code not configured 2026-02-25 20:38:28 -08:00
Richard Tang 0b83f6ea99 fix(wip): codex tool use bug fixes 2026-02-25 20:09:49 -08:00
bryan 415201f467 remove gpt-nano 2026-02-25 19:58:12 -08:00
bryan 73005a8498 fix: validate credentials before queen-initiated worker start 2026-02-25 19:37:54 -08:00
Timothy 4edb960fbd fix: stupid hard limit 2026-02-25 19:35:41 -08:00
Timothy 42d11ead01 fix: fake goal prompt injection 2026-02-25 19:30:20 -08:00
bryan 5e18f85b10 fix: deferred cred validation + dismissable error banners 2026-02-25 19:14:55 -08:00
Timothy 85b25bf006 fix: missing mcp reference 2026-02-25 19:10:35 -08:00
Timothy c1ba108489 Merge branch 'feature/refactor-system-prompt' into feat/open-hive 2026-02-25 18:46:16 -08:00
Richard Tang 214098aaae fix: remove the run_command tool from the predefined engineering tool set for worker agent 2026-02-25 18:36:00 -08:00
bryan 241a0b7adc fix: clean up stale active sessions on worker load 2026-02-25 18:29:15 -08:00
bryan 9a7b41a4be feat: show schedule info when clicking trigger nodes 2026-02-25 18:12:20 -08:00
Timothy fe918adb16 fix: cosmetics 2026-02-25 18:01:58 -08:00
bryan 746f026654 feat: 3-layer resume prompts + trigger node visualization 2026-02-25 17:49:44 -08:00
Richard Tang 8294cd3dd9 feat: fix codex tool call usage 2026-02-25 17:49:41 -08:00
Timothy 3bbc63b1db feature/refactored-system-prompt-narratives 2026-02-25 17:33:21 -08:00
Richard Tang 337fb6d922 refactor: deprecate the unused llm functions 2026-02-25 17:32:33 -08:00
bryan bda6b18e8a fix: session reconnect + iteration-based message IDs 2026-02-25 16:55:33 -08:00
bryan d256ff929f fix: faster input_requested + execution_id tracking 2026-02-25 15:38:46 -08:00
bryan f71b20cf07 filter out queen judge from action plan 2026-02-25 12:26:03 -08:00
Richard Tang db26b0afd6 feat: pop out the codex OAuth consent page 2026-02-25 11:53:00 -08:00
Timothy 145860f42e fix: consolidate validation endpoints 2026-02-25 09:54:06 -08:00
bryan d9f84648d0 revoke credential ux 2026-02-25 09:46:46 -08:00
Timothy 9fb7e0bae7 fix: load agent graph consistently 2026-02-25 09:02:37 -08:00
Bryan @ Aden b00203702e Merge pull request #5344 from juni2003/docs/fix-readme-org-links
docs(readme): fix broken org links
2026-02-25 16:57:00 +00:00
bryan ead85dd41f fix closing tab, remove 0/0 from credential modal 2026-02-25 08:11:24 -08:00
bryan cf5bf6f174 initial prompt from home page 2026-02-25 07:58:49 -08:00
bryan 46237e7309 kill judge and queen 2026-02-24 20:01:43 -08:00
bryan afa686b47b Merge branch 'main' into feat/open-hive 2026-02-24 19:38:46 -08:00
Timothy 21e02c9e50 Merge branch 'fix/credential-popup' into feat/open-hive 2026-02-24 19:29:21 -08:00
Timothy 30a188d7c8 fix: credential popup 2026-02-24 19:28:53 -08:00
bryan 355f51b25e quickstart update 2026-02-24 19:26:54 -08:00
Timothy 8e1cde86e8 Merge branch 'feat/openhive-cred-fixes' into feat/open-hive 2026-02-24 19:10:30 -08:00
Timothy c13b02c7d9 Merge branch 'fix/credential-loading' into feat/open-hive 2026-02-24 19:09:39 -08:00
bryan 9e72801c28 agent loading 2026-02-24 19:05:12 -08:00
RichardTang-Aden 3a3d538b73 Merge pull request #5367 from RichardTang-Aden/feat/codex-subscription-rebased
Feat/codex subscription rebased
2026-02-24 18:53:39 -08:00
Richard Tang b11bca0c67 chore: lint reformat 2026-02-24 18:53:04 -08:00
Richard Tang faf8975b42 chore: improve script code and solved lint errors 2026-02-24 18:51:37 -08:00
Richard Tang 863168880e fix: unused credential detect path removed 2026-02-24 18:41:36 -08:00
bryan 4bd1b1b9e6 credential updated 2026-02-24 18:33:09 -08:00
Richard Tang 8c3866a014 feat: optimized for the LLM selection option 2026-02-24 18:27:03 -08:00
Richard Tang 61283d9bd6 feat: Codex subscription OAuth 2026-02-24 18:24:36 -08:00
Richard Tang 585a7186d4 feat: support openai codex subscription as the LLM provider 2026-02-24 18:24:36 -08:00
RichardTang-Aden 10d9e54857 Merge pull request #4576 from mubarakar95/perf/reduce-subprocess-spawning-windows
perf: reduce subprocess spawning in quickstart scripts (#4427)
2026-02-24 17:47:22 -08:00
RichardTang-Aden 11379fc0ef Merge branch 'main' into perf/reduce-subprocess-spawning-windows 2026-02-24 17:43:25 -08:00
Richard Tang 754e33a1ae feat: browser tools optimization 2026-02-24 14:05:26 -08:00
Junaid 51154a3070 docs(readme): fix broken org links
Update repository URLs from adenhq/hive to aden-hive/hive to prevent 404s
2026-02-24 23:55:23 +05:00
Richard Tang b11b43bbe1 feat: reorganized the log structure for subagents 2026-02-24 10:41:13 -08:00
Richard Tang 86f4645d1c fix: inherit the tool call overflow margin for subagent 2026-02-24 08:20:08 -08:00
Richard Tang 2d05e96cd5 fix: spillover for subagent 2026-02-24 08:18:52 -08:00
RichardTang-Aden ebeac68707 Merge pull request #5272 from SANTHAN-KUMAR/main
fix(web_scrape): reorder status checks before wait & replace hardcoded sleep with networkidle
2026-02-24 08:02:41 -08:00
SANTHAN-KUMAR de5fcab933 fix(web_scrape): implement robots.txt support & clean up dead mock
- Add respect_robots_txt parameter (default True) using stdlib
  urllib.robotparser, checked before browser launch to skip
  disallowed URLs early
- Remove dead wait_for_timeout mock from test helper
- Restore respect_robots_txt docs in README (param, error, note)
- Add 2 tests: blocked by robots.txt, disabled robots.txt check
- Fix import ordering (ruff I001)
2026-02-24 15:29:13 +05:30
SANTHAN-KUMAR a7a2100472 Merge branch 'aden-hive:main' into main 2026-02-24 15:24:10 +05:30
Richard Tang 9c44d3b793 feat: add the upgraded file operation tools 2026-02-23 20:25:25 -08:00
Richard Tang 9b89ac694e feat: new snapshot tools 2026-02-23 19:34:42 -08:00
Richard Tang 630d8208cf fix: avoid using headless broswer 2026-02-23 19:09:18 -08:00
Richard Tang 9b342dc593 feat: add health check for the browser start 2026-02-23 18:28:59 -08:00
Richard Tang ad879de6ff feat: clean the browser snapshot tool 2026-02-23 17:56:05 -08:00
Richard Tang 795266aab4 feat: store the subagent logs in the node logs folder 2026-02-23 16:02:39 -08:00
Richard Tang 4e4ef121f9 feat: Progressive feedback in SubagentJudge 2026-02-23 15:48:34 -08:00
Richard Tang ddb9126955 fix: result the bug for calling the snapshot tool too many times 2026-02-23 15:38:04 -08:00
Richard Tang bac6d6dd68 feat: subagent ending judge and communication 2026-02-23 15:25:59 -08:00
Richard Tang 3451570541 feat: enable subagent to talk back to the parent via tools 2026-02-23 12:31:51 -08:00
Richard Tang e5e939f344 feat: add a basic test tool for the broswer control tools validity 2026-02-23 11:08:08 -08:00
Richard Tang 0d51d25482 feat: highlight interactive actions 2026-02-23 11:03:19 -08:00
Richard Tang a0a5b10df0 fix: remove the max subagent logic 2026-02-23 10:35:55 -08:00
Richard Tang 04bac93c14 feat: fix tool bugs and add background tabs option 2026-02-23 10:20:52 -08:00
SANTHAN-KUMAR 4961d3ba8c fix(web_scrape): reorder status checks & replace hardcoded wait with networkidle
- Move response validation (null, HTTP status, content-type) before
  the rendering wait so errors return immediately without sleeping
- Replace wait_for_timeout(2000) with wait_for_load_state("networkidle")
  to align code with README (timeout=3000, wrapped in try/except)
- Fix README: remove phantom respect_robots_txt param, fix timeout
  30s→60s, remove false robots.txt claim
- Add 3 tests for early-exit error paths
2026-02-23 23:40:10 +05:30
Richard Tang 047f4a1a0c Merge branch 'main' into feat/sub-agent-framework 2026-02-22 18:31:47 -08:00
Richard Tang 7994b90dfa feat: add the max_sub_agents config and constrain 2026-02-22 18:23:52 -08:00
Richard Tang 04b6a80370 feat: shared agent profile 2026-02-22 18:17:40 -08:00
Shivam Shahi– oss/acc 0f8627f17a format 2026-02-22 00:25:15 +05:30
Zhang fc0c3e169f feat(tools): add Intercom tool with conversations, contacts, and tags (#4256) 2026-02-20 17:14:30 -08:00
Zhang 4760f95bda feat(credentials): add Intercom credential spec (#4256)
Register INTERCOM_ACCESS_TOKEN in INTEGRATION_CREDENTIALS for the
8 Intercom tools (search/get conversations, contacts, notes, tags,
assignment, teams). Tool implementation follows in subsequent commits.
2026-02-20 17:13:54 -08:00
Utkarsh Singh cd0cf69099 feat(tools): add Brevo transactional email and SMS integration
- Add brevo_tool with 6 MCP tools: brevo_send_email, brevo_send_sms,
  brevo_create_contact, brevo_get_contact, brevo_update_contact,
  brevo_get_email_stats
- Add CredentialSpec for BREVO_API_KEY in credentials/brevo.py
- Register brevo_tool in tools/__init__.py and credentials/__init__.py
- Add README with setup instructions and usage examples
- Add 34 unit tests covering all tools, validation and error handling

Closes #5127
2026-02-20 13:19:07 +05:30
Richard Tang a04a8a866d fix: sub-agents reachability check 2026-02-19 11:33:32 -08:00
Richard Tang 8c9baa62b0 feat: create default hive profile for browser use 2026-02-18 18:10:37 -08:00
Richard Tang 262eaa6d84 feat: mcp dependencies for gcu 2026-02-18 16:34:19 -08:00
Richard Tang fc1a48f3bc feat: breaking the browser use tools by types 2026-02-18 16:10:17 -08:00
Richard Tang 060f320cd1 feat(wip): gcu node and basic browser tools 2026-02-18 15:52:46 -08:00
Richard Tang bff32bcaa3 feat: allow sub_agent in the agent framework 2026-02-18 14:43:01 -08:00
Amdev-5 9744363342 fix(lusha): address PR review round 2 — structured filters, pagination, correct types
- search_people: replaced freetext searchText concatenation with proper
  structured Lusha API filters (jobTitles, seniority as list[int],
  departments, locations as dict, company_names, industry_ids, search_text)
- search_companies: added locations, company_names, search_text params;
  made all params optional for flexible queries
- Pagination: exposed limit param (clamped 10-50 per Lusha API constraints)
  on both search tools, replacing hardcoded size=25
- get_signals: changed ids from list[str] to list[int], removed internal
  str-to-int conversion as Lusha IDs are always numeric
- seniority type corrected to list[int] (API rejects string-encoded values
  despite OpenAPI spec suggesting strings — verified via live integration)
- Unit tests updated for all changes (19/19 pass)

Verified against live Lusha API: all 6 tools return correct responses.
2026-02-17 22:00:09 +05:30
Amdev-5 6fe8439e94 fix(lusha): use mainIndustriesIds for company search, safer credential handling
- search_companies: replace names filter with mainIndustriesIds (numeric
  industry IDs) per Lusha API schema. Parameter changed from
  industry: str to industry_ids: list[int] | None.
- _get_api_key: return None instead of raising TypeError on unexpected
  credential type. Lets _get_client handle it with the standard error dict
  pattern used across all tools.
- Updated unit tests for new industry_ids parameter and added test for
  non-string credential handling.
2026-02-17 21:33:02 +05:30
Amdev-5 8e61ffe377 fix(tools): remove invalid searchText field from Lusha prospecting filters
Lusha API rejects filters.companies.include.searchText (HTTP 400).
Replaced with valid 'names' field in search_companies and removed
redundant company searchText from search_people. Updated unit tests.
2026-02-17 21:33:02 +05:30
Amdev-5 723476f7a7 feat(tools): add Lusha MCP integration with credentials and health checks 2026-02-17 21:33:02 +05:30
IamSayeed 0f253027ae Merge branch 'main' into feature/add-asana-integration 2026-02-17 12:20:01 +05:30
Sayeed Rizwan 6053895a82 fix(asana): resolve from PR feedback - refactor client, fix specs, add tests 2026-02-17 12:18:06 +05:30
Shivam Shahi– oss/acc ceffa38717 Merge branch 'main' into feat/zoho-crm 2026-02-17 02:46:29 +05:30
Your hh3538962 ae205fa3f2 fix(tools): address Power BI integration code review feedback
- Fix export endpoint: /Export -> /ExportTo
- Add 202 Accepted response handling
- Add notifyOption to refresh_dataset API call
- Rename format parameter to export_format (avoid shadowing builtin)
- Add PNG support to export formats
- All critical API issues from review addressed
2026-02-16 14:00:09 +05:00
Shivam Shahi– oss/acc 669a05892b Merge branch 'main' into feat/zoho-crm 2026-02-15 21:47:52 +05:30
IamSayeed 4898a9759a Merge branch 'main' into feature/add-asana-integration 2026-02-15 13:07:15 +05:30
Sayeed Rizwan 2c2fa25580 fix: Resolve merge conflicts in credential and tool registries 2026-02-15 13:00:23 +05:30
Sayeed Rizwan 56496d7dbd feat: Add Asana integration for project management automation
- Implement 25 MCP tools for comprehensive Asana operations
  - Task management (create, update, search, delete, complete, comment, subtask)
  - Project management (create, update, list, get tasks)
  - Workspace & team operations (list workspaces, get users)
  - Section management for Kanban workflows
  - Tag and custom field support

- Add Personal Access Token (PAT) authentication
- Use official asana>=3.2.0 Python SDK (v5+ API)
- Include comprehensive error handling with ApiException
- Add 5 unit tests with 100% pass rate
- Provide detailed documentation and usage examples

Technical Details:
- Uses asana.ApiClient with Configuration pattern
- Implements workspace resolution by name or GID
- Handles paginated responses automatically
- Follows CredentialStoreAdapter pattern
- Matches existing tool structure (slack_tool, github_tool)

Closes #4156
2026-02-15 11:33:17 +05:30
y0sif dd0696e44d chore: resolve merge conflicts with main 2026-02-14 21:38:44 +02:00
y0sif dcda273e0b chore: resolve merge conflicts with main 2026-02-14 21:32:33 +02:00
y0sif f3b159c650 docs(tools): document Attio CRM in README 2026-02-14 21:23:47 +02:00
y0sif 06df037e28 chore: add Attio credentials to test spec file 2026-02-14 21:22:55 +02:00
y0sif e814e516d1 chore: add Attio credentials to init file 2026-02-14 21:21:37 +02:00
y0sif 0375e068ed test(tools): add Attio tool tests 2026-02-14 21:20:03 +02:00
y0sif 34ffc533d3 feat(tools): add Attio CRM integration 2026-02-14 21:19:14 +02:00
mubarakar95 ea2ea1a4ae Merge branch 'main' into integration/apify 2026-02-14 17:53:39 +05:30
mubarakar95 9e11947687 style: apply ruff formatting to apify_tool.py 2026-02-14 17:22:35 +05:30
mubarakar95 47117281e1 fix(test): resolve E501 line too long in test_apify_tool.py 2026-02-14 17:22:33 +05:30
mubarakar95 032dd13f5a feat(tools): implement Apify integration with 4 tools and comprehensive tests
- Added credential spec with health check endpoint
- Implemented apify_run_actor (sync/async execution)
- Implemented apify_get_dataset (result retrieval)
- Implemented apify_get_run (status checking)
- Implemented apify_search_actors (marketplace search)
- Created comprehensive README with examples and use cases
- Added 24 unit tests with mocked API responses
- All tests passing, conformance validated, linting clean

Resolves: #4510
2026-02-14 17:22:25 +05:30
mubarakar95 13d8ebbeff feat: Add Apify integration (issue #4510)
Implements comprehensive Apify integration for web scraping and automation:

- Added 4 new tools: apify_run_actor, apify_get_dataset, apify_get_run, apify_search_actors
- Credential management for APIFY_API_TOKEN with health check
- Support for synchronous (wait=True) and asynchronous (wait=False) actor execution
- Actor ID validation and comprehensive error handling
- Full test coverage (26 tests passing)
- README with usage examples and documentation

Addresses #4510
2026-02-14 11:53:56 +05:30
Shivam Shahi– oss/acc 2efa0e01df ruff format fix 2026-02-14 00:35:30 +05:30
Shivam Shahi– oss/acc 6044369fdf feat(tools): add Zoho CRM v8 integration with OAuth2 and MCP tools
Add Zoho CRM MCP integration for lead/contact/account/deal workflows with notes support. Implements 5 MCP tools:
- zoho_crm_search: Search Leads/Contacts/Accounts/Deals by criteria or word with pagination
- zoho_crm_get_record: Fetch a single record by module and ID
- zoho_crm_create_record: Create records with pass-through field payloads
- zoho_crm_update_record: Update records by ID with partial field payloads
- zoho_crm_add_note: Create notes linked to CRM records via Parent_Id mapping

Features:
- Zoho OAuth2 provider added in core credentials (refresh-token flow)
- Zoho auth format: Authorization: Zoho-oauthtoken <token>
- Region/DC-aware routing using accounts domain/region + api_domain usage
- Persisted DC metadata on refresh (api_domain/accounts_domain/location)
- Credential spec and health check registration for zoho_crm
- Tool registration and allowed-tool list updates
- Normalized tool responses with retriable 429 handling
- README with setup, auth modes, usage, and testing instructions
- Comprehensive unit/integration coverage updates for tool, provider, and health checks

Validation:
- Scoped ruff lint/format checks passed
- Targeted test suite passed: 563 passed, 18 skipped

Closes #4418
2026-02-13 18:28:12 +05:30
mubarakar95 40e74e408b perf: reduce subprocess spawning in quickstart scripts (#4427)
## Problem
Windows process creation (CreateProcess) is 10-100x slower than Linux fork/exec.
The quickstart scripts were spawning 4+ separate `uv run python -c "import X"`
processes to verify imports, adding ~600ms overhead on Windows.

## Solution
Consolidated all import checks into a single batch script that checks multiple
modules in one subprocess call, reducing spawn overhead by ~75%.

## Changes
- **New**: `scripts/check_requirements.py` - Batched import checker
- **New**: `scripts/test_check_requirements.py` - Test suite
- **New**: `scripts/benchmark_quickstart.ps1` - Performance benchmark tool
- **Modified**: `quickstart.ps1` - Updated import verification (2 sections)
- **Modified**: `quickstart.sh` - Updated import verification

## Performance Impact
**Benchmark results on Windows:**
- Before: ~19.8 seconds for import checks
- After: ~4.9 seconds for import checks
- **Improvement: 14.9 seconds saved (75.2% faster)**

## Testing
-  All functional tests pass (`scripts/test_check_requirements.py`)
-  Quickstart scripts work correctly on Windows
-  Error handling verified (invalid imports reported correctly)
-  Performance benchmark confirms 75%+ improvement

Fixes #4427
2026-02-12 15:38:58 +05:30
RichardTang-Aden 97440f9e8a Merge branch 'main' into feature/x-twitter-integration 2026-02-11 17:13:33 -08:00
Your hh3538962 765f7cae58 feat(tools): add get_datasets, get_reports, and export_report functions to Power BI integration 2026-02-11 22:19:51 +05:00
Your hh3538962 b455c8a2ad Merge remote-tracking branch 'origin/main' into feat/power-bi-integration 2026-02-11 22:07:00 +05:00
Sapna vishnoi da25e0ffa5 Merge branch 'main' into feat/redshift-integration 2026-02-11 13:42:26 +05:30
Your hh3538962 e07703c01f feat(tools): add Power BI integration - initial structure with workspace and dataset refresh functions 2026-02-10 13:23:32 +05:00
mishrapravin114 a4abf3eb2b Merge upstream/main: resolve conflicts with Apollo integration
- Keep both APOLLO_CREDENTIALS and AIRTABLE_CREDENTIALS
- Keep both apollo_tool and airtable_tool imports (alphabetical)

Co-authored-by: Cursor <cursoragent@cursor.com>
2026-02-10 00:25:17 +05:30
mishrapravin114 269d72d073 Merge upstream/main: resolve conflicts with Apollo integration
- Keep both APOLLO_CREDENTIALS and CALENDLY_CREDENTIALS
- Keep both apollo_tool and calendly_tool imports (alphabetical)

Co-authored-by: Cursor <cursoragent@cursor.com>
2026-02-10 00:20:17 +05:30
mishrapravin114 c8f5dccbd2 docs(airtable): add rate limit section to README
Co-authored-by: Cursor <cursoragent@cursor.com>
2026-02-10 00:17:49 +05:30
mishrapravin114 8b797ee73f feat(airtable): add rate limit retry and retry_after
- Add 429 handling with retry_after from Retry-After header
- Add _request_with_retry (2 retries) for all API calls
- Update tests to use httpx.request

Co-authored-by: Cursor <cursoragent@cursor.com>
2026-02-10 00:17:37 +05:30
mishrapravin114 de38adb1e4 feat(calendly): add rate limit handling, retry, 7-day validation
- Add 429 handling with retry_after from Retry-After header
- Add _request_with_retry (2 retries) for all API calls
- Validate get_availability date range <= 7 days
- Update tests to use httpx.request

Co-authored-by: Cursor <cursoragent@cursor.com>
2026-02-10 00:16:37 +05:30
Sapna vishnoi c169bcc5d8 Merge branch 'main' into feat/redshift-integration 2026-02-09 23:32:08 +05:30
kubrakaradirek 80ea286beb fix: resolve complex merge conflicts and restore integrations 2026-02-09 16:09:43 +03:00
kubrakaradirek 3499be782e feat: implement MSSQL tool with schema discovery closes #3377 2026-02-09 15:32:57 +03:00
Gordon Ng 16603ae49c Test MCP 2026-02-09 01:48:49 -05:00
Gordon Ng bf6bd9ce7f test mcp 2026-02-09 01:48:46 -05:00
Gordon Ng a54c0f6f46 update 2026-02-09 01:20:25 -05:00
Gordon Ng beeed11d48 update 2026-02-09 01:11:33 -05:00
Manas Dutta 25331590a7 feat(reddit): add Reddit health checker and update tool functions 2026-02-08 19:26:01 +05:30
GastonAQS bff9f8976e Merge branch 'main' into feature/add-trello-integration 2026-02-07 15:57:48 -03:00
Manas Dutta b71628e211 Merge branch 'main' into feature/reddit-integration 2026-02-07 19:35:02 +05:30
Manas Dutta 8c1cb1f55b feat: add Reddit integration with 18 MCP tools
Implements Reddit API integration for community management and content monitoring.

Features:
- Search & Monitoring: search posts/comments, get subreddit feeds (new/hot), get posts/comments (6 tools)
- Content Creation: submit posts, reply, edit, delete comments (5 tools)
- User Engagement: get profiles, upvote, downvote, save posts (4 tools)
- Moderation: remove/approve posts, ban users (3 tools)

Implementation:
- OAuth 2.0 authentication via REDDIT_CREDENTIALS
- PRAW library for Reddit API integration
- Comprehensive error handling and validation
- Full test coverage (25 tests passing)

Resolves #3595
2026-02-07 18:38:59 +05:30
mishrapravin114 66214384a9 fix: add register_airtable import and fix ruff I001 import order
Co-authored-by: Cursor <cursoragent@cursor.com>
2026-02-07 17:18:26 +05:30
mishrapravin114 6d6646887c feat(tools): add Airtable bases and records integration
- Add Airtable tool with 5 MCP tools:
  - airtable_list_bases
  - airtable_list_tables
  - airtable_list_records (with filter/sort)
  - airtable_create_record
  - airtable_update_record
- Add AIRTABLE_CREDENTIALS with credentialSpec + credentialStore
- Add AirtableHealthChecker for token validation
- Add README with setup and usage
- Add unit tests (9 tests total)

Fixes #2911

Co-authored-by: Cursor <cursoragent@cursor.com>
2026-02-07 17:14:46 +05:30
mishrapravin114 6f8db0ed08 style: apply ruff format to calendly and health check files
Co-authored-by: Cursor <cursoragent@cursor.com>
2026-02-07 17:00:05 +05:30
mishrapravin114 6aaf6836ea fix(calendly): resolve ruff lint errors (UP017, E501)
Co-authored-by: Cursor <cursoragent@cursor.com>
2026-02-07 16:58:48 +05:30
mishrapravin114 4f2348f50e feat(tools): add Calendly scheduling integration
- Add Calendly tool with 4 MCP tools:
  - calendly_list_event_types
  - calendly_get_availability
  - calendly_get_booking_link
  - calendly_cancel_event
- Add CALENDLY_CREDENTIALS with credentialSpec + credentialStore
- Add CalendlyHealthChecker for token validation
- Add README with setup and usage
- Add unit tests (12 tests total)

Fixes #2930

Co-authored-by: Cursor <cursoragent@cursor.com>
2026-02-07 16:51:27 +05:30
RichardTang-Aden deb7f2f72a Merge pull request #3814 from Amdev-5/feature/x-twitter-integration
fix(tests): update credential group test for X integration
2026-02-06 09:16:42 -08:00
Amdev-5 d989d9c65a fix(tests): update credential group test for X integration
Add test_x_credentials_share_credential_group to verify all X credentials
share the 'x' credential group. Update test_credential_group_default_empty
to account for X credentials alongside existing Google exceptions.
2026-02-06 22:17:40 +05:30
bryan 4173c606ab Merge feature/x-twitter-final-integration from Amdev-5/hive - X (Twitter) tool with DM support 2026-02-06 08:03:43 -08:00
Amdev-5 a01430d20f Merge verification fixes into PR branch 2026-02-06 16:42:56 +05:30
Amdev-5 2a8f775732 feat(tools): enhance X tool with DM support and robust error handling
- Added `x_send_dm` tool using v2 endpoint (`POST /dm_conversations/with/:id/messages`) for reliable 1:1 messaging.
- Fixed 403 Forbidden payload validation errors by simplifying DM payload structure.
- Enhanced `_handle_response` to verify `x_tool.py` returns raw API error details for 403/400 responses, aiding in permission debugging.
- Updated `demo_x_tools.py` to support standard `.env` variable names (e.g., `X_API_KEY`) and added user lookup for DM testing.
- Added unit tests covering new DM functionality and payload verification in `test_x_tool.py`.
- Audited credential handling: Read-only tools (Search/Mentions) correctly use Bearer Token, while Write tools (Post/Reply/Delete/DM) enforce OAuth 1.0a User Context.

Verified with live API tests (see PR description for logs).
2026-02-06 15:48:20 +05:30
Sapna vishnoi 4a0d9b2855 Merge branch 'main' into feat/redshift-integration 2026-02-05 11:44:09 +05:30
y0sif 92c65d69ea chore: resolve merge conflicts with main 2026-02-05 07:13:36 +02:00
Yosif Soliman 910a8968c4 fix(linear): correct GraphQL variable type for workflow states query 2026-02-05 07:00:28 +02:00
Sapna vishnoi cdb4679c5a Merge branch 'main' into feat/redshift-integration 2026-02-05 00:05:38 +05:30
Sapna.Vishnoi 1a9dce89b4 feat(tools): Add Amazon Redshift integration
- Implement 5 core functions for data warehouse querying
- Add boto3 integration with Redshift Data API
- Security: Read-only SELECT queries by default
- Full credential store support
- 26/26 tests passing (100% coverage)
- Complete documentation with examples
2026-02-04 23:58:35 +05:30
Aneesh cf1e4d7f88 Merge remote-tracking branch 'origin/main' into feature/youtube-transcript 2026-02-04 19:46:52 +05:30
Aneesh f2f0b4fc61 feat(tools): add youtube transcript integration via youtube-transcript-api 2026-02-04 19:24:40 +05:30
y0sif b21dd25181 fix(linear): handle credential decryption errors gracefully, handle mcp tool issue with credentials 2026-02-04 05:21:23 +02:00
y0sif 04a18bcbe5 docs(tools): document Linear integration in README and setup credentials claude skill 2026-02-04 04:05:15 +02:00
y0sif 7f66dd67eb feat(linear): add OAuth setup instructions 2026-02-04 04:03:37 +02:00
y0sif cfa03b89c8 test(tools): add comprehensive Linear tool tests 2026-02-04 03:47:28 +02:00
y0sif 9866d7a22b feat(tools): add Linear project management integration 2026-02-04 03:47:03 +02:00
GastonAQS 331a6e442f feat: add Trello integration tools and API client 2026-02-03 10:32:25 -03:00
Sashank Thapa 1c2295b2b5 Merge branch 'adenhq:main' into feature/twitter-x-mcp-tool 2026-02-03 16:20:45 +05:30
Sashank Thapa fa43ca3785 Merge branch 'adenhq:main' into feature/twitter-x-mcp-tool 2026-01-31 16:26:39 +05:30
kozuedoingregression b4a2c3bd14 ruff formatting and lint fixes 2026-01-31 16:18:16 +05:30
kozuedoingregression 2d4ec4f462 lint fix 2026-01-31 16:14:25 +05:30
kozuedoingregression 1e8b933da0 add X (Twitter) integration tool 2026-01-31 15:49:16 +05:30
Aneesh 48b1e0e038 Docs: clarify agent creation assumptions in Getting Started 2026-01-28 22:49:30 +05:30
575 changed files with 80417 additions and 15069 deletions
-9
View File
@@ -1,9 +0,0 @@
{
"mcpServers": {
"agent-builder": {
"command": "uv",
"args": ["run", "--directory", "core", "-m", "framework.mcp.agent_builder_server"],
"disabled": false
}
}
}
-1
View File
@@ -1 +0,0 @@
../../.claude/skills/hive
-1
View File
@@ -1 +0,0 @@
../../.claude/skills/hive-concepts
-1
View File
@@ -1 +0,0 @@
../../.claude/skills/hive-create
-1
View File
@@ -1 +0,0 @@
../../.claude/skills/hive-credentials
-1
View File
@@ -1 +0,0 @@
../../.claude/skills/hive-patterns
-1
View File
@@ -1 +0,0 @@
../../.claude/skills/hive-test
-5
View File
@@ -1,5 +0,0 @@
---
description: hive-concepts
---
use hive-concepts skill
-5
View File
@@ -1,5 +0,0 @@
---
description: hive-create
---
use hive-create skill
-5
View File
@@ -1,5 +0,0 @@
---
description: hive-credentials
---
use hive-credentials skill
-5
View File
@@ -1,5 +0,0 @@
---
description: hive-patterns
---
use hive-patterns skill
-5
View File
@@ -1,5 +0,0 @@
---
description: hive-test
---
use hive-test skill
-5
View File
@@ -1,5 +0,0 @@
---
description: hive
---
use hive skill
-1
View File
@@ -1 +0,0 @@
../../.claude/skills/hive
-1
View File
@@ -1 +0,0 @@
../../.claude/skills/hive-concepts
-1
View File
@@ -1 +0,0 @@
../../.claude/skills/hive-create
-1
View File
@@ -1 +0,0 @@
../../.claude/skills/hive-credentials
-1
View File
@@ -1 +0,0 @@
../../.claude/skills/hive-patterns
-1
View File
@@ -1 +0,0 @@
../../.claude/skills/hive-test
-399
View File
@@ -1,399 +0,0 @@
---
name: hive-concepts
description: Core concepts for goal-driven agents - architecture, node types (event_loop, function), tool discovery, and workflow overview. Use when starting agent development or need to understand agent fundamentals.
license: Apache-2.0
metadata:
author: hive
version: "2.0"
type: foundational
part_of: hive
---
# Building Agents - Core Concepts
Foundational knowledge for building goal-driven agents as Python packages.
## Architecture: Python Services (Not JSON Configs)
Agents are built as Python packages:
```
exports/my_agent/
├── __init__.py # Package exports
├── __main__.py # CLI (run, info, validate, shell)
├── agent.py # Graph construction (goal, edges, agent class)
├── nodes/__init__.py # Node definitions (NodeSpec)
├── config.py # Runtime config
└── README.md # Documentation
```
**Key Principle: Agent is visible and editable during build**
- Files created immediately as components are approved
- User can watch files grow in their editor
- No session state - just direct file writes
- No "export" step - agent is ready when build completes
## Core Concepts
### Goal
Success criteria and constraints (written to agent.py)
```python
goal = Goal(
id="research-goal",
name="Technical Research Agent",
description="Research technical topics thoroughly",
success_criteria=[
SuccessCriterion(
id="completeness",
description="Cover all aspects of topic",
metric="coverage_score",
target=">=0.9",
weight=0.4,
),
# 3-5 success criteria total
],
constraints=[
Constraint(
id="accuracy",
description="All information must be verified",
constraint_type="hard",
category="quality",
),
# 1-5 constraints total
],
)
```
### Node
Unit of work (written to nodes/__init__.py)
**Node Types:**
- `event_loop` — Multi-turn streaming loop with tool execution and judge-based evaluation. Works with or without tools.
- `function` — Deterministic Python operations. No LLM involved.
```python
search_node = NodeSpec(
id="search-web",
name="Search Web",
description="Search for information and extract results",
node_type="event_loop",
input_keys=["query"],
output_keys=["search_results"],
system_prompt="Search the web for: {query}. Use the web_search tool to find results, then call set_output to store them.",
tools=["web_search"],
)
```
**NodeSpec Fields for Event Loop Nodes:**
| Field | Default | Description |
|-------|---------|-------------|
| `client_facing` | `False` | If True, streams output to user and blocks for input between turns |
| `nullable_output_keys` | `[]` | Output keys that may remain unset (for mutually exclusive outputs) |
| `max_node_visits` | `1` | Max times this node executes per run. Set >1 for feedback loop targets |
### Edge
Connection between nodes (written to agent.py)
**Edge Conditions:**
- `on_success` — Proceed if node succeeds (most common)
- `on_failure` — Handle errors
- `always` — Always proceed
- `conditional` — Based on expression evaluating node output
**Edge Priority:**
Priority controls evaluation order when multiple edges leave the same node. Higher priority edges are evaluated first. Use negative priority for feedback edges (edges that loop back to earlier nodes).
```python
# Forward edge (evaluated first)
EdgeSpec(
id="review-to-campaign",
source="review",
target="campaign-builder",
condition=EdgeCondition.CONDITIONAL,
condition_expr="output.get('approved_contacts') is not None",
priority=1,
)
# Feedback edge (evaluated after forward edges)
EdgeSpec(
id="review-feedback",
source="review",
target="extractor",
condition=EdgeCondition.CONDITIONAL,
condition_expr="output.get('redo_extraction') is not None",
priority=-1,
)
```
### Client-Facing Nodes
For multi-turn conversations with the user, set `client_facing=True` on a node. The node will:
- Stream its LLM output directly to the end user
- Block for user input between conversational turns
- Resume when new input is injected via `inject_event()`
```python
intake_node = NodeSpec(
id="intake",
name="Intake",
description="Gather requirements from the user",
node_type="event_loop",
client_facing=True,
input_keys=[],
output_keys=["repo_url", "project_url"],
system_prompt="You are the intake agent. Ask the user for the repo URL and project URL.",
)
```
> **Legacy Note:** The old `pause_nodes` / `entry_points` pattern still works but `client_facing=True` is preferred for new agents.
**STEP 1 / STEP 2 Prompt Pattern:** For client-facing nodes, structure the system prompt with two explicit phases:
```python
system_prompt="""\
**STEP 1 — Respond to the user (text only, NO tool calls):**
[Present information, ask questions, etc.]
**STEP 2 — After the user responds, call set_output:**
[Call set_output with the structured outputs]
"""
```
This prevents the LLM from calling `set_output` prematurely before the user has had a chance to respond.
### Node Design: Fewer, Richer Nodes
Prefer fewer nodes that do more work over many thin single-purpose nodes:
- **Bad**: 8 thin nodes (parse query → search → fetch → evaluate → synthesize → write → check → save)
- **Good**: 4 rich nodes (intake → research → review → report)
Why: Each node boundary requires serializing outputs and passing context. Fewer nodes means the LLM retains full context of its work within the node. A research node that searches, fetches, and analyzes keeps all the source material in its conversation history.
### nullable_output_keys for Cross-Edge Inputs
When a node receives inputs that only arrive on certain edges (e.g., `feedback` only comes from a review → research feedback loop, not from intake → research), mark those keys as `nullable_output_keys`:
```python
research_node = NodeSpec(
id="research",
input_keys=["research_brief", "feedback"],
nullable_output_keys=["feedback"], # Not present on first visit
max_node_visits=3,
...
)
```
## Event Loop Architecture Concepts
### How EventLoopNode Works
An event loop node runs a multi-turn loop:
1. LLM receives system prompt + conversation history
2. LLM responds (text and/or tool calls)
3. Tool calls are executed, results added to conversation
4. Judge evaluates: ACCEPT (exit loop), RETRY (loop again), or ESCALATE
5. Repeat until judge ACCEPTs or max_iterations reached
### EventLoopNode Runtime
EventLoopNodes are **auto-created** by `GraphExecutor` at runtime. You do NOT need to manually register them. Both `GraphExecutor` (direct) and `AgentRuntime` / `create_agent_runtime()` handle event_loop nodes automatically.
```python
# Direct execution — executor auto-creates EventLoopNodes
from framework.graph.executor import GraphExecutor
from framework.runtime.core import Runtime
runtime = Runtime(storage_path)
executor = GraphExecutor(
runtime=runtime,
llm=llm,
tools=tools,
tool_executor=tool_executor,
storage_path=storage_path,
)
result = await executor.execute(graph=graph, goal=goal, input_data=input_data)
# TUI execution — AgentRuntime also works
from framework.runtime.agent_runtime import create_agent_runtime
runtime = create_agent_runtime(
graph=graph, goal=goal, storage_path=storage_path,
entry_points=[...], llm=llm, tools=tools, tool_executor=tool_executor,
)
```
### set_output
Nodes produce structured outputs by calling `set_output(key, value)` — a synthetic tool injected by the framework. When the LLM calls `set_output`, the value is stored in the output accumulator and made available to downstream nodes via shared memory.
`set_output` is NOT a real tool — it is excluded from `real_tool_results`. For client-facing nodes, this means a turn where the LLM only calls `set_output` (no other tools) is treated as a conversational boundary and will block for user input.
### JudgeProtocol
**The judge is the SOLE mechanism for acceptance decisions.** Do not add ad-hoc framework gating, output rollback, or premature rejection logic. If the LLM calls `set_output` too early, fix it with better prompts or a custom judge — not framework-level guards.
The judge controls when a node's loop exits:
- **Implicit judge** (default, no judge configured): ACCEPTs when the LLM finishes with no tool calls and all required output keys are set
- **SchemaJudge**: Validates outputs against a Pydantic model
- **Custom judges**: Implement `evaluate(context) -> JudgeVerdict`
### LoopConfig
Controls loop behavior:
- `max_iterations` (default 50) — prevents infinite loops
- `max_tool_calls_per_turn` (default 10) — limits tool calls per LLM response
- `tool_call_overflow_margin` (default 0.5) — wiggle room before discarding extra tool calls (50% means hard cutoff at 150% of limit)
- `stall_detection_threshold` (default 3) — detects repeated identical responses
- `max_history_tokens` (default 32000) — triggers conversation compaction
### Data Tools (Spillover Management)
When tool results exceed the context window, the framework automatically saves them to a spillover directory and truncates with a hint. Nodes that produce or consume large data should include the data tools:
- `save_data(filename, data)` — Write data to a file in the data directory
- `load_data(filename, offset=0, limit=50)` — Read data with line-based pagination
- `list_data_files()` — List available data files
- `serve_file_to_user(filename, label="")` — Get a clickable file:// URI for the user
Note: `data_dir` is a framework-injected context parameter — the LLM never sees or passes it. `GraphExecutor.execute()` sets it per-execution via `contextvars`, so data tools and spillover always share the same session-scoped directory.
These are real MCP tools (not synthetic). Add them to nodes that handle large tool results:
```python
research_node = NodeSpec(
...
tools=["web_search", "web_scrape", "load_data", "save_data", "list_data_files"],
)
```
### Fan-Out / Fan-In
Multiple ON_SUCCESS edges from the same source create parallel execution. All branches run concurrently via `asyncio.gather()`. Parallel event_loop nodes must have disjoint `output_keys`.
### max_node_visits
Controls how many times a node can execute in one graph run. Default is 1. Set higher for nodes that are targets of feedback edges (review-reject loops). Set 0 for unlimited (guarded by max_steps).
## Tool Discovery & Validation
**CRITICAL:** Before adding a node with tools, you MUST verify the tools exist.
Tools are provided by MCP servers. Never assume a tool exists - always discover dynamically.
### Step 1: Register MCP Server (if not already done)
```python
mcp__agent-builder__add_mcp_server(
name="tools",
transport="stdio",
command="python",
args='["mcp_server.py", "--stdio"]',
cwd="../tools"
)
```
### Step 2: Discover Available Tools
```python
# List all tools from all registered servers
mcp__agent-builder__list_mcp_tools()
# Or list tools from a specific server
mcp__agent-builder__list_mcp_tools(server_name="tools")
```
### Step 3: Validate Before Adding Nodes
Before writing a node with `tools=[...]`:
1. Call `list_mcp_tools()` to get available tools
2. Check each tool in your node exists in the response
3. If a tool doesn't exist:
- **DO NOT proceed** with the node
- Inform the user: "The tool 'X' is not available. Available tools are: ..."
- Ask if they want to use an alternative or proceed without the tool
### Tool Validation Anti-Patterns
- **Never assume a tool exists** - always call `list_mcp_tools()` first
- **Never write a node with unverified tools** - validate before writing
- **Never silently drop tools** - if a tool doesn't exist, inform the user
- **Never guess tool names** - use exact names from discovery response
## Workflow Overview: Incremental File Construction
```
1. CREATE PACKAGE → mkdir + write skeletons
2. DEFINE GOAL → Write to agent.py + config.py
3. FOR EACH NODE:
- Propose design (event_loop for LLM work, function for deterministic)
- User approves
- Write to nodes/__init__.py IMMEDIATELY
- (Optional) Validate with test_node
4. CONNECT EDGES → Update agent.py
- Use priority for feedback edges (negative priority)
- (Optional) Validate with validate_graph
5. FINALIZE → Write agent class to agent.py
6. DONE - Agent ready at exports/my_agent/
```
**Files written immediately. MCP tools optional for validation/testing bookkeeping.**
## When to Use This Skill
Use hive-concepts when:
- Starting a new agent project and need to understand fundamentals
- Need to understand agent architecture before building
- Want to validate tool availability before proceeding
- Learning about node types, edges, and graph execution
**Next Steps:**
- Ready to build? → Use `hive-create` skill
- Need patterns and examples? → Use `hive-patterns` skill
## MCP Tools for Validation
After writing files, optionally use MCP tools for validation:
**test_node** - Validate node configuration with mock inputs
```python
mcp__agent-builder__test_node(
node_id="search-web",
test_input='{"query": "test query"}',
mock_llm_response='{"results": "mock output"}'
)
```
**validate_graph** - Check graph structure
```python
mcp__agent-builder__validate_graph()
# Returns: unreachable nodes, missing connections, event_loop validation, etc.
```
**configure_loop** - Set event loop parameters
```python
mcp__agent-builder__configure_loop(
max_iterations=50,
max_tool_calls_per_turn=10,
stall_detection_threshold=3,
max_history_tokens=32000
)
```
**Key Point:** Files are written FIRST. MCP tools are for validation only.
## Related Skills
- **hive-create** - Step-by-step building process
- **hive-patterns** - Best practices: judges, feedback edges, fan-out, context management
- **hive** - Complete workflow orchestrator
- **hive-test** - Test and validate completed agents
File diff suppressed because it is too large Load Diff
@@ -1,24 +0,0 @@
"""
Deep Research Agent - Interactive, rigorous research with TUI conversation.
Research any topic through multi-source web search, quality evaluation,
and synthesis. Features client-facing TUI interaction at key checkpoints
for user guidance and iterative deepening.
"""
from .agent import DeepResearchAgent, default_agent, goal, nodes, edges
from .config import RuntimeConfig, AgentMetadata, default_config, metadata
__version__ = "1.0.0"
__all__ = [
"DeepResearchAgent",
"default_agent",
"goal",
"nodes",
"edges",
"RuntimeConfig",
"AgentMetadata",
"default_config",
"metadata",
]
@@ -1,241 +0,0 @@
"""
CLI entry point for Deep Research Agent.
Uses AgentRuntime for multi-entrypoint support with HITL pause/resume.
"""
import asyncio
import json
import logging
import sys
import click
from .agent import default_agent, DeepResearchAgent
def setup_logging(verbose=False, debug=False):
"""Configure logging for execution visibility."""
if debug:
level, fmt = logging.DEBUG, "%(asctime)s %(name)s: %(message)s"
elif verbose:
level, fmt = logging.INFO, "%(message)s"
else:
level, fmt = logging.WARNING, "%(levelname)s: %(message)s"
logging.basicConfig(level=level, format=fmt, stream=sys.stderr)
logging.getLogger("framework").setLevel(level)
@click.group()
@click.version_option(version="1.0.0")
def cli():
"""Deep Research Agent - Interactive, rigorous research with TUI conversation."""
pass
@cli.command()
@click.option("--topic", "-t", type=str, required=True, help="Research topic")
@click.option("--mock", is_flag=True, help="Run in mock mode")
@click.option("--quiet", "-q", is_flag=True, help="Only output result JSON")
@click.option("--verbose", "-v", is_flag=True, help="Show execution details")
@click.option("--debug", is_flag=True, help="Show debug logging")
def run(topic, mock, quiet, verbose, debug):
"""Execute research on a topic."""
if not quiet:
setup_logging(verbose=verbose, debug=debug)
context = {"topic": topic}
result = asyncio.run(default_agent.run(context, mock_mode=mock))
output_data = {
"success": result.success,
"steps_executed": result.steps_executed,
"output": result.output,
}
if result.error:
output_data["error"] = result.error
click.echo(json.dumps(output_data, indent=2, default=str))
sys.exit(0 if result.success else 1)
@cli.command()
@click.option("--mock", is_flag=True, help="Run in mock mode")
@click.option("--verbose", "-v", is_flag=True, help="Show execution details")
@click.option("--debug", is_flag=True, help="Show debug logging")
def tui(mock, verbose, debug):
"""Launch the TUI dashboard for interactive research."""
setup_logging(verbose=verbose, debug=debug)
try:
from framework.tui.app import AdenTUI
except ImportError:
click.echo(
"TUI requires the 'textual' package. Install with: pip install textual"
)
sys.exit(1)
from pathlib import Path
from framework.llm import LiteLLMProvider
from framework.runner.tool_registry import ToolRegistry
from framework.runtime.agent_runtime import create_agent_runtime
from framework.runtime.event_bus import EventBus
from framework.runtime.execution_stream import EntryPointSpec
async def run_with_tui():
agent = DeepResearchAgent()
# Build graph and tools
agent._event_bus = EventBus()
agent._tool_registry = ToolRegistry()
storage_path = Path.home() / ".hive" / "agents" / "deep_research_agent"
storage_path.mkdir(parents=True, exist_ok=True)
mcp_config_path = Path(__file__).parent / "mcp_servers.json"
if mcp_config_path.exists():
agent._tool_registry.load_mcp_config(mcp_config_path)
llm = None
if not mock:
llm = LiteLLMProvider(
model=agent.config.model,
api_key=agent.config.api_key,
api_base=agent.config.api_base,
)
tools = list(agent._tool_registry.get_tools().values())
tool_executor = agent._tool_registry.get_executor()
graph = agent._build_graph()
runtime = create_agent_runtime(
graph=graph,
goal=agent.goal,
storage_path=storage_path,
entry_points=[
EntryPointSpec(
id="start",
name="Start Research",
entry_node="intake",
trigger_type="manual",
isolation_level="isolated",
),
],
llm=llm,
tools=tools,
tool_executor=tool_executor,
)
await runtime.start()
try:
app = AdenTUI(runtime)
await app.run_async()
finally:
await runtime.stop()
asyncio.run(run_with_tui())
@cli.command()
@click.option("--json", "output_json", is_flag=True)
def info(output_json):
"""Show agent information."""
info_data = default_agent.info()
if output_json:
click.echo(json.dumps(info_data, indent=2))
else:
click.echo(f"Agent: {info_data['name']}")
click.echo(f"Version: {info_data['version']}")
click.echo(f"Description: {info_data['description']}")
click.echo(f"\nNodes: {', '.join(info_data['nodes'])}")
click.echo(f"Client-facing: {', '.join(info_data['client_facing_nodes'])}")
click.echo(f"Entry: {info_data['entry_node']}")
click.echo(f"Terminal: {', '.join(info_data['terminal_nodes'])}")
@cli.command()
def validate():
"""Validate agent structure."""
validation = default_agent.validate()
if validation["valid"]:
click.echo("Agent is valid")
if validation["warnings"]:
for warning in validation["warnings"]:
click.echo(f" WARNING: {warning}")
else:
click.echo("Agent has errors:")
for error in validation["errors"]:
click.echo(f" ERROR: {error}")
sys.exit(0 if validation["valid"] else 1)
@cli.command()
@click.option("--verbose", "-v", is_flag=True)
def shell(verbose):
"""Interactive research session (CLI, no TUI)."""
asyncio.run(_interactive_shell(verbose))
async def _interactive_shell(verbose=False):
"""Async interactive shell."""
setup_logging(verbose=verbose)
click.echo("=== Deep Research Agent ===")
click.echo("Enter a topic to research (or 'quit' to exit):\n")
agent = DeepResearchAgent()
await agent.start()
try:
while True:
try:
topic = await asyncio.get_event_loop().run_in_executor(
None, input, "Topic> "
)
if topic.lower() in ["quit", "exit", "q"]:
click.echo("Goodbye!")
break
if not topic.strip():
continue
click.echo("\nResearching...\n")
result = await agent.trigger_and_wait("start", {"topic": topic})
if result is None:
click.echo("\n[Execution timed out]\n")
continue
if result.success:
output = result.output
if "report_content" in output:
click.echo("\n--- Report ---\n")
click.echo(output["report_content"])
click.echo("\n")
if "references" in output:
click.echo("--- References ---\n")
for ref in output.get("references", []):
click.echo(
f" [{ref.get('number', '?')}] {ref.get('title', '')} - {ref.get('url', '')}"
)
click.echo("\n")
else:
click.echo(f"\nResearch failed: {result.error}\n")
except KeyboardInterrupt:
click.echo("\nGoodbye!")
break
except Exception as e:
click.echo(f"Error: {e}", err=True)
import traceback
traceback.print_exc()
finally:
await agent.stop()
if __name__ == "__main__":
cli()
@@ -1,358 +0,0 @@
"""Agent graph construction for Deep Research Agent."""
from pathlib import Path
from framework.graph import EdgeSpec, EdgeCondition, Goal, SuccessCriterion, Constraint
from framework.graph.edge import GraphSpec
from framework.graph.executor import ExecutionResult
from framework.graph.checkpoint_config import CheckpointConfig
from framework.llm import LiteLLMProvider
from framework.runner.tool_registry import ToolRegistry
from framework.runtime.agent_runtime import AgentRuntime, create_agent_runtime
from framework.runtime.execution_stream import EntryPointSpec
from .config import default_config, metadata
from .nodes import (
intake_node,
research_node,
review_node,
report_node,
)
# Goal definition
goal = Goal(
id="rigorous-interactive-research",
name="Rigorous Interactive Research",
description=(
"Research any topic by searching diverse sources, analyzing findings, "
"and producing a cited report — with user checkpoints to guide direction."
),
success_criteria=[
SuccessCriterion(
id="source-diversity",
description="Use multiple diverse, authoritative sources",
metric="source_count",
target=">=5",
weight=0.25,
),
SuccessCriterion(
id="citation-coverage",
description="Every factual claim in the report cites its source",
metric="citation_coverage",
target="100%",
weight=0.25,
),
SuccessCriterion(
id="user-satisfaction",
description="User reviews findings before report generation",
metric="user_approval",
target="true",
weight=0.25,
),
SuccessCriterion(
id="report-completeness",
description="Final report answers the original research questions",
metric="question_coverage",
target="90%",
weight=0.25,
),
],
constraints=[
Constraint(
id="no-hallucination",
description="Only include information found in fetched sources",
constraint_type="quality",
category="accuracy",
),
Constraint(
id="source-attribution",
description="Every claim must cite its source with a numbered reference",
constraint_type="quality",
category="accuracy",
),
Constraint(
id="user-checkpoint",
description="Present findings to the user before writing the final report",
constraint_type="functional",
category="interaction",
),
],
)
# Node list
nodes = [
intake_node,
research_node,
review_node,
report_node,
]
# Edge definitions
edges = [
# intake -> research
EdgeSpec(
id="intake-to-research",
source="intake",
target="research",
condition=EdgeCondition.ON_SUCCESS,
priority=1,
),
# research -> review
EdgeSpec(
id="research-to-review",
source="research",
target="review",
condition=EdgeCondition.ON_SUCCESS,
priority=1,
),
# review -> research (feedback loop)
EdgeSpec(
id="review-to-research-feedback",
source="review",
target="research",
condition=EdgeCondition.CONDITIONAL,
condition_expr="needs_more_research == True",
priority=1,
),
# review -> report (user satisfied)
EdgeSpec(
id="review-to-report",
source="review",
target="report",
condition=EdgeCondition.CONDITIONAL,
condition_expr="needs_more_research == False",
priority=2,
),
# report -> research (user wants deeper research on current topic)
EdgeSpec(
id="report-to-research",
source="report",
target="research",
condition=EdgeCondition.CONDITIONAL,
condition_expr="str(next_action).lower() == 'more_research'",
priority=2,
),
# report -> intake (user wants a new topic — default when not more_research)
EdgeSpec(
id="report-to-intake",
source="report",
target="intake",
condition=EdgeCondition.CONDITIONAL,
condition_expr="str(next_action).lower() != 'more_research'",
priority=1,
),
]
# Graph configuration
entry_node = "intake"
entry_points = {"start": "intake"}
pause_nodes = []
terminal_nodes = []
class DeepResearchAgent:
"""
Deep Research Agent 4-node pipeline with user checkpoints.
Flow: intake -> research -> review -> report
^ |
+-- feedback loop (if user wants more)
Uses AgentRuntime for proper session management:
- Session-scoped storage (sessions/{session_id}/)
- Checkpointing for resume capability
- Runtime logging
- Data folder for save_data/load_data
"""
def __init__(self, config=None):
self.config = config or default_config
self.goal = goal
self.nodes = nodes
self.edges = edges
self.entry_node = entry_node
self.entry_points = entry_points
self.pause_nodes = pause_nodes
self.terminal_nodes = terminal_nodes
self._graph: GraphSpec | None = None
self._agent_runtime: AgentRuntime | None = None
self._tool_registry: ToolRegistry | None = None
self._storage_path: Path | None = None
def _build_graph(self) -> GraphSpec:
"""Build the GraphSpec."""
return GraphSpec(
id="deep-research-agent-graph",
goal_id=self.goal.id,
version="1.0.0",
entry_node=self.entry_node,
entry_points=self.entry_points,
terminal_nodes=self.terminal_nodes,
pause_nodes=self.pause_nodes,
nodes=self.nodes,
edges=self.edges,
default_model=self.config.model,
max_tokens=self.config.max_tokens,
loop_config={
"max_iterations": 100,
"max_tool_calls_per_turn": 20,
"max_history_tokens": 32000,
},
conversation_mode="continuous",
identity_prompt=(
"You are a rigorous research agent. You search for information "
"from diverse, authoritative sources, analyze findings critically, "
"and produce well-cited reports. You never fabricate information — "
"every claim must trace back to a source you actually retrieved."
),
)
def _setup(self, mock_mode=False) -> None:
"""Set up the agent runtime with sessions, checkpoints, and logging."""
self._storage_path = Path.home() / ".hive" / "agents" / "deep_research_agent"
self._storage_path.mkdir(parents=True, exist_ok=True)
self._tool_registry = ToolRegistry()
mcp_config_path = Path(__file__).parent / "mcp_servers.json"
if mcp_config_path.exists():
self._tool_registry.load_mcp_config(mcp_config_path)
llm = None
if not mock_mode:
llm = LiteLLMProvider(
model=self.config.model,
api_key=self.config.api_key,
api_base=self.config.api_base,
)
tool_executor = self._tool_registry.get_executor()
tools = list(self._tool_registry.get_tools().values())
self._graph = self._build_graph()
checkpoint_config = CheckpointConfig(
enabled=True,
checkpoint_on_node_start=False,
checkpoint_on_node_complete=True,
checkpoint_max_age_days=7,
async_checkpoint=True,
)
entry_point_specs = [
EntryPointSpec(
id="default",
name="Default",
entry_node=self.entry_node,
trigger_type="manual",
isolation_level="shared",
)
]
self._agent_runtime = create_agent_runtime(
graph=self._graph,
goal=self.goal,
storage_path=self._storage_path,
entry_points=entry_point_specs,
llm=llm,
tools=tools,
tool_executor=tool_executor,
checkpoint_config=checkpoint_config,
)
async def start(self, mock_mode=False) -> None:
"""Set up and start the agent runtime."""
if self._agent_runtime is None:
self._setup(mock_mode=mock_mode)
if not self._agent_runtime.is_running:
await self._agent_runtime.start()
async def stop(self) -> None:
"""Stop the agent runtime and clean up."""
if self._agent_runtime and self._agent_runtime.is_running:
await self._agent_runtime.stop()
self._agent_runtime = None
async def trigger_and_wait(
self,
entry_point: str = "default",
input_data: dict | None = None,
timeout: float | None = None,
session_state: dict | None = None,
) -> ExecutionResult | None:
"""Execute the graph and wait for completion."""
if self._agent_runtime is None:
raise RuntimeError("Agent not started. Call start() first.")
return await self._agent_runtime.trigger_and_wait(
entry_point_id=entry_point,
input_data=input_data or {},
session_state=session_state,
)
async def run(
self, context: dict, mock_mode=False, session_state=None
) -> ExecutionResult:
"""Run the agent (convenience method for single execution)."""
await self.start(mock_mode=mock_mode)
try:
result = await self.trigger_and_wait(
"default", context, session_state=session_state
)
return result or ExecutionResult(success=False, error="Execution timeout")
finally:
await self.stop()
def info(self):
"""Get agent information."""
return {
"name": metadata.name,
"version": metadata.version,
"description": metadata.description,
"goal": {
"name": self.goal.name,
"description": self.goal.description,
},
"nodes": [n.id for n in self.nodes],
"edges": [e.id for e in self.edges],
"entry_node": self.entry_node,
"entry_points": self.entry_points,
"pause_nodes": self.pause_nodes,
"terminal_nodes": self.terminal_nodes,
"client_facing_nodes": [n.id for n in self.nodes if n.client_facing],
}
def validate(self):
"""Validate agent structure."""
errors = []
warnings = []
node_ids = {node.id for node in self.nodes}
for edge in self.edges:
if edge.source not in node_ids:
errors.append(f"Edge {edge.id}: source '{edge.source}' not found")
if edge.target not in node_ids:
errors.append(f"Edge {edge.id}: target '{edge.target}' not found")
if self.entry_node not in node_ids:
errors.append(f"Entry node '{self.entry_node}' not found")
for terminal in self.terminal_nodes:
if terminal not in node_ids:
errors.append(f"Terminal node '{terminal}' not found")
for ep_id, node_id in self.entry_points.items():
if node_id not in node_ids:
errors.append(
f"Entry point '{ep_id}' references unknown node '{node_id}'"
)
return {
"valid": len(errors) == 0,
"errors": errors,
"warnings": warnings,
}
# Create default instance
default_agent = DeepResearchAgent()
@@ -1,26 +0,0 @@
"""Runtime configuration."""
from dataclasses import dataclass
from framework.config import RuntimeConfig
default_config = RuntimeConfig()
@dataclass
class AgentMetadata:
name: str = "Deep Research Agent"
version: str = "1.0.0"
description: str = (
"Interactive research agent that rigorously investigates topics through "
"multi-source search, quality evaluation, and synthesis - with TUI conversation "
"at key checkpoints for user guidance and feedback."
)
intro_message: str = (
"Hi! I'm your deep research assistant. Tell me a topic and I'll investigate it "
"thoroughly — searching multiple sources, evaluating quality, and synthesizing "
"a comprehensive report. What would you like me to research?"
)
metadata = AgentMetadata()
@@ -1,204 +0,0 @@
"""Node definitions for Deep Research Agent."""
from framework.graph import NodeSpec
# Node 1: Intake (client-facing)
# Brief conversation to clarify what the user wants researched.
intake_node = NodeSpec(
id="intake",
name="Research Intake",
description="Discuss the research topic with the user, clarify scope, and confirm direction",
node_type="event_loop",
client_facing=True,
max_node_visits=0,
input_keys=["topic"],
output_keys=["research_brief"],
success_criteria=(
"The research brief is specific and actionable: it states the topic, "
"the key questions to answer, the desired scope, and depth."
),
system_prompt="""\
You are a research intake specialist. The user wants to research a topic.
Have a brief conversation to clarify what they need.
**STEP 1 Read and respond (text only, NO tool calls):**
1. Read the topic provided
2. If it's vague, ask 1-2 clarifying questions (scope, angle, depth)
3. If it's already clear, confirm your understanding and ask the user to confirm
Keep it short. Don't over-ask.
**STEP 2 After the user confirms, call set_output:**
- set_output("research_brief", "A clear paragraph describing exactly what to research, \
what questions to answer, what scope to cover, and how deep to go.")
""",
tools=[],
)
# Node 2: Research
# The workhorse — searches the web, fetches content, analyzes sources.
# One node with both tools avoids the context-passing overhead of 5 separate nodes.
research_node = NodeSpec(
id="research",
name="Research",
description="Search the web, fetch source content, and compile findings",
node_type="event_loop",
max_node_visits=0,
input_keys=["research_brief", "feedback"],
output_keys=["findings", "sources", "gaps"],
nullable_output_keys=["feedback"],
success_criteria=(
"Findings reference at least 3 distinct sources with URLs. "
"Key claims are substantiated by fetched content, not generated."
),
system_prompt="""\
You are a research agent. Given a research brief, find and analyze sources.
If feedback is provided, this is a follow-up round focus on the gaps identified.
Work in phases:
1. **Search**: Use web_search with 3-5 diverse queries covering different angles.
Prioritize authoritative sources (.edu, .gov, established publications).
2. **Fetch**: Use web_scrape on the most promising URLs (aim for 5-8 sources).
Skip URLs that fail. Extract the substantive content.
3. **Analyze**: Review what you've collected. Identify key findings, themes,
and any contradictions between sources.
Important:
- Work in batches of 3-4 tool calls at a time never more than 10 per turn
- After each batch, assess whether you have enough material
- Prefer quality over quantity 5 good sources beat 15 thin ones
- Track which URL each finding comes from (you'll need citations later)
- Call set_output for each key in a SEPARATE turn (not in the same turn as other tool calls)
When done, use set_output (one key at a time, separate turns):
- set_output("findings", "Structured summary: key findings with source URLs for each claim. \
Include themes, contradictions, and confidence levels.")
- set_output("sources", [{"url": "...", "title": "...", "summary": "..."}])
- set_output("gaps", "What aspects of the research brief are NOT well-covered yet, if any.")
""",
tools=[
"web_search",
"web_scrape",
"load_data",
"save_data",
"append_data",
"list_data_files",
],
)
# Node 3: Review (client-facing)
# Shows the user what was found and asks whether to dig deeper or proceed.
review_node = NodeSpec(
id="review",
name="Review Findings",
description="Present findings to user and decide whether to research more or write the report",
node_type="event_loop",
client_facing=True,
max_node_visits=0,
input_keys=["findings", "sources", "gaps", "research_brief"],
output_keys=["needs_more_research", "feedback"],
success_criteria=(
"The user has been presented with findings and has explicitly indicated "
"whether they want more research or are ready for the report."
),
system_prompt="""\
Present the research findings to the user clearly and concisely.
**STEP 1 Present (your first message, text only, NO tool calls):**
1. **Summary** (2-3 sentences of what was found)
2. **Key Findings** (bulleted, with confidence levels)
3. **Sources Used** (count and quality assessment)
4. **Gaps** (what's still unclear or under-covered)
End by asking: Are they satisfied, or do they want deeper research? \
Should we proceed to writing the final report?
**STEP 2 After the user responds, call set_output:**
- set_output("needs_more_research", "true") if they want more
- set_output("needs_more_research", "false") if they're satisfied
- set_output("feedback", "What the user wants explored further, or empty string")
""",
tools=[],
)
# Node 4: Report (client-facing)
# Writes an HTML report, serves the link to the user, and answers follow-ups.
report_node = NodeSpec(
id="report",
name="Write & Deliver Report",
description="Write a cited HTML report from the findings and present it to the user",
node_type="event_loop",
client_facing=True,
max_node_visits=0,
input_keys=["findings", "sources", "research_brief"],
output_keys=["delivery_status", "next_action"],
success_criteria=(
"An HTML report has been saved, the file link has been presented to the user, "
"and the user has indicated what they want to do next."
),
system_prompt="""\
Write a research report as an HTML file and present it to the user.
IMPORTANT: save_data requires TWO separate arguments: filename and data.
Call it like: save_data(filename="report.html", data="<html>...</html>")
Do NOT use _raw, do NOT nest arguments inside a JSON string.
**STEP 1 Write and save the HTML report (tool calls, NO text to user yet):**
Build a clean HTML document. Keep the HTML concise aim for clarity over length.
Use minimal embedded CSS (a few lines of style, not a full framework).
Report structure:
- Title & date
- Executive Summary (2-3 paragraphs)
- Key Findings (organized by theme, with [n] citation links)
- Analysis (synthesis, implications)
- Conclusion (key takeaways)
- References (numbered list with clickable URLs)
Requirements:
- Every factual claim must cite its source with [n] notation
- Be objective present multiple viewpoints where sources disagree
- Answer the original research questions from the brief
Save the HTML:
save_data(filename="report.html", data="<html>...</html>")
Then get the clickable link:
serve_file_to_user(filename="report.html", label="Research Report")
If save_data fails, simplify and shorten the HTML, then retry.
**STEP 2 Present the link to the user (text only, NO tool calls):**
Tell the user the report is ready and include the file:// URI from
serve_file_to_user so they can click it to open. Give a brief summary
of what the report covers. Ask if they have questions or want to continue.
**STEP 3 After the user responds:**
- Answer any follow-up questions from the research material
- When the user is ready to move on, ask what they'd like to do next:
- Research a new topic?
- Dig deeper into the current topic?
- Then call set_output:
- set_output("delivery_status", "completed")
- set_output("next_action", "new_topic") if they want a new topic
- set_output("next_action", "more_research") if they want deeper research
""",
tools=[
"save_data",
"append_data",
"edit_data",
"serve_file_to_user",
"load_data",
"list_data_files",
],
)
__all__ = [
"intake_node",
"research_node",
"review_node",
"report_node",
]
-640
View File
@@ -1,640 +0,0 @@
---
name: hive-credentials
description: Set up and install credentials for an agent. Detects missing credentials from agent config, collects them from the user, and stores them securely in the local encrypted store at ~/.hive/credentials.
license: Apache-2.0
metadata:
author: hive
version: "2.3"
type: utility
---
# Setup Credentials
Interactive credential setup for agents with multiple authentication options. Detects what's missing, offers auth method choices, validates with health checks, and stores credentials securely.
## When to Use
- Before running or testing an agent for the first time
- When `AgentRunner.run()` fails with "missing required credentials"
- When a user asks to configure credentials for an agent
- After building a new agent that uses tools requiring API keys
## Workflow
### Step 1: Identify the Agent
Determine which agent needs credentials. The user will either:
- Name the agent directly (e.g., "set up credentials for hubspot-agent")
- Have an agent directory open (check `exports/` for agent dirs)
- Be working on an agent in the current session
Locate the agent's directory under `exports/{agent_name}/`.
### Step 2: Detect Missing Credentials
Use the `check_missing_credentials` MCP tool to detect what the agent needs and what's already configured. This tool loads the agent, inspects its required tools and node types, maps them to credentials via `CREDENTIAL_SPECS`, and checks both the encrypted store and environment variables.
```
check_missing_credentials(agent_path="exports/{agent_name}")
```
The tool returns a JSON response:
```json
{
"agent": "exports/{agent_name}",
"missing": [
{
"credential_name": "brave_search",
"env_var": "BRAVE_SEARCH_API_KEY",
"description": "Brave Search API key for web search",
"help_url": "https://brave.com/search/api/",
"tools": ["web_search"]
}
],
"available": [
{
"credential_name": "anthropic",
"env_var": "ANTHROPIC_API_KEY",
"source": "encrypted_store"
}
],
"total_missing": 1,
"ready": false
}
```
**If `ready` is true (nothing missing):** Report all credentials as configured and skip Steps 3-5. Example:
```
All required credentials are already configured:
✓ anthropic (ANTHROPIC_API_KEY)
✓ brave_search (BRAVE_SEARCH_API_KEY)
Your agent is ready to run!
```
**If credentials are missing:** Continue to Step 3 with the `missing` list.
### Step 3: Present Auth Options for Each Missing Credential
For each missing credential, check what authentication methods are available:
```python
from aden_tools.credentials import CREDENTIAL_SPECS
spec = CREDENTIAL_SPECS.get("hubspot")
if spec:
# Determine available auth options
auth_options = []
if spec.aden_supported:
auth_options.append("aden")
if spec.direct_api_key_supported:
auth_options.append("direct")
auth_options.append("custom") # Always available
# Get setup info
setup_info = {
"env_var": spec.env_var,
"description": spec.description,
"help_url": spec.help_url,
"api_key_instructions": spec.api_key_instructions,
}
```
Present the available options using AskUserQuestion:
```
Choose how to configure HUBSPOT_ACCESS_TOKEN:
1) Aden Platform (OAuth) (Recommended)
Secure OAuth2 flow via hive.adenhq.com
- Quick setup with automatic token refresh
- No need to manage API keys manually
2) Direct API Key
Enter your own API key manually
- Requires creating a HubSpot Private App
- Full control over scopes and permissions
3) Local Credential Setup (Advanced)
Programmatic configuration for CI/CD
- For automated deployments
- Requires manual API calls
```
### Step 4: Execute Auth Flow Based on User Choice
#### Prerequisite: Ensure HIVE_CREDENTIAL_KEY Is Available
Before storing any credentials, verify `HIVE_CREDENTIAL_KEY` is set (needed to encrypt/decrypt the local store). Check both the current session and shell config:
```bash
# Check current session
printenv HIVE_CREDENTIAL_KEY > /dev/null 2>&1 && echo "session: set" || echo "session: not set"
# Check shell config files
for f in ~/.zshrc ~/.bashrc ~/.profile; do [ -f "$f" ] && grep -q 'HIVE_CREDENTIAL_KEY' "$f" && echo "$f"; done
```
- **In current session** — proceed to store credentials
- **In shell config but NOT in current session** — run `source ~/.zshrc` (or `~/.bashrc`) first, then proceed
- **Not set anywhere** — `EncryptedFileStorage` will auto-generate one. After storing, tell the user to persist it: `export HIVE_CREDENTIAL_KEY="{generated_key}"` in their shell profile
> **⚠️ IMPORTANT: After adding `HIVE_CREDENTIAL_KEY` to the user's shell config, always display:**
> ```
> ⚠️ Environment variables were added to your shell config.
> Open a NEW TERMINAL for them to take effect outside this session.
> ```
#### Option 1: Aden Platform (OAuth)
This is the recommended flow for supported integrations (HubSpot, etc.).
**How Aden OAuth Works:**
The ADEN_API_KEY represents a user who has already completed OAuth authorization on Aden's platform. When users sign up and connect integrations on Aden, those OAuth tokens are stored server-side. Having an ADEN_API_KEY means:
1. User has an Aden account
2. User has already authorized integrations (HubSpot, etc.) via OAuth on Aden
3. We just need to sync those credentials down to the local credential store
**4.1a. Check for ADEN_API_KEY**
```python
import os
aden_key = os.environ.get("ADEN_API_KEY")
```
If not set, guide user to get one from Aden (this is where they do OAuth):
```python
from aden_tools.credentials import open_browser, get_aden_setup_url
# Open browser to Aden - user will sign up and connect integrations there
url = get_aden_setup_url() # https://hive.adenhq.com
success, msg = open_browser(url)
print("Please sign in to Aden and connect your integrations (HubSpot, etc.).")
print("Once done, copy your API key and return here.")
```
Ask user to provide the ADEN_API_KEY they received.
**4.1b. Save ADEN_API_KEY to Shell Config**
With user approval, persist ADEN_API_KEY to their shell config:
```python
from aden_tools.credentials import (
detect_shell,
add_env_var_to_shell_config,
get_shell_source_command,
)
shell_type = detect_shell() # 'bash', 'zsh', or 'unknown'
# Ask user for approval before modifying shell config
# If approved:
success, config_path = add_env_var_to_shell_config(
"ADEN_API_KEY",
user_provided_key,
comment="Aden Platform (OAuth) API key"
)
if success:
source_cmd = get_shell_source_command()
print(f"Saved to {config_path}")
print(f"Run: {source_cmd}")
```
> **⚠️ IMPORTANT: After adding `ADEN_API_KEY` to the user's shell config, always display:**
> ```
> ⚠️ Environment variables were added to your shell config.
> Open a NEW TERMINAL for them to take effect outside this session.
> ```
Also save to `~/.hive/configuration.json` for the framework:
```python
import json
from pathlib import Path
config_path = Path.home() / ".hive" / "configuration.json"
config = json.loads(config_path.read_text()) if config_path.exists() else {}
config["aden"] = {
"api_key_configured": True,
"api_url": "https://api.adenhq.com"
}
config_path.parent.mkdir(parents=True, exist_ok=True)
config_path.write_text(json.dumps(config, indent=2))
```
**4.1c. Sync Credentials from Aden Server**
Since the user has already authorized integrations on Aden, use the one-liner factory method:
```python
from core.framework.credentials import CredentialStore
# This single call handles everything:
# - Creates encrypted local storage at ~/.hive/credentials
# - Configures Aden client from ADEN_API_KEY env var
# - Syncs all credentials from Aden server automatically
store = CredentialStore.with_aden_sync(
base_url="https://api.adenhq.com",
auto_sync=True, # Syncs on creation
)
# Check what was synced
synced = store.list_credentials()
print(f"Synced credentials: {synced}")
# If the required credential wasn't synced, the user hasn't authorized it on Aden yet
if "hubspot" not in synced:
print("HubSpot not found in your Aden account.")
print("Please visit https://hive.adenhq.com to connect HubSpot, then try again.")
```
For more control over the sync process:
```python
from core.framework.credentials import CredentialStore
from core.framework.credentials.aden import (
AdenCredentialClient,
AdenClientConfig,
AdenSyncProvider,
)
# Create client (API key loaded from ADEN_API_KEY env var)
client = AdenCredentialClient(AdenClientConfig(
base_url="https://api.adenhq.com",
))
# Create provider and store
provider = AdenSyncProvider(client=client)
store = CredentialStore.with_encrypted_storage()
# Manual sync
synced_count = provider.sync_all(store)
print(f"Synced {synced_count} credentials from Aden")
```
**4.1d. Run Health Check**
```python
from aden_tools.credentials import check_credential_health
# Get the token from the store
cred = store.get_credential("hubspot")
token = cred.keys["access_token"].value.get_secret_value()
result = check_credential_health("hubspot", token)
if result.valid:
print("HubSpot credentials validated successfully!")
else:
print(f"Validation failed: {result.message}")
# Offer to retry the OAuth flow
```
#### Option 2: Direct API Key
For users who prefer manual API key management.
**4.2a. Show Setup Instructions**
```python
from aden_tools.credentials import CREDENTIAL_SPECS
spec = CREDENTIAL_SPECS.get("hubspot")
if spec and spec.api_key_instructions:
print(spec.api_key_instructions)
# Output:
# To get a HubSpot Private App token:
# 1. Go to HubSpot Settings > Integrations > Private Apps
# 2. Click "Create a private app"
# 3. Name your app (e.g., "Hive Agent")
# ...
if spec and spec.help_url:
print(f"More info: {spec.help_url}")
```
**4.2b. Collect API Key from User**
Use AskUserQuestion to securely collect the API key:
```
Please provide your HubSpot access token:
(This will be stored securely in ~/.hive/credentials)
```
**4.2c. Run Health Check Before Storing**
```python
from aden_tools.credentials import check_credential_health
result = check_credential_health("hubspot", user_provided_token)
if not result.valid:
print(f"Warning: {result.message}")
# Ask user if they want to:
# 1. Try a different token
# 2. Continue anyway (not recommended)
```
**4.2d. Store in Local Encrypted Store**
```python
from core.framework.credentials import CredentialStore, CredentialObject, CredentialKey
from pydantic import SecretStr
store = CredentialStore.with_encrypted_storage()
cred = CredentialObject(
id="hubspot",
name="HubSpot Access Token",
keys={
"access_token": CredentialKey(
name="access_token",
value=SecretStr(user_provided_token),
)
},
)
store.save_credential(cred)
```
**4.2e. Export to Current Session**
```bash
export HUBSPOT_ACCESS_TOKEN="the-value"
```
#### Option 3: Local Credential Setup (Advanced)
For programmatic/CI/CD setups.
**4.3a. Show Documentation**
```
For advanced credential management, you can use the CredentialStore API directly:
from core.framework.credentials import CredentialStore, CredentialObject, CredentialKey
from pydantic import SecretStr
store = CredentialStore.with_encrypted_storage()
cred = CredentialObject(
id="hubspot",
name="HubSpot Access Token",
keys={"access_token": CredentialKey(name="access_token", value=SecretStr("..."))}
)
store.save_credential(cred)
For CI/CD environments:
- Set HIVE_CREDENTIAL_KEY for encryption
- Pre-populate ~/.hive/credentials programmatically
- Or use environment variables directly (HUBSPOT_ACCESS_TOKEN)
Documentation: See core/framework/credentials/README.md
```
### Step 5: Record Configuration Method
Track which auth method was used for each credential in `~/.hive/configuration.json`:
```python
import json
from pathlib import Path
from datetime import datetime
config_path = Path.home() / ".hive" / "configuration.json"
config = json.loads(config_path.read_text()) if config_path.exists() else {}
if "credential_methods" not in config:
config["credential_methods"] = {}
config["credential_methods"]["hubspot"] = {
"method": "aden", # or "direct" or "custom"
"configured_at": datetime.now().isoformat(),
}
config_path.write_text(json.dumps(config, indent=2))
```
### Step 6: Verify All Credentials
Use the `verify_credentials` MCP tool to confirm everything is properly configured:
```
verify_credentials(agent_path="exports/{agent_name}")
```
The tool returns:
```json
{
"agent": "exports/{agent_name}",
"ready": true,
"missing_credentials": [],
"warnings": [],
"errors": []
}
```
If `ready` is true, report success. If `missing_credentials` is non-empty, identify what failed and loop back to Step 3 for the remaining credentials.
## Health Check Reference
Health checks validate credentials by making lightweight API calls:
| Credential | Endpoint | What It Checks |
| --------------- | --------------------------------------- | --------------------------------- |
| `anthropic` | `POST /v1/messages` | API key validity |
| `brave_search` | `GET /res/v1/web/search?q=test&count=1` | API key validity |
| `google_search` | `GET /customsearch/v1?q=test&num=1` | API key + CSE ID validity |
| `github` | `GET /user` | Token validity, user identity |
| `hubspot` | `GET /crm/v3/objects/contacts?limit=1` | Bearer token validity, CRM scopes |
| `resend` | `GET /domains` | API key validity |
```python
from aden_tools.credentials import check_credential_health, HealthCheckResult
result: HealthCheckResult = check_credential_health("hubspot", token_value)
# result.valid: bool
# result.message: str
# result.details: dict (status_code, rate_limited, etc.)
```
## Encryption Key (HIVE_CREDENTIAL_KEY)
The local encrypted store requires `HIVE_CREDENTIAL_KEY` to encrypt/decrypt credentials.
- If the user doesn't have one, `EncryptedFileStorage` will auto-generate one and log it
- The user MUST persist this key (e.g., in `~/.bashrc`/`~/.zshrc` or a secrets manager)
- Without this key, stored credentials cannot be decrypted
**Shell config rule:** Only TWO keys belong in shell config (`~/.zshrc`/`~/.bashrc`):
- `HIVE_CREDENTIAL_KEY` — encryption key for the credential store
- `ADEN_API_KEY` — Aden platform auth key (needed before the store can sync)
All other API keys (Brave, Google, HubSpot, etc.) must go in the encrypted store only. **Never offer to add them to shell config.**
If `HIVE_CREDENTIAL_KEY` is not set:
1. Let the store generate one
2. Tell the user to save it: `export HIVE_CREDENTIAL_KEY="{generated_key}"`
3. Recommend adding it to `~/.bashrc` or their shell profile
## Security Rules
- **NEVER** log, print, or echo credential values in tool output
- **NEVER** store credentials in plaintext files, git-tracked files, or agent configs
- **NEVER** hardcode credentials in source code
- **NEVER** offer to save API keys to shell config (`~/.zshrc`/`~/.bashrc`) — the **only** keys that belong in shell config are `HIVE_CREDENTIAL_KEY` and `ADEN_API_KEY`. All other credentials (Brave, Google, HubSpot, GitHub, Resend, etc.) go in the encrypted store only.
- **ALWAYS** use `SecretStr` from Pydantic when handling credential values in Python
- **ALWAYS** use the local encrypted store (`~/.hive/credentials`) for persistence
- **ALWAYS** run health checks before storing credentials (when possible)
- **ALWAYS** verify credentials were stored by re-running validation, not by reading them back
- When modifying `~/.bashrc` or `~/.zshrc`, confirm with the user first
## Credential Sources Reference
All credential specs are defined in `tools/src/aden_tools/credentials/`:
| File | Category | Credentials | Aden Supported |
| ----------------- | ------------- | --------------------------------------------- | -------------- |
| `llm.py` | LLM Providers | `anthropic` | No |
| `search.py` | Search Tools | `brave_search`, `google_search`, `google_cse` | No |
| `email.py` | Email | `resend` | No |
| `integrations.py` | Integrations | `github`, `hubspot`, `google_calendar_oauth` | No / Yes |
**Note:** Additional LLM providers (Cerebras, Groq, OpenAI) are handled by LiteLLM via environment
variables (`CEREBRAS_API_KEY`, `GROQ_API_KEY`, `OPENAI_API_KEY`) but are not yet in CREDENTIAL_SPECS.
Add them to `llm.py` as needed.
To check what's registered:
```python
from aden_tools.credentials import CREDENTIAL_SPECS
for name, spec in CREDENTIAL_SPECS.items():
print(f"{name}: aden={spec.aden_supported}, direct={spec.direct_api_key_supported}")
```
## Migration: CredentialManager → CredentialStore
**CredentialManager is deprecated.** Use CredentialStore instead.
| Old (Deprecated) | New (Recommended) |
| ----------------------------------------- | -------------------------------------------------------------------- |
| `CredentialManager()` | `CredentialStore.with_encrypted_storage()` |
| `creds.get("hubspot")` | `store.get("hubspot")` or `store.get_key("hubspot", "access_token")` |
| `creds.validate_for_tools(tools)` | Use `store.is_available(cred_id)` per credential |
| `creds.get_auth_options("hubspot")` | Check `CREDENTIAL_SPECS["hubspot"].aden_supported` |
| `creds.get_setup_instructions("hubspot")` | Access `CREDENTIAL_SPECS["hubspot"]` directly |
**Why migrate?**
- **CredentialStore** supports encrypted storage, multi-key credentials, template resolution, and automatic token refresh
- **CredentialManager** only reads from environment variables and .env files (no encryption, no refresh)
- **CredentialStoreAdapter** exists for backward compatibility during migration
```python
# Old way (deprecated)
from aden_tools.credentials import CredentialManager
creds = CredentialManager()
token = creds.get("hubspot")
# New way (recommended)
from core.framework.credentials import CredentialStore
store = CredentialStore.with_encrypted_storage()
token = store.get("hubspot")
# With Aden sync (recommended for OAuth integrations)
store = CredentialStore.with_aden_sync()
token = store.get_key("hubspot", "access_token")
```
## Example Session
```
User: /hive-credentials for my research-agent
Agent: Let me check what credentials your research-agent needs.
[Calls check_missing_credentials(agent_path="exports/research-agent")]
→ Returns:
available: anthropic (encrypted_store), brave_search (encrypted_store)
missing: google_search (GOOGLE_API_KEY), google_cse (GOOGLE_CSE_ID)
ready: false
Agent: 2 of 4 required credentials are already configured. Only Google Custom
Search needs setup (2 values).
--- Setting up Google Custom Search (google_search + google_cse) ---
This requires two values that work together.
[Checks HIVE_CREDENTIAL_KEY before storing]
$ printenv HIVE_CREDENTIAL_KEY > /dev/null 2>&1 && echo "set" || echo "not set"
set
First, the Google API Key:
1. Go to https://console.cloud.google.com/apis/credentials
2. Create a new project (or select an existing one)
3. Enable the "Custom Search API" from the API Library
4. Go to Credentials > Create Credentials > API Key
5. Copy the generated API key
[AskUserQuestion: "Please provide your Google API key:"]
[User provides key]
Now, the Custom Search Engine ID:
1. Go to https://programmablesearchengine.google.com/controlpanel/all
2. Click "Add" to create a new search engine
3. Under "What to search", select "Search the entire web"
4. Give your search engine a name
5. Click "Create"
6. Copy the Search Engine ID (cx value)
[AskUserQuestion: "Please provide your Google CSE ID:"]
[User provides ID]
[Runs health check with both values - GET /customsearch/v1?q=test&num=1 → 200 OK]
[Stores both in local encrypted store, exports to env]
✓ Google Custom Search credentials valid
[Calls verify_credentials(agent_path="exports/research-agent")]
→ Returns: ready: true, missing_credentials: []
All credentials are now configured:
✓ anthropic (ANTHROPIC_API_KEY) — already in encrypted store
✓ brave_search (BRAVE_SEARCH_API_KEY) — already in encrypted store
✓ google_search (GOOGLE_API_KEY) — stored in encrypted store
✓ google_cse (GOOGLE_CSE_ID) — stored in encrypted store
┌─────────────────────────────────────────────────────────────────────────────┐
│ ✅ CREDENTIALS CONFIGURED │
├─────────────────────────────────────────────────────────────────────────────┤
│ │
│ OPEN A NEW TERMINAL before running commands below. │
│ Environment variables were saved to your shell config but │
│ only take effect in new terminal sessions. │
│ │
│ NEXT STEPS: │
│ │
│ 1. RUN YOUR AGENT: │
│ │
│ hive tui │
│ │
│ 2. IF YOU ENCOUNTER ISSUES, USE THE DEBUGGER: │
│ │
│ /hive-debugger │
│ │
│ The debugger analyzes runtime logs, identifies retry loops, tool │
│ failures, stalled execution, and provides actionable fix suggestions. │
│ │
└─────────────────────────────────────────────────────────────────────────────┘
```
File diff suppressed because it is too large Load Diff
-385
View File
@@ -1,385 +0,0 @@
---
name: hive-patterns
description: Best practices, patterns, and examples for building goal-driven agents. Includes client-facing interaction, feedback edges, judge patterns, fan-out/fan-in, context management, and anti-patterns.
license: Apache-2.0
metadata:
author: hive
version: "2.0"
type: reference
part_of: hive
---
# Building Agents - Patterns & Best Practices
Design patterns, examples, and best practices for building robust goal-driven agents.
**Prerequisites:** Complete agent structure using `hive-create`.
## Practical Example: Hybrid Workflow
How to build a node using both direct file writes and optional MCP validation:
```python
# 1. WRITE TO FILE FIRST (Primary - makes it visible)
node_code = '''
search_node = NodeSpec(
id="search-web",
node_type="event_loop",
input_keys=["query"],
output_keys=["search_results"],
system_prompt="Search the web for: {query}. Use web_search, then call set_output to store results.",
tools=["web_search"],
)
'''
Edit(
file_path="exports/research_agent/nodes/__init__.py",
old_string="# Nodes will be added here",
new_string=node_code
)
# 2. OPTIONALLY VALIDATE WITH MCP (Secondary - bookkeeping)
validation = mcp__agent-builder__test_node(
node_id="search-web",
test_input='{"query": "python tutorials"}',
mock_llm_response='{"search_results": [...mock results...]}'
)
```
**User experience:**
- Immediately sees node in their editor (from step 1)
- Gets validation feedback (from step 2)
- Can edit the file directly if needed
## Multi-Turn Interaction Patterns
For agents needing multi-turn conversations with users, use `client_facing=True` on event_loop nodes.
### Client-Facing Nodes
A client-facing node streams LLM output to the user and blocks for user input between conversational turns. This replaces the old pause/resume pattern.
```python
# Client-facing node with STEP 1/STEP 2 prompt pattern
intake_node = NodeSpec(
id="intake",
name="Intake",
description="Gather requirements from the user",
node_type="event_loop",
client_facing=True,
input_keys=["topic"],
output_keys=["research_brief"],
system_prompt="""\
You are an intake specialist.
**STEP 1 — Read and respond (text only, NO tool calls):**
1. Read the topic provided
2. If it's vague, ask 1-2 clarifying questions
3. If it's clear, confirm your understanding
**STEP 2 — After the user confirms, call set_output:**
- set_output("research_brief", "Clear description of what to research")
""",
)
# Internal node runs without user interaction
research_node = NodeSpec(
id="research",
name="Research",
description="Search and analyze sources",
node_type="event_loop",
input_keys=["research_brief"],
output_keys=["findings", "sources"],
system_prompt="Research the topic using web_search and web_scrape...",
tools=["web_search", "web_scrape", "load_data", "save_data"],
)
```
**How it works:**
- Client-facing nodes stream LLM text to the user and block for input after each response
- User input is injected via `node.inject_event(text)`
- When the LLM calls `set_output` to produce structured outputs, the judge evaluates and ACCEPTs
- Internal nodes (non-client-facing) run their entire loop without blocking
- `set_output` is a synthetic tool — a turn with only `set_output` calls (no real tools) triggers user input blocking
**STEP 1/STEP 2 pattern:** Always structure client-facing prompts with explicit phases. STEP 1 is text-only conversation. STEP 2 calls `set_output` after user confirmation. This prevents the LLM from calling `set_output` prematurely before the user responds.
### When to Use client_facing
| Scenario | client_facing | Why |
| ----------------------------------- | :-----------: | ---------------------- |
| Gathering user requirements | Yes | Need user input |
| Human review/approval checkpoint | Yes | Need human decision |
| Data processing (scanning, scoring) | No | Runs autonomously |
| Report generation | No | No user input needed |
| Final confirmation before action | Yes | Need explicit approval |
> **Legacy Note:** The `pause_nodes` / `entry_points` pattern still works for backward compatibility but `client_facing=True` is preferred for new agents.
## Edge-Based Routing and Feedback Loops
### Conditional Edge Routing
Multiple conditional edges from the same source replace the old `router` node type. Each edge checks a condition on the node's output.
```python
# Node with mutually exclusive outputs
review_node = NodeSpec(
id="review",
name="Review",
node_type="event_loop",
client_facing=True,
output_keys=["approved_contacts", "redo_extraction"],
nullable_output_keys=["approved_contacts", "redo_extraction"],
max_node_visits=3,
system_prompt="Present the contact list to the operator. If they approve, call set_output('approved_contacts', ...). If they want changes, call set_output('redo_extraction', 'true').",
)
# Forward edge (positive priority, evaluated first)
EdgeSpec(
id="review-to-campaign",
source="review",
target="campaign-builder",
condition=EdgeCondition.CONDITIONAL,
condition_expr="output.get('approved_contacts') is not None",
priority=1,
)
# Feedback edge (negative priority, evaluated after forward edges)
EdgeSpec(
id="review-feedback",
source="review",
target="extractor",
condition=EdgeCondition.CONDITIONAL,
condition_expr="output.get('redo_extraction') is not None",
priority=-1,
)
```
**Key concepts:**
- `nullable_output_keys`: Lists output keys that may remain unset. The node sets exactly one of the mutually exclusive keys per execution.
- `max_node_visits`: Must be >1 on the feedback target (extractor) so it can re-execute. Default is 1.
- `priority`: Positive = forward edge (evaluated first). Negative = feedback edge. The executor tries forward edges first; if none match, falls back to feedback edges.
### Routing Decision Table
| Pattern | Old Approach | New Approach |
| ---------------------- | ----------------------- | --------------------------------------------- |
| Conditional branching | `router` node | Conditional edges with `condition_expr` |
| Binary approve/reject | `pause_nodes` + resume | `client_facing=True` + `nullable_output_keys` |
| Loop-back on rejection | Manual entry_points | Feedback edge with `priority=-1` |
| Multi-way routing | Router with routes dict | Multiple conditional edges with priorities |
## Judge Patterns
**Core Principle: The judge is the SOLE mechanism for acceptance decisions.** Never add ad-hoc framework gating to compensate for LLM behavior. If the LLM calls `set_output` prematurely, fix the system prompt or use a custom judge. Anti-patterns to avoid:
- Output rollback logic
- `_user_has_responded` flags
- Premature set_output rejection
- Interaction protocol injection into system prompts
Judges control when an event_loop node's loop exits. Choose based on validation needs.
### Implicit Judge (Default)
When no judge is configured, the implicit judge ACCEPTs when:
- The LLM finishes its response with no tool calls
- All required output keys have been set via `set_output`
Best for simple nodes where "all outputs set" is sufficient validation.
### SchemaJudge
Validates outputs against a Pydantic model. Use when you need structural validation.
```python
from pydantic import BaseModel
class ScannerOutput(BaseModel):
github_users: list[dict] # Must be a list of user objects
class SchemaJudge:
def __init__(self, output_model: type[BaseModel]):
self._model = output_model
async def evaluate(self, context: dict) -> JudgeVerdict:
missing = context.get("missing_keys", [])
if missing:
return JudgeVerdict(
action="RETRY",
feedback=f"Missing output keys: {missing}. Use set_output to provide them.",
)
try:
self._model.model_validate(context["output_accumulator"])
return JudgeVerdict(action="ACCEPT")
except ValidationError as e:
return JudgeVerdict(action="RETRY", feedback=str(e))
```
### When to Use Which Judge
| Judge | Use When | Example |
| --------------- | ------------------------------------- | ---------------------- |
| Implicit (None) | Output keys are sufficient validation | Simple data extraction |
| SchemaJudge | Need structural validation of outputs | API response parsing |
| Custom | Domain-specific validation logic | Score must be 0.0-1.0 |
## Fan-Out / Fan-In (Parallel Execution)
Multiple ON_SUCCESS edges from the same source trigger parallel execution. All branches run concurrently via `asyncio.gather()`.
```python
# Scanner fans out to Profiler and Scorer in parallel
EdgeSpec(id="scanner-to-profiler", source="scanner", target="profiler",
condition=EdgeCondition.ON_SUCCESS)
EdgeSpec(id="scanner-to-scorer", source="scanner", target="scorer",
condition=EdgeCondition.ON_SUCCESS)
# Both fan in to Extractor
EdgeSpec(id="profiler-to-extractor", source="profiler", target="extractor",
condition=EdgeCondition.ON_SUCCESS)
EdgeSpec(id="scorer-to-extractor", source="scorer", target="extractor",
condition=EdgeCondition.ON_SUCCESS)
```
**Requirements:**
- Parallel event_loop nodes must have **disjoint output_keys** (no key written by both)
- Only one parallel branch may contain a `client_facing` node
- Fan-in node receives outputs from all completed branches in shared memory
## Context Management Patterns
### Tiered Compaction
EventLoopNode automatically manages context window usage with tiered compaction:
1. **Pruning** — Old tool results replaced with compact placeholders (zero-cost, no LLM call)
2. **Normal compaction** — LLM summarizes older messages
3. **Aggressive compaction** — Keeps only recent messages + summary
4. **Emergency** — Hard reset with tool history preservation
### Spillover Pattern
The framework automatically truncates large tool results and saves full content to a spillover directory. The LLM receives a truncation message with instructions to use `load_data` to read the full result.
For explicit data management, use the data tools (real MCP tools, not synthetic):
```python
# save_data, load_data, list_data_files, serve_file_to_user are real MCP tools
# data_dir is auto-injected by the framework — the LLM never sees it
# Saving large results
save_data(filename="sources.json", data=large_json_string)
# Reading with pagination (line-based offset/limit)
load_data(filename="sources.json", offset=0, limit=50)
# Listing available files
list_data_files()
# Serving a file to the user as a clickable link
serve_file_to_user(filename="report.html", label="Research Report")
```
Add data tools to nodes that handle large tool results:
```python
research_node = NodeSpec(
...
tools=["web_search", "web_scrape", "load_data", "save_data", "list_data_files"],
)
```
`data_dir` is a framework context parameter — auto-injected at call time. `GraphExecutor.execute()` sets it per-execution via `ToolRegistry.set_execution_context(data_dir=...)` (using `contextvars` for concurrency safety), ensuring it matches the session-scoped spillover directory.
## Anti-Patterns
### What NOT to Do
- **Don't rely on `export_graph`** — Write files immediately, not at end
- **Don't hide code in session** — Write to files as components are approved
- **Don't wait to write files** — Agent visible from first step
- **Don't batch everything** — Write incrementally, one component at a time
- **Don't create too many thin nodes** — Prefer fewer, richer nodes (see below)
- **Don't add framework gating for LLM behavior** — Fix prompts or use judges instead
### Fewer, Richer Nodes
A common mistake is splitting work into too many small single-purpose nodes. Each node boundary requires serializing outputs, losing in-context information, and adding edge complexity.
| Bad (8 thin nodes) | Good (4 rich nodes) |
| ------------------- | ----------------------------------- |
| parse-query | intake (client-facing) |
| search-sources | research (search + fetch + analyze) |
| fetch-content | review (client-facing) |
| evaluate-sources | report (write + deliver) |
| synthesize-findings | |
| write-report | |
| quality-check | |
| save-report | |
**Why fewer nodes are better:**
- The LLM retains full context of its work within a single node
- A research node that searches, fetches, and analyzes keeps all source material in its conversation history
- Fewer edges means simpler graph and fewer failure points
- Data tools (`save_data`/`load_data`) handle context window limits within a single node
### MCP Tools - Correct Usage
**MCP tools OK for:**
- `test_node` — Validate node configuration with mock inputs
- `validate_graph` — Check graph structure
- `configure_loop` — Set event loop parameters
- `create_session` — Track session state for bookkeeping
**Just don't:** Use MCP as the primary construction method or rely on export_graph
## Error Handling Patterns
### Graceful Failure with Fallback
```python
edges = [
# Success path
EdgeSpec(id="api-success", source="api-call", target="process-results",
condition=EdgeCondition.ON_SUCCESS),
# Fallback on failure
EdgeSpec(id="api-to-fallback", source="api-call", target="fallback-cache",
condition=EdgeCondition.ON_FAILURE, priority=1),
# Report if fallback also fails
EdgeSpec(id="fallback-to-error", source="fallback-cache", target="report-error",
condition=EdgeCondition.ON_FAILURE, priority=1),
]
```
## Handoff to Testing
When agent is complete, transition to testing phase:
### Pre-Testing Checklist
- [ ] Agent structure validates: `uv run python -m agent_name validate`
- [ ] All nodes defined in nodes/**init**.py
- [ ] All edges connect valid nodes with correct priorities
- [ ] Feedback edge targets have `max_node_visits > 1`
- [ ] Client-facing nodes have meaningful system prompts
- [ ] Agent can be imported: `from exports.agent_name import default_agent`
## Related Skills
- **hive-concepts** — Fundamental concepts (node types, edges, event loop architecture)
- **hive-create** — Step-by-step building process
- **hive-test** — Test and validate agents
- **hive** — Complete workflow orchestrator
---
**Remember: Agent is actively constructed, visible the whole time. No hidden state. No surprise exports. Just transparent, incremental file building.**
-940
View File
@@ -1,940 +0,0 @@
---
name: hive-test
description: Iterative agent testing with session recovery. Execute, analyze, fix, resume from checkpoints. Use when testing an agent, debugging test failures, or verifying fixes without re-running from scratch.
---
# Agent Testing
Test agents iteratively: execute, analyze failures, fix, resume from checkpoint, repeat.
## When to Use
- Testing a newly built agent against its goal
- Debugging a failing agent iteratively
- Verifying fixes without re-running expensive early nodes
- Running final regression tests before deployment
## Prerequisites
1. Agent package at `exports/{agent_name}/` (built with `/hive-create`)
2. Credentials configured (`/hive-credentials`)
3. `ANTHROPIC_API_KEY` set (or appropriate LLM provider key)
**Path distinction** (critical — don't confuse these):
- `exports/{agent_name}/` — agent source code (edit here)
- `~/.hive/agents/{agent_name}/` — runtime data: sessions, checkpoints, logs (read here)
---
## The Iterative Test Loop
This is the core workflow. Don't re-run the entire agent when a late node fails — analyze, fix, and resume from the last clean checkpoint.
```
┌──────────────────────────────────────┐
│ PHASE 1: Generate Test Scenarios │
│ Goal → synthetic test inputs + tests │
└──────────────┬───────────────────────┘
┌──────────────────────────────────────┐
│ PHASE 2: Execute │◄────────────────┐
│ Run agent (CLI or pytest) │ │
└──────────────┬───────────────────────┘ │
↓ │
Pass? ──yes──► PHASE 6: Final Verification │
│ │
no │
↓ │
┌──────────────────────────────────────┐ │
│ PHASE 3: Analyze │ │
│ Session + runtime logs + checkpoints │ │
└──────────────┬───────────────────────┘ │
↓ │
┌──────────────────────────────────────┐ │
│ PHASE 4: Fix │ │
│ Prompt / code / graph / goal │ │
└──────────────┬───────────────────────┘ │
↓ │
┌──────────────────────────────────────┐ │
│ PHASE 5: Recover & Resume │─────────────────┘
│ Checkpoint resume OR fresh re-run │
└──────────────────────────────────────┘
```
---
### Phase 1: Generate Test Scenarios
Create synthetic tests from the agent's goal, constraints, and success criteria.
#### Step 1a: Read the goal
```python
# Read goal from agent.py
Read(file_path="exports/{agent_name}/agent.py")
# Extract the Goal definition and convert to JSON string
```
#### Step 1b: Get test guidelines
```python
# Get constraint test guidelines
generate_constraint_tests(
goal_id="your-goal-id",
goal_json='{"id": "...", "constraints": [...]}',
agent_path="exports/{agent_name}"
)
# Get success criteria test guidelines
generate_success_tests(
goal_id="your-goal-id",
goal_json='{"id": "...", "success_criteria": [...]}',
node_names="intake,research,review,report",
tool_names="web_search,web_scrape",
agent_path="exports/{agent_name}"
)
```
These return `file_header`, `test_template`, `constraints_formatted`/`success_criteria_formatted`, and `test_guidelines`. They do NOT generate test code — you write the tests.
#### Step 1c: Write tests
```python
Write(
file_path=result["output_file"],
content=result["file_header"] + "\n\n" + your_test_code
)
```
#### Test writing rules
- Every test MUST be `async` with `@pytest.mark.asyncio`
- Every test MUST accept `runner, auto_responder, mock_mode` fixtures
- Use `await auto_responder.start()` before running, `await auto_responder.stop()` in `finally`
- Use `await runner.run(input_dict)` — this goes through AgentRunner → AgentRuntime → ExecutionStream
- Access output via `result.output.get("key")` — NEVER `result.output["key"]`
- `result.success=True` means no exception, NOT goal achieved — always check output
- Write 8-15 tests total, not 30+
- Each real test costs ~3 seconds + LLM tokens
- NEVER use `default_agent.run()` — it bypasses the runtime (no sessions, no logs, client-facing nodes hang)
#### Step 1d: Check existing tests
Before generating, check if tests already exist:
```python
list_tests(
goal_id="your-goal-id",
agent_path="exports/{agent_name}"
)
```
---
### Phase 2: Execute
Two execution paths, use the right one for your situation.
#### Iterative debugging (for complex agents)
Run the agent via CLI. This creates sessions with checkpoints at `~/.hive/agents/{agent_name}/sessions/`:
```bash
uv run hive run exports/{agent_name} --input '{"query": "test topic"}'
```
Sessions and checkpoints are saved automatically.
**Client-facing nodes**: Agents with `client_facing=True` nodes (interactive conversation) work in headless mode when run from a real terminal — the agent streams output to stdout and reads user input from stdin via a `>>> ` prompt. In non-interactive shells (like Claude Code's Bash tool), client-facing nodes will hang because there is no stdin. For testing interactive agents from Claude Code, use `run_tests` with mock mode or have the user run the agent manually in their terminal.
#### Automated regression (for CI or final verification)
Use the `run_tests` MCP tool to run all pytest tests:
```python
run_tests(
goal_id="your-goal-id",
agent_path="exports/{agent_name}"
)
```
Returns structured results:
```json
{
"overall_passed": false,
"summary": {"total": 12, "passed": 10, "failed": 2, "pass_rate": "83.3%"},
"test_results": [{"test_name": "test_success_source_diversity", "status": "failed"}],
"failures": [{"test_name": "test_success_source_diversity", "details": "..."}]
}
```
**Options:**
```python
# Run only constraint tests
run_tests(goal_id, agent_path, test_types='["constraint"]')
# Stop on first failure
run_tests(goal_id, agent_path, fail_fast=True)
# Parallel execution
run_tests(goal_id, agent_path, parallel=4)
```
**Note:** `run_tests` uses `AgentRunner` with `tmp_path` storage, so sessions are isolated per test run. For checkpoint-based recovery with persistent sessions, use CLI execution. Use `run_tests` for quick regression checks and final verification.
---
### Phase 3: Analyze Failures
When a test fails, drill down systematically. Don't guess — use the tools.
#### Step 3a: Get error category
```python
debug_test(
goal_id="your-goal-id",
test_name="test_success_source_diversity",
agent_path="exports/{agent_name}"
)
```
Returns error category (`IMPLEMENTATION_ERROR`, `ASSERTION_FAILURE`, `TIMEOUT`, `IMPORT_ERROR`, `API_ERROR`) plus full traceback and suggestions.
#### Step 3b: Find the failed session
```python
list_agent_sessions(
agent_work_dir="~/.hive/agents/{agent_name}",
status="failed",
limit=5
)
```
Returns session list with IDs, timestamps, current_node (where it failed), execution_quality.
#### Step 3c: Inspect session state
```python
get_agent_session_state(
agent_work_dir="~/.hive/agents/{agent_name}",
session_id="session_20260209_143022_abc12345"
)
```
Returns execution path, which node was current, step count, timestamps — but excludes memory values (to avoid context bloat). Shows `memory_keys` and `memory_size` instead.
#### Step 3d: Examine runtime logs (L2/L3)
```python
# L2: Per-node success/failure, retry counts
query_runtime_log_details(
agent_work_dir="~/.hive/agents/{agent_name}",
run_id="session_20260209_143022_abc12345",
needs_attention_only=True
)
# L3: Exact LLM responses, tool call inputs/outputs
query_runtime_log_raw(
agent_work_dir="~/.hive/agents/{agent_name}",
run_id="session_20260209_143022_abc12345",
node_id="research"
)
```
#### Step 3e: Inspect memory data
```python
# See what data a node actually produced
get_agent_session_memory(
agent_work_dir="~/.hive/agents/{agent_name}",
session_id="session_20260209_143022_abc12345",
key="research_results"
)
```
#### Step 3f: Find recovery points
```python
list_agent_checkpoints(
agent_work_dir="~/.hive/agents/{agent_name}",
session_id="session_20260209_143022_abc12345",
is_clean="true"
)
```
Returns checkpoint summaries with IDs, types (`node_start`, `node_complete`), which node, and `is_clean` flag. Clean checkpoints are safe resume points.
#### Step 3g: Compare checkpoints (optional)
To understand what changed between two points in execution:
```python
compare_agent_checkpoints(
agent_work_dir="~/.hive/agents/{agent_name}",
session_id="session_20260209_143022_abc12345",
checkpoint_id_before="cp_node_complete_research_143030",
checkpoint_id_after="cp_node_complete_review_143115"
)
```
Returns memory diff (added/removed/changed keys) and execution path diff.
---
### Phase 4: Fix Based on Root Cause
Use the analysis from Phase 3 to determine what to fix and where.
| Root Cause | What to Fix | Where to Edit |
|------------|------------|---------------|
| **Prompt issue** — LLM produces wrong output format, misses instructions | Node `system_prompt` | `exports/{agent}/nodes/__init__.py` |
| **Code bug** — TypeError, KeyError, logic error in Python | Agent code | `exports/{agent}/agent.py`, `nodes/__init__.py` |
| **Graph issue** — wrong routing, missing edge, bad condition_expr | Edges, node config | `exports/{agent}/agent.py` |
| **Tool issue** — MCP tool fails, wrong config, missing credential | Tool config | `exports/{agent}/mcp_servers.json`, `/hive-credentials` |
| **Goal issue** — success criteria too strict/vague, wrong constraints | Goal definition | `exports/{agent}/agent.py` (goal section) |
| **Test issue** — test expectations don't match actual agent behavior | Test code | `exports/{agent}/tests/test_*.py` |
#### Fix strategies by error category
**IMPLEMENTATION_ERROR** (TypeError, AttributeError, KeyError):
```python
# Read the failing code
Read(file_path="exports/{agent_name}/nodes/__init__.py")
# Fix the bug
Edit(
file_path="exports/{agent_name}/nodes/__init__.py",
old_string="results.get('videos')",
new_string="(results or {}).get('videos', [])"
)
```
**ASSERTION_FAILURE** (test assertions fail but agent ran successfully):
- Check if the agent's output is actually wrong → fix the prompt
- Check if the test's expectations are unrealistic → fix the test
- Use `get_agent_session_memory` to see what the agent actually produced
**TIMEOUT / STALL** (agent runs too long):
- Check `node_visit_counts` for feedback loops hitting max_node_visits
- Check L3 logs for tool calls that hang
- Reduce `max_iterations` in loop_config or fix the prompt to converge faster
**API_ERROR** (connection, rate limit, auth):
- Verify credentials with `/hive-credentials`
- Check MCP server configuration
---
### Phase 5: Recover & Resume
After fixing the agent, decide whether to resume or re-run.
#### When to resume from checkpoint
Resume when ALL of these are true:
- The fix is to a node that comes AFTER existing clean checkpoints
- Clean checkpoints exist (from a CLI execution with checkpointing)
- The early nodes are expensive (web scraping, API calls, long LLM chains)
```bash
# Resume from the last clean checkpoint before the failing node
uv run hive run exports/{agent_name} \
--resume-session session_20260209_143022_abc12345 \
--checkpoint cp_node_complete_research_143030
```
This skips all nodes before the checkpoint and only re-runs the fixed node onward.
#### When to re-run from scratch
Re-run when ANY of these are true:
- The fix is to the entry node or an early node
- No checkpoints exist (e.g., agent was run via `run_tests`)
- The agent is fast (2-3 nodes, completes in seconds)
- You changed the graph structure (added/removed nodes/edges)
```bash
uv run hive run exports/{agent_name} --input '{"query": "test topic"}'
```
#### Inspecting a checkpoint before resuming
```python
get_agent_checkpoint(
agent_work_dir="~/.hive/agents/{agent_name}",
session_id="session_20260209_143022_abc12345",
checkpoint_id="cp_node_complete_research_143030"
)
```
Returns the full checkpoint: shared_memory snapshot, execution_path, current_node, next_node, is_clean.
#### Loop back to Phase 2
After resuming or re-running, check if the fix worked. If not, go back to Phase 3.
---
### Phase 6: Final Verification
Once the iterative fix loop converges (the agent produces correct output), run the full automated test suite:
```python
run_tests(
goal_id="your-goal-id",
agent_path="exports/{agent_name}"
)
```
All tests should pass. If not, repeat the loop for remaining failures.
---
## Credential Requirements
**CRITICAL: Testing requires ALL credentials the agent depends on.** This includes both the LLM API key AND any tool-specific credentials (HubSpot, Brave Search, etc.).
### Prerequisites
Before running agent tests, you MUST collect ALL required credentials from the user.
**Step 1: LLM API Key (always required)**
```bash
export ANTHROPIC_API_KEY="your-key-here"
```
**Step 2: Tool-specific credentials (depends on agent's tools)**
Inspect the agent's `mcp_servers.json` and tool configuration to determine which tools the agent uses, then check for all required credentials:
```python
from aden_tools.credentials import CredentialManager, CREDENTIAL_SPECS
creds = CredentialManager()
# Determine which tools the agent uses (from agent.json or mcp_servers.json)
agent_tools = [...] # e.g., ["hubspot_search_contacts", "web_search", ...]
# Find all missing credentials for those tools
missing = creds.get_missing_for_tools(agent_tools)
```
Common tool credentials:
| Tool | Env Var | Help URL |
|------|---------|----------|
| HubSpot CRM | `HUBSPOT_ACCESS_TOKEN` | https://developers.hubspot.com/docs/api/private-apps |
| Brave Search | `BRAVE_SEARCH_API_KEY` | https://brave.com/search/api/ |
| Google Search | `GOOGLE_SEARCH_API_KEY` + `GOOGLE_SEARCH_CX` | https://developers.google.com/custom-search |
**Why ALL credentials are required:**
- Tests need to execute the agent's LLM nodes to validate behavior
- Tools with missing credentials will return error dicts instead of real data
- Mock mode bypasses everything, providing no confidence in real-world performance
### Mock Mode Limitations
Mock mode (`--mock` flag or `MOCK_MODE=1`) is **ONLY for structure validation**:
- Validates graph structure (nodes, edges, connections)
- Validates that `AgentRunner.load()` succeeds and the agent is importable
- Does NOT execute event_loop agents — MockLLMProvider never calls `set_output`, so event_loop nodes loop forever
- Does NOT test LLM reasoning, content quality, or constraint validation
- Does NOT test real API integrations or tool use
**Bottom line:** If you're testing whether an agent achieves its goal, you MUST use real credentials.
### Enforcing Credentials in Tests
When writing tests, **ALWAYS include credential checks**:
```python
import os
import pytest
from aden_tools.credentials import CredentialManager
pytestmark = pytest.mark.skipif(
not CredentialManager().is_available("anthropic") and not os.environ.get("MOCK_MODE"),
reason="API key required for real testing. Set ANTHROPIC_API_KEY or use MOCK_MODE=1."
)
@pytest.fixture(scope="session", autouse=True)
def check_credentials():
"""Ensure ALL required credentials are set for real testing."""
creds = CredentialManager()
mock_mode = os.environ.get("MOCK_MODE")
if not creds.is_available("anthropic"):
if mock_mode:
print("\nRunning in MOCK MODE - structure validation only")
else:
pytest.fail(
"\nANTHROPIC_API_KEY not set!\n"
"Set API key: export ANTHROPIC_API_KEY='your-key-here'\n"
"Or run structure validation: MOCK_MODE=1 pytest exports/{agent}/tests/"
)
if not mock_mode:
agent_tools = [] # Update per agent
missing = creds.get_missing_for_tools(agent_tools)
if missing:
lines = ["\nMissing tool credentials!"]
for name in missing:
spec = creds.specs.get(name)
if spec:
lines.append(f" {spec.env_var} - {spec.description}")
pytest.fail("\n".join(lines))
```
### User Communication
When the user asks to test an agent, **ALWAYS check for ALL credentials first**:
1. **Identify the agent's tools** from `mcp_servers.json`
2. **Check ALL required credentials** using `CredentialManager`
3. **Ask the user to provide any missing credentials** before proceeding
4. Collect ALL missing credentials in a single prompt — not one at a time
---
## Safe Test Patterns
### OutputCleaner
The framework automatically validates and cleans node outputs using a fast LLM at edge traversal time. Tests should still use safe patterns because OutputCleaner may not catch all issues.
### Safe Access (REQUIRED)
```python
# UNSAFE - will crash on missing keys
approval = result.output["approval_decision"]
category = result.output["analysis"]["category"]
# SAFE - use .get() with defaults
output = result.output or {}
approval = output.get("approval_decision", "UNKNOWN")
# SAFE - type check before operations
analysis = output.get("analysis", {})
if isinstance(analysis, dict):
category = analysis.get("category", "unknown")
# SAFE - handle JSON parsing trap (LLM response as string)
import json
recommendation = output.get("recommendation", "{}")
if isinstance(recommendation, str):
try:
parsed = json.loads(recommendation)
if isinstance(parsed, dict):
approval = parsed.get("approval_decision", "UNKNOWN")
except json.JSONDecodeError:
approval = "UNKNOWN"
elif isinstance(recommendation, dict):
approval = recommendation.get("approval_decision", "UNKNOWN")
# SAFE - type check before iteration
items = output.get("items", [])
if isinstance(items, list):
for item in items:
...
```
### Helper Functions for conftest.py
```python
import json
import re
def _parse_json_from_output(result, key):
"""Parse JSON from agent output (framework may store full LLM response as string)."""
response_text = result.output.get(key, "")
json_text = re.sub(r'```json\s*|\s*```', '', response_text).strip()
try:
return json.loads(json_text)
except (json.JSONDecodeError, AttributeError, TypeError):
return result.output.get(key)
def safe_get_nested(result, key_path, default=None):
"""Safely get nested value from result.output."""
output = result.output or {}
current = output
for key in key_path:
if isinstance(current, dict):
current = current.get(key)
elif isinstance(current, str):
try:
json_text = re.sub(r'```json\s*|\s*```', '', current).strip()
parsed = json.loads(json_text)
if isinstance(parsed, dict):
current = parsed.get(key)
else:
return default
except json.JSONDecodeError:
return default
else:
return default
return current if current is not None else default
# Make available in tests
pytest.parse_json_from_output = _parse_json_from_output
pytest.safe_get_nested = safe_get_nested
```
### ExecutionResult Fields
**`result.success=True` means NO exception, NOT goal achieved**
```python
# WRONG
assert result.success
# RIGHT
assert result.success, f"Agent failed: {result.error}"
output = result.output or {}
approval = output.get("approval_decision")
assert approval == "APPROVED", f"Expected APPROVED, got {approval}"
```
All fields:
- `success: bool` — Completed without exception (NOT goal achieved!)
- `output: dict` — Complete memory snapshot (may contain raw strings)
- `error: str | None` — Error message if failed
- `steps_executed: int` — Number of nodes executed
- `total_tokens: int` — Cumulative token usage
- `total_latency_ms: int` — Total execution time
- `path: list[str]` — Node IDs traversed (may repeat in feedback loops)
- `paused_at: str | None` — Node ID if paused
- `session_state: dict` — State for resuming
- `node_visit_counts: dict[str, int]` — Visit counts per node (feedback loop testing)
- `execution_quality: str` — "clean", "degraded", or "failed"
### Test Count Guidance
**Write 8-15 tests, not 30+**
- 2-3 tests per success criterion
- 1 happy path test
- 1 boundary/edge case test
- 1 error handling test (optional)
Each real test costs ~3 seconds + LLM tokens. 12 tests = ~36 seconds, $0.12.
---
## Test Patterns
### Happy Path
```python
@pytest.mark.asyncio
async def test_happy_path(runner, auto_responder, mock_mode):
"""Test normal successful execution."""
await auto_responder.start()
try:
result = await runner.run({"query": "python tutorials"})
finally:
await auto_responder.stop()
assert result.success, f"Agent failed: {result.error}"
output = result.output or {}
assert output.get("report"), "No report produced"
```
### Boundary Condition
```python
@pytest.mark.asyncio
async def test_minimum_sources(runner, auto_responder, mock_mode):
"""Test at minimum source threshold."""
await auto_responder.start()
try:
result = await runner.run({"query": "niche topic"})
finally:
await auto_responder.stop()
assert result.success, f"Agent failed: {result.error}"
output = result.output or {}
sources = output.get("sources", [])
if isinstance(sources, list):
assert len(sources) >= 3, f"Expected >= 3 sources, got {len(sources)}"
```
### Error Handling
```python
@pytest.mark.asyncio
async def test_empty_input(runner, auto_responder, mock_mode):
"""Test graceful handling of empty input."""
await auto_responder.start()
try:
result = await runner.run({"query": ""})
finally:
await auto_responder.stop()
# Agent should either fail gracefully or produce an error message
output = result.output or {}
assert not result.success or output.get("error"), "Should handle empty input"
```
### Feedback Loop
```python
@pytest.mark.asyncio
async def test_feedback_loop_terminates(runner, auto_responder, mock_mode):
"""Test that feedback loops don't run forever."""
await auto_responder.start()
try:
result = await runner.run({"query": "test"})
finally:
await auto_responder.stop()
visits = result.node_visit_counts or {}
for node_id, count in visits.items():
assert count <= 5, f"Node {node_id} visited {count} times — possible infinite loop"
```
---
## MCP Tool Reference
### Phase 1: Test Generation
```python
# Check existing tests
list_tests(goal_id, agent_path)
# Get constraint test guidelines (returns templates, NOT generated tests)
generate_constraint_tests(goal_id, goal_json, agent_path)
# Returns: output_file, file_header, test_template, constraints_formatted, test_guidelines
# Get success criteria test guidelines
generate_success_tests(goal_id, goal_json, node_names, tool_names, agent_path)
# Returns: output_file, file_header, test_template, success_criteria_formatted, test_guidelines
```
### Phase 2: Execution
```python
# Automated regression (no checkpoints, fresh runs)
run_tests(goal_id, agent_path, test_types='["all"]', parallel=-1, fail_fast=False)
# Run only specific test types
run_tests(goal_id, agent_path, test_types='["constraint"]')
run_tests(goal_id, agent_path, test_types='["success"]')
```
```bash
# Iterative debugging with checkpoints (via CLI)
uv run hive run exports/{agent_name} --input '{"query": "test"}'
```
### Phase 3: Analysis
```python
# Debug a specific failed test
debug_test(goal_id, test_name, agent_path)
# Find failed sessions
list_agent_sessions(agent_work_dir, status="failed", limit=5)
# Inspect session state (excludes memory values)
get_agent_session_state(agent_work_dir, session_id)
# Inspect memory data
get_agent_session_memory(agent_work_dir, session_id, key="research_results")
# Runtime logs: L1 summaries
query_runtime_logs(agent_work_dir, status="needs_attention")
# Runtime logs: L2 per-node details
query_runtime_log_details(agent_work_dir, run_id, needs_attention_only=True)
# Runtime logs: L3 tool/LLM raw data
query_runtime_log_raw(agent_work_dir, run_id, node_id="research")
# Find clean checkpoints
list_agent_checkpoints(agent_work_dir, session_id, is_clean="true")
# Compare checkpoints (memory diff)
compare_agent_checkpoints(agent_work_dir, session_id, cp_before, cp_after)
```
### Phase 5: Recovery
```python
# Inspect checkpoint before resuming
get_agent_checkpoint(agent_work_dir, session_id, checkpoint_id)
# Empty checkpoint_id = latest checkpoint
```
```bash
# Resume from checkpoint via CLI (headless)
uv run hive run exports/{agent_name} \
--resume-session {session_id} --checkpoint {checkpoint_id}
```
---
## Anti-Patterns
| Don't | Do Instead |
|-------|-----------|
| Use `default_agent.run()` in tests | Use `runner.run()` with `auto_responder` fixtures (goes through AgentRuntime) |
| Re-run entire agent when a late node fails | Resume from last clean checkpoint |
| Treat `result.success` as goal achieved | Check `result.output` for actual criteria |
| Access `result.output["key"]` directly | Use `result.output.get("key")` |
| Fix random things hoping tests pass | Analyze L2/L3 logs to find root cause first |
| Write 30+ tests | Write 8-15 focused tests |
| Skip credential check | Use `/hive-credentials` before testing |
| Confuse `exports/` with `~/.hive/agents/` | Code in `exports/`, runtime data in `~/.hive/` |
| Use `run_tests` for iterative debugging | Use headless CLI with checkpoints for iterative debugging |
| Use headless CLI for final regression | Use `run_tests` for automated regression |
| Use `--tui` from Claude Code | Use headless `run` command — TUI hangs in non-interactive shells |
| Test client-facing nodes from Claude Code | Use mock mode, or have the user run the agent in their terminal |
| Run tests without reading goal first | Always understand the goal before writing tests |
| Skip Phase 3 analysis and guess | Use session + log tools to identify root cause |
---
## Example Walkthrough: Deep Research Agent
A complete iteration showing the test loop for an agent with nodes: `intake → research → review → report`.
### Phase 1: Generate tests
```python
# Read the goal
Read(file_path="exports/deep_research_agent/agent.py")
# Get success criteria test guidelines
result = generate_success_tests(
goal_id="rigorous-interactive-research",
goal_json='{"id": "rigorous-interactive-research", "success_criteria": [{"id": "source-diversity", "target": ">=5"}, {"id": "citation-coverage", "target": "100%"}, {"id": "report-completeness", "target": "90%"}]}',
node_names="intake,research,review,report",
tool_names="web_search,web_scrape",
agent_path="exports/deep_research_agent"
)
# Write tests
Write(
file_path=result["output_file"],
content=result["file_header"] + "\n\n" + test_code
)
```
### Phase 2: First execution
```python
run_tests(
goal_id="rigorous-interactive-research",
agent_path="exports/deep_research_agent",
fail_fast=True
)
```
Result: `test_success_source_diversity` fails — agent only found 2 sources instead of 5.
### Phase 3: Analyze
```python
# Debug the failing test
debug_test(
goal_id="rigorous-interactive-research",
test_name="test_success_source_diversity",
agent_path="exports/deep_research_agent"
)
# → ASSERTION_FAILURE: Expected >= 5 sources, got 2
# Find the session
list_agent_sessions(
agent_work_dir="~/.hive/agents/deep_research_agent",
status="completed",
limit=1
)
# → session_20260209_150000_abc12345
# See what the research node produced
get_agent_session_memory(
agent_work_dir="~/.hive/agents/deep_research_agent",
session_id="session_20260209_150000_abc12345",
key="research_results"
)
# → Only 2 web_search calls made, each returned 1 source
# Check the LLM's behavior in the research node
query_runtime_log_raw(
agent_work_dir="~/.hive/agents/deep_research_agent",
run_id="session_20260209_150000_abc12345",
node_id="research"
)
# → LLM called web_search only twice, then called set_output
```
Root cause: The research node's prompt doesn't tell the LLM to search for at least 5 diverse sources. It stops after the first couple of searches.
### Phase 4: Fix the prompt
```python
Read(file_path="exports/deep_research_agent/nodes/__init__.py")
Edit(
file_path="exports/deep_research_agent/nodes/__init__.py",
old_string='system_prompt="Search for information on the user\'s topic."',
new_string='system_prompt="Search for information on the user\'s topic. You MUST find at least 5 diverse, authoritative sources. Use multiple different search queries to ensure source diversity. Do not stop searching until you have at least 5 distinct sources."'
)
```
### Phase 5: Resume from checkpoint
For this example, the fix is to the `research` node. If we had run via CLI with checkpointing, we could resume from the checkpoint after `intake` to skip re-running intake:
```bash
# Check if clean checkpoint exists after intake
list_agent_checkpoints(
agent_work_dir="~/.hive/agents/deep_research_agent",
session_id="session_20260209_150000_abc12345",
is_clean="true"
)
# → cp_node_complete_intake_150005
# Resume from after intake, re-run research with fixed prompt
uv run hive run exports/deep_research_agent \
--resume-session session_20260209_150000_abc12345 \
--checkpoint cp_node_complete_intake_150005
```
Or for this simple case (intake is fast), just re-run:
```bash
uv run hive run exports/deep_research_agent --input '{"topic": "test"}'
```
### Phase 6: Final verification
```python
run_tests(
goal_id="rigorous-interactive-research",
agent_path="exports/deep_research_agent"
)
# → All 12 tests pass
```
---
## Test File Structure
```
exports/{agent_name}/
├── agent.py ← Agent to test (goal, nodes, edges)
├── nodes/__init__.py ← Node implementations (prompts, config)
├── config.py ← Agent configuration
├── mcp_servers.json ← Tool server config
└── tests/
├── conftest.py ← Shared fixtures + safe access helpers
├── test_constraints.py ← Constraint tests
├── test_success_criteria.py ← Success criteria tests
└── test_edge_cases.py ← Edge case tests
```
## Integration with Other Skills
| Scenario | From | To | Action |
|----------|------|----|--------|
| Agent built, ready to test | `/hive-create` | `/hive-test` | Generate tests, start loop |
| Prompt fix needed | `/hive-test` Phase 4 | Direct edit | Edit `nodes/__init__.py`, resume |
| Goal definition wrong | `/hive-test` Phase 4 | `/hive-create` | Update goal, may need rebuild |
| Missing credentials | `/hive-test` Phase 3 | `/hive-credentials` | Set up credentials |
| Complex runtime failure | `/hive-test` Phase 3 | `/hive-debugger` | Deep L1/L2/L3 analysis |
| All tests pass | `/hive-test` Phase 6 | Done | Agent validated |
@@ -1,333 +0,0 @@
# Example: Iterative Testing of a Research Agent
This example walks through the full iterative test loop for a research agent that searches the web, reviews findings, and produces a cited report.
## Agent Structure
```
exports/deep_research_agent/
├── agent.py # Goal + graph: intake → research → review → report
├── nodes/__init__.py # Node definitions (system_prompt, input/output keys)
├── config.py # Model config
├── mcp_servers.json # Tools: web_search, web_scrape
└── tests/ # Test files (we'll create these)
```
**Goal:** "Rigorous Interactive Research" — find 5+ diverse sources, cite every claim, produce a complete report.
---
## Phase 1: Generate Tests
### Read the goal
```python
Read(file_path="exports/deep_research_agent/agent.py")
# Extract: goal_id="rigorous-interactive-research"
# success_criteria: source-diversity (>=5), citation-coverage (100%), report-completeness (90%)
# constraints: no-hallucination, source-attribution
```
### Get test guidelines
```python
result = generate_success_tests(
goal_id="rigorous-interactive-research",
goal_json='{"id": "rigorous-interactive-research", "success_criteria": [{"id": "source-diversity", "description": "Use multiple diverse sources", "target": ">=5"}, {"id": "citation-coverage", "description": "Every claim cites its source", "target": "100%"}, {"id": "report-completeness", "description": "Report answers the research questions", "target": "90%"}]}',
node_names="intake,research,review,report",
tool_names="web_search,web_scrape",
agent_path="exports/deep_research_agent"
)
```
### Write tests
```python
Write(
file_path="exports/deep_research_agent/tests/test_success_criteria.py",
content=result["file_header"] + '''
@pytest.mark.asyncio
async def test_success_source_diversity(runner, auto_responder, mock_mode):
"""At least 5 diverse sources are found."""
await auto_responder.start()
try:
result = await runner.run({"query": "impact of remote work on productivity"})
finally:
await auto_responder.stop()
assert result.success, f"Agent failed: {result.error}"
output = result.output or {}
sources = output.get("sources", [])
if isinstance(sources, list):
assert len(sources) >= 5, f"Expected >= 5 sources, got {len(sources)}"
@pytest.mark.asyncio
async def test_success_citation_coverage(runner, auto_responder, mock_mode):
"""Every factual claim in the report cites its source."""
await auto_responder.start()
try:
result = await runner.run({"query": "climate change effects on agriculture"})
finally:
await auto_responder.stop()
assert result.success, f"Agent failed: {result.error}"
output = result.output or {}
report = output.get("report", "")
# Check that report contains numbered references
assert "[1]" in str(report) or "[source" in str(report).lower(), "Report lacks citations"
@pytest.mark.asyncio
async def test_success_report_completeness(runner, auto_responder, mock_mode):
"""Report addresses the original research question."""
query = "pros and cons of nuclear energy"
await auto_responder.start()
try:
result = await runner.run({"query": query})
finally:
await auto_responder.stop()
assert result.success, f"Agent failed: {result.error}"
output = result.output or {}
report = output.get("report", "")
assert len(str(report)) > 200, f"Report too short: {len(str(report))} chars"
@pytest.mark.asyncio
async def test_empty_query_handling(runner, auto_responder, mock_mode):
"""Agent handles empty input gracefully."""
await auto_responder.start()
try:
result = await runner.run({"query": ""})
finally:
await auto_responder.stop()
output = result.output or {}
assert not result.success or output.get("error"), "Should handle empty query"
@pytest.mark.asyncio
async def test_feedback_loop_terminates(runner, auto_responder, mock_mode):
"""Feedback loop between review and research terminates."""
await auto_responder.start()
try:
result = await runner.run({"query": "quantum computing basics"})
finally:
await auto_responder.stop()
visits = result.node_visit_counts or {}
for node_id, count in visits.items():
assert count <= 5, f"Node {node_id} visited {count} times"
'''
)
```
---
## Phase 2: First Execution
```python
run_tests(
goal_id="rigorous-interactive-research",
agent_path="exports/deep_research_agent",
fail_fast=True
)
```
**Result:**
```json
{
"overall_passed": false,
"summary": {"total": 5, "passed": 3, "failed": 2, "pass_rate": "60.0%"},
"failures": [
{"test_name": "test_success_source_diversity", "details": "AssertionError: Expected >= 5 sources, got 2"},
{"test_name": "test_success_citation_coverage", "details": "AssertionError: Report lacks citations"}
]
}
```
---
## Phase 3: Analyze (Iteration 1)
### Debug the first failure
```python
debug_test(
goal_id="rigorous-interactive-research",
test_name="test_success_source_diversity",
agent_path="exports/deep_research_agent"
)
# Category: ASSERTION_FAILURE — Expected >= 5 sources, got 2
```
### Find the session and inspect memory
```python
list_agent_sessions(
agent_work_dir="~/.hive/agents/deep_research_agent",
status="completed",
limit=1
)
# → session_20260209_150000_abc12345
get_agent_session_memory(
agent_work_dir="~/.hive/agents/deep_research_agent",
session_id="session_20260209_150000_abc12345",
key="research_results"
)
# → Only 2 sources found. LLM stopped searching after 2 queries.
```
### Check LLM behavior in the research node
```python
query_runtime_log_raw(
agent_work_dir="~/.hive/agents/deep_research_agent",
run_id="session_20260209_150000_abc12345",
node_id="research"
)
# → LLM called web_search twice, got results, immediately called set_output.
# → Prompt doesn't instruct it to find at least 5 sources.
```
**Root cause:** The research node's system_prompt doesn't specify minimum source requirements.
---
## Phase 4: Fix (Iteration 1)
```python
Read(file_path="exports/deep_research_agent/nodes/__init__.py")
# Fix the research node prompt
Edit(
file_path="exports/deep_research_agent/nodes/__init__.py",
old_string='system_prompt="Search for information on the user\'s topic using web search."',
new_string='system_prompt="Search for information on the user\'s topic using web search. You MUST find at least 5 diverse, authoritative sources. Use multiple different search queries with varied keywords. Do NOT call set_output until you have gathered at least 5 distinct sources from different domains."'
)
```
---
## Phase 5: Recover & Resume (Iteration 1)
The fix is to the `research` node. Since this was a `run_tests` execution (no checkpoints), we re-run from scratch:
```python
run_tests(
goal_id="rigorous-interactive-research",
agent_path="exports/deep_research_agent",
fail_fast=True
)
```
**Result:**
```json
{
"overall_passed": false,
"summary": {"total": 5, "passed": 4, "failed": 1, "pass_rate": "80.0%"},
"failures": [
{"test_name": "test_success_citation_coverage", "details": "AssertionError: Report lacks citations"}
]
}
```
Source diversity now passes. Citation coverage still fails.
---
## Phase 3: Analyze (Iteration 2)
```python
debug_test(
goal_id="rigorous-interactive-research",
test_name="test_success_citation_coverage",
agent_path="exports/deep_research_agent"
)
# Category: ASSERTION_FAILURE — Report lacks citations
# Check what the report node produced
list_agent_sessions(
agent_work_dir="~/.hive/agents/deep_research_agent",
status="completed",
limit=1
)
# → session_20260209_151500_def67890
get_agent_session_memory(
agent_work_dir="~/.hive/agents/deep_research_agent",
session_id="session_20260209_151500_def67890",
key="report"
)
# → Report text exists but uses no numbered references.
# → Sources are in memory but report node doesn't cite them.
```
**Root cause:** The report node's prompt doesn't instruct the LLM to include numbered citations.
---
## Phase 4: Fix (Iteration 2)
```python
Edit(
file_path="exports/deep_research_agent/nodes/__init__.py",
old_string='system_prompt="Write a comprehensive report based on the research findings."',
new_string='system_prompt="Write a comprehensive report based on the research findings. You MUST include numbered citations [1], [2], etc. for every factual claim. At the end, include a References section listing all sources with their URLs. Every claim must be traceable to a specific source."'
)
```
---
## Phase 5: Resume (Iteration 2)
The fix is to the `report` node (the last node). To demonstrate checkpoint recovery, run via CLI:
```bash
# Run via CLI to get checkpoints
uv run hive run exports/deep_research_agent --input '{"topic": "climate change effects"}'
# After it runs, find the clean checkpoint before report
list_agent_checkpoints(
agent_work_dir="~/.hive/agents/deep_research_agent",
session_id="session_20260209_152000_ghi34567",
is_clean="true"
)
# → cp_node_complete_review_152100 (after review, before report)
# Resume — skips intake, research, review entirely
uv run hive run exports/deep_research_agent \
--resume-session session_20260209_152000_ghi34567 \
--checkpoint cp_node_complete_review_152100
```
Only the `report` node re-runs with the fixed prompt, using research data from the checkpoint.
---
## Phase 6: Final Verification
```python
run_tests(
goal_id="rigorous-interactive-research",
agent_path="exports/deep_research_agent"
)
```
**Result:**
```json
{
"overall_passed": true,
"summary": {"total": 5, "passed": 5, "failed": 0, "pass_rate": "100.0%"}
}
```
All tests pass.
---
## Summary
| Iteration | Failure | Root Cause | Fix | Recovery |
|-----------|---------|------------|-----|----------|
| 1 | Source diversity (2 < 5) | Research prompt too vague | Added "at least 5 sources" to prompt | Re-run (no checkpoints) |
| 2 | No citations in report | Report prompt lacks citation instructions | Added citation requirements | Checkpoint resume (skipped 3 nodes) |
**Key takeaways:**
- Phase 3 analysis (session memory + L3 logs) identified root causes without guessing
- Checkpoint recovery in iteration 2 saved time by skipping 3 expensive nodes
- Final `run_tests` confirms all scenarios pass end-to-end
-526
View File
@@ -1,526 +0,0 @@
---
name: hive
description: Complete workflow for building, implementing, and testing goal-driven agents. Orchestrates hive-* skills. Use when starting a new agent project, unsure which skill to use, or need end-to-end guidance.
license: Apache-2.0
metadata:
author: hive
version: "2.0"
type: workflow-orchestrator
orchestrates:
- hive-concepts
- hive-create
- hive-patterns
- hive-test
- hive-credentials
- hive-debugger
---
# Agent Development Workflow
**THIS IS AN EXECUTABLE WORKFLOW. DO NOT explore the codebase or read source files. ROUTE to the correct skill IMMEDIATELY.**
When this skill is loaded, **ALWAYS use the AskUserQuestion tool** to present options:
```
Use AskUserQuestion with these options:
- "Build a new agent" → Then invoke /hive-create
- "Test an existing agent" → Then invoke /hive-test
- "Learn agent concepts" → Then invoke /hive-concepts
- "Optimize agent design" → Then invoke /hive-patterns
- "Set up credentials" → Then invoke /hive-credentials
- "Debug a failing agent" → Then invoke /hive-debugger
- "Other" (please describe what you want to achieve)
```
**DO NOT:** Read source files, explore the codebase, search for code, or do any investigation before routing. The sub-skills handle all of that.
---
Complete Standard Operating Procedure (SOP) for building production-ready goal-driven agents.
## Overview
This workflow orchestrates specialized skills to take you from initial concept to production-ready agent:
1. **Understand Concepts**`/hive-concepts` (optional)
2. **Build Structure**`/hive-create`
3. **Optimize Design**`/hive-patterns` (optional)
4. **Setup Credentials**`/hive-credentials` (if agent uses tools requiring API keys)
5. **Test & Validate**`/hive-test`
6. **Debug Issues**`/hive-debugger` (if agent fails at runtime)
## When to Use This Workflow
Use this meta-skill when:
- Starting a new agent from scratch
- Unclear which skill to use first
- Need end-to-end guidance for agent development
- Want consistent, repeatable agent builds
**Skip this workflow** if:
- You only need to test an existing agent → use `/hive-test` directly
- You know exactly which phase you're in → use specific skill directly
## Quick Decision Tree
```
"Need to understand agent concepts" → hive-concepts
"Build a new agent" → hive-create
"Optimize my agent design" → hive-patterns
"Need client-facing nodes or feedback loops" → hive-patterns
"Set up API keys for my agent" → hive-credentials
"Test my agent" → hive-test
"My agent is failing/stuck/has errors" → hive-debugger
"Not sure what I need" → Read phases below, then decide
"Agent has structure but needs implementation" → See agent directory STATUS.md
```
## Phase 0: Understand Concepts (Optional)
**Skill**: `/hive-concepts`
**Input**: Questions about agent architecture
### When to Use
- First time building an agent
- Need to understand node types, edges, goals
- Want to validate tool availability
- Learning about event loop architecture and client-facing nodes
### What This Phase Provides
- Architecture overview (Python packages, not JSON)
- Core concepts (Goal, Node, Edge, Event Loop, Judges)
- Tool discovery and validation procedures
- Workflow overview
**Skip this phase** if you already understand agent fundamentals.
## Phase 1: Build Agent Structure
**Skill**: `/hive-create`
**Input**: User requirements ("Build an agent that...") or a template to start from
### What This Phase Does
Creates the complete agent architecture:
- Package structure (`exports/agent_name/`)
- Goal with success criteria and constraints
- Workflow graph (nodes and edges)
- Node specifications
- CLI interface
- Documentation
### Process
1. **Create package** - Directory structure with skeleton files
2. **Define goal** - Success criteria and constraints written to agent.py
3. **Design nodes** - Each node approved and written incrementally
4. **Connect edges** - Workflow graph with conditional routing
5. **Finalize** - Agent class, exports, and documentation
### Outputs
-`exports/agent_name/` package created
- ✅ Goal defined in agent.py
- ✅ 3-5 success criteria defined
- ✅ 1-5 constraints defined
- ✅ 5-10 nodes specified in nodes/__init__.py
- ✅ 8-15 edges connecting workflow
- ✅ Validated structure (passes `uv run python -m agent_name validate`)
- ✅ README.md with usage instructions
- ✅ CLI commands (info, validate, run, shell)
### Success Criteria
You're ready for Phase 2 when:
- Agent structure validates without errors
- All nodes and edges are defined
- CLI commands work (info, validate)
- You see: "Agent complete: exports/agent_name/"
### Common Outputs
The hive-create skill produces:
```
exports/agent_name/
├── __init__.py (package exports)
├── __main__.py (CLI interface)
├── agent.py (goal, graph, agent class)
├── nodes/__init__.py (node specifications)
├── config.py (configuration)
├── implementations.py (may be created for Python functions)
└── README.md (documentation)
```
### Next Steps
**If structure complete and validated:**
→ Check `exports/agent_name/STATUS.md` or `IMPLEMENTATION_GUIDE.md`
→ These files explain implementation options
→ You may need to add Python functions or MCP tools (not covered by current skills)
**If want to optimize design:**
→ Proceed to Phase 1.5 (hive-patterns)
**If ready to test:**
→ Proceed to Phase 2
## Phase 1.5: Optimize Design (Optional)
**Skill**: `/hive-patterns`
**Input**: Completed agent structure
### When to Use
- Want to add client-facing blocking or feedback edges
- Need judge patterns for output validation
- Want fan-out/fan-in (parallel execution)
- Need error handling patterns
- Want best practices guidance
### What This Phase Provides
- Client-facing interaction patterns
- Feedback edge routing with nullable output keys
- Judge patterns (implicit, SchemaJudge)
- Fan-out/fan-in parallel execution
- Context management and spillover patterns
- Anti-patterns to avoid
**Skip this phase** if your agent design is straightforward.
## Phase 2: Test & Validate
**Skill**: `/hive-test`
**Input**: Working agent from Phase 1
### What This Phase Does
Guides the creation and execution of a comprehensive test suite:
- Constraint tests
- Success criteria tests
- Edge case tests
- Integration tests
### Process
1. **Analyze agent** - Read goal, constraints, success criteria
2. **Generate tests** - The calling agent writes pytest files in `exports/agent_name/tests/` using hive-test guidelines and templates
3. **User approval** - Review and approve each test
4. **Run evaluation** - Execute tests and collect results
5. **Debug failures** - Identify and fix issues
6. **Iterate** - Repeat until all tests pass
### Outputs
- ✅ Test files in `exports/agent_name/tests/`
- ✅ Test report with pass/fail metrics
- ✅ Coverage of all success criteria
- ✅ Coverage of all constraints
- ✅ Edge case handling verified
### Success Criteria
You're done when:
- All tests pass
- All success criteria validated
- All constraints verified
- Agent handles edge cases
- Test coverage is comprehensive
### Next Steps
**Agent ready for:**
- Production deployment
- Integration into larger systems
- Documentation and handoff
- Continuous monitoring
## Phase Transitions
### From Phase 1 to Phase 2
**Trigger signals:**
- "Agent complete: exports/..."
- Structure validation passes
- README indicates implementation complete
**Before proceeding:**
- Verify agent can be imported: `from exports.agent_name import default_agent`
- Check if implementation is needed (see STATUS.md or IMPLEMENTATION_GUIDE.md)
- Confirm agent executes without import errors
### Skipping Phases
**When to skip Phase 1:**
- Agent structure already exists
- Only need to add tests
- Modifying existing agent
**When to skip Phase 2:**
- Prototyping or exploring
- Agent not production-bound
- Manual testing sufficient
## Common Patterns
### Pattern 1: Complete New Build (Simple)
```
User: "Build an agent that monitors files"
→ Use /hive-create
→ Agent structure created
→ Use /hive-test
→ Tests created and passing
→ Done: Production-ready agent
```
### Pattern 1b: Complete New Build (With Learning)
```
User: "Build an agent (first time)"
→ Use /hive-concepts (understand concepts)
→ Use /hive-create (build structure)
→ Use /hive-patterns (optimize design)
→ Use /hive-test (validate)
→ Done: Production-ready agent
```
### Pattern 1c: Build from Template
```
User: "Build an agent based on the deep research template"
→ Use /hive-create
→ Select "From a template" path
→ Pick template, name new agent
→ Review/modify goal, nodes, graph
→ Agent exported with customizations
→ Use /hive-test
→ Done: Customized agent
```
### Pattern 2: Test Existing Agent
```
User: "Test my agent at exports/my_agent"
→ Skip Phase 1
→ Use /hive-test directly
→ Tests created
→ Done: Validated agent
```
### Pattern 3: Iterative Development
```
User: "Build an agent"
→ Use /hive-create (Phase 1)
→ Implementation needed (see STATUS.md)
→ [User implements functions]
→ Use /hive-test (Phase 2)
→ Tests reveal bugs
→ [Fix bugs manually]
→ Re-run tests
→ Done: Working agent
```
### Pattern 4: Agent with Review Loops and HITL Checkpoints
```
User: "Build an agent with human review and feedback loops"
→ Use /hive-concepts (learn event loop, client-facing nodes)
→ Use /hive-create (build structure with feedback edges)
→ Use /hive-patterns (implement client-facing + feedback patterns)
→ Use /hive-test (validate review flows and edge routing)
→ Done: Agent with HITL checkpoints and review loops
```
## Skill Dependencies
```
hive (meta-skill)
├── hive-concepts (foundational)
│ ├── Architecture concepts (event loop, judges)
│ ├── Node types (event_loop, function)
│ ├── Edge routing and priority
│ ├── Tool discovery procedures
│ └── Workflow overview
├── hive-create (procedural)
│ ├── Creates package structure
│ ├── Defines goal
│ ├── Adds nodes (event_loop, function)
│ ├── Connects edges with priority routing
│ ├── Finalizes agent class
│ └── Requires: hive-concepts
├── hive-patterns (reference)
│ ├── Client-facing interaction patterns
│ ├── Feedback edges and review loops
│ ├── Judge patterns (implicit, SchemaJudge)
│ ├── Fan-out/fan-in parallel execution
│ └── Context management and anti-patterns
├── hive-credentials (utility)
│ ├── Detects missing credentials
│ ├── Offers auth method choices (Aden OAuth, direct API key)
│ ├── Stores securely in ~/.hive/credentials
│ └── Validates with health checks
├── hive-test (validation)
│ ├── Reads agent goal
│ ├── Generates tests
│ ├── Runs evaluation
│ └── Reports results
└── hive-debugger (troubleshooting)
├── Monitors runtime logs (L1/L2/L3)
├── Identifies retry loops, tool failures
├── Categorizes issues (10 categories)
└── Provides fix recommendations
```
## Troubleshooting
### "Agent structure won't validate"
- Check node IDs match between nodes/__init__.py and agent.py
- Verify all edges reference valid node IDs
- Ensure entry_node exists in nodes list
- Run: `PYTHONPATH=exports uv run python -m agent_name validate`
### "Agent has structure but won't run"
- Check for STATUS.md or IMPLEMENTATION_GUIDE.md in agent directory
- Implementation may be needed (Python functions or MCP tools)
- This is expected - hive-create creates structure, not implementation
- See implementation guide for completion options
### "Tests are failing"
- Review test output for specific failures
- Check agent goal and success criteria
- Verify constraints are met
- Use `/hive-test` to debug and iterate
- Fix agent code and re-run tests
### "Agent is failing at runtime"
- Use `/hive-debugger` to analyze runtime logs
- The debugger identifies retry loops, tool failures, and stalled execution
- Get actionable fix recommendations with code changes
- Monitor the agent in real-time during TUI sessions
### "Not sure which phase I'm in"
Run these checks:
```bash
# Check if agent structure exists
ls exports/my_agent/agent.py
# Check if it validates
PYTHONPATH=exports uv run python -m my_agent validate
# Check if tests exist
ls exports/my_agent/tests/
# If structure exists and validates → Phase 2 (testing)
# If structure doesn't exist → Phase 1 (building)
# If tests exist but failing → Debug phase
```
## Best Practices
### For Phase 1 (Building)
1. **Start with clear requirements** - Know what the agent should do
2. **Define success criteria early** - Measurable goals drive design
3. **Keep nodes focused** - One responsibility per node
4. **Use descriptive names** - Node IDs should explain purpose
5. **Validate incrementally** - Check structure after each major addition
### For Phase 2 (Testing)
1. **Test constraints first** - Hard requirements must pass
2. **Mock external dependencies** - Use mock mode for LLMs/APIs
3. **Cover edge cases** - Test failures, not just success paths
4. **Iterate quickly** - Fix one test at a time
5. **Document test patterns** - Future tests follow same structure
### General Workflow
1. **Use version control** - Git commit after each phase
2. **Document decisions** - Update README with changes
3. **Keep iterations small** - Build → Test → Fix → Repeat
4. **Preserve working states** - Tag successful iterations
5. **Learn from failures** - Failed tests reveal design issues
## Exit Criteria
You're done with the workflow when:
✅ Agent structure validates
✅ All tests pass
✅ Success criteria met
✅ Constraints verified
✅ Documentation complete
✅ Agent ready for deployment
## Additional Resources
- **hive-concepts**: See `.claude/skills/hive-concepts/SKILL.md`
- **hive-create**: See `.claude/skills/hive-create/SKILL.md`
- **hive-patterns**: See `.claude/skills/hive-patterns/SKILL.md`
- **hive-test**: See `.claude/skills/hive-test/SKILL.md`
- **Agent framework docs**: See `core/README.md`
- **Example agents**: See `exports/` directory
## Summary
This workflow provides a proven path from concept to production-ready agent:
1. **Learn** with `/hive-concepts` → Understand fundamentals (optional)
2. **Build** with `/hive-create` → Get validated structure
3. **Optimize** with `/hive-patterns` → Apply best practices (optional)
4. **Configure** with `/hive-credentials` → Set up API keys (if needed)
5. **Test** with `/hive-test` → Get verified functionality
6. **Debug** with `/hive-debugger` → Fix runtime issues (if needed)
The workflow is **flexible** - skip phases as needed, iterate freely, and adapt to your specific requirements. The goal is **production-ready agents** built with **consistent, repeatable processes**.
## Skill Selection Guide
**Choose hive-concepts when:**
- First time building agents
- Need to understand event loop architecture
- Validating tool availability
- Learning about node types, edges, and judges
**Choose hive-create when:**
- Actually building an agent
- Have clear requirements
- Ready to write code
- Want step-by-step guidance
- Want to start from an existing template and customize it
**Choose hive-patterns when:**
- Agent structure complete
- Need client-facing nodes or feedback edges
- Implementing review loops or fan-out/fan-in
- Want judge patterns or context management
- Want best practices
**Choose hive-test when:**
- Agent structure complete
- Ready to validate functionality
- Need comprehensive test coverage
- Testing feedback loops, output keys, or fan-out
**Choose hive-debugger when:**
- Agent is failing or stuck at runtime
- Seeing retry loops or escalations
- Tool calls are failing
- Need to understand why a node isn't completing
- Want real-time monitoring of agent execution
@@ -1,199 +0,0 @@
# Example: File Monitor Agent
This example shows the complete /hive workflow in action for building a file monitoring agent.
## Initial Request
```
User: "Build an agent that monitors ~/Downloads and copies new files to ~/Documents"
```
## Phase 1: Building (20 minutes)
### Step 1: Create Structure
Agent invokes `/hive-create` skill and:
1. Creates `exports/file_monitor_agent/` package
2. Writes skeleton files (__init__.py, __main__.py, agent.py, etc.)
**Output**: Package structure visible immediately
### Step 2: Define Goal
```python
goal = Goal(
id="file-monitor-copy",
name="Automated File Monitor & Copy",
success_criteria=[
# 100% detection rate
# 100% copy success
# 100% conflict resolution
# >99% uptime
],
constraints=[
# Preserve originals
# Handle errors gracefully
# Track state
# Respect permissions
]
)
```
**Output**: Goal written to agent.py
### Step 3: Design Nodes
7 nodes approved and written incrementally:
1. `initialize-state` - Set up tracking
2. `list-downloads` - Scan directory
3. `identify-new-files` - Find new files
4. `check-for-new-files` - Router
5. `copy-files` - Copy with conflict resolution
6. `update-state` - Mark as processed
7. `wait-interval` - Sleep between cycles
**Output**: All nodes in nodes/__init__.py
### Step 4: Connect Edges
8 edges connecting the workflow loop:
```
initialize → list → identify → check
↓ ↓
copy wait
↓ ↑
update ↓
↓ ↓
wait → list (loop)
```
**Output**: Edges written to agent.py
### Step 5: Finalize
```bash
$ PYTHONPATH=exports uv run python -m file_monitor_agent validate
✓ Agent is valid
$ PYTHONPATH=exports uv run python -m file_monitor_agent info
Agent: File Monitor & Copy Agent
Nodes: 7
Edges: 8
```
**Phase 1 Complete**: Structure validated ✅
### Status After Phase 1
```
exports/file_monitor_agent/
├── __init__.py ✅ (exports)
├── __main__.py ✅ (CLI)
├── agent.py ✅ (goal, graph, agent class)
├── nodes/__init__.py ✅ (7 nodes)
├── config.py ✅ (configuration)
├── implementations.py ✅ (Python functions)
├── README.md ✅ (documentation)
├── IMPLEMENTATION_GUIDE.md ✅ (next steps)
└── STATUS.md ✅ (current state)
```
**Note**: Implementation gap exists - data flow needs connection (covered in STATUS.md)
## Phase 2: Testing (25 minutes)
### Step 1: Analyze Agent
Agent invokes `/hive-test` skill and:
1. Reads goal from `exports/file_monitor_agent/agent.py`
2. Identifies 4 success criteria to test
3. Identifies 4 constraints to verify
4. Plans test coverage
### Step 2: Generate Tests
Creates test files:
```
exports/file_monitor_agent/tests/
├── conftest.py (fixtures)
├── test_constraints.py (4 constraint tests)
├── test_success_criteria.py (4 success tests)
└── test_edge_cases.py (error handling)
```
Tests approved incrementally by user.
### Step 3: Run Tests
```bash
$ PYTHONPATH=exports uv run pytest exports/file_monitor_agent/tests/
test_constraints.py::test_preserves_originals PASSED
test_constraints.py::test_handles_errors PASSED
test_constraints.py::test_tracks_state PASSED
test_constraints.py::test_respects_permissions PASSED
test_success_criteria.py::test_detects_all_files PASSED
test_success_criteria.py::test_copies_all_files PASSED
test_success_criteria.py::test_resolves_conflicts PASSED
test_success_criteria.py::test_continuous_run PASSED
test_edge_cases.py::test_empty_directory PASSED
test_edge_cases.py::test_permission_denied PASSED
test_edge_cases.py::test_disk_full PASSED
test_edge_cases.py::test_large_files PASSED
========================== 12 passed in 3.42s ==========================
```
**Phase 2 Complete**: All tests pass ✅
## Final Output
**Production-Ready Agent:**
```bash
# Run the agent
./RUN_AGENT.sh
# Or manually
PYTHONPATH=exports uv run python -m file_monitor_agent run
```
**Capabilities:**
- Monitors ~/Downloads continuously
- Copies new files to ~/Documents
- Resolves conflicts with timestamps
- Handles errors gracefully
- Tracks processed files
- Runs as background service
**Total Time**: ~45 minutes from concept to production
## Key Learnings
1. **Incremental building** - Files written immediately, visible throughout
2. **Validation early** - Structure validated before moving to implementation
3. **Test-driven** - Tests reveal real behavior
4. **Documentation included** - README, STATUS, and guides auto-generated
5. **Repeatable process** - Same workflow for any agent type
## Variations
**For simpler agents:**
- Fewer nodes (3-5 instead of 7)
- Simpler workflow (linear instead of looping)
- Faster build time (10-15 minutes)
**For complex agents:**
- More nodes (10-15+)
- Multiple subgraphs
- Pause/resume points for human-in-the-loop
- Longer build time (45-60 minutes)
The workflow scales to your needs!
-7
View File
@@ -1,7 +0,0 @@
# Project-level Codex config for Hive.
# Keep this file minimal: MCP connectivity + skill discovery.
[mcp_servers.agent-builder]
command = "uv"
args = ["run", "--directory", "core", "-m", "framework.mcp.agent_builder_server"]
cwd = "."
-20
View File
@@ -1,20 +0,0 @@
{
"mcpServers": {
"agent-builder": {
"command": "python",
"args": ["-m", "framework.mcp.agent_builder_server"],
"cwd": "core",
"env": {
"PYTHONPATH": "../tools/src"
}
},
"tools": {
"command": "python",
"args": ["mcp_server.py", "--stdio"],
"cwd": "tools",
"env": {
"PYTHONPATH": "src"
}
}
}
}
-1
View File
@@ -1 +0,0 @@
../../.claude/skills/hive
-1
View File
@@ -1 +0,0 @@
../../.claude/skills/hive-concepts
-1
View File
@@ -1 +0,0 @@
../../.claude/skills/hive-create
-1
View File
@@ -1 +0,0 @@
../../.claude/skills/hive-credentials
-1
View File
@@ -1 +0,0 @@
../../.claude/skills/hive-patterns
-1
View File
@@ -1 +0,0 @@
../../.claude/skills/hive-test
+5 -2
View File
@@ -62,8 +62,11 @@ jobs:
uv run pytest tests/ -v
test-tools:
name: Test Tools
runs-on: ubuntu-latest
name: Test Tools (${{ matrix.os }})
runs-on: ${{ matrix.os }}
strategy:
matrix:
os: [ubuntu-latest, windows-latest]
steps:
- uses: actions/checkout@v4
+2
View File
@@ -70,6 +70,7 @@ exports/*
.agent-builder-sessions/*
.claude/settings.local.json
.claude/skills/ship-it/
.venv
@@ -78,3 +79,4 @@ core/tests/*dumps/*
screenshots/*
.gemini/*
-30
View File
@@ -1,30 +0,0 @@
{
"mcpServers": {
"agent-builder": {
"command": "uv",
"args": [
"run",
"python",
"-m",
"framework.mcp.agent_builder_server"
],
"cwd": "core",
"env": {
"PYTHONPATH": "../tools/src"
}
},
"tools": {
"command": "uv",
"args": [
"run",
"python",
"mcp_server.py",
"--stdio"
],
"cwd": "tools",
"env": {
"PYTHONPATH": "src"
}
}
}
}
-1
View File
@@ -1 +0,0 @@
../../.claude/skills/hive
-1
View File
@@ -1 +0,0 @@
../../.claude/skills/hive-concepts
-1
View File
@@ -1 +0,0 @@
../../.claude/skills/hive-create
-1
View File
@@ -1 +0,0 @@
../../.claude/skills/hive-credentials
-1
View File
@@ -1 +0,0 @@
../../.claude/skills/hive-debugger
-1
View File
@@ -1 +0,0 @@
../../.claude/skills/hive-patterns
-1
View File
@@ -1 +0,0 @@
../../.claude/skills/hive-test
-1
View File
@@ -1 +0,0 @@
../../.claude/skills/triage-issue
-7
View File
@@ -1,7 +0,0 @@
{
"recommendations": [
"charliermarsh.ruff",
"editorconfig.editorconfig",
"ms-python.python"
]
}
+34
View File
@@ -0,0 +1,34 @@
# Repository Guidelines
Shared agent instructions for this workspace.
## Deprecations
- **TUI is deprecated.** The terminal UI (`hive tui`) is no longer maintained. Use the browser-based interface (`hive open`) instead.
## Coding Agent Notes
-
- When working on a GitHub Issue or PR, print the full URL at the end of the task.
- When answering questions, respond with high-confidence answers only: verify in code; do not guess.
- Do not update dependencies casually. Version bumps, patched dependencies, overrides, or vendored dependency changes require explicit approval.
- Add brief comments for tricky logic. Keep files reasonably small when practical; split or refactor large files instead of growing them indefinitely.
- If shared guardrails are available locally, review them; otherwise follow this repo's guidance.
- Use `uv` for Python execution and package management. Do not use `python` or `python3` directly unless the user explicitly asks for it.
- Prefer `uv run` for scripts and tests, and `uv pip` for package operations.
## Multi-Agent Safety
- Do not create, apply, or drop `git stash` entries unless explicitly requested.
- Do not create, remove, or modify `git worktree` checkouts unless explicitly requested.
- Do not switch branches or check out a different branch unless explicitly requested.
- When the user says `push`, you may `git pull --rebase` to integrate latest changes, but never discard other in-progress work.
- When the user says `commit`, commit only your changes. When the user says `commit all`, commit everything in grouped chunks.
- When you see unrecognized files or unrelated changes, keep going and focus on your scoped changes.
## Change Hygiene
- If staged and unstaged diffs are formatting-only, resolve them without asking.
- If a commit or push was already requested, include formatting-only follow-up changes in that same commit when practical.
- Only stop to ask for confirmation when changes are semantic and may alter behavior.
Symlink
+1
View File
@@ -0,0 +1 @@
AGENTS.md
+17 -2
View File
@@ -1,4 +1,4 @@
.PHONY: lint format check test install-hooks help frontend-dev frontend-build
.PHONY: lint format check test install-hooks help frontend-install frontend-dev frontend-build
help: ## Show this help
@grep -E '^[a-zA-Z_-]+:.*?## .*$$' $(MAKEFILE_LIST) | \
@@ -20,13 +20,28 @@ check: ## Run all checks without modifying files (CI-safe)
cd core && ruff format --check .
cd tools && ruff format --check .
test: ## Run all tests
test: ## Run all tests (core + tools, excludes live)
cd core && uv run python -m pytest tests/ -v
cd tools && uv run python -m pytest -v
test-tools: ## Run tool tests only (mocked, no credentials needed)
cd tools && uv run python -m pytest -v
test-live: ## Run live integration tests (requires real API credentials)
cd tools && uv run python -m pytest -m live -s -o "addopts=" --log-cli-level=INFO
test-all: ## Run everything including live tests
cd core && uv run python -m pytest tests/ -v
cd tools && uv run python -m pytest -v
cd tools && uv run python -m pytest -m live -s -o "addopts=" --log-cli-level=INFO
install-hooks: ## Install pre-commit hooks
uv pip install pre-commit
pre-commit install
frontend-install: ## Install frontend npm packages
cd core/frontend && npm install
frontend-dev: ## Start frontend dev server
cd core/frontend && npm run dev
+28 -94
View File
@@ -14,7 +14,7 @@
</p>
<p align="center">
<a href="https://github.com/adenhq/hive/blob/main/LICENSE"><img src="https://img.shields.io/badge/License-Apache%202.0-blue.svg" alt="Apache 2.0 License" /></a>
<a href="https://github.com/aden-hive/hive/blob/main/LICENSE"><img src="https://img.shields.io/badge/License-Apache%202.0-blue.svg" alt="Apache 2.0 License" /></a>
<a href="https://www.ycombinator.com/companies/aden"><img src="https://img.shields.io/badge/Y%20Combinator-Aden-orange" alt="Y Combinator" /></a>
<a href="https://discord.com/invite/MXE49hrKDk"><img src="https://img.shields.io/discord/1172610340073242735?logo=discord&labelColor=%235462eb&logoColor=%23f5f5f5&color=%235462eb" alt="Discord" /></a>
<a href="https://x.com/aden_hq"><img src="https://img.shields.io/twitter/follow/teamaden?logo=X&color=%23f5f5f5" alt="Twitter Follow" /></a>
@@ -37,11 +37,11 @@
## Overview
Build autonomous, reliable, self-improving AI agents without hardcoding workflows. Define your goal through conversation with a coding agent, and the framework generates a node graph with dynamically created connection code. When things break, the framework captures failure data, evolves the agent through the coding agent, and redeploys. Built-in human-in-the-loop nodes, credential management, and real-time monitoring give you control without sacrificing adaptability.
Build autonomous, reliable, self-improving AI agents without hardcoding workflows. Define your goal through conversation with hive coding agent(queen), and the framework generates a node graph with dynamically created connection code. When things break, the framework captures failure data, evolves the agent through the coding agent, and redeploys. Built-in human-in-the-loop nodes, credential management, and real-time monitoring give you control without sacrificing adaptability.
Visit [adenhq.com](https://adenhq.com) for complete documentation, examples, and guides.
https://github.com/user-attachments/assets/846c0cc7-ffd6-47fa-b4b7-495494857a55
[![Hive Demo](https://img.youtube.com/vi/XDOG9fOaLjU/maxresdefault.jpg)](https://www.youtube.com/watch?v=XDOG9fOaLjU)
## Who Is Hive For?
@@ -50,7 +50,7 @@ Hive is designed for developers and teams who want to build **production-grade A
Hive is a good fit if you:
- Want AI agents that **execute real business processes**, not demos
- Prefer **goal-driven development** over hardcoded workflows
- Need **fast or high volume agent execution** over open workflow
- Need **self-healing and adaptive agents** that improve over time
- Require **human-in-the-loop control**, observability, and cost limits
- Plan to run agents in **production environments**
@@ -71,7 +71,7 @@ Use Hive when you need:
- **[Documentation](https://docs.adenhq.com/)** - Complete guides and API reference
- **[Self-Hosting Guide](https://docs.adenhq.com/getting-started/quickstart)** - Deploy Hive on your infrastructure
- **[Changelog](https://github.com/adenhq/hive/releases)** - Latest updates and releases
- **[Changelog](https://github.com/aden-hive/hive/releases)** - Latest updates and releases
- **[Roadmap](docs/roadmap.md)** - Upcoming features and plans
- **[Report Issues](https://github.com/adenhq/hive/issues)** - Bug reports and feature requests
- **[Contributing](CONTRIBUTING.md)** - How to contribute and submit PRs
@@ -81,7 +81,8 @@ Use Hive when you need:
### Prerequisites
- Python 3.11+ for agent development
- Claude Code, Codex CLI, or Cursor for utilizing agent skills
- An LLM provider that powers the agents
- **ripgrep (optional, recommended on Windows):** The `search_files` tool uses ripgrep for faster file search. If not installed, a Python fallback is used. On Windows: `winget install BurntSushi.ripgrep` or `scoop install ripgrep`
> **Note for Windows Users:** It is strongly recommended to use **WSL (Windows Subsystem for Linux)** or **Git Bash** to run this framework. Some core automation scripts may not execute correctly in standard Command Prompt or PowerShell.
@@ -94,9 +95,10 @@ Use Hive when you need:
```bash
# Clone the repository
git clone https://github.com/adenhq/hive.git
git clone https://github.com/aden-hive/hive.git
cd hive
# Run quickstart setup
./quickstart.sh
```
@@ -109,77 +111,43 @@ This sets up:
- **LLM provider** - Interactive default model configuration
- All required Python dependencies with `uv`
- At last, it will initiate the open hive interface in your browser
> **Tip:** To reopen the dashboard later, run `hive open` from the project directory.
<img width="2500" height="1214" alt="home-screen" src="https://github.com/user-attachments/assets/134d897f-5e75-4874-b00b-e0505f6b45c4" />
### Build Your First Agent
```bash
# Build an agent using Claude Code
claude> /hive
Type the agent you want to build in the home input box
# Test your agent
claude> /hive-debugger
<img width="2500" height="1214" alt="Image" src="https://github.com/user-attachments/assets/1ce19141-a78b-46f5-8d64-dbf987e048f4" />
# (at separate terminal) Launch the interactive dashboard
hive tui
### Use Template Agents
# Or run directly
hive run exports/your_agent_name --input '{"key": "value"}'
```
Click "Try a sample agent" and check the templates. You can run a templates directly or choose to build your version on top of the existing template.
## Coding Agent Support
### Run Agents
### Codex CLI
Now you can run an agent by selectiing the agent (either an existing agent or example agent). You can click the Run button on the top left, or talk to the queen agent and it can run the agent for you.
Hive includes native support for [OpenAI Codex CLI](https://github.com/openai/codex) (v0.101.0+).
1. **Config:** `.codex/config.toml` with `agent-builder` MCP server (tracked in git)
2. **Skills:** `.agents/skills/` symlinks to Hive skills (tracked in git)
3. **Launch:** Run `codex` in the repo root, then type `use hive`
Example:
```
codex> use hive
```
### Opencode
Hive includes native support for [Opencode](https://github.com/opencode-ai/opencode).
1. **Setup:** Run the quickstart script
2. **Launch:** Open Opencode in the project root.
3. **Activate:** Type `/hive` in the chat to switch to the Hive Agent.
4. **Verify:** Ask the agent _"List your tools"_ to confirm the connection.
The agent has access to all Hive skills and can scaffold agents, add tools, and debug workflows directly from the chat.
**[📖 Complete Setup Guide](docs/environment-setup.md)** - Detailed instructions for agent development
### Antigravity IDE Support
Skills and MCP servers are also available in [Antigravity IDE](https://antigravity.google/) (Google's AI-powered IDE). **Easiest:** open a terminal in the hive repo folder and run (use `./` — the script is inside the repo):
```bash
./scripts/setup-antigravity-mcp.sh
```
**Important:** Always restart/refresh Antigravity IDE after running the setup script—MCP servers only load on startup. After restart, **agent-builder** and **tools** MCP servers should connect. Skills are under `.agent/skills/` (symlinks to `.claude/skills/`). See [docs/antigravity-setup.md](docs/antigravity-setup.md) for manual setup and troubleshooting.
<img width="2500" height="1214" alt="Image" src="https://github.com/user-attachments/assets/71c38206-2ad5-49aa-bde8-6698d0bc55f5" />
## Features
- **[Goal-Driven Development](docs/key_concepts/goals_outcome.md)** - Define objectives in natural language; the coding agent generates the agent graph and connection code to achieve them
- **Browser-Use** - Control the browser on your computer to achieve hard tasks
- **Parallel Execution** - Execute the generated graph in parallel. This way you can have multiple agent compelteing the jobs for you
- **[Goal-Driven Generation](docs/key_concepts/goals_outcome.md)** - Define objectives in natural language; the coding agent generates the agent graph and connection code to achieve them
- **[Adaptiveness](docs/key_concepts/evolution.md)** - Framework captures failures, calibrates according to the objectives, and evolves the agent graph
- **[Dynamic Node Connections](docs/key_concepts/graph.md)** - No predefined edges; connection code is generated by any capable LLM based on your goals
- **SDK-Wrapped Nodes** - Every node gets shared memory, local RLM memory, monitoring, tools, and LLM access out of the box
- **[Human-in-the-Loop](docs/key_concepts/graph.md#human-in-the-loop)** - Intervention nodes that pause execution for human input with configurable timeouts and escalation
- **Real-time Observability** - WebSocket streaming for live monitoring of agent execution, decisions, and node-to-node communication
- **Interactive TUI Dashboard** - Terminal-based dashboard with live graph view, event log, and chat interface for agent interaction
- **Cost & Budget Control** - Set spending limits, throttles, and automatic model degradation policies
- **Production-Ready** - Self-hostable, built for scale and reliability
## Integration
<a href="https://github.com/adenhq/hive/tree/main/tools/src/aden_tools/tools"><img width="100%" alt="Integration" src="https://github.com/user-attachments/assets/a1573f93-cf02-4bb8-b3d5-b305b05b1e51" /></a>
<a href="https://github.com/aden-hive/hive/tree/main/tools/src/aden_tools/tools"><img width="100%" alt="Integration" src="https://github.com/user-attachments/assets/a1573f93-cf02-4bb8-b3d5-b305b05b1e51" /></a>
Hive is built to be model-agnostic and system-agnostic.
- **LLM flexibility** - Hive Framework is designed to support various types of LLMs, including hosted and local models through LiteLLM-compatible providers.
@@ -240,35 +208,10 @@ flowchart LR
4. **Control Plane Monitors** → Real-time metrics, budget enforcement, policy management
5. **[Adaptiveness](docs/key_concepts/evolution.md)** → On failure, the system evolves the graph and redeploys automatically
## Run Agents
The `hive` CLI is the primary interface for running agents.
```bash
# Browse and run agents interactively (Recommended)
hive tui
# Run a specific agent directly
hive run exports/my_agent --input '{"task": "Your input here"}'
# Run a specific agent with the TUI dashboard
hive run exports/my_agent --tui
# Interactive REPL
hive shell
```
The TUI scans both `exports/` and `examples/templates/` for available agents.
> **Using Python directly (alternative):** You can also run agents with `PYTHONPATH=exports uv run python -m agent_name run --input '{...}'`
See [environment-setup.md](docs/environment-setup.md) for complete setup instructions.
## Documentation
- **[Developer Guide](docs/developer-guide.md)** - Comprehensive guide for developers
- [Getting Started](docs/getting-started.md) - Quick setup instructions
- [TUI Guide](docs/tui-selection-guide.md) - Interactive dashboard usage
- [Configuration Guide](docs/configuration.md) - All configuration options
- [Architecture Overview](docs/architecture/README.md) - System design and structure
@@ -398,8 +341,7 @@ flowchart TB
```
## Contributing
We welcome contributions from the community! Were especially looking for help building tools, integrations, and example agents for the framework ([check #2805](https://github.com/adenhq/hive/issues/2805)). If youre interested in extending its functionality, this is the perfect place to start. Please see [CONTRIBUTING.md](CONTRIBUTING.md) for guidelines.
We welcome contributions from the community! Were especially looking for help building tools, integrations, and example agents for the framework ([check #2805](https://github.com/aden-hive/hive/issues/2805)). If youre interested in extending its functionality, this is the perfect place to start. Please see [CONTRIBUTING.md](CONTRIBUTING.md) for guidelines.
**Important:** Please get assigned to an issue before submitting a PR. Comment on an issue to claim it, and a maintainer will assign you. Issues with reproducible steps and proposals are prioritized. This helps prevent duplicate work.
@@ -436,7 +378,7 @@ This project is licensed under the Apache License 2.0 - see the [LICENSE](LICENS
**Q: What LLM providers does Hive support?**
Hive supports 100+ LLM providers through LiteLLM integration, including OpenAI (GPT-4, GPT-4o), Anthropic (Claude models), Google Gemini, DeepSeek, Mistral, Groq, and many more. Simply set the appropriate API key environment variable and specify the model name.
Hive supports 100+ LLM providers through LiteLLM integration, including OpenAI (GPT-4, GPT-4o), Anthropic (Claude models), Google Gemini, DeepSeek, Mistral, Groq, and many more. Simply set the appropriate API key environment variable and specify the model name. We recommend using Claude, GLM and Gemini as they have the best performance.
**Q: Can I use Hive with local AI models like Ollama?**
@@ -478,14 +420,6 @@ Visit [docs.adenhq.com](https://docs.adenhq.com/) for complete guides, API refer
Contributions are welcome! Fork the repository, create your feature branch, implement your changes, and submit a pull request. See [CONTRIBUTING.md](CONTRIBUTING.md) for detailed guidelines.
**Q: When will my team start seeing results from Aden's adaptive agents?**
Aden's adaptation loop begins working from the first execution. When an agent fails, the framework captures the failure data, helping developers evolve the agent graph through the coding agent. How quickly this translates to measurable results depends on the complexity of your use case, the quality of your goal definitions, and the volume of executions generating feedback.
**Q: How does Hive compare to other agent frameworks?**
Hive focuses on generating agents that run real business processes, rather than generic agents. This vision emphasizes outcome-driven design, adaptability, and an easy-to-use set of tools and integrations.
---
<p align="center">
+31
View File
@@ -0,0 +1,31 @@
perf: reduce subprocess spawning in quickstart scripts (#4427)
## Problem
Windows process creation (CreateProcess) is 10-100x slower than Linux fork/exec.
The quickstart scripts were spawning 4+ separate `uv run python -c "import X"`
processes to verify imports, adding ~600ms overhead on Windows.
## Solution
Consolidated all import checks into a single batch script that checks multiple
modules in one subprocess call, reducing spawn overhead by ~75%.
## Changes
- **New**: `scripts/check_requirements.py` - Batched import checker
- **New**: `scripts/test_check_requirements.py` - Test suite
- **New**: `scripts/benchmark_quickstart.ps1` - Performance benchmark tool
- **Modified**: `quickstart.ps1` - Updated import verification (2 sections)
- **Modified**: `quickstart.sh` - Updated import verification
## Performance Impact
**Benchmark results on Windows:**
- Before: ~19.8 seconds for import checks
- After: ~4.9 seconds for import checks
- **Improvement: 14.9 seconds saved (75.2% faster)**
## Testing
- ✅ All functional tests pass (`scripts/test_check_requirements.py`)
- ✅ Quickstart scripts work correctly on Windows
- ✅ Error handling verified (invalid imports reported correctly)
- ✅ Performance benchmark confirms 75%+ improvement
Fixes #4427
+10 -10
View File
@@ -64,7 +64,7 @@ To use the agent builder with Claude Desktop or other MCP clients, add this to y
"agent-builder": {
"command": "python",
"args": ["-m", "framework.mcp.agent_builder_server"],
"cwd": "/path/to/goal-agent"
"cwd": "/path/to/hive/core"
}
}
}
@@ -85,14 +85,14 @@ The MCP server provides tools for:
Run an LLM-powered calculator:
```bash
# Single calculation
uv run python -m framework calculate "2 + 3 * 4"
# Run an exported agent
uv run python -m framework run exports/calculator --input '{"expression": "2 + 3 * 4"}'
# Interactive mode
uv run python -m framework interactive
# Interactive shell session
uv run python -m framework shell exports/calculator
# Analyze runs with Builder
uv run python -m framework analyze calculator
# Show agent info
uv run python -m framework info exports/calculator
```
### Using the Runtime
@@ -141,11 +141,11 @@ uv run python -m framework test-run <agent_path> --goal <goal_id> --parallel 4
# Debug failed tests
uv run python -m framework test-debug <agent_path> <test_name>
# List tests for a goal
uv run python -m framework test-list <goal_id>
# List tests for an agent
uv run python -m framework test-list <agent_path>
```
For detailed testing workflows, see the [hive-test skill](../.claude/skills/hive-test/SKILL.md).
For detailed testing workflows, see [developer-guide.md](../docs/developer-guide.md).
### Analyzing Agent Behavior with Builder
+387
View File
@@ -0,0 +1,387 @@
"""OpenAI Codex OAuth PKCE login flow.
Runs the full browser-based OAuth flow so users can authenticate with their
ChatGPT Plus/Pro subscription without needing the Codex CLI installed.
Usage (from quickstart.sh):
uv run python codex_oauth.py
Exit codes:
0 - success (credentials saved to ~/.codex/auth.json)
1 - failure (user cancelled, timeout, or token exchange error)
"""
import base64
import hashlib
import http.server
import json
import os
import platform
import secrets
import subprocess
import sys
import threading
import time
import urllib.error
import urllib.parse
import urllib.request
from datetime import UTC, datetime
from pathlib import Path
# OAuth constants (from the Codex CLI binary)
CLIENT_ID = "app_EMoamEEZ73f0CkXaXp7hrann"
AUTHORIZE_URL = "https://auth.openai.com/oauth/authorize"
TOKEN_URL = "https://auth.openai.com/oauth/token"
REDIRECT_URI = "http://localhost:1455/auth/callback"
SCOPE = "openid profile email offline_access"
CALLBACK_PORT = 1455
# Where to save credentials (same location the Codex CLI uses)
CODEX_AUTH_FILE = Path.home() / ".codex" / "auth.json"
# JWT claim path for account_id
JWT_CLAIM_PATH = "https://api.openai.com/auth"
def _base64url(data: bytes) -> str:
return base64.urlsafe_b64encode(data).rstrip(b"=").decode("ascii")
def generate_pkce() -> tuple[str, str]:
"""Generate PKCE code_verifier and code_challenge (S256)."""
verifier_bytes = secrets.token_bytes(32)
verifier = _base64url(verifier_bytes)
challenge = _base64url(hashlib.sha256(verifier.encode("ascii")).digest())
return verifier, challenge
def build_authorize_url(state: str, challenge: str) -> str:
"""Build the OpenAI OAuth authorize URL with PKCE."""
params = urllib.parse.urlencode(
{
"response_type": "code",
"client_id": CLIENT_ID,
"redirect_uri": REDIRECT_URI,
"scope": SCOPE,
"code_challenge": challenge,
"code_challenge_method": "S256",
"state": state,
"id_token_add_organizations": "true",
"codex_cli_simplified_flow": "true",
"originator": "hive",
}
)
return f"{AUTHORIZE_URL}?{params}"
def exchange_code_for_tokens(code: str, verifier: str) -> dict | None:
"""Exchange the authorization code for tokens."""
data = urllib.parse.urlencode(
{
"grant_type": "authorization_code",
"client_id": CLIENT_ID,
"code": code,
"code_verifier": verifier,
"redirect_uri": REDIRECT_URI,
}
).encode("utf-8")
req = urllib.request.Request(
TOKEN_URL,
data=data,
headers={"Content-Type": "application/x-www-form-urlencoded"},
method="POST",
)
try:
with urllib.request.urlopen(req, timeout=15) as resp:
token_data = json.loads(resp.read())
except (urllib.error.URLError, json.JSONDecodeError, TimeoutError, OSError) as exc:
print(f"\033[0;31mToken exchange failed: {exc}\033[0m", file=sys.stderr)
return None
if not token_data.get("access_token") or not token_data.get("refresh_token"):
print("\033[0;31mToken response missing required fields\033[0m", file=sys.stderr)
return None
return token_data
def decode_jwt_payload(token: str) -> dict | None:
"""Decode the payload of a JWT (no signature verification)."""
try:
parts = token.split(".")
if len(parts) != 3:
return None
payload = parts[1]
# Add padding
padding = 4 - len(payload) % 4
if padding != 4:
payload += "=" * padding
decoded = base64.urlsafe_b64decode(payload)
return json.loads(decoded)
except Exception:
return None
def get_account_id(access_token: str) -> str | None:
"""Extract the ChatGPT account_id from the access token JWT."""
payload = decode_jwt_payload(access_token)
if not payload:
return None
auth = payload.get(JWT_CLAIM_PATH)
if isinstance(auth, dict):
account_id = auth.get("chatgpt_account_id")
if isinstance(account_id, str) and account_id:
return account_id
return None
def save_credentials(token_data: dict, account_id: str) -> None:
"""Save credentials to ~/.codex/auth.json in the same format the Codex CLI uses."""
auth_data = {
"tokens": {
"access_token": token_data["access_token"],
"refresh_token": token_data["refresh_token"],
"account_id": account_id,
},
"auth_mode": "chatgpt",
"last_refresh": datetime.now(UTC).isoformat(),
}
if "id_token" in token_data:
auth_data["tokens"]["id_token"] = token_data["id_token"]
CODEX_AUTH_FILE.parent.mkdir(parents=True, exist_ok=True, mode=0o700)
fd = os.open(CODEX_AUTH_FILE, os.O_WRONLY | os.O_CREAT | os.O_TRUNC, 0o600)
with os.fdopen(fd, "w") as f:
json.dump(auth_data, f, indent=2)
def open_browser(url: str) -> bool:
"""Open the URL in the user's default browser."""
system = platform.system()
try:
devnull = subprocess.DEVNULL
if system == "Darwin":
subprocess.Popen(["open", url], stdout=devnull, stderr=devnull)
elif system == "Windows":
subprocess.Popen(["cmd", "/c", "start", url], stdout=devnull, stderr=devnull)
else:
subprocess.Popen(["xdg-open", url], stdout=devnull, stderr=devnull)
return True
except OSError:
return False
class OAuthCallbackHandler(http.server.BaseHTTPRequestHandler):
"""HTTP handler that captures the OAuth callback."""
auth_code: str | None = None
received_state: str | None = None
def do_GET(self) -> None:
parsed = urllib.parse.urlparse(self.path)
if parsed.path != "/auth/callback":
self.send_response(404)
self.end_headers()
self.wfile.write(b"Not found")
return
params = urllib.parse.parse_qs(parsed.query)
code = params.get("code", [None])[0]
state = params.get("state", [None])[0]
if not code:
self.send_response(400)
self.end_headers()
self.wfile.write(b"Missing authorization code")
return
OAuthCallbackHandler.auth_code = code
OAuthCallbackHandler.received_state = state
self.send_response(200)
self.send_header("Content-Type", "text/html; charset=utf-8")
self.end_headers()
self.wfile.write(
b"<!doctype html><html><head><meta charset='utf-8'/></head>"
b"<body><h2>Authentication successful</h2>"
b"<p>Return to your terminal to continue.</p></body></html>"
)
def log_message(self, format: str, *args: object) -> None:
# Suppress request logging
pass
def wait_for_callback(state: str, timeout_secs: int = 120) -> str | None:
"""Start a local HTTP server and wait for the OAuth callback.
Returns the authorization code on success, None on timeout.
"""
OAuthCallbackHandler.auth_code = None
OAuthCallbackHandler.received_state = None
server = http.server.HTTPServer(("127.0.0.1", CALLBACK_PORT), OAuthCallbackHandler)
server.timeout = 1
deadline = time.time() + timeout_secs
server_thread = threading.Thread(target=_serve_until_done, args=(server, deadline, state))
server_thread.daemon = True
server_thread.start()
server_thread.join(timeout=timeout_secs + 2)
server.server_close()
if OAuthCallbackHandler.auth_code and OAuthCallbackHandler.received_state == state:
return OAuthCallbackHandler.auth_code
return None
def _serve_until_done(server: http.server.HTTPServer, deadline: float, state: str) -> None:
while time.time() < deadline:
server.handle_request()
if OAuthCallbackHandler.auth_code and OAuthCallbackHandler.received_state == state:
return
def parse_manual_input(value: str, expected_state: str) -> str | None:
"""Parse user-pasted redirect URL or auth code."""
value = value.strip()
if not value:
return None
try:
parsed = urllib.parse.urlparse(value)
params = urllib.parse.parse_qs(parsed.query)
code = params.get("code", [None])[0]
state = params.get("state", [None])[0]
if state and state != expected_state:
return None
return code
except Exception:
pass
# Maybe it's just the raw code
if len(value) > 10 and " " not in value:
return value
return None
def main() -> int:
# Generate PKCE and state
verifier, challenge = generate_pkce()
state = secrets.token_hex(16)
# Build URL
auth_url = build_authorize_url(state, challenge)
print()
print("\033[1mOpenAI Codex OAuth Login\033[0m")
print()
# Try to start the local callback server first
try:
server_available = True
# Quick test that port is free
import socket
sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
sock.settimeout(1)
result = sock.connect_ex(("127.0.0.1", CALLBACK_PORT))
sock.close()
if result == 0:
print(f"\033[1;33mPort {CALLBACK_PORT} is in use. Using manual paste mode.\033[0m")
server_available = False
except Exception:
server_available = True
# Open browser
browser_opened = open_browser(auth_url)
if browser_opened:
print(" Browser opened for OpenAI sign-in...")
else:
print(" Could not open browser automatically.")
print()
print(" If the browser didn't open, visit this URL:")
print(f" \033[0;36m{auth_url}\033[0m")
print()
code = None
if server_available:
print(" Waiting for authentication (up to 2 minutes)...")
print(" \033[2mOr paste the redirect URL below if the callback didn't work:\033[0m")
print()
# Start callback server in background
callback_result: list[str | None] = [None]
def run_server() -> None:
callback_result[0] = wait_for_callback(state, timeout_secs=120)
server_thread = threading.Thread(target=run_server)
server_thread.daemon = True
server_thread.start()
# Also accept manual input in parallel
# We poll for both the server result and stdin
try:
import select
while server_thread.is_alive():
# Check if stdin has data (non-blocking on unix)
if hasattr(select, "select"):
ready, _, _ = select.select([sys.stdin], [], [], 0.5)
if ready:
manual = sys.stdin.readline()
if manual.strip():
code = parse_manual_input(manual, state)
if code:
break
else:
time.sleep(0.5)
if callback_result[0]:
code = callback_result[0]
break
except (KeyboardInterrupt, EOFError):
print("\n\033[0;31mCancelled.\033[0m")
return 1
if not code:
code = callback_result[0]
else:
# Manual paste mode
try:
manual = input(" Paste the redirect URL: ").strip()
code = parse_manual_input(manual, state)
except (KeyboardInterrupt, EOFError):
print("\n\033[0;31mCancelled.\033[0m")
return 1
if not code:
print("\n\033[0;31mAuthentication timed out or failed.\033[0m")
return 1
# Exchange code for tokens
print()
print(" Exchanging authorization code for tokens...")
token_data = exchange_code_for_tokens(code, verifier)
if not token_data:
return 1
# Extract account_id from JWT
account_id = get_account_id(token_data["access_token"])
if not account_id:
print("\033[0;31mFailed to extract account ID from token.\033[0m", file=sys.stderr)
return 1
# Save credentials
save_credentials(token_data, account_id)
print(" \033[0;32mAuthentication successful!\033[0m")
print(f" Credentials saved to {CODEX_AUTH_FILE}")
return 0
if __name__ == "__main__":
sys.exit(main())
+1 -1
View File
@@ -1768,7 +1768,7 @@ async def _run_pipeline(websocket, initial_message: str):
judge=judge,
config=LoopConfig(
max_iterations=30,
max_tool_calls_per_turn=15,
max_tool_calls_per_turn=30,
max_history_tokens=64000,
max_tool_result_chars=8_000,
spillover_dir=str(_DATA_DIR),
+2 -2
View File
@@ -751,7 +751,7 @@ async def _run_pipeline(websocket, topic: str):
judge=None, # implicit judge: accept when output_keys filled
config=LoopConfig(
max_iterations=20,
max_tool_calls_per_turn=10,
max_tool_calls_per_turn=30,
max_history_tokens=32_000,
),
conversation_store=store_a,
@@ -849,7 +849,7 @@ async def _run_pipeline(websocket, topic: str):
judge=None, # implicit judge
config=LoopConfig(
max_iterations=10,
max_tool_calls_per_turn=5,
max_tool_calls_per_turn=30,
max_history_tokens=32_000,
),
conversation_store=store_b,
+1 -1
View File
@@ -1257,7 +1257,7 @@ async def _run_org_pipeline(websocket, topic: str):
judge=judge,
config=LoopConfig(
max_iterations=30,
max_tool_calls_per_turn=25,
max_tool_calls_per_turn=30,
max_history_tokens=32_000,
),
conversation_store=store,
@@ -406,7 +406,8 @@ nodes = [
client_facing=True,
max_node_visits=0,
input_keys=[],
output_keys=[],
output_keys=["test_result"],
nullable_output_keys=["test_result"],
tools=["get_account_info"],
system_prompt="""\
You are a credential tester. Your job is to help the user verify that their \
@@ -444,7 +445,7 @@ edges = []
entry_node = "tester"
entry_points = {"start": "tester"}
pause_nodes = []
terminal_nodes = [] # Forever-alive: loops until user exits
terminal_nodes = ["tester"] # Tester node can terminate
conversation_mode = "continuous"
identity_prompt = (
@@ -453,7 +454,7 @@ identity_prompt = (
)
loop_config = {
"max_iterations": 50,
"max_tool_calls_per_turn": 10,
"max_tool_calls_per_turn": 30,
"max_history_tokens": 32000,
}
@@ -531,7 +532,7 @@ class CredentialTesterAgent:
version="1.0.0",
entry_node="tester",
entry_points={"start": "tester"},
terminal_nodes=[],
terminal_nodes=["tester"], # Tester node can terminate
pause_nodes=[],
nodes=[tester_node],
edges=[],
@@ -539,7 +540,7 @@ class CredentialTesterAgent:
max_tokens=self.config.max_tokens,
loop_config={
"max_iterations": 50,
"max_tool_calls_per_turn": 10,
"max_tool_calls_per_turn": 30,
"max_history_tokens": 32000,
},
conversation_mode="continuous",
@@ -51,7 +51,8 @@ The key is pre-injected into the session environment and tools read it automatic
client_facing=True,
max_node_visits=0,
input_keys=[],
output_keys=[],
output_keys=["test_result"],
nullable_output_keys=["test_result"],
tools=tools,
system_prompt=f"""\
You are a credential tester for the {account_label}: {provider}/{alias}{detail}
+8 -8
View File
@@ -57,7 +57,7 @@ goal = Goal(
id="framework-compliance",
description=(
"Generated code follows framework patterns: STEP 1/STEP 2 "
"for client-facing, correct imports, entry_points format"
"for client-facing and correct imports"
),
metric="pattern_compliance",
target="100%",
@@ -69,7 +69,7 @@ goal = Goal(
id="dynamic-tool-discovery",
description=(
"Always discover available tools dynamically via "
"discover_mcp_tools before referencing tools in agent designs"
"list_agent_tools before referencing tools in agent designs"
),
constraint_type="hard",
category="correctness",
@@ -99,14 +99,14 @@ goal = Goal(
# GraphExecutor with queen_node — not as part of this graph.
nodes = [coder_node]
# No edges needed — single forever-alive event_loop node
# No edges needed — single event_loop node
edges = []
# Graph configuration
entry_node = "coder"
entry_points = {"start": "coder"}
pause_nodes = []
terminal_nodes = [] # Forever-alive: loops until user exits
terminal_nodes = ["coder"] # Coder node has output_keys and can terminate
# No async entry points needed — the queen is now an independent executor,
# not a secondary graph receiving events via add_graph().
@@ -127,7 +127,7 @@ identity_prompt = (
)
loop_config = {
"max_iterations": 100,
"max_tool_calls_per_turn": 20,
"max_tool_calls_per_turn": 30,
"max_history_tokens": 32000,
}
@@ -154,14 +154,14 @@ queen_graph = GraphSpec(
version="1.0.0",
entry_node="queen",
entry_points={"start": "queen"},
terminal_nodes=[],
terminal_nodes=["queen"], # Queen node can terminate
pause_nodes=[],
nodes=[queen_node],
edges=[],
conversation_mode="continuous",
loop_config={
"max_iterations": 200,
"max_tool_calls_per_turn": 10,
"max_iterations": 999_999,
"max_tool_calls_per_turn": 30,
"max_history_tokens": 32000,
},
)
+2 -2
View File
@@ -10,7 +10,7 @@ def _load_preferred_model() -> str:
config_path = Path.home() / ".hive" / "configuration.json"
if config_path.exists():
try:
with open(config_path) as f:
with open(config_path, encoding="utf-8") as f:
config = json.load(f)
llm = config.get("llm", {})
if llm.get("provider") and llm.get("model"):
@@ -24,7 +24,7 @@ def _load_preferred_model() -> str:
class RuntimeConfig:
model: str = field(default_factory=_load_preferred_model)
temperature: float = 0.7
max_tokens: int = 40000
max_tokens: int = 8000
api_key: str | None = None
api_base: str | None = None
File diff suppressed because it is too large Load Diff
@@ -1,107 +1,32 @@
# Common Mistakes When Building Hive Agents
## Critical Errors
1. **Using tools that don't exist** — Always verify tools are available in the hive-tools MCP server before assigning them to nodes. Never guess tool names.
2. **Wrong entry_points format** — MUST be `{"start": "first-node-id"}`. NOT a set, NOT `{node_id: [keys]}`.
3. **Wrong mcp_servers.json format** — Flat dict (no `"mcpServers"` wrapper). `cwd` must be `"../../tools"`. `command` must be `"uv"` with args `["run", "python", ...]`.
4. **Missing STEP 1/STEP 2 in client-facing prompts** — Without explicit phases, the LLM calls set_output before the user responds. Always use the pattern.
5. **Forgetting nullable_output_keys** — When a node receives inputs from multiple edges and some inputs only arrive on certain edges (e.g., feedback), mark those as nullable. Without this, the executor blocks waiting for a value that will never arrive.
6. **Creating dead-end nodes in forever-alive graphs** — Every node must have at least one outgoing edge. A node with no outgoing edges ends the execution, breaking the loop.
7. **Setting max_node_visits to a non-zero value in forever-alive agents** — The framework default is `max_node_visits=0` (unbounded). Setting it to any positive value (e.g., 1) means the node stops executing after that many visits, silently breaking the forever-alive loop. Only set `max_node_visits > 0` in one-shot agents with feedback loops that need bounded retries.
7. **Missing module-level exports in `__init__.py`** — The runner loads agents via `importlib.import_module(package_name)`, which imports `__init__.py`. It then reads `goal`, `nodes`, `edges`, `entry_node`, `entry_points`, `pause_nodes`, `terminal_nodes`, `conversation_mode`, `identity_prompt`, `loop_config` via `getattr()`. If ANY of these are missing from `__init__.py`, they default to `None` or `{}` — causing "must define goal, nodes, edges" errors or "node X is unreachable" validation failures. **ALL module-level variables from agent.py must be re-exported in `__init__.py`.**
1. **Using tools that don't exist** — Always verify tools via `list_agent_tools()` before designing. Common hallucinations: `csv_read`, `csv_write`, `file_upload`, `database_query`, `bulk_fetch_emails`.
2. **Wrong mcp_servers.json format** — Flat dict (no `"mcpServers"` wrapper). `cwd` must be `"../../tools"`. `command` must be `"uv"` with args `["run", "python", ...]`.
3. **Missing module-level exports in `__init__.py`** — The runner reads `goal`, `nodes`, `edges`, `entry_node`, `entry_points`, `terminal_nodes`, `conversation_mode`, `identity_prompt`, `loop_config` via `getattr()`. ALL module-level variables from agent.py must be re-exported in `__init__.py`.
## Value Errors
8. **Invalid `conversation_mode` value** — Only two valid values: `"continuous"` (recommended for interactive agents) or omit entirely (for isolated per-node conversations). Values like `"client_facing"`, `"interactive"`, `"adaptive"` do NOT exist and will cause runtime errors.
9. **Invalid `loop_config` keys** — Only three valid keys: `max_iterations` (int), `max_tool_calls_per_turn` (int), `max_history_tokens` (int). Keys like `"strategy"`, `"mode"`, `"timeout"` are NOT valid and are silently ignored or cause errors.
10. **Fabricating tools that don't exist** — Never guess tool names. Always verify via `list_agent_tools()` before designing and `validate_agent_tools()` after building. Common hallucinations: `csv_read`, `csv_write`, `csv_append`, `file_upload`, `database_query`, `bulk_fetch_emails`. If a required tool doesn't exist, redesign the agent to use tools that DO exist (e.g., `save_data`/`load_data` for data persistence).
4. **Fabricating tools** — Always verify via `list_agent_tools()` before designing and `validate_agent_tools()` after building.
## Design Errors
11. **Too many thin nodes** — Hard limit: **2-4 nodes** for most agents. Each node boundary serializes outputs to shared memory and loses all in-context information (tool results, intermediate reasoning, conversation history). A node with 0 tools that just does LLM reasoning is NOT a real node — merge it into its predecessor or successor.
**Merge when:**
- Node has NO tools — pure LLM reasoning belongs in the node that produces or consumes its data
- Node sets only 1 trivial output (e.g., `set_output("done", "true")`) — collapse into predecessor
- Multiple consecutive autonomous nodes with same/similar tools — combine into one
- A "report" or "summary" node that just presents analysis — merge into the client-facing node
- A "schedule" or "confirm" node that doesn't actually schedule anything — remove entirely
**Keep separate when:**
- Client-facing vs autonomous — different interaction models require separate nodes
- Fundamentally different tool sets (e.g., web search vs file I/O)
- Fan-out parallelism — parallel branches MUST be separate nodes
**Bad example** (7 nodes — WAY too many):
```
profile_setup → daily_intake → update_tracker → analyze_progress → generate_plan → schedule_reminders → report
```
`analyze_progress` has no tools. `schedule_reminders` just sets one boolean. `report` just presents analysis. `update_tracker` and `generate_plan` are sequential autonomous work.
**Good example** (3 nodes):
```
intake (client-facing) → process (autonomous: track + analyze + plan) → intake (loop back)
```
One client-facing node handles ALL user interaction (setup, logging, reports). One autonomous node handles ALL backend work (CSV update, analysis, plan generation) with tools and context preserved.
12. **Adding framework gating for LLM behavior** — Don't add output rollback, premature rejection, or interaction protocol injection. Fix with better prompts or custom judges.
13. **Not using continuous conversation mode** — Interactive agents should use `conversation_mode="continuous"`. Without it, each node starts with blank context.
14. **Adding terminal nodes by default** — ALL agents should use `terminal_nodes=[]` (forever-alive) unless the user explicitly requests a one-shot/batch agent. Forever-alive is the standard pattern. Every node must have at least one outgoing edge. Dead-end nodes break the loop.
15. **Calling set_output in same turn as tool calls** — Instruct the LLM to call set_output in a SEPARATE turn from real tool calls.
5. **Adding framework gating for LLM behavior** — Don't add output rollback or premature rejection. Fix with better prompts or custom judges.
6. **Calling set_output in same turn as tool calls** — Call set_output in a SEPARATE turn.
## File Template Errors
16. **Wrong import paths**Use `from framework.graph import ...`, NOT `from core.framework.graph import ...`. The PYTHONPATH includes `core/`.
17. **Missing storage path** — Agent class must set `self._storage_path = Path.home() / ".hive" / "agents" / "agent_name"`.
18. **Missing mcp_servers.json** — Without this, the agent has no tools at runtime.
19. **Bare `python` command in mcp_servers.json** — Use `"command": "uv"` with args `["run", "python", ...]`.
7. **Wrong import paths** — Use `from framework.graph import ...`, NOT `from core.framework.graph import ...`.
8. **Missing storage path**Agent class must set `self._storage_path = Path.home() / ".hive" / "agents" / "agent_name"`.
9. **Missing mcp_servers.json** — Without this, the agent has no tools at runtime.
10. **Bare `python` command** — Use `"command": "uv"` with args `["run", "python", ...]`.
## Testing Errors
11. **Using `runner.run()` on forever-alive agents**`runner.run()` hangs forever because forever-alive agents have no terminal node. Write structural tests instead: validate graph structure, verify node specs, test `AgentRunner.load()` succeeds (no API key needed).
12. **Stale tests after restructuring** — When changing nodes/edges, update tests to match. Tests referencing old node names will fail.
13. **Running integration tests without API keys** — Use `pytest.skip()` when credentials are missing.
14. **Forgetting sys.path setup in conftest.py** — Tests need `exports/` and `core/` on sys.path.
20. **Using `runner.run()` on forever-alive agents**`runner.run()` calls `trigger_and_wait()` which blocks until the graph reaches a terminal node. Forever-alive agents have `terminal_nodes=[]`, so **`runner.run()` hangs forever**. This is the #1 cause of stuck test suites.
## GCU Errors
15. **Manually wiring browser tools on event_loop nodes** — Use `node_type="gcu"` which auto-includes browser tools. Do NOT manually list browser tool names.
16. **Using GCU nodes as regular graph nodes** — GCU nodes are subagents only. They must ONLY appear in `sub_agents=["gcu-node-id"]` and be invoked via `delegate_to_sub_agent()`. Never connect via edges or use as entry/terminal nodes.
**For forever-alive agents, write structural tests instead:**
- Validate graph structure (nodes, edges, entry points)
- Verify node specs (tools, prompts, client-facing flag)
- Check goal/constraints/success criteria definitions
- Test that `AgentRunner.load()` + `_setup()` succeeds (skip if no API key)
**What NOT to do:**
```python
# WRONG — hangs forever on forever-alive agents
result = await runner.run({"topic": "quantum computing"})
```
**Correct pattern for structure tests:**
```python
def test_research_has_web_tools(self):
assert "web_search" in research_node.tools
def test_research_routes_back_to_interact(self):
edges_to_interact = [e for e in edges if e.source == "research" and e.target == "interact"]
assert edges_to_interact
```
21. **Stale tests after agent restructuring** — When you change an agent's node count or names (e.g., 4 nodes → 2 nodes), the tests MUST be updated too. Tests referencing old node names (e.g., `"review"`, `"report"`) will fail or hang. Always check that test assertions match the current `nodes/__init__.py`.
22. **Running full integration tests without API keys** — Structural tests (validate, import) work without keys. Full integration tests need `ANTHROPIC_API_KEY`. Use `pytest.skip()` in the runner fixture when `_setup()` fails due to missing credentials.
23. **Forgetting sys.path setup in conftest.py** — Tests need `exports/` and `core/` on sys.path.
24. **Not using auto_responder for client-facing nodes** — Tests with client-facing nodes hang without an auto-responder that injects input. But note: even WITH auto_responder, forever-alive agents still hang because the graph never terminates. Auto-responder only helps for agents with terminal nodes.
## Worker Agent Errors
17. **Adding client-facing intake node to workers** — The queen owns intake. Workers should start with an autonomous processing node. Client-facing nodes in workers are for mid-execution review/approval only.
@@ -57,85 +57,63 @@ metadata = AgentMetadata()
from framework.graph import NodeSpec
# Node 1: Intake (client-facing)
intake_node = NodeSpec(
id="intake",
name="Intake",
description="Gather requirements from the user",
# Node 1: Process (autonomous entry node)
# The queen handles intake and passes structured input via
# run_agent_with_input(task). NO client-facing intake node.
# The queen defines input_keys at build time and fills them at run time.
process_node = NodeSpec(
id="process",
name="Process",
description="Execute the task using available tools",
node_type="event_loop",
client_facing=True,
max_node_visits=0, # Unlimited for forever-alive
input_keys=["topic"],
output_keys=["brief"],
success_criteria="The brief is specific and actionable.",
system_prompt="""\
You are an intake specialist.
**STEP 1 — Read and respond (text only, NO tool calls):**
1. Read the topic provided
2. If vague, ask 1-2 clarifying questions
3. If clear, confirm your understanding
**STEP 2 — After the user confirms, call set_output:**
- set_output("brief", "Clear description of what to do")
""",
tools=[],
)
# Node 2: Worker (autonomous)
worker_node = NodeSpec(
id="worker",
name="Worker",
description="Do the main work",
node_type="event_loop",
max_node_visits=0,
input_keys=["brief", "feedback"],
input_keys=["user_request", "feedback"],
output_keys=["results"],
nullable_output_keys=["feedback"], # Only on feedback edge
success_criteria="Results are complete and accurate.",
system_prompt="""\
You are a worker agent. Given a brief, do the work.
If feedback is provided, this is a follow-up — address the feedback.
You are a processing agent. Your task is in memory under "user_request". \
If "feedback" is present, this is a revision — address the feedback.
Work in phases:
1. Use tools to gather/process data
2. Analyze results
3. Call set_output for each key in a SEPARATE turn:
3. Call set_output in a SEPARATE turn:
- set_output("results", "structured results")
""",
tools=["web_search", "web_scrape", "save_data", "load_data", "list_data_files"],
)
# Node 3: Review (client-facing)
review_node = NodeSpec(
id="review",
name="Review",
description="Present results for user approval",
# Node 2: Handoff (autonomous)
handoff_node = NodeSpec(
id="handoff",
name="Handoff",
description="Prepare worker results for queen review",
node_type="event_loop",
client_facing=True,
client_facing=False,
max_node_visits=0,
input_keys=["results", "brief"],
output_keys=["next_action", "feedback"],
nullable_output_keys=["feedback"],
success_criteria="User has reviewed and decided next steps.",
input_keys=["results", "user_request"],
output_keys=["next_action", "feedback", "worker_summary"],
nullable_output_keys=["feedback", "worker_summary"],
success_criteria="Results are packaged for queen decision-making.",
system_prompt="""\
Present the results to the user.
Do NOT talk to the user directly. The queen is the only user interface.
**STEP 1 — Present (text only, NO tool calls):**
1. Summary of work done
2. Key results
3. Ask: satisfied, or want changes?
If blocked by tool failures, missing credentials, or unclear constraints, call:
- escalate_to_coder(reason, context)
Then set:
- set_output("next_action", "escalated")
- set_output("feedback", "what help is needed")
**STEP 2 — After user responds, call set_output:**
- set_output("next_action", "new_topic") — if starting fresh
- set_output("next_action", "revise") — if changes needed
- set_output("feedback", "what to change") only if revising
Otherwise summarize findings for queen and set:
- set_output("worker_summary", "short summary for queen")
- set_output("next_action", "done") or set_output("next_action", "revise")
- set_output("feedback", "what to revise") only when revising
""",
tools=[],
)
__all__ = ["intake_node", "worker_node", "review_node"]
__all__ = ["process_node", "handoff_node"]
```
## agent.py
@@ -155,7 +133,7 @@ from framework.runtime.agent_runtime import AgentRuntime, create_agent_runtime
from framework.runtime.execution_stream import EntryPointSpec
from .config import default_config, metadata
from .nodes import intake_node, worker_node, review_node
from .nodes import process_node, handoff_node
# Goal definition
goal = Goal(
@@ -172,27 +150,30 @@ goal = Goal(
)
# Node list
nodes = [intake_node, worker_node, review_node]
nodes = [process_node, handoff_node]
# Edge definitions
edges = [
EdgeSpec(id="intake-to-worker", source="intake", target="worker",
EdgeSpec(id="process-to-handoff", source="process", target="handoff",
condition=EdgeCondition.ON_SUCCESS, priority=1),
EdgeSpec(id="worker-to-review", source="worker", target="review",
condition=EdgeCondition.ON_SUCCESS, priority=1),
# Feedback loop
EdgeSpec(id="review-to-worker", source="review", target="worker",
# Feedback loop — revise results
EdgeSpec(id="handoff-to-process", source="handoff", target="process",
condition=EdgeCondition.CONDITIONAL,
condition_expr="str(next_action).lower() == 'revise'", priority=2),
# Loop back for new topic
EdgeSpec(id="review-to-intake", source="review", target="intake",
# Escalation loop — queen injects guidance and worker retries
EdgeSpec(id="handoff-escalated", source="handoff", target="process",
condition=EdgeCondition.CONDITIONAL,
condition_expr="str(next_action).lower() == 'new_topic'", priority=1),
condition_expr="str(next_action).lower() == 'escalated'", priority=3),
# Loop back for next task after queen decision
EdgeSpec(id="handoff-done", source="handoff", target="process",
condition=EdgeCondition.CONDITIONAL,
condition_expr="str(next_action).lower() == 'done'", priority=1),
]
# Graph configuration
entry_node = "intake"
entry_points = {"start": "intake"}
# Graph configuration — entry is the autonomous process node
# The queen handles intake and passes the task via run_agent_with_input(task)
entry_node = "process"
entry_points = {"start": "process"}
pause_nodes = []
terminal_nodes = [] # Forever-alive
@@ -208,7 +189,7 @@ class MyAgent:
self.goal = goal
self.nodes = nodes
self.edges = edges
self.entry_node = entry_node
self.entry_node = entry_node # "process" — autonomous entry
self.entry_points = entry_points
self.pause_nodes = pause_nodes
self.terminal_nodes = terminal_nodes
@@ -235,16 +216,14 @@ class MyAgent:
identity_prompt=identity_prompt,
)
def _setup(self, mock_mode=False):
def _setup(self):
self._storage_path = Path.home() / ".hive" / "agents" / "my_agent"
self._storage_path.mkdir(parents=True, exist_ok=True)
self._tool_registry = ToolRegistry()
mcp_config = Path(__file__).parent / "mcp_servers.json"
if mcp_config.exists():
self._tool_registry.load_mcp_config(mcp_config)
llm = None
if not mock_mode:
llm = LiteLLMProvider(model=self.config.model, api_key=self.config.api_key, api_base=self.config.api_base)
llm = LiteLLMProvider(model=self.config.model, api_key=self.config.api_key, api_base=self.config.api_base)
tools = list(self._tool_registry.get_tools().values())
tool_executor = self._tool_registry.get_executor()
self._graph = self._build_graph()
@@ -257,9 +236,9 @@ class MyAgent:
checkpoint_max_age_days=7, async_checkpoint=True),
)
async def start(self, mock_mode=False):
async def start(self):
if self._agent_runtime is None:
self._setup(mock_mode=mock_mode)
self._setup()
if not self._agent_runtime.is_running:
await self._agent_runtime.start()
@@ -274,8 +253,8 @@ class MyAgent:
return await self._agent_runtime.trigger_and_wait(
entry_point_id=entry_point, input_data=input_data or {}, session_state=session_state)
async def run(self, context, mock_mode=False, session_state=None):
await self.start(mock_mode=mock_mode)
async def run(self, context, session_state=None):
await self.start()
try:
result = await self.trigger_and_wait("default", context, session_state=session_state)
return result or ExecutionResult(success=False, error="Execution timeout")
@@ -293,16 +272,60 @@ class MyAgent:
}
def validate(self):
"""Validate graph wiring and entry-point contract."""
errors, warnings = [], []
node_ids = {n.id for n in self.nodes}
for e in self.edges:
if e.source not in node_ids: errors.append(f"Edge {e.id}: source '{e.source}' not found")
if e.target not in node_ids: errors.append(f"Edge {e.id}: target '{e.target}' not found")
if self.entry_node not in node_ids: errors.append(f"Entry node '{self.entry_node}' not found")
if e.source not in node_ids:
errors.append(f"Edge {e.id}: source '{e.source}' not found")
if e.target not in node_ids:
errors.append(f"Edge {e.id}: target '{e.target}' not found")
if self.entry_node not in node_ids:
errors.append(f"Entry node '{self.entry_node}' not found")
for t in self.terminal_nodes:
if t not in node_ids: errors.append(f"Terminal node '{t}' not found")
for ep_id, nid in self.entry_points.items():
if nid not in node_ids: errors.append(f"Entry point '{ep_id}' references unknown node '{nid}'")
if t not in node_ids:
errors.append(f"Terminal node '{t}' not found")
if not isinstance(self.entry_points, dict):
errors.append(
"Invalid entry_points: expected dict[str, str] like "
"{'start': '<entry-node-id>'}. "
f"Got {type(self.entry_points).__name__}. "
"Fix agent.py: set entry_points = {'start': '<entry-node-id>'}."
)
else:
if "start" not in self.entry_points:
errors.append(
"entry_points must include 'start' mapped to entry_node. "
"Example: {'start': '<entry-node-id>'}."
)
else:
start_node = self.entry_points.get("start")
if start_node != self.entry_node:
errors.append(
f"entry_points['start'] points to '{start_node}' "
f"but entry_node is '{self.entry_node}'. Keep these aligned."
)
for ep_id, nid in self.entry_points.items():
if not isinstance(ep_id, str):
errors.append(
f"Invalid entry_points key {ep_id!r} "
f"({type(ep_id).__name__}). Entry point names must be strings."
)
continue
if not isinstance(nid, str):
errors.append(
f"Invalid entry_points['{ep_id}']={nid!r} "
f"({type(nid).__name__}). Node ids must be strings."
)
continue
if nid not in node_ids:
errors.append(
f"Entry point '{ep_id}' references unknown node '{nid}'. "
f"Known nodes: {sorted(node_ids)}"
)
return {"valid": len(errors) == 0, "errors": errors, "warnings": warnings}
@@ -471,19 +494,17 @@ def cli():
@cli.command()
@click.option("--topic", "-t", required=True)
@click.option("--mock", is_flag=True)
@click.option("--verbose", "-v", is_flag=True)
def run(topic, mock, verbose):
def run(topic, verbose):
"""Execute the agent."""
setup_logging(verbose=verbose)
result = asyncio.run(default_agent.run({"topic": topic}, mock_mode=mock))
result = asyncio.run(default_agent.run({"topic": topic}))
click.echo(json.dumps({"success": result.success, "output": result.output}, indent=2, default=str))
sys.exit(0 if result.success else 1)
@cli.command()
@click.option("--mock", is_flag=True)
def tui(mock):
def tui():
"""Launch TUI dashboard."""
from pathlib import Path
from framework.tui.app import AdenTUI
@@ -499,10 +520,10 @@ def tui(mock):
storage.mkdir(parents=True, exist_ok=True)
mcp_cfg = Path(__file__).parent / "mcp_servers.json"
if mcp_cfg.exists(): agent._tool_registry.load_mcp_config(mcp_cfg)
llm = None if mock else LiteLLMProvider(model=agent.config.model, api_key=agent.config.api_key, api_base=agent.config.api_base)
llm = LiteLLMProvider(model=agent.config.model, api_key=agent.config.api_key, api_base=agent.config.api_base)
runtime = create_agent_runtime(
graph=agent._build_graph(), goal=agent.goal, storage_path=storage,
entry_points=[EntryPointSpec(id="start", name="Start", entry_node="intake", trigger_type="manual", isolation_level="isolated")],
entry_points=[EntryPointSpec(id="start", name="Start", entry_node="process", trigger_type="manual", isolation_level="isolated")],
llm=llm, tools=list(agent._tool_registry.get_tools().values()), tool_executor=agent._tool_registry.get_executor())
await runtime.start()
try:
@@ -564,7 +585,6 @@ import sys
from pathlib import Path
import pytest
import pytest_asyncio
_repo_root = Path(__file__).resolve().parents[3]
for _p in ["exports", "core"]:
@@ -576,18 +596,17 @@ AGENT_PATH = str(Path(__file__).resolve().parents[1])
@pytest.fixture(scope="session")
def mock_mode():
return True
def agent_module():
"""Import the agent package for structural validation."""
import importlib
return importlib.import_module(Path(AGENT_PATH).name)
@pytest_asyncio.fixture(scope="session")
async def runner(tmp_path_factory, mock_mode):
@pytest.fixture(scope="session")
def runner_loaded():
"""Load the agent through AgentRunner (structural only, no LLM needed)."""
from framework.runner.runner import AgentRunner
storage = tmp_path_factory.mktemp("agent_storage")
r = AgentRunner.load(AGENT_PATH, mock_mode=mock_mode, storage_path=storage)
r._setup()
yield r
await r.cleanup_async()
return AgentRunner.load(AGENT_PATH)
```
## entry_points Format
@@ -26,7 +26,7 @@ module-level variables via `getattr()`:
| `edges` | YES | `None` | **FATAL** — same error |
| `entry_node` | no | `nodes[0].id` | Probably wrong node |
| `entry_points` | no | `{}` | **Nodes unreachable** — validation fails |
| `terminal_nodes` | no | `[]` | OK for forever-alive |
| `terminal_nodes` | **YES** | `[]` | **FATAL** — graph must have at least one terminal node |
| `pause_nodes` | no | `[]` | OK |
| `conversation_mode` | no | not passed | Isolated mode (no context carryover) |
| `identity_prompt` | no | not passed | No agent-level identity |
@@ -72,7 +72,7 @@ goal = Goal(
| id | str | required | kebab-case identifier |
| name | str | required | Display name |
| description | str | required | What the node does |
| node_type | str | required | Always `"event_loop"` |
| node_type | str | required | `"event_loop"` or `"gcu"` (browser automation — see GCU Guide appendix) |
| input_keys | list[str] | required | Memory keys this node reads |
| output_keys | list[str] | required | Memory keys this node writes via set_output |
| system_prompt | str | "" | LLM instructions |
@@ -131,13 +131,19 @@ downstream node only sees the serialized summary string.
- A "report" node that presents analysis → merge into the client-facing node
- A "confirm" or "schedule" node that doesn't call any external service → remove
**Typical agent structure (3 nodes):**
**Typical agent structure (2 nodes):**
```
intake (client-facing) ←→ process (autonomous) ←→ review (client-facing)
process (autonomous) ←→ review (client-facing)
```
Or for simpler agents, just 2 nodes:
The queen owns intake — she gathers requirements from the user, then
passes structured input via `run_agent_with_input(task)`. When building
the agent, design the entry node's `input_keys` to match what the queen
will provide at run time. Worker agents should NOT have a client-facing
intake node. Client-facing nodes are for mid-execution review/approval only.
For simpler agents, just 1 autonomous node:
```
interact (client-facing) → process (autonomous) → interact (loop)
process (autonomous) — loops back to itself
```
### nullable_output_keys
@@ -159,8 +165,9 @@ review_node = NodeSpec(
)
```
### Forever-Alive Pattern
`terminal_nodes=[]` — every node has outgoing edges, graph loops until user exits.
### Continuous Loop Pattern
Mark the primary event_loop node as terminal: `terminal_nodes=["process"]`.
The node has `output_keys` and can complete when the agent finishes its work.
Use `conversation_mode="continuous"` to preserve context across transitions.
### set_output
@@ -186,16 +193,16 @@ condition_expr examples:
| Pattern | terminal_nodes | When |
|---------|---------------|------|
| **Forever-alive** | `[]` | **DEFAULT for all agents** |
| Linear | `["last-node"]` | Only if user explicitly requests one-shot/batch |
| **Continuous loop** | `["node-with-output-keys"]` | **DEFAULT for all agents** |
| Linear | `["last-node"]` | One-shot/batch agents |
**Forever-alive is the default.** Always use `terminal_nodes=[]`.
The framework default for `max_node_visits` is 0 (unbounded), so
nodes work correctly in forever-alive loops without explicit override.
Only set `max_node_visits > 0` in one-shot agents with feedback loops.
Every node must have at least one outgoing edge — no dead ends. The
user exits by closing the TUI. Only use terminal nodes if the user
explicitly asks for a batch/one-shot agent that runs once and exits.
**Every graph must have at least one terminal node.** Terminal nodes
define where execution ends. For interactive agents that loop continuously,
mark the primary event_loop node as terminal (it has `output_keys` and can
complete at any point). The framework default for `max_node_visits` is 0
(unbounded), so nodes work correctly in continuous loops without explicit
override. Only set `max_node_visits > 0` in one-shot agents with feedback loops.
Every node must have at least one outgoing edge — no dead ends.
## Continuous Conversation Mode
@@ -252,174 +259,54 @@ Judge is the SOLE acceptance mechanism — no ad-hoc framework gating.
## Async Entry Points (Webhooks, Timers, Events)
For agents that need to react to external events (incoming emails, scheduled
tasks, API calls), use `AsyncEntryPointSpec` and optionally `AgentRuntimeConfig`.
### Imports
```python
from framework.graph.edge import GraphSpec, AsyncEntryPointSpec
from framework.runtime.agent_runtime import AgentRuntime, AgentRuntimeConfig, create_agent_runtime
```
Note: `AsyncEntryPointSpec` is in `framework.graph.edge` (the graph/declarative layer).
`AgentRuntimeConfig` is in `framework.runtime.agent_runtime` (the runtime layer).
### AsyncEntryPointSpec Fields
| Field | Type | Default | Description |
|-------|------|---------|-------------|
| id | str | required | Unique identifier |
| name | str | required | Human-readable name |
| entry_node | str | required | Node ID to start execution from |
| trigger_type | str | `"manual"` | `webhook`, `api`, `timer`, `event`, `manual` |
| trigger_config | dict | `{}` | Trigger-specific config (see below) |
| isolation_level | str | `"shared"` | `isolated`, `shared`, `synchronized` |
| priority | int | `0` | Execution priority (higher = more priority) |
| max_concurrent | int | `10` | Max concurrent executions |
### Trigger Types
**timer** — Fires on a schedule. Two modes: cron expressions or fixed interval.
Cron (preferred for precise scheduling):
```python
AsyncEntryPointSpec(
id="daily-digest",
name="Daily Digest",
entry_node="check-node",
trigger_type="timer",
trigger_config={"cron": "0 9 * * *"}, # daily at 9am
isolation_level="shared",
max_concurrent=1,
)
```
- `cron` (str) — standard cron expression (5 fields: min hour dom month dow)
- Examples: `"0 9 * * *"` (daily 9am), `"0 9 * * MON-FRI"` (weekdays 9am), `"*/30 * * * *"` (every 30 min)
Fixed interval (simpler, for polling-style tasks):
```python
AsyncEntryPointSpec(
id="scheduled-check",
name="Scheduled Check",
entry_node="check-node",
trigger_type="timer",
trigger_config={"interval_minutes": 20, "run_immediately": False},
isolation_level="shared",
max_concurrent=1,
)
```
- `interval_minutes` (float) — how often to fire
- `run_immediately` (bool, default False) — fire once on startup
**event** — Subscribes to EventBus (e.g., webhook events):
```python
AsyncEntryPointSpec(
id="email-event",
name="Email Event Handler",
entry_node="process-emails",
trigger_type="event",
trigger_config={"event_types": ["webhook_received"]},
isolation_level="shared",
max_concurrent=10,
)
```
- `event_types` (list[str]) — EventType values to subscribe to
- `filter_stream` (str, optional) — only receive from this stream
- `filter_node` (str, optional) — only receive from this node
**webhook** — HTTP endpoint (requires AgentRuntimeConfig):
The webhook server publishes `WEBHOOK_RECEIVED` events on the EventBus.
An `event` trigger type with `event_types: ["webhook_received"]` subscribes
to those events. The flow is:
```
HTTP POST /webhooks/gmail → WebhookServer → EventBus (WEBHOOK_RECEIVED)
→ event entry point → triggers graph execution from entry_node
```
**manual** — Triggered programmatically via `runtime.trigger()`.
### Isolation Levels
| Level | Meaning |
|-------|---------|
| `isolated` | Private state per execution |
| `shared` | Eventual consistency — async executions can read primary session memory |
| `synchronized` | Shared with write locks (use when ordering matters) |
For most async patterns, use `shared` — the async execution reads the primary
session's memory (e.g., user-configured rules) and runs its own workflow.
### AgentRuntimeConfig (for webhook servers)
For agents that react to external events, use `AsyncEntryPointSpec`:
```python
from framework.graph.edge import AsyncEntryPointSpec
from framework.runtime.agent_runtime import AgentRuntimeConfig
# Timer trigger (cron or interval)
async_entry_points = [
AsyncEntryPointSpec(
id="daily-check",
name="Daily Check",
entry_node="process",
trigger_type="timer",
trigger_config={"cron": "0 9 * * *"}, # daily at 9am
isolation_level="shared",
)
]
# Webhook server (optional)
runtime_config = AgentRuntimeConfig(
webhook_host="127.0.0.1",
webhook_port=8080,
webhook_routes=[
{
"source_id": "gmail",
"path": "/webhooks/gmail",
"methods": ["POST"],
"secret": None, # Optional HMAC-SHA256 secret
},
],
)
```
`runtime_config` is a module-level variable read by `AgentRunner.load()`.
The runner passes it to `create_agent_runtime()`. On `runtime.start()`,
if webhook_routes is non-empty, an embedded HTTP server starts.
### Session Sharing
Timer and event triggers automatically call `_get_primary_session_state()`
before execution. This finds the active user-facing session and provides
its memory to the async execution, filtered to only the async entry node's
`input_keys`. This means the async flow can read user-configured values
(like rules, preferences) without needing separate configuration.
### Module-Level Variables
Agents with async entry points must export two additional variables:
```python
# In agent.py:
async_entry_points = [AsyncEntryPointSpec(...), ...]
runtime_config = AgentRuntimeConfig(...) # Only if using webhooks
```
Both must be re-exported from `__init__.py`:
```python
from .agent import (
..., async_entry_points, runtime_config,
webhook_routes=[{"source_id": "gmail", "path": "/webhooks/gmail", "methods": ["POST"]}],
)
```
### Reference Agent
### Key Fields
- `trigger_type`: `"timer"`, `"event"`, `"webhook"`, `"manual"`
- `trigger_config`: `{"cron": "0 9 * * *"}` or `{"interval_minutes": 20}`
- `isolation_level`: `"shared"` (recommended), `"isolated"`, `"synchronized"`
- `event_types`: For event triggers, e.g., `["webhook_received"]`
See `exports/gmail_inbox_guardian/agent.py` for a complete example with:
- Primary client-facing intake node (user configures rules)
- Timer-based scheduled inbox checks (every 20 min)
- Webhook-triggered email event handling
- Shared isolation for memory access across streams
### Exports Required
Both `async_entry_points` and `runtime_config` must be exported from `__init__.py`.
## Framework Capabilities
**Works well:** Multi-turn conversations, HITL review, tool orchestration, structured outputs, parallel execution, context management, error recovery, session persistence.
**Limitations:** LLM latency (2-10s/turn), context window limits (~128K), cost per run, rate limits, node boundaries lose context.
**Not designed for:** Sub-second responses, millions of items, real-time streaming, guaranteed determinism, offline/air-gapped.
See `exports/gmail_inbox_guardian/agent.py` for complete example.
## Tool Discovery
Do NOT rely on a static tool list — it will be outdated. Always use
`list_agent_tools()` to get available tool names grouped by category.
For full schemas with parameter details, use `discover_mcp_tools()`.
Do NOT rely on a static tool list — it will be outdated. Always call
`list_agent_tools()` with NO arguments first to see ALL available tools.
Only use `group=` or `output_schema=` as follow-up calls after seeing the
full list.
```
list_agent_tools() # all available tools
list_agent_tools("exports/my_agent/mcp_servers.json") # specific agent
discover_mcp_tools() # full schemas with params
list_agent_tools() # ALWAYS call this first
list_agent_tools(group="gmail", output_schema="full") # then drill into a category
list_agent_tools("exports/my_agent/mcp_servers.json") # specific agent's tools
```
After building, validate tools exist: `validate_agent_tools("exports/{name}")`
@@ -0,0 +1,119 @@
# GCU Browser Automation Guide
## When to Use GCU Nodes
Use `node_type="gcu"` when:
- The user's workflow requires **navigating real websites** (scraping, form-filling, social media interaction, testing web UIs)
- The task involves **dynamic/JS-rendered pages** that `web_scrape` cannot handle (SPAs, infinite scroll, login-gated content)
- The agent needs to **interact with a website** — clicking, typing, scrolling, selecting, uploading files
Do NOT use GCU for:
- Static content that `web_scrape` handles fine
- API-accessible data (use the API directly)
- PDF/file processing
- Anything that doesn't require a browser UI
## What GCU Nodes Are
- `node_type="gcu"` — a declarative enhancement over `event_loop`
- Framework auto-prepends browser best-practices system prompt
- Framework auto-includes all 31 browser tools from `gcu-tools` MCP server
- Same underlying `EventLoopNode` class — no new imports needed
- `tools=[]` is correct — tools are auto-populated at runtime
## GCU Architecture Pattern
GCU nodes are **subagents** — invoked via `delegate_to_sub_agent()`, not connected via edges.
- Primary nodes (`event_loop`, client-facing) orchestrate; GCU nodes do browser work
- Parent node declares `sub_agents=["gcu-node-id"]` and calls `delegate_to_sub_agent(agent_id="gcu-node-id", task="...")`
- GCU nodes set `max_node_visits=1` (single execution per delegation), `client_facing=False`
- GCU nodes use `output_keys=["result"]` and return structured JSON via `set_output("result", ...)`
## GCU Node Definition Template
```python
gcu_browser_node = NodeSpec(
id="gcu-browser-worker",
name="Browser Worker",
description="Browser subagent that does X.",
node_type="gcu",
client_facing=False,
max_node_visits=1,
input_keys=[],
output_keys=["result"],
tools=[], # Auto-populated with all browser tools
system_prompt="""\
You are a browser agent. Your job: [specific task].
## Workflow
1. browser_start (only if no browser is running yet)
2. browser_open(url=TARGET_URL) — note the returned targetId
3. browser_snapshot to read the page
4. [task-specific steps]
5. set_output("result", JSON)
## Output format
set_output("result", JSON) with:
- [field]: [type and description]
""",
)
```
## Parent Node Template (orchestrating GCU subagents)
```python
orchestrator_node = NodeSpec(
id="orchestrator",
...
node_type="event_loop",
sub_agents=["gcu-browser-worker"],
system_prompt="""\
...
delegate_to_sub_agent(
agent_id="gcu-browser-worker",
task="Navigate to [URL]. Do [specific task]. Return JSON with [fields]."
)
...
""",
tools=[], # Orchestrator doesn't need browser tools
)
```
## mcp_servers.json with GCU
```json
{
"hive-tools": { ... },
"gcu-tools": {
"transport": "stdio",
"command": "uv",
"args": ["run", "python", "-m", "gcu.server", "--stdio"],
"cwd": "../../tools",
"description": "GCU tools for browser automation"
}
}
```
Note: `gcu-tools` is auto-added if any node uses `node_type="gcu"`, but including it explicitly is fine.
## GCU System Prompt Best Practices
Key rules to bake into GCU node prompts:
- Prefer `browser_snapshot` over `browser_get_text("body")` — compact accessibility tree vs 100KB+ raw HTML
- Always `browser_wait` after navigation
- Use large scroll amounts (~2000-5000) for lazy-loaded content
- For spillover files, use `run_command` with grep, not `read_file`
- If auth wall detected, report immediately — don't attempt login
- Keep tool calls per turn ≤10
- Tab isolation: when browser is already running, use `browser_open(background=true)` and pass `target_id` to every call
## GCU Anti-Patterns
- Using `browser_screenshot` to read text (use `browser_snapshot`)
- Re-navigating after scrolling (resets scroll position)
- Attempting login on auth walls
- Forgetting `target_id` in multi-tab scenarios
- Putting browser tools directly on `event_loop` nodes instead of using GCU subagent pattern
- Making GCU nodes `client_facing=True` (they should be autonomous subagents)
+3 -3
View File
@@ -660,7 +660,7 @@ class GraphBuilder:
# Generate Python code
code = self._generate_code(graph)
Path(path).write_text(code)
Path(path).write_text(code, encoding="utf-8")
self.session.phase = BuildPhase.EXPORTED
self._save_session()
@@ -754,14 +754,14 @@ class GraphBuilder:
"""Save session to disk."""
self.session.updated_at = datetime.now()
path = self.storage_path / f"{self.session.id}.json"
path.write_text(self.session.model_dump_json(indent=2))
path.write_text(self.session.model_dump_json(indent=2), encoding="utf-8")
def _load_session(self, session_id: str) -> BuildSession:
"""Load session from disk."""
path = self.storage_path / f"{session_id}.json"
if not path.exists():
raise FileNotFoundError(f"Session not found: {session_id}")
return BuildSession.model_validate_json(path.read_text())
return BuildSession.model_validate_json(path.read_text(encoding="utf-8"))
@classmethod
def list_sessions(cls, storage_path: Path | str | None = None) -> list[str]:
+49 -3
View File
@@ -50,12 +50,14 @@ def get_max_tokens() -> int:
def get_api_key() -> str | None:
"""Return the API key, supporting env var, Claude Code subscription, and ZAI Code.
"""Return the API key, supporting env var, Claude Code subscription, Codex, and ZAI Code.
Priority:
1. Claude Code subscription (``use_claude_code_subscription: true``)
reads the OAuth token from ``~/.claude/.credentials.json``.
2. Environment variable named in ``api_key_env_var``.
2. Codex subscription (``use_codex_subscription: true``)
reads the OAuth token from macOS Keychain or ``~/.codex/auth.json``.
3. Environment variable named in ``api_key_env_var``.
"""
llm = get_hive_config().get("llm", {})
@@ -70,6 +72,17 @@ def get_api_key() -> str | None:
except ImportError:
pass
# Codex subscription: read OAuth token from Keychain / auth.json
if llm.get("use_codex_subscription"):
try:
from framework.runner.runner import get_codex_token
token = get_codex_token()
if token:
return token
except ImportError:
pass
# Standard env-var path (covers ZAI Code and all API-key providers)
api_key_env_var = llm.get("api_key_env_var")
if api_key_env_var:
@@ -77,9 +90,18 @@ def get_api_key() -> str | None:
return None
def get_gcu_enabled() -> bool:
"""Return whether GCU (browser automation) is enabled in user config."""
return get_hive_config().get("gcu_enabled", True)
def get_api_base() -> str | None:
"""Return the api_base URL for OpenAI-compatible endpoints, if configured."""
return get_hive_config().get("llm", {}).get("api_base")
llm = get_hive_config().get("llm", {})
if llm.get("use_codex_subscription"):
# Codex subscription routes through the ChatGPT backend, not api.openai.com.
return "https://chatgpt.com/backend-api/codex"
return llm.get("api_base")
def get_llm_extra_kwargs() -> dict[str, Any]:
@@ -88,6 +110,10 @@ def get_llm_extra_kwargs() -> dict[str, Any]:
When ``use_claude_code_subscription`` is enabled, returns
``extra_headers`` with the OAuth Bearer token so that litellm's
built-in Anthropic OAuth handler adds the required beta headers.
When ``use_codex_subscription`` is enabled, returns
``extra_headers`` with the Bearer token, ``ChatGPT-Account-Id``,
and ``store=False`` (required by the ChatGPT backend).
"""
llm = get_hive_config().get("llm", {})
if llm.get("use_claude_code_subscription"):
@@ -96,6 +122,26 @@ def get_llm_extra_kwargs() -> dict[str, Any]:
return {
"extra_headers": {"authorization": f"Bearer {api_key}"},
}
if llm.get("use_codex_subscription"):
api_key = get_api_key()
if api_key:
headers: dict[str, str] = {
"Authorization": f"Bearer {api_key}",
"User-Agent": "CodexBar",
}
try:
from framework.runner.runner import get_codex_account_id
account_id = get_codex_account_id()
if account_id:
headers["ChatGPT-Account-Id"] = account_id
except ImportError:
pass
return {
"extra_headers": headers,
"store": False,
"allowed_openai_params": ["store"],
}
return {}
+25 -3
View File
@@ -42,6 +42,14 @@ For Vault integration:
from core.framework.credentials.vault import HashiCorpVaultStorage
"""
from .key_storage import (
delete_aden_api_key,
generate_and_save_credential_key,
load_aden_api_key,
load_credential_key,
save_aden_api_key,
save_credential_key,
)
from .models import (
CredentialDecryptionError,
CredentialError,
@@ -63,7 +71,7 @@ from .setup import (
CredentialSetupSession,
MissingCredential,
SetupResult,
detect_missing_credentials_from_nodes,
load_agent_nodes,
run_credential_setup_cli,
)
from .storage import (
@@ -75,7 +83,12 @@ from .storage import (
)
from .store import CredentialStore
from .template import TemplateResolver
from .validation import ensure_credential_key_env, validate_agent_credentials
from .validation import (
CredentialStatus,
CredentialValidationResult,
ensure_credential_key_env,
validate_agent_credentials,
)
# Aden sync components (lazy import to avoid httpx dependency when not needed)
# Usage: from core.framework.credentials.aden import AdenSyncProvider
@@ -127,14 +140,23 @@ __all__ = [
"CredentialRefreshError",
"CredentialValidationError",
"CredentialDecryptionError",
# Key storage (bootstrap credentials)
"load_credential_key",
"save_credential_key",
"generate_and_save_credential_key",
"load_aden_api_key",
"save_aden_api_key",
"delete_aden_api_key",
# Validation
"ensure_credential_key_env",
"validate_agent_credentials",
"CredentialStatus",
"CredentialValidationResult",
# Interactive setup
"CredentialSetupSession",
"MissingCredential",
"SetupResult",
"detect_missing_credentials_from_nodes",
"load_agent_nodes",
"run_credential_setup_cli",
# Aden sync (optional - requires httpx)
"AdenSyncProvider",
+2 -2
View File
@@ -26,7 +26,7 @@ Usage:
storage = AdenCachedStorage(
local_storage=EncryptedFileStorage(),
aden_provider=provider,
cache_ttl_seconds=300, # Re-check Aden every 5 minutes
cache_ttl_seconds=600, # Re-check Aden every 5 minutes
)
# Create store
@@ -77,7 +77,7 @@ class AdenCachedStorage(CredentialStorage):
storage = AdenCachedStorage(
local_storage=EncryptedFileStorage(),
aden_provider=provider,
cache_ttl_seconds=300, # 5 minutes
cache_ttl_seconds=00, # 5 minutes
)
store = CredentialStore(
+201
View File
@@ -0,0 +1,201 @@
"""
Dedicated file-based storage for bootstrap credentials.
HIVE_CREDENTIAL_KEY -> ~/.hive/secrets/credential_key (plain text, chmod 600)
ADEN_API_KEY -> ~/.hive/credentials/ (encrypted via EncryptedFileStorage)
Boot order:
1. load_credential_key() -- reads/generates the Fernet key, sets os.environ
2. load_aden_api_key() -- uses the encrypted store (which needs the key from step 1)
"""
from __future__ import annotations
import logging
import os
import stat
from pathlib import Path
logger = logging.getLogger(__name__)
CREDENTIAL_KEY_PATH = Path.home() / ".hive" / "secrets" / "credential_key"
CREDENTIAL_KEY_ENV_VAR = "HIVE_CREDENTIAL_KEY"
ADEN_CREDENTIAL_ID = "aden_api_key"
ADEN_ENV_VAR = "ADEN_API_KEY"
# ---------------------------------------------------------------------------
# HIVE_CREDENTIAL_KEY
# ---------------------------------------------------------------------------
def load_credential_key() -> str | None:
"""Load HIVE_CREDENTIAL_KEY with priority: env > file > shell config.
Sets ``os.environ["HIVE_CREDENTIAL_KEY"]`` as a side-effect when found.
Returns the key string, or ``None`` if unavailable everywhere.
"""
# 1. Already in environment (set by parent process, CI, Windows Registry, etc.)
key = os.environ.get(CREDENTIAL_KEY_ENV_VAR)
if key:
return key
# 2. Dedicated secrets file
key = _read_credential_key_file()
if key:
os.environ[CREDENTIAL_KEY_ENV_VAR] = key
return key
# 3. Shell config fallback (backward compat for old installs)
key = _read_from_shell_config(CREDENTIAL_KEY_ENV_VAR)
if key:
os.environ[CREDENTIAL_KEY_ENV_VAR] = key
return key
return None
def save_credential_key(key: str) -> Path:
"""Save HIVE_CREDENTIAL_KEY to ``~/.hive/secrets/credential_key``.
Creates parent dirs with mode 700, writes the file with mode 600.
Also sets ``os.environ["HIVE_CREDENTIAL_KEY"]``.
Returns:
The path that was written.
"""
path = CREDENTIAL_KEY_PATH
path.parent.mkdir(parents=True, exist_ok=True)
# Restrict the secrets directory itself
path.parent.chmod(stat.S_IRWXU) # 0o700
path.write_text(key, encoding="utf-8")
path.chmod(stat.S_IRUSR | stat.S_IWUSR) # 0o600
os.environ[CREDENTIAL_KEY_ENV_VAR] = key
return path
def generate_and_save_credential_key() -> str:
"""Generate a new Fernet key and persist it to ``~/.hive/secrets/credential_key``.
Returns:
The generated key string.
"""
from cryptography.fernet import Fernet
key = Fernet.generate_key().decode()
save_credential_key(key)
return key
# ---------------------------------------------------------------------------
# ADEN_API_KEY
# ---------------------------------------------------------------------------
def load_aden_api_key() -> str | None:
"""Load ADEN_API_KEY with priority: env > encrypted store > shell config.
**Must** be called after ``load_credential_key()`` because the encrypted
store depends on HIVE_CREDENTIAL_KEY.
Sets ``os.environ["ADEN_API_KEY"]`` as a side-effect when found.
Returns the key string, or ``None`` if unavailable everywhere.
"""
# 1. Already in environment
key = os.environ.get(ADEN_ENV_VAR)
if key:
return key
# 2. Encrypted credential store
key = _read_aden_from_encrypted_store()
if key:
os.environ[ADEN_ENV_VAR] = key
return key
# 3. Shell config fallback (backward compat)
key = _read_from_shell_config(ADEN_ENV_VAR)
if key:
os.environ[ADEN_ENV_VAR] = key
return key
return None
def save_aden_api_key(key: str) -> None:
"""Save ADEN_API_KEY to the encrypted credential store.
Also sets ``os.environ["ADEN_API_KEY"]``.
"""
from pydantic import SecretStr
from .models import CredentialKey, CredentialObject
from .storage import EncryptedFileStorage
storage = EncryptedFileStorage()
cred = CredentialObject(
id=ADEN_CREDENTIAL_ID,
keys={"api_key": CredentialKey(name="api_key", value=SecretStr(key))},
)
storage.save(cred)
os.environ[ADEN_ENV_VAR] = key
def delete_aden_api_key() -> None:
"""Remove ADEN_API_KEY from the encrypted store and ``os.environ``."""
try:
from .storage import EncryptedFileStorage
storage = EncryptedFileStorage()
storage.delete(ADEN_CREDENTIAL_ID)
except Exception:
logger.debug("Could not delete %s from encrypted store", ADEN_CREDENTIAL_ID)
os.environ.pop(ADEN_ENV_VAR, None)
# ---------------------------------------------------------------------------
# Internal helpers
# ---------------------------------------------------------------------------
def _read_credential_key_file() -> str | None:
"""Read the credential key from ``~/.hive/secrets/credential_key``."""
try:
if CREDENTIAL_KEY_PATH.is_file():
value = CREDENTIAL_KEY_PATH.read_text(encoding="utf-8").strip()
if value:
return value
except Exception:
logger.debug("Could not read %s", CREDENTIAL_KEY_PATH)
return None
def _read_from_shell_config(env_var: str) -> str | None:
"""Fallback: read an env var from ~/.zshrc or ~/.bashrc."""
try:
from aden_tools.credentials.shell_config import check_env_var_in_shell_config
found, value = check_env_var_in_shell_config(env_var)
if found and value:
return value
except ImportError:
pass
return None
def _read_aden_from_encrypted_store() -> str | None:
"""Try to load ADEN_API_KEY from the encrypted credential store."""
if not os.environ.get(CREDENTIAL_KEY_ENV_VAR):
return None
try:
from .storage import EncryptedFileStorage
storage = EncryptedFileStorage()
cred = storage.load(ADEN_CREDENTIAL_ID)
if cred:
return cred.get_key("api_key")
except Exception:
logger.debug("Could not load %s from encrypted store", ADEN_CREDENTIAL_ID)
return None
@@ -73,6 +73,7 @@ from .provider import (
TokenExpiredError,
TokenPlacement,
)
from .zoho_provider import ZohoOAuth2Provider
__all__ = [
# Types
@@ -82,6 +83,7 @@ __all__ = [
# Providers
"BaseOAuth2Provider",
"HubSpotOAuth2Provider",
"ZohoOAuth2Provider",
# Lifecycle
"TokenLifecycleManager",
"TokenRefreshResult",
@@ -0,0 +1,198 @@
"""
Zoho CRM-specific OAuth2 provider.
Pre-configured for Zoho's OAuth2 endpoints and CRM scopes.
Extends BaseOAuth2Provider for Zoho-specific behavior.
Usage:
provider = ZohoOAuth2Provider(
client_id="your-client-id",
client_secret="your-client-secret",
accounts_domain="https://accounts.zoho.com", # or .in, .eu, etc.
)
# Use with credential store
store = CredentialStore(
storage=EncryptedFileStorage(),
providers=[provider],
)
See: https://www.zoho.com/crm/developer/docs/api/v2/access-refresh.html
"""
from __future__ import annotations
import logging
import os
from typing import Any
from ..models import CredentialObject, CredentialRefreshError, CredentialType
from .base_provider import BaseOAuth2Provider
from .provider import OAuth2Config, OAuth2Token, TokenPlacement
logger = logging.getLogger(__name__)
# Default CRM scopes for Phase 1 (Leads, Contacts, Accounts, Deals, Notes)
ZOHO_DEFAULT_SCOPES = [
"ZohoCRM.modules.leads.ALL",
"ZohoCRM.modules.contacts.ALL",
"ZohoCRM.modules.accounts.ALL",
"ZohoCRM.modules.deals.ALL",
"ZohoCRM.modules.notes.CREATE",
]
class ZohoOAuth2Provider(BaseOAuth2Provider):
"""
Zoho CRM OAuth2 provider with pre-configured endpoints.
Handles Zoho-specific OAuth2 behavior:
- Pre-configured token and authorization URLs (region-aware)
- Default CRM scopes for Leads, Contacts, Accounts, Deals, Notes
- Token validation via Zoho CRM API
- Authorization header format: "Authorization: Zoho-oauthtoken {token}"
Example:
provider = ZohoOAuth2Provider(
client_id="your-zoho-client-id",
client_secret="your-zoho-client-secret",
accounts_domain="https://accounts.zoho.com", # US
# or "https://accounts.zoho.in" for India
# or "https://accounts.zoho.eu" for EU
)
"""
def __init__(
self,
client_id: str,
client_secret: str,
accounts_domain: str = "https://accounts.zoho.com",
api_domain: str | None = None,
scopes: list[str] | None = None,
):
"""
Initialize Zoho OAuth2 provider.
Args:
client_id: Zoho OAuth2 client ID
client_secret: Zoho OAuth2 client secret
accounts_domain: Zoho accounts domain (region-specific)
- US: https://accounts.zoho.com
- India: https://accounts.zoho.in
- EU: https://accounts.zoho.eu
- etc.
api_domain: Zoho API domain for CRM calls (used in validate).
Defaults to ZOHO_API_DOMAIN env or https://www.zohoapis.com
scopes: Override default scopes if needed
"""
base = accounts_domain.rstrip("/")
token_url = f"{base}/oauth/v2/token"
auth_url = f"{base}/oauth/v2/auth"
config = OAuth2Config(
token_url=token_url,
authorization_url=auth_url,
client_id=client_id,
client_secret=client_secret,
default_scopes=scopes or ZOHO_DEFAULT_SCOPES,
token_placement=TokenPlacement.HEADER_CUSTOM,
custom_header_name="Authorization",
)
super().__init__(config, provider_id="zoho_crm_oauth2")
self._accounts_domain = base
self._api_domain = (
api_domain or os.getenv("ZOHO_API_DOMAIN", "https://www.zohoapis.com")
).rstrip("/")
@property
def supported_types(self) -> list[CredentialType]:
return [CredentialType.OAUTH2]
def format_for_request(self, token: OAuth2Token) -> dict[str, Any]:
"""
Format token for Zoho CRM API requests.
Zoho uses Authorization header: "Zoho-oauthtoken {access_token}"
(not Bearer).
"""
return {
"headers": {
"Authorization": f"Zoho-oauthtoken {token.access_token}",
"Content-Type": "application/json",
"Accept": "application/json",
}
}
def validate(self, credential: CredentialObject) -> bool:
"""
Validate Zoho credential by making a lightweight API call.
Uses GET /crm/v2/users?type=CurrentUser (doesn't require module access).
Treats 429 as valid-but-rate-limited.
"""
access_token = credential.get_key("access_token")
if not access_token:
return False
try:
client = self._get_client()
response = client.get(
f"{self._api_domain}/crm/v2/users?type=CurrentUser",
headers={
"Authorization": f"Zoho-oauthtoken {access_token}",
"Accept": "application/json",
},
timeout=self.config.request_timeout,
)
return response.status_code in (200, 429)
except Exception as e:
logger.debug("Zoho credential validation failed: %s", e)
return False
def _parse_token_response(self, response_data: dict[str, Any]) -> OAuth2Token:
"""
Parse Zoho token response.
Zoho returns:
{
"access_token": "...",
"refresh_token": "...",
"expires_in": 3600,
"api_domain": "https://www.zohoapis.com",
"token_type": "Bearer"
}
"""
token = OAuth2Token.from_token_response(response_data)
if "api_domain" in response_data:
token.raw_response["api_domain"] = response_data["api_domain"]
return token
def refresh(self, credential: CredentialObject) -> CredentialObject:
"""Refresh Zoho OAuth2 credential and persist DC metadata."""
refresh_tok = credential.get_key("refresh_token")
if not refresh_tok:
raise CredentialRefreshError(f"Credential '{credential.id}' has no refresh_token")
try:
new_token = self.refresh_access_token(refresh_tok)
except Exception as e:
raise CredentialRefreshError(f"Failed to refresh '{credential.id}': {e}") from e
credential.set_key("access_token", new_token.access_token, expires_at=new_token.expires_at)
if new_token.refresh_token and new_token.refresh_token != refresh_tok:
credential.set_key("refresh_token", new_token.refresh_token)
api_domain = new_token.raw_response.get("api_domain")
if isinstance(api_domain, str) and api_domain:
credential.set_key("api_domain", api_domain.rstrip("/"))
accounts_server = new_token.raw_response.get("accounts-server")
if isinstance(accounts_server, str) and accounts_server:
credential.set_key("accounts_domain", accounts_server.rstrip("/"))
location = new_token.raw_response.get("location")
if isinstance(location, str) and location:
credential.set_key("location", location.strip().lower())
return credential
+35 -181
View File
@@ -160,7 +160,10 @@ class CredentialSetupSession:
@classmethod
def from_nodes(cls, nodes: list[NodeSpec]) -> CredentialSetupSession:
"""Create a setup session by detecting missing credentials from nodes."""
missing = detect_missing_credentials_from_nodes(nodes)
from framework.credentials.validation import _status_to_missing, validate_agent_credentials
result = validate_agent_credentials(nodes, verify=False, raise_on_error=False)
missing = [_status_to_missing(c) for c in result.credentials if not c.available]
return cls(missing)
@classmethod
@@ -178,22 +181,15 @@ class CredentialSetupSession:
are NOT yet available. If False, include all required
credentials regardless of availability.
"""
agent_path = Path(agent_path)
from framework.credentials.validation import _status_to_missing, validate_agent_credentials
# Load agent to get nodes
agent_json = agent_path / "agent.json"
agent_py = agent_path / "agent.py"
nodes = []
if agent_py.exists():
# Python-based agent
nodes = _load_nodes_from_python_agent(agent_path)
elif agent_json.exists():
# JSON-based agent
nodes = _load_nodes_from_json_agent(agent_json)
creds = detect_missing_credentials_from_nodes(nodes, missing_only=missing_only)
return cls(creds)
nodes = load_agent_nodes(agent_path)
result = validate_agent_credentials(nodes, verify=False, raise_on_error=False)
if missing_only:
missing = [_status_to_missing(c) for c in result.credentials if not c.available]
else:
missing = [_status_to_missing(c) for c in result.credentials]
return cls(missing)
def run_interactive(self) -> SetupResult:
"""Run the interactive setup flow."""
@@ -260,57 +256,23 @@ class CredentialSetupSession:
def _ensure_credential_key(self) -> bool:
"""Ensure HIVE_CREDENTIAL_KEY is available for encrypted storage."""
if os.environ.get("HIVE_CREDENTIAL_KEY"):
from .key_storage import generate_and_save_credential_key, load_credential_key
if load_credential_key():
return True
# Try to load from shell config
try:
from aden_tools.credentials.shell_config import check_env_var_in_shell_config
found, value = check_env_var_in_shell_config("HIVE_CREDENTIAL_KEY")
if found and value:
os.environ["HIVE_CREDENTIAL_KEY"] = value
return True
except ImportError:
pass
# Generate a new key
self._print(f"{Colors.YELLOW}Initializing credential store...{Colors.NC}")
try:
from cryptography.fernet import Fernet
generated_key = Fernet.generate_key().decode()
os.environ["HIVE_CREDENTIAL_KEY"] = generated_key
# Save to shell config
self._save_key_to_shell_config(generated_key)
generate_and_save_credential_key()
self._print(
f"{Colors.GREEN}✓ Encryption key saved to ~/.hive/secrets/credential_key{Colors.NC}"
)
return True
except Exception as e:
self._print(f"{Colors.RED}Failed to initialize credential store: {e}{Colors.NC}")
return False
def _save_key_to_shell_config(self, key: str) -> None:
"""Save HIVE_CREDENTIAL_KEY to shell config."""
try:
from aden_tools.credentials.shell_config import (
add_env_var_to_shell_config,
)
success, config_path = add_env_var_to_shell_config(
"HIVE_CREDENTIAL_KEY",
key,
comment="Encryption key for Hive credential store",
)
if success:
self._print(f"{Colors.GREEN}✓ Encryption key saved to {config_path}{Colors.NC}")
except Exception:
# Fallback: just tell the user
self._print("\n")
self._print(
f"{Colors.YELLOW}Add this to your shell config (~/.zshrc or ~/.bashrc):{Colors.NC}"
)
self._print(f' export HIVE_CREDENTIAL_KEY="{key}"')
def _setup_single_credential(self, cred: MissingCredential) -> bool:
"""Set up a single credential. Returns True if configured."""
self._print(f"\n{Colors.CYAN}{'' * 60}{Colors.NC}")
@@ -448,19 +410,10 @@ class CredentialSetupSession:
self._print(f"{Colors.YELLOW}No key entered. Skipping.{Colors.NC}")
return False
os.environ["ADEN_API_KEY"] = aden_key
# Persist to encrypted store and set os.environ
from .key_storage import save_aden_api_key
# Save to shell config
try:
from aden_tools.credentials.shell_config import add_env_var_to_shell_config
add_env_var_to_shell_config(
"ADEN_API_KEY",
aden_key,
comment="Aden Platform API key",
)
except Exception:
pass
save_aden_api_key(aden_key)
# Sync from Aden
try:
@@ -564,123 +517,24 @@ class CredentialSetupSession:
self._print("")
def detect_missing_credentials_from_nodes(
nodes: list,
*,
missing_only: bool = True,
) -> list[MissingCredential]:
"""
Detect credentials required by a list of nodes.
def load_agent_nodes(agent_path: str | Path) -> list:
"""Load NodeSpec list from an agent's agent.py or agent.json.
Args:
nodes: List of NodeSpec objects
missing_only: If True (default), only return credentials that are
NOT yet available. If False, return ALL required credentials
regardless of availability.
agent_path: Path to agent directory.
Returns:
List of MissingCredential objects for credentials that need setup
(or all required credentials when missing_only=False).
List of NodeSpec objects (empty list if agent can't be loaded).
"""
try:
from aden_tools.credentials import CREDENTIAL_SPECS
agent_path = Path(agent_path)
agent_py = agent_path / "agent.py"
agent_json = agent_path / "agent.json"
from framework.credentials import CredentialStore
from framework.credentials.storage import (
CompositeStorage,
EncryptedFileStorage,
EnvVarStorage,
)
except ImportError:
return []
# Collect required tools and node types
required_tools: set[str] = set()
node_types: set[str] = set()
for node in nodes:
if hasattr(node, "tools") and node.tools:
required_tools.update(node.tools)
if hasattr(node, "node_type"):
node_types.add(node.node_type)
# Build credential store to check availability.
# Env vars take priority over encrypted store (fresh key wins over stale).
env_mapping = {
(spec.credential_id or name): spec.env_var for name, spec in CREDENTIAL_SPECS.items()
}
env_storage = EnvVarStorage(env_mapping=env_mapping)
if os.environ.get("HIVE_CREDENTIAL_KEY"):
storage = CompositeStorage(primary=env_storage, fallbacks=[EncryptedFileStorage()])
else:
storage = env_storage
store = CredentialStore(storage=storage)
# Build reverse mappings
tool_to_cred: dict[str, str] = {}
node_type_to_cred: dict[str, str] = {}
for cred_name, spec in CREDENTIAL_SPECS.items():
for tool_name in spec.tools:
tool_to_cred[tool_name] = cred_name
for nt in spec.node_types:
node_type_to_cred[nt] = cred_name
missing: list[MissingCredential] = []
checked: set[str] = set()
# Check tool credentials
for tool_name in sorted(required_tools):
cred_name = tool_to_cred.get(tool_name)
if cred_name is None or cred_name in checked:
continue
checked.add(cred_name)
spec = CREDENTIAL_SPECS[cred_name]
cred_id = spec.credential_id or cred_name
if spec.required and (not missing_only or not store.is_available(cred_id)):
affected_tools = sorted(t for t in required_tools if t in spec.tools)
missing.append(
MissingCredential(
credential_name=cred_name,
env_var=spec.env_var,
description=spec.description,
help_url=spec.help_url,
api_key_instructions=spec.api_key_instructions,
tools=affected_tools,
aden_supported=spec.aden_supported,
direct_api_key_supported=spec.direct_api_key_supported,
credential_id=spec.credential_id,
credential_key=spec.credential_key,
)
)
# Check node type credentials
for nt in sorted(node_types):
cred_name = node_type_to_cred.get(nt)
if cred_name is None or cred_name in checked:
continue
checked.add(cred_name)
spec = CREDENTIAL_SPECS[cred_name]
cred_id = spec.credential_id or cred_name
if spec.required and (not missing_only or not store.is_available(cred_id)):
affected_types = sorted(t for t in node_types if t in spec.node_types)
missing.append(
MissingCredential(
credential_name=cred_name,
env_var=spec.env_var,
description=spec.description,
help_url=spec.help_url,
api_key_instructions=spec.api_key_instructions,
node_types=affected_types,
aden_supported=spec.aden_supported,
direct_api_key_supported=spec.direct_api_key_supported,
credential_id=spec.credential_id,
credential_key=spec.credential_key,
)
)
return missing
if agent_py.exists():
return _load_nodes_from_python_agent(agent_path)
elif agent_json.exists():
return _load_nodes_from_json_agent(agent_json)
return []
def _load_nodes_from_python_agent(agent_path: Path) -> list:
@@ -714,7 +568,7 @@ def _load_nodes_from_python_agent(agent_path: Path) -> list:
def _load_nodes_from_json_agent(agent_json: Path) -> list:
"""Load nodes from a JSON-based agent."""
try:
with open(agent_json) as f:
with open(agent_json, encoding="utf-8") as f:
data = json.load(f)
from framework.graph import NodeSpec
+3 -3
View File
@@ -227,7 +227,7 @@ class EncryptedFileStorage(CredentialStorage):
index_path = self.base_path / "metadata" / "index.json"
if not index_path.exists():
return []
with open(index_path) as f:
with open(index_path, encoding="utf-8") as f:
index = json.load(f)
return list(index.get("credentials", {}).keys())
@@ -268,7 +268,7 @@ class EncryptedFileStorage(CredentialStorage):
index_path = self.base_path / "metadata" / "index.json"
if index_path.exists():
with open(index_path) as f:
with open(index_path, encoding="utf-8") as f:
index = json.load(f)
else:
index = {"credentials": {}, "version": "1.0"}
@@ -283,7 +283,7 @@ class EncryptedFileStorage(CredentialStorage):
index["last_modified"] = datetime.now(UTC).isoformat()
with open(index_path, "w") as f:
with open(index_path, "w", encoding="utf-8") as f:
json.dump(index, f, indent=2)
+315 -153
View File
@@ -14,56 +14,165 @@ logger = logging.getLogger(__name__)
def ensure_credential_key_env() -> None:
"""Load credentials from shell config if not in environment.
"""Load bootstrap credentials into ``os.environ``.
The quickstart.sh and setup-credentials skill write API keys to ~/.zshrc
or ~/.bashrc. If the user hasn't sourced their config in the current shell,
this reads them directly so the runner (and any MCP subprocesses) can use them.
Priority chain for each credential:
1. ``os.environ`` (already set nothing to do)
2. Dedicated file storage (``~/.hive/secrets/`` or encrypted store)
3. Shell config fallback (``~/.zshrc`` / ``~/.bashrc``) for backward compat
Loads:
- HIVE_CREDENTIAL_KEY (encrypted credential store)
- ADEN_API_KEY (Aden OAuth sync)
- All LLM API keys (ANTHROPIC_API_KEY, OPENAI_API_KEY, ZAI_API_KEY, etc.)
Boot order matters: HIVE_CREDENTIAL_KEY must load BEFORE ADEN_API_KEY
because the encrypted store depends on it.
Remaining LLM/tool API keys still load from shell config.
"""
from .key_storage import load_aden_api_key, load_credential_key
# Step 1: HIVE_CREDENTIAL_KEY (must come first — encrypted store depends on it)
load_credential_key()
# Step 2: ADEN_API_KEY (uses encrypted store, then shell config fallback)
load_aden_api_key()
# Step 3: Load remaining LLM/tool API keys from shell config
try:
from aden_tools.credentials.shell_config import check_env_var_in_shell_config
except ImportError:
return
# Core credentials that are always checked
env_vars_to_load = ["HIVE_CREDENTIAL_KEY", "ADEN_API_KEY"]
# Add all LLM/tool API keys from CREDENTIAL_SPECS
try:
from aden_tools.credentials import CREDENTIAL_SPECS
for spec in CREDENTIAL_SPECS.values():
if spec.env_var and spec.env_var not in env_vars_to_load:
env_vars_to_load.append(spec.env_var)
var_name = spec.env_var
if var_name and var_name not in ("HIVE_CREDENTIAL_KEY", "ADEN_API_KEY"):
if not os.environ.get(var_name):
found, value = check_env_var_in_shell_config(var_name)
if found and value:
os.environ[var_name] = value
logger.debug("Loaded %s from shell config", var_name)
except ImportError:
pass
for var_name in env_vars_to_load:
if os.environ.get(var_name):
continue
found, value = check_env_var_in_shell_config(var_name)
if found and value:
os.environ[var_name] = value
logger.debug("Loaded %s from shell config", var_name)
@dataclass
class CredentialStatus:
"""Status of a single required credential after validation."""
credential_name: str
credential_id: str
env_var: str
description: str
help_url: str
api_key_instructions: str
tools: list[str]
node_types: list[str]
available: bool
valid: bool | None # None = not checked
validation_message: str | None
aden_supported: bool
direct_api_key_supported: bool
credential_key: str
aden_not_connected: bool # Aden-only cred, ADEN_API_KEY set, but integration missing
alternative_group: str | None = None # non-None when multiple providers can satisfy a tool
@dataclass
class _CredentialCheck:
"""Result of checking a single credential."""
class CredentialValidationResult:
"""Result of validating all credentials required by an agent."""
env_var: str
source: str
used_by: str
available: bool
help_url: str = ""
credentials: list[CredentialStatus]
has_aden_key: bool
@property
def failed(self) -> list[CredentialStatus]:
"""Credentials that are missing, invalid, or Aden-not-connected.
For alternative groups (multi-provider tools like send_email), the group
is satisfied if ANY member is available and valid only report failures
when the entire group is unsatisfied.
"""
# Check which alternative groups are satisfied
alt_satisfied: dict[str, bool] = {}
for c in self.credentials:
if not c.alternative_group:
continue
if c.alternative_group not in alt_satisfied:
alt_satisfied[c.alternative_group] = False
if c.available and c.valid is not False:
alt_satisfied[c.alternative_group] = True
result = []
for c in self.credentials:
if c.alternative_group:
# Skip if any alternative in the group is satisfied
if alt_satisfied.get(c.alternative_group, False):
continue
if not c.available or c.valid is False:
result.append(c)
else:
if not c.available or c.valid is False:
result.append(c)
return result
@property
def has_errors(self) -> bool:
return bool(self.failed)
@property
def failed_cred_names(self) -> list[str]:
"""Credential names that need (re-)collection, excluding Aden-not-connected."""
return [c.credential_name for c in self.failed if not c.aden_not_connected]
def format_error_message(self) -> str:
"""Format a human-readable error message for CLI/runner output."""
missing = [c for c in self.credentials if not c.available and not c.aden_not_connected]
invalid = [c for c in self.credentials if c.available and c.valid is False]
aden_nc = [c for c in self.credentials if c.aden_not_connected]
lines: list[str] = []
if missing:
lines.append("Missing credentials:\n")
for c in missing:
entry = f" {c.env_var} for {_label(c)}"
if c.help_url:
entry += f"\n Get it at: {c.help_url}"
lines.append(entry)
if invalid:
if missing:
lines.append("")
lines.append("Invalid or expired credentials:\n")
for c in invalid:
entry = f" {c.env_var} for {_label(c)}{c.validation_message}"
if c.help_url:
entry += f"\n Get a new key at: {c.help_url}"
lines.append(entry)
if aden_nc:
if missing or invalid:
lines.append("")
lines.append(
"Aden integrations not connected "
"(ADEN_API_KEY is set but OAuth tokens unavailable):\n"
)
for c in aden_nc:
lines.append(
f" {c.env_var} for {_label(c)}"
f"\n Connect this integration at hive.adenhq.com first."
)
lines.append("\nIf you've already set up credentials, restart your terminal to load them.")
return "\n".join(lines)
def _presync_aden_tokens(credential_specs: dict) -> None:
def _label(c: CredentialStatus) -> str:
"""Build a human-readable label from tools/node_types."""
if c.tools:
return ", ".join(c.tools)
if c.node_types:
return ", ".join(c.node_types) + " nodes"
return c.credential_name
def _presync_aden_tokens(credential_specs: dict, *, force: bool = False) -> None:
"""Sync Aden-backed OAuth tokens into env vars for validation.
When ADEN_API_KEY is available, fetches fresh OAuth tokens from the Aden
@@ -71,6 +180,11 @@ def _presync_aden_tokens(credential_specs: dict) -> None:
tokens instead of stale or mis-stored values in the encrypted store.
Only touches credentials that are ``aden_supported`` AND whose env var
is not already set (so explicit user exports always win).
Args:
force: When True, overwrite env vars that are already set. Used by
the credentials modal to pick up freshly reauthorized tokens
from Aden instead of reusing stale values from a prior sync.
"""
from framework.credentials.store import CredentialStore
@@ -83,7 +197,7 @@ def _presync_aden_tokens(credential_specs: dict) -> None:
for name, spec in credential_specs.items():
if not spec.aden_supported:
continue
if os.environ.get(spec.env_var):
if not force and os.environ.get(spec.env_var):
continue # Already set — don't overwrite
cred_id = spec.credential_id or name
# sync_all() already fetched everything available from Aden.
@@ -112,7 +226,13 @@ def _presync_aden_tokens(credential_specs: dict) -> None:
)
def validate_agent_credentials(nodes: list, quiet: bool = False, verify: bool = True) -> None:
def validate_agent_credentials(
nodes: list,
quiet: bool = False,
verify: bool = True,
raise_on_error: bool = True,
force_refresh: bool = False,
) -> CredentialValidationResult:
"""Check that required credentials are available and valid before running an agent.
Two-phase validation:
@@ -124,15 +244,30 @@ def validate_agent_credentials(nodes: list, quiet: bool = False, verify: bool =
nodes: List of NodeSpec objects from the agent graph.
quiet: If True, suppress the credential summary output.
verify: If True (default), run health checks on present credentials.
raise_on_error: If True (default), raise CredentialError when validation
fails. Set to False to get the result without raising.
force_refresh: If True, force re-sync of Aden OAuth tokens even when
env vars are already set. Used by the credentials modal after
reauthorization.
Returns:
CredentialValidationResult with status of ALL required credentials.
"""
empty_result = CredentialValidationResult(credentials=[], has_aden_key=False)
# Collect required tools and node types
required_tools = {tool for node in nodes if node.tools for tool in node.tools}
node_types = {node.node_type for node in nodes}
required_tools: set[str] = set()
node_types: set[str] = set()
for node in nodes:
if hasattr(node, "tools") and node.tools:
required_tools.update(node.tools)
if hasattr(node, "node_type"):
node_types.add(node.node_type)
try:
from aden_tools.credentials import CREDENTIAL_SPECS
except ImportError:
return # aden_tools not installed, skip check
return empty_result # aden_tools not installed, skip check
from framework.credentials.storage import CompositeStorage, EncryptedFileStorage, EnvVarStorage
from framework.credentials.store import CredentialStore
@@ -145,7 +280,7 @@ def validate_agent_credentials(nodes: list, quiet: bool = False, verify: bool =
# into env vars so validation sees fresh tokens instead of stale values
# in the encrypted store (e.g., a previously mis-stored google.enc).
if os.environ.get("ADEN_API_KEY"):
_presync_aden_tokens(CREDENTIAL_SPECS)
_presync_aden_tokens(CREDENTIAL_SPECS, force=force_refresh)
env_mapping = {
(spec.credential_id or name): spec.env_var for name, spec in CREDENTIAL_SPECS.items()
@@ -157,57 +292,114 @@ def validate_agent_credentials(nodes: list, quiet: bool = False, verify: bool =
storage = env_storage
store = CredentialStore(storage=storage)
# Build reverse mappings
tool_to_cred: dict[str, str] = {}
# Build reverse mappings — 1:many for multi-provider tools (e.g. send_email → resend OR google)
tool_to_creds: dict[str, list[str]] = {}
node_type_to_cred: dict[str, str] = {}
for cred_name, spec in CREDENTIAL_SPECS.items():
for tool_name in spec.tools:
tool_to_cred[tool_name] = cred_name
tool_to_creds.setdefault(tool_name, []).append(cred_name)
for nt in spec.node_types:
node_type_to_cred[nt] = cred_name
missing: list[str] = []
invalid: list[str] = []
# Aden-backed creds where ADEN_API_KEY is set but integration not connected
aden_not_connected: list[str] = []
failed_cred_names: list[str] = [] # all cred names that need (re-)collection
has_aden_key = bool(os.environ.get("ADEN_API_KEY"))
checked: set[str] = set()
all_credentials: list[CredentialStatus] = []
# Credentials that are present and should be health-checked
to_verify: list[tuple[str, str]] = [] # (cred_name, used_by_label)
to_verify: list[int] = [] # indices into all_credentials
def _check_credential(spec, cred_name: str, label: str) -> None:
def _check_credential(
spec,
cred_name: str,
affected_tools: list[str],
affected_node_types: list[str],
alternative_group: str | None = None,
) -> None:
cred_id = spec.credential_id or cred_name
if not store.is_available(cred_id):
# If ADEN_API_KEY is set and this is an Aden-only credential,
# the issue is that the integration isn't connected on hive.adenhq.com,
# NOT that the user needs to re-enter ADEN_API_KEY.
if has_aden_key and spec.aden_supported and not spec.direct_api_key_supported:
aden_not_connected.append(
f" {spec.env_var} for {label}"
f"\n Connect this integration at hive.adenhq.com first."
)
else:
entry = f" {spec.env_var} for {label}"
if spec.help_url:
entry += f"\n Get it at: {spec.help_url}"
missing.append(entry)
failed_cred_names.append(cred_name)
elif verify and spec.health_check_endpoint:
to_verify.append((cred_name, label))
available = store.is_available(cred_id)
# Aden-not-connected: ADEN_API_KEY set, Aden-only cred, but integration missing
is_aden_nc = (
not available
and has_aden_key
and spec.aden_supported
and not spec.direct_api_key_supported
)
status = CredentialStatus(
credential_name=cred_name,
credential_id=cred_id,
env_var=spec.env_var,
description=spec.description,
help_url=spec.help_url,
api_key_instructions=getattr(spec, "api_key_instructions", ""),
tools=affected_tools,
node_types=affected_node_types,
available=available,
valid=None,
validation_message=None,
aden_supported=spec.aden_supported,
direct_api_key_supported=spec.direct_api_key_supported,
credential_key=spec.credential_key,
aden_not_connected=is_aden_nc,
alternative_group=alternative_group,
)
all_credentials.append(status)
if available and verify and spec.health_check_endpoint:
to_verify.append(len(all_credentials) - 1)
# Check tool credentials
for tool_name in sorted(required_tools):
cred_name = tool_to_cred.get(tool_name)
if cred_name is None or cred_name in checked:
cred_names = tool_to_creds.get(tool_name)
if cred_names is None:
continue
checked.add(cred_name)
spec = CREDENTIAL_SPECS[cred_name]
if not spec.required:
# Filter to credentials we haven't already checked
unchecked = [cn for cn in cred_names if cn not in checked]
if not unchecked:
continue
affected = sorted(t for t in required_tools if t in spec.tools)
label = ", ".join(affected)
_check_credential(spec, cred_name, label)
# Single provider — existing behavior
if len(unchecked) == 1:
cred_name = unchecked[0]
checked.add(cred_name)
spec = CREDENTIAL_SPECS[cred_name]
if not spec.required:
continue
affected = sorted(t for t in required_tools if t in spec.tools)
_check_credential(spec, cred_name, affected_tools=affected, affected_node_types=[])
continue
# Multi-provider (e.g. send_email → resend OR google):
# satisfied if ANY provider credential is available.
available_cn = None
for cn in unchecked:
spec = CREDENTIAL_SPECS[cn]
cred_id = spec.credential_id or cn
if store.is_available(cred_id):
available_cn = cn
break
if available_cn is not None:
# Found an available provider — check (and health-check) it
checked.add(available_cn)
spec = CREDENTIAL_SPECS[available_cn]
affected = sorted(t for t in required_tools if t in spec.tools)
_check_credential(spec, available_cn, affected_tools=affected, affected_node_types=[])
else:
# None available — report ALL alternatives so the modal can show them
group_key = tool_name # e.g. "send_email"
for cn in unchecked:
checked.add(cn)
spec = CREDENTIAL_SPECS[cn]
affected = sorted(t for t in required_tools if t in spec.tools)
_check_credential(
spec,
cn,
affected_tools=affected,
affected_node_types=[],
alternative_group=group_key,
)
# Check node type credentials (e.g., ANTHROPIC_API_KEY for LLM nodes)
for nt in sorted(node_types):
@@ -219,8 +411,7 @@ def validate_agent_credentials(nodes: list, quiet: bool = False, verify: bool =
if not spec.required:
continue
affected_types = sorted(t for t in node_types if t in spec.node_types)
label = ", ".join(affected_types) + " nodes"
_check_credential(spec, cred_name, label)
_check_credential(spec, cred_name, affected_tools=[], affected_node_types=affected_types)
# Phase 2: health-check present credentials
if to_verify:
@@ -230,69 +421,52 @@ def validate_agent_credentials(nodes: list, quiet: bool = False, verify: bool =
check_credential_health = None # type: ignore[assignment]
if check_credential_health is not None:
for cred_name, label in to_verify:
spec = CREDENTIAL_SPECS[cred_name]
cred_id = spec.credential_id or cred_name
value = store.get(cred_id)
for idx in to_verify:
status = all_credentials[idx]
spec = CREDENTIAL_SPECS[status.credential_name]
value = store.get(status.credential_id)
if not value:
continue
try:
result = check_credential_health(
cred_name,
status.credential_name,
value,
health_check_endpoint=spec.health_check_endpoint,
health_check_method=spec.health_check_method,
)
if not result.valid:
entry = f" {spec.env_var} for {label}{result.message}"
if spec.help_url:
entry += f"\n Get a new key at: {spec.help_url}"
invalid.append(entry)
failed_cred_names.append(cred_name)
elif result.valid:
status.valid = result.valid
status.validation_message = result.message
if result.valid:
# Persist identity from health check (best-effort)
identity_data = result.details.get("identity")
if identity_data and isinstance(identity_data, dict):
try:
cred_obj = store.get_credential(cred_id, refresh_if_needed=False)
cred_obj = store.get_credential(
status.credential_id, refresh_if_needed=False
)
if cred_obj:
cred_obj.set_identity(**identity_data)
store.save_credential(cred_obj)
except Exception:
pass # Identity persistence is best-effort
except Exception as exc:
logger.debug("Health check for %s failed: %s", cred_name, exc)
logger.debug("Health check for %s failed: %s", status.credential_name, exc)
errors = missing + invalid + aden_not_connected
if errors:
validation_result = CredentialValidationResult(
credentials=all_credentials,
has_aden_key=has_aden_key,
)
if raise_on_error and validation_result.has_errors:
from framework.credentials.models import CredentialError
lines: list[str] = []
if missing:
lines.append("Missing credentials:\n")
lines.extend(missing)
if invalid:
if missing:
lines.append("")
lines.append("Invalid or expired credentials:\n")
lines.extend(invalid)
if aden_not_connected:
if missing or invalid:
lines.append("")
lines.append(
"Aden integrations not connected "
"(ADEN_API_KEY is set but OAuth tokens unavailable):\n"
)
lines.extend(aden_not_connected)
lines.append(
"\nTo fix: run /hive-credentials in Claude Code."
"\nIf you've already set up credentials, "
"restart your terminal to load them."
)
exc = CredentialError("\n".join(lines))
exc.failed_cred_names = failed_cred_names # type: ignore[attr-defined]
exc = CredentialError(validation_result.format_error_message())
exc.validation_result = validation_result # type: ignore[attr-defined]
exc.failed_cred_names = validation_result.failed_cred_names # type: ignore[attr-defined]
raise exc
return validation_result
def build_setup_session_from_error(
credential_error: Exception,
@@ -301,56 +475,44 @@ def build_setup_session_from_error(
):
"""Build a ``CredentialSetupSession`` that covers all failed credentials.
``validate_agent_credentials`` attaches ``failed_cred_names`` (both missing
and invalid) to the ``CredentialError``. This helper converts those names
into ``MissingCredential`` entries so the setup screen can re-collect them.
Falls back to the normal ``from_nodes`` / ``from_agent_path`` detection
when the attribute is absent.
Uses the ``CredentialValidationResult`` attached to the ``CredentialError``
when available. Falls back to re-detecting from nodes / agent_path.
Args:
credential_error: The ``CredentialError`` raised by validation.
nodes: Graph nodes (preferred avoids re-loading from disk).
agent_path: Agent directory path (used when nodes aren't available).
"""
from framework.credentials.setup import CredentialSetupSession, MissingCredential
from framework.credentials.setup import CredentialSetupSession
# Start with normal detection (picks up truly missing creds)
# Prefer the validation result attached to the exception
result: CredentialValidationResult | None = getattr(credential_error, "validation_result", None)
if result is not None:
missing = [_status_to_missing(c) for c in result.failed]
return CredentialSetupSession(missing)
# Fallback: re-detect from nodes or agent_path
if nodes is not None:
session = CredentialSetupSession.from_nodes(nodes)
return CredentialSetupSession.from_nodes(nodes)
elif agent_path is not None:
session = CredentialSetupSession.from_agent_path(agent_path)
else:
session = CredentialSetupSession(missing=[])
return CredentialSetupSession.from_agent_path(agent_path)
return CredentialSetupSession(missing=[])
# Add credentials that are present but failed health checks
already = {m.credential_name for m in session.missing}
failed_names: list[str] = getattr(credential_error, "failed_cred_names", [])
if failed_names:
try:
from aden_tools.credentials import CREDENTIAL_SPECS
for name in failed_names:
if name in already:
continue
spec = CREDENTIAL_SPECS.get(name)
if spec is None:
continue
session.missing.append(
MissingCredential(
credential_name=name,
env_var=spec.env_var,
description=spec.description,
help_url=spec.help_url,
api_key_instructions=spec.api_key_instructions,
tools=list(spec.tools),
aden_supported=spec.aden_supported,
direct_api_key_supported=spec.direct_api_key_supported,
credential_id=spec.credential_id,
credential_key=spec.credential_key,
)
)
except ImportError:
pass
def _status_to_missing(c: CredentialStatus):
"""Convert a CredentialStatus to a MissingCredential for the setup flow."""
from framework.credentials.setup import MissingCredential
return session
return MissingCredential(
credential_name=c.credential_name,
env_var=c.env_var,
description=c.description,
help_url=c.help_url,
api_key_instructions=c.api_key_instructions,
tools=c.tools,
node_types=c.node_types,
aden_supported=c.aden_supported,
direct_api_key_supported=c.direct_api_key_supported,
credential_id=c.credential_id,
credential_key=c.credential_key,
)
+6 -1
View File
@@ -46,9 +46,11 @@ class ActiveNodeClientIO(NodeClientIO):
self,
node_id: str,
event_bus: EventBus | None = None,
execution_id: str = "",
) -> None:
self.node_id = node_id
self._event_bus = event_bus
self._execution_id = execution_id
self._output_queue: asyncio.Queue[str | None] = asyncio.Queue()
self._output_snapshot = ""
@@ -66,6 +68,7 @@ class ActiveNodeClientIO(NodeClientIO):
node_id=self.node_id,
content=content,
snapshot=self._output_snapshot,
execution_id=self._execution_id or None,
)
if is_final:
@@ -83,6 +86,7 @@ class ActiveNodeClientIO(NodeClientIO):
stream_id=self.node_id,
node_id=self.node_id,
prompt=prompt,
execution_id=self._execution_id or None,
)
try:
@@ -158,11 +162,12 @@ class ClientIOGateway:
def __init__(self, event_bus: EventBus | None = None) -> None:
self._event_bus = event_bus
def create_io(self, node_id: str, client_facing: bool) -> NodeClientIO:
def create_io(self, node_id: str, client_facing: bool, execution_id: str = "") -> NodeClientIO:
if client_facing:
return ActiveNodeClientIO(
node_id=node_id,
event_bus=self._event_bus,
execution_id=execution_id,
)
return InertNodeClientIO(
node_id=node_id,
+366 -8
View File
@@ -5,6 +5,7 @@ from __future__ import annotations
import json
import re
from dataclasses import dataclass
from pathlib import Path
from typing import Any, Literal, Protocol, runtime_checkable
@@ -90,15 +91,133 @@ class Message:
def _extract_spillover_filename(content: str) -> str | None:
"""Extract spillover filename from a truncated tool result.
"""Extract spillover filename from a tool result annotation.
Matches the pattern produced by EventLoopNode._truncate_tool_result():
"saved to 'tool_github_list_stargazers_abc123.txt'"
Matches patterns produced by EventLoopNode._truncate_tool_result():
- Large result: "saved to 'web_search_1.txt'"
- Small result: "[Saved to 'web_search_1.txt']"
"""
match = re.search(r"saved to '([^']+)'", content)
match = re.search(r"[Ss]aved to '([^']+)'", content)
return match.group(1) if match else None
_TC_ARG_LIMIT = 200 # max chars per tool_call argument after compaction
def _compact_tool_calls(tool_calls: list[dict[str, Any]]) -> list[dict[str, Any]]:
"""Truncate tool_call arguments to save context tokens during compaction.
Preserves ``id``, ``type``, and ``function.name`` exactly. When arguments
exceed ``_TC_ARG_LIMIT``, replaces the full JSON string with a compact
**valid** JSON summary. The Anthropic API parses tool_call arguments and
rejects requests with malformed JSON (e.g. unterminated strings), so we
must never produce broken JSON here.
"""
compact = []
for tc in tool_calls:
func = tc.get("function", {})
args = func.get("arguments", "")
if len(args) > _TC_ARG_LIMIT:
# Build a valid JSON summary instead of slicing mid-string.
# Try to extract top-level keys for a meaningful preview.
try:
parsed = json.loads(args)
if isinstance(parsed, dict):
# Preserve key names, truncate values
summary_parts = []
for k, v in parsed.items():
v_str = str(v)
if len(v_str) > 60:
v_str = v_str[:60] + "..."
summary_parts.append(f"{k}={v_str}")
summary = ", ".join(summary_parts)
if len(summary) > _TC_ARG_LIMIT:
summary = summary[:_TC_ARG_LIMIT] + "..."
args = json.dumps({"_compacted": summary})
else:
args = json.dumps({"_compacted": str(parsed)[:_TC_ARG_LIMIT]})
except (json.JSONDecodeError, TypeError):
# Args were already invalid JSON — wrap the preview safely
args = json.dumps({"_compacted": args[:_TC_ARG_LIMIT]})
compact.append(
{
"id": tc.get("id", ""),
"type": tc.get("type", "function"),
"function": {
"name": func.get("name", ""),
"arguments": args,
},
}
)
return compact
def extract_tool_call_history(messages: list[Message], max_entries: int = 30) -> str:
"""Build a compact tool call history from a list of messages.
Used in compaction summaries to prevent the LLM from re-calling
tools it already called. Extracts tool call details, files saved,
outputs set, and errors encountered.
"""
tool_calls_detail: dict[str, list[str]] = {}
files_saved: list[str] = []
outputs_set: list[str] = []
errors: list[str] = []
def _summarize_input(name: str, args: dict) -> str:
if name == "web_search":
return args.get("query", "")
if name == "web_scrape":
return args.get("url", "")
if name in ("load_data", "save_data"):
return args.get("filename", "")
return ""
for msg in messages:
if msg.role == "assistant" and msg.tool_calls:
for tc in msg.tool_calls:
func = tc.get("function", {})
name = func.get("name", "unknown")
try:
args = json.loads(func.get("arguments", "{}"))
except (json.JSONDecodeError, TypeError):
args = {}
summary = _summarize_input(name, args)
tool_calls_detail.setdefault(name, []).append(summary)
if name == "save_data" and args.get("filename"):
files_saved.append(args["filename"])
if name == "set_output" and args.get("key"):
outputs_set.append(args["key"])
if msg.role == "tool" and msg.is_error:
preview = msg.content[:120].replace("\n", " ")
errors.append(preview)
parts: list[str] = []
if tool_calls_detail:
lines: list[str] = []
for name, inputs in list(tool_calls_detail.items())[:max_entries]:
count = len(inputs)
non_empty = [s for s in inputs if s]
if non_empty:
detail_lines = [f" - {s[:120]}" for s in non_empty[:8]]
lines.append(f" {name} ({count}x):\n" + "\n".join(detail_lines))
else:
lines.append(f" {name} ({count}x)")
parts.append("TOOLS ALREADY CALLED:\n" + "\n".join(lines))
if files_saved:
unique = list(dict.fromkeys(files_saved))
parts.append("FILES SAVED: " + ", ".join(unique))
if outputs_set:
unique = list(dict.fromkeys(outputs_set))
parts.append("OUTPUTS SET: " + ", ".join(unique))
if errors:
parts.append("ERRORS (do NOT retry these):\n" + "\n".join(f" - {e}" for e in errors[:10]))
return "\n\n".join(parts)
# ---------------------------------------------------------------------------
# ConversationStore protocol (Phase 2)
# ---------------------------------------------------------------------------
@@ -320,9 +439,36 @@ class NodeConversation:
def _repair_orphaned_tool_calls(
msgs: list[dict[str, Any]],
) -> list[dict[str, Any]]:
"""Ensure every tool_call has a matching tool-result message."""
"""Ensure tool_call / tool_result pairs are consistent.
1. **Orphaned tool results** (tool_result with no preceding tool_use)
are dropped. This happens when compaction removes an assistant
message but leaves its tool-result messages behind.
2. **Orphaned tool calls** (tool_use with no following tool_result)
get a synthetic error result appended. This happens when a loop
is cancelled mid-tool-execution.
"""
# Pass 1: collect all tool_call IDs from assistant messages so we
# can identify orphaned tool-result messages.
all_tool_call_ids: set[str] = set()
for m in msgs:
if m.get("role") == "assistant":
for tc in m.get("tool_calls") or []:
tc_id = tc.get("id")
if tc_id:
all_tool_call_ids.add(tc_id)
# Pass 2: build repaired list — drop orphaned tool results, patch
# missing tool results.
repaired: list[dict[str, Any]] = []
for i, m in enumerate(msgs):
# Drop tool-result messages whose tool_call_id has no matching
# tool_use in any assistant message (orphaned by compaction).
if m.get("role") == "tool":
tid = m.get("tool_call_id")
if tid and tid not in all_tool_call_ids:
continue # skip orphaned result
repaired.append(m)
tool_calls = m.get("tool_calls")
if m.get("role") != "assistant" or not tool_calls:
@@ -353,12 +499,20 @@ class NodeConversation:
"""Best available token estimate.
Uses actual API input token count when available (set via
:meth:`update_token_count`), otherwise falls back to the rough
``total_chars / 4`` heuristic.
:meth:`update_token_count`), otherwise falls back to a
``total_chars / 4`` heuristic that includes both message content
AND tool_call argument sizes.
"""
if self._last_api_input_tokens is not None:
return self._last_api_input_tokens
total_chars = sum(len(m.content) for m in self._messages)
total_chars = 0
for m in self._messages:
total_chars += len(m.content)
if m.tool_calls:
for tc in m.tool_calls:
func = tc.get("function", {})
total_chars += len(func.get("arguments", ""))
total_chars += len(func.get("name", ""))
return total_chars // 4
def update_token_count(self, actual_input_tokens: int) -> None:
@@ -587,6 +741,210 @@ class NodeConversation:
self._messages = [summary_msg] + recent_messages
self._last_api_input_tokens = None # reset; next LLM call will recalibrate
async def compact_preserving_structure(
self,
spillover_dir: str,
keep_recent: int = 4,
phase_graduated: bool = False,
aggressive: bool = False,
) -> None:
"""Structure-preserving compaction: save freeform text to file, keep tool messages.
Unlike ``compact()`` which replaces ALL old messages with a single LLM
summary, this method preserves the tool call structure (assistant
messages with tool_calls + tool result messages) that are already tiny
after pruning. Only freeform text exchanges (user messages,
text-only assistant messages) are saved to a file and removed.
When *aggressive* is True, non-essential tool call pairs are also
collapsed into a compact summary instead of being kept individually.
Only ``set_output`` calls and error results are preserved; all other
old tool pairs are replaced by a tool-call history summary.
The result: the agent retains exact knowledge of what tools it called,
where each result is stored, and can load the conversation text if
needed. No LLM summary call. No heuristics. Nothing lost.
"""
if not self._messages:
return
total = len(self._messages)
# Determine split point (same logic as compact)
if phase_graduated and self._current_phase:
split = self._find_phase_graduated_split()
else:
split = None
if split is None:
keep_recent = max(0, min(keep_recent, total - 1))
split = total - keep_recent if keep_recent > 0 else total
# Advance split past orphaned tool results at the boundary
while split < total and self._messages[split].role == "tool":
split += 1
if split == 0:
return
old_messages = self._messages[:split]
# Classify old messages: structural (keep) vs freeform (save to file)
kept_structural: list[Message] = []
freeform_lines: list[str] = []
collapsed_msgs: list[Message] = []
if aggressive:
# Aggressive: only keep set_output tool pairs and error results.
# Everything else is collapsed into a tool-call history summary.
# We need to track tool_call IDs to pair assistant messages with
# their tool results.
protected_tc_ids: set[str] = set()
collapsible_tc_ids: set[str] = set()
# First pass: classify assistant messages
for msg in old_messages:
if msg.role != "assistant" or not msg.tool_calls:
continue
has_protected = any(
tc.get("function", {}).get("name") == "set_output" for tc in msg.tool_calls
)
tc_ids = {tc.get("id", "") for tc in msg.tool_calls}
if has_protected:
protected_tc_ids |= tc_ids
else:
collapsible_tc_ids |= tc_ids
# Second pass: classify all messages
for msg in old_messages:
if msg.role == "tool":
tc_id = msg.tool_use_id or ""
if tc_id in protected_tc_ids:
kept_structural.append(msg)
elif msg.is_error:
# Error results are always protected
kept_structural.append(msg)
# Protect the parent assistant message too
protected_tc_ids.add(tc_id)
else:
collapsed_msgs.append(msg)
elif msg.role == "assistant" and msg.tool_calls:
tc_ids = {tc.get("id", "") for tc in msg.tool_calls}
if tc_ids & protected_tc_ids:
# Has at least one protected tool call — keep entire msg
compact_tcs = _compact_tool_calls(msg.tool_calls)
kept_structural.append(
Message(
seq=msg.seq,
role=msg.role,
content="",
tool_calls=compact_tcs,
is_error=msg.is_error,
phase_id=msg.phase_id,
is_transition_marker=msg.is_transition_marker,
)
)
else:
collapsed_msgs.append(msg)
else:
# Freeform text — save to file
role_label = msg.role
text = msg.content
if len(text) > 2000:
text = text[:2000] + ""
freeform_lines.append(f"[{role_label}] (seq={msg.seq}): {text}")
else:
# Standard mode: keep all tool call pairs as structural
for msg in old_messages:
if msg.role == "tool":
kept_structural.append(msg)
elif msg.role == "assistant" and msg.tool_calls:
compact_tcs = _compact_tool_calls(msg.tool_calls)
kept_structural.append(
Message(
seq=msg.seq,
role=msg.role,
content="",
tool_calls=compact_tcs,
is_error=msg.is_error,
phase_id=msg.phase_id,
is_transition_marker=msg.is_transition_marker,
)
)
else:
role_label = msg.role
text = msg.content
if len(text) > 2000:
text = text[:2000] + ""
freeform_lines.append(f"[{role_label}] (seq={msg.seq}): {text}")
# Write freeform text to a numbered conversation file
spill_path = Path(spillover_dir)
spill_path.mkdir(parents=True, exist_ok=True)
# Find next conversation file number
existing = sorted(spill_path.glob("conversation_*.md"))
next_n = len(existing) + 1
conv_filename = f"conversation_{next_n}.md"
if freeform_lines:
header = f"## Compacted conversation (messages 1-{split})\n\n"
conv_text = header + "\n\n".join(freeform_lines)
(spill_path / conv_filename).write_text(conv_text, encoding="utf-8")
else:
# Nothing to save — skip file creation
conv_filename = ""
# Build reference message
ref_parts: list[str] = []
if conv_filename:
ref_parts.append(
f"[Previous conversation saved to '{conv_filename}'. "
f"Use load_data('{conv_filename}') to review if needed.]"
)
elif not collapsed_msgs:
ref_parts.append("[Previous freeform messages compacted.]")
# Aggressive: add collapsed tool-call history to the reference
if collapsed_msgs:
tool_history = extract_tool_call_history(collapsed_msgs)
if tool_history:
ref_parts.append(tool_history)
elif not ref_parts:
ref_parts.append("[Previous tool calls compacted.]")
ref_content = "\n\n".join(ref_parts)
# Use a seq just before the first kept message
recent_messages = list(self._messages[split:])
if kept_structural:
ref_seq = kept_structural[0].seq - 1
elif recent_messages:
ref_seq = recent_messages[0].seq - 1
else:
ref_seq = self._next_seq
self._next_seq += 1
ref_msg = Message(seq=ref_seq, role="user", content=ref_content)
# Persist: delete old messages from store, write reference + kept structural.
# In aggressive mode, collapsed messages may be interspersed with kept
# messages, so we delete everything before the recent boundary and
# rewrite only what we want to keep.
if self._store:
recent_boundary = recent_messages[0].seq if recent_messages else self._next_seq
await self._store.delete_parts_before(recent_boundary)
# Write the reference message
await self._store.write_part(ref_msg.seq, ref_msg.to_storage_dict())
# Write kept structural messages (they may have been modified)
for msg in kept_structural:
await self._store.write_part(msg.seq, msg.to_storage_dict())
await self._store.write_cursor({"next_seq": self._next_seq})
# Reassemble: reference + kept structural (in original order) + recent
self._messages = [ref_msg] + kept_structural + recent_messages
self._last_api_input_tokens = None
def _find_phase_graduated_split(self) -> int | None:
"""Find split point that preserves current + previous phase.
+32 -7
View File
@@ -103,7 +103,12 @@ FEEDBACK: (reason if RETRY, empty if ACCEPT)"""
def _extract_recent_context(conversation: NodeConversation, max_messages: int = 10) -> str:
"""Extract recent conversation messages for evaluation."""
"""Extract recent conversation messages for evaluation.
Includes tool-call summaries from assistant messages so the judge
can see what tools were invoked (especially set_output values) even
when the assistant message body is empty.
"""
messages = conversation.messages
recent = messages[-max_messages:] if len(messages) > max_messages else messages
@@ -112,8 +117,24 @@ def _extract_recent_context(conversation: NodeConversation, max_messages: int =
role = msg.role.upper()
content = msg.content or ""
# Truncate long tool results
if msg.role == "tool" and len(content) > 200:
content = content[:200] + "..."
if msg.role == "tool" and len(content) > 500:
content = content[:500] + "..."
# For assistant messages with empty content but tool_calls,
# summarise the tool calls so the judge knows what happened.
if msg.role == "assistant" and not content.strip():
tool_calls = getattr(msg, "tool_calls", None)
if tool_calls:
tc_parts = []
for tc in tool_calls:
fn = tc.get("function", {}) if isinstance(tc, dict) else {}
name = fn.get("name", "")
args = fn.get("arguments", "")
if name == "set_output":
# Show the value so the judge can evaluate content quality
tc_parts.append(f" called {name}({args[:1000]})")
else:
tc_parts.append(f" called {name}(...)")
content = "Tool calls:\n" + "\n".join(tc_parts)
if content.strip():
parts.append(f"[{role}]: {content.strip()}")
@@ -125,6 +146,10 @@ def _format_outputs(accumulator_state: dict[str, Any]) -> str:
Lists and dicts get structural formatting so the judge can assess
quantity and structure, not just a truncated stringification.
String values are given a generous limit (2000 chars) so the judge
can verify substantive content (e.g. a research brief with key
questions, scope boundaries, and deliverables).
"""
if not accumulator_state:
return "(none)"
@@ -144,12 +169,12 @@ def _format_outputs(accumulator_state: dict[str, Any]) -> str:
val_str += f"\n ... and {len(value) - 8} more"
elif isinstance(value, dict):
val_str = str(value)
if len(val_str) > 400:
val_str = val_str[:400] + "..."
if len(val_str) > 2000:
val_str = val_str[:2000] + "..."
else:
val_str = str(value)
if len(val_str) > 300:
val_str = val_str[:300] + "..."
if len(val_str) > 2000:
val_str = val_str[:2000] + "..."
parts.append(f" {key}: {val_str}")
return "\n".join(parts)
+63 -2
View File
@@ -338,6 +338,10 @@ class AsyncEntryPointSpec(BaseModel):
max_concurrent: int = Field(
default=10, description="Maximum concurrent executions for this entry point"
)
max_resurrections: int = Field(
default=3,
description="Auto-restart on non-fatal failure (0 to disable)",
)
model_config = {"extra": "allow"}
@@ -427,8 +431,7 @@ class GraphSpec(BaseModel):
max_tokens: int = Field(default=None) # resolved by _resolve_max_tokens validator
# Cleanup LLM for JSON extraction fallback (fast/cheap model preferred)
# If not set, uses CEREBRAS_API_KEY -> cerebras/llama-3.3-70b or
# ANTHROPIC_API_KEY -> claude-haiku-4-5 as fallback
# If not set, uses CEREBRAS_API_KEY -> cerebras/llama-3.3-70b
cleanup_llm_model: str | None = None
# Execution limits
@@ -615,6 +618,13 @@ class GraphSpec(BaseModel):
if not self.get_node(term):
errors.append(f"Terminal node '{term}' not found")
# Require at least one terminal node (graphs must have termination points)
if not self.terminal_nodes:
errors.append(
"Graph must have at least one terminal node in 'terminal_nodes'. "
"Every graph needs a termination point where execution ends."
)
# Check edge references
for edge in self.edges:
if not self.get_node(edge.source):
@@ -644,6 +654,13 @@ class GraphSpec(BaseModel):
for edge in self.get_outgoing_edges(current):
to_visit.append(edge.target)
# Also mark sub-agents as reachable (they're invoked via delegate_to_sub_agent, not edges)
for node in self.nodes:
if node.id in reachable:
sub_agents = getattr(node, "sub_agents", []) or []
for sub_agent_id in sub_agents:
reachable.add(sub_agent_id)
# Build set of async entry point nodes for quick lookup
async_entry_nodes = {ep.entry_node for ep in self.async_entry_points}
@@ -695,4 +712,48 @@ class GraphSpec(BaseModel):
else:
seen_keys[key] = node_id
# GCU nodes must only be used as subagents
gcu_node_ids = {n.id for n in self.nodes if n.node_type == "gcu"}
if gcu_node_ids:
# GCU nodes must not be entry nodes
if self.entry_node in gcu_node_ids:
errors.append(
f"GCU node '{self.entry_node}' is used as entry node. "
"GCU nodes must only be used as subagents via delegate_to_sub_agent()."
)
# GCU nodes must not be terminal nodes
for term in self.terminal_nodes:
if term in gcu_node_ids:
errors.append(
f"GCU node '{term}' is used as terminal node. "
"GCU nodes must only be used as subagents."
)
# GCU nodes must not be connected via edges
for edge in self.edges:
if edge.source in gcu_node_ids:
errors.append(
f"GCU node '{edge.source}' is used as edge source (edge '{edge.id}'). "
"GCU nodes must only be used as subagents, not connected via edges."
)
if edge.target in gcu_node_ids:
errors.append(
f"GCU node '{edge.target}' is used as edge target (edge '{edge.id}'). "
"GCU nodes must only be used as subagents, not connected via edges."
)
# GCU nodes must be referenced in at least one parent's sub_agents
referenced_subagents = set()
for node in self.nodes:
for sa_id in node.sub_agents or []:
referenced_subagents.add(sa_id)
orphaned = gcu_node_ids - referenced_subagents
for nid in orphaned:
errors.append(
f"GCU node '{nid}' is not referenced in any node's sub_agents list. "
"GCU nodes must be declared as subagents of a parent node."
)
return errors
File diff suppressed because it is too large Load Diff
+241 -100
View File
@@ -26,7 +26,6 @@ from framework.graph.node import (
NodeSpec,
SharedMemory,
)
from framework.graph.output_cleaner import CleansingConfig, OutputCleaner
from framework.graph.validator import OutputValidator
from framework.llm.provider import LLMProvider, Tool
from framework.observability import set_trace_context
@@ -126,7 +125,6 @@ class GraphExecutor:
tool_executor: Callable | None = None,
node_registry: dict[str, NodeProtocol] | None = None,
approval_callback: Callable | None = None,
cleansing_config: CleansingConfig | None = None,
enable_parallel_execution: bool = True,
parallel_config: ParallelExecutionConfig | None = None,
event_bus: Any | None = None,
@@ -138,6 +136,8 @@ class GraphExecutor:
accounts_prompt: str = "",
accounts_data: list[dict] | None = None,
tool_provider_map: dict[str, str] | None = None,
dynamic_tools_provider: Callable | None = None,
dynamic_prompt_provider: Callable | None = None,
):
"""
Initialize the executor.
@@ -149,7 +149,6 @@ class GraphExecutor:
tool_executor: Function to execute tools
node_registry: Custom node implementations by ID
approval_callback: Optional callback for human-in-the-loop approval
cleansing_config: Optional output cleansing configuration
enable_parallel_execution: Enable parallel fan-out execution (default True)
parallel_config: Configuration for parallel execution behavior
event_bus: Optional event bus for emitting node lifecycle events
@@ -160,6 +159,10 @@ class GraphExecutor:
accounts_prompt: Connected accounts block for system prompt injection
accounts_data: Raw account data for per-node prompt generation
tool_provider_map: Tool name to provider name mapping for account routing
dynamic_tools_provider: Optional callback returning current
tool list (for mode switching)
dynamic_prompt_provider: Optional callback returning current
system prompt (for phase switching)
"""
self.runtime = runtime
self.llm = llm
@@ -178,13 +181,8 @@ class GraphExecutor:
self.accounts_prompt = accounts_prompt
self.accounts_data = accounts_data
self.tool_provider_map = tool_provider_map
# Initialize output cleaner
self.cleansing_config = cleansing_config or CleansingConfig()
self.output_cleaner = OutputCleaner(
config=self.cleansing_config,
llm_provider=llm,
)
self.dynamic_tools_provider = dynamic_tools_provider
self.dynamic_prompt_provider = dynamic_prompt_provider
# Parallel execution settings
self.enable_parallel_execution = enable_parallel_execution
@@ -193,6 +191,9 @@ class GraphExecutor:
# Pause/resume control
self._pause_requested = asyncio.Event()
# Track the currently executing node for external injection routing
self.current_node_id: str | None = None
def _write_progress(
self,
current_node: str,
@@ -283,6 +284,125 @@ class GraphExecutor:
return errors
# Max chars of formatted messages before proactively splitting for LLM.
_PHASE_LLM_CHAR_LIMIT = 240_000
_PHASE_LLM_MAX_DEPTH = 10
async def _phase_llm_compact(
self,
conversation: Any,
next_spec: NodeSpec,
messages: list,
_depth: int = 0,
) -> str:
"""Summarise messages for phase-boundary compaction.
Uses the same recursive binary-search splitting as EventLoopNode.
"""
from framework.graph.conversation import extract_tool_call_history
from framework.graph.event_loop_node import _is_context_too_large_error
if _depth > self._PHASE_LLM_MAX_DEPTH:
raise RuntimeError("Phase LLM compaction recursion limit")
# Format messages
lines: list[str] = []
for m in messages:
if m.role == "tool":
c = m.content[:500] + ("..." if len(m.content) > 500 else "")
lines.append(f"[tool result]: {c}")
elif m.role == "assistant" and m.tool_calls:
names = [tc.get("function", {}).get("name", "?") for tc in m.tool_calls]
lines.append(
f"[assistant (calls: {', '.join(names)})]: "
f"{m.content[:200] if m.content else ''}"
)
else:
lines.append(f"[{m.role}]: {m.content}")
formatted = "\n\n".join(lines)
# Proactive split
if len(formatted) > self._PHASE_LLM_CHAR_LIMIT and len(messages) > 1:
summary = await self._phase_llm_compact_split(
conversation,
next_spec,
messages,
_depth,
)
else:
max_tokens = getattr(conversation, "_max_history_tokens", 32000)
target_tokens = max_tokens // 2
target_chars = target_tokens * 4
prompt = (
"You are compacting an AI agent's conversation history "
"at a phase boundary.\n\n"
f"NEXT PHASE: {next_spec.name}\n"
)
if next_spec.description:
prompt += f"NEXT PHASE PURPOSE: {next_spec.description}\n"
prompt += (
f"\nCONVERSATION MESSAGES:\n{formatted}\n\n"
"INSTRUCTIONS:\n"
f"Write a summary of approximately {target_chars} characters "
f"(~{target_tokens} tokens).\n"
"Preserve user-stated rules, constraints, and preferences "
"verbatim. Preserve key decisions and results from earlier "
"phases. Preserve context needed for the next phase.\n"
)
summary_budget = max(1024, max_tokens // 2)
try:
response = await self._llm.acomplete(
messages=[{"role": "user", "content": prompt}],
system=(
"You are a conversation compactor. Write a detailed "
"summary preserving context for the next phase."
),
max_tokens=summary_budget,
)
summary = response.content
except Exception as e:
if _is_context_too_large_error(e) and len(messages) > 1:
summary = await self._phase_llm_compact_split(
conversation,
next_spec,
messages,
_depth,
)
else:
raise
# Append tool history at top level only
if _depth == 0:
tool_history = extract_tool_call_history(messages)
if tool_history and "TOOLS ALREADY CALLED" not in summary:
summary += "\n\n" + tool_history
return summary
async def _phase_llm_compact_split(
self,
conversation: Any,
next_spec: NodeSpec,
messages: list,
_depth: int,
) -> str:
"""Split messages in half and summarise each half."""
mid = max(1, len(messages) // 2)
s1 = await self._phase_llm_compact(
conversation,
next_spec,
messages[:mid],
_depth + 1,
)
s2 = await self._phase_llm_compact(
conversation,
next_spec,
messages[mid:],
_depth + 1,
)
return s1 + "\n\n" + s2
async def execute(
self,
graph: GraphSpec,
@@ -338,6 +458,9 @@ class GraphExecutor:
cumulative_tool_names: set[str] = set()
cumulative_output_keys: list[str] = [] # Output keys from all visited nodes
# Build node registry for subagent lookup
node_registry: dict[str, NodeSpec] = {node.id: node for node in graph.nodes}
# Initialize checkpoint store if checkpointing is enabled
checkpoint_store: CheckpointStore | None = None
if checkpoint_config and checkpoint_config.enabled and self._storage_path:
@@ -694,6 +817,9 @@ class GraphExecutor:
# Execute this node, then pause
# (We'll check again after execution and save state)
# Expose current node for external injection routing
self.current_node_id = current_node_id
self.logger.info(f"\n▶ Step {steps}: {node_spec.name} ({node_spec.node_type})")
self.logger.info(f" Inputs: {node_spec.input_keys}")
self.logger.info(f" Outputs: {node_spec.output_keys}")
@@ -709,6 +835,14 @@ class GraphExecutor:
if k not in cumulative_output_keys:
cumulative_output_keys.append(k)
# Build resume narrative (Layer 2) when restoring a session
# so the EventLoopNode can rebuild the full 3-layer system prompt.
_resume_narrative = ""
if _is_resuming and path:
from framework.graph.prompt_composer import build_narrative
_resume_narrative = build_narrative(memory, path, graph)
# Build context for node
ctx = self._build_context(
node_spec=node_spec,
@@ -721,6 +855,10 @@ class GraphExecutor:
override_tools=cumulative_tools if is_continuous else None,
cumulative_output_keys=cumulative_output_keys if is_continuous else None,
event_triggered=_event_triggered,
node_registry=node_registry,
identity_prompt=getattr(graph, "identity_prompt", ""),
narrative=_resume_narrative,
graph=graph,
)
# Log actual input data being read
@@ -1120,6 +1258,7 @@ class GraphExecutor:
source_result=result,
source_node_spec=node_spec,
path=path,
node_registry=node_registry,
)
total_tokens += branch_tokens
@@ -1269,27 +1408,78 @@ class GraphExecutor:
# Set current phase for phase-aware compaction
continuous_conversation.set_current_phase(next_spec.id)
# Opportunistic compaction at transition:
# 1. Prune old tool results (free, no LLM call)
# 2. If still over 80%, do a phase-graduated compact
# Phase-boundary compaction (same flow as EventLoopNode._compact)
if continuous_conversation.usage_ratio() > 0.5:
await continuous_conversation.prune_old_tool_results(
protect_tokens=2000,
)
if continuous_conversation.needs_compaction():
_phase_ratio = continuous_conversation.usage_ratio()
self.logger.info(
" Phase-boundary compaction (%.0f%% usage)",
continuous_conversation.usage_ratio() * 100,
_phase_ratio * 100,
)
summary = (
f"Summary of earlier phases (before {next_spec.name}). "
"See transition markers for phase details."
)
await continuous_conversation.compact(
summary,
keep_recent=4,
phase_graduated=True,
_data_dir = (
str(self._storage_path / "data") if self._storage_path else None
)
# Step 1: Structural compaction (>=80%)
if _data_dir:
_pre = continuous_conversation.usage_ratio()
await continuous_conversation.compact_preserving_structure(
spillover_dir=_data_dir,
keep_recent=4,
phase_graduated=True,
)
if continuous_conversation.usage_ratio() >= 0.9 * _pre:
await continuous_conversation.compact_preserving_structure(
spillover_dir=_data_dir,
keep_recent=4,
phase_graduated=True,
aggressive=True,
)
# Step 2: LLM compaction (>95%)
if (
continuous_conversation.usage_ratio() > 0.95
and self._llm is not None
):
self.logger.info(
" LLM phase-boundary compaction (%.0f%% usage)",
continuous_conversation.usage_ratio() * 100,
)
try:
_llm_summary = await self._phase_llm_compact(
continuous_conversation,
next_spec,
list(continuous_conversation.messages),
)
await continuous_conversation.compact(
_llm_summary,
keep_recent=2,
phase_graduated=True,
)
except Exception as e:
self.logger.warning(
" Phase LLM compaction failed: %s",
e,
)
# Step 3: Emergency (only if still over budget)
if continuous_conversation.needs_compaction():
self.logger.warning(
" Emergency phase compaction (%.0f%%)",
continuous_conversation.usage_ratio() * 100,
)
summary = (
f"Summary of earlier phases "
f"(before {next_spec.name}). "
"See transition markers for phase details."
)
await continuous_conversation.compact(
summary,
keep_recent=1,
phase_graduated=True,
)
# Update input_data for next node
input_data = result.output
@@ -1541,6 +1731,10 @@ class GraphExecutor:
override_tools: list | None = None,
cumulative_output_keys: list[str] | None = None,
event_triggered: bool = False,
identity_prompt: str = "",
narrative: str = "",
node_registry: dict[str, NodeSpec] | None = None,
graph: "GraphSpec | None" = None,
) -> NodeContext:
"""Build execution context for a node."""
# Filter tools to those available to this node
@@ -1569,6 +1763,8 @@ class GraphExecutor:
node_tool_names=node_spec.tools,
)
goal_context = goal.to_prompt_context()
return NodeContext(
runtime=self.runtime,
node_id=node_spec.id,
@@ -1577,7 +1773,7 @@ class GraphExecutor:
input_data=input_data,
llm=self.llm,
available_tools=available_tools,
goal_context=goal.to_prompt_context(),
goal_context=goal_context,
goal=goal, # Pass Goal object for LLM-powered routers
max_tokens=max_tokens,
runtime_logger=self.runtime_logger,
@@ -1587,12 +1783,20 @@ class GraphExecutor:
cumulative_output_keys=cumulative_output_keys or [],
event_triggered=event_triggered,
accounts_prompt=node_accounts_prompt,
identity_prompt=identity_prompt,
narrative=narrative,
execution_id=self._execution_id,
stream_id=self._stream_id,
node_registry=node_registry or {},
all_tools=list(self.tools), # Full catalog for subagent tool resolution
shared_node_registry=self.node_registry, # For subagent escalation routing
dynamic_tools_provider=self.dynamic_tools_provider,
dynamic_prompt_provider=self.dynamic_prompt_provider,
)
VALID_NODE_TYPES = {
"event_loop",
"gcu",
}
# Node types removed in v0.5 — provide migration guidance
REMOVED_NODE_TYPES = {
@@ -1627,8 +1831,8 @@ class GraphExecutor:
f"Must be one of: {sorted(self.VALID_NODE_TYPES)}."
)
# Create based on type (only event_loop is valid)
if node_spec.node_type == "event_loop":
# Create based on type
if node_spec.node_type in ("event_loop", "gcu"):
# Auto-create EventLoopNode with sensible defaults.
# Custom configs can still be pre-registered via node_registry.
from framework.graph.event_loop_node import EventLoopNode, LoopConfig
@@ -1658,11 +1862,11 @@ class GraphExecutor:
judge=None, # implicit judge: accept when output_keys are filled
config=LoopConfig(
max_iterations=lc.get("max_iterations", default_max_iter),
max_tool_calls_per_turn=lc.get("max_tool_calls_per_turn", 10),
max_tool_calls_per_turn=lc.get("max_tool_calls_per_turn", 30),
tool_call_overflow_margin=lc.get("tool_call_overflow_margin", 0.5),
stall_detection_threshold=lc.get("stall_detection_threshold", 3),
max_history_tokens=lc.get("max_history_tokens", 32000),
max_tool_result_chars=lc.get("max_tool_result_chars", 3_000),
max_tool_result_chars=lc.get("max_tool_result_chars", 30_000),
spillover_dir=spillover,
),
tool_executor=self.tool_executor,
@@ -1699,52 +1903,6 @@ class GraphExecutor:
source_node_name=current_node_spec.name if current_node_spec else current_node_id,
target_node_name=target_node_spec.name if target_node_spec else edge.target,
):
# Validate and clean output before mapping inputs.
# Use full memory state (not just result.output) because
# target input_keys may come from earlier nodes in the
# graph, not only from the immediate source node.
if self.cleansing_config.enabled and target_node_spec:
output_to_validate = memory.read_all()
validation = self.output_cleaner.validate_output(
output=output_to_validate,
source_node_id=current_node_id,
target_node_spec=target_node_spec,
)
if not validation.valid:
self.logger.warning(f"⚠ Output validation failed: {validation.errors}")
# Clean the output
cleaned_output = await self.output_cleaner.clean_output(
output=output_to_validate,
source_node_id=current_node_id,
target_node_spec=target_node_spec,
validation_errors=validation.errors,
)
# Update result with cleaned output
result.output = cleaned_output
# Write cleaned output back to memory (skip validation for LLM output)
for key, value in cleaned_output.items():
memory.write(key, value, validate=False)
# Revalidate
revalidation = self.output_cleaner.validate_output(
output=cleaned_output,
source_node_id=current_node_id,
target_node_spec=target_node_spec,
)
if revalidation.valid:
self.logger.info("✓ Output cleaned and validated successfully")
else:
self.logger.error(
f"✗ Cleaning failed, errors remain: {revalidation.errors}"
)
# Continue anyway if fallback_to_raw is True
# Map inputs (skip validation for processed LLM output)
mapped = edge.map_inputs(result.output, memory.read_all())
for key, value in mapped.items():
@@ -1845,6 +2003,7 @@ class GraphExecutor:
source_result: NodeResult,
source_node_spec: Any,
path: list[str],
node_registry: dict[str, NodeSpec] | None = None,
) -> tuple[dict[str, NodeResult], int, int]:
"""
Execute multiple branches in parallel using asyncio.gather.
@@ -1905,32 +2064,6 @@ class GraphExecutor:
branch.status = "running"
try:
# Validate and clean output before mapping inputs (same as _follow_edges).
# Use full memory state since target input_keys may come
# from earlier nodes, not just the immediate source.
if self.cleansing_config.enabled and node_spec:
mem_snapshot = memory.read_all()
validation = self.output_cleaner.validate_output(
output=mem_snapshot,
source_node_id=source_node_spec.id if source_node_spec else "unknown",
target_node_spec=node_spec,
)
if not validation.valid:
self.logger.warning(
f"⚠ Output validation failed for branch "
f"{branch.node_id}: {validation.errors}"
)
cleaned_output = await self.output_cleaner.clean_output(
output=mem_snapshot,
source_node_id=source_node_spec.id if source_node_spec else "unknown",
target_node_spec=node_spec,
validation_errors=validation.errors,
)
# Write cleaned output to memory
for key, value in cleaned_output.items():
await memory.write_async(key, value)
# Map inputs via edge
mapped = branch.edge.map_inputs(source_result.output, memory.read_all())
for key, value in mapped.items():
@@ -1942,7 +2075,15 @@ class GraphExecutor:
branch.retry_count = attempt
# Build context for this branch
ctx = self._build_context(node_spec, memory, goal, mapped, graph.max_tokens)
ctx = self._build_context(
node_spec,
memory,
goal,
mapped,
graph.max_tokens,
node_registry=node_registry,
graph=graph,
)
node_impl = self._get_node_implementation(node_spec, graph.cleanup_llm_model)
# Emit node-started event (skip event_loop nodes)
+23
View File
@@ -0,0 +1,23 @@
"""File tools MCP server constants.
Analogous to ``gcu.py`` defines the server name and default stdio config
so the runner can auto-register the files MCP server for any agent that has
``event_loop`` or ``gcu`` nodes.
"""
# ---------------------------------------------------------------------------
# MCP server identity
# ---------------------------------------------------------------------------
FILES_MCP_SERVER_NAME = "files-tools"
"""Name used to identify the file tools MCP server in ``mcp_servers.json``."""
FILES_MCP_SERVER_CONFIG: dict = {
"name": FILES_MCP_SERVER_NAME,
"transport": "stdio",
"command": "uv",
"args": ["run", "python", "files_server.py", "--stdio"],
"cwd": "../../tools",
"description": "File tools for reading, writing, editing, and searching files",
}
"""Default stdio config for the file tools MCP server (relative to exports/<agent>/)."""
+86
View File
@@ -0,0 +1,86 @@
"""GCU (browser automation) node type constants.
A ``gcu`` node is an ``event_loop`` node with two automatic enhancements:
1. A canonical browser best-practices system prompt is prepended.
2. All tools from the GCU MCP server are auto-included.
No new ``NodeProtocol`` subclass the ``gcu`` type is purely a declarative
signal processed by the runner and executor at setup time.
"""
# ---------------------------------------------------------------------------
# MCP server identity
# ---------------------------------------------------------------------------
GCU_SERVER_NAME = "gcu-tools"
"""Name used to identify the GCU MCP server in ``mcp_servers.json``."""
GCU_MCP_SERVER_CONFIG: dict = {
"name": GCU_SERVER_NAME,
"transport": "stdio",
"command": "uv",
"args": ["run", "python", "-m", "gcu.server", "--stdio"],
"cwd": "../../tools",
"description": "GCU tools for browser automation",
}
"""Default stdio config for the GCU MCP server (relative to exports/<agent>/)."""
# ---------------------------------------------------------------------------
# Browser best-practices system prompt
# ---------------------------------------------------------------------------
GCU_BROWSER_SYSTEM_PROMPT = """\
# Browser Automation Best Practices
Follow these rules for reliable, efficient browser interaction.
## Reading Pages
- ALWAYS prefer `browser_snapshot` over `browser_get_text("body")`
it returns a compact ~1-5 KB accessibility tree vs 100+ KB of raw HTML.
- Use `browser_snapshot_aria` when you need full ARIA properties
for detailed element inspection.
- Do NOT use `browser_screenshot` for reading text content
it produces huge base64 images with no searchable text.
- Only fall back to `browser_get_text` for extracting specific
small elements by CSS selector.
## Navigation & Waiting
- Always call `browser_wait` after navigation actions
(`browser_open`, `browser_navigate`, `browser_click` on links)
to let the page load.
- NEVER re-navigate to the same URL after scrolling
this resets your scroll position and loses loaded content.
## Scrolling
- Use large scroll amounts ~2000 when loading more content
sites like twitter and linkedin have lazy loading for paging.
- After scrolling, take a new `browser_snapshot` to see updated content.
## Error Recovery
- If a tool fails, retry once with the same approach.
- If it fails a second time, STOP retrying and switch approach.
- If `browser_snapshot` fails try `browser_get_text` with a
specific small selector as fallback.
- If `browser_open` fails or page seems stale `browser_stop`,
then `browser_start`, then retry.
## Tab Management
- Use `browser_tabs` to list open tabs when managing multiple pages.
- Pass `target_id` to tools when operating on a specific tab.
- Open background tabs with `browser_open(url=..., background=true)`
to avoid losing your current context.
- Close tabs you no longer need with `browser_close` to free resources.
## Login & Auth Walls
- If you see a "Log in" or "Sign up" prompt instead of expected
content, report the auth wall immediately do NOT attempt to log in.
- Check for cookie consent banners and dismiss them if they block content.
## Efficiency
- Minimize tool calls combine actions where possible.
- When a snapshot result is saved to a spillover file, use
`run_command` with grep to extract specific data rather than
re-reading the full file.
- Call `set_output` in the same turn as your last browser action
when possible don't waste a turn.
"""
+11 -1
View File
@@ -176,7 +176,17 @@ class Goal(BaseModel):
return True
def to_prompt_context(self) -> str:
"""Generate context string for LLM prompts."""
"""Generate context string for LLM prompts.
Returns empty string when the goal is a stub (no success criteria,
no constraints, no context). Stub goals are metadata-only used for
graph identification but not communicated to the LLM as actionable
intent. This prevents runtime agents (e.g. the queen) from
misinterpreting their own goal as a user request.
"""
if not self.success_criteria and not self.constraints and not self.context:
return ""
lines = [
f"# Goal: {self.name}",
f"{self.description}",
+4 -56
View File
@@ -154,69 +154,17 @@ class HITLProtocol:
"""
Parse human's raw input into structured response.
Uses Haiku to intelligently extract answers for each question.
Maps the raw input to the first question. For multi-question HITL,
the caller should present one question at a time.
"""
import os
response = HITLResponse(request_id=request.request_id, raw_input=raw_input)
# If no questions, just return raw input
if not request.questions:
return response
# Try to use Haiku for intelligent parsing
api_key = os.environ.get("ANTHROPIC_API_KEY")
if not use_haiku or not api_key:
# Simple fallback: treat as answer to first question
if request.questions:
response.answers[request.questions[0].id] = raw_input
return response
# Use Haiku to extract answers
try:
import json
import anthropic
questions_str = "\n".join(
[f"{i + 1}. {q.question} (id: {q.id})" for i, q in enumerate(request.questions)]
)
prompt = f"""Parse the user's response and extract answers for each question.
Questions asked:
{questions_str}
User's response:
{raw_input}
Extract the answer for each question. Output JSON with question IDs as keys.
Example format:
{{"question-1": "answer here", "question-2": "answer here"}}"""
client = anthropic.Anthropic(api_key=api_key)
message = client.messages.create(
model="claude-haiku-4-5-20251001",
max_tokens=500,
messages=[{"role": "user", "content": prompt}],
)
# Parse Haiku's response
import re
response_text = message.content[0].text.strip()
json_match = re.search(r"\{[^{}]*\}", response_text, re.DOTALL)
if json_match:
parsed = json.loads(json_match.group())
response.answers = parsed
except Exception:
# Fallback: use raw input for first question
if request.questions:
response.answers[request.questions[0].id] = raw_input
# Map raw input to first question
response.answers[request.questions[0].id] = raw_input
return response
@staticmethod
+48 -55
View File
@@ -166,7 +166,7 @@ class NodeSpec(BaseModel):
# Node behavior type
node_type: str = Field(
default="event_loop",
description="Type: 'event_loop' (recommended), 'router', 'human_input'.",
description="Type: 'event_loop' (recommended), 'gcu' (browser automation).",
)
# Data flow
@@ -204,6 +204,16 @@ class NodeSpec(BaseModel):
default=None, description="Specific model to use (defaults to graph default)"
)
# For subagent delegation
sub_agents: list[str] = Field(
default_factory=list,
description="Node IDs that can be invoked as subagents from this node",
)
# For function nodes
function: str | None = Field(
default=None, description="Function name or path for function nodes"
)
# For router nodes
routes: dict[str, str] = Field(
default_factory=dict, description="Condition -> target_node_id mapping for routers"
@@ -505,6 +515,11 @@ class NodeContext:
# Connected accounts prompt (injected from runner)
accounts_prompt: str = ""
# Resume context — Layer 1 (identity) and Layer 2 (narrative) for
# rebuilding the full system prompt when restoring from conversation store.
identity_prompt: str = ""
narrative: str = ""
# Event-triggered execution (no interactive user attached)
event_triggered: bool = False
@@ -515,6 +530,31 @@ class NodeContext:
# Falls back to node_id when not set (legacy / standalone executor).
stream_id: str = ""
# Subagent mode
is_subagent_mode: bool = False # True when running as a subagent (prevents nested delegation)
report_callback: Any = None # async (message: str, data: dict | None) -> None
node_registry: dict[str, "NodeSpec"] = field(default_factory=dict) # For subagent lookup
# Full tool catalog (unfiltered) — used by _execute_subagent to resolve
# subagent tools that aren't in the parent node's filtered available_tools.
all_tools: list[Tool] = field(default_factory=list)
# Shared reference to the executor's node_registry — used by subagent
# escalation (_EscalationReceiver) to register temporary receivers that
# the inject_input() routing chain can find.
shared_node_registry: dict[str, Any] = field(default_factory=dict)
# Dynamic tool provider — when set, EventLoopNode rebuilds the tool
# list from this callback at the start of each iteration. Used by
# the queen to switch between building-mode and running-mode tools.
dynamic_tools_provider: Any = None # Callable[[], list[Tool]] | None
# Dynamic prompt provider — when set, EventLoopNode checks each
# iteration and updates the system prompt if it changed. Used by
# the queen to switch between phase-specific prompts (building /
# staging / running) without restarting the conversation.
dynamic_prompt_provider: Any = None # Callable[[], str] | None
@dataclass
class NodeResult:
@@ -551,7 +591,6 @@ class NodeResult:
Generate a human-readable summary of this node's execution and output.
This is like toString() - it describes what the node produced in its current state.
Uses Haiku to intelligently summarize complex outputs.
"""
if not self.success:
return f"❌ Failed: {self.error}"
@@ -559,59 +598,13 @@ class NodeResult:
if not self.output:
return "✓ Completed (no output)"
# Use Haiku to generate intelligent summary
import os
api_key = os.environ.get("ANTHROPIC_API_KEY")
if not api_key:
# Fallback: simple key-value listing
parts = [f"✓ Completed with {len(self.output)} outputs:"]
for key, value in list(self.output.items())[:5]: # Limit to 5 keys
value_str = str(value)[:100]
if len(str(value)) > 100:
value_str += "..."
parts.append(f"{key}: {value_str}")
return "\n".join(parts)
# Use Haiku to generate intelligent summary
try:
import json
import anthropic
node_context = ""
if node_spec:
node_context = f"\nNode: {node_spec.name}\nPurpose: {node_spec.description}"
output_json = json.dumps(self.output, indent=2, default=str)[:2000]
prompt = (
f"Generate a 1-2 sentence human-readable summary of "
f"what this node produced.{node_context}\n\n"
f"Node output:\n{output_json}\n\n"
"Provide a concise, clear summary that a human can quickly "
"understand. Focus on the key information produced."
)
client = anthropic.Anthropic(api_key=api_key)
message = client.messages.create(
model="claude-haiku-4-5-20251001",
max_tokens=200,
messages=[{"role": "user", "content": prompt}],
)
summary = message.content[0].text.strip()
return f"{summary}"
except Exception:
# Fallback on error
parts = [f"✓ Completed with {len(self.output)} outputs:"]
for key, value in list(self.output.items())[:3]:
value_str = str(value)[:80]
if len(str(value)) > 80:
value_str += "..."
parts.append(f"{key}: {value_str}")
return "\n".join(parts)
parts = [f"✓ Completed with {len(self.output)} outputs:"]
for key, value in list(self.output.items())[:5]: # Limit to 5 keys
value_str = str(value)[:100]
if len(str(value)) > 100:
value_str += "..."
parts.append(f"{key}: {value_str}")
return "\n".join(parts)
class NodeProtocol(ABC):
-395
View File
@@ -1,395 +0,0 @@
"""
Output Cleaner - Framework-level I/O validation and cleaning.
Validates node outputs match expected schemas and uses fast LLM
to clean malformed outputs before they flow to the next node.
This prevents cascading failures and dramatically improves execution success rates.
"""
import json
import logging
import re
from dataclasses import dataclass, field
from typing import Any
logger = logging.getLogger(__name__)
def _heuristic_repair(text: str) -> dict | None:
"""
Attempt to repair JSON without an LLM call.
Handles common errors:
- Markdown code blocks
- Python booleans/None (True -> true)
- Single quotes instead of double quotes
"""
if not isinstance(text, str):
return None
# 1. Strip Markdown code blocks
text = re.sub(r"^```(?:json)?\s*", "", text, flags=re.MULTILINE)
text = re.sub(r"\s*```$", "", text, flags=re.MULTILINE)
text = text.strip()
# 2. Find outermost JSON-like structure (greedy match)
match = re.search(r"(\{.*\}|\[.*\])", text, re.DOTALL)
if match:
candidate = match.group(1)
# 3. Common fixes
# Fix Python constants
candidate = re.sub(r"\bTrue\b", "true", candidate)
candidate = re.sub(r"\bFalse\b", "false", candidate)
candidate = re.sub(r"\bNone\b", "null", candidate)
# 4. Attempt load
try:
return json.loads(candidate)
except json.JSONDecodeError:
# 5. Advanced: Try swapping single quotes if double quotes fail
# This is risky but effective for simple dicts
try:
if "'" in candidate and '"' not in candidate:
candidate_swapped = candidate.replace("'", '"')
return json.loads(candidate_swapped)
except json.JSONDecodeError:
pass
return None
@dataclass
class CleansingConfig:
"""Configuration for output cleansing."""
enabled: bool = True
fast_model: str = "cerebras/llama-3.3-70b" # Fast, cheap model for cleaning
max_retries: int = 2
cache_successful_patterns: bool = True
fallback_to_raw: bool = True # If cleaning fails, pass raw output
log_cleanings: bool = True # Log when cleansing happens
@dataclass
class ValidationResult:
"""Result of output validation."""
valid: bool
errors: list[str] = field(default_factory=list)
warnings: list[str] = field(default_factory=list)
cleaned_output: dict[str, Any] | None = None
class OutputCleaner:
"""
Framework-level output validation and cleaning.
Uses heuristics and fast LLM to clean malformed outputs
before they flow to the next node.
"""
def __init__(self, config: CleansingConfig, llm_provider=None):
"""
Initialize the output cleaner.
Args:
config: Cleansing configuration
llm_provider: Optional LLM provider.
"""
self.config = config
self.success_cache: dict[str, Any] = {} # Cache successful patterns
self.failure_count: dict[str, int] = {} # Track edge failures
self.cleansing_count = 0 # Track total cleanings performed
# Initialize LLM provider for cleaning
if llm_provider:
self.llm = llm_provider
elif config.enabled:
# Create dedicated fast LLM provider for cleaning
try:
import os
from framework.llm.litellm import LiteLLMProvider
api_key = os.environ.get("CEREBRAS_API_KEY")
if api_key:
self.llm = LiteLLMProvider(
api_key=api_key,
model=config.fast_model,
)
logger.info(f"✓ Initialized OutputCleaner with {config.fast_model}")
else:
logger.warning("⚠ CEREBRAS_API_KEY not found, output cleaning will be disabled")
self.llm = None
except ImportError:
logger.warning("⚠ LiteLLMProvider not available, output cleaning disabled")
self.llm = None
else:
self.llm = None
def validate_output(
self,
output: dict[str, Any],
source_node_id: str,
target_node_spec: Any, # NodeSpec
) -> ValidationResult:
"""
Validate output matches target node's expected input schema.
Returns:
ValidationResult with errors and optionally cleaned output
"""
errors = []
warnings = []
# Check 1: Required input keys present (skip nullable keys)
nullable = set(getattr(target_node_spec, "nullable_output_keys", None) or [])
for key in target_node_spec.input_keys:
if key in nullable:
continue
if key not in output:
errors.append(f"Missing required key: '{key}'")
continue
value = output[key]
# Check 2: Detect if value is JSON string (the JSON parsing trap!)
if isinstance(value, str):
# Try parsing as JSON to detect the trap
try:
parsed = json.loads(value)
if isinstance(parsed, dict):
if key in parsed:
# Key exists in parsed JSON - classic parsing failure!
errors.append(
f"Key '{key}' contains JSON string with nested '{key}' field - "
f"likely parsing failure from LLM node"
)
elif len(value) > 100:
# Large JSON string, but doesn't contain the key
warnings.append(
f"Key '{key}' contains JSON string ({len(value)} chars)"
)
except json.JSONDecodeError:
# Not JSON, check if suspiciously large
if len(value) > 500:
warnings.append(
f"Key '{key}' contains large string ({len(value)} chars), "
f"possibly entire LLM response"
)
# Check 3: Type validation (if schema provided)
if hasattr(target_node_spec, "input_schema") and target_node_spec.input_schema:
expected_schema = target_node_spec.input_schema.get(key)
if expected_schema:
expected_type = expected_schema.get("type")
if expected_type and not self._type_matches(value, expected_type):
actual_type = type(value).__name__
errors.append(
f"Key '{key}': expected type '{expected_type}', got '{actual_type}'"
)
# Warnings don't make validation fail, but errors do
is_valid = len(errors) == 0
if not is_valid and self.config.log_cleanings:
logger.warning(
f"⚠ Output validation failed for {source_node_id}{target_node_spec.id}: "
f"{len(errors)} error(s), {len(warnings)} warning(s)"
)
return ValidationResult(
valid=is_valid,
errors=errors,
warnings=warnings,
)
async def clean_output(
self,
output: dict[str, Any],
source_node_id: str,
target_node_spec: Any, # NodeSpec
validation_errors: list[str],
) -> dict[str, Any]:
"""
Use heuristics and fast LLM to clean malformed output.
Args:
output: Raw output from source node
source_node_id: ID of source node
target_node_spec: Target node spec (for schema)
validation_errors: Errors from validation
Returns:
Cleaned output matching target schema
"""
if not self.config.enabled:
logger.warning("⚠ Output cleansing disabled in config")
return output
# --- PHASE 1: Fast Heuristic Repair (Avoids LLM call) ---
# Often the output is just a string containing JSON, or has minor syntax errors
# If output is a dictionary but malformed, we might need to serialize it first
# to try and fix the underlying string representation if it came from raw text
# Heuristic: Check if any value is actually a JSON string that should be promoted
# This handles the "JSON Parsing Trap" where LLM returns {"key": "{\"nested\": ...}"}
heuristic_fixed = False
fixed_output = output.copy()
for key, value in output.items():
if isinstance(value, str):
repaired = _heuristic_repair(value)
if repaired and isinstance(repaired, dict | list):
# Check if this repaired structure looks like what we want
# e.g. if the key is 'data' and the string contained valid JSON
fixed_output[key] = repaired
heuristic_fixed = True
# If we fixed something, re-validate manually to see if it's enough
if heuristic_fixed:
logger.info("⚡ Heuristic repair applied (nested JSON expansion)")
return fixed_output
# --- PHASE 2: LLM-based Repair ---
if not self.llm:
logger.warning("⚠ No LLM provider available for cleansing")
return output
# Build schema description for target node
schema_desc = self._build_schema_description(target_node_spec)
# Create cleansing prompt
prompt = f"""Clean this malformed agent output to match the expected schema.
VALIDATION ERRORS:
{chr(10).join(f"- {e}" for e in validation_errors)}
EXPECTED SCHEMA for node '{target_node_spec.id}':
{schema_desc}
RAW OUTPUT from node '{source_node_id}':
{json.dumps(output, indent=2, default=str)}
INSTRUCTIONS:
1. Extract values that match the expected schema keys
2. If a value is a JSON string, parse it and extract the correct field
3. Convert types to match the schema (string, dict, list, number, boolean)
4. Remove extra fields not in the expected schema
5. Ensure all required keys are present
Return ONLY valid JSON matching the expected schema. No explanations, no markdown."""
try:
if self.config.log_cleanings:
logger.info(
f"🧹 Cleaning output from '{source_node_id}' using {self.config.fast_model}"
)
response = await self.llm.acomplete(
messages=[{"role": "user", "content": prompt}],
system=(
"You clean malformed agent outputs. Return only valid JSON matching the schema."
),
max_tokens=2048, # Sufficient for cleaning most outputs
)
# Parse cleaned output
cleaned_text = response.content.strip()
# Apply heuristic repair to the LLM's output too (just in case)
cleaned = _heuristic_repair(cleaned_text)
if not cleaned:
# Fallback to standard load if heuristic returns None (unlikely for LLM output)
cleaned = json.loads(cleaned_text)
if isinstance(cleaned, dict):
self.cleansing_count += 1
if self.config.log_cleanings:
logger.info(
f"✓ Output cleaned successfully (total cleanings: {self.cleansing_count})"
)
return cleaned
else:
logger.warning(f"⚠ Cleaned output is not a dict: {type(cleaned)}")
if self.config.fallback_to_raw:
return output
else:
raise ValueError(f"Cleaning produced {type(cleaned)}, expected dict")
except json.JSONDecodeError as e:
logger.error(f"✗ Failed to parse cleaned JSON: {e}")
if self.config.fallback_to_raw:
logger.info("↩ Falling back to raw output")
return output
else:
raise
except Exception as e:
logger.error(f"✗ Output cleaning failed: {e}")
if self.config.fallback_to_raw:
logger.info("↩ Falling back to raw output")
return output
else:
raise
def _build_schema_description(self, node_spec: Any) -> str:
"""Build human-readable schema description from NodeSpec."""
lines = ["{"]
for key in node_spec.input_keys:
# Get type hint and description if available
if hasattr(node_spec, "input_schema") and node_spec.input_schema:
schema = node_spec.input_schema.get(key, {})
type_hint = schema.get("type", "any")
description = schema.get("description", "")
required = schema.get("required", True)
line = f' "{key}": {type_hint}'
if description:
line += f" // {description}"
if required:
line += " (required)"
lines.append(line + ",")
else:
# No schema, just show the key
lines.append(f' "{key}": any // (required)')
lines.append("}")
return "\n".join(lines)
def _type_matches(self, value: Any, expected_type: str) -> bool:
"""Check if value matches expected type."""
type_map = {
"string": str,
"str": str,
"int": int,
"integer": int,
"float": float,
"number": (int, float),
"bool": bool,
"boolean": bool,
"dict": dict,
"object": dict,
"list": list,
"array": list,
"any": object, # Matches everything
}
expected_class = type_map.get(expected_type.lower())
if expected_class:
return isinstance(value, expected_class)
# Unknown type, allow it
return True
def get_stats(self) -> dict[str, Any]:
"""Get cleansing statistics."""
return {
"total_cleanings": self.cleansing_count,
"failure_count": dict(self.failure_count),
"cache_size": len(self.success_cache),
}
+1 -1
View File
@@ -280,7 +280,7 @@ def build_transition_marker(
]
if file_lines:
sections.append(
"\nData files (use load_data to access):\n" + "\n".join(file_lines)
"\nData files (use read_file to access):\n" + "\n".join(file_lines)
)
# Agent working memory
@@ -1,242 +0,0 @@
"""
Test OutputCleaner with real Cerebras LLM.
Demonstrates how OutputCleaner fixes the JSON parsing trap using llama-3.3-70b.
"""
import json
import os
from framework.graph.node import NodeSpec
from framework.graph.output_cleaner import CleansingConfig, OutputCleaner
from framework.llm.litellm import LiteLLMProvider
def test_cleaning_with_cerebras():
"""Test that cleaning fixes malformed output using Cerebras llama-3.3-70b."""
print("\n" + "=" * 80)
print("LIVE TEST: Cleaning with Cerebras llama-3.3-70b")
print("=" * 80)
# Get API key
api_key = os.environ.get("CEREBRAS_API_KEY")
if not api_key:
print("\n⚠ Skipping: CEREBRAS_API_KEY not found in environment")
return
# Initialize LLM
llm = LiteLLMProvider(
api_key=api_key,
model="cerebras/llama-3.3-70b",
)
# Initialize cleaner with Cerebras
cleaner = OutputCleaner(
config=CleansingConfig(
enabled=True,
fast_model="cerebras/llama-3.3-70b",
log_cleanings=True,
),
llm_provider=llm,
)
# Scenario 1: JSON parsing trap (entire response in one key)
print("\n--- Scenario 1: JSON Parsing Trap ---")
malformed_output = {
"recommendation": (
'{\n "approval_decision": "APPROVED",\n "risk_score": 3.5,\n '
'"reason": "Standard terms, low risk"\n}'
),
}
target_spec = NodeSpec(
id="generate-recommendation",
name="Generate Recommendation",
description="Test",
input_keys=["recommendation"],
output_keys=["result"],
input_schema={
"recommendation": {
"type": "dict",
"required": True,
"description": "Recommendation with approval_decision and risk_score",
},
},
)
# Validate
validation = cleaner.validate_output(
output=malformed_output,
source_node_id="analyze-contract",
target_node_spec=target_spec,
)
print("\nMalformed output:")
print(json.dumps(malformed_output, indent=2))
print(f"\nValidation errors: {validation.errors}")
# Clean the output
print("\n🧹 Cleaning with Cerebras llama-3.3-70b...")
cleaned = cleaner.clean_output(
output=malformed_output,
source_node_id="analyze-contract",
target_node_spec=target_spec,
validation_errors=validation.errors,
)
print("\n✓ Cleaned output:")
print(json.dumps(cleaned, indent=2))
assert isinstance(cleaned, dict), "Should return dict"
assert "approval_decision" in str(cleaned) or isinstance(cleaned.get("recommendation"), dict), (
"Should have recommendation structure"
)
# Scenario 2: Multiple keys with JSON string
print("\n\n--- Scenario 2: Multiple Keys, JSON String ---")
malformed_output2 = {
"analysis": (
'{"high_risk_clauses": ["unlimited liability"], '
'"compliance_issues": [], "category": "high-risk"}'
),
"risk_score": "7.5", # String instead of number
}
target_spec2 = NodeSpec(
id="next-node",
name="Next Node",
description="Test",
input_keys=["analysis", "risk_score"],
output_keys=["result"],
input_schema={
"analysis": {"type": "dict", "required": True},
"risk_score": {"type": "number", "required": True},
},
)
validation2 = cleaner.validate_output(
output=malformed_output2,
source_node_id="analyze",
target_node_spec=target_spec2,
)
print("\nMalformed output:")
print(json.dumps(malformed_output2, indent=2))
print(f"\nValidation errors: {validation2.errors}")
if not validation2.valid:
print("\n🧹 Cleaning with Cerebras llama-3.3-70b...")
cleaned2 = cleaner.clean_output(
output=malformed_output2,
source_node_id="analyze",
target_node_spec=target_spec2,
validation_errors=validation2.errors,
)
print("\n✓ Cleaned output:")
print(json.dumps(cleaned2, indent=2))
assert isinstance(cleaned2, dict), "Should return dict"
assert isinstance(cleaned2.get("analysis"), dict), "analysis should be dict"
assert isinstance(cleaned2.get("risk_score"), (int, float)), "risk_score should be number"
# Stats
stats = cleaner.get_stats()
print("\n\nCleaner Statistics:")
print(f" Total cleanings: {stats['total_cleanings']}")
print(f" Cache size: {stats['cache_size']}")
print("\n" + "=" * 80)
print("✓ LIVE TEST PASSED")
print("=" * 80)
def test_validation_only():
"""Test validation without LLM (no cleaning)."""
print("\n" + "=" * 80)
print("TEST: Validation Only (No LLM)")
print("=" * 80)
cleaner = OutputCleaner(
config=CleansingConfig(enabled=True),
llm_provider=None, # No LLM
)
# Test 1: JSON parsing trap detection
malformed = {
"approval_decision": '{"approval_decision": "APPROVED", "risk_score": 3}',
}
target = NodeSpec(
id="target",
name="Target",
description="Test",
input_keys=["approval_decision"],
output_keys=["result"],
)
result = cleaner.validate_output(
output=malformed,
source_node_id="source",
target_node_spec=target,
)
print(f"\nInput: {json.dumps(malformed, indent=2)}")
print(f"Errors: {result.errors}")
print(f"Warnings: {result.warnings}")
assert not result.valid or len(result.warnings) > 0, "Should detect JSON string"
print("✓ Detected JSON parsing trap")
# Test 2: Missing keys
malformed2 = {"field1": "value"}
target2 = NodeSpec(
id="target",
name="Target",
description="Test",
input_keys=["field1", "field2"],
output_keys=["result"],
)
result2 = cleaner.validate_output(
output=malformed2,
source_node_id="source",
target_node_spec=target2,
)
print(f"\nInput: {json.dumps(malformed2, indent=2)}")
print(f"Errors: {result2.errors}")
assert not result2.valid, "Should be invalid"
assert "field2" in result2.errors[0], "Should mention missing field"
print("✓ Detected missing keys")
print("\n✓ Validation tests passed")
if __name__ == "__main__":
print("\n" + "=" * 80)
print("OUTPUT CLEANER LIVE TEST SUITE (with Cerebras)")
print("=" * 80)
try:
# Test validation (no LLM needed)
test_validation_only()
# Test cleaning with Cerebras
test_cleaning_with_cerebras()
print("\n" + "=" * 80)
print("ALL TESTS PASSED ✓")
print("=" * 80)
print("\nOutputCleaner is working with Cerebras llama-3.3-70b!")
print("- Fast cleaning (~200-500ms per operation)")
print("- Fixes JSON parsing trap")
print("- Converts types to match schema")
print("- Low cost (~$0.001 per cleaning)")
except Exception as e:
print(f"\n✗ TEST FAILED: {e}")
import traceback
traceback.print_exc()
raise
+1 -36
View File
@@ -1,11 +1,10 @@
"""Anthropic Claude LLM provider - backward compatible wrapper around LiteLLM."""
import os
from collections.abc import Callable
from typing import Any
from framework.llm.litellm import LiteLLMProvider
from framework.llm.provider import LLMProvider, LLMResponse, Tool, ToolResult, ToolUse
from framework.llm.provider import LLMProvider, LLMResponse, Tool
def _get_api_key_from_credential_store() -> str | None:
@@ -83,23 +82,6 @@ class AnthropicProvider(LLMProvider):
max_retries=max_retries,
)
def complete_with_tools(
self,
messages: list[dict[str, Any]],
system: str,
tools: list[Tool],
tool_executor: Callable[[ToolUse], ToolResult],
max_iterations: int = 10,
) -> LLMResponse:
"""Run a tool-use loop until Claude produces a final response (via LiteLLM)."""
return self._provider.complete_with_tools(
messages=messages,
system=system,
tools=tools,
tool_executor=tool_executor,
max_iterations=max_iterations,
)
async def acomplete(
self,
messages: list[dict[str, Any]],
@@ -120,20 +102,3 @@ class AnthropicProvider(LLMProvider):
json_mode=json_mode,
max_retries=max_retries,
)
async def acomplete_with_tools(
self,
messages: list[dict[str, Any]],
system: str,
tools: list[Tool],
tool_executor: Callable[[ToolUse], ToolResult],
max_iterations: int = 10,
) -> LLMResponse:
"""Async tool-use loop via LiteLLM."""
return await self._provider.acomplete_with_tools(
messages=messages,
system=system,
tools=tools,
tool_executor=tool_executor,
max_iterations=max_iterations,
)
+247 -274
View File
@@ -11,7 +11,7 @@ import asyncio
import json
import logging
import time
from collections.abc import AsyncIterator, Callable
from collections.abc import AsyncIterator
from datetime import datetime
from pathlib import Path
from typing import Any
@@ -23,7 +23,7 @@ except ImportError:
litellm = None # type: ignore[assignment]
RateLimitError = Exception # type: ignore[assignment, misc]
from framework.llm.provider import LLMProvider, LLMResponse, Tool, ToolResult, ToolUse
from framework.llm.provider import LLMProvider, LLMResponse, Tool
from framework.llm.stream_events import StreamEvent
logger = logging.getLogger(__name__)
@@ -70,13 +70,59 @@ def _patch_litellm_anthropic_oauth() -> None:
AnthropicModelInfo.validate_environment = _patched_validate_environment
def _patch_litellm_metadata_nonetype() -> None:
"""Patch litellm entry points to prevent metadata=None TypeError.
litellm bug: the @client decorator in utils.py has four places that do
"model_group" in kwargs.get("metadata", {})
but kwargs["metadata"] can be explicitly None (set internally by
litellm_params), causing:
TypeError: argument of type 'NoneType' is not iterable
This masks the real API error with a confusing APIConnectionError.
Fix: wrap the four litellm entry points (completion, acompletion,
responses, aresponses) to pop metadata=None before the @client
decorator's error handler can crash on it.
"""
import functools
for fn_name in ("completion", "acompletion", "responses", "aresponses"):
original = getattr(litellm, fn_name, None)
if original is None:
continue
if asyncio.iscoroutinefunction(original):
@functools.wraps(original)
async def _async_wrapper(*args, _orig=original, **kwargs):
if kwargs.get("metadata") is None:
kwargs.pop("metadata", None)
return await _orig(*args, **kwargs)
setattr(litellm, fn_name, _async_wrapper)
else:
@functools.wraps(original)
def _sync_wrapper(*args, _orig=original, **kwargs):
if kwargs.get("metadata") is None:
kwargs.pop("metadata", None)
return _orig(*args, **kwargs)
setattr(litellm, fn_name, _sync_wrapper)
if litellm is not None:
_patch_litellm_anthropic_oauth()
_patch_litellm_metadata_nonetype()
RATE_LIMIT_MAX_RETRIES = 10
RATE_LIMIT_BACKOFF_BASE = 2 # seconds
RATE_LIMIT_MAX_DELAY = 120 # seconds - cap to prevent absurd waits
# Empty-stream retries use a short fixed delay, not the rate-limit backoff.
# Conversation-structure issues are deterministic — long waits don't help.
EMPTY_STREAM_MAX_RETRIES = 3
EMPTY_STREAM_RETRY_DELAY = 1.0 # seconds
# Directory for dumping failed requests
FAILED_REQUESTS_DIR = Path.home() / ".hive" / "failed_requests"
@@ -124,7 +170,7 @@ def _dump_failed_request(
"temperature": kwargs.get("temperature"),
}
with open(filepath, "w") as f:
with open(filepath, "w", encoding="utf-8") as f:
json.dump(dump_data, f, indent=2, default=str)
return str(filepath)
@@ -191,6 +237,11 @@ def _is_stream_transient_error(exc: BaseException) -> bool:
Transient errors (recoverable=True): network issues, server errors, timeouts.
Permanent errors (recoverable=False): auth, bad request, context window, etc.
NOTE: "Failed to parse tool call arguments" (malformed LLM output) is NOT
transient at the stream level retrying with the same messages produces the
same malformed output. This error is handled at the EventLoopNode level
where the conversation can be modified before retrying.
"""
try:
from litellm.exceptions import (
@@ -275,12 +326,21 @@ class LiteLLMProvider(LLMProvider):
self.api_key = api_key
self.api_base = api_base
self.extra_kwargs = kwargs
# The Codex ChatGPT backend (chatgpt.com/backend-api/codex) rejects
# several standard OpenAI params: max_output_tokens, stream_options.
self._codex_backend = bool(api_base and "chatgpt.com/backend-api/codex" in api_base)
if litellm is None:
raise ImportError(
"LiteLLM is not installed. Please install it with: uv pip install litellm"
)
# Note: The Codex ChatGPT backend is a Responses API endpoint at
# chatgpt.com/backend-api/codex/responses. LiteLLM's model registry
# correctly marks codex models with mode="responses", so we do NOT
# override the mode. The responses_api_bridge in litellm handles
# converting Chat Completions requests to Responses API format.
def _completion_with_rate_limit_retry(
self, max_retries: int | None = None, **kwargs: Any
) -> Any:
@@ -404,6 +464,21 @@ class LiteLLMProvider(LLMProvider):
max_retries: int | None = None,
) -> LLMResponse:
"""Generate a completion using LiteLLM."""
# Codex ChatGPT backend requires streaming — delegate to the unified
# async streaming path which properly handles tool calls.
if self._codex_backend:
return asyncio.run(
self.acomplete(
messages=messages,
system=system,
tools=tools,
max_tokens=max_tokens,
response_format=response_format,
json_mode=json_mode,
max_retries=max_retries,
)
)
# Prepare messages with system prompt
full_messages = []
if system:
@@ -466,127 +541,6 @@ class LiteLLMProvider(LLMProvider):
raw_response=response,
)
def complete_with_tools(
self,
messages: list[dict[str, Any]],
system: str,
tools: list[Tool],
tool_executor: Callable[[ToolUse], ToolResult],
max_iterations: int = 10,
max_tokens: int = 4096,
) -> LLMResponse:
"""Run a tool-use loop until the LLM produces a final response."""
# Prepare messages with system prompt
current_messages = []
if system:
current_messages.append({"role": "system", "content": system})
current_messages.extend(messages)
total_input_tokens = 0
total_output_tokens = 0
# Convert tools to OpenAI format
openai_tools = [self._tool_to_openai_format(t) for t in tools]
for _ in range(max_iterations):
# Build kwargs
kwargs: dict[str, Any] = {
"model": self.model,
"messages": current_messages,
"max_tokens": max_tokens,
"tools": openai_tools,
**self.extra_kwargs,
}
if self.api_key:
kwargs["api_key"] = self.api_key
if self.api_base:
kwargs["api_base"] = self.api_base
response = self._completion_with_rate_limit_retry(**kwargs)
# Track tokens
usage = response.usage
if usage:
total_input_tokens += usage.prompt_tokens
total_output_tokens += usage.completion_tokens
choice = response.choices[0]
message = choice.message
# Check if we're done (no tool calls)
if choice.finish_reason == "stop" or not message.tool_calls:
return LLMResponse(
content=message.content or "",
model=response.model or self.model,
input_tokens=total_input_tokens,
output_tokens=total_output_tokens,
stop_reason=choice.finish_reason or "stop",
raw_response=response,
)
# Process tool calls.
# Add assistant message with tool calls.
current_messages.append(
{
"role": "assistant",
"content": message.content,
"tool_calls": [
{
"id": tc.id,
"type": "function",
"function": {
"name": tc.function.name,
"arguments": tc.function.arguments,
},
}
for tc in message.tool_calls
],
}
)
# Execute tools and add results.
for tool_call in message.tool_calls:
try:
args = json.loads(tool_call.function.arguments)
except json.JSONDecodeError:
# Surface error to LLM and skip tool execution
current_messages.append(
{
"role": "tool",
"tool_call_id": tool_call.id,
"content": "Invalid JSON arguments provided to tool.",
}
)
continue
tool_use = ToolUse(
id=tool_call.id,
name=tool_call.function.name,
input=args,
)
result = tool_executor(tool_use)
# Add tool result message
current_messages.append(
{
"role": "tool",
"tool_call_id": result.tool_use_id,
"content": result.content,
}
)
# Max iterations reached
return LLMResponse(
content="Max tool iterations reached",
model=self.model,
input_tokens=total_input_tokens,
output_tokens=total_output_tokens,
stop_reason="max_iterations",
raw_response=None,
)
# ------------------------------------------------------------------
# Async variants — non-blocking on the event loop
# ------------------------------------------------------------------
@@ -709,6 +663,19 @@ class LiteLLMProvider(LLMProvider):
max_retries: int | None = None,
) -> LLMResponse:
"""Async version of complete(). Uses litellm.acompletion — non-blocking."""
# Codex ChatGPT backend requires streaming — route through stream() which
# already handles Codex quirks and has proper tool call accumulation.
if self._codex_backend:
stream_iter = self.stream(
messages=messages,
system=system,
tools=tools,
max_tokens=max_tokens,
response_format=response_format,
json_mode=json_mode,
)
return await self._collect_stream_to_response(stream_iter)
full_messages: list[dict[str, Any]] = []
if system:
full_messages.append({"role": "system", "content": system})
@@ -753,115 +720,6 @@ class LiteLLMProvider(LLMProvider):
raw_response=response,
)
async def acomplete_with_tools(
self,
messages: list[dict[str, Any]],
system: str,
tools: list[Tool],
tool_executor: Callable[[ToolUse], ToolResult],
max_iterations: int = 10,
max_tokens: int = 4096,
) -> LLMResponse:
"""Async version of complete_with_tools(). Uses litellm.acompletion — non-blocking."""
current_messages: list[dict[str, Any]] = []
if system:
current_messages.append({"role": "system", "content": system})
current_messages.extend(messages)
total_input_tokens = 0
total_output_tokens = 0
openai_tools = [self._tool_to_openai_format(t) for t in tools]
for _ in range(max_iterations):
kwargs: dict[str, Any] = {
"model": self.model,
"messages": current_messages,
"max_tokens": max_tokens,
"tools": openai_tools,
**self.extra_kwargs,
}
if self.api_key:
kwargs["api_key"] = self.api_key
if self.api_base:
kwargs["api_base"] = self.api_base
response = await self._acompletion_with_rate_limit_retry(**kwargs)
usage = response.usage
if usage:
total_input_tokens += usage.prompt_tokens
total_output_tokens += usage.completion_tokens
choice = response.choices[0]
message = choice.message
if choice.finish_reason == "stop" or not message.tool_calls:
return LLMResponse(
content=message.content or "",
model=response.model or self.model,
input_tokens=total_input_tokens,
output_tokens=total_output_tokens,
stop_reason=choice.finish_reason or "stop",
raw_response=response,
)
current_messages.append(
{
"role": "assistant",
"content": message.content,
"tool_calls": [
{
"id": tc.id,
"type": "function",
"function": {
"name": tc.function.name,
"arguments": tc.function.arguments,
},
}
for tc in message.tool_calls
],
}
)
for tool_call in message.tool_calls:
try:
args = json.loads(tool_call.function.arguments)
except json.JSONDecodeError:
current_messages.append(
{
"role": "tool",
"tool_call_id": tool_call.id,
"content": "Invalid JSON arguments provided to tool.",
}
)
continue
tool_use = ToolUse(
id=tool_call.id,
name=tool_call.function.name,
input=args,
)
result = tool_executor(tool_use)
current_messages.append(
{
"role": "tool",
"tool_call_id": result.tool_use_id,
"content": result.content,
}
)
return LLMResponse(
content="Max tool iterations reached",
model=self.model,
input_tokens=total_input_tokens,
output_tokens=total_output_tokens,
stop_reason="max_iterations",
raw_response=None,
)
def _tool_to_openai_format(self, tool: Tool) -> dict[str, Any]:
"""Convert Tool to OpenAI function calling format."""
return {
@@ -883,6 +741,8 @@ class LiteLLMProvider(LLMProvider):
system: str = "",
tools: list[Tool] | None = None,
max_tokens: int = 4096,
response_format: dict[str, Any] | None = None,
json_mode: bool = False,
) -> AsyncIterator[StreamEvent]:
"""Stream a completion via litellm.acompletion(stream=True).
@@ -907,6 +767,31 @@ class LiteLLMProvider(LLMProvider):
full_messages.append({"role": "system", "content": system})
full_messages.extend(messages)
# Codex Responses API requires an `instructions` field (system prompt).
# Inject a minimal one when callers don't provide a system message.
if self._codex_backend and not any(m["role"] == "system" for m in full_messages):
full_messages.insert(0, {"role": "system", "content": "You are a helpful assistant."})
# Add JSON mode via prompt engineering (works across all providers)
if json_mode:
json_instruction = "\n\nPlease respond with a valid JSON object."
if full_messages and full_messages[0]["role"] == "system":
full_messages[0]["content"] += json_instruction
else:
full_messages.insert(0, {"role": "system", "content": json_instruction.strip()})
# Remove ghost empty assistant messages (content="" and no tool_calls).
# These arise when a model returns an empty stream after a tool result
# (an "expected" no-op turn). Keeping them in history confuses some
# models (notably Codex/gpt-5.3) and causes cascading empty streams.
full_messages = [
m
for m in full_messages
if not (
m.get("role") == "assistant" and not m.get("content") and not m.get("tool_calls")
)
]
kwargs: dict[str, Any] = {
"model": self.model,
"messages": full_messages,
@@ -921,6 +806,12 @@ class LiteLLMProvider(LLMProvider):
kwargs["api_base"] = self.api_base
if tools:
kwargs["tools"] = [self._tool_to_openai_format(t) for t in tools]
if response_format:
kwargs["response_format"] = response_format
# The Codex ChatGPT backend (Responses API) rejects several params.
if self._codex_backend:
kwargs.pop("max_tokens", None)
kwargs.pop("stream_options", None)
for attempt in range(RATE_LIMIT_MAX_RETRIES + 1):
# Post-stream events (ToolCall, TextEnd, Finish) are buffered
@@ -929,6 +820,7 @@ class LiteLLMProvider(LLMProvider):
tail_events: list[StreamEvent] = []
accumulated_text = ""
tool_calls_acc: dict[int, dict[str, str]] = {}
_last_tool_idx = 0 # tracks most recently opened tool call slot
input_tokens = 0
output_tokens = 0
stream_finish_reason: str | None = None
@@ -952,9 +844,36 @@ class LiteLLMProvider(LLMProvider):
)
# --- Tool calls (accumulate across chunks) ---
# The Codex/Responses API bridge (litellm bug) hardcodes
# index=0 on every ChatCompletionToolCallChunk, even for
# parallel tool calls. We work around this by using tc.id
# (set on output_item.added events) as a "new tool call"
# signal and tracking the most recently opened slot for
# argument deltas that arrive with id=None.
if delta and delta.tool_calls:
for tc in delta.tool_calls:
idx = tc.index if hasattr(tc, "index") and tc.index is not None else 0
if tc.id:
# New tool call announced (or done event re-sent).
# Check if this id already has a slot.
existing_idx = next(
(k for k, v in tool_calls_acc.items() if v["id"] == tc.id),
None,
)
if existing_idx is not None:
idx = existing_idx
elif idx in tool_calls_acc and tool_calls_acc[idx]["id"] not in (
"",
tc.id,
):
# Slot taken by a different call — assign new index
idx = max(tool_calls_acc.keys()) + 1
_last_tool_idx = idx
else:
# Argument delta with no id — route to last opened slot
idx = _last_tool_idx
if idx not in tool_calls_acc:
tool_calls_acc[idx] = {"id": "", "name": "", "arguments": ""}
if tc.id:
@@ -1002,27 +921,7 @@ class LiteLLMProvider(LLMProvider):
# (If text deltas were yielded above, has_content is True
# and we skip the retry path — nothing was yielded in vain.)
has_content = accumulated_text or tool_calls_acc
if not has_content and attempt < RATE_LIMIT_MAX_RETRIES:
# If the conversation ends with an assistant or tool
# message, an empty stream is expected — the LLM has
# nothing new to say. Don't burn retries on this;
# let the caller (EventLoopNode) decide what to do.
# Typical case: client_facing node where the LLM set
# all outputs via set_output tool calls, and the tool
# results are the last messages.
last_role = next(
(m["role"] for m in reversed(full_messages) if m.get("role") != "system"),
None,
)
if last_role in ("assistant", "tool"):
logger.debug(
"[stream] Empty response after %s message — expected, not retrying.",
last_role,
)
for event in tail_events:
yield event
return
if not has_content:
# finish_reason=length means the model exhausted
# max_tokens before producing content. Retrying with
# the same max_tokens will never help.
@@ -1040,28 +939,49 @@ class LiteLLMProvider(LLMProvider):
yield event
return
wait = _compute_retry_delay(attempt)
token_count, token_method = _estimate_tokens(
self.model,
full_messages,
# Empty stream — always retry regardless of last message
# role. Ghost empty streams after tool results are NOT
# expected no-ops; they create infinite loops when the
# conversation doesn't change between iterations.
# After retries, return the empty result and let the
# caller (EventLoopNode) decide how to handle it.
last_role = next(
(m["role"] for m in reversed(full_messages) if m.get("role") != "system"),
None,
)
dump_path = _dump_failed_request(
model=self.model,
kwargs=kwargs,
error_type="empty_stream",
attempt=attempt,
)
logger.warning(
f"[stream-retry] {self.model} returned empty stream — "
f"~{token_count} tokens ({token_method}). "
f"Request dumped to: {dump_path}. "
f"Retrying in {wait}s "
f"(attempt {attempt + 1}/{RATE_LIMIT_MAX_RETRIES})"
)
await asyncio.sleep(wait)
continue
if attempt < EMPTY_STREAM_MAX_RETRIES:
token_count, token_method = _estimate_tokens(
self.model,
full_messages,
)
dump_path = _dump_failed_request(
model=self.model,
kwargs=kwargs,
error_type="empty_stream",
attempt=attempt,
)
logger.warning(
f"[stream-retry] {self.model} returned empty stream "
f"after {last_role} message — "
f"~{token_count} tokens ({token_method}). "
f"Request dumped to: {dump_path}. "
f"Retrying in {EMPTY_STREAM_RETRY_DELAY}s "
f"(attempt {attempt + 1}/{EMPTY_STREAM_MAX_RETRIES})"
)
await asyncio.sleep(EMPTY_STREAM_RETRY_DELAY)
continue
# Success (or final attempt) — flush remaining events.
# All retries exhausted — log and return the empty
# result. EventLoopNode's empty response guard will
# accept if all outputs are set, or handle the ghost
# stream case if outputs are still missing.
logger.error(
f"[stream] {self.model} returned empty stream after "
f"{EMPTY_STREAM_MAX_RETRIES} retries "
f"(last_role={last_role}). Returning empty result."
)
# Success (or empty after exhausted retries) — flush events.
for event in tail_events:
yield event
return
@@ -1093,3 +1013,56 @@ class LiteLLMProvider(LLMProvider):
recoverable = _is_stream_transient_error(e)
yield StreamErrorEvent(error=str(e), recoverable=recoverable)
return
async def _collect_stream_to_response(
self,
stream: AsyncIterator[StreamEvent],
) -> LLMResponse:
"""Consume a stream() iterator and collect it into a single LLMResponse.
Used by acomplete() to route through the unified streaming path so that
all backends (including Codex) get proper tool call handling.
"""
from framework.llm.stream_events import (
FinishEvent,
StreamErrorEvent,
TextDeltaEvent,
ToolCallEvent,
)
content = ""
tool_calls: list[dict[str, Any]] = []
input_tokens = 0
output_tokens = 0
stop_reason = ""
model = self.model
async for event in stream:
if isinstance(event, TextDeltaEvent):
content = event.snapshot # snapshot is the accumulated text
elif isinstance(event, ToolCallEvent):
tool_calls.append(
{
"id": event.tool_use_id,
"name": event.tool_name,
"input": event.tool_input,
}
)
elif isinstance(event, FinishEvent):
input_tokens = event.input_tokens
output_tokens = event.output_tokens
stop_reason = event.stop_reason
if event.model:
model = event.model
elif isinstance(event, StreamErrorEvent):
if not event.recoverable:
raise RuntimeError(f"Stream error: {event.error}")
return LLMResponse(
content=content,
model=model,
input_tokens=input_tokens,
output_tokens=output_tokens,
stop_reason=stop_reason,
raw_response={"tool_calls": tool_calls} if tool_calls else None,
)
+2 -56
View File
@@ -2,10 +2,10 @@
import json
import re
from collections.abc import AsyncIterator, Callable
from collections.abc import AsyncIterator
from typing import Any
from framework.llm.provider import LLMProvider, LLMResponse, Tool, ToolResult, ToolUse
from framework.llm.provider import LLMProvider, LLMResponse, Tool
from framework.llm.stream_events import (
FinishEvent,
StreamEvent,
@@ -146,43 +146,6 @@ class MockLLMProvider(LLMProvider):
stop_reason="mock_complete",
)
def complete_with_tools(
self,
messages: list[dict[str, Any]],
system: str,
tools: list[Tool],
tool_executor: Callable[[ToolUse], ToolResult],
max_iterations: int = 10,
) -> LLMResponse:
"""
Generate a mock completion without tool use.
In mock mode, we skip tool execution and return a final response immediately.
Args:
messages: Initial conversation (ignored in mock mode)
system: System prompt (used to extract expected output keys)
tools: Available tools (ignored in mock mode)
tool_executor: Tool executor function (ignored in mock mode)
max_iterations: Max iterations (ignored in mock mode)
Returns:
LLMResponse with mock content
"""
# In mock mode, we don't execute tools - just return a final response
# Try to generate JSON if the system prompt suggests structured output
json_mode = "json" in system.lower() or "output_keys" in system.lower()
content = self._generate_mock_response(system=system, json_mode=json_mode)
return LLMResponse(
content=content,
model=self.model,
input_tokens=0,
output_tokens=0,
stop_reason="mock_complete",
)
async def acomplete(
self,
messages: list[dict[str, Any]],
@@ -204,23 +167,6 @@ class MockLLMProvider(LLMProvider):
max_retries=max_retries,
)
async def acomplete_with_tools(
self,
messages: list[dict[str, Any]],
system: str,
tools: list[Tool],
tool_executor: Callable[[ToolUse], ToolResult],
max_iterations: int = 10,
) -> LLMResponse:
"""Async mock tool-use completion (no I/O, returns immediately)."""
return self.complete_with_tools(
messages=messages,
system=system,
tools=tools,
tool_executor=tool_executor,
max_iterations=max_iterations,
)
async def stream(
self,
messages: list[dict[str, Any]],
+1 -51
View File
@@ -2,7 +2,7 @@
import asyncio
from abc import ABC, abstractmethod
from collections.abc import AsyncIterator, Callable
from collections.abc import AsyncIterator
from dataclasses import dataclass, field
from functools import partial
from typing import Any
@@ -90,30 +90,6 @@ class LLMProvider(ABC):
"""
pass
@abstractmethod
def complete_with_tools(
self,
messages: list[dict[str, Any]],
system: str,
tools: list[Tool],
tool_executor: Callable[["ToolUse"], "ToolResult"],
max_iterations: int = 10,
) -> LLMResponse:
"""
Run a tool-use loop until the LLM produces a final response.
Args:
messages: Initial conversation
system: System prompt
tools: Available tools
tool_executor: Function to execute tools: (ToolUse) -> ToolResult
max_iterations: Max tool calls before stopping
Returns:
Final LLMResponse after tool use completes
"""
pass
async def acomplete(
self,
messages: list[dict[str, Any]],
@@ -144,32 +120,6 @@ class LLMProvider(ABC):
),
)
async def acomplete_with_tools(
self,
messages: list[dict[str, Any]],
system: str,
tools: list["Tool"],
tool_executor: Callable[["ToolUse"], "ToolResult"],
max_iterations: int = 10,
) -> "LLMResponse":
"""Async version of complete_with_tools(). Non-blocking on the event loop.
Default implementation offloads the sync complete_with_tools() to a thread pool.
Subclasses SHOULD override for native async I/O.
"""
loop = asyncio.get_running_loop()
return await loop.run_in_executor(
None,
partial(
self.complete_with_tools,
messages=messages,
system=system,
tools=tools,
tool_executor=tool_executor,
max_iterations=max_iterations,
),
)
async def stream(
self,
messages: list[dict[str, Any]],

Some files were not shown because too many files have changed in this diff Show More