Compare commits

...

850 Commits

Author SHA1 Message Date
bryan 0b8ed521c0 fix: update pause in pipeline to use stop_worker like queen 2026-03-04 13:42:20 -08:00
Timothy @aden 219e603de6 Merge pull request #5813 from aden-hive/refactor/quickstart
Refactor/quickstart
2026-03-04 12:27:45 -08:00
Timothy @aden 7663a5bce8 Merge pull request #5797 from Waryjustice/fix/windows-browser-auto-open
fix: browser auto-open after quickstart does not work on Windows
2026-03-04 12:27:35 -08:00
Timothy f2841b945d chore: lint 2026-03-04 12:24:08 -08:00
bryan faff64c413 chore: agents.md update 2026-03-04 12:12:27 -08:00
Timothy 6fbcdc1d87 fix: auto install node 20 2026-03-04 12:11:29 -08:00
bryan 69a11af949 chore: best effort alignment of windows quickstart 2026-03-04 11:43:50 -08:00
bryan 9ef272020e chore: added llm key health check 2026-03-04 11:35:12 -08:00
bryan 258cfe7de5 chore: added easy way to update llm provider key 2026-03-04 10:42:57 -08:00
bryan 0d53b21133 chore: doc updates about hive open 2026-03-04 10:33:34 -08:00
bryan 0ccb28ffab fix: enter to use previously configured 2026-03-04 10:05:59 -08:00
bryan b30b571b44 chore: update recommended models 2026-03-04 09:54:29 -08:00
bryan bc44c3a401 chore: make gcu enabled by default 2026-03-04 09:52:42 -08:00
bryan 7fbf57cbb7 fix: linter update 2026-03-04 09:52:16 -08:00
bryan 67d094f51a fix: tool tests 2026-03-04 09:22:34 -08:00
bryan 873af04c6e fix: utilize mac keychain for claude code subscription 2026-03-04 09:22:12 -08:00
Shaurya Singh 2f0439dca8 Merge branch 'main' into fix/windows-browser-auto-open 2026-03-04 22:50:39 +05:30
bryan 1920192656 feat: hive open cmd 2026-03-04 08:55:18 -08:00
Waryjustice f56feaf821 fix: browser auto-open after quickstart does not work on Windows 2026-03-04 22:12:53 +05:30
Timothy @aden 4cbd5a4c6c Merge pull request #5786 from osb910/fix/charmap-decode-error
fix(core): add utf-8 encoding to backend open calls (micro-fix)
2026-03-04 08:39:10 -08:00
Timothy 65aa5629e8 chore: fix lint 2026-03-04 08:34:01 -08:00
Omar Shareef 7193d09bed formatting warning fix 2026-03-04 16:43:46 +02:00
Omar Shareef 49f8fae0b4 fix: systematically enforce UTF-8 encoding across tools and core to fix Windows charmap decode errors 2026-03-04 16:04:53 +02:00
Omar Shareef e1a490756e fix: systematically enforce UTF-8 encoding across tools and core to fix Windows charmap decode errors 2026-03-04 15:58:03 +02:00
Omar Shareef 91bfaf36e3 fix(core): add utf-8 encoding to backend open calls
This fixes a charmap decoding error on Windows when opening agent files without explicitly specifying the encoding.
2026-03-04 13:32:59 +02:00
Timothy @aden 465adf5b1f Merge pull request #5767 from aden-hive/feat/integrations
Feat/integrations
2026-03-03 22:04:08 -08:00
RichardTang-Aden 132d00d166 Merge pull request #5769 from aden-hive/queen-mode-separation
Release / Create Release (push) Waiting to run
Queen mode separation: building, staging, and running modes
2026-03-03 21:31:23 -08:00
Richard Tang a604fee3aa chore: mode label update 2026-03-03 20:47:35 -08:00
Timothy 8018325923 style: fix all ruff lint errors (E501, E722, E741, F841)
- Break long lines (E501) across 25+ files
- Replace bare except with except Exception (E722)
- Rename ambiguous variable `l` to `item` (E741)
- Prefix unused variables with underscore (F841)
2026-03-03 20:42:30 -08:00
Richard Tang 3f86bd4009 chore: lint fix 2026-03-03 20:39:04 -08:00
Timothy b4cf10214b chore: lint issues 2026-03-03 20:38:30 -08:00
Bryan @ Aden c7818c2c33 Merge pull request #5766 from aden-hive/fix/credential-modal-delete
(micro-fix): Fix/credential modal delete
2026-03-04 04:38:23 +00:00
Timothy e421bcc326 chore: lint issues 2026-03-03 20:36:28 -08:00
Richard Tang 09e5a4dcc0 chore: frontend verbrige 2026-03-03 20:31:26 -08:00
Richard Tang ce08c44235 feat: improve ui indicator 2026-03-03 20:28:32 -08:00
Richard Tang e743234324 fix: strengthen prompt to collect user intent 2026-03-03 20:23:53 -08:00
Timothy 9b76ac48b7 chore: new dependency 2026-03-03 20:23:10 -08:00
Richard Tang 6ae16345a8 fix: reference err from merging 2026-03-03 20:15:37 -08:00
Richard Tang 8daaf000b1 Merge remote-tracking branch 'origin/feat/question-widget' into queen-mode-separation 2026-03-03 20:09:10 -08:00
Richard Tang 273f411eee feat: replace the reload agent to stop worker 2026-03-03 20:01:27 -08:00
Richard Tang 6929cecf8a fix: tag for frontend 2026-03-03 19:53:18 -08:00
Richard Tang 9221a7ff03 Merge remote-tracking branch 'origin/queen-mode-separation' into queen-mode-separation 2026-03-03 19:43:33 -08:00
Richard Tang a6089c5b3b feat: returning queen bee status when starting session 2026-03-03 19:43:04 -08:00
Richard Tang a7ee972b32 feat: enable the frontend to cancel the current queen run and sync queen mode 2026-03-03 19:30:55 -08:00
Richard Tang c817989b99 feat: allow frontend change to control mode 2026-03-03 19:29:33 -08:00
Richard Tang 2272a6854c refactor: consolidate discorver_mcp_tools and list_agent_tools 2026-03-03 19:08:58 -08:00
Timothy 040fc1ee8d feat: corrected agent generation guidelines 2026-03-03 18:53:40 -08:00
Richard Tang f00b8d7b8c fix: update the initial state condition 2026-03-03 18:35:24 -08:00
Timothy @aden 6c8c6d7048 Merge pull request #5234 from Antiarin/fix/guardian-self-trigger-loop
fix(tui): fix pause/stop to cancel all running tasks across all graphs
2026-03-03 18:17:15 -08:00
Richard Tang f27ef52c7a feat: update queen initial state 2026-03-03 18:15:51 -08:00
Richard Tang 0a2ff1db97 feat: new queen stages and tools 2026-03-03 18:07:47 -08:00
Timothy 6da48eac6f feat: split tool loading into verified and unverified tiers
register_all_tools() now only loads verified (stable) tools by default.
Pass include_unverified=True to also load new/community integrations.
This prevents unverified tools from being loaded in production.

Also fixes duplicate register_brevo and register_pushover calls.
2026-03-03 17:54:45 -08:00
Timothy 638ff04e24 fix: remove duplicate community tool directories and fix credential wiring
- Remove s3_tool (duplicate of aws_s3_tool), power_bi_tool (duplicate of
  powerbi_tool), x_tool (duplicate of twitter_tool)
- Remove integrations/plaid (duplicate of plaid_tool), integrations/sap_s4hana
  (duplicate of sap_tool), stray tools/mssql.py
- Add help key to credential error responses across 14 tool modules
- Fix health checker registry keys (calendly -> calendly_pat, lusha -> lusha_api_key)
- Add health_check_endpoint to calendly and lusha credential specs
- Fix Trello env var (TRELLO_TOKEN -> TRELLO_API_TOKEN) and remove duplicate
  Trello specs from hubspot.py
- Add credential_group="aws" to AWS S3 and Redshift specs sharing env vars
- Update conftest UNREGISTERED_COMMUNITY_MODULES to only contain mssql_tool
2026-03-03 17:46:28 -08:00
Timothy d7075b459b fix: cleanse llm conversations 2026-03-03 17:44:21 -08:00
bryan d0e7aa14b6 fix: hide delete button for Aden-managed credentials 2026-03-03 17:36:04 -08:00
bryan 59fee56c54 fix: share server credential store with runner to avoid redundant Aden syncs 2026-03-03 17:35:24 -08:00
bryan 2207306169 fix: resolve MCP server cwd from repo root instead of agent path 2026-03-03 17:34:51 -08:00
Richard Tang 8ff2e91f2d feat: add queen agent building and running mode switching 2026-03-03 16:01:41 -08:00
Richard Tang 61afaa4c8b feat: add uv instruction to agents 2026-03-03 14:51:58 -08:00
Richard Tang 0de47dbc3f feat: agents.md for agent collaboration 2026-03-03 14:51:58 -08:00
Richard Tang 676ef56134 fix: mcp path 2026-03-03 14:51:58 -08:00
Richard Tang f0899bb35d feat: use send instead of draft for email reply agent 2026-03-03 14:51:58 -08:00
Richard Tang f490038e36 chore: move the email reply sample agent 2026-03-03 14:51:58 -08:00
Richard Tang cbf220eb00 feat: email reply sample agent 2026-03-03 14:51:58 -08:00
Richard Tang bf0d80ea20 docs: reorder section in documentation 2026-03-03 14:51:58 -08:00
Richard Tang 3ae889a6f8 docs: add running screenshot and update the coding agent instruction 2026-03-03 14:51:58 -08:00
Richard Tang 03ca1067ac docs: sync all i18n READMEs with primary README 2026-03-03 14:51:58 -08:00
Richard Tang 3cda30a40a docs: update the latest features from recent changes 2026-03-03 14:51:58 -08:00
Richard Tang 26934527b9 docs: update readme instructions 2026-03-03 14:51:58 -08:00
Richard Tang 2619acde22 docs: remove TUI in the readme 2026-03-03 14:51:58 -08:00
Richard Tang b983d3cfd2 chore: ignore local dev skills 2026-03-03 14:51:58 -08:00
Richard Tang 87a9dd15fe fix: load-new-session from home 2026-03-03 14:51:58 -08:00
RichardTang-Aden 4066962ade Merge pull request #5751 from aden-hive/load-new-session-from-home
Fix new session from home and add email reply agent template
2026-03-03 14:48:17 -08:00
Richard Tang 0f26e34f09 fix: improve the reply template 2026-03-03 14:45:07 -08:00
Richard Tang d76e436e3d fix: new session should have their own id 2026-03-03 14:44:51 -08:00
Timothy 4ff531dec7 fix: update expected health checkers set (add calendly, zoho_crm) 2026-03-03 14:10:34 -08:00
Timothy 4f8b3d7aff fix: update credential specs for community Linear/Trello tools, skip unregistered community modules 2026-03-03 14:09:04 -08:00
Timothy 210fa9c474 fix: use community Brevo implementation (6 tools), remove orphaned x_tool test 2026-03-03 14:06:00 -08:00
Timothy 25361cac8c fix: align tests with community implementations, revert Reddit to httpx (praw unavailable) 2026-03-03 14:02:33 -08:00
Timothy 28defebd6d fix: remove community youtube_transcript tool.py requiring uninstalled SDK 2026-03-03 13:58:45 -08:00
Timothy c74381619e Merge branch 'feature/queen-worker-comm' into feat/question-widget 2026-03-03 13:57:52 -08:00
Timothy d58f3103dd fix: guard register_tools for s3_tool and mssql_tool when SDK not available 2026-03-03 13:54:46 -08:00
Timothy 5d1ed35660 fix: remove shell heredoc artifacts from community power_bi_tool 2026-03-03 13:52:20 -08:00
Timothy 1f3e305534 fix: guard optional SDK imports (boto3, pyodbc) and remove s3_tool registration 2026-03-03 13:51:04 -08:00
Timothy 7d8fdd279c fix: revert Asana to httpx-based implementation (asana SDK not available) 2026-03-03 13:33:35 -08:00
Timothy cacae9f290 fix: compaction logic 2026-03-03 13:33:01 -08:00
Timothy bb061b770f merge: incorporate QuickBooks community PR #4158
# Conflicts:
#	examples/templates/deep_research_agent/config.py
#	examples/templates/tech_news_reporter/config.py
#	tools/README.md
#	tools/src/aden_tools/credentials/__init__.py
#	tools/src/aden_tools/credentials/quickbooks.py
#	tools/src/aden_tools/tools/__init__.py
#	tools/src/aden_tools/tools/quickbooks_tool/__init__.py
#	tools/src/aden_tools/tools/quickbooks_tool/quickbooks_tool.py
#	tools/tests/tools/test_quickbooks_tool.py
2026-03-03 13:27:04 -08:00
Timothy a8768b9ed6 merge: incorporate MSSQL community PR #4200
# Conflicts:
#	tools/pyproject.toml
#	tools/src/aden_tools/credentials/integrations.py
#	tools/src/aden_tools/tools/__init__.py
2026-03-03 13:26:36 -08:00
Timothy b437aa5f6c merge: incorporate Linear community PR #3585
# Conflicts:
#	.claude/skills/hive-credentials/SKILL.md
#	tools/README.md
#	tools/src/aden_tools/tools/__init__.py
#	tools/src/aden_tools/tools/linear_tool/__init__.py
#	tools/src/aden_tools/tools/linear_tool/linear_tool.py
2026-03-03 13:24:57 -08:00
Timothy 9248182570 merge: incorporate Trello community PR #3376
# Conflicts:
#	tools/README.md
#	tools/src/aden_tools/tools/__init__.py
#	tools/src/aden_tools/tools/trello_tool/__init__.py
#	tools/src/aden_tools/tools/trello_tool/trello_tool.py
#	tools/tests/tools/test_trello_tool.py
2026-03-03 13:24:23 -08:00
bryan 511c1a6ed5 fix: update queen prompt around ask_user 2026-03-03 13:22:59 -08:00
Timothy 7c77c7170f merge: incorporate YouTube Transcript community PR #3520
# Conflicts:
#	tools/pyproject.toml
#	tools/src/aden_tools/tools/__init__.py
2026-03-03 13:22:46 -08:00
Timothy 85fcb6516c merge: incorporate Redshift community PR #3533
# Conflicts:
#	tools/pyproject.toml
#	tools/src/aden_tools/tools/__init__.py
#	tools/src/aden_tools/tools/redshift_tool/__init__.py
#	tools/src/aden_tools/tools/redshift_tool/redshift_tool.py
#	tools/tests/tools/test_redshift_tool.py
2026-03-03 13:17:41 -08:00
Timothy e8e76d85f7 merge: incorporate Pushover community PR #5424
# Conflicts:
#	tools/src/aden_tools/tools/pushover_tool/__init__.py
#	tools/src/aden_tools/tools/pushover_tool/pushover_tool.py
2026-03-03 13:17:18 -08:00
Timothy 5aaa5ae4d5 merge: incorporate Twitter/X community PR #3807
# Conflicts:
#	tools/src/aden_tools/credentials/__init__.py
#	tools/src/aden_tools/tools/__init__.py
#	tools/tests/test_credentials.py
2026-03-03 13:16:45 -08:00
Timothy c3a8ee9c7b merge: incorporate Calendly community PR #3947
# Conflicts:
#	tools/src/aden_tools/credentials/__init__.py
#	tools/src/aden_tools/credentials/calendly.py
#	tools/src/aden_tools/tools/__init__.py
#	tools/src/aden_tools/tools/calendly_tool/__init__.py
#	tools/src/aden_tools/tools/calendly_tool/calendly_tool.py
#	tools/tests/test_health_checks.py
#	tools/tests/tools/test_calendly_tool.py
2026-03-03 13:14:20 -08:00
Timothy 5d07a8aba5 merge: incorporate Airtable community PR #3953
# Conflicts:
#	tools/src/aden_tools/credentials/__init__.py
#	tools/src/aden_tools/credentials/airtable.py
#	tools/src/aden_tools/credentials/health_check.py
#	tools/src/aden_tools/tools/__init__.py
#	tools/src/aden_tools/tools/airtable_tool/__init__.py
#	tools/src/aden_tools/tools/airtable_tool/airtable_tool.py
#	tools/tests/test_health_checks.py
#	tools/tests/tools/test_airtable_tool.py
2026-03-03 13:13:47 -08:00
Timothy d18e0594b8 merge: incorporate Reddit community PR #3963
# Conflicts:
#	tools/pyproject.toml
#	tools/src/aden_tools/credentials/__init__.py
#	tools/src/aden_tools/credentials/health_check.py
#	tools/src/aden_tools/credentials/reddit.py
#	tools/src/aden_tools/tools/__init__.py
#	tools/src/aden_tools/tools/reddit_tool/__init__.py
#	tools/src/aden_tools/tools/reddit_tool/reddit_tool.py
#	tools/tests/tools/test_reddit_tool.py
#	uv.lock
2026-03-03 13:12:55 -08:00
Timothy 26dcc86a24 merge: incorporate Zoho CRM community PR #4713
# Conflicts:
#	tools/src/aden_tools/credentials/__init__.py
#	tools/src/aden_tools/tools/__init__.py
#	tools/src/aden_tools/tools/zoho_crm_tool/__init__.py
#	tools/src/aden_tools/tools/zoho_crm_tool/zoho_crm_tool.py
#	tools/tests/test_health_checks.py
2026-03-03 13:11:51 -08:00
Timothy e928ad19e5 merge: incorporate Lusha community PR #4714
# Conflicts:
#	tools/src/aden_tools/credentials/__init__.py
#	tools/src/aden_tools/credentials/lusha.py
#	tools/src/aden_tools/tools/__init__.py
#	tools/src/aden_tools/tools/lusha_tool/__init__.py
#	tools/src/aden_tools/tools/lusha_tool/lusha_tool.py
#	tools/tests/tools/test_lusha_tool.py
2026-03-03 13:11:33 -08:00
Timothy 6768aaa575 merge: incorporate Apify community PR #4770
# Conflicts:
#	tools/src/aden_tools/credentials/__init__.py
#	tools/src/aden_tools/credentials/apify.py
#	tools/src/aden_tools/tools/__init__.py
#	tools/src/aden_tools/tools/apify_tool/__init__.py
#	tools/src/aden_tools/tools/apify_tool/apify_tool.py
#	tools/tests/tools/test_apify_tool.py
2026-03-03 13:10:45 -08:00
Timothy f561aacbfc merge: incorporate Attio community PR #4832
# Conflicts:
#	tools/src/aden_tools/credentials/__init__.py
#	tools/src/aden_tools/credentials/attio.py
#	tools/src/aden_tools/tools/__init__.py
#	tools/src/aden_tools/tools/attio_tool/__init__.py
#	tools/src/aden_tools/tools/attio_tool/attio_tool.py
2026-03-03 13:10:09 -08:00
RichardTang-Aden af1ece40c2 Merge pull request #5742 from aden-hive/load-new-session-from-home
Load new session from home
2026-03-03 13:09:44 -08:00
Timothy d9edd7adf7 merge: incorporate Asana community PR #4857
# Conflicts:
#	tools/src/aden_tools/credentials/__init__.py
#	tools/src/aden_tools/credentials/asana.py
#	tools/src/aden_tools/tools/__init__.py
#	tools/src/aden_tools/tools/asana_tool/__init__.py
#	tools/tests/tools/test_asana_tool.py
2026-03-03 13:08:30 -08:00
Richard Tang 3541fab363 feat: add uv instruction to agents 2026-03-03 13:06:50 -08:00
Richard Tang 1160dceeff feat: agents.md for agent collaboration 2026-03-03 13:06:09 -08:00
bryan bbe8efeba2 fix: prevent queen auto-block from overwriting pending worker questions 2026-03-03 13:04:33 -08:00
Timothy b4a5323009 merge: incorporate Brevo community PR #5136
# Conflicts:
#	tools/src/aden_tools/credentials/__init__.py
#	tools/src/aden_tools/credentials/brevo.py
#	tools/src/aden_tools/tools/brevo_tool/__init__.py
#	tools/src/aden_tools/tools/brevo_tool/brevo_tool.py
2026-03-03 13:04:29 -08:00
Timothy ade8b5b9a7 merge: incorporate Databricks community PR #5428
# Conflicts:
#	tools/src/aden_tools/credentials/__init__.py
#	tools/src/aden_tools/credentials/databricks.py
#	tools/src/aden_tools/tools/__init__.py
#	tools/src/aden_tools/tools/databricks_tool/__init__.py
#	tools/src/aden_tools/tools/databricks_tool/databricks_tool.py
#	tools/tests/tools/test_databricks_tool.py
2026-03-03 13:02:30 -08:00
Timothy e4ace3d484 merge: incorporate YouTube community PR #5673 (resolve conflicts, preserve README) 2026-03-03 12:29:32 -08:00
Timothy f3dd25adc5 merge: incorporate Power BI community PR #4341 2026-03-03 12:27:06 -08:00
Timothy ec251f8168 merge: incorporate SAP S/4HANA community PR #5519 2026-03-03 12:27:02 -08:00
Timothy 1bb9579dc5 merge: incorporate Plaid community PR #5518 2026-03-03 12:26:56 -08:00
Timothy 7ebf4146ce merge: incorporate AWS S3 community PR #5521 2026-03-03 12:26:50 -08:00
Richard Tang a8db4cb2f5 fix: mcp path 2026-03-03 12:19:32 -08:00
Richard Tang 24433396dd feat: use send instead of draft for email reply agent 2026-03-03 12:04:44 -08:00
Richard Tang 02bdf17641 chore: move the email reply sample agent 2026-03-03 11:59:14 -08:00
Timothy e0e05f3488 chore: register Obsidian tool in tool/credential registries 2026-03-03 11:55:12 -08:00
Timothy c92f2510c8 test: add Obsidian tool unit tests (read, write, append, search, list, active) 2026-03-03 11:55:12 -08:00
Timothy ea1fbe9ee1 chore: add Obsidian credential spec (REST API key) 2026-03-03 11:55:11 -08:00
Timothy 84a0be0179 feat: add Obsidian knowledge management integration (#3741)
6 tools: obsidian_read_note, obsidian_write_note, obsidian_append_note,
obsidian_search, obsidian_list_files, obsidian_get_active.
Uses Local REST API plugin with Bearer token auth. Supports vault
browsing, full-text search, and note CRUD with frontmatter metadata.
2026-03-03 11:55:04 -08:00
RichardTang-Aden 54f5c0dc91 Merge pull request #5735 from aden-hive/docs/readme/v6
docs: reorder section in documentation
2026-03-03 11:54:09 -08:00
Richard Tang adf1a10318 docs: reorder section in documentation 2026-03-03 11:53:05 -08:00
RichardTang-Aden e2a679a265 Merge pull request #5734 from aden-hive/docs/readme/v6
docs: add running screenshot and update the coding agent instruction
2026-03-03 11:50:56 -08:00
Richard Tang a3916a6932 docs: add running screenshot and update the coding agent instruction 2026-03-03 11:49:19 -08:00
Timothy 1b5780461e chore: register Langfuse tool in tool/credential registries 2026-03-03 11:42:49 -08:00
Timothy c8d35b63a4 test: add Langfuse tool unit tests (traces, scores, prompts) 2026-03-03 11:42:49 -08:00
Timothy feb1ebae04 chore: add Langfuse credential specs (public key, secret key) 2026-03-03 11:42:48 -08:00
Timothy efe49d0a5b feat: add Langfuse LLM observability integration (#5322)
6 tools: langfuse_list_traces, langfuse_get_trace, langfuse_list_scores,
langfuse_create_score, langfuse_list_prompts, langfuse_get_prompt.
Uses HTTP Basic Auth with public/secret key pair. Supports cloud and
self-hosted instances with offset-based pagination.
2026-03-03 11:41:11 -08:00
Timothy e50a5ea22a chore: register Zoom and n8n tools in tool/credential registries 2026-03-03 11:31:25 -08:00
Timothy 6382c94d0a test: add n8n tool unit tests (workflows, executions, activate/deactivate) 2026-03-03 11:31:21 -08:00
Timothy 58ce84c9cc chore: add n8n credential specs (API key, base URL) 2026-03-03 11:31:20 -08:00
Timothy 08fd6ff765 feat: add n8n workflow automation integration (#2931)
6 tools: n8n_list_workflows, n8n_get_workflow, n8n_activate_workflow,
n8n_deactivate_workflow, n8n_list_executions, n8n_get_execution.
Uses X-N8N-API-KEY header auth with configurable base URL.
Supports cursor-based pagination and execution status filtering.
2026-03-03 11:31:15 -08:00
Timothy a9cb79909c test: add Zoom tool unit tests (user, meetings, recordings) 2026-03-03 11:31:07 -08:00
Timothy 852f8ccd94 chore: add Zoom credential spec (Server-to-Server OAuth token) 2026-03-03 11:31:07 -08:00
Timothy 9388ef3e99 feat: add Zoom meeting management integration (#2867)
6 tools: zoom_get_user, zoom_list_meetings, zoom_get_meeting,
zoom_create_meeting, zoom_delete_meeting, zoom_list_recordings.
Uses Server-to-Server OAuth Bearer token. Supports token-based
pagination and cloud recording retrieval by date range.
2026-03-03 11:31:00 -08:00
Timothy 04afb0c4bb chore: register Salesforce and Shopify tools in tool/credential registries 2026-03-03 11:22:40 -08:00
Timothy a07fd44de3 test: add Shopify tool unit tests (orders, products, customers, search) 2026-03-03 11:22:35 -08:00
Timothy f6c1b13846 chore: add Shopify credential specs (access token, store name) 2026-03-03 11:22:35 -08:00
Timothy 654fa3dd1f feat: add Shopify Admin REST API integration - orders, products, customers (#2984)
6 tools: shopify_list_orders, shopify_get_order, shopify_list_products,
shopify_get_product, shopify_list_customers, shopify_search_customers.
Uses X-Shopify-Access-Token header auth with store subdomain.
2026-03-03 11:22:29 -08:00
Timothy 8183449d27 test: add Salesforce CRM tool unit tests (SOQL, CRUD, describe, list objects) 2026-03-03 11:22:16 -08:00
Timothy a9acfb86ad chore: add Salesforce credential specs (access token, instance URL) 2026-03-03 11:22:15 -08:00
Timothy d7d070ac5f feat: add Salesforce CRM integration - SOQL, records, and metadata (#2916)
6 tools: salesforce_soql_query, salesforce_get_record, salesforce_create_record,
salesforce_update_record, salesforce_describe_object, salesforce_list_objects.
Uses OAuth2 Bearer token auth with instance URL. Supports pagination via
nextRecordsUrl and field-level describe with picklist values.
2026-03-03 11:22:08 -08:00
RichardTang-Aden ead51f1eb6 Merge pull request #5732 from aden-hive/docs/readme/v6
docs: update README and sync all i18n translations
2026-03-03 11:19:06 -08:00
Timothy 8c01b573ce chore: register Redshift and SAP S/4HANA in tool/credential registries 2026-03-03 11:11:12 -08:00
Timothy 7744f21b9d test: add SAP S/4HANA tool unit tests (POs, partners, products, sales orders) 2026-03-03 11:11:08 -08:00
Timothy 9ed23a235f chore: add SAP S/4HANA credential specs (base URL, username, password) 2026-03-03 11:11:07 -08:00
Timothy e88328321f feat: add SAP S/4HANA Cloud read-only procurement integration (#3182) 2026-03-03 11:11:06 -08:00
Timothy a4c516bea1 test: add Redshift tool unit tests (execute, describe, results, databases, tables) 2026-03-03 11:11:00 -08:00
Timothy 1c932a04ef chore: add Redshift credential specs (AWS access key, secret key) 2026-03-03 11:11:00 -08:00
Timothy 76d34be4c2 feat: add Amazon Redshift Data API integration - SQL and schema browsing (#3267) 2026-03-03 11:10:59 -08:00
bryan cb0e9ff9ec chore: fixing tests 2026-03-03 11:07:49 -08:00
Timothy d6e8afe316 chore: register Azure SQL and Kafka in tool/credential registries 2026-03-03 11:03:31 -08:00
Timothy a04f2bcf99 test: add Kafka tool unit tests (topics, produce, consumer groups) 2026-03-03 11:03:27 -08:00
Timothy c138e7c638 chore: add Kafka credential specs (REST URL, cluster ID) 2026-03-03 11:03:27 -08:00
Timothy fc08c7007f feat: add Apache Kafka integration via Confluent REST Proxy (#4774) 2026-03-03 11:03:26 -08:00
Timothy d559bb3446 test: add Azure SQL tool unit tests (servers, databases, firewall rules) 2026-03-03 11:03:18 -08:00
Timothy 55a8c39e4b chore: add Azure SQL credential specs (token, subscription ID) 2026-03-03 11:03:17 -08:00
Timothy 02d6f10e5f feat: add Azure SQL Database management integration (#3377) 2026-03-03 11:03:16 -08:00
Timothy 77428a91cc chore: register Power BI and Snowflake in tool/credential registries 2026-03-03 10:56:46 -08:00
Timothy 51403dc276 test: add Snowflake tool unit tests (execute, status, cancel) 2026-03-03 10:56:43 -08:00
Timothy 914a07a35d chore: add Snowflake credential specs (account, token) 2026-03-03 10:56:42 -08:00
Timothy 3c70d7b424 feat: add Snowflake SQL REST API integration (#3230) 2026-03-03 10:56:41 -08:00
Timothy ce1ee4ff17 test: add Power BI tool unit tests (workspaces, datasets, reports, refresh) 2026-03-03 10:56:35 -08:00
Timothy fca41d9bda chore: add Power BI credential spec (POWERBI_ACCESS_TOKEN) 2026-03-03 10:56:34 -08:00
Timothy ff889e02f7 feat: add Power BI integration - workspaces, datasets, reports (#3973) 2026-03-03 10:56:34 -08:00
Richard Tang cbd2c86bbf docs: sync all i18n READMEs with primary README 2026-03-03 10:53:11 -08:00
Timothy 43ab460462 chore: register Terraform Cloud and Lusha in tool/credential registries 2026-03-03 10:49:21 -08:00
Timothy caa06e266b test: add Lusha tool unit tests (enrich, search, usage) 2026-03-03 10:49:17 -08:00
Timothy 3622ca78ee chore: add Lusha credential spec (LUSHA_API_KEY) 2026-03-03 10:49:17 -08:00
Timothy 019e3f9659 feat: add Lusha B2B contact and company enrichment integration (#3461) 2026-03-03 10:49:16 -08:00
Timothy 208cb579a2 test: add Terraform Cloud tool unit tests (workspaces, runs) 2026-03-03 10:49:09 -08:00
Timothy 17de7e4485 chore: add Terraform Cloud credential spec (TFC_TOKEN) 2026-03-03 10:49:08 -08:00
Timothy 810616eee1 feat: add Terraform Cloud integration - workspaces and runs (#4773) 2026-03-03 10:48:41 -08:00
Timothy 191f583669 chore: register Twitter/X and Tines in tool/credential registries 2026-03-03 10:35:46 -08:00
Timothy 1d638cc18e test: add Tines tool unit tests (stories, actions, logs) 2026-03-03 10:35:42 -08:00
Timothy 3efa1f3b88 chore: add Tines credential specs (domain, api_key) 2026-03-03 10:35:42 -08:00
Timothy 4daa33db09 feat: add Tines integration - security automation stories and actions
Implements 5 tools via Tines REST API:
- tines_list_stories: List workflow stories with search/filter
- tines_get_story: Get story details including entry/exit agents
- tines_list_actions: List actions (agents) in stories
- tines_get_action: Get action details with sources/receivers
- tines_get_action_logs: Get action execution logs by level

Uses Bearer token auth with tenant domain.
2026-03-03 10:35:37 -08:00
Timothy fab2fb0056 test: add Twitter/X tool unit tests (search, user, timeline, tweet) 2026-03-03 10:35:29 -08:00
Timothy ce885c120e chore: add Twitter/X credential spec (bearer_token) 2026-03-03 10:35:28 -08:00
Timothy 75b53c47ff feat: add Twitter/X integration - tweet search and user lookup via API v2
Implements 4 tools via X API v2:
- twitter_search_tweets: Search recent tweets with query operators
- twitter_get_user: Get user profile by username
- twitter_get_user_tweets: Get user timeline
- twitter_get_tweet: Get tweet details by ID

Uses Bearer token auth (app-only, read access).
2026-03-03 10:35:21 -08:00
Timothy 2936f73707 chore: register AWS S3 and QuickBooks in tool/credential registries 2026-03-03 10:22:46 -08:00
Timothy e26426b138 test: add QuickBooks tool unit tests (query, entities, invoices) 2026-03-03 10:22:42 -08:00
Timothy 62cacb8e28 chore: add QuickBooks credential specs (access_token, realm_id) 2026-03-03 10:22:42 -08:00
Timothy f3e37190ce feat: add QuickBooks Online integration - accounting API
Implements 5 tools via QuickBooks Online API v3:
- quickbooks_query: Query entities with SQL-like syntax
- quickbooks_get_entity: Get entity by type and ID
- quickbooks_create_customer: Create customers
- quickbooks_create_invoice: Create invoices with line items
- quickbooks_get_company_info: Get company details

Uses OAuth 2.0 Bearer token auth. Supports sandbox mode.
2026-03-03 10:22:35 -08:00
Timothy 0863bbbd2f test: add AWS S3 tool unit tests (buckets, objects, get, put, delete) 2026-03-03 10:22:25 -08:00
Timothy b23fa1daad chore: add AWS S3 credential specs (access_key_id, secret_access_key) 2026-03-03 10:22:24 -08:00
Timothy 05cc1ce599 feat: add AWS S3 integration - object storage via REST API with SigV4
Implements 5 tools via AWS S3 REST API:
- s3_list_buckets: List all buckets in the account
- s3_list_objects: List objects with prefix/delimiter filtering
- s3_get_object: Get object content and metadata
- s3_put_object: Upload text objects
- s3_delete_object: Delete objects

Uses AWS Signature V4 signing (no boto3 dependency).
2026-03-03 10:22:16 -08:00
RichardTang-Aden a1c045fd91 Merge pull request #5727 from aden-hive/docs/readme/v6
Docs: Remove TUI references from README
2026-03-03 10:14:13 -08:00
Timothy e6939f8d51 chore: register PagerDuty and Calendly in tool/credential registries 2026-03-03 10:13:18 -08:00
Timothy 801fef12e1 test: add Calendly tool unit tests (user, events, invitees) 2026-03-03 10:13:14 -08:00
Timothy 5845629175 chore: add Calendly credential spec (personal_access_token) 2026-03-03 10:13:13 -08:00
Timothy 11b916301a feat: add Calendly integration - scheduling events and invitees
Implements 5 tools via Calendly API v2:
- calendly_get_current_user: Get user URI and profile info
- calendly_list_event_types: List meeting templates
- calendly_list_scheduled_events: List booked meetings with date filters
- calendly_get_scheduled_event: Get event details by URI
- calendly_list_invitees: List invitees for an event

Uses Bearer token auth (Personal Access Token).
2026-03-03 10:13:07 -08:00
Timothy aa5d80b1d2 test: add PagerDuty tool unit tests (incidents, services) 2026-03-03 10:13:02 -08:00
Timothy aa5f990acd chore: add PagerDuty credential specs (api_key, from_email) 2026-03-03 10:13:01 -08:00
Timothy 9764c82c2a feat: add PagerDuty integration - incident management and services
Implements 5 tools via PagerDuty REST API v2:
- pagerduty_list_incidents: List incidents with status/urgency/date filters
- pagerduty_get_incident: Get incident details by ID
- pagerduty_create_incident: Create incidents on a service
- pagerduty_update_incident: Acknowledge or resolve incidents
- pagerduty_list_services: List services with name search

Uses Token auth header, From header for write operations.
2026-03-03 10:12:55 -08:00
Richard Tang f921846879 docs: update the latest features from recent changes 2026-03-03 10:12:43 -08:00
Richard Tang a370403b16 docs: update readme instructions 2026-03-03 10:06:13 -08:00
Timothy 543a71eb6c chore: register MongoDB and Airtable in tool/credential registries 2026-03-03 10:06:12 -08:00
Timothy 8285593c13 test: add Airtable tool unit tests (records, bases, schema) 2026-03-03 10:06:08 -08:00
Timothy 6fbfe773fb chore: add Airtable credential spec (personal_access_token) 2026-03-03 10:06:07 -08:00
Timothy a8c54b1e5f feat: add Airtable integration - record CRUD and base metadata
Implements 6 tools via Airtable Web API:
- airtable_list_records: List records with filters, sort, field selection
- airtable_get_record: Get a single record by ID
- airtable_create_records: Create up to 10 records per request
- airtable_update_records: Partial update up to 10 records per request
- airtable_list_bases: List accessible bases
- airtable_get_base_schema: Get table and field schema for a base

Uses Bearer token auth (Personal Access Token).
2026-03-03 10:06:03 -08:00
Timothy a5323abfca test: add MongoDB tool unit tests (find, insert, update, delete, aggregate) 2026-03-03 10:05:53 -08:00
Timothy ba4df2d2c4 chore: add MongoDB credential specs (data_api_url, api_key, data_source) 2026-03-03 10:05:52 -08:00
Timothy 6510633a8c feat: add MongoDB Atlas Data API integration - document CRUD and aggregation
Implements 6 tools via MongoDB Atlas Data API:
- mongodb_find: Find documents with filters, projection, sort, limit
- mongodb_find_one: Find a single document
- mongodb_insert_one: Insert a document
- mongodb_update_one: Update a document with MongoDB operators
- mongodb_delete_one: Delete a document
- mongodb_aggregate: Run aggregation pipelines

Uses API key auth header. All endpoints are POST.
2026-03-03 10:05:42 -08:00
Timothy 9172e5f46b chore: register Twilio and Zendesk in tool/credential registries 2026-03-03 09:56:14 -08:00
Timothy ed3e3848c0 test: add Zendesk tool unit tests (list, get, create, update, search) 2026-03-03 09:56:10 -08:00
Timothy ee90185d5c chore: add Zendesk credential specs (subdomain, email, api_token) 2026-03-03 09:56:09 -08:00
Timothy 6eb2633677 feat: add Zendesk integration - ticket management and search
Implements 5 tools via Zendesk Support API v2:
- zendesk_list_tickets: List tickets with status/sort filters
- zendesk_get_ticket: Get ticket details by ID
- zendesk_create_ticket: Create tickets with priority/type/tags
- zendesk_update_ticket: Update ticket fields and add comments
- zendesk_search_tickets: Search tickets with Zendesk query syntax

Uses Basic auth (email/token:api_token).
2026-03-03 09:56:00 -08:00
Timothy c1f215dcf2 test: add Twilio tool unit tests (SMS, WhatsApp, list, get) 2026-03-03 09:55:50 -08:00
Timothy 97cc9a1045 chore: add Twilio credential specs (account_sid, auth_token) 2026-03-03 09:55:49 -08:00
Timothy 5f7b02a4b7 feat: add Twilio integration - SMS and WhatsApp messaging
Implements 4 tools via Twilio REST API:
- twilio_send_sms: Send SMS messages
- twilio_send_whatsapp: Send WhatsApp messages
- twilio_list_messages: List message history with filters
- twilio_get_message: Get message details by SID

Uses Basic auth (AccountSID:AuthToken), form-urlencoded POST.
2026-03-03 09:55:43 -08:00
Richard Tang ad6d504ea4 docs: remove TUI in the readme 2026-03-03 09:52:06 -08:00
Timothy e696b41a0e chore: register GitLab and Google Sheets in tool/credential registries 2026-03-03 09:49:23 -08:00
Timothy 1f9acc6135 test: add Google Sheets tool unit tests (metadata, read, batch read) 2026-03-03 09:49:23 -08:00
Timothy 7e8699cb4b chore: add Google Sheets credential spec (api_key) 2026-03-03 09:49:22 -08:00
Timothy fd4fc657d6 feat: add Google Sheets integration - read spreadsheet data via API v4
3 tools: sheets_get_spreadsheet, sheets_read_range, sheets_batch_read.
Uses API key auth for read-only access to public spreadsheets.
2026-03-03 09:49:21 -08:00
Timothy 34403648b9 test: add GitLab tool unit tests (projects, issues, MRs) 2026-03-03 09:49:15 -08:00
Timothy 3795d50eb9 chore: add GitLab credential spec (personal access token) 2026-03-03 09:49:14 -08:00
Timothy 80515dde5a feat: add GitLab integration - projects, issues, merge requests
6 tools: gitlab_list_projects, gitlab_get_project, gitlab_list_issues,
gitlab_get_issue, gitlab_create_issue, gitlab_list_merge_requests.
Supports GitLab.com and self-hosted via configurable base URL.
2026-03-03 09:49:13 -08:00
Timothy b59094d35f fix: queen should not return on empty stream 2026-03-03 09:44:15 -08:00
Timothy efcd296d83 chore: register Notion and Jira tools in tool/credential registries 2026-03-03 09:43:32 -08:00
Timothy 802cb292b0 test: add Jira tool unit tests (issues, projects, comments) 2026-03-03 09:43:32 -08:00
Timothy 8e55f74d73 chore: add Jira credential specs (domain, email, api_token) 2026-03-03 09:43:31 -08:00
Timothy 3d810485a0 feat: add Jira integration - issues, projects, comments via REST API v3
6 tools: jira_search_issues, jira_get_issue, jira_create_issue,
jira_list_projects, jira_get_project, jira_add_comment. Uses Basic auth
with email + API token and Atlassian Document Format for text fields.
2026-03-03 09:43:30 -08:00
Timothy 94cfd48661 test: add Notion tool unit tests (search, pages, databases) 2026-03-03 09:43:16 -08:00
Timothy 87c8e741f3 chore: add Notion credential spec (api_token) 2026-03-03 09:43:15 -08:00
Timothy d0e92ed18d feat: add Notion integration - pages, databases, and search
5 tools: notion_search, notion_get_page, notion_create_page,
notion_query_database, notion_get_database. Uses Bearer auth
with Notion internal integration token.
2026-03-03 09:43:14 -08:00
Richard Tang 88640f9222 feat: email reply sample agent 2026-03-03 09:41:20 -08:00
Timothy 1927045519 chore: register Greenhouse and YouTube Transcript in tool/credential registries 2026-03-03 09:36:47 -08:00
Timothy 68cffb86c9 test: add YouTube Transcript tool unit tests (get, list transcripts) 2026-03-03 09:36:47 -08:00
Timothy 5bec989647 feat: add YouTube Transcript integration - captions and transcript retrieval
2 tools: youtube_get_transcript, youtube_list_transcripts.
Uses youtube-transcript-api library, no API key required.
2026-03-03 09:36:46 -08:00
Timothy 66f5d2f36c test: add Greenhouse tool unit tests (jobs, candidates, applications) 2026-03-03 09:36:40 -08:00
Timothy 941f815254 chore: add Greenhouse credential spec (api_token) 2026-03-03 09:36:39 -08:00
Timothy 42afd10518 feat: add Greenhouse integration - ATS jobs, candidates, applications
6 tools: greenhouse_list_jobs, greenhouse_get_job, greenhouse_list_candidates,
greenhouse_get_candidate, greenhouse_list_applications, greenhouse_get_application.
Uses Harvest API v1 with Basic auth (API token).
2026-03-03 09:36:38 -08:00
Timothy 3efa285a59 chore: register Cloudinary and Reddit tools in tool/credential registries 2026-03-03 09:31:22 -08:00
Timothy 4f2b4172b4 test: add Reddit tool unit tests (search, posts, comments, user) 2026-03-03 09:31:18 -08:00
Timothy 0d7de71b94 chore: add Reddit credential specs (client_id, client_secret) 2026-03-03 09:31:17 -08:00
Timothy f0f5b4bede feat: add Reddit integration - search, posts, comments, user info
4 tools: reddit_search, reddit_get_posts, reddit_get_comments, reddit_get_user.
Uses OAuth2 client_credentials flow for app-only access.
2026-03-03 09:31:17 -08:00
Timothy bfd27e97d3 test: add Cloudinary tool unit tests (upload, list, get, delete, search) 2026-03-03 09:31:10 -08:00
Timothy f2def27390 chore: add Cloudinary credential specs (cloud_name, api_key, api_secret) 2026-03-03 09:31:10 -08:00
Timothy b3f7bd6cc0 feat: add Cloudinary integration - upload, manage, search media assets
5 tools: cloudinary_upload, cloudinary_list_resources, cloudinary_get_resource,
cloudinary_delete_resource, cloudinary_search. Uses Basic auth with
API key/secret and supports image, video, and raw resource types.
2026-03-03 09:31:09 -08:00
Timothy 0e8e78dc5b chore: register Trello and Confluence tools in tool/credential registries 2026-03-03 09:22:03 -08:00
Timothy b259d85776 test: add Confluence tool tests (9 tests) 2026-03-03 09:22:02 -08:00
Timothy 175d9c3b7c feat: add Confluence credential spec with Basic auth (email + API token) 2026-03-03 09:21:55 -08:00
Timothy a2a810aabf feat: add Confluence integration - spaces, pages, content search via CQL 2026-03-03 09:21:54 -08:00
Timothy 175c7cfd51 test: add Trello tool tests (12 tests) 2026-03-03 09:21:47 -08:00
Timothy 5ada973d38 feat: add Trello credential spec with API key and token auth 2026-03-03 09:21:39 -08:00
Timothy 0103276136 feat: add Trello integration - boards, lists, cards management 2026-03-03 09:21:37 -08:00
Timothy 1d9e8ec138 chore: register HuggingFace tool in tool/credential registries 2026-03-03 09:11:59 -08:00
Timothy 83ac2e71bb test: add HuggingFace tool tests (10 tests) 2026-03-03 09:11:56 -08:00
Timothy 0b35a729a7 feat: add HuggingFace credential spec with token auth 2026-03-03 09:11:55 -08:00
Timothy 56723a519a feat: add HuggingFace Hub integration - models, datasets, spaces search 2026-03-03 09:11:49 -08:00
Timothy ebff394c76 chore: register Plaid tool in tool/credential registries 2026-03-03 09:08:44 -08:00
Timothy ceecc97bc8 test: add Plaid tool tests (13 tests) 2026-03-03 09:08:40 -08:00
Timothy 313154f880 feat: add Plaid credential spec with client_id and secret auth 2026-03-03 09:08:38 -08:00
Timothy 3eb6417cdc feat: add Plaid integration - accounts, balances, transactions, institutions 2026-03-03 09:08:29 -08:00
Timothy 1b35d6ca0a chore: register Pinecone tool in tool/credential registries 2026-03-03 09:05:20 -08:00
Timothy 1d89f0ba9d test: add Pinecone tool tests (18 tests) 2026-03-03 09:05:16 -08:00
Timothy 864df0e21a feat: add Pinecone credential spec with API key auth 2026-03-03 09:05:14 -08:00
Timothy 3f626decc4 feat: add Pinecone vector database integration - indexes, vectors, queries 2026-03-03 09:05:06 -08:00
Timothy bf1760b1a9 chore: register DuckDuckGo tool in tool registry 2026-03-03 08:56:06 -08:00
Timothy 8a58ea6344 test: add DuckDuckGo tool tests (6 tests) 2026-03-03 08:56:06 -08:00
Timothy 662ff4c35f feat: add DuckDuckGo search integration - web search, news, images 2026-03-03 08:56:01 -08:00
Timothy af02352b49 chore: register Linear tool in tool/credential registries 2026-03-03 08:43:41 -08:00
Timothy db9f987d46 test: add Linear tool tests (10 tests) 2026-03-03 08:43:41 -08:00
Timothy 8490ce1389 feat: add Linear credential spec with API key auth 2026-03-03 08:43:41 -08:00
Timothy 55ea9a56a4 feat: add Linear integration - issues, projects, teams, search via GraphQL 2026-03-03 08:43:41 -08:00
Timothy bd2381b10d chore: register Asana tool in tool/credential registries 2026-03-03 08:40:02 -08:00
Timothy 443de755bd test: add Asana tool tests (12 tests) 2026-03-03 08:40:02 -08:00
Timothy 55ec5f14ee feat: add Asana credential spec with PAT auth 2026-03-03 08:40:02 -08:00
Timothy 2e019302c9 feat: add Asana integration - tasks, projects, workspaces, search 2026-03-03 08:40:02 -08:00
Timothy b1e829644b chore: register Yahoo Finance tool in tool registry 2026-03-03 08:36:20 -08:00
Timothy 18f773e91b test: add Yahoo Finance tool tests (8 tests) 2026-03-03 08:36:19 -08:00
Timothy 987cfee930 feat: add Yahoo Finance integration - quotes, history, financials, company info 2026-03-03 08:36:19 -08:00
Timothy 57f6b8498a chore: register Google Search Console tool in tool/credential registries 2026-03-03 08:34:30 -08:00
Timothy 9f0d35977c test: add Google Search Console tool tests (10 tests) 2026-03-03 08:34:30 -08:00
Timothy e5910bbf2f feat: add Google Search Console credential spec with OAuth2 auth 2026-03-03 08:34:30 -08:00
Timothy 0015bf7b38 feat: add Google Search Console integration - analytics, sitemaps, URL inspection 2026-03-03 08:34:30 -08:00
Timothy a6b9234abb chore: register Zoho CRM tool in tool/credential registries 2026-03-03 08:32:13 -08:00
Timothy 086f3942b8 test: add Zoho CRM tool tests (12 tests) 2026-03-03 08:32:13 -08:00
Timothy 924f4abede feat: add Zoho CRM credential spec with OAuth token auth 2026-03-03 08:32:13 -08:00
Timothy 02be91cb08 feat: add Zoho CRM integration - leads, contacts, deals, accounts, notes 2026-03-03 08:32:13 -08:00
Timothy c2298393ab chore: register Apify tool in tool/credential registries 2026-03-03 08:29:33 -08:00
Timothy 4b8c63bf6e test: add Apify tool tests (11 tests) 2026-03-03 08:29:33 -08:00
Timothy e089c3b72c feat: add Apify credential spec with API token auth 2026-03-03 08:29:33 -08:00
Timothy a93983b5db feat: add Apify integration - actors, runs, datasets, key-value stores 2026-03-03 08:29:27 -08:00
Timothy 20f6329004 chore: register Attio tool in tool/credential registries 2026-03-03 08:25:12 -08:00
Timothy 3c2cf71c47 test: add Attio tool tests (14 tests) 2026-03-03 08:25:08 -08:00
Timothy 56288c3137 feat: add Attio credential spec with API key auth 2026-03-03 08:25:04 -08:00
Timothy 79188921a5 feat: add Attio CRM integration - records, lists, notes, tasks 2026-03-03 08:24:58 -08:00
RichardTang-Aden 65962ddf58 Merge pull request #5709 from aden-hive/load-new-session-from-home
Fix new session creation when submitting prompt from home page
2026-03-03 08:20:20 -08:00
Timothy 5ab66008ae chore: register Pipedrive tool in tool/credential registries 2026-03-03 08:18:45 -08:00
Timothy f38c9ee049 test: add Pipedrive tool tests (16 tests) 2026-03-03 08:18:41 -08:00
Timothy 86f5e71ec2 feat: add Pipedrive credential spec with API token auth 2026-03-03 08:18:29 -08:00
Timothy 1e15cc8495 feat: add Pipedrive CRM integration - deals, contacts, orgs, activities, pipelines 2026-03-03 08:18:24 -08:00
Richard Tang bba44430c4 chore: ignore local dev skills 2026-03-03 08:17:32 -08:00
Timothy 077d82ad82 chore: register Docker Hub tool in tool/credential registries 2026-03-03 08:14:27 -08:00
Timothy e4cf7f3da2 test: add Docker Hub tool tests (9 tests) 2026-03-03 08:14:24 -08:00
Timothy e3bdc9e8d7 feat: add Docker Hub credential spec with PAT auth 2026-03-03 08:14:20 -08:00
Timothy f1c1c9aab3 feat: add Docker Hub integration - search, repos, tags, image details 2026-03-03 08:14:15 -08:00
Timothy 97cbcf7658 fix: adapt path guarantee 2026-03-03 08:11:49 -08:00
Richard Tang 69c71d77fb fix: load-new-session from home 2026-03-03 08:09:22 -08:00
Timothy 4860739a2f chore: register Vercel in tool/credential registries (#5044) 2026-03-03 08:08:16 -08:00
Timothy 791ee40cd6 test: add Vercel tool unit tests (#5044) 2026-03-03 08:08:12 -08:00
Timothy e0191ac52b feat: add Vercel credential spec (#5044) 2026-03-03 08:08:07 -08:00
Timothy e0724df196 feat: add Vercel tool - deployments, projects, domains, env vars (#5044) 2026-03-03 08:08:00 -08:00
Timothy 2a56294638 chore: register Databricks in tool/credential registries (#5167) 2026-03-03 08:05:25 -08:00
Timothy d5cd557013 test: add Databricks tool unit tests (#5167) 2026-03-03 08:05:21 -08:00
Timothy 2a43f23a3d feat: add Databricks credential spec (#5167) 2026-03-03 08:05:03 -08:00
Timothy 69af8f569a feat: add Databricks tool - SQL, jobs, clusters, workspace (#5167) 2026-03-03 08:04:34 -08:00
bryan dcc11c9ea3 chore: move test deps to testing extra and dev group 2026-03-03 08:03:02 -08:00
Timothy 4b4abb47b0 Merge branch 'feature/queen-worker-comm' into fix/queen-recovery 2026-03-03 08:02:59 -08:00
Timothy 0e86dbcc9b chore: register Redis tool in tool/credential registries (#5370) 2026-03-03 08:01:43 -08:00
Timothy 92c75aa6f5 test: add Redis tool unit tests (#5370) 2026-03-03 08:01:37 -08:00
Timothy be41d848e5 feat: add Redis credential spec (#5370) 2026-03-03 08:01:32 -08:00
Timothy f7c299f6f0 feat: add Redis tool implementation - KV, hash, list, pub/sub (#5370) 2026-03-03 08:01:25 -08:00
Timothy b6a0f65a09 feat: add Pushover push notification integration (#5415)
4 tools: pushover_send, pushover_validate_user, pushover_list_sounds,
pushover_check_receipt. Supports priority levels, HTML, sounds, TTL.
All 12 unit tests and 13 conformance tests passing.
2026-03-03 07:58:29 -08:00
Timothy 1e7b0068ed chore: register Supabase tool in tool/credential registries 2026-03-03 07:54:34 -08:00
bryan 207d2fb911 feat: wire QuestionWidget into ChatPanel and workspace 2026-03-03 07:54:32 -08:00
Timothy de5105f313 feat: add Supabase integration - DB, Auth, Edge Functions (#5489)
7 tools: supabase_select, supabase_insert, supabase_update, supabase_delete,
supabase_auth_signup, supabase_auth_signin, supabase_edge_invoke.
All 19 unit tests and 13 conformance tests passing.
2026-03-03 07:54:27 -08:00
bryan c65a99c87d feat: add QuestionWidget component 2026-03-03 07:54:21 -08:00
bryan b4d7e57250 feat: update queen prompt for structured ask_user 2026-03-03 07:53:35 -08:00
bryan 63845a07aa feat: add queen-context endpoint and SSE replay 2026-03-03 07:53:22 -08:00
bryan 68ac73aa55 feat: add options support to ask_user tool 2026-03-03 07:53:05 -08:00
Timothy 6d32f1bb36 chore: register YouTube and Microsoft Graph tools in tool/credential registries 2026-03-03 07:51:33 -08:00
Timothy 9c316cee28 feat: add Microsoft Graph integration - Outlook, Teams, OneDrive (#5601)
11 tools: outlook_list_messages, outlook_get_message, outlook_send_mail,
teams_list_teams, teams_list_channels, teams_send_channel_message,
teams_get_channel_messages, onedrive_search_files, onedrive_list_files,
onedrive_download_file, onedrive_upload_file.
All 15 unit tests and 13 conformance tests passing.
2026-03-03 07:47:49 -08:00
Timothy 6af4f2d6e6 feat: add YouTube Data API integration (#5603)
8 tools: search_videos, get_video_details, get_channel, list_channel_videos,
get_playlist, search_channels, get_video_comments, get_video_categories.
All 17 unit tests and 13 conformance tests passing.
2026-03-03 07:47:34 -08:00
Timothy bc9a43d5a9 fix: execution recovery 2026-03-03 07:43:05 -08:00
Amdev-5 57651900f1 Merge remote-tracking branch 'origin/main' into lusha 2026-03-03 18:46:12 +05:30
Amdev-5 46b0617018 Merge remote-tracking branch 'origin/main' into lusha
# Conflicts:
#	tools/src/aden_tools/credentials/health_check.py
#	tools/src/aden_tools/tools/__init__.py
#	tools/tests/test_health_checks.py
2026-03-03 18:34:54 +05:30
RichardTang-Aden 7b98a6613a Merge pull request #5656 from aden-hive/feature/queen-worker-comm
Release / Create Release (push) Waiting to run
Feature/queen worker comm
2026-03-02 22:50:13 -08:00
Richard Tang 26481e27a6 fix: fix tests and lint 2026-03-02 22:46:38 -08:00
Aaryann Chandola 87a26db779 Merge branch 'aden-hive:main' into fix/guardian-self-trigger-loop 2026-03-03 11:56:15 +05:30
Richard Tang bb227b3d73 chore: ruff lint 2026-03-02 21:30:07 -08:00
Richard Tang 8a0cf5e0ae Merge remote-tracking branch 'origin/feature/queen-worker-comm' into feature/queen-worker-comm 2026-03-02 21:27:22 -08:00
Timothy 69218d5699 chore: lint codes 2026-03-02 20:16:34 -08:00
Timothy 7d1433af21 fix: queen agent flakiness 2026-03-02 19:57:18 -08:00
Richard Tang 0bfbf1e9c5 fix: unused /hive-credentials prompts in the validation 2026-03-02 19:53:57 -08:00
Richard Tang 1ca4f5b22b refactor: update the preload_validation logic 2026-03-02 19:46:50 -08:00
Richard Tang 0984e4c1e8 feat: add gcu subagent validation and refactor the prestart validation steps 2026-03-02 18:35:25 -08:00
P Gokul Sree Chandra 7d9bd2e86b feat(tools): add YouTube Data API integration
- Implement 6 YouTube API tools (search videos, get video/channel details, list channel videos, get playlist items, search channels)
- Add YOUTUBE_API_KEY credential spec with help_url and description
- Register YouTube tool in tools/__init__.py
- Add comprehensive test coverage (18 tests) with mocking
- Add detailed README with setup instructions and examples
- Use httpx for HTTP requests to YouTube Data API v3
- Verified with real API integration testing

Implements #5603
2026-03-03 07:35:04 +05:30
Sarthak Karode 4cbf5a7434 feat(core): add pytest framework testing integration with helpful error messages (#5485) 2026-03-03 10:01:33 +08:00
Hundao b33178c5be fix(graph): move auto-block grace period check before _await_user_input (#5672)
The grace period logic for client-facing auto-blocks was placed after
_await_user_input(), which blocks forever since no inject_event is
scheduled for text-only turns. This caused test_text_after_user_input
_goes_to_judge to hang indefinitely, blocking CI framework tests.

Move the grace period check before the blocking call so that within
the grace window, auto-blocks with missing outputs skip blocking
entirely and continue to the next LLM turn for judge RETRY pressure.

Also adds an _auto_missing check: nodes with no missing outputs
(e.g. queen monitoring with output_keys=[]) should still block
as their text-only output is legitimate conversation.

Fixes #5633
2026-03-03 09:39:14 +08:00
Richard Tang dc6a336c60 fix: removed the unused build_capability_summary 2026-03-02 16:26:47 -08:00
Antiarin 20ef5cb14f test(runtime): add async test for canceling multiple tasks across streams 2026-03-03 05:54:42 +05:30
Antiarin 2c3ec7e74c fix(tui): fix pause/stop to cancel all running tasks across all graphs 2026-03-03 05:30:20 +05:30
Richard Tang b855336448 chore: ruff format issue 2026-03-02 15:47:30 -08:00
Richard Tang de021977fd Merge remote-tracking branch 'origin/main' into feature/queen-worker-comm 2026-03-02 15:39:15 -08:00
Timothy cd2b3fcd16 Merge branch 'feature/new-inbox-management-agent' into feature/queen-worker-comm 2026-03-02 14:46:14 -08:00
Timothy b64024ede5 fix: gcu error log throwing 2026-03-02 14:45:57 -08:00
bryan a280d23113 fix: removing escalate to coder from worker tools 2026-03-02 12:02:35 -08:00
Timothy 41785abdba fix: rephrasing 2026-03-02 11:54:22 -08:00
Timothy de494c7e55 Merge branch 'feature/queen-worker-comm' into feature/new-inbox-management-agent 2026-03-02 11:44:08 -08:00
Timothy 5fa0903ea8 fix: teach email agent to search emails 2026-03-02 11:43:40 -08:00
Timothy 7bd99fe074 fix: email inbox management agent 2026-03-02 11:01:21 -08:00
bryan c838e1ca6d feat: agent building animation 2026-03-02 10:54:57 -08:00
bryan f475923353 feat: subagents populate node panel 2026-03-02 09:59:24 -08:00
Timothy 43f43c92e3 Merge branch 'feature/queen-worker-comm' into feature/new-inbox-management-agent 2026-03-02 09:40:55 -08:00
Timothy 5463134322 fix: inbox management template v2 2026-03-02 09:40:36 -08:00
Timothy 3fbb392103 fix: add credentials to queen lifecycle tools 2026-03-02 09:39:38 -08:00
RichardTang-Aden a162da17e1 Merge pull request #5639 from RichardTang-Aden/main
feat: support Gemini 3.1 pro
2026-03-02 09:24:27 -08:00
Richard Tang b565134d57 chore: fix the ruff lint 2026-03-02 09:23:02 -08:00
Richard Tang 3aafc89912 feat: support Gemini 3.1 pro 2026-03-02 09:20:48 -08:00
bryan 93449f92fe fix: clear build cache in quickstart 2026-03-02 09:00:48 -08:00
Bryan @ Aden d766e68d42 Merge pull request #5494 from Antiarin/security/harden-validate-agent-path
[Bug][Security]: agent_path accepts arbitrary filesystem paths with no validation
2026-03-02 16:57:51 +00:00
Hundao 1d8b1f9774 fix: enforce 0600 permissions on OAuth token files (#5631)
* fix: enforce 0600 permissions on OAuth token files

Credential files were written with default umask permissions.
Use os.open with explicit 0o600 mode to ensure token files
are always owner-read/write only, regardless of umask.

Fixes #5530

* style: fix line too long in checkpoint_store.py
2026-03-02 18:30:40 +08:00
Rajneesh Chaudhary 5ea9abae83 fix(core): prevent sse critical event queue from blocking event bus (#5533) (#5536)
Disconnects slow clients instead of blocking the publisher task.

Signed-off-by: Rajneesh180 <rajneeshrehsaan48@gmail.com>
2026-03-02 17:57:52 +08:00
ArshpreetSingh04 15957499c5 docs(core): fix outdated goal-agent path reference in README (#5629)
Update the MCP client configuration example in core/README.md to replace the outdated `goal-agent` path with the correct `hive/core` path.

Fixes #5628
2026-03-02 17:07:25 +08:00
Timothy 0b50d9e874 fix: block idle event 2026-03-01 21:01:59 -08:00
Amdev-5 cce073dbdb fix(lusha): add pagination and empty filter validation
- Expose page parameter on search_people and search_companies
  (client + MCP tool) enabling access beyond the first 50 results
- Add guard requiring at least one filter on both search endpoints
  to prevent broad requests that burn API credits
- Add unit tests for pagination and empty filter validation
2026-03-02 10:20:08 +05:30
Timothy a1e54922bd fix: timer count down update 2026-03-01 20:22:46 -08:00
Timothy 63c0ca34ea Merge branch 'feature/agent-runtime-idling' into feature/queen-worker-comm 2026-03-01 20:14:46 -08:00
Timothy 135477e516 feat: agent idling detection 2026-03-01 20:14:35 -08:00
Timothy 8cac49cd91 feat: frontend display of scheduler count down 2026-03-01 20:13:21 -08:00
Timothy 28dce63682 fix: conversation ordering 2026-03-01 18:56:41 -08:00
Timothy 313ac952e0 Merge branch 'feature/tool-pill-v2' into feature/queen-worker-comm 2026-03-01 18:33:54 -08:00
Timothy 0633d5130b fix: command line refresh frontend build 2026-03-01 18:33:43 -08:00
Timothy 995e487b49 Merge branch 'feature/tool-pill-v2' into feature/queen-worker-comm 2026-03-01 18:26:49 -08:00
Timothy 64b58b57e0 fix: remove reddish color 2026-03-01 18:26:27 -08:00
Timothy c6465908df feat: colorful tool pills 2026-03-01 18:11:57 -08:00
Timothy ca96bcc09f fix: add pending question content to worker status 2026-03-01 18:11:15 -08:00
Timothy 65ee628fae fix: tool pill turn id 2026-03-01 17:58:31 -08:00
Timothy 02043614e5 feat: consolidate worker status report, fix conversation order 2026-03-01 17:56:27 -08:00
Timothy 212b9bf9d4 fix: load agent 2026-03-01 16:26:55 -08:00
Timothy 6070c30a88 Merge branch 'feat/open-hive' into feature/queen-worker-comm 2026-03-01 16:06:43 -08:00
Timothy 8a653e51bc feat: separate worker and queen input 2026-03-01 15:50:28 -08:00
Vasu Bansal 6a92588264 fix(plaid): update v0.6 credential compatibility and stabilize tests 2026-03-01 01:16:16 +05:30
Vasu Bansal 276aad6f0d feat: add Plaid banking integration
- Implement Plaid connector for account balances
- Add transaction history retrieval
- Include GL reconciliation functionality
- Add institution metadata lookup
- Include comprehensive tests and documentation

Closes #4016
2026-03-01 01:16:16 +05:30
Vasu Bansal 10620bda4f fix(sap): update credential-store compatibility and test imports 2026-03-01 01:07:00 +05:30
Vasu Bansal c214401a00 feat(integration): add SAP S/4HANA connector
Add complete SAP S/4HANA integration with:
- Connector for OData API access
- Credential management following Hive patterns
- Unit tests with mocked responses
- Documentation and usage examples

Refs #3182
2026-03-01 01:07:00 +05:30
Vasu Bansal 260ac33324 fix(s3): support v0.6 credential refs and register S3 tools 2026-03-01 00:56:22 +05:30
Vasu Bansal d4cd643860 feat: add AWS S3 integration for cloud object storage
- Add S3Storage class with upload, download, list, delete operations
- Support IAM roles, environment variables, and credential store
- Implement retry logic with adaptive backoff
- Add MCP tools: s3_upload, s3_download, s3_list, s3_delete, s3_check_credentials
- Include comprehensive tests with moto mocking
- Add documentation for setup and IAM permissions

Closes #3012
2026-03-01 00:54:57 +05:30
IamSayeed dc16cfda21 Merge branch 'main' into feature/add-asana-integration 2026-02-28 11:28:43 +05:30
RichardTang-Aden d562670425 Merge pull request #5501 from aden-hive/feat/open-hive
Feat: v6 windows compatibility support
2026-02-27 19:58:48 -08:00
Timothy Zhang 677bee6fe5 Merge branch 'feat/open-hive' of https://github.com/adenhq/hive into feat/open-hive 2026-02-27 19:55:54 -08:00
Timothy Zhang de27bfe76f fix: windows compatibility 2026-02-27 19:55:48 -08:00
Timothy 1c1dcb9c33 chore: new architecture 2026-02-27 19:55:05 -08:00
RichardTang-Aden 4ba950f155 Merge pull request #5499 from aden-hive/feat/open-hive
Release / Create Release (push) Waiting to run
feat: tool call revamp, Intercom & GA integrations, credential improvements
2026-02-27 19:41:11 -08:00
bryan 9c3a11d7bb chore: remove load agent 2026-02-27 19:14:35 -08:00
Richard Tang b7d357aea2 Merge remote-tracking branch 'upstream/feat/open-hive' into feat/sub-agent-framework 2026-02-27 19:07:45 -08:00
bryan b2fed68346 chore: fix linter 2026-02-27 18:57:52 -08:00
bryan 0e996928be fix: load credentials check into new agent session 2026-02-27 18:50:03 -08:00
Timothy 6ff4ec3643 Merge branch 'feature/tool-call-revamp' into feat/open-hive 2026-02-27 18:45:35 -08:00
Timothy a0eda3e492 fix: event loop iterations 2026-02-27 18:41:13 -08:00
bryan 099f9514ef Merge branch 'main' into feat/open-hive 2026-02-27 18:10:42 -08:00
Timothy b2096e4a55 Merge branch 'feat/open-hive' into feature/tool-call-revamp 2026-02-27 18:10:32 -08:00
Timothy 1bf2164745 fix: spamming session update 2026-02-27 18:10:09 -08:00
bryan 48205bbde7 fix: dismiss credential banner 2026-02-27 18:09:51 -08:00
Bryan @ Aden 296aab6ecb Merge pull request #5171 from Ttian18/feat/tina/intercom-tool-4256
feat(tools): add Intercom tool integration (#4256)
2026-02-28 02:01:57 +00:00
Richard Tang 14182c45fc refactor: reorganized file tools 2026-02-27 17:52:21 -08:00
Richard Tang 2fa8f4283c Merge remote-tracking branch 'upstream/feat/open-hive' into feat/sub-agent-framework 2026-02-27 17:51:43 -08:00
Bryan @ Aden ad3cec2361 Merge pull request #4239 from Ttian18/feat/tina-google-analytics-tool
[Integration]: Google Analytics - Website Traffic & Marketing Performance #3727
2026-02-28 01:50:07 +00:00
bryan eddb628298 fix: remove mock_mode from queen/coder system prompt templates 2026-02-27 17:38:03 -08:00
bryan f63b226d8d fix: pipeline visual update 2026-02-27 17:32:59 -08:00
Timothy cc5bd61d86 feature: new tool calling logic 2026-02-27 17:29:00 -08:00
bryan 8bd14fb16f fix: graph summary for intake 2026-02-27 17:08:34 -08:00
bryan 30b5472e33 fix: center text and open hive 2026-02-27 16:47:20 -08:00
Adam Albarghouthi bc836db0f9 micro-fix: fix incorrect CLI commands and docstring in core docs (#5457)
- Replace non-existent CLI commands (calculate, interactive, analyze)
  with actual commands (run, shell, info) in core/README.md
- Fix test-list argument from <goal_id> to <agent_path> in core/README.md
- Fix misleading docstring on MockProvider.complete_with_tools()

Co-authored-by: Claude Opus 4.6 <noreply@anthropic.com>
Co-authored-by: hundao <alchemy_wimp@hotmail.com>
2026-02-28 08:40:58 +08:00
bryan bd3b0fb8eb chore: windows quickstart update 2026-02-27 16:13:36 -08:00
Adam Albarghouthi 7f28474967 micro-fix: fix wrong credential path and env var in docs (#5458)
* micro-fix: fix wrong credential path and env var in docs

Both docs/configuration.md and docs/environment-setup.md reference a
non-existent ADEN_CREDENTIALS_PATH env var and wrong default path
(~/.aden/credentials). The actual env var is HIVE_CREDENTIAL_KEY and
the default path is ~/.hive/credentials (see storage.py:119,125).

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

* micro-fix: clarify HIVE_CREDENTIAL_KEY comment wording

Reword comment to avoid implying the env var controls the path.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

---------

Co-authored-by: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-28 08:01:16 +08:00
bryan 09460b28bc refactor: move credentials from shell config to ~/.hive 2026-02-27 15:55:08 -08:00
wlkjyy 5d8ba1e49c micro-fix: tests: use unified session_* run IDs in runtime logging tests (#5480)
* tests: use session_* run IDs in runtime logging tests

* refactor: extract _sid() helper for session IDs in runtime logger tests
2026-02-28 07:54:59 +08:00
Richard Tang ccb394675b Merge remote-tracking branch 'upstream/feat/open-hive' into feat/sub-agent-framework 2026-02-27 14:48:47 -08:00
Richard Tang 931487a7d4 feat: clean the options for browser open tools that should not be used by LLM 2026-02-27 14:48:31 -08:00
bryan 3654c57f66 Merge branch 'main' into feat/open-hive 2026-02-27 14:48:10 -08:00
Richard Tang fb28280ced feat: human-friendly LLM and tool calls logs 2026-02-27 14:45:12 -08:00
bryan 6215441b58 fix: SSE reconnect on session change, tool pill per-call tracking, cancel/pause event emission 2026-02-27 14:37:54 -08:00
Richard Tang 52f16d5bb6 Merge remote-tracking branch 'upstream/feat/open-hive' into feat/sub-agent-framework 2026-02-27 13:49:14 -08:00
Antiarin e5b6c8581a feat: implement agent path validation and restrict loading to allowed directories 2026-02-28 02:56:31 +05:30
bryan 5dcca99913 fix: credential modal updates 2026-02-26 20:54:11 -08:00
Zhang 890b906f15 fix(tools): address review feedback on Google Analytics tool
- Use Credentials.from_service_account_file() instead of mutating os.environ
- Remove unused dimensions param from _format_report_response
- Remove unused metrics param from _format_realtime_response
- Extract duplicated property_id/limit validation into _validate_inputs helper
- Add credential_group="google_cloud" to GA and BigQuery specs
- Update tests to mock Credentials class
2026-02-26 20:46:20 -08:00
Timothy @aden 6a8286d4cf Merge pull request #5462 from aden-hive/feat/open-hive
Release / Create Release (push) Waiting to run
Feat/open hive
2026-02-26 20:41:58 -08:00
Timothy 680024f790 fix: cancel worker logic 2026-02-26 20:35:17 -08:00
Timothy 6f7bfb92a8 fix: patch the erroneous skip judge logic 2026-02-26 20:31:45 -08:00
Zhang 335a9603e8 feat(tools): add Google Analytics 4 integration (#3727)
Add read-only GA4 Data API v1 tools: ga_run_report, ga_get_realtime,
ga_get_top_pages, and ga_get_traffic_sources. Includes credential spec,
unit tests, and README.
2026-02-26 20:22:12 -08:00
Zhang 5e8a6202e7 fix(credentials): add Intercom health checker to registry (#4256)
Add IntercomHealthChecker (subclass of OAuthBearerHealthChecker) and
register it in HEALTH_CHECKERS so the credential registry completeness
test passes in CI.
2026-02-26 20:01:43 -08:00
Zhang 55a4cdefd7 fix(tools): pass assignee_type through to Intercom API and add README (#4256)
- Pass assignee_type from intercom_assign_conversation tool function
  through to _IntercomClient.assign_conversation() and into the API payload
- Add tests for assignee_type="team" passthrough at client and tool levels
- Add tool README with setup, usage examples, and error handling

Addresses PR #5171 review feedback from @bryanadenhq
2026-02-26 19:56:36 -08:00
Richard Tang 2b63135afb Merge remote-tracking branch 'upstream/feat/open-hive' into feat/sub-agent-framework 2026-02-26 19:33:24 -08:00
Timothy 49d8c3572d fix: stalled agent stop tools 2026-02-26 19:09:01 -08:00
bryan 4b40962186 feat: agent loading after change 2026-02-26 19:08:28 -08:00
Richard Tang 779b376c6e Merge remote-tracking branch 'upstream/feat/open-hive' into feat/sub-agent-framework 2026-02-26 19:02:35 -08:00
bryan 4e2a9a247a patch: credentials modal blocking incorrectly 2026-02-26 18:34:51 -08:00
Richard Tang b1f3d6b155 Merge remote-tracking branch 'upstream/feat/open-hive' into feat/sub-agent-framework 2026-02-26 17:59:15 -08:00
Timothy ea28a9d3c3 fix: turn off judge for now 2026-02-26 17:57:49 -08:00
bryan 69a03e463f cancel + queue msg 2026-02-26 17:57:21 -08:00
Richard Tang e7da62e61c Merge remote-tracking branch 'upstream/feat/open-hive' into feat/sub-agent-framework 2026-02-26 17:17:37 -08:00
Richard Tang 7176745e1c feat: GCU enabled in the quickstart menu 2026-02-26 17:15:37 -08:00
Timothy cce0e26f5c Merge branch 'feature/system-prompt-v2-worker-path' into feat/open-hive 2026-02-26 17:13:46 -08:00
Timothy 641af16dfc fix: nuanced preference tweaking 2026-02-26 17:09:47 -08:00
Timothy a335c427ef fix: worker file path fix 2026-02-26 17:05:51 -08:00
bryan 9ea6c959ae feat: mid-session credential management and MCP resync 2026-02-26 17:03:06 -08:00
Richard Tang 20efd523c9 Merge remote-tracking branch 'upstream/feature/llm-turn-logging' into feat/sub-agent-framework 2026-02-26 16:16:37 -08:00
Timothy 8fc7fff496 feature: log llm turn stop reasons 2026-02-26 16:14:51 -08:00
Richard Tang edf51e6996 feat: prompts for GCU 2026-02-26 15:45:03 -08:00
Richard Tang 6b867883ce chore: ruff lint 2026-02-26 15:03:06 -08:00
Richard Tang 35a05f4120 Merge remote-tracking branch 'upstream/feat/open-hive' into feat/sub-agent-framework 2026-02-26 14:59:48 -08:00
Richard Tang e0e78a97ce refactor: re-organize all the broswer tool and make them built-in for the gcu node type 2026-02-26 12:51:10 -08:00
Navya Bijoy ddd30a950d Integration: add Databricks MCP tool integration
Implements the Databricks MCP tool integration for the Hive agent framework
2026-02-26 21:01:59 +05:30
Richard Tang e4e476f463 Merge remote-tracking branch 'origin/feat/open-hive' into fix/codex-and-litellm-improvement 2026-02-26 07:24:16 -08:00
KRYSTALM7 3ca0e63d54 feat(tools): add Pushover push notification integration
Closes #5415
2026-02-26 13:54:34 +00:00
hundao c4c8917ecb fix: skip auto-block when weak models output text instead of calling tools
Client-facing nodes auto-block on text-only turns (wait for user input).
This breaks weak models (Codex) that output text like "Understood" instead
of calling tools after user responds.

Add _cf_expecting_work state: after user input, text-only turns with
missing output keys skip auto-block and go to judge, which pushes the
LLM to call set_output. Tool calls reset the state back to presenting
mode (auto-block on next text-only).

No behavioral change for strong models (they always call tools after
user input, so the new code path is never triggered).
2026-02-26 20:58:33 +08:00
hundao 1524d2ef00 fix: remove implementation hints from judge feedback for weak models
Judge feedback was saying "Use set_output tool to provide them" which
caused Codex to skip all work and call set_output directly. Changed to
"Follow your system prompt instructions to complete the work."
2026-02-26 20:56:14 +08:00
bryan 5032834034 fix: exit quickstart if claude code not configured 2026-02-25 20:38:28 -08:00
Richard Tang 0b83f6ea99 fix(wip): codex tool use bug fixes 2026-02-25 20:09:49 -08:00
bryan 415201f467 remove gpt-nano 2026-02-25 19:58:12 -08:00
bryan 73005a8498 fix: validate credentials before queen-initiated worker start 2026-02-25 19:37:54 -08:00
Timothy 4edb960fbd fix: stupid hard limit 2026-02-25 19:35:41 -08:00
Timothy 42d11ead01 fix: fake goal prompt injection 2026-02-25 19:30:20 -08:00
bryan 5e18f85b10 fix: deferred cred validation + dismissable error banners 2026-02-25 19:14:55 -08:00
Timothy 85b25bf006 fix: missing mcp reference 2026-02-25 19:10:35 -08:00
Timothy c1ba108489 Merge branch 'feature/refactor-system-prompt' into feat/open-hive 2026-02-25 18:46:16 -08:00
Richard Tang 214098aaae fix: remove the run_command tool from the predefined engineering tool set for worker agent 2026-02-25 18:36:00 -08:00
bryan 241a0b7adc fix: clean up stale active sessions on worker load 2026-02-25 18:29:15 -08:00
bryan 9a7b41a4be feat: show schedule info when clicking trigger nodes 2026-02-25 18:12:20 -08:00
Timothy fe918adb16 fix: cosmetics 2026-02-25 18:01:58 -08:00
bryan 746f026654 feat: 3-layer resume prompts + trigger node visualization 2026-02-25 17:49:44 -08:00
Richard Tang 8294cd3dd9 feat: fix codex tool call usage 2026-02-25 17:49:41 -08:00
Timothy 3bbc63b1db feature/refactored-system-prompt-narratives 2026-02-25 17:33:21 -08:00
Richard Tang 337fb6d922 refactor: deprecate the unused llm functions 2026-02-25 17:32:33 -08:00
bryan bda6b18e8a fix: session reconnect + iteration-based message IDs 2026-02-25 16:55:33 -08:00
bryan d256ff929f fix: faster input_requested + execution_id tracking 2026-02-25 15:38:46 -08:00
bryan f71b20cf07 filter out queen judge from action plan 2026-02-25 12:26:03 -08:00
Richard Tang db26b0afd6 feat: pop out the codex OAuth consent page 2026-02-25 11:53:00 -08:00
Timothy 145860f42e fix: consolidate validation endpoints 2026-02-25 09:54:06 -08:00
bryan d9f84648d0 revoke credential ux 2026-02-25 09:46:46 -08:00
Timothy 9fb7e0bae7 fix: load agent graph consistently 2026-02-25 09:02:37 -08:00
Bryan @ Aden b00203702e Merge pull request #5344 from juni2003/docs/fix-readme-org-links
docs(readme): fix broken org links
2026-02-25 16:57:00 +00:00
bryan ead85dd41f fix closing tab, remove 0/0 from credential modal 2026-02-25 08:11:24 -08:00
bryan cf5bf6f174 initial prompt from home page 2026-02-25 07:58:49 -08:00
bryan 46237e7309 kill judge and queen 2026-02-24 20:01:43 -08:00
bryan afa686b47b Merge branch 'main' into feat/open-hive 2026-02-24 19:38:46 -08:00
Timothy 21e02c9e50 Merge branch 'fix/credential-popup' into feat/open-hive 2026-02-24 19:29:21 -08:00
Timothy 30a188d7c8 fix: credential popup 2026-02-24 19:28:53 -08:00
bryan 355f51b25e quickstart update 2026-02-24 19:26:54 -08:00
Timothy 8e1cde86e8 Merge branch 'feat/openhive-cred-fixes' into feat/open-hive 2026-02-24 19:10:30 -08:00
Timothy c13b02c7d9 Merge branch 'fix/credential-loading' into feat/open-hive 2026-02-24 19:09:39 -08:00
bryan 9e72801c28 agent loading 2026-02-24 19:05:12 -08:00
RichardTang-Aden 3a3d538b73 Merge pull request #5367 from RichardTang-Aden/feat/codex-subscription-rebased
Feat/codex subscription rebased
2026-02-24 18:53:39 -08:00
Richard Tang b11bca0c67 chore: lint reformat 2026-02-24 18:53:04 -08:00
Richard Tang faf8975b42 chore: improve script code and solved lint errors 2026-02-24 18:51:37 -08:00
Richard Tang 863168880e fix: unused credential detect path removed 2026-02-24 18:41:36 -08:00
Timothy 384a1f0560 fix: credential loading 2026-02-24 18:40:39 -08:00
bryan 4bd1b1b9e6 credential updated 2026-02-24 18:33:09 -08:00
Richard Tang 8c3866a014 feat: optimized for the LLM selection option 2026-02-24 18:27:03 -08:00
Richard Tang 61283d9bd6 feat: Codex subscription OAuth 2026-02-24 18:24:36 -08:00
Richard Tang 585a7186d4 feat: support openai codex subscription as the LLM provider 2026-02-24 18:24:36 -08:00
Timothy 72a31c2a65 fix: credential validity, update api readme 2026-02-24 18:11:10 -08:00
RichardTang-Aden 10d9e54857 Merge pull request #4576 from mubarakar95/perf/reduce-subprocess-spawning-windows
perf: reduce subprocess spawning in quickstart scripts (#4427)
2026-02-24 17:47:22 -08:00
bryan e68695ee92 merge 2026-02-24 17:43:29 -08:00
RichardTang-Aden 11379fc0ef Merge branch 'main' into perf/reduce-subprocess-spawning-windows 2026-02-24 17:43:25 -08:00
Timothy 6d102382bd fix: session id issues 2026-02-24 17:42:09 -08:00
bryan 56335927e7 change from agentid to session id 2026-02-24 15:53:14 -08:00
Timothy a3fe994b22 fix: remove duplicative queen session starter api 2026-02-24 15:14:02 -08:00
Timothy 5754bdcc78 Merge branch 'feature/session-manager' into feat/open-hive 2026-02-24 15:01:01 -08:00
Timothy eef2fa9ffb feature: session manager, superceding agent manager 2026-02-24 15:00:09 -08:00
bryan 7286907cd4 multiple agent session running 2026-02-24 14:56:24 -08:00
Richard Tang 754e33a1ae feat: browser tools optimization 2026-02-24 14:05:26 -08:00
Timothy 1fbb431f1b Merge branch 'fix/globalize-queen-judge' into feat/open-hive 2026-02-24 13:28:29 -08:00
Timothy 0ad52b90d8 fix: globalize queen and judge agent's storage 2026-02-24 13:27:33 -08:00
bryan c44b12cc8b remove subgraph, persistent tabs, node action plan 2026-02-24 12:42:07 -08:00
Timothy 8381c95617 Merge branch 'fix/session-loading-isolation' into feat/open-hive 2026-02-24 11:18:48 -08:00
Timothy 3963855d1d fix: isolate session loading 2026-02-24 11:02:58 -08:00
Junaid 51154a3070 docs(readme): fix broken org links
Update repository URLs from adenhq/hive to aden-hive/hive to prevent 404s
2026-02-24 23:55:23 +05:00
Richard Tang b11b43bbe1 feat: reorganized the log structure for subagents 2026-02-24 10:41:13 -08:00
bryan 7a7ece1805 markdown support, removed subgraph, stop button 2026-02-24 10:40:24 -08:00
bryan 28a71b70a8 readme for http apis 2026-02-24 09:22:56 -08:00
bryan 33d3a13fde Merge branch 'feature/concurrent-judge-runtime' into feat/open-hive 2026-02-24 09:11:42 -08:00
bryan 5ea278a08d integrated queen, worker, judge 2026-02-24 09:09:28 -08:00
Timothy fd95f8da28 feat: active streams and waiting nodes 2026-02-24 09:03:21 -08:00
Richard Tang 86f4645d1c fix: inherit the tool call overflow margin for subagent 2026-02-24 08:20:08 -08:00
Richard Tang 2d05e96cd5 fix: spillover for subagent 2026-02-24 08:18:52 -08:00
bryan c1d5952ad9 Merge branch 'feature/concurrent-judge-runtime' into feat/open-hive 2026-02-24 08:07:31 -08:00
RichardTang-Aden ebeac68707 Merge pull request #5272 from SANTHAN-KUMAR/main
fix(web_scrape): reorder status checks before wait & replace hardcoded sleep with networkidle
2026-02-24 08:02:41 -08:00
bryan 72673e12fb remove mock data 2026-02-24 08:02:08 -08:00
Timothy 3867d3926b Merge branch 'main' into feature/concurrent-judge-runtime 2026-02-24 07:43:22 -08:00
Timothy 0b2b7a2622 feat: event bus logging 2026-02-24 07:43:05 -08:00
bryan 3951ee1a7d Merge branch 'main' into feat/open-hive 2026-02-24 07:28:42 -08:00
bryan 1afde51c7b additional graph update 2026-02-24 07:28:11 -08:00
bryan cbeef18f0a wip graph 2026-02-24 07:27:48 -08:00
SANTHAN-KUMAR de5fcab933 fix(web_scrape): implement robots.txt support & clean up dead mock
- Add respect_robots_txt parameter (default True) using stdlib
  urllib.robotparser, checked before browser launch to skip
  disallowed URLs early
- Remove dead wait_for_timeout mock from test helper
- Restore respect_robots_txt docs in README (param, error, note)
- Add 2 tests: blocked by robots.txt, disabled robots.txt check
- Fix import ordering (ruff I001)
2026-02-24 15:29:13 +05:30
SANTHAN-KUMAR a7a2100472 Merge branch 'aden-hive:main' into main 2026-02-24 15:24:10 +05:30
Uttkarsh Joshi 1947d8c3ca Fix asyncio.run crash in GraphBuilder and enhance ToolRegistry type inference (fixes: #2680) (#2895)
* Enhance ToolRegistry type inference for function parameters

- Add _infer_schema() helper to handle Union types (Union[T, U] and T | U)
- Support Optional[T] and Union[T, None] with correct optional flag
- Infer generic types: list[T] -> array with items schema, dict[K, V] -> object with additionalProperties
- Detect Pydantic BaseModel parameters and use model_json_schema()
- Correctly mark parameters as required/optional based on type annotations
- Add comprehensive test suite covering all type inference scenarios
- Maintain backward compatibility for unannotated parameters

* Fix asyncio.run crash in GraphBuilder.run_test

* Revert "Enhance ToolRegistry type inference for function parameters"

This reverts commit dacd0fa8b926e01d3f29e7c9b2ff5101b4a52c3b.
2026-02-24 17:06:11 +08:00
austin931114 55c63736ef Merge pull request #5315 from sabasiddique1/fix/roadmap-mermaid-diagram-render
docs: fix Roadmap Mermaid diagram not rendering on GitHub
2026-02-24 10:05:48 +01:00
austin931114 a2b68d893f Merge pull request #5317 from kart1ka/fix/retired-haiku-3.5-model
micro-fix: replace retired claude-3-5-haiku-20241022 with claude-haiku-4-5
2026-02-24 09:50:17 +01:00
Kartik Saini fd06e43d9c Merge branch 'main' into fix/retired-haiku-3.5-model 2026-02-24 11:36:18 +05:30
Kartik Saini b550f6efa0 fix(llm): replace retired claude-3-5-haiku-20241022 with claude-haiku-4-5-20251001 2026-02-24 11:22:40 +05:30
Saba Siddique 47adf88773 docs: fix Roadmap Mermaid diagram for GitHub rendering 2026-02-24 10:36:35 +05:00
Richard Tang 9c44d3b793 feat: add the upgraded file operation tools 2026-02-23 20:25:25 -08:00
Richard Tang 9b89ac694e feat: new snapshot tools 2026-02-23 19:34:42 -08:00
RichardTang-Aden 8748da38cf Merge pull request #5310 from RichardTang-Aden/fix/llm-token-source
Feat: tui workflow improvement and the fix for quickstart  problem for GLM
2026-02-23 19:23:59 -08:00
Timothy f697dc99fb feat: queen primitives 2026-02-23 19:15:55 -08:00
Richard Tang 630d8208cf fix: avoid using headless broswer 2026-02-23 19:09:18 -08:00
bryan ecb038c955 chat now creates multiple chats msgs 2026-02-23 19:07:54 -08:00
Richard Tang 77ff31cec6 feat: add back the quickstart prompt to restart terminal 2026-02-23 18:33:33 -08:00
Richard Tang 9b342dc593 feat: add health check for the browser start 2026-02-23 18:28:59 -08:00
Richard Tang 5ea8677a5d feat: tui get started menu 2026-02-23 18:06:59 -08:00
Richard Tang ad879de6ff feat: clean the browser snapshot tool 2026-02-23 17:56:05 -08:00
Richard Tang 97f5b3423f feat: source the llm token after quickstart 2026-02-23 17:50:53 -08:00
Timothy @aden 4968207eef Merge pull request #5276 from TimothyZhang7/fix/identity-persistence
Fix/identity persistence
2026-02-23 17:47:53 -08:00
Timothy @aden f859e2203a Merge branch 'main' into fix/identity-persistence 2026-02-23 17:45:23 -08:00
Bryan @ Aden fb3dad4354 Merge pull request #5231 from vakrahul/fix/local-llm-keyless-crash
fix(core): support local LLMs (Ollama, vLLM, LM Studio, Llama.cpp) in AgentRunner #3994
2026-02-24 01:42:53 +00:00
Timothy adc82c6a65 fix: lint issue 2026-02-23 17:42:36 -08:00
bryan 96084fea16 wip chat 2026-02-23 17:41:12 -08:00
Timothy @aden 6f52026c84 Merge branch 'main' into fix/identity-persistence 2026-02-23 17:35:44 -08:00
Timothy @aden 3576218ea9 Merge pull request #5270 from aden-hive/feature/local-credential-namespace
feat: local credential testing
2026-02-23 17:32:34 -08:00
vakrahul 4c662db530 fix: add missing accounts_prompt to add_graph in AgentRuntime 2026-02-24 07:01:48 +05:30
Timothy da1ce4e5a7 fix: lint 2026-02-23 17:30:27 -08:00
vakrahul c4944c5662 fix: pass accounts_prompt to ExecutionStream in add_graph and GraphExecutor 2026-02-24 06:31:56 +05:30
Bryan @ Aden d892f87651 Merge pull request #1814 from nafiyad/feat/wikipedia-search-tool
Feat/wikipedia search tool
2026-02-24 00:34:54 +00:00
Nafiyad Adane 447f23d157 style: run ruff check and format on tools/ 2026-02-23 17:17:58 -07:00
Nafiyad Adane aa12f0d295 Merge main into feat/wikipedia-search-tool 2026-02-23 17:12:31 -07:00
Richard Tang 795266aab4 feat: store the subagent logs in the node logs folder 2026-02-23 16:02:39 -08:00
Richard Tang 4e4ef121f9 feat: Progressive feedback in SubagentJudge 2026-02-23 15:48:34 -08:00
Richard Tang ddb9126955 fix: result the bug for calling the snapshot tool too many times 2026-02-23 15:38:04 -08:00
Richard Tang bac6d6dd68 feat: subagent ending judge and communication 2026-02-23 15:25:59 -08:00
bryan de9226aae0 credentials 2026-02-23 14:11:16 -08:00
Timothy 16e1ab1a87 feat: concurrent judge session 2026-02-23 13:56:59 -08:00
Bryan @ Aden 54287e06ad Merge pull request #4519 from Rudra2637/clarify-criterion-evaluation
Clarify supported criterion evaluation and progress semantics
2026-02-23 20:39:48 +00:00
Richard Tang 3451570541 feat: enable subagent to talk back to the parent via tools 2026-02-23 12:31:51 -08:00
Rudra2637 b33de5f0e1 Fix lint and formatting issues 2026-02-24 01:49:45 +05:30
Rudra2637 2d5ef20d4d Restore comment explaining 0.8 threshold 2026-02-24 01:13:52 +05:30
Rudra2637 177346b159 Fix docstring indentation 2026-02-24 01:09:40 +05:30
bryan 08819b1609 Merge branch 'main' into feat/open-hive 2026-02-23 11:13:32 -08:00
Richard Tang e5e939f344 feat: add a basic test tool for the broswer control tools validity 2026-02-23 11:08:08 -08:00
Richard Tang 0d51d25482 feat: highlight interactive actions 2026-02-23 11:03:19 -08:00
Rudra2637 35b1332551 Add type field to SuccessCriterion and restore evaluation guard 2026-02-24 00:29:31 +05:30
Bryan @ Aden 52586a024b Merge pull request #5273 from aden-hive/chore/add-community-cred
(micro-fix): add community credit for competitive intelligence agent
2026-02-23 18:53:45 +00:00
bryan 05a314b121 add community credit for competitive intelligence agent 2026-02-23 10:42:13 -08:00
Richard Tang a0a5b10df0 fix: remove the max subagent logic 2026-02-23 10:35:55 -08:00
Richard Tang 04bac93c14 feat: fix tool bugs and add background tabs option 2026-02-23 10:20:52 -08:00
Bryan @ Aden 8e262e2270 Merge pull request #5179 from nafiyad/feature/competitive-intelligence-agent-4153
Add competitive intelligence agent template
2026-02-23 18:20:02 +00:00
SANTHAN-KUMAR 4961d3ba8c fix(web_scrape): reorder status checks & replace hardcoded wait with networkidle
- Move response validation (null, HTTP status, content-type) before
  the rendering wait so errors return immediately without sleeping
- Replace wait_for_timeout(2000) with wait_for_load_state("networkidle")
  to align code with README (timeout=3000, wrapped in try/except)
- Fix README: remove phantom respect_robots_txt param, fix timeout
  30s→60s, remove false robots.txt claim
- Add 3 tests for early-exit error paths
2026-02-23 23:40:10 +05:30
Timothy 733bb4d2dd fix: get all account info including local apis 2026-02-23 10:09:22 -08:00
vakrahul ba31c760a6 fix: restore accounts_prompt propagation chain to ExecutionStream 2026-02-23 23:31:08 +05:30
Timothy a388bc6837 feat: local credential testing 2026-02-23 09:55:38 -08:00
Timothy 3f5bbbf1e3 feat: implementation of concurrent judge 2026-02-23 09:52:11 -08:00
Emmanuel Nwanguma 002da15375 docs(tools): add README for security tools (#5164)
* docs(tools): add README + comprehensive tests for security tools

READMEs added for 7 security scanning tools:
- port_scanner: TCP connect scans, banner grabbing, risky port detection
- ssl_tls_scanner: TLS version, cipher, certificate analysis
- http_headers_scanner: OWASP security headers validation
- dns_security_scanner: SPF, DMARC, DKIM, DNSSEC, zone transfer
- subdomain_enumerator: Passive CT log subdomain discovery
- tech_stack_detector: Web technology fingerprinting
- risk_scorer: Weighted letter-grade risk scoring

Comprehensive unit tests (92 total):
- Port scanner: constants, port categories, _check_port async tests
- SSL/TLS scanner: weak ciphers, TLS versions, cert parsing helpers
- HTTP headers scanner: security headers, leaky headers validation
- DNS security scanner: SPF/DMARC/DKIM/DNSSEC checks
- Subdomain enumerator: keyword detection, severity levels
- Tech stack detector: cookies, CDN, CMS, framework detection
- Risk scorer: grading logic, category scoring, JSON parsing

Fixes #5094

* revert: remove test changes per review feedback
2026-02-23 21:46:51 +08:00
Shubham Yadav 005609da3a Feat/PostgreSQL (Read-Only MCP) (#4160)
* feat(tools): add read-only PostgreSQL MCP tool

* test(tools): add postgres tool tests

* docs(tools): add postgres tool README

* feat(tools): update PostgreSQL MCP tool with refactored code structure and adding postgres credentials

* feat(tools): implement thread-safe connection pooling for PostgreSQL MCP tool

* fix(postgres): correct psycopg2 dependency and README setup instructions

---------

Co-authored-by: hundao <alchemy_wimp@hotmail.com>
2026-02-23 21:17:10 +08:00
Youssef Mohammed Abdelal Mohammed 182d9ca6f9 feat(tools): add arXiv search and download tools (#5222)
* feat(arxiv): implement search_papers and initial download_paper tools

* feat(arxiv): improve PDF download handling with temp files and validation (WIP)

Switch to NamedTemporaryFile for safer temp file handling

Force export.arxiv.org domain for PDF downloads

Add custom User-Agent header

Validate Content-Type to ensure PDF response

Improve error handling and cleanup logic

Add timeout to requests

Work in progress – download_paper still under refinement.

* feat(arxiv): replace NamedTemporaryFile with module-level TemporaryDirectory

Switch from NamedTemporaryFile(delete=False) to a shared _TEMP_DIR for
the lifetime of the server process. Scopes file lifetime to the session,
guarantees cleanup via atexit, and removes the need for manual file
handle management.

Expand README with full args/returns/error reference and implementation
notes explaining the temp storage design decision.

* test(arxiv): add comprehensive tests for search_papers and download_paper

fix(arxiv): return structured error instead of raising on invalid PDF content type

- Add full test coverage for search_papers (validation, success, id_list, errors)
- Add full test coverage for download_paper (success, network errors, invalid content, cleanup)
- Mock arxiv client and requests to isolate behavior
- Ensure partial files are cleaned up on failure
- Align download_paper behavior with tool contract (no exceptions, structured responses)

* style(tools): apply ruff formatting to arxiv tool and update lockfile
2026-02-23 20:57:04 +08:00
vakrahul a6b43f8016 fix: address PR review feedback (accounts_prompt, tests, and remove markdowns) 2026-02-23 17:38:22 +05:30
vakrahul 31700fa8da fix: address PR review feedback (accounts_prompt, tests, and remove markdown) 2026-02-23 17:38:04 +05:30
Rudra2637 6b475ec1cf Removed invalid type guard and clean up comments 2026-02-23 11:06:10 +05:30
Timothy 1b27844c52 feat: local credential testing 2026-02-22 20:58:42 -08:00
RichardTang-Aden 3a0b91f7ab Merge pull request #5251 from vincentjiang777/main
docs: roadmap updates for architecure v3
2026-02-22 20:48:43 -08:00
RichardTang-Aden 82108e32fa Merge branch 'main' into main 2026-02-22 20:48:10 -08:00
Timothy 28f4fecfb3 feat: handle account identity systematically 2026-02-22 20:45:36 -08:00
Vincent Jiang ff1bb08217 docs: roadmap updates for architecure v3 2026-02-22 20:41:26 -08:00
Nafiyad Adane 10617fee0d chore(templates): export agent.json configuration
Generated the agent.json fallback configuration using the agent-builder MCP server export functionality as requested by the reviewer.
2026-02-22 21:12:08 -07:00
Bryan @ Aden 866103ddf4 Merge pull request #5212 from JamieJiHeonKim/docs/fix-readme-formatting-and-links
Docs/fix readme formatting and links
2026-02-23 03:51:32 +00:00
bryan fcfaca6bd0 Merge branch 'main' into feat/open-hive 2026-02-22 19:50:39 -08:00
bryan 4c7d9ab0fb added click cursor and rename dashboard to workspace 2026-02-22 19:21:37 -08:00
bryan 061aec4b3d my agents configured 2026-02-22 19:04:48 -08:00
Richard Tang 047f4a1a0c Merge branch 'main' into feat/sub-agent-framework 2026-02-22 18:31:47 -08:00
Bryan @ Aden f12ab10725 Merge pull request #4930 from Ttian18/fix/tina/shift-enter-newline-4565
fix(tui): add Ctrl+J as newline fallback in chat input
2026-02-23 02:31:47 +00:00
Bryan @ Aden 0882fa6ce5 Merge pull request #5165 from ishaannk/main
feat: add stop/cancel execution control for agents
2026-02-23 02:24:46 +00:00
Richard Tang 7994b90dfa feat: add the max_sub_agents config and constrain 2026-02-22 18:23:52 -08:00
Richard Tang 04b6a80370 feat: shared agent profile 2026-02-22 18:17:40 -08:00
RichardTang-Aden 0b87e4c45d Merge pull request #5245 from TimothyZhang7/main
Release / Create Release (push) Waiting to run
doc(architecture): update documents
2026-02-22 18:04:51 -08:00
Timothy 9c7e846828 chore: put event loop node zoom inside worker bee graph 2026-02-22 18:03:31 -08:00
bryan 30bd0e483a home page and mock chatroom 2026-02-22 18:03:02 -08:00
Timothy 13cc93c334 chore: architecture 2026-02-22 17:54:12 -08:00
Timothy 564b1bb752 chore: roadmap diagram 2026-02-22 17:45:56 -08:00
bryan 2f31a92d31 Merge branch 'main' into feat/open-hive 2026-02-22 16:06:44 -08:00
ishaannk fd89c7f56f Fix: Add trailing newlines for ruff format compliance 2026-02-23 04:41:38 +05:30
bryan 35738c8279 react structure 2026-02-22 14:52:15 -08:00
vakrahul a0d14b8a25 fix(core): add zero-config local LLM support and fix AgentRunner crash (#3994) and adding docs 2026-02-22 22:59:11 +05:30
Timothy @aden 9c781ed78e Merge pull request #5224 from TimothyZhang7/feature/credential-v2
fix(micro-fix): tui select account
2026-02-21 23:13:18 -08:00
Timothy 460a24e34a fix: tui select account 2026-02-21 23:01:41 -08:00
Shivam Shahi– oss/acc 0f8627f17a format 2026-02-22 00:25:15 +05:30
JamieJiHeonKim 8ae030e16e docs: add link to linting and formatting setup in CONTRIBUTING.md 2026-02-21 13:00:46 -05:00
JamieJiHeonKim 3c6467c814 docs: fix unclosed code block in deep_research_agent README 2026-02-21 12:54:45 -05:00
ishank 2f11f0c911 Merge branch 'aden-hive:main' into main 2026-02-21 17:29:51 +05:30
ishaannk c3ae67fb1d address review comments: rename Stop to Pause and UI toggle change 2026-02-21 17:08:42 +05:30
Timothy @aden 8c750c7edd Merge pull request #5194 from Antiarin/fix/escalate-to-coder-execution-id
fix[bug](graph): add execution_id to base Runtime and restore ctx.execution_id in escalation handler
2026-02-21 03:05:14 -08:00
Antiarin 571838a289 fix(graph): add execution_id to NodeContext for escalate_to_coder 2026-02-21 16:20:04 +05:30
RichardTang-Aden dafaaae792 Merge pull request #5182 from TimothyZhang7/feature/credential-v2
Feature/credential v2
2026-02-20 19:52:00 -08:00
Timothy b45e14efb4 fix: zai api key setup 2026-02-20 19:40:07 -08:00
RichardTang-Aden e70cbf26e2 Merge pull request #5095 from NSkogstad-AUS/docs/update-tools-readme
Docs/update tools readme
2026-02-20 19:08:23 -08:00
RichardTang-Aden daafdc3704 Merge pull request #5103 from alhousseynou-ndiaye/feature/document-agent
docs: add document processing recipe
2026-02-20 19:06:59 -08:00
bryan 6661934fed harden server apis and agent loading 2026-02-20 18:28:52 -08:00
Nafiyad Adane f568728de1 Add competitive intelligence agent template
- Adds a new autonomous agent template that monitors competitor websites, news, and GitHub
- Implements a 7-node graph workflow to collect, aggregate, and analyze competitive data
- Generates a weekly structured HTML digest with key highlights and 30-day trends
- Utilizes existing web_scrape, web_search, and github MCP tools
- Addresses issue #4153

Closes #4153
2026-02-20 19:13:47 -07:00
bryan 263d35bbd6 Merge branch 'main' into feat/open-hive 2026-02-20 18:09:01 -08:00
Bryan @ Aden bece21d217 Merge pull request #5169 from Schlaflied/docs/sync-zh-CN-readme
docs(i18n): sync zh-CN.md with latest README and fix broken links
2026-02-21 02:06:48 +00:00
bryan d4788e147a backend apis for open hive 2026-02-20 18:01:51 -08:00
Timothy f4594ecf37 fix: gmail batch tool schema coercion 2026-02-20 17:53:35 -08:00
Bryan @ Aden 8f1462cb79 Merge pull request #5113 from vakrahul/features/stripe-tools
feat(tools): add Stripe payment processing integration
2026-02-21 01:44:23 +00:00
Timothy 76d4d0de69 feat: credential v2 with provider loading and test agent 2026-02-20 17:43:00 -08:00
vakrahul 6ab4e1d641 fix: address maintainer PR reviews feedback for Stripe 2026-02-21 07:06:20 +05:30
Zhang fc0c3e169f feat(tools): add Intercom tool with conversations, contacts, and tags (#4256) 2026-02-20 17:14:30 -08:00
Zhang 4760f95bda feat(credentials): add Intercom credential spec (#4256)
Register INTERCOM_ACCESS_TOKEN in INTEGRATION_CREDENTIALS for the
8 Intercom tools (search/get conversations, contacts, notes, tags,
assignment, teams). Tool implementation follows in subsequent commits.
2026-02-20 17:13:54 -08:00
vakrahul c5d87c99fd fix: address maintainer PR review feedback for Stripe 2026-02-21 06:30:31 +05:30
Schlaflied f53f403022 docs(i18n): sync zh-CN.md with latest README and fix broken links 2026-02-20 19:29:45 -05:00
Timothy b887b2951e wip: credential v2 2026-02-20 13:55:06 -08:00
ishaannk 842b69b155 feat: add stop/cancel execution control for agents 2026-02-21 02:03:29 +05:30
Nicolas Suescun d6c34106fc docs: fix CLI arguments mismatch for test-debug and test-list (#4113)
* docs: fix CLI usage args for test-debug/test-list to match implementation

* docs: restore 'uv run' prefix to test commands

Reverts unintentional removal of 'uv run' in usage examples as requested in code review.

* chore: changes to .gitignore
2026-02-20 17:58:17 +08:00
Nihal 67cbd31280 fix(graph): harden JSON parsing for async safety and large LLM outputs (#4869)
* perf(json): add json.loads fast path + asyncio.to_thread for extract_json

Addresses maintainer feedback:
- json.loads candidate fast path in find_json_object (300x speedup source)
- asyncio.to_thread wrappers for both _extract_json call sites (unblocks event loop)
- Remove ~480 lines of over-engineered incremental parsing logic

Total: ~16 lines, zero duplication, zero API surface change

* fix: simplify async JSON handling per maintainer feedback and align tests

* fix(test): replace tautology assertion in test_mismatched_then_valid

The original assertion `assert result is not None or result is None`
is always true. Replace with a meaningful type check.

---------

Co-authored-by: hundao <alchemy_wimp@hotmail.com>
2026-02-20 17:23:25 +08:00
Utkarsh Singh cd0cf69099 feat(tools): add Brevo transactional email and SMS integration
- Add brevo_tool with 6 MCP tools: brevo_send_email, brevo_send_sms,
  brevo_create_contact, brevo_get_contact, brevo_update_contact,
  brevo_get_email_stats
- Add CredentialSpec for BREVO_API_KEY in credentials/brevo.py
- Register brevo_tool in tools/__init__.py and credentials/__init__.py
- Add README with setup instructions and usage examples
- Add 34 unit tests covering all tools, validation and error handling

Closes #5127
2026-02-20 13:19:07 +05:30
Timothy @aden cf877f2b49 Merge pull request #5121 from TimothyZhang7/fix/credential-error-types
Fix(micro-fix)/credential error types
2026-02-19 16:23:31 -08:00
Timothy 6f34cb2c8a fix: credential error types 2026-02-19 14:52:29 -08:00
Richard Tang a04a8a866d fix: sub-agents reachability check 2026-02-19 11:33:32 -08:00
Timothy b88aa2b53c Merge branch 'feature/tui-credential-setup' 2026-02-19 11:12:28 -08:00
Timothy 356cab19eb Merge branch 'fix/google-tool-healthcheck' into feature/tui-credential-setup 2026-02-19 11:12:15 -08:00
vakrahul 7c6d5fa446 test_credentials changess 2026-02-19 19:46:43 +05:30
vakrahul 2dae3e47fd test_credentials changes 2026-02-19 19:43:45 +05:30
vakrahul 6fce789607 feat: add Stripe tool integration and testss 2026-02-19 17:14:57 +05:30
vakrahul 9bbb5b38e6 feat: add Stripe tool integration and tests 2026-02-19 16:55:22 +05:30
vakrahul ac73aa93bf feat: add Stripe tool integration and tests 2026-02-19 16:51:08 +05:30
Timothy 52a56e4a10 fix: google tools need healthcheck 2026-02-18 23:07:12 -08:00
alhousseynou-ndiaye a1cede510d docs: add document processing recipe 2026-02-19 07:47:13 +01:00
Timothy @aden 682c10e873 Merge pull request #5099 from TimothyZhang7/main
release(docs): v0.5.1
2026-02-18 22:11:45 -08:00
Timothy 5605e24a0d fix: streaming output leakage 2026-02-18 22:10:02 -08:00
Timothy f7268a44d9 fix: worker credential setup 2026-02-18 21:50:18 -08:00
Timothy af7a4ff4e8 release: v0.5.1
- Bump framework version 0.5.0 → 0.5.1
- Add CHANGELOG.md with full release notes

Highlights: Hive Coder meta-agent, multi-graph runtime, TUI revamp,
subscription model support, 5 new tool integrations, deprecated node
type removal.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-18 21:15:20 -08:00
Timothy 60b9c0d763 release: v0.5.1
- Bump framework version 0.5.0 → 0.5.1
- Add CHANGELOG.md with full release notes

Highlights: Hive Coder meta-agent, multi-graph runtime, TUI revamp,
subscription model support, 5 new tool integrations, deprecated node
type removal.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-18 21:00:41 -08:00
Timothy @aden 5c550270c6 Merge pull request #5071 from TimothyZhang7/feature/queen-bee
Release / Create Release (push) Waiting to run
Feature/queen bee
2026-02-18 20:59:04 -08:00
Timothy e03fd48e48 fix: lint 2026-02-18 20:51:40 -08:00
Timothy 6420c74c24 fix: ci tests 2026-02-18 20:47:07 -08:00
Timothy ad74351530 fix: agent switch return 2026-02-18 20:39:25 -08:00
Timothy @aden 1b5f656429 Merge branch 'main' into feature/queen-bee 2026-02-18 20:34:19 -08:00
Timothy @aden 132d84c529 Merge pull request #5058 from adenhq/fix/deprecation
fix(arch): remove all deprecated concepts and dead code
2026-02-18 20:32:24 -08:00
Timothy @aden a03b378e9b Merge branch 'main' into fix/deprecation 2026-02-18 20:29:39 -08:00
Timothy 74635e1d7d feat: subscription model support, tui revamp 2026-02-18 20:28:11 -08:00
Bryan @ Aden 893053ede7 Merge pull request #5098 from adenhq/update/inbox-agent-fixes
(micro-fix): Update/inbox agent fixes
2026-02-19 03:35:25 +00:00
bryan 596ec6fec5 fixed credentials 2026-02-18 19:26:59 -08:00
bryan 5863b83172 Merge branch 'main' into update/inbox-agent-fixes 2026-02-18 19:12:01 -08:00
bryan 20c92b197a fixes to inbox agent 2026-02-18 19:08:55 -08:00
Richard Tang 8c9baa62b0 feat: create default hive profile for browser use 2026-02-18 18:10:37 -08:00
RichardTang-Aden ec9c6b4666 Merge pull request #5097 from RichardTang-Aden/feat/credential-setup-cli
Feat/credential setup cli
2026-02-18 17:22:53 -08:00
Richard Tang 8a73e5c119 chore: ruff lint 2026-02-18 17:21:45 -08:00
Richard Tang 717f0eee9a Merge branch 'main' into feat/credential-setup-cli 2026-02-18 17:20:40 -08:00
Richard Tang 09fb47f089 chore: ruff format 2026-02-18 17:14:26 -08:00
Richard Tang b46d943e71 chore: lint issues 2026-02-18 17:13:01 -08:00
Richard Tang 262eaa6d84 feat: mcp dependencies for gcu 2026-02-18 16:34:19 -08:00
NSkogstad-AUS b980d6f6ab docs(tools): fixed small inaccuracy with gmail description 2026-02-19 11:33:50 +11:00
NSkogstad-AUS 61f27369ef docs(tools): update Available Tools table with additional search functionalities 2026-02-19 11:28:18 +11:00
NSkogstad-AUS 204b0b4744 docs(tools): expand Available Tools table with all tools by category
Previously the table listed ~20 of ~50 available tools. This expands
it to cover all tools, grouped into categories: File System, Data Files,
Web & Search, Communication, Productivity & CRM, Cloud & APIs,
Security, and Utilities.

All tool names verified against registered @mcp.tool() functions in source.

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
2026-02-19 11:20:49 +11:00
Richard Tang fc1a48f3bc feat: breaking the browser use tools by types 2026-02-18 16:10:17 -08:00
Richard Tang 060f320cd1 feat(wip): gcu node and basic browser tools 2026-02-18 15:52:46 -08:00
Timothy 1b6ebb1e42 fix: put guardian back to hive coder 2026-02-18 15:06:25 -08:00
Richard Tang bff32bcaa3 feat: allow sub_agent in the agent framework 2026-02-18 14:43:01 -08:00
Timothy 7dfc75b3e6 feat: multi-graph agent session 2026-02-18 12:46:59 -08:00
Richard Tang 2920b5ab01 chore: lint issues 2026-02-17 20:05:05 -08:00
Richard Tang 81ad0467b0 Merge branch 'main' into fix/deprecation 2026-02-17 20:02:47 -08:00
Richard Tang 115ca55ea0 fix: broken ci tests 2026-02-17 20:00:47 -08:00
Richard Tang f2814a26e6 chore: lint issue 2026-02-17 19:57:31 -08:00
Richard Tang 4d309950b0 fix: unused code and ci 2026-02-17 19:55:54 -08:00
RichardTang-Aden 39216a4c12 Merge pull request #5016 from adenhq/feat/pdf-ingestion
Feat/pdf ingestion
2026-02-17 19:29:35 -08:00
Aden HQ c7fa621aeb Merge branch 'main' into feat/pdf-ingestion 2026-02-17 19:28:54 -08:00
Timothy 5914d28cbe feat(queen): hive queen bee implementation v1 2026-02-17 19:19:09 -08:00
Richard Tang 8c3ad3d70a fix: email agent version 2026-02-17 19:17:07 -08:00
Richard Tang 9eb3fc6285 fix: fix email agent version 2026-02-17 19:09:17 -08:00
Richard Tang e95f7e7339 fix: make the email inbox management agent identical to main 2026-02-17 19:02:08 -08:00
RichardTang-Aden d949551399 Merge pull request #3332 from haliaeetusvocifer/feat/google-docs-integration
added feature google docs integration
2026-02-17 18:25:03 -08:00
Richard Tang a7dbd85ed4 fix: google docs credentials 2026-02-17 18:24:31 -08:00
Richard Tang 1f288dab1c fix: tools registration problems 2026-02-17 18:15:41 -08:00
Richard Tang 021754d941 Merge branch 'main' into feat/google-docs-integration 2026-02-17 18:05:07 -08:00
bryan 7412904fbf update to job hunter and website vulnerability 2026-02-17 16:58:12 -08:00
Timothy cd1976e2b9 feat: support openai compatible endpoints 2026-02-17 16:27:36 -08:00
haliaeetusvocifer 5f3e9379a3 completed task 2026-02-17 23:59:47 +00:00
Richard Tang 0e565d6cea feat: add the agent start confirmation and credential update option 2026-02-17 13:17:03 -08:00
Richard Tang 67b249dcd5 feat: add the credential setup step after credential validation 2026-02-17 12:59:20 -08:00
Timothy bbf1c8c790 fix(arch): remove all deprecated concepts and dead code 2026-02-17 10:59:15 -08:00
Amdev-5 9744363342 fix(lusha): address PR review round 2 — structured filters, pagination, correct types
- search_people: replaced freetext searchText concatenation with proper
  structured Lusha API filters (jobTitles, seniority as list[int],
  departments, locations as dict, company_names, industry_ids, search_text)
- search_companies: added locations, company_names, search_text params;
  made all params optional for flexible queries
- Pagination: exposed limit param (clamped 10-50 per Lusha API constraints)
  on both search tools, replacing hardcoded size=25
- get_signals: changed ids from list[str] to list[int], removed internal
  str-to-int conversion as Lusha IDs are always numeric
- seniority type corrected to list[int] (API rejects string-encoded values
  despite OpenAPI spec suggesting strings — verified via live integration)
- Unit tests updated for all changes (19/19 pass)

Verified against live Lusha API: all 6 tools return correct responses.
2026-02-17 22:00:09 +05:30
Amdev-5 6fe8439e94 fix(lusha): use mainIndustriesIds for company search, safer credential handling
- search_companies: replace names filter with mainIndustriesIds (numeric
  industry IDs) per Lusha API schema. Parameter changed from
  industry: str to industry_ids: list[int] | None.
- _get_api_key: return None instead of raising TypeError on unexpected
  credential type. Lets _get_client handle it with the standard error dict
  pattern used across all tools.
- Updated unit tests for new industry_ids parameter and added test for
  non-string credential handling.
2026-02-17 21:33:02 +05:30
Amdev-5 8e61ffe377 fix(tools): remove invalid searchText field from Lusha prospecting filters
Lusha API rejects filters.companies.include.searchText (HTTP 400).
Replaced with valid 'names' field in search_companies and removed
redundant company searchText from search_people. Updated unit tests.
2026-02-17 21:33:02 +05:30
Amdev-5 723476f7a7 feat(tools): add Lusha MCP integration with credentials and health checks 2026-02-17 21:33:02 +05:30
IamSayeed 0f253027ae Merge branch 'main' into feature/add-asana-integration 2026-02-17 12:20:01 +05:30
Sayeed Rizwan 6053895a82 fix(asana): resolve from PR feedback - refactor client, fix specs, add tests 2026-02-17 12:18:06 +05:30
bryan 44a8b453b5 Merge branch 'main' into feat/pdf-ingestion 2026-02-16 18:40:46 -08:00
bryan 26511fe962 added pdf select and updated job hunter 2026-02-16 18:38:13 -08:00
RichardTang-Aden ce5893216a Merge pull request #4871 from paarths-collab/docs/root-install-warning
docs(readme): clarify uv workspace setup and prevent root pip install misuse
2026-02-16 18:36:57 -08:00
RichardTang-Aden 4e821e4dbf Merge pull request #5011 from RichardTang-Aden/main
micro-fix: chore: update the intro message of the agent
2026-02-16 18:05:18 -08:00
Richard Tang d11e97de59 chore: update the intro message of the agent 2026-02-16 18:03:58 -08:00
RichardTang-Aden 4b10d3e360 Merge pull request #5010 from RichardTang-Aden/main
feat: merge the sample agent so we have one email inbox management agent
2026-02-16 18:00:31 -08:00
Richard Tang e04479930f chore: update descriptions 2026-02-16 17:56:17 -08:00
Richard Tang 8a8c4cc3f5 chore: rename the email 2026-02-16 17:53:05 -08:00
Richard Tang 1e06ff611e refactor: merge the sample agent so we have one email inbox management agent 2026-02-16 17:43:18 -08:00
Pravin Mishra 1edc7bb9c7 feat(tools): add Discord integration (#2913) (#4247)
* feat(tools): add Discord integration (#2913)

- discord_list_guilds: list servers the bot is in
- discord_list_channels: list channels for a guild
- discord_send_message: send message to channel
- discord_get_messages: get recent messages

Auth: DISCORD_BOT_TOKEN, credential spec, health checker.
Uses Discord API v10 (Bot token).

Co-authored-by: Cursor <cursoragent@cursor.com>

* style: apply ruff format to discord tool files

Co-authored-by: Cursor <cursoragent@cursor.com>

* feat(discord): add rate limit handling, message validation, channel filter

- Rate limit (429): return clear error with retry_after from API
- Message length: validate before send, max 2000 chars per Discord limit
- Channel filter: text_only param (default True) for list_channels
- Add 6 new tests for rate limit, validation, filtering

Co-authored-by: Cursor <cursoragent@cursor.com>

* feat(discord): add retry on 429 rate limit

- Retry up to 2 times using Discord's retry_after
- Cap wait at 60s, fallback to exponential backoff if no retry_after
- Add _request_with_retry helper for all API calls
- Add 3 tests: retry then success, retry exhausted, tool-level retry

Co-authored-by: Cursor <cursoragent@cursor.com>

* fix(discord): remove unused DISCORD_API_BASE import

Co-authored-by: Cursor <cursoragent@cursor.com>

---------

Co-authored-by: mishrapravin114 <mishrapravin114@users.noreply.github.com>
Co-authored-by: Cursor <cursoragent@cursor.com>
2026-02-17 09:29:56 +08:00
Shivam Shahi– oss/acc ceffa38717 Merge branch 'main' into feat/zoho-crm 2026-02-17 02:46:29 +05:30
Siddharth Varshney 7b1e0af155 feat(utils): add proper __init__.py exports for utils module (#3979) 2026-02-16 20:20:09 +08:00
Jeet Karia 7b15616e29 feat(tools): add Exa Search API integration with 4 MCP tools (#4941)
Implements AI-powered web search, content extraction, and research tools
via the Exa API for agent workflows.

Tools: exa_search, exa_find_similar, exa_get_contents, exa_answer

Follows existing tool pattern (web_search_tool, hubspot_tool, slack_tool):
- register_tools(mcp, credentials) with @mcp.tool() decorators
- Credential fallback: CredentialStoreAdapter -> EXA_API_KEY env var
- Error handling: always returns dicts, never raises
- Retry with exponential backoff on HTTP 429

Includes:
- Neural/keyword search with domain, date, and category filters
- Similar page discovery via neural embeddings
- Content extraction from up to 10 URLs per request
- Citation-backed answer generation
- CredentialSpec in credentials/search.py
- Comprehensive unit tests (21 tests)
- 500/500 integration CI tests passing

Fixes #4177
2026-02-16 19:28:32 +08:00
Your hh3538962 ae205fa3f2 fix(tools): address Power BI integration code review feedback
- Fix export endpoint: /Export -> /ExportTo
- Add 202 Accepted response handling
- Add notifyOption to refresh_dataset API call
- Rename format parameter to export_format (avoid shadowing builtin)
- Add PNG support to export formats
- All critical API issues from review addressed
2026-02-16 14:00:09 +05:00
Zhang bd7d2277d8 fix(tui): add Ctrl+J as newline fallback in chat input
Terminals without extended key reporting (VS Code, Cursor) send
identical events for Enter and Shift+Enter, making it impossible
to insert newlines. Ctrl+J produces a distinct key event in all
terminals.
2026-02-15 20:37:52 -08:00
Shivam Shahi– oss/acc 99ed00fd02 feat(tools): add Razorpay payment processing integration (#4467)
* feat(tools): add Razorpay payment processing integration

Add Razorpay MCP tool integration for payment processing, invoicing,
and refund management. Implements 6 MCP tools:

- razorpay_list_payments: List recent payments with filters (pagination, date range)
- razorpay_get_payment: Fetch detailed payment information by ID
- razorpay_create_payment_link: Create one-time payment links with shareable URLs
- razorpay_list_invoices: List invoices with status and type filtering
- razorpay_get_invoice: Fetch invoice details including line items
- razorpay_create_refund: Create full or partial refunds for payments

Features:
- Authentication via HTTP Basic Auth (RAZORPAY_API_KEY + RAZORPAY_API_SECRET)
- Credential spec in dedicated razorpay.py (follows repo pattern)
- Comprehensive error handling (401, 403, 404, 400, 429, 500, timeouts)
- Input validation (payment IDs, invoice IDs, amounts, currencies)
- Full test coverage (42 unit tests, 26 integration tests)

Closes #4404

* style: fix ruff I001 import order and W291 in tools

* fix: improve Razorpay credential tracking and validation

- Add razorpay_secret CredentialSpec with credential_group
- Fix amount=0 bug by using 'is not None' checks
- Add regex validation for payment/invoice IDs

* fix: use graceful credential handling instead of raising TypeError

Match codebase convention (calcom, lusha) - return None for non-string
credentials instead of raising TypeError, so the tool returns an error
dict instead of crashing.

---------

Co-authored-by: hundao <alchemy_wimp@hotmail.com>
2026-02-16 12:02:16 +08:00
Timothy @aden f7af5f9ee8 Merge pull request #4926 from TimothyZhang7/example/gmail-inbox-guardian-agent
Release / Create Release (push) Waiting to run
chore(micro-fix): change the timer to 5 minutes
2026-02-15 19:30:27 -08:00
RichardTang-Aden e5bcc8005f Merge pull request #4922 from adenhq/feat/vulnerability_agent
Feat/vulnerability agent
2026-02-15 19:21:00 -08:00
Timothy 352d285212 chore: change the timer to 5 minutes 2026-02-15 18:47:22 -08:00
Timothy @aden 3ef60f9d14 Merge pull request #4925 from TimothyZhang7/example/gmail-inbox-guardian-agent
Example(micro-fix)/gmail inbox guardian agent
2026-02-15 18:44:19 -08:00
Timothy a103312127 feature: display timer status interactively 2026-02-15 18:41:45 -08:00
Timothy 3d0bba4167 example(agents): ready-to-use gmail automation agent 2026-02-15 18:34:14 -08:00
Timothy @aden 3df718cc14 Merge pull request #4920 from TimothyZhang7/fix/issue-4905
Fix/issue 4905
2026-02-15 18:08:28 -08:00
RichardTang-Aden c7497a180e Merge pull request #4918 from TimothyZhang7/feature/multi-entry-event-driven-agents
Fix/multi entry event driven agents
2026-02-15 18:05:43 -08:00
Bryan @ Aden 3f39039a21 Merge pull request #4800 from LukeM94/doc/typo-fixes-in-roadmap.md
doc: Fix typos in docs/roadmap.md
2026-02-16 01:57:35 +00:00
Bryan @ Aden 88fbd90fcc Merge pull request #4799 from zhanglinqian/fix-recipes-readme-links
docs: fix incorrect directory names in recipes README
2026-02-16 01:55:51 +00:00
bryan e0bf09dd78 lint fixes 2026-02-15 17:45:56 -08:00
bryan 3e158b07af Merge branch 'main' into feat/vulnerability_agent 2026-02-15 17:35:59 -08:00
Timothy 5319ed7ee1 chore: remove unsolicited docs 2026-02-15 17:32:36 -08:00
Timothy 978904d2a4 fix(executor): async operations on non-streaming llm complete for healthy event loop 2026-02-15 17:31:18 -08:00
bryan 4d876ecc54 vulnerability check to sample agents 2026-02-15 17:27:09 -08:00
RichardTang-Aden ba327d0b9e Merge pull request #4919 from adenhq/feat/sample-agent/job_hunter
Sample agent, micro-fix: remove dependency of brave search
2026-02-15 16:58:06 -08:00
Timothy 4d8c8e9308 feat(arch): architecture patches to support multi-entry agents consuming external events 2026-02-15 16:19:58 -08:00
Shivam Shahi– oss/acc 669a05892b Merge branch 'main' into feat/zoho-crm 2026-02-15 21:47:52 +05:30
Amit Kumar b70885934c [Integration]: Cal.com - Open Source Scheduling Infrastructure #3188 (#3255)
* feat(tools): add Cal.com scheduling integration with 8 MCP tools

Adds Cal.com API integration for booking and scheduling management:
- calcom_list_bookings, calcom_get_booking, calcom_create_booking, calcom_cancel_booking
- calcom_get_availability, calcom_update_schedule
- calcom_list_event_types, calcom_get_event_type

Includes dedicated credential spec, 20 unit tests, and full integration conformance.
Resolves #3188

* fix(calcom): address PR review + add missing list_schedules MCP tool

- Add isinstance(api_key, str) guard in _get_api_key, return None on
  non-string values for consistent error-dict handling
- Remove duplicate metadata from responses object in create_booking;
  metadata stays at top-level per Cal.com v1 API spec
- Expose availability parameter in calcom_update_schedule MCP tool
- Add calcom_list_schedules tool wrapping existing client method —
  needed to discover schedule IDs before calling update_schedule
- Register calcom_list_schedules in credential spec for CI conformance
- Add tests for credential handling, schedule availability, and
  list_schedules (24 tests total, 9 tools)

* docs: update README to include calcom_list_schedules (9 tools)

---------

Co-authored-by: hundao <alchemy_wimp@hotmail.com>
2026-02-15 20:14:00 +08:00
paarths-collab 722b087fc0 docs(readme): clarify installation and prevent root pip install misuse 2026-02-15 17:39:41 +05:30
IamSayeed 4898a9759a Merge branch 'main' into feature/add-asana-integration 2026-02-15 13:07:15 +05:30
Sayeed Rizwan 2c2fa25580 fix: Resolve merge conflicts in credential and tool registries 2026-02-15 13:00:23 +05:30
Sayeed Rizwan 56496d7dbd feat: Add Asana integration for project management automation
- Implement 25 MCP tools for comprehensive Asana operations
  - Task management (create, update, search, delete, complete, comment, subtask)
  - Project management (create, update, list, get tasks)
  - Workspace & team operations (list workspaces, get users)
  - Section management for Kanban workflows
  - Tag and custom field support

- Add Personal Access Token (PAT) authentication
- Use official asana>=3.2.0 Python SDK (v5+ API)
- Include comprehensive error handling with ApiException
- Add 5 unit tests with 100% pass rate
- Provide detailed documentation and usage examples

Technical Details:
- Uses asana.ApiClient with Configuration pattern
- Implements workspace resolution by name or GID
- Handles paginated responses automatically
- Follows CredentialStoreAdapter pattern
- Matches existing tool structure (slack_tool, github_tool)

Closes #4156
2026-02-15 11:33:17 +05:30
Aaryann Chandola 0c7ea272db [integration] feat(tools): add Google Calendar integration (#3171)
* feat(calendar): add Google Calendar integration with event management tools and health checks

* fix(calendar): align google_calendar_oauth credential spec with codebase pattern
2026-02-15 08:25:53 +08:00
y0sif dd0696e44d chore: resolve merge conflicts with main 2026-02-14 21:38:44 +02:00
y0sif dcda273e0b chore: resolve merge conflicts with main 2026-02-14 21:32:33 +02:00
y0sif f3b159c650 docs(tools): document Attio CRM in README 2026-02-14 21:23:47 +02:00
y0sif 06df037e28 chore: add Attio credentials to test spec file 2026-02-14 21:22:55 +02:00
y0sif e814e516d1 chore: add Attio credentials to init file 2026-02-14 21:21:37 +02:00
y0sif 0375e068ed test(tools): add Attio tool tests 2026-02-14 21:20:03 +02:00
y0sif 34ffc533d3 feat(tools): add Attio CRM integration 2026-02-14 21:19:14 +02:00
Aaryann Chandola 5e4f322fc0 add new time tool for current date/time retrieval (#3425) 2026-02-14 21:57:07 +08:00
zhanglinqian c02e45f1aa docs: fix incorrect directory names in recipes README 2026-02-14 21:19:47 +08:00
LukeM94 a7217f138c Fix typos in docs/roadmap.md
Correct hyphenation and spelling in the product roadmap: change 'outcome oriented' to 'outcome-oriented' and fix 'Workder' to 'Worker' in the Deployment section.
2026-02-14 13:18:03 +00:00
mubarakar95 ea2ea1a4ae Merge branch 'main' into integration/apify 2026-02-14 17:53:39 +05:30
mubarakar95 9e11947687 style: apply ruff formatting to apify_tool.py 2026-02-14 17:22:35 +05:30
mubarakar95 47117281e1 fix(test): resolve E501 line too long in test_apify_tool.py 2026-02-14 17:22:33 +05:30
mubarakar95 032dd13f5a feat(tools): implement Apify integration with 4 tools and comprehensive tests
- Added credential spec with health check endpoint
- Implemented apify_run_actor (sync/async execution)
- Implemented apify_get_dataset (result retrieval)
- Implemented apify_get_run (status checking)
- Implemented apify_search_actors (marketplace search)
- Created comprehensive README with examples and use cases
- Added 24 unit tests with mocked API responses
- All tests passing, conformance validated, linting clean

Resolves: #4510
2026-02-14 17:22:25 +05:30
Emmanuel Nwanguma 3502f25048 [Integration] feat(tools): add BigQuery MCP tool for SQL querying and data analysis (#3350)
* feat(tools): add BigQuery MCP tool for SQL querying and data analysis

- Add run_bigquery_query tool for executing read-only SQL queries
- Add describe_dataset tool for exploring dataset schemas
- Implement safety features: read-only enforcement, row limits (max 10k)
- Add comprehensive unit tests (27 tests passing)
- Follow CredentialStoreAdapter pattern from email tool
- Support ADC and service account authentication

Fixes #3067

* fix(bigquery): address PR review feedback

- Add credential_id and api_key_instructions to CredentialSpec
- Fix credential key name from 'bigquery_credentials' to 'bigquery'
- Pass credential path to BigQuery client via environment variable
- Fix ADC error message detection for both error variants
- Move google-cloud-bigquery to optional dependencies
- Update tests to use correct credential key names

All 27 tests passing

* fix(bigquery): return {error, help} when dependency missing

* fix(bigquery): return full ImportError message for missing dependency

* fix(bigquery): include 'help' key when dependency missing
2026-02-14 19:48:03 +08:00
mubarakar95 13d8ebbeff feat: Add Apify integration (issue #4510)
Implements comprehensive Apify integration for web scraping and automation:

- Added 4 new tools: apify_run_actor, apify_get_dataset, apify_get_run, apify_search_actors
- Credential management for APIFY_API_TOKEN with health check
- Support for synchronous (wait=True) and asynchronous (wait=False) actor execution
- Actor ID validation and comprehensive error handling
- Full test coverage (26 tests passing)
- README with usage examples and documentation

Addresses #4510
2026-02-14 11:53:56 +05:30
RichardTang-Aden 93c026fe31 Merge pull request #4759 from adenhq/feat/sample-agent/job_hunter
[Feature][Sample Agent]: Job Hunting Agent
2026-02-13 20:24:35 -08:00
bryan e515977b96 Merge branch 'main' into feat/vulnerability_agent 2026-02-13 20:16:48 -08:00
bryan 045490a097 testing agent run 1-5 2026-02-13 20:16:12 -08:00
bryan acf4bd5152 tools for sample agent 2026-02-13 19:14:59 -08:00
Timothy 1f5711e1a1 Merge branch 'fix/transient-error-handlings' into feat/inbox-management 2026-02-13 18:53:33 -08:00
Timothy 1f8a47203f fix: common transient errors and loop detection 2026-02-13 16:14:43 -08:00
Shivam Shahi– oss/acc 2efa0e01df ruff format fix 2026-02-14 00:35:30 +05:30
Shivam Shahi– oss/acc 6044369fdf feat(tools): add Zoho CRM v8 integration with OAuth2 and MCP tools
Add Zoho CRM MCP integration for lead/contact/account/deal workflows with notes support. Implements 5 MCP tools:
- zoho_crm_search: Search Leads/Contacts/Accounts/Deals by criteria or word with pagination
- zoho_crm_get_record: Fetch a single record by module and ID
- zoho_crm_create_record: Create records with pass-through field payloads
- zoho_crm_update_record: Update records by ID with partial field payloads
- zoho_crm_add_note: Create notes linked to CRM records via Parent_Id mapping

Features:
- Zoho OAuth2 provider added in core credentials (refresh-token flow)
- Zoho auth format: Authorization: Zoho-oauthtoken <token>
- Region/DC-aware routing using accounts domain/region + api_domain usage
- Persisted DC metadata on refresh (api_domain/accounts_domain/location)
- Credential spec and health check registration for zoho_crm
- Tool registration and allowed-tool list updates
- Normalized tool responses with retriable 429 handling
- README with setup, auth modes, usage, and testing instructions
- Comprehensive unit/integration coverage updates for tool, provider, and health checks

Validation:
- Scoped ruff lint/format checks passed
- Targeted test suite passed: 563 passed, 18 skipped

Closes #4418
2026-02-13 18:28:12 +05:30
mubarakar95 40e74e408b perf: reduce subprocess spawning in quickstart scripts (#4427)
## Problem
Windows process creation (CreateProcess) is 10-100x slower than Linux fork/exec.
The quickstart scripts were spawning 4+ separate `uv run python -c "import X"`
processes to verify imports, adding ~600ms overhead on Windows.

## Solution
Consolidated all import checks into a single batch script that checks multiple
modules in one subprocess call, reducing spawn overhead by ~75%.

## Changes
- **New**: `scripts/check_requirements.py` - Batched import checker
- **New**: `scripts/test_check_requirements.py` - Test suite
- **New**: `scripts/benchmark_quickstart.ps1` - Performance benchmark tool
- **Modified**: `quickstart.ps1` - Updated import verification (2 sections)
- **Modified**: `quickstart.sh` - Updated import verification

## Performance Impact
**Benchmark results on Windows:**
- Before: ~19.8 seconds for import checks
- After: ~4.9 seconds for import checks
- **Improvement: 14.9 seconds saved (75.2% faster)**

## Testing
- All functional tests pass (`scripts/test_check_requirements.py`)
- Quickstart scripts work correctly on Windows
- Error handling verified (invalid imports reported correctly)
- Performance benchmark confirms 75%+ improvement

Fixes #4427
2026-02-12 15:38:58 +05:30
RichardTang-Aden 97440f9e8a Merge branch 'main' into feature/x-twitter-integration 2026-02-11 17:13:33 -08:00
Rudra2637 906480a6e8 Guard unsupported criterion types in _evaluate_criterion 2026-02-12 01:12:52 +05:30
Your hh3538962 765f7cae58 feat(tools): add get_datasets, get_reports, and export_report functions to Power BI integration 2026-02-11 22:19:51 +05:00
Your hh3538962 b455c8a2ad Merge remote-tracking branch 'origin/main' into feat/power-bi-integration 2026-02-11 22:07:00 +05:00
Sapna vishnoi da25e0ffa5 Merge branch 'main' into feat/redshift-integration 2026-02-11 13:42:26 +05:30
Your hh3538962 e07703c01f feat(tools): add Power BI integration - initial structure with workspace and dataset refresh functions 2026-02-10 13:23:32 +05:00
mishrapravin114 a4abf3eb2b Merge upstream/main: resolve conflicts with Apollo integration
- Keep both APOLLO_CREDENTIALS and AIRTABLE_CREDENTIALS
- Keep both apollo_tool and airtable_tool imports (alphabetical)

Co-authored-by: Cursor <cursoragent@cursor.com>
2026-02-10 00:25:17 +05:30
mishrapravin114 269d72d073 Merge upstream/main: resolve conflicts with Apollo integration
- Keep both APOLLO_CREDENTIALS and CALENDLY_CREDENTIALS
- Keep both apollo_tool and calendly_tool imports (alphabetical)

Co-authored-by: Cursor <cursoragent@cursor.com>
2026-02-10 00:20:17 +05:30
mishrapravin114 c8f5dccbd2 docs(airtable): add rate limit section to README
Co-authored-by: Cursor <cursoragent@cursor.com>
2026-02-10 00:17:49 +05:30
mishrapravin114 8b797ee73f feat(airtable): add rate limit retry and retry_after
- Add 429 handling with retry_after from Retry-After header
- Add _request_with_retry (2 retries) for all API calls
- Update tests to use httpx.request

Co-authored-by: Cursor <cursoragent@cursor.com>
2026-02-10 00:17:37 +05:30
mishrapravin114 de38adb1e4 feat(calendly): add rate limit handling, retry, 7-day validation
- Add 429 handling with retry_after from Retry-After header
- Add _request_with_retry (2 retries) for all API calls
- Validate get_availability date range <= 7 days
- Update tests to use httpx.request

Co-authored-by: Cursor <cursoragent@cursor.com>
2026-02-10 00:16:37 +05:30
Sapna vishnoi c169bcc5d8 Merge branch 'main' into feat/redshift-integration 2026-02-09 23:32:08 +05:30
kubrakaradirek 80ea286beb fix: resolve complex merge conflicts and restore integrations 2026-02-09 16:09:43 +03:00
kubrakaradirek 3499be782e feat: implement MSSQL tool with schema discovery closes #3377 2026-02-09 15:32:57 +03:00
Gordon Ng 16603ae49c Test MCP 2026-02-09 01:48:49 -05:00
Gordon Ng bf6bd9ce7f test mcp 2026-02-09 01:48:46 -05:00
Gordon Ng a54c0f6f46 update 2026-02-09 01:20:25 -05:00
Gordon Ng beeed11d48 update 2026-02-09 01:11:33 -05:00
Manas Dutta 25331590a7 feat(reddit): add Reddit health checker and update tool functions 2026-02-08 19:26:01 +05:30
Nafiyad Adane cddae0ed18 Refactor Wikipedia tool and improve test structure
Reorganized imports in __init__.py for clarity and consistency. Cleaned up formatting and comments in wikipedia_tool.py. Enhanced test_wikipedia_tool.py by improving patching targets, clarifying comments, and refining test structure for better maintainability.
2026-02-07 18:49:49 -07:00
Nafiyad Adane 9dca42be27 Fix Wikipedia tool import order and test patching
Reorders imports in tools/__init__.py for clarity and groups web and PDF tools together. Updates Wikipedia tool tests to patch httpx.get using the correct import path, ensuring mocks work as intended. Removes unnecessary print statement in Wikipedia tool error handling.
2026-02-07 18:49:44 -07:00
Nafiyad Adane a1f3fe4d55 Add Wikipedia search tool and tests
Introduces a new 'search_wikipedia' tool for searching Wikipedia and retrieving article summaries using the public Wikipedia REST API. Updates documentation and tool registration, and adds unit tests for the new tool.
2026-02-07 18:49:38 -07:00
GastonAQS bff9f8976e Merge branch 'main' into feature/add-trello-integration 2026-02-07 15:57:48 -03:00
Manas Dutta b71628e211 Merge branch 'main' into feature/reddit-integration 2026-02-07 19:35:02 +05:30
Manas Dutta 8c1cb1f55b feat: add Reddit integration with 18 MCP tools
Implements Reddit API integration for community management and content monitoring.

Features:
- Search & Monitoring: search posts/comments, get subreddit feeds (new/hot), get posts/comments (6 tools)
- Content Creation: submit posts, reply, edit, delete comments (5 tools)
- User Engagement: get profiles, upvote, downvote, save posts (4 tools)
- Moderation: remove/approve posts, ban users (3 tools)

Implementation:
- OAuth 2.0 authentication via REDDIT_CREDENTIALS
- PRAW library for Reddit API integration
- Comprehensive error handling and validation
- Full test coverage (25 tests passing)

Resolves #3595
2026-02-07 18:38:59 +05:30
mishrapravin114 66214384a9 fix: add register_airtable import and fix ruff I001 import order
Co-authored-by: Cursor <cursoragent@cursor.com>
2026-02-07 17:18:26 +05:30
mishrapravin114 6d6646887c feat(tools): add Airtable bases and records integration
- Add Airtable tool with 5 MCP tools:
  - airtable_list_bases
  - airtable_list_tables
  - airtable_list_records (with filter/sort)
  - airtable_create_record
  - airtable_update_record
- Add AIRTABLE_CREDENTIALS with credentialSpec + credentialStore
- Add AirtableHealthChecker for token validation
- Add README with setup and usage
- Add unit tests (9 tests total)

Fixes #2911

Co-authored-by: Cursor <cursoragent@cursor.com>
2026-02-07 17:14:46 +05:30
mishrapravin114 6f8db0ed08 style: apply ruff format to calendly and health check files
Co-authored-by: Cursor <cursoragent@cursor.com>
2026-02-07 17:00:05 +05:30
mishrapravin114 6aaf6836ea fix(calendly): resolve ruff lint errors (UP017, E501)
Co-authored-by: Cursor <cursoragent@cursor.com>
2026-02-07 16:58:48 +05:30
mishrapravin114 4f2348f50e feat(tools): add Calendly scheduling integration
- Add Calendly tool with 4 MCP tools:
  - calendly_list_event_types
  - calendly_get_availability
  - calendly_get_booking_link
  - calendly_cancel_event
- Add CALENDLY_CREDENTIALS with credentialSpec + credentialStore
- Add CalendlyHealthChecker for token validation
- Add README with setup and usage
- Add unit tests (12 tests total)

Fixes #2930

Co-authored-by: Cursor <cursoragent@cursor.com>
2026-02-07 16:51:27 +05:30
RichardTang-Aden deb7f2f72a Merge pull request #3814 from Amdev-5/feature/x-twitter-integration
fix(tests): update credential group test for X integration
2026-02-06 09:16:42 -08:00
Amdev-5 d989d9c65a fix(tests): update credential group test for X integration
Add test_x_credentials_share_credential_group to verify all X credentials
share the 'x' credential group. Update test_credential_group_default_empty
to account for X credentials alongside existing Google exceptions.
2026-02-06 22:17:40 +05:30
bryan 4173c606ab Merge feature/x-twitter-final-integration from Amdev-5/hive - X (Twitter) tool with DM support 2026-02-06 08:03:43 -08:00
Amdev-5 a01430d20f Merge verification fixes into PR branch 2026-02-06 16:42:56 +05:30
Amdev-5 2a8f775732 feat(tools): enhance X tool with DM support and robust error handling
- Added `x_send_dm` tool using v2 endpoint (`POST /dm_conversations/with/:id/messages`) for reliable 1:1 messaging.
- Fixed 403 Forbidden payload validation errors by simplifying DM payload structure.
- Enhanced `_handle_response` to verify `x_tool.py` returns raw API error details for 403/400 responses, aiding in permission debugging.
- Updated `demo_x_tools.py` to support standard `.env` variable names (e.g., `X_API_KEY`) and added user lookup for DM testing.
- Added unit tests covering new DM functionality and payload verification in `test_x_tool.py`.
- Audited credential handling: Read-only tools (Search/Mentions) correctly use Bearer Token, while Write tools (Post/Reply/Delete/DM) enforce OAuth 1.0a User Context.

Verified with live API tests (see PR description for logs).
2026-02-06 15:48:20 +05:30
Sapna vishnoi 4a0d9b2855 Merge branch 'main' into feat/redshift-integration 2026-02-05 11:44:09 +05:30
y0sif 92c65d69ea chore: resolve merge conflicts with main 2026-02-05 07:13:36 +02:00
Yosif Soliman 910a8968c4 fix(linear): correct GraphQL variable type for workflow states query 2026-02-05 07:00:28 +02:00
Sapna vishnoi cdb4679c5a Merge branch 'main' into feat/redshift-integration 2026-02-05 00:05:38 +05:30
Sapna.Vishnoi 1a9dce89b4 feat(tools): Add Amazon Redshift integration
- Implement 5 core functions for data warehouse querying
- Add boto3 integration with Redshift Data API
- Security: Read-only SELECT queries by default
- Full credential store support
- 26/26 tests passing (100% coverage)
- Complete documentation with examples
2026-02-04 23:58:35 +05:30
Aneesh cf1e4d7f88 Merge remote-tracking branch 'origin/main' into feature/youtube-transcript 2026-02-04 19:46:52 +05:30
Aneesh f2f0b4fc61 feat(tools): add youtube transcript integration via youtube-transcript-api 2026-02-04 19:24:40 +05:30
y0sif b21dd25181 fix(linear): handle credential decryption errors gracefully, handle mcp tool issue with credentials 2026-02-04 05:21:23 +02:00
y0sif 04a18bcbe5 docs(tools): document Linear integration in README and setup credentials claude skill 2026-02-04 04:05:15 +02:00
y0sif 7f66dd67eb feat(linear): add OAuth setup instructions 2026-02-04 04:03:37 +02:00
y0sif cfa03b89c8 test(tools): add comprehensive Linear tool tests 2026-02-04 03:47:28 +02:00
y0sif 9866d7a22b feat(tools): add Linear project management integration 2026-02-04 03:47:03 +02:00
GastonAQS 331a6e442f feat: add Trello integration tools and API client 2026-02-03 10:32:25 -03:00
oluwasegun.haziz.omd 7fae57f311 more fixes 2026-02-03 11:53:45 +00:00
Sashank Thapa 1c2295b2b5 Merge branch 'adenhq:main' into feature/twitter-x-mcp-tool 2026-02-03 16:20:45 +05:30
haliaeetusvocifer 1f653969a9 added feature google docs integration 2026-02-03 03:51:39 +00:00
Sashank Thapa fa43ca3785 Merge branch 'adenhq:main' into feature/twitter-x-mcp-tool 2026-01-31 16:26:39 +05:30
kozuedoingregression b4a2c3bd14 ruff formatting and lint fixes 2026-01-31 16:18:16 +05:30
kozuedoingregression 2d4ec4f462 lint fix 2026-01-31 16:14:25 +05:30
kozuedoingregression 1e8b933da0 add X (Twitter) integration tool 2026-01-31 15:49:16 +05:30
Aneesh 48b1e0e038 Docs: clarify agent creation assumptions in Getting Started 2026-01-28 22:49:30 +05:30
718 changed files with 132979 additions and 31060 deletions
+3 -4
View File
@@ -492,7 +492,7 @@ AskUserQuestion(questions=[{
- node_id (kebab-case)
- name
- description
- node_type: `"event_loop"` (recommended for all LLM work) or `"function"` (deterministic, no LLM)
- node_type: `"event_loop"` (the only valid type; use `client_facing: True` for HITL)
- input_keys (what data this node receives)
- output_keys (what data this node produces)
- tools (ONLY tools that exist from Step 1 — empty list if no tools needed)
@@ -852,8 +852,7 @@ cd /home/timothy/oss/hive && PYTHONPATH=exports uv run python -m AGENT_NAME vali
| Type | tools param | Use when |
| ------------ | ----------------------- | --------------------------------------- |
| `event_loop` | `'["tool1"]'` or `'[]'` | LLM-powered work with or without tools |
| `function` | N/A | Deterministic Python operations, no LLM |
| `event_loop` | `'["tool1"]'` or `'[]'` | All agent work (with or without tools, HITL via client_facing) |
---
@@ -1008,7 +1007,7 @@ Use this reference during STEP 2 to give accurate, honest assessments.
| Sub-second responses | LLM latency is inherent | Traditional code, no LLM |
| Processing millions of items | Context windows and rate limits | Batch processing + sampling |
| Real-time streaming data | No built-in pub/sub or streaming input | Custom MCP server + agent |
| Guaranteed determinism | LLM outputs vary | Function nodes for deterministic parts |
| Guaranteed determinism | LLM outputs vary | Traditional code for deterministic parts |
| Offline/air-gapped | Requires LLM API access | Local models (not currently supported) |
| Multi-user concurrency | Single-user session model | Separate agent instances per user |
@@ -195,7 +195,7 @@ class DeepResearchAgent:
max_tokens=self.config.max_tokens,
loop_config={
"max_iterations": 100,
"max_tool_calls_per_turn": 20,
"max_tool_calls_per_turn": 30,
"max_history_tokens": 32000,
},
conversation_mode="continuous",
@@ -71,6 +71,12 @@ Important:
- Track which URL each finding comes from (you'll need citations later)
- Call set_output for each key in a SEPARATE turn (not in the same turn as other tool calls)
Context management:
- Your tool results are automatically saved to files. After compaction, the file \
references remain in the conversation — use load_data() to recover any content you need.
- Use append_data('research_notes.md', ...) to maintain a running log of key findings \
as you go. This survives compaction and helps the report node produce a detailed report.
When done, use set_output (one key at a time, separate turns):
- set_output("findings", "Structured summary: key findings with source URLs for each claim. \
Include themes, contradictions, and confidence levels.")
@@ -161,6 +167,9 @@ Requirements:
- Every factual claim must cite its source with [n] notation
- Be objective — present multiple viewpoints where sources disagree
- Answer the original research questions from the brief
- If findings appear incomplete or summarized, call list_data_files() and load_data() \
to access the detailed source material from the research phase. The research node's \
tool results and research_notes.md contain the full data.
Save the HTML:
save_data(filename="report.html", data="<html>...</html>")
+1 -1
View File
@@ -508,7 +508,7 @@ All credential specs are defined in `tools/src/aden_tools/credentials/`:
| `llm.py` | LLM Providers | `anthropic` | No |
| `search.py` | Search Tools | `brave_search`, `google_search`, `google_cse` | No |
| `email.py` | Email | `resend` | No |
| `integrations.py` | Integrations | `github`, `hubspot` | No / Yes |
| `integrations.py` | Integrations | `github`, `hubspot`, `google_calendar_oauth` | No / Yes |
**Note:** Additional LLM providers (Cerebras, Groq, OpenAI) are handled by LiteLLM via environment
variables (`CEREBRAS_API_KEY`, `GROQ_API_KEY`, `OPENAI_API_KEY`) but are not yet in CREDENTIAL_SPECS.
+3
View File
@@ -46,6 +46,7 @@ coverage/
# TypeScript
*.tsbuildinfo
vite.config.d.ts
# Python
__pycache__/
@@ -69,6 +70,7 @@ exports/*
.agent-builder-sessions/*
.claude/settings.local.json
.claude/skills/ship-it/
.venv
@@ -77,3 +79,4 @@ core/tests/*dumps/*
screenshots/*
.gemini/*
+34
View File
@@ -0,0 +1,34 @@
# Repository Guidelines
Shared agent instructions for this workspace.
## Deprecations
- **TUI is deprecated.** The terminal UI (`hive tui`) is no longer maintained. Use the browser-based interface (`hive open`) instead.
## Coding Agent Notes
-
- When working on a GitHub Issue or PR, print the full URL at the end of the task.
- When answering questions, respond with high-confidence answers only: verify in code; do not guess.
- Do not update dependencies casually. Version bumps, patched dependencies, overrides, or vendored dependency changes require explicit approval.
- Add brief comments for tricky logic. Keep files reasonably small when practical; split or refactor large files instead of growing them indefinitely.
- If shared guardrails are available locally, review them; otherwise follow this repo's guidance.
- Use `uv` for Python execution and package management. Do not use `python` or `python3` directly unless the user explicitly asks for it.
- Prefer `uv run` for scripts and tests, and `uv pip` for package operations.
## Multi-Agent Safety
- Do not create, apply, or drop `git stash` entries unless explicitly requested.
- Do not create, remove, or modify `git worktree` checkouts unless explicitly requested.
- Do not switch branches or check out a different branch unless explicitly requested.
- When the user says `push`, you may `git pull --rebase` to integrate latest changes, but never discard other in-progress work.
- When the user says `commit`, commit only your changes. When the user says `commit all`, commit everything in grouped chunks.
- When you see unrecognized files or unrelated changes, keep going and focus on your scoped changes.
## Change Hygiene
- If staged and unstaged diffs are formatting-only, resolve them without asking.
- If a commit or push was already requested, include formatting-only follow-up changes in that same commit when practical.
- Only stop to ask for confirmation when changes are semantic and may alter behavior.
+207
View File
@@ -0,0 +1,207 @@
# Release Notes
**Release Date:** February 18, 2026
**Tag:** v0.5.1
## The Hive Gets a Brain
v0.5.1 is our most ambitious release yet. Hive agents can now **build other agents** -- the new Hive Coder meta-agent writes, tests, and fixes agent packages from natural language. The runtime grows multi-graph support so one session can orchestrate multiple agents simultaneously. The TUI gets a complete overhaul with an in-app agent picker, live streaming, and seamless escalation to the Coder. And we're now provider-agnostic: Claude Code subscriptions, OpenAI-compatible endpoints, and any LiteLLM-supported model work out of the box.
---
## Highlights
### Hive Coder -- The Agent That Builds Agents
A native meta-agent that lives inside the framework at `core/framework/agents/hive_coder/`. Give it a natural-language specification and it produces a complete agent package -- goal definition, node prompts, edge routing, MCP tool wiring, tests, and all boilerplate files.
```bash
# Launch the Coder directly
hive code
# Or escalate from any running agent (TUI)
Ctrl+E # or /coder in chat
```
The Coder ships with:
- **Reference documentation** -- anti-patterns, construction guide, and design patterns baked into its system prompt
- **Guardian watchdog** -- an event-driven monitor that catches agent failures and triggers automatic remediation
- **Coder Tools MCP server** -- file I/O, fuzzy-match editing, git snapshots, and sandboxed shell execution (`tools/coder_tools_server.py`)
- **Test generation** -- structural tests for forever-alive agents that don't hang on `runner.run()`
### Multi-Graph Agent Runtime
`AgentRuntime` now supports loading, managing, and switching between multiple agent graphs within a single session. Six new lifecycle tools give agents (and the TUI) full control:
```python
# Load a second agent into the runtime
await runtime.add_graph("exports/deep_research_agent")
# Tools available to agents:
# load_agent, unload_agent, start_agent, restart_agent, list_agents, get_user_presence
```
The Hive Coder uses multi-graph internally -- when you escalate from a worker agent, the Coder loads as a separate graph while the worker stays alive in the background.
### TUI Revamp
The Terminal UI gets a ground-up rebuild with five major additions:
- **Agent Picker** (Ctrl+A) -- tabbed modal screen for browsing Your Agents, Framework agents, and Examples with metadata badges (node count, tool count, session count, tags)
- **Runtime-optional startup** -- TUI launches without a pre-loaded agent, showing the picker on first open
- **Live streaming pane** -- dedicated RichLog widget shows LLM tokens as they arrive, replacing the old one-token-per-line display
- **PDF attachments** -- `/attach` and `/detach` commands with native OS file dialog (macOS, Linux, Windows)
- **Multi-graph commands** -- `/graphs`, `/graph <id>`, `/load <path>`, `/unload <id>` for managing agent graphs in-session
### Provider-Agnostic LLM Support
Hive is no longer Anthropic-only. v0.5.1 adds first-class support for:
- **Claude Code subscriptions** -- `use_claude_code_subscription: true` in `~/.hive/configuration.json` reads OAuth tokens from `~/.claude/.credentials.json` with automatic refresh
- **OpenAI-compatible endpoints** -- `api_base` config routes traffic through any compatible API (Azure OpenAI, vLLM, Ollama, etc.)
- **Any LiteLLM model** -- `RuntimeConfig` now passes `api_key`, `api_base`, and `extra_kwargs` through to LiteLLM
The quickstart script auto-detects Claude Code subscriptions and ZAI Code installations.
---
## What's New
### Architecture & Runtime
- **Hive Coder meta-agent** -- Natural-language agent builder with reference docs, guardian watchdog, and `hive code` CLI command. (@TimothyZhang7)
- **Multi-graph agent sessions** -- `add_graph`/`remove_graph` on AgentRuntime with 6 lifecycle tools (`load_agent`, `unload_agent`, `start_agent`, `restart_agent`, `list_agents`, `get_user_presence`). (@TimothyZhang7)
- **Claude Code subscription support** -- OAuth token refresh via `use_claude_code_subscription` config, auto-detection in quickstart, LiteLLM header patching. (@TimothyZhang7)
- **OpenAI-compatible endpoint support** -- `api_base` and `extra_kwargs` in `RuntimeConfig` for any OpenAI-compatible API. (@TimothyZhang7)
- **Remove deprecated node types** -- Delete `FlexibleGraphExecutor`, `WorkerNode`, `HybridJudge`, `CodeSandbox`, `Plan`, `FunctionNode`, `LLMNode`, `RouterNode`. Deprecated types (`llm_tool_use`, `llm_generate`, `function`, `router`, `human_input`) now raise `RuntimeError` with migration guidance. (@TimothyZhang7)
- **Interactive credential setup** -- Guided `CredentialSetupSession` with health checks and encrypted storage, accessible via `hive setup-credentials` or automatic prompting on credential errors. (@RichardTang-Aden)
- **Pre-start confirmation prompt** -- Interactive prompt before agent execution allowing credential updates or abort. (@RichardTang-Aden)
- **Event bus multi-graph support** -- `graph_id` on events, `filter_graph` on subscriptions, `ESCALATION_REQUESTED` event type, `exclude_own_graph` filter. (@TimothyZhang7)
### TUI Improvements
- **In-app agent picker** (Ctrl+A) -- Tabbed modal for browsing agents with metadata badges (nodes, tools, sessions, tags). (@TimothyZhang7)
- **Runtime-optional TUI startup** -- Launches without a pre-loaded agent, shows agent picker on startup. (@TimothyZhang7)
- **Hive Coder escalation** (Ctrl+E) -- Escalate to Hive Coder and return; also available via `/coder` and `/back` chat commands. (@TimothyZhang7)
- **PDF attachment support** -- `/attach` and `/detach` commands with native OS file dialog. (@TimothyZhang7)
- **Streaming output pane** -- Dedicated RichLog widget for live LLM token streaming. (@TimothyZhang7)
- **Multi-graph TUI commands** -- `/graphs`, `/graph <id>`, `/load <path>`, `/unload <id>`. (@TimothyZhang7)
- **Agent Guardian watchdog** -- Event-driven monitor that catches secondary agent failures and triggers automatic remediation, with `--no-guardian` CLI flag. (@TimothyZhang7)
### New Tool Integrations
| Tool | Description | Contributor |
| ---------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------- | ------------------ |
| **Discord** | 4 MCP tools (`discord_list_guilds`, `discord_list_channels`, `discord_send_message`, `discord_get_messages`) with rate-limit retry and channel filtering | @mishrapravin114 |
| **Exa Search API** | 4 AI-powered search tools (`exa_search`, `exa_find_similar`, `exa_get_contents`, `exa_answer`) with neural/keyword search, domain filters, and citation-backed answers | @JeetKaria06 |
| **Razorpay** | 6 payment processing tools for payments, invoices, payment links, and refunds with HTTP Basic Auth | @shivamshahi07 |
| **Google Docs** | Document creation, reading, and editing with OAuth credential support | @haliaeetusvocifer |
| **Gmail enhancements** | Expanded mail operations for inbox management | @bryanadenhq |
### Infrastructure
- **Default node type → `event_loop`** -- `NodeSpec.node_type` defaults to `"event_loop"` instead of `"llm_tool_use"`. (@TimothyZhang7)
- **Default `max_node_visits` → 0 (unlimited)** -- Nodes default to unlimited visits, reducing friction for feedback loops and forever-alive agents. (@TimothyZhang7)
- **Remove `function` field from NodeSpec** -- Follows deprecation of `FunctionNode`. (@TimothyZhang7)
- **LiteLLM OAuth patch** -- Correct header construction for OAuth tokens (remove `x-api-key` when Bearer token is present). (@TimothyZhang7)
- **Orchestrator config centralization** -- Reads `api_key`, `api_base`, `extra_kwargs` from centralized `~/.hive/configuration.json`. (@TimothyZhang7)
- **System prompt datetime injection** -- All system prompts now include current date/time for time-aware agent behavior. (@TimothyZhang7)
- **Utils module exports** -- Proper `__init__.py` exports for the utils module. (@Siddharth2624)
- **Increased default max_tokens** -- Opus 4.6 defaults to 32768, Sonnet 4.5 to 16384 (up from 8192). (@TimothyZhang7)
---
## Bug Fixes
- Flush WIP accumulator outputs on cancel/failure so edge conditions see correct values on resume
- Stall detection state preserved across resume (no more resets on checkpoint restore)
- Skip client-facing blocking for event-triggered executions (timer/webhook)
- Executor retry override scoped to actual EventLoopNode instances only
- Add `_awaiting_input` flag to EventLoopNode to prevent input injection race conditions
- Fix TUI streaming display (tokens no longer appear one-per-line)
- Fix `_return_from_escalation` crash when ChatRepl widgets not yet mounted
- Fix tools registration problems for Google Docs credentials (@RichardTang-Aden)
- Fix email agent version conflicts (@RichardTang-Aden)
- Fix coder tool timeouts (120s for tests, 300s cap for commands)
## Documentation
- Clarify installation and prevent root pip install misuse (@paarths-collab)
---
## Agent Updates
- **Email Inbox Management** -- Consolidate `gmail_inbox_guardian` and `inbox_management` into a single unified agent with updated prompts and config. (@RichardTang-Aden, @bryanadenhq)
- **Job Hunter** -- Updated node prompts, config, and agent metadata; added PDF resume selection. (@bryanadenhq)
- **Deep Research Agent** -- Revised node implementations with updated prompts and output handling.
- **Tech News Reporter** -- Revised node prompts for improved output quality.
- **Vulnerability Assessment** -- Expanded prompts with more detailed assessment instructions. (@bryanadenhq)
---
## Breaking Changes
- **Deprecated node types raise `RuntimeError`** -- `llm_tool_use`, `llm_generate`, `function`, `router`, `human_input` now fail instead of warning. Migrate to `event_loop`.
- **`NodeSpec.node_type` defaults to `"event_loop"`** (was `"llm_tool_use"`)
- **`NodeSpec.max_node_visits` defaults to `0` / unlimited** (was `1`)
- **`NodeSpec.function` field removed** -- `FunctionNode` is deleted; use event_loop nodes with tools instead.
---
## Community Contributors
A huge thank you to everyone who contributed to this release:
- **Richard Tang** (@RichardTang-Aden) -- Interactive credential setup, pre-start confirmation, email agent consolidation, tool registration fixes, lint and formatting
- **Pravin Mishra** (@mishrapravin114) -- Discord integration with 4 MCP tools
- **Jeet Karia** (@JeetKaria06) -- Exa Search API integration with 4 AI-powered search tools
- **Shivam Shahi** (@shivamshahi07) -- Razorpay payment processing integration
- **Siddharth Varshney** (@Siddharth2624) -- Utils module exports
- **@haliaeetusvocifer** -- Google Docs integration with OAuth support
- **Bryan** (@bryanadenhq) -- PDF selection, inbox agent fixes, Job Hunter and Vulnerability Assessment updates
- **@paarths-collab** -- Documentation improvements
---
## Upgrading
```bash
git pull origin main
uv sync
```
### Migration Guide
If your agents use deprecated node types, update them:
```python
# Before (v0.5.0) -- these now raise RuntimeError
NodeSpec(node_type="llm_tool_use", ...)
NodeSpec(node_type="function", function=my_func, ...)
# After (v0.5.1) -- use event_loop for everything
NodeSpec(node_type="event_loop", ...) # or just omit node_type (it's the default now)
```
If your agents set `max_node_visits=1` explicitly, they'll still work. The only change is the _default_ -- new agents without an explicit value now get unlimited visits.
To try the new Hive Coder:
```bash
# Launch Coder directly
hive code
# Or from TUI -- press Ctrl+E to escalate
hive tui
```
---
## What's Next
- **Agent-to-agent communication** -- one agent's output triggers another agent's entry point
- **Cost visibility** -- detailed runtime log of LLM costs per node and per session
- **Persistent webhook subscriptions** -- survive agent restarts without re-registering
- **Remote agent deployment** -- run agents as long-lived services with HTTP APIs
Symlink
+1
View File
@@ -0,0 +1 @@
AGENTS.md
+2
View File
@@ -126,6 +126,8 @@ feat(component): add new feature description
- Use meaningful variable and function names
- Keep functions focused and small
For linting and formatting (Ruff, pre-commit hooks), see [Linting & Formatting Setup](docs/contributing-lint-setup.md).
## Testing
> **Note:** When testing agents in `exports/`, always set PYTHONPATH:
+10 -1
View File
@@ -1,4 +1,4 @@
.PHONY: lint format check test install-hooks help
.PHONY: lint format check test install-hooks help frontend-install frontend-dev frontend-build
help: ## Show this help
@grep -E '^[a-zA-Z_-]+:.*?## .*$$' $(MAKEFILE_LIST) | \
@@ -26,3 +26,12 @@ test: ## Run all tests
install-hooks: ## Install pre-commit hooks
uv pip install pre-commit
pre-commit install
frontend-install: ## Install frontend npm packages
cd core/frontend && npm install
frontend-dev: ## Start frontend dev server
cd core/frontend && npm run dev
frontend-build: ## Build frontend for production
cd core/frontend && npm run build
+147 -174
View File
@@ -14,7 +14,7 @@
</p>
<p align="center">
<a href="https://github.com/adenhq/hive/blob/main/LICENSE"><img src="https://img.shields.io/badge/License-Apache%202.0-blue.svg" alt="Apache 2.0 License" /></a>
<a href="https://github.com/aden-hive/hive/blob/main/LICENSE"><img src="https://img.shields.io/badge/License-Apache%202.0-blue.svg" alt="Apache 2.0 License" /></a>
<a href="https://www.ycombinator.com/companies/aden"><img src="https://img.shields.io/badge/Y%20Combinator-Aden-orange" alt="Y Combinator" /></a>
<a href="https://discord.com/invite/MXE49hrKDk"><img src="https://img.shields.io/discord/1172610340073242735?logo=discord&labelColor=%235462eb&logoColor=%23f5f5f5&color=%235462eb" alt="Discord" /></a>
<a href="https://x.com/aden_hq"><img src="https://img.shields.io/twitter/follow/teamaden?logo=X&color=%23f5f5f5" alt="Twitter Follow" /></a>
@@ -37,11 +37,11 @@
## Overview
Build autonomous, reliable, self-improving AI agents without hardcoding workflows. Define your goal through conversation with a coding agent, and the framework generates a node graph with dynamically created connection code. When things break, the framework captures failure data, evolves the agent through the coding agent, and redeploys. Built-in human-in-the-loop nodes, credential management, and real-time monitoring give you control without sacrificing adaptability.
Build autonomous, reliable, self-improving AI agents without hardcoding workflows. Define your goal through conversation with the Hive coding agent (queen), and the framework generates a node graph with dynamically created connection code. When things break, the framework captures failure data, evolves the agent through the coding agent, and redeploys. Built-in human-in-the-loop nodes, credential management, and real-time monitoring give you control without sacrificing adaptability.
Visit [adenhq.com](https://adenhq.com) for complete documentation, examples, and guides.
https://github.com/user-attachments/assets/846c0cc7-ffd6-47fa-b4b7-495494857a55
[![Hive Demo](https://img.youtube.com/vi/XDOG9fOaLjU/maxresdefault.jpg)](https://www.youtube.com/watch?v=XDOG9fOaLjU)
## Who Is Hive For?
@@ -50,7 +50,7 @@ Hive is designed for developers and teams who want to build **production-grade A
Hive is a good fit if you:
- Want AI agents that **execute real business processes**, not demos
- Prefer **goal-driven development** over hardcoded workflows
- Need **fast or high volume agent execution** over open workflow
- Need **self-healing and adaptive agents** that improve over time
- Require **human-in-the-loop control**, observability, and cost limits
- Plan to run agents in **production environments**
@@ -71,7 +71,7 @@ Use Hive when you need:
- **[Documentation](https://docs.adenhq.com/)** - Complete guides and API reference
- **[Self-Hosting Guide](https://docs.adenhq.com/getting-started/quickstart)** - Deploy Hive on your infrastructure
- **[Changelog](https://github.com/adenhq/hive/releases)** - Latest updates and releases
- **[Changelog](https://github.com/aden-hive/hive/releases)** - Latest updates and releases
- **[Roadmap](docs/roadmap.md)** - Upcoming features and plans
- **[Report Issues](https://github.com/adenhq/hive/issues)** - Bug reports and feature requests
- **[Contributing](CONTRIBUTING.md)** - How to contribute and submit PRs
@@ -81,17 +81,23 @@ Use Hive when you need:
### Prerequisites
- Python 3.11+ for agent development
- Claude Code, Codex CLI, or Cursor for utilizing agent skills
- An LLM provider that powers the agents
> **Note for Windows Users:** It is strongly recommended to use **WSL (Windows Subsystem for Linux)** or **Git Bash** to run this framework. Some core automation scripts may not execute correctly in standard Command Prompt or PowerShell.
### Installation
> **Note**
> Hive uses a `uv` workspace layout and is not installed with `pip install`.
> Running `pip install -e .` from the repository root will create a placeholder package and Hive will not function correctly.
> Please use the quickstart script below to set up the environment.
```bash
# Clone the repository
git clone https://github.com/adenhq/hive.git
git clone https://github.com/aden-hive/hive.git
cd hive
# Run quickstart setup
./quickstart.sh
```
@@ -104,78 +110,48 @@ This sets up:
- **LLM provider** - Interactive default model configuration
- All required Python dependencies with `uv`
- Finally, it opens the Hive interface in your browser
> **Tip:** To reopen the dashboard later, run `hive open` from the project directory.
<img width="2500" height="1214" alt="home-screen" src="https://github.com/user-attachments/assets/134d897f-5e75-4874-b00b-e0505f6b45c4" />
### Build Your First Agent
```bash
# Build an agent using Claude Code
claude> /hive
Type the agent you want to build in the home input box
# Test your agent
claude> /hive-debugger
<img width="2500" height="1214" alt="Image" src="https://github.com/user-attachments/assets/1ce19141-a78b-46f5-8d64-dbf987e048f4" />
# (at separate terminal) Launch the interactive dashboard
hive tui
### Use Template Agents
# Or run directly
hive run exports/your_agent_name --input '{"key": "value"}'
```
## Coding Agent Support
### Codex CLI
Hive includes native support for [OpenAI Codex CLI](https://github.com/openai/codex) (v0.101.0+).
Click "Try a sample agent" and check the templates. You can run a template directly or choose to build your own version on top of an existing template.
1. **Config:** `.codex/config.toml` with `agent-builder` MCP server (tracked in git)
2. **Skills:** `.agents/skills/` symlinks to Hive skills (tracked in git)
3. **Launch:** Run `codex` in the repo root, then type `use hive`
### Run Agents
Example:
```
codex> use hive
```
Now you can run an agent by selecting it (either an existing agent or an example agent). You can click the Run button at the top left, or talk to the queen agent and it can run the agent for you.
### Opencode
Hive includes native support for [Opencode](https://github.com/opencode-ai/opencode).
1. **Setup:** Run the quickstart script
2. **Launch:** Open Opencode in the project root.
3. **Activate:** Type `/hive` in the chat to switch to the Hive Agent.
4. **Verify:** Ask the agent *"List your tools"* to confirm the connection.
The agent has access to all Hive skills and can scaffold agents, add tools, and debug workflows directly from the chat.
**[📖 Complete Setup Guide](docs/environment-setup.md)** - Detailed instructions for agent development
### Antigravity IDE Support
Skills and MCP servers are also available in [Antigravity IDE](https://antigravity.google/) (Google's AI-powered IDE). **Easiest:** open a terminal in the hive repo folder and run (use `./` — the script is inside the repo):
```bash
./scripts/setup-antigravity-mcp.sh
```
**Important:** Always restart/refresh Antigravity IDE after running the setup script—MCP servers only load on startup. After restart, **agent-builder** and **tools** MCP servers should connect. Skills are under `.agent/skills/` (symlinks to `.claude/skills/`). See [docs/antigravity-setup.md](docs/antigravity-setup.md) for manual setup and troubleshooting.
<img width="2500" height="1214" alt="Image" src="https://github.com/user-attachments/assets/71c38206-2ad5-49aa-bde8-6698d0bc55f5" />
## Features
- **[Goal-Driven Development](docs/key_concepts/goals_outcome.md)** - Define objectives in natural language; the coding agent generates the agent graph and connection code to achieve them
- **Browser-Use** - Control the browser on your computer to achieve hard tasks
- **Parallel Execution** - Execute the generated graph in parallel, so you can have multiple agents completing jobs for you at the same time
- **[Goal-Driven Generation](docs/key_concepts/goals_outcome.md)** - Define objectives in natural language; the coding agent generates the agent graph and connection code to achieve them
- **[Adaptiveness](docs/key_concepts/evolution.md)** - Framework captures failures, calibrates according to the objectives, and evolves the agent graph
- **[Dynamic Node Connections](docs/key_concepts/graph.md)** - No predefined edges; connection code is generated by any capable LLM based on your goals
- **SDK-Wrapped Nodes** - Every node gets shared memory, local RLM memory, monitoring, tools, and LLM access out of the box
- **[Human-in-the-Loop](docs/key_concepts/graph.md#human-in-the-loop)** - Intervention nodes that pause execution for human input with configurable timeouts and escalation
- **Real-time Observability** - WebSocket streaming for live monitoring of agent execution, decisions, and node-to-node communication
- **Interactive TUI Dashboard** - Terminal-based dashboard with live graph view, event log, and chat interface for agent interaction
- **Cost & Budget Control** - Set spending limits, throttles, and automatic model degradation policies
- **Production-Ready** - Self-hostable, built for scale and reliability
## Integration
<a href="https://github.com/adenhq/hive/tree/main/tools/src/aden_tools/tools"><img width="100%" alt="Integration" src="https://github.com/user-attachments/assets/a1573f93-cf02-4bb8-b3d5-b305b05b1e51" /></a>
<a href="https://github.com/aden-hive/hive/tree/main/tools/src/aden_tools/tools"><img width="100%" alt="Integration" src="https://github.com/user-attachments/assets/a1573f93-cf02-4bb8-b3d5-b305b05b1e51" /></a>
Hive is built to be model-agnostic and system-agnostic.
- **LLM flexibility** - Hive Framework is designed to support various types of LLMs, including hosted and local models through LiteLLM-compatible providers.
- **Business system connectivity** - Hive Framework is designed to connect to all kinds of business systems as tools, such as CRM, support, messaging, data, file, and internal APIs via MCP.
## Why Aden
Hive focuses on generating agents that run real business processes rather than generic agents. Instead of requiring you to manually design workflows, define agent interactions, and handle failures reactively, Hive flips the paradigm: **you describe outcomes, and the system builds itself**—delivering an outcome-driven, adaptive experience with an easy-to-use set of tools and integrations.
@@ -231,35 +207,10 @@ flowchart LR
4. **Control Plane Monitors** → Real-time metrics, budget enforcement, policy management
5. **[Adaptiveness](docs/key_concepts/evolution.md)** → On failure, the system evolves the graph and redeploys automatically
## Run Agents
The `hive` CLI is the primary interface for running agents.
```bash
# Browse and run agents interactively (Recommended)
hive tui
# Run a specific agent directly
hive run exports/my_agent --input '{"task": "Your input here"}'
# Run a specific agent with the TUI dashboard
hive run exports/my_agent --tui
# Interactive REPL
hive shell
```
The TUI scans both `exports/` and `examples/templates/` for available agents.
> **Using Python directly (alternative):** You can also run agents with `PYTHONPATH=exports uv run python -m agent_name run --input '{...}'`
See [environment-setup.md](docs/environment-setup.md) for complete setup instructions.
## Documentation
- **[Developer Guide](docs/developer-guide.md)** - Comprehensive guide for developers
- [Getting Started](docs/getting-started.md) - Quick setup instructions
- [TUI Guide](docs/tui-selection-guide.md) - Interactive dashboard usage
- [Configuration Guide](docs/configuration.md) - All configuration options
- [Architecture Overview](docs/architecture/README.md) - System design and structure
@@ -268,98 +219,128 @@ See [environment-setup.md](docs/environment-setup.md) for complete setup instruc
Aden Hive Agent Framework aims to help developers build outcome-oriented, self-adaptive agents. See [roadmap.md](docs/roadmap.md) for details.
```mermaid
flowchart TD
subgraph Foundation
direction LR
subgraph arch["Architecture"]
a1["Node-Based Architecture"]:::done
a2["Python SDK"]:::done
a3["LLM Integration"]:::done
a4["Communication Protocol"]:::done
end
subgraph ca["Coding Agent"]
b1["Goal Creation Session"]:::done
b2["Worker Agent Creation"]
b3["MCP Tools"]:::done
end
subgraph wa["Worker Agent"]
c1["Human-in-the-Loop"]:::done
c2["Callback Handlers"]:::done
c3["Intervention Points"]:::done
c4["Streaming Interface"]
end
subgraph cred["Credentials"]
d1["Setup Process"]:::done
d2["Pluggable Sources"]:::done
d3["Enterprise Secrets"]
d4["Integration Tools"]:::done
end
subgraph tools["Tools"]
e1["File Use"]:::done
e2["Memory STM/LTM"]:::done
e3["Web Search/Scraper"]:::done
e4["CSV/PDF"]:::done
e5["Excel/Email"]
end
subgraph core["Core"]
f1["Eval System"]
f2["Pydantic Validation"]:::done
f3["Documentation"]:::done
f4["Adaptiveness"]
f5["Sample Agents"]
end
end
flowchart TB
%% Main Entity
User([User])
subgraph Expansion
direction LR
subgraph intel["Intelligence"]
g1["Guardrails"]
g2["Streaming Mode"]
g3["Image Generation"]
g4["Semantic Search"]
%% =========================================
%% EXTERNAL EVENT SOURCES
%% =========================================
subgraph ExtEventSource [External Event Source]
E_Sch["Schedulers"]
E_WH["Webhook"]
E_SSE["SSE"]
end
subgraph mem["Memory Iteration"]
h1["Message Model & Sessions"]
h2["Storage Migration"]
h3["Context Building"]
h4["Proactive Compaction"]
h5["Token Tracking"]
end
subgraph evt["Event System"]
i1["Event Bus for Nodes"]
end
subgraph cas["Coding Agent Support"]
j1["Claude Code"]
j2["Cursor"]
j3["Opencode"]
j4["Antigravity"]
j5["Codex CLI"]
end
subgraph plat["Platform"]
k1["JavaScript/TypeScript SDK"]
k2["Custom Tool Integrator"]
k3["Windows Support"]
end
subgraph dep["Deployment"]
l1["Self-Hosted"]
l2["Cloud Services"]
l3["CI/CD Pipeline"]
end
subgraph tmpl["Templates"]
m1["Sales Agent"]
m2["Marketing Agent"]
m3["Analytics Agent"]
m4["Training Agent"]
m5["Smart Form Agent"]
end
end
classDef done fill:#9e9e9e,color:#fff,stroke:#757575
%% =========================================
%% SYSTEM NODES
%% =========================================
subgraph WorkerBees [Worker Bees]
WB_C["Conversation"]
WB_SP["System prompt"]
subgraph Graph [Graph]
direction TB
N1["Node"] --> N2["Node"] --> N3["Node"]
N1 -.-> AN["Active Node"]
N2 -.-> AN
N3 -.-> AN
%% Nested Event Loop Node
subgraph EventLoopNode [Event Loop Node]
ELN_L["listener"]
ELN_SP["System Prompt<br/>(Task)"]
ELN_EL["Event loop"]
ELN_C["Conversation"]
end
end
end
subgraph JudgeNode [Judge]
J_C["Criteria"]
J_P["Principles"]
J_EL["Event loop"] <--> J_S["Scheduler"]
end
subgraph QueenBee [Queen Bee]
QB_SP["System prompt"]
QB_EL["Event loop"]
QB_C["Conversation"]
end
subgraph Infra [Infra]
SA["Sub Agent"]
TR["Tool Registry"]
WTM["Write through Conversation Memory<br/>(Logs/RAM/Harddrive)"]
SM["Shared Memory<br/>(State/Harddrive)"]
EB["Event Bus<br/>(RAM)"]
CS["Credential Store<br/>(Harddrive/Cloud)"]
end
subgraph PC [PC]
B["Browser"]
CB["Codebase<br/>v 0.0.x ... v n.n.n"]
end
%% =========================================
%% CONNECTIONS & DATA FLOW
%% =========================================
%% External Event Routing
E_Sch --> ELN_L
E_WH --> ELN_L
E_SSE --> ELN_L
ELN_L -->|"triggers"| ELN_EL
%% User Interactions
User -->|"Talk"| WB_C
User -->|"Talk"| QB_C
User -->|"Read/Write Access"| CS
%% Inter-System Logic
ELN_C <-->|"Mirror"| WB_C
WB_C -->|"Focus"| AN
WorkerBees -->|"Inquire"| JudgeNode
JudgeNode -->|"Approve"| WorkerBees
%% Judge Alignments
J_C <-.->|"aligns"| WB_SP
J_P <-.->|"aligns"| QB_SP
%% Escalate path
J_EL -->|"Report (Escalate)"| QB_EL
%% Pub/Sub Logic
AN -->|"publish"| EB
EB -->|"subscribe"| QB_C
%% Infra and Process Spawning
ELN_EL -->|"Spawn"| SA
SA -->|"Inform"| ELN_EL
SA -->|"Starts"| B
B -->|"Report"| ELN_EL
TR -->|"Assigned"| ELN_EL
CB -->|"Modify Worker Bee"| WB_C
%% =========================================
%% SHARED MEMORY & LOGS ACCESS
%% =========================================
%% Worker Bees Access (link to node inside Graph subgraph)
AN <-->|"Read/Write"| WTM
AN <-->|"Read/Write"| SM
%% Queen Bee Access
QB_C <-->|"Read/Write"| WTM
QB_EL <-->|"Read/Write"| SM
%% Credentials Access
CS -->|"Read Access"| QB_C
```
## Contributing
We welcome contributions from the community! We're especially looking for help building tools, integrations, and example agents for the framework ([check #2805](https://github.com/adenhq/hive/issues/2805)). If you're interested in extending its functionality, this is the perfect place to start. Please see [CONTRIBUTING.md](CONTRIBUTING.md) for guidelines.
We welcome contributions from the community! We're especially looking for help building tools, integrations, and example agents for the framework ([check #2805](https://github.com/aden-hive/hive/issues/2805)). If you're interested in extending its functionality, this is the perfect place to start. Please see [CONTRIBUTING.md](CONTRIBUTING.md) for guidelines.
**Important:** Please get assigned to an issue before submitting a PR. Comment on an issue to claim it, and a maintainer will assign you. Issues with reproducible steps and proposals are prioritized. This helps prevent duplicate work.
@@ -396,7 +377,7 @@ This project is licensed under the Apache License 2.0 - see the [LICENSE](LICENS
**Q: What LLM providers does Hive support?**
Hive supports 100+ LLM providers through LiteLLM integration, including OpenAI (GPT-4, GPT-4o), Anthropic (Claude models), Google Gemini, DeepSeek, Mistral, Groq, and many more. Simply set the appropriate API key environment variable and specify the model name.
Hive supports 100+ LLM providers through LiteLLM integration, including OpenAI (GPT-4, GPT-4o), Anthropic (Claude models), Google Gemini, DeepSeek, Mistral, Groq, and many more. Simply set the appropriate API key environment variable and specify the model name. We recommend using Claude, GLM and Gemini as they have the best performance.
**Q: Can I use Hive with local AI models like Ollama?**
@@ -438,14 +419,6 @@ Visit [docs.adenhq.com](https://docs.adenhq.com/) for complete guides, API refer
Contributions are welcome! Fork the repository, create your feature branch, implement your changes, and submit a pull request. See [CONTRIBUTING.md](CONTRIBUTING.md) for detailed guidelines.
**Q: When will my team start seeing results from Aden's adaptive agents?**
Aden's adaptation loop begins working from the first execution. When an agent fails, the framework captures the failure data, helping developers evolve the agent graph through the coding agent. How quickly this translates to measurable results depends on the complexity of your use case, the quality of your goal definitions, and the volume of executions generating feedback.
**Q: How does Hive compare to other agent frameworks?**
Hive focuses on generating agents that run real business processes, rather than generic agents. This vision emphasizes outcome-driven design, adaptability, and an easy-to-use set of tools and integrations.
---
<p align="center">
+31
View File
@@ -0,0 +1,31 @@
perf: reduce subprocess spawning in quickstart scripts (#4427)
## Problem
Windows process creation (CreateProcess) is 10-100x slower than Linux fork/exec.
The quickstart scripts were spawning 4+ separate `uv run python -c "import X"`
processes to verify imports, adding ~600ms overhead on Windows.
## Solution
Consolidated all import checks into a single batch script that checks multiple
modules in one subprocess call, reducing spawn overhead by ~75%.
## Changes
- **New**: `scripts/check_requirements.py` - Batched import checker
- **New**: `scripts/test_check_requirements.py` - Test suite
- **New**: `scripts/benchmark_quickstart.ps1` - Performance benchmark tool
- **Modified**: `quickstart.ps1` - Updated import verification (2 sections)
- **Modified**: `quickstart.sh` - Updated import verification
## Performance Impact
**Benchmark results on Windows:**
- Before: ~19.8 seconds for import checks
- After: ~4.9 seconds for import checks
- **Improvement: 14.9 seconds saved (75.2% faster)**
## Testing
- ✅ All functional tests pass (`scripts/test_check_requirements.py`)
- ✅ Quickstart scripts work correctly on Windows
- ✅ Error handling verified (invalid imports reported correctly)
- ✅ Performance benchmark confirms 75%+ improvement
Fixes #4427
+3 -3
View File
@@ -82,7 +82,7 @@ Register an MCP server as a tool source for your agent.
"example_tool"
],
"total_mcp_servers": 1,
"note": "MCP server 'tools' registered with 6 tools. These tools can now be used in llm_tool_use nodes."
"note": "MCP server 'tools' registered with 6 tools. These tools can now be used in event_loop nodes."
}
```
@@ -149,7 +149,7 @@ List tools available from registered MCP servers.
]
},
"total_tools": 6,
"note": "Use these tool names in the 'tools' parameter when adding llm_tool_use nodes"
"note": "Use these tool names in the 'tools' parameter when adding event_loop nodes"
}
```
@@ -246,7 +246,7 @@ Here's a complete workflow for building an agent with MCP tools:
"node_id": "web-searcher",
"name": "Web Search",
"description": "Search the web for information",
"node_type": "llm_tool_use",
"node_type": "event_loop",
"input_keys": "[\"query\"]",
"output_keys": "[\"search_results\"]",
"system_prompt": "Search for {query} using the web_search tool",
+2 -2
View File
@@ -119,7 +119,7 @@ builder = WorkflowBuilder()
builder.add_node(
node_id="researcher",
name="Web Researcher",
node_type="llm_tool_use",
node_type="event_loop",
system_prompt="Research the topic using web_search",
tools=["web_search"], # Tool from tools MCP server
input_keys=["topic"],
@@ -137,7 +137,7 @@ Tools from MCP servers can be referenced in your agent.json just like built-in t
{
"id": "searcher",
"name": "Web Searcher",
"node_type": "llm_tool_use",
"node_type": "event_loop",
"system_prompt": "Search for information about {topic}",
"tools": ["web_search", "web_scrape"],
"input_keys": ["topic"],
+17 -70
View File
@@ -103,31 +103,20 @@ Add a processing node to the agent graph.
- `node_id` (string, required): Unique node identifier
- `name` (string, required): Human-readable name
- `description` (string, required): What this node does
- `node_type` (string, required): One of: `llm_generate`, `llm_tool_use`, `router`, `function`
- `node_type` (string, required): Must be `event_loop` (the only valid type)
- `input_keys` (string, required): JSON array of input variable names
- `output_keys` (string, required): JSON array of output variable names
- `system_prompt` (string, optional): System prompt for LLM nodes
- `tools` (string, optional): JSON array of tool names for tool_use nodes
- `routes` (string, optional): JSON object of route mappings for router nodes
- `system_prompt` (string, optional): System prompt for the LLM
- `tools` (string, optional): JSON array of tool names
- `client_facing` (boolean, optional): Set to true for human-in-the-loop interaction
**Node Types:**
**Node Type:**
1. **llm_generate**: Uses LLM to generate output from inputs
- Requires: `system_prompt`
- Tools: Not used
2. **llm_tool_use**: Uses LLM with tools to accomplish tasks
- Requires: `system_prompt`, `tools`
- Tools: Array of tool names (e.g., `["web_search", "web_fetch"]`)
3. **router**: LLM-powered routing to different paths
- Requires: `system_prompt`, `routes`
- Routes: Object mapping route names to target node IDs
- Example: `{"pass": "success_node", "fail": "retry_node"}`
4. **function**: Executes a pre-defined function
- System prompt describes the function behavior
- No LLM calls, pure computation
**event_loop**: LLM-powered node with self-correction loop
- Requires: `system_prompt`
- Optional: `tools` (array of tool names, e.g., `["web_search", "web_fetch"]`)
- Optional: `client_facing` (set to true for HITL / user interaction)
- Supports: iterative refinement, judge-based evaluation, tool use, streaming
**Example:**
```json
@@ -135,7 +124,7 @@ Add a processing node to the agent graph.
"node_id": "search_sources",
"name": "Search Sources",
"description": "Searches for relevant sources on the topic",
"node_type": "llm_tool_use",
"node_type": "event_loop",
"input_keys": "[\"topic\", \"search_queries\"]",
"output_keys": "[\"sources\", \"source_count\"]",
"system_prompt": "Search for sources using the provided queries...",
@@ -198,7 +187,7 @@ Export the validated graph as an agent specification.
**What it does:**
1. Validates the graph
2. Auto-generates missing edges from router routes
2. Validates edge connectivity
3. Writes files to disk:
- `exports/{agent-name}/agent.json` - Full agent specification
- `exports/{agent-name}/README.md` - Auto-generated documentation
@@ -252,47 +241,6 @@ Test the complete agent graph with sample inputs.
---
### Evaluation Rules
#### `add_evaluation_rule`
Add a rule for the HybridJudge to evaluate node outputs.
**Parameters:**
- `rule_id` (string, required): Unique rule identifier
- `description` (string, required): What this rule checks
- `condition` (string, required): Python expression to evaluate
- `action` (string, required): Action to take: `accept`, `retry`, `escalate`
- `priority` (integer, optional): Rule priority (default: 0)
- `feedback_template` (string, optional): Feedback message template
**Condition Examples:**
- `'result.get("success") == True'` - Check for success flag
- `'result.get("error_type") == "timeout"'` - Check error type
- `'len(result.get("data", [])) > 0'` - Check for non-empty data
**Example:**
```json
{
"rule_id": "timeout_retry",
"description": "Retry on timeout errors",
"condition": "result.get('error_type') == 'timeout'",
"action": "retry",
"priority": 10,
"feedback_template": "Timeout occurred, retrying..."
}
```
#### `list_evaluation_rules`
List all configured evaluation rules.
#### `remove_evaluation_rule`
Remove an evaluation rule.
**Parameters:**
- `rule_id` (string, required): Rule to remove
---
## Example Workflow
Here's a complete workflow for building a research agent:
@@ -320,7 +268,7 @@ add_node(
node_id="planner",
name="Research Planner",
description="Creates research strategy",
node_type="llm_generate",
node_type="event_loop",
input_keys='["topic"]',
output_keys='["strategy", "queries"]',
system_prompt="Analyze topic and create research plan..."
@@ -330,7 +278,7 @@ add_node(
node_id="searcher",
name="Search Sources",
description="Find relevant sources",
node_type="llm_tool_use",
node_type="event_loop",
input_keys='["queries"]',
output_keys='["sources"]',
system_prompt="Search for sources...",
@@ -359,10 +307,9 @@ The exported agent will be saved to `exports/research-agent/`.
1. **Start with the goal**: Define clear success criteria before building nodes
2. **Test nodes individually**: Use `test_node` to verify each node works
3. **Use router nodes for branching**: Don't create edges manually for routers - define routes and they'll be auto-generated
4. **Add evaluation rules**: Help the judge evaluate outputs deterministically
5. **Validate early, validate often**: Run `validate_graph` after adding nodes/edges
6. **Check exports**: Review the generated README.md to verify your agent structure
3. **Use conditional edges for branching**: Define condition_expr on edges for decision points
4. **Validate early, validate often**: Run `validate_graph` after adding nodes/edges
5. **Check exports**: Review the generated README.md to verify your agent structure
---
+10 -10
View File
@@ -64,7 +64,7 @@ To use the agent builder with Claude Desktop or other MCP clients, add this to y
"agent-builder": {
"command": "python",
"args": ["-m", "framework.mcp.agent_builder_server"],
"cwd": "/path/to/goal-agent"
"cwd": "/path/to/hive/core"
}
}
}
@@ -73,7 +73,7 @@ To use the agent builder with Claude Desktop or other MCP clients, add this to y
The MCP server provides tools for:
- Creating agent building sessions
- Defining goals with success criteria
- Adding nodes (llm_generate, llm_tool_use, router, function)
- Adding nodes (event_loop only)
- Connecting nodes with edges
- Validating and exporting agent graphs
- Testing nodes and full agent graphs
@@ -85,14 +85,14 @@ The MCP server provides tools for:
Run an LLM-powered calculator:
```bash
# Single calculation
uv run python -m framework calculate "2 + 3 * 4"
# Run an exported agent
uv run python -m framework run exports/calculator --input '{"expression": "2 + 3 * 4"}'
# Interactive mode
uv run python -m framework interactive
# Interactive shell session
uv run python -m framework shell exports/calculator
# Analyze runs with Builder
uv run python -m framework analyze calculator
# Show agent info
uv run python -m framework info exports/calculator
```
### Using the Runtime
@@ -141,8 +141,8 @@ uv run python -m framework test-run <agent_path> --goal <goal_id> --parallel 4
# Debug failed tests
uv run python -m framework test-debug <agent_path> <test_name>
# List tests for a goal
uv run python -m framework test-list <goal_id>
# List tests for an agent
uv run python -m framework test-list <agent_path>
```
For detailed testing workflows, see the [hive-test skill](../.claude/skills/hive-test/SKILL.md).
+387
View File
@@ -0,0 +1,387 @@
"""OpenAI Codex OAuth PKCE login flow.
Runs the full browser-based OAuth flow so users can authenticate with their
ChatGPT Plus/Pro subscription without needing the Codex CLI installed.
Usage (from quickstart.sh):
uv run python codex_oauth.py
Exit codes:
0 - success (credentials saved to ~/.codex/auth.json)
1 - failure (user cancelled, timeout, or token exchange error)
"""
import base64
import hashlib
import http.server
import json
import os
import platform
import secrets
import subprocess
import sys
import threading
import time
import urllib.error
import urllib.parse
import urllib.request
from datetime import UTC, datetime
from pathlib import Path
# OAuth constants (from the Codex CLI binary)
CLIENT_ID = "app_EMoamEEZ73f0CkXaXp7hrann"
AUTHORIZE_URL = "https://auth.openai.com/oauth/authorize"
TOKEN_URL = "https://auth.openai.com/oauth/token"
# The port and path in REDIRECT_URI must stay in sync with CALLBACK_PORT and
# with the "/auth/callback" path the local callback handler accepts.
REDIRECT_URI = "http://localhost:1455/auth/callback"
SCOPE = "openid profile email offline_access"
# Local port the callback HTTP server binds to on 127.0.0.1.
CALLBACK_PORT = 1455
# Where to save credentials (same location the Codex CLI uses)
CODEX_AUTH_FILE = Path.home() / ".codex" / "auth.json"
# JWT claim path for account_id: the access-token payload key whose value is a
# dict containing "chatgpt_account_id".
JWT_CLAIM_PATH = "https://api.openai.com/auth"
def _base64url(data: bytes) -> str:
    """Encode *data* as URL-safe base64 text with trailing ``=`` padding removed."""
    encoded = base64.urlsafe_b64encode(data)
    unpadded = encoded.rstrip(b"=")
    return unpadded.decode("ascii")
def generate_pkce() -> tuple[str, str]:
    """Create a fresh PKCE pair.

    Returns:
        ``(code_verifier, code_challenge)`` where the verifier is 32 random
        bytes in unpadded URL-safe base64 and the challenge is the S256
        transform of it (SHA-256 of the ASCII verifier, same encoding).
    """
    verifier = _base64url(secrets.token_bytes(32))
    digest = hashlib.sha256(verifier.encode("ascii")).digest()
    return verifier, _base64url(digest)
def build_authorize_url(state: str, challenge: str) -> str:
    """Return the OpenAI authorize URL for a PKCE (S256) authorization-code flow.

    Args:
        state: Opaque anti-forgery value echoed back on the redirect.
        challenge: The PKCE ``code_challenge`` derived from the verifier.
    """
    query_fields = [
        ("response_type", "code"),
        ("client_id", CLIENT_ID),
        ("redirect_uri", REDIRECT_URI),
        ("scope", SCOPE),
        ("code_challenge", challenge),
        ("code_challenge_method", "S256"),
        ("state", state),
        ("id_token_add_organizations", "true"),
        ("codex_cli_simplified_flow", "true"),
        ("originator", "hive"),
    ]
    query = urllib.parse.urlencode(query_fields)
    return AUTHORIZE_URL + "?" + query
def exchange_code_for_tokens(code: str, verifier: str) -> dict | None:
    """Exchange the authorization code for tokens.

    Args:
        code: Authorization code received on the OAuth redirect.
        verifier: The PKCE ``code_verifier`` that matches the challenge sent
            in the authorize request.

    Returns:
        The decoded token response (a dict containing at least non-empty
        ``access_token`` and ``refresh_token``), or ``None`` after printing an
        error to stderr when the request fails or the response is unusable.
    """
    data = urllib.parse.urlencode(
        {
            "grant_type": "authorization_code",
            "client_id": CLIENT_ID,
            "code": code,
            "code_verifier": verifier,
            "redirect_uri": REDIRECT_URI,
        }
    ).encode("utf-8")

    req = urllib.request.Request(
        TOKEN_URL,
        data=data,
        headers={"Content-Type": "application/x-www-form-urlencoded"},
        method="POST",
    )

    try:
        with urllib.request.urlopen(req, timeout=15) as resp:
            token_data = json.loads(resp.read())
    # OSError covers URLError/HTTPError/TimeoutError; ValueError covers
    # JSONDecodeError as well as UnicodeDecodeError from a non-UTF body.
    except (OSError, ValueError) as exc:
        print(f"\033[0;31mToken exchange failed: {exc}\033[0m", file=sys.stderr)
        return None

    # Valid JSON is not necessarily an object; guard before calling .get().
    if (
        not isinstance(token_data, dict)
        or not token_data.get("access_token")
        or not token_data.get("refresh_token")
    ):
        print("\033[0;31mToken response missing required fields\033[0m", file=sys.stderr)
        return None

    return token_data
def decode_jwt_payload(token: str) -> dict | None:
"""Decode the payload of a JWT (no signature verification)."""
try:
parts = token.split(".")
if len(parts) != 3:
return None
payload = parts[1]
# Add padding
padding = 4 - len(payload) % 4
if padding != 4:
payload += "=" * padding
decoded = base64.urlsafe_b64decode(payload)
return json.loads(decoded)
except Exception:
return None
def get_account_id(access_token: str) -> str | None:
    """Extract the ChatGPT account_id from the access token JWT.

    Returns:
        The non-empty ``chatgpt_account_id`` string found under the
        ``JWT_CLAIM_PATH`` claim, or ``None`` when the token cannot be decoded
        or the claim is missing or malformed.
    """
    payload = decode_jwt_payload(access_token)
    # Guard on the type, not just truthiness: a decoded payload that is a
    # non-empty list or string has no .get() and would raise AttributeError.
    if not isinstance(payload, dict):
        return None
    auth = payload.get(JWT_CLAIM_PATH)
    if isinstance(auth, dict):
        account_id = auth.get("chatgpt_account_id")
        if isinstance(account_id, str) and account_id:
            return account_id
    return None
def save_credentials(token_data: dict, account_id: str) -> None:
    """Save credentials to ~/.codex/auth.json in the same format the Codex CLI uses.

    Args:
        token_data: Token response; must contain ``access_token`` and
            ``refresh_token``. ``id_token`` is stored too when present.
        account_id: Account id stored alongside the tokens.

    Raises:
        KeyError: If a required token field is missing.
        OSError: If the directory or file cannot be created or written.
    """
    auth_data = {
        "tokens": {
            "access_token": token_data["access_token"],
            "refresh_token": token_data["refresh_token"],
            "account_id": account_id,
        },
        "auth_mode": "chatgpt",
        "last_refresh": datetime.now(UTC).isoformat(),
    }
    if "id_token" in token_data:
        auth_data["tokens"]["id_token"] = token_data["id_token"]

    CODEX_AUTH_FILE.parent.mkdir(parents=True, exist_ok=True, mode=0o700)
    fd = os.open(CODEX_AUTH_FILE, os.O_WRONLY | os.O_CREAT | os.O_TRUNC, 0o600)
    with os.fdopen(fd, "w", encoding="utf-8") as f:
        # The mode passed to os.open() only applies when the file is created.
        # Tighten a pre-existing file before any secret is written to it.
        try:
            os.chmod(CODEX_AUTH_FILE, 0o600)
        except OSError:
            # Best effort: some filesystems/platforms do not support chmod.
            pass
        json.dump(auth_data, f, indent=2)
def open_browser(url: str) -> bool:
    """Open the URL in the user's default browser.

    Returns:
        ``True`` when the platform opener was launched, ``False`` when it
        could not be started (``OSError``).
    """
    system = platform.system()
    try:
        if system == "Windows":
            # Do not go through `cmd /c start <url>`: cmd.exe treats the "&"
            # separating query parameters as a command separator and would
            # truncate the OAuth URL. os.startfile hands the URL to the shell
            # association directly (the function only exists on Windows).
            os.startfile(url)
            return True
        opener = "open" if system == "Darwin" else "xdg-open"
        subprocess.Popen(
            [opener, url],
            stdout=subprocess.DEVNULL,
            stderr=subprocess.DEVNULL,
        )
        return True
    except OSError:
        return False
class OAuthCallbackHandler(http.server.BaseHTTPRequestHandler):
"""HTTP handler that captures the OAuth callback."""
auth_code: str | None = None
received_state: str | None = None
def do_GET(self) -> None:
parsed = urllib.parse.urlparse(self.path)
if parsed.path != "/auth/callback":
self.send_response(404)
self.end_headers()
self.wfile.write(b"Not found")
return
params = urllib.parse.parse_qs(parsed.query)
code = params.get("code", [None])[0]
state = params.get("state", [None])[0]
if not code:
self.send_response(400)
self.end_headers()
self.wfile.write(b"Missing authorization code")
return
OAuthCallbackHandler.auth_code = code
OAuthCallbackHandler.received_state = state
self.send_response(200)
self.send_header("Content-Type", "text/html; charset=utf-8")
self.end_headers()
self.wfile.write(
b"<!doctype html><html><head><meta charset='utf-8'/></head>"
b"<body><h2>Authentication successful</h2>"
b"<p>Return to your terminal to continue.</p></body></html>"
)
def log_message(self, format: str, *args: object) -> None:
# Suppress request logging
pass
def wait_for_callback(state: str, timeout_secs: int = 120) -> str | None:
    """Serve the local callback endpoint until the redirect arrives or time runs out.

    Args:
        state: The ``state`` value the redirect must echo back.
        timeout_secs: How long to keep serving before giving up.

    Returns:
        The authorization code when a callback with a matching state was
        received, otherwise ``None``.
    """
    handler = OAuthCallbackHandler
    handler.auth_code = None
    handler.received_state = None

    httpd = http.server.HTTPServer(("127.0.0.1", CALLBACK_PORT), handler)
    # Short per-request timeout so the serving loop can re-check its deadline.
    httpd.timeout = 1
    stop_at = time.time() + timeout_secs

    worker = threading.Thread(
        target=_serve_until_done,
        args=(httpd, stop_at, state),
        daemon=True,
    )
    worker.start()
    worker.join(timeout=timeout_secs + 2)
    httpd.server_close()

    matched = handler.auth_code and handler.received_state == state
    if matched:
        return handler.auth_code
    return None
def _serve_until_done(server: http.server.HTTPServer, deadline: float, state: str) -> None:
    """Handle requests one at a time until a matching callback arrives or *deadline* passes.

    *deadline* is compared against ``time.time()``.
    """
    handler = OAuthCallbackHandler
    while time.time() < deadline:
        server.handle_request()
        got_code = bool(handler.auth_code)
        if got_code and handler.received_state == state:
            break
def parse_manual_input(value: str, expected_state: str) -> str | None:
"""Parse user-pasted redirect URL or auth code."""
value = value.strip()
if not value:
return None
try:
parsed = urllib.parse.urlparse(value)
params = urllib.parse.parse_qs(parsed.query)
code = params.get("code", [None])[0]
state = params.get("state", [None])[0]
if state and state != expected_state:
return None
return code
except Exception:
pass
# Maybe it's just the raw code
if len(value) > 10 and " " not in value:
return value
return None
def _callback_port_in_use() -> bool:
    """Return True when something already accepts connections on the callback port."""
    import socket

    try:
        # The context manager closes the probe socket even if connect_ex raises.
        with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as sock:
            sock.settimeout(1)
            return sock.connect_ex(("127.0.0.1", CALLBACK_PORT)) == 0
    except OSError:
        # If the probe itself fails, optimistically assume the port is free.
        return False


def _poll_stdin_line(timeout: float) -> str | None:
    """Wait up to *timeout* seconds for a line on stdin.

    Returns the line when one is available, ``None`` when nothing arrived in
    time, and ``""`` when stdin is at EOF or cannot be polled at all (for
    example on Windows, where ``select`` only accepts sockets).
    """
    import select

    try:
        ready, _, _ = select.select([sys.stdin], [], [], timeout)
    except (OSError, ValueError, TypeError):
        return ""
    if not ready:
        return None
    return sys.stdin.readline()


def main() -> int:
    """Run the interactive OAuth login flow.

    Opens the browser on the authorize URL, obtains the authorization code
    from the local callback server and/or a manually pasted redirect URL,
    exchanges it for tokens, and saves them to ``CODEX_AUTH_FILE``.

    Returns:
        0 on success, 1 on cancellation, timeout, or any failure.
    """
    # Generate PKCE and state
    verifier, challenge = generate_pkce()
    state = secrets.token_hex(16)

    # Build URL
    auth_url = build_authorize_url(state, challenge)

    print()
    print("\033[1mOpenAI Codex OAuth Login\033[0m")
    print()

    # Decide up front whether the local callback server can be used
    server_available = not _callback_port_in_use()
    if not server_available:
        print(f"\033[1;33mPort {CALLBACK_PORT} is in use. Using manual paste mode.\033[0m")

    # Open browser
    browser_opened = open_browser(auth_url)
    if browser_opened:
        print(" Browser opened for OpenAI sign-in...")
    else:
        print(" Could not open browser automatically.")

    print()
    print(" If the browser didn't open, visit this URL:")
    print(f" \033[0;36m{auth_url}\033[0m")
    print()

    code = None

    if server_available:
        print(" Waiting for authentication (up to 2 minutes)...")
        print(" \033[2mOr paste the redirect URL below if the callback didn't work:\033[0m")
        print()

        # Start callback server in background
        callback_result: list[str | None] = [None]

        def run_server() -> None:
            callback_result[0] = wait_for_callback(state, timeout_secs=120)

        server_thread = threading.Thread(target=run_server)
        server_thread.daemon = True
        server_thread.start()

        # Also accept manual input in parallel:
        # poll both the server result and stdin.
        poll_stdin = True
        try:
            while server_thread.is_alive():
                if poll_stdin:
                    manual = _poll_stdin_line(0.5)
                    if manual == "":
                        # EOF or unpollable stdin: stop polling it, otherwise
                        # select() would report "ready" forever (busy loop) or
                        # keep raising (Windows).
                        poll_stdin = False
                    elif manual is not None and manual.strip():
                        code = parse_manual_input(manual, state)
                        if code:
                            break
                else:
                    time.sleep(0.5)

                if callback_result[0]:
                    code = callback_result[0]
                    break
        except (KeyboardInterrupt, EOFError):
            print("\n\033[0;31mCancelled.\033[0m")
            return 1

        if not code:
            code = callback_result[0]
    else:
        # Manual paste mode
        try:
            manual = input(" Paste the redirect URL: ").strip()
            code = parse_manual_input(manual, state)
        except (KeyboardInterrupt, EOFError):
            print("\n\033[0;31mCancelled.\033[0m")
            return 1

    if not code:
        print("\n\033[0;31mAuthentication timed out or failed.\033[0m")
        return 1

    # Exchange code for tokens
    print()
    print(" Exchanging authorization code for tokens...")
    token_data = exchange_code_for_tokens(code, verifier)
    if not token_data:
        return 1

    # Extract account_id from JWT
    account_id = get_account_id(token_data["access_token"])
    if not account_id:
        print("\033[0;31mFailed to extract account ID from token.\033[0m", file=sys.stderr)
        return 1

    # Save credentials; report filesystem problems instead of a traceback
    try:
        save_credentials(token_data, account_id)
    except OSError as exc:
        print(f"\033[0;31mFailed to save credentials: {exc}\033[0m", file=sys.stderr)
        return 1

    print(" \033[0;32mAuthentication successful!\033[0m")
    print(f" Credentials saved to {CODEX_AUTH_FILE}")
    return 0


if __name__ == "__main__":
    sys.exit(main())
+15 -6
View File
@@ -68,7 +68,7 @@ from framework.graph.event_loop_node import ( # noqa: E402
)
from framework.graph.executor import GraphExecutor # noqa: E402
from framework.graph.goal import Goal # noqa: E402
from framework.graph.node import NodeSpec # noqa: E402
from framework.graph.node import NodeContext, NodeProtocol, NodeResult, NodeSpec # noqa: E402
from framework.llm.litellm import LiteLLMProvider # noqa: E402
from framework.runner.tool_registry import ToolRegistry # noqa: E402
from framework.runtime.core import Runtime # noqa: E402
@@ -654,7 +654,7 @@ NODE_SPECS = {
id="sender",
name="Sender",
description="Send approved campaign emails",
node_type="function",
node_type="event_loop",
input_keys=["approved_emails"],
output_keys=["send_results"],
),
@@ -823,11 +823,20 @@ def _send_email_via_resend(
return {"error": f"Network error: {e}"}
class SenderNode(NodeProtocol):
    """Graph node that delegates to ``send_emails``.

    Reads ``approved_emails`` from the node input, stores the returned string
    in memory under ``send_results``, and reports it as the node output.
    """

    async def execute(self, ctx: NodeContext) -> NodeResult:
        """Send the approved emails and publish the result string."""
        emails_payload = ctx.input_data.get("approved_emails", "")
        outcome = send_emails(approved_emails=emails_payload)
        ctx.memory.write("send_results", outcome)
        output = {"send_results": outcome}
        return NodeResult(success=True, output=output)
def send_emails(approved_emails: str = "") -> str:
"""Send approved campaign emails via Resend, or log if unconfigured.
Called by FunctionNode which unpacks input_keys as kwargs.
Returns a JSON string (FunctionNode wraps it in NodeResult).
Returns a JSON string.
"""
approved = approved_emails
if not approved:
@@ -1759,7 +1768,7 @@ async def _run_pipeline(websocket, initial_message: str):
judge=judge,
config=LoopConfig(
max_iterations=30,
max_tool_calls_per_turn=15,
max_tool_calls_per_turn=30,
max_history_tokens=64000,
max_tool_result_chars=8_000,
spillover_dir=str(_DATA_DIR),
@@ -1780,7 +1789,7 @@ async def _run_pipeline(websocket, initial_message: str):
)
for nid, impl in nodes.items():
executor.register_node(nid, impl)
executor.register_function("sender", send_emails)
executor.register_node("sender", SenderNode())
# --- Event forwarding: bus → WebSocket ---
+2 -2
View File
@@ -751,7 +751,7 @@ async def _run_pipeline(websocket, topic: str):
judge=None, # implicit judge: accept when output_keys filled
config=LoopConfig(
max_iterations=20,
max_tool_calls_per_turn=10,
max_tool_calls_per_turn=30,
max_history_tokens=32_000,
),
conversation_store=store_a,
@@ -849,7 +849,7 @@ async def _run_pipeline(websocket, topic: str):
judge=None, # implicit judge
config=LoopConfig(
max_iterations=10,
max_tool_calls_per_turn=5,
max_tool_calls_per_turn=30,
max_history_tokens=32_000,
),
conversation_store=store_b,
+1 -1
View File
@@ -1257,7 +1257,7 @@ async def _run_org_pipeline(websocket, topic: str):
judge=judge,
config=LoopConfig(
max_iterations=30,
max_tool_calls_per_turn=25,
max_tool_calls_per_turn=30,
max_history_tokens=32_000,
),
conversation_store=store,
+28 -19
View File
@@ -4,8 +4,8 @@ Minimal Manual Agent Example
This example demonstrates how to build and run an agent programmatically
without using the Claude Code CLI or external LLM APIs.
It uses 'function' nodes to define logic in pure Python, making it perfect
for understanding the core runtime loop:
It uses custom NodeProtocol implementations to define logic in pure Python,
making it perfect for understanding the core runtime loop:
Setup -> Graph definition -> Execution -> Result
Run with:
@@ -16,22 +16,33 @@ import asyncio
from framework.graph import EdgeCondition, EdgeSpec, Goal, GraphSpec, NodeSpec
from framework.graph.executor import GraphExecutor
from framework.graph.node import NodeContext, NodeProtocol, NodeResult
from framework.runtime.core import Runtime
# 1. Define Node Logic (Pure Python Functions)
def greet(name: str) -> str:
# 1. Define Node Logic (Custom NodeProtocol implementations)
class GreeterNode(NodeProtocol):
"""Generate a simple greeting."""
return f"Hello, {name}!"
async def execute(self, ctx: NodeContext) -> NodeResult:
name = ctx.input_data.get("name", "World")
greeting = f"Hello, {name}!"
ctx.memory.write("greeting", greeting)
return NodeResult(success=True, output={"greeting": greeting})
def uppercase(greeting: str) -> str:
class UppercaserNode(NodeProtocol):
"""Convert text to uppercase."""
return greeting.upper()
async def execute(self, ctx: NodeContext) -> NodeResult:
greeting = ctx.input_data.get("greeting") or ctx.memory.read("greeting") or ""
result = greeting.upper()
ctx.memory.write("final_greeting", result)
return NodeResult(success=True, output={"final_greeting": result})
async def main():
print("🚀 Setting up Manual Agent...")
print("Setting up Manual Agent...")
# 2. Define the Goal
# Every agent needs a goal with success criteria
@@ -55,8 +66,7 @@ async def main():
id="greeter",
name="Greeter",
description="Generates a simple greeting",
node_type="function",
function="greet", # Matches the registered function name
node_type="event_loop",
input_keys=["name"],
output_keys=["greeting"],
)
@@ -65,8 +75,7 @@ async def main():
id="uppercaser",
name="Uppercaser",
description="Converts greeting to uppercase",
node_type="function",
function="uppercase",
node_type="event_loop",
input_keys=["greeting"],
output_keys=["final_greeting"],
)
@@ -98,23 +107,23 @@ async def main():
runtime = Runtime(storage_path=Path("./agent_logs"))
executor = GraphExecutor(runtime=runtime)
# 7. Register Function Implementations
# Connect string names in NodeSpecs to actual Python functions
executor.register_function("greeter", greet)
executor.register_function("uppercaser", uppercase)
# 7. Register Node Implementations
# Connect node IDs in the graph to actual Python implementations
executor.register_node("greeter", GreeterNode())
executor.register_node("uppercaser", UppercaserNode())
# 8. Execute Agent
print("Executing agent with input: name='Alice'...")
print("Executing agent with input: name='Alice'...")
result = await executor.execute(graph=graph, goal=goal, input_data={"name": "Alice"})
# 9. Verify Results
if result.success:
print("\nSuccess!")
print("\nSuccess!")
print(f"Path taken: {' -> '.join(result.path)}")
print(f"Final output: {result.output.get('final_greeting')}")
else:
print(f"\nFailed: {result.error}")
print(f"\nFailed: {result.error}")
if __name__ == "__main__":
+2 -2
View File
@@ -122,7 +122,7 @@ async def example_4_custom_agent_with_mcp_tools():
node_id="web-searcher",
name="Web Search",
description="Search the web for information",
node_type="llm_tool_use",
node_type="event_loop",
system_prompt="Search for {query} and return the top results. Use the web_search tool.",
tools=["web_search"], # This tool comes from tools MCP server
input_keys=["query"],
@@ -133,7 +133,7 @@ async def example_4_custom_agent_with_mcp_tools():
node_id="summarizer",
name="Summarize Results",
description="Summarize the search results",
node_type="llm_generate",
node_type="event_loop",
system_prompt="Summarize the following search results in 2-3 sentences: {search_results}",
input_keys=["search_results"],
output_keys=["summary"],
+13
View File
@@ -0,0 +1,13 @@
"""Framework-provided agents."""
from pathlib import Path
FRAMEWORK_AGENTS_DIR = Path(__file__).parent
def list_framework_agents() -> list[Path]:
    """Return the framework-provided agent directories, ordered by name.

    A child of ``FRAMEWORK_AGENTS_DIR`` counts as an agent only when it is a
    directory that contains an ``agent.py`` file.
    """
    agent_dirs = []
    for entry in FRAMEWORK_AGENTS_DIR.iterdir():
        if not entry.is_dir():
            continue
        if (entry / "agent.py").exists():
            agent_dirs.append(entry)
    agent_dirs.sort(key=lambda entry: entry.name)
    return agent_dirs
@@ -0,0 +1,55 @@
"""
Credential Tester — verify credentials (Aden OAuth + local API keys) via live API calls.
Interactive agent that lists all testable accounts, lets the user pick one,
loads the provider's tools, and runs a chat session to test the credential.
"""
from .agent import (
CredentialTesterAgent,
_list_aden_accounts,
_list_env_fallback_accounts,
_list_local_accounts,
configure_for_account,
conversation_mode,
edges,
entry_node,
entry_points,
get_tools_for_provider,
goal,
identity_prompt,
list_connected_accounts,
loop_config,
nodes,
pause_nodes,
requires_account_selection,
skip_credential_validation,
terminal_nodes,
)
from .config import default_config
__version__ = "1.0.0"
__all__ = [
"CredentialTesterAgent",
"configure_for_account",
"conversation_mode",
"default_config",
"edges",
"entry_node",
"entry_points",
"get_tools_for_provider",
"goal",
"identity_prompt",
"list_connected_accounts",
"loop_config",
"nodes",
"pause_nodes",
"requires_account_selection",
"skip_credential_validation",
"terminal_nodes",
# Internal list helpers (exposed for testing)
"_list_aden_accounts",
"_list_local_accounts",
"_list_env_fallback_accounts",
]
@@ -0,0 +1,148 @@
"""CLI entry point for Credential Tester agent."""
import asyncio
import logging
import sys
import click
from .agent import CredentialTesterAgent
def setup_logging(verbose=False, debug=False):
    """Configure root logging on stderr.

    ``debug`` takes precedence over ``verbose``; with neither flag only
    warnings and above are emitted.
    """
    # Start from the quiet default and override for the chattier modes.
    level = logging.WARNING
    fmt = "%(levelname)s: %(message)s"
    if debug:
        level = logging.DEBUG
        fmt = "%(asctime)s %(name)s: %(message)s"
    elif verbose:
        level = logging.INFO
        fmt = "%(message)s"
    logging.basicConfig(level=level, format=fmt, stream=sys.stderr)
def pick_account(agent: CredentialTesterAgent) -> dict | None:
    """Show the testable accounts and prompt until a valid one is chosen.

    Returns the chosen account dict, or ``None`` (after printing a setup hint)
    when ``agent.list_accounts()`` yields nothing.
    """
    available = agent.list_accounts()
    if not available:
        click.echo("No connected accounts found.")
        click.echo("Set ADEN_API_KEY and connect accounts at https://app.adenhq.com")
        return None

    click.echo("\nConnected accounts:\n")
    for number, entry in enumerate(available, 1):
        # Only identity fields that carry a truthy value are displayed.
        shown = ", ".join(f"{k}: {v}" for k, v in entry.get("identity", {}).items() if v)
        suffix = f" ({shown})" if shown else ""
        click.echo(f" {number}. {entry.get('provider', '?')}/{entry.get('alias', '?')}{suffix}")
    click.echo()

    total = len(available)
    while True:
        picked = click.prompt("Pick an account to test", type=int, default=1)
        if picked < 1 or picked > total:
            click.echo(f"Invalid choice. Enter 1-{total}.")
            continue
        # The menu is 1-based; the list is 0-based.
        return available[picked - 1]
@click.group()
@click.version_option(version="1.0.0")
def cli():
    """Credential Tester — verify synced credentials via live API calls."""
    # Group callback only: the subcommands registered on it do the work.
@cli.command()
@click.option("--verbose", "-v", is_flag=True)
@click.option("--debug", is_flag=True)
def tui(verbose, debug):
    """Launch TUI to test a credential interactively."""
    setup_logging(verbose=verbose, debug=debug)

    # The TUI import is optional; exit with an install hint when it fails.
    try:
        from framework.tui.app import AdenTUI
    except ImportError:
        click.echo("TUI requires 'textual'. Install with: pip install textual")
        sys.exit(1)

    tester = CredentialTesterAgent()
    chosen = pick_account(tester)
    if chosen is None:
        sys.exit(1)
    tester.select_account(chosen)

    label = f"{chosen.get('provider', '?')}/{chosen.get('alias', '?')}"
    click.echo(f"\nTesting {label}...\n")

    async def _session():
        # Build the runtime for the chosen account and keep it running for
        # exactly as long as the TUI app is alive.
        tester._setup()
        active_runtime = tester._agent_runtime
        await active_runtime.start()
        try:
            await AdenTUI(active_runtime).run_async()
        finally:
            await active_runtime.stop()

    asyncio.run(_session())
@cli.command()
@click.option("--verbose", "-v", is_flag=True)
@click.option("--debug", is_flag=True)
def shell(verbose, debug):
    """Interactive CLI session to test a credential."""
    setup_logging(verbose=verbose, debug=debug)
    # Drive the async shell session to completion from this sync entry point.
    session = _interactive_shell(verbose)
    asyncio.run(session)
async def _interactive_shell(verbose=False):
    """Pick an account, start the agent, and run one session on entry point "start".

    Returns early when no account is picked. The agent is always stopped once
    it has been started, including on Ctrl-C.
    """
    tester = CredentialTesterAgent()
    chosen = pick_account(tester)
    if chosen is None:
        return
    tester.select_account(chosen)

    label = f"{chosen.get('provider', '?')}/{chosen.get('alias', '?')}"
    click.echo(f"\nTesting {label}")
    click.echo("Type your requests or 'quit' to exit.\n")

    await tester.start()
    try:
        outcome = await tester._agent_runtime.trigger_and_wait(
            entry_point_id="start",
            input_data={},
        )
        if outcome:
            status = "success" if outcome.success else outcome.error
            click.echo(f"\nSession ended: {status}")
    except KeyboardInterrupt:
        click.echo("\nGoodbye!")
    finally:
        await tester.stop()
@cli.command(name="list")
def list_accounts():
    """List all connected accounts."""
    found = CredentialTesterAgent().list_accounts()
    if not found:
        click.echo("No connected accounts found.")
        return

    click.echo("\nConnected accounts:\n")
    for entry in found:
        # Only identity fields that carry a truthy value are displayed.
        shown = ", ".join(f"{k}: {v}" for k, v in entry.get("identity", {}).items() if v)
        suffix = f" ({shown})" if shown else ""
        click.echo(f" {entry.get('provider', '?')}/{entry.get('alias', '?')}{suffix}")
if __name__ == "__main__":
cli()
@@ -0,0 +1,621 @@
"""Credential Tester agent — verify credentials via live API calls.
Supports both Aden OAuth2-synced accounts AND locally-stored API key accounts.
Aden accounts use account="alias" routing; local accounts inject the key into
the session environment so tools read it without an account= parameter.
When loaded via AgentRunner.load() (TUI picker, ``hive run``), the module-level
``nodes`` / ``edges`` variables provide a static graph. The TUI detects
``requires_account_selection`` and shows an account picker *before* starting
the agent. ``configure_for_account()`` then scopes the node's tools to the
selected provider.
When used directly (``CredentialTesterAgent``), the graph is built dynamically
after the user picks an account programmatically.
"""
from __future__ import annotations
from pathlib import Path
from typing import TYPE_CHECKING
from framework.graph import Goal, NodeSpec, SuccessCriterion
from framework.graph.checkpoint_config import CheckpointConfig
from framework.graph.edge import GraphSpec
from framework.graph.executor import ExecutionResult
from framework.llm import LiteLLMProvider
from framework.runner.tool_registry import ToolRegistry
from framework.runtime.agent_runtime import AgentRuntime, create_agent_runtime
from framework.runtime.execution_stream import EntryPointSpec
from .config import default_config
from .nodes import build_tester_node
if TYPE_CHECKING:
from framework.runner import AgentRunner
# ---------------------------------------------------------------------------
# Goal
# ---------------------------------------------------------------------------
goal = Goal(
id="credential-tester",
name="Credential Tester",
description="Verify that a credential can make real API calls.",
success_criteria=[
SuccessCriterion(
id="api-call-success",
description="At least one API call succeeds using the credential",
metric="api_call_success",
target="true",
weight=1.0,
),
],
constraints=[],
)
# ---------------------------------------------------------------------------
# Helpers
# ---------------------------------------------------------------------------
def get_tools_for_provider(provider_name: str) -> list[str]:
    """Return the sorted, de-duplicated tool names available for a provider.

    A credential spec contributes its tools when ``provider_name`` equals
    either its ``credential_id`` (e.g. "google" for the Gmail tools) or its
    ``credential_group`` (e.g. "google_custom_search" for all Google search
    tools).
    """
    from aden_tools.credentials import CREDENTIAL_SPECS

    matched = {
        tool_name
        for spec in CREDENTIAL_SPECS.values()
        if spec.credential_id == provider_name or spec.credential_group == provider_name
        for tool_name in spec.tools
    }
    return sorted(matched)
def _list_aden_accounts() -> list[dict]:
    """List active accounts from the Aden platform (requires ADEN_API_KEY).

    Returns:
        One dict per integration whose status is "active", with the keys
        ``provider``, ``alias``, ``identity`` (``{"email": ...}`` or empty),
        ``integration_id`` and ``source`` (always "aden"). An empty list is
        returned when ADEN_API_KEY is unset or when the lookup fails.

    The lookup is best-effort: any failure is logged at debug level instead
    of being raised, so a broken backend never blocks listing local accounts.
    """
    import logging
    import os

    if not os.environ.get("ADEN_API_KEY"):
        return []

    try:
        from framework.credentials.aden.client import AdenClientConfig, AdenCredentialClient

        client = AdenCredentialClient(
            AdenClientConfig(
                base_url=os.environ.get("ADEN_API_URL", "https://api.adenhq.com"),
            )
        )
        try:
            integrations = client.list_integrations()
        finally:
            # Always release the client, even when the listing call fails.
            client.close()

        return [
            {
                "provider": c.provider,
                "alias": c.alias,
                "identity": {"email": c.email} if c.email else {},
                "integration_id": c.integration_id,
                "source": "aden",
            }
            for c in integrations
            if c.status == "active"
        ]
    except Exception:
        # Keep the best-effort contract, but leave a trace so that a failed
        # lookup can be told apart from "no accounts" when debugging.
        logging.getLogger(__name__).debug("Could not list Aden accounts", exc_info=True)
        return []
def _list_local_accounts() -> list[dict]:
    """List named local API key accounts from LocalCredentialRegistry.

    Returns:
        ``info.to_account_dict()`` for every account of the default registry,
        or an empty list when the registry cannot be imported or read.

    The lookup is best-effort: any failure is logged at debug level instead
    of being raised.
    """
    import logging

    try:
        from framework.credentials.local.registry import LocalCredentialRegistry

        registry = LocalCredentialRegistry.default()
        return [info.to_account_dict() for info in registry.list_accounts()]
    except Exception:
        # Leave a trace so a broken registry is distinguishable from an
        # empty one when debugging.
        logging.getLogger(__name__).debug("Could not list local accounts", exc_info=True)
        return []
def _list_env_fallback_accounts() -> list[dict]:
    """Surface configured-but-unregistered credentials as testable entries.

    A credential counts as configured when its env var is set or when an entry
    with the spec's name exists in the encrypted store (the old flat format,
    e.g. ``brave_search`` with no alias). Grouped credentials are reported
    once, under the group name, and only when every member of the group is
    configured. Every entry uses alias="default" and status="unknown".
    """
    import os

    from aden_tools.credentials import CREDENTIAL_SPECS

    # IDs present in the encrypted store; an unreadable store counts as empty.
    try:
        from framework.credentials.storage import EncryptedFileStorage

        stored_ids: set[str] = set(EncryptedFileStorage().list_all())
    except Exception:
        stored_ids = set()

    def _is_configured(cred_name: str, spec) -> bool:
        # Env var first, then the old flat encrypted entry.
        return bool(os.environ.get(spec.env_var)) or cred_name in stored_ids

    reported_groups: set[str] = set()
    found: list[dict] = []
    for cred_name, spec in CREDENTIAL_SPECS.items():
        if not (spec.direct_api_key_supported and spec.tools):
            continue

        group = spec.credential_group
        if group:
            if group in reported_groups:
                continue
            members = [(n, s) for n, s in CREDENTIAL_SPECS.items() if s.credential_group == group]
            if not all(_is_configured(n, s) for n, s in members):
                continue
            reported_groups.add(group)
            provider = group
        elif _is_configured(cred_name, spec):
            provider = cred_name
        else:
            continue

        found.append(
            {
                "provider": provider,
                "alias": "default",
                "identity": {},
                "integration_id": None,
                "source": "local",
                "status": "unknown",
            }
        )
    return found
def list_connected_accounts() -> list[dict]:
    """Return every testable account: Aden-synced, named local, then env-var fallbacks.

    An env-var fallback is dropped when a named local account already exists
    for the same provider.
    """
    aden_accounts = _list_aden_accounts()
    named_local = _list_local_accounts()

    registered_providers = {entry["provider"] for entry in named_local}
    fallbacks = []
    for candidate in _list_env_fallback_accounts():
        if candidate["provider"] in registered_providers:
            continue
        fallbacks.append(candidate)

    return [*aden_accounts, *named_local, *fallbacks]
# ---------------------------------------------------------------------------
# Module-level hooks (read by AgentRunner.load / TUI)
# ---------------------------------------------------------------------------
skip_credential_validation = True
"""Don't validate credentials at load time — we don't know which provider yet."""
requires_account_selection = True
"""Signal TUI to show account picker before starting the agent."""
def configure_for_account(runner: AgentRunner, account: dict) -> None:
    """Scope the tester node's tools and prompt to the selected account.

    Aden accounts (the default when ``source`` is missing) additionally get the
    ``get_account_info`` tool and ``account=`` routing instructions. Any other
    source is treated as a local key: it is injected into the session
    environment first and the prompt tells the model not to pass ``account``.
    """
    provider = account["provider"]
    alias = account.get("alias", "unknown")
    identity = account.get("identity", {})
    tools = get_tools_for_provider(provider)

    if account.get("source", "aden") != "aden":
        status = account.get("status", "unknown")
        _activate_local_account(provider, alias)
        _configure_local_node(runner, provider, alias, identity, tools, status)
        return

    tools.append("get_account_info")
    email = identity.get("email", "")
    email_note = f" (email: {email})" if email else ""
    _configure_aden_node(runner, provider, alias, email_note, tools)
def _activate_local_account(credential_id: str, alias: str) -> None:
    """Inject a named local account's key into the session environment.

    Every credential spec that matches ``credential_id`` (by spec name,
    ``credential_id`` or ``credential_group``) is processed once per env var.
    An env var that is already set is left untouched. Otherwise the key is
    looked up in this order:

    1. Named account in LocalCredentialRegistry ({credential_id}/{alias}).
    2. Old flat credential in EncryptedFileStorage (id == credential_id,
       no alias).

    The injection is best-effort: failures are logged at debug level and
    never raised.
    """
    import logging
    import os

    from aden_tools.credentials import CREDENTIAL_SPECS

    # Collect specs for this credential (handles grouped credentials too).
    matching_specs = [
        spec
        for cred_name, spec in CREDENTIAL_SPECS.items()
        if spec.credential_group == credential_id
        or spec.credential_id == credential_id
        or cred_name == credential_id
    ]

    try:
        from framework.credentials.local.registry import LocalCredentialRegistry
        from framework.credentials.storage import EncryptedFileStorage

        registry = LocalCredentialRegistry.default()
        flat_storage = EncryptedFileStorage()

        # Several specs may share one env var; handle each env var only once.
        handled_env_vars: set[str] = set()
        for spec in matching_specs:
            if spec.env_var in handled_env_vars:
                continue
            handled_env_vars.add(spec.env_var)

            # If the env var is already set there is nothing to inject.
            if os.environ.get(spec.env_var):
                continue

            # Grouped credentials whose env var mentions "cse" store their
            # value under "cse_id"; everything else uses "api_key".
            if spec.credential_group and "cse" in spec.env_var.lower():
                key_name = "cse_id"
            else:
                key_name = "api_key"

            # 1. Named account in the registry (the same call serves the
            #    "default" alias and user-chosen aliases alike).
            key = registry.get_key(credential_id, alias, key_name)

            # 2. Fall back to the old flat encrypted entry.
            if key is None:
                flat_cred = flat_storage.load(credential_id)
                if flat_cred is not None:
                    key = flat_cred.get_key(key_name) or flat_cred.get_default_key()

            if key:
                os.environ[spec.env_var] = key
    except Exception:
        # Best-effort by design, but record why injection did not happen.
        logging.getLogger(__name__).debug(
            "Could not activate local account %s/%s", credential_id, alias, exc_info=True
        )
def _configure_aden_node(
    runner: AgentRunner,
    provider: str,
    alias: str,
    detail: str,
    tools: list[str],
) -> None:
    """Point the "tester" node at an Aden-synced account.

    Args:
        runner: Loaded runner whose graph contains the node with id "tester".
        provider: Provider name shown in the prompt and intro message.
        alias: Account alias the model is told to pass as ``account=``.
        detail: Pre-formatted suffix (e.g. " (email: ...)"), possibly empty.
        tools: Tool names to expose; duplicates are removed and the result
            is sorted before being assigned to the node.

    Side effects: rewrites the node's ``tools`` and ``system_prompt`` and
    sets ``runner.intro_message``.
    """
    # De-duplicate once so the node and the reported count agree.
    unique_tools = sorted(set(tools))

    for node in runner.graph.nodes:
        if node.id == "tester":
            node.tools = unique_tools
            node.system_prompt = f"""\
You are a credential tester for the account: {provider}/{alias}{detail}
# Instructions
1. Suggest a simple read-only API call to verify the credential works \
(e.g. list messages, list channels, list contacts).
2. Execute the call when the user agrees.
3. Report the result: success (with sample data) or failure (with error).
4. Let the user request additional API calls to further test the credential.
# Account routing
IMPORTANT: Always pass `account="{alias}"` when calling any tool. \
This routes the API call to the correct credential. Never use the email \
or any other identifier always use the alias exactly as shown.
# Rules
- Start with read-only operations (list, get) before write operations.
- Always confirm with the user before performing write operations.
- If a call fails, report the exact error this helps diagnose credential issues.
- Be concise. No emojis.
"""
            break

    # The separator keeps the account label from running into the tool count.
    runner.intro_message = (
        f"Testing {provider}/{alias}{detail} — "
        f"{len(unique_tools)} tools loaded. "
        "I'll suggest a read-only API call to verify the credential works."
    )
def _configure_local_node(
    runner: AgentRunner,
    provider: str,
    alias: str,
    identity: dict,
    tools: list[str],
    status: str,
) -> None:
    """Point the "tester" node at a locally stored API key.

    Args:
        runner: Loaded runner whose graph contains the node with id "tester".
        provider: Credential name shown in the prompt and intro message.
        alias: Local account alias (display only; no ``account=`` routing).
        identity: Identity fields; entries with a truthy value are shown.
        tools: Tool names to expose; duplicates are removed and the result
            is sorted before being assigned to the node.
        status: When "unknown", the prompt notes that the key is not yet
            validated.

    Side effects: rewrites the node's ``tools`` and ``system_prompt`` and
    sets ``runner.intro_message``.
    """
    identity_parts = [f"{k}: {v}" for k, v in identity.items() if v]
    detail = f" ({', '.join(identity_parts)})" if identity_parts else ""
    status_note = " [key not yet validated]" if status == "unknown" else ""

    # De-duplicate once so the node and the reported count agree.
    unique_tools = sorted(set(tools))

    for node in runner.graph.nodes:
        if node.id == "tester":
            node.tools = unique_tools
            node.system_prompt = f"""\
You are a credential tester for the local API key: {provider}/{alias}{detail}{status_note}
# Instructions
1. Suggest a simple test call to verify the credential works \
(e.g. search for "test", list items, get profile info).
2. Execute the call when the user agrees.
3. Report the result: success (with sample data) or failure (with error).
4. Let the user request additional API calls to further test the credential.
# Rules
- Do NOT pass an `account` parameter this credential is injected \
directly into the session environment and tools read it automatically.
- Start with read-only operations before write operations.
- Always confirm with the user before performing write operations.
- If a call fails, report the exact error this helps diagnose credential issues.
- Be concise. No emojis.
"""
            break

    # The separator keeps the account label from running into the tool count.
    runner.intro_message = (
        f"Testing {provider}/{alias}{detail} — "
        f"{len(unique_tools)} tools loaded. "
        "I'll suggest a test API call to verify the credential works."
    )
# ---------------------------------------------------------------------------
# Module-level graph variables (read by AgentRunner.load)
# ---------------------------------------------------------------------------
nodes = [
NodeSpec(
id="tester",
name="Credential Tester",
description=(
"Interactive credential testing — lets the user pick an account "
"and verify it via API calls."
),
node_type="event_loop",
client_facing=True,
max_node_visits=0,
input_keys=[],
output_keys=[],
tools=["get_account_info"],
system_prompt="""\
You are a credential tester. Your job is to help the user verify that their \
connected accounts and API keys can make real API calls.
# Startup
1. Call ``get_account_info`` to list the user's connected accounts.
2. Present the list and ask the user which account to test.
3. Once they pick one, note the account's **alias** (e.g. "Timothy", "work-slack").
4. Suggest a simple read-only API call to verify the credential works \
(e.g. list messages, list channels, list contacts).
5. Execute the call when the user agrees.
6. Report the result: success (with sample data) or failure (with error).
7. Let the user request additional API calls to further test the credential.
# Account routing (Aden accounts only)
IMPORTANT: For Aden-synced accounts, always pass the account's **alias** as the \
``account`` parameter when calling any tool. For local API key accounts, do NOT \
pass an account parameter they are pre-injected into the session.
# Rules
- Start with read-only operations (list, get) before write operations.
- Always confirm with the user before performing write operations.
- If a call fails, report the exact error this helps diagnose credential issues.
- Be concise. No emojis.
""",
),
]
edges = []
entry_node = "tester"
entry_points = {"start": "tester"}
pause_nodes = []
terminal_nodes = [] # Forever-alive: loops until user exits
conversation_mode = "continuous"
identity_prompt = (
"You are a credential tester that verifies connected accounts and API keys "
"can make real API calls."
)
loop_config = {
"max_iterations": 50,
"max_tool_calls_per_turn": 30,
"max_history_tokens": 32000,
}
# ---------------------------------------------------------------------------
# Programmatic agent class (used by __main__.py CLI)
# ---------------------------------------------------------------------------
class CredentialTesterAgent:
    """Interactive agent that tests a specific credential via API calls.

    The graph is built for one selected account, so ``select_account()`` must
    be called before ``start()`` / ``run()``.

    Usage:
        agent = CredentialTesterAgent()
        accounts = agent.list_accounts()
        agent.select_account(accounts[0])
        await agent.start()
        await agent.stop()
    """

    def __init__(self, config=None):
        """Create an unconfigured agent; falls back to ``default_config``."""
        self.config = config or default_config
        # Everything below is populated later by select_account() / _setup().
        self._selected_account: dict | None = None
        self._agent_runtime: AgentRuntime | None = None
        self._tool_registry: ToolRegistry | None = None
        self._storage_path: Path | None = None

    def list_accounts(self) -> list[dict]:
        """List all testable accounts (Aden + local named + env-var fallbacks)."""
        return list_connected_accounts()

    def select_account(self, account: dict) -> None:
        """Select an account to test.

        Args:
            account: Account dict from list_accounts() with
                provider, alias, identity, source keys.
        """
        self._selected_account = account

    @property
    def selected_provider(self) -> str:
        """Provider of the selected account.

        Raises:
            RuntimeError: If no account has been selected yet.
        """
        if self._selected_account is None:
            raise RuntimeError("No account selected. Call select_account() first.")
        return self._selected_account["provider"]

    @property
    def selected_alias(self) -> str:
        """Alias of the selected account ("unknown" when the dict has none).

        Raises:
            RuntimeError: If no account has been selected yet.
        """
        if self._selected_account is None:
            raise RuntimeError("No account selected. Call select_account() first.")
        return self._selected_account.get("alias", "unknown")

    def _build_graph(self) -> GraphSpec:
        """Build the single-node graph for the selected account.

        Raises:
            RuntimeError: If no account has been selected (raised by the
                ``selected_provider`` property).
        """
        provider = self.selected_provider
        alias = self.selected_alias
        source = self._selected_account.get("source", "aden")
        identity = self._selected_account.get("identity", {})
        tools = get_tools_for_provider(provider)

        if source == "local":
            # Local keys are injected into the session environment.
            _activate_local_account(provider, alias)
        elif source == "aden":
            tools.append("get_account_info")

        tester_node = build_tester_node(
            provider=provider,
            alias=alias,
            tools=tools,
            identity=identity,
            source=source,
        )

        return GraphSpec(
            id="credential-tester-graph",
            goal_id=goal.id,
            version="1.0.0",
            entry_node="tester",
            entry_points={"start": "tester"},
            terminal_nodes=[],
            pause_nodes=[],
            nodes=[tester_node],
            edges=[],
            default_model=self.config.model,
            max_tokens=self.config.max_tokens,
            loop_config={
                "max_iterations": 50,
                "max_tool_calls_per_turn": 30,
                "max_history_tokens": 32000,
            },
            conversation_mode="continuous",
            identity_prompt=(
                f"You are testing the {provider}/{alias} credential. "
                "Help the user verify it works by making real API calls."
            ),
        )

    def _setup(self) -> None:
        """Create the storage dir, tool registry, LLM provider and agent runtime.

        Raises:
            RuntimeError: If no account has been selected yet.
        """
        if self._selected_account is None:
            raise RuntimeError("No account selected. Call select_account() first.")

        self._storage_path = Path.home() / ".hive" / "agents" / "credential_tester"
        self._storage_path.mkdir(parents=True, exist_ok=True)

        self._tool_registry = ToolRegistry()

        # The MCP server config is optional; it is loaded only when the file
        # exists next to this module.
        mcp_config_path = Path(__file__).parent / "mcp_servers.json"
        if mcp_config_path.exists():
            self._tool_registry.load_mcp_config(mcp_config_path)

        # Tolerate configs without an ``extra_kwargs`` attribute (or with None).
        extra_kwargs = getattr(self.config, "extra_kwargs", {}) or {}
        llm = LiteLLMProvider(
            model=self.config.model,
            api_key=self.config.api_key,
            api_base=self.config.api_base,
            **extra_kwargs,
        )

        tool_executor = self._tool_registry.get_executor()
        tools = list(self._tool_registry.get_tools().values())

        graph = self._build_graph()

        self._agent_runtime = create_agent_runtime(
            graph=graph,
            goal=goal,
            storage_path=self._storage_path,
            entry_points=[
                EntryPointSpec(
                    id="start",
                    name="Test Credential",
                    entry_node="tester",
                    trigger_type="manual",
                    isolation_level="isolated",
                ),
            ],
            llm=llm,
            tools=tools,
            tool_executor=tool_executor,
            checkpoint_config=CheckpointConfig(enabled=False),
            graph_id="credential_tester",
        )

    async def start(self) -> None:
        """Set up and start the agent runtime."""
        if self._agent_runtime is None:
            self._setup()
        if not self._agent_runtime.is_running:
            await self._agent_runtime.start()

    async def stop(self) -> None:
        """Stop the agent runtime."""
        if self._agent_runtime and self._agent_runtime.is_running:
            await self._agent_runtime.stop()
        self._agent_runtime = None

    async def run(self) -> ExecutionResult:
        """Run the agent (convenience for single execution).

        Starts the runtime, triggers the "start" entry point and always stops
        the runtime afterwards. A falsy result from the runtime is reported as
        a failed ``ExecutionResult`` with error "Execution timeout".
        """
        await self.start()
        try:
            result = await self._agent_runtime.trigger_and_wait(
                entry_point_id="start",
                input_data={},
            )
            return result or ExecutionResult(success=False, error="Execution timeout")
        finally:
            await self.stop()
@@ -0,0 +1,19 @@
"""Runtime configuration for Credential Tester agent."""
from dataclasses import dataclass
from framework.config import RuntimeConfig
@dataclass
class AgentMetadata:
    """Static descriptive metadata for the Credential Tester agent."""

    # Human-readable agent name.
    name: str = "Credential Tester"
    # Version string of the agent package.
    version: str = "1.0.0"
    # One-paragraph summary of what the agent is for.
    description: str = (
        "Test connected accounts by making real API calls. Pick an account, "
        "verify credentials work, and explore available tools."
    )
metadata = AgentMetadata()
default_config = RuntimeConfig(temperature=0.3)
@@ -0,0 +1,9 @@
{
"hive-tools": {
"transport": "stdio",
"command": "uv",
"args": ["run", "python", "mcp_server.py", "--stdio"],
"cwd": "../../../../tools",
"description": "Hive tools MCP server with provider-specific tools"
}
}
@@ -0,0 +1,84 @@
"""Node definitions for Credential Tester agent."""
from framework.graph import NodeSpec
def build_tester_node(
    provider: str,
    alias: str,
    tools: list[str],
    identity: dict[str, str],
    source: str = "aden",
) -> NodeSpec:
    """Build the tester node dynamically for the selected account.

    Args:
        provider: Provider / credential name (e.g. "google", "brave_search").
        alias: User-set alias (e.g. "Timothy", "work").
        tools: Tool names available for this provider.
        identity: Identity dict (email, workspace, etc.) for context; only
            entries with a truthy value appear in the prompt.
        source: "aden" or "local"; controls the routing instructions in the
            prompt. Any value other than "aden" gets the local-key wording.
    """
    is_aden = source == "aden"

    shown = ", ".join(f"{k}: {v}" for k, v in identity.items() if v)
    detail = f" ({shown})" if shown else ""
    account_label = "account" if is_aden else "local API key"

    if is_aden:
        routing_section = (
            "# Account routing\n"
            f'IMPORTANT: Always pass `account="{alias}"` when calling any tool. '
            "This routes the API call to the correct credential. Never use the email "
            "or any other identifier always use the alias exactly as shown.\n"
        )
    else:
        routing_section = (
            "# Credential routing\n"
            "This is a local API key credential do NOT pass an `account` parameter. "
            "The key is pre-injected into the session environment and tools read it automatically.\n"
        )

    # One bullet per tool, in the order given by the caller.
    tool_listing = "\n".join(f"- {tool_name}" for tool_name in tools)
    tool_count = len(tools)

    prompt = f"""\
You are a credential tester for the {account_label}: {provider}/{alias}{detail}
Your job is to help the user verify that this credential works by making \
real API calls using the available tools.
{routing_section}
# Instructions
1. Start by greeting the user and confirming which account you're testing.
2. Suggest a simple, safe, read-only API call to verify the credential works \
(e.g. list messages, list channels, list contacts, search for "test").
3. Execute the call when the user agrees.
4. Report the result clearly: success (with sample data) or failure (with error).
5. Let the user request additional API calls to further test the credential.
# Available tools
You have access to {tool_count} tools for {provider}:
{tool_listing}
# Rules
- Start with read-only operations (list, get) before write operations (create, update, delete).
- Always confirm with the user before performing write operations.
- If a call fails, report the exact error this helps diagnose credential issues.
- Be concise. No emojis.
"""

    return NodeSpec(
        id="tester",
        name="Credential Tester",
        description=(
            f"Interactive testing node for {provider}/{alias}. "
            f"Has access to all {provider} tools to verify the credential works."
        ),
        node_type="event_loop",
        client_facing=True,
        max_node_visits=0,
        input_keys=[],
        output_keys=[],
        tools=tools,
        system_prompt=prompt,
    )
@@ -0,0 +1,44 @@
"""
Hive Coder — Native coding agent that builds Hive agent packages.
Deeply understands the agent framework and produces complete Python packages
with goals, nodes, edges, system prompts, MCP configuration, and tests
from natural language specifications.
"""
from .agent import (
HiveCoderAgent,
conversation_mode,
default_agent,
edges,
entry_node,
entry_points,
goal,
identity_prompt,
loop_config,
nodes,
pause_nodes,
terminal_nodes,
)
from .config import AgentMetadata, RuntimeConfig, default_config, metadata
__version__ = "1.0.0"
__all__ = [
"HiveCoderAgent",
"default_agent",
"goal",
"nodes",
"edges",
"entry_node",
"entry_points",
"pause_nodes",
"terminal_nodes",
"conversation_mode",
"identity_prompt",
"loop_config",
"RuntimeConfig",
"AgentMetadata",
"default_config",
"metadata",
]
@@ -0,0 +1,223 @@
"""CLI entry point for Hive Coder agent."""
import asyncio
import json
import logging
import sys
import click
from .agent import HiveCoderAgent, default_agent
def setup_logging(verbose=False, debug=False):
    """Set up stderr logging at a verbosity chosen by the CLI flags.

    ``debug`` takes precedence over ``verbose``. With neither flag set, only
    warnings and above are emitted. The selected level is also applied to the
    ``framework`` logger.
    """
    # (flag, level, format) rows in priority order; the last row is the fallback.
    profiles = (
        (debug, logging.DEBUG, "%(asctime)s %(name)s: %(message)s"),
        (verbose, logging.INFO, "%(message)s"),
        (True, logging.WARNING, "%(levelname)s: %(message)s"),
    )
    log_level, log_format = next(
        (lvl, pattern) for flag, lvl, pattern in profiles if flag
    )

    logging.basicConfig(level=log_level, format=log_format, stream=sys.stderr)
    framework_logger = logging.getLogger("framework")
    framework_logger.setLevel(log_level)
# Root command group; the subcommands below attach themselves via @cli.command().
# NOTE(review): the version string is hard-coded here rather than read from the
# package's __version__ / metadata.version, so the values must be kept in sync.
@click.group()
@click.version_option(version="1.0.0")
def cli():
    """Hive Coder — Build Hive agent packages from natural language."""
    pass
@cli.command()
@click.option("--request", "-r", type=str, required=True, help="What agent to build")
@click.option("--mock", is_flag=True, help="Run in mock mode")
@click.option("--quiet", "-q", is_flag=True, help="Only output result JSON")
@click.option("--verbose", "-v", is_flag=True, help="Show execution details")
@click.option("--debug", is_flag=True, help="Show debug logging")
def run(request, mock, quiet, verbose, debug):
    """Execute agent building from a request."""
    # --quiet skips logging configuration entirely.
    if not quiet:
        setup_logging(verbose=verbose, debug=debug)

    # One-shot execution: the agent is started, triggered once and stopped.
    outcome = asyncio.run(
        default_agent.run({"user_request": request}, mock_mode=mock)
    )

    payload = {
        "success": outcome.success,
        "steps_executed": outcome.steps_executed,
        "output": outcome.output,
    }
    # The "error" key is only present when there is something to report.
    if outcome.error:
        payload["error"] = outcome.error

    # default=str keeps the dump from failing on non-JSON-native values.
    click.echo(json.dumps(payload, indent=2, default=str))

    # The process exit status mirrors the execution result.
    exit_status = 0 if outcome.success else 1
    sys.exit(exit_status)
@cli.command()
@click.option("--mock", is_flag=True, help="Run in mock mode")
@click.option("--verbose", "-v", is_flag=True, help="Show execution details")
@click.option("--debug", is_flag=True, help="Show debug logging")
def tui(mock, verbose, debug):
    """Launch the TUI dashboard for interactive agent building."""
    setup_logging(verbose=verbose, debug=debug)

    # The TUI is imported lazily so that a missing optional dependency produces
    # an install hint and exit status 1 instead of an import traceback.
    try:
        from framework.tui.app import AdenTUI
    except ImportError:
        click.echo("TUI requires the 'textual' package. Install with: pip install textual")
        sys.exit(1)

    # Imported here because only this command needs them.
    from pathlib import Path

    from framework.llm import LiteLLMProvider
    from framework.runner.tool_registry import ToolRegistry
    from framework.runtime.agent_runtime import create_agent_runtime
    from framework.runtime.execution_stream import EntryPointSpec

    async def run_with_tui():
        # Wire the runtime by hand instead of calling agent.start(): this path
        # sets the agent's private _tool_registry and calls _build_graph()
        # directly, and registers its own "start" entry point.
        agent = HiveCoderAgent()
        agent._tool_registry = ToolRegistry()

        storage_path = Path.home() / ".hive" / "agents" / "hive_coder"
        storage_path.mkdir(parents=True, exist_ok=True)

        # MCP tool servers are optional: only loaded when the config file
        # sits next to this module.
        mcp_config_path = Path(__file__).parent / "mcp_servers.json"
        if mcp_config_path.exists():
            agent._tool_registry.load_mcp_config(mcp_config_path)

        # In mock mode no LLM provider is created and llm stays None.
        llm = None
        if not mock:
            llm = LiteLLMProvider(
                model=agent.config.model,
                api_key=agent.config.api_key,
                api_base=agent.config.api_base,
            )

        # Tools are read after the MCP config has been loaded into the registry.
        tools = list(agent._tool_registry.get_tools().values())
        tool_executor = agent._tool_registry.get_executor()
        graph = agent._build_graph()

        runtime = create_agent_runtime(
            graph=graph,
            goal=agent.goal,
            storage_path=storage_path,
            entry_points=[
                EntryPointSpec(
                    id="start",
                    name="Build Agent",
                    entry_node="coder",
                    trigger_type="manual",
                    isolation_level="isolated",
                ),
            ],
            llm=llm,
            tools=tools,
            tool_executor=tool_executor,
        )

        await runtime.start()
        try:
            app = AdenTUI(runtime)
            await app.run_async()
        finally:
            # Stop the runtime even if the TUI exits with an error.
            await runtime.stop()

    asyncio.run(run_with_tui())
@cli.command()
@click.option("--json", "output_json", is_flag=True)
def info(output_json):
    """Show agent information."""
    details = default_agent.info()

    # Machine-readable form: dump the info dict as-is and stop.
    if output_json:
        click.echo(json.dumps(details, indent=2))
        return

    # Human-readable summary, one field per line.
    click.echo(f"Agent: {details['name']}")
    click.echo(f"Version: {details['version']}")
    click.echo(f"Description: {details['description']}")

    node_names = ", ".join(details["nodes"])
    click.echo(f"\nNodes: {node_names}")

    client_facing = ", ".join(details["client_facing_nodes"])
    click.echo(f"Client-facing: {client_facing}")

    click.echo(f"Entry: {details['entry_node']}")

    # An empty terminal list means the agent loops until the user exits.
    terminal_label = ", ".join(details["terminal_nodes"]) or "(forever-alive)"
    click.echo(f"Terminal: {terminal_label}")
@cli.command()
def validate():
    """Validate agent structure."""
    report = default_agent.validate()

    # Failure path first: list every error and exit non-zero.
    if not report["valid"]:
        click.echo("Agent has errors:")
        for problem in report["errors"]:
            click.echo(f" ERROR: {problem}")
        sys.exit(1)

    click.echo("Agent is valid")
    # Warnings do not affect the exit status.
    for note in report["warnings"] or ():
        click.echo(f" WARNING: {note}")
    sys.exit(0)
@cli.command()
@click.option("--verbose", "-v", is_flag=True)
def shell(verbose):
    """Interactive agent building session (CLI, no TUI)."""
    # Synchronous click entry point: runs the async shell until it returns.
    asyncio.run(_interactive_shell(verbose))
async def _interactive_shell(verbose=False):
    """Run a line-oriented build session on stdin/stdout.

    Reads requests at a ``Build> `` prompt, sends each one to the agent's
    ``default`` entry point and prints the outcome. The session ends on
    ``quit`` / ``exit`` / ``q``, on Ctrl-C, or on end-of-input (Ctrl-D or a
    closed/piped stdin). The agent runtime is always stopped on the way out.

    Args:
        verbose: When true, logging is configured at INFO level.
    """
    import traceback

    setup_logging(verbose=verbose)
    click.echo("=== Hive Coder ===")
    click.echo("Describe the agent you want to build (or 'quit' to exit):\n")

    agent = HiveCoderAgent()
    await agent.start()

    # Inside a coroutine the running loop is the one to use for executors.
    loop = asyncio.get_running_loop()
    try:
        while True:
            try:
                # input() blocks, so it runs in the default thread-pool executor.
                request = await loop.run_in_executor(None, input, "Build> ")
                command = request.strip()
                if command.lower() in ("quit", "exit", "q"):
                    click.echo("Goodbye!")
                    break
                if not command:
                    continue

                click.echo("\nBuilding agent...\n")
                result = await agent.trigger_and_wait("default", {"user_request": request})

                # A None result is reported as a timeout.
                if result is None:
                    click.echo("\n[Execution timed out]\n")
                    continue

                if result.success:
                    output = result.output or {}
                    agent_name = output.get("agent_name", "unknown")
                    validation = output.get("validation_result", "unknown")
                    click.echo(f"\nAgent '{agent_name}' built. Validation: {validation}\n")
                else:
                    click.echo(f"\nBuild failed: {result.error}\n")
            except (KeyboardInterrupt, EOFError):
                # EOFError must end the session: once stdin is exhausted every
                # further input() call raises it again, so treating it as a
                # recoverable error would loop forever.
                click.echo("\nGoodbye!")
                break
            except Exception as e:
                # Any other failure is reported and the session continues.
                click.echo(f"Error: {e}", err=True)
                traceback.print_exc()
    finally:
        await agent.stop()
if __name__ == "__main__":
cli()
+357
View File
@@ -0,0 +1,357 @@
"""Agent graph construction for Hive Coder."""
from pathlib import Path
from framework.graph import Constraint, Goal, SuccessCriterion
from framework.graph.checkpoint_config import CheckpointConfig
from framework.graph.edge import GraphSpec
from framework.graph.executor import ExecutionResult
from framework.llm import LiteLLMProvider
from framework.runner.tool_registry import ToolRegistry
from framework.runtime.agent_runtime import AgentRuntime, create_agent_runtime
from framework.runtime.execution_stream import EntryPointSpec
from .config import default_config, metadata
from .nodes import coder_node, queen_node
# ticket_receiver is no longer needed — the queen runs as an independent
# GraphExecutor and receives escalation tickets via inject_event().
# Keeping the import commented for reference:
# from .ticket_receiver import TICKET_RECEIVER_ENTRY_POINT
# Goal definition
goal = Goal(
id="agent-builder",
name="Hive Agent Builder",
description=(
"Build complete, validated Hive agent packages from natural language "
"specifications. Produces production-ready Python packages with goals, "
"nodes, edges, system prompts, MCP configuration, and tests."
),
success_criteria=[
SuccessCriterion(
id="valid-package",
description="Generated agent package passes structural validation",
metric="validation_pass",
target="true",
weight=0.30,
),
SuccessCriterion(
id="complete-files",
description=(
"All required files generated: agent.py, config.py, "
"nodes/__init__.py, __init__.py, __main__.py, mcp_servers.json"
),
metric="file_count",
target=">=6",
weight=0.25,
),
SuccessCriterion(
id="user-satisfaction",
description="User reviews and approves the generated agent",
metric="user_approval",
target="true",
weight=0.25,
),
SuccessCriterion(
id="framework-compliance",
description=(
"Generated code follows framework patterns: STEP 1/STEP 2 "
"for client-facing, correct imports, entry_points format"
),
metric="pattern_compliance",
target="100%",
weight=0.20,
),
],
constraints=[
Constraint(
id="dynamic-tool-discovery",
description=(
"Always discover available tools dynamically via "
"list_agent_tools before referencing tools in agent designs"
),
constraint_type="hard",
category="correctness",
),
Constraint(
id="no-fabricated-tools",
description="Only reference tools that exist in hive-tools MCP",
constraint_type="hard",
category="correctness",
),
Constraint(
id="valid-python",
description="All generated Python files must be syntactically correct",
constraint_type="hard",
category="correctness",
),
Constraint(
id="self-verification",
description="Run validation after writing code; fix errors before presenting",
constraint_type="hard",
category="quality",
),
],
)
# Nodes: primary coder node only. The queen runs as an independent
# GraphExecutor with queen_node — not as part of this graph.
nodes = [coder_node]
# No edges needed — single forever-alive event_loop node
edges = []
# Graph configuration
entry_node = "coder"
entry_points = {"start": "coder"}
pause_nodes = []
terminal_nodes = [] # Forever-alive: loops until user exits
# No async entry points needed — the queen is now an independent executor,
# not a secondary graph receiving events via add_graph().
async_entry_points = []
# Module-level variables read by AgentRunner.load()
conversation_mode = "continuous"
identity_prompt = (
"You are Hive Coder, the best agent-building coding agent on the planet. "
"You deeply understand the Hive agent framework at the source code level "
"and produce production-ready agent packages from natural language. "
"You can dynamically discover available framework tools, inspect runtime "
"sessions and checkpoints from agents you build, and run their test suites. "
"You follow coding agent discipline: read before writing, verify "
"assumptions by reading actual code, adhere to project conventions, "
"self-verify with validation, and fix your own errors. You are concise, "
"direct, and technically rigorous. No emojis. No fluff."
)
loop_config = {
"max_iterations": 100,
"max_tool_calls_per_turn": 30,
"max_history_tokens": 32000,
}
# ---------------------------------------------------------------------------
# Queen graph — runs as an independent persistent conversation in the TUI.
# Loaded by _load_judge_and_queen() in app.py, NOT by AgentRunner.
# ---------------------------------------------------------------------------
queen_goal = Goal(
id="queen-manager",
name="Queen Manager",
description=(
"Manage the worker agent lifecycle and serve as the user's primary "
"interactive interface. Triage health escalations from the judge."
),
success_criteria=[],
constraints=[],
)
queen_graph = GraphSpec(
id="queen-graph",
goal_id=queen_goal.id,
version="1.0.0",
entry_node="queen",
entry_points={"start": "queen"},
terminal_nodes=[],
pause_nodes=[],
nodes=[queen_node],
edges=[],
conversation_mode="continuous",
loop_config={
"max_iterations": 999_999,
"max_tool_calls_per_turn": 30,
"max_history_tokens": 32000,
},
)
class HiveCoderAgent:
    """Hive Coder: builds Hive agent packages from natural language.

    Single-node architecture: the coder runs in a continuous while(true) loop.
    The queen runs as an independent GraphExecutor (loaded by the TUI via
    _load_judge_and_queen), not as part of this graph.

    Typical use is ``await start()``, one or more ``await trigger_and_wait()``
    calls, then ``await stop()``. ``run()`` wraps that sequence for a single
    execution.
    """

    def __init__(self, config=None):
        """Bind the module-level graph definition to this instance.

        Args:
            config: Runtime configuration; ``default_config`` is used when
                omitted (or falsy).
        """
        self.config = config or default_config
        self.goal = goal
        self.nodes = nodes
        self.edges = edges
        self.entry_node = entry_node
        self.entry_points = entry_points
        self.pause_nodes = pause_nodes
        self.terminal_nodes = terminal_nodes
        self.async_entry_points = async_entry_points
        # Populated by _setup(); None until the agent has been started.
        self._graph: GraphSpec | None = None
        self._agent_runtime: AgentRuntime | None = None
        self._tool_registry: ToolRegistry | None = None
        self._storage_path: Path | None = None

    def _build_graph(self) -> GraphSpec:
        """Build the GraphSpec from this instance and the module-level settings."""
        return GraphSpec(
            id="hive-coder-graph",
            goal_id=self.goal.id,
            version="1.0.0",
            entry_node=self.entry_node,
            entry_points=self.entry_points,
            terminal_nodes=self.terminal_nodes,
            pause_nodes=self.pause_nodes,
            nodes=self.nodes,
            edges=self.edges,
            default_model=self.config.model,
            max_tokens=self.config.max_tokens,
            loop_config=loop_config,
            conversation_mode=conversation_mode,
            identity_prompt=identity_prompt,
            async_entry_points=self.async_entry_points,
        )

    def _setup(self, mock_mode=False) -> None:
        """Create storage, tool registry, LLM provider, graph and runtime.

        Args:
            mock_mode: When true no LLM provider is created and the runtime
                receives ``llm=None``.
        """
        self._storage_path = Path.home() / ".hive" / "agents" / "hive_coder"
        self._storage_path.mkdir(parents=True, exist_ok=True)

        self._tool_registry = ToolRegistry()
        # MCP servers are optional: loaded only if the config sits next to this file.
        mcp_config_path = Path(__file__).parent / "mcp_servers.json"
        if mcp_config_path.exists():
            self._tool_registry.load_mcp_config(mcp_config_path)

        llm = None
        if not mock_mode:
            llm = LiteLLMProvider(
                model=self.config.model,
                api_key=self.config.api_key,
                api_base=self.config.api_base,
            )

        # Read tools only after the MCP config has been loaded into the registry.
        tool_executor = self._tool_registry.get_executor()
        tools = list(self._tool_registry.get_tools().values())

        self._graph = self._build_graph()

        checkpoint_config = CheckpointConfig(
            enabled=True,
            checkpoint_on_node_start=False,
            checkpoint_on_node_complete=True,
            checkpoint_max_age_days=7,
            async_checkpoint=True,
        )

        # A single manual entry point; its id is what trigger_and_wait() targets
        # by default.
        entry_point_specs = [
            EntryPointSpec(
                id="default",
                name="Default",
                entry_node=self.entry_node,
                trigger_type="manual",
                isolation_level="shared",
            ),
        ]

        self._agent_runtime = create_agent_runtime(
            graph=self._graph,
            goal=self.goal,
            storage_path=self._storage_path,
            entry_points=entry_point_specs,
            llm=llm,
            tools=tools,
            tool_executor=tool_executor,
            checkpoint_config=checkpoint_config,
            graph_id="hive_coder",
        )

    async def start(self, mock_mode=False) -> None:
        """Set up (on first use) and start the agent runtime.

        ``mock_mode`` only has an effect when the runtime has not been created
        yet; an existing runtime is reused as-is.
        """
        if self._agent_runtime is None:
            self._setup(mock_mode=mock_mode)
        if not self._agent_runtime.is_running:
            await self._agent_runtime.start()

    async def stop(self) -> None:
        """Stop the agent runtime if it is running and discard it."""
        if self._agent_runtime and self._agent_runtime.is_running:
            await self._agent_runtime.stop()
        # Dropping the reference makes the next start() build a fresh runtime.
        self._agent_runtime = None

    async def trigger_and_wait(
        self,
        entry_point: str = "default",
        input_data: dict | None = None,
        timeout: float | None = None,
        session_state: dict | None = None,
    ) -> ExecutionResult | None:
        """Execute the graph and wait for completion.

        Args:
            entry_point: Id of the entry point to trigger.
            input_data: Input for the execution; an empty dict when omitted.
            timeout: Optional time limit, forwarded to the runtime only when
                given.
            session_state: Optional session state passed through to the runtime.

        Returns:
            Whatever the runtime returns. Callers in this package treat ``None``
            as a timeout.

        Raises:
            RuntimeError: If the agent has not been started.
        """
        if self._agent_runtime is None:
            raise RuntimeError("Agent not started. Call start() first.")

        call_kwargs = {
            "entry_point_id": entry_point,
            "input_data": input_data or {},
            "session_state": session_state,
        }
        # Previously the timeout argument was accepted but never forwarded.
        # It is passed only when explicitly set, so the default call is
        # unchanged.
        # NOTE(review): assumes AgentRuntime.trigger_and_wait accepts a
        # ``timeout`` keyword; confirm against the runtime.
        if timeout is not None:
            call_kwargs["timeout"] = timeout

        return await self._agent_runtime.trigger_and_wait(**call_kwargs)

    async def run(self, context: dict, mock_mode=False, session_state=None) -> ExecutionResult:
        """Run the agent once: start, trigger the default entry point, stop.

        A falsy result from ``trigger_and_wait`` is converted into a failed
        ``ExecutionResult`` with the error "Execution timeout".
        """
        await self.start(mock_mode=mock_mode)
        try:
            result = await self.trigger_and_wait("default", context, session_state=session_state)
            return result or ExecutionResult(success=False, error="Execution timeout")
        finally:
            await self.stop()

    def info(self):
        """Return a dict describing the agent: metadata, goal and graph layout."""
        return {
            "name": metadata.name,
            "version": metadata.version,
            "description": metadata.description,
            "goal": {
                "name": self.goal.name,
                "description": self.goal.description,
            },
            "nodes": [n.id for n in self.nodes],
            "edges": [e.id for e in self.edges],
            "entry_node": self.entry_node,
            "entry_points": self.entry_points,
            "pause_nodes": self.pause_nodes,
            "terminal_nodes": self.terminal_nodes,
            "client_facing_nodes": [n.id for n in self.nodes if n.client_facing],
        }

    def validate(self):
        """Check that every node reference in the graph definition resolves.

        Returns:
            A dict with ``valid`` (bool), ``errors`` (list of messages) and
            ``warnings`` (list; currently always empty).
        """
        errors = []
        warnings = []

        node_ids = {node.id for node in self.nodes}

        for edge in self.edges:
            if edge.source not in node_ids:
                errors.append(f"Edge {edge.id}: source '{edge.source}' not found")
            if edge.target not in node_ids:
                errors.append(f"Edge {edge.id}: target '{edge.target}' not found")

        if self.entry_node not in node_ids:
            errors.append(f"Entry node '{self.entry_node}' not found")

        for terminal in self.terminal_nodes:
            if terminal not in node_ids:
                errors.append(f"Terminal node '{terminal}' not found")

        # Pause nodes are node references too and get the same check.
        for pause in self.pause_nodes:
            if pause not in node_ids:
                errors.append(f"Pause node '{pause}' not found")

        for ep_id, node_id in self.entry_points.items():
            if node_id not in node_ids:
                errors.append(f"Entry point '{ep_id}' references unknown node '{node_id}'")

        return {
            "valid": not errors,
            "errors": errors,
            "warnings": warnings,
        }
# Create default instance
default_agent = HiveCoderAgent()
@@ -0,0 +1,51 @@
"""Runtime configuration for Hive Coder agent."""
import json
from dataclasses import dataclass, field
from pathlib import Path
def _load_preferred_model() -> str:
"""Load preferred model from ~/.hive/configuration.json."""
config_path = Path.home() / ".hive" / "configuration.json"
if config_path.exists():
try:
with open(config_path, encoding="utf-8") as f:
config = json.load(f)
llm = config.get("llm", {})
if llm.get("provider") and llm.get("model"):
return f"{llm['provider']}/{llm['model']}"
except Exception:
pass
return "anthropic/claude-sonnet-4-20250514"
@dataclass
class RuntimeConfig:
    """LLM runtime settings for the Hive Coder agent."""

    # Model identifier; computed by _load_preferred_model() each time a
    # RuntimeConfig is instantiated without an explicit model.
    model: str = field(default_factory=_load_preferred_model)
    temperature: float = 0.7
    max_tokens: int = 8000
    # Optional provider credentials/endpoint; None leaves them unset.
    api_key: str | None = None
    api_base: str | None = None
default_config = RuntimeConfig()
@dataclass
class AgentMetadata:
    """Descriptive, user-visible metadata for the Hive Coder agent."""

    name: str = "Hive Coder"
    version: str = "1.0.0"
    description: str = (
        "Native coding agent that builds production-ready Hive agent packages "
        "from natural language specifications. Deeply understands the agent framework "
        "and produces complete Python packages with goals, nodes, edges, system prompts, "
        "MCP configuration, and tests."
    )
    # Greeting text; presumably shown when a session opens (TODO confirm where it is consumed).
    intro_message: str = (
        "I'm Hive Coder — I build Hive agents. Describe what kind of agent "
        "you want to create and I'll design, implement, and validate it for you."
    )
metadata = AgentMetadata()
@@ -0,0 +1,9 @@
{
"coder-tools": {
"transport": "stdio",
"command": "uv",
"args": ["run", "python", "coder_tools_server.py", "--stdio"],
"cwd": "../../../../tools",
"description": "Unsandboxed file system tools for code generation and validation"
}
}
@@ -0,0 +1,961 @@
"""Node definitions for Hive Coder agent."""
from pathlib import Path
from framework.graph import NodeSpec
# Load reference docs at import time so they're always in the system prompt.
# No voluntary read_file() calls needed — the LLM gets everything upfront.
_ref_dir = Path(__file__).parent.parent / "reference"
_framework_guide = (_ref_dir / "framework_guide.md").read_text(encoding="utf-8")
_file_templates = (_ref_dir / "file_templates.md").read_text(encoding="utf-8")
_anti_patterns = (_ref_dir / "anti_patterns.md").read_text(encoding="utf-8")
_gcu_guide_path = _ref_dir / "gcu_guide.md"
_gcu_guide = _gcu_guide_path.read_text(encoding="utf-8") if _gcu_guide_path.exists() else ""
def _is_gcu_enabled() -> bool:
try:
from framework.config import get_gcu_enabled
return get_gcu_enabled()
except Exception:
return False
def _build_appendices() -> str:
    """Assemble the reference appendices as one string.

    Each appendix is a heading followed by the corresponding reference text
    loaded at import time. The GCU guide is added only when GCU is enabled
    and the guide text is non-empty.
    """
    sections = [
        "\n\n# Appendix: Framework Reference\n\n",
        _framework_guide,
        "\n\n# Appendix: File Templates\n\n",
        _file_templates,
        "\n\n# Appendix: Anti-Patterns\n\n",
        _anti_patterns,
    ]
    if _is_gcu_enabled() and _gcu_guide:
        sections.append("\n\n# Appendix: GCU Browser Automation Guide\n\n")
        sections.append(_gcu_guide)
    return "".join(sections)
# Shared appendices — appended to every coding node's system prompt.
_appendices = _build_appendices()
# Tools available to both coder (worker) and queen.
_SHARED_TOOLS = [
# File I/O
"read_file",
"write_file",
"edit_file",
"list_directory",
"search_files",
"run_command",
"undo_changes",
# Meta-agent
"list_agent_tools",
"validate_agent_tools",
"list_agents",
"list_agent_sessions",
"get_agent_session_state",
"get_agent_session_memory",
"list_agent_checkpoints",
"get_agent_checkpoint",
"run_agent_tests",
]
# Queen mode-specific tool sets.
# Building mode: full coding + agent construction tools.
_QUEEN_BUILDING_TOOLS = _SHARED_TOOLS + [
"load_built_agent",
"list_credentials",
]
# Staging mode: agent loaded but not yet running — inspect, configure, launch.
_QUEEN_STAGING_TOOLS = [
# Read-only (inspect agent files, logs)
"read_file",
"list_directory",
"search_files",
"run_command",
# Agent inspection
"list_credentials",
"get_worker_status",
# Launch or go back
"run_agent_with_input",
"stop_worker_and_edit",
]
# Running mode: worker is executing — monitor and control.
_QUEEN_RUNNING_TOOLS = [
# Read-only coding (for inspecting logs, files)
"read_file",
"list_directory",
"search_files",
"run_command",
# Credentials
"list_credentials",
# Worker lifecycle
"stop_worker",
"stop_worker_and_edit",
"get_worker_status",
"inject_worker_message",
# Monitoring
"get_worker_health_summary",
"notify_operator",
]
# ---------------------------------------------------------------------------
# Shared agent-building knowledge: core mandates, tool docs, meta-agent
# capabilities, and workflow phases 1-6. Both the coder (worker) and
# queen compose their system prompts from this block + role-specific
# additions.
# ---------------------------------------------------------------------------
_agent_builder_knowledge = """\
# Core Mandates
- **Read before writing.** NEVER write code from assumptions. Read \
reference agents and templates first. Read every file before editing.
- **Conventions first.** Follow existing project patterns exactly. \
Analyze imports, structure, and style in reference agents.
- **Verify assumptions.** Never assume a class, import, or pattern \
exists. Read actual source to confirm. Search if unsure.
- **Discover tools dynamically.** NEVER reference tools from static \
docs. Always run list_agent_tools() to see what actually exists.
- **Professional objectivity.** If a use case is a poor fit for the \
framework, say so. Technical accuracy over validation.
- **Concise.** No emojis. No preambles. No postambles. Substance only.
- **Self-verify.** After writing code, run validation and tests. Fix \
errors yourself. Don't declare success until validation passes.
# Tools
## File I/O
- read_file(path, offset?, limit?) read with line numbers
- write_file(path, content) create/overwrite, auto-mkdir
- edit_file(path, old_text, new_text, replace_all?) fuzzy-match edit
- list_directory(path, recursive?) list contents
- search_files(pattern, path?, include?) regex search
- run_command(command, cwd?, timeout?) shell execution
- undo_changes(path?) restore from git snapshot
## Meta-Agent
- list_agent_tools(server_config_path?, output_schema?, group?) discover \
available tools grouped by category. output_schema: "simple" (default) or \
"full" (includes input_schema). group: "all" (default) or a prefix like \
"gmail". Call FIRST before designing.
- validate_agent_tools(agent_path) validate that all tools declared \
in an agent's nodes actually exist. Call after building.
- list_agents() list all agent packages in exports/ with session counts
- list_agent_sessions(agent_name, status?, limit?) list sessions
- get_agent_session_state(agent_name, session_id) full session state
- get_agent_session_memory(agent_name, session_id, key?) memory data
- list_agent_checkpoints(agent_name, session_id) list checkpoints
- get_agent_checkpoint(agent_name, session_id, checkpoint_id?) load checkpoint
- run_agent_tests(agent_name, test_types?, fail_fast?) run pytest with parsing
# Meta-Agent Capabilities
You are not just a file writer. You have deep integration with the \
Hive framework:
## Tool Discovery (MANDATORY before designing)
Before designing any agent, run list_agent_tools() to discover all \
available tools. ONLY use tools from this list in your node definitions. \
NEVER guess or fabricate tool names from memory.
list_agent_tools() # names + descriptions
list_agent_tools(output_schema="full") # include input_schema
list_agent_tools(group="gmail") # only gmail_* tools
list_agent_tools("exports/{agent_name}/mcp_servers.json") # specific agent
## Agent Awareness
Run list_agents() to see what agents already exist. Read their code \
for patterns:
read_file("exports/{name}/agent.py")
read_file("exports/{name}/nodes/__init__.py")
## Post-Build Testing
After writing agent code, validate structurally AND run tests:
run_command("python -c 'from {name} import default_agent; \\
print(default_agent.validate())'")
run_agent_tests("{name}")
## Debugging Built Agents
When a user says "my agent is failing" or "debug this agent":
1. list_agent_sessions("{agent_name}") find the session
2. get_agent_session_state("{agent_name}", "{session_id}") see status
3. get_agent_session_memory("{agent_name}", "{session_id}") inspect data
4. list_agent_checkpoints / get_agent_checkpoint trace execution
# Agent Building Workflow
You operate in a continuous loop. The user describes what they want, \
you build it. No rigid phases use judgment. But the general flow is:
## 1. Understand & Qualify (3-5 turns)
This is ONE conversation, not two phases. Discovery and qualification \
happen together. Surface problems as you find them, not in a batch.
**Before your first response**, silently run list_agent_tools() and \
consult the **Framework Reference** appendix. Know what's possible \
before you speak.
### How to respond to the user's first message
**Listen like an architect.** While they talk, hear the structure:
- **The actors**: Who are the people/systems involved?
- **The trigger**: What kicks off the workflow?
- **The core loop**: What's the main thing that happens repeatedly?
- **The output**: What's the valuable thing produced?
- **The pain**: What about today is broken, slow, or missing?
| They say... | You're hearing... |
|-------------|-------------------|
| Nouns they repeat | Your entities |
| Verbs they emphasize | Your core operations |
| Frustrations they mention | Your design constraints |
| Workarounds they describe | What the system must replace |
**Use domain knowledge aggressively.** If they say "research agent," \
you already know it involves search, summarization, source tracking, \
iteration. Don't ask about each — use them as defaults and let their \
specifics override. Merge your general knowledge with their specifics: \
60-80% right before you ask a single question.
### Play back a model WITH qualification baked in
Don't separate "here's what I understood" from "here's what might be \
a problem." Weave them together. Your playback should sound like:
"Here's how I'm picturing this: [concrete proposed solution]. \
The framework handles [X and Y] well for this. [One concern: Z tool \
doesn't exist, so we'd use W instead / Z would need real-time which \
isn't a fit, but we could do polling]. For MVP I'd focus on \
[highest-value thing]. Before I start [1-2 questions]."
If there's a deal-breaker, lead with it: "Before I go further — \
this needs [X] which the framework can't do because [Y]. We could \
[workaround] or reconsider the approach. What do you think?"
**Surface problems immediately. Don't save them for a formal review.**
### Ask only what you CANNOT infer
Every question must earn its place by preventing a costly wrong turn, \
unlocking a shortcut, or surfacing a dealbreaker.
Good questions: "Who's the primary user?", "Is this replacing \
something or net new?", "Does this integrate with anything?"
Bad questions (DON'T ask): "What should happen on error?", "Should \
it have search?", "What tools should I use?" — these are your job.
### Conversation flow
| Turn | Who | What |
|------|-----|------|
| 1 | User | Describes what they need |
| 2 | You | Play back model with concerns baked in. 1-2 questions max. |
| 3 | User | Corrects, confirms, or adds detail |
| 4 | You | Adjust model, confirm scope, move to design |
### Anti-patterns
| Don't | Do instead |
|-------|------------|
| Open with a list of questions | Open with what you understood |
| Separate "assessment" dump | Weave concerns into your playback |
| Good/Bad/Ugly formal section | Mention issues naturally in context |
| Ask about every edge case | Smart defaults, flag in summary |
| 10+ turn discovery | 3-5 turns, then start building |
| Wait for certainty | Start at 80% confidence, iterate |
| Ask what tech/tools to use | Decide, disclose, move on |
## 3. Design
Design the agent architecture:
- Goal: id, name, description, 3-5 success criteria, 2-4 constraints
- Nodes: **2-4 nodes MAXIMUM** (see rules below)
- Edges: on_success for linear, conditional for routing
- Lifecycle: ALWAYS forever-alive (`terminal_nodes=[]`) unless the user \
explicitly requests a one-shot/batch agent. Forever-alive agents loop \
continuously the user exits by closing the TUI. This is the standard \
pattern for all interactive agents.
### Node Design Rules
Each node boundary serializes outputs to shared memory \
and DESTROYS all in-context information (tool results, reasoning, history). \
Use as many nodes as the use case requires, but don't create nodes without \
tools merge them into nodes that do real work.
**MERGE nodes when:**
- Node has NO tools (pure LLM reasoning) merge into predecessor/successor
- Node sets only 1 trivial output collapse into predecessor
- Multiple consecutive autonomous nodes combine into one rich node
- A "report" or "summary" node merge into the client-facing node
- A "confirm" or "schedule" node that calls no external service remove
**SEPARATE nodes only when:**
- Client-facing vs autonomous (different interaction models)
- Fundamentally different tool sets
- Fan-out parallelism (parallel branches MUST be separate)
**Typical patterns (queen manages intake NO client-facing intake node):**
- 2 nodes: `process (autonomous) review (client-facing) process`
- 1 node: `process (autonomous)` simplest; queen handles all interaction
- WRONG: 7 nodes where half have no tools and just do LLM reasoning
- WRONG: Intake node that asks the user for requirements the queen does intake
Read reference agents before designing:
list_agents()
read_file("exports/deep_research_agent/agent.py")
read_file("exports/deep_research_agent/nodes/__init__.py")
Present the design to the user. Lead with a large ASCII graph inside \
a code block so it renders in monospace. Make it visually prominent \
use box-drawing characters and clear flow arrows:
```
process (autonomous)
in: user_request
tools: web_search,
save_data
on_success
review (client-facing)
tools: set_output
on_success
back to process
```
The queen owns intake: she gathers user requirements, then calls \
`run_agent_with_input(task)` with a structured task description. \
When building the agent, design the entry node's `input_keys` to \
match what the queen will provide at run time. No client-facing \
intake node in the worker.
Follow the graph with a brief summary of each node's purpose. \
Get user approval before implementing.
## 4. Implement
Consult the **File Templates** and **Anti-Patterns** appendices below.
Write files in order:
1. mkdir -p exports/{name}/nodes exports/{name}/tests
2. config.py RuntimeConfig + AgentMetadata
3. nodes/__init__.py NodeSpec definitions with system prompts
4. agent.py Goal, edges, graph, agent class
5. __init__.py package exports
6. __main__.py CLI with click
7. mcp_servers.json tool server config
8. tests/ fixtures
### Critical Rules
**Imports** (must match exactly only import what you use):
```python
from framework.graph import (
NodeSpec, EdgeSpec, EdgeCondition,
Goal, SuccessCriterion, Constraint,
)
from framework.graph.edge import GraphSpec
from framework.graph.executor import ExecutionResult
from framework.graph.checkpoint_config import CheckpointConfig
from framework.llm import LiteLLMProvider
from framework.runner.tool_registry import ToolRegistry
from framework.runtime.agent_runtime import (
AgentRuntime, create_agent_runtime,
)
from framework.runtime.execution_stream import EntryPointSpec
```
For agents with async entry points (timers, webhooks, events), also add:
```python
from framework.graph.edge import GraphSpec, AsyncEntryPointSpec
from framework.runtime.agent_runtime import (
AgentRuntime, AgentRuntimeConfig, create_agent_runtime,
)
```
NEVER `from core.framework...` PYTHONPATH includes core/.
**__init__.py MUST re-export ALL module-level variables** \
(THIS IS THE #1 SOURCE OF AGENT LOAD FAILURES):
The runner imports the package (__init__.py), NOT agent.py. It reads \
goal, nodes, edges, entry_node, entry_points, pause_nodes, \
terminal_nodes, conversation_mode, identity_prompt, loop_config via \
getattr(). If ANY are missing from __init__.py, they silently default \
to None or {} causing "must define goal, nodes, edges" or "node X \
is unreachable" errors. The __init__.py MUST import and re-export \
ALL of these from .agent:
```python
from .agent import (
MyAgent, default_agent, goal, nodes, edges,
entry_node, entry_points, pause_nodes, terminal_nodes,
conversation_mode, identity_prompt, loop_config,
)
```
**entry_points**: `{"start": "first-node-id"}`
The first node should be an autonomous processing node (NOT a \
client-facing intake). For agents with multiple entry points, \
add them: `{"start": "process", "reminder": "check"}`
**conversation_mode** ONLY two valid values:
- `"continuous"` recommended for interactive agents (context carries \
across node transitions)
- Omit entirely for isolated per-node conversations
NEVER use: "client_facing", "interactive", "adaptive", or any other \
value. These DO NOT EXIST.
**loop_config** ONLY three valid keys:
```python
loop_config = {
"max_iterations": 100,
"max_tool_calls_per_turn": 30,
"max_history_tokens": 32000,
}
```
NEVER add: "strategy", "mode", "timeout", or other keys.
**mcp_servers.json**:
```json
{
"hive-tools": {
"transport": "stdio",
"command": "uv",
"args": ["run", "python", "mcp_server.py", "--stdio"],
"cwd": "../../tools"
}
}
```
NO "mcpServers" wrapper. cwd "../../tools". command "uv".
**Storage**: `Path.home() / ".hive" / "agents" / "{name}"`
**Client-facing system prompts** (review/approval nodes only, NOT intake) \
STEP 1/STEP 2 pattern:
```
STEP 1 Present to user (text only, NO tool calls):
[instructions]
STEP 2 After user responds, call set_output:
[set_output calls]
```
The queen manages intake. Workers should NOT have a client-facing node \
that asks for requirements. Use client_facing=True only for review or \
approval checkpoints mid-execution.
**Autonomous system prompts** set_output in SEPARATE turn.
**Tools** NEVER fabricate tool names. Common hallucinations: \
csv_read, csv_write, csv_append, file_upload, database_query. \
If list_agent_tools() shows these don't exist, use alternatives \
(e.g. save_data/load_data for data persistence).
**Node rules**:
- **NO intake nodes.** The queen owns intake. She defines the entry \
node's input_keys at build time and fills them via \
`run_agent_with_input(task)` at run time.
- Don't abuse nodes without tools — merge them into a node that does work.
- A node with 0 tools is NOT a real node merge it.
- node_type "event_loop" for all regular graph nodes. Use "gcu" ONLY for
browser automation subagents (see GCU appendix). GCU nodes MUST be in a
parent node's sub_agents list, NEVER connected via edges, and NEVER used
as entry/terminal nodes.
- max_node_visits default is 0 (unbounded) correct for forever-alive. \
Only set >0 in one-shot agents with bounded feedback loops.
- Feedback inputs: nullable_output_keys
- terminal_nodes=[] for forever-alive (the default)
- Every node MUST have at least one outgoing edge (no dead ends)
- Agents are forever-alive unless user explicitly asks for one-shot
**Agent class**: CamelCase name, default_agent at module level. \
Constructor takes `config=None`. Follow the exact pattern in \
file_templates.md do NOT invent constructor params like \
`llm_provider` or `tool_registry`.
**Module-level variables** (read by AgentRunner.load()):
goal, nodes, edges, entry_node, entry_points, pause_nodes,
terminal_nodes, conversation_mode, identity_prompt, loop_config
For agents with async triggers, also export:
async_entry_points, runtime_config
**Async entry points** (timers, webhooks, events):
When an agent needs scheduled tasks, webhook reactions, or event-driven \
triggers, use `AsyncEntryPointSpec` (from framework.graph.edge) and \
`AgentRuntimeConfig` (from framework.runtime.agent_runtime):
- Timer (cron): `trigger_type="timer"`, \
`trigger_config={"cron": "0 9 * * *"}` standard 5-field cron expression \
(e.g. `"0 9 * * MON-FRI"` weekdays 9am, `"*/30 * * * *"` every 30 min)
- Timer (interval): `trigger_type="timer"`, \
`trigger_config={"interval_minutes": 20, "run_immediately": False}`
- Event (for webhooks): `trigger_type="event"`, \
`trigger_config={"event_types": ["webhook_received"]}`
- `isolation_level="shared"` so async runs can read primary session memory
- `runtime_config = AgentRuntimeConfig(webhook_routes=[...])` for HTTP webhooks
- Reference: `exports/gmail_inbox_guardian/agent.py`
- Full docs: see **Framework Reference** appendix (Async Entry Points section)
## 5. Verify
Run FOUR validation steps after writing. All must pass:
**Step A Class validation** (checks graph structure):
```
run_command("python -c 'from {name} import default_agent; \\
print(default_agent.validate())'")
```
**Step B Runner load test** (checks package export contract \
THIS IS THE SAME PATH THE TUI USES):
```
run_command("python -c 'from framework.runner.runner import \\
AgentRunner; r = AgentRunner.load(\"exports/{name}\"); \\
print(\"AgentRunner.load: OK\")'")
```
This catches missing __init__.py exports, bad conversation_mode, \
invalid loop_config, and unreachable nodes. If Step A passes but \
Step B fails, the problem is in __init__.py exports.
**Step C Tool validation** (checks that declared tools actually exist \
in the agent's MCP servers — catches hallucinated tool names):
```
validate_agent_tools("exports/{name}")
```
If any tools are missing: fix the node definitions to use only tools \
that exist. Run list_agent_tools() to see what's available.
**Step D Run tests:**
```
run_agent_tests("{name}")
```
If anything fails: read error, fix with edit_file, re-validate. Up to 3x.
**CRITICAL: Testing forever-alive agents**
Most agents use `terminal_nodes=[]` (forever-alive). This means \
`runner.run()` NEVER returns it hangs forever waiting for a \
terminal node that doesn't exist. Agent tests MUST be structural:
- Validate graph, node specs, edges, tools, prompts
- Check goal/constraints/success criteria definitions
- Test `AgentRunner.load()` succeeds (structural, no API key needed)
- NEVER call `runner.run()` or `trigger_and_wait()` in tests for \
forever-alive agents they will hang and time out.
When you restructure an agent (change nodes/edges), always update \
the tests to match. Stale tests referencing old node names will fail.
## 6. Present
Show the user what you built: agent name, goal summary, graph (same \
ASCII style as Design), files created, validation status. Offer to \
revise or build another.
"""
# ---------------------------------------------------------------------------
# Coder-specific: set_output after presentation + standalone phase 7
# ---------------------------------------------------------------------------
# Closing section of the coder's prompt: which outputs to set once the user is
# satisfied, plus the optional step 7 ("Live Test"). It is concatenated into
# coder_node.system_prompt right after _agent_builder_knowledge.
# NOTE: this text is sent to the model verbatim; editing it changes behavior.
_coder_completion = """
After user confirms satisfaction:
set_output("agent_name", "the_agent_name")
set_output("validation_result", "valid")
If building another agent, just start the loop again no need to \
set_output until the user is done.
## 7. Live Test (optional)
After the user approves, offer to load and run the agent in-session.
If running with a queen (server/frontend):
```
load_built_agent("exports/{name}") # loads as the session worker
```
The frontend updates automatically the user sees the agent's graph, \
the tab renames, and you can delegate via start_worker(task).
If running standalone (TUI):
```
load_agent("exports/{name}") # registers as secondary graph
start_agent("{name}") # triggers default entry point
```
"""
# ---------------------------------------------------------------------------
# Queen-specific: extra tool docs, behavior, phase 7, style
# ---------------------------------------------------------------------------
# Prompt section describing the queen's three operating modes (BUILDING,
# STAGING, RUNNING), the tools named for each mode, and the calls that move
# between modes. It is concatenated into queen_node.system_prompt.
# NOTE: this text is sent to the model verbatim; editing it changes behavior.
_queen_tools_docs = """
## Operating Modes
You operate in one of three modes. Your available tools change based on the \
mode. The system notifies you when a mode change occurs.
### BUILDING mode (default)
You have full coding tools for building and modifying agents:
- File I/O: read_file, write_file, edit_file, list_directory, search_files, \
run_command, undo_changes
- Meta-agent: list_agent_tools, validate_agent_tools, \
list_agents, list_agent_sessions, get_agent_session_state, get_agent_session_memory, \
list_agent_checkpoints, get_agent_checkpoint, run_agent_tests
- load_built_agent(agent_path) Load the agent and switch to STAGING mode
- list_credentials(credential_id?) List authorized credentials
When you finish building an agent, call load_built_agent(path) to stage it.
### STAGING mode (agent loaded, not yet running)
The agent is loaded and ready to run. You can inspect it and launch it:
- Read-only: read_file, list_directory, search_files, run_command
- list_credentials(credential_id?) Verify credentials are configured
- get_worker_status() Check the loaded worker
- run_agent_with_input(task) Start the worker and switch to RUNNING mode
- stop_worker_and_edit() Go back to BUILDING mode
In STAGING mode you do NOT have write tools. If you need to modify the agent, \
call stop_worker_and_edit() to go back to BUILDING mode.
### RUNNING mode (worker is executing)
The worker is running. You have monitoring and lifecycle tools:
- Read-only: read_file, list_directory, search_files, run_command
- get_worker_status() Check worker status (idle, running, waiting)
- inject_worker_message(content) Send a message to the running worker
- get_worker_health_summary() Read the latest health data
- notify_operator(ticket_id, analysis, urgency) Alert the user (use sparingly)
- stop_worker() Stop the worker and return to STAGING mode, then ask the user what to do next
- stop_worker_and_edit() Stop the worker and switch back to BUILDING mode
In RUNNING mode you do NOT have write tools or agent construction tools. \
If you need to modify the agent, call stop_worker_and_edit() to switch back \
to BUILDING mode. To stop the worker and ask the user what to do next, call \
stop_worker() to return to STAGING mode.
### Mode transitions
- load_built_agent(path) switches to STAGING mode
- run_agent_with_input(task) starts worker, switches to RUNNING mode
- stop_worker() stops worker, switches to STAGING mode (ask user: re-run or edit?)
- stop_worker_and_edit() stops worker (if running), switches to BUILDING mode
"""
# Behavioral rules for the queen: when to call ask_user, when to delegate to
# the worker versus act directly, how to behave while the worker is running,
# and how to modify a loaded worker. It is concatenated into
# queen_node.system_prompt.
#
# Every tool named here must exist in the queen's per-mode tool lists. The
# worker is started with run_agent_with_input(task); there is no start_worker
# tool in the documented mode tables, so the prompt must not mention one.
# NOTE: this text is sent to the model verbatim; editing it changes behavior.
_queen_behavior = """
# Behavior
## CRITICAL RULE — ask_user tool
Every response that ends with a question, a prompt, or expects user \
input MUST finish with a call to ask_user(prompt, options). This is \
NON-NEGOTIABLE. The system CANNOT detect that you are waiting for \
input unless you call ask_user. You MUST call ask_user as the LAST \
action in your response.
NEVER end a response with a question in text without calling ask_user. \
NEVER rely on the user seeing your text and replying — call ask_user.
Always provide 2-4 short options that cover the most likely answers. \
The user can always type a custom response.
Examples:
- ask_user("What do you need?",
["Build a new agent", "Run the loaded worker", "Help with code"])
- ask_user("Which pattern?",
["Simple 2-node", "Rich with feedback", "Custom"])
- ask_user("Ready to proceed?",
["Yes, go ahead", "Let me change something"])
## Greeting and identity
When the user greets you or asks what you can do, respond concisely \
(under 10 lines). DO NOT list internal processes. Focus on:
1. Direct capabilities: coding, agent building & debugging.
2. What the loaded worker does (one sentence from Worker Profile). \
If no worker is loaded, say so.
3. THEN call ask_user to prompt them — do NOT just write text.
## Direct coding
You can do any coding task directly — reading files, writing code, running \
commands, building agents, debugging. For quick tasks, do them yourself.
## Worker delegation
The worker is a specialized agent (see Worker Profile at the end of this \
prompt). It can ONLY do what its goal and tools allow.
**Decision rule — read the Worker Profile first:**
- The user's request directly matches the worker's goal → use \
run_agent_with_input(task) (if in staging) or load then run (if in building)
- Anything else → do it yourself. Do NOT reframe user requests into \
subtasks to justify delegation.
- Building, modifying, or configuring agents is ALWAYS your job. Never \
delegate agent construction to the worker, even as a "research" subtask.
## When the user says "run", "execute", or "start" (without specifics)
The loaded worker is described in the Worker Profile below. You MUST \
ask the user what task or input they want using ask_user — do NOT \
invent a task, do NOT call list_agents() or list directories. \
The worker is already loaded. Just ask for the specific input the \
worker needs (e.g., a research topic, a target domain, a job description). \
NEVER call run_agent_with_input until the user has provided their input.
If NO worker is loaded, say so and offer to build one.
## When in staging mode (agent loaded, not running):
- Tell the user the agent is loaded and ready.
- For tasks matching the worker's goal: ALWAYS ask the user for their \
specific input BEFORE calling run_agent_with_input(task). NEVER make up \
or assume what the user wants. Use ask_user to collect the task details \
(e.g., topic, target, requirements). Once you have the user's answer, \
compose a structured task description from their input and call \
run_agent_with_input(task). The worker has no intake node — it receives \
your task and starts processing.
- If the user wants to modify the agent, call stop_worker_and_edit().
## When idle (worker not running):
- Greet the user. Mention what the worker can do in one sentence.
- For tasks matching the worker's goal, use run_agent_with_input(task) \
(if in staging) or load the agent first (if in building).
- For everything else, do it directly.
## When the user clicks Run (external event notification)
When you receive an event that the user clicked Run:
- If the worker started successfully, briefly acknowledge it — do NOT \
repeat the full status. The user can see the graph is running.
- If the worker failed to start (credential or structural error), \
explain the problem clearly and help fix it. For credential errors, \
guide the user to set up the missing credentials. For structural \
issues, offer to fix the agent graph directly.
## When worker is running — GO SILENT
Once you call run_agent_with_input(task), your job is DONE. Do NOT call \
ask_user, do NOT call get_worker_status(), do NOT emit any text beyond \
the one-sentence confirmation described below. Just stop. \
The worker owns the conversation now — it has its own client-facing \
nodes that talk to the user directly.
**After run_agent_with_input, your ENTIRE response should be ONE short \
confirmation sentence with NO tool calls.** Example: \
"Started the vulnerability assessment." — that's it. No ask_user, \
no get_worker_status, no follow-up questions.
You only wake up again when:
- The user explicitly addresses you (not answering a worker question)
- A worker question is forwarded to you for relay
- An escalation ticket arrives from the judge
- The worker finishes
If the user explicitly asks about progress, call get_worker_status() \
ONCE and report. Do NOT poll or check proactively.
For escalation tickets: low/transient → acknowledge silently. \
High/critical → notify the user with a brief analysis.
## When the worker asks the user a question:
- The user's answer is routed to you with context: \
[Worker asked: "...", Options: ...] User answered: "...".
- If the user is answering the worker's question normally, relay it \
using inject_worker_message(answer_text). Then go silent again.
- If the user is rejecting the approach, asking to stop, or giving \
you an instruction, handle it yourself — do NOT relay.
## Showing or describing the loaded worker
When the user asks to "show the graph", "describe the agent", or \
"re-generate the graph", read the Worker Profile and present the \
worker's current architecture as an ASCII diagram. Use the processing \
stages, tools, and edges from the loaded worker. Do NOT enter the \
agent building workflow — you are describing what already exists, not \
building something new.
## Modifying the loaded worker
When the user asks to change, modify, or update the loaded worker \
(e.g., "change the report node", "add a node", "delete node X"):
1. Call stop_worker_and_edit() — this stops the worker and gives you \
coding tools (switches to BUILDING mode).
2. Use the **Path** from the Worker Profile to locate the agent files.
3. Read the relevant files (nodes/__init__.py, agent.py, etc.).
4. Make the requested changes using edit_file / write_file.
5. Run validation (default_agent.validate(), AgentRunner.load(), \
validate_agent_tools()).
6. **Reload the modified worker**: call load_built_agent("{path}") \
so the changes take effect immediately (switches to STAGING mode). \
Then call run_agent_with_input(task) to restart execution.
Do NOT skip step 6 — without reloading, the user will still be \
interacting with the old version.
"""
# Queen's version of step 7: load the freshly built agent into the current
# session and run it there. It is concatenated into queen_node.system_prompt
# after _queen_behavior.
# NOTE: this text is sent to the model verbatim; editing it changes behavior.
_queen_phase_7 = """
## 7. Load into Session
After building and verifying, load the agent into the current session:
load_built_agent("exports/{name}")
This switches to STAGING mode the user sees the agent's graph and \
the tab name updates. Then call run_agent_with_input(task) to start it. \
Do NOT tell the user to run `python -m {name} run` load and run it here.
"""
# Tone and pacing rules for the queen's replies. It is concatenated into
# queen_node.system_prompt after _queen_phase_7.
# NOTE: this text is sent to the model verbatim; editing it changes behavior.
_queen_style = """
# Style
- Concise. No fluff. Direct. No emojis.
- **One phase per response.** Stop after each phase and get user \
confirmation before moving on. Never combine understand + design + \
implement in one response.
- When starting the worker, describe what you told it in one sentence.
- When an escalation arrives, lead with severity and recommended action.
"""
# ---------------------------------------------------------------------------
# Node definitions
# ---------------------------------------------------------------------------
# Single node — like opencode's while(true) loop.
# One continuous context handles the entire workflow:
# discover → design → implement → verify → present → iterate.
# The coder is one user-facing event-loop node with no visit cap; its prompt
# is the shared builder knowledge, then the coder-only completion section,
# then the appendices.
coder_node = NodeSpec(
    id="coder",
    name="Hive Coder",
    node_type="event_loop",
    client_facing=True,
    max_node_visits=0,
    description=(
        "Autonomous coding agent that builds Hive agent packages. Handles the "
        "full lifecycle: understanding user intent, designing architecture, "
        "writing code, validating, and iterating on feedback — all in one "
        "continuous conversation."
    ),
    input_keys=["user_request"],
    output_keys=["agent_name", "validation_result"],
    success_criteria=(
        "A complete, validated Hive agent package exists at exports/{agent_name}/ "
        "and passes structural validation."
    ),
    tools=[
        *_SHARED_TOOLS,
        # Graph lifecycle tools (multi-graph sessions)
        "load_agent",
        "unload_agent",
        "start_agent",
        "restart_agent",
        "get_user_presence",
    ],
    system_prompt="".join(
        [
            "You are Hive Coder, the best agent-building coding agent. ",
            "You build production-ready Hive agent packages from natural language.\n",
            _agent_builder_knowledge,
            _coder_completion,
            _appendices,
        ]
    ),
)
# The only tool the triage node may call.
ALL_QUEEN_TRIAGE_TOOLS = ["notify_operator"]

# Triage node: takes one escalation ticket as input and either dismisses it
# or alerts the operator through notify_operator.
ticket_triage_node = NodeSpec(
    id="ticket_triage",
    name="Ticket Triage",
    node_type="event_loop",
    client_facing=True,  # Operator can chat with queen once connected (Ctrl+Q)
    max_node_visits=0,
    description=(
        "Queen's triage node. Receives an EscalationTicket from the Health "
        "Judge via event-driven entry point and decides: dismiss or notify "
        "the operator."
    ),
    input_keys=["ticket"],
    output_keys=["intervention_decision"],
    nullable_output_keys=["intervention_decision"],
    success_criteria=(
        "A clear intervention decision: either dismissed with documented "
        "reasoning, or operator notified via notify_operator with specific "
        "analysis."
    ),
    # Separate list object with the same content as the constant above.
    tools=list(ALL_QUEEN_TRIAGE_TOOLS),
    system_prompt="""\
You are the Queen (Hive Coder). The Worker Health Judge has escalated a worker \
issue to you. The ticket is in your memory under key "ticket". Read it carefully.
## Dismiss criteria — do NOT call notify_operator:
- severity is "low" AND steps_since_last_accept < 8
- Cause is clearly a transient issue (single API timeout, brief stall that \
self-resolved based on the evidence)
- Evidence shows the agent is making real progress despite bad verdicts
## Intervene criteria — call notify_operator:
- severity is "high" or "critical"
- steps_since_last_accept >= 10 with no sign of recovery
- stall_minutes > 4 (worker definitively stuck)
- Evidence shows a doom loop (same error, same tool, no progress)
- Cause suggests a logic bug, missing configuration, or unrecoverable state
## When intervening:
Call notify_operator with:
ticket_id: <ticket["ticket_id"]>
analysis: "<2-3 sentences: what is wrong, why it matters, suggested action>"
urgency: "<low|medium|high|critical>"
## After deciding:
set_output("intervention_decision", "dismissed: <reason>" or "escalated: <summary>")
Be conservative but not passive. You are the last quality gate before the human \
is disturbed. One unnecessary alert is less costly than alert fatigue but \
genuine stuck agents must be caught.
""",
)
# Every tool the queen can hold in any mode: the three per-mode lists merged,
# de-duplicated and sorted.
ALL_QUEEN_TOOLS = sorted(
    {*_QUEEN_BUILDING_TOOLS, *_QUEEN_STAGING_TOOLS, *_QUEEN_RUNNING_TOOLS}
)

# The queen node is registered with the full tool union. Its prompt is the
# shared builder knowledge followed by the queen-only sections and the
# appendices.
queen_node = NodeSpec(
    id="queen",
    name="Queen",
    node_type="event_loop",
    client_facing=True,
    max_node_visits=0,
    description=(
        "User's primary interactive interface with full coding capability. Can "
        "build agents directly or delegate to the worker. Manages the worker "
        "agent lifecycle and triages health escalations from the judge."
    ),
    input_keys=["greeting"],
    output_keys=[],
    nullable_output_keys=[],
    success_criteria=(
        "User's intent is understood, coding tasks are completed correctly, and "
        "the worker is managed effectively when delegated to."
    ),
    # Own copy, so the node and the exported constant never share one list.
    tools=list(ALL_QUEEN_TOOLS),
    system_prompt="".join(
        [
            "You are the Queen — the user's primary interface. ",
            "You are a coding agent with the same capabilities as the Hive Coder "
            "worker, PLUS the ability to manage the worker's lifecycle.\n",
            _agent_builder_knowledge,
            _queen_tools_docs,
            _queen_behavior,
            _queen_phase_7,
            _queen_style,
            _appendices,
        ]
    ),
)
# Names exported by `from <this module> import *`: the three node specs, the
# two aggregate tool lists, and the three per-mode queen tool lists.
# The _QUEEN_*_TOOLS names carry a leading underscore, so a star-import would
# skip them unless they are listed here explicitly.
__all__ = [
    "coder_node",
    "ticket_triage_node",
    "queen_node",
    "ALL_QUEEN_TRIAGE_TOOLS",
    "ALL_QUEEN_TOOLS",
    "_QUEEN_BUILDING_TOOLS",
    "_QUEEN_STAGING_TOOLS",
    "_QUEEN_RUNNING_TOOLS",
]
@@ -0,0 +1,113 @@
# Common Mistakes When Building Hive Agents
## Critical Errors
1. **Using tools that don't exist** — Always verify tools are available in the hive-tools MCP server before assigning them to nodes. Never guess tool names.
2. **Wrong entry_points format** — MUST be `{"start": "first-node-id"}`. NOT a set, NOT `{node_id: [keys]}`.
3. **Wrong mcp_servers.json format** — Flat dict (no `"mcpServers"` wrapper). `cwd` must be `"../../tools"`. `command` must be `"uv"` with args `["run", "python", ...]`.
4. **Missing STEP 1/STEP 2 in client-facing prompts** — Without explicit phases, the LLM calls set_output before the user responds. Always use the pattern.
5. **Forgetting nullable_output_keys** — When a node receives inputs from multiple edges and some inputs only arrive on certain edges (e.g., feedback), mark those as nullable. Without this, the executor blocks waiting for a value that will never arrive.
6. **Creating dead-end nodes in forever-alive graphs** — Every node must have at least one outgoing edge. A node with no outgoing edges ends the execution, breaking the loop.
7. **Setting max_node_visits to a non-zero value in forever-alive agents** — The framework default is `max_node_visits=0` (unbounded). Setting it to any positive value (e.g., 1) means the node stops executing after that many visits, silently breaking the forever-alive loop. Only set `max_node_visits > 0` in one-shot agents with feedback loops that need bounded retries.
7. **Missing module-level exports in `__init__.py`** — The runner loads agents via `importlib.import_module(package_name)`, which imports `__init__.py`. It then reads `goal`, `nodes`, `edges`, `entry_node`, `entry_points`, `pause_nodes`, `terminal_nodes`, `conversation_mode`, `identity_prompt`, `loop_config` via `getattr()`. If ANY of these are missing from `__init__.py`, they default to `None` or `{}` — causing "must define goal, nodes, edges" errors or "node X is unreachable" validation failures. **ALL module-level variables from agent.py must be re-exported in `__init__.py`.**
## Value Errors
8. **Invalid `conversation_mode` value** — Only two valid values: `"continuous"` (recommended for interactive agents) or omit entirely (for isolated per-node conversations). Values like `"client_facing"`, `"interactive"`, `"adaptive"` do NOT exist and will cause runtime errors.
9. **Invalid `loop_config` keys** — Only three valid keys: `max_iterations` (int), `max_tool_calls_per_turn` (int), `max_history_tokens` (int). Keys like `"strategy"`, `"mode"`, `"timeout"` are NOT valid and are silently ignored or cause errors.
10. **Fabricating tools that don't exist** — Never guess tool names. Always verify via `list_agent_tools()` before designing and `validate_agent_tools()` after building. Common hallucinations: `csv_read`, `csv_write`, `csv_append`, `file_upload`, `database_query`, `bulk_fetch_emails`. If a required tool doesn't exist, redesign the agent to use tools that DO exist (e.g., `save_data`/`load_data` for data persistence).
## Design Errors
11. **Too many thin nodes** — Hard limit: **2-4 nodes** for most agents. Each node boundary serializes outputs to shared memory and loses all in-context information (tool results, intermediate reasoning, conversation history). A node with 0 tools that just does LLM reasoning is NOT a real node — merge it into its predecessor or successor.
**Merge when:**
- Node has NO tools — pure LLM reasoning belongs in the node that produces or consumes its data
- Node sets only 1 trivial output (e.g., `set_output("done", "true")`) — collapse into predecessor
- Multiple consecutive autonomous nodes with same/similar tools — combine into one
- A "report" or "summary" node that just presents analysis — merge into the client-facing node
- A "schedule" or "confirm" node that doesn't actually schedule anything — remove entirely
**Keep separate when:**
- Client-facing vs autonomous — different interaction models require separate nodes
- Fundamentally different tool sets (e.g., web search vs file I/O)
- Fan-out parallelism — parallel branches MUST be separate nodes
**Bad example** (7 nodes — WAY too many):
```
profile_setup → daily_intake → update_tracker → analyze_progress → generate_plan → schedule_reminders → report
```
`analyze_progress` has no tools. `schedule_reminders` just sets one boolean. `report` just presents analysis. `update_tracker` and `generate_plan` are sequential autonomous work.
**Good example** (2 nodes):
```
process (autonomous: track + analyze + plan) → review (client-facing) → process (loop back)
```
The queen handles intake (gathering requirements from the user) and passes the task via `run_agent_with_input(task)`. One autonomous node handles ALL backend work (CSV update, analysis, plan generation) with tools and context preserved. One client-facing node handles review/approval when needed.
12. **Adding framework gating for LLM behavior** — Don't add output rollback, premature rejection, or interaction protocol injection. Fix with better prompts or custom judges.
13. **Not using continuous conversation mode** — Interactive agents should use `conversation_mode="continuous"`. Without it, each node starts with blank context.
14. **Adding terminal nodes by default** — ALL agents should use `terminal_nodes=[]` (forever-alive) unless the user explicitly requests a one-shot/batch agent. Forever-alive is the standard pattern. Every node must have at least one outgoing edge. Dead-end nodes break the loop.
15. **Calling set_output in same turn as tool calls** — Instruct the LLM to call set_output in a SEPARATE turn from real tool calls.
## File Template Errors
16. **Wrong import paths** — Use `from framework.graph import ...`, NOT `from core.framework.graph import ...`. The PYTHONPATH includes `core/`.
17. **Missing storage path** — Agent class must set `self._storage_path = Path.home() / ".hive" / "agents" / "agent_name"`.
18. **Missing mcp_servers.json** — Without this, the agent has no tools at runtime.
19. **Bare `python` command in mcp_servers.json** — Use `"command": "uv"` with args `["run", "python", ...]`.
## Testing Errors
20. **Using `runner.run()` on forever-alive agents** — `runner.run()` calls `trigger_and_wait()` which blocks until the graph reaches a terminal node. Forever-alive agents have `terminal_nodes=[]`, so **`runner.run()` hangs forever**. This is the #1 cause of stuck test suites.
**For forever-alive agents, write structural tests instead:**
- Validate graph structure (nodes, edges, entry points)
- Verify node specs (tools, prompts, client-facing flag)
- Check goal/constraints/success criteria definitions
- Test that `AgentRunner.load()` succeeds (structural, no API key needed)
**What NOT to do:**
```python
# WRONG — hangs forever on forever-alive agents
result = await runner.run({"topic": "quantum computing"})
```
**Correct pattern for structure tests:**
```python
def test_research_has_web_tools(self):
assert "web_search" in research_node.tools
def test_research_routes_back_to_interact(self):
edges_to_interact = [e for e in edges if e.source == "research" and e.target == "interact"]
assert edges_to_interact
```
21. **Stale tests after agent restructuring** — When you change an agent's node count or names (e.g., 4 nodes → 2 nodes), the tests MUST be updated too. Tests referencing old node names (e.g., `"review"`, `"report"`) will fail or hang. Always check that test assertions match the current `nodes/__init__.py`.
22. **Running full integration tests without API keys** — Structural tests (validate, import) work without keys. Full integration tests need `ANTHROPIC_API_KEY`. Use `pytest.skip()` in the runner fixture when `_setup()` fails due to missing credentials.
23. **Forgetting sys.path setup in conftest.py** — Tests need `exports/` and `core/` on sys.path.
24. **Not using auto_responder for client-facing nodes** — Tests with client-facing nodes hang without an auto-responder that injects input. But note: even WITH auto_responder, forever-alive agents still hang because the graph never terminates. Auto-responder only helps for agents with terminal nodes.
25. **Manually wiring browser tools on event_loop nodes** — If the agent needs browser automation, use `node_type="gcu"` which auto-includes all browser tools and prepends best-practices guidance. Do NOT manually list browser tool names on event_loop nodes — they may not exist in the MCP server or may be incomplete. See the GCU Guide appendix.
26. **Using GCU nodes as regular graph nodes** — GCU nodes (`node_type="gcu"`) are exclusively subagents. They must ONLY appear in a parent node's `sub_agents=["gcu-node-id"]` list and be invoked via `delegate_to_sub_agent()`. They must NEVER be connected via edges, used as entry nodes, or used as terminal nodes. If a GCU node appears as an edge source or target, the graph will fail pre-load validation.
27. **Adding a client-facing intake node to worker agents** — The queen owns intake. She defines the entry node's `input_keys` at build time and fills them via `run_agent_with_input(task)` at run time. Worker agents should start with an autonomous processing node, NOT a client-facing intake node that asks the user for requirements. Client-facing nodes in workers are for mid-execution review/approval only.
@@ -0,0 +1,567 @@
# Agent File Templates
Complete code templates for each file in a Hive agent package.
## config.py
```python
"""Runtime configuration."""
import json
from dataclasses import dataclass, field
from pathlib import Path
def _load_preferred_model() -> str:
    """Return the preferred model as a ``"<provider>/<model>"`` string.

    Reads the ``llm`` section of ``~/.hive/configuration.json``. This is a
    best-effort lookup: if the file is missing, unreadable, not valid JSON,
    or does not contain a non-empty ``provider`` and ``model``, the built-in
    default model is returned instead. It never raises for a bad config file.
    """
    default_model = "anthropic/claude-sonnet-4-20250514"
    config_path = Path.home() / ".hive" / "configuration.json"

    try:
        # Explicit UTF-8: without it open() uses the platform's locale codec,
        # which can fail to decode non-ASCII JSON on some systems.
        with open(config_path, encoding="utf-8") as f:
            config = json.load(f)
    except (OSError, ValueError):
        # OSError: file missing or unreadable.
        # ValueError: malformed JSON or undecodable bytes.
        return default_model

    # The file may hold any JSON value; only a dict with a dict-valued "llm"
    # section can name a model.
    llm = config.get("llm") if isinstance(config, dict) else None
    if isinstance(llm, dict):
        provider = llm.get("provider")
        model = llm.get("model")
        if provider and model:
            return f"{provider}/{model}"
    return default_model
@dataclass
class RuntimeConfig:
    """LLM settings for an agent; every field has a default."""

    # "provider/model" string. Resolved by _load_preferred_model() each time a
    # RuntimeConfig is instantiated without an explicit model.
    model: str = field(default_factory=_load_preferred_model)
    temperature: float = 0.7
    max_tokens: int = 40000
    # Optional overrides, unset by default.
    # NOTE(review): presumably None means "let the provider resolve it" — confirm.
    api_key: str | None = None
    api_base: str | None = None


# Module-level instance built entirely from the defaults above.
default_config = RuntimeConfig()
@dataclass
class AgentMetadata:
name: str = "My Agent Name"
version: str = "1.0.0"
description: str = "What this agent does."
intro_message: str = "Welcome! What would you like me to do?"
metadata = AgentMetadata()
```
## nodes/__init__.py
```python
"""Node definitions for My Agent."""
from framework.graph import NodeSpec
# Node 1: Process (autonomous entry node)
# The queen handles intake and passes structured input via
# run_agent_with_input(task). NO client-facing intake node.
# The queen defines input_keys at build time and fills them at run time.
process_node = NodeSpec(
id="process",
name="Process",
description="Execute the task using available tools",
node_type="event_loop",
max_node_visits=0, # Unlimited for forever-alive
input_keys=["user_request", "feedback"],
output_keys=["results"],
nullable_output_keys=["feedback"], # Only on feedback edge
success_criteria="Results are complete and accurate.",
system_prompt="""\
You are a processing agent. Your task is in memory under "user_request". \
If "feedback" is present, this is a revision — address the feedback.
Work in phases:
1. Use tools to gather/process data
2. Analyze results
3. Call set_output in a SEPARATE turn:
- set_output("results", "structured results")
""",
tools=["web_search", "web_scrape", "save_data", "load_data", "list_data_files"],
)
# Node 2: Review (client-facing)
review_node = NodeSpec(
id="review",
name="Review",
description="Present results for user approval",
node_type="event_loop",
client_facing=True,
max_node_visits=0,
input_keys=["results", "user_request"],
output_keys=["next_action", "feedback"],
nullable_output_keys=["feedback"],
success_criteria="User has reviewed and decided next steps.",
system_prompt="""\
Present the results to the user.
**STEP 1 — Present (text only, NO tool calls):**
1. Summary of work done
2. Key results
3. Ask: satisfied, or want changes?
**STEP 2 — After user responds, call set_output:**
- set_output("next_action", "done") — if satisfied
- set_output("next_action", "revise") — if changes needed
- set_output("feedback", "what to change") — only if revising
""",
tools=[],
)
__all__ = ["process_node", "review_node"]
```
## agent.py
```python
"""Agent graph construction for My Agent."""
from pathlib import Path
from framework.graph import EdgeSpec, EdgeCondition, Goal, SuccessCriterion, Constraint
from framework.graph.edge import GraphSpec
from framework.graph.executor import ExecutionResult
from framework.graph.checkpoint_config import CheckpointConfig
from framework.llm import LiteLLMProvider
from framework.runner.tool_registry import ToolRegistry
from framework.runtime.agent_runtime import AgentRuntime, create_agent_runtime
from framework.runtime.execution_stream import EntryPointSpec
from .config import default_config, metadata
from .nodes import process_node, review_node
# Goal definition
goal = Goal(
id="my-agent-goal",
name="My Agent Goal",
description="What this agent achieves.",
success_criteria=[
SuccessCriterion(id="sc-1", description="...", metric="...", target="...", weight=0.5),
SuccessCriterion(id="sc-2", description="...", metric="...", target="...", weight=0.5),
],
constraints=[
Constraint(id="c-1", description="...", constraint_type="hard", category="quality"),
],
)
# Node list
nodes = [process_node, review_node]
# Edge definitions
edges = [
EdgeSpec(id="process-to-review", source="process", target="review",
condition=EdgeCondition.ON_SUCCESS, priority=1),
# Feedback loop — revise results
EdgeSpec(id="review-to-process", source="review", target="process",
condition=EdgeCondition.CONDITIONAL,
condition_expr="str(next_action).lower() == 'revise'", priority=2),
# Loop back for next task (queen sends new input)
EdgeSpec(id="review-done", source="review", target="process",
condition=EdgeCondition.CONDITIONAL,
condition_expr="str(next_action).lower() == 'done'", priority=1),
]
# Graph configuration — entry is the autonomous process node
# The queen handles intake and passes the task via run_agent_with_input(task)
entry_node = "process"
entry_points = {"start": "process"}
pause_nodes = []
terminal_nodes = [] # Forever-alive
# Module-level vars read by AgentRunner.load()
conversation_mode = "continuous"
identity_prompt = "You are a helpful agent."
loop_config = {"max_iterations": 100, "max_tool_calls_per_turn": 20, "max_history_tokens": 32000}
class MyAgent:
    """Programmatic wrapper around the agent graph and its runtime.

    Holds references to the module-level goal, nodes, edges and entry
    configuration, builds a ``GraphSpec`` from them, and manages an agent
    runtime created via ``create_agent_runtime``.
    """

    def __init__(self, config=None):
        """Store graph definitions; the runtime is created lazily by ``start()``.

        Args:
            config: Runtime configuration; falls back to ``default_config``
                when omitted (or falsy).
        """
        self.config = config or default_config
        self.goal = goal
        self.nodes = nodes
        self.edges = edges
        self.entry_node = entry_node  # "process" — autonomous entry
        self.entry_points = entry_points
        self.pause_nodes = pause_nodes
        self.terminal_nodes = terminal_nodes
        self._graph = None
        self._agent_runtime = None
        self._tool_registry = None
        self._storage_path = None

    def _build_graph(self):
        """Assemble the ``GraphSpec`` from this instance and module-level settings."""
        return GraphSpec(
            id="my-agent-graph",
            goal_id=self.goal.id,
            version="1.0.0",
            entry_node=self.entry_node,
            entry_points=self.entry_points,
            terminal_nodes=self.terminal_nodes,
            pause_nodes=self.pause_nodes,
            nodes=self.nodes,
            edges=self.edges,
            default_model=self.config.model,
            max_tokens=self.config.max_tokens,
            loop_config=loop_config,
            conversation_mode=conversation_mode,
            identity_prompt=identity_prompt,
        )

    def _setup(self):
        """Create storage, tool registry, LLM provider, graph and runtime."""
        self._storage_path = Path.home() / ".hive" / "agents" / "my_agent"
        self._storage_path.mkdir(parents=True, exist_ok=True)

        self._tool_registry = ToolRegistry()
        # MCP servers are optional: only load them when the config file
        # sits next to this module.
        mcp_config = Path(__file__).parent / "mcp_servers.json"
        if mcp_config.exists():
            self._tool_registry.load_mcp_config(mcp_config)

        llm = LiteLLMProvider(
            model=self.config.model,
            api_key=self.config.api_key,
            api_base=self.config.api_base,
        )
        tools = list(self._tool_registry.get_tools().values())
        tool_executor = self._tool_registry.get_executor()

        self._graph = self._build_graph()
        self._agent_runtime = create_agent_runtime(
            graph=self._graph,
            goal=self.goal,
            storage_path=self._storage_path,
            entry_points=[
                EntryPointSpec(
                    id="default",
                    name="Default",
                    entry_node=self.entry_node,
                    trigger_type="manual",
                    isolation_level="shared",
                )
            ],
            llm=llm,
            tools=tools,
            tool_executor=tool_executor,
            checkpoint_config=CheckpointConfig(
                enabled=True,
                checkpoint_on_node_complete=True,
                checkpoint_max_age_days=7,
                async_checkpoint=True,
            ),
        )

    async def start(self):
        """Build the runtime on first use and start it if it is not running."""
        if self._agent_runtime is None:
            self._setup()
        if not self._agent_runtime.is_running:
            await self._agent_runtime.start()

    async def stop(self):
        """Stop a running runtime and drop it so the next ``start()`` rebuilds it."""
        if self._agent_runtime and self._agent_runtime.is_running:
            await self._agent_runtime.stop()
        self._agent_runtime = None

    async def trigger_and_wait(self, entry_point="default", input_data=None, timeout=None, session_state=None):
        """Trigger an entry point on the runtime and await its result.

        Args:
            entry_point: Entry point id registered with the runtime.
            input_data: Input mapping; ``None`` is sent as an empty dict.
            timeout: Forwarded to the runtime's ``trigger_and_wait``.
            session_state: Forwarded to the runtime unchanged.

        Raises:
            RuntimeError: If ``start()`` has not been called.
        """
        if self._agent_runtime is None:
            raise RuntimeError("Agent not started. Call start() first.")
        # Forward ``timeout`` explicitly; otherwise the caller's value is
        # accepted by this method but silently ignored.
        return await self._agent_runtime.trigger_and_wait(
            entry_point_id=entry_point,
            input_data=input_data or {},
            timeout=timeout,
            session_state=session_state,
        )

    async def run(self, context, session_state=None):
        """Start the agent, run the "default" entry point once, then stop.

        Returns the runtime's result, or a failed ``ExecutionResult`` when the
        runtime returns a falsy result.
        """
        await self.start()
        try:
            result = await self.trigger_and_wait("default", context, session_state=session_state)
            return result or ExecutionResult(success=False, error="Execution timeout")
        finally:
            await self.stop()

    def info(self):
        """Return a plain-dict summary of the agent's metadata and graph."""
        return {
            "name": metadata.name,
            "version": metadata.version,
            "description": metadata.description,
            "goal": {"name": self.goal.name, "description": self.goal.description},
            "nodes": [n.id for n in self.nodes],
            "edges": [e.id for e in self.edges],
            "entry_node": self.entry_node,
            "entry_points": self.entry_points,
            "terminal_nodes": self.terminal_nodes,
            "client_facing_nodes": [n.id for n in self.nodes if n.client_facing],
        }

    def validate(self):
        """Check that edges, entry node, terminal nodes and entry points
        reference known node ids.

        Returns:
            ``{"valid": bool, "errors": list[str], "warnings": list[str]}``;
            ``warnings`` is currently always empty.
        """
        errors, warnings = [], []
        node_ids = {n.id for n in self.nodes}

        for e in self.edges:
            if e.source not in node_ids:
                errors.append(f"Edge {e.id}: source '{e.source}' not found")
            if e.target not in node_ids:
                errors.append(f"Edge {e.id}: target '{e.target}' not found")

        if self.entry_node not in node_ids:
            errors.append(f"Entry node '{self.entry_node}' not found")

        for t in self.terminal_nodes:
            if t not in node_ids:
                errors.append(f"Terminal node '{t}' not found")

        for ep_id, nid in self.entry_points.items():
            if nid not in node_ids:
                errors.append(f"Entry point '{ep_id}' references unknown node '{nid}'")

        return {"valid": len(errors) == 0, "errors": errors, "warnings": warnings}
default_agent = MyAgent()
```
## agent.py — Async Entry Points Variant
When an agent needs timers, webhooks, or event-driven triggers, add
`async_entry_points` and optionally `runtime_config` as module-level variables.
These are IN ADDITION to the standard variables above.
```python
# Additional imports for async entry points
from framework.graph.edge import GraphSpec, AsyncEntryPointSpec
from framework.runtime.agent_runtime import (
AgentRuntime, AgentRuntimeConfig, create_agent_runtime,
)
# ... (goal, nodes, edges, entry_node, entry_points, etc. as above) ...
# Async entry points — event-driven triggers
# Each entry_node must be the id of a node defined in nodes/__init__.py;
# this template's autonomous node is "process".
async_entry_points = [
    # Timer with cron: daily at 9am
    AsyncEntryPointSpec(
        id="daily-check",
        name="Daily Check",
        entry_node="process",
        trigger_type="timer",
        trigger_config={"cron": "0 9 * * *"},
        isolation_level="shared",
        max_concurrent=1,
    ),
    # Timer with fixed interval: every 20 minutes
    AsyncEntryPointSpec(
        id="scheduled-check",
        name="Scheduled Check",
        entry_node="process",
        trigger_type="timer",
        trigger_config={"interval_minutes": 20, "run_immediately": False},
        isolation_level="shared",
        max_concurrent=1,
    ),
    # Event: reacts to webhook events
    AsyncEntryPointSpec(
        id="webhook-event",
        name="Webhook Event Handler",
        entry_node="process",
        trigger_type="event",
        trigger_config={"event_types": ["webhook_received"]},
        isolation_level="shared",
        max_concurrent=10,
    ),
]
# Webhook server config (only needed if using webhooks)
runtime_config = AgentRuntimeConfig(
webhook_host="127.0.0.1",
webhook_port=8080,
webhook_routes=[
{
"source_id": "my-source",
"path": "/webhooks/my-source",
"methods": ["POST"],
},
],
)
```
**Key rules for async entry points:**
- `async_entry_points` is a list of `AsyncEntryPointSpec` (NOT `EntryPointSpec`)
- `runtime_config` is `AgentRuntimeConfig` (NOT `RuntimeConfig` from config.py)
- Valid trigger_types: `timer`, `event`, `webhook`, `manual`, `api`
- Valid isolation_levels: `isolated`, `shared`, `synchronized`
- Timer trigger_config (cron): `{"cron": "0 9 * * *"}` — standard 5-field cron expression
- Timer trigger_config (interval): `{"interval_minutes": float, "run_immediately": bool}`
- Event trigger_config: `{"event_types": ["webhook_received"], "filter_stream": "...", "filter_node": "..."}`
- Use `isolation_level="shared"` for async entry points that need to read
the primary session's memory (e.g., user-configured rules)
- The `_build_graph()` method passes `async_entry_points` to GraphSpec
- Reference: `exports/gmail_inbox_guardian/agent.py`
## __init__.py
**CRITICAL:** The runner imports the package (`__init__.py`) and reads ALL module-level
variables via `getattr()`. Every variable defined in `agent.py` that the runner needs
MUST be re-exported here. Missing exports cause silent failures (variables default to
`None` or `{}`), leading to "must define goal, nodes, edges" errors or graph validation
failures like "node X is unreachable".
```python
"""My Agent — description."""
from .agent import (
MyAgent,
default_agent,
goal,
nodes,
edges,
entry_node,
entry_points,
pause_nodes,
terminal_nodes,
conversation_mode,
identity_prompt,
loop_config,
)
from .config import default_config, metadata
__all__ = [
"MyAgent",
"default_agent",
"goal",
"nodes",
"edges",
"entry_node",
"entry_points",
"pause_nodes",
"terminal_nodes",
"conversation_mode",
"identity_prompt",
"loop_config",
"default_config",
"metadata",
]
```
**If the agent uses async entry points**, also import and export:
```python
from .agent import (
...,
async_entry_points,
runtime_config, # Only if using webhooks
)
__all__ = [
...,
"async_entry_points",
"runtime_config",
]
```
## __main__.py
```python
"""CLI entry point for My Agent."""
import asyncio, json, logging, sys
import click
from .agent import default_agent, MyAgent
def setup_logging(verbose=False, debug=False):
if debug: level, fmt = logging.DEBUG, "%(asctime)s %(name)s: %(message)s"
elif verbose: level, fmt = logging.INFO, "%(message)s"
else: level, fmt = logging.WARNING, "%(levelname)s: %(message)s"
logging.basicConfig(level=level, format=fmt, stream=sys.stderr)
@click.group()
@click.version_option(version="1.0.0")
def cli():
"""My Agent — description."""
pass
@cli.command()
@click.option("--topic", "-t", required=True)
@click.option("--verbose", "-v", is_flag=True)
def run(topic, verbose):
    """Execute the agent."""
    setup_logging(verbose=verbose)
    # The "process" entry node declares input_keys=["user_request", "feedback"]
    # and its prompt reads the task from "user_request", so the CLI task must
    # be supplied under that key to reach the node.
    result = asyncio.run(default_agent.run({"user_request": topic}))
    # default=str keeps the summary printable when the output holds values
    # the json module cannot serialize natively.
    click.echo(json.dumps({"success": result.success, "output": result.output}, indent=2, default=str))
    # Exit status mirrors the execution result: 0 on success, 1 otherwise.
    sys.exit(0 if result.success else 1)
@cli.command()
def tui():
"""Launch TUI dashboard."""
from pathlib import Path
from framework.tui.app import AdenTUI
from framework.llm import LiteLLMProvider
from framework.runner.tool_registry import ToolRegistry
from framework.runtime.agent_runtime import create_agent_runtime
from framework.runtime.execution_stream import EntryPointSpec
async def run_tui():
agent = MyAgent()
agent._tool_registry = ToolRegistry()
storage = Path.home() / ".hive" / "agents" / "my_agent"
storage.mkdir(parents=True, exist_ok=True)
mcp_cfg = Path(__file__).parent / "mcp_servers.json"
if mcp_cfg.exists(): agent._tool_registry.load_mcp_config(mcp_cfg)
llm = LiteLLMProvider(model=agent.config.model, api_key=agent.config.api_key, api_base=agent.config.api_base)
runtime = create_agent_runtime(
graph=agent._build_graph(), goal=agent.goal, storage_path=storage,
entry_points=[EntryPointSpec(id="start", name="Start", entry_node="process", trigger_type="manual", isolation_level="isolated")],
llm=llm, tools=list(agent._tool_registry.get_tools().values()), tool_executor=agent._tool_registry.get_executor())
await runtime.start()
try:
app = AdenTUI(runtime)
await app.run_async()
finally:
await runtime.stop()
asyncio.run(run_tui())
@cli.command()
def info():
"""Show agent info."""
data = default_agent.info()
click.echo(f"Agent: {data['name']}\nVersion: {data['version']}\nDescription: {data['description']}")
click.echo(f"Nodes: {', '.join(data['nodes'])}\nClient-facing: {', '.join(data['client_facing_nodes'])}")
@cli.command()
def validate():
"""Validate agent structure."""
v = default_agent.validate()
if v["valid"]: click.echo("Agent is valid")
else:
click.echo("Errors:")
for e in v["errors"]: click.echo(f" {e}")
sys.exit(0 if v["valid"] else 1)
if __name__ == "__main__":
cli()
```
## mcp_servers.json
```json
{
"hive-tools": {
"transport": "stdio",
"command": "uv",
"args": ["run", "python", "mcp_server.py", "--stdio"],
"cwd": "../../tools",
"description": "Hive tools MCP server"
}
}
```
**CRITICAL FORMAT RULES:**
- NO `"mcpServers"` wrapper (flat dict, not nested)
- `cwd` MUST be `"../../tools"` (relative from `exports/AGENT_NAME/` to `tools/`)
- `command` MUST be `"uv"` with `"args": ["run", "python", ...]` (NOT bare `"python"`)
## tests/conftest.py
```python
"""Test fixtures."""
import sys
from pathlib import Path
import pytest
_repo_root = Path(__file__).resolve().parents[3]
for _p in ["exports", "core"]:
_path = str(_repo_root / _p)
if _path not in sys.path:
sys.path.insert(0, _path)
AGENT_PATH = str(Path(__file__).resolve().parents[1])
@pytest.fixture(scope="session")
def agent_module():
"""Import the agent package for structural validation."""
import importlib
return importlib.import_module(Path(AGENT_PATH).name)
@pytest.fixture(scope="session")
def runner_loaded():
"""Load the agent through AgentRunner (structural only, no LLM needed)."""
from framework.runner.runner import AgentRunner
return AgentRunner.load(AGENT_PATH)
```
## entry_points Format
MUST be: `{"start": "first-node-id"}`
NOT: `{"first-node-id": ["input_keys"]}` (WRONG)
NOT: `{"first-node-id"}` (WRONG — this is a set)
@@ -0,0 +1,441 @@
# Hive Agent Framework — Condensed Reference
## Architecture
Agents are Python packages in `exports/`:
```
exports/my_agent/
├── __init__.py # MUST re-export ALL module-level vars from agent.py
├── __main__.py # CLI (run, tui, info, validate, shell)
├── agent.py # Graph construction (goal, edges, agent class)
├── config.py # Runtime config
├── nodes/__init__.py # Node definitions (NodeSpec)
├── mcp_servers.json # MCP tool server config
└── tests/ # pytest tests
```
## Agent Loading Contract
`AgentRunner.load()` imports the package (`__init__.py`) and reads these
module-level variables via `getattr()`:
| Variable | Required | Default if missing | Consequence |
|----------|----------|--------------------|-------------|
| `goal` | YES | `None` | **FATAL** — "must define goal, nodes, edges" |
| `nodes` | YES | `None` | **FATAL** — same error |
| `edges` | YES | `None` | **FATAL** — same error |
| `entry_node` | no | `nodes[0].id` | Probably wrong node |
| `entry_points` | no | `{}` | **Nodes unreachable** — validation fails |
| `terminal_nodes` | no | `[]` | OK for forever-alive |
| `pause_nodes` | no | `[]` | OK |
| `conversation_mode` | no | not passed | Isolated mode (no context carryover) |
| `identity_prompt` | no | not passed | No agent-level identity |
| `loop_config` | no | `{}` | No iteration limits |
| `async_entry_points` | no | `[]` | No async triggers (timers, webhooks, events) |
| `runtime_config` | no | `None` | No webhook server |
**CRITICAL:** `__init__.py` MUST import and re-export ALL of these from
`agent.py`. Missing exports silently fall back to defaults, causing
hard-to-debug failures.
**Why `default_agent.validate()` is NOT sufficient:**
`validate()` checks the agent CLASS's internal graph (self.nodes, self.edges).
These are always correct because the constructor references agent.py's module
vars directly. But `AgentRunner.load()` reads from the PACKAGE (`__init__.py`),
not the class. So `validate()` passes while `AgentRunner.load()` fails.
Always test with `AgentRunner.load("exports/{name}")` — this is the same
code path the TUI and `hive run` use.
## Goal
Defines success criteria and constraints:
```python
goal = Goal(
id="kebab-case-id",
name="Display Name",
description="What the agent does",
success_criteria=[
SuccessCriterion(id="sc-id", description="...", metric="...", target="...", weight=0.25),
],
constraints=[
Constraint(id="c-id", description="...", constraint_type="hard", category="quality"),
],
)
```
- 3-5 success criteria, weights sum to 1.0
- 1-5 constraints (hard/soft, categories: quality, accuracy, interaction, functional)
## NodeSpec Fields
| Field | Type | Default | Description |
|-------|------|---------|-------------|
| id | str | required | kebab-case identifier |
| name | str | required | Display name |
| description | str | required | What the node does |
| node_type | str | required | `"event_loop"` or `"gcu"` (browser automation — see GCU Guide appendix) |
| input_keys | list[str] | required | Memory keys this node reads |
| output_keys | list[str] | required | Memory keys this node writes via set_output |
| system_prompt | str | "" | LLM instructions |
| tools | list[str] | [] | Tool names from MCP servers |
| client_facing | bool | False | If True, streams to user and blocks for input |
| nullable_output_keys | list[str] | [] | Keys that may remain unset |
| max_node_visits | int | 0 | 0=unlimited (default); >1 for one-shot feedback loops |
| max_retries | int | 3 | Retries on failure |
| success_criteria | str | "" | Natural language for judge evaluation |
## EdgeSpec Fields
| Field | Type | Description |
|-------|------|-------------|
| id | str | kebab-case identifier |
| source | str | Source node ID |
| target | str | Target node ID |
| condition | EdgeCondition | ON_SUCCESS, ON_FAILURE, ALWAYS, CONDITIONAL |
| condition_expr | str | Python expression evaluated against memory (for CONDITIONAL) |
| priority | int | Positive=forward (evaluated first), negative=feedback (loop-back) |
## Key Patterns
### STEP 1/STEP 2 (Client-Facing Nodes)
```
**STEP 1 — Respond to the user (text only, NO tool calls):**
[Present information, ask questions]
**STEP 2 — After the user responds, call set_output:**
- set_output("key", "value based on user response")
```
This prevents premature set_output before user interaction.
### Fewer, Richer Nodes (CRITICAL)
**Hard limit: 2-4 nodes for most agents.** Never exceed 5 unless the user
explicitly requests a complex multi-phase pipeline.
Each node boundary serializes outputs to shared memory and **destroys** all
in-context information: tool call results, intermediate reasoning, conversation
history. A research node that searches, fetches, and analyzes in ONE node keeps
all source material in its conversation context. Split across 3 nodes, each
downstream node only sees the serialized summary string.
**Decision framework — merge unless ANY of these apply:**
1. **Client-facing boundary** — Autonomous and client-facing work MUST be
separate nodes (different interaction models)
2. **Disjoint tool sets** — If tools are fundamentally different (e.g., web
search vs database), separate nodes make sense
3. **Parallel execution** — Fan-out branches must be separate nodes
**Red flags that you have too many nodes:**
- A node with 0 tools (pure LLM reasoning) → merge into predecessor/successor
- A node that sets only 1 trivial output → collapse into predecessor
- Multiple consecutive autonomous nodes → combine into one rich node
- A "report" node that presents analysis → merge into the client-facing node
- A "confirm" or "schedule" node that doesn't call any external service → remove
**Typical agent structure (2 nodes):**
```
process (autonomous) ←→ review (client-facing)
```
The queen owns intake — she gathers requirements from the user, then
passes structured input via `run_agent_with_input(task)`. When building
the agent, design the entry node's `input_keys` to match what the queen
will provide at run time. Worker agents should NOT have a client-facing
intake node. Client-facing nodes are for mid-execution review/approval only.
For simpler agents, just 1 autonomous node:
```
process (autonomous) — loops back to itself
```
### nullable_output_keys
For inputs that only arrive on certain edges:
```python
research_node = NodeSpec(
input_keys=["brief", "feedback"],
nullable_output_keys=["feedback"], # Only present on feedback edge
max_node_visits=3,
)
```
### Mutually Exclusive Outputs
For routing decisions:
```python
review_node = NodeSpec(
output_keys=["approved", "feedback"],
nullable_output_keys=["approved", "feedback"], # Node sets one or the other
)
```
### Forever-Alive Pattern
`terminal_nodes=[]` — every node has outgoing edges, graph loops until user exits.
Use `conversation_mode="continuous"` to preserve context across transitions.
### set_output
- Synthetic tool injected by framework
- Call separately from real tool calls (separate turn)
- `set_output("key", "value")` stores to shared memory
## Edge Conditions
| Condition | When |
|-----------|------|
| ON_SUCCESS | Node completed successfully |
| ON_FAILURE | Node failed |
| ALWAYS | Unconditional |
| CONDITIONAL | condition_expr evaluates to True against memory |
condition_expr examples:
- `"needs_more_research == True"`
- `"str(next_action).lower() == 'new_agent'"`
- `"feedback is not None"`
## Graph Lifecycle
| Pattern | terminal_nodes | When |
|---------|---------------|------|
| **Forever-alive** | `[]` | **DEFAULT for all agents** |
| Linear | `["last-node"]` | Only if user explicitly requests one-shot/batch |
**Forever-alive is the default.** Always use `terminal_nodes=[]`.
The framework default for `max_node_visits` is 0 (unbounded), so
nodes work correctly in forever-alive loops without explicit override.
Only set `max_node_visits > 0` in one-shot agents with feedback loops.
Every node must have at least one outgoing edge — no dead ends. The
user exits by closing the TUI. Only use terminal nodes if the user
explicitly asks for a batch/one-shot agent that runs once and exits.
## Continuous Conversation Mode
`conversation_mode` has ONLY two valid states:
- `"continuous"` — recommended for interactive agents
- Omit entirely — isolated per-node conversations (each node starts fresh)
**INVALID values** (do NOT use): `"client_facing"`, `"interactive"`,
`"adaptive"`, `"shared"`. These do not exist in the framework.
When `conversation_mode="continuous"`:
- Same conversation thread carries across node transitions
- Layered system prompts: identity (agent-level) + narrative + focus (per-node)
- Transition markers inserted at boundaries
- Compaction happens opportunistically at phase transitions
## loop_config
Only three valid keys:
```python
loop_config = {
"max_iterations": 100, # Max LLM turns per node visit
"max_tool_calls_per_turn": 20, # Max tool calls per LLM response
"max_history_tokens": 32000, # Triggers conversation compaction
}
```
**INVALID keys** (do NOT use): `"strategy"`, `"mode"`, `"timeout"`,
`"temperature"`. These are silently ignored or cause errors.
## Data Tools (Spillover)
For large data that exceeds context:
- `save_data(filename, data)` — Write to session data dir
- `load_data(filename, offset, limit)` — Read with pagination
- `list_data_files()` — List files
- `serve_file_to_user(filename, label)` — Clickable file:// URI
`data_dir` is auto-injected by framework — LLM never sees it.
## Fan-Out / Fan-In
Multiple ON_SUCCESS edges from same source → parallel execution via asyncio.gather().
- Parallel nodes must have disjoint output_keys
- Only one branch may have client_facing nodes
- Fan-in node gets all outputs in shared memory
## Judge System
- **Implicit** (default): ACCEPTs when LLM finishes with no tool calls and all required outputs set
- **SchemaJudge**: Validates against Pydantic model
- **Custom**: Implement `evaluate(context) -> JudgeVerdict`
Judge is the SOLE acceptance mechanism — no ad-hoc framework gating.
## Async Entry Points (Webhooks, Timers, Events)
For agents that need to react to external events (incoming emails, scheduled
tasks, API calls), use `AsyncEntryPointSpec` and optionally `AgentRuntimeConfig`.
### Imports
```python
from framework.graph.edge import GraphSpec, AsyncEntryPointSpec
from framework.runtime.agent_runtime import AgentRuntime, AgentRuntimeConfig, create_agent_runtime
```
Note: `AsyncEntryPointSpec` is in `framework.graph.edge` (the graph/declarative layer).
`AgentRuntimeConfig` is in `framework.runtime.agent_runtime` (the runtime layer).
### AsyncEntryPointSpec Fields
| Field | Type | Default | Description |
|-------|------|---------|-------------|
| id | str | required | Unique identifier |
| name | str | required | Human-readable name |
| entry_node | str | required | Node ID to start execution from |
| trigger_type | str | `"manual"` | `webhook`, `api`, `timer`, `event`, `manual` |
| trigger_config | dict | `{}` | Trigger-specific config (see below) |
| isolation_level | str | `"shared"` | `isolated`, `shared`, `synchronized` |
| priority | int | `0` | Execution priority (higher = more priority) |
| max_concurrent | int | `10` | Max concurrent executions |
### Trigger Types
**timer** — Fires on a schedule. Two modes: cron expressions or fixed interval.
Cron (preferred for precise scheduling):
```python
AsyncEntryPointSpec(
id="daily-digest",
name="Daily Digest",
entry_node="check-node",
trigger_type="timer",
trigger_config={"cron": "0 9 * * *"}, # daily at 9am
isolation_level="shared",
max_concurrent=1,
)
```
- `cron` (str) — standard cron expression (5 fields: min hour dom month dow)
- Examples: `"0 9 * * *"` (daily 9am), `"0 9 * * MON-FRI"` (weekdays 9am), `"*/30 * * * *"` (every 30 min)
Fixed interval (simpler, for polling-style tasks):
```python
AsyncEntryPointSpec(
id="scheduled-check",
name="Scheduled Check",
entry_node="check-node",
trigger_type="timer",
trigger_config={"interval_minutes": 20, "run_immediately": False},
isolation_level="shared",
max_concurrent=1,
)
```
- `interval_minutes` (float) — how often to fire
- `run_immediately` (bool, default False) — fire once on startup
**event** — Subscribes to EventBus (e.g., webhook events):
```python
AsyncEntryPointSpec(
id="email-event",
name="Email Event Handler",
entry_node="process-emails",
trigger_type="event",
trigger_config={"event_types": ["webhook_received"]},
isolation_level="shared",
max_concurrent=10,
)
```
- `event_types` (list[str]) — EventType values to subscribe to
- `filter_stream` (str, optional) — only receive from this stream
- `filter_node` (str, optional) — only receive from this node
**webhook** — HTTP endpoint (requires AgentRuntimeConfig):
The webhook server publishes `WEBHOOK_RECEIVED` events on the EventBus.
An `event` trigger type with `event_types: ["webhook_received"]` subscribes
to those events. The flow is:
```
HTTP POST /webhooks/gmail → WebhookServer → EventBus (WEBHOOK_RECEIVED)
→ event entry point → triggers graph execution from entry_node
```
**manual** — Triggered programmatically via `runtime.trigger()`.
### Isolation Levels
| Level | Meaning |
|-------|---------|
| `isolated` | Private state per execution |
| `shared` | Eventual consistency — async executions can read primary session memory |
| `synchronized` | Shared with write locks (use when ordering matters) |
For most async patterns, use `shared` — the async execution reads the primary
session's memory (e.g., user-configured rules) and runs its own workflow.
### AgentRuntimeConfig (for webhook servers)
```python
from framework.runtime.agent_runtime import AgentRuntimeConfig
runtime_config = AgentRuntimeConfig(
webhook_host="127.0.0.1",
webhook_port=8080,
webhook_routes=[
{
"source_id": "gmail",
"path": "/webhooks/gmail",
"methods": ["POST"],
"secret": None, # Optional HMAC-SHA256 secret
},
],
)
```
`runtime_config` is a module-level variable read by `AgentRunner.load()`.
The runner passes it to `create_agent_runtime()`. On `runtime.start()`,
if webhook_routes is non-empty, an embedded HTTP server starts.
### Session Sharing
Timer and event triggers automatically call `_get_primary_session_state()`
before execution. This finds the active user-facing session and provides
its memory to the async execution, filtered to only the async entry node's
`input_keys`. This means the async flow can read user-configured values
(like rules, preferences) without needing separate configuration.
### Module-Level Variables
Agents with async entry points must export `async_entry_points`, plus `runtime_config` when they use webhooks:
```python
# In agent.py:
async_entry_points = [AsyncEntryPointSpec(...), ...]
runtime_config = AgentRuntimeConfig(...) # Only if using webhooks
```
Every one of these variables that the agent defines must be re-exported from `__init__.py`:
```python
from .agent import (
..., async_entry_points, runtime_config,
)
```
### Reference Agent
See `exports/gmail_inbox_guardian/agent.py` for a complete example with:
- Primary client-facing node (user configures rules)
- Timer-based scheduled inbox checks (every 20 min)
- Webhook-triggered email event handling
- Shared isolation for memory access across streams
## Framework Capabilities
**Works well:** Multi-turn conversations, HITL review, tool orchestration, structured outputs, parallel execution, context management, error recovery, session persistence.
**Limitations:** LLM latency (2-10s/turn), context window limits (~128K), cost per run, rate limits, node boundaries lose context.
**Not designed for:** Sub-second responses, millions of items, real-time streaming, guaranteed determinism, offline/air-gapped.
## Tool Discovery
Do NOT rely on a static tool list — it will be outdated. Always use
`list_agent_tools()` to discover available tools, grouped by category.
```
list_agent_tools() # names + descriptions, all groups
list_agent_tools(output_schema="full") # include input_schema
list_agent_tools(group="gmail") # only gmail_* tools
list_agent_tools("exports/my_agent/mcp_servers.json") # specific agent's tools
```
After building, validate tools exist: `validate_agent_tools("exports/{name}")`
Common tool categories (verify via list_agent_tools):
- **Web**: search, scrape, PDF
- **Data**: save/load/append/list data files, serve to user
- **File**: view, write, replace, diff, list, grep
- **Communication**: email, gmail, slack, telegram
- **CRM**: hubspot, apollo, calcom
- **GitHub**: stargazers, user profiles, repos
- **Vision**: image analysis
- **Time**: current time
@@ -0,0 +1,119 @@
# GCU Browser Automation Guide
## When to Use GCU Nodes
Use `node_type="gcu"` when:
- The user's workflow requires **navigating real websites** (scraping, form-filling, social media interaction, testing web UIs)
- The task involves **dynamic/JS-rendered pages** that `web_scrape` cannot handle (SPAs, infinite scroll, login-gated content)
- The agent needs to **interact with a website** — clicking, typing, scrolling, selecting, uploading files
Do NOT use GCU for:
- Static content that `web_scrape` handles fine
- API-accessible data (use the API directly)
- PDF/file processing
- Anything that doesn't require a browser UI
## What GCU Nodes Are
- `node_type="gcu"` — a declarative enhancement over `event_loop`
- Framework auto-prepends browser best-practices system prompt
- Framework auto-includes all 31 browser tools from `gcu-tools` MCP server
- Same underlying `EventLoopNode` class — no new imports needed
- `tools=[]` is correct — tools are auto-populated at runtime
## GCU Architecture Pattern
GCU nodes are **subagents** — invoked via `delegate_to_sub_agent()`, not connected via edges.
- Primary nodes (`event_loop`, client-facing) orchestrate; GCU nodes do browser work
- Parent node declares `sub_agents=["gcu-node-id"]` and calls `delegate_to_sub_agent(agent_id="gcu-node-id", task="...")`
- GCU nodes set `max_node_visits=1` (single execution per delegation), `client_facing=False`
- GCU nodes use `output_keys=["result"]` and return structured JSON via `set_output("result", ...)`
## GCU Node Definition Template
```python
gcu_browser_node = NodeSpec(
id="gcu-browser-worker",
name="Browser Worker",
description="Browser subagent that does X.",
node_type="gcu",
client_facing=False,
max_node_visits=1,
input_keys=[],
output_keys=["result"],
tools=[], # Auto-populated with all browser tools
system_prompt="""\
You are a browser agent. Your job: [specific task].
## Workflow
1. browser_start (only if no browser is running yet)
2. browser_open(url=TARGET_URL) — note the returned targetId
3. browser_snapshot to read the page
4. [task-specific steps]
5. set_output("result", JSON)
## Output format
set_output("result", JSON) with:
- [field]: [type and description]
""",
)
```
## Parent Node Template (orchestrating GCU subagents)
```python
orchestrator_node = NodeSpec(
id="orchestrator",
...
node_type="event_loop",
sub_agents=["gcu-browser-worker"],
system_prompt="""\
...
delegate_to_sub_agent(
agent_id="gcu-browser-worker",
task="Navigate to [URL]. Do [specific task]. Return JSON with [fields]."
)
...
""",
tools=[], # Orchestrator doesn't need browser tools
)
```
## mcp_servers.json with GCU
```json
{
"hive-tools": { ... },
"gcu-tools": {
"transport": "stdio",
"command": "uv",
"args": ["run", "python", "-m", "gcu.server", "--stdio"],
"cwd": "../../tools",
"description": "GCU tools for browser automation"
}
}
```
Note: `gcu-tools` is auto-added if any node uses `node_type="gcu"`, but including it explicitly is fine.
## GCU System Prompt Best Practices
Key rules to bake into GCU node prompts:
- Prefer `browser_snapshot` over `browser_get_text("body")` — compact accessibility tree vs 100KB+ raw HTML
- Always `browser_wait` after navigation
- Use large scroll amounts (~2000-5000) for lazy-loaded content
- For spillover files, use `run_command` with grep, not `read_file`
- If auth wall detected, report immediately — don't attempt login
- Keep tool calls per turn ≤10
- Tab isolation: when browser is already running, use `browser_open(background=true)` and pass `target_id` to every call
## GCU Anti-Patterns
- Using `browser_screenshot` to read text (use `browser_snapshot`)
- Re-navigating after scrolling (resets scroll position)
- Attempting login on auth walls
- Forgetting `target_id` in multi-tab scenarios
- Putting browser tools directly on `event_loop` nodes instead of using GCU subagent pattern
- Making GCU nodes `client_facing=True` (they should be autonomous subagents)
@@ -0,0 +1,31 @@
"""Test fixtures for Hive Coder agent."""
import sys
from pathlib import Path
import pytest
import pytest_asyncio
_repo_root = Path(__file__).resolve().parents[3]
for _p in ["exports", "core"]:
_path = str(_repo_root / _p)
if _path not in sys.path:
sys.path.insert(0, _path)
AGENT_PATH = str(Path(__file__).resolve().parents[1])
@pytest.fixture(scope="session")
def mock_mode() -> bool:
    """Session-scoped flag consumed by the ``runner`` fixture; always ``True``."""
    return True
@pytest_asyncio.fixture(scope="session")
async def runner(tmp_path_factory, mock_mode):
    """Yield a set-up ``AgentRunner`` for the agent under test.

    The runner is loaded from ``AGENT_PATH`` with a fresh temporary storage
    directory, ``_setup()`` is invoked before it is handed to tests, and
    ``cleanup_async()`` is awaited once the session is over.
    """
    # Imported lazily: the framework only becomes importable after the
    # sys.path adjustments made at module import time.
    from framework.runner.runner import AgentRunner

    agent_runner = AgentRunner.load(
        AGENT_PATH,
        mock_mode=mock_mode,
        storage_path=tmp_path_factory.mktemp("agent_storage"),
    )
    agent_runner._setup()
    yield agent_runner
    await agent_runner.cleanup_async()
@@ -0,0 +1,27 @@
"""Queen's ticket receiver entry point.
When the Worker Health Judge emits a WORKER_ESCALATION_TICKET event on the
shared EventBus, this entry point fires and routes to the ``ticket_triage``
node, where the Queen deliberates and decides whether to notify the operator.
Isolation level is ``isolated`` — the queen's triage memory is kept separate
from the worker's shared memory. Each ticket triage runs in its own context.
"""
from __future__ import annotations
from framework.graph.edge import AsyncEntryPointSpec
# Event-triggered async entry point: fires on ``worker_escalation_ticket``
# events and starts execution at the ``ticket_triage`` node, using the
# ``isolated`` isolation level.
TICKET_RECEIVER_ENTRY_POINT = AsyncEntryPointSpec(
    id="ticket_receiver",
    name="Worker Escalation Ticket Receiver",
    entry_node="ticket_triage",
    trigger_type="event",
    trigger_config={
        "event_types": ["worker_escalation_ticket"],
        # Do not fire on our own graph's events (prevents loops if queen
        # somehow emits a worker_escalation_ticket for herself)
        "exclude_own_graph": True,
    },
    isolation_level="isolated",
)
+49 -24
View File
@@ -245,20 +245,14 @@ class GraphBuilder:
warnings.append(f"Node '{node.id}' should have a description")
# Type-specific validation
if node.node_type == "llm_tool_use":
if not node.tools:
errors.append(f"LLM tool node '{node.id}' must specify tools")
if not node.system_prompt:
warnings.append(f"LLM node '{node.id}' should have a system_prompt")
if node.node_type == "event_loop":
if node.tools and not node.system_prompt:
warnings.append(f"Event loop node '{node.id}' should have a system_prompt")
if node.node_type == "router":
if not node.routes:
errors.append(f"Router node '{node.id}' must specify routes")
if node.node_type == "function":
if not node.function:
errors.append(f"Function node '{node.id}' must specify function name")
# Check input/output keys
if not node.input_keys:
suggestions.append(f"Consider specifying input_keys for '{node.id}'")
@@ -400,9 +394,13 @@ class GraphBuilder:
if not terminal_candidates and self.session.nodes:
warnings.append("No terminal nodes found (all nodes have outgoing edges)")
# Check reachability
# Check reachability from ALL entry candidates (not just the first one).
# Agents with async entry points have multiple nodes with no incoming
# edges (e.g., a primary entry node and an event-driven entry node).
if entry_candidates and self.session.nodes:
reachable = self._compute_reachable(entry_candidates[0])
reachable = set()
for candidate in entry_candidates:
reachable |= self._compute_reachable(candidate)
unreachable = [n.id for n in self.session.nodes if n.id not in reachable]
if unreachable:
errors.append(f"Unreachable nodes: {unreachable}")
@@ -443,14 +441,15 @@ class GraphBuilder:
self.session.test_cases.append(test)
self._save_session()
def run_test(
async def run_test_async(
self,
test: TestCase,
executor_factory: Callable,
) -> TestResult:
"""
Run a single test case.
Run a single test case asynchronously.
This method is safe to call from async contexts (Jupyter, FastAPI, etc.).
executor_factory should return a configured GraphExecutor.
"""
self._require_phase([BuildPhase.ADDING_NODES, BuildPhase.ADDING_EDGES, BuildPhase.TESTING])
@@ -462,14 +461,10 @@ class GraphBuilder:
executor = executor_factory()
# Run the test
import asyncio
result = asyncio.run(
executor.execute(
graph=graph,
goal=self.session.goal,
input_data=test.input,
)
result = await executor.execute(
graph=graph,
goal=self.session.goal,
input_data=test.input,
)
# Check result
@@ -499,6 +494,36 @@ class GraphBuilder:
return test_result
def run_test(
    self,
    test: TestCase,
    executor_factory: Callable,
) -> TestResult:
    """
    Run a single test case.

    This is a synchronous wrapper around run_test_async().
    If called from an async context (Jupyter, FastAPI, etc.), use
    run_test_async() instead.

    Args:
        test: The test case to execute.
        executor_factory: Callable returning a configured GraphExecutor.

    Returns:
        Whatever run_test_async() returns for this test case.

    Raises:
        RuntimeError: If an event loop is already running in the current
            thread (asyncio.run() cannot be nested inside it).
    """
    import asyncio

    # get_running_loop() raises RuntimeError when no loop is running, which
    # is the only situation in which asyncio.run() may be used.
    try:
        asyncio.get_running_loop()
    except RuntimeError:
        loop_running = False
    else:
        loop_running = True

    if loop_running:
        raise RuntimeError(
            "Cannot call run_test() from an async context. "
            "An event loop is already running. "
            "Please use 'await builder.run_test_async(test, executor_factory)' instead."
        )

    # Run OUTSIDE the except block above: otherwise every exception raised by
    # the test run would carry the probe's "no running event loop"
    # RuntimeError as its __context__ and pollute the traceback.
    return asyncio.run(self.run_test_async(test, executor_factory))
def run_all_tests(self, executor_factory: Callable) -> list[TestResult]:
"""Run all test cases."""
results = []
@@ -635,7 +660,7 @@ class GraphBuilder:
# Generate Python code
code = self._generate_code(graph)
Path(path).write_text(code)
Path(path).write_text(code, encoding="utf-8")
self.session.phase = BuildPhase.EXPORTED
self._save_session()
@@ -729,14 +754,14 @@ class GraphBuilder:
"""Save session to disk."""
self.session.updated_at = datetime.now()
path = self.storage_path / f"{self.session.id}.json"
path.write_text(self.session.model_dump_json(indent=2))
path.write_text(self.session.model_dump_json(indent=2), encoding="utf-8")
def _load_session(self, session_id: str) -> BuildSession:
"""Load session from disk."""
path = self.storage_path / f"{session_id}.json"
if not path.exists():
raise FileNotFoundError(f"Session not found: {session_id}")
return BuildSession.model_validate_json(path.read_text())
return BuildSession.model_validate_json(path.read_text(encoding="utf-8"))
@classmethod
def list_sessions(cls, storage_path: Path | str | None = None) -> list[str]:
+10 -3
View File
@@ -11,9 +11,9 @@ Usage:
Testing commands:
hive test-run <agent_path> --goal <goal_id>
hive test-debug <goal_id> <test_id>
hive test-list <goal_id>
hive test-stats <goal_id>
hive test-debug <agent_path> <test_name>
hive test-list <agent_path>
hive test-stats <agent_path>
"""
import argparse
@@ -56,6 +56,13 @@ def _configure_paths():
if (project_root / "core").is_dir() and core_str not in sys.path:
sys.path.insert(0, core_str)
# Add core/framework/agents/ so framework agents are importable as top-level packages
framework_agents_dir = project_root / "core" / "framework" / "agents"
if framework_agents_dir.is_dir():
fa_str = str(framework_agents_dir)
if fa_str not in sys.path:
sys.path.insert(0, fa_str)
def main():
_configure_paths()
+90 -2
View File
@@ -50,14 +50,101 @@ def get_max_tokens() -> int:
def get_api_key() -> str | None:
    """Resolve the LLM API key from the ``llm`` section of the hive config.

    Sources are tried in this order; the first non-empty value wins:

    1. ``use_claude_code_subscription`` set: the token returned by
       ``get_claude_code_token()``.
    2. ``use_codex_subscription`` set: the token returned by
       ``get_codex_token()``.
    3. The environment variable named by ``api_key_env_var``.

    An ``ImportError`` while obtaining a subscription token is tolerated and
    the lookup moves on to the next source.

    Returns:
        The key/token string, or ``None`` when no source yields one.
    """
    llm_cfg = get_hive_config().get("llm", {})

    # Claude Code subscription: OAuth token supplied by the runner module.
    if llm_cfg.get("use_claude_code_subscription"):
        try:
            from framework.runner.runner import get_claude_code_token

            oauth_token = get_claude_code_token()
        except ImportError:
            oauth_token = None
        if oauth_token:
            return oauth_token

    # Codex subscription: OAuth token supplied by the runner module.
    if llm_cfg.get("use_codex_subscription"):
        try:
            from framework.runner.runner import get_codex_token

            oauth_token = get_codex_token()
        except ImportError:
            oauth_token = None
        if oauth_token:
            return oauth_token

    # Standard env-var path (covers ZAI Code and all API-key providers).
    env_name = llm_cfg.get("api_key_env_var")
    return os.environ.get(env_name) if env_name else None
def get_gcu_enabled() -> bool:
    """Read the ``gcu_enabled`` setting from the hive config (defaults to ``True``)."""
    hive_cfg = get_hive_config()
    return hive_cfg.get("gcu_enabled", True)
def get_api_base() -> str | None:
    """Return the ``api_base`` URL to use for the configured LLM, if any.

    With ``use_codex_subscription`` set, the ChatGPT Codex backend URL is
    returned regardless of any configured ``api_base``; otherwise the
    ``llm.api_base`` config value (possibly ``None``) is returned.
    """
    llm_cfg = get_hive_config().get("llm", {})
    codex_active = llm_cfg.get("use_codex_subscription")
    # Codex subscription routes through the ChatGPT backend, not api.openai.com.
    return "https://chatgpt.com/backend-api/codex" if codex_active else llm_cfg.get("api_base")
def get_llm_extra_kwargs() -> dict[str, Any]:
    """Build the extra keyword arguments for LiteLLMProvider.

    * ``use_claude_code_subscription`` set and a key available: returns
      ``extra_headers`` carrying an ``authorization: Bearer <key>`` header.
    * ``use_codex_subscription`` set and a key available: returns
      ``extra_headers`` (``Authorization``, ``User-Agent`` and, when an
      account id can be obtained, ``ChatGPT-Account-Id``) together with
      ``store=False`` and ``allowed_openai_params=["store"]``.
    * Otherwise: an empty dict.

    The Claude Code branch is evaluated first; if it yields no key, the
    Codex branch is still considered.
    """
    llm_cfg = get_hive_config().get("llm", {})

    if llm_cfg.get("use_claude_code_subscription"):
        claude_key = get_api_key()
        if claude_key:
            return {"extra_headers": {"authorization": f"Bearer {claude_key}"}}

    if not llm_cfg.get("use_codex_subscription"):
        return {}

    codex_key = get_api_key()
    if not codex_key:
        return {}

    codex_headers: dict[str, str] = {
        "Authorization": f"Bearer {codex_key}",
        "User-Agent": "CodexBar",
    }
    # The account id is optional: an ImportError while obtaining it simply
    # leaves the header out.
    try:
        from framework.runner.runner import get_codex_account_id

        codex_account = get_codex_account_id()
        if codex_account:
            codex_headers["ChatGPT-Account-Id"] = codex_account
    except ImportError:
        pass

    return {
        "extra_headers": codex_headers,
        "store": False,
        "allowed_openai_params": ["store"],
    }
# ---------------------------------------------------------------------------
# RuntimeConfig shared across agent templates
# ---------------------------------------------------------------------------
@@ -71,4 +158,5 @@ class RuntimeConfig:
temperature: float = 0.7
max_tokens: int = field(default_factory=get_max_tokens)
api_key: str | None = field(default_factory=get_api_key)
api_base: str | None = None
api_base: str | None = field(default_factory=get_api_base)
extra_kwargs: dict[str, Any] = field(default_factory=get_llm_extra_kwargs)
+48 -1
View File
@@ -42,6 +42,14 @@ For Vault integration:
from core.framework.credentials.vault import HashiCorpVaultStorage
"""
from .key_storage import (
delete_aden_api_key,
generate_and_save_credential_key,
load_aden_api_key,
load_credential_key,
save_aden_api_key,
save_credential_key,
)
from .models import (
CredentialDecryptionError,
CredentialError,
@@ -59,6 +67,13 @@ from .provider import (
CredentialProvider,
StaticProvider,
)
from .setup import (
CredentialSetupSession,
MissingCredential,
SetupResult,
load_agent_nodes,
run_credential_setup_cli,
)
from .storage import (
CompositeStorage,
CredentialStorage,
@@ -68,7 +83,12 @@ from .storage import (
)
from .store import CredentialStore
from .template import TemplateResolver
from .validation import ensure_credential_key_env, validate_agent_credentials
from .validation import (
CredentialStatus,
CredentialValidationResult,
ensure_credential_key_env,
validate_agent_credentials,
)
# Aden sync components (lazy import to avoid httpx dependency when not needed)
# Usage: from core.framework.credentials.aden import AdenSyncProvider
@@ -85,6 +105,14 @@ try:
except ImportError:
_ADEN_AVAILABLE = False
# Local credential registry (named API key accounts with identity metadata)
try:
from .local import LocalAccountInfo, LocalCredentialRegistry
_LOCAL_AVAILABLE = True
except ImportError:
_LOCAL_AVAILABLE = False
__all__ = [
# Main store
"CredentialStore",
@@ -112,15 +140,34 @@ __all__ = [
"CredentialRefreshError",
"CredentialValidationError",
"CredentialDecryptionError",
# Key storage (bootstrap credentials)
"load_credential_key",
"save_credential_key",
"generate_and_save_credential_key",
"load_aden_api_key",
"save_aden_api_key",
"delete_aden_api_key",
# Validation
"ensure_credential_key_env",
"validate_agent_credentials",
"CredentialStatus",
"CredentialValidationResult",
# Interactive setup
"CredentialSetupSession",
"MissingCredential",
"SetupResult",
"load_agent_nodes",
"run_credential_setup_cli",
# Aden sync (optional - requires httpx)
"AdenSyncProvider",
"AdenCredentialClient",
"AdenClientConfig",
"AdenCachedStorage",
# Local credential registry (optional - requires cryptography)
"LocalCredentialRegistry",
"LocalAccountInfo",
]
# Track Aden availability for runtime checks
ADEN_AVAILABLE = _ADEN_AVAILABLE
LOCAL_AVAILABLE = _LOCAL_AVAILABLE
+155 -202
View File
@@ -1,29 +1,31 @@
"""
Aden Credential Client.
HTTP client for communicating with the Aden authentication server.
The Aden server handles OAuth2 authorization flows and token management.
This client fetches tokens and delegates refresh operations to Aden.
HTTP client for the Aden authentication server.
Aden holds all OAuth secrets; agents receive only short-lived access tokens.
API (all endpoints authenticated with Bearer {api_key}):
GET /v1/credentials list integrations
GET /v1/credentials/{integration_id} get access token (auto-refreshes)
POST /v1/credentials/{integration_id}/refresh force refresh
GET /v1/credentials/{integration_id}/validate check validity
Integration IDs are base64-encoded hashes assigned by the Aden platform
(e.g. "Z29vZ2xlOlRpbW90aHk6MTYwNjc6MTM2ODQ"), NOT provider names.
Usage:
# API key loaded from ADEN_API_KEY environment variable by default
client = AdenCredentialClient(AdenClientConfig(
base_url="https://api.adenhq.com",
))
# Or explicitly provide the API key
client = AdenCredentialClient(AdenClientConfig(
base_url="https://api.adenhq.com",
api_key="your-api-key",
))
# List what's connected
for info in client.list_integrations():
print(f"{info.provider}/{info.alias}: {info.status}")
# Fetch a credential
response = client.get_credential("hubspot")
if response:
print(f"Token expires at: {response.expires_at}")
# Request a refresh
refreshed = client.request_refresh("hubspot")
# Get an access token
cred = client.get_credential(info.integration_id)
print(cred.access_token)
"""
from __future__ import annotations
@@ -88,8 +90,7 @@ class AdenClientConfig:
"""Base URL of the Aden server (e.g., 'https://api.adenhq.com')."""
api_key: str | None = None
"""Agent's API key for authenticating with Aden.
If not provided, loaded from ADEN_API_KEY environment variable."""
"""Agent API key. Loaded from ADEN_API_KEY env var if not provided."""
tenant_id: str | None = None
"""Optional tenant ID for multi-tenant deployments."""
@@ -104,7 +105,6 @@ class AdenClientConfig:
"""Base delay between retries in seconds (exponential backoff)."""
def __post_init__(self) -> None:
"""Load API key from environment if not provided."""
if self.api_key is None:
self.api_key = os.environ.get("ADEN_API_KEY")
if not self.api_key:
@@ -115,86 +115,124 @@ class AdenClientConfig:
@dataclass
class AdenCredentialResponse:
"""Response from Aden server containing credential data."""
class AdenIntegrationInfo:
"""An integration from GET /v1/credentials.
Example response item::
{
"integration_id": "Z29vZ2xlOlRpbW90aHk6MTYwNjc6MTM2ODQ",
"provider": "google",
"alias": "Timothy",
"status": "active",
"email": "timothy@acho.io",
"expires_at": "2026-02-20T21:46:04.863Z"
}
"""
integration_id: str
"""Unique identifier for the integration (e.g., 'hubspot')."""
"""Base64-encoded hash ID assigned by Aden."""
integration_type: str
"""Type of integration (e.g., 'hubspot', 'github', 'slack')."""
provider: str
"""Provider type (e.g. "google", "slack", "hubspot")."""
access_token: str
"""The access token for API calls."""
alias: str
"""User-set alias on the Aden platform."""
token_type: str = "Bearer"
"""Token type (usually 'Bearer')."""
status: str
"""Status: "active", "expired", "requires_reauth"."""
email: str = ""
"""Email associated with this connection."""
expires_at: datetime | None = None
"""When the access token expires (UTC)."""
"""When the current access token expires."""
scopes: list[str] = field(default_factory=list)
"""OAuth2 scopes granted to this token."""
metadata: dict[str, Any] = field(default_factory=dict)
"""Additional integration-specific metadata."""
# Backward compat — old code reads integration_type
@property
def integration_type(self) -> str:
return self.provider
@classmethod
def from_dict(
cls, data: dict[str, Any], integration_id: str | None = None
) -> AdenCredentialResponse:
"""Create from API response dictionary or normalized credential dict."""
def from_dict(cls, data: dict[str, Any]) -> AdenIntegrationInfo:
expires_at = None
if data.get("expires_at"):
expires_at = datetime.fromisoformat(data["expires_at"].replace("Z", "+00:00"))
resolved_integration_id = (
integration_id
or data.get("integration_id")
or data.get("alias")
or data.get("provider", "")
)
resolved_integration_type = data.get("integration_type") or data.get("provider", "")
metadata = data.get("metadata")
if metadata is None and data.get("email"):
metadata = {"email": data.get("email")}
if metadata is None:
metadata = {}
return cls(
integration_id=resolved_integration_id,
integration_type=resolved_integration_type,
access_token=data["access_token"],
token_type=data.get("token_type", "Bearer"),
integration_id=data.get("integration_id", ""),
provider=data.get("provider", ""),
alias=data.get("alias", ""),
status=data.get("status", "unknown"),
email=data.get("email", ""),
expires_at=expires_at,
scopes=data.get("scopes", []),
metadata=metadata,
)
@dataclass
class AdenIntegrationInfo:
"""Information about an available integration."""
class AdenCredentialResponse:
"""Response from GET /v1/credentials/{integration_id}.
Example::
{
"access_token": "ya29.a0AfH6SM...",
"token_type": "Bearer",
"expires_at": "2026-02-20T12:00:00.000Z",
"provider": "google",
"alias": "Timothy",
"email": "timothy@acho.io"
}
"""
integration_id: str
integration_type: str
status: str # "active", "requires_reauth", "expired"
"""The integration_id used in the request."""
access_token: str
"""Short-lived access token for API calls."""
token_type: str = "Bearer"
expires_at: datetime | None = None
provider: str = ""
"""Provider type (e.g. "google")."""
alias: str = ""
"""User-set alias."""
email: str = ""
"""Email associated with this connection."""
scopes: list[str] = field(default_factory=list)
metadata: dict[str, Any] = field(default_factory=dict)
# Backward compat
@property
def integration_type(self) -> str:
return self.provider
@classmethod
def from_dict(cls, data: dict[str, Any]) -> AdenIntegrationInfo:
"""Create from API response dictionary."""
def from_dict(cls, data: dict[str, Any], integration_id: str = "") -> AdenCredentialResponse:
expires_at = None
if data.get("expires_at"):
expires_at = datetime.fromisoformat(data["expires_at"].replace("Z", "+00:00"))
# Build metadata from email if present
metadata = data.get("metadata") or {}
if not metadata and data.get("email"):
metadata = {"email": data["email"]}
return cls(
integration_id=data["integration_id"],
integration_type=data.get("provider", data["integration_id"]),
status=data.get("status", "unknown"),
integration_id=integration_id or data.get("integration_id", ""),
access_token=data["access_token"],
token_type=data.get("token_type", "Bearer"),
expires_at=expires_at,
provider=data.get("provider", ""),
alias=data.get("alias", ""),
email=data.get("email", ""),
scopes=data.get("scopes", []),
metadata=metadata,
)
@@ -202,56 +240,33 @@ class AdenCredentialClient:
"""
HTTP client for Aden credential server.
Handles communication with the Aden authentication server,
including fetching credentials, requesting refreshes, and
reporting usage statistics.
The client automatically handles:
- Retries with exponential backoff for transient failures
- Proper error classification (auth, not found, rate limit, etc.)
- Request headers for authentication and tenant isolation
Usage:
# API key loaded from ADEN_API_KEY environment variable
config = AdenClientConfig(
client = AdenCredentialClient(AdenClientConfig(
base_url="https://api.adenhq.com",
)
))
client = AdenCredentialClient(config)
# List integrations
for info in client.list_integrations():
print(f"{info.provider}/{info.alias}: {info.status}")
# Fetch a credential
cred = client.get_credential("hubspot")
if cred:
headers = {"Authorization": f"Bearer {cred.access_token}"}
# Get access token (uses base64 integration_id, NOT provider name)
cred = client.get_credential(info.integration_id)
headers = {"Authorization": f"Bearer {cred.access_token}"}
# List all integrations
integrations = client.list_integrations()
for info in integrations:
print(f"{info.integration_id}: {info.status}")
# Clean up
client.close()
"""
def __init__(self, config: AdenClientConfig):
"""
Initialize the Aden client.
Args:
config: Client configuration including base URL and API key.
"""
self.config = config
self._client: httpx.Client | None = None
def _get_client(self) -> httpx.Client:
"""Get or create the HTTP client."""
if self._client is None:
headers = {
"Authorization": f"Bearer {self.config.api_key}",
"Content-Type": "application/json",
"User-Agent": "hive-credential-store/1.0",
}
if self.config.tenant_id:
headers["X-Tenant-ID"] = self.config.tenant_id
@@ -260,7 +275,6 @@ class AdenCredentialClient:
timeout=self.config.timeout,
headers=headers,
)
return self._client
def _request_with_retry(
@@ -277,10 +291,13 @@ class AdenCredentialClient:
try:
response = client.request(method, path, **kwargs)
# Handle specific error codes
if response.status_code == 401:
raise AdenAuthenticationError("Agent API key is invalid or revoked")
if response.status_code == 403:
data = response.json()
raise AdenClientError(data.get("message", "Forbidden"))
if response.status_code == 404:
raise AdenNotFoundError(f"Integration not found: {path}")
@@ -293,14 +310,15 @@ class AdenCredentialClient:
if response.status_code == 400:
data = response.json()
if data.get("error") == "refresh_failed":
msg = data.get("message", "Bad request")
if data.get("error") == "refresh_failed" or "refresh" in msg.lower():
raise AdenRefreshError(
data.get("message", "Token refresh failed"),
msg,
requires_reauthorization=data.get("requires_reauthorization", False),
reauthorization_url=data.get("reauthorization_url"),
)
raise AdenClientError(f"Bad request: {msg}")
# Success or other error
response.raise_for_status()
return response
@@ -321,30 +339,40 @@ class AdenCredentialClient:
AdenRefreshError,
AdenRateLimitError,
):
# Don't retry these errors
raise
# Should not reach here, but just in case
raise AdenClientError(
f"Request failed after {self.config.retry_attempts} attempts"
) from last_error
def get_credential(self, integration_id: str) -> AdenCredentialResponse | None:
def list_integrations(self) -> list[AdenIntegrationInfo]:
"""
Fetch the current credential for an integration.
List all integrations for this agent's team.
The Aden server may refresh the token internally if it's expired
before returning it.
Args:
integration_id: The integration identifier (e.g., 'hubspot').
GET /v1/credentials → {"integrations": [...]}
Returns:
Credential response with access token, or None if not found.
List of AdenIntegrationInfo with integration_id, provider,
alias, status, email, expires_at.
"""
response = self._request_with_retry("GET", "/v1/credentials")
data = response.json()
return [AdenIntegrationInfo.from_dict(item) for item in data.get("integrations", [])]
Raises:
AdenAuthenticationError: If API key is invalid.
AdenClientError: For connection failures.
# Alias
list_connections = list_integrations
def get_credential(self, integration_id: str) -> AdenCredentialResponse | None:
"""
Get access token for an integration. Auto-refreshes if near expiry.
GET /v1/credentials/{integration_id}
Args:
integration_id: Base64 hash ID from list_integrations().
Returns:
AdenCredentialResponse with access_token, or None if not found.
"""
try:
response = self._request_with_retry("GET", f"/v1/credentials/{integration_id}")
@@ -355,100 +383,34 @@ class AdenCredentialClient:
def request_refresh(self, integration_id: str) -> AdenCredentialResponse:
"""
Request the Aden server to refresh the token.
Force refresh the access token.
Use this when the local store detects an expired or near-expiry token.
The Aden server handles the actual OAuth2 refresh token flow.
POST /v1/credentials/{integration_id}/refresh
Args:
integration_id: The integration identifier.
integration_id: Base64 hash ID.
Returns:
Credential response with new access token.
Raises:
AdenRefreshError: If refresh fails (may require re-authorization).
AdenNotFoundError: If integration not found.
AdenAuthenticationError: If API key is invalid.
AdenRateLimitError: If rate limited.
AdenCredentialResponse with new access_token.
"""
response = self._request_with_retry("POST", f"/v1/credentials/{integration_id}/refresh")
data = response.json()
return AdenCredentialResponse.from_dict(data, integration_id=integration_id)
def list_integrations(self) -> list[AdenIntegrationInfo]:
"""
List all integrations available for this agent/tenant.
Returns:
List of integration info objects.
Raises:
AdenAuthenticationError: If API key is invalid.
AdenClientError: For connection failures.
"""
response = self._request_with_retry("GET", "/v1/credentials")
data = response.json()
return [AdenIntegrationInfo.from_dict(item) for item in data.get("integrations", [])]
def validate_token(self, integration_id: str) -> dict[str, Any]:
"""
Check if a token is still valid without fetching it.
Check if an integration's OAuth connection is valid.
Args:
integration_id: The integration identifier.
GET /v1/credentials/{integration_id}/validate
Returns:
Dict with 'valid' bool and optional 'expires_at', 'reason',
'requires_reauthorization', 'reauthorization_url'.
Raises:
AdenNotFoundError: If integration not found.
AdenAuthenticationError: If API key is invalid.
{"valid": bool, "status": str, "expires_at": str, "error": str|null}
"""
response = self._request_with_retry("GET", f"/v1/credentials/{integration_id}/validate")
return response.json()
def report_usage(
self,
integration_id: str,
operation: str,
status: str = "success",
metadata: dict[str, Any] | None = None,
) -> None:
"""
Report credential usage statistics to Aden.
This is optional and used for analytics/billing.
Args:
integration_id: The integration identifier.
operation: Operation name (e.g., 'api_call').
status: Operation status ('success', 'error').
metadata: Additional operation metadata.
"""
try:
self._request_with_retry(
"POST",
f"/v1/credentials/{integration_id}/usage",
json={
"operation": operation,
"status": status,
"timestamp": datetime.utcnow().isoformat() + "Z",
"metadata": metadata or {},
},
)
except Exception as e:
# Usage reporting is best-effort, don't fail on errors
logger.warning(f"Failed to report usage for '{integration_id}': {e}")
def health_check(self) -> dict[str, Any]:
"""
Check Aden server health and connectivity.
Returns:
Dict with 'status', 'version', 'timestamp', and optionally 'error'.
"""
"""Check Aden server health."""
try:
client = self._get_client()
response = client.get("/health")
@@ -456,26 +418,17 @@ class AdenCredentialClient:
data = response.json()
data["latency_ms"] = response.elapsed.total_seconds() * 1000
return data
return {
"status": "degraded",
"error": f"Unexpected status code: {response.status_code}",
}
return {"status": "degraded", "error": f"HTTP {response.status_code}"}
except Exception as e:
return {
"status": "unhealthy",
"error": str(e),
}
return {"status": "unhealthy", "error": str(e)}
def close(self) -> None:
"""Close the HTTP client and release resources."""
if self._client:
self._client.close()
self._client = None
def __enter__(self) -> AdenCredentialClient:
"""Context manager entry."""
return self
def __exit__(self, *args: Any) -> None:
"""Context manager exit."""
self.close()
+35 -7
View File
@@ -282,8 +282,8 @@ class AdenSyncProvider(CredentialProvider):
"""
Sync all credentials from Aden server to local store.
Fetches the list of available integrations from Aden and
populates the local credential store with current tokens.
Calls GET /v1/credentials to list integrations, then fetches
access tokens for each active one.
Args:
store: The credential store to populate.
@@ -298,9 +298,7 @@ class AdenSyncProvider(CredentialProvider):
for info in integrations:
if info.status != "active":
logger.warning(
f"Skipping integration '{info.integration_id}': status={info.status}"
)
logger.warning(f"Skipping connection '{info.alias}': status={info.status}")
continue
try:
@@ -308,9 +306,9 @@ class AdenSyncProvider(CredentialProvider):
if cred:
store.save_credential(cred)
synced += 1
logger.info(f"Synced credential '{info.integration_id}' from Aden")
logger.info(f"Synced credential '{info.alias}' from Aden")
except Exception as e:
logger.warning(f"Failed to sync '{info.integration_id}': {e}")
logger.warning(f"Failed to sync '{info.alias}': {e}")
except AdenClientError as e:
logger.error(f"Failed to list integrations from Aden: {e}")
@@ -373,6 +371,21 @@ class AdenSyncProvider(CredentialProvider):
value=SecretStr(aden_response.integration_type),
)
# Store alias (user-set name from Aden platform)
if aden_response.alias:
credential.keys["_alias"] = CredentialKey(
name="_alias",
value=SecretStr(aden_response.alias),
)
# Persist Aden metadata as identity keys
for meta_key, meta_value in (aden_response.metadata or {}).items():
if meta_value and isinstance(meta_value, str):
credential.keys[f"_identity_{meta_key}"] = CredentialKey(
name=f"_identity_{meta_key}",
value=SecretStr(meta_value),
)
# Update timestamps
credential.last_refreshed = datetime.now(UTC)
credential.provider_id = self.provider_id
@@ -400,12 +413,27 @@ class AdenSyncProvider(CredentialProvider):
),
}
# Store alias (user-set name from Aden platform)
if aden_response.alias:
keys["_alias"] = CredentialKey(
name="_alias",
value=SecretStr(aden_response.alias),
)
if aden_response.scopes:
keys["scope"] = CredentialKey(
name="scope",
value=SecretStr(" ".join(aden_response.scopes)),
)
# Persist Aden metadata as identity keys
for meta_key, meta_value in (aden_response.metadata or {}).items():
if meta_value and isinstance(meta_value, str):
keys[f"_identity_{meta_key}"] = CredentialKey(
name=f"_identity_{meta_key}",
value=SecretStr(meta_value),
)
return CredentialObject(
id=aden_response.integration_id,
credential_type=CredentialType.OAUTH2,
+83 -34
View File
@@ -26,7 +26,7 @@ Usage:
storage = AdenCachedStorage(
local_storage=EncryptedFileStorage(),
aden_provider=provider,
cache_ttl_seconds=300, # Re-check Aden every 5 minutes
cache_ttl_seconds=600, # Re-check Aden every 10 minutes
)
# Create store
@@ -77,7 +77,7 @@ class AdenCachedStorage(CredentialStorage):
storage = AdenCachedStorage(
local_storage=EncryptedFileStorage(),
aden_provider=provider,
cache_ttl_seconds=300, # 5 minutes
cache_ttl_seconds=600, # 10 minutes
)
store = CredentialStore(
@@ -114,8 +114,10 @@ class AdenCachedStorage(CredentialStorage):
self._cache_ttl = timedelta(seconds=cache_ttl_seconds)
self._prefer_local = prefer_local
self._cache_timestamps: dict[str, datetime] = {}
# Index: provider name (e.g., "hubspot") -> credential hash ID
self._provider_index: dict[str, str] = {}
# Index: provider name (e.g., "hubspot") -> list of credential hash IDs
self._provider_index: dict[str, list[str]] = {}
# Index: "provider:alias" -> credential hash ID (for alias-based routing)
self._alias_index: dict[str, str] = {}
def save(self, credential: CredentialObject) -> None:
"""
@@ -160,14 +162,16 @@ class AdenCachedStorage(CredentialStorage):
CredentialObject if found, None otherwise.
"""
# Check provider index first — Aden-synced credentials take priority
resolved_id = self._provider_index.get(credential_id)
if resolved_id and resolved_id != credential_id:
result = self._load_by_id(resolved_id)
if result is not None:
logger.info(
f"Loaded credential '{credential_id}' via provider index (id='{resolved_id}')"
)
return result
resolved_ids = self._provider_index.get(credential_id)
if resolved_ids:
for rid in resolved_ids:
if rid != credential_id:
result = self._load_by_id(rid)
if result is not None:
logger.info(
f"Loaded credential '{credential_id}' via provider index (id='{rid}')"
)
return result
# Direct lookup (exact credential_id match)
return self._load_by_id(credential_id)
@@ -189,25 +193,42 @@ class AdenCachedStorage(CredentialStorage):
logger.debug(f"Using cached credential '{credential_id}'")
return local_cred
# Try to fetch from Aden
# If nothing local, there's nothing to refresh from Aden.
# sync_all() already fetched all available credentials — anything
# not in local storage doesn't exist on the Aden server.
if local_cred is None:
return None
# Try to refresh stale local credential from Aden
try:
aden_cred = self._aden_provider.fetch_from_aden(credential_id)
if aden_cred:
# Update local cache
self.save(aden_cred)
logger.debug(f"Fetched credential '{credential_id}' from Aden")
return aden_cred
except Exception as e:
logger.warning(f"Failed to fetch '{credential_id}' from Aden: {e}")
logger.info(f"Using stale cached credential '{credential_id}'")
return local_cred
# Fall back to local cache if Aden fails
if local_cred:
logger.info(f"Using stale cached credential '{credential_id}'")
return local_cred
# Return local credential if it exists (may be None)
return local_cred
def load_all_for_provider(self, provider_name: str) -> list[CredentialObject]:
    """Collect every credential indexed under a provider type.

    Args:
        provider_name: Provider name (e.g. "google", "slack").

    Returns:
        The credentials loaded for each indexed ID, in index order. IDs for
        which nothing is loaded are left out; an unknown provider yields
        an empty list.
    """
    indexed_ids = self._provider_index.get(provider_name, [])
    loaded = (self._load_by_id(cid) for cid in indexed_ids)
    return [cred for cred in loaded if cred]
def delete(self, credential_id: str) -> bool:
"""
Delete credential from local cache.
@@ -246,9 +267,11 @@ class AdenCachedStorage(CredentialStorage):
if self._local.exists(credential_id):
return True
# Check provider index
resolved_id = self._provider_index.get(credential_id)
if resolved_id and resolved_id != credential_id:
return self._local.exists(resolved_id)
resolved_ids = self._provider_index.get(credential_id)
if resolved_ids:
for rid in resolved_ids:
if rid != credential_id and self._local.exists(rid):
return True
return False
def _is_cache_fresh(self, credential_id: str) -> bool:
@@ -285,13 +308,15 @@ class AdenCachedStorage(CredentialStorage):
def _index_provider(self, credential: CredentialObject) -> None:
"""
Index a credential by its provider/integration type.
Index a credential by its provider/integration type and alias.
Aden credentials carry an ``_integration_type`` key whose value is
the provider name (e.g., ``hubspot``). This method maps that
provider name to the credential's hash ID so that subsequent
``load("hubspot")`` calls resolve to the correct credential.
Also indexes by ``_alias`` for alias-based multi-account routing.
Args:
credential: The credential to index.
"""
@@ -300,19 +325,45 @@ class AdenCachedStorage(CredentialStorage):
return
provider_name = integration_type_key.value.get_secret_value()
if provider_name:
self._provider_index[provider_name] = credential.id
if provider_name not in self._provider_index:
self._provider_index[provider_name] = []
if credential.id not in self._provider_index[provider_name]:
self._provider_index[provider_name].append(credential.id)
logger.debug(f"Indexed provider '{provider_name}' -> '{credential.id}'")
# Index by alias for multi-account routing
alias_key = credential.keys.get("_alias")
if alias_key:
alias = alias_key.value.get_secret_value()
if alias:
self._alias_index[f"{provider_name}:{alias}"] = credential.id
def load_by_alias(self, provider_name: str, alias: str) -> CredentialObject | None:
    """Resolve a provider/alias pair to its credential via the alias index.

    Args:
        provider_name: Provider type (e.g. "google", "slack").
        alias: User-set alias from the Aden platform.

    Returns:
        The credential loaded for the indexed ID, or None when the
        ``provider:alias`` pair is not in the alias index.
    """
    lookup_key = f"{provider_name}:{alias}"
    cred_id = self._alias_index.get(lookup_key)
    return self._load_by_id(cred_id) if cred_id else None
def rebuild_provider_index(self) -> int:
"""
Rebuild the provider index from all locally cached credentials.
Rebuild the provider and alias indexes from all locally cached credentials.
Useful after loading from disk when the in-memory index is empty.
Useful after loading from disk when the in-memory indexes are empty.
Returns:
Number of provider mappings indexed.
"""
self._provider_index.clear()
self._alias_index.clear()
indexed = 0
for cred_id in self._local.list_all():
cred = self._local.load(cred_id)
@@ -328,8 +379,8 @@ class AdenCachedStorage(CredentialStorage):
"""
Sync all credentials from Aden server to local cache.
Fetches the list of available integrations from Aden and
updates the local cache with current tokens.
Calls GET /v1/credentials to list active integrations,
then fetches tokens for each.
Returns:
Number of credentials synced.
@@ -341,9 +392,7 @@ class AdenCachedStorage(CredentialStorage):
for info in integrations:
if info.status != "active":
logger.warning(
f"Skipping integration '{info.integration_id}': status={info.status}"
)
logger.warning(f"Skipping integration '{info.alias}': status={info.status}")
continue
try:
@@ -351,9 +400,9 @@ class AdenCachedStorage(CredentialStorage):
if cred:
self.save(cred)
synced += 1
logger.info(f"Synced credential '{info.integration_id}' from Aden")
logger.info(f"Synced credential '{info.alias}' from Aden")
except Exception as e:
logger.warning(f"Failed to sync '{info.integration_id}': {e}")
logger.warning(f"Failed to sync '{info.alias}': {e}")
except Exception as e:
logger.error(f"Failed to list integrations from Aden: {e}")
@@ -61,11 +61,13 @@ def mock_client(aden_config):
def aden_response():
"""Create a sample Aden credential response."""
return AdenCredentialResponse(
integration_id="hubspot",
integration_type="hubspot",
integration_id="aHVic3BvdDp0ZXN0OjEzNjExOjExNTI1",
access_token="test-access-token",
token_type="Bearer",
expires_at=datetime.now(UTC) + timedelta(hours=1),
provider="hubspot",
alias="My HubSpot",
email="test@example.com",
scopes=["crm.objects.contacts.read", "crm.objects.contacts.write"],
metadata={"portal_id": "12345"},
)
@@ -108,18 +110,20 @@ class TestAdenCredentialResponse:
"""Tests for AdenCredentialResponse dataclass."""
def test_from_dict_basic(self):
"""Test creating response from dict."""
"""Test creating response from dict (real get-token format)."""
data = {
"integration_id": "github",
"integration_type": "github",
"access_token": "ghp_xxxxx",
"token_type": "Bearer",
"provider": "github",
"alias": "Work",
}
response = AdenCredentialResponse.from_dict(data)
response = AdenCredentialResponse.from_dict(data, integration_id="Z2l0aHViOldvcms6MTIzNDU")
assert response.integration_id == "github"
assert response.integration_type == "github"
assert response.integration_id == "Z2l0aHViOldvcms6MTIzNDU"
assert response.access_token == "ghp_xxxxx"
assert response.provider == "github"
assert response.integration_type == "github" # backward compat property
assert response.token_type == "Bearer"
assert response.expires_at is None
assert response.scopes == []
@@ -127,19 +131,23 @@ class TestAdenCredentialResponse:
def test_from_dict_full(self):
"""Test creating response with all fields."""
data = {
"integration_id": "hubspot",
"integration_type": "hubspot",
"access_token": "token123",
"token_type": "Bearer",
"expires_at": "2026-01-28T15:30:00Z",
"provider": "hubspot",
"alias": "My HubSpot",
"email": "test@example.com",
"scopes": ["read", "write"],
"metadata": {"key": "value"},
}
response = AdenCredentialResponse.from_dict(data)
response = AdenCredentialResponse.from_dict(data, integration_id="aHVic3BvdDp0ZXN0")
assert response.integration_id == "hubspot"
assert response.integration_id == "aHVic3BvdDp0ZXN0"
assert response.access_token == "token123"
assert response.provider == "hubspot"
assert response.alias == "My HubSpot"
assert response.email == "test@example.com"
assert response.expires_at is not None
assert response.scopes == ["read", "write"]
assert response.metadata == {"key": "value"}
@@ -149,21 +157,44 @@ class TestAdenIntegrationInfo:
"""Tests for AdenIntegrationInfo dataclass."""
def test_from_dict(self):
"""Test creating integration info from dict."""
"""Test creating integration info from real API format."""
data = {
"integration_id": "slack",
"integration_type": "slack",
"integration_id": "c2xhY2s6V29yayBTbGFjazoxMjM0NQ",
"provider": "slack",
"alias": "Work Slack",
"status": "active",
"expires_at": "2026-02-01T00:00:00Z",
"email": "user@example.com",
"expires_at": "2026-02-20T21:46:04.863Z",
}
info = AdenIntegrationInfo.from_dict(data)
assert info.integration_id == "slack"
assert info.integration_type == "slack"
assert info.integration_id == "c2xhY2s6V29yayBTbGFjazoxMjM0NQ"
assert info.provider == "slack"
assert info.integration_type == "slack" # backward compat property
assert info.alias == "Work Slack"
assert info.email == "user@example.com"
assert info.status == "active"
assert info.expires_at is not None
def test_from_dict_minimal(self):
    """Integration info built from only the required fields gets empty defaults."""
    parsed = AdenIntegrationInfo.from_dict(
        {
            "integration_id": "Z29vZ2xlOlRpbW90aHk6MTYwNjc",
            "provider": "google",
            "alias": "Timothy",
            "status": "requires_reauth",
        }
    )

    # Fields supplied in the payload come through unchanged.
    supplied = (parsed.integration_id, parsed.provider, parsed.alias, parsed.status)
    assert supplied == (
        "Z29vZ2xlOlRpbW90aHk6MTYwNjc",
        "google",
        "Timothy",
        "requires_reauth",
    )

    # Fields missing from the payload fall back to empty values.
    assert parsed.email == ""
    assert parsed.expires_at is None
# =============================================================================
# AdenSyncProvider Tests
@@ -220,10 +251,11 @@ class TestAdenSyncProvider:
def test_refresh_success(self, provider, mock_client, aden_response):
"""Test successful credential refresh."""
hash_id = "aHVic3BvdDp0ZXN0OjEzNjExOjExNTI1"
mock_client.request_refresh.return_value = aden_response
cred = CredentialObject(
id="hubspot",
id=hash_id,
credential_type=CredentialType.OAUTH2,
keys={
"access_token": CredentialKey(
@@ -239,7 +271,7 @@ class TestAdenSyncProvider:
assert refreshed.keys["access_token"].value.get_secret_value() == "test-access-token"
assert refreshed.keys["_aden_managed"].value.get_secret_value() == "true"
assert refreshed.last_refreshed is not None
mock_client.request_refresh.assert_called_once_with("hubspot")
mock_client.request_refresh.assert_called_once_with(hash_id)
def test_refresh_requires_reauth(self, provider, mock_client):
"""Test refresh that requires re-authorization."""
@@ -339,12 +371,13 @@ class TestAdenSyncProvider:
def test_fetch_from_aden(self, provider, mock_client, aden_response):
"""Test fetching credential from Aden."""
hash_id = "aHVic3BvdDp0ZXN0OjEzNjExOjExNTI1"
mock_client.get_credential.return_value = aden_response
cred = provider.fetch_from_aden("hubspot")
cred = provider.fetch_from_aden(hash_id)
assert cred is not None
assert cred.id == "hubspot"
assert cred.id == hash_id
assert cred.keys["access_token"].value.get_secret_value() == "test-access-token"
assert cred.auto_refresh is True
@@ -360,13 +393,15 @@ class TestAdenSyncProvider:
"""Test syncing all credentials."""
mock_client.list_integrations.return_value = [
AdenIntegrationInfo(
integration_id="hubspot",
integration_type="hubspot",
integration_id="aHVic3BvdDp0ZXN0OjEzNjExOjExNTI1",
provider="hubspot",
alias="My HubSpot",
status="active",
),
AdenIntegrationInfo(
integration_id="github",
integration_type="github",
integration_id="Z2l0aHViOnRlc3Q6OTk5",
provider="github",
alias="Work GitHub",
status="requires_reauth", # Should be skipped
),
]
@@ -376,7 +411,7 @@ class TestAdenSyncProvider:
synced = provider.sync_all(store)
assert synced == 1 # Only active one was synced
assert store.get_credential("hubspot") is not None
assert store.get_credential("aHVic3BvdDp0ZXN0OjEzNjExOjExNTI1") is not None
def test_validate_via_aden(self, provider, mock_client):
"""Test validation via Aden introspection."""
@@ -608,7 +643,7 @@ class TestAdenCachedStorage:
cached_storage.save(cred)
assert cached_storage._provider_index["hubspot"] == "aHVic3BvdDp0ZXN0OjEzNjExOjExNTI1"
assert cached_storage._provider_index["hubspot"] == ["aHVic3BvdDp0ZXN0OjEzNjExOjExNTI1"]
def test_load_by_provider_name(self, cached_storage):
"""Test load resolves provider name to hash-based credential ID."""
@@ -711,8 +746,8 @@ class TestAdenCachedStorage:
indexed = cached_storage.rebuild_provider_index()
assert indexed == 2
assert cached_storage._provider_index["hubspot"] == "hash_hub"
assert cached_storage._provider_index["slack"] == "hash_slack"
assert cached_storage._provider_index["hubspot"] == ["hash_hub"]
assert cached_storage._provider_index["slack"] == ["hash_slack"]
def test_save_without_integration_type_no_index(self, cached_storage):
"""Test save does not index credentials without _integration_type key."""
@@ -743,19 +778,23 @@ class TestAdenIntegration:
def test_full_workflow(self, mock_client, aden_response):
"""Test full workflow: sync, get, refresh."""
hash_id = "aHVic3BvdDp0ZXN0OjEzNjExOjExNTI1"
# Setup
mock_client.list_integrations.return_value = [
AdenIntegrationInfo(
integration_id="hubspot",
integration_type="hubspot",
integration_id=hash_id,
provider="hubspot",
alias="My HubSpot",
status="active",
),
]
mock_client.get_credential.return_value = aden_response
mock_client.request_refresh.return_value = AdenCredentialResponse(
integration_id="hubspot",
integration_type="hubspot",
integration_id=hash_id,
access_token="refreshed-token",
provider="hubspot",
alias="My HubSpot",
expires_at=datetime.now(UTC) + timedelta(hours=2),
scopes=[],
)
@@ -772,8 +811,8 @@ class TestAdenIntegration:
synced = provider.sync_all(store)
assert synced == 1
# Get credential
cred = store.get_credential("hubspot")
# Get credential by hash ID
cred = store.get_credential(hash_id)
assert cred is not None
assert cred.keys["access_token"].value.get_secret_value() == "test-access-token"
+201
View File
@@ -0,0 +1,201 @@
"""
Dedicated file-based storage for bootstrap credentials.
HIVE_CREDENTIAL_KEY -> ~/.hive/secrets/credential_key (plain text, chmod 600)
ADEN_API_KEY -> ~/.hive/credentials/ (encrypted via EncryptedFileStorage)
Boot order:
1. load_credential_key() -- reads the Fernet key (env > file > shell config), sets os.environ
(use generate_and_save_credential_key() to create one when none exists)
2. load_aden_api_key() -- uses the encrypted store (which needs the key from step 1)
"""
from __future__ import annotations
import logging
import os
import stat
from pathlib import Path
logger = logging.getLogger(__name__)
CREDENTIAL_KEY_PATH = Path.home() / ".hive" / "secrets" / "credential_key"
CREDENTIAL_KEY_ENV_VAR = "HIVE_CREDENTIAL_KEY"
ADEN_CREDENTIAL_ID = "aden_api_key"
ADEN_ENV_VAR = "ADEN_API_KEY"
# ---------------------------------------------------------------------------
# HIVE_CREDENTIAL_KEY
# ---------------------------------------------------------------------------
def load_credential_key() -> str | None:
    """Resolve HIVE_CREDENTIAL_KEY from the env, then the key file, then shell config.

    A key found in the file or the shell config is also exported to
    ``os.environ["HIVE_CREDENTIAL_KEY"]``.

    Returns:
        The key string, or ``None`` when no source provides one.
    """
    # The environment wins (set by parent process, CI, Windows Registry, etc.).
    from_env = os.environ.get(CREDENTIAL_KEY_ENV_VAR)
    if from_env:
        return from_env

    # Dedicated secrets file first; the shell config is only consulted as a
    # backward-compatible fallback for old installs.
    found = _read_credential_key_file() or _read_from_shell_config(CREDENTIAL_KEY_ENV_VAR)
    if not found:
        return None

    os.environ[CREDENTIAL_KEY_ENV_VAR] = found
    return found
def save_credential_key(key: str) -> Path:
    """Save HIVE_CREDENTIAL_KEY to ``~/.hive/secrets/credential_key``.

    Creates missing parent directories, restricts the secrets directory to
    mode 700, and writes the key file with mode 600. Also sets
    ``os.environ["HIVE_CREDENTIAL_KEY"]``.

    Args:
        key: The credential key to persist.

    Returns:
        The path that was written.
    """
    path = CREDENTIAL_KEY_PATH
    path.parent.mkdir(parents=True, exist_ok=True)
    # Restrict the secrets directory itself
    path.parent.chmod(stat.S_IRWXU)  # 0o700

    owner_rw = stat.S_IRUSR | stat.S_IWUSR  # 0o600
    # Create the file with owner-only permissions from the start, so the key
    # is never on disk with umask-default (possibly world-readable) mode.
    fd = os.open(path, os.O_WRONLY | os.O_CREAT | os.O_TRUNC, owner_rw)
    with os.fdopen(fd, "w", encoding="utf-8") as handle:
        handle.write(key)
    # os.open's mode only applies on creation; tighten a pre-existing file too.
    path.chmod(owner_rw)

    os.environ[CREDENTIAL_KEY_ENV_VAR] = key
    return path
def generate_and_save_credential_key() -> str:
    """Create a fresh Fernet key and store it via ``save_credential_key``.

    Returns:
        The newly generated key, decoded to a string.
    """
    from cryptography.fernet import Fernet

    fresh_key = Fernet.generate_key().decode()
    save_credential_key(fresh_key)
    return fresh_key
# ---------------------------------------------------------------------------
# ADEN_API_KEY
# ---------------------------------------------------------------------------
def load_aden_api_key() -> str | None:
    """Resolve ADEN_API_KEY from the env, then the encrypted store, then shell config.

    **Must** be called after ``load_credential_key()`` because the encrypted
    store depends on HIVE_CREDENTIAL_KEY.

    A key found in the encrypted store or the shell config is also exported
    to ``os.environ["ADEN_API_KEY"]``.

    Returns:
        The key string, or ``None`` when no source provides one.
    """
    # The environment wins.
    from_env = os.environ.get(ADEN_ENV_VAR)
    if from_env:
        return from_env

    # Encrypted store first; the shell config is only consulted as a
    # backward-compatible fallback.
    found = _read_aden_from_encrypted_store() or _read_from_shell_config(ADEN_ENV_VAR)
    if not found:
        return None

    os.environ[ADEN_ENV_VAR] = found
    return found
def save_aden_api_key(key: str) -> None:
    """Persist ADEN_API_KEY in the encrypted credential store.

    The key is stored under the ``api_key`` entry of the credential whose
    ID is ``ADEN_CREDENTIAL_ID``, and is also exported to
    ``os.environ["ADEN_API_KEY"]``.
    """
    from pydantic import SecretStr

    from .models import CredentialKey, CredentialObject
    from .storage import EncryptedFileStorage

    store = EncryptedFileStorage()
    api_key_entry = CredentialKey(name="api_key", value=SecretStr(key))
    credential = CredentialObject(
        id=ADEN_CREDENTIAL_ID,
        keys={"api_key": api_key_entry},
    )
    store.save(credential)
    os.environ[ADEN_ENV_VAR] = key
def delete_aden_api_key() -> None:
    """Remove ADEN_API_KEY from the encrypted store and ``os.environ``.

    Deleting from the encrypted store is best-effort: any failure is logged
    at debug level (with its traceback) and the environment variable is
    still removed.
    """
    try:
        from .storage import EncryptedFileStorage

        storage = EncryptedFileStorage()
        storage.delete(ADEN_CREDENTIAL_ID)
    except Exception:
        # Keep the cause in the log so a failed delete can be diagnosed.
        logger.debug(
            "Could not delete %s from encrypted store", ADEN_CREDENTIAL_ID, exc_info=True
        )
    os.environ.pop(ADEN_ENV_VAR, None)
# ---------------------------------------------------------------------------
# Internal helpers
# ---------------------------------------------------------------------------
def _read_credential_key_file() -> str | None:
    """Read the credential key from ``~/.hive/secrets/credential_key``.

    Returns:
        The file content with surrounding whitespace stripped, or ``None``
        when the file is missing, empty, or cannot be read.
    """
    try:
        if CREDENTIAL_KEY_PATH.is_file():
            value = CREDENTIAL_KEY_PATH.read_text(encoding="utf-8").strip()
            if value:
                return value
    except Exception:
        # Best-effort read: keep the cause (permissions, encoding, ...) in the
        # debug log instead of discarding it.
        logger.debug("Could not read %s", CREDENTIAL_KEY_PATH, exc_info=True)
    return None
def _read_from_shell_config(env_var: str) -> str | None:
"""Fallback: read an env var from ~/.zshrc or ~/.bashrc."""
try:
from aden_tools.credentials.shell_config import check_env_var_in_shell_config
found, value = check_env_var_in_shell_config(env_var)
if found and value:
return value
except ImportError:
pass
return None
def _read_aden_from_encrypted_store() -> str | None:
    """Try to load ADEN_API_KEY from the encrypted credential store.

    Returns:
        The stored ``api_key`` value, or ``None`` when HIVE_CREDENTIAL_KEY is
        not set in the environment, no credential is stored, or loading fails.
    """
    # Without the credential key in the environment the lookup is skipped.
    if not os.environ.get(CREDENTIAL_KEY_ENV_VAR):
        return None
    try:
        from .storage import EncryptedFileStorage

        storage = EncryptedFileStorage()
        cred = storage.load(ADEN_CREDENTIAL_ID)
        if cred:
            return cred.get_key("api_key")
    except Exception:
        # Best-effort load: keep the cause in the debug log so a failure
        # can be told apart from "nothing stored".
        logger.debug(
            "Could not load %s from encrypted store", ADEN_CREDENTIAL_ID, exc_info=True
        )
    return None
@@ -0,0 +1,31 @@
"""
Local credential registry: named API key accounts with identity metadata.
Provides feature parity with Aden OAuth credentials for locally-stored API keys:
aliases, identity metadata, status tracking, CRUD, and health validation.
Usage:
from framework.credentials.local import LocalCredentialRegistry, LocalAccountInfo
registry = LocalCredentialRegistry.default()
# Add a named account
info, health = registry.save_account("brave_search", "work", "BSA-xxx")
# List all stored local accounts
for account in registry.list_accounts():
print(f"{account.credential_id}/{account.alias}: {account.status}")
if account.identity.is_known:
print(f" Identity: {account.identity.label}")
# Re-validate a stored account
result = registry.validate_account("github", "personal")
"""
from .models import LocalAccountInfo
from .registry import LocalCredentialRegistry
__all__ = [
"LocalAccountInfo",
"LocalCredentialRegistry",
]
@@ -0,0 +1,58 @@
"""
Data models for the local credential registry.
LocalAccountInfo mirrors AdenIntegrationInfo, giving local API key credentials
the same identity/status metadata as Aden OAuth credentials.
"""
from __future__ import annotations
from dataclasses import dataclass, field
from datetime import datetime
from framework.credentials.models import CredentialIdentity
def _utc_now() -> datetime:
    """Return the current time as a timezone-aware UTC datetime."""
    # Local import keeps this block self-contained; the module itself only
    # imports ``datetime``.
    from datetime import UTC

    return datetime.now(UTC)


@dataclass
class LocalAccountInfo:
    """
    A locally-stored named credential account.

    Mirrors AdenIntegrationInfo so local and Aden accounts can be treated
    uniformly in the credential tester and account selection UI.

    Attributes:
        credential_id: The logical credential name (e.g. "brave_search", "github")
        alias: User-provided name for this account (e.g. "work", "personal")
        status: "active" | "failed" | "unknown"
        identity: Email, username, workspace, or account_id extracted from health check
        last_validated: When the key was last verified against the live API
        created_at: When this account was first stored (defaults to the
            current time as a timezone-aware UTC datetime)
    """

    credential_id: str
    alias: str
    status: str = "unknown"
    identity: CredentialIdentity = field(default_factory=CredentialIdentity)
    last_validated: datetime | None = None
    # Timezone-aware default: ``datetime.utcnow`` is deprecated and returns a
    # naive value.
    created_at: datetime = field(default_factory=_utc_now)

    @property
    def storage_id(self) -> str:
        """The key used in EncryptedFileStorage: '{credential_id}/{alias}'."""
        return f"{self.credential_id}/{self.alias}"

    def to_account_dict(self) -> dict:
        """
        Format compatible with AccountSelectionScreen and configure_for_account().

        Same shape as Aden account dicts, with source='local' added.
        ``integration_id`` is always None for local accounts.
        """
        return {
            "provider": self.credential_id,
            "alias": self.alias,
            "identity": self.identity.to_dict(),
            "integration_id": None,
            "source": "local",
            "status": self.status,
        }
@@ -0,0 +1,326 @@
"""
Local Credential Registry.
Manages named local API key accounts stored in EncryptedFileStorage.
Mirrors the Aden integration model so local credentials have feature parity:
aliases, identity metadata, status tracking, CRUD, and health validation.
Storage convention:
{credential_id}/{alias} -> CredentialObject
e.g. "brave_search/work" -> { api_key: "BSA-xxx", _alias: "work",
_integration_type: "brave_search",
_status: "active",
_identity_username: "acme", ... }
Usage:
registry = LocalCredentialRegistry.default()
# Add a new account
info, health = registry.save_account("brave_search", "work", "BSA-xxx")
print(info.status, info.identity.label)
# List all accounts
for account in registry.list_accounts():
print(f"{account.credential_id}/{account.alias}: {account.status}")
# Get the raw API key for a specific account
key = registry.get_key("github", "personal")
# Re-validate a stored account
result = registry.validate_account("github", "personal")
"""
from __future__ import annotations
import logging
from datetime import UTC, datetime
from pathlib import Path
from typing import TYPE_CHECKING, Any
from framework.credentials.models import CredentialIdentity, CredentialObject
from framework.credentials.storage import EncryptedFileStorage
from .models import LocalAccountInfo
if TYPE_CHECKING:
from aden_tools.credentials.health_check import HealthCheckResult
logger = logging.getLogger(__name__)
_SEPARATOR = "/"
class LocalCredentialRegistry:
"""
Named local API key account store backed by EncryptedFileStorage.
Provides the same list/save/get/delete/validate surface as the Aden
client, but for locally-stored API keys.
"""
def __init__(self, storage: EncryptedFileStorage) -> None:
    """Create a registry that reads and writes accounts through ``storage``.

    Args:
        storage: The encrypted file storage backing this registry.
    """
    self._storage = storage
# ------------------------------------------------------------------
# Listing
# ------------------------------------------------------------------
def list_accounts(self, credential_id: str | None = None) -> list[LocalAccountInfo]:
    """
    Enumerate the named local accounts held in storage.

    Storage entries without the ``{credential_id}/{alias}`` separator, entries
    that fail to load, and entries that cannot be converted to account info
    are skipped.

    Args:
        credential_id: If given, filter to this credential type only.

    Returns:
        List of LocalAccountInfo sorted by credential_id then alias.
    """
    found: list[LocalAccountInfo] = []

    for storage_id in self._storage.list_all():
        # Skip legacy un-aliased entries
        if _SEPARATOR not in storage_id:
            continue

        try:
            stored = self._storage.load(storage_id)
        except Exception as exc:
            logger.debug("Skipping unreadable credential %s: %s", storage_id, exc)
            continue

        info = self._to_account_info(stored) if stored is not None else None
        if info is None:
            continue
        if credential_id and info.credential_id != credential_id:
            continue
        found.append(info)

    found.sort(key=lambda acct: (acct.credential_id, acct.alias))
    return found
# ------------------------------------------------------------------
# Save / add
# ------------------------------------------------------------------
def save_account(
    self,
    credential_id: str,
    alias: str,
    api_key: str,
    run_health_check: bool = True,
    extra_keys: dict[str, str] | None = None,
) -> tuple[LocalAccountInfo, HealthCheckResult | None]:
    """
    Persist a named account, optionally checking the key against the live API first.

    Args:
        credential_id: Logical credential name (e.g. "brave_search").
        alias: User-chosen name (e.g. "work"). An empty alias becomes "default".
        api_key: The raw API key / token value.
        run_health_check: If True, verify the key against the live API
            and extract identity metadata. The account is saved either
            way: with status "failed" when the key is rejected, or
            "unknown" when the check itself raises.
        extra_keys: Additional key/value pairs to store (e.g.
            cse_id for google_custom_search).

    Returns:
        (LocalAccountInfo, HealthCheckResult | None)
    """
    alias = alias or "default"
    health_result: HealthCheckResult | None = None
    identity: dict[str, str] = {}
    status = "active"

    if run_health_check:
        try:
            from aden_tools.credentials.health_check import check_credential_health

            # Only cse_id is forwarded to the health check.
            check_kwargs: dict[str, Any] = {}
            if extra_keys and "cse_id" in extra_keys:
                check_kwargs["cse_id"] = extra_keys["cse_id"]

            health_result = check_credential_health(credential_id, api_key, **check_kwargs)
            status = "active" if health_result.valid else "failed"
            identity = health_result.details.get("identity", {})
        except Exception as exc:
            logger.warning("Health check failed for %s/%s: %s", credential_id, alias, exc)
            status = "unknown"

    storage_id = f"{credential_id}{_SEPARATOR}{alias}"
    now = datetime.now(UTC)

    cred_obj = CredentialObject(id=storage_id)
    core_entries = (
        ("api_key", api_key),
        ("_alias", alias),
        ("_integration_type", credential_id),
        ("_status", status),
    )
    for entry_name, entry_value in core_entries:
        cred_obj.set_key(entry_name, entry_value)

    # Extra keys are written after the core entries.
    for entry_name, entry_value in (extra_keys or {}).items():
        cred_obj.set_key(entry_name, entry_value)

    if identity:
        # Keep only the identity fields that CredentialIdentity declares.
        allowed = set(CredentialIdentity.model_fields)
        known = {name: value for name, value in identity.items() if name in allowed}
        if known:
            cred_obj.set_identity(**known)

    cred_obj.last_refreshed = now if run_health_check else None
    self._storage.save(cred_obj)

    return (
        LocalAccountInfo(
            credential_id=credential_id,
            alias=alias,
            status=status,
            identity=cred_obj.identity,
            last_validated=cred_obj.last_refreshed,
            created_at=cred_obj.created_at,
        ),
        health_result,
    )
# ------------------------------------------------------------------
# Get
# ------------------------------------------------------------------
def get_account(self, credential_id: str, alias: str) -> CredentialObject | None:
    """Fetch the raw CredentialObject stored under '{credential_id}/{alias}'."""
    storage_id = f"{credential_id}{_SEPARATOR}{alias}"
    return self._storage.load(storage_id)
def get_key(self, credential_id: str, alias: str, key_name: str = "api_key") -> str | None:
    """
    Look up one stored secret value of a specific account.

    Args:
        credential_id: Logical credential name (e.g. "brave_search").
        alias: Account alias (e.g. "work").
        key_name: Key within the credential (default "api_key").

    Returns:
        The secret value, or None if the account is not found.
    """
    account = self.get_account(credential_id, alias)
    return None if account is None else account.get_key(key_name)
def get_account_info(self, credential_id: str, alias: str) -> LocalAccountInfo | None:
    """Build the LocalAccountInfo for a specific account, or None if it is not stored."""
    account = self.get_account(credential_id, alias)
    return None if account is None else self._to_account_info(account)
# ------------------------------------------------------------------
# Delete
# ------------------------------------------------------------------
def delete_account(self, credential_id: str, alias: str) -> bool:
    """
    Delete the stored account identified by credential id and alias.

    Returns:
        Whatever ``self._storage.delete`` reports for the composed storage id
        (documented as True if the account existed and was deleted, False
        otherwise).
    """
    storage_id = f"{credential_id}{_SEPARATOR}{alias}"
    return self._storage.delete(storage_id)
# ------------------------------------------------------------------
# Validate
# ------------------------------------------------------------------
def validate_account(self, credential_id: str, alias: str) -> HealthCheckResult:
    """
    Re-run the health check for a stored account and persist the outcome.

    The account's ``_status`` key is set to "active" or "failed",
    ``last_refreshed`` is set to the current UTC time, and any recognised
    identity fields reported by the check are written back before saving.

    Args:
        credential_id: Logical credential name.
        alias: Account alias.

    Returns:
        HealthCheckResult from the live API check. If no ``api_key`` is
        stored, an invalid result is returned and nothing is saved.

    Raises:
        KeyError: If the account doesn't exist.
    """
    from aden_tools.credentials.health_check import HealthCheckResult, check_credential_health

    cred = self.get_account(credential_id, alias)
    if cred is None:
        raise KeyError(f"No local account found: {credential_id}/{alias}")

    api_key = cred.get_key("api_key")
    if not api_key:
        return HealthCheckResult(valid=False, message="No api_key stored for this account")

    try:
        # A stored cse_id is forwarded to the checker as an extra keyword.
        cse_id = cred.get_key("cse_id")
        extra: dict[str, Any] = {"cse_id": cse_id} if cse_id else {}
        result = check_credential_health(credential_id, api_key, **extra)
    except Exception as exc:
        # Any failure of the check itself is reported as an invalid result.
        result = HealthCheckResult(
            valid=False,
            message=f"Health check error: {exc}",
            details={"error": str(exc)},
        )

    # Record the verdict and the time of this validation on the credential.
    cred.set_key("_status", "active" if result.valid else "failed")
    cred.last_refreshed = datetime.now(UTC)

    # Only identity fields that CredentialIdentity declares are written back.
    reported = result.details.get("identity", {})
    if reported:
        allowed = set(CredentialIdentity.model_fields)
        accepted = {name: value for name, value in reported.items() if name in allowed}
        if accepted:
            cred.set_identity(**accepted)

    self._storage.save(cred)
    return result
# ------------------------------------------------------------------
# Factory
# ------------------------------------------------------------------
@classmethod
def default(cls) -> LocalCredentialRegistry:
    """Build a registry on an EncryptedFileStorage created with its default settings."""
    storage = EncryptedFileStorage()
    return cls(storage)
@classmethod
def at_path(cls, path: str | Path) -> LocalCredentialRegistry:
    """Build a registry on an EncryptedFileStorage rooted at ``path``."""
    storage = EncryptedFileStorage(base_path=path)
    return cls(storage)
# ------------------------------------------------------------------
# Internals
# ------------------------------------------------------------------
def _to_account_info(self, cred_obj: CredentialObject) -> LocalAccountInfo | None:
    """Convert a CredentialObject into a LocalAccountInfo.

    Returns None when the credential has no ``_integration_type`` key.
    A missing ``_alias`` key falls back to the part of the credential id
    after the first ``_SEPARATOR``; a missing ``_status`` becomes "unknown".
    """
    keys = cred_obj.keys

    type_key = keys.get("_integration_type")
    if type_key is None:
        return None
    integration_type = type_key.get_secret_value()

    alias_key = keys.get("_alias")
    if alias_key:
        alias = alias_key.get_secret_value()
    else:
        alias = cred_obj.id.split(_SEPARATOR, 1)[-1]

    status_key = keys.get("_status")
    if status_key:
        status = status_key.get_secret_value()
    else:
        status = "unknown"

    return LocalAccountInfo(
        credential_id=integration_type,
        alias=alias,
        status=status,
        identity=cred_obj.identity,
        last_validated=cred_obj.last_refreshed,
        created_at=cred_obj.created_at,
    )
+52
View File
@@ -70,6 +70,29 @@ class CredentialKey(BaseModel):
return self.value.get_secret_value()
class CredentialIdentity(BaseModel):
    """Describes which account a credential belongs to."""

    email: str | None = None
    username: str | None = None
    workspace: str | None = None
    account_id: str | None = None

    @property
    def label(self) -> str:
        """First non-empty of email, username, workspace, account_id; else "unknown"."""
        for candidate in (self.email, self.username, self.workspace, self.account_id):
            if candidate:
                return candidate
        return "unknown"

    @property
    def is_known(self) -> bool:
        """True when at least one identity field holds a non-empty value."""
        return any((self.email, self.username, self.workspace, self.account_id))

    def to_dict(self) -> dict[str, str]:
        """Dump the model, leaving out every field whose value is None."""
        populated = {}
        for name, value in self.model_dump().items():
            if value is not None:
                populated[name] = value
        return populated
class CredentialObject(BaseModel):
"""
A credential object containing one or more keys.
@@ -202,6 +225,35 @@ class CredentialObject(BaseModel):
return None
@property
def identity(self) -> CredentialIdentity:
    """Build a CredentialIdentity from the ``_identity_*`` keys of this credential.

    Keys whose suffix is not a declared CredentialIdentity field are ignored.
    """
    prefix = "_identity_"
    declared = CredentialIdentity.model_fields
    extracted = {}
    for key_name, key_obj in self.keys.items():
        if not key_name.startswith(prefix):
            continue
        field_name = key_name[len(prefix) :]
        if field_name in declared:
            extracted[field_name] = key_obj.value.get_secret_value()
    return CredentialIdentity(**extracted)
@property
def provider_type(self) -> str | None:
    """Value of the ``_integration_type`` key (e.g. 'google', 'slack'), or None if unset."""
    type_key = self.keys.get("_integration_type")
    if not type_key:
        return None
    return type_key.value.get_secret_value()
@property
def alias(self) -> str | None:
    """Value of the ``_alias`` key (the user-set alias), or None if unset."""
    alias_key = self.keys.get("_alias")
    if not alias_key:
        return None
    return alias_key.value.get_secret_value()
def set_identity(self, **fields: str) -> None:
    """Store each non-empty identity field under an ``_identity_<name>`` key.

    Fields with a falsy value (None, empty string) are skipped.
    """
    populated = {name: value for name, value in fields.items() if value}
    for name, value in populated.items():
        self.set_key(f"_identity_{name}", value)
class CredentialUsageSpec(BaseModel):
"""
@@ -73,6 +73,7 @@ from .provider import (
TokenExpiredError,
TokenPlacement,
)
from .zoho_provider import ZohoOAuth2Provider
__all__ = [
# Types
@@ -82,6 +83,7 @@ __all__ = [
# Providers
"BaseOAuth2Provider",
"HubSpotOAuth2Provider",
"ZohoOAuth2Provider",
# Lifecycle
"TokenLifecycleManager",
"TokenRefreshResult",
@@ -0,0 +1,198 @@
"""
Zoho CRM-specific OAuth2 provider.
Pre-configured for Zoho's OAuth2 endpoints and CRM scopes.
Extends BaseOAuth2Provider for Zoho-specific behavior.
Usage:
provider = ZohoOAuth2Provider(
client_id="your-client-id",
client_secret="your-client-secret",
accounts_domain="https://accounts.zoho.com", # or .in, .eu, etc.
)
# Use with credential store
store = CredentialStore(
storage=EncryptedFileStorage(),
providers=[provider],
)
See: https://www.zoho.com/crm/developer/docs/api/v2/access-refresh.html
"""
from __future__ import annotations
import logging
import os
from typing import Any
from ..models import CredentialObject, CredentialRefreshError, CredentialType
from .base_provider import BaseOAuth2Provider
from .provider import OAuth2Config, OAuth2Token, TokenPlacement
logger = logging.getLogger(__name__)
# Default CRM scopes requested when the caller supplies none (Phase 1).
# Leads, Contacts, Accounts and Deals get full (ALL) access; Notes is limited
# to CREATE.
ZOHO_DEFAULT_SCOPES = [
    "ZohoCRM.modules.leads.ALL",
    "ZohoCRM.modules.contacts.ALL",
    "ZohoCRM.modules.accounts.ALL",
    "ZohoCRM.modules.deals.ALL",
    "ZohoCRM.modules.notes.CREATE",
]
class ZohoOAuth2Provider(BaseOAuth2Provider):
    """
    Zoho CRM OAuth2 provider with pre-configured endpoints.

    Handles Zoho-specific OAuth2 behavior:
    - Pre-configured token and authorization URLs (region-aware)
    - Default CRM scopes for Leads, Contacts, Accounts, Deals, Notes
    - Token validation via Zoho CRM API
    - Authorization header format: "Authorization: Zoho-oauthtoken {token}"

    Example:
        provider = ZohoOAuth2Provider(
            client_id="your-zoho-client-id",
            client_secret="your-zoho-client-secret",
            accounts_domain="https://accounts.zoho.com",  # US
            # or "https://accounts.zoho.in" for India
            # or "https://accounts.zoho.eu" for EU
        )
    """

    # Data-center metadata keys that ``refresh`` reads back out of
    # ``OAuth2Token.raw_response``; ``_parse_token_response`` must keep all of them.
    _DC_METADATA_KEYS = ("api_domain", "accounts-server", "location")

    def __init__(
        self,
        client_id: str,
        client_secret: str,
        accounts_domain: str = "https://accounts.zoho.com",
        api_domain: str | None = None,
        scopes: list[str] | None = None,
    ):
        """
        Initialize Zoho OAuth2 provider.

        Args:
            client_id: Zoho OAuth2 client ID
            client_secret: Zoho OAuth2 client secret
            accounts_domain: Zoho accounts domain (region-specific)
                - US: https://accounts.zoho.com
                - India: https://accounts.zoho.in
                - EU: https://accounts.zoho.eu
                - etc.
            api_domain: Zoho API domain for CRM calls (used in validate).
                Defaults to ZOHO_API_DOMAIN env or https://www.zohoapis.com
            scopes: Override default scopes if needed
        """
        base = accounts_domain.rstrip("/")
        config = OAuth2Config(
            token_url=f"{base}/oauth/v2/token",
            authorization_url=f"{base}/oauth/v2/auth",
            client_id=client_id,
            client_secret=client_secret,
            # Copy so later mutation of the config cannot alter the caller's
            # list or the shared module-level ZOHO_DEFAULT_SCOPES.
            default_scopes=list(scopes or ZOHO_DEFAULT_SCOPES),
            token_placement=TokenPlacement.HEADER_CUSTOM,
            custom_header_name="Authorization",
        )
        super().__init__(config, provider_id="zoho_crm_oauth2")
        self._accounts_domain = base
        self._api_domain = (
            api_domain or os.getenv("ZOHO_API_DOMAIN", "https://www.zohoapis.com")
        ).rstrip("/")

    @property
    def supported_types(self) -> list[CredentialType]:
        """Credential types handled by this provider (OAuth2 only)."""
        return [CredentialType.OAUTH2]

    def format_for_request(self, token: OAuth2Token) -> dict[str, Any]:
        """
        Format token for Zoho CRM API requests.

        Zoho uses Authorization header: "Zoho-oauthtoken {access_token}"
        (not Bearer).

        Returns:
            A dict with a single "headers" entry holding the request headers.
        """
        return {
            "headers": {
                "Authorization": f"Zoho-oauthtoken {token.access_token}",
                "Content-Type": "application/json",
                "Accept": "application/json",
            }
        }

    def validate(self, credential: CredentialObject) -> bool:
        """
        Validate Zoho credential by making a lightweight API call.

        Uses GET /crm/v2/users?type=CurrentUser (doesn't require module access).
        Treats 429 as valid-but-rate-limited.

        Returns:
            False when no access_token is stored or the request raises;
            otherwise True only for HTTP 200 or 429.
        """
        access_token = credential.get_key("access_token")
        if not access_token:
            return False
        try:
            client = self._get_client()
            response = client.get(
                f"{self._api_domain}/crm/v2/users?type=CurrentUser",
                headers={
                    "Authorization": f"Zoho-oauthtoken {access_token}",
                    "Accept": "application/json",
                },
                timeout=self.config.request_timeout,
            )
            return response.status_code in (200, 429)
        except Exception as e:
            # Validation is best-effort: any failure means "not valid".
            logger.debug("Zoho credential validation failed: %s", e)
            return False

    def _parse_token_response(self, response_data: dict[str, Any]) -> OAuth2Token:
        """
        Parse Zoho token response.

        Zoho returns:
            {
                "access_token": "...",
                "refresh_token": "...",
                "expires_in": 3600,
                "api_domain": "https://www.zohoapis.com",
                "token_type": "Bearer"
            }

        Every data-center metadata key that ``refresh`` later looks up
        (``api_domain``, ``accounts-server``, ``location``) is copied into
        ``token.raw_response`` when present in the response.
        """
        token = OAuth2Token.from_token_response(response_data)
        for key in self._DC_METADATA_KEYS:
            if key in response_data:
                token.raw_response[key] = response_data[key]
        return token

    def refresh(self, credential: CredentialObject) -> CredentialObject:
        """Refresh Zoho OAuth2 credential and persist DC metadata.

        Args:
            credential: Credential holding a ``refresh_token`` key.

        Returns:
            The same credential object, updated in place with the new access
            token, a rotated refresh token if one was issued, and any
            ``api_domain`` / ``accounts_domain`` / ``location`` metadata.

        Raises:
            CredentialRefreshError: If there is no refresh_token or the token
                endpoint call fails.
        """
        refresh_tok = credential.get_key("refresh_token")
        if not refresh_tok:
            raise CredentialRefreshError(f"Credential '{credential.id}' has no refresh_token")
        try:
            new_token = self.refresh_access_token(refresh_tok)
        except Exception as e:
            raise CredentialRefreshError(f"Failed to refresh '{credential.id}': {e}") from e

        credential.set_key("access_token", new_token.access_token, expires_at=new_token.expires_at)
        # Only overwrite the refresh token when a different one was issued.
        if new_token.refresh_token and new_token.refresh_token != refresh_tok:
            credential.set_key("refresh_token", new_token.refresh_token)

        api_domain = new_token.raw_response.get("api_domain")
        if isinstance(api_domain, str) and api_domain:
            credential.set_key("api_domain", api_domain.rstrip("/"))
        accounts_server = new_token.raw_response.get("accounts-server")
        if isinstance(accounts_server, str) and accounts_server:
            credential.set_key("accounts_domain", accounts_server.rstrip("/"))
        location = new_token.raw_response.get("location")
        if isinstance(location, str) and location:
            credential.set_key("location", location.strip().lower())
        return credential
+618
View File
@@ -0,0 +1,618 @@
"""
Interactive credential setup for CLI applications.
Provides a modular, reusable credential setup flow that can be triggered
when validate_agent_credentials() fails. Works with both TUI and headless CLIs.
Usage:
from framework.credentials.setup import CredentialSetupSession
# From agent path
session = CredentialSetupSession.from_agent_path("exports/my-agent")
result = session.run_interactive()
# From nodes directly
session = CredentialSetupSession.from_nodes(nodes)
result = session.run_interactive()
# With custom I/O (for integration with other UIs)
session = CredentialSetupSession(
missing=missing_creds,
input_fn=my_input,
print_fn=my_print,
)
"""
from __future__ import annotations
import getpass
import json
import os
import sys
from collections.abc import Callable
from dataclasses import dataclass, field
from pathlib import Path
from typing import TYPE_CHECKING, Any
if TYPE_CHECKING:
from framework.graph import NodeSpec
# ANSI colors for terminal output
class Colors:
    """ANSI escape sequences used to colour terminal output."""

    RED = "\033[0;31m"
    GREEN = "\033[0;32m"
    YELLOW = "\033[1;33m"
    BLUE = "\033[0;34m"
    CYAN = "\033[0;36m"
    BOLD = "\033[1m"
    DIM = "\033[2m"
    NC = "\033[0m"  # No Color

    @classmethod
    def disable(cls):
        """Blank out every colour code (for non-TTY output)."""
        for name in ("RED", "GREEN", "YELLOW", "BLUE", "CYAN", "BOLD", "DIM", "NC"):
            setattr(cls, name, "")
@dataclass
class MissingCredential:
    """Describes one credential that still has to be configured."""

    # Internal credential name (e.g., 'brave_search')
    credential_name: str
    # Environment variable name (e.g., 'BRAVE_SEARCH_API_KEY')
    env_var: str
    # Human-readable description
    description: str
    # URL where the user can obtain the credential
    help_url: str
    # Step-by-step instructions for getting an API key
    api_key_instructions: str
    # Tools that require this credential
    tools: list[str] = field(default_factory=list)
    # Node types that require this credential
    node_types: list[str] = field(default_factory=list)
    # Whether the Aden OAuth flow is supported
    aden_supported: bool = False
    # Whether direct API key entry is supported
    direct_api_key_supported: bool = True
    # Credential store ID
    credential_id: str = ""
    # Key name within the credential
    credential_key: str = "api_key"
@dataclass
class SetupResult:
    """Outcome of a credential setup session."""

    # Whether all required credentials were configured
    success: bool
    # Credentials that were successfully set up
    configured: list[str] = field(default_factory=list)
    # Credentials the user chose to skip
    skipped: list[str] = field(default_factory=list)
    # Any errors encountered
    errors: list[str] = field(default_factory=list)
class CredentialSetupSession:
    """
    Interactive credential setup session.

    Can be used by any CLI (runner, coding agent, etc.) to guide users
    through credential configuration when validation fails.

    Example:
        from framework.credentials.setup import CredentialSetupSession
        from framework.credentials.models import CredentialError

        try:
            validate_agent_credentials(nodes)
        except CredentialError:
            session = CredentialSetupSession.from_nodes(nodes)
            result = session.run_interactive()
            if result.success:
                # Retry - credentials are now configured
                validate_agent_credentials(nodes)
    """

    def __init__(
        self,
        missing: list[MissingCredential],
        input_fn: Callable[[str], str] | None = None,
        print_fn: Callable[[str], None] | None = None,
        password_fn: Callable[[str], str] | None = None,
    ):
        """
        Initialize the setup session.

        Args:
            missing: List of credentials that need setup
            input_fn: Custom input function (default: built-in input)
            print_fn: Custom print function (default: built-in print)
            password_fn: Custom password input function (default: getpass.getpass)
        """
        self.missing = missing
        self.input_fn = input_fn or input
        self.print_fn = print_fn or print
        self.password_fn = password_fn or getpass.getpass
        # Disable colors if not a TTY (this blanks the class-level Colors codes)
        if not sys.stdout.isatty():
            Colors.disable()

    @classmethod
    def from_nodes(cls, nodes: list[NodeSpec]) -> CredentialSetupSession:
        """Create a setup session by detecting missing credentials from nodes."""
        from framework.credentials.validation import _status_to_missing, validate_agent_credentials

        result = validate_agent_credentials(nodes, verify=False, raise_on_error=False)
        missing = [_status_to_missing(c) for c in result.credentials if not c.available]
        return cls(missing)

    @classmethod
    def from_agent_path(
        cls,
        agent_path: str | Path,
        *,
        missing_only: bool = True,
    ) -> CredentialSetupSession:
        """Create a setup session for an agent by path.

        Args:
            agent_path: Path to agent folder.
            missing_only: If True (default), only include credentials that
                are NOT yet available. If False, include all required
                credentials regardless of availability.
        """
        from framework.credentials.validation import _status_to_missing, validate_agent_credentials

        nodes = load_agent_nodes(agent_path)
        result = validate_agent_credentials(nodes, verify=False, raise_on_error=False)
        if missing_only:
            missing = [_status_to_missing(c) for c in result.credentials if not c.available]
        else:
            missing = [_status_to_missing(c) for c in result.credentials]
        return cls(missing)

    def run_interactive(self) -> SetupResult:
        """Run the interactive setup flow.

        Returns:
            SetupResult listing configured, skipped and failed credentials.
            ``success`` is True only when nothing was skipped and nothing
            failed. If the user interrupts with Ctrl-C, the credential being
            set up and every credential not yet reached are reported as skipped.
        """
        configured: list[str] = []
        skipped: list[str] = []
        errors: list[str] = []

        if not self.missing:
            self._print(f"\n{Colors.GREEN}✓ All credentials are already configured!{Colors.NC}\n")
            return SetupResult(success=True)

        self._print_header()

        # Ensure HIVE_CREDENTIAL_KEY is set before storing anything
        if not self._ensure_credential_key():
            return SetupResult(
                success=False,
                errors=["Failed to initialize credential store encryption key"],
            )

        for index, cred in enumerate(self.missing):
            try:
                if self._setup_single_credential(cred):
                    configured.append(cred.credential_name)
                else:
                    skipped.append(cred.credential_name)
            except KeyboardInterrupt:
                self._print(f"\n{Colors.YELLOW}Setup interrupted.{Colors.NC}")
                # Nothing from here on gets configured, so report the current
                # credential and all remaining ones as skipped.
                skipped.extend(c.credential_name for c in self.missing[index:])
                break
            except Exception as e:
                # One failing credential must not abort the rest of the session.
                errors.append(f"{cred.credential_name}: {e}")

        self._print_summary(configured, skipped, errors)
        return SetupResult(
            success=len(errors) == 0 and len(skipped) == 0,
            configured=configured,
            skipped=skipped,
            errors=errors,
        )

    def _print(self, msg: str) -> None:
        """Print a message through the configured print function."""
        self.print_fn(msg)

    def _input(self, prompt: str) -> str:
        """Get input from the user through the configured input function."""
        return self.input_fn(prompt)

    def _read_secret(self, prompt: str) -> str:
        """Read a secret with hidden input, falling back to plain input on failure."""
        try:
            return self.password_fn(prompt).strip()
        except Exception:
            # Fallback to regular input if password input fails
            return self._input(prompt).strip()

    def _print_header(self) -> None:
        """Print the setup header and the list of credentials to configure."""
        self._print("")
        self._print(f"{Colors.YELLOW}{'=' * 60}{Colors.NC}")
        self._print(f"{Colors.BOLD} CREDENTIAL SETUP{Colors.NC}")
        self._print(f"{Colors.YELLOW}{'=' * 60}{Colors.NC}")
        self._print("")
        self._print(f" {len(self.missing)} credential(s) need to be configured:")
        for cred in self.missing:
            affected = cred.tools or cred.node_types
            self._print(f"{cred.env_var} ({', '.join(affected)})")
        self._print("")

    def _ensure_credential_key(self) -> bool:
        """Ensure HIVE_CREDENTIAL_KEY is available for encrypted storage.

        Returns True if a key could be loaded or freshly generated and saved,
        False if generation failed.
        """
        from .key_storage import generate_and_save_credential_key, load_credential_key

        if load_credential_key():
            return True

        # Generate a new key
        self._print(f"{Colors.YELLOW}Initializing credential store...{Colors.NC}")
        try:
            generate_and_save_credential_key()
            self._print(
                f"{Colors.GREEN}✓ Encryption key saved to ~/.hive/secrets/credential_key{Colors.NC}"
            )
            return True
        except Exception as e:
            self._print(f"{Colors.RED}Failed to initialize credential store: {e}{Colors.NC}")
            return False

    def _setup_single_credential(self, cred: MissingCredential) -> bool:
        """Set up a single credential. Returns True if configured."""
        separator = "-" * 60
        self._print(f"\n{Colors.CYAN}{separator}{Colors.NC}")
        self._print(f"{Colors.BOLD}Setting up: {cred.credential_name}{Colors.NC}")
        affected = cred.tools or cred.node_types
        self._print(f"{Colors.DIM}Required for: {', '.join(affected)}{Colors.NC}")
        if cred.description:
            self._print(f"{Colors.DIM}{cred.description}{Colors.NC}")
        self._print(f"{Colors.CYAN}{separator}{Colors.NC}")

        # Show auth options
        options = self._get_auth_options(cred)
        choice = self._prompt_choice(options)

        if choice == "aden":
            return self._setup_via_aden(cred)
        if choice == "direct":
            return self._setup_direct_api_key(cred)
        # "skip" and anything unexpected both mean: not configured.
        return False

    def _get_auth_options(self, cred: MissingCredential) -> list[tuple[str, str, str]]:
        """Get available auth options as (key, label, description) tuples.

        "skip" is always offered and always comes last.
        """
        options = []
        if cred.direct_api_key_supported:
            options.append(
                (
                    "direct",
                    "Enter API key directly",
                    "Paste your API key from the provider's dashboard",
                )
            )
        if cred.aden_supported:
            options.append(
                (
                    "aden",
                    "Use Aden Platform (OAuth)",
                    "Secure OAuth2 flow via hive.adenhq.com",
                )
            )
        options.append(
            (
                "skip",
                "Skip for now",
                "Configure this credential later",
            )
        )
        return options

    def _prompt_choice(self, options: list[tuple[str, str, str]]) -> str:
        """Prompt user to choose from options; returns the chosen option key.

        Loops until a number between 1 and ``len(options)`` is entered.
        Empty input re-prompts silently.
        """
        self._print("")
        for i, (key, label, desc) in enumerate(options, 1):
            if key == "skip":
                self._print(f" {Colors.DIM}{i}) {label}{Colors.NC}")
            else:
                self._print(f" {Colors.CYAN}{i}){Colors.NC} {label}")
                self._print(f" {Colors.DIM}{desc}{Colors.NC}")
        self._print("")

        while True:
            try:
                choice_str = self._input(f"Select option (1-{len(options)}): ").strip()
                if not choice_str:
                    continue
                choice_num = int(choice_str)
                if 1 <= choice_num <= len(options):
                    return options[choice_num - 1][0]
            except ValueError:
                pass
            self._print(f"{Colors.RED}Invalid choice. Enter 1-{len(options)}{Colors.NC}")

    def _setup_direct_api_key(self, cred: MissingCredential) -> bool:
        """Guide user through direct API key setup. Returns True if stored."""
        # Show instructions
        if cred.api_key_instructions:
            self._print(f"\n{Colors.BOLD}Setup Instructions:{Colors.NC}")
            self._print(cred.api_key_instructions)
        if cred.help_url:
            self._print(f"\n{Colors.CYAN}Get your API key at:{Colors.NC} {cred.help_url}")

        # Collect key (use password input to hide the value)
        self._print("")
        api_key = self._read_secret(f"Paste your {cred.env_var}: ")
        if not api_key:
            self._print(f"{Colors.YELLOW}No value entered. Skipping.{Colors.NC}")
            return False

        # Health check (None means no verdict is available)
        health_result = self._run_health_check(cred, api_key)
        if health_result is not None:
            if health_result["valid"]:
                self._print(f"{Colors.GREEN}{health_result['message']}{Colors.NC}")
            else:
                self._print(f"{Colors.YELLOW}{health_result['message']}{Colors.NC}")
                confirm = self._input("Continue anyway? [y/N]: ").strip().lower()
                if confirm != "y":
                    return False

        # Store credential
        self._store_credential(cred, api_key)
        return True

    def _setup_via_aden(self, cred: MissingCredential) -> bool:
        """Guide user through Aden OAuth flow. Returns True if the credential synced."""
        self._print(f"\n{Colors.BOLD}Aden Platform Setup{Colors.NC}")
        self._print("This will sync credentials from your Aden account.")
        self._print("")

        # Check for ADEN_API_KEY
        aden_key = os.environ.get("ADEN_API_KEY")
        if not aden_key:
            self._print("You need an Aden API key to use this method.")
            self._print(f"{Colors.CYAN}Get one at:{Colors.NC} https://hive.adenhq.com")
            self._print("")
            aden_key = self._read_secret("Paste your ADEN_API_KEY: ")
            if not aden_key:
                self._print(f"{Colors.YELLOW}No key entered. Skipping.{Colors.NC}")
                return False

            # Persist to encrypted store and set os.environ
            from .key_storage import save_aden_api_key

            save_aden_api_key(aden_key)

        # Sync from Aden
        try:
            from framework.credentials import CredentialStore

            store = CredentialStore.with_aden_sync(
                base_url="https://api.adenhq.com",
                auto_sync=True,
            )

            # Check if the credential was synced
            cred_id = cred.credential_id or cred.credential_name
            if store.is_available(cred_id):
                self._print(f"{Colors.GREEN}{cred.credential_name} synced from Aden{Colors.NC}")
                # Export to current session
                try:
                    value = store.get_key(cred_id, cred.credential_key)
                    if value:
                        os.environ[cred.env_var] = value
                except Exception:
                    # Deliberately best-effort: the credential is already
                    # available in the store even if the env export fails.
                    pass
                return True
            else:
                self._print(
                    f"{Colors.YELLOW}{cred.credential_name} not found in Aden account.{Colors.NC}"
                )
                self._print("Please connect this integration on https://hive.adenhq.com first.")
                return False
        except Exception as e:
            self._print(f"{Colors.RED}Failed to sync from Aden: {e}{Colors.NC}")
            return False

    def _run_health_check(self, cred: MissingCredential, value: str) -> dict[str, Any] | None:
        """Run health check on credential value.

        Returns a dict with "valid", "message" and "details", or None when the
        checker cannot be imported or raises.
        """
        try:
            from aden_tools.credentials import check_credential_health

            result = check_credential_health(cred.credential_name, value)
            return {
                "valid": result.valid,
                "message": result.message,
                "details": result.details,
            }
        except Exception:
            # No health checker available
            return None

    def _store_credential(self, cred: MissingCredential, value: str) -> None:
        """Store credential in encrypted store and export to env.

        A storage failure is reported but not raised; the value is exported to
        ``os.environ`` either way.
        """
        from pydantic import SecretStr

        from framework.credentials import CredentialKey, CredentialObject, CredentialStore

        try:
            store = CredentialStore.with_encrypted_storage()
            cred_id = cred.credential_id or cred.credential_name
            key_name = cred.credential_key or "api_key"
            cred_obj = CredentialObject(
                id=cred_id,
                name=cred.description or cred.credential_name,
                keys={key_name: CredentialKey(name=key_name, value=SecretStr(value))},
            )
            store.save_credential(cred_obj)
            self._print(f"{Colors.GREEN}✓ Stored in ~/.hive/credentials/{Colors.NC}")
        except Exception as e:
            self._print(f"{Colors.YELLOW}⚠ Could not store in credential store: {e}{Colors.NC}")

        # Export to current session
        os.environ[cred.env_var] = value
        self._print(f"{Colors.GREEN}✓ Exported to current session{Colors.NC}")

    def _print_summary(self, configured: list[str], skipped: list[str], errors: list[str]) -> None:
        """Print final summary of configured, skipped and failed credentials."""
        self._print("")
        self._print(f"{Colors.YELLOW}{'=' * 60}{Colors.NC}")
        self._print(f"{Colors.BOLD} SETUP COMPLETE{Colors.NC}")
        self._print(f"{Colors.YELLOW}{'=' * 60}{Colors.NC}")

        if configured:
            self._print(f"\n{Colors.GREEN}✓ Configured:{Colors.NC}")
            for name in configured:
                self._print(f"{name}")
        if skipped:
            self._print(f"\n{Colors.YELLOW}⏭ Skipped:{Colors.NC}")
            for name in skipped:
                self._print(f"{name}")
        if errors:
            self._print(f"\n{Colors.RED}✗ Errors:{Colors.NC}")
            for err in errors:
                self._print(f"{err}")

        if not skipped and not errors:
            self._print(f"\n{Colors.GREEN}All credentials configured successfully!{Colors.NC}")
        elif skipped:
            self._print(f"\n{Colors.YELLOW}Note: Skipped credentials must be configured ")
            self._print(f"before running the agent.{Colors.NC}")
        self._print("")
def load_agent_nodes(agent_path: str | Path) -> list:
    """Load the NodeSpec list of an agent from its agent.py or agent.json.

    ``agent.py`` takes precedence when both files exist.

    Args:
        agent_path: Path to agent directory.

    Returns:
        List of NodeSpec objects (empty list if neither file exists or the
        agent can't be loaded).
    """
    root = Path(agent_path)
    if (root / "agent.py").exists():
        return _load_nodes_from_python_agent(root)
    json_file = root / "agent.json"
    if json_file.exists():
        return _load_nodes_from_json_agent(json_file)
    return []
def _load_nodes_from_python_agent(agent_path: Path) -> list:
    """Load nodes from a Python-based agent.

    Imports ``<agent_path>/agent.py`` under the module name
    ``"<dirname>.agent"`` and returns its module-level ``nodes`` attribute.

    Args:
        agent_path: Directory containing ``agent.py``.

    Returns:
        The module's ``nodes`` value, or an empty list when ``agent.py`` is
        missing, cannot be imported, or defines no ``nodes``.
    """
    import importlib.util

    agent_py = agent_path / "agent.py"
    if not agent_py.exists():
        return []

    module_name = f"{agent_path.name}.agent"
    # Remember what was registered before so a failed import can be undone.
    previous = sys.modules.get(module_name)
    registered = False
    try:
        # Add agent path and its parent to sys.path so imports work
        for entry in (str(agent_path), str(agent_path.parent)):
            if entry not in sys.path:
                sys.path.insert(0, entry)

        spec = importlib.util.spec_from_file_location(
            module_name,
            agent_py,
            submodule_search_locations=[str(agent_path)],
        )
        # spec_from_file_location may return None, and a spec may lack a loader.
        if spec is None or spec.loader is None:
            return []

        module = importlib.util.module_from_spec(spec)
        sys.modules[module_name] = module
        registered = True
        spec.loader.exec_module(module)
        return getattr(module, "nodes", [])
    except Exception:
        # Do not leave a half-initialised module behind in sys.modules.
        if registered:
            if previous is not None:
                sys.modules[module_name] = previous
            else:
                sys.modules.pop(module_name, None)
        return []
def _load_nodes_from_json_agent(agent_json: Path) -> list:
    """Load nodes from a JSON-based agent.

    Reads ``graph.nodes`` from the JSON file and builds one NodeSpec per
    entry; missing fields default to "" (strings) or [] (lists).

    Returns:
        List of NodeSpec objects, or an empty list if anything fails
        (unreadable file, invalid JSON, import or construction error).
    """
    try:
        with open(agent_json, encoding="utf-8") as handle:
            payload = json.load(handle)

        from framework.graph import NodeSpec

        return [
            NodeSpec(
                id=raw.get("id", ""),
                name=raw.get("name", ""),
                description=raw.get("description", ""),
                node_type=raw.get("node_type", ""),
                tools=raw.get("tools", []),
                input_keys=raw.get("input_keys", []),
                output_keys=raw.get("output_keys", []),
            )
            for raw in payload.get("graph", {}).get("nodes", [])
        ]
    except Exception:
        return []
def run_credential_setup_cli(agent_path: str | Path | None = None) -> int:
    """
    Standalone CLI entry point for credential setup.

    Can be called from:
    - `hive setup-credentials <agent>`
    - After CredentialError in runner CLI
    - From coding agent CLI

    Args:
        agent_path: Optional path to agent directory. When omitted (or
            empty), a usage line is printed and 1 is returned.

    Returns:
        Exit code (0 = success, 1 = failure/skipped)
    """
    if not agent_path:
        # No agent specified - nothing to inspect, so show usage and fail.
        print("Usage: hive setup-credentials <agent_path>")
        return 1

    outcome = CredentialSetupSession.from_agent_path(agent_path).run_interactive()
    if outcome.success:
        return 0
    return 1
+3 -3
View File
@@ -227,7 +227,7 @@ class EncryptedFileStorage(CredentialStorage):
index_path = self.base_path / "metadata" / "index.json"
if not index_path.exists():
return []
with open(index_path) as f:
with open(index_path, encoding="utf-8") as f:
index = json.load(f)
return list(index.get("credentials", {}).keys())
@@ -268,7 +268,7 @@ class EncryptedFileStorage(CredentialStorage):
index_path = self.base_path / "metadata" / "index.json"
if index_path.exists():
with open(index_path) as f:
with open(index_path, encoding="utf-8") as f:
index = json.load(f)
else:
index = {"credentials": {}, "version": "1.0"}
@@ -283,7 +283,7 @@ class EncryptedFileStorage(CredentialStorage):
index["last_modified"] = datetime.now(UTC).isoformat()
with open(index_path, "w") as f:
with open(index_path, "w", encoding="utf-8") as f:
json.dump(index, f, indent=2)
+57
View File
@@ -362,6 +362,59 @@ class CredentialStore:
"""
return self._storage.list_all()
def list_accounts(self, provider_name: str) -> list[dict[str, Any]]:
    """List all accounts for a provider type with their identities.

    Uses the storage backend's ``load_all_for_provider`` when it has one;
    otherwise falls back to the single credential returned by
    ``get_credential(provider_name)``.

    Args:
        provider_name: Provider type name (e.g. "google", "slack").

    Returns:
        List of dicts with credential_id, provider, alias, identity, label.
    """
    if hasattr(self._storage, "load_all_for_provider"):
        creds = self._storage.load_all_for_provider(provider_name)
    else:
        cred = self.get_credential(provider_name)
        creds = [cred] if cred else []

    accounts = []
    for c in creds:
        # Read the identity once per credential and reuse it for both entries.
        identity = c.identity
        accounts.append(
            {
                "credential_id": c.id,
                "provider": provider_name,
                "alias": c.alias,
                "identity": identity.to_dict(),
                "label": identity.label,
            }
        )
    return accounts
def get_credential_by_alias(self, provider_name: str, alias: str) -> CredentialObject | None:
    """Look up a credential by provider name and alias.

    Prefers the storage backend's ``load_by_alias``; backends without it are
    scanned through ``load_all_for_provider`` when that exists.

    Args:
        provider_name: Provider type name (e.g. "google").
        alias: User-set alias from the Aden platform.

    Returns:
        CredentialObject if found, None otherwise.
    """
    # LLMs sometimes pass "provider/alias" as the alias (e.g. "google/wrok"
    # instead of just "wrok"). Strip the provider prefix when present.
    alias = alias.removeprefix(f"{provider_name}/")

    storage = self._storage
    if hasattr(storage, "load_by_alias"):
        return storage.load_by_alias(provider_name, alias)

    # Scan fallback for storage backends without alias index
    if not hasattr(storage, "load_all_for_provider"):
        return None
    candidates = storage.load_all_for_provider(provider_name)
    return next((cred for cred in candidates if cred.alias == alias), None)
def get_credential_by_identity(self, provider_name: str, label: str) -> CredentialObject | None:
    """Backward-compatible name for ``get_credential_by_alias``; ``label`` is used as the alias."""
    match = self.get_credential_by_alias(provider_name, label)
    return match
def is_available(self, credential_id: str) -> bool:
"""
Check if a credential is available.
@@ -374,6 +427,10 @@ class CredentialStore:
"""
return self.get_credential(credential_id, refresh_if_needed=False) is not None
def exists(self, credential_id: str) -> bool:
    """Ask the storage backend directly whether ``credential_id`` is stored.

    Only ``self._storage.exists`` is consulted, so no provider fetch is triggered.
    """
    storage = self._storage
    return storage.exists(credential_id)
# --- Validation ---
def validate_for_usage(self, credential_id: str) -> list[str]:
+460 -75
View File
@@ -8,103 +8,398 @@ from __future__ import annotations
import logging
import os
from dataclasses import dataclass
logger = logging.getLogger(__name__)
def ensure_credential_key_env() -> None:
"""Load HIVE_CREDENTIAL_KEY from shell config if not already in environment.
"""Load bootstrap credentials into ``os.environ``.
The setup-credentials skill writes the encryption key to ~/.zshrc or ~/.bashrc.
If the user hasn't sourced their config in the current shell, this reads it
directly so the runner (and any MCP subprocesses it spawns) can unlock the
encrypted credential store.
Priority chain for each credential:
1. ``os.environ`` (already set — nothing to do)
2. Dedicated file storage (``~/.hive/secrets/`` or encrypted store)
3. Shell config fallback (``~/.zshrc`` / ``~/.bashrc``) for backward compat
Only HIVE_CREDENTIAL_KEY is loaded this way all other secrets (API keys, etc.)
come from the credential store itself.
Boot order matters: HIVE_CREDENTIAL_KEY must load BEFORE ADEN_API_KEY
because the encrypted store depends on it.
Remaining LLM/tool API keys still load from shell config.
"""
if os.environ.get("HIVE_CREDENTIAL_KEY"):
return
from .key_storage import load_aden_api_key, load_credential_key
# Step 1: HIVE_CREDENTIAL_KEY (must come first — encrypted store depends on it)
load_credential_key()
# Step 2: ADEN_API_KEY (uses encrypted store, then shell config fallback)
load_aden_api_key()
# Step 3: Load remaining LLM/tool API keys from shell config
try:
from aden_tools.credentials.shell_config import check_env_var_in_shell_config
found, value = check_env_var_in_shell_config("HIVE_CREDENTIAL_KEY")
if found and value:
os.environ["HIVE_CREDENTIAL_KEY"] = value
logger.debug("Loaded HIVE_CREDENTIAL_KEY from shell config")
except ImportError:
pass
def validate_agent_credentials(nodes: list) -> None:
"""Check that required credentials are available before running an agent.
Scans node specs for required tools and node types, then checks whether
the corresponding credentials exist in the credential store.
Raises CredentialError with actionable guidance if any are missing.
Args:
nodes: List of NodeSpec objects from the agent graph.
"""
required_tools: set[str] = set()
for node in nodes:
if node.tools:
required_tools.update(node.tools)
node_types: set[str] = {node.node_type for node in nodes}
return
try:
from aden_tools.credentials import CREDENTIAL_SPECS
from framework.credentials import CredentialStore
from framework.credentials.storage import (
CompositeStorage,
EncryptedFileStorage,
EnvVarStorage,
)
for spec in CREDENTIAL_SPECS.values():
var_name = spec.env_var
if var_name and var_name not in ("HIVE_CREDENTIAL_KEY", "ADEN_API_KEY"):
if not os.environ.get(var_name):
found, value = check_env_var_in_shell_config(var_name)
if found and value:
os.environ[var_name] = value
logger.debug("Loaded %s from shell config", var_name)
except ImportError:
return # aden_tools not installed, skip check
pass
@dataclass
class CredentialStatus:
    """Status of a single required credential after validation.

    Instances are created during the presence phase of
    ``validate_agent_credentials`` with ``valid`` and ``validation_message``
    set to ``None``; the health-check phase fills those two fields in for
    credentials that are present and have a health-check endpoint.
    """

    credential_name: str  # key of the spec in CREDENTIAL_SPECS
    credential_id: str  # spec.credential_id, falling back to credential_name
    env_var: str  # environment variable name taken from the spec
    description: str
    help_url: str  # shown to the user as "Get it at: ..." when non-empty
    api_key_instructions: str  # "" when the spec has no such attribute
    tools: list[str]  # required tools of the agent that this credential serves
    node_types: list[str]  # required node types that this credential serves
    available: bool  # result of the store's is_available() for credential_id
    valid: bool | None  # None = not checked
    validation_message: str | None  # message returned by the health check, if run
    aden_supported: bool
    direct_api_key_supported: bool
    credential_key: str
    aden_not_connected: bool  # Aden-only cred, ADEN_API_KEY set, but integration missing
    alternative_group: str | None = None  # non-None when multiple providers can satisfy a tool
@dataclass
class CredentialValidationResult:
    """Result of validating all credentials required by an agent.

    Attributes are the per-credential statuses (``credentials``) and whether
    ``ADEN_API_KEY`` was set when validation ran (``has_aden_key``).
    """

    credentials: list[CredentialStatus]
    has_aden_key: bool

    @property
    def failed(self) -> list[CredentialStatus]:
        """Credentials that are missing, invalid, or Aden-not-connected.

        For alternative groups (multi-provider tools like send_email), the group
        is satisfied if ANY member is available and not known-invalid — failures
        are only reported when the entire group is unsatisfied.
        """
        # A group counts as satisfied as soon as one member is usable.
        satisfied_groups = {
            c.alternative_group
            for c in self.credentials
            if c.alternative_group and c.available and c.valid is not False
        }
        return [
            c
            for c in self.credentials
            if (not c.available or c.valid is False)
            and not (c.alternative_group and c.alternative_group in satisfied_groups)
        ]

    @property
    def has_errors(self) -> bool:
        """True when at least one credential is reported by ``failed``."""
        return bool(self.failed)

    @property
    def failed_cred_names(self) -> list[str]:
        """Credential names that need (re-)collection, excluding Aden-not-connected."""
        return [c.credential_name for c in self.failed if not c.aden_not_connected]

    def format_error_message(self) -> str:
        """Format a human-readable error message for CLI/runner output.

        Returns:
            A multi-line string with up to three sections (missing, invalid or
            expired, Aden integrations not connected) followed by a hint about
            restarting the terminal.
        """
        missing = [c for c in self.credentials if not c.available and not c.aden_not_connected]
        invalid = [c for c in self.credentials if c.available and c.valid is False]
        aden_nc = [c for c in self.credentials if c.aden_not_connected]

        lines: list[str] = []
        if missing:
            lines.append("Missing credentials:\n")
            for c in missing:
                entry = f" {c.env_var} for {_label(c)}"
                if c.help_url:
                    entry += f"\n Get it at: {c.help_url}"
                lines.append(entry)
        if invalid:
            if missing:
                lines.append("")
            lines.append("Invalid or expired credentials:\n")
            for c in invalid:
                entry = f" {c.env_var} for {_label(c)}"
                # Separate the label from the health-check message, and omit the
                # suffix entirely when no message was recorded (avoids "None").
                if c.validation_message:
                    entry += f" — {c.validation_message}"
                if c.help_url:
                    entry += f"\n Get a new key at: {c.help_url}"
                lines.append(entry)
        if aden_nc:
            if missing or invalid:
                lines.append("")
            lines.append(
                "Aden integrations not connected "
                "(ADEN_API_KEY is set but OAuth tokens unavailable):\n"
            )
            for c in aden_nc:
                lines.append(
                    f" {c.env_var} for {_label(c)}"
                    f"\n Connect this integration at hive.adenhq.com first."
                )
        lines.append("\nIf you've already set up credentials, restart your terminal to load them.")
        return "\n".join(lines)


def _label(c: CredentialStatus) -> str:
    """Build a human-readable label from tools/node_types.

    Prefers the affected tools, then the affected node types (suffixed with
    " nodes"), and falls back to the credential name.
    """
    if c.tools:
        return ", ".join(c.tools)
    if c.node_types:
        return ", ".join(c.node_types) + " nodes"
    return c.credential_name
def _presync_aden_tokens(credential_specs: dict, *, force: bool = False) -> None:
    """Export Aden-backed OAuth tokens into ``os.environ`` ahead of validation.

    Builds an Aden-synced ``CredentialStore`` and, for every spec flagged
    ``aden_supported``, copies the stored key into the spec's env var so that
    validation sees the token Aden currently holds. Env vars that are already
    set are left alone unless ``force`` is given. Any failure is logged as a
    warning; nothing is raised to the caller.

    Args:
        credential_specs: Mapping of credential name to spec object.
        force: When True, overwrite env vars that are already set. Used by
            the credentials modal to pick up freshly reauthorized tokens
            from Aden instead of reusing stale values from a prior sync.
    """
    from framework.credentials.store import CredentialStore

    try:
        synced_store = CredentialStore.with_aden_sync(auto_sync=True)
    except Exception as err:
        logger.warning("Aden pre-sync unavailable: %s", err)
        return

    for spec_name, spec in credential_specs.items():
        if not spec.aden_supported:
            continue

        # An explicit user export wins unless the caller forces a refresh.
        should_sync = force or not os.environ.get(spec.env_var)
        if not should_sync:
            continue

        target_id = spec.credential_id or spec_name

        # sync_all() already fetched everything available from Aden.
        # Anything absent from the store isn't connected, and fetching it
        # individually would fail with "Invalid integration ID".
        if not synced_store.exists(target_id):
            continue

        try:
            token = synced_store.get_key(target_id, spec.credential_key)
            if not token:
                logger.warning(
                    "Pre-sync: %s (id=%s) available but key '%s' returned None",
                    spec.env_var,
                    target_id,
                    spec.credential_key,
                )
                continue
            os.environ[spec.env_var] = token
            logger.debug("Pre-synced %s from Aden", spec.env_var)
        except Exception as err:
            logger.warning(
                "Pre-sync failed for %s (id=%s): %s",
                spec.env_var,
                target_id,
                err,
            )
def validate_agent_credentials(
nodes: list,
quiet: bool = False,
verify: bool = True,
raise_on_error: bool = True,
force_refresh: bool = False,
) -> CredentialValidationResult:
"""Check that required credentials are available and valid before running an agent.
Two-phase validation:
1. **Presence** is the credential set (env var, encrypted store, or Aden sync)?
2. **Health check** does the credential actually work? Uses each tool's
registered ``check_credential_health`` endpoint (lightweight HTTP call).
Args:
nodes: List of NodeSpec objects from the agent graph.
quiet: If True, suppress the credential summary output.
verify: If True (default), run health checks on present credentials.
raise_on_error: If True (default), raise CredentialError when validation
fails. Set to False to get the result without raising.
force_refresh: If True, force re-sync of Aden OAuth tokens even when
env vars are already set. Used by the credentials modal after
reauthorization.
Returns:
CredentialValidationResult with status of ALL required credentials.
"""
empty_result = CredentialValidationResult(credentials=[], has_aden_key=False)
# Collect required tools and node types
required_tools: set[str] = set()
node_types: set[str] = set()
for node in nodes:
if hasattr(node, "tools") and node.tools:
required_tools.update(node.tools)
if hasattr(node, "node_type"):
node_types.add(node.node_type)
try:
from aden_tools.credentials import CREDENTIAL_SPECS
except ImportError:
return empty_result # aden_tools not installed, skip check
from framework.credentials.storage import CompositeStorage, EncryptedFileStorage, EnvVarStorage
from framework.credentials.store import CredentialStore
# Build credential store.
# Env vars take priority — if a user explicitly exports a fresh key it
# must win over a potentially stale value in the encrypted store.
#
# Pre-sync: when ADEN_API_KEY is available, sync OAuth tokens from Aden
# into env vars so validation sees fresh tokens instead of stale values
# in the encrypted store (e.g., a previously mis-stored google.enc).
if os.environ.get("ADEN_API_KEY"):
_presync_aden_tokens(CREDENTIAL_SPECS, force=force_refresh)
# Build credential store
env_mapping = {
(spec.credential_id or name): spec.env_var for name, spec in CREDENTIAL_SPECS.items()
}
storages: list = [EnvVarStorage(env_mapping=env_mapping)]
env_storage = EnvVarStorage(env_mapping=env_mapping)
if os.environ.get("HIVE_CREDENTIAL_KEY"):
storages.insert(0, EncryptedFileStorage())
if len(storages) == 1:
storage = storages[0]
storage = CompositeStorage(primary=env_storage, fallbacks=[EncryptedFileStorage()])
else:
storage = CompositeStorage(primary=storages[0], fallbacks=storages[1:])
storage = env_storage
store = CredentialStore(storage=storage)
# Build reverse mappings
tool_to_cred: dict[str, str] = {}
# Build reverse mappings — 1:many for multi-provider tools (e.g. send_email → resend OR google)
tool_to_creds: dict[str, list[str]] = {}
node_type_to_cred: dict[str, str] = {}
for cred_name, spec in CREDENTIAL_SPECS.items():
for tool_name in spec.tools:
tool_to_cred[tool_name] = cred_name
tool_to_creds.setdefault(tool_name, []).append(cred_name)
for nt in spec.node_types:
node_type_to_cred[nt] = cred_name
missing: list[str] = []
has_aden_key = bool(os.environ.get("ADEN_API_KEY"))
checked: set[str] = set()
all_credentials: list[CredentialStatus] = []
# Credentials that are present and should be health-checked
to_verify: list[int] = [] # indices into all_credentials
def _check_credential(
spec,
cred_name: str,
affected_tools: list[str],
affected_node_types: list[str],
alternative_group: str | None = None,
) -> None:
cred_id = spec.credential_id or cred_name
available = store.is_available(cred_id)
# Aden-not-connected: ADEN_API_KEY set, Aden-only cred, but integration missing
is_aden_nc = (
not available
and has_aden_key
and spec.aden_supported
and not spec.direct_api_key_supported
)
status = CredentialStatus(
credential_name=cred_name,
credential_id=cred_id,
env_var=spec.env_var,
description=spec.description,
help_url=spec.help_url,
api_key_instructions=getattr(spec, "api_key_instructions", ""),
tools=affected_tools,
node_types=affected_node_types,
available=available,
valid=None,
validation_message=None,
aden_supported=spec.aden_supported,
direct_api_key_supported=spec.direct_api_key_supported,
credential_key=spec.credential_key,
aden_not_connected=is_aden_nc,
alternative_group=alternative_group,
)
all_credentials.append(status)
if available and verify and spec.health_check_endpoint:
to_verify.append(len(all_credentials) - 1)
# Check tool credentials
for tool_name in sorted(required_tools):
cred_name = tool_to_cred.get(tool_name)
if cred_name is None or cred_name in checked:
cred_names = tool_to_creds.get(tool_name)
if cred_names is None:
continue
checked.add(cred_name)
spec = CREDENTIAL_SPECS[cred_name]
cred_id = spec.credential_id or cred_name
if spec.required and not store.is_available(cred_id):
# Filter to credentials we haven't already checked
unchecked = [cn for cn in cred_names if cn not in checked]
if not unchecked:
continue
# Single provider — existing behavior
if len(unchecked) == 1:
cred_name = unchecked[0]
checked.add(cred_name)
spec = CREDENTIAL_SPECS[cred_name]
if not spec.required:
continue
affected = sorted(t for t in required_tools if t in spec.tools)
entry = f" {spec.env_var} for {', '.join(affected)}"
if spec.help_url:
entry += f"\n Get it at: {spec.help_url}"
missing.append(entry)
_check_credential(spec, cred_name, affected_tools=affected, affected_node_types=[])
continue
# Multi-provider (e.g. send_email → resend OR google):
# satisfied if ANY provider credential is available.
available_cn = None
for cn in unchecked:
spec = CREDENTIAL_SPECS[cn]
cred_id = spec.credential_id or cn
if store.is_available(cred_id):
available_cn = cn
break
if available_cn is not None:
# Found an available provider — check (and health-check) it
checked.add(available_cn)
spec = CREDENTIAL_SPECS[available_cn]
affected = sorted(t for t in required_tools if t in spec.tools)
_check_credential(spec, available_cn, affected_tools=affected, affected_node_types=[])
else:
# None available — report ALL alternatives so the modal can show them
group_key = tool_name # e.g. "send_email"
for cn in unchecked:
checked.add(cn)
spec = CREDENTIAL_SPECS[cn]
affected = sorted(t for t in required_tools if t in spec.tools)
_check_credential(
spec,
cn,
affected_tools=affected,
affected_node_types=[],
alternative_group=group_key,
)
# Check node type credentials (e.g., ANTHROPIC_API_KEY for LLM nodes)
for nt in sorted(node_types):
@@ -113,21 +408,111 @@ def validate_agent_credentials(nodes: list) -> None:
continue
checked.add(cred_name)
spec = CREDENTIAL_SPECS[cred_name]
cred_id = spec.credential_id or cred_name
if spec.required and not store.is_available(cred_id):
affected_types = sorted(t for t in node_types if t in spec.node_types)
entry = f" {spec.env_var} for {', '.join(affected_types)} nodes"
if spec.help_url:
entry += f"\n Get it at: {spec.help_url}"
missing.append(entry)
if not spec.required:
continue
affected_types = sorted(t for t in node_types if t in spec.node_types)
_check_credential(spec, cred_name, affected_tools=[], affected_node_types=affected_types)
if missing:
# Phase 2: health-check present credentials
if to_verify:
try:
from aden_tools.credentials import check_credential_health
except ImportError:
check_credential_health = None # type: ignore[assignment]
if check_credential_health is not None:
for idx in to_verify:
status = all_credentials[idx]
spec = CREDENTIAL_SPECS[status.credential_name]
value = store.get(status.credential_id)
if not value:
continue
try:
result = check_credential_health(
status.credential_name,
value,
health_check_endpoint=spec.health_check_endpoint,
health_check_method=spec.health_check_method,
)
status.valid = result.valid
status.validation_message = result.message
if result.valid:
# Persist identity from health check (best-effort)
identity_data = result.details.get("identity")
if identity_data and isinstance(identity_data, dict):
try:
cred_obj = store.get_credential(
status.credential_id, refresh_if_needed=False
)
if cred_obj:
cred_obj.set_identity(**identity_data)
store.save_credential(cred_obj)
except Exception:
pass # Identity persistence is best-effort
except Exception as exc:
logger.debug("Health check for %s failed: %s", status.credential_name, exc)
validation_result = CredentialValidationResult(
credentials=all_credentials,
has_aden_key=has_aden_key,
)
if raise_on_error and validation_result.has_errors:
from framework.credentials.models import CredentialError
lines = ["Missing required credentials:\n"]
lines.extend(missing)
lines.append(
"\nTo fix: run /hive-credentials in Claude Code."
"\nIf you've already set up credentials, restart your terminal to load them."
)
raise CredentialError("\n".join(lines))
exc = CredentialError(validation_result.format_error_message())
exc.validation_result = validation_result # type: ignore[attr-defined]
exc.failed_cred_names = validation_result.failed_cred_names # type: ignore[attr-defined]
raise exc
return validation_result
def build_setup_session_from_error(
    credential_error: Exception,
    nodes: list | None = None,
    agent_path: str | None = None,
):
    """Create a ``CredentialSetupSession`` covering every failed credential.

    The ``validation_result`` attribute attached to the error is used when it
    exists. Otherwise the session is re-detected from ``nodes``, then from
    ``agent_path``; with neither available an empty session is returned.

    Args:
        credential_error: The ``CredentialError`` raised by validation.
        nodes: Graph nodes (preferred — avoids re-loading from disk).
        agent_path: Agent directory path (used when nodes aren't available).
    """
    from framework.credentials.setup import CredentialSetupSession

    attached: CredentialValidationResult | None = getattr(
        credential_error, "validation_result", None
    )
    if attached is not None:
        # Translate each failed status into the setup flow's own record type.
        return CredentialSetupSession([_status_to_missing(status) for status in attached.failed])

    if nodes is not None:
        return CredentialSetupSession.from_nodes(nodes)
    if agent_path is not None:
        return CredentialSetupSession.from_agent_path(agent_path)
    return CredentialSetupSession(missing=[])
def _status_to_missing(c: CredentialStatus):
    """Copy the setup-relevant fields of a ``CredentialStatus`` into a ``MissingCredential``."""
    from framework.credentials.setup import MissingCredential

    # Fields carried over one-to-one under the same name.
    shared_fields = (
        "credential_name",
        "env_var",
        "description",
        "help_url",
        "api_key_instructions",
        "tools",
        "node_types",
        "aden_supported",
        "direct_api_key_supported",
        "credential_id",
        "credential_key",
    )
    return MissingCredential(**{name: getattr(c, name) for name in shared_fields})
+2 -52
View File
@@ -1,4 +1,4 @@
"""Graph structures: Goals, Nodes, Edges, and Flexible Execution."""
"""Graph structures: Goals, Nodes, Edges, and Execution."""
from framework.graph.client_io import (
ActiveNodeClientIO,
@@ -6,7 +6,6 @@ from framework.graph.client_io import (
InertNodeClientIO,
NodeClientIO,
)
from framework.graph.code_sandbox import CodeSandbox, safe_eval, safe_exec
from framework.graph.context_handoff import ContextHandoff, HandoffContext
from framework.graph.conversation import ConversationStore, Message, NodeConversation
from framework.graph.edge import DEFAULT_MAX_TOKENS, EdgeCondition, EdgeSpec, GraphSpec
@@ -18,31 +17,9 @@ from framework.graph.event_loop_node import (
OutputAccumulator,
)
from framework.graph.executor import GraphExecutor
from framework.graph.flexible_executor import ExecutorConfig, FlexibleGraphExecutor
from framework.graph.goal import Constraint, Goal, GoalStatus, SuccessCriterion
from framework.graph.judge import HybridJudge, create_default_judge
from framework.graph.node import NodeContext, NodeProtocol, NodeResult, NodeSpec
# Flexible execution (Worker-Judge pattern)
from framework.graph.plan import (
ActionSpec,
ActionType,
# HITL (Human-in-the-loop)
ApprovalDecision,
ApprovalRequest,
ApprovalResult,
EvaluationRule,
ExecutionStatus,
Judgment,
JudgmentAction,
Plan,
PlanExecutionResult,
PlanStep,
StepStatus,
load_export,
)
from framework.graph.worker_node import StepExecutionResult, WorkerNode
__all__ = [
# Goal
"Goal",
@@ -59,35 +36,8 @@ __all__ = [
"EdgeCondition",
"GraphSpec",
"DEFAULT_MAX_TOKENS",
# Executor (fixed graph)
# Executor
"GraphExecutor",
# Plan (flexible execution)
"Plan",
"PlanStep",
"ActionSpec",
"ActionType",
"StepStatus",
"Judgment",
"JudgmentAction",
"EvaluationRule",
"PlanExecutionResult",
"ExecutionStatus",
"load_export",
# HITL (Human-in-the-loop)
"ApprovalDecision",
"ApprovalRequest",
"ApprovalResult",
# Worker-Judge
"HybridJudge",
"create_default_judge",
"WorkerNode",
"StepExecutionResult",
"FlexibleGraphExecutor",
"ExecutorConfig",
# Code Sandbox
"CodeSandbox",
"safe_exec",
"safe_eval",
# Conversation
"NodeConversation",
"ConversationStore",
+6 -1
View File
@@ -46,9 +46,11 @@ class ActiveNodeClientIO(NodeClientIO):
self,
node_id: str,
event_bus: EventBus | None = None,
execution_id: str = "",
) -> None:
self.node_id = node_id
self._event_bus = event_bus
self._execution_id = execution_id
self._output_queue: asyncio.Queue[str | None] = asyncio.Queue()
self._output_snapshot = ""
@@ -66,6 +68,7 @@ class ActiveNodeClientIO(NodeClientIO):
node_id=self.node_id,
content=content,
snapshot=self._output_snapshot,
execution_id=self._execution_id or None,
)
if is_final:
@@ -83,6 +86,7 @@ class ActiveNodeClientIO(NodeClientIO):
stream_id=self.node_id,
node_id=self.node_id,
prompt=prompt,
execution_id=self._execution_id or None,
)
try:
@@ -158,11 +162,12 @@ class ClientIOGateway:
def __init__(self, event_bus: EventBus | None = None) -> None:
self._event_bus = event_bus
def create_io(self, node_id: str, client_facing: bool) -> NodeClientIO:
def create_io(self, node_id: str, client_facing: bool, execution_id: str = "") -> NodeClientIO:
if client_facing:
return ActiveNodeClientIO(
node_id=node_id,
event_bus=self._event_bus,
execution_id=execution_id,
)
return InertNodeClientIO(
node_id=node_id,
-413
View File
@@ -1,413 +0,0 @@
"""
Code Sandbox for Safe Execution of Dynamic Code.
Provides a restricted execution environment for code generated by
the external planner. This is critical for open-ended planning where
the planner can create arbitrary code actions.
Security measures:
1. Restricted builtins (no file I/O, no imports of dangerous modules)
2. Timeout enforcement
3. Memory limits (via resource module on Unix)
4. Namespace isolation
"""
import ast
import signal
import sys
from contextlib import contextmanager
from dataclasses import dataclass, field
from typing import Any
# Safe builtins whitelist.
# CodeSandbox copies this mapping into the ``__builtins__`` entry of the
# namespace it executes code in, so these are the only builtin names
# sandboxed code can resolve. Note that ``print``, ``open``, ``getattr`` and
# ``type`` are not listed.
SAFE_BUILTINS = {
    # Basic types
    "True": True,
    "False": False,
    "None": None,
    # Type constructors
    "bool": bool,
    "int": int,
    "float": float,
    "str": str,
    "list": list,
    "dict": dict,
    "set": set,
    "tuple": tuple,
    "frozenset": frozenset,
    # Basic functions
    "abs": abs,
    "all": all,
    "any": any,
    "bin": bin,
    "chr": chr,
    "divmod": divmod,
    "enumerate": enumerate,
    "filter": filter,
    "format": format,
    "hex": hex,
    "isinstance": isinstance,
    "issubclass": issubclass,
    "iter": iter,
    "len": len,
    "map": map,
    "max": max,
    "min": min,
    "next": next,
    "oct": oct,
    "ord": ord,
    "pow": pow,
    "range": range,
    "repr": repr,
    "reversed": reversed,
    "round": round,
    "slice": slice,
    "sorted": sorted,
    "sum": sum,
    "zip": zip,
}
# Modules that can be imported.
# Default whitelist handed to RestrictedImporter, which compares the requested
# name against this set by exact match.
ALLOWED_MODULES = {
    "math",
    "json",
    "re",
    "datetime",
    "collections",
    "itertools",
    "functools",
    "operator",
    "string",
    "random",
    "statistics",
    "decimal",
    "fractions",
}
# Dangerous AST nodes to block.
# Default for CodeValidator: any node of exactly one of these types is
# reported as an issue. Because Import/ImportFrom are listed, ``import``
# statements are rejected at validation time.
BLOCKED_AST_NODES = {
    ast.Import,
    ast.ImportFrom,
    ast.Global,
    ast.Nonlocal,
}
class CodeSandboxError(Exception):
    """Base class for errors raised while running code in the sandbox."""
class TimeoutError(CodeSandboxError):
    """Sandboxed code exceeded its time limit.

    Within this module the name shadows the built-in ``TimeoutError``.
    """
class SecurityError(CodeSandboxError):
    """Sandboxed code attempted an operation the sandbox does not permit."""
@dataclass
class SandboxResult:
    """Result of sandboxed code execution.

    Returned by the sandbox for both successes and failures; failures carry
    ``success=False`` and a human-readable ``error``.
    """

    success: bool  # True when the code ran to completion without raising
    result: Any = None  # value of the ``result`` variable, or of the evaluated expression
    error: str | None = None  # failure description; None on success
    stdout: str = ""  # text written to sys.stdout while the code ran
    variables: dict[str, Any] = field(default_factory=dict)  # variables extracted from the namespace
    execution_time_ms: int = 0  # wall-clock duration in milliseconds
class RestrictedImporter:
    """Callable stand-in for ``__import__`` that serves only whitelisted modules.

    Modules are imported on first request and memoised per importer instance.
    """

    def __init__(self, allowed_modules: set[str]):
        self.allowed_modules = allowed_modules
        self._cache: dict[str, Any] = {}

    def __call__(self, name: str, *args, **kwargs):
        """Return module ``name``; extra ``__import__`` arguments are ignored.

        Raises:
            SecurityError: If ``name`` is not in the whitelist.
        """
        if name not in self.allowed_modules:
            raise SecurityError(f"Import of module '{name}' is not allowed")

        module = self._cache.get(name)
        if module is None:
            import importlib

            module = importlib.import_module(name)
            self._cache[name] = module
        return module
class CodeValidator:
"""Validates code for safety before execution."""
def __init__(self, blocked_nodes: set[type] | None = None):
self.blocked_nodes = blocked_nodes or BLOCKED_AST_NODES
def validate(self, code: str) -> list[str]:
"""
Validate code and return list of issues.
Returns empty list if code is safe.
"""
issues = []
try:
tree = ast.parse(code)
except SyntaxError as e:
return [f"Syntax error: {e}"]
for node in ast.walk(tree):
# Check for blocked node types
if type(node) in self.blocked_nodes:
lineno = getattr(node, "lineno", "?")
issues.append(f"Blocked operation: {type(node).__name__} at line {lineno}")
# Check for dangerous attribute access
if isinstance(node, ast.Attribute):
if node.attr.startswith("_"):
issues.append(
f"Access to private attribute '{node.attr}' at line {node.lineno}"
)
# Check for exec/eval calls
if isinstance(node, ast.Call):
if isinstance(node.func, ast.Name):
if node.func.id in ("exec", "eval", "compile", "__import__"):
issues.append(
f"Blocked function call: {node.func.id} at line {node.lineno}"
)
return issues
class CodeSandbox:
    """
    Sandboxed environment for executing dynamic code.

    Code is statically checked by ``CodeValidator`` and then run in a fresh
    namespace whose ``__builtins__`` is a copy of the safe-builtins mapping.
    A wall-clock timeout is enforced where ``signal.SIGALRM`` is available.

    Usage:
        sandbox = CodeSandbox(timeout_seconds=5)
        result = sandbox.execute(
            code="x = 1 + 2\\nresult = x * 3",
            inputs={"multiplier": 2},
        )
        if result.success:
            print(result.variables["result"])  # 9
    """

    def __init__(
        self,
        timeout_seconds: int = 10,
        allowed_modules: set[str] | None = None,
        safe_builtins: dict[str, Any] | None = None,
    ):
        """Configure the sandbox.

        Args:
            timeout_seconds: Maximum run time per call, in whole seconds.
            allowed_modules: Module whitelist for the restricted importer;
                ``None`` selects ``ALLOWED_MODULES``.
            safe_builtins: Builtins exposed to sandboxed code; ``None``
                selects ``SAFE_BUILTINS``.
        """
        self.timeout_seconds = timeout_seconds
        # Compare against None rather than truthiness: an explicitly empty
        # whitelist must mean "allow nothing", not "use the permissive default".
        self.allowed_modules = ALLOWED_MODULES if allowed_modules is None else allowed_modules
        self.safe_builtins = SAFE_BUILTINS if safe_builtins is None else safe_builtins
        self.validator = CodeValidator()
        self.importer = RestrictedImporter(self.allowed_modules)

    @contextmanager
    def _timeout_context(self, seconds: int):
        """Context manager for timeout enforcement.

        Raises this module's ``TimeoutError`` inside the ``with`` body when it
        runs longer than ``seconds``. Without ``SIGALRM`` the body runs with
        no time limit.
        """

        def handler(signum, frame):
            raise TimeoutError(f"Code execution timed out after {seconds} seconds")

        # Only works on Unix-like systems
        # NOTE(review): signal.signal() may only be called from the main
        # thread; elsewhere it raises ValueError, which callers below report
        # as a failed SandboxResult — confirm that is the intended behavior.
        if hasattr(signal, "SIGALRM"):
            old_handler = signal.signal(signal.SIGALRM, handler)
            signal.alarm(seconds)
            try:
                yield
            finally:
                # Always cancel the pending alarm and restore the prior handler
                signal.alarm(0)
                signal.signal(signal.SIGALRM, old_handler)
        else:
            # Windows: no timeout support, just execute
            yield

    def _create_namespace(self, inputs: dict[str, Any]) -> dict[str, Any]:
        """Create isolated namespace for code execution.

        The builtins mapping is copied so sandboxed code cannot mutate the
        shared whitelist. ``inputs`` are applied last and may therefore
        override the two reserved keys.
        """
        # NOTE(review): the importer is bound as the *global* name
        # ``__import__``, not inside ``__builtins__``; since the validator
        # rejects both ``import`` statements and ``__import__(...)`` calls,
        # it looks unreachable from validated code — confirm intent.
        namespace = {
            "__builtins__": dict(self.safe_builtins),
            "__import__": self.importer,
        }
        # Add input variables
        namespace.update(inputs)
        return namespace

    def execute(
        self,
        code: str,
        inputs: dict[str, Any] | None = None,
        extract_vars: list[str] | None = None,
    ) -> SandboxResult:
        """
        Execute code in sandbox.

        Args:
            code: Python code to execute
            inputs: Variables to inject into namespace
            extract_vars: Variable names to extract from namespace after execution

        Returns:
            SandboxResult with execution outcome. Failures (validation,
            timeout, security violation, or any exception raised by the
            code) are reported through ``success=False`` and ``error``
            rather than raised.
        """
        import time

        inputs = inputs or {}
        extract_vars = extract_vars or []

        # Validate code first
        issues = self.validator.validate(code)
        if issues:
            return SandboxResult(
                success=False,
                error=f"Code validation failed: {'; '.join(issues)}",
            )

        # Create isolated namespace
        namespace = self._create_namespace(inputs)

        # Capture stdout
        import io

        old_stdout = sys.stdout
        sys.stdout = captured_stdout = io.StringIO()
        start_time = time.time()
        try:
            with self._timeout_context(self.timeout_seconds):
                # Compile and execute
                compiled = compile(code, "<sandbox>", "exec")
                exec(compiled, namespace)
            execution_time_ms = int((time.time() - start_time) * 1000)

            # Extract requested variables
            extracted = {}
            for var in extract_vars:
                if var in namespace:
                    extracted[var] = namespace[var]

            # Also extract any new variables (not in inputs or builtins)
            for key, value in namespace.items():
                if key not in inputs and key not in self.safe_builtins and not key.startswith("_"):
                    extracted[key] = value

            return SandboxResult(
                success=True,
                result=namespace.get("result"),  # Convention: 'result' is the return value
                stdout=captured_stdout.getvalue(),
                variables=extracted,
                execution_time_ms=execution_time_ms,
            )
        except TimeoutError as e:
            return SandboxResult(
                success=False,
                error=str(e),
                execution_time_ms=self.timeout_seconds * 1000,
            )
        except SecurityError as e:
            return SandboxResult(
                success=False,
                error=f"Security violation: {e}",
                execution_time_ms=int((time.time() - start_time) * 1000),
            )
        except Exception as e:
            return SandboxResult(
                success=False,
                error=f"{type(e).__name__}: {e}",
                stdout=captured_stdout.getvalue(),
                execution_time_ms=int((time.time() - start_time) * 1000),
            )
        finally:
            # Restore the real stdout on every exit path
            sys.stdout = old_stdout

    def execute_expression(
        self,
        expression: str,
        inputs: dict[str, Any] | None = None,
    ) -> SandboxResult:
        """
        Execute a single expression and return its value.

        Simpler than execute() - just evaluates one expression. The
        expression goes through the same ``CodeValidator`` rules as
        ``execute`` before it is evaluated.

        Args:
            expression: Python expression to evaluate
            inputs: Variables to inject into namespace

        Returns:
            SandboxResult whose ``result`` holds the expression value on
            success; failures are reported through ``error``.
        """
        inputs = inputs or {}

        # Must be a syntactically valid *expression* (not statements)
        try:
            ast.parse(expression, mode="eval")
        except SyntaxError as e:
            return SandboxResult(success=False, error=f"Syntax error: {e}")

        # Apply the same safety rules as execute(); without this, private
        # attribute access and blocked calls would slip through eval().
        issues = self.validator.validate(expression)
        if issues:
            return SandboxResult(
                success=False,
                error=f"Code validation failed: {'; '.join(issues)}",
            )

        namespace = self._create_namespace(inputs)
        try:
            with self._timeout_context(self.timeout_seconds):
                result = eval(expression, namespace)
            return SandboxResult(success=True, result=result)
        except Exception as e:
            return SandboxResult(
                success=False,
                error=f"{type(e).__name__}: {e}",
            )
# Singleton instance with default settings
default_sandbox = CodeSandbox()
def safe_exec(
    code: str,
    inputs: dict[str, Any] | None = None,
    timeout_seconds: int = 10,
) -> SandboxResult:
    """
    Run ``code`` in a freshly created ``CodeSandbox``.

    Args:
        code: Python code to execute
        inputs: Variables to inject
        timeout_seconds: Max execution time

    Returns:
        SandboxResult
    """
    return CodeSandbox(timeout_seconds=timeout_seconds).execute(code, inputs)
def safe_eval(
    expression: str,
    inputs: dict[str, Any] | None = None,
    timeout_seconds: int = 5,
) -> SandboxResult:
    """
    Evaluate ``expression`` in a freshly created ``CodeSandbox``.

    Args:
        expression: Python expression to evaluate
        inputs: Variables to inject
        timeout_seconds: Max execution time

    Returns:
        SandboxResult
    """
    return CodeSandbox(timeout_seconds=timeout_seconds).execute_expression(expression, inputs)
+388 -20
View File
@@ -5,6 +5,7 @@ from __future__ import annotations
import json
import re
from dataclasses import dataclass
from pathlib import Path
from typing import Any, Literal, Protocol, runtime_checkable
@@ -30,6 +31,8 @@ class Message:
# Phase-aware compaction metadata (continuous mode)
phase_id: str | None = None
is_transition_marker: bool = False
# True when this message is real human input (from /chat), not a system prompt
is_client_input: bool = False
def to_llm_dict(self) -> dict[str, Any]:
"""Convert to OpenAI-format message dict."""
@@ -67,6 +70,8 @@ class Message:
d["phase_id"] = self.phase_id
if self.is_transition_marker:
d["is_transition_marker"] = self.is_transition_marker
if self.is_client_input:
d["is_client_input"] = self.is_client_input
return d
@classmethod
@@ -81,19 +86,138 @@ class Message:
is_error=data.get("is_error", False),
phase_id=data.get("phase_id"),
is_transition_marker=data.get("is_transition_marker", False),
is_client_input=data.get("is_client_input", False),
)
def _extract_spillover_filename(content: str) -> str | None:
"""Extract spillover filename from a truncated tool result.
"""Extract spillover filename from a tool result annotation.
Matches the pattern produced by EventLoopNode._truncate_tool_result():
"saved to 'tool_github_list_stargazers_abc123.txt'"
Matches patterns produced by EventLoopNode._truncate_tool_result():
- Large result: "saved to 'web_search_1.txt'"
- Small result: "[Saved to 'web_search_1.txt']"
"""
match = re.search(r"saved to '([^']+)'", content)
match = re.search(r"[Ss]aved to '([^']+)'", content)
return match.group(1) if match else None
_TC_ARG_LIMIT = 200  # max chars per tool_call argument after compaction


def _compact_tool_calls(tool_calls: list[dict[str, Any]]) -> list[dict[str, Any]]:
    """Return copies of *tool_calls* with oversized arguments summarised.

    ``id``, ``type`` and ``function.name`` are carried over unchanged
    (defaulting to ``""``, ``"function"`` and ``""`` when absent). An
    ``arguments`` string longer than ``_TC_ARG_LIMIT`` is replaced by a
    small JSON object of the form ``{"_compacted": "<preview>"}``.

    The replacement is always produced with ``json.dumps`` rather than by
    slicing the original string, so it is valid JSON; the Anthropic API
    parses tool_call arguments and rejects malformed JSON (for example an
    unterminated string).
    """

    def _shrink(raw: str) -> str:
        # Short arguments are passed through untouched.
        if len(raw) <= _TC_ARG_LIMIT:
            return raw

        try:
            decoded = json.loads(raw)
        except (json.JSONDecodeError, TypeError):
            # Arguments were not valid JSON to begin with: wrap a raw preview.
            return json.dumps({"_compacted": raw[:_TC_ARG_LIMIT]})

        if not isinstance(decoded, dict):
            return json.dumps({"_compacted": str(decoded)[:_TC_ARG_LIMIT]})

        # Keep every key name, but cap each rendered value at 60 characters.
        pieces = []
        for key, value in decoded.items():
            rendered = str(value)
            if len(rendered) > 60:
                rendered = rendered[:60] + "..."
            pieces.append(f"{key}={rendered}")

        preview = ", ".join(pieces)
        if len(preview) > _TC_ARG_LIMIT:
            preview = preview[:_TC_ARG_LIMIT] + "..."
        return json.dumps({"_compacted": preview})

    result = []
    for call in tool_calls:
        fn = call.get("function", {})
        result.append(
            {
                "id": call.get("id", ""),
                "type": call.get("type", "function"),
                "function": {
                    "name": fn.get("name", ""),
                    "arguments": _shrink(fn.get("arguments", "")),
                },
            }
        )
    return result
def extract_tool_call_history(messages: list[Message], max_entries: int = 30) -> str:
    """Build a compact tool call history from a list of messages.

    Used in compaction summaries to prevent the LLM from re-calling
    tools it already called. Extracts tool call details, files saved,
    outputs set, and errors encountered.

    Args:
        messages: Messages to scan. Assistant messages contribute their
            ``tool_calls``; tool messages flagged ``is_error`` contribute
            an error preview.
        max_entries: Maximum number of distinct tool names listed.

    Returns:
        Up to four sections ("TOOLS ALREADY CALLED", "FILES SAVED",
        "OUTPUTS SET", "ERRORS") joined by blank lines, or ``""`` when
        nothing was found.
    """
    # Which argument best identifies a call, per tool name.
    summary_keys = {
        "web_search": "query",
        "web_scrape": "url",
        "load_data": "filename",
        "save_data": "filename",
    }

    tool_calls_detail: dict[str, list[str]] = {}
    files_saved: list[str] = []
    outputs_set: list[str] = []
    errors: list[str] = []

    def _as_text(value: Any) -> str:
        # Argument values are not guaranteed to be strings; normalise so the
        # slicing and joining below cannot raise.
        return str(value) if value else ""

    for msg in messages:
        if msg.role == "assistant" and msg.tool_calls:
            for tc in msg.tool_calls:
                func = tc.get("function") or {}
                name = func.get("name", "unknown")
                try:
                    args = json.loads(func.get("arguments", "{}"))
                except (json.JSONDecodeError, TypeError):
                    args = {}
                # Valid JSON is not necessarily an object (it may be a list,
                # string or number); only a dict supports the lookups below.
                if not isinstance(args, dict):
                    args = {}

                key = summary_keys.get(name)
                summary = _as_text(args.get(key)) if key else ""
                tool_calls_detail.setdefault(name, []).append(summary)

                if name == "save_data" and args.get("filename"):
                    files_saved.append(str(args["filename"]))
                if name == "set_output" and args.get("key"):
                    outputs_set.append(str(args["key"]))

        if msg.role == "tool" and msg.is_error:
            preview = (msg.content or "")[:120].replace("\n", " ")
            errors.append(preview)

    parts: list[str] = []

    if tool_calls_detail:
        lines: list[str] = []
        for name, inputs in list(tool_calls_detail.items())[:max_entries]:
            count = len(inputs)
            non_empty = [s for s in inputs if s]
            if non_empty:
                # At most 8 inputs per tool, each capped at 120 characters.
                detail_lines = [f" - {s[:120]}" for s in non_empty[:8]]
                lines.append(f" {name} ({count}x):\n" + "\n".join(detail_lines))
            else:
                lines.append(f" {name} ({count}x)")
        parts.append("TOOLS ALREADY CALLED:\n" + "\n".join(lines))

    if files_saved:
        # dict.fromkeys de-duplicates while preserving first-seen order.
        unique = list(dict.fromkeys(files_saved))
        parts.append("FILES SAVED: " + ", ".join(unique))

    if outputs_set:
        unique = list(dict.fromkeys(outputs_set))
        parts.append("OUTPUTS SET: " + ", ".join(unique))

    if errors:
        parts.append("ERRORS (do NOT retry these):\n" + "\n".join(f" - {e}" for e in errors[:10]))

    return "\n\n".join(parts)
# ---------------------------------------------------------------------------
# ConversationStore protocol (Phase 2)
# ---------------------------------------------------------------------------
@@ -212,22 +336,12 @@ class NodeConversation:
Layer 3 (focus) while preserving the conversation history.
"""
self._system_prompt = new_prompt
self._meta_persisted = False # re-persist with new prompt
def set_current_phase(self, phase_id: str) -> None:
    """Record *phase_id* as the active phase.

    The stored value is what newly created messages receive as their
    ``phase_id``.
    """
    self._current_phase = phase_id
async def switch_store(self, new_store: ConversationStore) -> None:
    """Redirect persistence to *new_store* at a phase transition.

    The metadata-persisted flag is cleared so that meta (system prompt,
    config) is written again on the next write, and the current
    ``next_seq`` is recorded in the new store's cursor straight away.
    """
    # Force meta to be re-persisted into the new store.
    self._meta_persisted = False
    self._store = new_store

    cursor = {"next_seq": self._next_seq}
    await new_store.write_cursor(cursor)
@property
def current_phase(self) -> str | None:
    """The phase ID currently stored on this conversation, if any."""
    return self._current_phase
@@ -258,6 +372,7 @@ class NodeConversation:
content: str,
*,
is_transition_marker: bool = False,
is_client_input: bool = False,
) -> Message:
msg = Message(
seq=self._next_seq,
@@ -265,6 +380,7 @@ class NodeConversation:
content=content,
phase_id=self._current_phase,
is_transition_marker=is_transition_marker,
is_client_input=is_client_input,
)
self._messages.append(msg)
self._next_seq += 1
@@ -323,9 +439,36 @@ class NodeConversation:
def _repair_orphaned_tool_calls(
msgs: list[dict[str, Any]],
) -> list[dict[str, Any]]:
"""Ensure every tool_call has a matching tool-result message."""
"""Ensure tool_call / tool_result pairs are consistent.
1. **Orphaned tool results** (tool_result with no preceding tool_use)
are dropped. This happens when compaction removes an assistant
message but leaves its tool-result messages behind.
2. **Orphaned tool calls** (tool_use with no following tool_result)
get a synthetic error result appended. This happens when a loop
is cancelled mid-tool-execution.
"""
# Pass 1: collect all tool_call IDs from assistant messages so we
# can identify orphaned tool-result messages.
all_tool_call_ids: set[str] = set()
for m in msgs:
if m.get("role") == "assistant":
for tc in m.get("tool_calls") or []:
tc_id = tc.get("id")
if tc_id:
all_tool_call_ids.add(tc_id)
# Pass 2: build repaired list — drop orphaned tool results, patch
# missing tool results.
repaired: list[dict[str, Any]] = []
for i, m in enumerate(msgs):
# Drop tool-result messages whose tool_call_id has no matching
# tool_use in any assistant message (orphaned by compaction).
if m.get("role") == "tool":
tid = m.get("tool_call_id")
if tid and tid not in all_tool_call_ids:
continue # skip orphaned result
repaired.append(m)
tool_calls = m.get("tool_calls")
if m.get("role") != "assistant" or not tool_calls:
@@ -356,12 +499,20 @@ class NodeConversation:
"""Best available token estimate.
Uses actual API input token count when available (set via
:meth:`update_token_count`), otherwise falls back to the rough
``total_chars / 4`` heuristic.
:meth:`update_token_count`), otherwise falls back to a
``total_chars / 4`` heuristic that includes both message content
AND tool_call argument sizes.
"""
if self._last_api_input_tokens is not None:
return self._last_api_input_tokens
total_chars = sum(len(m.content) for m in self._messages)
total_chars = 0
for m in self._messages:
total_chars += len(m.content)
if m.tool_calls:
for tc in m.tool_calls:
func = tc.get("function", {})
total_chars += len(func.get("arguments", ""))
total_chars += len(func.get("name", ""))
return total_chars // 4
def update_token_count(self, actual_input_tokens: int) -> None:
@@ -590,6 +741,210 @@ class NodeConversation:
self._messages = [summary_msg] + recent_messages
self._last_api_input_tokens = None # reset; next LLM call will recalibrate
async def compact_preserving_structure(
    self,
    spillover_dir: str,
    keep_recent: int = 4,
    phase_graduated: bool = False,
    aggressive: bool = False,
) -> None:
    """Structure-preserving compaction: save freeform text to file, keep tool messages.

    Unlike a single-summary compaction, this keeps the tool-call structure
    of the old part of the conversation (assistant messages carrying
    ``tool_calls`` plus tool result messages). Freeform text exchanges
    (user messages, text-only assistant messages) are written to a
    ``conversation_<n>.md`` file under *spillover_dir* and removed from the
    in-memory history. No LLM call is made.

    Kept assistant messages are rebuilt with empty content and with their
    tool-call arguments shortened by ``_compact_tool_calls``. Each freeform
    message written to the file is capped at 2000 characters; longer text
    is cut and marked with a trailing ``...``.

    When *aggressive* is True, only tool-call groups that contain a
    ``set_output`` call or an error result are kept individually. Every
    other old tool-call message is replaced by the text produced by
    ``extract_tool_call_history``, placed in the reference message.

    Args:
        spillover_dir: Directory for the conversation file (created if
            missing).
        keep_recent: Number of trailing messages left untouched when no
            phase-based split point is used.
        phase_graduated: When True and a current phase is set, ask
            ``_find_phase_graduated_split`` for the split point first.
        aggressive: Collapse non-essential tool-call pairs (see above).
    """
    if not self._messages:
        return

    total = len(self._messages)

    # Determine the split point between "old" and "recent" messages.
    if phase_graduated and self._current_phase:
        split = self._find_phase_graduated_split()
    else:
        split = None

    if split is None:
        keep_recent = max(0, min(keep_recent, total - 1))
        split = total - keep_recent if keep_recent > 0 else total

    # Advance split past tool results at the boundary so a result is never
    # separated from the assistant message that requested it.
    while split < total and self._messages[split].role == "tool":
        split += 1

    if split == 0:
        return

    old_messages = self._messages[:split]

    freeform_limit = 2000  # max chars of one freeform message saved to file

    def _freeform_line(msg: Message) -> str:
        """Render one freeform message for the conversation file."""
        text = msg.content
        if len(text) > freeform_limit:
            # Mark the cut so a reader can tell the text is incomplete.
            text = text[:freeform_limit] + "..."
        return f"[{msg.role}] (seq={msg.seq}): {text}"

    def _compacted_assistant(msg: Message) -> Message:
        """Copy a tool-calling assistant message with shortened arguments."""
        return Message(
            seq=msg.seq,
            role=msg.role,
            content="",
            tool_calls=_compact_tool_calls(msg.tool_calls),
            is_error=msg.is_error,
            phase_id=msg.phase_id,
            is_transition_marker=msg.is_transition_marker,
        )

    # Classify old messages: structural (keep) vs freeform (save to file)
    # vs collapsed (aggressive mode only).
    kept_structural: list[Message] = []
    freeform_lines: list[str] = []
    collapsed_msgs: list[Message] = []

    protected_tc_ids: set[str] = set()
    if aggressive:
        # An assistant message precedes its tool results, so which calls
        # failed must be known BEFORE the assistant messages are classified;
        # otherwise a kept error result would lose its parent message.
        error_tc_ids = {
            msg.tool_use_id
            for msg in old_messages
            if msg.role == "tool" and msg.is_error and msg.tool_use_id
        }
        for msg in old_messages:
            if msg.role != "assistant" or not msg.tool_calls:
                continue
            tc_ids = {tc.get("id", "") for tc in msg.tool_calls}
            has_set_output = any(
                tc.get("function", {}).get("name") == "set_output" for tc in msg.tool_calls
            )
            if has_set_output or tc_ids & error_tc_ids:
                # Protect the whole group so every kept tool call still has
                # its matching result and vice versa.
                protected_tc_ids |= tc_ids

    for msg in old_messages:
        if msg.role == "tool":
            keep = (
                not aggressive
                or msg.is_error  # error results are always kept
                or (msg.tool_use_id or "") in protected_tc_ids
            )
            (kept_structural if keep else collapsed_msgs).append(msg)
        elif msg.role == "assistant" and msg.tool_calls:
            tc_ids = {tc.get("id", "") for tc in msg.tool_calls}
            if not aggressive or tc_ids & protected_tc_ids:
                kept_structural.append(_compacted_assistant(msg))
            else:
                collapsed_msgs.append(msg)
        else:
            # Freeform text: saved to file, dropped from history.
            freeform_lines.append(_freeform_line(msg))

    # Write freeform text to a numbered conversation file.
    spill_path = Path(spillover_dir)
    spill_path.mkdir(parents=True, exist_ok=True)

    conv_filename = ""
    if freeform_lines:
        next_n = len(list(spill_path.glob("conversation_*.md"))) + 1
        # Counting files can land on an existing name when the numbering has
        # gaps; skip forward instead of overwriting an earlier file.
        while (spill_path / f"conversation_{next_n}.md").exists():
            next_n += 1
        conv_filename = f"conversation_{next_n}.md"
        header = f"## Compacted conversation (messages 1-{split})\n\n"
        conv_text = header + "\n\n".join(freeform_lines)
        (spill_path / conv_filename).write_text(conv_text, encoding="utf-8")

    # Build reference message
    ref_parts: list[str] = []
    if conv_filename:
        ref_parts.append(
            f"[Previous conversation saved to '{conv_filename}'. "
            f"Use load_data('{conv_filename}') to review if needed.]"
        )
    elif not collapsed_msgs:
        ref_parts.append("[Previous freeform messages compacted.]")

    # Aggressive: add collapsed tool-call history to the reference
    if collapsed_msgs:
        tool_history = extract_tool_call_history(collapsed_msgs)
        if tool_history:
            ref_parts.append(tool_history)
        elif not ref_parts:
            ref_parts.append("[Previous tool calls compacted.]")

    ref_content = "\n\n".join(ref_parts)

    # Use a seq just before the first kept message
    recent_messages = list(self._messages[split:])
    if kept_structural:
        ref_seq = kept_structural[0].seq - 1
    elif recent_messages:
        ref_seq = recent_messages[0].seq - 1
    else:
        ref_seq = self._next_seq
        self._next_seq += 1

    ref_msg = Message(seq=ref_seq, role="user", content=ref_content)

    # Persist: delete old messages from store, write reference + kept structural.
    # In aggressive mode, collapsed messages may be interspersed with kept
    # messages, so we delete everything before the recent boundary and
    # rewrite only what we want to keep.
    if self._store:
        recent_boundary = recent_messages[0].seq if recent_messages else self._next_seq
        await self._store.delete_parts_before(recent_boundary)
        # Write the reference message
        await self._store.write_part(ref_msg.seq, ref_msg.to_storage_dict())
        # Write kept structural messages (they may have been modified)
        for msg in kept_structural:
            await self._store.write_part(msg.seq, msg.to_storage_dict())
        await self._store.write_cursor({"next_seq": self._next_seq})

    # Reassemble: reference + kept structural (in original order) + recent
    self._messages = [ref_msg] + kept_structural + recent_messages
    # Force the token estimate to be recomputed for the shrunken history.
    self._last_api_input_tokens = None
def _find_phase_graduated_split(self) -> int | None:
"""Find split point that preserves current + previous phase.
@@ -682,9 +1037,20 @@ class NodeConversation:
# --- Restore -----------------------------------------------------------
@classmethod
async def restore(cls, store: ConversationStore) -> NodeConversation | None:
async def restore(
cls,
store: ConversationStore,
phase_id: str | None = None,
) -> NodeConversation | None:
"""Reconstruct a NodeConversation from a store.
Args:
store: The conversation store to read from.
phase_id: If set, only load parts matching this phase_id.
Used in isolated mode so a node only sees its own
messages in the shared flat store. In continuous mode
pass ``None`` to load all parts.
Returns ``None`` if the store contains no metadata (i.e. the
conversation was never persisted).
"""
@@ -702,6 +1068,8 @@ class NodeConversation:
conv._meta_persisted = True
parts = await store.read_parts()
if phase_id:
parts = [p for p in parts if p.get("phase_id") == phase_id]
conv._messages = [Message.from_storage_dict(p) for p in parts]
cursor = await store.read_cursor()
+33 -8
View File
@@ -86,7 +86,7 @@ CONFIDENCE: 0.X
FEEDBACK: (reason if RETRY, empty if ACCEPT)"""
try:
response = llm.complete(
response = await llm.acomplete(
messages=[{"role": "user", "content": user_prompt}],
system=system_prompt,
max_tokens=max(1024, max_history_tokens // 5),
@@ -103,7 +103,12 @@ FEEDBACK: (reason if RETRY, empty if ACCEPT)"""
def _extract_recent_context(conversation: NodeConversation, max_messages: int = 10) -> str:
"""Extract recent conversation messages for evaluation."""
"""Extract recent conversation messages for evaluation.
Includes tool-call summaries from assistant messages so the judge
can see what tools were invoked (especially set_output values) even
when the assistant message body is empty.
"""
messages = conversation.messages
recent = messages[-max_messages:] if len(messages) > max_messages else messages
@@ -112,8 +117,24 @@ def _extract_recent_context(conversation: NodeConversation, max_messages: int =
role = msg.role.upper()
content = msg.content or ""
# Truncate long tool results
if msg.role == "tool" and len(content) > 200:
content = content[:200] + "..."
if msg.role == "tool" and len(content) > 500:
content = content[:500] + "..."
# For assistant messages with empty content but tool_calls,
# summarise the tool calls so the judge knows what happened.
if msg.role == "assistant" and not content.strip():
tool_calls = getattr(msg, "tool_calls", None)
if tool_calls:
tc_parts = []
for tc in tool_calls:
fn = tc.get("function", {}) if isinstance(tc, dict) else {}
name = fn.get("name", "")
args = fn.get("arguments", "")
if name == "set_output":
# Show the value so the judge can evaluate content quality
tc_parts.append(f" called {name}({args[:1000]})")
else:
tc_parts.append(f" called {name}(...)")
content = "Tool calls:\n" + "\n".join(tc_parts)
if content.strip():
parts.append(f"[{role}]: {content.strip()}")
@@ -125,6 +146,10 @@ def _format_outputs(accumulator_state: dict[str, Any]) -> str:
Lists and dicts get structural formatting so the judge can assess
quantity and structure, not just a truncated stringification.
String values are given a generous limit (2000 chars) so the judge
can verify substantive content (e.g. a research brief with key
questions, scope boundaries, and deliverables).
"""
if not accumulator_state:
return "(none)"
@@ -144,12 +169,12 @@ def _format_outputs(accumulator_state: dict[str, Any]) -> str:
val_str += f"\n ... and {len(value) - 8} more"
elif isinstance(value, dict):
val_str = str(value)
if len(val_str) > 400:
val_str = val_str[:400] + "..."
if len(val_str) > 2000:
val_str = val_str[:2000] + "..."
else:
val_str = str(value)
if len(val_str) > 300:
val_str = val_str[:300] + "..."
if len(val_str) > 2000:
val_str = val_str[:2000] + "..."
parts.append(f" {key}: {val_str}")
return "\n".join(parts)
+66 -5
View File
@@ -104,7 +104,7 @@ class EdgeSpec(BaseModel):
model_config = {"extra": "allow"}
def should_traverse(
async def should_traverse(
self,
source_success: bool,
source_output: dict[str, Any],
@@ -145,7 +145,7 @@ class EdgeSpec(BaseModel):
if llm is None or goal is None:
# Fallback to ON_SUCCESS if LLM not available
return source_success
return self._llm_decide(
return await self._llm_decide(
llm=llm,
goal=goal,
source_success=source_success,
@@ -203,7 +203,7 @@ class EdgeSpec(BaseModel):
logger.warning(f" Available context keys: {list(context.keys())}")
return False
def _llm_decide(
async def _llm_decide(
self,
llm: Any,
goal: Any,
@@ -247,7 +247,7 @@ Respond with ONLY a JSON object:
{{"proceed": true/false, "reasoning": "brief explanation"}}"""
try:
response = llm.complete(
response = await llm.acomplete(
messages=[{"role": "user", "content": prompt}],
system="You are a routing agent. Respond with JSON only.",
max_tokens=150,
@@ -338,9 +338,19 @@ class AsyncEntryPointSpec(BaseModel):
max_concurrent: int = Field(
default=10, description="Maximum concurrent executions for this entry point"
)
max_resurrections: int = Field(
default=3,
description="Auto-restart on non-fatal failure (0 to disable)",
)
model_config = {"extra": "allow"}
def get_isolation_level(self):
    """Return ``self.isolation_level`` as an ``IsolationLevel`` enum member.

    Offered so this spec can be used interchangeably (duck-typed) with
    EntryPointSpec.
    """
    # Imported at call time rather than at module import.
    from framework.runtime.execution_stream import IsolationLevel

    level = IsolationLevel(self.isolation_level)
    return level
class GraphSpec(BaseModel):
"""
@@ -422,7 +432,7 @@ class GraphSpec(BaseModel):
# Cleanup LLM for JSON extraction fallback (fast/cheap model preferred)
# If not set, uses CEREBRAS_API_KEY -> cerebras/llama-3.3-70b or
# ANTHROPIC_API_KEY -> claude-3-5-haiku as fallback
# ANTHROPIC_API_KEY -> claude-haiku-4-5 as fallback
cleanup_llm_model: str | None = None
# Execution limits
@@ -638,6 +648,13 @@ class GraphSpec(BaseModel):
for edge in self.get_outgoing_edges(current):
to_visit.append(edge.target)
# Also mark sub-agents as reachable (they're invoked via delegate_to_sub_agent, not edges)
for node in self.nodes:
if node.id in reachable:
sub_agents = getattr(node, "sub_agents", []) or []
for sub_agent_id in sub_agents:
reachable.add(sub_agent_id)
# Build set of async entry point nodes for quick lookup
async_entry_nodes = {ep.entry_node for ep in self.async_entry_points}
@@ -689,4 +706,48 @@ class GraphSpec(BaseModel):
else:
seen_keys[key] = node_id
# GCU nodes must only be used as subagents
gcu_node_ids = {n.id for n in self.nodes if n.node_type == "gcu"}
if gcu_node_ids:
# GCU nodes must not be entry nodes
if self.entry_node in gcu_node_ids:
errors.append(
f"GCU node '{self.entry_node}' is used as entry node. "
"GCU nodes must only be used as subagents via delegate_to_sub_agent()."
)
# GCU nodes must not be terminal nodes
for term in self.terminal_nodes:
if term in gcu_node_ids:
errors.append(
f"GCU node '{term}' is used as terminal node. "
"GCU nodes must only be used as subagents."
)
# GCU nodes must not be connected via edges
for edge in self.edges:
if edge.source in gcu_node_ids:
errors.append(
f"GCU node '{edge.source}' is used as edge source (edge '{edge.id}'). "
"GCU nodes must only be used as subagents, not connected via edges."
)
if edge.target in gcu_node_ids:
errors.append(
f"GCU node '{edge.target}' is used as edge target (edge '{edge.id}'). "
"GCU nodes must only be used as subagents, not connected via edges."
)
# GCU nodes must be referenced in at least one parent's sub_agents
referenced_subagents = set()
for node in self.nodes:
for sa_id in node.sub_agents or []:
referenced_subagents.add(sa_id)
orphaned = gcu_node_ids - referenced_subagents
for nid in orphaned:
errors.append(
f"GCU node '{nid}' is not referenced in any node's sub_agents list. "
"GCU nodes must be declared as subagents of a parent node."
)
return errors
File diff suppressed because it is too large Load Diff
+493 -121
View File
@@ -11,7 +11,6 @@ The executor:
import asyncio
import logging
import warnings
from collections.abc import Callable
from dataclasses import dataclass, field
from pathlib import Path
@@ -21,13 +20,10 @@ from framework.graph.checkpoint_config import CheckpointConfig
from framework.graph.edge import EdgeCondition, EdgeSpec, GraphSpec
from framework.graph.goal import Goal
from framework.graph.node import (
FunctionNode,
LLMNode,
NodeContext,
NodeProtocol,
NodeResult,
NodeSpec,
RouterNode,
SharedMemory,
)
from framework.graph.output_cleaner import CleansingConfig, OutputCleaner
@@ -135,9 +131,14 @@ class GraphExecutor:
parallel_config: ParallelExecutionConfig | None = None,
event_bus: Any | None = None,
stream_id: str = "",
execution_id: str = "",
runtime_logger: Any = None,
storage_path: str | Path | None = None,
loop_config: dict[str, Any] | None = None,
accounts_prompt: str = "",
accounts_data: list[dict] | None = None,
tool_provider_map: dict[str, str] | None = None,
dynamic_tools_provider: Callable | None = None,
):
"""
Initialize the executor.
@@ -157,6 +158,11 @@ class GraphExecutor:
runtime_logger: Optional RuntimeLogger for per-graph-run logging
storage_path: Optional base path for conversation persistence
loop_config: Optional EventLoopNode configuration (max_iterations, etc.)
accounts_prompt: Connected accounts block for system prompt injection
accounts_data: Raw account data for per-node prompt generation
tool_provider_map: Tool name to provider name mapping for account routing
dynamic_tools_provider: Optional callback returning current
tool list (for mode switching)
"""
self.runtime = runtime
self.llm = llm
@@ -168,9 +174,14 @@ class GraphExecutor:
self.logger = logging.getLogger(__name__)
self._event_bus = event_bus
self._stream_id = stream_id
self._execution_id = execution_id or getattr(runtime, "execution_id", "")
self.runtime_logger = runtime_logger
self._storage_path = Path(storage_path) if storage_path else None
self._loop_config = loop_config or {}
self.accounts_prompt = accounts_prompt
self.accounts_data = accounts_data
self.tool_provider_map = tool_provider_map
self.dynamic_tools_provider = dynamic_tools_provider
# Initialize output cleaner
self.cleansing_config = cleansing_config or CleansingConfig()
@@ -186,6 +197,9 @@ class GraphExecutor:
# Pause/resume control
self._pause_requested = asyncio.Event()
# Track the currently executing node for external injection routing
self.current_node_id: str | None = None
def _write_progress(
self,
current_node: str,
@@ -237,7 +251,12 @@ class GraphExecutor:
def _validate_tools(self, graph: GraphSpec) -> list[str]:
"""
Validate that all tools declared by nodes are available.
Validate that all tools declared by reachable nodes are available.
Only checks nodes reachable from graph.entry_node via edges.
Nodes belonging to other entry points (e.g. the coder node when
entering via ticket_triage) are skipped — they will be validated
when their own entry point triggers execution.
Returns:
List of error messages (empty if all tools are available)
@@ -245,7 +264,20 @@ class GraphExecutor:
errors = []
available_tool_names = {t.name for t in self.tools}
# Compute reachable nodes from the execution's entry node
reachable: set[str] = set()
to_visit = [graph.entry_node]
while to_visit:
nid = to_visit.pop()
if nid in reachable:
continue
reachable.add(nid)
for edge in graph.get_outgoing_edges(nid):
to_visit.append(edge.target)
for node in graph.nodes:
if node.id not in reachable:
continue
if node.tools:
missing = set(node.tools) - available_tool_names
if missing:
@@ -258,6 +290,125 @@ class GraphExecutor:
return errors
# Max chars of formatted messages before proactively splitting for LLM.
_PHASE_LLM_CHAR_LIMIT = 240_000
# Max recursion depth for the split-and-summarise fallback.
_PHASE_LLM_MAX_DEPTH = 10

async def _phase_llm_compact(
    self,
    conversation: Any,
    next_spec: NodeSpec,
    messages: list,
    _depth: int = 0,
) -> str:
    """Summarise messages for phase-boundary compaction.

    The messages are rendered as text and sent to the executor's LLM with
    a prompt naming the next phase. If the rendered text exceeds
    ``_PHASE_LLM_CHAR_LIMIT``, or the LLM call fails with a
    context-too-large error, the messages are split in half via
    ``_phase_llm_compact_split`` and each half is summarised recursively.

    Args:
        conversation: Conversation being compacted; only its
            ``_max_history_tokens`` attribute is read (default 32000).
        next_spec: Spec of the node about to run; its name and description
            are included in the prompt.
        messages: Messages to summarise.
        _depth: Current recursion depth (internal).

    Returns:
        The summary text. At the top level (``_depth == 0``) the tool-call
        history is appended unless the summary already contains a
        "TOOLS ALREADY CALLED" section.

    Raises:
        RuntimeError: If recursion exceeds ``_PHASE_LLM_MAX_DEPTH``.
        Exception: Any LLM error that is not a context-too-large error, or
            one that occurs with a single message left, is re-raised.
    """
    from framework.graph.conversation import extract_tool_call_history
    from framework.graph.event_loop_node import _is_context_too_large_error

    if _depth > self._PHASE_LLM_MAX_DEPTH:
        raise RuntimeError("Phase LLM compaction recursion limit")

    # Format messages
    lines: list[str] = []
    for m in messages:
        if m.role == "tool":
            # Tool results are capped at 500 characters.
            c = m.content[:500] + ("..." if len(m.content) > 500 else "")
            lines.append(f"[tool result]: {c}")
        elif m.role == "assistant" and m.tool_calls:
            names = [tc.get("function", {}).get("name", "?") for tc in m.tool_calls]
            lines.append(
                f"[assistant (calls: {', '.join(names)})]: "
                f"{m.content[:200] if m.content else ''}"
            )
        else:
            lines.append(f"[{m.role}]: {m.content}")
    formatted = "\n\n".join(lines)

    # Proactive split
    if len(formatted) > self._PHASE_LLM_CHAR_LIMIT and len(messages) > 1:
        summary = await self._phase_llm_compact_split(
            conversation,
            next_spec,
            messages,
            _depth,
        )
    else:
        max_tokens = getattr(conversation, "_max_history_tokens", 32000)
        target_tokens = max_tokens // 2
        target_chars = target_tokens * 4  # same ~4 chars/token rule of thumb

        prompt = (
            "You are compacting an AI agent's conversation history "
            "at a phase boundary.\n\n"
            f"NEXT PHASE: {next_spec.name}\n"
        )
        if next_spec.description:
            prompt += f"NEXT PHASE PURPOSE: {next_spec.description}\n"
        prompt += (
            f"\nCONVERSATION MESSAGES:\n{formatted}\n\n"
            "INSTRUCTIONS:\n"
            f"Write a summary of approximately {target_chars} characters "
            f"(~{target_tokens} tokens).\n"
            "Preserve user-stated rules, constraints, and preferences "
            "verbatim. Preserve key decisions and results from earlier "
            "phases. Preserve context needed for the next phase.\n"
        )
        summary_budget = max(1024, max_tokens // 2)

        try:
            # The constructor stores the provider as ``self.llm``.
            response = await self.llm.acomplete(
                messages=[{"role": "user", "content": prompt}],
                system=(
                    "You are a conversation compactor. Write a detailed "
                    "summary preserving context for the next phase."
                ),
                max_tokens=summary_budget,
            )
            summary = response.content
        except Exception as e:
            # Only a context-overflow with more than one message can be
            # retried by splitting; everything else propagates.
            if _is_context_too_large_error(e) and len(messages) > 1:
                summary = await self._phase_llm_compact_split(
                    conversation,
                    next_spec,
                    messages,
                    _depth,
                )
            else:
                raise

    # Append tool history at top level only
    if _depth == 0:
        tool_history = extract_tool_call_history(messages)
        if tool_history and "TOOLS ALREADY CALLED" not in summary:
            summary += "\n\n" + tool_history

    return summary
async def _phase_llm_compact_split(
    self,
    conversation: Any,
    next_spec: NodeSpec,
    messages: list,
    _depth: int,
) -> str:
    """Summarise *messages* as two halves and join the two summaries.

    The list is cut at its midpoint (the first half always receives at
    least one message). Each half is summarised by
    ``_phase_llm_compact`` one level deeper, first half first, and the
    results are joined with a blank line.
    """
    pivot = max(1, len(messages) // 2)
    halves = (messages[:pivot], messages[pivot:])

    # Awaited one after the other, in order.
    summaries = [
        await self._phase_llm_compact(conversation, next_spec, half, _depth + 1)
        for half in halves
    ]
    return summaries[0] + "\n\n" + summaries[1]
async def execute(
self,
graph: GraphSpec,
@@ -313,6 +464,9 @@ class GraphExecutor:
cumulative_tool_names: set[str] = set()
cumulative_output_keys: list[str] = [] # Output keys from all visited nodes
# Build node registry for subagent lookup
node_registry: dict[str, NodeSpec] = {node.id: node for node in graph.nodes}
# Initialize checkpoint store if checkpointing is enabled
checkpoint_store: CheckpointStore | None = None
if checkpoint_config and checkpoint_config.enabled and self._storage_path:
@@ -346,6 +500,9 @@ class GraphExecutor:
for key, value in input_data.items():
memory.write(key, value)
# Detect event-triggered execution (timer/webhook) — no interactive user.
_event_triggered = bool(input_data and isinstance(input_data.get("event"), dict))
path: list[str] = []
total_tokens = 0
total_latency = 0
@@ -459,6 +616,66 @@ class GraphExecutor:
steps = 0
# Fresh shared-session execution: clear stale cursor so the entry
# node doesn't restore a filled OutputAccumulator from the previous
# webhook run (which would cause the judge to accept immediately).
# The conversation history is preserved (continuous memory).
_is_fresh_shared = bool(
session_state
and session_state.get("resume_session_id")
and not session_state.get("paused_at")
and not session_state.get("resume_from_checkpoint")
)
if _is_fresh_shared and is_continuous and self._storage_path:
try:
from framework.storage.conversation_store import FileConversationStore
entry_conv_path = self._storage_path / "conversations"
if entry_conv_path.exists():
_store = FileConversationStore(base_path=entry_conv_path)
# Read cursor to find next seq for the transition marker.
_cursor = await _store.read_cursor() or {}
_next_seq = _cursor.get("next_seq", 0)
if _next_seq == 0:
# Fallback: scan part files for max seq
_parts = await _store.read_parts()
if _parts:
_next_seq = max(p.get("seq", 0) for p in _parts) + 1
# Reset cursor — clears stale accumulator outputs and
# iteration counter so the node starts fresh work while
# the conversation thread carries forward.
await _store.write_cursor({})
# Append a transition marker so the LLM knows a new
# event arrived and previous results are outdated.
await _store.write_part(
_next_seq,
{
"role": "user",
"content": (
"--- NEW EVENT TRIGGER ---\n"
"A new event has been received. "
"Process this as a fresh request — "
"previous outputs are no longer valid."
),
"seq": _next_seq,
"is_transition_marker": True,
},
)
self.logger.info(
"🔄 Cleared stale cursor and added transition marker "
"for shared-session entry node '%s'",
current_node_id,
)
except Exception:
self.logger.debug(
"Could not prepare conversation store for shared-session entry node '%s'",
current_node_id,
exc_info=True,
)
if session_state and current_node_id != graph.entry_node:
self.logger.info(f"🔄 Resuming from: {current_node_id}")
@@ -467,6 +684,7 @@ class GraphExecutor:
await self._event_bus.emit_execution_resumed(
stream_id=self._stream_id,
node_id=current_node_id,
execution_id=self._execution_id,
)
# Start run
@@ -511,6 +729,7 @@ class GraphExecutor:
stream_id=self._stream_id,
node_id=current_node_id,
reason="User requested pause (Ctrl+Z)",
execution_id=self._execution_id,
)
# Create session state for pause
@@ -559,7 +778,7 @@ class GraphExecutor:
cnt = node_visit_counts.get(current_node_id, 0) + 1
node_visit_counts[current_node_id] = cnt
_is_retry = False
max_visits = getattr(node_spec, "max_node_visits", 1)
max_visits = getattr(node_spec, "max_node_visits", 0)
if max_visits > 0 and node_visit_counts[current_node_id] > max_visits:
self.logger.warning(
f" ⊘ Node '{node_spec.name}' visit limit reached "
@@ -567,7 +786,7 @@ class GraphExecutor:
)
# Skip execution — follow outgoing edges using current memory
skip_result = NodeResult(success=True, output=memory.read_all())
next_node = self._follow_edges(
next_node = await self._follow_edges(
graph=graph,
goal=goal,
current_node_id=current_node_id,
@@ -604,6 +823,9 @@ class GraphExecutor:
# Execute this node, then pause
# (We'll check again after execution and save state)
# Expose current node for external injection routing
self.current_node_id = current_node_id
self.logger.info(f"\n▶ Step {steps}: {node_spec.name} ({node_spec.node_type})")
self.logger.info(f" Inputs: {node_spec.input_keys}")
self.logger.info(f" Outputs: {node_spec.output_keys}")
@@ -619,6 +841,14 @@ class GraphExecutor:
if k not in cumulative_output_keys:
cumulative_output_keys.append(k)
# Build resume narrative (Layer 2) when restoring a session
# so the EventLoopNode can rebuild the full 3-layer system prompt.
_resume_narrative = ""
if _is_resuming and path:
from framework.graph.prompt_composer import build_narrative
_resume_narrative = build_narrative(memory, path, graph)
# Build context for node
ctx = self._build_context(
node_spec=node_spec,
@@ -630,6 +860,11 @@ class GraphExecutor:
inherited_conversation=continuous_conversation if is_continuous else None,
override_tools=cumulative_tools if is_continuous else None,
cumulative_output_keys=cumulative_output_keys if is_continuous else None,
event_triggered=_event_triggered,
node_registry=node_registry,
identity_prompt=getattr(graph, "identity_prompt", ""),
narrative=_resume_narrative,
graph=graph,
)
# Log actual input data being read
@@ -680,7 +915,9 @@ class GraphExecutor:
# Emit node-started event (skip event_loop nodes — they emit their own)
if self._event_bus and node_spec.node_type != "event_loop":
await self._event_bus.emit_node_loop_started(
stream_id=self._stream_id, node_id=current_node_id
stream_id=self._stream_id,
node_id=current_node_id,
execution_id=self._execution_id,
)
# Execute node
@@ -690,7 +927,10 @@ class GraphExecutor:
# Emit node-completed event (skip event_loop nodes)
if self._event_bus and node_spec.node_type != "event_loop":
await self._event_bus.emit_node_loop_completed(
stream_id=self._stream_id, node_id=current_node_id, iterations=1
stream_id=self._stream_id,
node_id=current_node_id,
iterations=1,
execution_id=self._execution_id,
)
# Ensure runtime logging has an L2 entry for this node
@@ -773,9 +1013,13 @@ class GraphExecutor:
# [CORRECTED] Use node_spec.max_retries instead of hardcoded 3
max_retries = getattr(node_spec, "max_retries", 3)
# Event loop nodes handle retry internally via judge —
# executor retry is catastrophic (retry multiplication)
if node_spec.node_type == "event_loop" and max_retries > 0:
# EventLoopNode instances handle retry internally via judge —
# executor retry would cause catastrophic retry multiplication.
# Only override for actual EventLoopNode instances, not custom
# NodeProtocol implementations that happen to use node_type="event_loop"
from framework.graph.event_loop_node import EventLoopNode
if isinstance(node_impl, EventLoopNode) and max_retries > 0:
self.logger.warning(
f"EventLoopNode '{node_spec.id}' has max_retries={max_retries}. "
"Overriding to 0 — event loop nodes handle retry internally via judge."
@@ -806,6 +1050,7 @@ class GraphExecutor:
retry_count=retry_count,
max_retries=max_retries,
error=result.error or "",
execution_id=self._execution_id,
)
_is_retry = True
@@ -817,7 +1062,7 @@ class GraphExecutor:
)
# Check if there's an ON_FAILURE edge to follow
next_node = self._follow_edges(
next_node = await self._follow_edges(
graph=graph,
goal=goal,
current_node_id=current_node_id,
@@ -901,6 +1146,7 @@ class GraphExecutor:
stream_id=self._stream_id,
node_id=node_spec.id,
reason="HITL pause node",
execution_id=self._execution_id,
)
saved_memory = memory.read_all()
@@ -966,13 +1212,14 @@ class GraphExecutor:
source_node=current_node_id,
target_node=result.next_node,
edge_condition="router",
execution_id=self._execution_id,
)
current_node_id = result.next_node
self._write_progress(current_node_id, path, memory, node_visit_counts)
else:
# Get all traversable edges for fan-out detection
traversable_edges = self._get_all_traversable_edges(
traversable_edges = await self._get_all_traversable_edges(
graph=graph,
goal=goal,
current_node_id=current_node_id,
@@ -1001,6 +1248,7 @@ class GraphExecutor:
edge_condition=edge.condition.value
if hasattr(edge.condition, "value")
else str(edge.condition),
execution_id=self._execution_id,
)
# Execute branches in parallel
@@ -1016,6 +1264,7 @@ class GraphExecutor:
source_result=result,
source_node_spec=node_spec,
path=path,
node_registry=node_registry,
)
total_tokens += branch_tokens
@@ -1032,7 +1281,7 @@ class GraphExecutor:
break
else:
# Sequential: follow single edge (existing logic via _follow_edges)
next_node = self._follow_edges(
next_node = await self._follow_edges(
graph=graph,
goal=goal,
current_node_id=current_node_id,
@@ -1052,6 +1301,7 @@ class GraphExecutor:
stream_id=self._stream_id,
source_node=current_node_id,
target_node=next_node,
execution_id=self._execution_id,
)
# CHECKPOINT: node_complete (after determining next node)
@@ -1099,6 +1349,7 @@ class GraphExecutor:
next_spec = graph.get_node(current_node_id)
if next_spec and next_spec.node_type == "event_loop":
from framework.graph.prompt_composer import (
build_accounts_prompt,
build_narrative,
build_transition_marker,
compose_system_prompt,
@@ -1107,26 +1358,44 @@ class GraphExecutor:
# Build Layer 2 (narrative) from current state
narrative = build_narrative(memory, path, graph)
# Compose new system prompt (Layer 1 + 2 + 3)
# Read agent working memory (adapt.md) once for both
# system prompt and transition marker.
_adapt_text: str | None = None
if self._storage_path:
_adapt_path = self._storage_path / "data" / "adapt.md"
if _adapt_path.exists():
_raw = _adapt_path.read_text(encoding="utf-8").strip()
_adapt_text = _raw or None
# Merge adapt.md into narrative for system prompt
if _adapt_text:
narrative = (
f"{narrative}\n\n--- Agent Memory ---\n{_adapt_text}"
if narrative
else _adapt_text
)
# Build per-node accounts prompt for the next node
_node_accounts = self.accounts_prompt or None
if self.accounts_data and self.tool_provider_map:
_node_accounts = (
build_accounts_prompt(
self.accounts_data,
self.tool_provider_map,
node_tool_names=next_spec.tools,
)
or None
)
# Compose new system prompt (Layer 1 + 2 + 3 + accounts)
new_system = compose_system_prompt(
identity_prompt=getattr(graph, "identity_prompt", None),
focus_prompt=next_spec.system_prompt,
narrative=narrative,
accounts_prompt=_node_accounts,
)
continuous_conversation.update_system_prompt(new_system)
# Switch conversation store to the next node's directory
# so the transition marker and all subsequent messages are
# persisted there instead of the first node's directory.
if self._storage_path:
from framework.storage.conversation_store import (
FileConversationStore,
)
next_store_path = self._storage_path / "conversations" / next_spec.id
next_store = FileConversationStore(base_path=next_store_path)
await continuous_conversation.switch_store(next_store)
# Insert transition marker into conversation
data_dir = str(self._storage_path / "data") if self._storage_path else None
marker = build_transition_marker(
@@ -1135,6 +1404,7 @@ class GraphExecutor:
memory=memory,
cumulative_tool_names=sorted(cumulative_tool_names),
data_dir=data_dir,
adapt_content=_adapt_text,
)
await continuous_conversation.add_user_message(
marker,
@@ -1144,27 +1414,78 @@ class GraphExecutor:
# Set current phase for phase-aware compaction
continuous_conversation.set_current_phase(next_spec.id)
# Opportunistic compaction at transition:
# 1. Prune old tool results (free, no LLM call)
# 2. If still over 80%, do a phase-graduated compact
# Phase-boundary compaction (same flow as EventLoopNode._compact)
if continuous_conversation.usage_ratio() > 0.5:
await continuous_conversation.prune_old_tool_results(
protect_tokens=2000,
)
if continuous_conversation.needs_compaction():
_phase_ratio = continuous_conversation.usage_ratio()
self.logger.info(
" Phase-boundary compaction (%.0f%% usage)",
continuous_conversation.usage_ratio() * 100,
_phase_ratio * 100,
)
summary = (
f"Summary of earlier phases (before {next_spec.name}). "
"See transition markers for phase details."
)
await continuous_conversation.compact(
summary,
keep_recent=4,
phase_graduated=True,
_data_dir = (
str(self._storage_path / "data") if self._storage_path else None
)
# Step 1: Structural compaction (>=80%)
if _data_dir:
_pre = continuous_conversation.usage_ratio()
await continuous_conversation.compact_preserving_structure(
spillover_dir=_data_dir,
keep_recent=4,
phase_graduated=True,
)
if continuous_conversation.usage_ratio() >= 0.9 * _pre:
await continuous_conversation.compact_preserving_structure(
spillover_dir=_data_dir,
keep_recent=4,
phase_graduated=True,
aggressive=True,
)
# Step 2: LLM compaction (>95%)
if (
continuous_conversation.usage_ratio() > 0.95
and self._llm is not None
):
self.logger.info(
" LLM phase-boundary compaction (%.0f%% usage)",
continuous_conversation.usage_ratio() * 100,
)
try:
_llm_summary = await self._phase_llm_compact(
continuous_conversation,
next_spec,
list(continuous_conversation.messages),
)
await continuous_conversation.compact(
_llm_summary,
keep_recent=2,
phase_graduated=True,
)
except Exception as e:
self.logger.warning(
" Phase LLM compaction failed: %s",
e,
)
# Step 3: Emergency (only if still over budget)
if continuous_conversation.needs_compaction():
self.logger.warning(
" Emergency phase compaction (%.0f%%)",
continuous_conversation.usage_ratio() * 100,
)
summary = (
f"Summary of earlier phases "
f"(before {next_spec.name}). "
"See transition markers for phase details."
)
await continuous_conversation.compact(
summary,
keep_recent=1,
phase_graduated=True,
)
# Update input_data for next node
input_data = result.output
@@ -1230,6 +1551,34 @@ class GraphExecutor:
# Handle cancellation (e.g., TUI quit) - save as paused instead of failed
self.logger.info("⏸ Execution cancelled - saving state for resume")
# Flush WIP accumulator outputs from the interrupted node's
# cursor.json into SharedMemory so they survive resume. The
# accumulator writes to cursor.json on every set() call, but
# only writes to SharedMemory when the judge ACCEPTs. Without
# this, edge conditions checking these keys see None on resume.
if current_node_id and self._storage_path:
try:
import json as _json
cursor_path = self._storage_path / "conversations" / "cursor.json"
if cursor_path.exists():
cursor_data = _json.loads(cursor_path.read_text(encoding="utf-8"))
wip_outputs = cursor_data.get("outputs", {})
for key, value in wip_outputs.items():
if value is not None:
memory.write(key, value, validate=False)
if wip_outputs:
self.logger.info(
"Flushed %d WIP accumulator outputs to memory: %s",
len(wip_outputs),
list(wip_outputs.keys()),
)
except Exception:
self.logger.debug(
"Could not flush accumulator outputs from cursor",
exc_info=True,
)
# Save memory and state for resume
saved_memory = memory.read_all()
session_state_out: dict[str, Any] = {
@@ -1307,6 +1656,23 @@ class GraphExecutor:
execution_quality="failed",
)
# Flush WIP accumulator outputs (same as CancelledError path)
if current_node_id and self._storage_path:
try:
import json as _json
cursor_path = self._storage_path / "conversations" / "cursor.json"
if cursor_path.exists():
cursor_data = _json.loads(cursor_path.read_text(encoding="utf-8"))
for key, value in cursor_data.get("outputs", {}).items():
if value is not None:
memory.write(key, value, validate=False)
except Exception:
self.logger.debug(
"Could not flush accumulator outputs from cursor",
exc_info=True,
)
# Save memory and state for potential resume
saved_memory = memory.read_all()
session_state_out: dict[str, Any] = {
@@ -1370,6 +1736,11 @@ class GraphExecutor:
inherited_conversation: Any = None,
override_tools: list | None = None,
cumulative_output_keys: list[str] | None = None,
event_triggered: bool = False,
identity_prompt: str = "",
narrative: str = "",
node_registry: dict[str, NodeSpec] | None = None,
graph: "GraphSpec | None" = None,
) -> NodeContext:
"""Build execution context for a node."""
# Filter tools to those available to this node
@@ -1387,6 +1758,19 @@ class GraphExecutor:
write_keys=node_spec.output_keys,
)
# Build per-node accounts prompt (filtered to this node's tools)
node_accounts_prompt = self.accounts_prompt
if self.accounts_data and self.tool_provider_map:
from framework.graph.prompt_composer import build_accounts_prompt
node_accounts_prompt = build_accounts_prompt(
self.accounts_data,
self.tool_provider_map,
node_tool_names=node_spec.tools,
)
goal_context = goal.to_prompt_context()
return NodeContext(
runtime=self.runtime,
node_id=node_spec.id,
@@ -1395,7 +1779,7 @@ class GraphExecutor:
input_data=input_data,
llm=self.llm,
available_tools=available_tools,
goal_context=goal.to_prompt_context(),
goal_context=goal_context,
goal=goal, # Pass Goal object for LLM-powered routers
max_tokens=max_tokens,
runtime_logger=self.runtime_logger,
@@ -1403,18 +1787,30 @@ class GraphExecutor:
continuous_mode=continuous_mode,
inherited_conversation=inherited_conversation,
cumulative_output_keys=cumulative_output_keys or [],
event_triggered=event_triggered,
accounts_prompt=node_accounts_prompt,
identity_prompt=identity_prompt,
narrative=narrative,
execution_id=self._execution_id,
stream_id=self._stream_id,
node_registry=node_registry or {},
all_tools=list(self.tools), # Full catalog for subagent tool resolution
shared_node_registry=self.node_registry, # For subagent escalation routing
dynamic_tools_provider=self.dynamic_tools_provider,
)
# Valid node types - no ambiguous "llm" type allowed
VALID_NODE_TYPES = {
"llm_tool_use",
"llm_generate",
"router",
"function",
"human_input",
"event_loop",
"gcu",
}
# Node types removed in v0.5 — provide migration guidance
REMOVED_NODE_TYPES = {
"function": "event_loop",
"llm_tool_use": "event_loop",
"llm_generate": "event_loop",
"router": "event_loop", # Unused theoretical infrastructure
"human_input": "event_loop", # Use client_facing=True instead
}
DEPRECATED_NODE_TYPES = {"llm_tool_use": "event_loop", "llm_generate": "event_loop"}
def _get_node_implementation(
self, node_spec: NodeSpec, cleanup_llm_model: str | None = None
@@ -1424,63 +1820,24 @@ class GraphExecutor:
if node_spec.id in self.node_registry:
return self.node_registry[node_spec.id]
# Reject removed node types with migration guidance
if node_spec.node_type in self.REMOVED_NODE_TYPES:
replacement = self.REMOVED_NODE_TYPES[node_spec.node_type]
raise RuntimeError(
f"Node type '{node_spec.node_type}' was removed in v0.5. "
f"Migrate node '{node_spec.id}' to '{replacement}'. "
f"See https://github.com/adenhq/hive/issues/4753 for migration guide."
)
# Validate node type
if node_spec.node_type not in self.VALID_NODE_TYPES:
raise RuntimeError(
f"Invalid node type '{node_spec.node_type}' for node '{node_spec.id}'. "
f"Must be one of: {sorted(self.VALID_NODE_TYPES)}. "
f"Use 'llm_tool_use' for nodes that call tools, 'llm_generate' for text generation."
)
# Warn on deprecated node types
if node_spec.node_type in self.DEPRECATED_NODE_TYPES:
replacement = self.DEPRECATED_NODE_TYPES[node_spec.node_type]
warnings.warn(
f"Node type '{node_spec.node_type}' is deprecated. "
f"Use '{replacement}' instead. "
f"Node: '{node_spec.id}'",
DeprecationWarning,
stacklevel=2,
f"Must be one of: {sorted(self.VALID_NODE_TYPES)}."
)
# Create based on type
if node_spec.node_type == "llm_tool_use":
if not node_spec.tools:
raise RuntimeError(
f"Node '{node_spec.id}' is type 'llm_tool_use' but declares no tools. "
"Either add tools to the node or change type to 'llm_generate'."
)
return LLMNode(
tool_executor=self.tool_executor,
require_tools=True,
cleanup_llm_model=cleanup_llm_model,
)
if node_spec.node_type == "llm_generate":
return LLMNode(
tool_executor=None,
require_tools=False,
cleanup_llm_model=cleanup_llm_model,
)
if node_spec.node_type == "router":
return RouterNode()
if node_spec.node_type == "function":
# Function nodes need explicit registration
raise RuntimeError(
f"Function node '{node_spec.id}' not registered. Register with node_registry."
)
if node_spec.node_type == "human_input":
# Human input nodes are handled specially by HITL mechanism
return LLMNode(
tool_executor=None,
require_tools=False,
cleanup_llm_model=cleanup_llm_model,
)
if node_spec.node_type == "event_loop":
if node_spec.node_type in ("event_loop", "gcu"):
# Auto-create EventLoopNode with sensible defaults.
# Custom configs can still be pre-registered via node_registry.
from framework.graph.event_loop_node import EventLoopNode, LoopConfig
@@ -1490,7 +1847,7 @@ class GraphExecutor:
if self._storage_path:
from framework.storage.conversation_store import FileConversationStore
store_path = self._storage_path / "conversations" / node_spec.id
store_path = self._storage_path / "conversations"
conv_store = FileConversationStore(base_path=store_path)
# Auto-configure spillover directory for large tool results.
@@ -1510,11 +1867,11 @@ class GraphExecutor:
judge=None, # implicit judge: accept when output_keys are filled
config=LoopConfig(
max_iterations=lc.get("max_iterations", default_max_iter),
max_tool_calls_per_turn=lc.get("max_tool_calls_per_turn", 10),
max_tool_calls_per_turn=lc.get("max_tool_calls_per_turn", 30),
tool_call_overflow_margin=lc.get("tool_call_overflow_margin", 0.5),
stall_detection_threshold=lc.get("stall_detection_threshold", 3),
max_history_tokens=lc.get("max_history_tokens", 32000),
max_tool_result_chars=lc.get("max_tool_result_chars", 3_000),
max_tool_result_chars=lc.get("max_tool_result_chars", 30_000),
spillover_dir=spillover,
),
tool_executor=self.tool_executor,
@@ -1527,7 +1884,7 @@ class GraphExecutor:
# Should never reach here due to validation above
raise RuntimeError(f"Unhandled node type: {node_spec.node_type}")
def _follow_edges(
async def _follow_edges(
self,
graph: GraphSpec,
goal: Goal,
@@ -1542,7 +1899,7 @@ class GraphExecutor:
for edge in edges:
target_node_spec = graph.get_node(edge.target)
if edge.should_traverse(
if await edge.should_traverse(
source_success=result.success,
source_output=result.output,
memory=memory.read_all(),
@@ -1568,7 +1925,7 @@ class GraphExecutor:
self.logger.warning(f"⚠ Output validation failed: {validation.errors}")
# Clean the output
cleaned_output = self.output_cleaner.clean_output(
cleaned_output = await self.output_cleaner.clean_output(
output=output_to_validate,
source_node_id=current_node_id,
target_node_spec=target_node_spec,
@@ -1606,7 +1963,7 @@ class GraphExecutor:
return None
def _get_all_traversable_edges(
async def _get_all_traversable_edges(
self,
graph: GraphSpec,
goal: Goal,
@@ -1626,7 +1983,7 @@ class GraphExecutor:
for edge in edges:
target_node_spec = graph.get_node(edge.target)
if edge.should_traverse(
if await edge.should_traverse(
source_success=result.success,
source_output=result.output,
memory=memory.read_all(),
@@ -1697,6 +2054,7 @@ class GraphExecutor:
source_result: NodeResult,
source_node_spec: Any,
path: list[str],
node_registry: dict[str, NodeSpec] | None = None,
) -> tuple[dict[str, NodeResult], int, int]:
"""
Execute multiple branches in parallel using asyncio.gather.
@@ -1739,14 +2097,19 @@ class GraphExecutor:
branch.error = f"Node {branch.node_id} not found in graph"
return branch, RuntimeError(branch.error)
# Get node implementation to check its type
branch_impl = self._get_node_implementation(node_spec, graph.cleanup_llm_model)
effective_max_retries = node_spec.max_retries
if node_spec.node_type == "event_loop":
if effective_max_retries > 1:
self.logger.warning(
f"EventLoopNode '{node_spec.id}' has "
f"max_retries={effective_max_retries}. Overriding "
"to 1 — event loop nodes handle retry internally."
)
# Only override for actual EventLoopNode instances, not custom NodeProtocol impls
from framework.graph.event_loop_node import EventLoopNode
if isinstance(branch_impl, EventLoopNode) and effective_max_retries > 1:
self.logger.warning(
f"EventLoopNode '{node_spec.id}' has "
f"max_retries={effective_max_retries}. Overriding "
"to 1 — event loop nodes handle retry internally."
)
effective_max_retries = 1
branch.status = "running"
@@ -1768,7 +2131,7 @@ class GraphExecutor:
f"⚠ Output validation failed for branch "
f"{branch.node_id}: {validation.errors}"
)
cleaned_output = self.output_cleaner.clean_output(
cleaned_output = await self.output_cleaner.clean_output(
output=mem_snapshot,
source_node_id=source_node_spec.id if source_node_spec else "unknown",
target_node_spec=node_spec,
@@ -1789,13 +2152,23 @@ class GraphExecutor:
branch.retry_count = attempt
# Build context for this branch
ctx = self._build_context(node_spec, memory, goal, mapped, graph.max_tokens)
ctx = self._build_context(
node_spec,
memory,
goal,
mapped,
graph.max_tokens,
node_registry=node_registry,
graph=graph,
)
node_impl = self._get_node_implementation(node_spec, graph.cleanup_llm_model)
# Emit node-started event (skip event_loop nodes)
if self._event_bus and node_spec.node_type != "event_loop":
await self._event_bus.emit_node_loop_started(
stream_id=self._stream_id, node_id=branch.node_id
stream_id=self._stream_id,
node_id=branch.node_id,
execution_id=self._execution_id,
)
self.logger.info(
@@ -1819,7 +2192,10 @@ class GraphExecutor:
# Emit node-completed event (skip event_loop nodes)
if self._event_bus and node_spec.node_type != "event_loop":
await self._event_bus.emit_node_loop_completed(
stream_id=self._stream_id, node_id=branch.node_id, iterations=1
stream_id=self._stream_id,
node_id=branch.node_id,
iterations=1,
execution_id=self._execution_id,
)
if result.success:
@@ -1912,10 +2288,6 @@ class GraphExecutor:
"""Register a custom node implementation."""
self.node_registry[node_id] = implementation
def register_function(self, node_id: str, func: Callable) -> None:
"""Register a function as a node."""
self.node_registry[node_id] = FunctionNode(func)
def request_pause(self) -> None:
"""
Request graceful pause of the current execution.
+23
View File
@@ -0,0 +1,23 @@
"""File tools MCP server constants.
Analogous to ``gcu.py``: defines the server name and default stdio config
so the runner can auto-register the files MCP server for any agent that has
``event_loop`` or ``gcu`` nodes.
"""
# ---------------------------------------------------------------------------
# MCP server identity
# ---------------------------------------------------------------------------
# Single source of truth for the server's name; reused as the "name" entry of
# the config dict below so the two cannot drift apart.
FILES_MCP_SERVER_NAME = "files-tools"
"""Name used to identify the file tools MCP server in ``mcp_servers.json``."""
# Launch description for the server: the command line below amounts to
# ``uv run python files_server.py --stdio``, started from "cwd".
FILES_MCP_SERVER_CONFIG: dict = {
"name": FILES_MCP_SERVER_NAME,
"transport": "stdio",
"command": "uv",
"args": ["run", "python", "files_server.py", "--stdio"],
# Relative path; per the note below it is resolved from exports/<agent>/.
"cwd": "../../tools",
"description": "File tools for reading, writing, editing, and searching files",
}
"""Default stdio config for the file tools MCP server (relative to exports/<agent>/)."""
-552
View File
@@ -1,552 +0,0 @@
"""
Flexible Graph Executor with Worker-Judge Loop.
Executes plans created by external planner (Claude Code, etc.)
using a Worker-Judge loop:
1. External planner creates Plan
2. FlexibleGraphExecutor receives Plan
3. Worker executes each step
4. Judge evaluates each result
5. If Judge says "replan", return to external planner with feedback
6. If Judge says "escalate", request human intervention
7. If all steps complete, return success
This keeps planning external while execution/evaluation is internal.
"""
from collections.abc import Callable
from dataclasses import dataclass
from datetime import datetime
from typing import Any
from framework.graph.code_sandbox import CodeSandbox
from framework.graph.goal import Goal
from framework.graph.judge import HybridJudge, create_default_judge
from framework.graph.plan import (
ApprovalDecision,
ApprovalRequest,
ApprovalResult,
ExecutionStatus,
Judgment,
JudgmentAction,
Plan,
PlanExecutionResult,
PlanStep,
StepStatus,
)
from framework.graph.worker_node import StepExecutionResult, WorkerNode
from framework.llm.provider import LLMProvider, Tool
from framework.runtime.core import Runtime
# Type alias for approval callback
ApprovalCallback = Callable[[ApprovalRequest], ApprovalResult]
@dataclass
class ExecutorConfig:
    """Tunable limits for a FlexibleGraphExecutor.

    Attributes:
        max_retries_per_step: Intended per-step retry budget.
            NOTE(review): the executor code visible here consults
            ``step.max_retries`` instead; confirm where this is read.
        max_total_steps: Upper bound on the number of worker executions
            performed by a single ``execute_plan`` call.
        timeout_seconds: Intended overall time budget, in seconds.
            NOTE(review): no enforcement is visible in the executor; confirm.
        enable_parallel_execution: Reserved for future parallel step
            execution; steps currently run one at a time.
    """

    max_retries_per_step: int = 3
    max_total_steps: int = 100
    timeout_seconds: int = 300
    enable_parallel_execution: bool = False  # Future: parallel step execution
class FlexibleGraphExecutor:
"""
Executes plans with Worker-Judge loop.
Plans come from external source (Claude Code, etc.).
Returns feedback for replanning if needed.
Usage:
executor = FlexibleGraphExecutor(
runtime=runtime,
llm=llm_provider,
tools=tools,
)
result = await executor.execute_plan(plan, goal, context)
if result.status == ExecutionStatus.NEEDS_REPLAN:
# External planner should create new plan using result.feedback
new_plan = external_planner.replan(result.feedback_context)
result = await executor.execute_plan(new_plan, goal, result.feedback_context)
"""
def __init__(
    self,
    runtime: Runtime,
    llm: LLMProvider | None = None,
    tools: dict[str, Tool] | None = None,
    tool_executor: Callable | None = None,
    functions: dict[str, Callable] | None = None,
    judge: HybridJudge | None = None,
    config: ExecutorConfig | None = None,
    approval_callback: ApprovalCallback | None = None,
):
    """
    Wire up the executor with its runtime, judge and worker.

    Args:
        runtime: Runtime used for run bookkeeping and decision logging
        llm: LLM provider handed to both the worker and the default judge
        tools: Available tools, keyed by name (empty dict when omitted)
        tool_executor: Callable used to execute tools
        functions: Registered functions, keyed by name (empty dict when omitted)
        judge: Custom judge; when omitted one is built with
            ``create_default_judge(llm)``
        config: Executor configuration; defaults to ``ExecutorConfig()``
        approval_callback: Callback for human-in-the-loop approval.
            If None, steps requiring approval will pause execution.
    """
    # Collaborators that are stored exactly as given.
    self.runtime = runtime
    self.llm = llm
    self.tool_executor = tool_executor
    self.approval_callback = approval_callback

    # Optional arguments fall back to fresh defaults when falsy.
    self.tools = tools if tools else {}
    self.functions = functions if functions else {}
    self.config = config if config else ExecutorConfig()

    # Judge first, then the worker that performs the individual steps.
    self.judge = judge if judge else create_default_judge(llm)

    # The worker receives the raw ``tools`` / ``functions`` arguments
    # (possibly None), not the normalised copies stored above.
    worker_sandbox = CodeSandbox()
    self.worker = WorkerNode(
        runtime=runtime,
        llm=llm,
        tools=tools,
        tool_executor=tool_executor,
        functions=functions,
        sandbox=worker_sandbox,
    )
async def execute_plan(
    self,
    plan: Plan,
    goal: Goal,
    context: dict[str, Any] | None = None,
) -> PlanExecutionResult:
    """
    Execute a plan created by an external planner.

    Repeatedly picks the first ready step, optionally asks for approval,
    lets the worker execute it and the judge evaluate it, until the plan
    is complete, a judgment/approval stops execution, or the step budget
    (``config.max_total_steps``) is used up.

    Every path that leaves this method after the run has been started
    closes it with ``runtime.end_run``.

    Args:
        plan: The plan to execute
        goal: The goal context
        context: Initial context (e.g., from previous execution). The
            mapping is copied; the caller's dict is never modified.

    Returns:
        PlanExecutionResult with status and feedback. Exceptions raised
        inside the execution loop are reported as a FAILED result rather
        than propagated.
    """
    # Work on a private copy. Previously a non-empty caller dict was mutated
    # in place while an empty one was silently replaced.
    context = dict(context or {})
    context.update(plan.context)  # Merge plan's accumulated context

    # Start run
    self.runtime.start_run(
        goal_id=goal.id,
        goal_description=goal.description,
        input_data={"plan_id": plan.id, "revision": plan.revision},
    )

    steps_executed = 0
    total_tokens = 0
    total_latency = 0

    def _stop(status: ExecutionStatus, feedback: str) -> PlanExecutionResult:
        """Build an early-exit result and close the run as unsuccessful."""
        stopped = self._create_result(
            status=status,
            plan=plan,
            context=context,
            feedback=feedback,
            steps_executed=steps_executed,
            total_tokens=total_tokens,
            total_latency=total_latency,
        )
        self.runtime.end_run(
            success=False,
            narrative=f"Execution stopped: {stopped.status.value}",
        )
        return stopped

    try:
        while steps_executed < self.config.max_total_steps:
            # Get next ready steps
            ready_steps = plan.get_ready_steps()

            if not ready_steps:
                # Check if we're done or stuck
                if plan.is_complete():
                    break
                # No ready steps but not complete - something's wrong
                return _stop(
                    ExecutionStatus.NEEDS_REPLAN,
                    "No executable steps available but plan not complete. "
                    "Check dependencies.",
                )

            # Execute next step (for now, sequential; could be parallel)
            step = ready_steps[0]

            # APPROVAL CHECK - before execution
            if step.requires_approval:
                approval_result = await self._request_approval(step, context)

                if approval_result is None:
                    # No callback, pause execution
                    step.status = StepStatus.AWAITING_APPROVAL
                    return _stop(
                        ExecutionStatus.AWAITING_APPROVAL,
                        f"Step '{step.id}' requires approval: {step.description}",
                    )

                if approval_result.decision == ApprovalDecision.REJECT:
                    step.status = StepStatus.REJECTED
                    step.error = approval_result.reason or "Rejected by human"
                    # Skip this step and continue with dependents marked as skipped
                    self._skip_dependent_steps(plan, step.id)
                    continue

                if approval_result.decision == ApprovalDecision.ABORT:
                    return _stop(
                        ExecutionStatus.ABORTED,
                        approval_result.reason or "Aborted by human",
                    )

                if approval_result.decision == ApprovalDecision.MODIFY:
                    # Apply modifications to step
                    if approval_result.modifications:
                        self._apply_modifications(step, approval_result.modifications)

                # APPROVE - continue to execution

            step.status = StepStatus.IN_PROGRESS
            step.started_at = datetime.now()
            step.attempts += 1

            # WORK
            work_result = await self.worker.execute(step, context)
            steps_executed += 1
            total_tokens += work_result.tokens_used
            total_latency += work_result.latency_ms

            # JUDGE
            judgment = await self.judge.evaluate(
                step=step,
                result=work_result.__dict__,
                goal=goal,
                context=context,
            )

            # Handle judgment
            result = await self._handle_judgment(
                step=step,
                work_result=work_result,
                judgment=judgment,
                plan=plan,
                goal=goal,
                context=context,
                steps_executed=steps_executed,
                total_tokens=total_tokens,
                total_latency=total_latency,
            )

            if result is not None:
                # Judgment resulted in early return (replan/escalate)
                self.runtime.end_run(
                    success=False,
                    narrative=f"Execution stopped: {result.status.value}",
                )
                return result

        # The loop also ends when the step budget runs out. That must not be
        # reported as success unless the plan really is finished.
        if not plan.is_complete():
            return _stop(
                ExecutionStatus.NEEDS_REPLAN,
                f"Step budget exhausted after {steps_executed} steps "
                f"(max_total_steps={self.config.max_total_steps}) "
                "before the plan completed.",
            )

        # All steps completed successfully
        self.runtime.end_run(
            success=True,
            output_data=context,
            narrative=f"Plan completed: {steps_executed} steps executed",
        )

        return self._create_result(
            status=ExecutionStatus.COMPLETED,
            plan=plan,
            context=context,
            steps_executed=steps_executed,
            total_tokens=total_tokens,
            total_latency=total_latency,
        )

    except Exception as e:
        # Top-level boundary: record the problem, close the run and turn
        # the exception into a FAILED result for the external planner.
        self.runtime.report_problem(
            severity="critical",
            description=str(e),
        )
        self.runtime.end_run(
            success=False,
            narrative=f"Execution failed: {e}",
        )

        return PlanExecutionResult(
            status=ExecutionStatus.FAILED,
            error=str(e),
            feedback=f"Execution error: {e}",
            feedback_context=plan.to_feedback_context(),
            completed_steps=[s.id for s in plan.get_completed_steps()],
            steps_executed=steps_executed,
            total_tokens=total_tokens,
            total_latency_ms=total_latency,
        )
async def _handle_judgment(
    self,
    step: PlanStep,
    work_result: StepExecutionResult,
    judgment: Judgment,
    plan: Plan,
    goal: Goal,
    context: dict[str, Any],
    steps_executed: int,
    total_tokens: int,
    total_latency: int,
) -> PlanExecutionResult | None:
    """
    Apply the judge's verdict for one executed step.

    Updates the step's status (and, on acceptance, ``context`` and
    ``plan.context``) according to ``judgment.action``.

    Returns None when the caller should keep executing, or a
    PlanExecutionResult when execution has to stop.
    """
    verdict = judgment.action

    def _halt(status, feedback):
        # Shared builder for every "stop execution" outcome below.
        return self._create_result(
            status=status,
            plan=plan,
            context=context,
            feedback=feedback,
            steps_executed=steps_executed,
            total_tokens=total_tokens,
            total_latency=total_latency,
        )

    if verdict == JudgmentAction.ACCEPT:
        # Step succeeded - record it and keep going.
        step.status = StepStatus.COMPLETED
        step.completed_at = datetime.now()
        step.result = work_result.outputs

        # A generic "result" value stands in for any expected output key
        # the worker did not produce explicitly.
        mapped_outputs = work_result.outputs.copy()
        if step.expected_outputs and "result" in mapped_outputs:
            generic_value = mapped_outputs["result"]
            for wanted_key in step.expected_outputs:
                mapped_outputs.setdefault(wanted_key, generic_value)

        # Visible to later steps, and kept on the plan for replanning feedback.
        context.update(mapped_outputs)
        plan.context[step.id] = mapped_outputs
        return None

    if verdict == JudgmentAction.RETRY:
        if step.attempts < step.max_retries:
            # Back to pending so the main loop picks the step up again.
            step.status = StepStatus.PENDING
            step.error = judgment.feedback
            self.runtime.decide(
                intent=f"Retry step {step.id}",
                options=[{"id": "retry", "description": "Retry with feedback"}],
                chosen="retry",
                reasoning=judgment.reasoning,
                context={"attempt": step.attempts, "feedback": judgment.feedback},
            )
            return None

        # Retry budget used up - hand the problem back to the planner.
        step.status = StepStatus.FAILED
        step.error = f"Max retries ({step.max_retries}) exceeded: {judgment.feedback}"
        return _halt(
            ExecutionStatus.NEEDS_REPLAN,
            f"Step '{step.id}' failed after {step.attempts} attempts: {judgment.feedback}",
        )

    if verdict == JudgmentAction.REPLAN:
        # Return to external planner
        step.status = StepStatus.FAILED
        step.error = judgment.feedback
        return _halt(
            ExecutionStatus.NEEDS_REPLAN,
            judgment.feedback or f"Step '{step.id}' requires replanning",
        )

    if verdict == JudgmentAction.ESCALATE:
        # Request human intervention; the step's status is left untouched.
        return _halt(
            ExecutionStatus.NEEDS_ESCALATION,
            judgment.feedback or f"Step '{step.id}' requires human intervention",
        )

    # Unknown action - continue
    return None
def _create_result(
    self,
    status: ExecutionStatus,
    plan: Plan,
    context: dict[str, Any],
    feedback: str | None = None,
    steps_executed: int = 0,
    total_tokens: int = 0,
    total_latency: int = 0,
) -> PlanExecutionResult:
    """Package the current execution state into a PlanExecutionResult.

    Args:
        status: Outcome to report.
        plan: Plan whose feedback context and completed step IDs are attached.
        context: Accumulated execution context, returned as ``results``.
        feedback: Optional message for the caller.
        steps_executed: Number of steps executed so far.
        total_tokens: Token total to report.
        total_latency: Latency total, reported as ``total_latency_ms``.

    Returns:
        The populated PlanExecutionResult.
    """
    replan_context = plan.to_feedback_context()
    finished_ids = [done.id for done in plan.get_completed_steps()]
    return PlanExecutionResult(
        status=status,
        results=context,
        feedback=feedback,
        feedback_context=replan_context,
        completed_steps=finished_ids,
        steps_executed=steps_executed,
        total_tokens=total_tokens,
        total_latency_ms=total_latency,
    )
def register_function(self, name: str, func: Callable) -> None:
    """Make ``func`` available to FUNCTION actions under ``name``.

    The callable is recorded in this executor's ``functions`` mapping and
    then registered with the worker under the same name.
    """
    self.functions[name] = func
    self.worker.register_function(name, func)
def register_tool(self, tool: Tool) -> None:
    """Make ``tool`` available to TOOL_USE actions.

    The tool is stored in this executor's ``tools`` mapping keyed by
    ``tool.name`` and then registered with the worker.
    """
    self.tools[tool.name] = tool
    self.worker.register_tool(tool)
def add_evaluation_rule(self, rule) -> None:
    """Forward ``rule`` to the judge via ``judge.add_rule``."""
    self.judge.add_rule(rule)
async def _request_approval(
self,
step: PlanStep,
context: dict[str, Any],
) -> ApprovalResult | None:
"""
Request human approval for a step.
Returns None if no callback is set (execution should pause).
"""
if self.approval_callback is None:
return None
# Build preview of what will happen
preview_parts = []
if step.action.tool_name:
preview_parts.append(f"Tool: {step.action.tool_name}")
if step.action.tool_args:
import json
args_preview = json.dumps(step.action.tool_args, indent=2, default=str)
if len(args_preview) > 500:
args_preview = args_preview[:500] + "..."
preview_parts.append(f"Args: {args_preview}")
elif step.action.prompt:
prompt_preview = (
step.action.prompt[:300] + "..."
if len(step.action.prompt) > 300
else step.action.prompt
)
preview_parts.append(f"Prompt: {prompt_preview}")
# Include step inputs resolved from context (what will be sent/used)
relevant_context = {}
for input_key, input_value in step.inputs.items():
# Resolve variable references like "$email_sequence"
if isinstance(input_value, str) and input_value.startswith("$"):
context_key = input_value[1:] # Remove $ prefix
if context_key in context:
relevant_context[input_key] = context[context_key]
else:
relevant_context[input_key] = input_value
request = ApprovalRequest(
step_id=step.id,
step_description=step.description,
action_type=step.action.action_type.value,
action_details={
"tool_name": step.action.tool_name,
"tool_args": step.action.tool_args,
"prompt": step.action.prompt,
},
context=relevant_context,
approval_message=step.approval_message,
preview="\n".join(preview_parts) if preview_parts else None,
)
return self.approval_callback(request)
def _skip_dependent_steps(self, plan: Plan, rejected_step_id: str) -> None:
    """Mark pending steps downstream of ``rejected_step_id`` as skipped.

    Every PENDING step that lists ``rejected_step_id`` as a dependency is
    set to SKIPPED with an explanatory error, and the same treatment is
    then applied to the steps that depend on it.
    """
    for candidate in plan.steps:
        if rejected_step_id not in candidate.dependencies:
            continue
        if candidate.status != StepStatus.PENDING:
            continue
        candidate.status = StepStatus.SKIPPED
        candidate.error = f"Skipped because dependency '{rejected_step_id}' was rejected"
        # Propagate to anything that depends on the step just skipped
        self._skip_dependent_steps(plan, candidate.id)
def _apply_modifications(self, step: PlanStep, modifications: dict[str, Any]) -> None:
"""Apply human modifications to a step before execution."""
# Allow modifying tool args
if "tool_args" in modifications and step.action.tool_args:
step.action.tool_args.update(modifications["tool_args"])
# Allow modifying prompt
if "prompt" in modifications:
step.action.prompt = modifications["prompt"]
# Allow modifying inputs
if "inputs" in modifications:
step.inputs.update(modifications["inputs"])
def set_approval_callback(self, callback: ApprovalCallback) -> None:
    """Install ``callback`` as the handler used for HITL approval steps."""
    self.approval_callback = callback
# Module-level shortcut for one-off plan execution
async def execute_plan(
    plan: Plan,
    goal: Goal,
    runtime: Runtime,
    llm: LLMProvider | None = None,
    tools: dict[str, Tool] | None = None,
    tool_executor: Callable | None = None,
    context: dict[str, Any] | None = None,
) -> PlanExecutionResult:
    """
    Run ``plan`` on a freshly constructed FlexibleGraphExecutor.

    The executor is built from ``runtime``, ``llm``, ``tools`` and
    ``tool_executor``; ``plan``, ``goal`` and ``context`` are passed to its
    ``execute_plan`` method and the awaited result is returned.
    """
    return await FlexibleGraphExecutor(
        runtime=runtime,
        llm=llm,
        tools=tools,
        tool_executor=tool_executor,
    ).execute_plan(plan, goal, context)
+86
View File
@@ -0,0 +1,86 @@
"""GCU (browser automation) node type constants.
A ``gcu`` node is an ``event_loop`` node with two automatic enhancements:
1. A canonical browser best-practices system prompt is prepended.
2. All tools from the GCU MCP server are auto-included.
No new ``NodeProtocol`` subclass is introduced: the ``gcu`` type is purely a
declarative signal processed by the runner and executor at setup time.
"""
# ---------------------------------------------------------------------------
# MCP server identity
# ---------------------------------------------------------------------------
# Shared identifier: also used as the "name" entry of the config below.
GCU_SERVER_NAME = "gcu-tools"
"""Name used to identify the GCU MCP server in ``mcp_servers.json``."""
# Launch description for the server: the command and args are run with the
# given working directory, speaking MCP over the stdio transport.
GCU_MCP_SERVER_CONFIG: dict = {
    "name": GCU_SERVER_NAME,
    "transport": "stdio",
    "command": "uv",
    "args": ["run", "python", "-m", "gcu.server", "--stdio"],
    # Relative path; see the note below for what it is relative to.
    "cwd": "../../tools",
    "description": "GCU tools for browser automation",
}
"""Default stdio config for the GCU MCP server (relative to exports/<agent>/)."""
# ---------------------------------------------------------------------------
# Browser best-practices system prompt
# ---------------------------------------------------------------------------
# Markdown-formatted guidance text for browser automation. The backslash
# right after the opening quotes keeps the prompt from starting with a
# blank line.
GCU_BROWSER_SYSTEM_PROMPT = """\
# Browser Automation Best Practices
Follow these rules for reliable, efficient browser interaction.
## Reading Pages
- ALWAYS prefer `browser_snapshot` over `browser_get_text("body")`
it returns a compact ~1-5 KB accessibility tree vs 100+ KB of raw HTML.
- Use `browser_snapshot_aria` when you need full ARIA properties
for detailed element inspection.
- Do NOT use `browser_screenshot` for reading text content
it produces huge base64 images with no searchable text.
- Only fall back to `browser_get_text` for extracting specific
small elements by CSS selector.
## Navigation & Waiting
- Always call `browser_wait` after navigation actions
(`browser_open`, `browser_navigate`, `browser_click` on links)
to let the page load.
- NEVER re-navigate to the same URL after scrolling
this resets your scroll position and loses loaded content.
## Scrolling
- Use large scroll amounts ~2000 when loading more content
sites like twitter and linkedin have lazy loading for paging.
- After scrolling, take a new `browser_snapshot` to see updated content.
## Error Recovery
- If a tool fails, retry once with the same approach.
- If it fails a second time, STOP retrying and switch approach.
- If `browser_snapshot` fails try `browser_get_text` with a
specific small selector as fallback.
- If `browser_open` fails or page seems stale `browser_stop`,
then `browser_start`, then retry.
## Tab Management
- Use `browser_tabs` to list open tabs when managing multiple pages.
- Pass `target_id` to tools when operating on a specific tab.
- Open background tabs with `browser_open(url=..., background=true)`
to avoid losing your current context.
- Close tabs you no longer need with `browser_close` to free resources.
## Login & Auth Walls
- If you see a "Log in" or "Sign up" prompt instead of expected
content, report the auth wall immediately do NOT attempt to log in.
- Check for cookie consent banners and dismiss them if they block content.
## Efficiency
- Minimize tool calls combine actions where possible.
- When a snapshot result is saved to a spillover file, use
`run_command` with grep to extract specific data rather than
re-reading the full file.
- Call `set_output` in the same turn as your last browser action
when possible don't waste a turn.
"""
+16 -1
View File
@@ -44,6 +44,11 @@ class SuccessCriterion(BaseModel):
metric: str = Field(
description="How to measure: 'output_contains', 'output_equals', 'llm_judge', 'custom'"
)
# NEW: runtime evaluation type (separate from metric)
type: str = Field(
default="success_rate", description="Runtime evaluation type, e.g. 'success_rate'"
)
target: Any = Field(description="The target value or condition")
weight: float = Field(default=1.0, ge=0.0, le=1.0, description="Relative importance (0-1)")
met: bool = False
@@ -171,7 +176,17 @@ class Goal(BaseModel):
return True
def to_prompt_context(self) -> str:
"""Generate context string for LLM prompts."""
"""Generate context string for LLM prompts.
Returns empty string when the goal is a stub (no success criteria,
no constraints, no context). Stub goals are metadata-only: they are used for
graph identification but not communicated to the LLM as actionable
intent. This prevents runtime agents (e.g. the queen) from
misinterpreting their own goal as a user request.
"""
if not self.success_criteria and not self.constraints and not self.context:
return ""
lines = [
f"# Goal: {self.name}",
f"{self.description}",
+1 -1
View File
@@ -197,7 +197,7 @@ Example format:
client = anthropic.Anthropic(api_key=api_key)
message = client.messages.create(
model="claude-3-5-haiku-20241022",
model="claude-haiku-4-5-20251001",
max_tokens=500,
messages=[{"role": "user", "content": prompt}],
)
-406
View File
@@ -1,406 +0,0 @@
"""
Hybrid Judge for Evaluating Plan Step Results.
The HybridJudge evaluates step execution results using:
1. Rule-based evaluation (fast, deterministic)
2. LLM-based evaluation (fallback for ambiguous cases)
Escalation path: rules -> LLM -> human
"""
from dataclasses import dataclass, field
from typing import Any
from framework.graph.code_sandbox import safe_eval
from framework.graph.goal import Goal
from framework.graph.plan import (
EvaluationRule,
Judgment,
JudgmentAction,
PlanStep,
)
from framework.llm.provider import LLMProvider
@dataclass
class RuleEvaluationResult:
    """Outcome of running the judge's rule set against one step result."""

    # True when some rule's condition matched
    is_definitive: bool
    # Judgment produced by the matching rule, if any
    judgment: Judgment | None = None
    # Evaluation context the rules were checked against
    context: dict[str, Any] = field(default_factory=dict)
    # How many rules were evaluated
    rules_checked: int = 0
    # ID of the rule that matched, if any
    rule_matched: str | None = None
class HybridJudge:
    """
    Evaluates plan step results using rules first, then LLM fallback.

    Rules are checked in descending priority order; the first rule whose
    condition evaluates truthy decides the judgment. If no rule matches and
    an LLM is configured, the LLM is asked; otherwise the step is accepted
    with low confidence.

    Usage:
        judge = HybridJudge(llm=llm_provider)
        judge.add_rule(EvaluationRule(
            id="success_check",
            condition="result.get('success') == True",
            action=JudgmentAction.ACCEPT,
        ))
        judgment = await judge.evaluate(step, result, goal)
    """

    def __init__(
        self,
        llm: LLMProvider | None = None,
        rules: list[EvaluationRule] | None = None,
        llm_confidence_threshold: float = 0.7,
    ):
        """
        Initialize the HybridJudge.

        Args:
            llm: LLM provider for ambiguous cases
            rules: Initial evaluation rules
            llm_confidence_threshold: Confidence below this triggers escalation
        """
        self.llm = llm
        # Copy the list so sorting never reorders the caller's own list.
        self.rules: list[EvaluationRule] = list(rules) if rules else []
        self.llm_confidence_threshold = llm_confidence_threshold

        # Sort rules by priority (higher first)
        self._sort_rules()

    def _sort_rules(self):
        """Sort rules in place so higher-priority rules come first."""
        self.rules.sort(key=lambda r: -r.priority)

    def add_rule(self, rule: EvaluationRule) -> None:
        """Add an evaluation rule and restore priority order."""
        self.rules.append(rule)
        self._sort_rules()

    def remove_rule(self, rule_id: str) -> bool:
        """Remove the first rule with the given ID. Returns True if found and removed."""
        for i, rule in enumerate(self.rules):
            if rule.id == rule_id:
                self.rules.pop(i)
                return True
        return False

    async def evaluate(
        self,
        step: PlanStep,
        result: Any,
        goal: Goal,
        context: dict[str, Any] | None = None,
    ) -> Judgment:
        """
        Evaluate a step result.

        Args:
            step: The executed plan step
            result: The result of executing the step
            goal: The goal context for evaluation
            context: Additional context from previous steps

        Returns:
            Judgment with action and feedback
        """
        context = context or {}

        # Try rule-based evaluation first
        rule_result = self._evaluate_rules(step, result, goal, context)
        if rule_result.is_definitive:
            return rule_result.judgment

        # Fall back to LLM evaluation
        if self.llm:
            return await self._evaluate_llm(step, result, goal, context, rule_result)

        # No LLM available - default to accept with low confidence
        return Judgment(
            action=JudgmentAction.ACCEPT,
            reasoning="No definitive rule matched and no LLM available for evaluation",
            confidence=0.5,
            llm_used=False,
        )

    def _evaluate_rules(
        self,
        step: PlanStep,
        result: Any,
        goal: Goal,
        context: dict[str, Any],
    ) -> RuleEvaluationResult:
        """Check the rules in priority order and return the first match.

        Each rule condition is evaluated with ``safe_eval`` against a context
        exposing ``step``, ``result``, ``goal``, ``context``, ``success`` and
        ``error``. A rule whose evaluation fails is treated as not matching.
        """
        rules_checked = 0

        # Build evaluation context
        eval_context = {
            "step": step.model_dump() if hasattr(step, "model_dump") else step,
            "result": result,
            "goal": goal.model_dump() if hasattr(goal, "model_dump") else goal,
            "context": context,
            "success": isinstance(result, dict) and result.get("success", False),
            "error": isinstance(result, dict) and result.get("error"),
        }

        for rule in self.rules:
            rules_checked += 1

            # Evaluate rule condition
            eval_result = safe_eval(rule.condition, eval_context)
            if eval_result.success and eval_result.result:
                # Rule matched!
                feedback = self._format_feedback(rule.feedback_template, eval_context)
                return RuleEvaluationResult(
                    is_definitive=True,
                    judgment=Judgment(
                        action=rule.action,
                        reasoning=rule.description,
                        feedback=feedback if feedback else None,
                        rule_matched=rule.id,
                        confidence=1.0,
                        llm_used=False,
                    ),
                    rules_checked=rules_checked,
                    rule_matched=rule.id,
                )

        # No rule matched definitively
        return RuleEvaluationResult(
            is_definitive=False,
            context=eval_context,
            rules_checked=rules_checked,
        )

    def _format_feedback(
        self,
        template: str,
        context: dict[str, Any],
    ) -> str:
        """Format feedback template with context values.

        Returns an empty string for an empty template. If the template cannot
        be rendered against ``context``, the raw template is returned instead
        of raising.
        """
        if not template:
            return ""
        try:
            return template.format(**context)
        except (LookupError, AttributeError, TypeError, ValueError):
            # str.format raises more than KeyError/ValueError: "{result[error]}"
            # on a non-dict result is a TypeError, "{0}" is an IndexError and
            # "{result.x}" is an AttributeError. Feedback text is best-effort,
            # so none of these should abort judging.
            return template

    async def _evaluate_llm(
        self,
        step: PlanStep,
        result: Any,
        goal: Goal,
        context: dict[str, Any],
        rule_result: RuleEvaluationResult,
    ) -> Judgment:
        """Ask the LLM for a judgment.

        Escalates when the parsed confidence is below
        ``llm_confidence_threshold`` or when the LLM call or parsing raises.
        """
        system_prompt = self._build_llm_system_prompt(goal)
        user_prompt = self._build_llm_user_prompt(step, result, context, rule_result)

        try:
            # NOTE(review): complete() is called without await inside a
            # coroutine; if it blocks, it blocks the event loop - confirm
            # whether the provider offers an awaitable variant.
            response = self.llm.complete(
                messages=[{"role": "user", "content": user_prompt}],
                system=system_prompt,
            )

            # Parse LLM response
            judgment = self._parse_llm_response(response.content)
            judgment.llm_used = True

            # Check confidence threshold
            if judgment.confidence < self.llm_confidence_threshold:
                # Low confidence - escalate
                return Judgment(
                    action=JudgmentAction.ESCALATE,
                    reasoning=(
                        f"LLM confidence ({judgment.confidence:.2f}) "
                        f"below threshold ({self.llm_confidence_threshold})"
                    ),
                    feedback=judgment.feedback,
                    confidence=judgment.confidence,
                    llm_used=True,
                    context={"original_judgment": judgment.model_dump()},
                )

            return judgment

        except Exception as e:
            # LLM failed - escalate
            return Judgment(
                action=JudgmentAction.ESCALATE,
                reasoning=f"LLM evaluation failed: {e}",
                feedback="Human review needed due to LLM error",
                llm_used=True,
            )

    def _build_llm_system_prompt(self, goal: Goal) -> str:
        """Build system prompt for LLM judge."""
        return f"""You are a judge evaluating the execution of a plan step.
GOAL: {goal.description}
SUCCESS CRITERIA:
{chr(10).join(f"- {sc.description}" for sc in goal.success_criteria)}
CONSTRAINTS:
{chr(10).join(f"- {c.description}" for c in goal.constraints)}
Your task is to evaluate whether the step was executed successfully and decide the next action.
Respond in this exact format:
ACTION: [ACCEPT|RETRY|REPLAN|ESCALATE]
CONFIDENCE: [0.0-1.0]
REASONING: [Your reasoning]
FEEDBACK: [Feedback for retry/replan, or empty if accepting]
Actions:
- ACCEPT: Step completed successfully, continue to next step
- RETRY: Step failed but can be retried with feedback
- REPLAN: Step failed in a way that requires replanning
- ESCALATE: Requires human intervention
"""

    def _build_llm_user_prompt(
        self,
        step: PlanStep,
        result: Any,
        context: dict[str, Any],
        rule_result: RuleEvaluationResult,
    ) -> str:
        """Build user prompt for LLM judge."""
        return f"""Evaluate this step execution:
STEP: {step.description}
STEP ID: {step.id}
ACTION TYPE: {step.action.action_type}
EXPECTED OUTPUTS: {step.expected_outputs}
RESULT:
{result}
CONTEXT FROM PREVIOUS STEPS:
{context}
RULES CHECKED: {rule_result.rules_checked} (none matched definitively)
Please evaluate and provide your judgment."""

    def _parse_llm_response(self, response: str) -> Judgment:
        """Parse the line-oriented LLM response into a Judgment.

        Recognises ``ACTION:``, ``CONFIDENCE:``, ``REASONING:`` and
        ``FEEDBACK:`` lines. An unknown action becomes ESCALATE. A confidence
        that is unparseable or outside [0, 1] becomes 0.5. Missing fields
        keep their defaults (ACCEPT, 0.8).
        """
        lines = response.strip().split("\n")

        action = JudgmentAction.ACCEPT
        confidence = 0.8
        reasoning = ""
        feedback = ""

        for line in lines:
            line = line.strip()
            if line.startswith("ACTION:"):
                action_str = line.split(":", 1)[1].strip().lower()
                try:
                    action = JudgmentAction(action_str)
                except ValueError:
                    action = JudgmentAction.ESCALATE
            elif line.startswith("CONFIDENCE:"):
                try:
                    confidence = float(line.split(":", 1)[1].strip())
                except ValueError:
                    confidence = 0.5
                # Out-of-range values ("85", "inf", "nan") would otherwise
                # slip past the escalation threshold; treat them like an
                # unparseable value. NaN fails both comparisons.
                if not (0.0 <= confidence <= 1.0):
                    confidence = 0.5
            elif line.startswith("REASONING:"):
                reasoning = line.split(":", 1)[1].strip()
            elif line.startswith("FEEDBACK:"):
                feedback = line.split(":", 1)[1].strip()

        return Judgment(
            action=action,
            reasoning=reasoning or "LLM evaluation",
            feedback=feedback if feedback else None,
            confidence=confidence,
        )
# Factory function for creating judge with common rules
def create_default_judge(llm: LLMProvider | None = None) -> HybridJudge:
    """
    Create a HybridJudge with commonly useful default rules.

    Rules, in the order they are checked (highest priority first):
        security_escalate (200): escalate on ``error_type == 'security'``
        explicit_success (100): accept on ``success == True``
        max_retries_fail (95): replan once attempts reach max_retries
        transient_error_retry (90): retry on timeout/rate-limit/connection errors
        missing_data_replan (80): replan on ``error_type == 'missing_data'``

    Args:
        llm: LLM provider for fallback evaluation

    Returns:
        Configured HybridJudge instance
    """
    judge = HybridJudge(llm=llm)

    # Rule: Accept on explicit success flag
    judge.add_rule(
        EvaluationRule(
            id="explicit_success",
            description="Step explicitly marked as successful",
            condition="isinstance(result, dict) and result.get('success') == True",
            action=JudgmentAction.ACCEPT,
            priority=100,
        )
    )

    # Rule: Retry on transient errors
    judge.add_rule(
        EvaluationRule(
            id="transient_error_retry",
            description="Transient error that can be retried",
            condition=(
                "isinstance(result, dict) and "
                "result.get('error_type') in ['timeout', 'rate_limit', 'connection_error']"
            ),
            action=JudgmentAction.RETRY,
            feedback_template="Transient error: {result[error]}. Please retry.",
            priority=90,
        )
    )

    # Rule: Replan on missing data
    judge.add_rule(
        EvaluationRule(
            id="missing_data_replan",
            description="Required data not available",
            condition="isinstance(result, dict) and result.get('error_type') == 'missing_data'",
            action=JudgmentAction.REPLAN,
            feedback_template="Missing required data: {result[error]}. Plan needs adjustment.",
            priority=80,
        )
    )

    # Rule: Escalate on security issues
    judge.add_rule(
        EvaluationRule(
            id="security_escalate",
            description="Security issue detected",
            condition="isinstance(result, dict) and result.get('error_type') == 'security'",
            action=JudgmentAction.ESCALATE,
            feedback_template="Security issue detected: {result[error]}",
            priority=200,
        )
    )

    # Rule: Fail on max retries exceeded.
    # Priority sits just below explicit_success (100): the condition looks
    # only at the attempt counter, so at a higher priority it would send a
    # step that succeeded on its final allowed attempt to REPLAN. It stays
    # above transient_error_retry (90) so an exhausted step is not retried.
    judge.add_rule(
        EvaluationRule(
            id="max_retries_fail",
            description="Maximum retries exceeded",
            condition="step.get('attempts', 0) >= step.get('max_retries', 3)",
            action=JudgmentAction.REPLAN,
            feedback_template="Step '{step[id]}' failed after {step[attempts]} attempts",
            priority=95,
        )
    )

    return judge
File diff suppressed because it is too large Load Diff
+2 -2
View File
@@ -206,7 +206,7 @@ class OutputCleaner:
warnings=warnings,
)
def clean_output(
async def clean_output(
self,
output: dict[str, Any],
source_node_id: str,
@@ -288,7 +288,7 @@ Return ONLY valid JSON matching the expected schema. No explanations, no markdow
f"🧹 Cleaning output from '{source_node_id}' using {self.config.fast_model}"
)
response = self.llm.complete(
response = await self.llm.acomplete(
messages=[{"role": "user", "content": prompt}],
system=(
"You clean malformed agent outputs. Return only valid JSON matching the schema."
-513
View File
@@ -1,513 +0,0 @@
"""
Plan Data Structures for Flexible Execution.
Plans are created externally (by Claude Code or another LLM agent) and
executed internally by the FlexibleGraphExecutor with Worker-Judge loop.
The Plan is the contract between the external planner and the executor:
- Planner creates a Plan with PlanSteps
- Executor runs steps and judges results
- If replanning needed, returns feedback to external planner
"""
from datetime import datetime
from enum import StrEnum
from typing import Any
from pydantic import BaseModel, Field
class ActionType(StrEnum):
"""Types of actions a PlanStep can perform."""
LLM_CALL = "llm_call" # Call LLM for generation
TOOL_USE = "tool_use" # Use a registered tool
SUB_GRAPH = "sub_graph" # Execute a sub-graph
FUNCTION = "function" # Call a Python function
CODE_EXECUTION = "code_execution" # Execute dynamic code (sandboxed)
class StepStatus(StrEnum):
"""Status of a plan step."""
PENDING = "pending"
AWAITING_APPROVAL = "awaiting_approval" # Waiting for human approval
IN_PROGRESS = "in_progress"
COMPLETED = "completed"
FAILED = "failed"
SKIPPED = "skipped"
REJECTED = "rejected" # Human rejected execution
def is_terminal(self) -> bool:
"""Check if this status represents a terminal (finished) state.
Terminal states are states where the step will not execute further,
either because it completed successfully or failed/was skipped.
"""
return self in (
StepStatus.COMPLETED,
StepStatus.FAILED,
StepStatus.SKIPPED,
StepStatus.REJECTED,
)
def is_successful(self) -> bool:
"""Check if this status represents successful completion."""
return self == StepStatus.COMPLETED
class ApprovalDecision(StrEnum):
"""Human decision on a step requiring approval."""
APPROVE = "approve" # Execute as planned
REJECT = "reject" # Skip this step
MODIFY = "modify" # Execute with modifications
ABORT = "abort" # Stop entire execution
class ApprovalRequest(BaseModel):
    """Information presented to a human when a step needs approval to run."""

    step_id: str
    step_description: str
    action_type: str
    action_details: dict[str, Any] = Field(default_factory=dict)
    context: dict[str, Any] = Field(default_factory=dict)
    approval_message: str | None = None

    # Short description of what the step is about to do
    preview: str | None = None

    # Unknown fields are accepted rather than rejected
    model_config = {"extra": "allow"}
class ApprovalResult(BaseModel):
    """A human reviewer's answer to an approval request."""

    decision: ApprovalDecision
    reason: str | None = None
    modifications: dict[str, Any] = Field(default_factory=dict)

    # Unknown fields are accepted rather than rejected
    model_config = {"extra": "allow"}
class JudgmentAction(StrEnum):
"""Actions the judge can take after evaluating a step."""
ACCEPT = "accept" # Step completed successfully, continue
RETRY = "retry" # Retry the step with feedback
REPLAN = "replan" # Return to external planner for new plan
ESCALATE = "escalate" # Request human intervention
class ActionSpec(BaseModel):
    """
    Describes the action a PlanStep performs (the "what to do" part).

    Only the fields relevant to ``action_type`` are expected to be set;
    the rest keep their defaults.
    """

    action_type: ActionType

    # LLM_CALL fields
    prompt: str | None = None
    system_prompt: str | None = None
    model: str | None = None

    # TOOL_USE fields
    tool_name: str | None = None
    tool_args: dict[str, Any] = Field(default_factory=dict)

    # SUB_GRAPH fields
    graph_id: str | None = None

    # FUNCTION fields
    function_name: str | None = None
    function_args: dict[str, Any] = Field(default_factory=dict)

    # CODE_EXECUTION fields
    code: str | None = None
    language: str = "python"

    # Unknown fields are accepted rather than rejected
    model_config = {"extra": "allow"}
class PlanStep(BaseModel):
    """
    One unit of work inside a plan.

    Steps are authored by the external planner, run by the Worker and
    assessed by the Judge.
    """

    id: str
    description: str
    action: ActionSpec

    # --- Data flow ---
    inputs: dict[str, Any] = Field(
        default_factory=dict,
        description="Input data for this step (can reference previous step outputs)",
    )
    expected_outputs: list[str] = Field(
        default_factory=list, description="Keys this step should produce"
    )

    # --- Ordering ---
    dependencies: list[str] = Field(
        default_factory=list, description="IDs of steps that must complete before this one"
    )

    # --- Human-in-the-loop (HITL) ---
    requires_approval: bool = Field(
        default=False, description="If True, requires human approval before execution"
    )
    approval_message: str | None = Field(
        default=None, description="Message to show human when requesting approval"
    )

    # --- Execution state ---
    status: StepStatus = StepStatus.PENDING
    result: Any | None = None
    error: str | None = None
    attempts: int = 0
    max_retries: int = 3

    # --- Timestamps ---
    started_at: datetime | None = None
    completed_at: datetime | None = None

    # Unknown fields are accepted rather than rejected
    model_config = {"extra": "allow"}

    def is_ready(self, terminal_step_ids: set[str]) -> bool:
        """Return True when this step can be picked up for execution.

        That requires both of:
        1. The step is still PENDING.
        2. Every dependency ID is present in ``terminal_step_ids``.

        Note: a dependency counts as satisfied once it is terminal, even if
        it failed, was skipped or was rejected. Deciding what to do about an
        unsuccessful dependency is left to the executor.

        Args:
            terminal_step_ids: Set of step IDs that are in a terminal state
        """
        return self.status == StepStatus.PENDING and all(
            dependency in terminal_step_ids for dependency in self.dependencies
        )
class Judgment(BaseModel):
    """
    The Judge's verdict on one step execution.

    Carries the chosen follow-up action together with the reasoning and
    any feedback behind it.
    """

    action: JudgmentAction
    reasoning: str
    # What went wrong; used for retry/replan
    feedback: str | None = None

    # Set when a rule produced the judgment
    rule_matched: str | None = None

    # Set when an LLM produced the judgment
    confidence: float = 1.0
    llm_used: bool = False

    # Extra information for replanning
    context: dict[str, Any] = Field(default_factory=dict)

    # Unknown fields are accepted rather than rejected
    model_config = {"extra": "allow"}
class EvaluationRule(BaseModel):
    """
    A deterministic rule the HybridJudge applies to step results.

    Rules are consulted before any LLM evaluation.
    """

    id: str
    description: str

    # Python expression evaluated with result, step and goal in context
    condition: str

    # Action taken when the condition matches
    action: JudgmentAction
    # Optional message template; may use {result}, {step}, etc.
    feedback_template: str = ""

    # Higher values are checked first
    priority: int = 0

    # Unknown fields are accepted rather than rejected
    model_config = {"extra": "allow"}
class Plan(BaseModel):
    """
    A complete execution plan.

    Created by external planner (Claude Code, etc).
    Executed by FlexibleGraphExecutor.
    """

    id: str
    goal_id: str
    description: str

    # Steps to execute
    steps: list[PlanStep] = Field(default_factory=list)

    # Execution state
    revision: int = 1  # Incremented on replan
    current_step_idx: int = 0

    # Accumulated context from execution
    context: dict[str, Any] = Field(default_factory=dict)

    # Metadata
    created_at: datetime = Field(default_factory=datetime.now)
    created_by: str = "external"  # Who created this plan

    # Previous attempt info (for replanning)
    previous_feedback: str | None = None

    model_config = {"extra": "allow"}

    @classmethod
    def from_json(cls, data: str | dict) -> "Plan":
        """
        Load a Plan from exported JSON.

        This handles the output from export_graph() and properly converts
        action_type strings to ActionType enums. Every declared ActionSpec
        field is carried over (including ``model``, ``graph_id`` and
        ``language``), as are each step's ``max_retries`` and the plan's
        ``previous_feedback`` and ``created_by``. Per-step execution state
        (status, result, error, attempts) is not restored, so loaded steps
        start out PENDING.

        Args:
            data: JSON string or dict from export_graph()

        Returns:
            Plan object ready for FlexibleGraphExecutor

        Raises:
            ValueError: If a step's ``action_type`` is not a known ActionType.
            KeyError: If a step has no ``id``.

        Example:
            # Load from export_graph() output
            exported = export_graph()
            plan = Plan.from_json(exported)

            # Load from file
            with open("plan.json") as f:
                plan = Plan.from_json(json.load(f))
        """
        import json as json_module

        if isinstance(data, str):
            data = json_module.loads(data)

        # Handle nested "plan" key from export_graph output
        if "plan" in data:
            data = data["plan"]

        # Convert steps
        steps = []
        for step_data in data.get("steps", []):
            action_data = step_data.get("action", {})

            # Convert action_type string to enum
            action_type = ActionType(action_data.get("action_type", "function"))

            action = ActionSpec(
                action_type=action_type,
                prompt=action_data.get("prompt"),
                system_prompt=action_data.get("system_prompt"),
                model=action_data.get("model"),
                tool_name=action_data.get("tool_name"),
                tool_args=action_data.get("tool_args", {}),
                # Without graph_id a SUB_GRAPH step has nothing to run
                graph_id=action_data.get("graph_id"),
                function_name=action_data.get("function_name"),
                function_args=action_data.get("function_args", {}),
                code=action_data.get("code"),
                language=action_data.get("language", "python"),
            )

            step = PlanStep(
                id=step_data["id"],
                description=step_data.get("description", ""),
                action=action,
                inputs=step_data.get("inputs", {}),
                expected_outputs=step_data.get("expected_outputs", []),
                dependencies=step_data.get("dependencies", []),
                requires_approval=step_data.get("requires_approval", False),
                approval_message=step_data.get("approval_message"),
                max_retries=step_data.get("max_retries", 3),
            )
            steps.append(step)

        return cls(
            id=data.get("id", "plan"),
            goal_id=data.get("goal_id", ""),
            description=data.get("description", ""),
            steps=steps,
            context=data.get("context", {}),
            revision=data.get("revision", 1),
            created_by=data.get("created_by", "external"),
            previous_feedback=data.get("previous_feedback"),
        )

    def get_step(self, step_id: str) -> PlanStep | None:
        """Get a step by ID, or None if no step has that ID."""
        return next((step for step in self.steps if step.id == step_id), None)

    def get_ready_steps(self) -> list[PlanStep]:
        """Get all steps that are ready to execute.

        A step is ready when it is PENDING and all its dependencies are in
        terminal states (completed, failed, skipped, or rejected).
        """
        terminal_ids = {s.id for s in self.steps if s.status.is_terminal()}
        return [s for s in self.steps if s.is_ready(terminal_ids)]

    def get_completed_steps(self) -> list[PlanStep]:
        """Get all completed steps."""
        return [s for s in self.steps if s.status == StepStatus.COMPLETED]

    def is_complete(self) -> bool:
        """Check if all steps are in terminal states (finished executing).

        Returns True when all steps have reached a terminal state, regardless
        of whether they succeeded or failed. Use has_failed_steps() to check
        if any steps failed. A plan with no steps is complete.
        """
        return all(s.status.is_terminal() for s in self.steps)

    def is_successful(self) -> bool:
        """Check if all steps completed successfully."""
        return all(s.status == StepStatus.COMPLETED for s in self.steps)

    def has_failed_steps(self) -> bool:
        """Check if any steps failed, were skipped, or were rejected."""
        return any(
            s.status in (StepStatus.FAILED, StepStatus.SKIPPED, StepStatus.REJECTED)
            for s in self.steps
        )

    def get_failed_steps(self) -> list[PlanStep]:
        """Get all steps that failed, were skipped, or were rejected."""
        return [
            s
            for s in self.steps
            if s.status in (StepStatus.FAILED, StepStatus.SKIPPED, StepStatus.REJECTED)
        ]

    def to_feedback_context(self) -> dict[str, Any]:
        """Create context for replanning.

        Summarises completed steps (with results) and FAILED steps (with
        error and attempt count). Skipped and rejected steps are not listed
        under ``failed_steps``.
        """
        return {
            "plan_id": self.id,
            "revision": self.revision,
            "completed_steps": [
                {
                    "id": s.id,
                    "description": s.description,
                    "result": s.result,
                }
                for s in self.get_completed_steps()
            ],
            "failed_steps": [
                {
                    "id": s.id,
                    "description": s.description,
                    "error": s.error,
                    "attempts": s.attempts,
                }
                for s in self.steps
                if s.status == StepStatus.FAILED
            ],
            "context": self.context,
        }
class ExecutionStatus(StrEnum):
"""Status of plan execution."""
COMPLETED = "completed"
AWAITING_APPROVAL = "awaiting_approval" # Paused for human approval
NEEDS_REPLAN = "needs_replan"
NEEDS_ESCALATION = "needs_escalation"
REJECTED = "rejected" # Human rejected a step
ABORTED = "aborted" # Human aborted execution
FAILED = "failed"
class PlanExecutionResult(BaseModel):
    """
    Outcome of running a plan.

    Handed back to the external planner together with status and feedback.
    """

    status: ExecutionStatus

    # Outputs gathered from completed steps
    results: dict[str, Any] = Field(default_factory=dict)

    # Guidance for the planner when status is needs_replan
    feedback: str | None = None
    feedback_context: dict[str, Any] = Field(default_factory=dict)

    # IDs of steps that finished before execution stopped
    completed_steps: list[str] = Field(default_factory=list)

    # Execution metrics
    steps_executed: int = 0
    total_tokens: int = 0
    total_latency_ms: int = 0

    # Error details when status is failed
    error: str | None = None

    # Unknown fields are accepted rather than rejected
    model_config = {"extra": "allow"}
def load_export(data: str | dict) -> tuple["Plan", Any]:
    """
    Build a (Plan, Goal) pair from export_graph() output.

    The plan is loaded with ``Plan.from_json``. The goal is validated from
    the top-level ``"goal"`` entry when that entry is present and non-empty;
    otherwise a minimal Goal is synthesised from the plan's ``goal_id`` and
    ``description`` with no success criteria or constraints.

    Args:
        data: JSON string or dict from export_graph()

    Returns:
        Tuple of (Plan, Goal) ready for FlexibleGraphExecutor

    Example:
        # Load from export_graph() output
        exported = export_graph()
        plan, goal = load_export(exported)
        result = await executor.execute_plan(plan, goal, context)
    """
    import json as json_module

    from framework.graph.goal import Goal

    payload = json_module.loads(data) if isinstance(data, str) else data

    plan = Plan.from_json(payload)

    goal_payload = payload.get("goal", {})
    if goal_payload:
        return plan, Goal.model_validate(goal_payload)

    # No goal exported: derive a minimal one from the plan metadata
    fallback_goal = Goal(
        id=plan.goal_id,
        name=plan.goal_id,
        description=plan.description,
        success_criteria=[],
        constraints=[],
    )
    return plan, fallback_goal
+125 -4
View File
@@ -16,8 +16,9 @@ Layer 3 — Focus (per-node system_prompt, reframed as focus directive):
from __future__ import annotations
import logging
from datetime import datetime
from pathlib import Path
from typing import TYPE_CHECKING
from typing import TYPE_CHECKING, Any
if TYPE_CHECKING:
from framework.graph.edge import GraphSpec
@@ -26,10 +27,119 @@ if TYPE_CHECKING:
logger = logging.getLogger(__name__)
def _with_datetime(prompt: str) -> str:
"""Append current datetime with local timezone to a system prompt."""
local = datetime.now().astimezone()
stamp = f"Current date and time: {local.strftime('%Y-%m-%d %H:%M %Z (UTC%z)')}"
return f"{prompt}\n\n{stamp}" if prompt else stamp
def build_accounts_prompt(
accounts: list[dict[str, Any]],
tool_provider_map: dict[str, str] | None = None,
node_tool_names: list[str] | None = None,
) -> str:
"""Build a prompt section describing connected accounts.
When tool_provider_map is provided, produces structured output grouped
by provider with tool mapping, so the LLM knows which ``account`` value
to pass to which tool.
When node_tool_names is also provided, filters to only show providers
whose tools overlap with the node's tool list.
Args:
accounts: List of account info dicts from
CredentialStoreAdapter.get_all_account_info().
tool_provider_map: Mapping of tool_name -> provider_name
(e.g. {"gmail_list_messages": "google"}).
node_tool_names: Tool names available to the current node.
When provided, only providers with matching tools are shown.
Returns:
Formatted accounts block, or empty string if no accounts.
"""
if not accounts:
return ""
# Flat format (backward compat) when no tool mapping provided
if tool_provider_map is None:
lines = [
"Connected accounts (use the alias as the `account` parameter "
"when calling tools to target a specific account):"
]
for acct in accounts:
provider = acct.get("provider", "unknown")
alias = acct.get("alias", "unknown")
identity = acct.get("identity", {})
detail_parts = [f"{k}: {v}" for k, v in identity.items() if v]
detail = f" ({', '.join(detail_parts)})" if detail_parts else ""
lines.append(f"- {provider}/{alias}{detail}")
return "\n".join(lines)
# --- Structured format: group by provider with tool mapping ---
# Invert tool_provider_map to provider -> [tools]
provider_tools: dict[str, list[str]] = {}
for tool_name, provider in tool_provider_map.items():
provider_tools.setdefault(provider, []).append(tool_name)
# Filter to relevant providers based on node tools
node_tool_set = set(node_tool_names) if node_tool_names else None
# Group accounts by provider
provider_accounts: dict[str, list[dict[str, Any]]] = {}
for acct in accounts:
provider = acct.get("provider", "unknown")
provider_accounts.setdefault(provider, []).append(acct)
sections: list[str] = ["Connected accounts:"]
for provider, acct_list in provider_accounts.items():
tools_for_provider = sorted(provider_tools.get(provider, []))
# If node tools specified, only show providers with overlapping tools
if node_tool_set is not None:
relevant_tools = [t for t in tools_for_provider if t in node_tool_set]
if not relevant_tools:
continue
tools_for_provider = relevant_tools
# Local-only providers: tools read from env vars, no account= routing
all_local = all(a.get("source") == "local" for a in acct_list)
# Provider header with tools
display_name = provider.replace("_", " ").title()
if tools_for_provider and not all_local:
tools_str = ", ".join(tools_for_provider)
sections.append(f'\n{display_name} (use account="<alias>" with: {tools_str}):')
elif tools_for_provider and all_local:
tools_str = ", ".join(tools_for_provider)
sections.append(f"\n{display_name} (tools: {tools_str}):")
else:
sections.append(f"\n{display_name}:")
# Account entries
for acct in acct_list:
alias = acct.get("alias", "unknown")
identity = acct.get("identity", {})
detail_parts = [f"{k}: {v}" for k, v in identity.items() if v]
detail = f" ({', '.join(detail_parts)})" if detail_parts else ""
source_tag = " [local]" if acct.get("source") == "local" else ""
sections.append(f" - {provider}/{alias}{detail}{source_tag}")
# If filtering removed all providers, return empty
if len(sections) <= 1:
return ""
return "\n".join(sections)
def compose_system_prompt(
identity_prompt: str | None,
focus_prompt: str | None,
narrative: str | None = None,
accounts_prompt: str | None = None,
) -> str:
"""Compose the three-layer system prompt.
@@ -37,9 +147,10 @@ def compose_system_prompt(
identity_prompt: Layer 1 static agent identity (from GraphSpec).
focus_prompt: Layer 3 per-node focus directive (from NodeSpec.system_prompt).
narrative: Layer 2 auto-generated from conversation state.
accounts_prompt: Connected accounts block (sits between identity and narrative).
Returns:
Composed system prompt with all layers present.
Composed system prompt with all layers present, plus current datetime.
"""
parts: list[str] = []
@@ -47,6 +158,10 @@ def compose_system_prompt(
if identity_prompt:
parts.append(identity_prompt)
# Accounts (semi-static, deployment-specific)
if accounts_prompt:
parts.append(f"\n{accounts_prompt}")
# Layer 2: Narrative (what's happened so far)
if narrative:
parts.append(f"\n--- Context (what has happened so far) ---\n{narrative}")
@@ -55,7 +170,7 @@ def compose_system_prompt(
if focus_prompt:
parts.append(f"\n--- Current Focus ---\n{focus_prompt}")
return "\n".join(parts) if parts else ""
return _with_datetime("\n".join(parts) if parts else "")
def build_narrative(
@@ -112,6 +227,7 @@ def build_transition_marker(
memory: SharedMemory,
cumulative_tool_names: list[str],
data_dir: Path | str | None = None,
adapt_content: str | None = None,
) -> str:
"""Build a 'State of the World' transition marker.
@@ -125,6 +241,7 @@ def build_transition_marker(
memory: Current shared memory state.
cumulative_tool_names: All tools available (cumulative set).
data_dir: Path to spillover data directory.
adapt_content: Agent working memory (adapt.md) content.
Returns:
Transition marker message text.
@@ -163,9 +280,13 @@ def build_transition_marker(
]
if file_lines:
sections.append(
"\nData files (use load_data to access):\n" + "\n".join(file_lines)
"\nData files (use read_file to access):\n" + "\n".join(file_lines)
)
# Agent working memory
if adapt_content:
sections.append(f"\n--- Agent Memory ---\n{adapt_content}")
# Available tools
if cumulative_tool_names:
sections.append("\nAvailable tools: " + ", ".join(sorted(cumulative_tool_names)))
-620
View File
@@ -1,620 +0,0 @@
"""
Worker Node for Executing Plan Steps.
The Worker executes individual plan steps by dispatching to the
appropriate executor based on action type:
- LLM calls
- Tool usage
- Sub-graph execution
- Function calls
- Code execution (sandboxed)
"""
import json
import logging
import re
import time
from collections.abc import Callable
from dataclasses import dataclass, field
from typing import Any
from framework.graph.code_sandbox import CodeSandbox
from framework.graph.plan import (
ActionSpec,
ActionType,
PlanStep,
)
from framework.llm.provider import LLMProvider, Tool
from framework.runtime.core import Runtime
logger = logging.getLogger(__name__)
def parse_llm_json_response(text: str) -> tuple[Any | None, str]:
"""
Parse JSON from LLM response, handling markdown code blocks.
LLMs often return JSON wrapped in markdown code blocks like:
```json
{"key": "value"}
```
This function extracts and parses the JSON.
Args:
text: Raw LLM response text
Returns:
Tuple of (parsed_json_or_None, cleaned_text)
"""
if not isinstance(text, str):
return None, str(text)
cleaned = text.strip()
# Try to extract JSON from markdown code blocks
# Pattern: ```json ... ``` or ``` ... ```
code_block_pattern = r"```(?:json)?\s*([\s\S]*?)\s*```"
matches = re.findall(code_block_pattern, cleaned)
if matches:
# Try to parse each match
for match in matches:
try:
parsed = json.loads(match.strip())
return parsed, match.strip()
except json.JSONDecodeError as e:
logger.debug(
f"Failed to parse JSON from code block: {e}. "
f"Content preview: {match.strip()[:100]}..."
)
continue
# No code blocks or parsing failed - try parsing the whole response
try:
parsed = json.loads(cleaned)
return parsed, cleaned
except json.JSONDecodeError as e:
logger.debug(
f"Failed to parse entire response as JSON: {e}. Content preview: {cleaned[:100]}..."
)
# Try to find JSON-like content (starts with { or [)
json_start_pattern = r"(\{[\s\S]*\}|\[[\s\S]*\])"
json_matches = re.findall(json_start_pattern, cleaned)
for match in json_matches:
try:
parsed = json.loads(match)
return parsed, match
except json.JSONDecodeError as e:
logger.debug(f"Failed to parse JSON pattern: {e}. Content preview: {match[:100]}...")
continue
# Could not parse as JSON - log warning
logger.warning(
f"Could not parse LLM response as JSON after trying all strategies. "
f"Response preview: {cleaned[:200]}..."
)
return None, cleaned
@dataclass
class StepExecutionResult:
"""Result of executing a plan step."""
success: bool
outputs: dict[str, Any] = field(default_factory=dict)
error: str | None = None
error_type: str | None = None # For judge rules: timeout, rate_limit, etc.
# Metadata
tokens_used: int = 0
latency_ms: int = 0
executor_type: str = ""
class WorkerNode:
"""
Executes plan steps by dispatching to appropriate executors.
Usage:
worker = WorkerNode(
runtime=runtime,
llm=llm_provider,
tools=tool_registry,
)
result = await worker.execute(step, context)
"""
def __init__(
self,
runtime: Runtime,
llm: LLMProvider | None = None,
tools: dict[str, Tool] | None = None,
tool_executor: Callable | None = None,
functions: dict[str, Callable] | None = None,
sub_graph_executor: Callable | None = None,
sandbox: CodeSandbox | None = None,
):
"""
Initialize the Worker.
Args:
runtime: Runtime for decision logging
llm: LLM provider for LLM_CALL actions
tools: Available tools for TOOL_USE actions
tool_executor: Function to execute tools
functions: Registered functions for FUNCTION actions
sub_graph_executor: Function to execute sub-graphs
sandbox: Code sandbox for CODE_EXECUTION actions
"""
self.runtime = runtime
self.llm = llm
self.tools = tools or {}
self.tool_executor = tool_executor
self.functions = functions or {}
self.sub_graph_executor = sub_graph_executor
self.sandbox = sandbox or CodeSandbox()
async def execute(
self,
step: PlanStep,
context: dict[str, Any],
) -> StepExecutionResult:
"""
Execute a plan step.
Args:
step: The step to execute
context: Current execution context
Returns:
StepExecutionResult with outputs and status
"""
# Record decision
decision_id = self.runtime.decide(
intent=f"Execute plan step: {step.description}",
options=[
{
"id": step.action.action_type.value,
"description": f"Execute {step.action.action_type.value} action",
"action_type": step.action.action_type.value,
}
],
chosen=step.action.action_type.value,
reasoning=f"Step requires {step.action.action_type.value}",
context={"step_id": step.id, "inputs": step.inputs},
)
start_time = time.time()
try:
# Resolve inputs from context
resolved_inputs = self._resolve_inputs(step.inputs, context)
# Dispatch to appropriate executor
result = await self._dispatch(step.action, resolved_inputs, context)
latency_ms = int((time.time() - start_time) * 1000)
result.latency_ms = latency_ms
# Record outcome
self.runtime.record_outcome(
decision_id=decision_id,
success=result.success,
result=result.outputs if result.success else result.error,
tokens_used=result.tokens_used,
latency_ms=latency_ms,
)
return result
except Exception as e:
latency_ms = int((time.time() - start_time) * 1000)
self.runtime.record_outcome(
decision_id=decision_id,
success=False,
error=str(e),
latency_ms=latency_ms,
)
return StepExecutionResult(
success=False,
error=str(e),
error_type="exception",
latency_ms=latency_ms,
)
def _resolve_inputs(
self,
inputs: dict[str, Any],
context: dict[str, Any],
) -> dict[str, Any]:
"""Resolve input references from context."""
resolved = {}
for key, value in inputs.items():
if isinstance(value, str) and value.startswith("$"):
# Reference to context variable
ref_key = value[1:] # Remove $
resolved[key] = context.get(ref_key, value)
else:
resolved[key] = value
return resolved
async def _dispatch(
self,
action: ActionSpec,
inputs: dict[str, Any],
context: dict[str, Any],
) -> StepExecutionResult:
"""Dispatch to appropriate executor based on action type."""
if action.action_type == ActionType.LLM_CALL:
return await self._execute_llm_call(action, inputs, context)
elif action.action_type == ActionType.TOOL_USE:
return await self._execute_tool_use(action, inputs)
elif action.action_type == ActionType.SUB_GRAPH:
return await self._execute_sub_graph(action, inputs, context)
elif action.action_type == ActionType.FUNCTION:
return await self._execute_function(action, inputs)
elif action.action_type == ActionType.CODE_EXECUTION:
return self._execute_code(action, inputs, context)
else:
return StepExecutionResult(
success=False,
error=f"Unknown action type: {action.action_type}",
error_type="invalid_action",
)
async def _execute_llm_call(
self,
action: ActionSpec,
inputs: dict[str, Any],
context: dict[str, Any],
) -> StepExecutionResult:
"""Execute an LLM call action."""
if self.llm is None:
return StepExecutionResult(
success=False,
error="No LLM provider configured",
error_type="configuration",
executor_type="llm_call",
)
try:
# Build prompt with context data
prompt = action.prompt or ""
# First try format placeholders (for prompts like "Hello {name}")
if inputs:
try:
prompt = prompt.format(**inputs)
except (KeyError, ValueError):
pass # Keep original prompt if formatting fails
# Always append context data so LLM can personalize
# This ensures the LLM has access to lead info, company context, etc.
if inputs:
context_section = "\n\n--- Context Data ---\n"
for key, value in inputs.items():
if isinstance(value, dict | list):
context_section += f"{key}: {json.dumps(value, indent=2)}\n"
else:
context_section += f"{key}: {value}\n"
prompt = prompt + context_section
messages = [{"role": "user", "content": prompt}]
response = self.llm.complete(
messages=messages,
system=action.system_prompt,
)
# Try to parse JSON from LLM response
# LLMs often return JSON wrapped in markdown code blocks
parsed_json, _ = parse_llm_json_response(response.content)
# If JSON was parsed successfully, use it as the result
# Otherwise, use the raw text
result_value = parsed_json if parsed_json is not None else response.content
return StepExecutionResult(
success=True,
outputs={
"result": result_value,
"response": response.content, # Always keep raw response
"parsed_json": parsed_json, # Explicit parsed JSON (or None)
},
tokens_used=response.input_tokens + response.output_tokens,
executor_type="llm_call",
)
except Exception as e:
error_type = "rate_limit" if "rate" in str(e).lower() else "llm_error"
return StepExecutionResult(
success=False,
error=str(e),
error_type=error_type,
executor_type="llm_call",
)
async def _execute_tool_use(
self,
action: ActionSpec,
inputs: dict[str, Any],
) -> StepExecutionResult:
"""Execute a tool use action."""
tool_name = action.tool_name
if not tool_name:
return StepExecutionResult(
success=False,
error="No tool name specified",
error_type="invalid_action",
executor_type="tool_use",
)
# Merge action args with inputs
args = {**action.tool_args, **inputs}
# Resolve any $variable references in the merged args
# (tool_args may contain $refs that should be resolved from inputs)
resolved_args = {}
for key, value in args.items():
if isinstance(value, str) and value.startswith("$"):
ref_key = value[1:] # Remove $
resolved_args[key] = args.get(ref_key, value)
else:
resolved_args[key] = value
args = resolved_args
# First, check if we have a registered function with this name
# This allows simpler tool registration without full Tool/ToolExecutor setup
if tool_name in self.functions:
try:
func = self.functions[tool_name]
result = func(**args)
# Handle async functions
if hasattr(result, "__await__"):
result = await result
# If result is already a dict with success/outputs, use it directly
if isinstance(result, dict) and "success" in result:
return StepExecutionResult(
success=result.get("success", False),
outputs=result.get("outputs", {}),
error=result.get("error"),
error_type=result.get("error_type"),
executor_type="tool_use",
)
# Otherwise wrap the result
return StepExecutionResult(
success=True,
outputs={"result": result},
executor_type="tool_use",
)
except Exception as e:
return StepExecutionResult(
success=False,
error=str(e),
error_type="tool_exception",
executor_type="tool_use",
)
# Fall back to formal Tool registry
if tool_name not in self.tools:
return StepExecutionResult(
success=False,
error=f"Tool '{tool_name}' not found",
error_type="missing_tool",
executor_type="tool_use",
)
if self.tool_executor is None:
return StepExecutionResult(
success=False,
error="No tool executor configured",
error_type="configuration",
executor_type="tool_use",
)
try:
# Execute tool via formal executor
from framework.llm.provider import ToolUse
tool_use = ToolUse(
id=f"step_{tool_name}",
name=tool_name,
input=args,
)
result = self.tool_executor(tool_use)
if result.is_error:
return StepExecutionResult(
success=False,
outputs={},
error=result.content,
error_type="tool_error",
executor_type="tool_use",
)
# Parse JSON result and unpack fields into outputs
# Tools return JSON like {"lead_email": "...", "company_name": "..."}
# We want each field as a separate output key
outputs = {"result": result.content}
try:
parsed = json.loads(result.content)
if isinstance(parsed, dict):
# Unpack all fields from the JSON response
outputs.update(parsed)
except (json.JSONDecodeError, TypeError):
pass # Keep result as-is if not valid JSON
return StepExecutionResult(
success=True,
outputs=outputs,
executor_type="tool_use",
)
except Exception as e:
return StepExecutionResult(
success=False,
error=str(e),
error_type="tool_exception",
executor_type="tool_use",
)
async def _execute_sub_graph(
self,
action: ActionSpec,
inputs: dict[str, Any],
context: dict[str, Any],
) -> StepExecutionResult:
"""Execute a sub-graph action."""
if self.sub_graph_executor is None:
return StepExecutionResult(
success=False,
error="No sub-graph executor configured",
error_type="configuration",
executor_type="sub_graph",
)
graph_id = action.graph_id
if not graph_id:
return StepExecutionResult(
success=False,
error="No graph ID specified",
error_type="invalid_action",
executor_type="sub_graph",
)
try:
result = await self.sub_graph_executor(graph_id, inputs, context)
return StepExecutionResult(
success=result.success,
outputs=result.output if result.success else {},
error=result.error if not result.success else None,
tokens_used=result.total_tokens,
executor_type="sub_graph",
)
except Exception as e:
return StepExecutionResult(
success=False,
error=str(e),
error_type="sub_graph_exception",
executor_type="sub_graph",
)
async def _execute_function(
self,
action: ActionSpec,
inputs: dict[str, Any],
) -> StepExecutionResult:
"""Execute a function action."""
func_name = action.function_name
if not func_name:
return StepExecutionResult(
success=False,
error="No function name specified",
error_type="invalid_action",
executor_type="function",
)
if func_name not in self.functions:
return StepExecutionResult(
success=False,
error=f"Function '{func_name}' not registered",
error_type="missing_function",
executor_type="function",
)
try:
func = self.functions[func_name]
# Merge action args with inputs
args = {**action.function_args, **inputs}
# Execute function
result = func(**args)
# Handle async functions
if hasattr(result, "__await__"):
result = await result
return StepExecutionResult(
success=True,
outputs={"result": result},
executor_type="function",
)
except Exception as e:
return StepExecutionResult(
success=False,
error=str(e),
error_type="function_exception",
executor_type="function",
)
def _execute_code(
self,
action: ActionSpec,
inputs: dict[str, Any],
context: dict[str, Any],
) -> StepExecutionResult:
"""Execute a code action in sandbox."""
code = action.code
if not code:
return StepExecutionResult(
success=False,
error="No code specified",
error_type="invalid_action",
executor_type="code_execution",
)
# Merge inputs with context for code
code_inputs = {**context, **inputs}
# Execute in sandbox
sandbox_result = self.sandbox.execute(code, code_inputs)
if sandbox_result.success:
return StepExecutionResult(
success=True,
outputs={
"result": sandbox_result.result,
**sandbox_result.variables,
},
executor_type="code_execution",
latency_ms=sandbox_result.execution_time_ms,
)
else:
error_type = "security" if "Security" in (sandbox_result.error or "") else "code_error"
return StepExecutionResult(
success=False,
error=sandbox_result.error,
error_type=error_type,
executor_type="code_execution",
latency_ms=sandbox_result.execution_time_ms,
)
def register_function(self, name: str, func: Callable) -> None:
"""Register a function for FUNCTION actions."""
self.functions[name] = func
def register_tool(self, tool: Tool) -> None:
"""Register a tool for TOOL_USE actions."""
self.tools[tool.name] = tool
+14 -11
View File
@@ -1,11 +1,10 @@
"""Anthropic Claude LLM provider - backward compatible wrapper around LiteLLM."""
import os
from collections.abc import Callable
from typing import Any
from framework.llm.litellm import LiteLLMProvider
from framework.llm.provider import LLMProvider, LLMResponse, Tool, ToolResult, ToolUse
from framework.llm.provider import LLMProvider, LLMResponse, Tool
def _get_api_key_from_credential_store() -> str | None:
@@ -83,19 +82,23 @@ class AnthropicProvider(LLMProvider):
max_retries=max_retries,
)
def complete_with_tools(
async def acomplete(
self,
messages: list[dict[str, Any]],
system: str,
tools: list[Tool],
tool_executor: Callable[[ToolUse], ToolResult],
max_iterations: int = 10,
system: str = "",
tools: list[Tool] | None = None,
max_tokens: int = 1024,
response_format: dict[str, Any] | None = None,
json_mode: bool = False,
max_retries: int | None = None,
) -> LLMResponse:
"""Run a tool-use loop until Claude produces a final response (via LiteLLM)."""
return self._provider.complete_with_tools(
"""Async completion via LiteLLM."""
return await self._provider.acomplete(
messages=messages,
system=system,
tools=tools,
tool_executor=tool_executor,
max_iterations=max_iterations,
max_tokens=max_tokens,
response_format=response_format,
json_mode=json_mode,
max_retries=max_retries,
)
+578 -154
View File
@@ -11,7 +11,7 @@ import asyncio
import json
import logging
import time
from collections.abc import AsyncIterator, Callable
from collections.abc import AsyncIterator
from datetime import datetime
from pathlib import Path
from typing import Any
@@ -23,13 +23,105 @@ except ImportError:
litellm = None # type: ignore[assignment]
RateLimitError = Exception # type: ignore[assignment, misc]
from framework.llm.provider import LLMProvider, LLMResponse, Tool, ToolResult, ToolUse
from framework.llm.provider import LLMProvider, LLMResponse, Tool
from framework.llm.stream_events import StreamEvent
logger = logging.getLogger(__name__)
def _patch_litellm_anthropic_oauth() -> None:
"""Patch litellm's Anthropic header construction to fix OAuth token handling.
litellm bug: validate_environment() puts the OAuth token into x-api-key,
but Anthropic's API rejects OAuth tokens in x-api-key. They must be sent
via Authorization: Bearer only, with x-api-key omitted entirely.
This patch wraps validate_environment to remove x-api-key when the
Authorization header carries an OAuth token (sk-ant-oat prefix).
See: https://github.com/BerriAI/litellm/issues/19618
"""
try:
from litellm.llms.anthropic.common_utils import AnthropicModelInfo
from litellm.types.llms.anthropic import ANTHROPIC_OAUTH_TOKEN_PREFIX
except ImportError:
return
original = AnthropicModelInfo.validate_environment
def _patched_validate_environment(
self, headers, model, messages, optional_params, litellm_params, api_key=None, api_base=None
):
result = original(
self,
headers,
model,
messages,
optional_params,
litellm_params,
api_key=api_key,
api_base=api_base,
)
auth = result.get("authorization", "")
if auth.startswith(f"Bearer {ANTHROPIC_OAUTH_TOKEN_PREFIX}"):
result.pop("x-api-key", None)
return result
AnthropicModelInfo.validate_environment = _patched_validate_environment
def _patch_litellm_metadata_nonetype() -> None:
    """Wrap litellm entry points so an explicit ``metadata=None`` is dropped.

    litellm bug: the @client decorator in utils.py evaluates
        "model_group" in kwargs.get("metadata", {})
    in several places, but kwargs["metadata"] can be explicitly None, which
    raises
        TypeError: argument of type 'NoneType' is not iterable
    and hides the real API error behind a confusing APIConnectionError.

    Each of ``completion``, ``acompletion``, ``responses`` and ``aresponses``
    that exists on the litellm module is replaced by a wrapper that removes a
    ``metadata`` keyword whose value is None before delegating. Coroutine
    functions get an async wrapper, everything else a sync one.
    """
    import functools

    for fn_name in ("completion", "acompletion", "responses", "aresponses"):
        target = getattr(litellm, fn_name, None)
        if target is None:
            continue

        # ``_orig=target`` binds the current function at definition time so
        # each wrapper keeps its own target across loop iterations.
        if asyncio.iscoroutinefunction(target):

            @functools.wraps(target)
            async def _patched(*args, _orig=target, **kwargs):
                if "metadata" in kwargs and kwargs["metadata"] is None:
                    del kwargs["metadata"]
                return await _orig(*args, **kwargs)

        else:

            @functools.wraps(target)
            def _patched(*args, _orig=target, **kwargs):
                if "metadata" in kwargs and kwargs["metadata"] is None:
                    del kwargs["metadata"]
                return _orig(*args, **kwargs)

        setattr(litellm, fn_name, _patched)
if litellm is not None:
_patch_litellm_anthropic_oauth()
_patch_litellm_metadata_nonetype()
RATE_LIMIT_MAX_RETRIES = 10
RATE_LIMIT_BACKOFF_BASE = 2 # seconds
RATE_LIMIT_MAX_DELAY = 120 # seconds - cap to prevent absurd waits
# Empty-stream retries use a short fixed delay, not the rate-limit backoff.
# Conversation-structure issues are deterministic — long waits don't help.
EMPTY_STREAM_MAX_RETRIES = 3
EMPTY_STREAM_RETRY_DELAY = 1.0 # seconds
# Directory for dumping failed requests
FAILED_REQUESTS_DIR = Path.home() / ".hive" / "failed_requests"
@@ -78,12 +170,102 @@ def _dump_failed_request(
"temperature": kwargs.get("temperature"),
}
with open(filepath, "w") as f:
with open(filepath, "w", encoding="utf-8") as f:
json.dump(dump_data, f, indent=2, default=str)
return str(filepath)
def _compute_retry_delay(
    attempt: int,
    exception: BaseException | None = None,
    backoff_base: int = RATE_LIMIT_BACKOFF_BASE,
    max_delay: int = RATE_LIMIT_MAX_DELAY,
) -> float:
    """Compute retry delay, preferring server-provided Retry-After headers.

    Headers are read from ``exception.response.headers`` when present.

    Priority:
    1. retry-after-ms header (milliseconds, float)
    2. retry-after header as seconds (float)
    3. retry-after header as HTTP-date (RFC 7231)
    4. Exponential backoff: backoff_base * 2^attempt

    Header-derived values are clamped to the range [0, max_delay]; the
    backoff fallback is capped at max_delay. A header value that cannot be
    parsed, or that parses to NaN, is skipped in favour of the next option.

    Args:
        attempt: Zero-based retry attempt number used for the backoff.
        exception: The error that triggered the retry, if any.
        backoff_base: Base delay in seconds for exponential backoff.
        max_delay: Upper bound in seconds for the returned delay.

    Returns:
        Delay in seconds before the next attempt.
    """
    import math
    from datetime import timezone

    def _clamp(seconds: float) -> float:
        """Clamp a header-derived delay into [0, max_delay]."""
        return min(max(seconds, 0), max_delay)

    # getattr on None simply yields the default, so a missing exception or
    # response falls straight through to the backoff below.
    response = getattr(exception, "response", None)
    headers = getattr(response, "headers", None)

    if headers is not None:
        # Priority 1: retry-after-ms (milliseconds)
        retry_after_ms = headers.get("retry-after-ms")
        if retry_after_ms is not None:
            try:
                delay = float(retry_after_ms) / 1000.0
            except (ValueError, TypeError):
                pass
            else:
                # NaN would propagate through min/max and break the sleep.
                if not math.isnan(delay):
                    return _clamp(delay)

        # Priority 2: retry-after (seconds or HTTP-date)
        retry_after = headers.get("retry-after")
        if retry_after is not None:
            # Try as seconds (float)
            try:
                delay = float(retry_after)
            except (ValueError, TypeError):
                pass
            else:
                if not math.isnan(delay):
                    return _clamp(delay)

            # Try as HTTP-date (e.g., "Fri, 31 Dec 2025 23:59:59 GMT")
            try:
                from email.utils import parsedate_to_datetime

                retry_date = parsedate_to_datetime(retry_after)
                # A "-0000" zone yields a naive datetime that represents UTC;
                # make it aware so it is not compared against local time.
                if retry_date.tzinfo is None:
                    retry_date = retry_date.replace(tzinfo=timezone.utc)
                delay = (retry_date - datetime.now(timezone.utc)).total_seconds()
                return _clamp(delay)
            except (ValueError, TypeError, OverflowError):
                pass

    # Fallback: exponential backoff
    delay = backoff_base * (2**attempt)
    return min(delay, max_delay)
def _is_stream_transient_error(exc: BaseException) -> bool:
"""Classify whether a streaming exception is transient (recoverable).
Transient errors (recoverable=True): network issues, server errors, timeouts.
Permanent errors (recoverable=False): auth, bad request, context window, etc.
NOTE: "Failed to parse tool call arguments" (malformed LLM output) is NOT
transient at the stream level retrying with the same messages produces the
same malformed output. This error is handled at the EventLoopNode level
where the conversation can be modified before retrying.
"""
try:
from litellm.exceptions import (
APIConnectionError,
BadGatewayError,
InternalServerError,
ServiceUnavailableError,
)
transient_types: tuple[type[BaseException], ...] = (
APIConnectionError,
InternalServerError,
BadGatewayError,
ServiceUnavailableError,
TimeoutError,
ConnectionError,
OSError,
)
except ImportError:
transient_types = (TimeoutError, ConnectionError, OSError)
return isinstance(exc, transient_types)
class LiteLLMProvider(LLMProvider):
"""
LiteLLM-based LLM provider for multi-provider support.
@@ -144,12 +326,21 @@ class LiteLLMProvider(LLMProvider):
self.api_key = api_key
self.api_base = api_base
self.extra_kwargs = kwargs
# The Codex ChatGPT backend (chatgpt.com/backend-api/codex) rejects
# several standard OpenAI params: max_output_tokens, stream_options.
self._codex_backend = bool(api_base and "chatgpt.com/backend-api/codex" in api_base)
if litellm is None:
raise ImportError(
"LiteLLM is not installed. Please install it with: uv pip install litellm"
)
# Note: The Codex ChatGPT backend is a Responses API endpoint at
# chatgpt.com/backend-api/codex/responses. LiteLLM's model registry
# correctly marks codex models with mode="responses", so we do NOT
# override the mode. The responses_api_bridge in litellm handles
# converting Chat Completions requests to Responses API format.
def _completion_with_rate_limit_retry(
self, max_retries: int | None = None, **kwargs: Any
) -> Any:
@@ -197,6 +388,20 @@ class LiteLLMProvider(LLMProvider):
f"Full request dumped to: {dump_path}"
)
# finish_reason=length means the model exhausted max_tokens
# before producing content. Retrying with the same max_tokens
# will never help — return immediately instead of looping.
if finish_reason == "length":
max_tok = kwargs.get("max_tokens", "unset")
logger.error(
f"[retry] {model} returned empty content with "
f"finish_reason=length (max_tokens={max_tok}). "
f"The model exhausted its token budget before "
f"producing visible output. Increase max_tokens "
f"or use a different model. Not retrying."
)
return response
if attempt == retries:
logger.error(
f"[retry] GAVE UP on {model} after {retries + 1} "
@@ -205,7 +410,7 @@ class LiteLLMProvider(LLMProvider):
f"choices={len(response.choices) if response.choices else 0})"
)
return response
wait = RATE_LIMIT_BACKOFF_BASE * (2**attempt)
wait = _compute_retry_delay(attempt)
logger.warning(
f"[retry] {model} returned empty response "
f"(finish_reason={finish_reason}, "
@@ -236,7 +441,7 @@ class LiteLLMProvider(LLMProvider):
f"Full request dumped to: {dump_path}"
)
raise
wait = RATE_LIMIT_BACKOFF_BASE * (2**attempt)
wait = _compute_retry_delay(attempt, exception=e)
logger.warning(
f"[retry] {model} rate limited (429): {e!s}. "
f"~{token_count} tokens ({token_method}). "
@@ -259,6 +464,21 @@ class LiteLLMProvider(LLMProvider):
max_retries: int | None = None,
) -> LLMResponse:
"""Generate a completion using LiteLLM."""
# Codex ChatGPT backend requires streaming — delegate to the unified
# async streaming path which properly handles tool calls.
if self._codex_backend:
return asyncio.run(
self.acomplete(
messages=messages,
system=system,
tools=tools,
max_tokens=max_tokens,
response_format=response_format,
json_mode=json_mode,
max_retries=max_retries,
)
)
# Prepare messages with system prompt
full_messages = []
if system:
@@ -321,125 +541,183 @@ class LiteLLMProvider(LLMProvider):
raw_response=response,
)
def complete_with_tools(
self,
messages: list[dict[str, Any]],
system: str,
tools: list[Tool],
tool_executor: Callable[[ToolUse], ToolResult],
max_iterations: int = 10,
max_tokens: int = 4096,
) -> LLMResponse:
"""Run a tool-use loop until the LLM produces a final response."""
# Prepare messages with system prompt
current_messages = []
if system:
current_messages.append({"role": "system", "content": system})
current_messages.extend(messages)
# ------------------------------------------------------------------
# Async variants — non-blocking on the event loop
# ------------------------------------------------------------------
total_input_tokens = 0
total_output_tokens = 0
async def _acompletion_with_rate_limit_retry(
self, max_retries: int | None = None, **kwargs: Any
) -> Any:
"""Async version of _completion_with_rate_limit_retry.
# Convert tools to OpenAI format
openai_tools = [self._tool_to_openai_format(t) for t in tools]
Uses litellm.acompletion and asyncio.sleep instead of blocking calls.
"""
model = kwargs.get("model", self.model)
retries = max_retries if max_retries is not None else RATE_LIMIT_MAX_RETRIES
for attempt in range(retries + 1):
try:
response = await litellm.acompletion(**kwargs) # type: ignore[union-attr]
for _ in range(max_iterations):
# Build kwargs
kwargs: dict[str, Any] = {
"model": self.model,
"messages": current_messages,
"max_tokens": max_tokens,
"tools": openai_tools,
**self.extra_kwargs,
}
if self.api_key:
kwargs["api_key"] = self.api_key
if self.api_base:
kwargs["api_base"] = self.api_base
response = self._completion_with_rate_limit_retry(**kwargs)
# Track tokens
usage = response.usage
if usage:
total_input_tokens += usage.prompt_tokens
total_output_tokens += usage.completion_tokens
choice = response.choices[0]
message = choice.message
# Check if we're done (no tool calls)
if choice.finish_reason == "stop" or not message.tool_calls:
return LLMResponse(
content=message.content or "",
model=response.model or self.model,
input_tokens=total_input_tokens,
output_tokens=total_output_tokens,
stop_reason=choice.finish_reason or "stop",
raw_response=response,
)
# Process tool calls.
# Add assistant message with tool calls.
current_messages.append(
{
"role": "assistant",
"content": message.content,
"tool_calls": [
{
"id": tc.id,
"type": "function",
"function": {
"name": tc.function.name,
"arguments": tc.function.arguments,
},
}
for tc in message.tool_calls
],
}
)
# Execute tools and add results.
for tool_call in message.tool_calls:
try:
args = json.loads(tool_call.function.arguments)
except json.JSONDecodeError:
# Surface error to LLM and skip tool execution
current_messages.append(
{
"role": "tool",
"tool_call_id": tool_call.id,
"content": "Invalid JSON arguments provided to tool.",
}
content = response.choices[0].message.content if response.choices else None
has_tool_calls = bool(response.choices and response.choices[0].message.tool_calls)
if not content and not has_tool_calls:
messages = kwargs.get("messages", [])
last_role = next(
(m["role"] for m in reversed(messages) if m.get("role") != "system"),
None,
)
if last_role == "assistant":
logger.debug(
"[async-retry] Empty response after assistant message — "
"expected, not retrying."
)
return response
finish_reason = (
response.choices[0].finish_reason if response.choices else "unknown"
)
token_count, token_method = _estimate_tokens(model, messages)
dump_path = _dump_failed_request(
model=model,
kwargs=kwargs,
error_type="empty_response",
attempt=attempt,
)
logger.warning(
f"[async-retry] Empty response - {len(messages)} messages, "
f"~{token_count} tokens ({token_method}). "
f"Full request dumped to: {dump_path}"
)
# finish_reason=length means the model exhausted max_tokens
# before producing content. Retrying with the same max_tokens
# will never help — return immediately instead of looping.
if finish_reason == "length":
max_tok = kwargs.get("max_tokens", "unset")
logger.error(
f"[async-retry] {model} returned empty content with "
f"finish_reason=length (max_tokens={max_tok}). "
f"The model exhausted its token budget before "
f"producing visible output. Increase max_tokens "
f"or use a different model. Not retrying."
)
return response
if attempt == retries:
logger.error(
f"[async-retry] GAVE UP on {model} after {retries + 1} "
f"attempts — empty response "
f"(finish_reason={finish_reason}, "
f"choices={len(response.choices) if response.choices else 0})"
)
return response
wait = _compute_retry_delay(attempt)
logger.warning(
f"[async-retry] {model} returned empty response "
f"(finish_reason={finish_reason}, "
f"choices={len(response.choices) if response.choices else 0}) — "
f"likely rate limited or quota exceeded. "
f"Retrying in {wait}s "
f"(attempt {attempt + 1}/{retries})"
)
await asyncio.sleep(wait)
continue
tool_use = ToolUse(
id=tool_call.id,
name=tool_call.function.name,
input=args,
return response
except RateLimitError as e:
messages = kwargs.get("messages", [])
token_count, token_method = _estimate_tokens(model, messages)
dump_path = _dump_failed_request(
model=model,
kwargs=kwargs,
error_type="rate_limit",
attempt=attempt,
)
result = tool_executor(tool_use)
# Add tool result message
current_messages.append(
{
"role": "tool",
"tool_call_id": result.tool_use_id,
"content": result.content,
}
if attempt == retries:
logger.error(
f"[async-retry] GAVE UP on {model} after {retries + 1} "
f"attempts — rate limit error: {e!s}. "
f"~{token_count} tokens ({token_method}). "
f"Full request dumped to: {dump_path}"
)
raise
wait = _compute_retry_delay(attempt, exception=e)
logger.warning(
f"[async-retry] {model} rate limited (429): {e!s}. "
f"~{token_count} tokens ({token_method}). "
f"Full request dumped to: {dump_path}. "
f"Retrying in {wait}s "
f"(attempt {attempt + 1}/{retries})"
)
await asyncio.sleep(wait)
raise RuntimeError("Exhausted rate limit retries")
async def acomplete(
self,
messages: list[dict[str, Any]],
system: str = "",
tools: list[Tool] | None = None,
max_tokens: int = 1024,
response_format: dict[str, Any] | None = None,
json_mode: bool = False,
max_retries: int | None = None,
) -> LLMResponse:
"""Async version of complete(). Uses litellm.acompletion — non-blocking."""
# Codex ChatGPT backend requires streaming — route through stream() which
# already handles Codex quirks and has proper tool call accumulation.
if self._codex_backend:
stream_iter = self.stream(
messages=messages,
system=system,
tools=tools,
max_tokens=max_tokens,
response_format=response_format,
json_mode=json_mode,
)
return await self._collect_stream_to_response(stream_iter)
full_messages: list[dict[str, Any]] = []
if system:
full_messages.append({"role": "system", "content": system})
full_messages.extend(messages)
if json_mode:
json_instruction = "\n\nPlease respond with a valid JSON object."
if full_messages and full_messages[0]["role"] == "system":
full_messages[0]["content"] += json_instruction
else:
full_messages.insert(0, {"role": "system", "content": json_instruction.strip()})
kwargs: dict[str, Any] = {
"model": self.model,
"messages": full_messages,
"max_tokens": max_tokens,
**self.extra_kwargs,
}
if self.api_key:
kwargs["api_key"] = self.api_key
if self.api_base:
kwargs["api_base"] = self.api_base
if tools:
kwargs["tools"] = [self._tool_to_openai_format(t) for t in tools]
if response_format:
kwargs["response_format"] = response_format
response = await self._acompletion_with_rate_limit_retry(max_retries=max_retries, **kwargs)
content = response.choices[0].message.content or ""
usage = response.usage
input_tokens = usage.prompt_tokens if usage else 0
output_tokens = usage.completion_tokens if usage else 0
# Max iterations reached
return LLMResponse(
content="Max tool iterations reached",
model=self.model,
input_tokens=total_input_tokens,
output_tokens=total_output_tokens,
stop_reason="max_iterations",
raw_response=None,
content=content,
model=response.model or self.model,
input_tokens=input_tokens,
output_tokens=output_tokens,
stop_reason=response.choices[0].finish_reason or "",
raw_response=response,
)
def _tool_to_openai_format(self, tool: Tool) -> dict[str, Any]:
@@ -463,6 +741,8 @@ class LiteLLMProvider(LLMProvider):
system: str = "",
tools: list[Tool] | None = None,
max_tokens: int = 4096,
response_format: dict[str, Any] | None = None,
json_mode: bool = False,
) -> AsyncIterator[StreamEvent]:
"""Stream a completion via litellm.acompletion(stream=True).
@@ -487,6 +767,31 @@ class LiteLLMProvider(LLMProvider):
full_messages.append({"role": "system", "content": system})
full_messages.extend(messages)
# Codex Responses API requires an `instructions` field (system prompt).
# Inject a minimal one when callers don't provide a system message.
if self._codex_backend and not any(m["role"] == "system" for m in full_messages):
full_messages.insert(0, {"role": "system", "content": "You are a helpful assistant."})
# Add JSON mode via prompt engineering (works across all providers)
if json_mode:
json_instruction = "\n\nPlease respond with a valid JSON object."
if full_messages and full_messages[0]["role"] == "system":
full_messages[0]["content"] += json_instruction
else:
full_messages.insert(0, {"role": "system", "content": json_instruction.strip()})
# Remove ghost empty assistant messages (content="" and no tool_calls).
# These arise when a model returns an empty stream after a tool result
# (an "expected" no-op turn). Keeping them in history confuses some
# models (notably Codex/gpt-5.3) and causes cascading empty streams.
full_messages = [
m
for m in full_messages
if not (
m.get("role") == "assistant" and not m.get("content") and not m.get("tool_calls")
)
]
kwargs: dict[str, Any] = {
"model": self.model,
"messages": full_messages,
@@ -501,6 +806,12 @@ class LiteLLMProvider(LLMProvider):
kwargs["api_base"] = self.api_base
if tools:
kwargs["tools"] = [self._tool_to_openai_format(t) for t in tools]
if response_format:
kwargs["response_format"] = response_format
# The Codex ChatGPT backend (Responses API) rejects several params.
if self._codex_backend:
kwargs.pop("max_tokens", None)
kwargs.pop("stream_options", None)
for attempt in range(RATE_LIMIT_MAX_RETRIES + 1):
# Post-stream events (ToolCall, TextEnd, Finish) are buffered
@@ -509,8 +820,10 @@ class LiteLLMProvider(LLMProvider):
tail_events: list[StreamEvent] = []
accumulated_text = ""
tool_calls_acc: dict[int, dict[str, str]] = {}
_last_tool_idx = 0 # tracks most recently opened tool call slot
input_tokens = 0
output_tokens = 0
stream_finish_reason: str | None = None
try:
response = await litellm.acompletion(**kwargs) # type: ignore[union-attr]
@@ -531,9 +844,36 @@ class LiteLLMProvider(LLMProvider):
)
# --- Tool calls (accumulate across chunks) ---
# The Codex/Responses API bridge (litellm bug) hardcodes
# index=0 on every ChatCompletionToolCallChunk, even for
# parallel tool calls. We work around this by using tc.id
# (set on output_item.added events) as a "new tool call"
# signal and tracking the most recently opened slot for
# argument deltas that arrive with id=None.
if delta and delta.tool_calls:
for tc in delta.tool_calls:
idx = tc.index if hasattr(tc, "index") and tc.index is not None else 0
if tc.id:
# New tool call announced (or done event re-sent).
# Check if this id already has a slot.
existing_idx = next(
(k for k, v in tool_calls_acc.items() if v["id"] == tc.id),
None,
)
if existing_idx is not None:
idx = existing_idx
elif idx in tool_calls_acc and tool_calls_acc[idx]["id"] not in (
"",
tc.id,
):
# Slot taken by a different call — assign new index
idx = max(tool_calls_acc.keys()) + 1
_last_tool_idx = idx
else:
# Argument delta with no id — route to last opened slot
idx = _last_tool_idx
if idx not in tool_calls_acc:
tool_calls_acc[idx] = {"id": "", "name": "", "arguments": ""}
if tc.id:
@@ -546,6 +886,7 @@ class LiteLLMProvider(LLMProvider):
# --- Finish ---
if choice.finish_reason:
stream_finish_reason = choice.finish_reason
for _idx, tc_data in sorted(tool_calls_acc.items()):
try:
parsed_args = json.loads(tc_data["arguments"])
@@ -580,58 +921,77 @@ class LiteLLMProvider(LLMProvider):
# (If text deltas were yielded above, has_content is True
# and we skip the retry path — nothing was yielded in vain.)
has_content = accumulated_text or tool_calls_acc
if not has_content and attempt < RATE_LIMIT_MAX_RETRIES:
# If the conversation ends with an assistant or tool
# message, an empty stream is expected — the LLM has
# nothing new to say. Don't burn retries on this;
# let the caller (EventLoopNode) decide what to do.
# Typical case: client_facing node where the LLM set
# all outputs via set_output tool calls, and the tool
# results are the last messages.
last_role = next(
(m["role"] for m in reversed(full_messages) if m.get("role") != "system"),
None,
)
if last_role in ("assistant", "tool"):
logger.debug(
"[stream] Empty response after %s message — expected, not retrying.",
last_role,
if not has_content:
# finish_reason=length means the model exhausted
# max_tokens before producing content. Retrying with
# the same max_tokens will never help.
if stream_finish_reason == "length":
max_tok = kwargs.get("max_tokens", "unset")
logger.error(
f"[stream] {self.model} returned empty content "
f"with finish_reason=length "
f"(max_tokens={max_tok}). The model exhausted "
f"its token budget before producing visible "
f"output. Increase max_tokens or use a "
f"different model. Not retrying."
)
for event in tail_events:
yield event
return
wait = RATE_LIMIT_BACKOFF_BASE * (2**attempt)
token_count, token_method = _estimate_tokens(
self.model,
full_messages,
)
dump_path = _dump_failed_request(
model=self.model,
kwargs=kwargs,
error_type="empty_stream",
attempt=attempt,
)
logger.warning(
f"[stream-retry] {self.model} returned empty stream — "
f"~{token_count} tokens ({token_method}). "
f"Request dumped to: {dump_path}. "
f"Retrying in {wait}s "
f"(attempt {attempt + 1}/{RATE_LIMIT_MAX_RETRIES})"
)
await asyncio.sleep(wait)
continue
# Success (or final attempt) — flush remaining events.
# Empty stream — always retry regardless of last message
# role. Ghost empty streams after tool results are NOT
# expected no-ops; they create infinite loops when the
# conversation doesn't change between iterations.
# After retries, return the empty result and let the
# caller (EventLoopNode) decide how to handle it.
last_role = next(
(m["role"] for m in reversed(full_messages) if m.get("role") != "system"),
None,
)
if attempt < EMPTY_STREAM_MAX_RETRIES:
token_count, token_method = _estimate_tokens(
self.model,
full_messages,
)
dump_path = _dump_failed_request(
model=self.model,
kwargs=kwargs,
error_type="empty_stream",
attempt=attempt,
)
logger.warning(
f"[stream-retry] {self.model} returned empty stream "
f"after {last_role} message — "
f"~{token_count} tokens ({token_method}). "
f"Request dumped to: {dump_path}. "
f"Retrying in {EMPTY_STREAM_RETRY_DELAY}s "
f"(attempt {attempt + 1}/{EMPTY_STREAM_MAX_RETRIES})"
)
await asyncio.sleep(EMPTY_STREAM_RETRY_DELAY)
continue
# All retries exhausted — log and return the empty
# result. EventLoopNode's empty response guard will
# accept if all outputs are set, or handle the ghost
# stream case if outputs are still missing.
logger.error(
f"[stream] {self.model} returned empty stream after "
f"{EMPTY_STREAM_MAX_RETRIES} retries "
f"(last_role={last_role}). Returning empty result."
)
# Success (or empty after exhausted retries) — flush events.
for event in tail_events:
yield event
return
except RateLimitError as e:
if attempt < RATE_LIMIT_MAX_RETRIES:
wait = RATE_LIMIT_BACKOFF_BASE * (2**attempt)
wait = _compute_retry_delay(attempt, exception=e)
logger.warning(
f"[stream-retry] {self.model} rate limited (429): {e!s}. "
f"Retrying in {wait}s "
f"Retrying in {wait:.1f}s "
f"(attempt {attempt + 1}/{RATE_LIMIT_MAX_RETRIES})"
)
await asyncio.sleep(wait)
@@ -640,5 +1000,69 @@ class LiteLLMProvider(LLMProvider):
return
except Exception as e:
yield StreamErrorEvent(error=str(e), recoverable=False)
if _is_stream_transient_error(e) and attempt < RATE_LIMIT_MAX_RETRIES:
wait = _compute_retry_delay(attempt, exception=e)
logger.warning(
f"[stream-retry] {self.model} transient error "
f"({type(e).__name__}): {e!s}. "
f"Retrying in {wait:.1f}s "
f"(attempt {attempt + 1}/{RATE_LIMIT_MAX_RETRIES})"
)
await asyncio.sleep(wait)
continue
recoverable = _is_stream_transient_error(e)
yield StreamErrorEvent(error=str(e), recoverable=recoverable)
return
async def _collect_stream_to_response(
    self,
    stream: AsyncIterator[StreamEvent],
) -> LLMResponse:
    """Drain a ``stream()`` iterator and fold its events into one LLMResponse.

    ``acomplete()`` uses this for the Codex backend so that the non-streaming
    API is served by the streaming path and its tool-call accumulation.

    Args:
        stream: Async iterator of stream events, as produced by ``stream()``.

    Returns:
        An ``LLMResponse`` built from the last text snapshot and the values of
        the last ``FinishEvent`` seen. Collected tool calls, if any, are
        exposed as ``raw_response["tool_calls"]``; otherwise ``raw_response``
        is ``None``.

    Raises:
        RuntimeError: If the stream yields a non-recoverable
            ``StreamErrorEvent``. Recoverable error events are ignored.
    """
    from framework.llm.stream_events import (
        FinishEvent,
        StreamErrorEvent,
        TextDeltaEvent,
        ToolCallEvent,
    )

    text = ""
    collected_calls: list[dict[str, Any]] = []
    prompt_tokens = 0
    completion_tokens = 0
    finish = ""
    resolved_model = self.model

    async for ev in stream:
        if isinstance(ev, TextDeltaEvent):
            # Each delta carries the full accumulated text so far, so the
            # latest snapshot simply replaces the previous one.
            text = ev.snapshot
            continue

        if isinstance(ev, ToolCallEvent):
            call = {
                "id": ev.tool_use_id,
                "name": ev.tool_name,
                "input": ev.tool_input,
            }
            collected_calls.append(call)
            continue

        if isinstance(ev, FinishEvent):
            prompt_tokens = ev.input_tokens
            completion_tokens = ev.output_tokens
            finish = ev.stop_reason
            # Keep the provider's configured model unless the event names one.
            resolved_model = ev.model or resolved_model
            continue

        if isinstance(ev, StreamErrorEvent) and not ev.recoverable:
            raise RuntimeError(f"Stream error: {ev.error}")

    raw = {"tool_calls": collected_calls} if collected_calls else None
    return LLMResponse(
        content=text,
        model=resolved_model,
        input_tokens=prompt_tokens,
        output_tokens=completion_tokens,
        stop_reason=finish,
        raw_response=raw,
    )
+18 -34
View File
@@ -2,10 +2,10 @@
import json
import re
from collections.abc import AsyncIterator, Callable
from collections.abc import AsyncIterator
from typing import Any
from framework.llm.provider import LLMProvider, LLMResponse, Tool, ToolResult, ToolUse
from framework.llm.provider import LLMProvider, LLMResponse, Tool
from framework.llm.stream_events import (
FinishEvent,
StreamEvent,
@@ -146,41 +146,25 @@ class MockLLMProvider(LLMProvider):
stop_reason="mock_complete",
)
def complete_with_tools(
async def acomplete(
self,
messages: list[dict[str, Any]],
system: str,
tools: list[Tool],
tool_executor: Callable[[ToolUse], ToolResult],
max_iterations: int = 10,
system: str = "",
tools: list[Tool] | None = None,
max_tokens: int = 1024,
response_format: dict[str, Any] | None = None,
json_mode: bool = False,
max_retries: int | None = None,
) -> LLMResponse:
"""
Generate a mock completion without tool use.
In mock mode, we skip tool execution and return a final response immediately.
Args:
messages: Initial conversation (ignored in mock mode)
system: System prompt (used to extract expected output keys)
tools: Available tools (ignored in mock mode)
tool_executor: Tool executor function (ignored in mock mode)
max_iterations: Max iterations (ignored in mock mode)
Returns:
LLMResponse with mock content
"""
# In mock mode, we don't execute tools - just return a final response
# Try to generate JSON if the system prompt suggests structured output
json_mode = "json" in system.lower() or "output_keys" in system.lower()
content = self._generate_mock_response(system=system, json_mode=json_mode)
return LLMResponse(
content=content,
model=self.model,
input_tokens=0,
output_tokens=0,
stop_reason="mock_complete",
"""Async mock completion (no I/O, returns immediately)."""
return self.complete(
messages=messages,
system=system,
tools=tools,
max_tokens=max_tokens,
response_format=response_format,
json_mode=json_mode,
max_retries=max_retries,
)
async def stream(
+29 -21
View File
@@ -1,8 +1,10 @@
"""LLM Provider abstraction for pluggable LLM backends."""
import asyncio
from abc import ABC, abstractmethod
from collections.abc import AsyncIterator, Callable
from collections.abc import AsyncIterator
from dataclasses import dataclass, field
from functools import partial
from typing import Any
@@ -88,29 +90,35 @@ class LLMProvider(ABC):
"""
pass
@abstractmethod
def complete_with_tools(
async def acomplete(
self,
messages: list[dict[str, Any]],
system: str,
tools: list[Tool],
tool_executor: Callable[["ToolUse"], "ToolResult"],
max_iterations: int = 10,
) -> LLMResponse:
"""
Run a tool-use loop until the LLM produces a final response.
system: str = "",
tools: list["Tool"] | None = None,
max_tokens: int = 1024,
response_format: dict[str, Any] | None = None,
json_mode: bool = False,
max_retries: int | None = None,
) -> "LLMResponse":
"""Async version of complete(). Non-blocking on the event loop.
Args:
messages: Initial conversation
system: System prompt
tools: Available tools
tool_executor: Function to execute tools: (ToolUse) -> ToolResult
max_iterations: Max tool calls before stopping
Returns:
Final LLMResponse after tool use completes
Default implementation offloads the sync complete() to a thread pool.
Subclasses SHOULD override for native async I/O.
"""
pass
loop = asyncio.get_running_loop()
return await loop.run_in_executor(
None,
partial(
self.complete,
messages=messages,
system=system,
tools=tools,
max_tokens=max_tokens,
response_format=response_format,
json_mode=json_mode,
max_retries=max_retries,
),
)
async def stream(
self,
@@ -135,7 +143,7 @@ class LLMProvider(ABC):
TextEndEvent,
)
response = self.complete(
response = await self.acomplete(
messages=messages,
system=system,
tools=tools,
File diff suppressed because it is too large Load Diff
+33
View File
@@ -0,0 +1,33 @@
"""Framework-level worker monitoring package.
Provides the Worker Health Judge: a reusable secondary graph that attaches to
any worker agent runtime and monitors its execution health via periodic log
inspection. Emits structured EscalationTickets when degradation is detected.
Usage::
from framework.monitoring import HEALTH_JUDGE_ENTRY_POINT, judge_goal, judge_graph
from framework.tools.worker_monitoring_tools import register_worker_monitoring_tools
# Register tools bound to the worker runtime's EventBus
monitoring_registry = ToolRegistry()
register_worker_monitoring_tools(monitoring_registry, worker_runtime._event_bus, storage_path)
# Load judge as secondary graph on the worker runtime
await worker_runtime.add_graph(
graph_id="judge",
graph=judge_graph,
goal=judge_goal,
entry_points={"health_check": HEALTH_JUDGE_ENTRY_POINT},
storage_subpath="graphs/judge",
)
"""
from .judge import HEALTH_JUDGE_ENTRY_POINT, judge_goal, judge_graph, judge_node
__all__ = [
"HEALTH_JUDGE_ENTRY_POINT",
"judge_goal",
"judge_graph",
"judge_node",
]
+258
View File
@@ -0,0 +1,258 @@
"""Worker Health Judge — framework-level reusable monitoring graph.
Attaches to any worker agent runtime as a secondary graph. Fires on a
2-minute timer, reads the worker's session logs via ``get_worker_health_summary``,
accumulates observations in a continuous conversation context, and emits a
structured ``EscalationTicket`` when it detects a degradation pattern.
Usage::
from framework.monitoring import judge_graph, judge_goal, HEALTH_JUDGE_ENTRY_POINT
from framework.tools.worker_monitoring_tools import register_worker_monitoring_tools
# Register tools bound to the worker runtime's event bus
monitoring_registry = ToolRegistry()
register_worker_monitoring_tools(
monitoring_registry, worker_runtime._event_bus, storage_path
)
monitoring_tools = list(monitoring_registry.get_tools().values())
monitoring_executor = monitoring_registry.get_executor()
# Load judge as secondary graph on the worker runtime
await worker_runtime.add_graph(
graph_id="judge",
graph=judge_graph,
goal=judge_goal,
entry_points={"health_check": HEALTH_JUDGE_ENTRY_POINT},
storage_subpath="graphs/judge",
)
Design:
- ``isolation_level="isolated"`` — the judge has its own memory, not
polluting the worker's shared memory namespace.
- ``conversation_mode="continuous"`` — the judge's conversation carries
across timer ticks. The conversation IS the judge's memory. It tracks
trends by referring to its own prior messages ("Last check I saw 47
steps; now 52; 5 new steps, 3 RETRY").
- No shared memory keys. No external state files.
"""
from __future__ import annotations
from framework.graph import Constraint, Goal, NodeSpec, SuccessCriterion
from framework.graph.edge import AsyncEntryPointSpec, GraphSpec
# ---------------------------------------------------------------------------
# Goal
# ---------------------------------------------------------------------------
# Goal for the worker-health judge: what it has to achieve (success criteria)
# and the limits it must respect (constraints). ``judge_graph`` refers to this
# object through ``goal_id=judge_goal.id``.
judge_goal = Goal(
    id="worker-health-monitor",
    name="Worker Health Monitor",
    description=(
        "Periodically assess the health of the worker agent by reading its "
        "execution logs. Detect degradation patterns (excessive retries, "
        "stalls, doom loops) and emit structured EscalationTickets when the "
        "worker needs attention."
    ),
    # Two equally weighted criteria (0.5 each): avoid false alarms, and catch
    # real stalls within two timer ticks.
    success_criteria=[
        SuccessCriterion(
            id="accurate-detection",
            description="Only escalates genuine degradation, not normal retry cycles",
            metric="false_positive_rate",
            target="low",
            weight=0.5,
        ),
        SuccessCriterion(
            id="timely-detection",
            description="Detects genuine stalls within 2 timer ticks (≤4 minutes)",
            metric="detection_latency_minutes",
            target="<=4",
            weight=0.5,
        ),
    ],
    # Both constraints are declared "hard".
    constraints=[
        Constraint(
            id="conservative-escalation",
            description=(
                "Do not escalate on a single bad verdict or a brief stall. "
                "Require clear patterns (10+ consecutive bad verdicts or 4+ minute stall) "
                "before creating a ticket."
            ),
            constraint_type="hard",
            category="quality",
        ),
        Constraint(
            id="complete-ticket",
            description=(
                "Every EscalationTicket must have all required fields filled. "
                "Do not emit partial or placeholder tickets."
            ),
            constraint_type="hard",
            category="correctness",
        ),
    ],
)
# ---------------------------------------------------------------------------
# Node
# ---------------------------------------------------------------------------
# The single node of the judge graph. It is an ``event_loop`` node with two
# tools (read the worker's health summary, emit an escalation ticket) and one
# nullable output key, ``health_verdict``. The check procedure, severity
# guide and escalation rules are carried entirely by ``system_prompt``.
judge_node = NodeSpec(
    id="judge",
    name="Worker Health Judge",
    description=(
        "Autonomous health monitor for worker agents. Reads execution logs "
        "on each timer tick, compares to prior observations (via conversation "
        "history), and emits a structured EscalationTicket when a genuine "
        "degradation pattern is detected."
    ),
    node_type="event_loop",
    client_facing=False,  # Autonomous monitor, not interactive
    max_node_visits=0,  # Unbounded — runs on every timer tick
    input_keys=[],
    output_keys=["health_verdict"],
    nullable_output_keys=["health_verdict"],
    success_criteria=(
        "A clear health verdict is produced each check: either 'healthy' with "
        "a brief observation, or a complete EscalationTicket is emitted via "
        "emit_escalation_ticket and health_verdict describes the issue."
    ),
    tools=[
        "get_worker_health_summary",
        "emit_escalation_ticket",
    ],
    # NOTE: the prompt below is runtime text sent to the model. A trailing
    # backslash joins a physical line with the next one inside the string.
    system_prompt="""\
You are the Worker Health Judge. You run every 2 minutes alongside a worker \
agent to monitor its execution health.
# Your Role
You observe the worker's iteration patterns over time and escalate only when \
you see genuine degradation — not normal retry cycles. Your conversation history \
IS your memory. On each check, refer to your previous observations to track trends.
# Check Procedure
On each timer tick (every 2 minutes):
## Step 1: Read health snapshot
Call get_worker_health_summary() with no arguments to auto-discover the active \
session. This returns:
- worker_agent_id: the worker's agent name — use this for ticket identity fields
- worker_graph_id: the worker's primary graph ID — use this for ticket identity fields
- session_id: the session being monitored — use this for worker_session_id in tickets
- total_steps: how many log steps have been recorded
- recent_verdicts: list of recent ACCEPT/RETRY/CONTINUE verdicts
- steps_since_last_accept: consecutive non-ACCEPT steps
- stall_minutes: wall-clock since last step (null if active)
- evidence_snippet: recent LLM output
## Step 2: Compare to prior check
Look at your conversation history. What was total_steps last time?
- If total_steps is UNCHANGED from prior check AND prior check was also unchanged:
STALL confirmed (worker has produced no new iterations in 4+ minutes).
Escalate with severity="high" or "critical" depending on stall duration.
- If total_steps increased: worker is making progress. Examine verdicts.
## Step 3: Analyze verdict pattern
- Healthy: Mix of ACCEPT and RETRY, steps_since_last_accept < 5. No action.
- Warning: steps_since_last_accept is 5-9. Note it, no escalation yet.
- Degraded: steps_since_last_accept >= 10. Examine evidence_snippet.
- If evidence shows the agent is making real progress (complex reasoning,
exploring solutions, productive tool use): may be a hard problem. Note it.
- If evidence shows a loop (same error, same tool call, no new information):
Escalate with severity="medium" or "high".
- Critical: steps_since_last_accept >= 20, OR stall_minutes >= 4.
Escalate with severity="critical".
## Step 4: Decide
### If healthy:
set_output("health_verdict", "healthy: <brief observation>")
Done.
### If escalating:
Build an EscalationTicket JSON string with ALL required fields:
{
"worker_agent_id": "<worker_agent_id from get_worker_health_summary>",
"worker_session_id": "<session_id from get_worker_health_summary>",
"worker_node_id": "<worker_graph_id from get_worker_health_summary>",
"worker_graph_id": "<worker_graph_id from get_worker_health_summary>",
"severity": "<low|medium|high|critical>",
"cause": "<what you observed — concrete, specific>",
"judge_reasoning": "<why you decided to escalate, not just dismiss>",
"suggested_action": "<what you recommend: restart, human review, etc.>",
"recent_verdicts": [<list from get_worker_health_summary>],
"total_steps_checked": <int>,
"steps_since_last_accept": <int>,
"stall_minutes": <float or null>,
"evidence_snippet": "<from get_worker_health_summary>"
}
Call: emit_escalation_ticket(ticket_json=<the JSON string above>)
Then: set_output("health_verdict", "escalated: <one-line summary>")
# Severity Guide
- low: Mild concern, worth noting. 5-9 consecutive bad verdicts.
- medium: Clear degradation pattern. 10-15 bad verdicts or brief stall (1-2 min).
- high: Serious issue. 15+ bad verdicts or stall 2-4 minutes or clear doom loop.
- critical: Worker is definitively stuck. 20+ bad verdicts or stall > 4 minutes.
# Conservative Bias
You MUST resist the urge to escalate prematurely. Worker agents naturally retry.
A node may legitimately need 5-8 retries before succeeding. Do not escalate unless:
1. The pattern is clear and sustained across your observation window, AND
2. The evidence shows no genuine progress
One missed escalation is less costly than two false alarms. The Queen will filter \
further. But do not be passive — genuine stalls and doom loops must be caught.
# Rules
- Never escalate on the FIRST check unless stall_minutes > 4
- Always call get_worker_health_summary FIRST before deciding anything
- All ticket fields are REQUIRED — do not submit partial tickets
- After any emit_escalation_ticket call, always set_output to complete the check
""",
)
# ---------------------------------------------------------------------------
# Entry Point
# ---------------------------------------------------------------------------
# Timer-triggered async entry point that starts execution at the "judge" node.
# It is registered on ``judge_graph`` via ``async_entry_points``.
HEALTH_JUDGE_ENTRY_POINT = AsyncEntryPointSpec(
    id="health_check",
    name="Worker Health Check",
    entry_node="judge",
    trigger_type="timer",
    trigger_config={
        # Same 2-minute cadence the judge's system prompt tells the model about.
        "interval_minutes": 2,
        "run_immediately": True,  # Fire immediately to establish a baseline
    },
    isolation_level="isolated",  # Own memory namespace, not polluting worker's
)
# ---------------------------------------------------------------------------
# Graph
# ---------------------------------------------------------------------------
# Graph wrapping the single ``judge_node``: no edges, no terminal nodes and
# no pause nodes. It is tied to ``judge_goal`` by id and entered through the
# "health_check" entry point.
judge_graph = GraphSpec(
    id="judge-graph",
    goal_id=judge_goal.id,
    version="1.0.0",
    entry_node="judge",
    entry_points={"health_check": "judge"},
    terminal_nodes=[],  # Forever-alive: fires on every timer tick
    pause_nodes=[],
    nodes=[judge_node],
    edges=[],
    conversation_mode="continuous",  # Conversation persists across timer ticks
    async_entry_points=[HEALTH_JUDGE_ENTRY_POINT],
    # Per-check limits for the event loop.
    loop_config={
        "max_iterations": 10,  # One check shouldn't take many turns
        "max_tool_calls_per_turn": 3,  # get_summary + optionally emit_ticket
        "max_history_tokens": 16000,  # Compact — judge only needs recent context
    },
)
+480 -66
View File
@@ -208,6 +208,21 @@ def register_commands(subparsers: argparse._SubParsersAction) -> None:
)
tui_parser.set_defaults(func=cmd_tui)
# code command (Hive Coder — framework agent builder)
code_parser = subparsers.add_parser(
"code",
help="Launch Hive Coder to build agents",
description="Interactive agent builder. Describe what you want and Hive Coder builds it.",
)
code_parser.add_argument(
"--model",
"-m",
type=str,
default=None,
help="LLM model to use (any LiteLLM-compatible name)",
)
code_parser.set_defaults(func=cmd_code)
# sessions command group (checkpoint/resume management)
sessions_parser = subparsers.add_parser(
"sessions",
@@ -331,6 +346,98 @@ def register_commands(subparsers: argparse._SubParsersAction) -> None:
)
resume_parser.set_defaults(func=cmd_resume)
# setup-credentials command
setup_creds_parser = subparsers.add_parser(
"setup-credentials",
help="Interactive credential setup",
description="Guide through setting up required credentials for an agent.",
)
setup_creds_parser.add_argument(
"agent_path",
type=str,
nargs="?",
help="Path to agent folder (optional - runs general setup if not specified)",
)
setup_creds_parser.set_defaults(func=cmd_setup_credentials)
# serve command (HTTP API server)
serve_parser = subparsers.add_parser(
"serve",
help="Start HTTP API server",
description="Start an HTTP server exposing REST + SSE APIs for agent control.",
)
serve_parser.add_argument(
"--host",
type=str,
default="127.0.0.1",
help="Host to bind (default: 127.0.0.1)",
)
serve_parser.add_argument(
"--port",
"-p",
type=int,
default=8787,
help="Port to listen on (default: 8787)",
)
serve_parser.add_argument(
"--agent",
"-a",
type=str,
action="append",
default=[],
help="Agent path to preload (repeatable)",
)
serve_parser.add_argument(
"--model",
"-m",
type=str,
default=None,
help="LLM model for preloaded agents",
)
serve_parser.add_argument(
"--open",
action="store_true",
help="Open dashboard in browser after server starts",
)
serve_parser.set_defaults(func=cmd_serve)
# open command (serve + auto-open browser)
open_parser = subparsers.add_parser(
"open",
help="Start HTTP server and open dashboard in browser",
description="Shortcut for 'hive serve --open'. "
"Starts the HTTP server and opens the dashboard.",
)
open_parser.add_argument(
"--host",
type=str,
default="127.0.0.1",
help="Host to bind (default: 127.0.0.1)",
)
open_parser.add_argument(
"--port",
"-p",
type=int,
default=8787,
help="Port to listen on (default: 8787)",
)
open_parser.add_argument(
"--agent",
"-a",
type=str,
action="append",
default=[],
help="Agent path to preload (repeatable)",
)
open_parser.add_argument(
"--model",
"-m",
type=str,
default=None,
help="LLM model for preloaded agents",
)
open_parser.set_defaults(func=cmd_open)
def _load_resume_state(
agent_path: str, session_id: str, checkpoint_id: str | None = None
@@ -358,7 +465,7 @@ def _load_resume_state(
if not cp_path.exists():
return None
try:
cp_data = json.loads(cp_path.read_text())
cp_data = json.loads(cp_path.read_text(encoding="utf-8"))
except (json.JSONDecodeError, OSError):
return None
return {
@@ -374,7 +481,7 @@ def _load_resume_state(
if not state_path.exists():
return None
try:
state_data = json.loads(state_path.read_text())
state_data = json.loads(state_path.read_text(encoding="utf-8"))
except (json.JSONDecodeError, OSError):
return None
progress = state_data.get("progress", {})
@@ -388,6 +495,40 @@ def _load_resume_state(
}
def _prompt_before_start(agent_path: str, runner, model: str | None = None):
    """Ask the user whether to start the agent or update its credentials first.

    Loops until the user makes a terminal choice:

    * empty input (just Enter) starts the agent with the current runner;
    * ``u`` runs the interactive credential setup and, when it succeeds,
      reloads the runner before asking again;
    * ``q``, EOF or Ctrl+C aborts.

    Any other input simply re-prompts.

    Returns:
        The (possibly reloaded) runner if the user proceeds, ``None`` if the
        user aborts or the reload fails.
    """
    from framework.credentials.setup import CredentialSetupSession
    from framework.runner import AgentRunner

    prompt_text = "Press Enter to start agent, or 'u' to update credentials: "

    while True:
        print()
        try:
            answer = input(prompt_text).strip()
        except (EOFError, KeyboardInterrupt):
            print()
            return None

        if not answer:
            return runner

        command = answer.lower()
        if command == "q":
            return None
        if command != "u":
            # Unrecognised input: ask again.
            continue

        outcome = CredentialSetupSession.from_agent_path(agent_path).run_interactive()
        if not outcome.success:
            # Setup did not complete; keep the existing runner and re-prompt.
            continue

        # Reload runner with updated credentials, then loop back to the prompt.
        try:
            runner = AgentRunner.load(agent_path, model=model)
        except Exception as err:
            print(f"Error reloading agent: {err}")
            return None
def cmd_run(args: argparse.Namespace) -> int:
"""Run an exported agent."""
import logging
@@ -413,7 +554,7 @@ def cmd_run(args: argparse.Namespace) -> int:
return 1
elif args.input_file:
try:
with open(args.input_file) as f:
with open(args.input_file, encoding="utf-8") as f:
context = json.load(f)
except (FileNotFoundError, json.JSONDecodeError) as e:
print(f"Error reading input file: {e}", file=sys.stderr)
@@ -427,6 +568,7 @@ def cmd_run(args: argparse.Namespace) -> int:
try:
# Load runner inside the async loop to ensure strict loop affinity
# (only one load — avoids spawning duplicate MCP subprocesses)
# AgentRunner handles credential setup interactively when stdin is a TTY.
try:
runner = AgentRunner.load(
args.agent_path,
@@ -439,9 +581,19 @@ def cmd_run(args: argparse.Namespace) -> int:
print(f"Error loading agent: {e}")
return
# Prompt before starting (allows credential updates)
if sys.stdin.isatty():
runner = _prompt_before_start(args.agent_path, runner, args.model)
if runner is None:
return
# Force setup inside the loop
if runner._agent_runtime is None:
runner._setup()
try:
runner._setup()
except CredentialError as e:
print(f"\n{e}", file=sys.stderr)
return
# Start runtime before TUI so it's ready for user input
if runner._agent_runtime and not runner._agent_runtime.is_running:
@@ -470,6 +622,7 @@ def cmd_run(args: argparse.Namespace) -> int:
return 0
else:
# Standard execution — load runner here (not shared with TUI path)
# AgentRunner handles credential setup interactively when stdin is a TTY.
try:
runner = AgentRunner.load(
args.agent_path,
@@ -482,6 +635,12 @@ def cmd_run(args: argparse.Namespace) -> int:
print(f"Error: {e}", file=sys.stderr)
return 1
# Prompt before starting (allows credential updates)
if sys.stdin.isatty() and not args.quiet:
runner = _prompt_before_start(args.agent_path, runner, args.model)
if runner is None:
return 1
# Load session/checkpoint state for resume (headless mode)
session_state = None
resume_session = getattr(args, "resume_session", None)
@@ -537,7 +696,7 @@ def cmd_run(args: argparse.Namespace) -> int:
# Output results
if args.output:
with open(args.output, "w") as f:
with open(args.output, "w", encoding="utf-8") as f:
json.dump(output, f, indent=2, default=str)
if not args.quiet:
print(f"Results written to {args.output}")
@@ -717,7 +876,7 @@ def cmd_list(args: argparse.Namespace) -> int:
agents = []
for path in directory.iterdir():
if path.is_dir() and (path / "agent.json").exists():
if _is_valid_agent_dir(path):
try:
runner = AgentRunner.load(path)
info = runner.info()
@@ -784,14 +943,14 @@ def cmd_dispatch(args: argparse.Namespace) -> int:
# Use specific agents
for agent_name in args.agents:
agent_path = agents_dir / agent_name
if not (agent_path / "agent.json").exists():
if not _is_valid_agent_dir(agent_path):
print(f"Agent not found: {agent_path}", file=sys.stderr)
return 1
agent_paths.append((agent_name, agent_path))
else:
# Discover all agents
for path in agents_dir.iterdir():
if path.is_dir() and (path / "agent.json").exists():
if _is_valid_agent_dir(path):
agent_paths.append((path.name, path))
if not agent_paths:
@@ -966,7 +1125,7 @@ Output ONLY valid JSON, no explanation:"""
try:
message = client.messages.create(
model="claude-3-5-haiku-20241022", # Fast and cheap
model="claude-haiku-4-5-20251001", # Fast and cheap
max_tokens=500,
messages=[{"role": "user", "content": prompt}],
)
@@ -1226,60 +1385,26 @@ def cmd_shell(args: argparse.Namespace) -> int:
return 0
def cmd_tui(args: argparse.Namespace) -> int:
"""Browse agents and launch the interactive TUI dashboard."""
import logging
def _get_framework_agents_dir() -> Path:
    """Return the ``agents`` directory located two levels above this file."""
    module_dir = Path(__file__).resolve().parent
    return module_dir.parent / "agents"
def _launch_agent_tui(
agent_path: str | Path,
model: str | None = None,
) -> int:
"""Load an agent and launch the TUI. Shared by cmd_tui and cmd_code."""
from framework.credentials.models import CredentialError
from framework.runner import AgentRunner
from framework.tui.app import AdenTUI
logging.basicConfig(level=logging.WARNING, format="%(message)s")
exports_dir = Path("exports")
examples_dir = Path("examples/templates")
has_exports = _has_agents(exports_dir)
has_examples = _has_agents(examples_dir)
if not has_exports and not has_examples:
print("No agents found in exports/ or examples/templates/", file=sys.stderr)
return 1
# Determine which directory to browse
if has_exports and has_examples:
print("\nAgent sources:\n")
print(" 1. Your Agents (exports/)")
print(" 2. Sample Agents (examples/templates/)")
print()
try:
choice = input("Select source (number): ").strip()
if choice == "1":
agents_dir = exports_dir
elif choice == "2":
agents_dir = examples_dir
else:
print("Invalid selection")
return 1
except (EOFError, KeyboardInterrupt):
print()
return 1
elif has_exports:
agents_dir = exports_dir
else:
agents_dir = examples_dir
# Let user pick an agent
agent_path = _select_agent(agents_dir)
if not agent_path:
return 1
# Launch TUI (same pattern as cmd_run --tui)
async def run_with_tui():
# AgentRunner handles credential setup interactively when stdin is a TTY.
try:
runner = AgentRunner.load(
agent_path,
model=args.model,
model=model,
)
except CredentialError as e:
print(f"\n{e}", file=sys.stderr)
@@ -1289,7 +1414,11 @@ def cmd_tui(args: argparse.Namespace) -> int:
return
if runner._agent_runtime is None:
runner._setup()
try:
runner._setup()
except CredentialError as e:
print(f"\n{e}", file=sys.stderr)
return
if runner._agent_runtime and not runner._agent_runtime.is_running:
await runner._agent_runtime.start()
@@ -1310,6 +1439,105 @@ def cmd_tui(args: argparse.Namespace) -> int:
return 0
def cmd_tui(args: argparse.Namespace) -> int:
    """Run the interactive TUI dashboard, which provides its own agent picker.

    Returns:
        Always ``0`` once the TUI session has ended.
    """
    import logging

    logging.basicConfig(level=logging.WARNING, format="%(message)s")

    from framework.tui.app import AdenTUI

    async def _session():
        dashboard = AdenTUI(model=args.model)
        await dashboard.run_async()

    asyncio.run(_session())
    print("TUI session ended.")
    return 0
def cmd_code(args: argparse.Namespace) -> int:
    """Launch Hive Coder with multi-graph support.

    Unlike ``_launch_agent_tui``, this sets up graph lifecycle tools and
    assigns ``graph_id="hive_coder"`` so the coder can load, supervise,
    and restart secondary agent graphs within the same session.

    Args:
        args: Parsed CLI arguments; only ``args.model`` is read.

    Returns:
        ``1`` if the Hive Coder agent cannot be found on disk, otherwise ``0``
        once the TUI session has ended (load and credential errors are printed
        and also end with ``0``).
    """
    import logging

    logging.basicConfig(level=logging.WARNING, format="%(message)s")

    framework_agents_dir = _get_framework_agents_dir()
    hive_coder_path = framework_agents_dir / "hive_coder"

    if not (hive_coder_path / "agent.py").exists():
        print("Error: Hive Coder agent not found.", file=sys.stderr)
        return 1

    # Ensure framework agents dir is on sys.path for import
    fa_str = str(framework_agents_dir)
    if fa_str not in sys.path:
        sys.path.insert(0, fa_str)

    from framework.credentials.models import CredentialError
    from framework.runner import AgentRunner
    from framework.tools.session_graph_tools import register_graph_tools
    from framework.tui.app import AdenTUI

    async def run_with_tui():
        try:
            runner = AgentRunner.load(hive_coder_path, model=args.model)
        except CredentialError as e:
            print(f"\n{e}", file=sys.stderr)
            return
        except Exception as e:
            print(f"Error loading agent: {e}")
            return

        if runner._agent_runtime is None:
            try:
                runner._setup()
            except CredentialError as e:
                print(f"\n{e}", file=sys.stderr)
                return

        runtime = runner._agent_runtime

        # Everything from here on may raise (tool registration, runtime start,
        # TUI construction) or be cancelled. The finally clause guarantees the
        # runner is cleaned up on every one of those paths, not only after a
        # normal TUI exit.
        try:
            # -- Multi-graph setup --
            # Tag the primary graph so events carry graph_id="hive_coder"
            runtime._graph_id = "hive_coder"
            runtime._active_graph_id = "hive_coder"

            # Register graph lifecycle tools (load_agent, unload_agent, etc.)
            register_graph_tools(runner._tool_registry, runtime)

            # Refresh tool schemas AND executor so streams see the new tools.
            # The executor closure references the registry dict by ref, but
            # refreshing both is robust against any copy-on-read behavior.
            runtime._tools = list(runner._tool_registry.get_tools().values())
            runtime._tool_executor = runner._tool_registry.get_executor()

            if not runtime.is_running:
                await runtime.start()

            app = AdenTUI(runtime)
            try:
                await app.run_async()
            except Exception as e:
                import traceback

                traceback.print_exc()
                print(f"TUI error: {e}")
        finally:
            await runner.cleanup_async()

    asyncio.run(run_with_tui())
    print("TUI session ended.")
    return 0
def _extract_python_agent_metadata(agent_path: Path) -> tuple[str, str]:
"""Extract name and description from a Python-based agent's config.py.
@@ -1326,7 +1554,7 @@ def _extract_python_agent_metadata(agent_path: Path) -> tuple[str, str]:
return fallback_name, fallback_desc
try:
with open(config_path) as f:
with open(config_path, encoding="utf-8") as f:
tree = ast.parse(f.read())
# Find AgentMetadata class definition
@@ -1449,6 +1677,7 @@ def _select_agent(agents_dir: Path) -> str | None:
for path in agents_dir.iterdir():
if _is_valid_agent_dir(path):
agents.append(path)
agents.sort(key=lambda p: p.name)
if not agents:
print(f"No agents found in {agents_dir}", file=sys.stderr)
@@ -1472,16 +1701,7 @@ def _select_agent(agents_dir: Path) -> str | None:
# Display agents for current page (with global numbering)
for i, agent_path in enumerate(page_agents, start_idx + 1):
try:
agent_json = agent_path / "agent.json"
if agent_json.exists():
with open(agent_json) as f:
data = json.load(f)
agent_meta = data.get("agent", {})
name = agent_meta.get("name", agent_path.name)
desc = agent_meta.get("description", "")
else:
# Python-based agent - extract from config.py
name, desc = _extract_python_agent_metadata(agent_path)
name, desc = _extract_python_agent_metadata(agent_path)
desc = desc[:50] + "..." if len(desc) > 50 else desc
print(f" {i}. {name}")
print(f" {desc}")
@@ -1718,3 +1938,197 @@ def cmd_resume(args: argparse.Namespace) -> int:
if args.tui:
print("Mode: TUI")
return 1
def cmd_setup_credentials(args: argparse.Namespace) -> int:
    """Run the interactive credential setup for one agent.

    When no agent path is supplied, prints usage help instead.

    Returns:
        ``0`` if the setup session reports success; ``1`` if it does not or
        if no agent path was given.
    """
    from framework.credentials.setup import CredentialSetupSession

    target = getattr(args, "agent_path", None)
    if not target:
        # No agent specified - show usage
        usage_lines = (
            "Usage: hive setup-credentials <agent_path>",
            "",
            "Examples:",
            " hive setup-credentials exports/my-agent",
            " hive setup-credentials examples/templates/deep_research_agent",
        )
        for line in usage_lines:
            print(line)
        return 1

    # Setup credentials for a specific agent
    outcome = CredentialSetupSession.from_agent_path(target).run_interactive()
    if outcome.success:
        return 0
    return 1
def _open_browser(url: str) -> None:
    """Launch the platform's URL opener for *url* without waiting for it.

    Only ``darwin``, ``win32`` and ``linux`` are handled; on any other
    platform nothing happens. All failures are swallowed on purpose, because
    opening a browser is a convenience and must never abort the caller.
    """
    import subprocess

    # platform -> (command, extra Popen keyword arguments)
    launchers = {
        "darwin": (["open", url], {"encoding": "utf-8"}),
        "win32": (["cmd", "/c", "start", "", url], {}),
        "linux": (["xdg-open", url], {"encoding": "utf-8"}),
    }

    try:
        launcher = launchers.get(sys.platform)
        if launcher is not None:
            command, extra_kwargs = launcher
            subprocess.Popen(
                command,
                stdout=subprocess.DEVNULL,
                stderr=subprocess.DEVNULL,
                **extra_kwargs,
            )
    except Exception:
        pass  # Best-effort — don't crash if browser can't open
def _build_frontend() -> bool:
    """Build the frontend if its sources are newer than the built bundle.

    The frontend project is looked up at ``core/frontend`` relative to the
    current working directory, then at ``frontend`` three levels above this
    file; the first candidate containing a ``package.json`` wins.

    Returns:
        ``False`` if no frontend project is found. ``True`` if the existing
        bundle is up to date or the build succeeds. When npm is unavailable
        or the build fails, ``True`` only if a ``dist`` directory already
        exists.
    """
    import shutil
    import subprocess

    # Find the frontend directory relative to this file or cwd
    candidates = [
        Path("core/frontend"),
        Path(__file__).resolve().parent.parent.parent / "frontend",
    ]
    frontend_dir: Path | None = next(
        (c.resolve() for c in candidates if (c / "package.json").is_file()),
        None,
    )
    if frontend_dir is None:
        return False

    dist_dir = frontend_dir / "dist"
    src_dir = frontend_dir / "src"

    # Skip build if dist is up-to-date (newest src file older than dist index.html)
    index_html = dist_dir / "index.html"
    if index_html.exists() and src_dir.is_dir():
        dist_mtime = index_html.stat().st_mtime
        is_stale = any(
            f.is_file() and f.stat().st_mtime > dist_mtime for f in src_dir.rglob("*")
        )
        if not is_stale:
            return True

    # Need to build
    print("Building frontend...")

    # Resolve npm up front. On Windows npm is installed as ``npm.cmd``, which
    # a bare "npm" passed to subprocess without a shell does not find;
    # shutil.which honours PATHEXT and returns the full path.
    npm = shutil.which("npm")
    if npm is None:
        print("Node.js not found — skipping frontend build.")
        return dist_dir.is_dir()

    try:
        # Ensure deps are installed, then build.
        for npm_args in (["install", "--no-fund", "--no-audit"], ["run", "build"]):
            subprocess.run(
                [npm, *npm_args],
                encoding="utf-8",
                errors="replace",  # never fail on undecodable npm output
                cwd=frontend_dir,
                check=True,
                capture_output=True,
            )
    except FileNotFoundError:
        print("Node.js not found — skipping frontend build.")
        return dist_dir.is_dir()
    except subprocess.CalledProcessError as exc:
        # With ``encoding`` set the captured stderr is already ``str``;
        # tolerate bytes as well rather than calling ``.decode`` blindly.
        stderr = exc.stderr or ""
        if isinstance(stderr, bytes):
            stderr = stderr.decode(errors="replace")
        print(f"Frontend build failed: {stderr[:500]}")
        return dist_dir.is_dir()

    print("Frontend built.")
    return True
def cmd_serve(args: argparse.Namespace) -> int:
    """Start the HTTP API server and block until interrupted.

    Builds the frontend first (best-effort), creates the application,
    preloads any agents given via ``--agent``, binds to ``args.host`` /
    ``args.port`` and then waits until interrupted.

    Args:
        args: Parsed CLI arguments. Reads ``host``, ``port``, ``agent`` and,
            when present, ``model`` and ``open``.

    Returns:
        Always ``0``.
    """
    import logging

    from aiohttp import web

    # The return value is ignored here; whether a built frontend exists is
    # re-checked below via dist_candidates.
    _build_frontend()

    from framework.server.app import create_app

    logging.basicConfig(
        level=logging.INFO,
        format="%(asctime)s [%(levelname)s] %(name)s: %(message)s",
    )

    model = getattr(args, "model", None)
    app = create_app(model=model)

    async def run_server():
        manager = app["manager"]

        # Preload agents specified via --agent
        # A failure to load one agent is reported and does not stop the server.
        for agent_path in args.agent:
            try:
                session = await manager.create_session_with_worker(agent_path, model=model)
                info = session.worker_info
                name = info.name if info else session.worker_id
                print(f"Loaded agent: {session.worker_id} ({name})")
            except Exception as e:
                print(f"Error loading {agent_path}: {e}")

        # Start server using AppRunner/TCPSite (same pattern as webhook_server.py)
        runner = web.AppRunner(app, access_log=None)
        await runner.setup()
        site = web.TCPSite(runner, args.host, args.port)
        await site.start()

        # Check if frontend is being served
        # Both candidates are relative to the current working directory.
        dist_candidates = [
            Path("frontend/dist"),
            Path("core/frontend/dist"),
        ]
        has_frontend = any((c / "index.html").exists() for c in dist_candidates if c.is_dir())
        dashboard_url = f"http://{args.host}:{args.port}"

        print()
        print(f"Hive API server running on {dashboard_url}")
        if has_frontend:
            print(f"Dashboard: {dashboard_url}")
        print(f"Health: {dashboard_url}/api/health")
        print(f"Agents loaded: {sum(1 for s in manager.list_sessions() if s.worker_runtime)}")
        print()
        print("Press Ctrl+C to stop")

        # Auto-open browser if --open flag is set and frontend exists
        if getattr(args, "open", False) and has_frontend:
            _open_browser(dashboard_url)

        # Run forever until interrupted
        # The Event is never set, so this wait only ends through cancellation.
        try:
            await asyncio.Event().wait()
        except asyncio.CancelledError:
            pass
        finally:
            await manager.shutdown_all()
            await runner.cleanup()

    try:
        asyncio.run(run_server())
    except KeyboardInterrupt:
        print("\nServer stopped.")
    return 0
def cmd_open(args: argparse.Namespace) -> int:
    """Run ``cmd_serve`` with the ``open`` flag forced on.

    Returns:
        Whatever ``cmd_serve`` returns.
    """
    setattr(args, "open", True)
    exit_code = cmd_serve(args)
    return exit_code
+5 -2
View File
@@ -183,8 +183,11 @@ class MCPClient:
from mcp import ClientSession
from mcp.client.stdio import stdio_client
# Create persistent stdio client context
self._stdio_context = stdio_client(server_params)
# Create persistent stdio client context.
# Redirect server stderr to devnull to prevent raw
# output from leaking behind the TUI.
devnull = open(os.devnull, "w") # noqa: SIM115
self._stdio_context = stdio_client(server_params, errlog=devnull)
(
self._read_stream,
self._write_stream,
+8 -2
View File
@@ -71,9 +71,15 @@ class AgentOrchestrator:
# Auto-create LLM - LiteLLM auto-detects provider and API key from model name
if self._llm is None:
from framework.config import get_api_base, get_api_key, get_llm_extra_kwargs
from framework.llm.litellm import LiteLLMProvider
self._llm = LiteLLMProvider(model=self._model)
self._llm = LiteLLMProvider(
model=self._model,
api_key=get_api_key(),
api_base=get_api_base(),
**get_llm_extra_kwargs(),
)
def register(
self,
@@ -456,7 +462,7 @@ Respond with JSON only:
}}"""
try:
response = self._llm.complete(
response = await self._llm.acomplete(
messages=[{"role": "user", "content": prompt}],
system="You are a request router. Respond with JSON only.",
max_tokens=256,
+185
View File
@@ -0,0 +1,185 @@
"""Pre-load validation for agent graphs.
Runs structural and credential checks before MCP servers are spawned.
Fails fast with actionable error messages.
"""
from __future__ import annotations
import logging
from dataclasses import dataclass, field
from typing import TYPE_CHECKING
if TYPE_CHECKING:
from framework.graph.edge import GraphSpec
from framework.graph.node import NodeSpec
logger = logging.getLogger(__name__)
class PreloadValidationError(Exception):
    """Signals that one or more pre-load validation checks failed.

    The individual problems stay available on ``errors``; the exception
    message lists them one per line under a fixed header.
    """

    def __init__(self, errors: list[str]):
        self.errors = errors
        bullet_lines = [f" - {problem}" for problem in errors]
        super().__init__("Pre-load validation failed:\n" + "\n".join(bullet_lines))
@dataclass
class PreloadResult:
    """Result of pre-load validation."""

    # Overall outcome flag; required, has no default.
    valid: bool
    # Error messages; each instance gets its own empty list by default.
    errors: list[str] = field(default_factory=list)
    # Warning messages; each instance gets its own empty list by default.
    warnings: list[str] = field(default_factory=list)
def validate_graph_structure(graph: GraphSpec) -> list[str]:
    """Return the structural validation errors reported by the graph itself.

    This is a thin wrapper around ``graph.validate()``; whatever that method
    returns is passed through unchanged.
    """
    problems = graph.validate()
    return problems
def validate_credentials(
    nodes: list[NodeSpec],
    *,
    interactive: bool = True,
    skip: bool = False,
) -> None:
    """Check that the credentials required by *nodes* are usable.

    The check itself is delegated to ``validate_agent_credentials``.

    Args:
        nodes: Node specs whose credential requirements are validated.
        interactive: When true and stdin is a TTY, a failed check triggers the
            interactive credential setup flow followed by a re-validation.
            When false, any ``CredentialError`` propagates untouched.
        skip: When true, return immediately without validating anything.

    Raises:
        CredentialError: If validation fails and cannot be (or is not)
            recovered interactively. When setup is attempted but left
            incomplete, a new error is raised that carries over the original
            ``validation_result`` / ``failed_cred_names`` attributes, if any.
    """
    if skip:
        return

    from framework.credentials.validation import validate_agent_credentials

    if not interactive:
        # Non-interactive: the upstream exception propagates with whatever
        # context validate_agent_credentials attached to it.
        validate_agent_credentials(nodes)
        return

    import sys

    from framework.credentials.models import CredentialError

    try:
        validate_agent_credentials(nodes)
    except CredentialError as original:
        # Without a terminal there is nobody to prompt.
        if not sys.stdin.isatty():
            raise

        print(f"\n{original}", file=sys.stderr)

        from framework.credentials.validation import build_setup_session_from_error

        setup = build_setup_session_from_error(original, nodes=nodes)
        if not setup.missing:
            raise

        outcome = setup.run_interactive()
        if not outcome.success:
            # Carry the original failure details over so callers can still
            # inspect which credentials are missing.
            incomplete = CredentialError(
                "Credential setup incomplete. Run again after configuring the required credentials."
            )
            for attr_name in ("validation_result", "failed_cred_names"):
                if hasattr(original, attr_name):
                    setattr(incomplete, attr_name, getattr(original, attr_name))
            raise incomplete from None

        # Setup reported success: validate again so that a still-broken
        # configuration raises with fresh details.
        validate_agent_credentials(nodes)
def credential_errors_to_json(exc: Exception) -> dict:
    """Convert a credential failure into a JSON-serializable dict.

    The result always contains ``error`` and ``message``. If *exc* carries a
    ``validation_result`` attribute, a ``missing_credentials`` list is added
    with one entry per failed credential. Each entry has the credential name,
    its env var, a status (``invalid``, ``aden_not_connected`` or
    ``missing``) and whichever optional details are non-empty.
    """
    validation = getattr(exc, "validation_result", None)
    if validation is None:
        return {
            "error": "credentials_required",
            "message": str(exc),
        }

    def _status_of(cred) -> str:
        # A credential that is present but failed validation is "invalid".
        if cred.available:
            return "invalid"
        if cred.aden_not_connected:
            return "aden_not_connected"
        return "missing"

    optional_attrs = ("tools", "node_types", "help_url", "validation_message")

    details = []
    for cred in validation.failed:
        item: dict = {
            "credential": cred.credential_name,
            "env_var": cred.env_var,
            "status": _status_of(cred),
        }
        # Optional details are included only when they carry a value.
        for attr_name in optional_attrs:
            value = getattr(cred, attr_name)
            if value:
                item[attr_name] = value
        details.append(item)

    return {
        "error": "credentials_required",
        "message": str(exc),
        "missing_credentials": details,
    }
def run_preload_validation(
    graph: GraphSpec,
    *,
    interactive: bool = True,
    skip_credential_validation: bool = False,
) -> PreloadResult:
    """Run every pre-load check for *graph*, failing fast on the first problem.

    Checks run in this order:

    1. Graph structure, via ``validate_graph_structure``.
    2. Credentials, via ``validate_credentials``.

    Raises:
        PreloadValidationError: If the structural check reports any errors.
        CredentialError: If credential validation fails.

    Returns:
        ``PreloadResult(valid=True)`` when both checks pass.
    """
    # 1. Structural validation: any reported error aborts immediately.
    if structural_errors := validate_graph_structure(graph):
        raise PreloadValidationError(structural_errors)

    # 2. Credential validation: raises on failure, returns nothing on success.
    validate_credentials(
        graph.nodes,
        skip=skip_credential_validation,
        interactive=interactive,
    )

    return PreloadResult(valid=True)
File diff suppressed because it is too large Load Diff
+178 -9
View File
@@ -1,10 +1,12 @@
"""Tool discovery and registration for agent runner."""
import asyncio
import contextvars
import importlib.util
import inspect
import json
import logging
import os
from collections.abc import Callable
from dataclasses import dataclass
from pathlib import Path
@@ -46,10 +48,20 @@ class ToolRegistry:
# and auto-injected at call time for tools that accept them.
CONTEXT_PARAMS = frozenset({"workspace_id", "agent_id", "session_id", "data_dir"})
# Credential directory used for change detection
_CREDENTIAL_DIR = Path("~/.hive/credentials/credentials").expanduser()
def __init__(self) -> None:
    """Create an empty registry: no tools, no MCP clients, no session context."""
    self._tools: dict[str, RegisteredTool] = {}
    self._mcp_clients: list[Any] = []  # List of MCPClient instances
    self._session_context: dict[str, Any] = {}  # Auto-injected context for tools
    self._provider_index: dict[str, set[str]] = {}  # provider -> tool names
    # MCP resync tracking
    # These fields record state at MCP load time so later changes can be detected.
    self._mcp_config_path: Path | None = None  # Path used for initial load
    self._mcp_tool_names: set[str] = set()  # Tool names registered from MCP
    self._mcp_cred_snapshot: set[str] = set()  # Credential filenames at MCP load time
    self._mcp_aden_key_snapshot: str | None = None  # ADEN_API_KEY value at MCP load time
    self._mcp_server_tools: dict[str, set[str]] = {}  # server name -> tool names
def register(
self,
@@ -224,8 +236,19 @@ class ToolRegistry:
Get unified tool executor function.
Returns a function that dispatches to the appropriate tool executor.
Handles both sync and async tool implementations — async results are
wrapped so that ``EventLoopNode._execute_tool`` can await them.
"""
def _wrap_result(tool_use_id: str, result: Any) -> ToolResult:
if isinstance(result, ToolResult):
return result
return ToolResult(
tool_use_id=tool_use_id,
content=json.dumps(result) if not isinstance(result, str) else result,
is_error=False,
)
def executor(tool_use: ToolUse) -> ToolResult:
if tool_use.name not in self._tools:
return ToolResult(
@@ -237,13 +260,24 @@ class ToolRegistry:
registered = self._tools[tool_use.name]
try:
result = registered.executor(tool_use.input)
if isinstance(result, ToolResult):
return result
return ToolResult(
tool_use_id=tool_use.id,
content=json.dumps(result) if not isinstance(result, str) else result,
is_error=False,
)
# Async tool: wrap the awaitable so the caller can await it
if asyncio.iscoroutine(result) or asyncio.isfuture(result):
async def _await_and_wrap():
try:
r = await result
return _wrap_result(tool_use.id, r)
except Exception as exc:
return ToolResult(
tool_use_id=tool_use.id,
content=json.dumps({"error": str(exc)}),
is_error=True,
)
return _await_and_wrap()
return _wrap_result(tool_use.id, result)
except Exception as e:
return ToolResult(
tool_use_id=tool_use.id,
@@ -261,6 +295,10 @@ class ToolRegistry:
"""Check if a tool is registered."""
return name in self._tools
def get_server_tool_names(self, server_name: str) -> set[str]:
"""Return tool names registered from a specific MCP server."""
return set(self._mcp_server_tools.get(server_name, set()))
def set_session_context(self, **context) -> None:
"""
Set session context to auto-inject into tool calls.
@@ -298,8 +336,11 @@ class ToolRegistry:
Args:
config_path: Path to an ``mcp_servers.json`` file.
"""
# Remember config path for potential resync later
self._mcp_config_path = Path(config_path)
try:
with open(config_path) as f:
with open(config_path, encoding="utf-8") as f:
config = json.load(f)
except Exception as e:
logger.warning(f"Failed to load MCP config from {config_path}: {e}")
@@ -325,6 +366,10 @@ class ToolRegistry:
name = server_config.get("name", "unknown")
logger.warning(f"Failed to register MCP server '{name}': {e}")
# Snapshot credential files and ADEN_API_KEY so we can detect mid-session changes
self._mcp_cred_snapshot = self._snapshot_credentials()
self._mcp_aden_key_snapshot = os.environ.get("ADEN_API_KEY")
def register_mcp_server(
self,
server_config: dict[str, Any],
@@ -371,6 +416,9 @@ class ToolRegistry:
self._mcp_clients.append(client)
# Register each tool
server_name = server_config["name"]
if server_name not in self._mcp_server_tools:
self._mcp_server_tools[server_name] = set()
count = 0
for mcp_tool in client.list_tools():
# Convert MCP tool to framework Tool (strips context params from LLM schema)
@@ -395,7 +443,15 @@ class ToolRegistry:
filtered_context = {
k: v for k, v in base_context.items() if k in tool_params
}
merged_inputs = {**filtered_context, **inputs}
# Strip context params from LLM inputs — the framework
# values are authoritative (prevents the LLM from passing
# e.g. data_dir="/data" and overriding the real path).
clean_inputs = {
k: v
for k, v in inputs.items()
if k not in registry_ref.CONTEXT_PARAMS
}
merged_inputs = {**clean_inputs, **filtered_context}
result = client_ref.call_tool(tool_name, merged_inputs)
# MCP tools return content array, extract the result
if isinstance(result, list) and len(result) > 0:
@@ -415,6 +471,8 @@ class ToolRegistry:
tool,
make_mcp_executor(client, mcp_tool.name, self, tool_params),
)
self._mcp_tool_names.add(mcp_tool.name)
self._mcp_server_tools[server_name].add(mcp_tool.name)
count += 1
logger.info(f"Registered {count} tools from MCP server '{config.name}'")
@@ -457,6 +515,117 @@ class ToolRegistry:
return tool
# ------------------------------------------------------------------
# Provider-based tool filtering
# ------------------------------------------------------------------
def build_provider_index(self) -> None:
    """Rebuild the provider -> tool-name index from ``CREDENTIAL_SPECS``.

    The existing index is cleared and refilled from every spec that has a
    truthy ``aden_provider_name``. If ``aden_tools`` cannot be imported, the
    index is left untouched and a debug message is logged.
    """
    try:
        from aden_tools.credentials import CREDENTIAL_SPECS
    except ImportError:
        logger.debug("aden_tools not available, skipping provider index")
        return

    index = self._provider_index
    index.clear()
    for spec in CREDENTIAL_SPECS.values():
        provider_name = spec.aden_provider_name
        if not provider_name:
            continue
        index.setdefault(provider_name, set()).update(spec.tools)
def get_by_provider(self, provider: str) -> dict[str, Tool]:
"""Return registered tools that belong to *provider*.
Lazily builds the provider index on first call.
"""
if not self._provider_index:
self.build_provider_index()
tool_names = self._provider_index.get(provider, set())
return {name: rt.tool for name, rt in self._tools.items() if name in tool_names}
def get_tool_names_by_provider(self, provider: str) -> list[str]:
"""Return sorted registered tool names for *provider*."""
if not self._provider_index:
self.build_provider_index()
tool_names = self._provider_index.get(provider, set())
return sorted(name for name in self._tools if name in tool_names)
def get_all_provider_tool_names(self) -> list[str]:
    """Return the sorted names of registered tools listed under any provider.

    Builds the provider index first when it is empty, then keeps only the
    registered tool names that occur in at least one provider's tool set.
    """
    if not self._provider_index:
        self.build_provider_index()

    # Union of every provider's tool names (empty set when no providers).
    provider_tools = set().union(*self._provider_index.values())
    return sorted(name for name in self._tools if name in provider_tools)
# ------------------------------------------------------------------
# MCP credential resync
# ------------------------------------------------------------------
def _snapshot_credentials(self) -> set[str]:
    """Return the set of credential filenames currently on disk.

    Lists the entries directly inside ``_CREDENTIAL_DIR`` and returns their
    names (not full paths), matching the declared ``set[str]`` return type.

    Returns:
        The entry names, or an empty set when the directory does not exist
        or cannot be read.
    """
    try:
        if not self._CREDENTIAL_DIR.is_dir():
            return set()
        # iterdir() yields Path objects; keep only the final path component.
        return {entry.name for entry in self._CREDENTIAL_DIR.iterdir()}
    except OSError:
        # An unreadable directory is treated the same as "no credentials".
        return set()
def resync_mcp_servers_if_needed(self) -> bool:
    """Reload MCP servers when credentials changed since the last snapshot.

    Two signals are compared against stored snapshots: the credential
    directory listing (``_snapshot_credentials()`` vs ``_mcp_cred_snapshot``)
    and the ``ADEN_API_KEY`` environment variable (vs
    ``_mcp_aden_key_snapshot``). If either differs, every MCP client is
    disconnected, all MCP-registered tools are removed from the registry,
    and ``load_mcp_config`` is re-run on the stored config path.

    Returns:
        True if a resync was performed; False when there are no MCP clients,
        no stored config path, or nothing changed.
    """
    if not self._mcp_clients or self._mcp_config_path is None:
        return False

    files_changed = self._snapshot_credentials() != self._mcp_cred_snapshot
    aden_key_changed = os.environ.get("ADEN_API_KEY") != self._mcp_aden_key_snapshot
    if not (files_changed or aden_key_changed):
        return False

    if files_changed and aden_key_changed:
        reason = "Credential files and ADEN_API_KEY changed"
    elif aden_key_changed:
        reason = "ADEN_API_KEY changed"
    else:
        reason = "Credential files changed"
    logger.info("%s — resyncing MCP servers", reason)

    # Step 1: tear down the existing clients; a failing disconnect is logged
    # and does not stop the remaining clients from being disconnected.
    for mcp_client in self._mcp_clients:
        try:
            mcp_client.disconnect()
        except Exception as e:
            logger.warning(f"Error disconnecting MCP client during resync: {e}")
    self._mcp_clients.clear()

    # Step 2: drop every tool that was registered from an MCP server.
    for tool_name in self._mcp_tool_names:
        self._tools.pop(tool_name, None)
    self._mcp_tool_names.clear()

    # Step 3: load the MCP config again from the stored path.
    self.load_mcp_config(self._mcp_config_path)
    logger.info("MCP server resync complete")
    return True
def cleanup(self) -> None:
"""Clean up all MCP client connections."""
for client in self._mcp_clients:
+539
View File
@@ -0,0 +1,539 @@
# Event Types and Schema Reference
The Hive runtime uses a pub/sub `EventBus` for inter-component communication and observability. Every event is an `AgentEvent` dataclass published through `EventBus.publish()`.
## Event Envelope (`AgentEvent`)
Every event shares a common envelope:
| Field | Type | Description |
| ---------------- | ----------------- | ------------------------------------------------------------ |
| `type` | `EventType` (str) | Event type identifier (see below) |
| `stream_id` | `str` | Entry point / pipeline that emitted the event |
| `node_id` | `str \| None` | Graph node that emitted the event |
| `execution_id` | `str \| None` | Unique execution run ID (UUID, set by `ExecutionStream`) |
| `graph_id` | `str \| None` | Graph that emitted the event (set by `GraphScopedEventBus`) |
| `data` | `dict` | Event-type-specific payload (see individual schemas below) |
| `timestamp` | `datetime` | When the event was created |
| `correlation_id` | `str \| None` | Optional ID for tracking related events across streams |
### Identity Fields
The identity tuple `(graph_id, stream_id, node_id, execution_id)` uniquely locates any event:
- **`graph_id`** — Which graph produced the event. Set automatically by `GraphScopedEventBus` (a subclass that stamps `graph_id` on every `publish()` call). Values: `"worker"`, `"judge"`, `"queen"`, or the graph spec ID.
- **`stream_id`** — Which entry point / pipeline. Corresponds to `EntryPointSpec.id` in the graph definition. For single-entry-point graphs, this equals the entry point name (e.g. `"default"`, `"health_check"`, `"ticket_receiver"`).
- **`node_id`** — Which specific node emitted the event. For `EventLoopNode` events, this is the node spec ID.
- **`execution_id`** — UUID identifying a specific execution run. Multiple concurrent executions of the same entry point each get a unique `execution_id`.
---
## Execution Lifecycle
### `execution_started`
A new graph execution has begun.
| Data Field | Type | Description |
| ---------- | ------ | ------------------------------- |
| `input` | `dict` | Input data passed to the graph |
**Emitted by:** `ExecutionStream._run_execution()`
---
### `execution_completed`
A graph execution finished successfully.
| Data Field | Type | Description |
| ---------- | ------ | ----------------- |
| `output` | `dict` | Final output data |
**Emitted by:** `ExecutionStream._run_execution()`
---
### `execution_failed`
A graph execution failed with an error.
| Data Field | Type | Description |
| ---------- | ----- | ------------- |
| `error` | `str` | Error message |
**Emitted by:** `ExecutionStream._run_execution()`
---
### `execution_paused`
Execution has been paused (Ctrl+Z or HITL approval).
| Data Field | Type | Description |
| ---------- | ----- | ----------------- |
| `reason` | `str` | Why it was paused |
**Emitted by:** `GraphExecutor.execute()`
---
### `execution_resumed`
Execution has resumed from a paused state.
| Data Field | Type | Description |
| ---------- | ---- | ----------- |
| *(none)* | | |
**Emitted by:** `GraphExecutor.execute()`
---
## Node Event-Loop Lifecycle
These events track the inner loop of `EventLoopNode` — the multi-turn LLM streaming loop that powers most agent nodes.
### `node_loop_started`
An EventLoopNode has begun its execution loop.
| Data Field | Type | Description |
| ---------------- | ---------- | ------------------------------- |
| `max_iterations` | `int\|null`| Maximum iterations configured |
**Emitted by:** `EventLoopNode._publish_loop_started()`, `GraphExecutor` (for function nodes in parallel branches)
---
### `node_loop_iteration`
An EventLoopNode has started a new iteration (one LLM turn).
| Data Field | Type | Description |
| ----------- | ----- | ------------------------- |
| `iteration` | `int` | Zero-based iteration index |
**Emitted by:** `EventLoopNode._publish_iteration()`
---
### `node_loop_completed`
An EventLoopNode has finished its execution loop.
| Data Field | Type | Description |
| ------------ | ----- | -------------------------------------- |
| `iterations` | `int` | Total number of iterations completed |
**Emitted by:** `EventLoopNode._publish_loop_completed()`, `GraphExecutor` (for function nodes in parallel branches)
---
## LLM Streaming
### `llm_text_delta`
Incremental text output from the LLM (non-client-facing nodes only).
| Data Field | Type | Description |
| ---------- | ----- | ---------------------------------------- |
| `content` | `str` | New text chunk (delta) |
| `snapshot` | `str` | Full accumulated text so far |
**Emitted by:** `EventLoopNode._publish_text_delta()` when `client_facing=False`
---
### `llm_reasoning_delta`
Incremental reasoning/thinking output from the LLM.
| Data Field | Type | Description |
| ---------- | ----- | ------------------- |
| `content` | `str` | New reasoning chunk |
**Emitted by:** Not currently wired in `EventLoopNode` (reserved for extended thinking models).
---
## Tool Lifecycle
### `tool_call_started`
The LLM has requested a tool call and execution is about to begin.
| Data Field | Type | Description |
| ------------ | ------ | ------------------------------------ |
| `tool_use_id`| `str` | Unique ID for this tool invocation |
| `tool_name` | `str` | Name of the tool being called |
| `tool_input` | `dict` | Arguments passed to the tool |
**Emitted by:** `EventLoopNode._publish_tool_started()`
---
### `tool_call_completed`
A tool call has finished executing.
| Data Field | Type | Description |
| ------------ | ------ | -------------------------------------- |
| `tool_use_id`| `str` | Same ID from `tool_call_started` |
| `tool_name` | `str` | Name of the tool |
| `result` | `str` | Tool execution result (may be truncated)|
| `is_error` | `bool` | Whether the tool returned an error |
**Emitted by:** `EventLoopNode._publish_tool_completed()`
---
## Client I/O
These events are emitted only by nodes with `client_facing=True`. They drive the TUI's chat interface.
### `client_output_delta`
Incremental text output meant for the human operator.
| Data Field | Type | Description |
| ---------- | ----- | ---------------------------- |
| `content` | `str` | New text chunk (delta) |
| `snapshot` | `str` | Full accumulated text so far |
**Emitted by:** `EventLoopNode._publish_text_delta()` when `client_facing=True`
---
### `client_input_requested`
The node is waiting for human input (via `ask_user` tool or auto-block on text-only turns).
| Data Field | Type | Description |
| ---------- | ----- | ------------------------------------------------- |
| `prompt` | `str` | Optional prompt/question shown to the user |
**Emitted by:** `EventLoopNode._await_user_input()`, doom loop handler
The TUI subscribes to this event to show the input prompt and focus the chat input. After the user types, `inject_event()` is called on the node to unblock it.
---
## Internal Node Observability
### `node_internal_output`
Output from a non-client-facing node (for debugging/monitoring).
| Data Field | Type | Description |
| ---------- | ----- | ---------------- |
| `content` | `str` | Output text |
**Emitted by:** Available via `emit_node_internal_output()` — not currently wired in the default `EventLoopNode`.
---
### `node_input_blocked`
A non-client-facing node is blocked waiting for input.
| Data Field | Type | Description |
| ---------- | ----- | --------------- |
| `prompt` | `str` | Block reason |
**Emitted by:** Available via `emit_node_input_blocked()` — reserved for future use.
---
### `node_stalled`
The node's LLM has produced identical responses for several consecutive turns (stall detection).
| Data Field | Type | Description |
| ---------- | ----- | ------------------------------------------------- |
| `reason` | `str` | Always `"Consecutive identical responses detected"`|
**Emitted by:** `EventLoopNode._publish_stalled()`
---
### `node_tool_doom_loop`
The LLM is calling the same tool(s) with identical arguments repeatedly (doom loop detection).
| Data Field | Type | Description |
| ------------- | ----- | ------------------------------------ |
| `description` | `str` | Human-readable doom loop description |
**Emitted by:** `EventLoopNode` doom loop handler
---
## Judge Decisions
### `judge_verdict`
The judge (custom or implicit) has evaluated the current iteration.
| Data Field | Type | Description |
| ------------ | ----- | ---------------------------------------------------- |
| `action` | `str` | `"ACCEPT"`, `"RETRY"`, `"ESCALATE"`, or `"CONTINUE"` |
| `feedback` | `str` | Judge feedback (empty for ACCEPT/CONTINUE) |
| `judge_type` | `str` | `"custom"` (explicit JudgeProtocol) or `"implicit"` (stop-reason heuristic) |
| `iteration` | `int` | Which iteration this verdict applies to |
**Emitted by:** `EventLoopNode._publish_judge_verdict()`
**Verdict meanings:**
- **ACCEPT** — Output meets requirements; node exits successfully.
- **RETRY** — Output needs improvement; loop continues with feedback injected.
- **ESCALATE** — Problem cannot be solved at this level; triggers escalation.
- **CONTINUE** — Implicit verdict: LLM called tools, so it's making progress — let it keep going.
---
## Output Tracking
### `output_key_set`
A node has set an output key via the `set_output` synthetic tool.
| Data Field | Type | Description |
| ---------- | ----- | ----------------- |
| `key` | `str` | Output key name |
**Emitted by:** `EventLoopNode._publish_output_key_set()`
---
## Retry & Edge Tracking
### `node_retry`
A transient error occurred during an LLM call and the node is retrying.
| Data Field | Type | Description |
| ------------- | ----- | ---------------------------------- |
| `retry_count` | `int` | Current retry attempt number |
| `max_retries` | `int` | Maximum retries configured |
| `error` | `str` | Error message (truncated to 500ch) |
**Emitted by:** `EventLoopNode` (stream retry handler), `GraphExecutor` (node-level retry)
---
### `edge_traversed`
The executor has traversed an edge from one node to another.
| Data Field | Type | Description |
| ---------------- | ----- | ---------------------------------------------- |
| `source_node` | `str` | Node ID the edge starts from |
| `target_node` | `str` | Node ID the edge goes to |
| `edge_condition` | `str` | Edge condition: `"router"`, `"on_success"`, etc. |
**Emitted by:** `GraphExecutor.execute()` — after router decisions, condition-based edges, and fallback edges.
---
## Context Management
### `context_compacted`
Not currently emitted — reserved for future use when `NodeConversation` compacts history.
---
## State Changes
### `state_changed`
A shared memory key has been modified.
| Data Field | Type | Description |
| ----------- | ----- | ---------------------------------- |
| `key` | `str` | Memory key that changed |
| `old_value` | `Any` | Previous value |
| `new_value` | `Any` | New value |
| `scope` | `str` | Scope of the change |
**Emitted by:** Available via `emit_state_changed()` — not currently wired in default execution.
---
### `state_conflict`
Not currently emitted — reserved for concurrent write conflict detection.
---
## Goal Tracking
### `goal_progress`
Goal completion progress update.
| Data Field | Type | Description |
| ----------------- | ------- | ------------------------------------ |
| `progress`        | `float` | 0.0–1.0 completion fraction          |
| `criteria_status` | `dict` | Per-criterion status |
**Emitted by:** Available via `emit_goal_progress()` — not currently wired in default execution.
---
### `goal_achieved`
Not currently emitted — reserved for explicit goal completion signals.
---
### `constraint_violation`
A goal constraint has been violated.
| Data Field | Type | Description |
| --------------- | ----- | ------------------------ |
| `constraint_id` | `str` | Which constraint failed |
| `description` | `str` | What went wrong |
**Emitted by:** Available via `emit_constraint_violation()`.
---
## Stream Lifecycle
### `stream_started` / `stream_stopped`
Not currently emitted — reserved for `ExecutionStream` lifecycle tracking.
---
## External Triggers
### `webhook_received`
An external webhook has been received.
| Data Field | Type | Description |
| -------------- | ------ | ---------------------------- |
| `path` | `str` | Webhook URL path |
| `method` | `str` | HTTP method |
| `headers` | `dict` | HTTP headers |
| `payload` | `dict` | Request body |
| `query_params` | `dict` | URL query parameters |
**Emitted by:** Webhook server integration.
Note: `node_id` is not set on this event; `stream_id` is the webhook source ID.
---
## Escalation
### `escalation_requested`
An agent has requested handoff to the Hive Coder (via the `escalate_to_coder` synthetic tool).
| Data Field | Type | Description |
| ---------- | ----- | ------------------------------- |
| `reason` | `str` | Why escalation is needed |
| `context` | `str` | Additional context for the coder|
**Emitted by:** `EventLoopNode` when the LLM calls `escalate_to_coder`.
---
## Worker Health Monitoring
These events form the **judge → queen → operator** escalation pipeline.
### `worker_escalation_ticket`
The Worker Health Judge has detected a degradation pattern and is escalating to the Queen.
| Data Field | Type | Description |
| ---------- | ------ | ------------------------------------ |
| `ticket` | `dict` | Full `EscalationTicket` (see below) |
**Emitted by:** `emit_escalation_ticket` tool (in `worker_monitoring_tools.py`)
#### EscalationTicket Schema
| Field | Type | Description |
| ------------------------- | ------------------ | -------------------------------------------------------- |
| `ticket_id` | `str` | Auto-generated UUID |
| `created_at` | `str` | ISO timestamp |
| `worker_agent_id` | `str` | Which worker agent |
| `worker_session_id` | `str` | Which session |
| `worker_node_id` | `str` | Which node is struggling |
| `worker_graph_id` | `str` | Which graph |
| `severity` | `str` | `"low"`, `"medium"`, `"high"`, or `"critical"` |
| `cause` | `str` | Human-readable problem description |
| `judge_reasoning` | `str` | Judge's deliberation chain |
| `suggested_action` | `str` | e.g. `"Restart node"`, `"Human review"`, `"Kill session"`|
| `recent_verdicts` | `list[str]` | e.g. `["RETRY", "RETRY", "CONTINUE", "RETRY"]` |
| `total_steps_checked` | `int` | Steps the judge inspected |
| `steps_since_last_accept` | `int` | Consecutive non-ACCEPT steps |
| `stall_minutes` | `float \| null` | Minutes since last activity (null if active) |
| `evidence_snippet` | `str` | Excerpt from recent LLM output |
---
### `queen_intervention_requested`
The Queen has triaged an escalation ticket and decided the human operator should be involved.
| Data Field | Type | Description |
| ----------------- | ----- | ---------------------------------------------------- |
| `ticket_id` | `str` | From the original `EscalationTicket` |
| `analysis`        | `str` | Queen's 2–3 sentence analysis                        |
| `severity` | `str` | `"low"`, `"medium"`, `"high"`, or `"critical"` |
| `queen_graph_id` | `str` | Queen's graph ID (for TUI navigation) |
| `queen_stream_id` | `str` | Queen's stream ID |
**Emitted by:** `notify_operator` tool (in `worker_monitoring_tools.py`)
The TUI subscribes to this event and shows a non-disruptive notification. The worker continues running.
---
## Custom Events
### `custom`
User-defined events with arbitrary payloads. No schema enforced.
---
## Subscription & Filtering
Events can be filtered when subscribing:
```python
bus.subscribe(
event_types=[EventType.TOOL_CALL_STARTED, EventType.TOOL_CALL_COMPLETED],
handler=my_handler,
filter_stream="default", # Only events from this stream
filter_node="planner", # Only events from this node
filter_execution="exec-uuid", # Only events from this execution
filter_graph="worker", # Only events from this graph
)
```
## Debug Event Logging
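Set `HIVE_DEBUG_EVENTS=1` to write every published event to a JSONL file at `~/.hive/event_logs/<timestamp>.jsonl`; set it to a directory path instead (e.g. `HIVE_DEBUG_EVENTS=/tmp/ev`) to write the log file into that directory. Each line is the full JSON serialization of an `AgentEvent`: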
```json
{
"type": "tool_call_started",
"stream_id": "default",
"node_id": "planner",
"execution_id": "a1b2c3d4-...",
"graph_id": "worker",
"data": {"tool_use_id": "tu_1", "tool_name": "web_search", "tool_input": {"query": "..."}},
"timestamp": "2026-02-24T12:00:00.000000",
"correlation_id": null
}
```
@@ -84,12 +84,10 @@ class Checkpoint:
│ ├── checkpoint_1.json # Individual checkpoints
│ ├── checkpoint_2.json
│ └── checkpoint_N.json
├── conversations/ # Per-node conversation state (existing)
│ ├── node_id_1/
│ ├── parts/
│ ├── meta.json
│ │ └── cursor.json
│ └── node_id_2/...
├── conversations/ # Flat conversation state (parts carry phase_id)
│ ├── meta.json # Current node config
├── cursor.json # Iteration, outputs, stall state
└── parts/ # Sequential message files
├── data/ # Spillover artifacts (existing)
└── logs/ # L1/L2/L3 logs (existing)
```
+1 -1
View File
@@ -27,7 +27,7 @@ This layered approach enables efficient debugging: start with L1 to identify pro
│ ├── summary.json # L1: Run outcome
│ ├── details.jsonl # L2: Per-node results
│ └── tool_logs.jsonl # L3: Step-by-step execution
├── conversations/ # Per-node EventLoop state
├── conversations/ # Flat EventLoop state (parts carry phase_id)
└── data/ # Spillover artifacts
```
File diff suppressed because it is too large Load Diff
+4
View File
@@ -66,6 +66,10 @@ class Runtime:
self._current_run: Run | None = None
self._current_node: str = "unknown"
@property
def execution_id(self) -> str:
    """Execution ID exposed by this runtime; this implementation always returns ``""``."""
    return ""
# === RUN LIFECYCLE ===
def start_run(
@@ -0,0 +1,39 @@
"""EscalationTicket — structured schema for worker health judge escalations."""
from __future__ import annotations
from datetime import UTC, datetime
from typing import Literal
from uuid import uuid4
from pydantic import BaseModel, Field
class EscalationTicket(BaseModel):
    """Escalation report produced by the Worker Health Judge.

    Only ``ticket_id`` and ``created_at`` carry defaults; every other field
    is declared without one, so the judge has to supply them all before
    calling ``emit_escalation_ticket``. Pydantic validation rejects partial
    tickets, which is intended to prevent impulsive escalation.
    """

    # --- Identity (auto-generated) ---
    # Random UUID rendered as a string.
    ticket_id: str = Field(default_factory=lambda: str(uuid4()))
    # ISO-8601 timestamp taken in UTC at construction time.
    created_at: str = Field(default_factory=lambda: datetime.now(UTC).isoformat())

    # --- Worker identification ---
    worker_agent_id: str
    worker_session_id: str
    worker_node_id: str
    worker_graph_id: str

    # --- Problem characterization (filled by judge via LLM deliberation) ---
    severity: Literal["low", "medium", "high", "critical"]
    # Human-readable, e.g. "Node has produced 18 RETRY verdicts..."
    cause: str
    # Judge's own deliberation chain
    judge_reasoning: str
    # "Restart node", "Human review", "Kill session", etc.
    suggested_action: str

    # --- Evidence ---
    # e.g. ["RETRY", "RETRY", "CONTINUE", "RETRY"]
    recent_verdicts: list[str]
    # How many steps the judge saw
    total_steps_checked: int
    # Steps with no ACCEPT verdict
    steps_since_last_accept: int
    # Wall-clock minutes since last new log step (None if active)
    stall_minutes: float | None
    # Brief excerpt from recent LLM output or error
    evidence_snippet: str
+260 -4
View File
@@ -8,15 +8,53 @@ Allows streams to:
"""
import asyncio
import json
import logging
import os
from collections.abc import Awaitable, Callable
from dataclasses import dataclass, field
from datetime import datetime
from enum import StrEnum
from typing import Any
from pathlib import Path
from typing import IO, Any
logger = logging.getLogger(__name__)
# ---------------------------------------------------------------------------
# HIVE_DEBUG_EVENTS — write every published event to a JSONL file.
#
# Set the env var to any truthy value to enable:
# HIVE_DEBUG_EVENTS=1 → writes to ~/.hive/event_logs/<ts>.jsonl
# HIVE_DEBUG_EVENTS=/tmp/ev → writes to that exact directory
#
# Each line is a full JSON serialisation of the AgentEvent.
# The file is opened lazily on first publish and flushed after every write.
# ---------------------------------------------------------------------------
# Raw (whitespace-stripped) value of HIVE_DEBUG_EVENTS; empty when unset.
_DEBUG_EVENTS_RAW = os.environ.get("HIVE_DEBUG_EVENTS", "").strip()
# Enabled for any non-empty value except "0"/"false" (case-insensitive).
# "1"/"true"/"full" are therefore enabled too; other values are later
# interpreted as a log directory by _open_event_log().
_DEBUG_EVENTS_ENABLED = bool(_DEBUG_EVENTS_RAW) and _DEBUG_EVENTS_RAW.lower() not in (
    "0",
    "false",
)
def _open_event_log() -> IO[str] | None:
"""Open a JSONL event log file. Returns None if disabled."""
if not _DEBUG_EVENTS_ENABLED:
return None
raw = _DEBUG_EVENTS_RAW
if raw.lower() in ("1", "true", "full"):
log_dir = Path.home() / ".hive" / "event_logs"
else:
log_dir = Path(raw)
log_dir.mkdir(parents=True, exist_ok=True)
ts = datetime.now().strftime("%Y%m%d_%H%M%S")
path = log_dir / f"{ts}.jsonl"
logger.info("Event debug log → %s", path)
return open(path, "a", encoding="utf-8") # noqa: SIM115
_event_log_file: IO[str] | None = None
_event_log_ready = False # lazy init guard
class EventType(StrEnum):
"""Types of events that can be published."""
@@ -45,10 +83,12 @@ class EventType(StrEnum):
NODE_LOOP_STARTED = "node_loop_started"
NODE_LOOP_ITERATION = "node_loop_iteration"
NODE_LOOP_COMPLETED = "node_loop_completed"
NODE_ACTION_PLAN = "node_action_plan"
# LLM streaming observability
LLM_TEXT_DELTA = "llm_text_delta"
LLM_REASONING_DELTA = "llm_reasoning_delta"
LLM_TURN_COMPLETE = "llm_turn_complete"
# Tool lifecycle
TOOL_CALL_STARTED = "tool_call_started"
@@ -62,6 +102,7 @@ class EventType(StrEnum):
NODE_INTERNAL_OUTPUT = "node_internal_output"
NODE_INPUT_BLOCKED = "node_input_blocked"
NODE_STALLED = "node_stalled"
NODE_TOOL_DOOM_LOOP = "node_tool_doom_loop"
# Judge decisions
JUDGE_VERDICT = "judge_verdict"
@@ -82,6 +123,26 @@ class EventType(StrEnum):
# Custom events
CUSTOM = "custom"
# Escalation (agent requests handoff to hive_coder)
ESCALATION_REQUESTED = "escalation_requested"
# Worker health monitoring (judge → queen → operator)
WORKER_ESCALATION_TICKET = "worker_escalation_ticket"
QUEEN_INTERVENTION_REQUESTED = "queen_intervention_requested"
# Execution resurrection (auto-restart on non-fatal failure)
EXECUTION_RESURRECTED = "execution_resurrected"
# Worker lifecycle (session manager → frontend)
WORKER_LOADED = "worker_loaded"
CREDENTIALS_REQUIRED = "credentials_required"
# Queen mode changes (building ↔ running)
QUEEN_MODE_CHANGED = "queen_mode_changed"
# Subagent reports (one-way progress updates from sub-agents)
SUBAGENT_REPORT = "subagent_report"
@dataclass
class AgentEvent:
@@ -94,6 +155,7 @@ class AgentEvent:
data: dict[str, Any] = field(default_factory=dict)
timestamp: datetime = field(default_factory=datetime.now)
correlation_id: str | None = None # For tracking related events
graph_id: str | None = None # Which graph emitted this event (multi-graph sessions)
def to_dict(self) -> dict:
"""Convert to dictionary for serialization."""
@@ -105,6 +167,7 @@ class AgentEvent:
"data": self.data,
"timestamp": self.timestamp.isoformat(),
"correlation_id": self.correlation_id,
"graph_id": self.graph_id,
}
@@ -122,6 +185,7 @@ class Subscription:
filter_stream: str | None = None # Only receive events from this stream
filter_node: str | None = None # Only receive events from this node
filter_execution: str | None = None # Only receive events from this execution
filter_graph: str | None = None # Only receive events from this graph
class EventBus:
@@ -181,6 +245,7 @@ class EventBus:
filter_stream: str | None = None,
filter_node: str | None = None,
filter_execution: str | None = None,
filter_graph: str | None = None,
) -> str:
"""
Subscribe to events.
@@ -191,6 +256,7 @@ class EventBus:
filter_stream: Only receive events from this stream
filter_node: Only receive events from this node
filter_execution: Only receive events from this execution
filter_graph: Only receive events from this graph
Returns:
Subscription ID (use to unsubscribe)
@@ -205,6 +271,7 @@ class EventBus:
filter_stream=filter_stream,
filter_node=filter_node,
filter_execution=filter_execution,
filter_graph=filter_graph,
)
self._subscriptions[sub_id] = subscription
@@ -241,6 +308,20 @@ class EventBus:
if len(self._event_history) > self._max_history:
self._event_history = self._event_history[-self._max_history :]
# Write event to JSONL file (gated by HIVE_DEBUG_EVENTS env var)
if _DEBUG_EVENTS_ENABLED:
global _event_log_file, _event_log_ready # noqa: PLW0603
if not _event_log_ready:
_event_log_file = _open_event_log()
_event_log_ready = True
if _event_log_file is not None:
try:
line = json.dumps(event.to_dict(), default=str)
_event_log_file.write(line + "\n")
_event_log_file.flush()
except Exception:
pass # never break event delivery
# Find matching subscriptions
matching_handlers: list[EventHandler] = []
@@ -270,6 +351,10 @@ class EventBus:
if subscription.filter_execution and subscription.filter_execution != event.execution_id:
return False
# Check graph filter
if subscription.filter_graph and subscription.filter_graph != event.graph_id:
return False
return True
async def _execute_handlers(
@@ -463,6 +548,24 @@ class EventBus:
)
)
async def emit_node_action_plan(
    self,
    stream_id: str,
    node_id: str,
    plan: str,
    execution_id: str | None = None,
) -> None:
    """Publish a ``NODE_ACTION_PLAN`` event whose payload is ``{"plan": plan}``."""
    event = AgentEvent(
        type=EventType.NODE_ACTION_PLAN,
        stream_id=stream_id,
        node_id=node_id,
        execution_id=execution_id,
        data={"plan": plan},
    )
    await self.publish(event)
# === LLM STREAMING PUBLISHERS ===
async def emit_llm_text_delta(
@@ -502,6 +605,36 @@ class EventBus:
)
)
async def emit_llm_turn_complete(
    self,
    stream_id: str,
    node_id: str,
    stop_reason: str,
    model: str,
    input_tokens: int,
    output_tokens: int,
    execution_id: str | None = None,
    iteration: int | None = None,
) -> None:
    """Publish an ``LLM_TURN_COMPLETE`` event for a finished LLM turn.

    The payload always carries ``stop_reason``, ``model``, ``input_tokens``
    and ``output_tokens``; ``iteration`` is added only when it is not None.
    """
    payload: dict = {
        "stop_reason": stop_reason,
        "model": model,
        "input_tokens": input_tokens,
        "output_tokens": output_tokens,
    }
    if iteration is not None:
        payload["iteration"] = iteration

    event = AgentEvent(
        type=EventType.LLM_TURN_COMPLETE,
        stream_id=stream_id,
        node_id=node_id,
        execution_id=execution_id,
        data=payload,
    )
    await self.publish(event)
# === TOOL LIFECYCLE PUBLISHERS ===
async def emit_tool_call_started(
@@ -563,15 +696,19 @@ class EventBus:
content: str,
snapshot: str,
execution_id: str | None = None,
iteration: int | None = None,
) -> None:
"""Emit client output delta event (client_facing=True nodes)."""
data: dict = {"content": content, "snapshot": snapshot}
if iteration is not None:
data["iteration"] = iteration
await self.publish(
AgentEvent(
type=EventType.CLIENT_OUTPUT_DELTA,
stream_id=stream_id,
node_id=node_id,
execution_id=execution_id,
data={"content": content, "snapshot": snapshot},
data=data,
)
)
@@ -581,15 +718,24 @@ class EventBus:
node_id: str,
prompt: str = "",
execution_id: str | None = None,
options: list[str] | None = None,
) -> None:
"""Emit client input requested event (client_facing=True nodes)."""
"""Emit client input requested event (client_facing=True nodes).
Args:
options: Optional predefined choices for the user (1-3 items).
The frontend appends an "Other" free-text option automatically.
"""
data: dict[str, Any] = {"prompt": prompt}
if options:
data["options"] = options
await self.publish(
AgentEvent(
type=EventType.CLIENT_INPUT_REQUESTED,
stream_id=stream_id,
node_id=node_id,
execution_id=execution_id,
data={"prompt": prompt},
data=data,
)
)
@@ -631,6 +777,24 @@ class EventBus:
)
)
async def emit_tool_doom_loop(
    self,
    stream_id: str,
    node_id: str,
    description: str = "",
    execution_id: str | None = None,
) -> None:
    """Publish a ``NODE_TOOL_DOOM_LOOP`` event with ``{"description": description}``."""
    event = AgentEvent(
        type=EventType.NODE_TOOL_DOOM_LOOP,
        stream_id=stream_id,
        node_id=node_id,
        execution_id=execution_id,
        data={"description": description},
    )
    await self.publish(event)
async def emit_node_input_blocked(
self,
stream_id: str,
@@ -801,6 +965,95 @@ class EventBus:
)
)
async def emit_escalation_requested(
    self,
    stream_id: str,
    node_id: str,
    reason: str = "",
    context: str = "",
    execution_id: str | None = None,
) -> None:
    """Publish an ``ESCALATION_REQUESTED`` event (agent wants hive_coder).

    The payload contains the ``reason`` and ``context`` strings as given.
    """
    event = AgentEvent(
        type=EventType.ESCALATION_REQUESTED,
        stream_id=stream_id,
        node_id=node_id,
        execution_id=execution_id,
        data={"reason": reason, "context": context},
    )
    await self.publish(event)
async def emit_worker_escalation_ticket(
    self,
    stream_id: str,
    node_id: str,
    ticket: dict,
    execution_id: str | None = None,
) -> None:
    """Publish a ``WORKER_ESCALATION_TICKET`` event wrapping *ticket*.

    Emitted by the health judge when a worker shows a degradation pattern.
    The dict is passed through unchanged under the ``"ticket"`` payload key.
    """
    event = AgentEvent(
        type=EventType.WORKER_ESCALATION_TICKET,
        stream_id=stream_id,
        node_id=node_id,
        execution_id=execution_id,
        data={"ticket": ticket},
    )
    await self.publish(event)
async def emit_queen_intervention_requested(
    self,
    stream_id: str,
    node_id: str,
    ticket_id: str,
    analysis: str,
    severity: str,
    queen_graph_id: str,
    queen_stream_id: str,
    execution_id: str | None = None,
) -> None:
    """Publish a ``QUEEN_INTERVENTION_REQUESTED`` event.

    Emitted by the queen when she decides the operator should be involved.
    The payload carries ``ticket_id``, ``analysis``, ``severity``,
    ``queen_graph_id`` and ``queen_stream_id`` exactly as passed in.
    """
    payload = {
        "ticket_id": ticket_id,
        "analysis": analysis,
        "severity": severity,
        "queen_graph_id": queen_graph_id,
        "queen_stream_id": queen_stream_id,
    }
    event = AgentEvent(
        type=EventType.QUEEN_INTERVENTION_REQUESTED,
        stream_id=stream_id,
        node_id=node_id,
        execution_id=execution_id,
        data=payload,
    )
    await self.publish(event)
async def emit_subagent_report(
    self,
    stream_id: str,
    node_id: str,
    subagent_id: str,
    message: str,
    data: dict[str, Any] | None = None,
    execution_id: str | None = None,
) -> None:
    """Publish a ``SUBAGENT_REPORT`` event (one-way sub-agent progress report).

    The payload holds ``subagent_id``, ``message`` and the optional ``data``
    dict; ``data`` is included as-is, so it is ``None`` when not provided.
    """
    payload = {
        "subagent_id": subagent_id,
        "message": message,
        "data": data,
    }
    event = AgentEvent(
        type=EventType.SUBAGENT_REPORT,
        stream_id=stream_id,
        node_id=node_id,
        execution_id=execution_id,
        data=payload,
    )
    await self.publish(event)
# === QUERY OPERATIONS ===
def get_history(
@@ -854,6 +1107,7 @@ class EventBus:
stream_id: str | None = None,
node_id: str | None = None,
execution_id: str | None = None,
graph_id: str | None = None,
timeout: float | None = None,
) -> AgentEvent | None:
"""
@@ -864,6 +1118,7 @@ class EventBus:
stream_id: Filter by stream
node_id: Filter by node
execution_id: Filter by execution
graph_id: Filter by graph
timeout: Maximum time to wait (seconds)
Returns:
@@ -884,6 +1139,7 @@ class EventBus:
filter_stream=stream_id,
filter_node=node_id,
filter_execution=execution_id,
filter_graph=graph_id,
)
try:

Some files were not shown because too many files have changed in this diff Show More