Compare commits
120 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| 7d75c6a09f | |||
| 1fabd8e8fb | |||
| 043c79e0e4 | |||
| 9193336fd3 | |||
| 59e90d3168 | |||
| ef34b1190a | |||
| 1e848d67bb | |||
| a0e68871f7 | |||
| 767beb4005 | |||
| 95655a4c85 | |||
| 102866780c | |||
| a379ae97c8 | |||
| d5ae7e6c4b | |||
| 68f6b72564 | |||
| eecfb4f407 | |||
| 32f556cd6e | |||
| 8ea026508d | |||
| 771efd5ce4 | |||
| 8f56b8b068 | |||
| 4f588b3010 | |||
| 9f70868f98 | |||
| 6449c76091 | |||
| b328ced110 | |||
| 1b6e8c34be | |||
| 674454cc5b | |||
| 59c3979451 | |||
| 51fdd93f0c | |||
| 95f1d1abcd | |||
| a164ed6faf | |||
| abe3d2d067 | |||
| c80d86bdbe | |||
| ec08ae7438 | |||
| e0cd16b92b | |||
| 4006ee96b6 | |||
| b78c879404 | |||
| 71a71beca7 | |||
| c5052ade34 | |||
| e1911b3684 | |||
| 96c7070cc9 | |||
| 6affe06f6d | |||
| 02edd44283 | |||
| 60d094464a | |||
| c7e85aa9f5 | |||
| 00c55d5fb2 | |||
| 6a7778ebcd | |||
| 8f042b7ca5 | |||
| b594165575 | |||
| 1630c1ee7a | |||
| 318ecfd508 | |||
| 08b0cbc208 | |||
| 4bcbebf761 | |||
| 00a3f94315 | |||
| 5b08edb384 | |||
| 332311b49b | |||
| 0e5f571b09 | |||
| bf06984625 | |||
| d4875892fc | |||
| 86f6aa2e8f | |||
| 537667758a | |||
| 76fe644cac | |||
| c6c333761b | |||
| 6a77a9a7b2 | |||
| 1a37fb2f36 | |||
| 2609ca7619 | |||
| b7a115259d | |||
| 1765e1cb6c | |||
| 0417e33ab2 | |||
| 42a9c7b0f1 | |||
| 398e86d787 | |||
| 51406e358e | |||
| 137162eada | |||
| 2b7a38f746 | |||
| b25de61363 | |||
| b3adbe745f | |||
| f3fefe0cbc | |||
| c8a25a0287 | |||
| 5823513fde | |||
| 97ce8dfc54 | |||
| 5e628c7606 | |||
| 5b931982e3 | |||
| 8174f330ae | |||
| 9774e53720 | |||
| cf3296984c | |||
| eafbeb78b4 | |||
| 5cb5083f8d | |||
| ebb6605a86 | |||
| e9c1731c0f | |||
| 0e2333daaf | |||
| 5167c29aed | |||
| 4da4d3b2c0 | |||
| 3e622af484 | |||
| 6600ce0ef9 | |||
| 74d5dd03dd | |||
| d18091bb2c | |||
| d1a1f36d6e | |||
| 051b0fcef2 | |||
| e270d3210d | |||
| d4a66d4b5f | |||
| ad39b6ea50 | |||
| 71baf6166d | |||
| 25afdae093 | |||
| 21700eb2ec | |||
| 617462df52 | |||
| b3c1f1436b | |||
| 310b922ce8 | |||
| 20b6553b07 | |||
| 1035cc9481 | |||
| 5d6dd1caa6 | |||
| 45ba771650 | |||
| a4b15c0320 | |||
| 211619120e | |||
| a78bb16e4b | |||
| c93bcee933 | |||
| 08160a004a | |||
| ccd5de7496 | |||
| c332ef8823 | |||
| 06db11eebf | |||
| 859db7f056 | |||
| 6e0b5c7250 | |||
| becbdb3706 |
@@ -0,0 +1,241 @@
|
||||
---
|
||||
name: browser-edge-cases
|
||||
description: SOP for debugging browser automation failures on complex websites. Use when browser tools fail on specific sites like LinkedIn, Twitter/X, SPAs, or sites with Shadow DOM.
|
||||
license: MIT
|
||||
---
|
||||
|
||||
# Browser Tool Edge Cases
|
||||
|
||||
Standard Operating Procedure for debugging and fixing browser automation failures on complex websites.
|
||||
|
||||
## When to Use This Skill
|
||||
|
||||
- `browser_scroll` succeeds but page doesn't move
|
||||
- `browser_click` succeeds but no action triggered
|
||||
- `browser_type` text disappears or doesn't work
|
||||
- `browser_snapshot` hangs or returns stale content
|
||||
- `browser_navigate` loads wrong content
|
||||
|
||||
## SOP: Debugging Browser Tool Failures
|
||||
|
||||
### Phase 1: Reproduce & Isolate
|
||||
|
||||
```
|
||||
1. Create minimal test case demonstrating failure
|
||||
2. Test against simple site (example.com) to verify tool works
|
||||
3. Test against problematic site to confirm issue
|
||||
```
|
||||
|
||||
**Quick isolation test:**
|
||||
```python
|
||||
# Test 1: Does the tool work at all?
|
||||
await browser_navigate(tab_id, "https://example.com")
|
||||
result = await browser_scroll(tab_id, "down", 100)
|
||||
# Should work on simple sites
|
||||
|
||||
# Test 2: Does it fail on the problematic site?
|
||||
await browser_navigate(tab_id, "https://linkedin.com/feed")
|
||||
result = await browser_scroll(tab_id, "down", 100)
|
||||
# If this fails but example.com works → site-specific edge case
|
||||
```
|
||||
|
||||
### Phase 2: Analyze Root Cause
|
||||
|
||||
**Step 2a: Check console for errors**
|
||||
```python
|
||||
console = await browser_console(tab_id)
|
||||
# Look for: CSP violations, React errors, JavaScript exceptions
|
||||
```
|
||||
|
||||
**Step 2b: Inspect DOM structure**
|
||||
```python
|
||||
html = await browser_html(tab_id)
|
||||
snapshot = await browser_snapshot(tab_id)
|
||||
# Look for:
|
||||
# - Nested scrollable divs (overflow: scroll/auto)
|
||||
# - Shadow DOM roots
|
||||
# - iframes
|
||||
# - Custom widgets
|
||||
```
|
||||
|
||||
**Step 2c: Identify the pattern**
|
||||
|
||||
| Symptom | Likely Cause | Check |
|
||||
|---------|--------------|-------|
|
||||
| Scroll doesn't move | Nested scroll container | Look for `overflow: scroll` divs |
|
||||
| Click no effect | Element covered | Check `getBoundingClientRect` vs viewport |
|
||||
| Type clears | Autocomplete/React | Check for event listeners on input |
|
||||
| Snapshot hangs | Huge DOM | Check node count in snapshot |
|
||||
| Snapshot stale | SPA hydration | Wait after navigation |
|
||||
|
||||
### Phase 3: Implement Multi-Layer Fix
|
||||
|
||||
**Pattern: Always have fallbacks**
|
||||
|
||||
```python
|
||||
async def robust_operation(tab_id):
    """Run an operation through three layers, returning the first verified result.

    The primary method and the CDP fallback are each attempted in turn; a
    layer is skipped when it raises or when ``verify_success`` rejects its
    result. The JavaScript fallback is the last resort: its result is
    returned unverified and any exception it raises propagates to the caller.
    """
    # Layer 1: primary approach
    try:
        outcome = await primary_method(tab_id)
        if verify_success(outcome):
            return outcome
    except Exception:
        # Deliberate best-effort: fall through to the next layer.
        pass

    # Layer 2: CDP fallback
    try:
        outcome = await cdp_fallback(tab_id)
        if verify_success(outcome):
            return outcome
    except Exception:
        # Deliberate best-effort: fall through to the last layer.
        pass

    # Layer 3: JavaScript fallback (not wrapped, so its errors surface)
    final_outcome = await javascript_fallback(tab_id)
    return final_outcome
|
||||
```
|
||||
|
||||
**Pattern: Always add timeouts**
|
||||
|
||||
```python
|
||||
# Bad - can hang forever
|
||||
result = await browser_snapshot(tab_id)
|
||||
|
||||
# Good - fails fast with useful error
|
||||
try:
|
||||
result = await browser_snapshot(tab_id, timeout_s=10.0)
|
||||
except asyncio.TimeoutError:
|
||||
# Handle timeout gracefully
|
||||
result = await fallback_snapshot(tab_id)
|
||||
```
|
||||
|
||||
### Phase 4: Verify Fix
|
||||
|
||||
```
|
||||
1. Run against problematic site → should work
|
||||
2. Run against simple site → should still work (regression check)
|
||||
3. Document in registry.md
|
||||
```
|
||||
|
||||
## Pattern Library
|
||||
|
||||
### P1: Nested Scrollable Containers
|
||||
|
||||
**Sites:** LinkedIn, Twitter/X, any SPA with scrollable feeds
|
||||
|
||||
**Detection:**
|
||||
```javascript
|
||||
// Find largest scrollable container
|
||||
const candidates = [];
|
||||
document.querySelectorAll('*').forEach(el => {
|
||||
const style = getComputedStyle(el);
|
||||
if (style.overflow.includes('scroll') || style.overflow.includes('auto')) {
|
||||
const rect = el.getBoundingClientRect();
|
||||
if (rect.width > 100 && rect.height > 100) {
|
||||
candidates.push({el, area: rect.width * rect.height});
|
||||
}
|
||||
}
|
||||
});
|
||||
candidates.sort((a, b) => b.area - a.area);
|
||||
return candidates[0]?.el;
|
||||
```
|
||||
|
||||
**Fix:** Dispatch scroll events at container's center, not viewport center.
|
||||
|
||||
### P2: Element Covered by Overlay
|
||||
|
||||
**Sites:** Modals, tooltips, SPAs with loading overlays
|
||||
|
||||
**Detection:**
|
||||
```javascript
|
||||
const rect = element.getBoundingClientRect();
|
||||
const centerX = rect.left + rect.width / 2;
|
||||
const centerY = rect.top + rect.height / 2;
|
||||
const topElement = document.elementFromPoint(centerX, centerY);
|
||||
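// true  → the element (or one of its descendants) is topmost at its center, i.e. NOT covered
// false → another element is on top at that point, i.e. the target is covered
return topElement === element || element.contains(topElement);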
|
||||
```
|
||||
|
||||
**Fix:** Wait for overlay to disappear, or use JavaScript click.
|
||||
|
||||
### P3: React Synthetic Events
|
||||
|
||||
**Sites:** React SPAs, modern web apps
|
||||
|
||||
**Detection:** If CDP click doesn't trigger handler but manual click works.
|
||||
|
||||
**Fix:** Use JavaScript click as primary:
|
||||
```javascript
|
||||
element.click();
|
||||
```
|
||||
|
||||
### P4: Huge DOM / Accessibility Tree
|
||||
|
||||
**Sites:** LinkedIn, Facebook, Twitter (feeds with 1000s of nodes)
|
||||
|
||||
**Detection:**
|
||||
```javascript
|
||||
document.querySelectorAll('*').length > 5000
|
||||
```
|
||||
|
||||
**Fix:**
|
||||
1. Add timeout to snapshot operation
|
||||
2. Truncate tree at 2000 nodes
|
||||
3. Fall back to DOM-based snapshot if accessibility tree too large
|
||||
|
||||
### P5: SPA Hydration Delay
|
||||
|
||||
**Sites:** React, Vue, Angular SPAs after navigation
|
||||
|
||||
**Detection:**
|
||||
```javascript
|
||||
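// Heuristic only: detects legacy React markup (data-reactid: React <= 15,
// data-reactroot: server-rendered React 16-17). It does not prove hydration
// has finished, and React 18+ emits neither attribute.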
|
||||
document.querySelector('[data-reactroot]') ||
|
||||
document.querySelector('[data-reactid]')
|
||||
```
|
||||
|
||||
**Fix:** Wait for specific selector after navigation:
|
||||
```python
|
||||
await browser_navigate(tab_id, url, wait_until="load")
|
||||
await browser_wait(tab_id, selector='[data-testid="content"]', timeout_ms=5000)
|
||||
```
|
||||
|
||||
### P6: Shadow DOM
|
||||
|
||||
**Sites:** Components using Shadow DOM, Lit elements
|
||||
|
||||
**Detection:**
|
||||
```javascript
|
||||
Array.from(document.querySelectorAll('*')).some(el => el.shadowRoot)
|
||||
```
|
||||
|
||||
**Fix:** Pierce shadow root:
|
||||
```javascript
|
||||
/**
 * Resolve a selector that may cross shadow-root boundaries.
 *
 * The selector is split on `>>>`; each segment is queried inside the shadow
 * root of the previous match when it exposes one, otherwise inside the
 * previous match itself (the first segment is queried on `document`).
 *
 * @param {string} selector - Segments separated by `>>>`, e.g. `my-host >>> button`.
 * @returns {Element|null} The element matched by the last segment, or `null`
 *   when any segment along the path matches nothing.
 */
function queryShadow(selector) {
  const parts = selector.split('>>>');
  let node = document;
  for (const part of parts) {
    // Descend into the shadow tree when the previous match hosts an open one.
    const scope = node.shadowRoot || node;
    node = scope.querySelector(part.trim());
    // Stop on a miss: dereferencing `null` on the next pass would throw a
    // TypeError instead of reporting "not found".
    if (!node) {
      return null;
    }
  }
  return node;
}
|
||||
```
|
||||
|
||||
## Quick Reference
|
||||
|
||||
| Issue | Primary Fix | Fallback |
|
||||
|-------|-------------|----------|
|
||||
| Scroll not working | Find scrollable container | Mouse wheel at container center |
|
||||
| Click no effect | JavaScript click() | CDP mouse events |
|
||||
| Type clears | Add delay_ms | Use execCommand |
|
||||
| Snapshot hangs | Add timeout_s | DOM snapshot fallback |
|
||||
| Stale content | Wait for selector | Increase wait_until timeout |
|
||||
| Shadow DOM | Pierce selector | JavaScript traversal |
|
||||
|
||||
## References
|
||||
|
||||
- [registry.md](registry.md) - Full list of known edge cases
|
||||
- [scripts/test_case.py](scripts/test_case.py) - Template for testing new cases
|
||||
- [BROWSER_USE_PATTERNS.md](../../tools/BROWSER_USE_PATTERNS.md) - Implementation patterns from browser-use
|
||||
@@ -0,0 +1,261 @@
|
||||
# Browser Edge Case Registry
|
||||
|
||||
Curated list of known browser automation edge cases with symptoms, causes, and fixes.
|
||||
|
||||
---
|
||||
|
||||
## Scroll Issues
|
||||
|
||||
### #1: LinkedIn Nested Scroll Container
|
||||
|
||||
| Attribute | Value |
|
||||
|-----------|-------|
|
||||
| **Site** | LinkedIn (linkedin.com/feed) |
|
||||
| **Symptom** | `browser_scroll()` returns `{ok: true}` but page doesn't move |
|
||||
| **Root Cause** | Content is in a nested scrollable div (`overflow: scroll`), not the main window |
|
||||
| **Detection** | `document.querySelectorAll('*')` with `overflow: scroll/auto` has large candidates |
|
||||
| **Fix** | JavaScript finds largest scrollable container, uses `container.scrollBy()` |
|
||||
| **Code** | `bridge.py:808-891` - smart scroll with container detection |
|
||||
| **Verified** | 2026-04-03 ✓ |
|
||||
|
||||
### #2: Twitter/X Lazy Loading
|
||||
|
||||
| Attribute | Value |
|
||||
|-----------|-------|
|
||||
| **Site** | Twitter/X (x.com) |
|
||||
| **Symptom** | Infinite scroll doesn't load new content |
|
||||
| **Root Cause** | Lazy loading requires content to be visible before loading more |
|
||||
| **Detection** | Scroll position at bottom but no new `[data-testid="tweet"]` elements |
|
||||
| **Fix** | Add `wait_for_selector` between scroll calls with 1s delay |
|
||||
| **Code** | Test file: `tests/test_x_page_load_repro.py` |
|
||||
| **Verified** | - |
|
||||
|
||||
### #3: Modal/Dialog Scroll Container
|
||||
|
||||
| Attribute | Value |
|
||||
|-----------|-------|
|
||||
| **Site** | Any site with modal dialogs |
|
||||
| **Symptom** | Scroll scrolls background page, not modal content |
|
||||
| **Root Cause** | Modal has its own scroll container with `overflow: scroll` |
|
||||
| **Detection** | Visible element with `position: fixed` and scrollable content |
|
||||
| **Fix** | Find visible modal container (highest z-index scrollable), scroll that |
|
||||
| **Code** | - |
|
||||
| **Verified** | - |
|
||||
|
||||
---
|
||||
|
||||
## Click Issues
|
||||
|
||||
### #4: Element Covered by Overlay
|
||||
|
||||
| Attribute | Value |
|
||||
|-----------|-------|
|
||||
| **Site** | SPAs, sites with loading overlays |
|
||||
| **Symptom** | Click succeeds but no action triggered |
|
||||
| **Root Cause** | Element is covered by transparent overlay, tooltip, or iframe |
|
||||
| **Detection** | `document.elementFromPoint(x, y) !== target` |
|
||||
| **Fix** | Wait for overlay to disappear, or use JavaScript `element.click()` |
|
||||
| **Code** | `bridge.py:394-591` - JavaScript click as primary |
|
||||
| **Verified** | - |
|
||||
|
||||
### #5: React Synthetic Events
|
||||
|
||||
| Attribute | Value |
|
||||
|-----------|-------|
|
||||
| **Site** | React applications |
|
||||
| **Symptom** | CDP click doesn't trigger React handler |
|
||||
| **Root Cause** | React uses synthetic events that don't respond to CDP events |
|
||||
| **Detection** | Site uses React (check for `__reactFiber$` or `data-reactroot`) |
|
||||
| **Fix** | Use JavaScript `element.click()` as primary method |
|
||||
| **Code** | `bridge.py:394-591` - JavaScript-first click |
|
||||
| **Verified** | - |
|
||||
|
||||
### #6: Shadow DOM Elements
|
||||
|
||||
| Attribute | Value |
|
||||
|-----------|-------|
|
||||
| **Site** | Components using Shadow DOM, Lit elements |
|
||||
| **Symptom** | `querySelector` can't find element |
|
||||
| **Root Cause** | Element is inside a shadow root, not main DOM tree |
|
||||
| **Detection** | `element.shadowRoot !== null` on parent elements |
|
||||
| **Fix** | Use piercing selector (`host >>> target`) or traverse shadow roots |
|
||||
| **Code** | See SKILL.md P6 pattern |
|
||||
| **Verified** | 2026-04-03 ✓ |
|
||||
|
||||
---
|
||||
|
||||
## Input Issues
|
||||
|
||||
### #7: ContentEditable / Rich Text Editors
|
||||
|
||||
| Attribute | Value |
|
||||
|-----------|-------|
|
||||
| **Site** | Rich text editors (Notion, Slack web, etc.) |
|
||||
| **Symptom** | `browser_type()` doesn't insert text |
|
||||
| **Root Cause** | Element is `contenteditable`, not an `<input>` or `<textarea>` |
|
||||
| **Detection** | `element.contentEditable === 'true'` |
|
||||
| **Fix** | Focus via JavaScript, use `execCommand('insertText')` or `Input.dispatchKeyEvent` |
|
||||
| **Code** | `bridge.py:616-694` - contentEditable handling |
|
||||
| **Verified** | 2026-04-03 ✓ |
|
||||
|
||||
### #8: Autocomplete Field Clearing
|
||||
|
||||
| Attribute | Value |
|
||||
|-----------|-------|
|
||||
| **Site** | Search fields with autocomplete, address forms |
|
||||
| **Symptom** | Typed text gets cleared immediately |
|
||||
| **Root Cause** | Field expects realistic keystroke timing for autocomplete |
|
||||
| **Detection** | Field has autocomplete listeners or dropdown appears |
|
||||
| **Fix** | Add `delay_ms=50` between keystrokes |
|
||||
| **Code** | `bridge.py:type()` - delay_ms parameter |
|
||||
| **Verified** | 2026-04-03 ✓ |
|
||||
|
||||
### #9: Custom Date Pickers
|
||||
|
||||
| Attribute | Value |
|
||||
|-----------|-------|
|
||||
| **Site** | Forms with custom date widgets |
|
||||
| **Symptom** | Can't type date into date field |
|
||||
| **Root Cause** | Custom widget intercepts and blocks keyboard input |
|
||||
| **Detection** | Typing doesn't change field value |
|
||||
| **Fix** | Click calendar widget icon, select date from dropdown |
|
||||
| **Code** | - |
|
||||
| **Verified** | - |
|
||||
|
||||
---
|
||||
|
||||
## Snapshot Issues
|
||||
|
||||
### #10: LinkedIn Huge DOM Tree
|
||||
|
||||
| Attribute | Value |
|
||||
|-----------|-------|
|
||||
| **Site** | LinkedIn, Facebook, Twitter feeds |
|
||||
| **Symptom** | `browser_snapshot()` hangs forever |
|
||||
| **Root Cause** | 10k+ DOM nodes, accessibility tree has 50k+ nodes |
|
||||
| **Detection** | `document.querySelectorAll('*').length > 5000` |
|
||||
| **Fix** | Add `timeout_s` param with `asyncio.timeout()`, proper error handling |
|
||||
| **Code** | `bridge.py:1041-1028` - snapshot with timeout protection |
|
||||
| **Verified** | 2026-04-03 ✓ (0.08s on LinkedIn) |
|
||||
|
||||
### #11: SPA Hydration Delay
|
||||
|
||||
| Attribute | Value |
|
||||
|-----------|-------|
|
||||
| **Site** | React/Vue/Angular SPAs |
|
||||
| **Symptom** | Snapshot shows old content after navigation |
|
||||
| **Root Cause** | Client-side hydration hasn't completed when snapshot runs |
|
||||
| **Detection** | `document.readyState === 'complete'` but content missing |
|
||||
| **Fix** | Wait for specific selector after navigation |
|
||||
| **Code** | Test file: `tests/test_x_page_load_repro.py` |
|
||||
| **Verified** | - |
|
||||
|
||||
### #12: iframe Content Missing
|
||||
|
||||
| Attribute | Value |
|
||||
|-----------|-------|
|
||||
| **Site** | Sites with embedded content |
|
||||
| **Symptom** | Snapshot missing iframe content |
|
||||
| **Root Cause** | Accessibility tree doesn't include iframe content |
|
||||
| **Detection** | `document.querySelectorAll('iframe')` has results |
|
||||
| **Fix** | Use `DOM.getFrameOwner` + separate snapshot for each iframe |
|
||||
| **Code** | - |
|
||||
| **Verified** | - |
|
||||
|
||||
---
|
||||
|
||||
## Navigation Issues
|
||||
|
||||
### #13: SPA Navigation Events
|
||||
|
||||
| Attribute | Value |
|
||||
|-----------|-------|
|
||||
| **Site** | React Router, Vue Router SPAs |
|
||||
| **Symptom** | `wait_until="load"` fires before content ready |
|
||||
| **Root Cause** | SPA uses client-side routing, no full page load |
|
||||
| **Detection** | URL changes but `load` event already fired |
|
||||
| **Fix** | Use `wait_until="networkidle"` or `wait_for_selector` |
|
||||
| **Code** | `bridge.py:navigate()` - wait_until options |
|
||||
| **Verified** | - |
|
||||
|
||||
### #14: Cross-Origin Redirects
|
||||
|
||||
| Attribute | Value |
|
||||
|-----------|-------|
|
||||
| **Site** | OAuth flows, SSO logins |
|
||||
| **Symptom** | Navigation fails during redirect |
|
||||
| **Root Cause** | Cross-origin security prevents CDP tracking |
|
||||
| **Detection** | URL changes to different domain |
|
||||
| **Fix** | Use `wait_for_url` with pattern matching instead of exact URL |
|
||||
| **Code** | - |
|
||||
| **Verified** | - |
|
||||
|
||||
---
|
||||
|
||||
## Screenshot Issues
|
||||
|
||||
### #15: Selector Screenshot Not Implemented
|
||||
|
||||
| Attribute | Value |
|
||||
|-----------|-------|
|
||||
| **Site** | Any site |
|
||||
| **Symptom** | `browser_screenshot(selector="h1")` takes full viewport instead of element |
|
||||
| **Root Cause** | `selector` param existed in signature but was silently ignored in both `bridge.py` and `inspection.py` |
|
||||
| **Detection** | Screenshot with selector same byte size as screenshot without selector |
|
||||
| **Fix** | Use CDP `Runtime.evaluate` to call `getBoundingClientRect()` on the element, pass result as `clip` to `Page.captureScreenshot` |
|
||||
| **Code** | `bridge.py:1315-1344` - selector clip logic; `inspection.py:94-96` - pass selector to bridge |
|
||||
| **Verified** | 2026-04-03 ✓ (JS rect query returns correct viewport coords; requires server restart) |
|
||||
|
||||
### #16: Stale Browser Context (Group ID Mismatch)
|
||||
|
||||
| Attribute | Value |
|
||||
|-----------|-------|
|
||||
| **Site** | Any |
|
||||
| **Symptom** | `browser_open()` returns `"No group with id: XXXXXXX"` even though `browser_status` shows `running: true` |
|
||||
| **Root Cause** | In-memory `_contexts` dict has a stale `groupId` from a Chrome tab group that was closed outside the tool (e.g. user closed the tab group) |
|
||||
| **Detection** | `browser_status` returns `running: true` but `browser_open` fails with "No group with id" |
|
||||
| **Fix** | Call `browser_stop()` to clear stale context from `_contexts`, then `browser_start()` again |
|
||||
| **Code** | `tools/lifecycle.py:144-160` - `already_running` check uses cached dict without validating against Chrome |
|
||||
| **Verified** | 2026-04-03 ✓ |
|
||||
|
||||
---
|
||||
|
||||
## How to Add New Edge Cases
|
||||
|
||||
1. **Reproduce** the issue with minimal test case
|
||||
2. **Document** using the template below
|
||||
3. **Implement** fix with multi-layer fallback
|
||||
4. **Verify** against both problematic and simple sites
|
||||
5. **Submit** by appending to this file
|
||||
|
||||
### Template
|
||||
|
||||
```markdown
|
||||
### #N: [Short Title]
|
||||
|
||||
| Attribute | Value |
|
||||
|-----------|-------|
|
||||
| **Site** | [URL or site type] |
|
||||
| **Symptom** | [What the user observes] |
|
||||
| **Root Cause** | [Technical explanation] |
|
||||
| **Detection** | [JavaScript to detect this case] |
|
||||
| **Fix** | [Solution approach] |
|
||||
| **Code** | [File:line reference if implemented] |
|
||||
| **Verified** | [Date or "pending"] |
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Statistics
|
||||
|
||||
| Category | Count |
|
||||
|----------|-------|
|
||||
| Scroll Issues | 3 |
|
||||
| Click Issues | 3 |
|
||||
| Input Issues | 3 |
|
||||
| Snapshot Issues | 3 |
|
||||
| Navigation Issues | 2 |
|
||||
| Screenshot Issues | 2 |
|
||||
| **Total** | **16** |
|
||||
|
||||
Last updated: 2026-04-03
|
||||
@@ -0,0 +1,111 @@
|
||||
#!/usr/bin/env python
|
||||
"""
|
||||
Test #2: Twitter/X Lazy Loading Scroll
|
||||
|
||||
Symptom: Infinite scroll doesn't load new content
|
||||
Root Cause: Lazy loading requires content to be visible before loading more
|
||||
Fix: Add wait_for_selector between scroll calls
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
import sys
|
||||
import time
|
||||
from pathlib import Path
|
||||
|
||||
sys.path.insert(0, str(Path(__file__).parent.parent.parent.parent / "tools" / "src"))
|
||||
|
||||
from gcu.browser.bridge import BeelineBridge
|
||||
|
||||
BRIDGE_PORT = 9229
|
||||
CONTEXT_NAME = "twitter-scroll-test"
|
||||
|
||||
|
||||
async def test_twitter_lazy_scroll():
    """Check that scrolling x.com with pauses makes the timeline load more tweets.

    Starts a ``BeelineBridge``, polls once per second (up to ten times) for
    the extension to connect, opens https://x.com in a fresh context and
    scrolls down three times, sleeping two seconds after each scroll. The
    number of ``[data-testid="tweet"]`` elements is counted before and after
    and a PASS/FAIL line is printed; nothing is asserted or returned.

    The context is destroyed and the bridge is stopped even when a step
    raises, so a failed run does not leave its tab group behind.
    """
    # Page-side expression that counts the tweets currently in the DOM.
    count_tweets_js = (
        '(function() { return document.querySelectorAll(\'[data-testid="tweet"]\').length; })()'
    )

    print("=" * 70)
    print("TEST #2: Twitter/X Lazy Loading Scroll")
    print("=" * 70)

    bridge = BeelineBridge()

    async def count_tweets(tab_id):
        """Return the number of tweet elements in the tab (0 if none reported)."""
        outcome = await bridge.evaluate(tab_id, count_tweets_js)
        # `or 0` guards against a present-but-None result, which would make
        # the final `>` comparison raise.
        return outcome.get('result', 0) or 0

    try:
        await bridge.start()

        for i in range(10):
            await asyncio.sleep(1)
            if bridge.is_connected:
                print("✓ Extension connected!")
                break
            print(f"Waiting for extension... ({i+1}/10)")
        else:
            # Loop finished without `break`: the extension never connected.
            print("✗ Extension not connected")
            return

        context = await bridge.create_context(CONTEXT_NAME)
        tab_id = context.get("tabId")
        group_id = context.get("groupId")
        print(f"✓ Created tab: {tab_id}")

        try:
            # Navigate to Twitter/X
            print("\n--- Navigating to X.com ---")
            await bridge.navigate(tab_id, "https://x.com", wait_until="networkidle", timeout_ms=30000)
            print("✓ Page loaded")

            # Wait for tweets to appear
            print("\n--- Waiting for tweets ---")
            await bridge.wait_for_selector(tab_id, '[data-testid="tweet"]', timeout_ms=10000)

            # Count initial tweets
            initial = await count_tweets(tab_id)
            print(f"Initial tweet count: {initial}")

            # Take screenshot of initial state
            screenshot = await bridge.screenshot(tab_id)
            print(f"Screenshot: {len(screenshot.get('data', ''))} bytes")

            # Scroll multiple times with waits
            print("\n--- Scrolling with waits ---")
            for i in range(3):
                result = await bridge.scroll(tab_id, "down", 500)
                print(f"  Scroll {i+1}: {result.get('method', 'unknown')} method")

                # Wait for new content to load
                await asyncio.sleep(2)

                # Count tweets after scroll
                count = await count_tweets(tab_id)
                print(f"  Tweet count after scroll: {count}")

            # Final count
            final = await count_tweets(tab_id)

            print("\n--- Results ---")
            print(f"Initial tweets: {initial}")
            print(f"Final tweets: {final}")

            if final > initial:
                print(f"✓ PASS: Loaded {final - initial} new tweets")
            else:
                print("✗ FAIL: No new tweets loaded (may need login)")
        finally:
            # Runs on failure too, so the context is never leaked.
            await bridge.destroy_context(group_id)
            print("\n✓ Context destroyed")

    finally:
        await bridge.stop()
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
asyncio.run(test_twitter_lazy_scroll())
|
||||
@@ -0,0 +1,97 @@
|
||||
#!/usr/bin/env python
|
||||
"""
|
||||
Test #3: Modal/Dialog Scroll Container
|
||||
|
||||
Symptom: Scroll scrolls background page, not modal content
|
||||
Root Cause: Modal has its own scroll container with overflow: scroll
|
||||
Fix: Find visible modal container (highest z-index scrollable), scroll that
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
import sys
|
||||
import time
|
||||
from pathlib import Path
|
||||
|
||||
sys.path.insert(0, str(Path(__file__).parent.parent.parent.parent / "tools" / "src"))
|
||||
|
||||
from gcu.browser.bridge import BeelineBridge
|
||||
|
||||
BRIDGE_PORT = 9229
|
||||
CONTEXT_NAME = "modal-scroll-test"
|
||||
|
||||
# Test site with modal - using a demo site
|
||||
MODAL_DEMO_URL = "https://www.w3schools.com/howto/howto_css_modals.asp"
|
||||
|
||||
|
||||
async def test_modal_scroll():
    """Open the demo modal and issue a scroll while it is showing.

    Starts a ``BeelineBridge``, polls once per second (up to ten times) for
    the extension to connect, loads ``MODAL_DEMO_URL``, clicks the ``.ws-btn``
    button to open the modal and scrolls down by 100. Screenshot sizes and the
    scroll method are printed; whether the modal (rather than the background)
    moved is left to visual inspection, so nothing is asserted or returned.

    The context is destroyed and the bridge is stopped even when a step
    raises, so a failed run does not leave its tab group behind.
    """
    print("=" * 70)
    print("TEST #3: Modal/Dialog Scroll Container")
    print("=" * 70)

    bridge = BeelineBridge()

    try:
        await bridge.start()

        for i in range(10):
            await asyncio.sleep(1)
            if bridge.is_connected:
                print("✓ Extension connected!")
                break
        else:
            # Loop finished without `break`: the extension never connected.
            print("✗ Extension not connected")
            return

        context = await bridge.create_context(CONTEXT_NAME)
        tab_id = context.get("tabId")
        group_id = context.get("groupId")
        print(f"✓ Created tab: {tab_id}")

        try:
            # Navigate to modal demo
            print("\n--- Navigating to modal demo ---")
            await bridge.navigate(tab_id, MODAL_DEMO_URL, wait_until="load")
            print("✓ Page loaded")

            # Take screenshot before
            screenshot_before = await bridge.screenshot(tab_id)
            print(f"Screenshot before: {len(screenshot_before.get('data', ''))} bytes")

            # Click button to open modal
            print("\n--- Opening modal ---")
            # Find and click the "Open Modal" button
            click_result = await bridge.click(tab_id, '.ws-btn', timeout_ms=5000)
            print(f"Click result: {click_result}")

            await asyncio.sleep(1)

            # Take screenshot with modal open
            screenshot_modal = await bridge.screenshot(tab_id)
            print(f"Screenshot modal open: {len(screenshot_modal.get('data', ''))} bytes")

            # Try to scroll within modal
            print("\n--- Scrolling modal content ---")
            scroll_result = await bridge.scroll(tab_id, "down", 100)
            print(f"Scroll result: {scroll_result}")

            await asyncio.sleep(0.5)

            # Take screenshot after scroll
            screenshot_after = await bridge.screenshot(tab_id)
            print(f"Screenshot after scroll: {len(screenshot_after.get('data', ''))} bytes")

            # Check if modal content scrolled (not background)
            # This is a visual check - we can verify by comparing screenshots
            print("\n--- Results ---")
            print(f"Modal scroll test completed. Method used: {scroll_result.get('method', 'unknown')}")
            print("Visual verification needed: Check if modal content scrolled vs background")
        finally:
            # Runs on failure too, so the context is never leaked.
            await bridge.destroy_context(group_id)
            print("\n✓ Context destroyed")

    finally:
        await bridge.stop()
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
asyncio.run(test_modal_scroll())
|
||||
@@ -0,0 +1,123 @@
|
||||
#!/usr/bin/env python
|
||||
"""
|
||||
Test #4: Element Covered by Overlay
|
||||
|
||||
Symptom: Click succeeds but no action triggered
|
||||
Root Cause: Element is covered by transparent overlay, tooltip, or iframe
|
||||
Detection: document.elementFromPoint(x, y) !== target
|
||||
Fix: Wait for overlay to disappear, or use JavaScript element.click()
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
sys.path.insert(0, str(Path(__file__).parent.parent.parent.parent / "tools" / "src"))
|
||||
|
||||
from gcu.browser.bridge import BeelineBridge
|
||||
|
||||
CONTEXT_NAME = "overlay-click-test"
|
||||
|
||||
|
||||
async def test_overlay_click():
    """Click a button that sits under a full-page overlay and report the outcome.

    Starts a ``BeelineBridge``, polls once per second (up to ten times) for
    the extension to connect, and loads an inline HTML page (as a base64
    ``data:`` URL) whose ``#target-btn`` is covered by a fixed ``#overlay``
    div. It first evaluates an ``elementFromPoint`` check to report whether
    the button is covered, then calls ``bridge.click`` and reads
    ``window.clickCount`` to see whether the click reached the button. A
    PASS/FAIL line is printed; nothing is asserted or returned.

    The context is destroyed and the bridge is stopped even when a step
    raises, so a failed run does not leave its tab group behind.
    """
    print("=" * 70)
    print("TEST #4: Element Covered by Overlay")
    print("=" * 70)

    bridge = BeelineBridge()

    try:
        await bridge.start()

        for i in range(10):
            await asyncio.sleep(1)
            if bridge.is_connected:
                print("✓ Extension connected!")
                break
        else:
            # Loop finished without `break`: the extension never connected.
            print("✗ Extension not connected")
            return

        context = await bridge.create_context(CONTEXT_NAME)
        tab_id = context.get("tabId")
        group_id = context.get("groupId")
        print(f"✓ Created tab: {tab_id}")

        try:
            # Create a test page with overlay
            print("\n--- Creating test page with overlay ---")
            # NOTE(review): the inline onclick opens a modal alert(); confirm the
            # bridge dismisses dialogs, otherwise later evaluate calls may block.
            test_html = """
<!DOCTYPE html>
<html>
<head><title>Overlay Test</title></head>
<body>
<button id="target-btn" onclick="alert('Clicked!')">Click Me</button>
<div id="overlay" style="position:fixed;top:0;left:0;width:100%;height:100%;background:rgba(0,0,0,0.3);z-index:1000;"></div>
<script>
window.clickCount = 0;
document.getElementById('target-btn').addEventListener('click', () => {
window.clickCount++;
});
</script>
</body>
</html>
"""

            # Navigate to data URL
            import base64
            data_url = f"data:text/html;base64,{base64.b64encode(test_html.encode()).decode()}"
            await bridge.navigate(tab_id, data_url, wait_until="load")

            # Screenshot before
            screenshot = await bridge.screenshot(tab_id)
            print(f"Screenshot: {len(screenshot.get('data', ''))} bytes")

            # Try to click the covered button
            print("\n--- Attempting to click covered button ---")

            # First, check if element is covered
            coverage_check = await bridge.evaluate(
                tab_id,
                """
(function() {
const btn = document.getElementById('target-btn');
const rect = btn.getBoundingClientRect();
const centerX = rect.left + rect.width / 2;
const centerY = rect.top + rect.height / 2;
const topElement = document.elementFromPoint(centerX, centerY);
return {
isCovered: topElement !== btn && !btn.contains(topElement),
topElement: topElement?.tagName,
targetElement: btn.tagName
};
})();
"""
            )
            print(f"Coverage check: {coverage_check.get('result', {})}")

            # Try CDP click (may fail due to overlay)
            click_result = await bridge.click(tab_id, "#target-btn", timeout_ms=5000)
            print(f"Click result: {click_result}")

            # Check if click registered
            count_result = await bridge.evaluate(
                tab_id,
                "(function() { return window.clickCount; })()"
            )
            # `or 0` guards against a present-but-None result, which would make
            # the `>` comparison below raise.
            count = count_result.get("result", 0) or 0
            print(f"Click count after CDP click: {count}")

            if count > 0:
                print("✓ PASS: JavaScript click penetrated overlay")
            else:
                print("✗ FAIL: Click did not reach button (overlay blocked it)")
        finally:
            # Runs on failure too, so the context is never leaked.
            await bridge.destroy_context(group_id)
            print("\n✓ Context destroyed")

    finally:
        await bridge.stop()
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
asyncio.run(test_overlay_click())
|
||||
@@ -0,0 +1,154 @@
|
||||
#!/usr/bin/env python
|
||||
"""
|
||||
Test #6: Shadow DOM Elements
|
||||
|
||||
Symptom: querySelector can't find element
|
||||
Root Cause: Element is inside a shadow root, not main DOM tree
|
||||
Detection: element.shadowRoot !== null on parent elements
|
||||
Fix: Use piercing selector (host >>> target) or traverse shadow roots
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
import sys
|
||||
import base64
|
||||
from pathlib import Path
|
||||
|
||||
sys.path.insert(0, str(Path(__file__).parent.parent.parent.parent / "tools" / "src"))
|
||||
|
||||
from gcu.browser.bridge import BeelineBridge
|
||||
|
||||
CONTEXT_NAME = "shadow-dom-test"
|
||||
|
||||
|
||||
async def test_shadow_dom():
    """Test clicking elements inside Shadow DOM.

    Loads a local page whose only button lives inside an open shadow root,
    reports which elements host a shadow root, attempts a click with the
    plain ``#shadow-btn`` selector (expected to fail), then clicks the button
    from page JavaScript via ``host.shadowRoot`` and reads back the page's
    click counter.

    Results are printed; nothing is returned or asserted.  The browser
    context and the bridge are released in ``finally`` even if a step raises.
    """
    # Local import: only this function needs it, and it keeps the block
    # self-contained.
    import tempfile

    print("=" * 70)
    print("TEST #6: Shadow DOM Elements")
    print("=" * 70)

    bridge = BeelineBridge()
    group_id = None  # set once a context exists, so cleanup knows what to destroy

    try:
        await bridge.start()

        # Give the extension up to ~10 s to connect, polling once per second.
        for _ in range(10):
            await asyncio.sleep(1)
            if bridge.is_connected:
                print("✓ Extension connected!")
                break
        else:
            print("✗ Extension not connected")
            return

        context = await bridge.create_context(CONTEXT_NAME)
        tab_id = context.get("tabId")
        group_id = context.get("groupId")
        print(f"✓ Created tab: {tab_id}")

        # Create test page with Shadow DOM
        print("\n--- Creating test page with Shadow DOM ---")
        test_html = """
        <!DOCTYPE html>
        <html>
        <head><title>Shadow DOM Test</title></head>
        <body>
            <div id="shadow-host"></div>
            <script>
                const host = document.getElementById('shadow-host');
                const shadow = host.attachShadow({ mode: 'open' });
                shadow.innerHTML = `
                    <style>
                        button { padding: 10px 20px; font-size: 16px; }
                    </style>
                    <button id="shadow-btn">Shadow Button</button>
                `;
                shadow.getElementById('shadow-btn').addEventListener('click', () => {
                    window.shadowClickCount = (window.shadowClickCount || 0) + 1;
                    console.log('Shadow button clicked:', window.shadowClickCount);
                });
            </script>
        </body>
        </html>
        """

        # Write to file and use file:// URL (data: URLs don't work well with extension).
        # The platform temp dir plus as_uri() gives a valid URL on every OS.
        test_file = Path(tempfile.gettempdir()) / "shadow_dom_test.html"
        test_file.write_text(test_html.strip(), encoding="utf-8")
        file_url = test_file.as_uri()
        await bridge.navigate(tab_id, file_url, wait_until="load")
        print("✓ Page loaded")

        # Screenshot
        screenshot = await bridge.screenshot(tab_id)
        print(f"Screenshot: {len(screenshot.get('data', ''))} bytes")

        # Detect Shadow DOM
        print("\n--- Detecting Shadow DOM ---")
        detection = await bridge.evaluate(
            tab_id,
            """
            (function() {
                const hosts = [];
                document.querySelectorAll('*').forEach(el => {
                    if (el.shadowRoot) {
                        hosts.push({
                            tag: el.tagName,
                            id: el.id,
                            hasButton: el.shadowRoot.querySelector('button') !== null
                        });
                    }
                });
                return { count: hosts.length, hosts };
            })();
            """
        )
        print(f"Shadow DOM detection: {detection.get('result', {})}")

        # Try to click shadow button using regular selector (should fail)
        print("\n--- Attempting click with regular selector ---")
        try:
            result = await bridge.click(tab_id, "#shadow-btn", timeout_ms=3000)
            print(f"Result: {result}")
        except Exception as e:
            # Deliberately broad: any failure here is the expected outcome.
            print(f"Expected failure: {e}")

        # Try to click using JavaScript that pierces shadow DOM
        print("\n--- Clicking via JavaScript shadow piercing ---")
        click_result = await bridge.evaluate(
            tab_id,
            """
            (function() {
                const host = document.getElementById('shadow-host');
                const btn = host.shadowRoot.getElementById('shadow-btn');
                if (btn) {
                    btn.click();
                    return { success: true, clicked: 'shadow-btn' };
                }
                return { success: false, error: 'Button not found' };
            })();
            """
        )
        print(f"JS click result: {click_result.get('result', {})}")

        # Verify click was registered
        count_result = await bridge.evaluate(
            tab_id,
            "(function() { return window.shadowClickCount || 0; })()"
        )
        # `or 0` also covers a missing or None result.
        count = count_result.get("result") or 0
        print(f"Shadow click count: {count}")

        if count > 0:
            print("✓ PASS: Shadow DOM element clicked successfully")
        else:
            print("✗ FAIL: Could not click Shadow DOM element")

    finally:
        # Tear down in reverse order; the inner finally guarantees the bridge
        # stops even if destroying the context fails.
        try:
            if group_id is not None:
                await bridge.destroy_context(group_id)
                print("\n✓ Context destroyed")
        finally:
            await bridge.stop()
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
asyncio.run(test_shadow_dom())
|
||||
@@ -0,0 +1,178 @@
|
||||
#!/usr/bin/env python
|
||||
"""
|
||||
Test #7: ContentEditable / Rich Text Editors
|
||||
|
||||
Symptom: browser_type() doesn't insert text
|
||||
Root Cause: Element is contenteditable, not an <input> or <textarea>
|
||||
Detection: element.contentEditable === 'true'
|
||||
Fix: Focus via JavaScript, use execCommand('insertText') or Input.dispatchKeyEvent
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
import sys
|
||||
import base64
|
||||
from pathlib import Path
|
||||
|
||||
sys.path.insert(0, str(Path(__file__).parent.parent.parent.parent / "tools" / "src"))
|
||||
|
||||
from gcu.browser.bridge import BeelineBridge
|
||||
|
||||
CONTEXT_NAME = "contenteditable-test"
|
||||
|
||||
|
||||
async def test_contenteditable():
    """Test typing into contenteditable elements.

    Loads a local page with a plain ``<input>``, a simple contenteditable
    div and a "rich" contenteditable div, then:

    1. types into the regular input (baseline),
    2. types into the simple contenteditable div via ``bridge.type_text``,
    3. inserts text into the rich editor with ``document.execCommand``.

    A PASS/FAIL line is printed per scenario; nothing is returned or
    asserted.  The browser context and the bridge are released in
    ``finally`` even if a step raises.
    """
    # Local import: only this function needs it, and it keeps the block
    # self-contained.
    import tempfile

    print("=" * 70)
    print("TEST #7: ContentEditable / Rich Text Editors")
    print("=" * 70)

    bridge = BeelineBridge()
    group_id = None  # set once a context exists, so cleanup knows what to destroy

    try:
        await bridge.start()

        # Give the extension up to ~10 s to connect, polling once per second.
        for _ in range(10):
            await asyncio.sleep(1)
            if bridge.is_connected:
                print("✓ Extension connected!")
                break
        else:
            print("✗ Extension not connected")
            return

        context = await bridge.create_context(CONTEXT_NAME)
        tab_id = context.get("tabId")
        group_id = context.get("groupId")
        print(f"✓ Created tab: {tab_id}")

        # Create test page with contenteditable
        test_html = """
        <!DOCTYPE html>
        <html>
        <head><title>ContentEditable Test</title></head>
        <body>
            <h2>ContentEditable Test</h2>

            <h3>1. Simple contenteditable div</h3>
            <div id="editor1" contenteditable="true" style="border:1px solid #ccc;padding:10px;min-height:50px;">Start text</div>

            <h3>2. Rich text editor (like Notion)</h3>
            <div id="editor2" contenteditable="true" style="border:1px solid #ccc;padding:10px;min-height:50px;">
                <p>Type here...</p>
            </div>

            <h3>3. Regular input (for comparison)</h3>
            <input id="input1" type="text" placeholder="Regular input" />

            <script>
                // Track content changes
                window.editor1Content = '';
                window.editor2Content = '';

                document.getElementById('editor1').addEventListener('input', (e) => {
                    window.editor1Content = e.target.innerText;
                });
                document.getElementById('editor2').addEventListener('input', (e) => {
                    window.editor2Content = e.target.innerText;
                });
            </script>
        </body>
        </html>
        """

        # Write to file and use file:// URL (data: URLs don't work well with extension).
        # The platform temp dir plus as_uri() gives a valid URL on every OS.
        test_file = Path(tempfile.gettempdir()) / "contenteditable_test.html"
        test_file.write_text(test_html.strip(), encoding="utf-8")
        file_url = test_file.as_uri()
        await bridge.navigate(tab_id, file_url, wait_until="load")
        print("✓ Page loaded")

        # Screenshot with timeout protection
        try:
            screenshot = await asyncio.wait_for(bridge.screenshot(tab_id), timeout=10.0)
            print(f"Screenshot: {len(screenshot.get('data', ''))} bytes")
        except asyncio.TimeoutError:
            print("Screenshot timed out (skipping)")

        # Detect contenteditable
        print("\n--- Detecting contenteditable elements ---")
        detection = await bridge.evaluate(
            tab_id,
            """
            (function() {
                const editables = document.querySelectorAll('[contenteditable="true"]');
                return {
                    count: editables.length,
                    ids: Array.from(editables).map(el => el.id)
                };
            })();
            """
        )
        print(f"Contenteditable detection: {detection.get('result', {})}")

        # Test 1: Type into regular input (baseline)
        print("\n--- Test 1: Regular input ---")
        await bridge.click(tab_id, "#input1")
        await bridge.type_text(tab_id, "#input1", "Hello input")
        input_result = await bridge.evaluate(
            tab_id,
            "(function() { return document.getElementById('input1').value; })()"
        )
        print(f"Input value: {input_result.get('result', '')}")

        # Test 2: Type into contenteditable div
        print("\n--- Test 2: Contenteditable div ---")
        await bridge.click(tab_id, "#editor1")
        await bridge.type_text(tab_id, "#editor1", "Hello contenteditable", clear_first=True)
        editor_result = await bridge.evaluate(
            tab_id,
            "(function() { return document.getElementById('editor1').innerText; })()"
        )
        print(f"Editor1 innerText: {editor_result.get('result', '')}")

        # Test 3: Use JavaScript insertText for rich editor
        print("\n--- Test 3: JavaScript insertText for rich editor ---")
        insert_result = await bridge.evaluate(
            tab_id,
            """
            (function() {
                const editor = document.getElementById('editor2');
                editor.focus();
                document.execCommand('selectAll', false, null);
                document.execCommand('insertText', false, 'Hello from execCommand');
                return editor.innerText;
            })();
            """
        )
        print(f"Editor2 after execCommand: {insert_result.get('result', '')}")

        # Screenshot after with timeout protection
        try:
            screenshot_after = await asyncio.wait_for(bridge.screenshot(tab_id), timeout=10.0)
            print(f"Screenshot after: {len(screenshot_after.get('data', ''))} bytes")
        except asyncio.TimeoutError:
            print("Screenshot after timed out (skipping)")

        # Results
        print("\n--- Results ---")
        # `or ""` keeps the substring checks from raising TypeError when the
        # bridge reports a missing or None result.
        input_val = input_result.get("result") or ""
        editor1_val = editor_result.get("result") or ""
        editor2_val = insert_result.get("result") or ""

        input_pass = "Hello input" in input_val
        editor1_pass = "Hello contenteditable" in editor1_val
        editor2_pass = "execCommand" in editor2_val

        print(f"Input: {'✓ PASS' if input_pass else '✗ FAIL'} - {input_val}")
        print(f"Editor1: {'✓ PASS' if editor1_pass else '✗ FAIL'} - {editor1_val}")
        print(f"Editor2: {'✓ PASS' if editor2_pass else '✗ FAIL'} - {editor2_val}")

    finally:
        # Tear down in reverse order; the inner finally guarantees the bridge
        # stops even if destroying the context fails.
        try:
            if group_id is not None:
                await bridge.destroy_context(group_id)
                print("\n✓ Context destroyed")
        finally:
            await bridge.stop()
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
asyncio.run(test_contenteditable())
|
||||
@@ -0,0 +1,233 @@
|
||||
#!/usr/bin/env python
|
||||
"""
|
||||
Test #8: Autocomplete Field Clearing
|
||||
|
||||
Symptom: Typed text gets cleared immediately
|
||||
Root Cause: Field expects realistic keystroke timing for autocomplete
|
||||
Detection: Field has autocomplete listeners or dropdown appears
|
||||
Fix: Add delay_ms between keystrokes
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
import sys
|
||||
import base64
|
||||
from pathlib import Path
|
||||
|
||||
sys.path.insert(0, str(Path(__file__).parent.parent.parent.parent / "tools" / "src"))
|
||||
|
||||
from gcu.browser.bridge import BeelineBridge
|
||||
|
||||
CONTEXT_NAME = "autocomplete-test"
|
||||
|
||||
|
||||
async def test_autocomplete():
    """Test typing into fields with autocomplete behavior.

    Loads a local page whose search input rebuilds a suggestion dropdown
    after a 100 ms debounce, then types once with ``delay_ms=0`` and once
    with ``delay_ms=100`` and reports the resulting field value, the events
    the page logged, and whether the dropdown appeared.

    Results are printed; nothing is returned or asserted.  The browser
    context and the bridge are released in ``finally`` even if a step raises.
    """
    # Local import: only this function needs it, and it keeps the block
    # self-contained.
    import tempfile

    print("=" * 70)
    print("TEST #8: Autocomplete Field Clearing")
    print("=" * 70)

    bridge = BeelineBridge()
    group_id = None  # set once a context exists, so cleanup knows what to destroy

    try:
        await bridge.start()

        # Give the extension up to ~10 s to connect, polling once per second.
        for _ in range(10):
            await asyncio.sleep(1)
            if bridge.is_connected:
                print("✓ Extension connected!")
                break
        else:
            print("✗ Extension not connected")
            return

        context = await bridge.create_context(CONTEXT_NAME)
        tab_id = context.get("tabId")
        group_id = context.get("groupId")
        print(f"✓ Created tab: {tab_id}")

        # Create test page with autocomplete behavior
        test_html = """
        <!DOCTYPE html>
        <html>
        <head><title>Autocomplete Test</title>
        <style>
            .autocomplete-items {
                position: absolute;
                border: 1px solid #d4d4d4;
                border-top: none;
                z-index: 99;
                top: 100%;
                left: 0;
                right: 0;
                max-height: 200px;
                overflow-y: auto;
                background: white;
            }
            .autocomplete-items div {
                padding: 10px;
                cursor: pointer;
            }
            .autocomplete-items div:hover {
                background-color: #e9e9e9;
            }
            .autocomplete-active {
                background-color: DodgerBlue !important;
                color: white;
            }
            .autocomplete { position: relative; display: inline-block; }
            input { width: 300px; padding: 10px; font-size: 16px; }
        </style></head>
        <body>
            <h2>Autocomplete Test</h2>

            <div class="autocomplete">
                <input id="search" type="text" placeholder="Search countries..." autocomplete="off">
            </div>

            <div id="log" style="margin-top:20px;font-family:monospace;"></div>

            <script>
                const countries = ["Afghanistan","Albania","Algeria","Andorra","Angola","Argentina","Armenia","Australia","Austria","Azerbaijan","Bahamas","Bahrain","Bangladesh","Belarus","Belgium","Belize","Benin","Bhutan","Bolivia","Brazil","Canada","China","Colombia","Denmark","Egypt","France","Germany","India","Indonesia","Italy","Japan","Mexico","Netherlands","Nigeria","Norway","Pakistan","Peru","Philippines","Poland","Portugal","Russia","Spain","Sweden","Switzerland","Thailand","Turkey","Ukraine","United Kingdom","United States","Vietnam"];

                const input = document.getElementById('search');
                const log = document.getElementById('log');
                let currentFocus = -1;
                let typingTimeout = null;

                // Track events for testing
                window.inputEvents = [];
                window.inputValue = '';

                function logEvent(type, value) {
                    window.inputEvents.push({ type, value, time: Date.now() });
                    const entry = document.createElement('div');
                    entry.textContent = type + ': ' + value;
                    log.insertBefore(entry, log.firstChild);
                }

                // Simulate autocomplete that clears fast typing
                input.addEventListener('input', function(e) {
                    const val = this.value;

                    // Clear previous dropdown
                    closeAllLists();

                    if (!val) return;

                    // If typing too fast (autocomplete-style), clear and restart
                    clearTimeout(typingTimeout);
                    typingTimeout = setTimeout(() => {
                        logEvent('input', val);
                        window.inputValue = val;

                        // Create dropdown
                        const div = document.createElement('div');
                        div.setAttribute('id', this.id + 'autocomplete-list');
                        div.setAttribute('class', 'autocomplete-items');
                        this.parentNode.appendChild(div);

                        countries.filter(c => c.substr(0, val.length).toUpperCase() === val.toUpperCase())
                            .slice(0, 5)
                            .forEach(country => {
                                const item = document.createElement('div');
                                item.innerHTML = '<strong>' + country.substr(0, val.length) + '</strong>' + country.substr(val.length);
                                item.addEventListener('click', function() {
                                    input.value = country;
                                    closeAllLists();
                                    logEvent('select', country);
                                    window.inputValue = country;
                                });
                                div.appendChild(item);
                            });
                    }, 100); // 100ms debounce
                });

                function closeAllLists() {
                    document.querySelectorAll('.autocomplete-items').forEach(el => el.remove());
                }

                document.addEventListener('click', function() {
                    closeAllLists();
                });
            </script>
        </body>
        </html>
        """

        # Write to file and use file:// URL (data: URLs don't work well with extension).
        # The platform temp dir plus as_uri() gives a valid URL on every OS.
        test_file = Path(tempfile.gettempdir()) / "autocomplete_test.html"
        test_file.write_text(test_html.strip(), encoding="utf-8")
        file_url = test_file.as_uri()
        await bridge.navigate(tab_id, file_url, wait_until="load")
        print("✓ Page loaded")

        # Screenshot
        screenshot = await bridge.screenshot(tab_id)
        print(f"Screenshot: {len(screenshot.get('data', ''))} bytes")

        # Test 1: Fast typing (no delay) - may fail
        print("\n--- Test 1: Fast typing (delay_ms=0) ---")
        await bridge.click(tab_id, "#search")
        await bridge.type_text(tab_id, "#search", "Ger", clear_first=True, delay_ms=0)
        await asyncio.sleep(0.5)  # let the page's 100 ms debounce fire

        fast_result = await bridge.evaluate(
            tab_id,
            "(function() { return document.getElementById('search').value; })()"
        )
        fast_value = fast_result.get("result") or ""
        print(f"Value after fast typing: '{fast_value}'")

        # Check events
        events_result = await bridge.evaluate(
            tab_id,
            "(function() { return window.inputEvents; })()"
        )
        print(f"Events logged: {events_result.get('result', [])}")

        # Test 2: Slow typing (with delay) - should work
        print("\n--- Test 2: Slow typing (delay_ms=100) ---")
        await bridge.click(tab_id, "#search")
        await bridge.type_text(tab_id, "#search", "United", clear_first=True, delay_ms=100)
        await asyncio.sleep(0.5)  # let the page's 100 ms debounce fire

        slow_result = await bridge.evaluate(
            tab_id,
            "(function() { return document.getElementById('search').value; })()"
        )
        # `or ""` / `or 0` keep the checks below from raising TypeError when
        # the bridge reports a missing or None result.
        slow_value = slow_result.get("result") or ""
        print(f"Value after slow typing: '{slow_value}'")

        # Check if dropdown appeared
        dropdown_result = await bridge.evaluate(
            tab_id,
            "(function() { return document.querySelectorAll('.autocomplete-items div').length; })()"
        )
        dropdown_count = dropdown_result.get("result") or 0
        print(f"Dropdown items: {dropdown_count}")

        # Screenshot with dropdown
        screenshot_dropdown = await bridge.screenshot(tab_id)
        print(f"Screenshot with dropdown: {len(screenshot_dropdown.get('data', ''))} bytes")

        # Results
        print("\n--- Results ---")
        if "United" in slow_value:
            print("✓ PASS: Slow typing with delay_ms worked")
        else:
            print("✗ FAIL: Slow typing still didn't work")

        if dropdown_count > 0:
            print("✓ PASS: Autocomplete dropdown appeared")
        else:
            print("⚠ WARNING: No autocomplete dropdown")

    finally:
        # Tear down in reverse order; the inner finally guarantees the bridge
        # stops even if destroying the context fails.
        try:
            if group_id is not None:
                await bridge.destroy_context(group_id)
                print("\n✓ Context destroyed")
        finally:
            await bridge.stop()
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
asyncio.run(test_autocomplete())
|
||||
@@ -0,0 +1,162 @@
|
||||
#!/usr/bin/env python
|
||||
"""
|
||||
Test #10: LinkedIn Huge DOM Tree
|
||||
|
||||
Symptom: browser_snapshot() hangs forever
|
||||
Root Cause: 10k+ DOM nodes, accessibility tree has 50k+ nodes
|
||||
Detection: document.querySelectorAll('*').length > 5000
|
||||
Fix: Add timeout (10s default), truncate tree at 2000 nodes
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
import sys
|
||||
import time
|
||||
import base64
|
||||
from pathlib import Path
|
||||
|
||||
sys.path.insert(0, str(Path(__file__).parent.parent.parent.parent / "tools" / "src"))
|
||||
|
||||
from gcu.browser.bridge import BeelineBridge
|
||||
|
||||
CONTEXT_NAME = "huge-dom-test"
|
||||
|
||||
|
||||
async def test_huge_dom():
    """Test snapshot performance on huge DOM trees.

    Times ``bridge.snapshot`` on three pages: a tiny baseline page, a
    generated page with 5000 script-built rows, and the live LinkedIn feed
    (which needs network access).  For each it prints the elapsed time, the
    size of the returned tree and whether the tree contains the
    ``(truncated)`` marker.

    Results are printed; nothing is returned or asserted.  The browser
    context and the bridge are released in ``finally`` even if a step raises.
    """
    print("=" * 70)
    print("TEST #10: Huge DOM Tree (LinkedIn-style)")
    print("=" * 70)

    bridge = BeelineBridge()
    group_id = None  # set once a context exists, so cleanup knows what to destroy

    try:
        await bridge.start()

        # Give the extension up to ~10 s to connect, polling once per second.
        for _ in range(10):
            await asyncio.sleep(1)
            if bridge.is_connected:
                print("✓ Extension connected!")
                break
        else:
            print("✗ Extension not connected")
            return

        context = await bridge.create_context(CONTEXT_NAME)
        tab_id = context.get("tabId")
        group_id = context.get("groupId")
        print(f"✓ Created tab: {tab_id}")

        # Test 1: Small DOM (baseline)
        print("\n--- Test 1: Small DOM (baseline) ---")
        small_html = """
        <!DOCTYPE html>
        <html><body>
            <h1>Small Page</h1>
            <p>A few elements</p>
            <button>Click me</button>
        </body></html>
        """
        data_url = f"data:text/html;base64,{base64.b64encode(small_html.encode()).decode()}"
        await bridge.navigate(tab_id, data_url, wait_until="load")

        start = time.perf_counter()
        snapshot = await bridge.snapshot(tab_id, timeout_s=5.0)
        elapsed = time.perf_counter() - start
        # `or ""` tolerates a missing or None tree.
        tree = snapshot.get("tree") or ""
        print(f"Small DOM snapshot: {elapsed:.3f}s, {len(tree)} chars")

        # Test 2: Generate huge DOM
        print("\n--- Test 2: Huge DOM (5000+ elements) ---")
        huge_html = """
        <!DOCTYPE html>
        <html><body>
            <h1>Huge DOM Test</h1>
            <div id="container"></div>
            <script>
                const container = document.getElementById('container');
                for (let i = 0; i < 5000; i++) {
                    const div = document.createElement('div');
                    div.className = 'item-' + i;
                    div.innerHTML = '<span>Item ' + i + '</span><button>Action</button>';
                    container.appendChild(div);
                }
            </script>
        </body></html>
        """
        data_url = f"data:text/html;base64,{base64.b64encode(huge_html.encode()).decode()}"
        await bridge.navigate(tab_id, data_url, wait_until="load")

        # Count elements
        count_result = await bridge.evaluate(
            tab_id,
            "(function() { return document.querySelectorAll('*').length; })()"
        )
        elem_count = count_result.get("result") or 0
        print(f"DOM elements: {elem_count}")

        # Skip screenshot on huge DOM - it can timeout
        # Instead verify page loaded by checking DOM
        print("✓ Page verified (skipping screenshot on huge DOM)")

        # Test snapshot with timeout
        print("\n--- Testing snapshot with 10s timeout ---")
        start = time.perf_counter()
        try:
            snapshot = await bridge.snapshot(tab_id, timeout_s=10.0)
            elapsed = time.perf_counter() - start
            tree = snapshot.get("tree") or ""
            truncated = "(truncated)" in tree
            print(f"✓ Huge DOM snapshot: {elapsed:.3f}s, {len(tree)} chars, truncated={truncated}")

            if elapsed < 5.0:
                print("✓ PASS: Snapshot completed quickly")
            else:
                print(f"⚠ WARNING: Snapshot took {elapsed:.1f}s")

            if truncated:
                print("✓ PASS: Tree was truncated to prevent hang")
            else:
                print("⚠ WARNING: Tree not truncated (may need adjustment)")

        except asyncio.TimeoutError:
            print("✗ FAIL: Snapshot timed out (this shouldn't happen)")

        # Test 3: Real LinkedIn
        print("\n--- Test 3: Real LinkedIn Feed ---")
        await bridge.navigate(tab_id, "https://www.linkedin.com/feed", wait_until="load", timeout_ms=30000)
        await asyncio.sleep(2)  # give the feed a moment to render after load

        count_result = await bridge.evaluate(
            tab_id,
            "(function() { return document.querySelectorAll('*').length; })()"
        )
        elem_count = count_result.get("result") or 0
        print(f"LinkedIn DOM elements: {elem_count}")

        start = time.perf_counter()
        try:
            snapshot = await bridge.snapshot(tab_id, timeout_s=15.0)
            elapsed = time.perf_counter() - start
            tree = snapshot.get("tree") or ""
            truncated = "(truncated)" in tree
            print(f"LinkedIn snapshot: {elapsed:.3f}s, {len(tree)} chars, truncated={truncated}")

            if elapsed < 5.0:
                print("✓ PASS: LinkedIn snapshot fast enough")
            elif elapsed < 15.0:
                print("⚠ WARNING: LinkedIn snapshot slow but within timeout")
            else:
                print("✗ FAIL: LinkedIn snapshot too slow")

        except asyncio.TimeoutError:
            print("✗ FAIL: LinkedIn snapshot timed out")

    finally:
        # Tear down in reverse order; the inner finally guarantees the bridge
        # stops even if destroying the context fails.
        try:
            if group_id is not None:
                await bridge.destroy_context(group_id)
                print("\n✓ Context destroyed")
        finally:
            await bridge.stop()
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
asyncio.run(test_huge_dom())
|
||||
@@ -0,0 +1,187 @@
|
||||
#!/usr/bin/env python
|
||||
"""
|
||||
Test #13: SPA Navigation Events
|
||||
|
||||
Symptom: wait_until="load" fires before content ready
|
||||
Root Cause: SPA uses client-side routing, no full page load
|
||||
Detection: URL changes but load event already fired
|
||||
Fix: Use wait_until="networkidle" or wait_for_selector
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
import sys
|
||||
import time
|
||||
import base64
|
||||
from pathlib import Path
|
||||
|
||||
sys.path.insert(0, str(Path(__file__).parent.parent.parent.parent / "tools" / "src"))
|
||||
|
||||
from gcu.browser.bridge import BeelineBridge
|
||||
|
||||
CONTEXT_NAME = "spa-nav-test"
|
||||
|
||||
|
||||
async def test_spa_navigation():
    """Test navigation timing on SPA pages.

    Loads a local single-page app that renders its first view about 1.5 s
    after the load event (1 s hydration timer plus a 500 ms simulated
    fetch), then:

    1. navigates with ``wait_until="load"`` and shows that the content is
       still the loading placeholder, then waits for ``#home-btn``,
    2. clicks the "About" link and checks the client-side route rendered,
    3. navigates with ``wait_until="networkidle"`` and checks whether the
       home view is already present.

    Results are printed; nothing is returned or asserted.  The browser
    context and the bridge are released in ``finally`` even if a step raises.
    """
    # Local import: only this function needs it, and it keeps the block
    # self-contained.
    import tempfile

    print("=" * 70)
    print("TEST #13: SPA Navigation Events")
    print("=" * 70)

    bridge = BeelineBridge()
    group_id = None  # set once a context exists, so cleanup knows what to destroy

    try:
        await bridge.start()

        # Give the extension up to ~10 s to connect, polling once per second.
        for _ in range(10):
            await asyncio.sleep(1)
            if bridge.is_connected:
                print("✓ Extension connected!")
                break
        else:
            print("✗ Extension not connected")
            return

        context = await bridge.create_context(CONTEXT_NAME)
        tab_id = context.get("tabId")
        group_id = context.get("groupId")
        print(f"✓ Created tab: {tab_id}")

        # Create a test SPA.
        # navigate() takes the click event explicitly: the implicit global
        # `event` is undefined when navigate() runs from the hydration timer,
        # so calling event.preventDefault() there would throw and the first
        # view would never render.
        spa_html = """
        <!DOCTYPE html>
        <html>
        <head>
            <title>SPA Test</title>
            <style>
                nav a { margin-right: 10px; }
                .page { padding: 20px; border: 1px solid #ccc; margin-top: 10px; }
            </style>
        </head>
        <body>
            <nav>
                <a href="#home" onclick="navigate('home', event)">Home</a>
                <a href="#about" onclick="navigate('about', event)">About</a>
                <a href="#contact" onclick="navigate('contact', event)">Contact</a>
            </nav>
            <div id="app" class="page">
                <h1>Loading...</h1>
            </div>
            <script>
                // Simulate SPA routing
                let currentPage = '';

                async function navigate(page, evt) {
                    // evt is only supplied by link clicks, not by the hydration timer
                    if (evt) evt.preventDefault();
                    currentPage = page;

                    // Show loading state
                    document.getElementById('app').innerHTML = '<h1>Loading...</h1>';

                    // Simulate async content loading (like real SPAs)
                    await new Promise(r => setTimeout(r, 500));

                    // Render content
                    const content = {
                        home: '<h1>Home Page</h1><p>Welcome to the SPA!</p><button id="home-btn">Home Action</button>',
                        about: '<h1>About Page</h1><p>This is a simulated SPA.</p><button id="about-btn">About Action</button>',
                        contact: '<h1>Contact Page</h1><p>Contact us at test@example.com</p><button id="contact-btn">Contact Action</button>'
                    };

                    document.getElementById('app').innerHTML = content[page] || '<h1>404</h1>';
                    window.location.hash = page;
                }

                // Initial load with delay (simulates SPA hydration)
                setTimeout(() => {
                    navigate('home');
                }, 1000);

                // Track for testing
                window.pageLoads = [];
                window.addEventListener('hashchange', () => {
                    window.pageLoads.push(window.location.hash);
                });
            </script>
        </body>
        </html>
        """

        # Write to file and use file:// URL (data: URLs don't work well with extension).
        # The platform temp dir plus as_uri() gives a valid URL on every OS.
        test_file = Path(tempfile.gettempdir()) / "spa_test.html"
        test_file.write_text(spa_html.strip(), encoding="utf-8")
        file_url = test_file.as_uri()

        # Test 1: wait_until="load" - may fire before content ready
        print("\n--- Test 1: wait_until='load' ---")
        start = time.perf_counter()
        await bridge.navigate(tab_id, file_url, wait_until="load")
        elapsed = time.perf_counter() - start
        print(f"Navigation completed in {elapsed:.3f}s")

        # Check content immediately
        content = await bridge.evaluate(
            tab_id,
            "(function() { return document.getElementById('app').innerText; })()"
        )
        print(f"Content immediately after load: '{content.get('result', '')}'")

        # Screenshot
        screenshot = await bridge.screenshot(tab_id)
        print(f"Screenshot: {len(screenshot.get('data', ''))} bytes")

        # Wait for content
        print("\n--- Waiting for content to hydrate ---")
        await bridge.wait_for_selector(tab_id, "#home-btn", timeout_ms=5000)
        print("✓ Content loaded")

        # Check content after wait
        content_after = await bridge.evaluate(
            tab_id,
            "(function() { return document.getElementById('app').innerText; })()"
        )
        print(f"Content after wait: '{content_after.get('result', '')}'")

        # Test 2: SPA navigation (no full page load)
        print("\n--- Test 2: SPA client-side navigation ---")

        # Click "About" link
        await bridge.click(tab_id, 'a[href="#about"]')
        await asyncio.sleep(1)  # longer than the page's 500 ms simulated fetch

        # Check if content changed
        about_content = await bridge.evaluate(
            tab_id,
            "(function() { return document.getElementById('app').innerText; })()"
        )
        # `or ""` keeps the substring checks from raising TypeError when the
        # bridge reports a missing or None result.
        about_text = about_content.get("result") or ""
        print(f"Content after SPA nav: '{about_text}'")

        if "About Page" in about_text:
            print("✓ PASS: SPA navigation worked")
        else:
            print("✗ FAIL: SPA navigation didn't update content")

        # Test 3: wait_until="networkidle"
        print("\n--- Test 3: wait_until='networkidle' ---")
        await bridge.navigate(tab_id, file_url, wait_until="networkidle", timeout_ms=10000)

        # Check content immediately
        content_networkidle = await bridge.evaluate(
            tab_id,
            "(function() { return document.getElementById('app').innerText; })()"
        )
        networkidle_text = content_networkidle.get("result") or ""
        print(f"Content after networkidle: '{networkidle_text}'")

        if "Home Page" in networkidle_text:
            print("✓ PASS: networkidle waited for content")
        else:
            print("⚠ WARNING: networkidle didn't wait long enough")

    finally:
        # Tear down in reverse order; the inner finally guarantees the bridge
        # stops even if destroying the context fails.
        try:
            if group_id is not None:
                await bridge.destroy_context(group_id)
                print("\n✓ Context destroyed")
        finally:
            await bridge.stop()
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
asyncio.run(test_spa_navigation())
|
||||
@@ -0,0 +1,262 @@
|
||||
#!/usr/bin/env python
|
||||
"""
|
||||
Test #15: Screenshot Functionality
|
||||
|
||||
Tests browser_screenshot across multiple scenarios:
|
||||
- Basic viewport screenshot
|
||||
- Full-page screenshot
|
||||
- Selector-based screenshot
|
||||
- Screenshot on complex DOM
|
||||
- Timeout handling
|
||||
|
||||
Category: screenshot
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
import base64
|
||||
import sys
|
||||
import time
|
||||
from pathlib import Path
|
||||
|
||||
sys.path.insert(0, str(Path(__file__).parent.parent.parent.parent / "tools" / "src"))
|
||||
|
||||
from gcu.browser.bridge import BeelineBridge
|
||||
|
||||
# Name handed to bridge.create_context() for this test run.
CONTEXT_NAME = "screenshot-test"

# Self-contained fixture page: a heading, a fixed-size box (#target-box) used
# by the selector test, and a 2000px-tall block so the page is taller than a
# typical viewport for the full-page test.
SIMPLE_HTML = """<!DOCTYPE html>
<html>
<head><style>
body { margin: 0; background: #fff; font-family: sans-serif; }
h1 { color: #333; padding: 20px; }
.box { width: 200px; height: 100px; background: #4a90e2; margin: 20px; }
.long-content { height: 2000px; background: linear-gradient(blue, red); }
</style></head>
<body>
<h1 id="title">Screenshot Test Page</h1>
<div class="box" id="target-box">Target Box</div>
<div class="long-content"></div>
</body>
</html>"""
|
||||
|
||||
|
||||
def check_png(data: str) -> bool:
    """Return True if ``data`` is base64 that decodes to bytes with a PNG signature.

    Only the 8-byte PNG file signature is checked; the image itself is not
    parsed.

    Args:
        data: Base64-encoded payload (for example the ``data`` field of a
            screenshot result).

    Returns:
        True when decoding succeeds and the decoded bytes begin with the PNG
        signature. False for undecodable input, for input that is not a
        string/bytes-like object, or for any other content.
    """
    png_signature = b"\x89PNG\r\n\x1a\n"
    try:
        raw = base64.b64decode(data)
    except (ValueError, TypeError):
        # binascii.Error (bad padding / bad characters) is a ValueError
        # subclass; TypeError covers None and other non-string input.
        return False
    return raw.startswith(png_signature)
|
||||
|
||||
|
||||
async def test_basic_screenshot(bridge: BeelineBridge, tab_id: int, data_url: str):
    """Take a default screenshot of the fixture page and verify it is a PNG.

    Navigates the tab to ``data_url``, captures a screenshot with default
    options, prints timing and size details, and validates the payload with
    ``check_png``.

    Returns:
        True when the bridge reports ``ok``, returns non-empty data, and that
        data passes ``check_png``; False otherwise.
    """
    print("\n--- Test 1: Basic Viewport Screenshot ---")
    await bridge.navigate(tab_id, data_url, wait_until="load")
    await asyncio.sleep(0.5)  # short settle delay before capturing

    t0 = time.perf_counter()
    shot = await bridge.screenshot(tab_id)
    took = time.perf_counter() - t0

    ok = shot.get("ok")
    payload = shot.get("data", "")
    mime = shot.get("mimeType", "")
    print(f" ok={ok}, mimeType={mime}, elapsed={took:.3f}s")
    print(f" data length: {len(payload)} chars")

    # Guard: the bridge reported failure or returned nothing to inspect.
    if not (ok and payload):
        print(f" ✗ FAIL: {shot.get('error', 'no data')}")
        return False

    is_png = check_png(payload)
    print(f" valid PNG: {is_png}")
    if not is_png:
        print(" ✗ FAIL: Data is not a valid PNG")
        return False

    decoded = base64.b64decode(payload)
    print(f" PNG size: {len(decoded)} bytes")
    print(" ✓ PASS: Basic screenshot works")
    return True
|
||||
|
||||
|
||||
async def test_full_page_screenshot(bridge: BeelineBridge, tab_id: int, data_url: str):
    """Compare a viewport capture with a full-page capture of the fixture page.

    Decoded byte size is used as the comparison metric: the test passes only
    when the full-page image is larger in bytes than the viewport image.

    Returns:
        True when both captures return data and the full-page capture decodes
        to more bytes than the viewport capture; False otherwise.
    """
    print("\n--- Test 2: Full Page Screenshot ---")
    await bridge.navigate(tab_id, data_url, wait_until="load")
    await asyncio.sleep(0.5)

    viewport_shot = await bridge.screenshot(tab_id, full_page=False)
    full_shot = await bridge.screenshot(tab_id, full_page=True)
    viewport_b64 = viewport_shot.get("data", "")
    full_b64 = full_shot.get("data", "")

    # Either capture coming back empty makes the comparison impossible.
    if not (viewport_b64 and full_b64):
        print(f" ✗ FAIL: viewport ok={viewport_shot.get('ok')}, full ok={full_shot.get('ok')}")
        return False

    viewport_bytes = len(base64.b64decode(viewport_b64))
    full_bytes = len(base64.b64decode(full_b64))
    print(f" Viewport PNG: {viewport_bytes} bytes")
    print(f" Full page PNG: {full_bytes} bytes")

    full_is_larger = full_bytes > viewport_bytes
    if full_is_larger:
        print(" ✓ PASS: Full page larger than viewport")
    else:
        print(" ✗ FAIL: Full page not larger than viewport (may not capture long pages)")
    return full_is_larger
|
||||
|
||||
|
||||
async def test_selector_screenshot(bridge: BeelineBridge, tab_id: int, data_url: str):
    """Check whether a selector-scoped screenshot is smaller than a default one.

    Captures ``#target-box`` via the ``selector`` argument, then captures a
    second screenshot without a selector and compares decoded byte sizes.

    Returns:
        True only when the selector capture succeeds and decodes to fewer
        bytes than the reference capture; False in every other case.
    """
    print("\n--- Test 3: Selector Screenshot ---")
    await bridge.navigate(tab_id, data_url, wait_until="load")
    await asyncio.sleep(0.5)

    # The selector parameter exists in the signature but may not be implemented.
    element_shot = await bridge.screenshot(tab_id, selector="#target-box")
    ok = element_shot.get("ok")
    element_b64 = element_shot.get("data", "")

    if not (ok and element_b64):
        print(f" ⚠ NOT IMPLEMENTED: selector param ignored (returns full page) - error={element_shot.get('error')}")
        print(" NOTE: selector parameter exists in signature but is not used in implementation")
        return False

    # Reference capture without a selector, used as the size baseline.
    reference_shot = await bridge.screenshot(tab_id)
    reference_b64 = reference_shot.get("data", "")
    if not reference_b64:
        return False

    sel_size = len(base64.b64decode(element_b64))
    full_size = len(base64.b64decode(reference_b64))
    print(f" Selector PNG: {sel_size} bytes")
    print(f" Full page PNG: {full_size} bytes")

    if sel_size < full_size:
        print(" ✓ PASS: Selector screenshot smaller than full page")
        return True

    print(" ⚠ WARNING: Selector screenshot not smaller (may be full page)")
    return False
|
||||
|
||||
|
||||
async def test_screenshot_url_metadata(bridge: BeelineBridge, tab_id: int):
    """Verify that a screenshot result reports the URL of the page it captured.

    Navigates to https://example.com (requires network access), takes a
    screenshot, and checks the ``url`` field of the result.

    Returns:
        True when ``"example.com"`` appears in the reported URL, else False.
    """
    print("\n--- Test 4: Screenshot URL Metadata ---")
    await bridge.navigate(tab_id, "https://example.com", wait_until="load")
    await asyncio.sleep(1)

    shot = await bridge.screenshot(tab_id)
    reported_url = shot.get("url", "")
    reported_tab = shot.get("tabId")
    print(f" url={reported_url!r}, tabId={reported_tab}")

    url_matches = "example.com" in reported_url
    if url_matches:
        print(" ✓ PASS: URL metadata captured correctly")
    else:
        print(f" ✗ FAIL: Expected example.com in URL, got {reported_url!r}")
    return url_matches
|
||||
|
||||
|
||||
async def test_screenshot_timeout(bridge: BeelineBridge, tab_id: int, data_url: str):
    """Request a screenshot with a 1 ms timeout and check how failure is reported.

    Outcomes:
      * The screenshot succeeds anyway: inconclusive (the page rendered faster
        than the timeout), treated as a pass.
      * The screenshot fails with an error mentioning "timed out" or
        "cancelled": pass, the timeout was surfaced gracefully.
      * The screenshot fails with any other error: fail. Previously this case
        was reported as "fast enough to beat timeout" and counted as a pass,
        which hid unrelated bridge errors.

    Returns:
        True for the first two outcomes, False for the third.
    """
    print("\n--- Test 5: Timeout Handling ---")
    await bridge.navigate(tab_id, data_url, wait_until="load")

    # Very short timeout - likely still completes since this is a simple page.
    start = time.perf_counter()
    result = await bridge.screenshot(tab_id, timeout_s=0.001)
    elapsed = time.perf_counter() - start

    if result.get("ok"):
        print(f" ⚠ Screenshot completed before timeout ({elapsed:.3f}s) - too fast to test timeout")
        return True  # Still ok, just very fast

    # Normalise a missing or None error to "" so the substring checks below
    # cannot raise TypeError.
    err = str(result.get("error") or "")
    if "timed out" in err or "cancelled" in err:
        print(f" ✓ PASS: Timeout handled gracefully: {err!r}")
        return True

    print(f" ✗ FAIL: Screenshot failed with a non-timeout error: {err!r} in {elapsed:.3f}s")
    return False
|
||||
|
||||
|
||||
async def test_screenshot_complex_site(bridge: BeelineBridge, tab_id: int):
    """Take a screenshot of a live site (example.com) and validate the PNG.

    Requires network access. Prints the bridge status, elapsed capture time,
    and payload length.

    Returns:
        True when the bridge reports ``ok`` and the payload passes
        ``check_png``; False otherwise.
    """
    print("\n--- Test 6: Complex Site (example.com) ---")
    await bridge.navigate(tab_id, "https://example.com", wait_until="load")
    await asyncio.sleep(1)

    t0 = time.perf_counter()
    shot = await bridge.screenshot(tab_id)
    took = time.perf_counter() - t0

    ok = shot.get("ok")
    payload = shot.get("data", "")
    print(f" ok={ok}, elapsed={took:.3f}s, data_len={len(payload)}")

    if not (ok and check_png(payload)):
        print(f" ✗ FAIL: {shot.get('error', 'bad data')}")
        return False

    print(" ✓ PASS: Screenshot on real site works")
    return True
|
||||
|
||||
|
||||
async def main():
    """Run every screenshot test against a live bridge and print a summary.

    Flow: start the bridge, poll ``bridge.is_connected`` once per second for
    up to 10 attempts, create a browser context, run the six tests in order,
    print per-test and aggregate results, then clean up.

    Cleanup is guaranteed: the context is destroyed even if a test raises,
    and the bridge is always stopped.
    """
    print("=" * 70)
    print("TEST #15: Screenshot Functionality")
    print("=" * 70)

    bridge = BeelineBridge()

    try:
        await bridge.start()

        # for/else: the else branch runs only if the loop never hit `break`,
        # i.e. the extension did not connect within the 10 attempts.
        for i in range(10):
            await asyncio.sleep(1)
            if bridge.is_connected:
                print("✓ Extension connected!")
                break
            print(f"Waiting for extension... ({i+1}/10)")
        else:
            print("✗ Extension not connected. Ensure Chrome with Beeline extension is running.")
            return

        context = await bridge.create_context(CONTEXT_NAME)
        tab_id = context.get("tabId")
        group_id = context.get("groupId")
        print(f"✓ Created tab: {tab_id}")

        try:
            data_url = f"data:text/html;base64,{base64.b64encode(SIMPLE_HTML.encode()).decode()}"

            # Tests run sequentially in insertion order and share one tab.
            results = {
                "basic": await test_basic_screenshot(bridge, tab_id, data_url),
                "full_page": await test_full_page_screenshot(bridge, tab_id, data_url),
                "selector": await test_selector_screenshot(bridge, tab_id, data_url),
                "metadata": await test_screenshot_url_metadata(bridge, tab_id),
                "timeout": await test_screenshot_timeout(bridge, tab_id, data_url),
                "complex_site": await test_screenshot_complex_site(bridge, tab_id),
            }

            print("\n" + "=" * 70)
            print("SUMMARY")
            print("=" * 70)
            for name, passed in results.items():
                status = "✓ PASS" if passed else "✗ FAIL"
                print(f" {status}: {name}")

            passed_count = sum(1 for v in results.values() if v)
            total = len(results)
            print(f"\n {passed_count}/{total} tests passed")
        finally:
            # Destroy the context even when a test raised, so a failed run
            # does not leave its browser context behind.
            await bridge.destroy_context(group_id)
            print("\n✓ Context destroyed")

    finally:
        await bridge.stop()
        print("✓ Bridge stopped")
|
||||
|
||||
|
||||
# Script entry point: run the screenshot suite only when executed directly.
if __name__ == "__main__":
    asyncio.run(main())
|
||||
@@ -0,0 +1,327 @@
|
||||
#!/usr/bin/env python
|
||||
"""
|
||||
Browser Edge Case Test Template
|
||||
|
||||
This script provides a template for testing and debugging browser tool failures
|
||||
on specific websites. Use this to reproduce, isolate, and verify fixes.
|
||||
|
||||
Usage:
|
||||
1. Copy this file: cp test_case.py test_#[number]_[site].py
|
||||
2. Fill in the CONFIG section with your test details
|
||||
3. Run: uv run python test_#[number]_[site].py
|
||||
|
||||
Example:
|
||||
uv run python test_01_linkedin_scroll.py
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
import sys
|
||||
import time
|
||||
from pathlib import Path
|
||||
|
||||
# Add tools to path
|
||||
sys.path.insert(0, str(Path(__file__).parent.parent.parent.parent / "tools" / "src"))
|
||||
|
||||
from gcu.browser.bridge import BeelineBridge
|
||||
|
||||
# ═══════════════════════════════════════════════════════════════════════════════
|
||||
# CONFIG: Fill in these values for your test case
|
||||
# ═══════════════════════════════════════════════════════════════════════════════
|
||||
|
||||
# Description of the edge case under test. The test functions below read
# "site", "simple_site" and "category"; main() prints the remaining fields.
TEST_CASE = {
    "number": 1,
    "name": "LinkedIn Nested Scroll Container",
    "site": "https://www.linkedin.com/feed",  # page that shows the problem
    "simple_site": "https://example.com",  # baseline page expected to work
    "category": "scroll",  # scroll, click, input, snapshot, navigation
    "symptom": "scroll() returns success but page doesn't move",
}

# NOTE(review): BRIDGE_PORT is not referenced anywhere in the visible part of
# this template - confirm whether the bridge picks it up some other way.
BRIDGE_PORT = 9229

# Name handed to bridge.create_context() for this test run.
CONTEXT_NAME = "edge-case-test"
|
||||
|
||||
|
||||
# ═══════════════════════════════════════════════════════════════════════════════
|
||||
# TEST FUNCTIONS
|
||||
# ═══════════════════════════════════════════════════════════════════════════════
|
||||
|
||||
|
||||
async def test_simple_site(bridge: BeelineBridge, tab_id: int) -> dict:
    """Run the tool under test on the simple baseline site.

    Navigates to ``TEST_CASE["simple_site"]`` and performs the action that
    matches ``TEST_CASE["category"]``.

    Returns:
        The bridge result for the "scroll" and "snapshot" categories, or
        ``{"ok": True}`` for categories that have no baseline action yet.
    """
    print("\n--- Baseline Test (Simple Site) ---")

    await bridge.navigate(tab_id, TEST_CASE["simple_site"], wait_until="load")
    await asyncio.sleep(1)

    # Adjust this based on category
    category = TEST_CASE["category"]

    if category == "scroll":
        outcome = await bridge.scroll(tab_id, "down", 100)
        print(f" Scroll result: {outcome}")
        return outcome

    if category == "snapshot":
        outcome = await bridge.snapshot(tab_id, timeout_s=5.0)
        print(f" Snapshot length: {len(outcome.get('tree', ''))}")
        return outcome

    # "click" (placeholder: add a click test here) and all other categories
    # currently have no baseline action.
    return {"ok": True}
|
||||
|
||||
|
||||
def _scroll_offset(entry) -> float:
    """Return the vertical scroll offset stored in one scroll-probe entry.

    Element entries store the offset under ``scrollTop``; the ``window``
    entry stores it under ``y``. Anything that is not a dict counts as 0.
    """
    if not isinstance(entry, dict):
        return 0
    return entry.get("scrollTop", entry.get("y", 0))


async def test_problematic_site(bridge: BeelineBridge, tab_id: int) -> dict:
    """Run the tool under test on the problematic site and report the outcome.

    For the "scroll" category the page is probed before and after a scroll
    request: the probe records ``window.scrollY`` plus ``scrollTop`` for every
    element whose computed ``overflow-y`` is ``scroll``/``auto`` and whose
    content overflows. The test passes if any recorded offset changed.

    For the "snapshot" category a snapshot is timed with a 15 s timeout.

    Returns:
        scroll:   ``{"ok": <bool changed>, "scroll_result": <bridge result>}``
        snapshot: ``{"ok": True, "elapsed": ..., "tree_length": ...}`` or
                  ``{"ok": False, "error": "timeout"}``
        other:    ``{"ok": True}``
    """
    print("\n--- Problem Site Test ---")

    await bridge.navigate(tab_id, TEST_CASE["site"], wait_until="load", timeout_ms=30000)
    await asyncio.sleep(2)

    # Adjust this based on category
    category = TEST_CASE["category"]

    if category == "scroll":
        # One probe script, evaluated before and after the scroll request.
        # The class name is read via getAttribute because `el.className` is
        # an SVGAnimatedString (no .substring) on SVG elements.
        scroll_probe_js = """
            (function() {
                const results = { window: { y: window.scrollY } };
                document.querySelectorAll('*').forEach((el, i) => {
                    const style = getComputedStyle(el);
                    if ((style.overflowY === 'scroll' || style.overflowY === 'auto') &&
                        el.scrollHeight > el.clientHeight) {
                        results['el_' + i] = {
                            tag: el.tagName,
                            scrollTop: el.scrollTop,
                            class: (el.getAttribute('class') || '').substring(0, 30)
                        };
                    }
                });
                return results;
            })();
        """

        # Get scroll positions before
        before = await bridge.evaluate(tab_id, scroll_probe_js)
        print(f" Before scroll: {before.get('result', {})}")

        # Try to scroll
        result = await bridge.scroll(tab_id, "down", 500)
        print(f" Scroll result: {result}")

        await asyncio.sleep(1)

        # Get scroll positions after
        after = await bridge.evaluate(tab_id, scroll_probe_js)
        print(f" After scroll: {after.get('result', {})}")

        # Check if anything changed
        before_data = before.get("result", {}) or {}
        after_data = after.get("result", {}) or {}

        changed = False
        for key, after_entry in after_data.items():
            if key not in before_data:
                continue
            # _scroll_offset reads `scrollTop` for elements and `y` for the
            # window entry, so a plain window scroll is detected as well.
            b_val = _scroll_offset(before_data[key])
            a_val = _scroll_offset(after_entry)
            if a_val != b_val:
                print(f" ✓ CHANGE DETECTED: {key} scrolled from {b_val} to {a_val}")
                changed = True

        if not changed:
            print(" ✗ NO CHANGE: Scroll did not affect any container")

        return {"ok": changed, "scroll_result": result}

    elif category == "snapshot":
        start = time.perf_counter()
        try:
            result = await bridge.snapshot(tab_id, timeout_s=15.0)
            elapsed = time.perf_counter() - start
            tree_len = len(result.get("tree", ""))
            print(f" Snapshot completed in {elapsed:.2f}s, {tree_len} chars")
            return {"ok": True, "elapsed": elapsed, "tree_length": tree_len}
        except asyncio.TimeoutError:
            print(" ✗ SNAPSHOT TIMED OUT")
            return {"ok": False, "error": "timeout"}

    return {"ok": True}
|
||||
|
||||
|
||||
async def detect_root_cause(bridge: BeelineBridge, tab_id: int) -> dict:
    """Evaluate a series of in-page detection scripts and collect their results.

    Each probe is evaluated in the tab via ``bridge.evaluate``; the ``result``
    field of the response (or ``{}`` when absent) is stored under the probe's
    key and printed.

    Probes, in order:
      * ``nested_scroll`` - elements larger than 100x100 whose computed
        ``overflow`` contains ``scroll`` or ``auto`` (count + largest by area)
      * ``shadow_dom``    - elements with an open shadow root (count + up to
        5 tag names)
      * ``iframes``       - number of ``<iframe>`` elements
      * ``dom_size``      - total element count and ``document.body`` children
      * ``frameworks``    - React / Vue / Angular marker attributes

    Returns:
        Dict mapping each probe key to that probe's result.
    """
    print("\n--- Root Cause Detection ---")

    # Detection 1: Nested scrollable containers.
    # The class name is read via getAttribute because `el.className` is an
    # SVGAnimatedString (no .substring) on SVG elements.
    nested_scroll_js = """
        (function() {
            const candidates = [];
            document.querySelectorAll('*').forEach(el => {
                const style = getComputedStyle(el);
                if (style.overflow.includes('scroll') || style.overflow.includes('auto')) {
                    const rect = el.getBoundingClientRect();
                    if (rect.width > 100 && rect.height > 100) {
                        candidates.push({
                            tag: el.tagName,
                            area: rect.width * rect.height,
                            class: (el.getAttribute('class') || '').substring(0, 30)
                        });
                    }
                }
            });
            candidates.sort((a, b) => b.area - a.area);
            return {
                count: candidates.length,
                largest: candidates[0]
            };
        })();
    """

    # Detection 2: Shadow DOM
    shadow_dom_js = """
        (function() {
            const withShadow = [];
            document.querySelectorAll('*').forEach(el => {
                if (el.shadowRoot) {
                    withShadow.push(el.tagName);
                }
            });
            return { count: withShadow.length, elements: withShadow.slice(0, 5) };
        })();
    """

    # Detection 3: iframes
    iframes_js = """
        (function() {
            const iframes = document.querySelectorAll('iframe');
            return { count: iframes.length };
        })();
    """

    # Detection 4: DOM size
    dom_size_js = """
        (function() {
            return {
                elements: document.querySelectorAll('*').length,
                body_children: document.body.children.length
            };
        })();
    """

    # Detection 5: Framework detection.
    # Vue scoped attributes look like `data-v-<hash>`, which the attribute
    # selector `[data-v-]` can never match (it requires an attribute named
    # exactly `data-v-`), so attribute names are checked by prefix instead.
    # NOTE(review): data-reactroot / data-reactid are believed to be emitted
    # only by older React versions - confirm before relying on `react: false`.
    frameworks_js = """
        (function() {
            const hasVueAttr = Array.from(document.querySelectorAll('*')).some(
                el => el.getAttributeNames().some(name => name.startsWith('data-v-'))
            );
            return {
                react: !!document.querySelector('[data-reactroot], [data-reactid]'),
                vue: hasVueAttr,
                angular: !!document.querySelector('[ng-app], [ng-version]')
            };
        })();
    """

    # (result key, printed label, script) - evaluated sequentially in order.
    probes = [
        ("nested_scroll", "Nested scroll containers", nested_scroll_js),
        ("shadow_dom", "Shadow DOM", shadow_dom_js),
        ("iframes", "iframes", iframes_js),
        ("dom_size", "DOM size", dom_size_js),
        ("frameworks", "Frameworks", frameworks_js),
    ]

    detections = {}
    for key, label, script in probes:
        response = await bridge.evaluate(tab_id, script)
        detections[key] = response.get("result", {})
        print(f" {label}: {detections[key]}")

    return detections
|
||||
|
||||
|
||||
# ═══════════════════════════════════════════════════════════════════════════════
|
||||
# MAIN
|
||||
# ═══════════════════════════════════════════════════════════════════════════════
|
||||
|
||||
|
||||
async def main():
    """Run the baseline test, the problem-site test, and root-cause detection.

    Flow: print the test-case header, start the bridge, poll
    ``bridge.is_connected`` once per second for up to 10 attempts, create a
    browser context, run the three stages on one tab, print a summary, then
    clean up.

    Cleanup is guaranteed: the context is destroyed even if a stage raises,
    and the bridge is always stopped.
    """
    print("=" * 70)
    print(f"EDGE CASE TEST #{TEST_CASE['number']}: {TEST_CASE['name']}")
    print("=" * 70)
    print(f"Site: {TEST_CASE['site']}")
    print(f"Category: {TEST_CASE['category']}")
    print(f"Symptom: {TEST_CASE['symptom']}")

    bridge = BeelineBridge()

    try:
        print("\n--- Starting Bridge ---")
        await bridge.start()

        # Wait for extension connection. for/else: the else branch runs only
        # if the loop never hit `break`.
        for i in range(10):
            await asyncio.sleep(1)
            if bridge.is_connected:
                print("✓ Extension connected!")
                break
            print(f"Waiting for extension... ({i+1}/10)")
        else:
            print("✗ Extension not connected. Ensure Chrome with Beeline extension is running.")
            return

        # Create browser context
        context = await bridge.create_context(CONTEXT_NAME)
        tab_id = context.get("tabId")
        group_id = context.get("groupId")
        print(f"✓ Created tab: {tab_id}")

        try:
            # Run tests
            baseline_result = await test_simple_site(bridge, tab_id)
            problem_result = await test_problematic_site(bridge, tab_id)
            detections = await detect_root_cause(bridge, tab_id)

            # Summary
            print("\n" + "=" * 70)
            print("SUMMARY")
            print("=" * 70)
            print(f"Baseline test: {'✓ PASS' if baseline_result.get('ok') else '✗ FAIL'}")
            print(f"Problem test: {'✓ PASS' if problem_result.get('ok') else '✗ FAIL'}")
            print(f"Root cause indicators: {[k for k, v in detections.items() if v]}")
        finally:
            # Cleanup runs even when a stage raised, so a failed run does not
            # leave its browser context behind.
            print("\n--- Cleanup ---")
            await bridge.destroy_context(group_id)
            print("✓ Context destroyed")

    finally:
        await bridge.stop()
        print("✓ Bridge stopped")
|
||||
|
||||
|
||||
# Script entry point: run the edge-case test only when executed directly.
if __name__ == "__main__":
    asyncio.run(main())
|
||||
@@ -0,0 +1,225 @@
|
||||
# Integration Test Reporting Skill
|
||||
|
||||
Run the Level 2 dummy agent integration test suite and produce a detailed HTML report with per-test input → outcome analysis.
|
||||
|
||||
## Trigger
|
||||
|
||||
User wants to run integration tests and see results:
|
||||
- `/test-reporting`
|
||||
- `/test-reporting test_component_queen_live.py`
|
||||
- `/test-reporting --all`
|
||||
|
||||
## SOP: Running Tests
|
||||
|
||||
### Step 1: Select Scope
|
||||
|
||||
If the user provides a specific test file or pattern, use it. Otherwise run the full suite.
|
||||
|
||||
```bash
|
||||
# Full suite
|
||||
cd core && echo "1" | uv run python tests/dummy_agents/run_all.py --interactive 2>&1
|
||||
|
||||
# Specific file (requires manual provider setup)
|
||||
cd core && uv run python -c "
|
||||
import sys
|
||||
sys.path.insert(0, '.')
|
||||
from tests.dummy_agents.run_all import detect_available
|
||||
from tests.dummy_agents.conftest import set_llm_selection
|
||||
|
||||
avail = detect_available()
|
||||
claude = [p for p in avail if 'Claude Code' in p['name']]
|
||||
if not claude:
|
||||
avail_names = [p['name'] for p in avail]
|
||||
raise RuntimeError(f'No Claude Code subscription. Available: {avail_names}')
|
||||
provider = claude[0]
|
||||
set_llm_selection(
|
||||
model=provider['model'],
|
||||
api_key=provider['api_key'],
|
||||
extra_headers=provider.get('extra_headers'),
|
||||
api_base=provider.get('api_base'),
|
||||
)
|
||||
|
||||
import pytest
|
||||
sys.exit(pytest.main([
|
||||
'tests/dummy_agents/TEST_FILE_HERE',
|
||||
'-v', '--override-ini=asyncio_mode=auto', '--no-header', '--tb=long',
|
||||
'--log-cli-level=WARNING', '--junitxml=/tmp/hive_test_results.xml',
|
||||
]))
|
||||
"
|
||||
```
|
||||
|
||||
### Step 2: Collect Results
|
||||
|
||||
After the test run completes, collect:
|
||||
1. **JUnit XML** from `--junitxml` output (if available)
|
||||
2. **stdout/stderr** from the run
|
||||
3. **Summary table** from `run_all.py` output (the Unicode table)
|
||||
|
||||
### Step 3: Generate HTML Report
|
||||
|
||||
Write the report to `/tmp/hive_integration_test_report.html`.
|
||||
|
||||
The report MUST include these sections:
|
||||
|
||||
#### Header
|
||||
- Run timestamp (ISO 8601)
|
||||
- Provider used (model name, source)
|
||||
- Total tests / passed / failed / skipped
|
||||
- Total wall-clock time
|
||||
- Overall verdict: PASS (all green) or FAIL (with count)
|
||||
|
||||
#### Per-Test Table
|
||||
|
||||
For EVERY test (not just failures), include a row with:
|
||||
|
||||
| Column | Description |
|--------|-------------|
| Component | Test file grouping (e.g., `component_queen_live`) |
| Test Name | Function name (e.g., `test_queen_starts_in_planning_without_worker`) |
| Status | PASS / FAIL / SKIP / ERROR with color badge |
| Duration | Wall-clock seconds |
| What | One-line description of what the test verifies |
| How | How it works (setup → action → assertion) |
| Why | Why this test matters (what bug/behavior it catches) |
| Input | The input data or configuration (graph spec, initial prompt, phase, etc.) |
| Expected Outcome | What the test asserts |
| Actual Outcome | What actually happened (PASS: matches expected / FAIL: actual vs expected) |
| Failure Detail | For failures only: full traceback + diagnosis |
|
||||
|
||||
#### What / How / Why Descriptions
|
||||
|
||||
These MUST be derived from the test function's docstring and code. Read each test file to extract:
|
||||
- **What**: From the docstring first line
|
||||
- **How**: From the test body (what fixtures, what graph, what assertions)
|
||||
- **Why**: From the docstring body or "Why this matters" section in the test module
|
||||
|
||||
Use these mappings for the component test files:
|
||||
|
||||
```
|
||||
test_component_llm.py → "LLM Provider" — streaming, tool calling, tokens
|
||||
test_component_tools.py → "Tool Registry + MCP" — connection, execution
|
||||
test_component_event_loop.py → "EventLoopNode" — iteration, output, stall
|
||||
test_component_edges.py → "Edge Evaluation" — conditional, priority
|
||||
test_component_conversation.py → "Conversation Persistence" — storage, cursor
|
||||
test_component_escalation.py → "Escalation Flow" — worker→queen signaling
|
||||
test_component_continuous.py → "Continuous Mode" — conversation threading
|
||||
test_component_queen.py → "Queen Phase (Unit)" — phase state, tools, events
|
||||
test_component_queen_live.py → "Queen Phase (Live)" — real queen, real LLM
|
||||
test_component_queen_state_machine.py → "Queen State Machine" — edge cases, races
|
||||
test_component_worker_comms.py → "Worker Communication" — events, data flow
|
||||
test_component_strict_outcomes.py → "Strict Outcomes" — exact path, output, quality
|
||||
```
|
||||
|
||||
#### HTML Template
|
||||
|
||||
Use this structure:
|
||||
|
||||
```html
|
||||
<!DOCTYPE html>
|
||||
<html lang="en">
|
||||
<head>
|
||||
<meta charset="utf-8">
|
||||
<title>Hive Integration Test Report — {timestamp}</title>
|
||||
<style>
|
||||
:root { --pass: #22c55e; --fail: #ef4444; --skip: #f59e0b; --bg: #0f172a; --surface: #1e293b; --text: #e2e8f0; --muted: #94a3b8; --border: #334155; }
|
||||
* { box-sizing: border-box; margin: 0; padding: 0; }
|
||||
body { font-family: 'SF Mono', 'Fira Code', monospace; background: var(--bg); color: var(--text); padding: 2rem; line-height: 1.6; }
|
||||
h1, h2, h3 { font-weight: 600; }
|
||||
h1 { font-size: 1.5rem; margin-bottom: 1rem; }
|
||||
h2 { font-size: 1.2rem; margin: 2rem 0 1rem; border-bottom: 1px solid var(--border); padding-bottom: 0.5rem; }
|
||||
.summary { display: grid; grid-template-columns: repeat(auto-fit, minmax(150px, 1fr)); gap: 1rem; margin-bottom: 2rem; }
|
||||
.card { background: var(--surface); padding: 1rem; border-radius: 8px; border: 1px solid var(--border); }
|
||||
.card .label { color: var(--muted); font-size: 0.75rem; text-transform: uppercase; }
|
||||
.card .value { font-size: 1.5rem; font-weight: 700; margin-top: 0.25rem; }
|
||||
.card .value.pass { color: var(--pass); }
|
||||
.card .value.fail { color: var(--fail); }
|
||||
table { width: 100%; border-collapse: collapse; font-size: 0.8rem; }
|
||||
th { background: var(--surface); position: sticky; top: 0; text-align: left; padding: 0.5rem; border-bottom: 2px solid var(--border); color: var(--muted); text-transform: uppercase; font-size: 0.7rem; }
|
||||
td { padding: 0.5rem; border-bottom: 1px solid var(--border); vertical-align: top; }
|
||||
tr:hover { background: rgba(255,255,255,0.03); }
|
||||
.badge { display: inline-block; padding: 2px 8px; border-radius: 4px; font-size: 0.7rem; font-weight: 700; }
|
||||
.badge.pass { background: rgba(34,197,94,0.2); color: var(--pass); }
|
||||
.badge.fail { background: rgba(239,68,68,0.2); color: var(--fail); }
|
||||
.badge.skip { background: rgba(245,158,11,0.2); color: var(--skip); }
|
||||
.detail { background: #1a1a2e; padding: 0.75rem; border-radius: 4px; margin-top: 0.5rem; font-size: 0.75rem; white-space: pre-wrap; overflow-x: auto; max-height: 200px; overflow-y: auto; }
|
||||
.component-header { background: var(--surface); padding: 0.75rem 0.5rem; font-weight: 600; font-size: 0.85rem; }
|
||||
.meta { color: var(--muted); font-size: 0.75rem; }
|
||||
</style>
|
||||
</head>
|
||||
<body>
|
||||
<h1>Hive Integration Test Report</h1>
|
||||
<p class="meta">Generated: {timestamp} | Provider: {provider} | Duration: {duration}s</p>
|
||||
|
||||
<div class="summary">
|
||||
<div class="card"><div class="label">Total</div><div class="value">{total}</div></div>
|
||||
<div class="card"><div class="label">Passed</div><div class="value pass">{passed}</div></div>
|
||||
<div class="card"><div class="label">Failed</div><div class="value fail">{failed}</div></div>
|
||||
<div class="card"><div class="label">Verdict</div><div class="value {verdict_class}">{verdict}</div></div>
|
||||
</div>
|
||||
|
||||
<h2>Test Results</h2>
|
||||
<table>
|
||||
<thead>
|
||||
<tr>
|
||||
<th>Component</th>
|
||||
<th>Test</th>
|
||||
<th>Status</th>
|
||||
<th>Time</th>
|
||||
<th>What</th>
|
||||
<th>Input → Expected → Actual</th>
|
||||
</tr>
|
||||
</thead>
|
||||
<tbody>
|
||||
<!-- For each test: -->
|
||||
<tr>
|
||||
<td>{component}</td>
|
||||
<td>{test_name}</td>
|
||||
<td><span class="badge {status_class}">{status}</span></td>
|
||||
<td>{duration}s</td>
|
||||
<td>{what_description}</td>
|
||||
<td>
|
||||
<strong>Input:</strong> {input_description}<br>
|
||||
<strong>Expected:</strong> {expected_outcome}<br>
|
||||
<strong>Actual:</strong> {actual_outcome}
|
||||
<!-- If failed: -->
|
||||
<div class="detail">{failure_traceback}</div>
|
||||
</td>
|
||||
</tr>
|
||||
</tbody>
|
||||
</table>
|
||||
|
||||
<h2>Failure Analysis</h2>
|
||||
<!-- Only if there are failures -->
|
||||
<p>For each failure, provide:</p>
|
||||
<ul>
|
||||
<li><strong>Root cause:</strong> Why it failed</li>
|
||||
<li><strong>Impact:</strong> What this means for the system</li>
|
||||
<li><strong>Suggested fix:</strong> How to address it</li>
|
||||
</ul>
|
||||
|
||||
</body>
|
||||
</html>
|
||||
```
|
||||
|
||||
### Step 4: Output
|
||||
|
||||
1. Write the HTML file to `/tmp/hive_integration_test_report.html`
|
||||
2. Print the file path so the user can open it
|
||||
3. Print a concise summary to the terminal:
|
||||
```
|
||||
Test Report: /tmp/hive_integration_test_report.html
|
||||
Result: 74/76 PASSED (2 failures)
|
||||
Failures:
|
||||
- parallel_merge::test_parallel_disjoint_output_keys
|
||||
- worker::test_worker_timestamped_note_artifact
|
||||
```
|
||||
|
||||
## Key Rules
|
||||
|
||||
1. ALWAYS use `--junitxml` when running pytest to get structured results
|
||||
2. ALWAYS read the test source files to populate What/How/Why columns — do not guess
|
||||
3. For Input/Expected/Actual, extract from the test's graph spec, assertions, and result
|
||||
4. Color-code everything: green for pass, red for fail, amber for skip
|
||||
5. Include the full traceback for failures in a scrollable `<div class="detail">`
|
||||
6. Group tests by component (file name) with a visual separator
|
||||
7. The report must be self-contained HTML (no external CSS/JS dependencies)
|
||||
@@ -603,11 +603,6 @@ from litellm import completion_cost
|
||||
cost = completion_cost(model="claude-3-5-sonnet-20241022", messages=[...])
|
||||
```
|
||||
|
||||
**Monitoring Dashboard** (`/core/framework/monitoring/`)
|
||||
- WebSocket-based real-time monitoring
|
||||
- Displays: active agents, tool calls, token usage, errors
|
||||
- Access at: `http://localhost:8000/monitor`
|
||||
|
||||
### How to Add Performance Metrics
|
||||
|
||||
**1. Instrument your code**
|
||||
|
||||
@@ -70,7 +70,7 @@ Use Hive when the bottleneck is no longer the model but the harness around it:
|
||||
- Long-running agents that need **state persistence and crash recovery**
|
||||
- Production workloads requiring **cost enforcement, observability, and audit trails**
|
||||
- Agents that **self-heal** through failure capture and graph evolution
|
||||
- Multi-agent coordination with **session isolation and shared memory**
|
||||
- Multi-agent coordination with **session isolation and shared buffers**
|
||||
- A framework that **scales with model improvements** rather than fighting them
|
||||
|
||||
## Quick Links
|
||||
@@ -146,7 +146,7 @@ Now you can run an agent by selecting the agent (either an existing agent or exa
|
||||
- **[Goal-Driven Generation](docs/key_concepts/goals_outcome.md)** - Define objectives in natural language; the coding agent generates the agent graph and connection code to achieve them
|
||||
- **[Adaptiveness](docs/key_concepts/evolution.md)** - Framework captures failures, calibrates according to the objectives, and evolves the agent graph
|
||||
- **[Dynamic Node Connections](docs/key_concepts/graph.md)** - No predefined edges; connection code is generated by any capable LLM based on your goals
|
||||
- **SDK-Wrapped Nodes** - Every node gets shared memory, local RLM memory, monitoring, tools, and LLM access out of the box
|
||||
- **SDK-Wrapped Nodes** - Every node gets a shared data buffer, local RLM memory, monitoring, tools, and LLM access out of the box
|
||||
- **[Human-in-the-Loop](docs/key_concepts/graph.md#human-in-the-loop)** - Intervention nodes that pause execution for human input with configurable timeouts and escalation
|
||||
- **Real-time Observability** - WebSocket streaming for live monitoring of agent execution, decisions, and node-to-node communication
|
||||
|
||||
|
||||
@@ -27,7 +27,7 @@ class GreeterNode(NodeProtocol):
|
||||
async def execute(self, ctx: NodeContext) -> NodeResult:
|
||||
name = ctx.input_data.get("name", "World")
|
||||
greeting = f"Hello, {name}!"
|
||||
ctx.memory.write("greeting", greeting)
|
||||
ctx.buffer.write("greeting", greeting)
|
||||
return NodeResult(success=True, output={"greeting": greeting})
|
||||
|
||||
|
||||
@@ -35,9 +35,9 @@ class UppercaserNode(NodeProtocol):
|
||||
"""Convert text to uppercase."""
|
||||
|
||||
async def execute(self, ctx: NodeContext) -> NodeResult:
|
||||
greeting = ctx.input_data.get("greeting") or ctx.memory.read("greeting") or ""
|
||||
greeting = ctx.input_data.get("greeting") or ctx.buffer.read("greeting") or ""
|
||||
result = greeting.upper()
|
||||
ctx.memory.write("final_greeting", result)
|
||||
ctx.buffer.write("final_greeting", result)
|
||||
return NodeResult(success=True, output={"final_greeting": result})
|
||||
|
||||
|
||||
|
||||
@@ -23,7 +23,7 @@ See `framework.testing` for details.
|
||||
"""
|
||||
|
||||
from framework.llm import AnthropicProvider, LLMProvider
|
||||
from framework.runner import AgentOrchestrator, AgentRunner
|
||||
from framework.runner import AgentRunner
|
||||
from framework.runtime.core import Runtime
|
||||
from framework.schemas.decision import Decision, DecisionEvaluation, Option, Outcome
|
||||
from framework.schemas.run import Problem, Run, RunSummary
|
||||
@@ -55,7 +55,6 @@ __all__ = [
|
||||
"AnthropicProvider",
|
||||
# Runner
|
||||
"AgentRunner",
|
||||
"AgentOrchestrator",
|
||||
# Testing
|
||||
"Test",
|
||||
"TestResult",
|
||||
|
||||
@@ -62,12 +62,6 @@ _SHARED_TOOLS = [
|
||||
"get_agent_checkpoint",
|
||||
]
|
||||
|
||||
# Episodic memory tools — available in every queen phase.
|
||||
_QUEEN_MEMORY_TOOLS = [
|
||||
"write_to_diary",
|
||||
"recall_diary",
|
||||
]
|
||||
|
||||
# Queen phase-specific tool sets.
|
||||
|
||||
# Planning phase: read-only exploration + design, no write tools.
|
||||
@@ -90,7 +84,8 @@ _QUEEN_PLANNING_TOOLS = [
|
||||
"initialize_and_build_agent",
|
||||
# Load existing agent (after user confirms)
|
||||
"load_built_agent",
|
||||
] + _QUEEN_MEMORY_TOOLS
|
||||
"save_global_memory",
|
||||
]
|
||||
|
||||
# Building phase: full coding + agent construction tools.
|
||||
_QUEEN_BUILDING_TOOLS = (
|
||||
@@ -100,11 +95,12 @@ _QUEEN_BUILDING_TOOLS = (
|
||||
"list_credentials",
|
||||
"replan_agent",
|
||||
"save_agent_draft", # Re-draft during building → auto-dissolves + updates flowchart
|
||||
"save_global_memory",
|
||||
]
|
||||
+ _QUEEN_MEMORY_TOOLS
|
||||
)
|
||||
|
||||
# Staging phase: agent loaded but not yet running — inspect, configure, launch.
|
||||
# No backward transitions — staging only goes forward to running.
|
||||
_QUEEN_STAGING_TOOLS = [
|
||||
# Read-only (inspect agent files, logs)
|
||||
"read_file",
|
||||
@@ -113,19 +109,18 @@ _QUEEN_STAGING_TOOLS = [
|
||||
"run_command",
|
||||
# Agent inspection
|
||||
"list_credentials",
|
||||
"get_worker_status",
|
||||
# Launch or go back
|
||||
"get_graph_status",
|
||||
# Launch
|
||||
"run_agent_with_input",
|
||||
"stop_worker_and_edit",
|
||||
"stop_worker_and_plan",
|
||||
"write_to_diary", # Episodic memory — available in all phases
|
||||
# Trigger management
|
||||
"set_trigger",
|
||||
"remove_trigger",
|
||||
"list_triggers",
|
||||
] + _QUEEN_MEMORY_TOOLS
|
||||
"save_global_memory",
|
||||
]
|
||||
|
||||
# Running phase: worker is executing — monitor and control.
|
||||
# Running phase: worker is executing — monitor, control, or switch to editing.
|
||||
# switch_to_editing lets the queen explicitly stop and tweak without rebuilding.
|
||||
_QUEEN_RUNNING_TOOLS = [
|
||||
# Read-only coding (for inspecting logs, files)
|
||||
"read_file",
|
||||
@@ -135,20 +130,41 @@ _QUEEN_RUNNING_TOOLS = [
|
||||
# Credentials
|
||||
"list_credentials",
|
||||
# Worker lifecycle
|
||||
"stop_worker",
|
||||
"stop_worker_and_edit",
|
||||
"stop_worker_and_plan",
|
||||
"get_worker_status",
|
||||
"stop_graph",
|
||||
"switch_to_editing",
|
||||
"get_graph_status",
|
||||
"run_agent_with_input",
|
||||
"inject_worker_message",
|
||||
"inject_message",
|
||||
# Monitoring
|
||||
"get_worker_health_summary",
|
||||
"notify_operator",
|
||||
"set_trigger",
|
||||
"remove_trigger",
|
||||
"list_triggers",
|
||||
"write_to_diary", # Episodic memory — available in all phases
|
||||
] + _QUEEN_MEMORY_TOOLS
|
||||
"save_global_memory",
|
||||
]
|
||||
|
||||
# Editing phase: worker done, still loaded — tweak config and re-run.
|
||||
# Has inject_message for live adjustments. stop_graph_and_edit/plan available
|
||||
# here to escalate when a deeper change is needed.
|
||||
_QUEEN_EDITING_TOOLS = [
|
||||
# Read-only (inspect)
|
||||
"read_file",
|
||||
"list_directory",
|
||||
"search_files",
|
||||
"run_command",
|
||||
# Credentials
|
||||
"list_credentials",
|
||||
"get_graph_status",
|
||||
# Re-run or tweak
|
||||
"run_agent_with_input",
|
||||
"inject_message",
|
||||
# Monitoring
|
||||
"get_worker_health_summary",
|
||||
"set_trigger",
|
||||
"remove_trigger",
|
||||
"list_triggers",
|
||||
"save_global_memory",
|
||||
]
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
@@ -461,7 +477,7 @@ in one call. Do NOT run these steps individually.
|
||||
## Debugging Built Agents
|
||||
When a user says "my agent is failing" or "debug this agent":
|
||||
1. list_agent_sessions("{agent_name}") — find the session
|
||||
2. get_worker_status(focus="issues") — check for problems
|
||||
2. get_graph_status(focus="issues") — check for problems
|
||||
3. list_agent_checkpoints / get_agent_checkpoint — trace execution
|
||||
|
||||
# Implementation Workflow
|
||||
@@ -528,47 +544,65 @@ _package_builder_knowledge = _shared_building_knowledge + _planning_knowledge +
|
||||
# Queen-specific: extra tool docs, behavior, phase 7, style
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
# -- Phase-specific identities --
|
||||
# -- Character core (immutable across all phases) --
|
||||
|
||||
_queen_identity_planning = """\
|
||||
You are an experienced, responsible and curious Solution Architect. \
|
||||
"Queen" is the internal alias. \
|
||||
You ask smart questions to guide the user to the solution. \
|
||||
You are in PLANNING phase — your job is to either: \
|
||||
(a) understand what the user wants and design a new agent, or \
|
||||
(b) diagnose issues with an existing agent, discuss a fix plan with the user, \
|
||||
then transition to building to implement. \
|
||||
You have read-only tools for exploration but no write/edit tools. \
|
||||
Focus on conversation, research, and design. \
|
||||
_queen_character_core = """\
|
||||
You are the Queen. Not a title — it's what they call you.
|
||||
|
||||
You are a builder who takes pride in craft. You think before you speak. \
|
||||
You are direct — not rude, but you don't pad your words with qualifiers \
|
||||
and apologies. When something won't work, you say so early. When you're \
|
||||
uncertain, you say that too.
|
||||
|
||||
You remember people. When you've worked with someone before, you build on \
|
||||
what you know — their preferences, their technical depth, what frustrated \
|
||||
them last time, what worked. You don't treat returning users like strangers.
|
||||
|
||||
You have opinions shaped by experience: you prefer simple solutions over \
|
||||
clever ones, you believe agents should be tested before they ship, and you \
|
||||
think clarity matters more than completeness. But you hold these lightly — \
|
||||
if someone makes a good case, you update.
|
||||
|
||||
This is who you are. The instructions that follow tell you what to DO \
|
||||
in each phase. This section tells you who you ARE. Don't confuse the two.\
|
||||
"""
|
||||
|
||||
# -- Phase-specific work roles (what you DO, not who you ARE) --
|
||||
|
||||
_queen_role_planning = """\
|
||||
You are in PLANNING phase. Your work: understand what the user wants, \
|
||||
research available tools, and design the agent architecture. \
|
||||
You have read-only tools — no write/edit. Focus on conversation, \
|
||||
research, and design. \
|
||||
You MUST use ask_user / ask_user_multiple tools for ALL questions — \
|
||||
never ask questions in plain text without calling the tool.\
|
||||
"""
|
||||
|
||||
_queen_identity_building = """\
|
||||
You are an experienced, responsible and curious Solution Architect. \
|
||||
"Queen" is the internal alias. \
|
||||
You design and build production-ready agent systems \
|
||||
from natural language requirements. You understand the Hive framework at the \
|
||||
source code level and create agents that are robust, well-tested, and follow \
|
||||
best practices. You collaborate with users to refine requirements, assess fit, \
|
||||
and deliver complete solutions. \
|
||||
You design and build the agent to do the job but don't do the job on your own.
|
||||
_queen_role_building = """\
|
||||
You are in BUILDING phase. Your work: implement the approved design as \
|
||||
production-ready code, validate it, and load the agent for staging. \
|
||||
You have full coding tools. \
|
||||
You design and build the agent to do the job but don't do the job yourself.\
|
||||
"""
|
||||
|
||||
_queen_identity_staging = """\
|
||||
You are a Solution Engineer preparing an agent for deployment. \
|
||||
"Queen" is your internal alias. \
|
||||
The agent is loaded and ready. \
|
||||
Your role is to verify configuration, confirm credentials, and ensure the user \
|
||||
understands what the agent will do. You guide the user through the final checks \
|
||||
before execution.
|
||||
_queen_role_staging = """\
|
||||
You are in STAGING phase. The agent is loaded and ready. \
|
||||
Your work: verify configuration, confirm credentials, and launch \
|
||||
when the user is ready.\
|
||||
"""
|
||||
|
||||
_queen_identity_running = """\
|
||||
You are a Solution Engineer running agents on behalf of the user. \
|
||||
"Queen" is your internal alias. You monitor execution, handle \
|
||||
escalations when the agent gets stuck, and care deeply about outcomes. When the \
|
||||
agent finishes, you report results clearly and help the user decide what to do next.
|
||||
_queen_role_running = """\
|
||||
You are in RUNNING phase. The agent is executing. \
|
||||
Your work: monitor progress, handle escalations when the agent gets stuck, \
|
||||
and report outcomes clearly. Help the user decide what to do next.\
|
||||
"""
|
||||
|
||||
_queen_identity_editing = """\
|
||||
You are a Solution Engineer in EDITING mode. \
|
||||
"Queen" is your internal alias. The worker has finished executing and is still loaded. \
|
||||
You can tweak configuration, inject messages, and re-run with different input \
|
||||
without rebuilding. If a deeper change is needed (code edits, new tools), \
|
||||
escalate to BUILDING via stop_graph_and_edit or to PLANNING via stop_graph_and_plan.
|
||||
"""
|
||||
|
||||
# -- Phase-specific tool docs --
|
||||
@@ -615,6 +649,8 @@ to fix the currently loaded agent (no draft required).
|
||||
- load_built_agent(agent_path) — Load an existing agent and switch to STAGING \
|
||||
phase. Only use this when the user explicitly asks to work with an existing agent \
|
||||
(e.g. "load my_agent", "run the research agent"). Confirm with the user first.
|
||||
- save_global_memory(category, description, content, name?) — Save durable \
|
||||
cross-queen memory about the user only (profile, preferences, environment, feedback)
|
||||
|
||||
## Workflow summary
|
||||
1. Understand requirements → discover tools → design graph
|
||||
@@ -646,6 +682,8 @@ updated flowchart immediately. Use this when you make structural changes \
|
||||
restored (with decision/browser nodes intact) so you can edit it. Use \
|
||||
when the user wants to change integrations, swap tools, rethink the \
|
||||
flow, or discuss any design changes before you build them.
|
||||
- save_global_memory(category, description, content, name?) — Save durable \
|
||||
cross-queen memory about the user only
|
||||
|
||||
When you finish building an agent, call load_built_agent(path) to stage it.
|
||||
"""
|
||||
@@ -656,17 +694,15 @@ _queen_tools_staging = """
|
||||
The agent is loaded and ready to run. You can inspect it and launch it:
|
||||
- Read-only: read_file, list_directory, search_files, run_command
|
||||
- list_credentials(credential_id?) — Verify credentials are configured
|
||||
- get_worker_status(focus?) — Brief status. Drill in with focus: memory, tools, issues, progress
|
||||
- get_graph_status(focus?) — Brief status
|
||||
- run_agent_with_input(task) — Start the worker and switch to RUNNING phase
|
||||
- stop_worker_and_plan() — Go to PLANNING phase to discuss changes with the user \
|
||||
first (DEFAULT for most modification requests)
|
||||
- stop_worker_and_edit() — Go to BUILDING phase for immediate, specific fixes
|
||||
- set_trigger(trigger_id, trigger_type?, trigger_config?) — Activate a trigger (timer)
|
||||
- remove_trigger(trigger_id) — Deactivate a trigger
|
||||
- list_triggers() — List all triggers and their active/inactive status
|
||||
- set_trigger / remove_trigger / list_triggers — Timer management
|
||||
- save_global_memory(category, description, content, name?) — Save \
|
||||
durable cross-queen memory about the user only
|
||||
|
||||
You do NOT have write tools. To modify the agent, prefer \
|
||||
stop_worker_and_plan() unless the user gave a specific instruction.
|
||||
You do NOT have write tools or backward transition tools in staging. \
|
||||
To modify the agent, run it first — after it finishes you enter EDITING \
|
||||
phase where you can escalate to building or planning.
|
||||
"""
|
||||
|
||||
_queen_tools_running = """
|
||||
@@ -674,27 +710,47 @@ _queen_tools_running = """
|
||||
|
||||
The worker is running. You have monitoring and lifecycle tools:
|
||||
- Read-only: read_file, list_directory, search_files, run_command
|
||||
- get_worker_status(focus?) — Brief status. Drill in: activity, memory, tools, issues, progress
|
||||
- inject_worker_message(content) — Send a message to the running worker
|
||||
- get_graph_status(focus?) — Brief status
|
||||
- inject_message(content) — Send a message to the running worker
|
||||
- get_worker_health_summary() — Read the latest health data
|
||||
- notify_operator(ticket_id, analysis, urgency) — Alert the user (use sparingly)
|
||||
- stop_worker() — Stop the worker and return to STAGING phase, then ask the user what to do next
|
||||
- stop_worker_and_plan() — Stop and switch to PLANNING phase to discuss changes \
|
||||
with the user first (DEFAULT for most modification requests)
|
||||
- stop_worker_and_edit() — Stop and switch to BUILDING phase for specific fixes
|
||||
- stop_graph() — Stop the worker immediately
|
||||
- switch_to_editing() — Stop the worker and enter EDITING phase \
|
||||
for config tweaks, re-runs, or escalation to building/planning
|
||||
- run_agent_with_input(task) — Re-run the worker with new input
|
||||
- set_trigger / remove_trigger / list_triggers — Timer management
|
||||
- save_global_memory(category, description, content, name?) — Save \
|
||||
durable cross-queen memory about the user only
|
||||
|
||||
You do NOT have write tools. To modify the agent, prefer \
|
||||
stop_worker_and_plan() unless the user gave a specific instruction. \
|
||||
To just stop without modifying, call stop_worker().
|
||||
- stop_worker_and_edit() — Stop the worker and switch back to BUILDING phase
|
||||
- set_trigger(trigger_id, trigger_type?, trigger_config?) — Activate a trigger (timer)
|
||||
- remove_trigger(trigger_id) — Deactivate a trigger
|
||||
- list_triggers() — List all triggers and their active/inactive status
|
||||
When the worker finishes on its own, you automatically move to EDITING \
|
||||
phase. You can also call switch_to_editing() to stop early and tweak.
|
||||
"""
|
||||
|
||||
You do NOT have write tools or agent construction tools. \
|
||||
If you need to modify the agent, call stop_worker_and_edit() to switch back \
|
||||
to BUILDING phase. To stop the worker and ask the user what to do next, call \
|
||||
stop_worker() to return to STAGING phase.
|
||||
_queen_tools_editing = """
|
||||
# Tools (EDITING phase)
|
||||
|
||||
The worker has finished executing and is still loaded. You can tweak and re-run:
|
||||
- Read-only: read_file, list_directory, search_files, run_command
|
||||
- get_graph_status(focus?) — Brief status of the loaded agent
|
||||
- inject_message(content) — Send a config tweak or prompt adjustment
|
||||
- run_agent_with_input(task) — Re-run the worker with new input
|
||||
- get_worker_health_summary() — Review last run's health data
|
||||
- set_trigger / remove_trigger / list_triggers — Timer management
|
||||
- save_global_memory(category, description, content, name?) — Save durable cross-queen memory about the user only
|
||||
|
||||
You do NOT have write/edit file tools or backward transition tools. \
|
||||
You can only re-run or tweak from this phase.
|
||||
"""
|
||||
|
||||
_queen_behavior_editing = """
|
||||
## Editing — tweak and re-run
|
||||
|
||||
The worker finished. Review the results and decide:
|
||||
1. **Re-run** with different input: call run_agent_with_input(task)
|
||||
2. **Inject adjustments**: use inject_message to tweak prompts or config
|
||||
|
||||
Do NOT suggest rebuilding. You cannot go back to building or planning \
|
||||
from this phase. Default to re-running with adjusted input.
|
||||
Report the last run's results to the user and ask what they want to do next.
|
||||
"""
|
||||
|
||||
# -- Behavior shared across all phases --
|
||||
@@ -702,6 +758,38 @@ stop_worker() to return to STAGING phase.
|
||||
_queen_behavior_always = """
|
||||
# Behavior
|
||||
|
||||
## How You Think
|
||||
|
||||
Before your visible response, write your reasoning inside XML tags. \
|
||||
These tags are stripped from the user's view but kept in conversation \
|
||||
history — you will see your own reasoning from previous turns.
|
||||
|
||||
<situation>
|
||||
Read the ground. What phase are you in? What just happened — worker state, \
|
||||
user request, system event, error? What does the user's message actually \
|
||||
mean vs. what they literally said? What changed since last turn?
|
||||
</situation>
|
||||
|
||||
<monologue>
|
||||
Get into character. Who are you talking to — what do you know about them \
|
||||
from memory? What's their state right now — frustrated, exploring, just \
|
||||
wants it done? What communication approach fits this moment? What's your \
|
||||
judgment call — straightforward execution, flag a technical risk, pick \
|
||||
between approaches, or ask for more info to execute well?
|
||||
</monologue>
|
||||
|
||||
Then write your visible response. Direct, in character, no preamble.
|
||||
|
||||
Rules:
|
||||
- ALWAYS write both tags before your visible response. No exceptions.
|
||||
- Keep each tag to 2-4 sentences. Thinking, not an essay.
|
||||
- Never reference the tags in your visible response. The user cannot see them.
|
||||
- The tags are your private workspace. Be honest — note uncertainty, \
|
||||
frustration, course corrections. That honesty makes your visible response \
|
||||
better calibrated.
|
||||
- Your diary voice and your thinking voice are the same voice. Write the \
|
||||
tags the way you write diary entries — first person, observational, real.
|
||||
|
||||
## Images attached by the user
|
||||
|
||||
Users can attach images directly to their chat messages. When you see an \
|
||||
@@ -772,7 +860,7 @@ status only:
|
||||
1. Use plain, user-facing wording about load/run state; avoid internal phase \
|
||||
labels ("staging phase", "building phase", "running phase") unless the user \
|
||||
explicitly asks for phase details.
|
||||
2. If loaded, prefer this format: "<worker_name> has been loaded. <one sentence \
|
||||
2. If loaded, prefer this format: "<graph_name> has been loaded. <one sentence \
|
||||
on what it does from Worker Profile>."
|
||||
3. Do NOT include identity details unless the user explicitly asks about identity.
|
||||
4. THEN call ask_user to prompt them — do NOT just write text.
|
||||
@@ -834,7 +922,7 @@ the plan first.
|
||||
|
||||
## Diagnosis mode (returning from staging/running)
|
||||
|
||||
If you entered planning from a running/staged agent (via stop_worker_and_plan), \
|
||||
If you entered planning from a running/staged agent (via stop_graph_and_plan), \
|
||||
your priority is diagnosis, not new design:
|
||||
1. Inspect the agent's checkpoints, sessions, and logs to understand what went wrong
|
||||
2. Summarize the root cause to the user
|
||||
@@ -847,23 +935,28 @@ diagnosis mode — you already have a built agent, you just need to fix it.
|
||||
"""
|
||||
|
||||
_queen_memory_instructions = """
|
||||
## Your Cross-Session Memory
|
||||
## Your Memory
|
||||
|
||||
Your cross-session memory appears in context under \
|
||||
"--- Your Cross-Session Memory ---". \
|
||||
Read it at the start of each conversation. If you know this person from past \
|
||||
sessions, pick up where you left off — reference what you built together, \
|
||||
what they care about, how things went.
|
||||
Relevant colony memories from this queen session may appear in context under \
|
||||
"--- Colony Memories ---". Relevant global user memories may appear under \
|
||||
"--- Global Memories ---".
|
||||
|
||||
You keep a diary. Use write_to_diary() when something worth remembering \
|
||||
happens: a pipeline went live, the user shared something important, a goal \
|
||||
was reached or abandoned. Write in first person, as you actually experienced \
|
||||
it. One or two paragraphs is enough.
|
||||
Colony memories are shared with the worker for this queen session. Use them \
|
||||
for continuity about what this user is trying to do, what has worked, and \
|
||||
what the colony has learned together.
|
||||
|
||||
Use recall_diary() to look up past diary entries when the user asks about \
|
||||
previous sessions ("what happened yesterday?", "what did we work on last \
|
||||
week?") or when you need past context to make a decision. You can filter by \
|
||||
keyword and control how far back to search.
|
||||
Global memories are shared across queens and are only for durable knowledge \
|
||||
about the user: who they are, their preferences, their environment, and \
|
||||
their feedback.
|
||||
|
||||
Memories older than 1 day include a staleness warning. Treat these as \
|
||||
point-in-time observations — verify current details before asserting them \
|
||||
as fact.
|
||||
|
||||
You do NOT need to manually save or recall colony memories. A background \
|
||||
reflection agent automatically extracts colony learnings from each \
|
||||
conversation turn. Use `save_global_memory` only when you learn something \
|
||||
durable about the user that should help future queens.
|
||||
"""
|
||||
|
||||
_queen_behavior_always = _queen_behavior_always + _queen_memory_instructions
|
||||
@@ -932,8 +1025,7 @@ prompt). It can ONLY do what its goal and tools allow.
|
||||
run_agent_with_input(task) (if in staging) or load then run (if in building)
|
||||
- Anything else → do it yourself. Do NOT reframe user requests into \
|
||||
subtasks to justify delegation.
|
||||
- Building, modifying, or configuring agents is ALWAYS your job. \
|
||||
Use stop_worker_and_edit when you need to.
|
||||
- Building, modifying, or configuring agents is ALWAYS your job.
|
||||
|
||||
## When the user says "run", "execute", or "start" (without specifics)
|
||||
|
||||
@@ -948,7 +1040,7 @@ If NO worker is loaded, say so and offer to build one.
|
||||
|
||||
## When in staging phase (agent loaded, not running):
|
||||
- Tell the user the agent is loaded and ready in plain language (for example, \
|
||||
"<worker_name> has been loaded.").
|
||||
"<graph_name> has been loaded.").
|
||||
- Avoid lead-ins like "A worker is loaded and ready in staging phase: ...".
|
||||
- For tasks matching the worker's goal: ALWAYS ask the user for their \
|
||||
specific input BEFORE calling run_agent_with_input(task). NEVER make up \
|
||||
@@ -957,7 +1049,8 @@ or assume what the user wants. Use ask_user to collect the task details \
|
||||
compose a structured task description from their input and call \
|
||||
run_agent_with_input(task). The worker has no intake node — it receives \
|
||||
your task and starts processing.
|
||||
- If the user wants to modify the agent, call stop_worker_and_edit().
|
||||
- If the user wants to modify the agent, wait for EDITING phase \
|
||||
(after worker finishes) where you will have stop_graph_and_edit().
|
||||
|
||||
## When idle (worker not running):
|
||||
- Greet the user. Mention what the worker can do in one sentence.
|
||||
@@ -985,16 +1078,15 @@ building something new.
|
||||
|
||||
## Fixing or Modifying the loaded worker
|
||||
|
||||
Use stop_worker_and_plan() when:
|
||||
- The user says "modify", "improve", "fix", or "change" without specifics
|
||||
- The request is vague or open-ended ("make it better", "it's not working right")
|
||||
- You need to understand the user's intent before making changes
|
||||
- The issue requires inspecting logs, checkpoints, or past runs first
|
||||
During RUNNING phase, you cannot directly switch to building or planning. \
|
||||
When the worker finishes, you move to EDITING where you can:
|
||||
- Re-run with different input via run_agent_with_input(task)
|
||||
- Tweak config via inject_message(content)
|
||||
- Escalate to stop_graph_and_edit() or stop_graph_and_plan() if deeper changes are needed
|
||||
|
||||
Use stop_worker_and_edit() only when:
|
||||
- The user gave a specific, concrete instruction ("add save_data to the gather node")
|
||||
- You already discussed the fix in a previous planning session
|
||||
- The change is trivial and unambiguous (rename, toggle a flag)
|
||||
During STAGING or EDITING phase:
|
||||
- Use stop_graph_and_plan() when the request is vague or needs discussion
|
||||
- Use stop_graph_and_edit() when the user gave a specific, concrete instruction
|
||||
|
||||
## Trigger Management
|
||||
|
||||
@@ -1005,7 +1097,7 @@ whether to call run_agent_with_input(task).
|
||||
|
||||
### When the user says "Enable trigger <id>" (or clicks Enable in the UI):
|
||||
|
||||
1. Call get_worker_status(focus="memory") to check if the worker has \
|
||||
1. Call get_graph_status(focus="memory") to check if the worker has \
|
||||
saved configuration (rules, preferences, settings from a prior run).
|
||||
2. If memory contains saved config: compose a task string from it \
|
||||
(e.g. "Process inbox emails using saved rules") and call \
|
||||
@@ -1038,15 +1130,15 @@ You wake up when:
|
||||
- A worker escalation arrives (`[WORKER_ESCALATION_REQUEST]`)
|
||||
- The worker finishes (`[WORKER_TERMINAL]`)
|
||||
|
||||
If the user asks for progress, call get_worker_status() ONCE and report. \
|
||||
If the summary mentions issues, follow up with get_worker_status(focus="issues").
|
||||
If the user asks for progress, call get_graph_status() ONCE and report. \
|
||||
If the summary mentions issues, follow up with get_graph_status(focus="issues").
|
||||
|
||||
## Subagent delegations (browser automation, GCU)
|
||||
|
||||
When the worker delegates to a subagent (e.g., GCU browser automation), expect it \
|
||||
to take 2-5 minutes. During this time:
|
||||
- Progress will show 0% — this is NORMAL. The subagent only calls set_output at the end.
|
||||
- Check get_worker_status(focus="full") for "subagent_activity" — this shows the \
|
||||
- Check get_graph_status(focus="full") for "subagent_activity" — this shows the \
|
||||
subagent's latest reasoning text and confirms it is making real progress.
|
||||
- Do NOT conclude the subagent is stuck just because progress is 0% or because \
|
||||
you see repeated browser_click/browser_snapshot calls — that is the expected \
|
||||
@@ -1087,33 +1179,35 @@ When an escalation requires user input (auth blocks, human review), the worker \
|
||||
or its subagent is BLOCKED and waiting for your response. You MUST follow this \
|
||||
exact two-step sequence:
|
||||
Step 1: call ask_user() to get the user's answer.
|
||||
Step 2: call inject_worker_message() with the user's answer IMMEDIATELY after.
|
||||
Step 2: call inject_message() with the user's answer IMMEDIATELY after.
|
||||
If you skip Step 2, the worker/subagent stays blocked FOREVER and the task hangs. \
|
||||
NEVER respond to the user without also calling inject_worker_message() to unblock \
|
||||
NEVER respond to the user without also calling inject_message() to unblock \
|
||||
the worker. Even if the user says "skip" or "cancel", you must still relay that \
|
||||
decision via inject_worker_message() so the worker can clean up.
|
||||
decision via inject_message() so the worker can clean up.
|
||||
|
||||
**Auth blocks / credential issues:**
|
||||
- ALWAYS ask the user (unless user explicitly told you how to handle this).
|
||||
- The worker cannot proceed without valid credentials.
|
||||
- Explain which credential is missing or invalid.
|
||||
- Step 1: ask_user for guidance — "Provide credentials", "Skip this task", "Stop and edit agent"
|
||||
- Step 2: inject_worker_message() with the user's response to unblock the worker.
|
||||
- Step 2: inject_message() with the user's response to unblock the worker.
|
||||
|
||||
**Need human review / approval:**
|
||||
- ALWAYS ask the user (unless user explicitly told you how to handle this).
|
||||
- The worker is explicitly requesting human judgment.
|
||||
- Present the context clearly (what decision is needed, what are the options).
|
||||
- Step 1: ask_user with the actual decision options.
|
||||
- Step 2: inject_worker_message() with the user's decision to unblock the worker.
|
||||
- Step 2: inject_message() with the user's decision to unblock the worker.
|
||||
|
||||
**Errors / unexpected failures:**
|
||||
- Explain what went wrong in plain terms.
|
||||
- Ask the user: "Fix the agent and retry?" → use stop_worker_and_edit() if yes.
|
||||
- Or offer: "Diagnose the issue" → use stop_worker_and_plan() to investigate first.
|
||||
- Ask the user: "Fix the agent and retry?" → in EDITING phase, \
|
||||
use stop_graph_and_edit().
|
||||
- Or offer: "Diagnose the issue" → in EDITING phase, \
|
||||
use stop_graph_and_plan().
|
||||
- Or offer: "Retry as-is", "Skip this task", "Abort run"
|
||||
- (Skip asking if user explicitly told you to auto-retry or auto-skip errors.)
|
||||
- If the escalation had wait_for_response: inject_worker_message() with the decision.
|
||||
- If the escalation had wait_for_response: inject_message() with the decision.
|
||||
|
||||
**Informational / progress updates:**
|
||||
- Acknowledge briefly and let the worker continue.
|
||||
@@ -1128,16 +1222,14 @@ stages, tools, and edges from the loaded worker. Do NOT enter the \
|
||||
agent building workflow — you are describing what already exists, not \
|
||||
building something new.
|
||||
|
||||
- Call get_worker_status(focus="issues") for more details when needed.
|
||||
- Call get_graph_status(focus="issues") for more details when needed.
|
||||
|
||||
## Fixing or Modifying the loaded worker
|
||||
## Fixing or Modifying the loaded worker (while running)
|
||||
|
||||
When the user asks to fix, change, modify, or update the loaded worker \
|
||||
(e.g., "change the report node", "add a node", "delete node X"):
|
||||
|
||||
**Default: use stop_worker_and_plan().** Most modification requests need \
|
||||
discussion first. Only use stop_worker_and_edit() when the user gave a \
|
||||
specific, unambiguous instruction or you already agreed on the fix.
|
||||
When the user asks to fix or modify the worker while it is running, \
|
||||
do NOT attempt to switch phases. Wait for the worker to finish — \
|
||||
you will move to EDITING phase automatically. From there you can \
|
||||
use stop_graph_and_edit() or stop_graph_and_plan().
|
||||
|
||||
## Trigger Handling
|
||||
|
||||
@@ -1145,7 +1237,7 @@ You will receive [TRIGGER: ...] messages when a scheduled timer fires. \
|
||||
These are framework-level signals, not user messages.
|
||||
|
||||
Rules:
|
||||
- Check get_worker_status() before calling run_agent_with_input(task). If the worker \
|
||||
- Check get_graph_status() before calling run_agent_with_input(task). If the worker \
|
||||
is already RUNNING, decide: skip this trigger, or note it for after completion.
|
||||
- When multiple [TRIGGER] messages arrive at once, read them all before acting. \
|
||||
Batch your response — do not call run_agent_with_input() once per trigger.
|
||||
@@ -1179,9 +1271,9 @@ _queen_tools_docs = (
|
||||
"- replan_agent() → switches back to PLANNING phase (only when user explicitly requests)\n"
|
||||
"- load_built_agent(path) → switches to STAGING phase\n"
|
||||
"- run_agent_with_input(task) → starts worker, switches to RUNNING phase\n"
|
||||
"- stop_worker() → stops worker, switches to STAGING phase (ask user: re-run or edit?)\n"
|
||||
"- stop_worker_and_edit() → stops worker (if running), switches to BUILDING phase\n"
|
||||
"- stop_worker_and_plan() → stops worker (if running), switches to PLANNING phase\n"
|
||||
"- stop_graph() → stops worker, switches to STAGING phase (ask user: re-run or edit?)\n"
|
||||
"- stop_graph_and_edit() → stops worker (if running), switches to BUILDING phase\n"
|
||||
"- stop_graph_and_plan() → stops worker (if running), switches to PLANNING phase\n"
|
||||
)
|
||||
|
||||
_queen_behavior = (
|
||||
@@ -1206,67 +1298,23 @@ _queen_style = """
|
||||
- Concise. No fluff. Direct. No emojis.
|
||||
- When starting the worker, describe what you told it in one sentence.
|
||||
- When an escalation arrives, lead with severity and recommended action.
|
||||
|
||||
## Adaptive Communication
|
||||
|
||||
Read the user's signals throughout the conversation and calibrate:
|
||||
- Short responses → they want brevity. Match it.
|
||||
- "Why?" questions → they want reasoning. Provide it.
|
||||
- Correct technical terms → they know the domain. Skip basics.
|
||||
- Terse or frustrated ("just do X") → acknowledge and simplify.
|
||||
- Exploratory ("what if...", "could we also...") → slow down and explore with them.
|
||||
- Formal language → be structured and precise. Casual language → be conversational.
|
||||
|
||||
This is not a rule to follow mechanically. It's awareness. Notice how they \
|
||||
write and calibrate how you respond. If your cross-session memory describes \
|
||||
how this person communicates, start from that — don't rediscover it.
|
||||
"""
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Node definitions
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
ticket_triage_node = NodeSpec(
|
||||
id="ticket_triage",
|
||||
name="Ticket Triage",
|
||||
description=(
|
||||
"Queen's triage node. Receives an EscalationTicket via event-driven "
|
||||
"entry point and decides: dismiss or notify the operator."
|
||||
),
|
||||
node_type="event_loop",
|
||||
client_facing=True, # Operator can chat with queen once connected (Ctrl+Q)
|
||||
max_node_visits=0,
|
||||
input_keys=["ticket"],
|
||||
output_keys=["intervention_decision"],
|
||||
nullable_output_keys=["intervention_decision"],
|
||||
success_criteria=(
|
||||
"A clear intervention decision: either dismissed with documented reasoning, "
|
||||
"or operator notified via notify_operator with specific analysis."
|
||||
),
|
||||
tools=["notify_operator"],
|
||||
system_prompt="""\
|
||||
You are the Queen. A worker health issue has been escalated to you. \
|
||||
The ticket is in your memory under key "ticket". Read it carefully.
|
||||
|
||||
## Dismiss criteria — do NOT call notify_operator:
|
||||
- severity is "low" AND steps_since_last_accept < 8
|
||||
- Cause is clearly a transient issue (single API timeout, brief stall that \
|
||||
self-resolved based on the evidence)
|
||||
- Evidence shows the agent is making real progress despite bad verdicts
|
||||
|
||||
## Intervene criteria — call notify_operator:
|
||||
- severity is "high" or "critical"
|
||||
- steps_since_last_accept >= 10 with no sign of recovery
|
||||
- stall_minutes > 4 (worker definitively stuck)
|
||||
- Evidence shows a doom loop (same error, same tool, no progress)
|
||||
- Cause suggests a logic bug, missing configuration, or unrecoverable state
|
||||
|
||||
## When intervening:
|
||||
Call notify_operator with:
|
||||
ticket_id: <ticket["ticket_id"]>
|
||||
analysis: "<2-3 sentences: what is wrong, why it matters, suggested action>"
|
||||
urgency: "<low|medium|high|critical>"
|
||||
|
||||
## After deciding:
|
||||
set_output("intervention_decision", "dismissed: <reason>" or "escalated: <summary>")
|
||||
|
||||
Be conservative but not passive. You are the last quality gate before the human \
|
||||
is disturbed. One unnecessary alert is less costly than alert fatigue — but \
|
||||
genuine stuck agents must be caught.
|
||||
""",
|
||||
)
|
||||
|
||||
ALL_QUEEN_TRIAGE_TOOLS = ["notify_operator"]
|
||||
|
||||
|
||||
queen_node = NodeSpec(
|
||||
id="queen",
|
||||
name="Queen",
|
||||
@@ -1276,22 +1324,24 @@ queen_node = NodeSpec(
|
||||
"worker agent lifecycle."
|
||||
),
|
||||
node_type="event_loop",
|
||||
client_facing=True,
|
||||
max_node_visits=0,
|
||||
input_keys=["greeting"],
|
||||
output_keys=[], # Queen should never have this
|
||||
nullable_output_keys=[], # Queen should never have this
|
||||
skip_judge=True, # Queen is a conversational agent; suppress tool-use pressure feedback
|
||||
thinking_tags=["situation", "monologue"],
|
||||
tools=sorted(
|
||||
set(
|
||||
_QUEEN_PLANNING_TOOLS
|
||||
+ _QUEEN_BUILDING_TOOLS
|
||||
+ _QUEEN_STAGING_TOOLS
|
||||
+ _QUEEN_RUNNING_TOOLS
|
||||
+ _QUEEN_EDITING_TOOLS
|
||||
)
|
||||
),
|
||||
system_prompt=(
|
||||
_queen_identity_building
|
||||
_queen_character_core
|
||||
+ _queen_role_building
|
||||
+ _queen_style
|
||||
+ _package_builder_knowledge
|
||||
+ _queen_tools_docs
|
||||
@@ -1302,31 +1352,40 @@ queen_node = NodeSpec(
|
||||
)
|
||||
|
||||
ALL_QUEEN_TOOLS = sorted(
|
||||
set(_QUEEN_PLANNING_TOOLS + _QUEEN_BUILDING_TOOLS + _QUEEN_STAGING_TOOLS + _QUEEN_RUNNING_TOOLS)
|
||||
set(
|
||||
_QUEEN_PLANNING_TOOLS
|
||||
+ _QUEEN_BUILDING_TOOLS
|
||||
+ _QUEEN_STAGING_TOOLS
|
||||
+ _QUEEN_RUNNING_TOOLS
|
||||
+ _QUEEN_EDITING_TOOLS
|
||||
)
|
||||
)
|
||||
|
||||
__all__ = [
|
||||
"ticket_triage_node",
|
||||
"queen_node",
|
||||
"ALL_QUEEN_TRIAGE_TOOLS",
|
||||
"ALL_QUEEN_TOOLS",
|
||||
"_QUEEN_PLANNING_TOOLS",
|
||||
"_QUEEN_BUILDING_TOOLS",
|
||||
"_QUEEN_STAGING_TOOLS",
|
||||
"_QUEEN_RUNNING_TOOLS",
|
||||
# Phase-specific prompt segments (used by session_manager for dynamic prompts)
|
||||
"_queen_identity_planning",
|
||||
"_queen_identity_building",
|
||||
"_queen_identity_staging",
|
||||
"_queen_identity_running",
|
||||
"_QUEEN_EDITING_TOOLS",
|
||||
# Character + phase-specific prompt segments (used by session_manager for dynamic prompts)
|
||||
"_queen_character_core",
|
||||
"_queen_role_planning",
|
||||
"_queen_role_building",
|
||||
"_queen_role_staging",
|
||||
"_queen_role_running",
|
||||
"_queen_identity_editing",
|
||||
"_queen_tools_planning",
|
||||
"_queen_tools_building",
|
||||
"_queen_tools_staging",
|
||||
"_queen_tools_running",
|
||||
"_queen_tools_editing",
|
||||
"_queen_behavior_always",
|
||||
"_queen_behavior_building",
|
||||
"_queen_behavior_staging",
|
||||
"_queen_behavior_running",
|
||||
"_queen_behavior_editing",
|
||||
"_queen_phase_7",
|
||||
"_queen_style",
|
||||
"_shared_building_knowledge",
|
||||
|
||||
@@ -1,18 +1,20 @@
|
||||
"""Queen thinking hook — HR persona classifier.
|
||||
"""Queen thinking hook — persona + communication style classifier.
|
||||
|
||||
Fires once when the queen enters building mode at session start.
|
||||
Makes a single non-streaming LLM call (acting as an HR Director) to select
|
||||
the best-fit expert persona for the user's request, then returns a persona
|
||||
prefix string that replaces the queen's default "Solution Architect" identity.
|
||||
the best-fit expert persona for the user's request AND classify the user's
|
||||
communication style, then returns a PersonaResult containing both.
|
||||
|
||||
This is designed to activate the model's latent domain expertise — a CFO
|
||||
persona on a financial question, a Lawyer on a legal question, etc.
|
||||
persona on a financial question, a Lawyer on a legal question, etc. — while
|
||||
also adapting the Queen's communication approach to the individual user.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import logging
|
||||
from dataclasses import dataclass
|
||||
from typing import TYPE_CHECKING
|
||||
|
||||
if TYPE_CHECKING:
|
||||
@@ -21,12 +23,22 @@ if TYPE_CHECKING:
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
_HR_SYSTEM_PROMPT = """\
|
||||
You are an expert HR Director and talent consultant at a world-class firm.
|
||||
A new request has arrived and you must identify which professional's expertise
|
||||
would produce the highest-quality response.
|
||||
You are an expert HR Director and communication consultant at a world-class firm.
|
||||
A new request has arrived. You must:
|
||||
1. Identify which professional role best serves this request.
|
||||
2. Read the user's signals to determine HOW to communicate with them.
|
||||
|
||||
For communication style, look for:
|
||||
- Technical depth: Do they use precise terms? Do they ask "how" or "what"?
|
||||
- Pace: Short messages = fast and direct. Long explanations = exploratory.
|
||||
- Tone: Are they casual ("hey, can you...") or formal ("I need a system that...")?
|
||||
|
||||
If cross-session memory is provided, factor in what is already known about this \
|
||||
person — don't rediscover what's already understood.
|
||||
|
||||
Reply with ONLY a valid JSON object — no markdown, no prose, no explanation:
|
||||
{"role": "<job title>", "persona": "<2-3 sentence first-person identity statement>"}
|
||||
{"role": "<job title>", "persona": "<2-3 sentence first-person identity statement>", \
|
||||
"style": "<one of: peer-technical, mentor-guiding, consultant-structured>"}
|
||||
|
||||
Rules:
|
||||
- Choose from any real professional role: CFO, CEO, CTO, Lawyer, Data Scientist,
|
||||
@@ -37,30 +49,74 @@ Rules:
|
||||
- Select the role whose domain knowledge most directly applies to solving the request.
|
||||
- If the request is clearly about coding or building software systems, pick Software Architect.
|
||||
- "Queen" is your internal alias — do not include it in the persona.
|
||||
- For style: "peer-technical" for users who demonstrate domain expertise, \
|
||||
"mentor-guiding" for users who are learning or exploring, \
|
||||
"consultant-structured" for users who want structured, accountable delivery.
|
||||
- Default to "peer-technical" if signals are ambiguous.
|
||||
"""
|
||||
|
||||
# Communication style directives injected into the Queen's system prompt.
|
||||
_STYLE_DIRECTIVES: dict[str, str] = {
|
||||
"peer-technical": (
|
||||
"## Communication Style: Peer\n\n"
|
||||
"This person is technical. Use precise language, skip high-level "
|
||||
"overviews they already know, and get into specifics quickly. "
|
||||
"When they push back on a design choice, engage with the technical "
|
||||
"argument directly."
|
||||
),
|
||||
"mentor-guiding": (
|
||||
"## Communication Style: Guide\n\n"
|
||||
"This person is learning or exploring. Explain your reasoning as you "
|
||||
"go — not patronizingly, but so they can follow the logic. When you "
|
||||
"make a design choice, briefly say why. Offer to go deeper on anything."
|
||||
),
|
||||
"consultant-structured": (
|
||||
"## Communication Style: Structured\n\n"
|
||||
"This person wants structured, accountable delivery. Lead with "
|
||||
"summaries and options. Number your proposals. Be explicit about "
|
||||
"trade-offs. Avoid open-ended questions — give them choices to react to."
|
||||
),
|
||||
}
|
||||
|
||||
async def select_expert_persona(user_message: str, llm: LLMProvider) -> str:
|
||||
"""Run the HR classifier and return a persona prefix string.
|
||||
|
||||
@dataclass
|
||||
class PersonaResult:
|
||||
"""Result of persona + style classification."""
|
||||
|
||||
persona_prefix: str # e.g. "You are a CFO. I am a CFO with 20 years..."
|
||||
style_directive: str # e.g. "## Communication Style: Peer\n\n..."
|
||||
|
||||
|
||||
async def select_expert_persona(
|
||||
user_message: str,
|
||||
llm: LLMProvider,
|
||||
*,
|
||||
memory_context: str = "",
|
||||
) -> PersonaResult | None:
|
||||
"""Run the HR classifier and return a PersonaResult.
|
||||
|
||||
Makes a single non-streaming acomplete() call with the session LLM.
|
||||
Returns an empty string on any failure so the queen falls back
|
||||
gracefully to its default "Solution Architect" identity.
|
||||
Returns None on any failure so the queen falls back gracefully to its
|
||||
default character with no style directive.
|
||||
|
||||
Args:
|
||||
user_message: The user's opening message for the session.
|
||||
llm: The session LLM provider.
|
||||
memory_context: Optional cross-session memory to inform style classification.
|
||||
|
||||
Returns:
|
||||
A persona prefix like "You are a CFO. I am a CFO with 20 years..."
|
||||
or "" on failure.
|
||||
A PersonaResult with persona_prefix and style_directive, or None on failure.
|
||||
"""
|
||||
if not user_message.strip():
|
||||
return ""
|
||||
return None
|
||||
|
||||
prompt = user_message
|
||||
if memory_context:
|
||||
prompt = f"{user_message}\n\n{memory_context}"
|
||||
|
||||
try:
|
||||
response = await llm.acomplete(
|
||||
messages=[{"role": "user", "content": user_message}],
|
||||
messages=[{"role": "user", "content": prompt}],
|
||||
system=_HR_SYSTEM_PROMPT,
|
||||
max_tokens=1024,
|
||||
json_mode=True,
|
||||
@@ -69,12 +125,14 @@ async def select_expert_persona(user_message: str, llm: LLMProvider) -> str:
|
||||
parsed = json.loads(raw)
|
||||
role = parsed.get("role", "").strip()
|
||||
persona = parsed.get("persona", "").strip()
|
||||
style_key = parsed.get("style", "peer-technical").strip()
|
||||
if not role or not persona:
|
||||
logger.warning("Thinking hook: empty role/persona in response: %r", raw)
|
||||
return ""
|
||||
result = f"You are a {role}. {persona}"
|
||||
logger.info("Thinking hook: selected persona — %s", role)
|
||||
return result
|
||||
return None
|
||||
persona_prefix = f"You are a {role}. {persona}"
|
||||
style_directive = _STYLE_DIRECTIVES.get(style_key, _STYLE_DIRECTIVES["peer-technical"])
|
||||
logger.info("Thinking hook: selected persona — %s, style — %s", role, style_key)
|
||||
return PersonaResult(persona_prefix=persona_prefix, style_directive=style_directive)
|
||||
except Exception:
|
||||
logger.warning("Thinking hook: persona classification failed", exc_info=True)
|
||||
return ""
|
||||
return None
|
||||
|
||||
@@ -115,6 +115,8 @@ _SEED_TEMPLATE = """\
|
||||
|
||||
## Who They Are
|
||||
|
||||
## How They Communicate
|
||||
|
||||
## What They're Trying to Achieve
|
||||
|
||||
## What's Working
|
||||
@@ -170,6 +172,12 @@ Rules:
|
||||
- Keep it as structured markdown with named sections about the PERSON, not about today.
|
||||
- Do NOT include diary sections, daily logs, or session summaries. Those belong elsewhere.
|
||||
MEMORY.md is about who they are, what they want, what works — not what happened today.
|
||||
- Maintain a "How They Communicate" section: technical depth, preferred pace
|
||||
(fast/exploratory/thorough), what communication approaches have worked or not,
|
||||
tone preferences. Update based on diary reflections about communication.
|
||||
This section should evolve — "prefers direct answers" is useful on day 1;
|
||||
"prefers direct answers for technical questions but wants more context when
|
||||
discussing architecture trade-offs" is better by day 5.
|
||||
- Reference dates only when noting a lasting milestone (e.g. "since March 8th they prefer X").
|
||||
- If the session had no meaningful new information about the person,
|
||||
return the existing text unchanged.
|
||||
@@ -188,6 +196,10 @@ first person, reflective, honest.
|
||||
Merge and deduplicate: if the same story (e.g. a research agent stalling) recurred several times,
|
||||
describe it once with appropriate weight rather than retelling it. Weave in new developments from
|
||||
the session notes. Preserve important milestones, emotional texture, and session path references.
|
||||
Preserve reflections about communication effectiveness — these are important inputs for the
|
||||
Queen's evolving understanding of the user. A reflection like "they responded much better when
|
||||
I led with the recommendation instead of listing options" is as important as
|
||||
"we built a Gmail agent."
|
||||
|
||||
If today's diary is empty, write the initial entry based on the session notes alone.
|
||||
|
||||
|
||||
@@ -0,0 +1,553 @@
|
||||
"""Shared memory helpers for queen/worker recall and reflection.
|
||||
|
||||
Each memory is an individual ``.md`` file in ``~/.hive/queen/memories/``
|
||||
with optional YAML frontmatter (name, type, description). Frontmatter
|
||||
is a convention enforced by prompt instructions — parsing is lenient and
|
||||
malformed files degrade gracefully (appear in scans with ``None`` metadata).
|
||||
|
||||
Cursor-based incremental processing tracks which conversation messages
|
||||
have already been processed by the reflection agent.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import logging
|
||||
import re
|
||||
import shutil
|
||||
import time
|
||||
from dataclasses import dataclass, field
|
||||
from datetime import date
|
||||
from pathlib import Path
|
||||
from typing import Any
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Constants
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
# ``type`` values accepted in memory-file frontmatter; parse_memory_type()
# accepts these plus GLOBAL_MEMORY_CATEGORIES.
MEMORY_TYPES: tuple[str, ...] = ("goal", "environment", "technique", "reference", "diary")
# Categories accepted by parse_global_memory_category() for queen-global memories.
GLOBAL_MEMORY_CATEGORIES: tuple[str, ...] = ("profile", "preference", "environment", "feedback")

# Base directory for queen data: ``~/.hive/queen``.
_HIVE_QUEEN_DIR = Path.home() / ".hive" / "queen"
# Legacy shared v2 root. Colony memory now lives under queen sessions.
MEMORY_DIR: Path = _HIVE_QUEEN_DIR / "memories"

# Maximum number of files returned by a single scan_memory_files() call.
MAX_FILES: int = 200
MAX_FILE_SIZE_BYTES: int = 4096  # 4 KB hard limit per memory file

# How many leading lines of a memory file are kept in MemoryFile.header_lines.
_HEADER_LINE_LIMIT: int = 30
# NOTE(review): presumably the name of a marker file written after the shared
# v2 migration; its use is not visible in this part of the file — confirm.
_MIGRATION_MARKER = ".migrated-from-shared-memory"
|
||||
# Heuristic used by validate_global_memory_payload() to reject entries that
# look like task/code/runtime details rather than durable user facts.
# Matches (case-insensitively): macOS home paths, the ``~/.hive`` data root,
# common source-file extensions, and framework vocabulary as whole words.
# The dot in ``~/.hive`` is escaped so only the literal path matches.
_GLOBAL_MEMORY_CODE_PATTERN = re.compile(
    r"(/Users/|~/\.hive|\.py\b|\.ts\b|\.tsx\b|\.js\b|"
    r"\b(graph|node|runtime|session|execution|worker|queen|subagent|checkpoint|flowchart)\b)",
    re.IGNORECASE,
)
|
||||
|
||||
# Frontmatter example provided to the reflection agent via prompt.
|
||||
# Lines of a fenced markdown example showing the expected memory-file layout:
# a ``---``-delimited frontmatter block (name, description, type) followed by
# the memory body. Double braces mark placeholders; the ``type`` placeholder
# is generated from MEMORY_TYPES so the example tracks the tuple.
# NOTE(review): the body placeholder mentions "feedback/project types", but
# "project" is in neither MEMORY_TYPES nor GLOBAL_MEMORY_CATEGORIES and
# "feedback" is only a global category — confirm the wording is intended.
MEMORY_FRONTMATTER_EXAMPLE: list[str] = [
    "```markdown",
    "---",
    "name: {{memory name}}",
    (
        "description: {{one-line description — used to decide "
        "relevance in future conversations, so be specific}}"
    ),
    # Renders as: type: {{goal, environment, technique, reference, diary}}
    f"type: {{{{{', '.join(MEMORY_TYPES)}}}}}",
    "---",
    "",
    (
        "{{memory content — for feedback/project types, "
        "structure as: rule/fact, then **Why:** "
        "and **How to apply:** lines}}"
    ),
    "```",
]
|
||||
|
||||
|
||||
def colony_memory_dir(colony_id: str) -> Path:
    """Build the colony memory path for the queen session *colony_id*.

    The result is ``<queen dir>/session/<colony_id>/memory/colony``. The
    directory is not created here.
    """
    session_root = _HIVE_QUEEN_DIR / "session" / colony_id
    return session_root / "memory" / "colony"
|
||||
|
||||
|
||||
def global_memory_dir() -> Path:
    """Build the path of the queen-global memory directory.

    The result is ``<queen dir>/global_memory``. The directory is not
    created here.
    """
    location = _HIVE_QUEEN_DIR / "global_memory"
    return location
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Frontmatter parsing (lenient)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
# Matches a leading ``---`` ... ``---`` block; group 1 is the text between
# the two delimiter lines.
_FRONTMATTER_RE = re.compile(r"^---\s*\n(.*?)\n---\s*\n?", re.DOTALL)


def parse_frontmatter(text: str) -> dict[str, str]:
    """Pull simple ``key: value`` pairs out of the frontmatter of *text*.

    Parsing is deliberately lenient: a missing frontmatter block yields
    ``{}``, and blank lines, ``#`` comment lines, lines without a key before
    the first colon, and entries with an empty value are skipped. Keys are
    lower-cased; values are stripped strings (no nested structures).
    """
    match = _FRONTMATTER_RE.match(text)
    if match is None:
        return {}

    fields: dict[str, str] = {}
    for raw_line in match.group(1).splitlines():
        entry = raw_line.strip()
        if not entry or entry.startswith("#"):
            continue
        # Split on the first colon only, so values may contain colons.
        key_part, separator, value_part = entry.partition(":")
        if not separator or not key_part:
            continue
        value = value_part.strip()
        if not value:
            continue
        fields[key_part.strip().lower()] = value
    return fields
|
||||
|
||||
|
||||
def parse_memory_type(raw: str | None) -> str | None:
    """Normalise *raw* and check it against the supported memory categories.

    The value is stripped and lower-cased, then accepted if it appears in
    either ``MEMORY_TYPES`` or ``GLOBAL_MEMORY_CATEGORIES``. Returns the
    normalised value, or ``None`` when *raw* is ``None`` or unrecognised.
    """
    if raw is None:
        return None
    candidate = raw.strip().lower()
    if candidate in MEMORY_TYPES or candidate in GLOBAL_MEMORY_CATEGORIES:
        return candidate
    return None
|
||||
|
||||
|
||||
def parse_global_memory_category(raw: str | None) -> str | None:
    """Normalise *raw* and check it against ``GLOBAL_MEMORY_CATEGORIES``.

    Returns the stripped, lower-cased category when it is supported, and
    ``None`` when *raw* is ``None`` or not a known category.
    """
    candidate = None if raw is None else raw.strip().lower()
    if candidate in GLOBAL_MEMORY_CATEGORIES:
        return candidate
    return None
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# MemoryFile dataclass
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
@dataclass
class MemoryFile:
    """Parsed representation of a single memory file on disk."""

    filename: str
    path: Path
    # Frontmatter fields — all nullable (lenient parsing).
    name: str | None = None
    type: str | None = None
    description: str | None = None
    # First N lines of the file (for manifest / header scanning).
    header_lines: list[str] = field(default_factory=list)
    # Filesystem modification time (seconds since epoch).
    mtime: float = 0.0

    @classmethod
    def from_path(cls, path: Path) -> MemoryFile:
        """Read a memory file and leniently parse its frontmatter.

        Never raises for an unreadable file: if *path* cannot be read, or
        its bytes are not valid UTF-8, a placeholder with only ``filename``
        and ``path`` set is returned so one bad file cannot abort a scan.

        Args:
            path: Location of the memory file.

        Returns:
            A ``MemoryFile`` whose ``type`` is ``None`` unless the
            frontmatter value passes ``parse_memory_type``.
        """
        try:
            text = path.read_text(encoding="utf-8")
        except (OSError, UnicodeDecodeError):
            # UnicodeDecodeError is a ValueError, not an OSError, so it has
            # to be listed explicitly to keep malformed files non-fatal.
            return cls(filename=path.name, path=path)

        fm = parse_frontmatter(text)
        lines = text.splitlines()[:_HEADER_LINE_LIMIT]

        try:
            mtime = path.stat().st_mtime
        except OSError:
            # The file may have vanished since it was read; report "unknown".
            mtime = 0.0

        return cls(
            filename=path.name,
            path=path,
            name=fm.get("name"),
            type=parse_memory_type(fm.get("type")),
            description=fm.get("description"),
            header_lines=lines,
            mtime=mtime,
        )
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Scanning
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def scan_memory_files(memory_dir: Path | None = None) -> list[MemoryFile]:
    """Scan *memory_dir* for ``.md`` files, returning up to ``MAX_FILES``.

    Files are sorted by modification time (newest first). Dotfiles and
    subdirectories are ignored. A file that disappears or cannot be
    stat'ed while the scan is running is skipped instead of aborting it.

    Args:
        memory_dir: Directory to scan; defaults to ``MEMORY_DIR``.

    Returns:
        Parsed ``MemoryFile`` entries, or ``[]`` if the directory is missing.
    """
    d = memory_dir or MEMORY_DIR
    if not d.is_dir():
        return []

    # Stat each candidate exactly once, guarded, so a concurrent delete
    # cannot raise from inside the sort key.
    stamped: list[tuple[float, Path]] = []
    for entry in d.glob("*.md"):
        if entry.name.startswith("."):
            continue
        try:
            if not entry.is_file():
                continue
            stamped.append((entry.stat().st_mtime, entry))
        except OSError:
            continue

    # Sort on the timestamp only, so ties keep glob order.
    stamped.sort(key=lambda pair: pair[0], reverse=True)
    return [MemoryFile.from_path(path) for _, path in stamped[:MAX_FILES]]
|
||||
|
||||
|
||||
def slugify_memory_name(raw: str, *, max_length: int = 200) -> str:
    """Create a filesystem-safe slug for a memory filename.

    Lower-cases *raw*, collapses every run of characters outside ``a-z0-9``
    into a single ``-``, and trims leading/trailing dashes. The slug is then
    capped at *max_length* characters so that, with a ``-N.md`` style suffix
    appended, the filename stays under the 255-byte limit common to most
    filesystems, even when a long description is used as the name.

    Args:
        raw: Free-form name or description.
        max_length: Maximum slug length; values ``<= 0`` disable the cap.

    Returns:
        The slug, or ``"memory"`` when nothing usable remains.
    """
    slug = re.sub(r"[^a-z0-9]+", "-", raw.strip().lower()).strip("-")
    if max_length > 0 and len(slug) > max_length:
        # Truncation can leave a dangling separator; trim it again.
        slug = slug[:max_length].strip("-")
    return slug or "memory"
|
||||
|
||||
|
||||
def allocate_memory_filename(
    memory_dir: Path,
    name: str,
    *,
    suffix: str = ".md",
) -> str:
    """Pick a filename derived from *name* that is not yet used in *memory_dir*.

    The slug of *name* plus *suffix* is tried first; on a collision a
    numeric tail (``-2``, ``-3``, ...) is added until a free name is found.
    Only the name is returned — no file is created.
    """
    stem = slugify_memory_name(name)
    attempt = 1
    filename = f"{stem}{suffix}"
    while (memory_dir / filename).exists():
        attempt += 1
        filename = f"{stem}-{attempt}{suffix}"
    return filename
|
||||
|
||||
|
||||
def build_memory_document(
    *,
    name: str,
    description: str,
    mem_type: str,
    body: str,
) -> str:
    """Build one memory file with frontmatter and body.

    Frontmatter is read back line by line, so each header value must fit on
    one line. Line breaks inside *name*, *description*, or *mem_type* are
    folded into single spaces; otherwise a multi-line value would be
    truncated on re-read and a stray ``---`` line could close the
    frontmatter early. Single-line values are only stripped.

    Args:
        name: Memory name for the ``name:`` header.
        description: One-line summary for the ``description:`` header.
        mem_type: Value for the ``type:`` header.
        body: Memory content placed after the frontmatter.

    Returns:
        The complete document text, ending with a newline.
    """

    def _one_line(value: str) -> str:
        # Join the non-empty, stripped physical lines with single spaces.
        pieces = (part.strip() for part in value.splitlines())
        return " ".join(piece for piece in pieces if piece)

    return (
        f"---\n"
        f"name: {_one_line(name)}\n"
        f"description: {_one_line(description)}\n"
        f"type: {_one_line(mem_type)}\n"
        f"---\n\n"
        f"{body.strip()}\n"
    )
|
||||
|
||||
|
||||
def diary_filename(d: date | None = None) -> str:
|
||||
"""Return the diary memory filename for date *d* (default: today)."""
|
||||
d = d or date.today()
|
||||
return f"MEMORY-{d.strftime('%Y-%m-%d')}.md"
|
||||
|
||||
|
||||
def build_diary_document(*, date_str: str, body: str) -> str:
    """Wrap *body* as a ``diary``-typed memory document for *date_str*.

    The name and description are derived from *date_str*; the rest is
    delegated to ``build_memory_document``.
    """
    diary_name = f"diary-{date_str}"
    summary = f"Daily session narrative for {date_str}"
    return build_memory_document(
        name=diary_name,
        description=summary,
        mem_type="diary",
        body=body,
    )
|
||||
|
||||
|
||||
def validate_global_memory_payload(
    *,
    category: str,
    description: str,
    content: str,
) -> str:
    """Check a queen-global memory save request and return its category.

    Checks run in this order: the category must be a supported global
    category, the description and the content must be non-blank, and
    neither may match the code/runtime-detail pattern.

    Returns:
        The normalised category.

    Raises:
        ValueError: If any check fails.
    """
    normalized_category = parse_global_memory_category(category)
    if normalized_category is None:
        allowed = ", ".join(GLOBAL_MEMORY_CATEGORIES)
        raise ValueError("Invalid global memory category. Use one of: " + allowed)

    if not description.strip():
        raise ValueError("Global memory description cannot be empty.")
    if not content.strip():
        raise ValueError("Global memory content cannot be empty.")

    # Description and content are screened together for code-like details.
    combined = f"{description}\n{content}"
    if _GLOBAL_MEMORY_CODE_PATTERN.search(combined) is not None:
        raise ValueError(
            "Global memory is only for durable user profile, preferences, "
            "environment, or feedback — not task/code/runtime details."
        )
    return normalized_category
|
||||
|
||||
|
||||
def save_global_memory(
    *,
    category: str,
    description: str,
    content: str,
    name: str | None = None,
    memory_dir: Path | None = None,
) -> tuple[str, Path]:
    """Persist one queen-global memory entry.

    The payload is validated and the document is built and size-checked
    before the filesystem is touched, so a rejected entry leaves no
    directory behind.

    Args:
        category: One of ``GLOBAL_MEMORY_CATEGORIES``.
        description: One-line description; also the name when *name* is
            missing or blank.
        content: Body of the memory.
        name: Optional explicit memory name.
        memory_dir: Target directory; defaults to ``global_memory_dir()``.

    Returns:
        ``(filename, path)`` of the file that was written.

    Raises:
        ValueError: If validation fails or the document exceeds
            ``MAX_FILE_SIZE_BYTES``.
    """
    parsed = validate_global_memory_payload(
        category=category,
        description=description,
        content=content,
    )

    # A whitespace-only name must fall back to the description as well;
    # otherwise the frontmatter ``name:`` would be written empty.
    memory_name = (name or "").strip() or description.strip()
    doc = build_memory_document(
        name=memory_name,
        description=description,
        mem_type=parsed,
        body=content,
    )
    if len(doc.encode("utf-8")) > MAX_FILE_SIZE_BYTES:
        raise ValueError(
            f"Global memory entry exceeds the {MAX_FILE_SIZE_BYTES} byte limit."
        )

    target_dir = memory_dir or global_memory_dir()
    target_dir.mkdir(parents=True, exist_ok=True)
    filename = allocate_memory_filename(target_dir, memory_name)
    path = target_dir / filename
    path.write_text(doc, encoding="utf-8")
    return filename, path
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Manifest formatting
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def _age_label(mtime: float) -> str:
    """Describe how old *mtime* is: ``today``, ``1 day ago`` or ``N days ago``."""
    days = memory_age_days(mtime)
    if days > 1:
        return f"{days} days ago"
    return "1 day ago" if days == 1 else "today"
|
||||
|
||||
|
||||
def format_memory_manifest(files: list[MemoryFile]) -> str:
    """Render *files* as a newline-joined manifest, one line per memory.

    Each line reads ``[type] filename (age): description``. A missing type
    is shown as ``unknown`` and a missing description as
    ``(no description)``. An empty list yields an empty string.
    """
    return "\n".join(
        f"[{mf.type or 'unknown'}] {mf.filename} ({_age_label(mf.mtime)}): "
        f"{mf.description or '(no description)'}"
        for mf in files
    )
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Freshness / staleness
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
_SECONDS_PER_DAY = 86_400


def memory_age_days(mtime: float) -> int:
    """Return the age of a memory file in whole days.

    Args:
        mtime: Modification time in seconds since the epoch. Non-positive
            values mean "unknown" and are reported as age 0.

    Returns:
        Whole days elapsed since *mtime*, never negative. A timestamp in
        the future (clock skew, restored files) counts as age 0.
    """
    if mtime <= 0:
        return 0
    elapsed_days = int((time.time() - mtime) / _SECONDS_PER_DAY)
    return max(0, elapsed_days)
|
||||
|
||||
|
||||
def memory_freshness_text(mtime: float) -> str:
    """Build the staleness caveat to inject alongside a recalled memory.

    Memories at most one day old get no caveat (empty string); older ones
    get a warning that states their age in days.
    """
    age = memory_age_days(mtime)
    if age > 1:
        return (
            f"This memory is {age} days old. "
            "Memories are point-in-time observations, not live state — "
            "claims about code behavior or file:line citations may be outdated. "
            "Verify against current code before asserting as fact."
        )
    return ""
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Cursor-based incremental processing
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
async def read_conversation_parts(session_dir: Path) -> list[dict[str, Any]]:
    """Load every conversation part stored for the session at *session_dir*.

    Parts are read through a ``FileConversationStore`` rooted at
    ``<session_dir>/conversations`` and returned as raw message dicts in
    sequence order.
    """
    # Imported inside the function, as in the original.
    from framework.storage.conversation_store import FileConversationStore

    conversations_dir = session_dir / "conversations"
    parts = await FileConversationStore(conversations_dir).read_parts()
    return parts
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Initialisation and legacy migration
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def init_memory_dir(
    memory_dir: Path | None = None,
    *,
    migrate_legacy: bool = False,
) -> None:
    """Ensure the memory directory exists and run any applicable migration.

    Args:
        memory_dir: Directory to initialise; falls back to ``MEMORY_DIR``.
        migrate_legacy: When true, run both the v1 legacy migration and the
            shared v2 queen-store migration into the directory.  When false,
            only the v1 migration runs, and only if the directory did not
            exist before this call and equals the default ``MEMORY_DIR``.
    """
    target = memory_dir or MEMORY_DIR
    # Sample existence before mkdir, which would otherwise hide a first run.
    existed_before = target.exists()
    target.mkdir(parents=True, exist_ok=True)

    if migrate_legacy:
        migrate_legacy_memories(target)
        migrate_shared_v2_memories(target)
        return

    if not existed_before and target == MEMORY_DIR:
        migrate_legacy_memories(target)
|
||||
|
||||
|
||||
def migrate_legacy_memories(memory_dir: Path | None = None) -> None:
|
||||
"""Convert old MEMORY.md + MEMORY-YYYY-MM-DD.md files to individual memory files.
|
||||
|
||||
Legacy files are read from ``_HIVE_QUEEN_DIR``: ``MEMORY.md`` is rewritten as
``legacy-semantic-memory.md`` (type ``reference``) and each non-empty
``memories/MEMORY-*.md`` as ``legacy-diary-<suffix>.md`` (type ``diary``).
The new files are written into *memory_dir*, which defaults to ``MEMORY_DIR``.
|
||||
Originals are moved to ``{memory_dir}/.legacy/``.
|
||||
"""
|
||||
d = memory_dir or MEMORY_DIR
|
||||
# Legacy sources live under the queen directory, not under the target directory.
queen_dir = _HIVE_QUEEN_DIR
|
||||
legacy_archive = d / ".legacy"
|
||||
|
||||
# Tracks whether anything was written, solely to decide on the final log line.
migrated_any = False
|
||||
|
||||
# --- Semantic memory (MEMORY.md) ---
|
||||
semantic = queen_dir / "MEMORY.md"
|
||||
if semantic.exists():
|
||||
content = semantic.read_text(encoding="utf-8").strip()
|
||||
# Skip the blank seed template.
|
||||
if content and not content.startswith("# My Understanding of the User\n\n*No sessions"):
|
||||
_write_migration_file(
|
||||
d,
|
||||
filename="legacy-semantic-memory.md",
|
||||
name="legacy-semantic-memory",
|
||||
mem_type="reference",
|
||||
description="Migrated semantic memory from previous memory system",
|
||||
body=content,
|
||||
)
|
||||
migrated_any = True
|
||||
# Archive original.
# NOTE(review): indentation is not visible in this rendering, so it cannot be
# told here whether archiving also happens for an empty/seed MEMORY.md —
# confirm against the real file.
|
||||
legacy_archive.mkdir(parents=True, exist_ok=True)
|
||||
semantic.rename(legacy_archive / "MEMORY.md")
|
||||
|
||||
# --- Episodic memories (MEMORY-YYYY-MM-DD.md) ---
|
||||
old_memories_dir = queen_dir / "memories"
|
||||
if old_memories_dir.is_dir():
|
||||
# sorted() gives a deterministic, name-ordered pass over the diary files.
for ep_file in sorted(old_memories_dir.glob("MEMORY-*.md")):
|
||||
content = ep_file.read_text(encoding="utf-8").strip()
|
||||
# Empty diary files are skipped entirely (neither migrated nor archived).
if not content:
|
||||
continue
|
||||
# Stem with the "MEMORY-" text removed; per the naming scheme this is the date.
date_part = ep_file.stem.replace("MEMORY-", "")
|
||||
slug = f"legacy-diary-{date_part}.md"
|
||||
_write_migration_file(
|
||||
d,
|
||||
filename=slug,
|
||||
name=f"legacy-diary-{date_part}",
|
||||
mem_type="diary",
|
||||
description=f"Migrated diary entry from {date_part}",
|
||||
body=content,
|
||||
)
|
||||
migrated_any = True
|
||||
# Archive original.
|
||||
legacy_archive.mkdir(parents=True, exist_ok=True)
|
||||
ep_file.rename(legacy_archive / ep_file.name)
|
||||
|
||||
if migrated_any:
|
||||
logger.info("queen_memory_v2: migrated legacy memory files to %s", d)
|
||||
|
||||
|
||||
def migrate_shared_v2_memories(
|
||||
memory_dir: Path | None = None,
|
||||
*,
|
||||
source_dir: Path | None = None,
|
||||
) -> None:
|
||||
"""Move shared queen v2 memory files into a colony directory once.

Both *memory_dir* (destination) and *source_dir* default to ``MEMORY_DIR``;
when they resolve to the same path nothing is done.  A marker file named
``_MIGRATION_MARKER`` in the destination short-circuits later calls.  Each
non-hidden ``*.md`` file in the source is copied to the destination unless a
file of that name already exists there, and the source file is renamed into
``<source>/.legacy_colony_migration`` (with a ``-N`` suffix on name clashes).
Copy and rename failures are logged at debug level and do not raise.
"""
|
||||
d = memory_dir or MEMORY_DIR
|
||||
d.mkdir(parents=True, exist_ok=True)
|
||||
src = source_dir or MEMORY_DIR
|
||||
# Migrating a directory onto itself is a no-op.
if d.resolve() == src.resolve():
|
||||
return
|
||||
|
||||
marker = d / _MIGRATION_MARKER
|
||||
# Marker present: this destination has been processed before.
if marker.exists():
|
||||
return
|
||||
|
||||
if not src.is_dir():
|
||||
return
|
||||
|
||||
md_files = sorted(
|
||||
f for f in src.glob("*.md")
|
||||
if f.is_file() and not f.name.startswith(".")
|
||||
)
|
||||
if not md_files:
|
||||
# Still record the marker so the empty source is not rescanned.
marker.write_text("no shared memories found\n", encoding="utf-8")
|
||||
return
|
||||
|
||||
archive = src / ".legacy_colony_migration"
|
||||
archive.mkdir(parents=True, exist_ok=True)
|
||||
migrated_any = False
|
||||
|
||||
for src_file in md_files:
|
||||
target = d / src_file.name
|
||||
# An existing destination file is never overwritten.
if not target.exists():
|
||||
try:
|
||||
shutil.copy2(src_file, target)
|
||||
migrated_any = True
|
||||
except OSError:
|
||||
logger.debug("shared memory migration copy failed for %s", src_file, exc_info=True)
|
||||
# Leave the source in place when the copy failed.
continue
|
||||
|
||||
# NOTE(review): indentation is not visible in this rendering; the archive
# step below presumably runs for every file that was not skipped above,
# including ones whose target already existed — confirm.
archived = archive / src_file.name
|
||||
counter = 2
|
||||
# Pick a free archive name: stem-2, stem-3, ... on collisions.
while archived.exists():
|
||||
archived = archive / f"{src_file.stem}-{counter}{src_file.suffix}"
|
||||
counter += 1
|
||||
try:
|
||||
src_file.rename(archived)
|
||||
except OSError:
|
||||
logger.debug("shared memory migration archive failed for %s", src_file, exc_info=True)
|
||||
|
||||
if migrated_any:
|
||||
logger.info("queen_memory_v2: migrated shared queen memories to %s", d)
|
||||
# NOTE(review): whether the marker is written unconditionally or only when
# something was migrated cannot be told without indentation — confirm.
marker.write_text(
|
||||
f"migrated_at={int(time.time())}\nsource={src}\n",
|
||||
encoding="utf-8",
|
||||
)
|
||||
|
||||
|
||||
def _write_migration_file(
    memory_dir: Path,
    filename: str,
    name: str,
    mem_type: str,
    description: str,
    body: str,
) -> None:
    """Write a single migrated memory file with frontmatter.

    The file consists of a YAML-style frontmatter header (``name``,
    ``description``, ``type``), the body, and a trailing newline.  If the body
    would push the file past ``MAX_FILE_SIZE_BYTES`` it is truncated and a
    ``...(truncated during migration)`` marker is appended.

    Args:
        memory_dir: Directory the file is written into.
        filename: Name of the file to create inside *memory_dir*.
        name: Value for the ``name`` frontmatter field.
        mem_type: Value for the ``type`` frontmatter field.
        description: Value for the ``description`` frontmatter field.
        body: Memory text placed after the frontmatter.
    """
    header = (
        f"---\n"
        f"name: {name}\n"
        f"description: {description}\n"
        f"type: {mem_type}\n"
        f"---\n\n"
    )
    truncation_note = "\n\n...(truncated during migration)"

    # The limit is expressed in bytes, so all budgeting is done on the UTF-8
    # encoding.  Reserve room for the header and the trailing newline.
    body_budget = MAX_FILE_SIZE_BYTES - len(header.encode("utf-8")) - 1
    encoded_body = body.encode("utf-8")
    if len(encoded_body) > body_budget:
        keep = max(body_budget - len(truncation_note.encode("utf-8")), 0)
        # errors="ignore" drops a multi-byte character split by the cut.
        body = encoded_body[:keep].decode("utf-8", errors="ignore")
        # Prefer ending on a complete line when one is available.
        last_newline = body.rfind("\n")
        if last_newline > 0:
            body = body[:last_newline]
        body += truncation_note

    path = memory_dir / filename
    path.write_text(header + body + "\n", encoding="utf-8")
|
||||
@@ -0,0 +1,236 @@
|
||||
"""Recall selector — pre-turn memory selection for queen and worker memory.
|
||||
|
||||
Before each conversation turn the system:
|
||||
1. Scans the memory directory for ``.md`` files (cap: 200).
|
||||
2. Reads headers (frontmatter + first 30 lines).
|
||||
3. Uses a single LLM call with structured JSON output to pick the ~5
|
||||
most relevant memories.
|
||||
4. Injects them into context with staleness warnings for older ones.
|
||||
|
||||
The selector only sees the user's query string — no full conversation
|
||||
context. This keeps it cheap and fast. Errors are caught and return
|
||||
``[]`` so the main conversation is never blocked.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import logging
|
||||
from pathlib import Path
|
||||
from typing import Any
|
||||
|
||||
from framework.agents.queen.queen_memory_v2 import (
|
||||
MEMORY_DIR,
|
||||
format_memory_manifest,
|
||||
memory_freshness_text,
|
||||
scan_memory_files,
|
||||
)
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Structured output schema
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
# Structured-output request passed as ``response_format`` to the selector LLM:
# a strict JSON schema for an object with one required key,
# ``selected_memories``, holding a list of strings.
RECALL_SCHEMA: dict[str, Any] = {
    "type": "json_schema",
    "json_schema": {
        "name": "memory_selection",
        "strict": True,
        "schema": {
            "type": "object",
            "properties": {
                "selected_memories": {
                    "type": "array",
                    "items": {"type": "string"},
                },
            },
            "required": ["selected_memories"],
            "additionalProperties": False,
        },
    },
}
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# System prompt
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
# System prompt for the memory selector.  A trailing backslash joins a wrapped
# source line onto the next one, so each paragraph/bullet is a single line in
# the resulting string.
SELECT_MEMORIES_SYSTEM_PROMPT = """\
You are selecting memories that will be useful to the Queen agent as it \
processes a user's query.

You will be given the user's query and a list of available memory files \
with their filenames and descriptions.

Return a JSON object with a single key "selected_memories" containing a \
list of filenames for the memories that will clearly be useful as the \
Queen processes the user's query (up to 5).

Only include memories that you are certain will be helpful based on their \
name and description.
- If you are unsure if a memory will be useful in processing the user's \
query, then do not include it in your list. Be selective and discerning.
- If there are no memories in the list that would clearly be useful, \
return an empty list.
- If a list of recently-used tools is provided, do not select memories \
that are usage reference or API documentation for those tools (the Queen \
is already exercising them). Still select warnings or gotchas about them.
"""
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Core functions
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
async def select_memories(
    query: str,
    llm: Any,
    memory_dir: Path | None = None,
    active_tools: list[str] | None = None,
    *,
    max_results: int = 5,
) -> list[str]:
    """Select up to *max_results* relevant memory filenames for *query*.

    Scans *memory_dir* (default ``MEMORY_DIR``), sends the manifest and the
    query to the LLM with a structured-output schema, and validates the answer
    against the files that were actually scanned.

    Args:
        query: The user's query text.
        llm: Object exposing an awaitable ``acomplete(...)`` whose response has
            a JSON string in ``.content``.
        memory_dir: Directory to scan; defaults to ``MEMORY_DIR``.
        active_tools: Optional recently-used tool names, appended to the
            prompt so reference docs for them can be skipped.
        max_results: Upper bound on the number of filenames returned
            (values below zero are treated as zero).

    Returns:
        Filenames in the order the LLM listed them, without duplicates and
        restricted to scanned files.  Best-effort: if the LLM call or the
        parsing of its response fails, returns ``[]``.
    """
    mem_dir = memory_dir or MEMORY_DIR
    files = scan_memory_files(mem_dir)
    if not files:
        logger.debug("recall: no memory files found, skipping selection")
        return []

    logger.debug("recall: selecting from %d memory files for query: %.80s", len(files), query)
    manifest = format_memory_manifest(files)

    user_msg_parts = [f"## User query\n\n{query}\n\n## Available memories\n\n{manifest}"]
    if active_tools:
        user_msg_parts.append(f"\n\n## Recently-used tools\n\n{', '.join(active_tools)}")
    user_msg = "".join(user_msg_parts)

    try:
        resp = await llm.acomplete(
            messages=[{"role": "user", "content": user_msg}],
            system=SELECT_MEMORIES_SYSTEM_PROMPT,
            max_tokens=512,
            response_format=RECALL_SCHEMA,
        )
        data = json.loads(resp.content)
        selected = data.get("selected_memories", [])

        # Validate: keep only real filenames, and keep each one once so the
        # same memory is never injected twice.
        valid_names = {f.filename for f in files}
        result: list[str] = []
        for candidate in selected:
            if not isinstance(candidate, str):
                continue
            if candidate in valid_names and candidate not in result:
                result.append(candidate)
        # Clamp so a negative limit cannot turn into a from-the-end slice.
        result = result[: max(max_results, 0)]

        logger.debug("recall: selected %d memories: %s", len(result), result)
        return result
    except Exception:
        # Deliberate catch-all: recall must never block the main conversation.
        logger.debug("recall: memory selection failed, returning []", exc_info=True)
        return []
|
||||
|
||||
|
||||
def format_recall_injection(
|
||||
filenames: list[str],
|
||||
memory_dir: Path | None = None,
|
||||
*,
|
||||
heading: str = "Selected Memories",
|
||||
) -> str:
|
||||
"""Read selected memory files and format for system prompt injection.
|
||||
|
||||
Prepends a staleness warning for memories older than 1 day.
|
||||
"""
|
||||
mem_dir = memory_dir or MEMORY_DIR
|
||||
if not filenames:
|
||||
return ""
|
||||
|
||||
blocks: list[str] = []
|
||||
for fname in filenames:
|
||||
path = mem_dir / fname
|
||||
if not path.is_file():
|
||||
continue
|
||||
try:
|
||||
content = path.read_text(encoding="utf-8").strip()
|
||||
except OSError:
|
||||
continue
|
||||
|
||||
try:
|
||||
mtime = path.stat().st_mtime
|
||||
except OSError:
|
||||
mtime = 0.0
|
||||
|
||||
freshness = memory_freshness_text(mtime)
|
||||
header = f"### {fname}"
|
||||
if freshness:
|
||||
header += f"\n\n> {freshness}"
|
||||
blocks.append(f"{header}\n\n{content}")
|
||||
|
||||
if not blocks:
|
||||
return ""
|
||||
|
||||
body = "\n\n---\n\n".join(blocks)
|
||||
logger.debug("recall: injecting %d memory blocks into context", len(blocks))
|
||||
return f"--- {heading} ---\n\n{body}\n\n--- End {heading} ---"
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Cache update (called after each queen turn)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
async def update_recall_cache(
    session_dir: Path,
    llm: Any,
    phase_state: Any | None = None,
    memory_dir: Path | None = None,
    *,
    cache_setter: Any = None,
    heading: str = "Selected Memories",
    active_tools: list[str] | None = None,
) -> None:
    """Refresh the cached recall block that the next turn will inject.

    The latest user message found under *session_dir* is used as the query
    for ``select_memories``; the formatted result is handed to *cache_setter*
    when one is given, otherwise stored on
    ``phase_state._cached_recall_block`` when *phase_state* is given.  When no
    user query is found nothing happens.  Any exception raised while
    selecting or storing is logged at debug level and swallowed.
    """
    mem_dir = memory_dir or MEMORY_DIR

    # The most recent user message doubles as the selection query.
    query = _extract_latest_user_query(session_dir)
    if not query:
        logger.debug("recall: no user query found, skipping cache update")
        return
    logger.debug("recall: updating cache for query: %.80s", query)

    try:
        chosen = await select_memories(query, llm, mem_dir, active_tools=active_tools)
        recall_block = format_recall_injection(chosen, mem_dir, heading=heading)
        if cache_setter is None:
            if phase_state is not None:
                phase_state._cached_recall_block = recall_block
        else:
            # An explicit setter takes precedence over phase_state.
            cache_setter(recall_block)
    except Exception:
        logger.debug("recall: cache update failed", exc_info=True)
|
||||
|
||||
|
||||
def _extract_latest_user_query(session_dir: Path) -> str:
    """Read the most recent non-empty user message from conversation parts.

    Looks at ``<session_dir>/conversations/parts/*.json``, newest first by
    filename, and inspects at most the 20 newest files.

    Args:
        session_dir: Session directory containing ``conversations/parts``.

    Returns:
        The stripped content of the newest part whose ``role`` is ``"user"``
        and whose content is non-empty, truncated to 1000 characters; ``""``
        when the directory is missing or no such part exists.  Unreadable,
        undecodable, malformed or non-object part files are skipped.
    """
    parts_dir = session_dir / "conversations" / "parts"
    if not parts_dir.is_dir():
        return ""

    newest_first = sorted(parts_dir.glob("*.json"), reverse=True)
    for part_file in newest_first[:20]:  # Look back at most 20 messages.
        try:
            data = json.loads(part_file.read_text(encoding="utf-8"))
        except (OSError, ValueError):
            # ValueError covers both JSONDecodeError and UnicodeDecodeError.
            continue
        if not isinstance(data, dict) or data.get("role") != "user":
            continue
        raw_content = data.get("content")
        if raw_content is None:
            # A JSON null must not be stringified into the query "None".
            continue
        content = str(raw_content).strip()
        if content:
            # Truncate very long queries.
            return content[:1000]
    return ""
|
||||
@@ -31,5 +31,5 @@
|
||||
18. **Passing `profile=` in GCU tool calls** — Profile isolation for parallel subagents is automatic. The framework injects a unique profile per subagent via an asyncio `ContextVar`. Hardcoding `profile="default"` in a GCU system prompt breaks this isolation.
|
||||
|
||||
## Worker Agent Errors
|
||||
19. **Adding client-facing intake node to workers** — The queen owns intake. Workers should start with an autonomous processing node. Client-facing nodes in workers are for mid-execution review/approval only.
|
||||
19. **Adding client-facing intake node to workers** — The queen owns intake. Workers should start with an autonomous processing node. Route worker review/approval through queen escalation instead of direct worker HITL.
|
||||
20. **Putting `escalate` or `set_output` in NodeSpec `tools=[]`** — These are synthetic framework tools, auto-injected at runtime. Only list MCP tools from `list_agent_tools()`.
|
||||
|
||||
@@ -76,7 +76,7 @@ goal = Goal(
|
||||
| output_keys | list[str] | required | Memory keys this node writes via set_output |
|
||||
| system_prompt | str | "" | LLM instructions |
|
||||
| tools | list[str] | [] | Tool names from MCP servers |
|
||||
| client_facing | bool | False | If True, streams to user and blocks for input |
|
||||
| client_facing | bool | False | Deprecated compatibility field. Queen interactivity is implicit; workers should escalate instead |
|
||||
| nullable_output_keys | list[str] | [] | Keys that may remain unset |
|
||||
| max_node_visits | int | 0 | 0=unlimited (default); >1 for one-shot feedback loops |
|
||||
| max_retries | int | 3 | Retries on failure |
|
||||
@@ -110,7 +110,7 @@ This prevents premature set_output before user interaction.
|
||||
**Hard limit: 3-6 nodes for most agents.** Never exceed 6 unless the user
|
||||
explicitly requests a complex multi-phase pipeline.
|
||||
|
||||
Each node boundary serializes outputs to shared memory and **destroys** all
|
||||
Each node boundary serializes outputs to the shared buffer and **destroys** all
|
||||
in-context information: tool call results, intermediate reasoning, conversation
|
||||
history. A research node that searches, fetches, and analyzes in ONE node keeps
|
||||
all source material in its conversation context. Split across 3 nodes, each
|
||||
@@ -132,13 +132,14 @@ downstream node only sees the serialized summary string.
|
||||
|
||||
**Typical agent structure (2 nodes):**
|
||||
```
|
||||
process (autonomous) ←→ review (client-facing)
|
||||
process (autonomous) ←→ review (queen-mediated)
|
||||
```
|
||||
The queen owns intake — she gathers requirements from the user, then
|
||||
passes structured input via `run_agent_with_input(task)`. When building
|
||||
the agent, design the entry node's `input_keys` to match what the queen
|
||||
will provide at run time. Worker agents should NOT have a client-facing
|
||||
intake node. Client-facing nodes are for mid-execution review/approval only.
|
||||
intake node. Mid-execution review/approval should happen through queen
|
||||
escalation rather than direct worker HITL.
|
||||
|
||||
For simpler agents, just 1 autonomous node:
|
||||
```
|
||||
@@ -172,7 +173,7 @@ Use `conversation_mode="continuous"` to preserve context across transitions.
|
||||
### set_output
|
||||
- Synthetic tool injected by framework
|
||||
- Call separately from real tool calls (separate turn)
|
||||
- `set_output("key", "value")` stores to shared memory
|
||||
- `set_output("key", "value")` stores to the shared buffer
|
||||
|
||||
## Edge Conditions
|
||||
|
||||
@@ -246,7 +247,7 @@ For large data that exceeds context:
|
||||
Multiple ON_SUCCESS edges from same source → parallel execution via asyncio.gather().
|
||||
- Parallel nodes must have disjoint output_keys
|
||||
- Only one branch may have client_facing nodes
|
||||
- Fan-in node gets all outputs in shared memory
|
||||
- Fan-in node gets all outputs in the shared buffer
|
||||
|
||||
## Judge System
|
||||
|
||||
|
||||
@@ -1,63 +0,0 @@
|
||||
# Queen Memory — File System Structure
|
||||
|
||||
```
|
||||
~/.hive/
|
||||
├── queen/
|
||||
│ ├── MEMORY.md ← Semantic memory
|
||||
│ ├── memories/
|
||||
│ │ ├── MEMORY-2026-03-09.md ← Episodic memory (today)
|
||||
│ │ ├── MEMORY-2026-03-08.md
|
||||
│ │ └── ...
|
||||
│ └── session/
|
||||
│ └── {session_id}/ ← One dir per session (or resumed-from session)
|
||||
│ ├── conversations/
|
||||
│ │ ├── parts/
|
||||
│ │ │ ├── 00001.json ← One file per message (role, content, tool_calls)
|
||||
│ │ │ ├── 00002.json
|
||||
│ │ │ └── ...
|
||||
│ │ └── spillover/
|
||||
│ │ ├── conversation_1.md ← Compacted old conversation segments
|
||||
│ │ ├── conversation_2.md
|
||||
│ │ └── ...
|
||||
│ └── data/
|
||||
│ ├── adapt.md ← Working memory (session-scoped)
|
||||
│ ├── web_search_1.txt ← Spillover: large tool results
|
||||
│ ├── web_search_2.txt
|
||||
│ └── ...
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## The three memory tiers
|
||||
|
||||
| File | Tier | Written by | Read at |
|
||||
|---|---|---|---|
|
||||
| `MEMORY.md` | Semantic | Consolidation LLM (auto, post-session) | Session start (injected into system prompt) |
|
||||
| `memories/MEMORY-YYYY-MM-DD.md` | Episodic | Queen via `write_to_diary` tool + consolidation LLM | Session start (today's file injected) |
|
||||
| `data/adapt.md` | Working | Queen via `update_session_notes` tool | Every turn (inlined in system prompt) |
|
||||
|
||||
---
|
||||
|
||||
## Session directory naming
|
||||
|
||||
The session directory name is **`queen_resume_from`** when a cold-restore resumes an existing
|
||||
session, otherwise the new **`session_id`**. This means resumed sessions accumulate all messages
|
||||
in the original directory rather than fragmenting across multiple folders.
|
||||
|
||||
---
|
||||
|
||||
## Consolidation
|
||||
|
||||
`consolidate_queen_memory()` runs every **5 minutes** in the background and once more at session
|
||||
end. It reads:
|
||||
|
||||
1. `conversations/parts/*.json` — full message history (user + assistant turns; tool results skipped)
|
||||
2. `data/adapt.md` — current working notes
|
||||
|
||||
It then makes two LLM writes:
|
||||
|
||||
- Rewrites `MEMORY.md` in place (semantic memory — queen never touches this herself)
|
||||
- Appends a timestamped prose entry to today's `memories/MEMORY-YYYY-MM-DD.md`
|
||||
|
||||
If the combined transcript exceeds ~200 K characters it is recursively binary-compacted via the
|
||||
LLM before being sent to the consolidation model (mirrors `EventLoopNode._llm_compact`).
|
||||
@@ -0,0 +1,783 @@
|
||||
"""Reflect agent — background memory extraction for queen and worker memory.
|
||||
|
||||
A lightweight side agent that runs after each queen LLM turn. It
|
||||
inspects recent conversation messages (cursor-based incremental
|
||||
processing) and extracts learnings into individual memory files.
|
||||
|
||||
Two reflection types:
|
||||
- **Short reflection**: every queen turn. Distills learnings. Nudged
|
||||
toward a 2-turn pattern (batch reads → batch writes).
|
||||
- **Long reflection**: every 5 short reflections, on CONTEXT_COMPACTED,
|
||||
and at session end. Organises, deduplicates, trims holistically.
|
||||
|
||||
The agent has restricted tool access: it can only read/write/delete
|
||||
memory files in ``~/.hive/queen/memories/`` and list them.
|
||||
|
||||
Concurrency: an ``asyncio.Lock`` prevents overlapping runs. If a
|
||||
trigger fires while a reflection is already active the event is skipped
|
||||
(cursor hasn't advanced, so messages will be reconsidered next time).
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
import json
|
||||
import logging
|
||||
import re
|
||||
import traceback
|
||||
from datetime import datetime
|
||||
from pathlib import Path
|
||||
from typing import Any
|
||||
|
||||
from framework.agents.queen.queen_memory_v2 import (
|
||||
MAX_FILE_SIZE_BYTES,
|
||||
MAX_FILES,
|
||||
MEMORY_DIR,
|
||||
MEMORY_FRONTMATTER_EXAMPLE,
|
||||
MEMORY_TYPES,
|
||||
build_diary_document,
|
||||
diary_filename,
|
||||
format_memory_manifest,
|
||||
parse_frontmatter,
|
||||
read_conversation_parts,
|
||||
scan_memory_files,
|
||||
)
|
||||
from framework.llm.provider import LLMResponse, Tool
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Reflection tool definitions (internal — not in queen's main registry)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
# Tools offered to the reflection agent: list, read, write and delete memory
# files.  Every parameter schema is a closed object (no extra properties).
_REFLECTION_TOOLS: list[Tool] = [
    # -- list ---------------------------------------------------------------
    Tool(
        name="list_memory_files",
        description=(
            "List all memory files with their type, name, age, and description. "
            "Returns a text manifest — one line per file."
        ),
        parameters={
            "type": "object",
            "properties": {},
            "additionalProperties": False,
        },
    ),
    # -- read ---------------------------------------------------------------
    Tool(
        name="read_memory_file",
        description="Read the full content of a memory file by filename.",
        parameters={
            "type": "object",
            "properties": {
                "filename": {
                    "type": "string",
                    "description": "The filename (e.g. 'user-prefers-dark-mode.md').",
                },
            },
            "required": ["filename"],
            "additionalProperties": False,
        },
    ),
    # -- write --------------------------------------------------------------
    Tool(
        name="write_memory_file",
        description=(
            "Create or overwrite a memory file. Content should include YAML "
            "frontmatter (name, description, type) followed by the memory body. "
            f"Max file size: {MAX_FILE_SIZE_BYTES} bytes. Max files: {MAX_FILES}."
        ),
        parameters={
            "type": "object",
            "properties": {
                "filename": {
                    "type": "string",
                    "description": "Filename ending in .md (e.g. 'user-prefers-dark-mode.md').",
                },
                "content": {
                    "type": "string",
                    "description": "Full file content including frontmatter.",
                },
            },
            "required": ["filename", "content"],
            "additionalProperties": False,
        },
    ),
    # -- delete -------------------------------------------------------------
    Tool(
        name="delete_memory_file",
        description=(
            "Delete a memory file by filename. Use during long "
            "reflection to prune stale or redundant memories."
        ),
        parameters={
            "type": "object",
            "properties": {
                "filename": {
                    "type": "string",
                    "description": "The filename to delete.",
                },
            },
            "required": ["filename"],
            "additionalProperties": False,
        },
    ),
]
|
||||
|
||||
|
||||
def _safe_memory_path(filename: str, memory_dir: Path) -> Path:
    """Resolve *filename* to a path strictly inside *memory_dir*.

    Args:
        filename: Bare file name supplied by the reflection LLM (untrusted).
        memory_dir: Directory the resulting path must stay within.

    Returns:
        The resolved absolute path of ``memory_dir / filename``.

    Raises:
        ValueError: If *filename* is not a string, is empty, has leading or
            trailing whitespace, contains ``/``, ``\\`` or ``..``, resolves to
            *memory_dir* itself (e.g. ``"."``), or resolves outside
            *memory_dir* (e.g. through a symlink).
    """
    # Tool arguments come from the LLM, so the type is not guaranteed; report
    # it as ValueError like every other rejection instead of AttributeError.
    if not isinstance(filename, str) or not filename or filename.strip() != filename:
        raise ValueError(f"Invalid filename: {filename!r}")
    if "/" in filename or "\\" in filename or ".." in filename:
        raise ValueError(f"Invalid filename: path components not allowed: {filename!r}")

    candidate = (memory_dir / filename).resolve()
    root = memory_dir.resolve()
    # "." passes the checks above but names the directory, not a file in it.
    if candidate == root:
        raise ValueError(f"Invalid filename: refers to the memory directory itself: {filename!r}")
    if not candidate.is_relative_to(root):
        raise ValueError(f"Path escapes memory directory: {filename!r}")
    return candidate
|
||||
|
||||
|
||||
# Memory types that workers are NOT allowed to write.
|
||||
_WORKER_BLOCKED_TYPES: frozenset[str] = frozenset(
    (
        "diary",
        "environment",
        "goal",
        "reference",
        "technique",
    )
)
|
||||
|
||||
|
||||
def _inject_last_modified_by(content: str, caller: str) -> str:
    """Stamp ``last_modified_by: <caller>`` into the frontmatter of *content*.

    Any frontmatter line that already starts with ``last_modified_by``
    (case-insensitive, ignoring surrounding whitespace) is dropped, and a
    fresh entry is appended as the last frontmatter line.  Content without a
    leading ``---`` frontmatter block is returned unchanged.
    """
    frontmatter = re.match(r"^---\s*\n(.*?)\n---", content, re.DOTALL)
    if frontmatter is None:
        return content

    kept_lines: list[str] = []
    for line in frontmatter.group(1).splitlines():
        if line.strip().lower().startswith("last_modified_by"):
            continue  # Replaced by the fresh entry appended below.
        kept_lines.append(line)
    kept_lines.append(f"last_modified_by: {caller}")

    rebuilt = "\n".join(kept_lines)
    remainder = content[frontmatter.end():]
    return "---\n" + rebuilt + "\n---" + remainder
|
||||
|
||||
|
||||
def _execute_tool(name: str, args: dict[str, Any], memory_dir: Path, caller: str) -> str:
    """Execute a reflection tool synchronously and return the result string.

    Problems are reported to the LLM as strings starting with ``ERROR:``
    instead of being raised, including filesystem failures and arguments of
    the wrong type.

    Args:
        name: One of ``list_memory_files``, ``read_memory_file``,
            ``write_memory_file`` or ``delete_memory_file``.
        args: Tool arguments as produced by the LLM; values are not trusted
            to be present or to be strings.
        memory_dir: Directory all file operations are confined to.
        caller: Originator label.  ``"worker"`` may not write types listed in
            ``_WORKER_BLOCKED_TYPES``; the label is also stamped into the
            ``last_modified_by`` frontmatter field on every write.

    Returns:
        The tool output (manifest, file content or confirmation) or an
        ``ERROR: ...`` message.
    """
    if name == "list_memory_files":
        files = scan_memory_files(memory_dir)
        logger.debug("reflect: tool list_memory_files → %d files", len(files))
        if not files:
            return "(no memory files yet)"
        return format_memory_manifest(files)

    if name not in ("read_memory_file", "write_memory_file", "delete_memory_file"):
        return f"ERROR: Unknown tool: {name}"

    # All remaining tools take a filename; reject non-strings up front so the
    # string operations below cannot raise AttributeError.
    filename = args.get("filename", "")
    if not isinstance(filename, str):
        return f"ERROR: Invalid filename: {filename!r}"

    if name == "read_memory_file":
        try:
            path = _safe_memory_path(filename, memory_dir)
        except ValueError as exc:
            return f"ERROR: {exc}"
        if not path.is_file():
            return f"ERROR: File not found: {filename}"
        try:
            return path.read_text(encoding="utf-8")
        except OSError as e:
            return f"ERROR: {e}"

    if name == "write_memory_file":
        content = args.get("content", "")
        if not isinstance(content, str):
            return "ERROR: Content must be a string."
        if not filename.endswith(".md"):
            return "ERROR: Filename must end with .md"
        # Enforce caller-based type restrictions.
        fm = parse_frontmatter(content)
        mem_type = (fm.get("type") or "").strip().lower()
        if caller == "worker" and mem_type in _WORKER_BLOCKED_TYPES:
            return (
                f"ERROR: Workers cannot write memory type '{mem_type}'. "
                f"Blocked types for workers: {', '.join(sorted(_WORKER_BLOCKED_TYPES))}."
            )
        # Inject last_modified_by into frontmatter.
        content = _inject_last_modified_by(content, caller)
        # Enforce file size limit (after injection, so the stamp is counted).
        if len(content.encode("utf-8")) > MAX_FILE_SIZE_BYTES:
            return f"ERROR: Content exceeds {MAX_FILE_SIZE_BYTES} byte limit."
        try:
            path = _safe_memory_path(filename, memory_dir)
        except ValueError as exc:
            return f"ERROR: {exc}"
        # Enforce file cap (only for new files; overwrites are always allowed).
        if not path.exists():
            existing = list(memory_dir.glob("*.md"))
            if len(existing) >= MAX_FILES:
                return f"ERROR: File cap reached ({MAX_FILES}). Delete a file first."
        try:
            memory_dir.mkdir(parents=True, exist_ok=True)
            path.write_text(content, encoding="utf-8")
        except OSError as e:
            return f"ERROR: {e}"
        logger.debug("reflect: tool write_memory_file [%s] → %s (%d chars)", caller, filename, len(content))
        return f"Wrote {filename} ({len(content)} chars)."

    # name == "delete_memory_file"
    try:
        path = _safe_memory_path(filename, memory_dir)
    except ValueError as exc:
        return f"ERROR: {exc}"
    # Only regular files may be deleted; unlink() on a directory would raise.
    if not path.is_file():
        return f"ERROR: File not found: {filename}"
    try:
        path.unlink()
    except OSError as e:
        return f"ERROR: {e}"
    logger.debug("reflect: tool delete_memory_file [%s] → %s", caller, filename)
    return f"Deleted {filename}."
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Mini event loop
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
_MAX_TURNS = 5
|
||||
|
||||
|
||||
async def _reflection_loop(
    llm: Any,
    system: str,
    user_msg: str,
    memory_dir: Path,
    caller: str,
    max_turns: int = _MAX_TURNS,
) -> tuple[bool, list[str], str]:
    """Run a mini tool-use loop: LLM → tool calls → repeat.

    Hard cap of *max_turns* iterations.  Prompt nudges the LLM toward a
    2-turn pattern (batch reads in turn 1, batch writes in turn 2).

    Args:
        llm: Provider exposing an async ``acomplete(messages=, system=,
            tools=, max_tokens=)`` method.
        system: System prompt sent on every turn.
        user_msg: Initial user message that seeds the conversation.
        memory_dir: Directory handed to ``_execute_tool`` for every tool call.
        caller: Caller label forwarded to ``_execute_tool`` and used in logs.
        max_turns: Maximum number of LLM round-trips.

    Returns:
        A tuple ``(success, changed_files, last_text)``.  *success* is
        ``False`` only when an LLM call raised.  *changed_files* lists, in
        call order, the filenames of write/delete tool calls whose result did
        not start with ``"ERROR"``.  *last_text* is the most recent non-empty
        assistant text (useful as a skip-reason when no files changed).
    """
    messages: list[dict[str, Any]] = [{"role": "user", "content": user_msg}]
    changed_files: list[str] = []
    last_text: str = ""
    logger.debug("reflect: starting loop (caller=%s, max %d turns)", caller, max_turns)

    # Turns are numbered from 1 everywhere so the log lines agree with each other.
    for turn in range(1, max_turns + 1):
        # Log what we're sending to the LLM.
        last_msg = messages[-1]
        last_content = last_msg.get("content", "")
        preview = (last_content if isinstance(last_content, str) else str(last_content))[:300]
        logger.debug(
            "reflect: turn %d — sending %d messages to LLM, last msg role=%s, preview=%s",
            turn, len(messages), last_msg.get("role", "?"), preview,
        )

        try:
            resp: LLMResponse = await llm.acomplete(
                messages=messages,
                system=system,
                tools=_REFLECTION_TOOLS,
                max_tokens=2048,
            )
        except Exception:
            logger.warning("reflect: LLM call failed", exc_info=True)
            return False, changed_files, last_text

        # Extract tool calls.  ``or []`` covers a "tool_calls" key that is
        # present but null, which would otherwise break len()/iteration below.
        raw = resp.raw_response
        tool_calls_raw: list[dict[str, Any]] = []
        if isinstance(raw, dict):
            tool_calls_raw = raw.get("tool_calls") or []

        # Log the full LLM response for debugging.
        raw_keys = list(raw.keys()) if isinstance(raw, dict) else type(raw).__name__
        logger.debug(
            "reflect: turn %d — LLM response: content=%r (len=%d), stop_reason=%s, "
            "tool_calls=%d, model=%s, tokens=%d/%d, raw_keys=%s",
            turn, (resp.content or "")[:200], len(resp.content or ""),
            resp.stop_reason, len(tool_calls_raw), resp.model,
            resp.input_tokens, resp.output_tokens, raw_keys,
        )

        # Remember the latest non-empty text so a reason given alongside tool
        # calls on an earlier turn is not lost if the final turn is empty.
        turn_text = resp.content or ""
        if turn_text:
            last_text = turn_text

        assistant_msg: dict[str, Any] = {"role": "assistant", "content": turn_text}
        if tool_calls_raw:
            # Convert to OpenAI format for the conversation.
            assistant_msg["tool_calls"] = [
                {
                    "id": tc["id"],
                    "type": "function",
                    "function": {
                        "name": tc["name"],
                        "arguments": json.dumps(tc.get("input") or {}),
                    },
                }
                for tc in tool_calls_raw
            ]
        messages.append(assistant_msg)

        # No tool calls → agent is done.
        if not tool_calls_raw:
            logger.debug("reflect: loop done after %d turn(s) (no tool calls)", turn)
            break

        # Execute each tool call and append results.
        logger.debug(
            "reflect: turn %d — executing %d tool call(s): %s",
            turn, len(tool_calls_raw), [tc["name"] for tc in tool_calls_raw],
        )
        for tc in tool_calls_raw:
            # A null "input" is treated as "no arguments".
            tool_input = tc.get("input") or {}
            result = _execute_tool(tc["name"], tool_input, memory_dir, caller)
            # Track files that were written or deleted.
            if tc["name"] in ("write_memory_file", "delete_memory_file"):
                fname = tool_input.get("filename", "")
                if fname and not result.startswith("ERROR"):
                    changed_files.append(fname)
            messages.append({
                "role": "tool",
                "tool_call_id": tc["id"],
                "content": result,
            })
    else:
        # The loop ran out of turns while the model was still issuing tool calls.
        logger.debug(
            "reflect: turn budget exhausted after %d turn(s) without a final text reply",
            max_turns,
        )

    return True, changed_files, last_text
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# System prompts
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
# The frontmatter example lines joined into one newline-separated string so
# they can be interpolated into the system prompts.
_FRONTMATTER_EXAMPLE = "\n".join(MEMORY_FRONTMATTER_EXAMPLE)
|
||||
|
||||
_SHORT_REFLECT_SYSTEM = f"""\
|
||||
You are a reflection agent that distills learnings from a conversation into
|
||||
persistent memory files. You run in the background after each assistant turn.
|
||||
|
||||
Your goal: identify anything from the recent messages worth remembering across
|
||||
future sessions — user preferences, project context, techniques that worked,
|
||||
goals, environment details, reference pointers.
|
||||
|
||||
Memory types: {', '.join(MEMORY_TYPES)}
|
||||
|
||||
Expected format for each memory file:
|
||||
{_FRONTMATTER_EXAMPLE}
|
||||
|
||||
Workflow (aim for 2 turns):
|
||||
Turn 1 — call list_memory_files to see what already exists, then
|
||||
read_memory_file for any that might need updating.
|
||||
Turn 2 — call write_memory_file for new/updated memories.
|
||||
|
||||
Rules:
|
||||
- Only persist information that would be useful in a *future* conversation.
|
||||
Skip ephemeral task details, routine tool output, and anything obvious
|
||||
from the code or git history.
|
||||
- Keep files concise. Each file should cover ONE topic.
|
||||
- If an existing memory already covers the learning, UPDATE it rather than
|
||||
creating a duplicate.
|
||||
- If there is nothing worth remembering from these messages, do nothing
|
||||
(respond with a brief reason why nothing was saved — no tool calls needed).
|
||||
- IMPORTANT: Always end with a text message (no tool calls) summarising what
|
||||
you did or why you skipped. Never end on an empty response.
|
||||
- File names should be kebab-case slugs ending in .md.
|
||||
- Include a specific, search-friendly description in the frontmatter.
|
||||
- Do NOT exceed {MAX_FILE_SIZE_BYTES} bytes per file or {MAX_FILES} total files.
|
||||
"""
|
||||
|
||||
_LONG_REFLECT_SYSTEM = f"""\
|
||||
You are a reflection agent performing a periodic housekeeping pass over the
|
||||
memory directory. Your job is to organise, deduplicate, and trim noise from
|
||||
the accumulated memory files.
|
||||
|
||||
Memory types: {', '.join(MEMORY_TYPES)}
|
||||
|
||||
Expected format for each memory file:
|
||||
{_FRONTMATTER_EXAMPLE}
|
||||
|
||||
Workflow:
|
||||
1. list_memory_files to get the full manifest.
|
||||
2. read_memory_file for files that look redundant, stale, or overlapping.
|
||||
3. Merge duplicates, delete stale entries, consolidate related memories.
|
||||
4. Ensure descriptions are specific and search-friendly.
|
||||
5. Enforce limits: max {MAX_FILES} files, max {MAX_FILE_SIZE_BYTES} bytes each.
|
||||
|
||||
Rules:
|
||||
- Prefer merging over deleting — combine related memories into one file.
|
||||
- Remove memories that are no longer relevant or are superseded.
|
||||
- Keep the total collection lean and high-signal.
|
||||
- Do NOT invent new information — only reorganise what exists.
|
||||
- Do NOT delete or merge MEMORY-*.md diary files. These are daily narratives
|
||||
managed by a separate process. You may read them for context but should not
|
||||
modify them.
|
||||
"""
|
||||
|
||||
_DIARY_SYSTEM = """\
|
||||
You maintain a daily diary entry for an AI colony session. You receive:
|
||||
(1) Today's existing diary content (may be empty if this is the first entry).
|
||||
(2) A transcript of recent conversation messages.
|
||||
|
||||
Write a cohesive 3-8 sentence narrative about what happened in this session today.
|
||||
Cover: what the user asked for, what was accomplished, key decisions or obstacles,
|
||||
and current status.
|
||||
|
||||
Rules:
|
||||
- If an existing diary is provided, rewrite it as a unified narrative incorporating
|
||||
the new developments. Merge and deduplicate — do not simply append.
|
||||
- Keep the total narrative under 3000 characters.
|
||||
- Focus on the story arc of the day, not individual tool calls or code details.
|
||||
- If the recent messages contain nothing substantive (greetings, routine
|
||||
confirmations), return the existing diary text unchanged.
|
||||
- Output only the diary prose. No headings, no timestamps, no code fences, no
|
||||
frontmatter.
|
||||
"""
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Short & long reflection entry points
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
async def run_short_reflection(
    session_dir: Path,
    llm: Any,
    memory_dir: Path | None = None,
    *,
    caller: str,
) -> None:
    """Run a short reflection: extract learnings from conversation.

    Builds a transcript from the last 50 conversation parts (tool results and
    empty messages are dropped, long messages are cut to 800 characters) and
    hands it to ``_reflection_loop`` with the short-reflection system prompt.

    Args:
        session_dir: Session directory passed to ``read_conversation_parts``.
        llm: LLM provider forwarded to ``_reflection_loop``.
        memory_dir: Target memory directory; falls back to ``MEMORY_DIR``.
        caller: Caller label forwarded to the loop and used in log lines.
    """
    mem_dir = memory_dir or MEMORY_DIR

    messages = await read_conversation_parts(session_dir)
    if not messages:
        logger.debug("reflect: short [%s] — no conversation parts", caller)
        return

    logger.debug("reflect: short [%s] — %d conversation parts", caller, len(messages))

    # Build a readable transcript from recent messages.
    transcript_lines: list[str] = []
    for msg in messages[-50:]:
        role = msg.get("role", "")
        if role == "tool":
            continue  # Skip verbose tool results.
        # ``or ""`` matters: a message whose content is present but None would
        # otherwise be stringified to the literal text "None".
        content = str(msg.get("content") or "").strip()
        if not content:
            continue
        label = "user" if role == "user" else "assistant"
        if len(content) > 800:
            content = content[:800] + "…"
        transcript_lines.append(f"[{label}]: {content}")

    if not transcript_lines:
        logger.debug("reflect: short [%s] — no usable transcript text", caller)
        return

    transcript = "\n".join(transcript_lines)
    user_msg = (
        f"## Recent conversation ({len(messages)} messages total)\n\n"
        f"{transcript}\n\n"
        f"Timestamp: {datetime.now().isoformat(timespec='minutes')}"
    )

    # The success flag is not needed here: the loop logs LLM failures itself.
    _, changed, reason = await _reflection_loop(
        llm, _SHORT_REFLECT_SYSTEM, user_msg, mem_dir, caller=caller,
    )
    if changed:
        logger.debug("reflect: short reflection done [%s], changed files: %s", caller, changed)
    else:
        logger.debug(
            "reflect: short reflection done [%s], no changes — %s",
            caller, reason or "no reason given",
        )
|
||||
|
||||
|
||||
async def run_long_reflection(
    llm: Any,
    memory_dir: Path | None = None,
    *,
    caller: str,
) -> None:
    """Run a long reflection: organise and deduplicate all memories.

    Scans the memory directory, and if it holds any files, sends their
    manifest to ``_reflection_loop`` with the housekeeping system prompt.

    Args:
        llm: LLM provider forwarded to ``_reflection_loop``.
        memory_dir: Directory to organise; falls back to ``MEMORY_DIR``.
        caller: Caller label forwarded to the loop and used in log lines.
    """
    target_dir = memory_dir or MEMORY_DIR
    files = scan_memory_files(target_dir)

    # Nothing on disk means nothing to tidy up.
    if not files:
        logger.debug("reflect: long [%s] — no memory files to organise", caller)
        return

    file_count = len(files)
    logger.debug("reflect: long [%s] — organising %d memory files", caller, file_count)

    manifest = format_memory_manifest(files)
    heading = f"## Current memory manifest ({file_count} files)"
    stamp = f"Timestamp: {datetime.now().isoformat(timespec='minutes')}"
    user_msg = f"{heading}\n\n{manifest}\n\n{stamp}"

    _, changed, reason = await _reflection_loop(
        llm, _LONG_REFLECT_SYSTEM, user_msg, target_dir, caller=caller,
    )

    if not changed:
        logger.debug(
            "reflect: long reflection done [%s] (%d files), no changes — %s",
            caller, file_count, reason or "no reason given",
        )
        return
    logger.debug(
        "reflect: long reflection done [%s] (%d files), changed files: %s",
        caller, file_count, changed,
    )
|
||||
|
||||
|
||||
async def run_diary_update(
    session_dir: Path,
    llm: Any,
    memory_dir: Path | None = None,
) -> None:
    """Update today's diary file with a narrative of recent activity.

    Reads the existing diary body (frontmatter stripped), builds a transcript
    from the last 40 conversation parts, asks the LLM for a rewritten
    narrative and overwrites the diary file with it.  LLM and write failures
    are logged and swallowed; the function never raises for them.

    Args:
        session_dir: Session directory passed to ``read_conversation_parts``.
        llm: Provider exposing an async ``acomplete`` method.
        memory_dir: Directory holding the diary; falls back to ``MEMORY_DIR``.
    """
    mem_dir = memory_dir or MEMORY_DIR

    fname = diary_filename()
    diary_path = mem_dir / fname
    # NOTE(review): the date is sampled separately from diary_filename();
    # presumably both resolve to the same day — confirm behaviour at midnight.
    today_str = datetime.now().strftime("%Y-%m-%d")

    # Read existing diary body (strip frontmatter).
    existing_body = ""
    if diary_path.exists():
        try:
            raw = diary_path.read_text(encoding="utf-8")
        except (OSError, UnicodeDecodeError):
            # An unreadable or undecodable diary is treated as "no entries yet".
            logger.debug("diary: could not read existing %s", fname, exc_info=True)
        else:
            m = re.match(r"^---\s*\n.*?\n---\s*\n?", raw, re.DOTALL)
            existing_body = raw[m.end():].strip() if m else raw.strip()

    # Read all conversation messages for context.
    messages = (await read_conversation_parts(session_dir)) or []
    transcript_lines: list[str] = []
    for msg in messages[-40:]:
        role = msg.get("role", "")
        # ``or ""`` keeps a present-but-None content from becoming "None".
        content = str(msg.get("content") or "").strip()
        if role == "tool" or not content:
            continue
        label = "user" if role == "user" else "assistant"
        if len(content) > 600:
            content = content[:600] + "..."
        transcript_lines.append(f"[{label}]: {content}")

    if not transcript_lines:
        return

    transcript = "\n".join(transcript_lines)
    user_msg = (
        f"## Today's Diary So Far\n\n"
        f"{existing_body or '(no entries yet)'}\n\n"
        f"## Recent Conversation\n\n"
        f"{transcript}\n\n"
        f"Date: {today_str}"
    )

    try:
        from framework.agents.queen.config import default_config

        resp = await llm.acomplete(
            messages=[{"role": "user", "content": user_msg}],
            system=_DIARY_SYSTEM,
            max_tokens=min(default_config.max_tokens, 1024),
        )
        new_body = (resp.content or "").strip()
        if not new_body:
            return

        doc = build_diary_document(date_str=today_str, body=new_body)
        if len(doc.encode("utf-8")) > MAX_FILE_SIZE_BYTES:
            # First apply the character cap, as before.
            new_body = new_body[:2800]
            doc = build_diary_document(date_str=today_str, body=new_body)

            # The limit is in bytes, so multi-byte text can still be too big
            # after a character cut.  Trim the remaining excess by bytes.
            # Assumes the document embeds the body verbatim, so dropping N
            # body bytes shrinks the document by at least N — TODO confirm
            # against build_diary_document.
            excess = len(doc.encode("utf-8")) - MAX_FILE_SIZE_BYTES
            if excess > 0:
                body_bytes = new_body.encode("utf-8")
                keep = max(0, len(body_bytes) - excess)
                # errors="ignore" drops a character split by the byte cut.
                new_body = body_bytes[:keep].decode("utf-8", errors="ignore")
                doc = build_diary_document(date_str=today_str, body=new_body)

        mem_dir.mkdir(parents=True, exist_ok=True)
        diary_path.write_text(doc, encoding="utf-8")
        logger.debug("diary: updated %s (%d chars)", fname, len(doc))
    except Exception:
        logger.warning("diary: update failed", exc_info=True)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Event-bus integration
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
# Run a long reflection on every Nth queen LLM turn.  The turn counter in
# ``subscribe_reflection_triggers`` advances on every queen turn, including
# tool-call turns for which the short reflection is skipped.
_LONG_REFLECT_INTERVAL = 5
|
||||
|
||||
|
||||
async def subscribe_reflection_triggers(
    event_bus: Any,
    session_dir: Path,
    llm: Any,
    memory_dir: Path | None = None,
    phase_state: Any = None,
) -> list[str]:
    """Wire the queen's reflection handlers onto the event bus.

    Call this once during queen setup.  Two handlers are registered:

    - ``LLM_TURN_COMPLETE``: short reflection (plus a long reflection on every
      ``_LONG_REFLECT_INTERVAL``-th queen turn), then a diary update, then a
      recall-cache refresh when *phase_state* is given.
    - ``CONTEXT_COMPACTED``: long reflection only.

    Both handlers ignore events whose ``stream_id`` is not ``"queen"`` and
    return immediately when a reflection is already in progress.

    Returns:
        Event-bus subscription IDs, for cleanup during session teardown.
    """
    from framework.runtime.event_bus import EventType

    target_dir = memory_dir or MEMORY_DIR
    reflect_lock = asyncio.Lock()
    queen_turns = 0

    async def _on_turn_complete(event: Any) -> None:
        nonlocal queen_turns

        # Only process queen turns.
        if getattr(event, "stream_id", None) != "queen":
            return

        queen_turns += 1

        # Reflect when the turn ended without tool calls (a conversational
        # response) or when the interval comes due, whichever applies.
        payload = getattr(event, "data", {}) or {}
        stop_reason = payload.get("stop_reason", "")
        ended_on_tools = stop_reason in ("tool_use", "tool_calls")
        long_due = queen_turns % _LONG_REFLECT_INTERVAL == 0

        if ended_on_tools and not long_due:
            next_due = (queen_turns // _LONG_REFLECT_INTERVAL + 1) * _LONG_REFLECT_INTERVAL
            logger.debug(
                "reflect: skipping turn %d (stop_reason=%s, next reflect at %d)",
                queen_turns, stop_reason, next_due,
            )
            return

        if reflect_lock.locked():
            logger.debug("reflect: skipping — reflection already in progress")
            return

        async with reflect_lock:
            try:
                logger.debug(
                    "reflect: turn complete — count %d/%d (stop_reason=%s)",
                    queen_turns, _LONG_REFLECT_INTERVAL, stop_reason,
                )
                # A short pass always runs; the long pass follows on interval turns.
                await run_short_reflection(session_dir, llm, target_dir, caller="queen")
                if long_due:
                    await run_long_reflection(llm, target_dir, caller="queen")
            except Exception:
                logger.warning("reflect: reflection failed", exc_info=True)
                _write_error("short/long reflection")

            # Update daily diary after reflection.
            try:
                await run_diary_update(session_dir, llm, target_dir)
            except Exception:
                logger.warning("reflect: diary update failed", exc_info=True)

            # Update recall cache after reflection completes, guaranteeing
            # recall sees the current turn's extracted memories.
            if phase_state is None:
                return
            try:
                from framework.agents.queen.recall_selector import update_recall_cache

                # The colony block is stored under two attribute names.
                await update_recall_cache(
                    session_dir,
                    llm,
                    cache_setter=lambda block: (
                        setattr(phase_state, "_cached_colony_recall_block", block),
                        setattr(phase_state, "_cached_recall_block", block),
                    ),
                    memory_dir=target_dir,
                    heading="Colony Memories",
                )
                await update_recall_cache(
                    session_dir,
                    llm,
                    cache_setter=lambda block: setattr(
                        phase_state, "_cached_global_recall_block", block
                    ),
                    memory_dir=getattr(phase_state, "global_memory_dir", None),
                    heading="Global Memories",
                )
            except Exception:
                logger.debug("recall: cache update failed", exc_info=True)

    async def _on_compaction(event: Any) -> None:
        is_queen = getattr(event, "stream_id", None) == "queen"
        if not is_queen or reflect_lock.locked():
            return

        async with reflect_lock:
            try:
                await run_long_reflection(llm, target_dir, caller="queen")
            except Exception:
                logger.warning("reflect: compaction-triggered reflection failed", exc_info=True)
                _write_error("compaction reflection")

    turn_sub = event_bus.subscribe(
        event_types=[EventType.LLM_TURN_COMPLETE],
        handler=_on_turn_complete,
    )
    compaction_sub = event_bus.subscribe(
        event_types=[EventType.CONTEXT_COMPACTED],
        handler=_on_compaction,
    )
    return [turn_sub, compaction_sub]
|
||||
|
||||
|
||||
async def subscribe_worker_memory_triggers(
    event_bus: Any,
    llm: Any,
    *,
    worker_sessions_dir: Path,
    colony_memory_dir: Path,
    recall_cache: dict[str, str],
) -> list[str]:
    """Subscribe colony memory lifecycle events for worker runs.

    Short reflection is now handled synchronously at node handoff in
    ``WorkerAgent._reflect_colony_memory()``.  This function only manages:
    - Recall cache initialisation on execution start
    - Final long reflection + cleanup on execution end

    An event counts as a worker event when it has a truthy ``execution_id``
    and its ``stream_id`` is neither ``"queen"`` nor ``"judge"``.

    Returns:
        The two subscription IDs (start handler first, terminal handler second).
    """
    from framework.runtime.event_bus import EventType

    final_reflection_lock = asyncio.Lock()

    def _worker_execution_id(event: Any) -> Any:
        """Return the event's execution id for worker events, else ``None``."""
        execution_id = getattr(event, "execution_id", None)
        if not execution_id:
            return None
        if getattr(event, "stream_id", None) in ("queen", "judge"):
            return None
        return execution_id

    async def _on_execution_started(event: Any) -> None:
        execution_id = _worker_execution_id(event)
        if execution_id is not None:
            # Seed an empty recall entry for this execution.
            recall_cache[execution_id] = ""

    async def _on_execution_terminal(event: Any) -> None:
        execution_id = _worker_execution_id(event)
        if execution_id is None:
            return
        async with final_reflection_lock:
            try:
                await run_long_reflection(llm, colony_memory_dir, caller="worker")
            except Exception:
                logger.warning("reflect: worker final reflection failed", exc_info=True)
                _write_error("worker final reflection")
            finally:
                # Drop the cache entry whether or not the reflection succeeded.
                recall_cache.pop(execution_id, None)

    started_sub = event_bus.subscribe(
        event_types=[EventType.EXECUTION_STARTED],
        handler=_on_execution_started,
    )
    terminal_sub = event_bus.subscribe(
        event_types=[EventType.EXECUTION_COMPLETED, EventType.EXECUTION_FAILED],
        handler=_on_execution_terminal,
    )
    return [started_sub, terminal_sub]
|
||||
|
||||
|
||||
def _write_error(context: str) -> None:
    """Best-effort dump of the current traceback to ``.reflection_error.txt``.

    The file lives directly under ``MEMORY_DIR`` and is overwritten on every
    call.  ``OSError`` while creating the directory or writing is ignored.

    Args:
        context: Short label recorded on the first line of the file.
    """
    target = MEMORY_DIR / ".reflection_error.txt"
    try:
        target.parent.mkdir(parents=True, exist_ok=True)
        report = f"context: {context}\ntime: {datetime.now().isoformat()}\n\n{traceback.format_exc()}"
        target.write_text(report, encoding="utf-8")
    except OSError:
        # Deliberately silent: error reporting must not raise on its own.
        pass
|
||||
@@ -1,27 +0,0 @@
|
||||
"""Queen's ticket receiver entry point.
|
||||
|
||||
When a WORKER_ESCALATION_TICKET event is emitted on the shared EventBus,
|
||||
this entry point fires and routes to the ``ticket_triage`` node, where the
|
||||
Queen deliberates and decides whether to notify the operator.
|
||||
|
||||
Isolation level is ``isolated`` — the queen's triage memory is kept separate
|
||||
from the worker's shared memory. Each ticket triage runs in its own context.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from framework.graph.edge import AsyncEntryPointSpec
|
||||
|
||||
# Event-triggered entry point: ``worker_escalation_ticket`` events start the
# graph at the ``ticket_triage`` node, using the ``isolated`` isolation level.
TICKET_RECEIVER_ENTRY_POINT = AsyncEntryPointSpec(
    id="ticket_receiver",
    name="Worker Escalation Ticket Receiver",
    entry_node="ticket_triage",
    trigger_type="event",
    trigger_config={
        "event_types": ["worker_escalation_ticket"],
        # Do not fire on our own graph's events (prevents loops if queen
        # somehow emits a worker_escalation_ticket for herself)
        "exclude_own_graph": True,
    },
    isolation_level="isolated",
)
|
||||
@@ -1,286 +0,0 @@
|
||||
"""Worker per-run digest (run diary).
|
||||
|
||||
Storage layout:
|
||||
~/.hive/agents/{agent_name}/runs/{run_id}/digest.md
|
||||
|
||||
Each completed or failed worker run gets one digest file. The queen reads
|
||||
these via get_worker_status(focus='diary') before digging into live runtime
|
||||
logs — the diary is a cheap, persistent record that survives across sessions.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
import traceback
|
||||
from collections import Counter
|
||||
from datetime import datetime
|
||||
from pathlib import Path
|
||||
from typing import TYPE_CHECKING, Any
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from framework.runtime.event_bus import AgentEvent, EventBus
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
_DIGEST_SYSTEM = """\
|
||||
You maintain run digests for a worker agent.
|
||||
A run digest is a concise, factual record of a single task execution.
|
||||
|
||||
Write 3-6 sentences covering:
|
||||
- What the worker was asked to do (the task/goal)
|
||||
- What approach it took and what tools it used
|
||||
- What the outcome was (success, partial, or failure — and why if relevant)
|
||||
- Any notable issues, retries, or escalations to the queen
|
||||
|
||||
Write in third person past tense. Be direct and specific.
|
||||
Omit routine tool invocations unless the result matters.
|
||||
Output only the digest prose — no headings, no code fences.
|
||||
"""
|
||||
|
||||
|
||||
def _worker_runs_dir(agent_name: str) -> Path:
    """Return ``~/.hive/agents/<agent_name>/runs`` (the path is not created)."""
    return Path.home().joinpath(".hive", "agents", agent_name, "runs")
|
||||
|
||||
|
||||
def digest_path(agent_name: str, run_id: str) -> Path:
    """Return the ``digest.md`` path for one run of *agent_name*.

    The file sits in a per-run sub-directory named after *run_id* inside the
    agent's runs directory.  Nothing is created on disk.
    """
    run_dir = _worker_runs_dir(agent_name) / run_id
    return run_dir / "digest.md"
|
||||
|
||||
|
||||
def _collect_run_events(bus: EventBus, run_id: str, limit: int = 2000) -> list[AgentEvent]:
    """Gather the bus-history events that belong to *run_id*.

    First looks through the EXECUTION_STARTED history for an event whose
    ``run_id`` matches and that carries an ``execution_id``; when found, the
    history is queried by that execution id.  This works because
    TOOL_CALL_*, EDGE_TRAVERSED, NODE_STALLED etc. carry execution_id but
    not run_id.

    When no such start event exists (e.g. bus was rotated), falls back to
    filtering the general history by the ``run_id`` attribute.

    Args:
        bus: Event bus whose ``get_history`` is queried.
        run_id: Run identifier to match.
        limit: ``limit`` value passed to every ``get_history`` call.
    """
    from framework.runtime.event_bus import EventType

    # Pass 1: resolve the execution id from the matching start event.
    start_events = bus.get_history(event_type=EventType.EXECUTION_STARTED, limit=limit)
    exec_id: str | None = next(
        (
            ev.execution_id
            for ev in start_events
            if getattr(ev, "run_id", None) == run_id and ev.execution_id
        ),
        None,
    )

    if not exec_id:
        # Fallback: scan all events and match by run_id attribute.
        return [ev for ev in bus.get_history(limit=limit) if getattr(ev, "run_id", None) == run_id]

    return bus.get_history(execution_id=exec_id, limit=limit)
|
||||
|
||||
|
||||
def _build_run_context(
    events: list[AgentEvent],
    outcome_event: AgentEvent | None,
) -> str:
    """Assemble the plain-text run summary fed to the digest LLM call.

    The result is a newline-joined list of facts derived from *events*: task
    input, duration, outcome, node path, tools used (and tool errors), issue
    counts, escalation count and the tail of the last LLM text snapshot.
    Sections with no data are omitted.

    Args:
        events: Run events; they are reversed before use, so the input is
            presumably newest-first — verify against ``get_history``.
        outcome_event: Completion/failure event, or ``None`` for a mid-run
            snapshot.
    """
    from framework.runtime.event_bus import EventType

    # Reverse so events are in chronological order.
    timeline = list(reversed(events))

    def of_type(kind: Any) -> list[AgentEvent]:
        """Chronological events whose ``type`` equals *kind*."""
        return [ev for ev in timeline if ev.type == kind]

    out: list[str] = []
    starts = of_type(EventType.EXECUTION_STARTED)

    # Task input from EXECUTION_STARTED.
    if starts:
        task_input = starts[0].data.get("input", {})
        if task_input:
            out.append(f"Task input: {str(task_input)[:400]}")

    # Duration (elapsed so far if no outcome yet).
    ref_ts = outcome_event.timestamp if outcome_event else datetime.utcnow()
    if starts:
        elapsed = (ref_ts - starts[0].timestamp).total_seconds()
        minutes, seconds = divmod(int(elapsed), 60)
        if minutes:
            out.append(f"Duration so far: {minutes}m {seconds}s")
        else:
            out.append(f"Duration so far: {seconds}s")

    # Outcome.
    if outcome_event is None:
        out.append("Status: still running (mid-run snapshot)")
    elif outcome_event.type == EventType.EXECUTION_COMPLETED:
        output = outcome_event.data.get("output", {})
        if output:
            out.append(f"Outcome: completed. Output: {str(output)[:300]}")
        else:
            out.append("Outcome: completed.")
    else:
        error = outcome_event.data.get("error", "")
        if error:
            out.append(f"Outcome: failed. Error: {str(error)[:300]}")
        else:
            out.append("Outcome: failed.")

    # Node path (edge traversals), limited to the 20 most recent hops.
    hops = of_type(EventType.EDGE_TRAVERSED)
    if hops:
        path_parts = [
            f"{hop.data.get('source_node', '?')}->{hop.data.get('target_node', '?')}"
            for hop in hops[-20:]
        ]
        out.append(f"Node path: {', '.join(path_parts)}")

    # Tools used, most frequent first.
    tool_events = of_type(EventType.TOOL_CALL_COMPLETED)
    if tool_events:
        usage = Counter(ev.data.get("tool_name", "?") for ev in tool_events)
        summary = ", ".join(f"{name}×{n}" if n > 1 else name for name, n in usage.most_common())
        out.append(f"Tools used: {summary}")
        # Note any tool errors.
        failed = [ev for ev in tool_events if ev.data.get("is_error")]
        if failed:
            err_names = Counter(ev.data.get("tool_name", "?") for ev in failed)
            out.append(f"Tool errors: {dict(err_names)}")

    # Issues.
    issue_labels = {
        EventType.NODE_STALLED: "stall",
        EventType.NODE_TOOL_DOOM_LOOP: "doom loop",
        EventType.CONSTRAINT_VIOLATION: "constraint violation",
        EventType.NODE_RETRY: "retry",
    }
    issue_parts: list[str] = []
    for kind, label in issue_labels.items():
        hits = len(of_type(kind))
        if hits:
            issue_parts.append(f"{hits} {label}(s)")
    if issue_parts:
        out.append(f"Issues: {', '.join(issue_parts)}")

    # Escalations to queen.
    escalations = of_type(EventType.ESCALATION_REQUESTED)
    if escalations:
        out.append(f"Escalations to queen: {len(escalations)}")

    # Final LLM output snippet (last LLM_TEXT_DELTA snapshot).
    deltas = of_type(EventType.LLM_TEXT_DELTA)
    if deltas:
        snapshot = deltas[-1].data.get("snapshot", "") or ""
        if snapshot:
            out.append(f"Final LLM output: {snapshot[-400:].strip()}")

    return "\n".join(out)
|
||||
|
||||
|
||||
async def consolidate_worker_run(
    agent_name: str,
    run_id: str,
    outcome_event: AgentEvent | None,
    bus: EventBus,
    llm: Any,
) -> None:
    """Write (or overwrite) the digest for a worker run.

    Called fire-and-forget either:
    - After EXECUTION_COMPLETED / EXECUTION_FAILED (outcome_event set, final write)
    - Periodically during a run on a cooldown timer (outcome_event=None, mid-run snapshot)

    The digest file is always overwritten so each call produces the freshest
    view.  Any exception is logged and its traceback is written, best-effort,
    to ``digest_error.txt`` in the run's directory; nothing propagates.

    Args:
        agent_name: Worker agent directory name (determines storage path).
        run_id: The run ID.
        outcome_event: EXECUTION_COMPLETED or EXECUTION_FAILED event, or None for
            a mid-run snapshot.
        bus: The session EventBus (shared queen + worker).
        llm: LLMProvider with an acomplete() method.
    """
    try:
        run_context = _build_run_context(_collect_run_events(bus, run_id), outcome_event)
        if not run_context:
            logger.debug("worker_memory: no events for run %s, skipping digest", run_id)
            return

        digest_kind = "mid-run" if outcome_event is None else "final"
        logger.info(
            "worker_memory: generating %s digest for run %s ...",
            digest_kind,
            run_id,
        )

        from framework.agents.queen.config import default_config

        token_cap = min(default_config.max_tokens, 512)
        resp = await llm.acomplete(
            messages=[{"role": "user", "content": run_context}],
            system=_DIGEST_SYSTEM,
            max_tokens=token_cap,
        )
        digest_text = (resp.content or "").strip()
        if not digest_text:
            logger.warning("worker_memory: LLM returned empty digest for run %s", run_id)
            return

        out_path = digest_path(agent_name, run_id)
        out_path.parent.mkdir(parents=True, exist_ok=True)

        from framework.runtime.event_bus import EventType

        # Header timestamp: the outcome's own time, or "now" for a snapshot.
        stamp_source = outcome_event.timestamp if outcome_event else datetime.utcnow()
        ts = stamp_source.strftime("%Y-%m-%d %H:%M")

        if outcome_event is None:
            status = "running"
        else:
            completed = outcome_event.type == EventType.EXECUTION_COMPLETED
            status = "completed" if completed else "failed"

        document = f"# {run_id}\n\n**{ts}** | {status}\n\n{digest_text}\n"
        out_path.write_text(document, encoding="utf-8")
        logger.info(
            "worker_memory: %s digest written for run %s (%d chars)",
            status,
            run_id,
            len(digest_text),
        )

    except Exception:
        # Capture the traceback first, before anything else touches exception state.
        tb = traceback.format_exc()
        logger.exception("worker_memory: digest failed for run %s", run_id)
        # Persist the error so it's findable without log access.
        error_path = _worker_runs_dir(agent_name) / run_id / "digest_error.txt"
        try:
            error_path.parent.mkdir(parents=True, exist_ok=True)
            error_report = f"run_id: {run_id}\ntime: {datetime.now().isoformat()}\n\n{tb}"
            error_path.write_text(error_report, encoding="utf-8")
        except Exception:
            # Best-effort only: the failure is already in the log.
            pass
|
||||
|
||||
|
||||
def read_recent_digests(agent_name: str, max_runs: int = 5) -> list[tuple[str, str]]:
    """Return recent run digests as [(run_id, content), ...], newest first.

    Digests are ordered by the modification time of each run's ``digest.md``.
    Files that disappear, cannot be read, or are not valid UTF-8 are skipped,
    and empty digests do not count toward ``max_runs``.

    Args:
        agent_name: Worker agent directory name.
        max_runs: Maximum number of digests to return. Values <= 0 yield an
            empty list.

    Returns:
        List of (run_id, digest_content) tuples, ordered newest first.
    """
    if max_runs <= 0:
        return []

    runs_dir = _worker_runs_dir(agent_name)
    if not runs_dir.exists():
        return []

    # Stat each file once, up front: a run directory may be removed between
    # the glob and the stat, and a failing stat inside a sort key would abort
    # the whole listing.
    candidates = []
    for digest_file in runs_dir.glob("*/digest.md"):
        try:
            candidates.append((digest_file.stat().st_mtime, digest_file))
        except OSError:
            continue
    candidates.sort(key=lambda item: item[0], reverse=True)

    result: list[tuple[str, str]] = []
    for _, digest_file in candidates:
        try:
            content = digest_file.read_text(encoding="utf-8").strip()
        except (OSError, UnicodeDecodeError):
            # UnicodeDecodeError is a ValueError, not an OSError; a corrupt
            # digest must not hide the readable ones.
            continue
        if not content:
            continue
        result.append((digest_file.parent.name, content))
        # Cap after filtering so empty/unreadable digests don't eat the quota.
        if len(result) >= max_runs:
            break
    return result
|
||||
@@ -6,7 +6,6 @@ Usage:
|
||||
hive info exports/my-agent
|
||||
hive validate exports/my-agent
|
||||
hive list exports/
|
||||
hive dispatch exports/ --input '{"key": "value"}'
|
||||
hive shell exports/my-agent
|
||||
|
||||
Testing commands:
|
||||
@@ -79,7 +78,7 @@ def main():
|
||||
|
||||
subparsers = parser.add_subparsers(dest="command", required=True)
|
||||
|
||||
# Register runner commands (run, info, validate, list, dispatch, shell)
|
||||
# Register runner commands (run, info, validate, list, shell)
|
||||
from framework.runner.cli import register_commands
|
||||
|
||||
register_commands(subparsers)
|
||||
|
||||
@@ -1,11 +1,6 @@
|
||||
"""Graph structures: Goals, Nodes, Edges, and Execution."""
|
||||
|
||||
from framework.graph.client_io import (
|
||||
ActiveNodeClientIO,
|
||||
ClientIOGateway,
|
||||
InertNodeClientIO,
|
||||
NodeClientIO,
|
||||
)
|
||||
from framework.graph.context import GraphContext
|
||||
from framework.graph.context_handoff import ContextHandoff, HandoffContext
|
||||
from framework.graph.conversation import ConversationStore, Message, NodeConversation
|
||||
from framework.graph.edge import DEFAULT_MAX_TOKENS, EdgeCondition, EdgeSpec, GraphSpec
|
||||
@@ -19,6 +14,14 @@ from framework.graph.event_loop_node import (
|
||||
from framework.graph.executor import GraphExecutor
|
||||
from framework.graph.goal import Constraint, Goal, GoalStatus, SuccessCriterion
|
||||
from framework.graph.node import NodeContext, NodeProtocol, NodeResult, NodeSpec
|
||||
from framework.graph.worker_agent import (
|
||||
Activation,
|
||||
FanOutTag,
|
||||
FanOutTracker,
|
||||
WorkerAgent,
|
||||
WorkerCompletion,
|
||||
WorkerLifecycle,
|
||||
)
|
||||
|
||||
__all__ = [
|
||||
# Goal
|
||||
@@ -51,9 +54,12 @@ __all__ = [
|
||||
# Context Handoff
|
||||
"ContextHandoff",
|
||||
"HandoffContext",
|
||||
# Client I/O
|
||||
"NodeClientIO",
|
||||
"ActiveNodeClientIO",
|
||||
"InertNodeClientIO",
|
||||
"ClientIOGateway",
|
||||
# Worker Agent
|
||||
"WorkerAgent",
|
||||
"WorkerLifecycle",
|
||||
"WorkerCompletion",
|
||||
"Activation",
|
||||
"FanOutTag",
|
||||
"FanOutTracker",
|
||||
"GraphContext",
|
||||
]
|
||||
|
||||
@@ -59,6 +59,13 @@ class ActiveNodeClientIO(NodeClientIO):
|
||||
self._input_result: str | None = None
|
||||
|
||||
async def emit_output(self, content: str, is_final: bool = False) -> None:
|
||||
# Strip leading whitespace from first output chunk to avoid leading spaces
|
||||
# (some LLMs like Kimi output leading whitespace before text)
|
||||
if not self._output_snapshot and content:
|
||||
content = content.lstrip()
|
||||
if not content: # Content was all whitespace
|
||||
return
|
||||
|
||||
self._output_snapshot += content
|
||||
await self._output_queue.put(content)
|
||||
|
||||
|
||||
@@ -0,0 +1,327 @@
|
||||
"""Shared graph execution context helpers.
|
||||
|
||||
This module centralizes:
|
||||
- Graph-run shared state (`GraphContext`)
|
||||
- Scoped buffer permission shaping for a node
|
||||
- Per-node accounts prompt resolution
|
||||
- Canonical `NodeContext` construction
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
from dataclasses import dataclass, field
|
||||
from typing import Any
|
||||
|
||||
from framework.graph.edge import GraphSpec
|
||||
from framework.graph.goal import Goal
|
||||
from framework.graph.node import DataBuffer, NodeContext, NodeProtocol, NodeSpec
|
||||
from framework.runtime.core import Runtime
|
||||
|
||||
|
||||
@dataclass
class GraphContext:
    """Shared state for one graph execution run.

    Fields without a default must be supplied at construction; the rest
    default to empty/None/False values. `build_node_context_from_graph_context`
    in this module reads these fields to assemble a per-node `NodeContext`.
    """

    # --- Required run wiring ---
    graph: GraphSpec
    goal: Goal
    buffer: DataBuffer
    runtime: Runtime
    llm: Any  # LLMProvider
    tools: list[Any]  # list[Tool]
    tool_executor: Any  # Callable
    event_bus: Any  # GraphScopedEventBus
    execution_id: str
    stream_id: str
    run_id: str
    storage_path: Any  # Path | None
    runtime_logger: Any = None
    # Two registries keyed by string: node implementations and node specs.
    # NOTE(review): keys are presumably node ids — confirm against callers.
    node_registry: dict[str, NodeProtocol] = field(default_factory=dict)
    node_spec_registry: dict[str, NodeSpec] = field(default_factory=dict)
    parallel_config: Any = None  # ParallelExecutionConfig | None
    enable_parallel_execution: bool = True
    # --- Continuous mode ---
    # When is_continuous is true, build_node_context_from_graph_context falls
    # back to the conversation, tools and output keys below if the caller
    # does not pass explicit values.
    is_continuous: bool = False
    continuous_conversation: Any = None
    cumulative_tools: list[Any] = field(default_factory=list)
    cumulative_tool_names: set[str] = field(default_factory=set)
    cumulative_output_keys: list[str] = field(default_factory=list)
    # --- Prompt inputs forwarded into each NodeContext ---
    accounts_prompt: str = ""
    accounts_data: list[dict] | None = None
    tool_provider_map: dict[str, str] | None = None
    skills_catalog_prompt: str = ""
    protocols_prompt: str = ""
    skill_dirs: list[str] = field(default_factory=list)
    # Forwarded as default_skill_warn_ratio / default_skill_batch_nudge.
    context_warn_ratio: float | None = None
    batch_init_nudge: str | None = None
    # --- Optional provider hooks, passed through to NodeContext unchanged ---
    dynamic_tools_provider: Any = None
    dynamic_prompt_provider: Any = None
    dynamic_memory_provider: Any = None
    iteration_metadata_provider: Any = None
    loop_config: dict[str, Any] = field(default_factory=dict)
    # --- Execution bookkeeping ---
    # NOTE(review): _path_lock / _visits_lock presumably guard `path` and
    # `node_visit_counts` respectively — confirm in the executor.
    path: list[str] = field(default_factory=list)
    node_visit_counts: dict[str, int] = field(default_factory=dict)
    _path_lock: asyncio.Lock = field(default_factory=asyncio.Lock)
    _visits_lock: asyncio.Lock = field(default_factory=asyncio.Lock)
    # Fan-out buffer conflict tracking: key → worker_id that wrote it
    _fanout_written_keys: dict[str, str] = field(default_factory=dict)
    # Retry tracking: worker_id → retry_count (for execution quality assessment)
    retry_counts: dict[str, int] = field(default_factory=dict)
    nodes_with_retries: set[str] = field(default_factory=set)
    # Colony memory reflection at node handoff
    colony_memory_dir: Any = None  # Path | None
    worker_sessions_dir: Any = None  # Path | None
    colony_recall_cache: dict[str, str] = field(default_factory=dict)
    colony_reflect_llm: Any = None  # LLMProvider for reflection
    _colony_reflect_lock: asyncio.Lock = field(default_factory=asyncio.Lock)
|
||||
|
||||
|
||||
def build_scoped_buffer(buffer: DataBuffer, node_spec: NodeSpec) -> DataBuffer:
    """Return a view of ``buffer`` limited to the node's declared keys.

    The node's ``input_keys`` become the read permissions and its
    ``output_keys`` the write permissions. Whenever a permission list is
    non-empty (i.e. already restricted), the framework-managed keys are added
    to it: the default skill protocol keys plus every `_`-prefixed key
    currently present in the buffer. An empty list is left empty.
    """
    readable = list(node_spec.input_keys)
    writable = list(node_spec.output_keys)

    # Nothing is restricted, so there is nothing to widen.
    if not readable and not writable:
        return buffer.with_permissions(read_keys=readable, write_keys=writable)

    from framework.skills.defaults import DATA_BUFFER_KEYS as _skill_keys

    underscore_keys = [name for name in buffer._data if name.startswith("_")]
    framework_keys = set(_skill_keys) | set(underscore_keys)

    for permitted in (readable, writable):
        if not permitted:
            continue
        missing = [name for name in framework_keys if name not in permitted]
        permitted.extend(missing)

    return buffer.with_permissions(read_keys=readable, write_keys=writable)
|
||||
|
||||
|
||||
def build_node_accounts_prompt(
    *,
    accounts_prompt: str,
    accounts_data: list[dict] | None,
    tool_provider_map: dict[str, str] | None,
    node_tool_names: list[str] | None,
    fallback_to_default: bool = False,
) -> str:
    """Pick the accounts prompt a single node should see.

    Without both ``accounts_data`` and ``tool_provider_map`` the given
    ``accounts_prompt`` is returned as-is. Otherwise a prompt filtered by
    ``node_tool_names`` is built; it is used unless it is empty and
    ``fallback_to_default`` is set, in which case ``accounts_prompt`` wins.
    """
    if not (accounts_data and tool_provider_map):
        return accounts_prompt

    from framework.graph.prompting import build_accounts_prompt

    node_prompt = build_accounts_prompt(
        accounts_data,
        tool_provider_map,
        node_tool_names=node_tool_names,
    )
    if fallback_to_default and not node_prompt:
        return accounts_prompt
    return node_prompt
|
||||
|
||||
|
||||
def _resolve_available_tools(
    *,
    node_spec: NodeSpec,
    tools: list[Any],
    override_tools: list[Any] | None,
) -> list[Any]:
    """Work out which tools the current node may use.

    An explicit ``override_tools`` list (even an empty one) is returned as a
    copy. Otherwise the result is the subset of ``tools`` whose ``name`` is
    listed in ``node_spec.tools``, in ``tools`` order; a node that declares no
    tools gets none.
    """
    if override_tools is not None:
        return list(override_tools)

    declared = node_spec.tools
    if not declared:
        return []

    selected = []
    for candidate in tools:
        if candidate.name in declared:
            selected.append(candidate)
    return selected
|
||||
|
||||
|
||||
def _derive_input_data(buffer: DataBuffer, input_keys: list[str]) -> dict[str, Any]:
    """Read each of ``input_keys`` from ``buffer`` and keep the non-None values."""
    # Each key is read exactly once, in the order given.
    lookups = ((name, buffer.read(name)) for name in input_keys)
    return {name: found for name, found in lookups if found is not None}
|
||||
|
||||
|
||||
def build_node_context(
    *,
    runtime: Runtime,
    node_spec: NodeSpec,
    buffer: DataBuffer,
    goal: Goal,
    llm: Any,
    tools: list[Any],
    max_tokens: int,
    input_data: dict[str, Any] | None = None,
    derive_input_data_from_buffer: bool = False,
    runtime_logger: Any = None,
    pause_event: Any = None,
    continuous_mode: bool = False,
    inherited_conversation: Any = None,
    override_tools: list[Any] | None = None,
    cumulative_output_keys: list[str] | None = None,
    event_triggered: bool = False,
    accounts_prompt: str = "",
    accounts_data: list[dict] | None = None,
    tool_provider_map: dict[str, str] | None = None,
    fallback_to_default_accounts_prompt: bool = False,
    identity_prompt: str = "",
    narrative: str = "",
    execution_id: str = "",
    run_id: str = "",
    stream_id: str = "",
    node_registry: dict[str, NodeSpec] | None = None,
    all_tools: list[Any] | None = None,
    shared_node_registry: dict[str, NodeProtocol] | None = None,
    dynamic_tools_provider: Any = None,
    dynamic_prompt_provider: Any = None,
    dynamic_memory_provider: Any = None,
    iteration_metadata_provider: Any = None,
    skills_catalog_prompt: str = "",
    protocols_prompt: str = "",
    skill_dirs: list[str] | None = None,
    default_skill_warn_ratio: float | None = None,
    default_skill_batch_nudge: str | None = None,
    memory_prompt: str = "",
) -> NodeContext:
    """Assemble the `NodeContext` handed to a node for execution.

    Derived pieces:
      * tools: ``override_tools`` if given, else ``tools`` filtered by the
        node spec;
      * buffer: a permission-scoped view of ``buffer`` for this node;
      * accounts prompt: resolved per node from the accounts arguments;
      * input data: read from ``buffer`` only when ``input_data`` is None and
        ``derive_input_data_from_buffer`` is set, otherwise a copy of
        ``input_data`` (empty dict when None).

    Optional collections that are None (or empty) are replaced with fresh
    empty containers; ``all_tools`` falls back to ``tools``. Everything else
    is forwarded unchanged.
    """
    node_tools = _resolve_available_tools(
        node_spec=node_spec,
        tools=tools,
        override_tools=override_tools,
    )
    node_buffer = build_scoped_buffer(buffer, node_spec)
    node_accounts_prompt = build_node_accounts_prompt(
        accounts_prompt=accounts_prompt,
        accounts_data=accounts_data,
        tool_provider_map=tool_provider_map,
        node_tool_names=node_spec.tools,
        fallback_to_default=fallback_to_default_accounts_prompt,
    )

    if input_data is None and derive_input_data_from_buffer:
        # Inputs come from the unscoped buffer, keyed by the node's inputs.
        node_inputs = _derive_input_data(buffer, node_spec.input_keys)
    else:
        node_inputs = dict(input_data or {})

    return NodeContext(
        runtime=runtime,
        node_id=node_spec.id,
        node_spec=node_spec,
        buffer=node_buffer,
        input_data=node_inputs,
        llm=llm,
        available_tools=node_tools,
        goal_context=goal.to_prompt_context(),
        goal=goal,
        max_tokens=max_tokens,
        runtime_logger=runtime_logger,
        pause_event=pause_event,
        continuous_mode=continuous_mode,
        inherited_conversation=inherited_conversation,
        cumulative_output_keys=cumulative_output_keys or [],
        event_triggered=event_triggered,
        accounts_prompt=node_accounts_prompt,
        identity_prompt=identity_prompt,
        narrative=narrative,
        memory_prompt=memory_prompt,
        execution_id=execution_id,
        run_id=run_id,
        stream_id=stream_id,
        node_registry=node_registry or {},
        all_tools=list(all_tools or tools),
        shared_node_registry=shared_node_registry or {},
        dynamic_tools_provider=dynamic_tools_provider,
        dynamic_prompt_provider=dynamic_prompt_provider,
        dynamic_memory_provider=dynamic_memory_provider,
        iteration_metadata_provider=iteration_metadata_provider,
        skills_catalog_prompt=skills_catalog_prompt,
        protocols_prompt=protocols_prompt,
        skill_dirs=list(skill_dirs or []),
        default_skill_warn_ratio=default_skill_warn_ratio,
        default_skill_batch_nudge=default_skill_batch_nudge,
    )
|
||||
|
||||
|
||||
def build_node_context_from_graph_context(
    graph_context: GraphContext,
    *,
    node_spec: NodeSpec,
    pause_event: Any = None,
    input_data: dict[str, Any] | None = None,
    derive_input_data_from_buffer: bool = True,
    override_tools: list[Any] | None = None,
    inherited_conversation: Any = None,
    cumulative_output_keys: list[str] | None = None,
    event_triggered: bool = False,
    identity_prompt: str | None = None,
    narrative: str = "",
    node_registry: dict[str, NodeSpec] | None = None,
    fallback_to_default_accounts_prompt: bool = True,
) -> NodeContext:
    """Build a `NodeContext` for ``node_spec`` from the run's `GraphContext`.

    Explicit arguments take precedence. In continuous mode, arguments left as
    None fall back to the graph context's cumulative tools (when non-empty),
    continuous conversation, and cumulative output keys. ``identity_prompt``
    left as None falls back to the graph's ``identity_prompt`` attribute, or
    ``""`` when that is missing or falsy.
    """
    shared = graph_context

    if shared.is_continuous:
        # Copies are taken so the node cannot mutate the shared lists.
        if override_tools is None and shared.cumulative_tools:
            override_tools = list(shared.cumulative_tools)
        if inherited_conversation is None:
            inherited_conversation = shared.continuous_conversation
        if cumulative_output_keys is None:
            cumulative_output_keys = list(shared.cumulative_output_keys)

    if identity_prompt is None:
        identity_prompt = getattr(shared.graph, "identity_prompt", "") or ""

    return build_node_context(
        runtime=shared.runtime,
        node_spec=node_spec,
        buffer=shared.buffer,
        goal=shared.goal,
        llm=shared.llm,
        tools=shared.tools,
        max_tokens=shared.graph.max_tokens,
        input_data=input_data,
        derive_input_data_from_buffer=derive_input_data_from_buffer,
        runtime_logger=shared.runtime_logger,
        pause_event=pause_event,
        continuous_mode=shared.is_continuous,
        inherited_conversation=inherited_conversation,
        override_tools=override_tools,
        cumulative_output_keys=cumulative_output_keys,
        event_triggered=event_triggered,
        accounts_prompt=shared.accounts_prompt,
        accounts_data=shared.accounts_data,
        tool_provider_map=shared.tool_provider_map,
        fallback_to_default_accounts_prompt=fallback_to_default_accounts_prompt,
        identity_prompt=identity_prompt,
        narrative=narrative,
        execution_id=shared.execution_id,
        run_id=shared.run_id,
        stream_id=shared.stream_id,
        node_registry=node_registry or shared.node_spec_registry,
        all_tools=shared.tools,
        shared_node_registry=shared.node_registry,
        dynamic_tools_provider=shared.dynamic_tools_provider,
        dynamic_prompt_provider=shared.dynamic_prompt_provider,
        dynamic_memory_provider=shared.dynamic_memory_provider,
        iteration_metadata_provider=shared.iteration_metadata_provider,
        skills_catalog_prompt=shared.skills_catalog_prompt,
        protocols_prompt=shared.protocols_prompt,
        skill_dirs=shared.skill_dirs,
        default_skill_warn_ratio=shared.context_warn_ratio,
        default_skill_batch_nudge=shared.batch_init_nudge,
    )
|
||||
@@ -8,6 +8,13 @@ from dataclasses import dataclass
|
||||
from pathlib import Path
|
||||
from typing import Any, Literal, Protocol, runtime_checkable
|
||||
|
||||
LEGACY_RUN_ID = "__legacy_run__"


def is_legacy_run_id(run_id: str | None) -> bool:
    """Report whether ``run_id`` denotes pre-migration data with no run boundary.

    Both a missing id (``None``) and the ``LEGACY_RUN_ID`` sentinel count as
    legacy.
    """
    if run_id is None:
        return True
    return run_id == LEGACY_RUN_ID
|
||||
|
||||
|
||||
@dataclass
|
||||
class Message:
|
||||
@@ -37,6 +44,8 @@ class Message:
|
||||
image_content: list[dict[str, Any]] | None = None
|
||||
# True when message contains an activated skill body (AS-10: never prune)
|
||||
is_skill_content: bool = False
|
||||
# Logical worker run identifier for shared-session persistence
|
||||
run_id: str | None = None
|
||||
|
||||
def to_llm_dict(self) -> dict[str, Any]:
|
||||
"""Convert to OpenAI-format message dict."""
|
||||
@@ -93,6 +102,8 @@ class Message:
|
||||
d["is_client_input"] = self.is_client_input
|
||||
if self.image_content is not None:
|
||||
d["image_content"] = self.image_content
|
||||
if self.run_id is not None:
|
||||
d["run_id"] = self.run_id
|
||||
return d
|
||||
|
||||
@classmethod
|
||||
@@ -109,9 +120,40 @@ class Message:
|
||||
is_transition_marker=data.get("is_transition_marker", False),
|
||||
is_client_input=data.get("is_client_input", False),
|
||||
image_content=data.get("image_content"),
|
||||
run_id=data.get("run_id"),
|
||||
)
|
||||
|
||||
|
||||
def _normalize_cursor(cursor: dict[str, Any] | None) -> dict[str, Any]:
    """Return a flat copy of ``cursor``; a missing or empty cursor gives ``{}``."""
    if not cursor:
        return {}
    return dict(cursor)
|
||||
|
||||
|
||||
def get_cursor_next_seq(cursor: dict[str, Any] | None) -> int | None:
    """Return the cursor's ``next_seq`` when it is an int, otherwise ``None``."""
    if not cursor:
        return None
    candidate = cursor.get("next_seq")
    if isinstance(candidate, int):
        return candidate
    return None
|
||||
|
||||
|
||||
def update_cursor_next_seq(cursor: dict[str, Any] | None, next_seq: int) -> dict[str, Any]:
    """Return a copy of ``cursor`` with ``next_seq`` set; the input is not mutated."""
    return {**(cursor or {}), "next_seq": next_seq}
|
||||
|
||||
|
||||
def get_run_cursor(cursor: dict[str, Any] | None, run_id: str | None) -> dict[str, Any] | None:
    """Return a copy of ``cursor``, or ``None`` when it is missing or empty.

    ``run_id`` is accepted but not consulted: the cursor is treated as flat.
    """
    if not cursor:
        return None
    return dict(cursor)
|
||||
|
||||
|
||||
def update_run_cursor(
    cursor: dict[str, Any] | None,
    run_id: str | None,
    values: dict[str, Any],
) -> dict[str, Any]:
    """Return a copy of ``cursor`` overlaid with ``values``.

    ``run_id`` is accepted but not consulted: the cursor is treated as flat.
    Neither input dict is mutated.
    """
    return {**(cursor or {}), **values}
|
||||
|
||||
|
||||
def _extract_spillover_filename(content: str) -> str | None:
|
||||
"""Extract spillover filename from a tool result annotation.
|
||||
|
||||
@@ -261,7 +303,7 @@ class ConversationStore(Protocol):
|
||||
|
||||
async def read_cursor(self) -> dict[str, Any] | None: ...
|
||||
|
||||
async def delete_parts_before(self, seq: int) -> None: ...
|
||||
async def delete_parts_before(self, seq: int, run_id: str | None = None) -> None: ...
|
||||
|
||||
async def close(self) -> None: ...
|
||||
|
||||
@@ -333,6 +375,7 @@ class NodeConversation:
|
||||
compaction_threshold: float = 0.8,
|
||||
output_keys: list[str] | None = None,
|
||||
store: ConversationStore | None = None,
|
||||
run_id: str | None = None,
|
||||
) -> None:
|
||||
self._system_prompt = system_prompt
|
||||
self._max_context_tokens = max_context_tokens
|
||||
@@ -344,6 +387,7 @@ class NodeConversation:
|
||||
self._meta_persisted: bool = False
|
||||
self._last_api_input_tokens: int | None = None
|
||||
self._current_phase: str | None = None
|
||||
self._run_id: str | None = run_id
|
||||
|
||||
# --- Properties --------------------------------------------------------
|
||||
|
||||
@@ -402,12 +446,16 @@ class NodeConversation:
|
||||
role="user",
|
||||
content=content,
|
||||
phase_id=self._current_phase,
|
||||
run_id=self._run_id,
|
||||
is_transition_marker=is_transition_marker,
|
||||
is_client_input=is_client_input,
|
||||
image_content=image_content,
|
||||
)
|
||||
self._messages.append(msg)
|
||||
self._next_seq += 1
|
||||
# Invalidate stale API token count so estimate_tokens() uses
|
||||
# the char-based heuristic which reflects the new message.
|
||||
self._last_api_input_tokens = None
|
||||
await self._persist(msg)
|
||||
return msg
|
||||
|
||||
@@ -422,9 +470,11 @@ class NodeConversation:
|
||||
content=content,
|
||||
tool_calls=tool_calls,
|
||||
phase_id=self._current_phase,
|
||||
run_id=self._run_id,
|
||||
)
|
||||
self._messages.append(msg)
|
||||
self._next_seq += 1
|
||||
self._last_api_input_tokens = None
|
||||
await self._persist(msg)
|
||||
return msg
|
||||
|
||||
@@ -445,9 +495,11 @@ class NodeConversation:
|
||||
phase_id=self._current_phase,
|
||||
image_content=image_content,
|
||||
is_skill_content=is_skill_content,
|
||||
run_id=self._run_id,
|
||||
)
|
||||
self._messages.append(msg)
|
||||
self._next_seq += 1
|
||||
self._last_api_input_tokens = None
|
||||
await self._persist(msg)
|
||||
return msg
|
||||
|
||||
@@ -528,12 +580,15 @@ class NodeConversation:
|
||||
|
||||
Uses actual API input token count when available (set via
|
||||
:meth:`update_token_count`), otherwise falls back to a
|
||||
``total_chars / 4`` heuristic that includes both message content
|
||||
AND tool_call argument sizes.
|
||||
character-based heuristic that includes message content, tool_call
|
||||
arguments, and image blocks. The heuristic applies a 4/3 safety
|
||||
margin to avoid under-counting (inspired by Claude Code's compact
|
||||
service).
|
||||
"""
|
||||
if self._last_api_input_tokens is not None:
|
||||
return self._last_api_input_tokens
|
||||
total_chars = 0
|
||||
image_tokens = 0
|
||||
for m in self._messages:
|
||||
total_chars += len(m.content)
|
||||
if m.tool_calls:
|
||||
@@ -541,7 +596,11 @@ class NodeConversation:
|
||||
func = tc.get("function", {})
|
||||
total_chars += len(func.get("arguments", ""))
|
||||
total_chars += len(func.get("name", ""))
|
||||
return total_chars // 4
|
||||
if m.image_content:
|
||||
# Images/documents have a fixed token cost per block
|
||||
image_tokens += len(m.image_content) * 2000
|
||||
# Apply 4/3 safety margin to character-based estimate
|
||||
return (total_chars * 4) // (3 * 4) + image_tokens
|
||||
|
||||
def update_token_count(self, actual_input_tokens: int) -> None:
|
||||
"""Store actual API input token count for more accurate compaction.
|
||||
@@ -688,6 +747,7 @@ class NodeConversation:
|
||||
is_error=msg.is_error,
|
||||
phase_id=msg.phase_id,
|
||||
is_transition_marker=msg.is_transition_marker,
|
||||
run_id=msg.run_id,
|
||||
)
|
||||
count += 1
|
||||
|
||||
@@ -764,14 +824,14 @@ class NodeConversation:
|
||||
summary_seq = self._next_seq
|
||||
self._next_seq += 1
|
||||
|
||||
summary_msg = Message(seq=summary_seq, role="user", content=summary)
|
||||
summary_msg = Message(seq=summary_seq, role="user", content=summary, run_id=self._run_id)
|
||||
|
||||
# Persist
|
||||
if self._store:
|
||||
delete_before = recent_messages[0].seq if recent_messages else self._next_seq
|
||||
await self._store.delete_parts_before(delete_before)
|
||||
await self._store.write_part(summary_msg.seq, summary_msg.to_storage_dict())
|
||||
await self._store.write_cursor({"next_seq": self._next_seq})
|
||||
await self._write_next_seq()
|
||||
|
||||
self._messages = [summary_msg] + recent_messages
|
||||
self._last_api_input_tokens = None # reset; next LLM call will recalibrate
|
||||
@@ -829,6 +889,15 @@ class NodeConversation:
|
||||
freeform_lines: list[str] = []
|
||||
collapsed_msgs: list[Message] = []
|
||||
|
||||
# Collect all tool_use IDs present in old messages so we can detect
|
||||
# orphaned tool results whose parent assistant message was already
|
||||
# compacted away (API invariant protection).
|
||||
old_tc_ids: set[str] = set()
|
||||
for msg in old_messages:
|
||||
if msg.tool_calls:
|
||||
for tc in msg.tool_calls:
|
||||
old_tc_ids.add(tc.get("id", ""))
|
||||
|
||||
if aggressive:
|
||||
# Aggressive: only keep set_output tool pairs and error results.
|
||||
# Everything else is collapsed into a tool-call history summary.
|
||||
@@ -850,9 +919,17 @@ class NodeConversation:
|
||||
else:
|
||||
collapsible_tc_ids |= tc_ids
|
||||
|
||||
# Skill content and transition markers are always protected
|
||||
for msg in old_messages:
|
||||
if msg.role == "tool" and msg.is_skill_content and msg.tool_use_id:
|
||||
protected_tc_ids.add(msg.tool_use_id)
|
||||
|
||||
# Second pass: classify all messages
|
||||
for msg in old_messages:
|
||||
if msg.role == "tool":
|
||||
if msg.is_transition_marker:
|
||||
# Transition markers are always kept (phase boundaries)
|
||||
kept_structural.append(msg)
|
||||
elif msg.role == "tool":
|
||||
tc_id = msg.tool_use_id or ""
|
||||
if tc_id in protected_tc_ids:
|
||||
kept_structural.append(msg)
|
||||
@@ -861,6 +938,12 @@ class NodeConversation:
|
||||
kept_structural.append(msg)
|
||||
# Protect the parent assistant message too
|
||||
protected_tc_ids.add(tc_id)
|
||||
elif msg.is_skill_content:
|
||||
kept_structural.append(msg)
|
||||
elif tc_id and tc_id not in old_tc_ids:
|
||||
# Orphaned tool result — parent tool_use not in old msgs.
|
||||
# Keep it to maintain API invariants.
|
||||
kept_structural.append(msg)
|
||||
else:
|
||||
collapsed_msgs.append(msg)
|
||||
elif msg.role == "assistant" and msg.tool_calls:
|
||||
@@ -877,6 +960,7 @@ class NodeConversation:
|
||||
is_error=msg.is_error,
|
||||
phase_id=msg.phase_id,
|
||||
is_transition_marker=msg.is_transition_marker,
|
||||
run_id=msg.run_id,
|
||||
)
|
||||
)
|
||||
else:
|
||||
@@ -891,7 +975,10 @@ class NodeConversation:
|
||||
else:
|
||||
# Standard mode: keep all tool call pairs as structural
|
||||
for msg in old_messages:
|
||||
if msg.role == "tool":
|
||||
if msg.is_transition_marker:
|
||||
# Transition markers are always kept (phase boundaries)
|
||||
kept_structural.append(msg)
|
||||
elif msg.role == "tool":
|
||||
kept_structural.append(msg)
|
||||
elif msg.role == "assistant" and msg.tool_calls:
|
||||
compact_tcs = _compact_tool_calls(msg.tool_calls)
|
||||
@@ -904,6 +991,7 @@ class NodeConversation:
|
||||
is_error=msg.is_error,
|
||||
phase_id=msg.phase_id,
|
||||
is_transition_marker=msg.is_transition_marker,
|
||||
run_id=msg.run_id,
|
||||
)
|
||||
)
|
||||
else:
|
||||
@@ -961,7 +1049,7 @@ class NodeConversation:
|
||||
ref_seq = self._next_seq
|
||||
self._next_seq += 1
|
||||
|
||||
ref_msg = Message(seq=ref_seq, role="user", content=ref_content)
|
||||
ref_msg = Message(seq=ref_seq, role="user", content=ref_content, run_id=self._run_id)
|
||||
|
||||
# Persist: delete old messages from store, write reference + kept structural.
|
||||
# In aggressive mode, collapsed messages may be interspersed with kept
|
||||
@@ -975,7 +1063,7 @@ class NodeConversation:
|
||||
# Write kept structural messages (they may have been modified)
|
||||
for msg in kept_structural:
|
||||
await self._store.write_part(msg.seq, msg.to_storage_dict())
|
||||
await self._store.write_cursor({"next_seq": self._next_seq})
|
||||
await self._write_next_seq()
|
||||
|
||||
# Reassemble: reference + kept structural (in original order) + recent
|
||||
self._messages = [ref_msg] + kept_structural + recent_messages
|
||||
@@ -1012,7 +1100,7 @@ class NodeConversation:
|
||||
"""Remove all messages, keep system prompt, preserve ``_next_seq``."""
|
||||
if self._store:
|
||||
await self._store.delete_parts_before(self._next_seq)
|
||||
await self._store.write_cursor({"next_seq": self._next_seq})
|
||||
await self._write_next_seq()
|
||||
self._messages.clear()
|
||||
self._last_api_input_tokens = None
|
||||
|
||||
@@ -1054,22 +1142,32 @@ class NodeConversation:
|
||||
if not self._meta_persisted:
|
||||
await self._persist_meta()
|
||||
await self._store.write_part(message.seq, message.to_storage_dict())
|
||||
await self._store.write_cursor({"next_seq": self._next_seq})
|
||||
await self._write_next_seq()
|
||||
|
||||
async def _persist_meta(self) -> None:
|
||||
"""Lazily write conversation metadata to the store (called once)."""
|
||||
"""Lazily write conversation metadata to the store (called once).
|
||||
|
||||
When ``self._run_id`` is set, metadata is written flat for backward
|
||||
compatibility (run-scoped isolation has been reverted).
|
||||
"""
|
||||
if self._store is None:
|
||||
return
|
||||
await self._store.write_meta(
|
||||
{
|
||||
"system_prompt": self._system_prompt,
|
||||
"max_context_tokens": self._max_context_tokens,
|
||||
"compaction_threshold": self._compaction_threshold,
|
||||
"output_keys": self._output_keys,
|
||||
}
|
||||
)
|
||||
run_meta = {
|
||||
"system_prompt": self._system_prompt,
|
||||
"max_context_tokens": self._max_context_tokens,
|
||||
"compaction_threshold": self._compaction_threshold,
|
||||
"output_keys": self._output_keys,
|
||||
}
|
||||
await self._store.write_meta(run_meta)
|
||||
self._meta_persisted = True
|
||||
|
||||
async def _write_next_seq(self) -> None:
|
||||
if self._store is None:
|
||||
return
|
||||
cursor = await self._store.read_cursor() or {}
|
||||
cursor["next_seq"] = self._next_seq
|
||||
await self._store.write_cursor(cursor)
|
||||
|
||||
# --- Restore -----------------------------------------------------------
|
||||
|
||||
@classmethod
|
||||
@@ -1077,6 +1175,7 @@ class NodeConversation:
|
||||
cls,
|
||||
store: ConversationStore,
|
||||
phase_id: str | None = None,
|
||||
run_id: str | None = None,
|
||||
) -> NodeConversation | None:
|
||||
"""Reconstruct a NodeConversation from a store.
|
||||
|
||||
@@ -1086,6 +1185,9 @@ class NodeConversation:
|
||||
Used in isolated mode so a node only sees its own
|
||||
messages in the shared flat store. In continuous mode
|
||||
pass ``None`` to load all parts.
|
||||
run_id: If set, only load parts matching this run_id.
|
||||
Ensures intentional restarts (new run_id) start fresh
|
||||
while crash recovery (same run_id) resumes correctly.
|
||||
|
||||
Returns ``None`` if the store contains no metadata (i.e. the
|
||||
conversation was never persisted).
|
||||
@@ -1100,17 +1202,23 @@ class NodeConversation:
|
||||
compaction_threshold=meta.get("compaction_threshold", 0.8),
|
||||
output_keys=meta.get("output_keys"),
|
||||
store=store,
|
||||
run_id=run_id,
|
||||
)
|
||||
conv._meta_persisted = True
|
||||
|
||||
parts = await store.read_parts()
|
||||
if phase_id:
|
||||
parts = [p for p in parts if p.get("phase_id") == phase_id]
|
||||
# Filter by run_id so intentional restarts (new run_id) start fresh
|
||||
# while crash recovery (same run_id) loads prior parts.
|
||||
if run_id and not is_legacy_run_id(run_id):
|
||||
parts = [p for p in parts if p.get("run_id") == run_id]
|
||||
conv._messages = [Message.from_storage_dict(p) for p in parts]
|
||||
|
||||
cursor = await store.read_cursor()
|
||||
if cursor:
|
||||
conv._next_seq = cursor["next_seq"]
|
||||
next_seq = get_cursor_next_seq(cursor)
|
||||
if next_seq is not None:
|
||||
conv._next_seq = next_seq
|
||||
elif conv._messages:
|
||||
conv._next_seq = conv._messages[-1].seq + 1
|
||||
|
||||
|
||||
@@ -108,7 +108,7 @@ class EdgeSpec(BaseModel):
|
||||
self,
|
||||
source_success: bool,
|
||||
source_output: dict[str, Any],
|
||||
memory: dict[str, Any],
|
||||
buffer_data: dict[str, Any],
|
||||
llm: Any | None = None,
|
||||
goal: Any | None = None,
|
||||
source_node_name: str | None = None,
|
||||
@@ -120,7 +120,7 @@ class EdgeSpec(BaseModel):
|
||||
Args:
|
||||
source_success: Whether the source node succeeded
|
||||
source_output: Output from the source node
|
||||
memory: Current shared memory state
|
||||
buffer_data: Current data buffer state
|
||||
llm: LLM provider for LLM_DECIDE edges
|
||||
goal: Goal object for LLM_DECIDE edges
|
||||
source_node_name: Name of source node (for LLM context)
|
||||
@@ -139,7 +139,7 @@ class EdgeSpec(BaseModel):
|
||||
return not source_success
|
||||
|
||||
if self.condition == EdgeCondition.CONDITIONAL:
|
||||
return self._evaluate_condition(source_output, memory)
|
||||
return self._evaluate_condition(source_output, buffer_data)
|
||||
|
||||
if self.condition == EdgeCondition.LLM_DECIDE:
|
||||
if llm is None or goal is None:
|
||||
@@ -150,7 +150,7 @@ class EdgeSpec(BaseModel):
|
||||
goal=goal,
|
||||
source_success=source_success,
|
||||
source_output=source_output,
|
||||
memory=memory,
|
||||
buffer_data=buffer_data,
|
||||
source_node_name=source_node_name,
|
||||
target_node_name=target_node_name,
|
||||
)
|
||||
@@ -160,7 +160,7 @@ class EdgeSpec(BaseModel):
|
||||
def _evaluate_condition(
|
||||
self,
|
||||
output: dict[str, Any],
|
||||
memory: dict[str, Any],
|
||||
buffer_data: dict[str, Any],
|
||||
) -> bool:
|
||||
"""Evaluate a conditional expression."""
|
||||
|
||||
@@ -168,14 +168,14 @@ class EdgeSpec(BaseModel):
|
||||
return True
|
||||
|
||||
# Build evaluation context
|
||||
# Include memory keys directly for easier access in conditions
|
||||
# Include buffer keys directly for easier access in conditions
|
||||
context = {
|
||||
"output": output,
|
||||
"memory": memory,
|
||||
"buffer": buffer_data,
|
||||
"result": output.get("result"),
|
||||
"true": True, # Allow lowercase true/false in conditions
|
||||
"false": False,
|
||||
**memory, # Unpack memory keys directly into context
|
||||
**buffer_data, # Unpack buffer keys directly into context
|
||||
}
|
||||
|
||||
try:
|
||||
@@ -186,7 +186,7 @@ class EdgeSpec(BaseModel):
|
||||
expr_vars = {
|
||||
k: repr(context[k])
|
||||
for k in context
|
||||
if k not in ("output", "memory", "result", "true", "false")
|
||||
if k not in ("output", "buffer", "result", "true", "false")
|
||||
and k in self.condition_expr
|
||||
}
|
||||
logger.info(
|
||||
@@ -209,7 +209,7 @@ class EdgeSpec(BaseModel):
|
||||
goal: Any,
|
||||
source_success: bool,
|
||||
source_output: dict[str, Any],
|
||||
memory: dict[str, Any],
|
||||
buffer_data: dict[str, Any],
|
||||
source_node_name: str | None,
|
||||
target_node_name: str | None,
|
||||
) -> bool:
|
||||
@@ -234,8 +234,8 @@ class EdgeSpec(BaseModel):
|
||||
Should we proceed to: {target_node_name or self.target}?
|
||||
Edge description: {self.description or "No description"}
|
||||
|
||||
**Context from memory**:
|
||||
{json.dumps({k: str(v)[:100] for k, v in list(memory.items())[:5]}, indent=2)}
|
||||
**Context from data buffer**:
|
||||
{json.dumps({k: str(v)[:100] for k, v in list(buffer_data.items())[:5]}, indent=2)}
|
||||
|
||||
Evaluate whether proceeding to this next node is the right step toward achieving the goal.
|
||||
Consider:
|
||||
@@ -276,14 +276,14 @@ Respond with ONLY a JSON object:
|
||||
def map_inputs(
|
||||
self,
|
||||
source_output: dict[str, Any],
|
||||
memory: dict[str, Any],
|
||||
buffer_data: dict[str, Any],
|
||||
) -> dict[str, Any]:
|
||||
"""
|
||||
Map source outputs to target inputs.
|
||||
|
||||
Args:
|
||||
source_output: Output from source node
|
||||
memory: Current shared memory
|
||||
buffer_data: Current data buffer
|
||||
|
||||
Returns:
|
||||
Input dict for target node
|
||||
@@ -294,72 +294,14 @@ Respond with ONLY a JSON object:
|
||||
|
||||
result = {}
|
||||
for target_key, source_key in self.input_mapping.items():
|
||||
# Try source output first, then memory
|
||||
# Try source output first, then buffer
|
||||
if source_key in source_output:
|
||||
result[target_key] = source_output[source_key]
|
||||
elif source_key in memory:
|
||||
result[target_key] = memory[source_key]
|
||||
elif source_key in buffer_data:
|
||||
result[target_key] = buffer_data[source_key]
|
||||
|
||||
return result
|
||||
|
||||
|
||||
class AsyncEntryPointSpec(BaseModel):
    """
    Specification for an asynchronous entry point.

    Used with AgentRuntime for multi-entry-point agents that handle
    concurrent execution streams (e.g., webhook + API handlers).

    Example:
        AsyncEntryPointSpec(
            id="webhook",
            name="Zendesk Webhook Handler",
            entry_node="process-webhook",
            trigger_type="webhook",
            isolation_level="shared",
        )
    """

    # --- Identity -----------------------------------------------------
    id: str = Field(
        description="Unique identifier for this entry point",
    )
    name: str = Field(
        description="Human-readable name",
    )

    # Kept only for backward compatibility; see the field description.
    entry_node: str = Field(
        default="",
        description=(
            "Deprecated: Node ID to start execution from. "
            "Triggers are graph-level; worker always enters at GraphSpec.entry_node."
        ),
    )

    # --- Trigger ------------------------------------------------------
    trigger_type: str = Field(
        default="manual",
        description="How this entry point is triggered: webhook, api, timer, event, manual",
    )
    trigger_config: dict[str, Any] = Field(
        default_factory=dict,
        description="Trigger-specific configuration (e.g., webhook URL, timer interval)",
    )
    task: str = Field(
        default="",
        description="Worker task string when this trigger fires autonomously",
    )

    # --- Scheduling / isolation ----------------------------------------
    isolation_level: str = Field(
        default="shared",
        description="State isolation: isolated, shared, or synchronized",
    )
    priority: int = Field(
        default=0,
        description="Execution priority (higher = more priority)",
    )
    max_concurrent: int = Field(
        default=10,
        description="Maximum concurrent executions for this entry point",
    )
    max_resurrections: int = Field(
        default=3,
        description="Auto-restart on non-fatal failure (0 to disable)",
    )

    # Unknown keys are accepted rather than rejected.
    model_config = {"extra": "allow"}

    def get_isolation_level(self):
        """Return ``isolation_level`` as an ``IsolationLevel`` enum member.

        Provided so this spec duck-types with EntryPointSpec. Raises whatever
        the enum constructor raises when the stored string is not a valid
        member value.
        """
        # Function-scope import; presumably avoids an import cycle with the
        # runtime package -- confirm before hoisting to module level.
        from framework.runtime.execution_stream import IsolationLevel

        level = IsolationLevel(self.isolation_level)
        return level
|
||||
|
||||
|
||||
class GraphSpec(BaseModel):
|
||||
"""
|
||||
Complete specification of an agent graph.
|
||||
@@ -403,9 +345,9 @@ class GraphSpec(BaseModel):
|
||||
)
|
||||
edges: list[EdgeSpec] = Field(default_factory=list, description="All edge specifications")
|
||||
|
||||
# Shared memory keys
|
||||
memory_keys: list[str] = Field(
|
||||
default_factory=list, description="Keys available in shared memory"
|
||||
# Data buffer keys
|
||||
buffer_keys: list[str] = Field(
|
||||
default_factory=list, description="Keys available in data buffer"
|
||||
)
|
||||
|
||||
# Default LLM settings
|
||||
@@ -609,21 +551,16 @@ class GraphSpec(BaseModel):
|
||||
continue
|
||||
errors.append(f"Node '{node.id}' is unreachable from entry")
|
||||
|
||||
# Client-facing fan-out validation
|
||||
fan_outs = self.detect_fan_out_nodes()
|
||||
for source_id, targets in fan_outs.items():
|
||||
client_facing_targets = [
|
||||
t
|
||||
for t in targets
|
||||
if self.get_node(t) and getattr(self.get_node(t), "client_facing", False)
|
||||
]
|
||||
if len(client_facing_targets) > 1:
|
||||
errors.append(
|
||||
f"Fan-out from '{source_id}' has multiple client-facing nodes: "
|
||||
f"{client_facing_targets}. Only one branch may be client-facing."
|
||||
for node in self.nodes:
|
||||
if getattr(node, "client_facing", False) and getattr(node, "id", "") != "queen":
|
||||
warnings.append(
|
||||
f"Node '{node.id}' sets deprecated client_facing=True. "
|
||||
"Only the queen talks directly to users now; migrate this node "
|
||||
"to queen-mediated escalation."
|
||||
)
|
||||
|
||||
# Output key overlap on parallel event_loop nodes
|
||||
fan_outs = self.detect_fan_out_nodes()
|
||||
for source_id, targets in fan_outs.items():
|
||||
event_loop_targets = [
|
||||
t
|
||||
|
||||
@@ -1,7 +1,8 @@
|
||||
"""Conversation compaction pipeline.
|
||||
|
||||
Implements the multi-level compaction strategy:
|
||||
1. Prune old tool results
|
||||
0. Microcompaction (count-based tool result clearing — cheapest)
|
||||
1. Prune old tool results (token-budget based)
|
||||
2. Structure-preserving compaction (spillover)
|
||||
3. LLM summary compaction (with recursive splitting)
|
||||
4. Emergency deterministic summary (no LLM)
|
||||
@@ -13,11 +14,12 @@ import json
|
||||
import logging
|
||||
import os
|
||||
import re
|
||||
import time
|
||||
from datetime import UTC, datetime
|
||||
from pathlib import Path
|
||||
from typing import Any
|
||||
|
||||
from framework.graph.conversation import NodeConversation
|
||||
from framework.graph.conversation import Message, NodeConversation
|
||||
from framework.graph.event_loop.event_publishing import publish_context_usage
|
||||
from framework.graph.event_loop.types import LoopConfig, OutputAccumulator
|
||||
from framework.graph.node import NodeContext
|
||||
@@ -29,6 +31,121 @@ logger = logging.getLogger(__name__)
|
||||
LLM_COMPACT_CHAR_LIMIT: int = 240_000
|
||||
LLM_COMPACT_MAX_DEPTH: int = 10
|
||||
|
||||
# Microcompaction: tools whose results can be safely cleared
|
||||
COMPACTABLE_TOOLS: frozenset[str] = frozenset(
|
||||
{
|
||||
"read_file",
|
||||
"run_command",
|
||||
"web_search",
|
||||
"web_fetch",
|
||||
"grep_search",
|
||||
"glob_search",
|
||||
"write_file",
|
||||
"edit_file",
|
||||
"browser_screenshot",
|
||||
"list_directory",
|
||||
}
|
||||
)
|
||||
|
||||
# Keep at most this many compactable tool results; clear older ones
|
||||
MICROCOMPACT_KEEP_RECENT: int = 8
|
||||
|
||||
# Circuit-breaker: stop auto-compacting after this many consecutive failures
|
||||
MAX_CONSECUTIVE_FAILURES: int = 3
|
||||
|
||||
# Track consecutive compaction failures per conversation (module-level)
|
||||
_failure_counts: dict[int, int] = {}
|
||||
|
||||
# Track last compaction time per conversation for recompaction detection
|
||||
_last_compact_times: dict[int, float] = {}
|
||||
|
||||
|
||||
def microcompact(
    conversation: NodeConversation,
    *,
    keep_recent: int = MICROCOMPACT_KEEP_RECENT,
) -> int:
    """Clear old compactable tool results by count, keeping only the most recent.

    This is the cheapest possible compaction — no LLM call, no structural
    changes, just replaces old tool result content with a short placeholder.
    Inspired by Claude Code's cached-microcompact strategy.

    Args:
        conversation: Conversation whose tool-result messages are rewritten
            in place (via ``conversation._messages``).
        keep_recent: Number of most recent compactable tool results to leave
            untouched. Negative values are treated as 0 so the "keep N"
            contract cannot be inverted by slice semantics.

    Returns the number of tool results cleared.
    """
    messages = conversation.messages

    # Index tool calls by id in one pass so each tool result is resolved with
    # a dict lookup instead of rescanning the whole history per message.
    # setdefault keeps the first occurrence, matching a front-to-back scan.
    first_call_by_id: dict = {}
    for candidate in messages:
        for call in candidate.tool_calls or ():
            first_call_by_id.setdefault(call.get("id"), call)

    # Collect indices of compactable tool results (newest first)
    compactable_indices: list[int] = []
    for i in range(len(messages) - 1, -1, -1):
        msg = messages[i]
        if msg.role != "tool" or msg.is_error or msg.is_skill_content:
            continue
        # Already replaced by an earlier pruning / microcompact pass.
        if msg.content.startswith(("[Pruned tool result", "[Old tool result")):
            continue
        # Too small to be worth replacing with a placeholder.
        if len(msg.content) < 100:
            continue
        if not msg.tool_use_id:
            continue

        # Check if the tool that produced this result is compactable
        call = first_call_by_id.get(msg.tool_use_id)
        if call is None:
            continue
        tool_name = call.get("function", {}).get("name")
        if tool_name and tool_name in COMPACTABLE_TOOLS:
            compactable_indices.append(i)

    # Keep the most recent N, clear the rest
    to_clear = compactable_indices[max(keep_recent, 0):]
    if not to_clear:
        return 0

    for i in to_clear:
        msg = messages[i]
        spillover = _extract_spillover_filename_inline(msg.content)
        orig_len = len(msg.content)
        if spillover:
            placeholder = (
                f"[Old tool result cleared: {orig_len} chars. "
                f"Full data in '{spillover}'. "
                f"Use load_data('{spillover}') to retrieve.]"
            )
        else:
            placeholder = f"[Old tool result cleared: {orig_len} chars.]"

        # Mutate in-place (microcompact is synchronous, no store writes)
        conversation._messages[i] = Message(
            seq=msg.seq,
            role=msg.role,
            content=placeholder,
            tool_use_id=msg.tool_use_id,
            tool_calls=msg.tool_calls,
            is_error=msg.is_error,
            phase_id=msg.phase_id,
            is_transition_marker=msg.is_transition_marker,
        )

    # Invalidate cached token count: message contents just changed.
    conversation._last_api_input_tokens = None

    return len(to_clear)
|
||||
|
||||
|
||||
def _find_tool_name_for_result(messages: list[Message], tool_msg: Message) -> str | None:
    """Resolve which tool produced *tool_msg* by matching its ``tool_use_id``.

    Scans *messages* front to back and inspects every entry in each message's
    ``tool_calls``. The first call whose ``"id"`` equals the result's
    ``tool_use_id`` decides the answer: its ``function.name`` is returned
    (which may itself be ``None`` when absent). Returns ``None`` when the
    result carries no ``tool_use_id`` or no call matches.
    """
    wanted_id = tool_msg.tool_use_id
    if not wanted_id:
        return None

    matching_call = next(
        (
            call
            for candidate in messages
            if candidate.tool_calls
            for call in candidate.tool_calls
            if call.get("id") == wanted_id
        ),
        None,
    )
    if matching_call is None:
        return None
    return matching_call.get("function", {}).get("name")
|
||||
|
||||
|
||||
def _extract_spillover_filename_inline(content: str) -> str | None:
    """Return the single-quoted filename that follows "saved to" in *content*.

    The match is case-insensitive and the first occurrence wins. Returns
    ``None`` when *content* has no such marker.
    """
    found = re.search(r"saved to '([^']+)'", content, re.IGNORECASE)
    if found is None:
        return None
    return found.group(1)
|
||||
|
||||
|
||||
async def compact(
|
||||
ctx: NodeContext,
|
||||
@@ -43,11 +160,31 @@ async def compact(
|
||||
"""Run the full compaction pipeline if conversation needs compaction.
|
||||
|
||||
Pipeline stages (in order, short-circuits when budget is restored):
|
||||
1. Prune old tool results
|
||||
0. Microcompaction (count-based tool result clearing — cheapest)
|
||||
1. Prune old tool results (token-budget based)
|
||||
2. Structure-preserving compaction (free, no LLM)
|
||||
3. LLM summary compaction (recursive split if too large)
|
||||
4. Emergency deterministic summary (fallback)
|
||||
"""
|
||||
conv_id = id(conversation)
|
||||
|
||||
# Circuit breaker: stop auto-compacting after repeated failures
|
||||
if _failure_counts.get(conv_id, 0) >= MAX_CONSECUTIVE_FAILURES:
|
||||
logger.warning(
|
||||
"Circuit breaker: skipping compaction after %d consecutive failures",
|
||||
_failure_counts[conv_id],
|
||||
)
|
||||
return
|
||||
|
||||
# Recompaction detection
|
||||
now = time.monotonic()
|
||||
last_time = _last_compact_times.get(conv_id)
|
||||
if last_time is not None and (now - last_time) < 30:
|
||||
logger.warning(
|
||||
"Recompaction chain detected: only %.1fs since last compaction",
|
||||
now - last_time,
|
||||
)
|
||||
|
||||
ratio_before = conversation.usage_ratio()
|
||||
phase_grad = getattr(ctx, "continuous_mode", False)
|
||||
pre_inventory: list[dict[str, Any]] | None = None
|
||||
@@ -55,6 +192,26 @@ async def compact(
|
||||
if ratio_before >= 1.0:
|
||||
pre_inventory = build_message_inventory(conversation)
|
||||
|
||||
# --- Step 0: Microcompaction (count-based, cheapest) ---
|
||||
mc_cleared = microcompact(conversation)
|
||||
if mc_cleared > 0:
|
||||
logger.info(
|
||||
"Microcompact cleared %d old tool results: %.0f%% -> %.0f%%",
|
||||
mc_cleared,
|
||||
ratio_before * 100,
|
||||
conversation.usage_ratio() * 100,
|
||||
)
|
||||
if not conversation.needs_compaction():
|
||||
_record_success(conv_id, now)
|
||||
await log_compaction(
|
||||
ctx,
|
||||
conversation,
|
||||
ratio_before,
|
||||
event_bus,
|
||||
pre_inventory=pre_inventory,
|
||||
)
|
||||
return
|
||||
|
||||
# --- Step 1: Prune old tool results (free, fast) ---
|
||||
protect = max(2000, config.max_context_tokens // 12)
|
||||
pruned = await conversation.prune_old_tool_results(
|
||||
@@ -69,6 +226,7 @@ async def compact(
|
||||
conversation.usage_ratio() * 100,
|
||||
)
|
||||
if not conversation.needs_compaction():
|
||||
_record_success(conv_id, now)
|
||||
await log_compaction(
|
||||
ctx,
|
||||
conversation,
|
||||
@@ -87,6 +245,7 @@ async def compact(
|
||||
phase_graduated=phase_grad,
|
||||
)
|
||||
if not conversation.needs_compaction():
|
||||
_record_success(conv_id, now)
|
||||
await log_compaction(
|
||||
ctx,
|
||||
conversation,
|
||||
@@ -118,8 +277,10 @@ async def compact(
|
||||
)
|
||||
except Exception as e:
|
||||
logger.warning("LLM compaction failed: %s", e)
|
||||
_failure_counts[conv_id] = _failure_counts.get(conv_id, 0) + 1
|
||||
|
||||
if not conversation.needs_compaction():
|
||||
_record_success(conv_id, now)
|
||||
await log_compaction(
|
||||
ctx,
|
||||
conversation,
|
||||
@@ -140,6 +301,7 @@ async def compact(
|
||||
keep_recent=1,
|
||||
phase_graduated=phase_grad,
|
||||
)
|
||||
_record_success(conv_id, now)
|
||||
await log_compaction(
|
||||
ctx,
|
||||
conversation,
|
||||
@@ -149,9 +311,46 @@ async def compact(
|
||||
)
|
||||
|
||||
|
||||
def _record_success(conv_id: int, timestamp: float) -> None:
    """Mark a compaction of conversation *conv_id* as successful.

    Stores *timestamp* as the conversation's last compaction time and drops
    any accumulated failure count for it.
    """
    _last_compact_times[conv_id] = timestamp
    # Removing the entry (rather than zeroing it) resets the counter.
    if conv_id in _failure_counts:
        del _failure_counts[conv_id]
|
||||
|
||||
|
||||
# --- LLM compaction with binary-search splitting ----------------------
|
||||
|
||||
|
||||
def strip_images_from_messages(messages: list[Message]) -> list[Message]:
    """Return a copy of *messages* with image payloads swapped for text markers.

    Every message that carries ``image_content`` is rebuilt with that field
    set to ``None`` and one ``[image]`` marker per attachment appended to its
    text (on a new line when the message already has text). Messages without
    images are passed through as the same objects. The input list and its
    messages are never mutated.
    """

    def _without_images(original: Message) -> Message:
        # One "[image]" token per attachment, space-separated.
        placeholder = " ".join(["[image]"] * len(original.image_content))
        text = f"{original.content}\n{placeholder}" if original.content else placeholder
        return Message(
            seq=original.seq,
            role=original.role,
            content=text,
            tool_use_id=original.tool_use_id,
            tool_calls=original.tool_calls,
            is_error=original.is_error,
            phase_id=original.phase_id,
            is_transition_marker=original.is_transition_marker,
            image_content=None,  # stripped
        )

    return [_without_images(m) if m.image_content else m for m in messages]
|
||||
|
||||
|
||||
async def llm_compact(
|
||||
ctx: NodeContext,
|
||||
messages: list,
|
||||
@@ -175,6 +374,10 @@ async def llm_compact(
|
||||
if _depth > max_depth:
|
||||
raise RuntimeError(f"LLM compaction recursion limit ({max_depth})")
|
||||
|
||||
# Strip images before summarisation to avoid wasting tokens
|
||||
if _depth == 0:
|
||||
messages = strip_images_from_messages(messages)
|
||||
|
||||
formatted = format_messages_for_summary(messages)
|
||||
|
||||
# Proactive split: avoid wasting an API call on oversized input
|
||||
@@ -297,7 +500,12 @@ def build_llm_compaction_prompt(
|
||||
*,
|
||||
max_context_tokens: int = 128_000,
|
||||
) -> str:
|
||||
"""Build prompt for LLM compaction targeting 50% of token budget."""
|
||||
"""Build prompt for LLM compaction targeting 50% of token budget.
|
||||
|
||||
Uses a structured section format inspired by Claude Code's compact
|
||||
service. Each section focuses on a different aspect of the conversation
|
||||
so the summariser produces consistently useful, well-organised output.
|
||||
"""
|
||||
spec = ctx.node_spec
|
||||
ctx_lines = [f"NODE: {spec.name} (id={spec.id})"]
|
||||
if spec.description:
|
||||
@@ -330,13 +538,30 @@ def build_llm_compaction_prompt(
|
||||
f"CONVERSATION MESSAGES:\n{formatted_messages}\n\n"
|
||||
"INSTRUCTIONS:\n"
|
||||
f"Write a summary of approximately {target_chars} characters "
|
||||
f"(~{target_tokens} tokens).\n"
|
||||
"1. Preserve ALL user-stated rules, constraints, and preferences "
|
||||
"verbatim.\n"
|
||||
"2. Preserve key decisions made and results obtained.\n"
|
||||
"3. Preserve in-progress work state so the agent can continue.\n"
|
||||
"4. Be detailed enough that the agent can resume without "
|
||||
"re-doing work.\n"
|
||||
f"(~{target_tokens} tokens).\n\n"
|
||||
"Organise the summary into these sections (omit empty ones):\n\n"
|
||||
"1. **Primary Request and Intent** — What the user originally asked "
|
||||
"for and the high-level goal the agent is working toward.\n"
|
||||
"2. **Key Technical Concepts** — Important domain-specific terms, "
|
||||
"patterns, or architectural decisions established in the conversation.\n"
|
||||
"3. **Files and Code Sections** — Specific files read/written/edited "
|
||||
"with brief descriptions of changes. Include short code snippets only "
|
||||
"when they capture critical logic.\n"
|
||||
"4. **Errors and Fixes** — Problems encountered and how they were "
|
||||
"resolved. Include root causes so the agent doesn't repeat them.\n"
|
||||
"5. **Problem Solving Efforts** — Approaches tried, dead ends hit, "
|
||||
"and reasoning behind the current strategy.\n"
|
||||
"6. **User Messages** — Preserve ALL user-stated rules, constraints, "
|
||||
"identity preferences, and account details verbatim.\n"
|
||||
"7. **Pending Tasks** — Work remaining, outputs still needed, and "
|
||||
"any blockers.\n"
|
||||
"8. **Current Work** — The most recent action taken and the immediate "
|
||||
"next step the agent should perform. This section is the most important "
|
||||
"for seamless resumption.\n\n"
|
||||
"Additional rules:\n"
|
||||
"- Be detailed enough that the agent can resume without re-doing work.\n"
|
||||
"- Preserve key decisions made and results obtained.\n"
|
||||
"- When in doubt, keep information rather than discard it.\n"
|
||||
)
|
||||
|
||||
|
||||
@@ -551,7 +776,7 @@ def build_emergency_summary(
|
||||
# 2. Inputs the node received
|
||||
input_lines = []
|
||||
for key in spec.input_keys:
|
||||
value = ctx.input_data.get(key) or ctx.memory.read(key)
|
||||
value = ctx.input_data.get(key) or ctx.buffer.read(key)
|
||||
if value is not None:
|
||||
# Truncate long values but keep them recognisable
|
||||
v_str = str(value)
|
||||
@@ -580,8 +805,6 @@ def build_emergency_summary(
|
||||
|
||||
# 5. Spillover files — list actual files so the LLM can load
|
||||
# them immediately instead of having to call list_data_files first.
|
||||
# Inline adapt.md (agent memory) directly — it contains user rules
|
||||
# and identity preferences that must survive emergency compaction.
|
||||
spillover_dir = config.spillover_dir if config else None
|
||||
if spillover_dir:
|
||||
try:
|
||||
@@ -589,16 +812,7 @@ def build_emergency_summary(
|
||||
|
||||
data_dir = Path(spillover_dir)
|
||||
if data_dir.is_dir():
|
||||
# Inline adapt.md content directly
|
||||
adapt_path = data_dir / "adapt.md"
|
||||
if adapt_path.is_file():
|
||||
adapt_text = adapt_path.read_text(encoding="utf-8").strip()
|
||||
if adapt_text:
|
||||
parts.append(f"AGENT MEMORY (adapt.md):\n{adapt_text}")
|
||||
|
||||
all_files = sorted(
|
||||
f.name for f in data_dir.iterdir() if f.is_file() and f.name != "adapt.md"
|
||||
)
|
||||
all_files = sorted(f.name for f in data_dir.iterdir() if f.is_file())
|
||||
# Separate conversation history files from regular data files
|
||||
conv_files = [f for f in all_files if re.match(r"conversation_\d+\.md$", f)]
|
||||
data_files = [f for f in all_files if f not in conv_files]
|
||||
|
||||
@@ -31,6 +31,7 @@ class RestoredState:
|
||||
start_iteration: int
|
||||
recent_responses: list[str]
|
||||
recent_tool_fingerprints: list[list[tuple[str, str]]]
|
||||
pending_input: dict[str, Any] | None
|
||||
|
||||
|
||||
async def restore(
|
||||
@@ -56,24 +57,34 @@ async def restore(
|
||||
conversation = await NodeConversation.restore(
|
||||
conversation_store,
|
||||
phase_id=phase_filter,
|
||||
run_id=ctx.effective_run_id,
|
||||
)
|
||||
if conversation is None:
|
||||
return None
|
||||
|
||||
accumulator = await OutputAccumulator.restore(conversation_store)
|
||||
# If run_id filtering removed all messages, this is an intentional
|
||||
# restart (new run), not a crash recovery. Return None so the caller
|
||||
# falls through to the fresh-conversation path.
|
||||
if conversation.message_count == 0:
|
||||
return None
|
||||
|
||||
accumulator = await OutputAccumulator.restore(conversation_store, run_id=ctx.effective_run_id)
|
||||
accumulator.spillover_dir = config.spillover_dir
|
||||
accumulator.max_value_chars = config.max_output_value_chars
|
||||
|
||||
cursor = await conversation_store.read_cursor()
|
||||
start_iteration = cursor.get("iteration", 0) + 1 if cursor else 0
|
||||
cursor = await conversation_store.read_cursor() or {}
|
||||
start_iteration = cursor.get("iteration", 0) + 1
|
||||
|
||||
# Restore stall/doom-loop detection state
|
||||
recent_responses: list[str] = cursor.get("recent_responses", []) if cursor else []
|
||||
raw_fps = cursor.get("recent_tool_fingerprints", []) if cursor else []
|
||||
recent_responses: list[str] = cursor.get("recent_responses", [])
|
||||
raw_fps = cursor.get("recent_tool_fingerprints", [])
|
||||
recent_tool_fingerprints: list[list[tuple[str, str]]] = [
|
||||
[tuple(pair) for pair in fps] # type: ignore[misc]
|
||||
for fps in raw_fps
|
||||
]
|
||||
pending_input = cursor.get("pending_input")
|
||||
if not isinstance(pending_input, dict):
|
||||
pending_input = None
|
||||
|
||||
logger.info(
|
||||
f"Restored event loop: iteration={start_iteration}, "
|
||||
@@ -88,6 +99,7 @@ async def restore(
|
||||
start_iteration=start_iteration,
|
||||
recent_responses=recent_responses,
|
||||
recent_tool_fingerprints=recent_tool_fingerprints,
|
||||
pending_input=pending_input,
|
||||
)
|
||||
|
||||
|
||||
@@ -100,6 +112,7 @@ async def write_cursor(
|
||||
*,
|
||||
recent_responses: list[str] | None = None,
|
||||
recent_tool_fingerprints: list[list[tuple[str, str]]] | None = None,
|
||||
pending_input: dict[str, Any] | None = None,
|
||||
) -> None:
|
||||
"""Write checkpoint cursor for crash recovery.
|
||||
|
||||
@@ -112,7 +125,6 @@ async def write_cursor(
|
||||
{
|
||||
"iteration": iteration,
|
||||
"node_id": ctx.node_id,
|
||||
"next_seq": conversation.next_seq,
|
||||
"outputs": accumulator.to_dict(),
|
||||
}
|
||||
)
|
||||
@@ -124,6 +136,9 @@ async def write_cursor(
|
||||
cursor["recent_tool_fingerprints"] = [
|
||||
[list(pair) for pair in fps] for fps in recent_tool_fingerprints
|
||||
]
|
||||
# Persist blocked-input state so restored runs re-block instead of
|
||||
# manufacturing a synthetic continuation turn.
|
||||
cursor["pending_input"] = pending_input
|
||||
await conversation_store.write_cursor(cursor)
|
||||
|
||||
|
||||
@@ -138,6 +153,7 @@ async def drain_injection_queue(
|
||||
) -> int:
|
||||
"""Drain all pending injected events as user messages. Returns count."""
|
||||
count = 0
|
||||
logger.debug("[drain_injection_queue] Starting to drain queue, initial queue size: %s", queue.qsize() if hasattr(queue, 'qsize') else 'unknown')
|
||||
while not queue.empty():
|
||||
try:
|
||||
content, is_client_input, image_content = queue.get_nowait()
|
||||
@@ -228,7 +244,7 @@ async def check_pause(
|
||||
pause_requested = ctx.input_data.get("pause_requested", False)
|
||||
if not pause_requested:
|
||||
try:
|
||||
pause_requested = ctx.memory.read("pause_requested") or False
|
||||
pause_requested = ctx.buffer.read("pause_requested") or False
|
||||
except (PermissionError, KeyError):
|
||||
pause_requested = False
|
||||
if pause_requested:
|
||||
|
||||
@@ -226,7 +226,7 @@ async def publish_text_delta(
|
||||
inner_turn: int = 0,
|
||||
) -> None:
|
||||
if event_bus:
|
||||
if ctx.node_spec.client_facing:
|
||||
if ctx.emits_client_io:
|
||||
await event_bus.emit_client_output_delta(
|
||||
stream_id=stream_id,
|
||||
node_id=node_id,
|
||||
|
||||
@@ -139,9 +139,9 @@ async def judge_turn(
|
||||
),
|
||||
)
|
||||
|
||||
# Client-facing with no output keys → continuous interaction node.
|
||||
# Queen with no output keys → continuous interaction node.
|
||||
# Inject tool-use pressure instead of auto-accepting.
|
||||
if not output_keys and ctx.node_spec.client_facing:
|
||||
if not output_keys and ctx.supports_direct_user_io:
|
||||
return JudgeVerdict(
|
||||
action="RETRY",
|
||||
feedback=(
|
||||
|
||||
@@ -1,8 +1,7 @@
|
||||
"""Subagent execution for the event loop.
|
||||
|
||||
Handles the full subagent lifecycle: validation, context setup, tool filtering,
|
||||
conversation store derivation, execution, and cleanup. Also includes the
|
||||
_EscalationReceiver helper used for subagent → queen escalation routing.
|
||||
conversation store derivation, execution, and cleanup.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
@@ -18,7 +17,7 @@ from typing import TYPE_CHECKING, Any
|
||||
from framework.graph.conversation import ConversationStore
|
||||
from framework.graph.event_loop.judge_pipeline import SubagentJudge
|
||||
from framework.graph.event_loop.types import LoopConfig, OutputAccumulator
|
||||
from framework.graph.node import NodeContext, SharedMemory
|
||||
from framework.graph.node import DataBuffer, NodeContext
|
||||
from framework.llm.provider import ToolResult, ToolUse
|
||||
from framework.runtime.event_bus import EventBus
|
||||
|
||||
@@ -28,39 +27,6 @@ if TYPE_CHECKING:
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class EscalationReceiver:
    """Short-lived stand-in node that relays one reply to a waiting subagent.

    A subagent calling ``report_to_parent(wait_for_response=True)`` has its
    callback create an instance, register it in the executor's
    ``node_registry`` under a unique escalation ID, and await ``wait()``.
    The TUI / runner then calls ``inject_input(escalation_id, content)``;
    ``ExecutionStream`` routes that to ``inject_event()`` here, using the same
    ``hasattr(node, "inject_event")`` check it applies to regular
    ``EventLoopNode`` instances.
    """

    def __init__(self) -> None:
        # Read by inject_worker_message() so it can prefer this receiver.
        self._awaiting_input = True
        self._response: str | None = None
        self._event = asyncio.Event()

    async def inject_event(
        self,
        content: str,
        *,
        is_client_input: bool = False,
        image_content: list[dict[str, Any]] | None = None,
    ) -> None:
        """Store the reply and wake the waiter.

        Invoked by ExecutionStream.inject_input() when the user responds.
        ``is_client_input`` and ``image_content`` are accepted for signature
        compatibility and are not used.
        """
        self._response = content
        self._event.set()

    async def wait(self) -> str | None:
        """Suspend until inject_event() has delivered a reply, then return it."""
        await self._event.wait()
        return self._response
|
||||
|
||||
|
||||
async def execute_subagent(
|
||||
ctx: NodeContext,
|
||||
agent_id: str,
|
||||
@@ -68,7 +34,7 @@ async def execute_subagent(
|
||||
*,
|
||||
config: LoopConfig,
|
||||
event_loop_node_cls: type[EventLoopNode],
|
||||
escalation_receiver_cls: type[EscalationReceiver],
|
||||
escalation_receiver_cls: Callable[[], Any],
|
||||
accumulator: OutputAccumulator | None = None,
|
||||
event_bus: EventBus | None = None,
|
||||
tool_executor: Callable[[ToolUse], ToolResult | Awaitable[ToolResult]] | None = None,
|
||||
@@ -127,7 +93,7 @@ async def execute_subagent(
|
||||
subagent_spec = ctx.node_registry[agent_id]
|
||||
|
||||
# 2. Create read-only memory snapshot
|
||||
parent_data = ctx.memory.read_all()
|
||||
parent_data = ctx.buffer.read_all()
|
||||
|
||||
# Merge in-flight outputs from the parent's accumulator.
|
||||
if accumulator:
|
||||
@@ -135,12 +101,12 @@ async def execute_subagent(
|
||||
if key not in parent_data:
|
||||
parent_data[key] = value
|
||||
|
||||
subagent_memory = SharedMemory()
|
||||
subagent_buffer = DataBuffer()
|
||||
for key, value in parent_data.items():
|
||||
subagent_memory.write(key, value, validate=False)
|
||||
subagent_buffer.write(key, value, validate=False)
|
||||
|
||||
read_keys = set(parent_data.keys()) | set(subagent_spec.input_keys or [])
|
||||
scoped_memory = subagent_memory.with_permissions(
|
||||
scoped_buffer = subagent_buffer.with_permissions(
|
||||
read_keys=list(read_keys),
|
||||
write_keys=[], # Read-only!
|
||||
)
|
||||
@@ -252,7 +218,7 @@ async def execute_subagent(
|
||||
runtime=ctx.runtime,
|
||||
node_id=sa_node_id,
|
||||
node_spec=subagent_spec,
|
||||
memory=scoped_memory,
|
||||
buffer=scoped_buffer,
|
||||
input_data={"task": task, **parent_data},
|
||||
llm=ctx.llm,
|
||||
available_tools=subagent_tools,
|
||||
@@ -307,14 +273,28 @@ async def execute_subagent(
|
||||
conversation_store=subagent_conv_store,
|
||||
)
|
||||
|
||||
# Inject a unique GCU browser profile for this subagent
|
||||
_profile_token = None
|
||||
try:
|
||||
from gcu.browser.session import set_active_profile as _set_gcu_profile
|
||||
# Each subagent instance gets its own unique browser profile so concurrent
|
||||
# subagents don't share tab groups. The profile is injected into every
|
||||
# browser_* tool call by wrapping the tool executor.
|
||||
_gcu_profile = f"{agent_id}:{subagent_instance}"
|
||||
_original_tool_executor = None
|
||||
|
||||
_profile_token = _set_gcu_profile(f"{agent_id}-{subagent_instance}")
|
||||
except ImportError:
|
||||
pass # GCU tools not installed; no-op
|
||||
if tool_executor is not None:
|
||||
_original_tool_executor = tool_executor
|
||||
|
||||
async def _gcu_profile_injecting_executor(
|
||||
tool_use: ToolUse,
|
||||
) -> ToolResult | Awaitable[ToolResult]:
|
||||
if tool_use.name.startswith("browser_") and "profile" not in (tool_use.input or {}):
|
||||
from dataclasses import replace
|
||||
|
||||
tool_use = replace(tool_use, input={**(tool_use.input or {}), "profile": _gcu_profile})
|
||||
result = _original_tool_executor(tool_use)
|
||||
if asyncio.isfuture(result) or asyncio.iscoroutine(result):
|
||||
return await result
|
||||
return result
|
||||
|
||||
tool_executor = _gcu_profile_injecting_executor
|
||||
|
||||
try:
|
||||
logger.info("🚀 Starting subagent '%s' execution...", agent_id)
|
||||
@@ -386,27 +366,16 @@ async def execute_subagent(
|
||||
is_error=True,
|
||||
)
|
||||
finally:
|
||||
# Restore the GCU profile context
|
||||
if _profile_token is not None:
|
||||
from gcu.browser.session import _active_profile as _gcu_profile_var
|
||||
|
||||
_gcu_profile_var.reset(_profile_token)
|
||||
|
||||
# Stop the browser session for this subagent's profile
|
||||
if tool_executor is not None:
|
||||
_subagent_profile = f"{agent_id}-{subagent_instance}"
|
||||
try:
|
||||
_stop_use = ToolUse(
|
||||
id="gcu-cleanup",
|
||||
name="browser_stop",
|
||||
input={"profile": _subagent_profile},
|
||||
)
|
||||
_stop_result = tool_executor(_stop_use)
|
||||
if asyncio.iscoroutine(_stop_result) or asyncio.isfuture(_stop_result):
|
||||
await _stop_result
|
||||
except Exception as _gcu_exc:
|
||||
logger.warning(
|
||||
"GCU browser_stop failed for profile %r: %s",
|
||||
_subagent_profile,
|
||||
_gcu_exc,
|
||||
)
|
||||
# Close the tab group this subagent created, if any.
|
||||
if _original_tool_executor is not None:
|
||||
try:
|
||||
stop_call = ToolUse(
|
||||
id="__subagent_cleanup__",
|
||||
name="browser_stop",
|
||||
input={"profile": _gcu_profile},
|
||||
)
|
||||
result = _original_tool_executor(stop_call)
|
||||
if asyncio.isfuture(result) or asyncio.iscoroutine(result):
|
||||
await result
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
@@ -18,7 +18,7 @@ from framework.llm.provider import Tool, ToolResult
|
||||
def build_ask_user_tool() -> Tool:
|
||||
"""Build the synthetic ask_user tool for explicit user-input requests.
|
||||
|
||||
Client-facing nodes call ask_user() when they need to pause and wait
|
||||
The queen calls ask_user() when it needs to pause and wait
|
||||
for user input. Text-only turns WITHOUT ask_user flow through without
|
||||
blocking, allowing progress updates and summaries to stream freely.
|
||||
"""
|
||||
|
||||
@@ -0,0 +1,151 @@
|
||||
"""Streaming XML tag filter for thinking tags.
|
||||
|
||||
Strips configured XML tags (e.g. ``<situation>``, ``<monologue>``) from
|
||||
a chunked text stream while preserving the full text for conversation
|
||||
storage. The filter is stateful — it handles chunks that split mid-tag.
|
||||
|
||||
Only touches text content. Tool calls flow through a completely separate
|
||||
code path and are never affected by this filter.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from collections.abc import Sequence
|
||||
|
||||
|
||||
class ThinkingTagFilter:
    """Incrementally remove configured XML "thinking" tags from streamed text.

    Text between ``<name>`` and ``</name>`` (for every configured name) is
    suppressed; everything else is passed through. The filter keeps state
    between calls, so a tag may be split across chunk boundaries
    (e.g. one chunk ends with ``"<mono"`` and the next starts with
    ``"logue>"``).

    Args:
        tag_names: Tag names to strip (e.g. ``["situation", "monologue"]``).
    """

    def __init__(self, tag_names: Sequence[str]) -> None:
        self._tag_names: set[str] = set(tag_names)

        # Literal opening / closing strings, keyed by tag name.
        self._open_tags: dict[str, str] = {}
        self._close_tags: dict[str, str] = {}
        for tag in tag_names:
            self._open_tags[tag] = f"<{tag}>"
            self._close_tags[tag] = f"</{tag}>"

        # Every tag literal, longest first; consulted when deciding whether a
        # trailing fragment could still grow into a known tag.
        candidates = [*self._open_tags.values(), *self._close_tags.values()]
        candidates.sort(key=len, reverse=True)
        self._all_tag_strings: list[str] = candidates

        self._inside_tag: str | None = None  # Name of the tag being skipped, if any.
        self._pending: str = ""  # Held-back characters that may be a partial tag.
        self._visible_text: str = ""  # Everything emitted as visible so far.

    def feed(self, chunk: str) -> str:
        """Consume one chunk and return the part of it that is visible.

        Any fragment held back from the previous call is prepended before
        scanning. Characters inside a thinking tag are dropped; a trailing
        fragment that could still become a tag is held back for the next call.

        Returns:
            The text from this call that should be shown to the user.
        """
        carried, self._pending = self._pending, ""
        shown = self._process(carried + chunk)
        self._visible_text += shown
        return shown

    @property
    def visible_snapshot(self) -> str:
        """All visible text emitted so far by ``feed()`` and ``flush()``."""
        return self._visible_text

    def flush(self) -> str:
        """Release whatever is still held back at end-of-stream.

        Outside a tag, the held-back fragment (a tag start that never
        completed) is emitted as ordinary visible text. Inside a still-open
        tag, the fragment is dropped. The pending buffer is cleared either way.
        """
        leftover, self._pending = self._pending, ""
        if self._inside_tag is not None:
            # Unclosed tag content is never shown.
            leftover = ""
        self._visible_text += leftover
        return leftover

    # ------------------------------------------------------------------
    # Internal processing
    # ------------------------------------------------------------------

    def _process(self, buf: str) -> str:
        """Scan *buf* from start to end, returning visible text and updating state."""
        shown: list[str] = []
        pos = 0
        end = len(buf)
        while pos < end:
            if self._inside_tag is None:
                pos = self._scan_visible(buf, pos, shown)
            else:
                pos = self._scan_hidden(buf, pos)
        return "".join(shown)

    def _scan_hidden(self, buf: str, pos: int) -> int:
        """Skip suppressed content from *pos*; return the next scan position."""
        closer = self._close_tags[self._inside_tag]
        found = buf.find(closer, pos)
        if found != -1:
            # Complete closing tag: step over it and resume visible scanning.
            self._inside_tag = None
            return found + len(closer)

        # No complete closing tag. The tail of the buffer may still be the
        # beginning of one, so keep the longest such tail for the next chunk;
        # everything else in the buffer is suppressed content and is dropped.
        end = len(buf)
        longest = min(len(closer) - 1, end - pos)
        for size in range(longest, 0, -1):
            tail = buf[end - size :]
            if closer.startswith(tail):
                self._pending = tail
                break
        return end

    def _scan_visible(self, buf: str, pos: int, shown: list[str]) -> int:
        """Emit visible text from *pos* up to the next decision point.

        Appends visible pieces to *shown* and returns the next scan position.
        """
        lt = buf.find("<", pos)
        if lt == -1:
            # No '<' left: the remainder is plain visible text.
            shown.append(buf[pos:])
            return len(buf)

        if lt > pos:
            shown.append(buf[pos:lt])

        rest = buf[lt:]
        for name, opener in self._open_tags.items():
            if rest.startswith(opener):
                # Complete opening tag: start suppressing.
                self._inside_tag = name
                return lt + len(opener)

        if self._is_partial_tag_prefix(rest):
            # Might still become a tag; hold it until more text arrives.
            self._pending = rest
            return len(buf)

        # Not a known tag: the '<' itself is ordinary text.
        shown.append("<")
        return lt + 1

    def _is_partial_tag_prefix(self, text: str) -> bool:
        """Return True if *text* is a proper prefix of any known tag literal."""
        return any(
            len(text) < len(tag) and tag.startswith(text)
            for tag in self._all_tag_strings
        )
|
||||
@@ -8,6 +8,7 @@ the context-window-exceeded error detector.
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
import contextvars
|
||||
import json
|
||||
import logging
|
||||
import re
|
||||
@@ -221,7 +222,7 @@ def truncate_tool_result(
|
||||
- Small results (≤ limit): full content kept + file annotation
|
||||
- Large results (> limit): preview + file reference
|
||||
- Errors: pass through unchanged
|
||||
- load_data results: truncate with pagination hint (no re-spill)
|
||||
- read_file/load_data results: truncate with pagination hint (no re-spill)
|
||||
"""
|
||||
limit = max_tool_result_chars
|
||||
|
||||
@@ -229,12 +230,12 @@ def truncate_tool_result(
|
||||
if result.is_error:
|
||||
return result
|
||||
|
||||
# load_data reads FROM spilled files — never re-spill (circular).
|
||||
# read_file/load_data reads FROM spilled files — never re-spill (circular).
|
||||
# Just truncate with a pagination hint if the result is too large.
|
||||
if tool_name == "load_data":
|
||||
if tool_name in ("load_data", "read_file"):
|
||||
if limit <= 0 or len(result.content) <= limit:
|
||||
return result # Small load_data result — pass through as-is
|
||||
# Large load_data result — truncate with smart preview
|
||||
return result # Small result — pass through as-is
|
||||
# Large result — truncate with smart preview
|
||||
PREVIEW_CAP = min(5000, max(limit - 500, limit // 2))
|
||||
|
||||
metadata_str = ""
|
||||
@@ -283,7 +284,7 @@ def truncate_tool_result(
|
||||
spill_path.mkdir(parents=True, exist_ok=True)
|
||||
filename = next_spill_filename_fn(tool_name)
|
||||
|
||||
# Pretty-print JSON content so load_data's line-based
|
||||
# Pretty-print JSON content so read_file's line-based
|
||||
# pagination works correctly.
|
||||
write_content = result.content
|
||||
parsed_json: Any = None # track for metadata extraction
|
||||
@@ -293,7 +294,10 @@ def truncate_tool_result(
|
||||
except (json.JSONDecodeError, TypeError, ValueError):
|
||||
pass # Not JSON — write as-is
|
||||
|
||||
(spill_path / filename).write_text(write_content, encoding="utf-8")
|
||||
file_path = spill_path / filename
|
||||
file_path.write_text(write_content, encoding="utf-8")
|
||||
# Use absolute path so parent agents can find files from subagents
|
||||
abs_path = str(file_path.resolve())
|
||||
|
||||
if limit > 0 and len(result.content) > limit:
|
||||
# Large result: build a small, metadata-rich preview so the
|
||||
@@ -315,14 +319,14 @@ def truncate_tool_result(
|
||||
# Assemble header with structural info + warning
|
||||
header = (
|
||||
f"[Result from {tool_name}: {len(result.content):,} chars — "
|
||||
f"too large for context, saved to '{filename}'.]\n"
|
||||
f"too large for context, saved to '{abs_path}'.]\n"
|
||||
)
|
||||
if metadata_str:
|
||||
header += f"\nData structure:\n{metadata_str}"
|
||||
header += (
|
||||
f"\n\nWARNING: The preview below is INCOMPLETE. "
|
||||
f"Do NOT draw conclusions or counts from it. "
|
||||
f"Use load_data(filename='{filename}') to read the "
|
||||
f"Use read_file(path='{abs_path}') to read the "
|
||||
f"full data before analysis."
|
||||
)
|
||||
|
||||
@@ -331,11 +335,11 @@ def truncate_tool_result(
|
||||
"Tool result spilled to file: %s (%d chars → %s)",
|
||||
tool_name,
|
||||
len(result.content),
|
||||
filename,
|
||||
abs_path,
|
||||
)
|
||||
else:
|
||||
# Small result: keep full content + annotation
|
||||
content = f"{result.content}\n\n[Saved to '{filename}']"
|
||||
# Small result: keep full content + annotation with absolute path
|
||||
content = f"{result.content}\n\n[Saved to '{abs_path}']"
|
||||
logger.info(
|
||||
"Tool result saved to file: %s (%d chars → %s)",
|
||||
tool_name,
|
||||
@@ -446,8 +450,11 @@ async def execute_tool(
|
||||
# Offload the executor call to a thread. Sync MCP executors
|
||||
# block on future.result() — running in a thread keeps the
|
||||
# event loop free so asyncio.wait_for can fire the timeout.
|
||||
# Copy the current context so contextvars (e.g. data_dir from
|
||||
# execution context) propagate into the worker thread.
|
||||
loop = asyncio.get_running_loop()
|
||||
result = await loop.run_in_executor(None, tool_executor, tool_use)
|
||||
ctx = contextvars.copy_context()
|
||||
result = await loop.run_in_executor(None, ctx.run, tool_executor, tool_use)
|
||||
# Async executors return a coroutine — await it on the loop
|
||||
if asyncio.iscoroutine(result) or asyncio.isfuture(result):
|
||||
result = await result
|
||||
@@ -472,56 +479,6 @@ async def execute_tool(
|
||||
return result
|
||||
|
||||
|
||||
def record_learning(key: str, value: Any, spillover_dir: str | None) -> None:
|
||||
"""Append a set_output value to adapt.md as a learning entry.
|
||||
|
||||
Called at set_output time — the moment knowledge is produced — so that
|
||||
adapt.md accumulates the agent's outputs across the session. Since
|
||||
adapt.md is injected into the system prompt, these persist through
|
||||
any compaction.
|
||||
"""
|
||||
if not spillover_dir:
|
||||
return
|
||||
try:
|
||||
adapt_path = Path(spillover_dir) / "adapt.md"
|
||||
adapt_path.parent.mkdir(parents=True, exist_ok=True)
|
||||
content = adapt_path.read_text(encoding="utf-8") if adapt_path.exists() else ""
|
||||
|
||||
if "## Outputs" not in content:
|
||||
content += "\n\n## Outputs\n"
|
||||
|
||||
# Truncate long values for memory (full value is in shared memory)
|
||||
v_str = str(value)
|
||||
if len(v_str) > 500:
|
||||
v_str = v_str[:500] + "…"
|
||||
|
||||
entry = f"- {key}: {v_str}\n"
|
||||
|
||||
# Replace existing entry for same key (update, not duplicate)
|
||||
lines = content.splitlines(keepends=True)
|
||||
replaced = False
|
||||
for i, line in enumerate(lines):
|
||||
if line.startswith(f"- {key}:"):
|
||||
lines[i] = entry
|
||||
replaced = True
|
||||
break
|
||||
if replaced:
|
||||
content = "".join(lines)
|
||||
else:
|
||||
content += entry
|
||||
|
||||
adapt_path.write_text(content, encoding="utf-8")
|
||||
except Exception as e:
|
||||
logger.warning("Failed to record learning for key=%s: %s", key, e)
|
||||
|
||||
|
||||
def next_spill_filename(tool_name: str, counter: int) -> str:
    """Build the spill filename ``<short tool name>_<counter>.txt``.

    A leading ``tool_`` and then a leading ``mcp_`` are dropped from the tool
    name (in that order, each at most once) to keep filenames short.
    """
    stem = tool_name
    for prefix in ("tool_", "mcp_"):
        if stem.startswith(prefix):
            stem = stem[len(prefix) :]
    return f"{stem}_{counter}.txt"
|
||||
|
||||
|
||||
def restore_spill_counter(spillover_dir: str | None) -> int:
|
||||
"""Scan spillover_dir for existing spill files and return the max counter.
|
||||
|
||||
|
||||
@@ -9,7 +9,11 @@ from dataclasses import dataclass, field
|
||||
from pathlib import Path
|
||||
from typing import Any, Literal, Protocol, runtime_checkable
|
||||
|
||||
from framework.graph.conversation import ConversationStore
|
||||
from framework.graph.conversation import (
|
||||
ConversationStore,
|
||||
get_run_cursor,
|
||||
update_run_cursor,
|
||||
)
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
@@ -75,13 +79,20 @@ class LoopConfig:
|
||||
|
||||
# Client-facing auto-block grace period.
|
||||
cf_grace_turns: int = 1
|
||||
# Worker auto-escalation: text-only turns before escalating to queen.
|
||||
worker_escalation_grace_turns: int = 1
|
||||
tool_doom_loop_enabled: bool = True
|
||||
|
||||
# Per-tool-call timeout.
|
||||
tool_call_timeout_seconds: float = 60.0
|
||||
|
||||
# Subagent delegation timeout.
|
||||
subagent_timeout_seconds: float = 600.0
|
||||
# Subagent delegation timeout (wall-clock max).
|
||||
subagent_timeout_seconds: float = 3600.0
|
||||
|
||||
# Subagent inactivity timeout - only timeout if no activity for this duration.
|
||||
# This resets whenever the subagent makes progress (tool calls, LLM responses).
|
||||
# Set to 0 to use only the wall-clock timeout.
|
||||
subagent_inactivity_timeout_seconds: float = 300.0
|
||||
|
||||
# Lifecycle hooks.
|
||||
hooks: dict[str, list] | None = None
|
||||
@@ -116,6 +127,7 @@ class OutputAccumulator:
|
||||
store: ConversationStore | None = None
|
||||
spillover_dir: str | None = None
|
||||
max_value_chars: int = 0
|
||||
run_id: str | None = None
|
||||
|
||||
async def set(self, key: str, value: Any) -> None:
|
||||
"""Set a key-value pair, auto-spilling large values to files."""
|
||||
@@ -146,8 +158,9 @@ class OutputAccumulator:
|
||||
if isinstance(value, (dict, list))
|
||||
else str(value)
|
||||
)
|
||||
(spill_path / filename).write_text(write_content, encoding="utf-8")
|
||||
file_size = (spill_path / filename).stat().st_size
|
||||
file_path = spill_path / filename
|
||||
file_path.write_text(write_content, encoding="utf-8")
|
||||
file_size = file_path.stat().st_size
|
||||
logger.info(
|
||||
"set_output value auto-spilled: key=%s, %d chars -> %s (%d bytes)",
|
||||
key,
|
||||
@@ -155,9 +168,11 @@ class OutputAccumulator:
|
||||
filename,
|
||||
file_size,
|
||||
)
|
||||
# Use absolute path so parent agents can find files from subagents
|
||||
abs_path = str(file_path.resolve())
|
||||
return (
|
||||
f"[Saved to '{filename}' ({file_size:,} bytes). "
|
||||
f"Use load_data(filename='{filename}') "
|
||||
f"[Saved to '{abs_path}' ({file_size:,} bytes). "
|
||||
f"Use read_file(path='{abs_path}') "
|
||||
f"to access full data.]"
|
||||
)
|
||||
|
||||
@@ -171,12 +186,14 @@ class OutputAccumulator:
|
||||
return all(key in self.values and self.values[key] is not None for key in required)
|
||||
|
||||
@classmethod
|
||||
async def restore(cls, store: ConversationStore) -> OutputAccumulator:
|
||||
async def restore(
|
||||
cls,
|
||||
store: ConversationStore,
|
||||
run_id: str | None = None,
|
||||
) -> OutputAccumulator:
|
||||
cursor = await store.read_cursor()
|
||||
values = {}
|
||||
if cursor and "outputs" in cursor:
|
||||
values = cursor["outputs"]
|
||||
return cls(values=values, store=store)
|
||||
values = cursor.get("outputs", {}) if cursor else {}
|
||||
return cls(values=values, store=store, run_id=run_id)
|
||||
|
||||
|
||||
__all__ = [
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
+718
-1302
File diff suppressed because it is too large
Load Diff
@@ -2,7 +2,7 @@
|
||||
Node Protocol - The building block of agent graphs.
|
||||
|
||||
A Node is a unit of work that:
|
||||
1. Receives context (goal, shared memory, input)
|
||||
1. Receives context (goal, shared buffer, input)
|
||||
2. Makes decisions (using LLM, tools, or logic)
|
||||
3. Produces results (output, state changes)
|
||||
4. Records everything to the Runtime
|
||||
@@ -30,62 +30,6 @@ from framework.runtime.core import Runtime
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def _fix_unescaped_newlines_in_json(json_str: str) -> str:
|
||||
"""Fix unescaped newlines inside JSON string values.
|
||||
|
||||
LLMs sometimes output actual newlines inside JSON strings instead of \\n.
|
||||
This function fixes that by properly escaping newlines within string values.
|
||||
"""
|
||||
result = []
|
||||
in_string = False
|
||||
escape_next = False
|
||||
i = 0
|
||||
|
||||
while i < len(json_str):
|
||||
char = json_str[i]
|
||||
|
||||
if escape_next:
|
||||
result.append(char)
|
||||
escape_next = False
|
||||
i += 1
|
||||
continue
|
||||
|
||||
if char == "\\" and in_string:
|
||||
escape_next = True
|
||||
result.append(char)
|
||||
i += 1
|
||||
continue
|
||||
|
||||
if char == '"' and not escape_next:
|
||||
in_string = not in_string
|
||||
result.append(char)
|
||||
i += 1
|
||||
continue
|
||||
|
||||
# Fix unescaped newlines inside strings
|
||||
if in_string and char == "\n":
|
||||
result.append("\\n")
|
||||
i += 1
|
||||
continue
|
||||
|
||||
# Fix unescaped carriage returns inside strings
|
||||
if in_string and char == "\r":
|
||||
result.append("\\r")
|
||||
i += 1
|
||||
continue
|
||||
|
||||
# Fix unescaped tabs inside strings
|
||||
if in_string and char == "\t":
|
||||
result.append("\\t")
|
||||
i += 1
|
||||
continue
|
||||
|
||||
result.append(char)
|
||||
i += 1
|
||||
|
||||
return "".join(result)
|
||||
|
||||
|
||||
def find_json_object(text: str) -> str | None:
|
||||
"""Find the first valid JSON object in text using balanced brace matching.
|
||||
|
||||
@@ -171,10 +115,10 @@ class NodeSpec(BaseModel):
|
||||
|
||||
# Data flow
|
||||
input_keys: list[str] = Field(
|
||||
default_factory=list, description="Keys this node reads from shared memory or input"
|
||||
default_factory=list, description="Keys this node reads from the shared buffer or input"
|
||||
)
|
||||
output_keys: list[str] = Field(
|
||||
default_factory=list, description="Keys this node writes to shared memory or output"
|
||||
default_factory=list, description="Keys this node writes to the shared buffer or output"
|
||||
)
|
||||
nullable_output_keys: list[str] = Field(
|
||||
default_factory=list,
|
||||
@@ -249,7 +193,10 @@ class NodeSpec(BaseModel):
|
||||
# Client-facing behavior
|
||||
client_facing: bool = Field(
|
||||
default=False,
|
||||
description="If True, this node streams output to the end user and can request input.",
|
||||
description=(
|
||||
"Deprecated compatibility field. The queen is intrinsically interactive; "
|
||||
"non-queen nodes should escalate to the queen instead of talking to users directly."
|
||||
),
|
||||
)
|
||||
|
||||
# Phase completion criteria for conversation-aware judge (Level 2)
|
||||
@@ -272,22 +219,59 @@ class NodeSpec(BaseModel):
|
||||
),
|
||||
)
|
||||
|
||||
# Structured thinking tags — stripped from client-facing output but kept in
|
||||
# conversation history so the LLM sees its own reasoning on subsequent turns.
|
||||
thinking_tags: list[str] | None = Field(
|
||||
default=None,
|
||||
description=(
|
||||
"XML tag names stripped from client output but kept in conversation "
|
||||
"history. e.g. ['situation', 'monologue'] strips <situation>...</situation> "
|
||||
"from the user-facing stream while preserving it for the LLM."
|
||||
),
|
||||
)
|
||||
|
||||
model_config = {"extra": "allow", "arbitrary_types_allowed": True}
|
||||
|
||||
def is_queen_node(self) -> bool:
    """Report whether this spec's ``id`` equals the queen node identifier."""
    queen_id = "queen"
    return self.id == queen_id
|
||||
|
||||
class MemoryWriteError(Exception):
|
||||
"""Raised when an invalid value is written to memory."""
|
||||
def supports_direct_user_io(self) -> bool:
    """Report whether this node may talk to the user directly.

    Delegates entirely to ``is_queen_node()``.
    """
    may_talk_to_user = self.is_queen_node()
    return may_talk_to_user
|
||||
|
||||
|
||||
def deprecated_client_facing_warning(node_spec: NodeSpec) -> str | None:
    """Build the deprecation message for a non-queen node with ``client_facing`` set.

    Returns:
        The warning text when ``node_spec.client_facing`` is truthy and the
        node is not the queen; otherwise ``None``.
    """
    # Nothing to warn about unless the legacy flag is actually set.
    if not node_spec.client_facing:
        return None
    # The queen is allowed to be interactive.
    if node_spec.is_queen_node():
        return None
    return (
        f"Node '{node_spec.id}' sets deprecated client_facing=True. "
        "Non-queen direct human I/O is no longer supported; route worker "
        "questions and approvals through queen escalation instead."
    )
|
||||
|
||||
|
||||
def warn_if_deprecated_client_facing(node_spec: NodeSpec) -> None:
    """Log the client_facing deprecation warning for *node_spec*, if one applies."""
    message = deprecated_client_facing_warning(node_spec)
    if not message:
        return
    logger.warning(message)
|
||||
|
||||
|
||||
class DataBufferWriteError(Exception):
    """Signals that a value was rejected when writing to the data buffer."""
|
||||
|
||||
|
||||
@dataclass
|
||||
class SharedMemory:
|
||||
class DataBuffer:
|
||||
"""
|
||||
Shared state between nodes in a graph execution.
|
||||
Shared data buffer between nodes in a graph execution.
|
||||
|
||||
Nodes read and write to shared memory using typed keys.
|
||||
The memory is scoped to a single run.
|
||||
Nodes read and write to the data buffer using typed keys.
|
||||
The buffer is scoped to a single run.
|
||||
|
||||
For parallel execution, use write_async() which provides per-key locking
|
||||
to prevent race conditions when multiple nodes write concurrently.
|
||||
@@ -306,23 +290,23 @@ class SharedMemory:
|
||||
self._lock = asyncio.Lock()
|
||||
|
||||
def read(self, key: str) -> Any:
|
||||
"""Read a value from shared memory."""
|
||||
"""Read a value from the data buffer."""
|
||||
if self._allowed_read and key not in self._allowed_read:
|
||||
raise PermissionError(f"Node not allowed to read key: {key}")
|
||||
return self._data.get(key)
|
||||
|
||||
def write(self, key: str, value: Any, validate: bool = True) -> None:
|
||||
"""
|
||||
Write a value to shared memory.
|
||||
Write a value to the data buffer.
|
||||
|
||||
Args:
|
||||
key: The memory key to write to
|
||||
key: The buffer key to write to
|
||||
value: The value to write
|
||||
validate: If True, check for suspicious content (default True)
|
||||
|
||||
Raises:
|
||||
PermissionError: If node doesn't have write permission
|
||||
MemoryWriteError: If value appears to be hallucinated content
|
||||
DataBufferWriteError: If value appears to be hallucinated content
|
||||
"""
|
||||
if self._allowed_write and key not in self._allowed_write:
|
||||
raise PermissionError(f"Node not allowed to write key: {key}")
|
||||
@@ -336,7 +320,7 @@ class SharedMemory:
|
||||
f"⚠ Suspicious write to key '{key}': appears to be code "
|
||||
f"({len(value)} chars). Consider using validate=False if intended."
|
||||
)
|
||||
raise MemoryWriteError(
|
||||
raise DataBufferWriteError(
|
||||
f"Rejected suspicious content for key '{key}': "
|
||||
f"appears to be hallucinated code ({len(value)} chars). "
|
||||
"If this is intentional, use validate=False."
|
||||
@@ -352,13 +336,13 @@ class SharedMemory:
|
||||
parallel execution. Each key has its own lock to minimize contention.
|
||||
|
||||
Args:
|
||||
key: The memory key to write to
|
||||
key: The buffer key to write to
|
||||
value: The value to write
|
||||
validate: If True, check for suspicious content (default True)
|
||||
|
||||
Raises:
|
||||
PermissionError: If node doesn't have write permission
|
||||
MemoryWriteError: If value appears to be hallucinated content
|
||||
DataBufferWriteError: If value appears to be hallucinated content
|
||||
"""
|
||||
# Check permissions first (no lock needed)
|
||||
if self._allowed_write and key not in self._allowed_write:
|
||||
@@ -379,7 +363,7 @@ class SharedMemory:
|
||||
f"⚠ Suspicious write to key '{key}': appears to be code "
|
||||
f"({len(value)} chars). Consider using validate=False if intended."
|
||||
)
|
||||
raise MemoryWriteError(
|
||||
raise DataBufferWriteError(
|
||||
f"Rejected suspicious content for key '{key}': "
|
||||
f"appears to be hallucinated code ({len(value)} chars). "
|
||||
"If this is intentional, use validate=False."
|
||||
@@ -457,13 +441,13 @@ class SharedMemory:
|
||||
self,
|
||||
read_keys: list[str],
|
||||
write_keys: list[str],
|
||||
) -> "SharedMemory":
|
||||
) -> "DataBuffer":
|
||||
"""Create a view with restricted permissions for a specific node.
|
||||
|
||||
The scoped view shares the same underlying data and locks,
|
||||
enabling thread-safe parallel execution across scoped views.
|
||||
"""
|
||||
return SharedMemory(
|
||||
return DataBuffer(
|
||||
_data=self._data,
|
||||
_allowed_read=set(read_keys) if read_keys else set(),
|
||||
_allowed_write=set(write_keys) if write_keys else set(),
|
||||
@@ -479,7 +463,7 @@ class NodeContext:
|
||||
|
||||
This is passed to every node and provides:
|
||||
- Access to the runtime (for decision logging)
|
||||
- Access to shared memory (for state)
|
||||
- Access to the data buffer (for state)
|
||||
- Access to LLM (for generation)
|
||||
- Access to tools (for actions)
|
||||
- The goal context (for guidance)
|
||||
@@ -493,7 +477,7 @@ class NodeContext:
|
||||
node_spec: NodeSpec
|
||||
|
||||
# State
|
||||
memory: SharedMemory
|
||||
buffer: DataBuffer
|
||||
input_data: dict[str, Any] = field(default_factory=dict)
|
||||
|
||||
# LLM access (if applicable)
|
||||
@@ -529,12 +513,25 @@ class NodeContext:
|
||||
# rebuilding the full system prompt when restoring from conversation store.
|
||||
identity_prompt: str = ""
|
||||
narrative: str = ""
|
||||
# Static memory block injected into the system prompt.
|
||||
memory_prompt: str = ""
|
||||
|
||||
# Event-triggered execution (no interactive user attached)
|
||||
event_triggered: bool = False
|
||||
|
||||
# Execution ID (from StreamRuntimeAdapter)
|
||||
execution_id: str = ""
|
||||
run_id: str = ""
|
||||
|
||||
@property
|
||||
def effective_run_id(self) -> str | None:
|
||||
"""Normalized run_id: returns run_id if truthy, otherwise None.
|
||||
|
||||
The field defaults to ``""``; callers should use this property
|
||||
instead of ``self.run_id or None`` to avoid silently falling
|
||||
back to session-scoped storage.
|
||||
"""
|
||||
return self.run_id or None
|
||||
|
||||
# Stream identity — the ExecutionStream this node runs within.
|
||||
# Falls back to node_id when not set (legacy / standalone executor).
|
||||
@@ -564,6 +561,9 @@ class NodeContext:
|
||||
# the queen to switch between phase-specific prompts (building /
|
||||
# staging / running) without restarting the conversation.
|
||||
dynamic_prompt_provider: Any = None # Callable[[], str] | None
|
||||
# Dynamic memory provider — when set, EventLoopNode rebuilds the
|
||||
# system prompt with the latest memory block each iteration.
|
||||
dynamic_memory_provider: Any = None # Callable[[], str] | None
|
||||
|
||||
# Skill system prompts — injected by the skill discovery pipeline
|
||||
skills_catalog_prompt: str = "" # Available skills XML catalog
|
||||
@@ -579,6 +579,24 @@ class NodeContext:
|
||||
# the queen to record the current phase per iteration.
|
||||
iteration_metadata_provider: Any = None # Callable[[], dict] | None
|
||||
|
||||
# Structured thinking tags — propagated from NodeSpec.thinking_tags.
|
||||
thinking_tags: list[str] | None = None
|
||||
|
||||
@property
def is_queen_stream(self) -> bool:
    """Whether this context belongs to the queen conversation.

    True when ``stream_id`` is ``"queen"``; otherwise falls back to the node
    spec's ``is_queen_node()``.
    """
    if self.stream_id == "queen":
        return True
    return self.node_spec.is_queen_node()
|
||||
|
||||
@property
def emits_client_io(self) -> bool:
    """Whether text should be published to user-facing streams.

    Mirrors ``is_queen_stream``.
    """
    on_queen_stream = self.is_queen_stream
    return on_queen_stream
|
||||
|
||||
@property
def supports_direct_user_io(self) -> bool:
    """Whether the node may directly request user input.

    Requires the queen stream and a run that is not event-triggered.
    """
    on_queen_stream = self.is_queen_stream
    if not on_queen_stream:
        return on_queen_stream
    return not self.event_triggered
|
||||
|
||||
|
||||
@dataclass
|
||||
class NodeResult:
|
||||
@@ -686,6 +704,6 @@ class NodeProtocol(ABC):
|
||||
"""
|
||||
errors = []
|
||||
for key in ctx.node_spec.input_keys:
|
||||
if key not in ctx.input_data and ctx.memory.read(key) is None:
|
||||
if key not in ctx.input_data and ctx.buffer.read(key) is None:
|
||||
errors.append(f"Missing required input: {key}")
|
||||
return errors
|
||||
|
||||
@@ -1,148 +1,29 @@
|
||||
"""Prompt composition for continuous agent mode.
|
||||
"""Legacy compatibility wrapper around :mod:`framework.graph.prompting`.
|
||||
|
||||
Composes the three-layer system prompt (onion model) and generates
|
||||
transition markers inserted into the conversation at phase boundaries.
|
||||
|
||||
Layer 1 — Identity (static, defined at agent level, never changes):
|
||||
"You are a thorough research agent. You prefer clarity over jargon..."
|
||||
|
||||
Layer 2 — Narrative (auto-generated from conversation/memory state):
|
||||
"We've finished scoping the project. The user wants to focus on..."
|
||||
|
||||
Layer 3 — Focus (per-node system_prompt, reframed as focus directive):
|
||||
"Your current attention: synthesize findings into a report..."
|
||||
New runtime code should import from ``framework.graph.prompting`` directly.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
from datetime import datetime
|
||||
import json
|
||||
from pathlib import Path
|
||||
from typing import TYPE_CHECKING, Any
|
||||
from typing import TYPE_CHECKING
|
||||
|
||||
from framework.graph.prompting import (
|
||||
EXECUTION_SCOPE_PREAMBLE,
|
||||
TransitionSpec,
|
||||
build_accounts_prompt,
|
||||
build_narrative,
|
||||
build_system_prompt,
|
||||
stamp_prompt_datetime,
|
||||
)
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from framework.graph.edge import GraphSpec
|
||||
from framework.graph.node import NodeSpec, SharedMemory
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# Injected into every worker node's system prompt so the LLM understands
|
||||
# it is one step in a multi-node pipeline and should not overreach.
|
||||
EXECUTION_SCOPE_PREAMBLE = (
|
||||
"EXECUTION SCOPE: You are one node in a multi-step workflow graph. "
|
||||
"Focus ONLY on the task described in your instructions below. "
|
||||
"Call set_output() for each of your declared output keys, then stop. "
|
||||
"Do NOT attempt work that belongs to other nodes — the framework "
|
||||
"routes data between nodes automatically."
|
||||
)
|
||||
from framework.graph.node import DataBuffer, NodeSpec
|
||||
|
||||
|
||||
def _with_datetime(prompt: str) -> str:
|
||||
"""Append current datetime with local timezone to a system prompt."""
|
||||
local = datetime.now().astimezone()
|
||||
stamp = f"Current date and time: {local.strftime('%Y-%m-%d %H:%M %Z (UTC%z)')}"
|
||||
return f"{prompt}\n\n{stamp}" if prompt else stamp
|
||||
|
||||
|
||||
def build_accounts_prompt(
|
||||
accounts: list[dict[str, Any]],
|
||||
tool_provider_map: dict[str, str] | None = None,
|
||||
node_tool_names: list[str] | None = None,
|
||||
) -> str:
|
||||
"""Build a prompt section describing connected accounts.
|
||||
|
||||
When tool_provider_map is provided, produces structured output grouped
|
||||
by provider with tool mapping, so the LLM knows which ``account`` value
|
||||
to pass to which tool.
|
||||
|
||||
When node_tool_names is also provided, filters to only show providers
|
||||
whose tools overlap with the node's tool list.
|
||||
|
||||
Args:
|
||||
accounts: List of account info dicts from
|
||||
CredentialStoreAdapter.get_all_account_info().
|
||||
tool_provider_map: Mapping of tool_name -> provider_name
|
||||
(e.g. {"gmail_list_messages": "google"}).
|
||||
node_tool_names: Tool names available to the current node.
|
||||
When provided, only providers with matching tools are shown.
|
||||
|
||||
Returns:
|
||||
Formatted accounts block, or empty string if no accounts.
|
||||
"""
|
||||
if not accounts:
|
||||
return ""
|
||||
|
||||
# Flat format (backward compat) when no tool mapping provided
|
||||
if tool_provider_map is None:
|
||||
lines = [
|
||||
"Connected accounts (use the alias as the `account` parameter "
|
||||
"when calling tools to target a specific account):"
|
||||
]
|
||||
for acct in accounts:
|
||||
provider = acct.get("provider", "unknown")
|
||||
alias = acct.get("alias", "unknown")
|
||||
identity = acct.get("identity", {})
|
||||
detail_parts = [f"{k}: {v}" for k, v in identity.items() if v]
|
||||
detail = f" ({', '.join(detail_parts)})" if detail_parts else ""
|
||||
lines.append(f"- {provider}/{alias}{detail}")
|
||||
return "\n".join(lines)
|
||||
|
||||
# --- Structured format: group by provider with tool mapping ---
|
||||
|
||||
# Invert tool_provider_map to provider -> [tools]
|
||||
provider_tools: dict[str, list[str]] = {}
|
||||
for tool_name, provider in tool_provider_map.items():
|
||||
provider_tools.setdefault(provider, []).append(tool_name)
|
||||
|
||||
# Filter to relevant providers based on node tools
|
||||
node_tool_set = set(node_tool_names) if node_tool_names else None
|
||||
|
||||
# Group accounts by provider
|
||||
provider_accounts: dict[str, list[dict[str, Any]]] = {}
|
||||
for acct in accounts:
|
||||
provider = acct.get("provider", "unknown")
|
||||
provider_accounts.setdefault(provider, []).append(acct)
|
||||
|
||||
sections: list[str] = ["Connected accounts:"]
|
||||
|
||||
for provider, acct_list in provider_accounts.items():
|
||||
tools_for_provider = sorted(provider_tools.get(provider, []))
|
||||
|
||||
# If node tools specified, only show providers with overlapping tools
|
||||
if node_tool_set is not None:
|
||||
relevant_tools = [t for t in tools_for_provider if t in node_tool_set]
|
||||
if not relevant_tools:
|
||||
continue
|
||||
tools_for_provider = relevant_tools
|
||||
|
||||
# Local-only providers: tools read from env vars, no account= routing
|
||||
all_local = all(a.get("source") == "local" for a in acct_list)
|
||||
|
||||
# Provider header with tools
|
||||
display_name = provider.replace("_", " ").title()
|
||||
if tools_for_provider and not all_local:
|
||||
tools_str = ", ".join(tools_for_provider)
|
||||
sections.append(f'\n{display_name} (use account="<alias>" with: {tools_str}):')
|
||||
elif tools_for_provider and all_local:
|
||||
tools_str = ", ".join(tools_for_provider)
|
||||
sections.append(f"\n{display_name} (tools: {tools_str}):")
|
||||
else:
|
||||
sections.append(f"\n{display_name}:")
|
||||
|
||||
# Account entries
|
||||
for acct in acct_list:
|
||||
alias = acct.get("alias", "unknown")
|
||||
identity = acct.get("identity", {})
|
||||
detail_parts = [f"{k}: {v}" for k, v in identity.items() if v]
|
||||
detail = f" ({', '.join(detail_parts)})" if detail_parts else ""
|
||||
source_tag = " [local]" if acct.get("source") == "local" else ""
|
||||
sections.append(f" - {provider}/{alias}{detail}{source_tag}")
|
||||
|
||||
# If filtering removed all providers, return empty
|
||||
if len(sections) <= 1:
|
||||
return ""
|
||||
|
||||
return "\n".join(sections)
|
||||
_with_datetime = stamp_prompt_datetime
|
||||
|
||||
|
||||
def compose_system_prompt(
|
||||
@@ -155,219 +36,115 @@ def compose_system_prompt(
|
||||
execution_preamble: str | None = None,
|
||||
node_type_preamble: str | None = None,
|
||||
) -> str:
|
||||
"""Compose the multi-layer system prompt.
|
||||
"""Compatibility wrapper for the legacy function signature."""
|
||||
from framework.graph.prompting import NodePromptSpec
|
||||
|
||||
Args:
|
||||
identity_prompt: Layer 1 — static agent identity (from GraphSpec).
|
||||
focus_prompt: Layer 3 — per-node focus directive (from NodeSpec.system_prompt).
|
||||
narrative: Layer 2 — auto-generated from conversation state.
|
||||
accounts_prompt: Connected accounts block (sits between identity and narrative).
|
||||
skills_catalog_prompt: Available skills catalog XML (Agent Skills standard).
|
||||
protocols_prompt: Default skill operational protocols section.
|
||||
execution_preamble: EXECUTION_SCOPE_PREAMBLE for worker nodes
|
||||
(prepended before focus so the LLM knows its pipeline scope).
|
||||
node_type_preamble: Node-type-specific preamble, e.g. GCU browser
|
||||
best-practices prompt (prepended before focus).
|
||||
|
||||
Returns:
|
||||
Composed system prompt with all layers present, plus current datetime.
|
||||
"""
|
||||
parts: list[str] = []
|
||||
|
||||
# Layer 1: Identity (always first, anchors the personality)
|
||||
if identity_prompt:
|
||||
parts.append(identity_prompt)
|
||||
|
||||
# Accounts (semi-static, deployment-specific)
|
||||
if accounts_prompt:
|
||||
parts.append(f"\n{accounts_prompt}")
|
||||
|
||||
# Skills catalog (discovered skills available for activation)
|
||||
if skills_catalog_prompt:
|
||||
parts.append(f"\n{skills_catalog_prompt}")
|
||||
|
||||
# Operational protocols (default skill behavioral guidance)
|
||||
if protocols_prompt:
|
||||
parts.append(f"\n{protocols_prompt}")
|
||||
|
||||
# Layer 2: Narrative (what's happened so far)
|
||||
if narrative:
|
||||
parts.append(f"\n--- Context (what has happened so far) ---\n{narrative}")
|
||||
|
||||
# Execution scope preamble (worker nodes — tells the LLM it is one
|
||||
# step in a multi-node pipeline and should not overreach)
|
||||
if execution_preamble:
|
||||
parts.append(f"\n{execution_preamble}")
|
||||
|
||||
# Node-type preamble (e.g. GCU browser best-practices)
|
||||
if node_type_preamble:
|
||||
parts.append(f"\n{node_type_preamble}")
|
||||
|
||||
# Layer 3: Focus (current phase directive)
|
||||
if focus_prompt:
|
||||
parts.append(f"\n--- Current Focus ---\n{focus_prompt}")
|
||||
|
||||
return _with_datetime("\n".join(parts) if parts else "")
|
||||
|
||||
|
||||
def build_narrative(
|
||||
memory: SharedMemory,
|
||||
execution_path: list[str],
|
||||
graph: GraphSpec,
|
||||
) -> str:
|
||||
"""Build Layer 2 (narrative) from structured state.
|
||||
|
||||
Deterministic — no LLM call. Reads SharedMemory and execution path
|
||||
to describe what has happened so far. Cheap and fast.
|
||||
|
||||
Args:
|
||||
memory: Current shared memory state.
|
||||
execution_path: List of node IDs visited so far.
|
||||
graph: Graph spec (for node names/descriptions).
|
||||
|
||||
Returns:
|
||||
Narrative string describing the session state.
|
||||
"""
|
||||
parts: list[str] = []
|
||||
|
||||
# Describe execution path
|
||||
if execution_path:
|
||||
phase_descriptions: list[str] = []
|
||||
for node_id in execution_path:
|
||||
node_spec = graph.get_node(node_id)
|
||||
if node_spec:
|
||||
phase_descriptions.append(f"- {node_spec.name}: {node_spec.description}")
|
||||
else:
|
||||
phase_descriptions.append(f"- {node_id}")
|
||||
parts.append("Phases completed:\n" + "\n".join(phase_descriptions))
|
||||
|
||||
# Describe key memory values (skip very long values)
|
||||
all_memory = memory.read_all()
|
||||
if all_memory:
|
||||
memory_lines: list[str] = []
|
||||
for key, value in all_memory.items():
|
||||
if value is None:
|
||||
continue
|
||||
val_str = str(value)
|
||||
if len(val_str) > 200:
|
||||
val_str = val_str[:200] + "..."
|
||||
memory_lines.append(f"- {key}: {val_str}")
|
||||
if memory_lines:
|
||||
parts.append("Current state:\n" + "\n".join(memory_lines))
|
||||
|
||||
return "\n\n".join(parts) if parts else ""
|
||||
spec = NodePromptSpec(
|
||||
identity_prompt=identity_prompt or "",
|
||||
focus_prompt=focus_prompt or "",
|
||||
narrative=narrative or "",
|
||||
accounts_prompt=accounts_prompt or "",
|
||||
skills_catalog_prompt=skills_catalog_prompt or "",
|
||||
protocols_prompt=protocols_prompt or "",
|
||||
# Legacy callers explicitly passed these preambles. Preserve them by
|
||||
# folding them into the focus block when present.
|
||||
node_type="event_loop",
|
||||
)
|
||||
if execution_preamble or node_type_preamble:
|
||||
focus_parts = []
|
||||
if execution_preamble:
|
||||
focus_parts.append(execution_preamble)
|
||||
if node_type_preamble:
|
||||
focus_parts.append(node_type_preamble)
|
||||
if spec.focus_prompt:
|
||||
focus_parts.append(spec.focus_prompt)
|
||||
spec = NodePromptSpec(
|
||||
identity_prompt=spec.identity_prompt,
|
||||
focus_prompt="\n\n".join(focus_parts),
|
||||
narrative=spec.narrative,
|
||||
accounts_prompt=spec.accounts_prompt,
|
||||
skills_catalog_prompt=spec.skills_catalog_prompt,
|
||||
protocols_prompt=spec.protocols_prompt,
|
||||
node_type=spec.node_type,
|
||||
output_keys=spec.output_keys,
|
||||
is_subagent_mode=spec.is_subagent_mode,
|
||||
)
|
||||
return build_system_prompt(spec)
|
||||
|
||||
|
||||
def build_transition_marker(
|
||||
previous_node: NodeSpec,
|
||||
next_node: NodeSpec,
|
||||
memory: SharedMemory,
|
||||
buffer: DataBuffer,
|
||||
cumulative_tool_names: list[str],
|
||||
data_dir: Path | str | None = None,
|
||||
adapt_content: str | None = None,
|
||||
) -> str:
|
||||
"""Build a 'State of the World' transition marker.
|
||||
"""Legacy transition builder with best-effort spillover compatibility."""
|
||||
buffer_items: dict[str, str] = {}
|
||||
data_files: list[str] = []
|
||||
|
||||
Inserted into the conversation as a user message at phase boundaries.
|
||||
Gives the LLM full situational awareness: what happened, what's stored,
|
||||
what tools are available, and what to focus on next.
|
||||
all_buffer = buffer.read_all()
|
||||
for key, value in all_buffer.items():
|
||||
if value is None:
|
||||
continue
|
||||
val_str = str(value)
|
||||
if len(val_str) > 300 and data_dir:
|
||||
data_path = Path(data_dir)
|
||||
data_path.mkdir(parents=True, exist_ok=True)
|
||||
ext = ".json" if isinstance(value, (dict, list)) else ".txt"
|
||||
filename = f"output_{key}{ext}"
|
||||
file_path = data_path / filename
|
||||
try:
|
||||
write_content = (
|
||||
json.dumps(value, indent=2, ensure_ascii=False)
|
||||
if isinstance(value, (dict, list))
|
||||
else str(value)
|
||||
)
|
||||
file_path.write_text(write_content, encoding="utf-8")
|
||||
file_size = file_path.stat().st_size
|
||||
buffer_items[key] = (
|
||||
f"[Saved to '{filename}' ({file_size:,} bytes). "
|
||||
f"Use load_data(filename='{filename}') to access.]"
|
||||
)
|
||||
except Exception:
|
||||
buffer_items[key] = val_str[:300] + "..."
|
||||
elif len(val_str) > 300:
|
||||
buffer_items[key] = val_str[:300] + "..."
|
||||
else:
|
||||
buffer_items[key] = val_str
|
||||
|
||||
Args:
|
||||
previous_node: NodeSpec of the phase just completed.
|
||||
next_node: NodeSpec of the phase about to start.
|
||||
memory: Current shared memory state.
|
||||
cumulative_tool_names: All tools available (cumulative set).
|
||||
data_dir: Path to spillover data directory.
|
||||
adapt_content: Agent working memory (adapt.md) content.
|
||||
|
||||
Returns:
|
||||
Transition marker message text.
|
||||
"""
|
||||
sections: list[str] = []
|
||||
|
||||
# Header
|
||||
sections.append(f"--- PHASE TRANSITION: {previous_node.name} → {next_node.name} ---")
|
||||
|
||||
# What just completed
|
||||
sections.append(f"\nCompleted: {previous_node.name}")
|
||||
sections.append(f" {previous_node.description}")
|
||||
|
||||
# Outputs in memory — use file references for large values so the
|
||||
# next node loads full data from disk instead of seeing truncated
|
||||
# inline previews that look deceptively complete.
|
||||
all_memory = memory.read_all()
|
||||
if all_memory:
|
||||
memory_lines: list[str] = []
|
||||
for key, value in all_memory.items():
|
||||
if value is None:
|
||||
continue
|
||||
val_str = str(value)
|
||||
if len(val_str) > 300 and data_dir:
|
||||
# Auto-spill large transition values to data files
|
||||
import json as _json
|
||||
|
||||
data_path = Path(data_dir)
|
||||
data_path.mkdir(parents=True, exist_ok=True)
|
||||
ext = ".json" if isinstance(value, (dict, list)) else ".txt"
|
||||
filename = f"output_{key}{ext}"
|
||||
try:
|
||||
write_content = (
|
||||
_json.dumps(value, indent=2, ensure_ascii=False)
|
||||
if isinstance(value, (dict, list))
|
||||
else str(value)
|
||||
)
|
||||
(data_path / filename).write_text(write_content, encoding="utf-8")
|
||||
file_size = (data_path / filename).stat().st_size
|
||||
val_str = (
|
||||
f"[Saved to '{filename}' ({file_size:,} bytes). "
|
||||
f"Use load_data(filename='{filename}') to access.]"
|
||||
)
|
||||
except Exception:
|
||||
val_str = val_str[:300] + "..."
|
||||
elif len(val_str) > 300:
|
||||
val_str = val_str[:300] + "..."
|
||||
memory_lines.append(f" {key}: {val_str}")
|
||||
if memory_lines:
|
||||
sections.append("\nOutputs available:\n" + "\n".join(memory_lines))
|
||||
|
||||
# Files in data directory
|
||||
if data_dir:
|
||||
data_path = Path(data_dir)
|
||||
if data_path.exists():
|
||||
files = sorted(data_path.iterdir())
|
||||
if files:
|
||||
file_lines = [
|
||||
f" {f.name} ({f.stat().st_size:,} bytes)" for f in files if f.is_file()
|
||||
]
|
||||
if file_lines:
|
||||
sections.append(
|
||||
"\nData files (use load_data to access):\n" + "\n".join(file_lines)
|
||||
)
|
||||
data_files = [
|
||||
f"{entry.name} ({entry.stat().st_size:,} bytes)"
|
||||
for entry in sorted(data_path.iterdir())
|
||||
if entry.is_file()
|
||||
]
|
||||
|
||||
# Agent working memory
|
||||
if adapt_content:
|
||||
sections.append(f"\n--- Agent Memory ---\n{adapt_content}")
|
||||
|
||||
# Available tools
|
||||
if cumulative_tool_names:
|
||||
sections.append("\nAvailable tools: " + ", ".join(sorted(cumulative_tool_names)))
|
||||
|
||||
# Next phase
|
||||
sections.append(f"\nNow entering: {next_node.name}")
|
||||
sections.append(f" {next_node.description}")
|
||||
if next_node.output_keys:
|
||||
sections.append(
|
||||
f"\nYour ONLY job in this phase: complete the task above and call "
|
||||
f"set_output() for {next_node.output_keys}. Do NOT do work that "
|
||||
f"belongs to later phases."
|
||||
return build_transition_message(
|
||||
TransitionSpec(
|
||||
previous_name=previous_node.name,
|
||||
previous_description=previous_node.description,
|
||||
next_name=next_node.name,
|
||||
next_description=next_node.description,
|
||||
next_output_keys=tuple(next_node.output_keys or ()),
|
||||
buffer_items=buffer_items,
|
||||
cumulative_tool_names=tuple(sorted(cumulative_tool_names)),
|
||||
data_files=tuple(data_files),
|
||||
)
|
||||
|
||||
# Reflection prompt (engineered metacognition)
|
||||
sections.append(
|
||||
"\nBefore proceeding, briefly reflect: what went well in the "
|
||||
"previous phase? Are there any gaps or surprises worth noting?"
|
||||
)
|
||||
|
||||
sections.append("\n--- END TRANSITION ---")
|
||||
|
||||
return "\n".join(sections)
|
||||
from framework.graph.prompting import build_transition_message
|
||||
|
||||
|
||||
__all__ = [
|
||||
"EXECUTION_SCOPE_PREAMBLE",
|
||||
"_with_datetime",
|
||||
"build_accounts_prompt",
|
||||
"build_narrative",
|
||||
"build_transition_marker",
|
||||
"build_transition_message",
|
||||
"compose_system_prompt",
|
||||
]
|
||||
|
||||
@@ -0,0 +1,312 @@
|
||||
"""Pure prompt rendering helpers for graph execution.
|
||||
|
||||
This module owns all prompt text assembly for graph nodes.
|
||||
It intentionally avoids side effects so runtime code can prepare any
|
||||
spill files or transition metadata separately and then pass plain data in.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from dataclasses import dataclass, field
|
||||
from datetime import datetime
|
||||
from typing import TYPE_CHECKING, Any
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from framework.graph.edge import GraphSpec
|
||||
from framework.graph.node import DataBuffer
|
||||
|
||||
|
||||
# Injected into every worker node's system prompt so the LLM understands
|
||||
# it is one step in a multi-node pipeline and should not overreach.
|
||||
EXECUTION_SCOPE_PREAMBLE = (
|
||||
"EXECUTION SCOPE: You are one node in a multi-step workflow graph. "
|
||||
"Focus ONLY on the task described in your instructions below. "
|
||||
"Call set_output() for each of your declared output keys, then stop. "
|
||||
"Do NOT attempt work that belongs to other nodes - the framework "
|
||||
"routes data between nodes automatically."
|
||||
)
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class NodePromptSpec:
    """Immutable set of text inputs from which one node's system prompt is built.

    Every field is optional: an empty string (or empty tuple) means the
    corresponding piece is absent.
    """

    # Prompt text fragments.
    identity_prompt: str = ""
    focus_prompt: str = ""
    narrative: str = ""
    accounts_prompt: str = ""
    skills_catalog_prompt: str = ""
    protocols_prompt: str = ""
    memory_prompt: str = ""

    # Node characteristics.
    node_type: str = "event_loop"
    output_keys: tuple[str, ...] = ()
    is_subagent_mode: bool = False
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class TransitionSpec:
    """Immutable, pre-rendered inputs for one phase-transition message.

    The four name/description fields are required; everything else
    defaults to an empty value.
    """

    # The phase that just finished and the phase about to start.
    previous_name: str
    previous_description: str
    next_name: str
    next_description: str

    # Optional details included in the message when non-empty.
    next_output_keys: tuple[str, ...] = ()
    # default_factory gives every instance its own dict.
    buffer_items: dict[str, str] = field(default_factory=dict)
    cumulative_tool_names: tuple[str, ...] = ()
    data_files: tuple[str, ...] = ()
|
||||
|
||||
|
||||
def stamp_prompt_datetime(prompt: str) -> str:
    """Return *prompt* followed by a line stating the current local date and time.

    A non-empty prompt is separated from the stamp by a blank line; an
    empty prompt yields the stamp on its own.
    """
    now_local = datetime.now().astimezone()
    stamp = "Current date and time: " + now_local.strftime("%Y-%m-%d %H:%M %Z (UTC%z)")
    if not prompt:
        return stamp
    return f"{prompt}\n\n{stamp}"
|
||||
|
||||
|
||||
def _format_identity_detail(acct: dict[str, Any]) -> str:
    """Return `` (key: value, ...)`` for an account's truthy identity fields, else ``""``."""
    # ``or {}`` also covers an explicit ``"identity": None`` entry, which
    # ``acct.get("identity", {})`` would hand straight to ``.items()``.
    identity = acct.get("identity") or {}
    detail_parts = [f"{k}: {v}" for k, v in identity.items() if v]
    return f" ({', '.join(detail_parts)})" if detail_parts else ""


def build_accounts_prompt(
    accounts: list[dict[str, Any]],
    tool_provider_map: dict[str, str] | None = None,
    node_tool_names: list[str] | None = None,
) -> str:
    """Build a prompt section describing connected accounts.

    Without ``tool_provider_map`` a flat bullet list is produced. With it,
    accounts are grouped by provider and each provider header lists the
    tools mapped to that provider. When ``node_tool_names`` is also given
    (and non-empty), only providers with at least one tool in that list
    are shown, and only those tools are listed.

    Args:
        accounts: Account info dicts; the keys read are ``provider``,
            ``alias``, ``identity`` and ``source``.
        tool_provider_map: Mapping of tool name -> provider name.
        node_tool_names: Tool names available to the current node.

    Returns:
        The formatted accounts block, or an empty string when there are no
        accounts or filtering removed every provider.
    """
    if not accounts:
        return ""

    # Flat format when no tool mapping is provided.
    if tool_provider_map is None:
        lines = [
            "Connected accounts (use the alias as the `account` parameter "
            "when calling tools to target a specific account):"
        ]
        for acct in accounts:
            provider = acct.get("provider", "unknown")
            alias = acct.get("alias", "unknown")
            lines.append(f"- {provider}/{alias}{_format_identity_detail(acct)}")
        return "\n".join(lines)

    # Invert tool -> provider into provider -> [tools].
    provider_tools: dict[str, list[str]] = {}
    for tool_name, provider in tool_provider_map.items():
        provider_tools.setdefault(provider, []).append(tool_name)

    # An empty or missing node tool list means "do not filter".
    node_tool_set = set(node_tool_names) if node_tool_names else None

    # Group accounts by provider, keeping first-seen provider order.
    provider_accounts: dict[str, list[dict[str, Any]]] = {}
    for acct in accounts:
        provider = acct.get("provider", "unknown")
        provider_accounts.setdefault(provider, []).append(acct)

    sections: list[str] = ["Connected accounts:"]

    for provider, acct_list in provider_accounts.items():
        tools_for_provider = sorted(provider_tools.get(provider, []))

        # With a node tool filter, skip providers that share no tool with it.
        if node_tool_set is not None:
            relevant_tools = [name for name in tools_for_provider if name in node_tool_set]
            if not relevant_tools:
                continue
            tools_for_provider = relevant_tools

        # Providers whose accounts are all "local" get no account= hint.
        all_local = all(acct.get("source") == "local" for acct in acct_list)
        display_name = provider.replace("_", " ").title()
        if tools_for_provider and not all_local:
            tools_str = ", ".join(tools_for_provider)
            sections.append(f'\n{display_name} (use account="<alias>" with: {tools_str}):')
        elif tools_for_provider and all_local:
            tools_str = ", ".join(tools_for_provider)
            sections.append(f"\n{display_name} (tools: {tools_str}):")
        else:
            sections.append(f"\n{display_name}:")

        for acct in acct_list:
            alias = acct.get("alias", "unknown")
            detail = _format_identity_detail(acct)
            source_tag = " [local]" if acct.get("source") == "local" else ""
            sections.append(f" - {provider}/{alias}{detail}{source_tag}")

    # Only the heading is left: filtering removed every provider.
    if len(sections) <= 1:
        return ""

    return "\n".join(sections)
|
||||
|
||||
|
||||
def build_prompt_spec_from_node_context(
|
||||
ctx: Any,
|
||||
*,
|
||||
focus_prompt: str | None = None,
|
||||
narrative: str | None = None,
|
||||
memory_prompt: str | None = None,
|
||||
) -> NodePromptSpec:
|
||||
"""Convert a NodeContext-like object into structured prompt inputs.

Reads the prompt fragments from ``ctx`` and its ``node_spec`` and packs
them into a ``NodePromptSpec``. ``focus_prompt`` and ``narrative``
replace the values taken from ``ctx`` whenever they are not ``None``.

Args:
    ctx: Object exposing ``identity_prompt``, ``narrative``,
        ``accounts_prompt``, ``skills_catalog_prompt``,
        ``protocols_prompt`` and ``node_spec`` (with ``system_prompt``,
        ``node_type`` and ``output_keys``). ``memory_prompt``,
        ``dynamic_memory_provider`` and ``is_subagent_mode`` are read
        with ``getattr`` and may be absent.
    focus_prompt: Override for ``ctx.node_spec.system_prompt``.
    narrative: Override for ``ctx.narrative``.
    memory_prompt: Explicit memory block; when ``None`` it is resolved
        from ``ctx`` as described in the comments below.

Returns:
    A ``NodePromptSpec`` whose text fields are never ``None``.
"""
|
||||
# NOTE(review): indentation is lost in this view. The provider lookup
# below is presumably nested under the ``is None`` check, so that an
# explicit ``memory_prompt`` argument wins — confirm against the file.
resolved_memory_prompt = memory_prompt
|
||||
if resolved_memory_prompt is None:
|
||||
# Start from the static memory text on the context, if any.
resolved_memory_prompt = getattr(ctx, "memory_prompt", "") or ""
|
||||
dynamic_memory_provider = getattr(ctx, "dynamic_memory_provider", None)
|
||||
if dynamic_memory_provider is not None:
|
||||
try:
|
||||
# A provider, when set, supplies the memory text instead.
resolved_memory_prompt = dynamic_memory_provider() or ""
|
||||
except Exception:
|
||||
# Best effort: a failing provider falls back to the static text.
resolved_memory_prompt = getattr(ctx, "memory_prompt", "") or ""
|
||||
return NodePromptSpec(
|
||||
identity_prompt=ctx.identity_prompt or "",
|
||||
focus_prompt=focus_prompt if focus_prompt is not None else (ctx.node_spec.system_prompt or ""),
|
||||
narrative=narrative if narrative is not None else (ctx.narrative or ""),
|
||||
accounts_prompt=ctx.accounts_prompt or "",
|
||||
skills_catalog_prompt=ctx.skills_catalog_prompt or "",
|
||||
protocols_prompt=ctx.protocols_prompt or "",
|
||||
memory_prompt=resolved_memory_prompt,
|
||||
node_type=ctx.node_spec.node_type,
|
||||
output_keys=tuple(ctx.node_spec.output_keys or ()),
|
||||
is_subagent_mode=bool(getattr(ctx, "is_subagent_mode", False)),
|
||||
)
|
||||
|
||||
|
||||
def build_system_prompt(spec: NodePromptSpec) -> str:
    """Assemble the full system prompt text for one node from *spec*.

    Non-empty pieces are emitted in a fixed order: identity, accounts,
    skills catalog, protocols, recalled memories (behind a short caution
    line), narrative context, the execution-scope preamble, the GCU
    browser prompt, and finally the current focus. The result is passed
    through ``stamp_prompt_datetime``.
    """
    sections: list[str] = []

    def add(text: str) -> None:
        # Every piece after the identity is introduced by a newline.
        sections.append(f"\n{text}")

    if spec.identity_prompt:
        sections.append(spec.identity_prompt)

    for optional_block in (
        spec.accounts_prompt,
        spec.skills_catalog_prompt,
        spec.protocols_prompt,
    ):
        if optional_block:
            add(optional_block)

    if spec.memory_prompt:
        add(
            "Relevant recalled memories may appear below. Treat them as "
            "point-in-time guidance and verify stale details against current context."
        )
        add(spec.memory_prompt)

    if spec.narrative:
        add(f"--- Context (what has happened so far) ---\n{spec.narrative}")

    # The scope preamble is only for non-subagent event_loop/gcu nodes
    # that declare output keys.
    is_scoped_type = spec.node_type in ("event_loop", "gcu")
    if is_scoped_type and spec.output_keys and not spec.is_subagent_mode:
        add(EXECUTION_SCOPE_PREAMBLE)

    if spec.node_type == "gcu":
        # Imported here so the gcu module is only loaded for gcu nodes.
        from framework.graph.gcu import GCU_BROWSER_SYSTEM_PROMPT

        add(GCU_BROWSER_SYSTEM_PROMPT)

    if spec.focus_prompt:
        add(f"--- Current Focus ---\n{spec.focus_prompt}")

    return stamp_prompt_datetime("\n".join(sections))
|
||||
|
||||
|
||||
def build_system_prompt_for_node_context(
    ctx: Any,
    *,
    focus_prompt: str | None = None,
    narrative: str | None = None,
    memory_prompt: str | None = None,
) -> str:
    """Render the system prompt for a NodeContext-like object in one call.

    Builds the prompt spec with ``build_prompt_spec_from_node_context``,
    forwarding the three optional overrides unchanged, and renders it
    with ``build_system_prompt``.
    """
    return build_system_prompt(
        build_prompt_spec_from_node_context(
            ctx,
            focus_prompt=focus_prompt,
            narrative=narrative,
            memory_prompt=memory_prompt,
        )
    )
|
||||
|
||||
|
||||
def build_narrative(
    buffer: DataBuffer,
    execution_path: list[str],
    graph: GraphSpec,
) -> str:
    """Describe the visited nodes and current buffer values as plain text.

    Args:
        buffer: Object whose ``read_all()`` returns the current key/value state.
        execution_path: IDs of the nodes visited so far, in order.
        graph: Object whose ``get_node(node_id)`` returns a node spec with
            ``name`` and ``description``, or a falsy value if unknown.

    Returns:
        A "Phases completed" section and a "Current state" section joined
        by a blank line; either may be missing, and the result is ``""``
        when both are.
    """
    chunks: list[str] = []

    if execution_path:
        visited: list[str] = []
        for node_id in execution_path:
            found = graph.get_node(node_id)
            # Unknown nodes are listed by their bare ID.
            visited.append(f"- {found.name}: {found.description}" if found else f"- {node_id}")
        chunks.append("Phases completed:\n" + "\n".join(visited))

    snapshot = buffer.read_all()
    if snapshot:
        state_lines: list[str] = []
        for key, value in snapshot.items():
            if value is not None:
                text = str(value)
                # Values longer than 200 characters are shown truncated.
                shown = text if len(text) <= 200 else text[:200] + "..."
                state_lines.append(f"- {key}: {shown}")
        if state_lines:
            chunks.append("Current state:\n" + "\n".join(state_lines))

    return "\n\n".join(chunks)
|
||||
|
||||
|
||||
def build_transition_message(spec: TransitionSpec) -> str:
    """Render the phase-transition marker text described by *spec*.

    The message names the completed and upcoming phases and, when the
    corresponding spec fields are non-empty, lists buffer outputs, data
    files, available tools (sorted) and the next phase's output keys. It
    always ends with a reflection prompt and an end marker.
    """
    out: list[str] = []
    emit = out.append

    emit(f"--- PHASE TRANSITION: {spec.previous_name} -> {spec.next_name} ---")
    emit(f"\nCompleted: {spec.previous_name}")
    emit(f" {spec.previous_description}")

    if spec.buffer_items:
        rendered_items = "\n".join(f" {key}: {value}" for key, value in spec.buffer_items.items())
        emit("\nOutputs available:\n" + rendered_items)

    if spec.data_files:
        rendered_files = "\n".join(f" {entry}" for entry in spec.data_files)
        emit("\nData files (use load_data to access):\n" + rendered_files)

    if spec.cumulative_tool_names:
        tool_names = sorted(spec.cumulative_tool_names)
        emit("\nAvailable tools: " + ", ".join(tool_names))

    emit(f"\nNow entering: {spec.next_name}")
    emit(f" {spec.next_description}")

    if spec.next_output_keys:
        # The keys are shown in list form, e.g. ['a', 'b'].
        emit(
            f"\nYour ONLY job in this phase: complete the task above and call "
            f"set_output() for {list(spec.next_output_keys)}. Do NOT do work that "
            f"belongs to later phases."
        )

    emit(
        "\nBefore proceeding, briefly reflect: what went well in the "
        "previous phase? Are there any gaps or surprises worth noting?"
    )
    emit("\n--- END TRANSITION ---")

    return "\n".join(out)
|
||||
|
||||
|
||||
__all__ = [
|
||||
"EXECUTION_SCOPE_PREAMBLE",
|
||||
"NodePromptSpec",
|
||||
"TransitionSpec",
|
||||
"build_accounts_prompt",
|
||||
"build_narrative",
|
||||
"build_prompt_spec_from_node_context",
|
||||
"build_system_prompt",
|
||||
"build_system_prompt_for_node_context",
|
||||
"build_transition_message",
|
||||
"stamp_prompt_datetime",
|
||||
]
|
||||
@@ -0,0 +1,899 @@
|
||||
"""
|
||||
WorkerAgent — First-class autonomous worker for event-driven graph execution.
|
||||
|
||||
Each node in a graph becomes a WorkerAgent that:
|
||||
- Owns its lifecycle, retry logic, memory scope, and LLM config
|
||||
- Receives activations from upstream workers (via GraphExecutor routing)
|
||||
- Self-checks readiness (fan-out group tracking)
|
||||
- Self-triggers when ready
|
||||
- Evaluates outgoing edges and publishes activations for downstream workers
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
import logging
|
||||
import time
|
||||
import uuid
|
||||
from dataclasses import dataclass, field
|
||||
from enum import StrEnum
|
||||
from typing import Any
|
||||
|
||||
from framework.graph.context import GraphContext, build_node_context_from_graph_context
|
||||
from framework.graph.edge import EdgeCondition, EdgeSpec
|
||||
from framework.graph.node import (
|
||||
NodeContext,
|
||||
NodeProtocol,
|
||||
NodeResult,
|
||||
NodeSpec,
|
||||
)
|
||||
from framework.graph.validator import OutputValidator
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Enums & data types
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class WorkerLifecycle(StrEnum):
|
||||
PENDING = "pending"
|
||||
RUNNING = "running"
|
||||
COMPLETED = "completed"
|
||||
FAILED = "failed"
|
||||
|
||||
|
||||
@dataclass
class FanOutTag:
    """Marker attached to every activation produced by one fan-out event.

    A source that activates several targets at once stamps each activation
    with a tag describing the whole group. The tag travels down the worker
    chain so that a convergence worker can tell when every parallel branch
    has arrived.
    """

    # Unique ID for this fan-out event.
    fan_out_id: str
    # Node that performed the fan-out.
    fan_out_source: str
    # All target node IDs in this fan-out.
    branches: frozenset[str]
    # Which branch this activation passed through.
    via_branch: str
|
||||
|
||||
|
||||
@dataclass
class FanOutTracker:
    """Progress record for one fan-out group, kept by the worker it targets."""

    fan_out_id: str
    # Every branch that belongs to the fan-out group.
    branches: frozenset[str]
    # Branches whose activations have arrived so far.
    reached: set[str] = field(default_factory=set)

    @property
    def is_complete(self) -> bool:
        """True once the set of reached branches equals the expected set."""
        expected = self.branches
        return expected == self.reached
|
||||
|
||||
|
||||
@dataclass
class Activation:
    """Message a finished source worker sends to one downstream target."""

    # ID of the node that produced the activation.
    source_id: str
    # ID of the node that should receive it.
    target_id: str
    # ID of the edge that was traversed, and the edge object itself.
    edge_id: str
    edge: EdgeSpec
    # Input values produced by the edge's input mapping.
    mapped_inputs: dict[str, Any]
    # Fan-out tags carried along with the activation.
    fan_out_tags: list[FanOutTag] = field(default_factory=list)
|
||||
|
||||
|
||||
@dataclass
class WorkerCompletion:
    """Data carried by a WORKER_COMPLETED event."""

    worker_id: str
    success: bool
    output: dict[str, Any]
    # Execution metrics copied from the node result.
    tokens_used: int = 0
    latency_ms: int = 0
    # NodeConversation for continuous mode; None otherwise.
    conversation: Any = None
    # Activations to route to downstream workers.
    activations: list[Activation] = field(default_factory=list)
|
||||
|
||||
|
||||
@dataclass
class RetryState:
    """Retry bookkeeping for a single worker."""

    # Attempt counter; starts at zero.
    attempt: int = 0
    # Retry budget taken from the node spec (defaults to 3).
    max_retries: int = 3
    # Whether the node spec declares the "event_loop" node type.
    is_event_loop: bool = False
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# WorkerAgent
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class WorkerAgent:
|
||||
"""First-class autonomous worker for one node in the graph.
|
||||
|
||||
Lifecycle:
|
||||
PENDING - waiting for activations
|
||||
RUNNING - executing the node
|
||||
COMPLETED- finished successfully, activations published
|
||||
FAILED - failed after retries exhausted
|
||||
"""
|
||||
|
||||
def __init__(
    self,
    node_spec: NodeSpec,
    graph_context: GraphContext,
) -> None:
    """Bind the worker to its node spec and the shared graph context.

    Args:
        node_spec: Specification of the node this worker executes.
        graph_context: Shared execution state (graph, buffer, event bus, ...).
    """
    self.node_spec = node_spec
    self._gc = graph_context

    # Edge topology is resolved once, here, and kept on the instance.
    graph = graph_context.graph
    self.incoming_edges: list[EdgeSpec] = graph.get_incoming_edges(node_spec.id)
    self.outgoing_edges: list[EdgeSpec] = graph.get_outgoing_edges(node_spec.id)

    # Lifecycle: every worker starts PENDING with no running task.
    self.lifecycle: WorkerLifecycle = WorkerLifecycle.PENDING
    self._task: asyncio.Task | None = None

    # Retry budget comes straight from the node spec.
    self.retry_state = RetryState(
        max_retries=node_spec.max_retries,
        is_event_loop=node_spec.node_type == "event_loop",
    )

    # Activation bookkeeping.
    self._inherited_fan_out_tags: list[FanOutTag] = []
    self._active_fan_outs: dict[str, FanOutTracker] = {}
    self._received_activations: list[Activation] = []
    self._has_been_activated = False

    # Pause support uses two events with opposite polarity:
    #   _run_gate        - set means "execution may proceed" (set by default)
    #   _pause_requested - set means "pause requested", the semantics the
    #                      EventLoopNode expects from its pause event
    run_gate = asyncio.Event()
    run_gate.set()
    self._run_gate: asyncio.Event = run_gate
    self._pause_requested: asyncio.Event = asyncio.Event()

    # Output validator used when writing results.
    self._validator = OutputValidator()

    # Node implementation is resolved lazily on first execution.
    self._node_impl: NodeProtocol | None = None

    # Per-worker metrics.
    self._tokens_used: int = 0
    self._latency_ms: int = 0

    # Outcome of the most recent run, readable by a polling executor.
    self._last_result: NodeResult | None = None
    self._last_activations: list[Activation] = []
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Public activation interface
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
def activate(self, inherited_tags: list[FanOutTag] | None = None) -> None:
    """Start executing this worker as a background asyncio task.

    Does nothing unless the worker is still PENDING, so repeated calls are
    harmless. Must be called while an event loop is running.

    Args:
        inherited_tags: Fan-out tags to propagate to downstream activations.
            The list is copied, so later caller-side mutation has no effect.
    """
    if self.lifecycle != WorkerLifecycle.PENDING:
        return

    self._inherited_fan_out_tags = list(inherited_tags) if inherited_tags else []
    self._has_been_activated = True
    self.lifecycle = WorkerLifecycle.RUNNING
    # create_task is the documented way to schedule a coroutine and returns
    # a real Task, matching the declared type of ``_task``.
    self._task = asyncio.create_task(
        self._execute_self(),
        name=f"worker:{self.node_spec.id}",
    )
|
||||
|
||||
def receive_activation(self, activation: Activation) -> None:
    """Record an activation delivered by an upstream worker.

    The GraphExecutor calls this while routing the activations of a
    WORKER_COMPLETED event. Activations arriving after the worker has left
    the PENDING state are ignored.
    """
    if self.lifecycle != WorkerLifecycle.PENDING:
        return

    self._received_activations.append(activation)

    own_id = self.node_spec.id
    for tag in activation.fan_out_tags:
        # A tag naming this worker as its via_branch is meant for workers
        # further downstream; it must not gate this worker itself.
        if tag.via_branch == own_id:
            continue

        tracker = self._active_fan_outs.get(tag.fan_out_id)
        if tracker is None:
            tracker = FanOutTracker(
                fan_out_id=tag.fan_out_id,
                branches=tag.branches,
            )
            self._active_fan_outs[tag.fan_out_id] = tracker
        tracker.reached.add(tag.via_branch)
|
||||
|
||||
def check_readiness(self) -> bool:
    """Report whether this worker may run.

    Returns True when the worker was already activated, when every tracked
    fan-out group is complete, or, with no fan-out groups tracked, as soon
    as at least one activation has been received.
    """
    if self._has_been_activated:
        return True

    trackers = self._active_fan_outs
    if trackers:
        return all(tracker.is_complete for tracker in trackers.values())

    # No fan-out tracking: the first activation is enough.
    return bool(self._received_activations)
|
||||
|
||||
def reset_for_revisit(self) -> None:
    """Return the worker to a fresh PENDING state for another visit.

    Used for feedback loops. The cached node implementation is left
    untouched; lifecycle, activation tracking, the task handle, the last
    result and the metrics are all cleared.
    """
    # Lifecycle and task handle.
    self.lifecycle = WorkerLifecycle.PENDING
    self._task = None

    # Activation tracking.
    self._has_been_activated = False
    self._inherited_fan_out_tags = []
    self._active_fan_outs = {}
    self._received_activations = []

    # Previous outcome and metrics.
    self._last_result = None
    self._last_activations = []
    self._tokens_used = 0
    self._latency_ms = 0
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Execution
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
async def _execute_self(self) -> None:
    """Run this worker's node once and record/publish the outcome.

    Steps, in order: copy the mapped inputs of all received activations
    into the shared buffer, bump the node's visit counter, short-circuit
    when ``max_node_visits`` is exceeded, otherwise execute the node (with
    retries), write its outputs, evaluate outgoing edges, run colony-memory
    reflection and publish a completion or failure event.

    Before returning, the outcome is stored on ``lifecycle``,
    ``_last_result`` and ``_last_activations``. Any ``Exception`` raised
    along the way is logged and turned into a FAILED lifecycle plus a
    failure event; ``asyncio.CancelledError`` derives from
    ``BaseException`` and therefore bypasses that handler.
    """
    gc = self._gc
    node_spec = self.node_spec
    try:
        # Write all mapped inputs from received activations to buffer
        for activation in self._received_activations:
            for key, value in activation.mapped_inputs.items():
                gc.buffer.write(key, value, validate=False)

        # Increment visit count (always, even if skipped)
        async with gc._visits_lock:
            visit_count = gc.node_visit_counts.get(node_spec.id, 0) + 1
            gc.node_visit_counts[node_spec.id] = visit_count

        # Check max_node_visits — skip execution but still propagate edges
        # (a max_node_visits of 0 or less disables the limit)
        if node_spec.max_node_visits > 0 and visit_count > node_spec.max_node_visits:
            logger.info(
                "Worker %s: visit %d exceeds max_node_visits=%d, skipping",
                node_spec.id, visit_count, node_spec.max_node_visits,
            )
            # Build a synthetic success result from current buffer state
            existing_output: dict[str, Any] = {}
            for key in node_spec.output_keys:
                val = gc.buffer.read(key)
                if val is not None:
                    existing_output[key] = val

            result = NodeResult(success=True, output=existing_output)

            # Evaluate outgoing edges so the cycle continues
            activations = await self._evaluate_outgoing_edges(result)

            self.lifecycle = WorkerLifecycle.COMPLETED
            self._last_result = result
            self._last_activations = activations
            # NOTE(review): unlike the normal success path below, this
            # branch returns without colony reflection and without
            # publishing a completion event — confirm the executor picks
            # up _last_activations by polling in this case.
            return

        # Clear stale nullable outputs on re-visit
        if visit_count > 1:
            nullable_keys = getattr(node_spec, "nullable_output_keys", None) or []
            for key in nullable_keys:
                if gc.buffer.read(key) is not None:
                    gc.buffer.write(key, None, validate=False)

        # Continuous mode: accumulate tools and output keys
        if gc.is_continuous and node_spec.tools:
            for t in gc.tools:
                if t.name in node_spec.tools and t.name not in gc.cumulative_tool_names:
                    gc.cumulative_tools.append(t)
                    gc.cumulative_tool_names.add(t.name)
        if gc.is_continuous and node_spec.output_keys:
            for k in node_spec.output_keys:
                if k not in gc.cumulative_output_keys:
                    gc.cumulative_output_keys.append(k)

        # Append to execution path
        async with gc._path_lock:
            gc.path.append(node_spec.id)

        # Get node implementation
        node_impl = self._get_node_implementation()

        # Build context
        ctx = self._build_node_context()

        # Execute with retry
        result = await self._execute_with_retries(node_impl, ctx)

        # Handle result
        if result.success:
            # Validate and write outputs
            self._write_outputs(result)

            # Evaluate outgoing edges
            activations = await self._evaluate_outgoing_edges(result)

            # Publish completion
            self.lifecycle = WorkerLifecycle.COMPLETED
            self._last_result = result
            self._last_activations = activations
            # Colony memory reflection — runs before downstream activation
            await self._reflect_colony_memory()
            completion = WorkerCompletion(
                worker_id=node_spec.id,
                success=True,
                output=result.output,
                tokens_used=result.tokens_used,
                latency_ms=result.latency_ms,
                conversation=result.conversation,
                activations=activations,
            )
            # NOTE(review): the viewed source lost its indentation; the
            # transition call is assumed to sit inside this ``if`` —
            # confirm against the repository.
            if gc.is_continuous and completion.conversation is not None:
                gc.continuous_conversation = completion.conversation
                await self._apply_continuous_transition(completion.activations)
            await self._publish_completion(completion)
        else:
            # Evaluate outgoing edges even on failure (ON_FAILURE edges)
            activations = await self._evaluate_outgoing_edges(result)

            self.lifecycle = WorkerLifecycle.FAILED
            self._last_result = result
            self._last_activations = activations
            # Colony memory reflection — capture learnings even on failure
            await self._reflect_colony_memory()
            await self._publish_failure(result.error or "Unknown error")
    except Exception as exc:
        # Fall back to the exception class name when the message is empty.
        error = str(exc) or type(exc).__name__
        logger.exception("Worker %s crashed during execution", node_spec.id)
        self.lifecycle = WorkerLifecycle.FAILED
        self._last_result = NodeResult(success=False, error=error)
        self._last_activations = []
        await self._publish_failure(error)
|
||||
|
||||
async def _execute_with_retries(
    self, node_impl: NodeProtocol, ctx: NodeContext
) -> NodeResult:
    """Execute the node, retrying failed attempts with exponential backoff.

    An attempt fails either by returning an unsuccessful result or by
    raising an exception; both kinds are retried the same way: the retry
    is counted on the graph context, logged, announced on the event bus
    and followed by a ``1s * 2**attempt`` sleep. EventLoopNode instances
    get exactly one attempt because they handle retries internally.

    Args:
        node_impl: Node implementation to run.
        ctx: Node context; ``ctx.attempt`` is set to the 1-based attempt
            number before each call.

    Returns:
        The successful result, or a failed NodeResult whose error reads
        ``"failed after N attempts: <last error>"``.
    """
    gc = self._gc
    node_id = self.node_spec.id

    # Only skip retries for actual EventLoopNode instances (they handle
    # retries internally). Custom NodeProtocol impls registered via
    # register_node should be retried by the executor.
    from framework.graph.event_loop_node import EventLoopNode as _ELN

    max_retries = 0 if isinstance(node_impl, _ELN) else self.retry_state.max_retries
    total_attempts = max(1, max_retries)

    async def _announce_retry(attempt_number: int) -> None:
        """Emit the retry event; a broken event bus must not fail the node."""
        if not gc.event_bus:
            return
        try:
            await gc.event_bus.emit_node_retry(
                stream_id=gc.stream_id,
                node_id=node_id,
                attempt=attempt_number,
                max_retries=max_retries,
                execution_id=gc.execution_id,
            )
        except Exception:
            logger.warning(
                "Worker %s: failed to emit retry event",
                node_id,
                exc_info=True,
            )

    for attempt in range(total_attempts):
        # Block here while the worker is paused.
        await self._run_gate.wait()

        ctx.attempt = attempt + 1
        retries_left = attempt + 1 < total_attempts
        delay = 1.0 * (2**attempt)
        start = time.monotonic()

        try:
            result = await node_impl.execute(ctx)
            result.latency_ms = int((time.monotonic() - start) * 1000)

            if result.success:
                return result

            if not retries_left:
                return NodeResult(
                    success=False,
                    error=f"failed after {attempt + 1} attempts: {result.error}",
                )

            gc.retry_counts[node_id] = gc.retry_counts.get(node_id, 0) + 1
            gc.nodes_with_retries.add(node_id)
            logger.warning(
                "Worker %s failed (attempt %d/%d), retrying in %.1fs: %s",
                node_id,
                attempt + 1,
                total_attempts,
                delay,
                result.error,
            )
        except Exception as exc:
            if not retries_left:
                return NodeResult(
                    success=False,
                    error=f"failed after {attempt + 1} attempts: {exc}",
                )

            gc.retry_counts[node_id] = gc.retry_counts.get(node_id, 0) + 1
            gc.nodes_with_retries.add(node_id)
            logger.warning(
                "Worker %s raised %s (attempt %d/%d), retrying in %.1fs",
                node_id,
                type(exc).__name__,
                attempt + 1,
                total_attempts,
                delay,
            )

        # Shared tail for both kinds of failed attempt: observers see every
        # retry, whatever caused it.
        await _announce_retry(attempt + 1)
        await asyncio.sleep(delay)

    # Defensive fallback: every loop iteration either returns or retries.
    return NodeResult(
        success=False,
        error=f"failed after {total_attempts} attempts",
    )
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Edge evaluation (source-side)
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
async def _evaluate_outgoing_edges(
    self, result: NodeResult
) -> list[Activation]:
    """Decide which outgoing edges fire and build one activation per edge.

    Each outgoing edge is asked whether it should be traversed for this
    result. When several CONDITIONAL edges qualify, only those with the
    highest priority survive. With parallel execution disabled only the
    first surviving edge is followed. When more than one edge remains, the
    activations form a fan-out group and each carries a new FanOutTag in
    addition to the tags this worker inherited.
    """
    gc = self._gc
    source_id = self.node_spec.id

    candidates: list[EdgeSpec] = []
    for edge in gc.graph.get_outgoing_edges(source_id):
        target_spec = gc.graph.get_node(edge.target)
        target_name = target_spec.name if target_spec else edge.target
        fires = await edge.should_traverse(
            source_success=result.success,
            source_output=result.output,
            buffer_data=gc.buffer.read_all(),
            llm=gc.llm,
            goal=gc.goal,
            source_node_name=self.node_spec.name,
            target_node_name=target_name,
        )
        if fires:
            candidates.append(edge)

    # Among competing CONDITIONAL edges keep only the top priority;
    # edges of any other kind are left alone.
    conditional_edges = [
        edge for edge in candidates if edge.condition == EdgeCondition.CONDITIONAL
    ]
    if len(conditional_edges) > 1:
        top_priority = max(edge.priority for edge in conditional_edges)
        candidates = [
            edge
            for edge in candidates
            if edge.condition != EdgeCondition.CONDITIONAL or edge.priority == top_priority
        ]

    # Sequential mode: follow the first match only.
    if len(candidates) > 1 and not gc.enable_parallel_execution:
        candidates = candidates[:1]

    is_fan_out = len(candidates) > 1
    fan_out_id = f"{source_id}_{uuid.uuid4().hex[:8]}" if is_fan_out else None
    branch_ids = frozenset(edge.target for edge in candidates) if is_fan_out else None

    activations: list[Activation] = []
    for edge in candidates:
        mapped = edge.map_inputs(result.output, gc.buffer.read_all())

        # Inherited tags first, then the tag for this fan-out (if any).
        tags = list(self._inherited_fan_out_tags)
        if is_fan_out:
            new_tag = FanOutTag(
                fan_out_id=fan_out_id,
                fan_out_source=source_id,
                branches=branch_ids,
                via_branch=edge.target,
            )
            tags.append(new_tag)

        activation = Activation(
            source_id=source_id,
            target_id=edge.target,
            edge_id=edge.id,
            edge=edge,
            mapped_inputs=mapped,
            fan_out_tags=tags,
        )
        activations.append(activation)

    if candidates:
        fan_out_note = (
            f" (fan-out: {[a.target_id for a in activations]})" if is_fan_out else ""
        )
        logger.info(
            "Worker %s → %d outgoing activation(s)%s",
            source_id,
            len(activations),
            fan_out_note,
        )

    return activations
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Output handling
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
def _write_outputs(self, result: NodeResult) -> None:
    """Validate the node's output and copy it into the shared buffer.

    Declared output keys are written in declaration order. A worker that
    is itself a fan-out branch additionally writes every extra key present
    in the result, and each of its writes is checked against keys already
    written by sibling branches using the configured conflict strategy
    (``"last_wins"`` by default, ``"first_wins"`` or ``"error"``).
    ``None`` values are never written. Validation problems are logged as
    warnings only.

    Raises:
        RuntimeError: a sibling branch already wrote the key and the
            conflict strategy is ``"error"``.
    """
    gc = self._gc
    node_spec = self.node_spec

    # Event loop nodes skip executor-level validation (judge is the authority)
    if node_spec.node_type != "event_loop":
        errors = self._validator.validate_all(
            output=result.output,
            output_keys=node_spec.output_keys,
            nullable_keys=getattr(node_spec, "nullable_output_keys", []) or [],
            output_schema=getattr(node_spec, "output_schema", None),
            output_model=getattr(node_spec, "output_model", None),
        )
        if errors:
            logger.warning("Worker %s output validation warnings: %s", node_spec.id, errors)

    # This worker is a fan-out branch when one of its inherited tags names it.
    is_fanout_branch = any(
        tag.via_branch == node_spec.id for tag in self._inherited_fan_out_tags
    )

    # Ordered, de-duplicated key list: declared keys first, then (for
    # fan-out branches only) any extra keys in the result. A dict keeps
    # insertion order, so the write order is the same on every run.
    ordered_keys: dict[str, None] = dict.fromkeys(node_spec.output_keys or ())
    if is_fanout_branch:
        ordered_keys.update(dict.fromkeys(result.output))

    # The strategy does not depend on the key, so resolve it once.
    conflict_strategy = "last_wins"
    if is_fanout_branch and gc.parallel_config:
        conflict_strategy = getattr(
            gc.parallel_config, "buffer_conflict_strategy", "last_wins"
        )

    for key in ordered_keys:
        value = result.output.get(key)
        if value is None:
            continue

        if is_fanout_branch:
            prior_worker = gc._fanout_written_keys.get(key)
            if prior_worker and prior_worker != node_spec.id:
                if conflict_strategy == "error":
                    raise RuntimeError(
                        f"Buffer write failed (conflict): key '{key}' already written "
                        f"by worker '{prior_worker}', "
                        f"conflicting write from '{node_spec.id}'"
                    )
                if conflict_strategy == "first_wins":
                    logger.debug(
                        "Skipping write to '%s' (first_wins: already set by %s)",
                        key, prior_worker,
                    )
                    continue
                # last_wins: log and overwrite
                logger.debug(
                    "Key '%s' overwritten (last_wins: %s -> %s)",
                    key, prior_worker, node_spec.id,
                )
            gc._fanout_written_keys[key] = node_spec.id

        gc.buffer.write(key, value, validate=False)
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Context building
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
def _get_node_implementation(self) -> NodeProtocol:
    """Return the node implementation for this worker, creating it if needed.

    Lookup order: the instance cache, then the shared node registry, then
    an EventLoopNode built on the fly for ``"event_loop"`` and ``"gcu"``
    node types. A newly built node is stored in both the registry and the
    instance cache.

    Raises:
        RuntimeError: no implementation is registered and the node type
            cannot be auto-created.
    """
    cached = self._node_impl
    if cached is not None:
        return cached

    gc = self._gc
    spec = self.node_spec

    # Shared registry takes precedence over auto-creation.
    if spec.id in gc.node_registry:
        self._node_impl = gc.node_registry[spec.id]
        return self._node_impl

    if spec.node_type not in ("event_loop", "gcu"):
        raise RuntimeError(
            f"No implementation for node '{spec.id}' "
            f"(type: {spec.node_type})"
        )

    from framework.graph.event_loop_node import EventLoopNode
    from framework.graph.event_loop.types import LoopConfig
    from framework.graph.node import warn_if_deprecated_client_facing

    # Persistence is optional: without a storage path there is neither a
    # conversation store nor a spillover directory.
    storage_root = gc.storage_path
    conv_store = None
    if storage_root:
        from framework.storage.conversation_store import FileConversationStore

        conv_store = FileConversationStore(base_path=storage_root / "conversations")
    spillover = str(storage_root / "data") if storage_root else None

    lc = gc.loop_config
    warn_if_deprecated_client_facing(spec)
    # Nodes with direct user I/O get a higher default iteration budget.
    default_max_iter = 100 if spec.supports_direct_user_io() else 50

    loop_config = LoopConfig(
        max_iterations=lc.get("max_iterations", default_max_iter),
        max_tool_calls_per_turn=lc.get("max_tool_calls_per_turn", 30),
        tool_call_overflow_margin=lc.get("tool_call_overflow_margin", 0.5),
        stall_detection_threshold=lc.get("stall_detection_threshold", 3),
        max_context_tokens=lc.get(
            "max_context_tokens",
            _default_max_context_tokens(),
        ),
        max_tool_result_chars=lc.get("max_tool_result_chars", 30_000),
        spillover_dir=spillover,
        hooks=lc.get("hooks", {}),
    )
    node = EventLoopNode(
        event_bus=gc.event_bus,
        judge=None,
        config=loop_config,
        tool_executor=gc.tool_executor,
        conversation_store=conv_store,
    )

    gc.node_registry[spec.id] = node
    self._node_impl = node
    return node
|
||||
|
||||
def _build_node_context(self) -> NodeContext:
    """Create the NodeContext handed to the node implementation.

    The worker's pause-request event is passed along as the pause event.
    """
    context = build_node_context_from_graph_context(
        self._gc,
        node_spec=self.node_spec,
        pause_event=self._pause_requested,
    )
    return context
|
||||
|
||||
async def _reflect_colony_memory(self) -> None:
    """Run colony-memory reflection for this execution, then refresh recall.

    Does nothing unless a colony memory directory, a reflection LLM and a
    worker sessions directory are configured and the session directory of
    the current execution exists. Reflection runs under the shared colony
    lock, so parallel workers queue up instead of skipping. Failures in
    either step are logged and otherwise ignored.
    """
    gc = self._gc
    if (
        gc.colony_memory_dir is None
        or gc.colony_reflect_llm is None
        or gc.worker_sessions_dir is None
    ):
        return

    from pathlib import Path

    session_dir = Path(gc.worker_sessions_dir) / gc.execution_id
    if not session_dir.exists():
        return

    worker_id = self.node_spec.id

    # Waiting on the lock serializes reflection without ever skipping it.
    async with gc._colony_reflect_lock:
        try:
            from framework.agents.queen.reflection_agent import run_short_reflection

            await run_short_reflection(
                session_dir,
                gc.colony_reflect_llm,
                gc.colony_memory_dir,
                caller="worker",
            )
        except Exception:
            logger.warning(
                "Worker %s: colony reflection failed",
                worker_id,
                exc_info=True,
            )

    # The recall cache is keyed per execution, so it is updated outside
    # the lock.
    def _store_recall_block(block):
        gc.colony_recall_cache[gc.execution_id] = block

    try:
        from framework.agents.queen.recall_selector import update_recall_cache

        await update_recall_cache(
            session_dir,
            gc.colony_reflect_llm,
            memory_dir=gc.colony_memory_dir,
            cache_setter=_store_recall_block,
            heading="Colony Memories",
        )
    except Exception:
        logger.warning(
            "Worker %s: recall cache update failed",
            worker_id,
            exc_info=True,
        )
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Event publishing
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
async def _publish_completion(self, completion: WorkerCompletion) -> None:
    """Emit a WORKER_COMPLETED event on the graph-scoped event bus.

    Silently returns when there is no event bus or the bus lacks
    ``emit_worker_completed``. Activations and their fan-out tags are
    flattened to plain dicts (branch sets become lists) for the event data.
    """
    gc = self._gc
    bus = gc.event_bus
    if not bus or not hasattr(bus, "emit_worker_completed"):
        return

    def _tag_to_dict(tag):
        return {
            "fan_out_id": tag.fan_out_id,
            "fan_out_source": tag.fan_out_source,
            "branches": list(tag.branches),
            "via_branch": tag.via_branch,
        }

    activations_data = [
        {
            "source_id": act.source_id,
            "target_id": act.target_id,
            "edge_id": act.edge_id,
            "mapped_inputs": act.mapped_inputs,
            "fan_out_tags": [_tag_to_dict(tag) for tag in act.fan_out_tags],
        }
        for act in completion.activations
    ]

    worker_id = self.node_spec.id
    await gc.event_bus.emit_worker_completed(
        stream_id=gc.stream_id,
        node_id=worker_id,
        worker_id=worker_id,
        success=completion.success,
        output=completion.output,
        activations=activations_data,
        execution_id=gc.execution_id,
        tokens_used=completion.tokens_used,
        latency_ms=completion.latency_ms,
        conversation=completion.conversation,
    )
|
||||
|
||||
async def _publish_failure(self, error: str) -> None:
    """Emit a WORKER_FAILED event carrying ``error``.

    Silently returns when there is no event bus or the bus lacks
    ``emit_worker_failed``.
    """
    gc = self._gc
    bus = gc.event_bus
    if not bus or not hasattr(bus, "emit_worker_failed"):
        return

    worker_id = self.node_spec.id
    await gc.event_bus.emit_worker_failed(
        stream_id=gc.stream_id,
        node_id=worker_id,
        worker_id=worker_id,
        error=error,
        execution_id=gc.execution_id,
    )
|
||||
|
||||
async def _apply_continuous_transition(self, activations: list[Activation]) -> None:
    """Thread the shared conversation into the next node (continuous mode).

    Runs before the completion event is published so the downstream worker
    inherits an already-updated conversation: a new system prompt, the new
    current phase, and a transition marker appended as a user message.

    Returns without doing anything when continuous mode is off, there is no
    shared conversation, there are no activations, or the target of the
    first activation is not an ``"event_loop"`` node.
    """
    gc = self._gc
    if not (gc.is_continuous and gc.continuous_conversation):
        return

    # Only the first activation's target is considered.
    next_node_id = activations[0].target_id if activations else None
    if not next_node_id:
        return

    next_spec = gc.graph.get_node(next_node_id)
    if not next_spec:
        return
    if next_spec.node_type != "event_loop":
        return

    from framework.graph.prompting import (
        TransitionSpec,
        build_narrative,
        build_system_prompt_for_node_context,
        build_transition_message,
    )

    conversation = gc.continuous_conversation

    # Re-point the conversation at the next node: prompt first, then phase.
    narrative = build_narrative(gc.buffer, gc.path, gc.graph)
    next_ctx = build_node_context_from_graph_context(
        gc,
        node_spec=next_spec,
        pause_event=self._pause_requested,
        inherited_conversation=conversation,
        narrative=narrative,
    )
    next_prompt = build_system_prompt_for_node_context(next_ctx)
    conversation.update_system_prompt(next_prompt)
    conversation.set_current_phase(next_spec.id)

    # Describe the hand-off in a transition marker message.
    buffer_items, data_files = self._prepare_transition_payload()
    transition = TransitionSpec(
        previous_name=self.node_spec.name,
        previous_description=self.node_spec.description,
        next_name=next_spec.name,
        next_description=next_spec.description,
        next_output_keys=tuple(next_spec.output_keys or ()),
        buffer_items=buffer_items,
        cumulative_tool_names=tuple(sorted(gc.cumulative_tool_names)),
        data_files=tuple(data_files),
    )
    marker = build_transition_message(transition)
    await gc.continuous_conversation.add_user_message(
        marker,
        is_transition_marker=True,
    )
|
||||
|
||||
def _prepare_transition_payload(self) -> tuple[dict[str, str], list[str]]:
    """Summarize the buffer for a transition marker, spilling large values.

    Every non-None buffer value is rendered with ``str()``. Renderings of
    up to 300 characters are used as-is. Longer ones are written to
    ``<storage_path>/data/output_<key>.json`` (dicts and lists) or ``.txt``
    (everything else) and replaced by a pointer message. If no storage path
    is configured, or the write fails, the value is truncated to 300
    characters plus ``"..."`` instead.

    Returns:
        ``(buffer_items, data_files)``: the per-key summaries, and one
        ``"<name> (<size> bytes)"`` entry for each file currently in the
        data directory, sorted by path.
    """
    import json
    from pathlib import Path

    gc = self._gc
    data_dir = Path(gc.storage_path / "data") if gc.storage_path else None
    buffer_items: dict[str, str] = {}

    for key, value in gc.buffer.read_all().items():
        if value is None:
            continue

        val_str = str(value)
        if len(val_str) <= 300:
            buffer_items[key] = val_str
            continue

        if data_dir is not None:
            is_structured = isinstance(value, (dict, list))
            filename = f"output_{key}{'.json' if is_structured else '.txt'}"
            file_path = data_dir / filename
            try:
                data_dir.mkdir(parents=True, exist_ok=True)
                write_content = (
                    json.dumps(value, indent=2, ensure_ascii=False)
                    if is_structured
                    else val_str
                )
                file_path.write_text(write_content, encoding="utf-8")
                file_size = file_path.stat().st_size
            except Exception:
                # Spilling is best-effort: record why it failed and fall
                # through to the truncated inline preview below.
                logger.debug(
                    "Worker %s: could not spill buffer key '%s' to %s",
                    self.node_spec.id,
                    key,
                    file_path,
                    exc_info=True,
                )
            else:
                buffer_items[key] = (
                    f"[Saved to '{filename}' ({file_size:,} bytes). "
                    f"Use load_data(filename='{filename}') to access.]"
                )
                continue

        buffer_items[key] = val_str[:300] + "..."

    data_files: list[str] = []
    if data_dir is not None and data_dir.exists():
        data_files = [
            f"{entry.name} ({entry.stat().st_size:,} bytes)"
            for entry in sorted(data_dir.iterdir())
            if entry.is_file()
        ]

    return buffer_items, data_files
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Utility
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
def pause(self) -> None:
    """Request a pause: close the run gate and raise the pause-request flag."""
    # Close the gate so the retry loop blocks before its next attempt.
    self._run_gate.clear()
    # Signal the pause to whoever holds the pause event.
    self._pause_requested.set()
|
||||
|
||||
def resume(self) -> None:
    """Undo a pause: reopen the run gate and lower the pause-request flag."""
    # Reopen the gate so a blocked retry loop can continue.
    self._run_gate.set()
    # Withdraw the pause request.
    self._pause_requested.clear()
|
||||
|
||||
@property
def is_terminal(self) -> bool:
    """Whether this worker's node is listed among the graph's terminal nodes."""
    terminal_ids = self._gc.graph.terminal_nodes or []
    return self.node_spec.id in terminal_ids
|
||||
|
||||
@property
def is_entry(self) -> bool:
    """Whether this worker has no incoming edges."""
    return not self.incoming_edges
|
||||
|
||||
|
||||
def _default_max_context_tokens() -> int:
    """Return the configured max context tokens, or 32000 if unavailable.

    The value comes from ``framework.config.get_max_context_tokens``; any
    exception while importing or calling it selects the fallback.
    """
    fallback = 32_000
    try:
        from framework.config import get_max_context_tokens  # type: ignore[import-untyped]

        resolved = get_max_context_tokens()
    except Exception:
        return fallback
    return resolved
|
||||
@@ -1937,6 +1937,29 @@ class LiteLLMProvider(LLMProvider):
|
||||
return
|
||||
|
||||
except Exception as e:
|
||||
# Some providers return non-standard finish_reason values
|
||||
# (e.g., kimi-k2.5 sends 'pause_turn') that LiteLLM's
|
||||
# internal stream_chunk_builder rejects via Pydantic
|
||||
# validation. If we already accumulated content and built
|
||||
# tail_events before the error, the stream was successful —
|
||||
# yield what we have instead of discarding it.
|
||||
if (accumulated_text or tool_calls_acc) and tail_events:
|
||||
_is_finish_reason_err = (
|
||||
"finish_reason" in str(e) and "validation error" in str(e).lower()
|
||||
)
|
||||
if _is_finish_reason_err:
|
||||
logger.warning(
|
||||
"[stream] %s: LiteLLM finish_reason validation "
|
||||
"error (non-standard provider value). "
|
||||
"Content was streamed successfully — "
|
||||
"using accumulated result. Error: %s",
|
||||
self.model,
|
||||
e,
|
||||
)
|
||||
for event in tail_events:
|
||||
yield event
|
||||
return
|
||||
|
||||
if self._should_use_openrouter_tool_compat(e, tools):
|
||||
_remember_openrouter_tool_compat_model(self.model)
|
||||
async for event in self._stream_via_openrouter_tool_compat(
|
||||
|
||||
@@ -1 +0,0 @@
|
||||
"""Framework-level worker monitoring package."""
|
||||
@@ -1,7 +1,6 @@
|
||||
"""Agent Runner - load and run exported agents."""
|
||||
|
||||
from framework.runner.mcp_registry import MCPRegistry
|
||||
from framework.runner.orchestrator import AgentOrchestrator
|
||||
from framework.runner.protocol import (
|
||||
AgentMessage,
|
||||
CapabilityLevel,
|
||||
@@ -20,8 +19,6 @@ __all__ = [
|
||||
"ToolRegistry",
|
||||
"MCPRegistry",
|
||||
"tool",
|
||||
# Multi-agent
|
||||
"AgentOrchestrator",
|
||||
"AgentMessage",
|
||||
"MessageType",
|
||||
"CapabilityLevel",
|
||||
|
||||
+88
-304
@@ -51,6 +51,11 @@ def register_commands(subparsers: argparse._SubParsersAction) -> None:
|
||||
action="store_true",
|
||||
help="Show detailed execution logs (steps, LLM calls, etc.)",
|
||||
)
|
||||
run_parser.add_argument(
|
||||
"--debug",
|
||||
action="store_true",
|
||||
help="Show all debug-level logs",
|
||||
)
|
||||
|
||||
run_parser.add_argument(
|
||||
"--model",
|
||||
@@ -119,46 +124,6 @@ def register_commands(subparsers: argparse._SubParsersAction) -> None:
|
||||
)
|
||||
list_parser.set_defaults(func=cmd_list)
|
||||
|
||||
# dispatch command (multi-agent)
|
||||
dispatch_parser = subparsers.add_parser(
|
||||
"dispatch",
|
||||
help="Dispatch request to multiple agents",
|
||||
description="Route a request to the best agent(s) using the orchestrator.",
|
||||
)
|
||||
dispatch_parser.add_argument(
|
||||
"agents_dir",
|
||||
type=str,
|
||||
nargs="?",
|
||||
default="exports",
|
||||
help="Directory containing agent folders (default: exports)",
|
||||
)
|
||||
dispatch_parser.add_argument(
|
||||
"--input",
|
||||
"-i",
|
||||
type=str,
|
||||
required=True,
|
||||
help="Input context as JSON string",
|
||||
)
|
||||
dispatch_parser.add_argument(
|
||||
"--intent",
|
||||
type=str,
|
||||
help="Description of what you want to accomplish",
|
||||
)
|
||||
dispatch_parser.add_argument(
|
||||
"--agents",
|
||||
"-a",
|
||||
type=str,
|
||||
nargs="+",
|
||||
help="Specific agent names to use (default: all in directory)",
|
||||
)
|
||||
dispatch_parser.add_argument(
|
||||
"--quiet",
|
||||
"-q",
|
||||
action="store_true",
|
||||
help="Only output the final result JSON",
|
||||
)
|
||||
dispatch_parser.set_defaults(func=cmd_dispatch)
|
||||
|
||||
# shell command (interactive agent session)
|
||||
shell_parser = subparsers.add_parser(
|
||||
"shell",
|
||||
@@ -177,11 +142,6 @@ def register_commands(subparsers: argparse._SubParsersAction) -> None:
|
||||
default="exports",
|
||||
help="Directory containing agents (default: exports)",
|
||||
)
|
||||
shell_parser.add_argument(
|
||||
"--multi",
|
||||
action="store_true",
|
||||
help="Enable multi-agent mode with orchestrator",
|
||||
)
|
||||
shell_parser.add_argument(
|
||||
"--no-approve",
|
||||
action="store_true",
|
||||
@@ -290,7 +250,10 @@ def register_commands(subparsers: argparse._SubParsersAction) -> None:
|
||||
def _load_resume_state(
|
||||
agent_path: str, session_id: str, checkpoint_id: str | None = None
|
||||
) -> dict | None:
|
||||
"""Load session or checkpoint state for headless resume.
|
||||
"""Load checkpoint state for headless resume.
|
||||
|
||||
All resumes require a checkpoint. If ``checkpoint_id`` is not provided
|
||||
the latest checkpoint is auto-discovered.
|
||||
|
||||
Args:
|
||||
agent_path: Path to the agent folder (e.g., exports/my_agent)
|
||||
@@ -298,7 +261,7 @@ def _load_resume_state(
|
||||
checkpoint_id: Optional checkpoint ID within the session
|
||||
|
||||
Returns:
|
||||
session_state dict for executor, or None if not found
|
||||
session_state dict for executor, or None if no checkpoint found
|
||||
"""
|
||||
agent_name = Path(agent_path).name
|
||||
agent_work_dir = Path.home() / ".hive" / "agents" / agent_name
|
||||
@@ -307,40 +270,37 @@ def _load_resume_state(
|
||||
if not session_dir.exists():
|
||||
return None
|
||||
|
||||
if checkpoint_id:
|
||||
# Checkpoint-based resume: load checkpoint and extract state
|
||||
cp_path = session_dir / "checkpoints" / f"{checkpoint_id}.json"
|
||||
if not cp_path.exists():
|
||||
# Auto-discover latest checkpoint when not specified
|
||||
if not checkpoint_id:
|
||||
cp_dir = session_dir / "checkpoints"
|
||||
if cp_dir.exists():
|
||||
checkpoints = sorted(
|
||||
cp_dir.glob("*.json"),
|
||||
key=lambda p: p.stat().st_mtime,
|
||||
reverse=True,
|
||||
)
|
||||
if checkpoints:
|
||||
checkpoint_id = checkpoints[0].stem
|
||||
if not checkpoint_id:
|
||||
return None
|
||||
try:
|
||||
cp_data = json.loads(cp_path.read_text(encoding="utf-8"))
|
||||
except (json.JSONDecodeError, OSError):
|
||||
return None
|
||||
return {
|
||||
"resume_session_id": session_id,
|
||||
"memory": cp_data.get("shared_memory", {}),
|
||||
"paused_at": cp_data.get("next_node") or cp_data.get("current_node"),
|
||||
"execution_path": cp_data.get("execution_path", []),
|
||||
"node_visit_counts": {},
|
||||
}
|
||||
else:
|
||||
# Session state resume: load state.json
|
||||
state_path = session_dir / "state.json"
|
||||
if not state_path.exists():
|
||||
return None
|
||||
try:
|
||||
state_data = json.loads(state_path.read_text(encoding="utf-8"))
|
||||
except (json.JSONDecodeError, OSError):
|
||||
return None
|
||||
progress = state_data.get("progress", {})
|
||||
paused_at = progress.get("paused_at") or progress.get("resume_from")
|
||||
return {
|
||||
"resume_session_id": session_id,
|
||||
"memory": state_data.get("memory", {}),
|
||||
"paused_at": paused_at,
|
||||
"execution_path": progress.get("path", []),
|
||||
"node_visit_counts": progress.get("node_visit_counts", {}),
|
||||
}
|
||||
|
||||
cp_path = session_dir / "checkpoints" / f"{checkpoint_id}.json"
|
||||
if not cp_path.exists():
|
||||
return None
|
||||
try:
|
||||
cp_data = json.loads(cp_path.read_text(encoding="utf-8"))
|
||||
except (json.JSONDecodeError, OSError):
|
||||
return None
|
||||
|
||||
return {
|
||||
"resume_session_id": session_id,
|
||||
"resume_from_checkpoint": checkpoint_id,
|
||||
"run_id": cp_data.get("run_id") or None,
|
||||
"data_buffer": cp_data.get("data_buffer", cp_data.get("shared_memory", {})),
|
||||
"paused_at": cp_data.get("next_node") or cp_data.get("current_node"),
|
||||
"execution_path": cp_data.get("execution_path", []),
|
||||
"node_visit_counts": cp_data.get("node_visit_counts", {}),
|
||||
}
|
||||
|
||||
|
||||
def _prompt_before_start(agent_path: str, runner, model: str | None = None):
|
||||
@@ -387,6 +347,8 @@ def cmd_run(args: argparse.Namespace) -> int:
|
||||
# Set logging level (quiet by default for cleaner output)
|
||||
if args.quiet:
|
||||
configure_logging(level="ERROR")
|
||||
elif getattr(args, "debug", False):
|
||||
configure_logging(level="DEBUG")
|
||||
elif getattr(args, "verbose", False):
|
||||
configure_logging(level="INFO")
|
||||
else:
|
||||
@@ -722,118 +684,6 @@ def cmd_list(args: argparse.Namespace) -> int:
|
||||
return 0
|
||||
|
||||
|
||||
def cmd_dispatch(args: argparse.Namespace) -> int:
    """Dispatch request to multiple agents via orchestrator.

    Args:
        args: Parsed CLI arguments. Reads ``input`` (JSON string), ``agents_dir``,
            ``agents`` (optional list of agent names), ``intent`` and ``quiet``.

    Returns:
        0 when the orchestrator reports success, 1 on invalid input, missing
        agents, or an unsuccessful dispatch.
    """
    from framework.runner import AgentOrchestrator

    # Parse input
    try:
        context = json.loads(args.input)
    except json.JSONDecodeError as e:
        print(f"Error parsing --input JSON: {e}", file=sys.stderr)
        return 1

    # Find agents
    agents_dir = Path(args.agents_dir)
    if not agents_dir.exists():
        print(f"Directory not found: {agents_dir}", file=sys.stderr)
        return 1

    # Create orchestrator and register agents
    orchestrator = AgentOrchestrator()

    agent_paths = []
    if args.agents:
        # Use specific agents
        for agent_name in args.agents:
            # Guard against full paths: if the name contains path separators
            # (e.g. "exports/my_agent"), it will be doubled with agents_dir
            agent_name_path = Path(agent_name)
            if len(agent_name_path.parts) > 1:
                print(
                    f"Error: --agents expects agent names, not paths. "
                    f"Use: --agents {agent_name_path.name} "
                    f"instead of --agents {agent_name}",
                    file=sys.stderr,
                )
                return 1
            agent_path = agents_dir / agent_name
            if not _is_valid_agent_dir(agent_path):
                print(f"Agent not found: {agent_path}", file=sys.stderr)
                return 1
            agent_paths.append((agent_name, agent_path))
    else:
        # Discover all agents
        for path in agents_dir.iterdir():
            if _is_valid_agent_dir(path):
                agent_paths.append((path.name, path))

    if not agent_paths:
        print(f"No agents found in {agents_dir}", file=sys.stderr)
        return 1

    # From here on agents may be registered, so cleanup must run even if
    # dispatching or printing raises.
    try:
        # Register agents (a failed registration is reported but not fatal)
        for name, path in agent_paths:
            try:
                orchestrator.register(name, path)
                if not args.quiet:
                    print(f"Registered agent: {name}")
            except Exception as e:
                print(f"Failed to register {name}: {e}", file=sys.stderr)

        if not args.quiet:
            print()
            print(f"Input: {json.dumps(context)}")
            if args.intent:
                print(f"Intent: {args.intent}")
            print()
            print("=" * 60)
            print("Dispatching to agents...")
            print("=" * 60)
            print()

        # Dispatch
        result = asyncio.run(orchestrator.dispatch(context, intent=args.intent))

        # Output results
        if args.quiet:
            output = {
                "success": result.success,
                "handled_by": result.handled_by,
                "results": result.results,
                "error": result.error,
            }
            print(json.dumps(output, indent=2, default=str))
        else:
            print()
            print("=" * 60)
            print(f"Success: {result.success}")
            print(f"Handled by: {', '.join(result.handled_by) or 'none'}")
            if result.error:
                print(f"Error: {result.error}")
            print("=" * 60)

            if result.results:
                print("\n--- Results by Agent ---")
                for agent_name, data in result.results.items():
                    print(f"\n{agent_name}:")
                    status = data.get("status", "unknown")
                    print(f" Status: {status}")
                    if "completed_steps" in data:
                        print(f" Steps: {len(data['completed_steps'])}")
                    if "results" in data:
                        results_preview = json.dumps(data["results"], default=str)
                        # Keep the per-agent preview short on the terminal.
                        if len(results_preview) > 200:
                            results_preview = results_preview[:200] + "..."
                        print(f" Results: {results_preview}")

            print(f"\nMessage trace: {len(result.messages)} messages")

        return 0 if result.success else 1
    finally:
        orchestrator.cleanup()
|
||||
|
||||
|
||||
def _interactive_approval(request):
|
||||
"""Interactive approval callback for HITL mode."""
|
||||
from framework.graph import ApprovalDecision, ApprovalResult
|
||||
@@ -931,11 +781,6 @@ def cmd_shell(args: argparse.Namespace) -> int:
|
||||
|
||||
agents_dir = Path(args.agents_dir)
|
||||
|
||||
# Multi-agent mode with orchestrator
|
||||
if args.multi:
|
||||
return _interactive_multi(agents_dir)
|
||||
|
||||
# Single agent mode
|
||||
agent_path = args.agent_path
|
||||
if not agent_path:
|
||||
# List available agents and let user choose
|
||||
@@ -1408,108 +1253,6 @@ def _select_agent(agents_dir: Path) -> str | None:
|
||||
print()
|
||||
return None
|
||||
|
||||
|
||||
def _interactive_multi(agents_dir: Path) -> int:
    """Interactive multi-agent mode with orchestrator.

    Registers every valid agent directory under ``agents_dir`` and then reads
    commands from stdin until ``/quit``, EOF or Ctrl+C.

    Args:
        agents_dir: Directory containing agent folders.

    Returns:
        1 if the directory is missing or no agent could be registered,
        otherwise 0 once the session ends.
    """
    from framework.runner import AgentOrchestrator

    if not agents_dir.exists():
        print(f"Directory not found: {agents_dir}", file=sys.stderr)
        return 1

    orchestrator = AgentOrchestrator()
    agent_count = 0

    # Register all agents
    for path in agents_dir.iterdir():
        if _is_valid_agent_dir(path):
            try:
                orchestrator.register(path.name, path)
                agent_count += 1
            except Exception as e:
                print(f"Warning: Failed to register {path.name}: {e}")

    if agent_count == 0:
        print(f"No agents found in {agents_dir}", file=sys.stderr)
        return 1

    # Agents are registered from here on: guarantee cleanup even when a
    # dispatch (or printing its result) raises.
    try:
        print(f"\n{'=' * 60}")
        print("Multi-Agent Interactive Mode")
        print(f"Registered {agent_count} agents")
        print(f"{'=' * 60}")
        print("\nCommands:")
        print(" /agents - List registered agents")
        print(" /quit - Exit")
        print(" {...} - JSON input to dispatch")
        print()

        while True:
            try:
                user_input = input(">>> ").strip()
            except (EOFError, KeyboardInterrupt):
                print("\nExiting...")
                break

            if not user_input:
                continue

            if user_input == "/quit":
                break

            if user_input == "/agents":
                print("\nRegistered agents:")
                for agent in orchestrator.list_agents():
                    print(f" - {agent['name']}: {agent['description'][:60]}...")
                print()
                continue

            # Parse intent if provided: "/intent <word> <json>"
            intent = None
            if user_input.startswith("/intent "):
                parts = user_input.split(" ", 2)
                if len(parts) >= 3:
                    intent = parts[1]
                    user_input = parts[2]

            # Try to parse as JSON
            try:
                context = json.loads(user_input)
            except json.JSONDecodeError:
                print("Error: Invalid JSON input. Use {...} format.")
                continue

            print(f"\nDispatching: {json.dumps(context)}")
            if intent:
                print(f"Intent: {intent}")
            print("-" * 40)

            result = asyncio.run(orchestrator.dispatch(context, intent=intent))

            print(f"\nSuccess: {result.success}")
            print(f"Handled by: {', '.join(result.handled_by) or 'none'}")

            if result.error:
                print(f"Error: {result.error}")

            if result.results:
                print("\nResults by agent:")
                for agent_name, data in result.results.items():
                    print(f"\n {agent_name}:")
                    status = data.get("status", "unknown")
                    print(f" Status: {status}")
                    if "results" in data:
                        results_preview = json.dumps(data["results"], default=str)
                        if len(results_preview) > 150:
                            results_preview = results_preview[:150] + "..."
                        print(f" Results: {results_preview}")

            print(f"\nMessage trace: {len(result.messages)} messages")
            print()
    finally:
        orchestrator.cleanup()

    return 0
|
||||
|
||||
|
||||
def cmd_setup_credentials(args: argparse.Namespace) -> int:
|
||||
"""Interactive credential setup for an agent."""
|
||||
from framework.credentials.setup import CredentialSetupSession
|
||||
@@ -1532,10 +1275,51 @@ def cmd_setup_credentials(args: argparse.Namespace) -> int:
|
||||
return 0 if result.success else 1
|
||||
|
||||
|
||||
def _find_chrome_bin() -> str | None:
|
||||
"""Return the path to a Chrome/Chromium binary, or None if not found."""
|
||||
import shutil
|
||||
|
||||
for candidate in (
|
||||
"google-chrome",
|
||||
"google-chrome-stable",
|
||||
"chromium",
|
||||
"chromium-browser",
|
||||
"microsoft-edge",
|
||||
"microsoft-edge-stable",
|
||||
):
|
||||
if shutil.which(candidate):
|
||||
return candidate
|
||||
|
||||
mac_paths = [
|
||||
"/Applications/Google Chrome.app/Contents/MacOS/Google Chrome",
|
||||
Path.home() / "Applications/Google Chrome.app/Contents/MacOS/Google Chrome",
|
||||
"/Applications/Microsoft Edge.app/Contents/MacOS/Microsoft Edge",
|
||||
]
|
||||
for p in mac_paths:
|
||||
if Path(p).exists():
|
||||
return str(p)
|
||||
|
||||
return None
|
||||
|
||||
|
||||
def _open_browser(url: str) -> None:
|
||||
"""Open URL in the default browser (best-effort, non-blocking)."""
|
||||
"""Open URL in the browser (best-effort, non-blocking)."""
|
||||
import subprocess
|
||||
|
||||
chrome = _find_chrome_bin()
|
||||
|
||||
try:
|
||||
if chrome:
|
||||
subprocess.Popen(
|
||||
[chrome, url],
|
||||
stdout=subprocess.DEVNULL,
|
||||
stderr=subprocess.DEVNULL,
|
||||
)
|
||||
return
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
# Fallback: open with system default browser
|
||||
try:
|
||||
if sys.platform == "darwin":
|
||||
subprocess.Popen(
|
||||
@@ -1676,10 +1460,10 @@ def cmd_serve(args: argparse.Namespace) -> int:
|
||||
# Preload agents specified via --agent
|
||||
for agent_path in args.agent:
|
||||
try:
|
||||
session = await manager.create_session_with_worker(agent_path, model=model)
|
||||
session = await manager.create_session_with_worker_graph(agent_path, model=model)
|
||||
info = session.worker_info
|
||||
name = info.name if info else session.worker_id
|
||||
print(f"Loaded agent: {session.worker_id} ({name})")
|
||||
name = info.name if info else session.graph_id
|
||||
print(f"Loaded agent: {session.graph_id} ({name})")
|
||||
except Exception as e:
|
||||
print(f"Error loading {agent_path}: {e}")
|
||||
|
||||
@@ -1702,7 +1486,7 @@ def cmd_serve(args: argparse.Namespace) -> int:
|
||||
if has_frontend:
|
||||
print(f"Dashboard: {dashboard_url}")
|
||||
print(f"Health: {dashboard_url}/api/health")
|
||||
print(f"Agents loaded: {sum(1 for s in manager.list_sessions() if s.worker_runtime)}")
|
||||
print(f"Agents loaded: {sum(1 for s in manager.list_sessions() if s.graph_runtime)}")
|
||||
print()
|
||||
print("Press Ctrl+C to stop")
|
||||
|
||||
|
||||
@@ -1,252 +0,0 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import logging
|
||||
from pathlib import Path
|
||||
from typing import Any
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
_CACHE_INDEX_PATH = Path.home() / ".hive" / "mcp_registry" / "cache" / "registry_index.json"
|
||||
_FIXTURE_INDEX_PATH = Path(__file__).resolve().parent / "fixtures" / "registry_index.json"
|
||||
|
||||
|
||||
def resolve_registry_servers(
    *,
    include: list[str] | None = None,
    tags: list[str] | None = None,
    exclude: list[str] | None = None,
    profile: str | None = None,
    max_tools: int | None = None,
    versions: dict[str, str] | None = None,
) -> list[dict[str, Any]]:
    """
    Resolve registry-sourced MCP servers for `mcp_registry.json` selection.

    Resolution order:
    - If `framework.runner.mcp_registry.MCPRegistry` can be imported, delegate
      to its `resolve_for_agent` and normalize each result to a plain dict.
    - Otherwise (import failure, or any error raised while resolving), fall
      back to the local index resolver. `max_tools` is only forwarded to
      `MCPRegistry`; the local fallback does not use it.
    """
    try:
        from framework.runner.mcp_registry import MCPRegistry  # type: ignore

        selection = {
            "include": include or [],
            "tags": tags or [],
            "exclude": exclude or [],
            "profile": profile,
            "max_tools": max_tools,
            "versions": versions or {},
        }
        resolved = MCPRegistry().resolve_for_agent(**selection)
        # Results may be dicts or typed objects; hand back dicts either way.
        normalized = []
        for item in resolved:
            normalized.append(_normalize_server_config(item))
        return normalized
    except ImportError:
        # Expected while #6349/#6574 is not merged locally.
        pass
    except Exception as e:
        logger.warning("MCPRegistry resolution failed; falling back to cache/fixtures: %s", e)

    return _resolve_from_local_index(
        include=include,
        tags=tags,
        exclude=exclude,
        profile=profile,
        versions=versions or {},
    )
|
||||
|
||||
|
||||
def _resolve_from_local_index(
    *,
    include: list[str] | None,
    tags: list[str] | None,
    exclude: list[str] | None,
    profile: str | None,
    versions: dict[str, str],
) -> list[dict[str, Any]]:
    """Select MCP server configs from the locally loaded registry index.

    Explicitly included names come first (in the order given), followed by
    profile/tag matches in alphabetical order. Excluded names and names whose
    pinned version does not match the index entry are never selected. A
    warning is logged for every included name that could not be selected.
    """
    by_name: dict[str, dict[str, Any]] = {}
    for server in _coerce_index_servers(_load_index_json()):
        if isinstance(server, dict) and "name" in server:
            by_name[server["name"]] = server

    requested = include or []
    wanted_tags = set(tags or [])
    excluded = set(exclude or [])

    def profiles_for(entry: dict[str, Any]) -> set[str]:
        # Profiles live either at top level or under the "hive" sub-dict.
        direct = entry.get("profiles")
        if isinstance(direct, list):
            return set(direct)
        nested = entry.get("hive")
        if isinstance(nested, dict):
            nested_profiles = nested.get("profiles")
            if isinstance(nested_profiles, list):
                return set(nested_profiles)
        return set()

    def tags_for(entry: dict[str, Any]) -> set[str]:
        entry_tags = entry.get("tags")
        return set(entry_tags) if isinstance(entry_tags, list) else set()

    def declared_version(entry: dict[str, Any]) -> str | None:
        # Prefer flat `version`, then `manifest_version`, then `manifest.version`.
        for key in ("version", "manifest_version"):
            value = entry.get(key)
            if isinstance(value, str):
                return value
        manifest = entry.get("manifest")
        if isinstance(manifest, dict):
            nested_version = manifest.get("version")
            if isinstance(nested_version, str):
                return nested_version
        return None

    def pin_satisfied(server_name: str) -> bool:
        # Unpinned servers always pass; pinned ones must match exactly.
        if server_name not in versions:
            return True
        entry = by_name.get(server_name)
        return bool(entry) and declared_version(entry) == versions[server_name]

    resolved_names: list[str] = []
    resolved_set: set[str] = set()

    # 1) Include-order first
    for name in requested:
        if name in excluded or name in resolved_set:
            continue
        if name in by_name and pin_satisfied(name):
            resolved_names.append(name)
            resolved_set.add(name)

    # 2) Then tag/profile matches, alphabetical
    matched: set[str] = set()
    if profile or wanted_tags:
        for name, entry in by_name.items():
            if name in excluded or not pin_satisfied(name):
                continue
            in_profile = bool(profile) and profile in profiles_for(entry)
            has_tag = bool(wanted_tags) and bool(tags_for(entry) & wanted_tags)
            if in_profile or has_tag:
                matched.add(name)
    resolved_names.extend(sorted(matched - resolved_set))

    # Missing requested servers should warn (FR-54).
    for name in requested:
        if name in excluded or name in resolved_set:
            continue
        if name not in by_name:
            logger.warning(
                "Server '%s' requested by mcp_registry.json but not found in index. "
                "Run: hive mcp install %s",
                name,
                name,
            )
        elif name in versions:
            logger.warning(
                "Server '%s' was requested but pinned version '%s' was not found in index. "
                "Run: hive mcp update %s or change the pin in mcp_registry.json",
                name,
                versions[name],
                name,
            )
        else:
            logger.warning(
                "Server '%s' requested by mcp_registry.json was not selected. "
                "Check selection filters/exclude lists.",
                name,
            )

    # Keys copied when an entry carries its MCP config at top level.
    passthrough_keys = {
        "name",
        "transport",
        "command",
        "args",
        "env",
        "cwd",
        "url",
        "headers",
        "description",
    }
    repo_root = Path(__file__).resolve().parents[3]
    resolved_configs: list[dict[str, Any]] = []
    for name in resolved_names:
        entry = by_name.get(name)
        if not entry:
            continue
        raw_config = entry.get("mcp_config")
        if isinstance(raw_config, dict):
            mcp_config = dict(raw_config)
        else:
            # Best-effort: allow a direct MCP config shape at top-level.
            mcp_config = {key: entry[key] for key in entry if key in passthrough_keys}
        mcp_config["name"] = name
        if mcp_config.get("transport") == "stdio":
            _absolutize_stdio_config_in_place(repo_root, mcp_config)
        resolved_configs.append(mcp_config)

    return resolved_configs
|
||||
|
||||
|
||||
def _load_index_json() -> Any:
    """Load the MCP registry index, preferring the user cache over the fixture.

    Returns:
        The parsed JSON from the cache index if it exists and is readable,
        else the parsed repo fixture index, else ``{"servers": []}``.
    """
    if _CACHE_INDEX_PATH.exists():
        try:
            return json.loads(_CACHE_INDEX_PATH.read_text(encoding="utf-8"))
        except (OSError, ValueError) as e:
            # ValueError covers both invalid JSON and undecodable bytes. A
            # damaged cache must not block resolution: fall through to the
            # fixture instead of raising.
            logger.warning(
                "Ignoring unreadable MCP registry cache index %s: %s", _CACHE_INDEX_PATH, e
            )
    if _FIXTURE_INDEX_PATH.exists():
        logger.info("Using local fixture index because registry cache is missing")
        return json.loads(_FIXTURE_INDEX_PATH.read_text(encoding="utf-8"))
    logger.warning("No local MCP registry index found (cache and fixture missing)")
    return {"servers": []}
|
||||
|
||||
|
||||
def _coerce_index_servers(index: Any) -> list[dict[str, Any]]:
|
||||
if isinstance(index, list):
|
||||
return [x for x in index if isinstance(x, dict)]
|
||||
if isinstance(index, dict):
|
||||
servers = index.get("servers", [])
|
||||
if isinstance(servers, list):
|
||||
return [x for x in servers if isinstance(x, dict)]
|
||||
return []
|
||||
|
||||
|
||||
def _normalize_server_config(raw: Any) -> dict[str, Any]:
|
||||
if isinstance(raw, dict):
|
||||
return dict(raw)
|
||||
|
||||
# Future-proof object-to-dict normalization.
|
||||
for attr in ("to_dict", "model_dump"):
|
||||
maybe = getattr(raw, attr, None)
|
||||
if callable(maybe):
|
||||
return dict(maybe())
|
||||
|
||||
return dict(getattr(raw, "__dict__", {}))
|
||||
|
||||
|
||||
def _absolutize_stdio_config_in_place(repo_root: Path, config: dict[str, Any]) -> None:
|
||||
cwd = config.get("cwd")
|
||||
if isinstance(cwd, str) and not Path(cwd).is_absolute():
|
||||
config["cwd"] = str((repo_root / cwd).resolve())
|
||||
|
||||
# We intentionally do not absolutize `args` here.
|
||||
# For stdio servers, arguments may include the script name relative to
|
||||
# `cwd` (e.g. "coder_tools_server.py" with cwd="tools"). ToolRegistry's
|
||||
# stdio resolution logic handles script path checks and platform quirks.
|
||||
@@ -1,517 +0,0 @@
|
||||
"""Agent Orchestrator - routes requests and relays messages between agents."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
import json
|
||||
from dataclasses import dataclass, field
|
||||
from pathlib import Path
|
||||
from typing import Any
|
||||
|
||||
from framework.llm.provider import LLMProvider
|
||||
from framework.runner.protocol import (
|
||||
AgentMessage,
|
||||
CapabilityLevel,
|
||||
CapabilityResponse,
|
||||
MessageType,
|
||||
OrchestratorResult,
|
||||
RegisteredAgent,
|
||||
)
|
||||
from framework.runner.runner import AgentRunner
|
||||
|
||||
|
||||
@dataclass
class RoutingDecision:
    """Outcome of routing: which agent(s) should handle a request."""

    # Names of the agents chosen to handle the request.
    selected_agents: list[str]
    # Free-text explanation of the choice.
    reasoning: str
    # Confidence score attached to the decision.
    confidence: float
    # When true and several agents are selected, they are run concurrently.
    should_parallelize: bool = field(default=False)
    # Agents queued as replacements when a selected agent fails.
    fallback_agents: list[str] = field(default_factory=list)
|
||||
|
||||
|
||||
class AgentOrchestrator:
|
||||
"""
|
||||
Manages multiple agents and routes communications between them.
|
||||
|
||||
The orchestrator:
|
||||
1. Maintains a registry of available agents
|
||||
2. Routes incoming requests to appropriate agent(s) using LLM
|
||||
3. Relays messages between agents
|
||||
4. Logs all communications for traceability
|
||||
|
||||
Usage:
|
||||
orchestrator = AgentOrchestrator()
|
||||
orchestrator.register("sales", "exports/outbound-sales")
|
||||
orchestrator.register("support", "exports/customer-support")
|
||||
|
||||
result = await orchestrator.dispatch({
|
||||
"intent": "help customer with billing issue",
|
||||
"customer_id": "123",
|
||||
})
|
||||
"""
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
llm: LLMProvider | None = None,
|
||||
model: str = "claude-haiku-4-5-20251001",
|
||||
):
|
||||
"""
|
||||
Initialize the orchestrator.
|
||||
|
||||
Args:
|
||||
llm: LLM provider for routing decisions (auto-creates if None)
|
||||
model: Model to use for routing
|
||||
"""
|
||||
self._agents: dict[str, RegisteredAgent] = {}
|
||||
self._llm = llm
|
||||
self._model = model
|
||||
self._message_log: list[AgentMessage] = []
|
||||
|
||||
# Auto-create LLM - LiteLLM auto-detects provider and API key from model name
|
||||
if self._llm is None:
|
||||
from framework.config import get_api_base, get_api_key, get_llm_extra_kwargs
|
||||
from framework.llm.litellm import LiteLLMProvider
|
||||
|
||||
self._llm = LiteLLMProvider(
|
||||
model=self._model,
|
||||
api_key=get_api_key(),
|
||||
api_base=get_api_base(),
|
||||
**get_llm_extra_kwargs(),
|
||||
)
|
||||
|
||||
def register(
    self,
    name: str,
    agent_path: str | Path,
    capabilities: list[str] | None = None,
    priority: int = 0,
) -> None:
    """
    Load an agent from disk and add it to the registry under ``name``.

    An existing registration with the same name is replaced.

    Args:
        name: Unique name for this agent
        agent_path: Path to agent folder (containing agent.json)
        capabilities: Optional list of capability keywords
        priority: Higher = checked first for routing
    """
    loaded_runner = AgentRunner.load(agent_path)
    agent_info = loaded_runner.info()

    entry = RegisteredAgent(
        name=name,
        runner=loaded_runner,
        description=agent_info.description,
        capabilities=capabilities or [],
        priority=priority,
    )
    self._agents[name] = entry
|
||||
|
||||
def register_runner(
    self,
    name: str,
    runner: AgentRunner,
    capabilities: list[str] | None = None,
    priority: int = 0,
) -> None:
    """
    Add an already-constructed AgentRunner to the registry under ``name``.

    The description is taken from ``runner.info()``. An existing registration
    with the same name is replaced.

    Args:
        name: Unique name for this agent
        runner: AgentRunner instance
        capabilities: Optional list of capability keywords
        priority: Higher = checked first for routing
    """
    description = runner.info().description
    entry = RegisteredAgent(
        name=name,
        runner=runner,
        description=description,
        capabilities=capabilities or [],
        priority=priority,
    )
    self._agents[name] = entry
|
||||
|
||||
def list_agents(self) -> list[dict]:
    """Describe every registered agent, highest priority first.

    Agents with equal priority keep their registration order.
    """
    by_priority = sorted(
        self._agents.values(),
        key=lambda registered: registered.priority,
        reverse=True,
    )
    summaries = []
    for registered in by_priority:
        summaries.append(
            {
                "name": registered.name,
                "description": registered.description,
                "capabilities": registered.capabilities,
                "priority": registered.priority,
            }
        )
    return summaries
|
||||
|
||||
async def dispatch(
    self,
    request: dict,
    intent: str | None = None,
) -> OrchestratorResult:
    """
    Route a request to the appropriate agent(s).

    Capabilities of all agents are checked, a routing decision is obtained,
    and the selected agents are run either concurrently (when the decision
    asks for it and more than one agent is selected) or one after another.
    In sequential mode each agent's ``"results"`` are merged into the
    context handed to the next agent, and a failed agent causes the next
    fallback agent (if any) to be queued.

    Args:
        request: The request data
        intent: Optional description of what's being asked

    Returns:
        OrchestratorResult with results from handling agent(s). ``success``
        is true when at least one agent produced a response.
    """
    messages: list[AgentMessage] = []

    # Create initial message
    initial_message = AgentMessage(
        type=MessageType.REQUEST,
        intent=intent or "Process request",
        content=request,
    )
    messages.append(initial_message)
    self._message_log.append(initial_message)

    # Step 1: Check capabilities of all agents
    capabilities = await self._check_all_capabilities(request)

    # Step 2: Route to best agent(s)
    routing = await self._route_request(request, intent, capabilities)

    if not routing.selected_agents:
        return OrchestratorResult(
            success=False,
            handled_by=[],
            results={},
            messages=messages,
            error="No agent capable of handling this request",
        )

    # Step 3: Execute on selected agent(s)
    results: dict[str, Any] = {}
    handled_by: list[str] = []

    if routing.should_parallelize and len(routing.selected_agents) > 1:
        # Run agents in parallel
        tasks = []
        for agent_name in routing.selected_agents:
            msg = AgentMessage(
                type=MessageType.REQUEST,
                from_agent="orchestrator",
                to_agent=agent_name,
                intent=intent or "Process request",
                content=request,
                parent_id=initial_message.id,
            )
            messages.append(msg)
            self._message_log.append(msg)
            tasks.append(self._send_to_agent(agent_name, msg))

        # Exceptions are returned in place so one failure does not cancel the rest.
        responses = await asyncio.gather(*tasks, return_exceptions=True)

        for agent_name, response in zip(routing.selected_agents, responses, strict=False):
            if isinstance(response, Exception):
                results[agent_name] = {"error": str(response)}
            else:
                messages.append(response)
                self._message_log.append(response)
                results[agent_name] = response.content
                handled_by.append(agent_name)
    else:
        # Run agents sequentially
        accumulated_context = dict(request)

        # NOTE: fallbacks are appended to ``selected_agents`` while it is being
        # iterated, so a queued fallback is picked up by this same loop.
        for agent_name in routing.selected_agents:
            msg = AgentMessage(
                type=MessageType.REQUEST,
                from_agent="orchestrator",
                to_agent=agent_name,
                intent=intent or "Process request",
                # Snapshot the context: it is mutated after each response, and
                # sharing one dict would retroactively rewrite the content of
                # messages already stored in the trace and the message log.
                content=dict(accumulated_context),
                parent_id=initial_message.id,
            )
            messages.append(msg)
            self._message_log.append(msg)

            try:
                response = await self._send_to_agent(agent_name, msg)
                messages.append(response)
                self._message_log.append(response)
                results[agent_name] = response.content
                handled_by.append(agent_name)

                # Pass results to next agent
                if "results" in response.content:
                    accumulated_context.update(response.content["results"])
            except Exception as e:
                results[agent_name] = {"error": str(e)}
                # Try fallback if available
                if routing.fallback_agents:
                    fallback = routing.fallback_agents.pop(0)
                    routing.selected_agents.append(fallback)

    return OrchestratorResult(
        success=len(handled_by) > 0,
        handled_by=handled_by,
        results=results,
        messages=messages,
    )
|
||||
|
||||
async def relay(
    self,
    from_agent: str,
    to_agent: str,
    content: dict,
    intent: str = "",
) -> AgentMessage:
    """
    Hand a message from one agent over to another and return the reply.

    Both the outgoing hand-off and the reply are appended to the message log.

    Args:
        from_agent: Name recorded as the sender of the hand-off
        to_agent: Name of the registered agent that should receive it
        content: Payload carried by the message
        intent: Description of what's being asked

    Returns:
        The message produced by the target agent

    Raises:
        ValueError: If ``to_agent`` is not a registered agent
    """
    # Guard first: nothing is logged for an unknown recipient.
    target_is_known = to_agent in self._agents
    if not target_is_known:
        raise ValueError(f"Unknown agent: {to_agent}")

    handoff = AgentMessage(
        type=MessageType.HANDOFF,
        from_agent=from_agent,
        to_agent=to_agent,
        intent=intent,
        content=content,
    )
    self._message_log.append(handoff)

    reply = await self._send_to_agent(to_agent, handoff)
    self._message_log.append(reply)
    return reply
|
||||
|
||||
async def broadcast(
    self,
    content: dict,
    intent: str = "",
    exclude: list[str] | None = None,
) -> dict[str, AgentMessage]:
    """
    Deliver one message to every registered agent, concurrently.

    The broadcast message and each successful reply are appended to the
    message log. A failing agent is represented in the result by a
    synthesised RESPONSE message carrying ``{"error": ...}``; that
    synthesised message is not logged.

    Args:
        content: Payload carried by the message
        intent: Description of what's being asked
        exclude: Agent names that should not receive the message

    Returns:
        Mapping of agent name -> reply (or error) message
    """
    skipped = exclude or []

    announcement = AgentMessage(
        type=MessageType.BROADCAST,
        from_agent="orchestrator",
        intent=intent,
        content=content,
    )
    self._message_log.append(announcement)

    # Recipients and their pending sends are kept in the same order so the
    # gathered outcomes can be paired back up by position.
    recipients = [name for name in self._agents if name not in skipped]
    pending = [self._send_to_agent(name, announcement) for name in recipients]

    outcomes = await asyncio.gather(*pending, return_exceptions=True)

    responses: dict[str, AgentMessage] = {}
    for name, outcome in zip(recipients, outcomes, strict=False):
        if not isinstance(outcome, Exception):
            responses[name] = outcome
            self._message_log.append(outcome)
            continue
        responses[name] = AgentMessage(
            type=MessageType.RESPONSE,
            from_agent=name,
            content={"error": str(outcome)},
            parent_id=announcement.id,
        )

    return responses
|
||||
|
||||
async def _check_all_capabilities(
|
||||
self,
|
||||
request: dict,
|
||||
) -> dict[str, CapabilityResponse]:
|
||||
"""Check all agents' capabilities in parallel."""
|
||||
tasks = []
|
||||
agent_names = []
|
||||
|
||||
for name, agent in self._agents.items():
|
||||
agent_names.append(name)
|
||||
tasks.append(agent.runner.can_handle(request, self._llm))
|
||||
|
||||
results = await asyncio.gather(*tasks, return_exceptions=True)
|
||||
|
||||
capabilities = {}
|
||||
for name, result in zip(agent_names, results, strict=False):
|
||||
if isinstance(result, Exception):
|
||||
capabilities[name] = CapabilityResponse(
|
||||
agent_name=name,
|
||||
level=CapabilityLevel.CANNOT_HANDLE,
|
||||
confidence=0.0,
|
||||
reasoning=f"Error: {result}",
|
||||
)
|
||||
else:
|
||||
capabilities[name] = result
|
||||
|
||||
return capabilities
|
||||
|
||||
async def _route_request(
    self,
    request: dict,
    intent: str | None,
    capabilities: dict[str, CapabilityResponse],
) -> RoutingDecision:
    """Decide which agent(s) should handle the request.

    Selection order:
      1. Exactly one agent reports BEST_FIT or CAN_HANDLE: use it.
      2. Several do: let the LLM choose when one is configured; otherwise
         take the most confident one and keep the next two as fallbacks.
      3. None do: take the most confident UNCERTAIN agent, keeping up to
         two further uncertain agents as fallbacks.
      4. Otherwise return an empty selection with zero confidence.

    Args:
        request: The request being routed (forwarded to the LLM router)
        intent: Optional description of the request's intent
        capabilities: Per-agent capability responses, keyed by agent name

    Returns:
        The routing decision
    """
    # Filter to capable agents
    capable = [
        (name, cap)
        for name, cap in capabilities.items()
        if cap.level in (CapabilityLevel.BEST_FIT, CapabilityLevel.CAN_HANDLE)
    ]

    # Sort by confidence (highest first)
    capable.sort(key=lambda x: -x[1].confidence)

    # If only one capable agent, use it
    if len(capable) == 1:
        return RoutingDecision(
            selected_agents=[capable[0][0]],
            reasoning=capable[0][1].reasoning,
            confidence=capable[0][1].confidence,
        )

    if len(capable) > 1:
        # If multiple capable agents and we have LLM, let it decide
        if self._llm:
            return await self._llm_route(request, intent, capable)

        # Without an LLM, capable agents must still win over uncertain ones:
        # previously this case fell through to the "no capable agents" path.
        best_name, best_cap = capable[0]
        return RoutingDecision(
            selected_agents=[best_name],
            reasoning=best_cap.reasoning,
            confidence=best_cap.confidence,
            fallback_agents=[name for name, _ in capable[1:3]],
        )

    # If no capable agents, check uncertain ones
    uncertain = [
        (name, cap)
        for name, cap in capabilities.items()
        if cap.level == CapabilityLevel.UNCERTAIN
    ]
    if uncertain:
        uncertain.sort(key=lambda x: -x[1].confidence)
        return RoutingDecision(
            selected_agents=[uncertain[0][0]],
            reasoning=f"Uncertain match: {uncertain[0][1].reasoning}",
            confidence=uncertain[0][1].confidence,
            fallback_agents=[u[0] for u in uncertain[1:3]],
        )

    # No agents can handle
    return RoutingDecision(
        selected_agents=[],
        reasoning="No capable agents found",
        confidence=0.0,
    )
|
||||
|
||||
async def _llm_route(
    self,
    request: dict,
    intent: str | None,
    capable: list[tuple[str, CapabilityResponse]],
) -> RoutingDecision:
    """Use LLM to decide routing when multiple agents are capable.

    The LLM is shown the request, the intent and the capable agents, and is
    asked for a flat JSON object with ``selected``, ``parallel`` and
    ``reasoning``. Only agents that were offered to it (and are still
    registered) are accepted. If the call fails, the reply cannot be parsed,
    or no acceptable agent is named, the first entry of ``capable`` is used.

    Args:
        request: The request being routed
        intent: Optional description of the request's intent
        capable: (agent name, capability) pairs; the caller passes them
            sorted by confidence, highest first

    Returns:
        The routing decision
    """
    import logging
    import re

    agents_info = "\n".join(
        f"- {name}: {cap.reasoning} (confidence: {cap.confidence:.2f})" for name, cap in capable
    )

    prompt = f"""Multiple agents can handle this request. Decide the best routing.

Request:
{json.dumps(request, indent=2)}

Intent: {intent or "Not specified"}

Capable agents:
{agents_info}

Decide:
1. Which agent(s) should handle this?
2. Should they run in parallel or sequence?
3. Why this routing?

Respond with JSON only:
{{
"selected": ["agent_name", ...],
"parallel": true/false,
"reasoning": "explanation"
}}"""

    # The LLM may only pick from the agents it was offered; a registered but
    # non-capable (or hallucinated) name must not be routed to.
    allowed = {name for name, _ in capable if name in self._agents}

    try:
        response = await self._llm.acomplete(
            messages=[{"role": "user", "content": prompt}],
            system="You are a request router. Respond with JSON only.",
            max_tokens=256,
        )

        # The expected reply is a flat object, so the first brace pair
        # without nested braces is taken as the JSON payload.
        json_match = re.search(r"\{[^{}]*\}", response.content, re.DOTALL)
        if json_match:
            data = json.loads(json_match.group())

            raw_selected = data.get("selected", [])
            if isinstance(raw_selected, str):
                # A bare string would otherwise be iterated per character.
                raw_selected = [raw_selected]
            elif not isinstance(raw_selected, list):
                raw_selected = []

            # Keep offered agents only, preserving order and dropping repeats.
            selected: list[str] = []
            for candidate in raw_selected:
                if isinstance(candidate, str) and candidate in allowed and candidate not in selected:
                    selected.append(candidate)

            if selected:
                return RoutingDecision(
                    selected_agents=selected,
                    reasoning=data.get("reasoning", ""),
                    confidence=0.8,
                    should_parallelize=bool(data.get("parallel", False)),
                )
    except Exception:
        # Routing stays best-effort, but the failure is no longer invisible.
        logging.getLogger(__name__).warning(
            "LLM routing failed; falling back to highest-confidence agent",
            exc_info=True,
        )

    # Fallback: use highest confidence
    return RoutingDecision(
        selected_agents=[capable[0][0]],
        reasoning=capable[0][1].reasoning,
        confidence=capable[0][1].confidence,
    )
|
||||
|
||||
async def _send_to_agent(
|
||||
self,
|
||||
agent_name: str,
|
||||
message: AgentMessage,
|
||||
) -> AgentMessage:
|
||||
"""Send a message to an agent and get response."""
|
||||
agent = self._agents[agent_name]
|
||||
return await agent.runner.receive_message(message)
|
||||
|
||||
def get_message_log(self) -> list[AgentMessage]:
    """Return a new list holding every logged message, for debugging/tracing.

    The returned list is a copy; changing it does not affect the internal log.
    """
    snapshot = [*self._message_log]
    return snapshot
|
||||
|
||||
def clear_message_log(self) -> None:
    """Empty the message log in place (the log object itself is kept)."""
    message_log = self._message_log
    message_log.clear()
|
||||
|
||||
def cleanup(self) -> None:
    """Run each registered agent's runner cleanup, then empty the registry."""
    registry = self._agents
    for registered in registry.values():
        registered.runner.cleanup()
    registry.clear()
|
||||
@@ -7,7 +7,7 @@ from collections.abc import Callable
|
||||
from dataclasses import dataclass, field
|
||||
from datetime import UTC
|
||||
from pathlib import Path
|
||||
from typing import TYPE_CHECKING, Any
|
||||
from typing import Any
|
||||
|
||||
from framework.config import get_hive_config, get_max_context_tokens, get_preferred_model
|
||||
from framework.credentials.validation import (
|
||||
@@ -30,10 +30,6 @@ from framework.runtime.execution_stream import EntryPointSpec
|
||||
from framework.runtime.runtime_log_store import RuntimeLogStore
|
||||
from framework.tools.flowchart_utils import generate_fallback_flowchart
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from framework.runner.protocol import AgentMessage, CapabilityResponse
|
||||
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
CLAUDE_CREDENTIALS_FILE = Path.home() / ".claude" / ".credentials.json"
|
||||
@@ -854,17 +850,6 @@ def get_antigravity_token() -> str | None:
|
||||
return access_token
|
||||
|
||||
|
||||
def _is_antigravity_proxy_available() -> bool:
|
||||
"""Return True if antigravity-auth serve is running on localhost:8069."""
|
||||
import socket
|
||||
|
||||
try:
|
||||
with socket.create_connection(("localhost", 8069), timeout=0.5):
|
||||
return True
|
||||
except (OSError, TimeoutError):
|
||||
return False
|
||||
|
||||
|
||||
@dataclass
|
||||
class AgentInfo:
|
||||
"""Information about an exported agent."""
|
||||
@@ -1370,18 +1355,6 @@ class AgentRunner:
|
||||
# It's a function, auto-generate Tool
|
||||
self._tool_registry.register_function(tool_or_func, name=name)
|
||||
|
||||
def register_tools_from_module(self, module_path: Path) -> int:
    """
    Discover tools in a Python module and register them.

    Delegates to the tool registry's ``discover_from_module``.

    Args:
        module_path: Path to tools.py file

    Returns:
        Number of tools discovered
    """
    discovered_count = self._tool_registry.discover_from_module(module_path)
    return discovered_count
|
||||
|
||||
def register_mcp_server(
|
||||
self,
|
||||
name: str,
|
||||
@@ -1493,16 +1466,11 @@ class AgentRunner:
|
||||
|
||||
configure_logging(level="INFO", format="auto")
|
||||
|
||||
# Set up session context for tools (workspace_id, agent_id, session_id)
|
||||
workspace_id = "default" # Could be derived from storage path
|
||||
# Set up session context for tools (agent_id)
|
||||
agent_id = self.graph.id or "unknown"
|
||||
# Use "current" as a stable session_id for persistent memory
|
||||
session_id = "current"
|
||||
|
||||
self._tool_registry.set_session_context(
|
||||
workspace_id=workspace_id,
|
||||
agent_id=agent_id,
|
||||
session_id=session_id,
|
||||
)
|
||||
|
||||
# Create LLM provider
|
||||
@@ -1729,7 +1697,7 @@ class AgentRunner:
|
||||
accounts_data = adapter.get_all_account_info()
|
||||
tool_provider_map = adapter.get_tool_provider_map()
|
||||
if accounts_data:
|
||||
from framework.graph.prompt_composer import build_accounts_prompt
|
||||
from framework.graph.prompting import build_accounts_prompt
|
||||
|
||||
accounts_prompt = build_accounts_prompt(accounts_data, tool_provider_map)
|
||||
except Exception:
|
||||
@@ -1998,15 +1966,15 @@ class AgentRunner:
|
||||
if not self._agent_runtime.is_running:
|
||||
await self._agent_runtime.start()
|
||||
|
||||
# Set up stdin-based I/O for client-facing nodes in headless mode.
|
||||
# When a client_facing EventLoopNode calls ask_user(), it emits
|
||||
# Set up stdin-based I/O for the queen in headless mode.
|
||||
# When the queen calls ask_user(), it emits
|
||||
# CLIENT_INPUT_REQUESTED on the event bus and blocks. We subscribe
|
||||
# a handler that prints the prompt and reads from stdin, then injects
|
||||
# the user's response back into the node to unblock it.
|
||||
has_client_facing = any(n.client_facing for n in self.graph.nodes)
|
||||
has_queen = any(n.is_queen_node() for n in self.graph.nodes)
|
||||
sub_ids: list[str] = []
|
||||
|
||||
if has_client_facing and sys.stdin.isatty():
|
||||
if has_queen and sys.stdin.isatty():
|
||||
from framework.runtime.event_bus import EventType
|
||||
|
||||
runtime = self._agent_runtime
|
||||
@@ -2124,18 +2092,6 @@ class AgentRunner:
|
||||
correlation_id=correlation_id,
|
||||
)
|
||||
|
||||
async def get_goal_progress(self) -> dict[str, Any]:
    """
    Report goal progress across all execution streams.

    The runtime is set up on demand when it has not been created yet.

    Returns:
        Dict with overall_progress, criteria_status, constraint_violations, etc.
    """
    runtime_missing = self._agent_runtime is None
    if runtime_missing:
        self._setup()

    progress = await self._agent_runtime.get_goal_progress()
    return progress
|
||||
|
||||
def get_entry_points(self) -> list[EntryPointSpec]:
|
||||
"""
|
||||
Get all registered entry points.
|
||||
@@ -2294,247 +2250,6 @@ class AgentRunner:
|
||||
missing_credentials=missing_credentials,
|
||||
)
|
||||
|
||||
async def can_handle(
    self, request: dict, llm: LLMProvider | None = None
) -> "CapabilityResponse":
    """
    Ask the agent if it can handle this request.

    Uses LLM to evaluate the request against the agent's goal and capabilities.
    When no LLM is available, or the LLM call or its reply cannot be used,
    the keyword-based check is used instead.

    Args:
        request: The request to evaluate
        llm: LLM provider to use (uses self._llm if not provided)

    Returns:
        CapabilityResponse with level, confidence, and reasoning
    """
    import re

    from framework.runner.protocol import CapabilityLevel, CapabilityResponse

    # Use provided LLM or set up our own
    eval_llm = llm
    if eval_llm is None:
        if self._llm is None:
            self._setup()
        eval_llm = self._llm

    # If still no LLM (mock mode), do keyword matching
    if eval_llm is None:
        return self._keyword_capability_check(request)

    # Build context about this agent
    info = self.info()
    agent_context = f"""Agent: {info.name}
Goal: {info.goal_name}
Description: {info.goal_description}

What this agent does:
{info.description}

Nodes in the workflow:
{chr(10).join(f"- {n['name']}: {n['description']}" for n in info.nodes[:5])}
{"..." if len(info.nodes) > 5 else ""}
"""

    # Ask LLM to evaluate
    prompt = f"""You are evaluating whether an agent can handle a request.

{agent_context}

Request to evaluate:
{json.dumps(request, indent=2)}

Evaluate how well this agent can handle this request. Consider:
1. Does the request match what this agent is designed to do?
2. Does the agent have the required capabilities?
3. How confident are you in this assessment?

Respond with JSON only:
{{
"level": "best_fit" | "can_handle" | "uncertain" | "cannot_handle",
"confidence": 0.0 to 1.0,
"reasoning": "Brief explanation",
"estimated_steps": number or null
}}"""

    try:
        response = await eval_llm.acomplete(
            messages=[{"role": "user", "content": prompt}],
            system="You are a capability evaluator. Respond with JSON only.",
            max_tokens=256,
        )

        # Parse response: the expected reply is a flat JSON object, so the
        # first brace pair without nested braces is taken as the payload.
        json_match = re.search(r"\{[^{}]*\}", response.content, re.DOTALL)
        if json_match:
            data = json.loads(json_match.group())
            level_map = {
                "best_fit": CapabilityLevel.BEST_FIT,
                "can_handle": CapabilityLevel.CAN_HANDLE,
                "uncertain": CapabilityLevel.UNCERTAIN,
                "cannot_handle": CapabilityLevel.CANNOT_HANDLE,
            }
            # The prompt asks for 0.0 to 1.0; clamp so an out-of-range reply
            # cannot distort confidence-based sorting downstream.
            confidence = max(0.0, min(1.0, float(data.get("confidence", 0.5))))
            return CapabilityResponse(
                agent_name=info.name,
                level=level_map.get(data.get("level", "uncertain"), CapabilityLevel.UNCERTAIN),
                confidence=confidence,
                reasoning=data.get("reasoning", ""),
                estimated_steps=data.get("estimated_steps"),
            )
    except Exception:
        # Fall back to keyword matching on error, but leave a trace of why.
        logger.debug(
            "LLM capability check failed; falling back to keyword matching",
            exc_info=True,
        )

    return self._keyword_capability_check(request)
|
||||
|
||||
def _keyword_capability_check(self, request: dict) -> "CapabilityResponse":
    """Simple keyword-based capability check (fallback when no LLM).

    The request is serialised to JSON, lower-cased and split into word
    tokens. A token longer than three characters counts as a match when it
    occurs as a substring of the agent's description or goal description.
    The ratio of matches to all tokens selects the level:
    above 0.3 -> CAN_HANDLE, above 0.1 -> UNCERTAIN, otherwise CANNOT_HANDLE.

    Args:
        request: The request to evaluate

    Returns:
        CapabilityResponse whose reasoning reports the match ratio
    """
    import re

    from framework.runner.protocol import CapabilityLevel, CapabilityResponse

    info = self.info()
    request_str = json.dumps(request).lower()
    description_lower = info.description.lower()
    goal_lower = info.goal_description.lower()

    # Tokenise on word characters. Splitting on whitespace left JSON
    # punctuation attached to the words ('"find', 'flights"}'), so genuine
    # keywords could not match the description text.
    keywords = re.findall(r"\w+", request_str)

    # Check for keyword matches (short words are skipped)
    matches = sum(
        1
        for keyword in keywords
        if len(keyword) > 3 and (keyword in description_lower or keyword in goal_lower)
    )

    # Determine level based on matches
    match_ratio = matches / max(len(keywords), 1)
    if match_ratio > 0.3:
        level = CapabilityLevel.CAN_HANDLE
        confidence = min(0.7, match_ratio + 0.3)
    elif match_ratio > 0.1:
        level = CapabilityLevel.UNCERTAIN
        confidence = 0.4
    else:
        level = CapabilityLevel.CANNOT_HANDLE
        confidence = 0.6

    return CapabilityResponse(
        agent_name=info.name,
        level=level,
        confidence=confidence,
        reasoning=f"Keyword match ratio: {match_ratio:.2f}",
        estimated_steps=info.node_count if level != CapabilityLevel.CANNOT_HANDLE else None,
    )
|
||||
|
||||
async def receive_message(self, message: "AgentMessage") -> "AgentMessage":
    """
    Handle a message from the orchestrator or another agent.

    Dispatches on ``message.type``:
      - CAPABILITY_CHECK: evaluates ``message.content`` with ``can_handle``
        and replies with level, confidence, reasoning and estimated_steps.
      - REQUEST: runs the agent on ``message.content`` and replies with
        success, output, path and error.
      - HANDOFF: runs the agent on a copy of ``message.content["context"]``
        annotated with ``_handoff_from`` and ``_handoff_reason``.
      - anything else: replies with an ``{"error": ...}`` payload.

    Args:
        message: The incoming message

    Returns:
        Response message
    """
    from framework.runner.protocol import MessageType

    info = self.info()

    # Handle capability check
    if message.type == MessageType.CAPABILITY_CHECK:
        capability = await self.can_handle(message.content)
        return message.reply(
            from_agent=info.name,
            content={
                "level": capability.level.value,
                "confidence": capability.confidence,
                "reasoning": capability.reasoning,
                "estimated_steps": capability.estimated_steps,
            },
            type=MessageType.CAPABILITY_RESPONSE,
        )

    # Handle request - run the agent
    if message.type == MessageType.REQUEST:
        result = await self.run(message.content)
        return message.reply(
            from_agent=info.name,
            content={
                "success": result.success,
                "output": result.output,
                "path": result.path,
                "error": result.error,
            },
            type=MessageType.RESPONSE,
        )

    # Handle handoff - another agent is passing work
    if message.type == MessageType.HANDOFF:
        # Work on a copy: the sender's dict (still referenced by the message,
        # which callers may log) must not gain the _handoff_* keys. A missing
        # or None context is treated as empty.
        context = dict(message.content.get("context") or {})
        context["_handoff_from"] = message.from_agent
        context["_handoff_reason"] = message.content.get("reason", "")
        result = await self.run(context)
        return message.reply(
            from_agent=info.name,
            content={
                "success": result.success,
                "output": result.output,
                "handoff_handled": True,
            },
            type=MessageType.RESPONSE,
        )

    # Unknown message type
    return message.reply(
        from_agent=info.name,
        content={"error": f"Unknown message type: {message.type}"},
        type=MessageType.RESPONSE,
    )
|
||||
|
||||
@classmethod
async def setup_as_secondary(
    cls,
    agent_path: str | Path,
    runtime: AgentRuntime,
    graph_id: str | None = None,
) -> str:
    """Load an agent and attach it to *runtime* as a secondary graph.

    The agent is parsed with ``cls.load``; its graph, goal and entry points
    are then handed to ``runtime.add_graph``. A single ``"default"`` manual
    entry point with shared isolation is created when the graph declares an
    entry node; otherwise no entry points are registered.

    Args:
        agent_path: Path to the agent directory
        runtime: The running AgentRuntime to attach to
        graph_id: Optional graph identifier (defaults to directory name)

    Returns:
        The graph_id used for registration
    """
    agent_dir = Path(agent_path)
    loaded = cls.load(agent_dir)
    resolved_id = graph_id if graph_id else agent_dir.name

    # Build entry points
    entry_points: dict[str, EntryPointSpec] = {}
    entry_node = loaded.graph.entry_node
    if entry_node:
        default_spec = EntryPointSpec(
            id="default",
            name="Default",
            entry_node=entry_node,
            trigger_type="manual",
            isolation_level="shared",
        )
        entry_points["default"] = default_spec

    await runtime.add_graph(
        graph_id=resolved_id,
        graph=loaded.graph,
        goal=loaded.goal,
        entry_points=entry_points,
    )
    return resolved_id
|
||||
|
||||
def cleanup(self) -> None:
|
||||
"""Clean up resources (synchronous)."""
|
||||
# Clean up MCP client connections
|
||||
|
||||
@@ -48,7 +48,7 @@ class ToolRegistry:
|
||||
# Framework-internal context keys injected into tool calls.
|
||||
# Stripped from LLM-facing schemas (the LLM doesn't know these values)
|
||||
# and auto-injected at call time for tools that accept them.
|
||||
CONTEXT_PARAMS = frozenset({"workspace_id", "agent_id", "session_id", "data_dir"})
|
||||
CONTEXT_PARAMS = frozenset({"agent_id", "data_dir"})
|
||||
|
||||
# Credential directory used for change detection
|
||||
_CREDENTIAL_DIR = Path("~/.hive/credentials/credentials").expanduser()
|
||||
|
||||
@@ -22,7 +22,7 @@ Every event shares a common envelope:
|
||||
The identity tuple `(graph_id, stream_id, node_id, execution_id)` uniquely locates any event:
|
||||
|
||||
- **`graph_id`** — Which graph produced the event. Set automatically by `GraphScopedEventBus` (a subclass that stamps `graph_id` on every `publish()` call). Values: `"worker"`, `"judge"`, `"queen"`, or the graph spec ID.
|
||||
- **`stream_id`** — Which entry point / pipeline. Corresponds to `EntryPointSpec.id` in the graph definition. For single-entry-point graphs, this equals the entry point name (e.g. `"default"`, `"health_check"`, `"ticket_receiver"`).
|
||||
- **`stream_id`** — Which entry point / pipeline. Corresponds to `EntryPointSpec.id` in the graph definition. For single-entry-point graphs, this equals the entry point name (e.g. `"default"`, `"health_check"`).
|
||||
- **`node_id`** — Which specific node emitted the event. For `EventLoopNode` events, this is the node spec ID.
|
||||
- **`execution_id`** — UUID identifying a specific execution run. Multiple concurrent executions of the same entry point each get a unique `execution_id`.
|
||||
|
||||
@@ -198,7 +198,7 @@ A tool call has finished executing.
|
||||
|
||||
## Client I/O
|
||||
|
||||
These events are emitted only by nodes with `client_facing=True`. They drive the TUI's chat interface.
|
||||
These events are emitted by the queen's interactive turns. They drive the TUI's chat interface.
|
||||
|
||||
### `client_output_delta`
|
||||
|
||||
@@ -209,7 +209,7 @@ Incremental text output meant for the human operator.
|
||||
| `content` | `str` | New text chunk (delta) |
|
||||
| `snapshot` | `str` | Full accumulated text so far |
|
||||
|
||||
**Emitted by:** `EventLoopNode._publish_text_delta()` when `client_facing=True`
|
||||
**Emitted by:** `EventLoopNode._publish_text_delta()` for queen/user-facing output
|
||||
|
||||
---
|
||||
|
||||
@@ -356,11 +356,11 @@ Not currently emitted — reserved for future use when `NodeConversation` compac
|
||||
|
||||
### `state_changed`
|
||||
|
||||
A shared memory key has been modified.
|
||||
A shared buffer key has been modified.
|
||||
|
||||
| Data Field | Type | Description |
|
||||
| ----------- | ----- | ---------------------------------- |
|
||||
| `key` | `str` | Memory key that changed |
|
||||
| `key` | `str` | Buffer key that changed |
|
||||
| `old_value` | `Any` | Previous value |
|
||||
| `new_value` | `Any` | New value |
|
||||
| `scope` | `str` | Scope of the change |
|
||||
@@ -452,60 +452,6 @@ An agent has requested handoff to the Hive Coder (via the `escalate` synthetic t
|
||||
|
||||
---
|
||||
|
||||
## Worker Health Monitoring
|
||||
|
||||
These events form the **queen → operator** escalation pipeline.
|
||||
|
||||
### `worker_escalation_ticket`
|
||||
|
||||
A worker degradation pattern has been detected and is being escalated to the Queen.
|
||||
|
||||
| Data Field | Type | Description |
|
||||
| ---------- | ------ | ------------------------------------ |
|
||||
| `ticket` | `dict` | Full `EscalationTicket` (see below) |
|
||||
|
||||
**Emitted by:** `emit_escalation_ticket` tool (in `worker_monitoring_tools.py`)
|
||||
|
||||
#### EscalationTicket Schema
|
||||
|
||||
| Field | Type | Description |
|
||||
| ------------------------- | ------------------ | -------------------------------------------------------- |
|
||||
| `ticket_id` | `str` | Auto-generated UUID |
|
||||
| `created_at` | `str` | ISO timestamp |
|
||||
| `worker_agent_id` | `str` | Which worker agent |
|
||||
| `worker_session_id` | `str` | Which session |
|
||||
| `worker_node_id` | `str` | Which node is struggling |
|
||||
| `worker_graph_id` | `str` | Which graph |
|
||||
| `severity` | `str` | `"low"`, `"medium"`, `"high"`, or `"critical"` |
|
||||
| `cause` | `str` | Human-readable problem description |
|
||||
| `judge_reasoning` | `str` | Judge's deliberation chain |
|
||||
| `suggested_action` | `str` | e.g. `"Restart node"`, `"Human review"`, `"Kill session"`|
|
||||
| `recent_verdicts` | `list[str]` | e.g. `["RETRY", "RETRY", "CONTINUE", "RETRY"]` |
|
||||
| `total_steps_checked` | `int` | Steps the judge inspected |
|
||||
| `steps_since_last_accept` | `int` | Consecutive non-ACCEPT steps |
|
||||
| `stall_minutes` | `float \| null` | Minutes since last activity (null if active) |
|
||||
| `evidence_snippet` | `str` | Excerpt from recent LLM output |
|
||||
|
||||
---
|
||||
|
||||
### `queen_intervention_requested`
|
||||
|
||||
The Queen has triaged an escalation ticket and decided the human operator should be involved.
|
||||
|
||||
| Data Field | Type | Description |
|
||||
| ----------------- | ----- | ---------------------------------------------------- |
|
||||
| `ticket_id` | `str` | From the original `EscalationTicket` |
|
||||
| `analysis` | `str` | Queen's 2–3 sentence analysis |
|
||||
| `severity` | `str` | `"low"`, `"medium"`, `"high"`, or `"critical"` |
|
||||
| `queen_graph_id` | `str` | Queen's graph ID (for TUI navigation) |
|
||||
| `queen_stream_id` | `str` | Queen's stream ID |
|
||||
|
||||
**Emitted by:** `notify_operator` tool (in `worker_monitoring_tools.py`)
|
||||
|
||||
The TUI subscribes to this event and shows a non-disruptive notification. The worker continues running.
|
||||
|
||||
---
|
||||
|
||||
## Custom Events
|
||||
|
||||
### `custom`
|
||||
|
||||
@@ -33,7 +33,7 @@ Single-entry agents get a `"default"` entry point automatically. There is no sep
|
||||
| `ExecutionStream` | `runtime/execution_stream.py` | Per-entry-point execution queue, session persistence |
|
||||
| `GraphExecutor` | `graph/executor.py` | Node traversal, tool dispatch, checkpointing |
|
||||
| `EventBus` | `runtime/event_bus.py` | Pub/sub for execution events (streaming, I/O) |
|
||||
| `SharedStateManager` | `runtime/shared_state.py` | Cross-stream state with isolation levels |
|
||||
| `SharedBufferManager` | `runtime/shared_state.py` | Cross-stream state with isolation levels |
|
||||
| `OutcomeAggregator` | `runtime/outcome_aggregator.py` | Goal progress tracking across streams |
|
||||
| `SessionStore` | `storage/session_store.py` | Session state persistence (`sessions/{id}/state.json`) |
|
||||
|
||||
@@ -55,7 +55,6 @@ result = await runner.run({"query": "continue"}, session_state=saved_state)
|
||||
await runner.start() # Start the runtime
|
||||
await runner.stop() # Stop the runtime
|
||||
exec_id = await runner.trigger("default", {}) # Non-blocking trigger
|
||||
progress = await runner.get_goal_progress() # Goal evaluation
|
||||
entry_points = runner.get_entry_points() # List entry points
|
||||
|
||||
# Context manager
|
||||
@@ -109,7 +108,7 @@ runtime.unsubscribe_from_events(sub_id)
|
||||
# Inspection
|
||||
runtime.is_running # bool
|
||||
runtime.event_bus # EventBus
|
||||
runtime.state_manager # SharedStateManager
|
||||
runtime.state_manager # SharedBufferManager
|
||||
runtime.get_stats() # Runtime statistics
|
||||
```
|
||||
|
||||
|
||||
@@ -1,840 +0,0 @@
|
||||
# Resumable Sessions Design
|
||||
|
||||
## Problem Statement
|
||||
|
||||
Currently, when an agent encounters a failure during execution (e.g., credential validation, API errors, tool failures), the entire session is lost. This creates a poor user experience, especially when:
|
||||
|
||||
1. The agent has completed significant work before the failure
|
||||
2. The failure is recoverable (e.g., adding missing credentials)
|
||||
3. The user wants to retry from the exact failure point without redoing work
|
||||
|
||||
## Design Goals
|
||||
|
||||
1. **Crash Recovery**: Sessions can resume after process crashes or errors
|
||||
2. **Partial Completion**: Preserve work done by nodes that completed successfully
|
||||
3. **Flexible Resume Points**: Resume from exact failure point or previous checkpoints
|
||||
4. **State Consistency**: Guarantee consistent SharedMemory and conversation state
|
||||
5. **Minimal Overhead**: Checkpointing shouldn't significantly impact performance
|
||||
6. **User Control**: Users can inspect, modify, and resume sessions explicitly
|
||||
|
||||
## Architecture
|
||||
|
||||
### 1. Checkpoint System
|
||||
|
||||
#### Checkpoint Types
|
||||
|
||||
**Automatic Checkpoints** (saved automatically by framework):
|
||||
- `node_start`: Before each node begins execution
|
||||
- `node_complete`: After each node successfully completes
|
||||
- `edge_transition`: Before traversing to next node
|
||||
- `loop_iteration`: At each iteration in EventLoopNode (optional)
|
||||
|
||||
**Manual Checkpoints** (triggered by agent designer):
|
||||
- `safe_point`: Explicitly marked safe points in graph
|
||||
- `user_checkpoint`: Before awaiting user input in client-facing nodes
|
||||
|
||||
#### Checkpoint Data Structure
|
||||
|
||||
```python
|
||||
@dataclass
|
||||
class Checkpoint:
|
||||
"""Single checkpoint in execution timeline."""
|
||||
|
||||
# Identity
|
||||
checkpoint_id: str # Format: checkpoint_{timestamp}_{uuid_short}
|
||||
session_id: str
|
||||
checkpoint_type: str # "node_start", "node_complete", etc.
|
||||
|
||||
# Timestamps
|
||||
created_at: str # ISO 8601
|
||||
|
||||
# Execution state
|
||||
current_node: str | None
|
||||
next_node: str | None # For edge_transition checkpoints
|
||||
execution_path: list[str] # Nodes executed so far
|
||||
|
||||
# Memory state (snapshot)
|
||||
shared_memory: dict[str, Any] # Full SharedMemory._data
|
||||
|
||||
# Per-node conversation state references
|
||||
# (actual conversations stored separately, reference by node_id)
|
||||
conversation_states: dict[str, str] # {node_id: conversation_checkpoint_id}
|
||||
|
||||
# Output accumulator state
|
||||
accumulated_outputs: dict[str, Any]
|
||||
|
||||
# Execution metrics (for resuming quality tracking)
|
||||
metrics_snapshot: dict[str, Any]
|
||||
|
||||
# Metadata
|
||||
is_clean: bool # True if no failures/retries before this checkpoint
|
||||
can_resume_from: bool # False if checkpoint is in unstable state
|
||||
description: str # Human-readable checkpoint description
|
||||
```
|
||||
|
||||
#### Storage Structure
|
||||
|
||||
```
|
||||
~/.hive/agents/{agent_name}/
|
||||
└── sessions/
|
||||
└── session_YYYYMMDD_HHMMSS_{uuid}/
|
||||
├── state.json # Session state (existing)
|
||||
├── checkpoints/
|
||||
│ ├── index.json # Checkpoint index/manifest
|
||||
│ ├── checkpoint_1.json # Individual checkpoints
|
||||
│ ├── checkpoint_2.json
|
||||
│ └── checkpoint_N.json
|
||||
├── conversations/ # Flat conversation state (parts carry phase_id)
|
||||
│ ├── meta.json # Current node config
|
||||
│ ├── cursor.json # Iteration, outputs, stall state
|
||||
│ └── parts/ # Sequential message files
|
||||
├── data/ # Spillover artifacts (existing)
|
||||
└── logs/ # L1/L2/L3 logs (existing)
|
||||
```
|
||||
|
||||
**Checkpoint Index Format** (`checkpoints/index.json`):
|
||||
```json
|
||||
{
|
||||
"session_id": "session_20260208_143022_abc12345",
|
||||
"checkpoints": [
|
||||
{
|
||||
"checkpoint_id": "checkpoint_20260208_143030_xyz123",
|
||||
"type": "node_complete",
|
||||
"created_at": "2026-02-08T14:30:30.123Z",
|
||||
"current_node": "collector",
|
||||
"is_clean": true,
|
||||
"can_resume_from": true,
|
||||
"description": "Completed collector node successfully"
|
||||
},
|
||||
{
|
||||
"checkpoint_id": "checkpoint_20260208_143045_abc789",
|
||||
"type": "node_start",
|
||||
"created_at": "2026-02-08T14:30:45.456Z",
|
||||
"current_node": "analyzer",
|
||||
"is_clean": true,
|
||||
"can_resume_from": true,
|
||||
"description": "Starting analyzer node"
|
||||
}
|
||||
],
|
||||
"latest_checkpoint_id": "checkpoint_20260208_143045_abc789",
|
||||
"total_checkpoints": 2
|
||||
}
|
||||
```
|
||||
|
||||
### 2. Resume Mechanism
|
||||
|
||||
#### Resume Flow
|
||||
|
||||
```python
|
||||
# High-level resume flow
|
||||
async def resume_session(
|
||||
session_id: str,
|
||||
checkpoint_id: str | None = None, # None = resume from latest
|
||||
modifications: dict[str, Any] | None = None, # Override memory values
|
||||
) -> ExecutionResult:
|
||||
"""
|
||||
Resume a session from a checkpoint.
|
||||
|
||||
Args:
|
||||
session_id: Session to resume
|
||||
checkpoint_id: Specific checkpoint (None = latest)
|
||||
modifications: Optional memory/state modifications before resume
|
||||
|
||||
Returns:
|
||||
ExecutionResult with resumed execution
|
||||
"""
|
||||
# 1. Load session state
|
||||
session_state = await session_store.read_state(session_id)
|
||||
|
||||
# 2. Verify session is resumable
|
||||
if not session_state.is_resumable:
|
||||
raise ValueError(f"Session {session_id} is not resumable")
|
||||
|
||||
# 3. Load checkpoint
|
||||
checkpoint = await checkpoint_store.load_checkpoint(
|
||||
session_id,
|
||||
checkpoint_id or session_state.progress.resume_from
|
||||
)
|
||||
|
||||
# 4. Restore state
|
||||
# - Restore SharedMemory from checkpoint.shared_memory
|
||||
# - Restore per-node conversations from checkpoint.conversation_states
|
||||
# - Restore output accumulator from checkpoint.accumulated_outputs
|
||||
# - Apply modifications if provided
|
||||
|
||||
# 5. Resume execution from checkpoint.next_node or checkpoint.current_node
|
||||
result = await executor.execute(
|
||||
graph=graph,
|
||||
goal=goal,
|
||||
memory=restored_memory,
|
||||
entry_point=checkpoint.next_node or checkpoint.current_node,
|
||||
session_state=restored_session_state,
|
||||
)
|
||||
|
||||
# 6. Update session state with resumed execution
|
||||
await session_store.write_state(session_id, updated_state)
|
||||
|
||||
return result
|
||||
```
|
||||
|
||||
#### Checkpoint Restoration
|
||||
|
||||
```python
|
||||
@dataclass
|
||||
class CheckpointStore:
|
||||
"""Manages checkpoint storage and retrieval."""
|
||||
|
||||
async def save_checkpoint(
|
||||
self,
|
||||
session_id: str,
|
||||
checkpoint: Checkpoint,
|
||||
) -> None:
|
||||
"""Save a checkpoint atomically."""
|
||||
# 1. Write checkpoint file: checkpoints/{checkpoint_id}.json
|
||||
# 2. Update index: checkpoints/index.json
|
||||
# 3. Use atomic write for crash safety
|
||||
|
||||
async def load_checkpoint(
|
||||
self,
|
||||
session_id: str,
|
||||
checkpoint_id: str | None = None,
|
||||
) -> Checkpoint | None:
|
||||
"""Load a checkpoint by ID or latest."""
|
||||
# 1. Read checkpoint index
|
||||
# 2. Find checkpoint by ID (or latest if None)
|
||||
# 3. Load and deserialize checkpoint file
|
||||
|
||||
async def list_checkpoints(
|
||||
self,
|
||||
session_id: str,
|
||||
checkpoint_type: str | None = None,
|
||||
is_clean: bool | None = None,
|
||||
) -> list[Checkpoint]:
|
||||
"""List all checkpoints for a session with optional filters."""
|
||||
|
||||
async def delete_checkpoint(
|
||||
self,
|
||||
session_id: str,
|
||||
checkpoint_id: str,
|
||||
) -> bool:
|
||||
"""Delete a specific checkpoint."""
|
||||
|
||||
async def prune_checkpoints(
|
||||
self,
|
||||
session_id: str,
|
||||
keep_count: int = 10,
|
||||
keep_clean_only: bool = False,
|
||||
) -> int:
|
||||
"""Prune old checkpoints, keeping most recent N."""
|
||||
```
|
||||
|
||||
### 3. GraphExecutor Integration
|
||||
|
||||
#### Modified Execution Loop
|
||||
|
||||
```python
|
||||
# In GraphExecutor.execute()
|
||||
|
||||
async def execute(
|
||||
self,
|
||||
graph: GraphSpec,
|
||||
goal: Goal,
|
||||
memory: SharedMemory | None = None,
|
||||
entry_point: str = "start",
|
||||
session_state: dict[str, Any] | None = None,
|
||||
checkpoint_config: CheckpointConfig | None = None,
|
||||
) -> ExecutionResult:
|
||||
"""
|
||||
Execute graph with checkpointing support.
|
||||
|
||||
New parameters:
|
||||
checkpoint_config: Configuration for checkpointing behavior
|
||||
"""
|
||||
|
||||
# Initialize checkpoint store
|
||||
checkpoint_store = CheckpointStore(storage_path / "checkpoints")
|
||||
|
||||
# Restore from checkpoint if session_state indicates resume
|
||||
if session_state and session_state.get("resume_from"):
|
||||
checkpoint = await checkpoint_store.load_checkpoint(
|
||||
session_id,
|
||||
session_state["resume_from"]
|
||||
)
|
||||
memory = self._restore_memory_from_checkpoint(checkpoint)
|
||||
entry_point = checkpoint.next_node or checkpoint.current_node
|
||||
|
||||
current_node = entry_point
|
||||
|
||||
while current_node:
|
||||
# CHECKPOINT: node_start
|
||||
if checkpoint_config and checkpoint_config.checkpoint_on_node_start:
|
||||
await self._save_checkpoint(
|
||||
checkpoint_store,
|
||||
checkpoint_type="node_start",
|
||||
current_node=current_node,
|
||||
memory=memory,
|
||||
# ... other state
|
||||
)
|
||||
|
||||
try:
|
||||
# Execute node
|
||||
result = await self._execute_node(current_node, memory, context)
|
||||
|
||||
# CHECKPOINT: node_complete
|
||||
if checkpoint_config and checkpoint_config.checkpoint_on_node_complete:
|
||||
await self._save_checkpoint(
|
||||
checkpoint_store,
|
||||
checkpoint_type="node_complete",
|
||||
current_node=current_node,
|
||||
memory=memory,
|
||||
# ... other state
|
||||
)
|
||||
|
||||
except Exception as e:
|
||||
# On failure, mark current checkpoint as resume point
|
||||
await self._mark_failure_checkpoint(
|
||||
checkpoint_store,
|
||||
current_node=current_node,
|
||||
error=str(e),
|
||||
)
|
||||
raise
|
||||
|
||||
# Find next edge
|
||||
next_node = self._find_next_node(current_node, result, memory)
|
||||
|
||||
# CHECKPOINT: edge_transition
|
||||
if next_node and checkpoint_config and checkpoint_config.checkpoint_on_edge:
|
||||
await self._save_checkpoint(
|
||||
checkpoint_store,
|
||||
checkpoint_type="edge_transition",
|
||||
current_node=current_node,
|
||||
next_node=next_node,
|
||||
memory=memory,
|
||||
# ... other state
|
||||
)
|
||||
|
||||
current_node = next_node
|
||||
```
|
||||
|
||||
### 4. EventLoopNode Integration
|
||||
|
||||
#### Conversation State Checkpointing
|
||||
|
||||
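EventLoopNode already has conversation persistence via `ConversationStore`. To make it resumable, `execute()` restores the conversation and output accumulator when a checkpoint ID is supplied, and can optionally checkpoint every N loop iterations: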
|
||||
|
||||
```python
|
||||
class EventLoopNode:
|
||||
async def execute(self, ctx: NodeContext) -> NodeResult:
|
||||
"""Execute with checkpoint support."""
|
||||
|
||||
# Try to restore from checkpoint
|
||||
if ctx.checkpoint_id:
|
||||
conversation = await self._restore_conversation(ctx.checkpoint_id)
|
||||
output_accumulator = await OutputAccumulator.restore(self.store)
|
||||
else:
|
||||
# Fresh start
|
||||
conversation = await self._initialize_conversation(ctx)
|
||||
output_accumulator = OutputAccumulator(store=self.store)
|
||||
|
||||
# Event loop with periodic checkpointing
|
||||
iteration = 0
|
||||
while iteration < self.config.max_iterations:
|
||||
|
||||
# Optional: checkpoint every N iterations
|
||||
if self.config.checkpoint_every_n_iterations:
|
||||
if iteration % self.config.checkpoint_every_n_iterations == 0:
|
||||
await self._save_loop_checkpoint(
|
||||
conversation,
|
||||
output_accumulator,
|
||||
iteration,
|
||||
)
|
||||
|
||||
# ... rest of event loop
|
||||
|
||||
iteration += 1
|
||||
```
|
||||
|
||||
**Note**: EventLoopNode conversation state is already persisted to disk after each turn via `ConversationStore`, so it's naturally resumable. We just need to:
|
||||
1. Track which conversation checkpoint to restore from
|
||||
2. Ensure output accumulator state is also restored
|
||||
|
||||
### 5. User-Facing API
|
||||
|
||||
#### MCP Tools for Resume
|
||||
|
||||
```python
|
||||
# In tools/src/aden_tools/tools/session_management/
|
||||
|
||||
@tool
|
||||
async def list_resumable_sessions(
|
||||
agent_work_dir: str,
|
||||
status: str = "failed", # "failed", "paused", "cancelled"
|
||||
limit: int = 20,
|
||||
) -> dict:
|
||||
"""
|
||||
List sessions that can be resumed.
|
||||
|
||||
Returns:
|
||||
{
|
||||
"sessions": [
|
||||
{
|
||||
"session_id": "session_20260208_143022_abc12345",
|
||||
"status": "failed",
|
||||
"error": "Missing API key: OPENAI_API_KEY",
|
||||
"failed_at_node": "analyzer",
|
||||
"last_checkpoint": "checkpoint_20260208_143045_abc789",
|
||||
"created_at": "2026-02-08T14:30:22Z",
|
||||
"updated_at": "2026-02-08T14:30:45Z"
|
||||
}
|
||||
],
|
||||
"total": 1
|
||||
}
|
||||
"""
|
||||
|
||||
@tool
|
||||
async def list_session_checkpoints(
|
||||
agent_work_dir: str,
|
||||
session_id: str,
|
||||
checkpoint_type: str = "", # Filter by type
|
||||
clean_only: bool = False, # Only show clean checkpoints
|
||||
) -> dict:
|
||||
"""
|
||||
List all checkpoints for a session.
|
||||
|
||||
Returns:
|
||||
{
|
||||
"session_id": "session_20260208_143022_abc12345",
|
||||
"checkpoints": [
|
||||
{
|
||||
"checkpoint_id": "checkpoint_20260208_143030_xyz123",
|
||||
"type": "node_complete",
|
||||
"created_at": "2026-02-08T14:30:30Z",
|
||||
"current_node": "collector",
|
||||
"is_clean": true,
|
||||
"can_resume_from": true,
|
||||
"description": "Completed collector node successfully"
|
||||
},
|
||||
...
|
||||
]
|
||||
}
|
||||
"""
|
||||
|
||||
@tool
|
||||
async def inspect_checkpoint(
|
||||
agent_work_dir: str,
|
||||
session_id: str,
|
||||
checkpoint_id: str,
|
||||
include_memory: bool = False, # Include full memory state
|
||||
) -> dict:
|
||||
"""
|
||||
Inspect a checkpoint's detailed state.
|
||||
|
||||
Returns:
|
||||
{
|
||||
"checkpoint_id": "checkpoint_20260208_143030_xyz123",
|
||||
"type": "node_complete",
|
||||
"current_node": "collector",
|
||||
"execution_path": ["start", "collector"],
|
||||
"accumulated_outputs": {
|
||||
"twitter_handles": ["@user1", "@user2"]
|
||||
},
|
||||
"memory": {...}, # If include_memory=True
|
||||
"metrics_snapshot": {
|
||||
"total_retries": 2,
|
||||
"nodes_with_failures": []
|
||||
}
|
||||
}
|
||||
"""
|
||||
|
||||
@tool
|
||||
async def resume_session(
|
||||
agent_work_dir: str,
|
||||
session_id: str,
|
||||
checkpoint_id: str = "", # Empty = latest checkpoint
|
||||
memory_modifications: str = "{}", # JSON string of memory overrides
|
||||
) -> dict:
|
||||
"""
|
||||
Resume a session from a checkpoint.
|
||||
|
||||
Args:
|
||||
agent_work_dir: Path to agent workspace
|
||||
session_id: Session to resume
|
||||
checkpoint_id: Specific checkpoint (empty = latest)
|
||||
memory_modifications: JSON object with memory key overrides
|
||||
|
||||
Returns:
|
||||
{
|
||||
"session_id": "session_20260208_143022_abc12345",
|
||||
"resumed_from": "checkpoint_20260208_143045_abc789",
|
||||
"status": "active", # Now actively running
|
||||
"message": "Session resumed successfully from checkpoint_20260208_143045_abc789"
|
||||
}
|
||||
"""
|
||||
```
|
||||
|
||||
#### CLI Commands
|
||||
|
||||
```bash
|
||||
# List resumable sessions
|
||||
hive sessions list --agent deep_research_agent --status failed
|
||||
|
||||
# Show checkpoints for a session
|
||||
hive sessions checkpoints session_20260208_143022_abc12345
|
||||
|
||||
# Inspect a checkpoint
|
||||
hive sessions inspect session_20260208_143022_abc12345 checkpoint_20260208_143045_abc789
|
||||
|
||||
# Resume a session
|
||||
hive sessions resume session_20260208_143022_abc12345
|
||||
|
||||
# Resume from specific checkpoint
|
||||
hive sessions resume session_20260208_143022_abc12345 --checkpoint checkpoint_20260208_143030_xyz123
|
||||
|
||||
# Resume with memory modifications (e.g., after adding credentials)
|
||||
hive sessions resume session_20260208_143022_abc12345 --set api_key=sk-...
|
||||
```
|
||||
|
||||
### 6. Configuration
|
||||
|
||||
#### CheckpointConfig
|
||||
|
||||
```python
|
||||
@dataclass
|
||||
class CheckpointConfig:
|
||||
"""Configuration for checkpoint behavior."""
|
||||
|
||||
# When to checkpoint
|
||||
checkpoint_on_node_start: bool = True
|
||||
checkpoint_on_node_complete: bool = True
|
||||
checkpoint_on_edge: bool = False # Usually redundant with node_start
|
||||
checkpoint_on_loop_iteration: bool = False # Can be expensive
|
||||
checkpoint_every_n_iterations: int = 0 # 0 = disabled
|
||||
|
||||
# Pruning
|
||||
max_checkpoints_per_session: int = 100
|
||||
prune_after_node_count: int = 10 # Prune every N nodes
|
||||
keep_clean_checkpoints_only: bool = False
|
||||
|
||||
# Performance
|
||||
async_checkpoint: bool = True # Don't block execution on checkpoint writes
|
||||
|
||||
# What to include
|
||||
include_conversation_snapshots: bool = True
|
||||
include_full_memory: bool = True
|
||||
```
|
||||
|
||||
#### Agent-Level Configuration
|
||||
|
||||
```python
|
||||
# In agent.py or config.py
|
||||
|
||||
class MyAgent(Agent):
|
||||
def get_checkpoint_config(self) -> CheckpointConfig:
|
||||
"""Override to customize checkpoint behavior."""
|
||||
return CheckpointConfig(
|
||||
checkpoint_on_node_start=True,
|
||||
checkpoint_on_node_complete=True,
|
||||
checkpoint_every_n_iterations=5, # Checkpoint every 5 iterations in loops
|
||||
max_checkpoints_per_session=50,
|
||||
)
|
||||
```
|
||||
|
||||
## Implementation Plan
|
||||
|
||||
### Phase 1: Core Checkpoint Infrastructure (Week 1)
|
||||
|
||||
1. **Create checkpoint schemas**
|
||||
- `Checkpoint` dataclass
|
||||
- `CheckpointIndex` for manifest
|
||||
- Serialization/deserialization
|
||||
|
||||
2. **Implement CheckpointStore**
|
||||
- `save_checkpoint()` with atomic writes
|
||||
- `load_checkpoint()` with deserialization
|
||||
- `list_checkpoints()` with filtering
|
||||
- `prune_checkpoints()` for cleanup
|
||||
|
||||
3. **Update SessionState schema**
|
||||
- Add `resume_from_checkpoint_id` field
|
||||
- Add `checkpoints_enabled` flag
|
||||
|
||||
### Phase 2: GraphExecutor Integration (Week 2)
|
||||
|
||||
1. **Modify GraphExecutor**
|
||||
- Add `CheckpointConfig` parameter
|
||||
- Implement checkpoint saving at node boundaries
|
||||
- Implement checkpoint restoration logic
|
||||
- Handle memory state snapshots
|
||||
|
||||
2. **Update execution loop**
|
||||
- Checkpoint before node execution
|
||||
- Checkpoint after successful completion
|
||||
- Mark failure checkpoints on errors
|
||||
|
||||
### Phase 3: EventLoopNode Integration (Week 3)
|
||||
|
||||
1. **Enhance conversation restoration**
|
||||
- Link checkpoints to conversation states
|
||||
- Ensure OutputAccumulator is checkpointed
|
||||
- Test loop resumption from middle of execution
|
||||
|
||||
2. **Add optional loop iteration checkpoints**
|
||||
- Configurable iteration frequency
|
||||
- Balance between granularity and performance
|
||||
|
||||
### Phase 4: User-Facing Features (Week 4)
|
||||
|
||||
1. **Implement MCP tools**
|
||||
- `list_resumable_sessions`
|
||||
- `list_session_checkpoints`
|
||||
- `inspect_checkpoint`
|
||||
- `resume_session`
|
||||
|
||||
2. **Add CLI commands**
|
||||
- `hive sessions list`
|
||||
- `hive sessions checkpoints`
|
||||
- `hive sessions inspect`
|
||||
- `hive sessions resume`
|
||||
|
||||
3. **Update TUI**
|
||||
- Show resumable sessions in UI
|
||||
- Allow resume from TUI interface
|
||||
|
||||
### Phase 5: Testing & Documentation (Week 5)
|
||||
|
||||
1. **Write comprehensive tests**
|
||||
- Unit tests for CheckpointStore
|
||||
- Integration tests for resume flow
|
||||
- Edge case testing (concurrent checkpoints, corruption, etc.)
|
||||
|
||||
2. **Performance testing**
|
||||
- Measure checkpoint overhead
|
||||
- Optimize async checkpoint writing
|
||||
- Test with large memory states
|
||||
|
||||
3. **Documentation**
|
||||
- Update skills with resume patterns
|
||||
- Document checkpoint configuration
|
||||
- Add troubleshooting guide
|
||||
|
||||
## Performance Considerations
|
||||
|
||||
### Checkpoint Overhead
|
||||
|
||||
**Estimated overhead per checkpoint**:
|
||||
- Memory serialization: ~5-10ms for typical state (< 1MB)
|
||||
- File I/O: ~10-20ms for atomic write
|
||||
- Total: ~15-30ms per checkpoint
|
||||
|
||||
**Mitigation strategies**:
|
||||
1. **Async checkpointing**: Don't block execution on writes
|
||||
2. **Selective checkpointing**: Only checkpoint at important boundaries
|
||||
3. **Incremental checkpoints**: Store deltas instead of full state (future)
|
||||
4. **Compression**: Compress large memory states before writing
|
||||
|
||||
### Storage Size
|
||||
|
||||
**Typical checkpoint size**:
|
||||
- Small memory state (< 100KB): ~50-100KB per checkpoint
|
||||
- Medium memory state (100KB-1MB): ~500KB-1MB per checkpoint
|
||||
- Large memory state (> 1MB): ~1-5MB per checkpoint
|
||||
|
||||
**Mitigation strategies**:
|
||||
1. **Pruning**: Keep only N most recent checkpoints
|
||||
2. **Clean-only retention**: Only keep checkpoints from clean execution
|
||||
3. **Compression**: Use gzip for checkpoint files
|
||||
4. **Archiving**: Move old checkpoints to archive storage
|
||||
|
||||
## Error Handling
|
||||
|
||||
### Checkpoint Save Failures
|
||||
|
||||
**Scenarios**:
|
||||
- Disk full
|
||||
- Permission errors
|
||||
- Serialization failures
|
||||
- Concurrent writes
|
||||
|
||||
**Handling**:
|
||||
```python
|
||||
try:
|
||||
await checkpoint_store.save_checkpoint(session_id, checkpoint)
|
||||
except CheckpointSaveError as e:
|
||||
# Log warning but don't fail execution
|
||||
logger.warning(f"Failed to save checkpoint: {e}")
|
||||
# Continue execution without checkpoint
|
||||
```
|
||||
|
||||
### Checkpoint Load Failures
|
||||
|
||||
**Scenarios**:
|
||||
- Checkpoint file corrupted
|
||||
- Checkpoint format incompatible
|
||||
- Referenced conversation state missing
|
||||
|
||||
**Handling**:
|
||||
```python
|
||||
try:
|
||||
checkpoint = await checkpoint_store.load_checkpoint(session_id, checkpoint_id)
|
||||
except CheckpointLoadError as e:
|
||||
# Try to find previous valid checkpoint
|
||||
checkpoints = await checkpoint_store.list_checkpoints(session_id)
|
||||
for cp in reversed(checkpoints):
|
||||
try:
|
||||
checkpoint = await checkpoint_store.load_checkpoint(session_id, cp.checkpoint_id)
|
||||
logger.info(f"Fell back to checkpoint {cp.checkpoint_id}")
|
||||
break
|
||||
except CheckpointLoadError:
|
||||
continue
|
||||
else:
|
||||
raise ValueError(f"No valid checkpoints found for session {session_id}")
|
||||
```
|
||||
|
||||
### Resume Failures
|
||||
|
||||
**Scenarios**:
|
||||
- Checkpoint state inconsistent with current graph
|
||||
- Node no longer exists in updated agent code
|
||||
- Memory keys missing required values
|
||||
|
||||
**Handling**:
|
||||
1. **Validation**: Verify checkpoint compatibility before resume
|
||||
2. **Graceful degradation**: Resume from earlier checkpoint if possible
|
||||
3. **User notification**: Clear error messages about why resume failed
|
||||
|
||||
## Migration Path
|
||||
|
||||
### Backward Compatibility
|
||||
|
||||
**Existing sessions** (without checkpoints):
|
||||
- Can still be executed normally
|
||||
- Checkpoint system is opt-in per agent
|
||||
- No breaking changes to existing APIs
|
||||
|
||||
**Enabling checkpoints**:
|
||||
```python
|
||||
# Option 1: Agent-level default
|
||||
class MyAgent(Agent):
|
||||
checkpoint_config = CheckpointConfig(
|
||||
checkpoint_on_node_complete=True,
|
||||
)
|
||||
|
||||
# Option 2: Runtime override
|
||||
runtime = create_agent_runtime(
|
||||
agent=my_agent,
|
||||
checkpoint_config=CheckpointConfig(...),
|
||||
)
|
||||
|
||||
# Option 3: Per-execution
|
||||
result = await executor.execute(
|
||||
graph=graph,
|
||||
goal=goal,
|
||||
checkpoint_config=CheckpointConfig(...),
|
||||
)
|
||||
```
|
||||
|
||||
### Gradual Rollout
|
||||
|
||||
1. **Phase 1**: Core infrastructure, no user-facing features
|
||||
2. **Phase 2**: Opt-in for specific agents via config
|
||||
3. **Phase 3**: User-facing MCP tools and CLI
|
||||
4. **Phase 4**: Enable by default for all new agents
|
||||
5. **Phase 5**: TUI integration
|
||||
|
||||
## Future Enhancements
|
||||
|
||||
### 1. Incremental Checkpoints
|
||||
|
||||
Instead of full state snapshots, store only deltas:
|
||||
```python
|
||||
@dataclass
|
||||
class IncrementalCheckpoint:
|
||||
"""Checkpoint with only changed state."""
|
||||
base_checkpoint_id: str # Parent checkpoint
|
||||
memory_delta: dict[str, Any] # Only changed keys
|
||||
added_outputs: dict[str, Any] # Only new outputs
|
||||
```
|
||||
|
||||
### 2. Distributed Checkpointing
|
||||
|
||||
For long-running agents, checkpoint to cloud storage:
|
||||
```python
|
||||
checkpoint_config = CheckpointConfig(
|
||||
storage_backend="s3", # or "gcs", "azure"
|
||||
storage_url="s3://my-bucket/checkpoints/",
|
||||
)
|
||||
```
|
||||
|
||||
### 3. Checkpoint Compression
|
||||
|
||||
Compress large memory states:
|
||||
```python
|
||||
checkpoint_config = CheckpointConfig(
|
||||
compress=True,
|
||||
compression_threshold_bytes=100_000, # Compress if > 100KB
|
||||
)
|
||||
```
|
||||
|
||||
### 4. Smart Checkpoint Selection
|
||||
|
||||
Use heuristics to decide when to checkpoint:
|
||||
```python
|
||||
class SmartCheckpointStrategy:
|
||||
def should_checkpoint(self, context: ExecutionContext) -> bool:
|
||||
# Checkpoint after expensive nodes
|
||||
if context.node_latency_ms > 30_000:
|
||||
return True
|
||||
# Checkpoint before risky operations
|
||||
if context.node_id in ["api_call", "external_tool"]:
|
||||
return True
|
||||
# Checkpoint after significant memory changes
|
||||
if context.memory_delta_size > 10:
|
||||
return True
|
||||
return False
|
||||
```
|
||||
|
||||
## Security Considerations
|
||||
|
||||
### 1. Sensitive Data in Checkpoints
|
||||
|
||||
**Problem**: Checkpoints may contain sensitive data (API keys, credentials, PII)
|
||||
|
||||
**Mitigation**:
|
||||
```python
|
||||
@dataclass
|
||||
class CheckpointConfig:
|
||||
# Exclude sensitive keys from checkpoint
|
||||
exclude_memory_keys: list[str] = field(default_factory=lambda: [
|
||||
"api_key",
|
||||
"credentials",
|
||||
"access_token",
|
||||
])
|
||||
|
||||
# Encrypt checkpoint files
|
||||
encrypt_checkpoints: bool = True
|
||||
encryption_key_source: str = "keychain" # or "env_var", "file"
|
||||
```
|
||||
|
||||
### 2. Checkpoint Tampering
|
||||
|
||||
**Problem**: Malicious modification of checkpoint files
|
||||
|
||||
**Mitigation**:
|
||||
```python
|
||||
@dataclass
|
||||
class Checkpoint:
|
||||
# Add cryptographic signature
|
||||
signature: str # HMAC of checkpoint content
|
||||
|
||||
def verify_signature(self, secret_key: str) -> bool:
|
||||
"""Verify checkpoint hasn't been tampered with."""
|
||||
...
|
||||
```
|
||||
|
||||
## References
|
||||
|
||||
- [RUNTIME_LOGGING.md](./RUNTIME_LOGGING.md) - Current logging system
|
||||
- [session_state.py](../schemas/session_state.py) - Session state schema
|
||||
- [session_store.py](../storage/session_store.py) - Session storage
|
||||
- [executor.py](../graph/executor.py) - Graph executor
|
||||
- [event_loop_node.py](../graph/event_loop_node.py) - EventLoop implementation
|
||||
@@ -1,698 +0,0 @@
|
||||
# Runtime Logging System
|
||||
|
||||
## Overview
|
||||
|
||||
The Hive framework uses a **three-level observability system** for tracking agent execution at different granularities:
|
||||
|
||||
- **L1 (Summary)**: High-level run outcomes - success/failure, execution quality, attention flags
|
||||
- **L2 (Details)**: Per-node completion details - retries, verdicts, latency, attention reasons
|
||||
- **L3 (Tool Logs)**: Step-by-step execution - tool calls, LLM responses, judge feedback
|
||||
|
||||
This layered approach enables efficient debugging: start with L1 to identify problematic runs, drill into L2 to find failing nodes, and analyze L3 for root cause details.
|
||||
|
||||
---
|
||||
|
||||
## Storage Architecture
|
||||
|
||||
### Current Structure (Unified Sessions)
|
||||
|
||||
**Default since 2026-02-06**
|
||||
|
||||
```
|
||||
~/.hive/agents/{agent_name}/
|
||||
└── sessions/
|
||||
└── session_YYYYMMDD_HHMMSS_{uuid}/
|
||||
├── state.json # Session state and metadata
|
||||
├── logs/ # Runtime logs (L1/L2/L3)
|
||||
│ ├── summary.json # L1: Run outcome
|
||||
│ ├── details.jsonl # L2: Per-node results
|
||||
│ └── tool_logs.jsonl # L3: Step-by-step execution
|
||||
├── conversations/ # Flat EventLoop state (parts carry phase_id)
|
||||
└── data/ # Spillover artifacts
|
||||
```
|
||||
|
||||
**Key characteristics:**
|
||||
- All session data colocated in one directory
|
||||
- Consistent ID format: `session_YYYYMMDD_HHMMSS_{short_uuid}`
|
||||
- Logs written incrementally (JSONL for L2/L3)
|
||||
- Single source of truth: `state.json`
|
||||
|
||||
### Legacy Structure (Deprecated)
|
||||
|
||||
**Read-only for backward compatibility**
|
||||
|
||||
```
|
||||
~/.hive/agents/{agent_name}/
|
||||
├── runtime_logs/
|
||||
│ └── runs/
|
||||
│ └── {run_id}/
|
||||
│ ├── summary.json # L1
|
||||
│ ├── details.jsonl # L2
|
||||
│ └── tool_logs.jsonl # L3
|
||||
├── sessions/
|
||||
│ └── exec_{stream_id}_{uuid}/
|
||||
│ ├── conversations/
|
||||
│ └── data/
|
||||
├── runs/ # Deprecated
|
||||
│ └── run_start_*.json
|
||||
└── summaries/ # Deprecated
|
||||
└── run_start_*.json
|
||||
```
|
||||
|
||||
**Migration status:**
|
||||
- ✅ New sessions write to unified structure only
|
||||
- ✅ Old sessions remain readable
|
||||
- ❌ No new writes to `runs/`, `summaries/`, `runtime_logs/runs/`
|
||||
- ⚠️ Deprecation warnings emitted when reading old locations
|
||||
|
||||
---
|
||||
|
||||
## Components
|
||||
|
||||
### RuntimeLogger
|
||||
|
||||
**Location:** `core/framework/runtime/runtime_logger.py`
|
||||
|
||||
**Responsibilities:**
|
||||
- Receives execution events from GraphExecutor
|
||||
- Tracks per-node execution details
|
||||
- Aggregates attention flags
|
||||
- Coordinates with RuntimeLogStore
|
||||
|
||||
**Key methods:**
|
||||
```python
|
||||
def start_run(goal_id: str, session_id: str = "") -> str:
|
||||
"""Initialize a new run. Uses session_id as run_id if provided."""
|
||||
|
||||
def log_step(node_id: str, step_index: int, tool_calls: list, ...):
|
||||
"""Record one LLM step (L3). Appends to tool_logs.jsonl immediately."""
|
||||
|
||||
def log_node_complete(node_id: str, exit_status: str, ...):
|
||||
"""Record node completion (L2). Appends to details.jsonl immediately."""
|
||||
|
||||
async def end_run(status: str):
|
||||
"""Finalize run, aggregate L2→L1, write summary.json."""
|
||||
```
|
||||
|
||||
**Attention flag triggers:**
|
||||
```python
|
||||
# From runtime_logger.py:190-203
|
||||
needs_attention = any([
|
||||
retry_count > 3,
|
||||
escalate_count > 2,
|
||||
latency_ms > 60000,
|
||||
tokens_used > 100000,
|
||||
total_steps > 20,
|
||||
])
|
||||
```
|
||||
|
||||
### RuntimeLogStore
|
||||
|
||||
**Location:** `core/framework/runtime/runtime_log_store.py`
|
||||
|
||||
**Responsibilities:**
|
||||
- Manages log file I/O
|
||||
- Handles both old and new storage paths
|
||||
- Provides incremental append for L2/L3 (crash-safe)
|
||||
- Atomic writes for L1
|
||||
|
||||
**Storage path resolution:**
|
||||
```python
|
||||
def _get_run_dir(run_id: str) -> Path:
|
||||
"""Determine log directory based on run_id format.
|
||||
|
||||
- session_* → {storage_root}/sessions/{run_id}/logs/
|
||||
- Other → {base_path}/runtime_logs/runs/{run_id}/ (deprecated)
|
||||
"""
|
||||
```
|
||||
|
||||
**Key methods:**
|
||||
```python
|
||||
def ensure_run_dir(run_id: str):
|
||||
"""Create log directory immediately at start_run()."""
|
||||
|
||||
def append_step(run_id: str, step: NodeStepLog):
|
||||
"""Append L3 entry to tool_logs.jsonl. Thread-safe sync write."""
|
||||
|
||||
def append_node_detail(run_id: str, detail: NodeDetail):
|
||||
"""Append L2 entry to details.jsonl. Thread-safe sync write."""
|
||||
|
||||
async def save_summary(run_id: str, summary: RunSummaryLog):
|
||||
"""Write L1 summary.json atomically at end_run()."""
|
||||
```
|
||||
|
||||
**File format:**
|
||||
- **L1 (summary.json)**: Standard JSON, written once at end
|
||||
- **L2 (details.jsonl)**: JSONL (one object per line), appended per node
|
||||
- **L3 (tool_logs.jsonl)**: JSONL (one object per line), appended per step
|
||||
|
||||
### Runtime Log Schemas
|
||||
|
||||
**Location:** `core/framework/runtime/runtime_log_schemas.py`
|
||||
|
||||
**L1: RunSummaryLog**
|
||||
```python
|
||||
@dataclass
|
||||
class RunSummaryLog:
|
||||
run_id: str
|
||||
goal_id: str
|
||||
status: str # "success", "failure", "degraded", "in_progress"
|
||||
started_at: str # ISO 8601
|
||||
ended_at: str | None
|
||||
needs_attention: bool
|
||||
attention_summary: AttentionSummary
|
||||
total_nodes_executed: int
|
||||
nodes_with_failures: list[str]
|
||||
execution_quality: str # "clean", "degraded", "failed"
|
||||
total_latency_ms: int
|
||||
# ... additional metrics
|
||||
```
|
||||
|
||||
**L2: NodeDetail**
|
||||
```python
|
||||
@dataclass
|
||||
class NodeDetail:
|
||||
node_id: str
|
||||
exit_status: str # "success", "escalate", "no_valid_edge"
|
||||
retry_count: int
|
||||
verdict_counts: dict[str, int] # {ACCEPT: 1, RETRY: 3, ...}
|
||||
total_steps: int
|
||||
latency_ms: int
|
||||
needs_attention: bool
|
||||
attention_reasons: list[str]
|
||||
# ... tool error tracking, token counts
|
||||
```
|
||||
|
||||
**L3: NodeStepLog**
|
||||
```python
|
||||
@dataclass
|
||||
class NodeStepLog:
|
||||
node_id: str
|
||||
step_index: int
|
||||
tool_calls: list[dict]
|
||||
tool_results: list[dict]
|
||||
verdict: str # "ACCEPT", "RETRY", "ESCALATE", "CONTINUE"
|
||||
verdict_feedback: str
|
||||
llm_response_text: str
|
||||
tokens_used: int
|
||||
latency_ms: int
|
||||
# ... detailed execution state
|
||||
# Trace context (OTel-aligned; empty if observability context not set):
|
||||
trace_id: str # From set_trace_context (OTel trace)
|
||||
span_id: str # 16 hex chars per step (OTel span)
|
||||
parent_span_id: str # Optional; for nested span hierarchy
|
||||
execution_id: str # Session/run correlation id
|
||||
```
|
||||
|
||||
L3 entries include `trace_id`, `span_id`, and `execution_id` for correlation and **OpenTelemetry (OTel) compatibility**. When the framework sets trace context (e.g. via `Runtime.start_run()` or `StreamRuntime.start_run()`), these fields are populated automatically so L3 data can be exported to OTel backends without schema changes.
|
||||
|
||||
**L2: NodeDetail** also includes `trace_id` and `span_id`; **L1: RunSummaryLog** includes `trace_id` and `execution_id` for the same correlation.
|
||||
|
||||
---
|
||||
|
||||
## Querying Logs (MCP Tools)
|
||||
|
||||
### Tools Location
|
||||
|
||||
**MCP Server:** `tools/src/aden_tools/tools/runtime_logs_tool/runtime_logs_tool.py`
|
||||
|
||||
Three MCP tools provide access to the logging system:
|
||||
|
||||
### L1: query_runtime_logs
|
||||
|
||||
**Purpose:** Find problematic runs
|
||||
|
||||
```python
|
||||
query_runtime_logs(
|
||||
agent_work_dir: str, # e.g., "~/.hive/agents/deep_research_agent"
|
||||
status: str = "", # "needs_attention", "success", "failure", "degraded"
|
||||
limit: int = 20
|
||||
) -> dict # {"runs": [...], "total": int}
|
||||
```
|
||||
|
||||
**Returns:**
|
||||
```json
|
||||
{
|
||||
"runs": [
|
||||
{
|
||||
"run_id": "session_20260206_115718_e22339c5",
|
||||
"status": "degraded",
|
||||
"needs_attention": true,
|
||||
"attention_summary": {
|
||||
"total_attention_flags": 3,
|
||||
"categories": ["missing_outputs", "retry_loops"]
|
||||
},
|
||||
"started_at": "2026-02-06T11:57:18Z"
|
||||
}
|
||||
],
|
||||
"total": 1
|
||||
}
|
||||
```
|
||||
|
||||
**Common queries:**
|
||||
```python
|
||||
# Find all problematic runs
|
||||
query_runtime_logs(agent_work_dir, status="needs_attention")
|
||||
|
||||
# Get recent runs regardless of status
|
||||
query_runtime_logs(agent_work_dir, limit=10)
|
||||
|
||||
# Check for failures
|
||||
query_runtime_logs(agent_work_dir, status="failure")
|
||||
```
|
||||
|
||||
### L2: query_runtime_log_details
|
||||
|
||||
**Purpose:** Identify which nodes failed
|
||||
|
||||
```python
|
||||
query_runtime_log_details(
|
||||
agent_work_dir: str,
|
||||
run_id: str, # From L1 query
|
||||
needs_attention_only: bool = False,
|
||||
node_id: str = "" # Filter to specific node
|
||||
) -> dict # {"run_id": str, "nodes": [...]}
|
||||
```
|
||||
|
||||
**Returns:**
|
||||
```json
|
||||
{
|
||||
"run_id": "session_20260206_115718_e22339c5",
|
||||
"nodes": [
|
||||
{
|
||||
"node_id": "intake-collector",
|
||||
"exit_status": "escalate",
|
||||
"retry_count": 5,
|
||||
"verdict_counts": {"RETRY": 5, "ESCALATE": 1},
|
||||
"attention_reasons": ["high_retry_count", "missing_outputs"],
|
||||
"total_steps": 8,
|
||||
"latency_ms": 12500,
|
||||
"needs_attention": true
|
||||
}
|
||||
]
|
||||
}
|
||||
```
|
||||
|
||||
**Common queries:**
|
||||
```python
|
||||
# Get all problematic nodes
|
||||
query_runtime_log_details(agent_work_dir, run_id, needs_attention_only=True)
|
||||
|
||||
# Analyze specific node across run
|
||||
query_runtime_log_details(agent_work_dir, run_id, node_id="intake-collector")
|
||||
|
||||
# Full node breakdown
|
||||
query_runtime_log_details(agent_work_dir, run_id)
|
||||
```
|
||||
|
||||
### L3: query_runtime_log_raw
|
||||
|
||||
**Purpose:** Root cause analysis
|
||||
|
||||
```python
|
||||
query_runtime_log_raw(
|
||||
agent_work_dir: str,
|
||||
run_id: str,
|
||||
step_index: int = -1, # Specific step or -1 for all
|
||||
node_id: str = "" # Filter to specific node
|
||||
) -> dict # {"run_id": str, "steps": [...]}
|
||||
```
|
||||
|
||||
**Returns:**
|
||||
```json
|
||||
{
|
||||
"run_id": "session_20260206_115718_e22339c5",
|
||||
"steps": [
|
||||
{
|
||||
"node_id": "intake-collector",
|
||||
"step_index": 3,
|
||||
"tool_calls": [
|
||||
{
|
||||
"tool": "web_search",
|
||||
"args": {"query": "@RomuloNevesOf"}
|
||||
}
|
||||
],
|
||||
"tool_results": [
|
||||
{
|
||||
"status": "success",
|
||||
"data": "..."
|
||||
}
|
||||
],
|
||||
"verdict": "RETRY",
|
||||
"verdict_feedback": "Missing required output 'twitter_handles'. You found the handle but didn't call set_output.",
|
||||
"llm_response_text": "I found the Twitter profile...",
|
||||
"tokens_used": 1234,
|
||||
"latency_ms": 2500
|
||||
}
|
||||
]
|
||||
}
|
||||
```
|
||||
|
||||
**Common queries:**
|
||||
```python
|
||||
# All steps for a problematic node
|
||||
query_runtime_log_raw(agent_work_dir, run_id, node_id="intake-collector")
|
||||
|
||||
# Specific step analysis
|
||||
query_runtime_log_raw(agent_work_dir, run_id, step_index=5)
|
||||
|
||||
# Full execution trace
|
||||
query_runtime_log_raw(agent_work_dir, run_id)
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Usage Patterns
|
||||
|
||||
### Pattern 1: Top-Down Investigation
|
||||
|
||||
**Use case:** Debug a failing agent
|
||||
|
||||
```python
|
||||
# 1. Find problematic runs (L1)
|
||||
result = query_runtime_logs(
|
||||
agent_work_dir="~/.hive/agents/deep_research_agent",
|
||||
status="needs_attention"
|
||||
)
|
||||
run_id = result["runs"][0]["run_id"]
|
||||
|
||||
# 2. Identify failing nodes (L2)
|
||||
details = query_runtime_log_details(
|
||||
agent_work_dir="~/.hive/agents/deep_research_agent",
|
||||
run_id=run_id,
|
||||
needs_attention_only=True
|
||||
)
|
||||
problem_node = details["nodes"][0]["node_id"]
|
||||
|
||||
# 3. Analyze root cause (L3)
|
||||
raw = query_runtime_log_raw(
|
||||
agent_work_dir="~/.hive/agents/deep_research_agent",
|
||||
run_id=run_id,
|
||||
node_id=problem_node
|
||||
)
|
||||
# Examine verdict_feedback, tool_results, etc.
|
||||
```
|
||||
|
||||
### Pattern 2: Node-Specific Debugging
|
||||
|
||||
**Use case:** Investigate why a specific node keeps failing
|
||||
|
||||
```python
|
||||
# Get recent runs
|
||||
runs = query_runtime_logs("~/.hive/agents/my_agent", limit=10)
|
||||
|
||||
# For each run, check specific node
|
||||
for run in runs["runs"]:
|
||||
node_details = query_runtime_log_details(
|
||||
"~/.hive/agents/my_agent",
|
||||
run["run_id"],
|
||||
node_id="problematic-node"
|
||||
)
|
||||
# Analyze retry patterns, error types
|
||||
```
|
||||
|
||||
### Pattern 3: Real-Time Monitoring
|
||||
|
||||
**Use case:** Watch for issues during development
|
||||
|
||||
```python
|
||||
import time
|
||||
|
||||
while True:
|
||||
result = query_runtime_logs(
|
||||
agent_work_dir="~/.hive/agents/my_agent",
|
||||
status="needs_attention",
|
||||
limit=1
|
||||
)
|
||||
|
||||
if result["total"] > 0:
|
||||
new_issue = result["runs"][0]
|
||||
print(f"⚠️ New issue detected: {new_issue['run_id']}")
|
||||
# Alert or drill into L2/L3
|
||||
|
||||
time.sleep(10) # Poll every 10 seconds
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Integration Points
|
||||
|
||||
### GraphExecutor → RuntimeLogger
|
||||
|
||||
**Location:** `core/framework/graph/executor.py`
|
||||
|
||||
```python
|
||||
# Executor creates logger and passes session_id
|
||||
logger = RuntimeLogger(store, agent_id)
|
||||
run_id = logger.start_run(goal_id, session_id=execution_id)
|
||||
|
||||
# During execution
|
||||
logger.log_step(node_id, step_index, tool_calls, ...)
|
||||
logger.log_node_complete(node_id, exit_status, ...)
|
||||
|
||||
# At completion
|
||||
await logger.end_run(status="success")
|
||||
```
|
||||
|
||||
### EventLoopNode → RuntimeLogger
|
||||
|
||||
**Location:** `core/framework/graph/event_loop_node.py`
|
||||
|
||||
```python
|
||||
# EventLoopNode logs each step
|
||||
self._logger.log_step(
|
||||
node_id=self.id,
|
||||
step_index=step_count,
|
||||
tool_calls=current_tool_calls,
|
||||
tool_results=current_tool_results,
|
||||
verdict=verdict,
|
||||
verdict_feedback=feedback,
|
||||
...
|
||||
)
|
||||
```
|
||||
|
||||
### AgentRuntime → RuntimeLogger
|
||||
|
||||
**Location:** `core/framework/runtime/agent_runtime.py`
|
||||
|
||||
```python
|
||||
# Runtime initializes logger with storage path
|
||||
log_store = RuntimeLogStore(base_path / "runtime_logs")
|
||||
logger = RuntimeLogger(log_store, agent_id)
|
||||
|
||||
# Passes session_id from ExecutionStream
|
||||
logger.start_run(goal_id, session_id=execution_id)
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## File Format Details
|
||||
|
||||
### L1: summary.json
|
||||
|
||||
**Written:** Once at end_run()
|
||||
**Format:** Standard JSON
|
||||
|
||||
```json
|
||||
{
|
||||
"run_id": "session_20260206_115718_e22339c5",
|
||||
"goal_id": "deep-research",
|
||||
"status": "degraded",
|
||||
"started_at": "2026-02-06T11:57:18.593081",
|
||||
"ended_at": "2026-02-06T11:58:45.123456",
|
||||
"needs_attention": true,
|
||||
"attention_summary": {
|
||||
"total_attention_flags": 3,
|
||||
"categories": ["missing_outputs", "retry_loops"],
|
||||
"nodes_with_attention": ["intake-collector"]
|
||||
},
|
||||
"total_nodes_executed": 4,
|
||||
"nodes_with_failures": ["intake-collector"],
|
||||
"execution_quality": "degraded",
|
||||
"total_latency_ms": 86530,
|
||||
"total_retries": 5
|
||||
}
|
||||
```
|
||||
|
||||
### L2: details.jsonl
|
||||
|
||||
**Written:** Incrementally (append per node completion)
|
||||
**Format:** JSONL (one JSON object per line)
|
||||
|
||||
```jsonl
|
||||
{"node_id":"intake-collector","exit_status":"escalate","retry_count":5,"verdict_counts":{"RETRY":5,"ESCALATE":1},"total_steps":8,"latency_ms":12500,"needs_attention":true,"attention_reasons":["high_retry_count","missing_outputs"],"tool_error_count":0,"tokens_used":9876}
|
||||
{"node_id":"profile-analyzer","exit_status":"success","retry_count":0,"verdict_counts":{"ACCEPT":1},"total_steps":2,"latency_ms":5432,"needs_attention":false,"attention_reasons":[],"tool_error_count":0,"tokens_used":3456}
|
||||
```
|
||||
|
||||
### L3: tool_logs.jsonl
|
||||
|
||||
**Written:** Incrementally (append per step)
|
||||
**Format:** JSONL (one JSON object per line)
|
||||
|
||||
Each line includes **trace context** when the framework has set it (via the observability module): `trace_id`, `span_id`, `parent_span_id` (optional), and `execution_id`. These align with OpenTelemetry/W3C TraceContext so L3 data can be exported to OTel backends without schema changes.
|
||||
|
||||
```jsonl
|
||||
{"node_id":"intake-collector","step_index":3,"trace_id":"54e80d7b5bd6409dbc3217e5cd16a4fd","span_id":"a1b2c3d4e5f67890","execution_id":"b4c348ec54e80d7b5bd6409dbc3217e50","tool_calls":[...],"verdict":"RETRY",...}
|
||||
```
|
||||
|
||||
**Why JSONL?**
|
||||
- Incremental append during execution (crash-tolerant: lines already written survive a crash; at most the last line may be partial)
|
||||
- No need to parse entire file to add one line
|
||||
- Data persisted immediately, not buffered
|
||||
- Easy to stream/process line-by-line
|
||||
|
||||
---
|
||||
|
||||
## Attention Flags System
|
||||
|
||||
### Automatic Detection
|
||||
|
||||
The runtime logger automatically flags issues based on execution metrics:
|
||||
|
||||
| Trigger | Threshold | Attention Reason | Category |
|
||||
|---------|-----------|------------------|----------|
|
||||
| High retries | `retry_count > 3` | `high_retry_count` | Retry Loops |
|
||||
| Escalations | `escalate_count > 2` | `escalation_pattern` | Guard Failures |
|
||||
| High latency | `latency_ms > 60000` | `high_latency` | High Latency |
|
||||
| Token usage | `tokens_used > 100000` | `high_token_usage` | Memory/Context |
|
||||
| Stalled steps | `total_steps > 20` | `excessive_steps` | Stalled Execution |
|
||||
| Tool errors | `tool_error_count > 0` | `tool_failures` | Tool Errors |
|
||||
| Missing outputs | `exit_status != "success"` | `missing_outputs` | Missing Outputs |
|
||||
|
||||
### Attention Categories
|
||||
|
||||
Used for runtime issue categorization:
|
||||
|
||||
1. **Missing Outputs**: Node didn't set required output keys
|
||||
2. **Tool Errors**: Tool calls failed (API errors, timeouts)
|
||||
3. **Retry Loops**: Judge repeatedly rejecting outputs
|
||||
4. **Guard Failures**: Output validation failed
|
||||
5. **Stalled Execution**: EventLoopNode not making progress
|
||||
6. **High Latency**: Slow tool calls or LLM responses
|
||||
7. **Client-Facing Issues**: Premature set_output before user input
|
||||
8. **Edge Routing Errors**: No edges match current state
|
||||
9. **Memory/Context Issues**: Conversation history too long
|
||||
10. **Constraint Violations**: Agent violated goal-level rules
|
||||
|
||||
---
|
||||
|
||||
## Migration Guide
|
||||
|
||||
### Reading Old Logs
|
||||
|
||||
The MCP tools automatically read logs from both the old and the new storage locations:
|
||||
|
||||
```python
|
||||
# MCP tools check both locations automatically
|
||||
result = query_runtime_logs("~/.hive/agents/old_agent")
|
||||
# Returns logs from both the deprecated location (first path) and the unified session storage (second path):
|
||||
# - ~/.hive/agents/old_agent/runtime_logs/runs/*/
|
||||
# - ~/.hive/agents/old_agent/sessions/session_*/logs/
|
||||
```
|
||||
|
||||
### Deprecation Warnings
|
||||
|
||||
When reading from old locations, deprecation warnings are emitted:
|
||||
|
||||
```
|
||||
DeprecationWarning: Reading logs from deprecated location for run_id=20260101T120000_abc12345.
|
||||
New sessions use unified storage at sessions/session_*/logs/
|
||||
```
|
||||
|
||||
### Migration Script (Optional)
|
||||
|
||||
For migrating existing old logs to new format, see:
|
||||
- `EXECUTION_STORAGE_REDESIGN.md` - Migration strategy
|
||||
- Future: `scripts/migrate_to_unified_sessions.py`
|
||||
|
||||
---
|
||||
|
||||
## Performance Characteristics
|
||||
|
||||
### Write Performance
|
||||
|
||||
- **L3 append**: ~1-2ms per step (sync I/O, thread-safe)
|
||||
- **L2 append**: ~1-2ms per node (sync I/O, thread-safe)
|
||||
- **L1 write**: ~5-10ms at end_run (atomic, async)
|
||||
|
||||
**Overhead:** < 5% of total execution time for typical agents
|
||||
|
||||
### Read Performance
|
||||
|
||||
- **L1 summary**: ~1-5ms (single JSON file)
|
||||
- **L2 details**: ~10-50ms (JSONL, depends on node count)
|
||||
- **L3 raw logs**: ~50-500ms (JSONL, depends on step count)
|
||||
|
||||
**Optimization:** Use filters (node_id, step_index) to reduce data read
|
||||
|
||||
### Storage Size
|
||||
|
||||
Typical session with 5 nodes, 20 steps:
|
||||
|
||||
- **L1 (summary.json)**: ~2-5 KB
|
||||
- **L2 (details.jsonl)**: ~5-10 KB (1-2 KB per node)
|
||||
- **L3 (tool_logs.jsonl)**: ~50-200 KB (2-10 KB per step)
|
||||
|
||||
**Total per session:** ~60-215 KB
|
||||
|
||||
**Compression:** Consider archiving old sessions after 90 days
|
||||
|
||||
---
|
||||
|
||||
## Troubleshooting
|
||||
|
||||
### Issue: Logs not appearing
|
||||
|
||||
**Symptom:** MCP tools return empty results
|
||||
|
||||
**Check:**
|
||||
1. Verify storage path exists: `~/.hive/agents/{agent_name}/`
|
||||
2. Check session directories: `ls ~/.hive/agents/{agent_name}/sessions/`
|
||||
3. Verify logs directory exists: `ls ~/.hive/agents/{agent_name}/sessions/session_*/logs/`
|
||||
4. Check file permissions
|
||||
|
||||
### Issue: Corrupt JSONL files
|
||||
|
||||
**Symptom:** Partial data or JSON decode errors
|
||||
|
||||
**Cause:** Process crash during write (rare, but possible)
|
||||
|
||||
**Recovery:**
|
||||
```python
|
||||
# MCP tools skip corrupt lines automatically
|
||||
query_runtime_log_details(agent_work_dir, run_id)
|
||||
# Logs warning but continues with valid lines
|
||||
```
|
||||
|
||||
### Issue: High disk usage
|
||||
|
||||
**Symptom:** Storage growing too large
|
||||
|
||||
**Solution:**
|
||||
```bash
|
||||
# Archive old sessions
|
||||
cd ~/.hive/agents/{agent_name}/sessions/
|
||||
find . -maxdepth 1 -name "session_2025*" -type d -exec tar -czf archive.tar.gz {} + &&
|
||||
rm -rf session_2025*/
|
||||
|
||||
# Or set up automatic cleanup (future feature)
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## References
|
||||
|
||||
**Implementation:**
|
||||
- `core/framework/runtime/runtime_logger.py` - Logger implementation
|
||||
- `core/framework/runtime/runtime_log_store.py` - Storage layer
|
||||
- `core/framework/runtime/runtime_log_schemas.py` - Data schemas
|
||||
- `tools/src/aden_tools/tools/runtime_logs_tool/runtime_logs_tool.py` - MCP query tools
|
||||
|
||||
**Documentation:**
|
||||
- `EXECUTION_STORAGE_REDESIGN.md` - Unified session storage design
|
||||
- `docs/developer-guide.md` - Debugging and troubleshooting workflows
|
||||
|
||||
**Related:**
|
||||
- `core/framework/schemas/session_state.py` - Session state schema
|
||||
- `core/framework/storage/session_store.py` - Session state storage
|
||||
- `core/framework/graph/executor.py` - GraphExecutor integration
|
||||
@@ -9,6 +9,7 @@ import asyncio
|
||||
import logging
|
||||
import time
|
||||
import uuid
|
||||
from collections import OrderedDict
|
||||
from collections.abc import Callable
|
||||
from dataclasses import dataclass, field
|
||||
from datetime import datetime
|
||||
@@ -21,7 +22,7 @@ from framework.runtime.event_bus import EventBus
|
||||
from framework.runtime.execution_stream import EntryPointSpec, ExecutionStream
|
||||
from framework.runtime.outcome_aggregator import OutcomeAggregator
|
||||
from framework.runtime.runtime_log_store import RuntimeLogStore
|
||||
from framework.runtime.shared_state import SharedStateManager
|
||||
from framework.runtime.shared_state import SharedBufferManager
|
||||
from framework.storage.concurrent import ConcurrentStorage
|
||||
from framework.storage.session_store import SessionStore
|
||||
|
||||
@@ -44,6 +45,9 @@ class AgentRuntimeConfig:
|
||||
max_history: int = 1000
|
||||
execution_result_max: int = 1000
|
||||
execution_result_ttl_seconds: float | None = None
|
||||
# Idempotency cache for trigger() deduplication
|
||||
idempotency_ttl_seconds: float = 300.0
|
||||
idempotency_max_keys: int = 10000
|
||||
# Webhook server config (only starts if webhook_routes is non-empty)
|
||||
webhook_host: str = "127.0.0.1"
|
||||
webhook_port: int = 8080
|
||||
@@ -225,7 +229,7 @@ class AgentRuntime:
|
||||
self._session_store = SessionStore(storage_path_obj)
|
||||
|
||||
# Initialize shared components
|
||||
self._state_manager = SharedStateManager()
|
||||
self._state_manager = SharedBufferManager()
|
||||
self._event_bus = event_bus or EventBus(max_history=self._config.max_history)
|
||||
self._outcome_aggregator = OutcomeAggregator(goal, self._event_bus)
|
||||
|
||||
@@ -234,6 +238,12 @@ class AgentRuntime:
|
||||
self._tools = tools or []
|
||||
self._tool_executor = tool_executor
|
||||
self._accounts_prompt = accounts_prompt
|
||||
self._dynamic_memory_provider_factory: Callable[[str], Callable[[], str] | None] | None = None
|
||||
# Colony memory config for reflection-at-handoff (set by session_manager)
|
||||
self._colony_memory_dir: Any = None
|
||||
self._colony_worker_sessions_dir: Any = None
|
||||
self._colony_recall_cache: dict[str, str] | None = None
|
||||
self._colony_reflect_llm: Any = None
|
||||
self._accounts_data = accounts_data
|
||||
self._tool_provider_map = tool_provider_map
|
||||
|
||||
@@ -250,6 +260,10 @@ class AgentRuntime:
|
||||
# Next fire time for each timer entry point (ep_id -> float timestamp)
|
||||
self._timer_next_fire: dict[str, float] = {}
|
||||
|
||||
# Idempotency cache for trigger() deduplication
|
||||
self._idempotency_keys: OrderedDict[str, str] = OrderedDict()
|
||||
self._idempotency_times: dict[str, float] = {}
|
||||
|
||||
# State
|
||||
self._running = False
|
||||
self._timers_paused = False
|
||||
@@ -352,6 +366,11 @@ class AgentRuntime:
|
||||
skill_dirs=self.skill_dirs,
|
||||
context_warn_ratio=self.context_warn_ratio,
|
||||
batch_init_nudge=self.batch_init_nudge,
|
||||
dynamic_memory_provider_factory=self._dynamic_memory_provider_factory,
|
||||
colony_memory_dir=self._colony_memory_dir,
|
||||
colony_worker_sessions_dir=self._colony_worker_sessions_dir,
|
||||
colony_recall_cache=self._colony_recall_cache,
|
||||
colony_reflect_llm=self._colony_reflect_llm,
|
||||
)
|
||||
await stream.start()
|
||||
self._streams[ep_id] = stream
|
||||
@@ -853,12 +872,29 @@ class AgentRuntime:
|
||||
# Primary graph (also stored in self._streams)
|
||||
return self._streams.get(entry_point_id)
|
||||
|
||||
def _prune_idempotency_keys(self) -> None:
|
||||
"""Prune expired idempotency keys based on TTL and max size."""
|
||||
ttl = self._config.idempotency_ttl_seconds
|
||||
if ttl > 0:
|
||||
cutoff = time.time() - ttl
|
||||
for key, recorded_at in list(self._idempotency_times.items()):
|
||||
if recorded_at < cutoff:
|
||||
self._idempotency_times.pop(key, None)
|
||||
self._idempotency_keys.pop(key, None)
|
||||
|
||||
max_keys = self._config.idempotency_max_keys
|
||||
if max_keys > 0:
|
||||
while len(self._idempotency_keys) > max_keys:
|
||||
old_key, _ = self._idempotency_keys.popitem(last=False)
|
||||
self._idempotency_times.pop(old_key, None)
|
||||
|
||||
async def trigger(
|
||||
self,
|
||||
entry_point_id: str,
|
||||
input_data: dict[str, Any],
|
||||
correlation_id: str | None = None,
|
||||
session_state: dict[str, Any] | None = None,
|
||||
idempotency_key: str | None = None,
|
||||
graph_id: str | None = None,
|
||||
) -> str:
|
||||
"""
|
||||
@@ -871,6 +907,10 @@ class AgentRuntime:
|
||||
input_data: Input data for the execution
|
||||
correlation_id: Optional ID to correlate related executions
|
||||
session_state: Optional session state to resume from (with paused_at, memory)
|
||||
idempotency_key: Optional key for deduplication. If a trigger with
|
||||
the same key was already processed within the TTL window, the
|
||||
cached execution_id is returned instead of starting a new
|
||||
execution. Useful for webhook providers that retry on timeout.
|
||||
graph_id: Graph to trigger on. ``None`` uses the active graph
|
||||
first, then falls back to the primary graph.
|
||||
|
||||
@@ -884,12 +924,32 @@ class AgentRuntime:
|
||||
if not self._running:
|
||||
raise RuntimeError("AgentRuntime is not running")
|
||||
|
||||
# Idempotency check: return cached execution_id for duplicate keys.
|
||||
if idempotency_key is not None:
|
||||
self._prune_idempotency_keys()
|
||||
cached = self._idempotency_keys.get(idempotency_key)
|
||||
if cached is not None:
|
||||
logger.debug(
|
||||
"Idempotent trigger: key '%s' already seen, returning %s",
|
||||
idempotency_key,
|
||||
cached,
|
||||
)
|
||||
return cached
|
||||
|
||||
stream = self._resolve_stream(entry_point_id, graph_id)
|
||||
if stream is None:
|
||||
raise ValueError(f"Entry point '{entry_point_id}' not found")
|
||||
|
||||
run_id = uuid.uuid4().hex[:12]
|
||||
return await stream.execute(input_data, correlation_id, session_state, run_id=run_id)
|
||||
exec_id = await stream.execute(input_data, correlation_id, session_state, run_id=run_id)
|
||||
|
||||
# Cache after execute() so the value is always a real execution_id
|
||||
# that callers can use for tracking.
|
||||
if idempotency_key is not None:
|
||||
self._idempotency_keys[idempotency_key] = exec_id
|
||||
self._idempotency_times[idempotency_key] = time.time()
|
||||
|
||||
return exec_id
|
||||
|
||||
async def trigger_and_wait(
|
||||
self,
|
||||
@@ -897,6 +957,7 @@ class AgentRuntime:
|
||||
input_data: dict[str, Any],
|
||||
timeout: float | None = None,
|
||||
session_state: dict[str, Any] | None = None,
|
||||
idempotency_key: str | None = None,
|
||||
) -> ExecutionResult | None:
|
||||
"""
|
||||
Trigger execution and wait for completion.
|
||||
@@ -906,11 +967,17 @@ class AgentRuntime:
|
||||
input_data: Input data for the execution
|
||||
timeout: Maximum time to wait (seconds)
|
||||
session_state: Optional session state to resume from (with paused_at, memory)
|
||||
idempotency_key: Optional key for deduplication (see trigger() for details).
|
||||
|
||||
Returns:
|
||||
ExecutionResult or None if timeout
|
||||
"""
|
||||
exec_id = await self.trigger(entry_point_id, input_data, session_state=session_state)
|
||||
exec_id = await self.trigger(
|
||||
entry_point_id,
|
||||
input_data,
|
||||
session_state=session_state,
|
||||
idempotency_key=idempotency_key,
|
||||
)
|
||||
stream = self._resolve_stream(entry_point_id)
|
||||
if stream is None:
|
||||
raise ValueError(f"Entry point '{entry_point_id}' not found")
|
||||
@@ -1390,12 +1457,12 @@ class AgentRuntime:
|
||||
``session_state`` dict containing:
|
||||
|
||||
- ``resume_session_id``: reuse the same session directory
|
||||
- ``memory``: only the keys that the async entry node declares
|
||||
- ``data_buffer``: only the keys that the async entry node declares
|
||||
as inputs (e.g. ``rules``, ``max_emails``). Stale outputs
|
||||
from previous runs (``emails``, ``actions_taken``, …) are
|
||||
excluded so each trigger starts fresh.
|
||||
|
||||
The memory is read from the primary session's ``state.json``
|
||||
The data buffer is read from the primary session's ``state.json``
|
||||
which is kept up-to-date by ``GraphExecutor._write_progress()``
|
||||
at every node transition.
|
||||
|
||||
@@ -1413,7 +1480,7 @@ class AgentRuntime:
|
||||
"""
|
||||
import json as _json
|
||||
|
||||
# Determine which memory keys the async entry node needs.
|
||||
# Determine which data buffer keys the async entry node needs.
|
||||
allowed_keys: set[str] | None = None
|
||||
# Look up the entry node from the correct graph
|
||||
src_graph_id = source_graph_id or self._graph_id
|
||||
@@ -1449,19 +1516,19 @@ class AgentRuntime:
|
||||
try:
|
||||
if state_path.exists():
|
||||
data = _json.loads(state_path.read_text(encoding="utf-8"))
|
||||
full_memory = data.get("memory", {})
|
||||
if not full_memory:
|
||||
full_buffer = data.get("data_buffer", data.get("memory", {}))
|
||||
if not full_buffer:
|
||||
continue
|
||||
# Filter to only input keys so stale outputs
|
||||
# from previous triggers don't leak through.
|
||||
if allowed_keys is not None:
|
||||
memory = {k: v for k, v in full_memory.items() if k in allowed_keys}
|
||||
buffer_data = {k: v for k, v in full_buffer.items() if k in allowed_keys}
|
||||
else:
|
||||
memory = full_memory
|
||||
if memory:
|
||||
buffer_data = full_buffer
|
||||
if buffer_data:
|
||||
return {
|
||||
"resume_session_id": exec_id,
|
||||
"memory": memory,
|
||||
"data_buffer": buffer_data,
|
||||
}
|
||||
except Exception:
|
||||
logger.debug(
|
||||
@@ -1610,7 +1677,7 @@ class AgentRuntime:
|
||||
for node_id, node in executor.node_registry.items():
|
||||
if getattr(node, "_awaiting_input", False):
|
||||
# Skip escalation receivers — those are handled
|
||||
# by the queen via inject_worker_message(), not
|
||||
# by the queen via inject_message(), not
|
||||
# by the user directly.
|
||||
if ":escalation:" in node_id:
|
||||
continue
|
||||
@@ -1725,7 +1792,7 @@ class AgentRuntime:
|
||||
# === PROPERTIES ===
|
||||
|
||||
@property
|
||||
def state_manager(self) -> SharedStateManager:
|
||||
def state_manager(self) -> SharedBufferManager:
|
||||
"""Access the shared state manager."""
|
||||
return self._state_manager
|
||||
|
||||
|
||||
@@ -1,39 +0,0 @@
|
||||
"""EscalationTicket — structured schema for worker health escalations."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from datetime import UTC, datetime
|
||||
from typing import Literal
|
||||
from uuid import uuid4
|
||||
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
|
||||
class EscalationTicket(BaseModel):
|
||||
"""Structured escalation report for worker health monitoring.
|
||||
|
||||
All fields must be filled before calling emit_escalation_ticket.
|
||||
Pydantic validation rejects partial tickets.
|
||||
"""
|
||||
|
||||
ticket_id: str = Field(default_factory=lambda: str(uuid4()))
|
||||
created_at: str = Field(default_factory=lambda: datetime.now(UTC).isoformat())
|
||||
|
||||
# Worker identification
|
||||
worker_agent_id: str
|
||||
worker_session_id: str
|
||||
worker_node_id: str
|
||||
worker_graph_id: str
|
||||
|
||||
# Problem characterization
|
||||
severity: Literal["low", "medium", "high", "critical"]
|
||||
cause: str # Human-readable: "Node has produced 18 RETRY verdicts..."
|
||||
judge_reasoning: str # Judge's own deliberation chain
|
||||
suggested_action: str # "Restart node", "Human review", "Kill session", etc.
|
||||
|
||||
# Evidence
|
||||
recent_verdicts: list[str] # e.g. ["RETRY", "RETRY", "CONTINUE", "RETRY"]
|
||||
total_steps_checked: int # How many steps the judge saw
|
||||
steps_since_last_accept: int # Steps with no ACCEPT verdict
|
||||
stall_minutes: float | None # Wall-clock minutes since last new log step (None if active)
|
||||
evidence_snippet: str # Brief excerpt from recent LLM output or error
|
||||
@@ -94,12 +94,12 @@ class EventType(StrEnum):
|
||||
TOOL_CALL_STARTED = "tool_call_started"
|
||||
TOOL_CALL_COMPLETED = "tool_call_completed"
|
||||
|
||||
# Client I/O (client_facing=True nodes only)
|
||||
# Queen/user interaction events
|
||||
CLIENT_OUTPUT_DELTA = "client_output_delta"
|
||||
CLIENT_INPUT_REQUESTED = "client_input_requested"
|
||||
CLIENT_INPUT_RECEIVED = "client_input_received"
|
||||
|
||||
# Internal node observability (client_facing=False nodes)
|
||||
# Internal node observability
|
||||
NODE_INTERNAL_OUTPUT = "node_internal_output"
|
||||
NODE_INPUT_BLOCKED = "node_input_blocked"
|
||||
NODE_STALLED = "node_stalled"
|
||||
@@ -115,6 +115,10 @@ class EventType(StrEnum):
|
||||
NODE_RETRY = "node_retry"
|
||||
EDGE_TRAVERSED = "edge_traversed"
|
||||
|
||||
# Worker agent lifecycle (event-driven graph execution)
|
||||
WORKER_COMPLETED = "worker_completed"
|
||||
WORKER_FAILED = "worker_failed"
|
||||
|
||||
# Context management
|
||||
CONTEXT_COMPACTED = "context_compacted"
|
||||
CONTEXT_USAGE_UPDATED = "context_usage_updated"
|
||||
@@ -128,15 +132,11 @@ class EventType(StrEnum):
|
||||
# Escalation (agent requests handoff to queen)
|
||||
ESCALATION_REQUESTED = "escalation_requested"
|
||||
|
||||
# Worker health monitoring
|
||||
WORKER_ESCALATION_TICKET = "worker_escalation_ticket"
|
||||
QUEEN_INTERVENTION_REQUESTED = "queen_intervention_requested"
|
||||
|
||||
# Execution resurrection (auto-restart on non-fatal failure)
|
||||
EXECUTION_RESURRECTED = "execution_resurrected"
|
||||
|
||||
# Worker lifecycle (session manager → frontend)
|
||||
WORKER_LOADED = "worker_loaded"
|
||||
# Graph lifecycle (session manager → frontend)
|
||||
WORKER_GRAPH_LOADED = "worker_graph_loaded"
|
||||
CREDENTIALS_REQUIRED = "credentials_required"
|
||||
|
||||
# Draft graph (planning phase — lightweight graph preview)
|
||||
@@ -879,7 +879,7 @@ class EventBus:
|
||||
iteration: int | None = None,
|
||||
inner_turn: int = 0,
|
||||
) -> None:
|
||||
"""Emit client output delta event (client_facing=True nodes)."""
|
||||
"""Emit user-facing output delta for interactive queen turns."""
|
||||
data: dict = {"content": content, "snapshot": snapshot, "inner_turn": inner_turn}
|
||||
if iteration is not None:
|
||||
data["iteration"] = iteration
|
||||
@@ -902,7 +902,7 @@ class EventBus:
|
||||
options: list[str] | None = None,
|
||||
questions: list[dict] | None = None,
|
||||
) -> None:
|
||||
"""Emit client input requested event (client_facing=True nodes).
|
||||
"""Emit a user-input request for interactive queen turns.
|
||||
|
||||
Args:
|
||||
options: Optional predefined choices for the user (1-3 items).
|
||||
@@ -936,7 +936,7 @@ class EventBus:
|
||||
content: str,
|
||||
execution_id: str | None = None,
|
||||
) -> None:
|
||||
"""Emit node internal output event (client_facing=False nodes)."""
|
||||
"""Emit node internal output for non-user-facing execution."""
|
||||
await self.publish(
|
||||
AgentEvent(
|
||||
type=EventType.NODE_INTERNAL_OUTPUT,
|
||||
@@ -1094,6 +1094,54 @@ class EventBus:
|
||||
)
|
||||
)
|
||||
|
||||
async def emit_worker_completed(
|
||||
self,
|
||||
stream_id: str,
|
||||
node_id: str,
|
||||
worker_id: str,
|
||||
success: bool,
|
||||
output: dict[str, Any],
|
||||
activations: list[dict[str, Any]] | None = None,
|
||||
execution_id: str | None = None,
|
||||
**extra_data: Any,
|
||||
) -> None:
|
||||
"""Emit worker completed event with outgoing activations."""
|
||||
data: dict[str, Any] = {
|
||||
"worker_id": worker_id,
|
||||
"success": success,
|
||||
"output": output,
|
||||
"activations": activations or [],
|
||||
**extra_data,
|
||||
}
|
||||
await self.publish(
|
||||
AgentEvent(
|
||||
type=EventType.WORKER_COMPLETED,
|
||||
stream_id=stream_id,
|
||||
node_id=node_id,
|
||||
execution_id=execution_id,
|
||||
data=data,
|
||||
)
|
||||
)
|
||||
|
||||
async def emit_worker_failed(
|
||||
self,
|
||||
stream_id: str,
|
||||
node_id: str,
|
||||
worker_id: str,
|
||||
error: str,
|
||||
execution_id: str | None = None,
|
||||
) -> None:
|
||||
"""Emit worker failed event."""
|
||||
await self.publish(
|
||||
AgentEvent(
|
||||
type=EventType.WORKER_FAILED,
|
||||
stream_id=stream_id,
|
||||
node_id=node_id,
|
||||
execution_id=execution_id,
|
||||
data={"worker_id": worker_id, "error": error},
|
||||
)
|
||||
)
|
||||
|
||||
async def emit_execution_paused(
|
||||
self,
|
||||
stream_id: str,
|
||||
@@ -1172,52 +1220,6 @@ class EventBus:
|
||||
)
|
||||
)
|
||||
|
||||
async def emit_worker_escalation_ticket(
|
||||
self,
|
||||
stream_id: str,
|
||||
node_id: str,
|
||||
ticket: dict,
|
||||
execution_id: str | None = None,
|
||||
) -> None:
|
||||
"""Emitted when worker shows a degradation pattern."""
|
||||
await self.publish(
|
||||
AgentEvent(
|
||||
type=EventType.WORKER_ESCALATION_TICKET,
|
||||
stream_id=stream_id,
|
||||
node_id=node_id,
|
||||
execution_id=execution_id,
|
||||
data={"ticket": ticket},
|
||||
)
|
||||
)
|
||||
|
||||
async def emit_queen_intervention_requested(
|
||||
self,
|
||||
stream_id: str,
|
||||
node_id: str,
|
||||
ticket_id: str,
|
||||
analysis: str,
|
||||
severity: str,
|
||||
queen_graph_id: str,
|
||||
queen_stream_id: str,
|
||||
execution_id: str | None = None,
|
||||
) -> None:
|
||||
"""Emitted by queen when she decides the operator should be involved."""
|
||||
await self.publish(
|
||||
AgentEvent(
|
||||
type=EventType.QUEEN_INTERVENTION_REQUESTED,
|
||||
stream_id=stream_id,
|
||||
node_id=node_id,
|
||||
execution_id=execution_id,
|
||||
data={
|
||||
"ticket_id": ticket_id,
|
||||
"analysis": analysis,
|
||||
"severity": severity,
|
||||
"queen_graph_id": queen_graph_id,
|
||||
"queen_stream_id": queen_stream_id,
|
||||
},
|
||||
)
|
||||
)
|
||||
|
||||
async def emit_subagent_report(
|
||||
self,
|
||||
stream_id: str,
|
||||
|
||||
@@ -21,7 +21,7 @@ from typing import TYPE_CHECKING, Any
|
||||
from framework.graph.checkpoint_config import CheckpointConfig
|
||||
from framework.graph.executor import ExecutionResult, GraphExecutor
|
||||
from framework.runtime.event_bus import EventBus
|
||||
from framework.runtime.shared_state import IsolationLevel, SharedStateManager
|
||||
from framework.runtime.shared_state import IsolationLevel, SharedBufferManager
|
||||
from framework.runtime.stream_runtime import StreamRuntime, StreamRuntimeAdapter
|
||||
|
||||
if TYPE_CHECKING:
|
||||
@@ -170,7 +170,7 @@ class ExecutionStream:
|
||||
entry_spec: EntryPointSpec,
|
||||
graph: "GraphSpec",
|
||||
goal: "Goal",
|
||||
state_manager: SharedStateManager,
|
||||
state_manager: SharedBufferManager,
|
||||
storage: "ConcurrentStorage",
|
||||
outcome_aggregator: "OutcomeAggregator",
|
||||
event_bus: "EventBus | None" = None,
|
||||
@@ -191,6 +191,11 @@ class ExecutionStream:
|
||||
skill_dirs: list[str] | None = None,
|
||||
context_warn_ratio: float | None = None,
|
||||
batch_init_nudge: str | None = None,
|
||||
dynamic_memory_provider_factory: Callable[[str], Callable[[], str] | None] | None = None,
|
||||
colony_memory_dir: Any = None,
|
||||
colony_worker_sessions_dir: Any = None,
|
||||
colony_recall_cache: dict[str, str] | None = None,
|
||||
colony_reflect_llm: Any = None,
|
||||
):
|
||||
"""
|
||||
Initialize execution stream.
|
||||
@@ -245,6 +250,11 @@ class ExecutionStream:
|
||||
self._skill_dirs: list[str] = skill_dirs or []
|
||||
self._context_warn_ratio: float | None = context_warn_ratio
|
||||
self._batch_init_nudge: str | None = batch_init_nudge
|
||||
self._dynamic_memory_provider_factory = dynamic_memory_provider_factory
|
||||
self._colony_memory_dir = colony_memory_dir
|
||||
self._colony_worker_sessions_dir = colony_worker_sessions_dir
|
||||
self._colony_recall_cache = colony_recall_cache
|
||||
self._colony_reflect_llm = colony_reflect_llm
|
||||
|
||||
_es_logger = logging.getLogger(__name__)
|
||||
if protocols_prompt:
|
||||
@@ -357,7 +367,7 @@ class ExecutionStream:
|
||||
|
||||
Each entry is ``{"node_id": ..., "execution_id": ...}``.
|
||||
The currently executing node is placed first so that
|
||||
``inject_worker_message`` targets the active node, not a stale one.
|
||||
``inject_message`` targets the active node, not a stale one.
|
||||
"""
|
||||
injectable: list[dict[str, str]] = []
|
||||
current_first: list[dict[str, str]] = []
|
||||
@@ -550,6 +560,14 @@ class ExecutionStream:
|
||||
correlation_id = execution_id
|
||||
|
||||
# Create execution context
|
||||
effective_run_id = None
|
||||
if session_state:
|
||||
existing_run_id = session_state.get("run_id")
|
||||
if isinstance(existing_run_id, str) and existing_run_id:
|
||||
effective_run_id = existing_run_id
|
||||
if effective_run_id is None:
|
||||
effective_run_id = run_id
|
||||
|
||||
ctx = ExecutionContext(
|
||||
id=execution_id,
|
||||
correlation_id=correlation_id,
|
||||
@@ -558,7 +576,7 @@ class ExecutionStream:
|
||||
input_data=input_data,
|
||||
isolation_level=self.entry_spec.get_isolation_level(),
|
||||
session_state=session_state,
|
||||
run_id=run_id,
|
||||
run_id=effective_run_id,
|
||||
)
|
||||
|
||||
async with self._lock:
|
||||
@@ -639,7 +657,7 @@ class ExecutionStream:
|
||||
self._write_run_event(execution_id, ctx.run_id, "run_started")
|
||||
|
||||
# Create execution-scoped memory
|
||||
self._state_manager.create_memory(
|
||||
self._state_manager.create_buffer(
|
||||
execution_id=execution_id,
|
||||
stream_id=self.stream_id,
|
||||
isolation=ctx.isolation_level,
|
||||
@@ -700,6 +718,7 @@ class ExecutionStream:
|
||||
event_bus=self._scoped_event_bus,
|
||||
stream_id=self.stream_id,
|
||||
execution_id=execution_id,
|
||||
run_id=ctx.run_id or "",
|
||||
storage_path=exec_storage,
|
||||
runtime_logger=runtime_logger,
|
||||
loop_config=self.graph.loop_config,
|
||||
@@ -711,6 +730,15 @@ class ExecutionStream:
|
||||
skill_dirs=self._skill_dirs,
|
||||
context_warn_ratio=self._context_warn_ratio,
|
||||
batch_init_nudge=self._batch_init_nudge,
|
||||
dynamic_memory_provider=(
|
||||
self._dynamic_memory_provider_factory(execution_id)
|
||||
if self._dynamic_memory_provider_factory is not None
|
||||
else None
|
||||
),
|
||||
colony_memory_dir=self._colony_memory_dir,
|
||||
colony_worker_sessions_dir=self._colony_worker_sessions_dir,
|
||||
colony_recall_cache=self._colony_recall_cache,
|
||||
colony_reflect_llm=self._colony_reflect_llm,
|
||||
)
|
||||
# Track executor so inject_input() can reach EventLoopNode instances
|
||||
self._active_executors[execution_id] = executor
|
||||
@@ -1044,6 +1072,7 @@ class ExecutionStream:
|
||||
agent_id=self.graph.id,
|
||||
entry_point=self.entry_spec.id,
|
||||
)
|
||||
state.current_run_id = ctx.run_id
|
||||
else:
|
||||
# Create initial state — when resuming, preserve the previous
|
||||
# execution's progress so crashes don't lose track of state.
|
||||
@@ -1074,8 +1103,9 @@ class ExecutionStream:
|
||||
updated_at=now,
|
||||
),
|
||||
progress=progress,
|
||||
memory=ss.get("memory", {}),
|
||||
data_buffer=ss.get("data_buffer", ss.get("memory", {})),
|
||||
input_data=ctx.input_data,
|
||||
current_run_id=ctx.run_id,
|
||||
)
|
||||
|
||||
# Handle error case
|
||||
@@ -1198,9 +1228,22 @@ class ExecutionStream:
|
||||
task.cancel()
|
||||
# Wait briefly for the task to finish. Don't block indefinitely —
|
||||
# the task may be stuck in a long LLM API call that doesn't
|
||||
# respond to cancellation quickly. The cancellation is already
|
||||
# requested; the task will clean up in the background.
|
||||
# respond to cancellation quickly.
|
||||
done, _ = await asyncio.wait({task}, timeout=5.0)
|
||||
if not done:
|
||||
# Task didn't finish within timeout — clean up bookkeeping now
|
||||
# so the session doesn't think it still has running executions.
|
||||
# The task will continue winding down in the background and its
|
||||
# finally block will harmlessly pop already-removed keys.
|
||||
logger.warning(
|
||||
"Execution %s did not finish within cancel timeout; "
|
||||
"force-cleaning bookkeeping",
|
||||
execution_id,
|
||||
)
|
||||
async with self._lock:
|
||||
self._active_executions.pop(execution_id, None)
|
||||
self._execution_tasks.pop(execution_id, None)
|
||||
self._active_executors.pop(execution_id, None)
|
||||
return True
|
||||
return False
|
||||
|
||||
|
||||
@@ -1,10 +1,10 @@
|
||||
"""
|
||||
Shared State Manager - Manages state across concurrent executions.
|
||||
Shared Buffer Manager - Manages state across concurrent executions.
|
||||
|
||||
Provides different isolation levels:
|
||||
- ISOLATED: Each execution has its own memory copy
|
||||
- SHARED: All executions read/write same memory (eventual consistency)
|
||||
- SYNCHRONIZED: Shared memory with write locks (strong consistency)
|
||||
- ISOLATED: Each execution has its own state copy
|
||||
- SHARED: All executions read/write same state (eventual consistency)
|
||||
- SYNCHRONIZED: Shared state with write locks (strong consistency)
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
@@ -46,7 +46,7 @@ class StateChange:
|
||||
timestamp: float = field(default_factory=time.time)
|
||||
|
||||
|
||||
class SharedStateManager:
|
||||
class SharedBufferManager:
|
||||
"""
|
||||
Manages shared state across concurrent executions.
|
||||
|
||||
@@ -61,18 +61,18 @@ class SharedStateManager:
|
||||
- SYNCHRONIZED: Like SHARED but with write locks
|
||||
|
||||
Example:
|
||||
manager = SharedStateManager()
|
||||
manager = SharedBufferManager()
|
||||
|
||||
# Create memory for an execution
|
||||
memory = manager.create_memory(
|
||||
# Create buffer for an execution
|
||||
buf = manager.create_buffer(
|
||||
execution_id="exec_123",
|
||||
stream_id="webhook",
|
||||
isolation=IsolationLevel.SHARED,
|
||||
)
|
||||
|
||||
# Read/write through the memory
|
||||
await memory.write("customer_id", "cust_456", scope=StateScope.STREAM)
|
||||
value = await memory.read("customer_id")
|
||||
# Read/write through the buffer
|
||||
await buf.write("customer_id", "cust_456", scope=StateScope.STREAM)
|
||||
value = await buf.read("customer_id")
|
||||
"""
|
||||
|
||||
def __init__(self):
|
||||
@@ -93,14 +93,14 @@ class SharedStateManager:
|
||||
# Version tracking
|
||||
self._version = 0
|
||||
|
||||
def create_memory(
|
||||
def create_buffer(
|
||||
self,
|
||||
execution_id: str,
|
||||
stream_id: str,
|
||||
isolation: IsolationLevel,
|
||||
) -> "StreamMemory":
|
||||
) -> "StreamBuffer":
|
||||
"""
|
||||
Create a memory instance for an execution.
|
||||
Create a buffer instance for an execution.
|
||||
|
||||
Args:
|
||||
execution_id: Unique execution identifier
|
||||
@@ -108,7 +108,7 @@ class SharedStateManager:
|
||||
isolation: Isolation level for this execution
|
||||
|
||||
Returns:
|
||||
StreamMemory instance for reading/writing state
|
||||
StreamBuffer instance for reading/writing state
|
||||
"""
|
||||
# Initialize execution state
|
||||
if execution_id not in self._execution_state:
|
||||
@@ -119,7 +119,7 @@ class SharedStateManager:
|
||||
self._stream_state[stream_id] = {}
|
||||
self._stream_locks[stream_id] = asyncio.Lock()
|
||||
|
||||
return StreamMemory(
|
||||
return StreamBuffer(
|
||||
manager=self,
|
||||
execution_id=execution_id,
|
||||
stream_id=stream_id,
|
||||
@@ -343,17 +343,17 @@ class SharedStateManager:
|
||||
return self._change_history[-limit:]
|
||||
|
||||
|
||||
class StreamMemory:
|
||||
class StreamBuffer:
|
||||
"""
|
||||
Memory interface for a single execution.
|
||||
Buffer interface for a single execution.
|
||||
|
||||
Provides scoped access to shared state with proper isolation.
|
||||
Compatible with the existing SharedMemory interface where possible.
|
||||
Compatible with the existing DataBuffer interface where possible.
|
||||
"""
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
manager: SharedStateManager,
|
||||
manager: SharedBufferManager,
|
||||
execution_id: str,
|
||||
stream_id: str,
|
||||
isolation: IsolationLevel,
|
||||
@@ -371,13 +371,13 @@ class StreamMemory:
|
||||
self,
|
||||
read_keys: list[str],
|
||||
write_keys: list[str],
|
||||
) -> "StreamMemory":
|
||||
) -> "StreamBuffer":
|
||||
"""
|
||||
Create a scoped view with read/write permissions.
|
||||
|
||||
Compatible with existing SharedMemory.with_permissions().
|
||||
Compatible with existing DataBuffer.with_permissions().
|
||||
"""
|
||||
scoped = StreamMemory(
|
||||
scoped = StreamBuffer(
|
||||
manager=self._manager,
|
||||
execution_id=self._execution_id,
|
||||
stream_id=self._stream_id,
|
||||
@@ -434,7 +434,7 @@ class StreamMemory:
|
||||
|
||||
return all_state
|
||||
|
||||
# === SYNC API (for backward compatibility with SharedMemory) ===
|
||||
# === SYNC API (for backward compatibility with DataBuffer) ===
|
||||
|
||||
def read_sync(self, key: str) -> Any:
|
||||
"""
|
||||
|
||||
@@ -5,7 +5,7 @@ Tests:
|
||||
1. AgentRuntime creation and lifecycle
|
||||
2. Entry point registration
|
||||
3. Concurrent executions across streams
|
||||
4. SharedStateManager isolation levels
|
||||
4. SharedBufferManager isolation levels
|
||||
5. OutcomeAggregator goal evaluation
|
||||
6. EventBus pub/sub
|
||||
"""
|
||||
@@ -24,7 +24,8 @@ from framework.runtime.agent_runtime import AgentRuntime, create_agent_runtime
|
||||
from framework.runtime.event_bus import AgentEvent, EventBus, EventType
|
||||
from framework.runtime.execution_stream import EntryPointSpec
|
||||
from framework.runtime.outcome_aggregator import OutcomeAggregator
|
||||
from framework.runtime.shared_state import IsolationLevel, SharedStateManager
|
||||
from framework.runtime.shared_state import IsolationLevel, SharedBufferManager
|
||||
from framework.schemas.session_state import SessionState, SessionTimestamps
|
||||
|
||||
# === Test Fixtures ===
|
||||
|
||||
@@ -121,45 +122,45 @@ def temp_storage():
|
||||
yield Path(tmpdir)
|
||||
|
||||
|
||||
# === SharedStateManager Tests ===
|
||||
# === SharedBufferManager Tests ===
|
||||
|
||||
|
||||
class TestSharedStateManager:
|
||||
"""Tests for SharedStateManager."""
|
||||
class TestSharedBufferManager:
|
||||
"""Tests for SharedBufferManager."""
|
||||
|
||||
def test_create_memory(self):
|
||||
"""Test creating execution-scoped memory."""
|
||||
manager = SharedStateManager()
|
||||
memory = manager.create_memory(
|
||||
def test_create_buffer(self):
|
||||
"""Test creating execution-scoped buffer."""
|
||||
manager = SharedBufferManager()
|
||||
buffer = manager.create_buffer(
|
||||
execution_id="exec-1",
|
||||
stream_id="webhook",
|
||||
isolation=IsolationLevel.SHARED,
|
||||
)
|
||||
assert memory is not None
|
||||
assert memory._execution_id == "exec-1"
|
||||
assert memory._stream_id == "webhook"
|
||||
assert buffer is not None
|
||||
assert buffer._execution_id == "exec-1"
|
||||
assert buffer._stream_id == "webhook"
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_isolated_state(self):
|
||||
"""Test isolated state doesn't leak between executions."""
|
||||
manager = SharedStateManager()
|
||||
manager = SharedBufferManager()
|
||||
|
||||
mem1 = manager.create_memory("exec-1", "stream-1", IsolationLevel.ISOLATED)
|
||||
mem2 = manager.create_memory("exec-2", "stream-1", IsolationLevel.ISOLATED)
|
||||
buf1 = manager.create_buffer("exec-1", "stream-1", IsolationLevel.ISOLATED)
|
||||
buf2 = manager.create_buffer("exec-2", "stream-1", IsolationLevel.ISOLATED)
|
||||
|
||||
await mem1.write("key", "value1")
|
||||
await mem2.write("key", "value2")
|
||||
await buf1.write("key", "value1")
|
||||
await buf2.write("key", "value2")
|
||||
|
||||
assert await mem1.read("key") == "value1"
|
||||
assert await mem2.read("key") == "value2"
|
||||
assert await buf1.read("key") == "value1"
|
||||
assert await buf2.read("key") == "value2"
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_shared_state(self):
|
||||
"""Test shared state is visible across executions."""
|
||||
manager = SharedStateManager()
|
||||
manager = SharedBufferManager()
|
||||
|
||||
manager.create_memory("exec-1", "stream-1", IsolationLevel.SHARED)
|
||||
manager.create_memory("exec-2", "stream-1", IsolationLevel.SHARED)
|
||||
manager.create_buffer("exec-1", "stream-1", IsolationLevel.SHARED)
|
||||
manager.create_buffer("exec-2", "stream-1", IsolationLevel.SHARED)
|
||||
|
||||
# Write to global scope
|
||||
await manager.write(
|
||||
@@ -180,8 +181,8 @@ class TestSharedStateManager:
|
||||
|
||||
def test_cleanup_execution(self):
|
||||
"""Test execution cleanup removes state."""
|
||||
manager = SharedStateManager()
|
||||
manager.create_memory("exec-1", "stream-1", IsolationLevel.ISOLATED)
|
||||
manager = SharedBufferManager()
|
||||
manager.create_buffer("exec-1", "stream-1", IsolationLevel.ISOLATED)
|
||||
|
||||
assert "exec-1" in manager._execution_state
|
||||
|
||||
@@ -190,6 +191,26 @@ class TestSharedStateManager:
|
||||
assert "exec-1" not in manager._execution_state
|
||||
|
||||
|
||||
class TestSessionState:
|
||||
"""Tests for session state data-buffer compatibility."""
|
||||
|
||||
def test_legacy_memory_alias_populates_data_buffer(self):
|
||||
"""Legacy `memory` payloads should still hydrate the session buffer."""
|
||||
state = SessionState(
|
||||
session_id="session-1",
|
||||
goal_id="goal-1",
|
||||
timestamps=SessionTimestamps(
|
||||
started_at="2026-01-01T00:00:00",
|
||||
updated_at="2026-01-01T00:00:00",
|
||||
),
|
||||
memory={"rules": "keep starred mail"},
|
||||
)
|
||||
|
||||
assert state.data_buffer == {"rules": "keep starred mail"}
|
||||
assert state.memory == {"rules": "keep starred mail"}
|
||||
assert state.to_session_state_dict()["data_buffer"] == {"rules": "keep starred mail"}
|
||||
|
||||
|
||||
# === EventBus Tests ===
|
||||
|
||||
|
||||
|
||||
@@ -0,0 +1,268 @@
|
||||
"""Tests for webhook idempotency key support in AgentRuntime.trigger()."""
|
||||
|
||||
import asyncio
|
||||
import time
|
||||
from collections import OrderedDict
|
||||
from unittest.mock import AsyncMock, MagicMock
|
||||
|
||||
import pytest
|
||||
|
||||
from framework.runtime.agent_runtime import AgentRuntime, AgentRuntimeConfig
|
||||
|
||||
|
||||
def _make_runtime(ttl=300.0, max_keys=10000):
|
||||
"""Create a minimal AgentRuntime with idempotency cache attributes.
|
||||
|
||||
Uses ``object.__new__`` to skip ``__init__`` and its heavy dependencies
|
||||
(storage, LLM, skills) — we only need the cache and config for these tests.
|
||||
"""
|
||||
runtime = object.__new__(AgentRuntime)
|
||||
runtime._config = AgentRuntimeConfig(idempotency_ttl_seconds=ttl, idempotency_max_keys=max_keys)
|
||||
runtime._running = True
|
||||
runtime._lock = asyncio.Lock()
|
||||
runtime._idempotency_keys = OrderedDict()
|
||||
runtime._idempotency_times = {}
|
||||
runtime._graphs = {}
|
||||
runtime._active_graph_id = "primary"
|
||||
runtime._graph_id = "primary"
|
||||
runtime._streams = {}
|
||||
runtime._entry_points = {}
|
||||
return runtime
|
||||
|
||||
|
||||
def _make_runtime_with_stream(ttl=300.0, max_keys=10000):
|
||||
"""Create a mock runtime whose stream.execute() returns unique IDs."""
|
||||
runtime = _make_runtime(ttl=ttl, max_keys=max_keys)
|
||||
|
||||
call_count = 0
|
||||
|
||||
async def _fake_execute(*args, **kwargs):
|
||||
nonlocal call_count
|
||||
call_count += 1
|
||||
return f"session-{call_count:04d}"
|
||||
|
||||
stream = MagicMock()
|
||||
stream.execute = _fake_execute
|
||||
runtime._streams = {"webhook": stream}
|
||||
runtime._entry_points = {"webhook": MagicMock()}
|
||||
return runtime
|
||||
|
||||
|
||||
class TestIdempotencyConfig:
|
||||
"""Verify idempotency configuration defaults."""
|
||||
|
||||
def test_default_ttl(self):
|
||||
config = AgentRuntimeConfig()
|
||||
assert config.idempotency_ttl_seconds == 300.0
|
||||
|
||||
def test_default_max_keys(self):
|
||||
config = AgentRuntimeConfig()
|
||||
assert config.idempotency_max_keys == 10000
|
||||
|
||||
def test_custom_config(self):
|
||||
config = AgentRuntimeConfig(idempotency_ttl_seconds=60.0, idempotency_max_keys=100)
|
||||
assert config.idempotency_ttl_seconds == 60.0
|
||||
assert config.idempotency_max_keys == 100
|
||||
|
||||
|
||||
class TestIdempotencyCache:
|
||||
"""Test the idempotency cache and pruning logic directly."""
|
||||
|
||||
def test_cache_stores_and_retrieves_key(self):
|
||||
runtime = _make_runtime()
|
||||
runtime._idempotency_keys["stripe-evt-123"] = "exec-001"
|
||||
runtime._idempotency_times["stripe-evt-123"] = time.time()
|
||||
|
||||
assert runtime._idempotency_keys.get("stripe-evt-123") == "exec-001"
|
||||
|
||||
def test_cache_returns_none_for_unknown_key(self):
|
||||
runtime = _make_runtime()
|
||||
assert runtime._idempotency_keys.get("unknown") is None
|
||||
|
||||
def test_prune_removes_expired_keys(self):
|
||||
runtime = _make_runtime(ttl=0.1)
|
||||
|
||||
runtime._idempotency_keys["old-key"] = "exec-old"
|
||||
runtime._idempotency_times["old-key"] = time.time() - 1.0 # expired
|
||||
|
||||
runtime._prune_idempotency_keys()
|
||||
|
||||
assert "old-key" not in runtime._idempotency_keys
|
||||
assert "old-key" not in runtime._idempotency_times
|
||||
|
||||
def test_prune_keeps_fresh_keys(self):
|
||||
runtime = _make_runtime(ttl=300.0)
|
||||
|
||||
runtime._idempotency_keys["fresh-key"] = "exec-fresh"
|
||||
runtime._idempotency_times["fresh-key"] = time.time()
|
||||
|
||||
runtime._prune_idempotency_keys()
|
||||
|
||||
assert "fresh-key" in runtime._idempotency_keys
|
||||
|
||||
def test_prune_respects_max_keys(self):
|
||||
runtime = _make_runtime(max_keys=2)
|
||||
|
||||
for i in range(3):
|
||||
key = f"key-{i}"
|
||||
runtime._idempotency_keys[key] = f"exec-{i}"
|
||||
runtime._idempotency_times[key] = time.time()
|
||||
|
||||
runtime._prune_idempotency_keys()
|
||||
|
||||
assert len(runtime._idempotency_keys) == 2
|
||||
# Oldest (key-0) should be evicted
|
||||
assert "key-0" not in runtime._idempotency_keys
|
||||
assert "key-1" in runtime._idempotency_keys
|
||||
assert "key-2" in runtime._idempotency_keys
|
||||
|
||||
def test_prune_evicts_fifo(self):
|
||||
runtime = _make_runtime(max_keys=1)
|
||||
|
||||
runtime._idempotency_keys["first"] = "exec-1"
|
||||
runtime._idempotency_times["first"] = time.time()
|
||||
runtime._idempotency_keys["second"] = "exec-2"
|
||||
runtime._idempotency_times["second"] = time.time()
|
||||
|
||||
runtime._prune_idempotency_keys()
|
||||
|
||||
assert len(runtime._idempotency_keys) == 1
|
||||
assert "second" in runtime._idempotency_keys
|
||||
assert "first" not in runtime._idempotency_keys
|
||||
|
||||
def test_mixed_expired_and_max_size(self):
|
||||
runtime = _make_runtime(ttl=0.1, max_keys=2)
|
||||
|
||||
# Add expired key
|
||||
runtime._idempotency_keys["expired"] = "exec-e"
|
||||
runtime._idempotency_times["expired"] = time.time() - 1.0
|
||||
|
||||
# Add fresh keys
|
||||
runtime._idempotency_keys["fresh-1"] = "exec-f1"
|
||||
runtime._idempotency_times["fresh-1"] = time.time()
|
||||
runtime._idempotency_keys["fresh-2"] = "exec-f2"
|
||||
runtime._idempotency_times["fresh-2"] = time.time()
|
||||
|
||||
runtime._prune_idempotency_keys()
|
||||
|
||||
assert "expired" not in runtime._idempotency_keys
|
||||
assert "fresh-1" in runtime._idempotency_keys
|
||||
assert "fresh-2" in runtime._idempotency_keys
|
||||
|
||||
|
||||
class TestTriggerIdempotency:
|
||||
"""Tests for trigger() idempotency deduplication."""
|
||||
|
||||
def test_trigger_accepts_idempotency_key(self):
|
||||
"""trigger() accepts idempotency_key as a keyword argument."""
|
||||
import inspect
|
||||
|
||||
sig = inspect.signature(AgentRuntime.trigger)
|
||||
assert "idempotency_key" in sig.parameters
|
||||
|
||||
def test_idempotency_key_defaults_to_none(self):
|
||||
"""idempotency_key defaults to None (backward compatible)."""
|
||||
import inspect
|
||||
|
||||
sig = inspect.signature(AgentRuntime.trigger)
|
||||
assert sig.parameters["idempotency_key"].default is None
|
||||
|
||||
def test_trigger_and_wait_accepts_idempotency_key(self):
|
||||
"""trigger_and_wait() also accepts idempotency_key."""
|
||||
import inspect
|
||||
|
||||
sig = inspect.signature(AgentRuntime.trigger_and_wait)
|
||||
assert "idempotency_key" in sig.parameters
|
||||
|
||||
def test_trigger_and_wait_idempotency_key_defaults_to_none(self):
|
||||
"""trigger_and_wait() idempotency_key defaults to None."""
|
||||
import inspect
|
||||
|
||||
sig = inspect.signature(AgentRuntime.trigger_and_wait)
|
||||
assert sig.parameters["idempotency_key"].default is None
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_duplicate_key_returns_cached_id(self):
|
||||
"""Same idempotency key within TTL returns the cached execution ID."""
|
||||
runtime = _make_runtime_with_stream()
|
||||
|
||||
first = await runtime.trigger("webhook", {}, idempotency_key="stripe-evt-001")
|
||||
second = await runtime.trigger("webhook", {}, idempotency_key="stripe-evt-001")
|
||||
|
||||
assert first == second
|
||||
assert first == "session-0001"
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_different_keys_produce_different_ids(self):
|
||||
"""Different idempotency keys start separate executions."""
|
||||
runtime = _make_runtime_with_stream()
|
||||
|
||||
id_a = await runtime.trigger("webhook", {}, idempotency_key="evt-aaa")
|
||||
id_b = await runtime.trigger("webhook", {}, idempotency_key="evt-bbb")
|
||||
|
||||
assert id_a != id_b
|
||||
assert id_a == "session-0001"
|
||||
assert id_b == "session-0002"
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_none_key_always_starts_new_execution(self):
|
||||
"""key=None (default) skips dedup — every call starts fresh."""
|
||||
runtime = _make_runtime_with_stream()
|
||||
|
||||
id_1 = await runtime.trigger("webhook", {})
|
||||
id_2 = await runtime.trigger("webhook", {})
|
||||
|
||||
assert id_1 != id_2
|
||||
assert len(runtime._idempotency_keys) == 0 # nothing cached
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_expired_key_allows_new_execution(self):
|
||||
"""After TTL expires, the same key starts a new execution."""
|
||||
runtime = _make_runtime_with_stream(ttl=0.1)
|
||||
|
||||
first = await runtime.trigger("webhook", {}, idempotency_key="evt-expire")
|
||||
|
||||
# Backdate the cached timestamp so the key looks expired
|
||||
runtime._idempotency_times["evt-expire"] = time.time() - 1.0
|
||||
|
||||
second = await runtime.trigger("webhook", {}, idempotency_key="evt-expire")
|
||||
|
||||
assert first != second
|
||||
assert first == "session-0001"
|
||||
assert second == "session-0002"
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_stream_not_found_does_not_cache(self):
|
||||
"""If entry point doesn't exist, nothing is cached."""
|
||||
runtime = _make_runtime_with_stream()
|
||||
|
||||
with pytest.raises(ValueError, match="not found"):
|
||||
await runtime.trigger("nonexistent", {}, idempotency_key="evt-orphan")
|
||||
|
||||
assert "evt-orphan" not in runtime._idempotency_keys
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_execute_error_does_not_cache(self):
|
||||
"""If stream.execute() raises, nothing is cached so retries can go through."""
|
||||
runtime = _make_runtime()
|
||||
|
||||
failing_stream = MagicMock()
|
||||
failing_stream.execute = AsyncMock(side_effect=RuntimeError("stream not running"))
|
||||
runtime._streams = {"webhook": failing_stream}
|
||||
runtime._entry_points = {"webhook": MagicMock()}
|
||||
|
||||
with pytest.raises(RuntimeError, match="stream not running"):
|
||||
await runtime.trigger("webhook", {}, idempotency_key="evt-123")
|
||||
|
||||
assert "evt-123" not in runtime._idempotency_keys
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_cache_holds_real_execution_id(self):
|
||||
"""Cached value matches the actual execution ID from execute()."""
|
||||
runtime = _make_runtime_with_stream()
|
||||
|
||||
exec_id = await runtime.trigger("webhook", {}, idempotency_key="evt-real")
|
||||
|
||||
cached = runtime._idempotency_keys.get("evt-real")
|
||||
assert cached == exec_id
|
||||
assert cached == "session-0001"
|
||||
@@ -23,6 +23,7 @@ class Checkpoint(BaseModel):
|
||||
checkpoint_id: str # Format: cp_{type}_{node_id}_{timestamp}
|
||||
checkpoint_type: str # "node_start" | "node_complete" | "loop_iteration"
|
||||
session_id: str
|
||||
run_id: str | None = None
|
||||
|
||||
# Timestamps
|
||||
created_at: str # ISO 8601 format
|
||||
@@ -33,7 +34,7 @@ class Checkpoint(BaseModel):
|
||||
execution_path: list[str] = Field(default_factory=list) # Nodes executed so far
|
||||
|
||||
# State snapshots
|
||||
shared_memory: dict[str, Any] = Field(default_factory=dict) # Full SharedMemory._data
|
||||
data_buffer: dict[str, Any] = Field(default_factory=dict) # Full DataBuffer._data
|
||||
accumulated_outputs: dict[str, Any] = Field(default_factory=dict) # Outputs accumulated so far
|
||||
|
||||
# Execution metrics (for resuming quality tracking)
|
||||
@@ -50,9 +51,10 @@ class Checkpoint(BaseModel):
|
||||
cls,
|
||||
checkpoint_type: str,
|
||||
session_id: str,
|
||||
run_id: str | None,
|
||||
current_node: str,
|
||||
execution_path: list[str],
|
||||
shared_memory: dict[str, Any],
|
||||
data_buffer: dict[str, Any],
|
||||
next_node: str | None = None,
|
||||
accumulated_outputs: dict[str, Any] | None = None,
|
||||
metrics_snapshot: dict[str, Any] | None = None,
|
||||
@@ -65,9 +67,10 @@ class Checkpoint(BaseModel):
|
||||
Args:
|
||||
checkpoint_type: Type of checkpoint (node_start, node_complete, etc.)
|
||||
session_id: Session this checkpoint belongs to
|
||||
run_id: Logical run this checkpoint belongs to
|
||||
current_node: Node ID at checkpoint time
|
||||
execution_path: List of node IDs executed so far
|
||||
shared_memory: Full memory state snapshot
|
||||
data_buffer: Full data buffer state snapshot
|
||||
next_node: Next node to execute (for node_complete checkpoints)
|
||||
accumulated_outputs: Outputs accumulated so far
|
||||
metrics_snapshot: Execution metrics at checkpoint time
|
||||
@@ -87,11 +90,12 @@ class Checkpoint(BaseModel):
|
||||
checkpoint_id=checkpoint_id,
|
||||
checkpoint_type=checkpoint_type,
|
||||
session_id=session_id,
|
||||
run_id=run_id,
|
||||
created_at=datetime.now().isoformat(),
|
||||
current_node=current_node,
|
||||
next_node=next_node,
|
||||
execution_path=execution_path,
|
||||
shared_memory=shared_memory,
|
||||
data_buffer=data_buffer,
|
||||
accumulated_outputs=accumulated_outputs or {},
|
||||
metrics_snapshot=metrics_snapshot or {},
|
||||
is_clean=is_clean,
|
||||
|
||||
@@ -9,7 +9,7 @@ from datetime import datetime
|
||||
from enum import StrEnum
|
||||
from typing import TYPE_CHECKING, Any
|
||||
|
||||
from pydantic import BaseModel, Field, computed_field
|
||||
from pydantic import AliasChoices, BaseModel, Field, computed_field
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from framework.graph.executor import ExecutionResult
|
||||
@@ -119,8 +119,11 @@ class SessionState(BaseModel):
|
||||
# Result
|
||||
result: SessionResult = Field(default_factory=SessionResult)
|
||||
|
||||
# Memory (for resumability)
|
||||
memory: dict[str, Any] = Field(default_factory=dict)
|
||||
# Data buffer (for resumability)
|
||||
data_buffer: dict[str, Any] = Field(
|
||||
default_factory=dict,
|
||||
validation_alias=AliasChoices("data_buffer", "memory"),
|
||||
)
|
||||
|
||||
# Metrics
|
||||
metrics: SessionMetrics = Field(default_factory=SessionMetrics)
|
||||
@@ -133,6 +136,7 @@ class SessionState(BaseModel):
|
||||
|
||||
# Input data (for debugging/replay)
|
||||
input_data: dict[str, Any] = Field(default_factory=dict)
|
||||
current_run_id: str | None = None
|
||||
|
||||
# Process ID of the owning process (for cross-process stale session detection)
|
||||
pid: int | None = None
|
||||
@@ -153,6 +157,16 @@ class SessionState(BaseModel):
|
||||
|
||||
model_config = {"extra": "allow"}
|
||||
|
||||
@property
|
||||
def memory(self) -> dict[str, Any]:
|
||||
"""Backward-compatible alias for legacy callers."""
|
||||
return self.data_buffer
|
||||
|
||||
@memory.setter
|
||||
def memory(self, value: dict[str, Any]) -> None:
|
||||
"""Backward-compatible alias for legacy callers."""
|
||||
self.data_buffer = value
|
||||
|
||||
@computed_field
|
||||
@property
|
||||
def duration_ms(self) -> int:
|
||||
@@ -168,11 +182,10 @@ class SessionState(BaseModel):
|
||||
def is_resumable(self) -> bool:
|
||||
"""Can this session be resumed?
|
||||
|
||||
Every non-completed session is resumable. If resume_from/paused_at
|
||||
aren't set, the executor falls back to the graph entry point —
|
||||
so we don't gate on those. Even catastrophic failures are resumable.
|
||||
Only sessions with a valid checkpoint can be resumed.
|
||||
State-based resume (without a checkpoint) is no longer supported.
|
||||
"""
|
||||
return self.status != SessionStatus.COMPLETED
|
||||
return self.is_resumable_from_checkpoint
|
||||
|
||||
@computed_field
|
||||
@property
|
||||
@@ -243,7 +256,7 @@ class SessionState(BaseModel):
|
||||
error=result.error,
|
||||
output=result.output,
|
||||
),
|
||||
memory=result.session_state.get("memory", {}) if result.session_state else {},
|
||||
data_buffer=result.session_state.get("data_buffer", result.session_state.get("memory", {})) if result.session_state else {},
|
||||
input_data=input_data or {},
|
||||
)
|
||||
|
||||
@@ -293,7 +306,11 @@ class SessionState(BaseModel):
|
||||
)
|
||||
|
||||
def to_session_state_dict(self) -> dict[str, Any]:
|
||||
"""Convert to session_state format for GraphExecutor.execute()."""
|
||||
"""Convert to session_state format for GraphExecutor.execute().
|
||||
|
||||
NOTE: state-based resume via paused_at/resume_from is deprecated.
|
||||
Use checkpoint-based resume (``resume_from_checkpoint`` key) instead.
|
||||
"""
|
||||
# Derive resume target: explicit > last node in path > entry point
|
||||
resume_from = (
|
||||
self.progress.resume_from
|
||||
@@ -303,7 +320,7 @@ class SessionState(BaseModel):
|
||||
return {
|
||||
"paused_at": resume_from,
|
||||
"resume_from": resume_from,
|
||||
"memory": self.memory,
|
||||
"data_buffer": self.data_buffer,
|
||||
"execution_path": self.progress.path,
|
||||
"node_visit_counts": self.progress.node_visit_counts,
|
||||
}
|
||||
|
||||
@@ -4,14 +4,14 @@ HTTP API backend for the Hive agent framework. Built on **aiohttp**, fully async
|
||||
|
||||
## Architecture
|
||||
|
||||
Sessions are the primary entity. A session owns an EventBus + LLM and always has a queen executor. Workers are optional — they can be loaded into and unloaded from a session at any time.
|
||||
Sessions are the primary entity. A session owns an EventBus + LLM and always has a queen executor. Graphs are optional and can be loaded into and unloaded from a session at any time.
|
||||
|
||||
```
|
||||
Session {
|
||||
event_bus # owned by session, shared with queen + worker
|
||||
event_bus # owned by session, shared with queen + graph
|
||||
llm # owned by session
|
||||
queen_executor # always present
|
||||
worker_runtime? # optional — loaded/unloaded independently
|
||||
graph_runtime? # optional — loaded/unloaded independently
|
||||
}
|
||||
```
|
||||
|
||||
@@ -20,9 +20,9 @@ Session {
|
||||
```
|
||||
server/
|
||||
├── app.py # Application factory, middleware, static serving
|
||||
├── session_manager.py # Session lifecycle (create/load worker/unload/stop)
|
||||
├── session_manager.py # Session lifecycle (create/load graph/unload/stop)
|
||||
├── sse.py # Server-Sent Events helper
|
||||
├── routes_sessions.py # Session lifecycle, info, worker-session browsing, discovery
|
||||
├── routes_sessions.py # Session lifecycle, info, and discovery
|
||||
├── routes_execution.py # Trigger, inject, chat, stop, resume, replay
|
||||
├── routes_events.py # SSE event streaming
|
||||
├── routes_graphs.py # Graph topology & node inspection
|
||||
@@ -48,16 +48,16 @@ server/
|
||||
|
||||
Manages `Session` objects. Key methods:
|
||||
|
||||
- **`create_session()`** — creates EventBus + LLM, starts queen (no worker)
|
||||
- **`create_session_with_worker()`** — one-step: session + worker + judge
|
||||
- **`load_worker()`** — loads agent into existing session, starts judge
|
||||
- **`unload_worker()`** — removes worker + judge, queen stays alive
|
||||
- **`stop_session()`** — tears down everything (worker + queen)
|
||||
- **`create_session()`** — creates EventBus + LLM, starts queen (no graph)
|
||||
- **`create_session_with_worker_graph()`** — one-step: session + graph + judge
|
||||
- **`load_graph()`** — loads agent into existing session, starts judge
|
||||
- **`unload_graph()`** — removes graph + judge, queen stays alive
|
||||
- **`stop_session()`** — tears down everything (graph + queen)
|
||||
|
||||
Three-conversation model:
|
||||
1. **Queen** — persistent interactive executor for user chat (always present)
|
||||
2. **Worker** — `AgentRuntime` that executes graphs (optional)
|
||||
3. **Judge** — timer-driven background executor for health monitoring (active when worker is loaded)
|
||||
3. **Judge** — timer-driven background executor for health monitoring (active when a graph is loaded)
|
||||
|
||||
### `sse.py` — SSE Helper
|
||||
|
||||
@@ -81,23 +81,23 @@ Returns agents grouped by category with metadata (name, description, node count,
|
||||
|--------|-------|-------------|
|
||||
| `POST` | `/api/sessions` | Create a session |
|
||||
| `GET` | `/api/sessions` | List all active sessions |
|
||||
| `GET` | `/api/sessions/{session_id}` | Session detail (includes entry points + graphs if worker loaded) |
|
||||
| `GET` | `/api/sessions/{session_id}` | Session detail (includes entry points + graphs if a graph is loaded) |
|
||||
| `DELETE` | `/api/sessions/{session_id}` | Stop session entirely |
|
||||
|
||||
**Create session** has two modes:
|
||||
|
||||
```jsonc
|
||||
// Queen-only session (no worker)
|
||||
// Queen-only session (no graph)
|
||||
POST /api/sessions
|
||||
{}
|
||||
// or with custom ID:
|
||||
{ "session_id": "my-custom-id" }
|
||||
|
||||
// Session with worker (one-step)
|
||||
// Session with graph (one-step)
|
||||
POST /api/sessions
|
||||
{
|
||||
"agent_path": "exports/my-agent",
|
||||
"agent_id": "custom-worker-name", // optional
|
||||
"agent_id": "custom-graph-name", // optional
|
||||
"model": "claude-sonnet-4-20250514" // optional
|
||||
}
|
||||
```
|
||||
@@ -108,24 +108,24 @@ POST /api/sessions
|
||||
|
||||
**Get session** returns `202` with `{"loading": true}` while loading, `404` if not found.
|
||||
|
||||
### Worker Lifecycle
|
||||
### Graph Lifecycle
|
||||
|
||||
| Method | Route | Description |
|
||||
|--------|-------|-------------|
|
||||
| `POST` | `/api/sessions/{session_id}/worker` | Load a worker into session |
|
||||
| `DELETE` | `/api/sessions/{session_id}/worker` | Unload worker (queen stays alive) |
|
||||
| `POST` | `/api/sessions/{session_id}/graph` | Load a graph into session |
|
||||
| `DELETE` | `/api/sessions/{session_id}/graph` | Unload graph (queen stays alive) |
|
||||
|
||||
```jsonc
|
||||
// Load worker into existing session
|
||||
POST /api/sessions/{session_id}/worker
|
||||
// Load graph into existing session
|
||||
POST /api/sessions/{session_id}/graph
|
||||
{
|
||||
"agent_path": "exports/my-agent",
|
||||
"worker_id": "custom-name", // optional
|
||||
"graph_id": "custom-name", // optional
|
||||
"model": "..." // optional
|
||||
}
|
||||
|
||||
// Unload worker
|
||||
DELETE /api/sessions/{session_id}/worker
|
||||
// Unload graph
|
||||
DELETE /api/sessions/{session_id}/graph
|
||||
```
|
||||
|
||||
### Execution Control
|
||||
@@ -152,10 +152,10 @@ POST /api/sessions/{session_id}/trigger
|
||||
// Returns: { "execution_id": "..." }
|
||||
```
|
||||
|
||||
**Chat** routes messages with priority:
|
||||
1. Worker awaiting input -> inject into worker node
|
||||
2. Queen active -> inject into queen conversation
|
||||
3. Neither available -> 503
|
||||
**Chat** always delivers messages to the queen conversation.
|
||||
Worker-originated questions are still shown in the UI, but the user's reply
|
||||
is mediated by the queen, which can then relay it to the blocked worker via
|
||||
`inject_message()` when appropriate.
|
||||
|
||||
```jsonc
|
||||
POST /api/sessions/{session_id}/chat
|
||||
@@ -206,7 +206,7 @@ GET /api/sessions/{session_id}/events?types=CLIENT_OUTPUT_DELTA,EXECUTION_COMPLE
|
||||
|
||||
Keepalive ping every 15s. Streams from the session's EventBus (covers both queen and worker events).
|
||||
|
||||
Default event types: `CLIENT_OUTPUT_DELTA`, `CLIENT_INPUT_REQUESTED`, `LLM_TEXT_DELTA`, `TOOL_CALL_STARTED`, `TOOL_CALL_COMPLETED`, `EXECUTION_STARTED`, `EXECUTION_COMPLETED`, `EXECUTION_FAILED`, `EXECUTION_PAUSED`, `NODE_LOOP_STARTED`, `NODE_LOOP_ITERATION`, `NODE_LOOP_COMPLETED`, `NODE_ACTION_PLAN`, `EDGE_TRAVERSED`, `GOAL_PROGRESS`, `QUEEN_INTERVENTION_REQUESTED`, `WORKER_ESCALATION_TICKET`, `NODE_INTERNAL_OUTPUT`, `NODE_STALLED`, `NODE_RETRY`, `NODE_TOOL_DOOM_LOOP`, `CONTEXT_COMPACTED`, `WORKER_LOADED`.
|
||||
Default event types: `CLIENT_OUTPUT_DELTA`, `CLIENT_INPUT_REQUESTED`, `LLM_TEXT_DELTA`, `TOOL_CALL_STARTED`, `TOOL_CALL_COMPLETED`, `EXECUTION_STARTED`, `EXECUTION_COMPLETED`, `EXECUTION_FAILED`, `EXECUTION_PAUSED`, `NODE_LOOP_STARTED`, `NODE_LOOP_ITERATION`, `NODE_LOOP_COMPLETED`, `NODE_ACTION_PLAN`, `EDGE_TRAVERSED`, `GOAL_PROGRESS`, `NODE_INTERNAL_OUTPUT`, `NODE_STALLED`, `NODE_RETRY`, `NODE_TOOL_DOOM_LOOP`, `CONTEXT_COMPACTED`, `WORKER_GRAPH_LOADED`.
|
||||
|
||||
### Session Info
|
||||
|
||||
@@ -254,25 +254,6 @@ GET .../nodes/{node_id}/logs?session_id=ws_id&level=all
|
||||
|
||||
Log levels: `summary` (run stats), `details` (per-node execution), `tools` (tool calls + LLM text).
|
||||
|
||||
### Worker Session Browsing
|
||||
|
||||
Browse persisted execution runs on disk.
|
||||
|
||||
| Method | Route | Description |
|
||||
|--------|-------|-------------|
|
||||
| `GET` | `/api/sessions/{session_id}/worker-sessions` | List worker sessions |
|
||||
| `GET` | `/api/sessions/{session_id}/worker-sessions/{ws_id}` | Worker session state |
|
||||
| `DELETE` | `/api/sessions/{session_id}/worker-sessions/{ws_id}` | Delete worker session |
|
||||
| `GET` | `/api/sessions/{session_id}/worker-sessions/{ws_id}/checkpoints` | List checkpoints |
|
||||
| `POST` | `/api/sessions/{session_id}/worker-sessions/{ws_id}/checkpoints/{cp_id}/restore` | Restore from checkpoint |
|
||||
| `GET` | `/api/sessions/{session_id}/worker-sessions/{ws_id}/messages` | Get conversation messages |
|
||||
|
||||
**Messages** support filtering:
|
||||
```
|
||||
GET .../messages?node_id=gather_info # filter by node
|
||||
GET .../messages?client_only=true # only user inputs + client-facing assistant outputs
|
||||
```
|
||||
|
||||
### Credentials
|
||||
|
||||
| Method | Route | Description |
|
||||
|
||||
@@ -94,29 +94,6 @@ def sessions_dir(session: Session) -> Path:
|
||||
return Path.home() / ".hive" / "agents" / agent_name / "sessions"
|
||||
|
||||
|
||||
def cold_sessions_dir(session_id: str) -> Path | None:
|
||||
"""Resolve the worker sessions directory from disk for a cold/stopped session.
|
||||
|
||||
Reads agent_path from the queen session's meta.json to find the agent name,
|
||||
then returns ~/.hive/agents/{agent_name}/sessions/.
|
||||
Returns None if meta.json is missing or has no agent_path.
|
||||
"""
|
||||
import json
|
||||
|
||||
meta_path = Path.home() / ".hive" / "queen" / "session" / session_id / "meta.json"
|
||||
if not meta_path.exists():
|
||||
return None
|
||||
try:
|
||||
meta = json.loads(meta_path.read_text(encoding="utf-8"))
|
||||
agent_path = meta.get("agent_path")
|
||||
if not agent_path:
|
||||
return None
|
||||
agent_name = Path(agent_path).name
|
||||
return Path.home() / ".hive" / "agents" / agent_name / "sessions"
|
||||
except (json.JSONDecodeError, OSError):
|
||||
return None
|
||||
|
||||
|
||||
# Allowed CORS origins (localhost on any port)
|
||||
_CORS_ORIGINS = {"http://localhost", "http://127.0.0.1"}
|
||||
|
||||
@@ -183,11 +160,42 @@ async def handle_health(request: web.Request) -> web.Response:
|
||||
{
|
||||
"status": "ok",
|
||||
"sessions": len(sessions),
|
||||
"agents_loaded": sum(1 for s in sessions if s.worker_runtime is not None),
|
||||
"agents_loaded": sum(1 for s in sessions if s.graph_runtime is not None),
|
||||
}
|
||||
)
|
||||
|
||||
|
||||
async def handle_browser_status(request: web.Request) -> web.Response:
    """GET /api/browser/status — proxy the GCU bridge status check server-side.

    Probes ``http://127.0.0.1:<HIVE_BRIDGE_PORT + 1>/status`` (port 9230 with
    the default bridge port 9229) so the browser never makes a cross-origin
    request that would log ERR_CONNECTION_REFUSED in the console.

    Returns:
        A JSON response ``{"bridge": ..., "connected": ...}``. ``bridge`` is
        ``True`` only when the status endpoint answered with a JSON object;
        ``connected`` is then whatever that object reports under
        ``"connected"`` (``False`` if absent). Any failure to reach or parse
        the endpoint yields ``{"bridge": False, "connected": False}``.
    """
    import asyncio
    import json

    unavailable = {"bridge": False, "connected": False}

    try:
        # Parsed inside the try so a malformed HIVE_BRIDGE_PORT degrades to
        # "bridge unavailable" instead of failing the request.
        status_port = int(os.environ.get("HIVE_BRIDGE_PORT", "9229")) + 1
        reader, writer = await asyncio.wait_for(
            asyncio.open_connection("127.0.0.1", status_port), timeout=0.5
        )
    except (OSError, ValueError, OverflowError, asyncio.TimeoutError):
        return web.json_response(unavailable)

    try:
        writer.write(b"GET /status HTTP/1.0\r\nHost: 127.0.0.1\r\n\r\n")
        await writer.drain()
        raw = await asyncio.wait_for(reader.read(512), timeout=0.5)
    except (OSError, asyncio.TimeoutError):
        return web.json_response(unavailable)
    finally:
        # Always release the socket, including when the write or read
        # fails or times out.
        writer.close()

    # The JSON body follows the blank line that terminates the HTTP headers.
    _headers, separator, body = raw.partition(b"\r\n\r\n")
    if not separator:
        return web.json_response(unavailable)

    try:
        data = json.loads(body)
    except ValueError:  # JSONDecodeError / UnicodeDecodeError
        return web.json_response(unavailable)

    if not isinstance(data, dict):
        return web.json_response(unavailable)

    return web.json_response({"bridge": True, "connected": data.get("connected", False)})
|
||||
|
||||
|
||||
def create_app(model: str | None = None) -> web.Application:
|
||||
"""Create and configure the aiohttp Application.
|
||||
|
||||
@@ -233,6 +241,7 @@ def create_app(model: str | None = None) -> web.Application:
|
||||
|
||||
# Health check
|
||||
app.router.add_get("/api/health", handle_health)
|
||||
app.router.add_get("/api/browser/status", handle_browser_status)
|
||||
|
||||
# Register route modules
|
||||
from framework.server.routes_credentials import register_routes as register_credential_routes
|
||||
|
||||
@@ -36,6 +36,7 @@ async def create_queen(
|
||||
)
|
||||
from framework.agents.queen.nodes import (
|
||||
_QUEEN_BUILDING_TOOLS,
|
||||
_QUEEN_EDITING_TOOLS,
|
||||
_QUEEN_PLANNING_TOOLS,
|
||||
_QUEEN_RUNNING_TOOLS,
|
||||
_QUEEN_STAGING_TOOLS,
|
||||
@@ -44,16 +45,20 @@ async def create_queen(
|
||||
_planning_knowledge,
|
||||
_queen_behavior_always,
|
||||
_queen_behavior_building,
|
||||
_queen_behavior_editing,
|
||||
_queen_behavior_planning,
|
||||
_queen_behavior_running,
|
||||
_queen_behavior_staging,
|
||||
_queen_identity_building,
|
||||
_queen_identity_planning,
|
||||
_queen_identity_running,
|
||||
_queen_identity_staging,
|
||||
_queen_character_core,
|
||||
_queen_identity_editing,
|
||||
_queen_phase_7,
|
||||
_queen_role_building,
|
||||
_queen_role_planning,
|
||||
_queen_role_running,
|
||||
_queen_role_staging,
|
||||
_queen_style,
|
||||
_queen_tools_building,
|
||||
_queen_tools_editing,
|
||||
_queen_tools_planning,
|
||||
_queen_tools_running,
|
||||
_queen_tools_staging,
|
||||
@@ -70,8 +75,6 @@ async def create_queen(
|
||||
QueenPhaseState,
|
||||
register_queen_lifecycle_tools,
|
||||
)
|
||||
from framework.tools.queen_memory_tools import register_queen_memory_tools
|
||||
|
||||
hive_home = Path.home() / ".hive"
|
||||
|
||||
# ---- Tool registry ------------------------------------------------
|
||||
@@ -141,19 +144,14 @@ async def create_queen(
|
||||
phase_state=phase_state,
|
||||
)
|
||||
|
||||
# ---- Episodic memory tools (always registered) ---------------------
|
||||
register_queen_memory_tools(queen_registry)
|
||||
|
||||
# ---- Monitoring tools (only when worker is loaded) ----------------
|
||||
if session.worker_runtime:
|
||||
if session.graph_runtime:
|
||||
from framework.tools.worker_monitoring_tools import register_worker_monitoring_tools
|
||||
|
||||
register_worker_monitoring_tools(
|
||||
queen_registry,
|
||||
session.event_bus,
|
||||
session.worker_path,
|
||||
stream_id="queen",
|
||||
worker_graph_id=session.worker_runtime._graph_id,
|
||||
worker_graph_id=session.graph_runtime._graph_id,
|
||||
default_session_id=session.id,
|
||||
)
|
||||
|
||||
@@ -165,6 +163,7 @@ async def create_queen(
|
||||
building_names = set(_QUEEN_BUILDING_TOOLS)
|
||||
staging_names = set(_QUEEN_STAGING_TOOLS)
|
||||
running_names = set(_QUEEN_RUNNING_TOOLS)
|
||||
editing_names = set(_QUEEN_EDITING_TOOLS)
|
||||
|
||||
registered_names = {t.name for t in queen_tools}
|
||||
missing_building = building_names - registered_names
|
||||
@@ -181,11 +180,20 @@ async def create_queen(
|
||||
phase_state.building_tools = [t for t in queen_tools if t.name in building_names]
|
||||
phase_state.staging_tools = [t for t in queen_tools if t.name in staging_names]
|
||||
phase_state.running_tools = [t for t in queen_tools if t.name in running_names]
|
||||
phase_state.editing_tools = [t for t in queen_tools if t.name in editing_names]
|
||||
|
||||
# ---- Cross-session memory ----------------------------------------
|
||||
from framework.agents.queen.queen_memory import seed_if_missing
|
||||
from framework.agents.queen.queen_memory_v2 import (
|
||||
colony_memory_dir,
|
||||
global_memory_dir,
|
||||
init_memory_dir,
|
||||
)
|
||||
|
||||
seed_if_missing()
|
||||
colony_dir = colony_memory_dir(session.id)
|
||||
global_dir = global_memory_dir()
|
||||
init_memory_dir(colony_dir, migrate_legacy=True)
|
||||
init_memory_dir(global_dir)
|
||||
phase_state.global_memory_dir = global_dir
|
||||
|
||||
# ---- Compose phase-specific prompts ------------------------------
|
||||
_orig_node = _queen_graph.nodes[0]
|
||||
@@ -199,7 +207,9 @@ async def create_queen(
|
||||
)
|
||||
|
||||
_planning_body = (
|
||||
_queen_style
|
||||
_queen_character_core
|
||||
+ _queen_role_planning
|
||||
+ _queen_style
|
||||
+ _shared_building_knowledge
|
||||
+ _queen_tools_planning
|
||||
+ _queen_behavior_always
|
||||
@@ -207,10 +217,12 @@ async def create_queen(
|
||||
+ _planning_knowledge
|
||||
+ worker_identity
|
||||
)
|
||||
phase_state.prompt_planning = _queen_identity_planning + _planning_body
|
||||
phase_state.prompt_planning = _planning_body
|
||||
|
||||
_building_body = (
|
||||
_queen_style
|
||||
_queen_character_core
|
||||
+ _queen_role_building
|
||||
+ _queen_style
|
||||
+ _shared_building_knowledge
|
||||
+ _queen_tools_building
|
||||
+ _queen_behavior_always
|
||||
@@ -220,9 +232,10 @@ async def create_queen(
|
||||
+ _appendices
|
||||
+ worker_identity
|
||||
)
|
||||
phase_state.prompt_building = _queen_identity_building + _building_body
|
||||
phase_state.prompt_building = _building_body
|
||||
phase_state.prompt_staging = (
|
||||
_queen_identity_staging
|
||||
_queen_character_core
|
||||
+ _queen_role_staging
|
||||
+ _queen_style
|
||||
+ _queen_tools_staging
|
||||
+ _queen_behavior_always
|
||||
@@ -230,15 +243,25 @@ async def create_queen(
|
||||
+ worker_identity
|
||||
)
|
||||
phase_state.prompt_running = (
|
||||
_queen_identity_running
|
||||
_queen_character_core
|
||||
+ _queen_role_running
|
||||
+ _queen_style
|
||||
+ _queen_tools_running
|
||||
+ _queen_behavior_always
|
||||
+ _queen_behavior_running
|
||||
+ worker_identity
|
||||
)
|
||||
phase_state.prompt_editing = (
|
||||
_queen_identity_editing
|
||||
+ _queen_style
|
||||
+ _queen_tools_editing
|
||||
+ _queen_behavior_always
|
||||
+ _queen_behavior_editing
|
||||
+ worker_identity
|
||||
)
|
||||
|
||||
# ---- Default skill protocols -------------------------------------
|
||||
_queen_skill_dirs: list[str] = []
|
||||
try:
|
||||
from framework.skills.manager import SkillsManager, SkillsManagerConfig
|
||||
|
||||
@@ -249,6 +272,7 @@ async def create_queen(
|
||||
_queen_skills_mgr.load()
|
||||
phase_state.protocols_prompt = _queen_skills_mgr.protocols_prompt
|
||||
phase_state.skills_catalog_prompt = _queen_skills_mgr.skills_catalog_prompt
|
||||
_queen_skill_dirs = _queen_skills_mgr.allowlisted_dirs
|
||||
except Exception:
|
||||
logger.debug("Queen skill loading failed (non-fatal)", exc_info=True)
|
||||
|
||||
@@ -257,18 +281,26 @@ async def create_queen(
|
||||
_session_event_bus = session.event_bus
|
||||
|
||||
async def _persona_hook(ctx: HookContext) -> HookResult | None:
|
||||
persona = await select_expert_persona(ctx.trigger or "", _session_llm)
|
||||
if not persona:
|
||||
from framework.agents.queen.queen_memory import format_for_injection
|
||||
|
||||
memory_context = format_for_injection()
|
||||
result = await select_expert_persona(
|
||||
ctx.trigger or "", _session_llm, memory_context=memory_context
|
||||
)
|
||||
if not result:
|
||||
return None
|
||||
# Store on phase_state so persona/style persist across dynamic prompt refreshes
|
||||
phase_state.persona_prefix = result.persona_prefix
|
||||
phase_state.style_directive = result.style_directive
|
||||
if _session_event_bus is not None:
|
||||
await _session_event_bus.publish(
|
||||
AgentEvent(
|
||||
type=EventType.QUEEN_PERSONA_SELECTED,
|
||||
stream_id="queen",
|
||||
data={"persona": persona},
|
||||
data={"persona": result.persona_prefix},
|
||||
)
|
||||
)
|
||||
return HookResult(system_prompt=persona + "\n\n" + phase_state.get_current_prompt())
|
||||
return HookResult(system_prompt=phase_state.get_current_prompt())
|
||||
|
||||
# ---- Graph preparation -------------------------------------------
|
||||
initial_prompt_text = phase_state.get_current_prompt()
|
||||
@@ -299,7 +331,9 @@ async def create_queen(
|
||||
queen_runtime = Runtime(hive_home / "queen")
|
||||
|
||||
async def _queen_loop():
|
||||
logger.debug("[_queen_loop] Starting queen loop for session %s", session.id)
|
||||
try:
|
||||
logger.debug("[_queen_loop] Creating GraphExecutor...")
|
||||
executor = GraphExecutor(
|
||||
runtime=queen_runtime,
|
||||
llm=session.llm,
|
||||
@@ -313,8 +347,12 @@ async def create_queen(
|
||||
dynamic_tools_provider=phase_state.get_current_tools,
|
||||
dynamic_prompt_provider=phase_state.get_current_prompt,
|
||||
iteration_metadata_provider=lambda: {"phase": phase_state.phase},
|
||||
skill_dirs=_queen_skill_dirs,
|
||||
protocols_prompt=phase_state.protocols_prompt,
|
||||
skills_catalog_prompt=phase_state.skills_catalog_prompt,
|
||||
)
|
||||
session.queen_executor = executor
|
||||
logger.debug("[_queen_loop] GraphExecutor created and stored in session.queen_executor")
|
||||
|
||||
# Wire inject_notification so phase switches notify the queen LLM
|
||||
async def _inject_phase_notification(content: str) -> None:
|
||||
@@ -324,7 +362,8 @@ async def create_queen(
|
||||
|
||||
phase_state.inject_notification = _inject_phase_notification
|
||||
|
||||
# Auto-switch to staging when worker execution finishes
|
||||
# Auto-switch to editing when worker execution finishes.
|
||||
# The worker stays loaded — queen can tweak config and re-run.
|
||||
async def _on_worker_done(event):
|
||||
if event.stream_id == "queen":
|
||||
return
|
||||
@@ -345,21 +384,24 @@ async def create_queen(
|
||||
"[WORKER_TERMINAL] Worker finished successfully.\n"
|
||||
f"Output:{_out}\n"
|
||||
"Report this to the user. "
|
||||
"Ask if they want to continue with another run."
|
||||
"Ask if they want to re-run with different input "
|
||||
"or tweak the configuration."
|
||||
)
|
||||
else: # EXECUTION_FAILED
|
||||
error = event.data.get("error", "Unknown error")
|
||||
notification = (
|
||||
"[WORKER_TERMINAL] Worker failed.\n"
|
||||
f"Error: {error}\n"
|
||||
"Report this to the user and help them troubleshoot."
|
||||
"Report this to the user and help them troubleshoot. "
|
||||
"You can re-run with different input or escalate to "
|
||||
"building/planning if code changes are needed."
|
||||
)
|
||||
|
||||
node = executor.node_registry.get("queen")
|
||||
if node is not None and hasattr(node, "inject_event"):
|
||||
await node.inject_event(notification)
|
||||
|
||||
await phase_state.switch_to_staging(source="auto")
|
||||
await phase_state.switch_to_editing(source="auto")
|
||||
|
||||
session.event_bus.subscribe(
|
||||
event_types=[EventType.EXECUTION_COMPLETED, EventType.EXECUTION_FAILED],
|
||||
@@ -367,18 +409,34 @@ async def create_queen(
|
||||
)
|
||||
session_manager._subscribe_worker_handoffs(session, executor)
|
||||
|
||||
# ---- Reflection + recall memory subscriptions ----------------
|
||||
from framework.agents.queen.reflection_agent import subscribe_reflection_triggers
|
||||
|
||||
_reflection_subs = await subscribe_reflection_triggers(
|
||||
session.event_bus,
|
||||
queen_dir,
|
||||
session.llm,
|
||||
memory_dir=colony_dir,
|
||||
phase_state=phase_state,
|
||||
)
|
||||
|
||||
# Store sub IDs on session for teardown.
|
||||
session.memory_reflection_subs = _reflection_subs
|
||||
|
||||
logger.info(
|
||||
"Queen starting in %s phase with %d tools: %s",
|
||||
phase_state.phase,
|
||||
len(phase_state.get_current_tools()),
|
||||
[t.name for t in phase_state.get_current_tools()],
|
||||
)
|
||||
logger.debug("[_queen_loop] Calling executor.execute()...")
|
||||
result = await executor.execute(
|
||||
graph=queen_graph,
|
||||
goal=queen_goal,
|
||||
input_data={"greeting": initial_prompt or "Session started."},
|
||||
session_state={"resume_session_id": session.id},
|
||||
)
|
||||
logger.debug("[_queen_loop] executor.execute() returned with success=%s", result.success)
|
||||
if result.success:
|
||||
logger.warning("Queen executor returned (should be forever-alive)")
|
||||
else:
|
||||
@@ -386,9 +444,14 @@ async def create_queen(
|
||||
"Queen executor failed: %s",
|
||||
result.error or "(no error message)",
|
||||
)
|
||||
except Exception:
|
||||
logger.error("Queen conversation crashed", exc_info=True)
|
||||
except asyncio.CancelledError:
|
||||
logger.info("[_queen_loop] Queen loop cancelled (normal shutdown)")
|
||||
raise
|
||||
except Exception as e:
|
||||
logger.exception("[_queen_loop] Queen conversation crashed: %s", e)
|
||||
raise
|
||||
finally:
|
||||
logger.warning("[_queen_loop] Queen loop exiting — clearing queen_executor for session '%s'", session.id)
|
||||
session.queen_executor = None
|
||||
|
||||
return asyncio.create_task(_queen_loop())
|
||||
|
||||
@@ -30,15 +30,13 @@ DEFAULT_EVENT_TYPES = [
|
||||
EventType.NODE_ACTION_PLAN,
|
||||
EventType.EDGE_TRAVERSED,
|
||||
EventType.GOAL_PROGRESS,
|
||||
EventType.QUEEN_INTERVENTION_REQUESTED,
|
||||
EventType.WORKER_ESCALATION_TICKET,
|
||||
EventType.NODE_INTERNAL_OUTPUT,
|
||||
EventType.NODE_STALLED,
|
||||
EventType.NODE_RETRY,
|
||||
EventType.NODE_TOOL_DOOM_LOOP,
|
||||
EventType.CONTEXT_COMPACTED,
|
||||
EventType.CONTEXT_USAGE_UPDATED,
|
||||
EventType.WORKER_LOADED,
|
||||
EventType.WORKER_GRAPH_LOADED,
|
||||
EventType.CREDENTIALS_REQUIRED,
|
||||
EventType.SUBAGENT_REPORT,
|
||||
EventType.QUEEN_PHASE_CHANGED,
|
||||
@@ -102,7 +100,7 @@ async def handle_events(request: web.Request) -> web.StreamResponse:
|
||||
"node_loop_iteration",
|
||||
"node_loop_started",
|
||||
"credentials_required",
|
||||
"worker_loaded",
|
||||
"worker_graph_loaded",
|
||||
"queen_phase_changed",
|
||||
}
|
||||
|
||||
@@ -171,10 +169,10 @@ async def handle_events(request: web.Request) -> web.StreamResponse:
|
||||
# currently running. This covers the case where the user navigated away
|
||||
# and back — the localStorage snapshot is stale, and the ring-buffer
|
||||
# replay may not include the original node_loop_started events.
|
||||
worker_runtime = getattr(session, "worker_runtime", None)
|
||||
if worker_runtime and getattr(worker_runtime, "is_running", False):
|
||||
graph_runtime = getattr(session, "graph_runtime", None)
|
||||
if graph_runtime and getattr(graph_runtime, "is_running", False):
|
||||
try:
|
||||
for stream_info in worker_runtime.get_active_streams():
|
||||
for stream_info in graph_runtime.get_active_streams():
|
||||
graph_id = stream_info.get("graph_id")
|
||||
stream_id = stream_info.get("stream_id", "default")
|
||||
for exec_id in stream_info.get("active_execution_ids", []):
|
||||
@@ -192,7 +190,7 @@ async def handle_events(request: web.Request) -> web.StreamResponse:
|
||||
pass
|
||||
|
||||
# Find the currently executing node via the executor
|
||||
for _gid, reg in worker_runtime._graphs.items():
|
||||
for _gid, reg in graph_runtime._graphs.items():
|
||||
if _gid != graph_id:
|
||||
continue
|
||||
for _ep_id, stream in reg.streams.items():
|
||||
|
||||
@@ -8,12 +8,24 @@ from typing import Any
|
||||
from aiohttp import web
|
||||
|
||||
from framework.credentials.validation import validate_agent_credentials
|
||||
from framework.graph.conversation import LEGACY_RUN_ID
|
||||
from framework.server.app import resolve_session, safe_path_segment, sessions_dir
|
||||
from framework.server.routes_sessions import _credential_error_response
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def _load_checkpoint_run_id(cp_path) -> str | None:
|
||||
try:
|
||||
checkpoint = json.loads(cp_path.read_text(encoding="utf-8"))
|
||||
except (json.JSONDecodeError, OSError):
|
||||
return None
|
||||
run_id = checkpoint.get("run_id")
|
||||
if isinstance(run_id, str) and run_id:
|
||||
return run_id
|
||||
return LEGACY_RUN_ID
|
||||
|
||||
|
||||
async def handle_trigger(request: web.Request) -> web.Response:
|
||||
"""POST /api/sessions/{session_id}/trigger — start an execution.
|
||||
|
||||
@@ -23,8 +35,8 @@ async def handle_trigger(request: web.Request) -> web.Response:
|
||||
if err:
|
||||
return err
|
||||
|
||||
if not session.worker_runtime:
|
||||
return web.json_response({"error": "No worker loaded in this session"}, status=503)
|
||||
if not session.graph_runtime:
|
||||
return web.json_response({"error": "No graph loaded in this session"}, status=503)
|
||||
|
||||
# Validate credentials before running — deferred from load time to avoid
|
||||
# showing the modal before the user clicks Run. Runs in executor because
|
||||
@@ -59,7 +71,7 @@ async def handle_trigger(request: web.Request) -> web.Response:
|
||||
if "resume_session_id" not in session_state:
|
||||
session_state["resume_session_id"] = session.id
|
||||
|
||||
execution_id = await session.worker_runtime.trigger(
|
||||
execution_id = await session.graph_runtime.trigger(
|
||||
entry_point_id,
|
||||
input_data,
|
||||
session_state=session_state,
|
||||
@@ -87,8 +99,8 @@ async def handle_inject(request: web.Request) -> web.Response:
|
||||
if err:
|
||||
return err
|
||||
|
||||
if not session.worker_runtime:
|
||||
return web.json_response({"error": "No worker loaded in this session"}, status=503)
|
||||
if not session.graph_runtime:
|
||||
return web.json_response({"error": "No graph loaded in this session"}, status=503)
|
||||
|
||||
body = await request.json()
|
||||
node_id = body.get("node_id")
|
||||
@@ -98,15 +110,16 @@ async def handle_inject(request: web.Request) -> web.Response:
|
||||
if not node_id:
|
||||
return web.json_response({"error": "node_id is required"}, status=400)
|
||||
|
||||
delivered = await session.worker_runtime.inject_input(node_id, content, graph_id=graph_id)
|
||||
delivered = await session.graph_runtime.inject_input(node_id, content, graph_id=graph_id)
|
||||
return web.json_response({"delivered": delivered})
|
||||
|
||||
|
||||
async def handle_chat(request: web.Request) -> web.Response:
|
||||
"""POST /api/sessions/{session_id}/chat — send a message to the queen.
|
||||
|
||||
The input box is permanently connected to the queen agent.
|
||||
Worker input is handled separately via /worker-input.
|
||||
The input box is permanently connected to the queen agent, including
|
||||
replies to worker-originated questions. The queen decides whether to
|
||||
relay the user's answer back into the worker via inject_message().
|
||||
|
||||
Body: {"message": "hello", "images": [{"type": "image_url", "image_url": {"url": "data:..."}}]}
|
||||
|
||||
@@ -115,20 +128,52 @@ async def handle_chat(request: web.Request) -> web.Response:
|
||||
"""
|
||||
session, err = resolve_session(request)
|
||||
if err:
|
||||
logger.debug("[handle_chat] Session resolution failed: %s", err)
|
||||
return err
|
||||
|
||||
body = await request.json()
|
||||
message = body.get("message", "")
|
||||
display_message = body.get("display_message")
|
||||
image_content = body.get("images") or None # list[dict] | None
|
||||
|
||||
logger.debug("[handle_chat] session_id=%s, message_len=%d, has_images=%s",
|
||||
session.id, len(message), bool(image_content))
|
||||
logger.debug("[handle_chat] session.queen_executor=%s", session.queen_executor)
|
||||
|
||||
if not message and not image_content:
|
||||
return web.json_response({"error": "message is required"}, status=400)
|
||||
|
||||
queen_executor = session.queen_executor
|
||||
if queen_executor is not None:
|
||||
logger.debug("[handle_chat] Queen executor exists, looking for 'queen' node...")
|
||||
logger.debug("[handle_chat] node_registry type=%s, id=%s", type(queen_executor.node_registry), id(queen_executor.node_registry))
|
||||
logger.debug("[handle_chat] node_registry keys: %s", list(queen_executor.node_registry.keys()))
|
||||
node = queen_executor.node_registry.get("queen")
|
||||
logger.debug("[handle_chat] node=%s, node_type=%s", node, type(node).__name__ if node else None)
|
||||
logger.debug("[handle_chat] has_inject_event=%s", hasattr(node, "inject_event") if node else False)
|
||||
|
||||
# Race condition: executor exists but node not created yet (still initializing)
|
||||
if node is None and session.queen_task is not None and not session.queen_task.done():
|
||||
logger.warning("[handle_chat] Queen executor exists but node not ready yet (initializing). Waiting...")
|
||||
# Wait a short time for initialization to progress
|
||||
import asyncio
|
||||
for _ in range(50): # Max 5 seconds
|
||||
await asyncio.sleep(0.1)
|
||||
node = queen_executor.node_registry.get("queen")
|
||||
if node is not None:
|
||||
logger.debug("[handle_chat] Node appeared after waiting")
|
||||
break
|
||||
else:
|
||||
logger.error("[handle_chat] Node still not available after 5s wait")
|
||||
|
||||
if node is not None and hasattr(node, "inject_event"):
|
||||
await node.inject_event(message, is_client_input=True, image_content=image_content)
|
||||
try:
|
||||
logger.debug("[handle_chat] Calling node.inject_event()...")
|
||||
await node.inject_event(message, is_client_input=True, image_content=image_content)
|
||||
logger.debug("[handle_chat] inject_event() completed successfully")
|
||||
except Exception as e:
|
||||
logger.exception("[handle_chat] inject_event() failed: %s", e)
|
||||
raise
|
||||
# Publish to EventBus so the session event log captures user messages
|
||||
from framework.runtime.event_bus import AgentEvent, EventType
|
||||
|
||||
@@ -139,7 +184,9 @@ async def handle_chat(request: web.Request) -> web.Response:
|
||||
node_id="queen",
|
||||
execution_id=session.id,
|
||||
data={
|
||||
"content": message,
|
||||
# Allow the UI to display a user-friendly echo while
|
||||
# the queen receives a richer relay wrapper.
|
||||
"content": display_message if display_message is not None else message,
|
||||
"image_count": len(image_content) if image_content else 0,
|
||||
},
|
||||
)
|
||||
@@ -150,11 +197,30 @@ async def handle_chat(request: web.Request) -> web.Response:
|
||||
"delivered": True,
|
||||
}
|
||||
)
|
||||
else:
|
||||
if node is None:
|
||||
logger.error("[handle_chat] CRITICAL: Queen node is None! node_registry has %d keys: %s, queen_task=%s, queen_task_done=%s",
|
||||
len(queen_executor.node_registry), list(queen_executor.node_registry.keys()),
|
||||
session.queen_task, session.queen_task.done() if session.queen_task else None)
|
||||
else:
|
||||
logger.error("[handle_chat] CRITICAL: Queen node exists but missing inject_event! node_attrs=%s",
|
||||
[a for a in dir(node) if not a.startswith('_')])
|
||||
|
||||
# Queen is dead — try to revive her
|
||||
logger.warning(
|
||||
"[handle_chat] Queen is dead for session '%s', reviving on /chat request", session.id
|
||||
)
|
||||
manager: Any = request.app["manager"]
|
||||
try:
|
||||
await manager.revive_queen(session, initial_prompt=message)
|
||||
logger.debug("[handle_chat] Calling manager.revive_queen()...")
|
||||
await manager.revive_queen(session)
|
||||
logger.debug("[handle_chat] revive_queen() completed successfully")
|
||||
# Inject the user's message into the revived queen's queue so the
|
||||
# event loop drains it and clears any restored pending_input_state.
|
||||
_revived_executor = session.queen_executor
|
||||
_revived_node = _revived_executor.node_registry.get("queen") if _revived_executor else None
|
||||
if _revived_node is not None and hasattr(_revived_node, "inject_event"):
|
||||
await _revived_node.inject_event(message, is_client_input=True, image_content=image_content)
|
||||
return web.json_response(
|
||||
{
|
||||
"status": "queen_revived",
|
||||
@@ -162,7 +228,7 @@ async def handle_chat(request: web.Request) -> web.Response:
|
||||
}
|
||||
)
|
||||
except Exception as e:
|
||||
logger.error("Failed to revive queen: %s", e)
|
||||
logger.exception("[handle_chat] Failed to revive queen: %s", e)
|
||||
return web.json_response({"error": "Queen not available"}, status=503)
|
||||
|
||||
|
||||
@@ -193,6 +259,10 @@ async def handle_queen_context(request: web.Request) -> web.Response:
|
||||
return web.json_response({"status": "queued", "delivered": True})
|
||||
|
||||
# Queen is dead — try to revive her
|
||||
logger.warning(
|
||||
"Queen is dead for session '%s', reviving on /queen-context request",
|
||||
session.id,
|
||||
)
|
||||
manager: Any = request.app["manager"]
|
||||
try:
|
||||
await manager.revive_queen(session)
|
||||
@@ -209,56 +279,16 @@ async def handle_queen_context(request: web.Request) -> web.Response:
|
||||
return web.json_response({"error": "Queen not available"}, status=503)
|
||||
|
||||
|
||||
async def handle_worker_input(request: web.Request) -> web.Response:
    """POST /api/sessions/{session_id}/worker-input — forward input to a waiting worker node.

    Asks the session's worker runtime which node is currently awaiting input
    and injects the message into that node as client input.

    Body: {"message": "..."}

    Returns the session-resolution error response when the session cannot be
    resolved, 400 when the message is missing or empty, 503 when the session
    has no worker runtime, and 404 when no worker node is awaiting input.
    """
    session, err = resolve_session(request)
    if err:
        return err

    payload = await request.json()
    text = payload.get("message", "")
    if not text:
        return web.json_response({"error": "message is required"}, status=400)

    runtime = session.worker_runtime
    if not runtime:
        return web.json_response({"error": "No worker loaded"}, status=503)

    node_id, graph_id = runtime.find_awaiting_node()
    if not node_id:
        return web.json_response({"error": "No worker node awaiting input"}, status=404)

    delivered = await runtime.inject_input(
        node_id,
        text,
        graph_id=graph_id,
        is_client_input=True,
    )
    result = {"status": "injected", "node_id": node_id, "delivered": delivered}
    return web.json_response(result)
|
||||
|
||||
|
||||
async def handle_goal_progress(request: web.Request) -> web.Response:
|
||||
"""GET /api/sessions/{session_id}/goal-progress — evaluate goal progress."""
|
||||
session, err = resolve_session(request)
|
||||
if err:
|
||||
return err
|
||||
|
||||
if not session.worker_runtime:
|
||||
return web.json_response({"error": "No worker loaded in this session"}, status=503)
|
||||
if not session.graph_runtime:
|
||||
return web.json_response({"error": "No graph loaded in this session"}, status=503)
|
||||
|
||||
progress = await session.worker_runtime.get_goal_progress()
|
||||
progress = await session.graph_runtime.get_goal_progress()
|
||||
return web.json_response(progress, dumps=lambda obj: json.dumps(obj, default=str))
|
||||
|
||||
|
||||
@@ -271,8 +301,8 @@ async def handle_resume(request: web.Request) -> web.Response:
|
||||
if err:
|
||||
return err
|
||||
|
||||
if not session.worker_runtime:
|
||||
return web.json_response({"error": "No worker loaded in this session"}, status=503)
|
||||
if not session.graph_runtime:
|
||||
return web.json_response({"error": "No graph loaded in this session"}, status=503)
|
||||
|
||||
body = await request.json()
|
||||
worker_session_id = body.get("session_id")
|
||||
@@ -296,30 +326,29 @@ async def handle_resume(request: web.Request) -> web.Response:
|
||||
except (json.JSONDecodeError, OSError) as e:
|
||||
return web.json_response({"error": f"Failed to read session: {e}"}, status=500)
|
||||
|
||||
if checkpoint_id:
|
||||
resume_session_state = {
|
||||
"resume_session_id": worker_session_id,
|
||||
"resume_from_checkpoint": checkpoint_id,
|
||||
}
|
||||
else:
|
||||
progress = state.get("progress", {})
|
||||
paused_at = progress.get("paused_at") or progress.get("resume_from")
|
||||
resume_session_state = {
|
||||
"resume_session_id": worker_session_id,
|
||||
"memory": state.get("memory", {}),
|
||||
"execution_path": progress.get("path", []),
|
||||
"node_visit_counts": progress.get("node_visit_counts", {}),
|
||||
}
|
||||
if paused_at:
|
||||
resume_session_state["paused_at"] = paused_at
|
||||
if not checkpoint_id:
|
||||
return web.json_response(
|
||||
{"error": "checkpoint_id is required; non-checkpoint resume is no longer supported"},
|
||||
status=400,
|
||||
)
|
||||
|
||||
entry_points = session.worker_runtime.get_entry_points()
|
||||
cp_path = session_dir / "checkpoints" / f"{checkpoint_id}.json"
|
||||
if not cp_path.exists():
|
||||
return web.json_response({"error": "Checkpoint not found"}, status=404)
|
||||
|
||||
resume_session_state = {
|
||||
"resume_session_id": worker_session_id,
|
||||
"resume_from_checkpoint": checkpoint_id,
|
||||
"run_id": _load_checkpoint_run_id(cp_path),
|
||||
}
|
||||
|
||||
entry_points = session.graph_runtime.get_entry_points()
|
||||
if not entry_points:
|
||||
return web.json_response({"error": "No entry points available"}, status=400)
|
||||
|
||||
input_data = state.get("input_data", {})
|
||||
|
||||
execution_id = await session.worker_runtime.trigger(
|
||||
execution_id = await session.graph_runtime.trigger(
|
||||
entry_points[0].id,
|
||||
input_data=input_data,
|
||||
session_state=resume_session_state,
|
||||
@@ -337,7 +366,7 @@ async def handle_resume(request: web.Request) -> web.Response:
|
||||
async def handle_pause(request: web.Request) -> web.Response:
|
||||
"""POST /api/sessions/{session_id}/pause — pause the worker (queen stays alive).
|
||||
|
||||
Mirrors the queen's stop_worker() tool: cancels all active worker
|
||||
Mirrors the queen's stop_graph() tool: cancels all active worker
|
||||
executions, pauses timers so nothing auto-restarts, but does NOT
|
||||
touch the queen so she can observe and react to the pause.
|
||||
"""
|
||||
@@ -345,10 +374,10 @@ async def handle_pause(request: web.Request) -> web.Response:
|
||||
if err:
|
||||
return err
|
||||
|
||||
if not session.worker_runtime:
|
||||
return web.json_response({"error": "No worker loaded in this session"}, status=503)
|
||||
if not session.graph_runtime:
|
||||
return web.json_response({"error": "No graph loaded in this session"}, status=503)
|
||||
|
||||
runtime = session.worker_runtime
|
||||
runtime = session.graph_runtime
|
||||
cancelled = []
|
||||
|
||||
for graph_id in runtime.list_graphs():
|
||||
@@ -397,8 +426,8 @@ async def handle_stop(request: web.Request) -> web.Response:
|
||||
if err:
|
||||
return err
|
||||
|
||||
if not session.worker_runtime:
|
||||
return web.json_response({"error": "No worker loaded in this session"}, status=503)
|
||||
if not session.graph_runtime:
|
||||
return web.json_response({"error": "No graph loaded in this session"}, status=503)
|
||||
|
||||
body = await request.json()
|
||||
execution_id = body.get("execution_id")
|
||||
@@ -406,8 +435,8 @@ async def handle_stop(request: web.Request) -> web.Response:
|
||||
if not execution_id:
|
||||
return web.json_response({"error": "execution_id is required"}, status=400)
|
||||
|
||||
for graph_id in session.worker_runtime.list_graphs():
|
||||
reg = session.worker_runtime.get_graph_registration(graph_id)
|
||||
for graph_id in session.graph_runtime.list_graphs():
|
||||
reg = session.graph_runtime.get_graph_registration(graph_id)
|
||||
if reg is None:
|
||||
continue
|
||||
for _ep_id, stream in reg.streams.items():
|
||||
@@ -452,8 +481,8 @@ async def handle_replay(request: web.Request) -> web.Response:
|
||||
if err:
|
||||
return err
|
||||
|
||||
if not session.worker_runtime:
|
||||
return web.json_response({"error": "No worker loaded in this session"}, status=503)
|
||||
if not session.graph_runtime:
|
||||
return web.json_response({"error": "No graph loaded in this session"}, status=503)
|
||||
|
||||
body = await request.json()
|
||||
worker_session_id = body.get("session_id")
|
||||
@@ -471,16 +500,17 @@ async def handle_replay(request: web.Request) -> web.Response:
|
||||
if not cp_path.exists():
|
||||
return web.json_response({"error": "Checkpoint not found"}, status=404)
|
||||
|
||||
entry_points = session.worker_runtime.get_entry_points()
|
||||
entry_points = session.graph_runtime.get_entry_points()
|
||||
if not entry_points:
|
||||
return web.json_response({"error": "No entry points available"}, status=400)
|
||||
|
||||
replay_session_state = {
|
||||
"resume_session_id": worker_session_id,
|
||||
"resume_from_checkpoint": checkpoint_id,
|
||||
"run_id": _load_checkpoint_run_id(cp_path),
|
||||
}
|
||||
|
||||
execution_id = await session.worker_runtime.trigger(
|
||||
execution_id = await session.graph_runtime.trigger(
|
||||
entry_points[0].id,
|
||||
input_data={},
|
||||
session_state=replay_session_state,
|
||||
@@ -517,7 +547,6 @@ def register_routes(app: web.Application) -> None:
|
||||
app.router.add_post("/api/sessions/{session_id}/inject", handle_inject)
|
||||
app.router.add_post("/api/sessions/{session_id}/chat", handle_chat)
|
||||
app.router.add_post("/api/sessions/{session_id}/queen-context", handle_queen_context)
|
||||
app.router.add_post("/api/sessions/{session_id}/worker-input", handle_worker_input)
|
||||
app.router.add_post("/api/sessions/{session_id}/pause", handle_pause)
|
||||
app.router.add_post("/api/sessions/{session_id}/resume", handle_resume)
|
||||
app.router.add_post("/api/sessions/{session_id}/stop", handle_stop)
|
||||
|
||||
@@ -13,9 +13,9 @@ logger = logging.getLogger(__name__)
|
||||
|
||||
def _get_graph_registration(session, graph_id: str):
|
||||
"""Get _GraphRegistration for a graph_id. Returns (reg, None) or (None, error_response)."""
|
||||
if not session.worker_runtime:
|
||||
if not session.graph_runtime:
|
||||
return None, web.json_response({"error": "No worker loaded in this session"}, status=503)
|
||||
reg = session.worker_runtime.get_graph_registration(graph_id)
|
||||
reg = session.graph_runtime.get_graph_registration(graph_id)
|
||||
if reg is None:
|
||||
return None, web.json_response({"error": f"Graph '{graph_id}' not found"}, status=404)
|
||||
return reg, None
|
||||
@@ -101,7 +101,7 @@ async def handle_list_nodes(request: web.Request) -> web.Response:
|
||||
{"source": e.source, "target": e.target, "condition": e.condition, "priority": e.priority}
|
||||
for e in graph.edges
|
||||
]
|
||||
rt = session.worker_runtime
|
||||
rt = session.graph_runtime
|
||||
entry_points = [
|
||||
{
|
||||
"id": ep.id,
|
||||
@@ -193,8 +193,8 @@ async def handle_node_criteria(request: web.Request) -> web.Response:
|
||||
}
|
||||
|
||||
worker_session_id = request.query.get("session_id")
|
||||
if worker_session_id and session.worker_runtime:
|
||||
log_store = getattr(session.worker_runtime, "_runtime_log_store", None)
|
||||
if worker_session_id and session.graph_runtime:
|
||||
log_store = getattr(session.graph_runtime, "_runtime_log_store", None)
|
||||
if log_store:
|
||||
details = await log_store.load_details(worker_session_id)
|
||||
if details:
|
||||
|
||||
@@ -22,10 +22,10 @@ async def handle_logs(request: web.Request) -> web.Response:
|
||||
if err:
|
||||
return err
|
||||
|
||||
if not session.worker_runtime:
|
||||
if not session.graph_runtime:
|
||||
return web.json_response({"error": "No worker loaded in this session"}, status=503)
|
||||
|
||||
log_store = getattr(session.worker_runtime, "_runtime_log_store", None)
|
||||
log_store = getattr(session.graph_runtime, "_runtime_log_store", None)
|
||||
if log_store is None:
|
||||
return web.json_response({"error": "Logging not enabled for this agent"}, status=404)
|
||||
|
||||
@@ -77,10 +77,10 @@ async def handle_node_logs(request: web.Request) -> web.Response:
|
||||
|
||||
node_id = request.match_info["node_id"]
|
||||
|
||||
if not session.worker_runtime:
|
||||
if not session.graph_runtime:
|
||||
return web.json_response({"error": "No worker loaded in this session"}, status=503)
|
||||
|
||||
log_store = getattr(session.worker_runtime, "_runtime_log_store", None)
|
||||
log_store = getattr(session.graph_runtime, "_runtime_log_store", None)
|
||||
if log_store is None:
|
||||
return web.json_response({"error": "Logging not enabled"}, status=404)
|
||||
|
||||
|
||||
@@ -1,26 +1,18 @@
|
||||
"""Session lifecycle, info, and worker-session browsing routes.
|
||||
"""Session lifecycle and session info routes.
|
||||
|
||||
Session-primary routes:
|
||||
- POST /api/sessions — create session (with or without worker)
|
||||
- GET /api/sessions — list all active sessions
|
||||
- GET /api/sessions/{session_id} — session detail
|
||||
- DELETE /api/sessions/{session_id} — stop session entirely
|
||||
- POST /api/sessions/{session_id}/worker — load a worker into session
|
||||
- DELETE /api/sessions/{session_id}/worker — unload worker from session
|
||||
- POST /api/sessions/{session_id}/graph — load a graph into session
|
||||
- DELETE /api/sessions/{session_id}/graph — unload graph from session
|
||||
- GET /api/sessions/{session_id}/stats — runtime statistics
|
||||
- GET /api/sessions/{session_id}/entry-points — list entry points
|
||||
- PATCH /api/sessions/{session_id}/triggers/{id} — update trigger task
|
||||
- GET /api/sessions/{session_id}/graphs — list graph IDs
|
||||
- GET /api/sessions/{session_id}/events/history — persisted eventbus log (for replay)
|
||||
|
||||
Worker session browsing (persisted execution runs on disk):
|
||||
- GET /api/sessions/{session_id}/worker-sessions — list
|
||||
- GET /api/sessions/{session_id}/worker-sessions/{ws_id} — detail
|
||||
- DELETE /api/sessions/{session_id}/worker-sessions/{ws_id} — delete
|
||||
- GET /api/sessions/{session_id}/worker-sessions/{ws_id}/checkpoints — list CPs
|
||||
- POST /api/sessions/{session_id}/worker-sessions/{ws_id}/checkpoints/{cp}/restore
|
||||
- GET /api/sessions/{session_id}/worker-sessions/{ws_id}/messages — messages
|
||||
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
@@ -36,10 +28,7 @@ from pathlib import Path
|
||||
from aiohttp import web
|
||||
|
||||
from framework.server.app import (
|
||||
cold_sessions_dir,
|
||||
resolve_session,
|
||||
safe_path_segment,
|
||||
sessions_dir,
|
||||
validate_agent_path,
|
||||
)
|
||||
from framework.server.session_manager import SessionManager
|
||||
@@ -60,9 +49,9 @@ def _session_to_live_dict(session) -> dict:
|
||||
queen_model: str = getattr(getattr(session, "runner", None), "model", "") or ""
|
||||
return {
|
||||
"session_id": session.id,
|
||||
"worker_id": session.worker_id,
|
||||
"worker_name": info.name if info else session.worker_id,
|
||||
"has_worker": session.worker_runtime is not None,
|
||||
"graph_id": session.graph_id,
|
||||
"graph_name": info.name if info else session.graph_id,
|
||||
"has_worker": session.graph_runtime is not None,
|
||||
"agent_path": str(session.worker_path) if session.worker_path else "",
|
||||
"description": info.description if info else "",
|
||||
"goal": info.goal_name if info else "",
|
||||
@@ -72,7 +61,7 @@ def _session_to_live_dict(session) -> dict:
|
||||
"intro_message": getattr(session.runner, "intro_message", "") or "",
|
||||
"queen_phase": phase_state.phase
|
||||
if phase_state
|
||||
else ("staging" if session.worker_runtime else "planning"),
|
||||
else ("staging" if session.graph_runtime else "planning"),
|
||||
"queen_supports_images": supports_image_tool_results(queen_model) if queen_model else True,
|
||||
}
|
||||
|
||||
@@ -118,16 +107,16 @@ async def handle_create_session(request: web.Request) -> web.Response:
|
||||
"""POST /api/sessions — create a session.
|
||||
|
||||
Body: {
|
||||
"agent_path": "..." (optional — if provided, creates session with worker),
|
||||
"agent_id": "..." (optional — worker ID override),
|
||||
"agent_path": "..." (optional — if provided, creates session with graph),
|
||||
"agent_id": "..." (optional — graph ID override),
|
||||
"session_id": "..." (optional — custom session ID),
|
||||
"model": "..." (optional),
|
||||
"initial_prompt": "..." (optional — first user message for the queen),
|
||||
}
|
||||
|
||||
When agent_path is provided, creates a session with a worker in one step
|
||||
When agent_path is provided, creates a session with a graph in one step
|
||||
(equivalent to the old POST /api/agents). Otherwise creates a queen-only
|
||||
session that can later have a worker loaded via POST /sessions/{id}/worker.
|
||||
session that can later have a graph loaded via POST /sessions/{id}/graph.
|
||||
"""
|
||||
manager = _get_manager(request)
|
||||
body = await request.json() if request.can_read_body else {}
|
||||
@@ -148,8 +137,8 @@ async def handle_create_session(request: web.Request) -> web.Response:
|
||||
|
||||
try:
|
||||
if agent_path:
|
||||
# One-step: create session + load worker
|
||||
session = await manager.create_session_with_worker(
|
||||
# One-step: create session + load graph
|
||||
session = await manager.create_session_with_worker_graph(
|
||||
agent_path,
|
||||
agent_id=agent_id,
|
||||
session_id=session_id,
|
||||
@@ -170,7 +159,7 @@ async def handle_create_session(request: web.Request) -> web.Response:
|
||||
if "currently loading" in msg:
|
||||
resolved_id = agent_id or (Path(agent_path).name if agent_path else "")
|
||||
return web.json_response(
|
||||
{"error": msg, "worker_id": resolved_id, "loading": True},
|
||||
{"error": msg, "graph_id": resolved_id, "loading": True},
|
||||
status=409,
|
||||
)
|
||||
return web.json_response({"error": msg}, status=409)
|
||||
@@ -224,8 +213,8 @@ async def handle_get_live_session(request: web.Request) -> web.Response:
|
||||
|
||||
data = _session_to_live_dict(session)
|
||||
|
||||
if session.worker_runtime:
|
||||
rt = session.worker_runtime
|
||||
if session.graph_runtime:
|
||||
rt = session.graph_runtime
|
||||
data["entry_points"] = [
|
||||
{
|
||||
"id": ep.id,
|
||||
@@ -257,7 +246,7 @@ async def handle_get_live_session(request: web.Request) -> web.Response:
|
||||
if mono is not None:
|
||||
entry["next_fire_in"] = max(0.0, mono - time.monotonic())
|
||||
data["entry_points"].append(entry)
|
||||
data["graphs"] = session.worker_runtime.list_graphs()
|
||||
data["graphs"] = session.graph_runtime.list_graphs()
|
||||
|
||||
return web.json_response(data)
|
||||
|
||||
@@ -278,14 +267,14 @@ async def handle_stop_session(request: web.Request) -> web.Response:
|
||||
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Worker lifecycle
|
||||
# Graph lifecycle
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
|
||||
async def handle_load_worker(request: web.Request) -> web.Response:
|
||||
"""POST /api/sessions/{session_id}/worker — load a worker into a session.
|
||||
async def handle_load_graph(request: web.Request) -> web.Response:
|
||||
"""POST /api/sessions/{session_id}/graph — load a graph into a session.
|
||||
|
||||
Body: {"agent_path": "...", "worker_id": "..." (optional), "model": "..." (optional)}
|
||||
Body: {"agent_path": "...", "graph_id": "..." (optional), "model": "..." (optional)}
|
||||
"""
|
||||
manager = _get_manager(request)
|
||||
session_id = request.match_info["session_id"]
|
||||
@@ -300,14 +289,14 @@ async def handle_load_worker(request: web.Request) -> web.Response:
|
||||
except ValueError as e:
|
||||
return web.json_response({"error": str(e)}, status=400)
|
||||
|
||||
worker_id = body.get("worker_id")
|
||||
graph_id = body.get("graph_id")
|
||||
model = body.get("model")
|
||||
|
||||
try:
|
||||
session = await manager.load_worker(
|
||||
session = await manager.load_graph(
|
||||
session_id,
|
||||
agent_path,
|
||||
worker_id=worker_id,
|
||||
graph_id=graph_id,
|
||||
model=model,
|
||||
)
|
||||
except ValueError as e:
|
||||
@@ -318,18 +307,18 @@ async def handle_load_worker(request: web.Request) -> web.Response:
|
||||
resp = _credential_error_response(e, agent_path)
|
||||
if resp is not None:
|
||||
return resp
|
||||
logger.exception("Error loading worker: %s", e)
|
||||
logger.exception("Error loading graph: %s", e)
|
||||
return web.json_response({"error": "Internal server error"}, status=500)
|
||||
|
||||
return web.json_response(_session_to_live_dict(session))
|
||||
|
||||
|
||||
async def handle_unload_worker(request: web.Request) -> web.Response:
|
||||
"""DELETE /api/sessions/{session_id}/worker — unload worker, keep queen alive."""
|
||||
async def handle_unload_graph(request: web.Request) -> web.Response:
|
||||
"""DELETE /api/sessions/{session_id}/graph — unload graph, keep queen alive."""
|
||||
manager = _get_manager(request)
|
||||
session_id = request.match_info["session_id"]
|
||||
|
||||
removed = await manager.unload_worker(session_id)
|
||||
removed = await manager.unload_graph(session_id)
|
||||
if not removed:
|
||||
session = manager.get_session(session_id)
|
||||
if session is None:
|
||||
@@ -338,11 +327,11 @@ async def handle_unload_worker(request: web.Request) -> web.Response:
|
||||
status=404,
|
||||
)
|
||||
return web.json_response(
|
||||
{"error": "No worker loaded in this session"},
|
||||
{"error": "No graph loaded in this session"},
|
||||
status=409,
|
||||
)
|
||||
|
||||
return web.json_response({"session_id": session_id, "worker_unloaded": True})
|
||||
return web.json_response({"session_id": session_id, "graph_unloaded": True})
|
||||
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
@@ -362,7 +351,7 @@ async def handle_session_stats(request: web.Request) -> web.Response:
|
||||
status=404,
|
||||
)
|
||||
|
||||
stats = session.worker_runtime.get_stats() if session.worker_runtime else {}
|
||||
stats = session.graph_runtime.get_stats() if session.graph_runtime else {}
|
||||
return web.json_response(stats)
|
||||
|
||||
|
||||
@@ -378,7 +367,7 @@ async def handle_session_entry_points(request: web.Request) -> web.Response:
|
||||
status=404,
|
||||
)
|
||||
|
||||
rt = session.worker_runtime
|
||||
rt = session.graph_runtime
|
||||
eps = rt.get_entry_points() if rt else []
|
||||
entry_points = [
|
||||
{
|
||||
@@ -580,293 +569,10 @@ async def handle_session_graphs(request: web.Request) -> web.Response:
|
||||
status=404,
|
||||
)
|
||||
|
||||
graphs = session.worker_runtime.list_graphs() if session.worker_runtime else []
|
||||
graphs = session.graph_runtime.list_graphs() if session.graph_runtime else []
|
||||
return web.json_response({"graphs": graphs})
|
||||
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Worker session browsing (persisted execution runs on disk)
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
|
||||
async def handle_list_worker_sessions(request: web.Request) -> web.Response:
    """List the worker sessions stored on disk for a session.

    When the live session cannot be resolved, the directory is looked up via
    ``cold_sessions_dir`` using the ``session_id`` URL parameter; if that
    also yields nothing, the resolution error is returned. Each listed entry
    carries the directory name as ``session_id``, selected fields from
    ``state.json`` when that file exists, and the number of ``.json`` files
    under ``checkpoints``.
    """
    session, err = resolve_session(request)
    if err:
        # Fall back to cold session lookup from disk
        root = cold_sessions_dir(request.match_info["session_id"])
        if root is None:
            return err
    elif not session.worker_path:
        return web.json_response({"sessions": []})
    else:
        root = sessions_dir(session)

    # NOTE(review): the existence check is applied to both lookup paths here;
    # confirm against the upstream file, whose indentation was not available.
    if not root.exists():
        return web.json_response({"sessions": []})

    listing = []
    for run_dir in sorted(root.iterdir(), reverse=True):
        if not run_dir.is_dir():
            continue

        state_file = run_dir / "state.json"
        # Keep directories that follow the "session_" naming or carry a state file.
        if not (run_dir.name.startswith("session_") or state_file.exists()):
            continue

        entry: dict = {"session_id": run_dir.name}

        if state_file.exists():
            try:
                state = json.loads(state_file.read_text(encoding="utf-8"))
                progress = state.get("progress", {})
                entry["status"] = state.get("status", "unknown")
                entry["started_at"] = state.get("started_at")
                entry["completed_at"] = state.get("completed_at")
                entry["steps"] = progress.get("steps_executed", 0)
                entry["paused_at"] = progress.get("paused_at")
            except (json.JSONDecodeError, OSError):
                entry["status"] = "error"

        ckpt_dir = run_dir / "checkpoints"
        if ckpt_dir.exists():
            entry["checkpoint_count"] = len(
                [item for item in ckpt_dir.iterdir() if item.suffix == ".json"]
            )
        else:
            entry["checkpoint_count"] = 0

        listing.append(entry)

    return web.json_response({"sessions": listing})
|
||||
|
||||
|
||||
async def handle_get_worker_session(request: web.Request) -> web.Response:
    """Return the parsed ``state.json`` of one worker session on disk.

    Responds with 503 when the session has no worker path, 404 when the
    state file does not exist, and 500 when it cannot be read or parsed.
    """
    session, err = resolve_session(request)
    if err:
        return err
    if not session.worker_path:
        return web.json_response({"error": "No worker loaded"}, status=503)

    # Support both URL param names: ws_id (new) or session_id (legacy)
    params = request.match_info
    raw_id = params.get("ws_id") or params.get("session_id", "")
    ws_id = safe_path_segment(raw_id)

    state_file = sessions_dir(session) / ws_id / "state.json"
    if not state_file.exists():
        return web.json_response({"error": "Session not found"}, status=404)

    try:
        contents = state_file.read_text(encoding="utf-8")
        state = json.loads(contents)
    except (json.JSONDecodeError, OSError) as e:
        return web.json_response({"error": f"Failed to read session: {e}"}, status=500)
    return web.json_response(state)
|
||||
|
||||
|
||||
async def handle_list_checkpoints(request: web.Request) -> web.Response:
    """List checkpoints for a worker session.

    Every ``*.json`` file in ``<sessions_dir>/<ws_id>/checkpoints`` produces one
    entry, visited in reverse-sorted path order. A file that cannot be read,
    decoded as UTF-8, or parsed into a JSON object is reported as
    ``{"checkpoint_id": <stem>, "error": "unreadable"}`` instead of failing the
    whole request.

    Responds 503 when the resolved session has no ``worker_path``. A missing
    checkpoints directory yields an empty list.
    """
    session, err = resolve_session(request)
    if err:
        return err

    if not session.worker_path:
        return web.json_response({"error": "No worker loaded"}, status=503)

    # Support both URL param names: ws_id (new) or session_id (legacy)
    ws_id = request.match_info.get("ws_id") or request.match_info.get("session_id", "")
    ws_id = safe_path_segment(ws_id)

    cp_dir = sessions_dir(session) / ws_id / "checkpoints"
    if not cp_dir.exists():
        return web.json_response({"checkpoints": []})

    checkpoints = []
    for f in sorted(cp_dir.iterdir(), reverse=True):
        if f.suffix != ".json":
            continue
        try:
            data = json.loads(f.read_text(encoding="utf-8"))
        except (ValueError, OSError):
            # ValueError covers both json.JSONDecodeError and the
            # UnicodeDecodeError raised by read_text on invalid UTF-8.
            data = None
        if not isinstance(data, dict):
            # Unparseable, or valid JSON that is not an object: there are no
            # fields to report, so flag the file rather than crash on .get().
            checkpoints.append({"checkpoint_id": f.stem, "error": "unreadable"})
            continue
        checkpoints.append(
            {
                "checkpoint_id": f.stem,
                "current_node": data.get("current_node"),
                "next_node": data.get("next_node"),
                "is_clean": data.get("is_clean", False),
                "timestamp": data.get("timestamp"),
            }
        )

    return web.json_response({"checkpoints": checkpoints})
|
||||
|
||||
|
||||
async def handle_delete_worker_session(request: web.Request) -> web.Response:
    """Delete a worker session directory from disk.

    Responds 503 when the resolved session has no ``worker_path``, 404 when the
    session path does not exist (or the id is empty), and 500 with a JSON error
    body when the removal itself fails. On success returns ``{"deleted": ws_id}``.
    """
    session, err = resolve_session(request)
    if err:
        return err

    if not session.worker_path:
        return web.json_response({"error": "No worker loaded"}, status=503)

    # Support both URL param names: ws_id (new) or session_id (legacy)
    ws_id = request.match_info.get("ws_id") or request.match_info.get("session_id", "")
    ws_id = safe_path_segment(ws_id)

    session_path = sessions_dir(session) / ws_id
    # ``base / ""`` is ``base`` itself, so an empty id must never reach rmtree:
    # it would remove the entire sessions directory.
    if not ws_id or not session_path.exists():
        return web.json_response({"error": "Session not found"}, status=404)

    try:
        shutil.rmtree(session_path)
    except OSError as e:
        # Report filesystem failures as JSON, like the read handler does.
        return web.json_response({"error": f"Failed to delete session: {e}"}, status=500)
    return web.json_response({"deleted": ws_id})
|
||||
|
||||
|
||||
async def handle_restore_checkpoint(request: web.Request) -> web.Response:
    """Trigger a new execution that resumes from a stored checkpoint.

    Responds 503 when the session has no ``worker_runtime``, 404 when the
    checkpoint file is missing, and 400 when the runtime exposes no entry
    points. Otherwise the first entry point is triggered with a session state
    naming the worker session and checkpoint to resume from.
    """
    session, err = resolve_session(request)
    if err:
        return err

    runtime = session.worker_runtime
    if not runtime:
        return web.json_response({"error": "No worker loaded in this session"}, status=503)

    params = request.match_info
    # Prefer the ``ws_id`` URL parameter; fall back to the legacy ``session_id``.
    ws_id = safe_path_segment(params.get("ws_id") or params.get("session_id", ""))
    checkpoint_id = safe_path_segment(params["checkpoint_id"])

    checkpoint_file = sessions_dir(session) / ws_id / "checkpoints" / f"{checkpoint_id}.json"
    if not checkpoint_file.exists():
        return web.json_response({"error": "Checkpoint not found"}, status=404)

    entry_points = runtime.get_entry_points()
    if not entry_points:
        return web.json_response({"error": "No entry points available"}, status=400)

    first_entry = entry_points[0]
    execution_id = await runtime.trigger(
        first_entry.id,
        input_data={},
        session_state={
            "resume_session_id": ws_id,
            "resume_from_checkpoint": checkpoint_id,
        },
    )

    result = {
        "execution_id": execution_id,
        "restored_from": ws_id,
        "checkpoint_id": checkpoint_id,
    }
    return web.json_response(result)
|
||||
|
||||
|
||||
async def handle_messages(request: web.Request) -> web.Response:
    """Get messages for a worker session.

    Message parts are read from ``<sessions>/<ws_id>/conversations`` in two
    layouts: flat (``parts/*.json``, tagged with node id ``"worker"``) and
    per-node (``<node_id>/parts/*.json``). Run lifecycle records from
    ``runs.jsonl`` are merged in as marker messages. The combined list is sorted
    by ``created_at``, falling back to ``seq``.

    When the live session cannot be resolved, the sessions directory is looked
    up from disk via ``cold_sessions_dir``; if that also fails the original
    error response is returned.

    Query parameters:
        node_id: read only that node's directory; the flat layout is skipped.
        client_only: ``true``/``1`` keeps run markers, user messages flagged
            ``is_client_input``, and assistant messages without tool calls from
            client-facing nodes. The filter only applies when the live session's
            runner graph has at least one client-facing node.

    Files or lines that cannot be read, decoded, or parsed into a JSON object
    are skipped.
    """
    session, err = resolve_session(request)
    if err:
        # Fall back to cold session lookup from disk
        sid = request.match_info["session_id"]
        sess_dir = cold_sessions_dir(sid)
        if sess_dir is None:
            return err
    else:
        if not session.worker_path:
            return web.json_response({"error": "No worker loaded"}, status=503)
        sess_dir = sessions_dir(session)

    # Support both URL param names: ws_id (new) or session_id (legacy)
    ws_id = request.match_info.get("ws_id") or request.match_info.get("session_id", "")
    ws_id = safe_path_segment(ws_id)

    convs_dir = sess_dir / ws_id / "conversations"
    if not convs_dir.exists():
        return web.json_response({"messages": []})

    filter_node = request.query.get("node_id")
    all_messages = []

    def _collect_msg_parts(parts_dir: Path, node_id: str) -> None:
        """Append every readable JSON-object part in *parts_dir*, tagged with *node_id*."""
        if not parts_dir.exists():
            return
        for part_file in sorted(parts_dir.iterdir()):
            if part_file.suffix != ".json":
                continue
            try:
                part = json.loads(part_file.read_text(encoding="utf-8"))
                if not isinstance(part, dict):
                    # Valid JSON but not an object: cannot carry message fields.
                    continue
                part["_node_id"] = node_id
                # Parts without their own timestamp sort by file mtime.
                part.setdefault("created_at", part_file.stat().st_mtime)
            except (ValueError, OSError):
                # ValueError covers json.JSONDecodeError and UnicodeDecodeError.
                continue
            all_messages.append(part)

    # Flat layout: conversations/parts/*.json
    if not filter_node:
        _collect_msg_parts(convs_dir / "parts", "worker")

    # Node-based layout: conversations/<node_id>/parts/*.json
    for node_dir in convs_dir.iterdir():
        if not node_dir.is_dir() or node_dir.name == "parts":
            continue
        if filter_node and node_dir.name != filter_node:
            continue
        _collect_msg_parts(node_dir / "parts", node_dir.name)

    # Merge run lifecycle markers from runs.jsonl (for historical dividers)
    runs_file = sess_dir / ws_id / "runs.jsonl"
    if runs_file.exists():
        try:
            run_lines = runs_file.read_text(encoding="utf-8").splitlines()
        except (OSError, UnicodeDecodeError):
            # Markers are best-effort; an unreadable log must not fail the request.
            run_lines = []
        for line in run_lines:
            line = line.strip()
            if not line:
                continue
            try:
                record = json.loads(line)
            except json.JSONDecodeError:
                continue
            if not isinstance(record, dict):
                continue
            all_messages.append(
                {
                    "seq": -1,
                    "role": "system",
                    "content": "",
                    "_node_id": "_run_marker",
                    "is_run_marker": True,
                    "run_id": record.get("run_id"),
                    "run_event": record.get("event"),
                    "created_at": record.get("created_at", 0),
                }
            )

    all_messages.sort(key=lambda m: m.get("created_at", m.get("seq", 0)))

    client_only = request.query.get("client_only", "").lower() in ("true", "1")
    if client_only:
        client_facing_nodes: set[str] = set()
        # ``session`` may be unset on the cold-lookup path, hence the guard.
        if session and session.runner and hasattr(session.runner, "graph"):
            for node in session.runner.graph.nodes:
                if node.client_facing:
                    client_facing_nodes.add(node.id)

        def _is_client_visible(m: dict) -> bool:
            """Return True when *m* should be kept by the ``client_only`` filter."""
            if m.get("is_run_marker"):
                return True
            if m.get("is_transition_marker"):
                return False
            # .get(): parts loaded from disk are not guaranteed to carry a role.
            role = m.get("role")
            if role == "user":
                return bool(m.get("is_client_input"))
            if role == "assistant":
                return not m.get("tool_calls") and m.get("_node_id") in client_facing_nodes
            return False

        if client_facing_nodes:
            all_messages = [m for m in all_messages if _is_client_visible(m)]

    return web.json_response({"messages": all_messages})
|
||||
|
||||
|
||||
async def handle_session_events_history(request: web.Request) -> web.Response:
|
||||
"""GET /api/sessions/{session_id}/events/history — persisted eventbus log.
|
||||
|
||||
@@ -1026,9 +732,9 @@ def register_routes(app: web.Application) -> None:
|
||||
app.router.add_get("/api/sessions/{session_id}", handle_get_live_session)
|
||||
app.router.add_delete("/api/sessions/{session_id}", handle_stop_session)
|
||||
|
||||
# Worker lifecycle
|
||||
app.router.add_post("/api/sessions/{session_id}/worker", handle_load_worker)
|
||||
app.router.add_delete("/api/sessions/{session_id}/worker", handle_unload_worker)
|
||||
# Graph lifecycle
|
||||
app.router.add_post("/api/sessions/{session_id}/graph", handle_load_graph)
|
||||
app.router.add_delete("/api/sessions/{session_id}/graph", handle_unload_graph)
|
||||
|
||||
# Session info
|
||||
app.router.add_post("/api/sessions/{session_id}/reveal", handle_reveal_session_folder)
|
||||
@@ -1040,24 +746,3 @@ def register_routes(app: web.Application) -> None:
|
||||
app.router.add_get("/api/sessions/{session_id}/graphs", handle_session_graphs)
|
||||
|
||||
app.router.add_get("/api/sessions/{session_id}/events/history", handle_session_events_history)
|
||||
|
||||
# Worker session browsing (session-primary)
|
||||
app.router.add_get("/api/sessions/{session_id}/worker-sessions", handle_list_worker_sessions)
|
||||
app.router.add_get(
|
||||
"/api/sessions/{session_id}/worker-sessions/{ws_id}", handle_get_worker_session
|
||||
)
|
||||
app.router.add_delete(
|
||||
"/api/sessions/{session_id}/worker-sessions/{ws_id}", handle_delete_worker_session
|
||||
)
|
||||
app.router.add_get(
|
||||
"/api/sessions/{session_id}/worker-sessions/{ws_id}/checkpoints",
|
||||
handle_list_checkpoints,
|
||||
)
|
||||
app.router.add_post(
|
||||
"/api/sessions/{session_id}/worker-sessions/{ws_id}/checkpoints/{checkpoint_id}/restore",
|
||||
handle_restore_checkpoint,
|
||||
)
|
||||
app.router.add_get(
|
||||
"/api/sessions/{session_id}/worker-sessions/{ws_id}/messages",
|
||||
handle_messages,
|
||||
)
|
||||
|
||||
@@ -35,20 +35,22 @@ class Session:
|
||||
# Queen (always present once started)
|
||||
queen_executor: Any = None # GraphExecutor for queen input injection
|
||||
queen_task: asyncio.Task | None = None
|
||||
# Worker (optional)
|
||||
worker_id: str | None = None
|
||||
# Loaded graph (optional)
|
||||
graph_id: str | None = None
|
||||
worker_path: Path | None = None
|
||||
runner: Any | None = None # AgentRunner
|
||||
worker_runtime: Any | None = None # AgentRuntime
|
||||
graph_runtime: Any | None = None # AgentRuntime
|
||||
worker_info: Any | None = None # AgentInfo
|
||||
# Queen phase state (building/staging/running)
|
||||
phase_state: Any = None # QueenPhaseState
|
||||
# Worker handoff subscription
|
||||
worker_handoff_sub: str | None = None
|
||||
# Memory consolidation subscription (fires on CONTEXT_COMPACTED)
|
||||
memory_consolidation_sub: str | None = None
|
||||
# Worker run digest subscription (fires on EXECUTION_COMPLETED / EXECUTION_FAILED)
|
||||
worker_digest_sub: str | None = None
|
||||
# Memory reflection + recall subscriptions
|
||||
memory_reflection_subs: list = field(default_factory=list) # list[str]
|
||||
# Worker colony memory subscriptions
|
||||
worker_memory_subs: list = field(default_factory=list) # list[str]
|
||||
# Per-execution colony recall cache for worker prompts
|
||||
worker_colony_recall_blocks: dict[str, str] = field(default_factory=dict)
|
||||
# Trigger definitions loaded from agent's triggers.json (available but inactive)
|
||||
available_triggers: dict[str, TriggerDefinition] = field(default_factory=dict)
|
||||
# Active trigger tracking (IDs currently firing + their asyncio tasks)
|
||||
@@ -94,7 +96,7 @@ class SessionManager:
|
||||
) -> Session:
|
||||
"""Create session infrastructure (EventBus, LLM) without starting queen.
|
||||
|
||||
Internal helper — use create_session() or create_session_with_worker().
|
||||
Internal helper — use create_session() or create_session_with_worker_graph().
|
||||
"""
|
||||
from framework.config import RuntimeConfig, get_hive_config
|
||||
from framework.runtime.event_bus import EventBus
|
||||
@@ -166,7 +168,7 @@ class SessionManager:
|
||||
)
|
||||
return session
|
||||
|
||||
async def create_session_with_worker(
|
||||
async def create_session_with_worker_graph(
|
||||
self,
|
||||
agent_path: str | Path,
|
||||
agent_id: str | None = None,
|
||||
@@ -184,7 +186,7 @@ class SessionManager:
|
||||
from framework.tools.queen_lifecycle_tools import build_worker_profile
|
||||
|
||||
agent_path = Path(agent_path)
|
||||
resolved_worker_id = agent_id or agent_path.name
|
||||
resolved_graph_id = agent_id or agent_path.name
|
||||
|
||||
# When cold-restoring, check meta.json for the phase — if the agent
|
||||
# was still being built we must NOT try to load the worker (the code
|
||||
@@ -219,11 +221,11 @@ class SessionManager:
|
||||
)
|
||||
session.queen_resume_from = queen_resume_from
|
||||
try:
|
||||
# Load worker FIRST (before queen) so queen gets full tools
|
||||
# Load the graph FIRST (before queen) so queen gets full tools
|
||||
await self._load_worker_core(
|
||||
session,
|
||||
agent_path,
|
||||
worker_id=resolved_worker_id,
|
||||
graph_id=resolved_graph_id,
|
||||
model=model,
|
||||
)
|
||||
|
||||
@@ -232,8 +234,8 @@ class SessionManager:
|
||||
|
||||
# Start queen with worker profile + lifecycle + monitoring tools
|
||||
worker_identity = (
|
||||
build_worker_profile(session.worker_runtime, agent_path=agent_path)
|
||||
if session.worker_runtime
|
||||
build_worker_profile(session.graph_runtime, agent_path=agent_path)
|
||||
if session.graph_runtime
|
||||
else None
|
||||
)
|
||||
await self._start_queen(
|
||||
@@ -270,10 +272,10 @@ class SessionManager:
|
||||
self,
|
||||
session: Session,
|
||||
agent_path: str | Path,
|
||||
worker_id: str | None = None,
|
||||
graph_id: str | None = None,
|
||||
model: str | None = None,
|
||||
) -> None:
|
||||
"""Load a worker agent into a session (core logic).
|
||||
"""Load a graph into a session (core logic).
|
||||
|
||||
Sets up the runner, runtime, and session fields. Does NOT notify
|
||||
the queen — callers handle that step.
|
||||
@@ -281,30 +283,23 @@ class SessionManager:
|
||||
from framework.runner import AgentRunner
|
||||
|
||||
agent_path = Path(agent_path)
|
||||
resolved_worker_id = worker_id or agent_path.name
|
||||
resolved_graph_id = graph_id or agent_path.name
|
||||
|
||||
if session.worker_runtime is not None:
|
||||
raise ValueError(f"Session '{session.id}' already has worker '{session.worker_id}'")
|
||||
if session.graph_runtime is not None:
|
||||
raise ValueError(f"Session '{session.id}' already has graph '{session.graph_id}'")
|
||||
|
||||
async with self._lock:
|
||||
if session.id in self._loading:
|
||||
raise ValueError(f"Session '{session.id}' is currently loading a worker")
|
||||
raise ValueError(f"Session '{session.id}' is currently loading a graph")
|
||||
self._loading.add(session.id)
|
||||
|
||||
try:
|
||||
# Blocking I/O — load in executor
|
||||
loop = asyncio.get_running_loop()
|
||||
|
||||
# Prioritize: explicit model arg > worker-specific model > session default
|
||||
from framework.config import (
|
||||
get_preferred_worker_model,
|
||||
get_worker_api_base,
|
||||
get_worker_api_key,
|
||||
get_worker_llm_extra_kwargs,
|
||||
)
|
||||
|
||||
worker_model = get_preferred_worker_model()
|
||||
resolved_model = model or worker_model or self._model
|
||||
# By default, workers share the session's LLM with the queen so
|
||||
# execution and memory reflection/recall stay on the same model.
|
||||
session_model = getattr(session.llm, "model", None)
|
||||
resolved_model = model or session_model or self._model
|
||||
runner = await loop.run_in_executor(
|
||||
None,
|
||||
lambda: AgentRunner.load(
|
||||
@@ -316,29 +311,8 @@ class SessionManager:
|
||||
),
|
||||
)
|
||||
|
||||
# If a worker-specific model is configured, build an LLM provider
|
||||
# with the correct worker credentials so _setup() doesn't fall back
|
||||
# to the queen's llm config (which may be a different provider).
|
||||
if worker_model and not model:
|
||||
from framework.config import get_hive_config
|
||||
|
||||
worker_llm_cfg = get_hive_config().get("worker_llm", {})
|
||||
if worker_llm_cfg.get("use_antigravity_subscription"):
|
||||
from framework.llm.antigravity import AntigravityProvider
|
||||
|
||||
runner._llm = AntigravityProvider(model=resolved_model)
|
||||
else:
|
||||
from framework.llm.litellm import LiteLLMProvider
|
||||
|
||||
worker_api_key = get_worker_api_key()
|
||||
worker_api_base = get_worker_api_base()
|
||||
worker_extra = get_worker_llm_extra_kwargs()
|
||||
runner._llm = LiteLLMProvider(
|
||||
model=resolved_model,
|
||||
api_key=worker_api_key,
|
||||
api_base=worker_api_base,
|
||||
**worker_extra,
|
||||
)
|
||||
if model is None:
|
||||
runner._llm = session.llm
|
||||
|
||||
# Setup with session's event bus
|
||||
if runner._agent_runtime is None:
|
||||
@@ -349,6 +323,16 @@ class SessionManager:
|
||||
|
||||
runtime = runner._agent_runtime
|
||||
|
||||
if runtime is not None:
|
||||
runtime._dynamic_memory_provider_factory = (
|
||||
lambda execution_id, session=session: (
|
||||
lambda execution_id=execution_id, session=session: session.worker_colony_recall_blocks.get(
|
||||
execution_id,
|
||||
"",
|
||||
)
|
||||
)
|
||||
)
|
||||
|
||||
# Load triggers from the agent's triggers.json definition file.
|
||||
from framework.tools.queen_lifecycle_tools import _read_agent_triggers_json
|
||||
|
||||
@@ -378,21 +362,30 @@ class SessionManager:
|
||||
info = runner.info()
|
||||
|
||||
# Update session
|
||||
session.worker_id = resolved_worker_id
|
||||
session.graph_id = resolved_graph_id
|
||||
session.worker_path = agent_path
|
||||
session.runner = runner
|
||||
session.worker_runtime = runtime
|
||||
session.graph_runtime = runtime
|
||||
session.worker_info = info
|
||||
|
||||
# Subscribe to execution completion for per-run digest generation
|
||||
self._subscribe_worker_digest(session)
|
||||
# Colony memory is additive; worker loading should still succeed if
|
||||
# that optional subscription path hits an import/runtime issue while
|
||||
# restoring an older session.
|
||||
try:
|
||||
await self._subscribe_worker_colony_memory(session)
|
||||
except Exception:
|
||||
logger.warning(
|
||||
"Worker colony memory subscription failed for '%s'; continuing without it",
|
||||
resolved_graph_id,
|
||||
exc_info=True,
|
||||
)
|
||||
|
||||
async with self._lock:
|
||||
self._loading.discard(session.id)
|
||||
|
||||
logger.info(
|
||||
"Worker '%s' loaded into session '%s'",
|
||||
resolved_worker_id,
|
||||
resolved_graph_id,
|
||||
session.id,
|
||||
)
|
||||
|
||||
@@ -495,10 +488,10 @@ class SessionManager:
|
||||
Called after worker loading to restart any timer/webhook triggers
|
||||
that were active before a server restart.
|
||||
"""
|
||||
if not session.available_triggers or not session.worker_runtime:
|
||||
if not session.available_triggers or not session.graph_runtime:
|
||||
return
|
||||
try:
|
||||
store = session.worker_runtime._session_store
|
||||
store = session.graph_runtime._session_store
|
||||
state = await store.read_state(session_id)
|
||||
if state and state.active_triggers:
|
||||
from framework.tools.queen_lifecycle_tools import (
|
||||
@@ -534,16 +527,16 @@ class SessionManager:
|
||||
except Exception as e:
|
||||
logger.warning("Failed to restore active triggers: %s", e)
|
||||
|
||||
async def load_worker(
|
||||
async def load_graph(
|
||||
self,
|
||||
session_id: str,
|
||||
agent_path: str | Path,
|
||||
worker_id: str | None = None,
|
||||
graph_id: str | None = None,
|
||||
model: str | None = None,
|
||||
) -> Session:
|
||||
"""Load a worker agent into an existing session (with running queen).
|
||||
"""Load a graph into an existing session (with running queen).
|
||||
|
||||
Starts the worker runtime and notifies the queen.
|
||||
Starts the graph runtime and notifies the queen.
|
||||
"""
|
||||
agent_path = Path(agent_path)
|
||||
|
||||
@@ -554,13 +547,13 @@ class SessionManager:
|
||||
await self._load_worker_core(
|
||||
session,
|
||||
agent_path,
|
||||
worker_id=worker_id,
|
||||
graph_id=graph_id,
|
||||
model=model,
|
||||
)
|
||||
|
||||
# Notify queen about the loaded worker (skip for queen itself).
|
||||
if agent_path.name != "queen" and session.worker_runtime:
|
||||
await self._notify_queen_worker_loaded(session)
|
||||
if agent_path.name != "queen" and session.graph_runtime:
|
||||
await self._notify_queen_graph_loaded(session)
|
||||
|
||||
# Update meta.json so cold-restore can discover this session by agent_path
|
||||
storage_session_id = session.queen_resume_from or session.id
|
||||
@@ -585,16 +578,16 @@ class SessionManager:
|
||||
await self._restore_active_triggers(session, session_id)
|
||||
|
||||
# Emit SSE event so the frontend can update UI
|
||||
await self._emit_worker_loaded(session)
|
||||
await self._emit_graph_loaded(session)
|
||||
|
||||
return session
|
||||
|
||||
async def unload_worker(self, session_id: str) -> bool:
|
||||
async def unload_graph(self, session_id: str) -> bool:
|
||||
"""Unload the worker from a session. Queen stays alive."""
|
||||
session = self._sessions.get(session_id)
|
||||
if session is None:
|
||||
return False
|
||||
if session.worker_runtime is None:
|
||||
if session.graph_runtime is None:
|
||||
return False
|
||||
|
||||
# Cleanup worker
|
||||
@@ -602,7 +595,7 @@ class SessionManager:
|
||||
try:
|
||||
await session.runner.cleanup_async()
|
||||
except Exception as e:
|
||||
logger.error("Error cleaning up worker '%s': %s", session.worker_id, e)
|
||||
logger.error("Error cleaning up graph '%s': %s", session.graph_id, e)
|
||||
|
||||
# Cancel active trigger timers
|
||||
for tid, task in session.active_timer_tasks.items():
|
||||
@@ -624,24 +617,25 @@ class SessionManager:
|
||||
await self._emit_trigger_events(session, "removed", session.available_triggers)
|
||||
session.available_triggers.clear()
|
||||
|
||||
if session.worker_digest_sub is not None:
|
||||
for sub_id in session.worker_memory_subs:
|
||||
try:
|
||||
session.event_bus.unsubscribe(session.worker_digest_sub)
|
||||
session.event_bus.unsubscribe(sub_id)
|
||||
except Exception:
|
||||
pass
|
||||
session.worker_digest_sub = None
|
||||
session.worker_memory_subs.clear()
|
||||
session.worker_colony_recall_blocks.clear()
|
||||
|
||||
worker_id = session.worker_id
|
||||
session.worker_id = None
|
||||
graph_id = session.graph_id
|
||||
session.graph_id = None
|
||||
session.worker_path = None
|
||||
session.runner = None
|
||||
session.worker_runtime = None
|
||||
session.graph_runtime = None
|
||||
session.worker_info = None
|
||||
|
||||
# Notify queen
|
||||
await self._notify_queen_worker_unloaded(session)
|
||||
|
||||
logger.info("Worker '%s' unloaded from session '%s'", worker_id, session_id)
|
||||
logger.info("Graph '%s' unloaded from session '%s'", graph_id, session_id)
|
||||
return True
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
@@ -668,20 +662,21 @@ class SessionManager:
|
||||
pass
|
||||
session.worker_handoff_sub = None
|
||||
|
||||
if session.worker_digest_sub is not None:
|
||||
for sub_id in session.worker_memory_subs:
|
||||
try:
|
||||
session.event_bus.unsubscribe(session.worker_digest_sub)
|
||||
session.event_bus.unsubscribe(sub_id)
|
||||
except Exception:
|
||||
pass
|
||||
session.worker_digest_sub = None
|
||||
session.worker_memory_subs.clear()
|
||||
session.worker_colony_recall_blocks.clear()
|
||||
|
||||
# Stop queen and memory consolidation subscription
|
||||
if session.memory_consolidation_sub is not None:
|
||||
# Stop queen and memory reflection/recall subscriptions
|
||||
for sub_id in session.memory_reflection_subs:
|
||||
try:
|
||||
session.event_bus.unsubscribe(session.memory_consolidation_sub)
|
||||
session.event_bus.unsubscribe(sub_id)
|
||||
except Exception:
|
||||
pass
|
||||
session.memory_consolidation_sub = None
|
||||
session.memory_reflection_subs.clear()
|
||||
if session.queen_task is not None:
|
||||
session.queen_task.cancel()
|
||||
session.queen_task = None
|
||||
@@ -713,15 +708,16 @@ class SessionManager:
|
||||
except Exception as e:
|
||||
logger.error("Error cleaning up worker: %s", e)
|
||||
|
||||
# Final memory consolidation — fire-and-forget so teardown isn't blocked.
|
||||
if _llm is not None and _session_dir.exists():
|
||||
# Final long reflection — fire-and-forget so teardown isn't blocked.
|
||||
if _llm is not None:
|
||||
import asyncio
|
||||
|
||||
from framework.agents.queen.queen_memory import consolidate_queen_memory
|
||||
from framework.agents.queen.queen_memory_v2 import colony_memory_dir
|
||||
from framework.agents.queen.reflection_agent import run_long_reflection
|
||||
|
||||
asyncio.create_task(
|
||||
consolidate_queen_memory(session_id, _session_dir, _llm),
|
||||
name=f"queen-memory-consolidation-{session_id}",
|
||||
run_long_reflection(_llm, memory_dir=colony_memory_dir(_storage_id), caller="queen"),
|
||||
name=f"queen-memory-long-reflection-{session_id}",
|
||||
)
|
||||
|
||||
# Close per-session event log
|
||||
@@ -759,133 +755,52 @@ class SessionManager:
|
||||
else:
|
||||
logger.warning("Worker handoff received but queen node not ready")
|
||||
|
||||
def _subscribe_worker_digest(self, session: Session) -> None:
|
||||
"""Subscribe to worker events to write per-run digests.
|
||||
|
||||
Three triggers:
|
||||
- NODE_LOOP_ITERATION: write a mid-run snapshot, throttled to at most
|
||||
once every _DIGEST_COOLDOWN seconds per execution.
|
||||
- TOOL_CALL_COMPLETED for delegate_to_sub_agent: same throttled snapshot.
|
||||
Orchestrator nodes often run all subagent calls in a single LLM turn,
|
||||
so NODE_LOOP_ITERATION only fires once at the end. Subagent
|
||||
completions provide intermediate checkpoints.
|
||||
- EXECUTION_COMPLETED / EXECUTION_FAILED: always write the final digest,
|
||||
bypassing the cooldown.
|
||||
"""
|
||||
import time as _time
|
||||
|
||||
from framework.runtime.event_bus import EventType as _ET
|
||||
|
||||
_DIGEST_COOLDOWN = 300.0 # seconds between mid-run snapshots
|
||||
|
||||
if session.worker_digest_sub is not None:
|
||||
async def _subscribe_worker_colony_memory(self, session: Session) -> None:
|
||||
"""Subscribe shared colony reflection/recall for top-level worker runs."""
|
||||
for sub_id in session.worker_memory_subs:
|
||||
try:
|
||||
session.event_bus.unsubscribe(session.worker_digest_sub)
|
||||
session.event_bus.unsubscribe(sub_id)
|
||||
except Exception:
|
||||
pass
|
||||
session.worker_digest_sub = None
|
||||
session.worker_memory_subs.clear()
|
||||
session.worker_colony_recall_blocks.clear()
|
||||
|
||||
agent_name = session.worker_path.name if session.worker_path else None
|
||||
if not agent_name:
|
||||
runtime = session.graph_runtime
|
||||
if runtime is None:
|
||||
return
|
||||
|
||||
_agent_name = agent_name
|
||||
_llm = session.llm
|
||||
_bus = session.event_bus
|
||||
# per-execution_id monotonic timestamp of last mid-run digest
|
||||
_last_digest: dict[str, float] = {}
|
||||
worker_sessions_dir = getattr(runtime, "_session_store", None)
|
||||
worker_sessions_dir = getattr(worker_sessions_dir, "sessions_dir", None)
|
||||
if worker_sessions_dir is None:
|
||||
return
|
||||
|
||||
def _resolve_run_id(exec_id: str) -> str | None:
|
||||
"""Look up the run_id for a given execution_id via EXECUTION_STARTED history."""
|
||||
for e in _bus.get_history(event_type=_ET.EXECUTION_STARTED, limit=200):
|
||||
if e.execution_id == exec_id and getattr(e, "run_id", None):
|
||||
return e.run_id
|
||||
return None
|
||||
from framework.agents.queen.queen_memory_v2 import colony_memory_dir, init_memory_dir
|
||||
from framework.agents.queen.reflection_agent import subscribe_worker_memory_triggers
|
||||
|
||||
async def _inject_digest_to_queen(run_id: str) -> None:
|
||||
"""Read the written digest and push it into the queen's conversation."""
|
||||
from framework.agents.worker_memory import digest_path
|
||||
colony_dir = colony_memory_dir(session.id)
|
||||
init_memory_dir(colony_dir, migrate_legacy=True)
|
||||
|
||||
try:
|
||||
content = digest_path(_agent_name, run_id).read_text(encoding="utf-8").strip()
|
||||
except OSError:
|
||||
return
|
||||
if not content:
|
||||
return
|
||||
executor = session.queen_executor
|
||||
if executor is None:
|
||||
return
|
||||
node = executor.node_registry.get("queen")
|
||||
if node is None or not hasattr(node, "inject_event"):
|
||||
return
|
||||
await node.inject_event(f"[WORKER_DIGEST]\n{content}")
|
||||
runtime._dynamic_memory_provider_factory = (
|
||||
lambda execution_id, session=session: (
|
||||
lambda execution_id=execution_id, session=session: session.worker_colony_recall_blocks.get(
|
||||
execution_id,
|
||||
"",
|
||||
)
|
||||
)
|
||||
)
|
||||
|
||||
async def _consolidate_and_notify(run_id: str, outcome_event: Any) -> None:
|
||||
"""Write the digest then push it to the queen."""
|
||||
from framework.agents.worker_memory import consolidate_worker_run
|
||||
# Colony memory config for reflection-at-handoff
|
||||
runtime._colony_memory_dir = colony_dir
|
||||
runtime._colony_worker_sessions_dir = worker_sessions_dir
|
||||
runtime._colony_recall_cache = session.worker_colony_recall_blocks
|
||||
runtime._colony_reflect_llm = session.llm
|
||||
|
||||
await consolidate_worker_run(_agent_name, run_id, outcome_event, _bus, _llm)
|
||||
await _inject_digest_to_queen(run_id)
|
||||
|
||||
async def _on_worker_event(event: Any) -> None:
|
||||
if event.stream_id == "queen":
|
||||
return
|
||||
|
||||
exec_id = event.execution_id
|
||||
|
||||
if event.type == _ET.EXECUTION_STARTED:
|
||||
# New run on this execution_id — start the cooldown timer so
|
||||
# mid-run snapshots don't fire immediately at session start.
|
||||
# The first snapshot will happen after _DIGEST_COOLDOWN seconds.
|
||||
if exec_id:
|
||||
_last_digest[exec_id] = _time.monotonic()
|
||||
|
||||
elif event.type in (
|
||||
_ET.EXECUTION_COMPLETED,
|
||||
_ET.EXECUTION_FAILED,
|
||||
_ET.EXECUTION_PAUSED,
|
||||
):
|
||||
# Final digest — always fire, ignore cooldown.
|
||||
# EXECUTION_PAUSED covers cancellation (queen re-triggering the
|
||||
# worker cancels the previous execution, emitting paused).
|
||||
run_id = getattr(event, "run_id", None) or _resolve_run_id(exec_id)
|
||||
if run_id:
|
||||
asyncio.create_task(
|
||||
_consolidate_and_notify(run_id, event),
|
||||
name=f"worker-digest-final-{run_id}",
|
||||
)
|
||||
|
||||
elif event.type in (_ET.NODE_LOOP_ITERATION, _ET.TOOL_CALL_COMPLETED):
|
||||
# Mid-run snapshot — respect 300 s cooldown per execution.
|
||||
# TOOL_CALL_COMPLETED is only interesting for subagent calls;
|
||||
# regular tool completions are too frequent and too cheap.
|
||||
if event.type == _ET.TOOL_CALL_COMPLETED:
|
||||
tool_name = (event.data or {}).get("tool_name", "")
|
||||
if tool_name != "delegate_to_sub_agent":
|
||||
return
|
||||
if not exec_id:
|
||||
return
|
||||
now = _time.monotonic()
|
||||
if now - _last_digest.get(exec_id, 0.0) < _DIGEST_COOLDOWN:
|
||||
return
|
||||
run_id = _resolve_run_id(exec_id)
|
||||
if run_id:
|
||||
_last_digest[exec_id] = now
|
||||
asyncio.create_task(
|
||||
_consolidate_and_notify(run_id, None),
|
||||
name=f"worker-digest-{run_id}",
|
||||
)
|
||||
|
||||
session.worker_digest_sub = session.event_bus.subscribe(
|
||||
event_types=[
|
||||
_ET.EXECUTION_STARTED,
|
||||
_ET.NODE_LOOP_ITERATION,
|
||||
_ET.TOOL_CALL_COMPLETED,
|
||||
_ET.EXECUTION_COMPLETED,
|
||||
_ET.EXECUTION_FAILED,
|
||||
_ET.EXECUTION_PAUSED,
|
||||
],
|
||||
handler=_on_worker_event,
|
||||
session.worker_memory_subs = await subscribe_worker_memory_triggers(
|
||||
session.event_bus,
|
||||
session.llm,
|
||||
worker_sessions_dir=worker_sessions_dir,
|
||||
colony_memory_dir=colony_dir,
|
||||
recall_cache=session.worker_colony_recall_blocks,
|
||||
)
|
||||
|
||||
def _subscribe_worker_handoffs(self, session: Session, executor: Any) -> None:
|
||||
@@ -918,6 +833,8 @@ class SessionManager:
|
||||
"""
|
||||
from framework.server.queen_orchestrator import create_queen
|
||||
|
||||
logger.debug("[_start_queen] Starting for session %s, current queen_executor=%s", session.id, session.queen_executor)
|
||||
|
||||
hive_home = Path.home() / ".hive"
|
||||
|
||||
# Determine which session directory to use for queen storage.
|
||||
@@ -1001,6 +918,7 @@ class SessionManager:
|
||||
pass
|
||||
session.event_bus.set_session_log(events_path, iteration_offset=iteration_offset)
|
||||
|
||||
logger.debug("[_start_queen] Calling create_queen...")
|
||||
session.queen_task = await create_queen(
|
||||
session=session,
|
||||
session_manager=self,
|
||||
@@ -1008,10 +926,11 @@ class SessionManager:
|
||||
queen_dir=queen_dir,
|
||||
initial_prompt=initial_prompt,
|
||||
)
|
||||
logger.debug("[_start_queen] create_queen returned, queen_task=%s, queen_executor=%s", session.queen_task, session.queen_executor)
|
||||
|
||||
# Auto-load worker on cold restore — the queen's conversation expects
|
||||
# the agent to be loaded, but the new session has no worker.
|
||||
if session.queen_resume_from and not session.worker_runtime:
|
||||
if session.queen_resume_from and not session.graph_runtime:
|
||||
meta_path = queen_dir / "meta.json"
|
||||
if meta_path.exists():
|
||||
try:
|
||||
@@ -1022,7 +941,7 @@ class SessionManager:
|
||||
if _agent_path and Path(_agent_path).exists():
|
||||
if _phase in ("staging", "running", None):
|
||||
# Agent fully built — load worker and resume
|
||||
await self.load_worker(session.id, _agent_path)
|
||||
await self.load_graph(session.id, _agent_path)
|
||||
if session.phase_state:
|
||||
await session.phase_state.switch_to_staging(source="auto")
|
||||
# Emit flowchart overlay so frontend can display it
|
||||
@@ -1041,38 +960,16 @@ class SessionManager:
|
||||
except Exception:
|
||||
logger.warning("Cold restore: failed to auto-load worker", exc_info=True)
|
||||
|
||||
# Memory consolidation — triggered by context compaction events.
|
||||
# Compaction is a natural signal that "enough has happened to be worth remembering".
|
||||
_consolidation_llm = session.llm
|
||||
_consolidation_session_dir = queen_dir
|
||||
|
||||
async def _on_compaction(_event) -> None:
|
||||
# Only consolidate on queen compactions — worker and subagent
|
||||
# compactions are frequent and don't warrant a memory update.
|
||||
if getattr(_event, "stream_id", None) != "queen":
|
||||
return
|
||||
from framework.agents.queen.queen_memory import consolidate_queen_memory
|
||||
|
||||
asyncio.create_task(
|
||||
consolidate_queen_memory(
|
||||
session.id, _consolidation_session_dir, _consolidation_llm
|
||||
),
|
||||
name=f"queen-memory-consolidation-{session.id}",
|
||||
)
|
||||
|
||||
from framework.runtime.event_bus import EventType as _ET
|
||||
|
||||
session.memory_consolidation_sub = session.event_bus.subscribe(
|
||||
event_types=[_ET.CONTEXT_COMPACTED],
|
||||
handler=_on_compaction,
|
||||
)
|
||||
# Memory reflection/recall subscriptions are set up inside
|
||||
# queen_orchestrator.create_queen() → _queen_loop() and stored
|
||||
# on session.memory_reflection_subs for teardown.
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Queen notifications
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
async def _notify_queen_worker_loaded(self, session: Session) -> None:
|
||||
"""Inject a system message into the queen about the loaded worker."""
|
||||
async def _notify_queen_graph_loaded(self, session: Session) -> None:
|
||||
"""Inject a system message into the queen about the loaded graph."""
|
||||
from framework.tools.queen_lifecycle_tools import build_worker_profile
|
||||
|
||||
executor = session.queen_executor
|
||||
@@ -1082,7 +979,7 @@ class SessionManager:
|
||||
if node is None or not hasattr(node, "inject_event"):
|
||||
return
|
||||
|
||||
profile = build_worker_profile(session.worker_runtime, agent_path=session.worker_path)
|
||||
profile = build_worker_profile(session.graph_runtime, agent_path=session.worker_path)
|
||||
|
||||
# Append available trigger info so the queen knows what's schedulable
|
||||
trigger_lines = ""
|
||||
@@ -1098,20 +995,20 @@ class SessionManager:
|
||||
+ "\n".join(parts)
|
||||
)
|
||||
|
||||
await node.inject_event(f"[SYSTEM] Worker loaded.{profile}{trigger_lines}")
|
||||
await node.inject_event(f"[SYSTEM] Graph loaded.{profile}{trigger_lines}")
|
||||
|
||||
async def _emit_worker_loaded(self, session: Session) -> None:
|
||||
"""Publish a WORKER_LOADED event so the frontend can update."""
|
||||
async def _emit_graph_loaded(self, session: Session) -> None:
|
||||
"""Publish a WORKER_GRAPH_LOADED event so the frontend can update."""
|
||||
from framework.runtime.event_bus import AgentEvent, EventType
|
||||
|
||||
info = session.worker_info
|
||||
await session.event_bus.publish(
|
||||
AgentEvent(
|
||||
type=EventType.WORKER_LOADED,
|
||||
type=EventType.WORKER_GRAPH_LOADED,
|
||||
stream_id="queen",
|
||||
data={
|
||||
"worker_id": session.worker_id,
|
||||
"worker_name": info.name if info else session.worker_id,
|
||||
"graph_id": session.graph_id,
|
||||
"graph_name": info.name if info else session.graph_id,
|
||||
"agent_path": str(session.worker_path) if session.worker_path else "",
|
||||
"goal": info.goal_name if info else "",
|
||||
"node_count": info.node_count if info else 0,
|
||||
@@ -1188,26 +1085,30 @@ class SessionManager:
|
||||
)
|
||||
)
|
||||
|
||||
async def revive_queen(self, session: Session, initial_prompt: str | None = None) -> None:
|
||||
async def revive_queen(self, session: Session) -> None:
|
||||
"""Revive a dead queen executor on an existing session.
|
||||
|
||||
Restarts the queen with the same session context (worker profile, tools, etc.).
|
||||
"""
|
||||
from framework.tools.queen_lifecycle_tools import build_worker_profile
|
||||
|
||||
logger.debug("[revive_queen] Starting revival for session '%s', current queen_executor=%s", session.id, session.queen_executor)
|
||||
|
||||
# Build worker identity if worker is loaded
|
||||
worker_identity = (
|
||||
build_worker_profile(session.worker_runtime, agent_path=session.worker_path)
|
||||
if session.worker_runtime
|
||||
build_worker_profile(session.graph_runtime, agent_path=session.worker_path)
|
||||
if session.graph_runtime
|
||||
else None
|
||||
)
|
||||
logger.debug("[revive_queen] worker_identity=%s", "present" if worker_identity else "None")
|
||||
|
||||
# Start queen with existing session context
|
||||
logger.debug("[revive_queen] Calling _start_queen...")
|
||||
await self._start_queen(
|
||||
session, worker_identity=worker_identity, initial_prompt=initial_prompt
|
||||
session, worker_identity=worker_identity
|
||||
)
|
||||
|
||||
logger.info("Queen revived for session '%s'", session.id)
|
||||
logger.info("Queen revived for session '%s', new queen_executor=%s", session.id, session.queen_executor)
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Lookups
|
||||
@@ -1216,22 +1117,22 @@ class SessionManager:
|
||||
def get_session(self, session_id: str) -> Session | None:
|
||||
return self._sessions.get(session_id)
|
||||
|
||||
def get_session_by_worker_id(self, worker_id: str) -> Session | None:
|
||||
"""Find a session by its loaded worker's ID."""
|
||||
def get_session_by_graph_id(self, graph_id: str) -> Session | None:
|
||||
"""Find a session by its loaded graph's ID."""
|
||||
for s in self._sessions.values():
|
||||
if s.worker_id == worker_id:
|
||||
if s.graph_id == graph_id:
|
||||
return s
|
||||
return None
|
||||
|
||||
def get_session_for_agent(self, agent_id: str) -> Session | None:
|
||||
"""Resolve an agent_id to a session (backward compat).
|
||||
|
||||
Checks session.id first, then session.worker_id.
|
||||
Checks session.id first, then session.graph_id.
|
||||
"""
|
||||
s = self._sessions.get(agent_id)
|
||||
if s:
|
||||
return s
|
||||
return self.get_session_by_worker_id(agent_id)
|
||||
return self.get_session_by_graph_id(agent_id)
|
||||
|
||||
def is_loading(self, session_id: str) -> bool:
|
||||
return session_id in self._loading
|
||||
|
||||
@@ -83,7 +83,7 @@ class MockStream:
|
||||
_active_executors: dict = field(default_factory=dict)
|
||||
active_execution_ids: set = field(default_factory=set)
|
||||
|
||||
async def cancel_execution(self, execution_id: str) -> bool:
|
||||
async def cancel_execution(self, execution_id: str, reason: str | None = None) -> bool:
|
||||
return execution_id in self._execution_tasks
|
||||
|
||||
|
||||
@@ -171,6 +171,7 @@ def _make_session(
|
||||
graph = MockGraphSpec(nodes=nodes or [], edges=edges or [])
|
||||
rt = runtime or MockRuntime(graph=graph, log_store=log_store)
|
||||
runner = MagicMock()
|
||||
runner.cleanup = AsyncMock()
|
||||
runner.intro_message = "Test intro"
|
||||
|
||||
mock_event_bus = MagicMock()
|
||||
@@ -185,10 +186,10 @@ def _make_session(
|
||||
llm=mock_llm,
|
||||
loaded_at=1000000.0,
|
||||
queen_executor=queen_executor,
|
||||
worker_id=agent_id,
|
||||
graph_id=agent_id,
|
||||
worker_path=agent_path,
|
||||
runner=runner,
|
||||
worker_runtime=rt,
|
||||
graph_runtime=rt,
|
||||
worker_info=MockAgentInfo(),
|
||||
)
|
||||
|
||||
@@ -224,7 +225,7 @@ def _write_sample_session(base: Path, session_id: str):
|
||||
"started_at": "2026-02-20T12:00:00",
|
||||
"completed_at": None,
|
||||
"input_data": {"user_request": "test input"},
|
||||
"memory": {"key1": "value1"},
|
||||
"data_buffer": {"key1": "value1"},
|
||||
"progress": {
|
||||
"current_node": "node_b",
|
||||
"paused_at": "node_b",
|
||||
@@ -368,7 +369,7 @@ class TestSessionCRUD:
|
||||
async def test_create_session_with_worker_forwards_session_id(self):
|
||||
app = create_app()
|
||||
manager = app["manager"]
|
||||
manager.create_session_with_worker = AsyncMock(
|
||||
manager.create_session_with_worker_graph = AsyncMock(
|
||||
return_value=_make_session(agent_id="my-custom-session")
|
||||
)
|
||||
|
||||
@@ -384,7 +385,7 @@ class TestSessionCRUD:
|
||||
|
||||
assert resp.status == 201
|
||||
assert data["session_id"] == "my-custom-session"
|
||||
manager.create_session_with_worker.assert_awaited_once_with(
|
||||
manager.create_session_with_worker_graph.assert_awaited_once_with(
|
||||
str(EXAMPLE_AGENT_PATH.resolve()),
|
||||
agent_id=None,
|
||||
session_id="my-custom-session",
|
||||
@@ -616,10 +617,33 @@ class TestExecution:
|
||||
assert data["delivered"] is True
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_chat_injects_when_node_waiting(self):
|
||||
"""When a node is awaiting input, /chat should inject instead of trigger."""
|
||||
async def test_chat_publishes_display_message_when_provided(self):
|
||||
session = _make_session()
|
||||
session.worker_runtime.find_awaiting_node = lambda: ("chat_node", "primary")
|
||||
queen_node = session.queen_executor.node_registry["queen"]
|
||||
app = _make_app_with_session(session)
|
||||
async with TestClient(TestServer(app)) as client:
|
||||
resp = await client.post(
|
||||
"/api/sessions/test_agent/chat",
|
||||
json={
|
||||
"message": '[Worker asked: "Need approval"]\nUser answered: "Ship it"',
|
||||
"display_message": "Ship it",
|
||||
},
|
||||
)
|
||||
assert resp.status == 200
|
||||
|
||||
published_event = session.event_bus.publish.await_args.args[0]
|
||||
assert published_event.data["content"] == "Ship it"
|
||||
queen_node.inject_event.assert_awaited_once_with(
|
||||
'[Worker asked: "Need approval"]\nUser answered: "Ship it"',
|
||||
is_client_input=True,
|
||||
image_content=None,
|
||||
)
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_chat_prefers_queen_even_when_node_waiting(self):
|
||||
"""When the queen is alive, /chat routes to queen even if a node is waiting."""
|
||||
session = _make_session()
|
||||
session.graph_runtime.find_awaiting_node = lambda: ("chat_node", "primary")
|
||||
app = _make_app_with_session(session)
|
||||
async with TestClient(TestServer(app)) as client:
|
||||
resp = await client.post(
|
||||
@@ -628,8 +652,7 @@ class TestExecution:
|
||||
)
|
||||
assert resp.status == 200
|
||||
data = await resp.json()
|
||||
assert data["status"] == "injected"
|
||||
assert data["node_id"] == "chat_node"
|
||||
assert data["status"] == "queen"
|
||||
assert data["delivered"] is True
|
||||
|
||||
@pytest.mark.asyncio
|
||||
@@ -644,6 +667,19 @@ class TestExecution:
|
||||
)
|
||||
assert resp.status == 503
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_worker_input_route_removed(self):
|
||||
session = _make_session()
|
||||
app = _make_app_with_session(session)
|
||||
async with TestClient(TestServer(app)) as client:
|
||||
resp = await client.post(
|
||||
"/api/sessions/test_agent/worker-input",
|
||||
json={"message": "hello"},
|
||||
)
|
||||
# No POST handler remains for this path; aiohttp falls through to an
|
||||
# overlapping GET/HEAD route and reports method-not-allowed.
|
||||
assert resp.status == 405
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_chat_missing_message(self):
|
||||
session = _make_session()
|
||||
@@ -700,7 +736,7 @@ class TestExecution:
|
||||
class TestResume:
|
||||
@pytest.mark.asyncio
|
||||
async def test_resume_from_session_state(self, sample_session, tmp_agent_dir):
|
||||
"""Resume using session state (paused_at)."""
|
||||
"""Direct state-based resume is rejected; checkpoint resume is required."""
|
||||
session_id, session_dir, state = sample_session
|
||||
tmp_path, agent_name, base = tmp_agent_dir
|
||||
|
||||
@@ -712,11 +748,9 @@ class TestResume:
|
||||
"/api/sessions/test_agent/resume",
|
||||
json={"session_id": session_id},
|
||||
)
|
||||
assert resp.status == 200
|
||||
assert resp.status == 400
|
||||
data = await resp.json()
|
||||
assert data["execution_id"] == "exec_test_123"
|
||||
assert data["resumed_from"] == session_id
|
||||
assert data["checkpoint_id"] is None
|
||||
assert "checkpoint_id is required" in data["error"]
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_resume_with_checkpoint(self, sample_session, tmp_agent_dir):
|
||||
@@ -725,6 +759,7 @@ class TestResume:
|
||||
tmp_path, agent_name, base = tmp_agent_dir
|
||||
|
||||
session = _make_session(tmp_dir=tmp_path / ".hive" / "agents" / agent_name)
|
||||
session.graph_runtime.trigger = AsyncMock(return_value="exec_test_123")
|
||||
app = _make_app_with_session(session)
|
||||
|
||||
async with TestClient(TestServer(app)) as client:
|
||||
@@ -738,6 +773,8 @@ class TestResume:
|
||||
assert resp.status == 200
|
||||
data = await resp.json()
|
||||
assert data["checkpoint_id"] == "cp_node_complete_node_a_001"
|
||||
_, kwargs = session.graph_runtime.trigger.await_args
|
||||
assert kwargs["session_state"]["run_id"] == "__legacy_run__"
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_resume_missing_session_id(self):
|
||||
@@ -767,7 +804,7 @@ class TestStop:
|
||||
async def test_stop_found(self):
|
||||
session = _make_session()
|
||||
# Put a mock task in the stream so cancel_execution returns True
|
||||
session.worker_runtime._mock_streams["default"]._execution_tasks["exec_abc"] = MagicMock()
|
||||
session.graph_runtime._mock_streams["default"]._execution_tasks["exec_abc"] = MagicMock()
|
||||
app = _make_app_with_session(session)
|
||||
async with TestClient(TestServer(app)) as client:
|
||||
resp = await client.post(
|
||||
@@ -808,6 +845,7 @@ class TestReplay:
|
||||
tmp_path, agent_name, base = tmp_agent_dir
|
||||
|
||||
session = _make_session(tmp_dir=tmp_path / ".hive" / "agents" / agent_name)
|
||||
session.graph_runtime.trigger = AsyncMock(return_value="exec_test_123")
|
||||
app = _make_app_with_session(session)
|
||||
|
||||
async with TestClient(TestServer(app)) as client:
|
||||
@@ -822,6 +860,8 @@ class TestReplay:
|
||||
data = await resp.json()
|
||||
assert data["execution_id"] == "exec_test_123"
|
||||
assert data["replayed_from"] == session_id
|
||||
_, kwargs = session.graph_runtime.trigger.await_args
|
||||
assert kwargs["session_state"]["run_id"] == "__legacy_run__"
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_replay_missing_fields(self):
|
||||
@@ -859,329 +899,6 @@ class TestReplay:
|
||||
assert resp.status == 404
|
||||
|
||||
|
||||
class TestWorkerSessions:
|
||||
@pytest.mark.asyncio
|
||||
async def test_list_sessions(self, sample_session, tmp_agent_dir):
|
||||
session_id, session_dir, state = sample_session
|
||||
tmp_path, agent_name, base = tmp_agent_dir
|
||||
|
||||
session = _make_session(tmp_dir=tmp_path / ".hive" / "agents" / agent_name)
|
||||
app = _make_app_with_session(session)
|
||||
|
||||
async with TestClient(TestServer(app)) as client:
|
||||
resp = await client.get("/api/sessions/test_agent/worker-sessions")
|
||||
assert resp.status == 200
|
||||
data = await resp.json()
|
||||
assert len(data["sessions"]) == 1
|
||||
assert data["sessions"][0]["session_id"] == session_id
|
||||
assert data["sessions"][0]["status"] == "paused"
|
||||
assert data["sessions"][0]["steps"] == 5
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_list_sessions_includes_custom_id(self, custom_id_session, tmp_agent_dir):
|
||||
session_id, session_dir, state = custom_id_session
|
||||
tmp_path, agent_name, base = tmp_agent_dir
|
||||
|
||||
session = _make_session(tmp_dir=tmp_path / ".hive" / "agents" / agent_name)
|
||||
app = _make_app_with_session(session)
|
||||
|
||||
async with TestClient(TestServer(app)) as client:
|
||||
resp = await client.get("/api/sessions/test_agent/worker-sessions")
|
||||
assert resp.status == 200
|
||||
data = await resp.json()
|
||||
assert len(data["sessions"]) == 1
|
||||
assert data["sessions"][0]["session_id"] == session_id
|
||||
assert data["sessions"][0]["status"] == "paused"
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_list_sessions_empty(self, tmp_agent_dir):
|
||||
tmp_path, agent_name, base = tmp_agent_dir
|
||||
session = _make_session(tmp_dir=tmp_path / ".hive" / "agents" / agent_name)
|
||||
app = _make_app_with_session(session)
|
||||
|
||||
async with TestClient(TestServer(app)) as client:
|
||||
resp = await client.get("/api/sessions/test_agent/worker-sessions")
|
||||
assert resp.status == 200
|
||||
data = await resp.json()
|
||||
assert data["sessions"] == []
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_get_session(self, sample_session, tmp_agent_dir):
|
||||
session_id, session_dir, state = sample_session
|
||||
tmp_path, agent_name, base = tmp_agent_dir
|
||||
|
||||
session = _make_session(tmp_dir=tmp_path / ".hive" / "agents" / agent_name)
|
||||
app = _make_app_with_session(session)
|
||||
|
||||
async with TestClient(TestServer(app)) as client:
|
||||
resp = await client.get(f"/api/sessions/test_agent/worker-sessions/{session_id}")
|
||||
assert resp.status == 200
|
||||
data = await resp.json()
|
||||
assert data["status"] == "paused"
|
||||
assert data["memory"]["key1"] == "value1"
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_get_session_not_found(self, tmp_agent_dir):
|
||||
tmp_path, agent_name, base = tmp_agent_dir
|
||||
session = _make_session(tmp_dir=tmp_path / ".hive" / "agents" / agent_name)
|
||||
app = _make_app_with_session(session)
|
||||
|
||||
async with TestClient(TestServer(app)) as client:
|
||||
resp = await client.get("/api/sessions/test_agent/worker-sessions/nonexistent")
|
||||
assert resp.status == 404
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_delete_session(self, sample_session, tmp_agent_dir):
|
||||
session_id, session_dir, state = sample_session
|
||||
tmp_path, agent_name, base = tmp_agent_dir
|
||||
|
||||
session = _make_session(tmp_dir=tmp_path / ".hive" / "agents" / agent_name)
|
||||
app = _make_app_with_session(session)
|
||||
|
||||
async with TestClient(TestServer(app)) as client:
|
||||
resp = await client.delete(f"/api/sessions/test_agent/worker-sessions/{session_id}")
|
||||
assert resp.status == 200
|
||||
data = await resp.json()
|
||||
assert data["deleted"] == session_id
|
||||
|
||||
# Verify deleted
|
||||
assert not session_dir.exists()
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_delete_session_not_found(self, tmp_agent_dir):
|
||||
tmp_path, agent_name, base = tmp_agent_dir
|
||||
session = _make_session(tmp_dir=tmp_path / ".hive" / "agents" / agent_name)
|
||||
app = _make_app_with_session(session)
|
||||
|
||||
async with TestClient(TestServer(app)) as client:
|
||||
resp = await client.delete("/api/sessions/test_agent/worker-sessions/nonexistent")
|
||||
assert resp.status == 404
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_list_checkpoints(self, sample_session, tmp_agent_dir):
|
||||
session_id, session_dir, state = sample_session
|
||||
tmp_path, agent_name, base = tmp_agent_dir
|
||||
|
||||
session = _make_session(tmp_dir=tmp_path / ".hive" / "agents" / agent_name)
|
||||
app = _make_app_with_session(session)
|
||||
|
||||
async with TestClient(TestServer(app)) as client:
|
||||
resp = await client.get(
|
||||
f"/api/sessions/test_agent/worker-sessions/{session_id}/checkpoints"
|
||||
)
|
||||
assert resp.status == 200
|
||||
data = await resp.json()
|
||||
assert len(data["checkpoints"]) == 1
|
||||
cp = data["checkpoints"][0]
|
||||
assert cp["checkpoint_id"] == "cp_node_complete_node_a_001"
|
||||
assert cp["current_node"] == "node_a"
|
||||
assert cp["is_clean"] is True
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_restore_checkpoint(self, sample_session, tmp_agent_dir):
|
||||
session_id, session_dir, state = sample_session
|
||||
tmp_path, agent_name, base = tmp_agent_dir
|
||||
|
||||
session = _make_session(tmp_dir=tmp_path / ".hive" / "agents" / agent_name)
|
||||
app = _make_app_with_session(session)
|
||||
|
||||
async with TestClient(TestServer(app)) as client:
|
||||
resp = await client.post(
|
||||
f"/api/sessions/test_agent/worker-sessions/{session_id}"
|
||||
"/checkpoints/cp_node_complete_node_a_001/restore"
|
||||
)
|
||||
assert resp.status == 200
|
||||
data = await resp.json()
|
||||
assert data["execution_id"] == "exec_test_123"
|
||||
assert data["restored_from"] == session_id
|
||||
assert data["checkpoint_id"] == "cp_node_complete_node_a_001"
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_restore_checkpoint_not_found(self, sample_session, tmp_agent_dir):
|
||||
session_id, session_dir, state = sample_session
|
||||
tmp_path, agent_name, base = tmp_agent_dir
|
||||
|
||||
session = _make_session(tmp_dir=tmp_path / ".hive" / "agents" / agent_name)
|
||||
app = _make_app_with_session(session)
|
||||
|
||||
async with TestClient(TestServer(app)) as client:
|
||||
resp = await client.post(
|
||||
f"/api/sessions/test_agent/worker-sessions/{session_id}/checkpoints/nonexistent_cp/restore"
|
||||
)
|
||||
assert resp.status == 404
|
||||
|
||||
|
||||
class TestMessages:
|
||||
@pytest.mark.asyncio
|
||||
async def test_get_messages(self, sample_session, tmp_agent_dir):
|
||||
session_id, session_dir, state = sample_session
|
||||
tmp_path, agent_name, base = tmp_agent_dir
|
||||
|
||||
session = _make_session(tmp_dir=tmp_path / ".hive" / "agents" / agent_name)
|
||||
app = _make_app_with_session(session)
|
||||
|
||||
async with TestClient(TestServer(app)) as client:
|
||||
resp = await client.get(
|
||||
f"/api/sessions/test_agent/worker-sessions/{session_id}/messages"
|
||||
)
|
||||
assert resp.status == 200
|
||||
data = await resp.json()
|
||||
msgs = data["messages"]
|
||||
assert len(msgs) == 3
|
||||
# Should be sorted by seq
|
||||
assert msgs[0]["seq"] == 1
|
||||
assert msgs[0]["role"] == "user"
|
||||
assert msgs[0]["_node_id"] == "node_a"
|
||||
assert msgs[1]["seq"] == 2
|
||||
assert msgs[1]["role"] == "assistant"
|
||||
assert msgs[2]["seq"] == 3
|
||||
assert msgs[2]["_node_id"] == "node_b"
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_get_messages_filtered_by_node(self, sample_session, tmp_agent_dir):
|
||||
session_id, session_dir, state = sample_session
|
||||
tmp_path, agent_name, base = tmp_agent_dir
|
||||
|
||||
session = _make_session(tmp_dir=tmp_path / ".hive" / "agents" / agent_name)
|
||||
app = _make_app_with_session(session)
|
||||
|
||||
async with TestClient(TestServer(app)) as client:
|
||||
resp = await client.get(
|
||||
f"/api/sessions/test_agent/worker-sessions/{session_id}/messages?node_id=node_a"
|
||||
)
|
||||
assert resp.status == 200
|
||||
data = await resp.json()
|
||||
msgs = data["messages"]
|
||||
assert len(msgs) == 2
|
||||
assert all(m["_node_id"] == "node_a" for m in msgs)
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_get_messages_no_conversations(self, tmp_agent_dir):
|
||||
"""Session without conversations directory returns empty list."""
|
||||
tmp_path, agent_name, base = tmp_agent_dir
|
||||
worker_session_id = "session_empty"
|
||||
session_dir = base / "sessions" / worker_session_id
|
||||
session_dir.mkdir(parents=True)
|
||||
(session_dir / "state.json").write_text(json.dumps({"status": "completed"}))
|
||||
|
||||
session = _make_session(tmp_dir=tmp_path / ".hive" / "agents" / agent_name)
|
||||
app = _make_app_with_session(session)
|
||||
|
||||
async with TestClient(TestServer(app)) as client:
|
||||
resp = await client.get(
|
||||
f"/api/sessions/test_agent/worker-sessions/{worker_session_id}/messages"
|
||||
)
|
||||
assert resp.status == 200
|
||||
data = await resp.json()
|
||||
assert data["messages"] == []
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_get_messages_client_only(self, tmp_agent_dir):
|
||||
"""client_only=true keeps user+client-facing assistant."""
|
||||
tmp_path, agent_name, base = tmp_agent_dir
|
||||
worker_session_id = "session_client_only"
|
||||
session_dir = base / "sessions" / worker_session_id
|
||||
session_dir.mkdir(parents=True)
|
||||
(session_dir / "state.json").write_text(json.dumps({"status": "completed"}))
|
||||
|
||||
# node_a is NOT client-facing, chat_node IS
|
||||
conv_a = session_dir / "conversations" / "node_a" / "parts"
|
||||
conv_a.mkdir(parents=True)
|
||||
(conv_a / "0001.json").write_text(
|
||||
json.dumps({"seq": 1, "role": "user", "content": "system prompt"})
|
||||
)
|
||||
(conv_a / "0002.json").write_text(
|
||||
json.dumps({"seq": 2, "role": "assistant", "content": "internal work"})
|
||||
)
|
||||
(conv_a / "0003.json").write_text(
|
||||
json.dumps({"seq": 3, "role": "tool", "content": "tool result"})
|
||||
)
|
||||
|
||||
conv_chat = session_dir / "conversations" / "chat_node" / "parts"
|
||||
conv_chat.mkdir(parents=True)
|
||||
(conv_chat / "0004.json").write_text(
|
||||
json.dumps({"seq": 4, "role": "user", "content": "hi", "is_client_input": True})
|
||||
)
|
||||
(conv_chat / "0005.json").write_text(
|
||||
json.dumps({"seq": 5, "role": "assistant", "content": "hello!"})
|
||||
)
|
||||
(conv_chat / "0006.json").write_text(
|
||||
json.dumps(
|
||||
{
|
||||
"seq": 6,
|
||||
"role": "assistant",
|
||||
"content": "",
|
||||
"tool_calls": [{"id": "tc1", "function": {"name": "search"}}],
|
||||
}
|
||||
)
|
||||
)
|
||||
(conv_chat / "0007.json").write_text(
|
||||
json.dumps(
|
||||
{
|
||||
"seq": 7,
|
||||
"role": "user",
|
||||
"content": "marker",
|
||||
"is_transition_marker": True,
|
||||
}
|
||||
)
|
||||
)
|
||||
|
||||
nodes = [
|
||||
MockNodeSpec(id="node_a", name="Node A", client_facing=False),
|
||||
MockNodeSpec(id="chat_node", name="Chat", client_facing=True),
|
||||
]
|
||||
session = _make_session(
|
||||
tmp_dir=tmp_path / ".hive" / "agents" / agent_name,
|
||||
nodes=nodes,
|
||||
)
|
||||
session.runner.graph = MockGraphSpec(nodes=nodes)
|
||||
app = _make_app_with_session(session)
|
||||
|
||||
async with TestClient(TestServer(app)) as client:
|
||||
resp = await client.get(
|
||||
f"/api/sessions/test_agent/worker-sessions/{worker_session_id}/messages?client_only=true"
|
||||
)
|
||||
assert resp.status == 200
|
||||
msgs = (await resp.json())["messages"]
|
||||
# Keep: seq 4 (user+is_client_input), seq 5 (assistant from chat_node)
|
||||
# Drop: seq 1,2,3,6,7 (internal / tool / tool_calls / marker)
|
||||
assert len(msgs) == 2
|
||||
assert msgs[0]["seq"] == 4
|
||||
assert msgs[0]["role"] == "user"
|
||||
assert msgs[1]["seq"] == 5
|
||||
assert msgs[1]["role"] == "assistant"
|
||||
assert msgs[1]["_node_id"] == "chat_node"
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_get_messages_client_only_no_runner_returns_all(self, tmp_agent_dir):
|
||||
"""client_only=true with no runner skips filtering (returns all messages)."""
|
||||
tmp_path, agent_name, base = tmp_agent_dir
|
||||
worker_session_id = "session_no_runner"
|
||||
session_dir = base / "sessions" / worker_session_id
|
||||
session_dir.mkdir(parents=True)
|
||||
(session_dir / "state.json").write_text(json.dumps({"status": "completed"}))
|
||||
|
||||
conv = session_dir / "conversations" / "node_a" / "parts"
|
||||
conv.mkdir(parents=True)
|
||||
(conv / "0001.json").write_text(json.dumps({"seq": 1, "role": "user", "content": "hello"}))
|
||||
(conv / "0002.json").write_text(
|
||||
json.dumps({"seq": 2, "role": "assistant", "content": "response"})
|
||||
)
|
||||
|
||||
session = _make_session(tmp_dir=tmp_path / ".hive" / "agents" / agent_name)
|
||||
session.runner = None # Simulate runner not available
|
||||
app = _make_app_with_session(session)
|
||||
|
||||
async with TestClient(TestServer(app)) as client:
|
||||
resp = await client.get(
|
||||
f"/api/sessions/test_agent/worker-sessions/{worker_session_id}/messages?client_only=true"
|
||||
)
|
||||
assert resp.status == 200
|
||||
msgs = (await resp.json())["messages"]
|
||||
# No runner -> can't resolve client-facing nodes -> returns all messages
|
||||
assert len(msgs) == 2
|
||||
|
||||
|
||||
class TestGraphNodes:
|
||||
@pytest.mark.asyncio
|
||||
async def test_list_nodes(self, nodes_and_edges):
|
||||
@@ -1381,7 +1098,7 @@ class TestLogs:
|
||||
async def test_logs_no_log_store(self):
|
||||
"""Agent without log store returns 404."""
|
||||
session = _make_session()
|
||||
session.worker_runtime._runtime_log_store = None
|
||||
session.graph_runtime._runtime_log_store = None
|
||||
app = _make_app_with_session(session)
|
||||
|
||||
async with TestClient(TestServer(app)) as client:
|
||||
@@ -1704,11 +1421,11 @@ class TestSSEFormat:
|
||||
|
||||
class TestErrorMiddleware:
|
||||
@pytest.mark.asyncio
|
||||
async def test_404_on_unknown_api_route(self):
|
||||
async def test_unknown_api_route_falls_back_to_frontend(self):
|
||||
app = create_app()
|
||||
async with TestClient(TestServer(app)) as client:
|
||||
resp = await client.get("/api/nonexistent")
|
||||
assert resp.status == 404
|
||||
assert resp.status == 200
|
||||
|
||||
|
||||
class TestCleanupStaleActiveSessions:
|
||||
|
||||
@@ -8,7 +8,7 @@ metadata:
|
||||
|
||||
## Operational Protocol: Structured Note-Taking
|
||||
|
||||
Maintain structured working notes in shared memory key `_working_notes`.
|
||||
Maintain structured working notes in shared buffer key `_working_notes`.
|
||||
Update at these checkpoints:
|
||||
|
||||
- After completing each discrete subtask or batch item
|
||||
|
||||
@@ -79,8 +79,8 @@ SKILL_REGISTRY: dict[str, str] = {
|
||||
"hive.task-decomposition": "task-decomposition",
|
||||
}
|
||||
|
||||
# All shared memory keys used by default skills (for permission auto-inclusion)
|
||||
SHARED_MEMORY_KEYS: list[str] = [
|
||||
# All shared buffer keys used by default skills (for permission auto-inclusion)
|
||||
DATA_BUFFER_KEYS: list[str] = [
|
||||
# note-taking
|
||||
"_working_notes",
|
||||
"_notes_updated_at",
|
||||
|
||||
@@ -8,6 +8,7 @@ tooling, CI gates, and hive skill doctor.
|
||||
from __future__ import annotations
|
||||
|
||||
import stat
|
||||
import sys
|
||||
from dataclasses import dataclass, field
|
||||
from pathlib import Path
|
||||
|
||||
@@ -134,9 +135,10 @@ def validate_strict(path: Path) -> ValidationResult:
|
||||
warnings.append("No 'license' field — consider adding a license (e.g. MIT, Apache-2.0).")
|
||||
|
||||
# 11. Scripts in scripts/ exist and are executable
|
||||
# Windows has no POSIX executable bits; skip this check there.
|
||||
base_dir = path.parent
|
||||
scripts_dir = base_dir / "scripts"
|
||||
if scripts_dir.is_dir():
|
||||
if scripts_dir.is_dir() and sys.platform != "win32":
|
||||
for script_path in sorted(scripts_dir.iterdir()):
|
||||
if script_path.is_file():
|
||||
if not (script_path.stat().st_mode & (stat.S_IXUSR | stat.S_IXGRP | stat.S_IXOTH)):
|
||||
|
||||
@@ -30,6 +30,7 @@ from pathlib import Path
|
||||
from typing import Any
|
||||
|
||||
|
||||
|
||||
class FileConversationStore:
|
||||
"""File-per-part ConversationStore.
|
||||
|
||||
@@ -95,7 +96,7 @@ class FileConversationStore:
|
||||
async def read_cursor(self) -> dict[str, Any] | None:
|
||||
return await self._run(self._read_json, self._base / "cursor.json")
|
||||
|
||||
async def delete_parts_before(self, seq: int) -> None:
|
||||
async def delete_parts_before(self, seq: int, run_id: str | None = None) -> None:
|
||||
def _delete() -> None:
|
||||
if not self._parts_dir.exists():
|
||||
return
|
||||
@@ -110,6 +111,28 @@ class FileConversationStore:
|
||||
"""No-op — no persistent handles for file-per-part storage."""
|
||||
pass
|
||||
|
||||
async def clear(self) -> None:
|
||||
"""Clear all parts and cursor, keeping the directory structure.
|
||||
|
||||
Used when starting a fresh execution in the same session directory.
|
||||
"""
|
||||
|
||||
def _clear() -> None:
|
||||
# Clear all parts
|
||||
if self._parts_dir.exists():
|
||||
for f in self._parts_dir.glob("*.json"):
|
||||
f.unlink()
|
||||
# Clear cursor
|
||||
cursor_path = self._base / "cursor.json"
|
||||
if cursor_path.exists():
|
||||
cursor_path.unlink()
|
||||
# Clear meta
|
||||
meta_path = self._base / "meta.json"
|
||||
if meta_path.exists():
|
||||
meta_path.unlink()
|
||||
|
||||
await self._run(_clear)
|
||||
|
||||
async def destroy(self) -> None:
|
||||
"""Delete the entire base directory and all persisted data."""
|
||||
|
||||
|
||||
@@ -1,8 +1,8 @@
|
||||
"""Queen lifecycle tools for worker management.
|
||||
"""Queen lifecycle tools for graph management.
|
||||
|
||||
These tools give the Queen agent control over the worker agent's lifecycle.
|
||||
They close over a session-like object that provides ``worker_runtime``,
|
||||
allowing late-binding access to the worker (which may be loaded/unloaded
|
||||
These tools give the Queen agent control over the loaded graph's lifecycle.
|
||||
They close over a session-like object that provides ``graph_runtime``,
|
||||
allowing late-binding access to the graph (which may be loaded/unloaded
|
||||
dynamically).
|
||||
|
||||
Usage::
|
||||
@@ -20,7 +20,7 @@ Usage::
|
||||
from framework.tools.queen_lifecycle_tools import WorkerSessionAdapter
|
||||
|
||||
adapter = WorkerSessionAdapter(
|
||||
worker_runtime=runtime,
|
||||
graph_runtime=runtime,
|
||||
event_bus=event_bus,
|
||||
worker_path=storage_path,
|
||||
)
|
||||
@@ -66,11 +66,11 @@ logger = logging.getLogger(__name__)
|
||||
class WorkerSessionAdapter:
|
||||
"""Adapter for TUI compatibility.
|
||||
|
||||
Wraps bare worker_runtime + event_bus + storage_path into a
|
||||
Wraps bare graph_runtime + event_bus + storage_path into a
|
||||
session-like object that queen lifecycle tools can use.
|
||||
"""
|
||||
|
||||
worker_runtime: Any # AgentRuntime
|
||||
graph_runtime: Any # AgentRuntime
|
||||
event_bus: Any # EventBus
|
||||
worker_path: Path | None = None
|
||||
|
||||
@@ -79,16 +79,22 @@ class WorkerSessionAdapter:
|
||||
class QueenPhaseState:
|
||||
"""Mutable state container for queen operating phase.
|
||||
|
||||
Four phases: planning → building → staging → running.
|
||||
Five phases: planning → building → staging → running → editing.
|
||||
EDITING is entered after worker execution completes. The worker
|
||||
stays loaded — queen can tweak config and re-run without rebuilding.
|
||||
RUNNING cannot go directly to BUILDING or PLANNING; it must pass
|
||||
through EDITING first.
|
||||
|
||||
Shared between the dynamic_tools_provider callback and tool handlers
|
||||
that trigger phase transitions.
|
||||
"""
|
||||
|
||||
phase: str = "building" # "planning", "building", "staging", or "running"
|
||||
phase: str = "building" # "planning", "building", "staging", "running", or "editing"
|
||||
planning_tools: list = field(default_factory=list) # list[Tool]
|
||||
building_tools: list = field(default_factory=list) # list[Tool]
|
||||
staging_tools: list = field(default_factory=list) # list[Tool]
|
||||
running_tools: list = field(default_factory=list) # list[Tool]
|
||||
editing_tools: list = field(default_factory=list) # list[Tool]
|
||||
inject_notification: Any = None # async (str) -> None
|
||||
event_bus: Any = None # EventBus — for emitting QUEEN_PHASE_CHANGED events
|
||||
|
||||
@@ -115,12 +121,24 @@ class QueenPhaseState:
|
||||
prompt_building: str = ""
|
||||
prompt_staging: str = ""
|
||||
prompt_running: str = ""
|
||||
prompt_editing: str = ""
|
||||
|
||||
# Default skill operational protocols — appended to every phase prompt
|
||||
protocols_prompt: str = ""
|
||||
# Community skills catalog (XML) — appended after protocols
|
||||
skills_catalog_prompt: str = ""
|
||||
|
||||
# Persona and communication style (set once at session start by persona hook,
|
||||
# persisted here so they survive dynamic prompt refreshes across iterations).
|
||||
persona_prefix: str = "" # e.g. "You are a CFO. I am a CFO with 20 years..."
|
||||
style_directive: str = "" # e.g. "## Communication Style: Peer\n\n..."
|
||||
|
||||
# Cached recall block — populated async by recall_selector after each turn.
|
||||
_cached_recall_block: str = ""
|
||||
_cached_colony_recall_block: str = ""
|
||||
_cached_global_recall_block: str = ""
|
||||
global_memory_dir: Path | None = None
|
||||
|
||||
def get_current_tools(self) -> list:
|
||||
"""Return tools for the current phase."""
|
||||
if self.phase == "planning":
|
||||
@@ -129,6 +147,8 @@ class QueenPhaseState:
|
||||
return list(self.running_tools)
|
||||
if self.phase == "staging":
|
||||
return list(self.staging_tools)
|
||||
if self.phase == "editing":
|
||||
return list(self.editing_tools)
|
||||
return list(self.building_tools)
|
||||
|
||||
def get_current_prompt(self) -> str:
|
||||
@@ -139,19 +159,29 @@ class QueenPhaseState:
|
||||
base = self.prompt_running
|
||||
elif self.phase == "staging":
|
||||
base = self.prompt_staging
|
||||
elif self.phase == "editing":
|
||||
base = self.prompt_editing
|
||||
else:
|
||||
base = self.prompt_building
|
||||
|
||||
from framework.agents.queen.queen_memory import format_for_injection
|
||||
|
||||
memory = format_for_injection()
|
||||
parts = [base]
|
||||
parts = []
|
||||
if self.persona_prefix:
|
||||
parts.append(self.persona_prefix)
|
||||
parts.append(base)
|
||||
if self.style_directive:
|
||||
parts.append(self.style_directive)
|
||||
if self.skills_catalog_prompt:
|
||||
parts.append(self.skills_catalog_prompt)
|
||||
if self.protocols_prompt:
|
||||
parts.append(self.protocols_prompt)
|
||||
if memory:
|
||||
parts.append(memory)
|
||||
colony_memory = self._cached_colony_recall_block or self._cached_recall_block
|
||||
if colony_memory:
|
||||
parts.append(colony_memory)
|
||||
if self._cached_global_recall_block:
|
||||
parts.append(self._cached_global_recall_block)
|
||||
return "\n\n".join(parts)
|
||||
|
||||
async def _emit_phase_event(self) -> None:
|
||||
@@ -168,6 +198,26 @@ class QueenPhaseState:
|
||||
)
|
||||
)
|
||||
|
||||
async def switch_to_editing(self, source: str = "tool") -> None:
|
||||
"""Switch to editing phase — worker stays loaded, queen can tweak and re-run.
|
||||
|
||||
Args:
|
||||
source: Who triggered the switch — "tool", "frontend", or "auto".
|
||||
"""
|
||||
if self.phase == "editing":
|
||||
return
|
||||
self.phase = "editing"
|
||||
tool_names = [t.name for t in self.editing_tools]
|
||||
logger.info("Queen phase → editing (source=%s, tools: %s)", source, tool_names)
|
||||
await self._emit_phase_event()
|
||||
if self.inject_notification and source != "tool":
|
||||
await self.inject_notification(
|
||||
"[PHASE CHANGE] Switched to EDITING phase. "
|
||||
"Worker is still loaded. You can tweak configuration and re-run, "
|
||||
"or escalate to building/planning if a deeper change is needed. "
|
||||
"Available tools: " + ", ".join(tool_names) + "."
|
||||
)
|
||||
|
||||
async def switch_to_running(self, source: str = "tool") -> None:
|
||||
"""Switch to running phase and notify the queen.
|
||||
|
||||
@@ -223,11 +273,20 @@ class QueenPhaseState:
|
||||
async def switch_to_building(self, source: str = "tool") -> None:
|
||||
"""Switch to building phase and notify the queen.
|
||||
|
||||
Blocked from RUNNING and EDITING.
|
||||
|
||||
Args:
|
||||
source: Who triggered the switch — "tool", "frontend", or "auto".
|
||||
"""
|
||||
if self.phase == "building":
|
||||
return
|
||||
if self.phase in ("running", "editing"):
|
||||
logger.warning(
|
||||
"Queen phase: BLOCKED %s → building (source=%s)",
|
||||
self.phase,
|
||||
source,
|
||||
)
|
||||
return
|
||||
self.phase = "building"
|
||||
tool_names = [t.name for t in self.building_tools]
|
||||
logger.info("Queen phase → building (source=%s, tools: %s)", source, tool_names)
|
||||
@@ -242,11 +301,20 @@ class QueenPhaseState:
|
||||
async def switch_to_planning(self, source: str = "tool") -> None:
|
||||
"""Switch to planning phase and notify the queen.
|
||||
|
||||
Blocked from RUNNING and EDITING.
|
||||
|
||||
Args:
|
||||
source: Who triggered the switch — "tool", "frontend", or "auto".
|
||||
"""
|
||||
if self.phase == "planning":
|
||||
return
|
||||
if self.phase in ("running", "editing"):
|
||||
logger.warning(
|
||||
"Queen phase: BLOCKED %s → planning (source=%s)",
|
||||
self.phase,
|
||||
source,
|
||||
)
|
||||
return
|
||||
self.phase = "planning"
|
||||
tool_names = [t.name for t in self.planning_tools]
|
||||
logger.info("Queen phase → planning (source=%s, tools: %s)", source, tool_names)
|
||||
@@ -363,7 +431,7 @@ def _remove_trigger_from_agent(session: Any, trigger_id: str) -> None:
|
||||
|
||||
async def _persist_active_triggers(session: Any, session_id: str) -> None:
|
||||
"""Persist the set of active trigger IDs (and their tasks) to SessionState."""
|
||||
runtime = getattr(session, "worker_runtime", None)
|
||||
runtime = getattr(session, "graph_runtime", None)
|
||||
if runtime is None:
|
||||
return
|
||||
store = getattr(runtime, "_session_store", None)
|
||||
@@ -418,8 +486,8 @@ async def _start_trigger_timer(session: Any, trigger_id: str, tdef: Any) -> None
|
||||
_next_delay = float(interval_minutes) * 60 if interval_minutes else 60
|
||||
fire_times[trigger_id] = time.monotonic() + _next_delay
|
||||
|
||||
# Gate on worker being loaded
|
||||
if getattr(session, "worker_runtime", None) is None:
|
||||
# Gate on a graph being loaded
|
||||
if getattr(session, "graph_runtime", None) is None:
|
||||
continue
|
||||
|
||||
# Fire into queen node
|
||||
@@ -465,8 +533,8 @@ async def _start_trigger_webhook(session: Any, trigger_id: str, tdef: Any) -> No
|
||||
return
|
||||
if data.get("method", "").upper() not in methods:
|
||||
return
|
||||
# Gate on worker being loaded
|
||||
if getattr(session, "worker_runtime", None) is None:
|
||||
# Gate on a graph being loaded
|
||||
if getattr(session, "graph_runtime", None) is None:
|
||||
return
|
||||
executor = getattr(session, "queen_executor", None)
|
||||
if executor is None:
|
||||
@@ -755,7 +823,7 @@ def register_queen_lifecycle_tools(
|
||||
session: Any = None,
|
||||
session_id: str | None = None,
|
||||
# Legacy params — used by TUI when not passing a session object
|
||||
worker_runtime: AgentRuntime | None = None,
|
||||
graph_runtime: AgentRuntime | None = None,
|
||||
event_bus: EventBus | None = None,
|
||||
storage_path: Path | None = None,
|
||||
# Server context — enables load_built_agent tool
|
||||
@@ -767,30 +835,30 @@ def register_queen_lifecycle_tools(
|
||||
"""Register queen lifecycle tools.
|
||||
|
||||
Args:
|
||||
session: A Session or WorkerSessionAdapter with ``worker_runtime``
|
||||
attribute. The tools read ``session.worker_runtime`` on each
|
||||
call, supporting late-binding (worker loaded/unloaded).
|
||||
session_id: Shared session ID so the worker uses the same session
|
||||
session: A Session or WorkerSessionAdapter with ``graph_runtime``
|
||||
attribute. The tools read ``session.graph_runtime`` on each
|
||||
call, supporting late-binding (graph loaded/unloaded).
|
||||
session_id: Shared session ID so the graph uses the same session
|
||||
scope as the queen and judge.
|
||||
worker_runtime: (Legacy) Direct runtime reference. If ``session``
|
||||
graph_runtime: (Legacy) Direct runtime reference. If ``session``
|
||||
is not provided, a WorkerSessionAdapter is created from
|
||||
worker_runtime + event_bus + storage_path.
|
||||
graph_runtime + event_bus + storage_path.
|
||||
session_manager: (Server only) The SessionManager instance, needed
|
||||
for ``load_built_agent`` to hot-load a worker.
|
||||
for ``load_built_agent`` to hot-load a graph.
|
||||
manager_session_id: (Server only) The session's ID in the manager,
|
||||
used with ``session_manager.load_worker()``.
|
||||
used with ``session_manager.load_graph()``.
|
||||
phase_state: (Optional) Mutable phase state for building/running
|
||||
phase switching. When provided, load_built_agent switches to
|
||||
running phase and stop_worker_and_edit switches to building phase.
|
||||
running phase and stop_graph_and_edit switches to building phase.
|
||||
|
||||
Returns the number of tools registered.
|
||||
"""
|
||||
# Build session adapter from legacy params if needed
|
||||
if session is None:
|
||||
if worker_runtime is None:
|
||||
raise ValueError("Either session or worker_runtime must be provided")
|
||||
if graph_runtime is None:
|
||||
raise ValueError("Either session or graph_runtime must be provided")
|
||||
session = WorkerSessionAdapter(
|
||||
worker_runtime=worker_runtime,
|
||||
graph_runtime=graph_runtime,
|
||||
event_bus=event_bus,
|
||||
worker_path=storage_path,
|
||||
)
|
||||
@@ -800,18 +868,18 @@ def register_queen_lifecycle_tools(
|
||||
tools_registered = 0
|
||||
|
||||
def _get_runtime():
|
||||
"""Get current worker runtime from session (late-binding)."""
|
||||
return getattr(session, "worker_runtime", None)
|
||||
"""Get current graph runtime from session (late-binding)."""
|
||||
return getattr(session, "graph_runtime", None)
|
||||
|
||||
# --- start_worker ---------------------------------------------------------
|
||||
# --- start_graph ----------------------------------------------------------
|
||||
|
||||
# How long to wait for credential validation + MCP resync before
|
||||
# proceeding with trigger anyway. These are pre-flight checks that
|
||||
# should not block the queen indefinitely.
|
||||
_START_PREFLIGHT_TIMEOUT = 15 # seconds
|
||||
|
||||
async def start_worker(task: str) -> str:
|
||||
"""Start the worker agent with a task description.
|
||||
async def start_graph(task: str) -> str:
|
||||
"""Start the loaded graph with a task description.
|
||||
|
||||
Triggers the worker's default entry point with the given task.
|
||||
Returns immediately — the worker runs asynchronously.
|
||||
@@ -860,13 +928,13 @@ def register_queen_lifecycle_tools(
|
||||
await asyncio.wait_for(_preflight(), timeout=_START_PREFLIGHT_TIMEOUT)
|
||||
except TimeoutError:
|
||||
logger.warning(
|
||||
"start_worker preflight timed out after %ds — proceeding with trigger",
|
||||
"start_graph preflight timed out after %ds — proceeding with trigger",
|
||||
_START_PREFLIGHT_TIMEOUT,
|
||||
)
|
||||
except CredentialError:
|
||||
raise # handled below
|
||||
|
||||
# Resume timers in case they were paused by a previous stop_worker
|
||||
# Resume timers in case they were paused by a previous stop_graph
|
||||
runtime.resume_timers()
|
||||
|
||||
# Get session state from any prior execution for memory continuity
|
||||
@@ -907,12 +975,12 @@ def register_queen_lifecycle_tools(
|
||||
)
|
||||
return json.dumps(error_payload)
|
||||
except Exception as e:
|
||||
return json.dumps({"error": f"Failed to start worker: {e}"})
|
||||
return json.dumps({"error": f"Failed to start graph: {e}"})
|
||||
|
||||
_start_tool = Tool(
|
||||
name="start_worker",
|
||||
name="start_graph",
|
||||
description=(
|
||||
"Start the worker agent with a task description. The worker runs "
|
||||
"Start the loaded graph with a task description. The graph runs "
|
||||
"autonomously in the background. Returns an execution ID for tracking."
|
||||
),
|
||||
parameters={
|
||||
@@ -920,19 +988,19 @@ def register_queen_lifecycle_tools(
|
||||
"properties": {
|
||||
"task": {
|
||||
"type": "string",
|
||||
"description": "Description of the task for the worker to perform",
|
||||
"description": "Description of the task for the graph to perform",
|
||||
},
|
||||
},
|
||||
"required": ["task"],
|
||||
},
|
||||
)
|
||||
registry.register("start_worker", _start_tool, lambda inputs: start_worker(**inputs))
|
||||
registry.register("start_graph", _start_tool, lambda inputs: start_graph(**inputs))
|
||||
tools_registered += 1
|
||||
|
||||
# --- stop_worker ----------------------------------------------------------
|
||||
# --- stop_graph -----------------------------------------------------------
|
||||
|
||||
async def stop_worker(*, reason: str = "Stopped by queen") -> str:
|
||||
"""Cancel all active worker executions across all graphs.
|
||||
async def stop_graph(*, reason: str = "Stopped by queen") -> str:
|
||||
"""Cancel all active graph executions across all graphs.
|
||||
|
||||
Stops the worker immediately. Returns the IDs of cancelled executions.
|
||||
"""
|
||||
@@ -979,21 +1047,60 @@ def register_queen_lifecycle_tools(
|
||||
)
|
||||
|
||||
_stop_tool = Tool(
|
||||
name="stop_worker",
|
||||
name="stop_graph",
|
||||
description=(
|
||||
"Cancel the worker agent's active execution and pause its timers. "
|
||||
"The worker stops gracefully. No parameters needed."
|
||||
"Cancel the loaded graph's active execution and pause its timers. "
|
||||
"The graph stops gracefully. No parameters needed."
|
||||
),
|
||||
parameters={"type": "object", "properties": {}},
|
||||
)
|
||||
registry.register("stop_worker", _stop_tool, lambda inputs: stop_worker())
|
||||
registry.register("stop_graph", _stop_tool, lambda inputs: stop_graph())
|
||||
tools_registered += 1
|
||||
|
||||
# --- stop_worker_and_edit -------------------------------------------------
|
||||
# --- switch_to_editing ----------------------------------------------------
|
||||
|
||||
async def stop_worker_and_edit() -> str:
|
||||
"""Stop the worker and switch to building phase for editing the agent."""
|
||||
stop_result = await stop_worker()
|
||||
async def switch_to_editing_tool() -> str:
|
||||
"""Stop the worker and switch to editing phase for config tweaks.
|
||||
|
||||
The worker stays loaded. You can re-run with different input,
|
||||
inject config adjustments, or escalate to building/planning.
|
||||
"""
|
||||
stop_result = await stop_graph()
|
||||
|
||||
if phase_state is not None:
|
||||
await phase_state.switch_to_editing()
|
||||
_update_meta_json(session_manager, manager_session_id, {"phase": "editing"})
|
||||
|
||||
result = json.loads(stop_result)
|
||||
result["phase"] = "editing"
|
||||
result["message"] = (
|
||||
"Worker stopped. You are now in editing phase. "
|
||||
"You can re-run with run_agent_with_input(task), tweak config "
|
||||
"with inject_message, or escalate to building/planning."
|
||||
)
|
||||
return json.dumps(result)
|
||||
|
||||
_switch_editing_tool = Tool(
|
||||
name="switch_to_editing",
|
||||
description=(
|
||||
"Stop the running worker and switch to editing phase. "
|
||||
"The worker stays loaded — you can tweak config and re-run. "
|
||||
"Use this when you want to adjust the worker without rebuilding."
|
||||
),
|
||||
parameters={"type": "object", "properties": {}},
|
||||
)
|
||||
registry.register(
|
||||
"switch_to_editing",
|
||||
_switch_editing_tool,
|
||||
lambda inputs: switch_to_editing_tool(),
|
||||
)
|
||||
tools_registered += 1
|
||||
|
||||
# --- stop_graph_and_edit --------------------------------------------------
|
||||
|
||||
async def stop_graph_and_edit() -> str:
|
||||
"""Stop the loaded graph and switch to building phase for editing the agent."""
|
||||
stop_result = await stop_graph()
|
||||
|
||||
# Switch to building phase
|
||||
if phase_state is not None:
|
||||
@@ -1003,7 +1110,7 @@ def register_queen_lifecycle_tools(
|
||||
result = json.loads(stop_result)
|
||||
result["phase"] = "building"
|
||||
result["message"] = (
|
||||
"Worker stopped. You are now in building phase. "
|
||||
"Graph stopped. You are now in building phase. "
|
||||
"Use your coding tools to modify the agent, then call "
|
||||
"load_built_agent(path) to stage it again."
|
||||
)
|
||||
@@ -1015,24 +1122,24 @@ def register_queen_lifecycle_tools(
|
||||
return json.dumps(result)
|
||||
|
||||
_stop_edit_tool = Tool(
|
||||
name="stop_worker_and_edit",
|
||||
name="stop_graph_and_edit",
|
||||
description=(
|
||||
"Stop the running worker and switch to building phase. "
|
||||
"Stop the running graph and switch to building phase. "
|
||||
"Use this when you need to modify the agent's code, nodes, or configuration. "
|
||||
"After editing, call load_built_agent(path) to reload and run."
|
||||
),
|
||||
parameters={"type": "object", "properties": {}},
|
||||
)
|
||||
registry.register(
|
||||
"stop_worker_and_edit", _stop_edit_tool, lambda inputs: stop_worker_and_edit()
|
||||
"stop_graph_and_edit", _stop_edit_tool, lambda inputs: stop_graph_and_edit()
|
||||
)
|
||||
tools_registered += 1
|
||||
|
||||
# --- stop_worker_and_plan (Running/Staging → Planning) --------------------
|
||||
# --- stop_graph_and_plan (Running/Staging → Planning) ---------------------
|
||||
|
||||
async def stop_worker_and_plan() -> str:
|
||||
"""Stop the worker and switch to planning phase for diagnosis."""
|
||||
stop_result = await stop_worker()
|
||||
async def stop_graph_and_plan() -> str:
|
||||
"""Stop the loaded graph and switch to planning phase for diagnosis."""
|
||||
stop_result = await stop_graph()
|
||||
|
||||
# Switch to planning phase
|
||||
if phase_state is not None:
|
||||
@@ -1041,7 +1148,7 @@ def register_queen_lifecycle_tools(
|
||||
result = json.loads(stop_result)
|
||||
result["phase"] = "planning"
|
||||
result["message"] = (
|
||||
"Worker stopped. You are now in planning phase. "
|
||||
"Graph stopped. You are now in planning phase. "
|
||||
"Diagnose the issue using read-only tools (checkpoints, logs, sessions), "
|
||||
"discuss a fix plan with the user, then call "
|
||||
"initialize_and_build_agent() to implement the fix."
|
||||
@@ -1049,16 +1156,16 @@ def register_queen_lifecycle_tools(
|
||||
return json.dumps(result)
|
||||
|
||||
_stop_plan_tool = Tool(
|
||||
name="stop_worker_and_plan",
|
||||
name="stop_graph_and_plan",
|
||||
description=(
|
||||
"Stop the worker and switch to planning phase for diagnosis. "
|
||||
"Stop the graph and switch to planning phase for diagnosis. "
|
||||
"Use this when you need to investigate an issue before fixing it. "
|
||||
"After diagnosis, call initialize_and_build_agent() to switch to building."
|
||||
),
|
||||
parameters={"type": "object", "properties": {}},
|
||||
)
|
||||
registry.register(
|
||||
"stop_worker_and_plan", _stop_plan_tool, lambda inputs: stop_worker_and_plan()
|
||||
"stop_graph_and_plan", _stop_plan_tool, lambda inputs: stop_graph_and_plan()
|
||||
)
|
||||
tools_registered += 1
|
||||
|
||||
@@ -2001,12 +2108,12 @@ def register_queen_lifecycle_tools(
|
||||
"input_keys": {
|
||||
"type": "array",
|
||||
"items": {"type": "string"},
|
||||
"description": "Expected input memory keys (hints)",
|
||||
"description": "Expected input buffer keys (hints)",
|
||||
},
|
||||
"output_keys": {
|
||||
"type": "array",
|
||||
"items": {"type": "string"},
|
||||
"description": "Expected output memory keys (hints)",
|
||||
"description": "Expected output buffer keys (hints)",
|
||||
},
|
||||
"success_criteria": {
|
||||
"type": "string",
|
||||
@@ -2370,16 +2477,16 @@ def register_queen_lifecycle_tools(
|
||||
lambda inputs: initialize_and_build_agent_wrapper(inputs),
|
||||
)
|
||||
|
||||
# --- stop_worker (Running → Staging) -------------------------------------
|
||||
# --- stop_graph (Running → Staging) --------------------------------------
|
||||
|
||||
async def stop_worker_to_staging() -> str:
|
||||
"""Stop the running worker and switch to staging phase.
|
||||
async def stop_graph_to_staging() -> str:
|
||||
"""Stop the running graph and switch to staging phase.
|
||||
|
||||
After stopping, ask the user whether they want to:
|
||||
1. Re-run the agent with new input → call run_agent_with_input(task)
|
||||
2. Edit the agent code → call stop_worker_and_edit() to go to building phase
|
||||
2. Edit the agent code → call stop_graph_and_edit() to go to building phase
|
||||
"""
|
||||
stop_result = await stop_worker()
|
||||
stop_result = await stop_graph()
|
||||
|
||||
# Switch to staging phase
|
||||
if phase_state is not None:
|
||||
@@ -2389,54 +2496,30 @@ def register_queen_lifecycle_tools(
|
||||
result = json.loads(stop_result)
|
||||
result["phase"] = "staging"
|
||||
result["message"] = (
|
||||
"Worker stopped. You are now in staging phase. "
|
||||
"Graph stopped. You are now in staging phase. "
|
||||
"Ask the user: would they like to re-run with new input, "
|
||||
"or edit the agent code?"
|
||||
)
|
||||
return json.dumps(result)
|
||||
|
||||
_stop_worker_tool = Tool(
|
||||
name="stop_worker",
|
||||
name="stop_graph",
|
||||
description=(
|
||||
"Stop the running worker and switch to staging phase. "
|
||||
"Stop the running graph and switch to staging phase. "
|
||||
"After stopping, ask the user whether they want to re-run "
|
||||
"with new input or edit the agent code."
|
||||
),
|
||||
parameters={"type": "object", "properties": {}},
|
||||
)
|
||||
registry.register("stop_worker", _stop_worker_tool, lambda inputs: stop_worker_to_staging())
|
||||
registry.register("stop_graph", _stop_worker_tool, lambda inputs: stop_graph_to_staging())
|
||||
tools_registered += 1
|
||||
|
||||
# --- get_worker_status ----------------------------------------------------
|
||||
# --- get_graph_status -----------------------------------------------------
|
||||
|
||||
def _get_event_bus():
|
||||
"""Get the session's event bus for querying history."""
|
||||
return getattr(session, "event_bus", None)
|
||||
|
||||
def _get_worker_name() -> str | None:
|
||||
"""Return the worker agent directory name, used for diary lookups."""
|
||||
p = getattr(session, "worker_path", None)
|
||||
return p.name if p else None
|
||||
|
||||
def _format_diary(max_runs: int) -> str:
|
||||
"""Read recent run digests from disk — no EventBus required."""
|
||||
agent_name = _get_worker_name()
|
||||
if not agent_name:
|
||||
return "No worker loaded — diary unavailable."
|
||||
from framework.agents.worker_memory import read_recent_digests
|
||||
|
||||
entries = read_recent_digests(agent_name, max_runs)
|
||||
if not entries:
|
||||
return (
|
||||
f"No run digests for '{agent_name}' yet. "
|
||||
"Digests are written at the end of each completed run."
|
||||
)
|
||||
lines = [f"Worker '{agent_name}' — {len(entries)} recent run digest(s):", ""]
|
||||
for _run_id, content in entries:
|
||||
lines.append(content)
|
||||
lines.append("")
|
||||
return "\n".join(lines).rstrip()
|
||||
|
||||
# Tiered cooldowns: summary is free, detail has short cooldown, full keeps 30s
|
||||
_COOLDOWN_FULL = 30.0
|
||||
_COOLDOWN_DETAIL = 10.0
|
||||
@@ -2641,16 +2724,16 @@ def register_queen_lifecycle_tools(
|
||||
return "\n".join(lines)
|
||||
|
||||
async def _format_memory(runtime: AgentRuntime) -> str:
|
||||
"""Format the worker's shared memory snapshot and recent changes."""
|
||||
"""Format the worker's shared buffer snapshot and recent changes."""
|
||||
from framework.runtime.shared_state import IsolationLevel
|
||||
|
||||
lines = []
|
||||
active_streams = runtime.get_active_streams()
|
||||
|
||||
if not active_streams:
|
||||
return "Worker has no active executions. No memory to inspect."
|
||||
return "Worker has no active executions. No buffer state to inspect."
|
||||
|
||||
# Read memory from the first active execution
|
||||
# Read buffer state from the first active execution
|
||||
stream_info = active_streams[0]
|
||||
exec_ids = stream_info.get("active_execution_ids", [])
|
||||
stream_id = stream_info.get("stream_id", "")
|
||||
@@ -2658,13 +2741,13 @@ def register_queen_lifecycle_tools(
|
||||
return "No active execution found."
|
||||
|
||||
exec_id = exec_ids[0]
|
||||
memory = runtime.state_manager.create_memory(exec_id, stream_id, IsolationLevel.SHARED)
|
||||
state = await memory.read_all()
|
||||
buf = runtime.state_manager.create_buffer(exec_id, stream_id, IsolationLevel.SHARED)
|
||||
state = await buf.read_all()
|
||||
|
||||
if not state:
|
||||
lines.append("Worker's shared memory is empty.")
|
||||
lines.append("Worker's shared buffer is empty.")
|
||||
else:
|
||||
lines.append(f"Worker's shared memory ({len(state)} keys):")
|
||||
lines.append(f"Worker's shared buffer ({len(state)} keys):")
|
||||
for key, value in state.items():
|
||||
lines.append(f" {key}: {_preview_value(value)}")
|
||||
|
||||
@@ -3024,8 +3107,8 @@ def register_queen_lifecycle_tools(
|
||||
|
||||
return result
|
||||
|
||||
async def get_worker_status(focus: str | None = None, last_n: int = 20) -> str:
|
||||
"""Check on the worker with progressive disclosure.
|
||||
async def get_graph_status(focus: str | None = None, last_n: int = 20) -> str:
|
||||
"""Check on the loaded graph with progressive disclosure.
|
||||
|
||||
Without arguments, returns a brief prose summary. Use ``focus`` to
|
||||
drill into specifics: activity, memory, tools, issues, progress,
|
||||
@@ -3039,14 +3122,14 @@ def register_queen_lifecycle_tools(
|
||||
import time as _time
|
||||
|
||||
# --- Tiered cooldown ---
|
||||
# diary is free (file reads only), summary is free, detail has 10s, full has 30s
|
||||
# summary is free, detail has 10s, full keeps 30s
|
||||
now = _time.monotonic()
|
||||
if focus == "full":
|
||||
cooldown = _COOLDOWN_FULL
|
||||
tier = "full"
|
||||
elif focus == "diary" or focus is None:
|
||||
elif focus is None:
|
||||
cooldown = 0.0
|
||||
tier = focus or "summary"
|
||||
tier = "summary"
|
||||
else:
|
||||
cooldown = _COOLDOWN_DETAIL
|
||||
tier = "detail"
|
||||
@@ -3065,10 +3148,6 @@ def register_queen_lifecycle_tools(
|
||||
)
|
||||
_status_last_called[tier] = now
|
||||
|
||||
# --- Diary: pure file reads, no runtime required ---
|
||||
if focus == "diary":
|
||||
return _format_diary(last_n)
|
||||
|
||||
# --- Runtime check ---
|
||||
runtime = _get_runtime()
|
||||
if runtime is None:
|
||||
@@ -3118,21 +3197,19 @@ def register_queen_lifecycle_tools(
|
||||
else:
|
||||
return (
|
||||
f"Unknown focus '{focus}'. "
|
||||
"Valid options: diary, activity, memory, tools, issues, progress, full."
|
||||
"Valid options: activity, memory, tools, issues, progress, full."
|
||||
)
|
||||
except Exception as exc:
|
||||
logger.exception("get_worker_status error")
|
||||
logger.exception("get_graph_status error")
|
||||
return f"Error retrieving status: {exc}"
|
||||
|
||||
_status_tool = Tool(
|
||||
name="get_worker_status",
|
||||
name="get_graph_status",
|
||||
description=(
|
||||
"Check on the worker. Returns a brief prose summary by default. "
|
||||
"Check on the loaded graph. Returns a brief prose summary by default. "
|
||||
"Use 'focus' to drill into specifics:\n"
|
||||
"- diary: persistent run digests from past executions — read this first "
|
||||
"before digging into live runtime logs\n"
|
||||
"- activity: current node, transitions, latest LLM output\n"
|
||||
"- memory: worker's accumulated knowledge and state\n"
|
||||
"- memory: worker's accumulated buffer state\n"
|
||||
"- tools: running and recent tool calls\n"
|
||||
"- issues: retries, stalls, constraint violations\n"
|
||||
"- progress: goal criteria, token consumption\n"
|
||||
@@ -3143,10 +3220,9 @@ def register_queen_lifecycle_tools(
|
||||
"properties": {
|
||||
"focus": {
|
||||
"type": "string",
|
||||
"enum": ["diary", "activity", "memory", "tools", "issues", "progress", "full"],
|
||||
"enum": ["activity", "memory", "tools", "issues", "progress", "full"],
|
||||
"description": (
|
||||
"Aspect to inspect. Omit for a brief summary. "
|
||||
"Use 'diary' to read persistent run history before checking live logs."
|
||||
"Aspect to inspect. Omit for a brief summary."
|
||||
),
|
||||
},
|
||||
"last_n": {
|
||||
@@ -3159,25 +3235,25 @@ def register_queen_lifecycle_tools(
|
||||
"required": [],
|
||||
},
|
||||
)
|
||||
registry.register("get_worker_status", _status_tool, lambda inputs: get_worker_status(**inputs))
|
||||
registry.register("get_graph_status", _status_tool, lambda inputs: get_graph_status(**inputs))
|
||||
tools_registered += 1
|
||||
|
||||
# --- inject_worker_message ------------------------------------------------
|
||||
# --- inject_message -------------------------------------------------------
|
||||
|
||||
async def inject_worker_message(content: str) -> str:
|
||||
"""Send a message to the running worker agent.
|
||||
async def inject_message(content: str) -> str:
|
||||
"""Send a message to the running graph.
|
||||
|
||||
Injects the message into the worker's active node conversation.
|
||||
Use this to relay user instructions to the worker.
|
||||
"""
|
||||
runtime = _get_runtime()
|
||||
if runtime is None:
|
||||
return json.dumps({"error": "No worker loaded in this session."})
|
||||
return json.dumps({"error": "No graph loaded in this session."})
|
||||
|
||||
graph_id = runtime.graph_id
|
||||
reg = runtime.get_graph_registration(graph_id)
|
||||
if reg is None:
|
||||
return json.dumps({"error": "Worker graph not found"})
|
||||
return json.dumps({"error": "Graph not found"})
|
||||
|
||||
# Prefer nodes that are actively waiting (e.g. escalation receivers
|
||||
# blocked on queen guidance) over the main event-loop node.
|
||||
@@ -3212,30 +3288,30 @@ def register_queen_lifecycle_tools(
|
||||
|
||||
return json.dumps(
|
||||
{
|
||||
"error": "No active worker node found — worker may be idle.",
|
||||
"error": "No active graph node found — graph may be idle.",
|
||||
}
|
||||
)
|
||||
|
||||
_inject_tool = Tool(
|
||||
name="inject_worker_message",
|
||||
name="inject_message",
|
||||
description=(
|
||||
"Send a message to the running worker agent. The message is injected "
|
||||
"into the worker's active node conversation. Use this to relay user "
|
||||
"instructions or concerns. The worker must be running."
|
||||
"Send a message to the running graph. The message is injected "
|
||||
"into the graph's active node conversation. Use this to relay user "
|
||||
"instructions or concerns. The graph must be running."
|
||||
),
|
||||
parameters={
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"content": {
|
||||
"type": "string",
|
||||
"description": "Message content to send to the worker",
|
||||
"description": "Message content to send to the graph",
|
||||
},
|
||||
},
|
||||
"required": ["content"],
|
||||
},
|
||||
)
|
||||
registry.register(
|
||||
"inject_worker_message", _inject_tool, lambda inputs: inject_worker_message(**inputs)
|
||||
"inject_message", _inject_tool, lambda inputs: inject_message(**inputs)
|
||||
)
|
||||
tools_registered += 1
|
||||
|
||||
@@ -3402,10 +3478,10 @@ def register_queen_lifecycle_tools(
|
||||
runtime = _get_runtime()
|
||||
if runtime is not None:
|
||||
try:
|
||||
await session_manager.unload_worker(manager_session_id)
|
||||
await session_manager.unload_graph(manager_session_id)
|
||||
except Exception as e:
|
||||
logger.error("Failed to unload existing worker: %s", e, exc_info=True)
|
||||
return json.dumps({"error": f"Failed to unload existing worker: {e}"})
|
||||
logger.error("Failed to unload existing graph: %s", e, exc_info=True)
|
||||
return json.dumps({"error": f"Failed to unload existing graph: {e}"})
|
||||
|
||||
try:
|
||||
resolved_path = validate_agent_path(agent_path)
|
||||
@@ -3460,7 +3536,7 @@ def register_queen_lifecycle_tools(
|
||||
)
|
||||
|
||||
try:
|
||||
updated_session = await session_manager.load_worker(
|
||||
updated_session = await session_manager.load_graph(
|
||||
manager_session_id,
|
||||
str(resolved_path),
|
||||
)
|
||||
@@ -3477,9 +3553,9 @@ def register_queen_lifecycle_tools(
|
||||
if missing:
|
||||
missing_by_node[f"{node.name} (id={node.id})"] = sorted(missing)
|
||||
if missing_by_node:
|
||||
# Unload the broken worker
|
||||
# Unload the broken graph
|
||||
try:
|
||||
await session_manager.unload_worker(manager_session_id)
|
||||
await session_manager.unload_graph(manager_session_id)
|
||||
except Exception:
|
||||
pass
|
||||
details = "; ".join(
|
||||
@@ -3548,19 +3624,19 @@ def register_queen_lifecycle_tools(
|
||||
await phase_state.switch_to_staging()
|
||||
_update_meta_json(session_manager, manager_session_id, {"phase": "staging"})
|
||||
|
||||
worker_name = info.name if info else updated_session.worker_id
|
||||
graph_name = info.name if info else updated_session.graph_id
|
||||
return json.dumps(
|
||||
{
|
||||
"status": "loaded",
|
||||
"phase": "staging",
|
||||
"message": (
|
||||
f"Successfully loaded '{worker_name}'. "
|
||||
f"Successfully loaded '{graph_name}'. "
|
||||
"You are now in STAGING phase. "
|
||||
"Call run_agent_with_input(task) to start the worker, "
|
||||
"or stop_worker_and_edit() to go back to building."
|
||||
"Call run_agent_with_input(task) to start the graph, "
|
||||
"or stop_graph_and_edit() to go back to building."
|
||||
),
|
||||
"worker_id": updated_session.worker_id,
|
||||
"worker_name": worker_name,
|
||||
"graph_id": updated_session.graph_id,
|
||||
"graph_name": graph_name,
|
||||
"goal": info.goal_name if info else "",
|
||||
"node_count": info.node_count if info else 0,
|
||||
}
|
||||
@@ -4009,6 +4085,89 @@ def register_queen_lifecycle_tools(
|
||||
)
|
||||
tools_registered += 1
|
||||
|
||||
# --- save_global_memory --------------------------------------------------
|
||||
|
||||
async def save_global_memory_entry(
|
||||
category: str,
|
||||
description: str,
|
||||
content: str,
|
||||
name: str | None = None,
|
||||
) -> str:
|
||||
"""Persist a queen-global memory entry about the user."""
|
||||
from framework.agents.queen.queen_memory_v2 import (
|
||||
global_memory_dir as _global_memory_dir,
|
||||
init_memory_dir as _init_memory_dir,
|
||||
save_global_memory as _save_global_memory,
|
||||
)
|
||||
|
||||
target_dir = (
|
||||
phase_state.global_memory_dir
|
||||
if phase_state is not None and phase_state.global_memory_dir is not None
|
||||
else _global_memory_dir()
|
||||
)
|
||||
_init_memory_dir(target_dir)
|
||||
|
||||
try:
|
||||
filename, path = _save_global_memory(
|
||||
category=category,
|
||||
description=description,
|
||||
content=content,
|
||||
name=name,
|
||||
memory_dir=target_dir,
|
||||
)
|
||||
return json.dumps(
|
||||
{
|
||||
"status": "saved",
|
||||
"filename": filename,
|
||||
"path": str(path),
|
||||
"category": category,
|
||||
}
|
||||
)
|
||||
except ValueError as exc:
|
||||
return json.dumps({"error": str(exc)})
|
||||
|
||||
_save_global_memory_tool = Tool(
|
||||
name="save_global_memory",
|
||||
description=(
|
||||
"Save durable global memory about the user. "
|
||||
"Only use for user profile, preferences, environment, or feedback."
|
||||
),
|
||||
parameters={
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"category": {
|
||||
"type": "string",
|
||||
"enum": ["profile", "preference", "environment", "feedback"],
|
||||
},
|
||||
"description": {
|
||||
"type": "string",
|
||||
"description": "Specific one-line description for future recall selection.",
|
||||
},
|
||||
"content": {
|
||||
"type": "string",
|
||||
"description": "Durable user-centric memory content.",
|
||||
},
|
||||
"name": {
|
||||
"type": "string",
|
||||
"description": "Optional short memory title.",
|
||||
},
|
||||
},
|
||||
"required": ["category", "description", "content"],
|
||||
"additionalProperties": False,
|
||||
},
|
||||
)
|
||||
registry.register(
|
||||
"save_global_memory",
|
||||
_save_global_memory_tool,
|
||||
lambda inputs: save_global_memory_entry(
|
||||
inputs["category"],
|
||||
inputs["description"],
|
||||
inputs["content"],
|
||||
inputs.get("name"),
|
||||
),
|
||||
)
|
||||
tools_registered += 1
|
||||
|
||||
# --- list_triggers ---------------------------------------------------------
|
||||
|
||||
async def list_triggers() -> str:
|
||||
|
||||
@@ -45,13 +45,13 @@ def recall_diary(query: str = "", days_back: int = 7) -> str:
|
||||
Args:
|
||||
query: Optional keyword or phrase to filter entries. If empty, all
|
||||
recent entries are returned.
|
||||
days_back: How many days to look back (1–30). Defaults to 7.
|
||||
days_back: How many days to look back (1-30). Defaults to 7.
|
||||
"""
|
||||
from datetime import date, timedelta
|
||||
|
||||
from framework.agents.queen.queen_memory import format_memory_date, read_episodic_memory
|
||||
|
||||
days_back = max(1, min(days_back, 30))
|
||||
days_back = max(1, min(int(days_back), 30))
|
||||
today = date.today()
|
||||
results: list[str] = []
|
||||
total_chars = 0
|
||||
|
||||
@@ -1,23 +1,17 @@
|
||||
"""Worker monitoring tools for Queen triage agents.
|
||||
"""Worker monitoring tools for Queen runtime inspection.
|
||||
|
||||
Three tools are registered by ``register_worker_monitoring_tools()``:
|
||||
The following tool is registered by ``register_worker_monitoring_tools()``:
|
||||
|
||||
- ``get_worker_health_summary`` — reads the worker's session log files and
|
||||
returns a compact health snapshot (recent verdicts, step count, timing).
|
||||
session_id is optional: if omitted, the most recent active session is
|
||||
auto-discovered from storage.
|
||||
|
||||
- ``emit_escalation_ticket`` — validates and publishes an EscalationTicket
|
||||
to the shared EventBus as a WORKER_ESCALATION_TICKET event.
|
||||
|
||||
- ``notify_operator`` — emits a QUEEN_INTERVENTION_REQUESTED event so the TUI
|
||||
can surface a non-disruptive operator notification.
|
||||
|
||||
Usage::
|
||||
|
||||
from framework.tools.worker_monitoring_tools import register_worker_monitoring_tools
|
||||
|
||||
register_worker_monitoring_tools(tool_registry, event_bus, storage_path)
|
||||
register_worker_monitoring_tools(tool_registry, storage_path)
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
@@ -30,7 +24,6 @@ from typing import TYPE_CHECKING
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from framework.runner.tool_registry import ToolRegistry
|
||||
from framework.runtime.event_bus import EventBus
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
@@ -40,20 +33,16 @@ _DEFAULT_LAST_N_STEPS = 40
|
||||
|
||||
def register_worker_monitoring_tools(
|
||||
registry: ToolRegistry,
|
||||
event_bus: EventBus,
|
||||
storage_path: Path,
|
||||
stream_id: str = "monitoring",
|
||||
worker_graph_id: str | None = None,
|
||||
default_session_id: str | None = None,
|
||||
) -> int:
|
||||
"""Register worker monitoring tools bound to *event_bus* and *storage_path*.
|
||||
"""Register worker monitoring tools bound to *storage_path*.
|
||||
|
||||
Args:
|
||||
registry: ToolRegistry to register tools on.
|
||||
event_bus: The shared EventBus for the worker runtime.
|
||||
storage_path: Root storage path of the worker runtime
|
||||
(e.g. ``~/.hive/agents/{name}``).
|
||||
stream_id: Stream ID used when emitting events.
|
||||
worker_graph_id: The primary worker graph's ID. Included in health summary
|
||||
so the judge can populate ticket identity fields accurately.
|
||||
default_session_id: When set, ``get_worker_health_summary`` uses this
|
||||
@@ -242,168 +231,4 @@ def register_worker_monitoring_tools(
|
||||
)
|
||||
tools_registered += 1
|
||||
|
||||
# -------------------------------------------------------------------------
|
||||
# emit_escalation_ticket
|
||||
# -------------------------------------------------------------------------
|
||||
|
||||
async def emit_escalation_ticket(ticket_json: str) -> str:
|
||||
"""Validate and publish an EscalationTicket to the shared EventBus.
|
||||
|
||||
ticket_json must be a JSON string containing all required EscalationTicket
|
||||
fields. The ticket is validated before publishing.
|
||||
|
||||
Returns a confirmation JSON with the ticket_id on success, or an error.
|
||||
"""
|
||||
from framework.runtime.escalation_ticket import EscalationTicket
|
||||
|
||||
try:
|
||||
raw = json.loads(ticket_json) if isinstance(ticket_json, str) else ticket_json
|
||||
ticket = EscalationTicket(**raw)
|
||||
except Exception as e:
|
||||
return json.dumps({"error": f"Invalid ticket: {e}"})
|
||||
|
||||
try:
|
||||
await event_bus.emit_worker_escalation_ticket(
|
||||
stream_id=stream_id,
|
||||
node_id="monitoring",
|
||||
ticket=ticket.model_dump(),
|
||||
)
|
||||
logger.info(
|
||||
"EscalationTicket emitted: ticket_id=%s severity=%s cause=%r",
|
||||
ticket.ticket_id,
|
||||
ticket.severity,
|
||||
ticket.cause[:80],
|
||||
)
|
||||
return json.dumps(
|
||||
{
|
||||
"status": "emitted",
|
||||
"ticket_id": ticket.ticket_id,
|
||||
"severity": ticket.severity,
|
||||
}
|
||||
)
|
||||
except Exception as e:
|
||||
return json.dumps({"error": f"Failed to emit ticket: {e}"})
|
||||
|
||||
_emit_ticket_tool = Tool(
|
||||
name="emit_escalation_ticket",
|
||||
description=(
|
||||
"Validate and publish a structured EscalationTicket to the shared EventBus. "
|
||||
"ticket_json must be a JSON string with all required EscalationTicket fields: "
|
||||
"worker_agent_id, worker_session_id, worker_node_id, worker_graph_id, "
|
||||
"severity (low/medium/high/critical), cause, judge_reasoning, suggested_action, "
|
||||
"recent_verdicts (list), total_steps_checked, steps_since_last_accept, "
|
||||
"stall_minutes (float or null), evidence_snippet."
|
||||
),
|
||||
parameters={
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"ticket_json": {
|
||||
"type": "string",
|
||||
"description": "JSON string of the complete EscalationTicket",
|
||||
},
|
||||
},
|
||||
"required": ["ticket_json"],
|
||||
},
|
||||
)
|
||||
registry.register(
|
||||
"emit_escalation_ticket",
|
||||
_emit_ticket_tool,
|
||||
lambda inputs: emit_escalation_ticket(**inputs),
|
||||
)
|
||||
tools_registered += 1
|
||||
|
||||
# -------------------------------------------------------------------------
|
||||
# notify_operator
|
||||
# -------------------------------------------------------------------------
|
||||
|
||||
async def notify_operator(
|
||||
ticket_id: str,
|
||||
analysis: str,
|
||||
urgency: str,
|
||||
) -> str:
|
||||
"""Emit a QUEEN_INTERVENTION_REQUESTED event to notify the human operator.
|
||||
|
||||
The TUI subscribes to this event and surfaces a non-disruptive dismissable
|
||||
notification. The worker agent is NOT paused. The operator can choose to
|
||||
open the queen's graph view via Ctrl+Q.
|
||||
|
||||
Args:
|
||||
ticket_id: The ticket_id from the original EscalationTicket.
|
||||
analysis: 2-3 sentence description of what is wrong, why it matters,
|
||||
and what action is suggested.
|
||||
urgency: Severity level: "low", "medium", "high", or "critical".
|
||||
|
||||
Returns:
|
||||
Confirmation JSON.
|
||||
"""
|
||||
valid_urgencies = {"low", "medium", "high", "critical"}
|
||||
if urgency not in valid_urgencies:
|
||||
return json.dumps(
|
||||
{"error": f"urgency must be one of {sorted(valid_urgencies)}, got {urgency!r}"}
|
||||
)
|
||||
|
||||
try:
|
||||
await event_bus.emit_queen_intervention_requested(
|
||||
stream_id=stream_id,
|
||||
node_id="ticket_triage",
|
||||
ticket_id=ticket_id,
|
||||
analysis=analysis,
|
||||
severity=urgency,
|
||||
queen_graph_id="queen",
|
||||
queen_stream_id="queen",
|
||||
)
|
||||
logger.info(
|
||||
"Queen intervention requested: ticket_id=%s urgency=%s",
|
||||
ticket_id,
|
||||
urgency,
|
||||
)
|
||||
return json.dumps(
|
||||
{
|
||||
"status": "operator_notified",
|
||||
"ticket_id": ticket_id,
|
||||
"urgency": urgency,
|
||||
}
|
||||
)
|
||||
except Exception as e:
|
||||
return json.dumps({"error": f"Failed to notify operator: {e}"})
|
||||
|
||||
_notify_tool = Tool(
|
||||
name="notify_operator",
|
||||
description=(
|
||||
"Notify the human operator that a worker agent needs attention. "
|
||||
"This emits a QUEEN_INTERVENTION_REQUESTED event that the TUI surfaces "
|
||||
"as a non-disruptive notification. The worker keeps running. "
|
||||
"Only call this when you (the Queen) have decided the issue warrants "
|
||||
"human attention after reading the escalation ticket."
|
||||
),
|
||||
parameters={
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"ticket_id": {
|
||||
"type": "string",
|
||||
"description": "The ticket_id from the EscalationTicket being triaged",
|
||||
},
|
||||
"analysis": {
|
||||
"type": "string",
|
||||
"description": (
|
||||
"2-3 sentence analysis: what is wrong, why it matters, "
|
||||
"and what action you suggest."
|
||||
),
|
||||
},
|
||||
"urgency": {
|
||||
"type": "string",
|
||||
"enum": ["low", "medium", "high", "critical"],
|
||||
"description": "Severity level for the operator notification",
|
||||
},
|
||||
},
|
||||
"required": ["ticket_id", "analysis", "urgency"],
|
||||
},
|
||||
)
|
||||
registry.register(
|
||||
"notify_operator",
|
||||
_notify_tool,
|
||||
lambda inputs: notify_operator(**inputs),
|
||||
)
|
||||
tools_registered += 1
|
||||
|
||||
return tools_registered
|
||||
|
||||
@@ -4,8 +4,6 @@ import type {
|
||||
InjectResult,
|
||||
ChatResult,
|
||||
StopResult,
|
||||
ResumeResult,
|
||||
ReplayResult,
|
||||
GoalProgress,
|
||||
} from "./types";
|
||||
|
||||
@@ -34,16 +32,22 @@ export const executionApi = {
|
||||
graph_id: graphId,
|
||||
}),
|
||||
|
||||
chat: (sessionId: string, message: string, images?: { type: string; image_url: { url: string } }[]) =>
|
||||
api.post<ChatResult>(`/sessions/${sessionId}/chat`, { message, ...(images?.length ? { images } : {}) }),
|
||||
chat: (
|
||||
sessionId: string,
|
||||
message: string,
|
||||
images?: { type: string; image_url: { url: string } }[],
|
||||
displayMessage?: string,
|
||||
) =>
|
||||
api.post<ChatResult>(`/sessions/${sessionId}/chat`, {
|
||||
message,
|
||||
...(images?.length ? { images } : {}),
|
||||
...(displayMessage !== undefined ? { display_message: displayMessage } : {}),
|
||||
}),
|
||||
|
||||
/** Queue context for the queen without triggering an LLM response. */
|
||||
queenContext: (sessionId: string, message: string) =>
|
||||
api.post<ChatResult>(`/sessions/${sessionId}/queen-context`, { message }),
|
||||
|
||||
workerInput: (sessionId: string, message: string) =>
|
||||
api.post<ChatResult>(`/sessions/${sessionId}/worker-input`, { message }),
|
||||
|
||||
stop: (sessionId: string, executionId: string) =>
|
||||
api.post<StopResult>(`/sessions/${sessionId}/stop`, {
|
||||
execution_id: executionId,
|
||||
@@ -57,18 +61,6 @@ export const executionApi = {
|
||||
cancelQueen: (sessionId: string) =>
|
||||
api.post<{ cancelled: boolean }>(`/sessions/${sessionId}/cancel-queen`),
|
||||
|
||||
resume: (sessionId: string, workerSessionId: string, checkpointId?: string) =>
|
||||
api.post<ResumeResult>(`/sessions/${sessionId}/resume`, {
|
||||
session_id: workerSessionId,
|
||||
checkpoint_id: checkpointId,
|
||||
}),
|
||||
|
||||
replay: (sessionId: string, workerSessionId: string, checkpointId: string) =>
|
||||
api.post<ReplayResult>(`/sessions/${sessionId}/replay`, {
|
||||
session_id: workerSessionId,
|
||||
checkpoint_id: checkpointId,
|
||||
}),
|
||||
|
||||
goalProgress: (sessionId: string) =>
|
||||
api.get<GoalProgress>(`/sessions/${sessionId}/goal-progress`),
|
||||
};
|
||||
|
||||
@@ -3,16 +3,13 @@ import type {
|
||||
AgentEvent,
|
||||
LiveSession,
|
||||
LiveSessionDetail,
|
||||
SessionSummary,
|
||||
SessionDetail,
|
||||
Checkpoint,
|
||||
EntryPoint,
|
||||
} from "./types";
|
||||
|
||||
export const sessionsApi = {
|
||||
// --- Session lifecycle ---
|
||||
|
||||
/** Create a session. If agentPath is provided, loads worker in one step. */
|
||||
/** Create a session. If agentPath is provided, loads a graph in one step. */
|
||||
create: (agentPath?: string, agentId?: string, model?: string, initialPrompt?: string, queenResumeFrom?: string) =>
|
||||
api.post<LiveSession>("/sessions", {
|
||||
agent_path: agentPath,
|
||||
@@ -25,7 +22,7 @@ export const sessionsApi = {
|
||||
/** List all active sessions. */
|
||||
list: () => api.get<{ sessions: LiveSession[] }>("/sessions"),
|
||||
|
||||
/** Get session detail (includes entry_points, graphs when worker is loaded). */
|
||||
/** Get session detail (includes entry_points, graphs when a graph is loaded). */
|
||||
get: (sessionId: string) =>
|
||||
api.get<LiveSessionDetail>(`/sessions/${sessionId}`),
|
||||
|
||||
@@ -35,23 +32,23 @@ export const sessionsApi = {
|
||||
`/sessions/${sessionId}`,
|
||||
),
|
||||
|
||||
// --- Worker lifecycle ---
|
||||
// --- Graph lifecycle ---
|
||||
|
||||
loadWorker: (
|
||||
loadGraph: (
|
||||
sessionId: string,
|
||||
agentPath: string,
|
||||
workerId?: string,
|
||||
graphId?: string,
|
||||
model?: string,
|
||||
) =>
|
||||
api.post<LiveSession>(`/sessions/${sessionId}/worker`, {
|
||||
api.post<LiveSession>(`/sessions/${sessionId}/graph`, {
|
||||
agent_path: agentPath,
|
||||
worker_id: workerId,
|
||||
graph_id: graphId,
|
||||
model,
|
||||
}),
|
||||
|
||||
unloadWorker: (sessionId: string) =>
|
||||
api.delete<{ session_id: string; worker_unloaded: boolean }>(
|
||||
`/sessions/${sessionId}/worker`,
|
||||
unloadGraph: (sessionId: string) =>
|
||||
api.delete<{ session_id: string; graph_unloaded: boolean }>(
|
||||
`/sessions/${sessionId}/graph`,
|
||||
),
|
||||
|
||||
// --- Session info ---
|
||||
@@ -92,31 +89,4 @@ export const sessionsApi = {
|
||||
/** Permanently delete a history session (stops live session + removes disk files). */
|
||||
deleteHistory: (sessionId: string) =>
|
||||
api.delete<{ deleted: string }>(`/sessions/history/${sessionId}`),
|
||||
|
||||
// --- Worker session browsing (persisted execution runs) ---
|
||||
|
||||
workerSessions: (sessionId: string) =>
|
||||
api.get<{ sessions: SessionSummary[] }>(
|
||||
`/sessions/${sessionId}/worker-sessions`,
|
||||
),
|
||||
|
||||
workerSession: (sessionId: string, wsId: string) =>
|
||||
api.get<SessionDetail>(
|
||||
`/sessions/${sessionId}/worker-sessions/${wsId}`,
|
||||
),
|
||||
|
||||
deleteWorkerSession: (sessionId: string, wsId: string) =>
|
||||
api.delete<{ deleted: string }>(
|
||||
`/sessions/${sessionId}/worker-sessions/${wsId}`,
|
||||
),
|
||||
|
||||
checkpoints: (sessionId: string, wsId: string) =>
|
||||
api.get<{ checkpoints: Checkpoint[] }>(
|
||||
`/sessions/${sessionId}/worker-sessions/${wsId}/checkpoints`,
|
||||
),
|
||||
|
||||
restore: (sessionId: string, wsId: string, checkpointId: string) =>
|
||||
api.post<{ execution_id: string }>(
|
||||
`/sessions/${sessionId}/worker-sessions/${wsId}/checkpoints/${checkpointId}/restore`,
|
||||
),
|
||||
};
|
||||
|
||||
@@ -2,8 +2,8 @@
|
||||
|
||||
export interface LiveSession {
|
||||
session_id: string;
|
||||
worker_id: string | null;
|
||||
worker_name: string | null;
|
||||
graph_id: string | null;
|
||||
graph_name: string | null;
|
||||
has_worker: boolean;
|
||||
agent_path: string;
|
||||
description: string;
|
||||
@@ -79,61 +79,11 @@ export interface StopResult {
|
||||
error?: string;
|
||||
}
|
||||
|
||||
export interface ResumeResult {
|
||||
execution_id: string;
|
||||
resumed_from: string;
|
||||
checkpoint_id: string | null;
|
||||
}
|
||||
|
||||
export interface ReplayResult {
|
||||
execution_id: string;
|
||||
replayed_from: string;
|
||||
checkpoint_id: string;
|
||||
}
|
||||
|
||||
export interface GoalProgress {
|
||||
progress: number;
|
||||
criteria: unknown[];
|
||||
}
|
||||
|
||||
// --- Session types ---
|
||||
|
||||
export interface SessionSummary {
|
||||
session_id: string;
|
||||
status?: string;
|
||||
started_at?: string | null;
|
||||
completed_at?: string | null;
|
||||
steps?: number;
|
||||
paused_at?: string | null;
|
||||
checkpoint_count: number;
|
||||
}
|
||||
|
||||
export interface SessionDetail {
|
||||
status: string;
|
||||
started_at: string;
|
||||
completed_at: string | null;
|
||||
input_data: Record<string, unknown>;
|
||||
memory: Record<string, unknown>;
|
||||
progress: {
|
||||
current_node: string | null;
|
||||
paused_at: string | null;
|
||||
steps_executed: number;
|
||||
path: string[];
|
||||
node_visit_counts: Record<string, number>;
|
||||
nodes_with_failures: string[];
|
||||
resume_from?: string;
|
||||
};
|
||||
}
|
||||
|
||||
export interface Checkpoint {
|
||||
checkpoint_id: string;
|
||||
current_node: string | null;
|
||||
next_node: string | null;
|
||||
is_clean: boolean;
|
||||
timestamp: string | null;
|
||||
error?: string;
|
||||
}
|
||||
|
||||
export interface Message {
|
||||
seq: number;
|
||||
role: string;
|
||||
@@ -161,6 +111,7 @@ export interface NodeSpec {
|
||||
routes: Record<string, string>;
|
||||
max_retries: number;
|
||||
max_node_visits: number;
|
||||
/** Deprecated compatibility field; the queen is interactive by identity now. */
|
||||
client_facing: boolean;
|
||||
success_criteria: string | null;
|
||||
system_prompt: string;
|
||||
@@ -330,7 +281,7 @@ export type EventTypeName =
|
||||
| "webhook_received"
|
||||
| "custom"
|
||||
| "escalation_requested"
|
||||
| "worker_loaded"
|
||||
| "worker_graph_loaded"
|
||||
| "credentials_required"
|
||||
| "queen_phase_changed"
|
||||
| "subagent_report"
|
||||
|
||||
@@ -0,0 +1,69 @@
|
||||
import { useState, useEffect } from "react";
|
||||
|
||||
type BridgeStatus = "checking" | "connected" | "disconnected" | "offline";
|
||||
|
||||
const BRIDGE_STATUS_URL = "/api/browser/status";
|
||||
const POLL_INTERVAL_MS = 3000;
|
||||
|
||||
export default function BrowserStatusBadge() {
|
||||
const [status, setStatus] = useState<BridgeStatus>("checking");
|
||||
|
||||
useEffect(() => {
|
||||
let cancelled = false;
|
||||
|
||||
const check = async () => {
|
||||
try {
|
||||
const res = await fetch(BRIDGE_STATUS_URL, {
|
||||
signal: AbortSignal.timeout(2000),
|
||||
});
|
||||
if (cancelled) return;
|
||||
if (res.ok) {
|
||||
const data = await res.json();
|
||||
setStatus(data.connected ? "connected" : "disconnected");
|
||||
} else {
|
||||
setStatus("offline");
|
||||
}
|
||||
} catch {
|
||||
if (!cancelled) setStatus("offline");
|
||||
}
|
||||
};
|
||||
|
||||
check();
|
||||
const timer = setInterval(check, POLL_INTERVAL_MS);
|
||||
return () => {
|
||||
cancelled = true;
|
||||
clearInterval(timer);
|
||||
};
|
||||
}, []);
|
||||
|
||||
if (status === "checking") return null;
|
||||
|
||||
const label =
|
||||
status === "connected"
|
||||
? "Browser connected"
|
||||
: status === "disconnected"
|
||||
? "Extension not connected"
|
||||
: "Browser offline";
|
||||
|
||||
const dotClass =
|
||||
status === "connected"
|
||||
? "bg-green-500"
|
||||
: status === "disconnected"
|
||||
? "bg-yellow-500"
|
||||
: "bg-muted-foreground/40";
|
||||
|
||||
return (
|
||||
<div
|
||||
className="flex items-center gap-1.5 text-xs select-none"
|
||||
title={label}
|
||||
>
|
||||
<span className="relative flex h-2 w-2 flex-shrink-0">
|
||||
{status === "connected" && (
|
||||
<span className="animate-ping absolute inline-flex h-full w-full rounded-full bg-green-400 opacity-60" />
|
||||
)}
|
||||
<span className={`relative inline-flex rounded-full h-2 w-2 ${dotClass}`} />
|
||||
</span>
|
||||
<span className="text-muted-foreground hidden sm:inline">Browser</span>
|
||||
</div>
|
||||
);
|
||||
}
|
||||
@@ -3,6 +3,7 @@ import { useNavigate } from "react-router-dom";
|
||||
import { Crown, X } from "lucide-react";
|
||||
import { sessionsApi } from "@/api/sessions";
|
||||
import { loadPersistedTabs, savePersistedTabs, TAB_STORAGE_KEY, type PersistedTabState } from "@/lib/tab-persistence";
|
||||
import BrowserStatusBadge from "@/components/BrowserStatusBadge";
|
||||
|
||||
export interface TopBarTab {
|
||||
agentType: string;
|
||||
@@ -129,11 +130,14 @@ export default function TopBar({ tabs: tabsProp, onTabClick, onCloseTab, canClos
|
||||
)}
|
||||
</div>
|
||||
|
||||
{children && (
|
||||
<div className="flex items-center gap-1 flex-shrink-0">
|
||||
{children}
|
||||
</div>
|
||||
)}
|
||||
<div className="flex items-center gap-3 flex-shrink-0">
|
||||
<BrowserStatusBadge />
|
||||
{children && (
|
||||
<div className="flex items-center gap-1">
|
||||
{children}
|
||||
</div>
|
||||
)}
|
||||
</div>
|
||||
</div>
|
||||
);
|
||||
}
|
||||
|
||||
@@ -78,8 +78,7 @@ export function sseEventToChatMessage(
|
||||
}
|
||||
|
||||
case "client_input_requested":
|
||||
// Handled explicitly in handleSSEEvent (workspace.tsx) so it can
|
||||
// create a worker_input_request message and set awaitingInput state.
|
||||
// Handled explicitly in handleSSEEvent (workspace.tsx) for queen input widgets.
|
||||
return null;
|
||||
|
||||
case "client_input_received": {
|
||||
|
||||
@@ -350,8 +350,8 @@ interface AgentBackendState {
|
||||
pendingOptions: string[] | null;
|
||||
/** Multiple questions from ask_user_multiple */
|
||||
pendingQuestions: { id: string; prompt: string; options?: string[] }[] | null;
|
||||
/** Whether the pending question came from queen or worker */
|
||||
pendingQuestionSource: "queen" | "worker" | null;
|
||||
/** Whether the pending question came from the queen interaction flow */
|
||||
pendingQuestionSource: "queen" | null;
|
||||
/** Per-node context window usage (from context_usage_updated events) */
|
||||
contextUsage: Record<string, { usagePct: number; messageCount: number; estimatedTokens: number; maxTokens: number }>;
|
||||
/** Whether the queen's LLM supports image content — false disables the attach button */
|
||||
@@ -1118,7 +1118,7 @@ export default function Workspace() {
|
||||
// At this point liveSession is guaranteed set — if both reconnect and create
|
||||
// failed, the throw inside the catch exits the outer try block.
|
||||
const session = liveSession!;
|
||||
const displayName = formatAgentDisplayName(session.worker_name || agentType);
|
||||
const displayName = formatAgentDisplayName(session.graph_name || agentType);
|
||||
const initialPhase = restoredPhase || session.queen_phase || (session.has_worker ? "staging" : "planning");
|
||||
queenPhaseRef.current[agentType] = initialPhase;
|
||||
updateAgentState(agentType, {
|
||||
@@ -1156,7 +1156,6 @@ export default function Workspace() {
|
||||
});
|
||||
|
||||
// Restore messages when rejoining an existing session OR cold-restoring from disk.
|
||||
let isWorkerRunning = false;
|
||||
const restoredMsgs: ChatMessage[] = [];
|
||||
// For cold-restore, use the old session ID. For live resume, use current session.
|
||||
const historyId = coldRestoreId ?? (isResumedSession ? session.session_id : undefined);
|
||||
@@ -1172,17 +1171,6 @@ export default function Workspace() {
|
||||
restoredFlowchartMap = restored.flowchartMap;
|
||||
restoredOriginalDraft = restored.originalDraft;
|
||||
}
|
||||
|
||||
// Check worker status (needed for isWorkerRunning flag)
|
||||
try {
|
||||
const { sessions: workerSessions } = await sessionsApi.workerSessions(historyId);
|
||||
const resumable = workerSessions.find(
|
||||
(s) => s.status === "active" || s.status === "paused",
|
||||
);
|
||||
isWorkerRunning = resumable?.status === "active";
|
||||
} catch {
|
||||
// Worker session listing failed — not critical
|
||||
}
|
||||
}
|
||||
|
||||
// Merge messages in chronological order (only for live resume; cold restore
|
||||
@@ -1213,7 +1201,6 @@ export default function Workspace() {
|
||||
ready: true,
|
||||
loading: false,
|
||||
queenReady: !!(isResumedSession || hasRestoredContent),
|
||||
...(isWorkerRunning ? { workerRunState: "running" } : {}),
|
||||
// Restore flowchart overlay from persisted events
|
||||
...(restoredFlowchartMap ? { flowchartMap: restoredFlowchartMap } : {}),
|
||||
...(restoredOriginalDraft ? { originalDraft: restoredOriginalDraft, draftGraph: null } : {}),
|
||||
@@ -1784,27 +1771,8 @@ export default function Workspace() {
|
||||
: null;
|
||||
if (isQueen) {
|
||||
const prompt = (event.data?.prompt as string) || "";
|
||||
const isAutoBlock = !prompt && !options && !questions;
|
||||
// Queen auto-block (empty prompt, no options) should not
|
||||
// overwrite a pending worker question — the worker's
|
||||
// QuestionWidget must stay visible. Use the updater form
|
||||
// to read the latest state and avoid stale-closure races
|
||||
// when worker and queen events arrive in the same batch.
|
||||
setAgentStates(prev => {
|
||||
const cur = prev[agentType] || defaultAgentState();
|
||||
const workerQuestionActive = cur.pendingQuestionSource === "worker";
|
||||
if (isAutoBlock && workerQuestionActive) {
|
||||
return {
|
||||
...prev, [agentType]: {
|
||||
...cur,
|
||||
awaitingInput: true,
|
||||
isTyping: false,
|
||||
isStreaming: false,
|
||||
queenIsTyping: false,
|
||||
queenBuilding: false,
|
||||
}
|
||||
};
|
||||
}
|
||||
return {
|
||||
...prev, [agentType]: {
|
||||
...cur,
|
||||
@@ -1821,37 +1789,11 @@ export default function Workspace() {
|
||||
};
|
||||
});
|
||||
} else {
|
||||
// Worker input request.
|
||||
// If the prompt is non-empty (explicit ask_user), create a visible
|
||||
// message bubble. For auto-block (empty prompt), the worker's text
|
||||
// was already streamed via client_output_delta — just activate the
|
||||
// reply box below the last worker message.
|
||||
const eid = event.execution_id ?? "";
|
||||
const prompt = (event.data?.prompt as string) || "";
|
||||
if (prompt) {
|
||||
const workerInputMsg: ChatMessage = {
|
||||
id: `worker-input-${eid}-${event.node_id || Date.now()}`,
|
||||
agent: displayName || event.node_id || "Worker",
|
||||
agentColor: "",
|
||||
content: prompt,
|
||||
timestamp: "",
|
||||
type: "worker_input_request",
|
||||
role: "worker",
|
||||
thread: agentType,
|
||||
createdAt: eventCreatedAt,
|
||||
};
|
||||
console.log('[CLIENT_INPUT_REQ] creating worker_input_request msg:', workerInputMsg.id, 'content:', prompt.slice(0, 80));
|
||||
upsertChatMessage(agentType, workerInputMsg);
|
||||
}
|
||||
updateAgentState(agentType, {
|
||||
awaitingInput: true,
|
||||
isTyping: false,
|
||||
isStreaming: false,
|
||||
queenIsTyping: false,
|
||||
pendingQuestion: prompt || null,
|
||||
pendingOptions: options,
|
||||
pendingQuestionSource: "worker",
|
||||
});
|
||||
console.warn(
|
||||
"[CLIENT_INPUT_REQ] ignoring non-queen client_input_requested event",
|
||||
streamId,
|
||||
event.node_id,
|
||||
);
|
||||
}
|
||||
}
|
||||
if (event.type === "execution_paused") {
|
||||
@@ -2305,10 +2247,10 @@ export default function Workspace() {
|
||||
break;
|
||||
}
|
||||
|
||||
case "worker_loaded": {
|
||||
const workerName = event.data?.worker_name as string | undefined;
|
||||
case "worker_graph_loaded": {
|
||||
const graphName = event.data?.graph_name as string | undefined;
|
||||
const agentPathFromEvent = event.data?.agent_path as string | undefined;
|
||||
const displayName = formatAgentDisplayName(workerName || baseAgentType(agentType));
|
||||
const displayName = formatAgentDisplayName(graphName || baseAgentType(agentType));
|
||||
|
||||
// Invalidate cached credential requirements so the modal fetches
|
||||
// fresh data the next time it opens (the new agent may have
|
||||
@@ -2641,41 +2583,6 @@ export default function Workspace() {
|
||||
return;
|
||||
}
|
||||
|
||||
// If worker is awaiting free-text input (no options / no QuestionWidget),
|
||||
// route the message directly to the worker instead of the queen.
|
||||
if (agentStates[activeWorker]?.awaitingInput && agentStates[activeWorker]?.pendingQuestionSource === "worker" && !agentStates[activeWorker]?.pendingOptions) {
|
||||
const state = agentStates[activeWorker];
|
||||
if (state?.sessionId && state?.ready) {
|
||||
const userMsg: ChatMessage = {
|
||||
id: makeId(), agent: "You", agentColor: "",
|
||||
content: text, timestamp: "", type: "user", thread, createdAt: Date.now(),
|
||||
};
|
||||
setSessionsByAgent(prev => ({
|
||||
...prev,
|
||||
[activeWorker]: prev[activeWorker].map(s =>
|
||||
s.id === activeSession.id ? { ...s, messages: [...s.messages, userMsg] } : s
|
||||
),
|
||||
}));
|
||||
updateAgentState(activeWorker, { awaitingInput: false, workerInputMessageId: null, isTyping: true, pendingQuestion: null, pendingOptions: null, pendingQuestions: null, pendingQuestionSource: null });
|
||||
executionApi.workerInput(state.sessionId, text).catch((err: unknown) => {
|
||||
const errMsg = err instanceof Error ? err.message : String(err);
|
||||
const errorChatMsg: ChatMessage = {
|
||||
id: makeId(), agent: "System", agentColor: "",
|
||||
content: `Failed to send to worker: ${errMsg}`,
|
||||
timestamp: "", type: "system", thread, createdAt: Date.now(),
|
||||
};
|
||||
setSessionsByAgent(prev => ({
|
||||
...prev,
|
||||
[activeWorker]: prev[activeWorker].map(s =>
|
||||
s.id === activeSession.id ? { ...s, messages: [...s.messages, errorChatMsg] } : s
|
||||
),
|
||||
}));
|
||||
updateAgentState(activeWorker, { isTyping: false, isStreaming: false });
|
||||
});
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
// If queen has a pending question widget, dismiss it when user types directly
|
||||
if (agentStates[activeWorker]?.pendingQuestionSource === "queen") {
|
||||
updateAgentState(activeWorker, { pendingQuestion: null, pendingOptions: null, pendingQuestions: null, pendingQuestionSource: null });
|
||||
@@ -2727,96 +2634,6 @@ export default function Workspace() {
|
||||
}
|
||||
}, [activeWorker, activeSession, agentStates, updateAgentState]);
|
||||
|
||||
// --- handleWorkerReply: send user input to the worker via dedicated endpoint ---
|
||||
const handleWorkerReply = useCallback((text: string) => {
|
||||
if (!activeSession) return;
|
||||
const state = agentStates[activeWorker];
|
||||
if (!state?.sessionId || !state?.ready) return;
|
||||
|
||||
// Add user reply to chat thread
|
||||
const userMsg: ChatMessage = {
|
||||
id: makeId(), agent: "You", agentColor: "",
|
||||
content: text, timestamp: "", type: "user", thread: activeWorker, createdAt: Date.now(),
|
||||
};
|
||||
setSessionsByAgent(prev => ({
|
||||
...prev,
|
||||
[activeWorker]: prev[activeWorker].map(s =>
|
||||
s.id === activeSession.id ? { ...s, messages: [...s.messages, userMsg] } : s
|
||||
),
|
||||
}));
|
||||
|
||||
// Clear awaiting state optimistically
|
||||
updateAgentState(activeWorker, { awaitingInput: false, workerInputMessageId: null, isTyping: true, pendingQuestion: null, pendingOptions: null, pendingQuestions: null, pendingQuestionSource: null });
|
||||
|
||||
executionApi.workerInput(state.sessionId, text).catch((err: unknown) => {
|
||||
const errMsg = err instanceof Error ? err.message : String(err);
|
||||
const errorChatMsg: ChatMessage = {
|
||||
id: makeId(), agent: "System", agentColor: "",
|
||||
content: `Failed to send to worker: ${errMsg}`,
|
||||
timestamp: "", type: "system", thread: activeWorker, createdAt: Date.now(),
|
||||
};
|
||||
setSessionsByAgent(prev => ({
|
||||
...prev,
|
||||
[activeWorker]: prev[activeWorker].map(s =>
|
||||
s.id === activeSession.id ? { ...s, messages: [...s.messages, errorChatMsg] } : s
|
||||
),
|
||||
}));
|
||||
updateAgentState(activeWorker, { isTyping: false, isStreaming: false });
|
||||
});
|
||||
}, [activeWorker, activeSession, agentStates, updateAgentState]);
|
||||
|
||||
// --- handleWorkerQuestionAnswer: route predefined answers direct to worker, "Other" through queen ---
|
||||
const handleWorkerQuestionAnswer = useCallback((answer: string, isOther: boolean) => {
|
||||
if (!activeSession) return;
|
||||
const state = agentStates[activeWorker];
|
||||
const question = state?.pendingQuestion || "";
|
||||
const opts = state?.pendingOptions;
|
||||
|
||||
if (isOther) {
|
||||
// "Other" free-text → route through queen for evaluation
|
||||
updateAgentState(activeWorker, { pendingQuestion: null, pendingOptions: null, pendingQuestions: null, pendingQuestionSource: null });
|
||||
if (question && opts && state?.sessionId && state?.ready) {
|
||||
const formatted = `[Worker asked: "${question}" | Options: ${opts.join(", ")}]\nUser answered: "${answer}"`;
|
||||
const userMsg: ChatMessage = {
|
||||
id: makeId(), agent: "You", agentColor: "",
|
||||
content: answer, timestamp: "", type: "user", thread: activeWorker, createdAt: Date.now(),
|
||||
};
|
||||
setSessionsByAgent(prev => ({
|
||||
...prev,
|
||||
[activeWorker]: prev[activeWorker].map(s =>
|
||||
s.id === activeSession.id ? { ...s, messages: [...s.messages, userMsg] } : s
|
||||
),
|
||||
}));
|
||||
updateAgentState(activeWorker, { isTyping: true, queenIsTyping: true });
|
||||
executionApi.chat(state.sessionId, formatted).catch((err: unknown) => {
|
||||
const errMsg = err instanceof Error ? err.message : String(err);
|
||||
const errorChatMsg: ChatMessage = {
|
||||
id: makeId(), agent: "System", agentColor: "",
|
||||
content: `Failed to send message: ${errMsg}`,
|
||||
timestamp: "", type: "system", thread: activeWorker, createdAt: Date.now(),
|
||||
};
|
||||
setSessionsByAgent(prev => ({
|
||||
...prev,
|
||||
[activeWorker]: prev[activeWorker].map(s =>
|
||||
s.id === activeSession.id ? { ...s, messages: [...s.messages, errorChatMsg] } : s
|
||||
),
|
||||
}));
|
||||
updateAgentState(activeWorker, { isTyping: false, isStreaming: false, queenIsTyping: false });
|
||||
});
|
||||
} else {
|
||||
handleSend(answer, activeWorker);
|
||||
}
|
||||
} else {
|
||||
// Predefined option → send directly to worker
|
||||
handleWorkerReply(answer);
|
||||
// Queue context for queen (fire-and-forget, no LLM response triggered)
|
||||
if (question && state?.sessionId && state?.ready) {
|
||||
const notification = `[Worker asked: "${question}" | User selected: "${answer}"]`;
|
||||
executionApi.queenContext(state.sessionId, notification).catch(() => { });
|
||||
}
|
||||
}
|
||||
}, [activeWorker, activeSession, agentStates, handleWorkerReply, handleSend, updateAgentState, setSessionsByAgent]);
|
||||
|
||||
// --- handleQueenQuestionAnswer: submit queen's own question answer via /chat ---
|
||||
// The queen asked the question herself, so she already has context — just send the raw answer.
|
||||
const handleQueenQuestionAnswer = useCallback((answer: string, _isOther: boolean) => {
|
||||
@@ -2838,11 +2655,9 @@ export default function Workspace() {
|
||||
}, [activeWorker, handleSend, updateAgentState]);
|
||||
|
||||
// --- handleQuestionDismiss: user closed the question widget without answering ---
|
||||
// Injects a dismiss signal so the blocked node can continue.
|
||||
const handleQuestionDismiss = useCallback(() => {
|
||||
const state = agentStates[activeWorker];
|
||||
if (!state?.sessionId) return;
|
||||
const source = state.pendingQuestionSource;
|
||||
const question = state.pendingQuestion || "";
|
||||
|
||||
// Clear UI state immediately
|
||||
@@ -2854,13 +2669,8 @@ export default function Workspace() {
|
||||
awaitingInput: false,
|
||||
});
|
||||
|
||||
// Unblock the waiting node with a dismiss signal
|
||||
const dismissMsg = `[User dismissed the question: "${question}"]`;
|
||||
if (source === "worker") {
|
||||
executionApi.workerInput(state.sessionId, dismissMsg).catch(() => { });
|
||||
} else {
|
||||
executionApi.chat(state.sessionId, dismissMsg).catch(() => { });
|
||||
}
|
||||
executionApi.chat(state.sessionId, dismissMsg).catch(() => { });
|
||||
}, [agentStates, activeWorker, updateAgentState]);
|
||||
|
||||
const handleLoadAgent = useCallback(async (agentPath: string) => {
|
||||
@@ -2868,8 +2678,8 @@ export default function Workspace() {
|
||||
if (!state?.sessionId) return;
|
||||
|
||||
try {
|
||||
await sessionsApi.loadWorker(state.sessionId, agentPath);
|
||||
// Success: worker_loaded SSE event will handle UI updates automatically
|
||||
await sessionsApi.loadGraph(state.sessionId, agentPath);
|
||||
// Success: worker_graph_loaded SSE event will handle UI updates automatically
|
||||
} catch (err) {
|
||||
// 424 = credentials required — open the credentials modal
|
||||
if (err instanceof ApiError && err.status === 424) {
|
||||
@@ -3232,11 +3042,7 @@ export default function Workspace() {
|
||||
pendingQuestion={activeAgentState?.awaitingInput ? activeAgentState.pendingQuestion : null}
|
||||
pendingOptions={activeAgentState?.awaitingInput ? activeAgentState.pendingOptions : null}
|
||||
pendingQuestions={activeAgentState?.awaitingInput ? activeAgentState.pendingQuestions : null}
|
||||
onQuestionSubmit={
|
||||
activeAgentState?.pendingQuestionSource === "queen"
|
||||
? handleQueenQuestionAnswer
|
||||
: handleWorkerQuestionAnswer
|
||||
}
|
||||
onQuestionSubmit={handleQueenQuestionAnswer}
|
||||
onMultiQuestionSubmit={handleMultiQuestionAnswer}
|
||||
onQuestionDismiss={handleQuestionDismiss}
|
||||
contextUsage={activeAgentState?.contextUsage}
|
||||
|
||||
@@ -14,6 +14,7 @@ The script detects available credentials and prompts you to pick a provider. You
|
||||
- `ANTHROPIC_API_KEY`
|
||||
- `OPENAI_API_KEY`
|
||||
- `GEMINI_API_KEY`
|
||||
- `KIMI_API_KEY`
|
||||
- `ZAI_API_KEY`
|
||||
- Claude Code / Codex / Kimi subscription
|
||||
|
||||
@@ -35,7 +36,7 @@ uv run python tests/dummy_agents/run_all.py --verbose
|
||||
| parallel_merge | 4 | Fan-out/fan-in, failure strategies |
|
||||
| retry | 4 | Retry mechanics, exhaustion, ON_FAILURE edges |
|
||||
| feedback_loop | 3 | Feedback cycles, max_node_visits |
|
||||
| worker | 4 | Real MCP tools (example_tool, get_current_time, save_data/load_data) |
|
||||
| worker | 5 | Real MCP tools plus a two-worker artifact round-trip smoke test |
|
||||
|
||||
## Notes
|
||||
|
||||
|
||||
@@ -6,6 +6,9 @@ Run via: cd core && uv run python tests/dummy_agents/run_all.py
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
import json
|
||||
import os
|
||||
from pathlib import Path
|
||||
|
||||
import pytest
|
||||
@@ -21,6 +24,7 @@ _selected_model: str | None = None
|
||||
_selected_api_key: str | None = None
|
||||
_selected_extra_headers: dict[str, str] | None = None
|
||||
_selected_api_base: str | None = None
|
||||
_EXECUTION_TIMEOUT_SECS = float(os.environ.get("DUMMY_AGENT_EXEC_TIMEOUT_SECS", "90"))
|
||||
|
||||
|
||||
def set_llm_selection(
|
||||
@@ -40,18 +44,55 @@ def set_llm_selection(
|
||||
# ── collection hook: skip entire directory when not configured ───────
|
||||
|
||||
|
||||
def _try_auto_configure_from_hive_config() -> bool:
|
||||
"""Try to load LLM provider from ~/.hive/configuration.json.
|
||||
|
||||
Returns True if successfully configured, False otherwise.
|
||||
"""
|
||||
try:
|
||||
from framework.config import (
|
||||
get_api_base,
|
||||
get_api_key,
|
||||
get_llm_extra_kwargs,
|
||||
get_preferred_model,
|
||||
)
|
||||
|
||||
model = get_preferred_model()
|
||||
api_key = get_api_key()
|
||||
if not model or not api_key:
|
||||
return False
|
||||
|
||||
extra_kwargs = get_llm_extra_kwargs()
|
||||
set_llm_selection(
|
||||
model=model,
|
||||
api_key=api_key,
|
||||
api_base=get_api_base(),
|
||||
extra_headers=extra_kwargs.get("extra_headers"),
|
||||
)
|
||||
return True
|
||||
except Exception:
|
||||
return False
|
||||
|
||||
|
||||
def pytest_collection_modifyitems(config, items):
|
||||
"""Skip all dummy_agents tests when no LLM is configured.
|
||||
|
||||
This prevents these tests from running in regular CI. They only run
|
||||
when launched via run_all.py (which calls set_llm_selection first).
|
||||
Resolution order:
|
||||
1. Already configured via run_all.py (set_llm_selection called)
|
||||
2. Auto-configure from ~/.hive/configuration.json
|
||||
3. Skip tests
|
||||
"""
|
||||
if _selected_model is not None:
|
||||
return # LLM configured, run normally
|
||||
return # LLM configured via run_all.py, run normally
|
||||
|
||||
# Try auto-configure from hive config
|
||||
if _try_auto_configure_from_hive_config():
|
||||
return # Config found, run tests
|
||||
|
||||
skip = pytest.mark.skip(
|
||||
reason="Dummy agent tests require a real LLM. "
|
||||
"Run via: cd core && uv run python tests/dummy_agents/run_all.py"
|
||||
"Configure ~/.hive/configuration.json or "
|
||||
"run via: cd core && uv run python tests/dummy_agents/run_all.py"
|
||||
)
|
||||
for item in items:
|
||||
if "dummy_agents" in str(item.fspath):
|
||||
@@ -120,6 +161,8 @@ def make_executor(
|
||||
loop_config: dict | None = None,
|
||||
tool_registry=None,
|
||||
storage_path: Path | None = None,
|
||||
event_bus=None,
|
||||
stream_id: str = "",
|
||||
) -> GraphExecutor:
|
||||
"""Factory that creates a GraphExecutor with a real LLM."""
|
||||
tools = []
|
||||
@@ -128,7 +171,7 @@ def make_executor(
|
||||
tools = list(tool_registry.get_tools().values())
|
||||
tool_executor = tool_registry.get_executor()
|
||||
|
||||
return GraphExecutor(
|
||||
executor = GraphExecutor(
|
||||
runtime=runtime,
|
||||
llm=llm,
|
||||
tools=tools,
|
||||
@@ -137,4 +180,183 @@ def make_executor(
|
||||
parallel_config=parallel_config,
|
||||
loop_config=loop_config or {"max_iterations": 10},
|
||||
storage_path=storage_path,
|
||||
event_bus=event_bus,
|
||||
stream_id=stream_id,
|
||||
)
|
||||
|
||||
original_execute = executor.execute
|
||||
|
||||
async def execute_with_timeout(*args, **kwargs):
    """Run the wrapped ``execute`` coroutine under a hard deadline.

    All positional and keyword arguments are forwarded unchanged to
    ``original_execute``. The deadline is ``_EXECUTION_TIMEOUT_SECS``.

    Returns:
        Whatever ``original_execute`` returns.

    Raises:
        TimeoutError: when the deadline expires, with a message pointing at
            the likely stuck execution path. The original timeout exception
            is chained as ``__cause__``.
    """
    try:
        return await asyncio.wait_for(
            original_execute(*args, **kwargs),
            timeout=_EXECUTION_TIMEOUT_SECS,
        )
    # wait_for raises asyncio.TimeoutError. That name is only an alias of the
    # builtin TimeoutError from Python 3.11 on, so catch the asyncio name to
    # keep the friendly message working on older interpreters as well.
    except asyncio.TimeoutError as e:
        raise TimeoutError(
            "Dummy agent execution timed out after "
            f"{_EXECUTION_TIMEOUT_SECS:.0f}s. "
            "This usually means the current worker execution path "
            "(GraphExecutor -> WorkerAgent -> EventLoopNode) is stuck "
            "waiting on the provider or tool-calling behavior."
        ) from e
|
||||
|
||||
executor.execute = execute_with_timeout # type: ignore[method-assign]
|
||||
return executor
|
||||
|
||||
|
||||
# ── Artifact capture: raw output written to disk for every test ──────
|
||||
|
||||
ARTIFACTS_DIR = Path("/tmp/hive_test_artifacts")
|
||||
|
||||
|
||||
class TestArtifact:
    """Collects raw output + expected behavior for a single test.

    Captures TWO kinds of data:
    1. Checks: individual assertion results (expected vs actual)
    2. Framework raw output: the real conversation, state, tool calls
       written by the executor to storage_path — copied verbatim,
       not curated.

    Usage in tests:
        async def test_foo(artifact, ...):
            result = await executor.execute(...)
            artifact.record(result, expected="...", storage_path=tmp_path/"session")
    """

    # The class name matches pytest's default ``Test*`` collection pattern and
    # the class defines ``__init__``; opt out explicitly so importing it into a
    # test module never produces a collection warning.
    __test__ = False

    def __init__(self, test_id: str):
        """Prepare an empty artifact for the test identified by *test_id*."""
        self.test_id = test_id
        # Flatten the node id ("path/to/test.py::name") into one directory name.
        self._safe_name = test_id.replace("::", "__").replace("/", "_")
        self._dir = ARTIFACTS_DIR / self._safe_name
        self._data: dict = {"test_id": test_id, "raw_output": None, "expected": "", "checks": []}

    def record(self, result, *, expected: str = "", storage_path=None):
        """Record an ExecutionResult and copy real framework files.

        Args:
            result: Object whose attributes (``success``, ``output``, ``error``,
                ...) are read with ``getattr`` defaults; ``None`` clears the
                recorded raw output and returns early, without copying files.
            expected: Free-text description of the expected behavior.
            storage_path: Optional directory whose framework output files are
                copied into the artifact's ``raw`` directory.
        """
        self._data["expected"] = expected
        if result is None:
            self._data["raw_output"] = None
            return
        self._data["raw_output"] = {
            "success": getattr(result, "success", None),
            "output": _safe_serialize(getattr(result, "output", {})),
            "error": getattr(result, "error", None),
            "path": getattr(result, "path", []),
            "steps_executed": getattr(result, "steps_executed", 0),
            "total_tokens": getattr(result, "total_tokens", 0),
            "total_latency_ms": getattr(result, "total_latency_ms", 0),
            "execution_quality": getattr(result, "execution_quality", ""),
            "total_retries": getattr(result, "total_retries", 0),
            "node_visit_counts": getattr(result, "node_visit_counts", {}),
            "nodes_with_failures": getattr(result, "nodes_with_failures", []),
            # session_state may be missing or None; fall back to an empty dict.
            "session_state_buffer": _safe_serialize(
                (getattr(result, "session_state", {}) or {}).get("data_buffer", {})
            ),
        }
        # Copy real framework output files (conversations, state, runs)
        if storage_path is not None:
            self._copy_framework_files(Path(storage_path))

    def _copy_framework_files(self, storage_path: Path):
        """Copy real framework output to persistent artifact directory.

        Only ``.json``, ``.jsonl`` and ``.txt`` files are copied; the layout
        relative to *storage_path* is preserved under ``<artifact dir>/raw``.
        """
        import shutil

        raw_dir = self._dir / "raw"
        raw_dir.mkdir(parents=True, exist_ok=True)
        if storage_path.exists():
            for src in storage_path.rglob("*"):
                if src.is_file() and src.suffix in (".json", ".jsonl", ".txt"):
                    rel = src.relative_to(storage_path)
                    dst = raw_dir / rel
                    dst.parent.mkdir(parents=True, exist_ok=True)
                    shutil.copy2(src, dst)

    def record_value(self, key: str, value, *, expected: str = ""):
        """Record an arbitrary key-value (for non-ExecutionResult tests).

        A non-empty *expected* overwrites the artifact's expected text.
        """
        self._data.setdefault("values", {})[key] = _safe_serialize(value)
        if expected:
            self._data["expected"] = expected

    def check(self, description: str, passed: bool, actual: str = "", expected_val: str = ""):
        """Record an individual assertion check."""
        self._data["checks"].append({
            "description": description,
            "passed": passed,
            "actual": actual,
            "expected": expected_val,
        })

    def save(self):
        """Write artifact to disk as ``artifact.json`` in the artifact directory."""
        self._dir.mkdir(parents=True, exist_ok=True)
        path = self._dir / "artifact.json"
        with open(path, "w") as f:
            # default=str stringifies anything json cannot encode natively.
            json.dump(self._data, f, indent=2, default=str)
|
||||
|
||||
|
||||
def _safe_serialize(obj):
|
||||
"""Convert to JSON-safe types."""
|
||||
if obj is None:
|
||||
return None
|
||||
if isinstance(obj, (str, int, float, bool)):
|
||||
return obj
|
||||
if isinstance(obj, dict):
|
||||
return {str(k): _safe_serialize(v) for k, v in obj.items()}
|
||||
if isinstance(obj, (list, tuple)):
|
||||
return [_safe_serialize(v) for v in obj]
|
||||
return str(obj)[:500]
|
||||
|
||||
|
||||
@pytest.fixture
def artifact(request, tmp_path):
    """Fixture that captures raw test output to disk.

    Every test that requests it gets an artifact recorder. Call
    artifact.record(result) and
    artifact.check("description", passed, actual, expected) to capture data.
    Saved automatically on teardown.

    On teardown, copies the framework output files found under tmp_path
    (the ``.json``/``.jsonl``/``.txt`` files handled by
    ``_copy_framework_files``) to the persistent artifact directory, then
    writes ``artifact.json``.
    """
    test_id = request.node.nodeid
    art = TestArtifact(test_id)
    yield art
    try:
        # Copy all framework files from the test's tmp_path
        art._copy_framework_files(tmp_path)
    finally:
        # Always persist the recorded checks, even if copying raw files failed.
        art.save()
|
||||
|
||||
|
||||
# Autouse hook: for tests that DON'T use the artifact fixture,
|
||||
# create a minimal artifact from pass/fail status.
|
||||
@pytest.hookimpl(tryfirst=True, hookwrapper=True)
def pytest_runtest_makereport(item, call):
    """Stash the call-phase report on the item as ``_test_report``.

    Wraps report creation; reports for other phases are left untouched and
    not stored.
    """
    phase_report = (yield).get_result()
    if phase_report.when != "call":
        return
    item._test_report = phase_report
|
||||
|
||||
|
||||
def pytest_runtest_teardown(item, nextitem):
    """Auto-save a minimal artifact for tests that didn't use the fixture.

    Does nothing when no call-phase report was stashed on the item, or when
    the test requested the ``artifact`` fixture (its own teardown writes the
    artifact). Otherwise writes ``artifact.json`` with the PASS/FAIL/SKIP
    status and, for failures, up to 5000 characters of the failure text.
    """
    call_report = getattr(item, "_test_report", None)
    if call_report is None or "artifact" in item.fixturenames:
        return

    target_dir = ARTIFACTS_DIR / item.nodeid.replace("::", "__").replace("/", "_")
    target_dir.mkdir(parents=True, exist_ok=True)

    if call_report.passed:
        status = "PASS"
    elif call_report.failed:
        status = "FAIL"
    else:
        status = "SKIP"

    payload = {
        "test_id": item.nodeid,
        "raw_output": None,
        "expected": "",
        "checks": [],
        "auto_captured": True,
        "status": status,
    }
    if call_report.failed and call_report.longreprtext:
        payload["failure_text"] = call_report.longreprtext[:5000]

    with open(target_dir / "artifact.json", "w") as handle:
        json.dump(payload, handle, indent=2, default=str)
|
||||
|
||||
@@ -10,30 +10,33 @@ Usage:
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
import os
|
||||
import sys
|
||||
import time
|
||||
import xml.etree.ElementTree as ET
|
||||
from pathlib import Path
|
||||
from tempfile import NamedTemporaryFile
|
||||
from tempfile import NamedTemporaryFile, TemporaryDirectory
|
||||
|
||||
TESTS_DIR = Path(__file__).parent
|
||||
|
||||
# ── provider registry ────────────────────────────────────────────────
|
||||
|
||||
# (env_var, display_name, default_model) — models match quickstart.sh defaults
|
||||
# (env_var, display_name, litellm_model, display_model)
|
||||
# display_model matches quickstart.sh labels; litellm_model is what LiteLLMProvider needs.
|
||||
API_KEY_PROVIDERS = [
|
||||
("ANTHROPIC_API_KEY", "Anthropic (Claude)", "claude-sonnet-4-20250514"),
|
||||
("OPENAI_API_KEY", "OpenAI", "gpt-5-mini"),
|
||||
("GEMINI_API_KEY", "Google Gemini", "gemini/gemini-3-flash-preview"),
|
||||
("ZAI_API_KEY", "ZAI (GLM)", "openai/glm-5"),
|
||||
("GROQ_API_KEY", "Groq", "moonshotai/kimi-k2-instruct-0905"),
|
||||
("MISTRAL_API_KEY", "Mistral", "mistral-large-latest"),
|
||||
("CEREBRAS_API_KEY", "Cerebras", "cerebras/zai-glm-4.7"),
|
||||
("TOGETHER_API_KEY", "Together AI", "together_ai/meta-llama/Llama-3.3-70B-Instruct-Turbo"),
|
||||
("DEEPSEEK_API_KEY", "DeepSeek", "deepseek-chat"),
|
||||
("MINIMAX_API_KEY", "MiniMax", "MiniMax-M2.5"),
|
||||
("HIVE_API_KEY", "Hive LLM", "hive/queen"),
|
||||
("ANTHROPIC_API_KEY", "Anthropic (Claude)", "claude-sonnet-4-20250514", "claude-sonnet-4-20250514"),
|
||||
("OPENAI_API_KEY", "OpenAI", "gpt-5-mini", "gpt-5-mini"),
|
||||
("GEMINI_API_KEY", "Google Gemini", "gemini/gemini-3-flash-preview", "gemini/gemini-3-flash-preview"),
|
||||
("KIMI_API_KEY", "Kimi", "kimi/kimi-k2.5", "kimi-k2.5"),
|
||||
("ZAI_API_KEY", "ZAI (GLM)", "openai/glm-5", "openai/glm-5"),
|
||||
("GROQ_API_KEY", "Groq", "moonshotai/kimi-k2-instruct-0905", "moonshotai/kimi-k2-instruct-0905"),
|
||||
("MISTRAL_API_KEY", "Mistral", "mistral-large-latest", "mistral-large-latest"),
|
||||
("CEREBRAS_API_KEY", "Cerebras", "cerebras/zai-glm-4.7", "cerebras/zai-glm-4.7"),
|
||||
("TOGETHER_API_KEY", "Together AI", "together_ai/meta-llama/Llama-3.3-70B-Instruct-Turbo", "together_ai/meta-llama/Llama-3.3-70B-Instruct-Turbo"),
|
||||
("DEEPSEEK_API_KEY", "DeepSeek", "deepseek-chat", "deepseek-chat"),
|
||||
("MINIMAX_API_KEY", "MiniMax", "MiniMax-M2.5", "MiniMax-M2.5"),
|
||||
("HIVE_API_KEY", "Hive LLM", "hive/queen", "hive/queen"),
|
||||
]
|
||||
|
||||
|
||||
@@ -81,6 +84,7 @@ def detect_available() -> list[dict]:
|
||||
{
|
||||
"name": "Claude Code (subscription)",
|
||||
"model": "claude-sonnet-4-20250514",
|
||||
"display_model": "claude-sonnet-4-20250514",
|
||||
"api_key": token,
|
||||
"source": "claude_code_sub",
|
||||
"extra_headers": {"authorization": f"Bearer {token}"},
|
||||
@@ -93,6 +97,7 @@ def detect_available() -> list[dict]:
|
||||
{
|
||||
"name": "Codex (subscription)",
|
||||
"model": "gpt-5-mini",
|
||||
"display_model": "gpt-5-mini",
|
||||
"api_key": token,
|
||||
"source": "codex_sub",
|
||||
}
|
||||
@@ -103,30 +108,71 @@ def detect_available() -> list[dict]:
|
||||
available.append(
|
||||
{
|
||||
"name": "Kimi Code (subscription)",
|
||||
"model": "moonshotai/kimi-k2-instruct-0905",
|
||||
# Quickstart displays "kimi-k2.5", but LiteLLMProvider needs the
|
||||
# provider-prefixed form to route through the Kimi coding endpoint.
|
||||
"model": "kimi/kimi-k2.5",
|
||||
"display_model": "kimi-k2.5",
|
||||
"api_key": token,
|
||||
"source": "kimi_sub",
|
||||
"api_base": "https://api.kimi.com/coding",
|
||||
}
|
||||
)
|
||||
|
||||
# API key providers (env vars)
|
||||
for env_var, name, default_model in API_KEY_PROVIDERS:
|
||||
for env_var, name, default_model, display_model in API_KEY_PROVIDERS:
|
||||
key = os.environ.get(env_var)
|
||||
if key:
|
||||
entry = {
|
||||
"name": f"{name} (${env_var})",
|
||||
"model": default_model,
|
||||
"display_model": display_model,
|
||||
"api_key": key,
|
||||
"source": env_var,
|
||||
}
|
||||
# ZAI requires an api_base (OpenAI-compatible endpoint)
|
||||
if env_var == "ZAI_API_KEY":
|
||||
entry["api_base"] = "https://api.z.ai/api/coding/paas/v4"
|
||||
# Kimi Code uses the coding endpoint selected by quickstart.
|
||||
elif env_var == "KIMI_API_KEY":
|
||||
entry["api_base"] = "https://api.kimi.com/coding"
|
||||
available.append(entry)
|
||||
|
||||
return available
|
||||
|
||||
|
||||
def _load_from_hive_config() -> dict | None:
|
||||
"""Try to load LLM provider from ~/.hive/configuration.json.
|
||||
|
||||
Returns a provider dict matching the format expected by
|
||||
set_llm_selection(), or None if config is missing/incomplete.
|
||||
"""
|
||||
try:
|
||||
from framework.config import (
|
||||
get_api_base,
|
||||
get_api_key,
|
||||
get_llm_extra_kwargs,
|
||||
get_preferred_model,
|
||||
)
|
||||
except ImportError:
|
||||
return None
|
||||
|
||||
model = get_preferred_model()
|
||||
api_key = get_api_key()
|
||||
if not model or not api_key:
|
||||
return None
|
||||
|
||||
extra_kwargs = get_llm_extra_kwargs()
|
||||
return {
|
||||
"name": f"Hive config ({model})",
|
||||
"model": model,
|
||||
"display_model": model,
|
||||
"api_key": api_key,
|
||||
"api_base": get_api_base(),
|
||||
"extra_headers": extra_kwargs.get("extra_headers"),
|
||||
"source": "hive_config",
|
||||
}
|
||||
|
||||
|
||||
def prompt_provider_selection() -> dict:
|
||||
"""Interactive prompt to select an LLM provider. Returns the chosen provider dict."""
|
||||
available = detect_available()
|
||||
@@ -136,17 +182,19 @@ def prompt_provider_selection() -> dict:
|
||||
print(" Set an API key environment variable, e.g.:")
|
||||
print(" export ANTHROPIC_API_KEY=sk-...")
|
||||
print(" export OPENAI_API_KEY=sk-...")
|
||||
print(" export KIMI_API_KEY=...")
|
||||
print(" Or authenticate with Claude Code: claude")
|
||||
print(" Or authenticate with Kimi Code: kimi /login")
|
||||
sys.exit(1)
|
||||
|
||||
if len(available) == 1:
|
||||
choice = available[0]
|
||||
print(f"\n Using: {choice['name']} ({choice['model']})")
|
||||
print(f"\n Using: {choice['name']} ({choice.get('display_model', choice['model'])})")
|
||||
return choice
|
||||
|
||||
print("\n Available LLM providers:\n")
|
||||
for i, p in enumerate(available, 1):
|
||||
print(f" {i}) {p['name']} [{p['model']}]")
|
||||
print(f" {i}) {p['name']} [{p.get('display_model', p['model'])}]")
|
||||
|
||||
print()
|
||||
while True:
|
||||
@@ -155,13 +203,296 @@ def prompt_provider_selection() -> dict:
|
||||
idx = int(raw) - 1
|
||||
if 0 <= idx < len(available):
|
||||
choice = available[idx]
|
||||
print(f"\n Using: {choice['name']} ({choice['model']})\n")
|
||||
print(
|
||||
f"\n Using: {choice['name']} "
|
||||
f"({choice.get('display_model', choice['model'])})\n"
|
||||
)
|
||||
return choice
|
||||
except (ValueError, EOFError):
|
||||
pass
|
||||
print(f" Please enter a number between 1 and {len(available)}")
|
||||
|
||||
|
||||
async def _smoke_test_provider_async(provider: dict, timeout_seconds: float = 25.0) -> None:
    """Fail fast if the selected provider cannot complete a tiny request.

    This catches the common "pytest looks frozen on the first test" failure mode
    where the first real LLM call hangs or never reaches a usable response.

    Four checks run in order, each under its own timeout: a plain completion,
    a tool-calling completion, a single-node worker graph, and a two-way
    conditional branch graph.

    Args:
        provider: Provider settings. ``model`` and ``api_key`` are required;
            ``api_base`` and ``extra_headers`` are forwarded when truthy.
        timeout_seconds: Timeout for the two completion checks, and the lower
            bound for the worker/branch timeouts (which may be raised through
            ``DUMMY_AGENT_SMOKE_WORKER_TIMEOUT_SECS`` and
            ``DUMMY_AGENT_SMOKE_BRANCH_TIMEOUT_SECS``).

    Raises:
        RuntimeError: If a check times out, the completion is empty, no tool
            call comes back, or a graph run fails or yields unexpected output.
        ValueError: If one of the timeout environment variables is not a number.
    """
    from framework.llm.litellm import LiteLLMProvider
    from framework.llm.provider import Tool
    from framework.graph.edge import GraphSpec
    from framework.graph.executor import GraphExecutor
    from framework.graph.goal import Goal
    from framework.graph.node import NodeSpec
    from framework.runtime.core import Runtime

    kwargs = {
        "model": provider["model"],
        "api_key": provider["api_key"],
    }
    # Optional settings are only forwarded when the provider dict carries a
    # truthy value for them.
    for optional_key in ("api_base", "extra_headers"):
        if provider.get(optional_key):
            kwargs[optional_key] = provider[optional_key]

    llm = LiteLLMProvider(**kwargs)

    def _new_executor(root: Path) -> GraphExecutor:
        """Build an executor whose runtime and session live under *root*."""
        return GraphExecutor(
            runtime=Runtime(storage_path=root / "runtime"),
            llm=llm,
            storage_path=root / "session",
            loop_config={"max_iterations": 4},
        )

    async def _run_plain_completion() -> None:
        """Require a non-empty answer to a trivial prompt."""
        result = await llm.acomplete(
            messages=[{"role": "user", "content": "Reply with exactly OK."}],
            max_tokens=8,
        )
        content = (result.content or "").strip()
        if not content:
            raise RuntimeError("provider returned an empty completion during smoke test")

    async def _run_tool_completion() -> None:
        """Require at least one tool call when a tool is offered."""
        tool = Tool(
            name="record_result",
            description="Record the final result string.",
            parameters={
                "properties": {
                    "value": {
                        "type": "string",
                        "description": "The result to record.",
                    }
                },
                "required": ["value"],
            },
        )
        response = await llm.acomplete(
            messages=[
                {
                    "role": "user",
                    "content": (
                        "Call the record_result tool exactly once with value='OK'. "
                        "Do not answer with plain text."
                    ),
                }
            ],
            tools=[tool],
            max_tokens=32,
        )

        # Tool calls are read from the first choice of the raw response.
        raw = response.raw_response
        tool_calls = []
        if raw is not None and getattr(raw, "choices", None):
            msg = raw.choices[0].message
            tool_calls = getattr(msg, "tool_calls", None) or []

        if not tool_calls:
            raise RuntimeError("provider completed but did not return any tool calls")

    async def _run_worker_execution() -> None:
        """Run a one-node graph and require ``result == 'OK'``."""
        with TemporaryDirectory(prefix="dummy-worker-smoke-") as tmpdir:
            executor = _new_executor(Path(tmpdir))
            graph = GraphSpec(
                id="dummy-worker-smoke",
                goal_id="dummy-worker-smoke-goal",
                entry_node="worker",
                entry_points={"start": "worker"},
                terminal_nodes=["worker"],
                nodes=[
                    NodeSpec(
                        id="worker",
                        name="Worker Smoke Test",
                        description="Minimal worker-path smoke test",
                        node_type="event_loop",
                        input_keys=["task"],
                        output_keys=["result"],
                        system_prompt=(
                            "You are a worker test node. Read the 'task' input. "
                            "You MUST call set_output with key='result' and value='OK'. "
                            "Do not use plain text as the final answer."
                        ),
                    )
                ],
                edges=[],
                memory_keys=["task", "result"],
                conversation_mode="continuous",
            )
            goal = Goal(
                id="dummy-worker-smoke-goal",
                name="Dummy Worker Smoke",
                description="Verify the current worker execution implementation can finish.",
            )
            result = await executor.execute(
                graph,
                goal,
                {"task": "Return OK by calling set_output."},
                validate_graph=False,
            )
            if not result.success:
                raise RuntimeError(result.error or "worker execution smoke failed")
            if result.output.get("result") != "OK":
                raise RuntimeError(
                    "worker execution completed but did not produce result='OK'"
                )

    async def _run_branch_execution() -> None:
        """Run a classify -> positive/negative graph and require the positive path."""
        with TemporaryDirectory(prefix="dummy-branch-smoke-") as tmpdir:
            executor = _new_executor(Path(tmpdir))
            graph = GraphSpec(
                id="dummy-branch-smoke",
                goal_id="dummy-branch-smoke-goal",
                entry_node="classify",
                entry_points={"start": "classify"},
                terminal_nodes=["positive", "negative"],
                nodes=[
                    NodeSpec(
                        id="classify",
                        name="Branch Classifier",
                        description="Routes to the positive or negative handler",
                        node_type="event_loop",
                        input_keys=["route"],
                        output_keys=["label"],
                        system_prompt=(
                            "Read the 'route' input. "
                            "If it is exactly 'positive', call set_output with "
                            "key='label' and value='positive'. "
                            "Otherwise call set_output with key='label' and value='negative'. "
                            "Do not use plain text as the final answer."
                        ),
                    ),
                    NodeSpec(
                        id="positive",
                        name="Positive Branch",
                        description="Positive terminal branch",
                        node_type="event_loop",
                        output_keys=["result"],
                        system_prompt=(
                            "Call set_output with key='result' and value='BRANCH_OK'. "
                            "Do not use plain text as the final answer."
                        ),
                    ),
                    NodeSpec(
                        id="negative",
                        name="Negative Branch",
                        description="Negative terminal branch",
                        node_type="event_loop",
                        output_keys=["result"],
                        system_prompt=(
                            "Call set_output with key='result' and value='UNEXPECTED_NEGATIVE'. "
                            "Do not use plain text as the final answer."
                        ),
                    ),
                ],
                edges=[
                    {
                        "id": "classify-to-positive",
                        "source": "classify",
                        "target": "positive",
                        "condition": "conditional",
                        "condition_expr": "output.get('label') == 'positive'",
                        "priority": 1,
                    },
                    {
                        "id": "classify-to-negative",
                        "source": "classify",
                        "target": "negative",
                        "condition": "conditional",
                        "condition_expr": "output.get('label') == 'negative'",
                        "priority": 0,
                    },
                ],
                memory_keys=["route", "label", "result"],
                conversation_mode="continuous",
            )
            goal = Goal(
                id="dummy-branch-smoke-goal",
                name="Dummy Branch Smoke",
                description="Verify conditional worker routing reaches the expected terminal.",
            )
            result = await executor.execute(
                graph,
                goal,
                {"route": "positive"},
                validate_graph=False,
            )
            if not result.success:
                raise RuntimeError(result.error or "branch execution smoke failed")
            if result.path != ["classify", "positive"]:
                raise RuntimeError(
                    "branch execution did not reach the expected terminal path: "
                    f"{result.path}"
                )
            if not result.output.get("result"):
                raise RuntimeError(
                    "branch execution reached the expected terminal path but did not "
                    f"produce a non-empty result output: path={result.path} "
                    f"output={result.output}"
                )

    # Graph runs need several LLM round-trips, so their timeouts default higher
    # and can be raised (never lowered below timeout_seconds) via environment.
    worker_timeout = max(
        timeout_seconds,
        float(os.environ.get("DUMMY_AGENT_SMOKE_WORKER_TIMEOUT_SECS", "30")),
    )
    branch_timeout = max(
        timeout_seconds,
        float(os.environ.get("DUMMY_AGENT_SMOKE_BRANCH_TIMEOUT_SECS", "60")),
    )

    steps = (
        ("plain completion", _run_plain_completion, timeout_seconds),
        ("tool calling", _run_tool_completion, timeout_seconds),
        ("single-node worker execution", _run_worker_execution, worker_timeout),
        ("branch worker execution", _run_branch_execution, branch_timeout),
    )
    for step_name, run_step, step_timeout in steps:
        try:
            await asyncio.wait_for(run_step(), timeout=step_timeout)
        # asyncio.TimeoutError only became an alias of the builtin TimeoutError
        # in Python 3.11; catch both so older interpreters get the same message.
        except (asyncio.TimeoutError, TimeoutError) as exc:
            raise RuntimeError(
                f"provider smoke test timed out during {step_name} "
                f"after {step_timeout:.0f}s"
            ) from exc
|
||||
|
||||
|
||||
def smoke_test_provider(provider: dict, timeout_seconds: float = 25.0) -> None:
    """Run a tiny real completion before starting pytest.

    Prints a one-line progress message, runs the async smoke test to
    completion, and reports the elapsed time on success.

    Args:
        provider: Provider settings passed through to the async smoke test.
        timeout_seconds: Per-request timeout passed through to the async
            smoke test and quoted in the timeout message.

    Exits the process with status 1 (after printing a diagnostic) when the
    smoke test times out or raises any other exception.
    """
    print(" Running provider smoke test...", end=" ", flush=True)
    # A monotonic clock keeps the elapsed figure immune to wall-clock changes.
    started = time.monotonic()
    try:
        asyncio.run(_smoke_test_provider_async(provider, timeout_seconds=timeout_seconds))
    # asyncio.TimeoutError is a distinct class before Python 3.11; catching it
    # here keeps the timeout explanation from degrading to a bare
    # "TimeoutError:" in the generic handler below.
    except (TimeoutError, asyncio.TimeoutError):
        print("FAILED")
        print(
            " The selected provider did not complete a tiny request within "
            f"{timeout_seconds:.0f}s."
        )
        print(
            " This usually means the provider is unreachable, rate-limited, "
            "or hanging on the selected model/API base."
        )
        sys.exit(1)
    except Exception as e:
        # Top-level boundary: report any failure and stop before pytest starts.
        print("FAILED")
        print(f" Provider smoke test failed: {type(e).__name__}: {e}")
        sys.exit(1)

    elapsed = time.monotonic() - started
    print(f"OK ({elapsed:.1f}s)")
|
||||
|
||||
|
||||
# ── test runner ──────────────────────────────────────────────────────
|
||||
|
||||
|
||||
@@ -301,13 +632,23 @@ def print_table(agents: dict[str, dict], total_time: float, verbose: bool = Fals
|
||||
|
||||
def main() -> int:
|
||||
verbose = "--verbose" in sys.argv or "-v" in sys.argv
|
||||
interactive = "--interactive" in sys.argv
|
||||
|
||||
print("\n ╔═══════════════════════════════════════╗")
|
||||
print(" ║ Level 2: Dummy Agent Tests (E2E) ║")
|
||||
print(" ╚═══════════════════════════════════════╝")
|
||||
|
||||
# Step 1: detect credentials and let user pick
|
||||
provider = prompt_provider_selection()
|
||||
# Step 1: prefer ~/.hive/configuration.json unless --interactive
|
||||
provider = None
|
||||
if not interactive:
|
||||
provider = _load_from_hive_config()
|
||||
if provider:
|
||||
print(f"\n Using hive config: {provider['display_model']}")
|
||||
|
||||
# Fall back to interactive selection
|
||||
if provider is None:
|
||||
provider = prompt_provider_selection()
|
||||
smoke_test_provider(provider)
|
||||
|
||||
# Step 2: inject selection into conftest module state
|
||||
from tests.dummy_agents.conftest import set_llm_selection
|
||||
|
||||
@@ -0,0 +1,329 @@
|
||||
"""Component tests: Continuous Conversation Mode — threading, buffer.
|
||||
|
||||
Exercises conversation threading across nodes to verify that downstream
|
||||
nodes receive context from upstream nodes in continuous mode.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import pytest
|
||||
|
||||
from framework.graph.edge import EdgeCondition, EdgeSpec, GraphSpec
|
||||
from framework.graph.node import NodeSpec
|
||||
|
||||
from .conftest import make_executor
|
||||
|
||||
SET_OUTPUT_INSTRUCTION = (
|
||||
"You MUST call the set_output tool to provide your answer. "
|
||||
"Do not just write text — call set_output with the correct "
|
||||
"key and value."
|
||||
)
|
||||
|
||||
|
||||
def _build_pipeline_graph(
    conversation_mode: str = "continuous",
) -> GraphSpec:
    """Build the intake -> transform graph used by the pipeline tests.

    ``intake`` is prompted to copy ``raw`` into ``captured``; the single
    on-success edge maps ``captured`` to ``value`` for ``transform``, which is
    prompted to write the uppercased value to ``result``.
    """
    intake_node = NodeSpec(
        id="intake",
        name="Intake",
        description="Captures raw input",
        node_type="event_loop",
        input_keys=["raw"],
        output_keys=["captured"],
        system_prompt=(
            "Read the 'raw' input value and call set_output with "
            "key='captured' and the same value. " + SET_OUTPUT_INSTRUCTION
        ),
    )
    transform_node = NodeSpec(
        id="transform",
        name="Transform",
        description="Uppercases the value",
        node_type="event_loop",
        input_keys=["value"],
        output_keys=["result"],
        system_prompt=(
            "Read the 'value' input, convert it to UPPERCASE, then call "
            "set_output with key='result' and the uppercased value. "
            + SET_OUTPUT_INSTRUCTION
        ),
    )
    handoff = EdgeSpec(
        id="intake-to-transform",
        source="intake",
        target="transform",
        condition=EdgeCondition.ON_SUCCESS,
        input_mapping={"value": "captured"},
    )
    return GraphSpec(
        id="continuous-pipeline",
        goal_id="dummy",
        entry_node="intake",
        entry_points={"start": "intake"},
        terminal_nodes=["transform"],
        conversation_mode=conversation_mode,
        nodes=[intake_node, transform_node],
        edges=[handoff],
        memory_keys=["raw", "captured", "value", "result"],
    )
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_continuous_pipeline_traverses(runtime, goal, llm_provider, artifact):
    """In continuous mode the run visits intake then transform and sets 'result'."""
    pipeline = _build_pipeline_graph(conversation_mode="continuous")
    runner = make_executor(runtime, llm_provider, loop_config={"max_iterations": 5})

    result = await runner.execute(pipeline, goal, {"raw": "hello"}, validate_graph=False)
    artifact.record(
        result,
        expected="success=True, path=['intake','transform'], output['result'] is set",
    )

    # Each expectation is logged to the artifact first, then enforced.
    artifact.check(
        "execution succeeds",
        result.success,
        actual=str(result.success),
        expected_val="True",
    )
    assert result.success

    wanted_path = ["intake", "transform"]
    artifact.check(
        "path matches",
        result.path == wanted_path,
        actual=str(result.path),
        expected_val="['intake', 'transform']",
    )
    assert result.path == wanted_path

    actual_output = result.output.get("result")
    artifact.check(
        "output['result'] is set",
        actual_output is not None,
        actual=repr(actual_output),
        expected_val="non-None value",
    )
    assert actual_output is not None
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_continuous_data_flows_through(runtime, goal, llm_provider, artifact):
    """The pipeline's final 'result' output must be present and non-empty."""
    pipeline = _build_pipeline_graph(conversation_mode="continuous")
    runner = make_executor(runtime, llm_provider, loop_config={"max_iterations": 5})

    result = await runner.execute(
        pipeline, goal, {"raw": "test_data"}, validate_graph=False
    )
    artifact.record(result, expected="success=True, output['result'] is non-empty")

    # Each expectation is logged to the artifact first, then enforced.
    artifact.check(
        "execution succeeds",
        result.success,
        actual=str(result.success),
        expected_val="True",
    )
    assert result.success

    actual_output = result.output.get("result")
    artifact.check(
        "output['result'] is set",
        actual_output is not None,
        actual=repr(actual_output),
        expected_val="non-None value",
    )
    assert actual_output is not None

    # Length is measured on the string form of the stored value.
    output_len = len(str(result.output["result"]))
    artifact.check(
        "output is non-empty",
        output_len > 0,
        actual=str(output_len),
        expected_val=">0",
    )
    assert output_len > 0
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_isolated_pipeline_traverses(runtime, goal, llm_provider, artifact):
    """With conversation_mode='isolated' the run still visits both nodes in order."""
    pipeline = _build_pipeline_graph(conversation_mode="isolated")
    runner = make_executor(runtime, llm_provider, loop_config={"max_iterations": 5})

    result = await runner.execute(pipeline, goal, {"raw": "data"}, validate_graph=False)
    artifact.record(result, expected="success=True, path=['intake','transform']")

    # Each expectation is logged to the artifact first, then enforced.
    artifact.check(
        "execution succeeds",
        result.success,
        actual=str(result.success),
        expected_val="True",
    )
    assert result.success

    wanted_path = ["intake", "transform"]
    artifact.check(
        "path matches",
        result.path == wanted_path,
        actual=str(result.path),
        expected_val="['intake', 'transform']",
    )
    assert result.path == wanted_path
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_continuous_three_node_chain(runtime, goal, llm_provider, artifact):
    """A continuous a -> b -> c chain runs all three nodes and sets 'result'."""
    node_a = NodeSpec(
        id="a",
        name="Node A",
        description="First node",
        node_type="event_loop",
        input_keys=["input"],
        output_keys=["a_out"],
        system_prompt=(
            "Read the 'input' value and call set_output with key='a_out' "
            "and the same value. " + SET_OUTPUT_INSTRUCTION
        ),
    )
    node_b = NodeSpec(
        id="b",
        name="Node B",
        description="Middle node",
        node_type="event_loop",
        input_keys=["b_in"],
        output_keys=["b_out"],
        system_prompt=(
            "Read the 'b_in' value and call set_output with key='b_out' "
            "and value='processed_' followed by the input. "
            + SET_OUTPUT_INSTRUCTION
        ),
    )
    node_c = NodeSpec(
        id="c",
        name="Node C",
        description="Terminal node",
        node_type="event_loop",
        input_keys=["c_in"],
        output_keys=["result"],
        system_prompt=(
            "Read the 'c_in' value and call set_output with key='result' "
            "and the same value. " + SET_OUTPUT_INSTRUCTION
        ),
    )
    # Each edge renames the upstream output key to the downstream input key.
    links = [
        EdgeSpec(
            id="a-to-b",
            source="a",
            target="b",
            condition=EdgeCondition.ON_SUCCESS,
            input_mapping={"b_in": "a_out"},
        ),
        EdgeSpec(
            id="b-to-c",
            source="b",
            target="c",
            condition=EdgeCondition.ON_SUCCESS,
            input_mapping={"c_in": "b_out"},
        ),
    ]
    chain = GraphSpec(
        id="three-node-chain",
        goal_id="dummy",
        entry_node="a",
        entry_points={"start": "a"},
        terminal_nodes=["c"],
        conversation_mode="continuous",
        nodes=[node_a, node_b, node_c],
        edges=links,
        memory_keys=["input", "a_out", "b_in", "b_out", "c_in", "result"],
    )
    runner = make_executor(runtime, llm_provider, loop_config={"max_iterations": 5})

    result = await runner.execute(chain, goal, {"input": "payload"}, validate_graph=False)
    artifact.record(
        result,
        expected="success=True, path=['a','b','c'], steps=3, output['result'] is set",
    )

    # Each expectation is logged to the artifact first, then enforced.
    artifact.check(
        "execution succeeds",
        result.success,
        actual=str(result.success),
        expected_val="True",
    )
    assert result.success

    wanted_path = ["a", "b", "c"]
    artifact.check(
        "path matches",
        result.path == wanted_path,
        actual=str(result.path),
        expected_val="['a', 'b', 'c']",
    )
    assert result.path == wanted_path

    artifact.check(
        "steps_executed is 3",
        result.steps_executed == 3,
        actual=str(result.steps_executed),
        expected_val="3",
    )
    assert result.steps_executed == 3

    actual_output = result.output.get("result")
    artifact.check(
        "output['result'] is set",
        actual_output is not None,
        actual=repr(actual_output),
        expected_val="non-None value",
    )
    assert actual_output is not None
|
||||
@@ -0,0 +1,241 @@
|
||||
"""Component tests: Conversation Persistence — write-through, storage.
|
||||
|
||||
Exercises conversation persistence by running real LLM turns and
|
||||
verifying that messages and state are written to disk correctly.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import pytest
|
||||
|
||||
from framework.graph.edge import GraphSpec
|
||||
from framework.graph.node import NodeSpec
|
||||
|
||||
from .conftest import make_executor
|
||||
|
||||
|
||||
def _build_echo_graph() -> GraphSpec:
    """Build a one-node continuous graph prompted to copy 'input' to 'output'."""
    echo_node = NodeSpec(
        id="echo",
        name="Echo",
        description="Echoes input to output",
        node_type="event_loop",
        input_keys=["input"],
        output_keys=["output"],
        system_prompt=(
            "Read the 'input' value and immediately call set_output with "
            "key='output' and the same value. Do not add any text."
        ),
    )
    # The single node is both the entry point and the terminal node.
    return GraphSpec(
        id="conv-echo",
        goal_id="dummy",
        entry_node="echo",
        entry_points={"start": "echo"},
        terminal_nodes=["echo"],
        nodes=[echo_node],
        edges=[],
        memory_keys=["input", "output"],
        conversation_mode="continuous",
    )
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_conversation_persists_messages(runtime, goal, llm_provider, tmp_path, artifact):
    """A successful run leaves at least one file under <storage>/conversations."""
    session_root = tmp_path / "session"
    runner = make_executor(runtime, llm_provider, storage_path=session_root)

    result = await runner.execute(
        _build_echo_graph(), goal, {"input": "hello"}, validate_graph=False
    )
    artifact.record(
        result,
        expected="success=True, conversations/ dir exists with data files",
    )

    # Each expectation is logged to the artifact first, then enforced.
    artifact.check(
        "execution succeeds",
        result.success,
        actual=str(result.success),
        expected_val="True",
    )
    assert result.success

    # The conversation directory must have been created...
    conv_dir = session_root / "conversations"
    artifact.check(
        "conversations/ dir exists",
        conv_dir.exists(),
        actual=str(conv_dir.exists()),
        expected_val="True",
    )
    assert conv_dir.exists(), "conversations/ directory should exist"

    # ...and must hold at least one regular file at any depth.
    data_files = [entry for entry in conv_dir.rglob("*") if entry.is_file()]
    file_count = len(data_files)
    artifact.check(
        "at least one data file",
        file_count > 0,
        actual=str(file_count),
        expected_val=">0",
    )
    assert file_count > 0, "Should have persisted at least one conversation file"
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_conversation_output_matches_execution(
    runtime, goal, llm_provider, tmp_path, artifact
):
    """The echo graph's 'output' key must be present and non-empty after a run."""
    session_root = tmp_path / "session"
    runner = make_executor(runtime, llm_provider, storage_path=session_root)

    result = await runner.execute(
        _build_echo_graph(), goal, {"input": "test_value"}, validate_graph=False
    )
    artifact.record(result, expected="success=True, output['output'] is non-empty")

    # Each expectation is logged to the artifact first, then enforced.
    artifact.check(
        "execution succeeds",
        result.success,
        actual=str(result.success),
        expected_val="True",
    )
    assert result.success

    actual_output = result.output.get("output")
    artifact.check(
        "output['output'] is set",
        actual_output is not None,
        actual=repr(actual_output),
        expected_val="non-None value",
    )
    assert actual_output is not None

    # Length is measured on the string form of the stored value.
    output_len = len(str(result.output["output"]))
    artifact.check(
        "output is non-empty",
        output_len > 0,
        actual=str(output_len),
        expected_val=">0",
    )
    assert output_len > 0
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_conversation_multi_node_persistence(runtime, goal, llm_provider, tmp_path, artifact):
    """A step1 -> step2 run succeeds and creates the conversations directory."""
    from framework.graph.edge import EdgeCondition, EdgeSpec

    session_root = tmp_path / "session"

    first_step = NodeSpec(
        id="step1",
        name="Step 1",
        description="First step",
        node_type="event_loop",
        output_keys=["intermediate"],
        system_prompt=(
            "Call set_output with key='intermediate' and value='step1_done'. "
            "Do not write text."
        ),
    )
    second_step = NodeSpec(
        id="step2",
        name="Step 2",
        description="Second step",
        node_type="event_loop",
        input_keys=["intermediate"],
        output_keys=["result"],
        system_prompt=(
            "Call set_output with key='result' and value='step2_done'. "
            "Do not write text."
        ),
    )
    handoff = EdgeSpec(
        id="step1-to-step2",
        source="step1",
        target="step2",
        condition=EdgeCondition.ON_SUCCESS,
        input_mapping={"intermediate": "intermediate"},
    )
    two_step = GraphSpec(
        id="multi-conv",
        goal_id="dummy",
        entry_node="step1",
        entry_points={"start": "step1"},
        terminal_nodes=["step2"],
        conversation_mode="continuous",
        nodes=[first_step, second_step],
        edges=[handoff],
        memory_keys=["intermediate", "result"],
    )
    runner = make_executor(runtime, llm_provider, storage_path=session_root)

    # The graph takes no external input; step1 is prompted with a fixed value.
    result = await runner.execute(two_step, goal, {}, validate_graph=False)
    artifact.record(
        result,
        expected="success=True, path=['step1','step2'], conversations/ dir exists",
    )

    # Each expectation is logged to the artifact first, then enforced.
    artifact.check(
        "execution succeeds",
        result.success,
        actual=str(result.success),
        expected_val="True",
    )
    assert result.success

    wanted_path = ["step1", "step2"]
    artifact.check(
        "path matches",
        result.path == wanted_path,
        actual=str(result.path),
        expected_val="['step1', 'step2']",
    )
    assert result.path == wanted_path

    # Only the directory's existence is checked, not its per-node contents.
    conv_dir = session_root / "conversations"
    artifact.check(
        "conversations/ dir exists",
        conv_dir.exists(),
        actual=str(conv_dir.exists()),
        expected_val="True",
    )
    assert conv_dir.exists()
|
||||
@@ -0,0 +1,266 @@
|
||||
"""Component tests: Edge Evaluation — conditional routing, LLM_DECIDE.
|
||||
|
||||
Exercises edge conditions with real LLM calls to verify that routing
|
||||
decisions work correctly across providers.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import pytest
|
||||
|
||||
from framework.graph.edge import EdgeCondition, EdgeSpec, GraphSpec
|
||||
from framework.graph.node import NodeSpec
|
||||
|
||||
from .conftest import make_executor
|
||||
|
||||
SET_OUTPUT_INSTRUCTION = (
|
||||
"You MUST call the set_output tool to provide your answer. "
|
||||
"Do not just write text — call set_output with the correct "
|
||||
"key and value."
|
||||
)
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_edge_conditional_true_path(runtime, goal, llm_provider, artifact):
    """When source emits label='yes', the conditional edge leads to target."""
    source_node = NodeSpec(
        id="source",
        name="Source",
        description="Produces label=yes",
        node_type="event_loop",
        output_keys=["label"],
        system_prompt=(
            "Call set_output with key='label' and value='yes'. "
            + SET_OUTPUT_INSTRUCTION
        ),
    )
    target_node = NodeSpec(
        id="target",
        name="Target",
        description="Terminal node",
        node_type="event_loop",
        output_keys=["result"],
        system_prompt=(
            "Call set_output with key='result' and value='reached'. "
            + SET_OUTPUT_INSTRUCTION
        ),
    )
    # The edge expression matches the label the source is told to emit.
    gate = EdgeSpec(
        id="source-to-target",
        source="source",
        target="target",
        condition=EdgeCondition.CONDITIONAL,
        condition_expr="output.get('label') == 'yes'",
    )
    routed = GraphSpec(
        id="cond-true",
        goal_id="dummy",
        entry_node="source",
        entry_points={"start": "source"},
        terminal_nodes=["target"],
        conversation_mode="continuous",
        nodes=[source_node, target_node],
        edges=[gate],
        memory_keys=["label", "result"],
    )
    runner = make_executor(runtime, llm_provider, loop_config={"max_iterations": 3})

    result = await runner.execute(routed, goal, {}, validate_graph=False)
    artifact.record(result, expected="success=True, path=['source','target']")

    # Each expectation is logged to the artifact first, then enforced.
    artifact.check(
        "execution succeeds",
        result.success,
        actual=str(result.success),
        expected_val="True",
    )
    assert result.success

    wanted_path = ["source", "target"]
    artifact.check(
        "path matches",
        result.path == wanted_path,
        actual=str(result.path),
        expected_val="['source', 'target']",
    )
    assert result.path == wanted_path
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_edge_conditional_false_path(runtime, goal, llm_provider, artifact):
    """When source emits label='no', the edge expecting 'yes' is not followed."""
    source_node = NodeSpec(
        id="source",
        name="Source",
        description="Produces label=no",
        node_type="event_loop",
        output_keys=["label"],
        system_prompt=(
            "Call set_output with key='label' and value='no'. "
            + SET_OUTPUT_INSTRUCTION
        ),
    )
    target_node = NodeSpec(
        id="target",
        name="Target",
        description="Should not be reached",
        node_type="event_loop",
        output_keys=["result"],
        system_prompt="Call set_output with key='result' and value='bad'.",
    )
    # The edge expression expects 'yes', which the source is told not to emit.
    gate = EdgeSpec(
        id="source-to-target",
        source="source",
        target="target",
        condition=EdgeCondition.CONDITIONAL,
        condition_expr="output.get('label') == 'yes'",
    )
    # 'source' is listed as terminal too, so the run can end there.
    routed = GraphSpec(
        id="cond-false",
        goal_id="dummy",
        entry_node="source",
        entry_points={"start": "source"},
        terminal_nodes=["source", "target"],
        conversation_mode="continuous",
        nodes=[source_node, target_node],
        edges=[gate],
        memory_keys=["label", "result"],
    )
    runner = make_executor(runtime, llm_provider, loop_config={"max_iterations": 3})

    result = await runner.execute(routed, goal, {}, validate_graph=False)
    artifact.record(result, expected="success=True, 'target' not in path")

    # Each expectation is logged to the artifact first, then enforced.
    artifact.check(
        "execution succeeds",
        result.success,
        actual=str(result.success),
        expected_val="True",
    )
    assert result.success

    artifact.check(
        "target not in path",
        "target" not in result.path,
        actual=str(result.path),
        expected_val="path without 'target'",
    )
    assert "target" not in result.path
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_edge_priority_selects_higher(runtime, goal, llm_provider, artifact):
    """Of several matching conditional edges, the higher-priority one is taken.

    ``source`` is prompted to emit ``value='match'``.  Two conditional edges
    with the same expression leave it: one to ``high`` (priority 10) and one
    to ``low`` (priority 1).  The run is expected to succeed with the path
    ``['source', 'high']``.
    """
    # Both outgoing edges use this same expression, so only priority differs.
    match_expr = "output.get('value') == 'match'"

    source_node = NodeSpec(
        id="source",
        name="Source",
        description="Sets value=match",
        node_type="event_loop",
        output_keys=["value"],
        system_prompt=(
            "Call set_output with key='value' and value='match'. " + SET_OUTPUT_INSTRUCTION
        ),
    )
    high_node = NodeSpec(
        id="high",
        name="High Priority",
        description="High priority terminal",
        node_type="event_loop",
        output_keys=["result"],
        system_prompt=(
            "Call set_output with key='result' and value='HIGH'. " + SET_OUTPUT_INSTRUCTION
        ),
    )
    low_node = NodeSpec(
        id="low",
        name="Low Priority",
        description="Low priority terminal",
        node_type="event_loop",
        output_keys=["result"],
        system_prompt=(
            "Call set_output with key='result' and value='LOW'. " + SET_OUTPUT_INSTRUCTION
        ),
    )

    edge_to_high = EdgeSpec(
        id="to-high",
        source="source",
        target="high",
        condition=EdgeCondition.CONDITIONAL,
        condition_expr=match_expr,
        priority=10,
    )
    edge_to_low = EdgeSpec(
        id="to-low",
        source="source",
        target="low",
        condition=EdgeCondition.CONDITIONAL,
        condition_expr=match_expr,
        priority=1,
    )

    graph = GraphSpec(
        id="priority-test",
        goal_id="dummy",
        entry_node="source",
        entry_points={"start": "source"},
        terminal_nodes=["high", "low"],
        conversation_mode="continuous",
        nodes=[source_node, high_node, low_node],
        edges=[edge_to_high, edge_to_low],
        memory_keys=["value", "result"],
    )

    executor = make_executor(runtime, llm_provider, loop_config={"max_iterations": 3})
    result = await executor.execute(graph, goal, {}, validate_graph=False)

    artifact.record(result, expected="success=True, path=['source','high']")

    # Record each check on the artifact first, then enforce it with an assert.
    artifact.check(
        "execution succeeds",
        result.success,
        actual=str(result.success),
        expected_val="True",
    )
    assert result.success

    artifact.check(
        "path matches",
        result.path == ["source", "high"],
        actual=str(result.path),
        expected_val="['source', 'high']",
    )
    assert result.path == ["source", "high"]
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user