chore: ruff lint

Richard Tang
2026-04-03 20:31:14 -07:00
parent 294df7f066
commit ed8d417bef
106 changed files with 1149 additions and 828 deletions
@@ -9,7 +9,6 @@ Fix: Add wait_for_selector between scroll calls
import asyncio
import sys
import time
from pathlib import Path
sys.path.insert(0, str(Path(__file__).parent.parent.parent.parent / "tools" / "src"))
@@ -36,7 +35,7 @@ async def test_twitter_lazy_scroll():
if bridge.is_connected:
print("✓ Extension connected!")
break
print(f"Waiting for extension... ({i+1}/10)")
print(f"Waiting for extension... ({i + 1}/10)")
else:
print("✗ Extension not connected")
return
@@ -58,7 +57,8 @@ async def test_twitter_lazy_scroll():
# Count initial tweets
initial_count = await bridge.evaluate(
tab_id,
'(function() { return document.querySelectorAll(\'[data-testid="tweet"]\').length; })()'
"(function() { return document.querySelectorAll("
"'[data-testid=\"tweet\"]').length; })()",
)
print(f"Initial tweet count: {initial_count.get('result', 0)}")
@@ -70,7 +70,7 @@ async def test_twitter_lazy_scroll():
print("\n--- Scrolling with waits ---")
for i in range(3):
result = await bridge.scroll(tab_id, "down", 500)
print(f" Scroll {i+1}: {result.get('method', 'unknown')} method")
print(f" Scroll {i + 1}: {result.get('method', 'unknown')} method")
# Wait for new content to load
await asyncio.sleep(2)
@@ -78,20 +78,22 @@ async def test_twitter_lazy_scroll():
# Count tweets after scroll
count_result = await bridge.evaluate(
tab_id,
'(function() { return document.querySelectorAll(\'[data-testid="tweet"]\').length; })()'
"(function() { return document.querySelectorAll("
"'[data-testid=\"tweet\"]').length; })()",
)
count = count_result.get('result', 0)
count = count_result.get("result", 0)
print(f" Tweet count after scroll: {count}")
# Final count
final_count = await bridge.evaluate(
tab_id,
'(function() { return document.querySelectorAll(\'[data-testid="tweet"]\').length; })()'
"(function() { return document.querySelectorAll("
"'[data-testid=\"tweet\"]').length; })()",
)
final = final_count.get('result', 0)
initial = initial_count.get('result', 0)
final = final_count.get("result", 0)
initial = initial_count.get("result", 0)
print(f"\n--- Results ---")
print("\n--- Results ---")
print(f"Initial tweets: {initial}")
print(f"Final tweets: {final}")
@@ -9,7 +9,6 @@ Fix: Find visible modal container (highest z-index scrollable), scroll that
import asyncio
import sys
import time
from pathlib import Path
sys.path.insert(0, str(Path(__file__).parent.parent.parent.parent / "tools" / "src"))
@@ -60,7 +59,7 @@ async def test_modal_scroll():
# Click button to open modal
print("\n--- Opening modal ---")
# Find and click the "Open Modal" button
result = await bridge.click(tab_id, '.ws-btn', timeout_ms=5000)
result = await bridge.click(tab_id, ".ws-btn", timeout_ms=5000)
print(f"Click result: {result}")
await asyncio.sleep(1)
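The fix named in this file's hunk header (scroll the visible modal container, i.e. the highest-z-index scrollable element, rather than the window) is not itself shown in the hunks above; a hypothetical sketch of that lookup through the same bridge.evaluate call:

# Hypothetical, not part of this commit: find the topmost scrollable element
# (the open modal) and scroll it directly.
FIND_AND_SCROLL_MODAL_JS = """
(function() {
    const candidates = Array.from(document.querySelectorAll('*'))
        .filter(el => el.scrollHeight > el.clientHeight + 10);
    candidates.sort((a, b) =>
        (parseInt(getComputedStyle(b).zIndex) || 0) -
        (parseInt(getComputedStyle(a).zIndex) || 0));
    const modal = candidates[0];
    if (!modal) return { scrolled: false };
    modal.scrollTop += 500;
    return { scrolled: true, scrollTop: modal.scrollTop };
})()
"""
# inside the async test: result = await bridge.evaluate(tab_id, FIND_AND_SCROLL_MODAL_JS)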
@@ -52,7 +52,9 @@ async def test_overlay_click():
<head><title>Overlay Test</title></head>
<body>
<button id="target-btn" onclick="alert('Clicked!')">Click Me</button>
<div id="overlay" style="position:fixed;top:0;left:0;width:100%;height:100%;background:rgba(0,0,0,0.3);z-index:1000;"></div>
<div id="overlay" style="position:fixed;top:0;left:0;
width:100%;height:100%;
background:rgba(0,0,0,0.3);z-index:1000;"></div>
<script>
window.clickCount = 0;
document.getElementById('target-btn').addEventListener('click', () => {
@@ -65,6 +67,7 @@ async def test_overlay_click():
# Navigate to data URL
import base64
data_url = f"data:text/html;base64,{base64.b64encode(test_html.encode()).decode()}"
await bridge.navigate(tab_id, data_url, wait_until="load")
@@ -91,7 +94,7 @@ async def test_overlay_click():
targetElement: btn.tagName
};
})();
"""
""",
)
print(f"Coverage check: {coverage_check.get('result', {})}")
@@ -100,10 +103,7 @@ async def test_overlay_click():
print(f"Click result: {click_result}")
# Check if click registered
count_result = await bridge.evaluate(
tab_id,
"(function() { return window.clickCount; })()"
)
count_result = await bridge.evaluate(tab_id, "(function() { return window.clickCount; })()")
count = count_result.get("result", 0)
print(f"Click count after CDP click: {count}")
@@ -10,7 +10,6 @@ Fix: Use piercing selector (host >>> target) or traverse shadow roots
import asyncio
import sys
import base64
from pathlib import Path
sys.path.insert(0, str(Path(__file__).parent.parent.parent.parent / "tools" / "src"))
@@ -100,7 +99,7 @@ async def test_shadow_dom():
});
return { count: hosts.length, hosts };
})();
"""
""",
)
print(f"Shadow DOM detection: {detection.get('result', {})}")
@@ -126,14 +125,13 @@ async def test_shadow_dom():
}
return { success: false, error: 'Button not found' };
})();
"""
""",
)
print(f"JS click result: {click_result.get('result', {})}")
# Verify click was registered
count_result = await bridge.evaluate(
tab_id,
"(function() { return window.shadowClickCount || 0; })()"
tab_id, "(function() { return window.shadowClickCount || 0; })()"
)
count = count_result.get("result") or 0
print(f"Shadow click count: {count}")
@@ -10,7 +10,6 @@ Fix: Focus via JavaScript, use execCommand('insertText') or Input.dispatchKeyEve
import asyncio
import sys
import base64
from pathlib import Path
sys.path.insert(0, str(Path(__file__).parent.parent.parent.parent / "tools" / "src"))
@@ -54,10 +53,14 @@ async def test_contenteditable():
<h2>ContentEditable Test</h2>
<h3>1. Simple contenteditable div</h3>
<div id="editor1" contenteditable="true" style="border:1px solid #ccc;padding:10px;min-height:50px;">Start text</div>
<div id="editor1" contenteditable="true"
style="border:1px solid #ccc;padding:10px;
min-height:50px;">Start text</div>
<h3>2. Rich text editor (like Notion)</h3>
<div id="editor2" contenteditable="true" style="border:1px solid #ccc;padding:10px;min-height:50px;">
<div id="editor2" contenteditable="true"
style="border:1px solid #ccc;padding:10px;
min-height:50px;">
<p>Type here...</p>
</div>
@@ -106,7 +109,7 @@ async def test_contenteditable():
ids: Array.from(editables).map(el => el.id)
};
})();
"""
""",
)
print(f"Contenteditable detection: {detection.get('result', {})}")
@@ -115,8 +118,7 @@ async def test_contenteditable():
await bridge.click(tab_id, "#input1")
await bridge.type_text(tab_id, "#input1", "Hello input")
input_result = await bridge.evaluate(
tab_id,
"(function() { return document.getElementById('input1').value; })()"
tab_id, "(function() { return document.getElementById('input1').value; })()"
)
print(f"Input value: {input_result.get('result', '')}")
@@ -126,7 +128,7 @@ async def test_contenteditable():
await bridge.type_text(tab_id, "#editor1", "Hello contenteditable", clear_first=True)
editor_result = await bridge.evaluate(
tab_id,
"(function() { return document.getElementById('editor1').innerText; })()"
"(function() { return document.getElementById('editor1').innerText; })()",
)
print(f"Editor1 innerText: {editor_result.get('result', '')}")
@@ -142,7 +144,7 @@ async def test_contenteditable():
document.execCommand('insertText', false, 'Hello from execCommand');
return editor.innerText;
})();
"""
""",
)
print(f"Editor2 after execCommand: {insert_result.get('result', '')}")
@@ -10,7 +10,6 @@ Fix: Add delay_ms between keystrokes
import asyncio
import sys
import base64
from pathlib import Path
sys.path.insert(0, str(Path(__file__).parent.parent.parent.parent / "tools" / "src"))
@@ -87,7 +86,26 @@ async def test_autocomplete():
<div id="log" style="margin-top:20px;font-family:monospace;"></div>
<script>
const countries = ["Afghanistan","Albania","Algeria","Andorra","Angola","Argentina","Armenia","Australia","Austria","Azerbaijan","Bahamas","Bahrain","Bangladesh","Belarus","Belgium","Belize","Benin","Bhutan","Bolivia","Brazil","Canada","China","Colombia","Denmark","Egypt","France","Germany","India","Indonesia","Italy","Japan","Mexico","Netherlands","Nigeria","Norway","Pakistan","Peru","Philippines","Poland","Portugal","Russia","Spain","Sweden","Switzerland","Thailand","Turkey","Ukraine","United Kingdom","United States","Vietnam"];
const countries = [
"Afghanistan","Albania","Algeria",
"Andorra","Angola","Argentina",
"Armenia","Australia","Austria",
"Azerbaijan","Bahamas","Bahrain",
"Bangladesh","Belarus","Belgium",
"Belize","Benin","Bhutan",
"Bolivia","Brazil","Canada",
"China","Colombia","Denmark",
"Egypt","France","Germany",
"India","Indonesia","Italy",
"Japan","Mexico","Netherlands",
"Nigeria","Norway","Pakistan",
"Peru","Philippines","Poland",
"Portugal","Russia","Spain",
"Sweden","Switzerland","Thailand",
"Turkey","Ukraine",
"United Kingdom","United States",
"Vietnam"
];
const input = document.getElementById('search');
const log = document.getElementById('log');
@@ -126,11 +144,15 @@ async def test_autocomplete():
div.setAttribute('class', 'autocomplete-items');
this.parentNode.appendChild(div);
countries.filter(c => c.substr(0, val.length).toUpperCase() === val.toUpperCase())
.slice(0, 5)
.forEach(country => {
countries.filter(
c => c.substr(0, val.length).toUpperCase()
=== val.toUpperCase()
).slice(0, 5).forEach(country => {
const item = document.createElement('div');
item.innerHTML = '<strong>' + country.substr(0, val.length) + '</strong>' + country.substr(val.length);
item.innerHTML = '<strong>'
+ country.substr(0, val.length)
+ '</strong>'
+ country.substr(val.length);
item.addEventListener('click', function() {
input.value = country;
closeAllLists();
@@ -172,16 +194,14 @@ async def test_autocomplete():
await asyncio.sleep(0.5)
fast_result = await bridge.evaluate(
tab_id,
"(function() { return document.getElementById('search').value; })()"
tab_id, "(function() { return document.getElementById('search').value; })()"
)
fast_value = fast_result.get("result", "")
print(f"Value after fast typing: '{fast_value}'")
# Check events
events_result = await bridge.evaluate(
tab_id,
"(function() { return window.inputEvents; })()"
tab_id, "(function() { return window.inputEvents; })()"
)
print(f"Events logged: {events_result.get('result', [])}")
@@ -192,8 +212,7 @@ async def test_autocomplete():
await asyncio.sleep(0.5)
slow_result = await bridge.evaluate(
tab_id,
"(function() { return document.getElementById('search').value; })()"
tab_id, "(function() { return document.getElementById('search').value; })()"
)
slow_value = slow_result.get("result", "")
print(f"Value after slow typing: '{slow_value}'")
@@ -201,7 +220,8 @@ async def test_autocomplete():
# Check if dropdown appeared
dropdown_result = await bridge.evaluate(
tab_id,
"(function() { return document.querySelectorAll('.autocomplete-items div').length; })()"
"(function() { return document.querySelectorAll("
"'.autocomplete-items div').length; })()",
)
dropdown_count = dropdown_result.get("result", 0)
print(f"Dropdown items: {dropdown_count}")
@@ -88,8 +88,7 @@ async def test_huge_dom():
# Count elements
count_result = await bridge.evaluate(
tab_id,
"(function() { return document.querySelectorAll('*').length; })()"
tab_id, "(function() { return document.querySelectorAll('*').length; })()"
)
elem_count = count_result.get("result", 0)
print(f"DOM elements: {elem_count}")
@@ -123,12 +122,13 @@ async def test_huge_dom():
# Test 3: Real LinkedIn
print("\n--- Test 3: Real LinkedIn Feed ---")
await bridge.navigate(tab_id, "https://www.linkedin.com/feed", wait_until="load", timeout_ms=30000)
await bridge.navigate(
tab_id, "https://www.linkedin.com/feed", wait_until="load", timeout_ms=30000
)
await asyncio.sleep(2)
count_result = await bridge.evaluate(
tab_id,
"(function() { return document.querySelectorAll('*').length; })()"
tab_id, "(function() { return document.querySelectorAll('*').length; })()"
)
elem_count = count_result.get("result", 0)
print(f"LinkedIn DOM elements: {elem_count}")
@@ -11,7 +11,6 @@ Fix: Use wait_until="networkidle" or wait_for_selector
import asyncio
import sys
import time
import base64
from pathlib import Path
sys.path.insert(0, str(Path(__file__).parent.parent.parent.parent / "tools" / "src"))
@@ -82,9 +81,13 @@ async def test_spa_navigation():
// Render content
const content = {
home: '<h1>Home Page</h1><p>Welcome to the SPA!</p><button id="home-btn">Home Action</button>',
about: '<h1>About Page</h1><p>This is a simulated SPA.</p><button id="about-btn">About Action</button>',
contact: '<h1>Contact Page</h1><p>Contact us at test@example.com</p><button id="contact-btn">Contact Action</button>'
home: '<h1>Home Page</h1><p>Welcome!</p>'
+ '<button id="home-btn">Home Action</button>',
about: '<h1>About Page</h1><p>Simulated SPA.</p>'
+ '<button id="about-btn">About Action</button>',
contact: '<h1>Contact Page</h1>'
+ '<p>Contact us at test@example.com</p>'
+ '<button id="contact-btn">Contact Action</button>'
};
document.getElementById('app').innerHTML = content[page] || '<h1>404</h1>';
@@ -121,7 +124,7 @@ async def test_spa_navigation():
# Check content immediately
content = await bridge.evaluate(
tab_id,
"(function() { return document.getElementById('app').innerText; })()"
"(function() { return document.getElementById('app').innerText; })()",
)
print(f"Content immediately after load: '{content.get('result', '')}'")
@@ -137,7 +140,7 @@ async def test_spa_navigation():
# Check content after wait
content_after = await bridge.evaluate(
tab_id,
"(function() { return document.getElementById('app').innerText; })()"
"(function() { return document.getElementById('app').innerText; })()",
)
print(f"Content after wait: '{content_after.get('result', '')}'")
@@ -151,7 +154,7 @@ async def test_spa_navigation():
# Check if content changed
about_content = await bridge.evaluate(
tab_id,
"(function() { return document.getElementById('app').innerText; })()"
"(function() { return document.getElementById('app').innerText; })()",
)
print(f"Content after SPA nav: '{about_content.get('result', '')}'")
@@ -167,7 +170,7 @@ async def test_spa_navigation():
# Check content immediately
content_networkidle = await bridge.evaluate(
tab_id,
"(function() { return document.getElementById('app').innerText; })()"
"(function() { return document.getElementById('app').innerText; })()",
)
print(f"Content after networkidle: '{content_networkidle.get('result', '')}'")
@@ -44,7 +44,7 @@ def check_png(data: str) -> bool:
"""Verify that base64 data decodes to a valid PNG."""
try:
raw = base64.b64decode(data)
return raw[:8] == b'\x89PNG\r\n\x1a\n'
return raw[:8] == b"\x89PNG\r\n\x1a\n"
except Exception:
return False
@@ -136,7 +136,10 @@ async def test_selector_screenshot(bridge: BeelineBridge, tab_id: int, data_url:
print(" ⚠ WARNING: Selector screenshot not smaller (may be full page)")
return False
else:
print(f" ⚠ NOT IMPLEMENTED: selector param ignored (returns full page) - error={result.get('error')}")
print(
" ⚠ NOT IMPLEMENTED: selector param ignored"
f" (returns full page) - error={result.get('error')}"
)
print(" NOTE: selector parameter exists in signature but is not used in implementation")
return False
@@ -178,7 +181,9 @@ async def test_screenshot_timeout(bridge: BeelineBridge, tab_id: int, data_url:
print(f" ⚠ Fast enough to beat timeout: {err!r} in {elapsed:.3f}s")
return True # Not a failure, just fast
else:
print(f" ⚠ Screenshot completed before timeout ({elapsed:.3f}s) - too fast to test timeout")
print(
f" ⚠ Screenshot completed before timeout ({elapsed:.3f}s) - too fast to test timeout"
)
return True # Still ok, just very fast
@@ -218,7 +223,7 @@ async def main():
if bridge.is_connected:
print("✓ Extension connected!")
break
print(f"Waiting for extension... ({i+1}/10)")
print(f"Waiting for extension... ({i + 1}/10)")
else:
print("✗ Extension not connected. Ensure Chrome with Beeline extension is running.")
return
@@ -97,7 +97,7 @@ async def test_problematic_site(bridge: BeelineBridge, tab_id: int) -> dict:
});
return results;
})();
"""
""",
)
print(f" Before scroll: {before.get('result', {})}")
@@ -126,7 +126,7 @@ async def test_problematic_site(bridge: BeelineBridge, tab_id: int) -> dict:
});
return results;
})();
"""
""",
)
print(f" After scroll: {after.get('result', {})}")
@@ -137,8 +137,14 @@ async def test_problematic_site(bridge: BeelineBridge, tab_id: int) -> dict:
changed = False
for key in after_data:
if key in before_data:
b_val = before_data[key].get("scrollTop", 0) if isinstance(before_data[key], dict) else 0
a_val = after_data[key].get("scrollTop", 0) if isinstance(after_data[key], dict) else 0
b_val = (
before_data[key].get("scrollTop", 0)
if isinstance(before_data[key], dict)
else 0
)
a_val = (
after_data[key].get("scrollTop", 0) if isinstance(after_data[key], dict) else 0
)
if a_val != b_val:
print(f" ✓ CHANGE DETECTED: {key} scrolled from {b_val} to {a_val}")
changed = True
@@ -194,7 +200,7 @@ async def detect_root_cause(bridge: BeelineBridge, tab_id: int) -> dict:
largest: candidates[0]
};
})();
"""
""",
)
detections["nested_scroll"] = scroll_check.get("result", {})
print(f" Nested scroll containers: {detections['nested_scroll']}")
@@ -212,7 +218,7 @@ async def detect_root_cause(bridge: BeelineBridge, tab_id: int) -> dict:
});
return { count: withShadow.length, elements: withShadow.slice(0, 5) };
})();
"""
""",
)
detections["shadow_dom"] = shadow_check.get("result", {})
print(f" Shadow DOM: {detections['shadow_dom']}")
@@ -225,7 +231,7 @@ async def detect_root_cause(bridge: BeelineBridge, tab_id: int) -> dict:
const iframes = document.querySelectorAll('iframe');
return { count: iframes.length };
})();
"""
""",
)
detections["iframes"] = iframe_check.get("result", {})
print(f" iframes: {detections['iframes']}")
@@ -240,7 +246,7 @@ async def detect_root_cause(bridge: BeelineBridge, tab_id: int) -> dict:
body_children: document.body.children.length
};
})();
"""
""",
)
detections["dom_size"] = dom_check.get("result", {})
print(f" DOM size: {detections['dom_size']}")
@@ -256,7 +262,7 @@ async def detect_root_cause(bridge: BeelineBridge, tab_id: int) -> dict:
angular: !!document.querySelector('[ng-app], [ng-version]')
};
})();
"""
""",
)
detections["frameworks"] = framework_check.get("result", {})
print(f" Frameworks: {detections['frameworks']}")
@@ -289,7 +295,7 @@ async def main():
if bridge.is_connected:
print("✓ Extension connected!")
break
print(f"Waiting for extension... ({i+1}/10)")
print(f"Waiting for extension... ({i + 1}/10)")
else:
print("✗ Extension not connected. Ensure Chrome with Beeline extension is running.")
return
+7 -10
@@ -88,16 +88,13 @@ _QUEEN_PLANNING_TOOLS = [
]
# Building phase: full coding + agent construction tools.
_QUEEN_BUILDING_TOOLS = (
_SHARED_TOOLS
+ [
"load_built_agent",
"list_credentials",
"replan_agent",
"save_agent_draft", # Re-draft during building → auto-dissolves + updates flowchart
"save_global_memory",
]
)
_QUEEN_BUILDING_TOOLS = _SHARED_TOOLS + [
"load_built_agent",
"list_credentials",
"replan_agent",
"save_agent_draft", # Re-draft during building → auto-dissolves + updates flowchart
"save_global_memory",
]
# Staging phase: agent loaded but not yet running — inspect, configure, launch.
# No backward transitions — staging only goes forward to running.
+5 -17
@@ -11,7 +11,6 @@ have already been processed by the reflection agent.
from __future__ import annotations
import json
import logging
import re
import shutil
@@ -262,8 +261,7 @@ def validate_global_memory_payload(
parsed = parse_global_memory_category(category)
if parsed is None:
raise ValueError(
"Invalid global memory category. Use one of: "
+ ", ".join(GLOBAL_MEMORY_CATEGORIES)
"Invalid global memory category. Use one of: " + ", ".join(GLOBAL_MEMORY_CATEGORIES)
)
if not description.strip():
raise ValueError("Global memory description cannot be empty.")
@@ -304,9 +302,7 @@ def save_global_memory(
body=content,
)
if len(doc.encode("utf-8")) > MAX_FILE_SIZE_BYTES:
raise ValueError(
f"Global memory entry exceeds the {MAX_FILE_SIZE_BYTES} byte limit."
)
raise ValueError(f"Global memory entry exceeds the {MAX_FILE_SIZE_BYTES} byte limit.")
path = target_dir / filename
path.write_text(doc, encoding="utf-8")
return filename, path
@@ -316,6 +312,7 @@ def save_global_memory(
# Manifest formatting
# ---------------------------------------------------------------------------
def _age_label(mtime: float) -> str:
"""Human-readable age string from an mtime."""
age_days = memory_age_days(mtime)
@@ -483,10 +480,7 @@ def migrate_shared_v2_memories(
if not src.is_dir():
return
md_files = sorted(
f for f in src.glob("*.md")
if f.is_file() and not f.name.startswith(".")
)
md_files = sorted(f for f in src.glob("*.md") if f.is_file() and not f.name.startswith("."))
if not md_files:
marker.write_text("no shared memories found\n", encoding="utf-8")
return
@@ -533,13 +527,7 @@ def _write_migration_file(
) -> None:
"""Write a single migrated memory file with frontmatter."""
# Truncate body to respect file size limit (leave room for frontmatter).
header = (
f"---\n"
f"name: {name}\n"
f"description: {description}\n"
f"type: {mem_type}\n"
f"---\n\n"
)
header = f"---\nname: {name}\ndescription: {description}\ntype: {mem_type}\n---\n\n"
max_body = MAX_FILE_SIZE_BYTES - len(header.encode("utf-8"))
if len(body.encode("utf-8")) > max_body:
# Rough truncation — cut at character level then trim to last newline.
+74 -24
@@ -149,12 +149,11 @@ def _inject_last_modified_by(content: str, caller: str) -> str:
fm_body = m.group(1)
# Remove existing last_modified_by line if present.
fm_lines = [
ln for ln in fm_body.splitlines()
if not ln.strip().lower().startswith("last_modified_by")
ln for ln in fm_body.splitlines() if not ln.strip().lower().startswith("last_modified_by")
]
fm_lines.append(f"last_modified_by: {caller}")
new_fm = "\n".join(fm_lines)
return f"---\n{new_fm}\n---{content[m.end():]}"
return f"---\n{new_fm}\n---{content[m.end() :]}"
def _execute_tool(name: str, args: dict[str, Any], memory_dir: Path, caller: str) -> str:
@@ -208,7 +207,9 @@ def _execute_tool(name: str, args: dict[str, Any], memory_dir: Path, caller: str
return f"ERROR: File cap reached ({MAX_FILES}). Delete a file first."
memory_dir.mkdir(parents=True, exist_ok=True)
path.write_text(content, encoding="utf-8")
logger.debug("reflect: tool write_memory_file [%s] → %s (%d chars)", caller, filename, len(content))
logger.debug(
"reflect: tool write_memory_file [%s] → %s (%d chars)", caller, filename, len(content)
)
return f"Wrote {filename} ({len(content)} chars)."
if name == "delete_memory_file":
@@ -262,7 +263,10 @@ async def _reflection_loop(
preview = user_content[:300] if isinstance(user_content, str) else str(user_content)[:300]
logger.debug(
"reflect: turn %d — sending %d messages to LLM, last msg role=%s, preview=%s",
_turn, len(messages), messages[-1].get("role", "?") if messages else "?", preview,
_turn,
len(messages),
messages[-1].get("role", "?") if messages else "?",
preview,
)
try:
@@ -282,13 +286,23 @@ async def _reflection_loop(
tool_calls_raw = resp.raw_response.get("tool_calls", [])
# Log the full LLM response for debugging.
raw_keys = list(resp.raw_response.keys()) if isinstance(resp.raw_response, dict) else type(resp.raw_response).__name__
raw_keys = (
list(resp.raw_response.keys())
if isinstance(resp.raw_response, dict)
else type(resp.raw_response).__name__
)
logger.debug(
"reflect: turn %d — LLM response: content=%r (len=%d), stop_reason=%s, "
"tool_calls=%d, model=%s, tokens=%d/%d, raw_keys=%s",
_turn, (resp.content or "")[:200], len(resp.content or ""),
resp.stop_reason, len(tool_calls_raw), resp.model,
resp.input_tokens, resp.output_tokens, raw_keys,
_turn,
(resp.content or "")[:200],
len(resp.content or ""),
resp.stop_reason,
len(tool_calls_raw),
resp.model,
resp.input_tokens,
resp.output_tokens,
raw_keys,
)
# Accumulate non-empty text across turns so we don't lose a reason
# given alongside tool calls on an earlier turn.
@@ -320,7 +334,12 @@ async def _reflection_loop(
break
# Execute each tool call and append results.
logger.debug("reflect: turn %d — executing %d tool call(s): %s", _turn + 1, len(tool_calls_raw), [tc["name"] for tc in tool_calls_raw])
logger.debug(
"reflect: turn %d — executing %d tool call(s): %s",
_turn + 1,
len(tool_calls_raw),
[tc["name"] for tc in tool_calls_raw],
)
for tc in tool_calls_raw:
result = _execute_tool(tc["name"], tc.get("input", {}), memory_dir, caller)
# Track files that were written or deleted.
@@ -328,11 +347,13 @@ async def _reflection_loop(
fname = tc.get("input", {}).get("filename", "")
if fname and not result.startswith("ERROR"):
changed_files.append(fname)
messages.append({
"role": "tool",
"tool_call_id": tc["id"],
"content": result,
})
messages.append(
{
"role": "tool",
"tool_call_id": tc["id"],
"content": result,
}
)
return True, changed_files, last_text
@@ -351,7 +372,7 @@ Your goal: identify anything from the recent messages worth remembering across
future sessions: user preferences, project context, techniques that worked,
goals, environment details, reference pointers.
Memory types: {', '.join(MEMORY_TYPES)}
Memory types: {", ".join(MEMORY_TYPES)}
Expected format for each memory file:
{_FRONTMATTER_EXAMPLE}
@@ -382,7 +403,7 @@ You are a reflection agent performing a periodic housekeeping pass over the
memory directory. Your job is to organise, deduplicate, and trim noise from
the accumulated memory files.
Memory types: {', '.join(MEMORY_TYPES)}
Memory types: {", ".join(MEMORY_TYPES)}
Expected format for each memory file:
{_FRONTMATTER_EXAMPLE}
@@ -472,12 +493,20 @@ async def run_short_reflection(
)
_, changed, reason = await _reflection_loop(
llm, _SHORT_REFLECT_SYSTEM, user_msg, mem_dir, caller=caller,
llm,
_SHORT_REFLECT_SYSTEM,
user_msg,
mem_dir,
caller=caller,
)
if changed:
logger.debug("reflect: short reflection done [%s], changed files: %s", caller, changed)
else:
logger.debug("reflect: short reflection done [%s], no changes — %s", caller, reason or "no reason given")
logger.debug(
"reflect: short reflection done [%s], no changes — %s",
caller,
reason or "no reason given",
)
async def run_long_reflection(
@@ -503,12 +532,26 @@ async def run_long_reflection(
)
_, changed, reason = await _reflection_loop(
llm, _LONG_REFLECT_SYSTEM, user_msg, mem_dir, caller=caller,
llm,
_LONG_REFLECT_SYSTEM,
user_msg,
mem_dir,
caller=caller,
)
if changed:
logger.debug("reflect: long reflection done [%s] (%d files), changed files: %s", caller, len(files), changed)
logger.debug(
"reflect: long reflection done [%s] (%d files), changed files: %s",
caller,
len(files),
changed,
)
else:
logger.debug("reflect: long reflection done [%s] (%d files), no changes — %s", caller, len(files), reason or "no reason given")
logger.debug(
"reflect: long reflection done [%s] (%d files), no changes — %s",
caller,
len(files),
reason or "no reason given",
)
async def run_diary_update(
@@ -627,7 +670,8 @@ async def subscribe_reflection_triggers(
if is_tool_turn and not is_interval:
logger.debug(
"reflect: skipping turn %d (stop_reason=%s, next reflect at %d)",
_short_count, stop_reason,
_short_count,
stop_reason,
(_short_count // _LONG_REFLECT_INTERVAL + 1) * _LONG_REFLECT_INTERVAL,
)
return
@@ -638,7 +682,12 @@ async def subscribe_reflection_triggers(
async with _lock:
try:
logger.debug("reflect: turn complete — count %d/%d (stop_reason=%s)", _short_count, _LONG_REFLECT_INTERVAL, stop_reason)
logger.debug(
"reflect: turn complete — count %d/%d (stop_reason=%s)",
_short_count,
_LONG_REFLECT_INTERVAL,
stop_reason,
)
if is_interval:
await run_short_reflection(session_dir, llm, mem_dir, caller="queen")
await run_long_reflection(llm, mem_dir, caller="queen")
@@ -659,6 +708,7 @@ async def subscribe_reflection_triggers(
if phase_state is not None:
try:
from framework.agents.queen.recall_selector import update_recall_cache
await update_recall_cache(
session_dir,
llm,
+3 -1
@@ -307,7 +307,9 @@ def build_node_context_from_graph_context(
accounts_data=gc.accounts_data,
tool_provider_map=gc.tool_provider_map,
fallback_to_default_accounts_prompt=fallback_to_default_accounts_prompt,
identity_prompt=identity_prompt if identity_prompt is not None else getattr(gc.graph, "identity_prompt", "") or "",
identity_prompt=identity_prompt
if identity_prompt is not None
else getattr(gc.graph, "identity_prompt", "") or "",
narrative=narrative,
execution_id=gc.execution_id,
run_id=gc.run_id,
+1
@@ -302,6 +302,7 @@ Respond with ONLY a JSON object:
return result
class GraphSpec(BaseModel):
"""
Complete specification of an agent graph.
@@ -153,7 +153,10 @@ async def drain_injection_queue(
) -> int:
"""Drain all pending injected events as user messages. Returns count."""
count = 0
logger.debug("[drain_injection_queue] Starting to drain queue, initial queue size: %s", queue.qsize() if hasattr(queue, 'qsize') else 'unknown')
logger.debug(
"[drain_injection_queue] Starting to drain queue, initial queue size: %s",
queue.qsize() if hasattr(queue, "qsize") else "unknown",
)
while not queue.empty():
try:
content, is_client_input, image_content = queue.get_nowait()
-2
@@ -11,8 +11,6 @@ from typing import Any, Literal, Protocol, runtime_checkable
from framework.graph.conversation import (
ConversationStore,
get_run_cursor,
update_run_cursor,
)
logger = logging.getLogger(__name__)
+77 -25
@@ -306,7 +306,11 @@ class EventLoopNode(NodeProtocol):
async def execute(self, ctx: NodeContext) -> NodeResult:
"""Run the event loop."""
logger.debug("[EventLoopNode.execute] Starting execution for node=%s, stream=%s", ctx.node_id, ctx.stream_id)
logger.debug(
"[EventLoopNode.execute] Starting execution for node=%s, stream=%s",
ctx.node_id,
ctx.stream_id,
)
start_time = time.time()
total_input_tokens = 0
total_output_tokens = 0
@@ -315,7 +319,12 @@ class EventLoopNode(NodeProtocol):
execution_id = ctx.execution_id or ""
# Store skill dirs for AS-9 file-read interception in _execute_tool
self._skill_dirs: list[str] = ctx.skill_dirs
logger.debug("[EventLoopNode.execute] node_id=%s, execution_id=%s, max_iterations=%d", node_id, execution_id, self._config.max_iterations)
logger.debug(
"[EventLoopNode.execute] node_id=%s, execution_id=%s, max_iterations=%d",
node_id,
execution_id,
self._config.max_iterations,
)
# DS-13: context preservation warning state
_context_warn_sent = False
@@ -555,7 +564,9 @@ class EventLoopNode(NodeProtocol):
_consecutive_empty_turns: int = 0
# 6. Main loop
logger.debug("[EventLoopNode.execute] Entering main loop, start_iteration=%d", start_iteration)
logger.debug(
"[EventLoopNode.execute] Entering main loop, start_iteration=%d", start_iteration
)
for iteration in range(start_iteration, self._config.max_iterations):
iter_start = time.time()
logger.debug("[EventLoopNode.execute] iteration=%d starting", iteration)
@@ -589,12 +600,22 @@ class EventLoopNode(NodeProtocol):
)
# 6b. Drain injection queue
logger.debug("[EventLoopNode.execute] iteration=%d: draining injection queue...", iteration)
logger.debug(
"[EventLoopNode.execute] iteration=%d: draining injection queue...", iteration
)
drained_injections = await self._drain_injection_queue(conversation, ctx)
logger.debug("[EventLoopNode.execute] iteration=%d: drained %d injections", iteration, drained_injections)
logger.debug(
"[EventLoopNode.execute] iteration=%d: drained %d injections",
iteration,
drained_injections,
)
# 6b1. Drain trigger queue (framework-level signals)
drained_triggers = await self._drain_trigger_queue(conversation)
logger.debug("[EventLoopNode.execute] iteration=%d: drained %d triggers", iteration, drained_triggers)
logger.debug(
"[EventLoopNode.execute] iteration=%d: drained %d triggers",
iteration,
drained_triggers,
)
# Resume blocked ask_user/auto-block waits durably across restarts.
# If the node was parked for input and no new message has been
@@ -648,7 +669,8 @@ class EventLoopNode(NodeProtocol):
)
if self._injection_queue.empty() and self._trigger_queue.empty():
logger.info(
"[%s] iter=%d: pending-input wait woke without queued input; re-waiting",
"[%s] iter=%d: pending-input wait woke"
" without queued input; re-waiting",
node_id,
iteration,
)
@@ -720,14 +742,20 @@ class EventLoopNode(NodeProtocol):
iteration,
len(conversation.messages),
)
logger.debug("[EventLoopNode.execute] iteration=%d: entering _run_single_turn loop", iteration)
logger.debug(
"[EventLoopNode.execute] iteration=%d: entering _run_single_turn loop", iteration
)
_stream_retry_count = 0
_turn_cancelled = False
_llm_turn_failed_waiting_input = False
_turn_t0 = time.monotonic()
while True:
try:
logger.debug("[EventLoopNode.execute] iteration=%d: calling _run_single_turn (retry=%d)", iteration, _stream_retry_count)
logger.debug(
"[EventLoopNode.execute] iteration=%d: calling _run_single_turn (retry=%d)",
iteration,
_stream_retry_count,
)
(
assistant_text,
real_tool_results,
@@ -744,7 +772,11 @@ class EventLoopNode(NodeProtocol):
) = await self._run_single_turn(
ctx, conversation, tools, iteration, accumulator
)
logger.debug("[EventLoopNode.execute] iteration=%d: _run_single_turn completed successfully", iteration)
logger.debug(
"[EventLoopNode.execute] iteration=%d:"
" _run_single_turn completed successfully",
iteration,
)
_turn_ms = int((time.monotonic() - _turn_t0) * 1000)
logger.info(
"[%s] iter=%d: LLM done (%dms) — text=%d chars, real_tools=%d, "
@@ -815,7 +847,13 @@ class EventLoopNode(NodeProtocol):
break
except Exception as e:
logger.debug("[EventLoopNode.execute] iteration=%d: Exception in _run_single_turn: %s (%s)", iteration, type(e).__name__, str(e)[:200])
logger.debug(
"[EventLoopNode.execute] iteration=%d:"
" Exception in _run_single_turn: %s (%s)",
iteration,
type(e).__name__,
str(e)[:200],
)
# Retry transient errors with exponential backoff
if (
self._is_transient_error(e)
@@ -1950,11 +1988,13 @@ class EventLoopNode(NodeProtocol):
image_content: Optional list of OpenAI-style image blocks to attach.
"""
logger.debug(
"[EventLoopNode.inject_event] content_len=%d, is_client_input=%s, has_images=%s, queue_size_before=%d",
"[EventLoopNode.inject_event] content_len=%d,"
" is_client_input=%s, has_images=%s,"
" queue_size_before=%d",
len(content) if content else 0,
is_client_input,
bool(image_content),
self._injection_queue.qsize() if hasattr(self._injection_queue, 'qsize') else -1,
self._injection_queue.qsize() if hasattr(self._injection_queue, "qsize") else -1,
)
try:
await self._injection_queue.put((content, is_client_input, image_content))
@@ -2126,7 +2166,12 @@ class EventLoopNode(NodeProtocol):
# to "". Without this, all calls share the same message ID on the
# frontend and the second call's text silently replaces the first.
inner_turn = 0
logger.debug("[_run_single_turn] node_id=%s, tools_count=%d, execution_id=%s", node_id, len(tools), execution_id)
logger.debug(
"[_run_single_turn] node_id=%s, tools_count=%d, execution_id=%s",
node_id,
len(tools),
execution_id,
)
# Inner tool loop: stream may produce tool calls requiring re-invocation
while True:
@@ -2161,7 +2206,12 @@ class EventLoopNode(NodeProtocol):
tool_calls: list[ToolCallEvent] = []
_stream_error: StreamErrorEvent | None = None
logger.debug("[_run_single_turn] inner_turn=%d: Starting LLM stream with %d messages, %d tools", inner_turn, len(messages), len(tools))
logger.debug(
"[_run_single_turn] inner_turn=%d: Starting LLM stream with %d messages, %d tools",
inner_turn,
len(messages),
len(tools),
)
# Stream LLM response in a child task so cancel_current_turn()
# can kill it instantly without terminating the queen's main loop.
@@ -2210,10 +2260,14 @@ class EventLoopNode(NodeProtocol):
_llm_stream_t0 = time.monotonic()
self._stream_task = asyncio.create_task(_do_stream())
logger.debug("[_run_single_turn] inner_turn=%d: Stream task created, waiting...", inner_turn)
logger.debug(
"[_run_single_turn] inner_turn=%d: Stream task created, waiting...", inner_turn
)
try:
await self._stream_task
logger.debug("[_run_single_turn] inner_turn=%d: Stream task completed normally", inner_turn)
logger.debug(
"[_run_single_turn] inner_turn=%d: Stream task completed normally", inner_turn
)
except asyncio.CancelledError:
logger.debug("[_run_single_turn] inner_turn=%d: Stream task cancelled", inner_turn)
if accumulated_text:
@@ -2229,7 +2283,9 @@ class EventLoopNode(NodeProtocol):
raise
raise TurnCancelled() from None
except Exception as e:
logger.exception("[_run_single_turn] inner_turn=%d: Stream task failed: %s", inner_turn, e)
logger.exception(
"[_run_single_turn] inner_turn=%d: Stream task failed: %s", inner_turn, e
)
raise
finally:
self._stream_task = None
@@ -2728,10 +2784,7 @@ class EventLoopNode(NodeProtocol):
return
try:
await asyncio.wait_for(
_activity_event.wait(),
timeout=_remaining
)
await asyncio.wait_for(_activity_event.wait(), timeout=_remaining)
_activity_event.clear()
except TimeoutError:
# Check again in case activity happened during wait
@@ -2762,6 +2815,7 @@ class EventLoopNode(NodeProtocol):
_sub_id = None
if self._event_bus and _activity_timeout > 0:
from framework.runtime.event_bus import EventType
_sub_id = self._event_bus.subscribe(
event_types=[
EventType.TOOL_CALL_STARTED,
@@ -2783,9 +2837,7 @@ class EventLoopNode(NodeProtocol):
# Use activity-based timeout with wall-clock max
_result_coro = _run_with_activity_timeout(_coro)
if _wall_timeout > 0:
_r = await asyncio.wait_for(
_result_coro, timeout=_wall_timeout
)
_r = await asyncio.wait_for(_result_coro, timeout=_wall_timeout)
else:
_r = await _result_coro
elif _wall_timeout > 0:
+63 -39
@@ -18,17 +18,17 @@ from typing import Any
from framework.graph.checkpoint_config import CheckpointConfig
from framework.graph.context import GraphContext, build_node_context
from framework.graph.conversation import LEGACY_RUN_ID
from framework.graph.edge import EdgeCondition, EdgeSpec, GraphSpec
from framework.graph.goal import Goal
from framework.graph.conversation import LEGACY_RUN_ID, get_run_cursor
from framework.graph.node import (
DataBuffer,
NodeProtocol,
NodeResult,
NodeSpec,
DataBuffer,
)
from framework.graph.validator import OutputValidator
from framework.llm.provider import LLMProvider, Tool, ToolUse
from framework.llm.provider import LLMProvider, Tool
from framework.observability import set_trace_context
from framework.runtime.core import Runtime
from framework.schemas.checkpoint import Checkpoint
@@ -205,7 +205,14 @@ class GraphExecutor:
self.approval_callback = approval_callback
self.validator = OutputValidator()
self.logger = logging.getLogger(__name__)
self.logger.debug("[GraphExecutor.__init__] Created with stream_id=%s, execution_id=%s, initial node_registry keys: %s", stream_id, execution_id, list(self.node_registry.keys()))
self.logger.debug(
"[GraphExecutor.__init__] Created with"
" stream_id=%s, execution_id=%s,"
" initial node_registry keys: %s",
stream_id,
execution_id,
list(self.node_registry.keys()),
)
self._event_bus = event_bus
self._stream_id = stream_id
self._execution_id = execution_id or getattr(runtime, "execution_id", "")
@@ -529,13 +536,15 @@ class GraphExecutor:
# Continuous conversation mode state
is_continuous = getattr(graph, "conversation_mode", "isolated") == "continuous"
continuous_conversation = None # NodeConversation threaded across nodes
cumulative_tools: list = [] # Tools accumulate, never removed
cumulative_tool_names: set[str] = set()
cumulative_output_keys: list[str] = [] # Output keys from all visited nodes
continuous_conversation = None # NodeConversation threaded across nodes # noqa: F841
cumulative_tools: list = [] # Tools accumulate, never removed # noqa: F841
cumulative_tool_names: set[str] = set() # noqa: F841
cumulative_output_keys: list[str] = [] # noqa: F841
# Build node registry for subagent lookup
node_registry: dict[str, NodeSpec] = {node.id: node for node in graph.nodes}
node_registry: dict[str, NodeSpec] = { # noqa: F841
node.id: node for node in graph.nodes
}
# Initialize checkpoint store if checkpointing is enabled
checkpoint_store: CheckpointStore | None = None
@@ -575,10 +584,7 @@ class GraphExecutor:
# input_data would overwrite intermediate results with stale values.
_is_resuming = bool(
session_state
and (
session_state.get("paused_at")
or session_state.get("resume_from_checkpoint")
)
and (session_state.get("paused_at") or session_state.get("resume_from_checkpoint"))
)
if input_data and not _is_resuming:
for key, value in input_data.items():
@@ -588,9 +594,9 @@ class GraphExecutor:
_event_triggered = bool(input_data and isinstance(input_data.get("event"), dict))
path: list[str] = []
total_tokens = 0
total_latency = 0
node_retry_counts: dict[str, int] = {} # Track retries per node
total_tokens = 0 # noqa: F841
total_latency = 0 # noqa: F841
node_retry_counts: dict[str, int] = {} # noqa: F841
node_visit_counts: dict[str, int] = {} # Track visits for feedback loops
_is_retry = False # True when looping back for a retry (not a new visit)
@@ -661,7 +667,7 @@ class GraphExecutor:
else:
current_node_id = graph.get_entry_point(session_state)
steps = 0
steps = 0 # noqa: F841
if session_state and current_node_id != graph.entry_node:
self.logger.info(f"🔄 Resuming from: {current_node_id}")
@@ -719,7 +725,6 @@ class GraphExecutor:
ToolRegistry.reset_execution_context(_ctx_token)
VALID_NODE_TYPES = {
"event_loop",
"gcu",
@@ -739,9 +744,17 @@ class GraphExecutor:
"""Get or create a node implementation."""
# Check registry first
if node_spec.id in self.node_registry:
logger.debug("[GraphExecutor._get_node_implementation] Found node '%s' in registry", node_spec.id)
logger.debug(
"[GraphExecutor._get_node_implementation] Found node '%s' in registry", node_spec.id
)
return self.node_registry[node_spec.id]
logger.debug("[GraphExecutor._get_node_implementation] Node '%s' not in registry (keys: %s), creating new", node_spec.id, list(self.node_registry.keys()))
logger.debug(
"[GraphExecutor._get_node_implementation]"
" Node '%s' not in registry (keys: %s),"
" creating new",
node_spec.id,
list(self.node_registry.keys()),
)
# Reject removed node types with migration guidance
if node_spec.node_type in self.REMOVED_NODE_TYPES:
@@ -807,7 +820,13 @@ class GraphExecutor:
)
# Cache so inject_event() is reachable for queen interaction and escalation routing
self.node_registry[node_spec.id] = node
logger.debug("[GraphExecutor._get_node_implementation] Cached node '%s' in node_registry, registry now has keys: %s", node_spec.id, list(self.node_registry.keys()))
logger.debug(
"[GraphExecutor._get_node_implementation]"
" Cached node '%s' in node_registry,"
" registry now has keys: %s",
node_spec.id,
list(self.node_registry.keys()),
)
return node
# Should never reach here due to validation above
@@ -1408,8 +1427,7 @@ class GraphExecutor:
if any(w.lifecycle == WorkerLifecycle.RUNNING for w in workers.values()):
return False
return any(
tid in completed_terminals or tid in failed_workers
for tid in terminal_worker_ids
tid in completed_terminals or tid in failed_workers for tid in terminal_worker_ids
)
def _mark_quiescent_terminal_failure() -> bool:
@@ -1419,8 +1437,7 @@ class GraphExecutor:
if any(w.lifecycle == WorkerLifecycle.RUNNING for w in workers.values()):
return False
if any(
tid in completed_terminals or tid in failed_workers
for tid in terminal_worker_ids
tid in completed_terminals or tid in failed_workers for tid in terminal_worker_ids
):
return False
execution_error = (
@@ -1432,7 +1449,9 @@ class GraphExecutor:
# Track fan-out branch workers for per-branch timeout enforcement
_fanout_branch_tasks: dict[str, asyncio.Task] = {} # worker_id → timeout-wrapper task
branch_timeout = self._parallel_config.branch_timeout_seconds if self._parallel_config else 300.0
branch_timeout = (
self._parallel_config.branch_timeout_seconds if self._parallel_config else 300.0
)
def _route_activation(
activation: Activation,
@@ -1468,8 +1487,7 @@ class GraphExecutor:
if target_worker._task is not None:
# Fan-out branch: wrap with timeout
is_fanout_branch = any(
tag.via_branch == activation.target_id
for tag in activation.fan_out_tags
tag.via_branch == activation.target_id for tag in activation.fan_out_tags
)
if is_fanout_branch and branch_timeout > 0:
timed_task = asyncio.ensure_future(
@@ -1526,14 +1544,15 @@ class GraphExecutor:
gc.continuous_conversation = completion.conversation
self.logger.info(
f" ✓ Worker completed: {worker_id} "
f"({len(activations)} outgoing activation(s))"
f" ✓ Worker completed: {worker_id} ({len(activations)} outgoing activation(s))"
)
# Route activations to target workers
for activation in activations:
_route_activation(
activation, workers, {},
activation,
workers,
{},
has_event_subscription=True,
)
@@ -1567,8 +1586,8 @@ class GraphExecutor:
completion_event.set()
# Subscribe to events (only if event bus has subscribe capability)
has_event_subscription = (
self._event_bus is not None and hasattr(self._event_bus, "subscribe")
has_event_subscription = self._event_bus is not None and hasattr(
self._event_bus, "subscribe"
)
if has_event_subscription:
sub_completed = self._event_bus.subscribe(
@@ -1597,9 +1616,7 @@ class GraphExecutor:
else:
# No event bus: wait on worker tasks directly and route completions inline.
pending_tasks: dict[str, asyncio.Task] = {
wid: w._task
for wid, w in workers.items()
if w._task is not None
wid: w._task for wid, w in workers.items() if w._task is not None
}
while True:
if _check_graph_done():
@@ -1651,7 +1668,10 @@ class GraphExecutor:
task_error = exc
# Check for fan-out branch timeout
if isinstance(task_error, asyncio.TimeoutError) and wid in _fanout_branch_tasks:
if (
isinstance(task_error, asyncio.TimeoutError)
and wid in _fanout_branch_tasks
):
error = f"Branch failed (timed out after {branch_timeout}s)"
failed_workers[wid] = error
worker.lifecycle = WorkerLifecycle.FAILED
@@ -1716,7 +1736,9 @@ class GraphExecutor:
# Route activations
for activation in outgoing_activations:
_route_activation(
activation, workers, pending_tasks,
activation,
workers,
pending_tasks,
has_event_subscription=False,
)
@@ -1744,7 +1766,9 @@ class GraphExecutor:
if outgoing_activations:
for activation in outgoing_activations:
_route_activation(
activation, workers, pending_tasks,
activation,
workers,
pending_tasks,
has_event_subscription=False,
)
elif task_error is not None:
+1 -1
@@ -153,7 +153,7 @@ wrong place.
| Space | Used by | How to get |
|---|---|---|
| Physical pixels | `browser_click_coordinate`, `browser_hover_coordinate` | `browser_coords` `physical_x/y`, or CSS × DPR |
| Physical pixels | `browser_click_coordinate` | `browser_coords` `physical_x/y` |
| CSS pixels | `getBoundingClientRect()`, `elementFromPoint` | `browser_coords` `css_x/y` |
| Screenshot pixels | What you see in the 800px image | Raw position in screenshot |
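As a quick worked example of the first two rows of the table (illustrative values, not from the doc): CSS coordinates multiply by the device pixel ratio to give physical pixels.

# Illustrative only: CSS pixels -> physical pixels via the device pixel ratio.
css_x, css_y = 120.0, 340.0        # e.g. from getBoundingClientRect()
dpr = 2.0                          # window.devicePixelRatio on a 2x display
physical_x, physical_y = css_x * dpr, css_y * dpr   # 240.0, 680.0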
+1 -3
@@ -19,7 +19,6 @@ from framework.graph.prompting import (
)
if TYPE_CHECKING:
from framework.graph.edge import GraphSpec
from framework.graph.node import DataBuffer, NodeSpec
@@ -136,8 +135,7 @@ def build_transition_marker(
)
from framework.graph.prompting import build_transition_message
from framework.graph.prompting import build_transition_message # noqa: E402
__all__ = [
"EXECUTION_SCOPE_PREAMBLE",
+7 -7
@@ -104,7 +104,9 @@ def build_accounts_prompt(
tools_for_provider = sorted(provider_tools.get(provider, []))
if node_tool_set is not None:
relevant_tools = [tool_name for tool_name in tools_for_provider if tool_name in node_tool_set]
relevant_tools = [
tool_name for tool_name in tools_for_provider if tool_name in node_tool_set
]
if not relevant_tools:
continue
tools_for_provider = relevant_tools
@@ -153,7 +155,9 @@ def build_prompt_spec_from_node_context(
resolved_memory_prompt = getattr(ctx, "memory_prompt", "") or ""
return NodePromptSpec(
identity_prompt=ctx.identity_prompt or "",
focus_prompt=focus_prompt if focus_prompt is not None else (ctx.node_spec.system_prompt or ""),
focus_prompt=focus_prompt
if focus_prompt is not None
else (ctx.node_spec.system_prompt or ""),
narrative=narrative if narrative is not None else (ctx.narrative or ""),
accounts_prompt=ctx.accounts_prompt or "",
skills_catalog_prompt=ctx.skills_catalog_prompt or "",
@@ -191,11 +195,7 @@ def build_system_prompt(spec: NodePromptSpec) -> str:
if spec.narrative:
parts.append(f"\n--- Context (what has happened so far) ---\n{spec.narrative}")
if (
not spec.is_subagent_mode
and spec.node_type in ("event_loop", "gcu")
and spec.output_keys
):
if not spec.is_subagent_mode and spec.node_type in ("event_loop", "gcu") and spec.output_keys:
parts.append(f"\n{EXECUTION_SCOPE_PREAMBLE}")
if spec.node_type == "gcu":
+39 -30
@@ -256,7 +256,9 @@ class WorkerAgent:
if node_spec.max_node_visits > 0 and visit_count > node_spec.max_node_visits:
logger.info(
"Worker %s: visit %d exceeds max_node_visits=%d, skipping",
node_spec.id, visit_count, node_spec.max_node_visits,
node_spec.id,
visit_count,
node_spec.max_node_visits,
)
# Build a synthetic success result from current buffer state
existing_output: dict[str, Any] = {}
@@ -351,15 +353,14 @@ class WorkerAgent:
self._last_activations = []
await self._publish_failure(error)
async def _execute_with_retries(
self, node_impl: NodeProtocol, ctx: NodeContext
) -> NodeResult:
async def _execute_with_retries(self, node_impl: NodeProtocol, ctx: NodeContext) -> NodeResult:
"""Execute node with exponential backoff retry."""
gc = self._gc
# Only skip retries for actual EventLoopNode instances (they handle
# retries internally). Custom NodeProtocol impls registered via
# register_node should be retried by the executor.
from framework.graph.event_loop_node import EventLoopNode as _ELN
if isinstance(node_impl, _ELN):
max_retries = 0
else:
@@ -382,7 +383,9 @@ class WorkerAgent:
# Failure
if attempt + 1 < total_attempts:
gc.retry_counts[self.node_spec.id] = gc.retry_counts.get(self.node_spec.id, 0) + 1
gc.retry_counts[self.node_spec.id] = (
gc.retry_counts.get(self.node_spec.id, 0) + 1
)
gc.nodes_with_retries.add(self.node_spec.id)
delay = 1.0 * (2**attempt)
logger.warning(
@@ -412,7 +415,9 @@ class WorkerAgent:
except Exception as exc:
if attempt + 1 < total_attempts:
gc.retry_counts[self.node_spec.id] = gc.retry_counts.get(self.node_spec.id, 0) + 1
gc.retry_counts[self.node_spec.id] = (
gc.retry_counts.get(self.node_spec.id, 0) + 1
)
gc.nodes_with_retries.add(self.node_spec.id)
delay = 1.0 * (2**attempt)
logger.warning(
@@ -439,9 +444,7 @@ class WorkerAgent:
# Edge evaluation (source-side)
# ------------------------------------------------------------------
async def _evaluate_outgoing_edges(
self, result: NodeResult
) -> list[Activation]:
async def _evaluate_outgoing_edges(self, result: NodeResult) -> list[Activation]:
"""Evaluate outgoing edges and create activations for downstream.
Same logic as current _get_all_traversable_edges() plus
@@ -573,14 +576,17 @@ class WorkerAgent:
elif conflict_strategy == "first_wins":
logger.debug(
"Skipping write to '%s' (first_wins: already set by %s)",
key, prior_worker,
key,
prior_worker,
)
continue
else:
# last_wins: log and overwrite
logger.debug(
"Key '%s' overwritten (last_wins: %s -> %s)",
key, prior_worker, node_spec.id,
key,
prior_worker,
node_spec.id,
)
gc._fanout_written_keys[key] = node_spec.id
gc.buffer.write(key, value, validate=False)
@@ -602,8 +608,8 @@ class WorkerAgent:
# Auto-create EventLoopNode
if self.node_spec.node_type in ("event_loop", "gcu"):
from framework.graph.event_loop_node import EventLoopNode
from framework.graph.event_loop.types import LoopConfig
from framework.graph.event_loop_node import EventLoopNode
from framework.graph.node import warn_if_deprecated_client_facing
conv_store = None
@@ -641,8 +647,7 @@ class WorkerAgent:
return node
raise RuntimeError(
f"No implementation for node '{self.node_spec.id}' "
f"(type: {self.node_spec.node_type})"
f"No implementation for node '{self.node_spec.id}' (type: {self.node_spec.node_type})"
)
def _build_node_context(self) -> NodeContext:
@@ -676,7 +681,9 @@ class WorkerAgent:
from framework.agents.queen.reflection_agent import run_short_reflection
await run_short_reflection(
session_dir, gc.colony_reflect_llm, gc.colony_memory_dir,
session_dir,
gc.colony_reflect_llm,
gc.colony_memory_dir,
caller="worker",
)
except Exception:
@@ -721,21 +728,23 @@ class WorkerAgent:
# Serialize activations to dicts for event data
activations_data = []
for act in completion.activations:
activations_data.append({
"source_id": act.source_id,
"target_id": act.target_id,
"edge_id": act.edge_id,
"mapped_inputs": act.mapped_inputs,
"fan_out_tags": [
{
"fan_out_id": t.fan_out_id,
"fan_out_source": t.fan_out_source,
"branches": list(t.branches),
"via_branch": t.via_branch,
}
for t in act.fan_out_tags
],
})
activations_data.append(
{
"source_id": act.source_id,
"target_id": act.target_id,
"edge_id": act.edge_id,
"mapped_inputs": act.mapped_inputs,
"fan_out_tags": [
{
"fan_out_id": t.fan_out_id,
"fan_out_source": t.fan_out_source,
"branches": list(t.branches),
"via_branch": t.via_branch,
}
for t in act.fan_out_tags
],
}
)
await gc.event_bus.emit_worker_completed(
stream_id=gc.stream_id,
+5 -4
@@ -464,7 +464,7 @@ def _is_stream_transient_error(exc: BaseException) -> bool:
def _extract_text_tool_calls(
text: str,
) -> tuple[list["ToolCallEvent"], str]:
) -> tuple[list, str]:
"""Extract hallucinated tool calls from ``<tool_code>`` blocks in LLM text.
Some models (notably Gemini) emit tool invocations as text instead of using
@@ -498,7 +498,9 @@ def _extract_text_tool_calls(
continue
for tool_name, tool_args in payload.items():
call_id = f"synth_{hashlib.md5(f'{tool_name}:{json.dumps(tool_args, sort_keys=True)}'.encode()).hexdigest()[:12]}"
key = f"{tool_name}:{json.dumps(tool_args, sort_keys=True)}"
digest = hashlib.md5(key.encode()).hexdigest()[:12]
call_id = f"synth_{digest}"
events.append(
ToolCallEvent(
tool_use_id=call_id,
@@ -2040,8 +2042,7 @@ class LiteLLMProvider(LLMProvider):
"finish_reason" in _err_chain and "validation error" in _err_chain.lower()
) or (
# Fallback: the APIError wrapper message for chunk-building failures
"building chunks" in str(e).lower()
and (accumulated_text or tool_calls_acc)
"building chunks" in str(e).lower() and (accumulated_text or tool_calls_acc)
)
if _is_finish_reason_err:
logger.warning(
+1
@@ -1253,6 +1253,7 @@ def _select_agent(agents_dir: Path) -> str | None:
print()
return None
def cmd_setup_credentials(args: argparse.Namespace) -> int:
"""Interactive credential setup for an agent."""
from framework.credentials.setup import CredentialSetupSession
+6 -2
@@ -238,7 +238,9 @@ class AgentRuntime:
self._tools = tools or []
self._tool_executor = tool_executor
self._accounts_prompt = accounts_prompt
self._dynamic_memory_provider_factory: Callable[[str], Callable[[], str] | None] | None = None
self._dynamic_memory_provider_factory: Callable[[str], Callable[[], str] | None] | None = (
None
)
# Colony memory config for reflection-at-handoff (set by session_manager)
self._colony_memory_dir: Any = None
self._colony_worker_sessions_dir: Any = None
@@ -1522,7 +1524,9 @@ class AgentRuntime:
# Filter to only input keys so stale outputs
# from previous triggers don't leak through.
if allowed_keys is not None:
buffer_data = {k: v for k, v in full_buffer.items() if k in allowed_keys}
buffer_data = {
k: v for k, v in full_buffer.items() if k in allowed_keys
}
else:
buffer_data = full_buffer
if buffer_data:
+1 -2
@@ -1236,8 +1236,7 @@ class ExecutionStream:
# The task will continue winding down in the background and its
# finally block will harmlessly pop already-removed keys.
logger.warning(
"Execution %s did not finish within cancel timeout; "
"force-cleaning bookkeeping",
"Execution %s did not finish within cancel timeout; force-cleaning bookkeeping",
execution_id,
)
async with self._lock:
+5 -1
@@ -256,7 +256,11 @@ class SessionState(BaseModel):
error=result.error,
output=result.output,
),
data_buffer=result.session_state.get("data_buffer", result.session_state.get("memory", {})) if result.session_state else {},
data_buffer=result.session_state.get(
"data_buffer", result.session_state.get("memory", {})
)
if result.session_state
else {},
input_data=input_data or {},
)
+1
@@ -188,6 +188,7 @@ async def handle_browser_status(request: web.Request) -> web.Response:
if b"\r\n\r\n" in raw:
body = raw.split(b"\r\n\r\n", 1)[1]
import json
data = json.loads(body)
return web.json_response({"bridge": True, "connected": data.get("connected", False)})
except Exception:
+8 -2
@@ -75,6 +75,7 @@ async def create_queen(
QueenPhaseState,
register_queen_lifecycle_tools,
)
hive_home = Path.home() / ".hive"
# ---- Tool registry ------------------------------------------------
@@ -436,7 +437,9 @@ async def create_queen(
input_data={"greeting": initial_prompt or "Session started."},
session_state={"resume_session_id": session.id},
)
logger.debug("[_queen_loop] executor.execute() returned with success=%s", result.success)
logger.debug(
"[_queen_loop] executor.execute() returned with success=%s", result.success
)
if result.success:
logger.warning("Queen executor returned (should be forever-alive)")
else:
@@ -451,7 +454,10 @@ async def create_queen(
logger.exception("[_queen_loop] Queen conversation crashed: %s", e)
raise
finally:
logger.warning("[_queen_loop] Queen loop exiting — clearing queen_executor for session '%s'", session.id)
logger.warning(
"[_queen_loop] Queen loop exiting — clearing queen_executor for session '%s'",
session.id,
)
session.queen_executor = None
return asyncio.create_task(_queen_loop())
+45 -15
@@ -136,8 +136,12 @@ async def handle_chat(request: web.Request) -> web.Response:
display_message = body.get("display_message")
image_content = body.get("images") or None # list[dict] | None
logger.debug("[handle_chat] session_id=%s, message_len=%d, has_images=%s",
session.id, len(message), bool(image_content))
logger.debug(
"[handle_chat] session_id=%s, message_len=%d, has_images=%s",
session.id,
len(message),
bool(image_content),
)
logger.debug("[handle_chat] session.queen_executor=%s", session.queen_executor)
if not message and not image_content:
@@ -146,17 +150,31 @@ async def handle_chat(request: web.Request) -> web.Response:
queen_executor = session.queen_executor
if queen_executor is not None:
logger.debug("[handle_chat] Queen executor exists, looking for 'queen' node...")
logger.debug("[handle_chat] node_registry type=%s, id=%s", type(queen_executor.node_registry), id(queen_executor.node_registry))
logger.debug("[handle_chat] node_registry keys: %s", list(queen_executor.node_registry.keys()))
logger.debug(
"[handle_chat] node_registry type=%s, id=%s",
type(queen_executor.node_registry),
id(queen_executor.node_registry),
)
logger.debug(
"[handle_chat] node_registry keys: %s", list(queen_executor.node_registry.keys())
)
node = queen_executor.node_registry.get("queen")
logger.debug("[handle_chat] node=%s, node_type=%s", node, type(node).__name__ if node else None)
logger.debug("[handle_chat] has_inject_event=%s", hasattr(node, "inject_event") if node else False)
logger.debug(
"[handle_chat] node=%s, node_type=%s", node, type(node).__name__ if node else None
)
logger.debug(
"[handle_chat] has_inject_event=%s", hasattr(node, "inject_event") if node else False
)
# Race condition: executor exists but node not created yet (still initializing)
if node is None and session.queen_task is not None and not session.queen_task.done():
logger.warning("[handle_chat] Queen executor exists but node not ready yet (initializing). Waiting...")
logger.warning(
"[handle_chat] Queen executor exists but node"
" not ready yet (initializing). Waiting..."
)
# Wait a short time for initialization to progress
import asyncio
for _ in range(50): # Max 5 seconds
await asyncio.sleep(0.1)
node = queen_executor.node_registry.get("queen")
@@ -165,7 +183,7 @@ async def handle_chat(request: web.Request) -> web.Response:
break
else:
logger.error("[handle_chat] Node still not available after 5s wait")
if node is not None and hasattr(node, "inject_event"):
try:
logger.debug("[handle_chat] Calling node.inject_event()...")
@@ -199,12 +217,22 @@ async def handle_chat(request: web.Request) -> web.Response:
)
else:
if node is None:
logger.error("[handle_chat] CRITICAL: Queen node is None! node_registry has %d keys: %s, queen_task=%s, queen_task_done=%s",
len(queen_executor.node_registry), list(queen_executor.node_registry.keys()),
session.queen_task, session.queen_task.done() if session.queen_task else None)
logger.error(
"[handle_chat] CRITICAL: Queen node is None!"
" node_registry has %d keys: %s,"
" queen_task=%s, queen_task_done=%s",
len(queen_executor.node_registry),
list(queen_executor.node_registry.keys()),
session.queen_task,
session.queen_task.done() if session.queen_task else None,
)
else:
logger.error("[handle_chat] CRITICAL: Queen node exists but missing inject_event! node_attrs=%s",
[a for a in dir(node) if not a.startswith('_')])
logger.error(
"[handle_chat] CRITICAL: Queen node exists"
" but missing inject_event!"
" node_attrs=%s",
[a for a in dir(node) if not a.startswith("_")],
)
# Queen is dead — try to revive her
logger.warning(
@@ -220,7 +248,9 @@ async def handle_chat(request: web.Request) -> web.Response:
_revived_executor = session.queen_executor
_revived_node = _revived_executor.node_registry.get("queen") if _revived_executor else None
if _revived_node is not None and hasattr(_revived_node, "inject_event"):
await _revived_node.inject_event(message, is_client_input=True, image_content=image_content)
await _revived_node.inject_event(
message, is_client_input=True, image_content=image_content
)
return web.json_response(
{
"status": "queen_revived",
+30 -14
@@ -324,9 +324,9 @@ class SessionManager:
runtime = runner._agent_runtime
if runtime is not None:
runtime._dynamic_memory_provider_factory = (
lambda execution_id, session=session: (
lambda execution_id=execution_id, session=session: session.worker_colony_recall_blocks.get(
runtime._dynamic_memory_provider_factory = lambda execution_id, session=session: (
lambda execution_id=execution_id, session=session: (
session.worker_colony_recall_blocks.get(
execution_id,
"",
)
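Note on the lambda rewrite above: the `execution_id=execution_id` default argument binds the value at definition time instead of late-binding it, so each inner provider keeps its own execution id. A minimal self-contained sketch of the idiom, with illustrative names that are not from this file:

def make_recall_providers(ids):
    # Bind each id through a default argument; a bare `lambda: ...` would
    # late-bind and every provider would return the block for the last id.
    return {i: (lambda i=i: f"recall block for {i}") for i in ids}

providers = make_recall_providers(["exec-a", "exec-b"])
print(providers["exec-a"]())  # -> recall block for exec-a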
@@ -716,7 +716,9 @@ class SessionManager:
from framework.agents.queen.reflection_agent import run_long_reflection
asyncio.create_task(
run_long_reflection(_llm, memory_dir=colony_memory_dir(_storage_id), caller="queen"),
run_long_reflection(
_llm, memory_dir=colony_memory_dir(_storage_id), caller="queen"
),
name=f"queen-memory-long-reflection-{session_id}",
)
@@ -780,9 +782,9 @@ class SessionManager:
colony_dir = colony_memory_dir(session.id)
init_memory_dir(colony_dir, migrate_legacy=True)
runtime._dynamic_memory_provider_factory = (
lambda execution_id, session=session: (
lambda execution_id=execution_id, session=session: session.worker_colony_recall_blocks.get(
runtime._dynamic_memory_provider_factory = lambda execution_id, session=session: (
lambda execution_id=execution_id, session=session: (
session.worker_colony_recall_blocks.get(
execution_id,
"",
)
@@ -833,7 +835,11 @@ class SessionManager:
"""
from framework.server.queen_orchestrator import create_queen
logger.debug("[_start_queen] Starting for session %s, current queen_executor=%s", session.id, session.queen_executor)
logger.debug(
"[_start_queen] Starting for session %s, current queen_executor=%s",
session.id,
session.queen_executor,
)
hive_home = Path.home() / ".hive"
@@ -926,7 +932,11 @@ class SessionManager:
queen_dir=queen_dir,
initial_prompt=initial_prompt,
)
logger.debug("[_start_queen] create_queen returned, queen_task=%s, queen_executor=%s", session.queen_task, session.queen_executor)
logger.debug(
"[_start_queen] create_queen returned, queen_task=%s, queen_executor=%s",
session.queen_task,
session.queen_executor,
)
# Auto-load worker on cold restore — the queen's conversation expects
# the agent to be loaded, but the new session has no worker.
@@ -1092,7 +1102,11 @@ class SessionManager:
"""
from framework.tools.queen_lifecycle_tools import build_worker_profile
logger.debug("[revive_queen] Starting revival for session '%s', current queen_executor=%s", session.id, session.queen_executor)
logger.debug(
"[revive_queen] Starting revival for session '%s', current queen_executor=%s",
session.id,
session.queen_executor,
)
# Build worker identity if worker is loaded
worker_identity = (
@@ -1104,11 +1118,13 @@ class SessionManager:
# Start queen with existing session context
logger.debug("[revive_queen] Calling _start_queen...")
await self._start_queen(
session, worker_identity=worker_identity
)
await self._start_queen(session, worker_identity=worker_identity)
logger.info("Queen revived for session '%s', new queen_executor=%s", session.id, session.queen_executor)
logger.info(
"Queen revived for session '%s', new queen_executor=%s",
session.id,
session.queen_executor,
)
# ------------------------------------------------------------------
# Lookups
@@ -30,7 +30,6 @@ from pathlib import Path
from typing import Any
class FileConversationStore:
"""File-per-part ConversationStore.
+5 -13
@@ -166,7 +166,7 @@ class QueenPhaseState:
from framework.agents.queen.queen_memory import format_for_injection
memory = format_for_injection()
_memory = format_for_injection() # noqa: F841
parts = []
if self.persona_prefix:
parts.append(self.persona_prefix)
@@ -1130,9 +1130,7 @@ def register_queen_lifecycle_tools(
),
parameters={"type": "object", "properties": {}},
)
registry.register(
"stop_graph_and_edit", _stop_edit_tool, lambda inputs: stop_graph_and_edit()
)
registry.register("stop_graph_and_edit", _stop_edit_tool, lambda inputs: stop_graph_and_edit())
tools_registered += 1
# --- stop_graph_and_plan (Running/Staging → Planning) ---------------------
@@ -1164,9 +1162,7 @@ def register_queen_lifecycle_tools(
),
parameters={"type": "object", "properties": {}},
)
registry.register(
"stop_graph_and_plan", _stop_plan_tool, lambda inputs: stop_graph_and_plan()
)
registry.register("stop_graph_and_plan", _stop_plan_tool, lambda inputs: stop_graph_and_plan())
tools_registered += 1
# --- replan_agent (Building → Planning) -----------------------------------
@@ -3221,9 +3217,7 @@ def register_queen_lifecycle_tools(
"focus": {
"type": "string",
"enum": ["activity", "memory", "tools", "issues", "progress", "full"],
"description": (
"Aspect to inspect. Omit for a brief summary."
),
"description": ("Aspect to inspect. Omit for a brief summary."),
},
"last_n": {
"type": "integer",
@@ -3310,9 +3304,7 @@ def register_queen_lifecycle_tools(
"required": ["content"],
},
)
registry.register(
"inject_message", _inject_tool, lambda inputs: inject_message(**inputs)
)
registry.register("inject_message", _inject_tool, lambda inputs: inject_message(**inputs))
tools_registered += 1
# --- list_credentials -----------------------------------------------------
+8 -6
@@ -279,12 +279,14 @@ class TestArtifact:
def check(self, description: str, passed: bool, actual: str = "", expected_val: str = ""):
"""Record an individual assertion check."""
self._data["checks"].append({
"description": description,
"passed": passed,
"actual": actual,
"expected": expected_val,
})
self._data["checks"].append(
{
"description": description,
"passed": passed,
"actual": actual,
"expected": expected_val,
}
)
def save(self):
"""Write artifact to disk."""
+29 -13
@@ -25,15 +25,35 @@ TESTS_DIR = Path(__file__).parent
# (env_var, display_name, litellm_model, display_model)
# display_model matches quickstart.sh labels; litellm_model is what LiteLLMProvider needs.
API_KEY_PROVIDERS = [
("ANTHROPIC_API_KEY", "Anthropic (Claude)", "claude-sonnet-4-20250514", "claude-sonnet-4-20250514"),
(
"ANTHROPIC_API_KEY",
"Anthropic (Claude)",
"claude-sonnet-4-20250514",
"claude-sonnet-4-20250514",
),
("OPENAI_API_KEY", "OpenAI", "gpt-5-mini", "gpt-5-mini"),
("GEMINI_API_KEY", "Google Gemini", "gemini/gemini-3-flash-preview", "gemini/gemini-3-flash-preview"),
(
"GEMINI_API_KEY",
"Google Gemini",
"gemini/gemini-3-flash-preview",
"gemini/gemini-3-flash-preview",
),
("KIMI_API_KEY", "Kimi", "kimi/kimi-k2.5", "kimi-k2.5"),
("ZAI_API_KEY", "ZAI (GLM)", "openai/glm-5", "openai/glm-5"),
("GROQ_API_KEY", "Groq", "moonshotai/kimi-k2-instruct-0905", "moonshotai/kimi-k2-instruct-0905"),
(
"GROQ_API_KEY",
"Groq",
"moonshotai/kimi-k2-instruct-0905",
"moonshotai/kimi-k2-instruct-0905",
),
("MISTRAL_API_KEY", "Mistral", "mistral-large-latest", "mistral-large-latest"),
("CEREBRAS_API_KEY", "Cerebras", "cerebras/zai-glm-4.7", "cerebras/zai-glm-4.7"),
("TOGETHER_API_KEY", "Together AI", "together_ai/meta-llama/Llama-3.3-70B-Instruct-Turbo", "together_ai/meta-llama/Llama-3.3-70B-Instruct-Turbo"),
(
"TOGETHER_API_KEY",
"Together AI",
"together_ai/meta-llama/Llama-3.3-70B-Instruct-Turbo",
"together_ai/meta-llama/Llama-3.3-70B-Instruct-Turbo",
),
("DEEPSEEK_API_KEY", "DeepSeek", "deepseek-chat", "deepseek-chat"),
("MINIMAX_API_KEY", "MiniMax", "MiniMax-M2.5", "MiniMax-M2.5"),
("HIVE_API_KEY", "Hive LLM", "hive/queen", "hive/queen"),
@@ -219,12 +239,12 @@ async def _smoke_test_provider_async(provider: dict, timeout_seconds: float = 25
This catches the common "pytest looks frozen on the first test" failure mode
where the first real LLM call hangs or never reaches a usable response.
"""
from framework.llm.litellm import LiteLLMProvider
from framework.llm.provider import Tool
from framework.graph.edge import GraphSpec
from framework.graph.executor import GraphExecutor
from framework.graph.goal import Goal
from framework.graph.node import NodeSpec
from framework.llm.litellm import LiteLLMProvider
from framework.llm.provider import Tool
from framework.runtime.core import Runtime
kwargs = {
@@ -333,9 +353,7 @@ async def _smoke_test_provider_async(provider: dict, timeout_seconds: float = 25
if not result.success:
raise RuntimeError(result.error or "worker execution smoke failed")
if result.output.get("result") != "OK":
raise RuntimeError(
"worker execution completed but did not produce result='OK'"
)
raise RuntimeError("worker execution completed but did not produce result='OK'")
async def _run_branch_execution() -> None:
with TemporaryDirectory(prefix="dummy-branch-smoke-") as tmpdir:
@@ -428,8 +446,7 @@ async def _smoke_test_provider_async(provider: dict, timeout_seconds: float = 25
raise RuntimeError(result.error or "branch execution smoke failed")
if result.path != ["classify", "positive"]:
raise RuntimeError(
"branch execution did not reach the expected terminal path: "
f"{result.path}"
f"branch execution did not reach the expected terminal path: {result.path}"
)
if not result.output.get("result"):
raise RuntimeError(
@@ -462,8 +479,7 @@ async def _smoke_test_provider_async(provider: dict, timeout_seconds: float = 25
await asyncio.wait_for(_run_branch_execution(), timeout=current_timeout)
except TimeoutError as exc:
raise RuntimeError(
f"provider smoke test timed out during {current_step} "
f"after {current_timeout:.0f}s"
f"provider smoke test timed out during {current_step} after {current_timeout:.0f}s"
) from exc
@@ -83,7 +83,7 @@ async def test_escalation_worker_calls_escalate(runtime, goal, llm_provider, tmp
),
timeout=30,
)
except (TimeoutError, _asyncio.TimeoutError):
except TimeoutError:
pass # Expected: worker hangs waiting for queen
artifact.record_value(
@@ -18,7 +18,6 @@ import pytest
from framework.runtime.event_bus import AgentEvent, EventBus, EventType
from framework.server.session_manager import Session
# ---------------------------------------------------------------------------
# Helpers
# ---------------------------------------------------------------------------
@@ -53,7 +52,7 @@ class PhaseCapture:
try:
await asyncio.wait_for(evt.wait(), timeout=timeout)
return True
except (TimeoutError, asyncio.TimeoutError):
except TimeoutError:
return False
@@ -74,7 +73,7 @@ class TextCapture:
try:
await asyncio.wait_for(self._has_text.wait(), timeout=timeout)
return True
except (TimeoutError, asyncio.TimeoutError):
except TimeoutError:
return False
@property
@@ -145,7 +144,7 @@ async def _shutdown_queen(session: Session, task: asyncio.Task) -> None:
task.cancel()
try:
await asyncio.wait_for(task, timeout=5)
except (asyncio.CancelledError, TimeoutError, asyncio.TimeoutError):
except (asyncio.CancelledError, TimeoutError):
pass
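The repeated collapse of `except (TimeoutError, asyncio.TimeoutError)` into a bare `except TimeoutError` in these test hunks works because, on Python 3.11 and later, `asyncio.TimeoutError` is an alias of the builtin `TimeoutError`; ruff's pyupgrade rules flag the redundant alias. A quick check, assuming a 3.11+ interpreter:

import asyncio

# Alias since Python 3.11, so catching the builtin also catches asyncio timeouts.
assert asyncio.TimeoutError is TimeoutError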
@@ -508,7 +507,7 @@ async def test_queen_responds_to_message(llm_provider, tmp_path, artifact):
try:
await asyncio.wait_for(turn_complete.wait(), timeout=QUEEN_RESPONSE_TIMEOUT)
got_turn = True
except (TimeoutError, asyncio.TimeoutError):
except TimeoutError:
pass
artifact.record_value(
@@ -546,7 +545,7 @@ async def test_queen_responds_after_injected_message(llm_provider, tmp_path, art
)
try:
await asyncio.wait_for(first_turn.wait(), timeout=QUEEN_RESPONSE_TIMEOUT)
except (TimeoutError, asyncio.TimeoutError):
except TimeoutError:
pass
session.event_bus.unsubscribe(sub_id)
@@ -573,7 +572,7 @@ async def test_queen_responds_after_injected_message(llm_provider, tmp_path, art
try:
await asyncio.wait_for(second_turn.wait(), timeout=QUEEN_RESPONSE_TIMEOUT)
got_turn = True
except (TimeoutError, asyncio.TimeoutError):
except TimeoutError:
pass
artifact.record_value(
@@ -17,7 +17,6 @@ from __future__ import annotations
import asyncio
import time
from pathlib import Path
from unittest.mock import MagicMock
import pytest
@@ -26,7 +25,6 @@ from framework.runtime.event_bus import AgentEvent, EventBus, EventType
from framework.server.session_manager import Session
from framework.tools.queen_lifecycle_tools import QueenPhaseState
# ---------------------------------------------------------------------------
# Helpers
# ---------------------------------------------------------------------------
@@ -76,7 +74,7 @@ async def _shutdown(session, task):
task.cancel()
try:
await asyncio.wait_for(task, timeout=5)
except (asyncio.CancelledError, TimeoutError, asyncio.TimeoutError):
except (asyncio.CancelledError, TimeoutError):
pass
@@ -309,9 +307,7 @@ async def test_running_to_building_blocked(llm_provider, tmp_path, artifact):
actual=repr(ps.phase),
expected_val="'running'",
)
assert ps.phase == "running", (
f"running->building should be BLOCKED, got: {ps.phase}"
)
assert ps.phase == "running", f"running->building should be BLOCKED, got: {ps.phase}"
finally:
await _shutdown(session, task)
@@ -338,9 +334,7 @@ async def test_running_to_planning_blocked(llm_provider, tmp_path, artifact):
actual=repr(ps.phase),
expected_val="'running'",
)
assert ps.phase == "running", (
f"running->planning should be BLOCKED, got: {ps.phase}"
)
assert ps.phase == "running", f"running->planning should be BLOCKED, got: {ps.phase}"
finally:
await _shutdown(session, task)
@@ -378,18 +372,14 @@ async def test_editing_to_building_blocked(llm_provider, tmp_path, artifact):
await ps.switch_to_building(source="test")
artifact.record_value(
"phase_after_blocked", ps.phase, expected="'editing' (blocked)"
)
artifact.record_value("phase_after_blocked", ps.phase, expected="'editing' (blocked)")
artifact.check(
"phase still editing",
ps.phase == "editing",
actual=repr(ps.phase),
expected_val="'editing'",
)
assert ps.phase == "editing", (
f"editing->building should be BLOCKED, got: {ps.phase}"
)
assert ps.phase == "editing", f"editing->building should be BLOCKED, got: {ps.phase}"
finally:
await _shutdown(session, task)
@@ -405,18 +395,14 @@ async def test_editing_to_planning_blocked(llm_provider, tmp_path, artifact):
await ps.switch_to_planning(source="test")
artifact.record_value(
"phase_after_blocked", ps.phase, expected="'editing' (blocked)"
)
artifact.record_value("phase_after_blocked", ps.phase, expected="'editing' (blocked)")
artifact.check(
"phase still editing",
ps.phase == "editing",
actual=repr(ps.phase),
expected_val="'editing'",
)
assert ps.phase == "editing", (
f"editing->planning should be BLOCKED, got: {ps.phase}"
)
assert ps.phase == "editing", f"editing->planning should be BLOCKED, got: {ps.phase}"
finally:
await _shutdown(session, task)
@@ -728,9 +728,10 @@ async def test_strict_artifact_creation_and_verification(
@pytest.mark.asyncio
async def test_strict_feedback_loop_visit_counts(runtime, goal, llm_provider, artifact):
"""Feedback loop must respect max_node_visits and record visit counts."""
from .nodes import StatefulNode, SuccessNode
from framework.graph.node import NodeResult
from .nodes import StatefulNode, SuccessNode
graph = GraphSpec(
id="strict-feedback",
goal_id="dummy",
@@ -47,7 +47,7 @@ def test_tools_registry_has_expected_tools(tool_registry, artifact):
)
artifact.record_value("expected_tools", sorted(expected))
missing = expected - tool_names
missing = expected - tool_names # noqa: F841
artifact.check(
"expected tools present",
expected.issubset(tool_names),
@@ -236,8 +236,7 @@ async def test_verified_artifact_binary_match(
artifact.record(
result,
expected=(
"success=True, file 'verified.txt' on disk "
"matches VERIFIED_PAYLOAD_99_ZULU exactly"
"success=True, file 'verified.txt' on disk matches VERIFIED_PAYLOAD_99_ZULU exactly"
),
)
@@ -887,7 +887,7 @@ async def test_worker_escalation_emits_event_with_reason(
),
timeout=30,
)
except (TimeoutError, asyncio.TimeoutError):
except TimeoutError:
pass # Expected: worker hangs waiting for queen
escalations = capture.of_type(EventType.ESCALATION_REQUESTED)
-2
@@ -7,8 +7,6 @@ accomplish tasks, matching how real agents are structured.
from __future__ import annotations
from pathlib import Path
import pytest
from framework.graph.edge import EdgeCondition, EdgeSpec, GraphSpec
+1 -1
@@ -20,7 +20,7 @@ logging.getLogger("framework.llm.litellm").setLevel(logging.DEBUG)
from framework.config import RuntimeConfig # noqa: E402
from framework.graph.event_loop_node import EventLoopNode, LoopConfig # noqa: E402
from framework.graph.node import NodeContext, NodeResult, NodeSpec, DataBuffer # noqa: E402
from framework.graph.node import DataBuffer, NodeContext, NodeResult, NodeSpec # noqa: E402
from framework.llm.litellm import LiteLLMProvider # noqa: E402
+3 -1
@@ -50,7 +50,9 @@ class TestDeprecatedClientFacingWarnings:
entry_node="queen",
nodes=[
NodeSpec(id="queen", name="Queen", description="Queen node", client_facing=True),
NodeSpec(id="worker", name="Worker", description="Worker node", client_facing=False),
NodeSpec(
id="worker", name="Worker", description="Worker node", client_facing=False
),
],
edges=[
EdgeSpec(
+1 -1
@@ -16,7 +16,7 @@ logging.getLogger("framework.llm.litellm").setLevel(logging.DEBUG)
from framework.config import RuntimeConfig # noqa: E402
from framework.graph.event_loop_node import EventLoopNode, LoopConfig # noqa: E402
from framework.graph.node import NodeContext, NodeResult, NodeSpec, DataBuffer # noqa: E402
from framework.graph.node import DataBuffer, NodeContext, NodeResult, NodeSpec # noqa: E402
from framework.llm.litellm import LiteLLMProvider # noqa: E402
+1 -1
@@ -21,7 +21,7 @@ from framework.graph.conversation import NodeConversation
from framework.graph.edge import EdgeCondition, EdgeSpec, GraphSpec
from framework.graph.executor import GraphExecutor
from framework.graph.goal import Goal
from framework.graph.node import NodeResult, NodeSpec, DataBuffer
from framework.graph.node import DataBuffer, NodeResult, NodeSpec
from framework.graph.prompt_composer import (
build_narrative,
build_transition_marker,
+11 -3
@@ -25,11 +25,11 @@ from framework.graph.event_loop_node import (
from framework.graph.executor import GraphExecutor
from framework.graph.goal import Goal
from framework.graph.node import (
DataBuffer,
NodeContext,
NodeProtocol,
NodeResult,
NodeSpec,
DataBuffer,
)
from framework.llm.provider import LLMProvider, LLMResponse, Tool, ToolResult, ToolUse
from framework.llm.stream_events import (
@@ -368,13 +368,21 @@ async def test_event_loop_branch_graph_routes_to_terminal(runtime):
scripts = [
StreamScript(
tool_calls=[
{"name": "set_output", "id": "tc_label", "input": {"key": "label", "value": "positive"}}
{
"name": "set_output",
"id": "tc_label",
"input": {"key": "label", "value": "positive"},
}
],
),
StreamScript(text="Classification done."),
StreamScript(
tool_calls=[
{"name": "set_output", "id": "tc_result", "input": {"key": "result", "value": "positive path"}}
{
"name": "set_output",
"id": "tc_result",
"input": {"key": "result", "value": "positive path"},
}
],
),
StreamScript(text="Handled positive branch."),
+2 -4
@@ -21,7 +21,7 @@ from framework.graph.event_loop_node import (
LoopConfig,
OutputAccumulator,
)
from framework.graph.node import NodeContext, NodeProtocol, NodeSpec, DataBuffer
from framework.graph.node import DataBuffer, NodeContext, NodeProtocol, NodeSpec
from framework.llm.provider import LLMProvider, LLMResponse, Tool, ToolResult, ToolUse
from framework.llm.stream_events import (
FinishEvent,
@@ -1645,9 +1645,7 @@ class TestTransientErrorRetry:
assert llm._call_index == 1 # only tried once
@pytest.mark.asyncio
async def test_queen_non_transient_error_does_not_crash(
self, runtime, node_spec, buffer
):
async def test_queen_non_transient_error_does_not_crash(self, runtime, node_spec, buffer):
"""Queen non-transient errors should wait for input, not crash on token vars."""
node_spec.output_keys = []
llm = ErrorThenSuccessLLM(
+1 -1
@@ -7,7 +7,7 @@ string content, not just the first 500 characters.
import pytest
from framework.graph.node import DataBufferWriteError, DataBuffer
from framework.graph.node import DataBuffer, DataBufferWriteError
from framework.graph.validator import OutputValidator, ValidationResult
-2
@@ -8,11 +8,9 @@ from typing import Any
import pytest
from framework.graph.conversation import (
LEGACY_RUN_ID,
Message,
NodeConversation,
extract_tool_call_history,
is_legacy_run_id,
)
from framework.storage.conversation_store import FileConversationStore
+23 -11
@@ -11,11 +11,11 @@ from unittest.mock import AsyncMock, MagicMock
import pytest
from framework.agents.queen import queen_memory_v2 as qm
from framework.agents.queen.reflection_agent import subscribe_worker_memory_triggers
from framework.agents.queen.recall_selector import (
format_recall_injection,
select_memories,
)
from framework.agents.queen.reflection_agent import subscribe_worker_memory_triggers
from framework.graph.prompting import build_system_prompt_for_node_context
from framework.runtime.event_bus import AgentEvent, EventBus, EventType
from framework.tools.queen_lifecycle_tools import QueenPhaseState
@@ -237,9 +237,7 @@ async def test_select_memories_with_files(tmp_path: Path):
(tmp_path / "b.md").write_text("---\nname: b\ndescription: about B\ntype: reference\n---\nbody")
llm = AsyncMock()
llm.acomplete.return_value = MagicMock(
content=json.dumps({"selected_memories": ["a.md"]})
)
llm.acomplete.return_value = MagicMock(content=json.dumps({"selected_memories": ["a.md"]}))
result = await select_memories("tell me about A", llm, memory_dir=tmp_path)
assert result == ["a.md"]
@@ -309,7 +307,12 @@ async def test_short_reflection(tmp_path: Path):
"name": "write_memory_file",
"input": {
"filename": "user-likes-tests.md",
"content": "---\nname: user-likes-tests\ntype: technique\ndescription: User values thorough testing\n---\nObserved emphasis on test coverage.",
"content": (
"---\nname: user-likes-tests\n"
"type: technique\n"
"description: User values thorough testing\n"
"---\nObserved emphasis on test coverage."
),
},
}
]
@@ -341,8 +344,12 @@ async def test_long_reflection(tmp_path: Path):
mem_dir = tmp_path / "memories"
mem_dir.mkdir()
(mem_dir / "dup-a.md").write_text("---\nname: dup-a\ntype: goal\ndescription: goal A\n---\nGoal A details.")
(mem_dir / "dup-b.md").write_text("---\nname: dup-b\ntype: goal\ndescription: goal A duplicate\n---\nSame goal A.")
(mem_dir / "dup-a.md").write_text(
"---\nname: dup-a\ntype: goal\ndescription: goal A\n---\nGoal A details."
)
(mem_dir / "dup-b.md").write_text(
"---\nname: dup-b\ntype: goal\ndescription: goal A duplicate\n---\nSame goal A."
)
llm = AsyncMock()
llm.acomplete.side_effect = [
@@ -365,7 +372,12 @@ async def test_long_reflection(tmp_path: Path):
"name": "write_memory_file",
"input": {
"filename": "dup-a.md",
"content": "---\nname: dup-a\ntype: goal\ndescription: goal A (merged)\n---\nGoal A details. Also same goal A.",
"content": (
"---\nname: dup-a\ntype: goal\n"
"description: goal A (merged)\n"
"---\nGoal A details."
" Also same goal A."
),
},
},
{
@@ -512,7 +524,9 @@ def test_save_global_memory_persists_frontmatter(tmp_path: Path):
def test_build_system_prompt_injects_dynamic_memory():
ctx = SimpleNamespace(
identity_prompt="Identity",
node_spec=SimpleNamespace(system_prompt="Focus", node_type="event_loop", output_keys=["out"]),
node_spec=SimpleNamespace(
system_prompt="Focus", node_type="event_loop", output_keys=["out"]
),
narrative="Narrative",
accounts_prompt="",
skills_catalog_prompt="",
@@ -649,5 +663,3 @@ async def test_subscribe_worker_triggers_only_lifecycle_events(tmp_path: Path):
finally:
for sub_id in subs:
bus.unsubscribe(sub_id)
+1 -1
@@ -19,7 +19,7 @@ from framework.graph.event_loop_node import (
LoopConfig,
SubagentJudge,
)
from framework.graph.node import NodeContext, NodeSpec, DataBuffer
from framework.graph.node import DataBuffer, NodeContext, NodeSpec
from framework.llm.provider import LLMProvider, LLMResponse, Tool, ToolResult, ToolUse
from framework.llm.stream_events import (
FinishEvent,
@@ -117,9 +117,7 @@ def tui(verbose: bool, debug: bool) -> None:
try:
from framework.tui.app import AdenTUI
except ImportError:
click.echo(
"TUI requires the 'textual' package. Install with: pip install textual"
)
click.echo("TUI requires the 'textual' package. Install with: pip install textual")
sys.exit(1)
from framework.llm import LiteLLMProvider
@@ -249,9 +247,7 @@ async def _interactive_shell(verbose: bool = False) -> None:
click.echo("\nGathering competitive intelligence...\n")
result = await agent.trigger_and_wait(
"start", {"competitors_input": user_input}
)
result = await agent.trigger_and_wait("start", {"competitors_input": user_input})
if result is None:
click.echo("\n[Execution timed out]\n")
@@ -318,9 +318,7 @@ class CompetitiveIntelAgent:
"""
await self.start()
try:
result = await self.trigger_and_wait(
"start", context, session_state=session_state
)
result = await self.trigger_and_wait("start", context, session_state=session_state)
return result or ExecutionResult(success=False, error="Execution timeout")
finally:
await self.stop()
@@ -370,9 +368,7 @@ class CompetitiveIntelAgent:
for ep_id, node_id in self.entry_points.items():
if node_id not in node_ids:
errors.append(
f"Entry point '{ep_id}' references unknown node '{node_id}'"
)
errors.append(f"Entry point '{ep_id}' references unknown node '{node_id}'")
return {
"valid": len(errors) == 0,
@@ -68,9 +68,7 @@ def tui(verbose, debug):
try:
from framework.tui.app import AdenTUI
except ImportError:
click.echo(
"TUI requires the 'textual' package. Install with: pip install textual"
)
click.echo("TUI requires the 'textual' package. Install with: pip install textual")
sys.exit(1)
from pathlib import Path
@@ -187,9 +185,7 @@ async def _interactive_shell(verbose=False):
try:
while True:
try:
topic = await asyncio.get_event_loop().run_in_executor(
None, input, "Topic> "
)
topic = await asyncio.get_event_loop().run_in_executor(None, input, "Topic> ")
if topic.lower() in ["quit", "exit", "q"]:
click.echo("Goodbye!")
break
@@ -285,15 +285,11 @@ class DeepResearchAgent:
session_state=session_state,
)
async def run(
self, context: dict, mock_mode=False, session_state=None
) -> ExecutionResult:
async def run(self, context: dict, mock_mode=False, session_state=None) -> ExecutionResult:
"""Run the agent (convenience method for single execution)."""
await self.start(mock_mode=mock_mode)
try:
result = await self.trigger_and_wait(
"default", context, session_state=session_state
)
result = await self.trigger_and_wait("default", context, session_state=session_state)
return result or ExecutionResult(success=False, error="Execution timeout")
finally:
await self.stop()
@@ -338,9 +334,7 @@ class DeepResearchAgent:
for ep_id, node_id in self.entry_points.items():
if node_id not in node_ids:
errors.append(
f"Entry point '{ep_id}' references unknown node '{node_id}'"
)
errors.append(f"Entry point '{ep_id}' references unknown node '{node_id}'")
return {
"valid": len(errors) == 0,
@@ -77,9 +77,7 @@ def tui(mock, verbose, debug):
try:
from framework.tui.app import AdenTUI
except ImportError:
click.echo(
"TUI requires the 'textual' package. Install with: pip install textual"
)
click.echo("TUI requires the 'textual' package. Install with: pip install textual")
sys.exit(1)
from pathlib import Path
@@ -202,9 +200,7 @@ async def _interactive_shell(verbose=False):
try:
while True:
try:
rules = await asyncio.get_event_loop().run_in_executor(
None, input, "Rules> "
)
rules = await asyncio.get_event_loop().run_in_executor(None, input, "Rules> ")
if rules.lower() in ["quit", "exit", "q"]:
click.echo("Goodbye!")
break
@@ -35,8 +35,7 @@ goal = Goal(
SuccessCriterion(
id="correct-action-execution",
description=(
"Gmail actions are applied correctly to the right emails "
"based on the user's rules"
"Gmail actions are applied correctly to the right emails based on the user's rules"
),
metric="action_correctness",
target=">=95%",
@@ -308,15 +307,11 @@ class EmailInboxManagementAgent:
session_state=session_state,
)
async def run(
self, context: dict, mock_mode=False, session_state=None
) -> ExecutionResult:
async def run(self, context: dict, mock_mode=False, session_state=None) -> ExecutionResult:
"""Run the agent (convenience method for single execution)."""
await self.start(mock_mode=mock_mode)
try:
result = await self.trigger_and_wait(
"default", context, session_state=session_state
)
result = await self.trigger_and_wait("default", context, session_state=session_state)
return result or ExecutionResult(success=False, error="Execution timeout")
finally:
await self.stop()
@@ -361,9 +356,7 @@ class EmailInboxManagementAgent:
for ep_id, node_id in self.entry_points.items():
if node_id not in node_ids:
errors.append(
f"Entry point '{ep_id}' references unknown node '{node_id}'"
)
errors.append(f"Entry point '{ep_id}' references unknown node '{node_id}'")
return {
"valid": len(errors) == 0,
@@ -144,10 +144,7 @@ Do NOT add commentary or explanation. Execute the steps and call set_output when
classify_and_act_node = NodeSpec(
id="classify-and-act",
name="Classify and Act",
description=(
"Apply the user's rules to each email and execute "
"the appropriate Gmail actions."
),
description=("Apply the user's rules to each email and execute the appropriate Gmail actions."),
node_type="event_loop",
client_facing=False,
max_node_visits=0,
@@ -102,8 +102,7 @@ def _get_data_dir() -> str:
ctx = _execution_context.get()
if not ctx or "data_dir" not in ctx:
raise RuntimeError(
"data_dir not set in execution context. "
"Is the tool running inside a GraphExecutor?"
"data_dir not set in execution context. Is the tool running inside a GraphExecutor?"
)
return ctx["data_dir"]
@@ -216,9 +215,7 @@ def _bulk_fetch_emails(
resp = client.get(f"{GMAIL_API_BASE}/messages", params=params)
if resp.status_code != 200:
raise RuntimeError(
f"Gmail list failed (HTTP {resp.status_code}): {resp.text}"
)
raise RuntimeError(f"Gmail list failed (HTTP {resp.status_code}): {resp.text}")
data = resp.json()
messages = data.get("messages", [])
@@ -261,9 +258,7 @@ def _bulk_fetch_emails(
)
if r.status_code == 200:
raw = r.json()
parsed = _parse_headers(
raw.get("payload", {}).get("headers", [])
)
parsed = _parse_headers(raw.get("payload", {}).get("headers", []))
emails.append(
{
"id": raw.get("id"),
@@ -285,9 +280,7 @@ def _bulk_fetch_emails(
if attempt < retries:
time.sleep(0.5)
continue
logger.warning(
f"Failed to fetch {msg_id} after {retries + 1} attempts: {e}"
)
logger.warning(f"Failed to fetch {msg_id} after {retries + 1} attempts: {e}")
dropped = len(message_ids) - len(emails)
if dropped > 0:
@@ -302,9 +295,7 @@ def _bulk_fetch_emails(
for email in emails:
f.write(json.dumps(email, ensure_ascii=False) + "\n")
logger.info(
f"Wrote {len(emails)} emails to emails.jsonl ({output_path.stat().st_size} bytes)"
)
logger.info(f"Wrote {len(emails)} emails to emails.jsonl ({output_path.stat().st_size} bytes)")
return {
"filename": "emails.jsonl",
"count": len(emails),
@@ -212,9 +212,7 @@ class EmailReplyAgent:
async def run(self, context, session_state=None):
await self.start()
try:
result = await self.trigger_and_wait(
"default", context, session_state=session_state
)
result = await self.trigger_and_wait("default", context, session_state=session_state)
return result or ExecutionResult(success=False, error="Execution timeout")
finally:
await self.stop()
@@ -36,9 +36,7 @@ default_config = RuntimeConfig()
class AgentMetadata:
name: str = "Email Reply Agent"
version: str = "1.0.0"
description: str = (
"Filter unreplied emails, confirm recipients, send personalized replies."
)
description: str = "Filter unreplied emails, confirm recipients, send personalized replies."
intro_message: str = "Tell me which emails you want to reply to (e.g., 'emails from @company.com in the last week')."
+2 -6
@@ -69,9 +69,7 @@ def tui(mock, verbose, debug):
try:
from framework.tui.app import AdenTUI
except ImportError:
click.echo(
"TUI requires the 'textual' package. Install with: pip install textual"
)
click.echo("TUI requires the 'textual' package. Install with: pip install textual")
sys.exit(1)
from pathlib import Path
@@ -190,9 +188,7 @@ async def _interactive_shell(verbose=False):
try:
while True:
try:
user_input = await asyncio.get_event_loop().run_in_executor(
None, input, "> "
)
user_input = await asyncio.get_event_loop().run_in_executor(None, input, "> ")
if user_input.lower() in ["quit", "exit", "q"]:
click.echo("Goodbye!")
break
+2 -6
@@ -265,15 +265,11 @@ class JobHunterAgent:
session_state=session_state,
)
async def run(
self, context: dict, mock_mode=False, session_state=None
) -> ExecutionResult:
async def run(self, context: dict, mock_mode=False, session_state=None) -> ExecutionResult:
"""Run the agent (convenience method for single execution)."""
await self.start(mock_mode=mock_mode)
try:
result = await self.trigger_and_wait(
"default", context, session_state=session_state
)
result = await self.trigger_and_wait("default", context, session_state=session_state)
return result or ExecutionResult(success=False, error="Execution timeout")
finally:
await self.stop()
@@ -114,9 +114,7 @@ async def _interactive_shell(verbose=False):
try:
while True:
try:
query = await asyncio.get_event_loop().run_in_executor(
None, input, "Query> "
)
query = await asyncio.get_event_loop().run_in_executor(None, input, "Query> ")
if query.lower() in ["quit", "exit", "q"]:
click.echo("Goodbye!")
break
@@ -111,9 +111,7 @@ class LocalBusinessExtractor:
)
def _setup(self):
self._storage_path = (
Path.home() / ".hive" / "agents" / "local_business_extractor"
)
self._storage_path = Path.home() / ".hive" / "agents" / "local_business_extractor"
self._storage_path.mkdir(parents=True, exist_ok=True)
self._tool_registry = ToolRegistry()
mcp_config = Path(__file__).parent / "mcp_servers.json"
@@ -143,9 +141,7 @@ class LocalBusinessExtractor:
llm=llm,
tools=tools,
tool_executor=tool_executor,
checkpoint_config=CheckpointConfig(
enabled=True, checkpoint_on_node_complete=True
),
checkpoint_config=CheckpointConfig(enabled=True, checkpoint_on_node_complete=True),
)
async def start(self):
@@ -27,9 +27,7 @@ def cli():
@cli.command()
@click.option("--attendee", "-a", required=True, help="Attendee email address")
@click.option(
"--duration", "-d", type=int, required=True, help="Meeting duration in minutes"
)
@click.option("--duration", "-d", type=int, required=True, help="Meeting duration in minutes")
@click.option("--title", "-t", required=True, help="Meeting title")
@click.option("--verbose", "-v", is_flag=True)
def run(attendee, duration, title, verbose):
@@ -45,9 +43,7 @@ def run(attendee, duration, title, verbose):
)
)
click.echo(
json.dumps(
{"success": result.success, "output": result.output}, indent=2, default=str
)
json.dumps({"success": result.success, "output": result.output}, indent=2, default=str)
)
sys.exit(0 if result.success else 1)
@@ -214,9 +214,7 @@ class MeetingScheduler:
async def run(self, context, session_state=None):
await self.start()
try:
result = await self.trigger_and_wait(
"default", context, session_state=session_state
)
result = await self.trigger_and_wait("default", context, session_state=session_state)
return result or ExecutionResult(success=False, error="Execution timeout")
finally:
await self.stop()
+2 -6
@@ -62,9 +62,7 @@ def cli():
default=20,
help="Max contacts to process per batch (default: 20)",
)
@click.option(
"--mock", is_flag=True, help="Run in mock mode without LLM or Gmail calls"
)
@click.option("--mock", is_flag=True, help="Run in mock mode without LLM or Gmail calls")
@click.option("--quiet", "-q", is_flag=True, help="Only output result JSON")
@click.option("--verbose", "-v", is_flag=True, help="Show execution details")
@click.option("--debug", is_flag=True, help="Show debug logging")
@@ -105,9 +103,7 @@ def tui(mock, verbose, debug):
try:
from framework.tui.app import AdenTUI
except ImportError:
click.echo(
"TUI requires the 'textual' package. Install with: pip install textual"
)
click.echo("TUI requires the 'textual' package. Install with: pip install textual")
sys.exit(1)
async def run_with_tui():
+3 -9
@@ -307,15 +307,11 @@ class SDRAgent:
session_state=session_state,
)
async def run(
self, context: dict, mock_mode=False, session_state=None
) -> ExecutionResult:
async def run(self, context: dict, mock_mode=False, session_state=None) -> ExecutionResult:
"""Run the agent (convenience method for single execution)."""
await self.start(mock_mode=mock_mode)
try:
result = await self.trigger_and_wait(
"default", context, session_state=session_state
)
result = await self.trigger_and_wait("default", context, session_state=session_state)
return result or ExecutionResult(success=False, error="Execution timeout")
finally:
await self.stop()
@@ -360,9 +356,7 @@ class SDRAgent:
for ep_id, node_id in self.entry_points.items():
if node_id not in node_ids:
errors.append(
f"Entry point '{ep_id}' references unknown node '{node_id}'"
)
errors.append(f"Entry point '{ep_id}' references unknown node '{node_id}'")
return {
"valid": len(errors) == 0,
+1 -2
@@ -56,8 +56,7 @@ def _get_data_dir() -> str:
ctx = _execution_context.get()
if not ctx or "data_dir" not in ctx:
raise RuntimeError(
"data_dir not set in execution context. "
"Is the tool running inside a GraphExecutor?"
"data_dir not set in execution context. Is the tool running inside a GraphExecutor?"
)
return ctx["data_dir"]
@@ -67,9 +67,7 @@ def tui(verbose, debug):
try:
from framework.tui.app import AdenTUI
except ImportError:
click.echo(
"TUI requires the 'textual' package. Install with: pip install textual"
)
click.echo("TUI requires the 'textual' package. Install with: pip install textual")
sys.exit(1)
from pathlib import Path
@@ -185,9 +183,7 @@ async def _interactive_shell(verbose=False):
try:
while True:
try:
user_input = await asyncio.get_event_loop().run_in_executor(
None, input, "News> "
)
user_input = await asyncio.get_event_loop().run_in_executor(None, input, "News> ")
if user_input.lower() in ["quit", "exit", "q"]:
click.echo("Goodbye!")
break
@@ -229,9 +229,7 @@ class TechNewsReporterAgent:
"""Run the agent (convenience method for single execution)."""
await self.start()
try:
result = await self.trigger_and_wait(
"start", context, session_state=session_state
)
result = await self.trigger_and_wait("start", context, session_state=session_state)
return result or ExecutionResult(success=False, error="Execution timeout")
finally:
await self.stop()
@@ -276,9 +274,7 @@ class TechNewsReporterAgent:
for ep_id, node_id in self.entry_points.items():
if node_id not in node_ids:
errors.append(
f"Entry point '{ep_id}' references unknown node '{node_id}'"
)
errors.append(f"Entry point '{ep_id}' references unknown node '{node_id}'")
return {
"valid": len(errors) == 0,
@@ -116,9 +116,7 @@ async def _interactive_shell(verbose=False):
try:
while True:
try:
query = await asyncio.get_event_loop().run_in_executor(
None, input, "News> "
)
query = await asyncio.get_event_loop().run_in_executor(None, input, "News> ")
if query.lower() in ["quit", "exit", "q"]:
click.echo("Goodbye!")
break
@@ -198,9 +198,7 @@ class TwitterNewsAgent:
async def run(self, context, session_state=None):
await self.start()
try:
result = await self.trigger_and_wait(
"default", context, session_state=session_state
)
result = await self.trigger_and_wait("default", context, session_state=session_state)
return result or ExecutionResult(success=False, error="Execution timeout")
finally:
await self.stop()
@@ -14,7 +14,9 @@ class AgentMetadata:
description: str = (
"Monitors Twitter feeds and provides a daily news digest, focused on tech news."
)
intro_message: str = "I'm ready to fetch the latest tech news from Twitter. Which tech handles should I check?"
intro_message: str = (
"I'm ready to fetch the latest tech news from Twitter. Which tech handles should I check?"
)
metadata = AgentMetadata()
@@ -70,9 +70,7 @@ def tui(mock, verbose, debug):
try:
from framework.tui.app import AdenTUI
except ImportError:
click.echo(
"TUI requires the 'textual' package. Install with: pip install textual"
)
click.echo("TUI requires the 'textual' package. Install with: pip install textual")
sys.exit(1)
from pathlib import Path
@@ -152,9 +150,7 @@ def info(output_json):
click.echo(f"\nNodes: {', '.join(info_data['nodes'])}")
click.echo(f"Client-facing: {', '.join(info_data['client_facing_nodes'])}")
click.echo(f"Entry: {info_data['entry_node']}")
click.echo(
f"Terminal: {', '.join(info_data['terminal_nodes']) or '(forever-alive)'}"
)
click.echo(f"Terminal: {', '.join(info_data['terminal_nodes']) or '(forever-alive)'}")
@cli.command()
@@ -193,9 +189,7 @@ async def _interactive_shell(verbose=False):
try:
while True:
try:
target = await asyncio.get_event_loop().run_in_executor(
None, input, "Target> "
)
target = await asyncio.get_event_loop().run_in_executor(None, input, "Target> ")
if target.lower() in ["quit", "exit", "q"]:
click.echo("Goodbye!")
break
@@ -205,9 +199,7 @@ async def _interactive_shell(verbose=False):
click.echo("\nAssessing...\n")
result = await agent.trigger_and_wait(
"start", {"target_domain": target}
)
result = await agent.trigger_and_wait("start", {"target_domain": target})
if result is None:
click.echo("\n[Execution timed out]\n")
@@ -216,9 +208,7 @@ async def _interactive_shell(verbose=False):
if result.success:
output = result.output
if "report_status" in output:
click.echo(
f"\nAssessment complete: {output['report_status']}\n"
)
click.echo(f"\nAssessment complete: {output['report_status']}\n")
else:
click.echo(f"\nAssessment failed: {result.error}\n")
@@ -48,9 +48,7 @@ goal = Goal(
),
SuccessCriterion(
id="vulnerability-discovery",
description=(
"At least 3 security findings identified across different categories"
),
description=("At least 3 security findings identified across different categories"),
metric="vulnerabilities_found",
target=">=3",
weight=0.20,
@@ -58,8 +56,7 @@ goal = Goal(
SuccessCriterion(
id="remediation-guidance",
description=(
"Every finding includes clear, actionable remediation steps "
"a developer can follow"
"Every finding includes clear, actionable remediation steps a developer can follow"
),
metric="findings_with_remediation",
target="100%",
@@ -292,15 +289,11 @@ class VulnerabilityResearcherAgent:
session_state=session_state,
)
async def run(
self, context: dict, mock_mode=False, session_state=None
) -> ExecutionResult:
async def run(self, context: dict, mock_mode=False, session_state=None) -> ExecutionResult:
"""Run the agent (convenience method for single execution)."""
await self.start(mock_mode=mock_mode)
try:
result = await self.trigger_and_wait(
"start", context, session_state=session_state
)
result = await self.trigger_and_wait("start", context, session_state=session_state)
return result or ExecutionResult(success=False, error="Execution timeout")
finally:
await self.stop()
@@ -345,9 +338,7 @@ class VulnerabilityResearcherAgent:
for ep_id, node_id in self.entry_points.items():
if node_id not in node_ids:
errors.append(
f"Entry point '{ep_id}' references unknown node '{node_id}'"
)
errors.append(f"Entry point '{ep_id}' references unknown node '{node_id}'")
# Verify all nodes have at least one outgoing edge (forever-alive)
for node_id in node_ids:
+3
@@ -1,2 +1,5 @@
[tool.uv.workspace]
members = ["core", "tools"]
[tool.ruff]
line-length = 100
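The new `[tool.ruff]` table above only pins the line length. The import reordering and except-clause rewrites elsewhere in this commit come from ruff's lint rules (import sorting is the isort rule family, I), which can be enabled in this table or on the command line; a sketch of what an explicit configuration could look like, purely illustrative and not part of this diff:

[tool.ruff]
line-length = 100

[tool.ruff.lint]
# pycodestyle (E), pyflakes (F), isort-style import sorting (I); the selection is illustrative.
select = ["E", "F", "I"]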
+5 -16
@@ -65,9 +65,7 @@ def _extract_error_message(response: httpx.Response) -> str:
def _sanitize_openrouter_model_id(value: str) -> str:
"""Sanitize pasted OpenRouter model IDs into a comparable slug."""
normalized = unicodedata.normalize("NFKC", value or "")
normalized = "".join(
ch for ch in normalized if unicodedata.category(ch) not in {"Cc", "Cf"}
)
normalized = "".join(ch for ch in normalized if unicodedata.category(ch) not in {"Cc", "Cf"})
normalized = normalized.translate(OPENROUTER_SEPARATOR_TRANSLATION)
normalized = re.sub(r"\s+", "", normalized)
if normalized.casefold().startswith("openrouter/"):
@@ -203,9 +201,7 @@ def check_openrouter_model(
)
if r.status_code == 200:
available_model_lookup = _extract_openrouter_model_lookup(r.json())
matched_model = available_model_lookup.get(
_normalize_openrouter_model_id(requested_model)
)
matched_model = available_model_lookup.get(_normalize_openrouter_model_id(requested_model))
if matched_model:
return {
"valid": True,
@@ -231,10 +227,7 @@ def check_openrouter_model(
detail = _extract_error_message(r)
if r.status_code in (400, 404, 422):
base = (
"OpenRouter model is not available for this key/settings: "
f"{requested_model}"
)
base = f"OpenRouter model is not available for this key/settings: {requested_model}"
return {"valid": False, "message": f"{base}. {detail}" if detail else base}
suffix = f": {detail}" if detail else ""
@@ -244,9 +237,7 @@ def check_openrouter_model(
}
def check_minimax(
api_key: str, api_base: str = "https://api.minimax.io/v1", **_: str
) -> dict:
def check_minimax(api_key: str, api_base: str = "https://api.minimax.io/v1", **_: str) -> dict:
"""Validate via chatcompletion_v2 endpoint with empty messages.
MiniMax doesn't support GET /models; their native endpoint is
@@ -327,9 +318,7 @@ PROVIDERS = {
"mistral": lambda key, **_: check_openai_compatible(
key, "https://api.mistral.ai/v1/models", "Mistral"
),
"xai": lambda key, **_: check_openai_compatible(
key, "https://api.x.ai/v1/models", "xAI"
),
"xai": lambda key, **_: check_openai_compatible(key, "https://api.x.ai/v1/models", "xAI"),
"perplexity": lambda key, **_: check_openai_compatible(
key, "https://api.perplexity.ai/v1/models", "Perplexity"
),
+15 -8
@@ -3,8 +3,12 @@
Examples:
uv run scripts/debug_agent_node.py exports/reddit_star_growth_agent --list-nodes
uv run scripts/debug_agent_node.py exports/reddit_star_growth_agent --node load_contacted_users --task '{"repo_url":"https://github.com/acme/repo"}'
uv run scripts/debug_agent_node.py exports/reddit_star_growth_agent/nodes/__init__.py --node load_contacted_users --input-file /tmp/payload.json
uv run scripts/debug_agent_node.py exports/reddit_star_growth_agent \
--node load_contacted_users \
--task '{"repo_url":"https://github.com/acme/repo"}'
uv run scripts/debug_agent_node.py \
exports/reddit_star_growth_agent/nodes/__init__.py \
--node load_contacted_users --input-file /tmp/payload.json
"""
from __future__ import annotations
@@ -23,11 +27,11 @@ REPO_ROOT = Path(__file__).resolve().parents[1]
if str(REPO_ROOT) not in sys.path:
sys.path.insert(0, str(REPO_ROOT))
from framework.graph.checkpoint_config import CheckpointConfig
from framework.graph.edge import GraphSpec
from framework.runtime.agent_runtime import create_agent_runtime
from framework.runtime.execution_stream import EntryPointSpec
from framework.runner.runner import AgentRunner
from framework.graph.checkpoint_config import CheckpointConfig # noqa: E402
from framework.graph.edge import GraphSpec # noqa: E402
from framework.runtime.agent_runtime import create_agent_runtime # noqa: E402
from framework.runtime.execution_stream import EntryPointSpec # noqa: E402
from framework.runner.runner import AgentRunner # noqa: E402
def _configure_event_debug_logging(storage_path: Path) -> None:
@@ -122,7 +126,10 @@ def _parse_args() -> argparse.Namespace:
)
parser.add_argument(
"agent_path",
help="Export directory or file path (for example exports/my_agent or exports/my_agent/nodes/__init__.py).",
help=(
"Export directory or file path (for example"
" exports/my_agent or exports/my_agent/nodes/__init__.py)."
),
)
parser.add_argument(
"--node",
+1 -4
@@ -162,8 +162,5 @@ if __name__ == "__main__":
print_running_prompt()
else:
print(f"Unknown phase: {phase}")
print(
"Usage: uv run scripts/debug_queen_prompt.py "
"[planning|building|staging|running|all]"
)
print("Usage: uv run scripts/debug_queen_prompt.py [planning|building|staging|running|all]")
sys.exit(1)
+49 -38
@@ -108,11 +108,7 @@ def _discover_records(logs_dir: Path, limit_files: int) -> list[dict[str, Any]]:
raise FileNotFoundError(f"log directory not found: {logs_dir}")
files = sorted(
[
path
for path in logs_dir.iterdir()
if path.is_file() and path.suffix == ".jsonl"
],
[path for path in logs_dir.iterdir() if path.is_file() and path.suffix == ".jsonl"],
key=lambda path: path.stat().st_mtime,
reverse=True,
)[:limit_files]
@@ -164,9 +160,7 @@ def _group_sessions(
if not include_tests:
by_session = {
eid: recs
for eid, recs in by_session.items()
if not _is_test_session(eid, recs)
eid: recs for eid, recs in by_session.items() if not _is_test_session(eid, recs)
}
summaries: list[SessionSummary] = []
@@ -187,18 +181,10 @@ def _group_sessions(
end_timestamp=str(last.get("timestamp", "")),
turn_count=len(session_records),
streams=sorted(
{
str(r.get("stream_id", ""))
for r in session_records
if r.get("stream_id")
}
{str(r.get("stream_id", "")) for r in session_records if r.get("stream_id")}
),
nodes=sorted(
{
str(r.get("node_id", ""))
for r in session_records
if r.get("node_id")
}
{str(r.get("node_id", "")) for r in session_records if r.get("node_id")}
),
models=sorted(
{
@@ -585,12 +571,14 @@ def _render_html(
<aside class="sidebar">
<div class="brand">
<h1>Hive Debug</h1>
<p>Pick a session in the browser and inspect prompts, inputs, outputs, and tool activity turn by turn.</p>
<p>Pick a session in the browser and inspect prompts,
inputs, outputs, and tool activity turn by turn.</p>
</div>
<input id="sessionSearch" type="search" placeholder="Filter sessions">
<div class="setup-note">
<h3>Logging status</h3>
<p>LLM turn logging is always on. If this list is empty, run Hive once and refresh after the session produces turns.</p>
<p>LLM turn logging is always on. If this list is empty,
run Hive once and refresh after the session produces turns.</p>
<pre>~/.hive/llm_logs</pre>
</div>
<div class="session-list" id="sessionList"></div>
@@ -602,7 +590,8 @@ def _render_html(
<div class="meta-grid" id="metaGrid"></div>
</section>
<div class="toolbar">
<input id="turnFilter" type="search" placeholder="Filter selected session by text, tool name, role, model, or prompt content">
<input id="turnFilter" type="search"
placeholder="Filter by text, tool, role, model, or prompt">
<button type="button" id="expandAll">Expand all</button>
<button type="button" id="collapseAll">Collapse all</button>
</div>
@@ -610,7 +599,9 @@ def _render_html(
</main>
</div>
<script id="session-summaries" type="application/json">{json.dumps(summaries_data, ensure_ascii=False)}</script>
<script id="session-summaries" type="application/json">{
json.dumps(summaries_data, ensure_ascii=False)
}</script>
<script>
const summaries = JSON.parse(document.getElementById("session-summaries").textContent);
const recordCache = {{}};
@@ -669,9 +660,15 @@ def _render_html(
...(summary.models || []).slice(0, 2),
];
return `
<button type="button" class="session-card${{active}}" data-session-id="${{escapeHtml(summary.execution_id)}}">
<div class="sid">${{escapeHtml(summary.execution_id)}}</div>
<div class="meta">${{chips.map((chip) => `<span>${{escapeHtml(chip)}}</span>`).join("")}}</div>
<button type="button"
class="session-card${{active}}"
data-session-id="${{escapeHtml(summary.execution_id)}}">
<div class="sid">${{escapeHtml(
summary.execution_id
)}}</div>
<div class="meta">${{chips.map(
(chip) => `<span>${{escapeHtml(chip)}}</span>`
).join("")}}</div>
</button>
`;
}})
@@ -679,7 +676,9 @@ def _render_html(
}}
function renderMetaCard(label, value) {{
return `<div class="meta-card"><span class="label">${{escapeHtml(label)}}</span>${{escapeHtml(value || "-")}}</div>`;
return `<div class="meta-card"><span class="label">${{
escapeHtml(label)
}}</span>${{escapeHtml(value || "-")}}</div>`;
}}
function renderMessage(message, index) {{
@@ -699,7 +698,8 @@ def _render_html(
}}
${{
toolCalls
? `<details class="block"><summary>tool_calls</summary><pre>${{prettyJson(toolCalls)}}</pre></details>`
? `<details class="block"><summary>tool_calls</summary>` +
`<pre>${{prettyJson(toolCalls)}}</pre></details>`
: ""
}}
</div>
@@ -746,12 +746,18 @@ def _render_html(
</div>
${{
systemPrompt
? `<details class="block" open><summary>System prompt</summary><pre>${{escapeHtml(systemPrompt)}}</pre></details>`
? `<details class="block" open>` +
`<summary>System prompt</summary>` +
`<pre>${{escapeHtml(systemPrompt)}}</pre></details>`
: ""
}}
${{
messages.length
? `<details class="block" open><summary>Input messages (${{messages.length}})</summary>${{messages.map((message, index) => renderMessage(message, index + 1)).join("")}}</details>`
? `<details class="block" open>` +
`<summary>Input messages (${{messages.length}})` +
`</summary>${{messages.map((message, index) =>
renderMessage(message, index + 1)
).join("")}}</details>`
: ""
}}
<details class="block" open>
@@ -760,17 +766,26 @@ def _render_html(
</details>
${{
toolCalls.length
? `<details class="block" open><summary>Tool calls (${{toolCalls.length}})</summary>${{toolCalls.map((toolCall, index) => renderToolCall(toolCall, index + 1)).join("")}}</details>`
? `<details class="block" open>` +
`<summary>Tool calls (${{toolCalls.length}})` +
`</summary>${{toolCalls.map((toolCall, index) =>
renderToolCall(toolCall, index + 1)
).join("")}}</details>`
: ""
}}
${{
toolResults.length
? `<details class="block"><summary>Tool results (${{toolResults.length}})</summary><pre>${{prettyJson(toolResults)}}</pre></details>`
? `<details class="block">` +
`<summary>Tool results (${{toolResults.length}})` +
`</summary><pre>${{prettyJson(toolResults)}}` +
`</pre></details>`
: ""
}}
${{
parseError
? `<details class="block"><summary>Parse error</summary><pre>${{prettyJson(record)}}</pre></details>`
? `<details class="block">` +
`<summary>Parse error</summary>` +
`<pre>${{prettyJson(record)}}</pre></details>`
: ""
}}
</section>
@@ -881,9 +896,7 @@ def _run_server(
if records is None:
self._respond(404, "application/json", b"[]")
else:
body = json.dumps(
_sort_records(records), ensure_ascii=False
).encode("utf-8")
body = json.dumps(_sort_records(records), ensure_ascii=False).encode("utf-8")
self._respond(200, "application/json", body)
else:
self.send_error(404)
@@ -919,9 +932,7 @@ def main() -> int:
records = _discover_records(args.logs_dir.expanduser(), args.limit_files)
summaries, sessions = _group_sessions(records, include_tests=args.include_tests)
initial_session_id = args.session or (
summaries[0].execution_id if summaries else ""
)
initial_session_id = args.session or (summaries[0].execution_id if summaries else "")
if initial_session_id and initial_session_id not in sessions:
print(f"session not found: {initial_session_id}")
return 1
+1 -3
@@ -48,9 +48,7 @@ def test_check_requirements():
try:
data = json.loads(result.stdout)
assert data["json"] == "ok", "json should be ok"
assert "error" in data["nonexistent_module"], (
"nonexistent_module should have error"
)
assert "error" in data["nonexistent_module"], "nonexistent_module should have error"
assert result.returncode == 1, "Exit code should be 1 when errors exist"
print("✓ Test 2 passed")
except Exception as e:
+1 -1
@@ -28,7 +28,7 @@ async def main():
if bridge.is_connected:
print("✓ Extension connected!")
break
print(f" Waiting... ({i+1}/10)")
print(f" Waiting... ({i + 1}/10)")
else:
print("✗ Extension not connected")
await bridge.stop()
+26 -10
@@ -34,7 +34,7 @@ async def main():
if bridge.is_connected:
print("✓ Extension connected!")
break
print(f"Waiting for extension... ({i+1}/10)")
print(f"Waiting for extension... ({i + 1}/10)")
else:
print("✗ Extension not connected")
return
@@ -48,7 +48,9 @@ async def main():
# Navigate to LinkedIn
print("\n--- Navigating to LinkedIn ---")
try:
await bridge.navigate(tab_id, "https://www.linkedin.com", wait_until="load", timeout_ms=30000)
await bridge.navigate(
tab_id, "https://www.linkedin.com", wait_until="load", timeout_ms=30000
)
print("✓ Page loaded")
except Exception as e:
print(f"Navigation result: {e}")
@@ -59,6 +61,7 @@ async def main():
print("\n--- Test 1: Snapshot (with timeout protection) ---")
try:
import time
start = time.perf_counter()
snapshot = await bridge.snapshot(tab_id, timeout_s=15.0)
elapsed = time.perf_counter() - start
@@ -68,7 +71,7 @@ async def main():
if "truncated" in tree:
print(" (Tree was truncated due to size)")
print(f" First 300 chars:\n{tree[:300]}...")
except asyncio.TimeoutError:
except TimeoutError:
print("✗ Snapshot timed out (this shouldn't happen with 15s timeout)")
except Exception as e:
print(f"✗ Snapshot error: {e}")
@@ -105,7 +108,7 @@ async def main():
try:
pos_before = await bridge.evaluate(tab_id, get_scroll_positions)
before_data = pos_before.get("result", {}) if pos_before else {}
print(f" Positions before scroll:")
print(" Positions before scroll:")
if isinstance(before_data, dict):
for key, val in before_data.items():
print(f" {key}: {val}")
@@ -117,7 +120,7 @@ async def main():
if result.get("ok"):
method = result.get("method", "unknown")
container = result.get("container", "unknown")
container = result.get("container", "unknown") # noqa: F841
print(f" ✓ Scroll command succeeded using {method}")
else:
print(f" ✗ Scroll command failed: {result.get('error')}")
@@ -127,7 +130,7 @@ async def main():
# Get scroll positions after
pos_after = await bridge.evaluate(tab_id, get_scroll_positions)
after_data = pos_after.get("result", {}) if pos_after else {}
print(f" Positions after scroll:")
print(" Positions after scroll:")
if isinstance(after_data, dict):
for key, val in after_data.items():
print(f" {key}: {val}")
@@ -139,8 +142,16 @@ async def main():
if isinstance(before_data, dict) and isinstance(after_data, dict):
for key in after_data:
if key in before_data:
b_val = before_data[key].get("scrollTop", 0) if isinstance(before_data[key], dict) else 0
a_val = after_data[key].get("scrollTop", 0) if isinstance(after_data[key], dict) else 0
b_val = (
before_data[key].get("scrollTop", 0)
if isinstance(before_data[key], dict)
else 0
)
a_val = (
after_data[key].get("scrollTop", 0)
if isinstance(after_data[key], dict)
else 0
)
if a_val != b_val:
print(f" ✓ SCROLL CONFIRMED: {key} changed from {b_val} to {a_val}")
changed = True
@@ -149,6 +160,7 @@ async def main():
except Exception as e:
import traceback
print(f"✗ Scroll error: {e}")
traceback.print_exc()
@@ -157,10 +169,14 @@ async def main():
for i in range(3):
try:
result = await bridge.scroll(tab_id, "down", 200)
print(f" Scroll {i+1}: {result.get('method', 'failed')} on {result.get('container', 'unknown')}")
print(
f" Scroll {i + 1}: "
f"{result.get('method', 'failed')} "
f"on {result.get('container', 'unknown')}"
)
await asyncio.sleep(0.5)
except Exception as e:
print(f" Scroll {i+1} failed: {e}")
print(f" Scroll {i + 1} failed: {e}")
# Test 4: Snapshot after scroll
print("\n--- Test 4: Snapshot After Scroll ---")
+26 -11
@@ -25,7 +25,7 @@ from gcu.browser.bridge import BeelineBridge
async def wait_for_bridge(bridge: BeelineBridge, timeout: int = 5) -> bool:
"""Wait for extension connection."""
await bridge.start()
for i in range(timeout):
for _i in range(timeout):
await asyncio.sleep(1)
if bridge.is_connected:
return True
@@ -38,7 +38,9 @@ async def test_linkedin_profile_scroll(bridge: BeelineBridge, tab_id: int) -> di
try:
# Navigate to a LinkedIn page (public profile, no login required)
await bridge.navigate(tab_id, "https://www.linkedin.com/in/williamhgates/", wait_until="networkidle")
await bridge.navigate(
tab_id, "https://www.linkedin.com/in/williamhgates/", wait_until="networkidle"
)
await asyncio.sleep(3)
results = {"steps": []}
@@ -108,7 +110,9 @@ async def test_twitter_timeline(bridge: BeelineBridge, tab_id: int) -> dict:
await asyncio.sleep(2)
results["steps"].append("scrolled")
results["ok"] = data.get("pageTitle", "").lower().find("x") >= 0 or data.get("tweetCount", 0) >= 0
results["ok"] = (
data.get("pageTitle", "").lower().find("x") >= 0 or data.get("tweetCount", 0) >= 0
)
print(f" {'' if results['ok'] else ''} Twitter page loaded")
return results
@@ -123,7 +127,9 @@ async def test_youtube_controls(bridge: BeelineBridge, tab_id: int) -> dict:
try:
# Navigate to a YouTube video
await bridge.navigate(tab_id, "https://www.youtube.com/watch?v=dQw4w9WgXcQ", wait_until="networkidle")
await bridge.navigate(
tab_id, "https://www.youtube.com/watch?v=dQw4w9WgXcQ", wait_until="networkidle"
)
await asyncio.sleep(3)
results = {"steps": []}
@@ -188,7 +194,11 @@ async def test_form_interaction(bridge: BeelineBridge, tab_id: int) -> dict:
results["steps"].append(f"type_name: {result.get('ok')}")
# Fill textarea
result = await bridge.type_text(tab_id, "textarea[name='comments']", "This is a test comment with multiple lines.\nLine 2.\nLine 3.")
result = await bridge.type_text(
tab_id,
"textarea[name='comments']",
"This is a test comment with multiple lines.\nLine 2.\nLine 3.",
)
results["steps"].append(f"type_comments: {result.get('ok')}")
# Click radio button
@@ -215,9 +225,9 @@ async def test_form_interaction(bridge: BeelineBridge, tab_id: int) -> dict:
# Check all fields are filled correctly
results["ok"] = (
form_state.get("name") == "Test Customer" and
form_state.get("medium") is True and
form_state.get("cheese") is True
form_state.get("name") == "Test Customer"
and form_state.get("medium") is True
and form_state.get("cheese") is True
)
print(f" {'' if results['ok'] else ''} Form interaction")
@@ -234,7 +244,9 @@ async def test_drag_drop(bridge: BeelineBridge, tab_id: int) -> dict:
try:
# Navigate to a drag-drop demo page
await bridge.navigate(tab_id, "https://www.w3schools.com/html/html5_draganddrop.asp", wait_until="load")
await bridge.navigate(
tab_id, "https://www.w3schools.com/html/html5_draganddrop.asp", wait_until="load"
)
await asyncio.sleep(2)
results = {"steps": []}
@@ -245,7 +257,9 @@ async def test_drag_drop(bridge: BeelineBridge, tab_id: int) -> dict:
# Try drag operation - this page has draggable elements
# Note: HTML5 drag-drop via CDP is limited, this tests mouse events
result = await bridge.evaluate(tab_id, """
result = await bridge.evaluate(
tab_id,
"""
// Check if drag elements exist
const drag1 = document.getElementById('drag1');
const div2 = document.getElementById('div2');
@@ -253,7 +267,8 @@ async def test_drag_drop(bridge: BeelineBridge, tab_id: int) -> dict:
hasDragElement: !!drag1,
hasDropZone: !!div2
};
""")
""",
)
elements = result.get("result", {}).get("value", {})
results["elements"] = elements
print(f" Elements found: {elements}")
+1 -2
@@ -16,7 +16,6 @@ This will test:
"""
import asyncio
import json
import sys
from pathlib import Path
@@ -37,7 +36,7 @@ async def test_connection(bridge: BeelineBridge) -> bool:
if bridge.is_connected:
print("✓ Extension connected!")
return True
print(f" Waiting... ({i+1}/5)")
print(f" Waiting... ({i + 1}/5)")
print("✗ Extension not connected. Ensure Chrome extension is installed.")
return False
+2 -1
@@ -31,7 +31,7 @@ async def main():
if bridge.is_connected:
print("✓ Extension connected!")
break
print(f"Waiting for extension... ({i+1}/5)")
print(f"Waiting for extension... ({i + 1}/5)")
else:
print("✗ Extension not connected")
return
@@ -132,6 +132,7 @@ async def main():
print(f"✗ Hover failed: {result.get('error')}")
except Exception as e:
import traceback
print(f"✗ Hover exception: {e}")
traceback.print_exc()
+4 -6
@@ -110,7 +110,8 @@ BINARY_EXTENSIONS = frozenset(
# ── Context-aware sandboxing ─────────────────────────────────────────────────
# Context variable for additional allowed paths (beyond base_root)
_allowed_paths_ctx: ContextVar[list[str]] = ContextVar("allowed_paths", default=[])
_EMPTY_PATHS: list[str] = []
_allowed_paths_ctx: ContextVar[list[str]] = ContextVar("allowed_paths", default=_EMPTY_PATHS)
def set_allowed_paths(paths: list[str]) -> None:
@@ -150,7 +151,7 @@ def create_sandboxed_resolver(
- Allows absolute paths under base_root or any allowed_path
- Blocks access outside allowed scopes with a helpful error message
"""
hive_dir = os.path.expanduser("~/.hive")
hive_dir = os.path.expanduser("~/.hive") # noqa: F841
def resolve(path: str) -> str:
# Normalize slashes for cross-platform
@@ -180,10 +181,7 @@ def create_sandboxed_resolver(
# Block and remind
allowed_str = ", ".join(f"'{p}'" for p in all_allowed)
raise ValueError(
f"Access denied: '{path}' is not accessible. "
f"Allowed paths: {allowed_str}"
)
raise ValueError(f"Access denied: '{path}' is not accessible. Allowed paths: {allowed_str}")
return resolve
+1
@@ -133,6 +133,7 @@ from .twilio_tool import register_tools as register_twilio
from .twitter_tool import register_tools as register_twitter
from .vercel_tool import register_tools as register_vercel
from .vision_tool import register_tools as register_vision
try:
from .web_scrape_tool import register_tools as register_web_scrape
except ImportError:
@@ -10,9 +10,7 @@ def register_tools(mcp: FastMCP) -> None:
"""Register diff application tools with the MCP server."""
@mcp.tool()
def apply_diff(
path: str, diff_text: str, agent_id: str
) -> dict:
def apply_diff(path: str, diff_text: str, agent_id: str) -> dict:
"""
Purpose
Apply a structured diff to update a file while preserving context.
@@ -10,9 +10,7 @@ def register_tools(mcp: FastMCP) -> None:
"""Register patch application tools with the MCP server."""
@mcp.tool()
def apply_patch(
path: str, patch_text: str, agent_id: str
) -> dict:
def apply_patch(path: str, patch_text: str, agent_id: str) -> dict:
"""
Purpose
Apply a scoped, line-level modification to an existing file.
@@ -59,10 +59,7 @@ def _resolve_path(path: str, data_dir: str | None) -> str:
# Block and remind
allowed_str = ", ".join(f"'{p}'" for p in allowed_paths)
raise ValueError(
f"Access denied: '{path}' is not accessible. "
f"Allowed paths: {allowed_str}"
)
raise ValueError(f"Access denied: '{path}' is not accessible. Allowed paths: {allowed_str}")
def register_tools(mcp: FastMCP) -> None:
@@ -144,7 +141,11 @@ def register_tools(mcp: FastMCP) -> None:
actual_end = start_idx + lines_shown
if actual_end < total_lines:
result += f"\n\n(Showing lines {start_idx + 1}-{actual_end} of {total_lines}. Use offset={actual_end + 1} to continue reading.)"
result += (
f"\n\n(Showing lines {start_idx + 1}-{actual_end}"
f" of {total_lines}."
f" Use offset={actual_end + 1} to continue reading.)"
)
return result
except Exception as e:
@@ -11,9 +11,7 @@ def register_tools(mcp: FastMCP) -> None:
"""Register command execution tools with the MCP server."""
@mcp.tool()
def execute_command_tool(
command: str, agent_id: str, cwd: str | None = None
) -> dict:
def execute_command_tool(command: str, agent_id: str, cwd: str | None = None) -> dict:
"""
Purpose
Execute a shell command within the agent sandbox.
@@ -9,9 +9,7 @@ def register_tools(mcp: FastMCP) -> None:
"""Register file content replacement tools with the MCP server."""
@mcp.tool()
def replace_file_content(
path: str, target: str, replacement: str, agent_id: str
) -> dict:
def replace_file_content(path: str, target: str, replacement: str, agent_id: str) -> dict:
"""
Purpose
Replace all occurrences of a target string with replacement text in a file.
+76 -33
@@ -89,6 +89,7 @@ class BeelineBridge:
# Suppress noisy websockets logging for invalid upgrade attempts
# by providing a null logger
import logging
null_logger = logging.getLogger("websockets.null")
null_logger.setLevel(logging.CRITICAL)
null_logger.addHandler(logging.NullHandler())
@@ -98,7 +99,9 @@ class BeelineBridge:
"127.0.0.1",
port,
logger=null_logger,
max_size=50 * 1024 * 1024, # 50 MB — CDP responses (AX tree, screenshots) can be large
max_size=50
* 1024
* 1024, # 50 MB — CDP responses (AX tree, screenshots) can be large
)
logger.info("Beeline bridge listening on ws://127.0.0.1:%d", port)
except OSError as e:
@@ -134,7 +137,9 @@ class BeelineBridge:
pass
self._status_server = None
async def _http_status_handler(self, reader: asyncio.StreamReader, writer: asyncio.StreamWriter) -> None:
async def _http_status_handler(
self, reader: asyncio.StreamReader, writer: asyncio.StreamWriter
) -> None:
"""Minimal asyncio TCP handler serving HTTP GET /status on the status port."""
try:
raw = await asyncio.wait_for(reader.read(512), timeout=2.0)
@@ -146,7 +151,9 @@ class BeelineBridge:
b"Content-Type: application/json\r\n"
b"Access-Control-Allow-Origin: *\r\n"
b"Access-Control-Allow-Headers: *\r\n"
+ b"Content-Length: " + str(len(body)).encode() + b"\r\n"
+ b"Content-Length: "
+ str(len(body)).encode()
+ b"\r\n"
+ b"Connection: close\r\n"
b"\r\n" + body
)
@@ -160,7 +167,9 @@ class BeelineBridge:
b"\r\n"
)
else:
response = b"HTTP/1.1 404 Not Found\r\nContent-Length: 0\r\nConnection: close\r\n\r\n"
response = (
b"HTTP/1.1 404 Not Found\r\nContent-Length: 0\r\nConnection: close\r\n\r\n"
)
writer.write(response)
await writer.drain()
except Exception:
@@ -566,14 +575,16 @@ class BeelineBridge:
# JavaScript click succeeded — highlight element
rx = value.get("x", 0) - value.get("width", 0) / 2
ry = value.get("y", 0) - value.get("height", 0) / 2
await self.highlight_rect(tab_id, rx, ry, value.get("width", 0), value.get("height", 0), label=selector)
await self.highlight_rect(
tab_id, rx, ry, value.get("width", 0), value.get("height", 0), label=selector
)
return {
"ok": True,
"action": "click",
"selector": selector,
"x": value.get("x", 0),
"y": value.get("y", 0),
"method": "javascript"
"method": "javascript",
}
# If JavaScript click failed, try CDP approach
@@ -639,7 +650,7 @@ class BeelineBridge:
),
timeout=1.0,
)
except asyncio.TimeoutError:
except TimeoutError:
pass # Continue even if timeout
await asyncio.sleep(0.08)
@@ -660,13 +671,20 @@ class BeelineBridge:
),
timeout=3.0,
)
except asyncio.TimeoutError:
except TimeoutError:
pass # Continue even if timeout
w = bounds_value.get("width", 0)
h = bounds_value.get("height", 0)
await self.highlight_rect(tab_id, x - w / 2, y - h / 2, w, h, label=selector)
return {"ok": True, "action": "click", "selector": selector, "x": x, "y": y, "method": "cdp"}
return {
"ok": True,
"action": "click",
"selector": selector,
"x": x,
"y": y,
"method": "cdp",
}
except Exception as e:
return {"ok": False, "error": f"Click failed: {e}"}
@@ -775,12 +793,16 @@ class BeelineBridge:
# Highlight the element that was typed into
rect_result = await self.evaluate(
tab_id,
f"(function(){{const el=document.querySelector({json.dumps(selector)});if(!el)return null;"
f"const r=el.getBoundingClientRect();return{{x:r.left,y:r.top,w:r.width,h:r.height}};}})()",
f"(function(){{const el=document.querySelector("
f"{json.dumps(selector)});if(!el)return null;"
f"const r=el.getBoundingClientRect();"
f"return{{x:r.left,y:r.top,w:r.width,h:r.height}};}})()",
)
rect = (rect_result or {}).get("result")
if rect:
await self.highlight_rect(tab_id, rect["x"], rect["y"], rect["w"], rect["h"], label=selector)
await self.highlight_rect(
tab_id, rect["x"], rect["y"], rect["w"], rect["h"], label=selector
)
return {"ok": True, "action": "type", "selector": selector, "length": len(text)}
async def press_key(self, tab_id: int, key: str, selector: str | None = None) -> dict:
@@ -1036,18 +1058,33 @@ class BeelineBridge:
},
)
_interaction_highlights[tab_id] = {
"x": x, "y": y, "w": w, "h": h, "label": label, "kind": "rect",
"x": x,
"y": y,
"w": w,
"h": h,
"label": label,
"kind": "rect",
}
async def highlight_point(self, tab_id: int, x: float, y: float, label: str = "") -> None:
"""Highlight a coordinate as a small crosshair box in the browser."""
r = 12 # half-size of the crosshair box in CSS px
await self.highlight_rect(
tab_id, x - r, y - r, r * 2, r * 2, label=label,
tab_id,
x - r,
y - r,
r * 2,
r * 2,
label=label,
color={"r": 239, "g": 68, "b": 68, "a": 0.45}, # red-500 @ 45%
)
_interaction_highlights[tab_id] = {
"x": x, "y": y, "w": 0, "h": 0, "label": label, "kind": "point",
"x": x,
"y": y,
"w": 0,
"h": 0,
"label": label,
"kind": "point",
}
async def clear_highlight(self, tab_id: int) -> None:
@@ -1119,10 +1156,7 @@ class BeelineBridge:
"""
try:
result = await asyncio.wait_for(
self.evaluate(tab_id, scroll_script),
timeout=5.0
)
result = await asyncio.wait_for(self.evaluate(tab_id, scroll_script), timeout=5.0)
value = (result or {}).get("result") or {}
if value.get("success"):
@@ -1132,12 +1166,12 @@ class BeelineBridge:
"direction": direction,
"amount": amount,
"method": value.get("method", "js"),
"container": value.get("tag", "unknown")
"container": value.get("tag", "unknown"),
}
else:
return {"ok": False, "error": "scroll script returned failure"}
except asyncio.TimeoutError:
except TimeoutError:
return {"ok": False, "error": "scroll timed out"}
except Exception as e:
logger.warning("Scroll failed: %s", e)
@@ -1182,8 +1216,10 @@ class BeelineBridge:
)
# Arrow-function IIFE: (() => { ... })()
is_arrow_iife = stripped.startswith("(()") and (
stripped.endswith("})()") or stripped.endswith("})();")
or stripped.endswith(")()") or stripped.endswith(")()")
stripped.endswith("})()")
or stripped.endswith("})();")
or stripped.endswith(")()")
or stripped.endswith(")()")
)
if is_iife or is_arrow_iife:
@@ -1212,7 +1248,9 @@ class BeelineBridge:
if "exceptionDetails" in result:
ex = result["exceptionDetails"]
# Extract the actual exception message from the nested structure
ex_value = (ex.get("exception") or {}).get("description") or ex.get("text", "Script error")
ex_value = (ex.get("exception") or {}).get("description") or ex.get(
"text", "Script error"
)
return {"ok": False, "error": ex_value}
# The CDP response structure is {result: {type: ..., value: ...}}
@@ -1285,7 +1323,7 @@ class BeelineBridge:
"url": url,
"tree": snapshot,
}
except asyncio.TimeoutError:
except TimeoutError:
logger.warning("Snapshot timed out after %ss", timeout_s)
return {"ok": False, "error": f"snapshot timed out after {timeout_s}s"}
except asyncio.CancelledError:
@@ -1332,7 +1370,8 @@ class BeelineBridge:
}
const tag = el.tagName.toLowerCase();
const text = (el.innerText || el.value || el.placeholder || el.getAttribute('aria-label') || '').substring(0, 80);
const text = (el.innerText || el.value || el.placeholder
|| el.getAttribute('aria-label') || '').substring(0, 80);
const type = el.type || tag;
const role = el.getAttribute('role') || tag;
const name = el.name || el.id || '';
@@ -1383,7 +1422,7 @@ class BeelineBridge:
if text:
desc += f' "{text[:40]}"'
if el.get("href"):
desc += f' [href]'
desc += " [href]"
desc += f" [ref={ref}]"
lines.append(f" - {desc}")
@@ -1557,7 +1596,10 @@ class BeelineBridge:
return {"ok": False, "error": f"Element not found: {selector}"}
async def screenshot(
self, tab_id: int, full_page: bool = False, selector: str | None = None,
self,
tab_id: int,
full_page: bool = False,
selector: str | None = None,
timeout_s: float = 30.0,
) -> dict:
"""Take a screenshot of the page or element.
@@ -1587,9 +1629,7 @@ class BeelineBridge:
"returnByValue": True,
},
)
rect = (
(rect_result or {}).get("result", {}).get("result", {}).get("value")
)
rect = (rect_result or {}).get("result", {}).get("result", {}).get("value")
if rect and rect.get("width") and rect.get("height"):
params["clip"] = {
"x": rect["x"],
@@ -1648,12 +1688,15 @@ class BeelineBridge:
"data": data,
"mimeType": "image/png",
}
except asyncio.TimeoutError:
except TimeoutError:
logger.warning("Screenshot timed out after %ss", timeout_s)
return {"ok": False, "error": f"screenshot timed out after {timeout_s}s"}
except asyncio.CancelledError:
logger.warning("Screenshot cancelled (timeout or task cancellation)")
return {"ok": False, "error": f"screenshot timed out or cancelled (limit: {timeout_s}s)"}
return {
"ok": False,
"error": f"screenshot timed out or cancelled (limit: {timeout_s}s)",
}
except Exception as e:
logger.error("Screenshot failed: %s", e)
return {"ok": False, "error": str(e)}
+6 -6
@@ -14,12 +14,12 @@ from __future__ import annotations
import functools
import json
import os
import time
import traceback
from datetime import datetime, timezone
from collections.abc import Callable
from datetime import UTC, datetime
from pathlib import Path
from typing import Any, Callable, TypeVar
from typing import Any, TypeVar
# Try to import context variable, but don't fail if not available
try:
@@ -43,7 +43,7 @@ def _get_log_file() -> Path:
"""Get the current log file, rotating daily."""
global _current_log_file, _log_file_date
today = datetime.now(timezone.utc).strftime("%Y-%m-%d")
today = datetime.now(UTC).strftime("%Y-%m-%d")
if _log_file_date != today:
_log_file_date = today
_current_log_file = LOG_DIR / f"browser-{today}.jsonl"
@@ -95,7 +95,7 @@ def write_log(entry: dict[str, Any]) -> None:
"""Write a log entry to the current log file."""
try:
log_file = _get_log_file()
entry["ts"] = datetime.now(timezone.utc).isoformat()
entry["ts"] = datetime.now(UTC).isoformat()
entry["profile"] = _get_profile()
with open(log_file, "a", encoding="utf-8") as f:
@@ -283,4 +283,4 @@ def instrument_tool(tool_name: str) -> Callable[[F], F]:
# Import asyncio at the end to avoid circular import issues
import asyncio
import asyncio # noqa: E402
Some files were not shown because too many files have changed in this diff.