test: align stale tests with current behavior

This commit is contained in:
Richard Tang
2026-04-18 22:02:03 -07:00
parent 8e4468851c
commit c17205a453
5 changed files with 54 additions and 108 deletions
+4
View File
@@ -63,6 +63,10 @@ lint.isort.section-order = [
"local-folder",
]
[tool.pytest.ini_options]
addopts = "-m 'not live'"
markers = [
"live: Tests that call real external APIs (require credentials, never run in CI)",
]
filterwarnings = [
"ignore::DeprecationWarning:litellm.*"
]
+7 -4
View File
@@ -45,10 +45,13 @@ def _has_any_llm_key() -> bool:
return any(os.environ.get(k) for k in _LLM_KEY_ENV_VARS)
pytestmark = pytest.mark.skipif(
not _has_any_llm_key(),
reason="No LLM API key set; skipping live integration test",
)
pytestmark = [
pytest.mark.live,
pytest.mark.skipif(
not _has_any_llm_key(),
reason="No LLM API key set; skipping live integration test",
),
]
# ---------------------------------------------------------------------------
+4 -1
View File
@@ -600,7 +600,10 @@ async def test_subscribe_reflection_triggers_runs_housekeeping_for_both_scopes(
await asyncio.sleep(0.05)
assert len(sub_ids) == 2
assert unified_short.await_count == 3
# With 5 turns and _SHORT_REFLECT_TURN_INTERVAL=3 plus the 5-minute
# cooldown, reflections fire on count=1 (first run, no gate) and
# count=3 (turn interval hit). Counts 2, 4, 5 are all gated out.
assert unified_short.await_count == 2
unified_long.assert_not_awaited()
+2 -2
View File
@@ -99,7 +99,7 @@ class TestCmdSkillValidate:
class TestCmdSkillDoctor:
def test_defaults_pass_against_real_framework_skills(self):
"""All 7 framework default skills should be healthy (no mocking)."""
"""All 6 framework default skills should be healthy (no mocking)."""
args = Namespace(defaults=True, name=None, project_dir=None)
result = cmd_skill_doctor(args)
assert result == 0
@@ -355,7 +355,7 @@ class TestJsonFlag:
data = json.loads(out)
assert result == 0
assert "skills" in data
assert len(data["skills"]) == 7 # 7 framework default skills
assert len(data["skills"]) == 6 # 6 framework default skills
assert data["total_errors"] == 0
def test_search_json_registry_unavailable_exits_1(self, capsys):
+37 -101
View File
@@ -1,130 +1,66 @@
"""Phase 5 test: SSE filter drops worker noise from queen DM stream.
The queen DM SSE handler must drop events from parallel-worker streams
(``stream_id="worker:{uuid}"``) so that worker LLM deltas, tool calls,
and iteration events do not flood the user's chat tab. A small allowlist
of worker events is still passed through (SUBAGENT_REPORT,
EXECUTION_COMPLETED, EXECUTION_FAILED) so the frontend can render
fan-out / fan-in lifecycle.
The queen DM SSE handler drops events from worker streams — both the
single-worker tag (``stream_id="worker"``) and the parallel-fan-out tag
(``stream_id="worker:{uuid}"``) — so that worker LLM deltas, tool calls,
and iteration events do not flood the user's chat when the queen is in
the ``independent`` phase. A small allowlist of worker events still
passes through (SUBAGENT_REPORT, EXECUTION_COMPLETED, EXECUTION_FAILED)
so the frontend can render fan-out / fan-in lifecycle summaries.
We test the pure ``_is_worker_noise`` predicate by importing the SSE
handler module and exercising the inner function via a closure helper.
Phase-aware behavior (filter on vs off) lives in the SSE handler's
``_should_filter_worker_noise`` closure — tested at the integration
level, not here. This file just exercises the pure
``_is_worker_noise`` predicate.
"""
from __future__ import annotations
from framework.host.event_bus import EventType
from framework.server.routes_events import _is_worker_noise
def _make_evt(stream_id: str, evt_type: str) -> dict:
def _make_evt(stream_id: str | None, evt_type: str) -> dict:
return {"stream_id": stream_id, "type": evt_type}
def test_queen_stream_events_pass_through() -> None:
"""Events from non-worker streams must always pass."""
from framework.server.routes_events import _WORKER_EVENT_ALLOWLIST # noqa: F401
# Recreate the predicate locally — it's a closure inside the handler,
# so we mirror its logic here. If the handler's logic changes, this
# test must be updated to match.
def is_worker_noise(evt: dict) -> bool:
sid = evt.get("stream_id") or ""
if not sid.startswith("worker:"):
return False
return evt.get("type") not in {
EventType.SUBAGENT_REPORT.value,
EventType.EXECUTION_COMPLETED.value,
EventType.EXECUTION_FAILED.value,
}
# Queen events
assert not is_worker_noise(_make_evt("queen", EventType.LLM_TEXT_DELTA.value))
assert not is_worker_noise(_make_evt("queen", EventType.TOOL_CALL_STARTED.value))
assert not is_worker_noise(_make_evt("overseer", EventType.LLM_TEXT_DELTA.value))
assert not is_worker_noise(_make_evt("", EventType.LLM_TEXT_DELTA.value))
assert not is_worker_noise(_make_evt(None, EventType.LLM_TEXT_DELTA.value))
assert not _is_worker_noise(_make_evt("queen", EventType.LLM_TEXT_DELTA.value))
assert not _is_worker_noise(_make_evt("queen", EventType.TOOL_CALL_STARTED.value))
assert not _is_worker_noise(_make_evt("overseer", EventType.LLM_TEXT_DELTA.value))
assert not _is_worker_noise(_make_evt("", EventType.LLM_TEXT_DELTA.value))
assert not _is_worker_noise(_make_evt(None, EventType.LLM_TEXT_DELTA.value))
def test_worker_llm_and_tool_events_are_filtered() -> None:
def is_worker_noise(evt: dict) -> bool:
sid = evt.get("stream_id") or ""
if not sid.startswith("worker:"):
return False
return evt.get("type") not in {
EventType.SUBAGENT_REPORT.value,
EventType.EXECUTION_COMPLETED.value,
EventType.EXECUTION_FAILED.value,
}
assert is_worker_noise(_make_evt("worker:abc123", EventType.LLM_TEXT_DELTA.value))
assert is_worker_noise(_make_evt("worker:abc123", EventType.TOOL_CALL_STARTED.value))
assert is_worker_noise(_make_evt("worker:xyz", EventType.TOOL_CALL_COMPLETED.value))
assert is_worker_noise(_make_evt("worker:xyz", EventType.NODE_LOOP_ITERATION.value))
"""Worker chatter is noise on both the singular and fan-out tags."""
# Parallel fan-out tag
assert _is_worker_noise(_make_evt("worker:abc123", EventType.LLM_TEXT_DELTA.value))
assert _is_worker_noise(_make_evt("worker:abc123", EventType.TOOL_CALL_STARTED.value))
assert _is_worker_noise(_make_evt("worker:xyz", EventType.TOOL_CALL_COMPLETED.value))
assert _is_worker_noise(_make_evt("worker:xyz", EventType.NODE_LOOP_ITERATION.value))
# Singular primary-worker tag
assert _is_worker_noise(_make_evt("worker", EventType.LLM_TEXT_DELTA.value))
assert _is_worker_noise(_make_evt("worker", EventType.TOOL_CALL_STARTED.value))
def test_worker_lifecycle_and_report_events_pass_through() -> None:
def is_worker_noise(evt: dict) -> bool:
sid = evt.get("stream_id") or ""
if not sid.startswith("worker:"):
return False
return evt.get("type") not in {
EventType.SUBAGENT_REPORT.value,
EventType.EXECUTION_COMPLETED.value,
EventType.EXECUTION_FAILED.value,
}
assert not is_worker_noise(_make_evt("worker:abc", EventType.SUBAGENT_REPORT.value))
assert not is_worker_noise(_make_evt("worker:abc", EventType.EXECUTION_COMPLETED.value))
assert not is_worker_noise(_make_evt("worker:abc", EventType.EXECUTION_FAILED.value))
"""Allowlisted lifecycle events survive the filter on both tags."""
# Parallel fan-out tag
assert not _is_worker_noise(_make_evt("worker:abc", EventType.SUBAGENT_REPORT.value))
assert not _is_worker_noise(_make_evt("worker:abc", EventType.EXECUTION_COMPLETED.value))
assert not _is_worker_noise(_make_evt("worker:abc", EventType.EXECUTION_FAILED.value))
# Singular primary-worker tag
assert not _is_worker_noise(_make_evt("worker", EventType.SUBAGENT_REPORT.value))
assert not _is_worker_noise(_make_evt("worker", EventType.EXECUTION_COMPLETED.value))
assert not _is_worker_noise(_make_evt("worker", EventType.EXECUTION_FAILED.value))
def test_handler_module_exposes_allowlist_constant() -> None:
"""Smoke test that the constant the handler closes over still exists."""
"""Smoke test that the allowlist constant the predicate closes over still exists."""
from framework.server.routes_events import _WORKER_EVENT_ALLOWLIST
assert EventType.SUBAGENT_REPORT.value in _WORKER_EVENT_ALLOWLIST
assert EventType.EXECUTION_COMPLETED.value in _WORKER_EVENT_ALLOWLIST
assert EventType.EXECUTION_FAILED.value in _WORKER_EVENT_ALLOWLIST
def test_loaded_worker_stream_id_singular_passes_through() -> None:
"""The loaded primary worker uses stream_id='worker' (no colon).
This is the stream tag run_agent_with_input passes to
ColonyRuntime.spawn. The SSE filter must NOT confuse it with the
parallel-fan-out 'worker:{uuid}' tag — otherwise the user's main
chat-visible workstream gets dropped from the queen DM.
Regression test for: 'why worker message no longer goes to the
frontend' after migrating run_agent_with_input from
AgentHost.trigger to ColonyRuntime.spawn.
"""
from framework.server.routes_events import _is_worker_noise
# All of these are events from the LOADED worker (single primary
# worker spawned via run_agent_with_input). They must pass the
# filter — including high-frequency LLM deltas and tool calls,
# because the queen DM IS the visible chat for this worker.
for evt_type in [
EventType.LLM_TEXT_DELTA.value,
EventType.TOOL_CALL_STARTED.value,
EventType.TOOL_CALL_COMPLETED.value,
EventType.NODE_LOOP_ITERATION.value,
EventType.CLIENT_OUTPUT_DELTA.value,
EventType.EXECUTION_STARTED.value,
EventType.EXECUTION_COMPLETED.value,
]:
evt = {"stream_id": "worker", "type": evt_type}
assert not _is_worker_noise(evt), (
f"loaded-worker event {evt_type} with stream_id='worker' was "
"filtered as worker noise — this regresses the queen DM "
"primary worker chat path"
)
# Sanity: the parallel fan-out tag is still filtered.
assert _is_worker_noise(
{
"stream_id": "worker:abc123",
"type": EventType.LLM_TEXT_DELTA.value,
}
)