Files
deer-flow/backend/tests/test_update_agent_tool.py
T
yangzheli 59c4a3f0a4
Unit Tests / backend-unit-tests (push) Waiting to run
E2E Tests / e2e-tests (push) Waiting to run
Frontend Unit Tests / frontend-unit-tests (push) Waiting to run
Lint Check / lint (push) Waiting to run
Lint Check / lint-frontend (push) Waiting to run
feat(agent): add custom-agent self-updates with user isolation (#2713)
* feat(agent): add update_agent tool for in-chat custom-agent self-updates (#2616)

Custom agents had no built-in way to persist updates to their own SOUL.md /
config.yaml from a normal chat — `setup_agent` was only bound during the
bootstrap flow, so when the user asked the agent to refine its description
or personality, the agent would shell out via bash/write_file and the edits
landed in a temporary sandbox/tool workspace instead of
`{base_dir}/agents/{agent_name}/`.

Changes:
- New `update_agent` builtin tool with partial-update semantics (only the
  fields you pass are written) and atomic temp-file + os.replace writes so
  a failed update never corrupts existing SOUL.md / config.yaml.
- Lead agent now binds `update_agent` in the non-bootstrap path whenever
  `agent_name` is set in the runtime context. Default agent (no
  agent_name) and bootstrap flow are unchanged.
- New `<self_update>` system-prompt section is injected for custom agents,
  instructing them to use `update_agent` — and explicitly NOT bash /
  write_file — to persist self-updates.
- Tests: 11 new cases in `tests/test_update_agent_tool.py` covering
  validation (missing/invalid agent_name, unknown agent, no fields),
  partial updates (soul-only, description-only, skills=[] vs omitted),
  no-op detection, atomic-write safety, and AgentConfig round-tripping;
  plus 2 new cases in `tests/test_lead_agent_prompt.py` covering the
  self-update prompt section.
- Docs: updated backend/CLAUDE.md builtin tools list and tools.mdx
  (en/zh) with the new tool description.

* feat(agent): isolate custom agents per user

Store custom agent definitions under the effective user, keep legacy agents readable until migration, and cover API/tool/migration behavior with tests.

Co-authored-by: Cursor <cursoragent@cursor.com>

* feat: consistent write/delete targets & add --user-id to migration

---------

Co-authored-by: Cursor <cursoragent@cursor.com>
2026-05-05 23:17:42 +08:00

311 lines
12 KiB
Python

"""Tests for update_agent tool — partial updates, atomic writes, and validation.
Resolves issue #2616: a custom agent must be able to persist updates to its
own SOUL.md / config.yaml from inside a normal chat (not only from bootstrap).
The tool writes per-user (``{base_dir}/users/{user_id}/agents/{name}/``) so
that one user's update cannot mutate another user's agent.
"""
from __future__ import annotations
from pathlib import Path
from types import SimpleNamespace
from unittest.mock import MagicMock, patch
import pytest
import yaml
from deerflow.config.agents_config import AgentConfig
from deerflow.tools.builtins.update_agent_tool import update_agent
DEFAULT_USER = "test-user-autouse" # matches the autouse fixture in tests/conftest.py
class _DummyRuntime(SimpleNamespace):
context: dict
tool_call_id: str
def _runtime(agent_name: str | None = "test-agent", tool_call_id: str = "call_1") -> _DummyRuntime:
return _DummyRuntime(context={"agent_name": agent_name} if agent_name is not None else {}, tool_call_id=tool_call_id)
def _make_paths_mock(tmp_path: Path) -> MagicMock:
paths = MagicMock()
paths.base_dir = tmp_path
paths.agent_dir = lambda name: tmp_path / "agents" / name
paths.agents_dir = tmp_path / "agents"
paths.user_agent_dir = lambda user_id, name: tmp_path / "users" / user_id / "agents" / name
paths.user_agents_dir = lambda user_id: tmp_path / "users" / user_id / "agents"
return paths
def _user_agent_dir(tmp_path: Path, name: str = "test-agent", user_id: str = DEFAULT_USER) -> Path:
return tmp_path / "users" / user_id / "agents" / name
def _seed_agent(
tmp_path: Path,
name: str = "test-agent",
*,
description: str = "old desc",
soul: str = "old soul",
skills: list[str] | None = None,
user_id: str = DEFAULT_USER,
) -> Path:
"""Create a baseline agent dir with config.yaml and SOUL.md for tests to mutate."""
agent_dir = _user_agent_dir(tmp_path, name, user_id=user_id)
agent_dir.mkdir(parents=True, exist_ok=True)
cfg: dict = {"name": name, "description": description}
if skills is not None:
cfg["skills"] = skills
(agent_dir / "config.yaml").write_text(yaml.safe_dump(cfg, sort_keys=False), encoding="utf-8")
(agent_dir / "SOUL.md").write_text(soul, encoding="utf-8")
return agent_dir
@pytest.fixture()
def patched_paths(tmp_path: Path):
paths_mock = _make_paths_mock(tmp_path)
with patch("deerflow.tools.builtins.update_agent_tool.get_paths", return_value=paths_mock):
# load_agent_config also calls get_paths(); patch the same target it uses.
with patch("deerflow.config.agents_config.get_paths", return_value=paths_mock):
yield paths_mock
@pytest.fixture()
def stub_app_config():
"""Stub get_app_config so model validation accepts only known names."""
fake = MagicMock()
fake.get_model_config.side_effect = lambda name: object() if name in {"gpt-known", "m1"} else None
with patch("deerflow.tools.builtins.update_agent_tool.get_app_config", return_value=fake):
yield fake
# --- Validation tests ---
def test_update_agent_rejects_missing_agent_name(patched_paths):
result = update_agent.func(runtime=_runtime(agent_name=None), soul="new soul")
msg = result.update["messages"][0]
assert "only available inside a custom agent's chat" in msg.content
def test_update_agent_rejects_invalid_agent_name(patched_paths):
result = update_agent.func(runtime=_runtime(agent_name="../../etc/passwd"), soul="x")
msg = result.update["messages"][0]
assert "Invalid agent name" in msg.content
def test_update_agent_rejects_unknown_agent(tmp_path, patched_paths):
result = update_agent.func(runtime=_runtime(agent_name="ghost"), soul="x")
msg = result.update["messages"][0]
assert "does not exist" in msg.content
assert not _user_agent_dir(tmp_path, "ghost").exists()
def test_update_agent_requires_at_least_one_field(tmp_path, patched_paths):
_seed_agent(tmp_path)
result = update_agent.func(runtime=_runtime())
msg = result.update["messages"][0]
assert "No fields provided" in msg.content
def test_update_agent_rejects_unknown_model(tmp_path, patched_paths, stub_app_config):
"""Copilot review: model must be validated against configured models before
being persisted; otherwise _resolve_model_name silently falls back to the
default and the user gets repeated warnings on every later turn."""
_seed_agent(tmp_path)
result = update_agent.func(runtime=_runtime(), model="not-in-config")
msg = result.update["messages"][0]
assert "Unknown model" in msg.content
cfg = yaml.safe_load((_user_agent_dir(tmp_path) / "config.yaml").read_text())
assert "model" not in cfg, "Invalid model must not have been written to config.yaml"
def test_update_agent_accepts_known_model(tmp_path, patched_paths, stub_app_config):
_seed_agent(tmp_path)
result = update_agent.func(runtime=_runtime(), model="gpt-known")
cfg = yaml.safe_load((_user_agent_dir(tmp_path) / "config.yaml").read_text())
assert cfg["model"] == "gpt-known"
assert "model" in result.update["messages"][0].content
# --- Partial update tests ---
def test_update_agent_updates_soul_only(tmp_path, patched_paths):
agent_dir = _seed_agent(tmp_path, description="keep me", soul="old soul")
result = update_agent.func(runtime=_runtime(), soul="brand new soul")
assert (agent_dir / "SOUL.md").read_text() == "brand new soul"
cfg = yaml.safe_load((agent_dir / "config.yaml").read_text())
assert cfg["description"] == "keep me", "description must be preserved"
assert "soul" in result.update["messages"][0].content
def test_update_agent_updates_description_only(tmp_path, patched_paths):
agent_dir = _seed_agent(tmp_path, description="old desc", soul="keep this soul")
result = update_agent.func(runtime=_runtime(), description="new desc")
cfg = yaml.safe_load((agent_dir / "config.yaml").read_text())
assert cfg["description"] == "new desc"
assert (agent_dir / "SOUL.md").read_text() == "keep this soul", "SOUL.md must be preserved"
assert "description" in result.update["messages"][0].content
def test_update_agent_skills_empty_list_disables_all(tmp_path, patched_paths):
agent_dir = _seed_agent(tmp_path, skills=["a", "b"])
result = update_agent.func(runtime=_runtime(), skills=[])
cfg = yaml.safe_load((agent_dir / "config.yaml").read_text())
assert cfg["skills"] == [], "empty list must persist as empty list (not be omitted)"
assert "skills" in result.update["messages"][0].content
def test_update_agent_skills_omitted_keeps_existing(tmp_path, patched_paths):
agent_dir = _seed_agent(tmp_path, skills=["alpha", "beta"])
update_agent.func(runtime=_runtime(), description="bumped")
cfg = yaml.safe_load((agent_dir / "config.yaml").read_text())
assert cfg["skills"] == ["alpha", "beta"], "omitting skills must preserve the existing whitelist"
def test_update_agent_no_op_when_values_match_existing(tmp_path, patched_paths):
_seed_agent(tmp_path, description="same")
result = update_agent.func(runtime=_runtime(), description="same")
assert "No changes applied" in result.update["messages"][0].content
def test_update_agent_forces_name_to_directory(tmp_path, patched_paths):
"""Copilot review: if the existing config.yaml has a drifted ``name`` field,
update_agent must rewrite it to match the directory name so on-disk state
stays consistent with the runtime context."""
agent_dir = _user_agent_dir(tmp_path)
agent_dir.mkdir(parents=True)
(agent_dir / "config.yaml").write_text(yaml.safe_dump({"name": "drifted-name", "description": "old"}, sort_keys=False), encoding="utf-8")
(agent_dir / "SOUL.md").write_text("soul", encoding="utf-8")
update_agent.func(runtime=_runtime(), description="bumped")
cfg = yaml.safe_load((agent_dir / "config.yaml").read_text())
assert cfg["name"] == "test-agent", "config.yaml name must follow the directory name, not legacy yaml content"
# --- Atomicity tests ---
def test_update_agent_failure_preserves_existing_files(tmp_path, patched_paths):
agent_dir = _seed_agent(tmp_path, soul="original soul")
real_replace = Path.replace
def _explode(self, target):
if str(target).endswith("SOUL.md"):
raise OSError("disk full")
return real_replace(self, target)
with patch.object(Path, "replace", _explode):
result = update_agent.func(runtime=_runtime(), soul="poisoned content")
assert (agent_dir / "SOUL.md").read_text() == "original soul", "atomic write must not corrupt existing SOUL.md"
assert "Error" in result.update["messages"][0].content
leftover_tmps = list(agent_dir.glob("*.tmp"))
assert leftover_tmps == [], "temp files must be cleaned up on failure"
def test_update_agent_soul_failure_does_not_replace_config(tmp_path, patched_paths):
"""Copilot review: if both config.yaml and SOUL.md are scheduled to be
written and SOUL.md staging fails *before* any rename, config.yaml must
NOT be replaced. The fix stages every temp file first and only renames
after all temps exist on disk."""
agent_dir = _seed_agent(tmp_path, description="original-desc", soul="original soul")
real_named_temp_file = __import__("tempfile").NamedTemporaryFile
call_count = {"n": 0}
def _explode_on_soul(*args, **kwargs):
# Inspect target dir + suffix; the SOUL temp file is the second one we stage.
call_count["n"] += 1
if call_count["n"] >= 2:
raise OSError("disk full while staging SOUL.md")
return real_named_temp_file(*args, **kwargs)
with patch("deerflow.tools.builtins.update_agent_tool.tempfile.NamedTemporaryFile", side_effect=_explode_on_soul):
result = update_agent.func(runtime=_runtime(), description="new-desc", soul="new soul")
cfg = yaml.safe_load((agent_dir / "config.yaml").read_text())
assert cfg["description"] == "original-desc", "config.yaml must not be replaced when SOUL.md staging fails"
assert (agent_dir / "SOUL.md").read_text() == "original soul"
assert "Error" in result.update["messages"][0].content
assert list(agent_dir.glob("*.tmp")) == [], "staged config.yaml temp must be cleaned up on SOUL.md failure"
# --- Per-user isolation ---
def test_update_agent_only_writes_under_current_user(tmp_path, patched_paths):
"""An update from user 'alice' must never touch user 'bob's agent files."""
from deerflow.runtime.user_context import reset_current_user, set_current_user
# Seed an agent for both users with the same name.
alice_dir = _seed_agent(tmp_path, name="shared", description="alice-desc", soul="alice soul", user_id="alice")
bob_dir = _seed_agent(tmp_path, name="shared", description="bob-desc", soul="bob soul", user_id="bob")
# Override the autouse contextvar so update_agent runs as Alice.
token = set_current_user(SimpleNamespace(id="alice"))
try:
update_agent.func(runtime=_runtime(agent_name="shared"), description="alice-bumped")
finally:
reset_current_user(token)
alice_cfg = yaml.safe_load((alice_dir / "config.yaml").read_text())
bob_cfg = yaml.safe_load((bob_dir / "config.yaml").read_text())
assert alice_cfg["description"] == "alice-bumped"
assert bob_cfg["description"] == "bob-desc", "bob's config.yaml must not have been touched"
assert (bob_dir / "SOUL.md").read_text() == "bob soul"
# --- Loader passthrough sanity check ---
def test_update_agent_round_trips_known_fields(tmp_path, patched_paths):
"""update_agent reads through load_agent_config so all fields the loader
knows about (name, description, model, tool_groups, skills) round-trip
on a partial update.
Note: ``load_agent_config`` strips unknown fields before constructing
AgentConfig, so legacy/extra YAML keys are NOT preserved across
updates — by design.
"""
_seed_agent(tmp_path, description="legacy")
fake_cfg = AgentConfig(name="test-agent", description="legacy", skills=["s1"], tool_groups=["g1"], model="m1")
fake_app_config = MagicMock()
fake_app_config.get_model_config.return_value = object()
with patch("deerflow.tools.builtins.update_agent_tool.load_agent_config", return_value=fake_cfg):
with patch("deerflow.tools.builtins.update_agent_tool.get_app_config", return_value=fake_app_config):
update_agent.func(runtime=_runtime(), description="bumped")
cfg = yaml.safe_load((_user_agent_dir(tmp_path) / "config.yaml").read_text())
assert cfg["description"] == "bumped"
assert cfg["skills"] == ["s1"]
assert cfg["tool_groups"] == ["g1"]
assert cfg["model"] == "m1"