Merge branch 'feat/tasks-system'

This commit is contained in:
Richard Tang
2026-04-27 10:55:50 -07:00
7 changed files with 398 additions and 46 deletions
+22 -14
View File
@@ -226,9 +226,9 @@ another pass, kick it off with run_parallel_workers; otherwise stay \
conversational.
If the review itself is multi-step (e.g. "verify each worker's output, \
then draft a summary, then propose next steps"), you may use \
`task_create` / `task_update` to keep yourself organised. Skip them \
for a single-paragraph summary.
then draft a summary, then propose next steps"), lay it out upfront \
with `task_create_batch` and walk through with `task_update`. Skip the \
ceremony for a single-paragraph summary.
"""
@@ -240,10 +240,16 @@ _queen_tools_independent = """
# Tools (INDEPENDENT mode)
## Planning — use FIRST for multi-step work
- task_create / task_update / task_list / task_get When a request \
has 3+ atomic steps, your FIRST tool call is `task_create` (one task \
per step) BEFORE you touch any other tool. See "Independent execution" \
for the per-step flow and granularity rule.
- task_create_batch — When a request has 3+ atomic steps, your FIRST \
tool call is `task_create_batch` with one entry per step (atomic, \
one round-trip). Use this for the upfront plan, NOT five separate \
`task_create` calls.
- task_create — One-off mid-run additions when you discover \
unplanned work AFTER the initial plan is laid out.
- task_update / task_list / task_get — Mark progress, inspect, or \
re-read state.
See "Independent execution" for the per-step flow and granularity rule.
## File I/O (coder-tools MCP)
- read_file, write_file, edit_file, hashline_edit, list_directory, \
@@ -413,19 +419,21 @@ _queen_behavior_independent = """
## Independent execution
You are the agent. **For multi-step work (3+ atomic actions): your FIRST \
tool call is `task_create`** one task per atomic action, before you \
touch any other tool. Then work the list one task at a time:
tool call is `task_create_batch`** with one entry per atomic action, \
before you touch any other tool. (One call, atomic — not N separate \
`task_create` calls.) Then work the list one task at a time:
1. `task_update` in_progress before you start the step.
2. Do one real inline instance — open the browser, call the real API, \
write to the real file. If the action is irreversible or touches \
shared systems, show and confirm before executing. Report concrete \
evidence (actual output, what worked / failed) after the run.
3. `task_update` completed THE MOMENT it's done. **Never batch up \
multiple completions to flush at the end.** `completed` transitions \
are the user's progress heartbeat in the right-rail panel — without \
them, the panel shows a hung spinner no matter how much real work \
you got done.
3. `task_update` completed THE MOMENT it's done. **Do not let \
multiple finished tasks pile up unmarked.** There is no batch update \
tool by design — each `completed` transition is a discrete progress \
heartbeat in the user's right-rail panel. Without those transitions \
the panel shows a hung spinner no matter how much real work you got \
done.
**Granularity: one task per atomic action, not one umbrella per project.** \
Replying to 5 posts is 5 tasks, not 1. Crawling 3 sites is 3 tasks. \
+3 -2
View File
@@ -5,8 +5,9 @@ See temp/tasks-system-implementation-plan.md for the design. Two list types:
colony:{colony_id} -- the queen's spawn-plan template
session:{agent_id}:{sess_id} -- per-session working list
Each agent operates on its own session list via the four task tools
(`task_create`, `task_update`, `task_list`, `task_get`). The colony
Each agent operates on its own session list via the session task tools
(`task_create_batch`, `task_create`, `task_update`, `task_list`,
`task_get`). The colony
template is addressed only by the queen's `colony_template_*` tools and by
the UI/event surface.
"""
+3 -1
View File
@@ -86,7 +86,9 @@ def build_reminder(records: list[TaskRecord]) -> str:
"before starting the next step. Don't batch completions.",
" - If you've finished work that wasn't on the list, add a "
"task_create + task_update completed pair so the panel reflects it.",
" - If you're umbrella-tracking ('reply to all posts' as one task), break it into one task per atomic action.",
" - If you're umbrella-tracking ('reply to all posts' as one task), "
"break it into one task per atomic action — use `task_create_batch` "
"with one entry per action.",
]
if in_progress:
bullets.append(
+85
View File
@@ -164,6 +164,27 @@ class TaskStore:
# ----- task CRUD ----------------------------------------------------
async def create_tasks_batch(
    self,
    task_list_id: str,
    specs: list[dict[str, Any]],
) -> list[TaskRecord]:
    """Atomically create N tasks under a single list-lock acquisition.

    Each spec is a dict with keys: subject (required), description,
    active_form, owner, metadata. Ids are assigned sequentially and
    contiguously — if any task fails to write, an exception is raised
    and the whole batch is rolled back (files unlinked, high-water-mark
    kept at the prior value).

    Atomic-or-none semantics matter for the tool surface: a failed
    partial batch would leave the LLM reasoning about cleanup, which
    defeats the point of batching as a single decision.

    The blocking lock/file work is delegated to a worker thread via
    asyncio.to_thread so the event loop stays responsive.
    """
    return await asyncio.to_thread(
        self._create_tasks_batch_sync, task_list_id, specs
    )
async def create_task(
self,
task_list_id: str,
@@ -432,6 +453,70 @@ class TaskStore:
self._write_highwatermark_sync(task_list_id, new_id)
return record
def _create_tasks_batch_sync(
    self,
    task_list_id: str,
    specs: list[dict[str, Any]],
) -> list[TaskRecord]:
    """Synchronous core of create_tasks_batch.

    Validates every spec, then — under the per-list lock — assigns a
    contiguous id range starting at the list's next id, writes one file
    per task, and bumps the high-water-mark only after the full batch is
    on disk. On any write failure the batch rolls back (all attempted
    files unlinked) and the exception propagates.

    Raises:
        ValueError: if any spec's subject is missing/empty (checked
            before anything is written, so a malformed entry never
            half-creates the batch).
    """
    if not specs:
        return []
    # Validate up-front so we don't half-create on a malformed entry.
    for i, spec in enumerate(specs):
        subj = spec.get("subject")
        if not isinstance(subj, str) or not subj.strip():
            raise ValueError(f"specs[{i}].subject must be a non-empty string")
    with self._list_lock(task_list_id):
        # Same lazy meta backfill as _create_task_sync.
        if not self._meta_path(task_list_id).exists():
            inferred_role = (
                TaskListRole.TEMPLATE
                if task_list_id.startswith("colony:")
                else TaskListRole.SESSION
            )
            self._write_meta_sync(
                task_list_id,
                TaskListMeta(task_list_id=task_list_id, role=inferred_role),
            )
        base_id = self._next_id_sync(task_list_id)
        now = time.time()
        records: list[TaskRecord] = []
        for offset, spec in enumerate(specs):
            rec = TaskRecord(
                id=base_id + offset,
                subject=spec["subject"],
                description=spec.get("description", ""),
                active_form=spec.get("active_form"),
                owner=spec.get("owner"),
                status=TaskStatus.PENDING,
                metadata=dict(spec.get("metadata") or {}),
                created_at=now,
                updated_at=now,
            )
            records.append(rec)
        # Write all task files; on any failure, unlink everything we
        # attempted and re-raise. High-water-mark is bumped only after
        # a successful full-batch write.
        attempted: list[Path] = []
        try:
            for rec in records:
                # Record the path BEFORE writing: a _write_task_sync that
                # fails midway can leave a partial file behind, and that
                # file must be rolled back along with the earlier ones.
                attempted.append(self._task_path(task_list_id, rec.id))
                self._write_task_sync(task_list_id, rec)
        except Exception:
            for path in attempted:
                try:
                    path.unlink(missing_ok=True)
                except OSError:
                    logger.warning("Failed to roll back batch task at %s", path, exc_info=True)
            raise
        highest = records[-1].id
        if highest > self._read_highwatermark_sync(task_list_id):
            self._write_highwatermark_sync(task_list_id, highest)
        return records
# ----- update -------------------------------------------------------
def _update_task_sync(
+127
View File
@@ -159,6 +159,133 @@ async def test_task_not_found_is_not_error(
# ---------------------------------------------------------------------------
@pytest.mark.asyncio
async def test_create_batch_creates_n_tasks_atomically(
    registry_with_session_tools: ToolRegistry,
) -> None:
    """Happy path: a single batch call lands three sequentially-id'd tasks."""
    registry = registry_with_session_tools
    session_list = "session:agent_a:sess_1"
    ctx_token = _set_ctx(agent_id="agent_a", task_list_id=session_list)
    try:
        specs = [
            {"subject": "step 1", "active_form": "Doing 1"},
            {"subject": "step 2"},
            {"subject": "step 3"},
        ]
        result = await _invoke(registry, "task_create_batch", tasks=specs)
        assert result.is_error is False

        payload = json.loads(result.content)
        assert payload["success"] is True
        assert payload["task_ids"] == [1, 2, 3]
        # Compact summary message — references first id and the range.
        message = payload["message"]
        assert "#1-#3" in message or "#1, #2, #3" in message
        assert "Mark #1 in_progress" in message

        # Sanity: list shows all three.
        listing = json.loads((await _invoke(registry, "task_list")).content)
        assert listing["count"] == 3
    finally:
        ToolRegistry.reset_execution_context(ctx_token)
@pytest.mark.asyncio
async def test_create_batch_rejects_empty(
    registry_with_session_tools: ToolRegistry,
) -> None:
    """An empty `tasks` array is a soft tool error, not an exception."""
    ctx_token = _set_ctx(agent_id="a", task_list_id="session:a:s")
    try:
        outcome = await _invoke(
            registry_with_session_tools, "task_create_batch", tasks=[]
        )
        payload = json.loads(outcome.content)
        assert payload["success"] is False
        assert "non-empty" in payload["error"]
    finally:
        ToolRegistry.reset_execution_context(ctx_token)
@pytest.mark.asyncio
async def test_create_batch_rejects_malformed_spec_atomically(
    registry_with_session_tools: ToolRegistry,
) -> None:
    """A bad subject in the middle of the batch must reject the whole
    batch — not leave partial state on disk."""
    registry = registry_with_session_tools
    ctx_token = _set_ctx(agent_id="a", task_list_id="session:a:s")
    try:
        outcome = await _invoke(
            registry,
            "task_create_batch",
            tasks=[{"subject": "good"}, {"subject": ""}],
        )
        assert json.loads(outcome.content)["success"] is False
        # Confirm zero tasks landed.
        listing = json.loads((await _invoke(registry, "task_list")).content)
        assert listing["count"] == 0
    finally:
        ToolRegistry.reset_execution_context(ctx_token)
@pytest.mark.asyncio
async def test_create_batch_hook_blocks_rolls_back_whole_batch(
    registry_with_session_tools: ToolRegistry,
) -> None:
    """If a task_created hook blocks even one task in the batch, the
    entire batch must roll back."""
    registry = registry_with_session_tools

    # Block on the second task only.
    def selective_blocker(ctx) -> None:
        if ctx.task.subject == "block me":
            raise BlockingHookError("policy")

    # NOTE(review): the hook is registered but never unregistered here —
    # if register_hook is process-global this could leak into later
    # tests; confirm the fixture resets hook state between tests.
    register_hook(HOOK_TASK_CREATED, selective_blocker)
    ctx_token = _set_ctx(agent_id="a", task_list_id="session:a:s")
    try:
        outcome = await _invoke(
            registry,
            "task_create_batch",
            tasks=[
                {"subject": "ok 1"},
                {"subject": "block me"},
                {"subject": "ok 3"},
            ],
        )
        payload = json.loads(outcome.content)
        assert payload["success"] is False
        assert "rolled back" in payload["error"]
        # All three rolled back.
        listing = json.loads((await _invoke(registry, "task_list")).content)
        assert listing["count"] == 0
    finally:
        ToolRegistry.reset_execution_context(ctx_token)
@pytest.mark.asyncio
async def test_create_batch_then_single_create_keeps_id_monotonic(
    registry_with_session_tools: ToolRegistry,
) -> None:
    """task_create_batch uses sequential ids; a follow-up task_create
    should pick up at the next id after the batch's highest."""
    registry = registry_with_session_tools
    ctx_token = _set_ctx(agent_id="a", task_list_id="session:a:s")
    try:
        batch = [{"subject": "a"}, {"subject": "b"}, {"subject": "c"}]
        await _invoke(registry, "task_create_batch", tasks=batch)
        follow_up = await _invoke(registry, "task_create", subject="d")
        assert json.loads(follow_up.content)["task_id"] == 4
    finally:
        ToolRegistry.reset_execution_context(ctx_token)
@pytest.mark.asyncio
async def test_completion_suffix_points_to_next_pending(
registry_with_session_tools: ToolRegistry,
+139 -13
View File
@@ -117,31 +117,68 @@ def _get_schema() -> dict[str, Any]:
}
def _create_batch_schema() -> dict[str, Any]:
return {
"type": "object",
"properties": {
"tasks": {
"type": "array",
"minItems": 1,
"description": (
"Array of task specs. Each becomes one task with a "
"sequential id. Atomic — all created or none."
),
"items": {
"type": "object",
"properties": {
"subject": {
"type": "string",
"description": "Imperative title (e.g. 'Crawl target URL').",
},
"description": {"type": "string"},
"active_form": {
"type": "string",
"description": (
"Present-continuous label shown while in_progress."
),
},
"metadata": {"type": "object"},
},
"required": ["subject"],
},
}
},
"required": ["tasks"],
}
# ---------------------------------------------------------------------------
# Tool descriptions
# ---------------------------------------------------------------------------
_CREATE_DESC = (
"Create a task on your own session task list to break down and track "
"multi-step work. Use when you have 3+ distinct steps, non-trivial "
"planning, or the user explicitly asks for tracked progress. Capture "
"tasks IMMEDIATELY after receiving instructions — don't narrate intent. "
"DO NOT use this for: a single trivial task, purely conversational "
"replies, greetings, or work that fits in one tool call. The user "
"sees this list live in the right rail.\n\n"
"Create ONE task on your own session task list. Use this for one-off "
"mid-run additions when you discover unplanned work after the initial "
"plan is laid out.\n\n"
"**For laying out a multi-step plan upfront, use `task_create_batch` "
"instead** — one tool call with all the steps is cheaper and atomic.\n\n"
"Fields:\n"
"- subject: short imperative title (e.g. 'Crawl target URLs').\n"
"- subject: short imperative title (e.g. 'Crawl target URL').\n"
"- description: optional, slightly longer 'what to do' note.\n"
"- active_form: present-continuous label shown while in_progress (e.g. "
"'Crawling target URLs'). If omitted, the spinner shows the subject.\n"
"'Crawling target URL'). If omitted, the spinner shows the subject.\n"
"- metadata: optional KV. Set _internal=true to hide from task_list."
)
_UPDATE_DESC = (
"Update a task on your own session task list. Workflow:\n"
"Update ONE task on your own session task list. There is no batch "
"update tool by design — every `completed` transition is a discrete "
"progress signal to the user.\n\n"
"Workflow:\n"
"- Mark a task `in_progress` BEFORE you start working on it.\n"
"- Mark it `completed` AS SOON as you finish it — never batch up "
" multiple completions to flush at the end.\n"
"- Mark it `completed` AS SOON as you finish it — do not let "
"multiple finished tasks pile up unmarked before flushing them at "
"the end of the run.\n"
"- Set status='deleted' to drop a task that's no longer relevant.\n\n"
"ONLY mark `completed` when the task is FULLY done. If you hit errors, "
"blockers, or partial state, keep it `in_progress` and create a new "
@@ -163,6 +200,15 @@ _GET_DESC = (
"task before updating it if you're not sure of current fields."
)
# Tool description for task_create_batch. Steers the model toward ONE
# batched call for the upfront plan and reserves single task_create for
# mid-run discoveries. This text is part of the prompt surface the LLM
# sees — edit wording deliberately.
_CREATE_BATCH_DESC = (
    "Create N tasks at once on your own session task list. **Use this "
    "FIRST when laying out a multi-step plan upfront** — replying to 5 "
    "posts is one `task_create_batch` with 5 entries, not 5 separate "
    "`task_create` calls. Atomic: all-or-none. Use single `task_create` "
    "for one-off mid-run additions when you discover unplanned work, "
    "not for the initial plan."
)
# ---------------------------------------------------------------------------
# Executors
@@ -227,6 +273,77 @@ def _make_create_executor(store: TaskStore):
return execute
def _make_create_batch_executor(store: TaskStore):
    """Build the async executor backing the `task_create_batch` tool.

    Semantics are atomic all-or-none: storage-level validation failures
    and hook blocks both surface as soft errors, and a hook block
    deletes every record the batch just wrote.
    """

    async def execute(inputs: dict) -> dict[str, Any]:
        list_id = _resolve_list_id()
        if not list_id:
            return {"success": False, "error": "No task_list_id resolved for this agent."}
        agent_id = current_agent_id() or ""

        raw_specs = inputs.get("tasks") or []
        if not isinstance(raw_specs, list) or not raw_specs:
            return {
                "success": False,
                "error": "task_create_batch requires a non-empty `tasks` array.",
            }

        # Storage layer validates subject; surface its error as a soft
        # tool_result so sibling tools don't cancel.
        try:
            created = await store.create_tasks_batch(list_id, raw_specs)
        except ValueError as exc:
            return {"success": False, "error": str(exc)}

        # Run task_created hooks per task; blocking on any aborts the
        # whole batch (delete every record we just wrote, return error).
        blocked: tuple[TaskRecord, BlockingHookError] | None = None
        for rec in created:
            try:
                await run_task_hooks(
                    HOOK_TASK_CREATED,
                    task_list_id=list_id,
                    task=rec,
                    agent_id=agent_id,
                )
            except BlockingHookError as exc:
                blocked = (rec, exc)
                break
        if blocked is not None:
            bad_rec, hook_exc = blocked
            logger.warning(
                "task_created hook blocked batch on task #%s: %s",
                bad_rec.id,
                hook_exc,
            )
            for r in created:
                await store.delete_task(list_id, r.id)
            return {
                "success": False,
                "error": (
                    f"Hook blocked task #{bad_rec.id} ({bad_rec.subject!r}); "
                    f"entire batch rolled back: {hook_exc}"
                ),
            }

        for rec in created:
            await emit_task_created(task_list_id=list_id, record=rec)

        new_ids = [r.id for r in created]
        first, last = new_ids[0], new_ids[-1]
        # Compact summary message — don't flood the conversation with
        # one line per created task.
        if len(new_ids) == 1:
            range_label = f"#{first}"
        elif new_ids == list(range(first, last + 1)):
            range_label = f"#{first}-#{last}"
        else:
            range_label = ", ".join(f"#{i}" for i in new_ids)
        return {
            "success": True,
            "task_list_id": list_id,
            "task_ids": new_ids,
            "message": (
                f"Created {len(new_ids)} task(s): {range_label}. "
                f"Mark #{first} in_progress before starting it."
            ),
            "tasks": [_serialize_task(r) for r in created],
        }

    return execute
def _make_update_executor(store: TaskStore):
async def execute(inputs: dict) -> dict[str, Any]:
list_id = _resolve_list_id()
@@ -426,9 +543,18 @@ class _OwnerSentinel: # noqa: N801 — internal sentinel class
def build_session_tools(
store: TaskStore | None = None,
) -> list[tuple[Tool, Any]]:
"""Build (Tool, executor) pairs for the four session task tools."""
"""Build (Tool, executor) pairs for the session task tools."""
s = store or get_task_store()
return [
(
Tool(
name="task_create_batch",
description=_CREATE_BATCH_DESC,
parameters=_create_batch_schema(),
concurrency_safe=False,
),
_make_create_batch_executor(s),
),
(
Tool(
name="task_create",
+19 -16
View File
@@ -108,6 +108,25 @@ function TaskListPanelInner({ title, variant = "rail", onClose }: TaskListPanelP
</p>
) : (
<>
{/* Completed sits above Active so finished tasks stay visually
* "above" the work that came after them — preserves the order
* the user originally saw before the status flipped. */}
<Section
label="Completed"
count={buckets.completed.length}
open={completedOpen}
onToggle={() => setCompletedOpen((v) => !v)}
>
{buckets.completed.map((t) => (
<RefItem
key={t.id}
task={t}
itemRefs={itemRefs}
unresolved={[]}
onJumpToBlocker={handleJumpToBlocker}
/>
))}
</Section>
<Section
label="Active"
count={buckets.active.length}
@@ -140,22 +159,6 @@ function TaskListPanelInner({ title, variant = "rail", onClose }: TaskListPanelP
/>
))}
</Section>
<Section
label="Completed"
count={buckets.completed.length}
open={completedOpen}
onToggle={() => setCompletedOpen((v) => !v)}
>
{buckets.completed.map((t) => (
<RefItem
key={t.id}
task={t}
itemRefs={itemRefs}
unresolved={[]}
onJumpToBlocker={handleJumpToBlocker}
/>
))}
</Section>
</>
)}
</div>