feat: move thinking-tag handling to the frontend
This commit is contained in:
@@ -0,0 +1,8 @@
|
||||
{"type": "connection", "event": "connect", "ts": "2026-04-04T01:10:38.245667+00:00", "profile": "default"}
|
||||
{"type": "connection", "event": "hello", "details": {"version": "1.0"}, "ts": "2026-04-04T01:10:38.247207+00:00", "profile": "default"}
|
||||
{"type": "connection", "event": "disconnect", "ts": "2026-04-04T01:11:57.148273+00:00", "profile": "default"}
|
||||
{"type": "connection", "event": "connect", "ts": "2026-04-04T01:12:09.162378+00:00", "profile": "default"}
|
||||
{"type": "connection", "event": "hello", "details": {"version": "1.0"}, "ts": "2026-04-04T01:12:09.163899+00:00", "profile": "default"}
|
||||
{"type": "connection", "event": "disconnect", "ts": "2026-04-04T01:15:12.826042+00:00", "profile": "default"}
|
||||
{"type": "connection", "event": "connect", "ts": "2026-04-04T01:15:30.842533+00:00", "profile": "default"}
|
||||
{"type": "connection", "event": "hello", "details": {"version": "1.0"}, "ts": "2026-04-04T01:15:30.845025+00:00", "profile": "default"}
|
||||
@@ -1349,7 +1349,6 @@ queen_node = NodeSpec(
|
||||
output_keys=[], # Queen should never have this
|
||||
nullable_output_keys=[], # Queen should never have this
|
||||
skip_judge=True, # Queen is a conversational agent; suppress tool-use pressure feedback
|
||||
thinking_tags=["situation", "monologue"],
|
||||
tools=sorted(
|
||||
set(
|
||||
_QUEEN_PLANNING_TOOLS
|
||||
|
||||
@@ -1,151 +0,0 @@
|
||||
"""Streaming XML tag filter for thinking tags.
|
||||
|
||||
Strips configured XML tags (e.g. ``<situation>``, ``<monologue>``) from
|
||||
a chunked text stream while preserving the full text for conversation
|
||||
storage. The filter is stateful — it handles chunks that split mid-tag.
|
||||
|
||||
Only touches text content. Tool calls flow through a completely separate
|
||||
code path and are never affected by this filter.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from collections.abc import Sequence
|
||||
|
||||
|
||||
class ThinkingTagFilter:
    """Strips XML thinking tags from a streaming text output.

    Buffers content inside configured tags and yields only the visible
    content outside those tags. Handles chunks that split across tag
    boundaries (e.g. a chunk ending with ``"<mono"``).

    Args:
        tag_names: Tag names to strip (e.g. ``["situation", "monologue"]``).
    """

    def __init__(self, tag_names: Sequence[str]) -> None:
        self._tag_names: set[str] = set(tag_names)
        # Pre-compute all opening and closing tag strings for matching.
        self._open_tags: dict[str, str] = {name: f"<{name}>" for name in tag_names}
        self._close_tags: dict[str, str] = {name: f"</{name}>" for name in tag_names}
        # All possible tag prefixes for partial-match detection.
        # NOTE(review): _is_partial_tag_prefix returns on the first hit, so the
        # longest-first ordering is not load-bearing — presumably kept for clarity.
        self._all_tag_strings: list[str] = sorted(
            list(self._open_tags.values()) + list(self._close_tags.values()),
            key=len,
            reverse=True,
        )

        self._inside_tag: str | None = None  # Which tag we're inside, or None.
        self._pending: str = ""  # Chars that might be a partial tag.
        self._visible_text: str = ""  # Accumulated visible snapshot.

    def feed(self, chunk: str) -> str:
        """Feed a text chunk and return the visible portion.

        Characters inside thinking tags are suppressed. Characters that
        *might* be the start of a tag are buffered until the next chunk
        resolves the ambiguity.

        Returns:
            The portion of text that should be shown to the user.
        """
        # Prepend any buffered partial tag so a tag split across chunk
        # boundaries is re-examined as one contiguous string.
        buf = self._pending + chunk
        self._pending = ""
        visible = self._process(buf)
        self._visible_text += visible
        return visible

    @property
    def visible_snapshot(self) -> str:
        """Accumulated visible text so far (for the snapshot field)."""
        return self._visible_text

    def flush(self) -> str:
        """Flush any pending partial tag as visible text.

        Called at end-of-stream. If characters were buffered because they
        looked like the start of a tag but the stream ended before the tag
        completed, they are emitted as visible text (graceful degradation).
        """
        result = ""
        if self._pending:
            if self._inside_tag is None:
                result = self._pending
            # If inside a tag, discard pending (unclosed tag content).
            self._pending = ""
        self._visible_text += result
        return result

    # ------------------------------------------------------------------
    # Internal processing
    # ------------------------------------------------------------------

    def _process(self, buf: str) -> str:
        """Process a buffer, returning visible text and updating state.

        State machine with two modes: inside a thinking tag (suppress and
        scan for the closing tag) or outside (emit text and scan for an
        opening tag). ``self._pending`` carries ambiguous tails across calls.
        """
        visible_parts: list[str] = []
        i = 0
        n = len(buf)

        while i < n:
            if self._inside_tag is not None:
                # Inside a tag — look for the closing tag.
                close = self._close_tags[self._inside_tag]
                close_pos = buf.find(close, i)
                if close_pos == -1:
                    # Closing tag might be split across chunks.
                    # Check if the tail of buf is a prefix of the close tag.
                    # Longest candidate suffix first; tail_len caps at
                    # len(close) - 1 because a full close tag would have
                    # been found by buf.find above.
                    tail_len = min(len(close) - 1, n - i)
                    for tl in range(tail_len, 0, -1):
                        if close.startswith(buf[n - tl :]):
                            self._pending = buf[n - tl :]
                            i = n
                            break
                    else:
                        # for/else: no suffix matched a close-tag prefix —
                        # discard everything (inside tag).
                        i = n
                        break
                else:
                    # Found closing tag — skip past it and exit tag.
                    i = close_pos + len(close)
                    self._inside_tag = None
            else:
                # Outside any tag — look for '<'.
                lt_pos = buf.find("<", i)
                if lt_pos == -1:
                    # No '<' — everything is visible.
                    visible_parts.append(buf[i:])
                    i = n
                else:
                    # Emit text before the '<'.
                    if lt_pos > i:
                        visible_parts.append(buf[i:lt_pos])
                    # Try to match an opening tag at this position.
                    remainder = buf[lt_pos:]
                    matched = False
                    for name, open_tag in self._open_tags.items():
                        if remainder.startswith(open_tag):
                            # Full opening tag found — enter tag.
                            self._inside_tag = name
                            i = lt_pos + len(open_tag)
                            matched = True
                            break
                    if not matched:
                        # Check if remainder could be a partial tag prefix.
                        if self._is_partial_tag_prefix(remainder):
                            # Buffer and wait for next chunk.
                            self._pending = remainder
                            i = n
                        else:
                            # Not a known tag — '<' is visible text.
                            visible_parts.append("<")
                            i = lt_pos + 1

        return "".join(visible_parts)

    def _is_partial_tag_prefix(self, text: str) -> bool:
        """Check if text could be the start of a known tag string.

        A strict prefix only — an exact full-tag match returns False,
        because full tags are handled by the caller's match logic.
        """
        for tag_str in self._all_tag_strings:
            if tag_str.startswith(text) and len(text) < len(tag_str):
                return True
        return False
|
||||
@@ -2173,17 +2173,6 @@ class EventLoopNode(NodeProtocol):
|
||||
) -> None:
|
||||
nonlocal accumulated_text, _stream_error
|
||||
|
||||
# Thinking tag filter — strips configured XML tags from
|
||||
# client-facing output while keeping full text in conversation
|
||||
# history. Only created when thinking_tags is set on the node.
|
||||
_tag_filter = None
|
||||
if ctx.thinking_tags:
|
||||
from framework.graph.event_loop.thinking_tag_filter import (
|
||||
ThinkingTagFilter,
|
||||
)
|
||||
|
||||
_tag_filter = ThinkingTagFilter(ctx.thinking_tags)
|
||||
|
||||
async for event in ctx.llm.stream(
|
||||
messages=_msgs,
|
||||
system=conversation.system_prompt,
|
||||
@@ -2191,29 +2180,17 @@ class EventLoopNode(NodeProtocol):
|
||||
max_tokens=ctx.max_tokens,
|
||||
):
|
||||
if isinstance(event, TextDeltaEvent):
|
||||
# Full text (with tags) kept for conversation storage.
|
||||
accumulated_text = event.snapshot
|
||||
|
||||
if _tag_filter:
|
||||
visible_chunk = _tag_filter.feed(event.content)
|
||||
visible_snapshot = _tag_filter.visible_snapshot
|
||||
else:
|
||||
visible_chunk = event.content
|
||||
visible_snapshot = event.snapshot
|
||||
|
||||
# Only publish if there's visible content (skip chunks
|
||||
# that are entirely inside thinking tags).
|
||||
if visible_chunk:
|
||||
await self._publish_text_delta(
|
||||
stream_id,
|
||||
node_id,
|
||||
visible_chunk,
|
||||
visible_snapshot,
|
||||
ctx,
|
||||
execution_id,
|
||||
iteration=iteration,
|
||||
inner_turn=inner_turn,
|
||||
)
|
||||
await self._publish_text_delta(
|
||||
stream_id,
|
||||
node_id,
|
||||
event.content,
|
||||
event.snapshot,
|
||||
ctx,
|
||||
execution_id,
|
||||
iteration=iteration,
|
||||
inner_turn=inner_turn,
|
||||
)
|
||||
|
||||
elif isinstance(event, ToolCallEvent):
|
||||
_tc.append(event)
|
||||
@@ -2231,21 +2208,6 @@ class EventLoopNode(NodeProtocol):
|
||||
_stream_error = event
|
||||
logger.warning("Recoverable stream error: %s", event.error)
|
||||
|
||||
# Flush any pending partial tag at end of stream.
|
||||
if _tag_filter:
|
||||
tail = _tag_filter.flush()
|
||||
if tail:
|
||||
await self._publish_text_delta(
|
||||
stream_id,
|
||||
node_id,
|
||||
tail,
|
||||
_tag_filter.visible_snapshot,
|
||||
ctx,
|
||||
execution_id,
|
||||
iteration=iteration,
|
||||
inner_turn=inner_turn,
|
||||
)
|
||||
|
||||
_llm_stream_t0 = time.monotonic()
|
||||
self._stream_task = asyncio.create_task(_do_stream())
|
||||
logger.debug("[_run_single_turn] inner_turn=%d: Stream task created, waiting...", inner_turn)
|
||||
|
||||
@@ -219,17 +219,6 @@ class NodeSpec(BaseModel):
|
||||
),
|
||||
)
|
||||
|
||||
# Structured thinking tags — stripped from client-facing output but kept in
|
||||
# conversation history so the LLM sees its own reasoning on subsequent turns.
|
||||
thinking_tags: list[str] | None = Field(
|
||||
default=None,
|
||||
description=(
|
||||
"XML tag names stripped from client output but kept in conversation "
|
||||
"history. e.g. ['situation', 'monologue'] strips <situation>...</situation> "
|
||||
"from the user-facing stream while preserving it for the LLM."
|
||||
),
|
||||
)
|
||||
|
||||
model_config = {"extra": "allow", "arbitrary_types_allowed": True}
|
||||
|
||||
def is_queen_node(self) -> bool:
|
||||
@@ -579,9 +568,6 @@ class NodeContext:
|
||||
# the queen to record the current phase per iteration.
|
||||
iteration_metadata_provider: Any = None # Callable[[], dict] | None
|
||||
|
||||
# Structured thinking tags — propagated from NodeSpec.thinking_tags.
|
||||
thinking_tags: list[str] | None = None
|
||||
|
||||
@property
|
||||
def is_queen_stream(self) -> bool:
|
||||
"""Return True when this context belongs to the queen conversation."""
|
||||
|
||||
@@ -2,6 +2,8 @@ import ReactMarkdown from "react-markdown";
|
||||
import remarkGfm from "remark-gfm";
|
||||
import type { Components } from "react-markdown";
|
||||
import { cn } from "@/lib/utils";
|
||||
import { parseThinkingTags } from "@/lib/thinking-tags";
|
||||
import ThinkingBlock from "./ThinkingBlock";
|
||||
|
||||
const components: Components = {
|
||||
// Headers: same size as body text, just bold — keeps chat bubbles compact
|
||||
@@ -87,11 +89,30 @@ interface MarkdownContentProps {
|
||||
}
|
||||
|
||||
export default function MarkdownContent({ content, className }: MarkdownContentProps) {
|
||||
const segments = parseThinkingTags(content);
|
||||
|
||||
// Fast path: no thinking tags — render as before
|
||||
if (segments.length === 1 && segments[0].type === "text") {
|
||||
return (
|
||||
<div className={cn("break-words text-foreground", className)}>
|
||||
<ReactMarkdown remarkPlugins={remarkPlugins} components={components}>
|
||||
{content}
|
||||
</ReactMarkdown>
|
||||
</div>
|
||||
);
|
||||
}
|
||||
|
||||
return (
|
||||
<div className={cn("break-words text-foreground", className)}>
|
||||
<ReactMarkdown remarkPlugins={remarkPlugins} components={components}>
|
||||
{content}
|
||||
</ReactMarkdown>
|
||||
{segments.map((seg, i) =>
|
||||
seg.type === "thinking" ? (
|
||||
<ThinkingBlock key={i} content={seg.content} />
|
||||
) : (
|
||||
<ReactMarkdown key={i} remarkPlugins={remarkPlugins} components={components}>
|
||||
{seg.content}
|
||||
</ReactMarkdown>
|
||||
),
|
||||
)}
|
||||
</div>
|
||||
);
|
||||
}
|
||||
|
||||
@@ -0,0 +1,31 @@
|
||||
import { useState } from "react";
|
||||
import { ChevronRight, ChevronDown } from "lucide-react";
|
||||
import MarkdownContent from "./MarkdownContent";
|
||||
|
||||
interface ThinkingBlockProps {
|
||||
content: string;
|
||||
defaultExpanded?: boolean;
|
||||
}
|
||||
|
||||
/**
 * Collapsible "Thinking" panel for LLM reasoning content.
 * Starts collapsed unless `defaultExpanded` is set; the header button
 * toggles visibility of the markdown-rendered body.
 */
export default function ThinkingBlock({ content, defaultExpanded = false }: ThinkingBlockProps) {
  const [expanded, setExpanded] = useState(defaultExpanded);

  // Pick the chevron component once per render instead of branching in JSX.
  const Chevron = expanded ? ChevronDown : ChevronRight;

  return (
    <div className="my-1.5 rounded-lg border border-border/30 bg-muted/15 overflow-hidden">
      <button
        onClick={() => setExpanded((prev) => !prev)}
        className="flex items-center gap-1.5 w-full px-2.5 py-1.5 text-xs text-muted-foreground hover:text-foreground transition-colors cursor-pointer"
      >
        <Chevron className="w-3 h-3 shrink-0" />
        <span className="font-medium">Thinking</span>
      </button>
      {expanded ? (
        <div className="px-2.5 pb-2 text-xs text-muted-foreground border-t border-border/15">
          <MarkdownContent content={content} />
        </div>
      ) : null}
    </div>
  );
}
|
||||
@@ -0,0 +1,98 @@
|
||||
/**
|
||||
* Parse XML-style thinking tags from LLM output into structured segments.
|
||||
* All thinking tags are merged into unified "thinking" segments regardless
|
||||
* of the original tag name (situation, monologue, execution_plan, etc.).
|
||||
*/
|
||||
|
||||
const THINKING_TAGS = ["situation", "monologue", "execution_plan"];
|
||||
|
||||
export interface TextSegment {
|
||||
type: "text" | "thinking";
|
||||
content: string;
|
||||
}
|
||||
|
||||
const TAG_REGEX = new RegExp(
|
||||
`<(/?)(${THINKING_TAGS.join("|")})>`,
|
||||
"g",
|
||||
);
|
||||
|
||||
/**
|
||||
* Parse text containing XML-style thinking tags into segments.
|
||||
*
|
||||
* Adjacent thinking blocks are merged into a single segment so they
|
||||
* render as one collapsible "Reasoning" block in the UI.
|
||||
*/
|
||||
export function parseThinkingTags(text: string): TextSegment[] {
|
||||
const raw: TextSegment[] = [];
|
||||
let lastIndex = 0;
|
||||
let insideTag: string | null = null;
|
||||
let tagContentStart = 0;
|
||||
|
||||
TAG_REGEX.lastIndex = 0;
|
||||
let match: RegExpExecArray | null;
|
||||
|
||||
while ((match = TAG_REGEX.exec(text)) !== null) {
|
||||
const isClosing = match[1] === "/";
|
||||
const tagName = match[2];
|
||||
|
||||
if (!isClosing && insideTag === null) {
|
||||
const before = text.slice(lastIndex, match.index);
|
||||
if (before) {
|
||||
raw.push({ type: "text", content: before });
|
||||
}
|
||||
insideTag = tagName;
|
||||
tagContentStart = match.index + match[0].length;
|
||||
} else if (isClosing && tagName === insideTag) {
|
||||
const inner = text.slice(tagContentStart, match.index);
|
||||
if (inner.trim()) {
|
||||
raw.push({ type: "thinking", content: inner });
|
||||
}
|
||||
insideTag = null;
|
||||
lastIndex = match.index + match[0].length;
|
||||
}
|
||||
}
|
||||
|
||||
if (insideTag !== null) {
|
||||
const inner = text.slice(tagContentStart);
|
||||
if (inner.trim()) {
|
||||
raw.push({ type: "thinking", content: inner });
|
||||
}
|
||||
} else {
|
||||
const tail = text.slice(lastIndex);
|
||||
if (tail) {
|
||||
raw.push({ type: "text", content: tail });
|
||||
}
|
||||
}
|
||||
|
||||
if (raw.length === 0) {
|
||||
return [{ type: "text", content: text }];
|
||||
}
|
||||
|
||||
// Merge adjacent thinking segments into one.
|
||||
// Whitespace-only text between thinking blocks does not break adjacency —
|
||||
// e.g. </situation>\n\n<monologue> should produce one "Thinking" block.
|
||||
const merged: TextSegment[] = [];
|
||||
for (let i = 0; i < raw.length; i++) {
|
||||
const seg = raw[i];
|
||||
const prev = merged[merged.length - 1];
|
||||
|
||||
// Skip whitespace-only text segments that sit between two thinking segments
|
||||
if (
|
||||
seg.type === "text" && !seg.content.trim() &&
|
||||
prev?.type === "thinking" &&
|
||||
raw[i + 1]?.type === "thinking"
|
||||
) {
|
||||
continue;
|
||||
}
|
||||
|
||||
if (prev && prev.type === "thinking" && seg.type === "thinking") {
|
||||
prev.content += "\n" + seg.content;
|
||||
} else if (prev && prev.type === seg.type) {
|
||||
prev.content += seg.content;
|
||||
} else {
|
||||
merged.push({ ...seg });
|
||||
}
|
||||
}
|
||||
|
||||
return merged;
|
||||
}
|
||||
Reference in New Issue
Block a user