feat: move thinking-tag handling to the frontend

This commit is contained in:
Richard Tang
2026-04-03 18:20:45 -07:00
parent 043c79e0e4
commit 2b263f6e10
8 changed files with 171 additions and 217 deletions
@@ -0,0 +1,8 @@
{"type": "connection", "event": "connect", "ts": "2026-04-04T01:10:38.245667+00:00", "profile": "default"}
{"type": "connection", "event": "hello", "details": {"version": "1.0"}, "ts": "2026-04-04T01:10:38.247207+00:00", "profile": "default"}
{"type": "connection", "event": "disconnect", "ts": "2026-04-04T01:11:57.148273+00:00", "profile": "default"}
{"type": "connection", "event": "connect", "ts": "2026-04-04T01:12:09.162378+00:00", "profile": "default"}
{"type": "connection", "event": "hello", "details": {"version": "1.0"}, "ts": "2026-04-04T01:12:09.163899+00:00", "profile": "default"}
{"type": "connection", "event": "disconnect", "ts": "2026-04-04T01:15:12.826042+00:00", "profile": "default"}
{"type": "connection", "event": "connect", "ts": "2026-04-04T01:15:30.842533+00:00", "profile": "default"}
{"type": "connection", "event": "hello", "details": {"version": "1.0"}, "ts": "2026-04-04T01:15:30.845025+00:00", "profile": "default"}
@@ -1349,7 +1349,6 @@ queen_node = NodeSpec(
output_keys=[], # Queen should never have this
nullable_output_keys=[], # Queen should never have this
skip_judge=True, # Queen is a conversational agent; suppress tool-use pressure feedback
thinking_tags=["situation", "monologue"],
tools=sorted(
set(
_QUEEN_PLANNING_TOOLS
@@ -1,151 +0,0 @@
"""Streaming XML tag filter for thinking tags.

Strips configured XML tags (e.g. ``<situation>``, ``<monologue>``) from
a chunked text stream while preserving the full text for conversation
storage. The filter is stateful; it handles chunks that split mid-tag.

Only touches text content. Tool calls flow through a completely separate
code path and are never affected by this filter.
"""

from __future__ import annotations

from collections.abc import Sequence


class ThinkingTagFilter:
    """Strips XML thinking tags from a streaming text output.

    Buffers content inside configured tags and yields only the visible
    content outside those tags. Handles chunks that split across tag
    boundaries (e.g. a chunk ending with ``"<mono"``).

    Args:
        tag_names: Tag names to strip (e.g. ``["situation", "monologue"]``).
    """

    def __init__(self, tag_names: Sequence[str]) -> None:
        self._tag_names: set[str] = set(tag_names)
        # Pre-compute all opening and closing tag strings for matching.
        self._open_tags: dict[str, str] = {name: f"<{name}>" for name in tag_names}
        self._close_tags: dict[str, str] = {name: f"</{name}>" for name in tag_names}
        # All possible tag strings, sorted longest-first, used for
        # partial-match detection in _is_partial_tag_prefix.
        self._all_tag_strings: list[str] = sorted(
            list(self._open_tags.values()) + list(self._close_tags.values()),
            key=len,
            reverse=True,
        )
        # --- Streaming state, mutated by feed()/flush() ---
        self._inside_tag: str | None = None  # Which tag we're inside, or None.
        self._pending: str = ""  # Chars that might be a partial tag.
        self._visible_text: str = ""  # Accumulated visible snapshot.

    def feed(self, chunk: str) -> str:
        """Feed a text chunk and return the visible portion.

        Characters inside thinking tags are suppressed. Characters that
        *might* be the start of a tag are buffered until the next chunk
        resolves the ambiguity.

        Args:
            chunk: Next piece of streamed text; may split a tag across
                chunk boundaries.

        Returns:
            The portion of text that should be shown to the user.
        """
        # Re-prepend anything buffered from the previous chunk so a tag
        # split across chunks is matched as one contiguous string.
        buf = self._pending + chunk
        self._pending = ""
        visible = self._process(buf)
        self._visible_text += visible
        return visible

    @property
    def visible_snapshot(self) -> str:
        """Accumulated visible text so far (for the snapshot field)."""
        return self._visible_text

    def flush(self) -> str:
        """Flush any pending partial tag as visible text.

        Called at end-of-stream. If characters were buffered because they
        looked like the start of a tag but the stream ended before the tag
        completed, they are emitted as visible text (graceful degradation).

        Returns:
            The flushed text; "" when nothing was pending or when the
            stream ended inside an unclosed thinking tag.
        """
        result = ""
        if self._pending:
            if self._inside_tag is None:
                result = self._pending
            # If inside a tag, discard pending (unclosed tag content).
            self._pending = ""
        self._visible_text += result
        return result

    # ------------------------------------------------------------------
    # Internal processing
    # ------------------------------------------------------------------

    def _process(self, buf: str) -> str:
        """Process a buffer, returning visible text and updating state.

        Scans ``buf`` left to right. While inside a tag, content is
        suppressed until the matching close tag; outside, content is
        visible up to the next recognized opening tag. Ambiguous tails
        (a suffix that could begin a tag) are stashed in ``_pending``.
        """
        visible_parts: list[str] = []
        i = 0
        n = len(buf)
        while i < n:
            if self._inside_tag is not None:
                # Inside a tag — look for the closing tag.
                close = self._close_tags[self._inside_tag]
                close_pos = buf.find(close, i)
                if close_pos == -1:
                    # Closing tag might be split across chunks.
                    # Check if the tail of buf is a prefix of the close tag.
                    tail_len = min(len(close) - 1, n - i)
                    for tl in range(tail_len, 0, -1):
                        if close.startswith(buf[n - tl :]):
                            # Ambiguous tail — buffer it for the next chunk.
                            self._pending = buf[n - tl :]
                            i = n
                            break
                    else:
                        # No partial match — discard everything (inside tag).
                        i = n
                    # Either way the buffer is exhausted; exit the scan.
                    break
                else:
                    # Found closing tag — skip past it and exit tag.
                    i = close_pos + len(close)
                    self._inside_tag = None
            else:
                # Outside any tag — look for '<'.
                lt_pos = buf.find("<", i)
                if lt_pos == -1:
                    # No '<' — everything is visible.
                    visible_parts.append(buf[i:])
                    i = n
                else:
                    # Emit text before the '<'.
                    if lt_pos > i:
                        visible_parts.append(buf[i:lt_pos])
                    # Try to match an opening tag at this position.
                    remainder = buf[lt_pos:]
                    matched = False
                    for name, open_tag in self._open_tags.items():
                        if remainder.startswith(open_tag):
                            # Full opening tag found — enter tag.
                            self._inside_tag = name
                            i = lt_pos + len(open_tag)
                            matched = True
                            break
                    if not matched:
                        # Check if remainder could be a partial tag prefix.
                        if self._is_partial_tag_prefix(remainder):
                            # Buffer and wait for next chunk.
                            self._pending = remainder
                            i = n
                        else:
                            # Not a known tag — '<' is visible text.
                            visible_parts.append("<")
                            i = lt_pos + 1
        return "".join(visible_parts)

    def _is_partial_tag_prefix(self, text: str) -> bool:
        """Check if text could be the start of a known tag string.

        Strict prefixes only: a string equal in length to a full tag
        returns False (complete opening tags are matched by the caller).
        """
        for tag_str in self._all_tag_strings:
            if tag_str.startswith(text) and len(text) < len(tag_str):
                return True
        return False
+2 -40
View File
@@ -2173,17 +2173,6 @@ class EventLoopNode(NodeProtocol):
) -> None:
nonlocal accumulated_text, _stream_error
# Thinking tag filter — strips configured XML tags from
# client-facing output while keeping full text in conversation
# history. Only created when thinking_tags is set on the node.
_tag_filter = None
if ctx.thinking_tags:
from framework.graph.event_loop.thinking_tag_filter import (
ThinkingTagFilter,
)
_tag_filter = ThinkingTagFilter(ctx.thinking_tags)
async for event in ctx.llm.stream(
messages=_msgs,
system=conversation.system_prompt,
@@ -2191,24 +2180,12 @@ class EventLoopNode(NodeProtocol):
max_tokens=ctx.max_tokens,
):
if isinstance(event, TextDeltaEvent):
# Full text (with tags) kept for conversation storage.
accumulated_text = event.snapshot
if _tag_filter:
visible_chunk = _tag_filter.feed(event.content)
visible_snapshot = _tag_filter.visible_snapshot
else:
visible_chunk = event.content
visible_snapshot = event.snapshot
# Only publish if there's visible content (skip chunks
# that are entirely inside thinking tags).
if visible_chunk:
await self._publish_text_delta(
stream_id,
node_id,
visible_chunk,
visible_snapshot,
event.content,
event.snapshot,
ctx,
execution_id,
iteration=iteration,
@@ -2231,21 +2208,6 @@ class EventLoopNode(NodeProtocol):
_stream_error = event
logger.warning("Recoverable stream error: %s", event.error)
# Flush any pending partial tag at end of stream.
if _tag_filter:
tail = _tag_filter.flush()
if tail:
await self._publish_text_delta(
stream_id,
node_id,
tail,
_tag_filter.visible_snapshot,
ctx,
execution_id,
iteration=iteration,
inner_turn=inner_turn,
)
_llm_stream_t0 = time.monotonic()
self._stream_task = asyncio.create_task(_do_stream())
logger.debug("[_run_single_turn] inner_turn=%d: Stream task created, waiting...", inner_turn)
-14
View File
@@ -219,17 +219,6 @@ class NodeSpec(BaseModel):
),
)
# Structured thinking tags — stripped from client-facing output but kept in
# conversation history so the LLM sees its own reasoning on subsequent turns.
thinking_tags: list[str] | None = Field(
default=None,
description=(
"XML tag names stripped from client output but kept in conversation "
"history. e.g. ['situation', 'monologue'] strips <situation>...</situation> "
"from the user-facing stream while preserving it for the LLM."
),
)
model_config = {"extra": "allow", "arbitrary_types_allowed": True}
def is_queen_node(self) -> bool:
@@ -579,9 +568,6 @@ class NodeContext:
# the queen to record the current phase per iteration.
iteration_metadata_provider: Any = None # Callable[[], dict] | None
# Structured thinking tags — propagated from NodeSpec.thinking_tags.
thinking_tags: list[str] | None = None
@property
def is_queen_stream(self) -> bool:
"""Return True when this context belongs to the queen conversation."""
@@ -2,6 +2,8 @@ import ReactMarkdown from "react-markdown";
import remarkGfm from "remark-gfm";
import type { Components } from "react-markdown";
import { cn } from "@/lib/utils";
import { parseThinkingTags } from "@/lib/thinking-tags";
import ThinkingBlock from "./ThinkingBlock";
const components: Components = {
// Headers: same size as body text, just bold — keeps chat bubbles compact
@@ -87,6 +89,10 @@ interface MarkdownContentProps {
}
export default function MarkdownContent({ content, className }: MarkdownContentProps) {
const segments = parseThinkingTags(content);
// Fast path: no thinking tags — render as before
if (segments.length === 1 && segments[0].type === "text") {
return (
<div className={cn("break-words text-foreground", className)}>
<ReactMarkdown remarkPlugins={remarkPlugins} components={components}>
@@ -95,3 +101,18 @@ export default function MarkdownContent({ content, className }: MarkdownContentP
</div>
);
}
return (
<div className={cn("break-words text-foreground", className)}>
{segments.map((seg, i) =>
seg.type === "thinking" ? (
<ThinkingBlock key={i} content={seg.content} />
) : (
<ReactMarkdown key={i} remarkPlugins={remarkPlugins} components={components}>
{seg.content}
</ReactMarkdown>
),
)}
</div>
);
}
@@ -0,0 +1,31 @@
import { useState } from "react";
import { ChevronRight, ChevronDown } from "lucide-react";
import MarkdownContent from "./MarkdownContent";
// Props for the collapsible reasoning block.
interface ThinkingBlockProps {
  // Raw thinking-tag content; rendered as markdown while expanded.
  content: string;
  // Whether the block starts open; collapsed when omitted.
  defaultExpanded?: boolean;
}

/**
 * Collapsible "Thinking" panel for LLM reasoning content.
 * Shows a toggle row with a chevron; the markdown body is only
 * mounted while the panel is open.
 */
export default function ThinkingBlock({ content, defaultExpanded = false }: ThinkingBlockProps) {
  const [isOpen, setIsOpen] = useState(defaultExpanded);
  // Pick the chevron component once per render instead of branching inline.
  const Chevron = isOpen ? ChevronDown : ChevronRight;
  const toggle = () => setIsOpen((prev) => !prev);

  return (
    <div className="my-1.5 rounded-lg border border-border/30 bg-muted/15 overflow-hidden">
      <button
        onClick={toggle}
        className="flex items-center gap-1.5 w-full px-2.5 py-1.5 text-xs text-muted-foreground hover:text-foreground transition-colors cursor-pointer"
      >
        <Chevron className="w-3 h-3 shrink-0" />
        <span className="font-medium">Thinking</span>
      </button>
      {isOpen && (
        <div className="px-2.5 pb-2 text-xs text-muted-foreground border-t border-border/15">
          <MarkdownContent content={content} />
        </div>
      )}
    </div>
  );
}
+98
View File
@@ -0,0 +1,98 @@
/**
* Parse XML-style thinking tags from LLM output into structured segments.
* All thinking tags are merged into unified "thinking" segments regardless
* of the original tag name (situation, monologue, execution_plan, etc.).
*/
/** Tag names whose content is treated as "thinking" rather than text. */
const THINKING_TAGS = ["situation", "monologue", "execution_plan"];

export interface TextSegment {
  type: "text" | "thinking";
  content: string;
}

// Matches an opening or closing thinking tag. Group 1 is "/" for a
// closing tag (else ""); group 2 is the tag name.
const TAG_REGEX = new RegExp(
  `<(/?)(${THINKING_TAGS.join("|")})>`,
  "g",
);

/**
 * Parse text containing XML-style thinking tags into segments.
 *
 * Adjacent thinking blocks are merged into a single segment so they
 * render as one collapsible "Reasoning" block in the UI. An unclosed
 * opening tag (mid-stream) treats everything after it as thinking.
 * Stray closing tags with no matching open are left in the text.
 *
 * @param text Raw LLM output, possibly containing thinking tags.
 * @returns Ordered segments; empty when the text consists solely of
 *   tags with whitespace-only content (nothing to show yet).
 */
export function parseThinkingTags(text: string): TextSegment[] {
  const raw: TextSegment[] = [];
  let lastIndex = 0;
  let insideTag: string | null = null;
  let tagContentStart = 0;
  // True once at least one opening tag has been consumed. Distinguishes
  // "no tags at all" from "tags whose content was entirely whitespace"
  // in the empty-result fallback below.
  let consumedTag = false;

  TAG_REGEX.lastIndex = 0;
  let match: RegExpExecArray | null;
  while ((match = TAG_REGEX.exec(text)) !== null) {
    const isClosing = match[1] === "/";
    const tagName = match[2];
    if (!isClosing && insideTag === null) {
      consumedTag = true;
      const before = text.slice(lastIndex, match.index);
      if (before) {
        raw.push({ type: "text", content: before });
      }
      insideTag = tagName;
      tagContentStart = match.index + match[0].length;
    } else if (isClosing && tagName === insideTag) {
      const inner = text.slice(tagContentStart, match.index);
      if (inner.trim()) {
        raw.push({ type: "thinking", content: inner });
      }
      insideTag = null;
      lastIndex = match.index + match[0].length;
    }
    // Other cases (nested open, mismatched/stray close) are ignored
    // here and end up inside the surrounding text/thinking span.
  }

  if (insideTag !== null) {
    // Unclosed tag (e.g. mid-stream): the rest is thinking content.
    const inner = text.slice(tagContentStart);
    if (inner.trim()) {
      raw.push({ type: "thinking", content: inner });
    }
  } else {
    const tail = text.slice(lastIndex);
    if (tail) {
      raw.push({ type: "text", content: tail });
    }
  }

  if (raw.length === 0) {
    // Bug fix: previously this always returned the original text, which
    // leaked raw tags to the user when every tag's content was
    // whitespace-only (e.g. "<situation>   </situation>" or a bare
    // "<situation>" arriving mid-stream). Only fall back to the raw
    // text when no tag was parsed at all.
    return consumedTag ? [] : [{ type: "text", content: text }];
  }

  // Merge adjacent thinking segments into one.
  // Whitespace-only text between thinking blocks does not break adjacency —
  // e.g. </situation>\n\n<monologue> should produce one "Thinking" block.
  const merged: TextSegment[] = [];
  for (let i = 0; i < raw.length; i++) {
    const seg = raw[i];
    const prev = merged[merged.length - 1];
    // Skip whitespace-only text segments that sit between two thinking segments
    if (
      seg.type === "text" && !seg.content.trim() &&
      prev?.type === "thinking" &&
      raw[i + 1]?.type === "thinking"
    ) {
      continue;
    }
    if (prev && prev.type === "thinking" && seg.type === "thinking") {
      // Join merged thinking blocks with a newline so markdown renders
      // them as separate paragraphs/lines.
      prev.content += "\n" + seg.content;
    } else if (prev && prev.type === seg.type) {
      prev.content += seg.content;
    } else {
      merged.push({ ...seg });
    }
  }
  return merged;
}