feat: move thinking-tag handling to the frontend
This commit is contained in:
@@ -0,0 +1,8 @@
|
||||
{"type": "connection", "event": "connect", "ts": "2026-04-04T01:10:38.245667+00:00", "profile": "default"}
|
||||
{"type": "connection", "event": "hello", "details": {"version": "1.0"}, "ts": "2026-04-04T01:10:38.247207+00:00", "profile": "default"}
|
||||
{"type": "connection", "event": "disconnect", "ts": "2026-04-04T01:11:57.148273+00:00", "profile": "default"}
|
||||
{"type": "connection", "event": "connect", "ts": "2026-04-04T01:12:09.162378+00:00", "profile": "default"}
|
||||
{"type": "connection", "event": "hello", "details": {"version": "1.0"}, "ts": "2026-04-04T01:12:09.163899+00:00", "profile": "default"}
|
||||
{"type": "connection", "event": "disconnect", "ts": "2026-04-04T01:15:12.826042+00:00", "profile": "default"}
|
||||
{"type": "connection", "event": "connect", "ts": "2026-04-04T01:15:30.842533+00:00", "profile": "default"}
|
||||
{"type": "connection", "event": "hello", "details": {"version": "1.0"}, "ts": "2026-04-04T01:15:30.845025+00:00", "profile": "default"}
|
||||
@@ -1349,7 +1349,6 @@ queen_node = NodeSpec(
|
||||
output_keys=[], # Queen should never have this
|
||||
nullable_output_keys=[], # Queen should never have this
|
||||
skip_judge=True, # Queen is a conversational agent; suppress tool-use pressure feedback
|
||||
thinking_tags=["situation", "monologue"],
|
||||
tools=sorted(
|
||||
set(
|
||||
_QUEEN_PLANNING_TOOLS
|
||||
|
||||
@@ -1,151 +0,0 @@
|
||||
"""Streaming XML tag filter for thinking tags.
|
||||
|
||||
Strips configured XML tags (e.g. ``<situation>``, ``<monologue>``) from
|
||||
a chunked text stream while preserving the full text for conversation
|
||||
storage. The filter is stateful — it handles chunks that split mid-tag.
|
||||
|
||||
Only touches text content. Tool calls flow through a completely separate
|
||||
code path and are never affected by this filter.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from collections.abc import Sequence
|
||||
|
||||
|
||||
class ThinkingTagFilter:
    """Strips XML thinking tags from a streaming text output.

    Buffers content inside configured tags and yields only the visible
    content outside those tags. Handles chunks that split across tag
    boundaries (e.g. a chunk ending with ``"<mono"``).

    Args:
        tag_names: Tag names to strip (e.g. ``["situation", "monologue"]``).
    """

    def __init__(self, tag_names: Sequence[str]) -> None:
        self._tag_names: set[str] = set(tag_names)
        # Pre-compute all opening and closing tag strings for matching.
        self._open_tags: dict[str, str] = {name: f"<{name}>" for name in tag_names}
        self._close_tags: dict[str, str] = {name: f"</{name}>" for name in tag_names}
        # All possible tag prefixes for partial-match detection.
        # NOTE(review): _is_partial_tag_prefix returns on the first hit, so the
        # longest-first ordering is not load-bearing — presumably kept for clarity.
        self._all_tag_strings: list[str] = sorted(
            list(self._open_tags.values()) + list(self._close_tags.values()),
            key=len,
            reverse=True,
        )

        self._inside_tag: str | None = None  # Which tag we're inside, or None.
        self._pending: str = ""  # Chars that might be a partial tag.
        self._visible_text: str = ""  # Accumulated visible snapshot.

    def feed(self, chunk: str) -> str:
        """Feed a text chunk and return the visible portion.

        Characters inside thinking tags are suppressed. Characters that
        *might* be the start of a tag are buffered until the next chunk
        resolves the ambiguity.

        Returns:
            The portion of text that should be shown to the user.
        """
        # Prepend any buffered partial tag so a tag split across chunk
        # boundaries is re-examined as one contiguous string.
        buf = self._pending + chunk
        self._pending = ""
        visible = self._process(buf)
        self._visible_text += visible
        return visible

    @property
    def visible_snapshot(self) -> str:
        """Accumulated visible text so far (for the snapshot field)."""
        return self._visible_text

    def flush(self) -> str:
        """Flush any pending partial tag as visible text.

        Called at end-of-stream. If characters were buffered because they
        looked like the start of a tag but the stream ended before the tag
        completed, they are emitted as visible text (graceful degradation).
        """
        result = ""
        if self._pending:
            if self._inside_tag is None:
                result = self._pending
            # If inside a tag, discard pending (unclosed tag content).
            self._pending = ""
        self._visible_text += result
        return result

    # ------------------------------------------------------------------
    # Internal processing
    # ------------------------------------------------------------------

    def _process(self, buf: str) -> str:
        """Process a buffer, returning visible text and updating state.

        State machine with two modes: inside a thinking tag (suppress and
        scan for the closing tag) or outside (emit text and scan for an
        opening tag). ``self._pending`` carries ambiguous tails across calls.
        """
        visible_parts: list[str] = []
        i = 0
        n = len(buf)

        while i < n:
            if self._inside_tag is not None:
                # Inside a tag — look for the closing tag.
                close = self._close_tags[self._inside_tag]
                close_pos = buf.find(close, i)
                if close_pos == -1:
                    # Closing tag might be split across chunks.
                    # Check if the tail of buf is a prefix of the close tag.
                    # Longest candidate suffix first; tail_len caps at
                    # len(close) - 1 because a full close tag would have
                    # been found by buf.find above.
                    tail_len = min(len(close) - 1, n - i)
                    for tl in range(tail_len, 0, -1):
                        if close.startswith(buf[n - tl :]):
                            self._pending = buf[n - tl :]
                            i = n
                            break
                    else:
                        # for/else: no suffix matched a close-tag prefix —
                        # discard everything (inside tag).
                        i = n
                        break
                else:
                    # Found closing tag — skip past it and exit tag.
                    i = close_pos + len(close)
                    self._inside_tag = None
            else:
                # Outside any tag — look for '<'.
                lt_pos = buf.find("<", i)
                if lt_pos == -1:
                    # No '<' — everything is visible.
                    visible_parts.append(buf[i:])
                    i = n
                else:
                    # Emit text before the '<'.
                    if lt_pos > i:
                        visible_parts.append(buf[i:lt_pos])
                    # Try to match an opening tag at this position.
                    remainder = buf[lt_pos:]
                    matched = False
                    for name, open_tag in self._open_tags.items():
                        if remainder.startswith(open_tag):
                            # Full opening tag found — enter tag.
                            self._inside_tag = name
                            i = lt_pos + len(open_tag)
                            matched = True
                            break
                    if not matched:
                        # Check if remainder could be a partial tag prefix.
                        if self._is_partial_tag_prefix(remainder):
                            # Buffer and wait for next chunk.
                            self._pending = remainder
                            i = n
                        else:
                            # Not a known tag — '<' is visible text.
                            visible_parts.append("<")
                            i = lt_pos + 1

        return "".join(visible_parts)

    def _is_partial_tag_prefix(self, text: str) -> bool:
        """Check if text could be the start of a known tag string.

        A strict prefix only — an exact full-tag match returns False,
        because full tags are handled by the caller's match logic.
        """
        for tag_str in self._all_tag_strings:
            if tag_str.startswith(text) and len(text) < len(tag_str):
                return True
        return False
|
||||
@@ -2173,17 +2173,6 @@ class EventLoopNode(NodeProtocol):
|
||||
) -> None:
|
||||
nonlocal accumulated_text, _stream_error
|
||||
|
||||
# Thinking tag filter — strips configured XML tags from
|
||||
# client-facing output while keeping full text in conversation
|
||||
# history. Only created when thinking_tags is set on the node.
|
||||
_tag_filter = None
|
||||
if ctx.thinking_tags:
|
||||
from framework.graph.event_loop.thinking_tag_filter import (
|
||||
ThinkingTagFilter,
|
||||
)
|
||||
|
||||
_tag_filter = ThinkingTagFilter(ctx.thinking_tags)
|
||||
|
||||
async for event in ctx.llm.stream(
|
||||
messages=_msgs,
|
||||
system=conversation.system_prompt,
|
||||
@@ -2191,29 +2180,17 @@ class EventLoopNode(NodeProtocol):
|
||||
max_tokens=ctx.max_tokens,
|
||||
):
|
||||
if isinstance(event, TextDeltaEvent):
|
||||
# Full text (with tags) kept for conversation storage.
|
||||
accumulated_text = event.snapshot
|
||||
|
||||
if _tag_filter:
|
||||
visible_chunk = _tag_filter.feed(event.content)
|
||||
visible_snapshot = _tag_filter.visible_snapshot
|
||||
else:
|
||||
visible_chunk = event.content
|
||||
visible_snapshot = event.snapshot
|
||||
|
||||
# Only publish if there's visible content (skip chunks
|
||||
# that are entirely inside thinking tags).
|
||||
if visible_chunk:
|
||||
await self._publish_text_delta(
|
||||
stream_id,
|
||||
node_id,
|
||||
visible_chunk,
|
||||
visible_snapshot,
|
||||
ctx,
|
||||
execution_id,
|
||||
iteration=iteration,
|
||||
inner_turn=inner_turn,
|
||||
)
|
||||
await self._publish_text_delta(
|
||||
stream_id,
|
||||
node_id,
|
||||
event.content,
|
||||
event.snapshot,
|
||||
ctx,
|
||||
execution_id,
|
||||
iteration=iteration,
|
||||
inner_turn=inner_turn,
|
||||
)
|
||||
|
||||
elif isinstance(event, ToolCallEvent):
|
||||
_tc.append(event)
|
||||
@@ -2231,21 +2208,6 @@ class EventLoopNode(NodeProtocol):
|
||||
_stream_error = event
|
||||
logger.warning("Recoverable stream error: %s", event.error)
|
||||
|
||||
# Flush any pending partial tag at end of stream.
|
||||
if _tag_filter:
|
||||
tail = _tag_filter.flush()
|
||||
if tail:
|
||||
await self._publish_text_delta(
|
||||
stream_id,
|
||||
node_id,
|
||||
tail,
|
||||
_tag_filter.visible_snapshot,
|
||||
ctx,
|
||||
execution_id,
|
||||
iteration=iteration,
|
||||
inner_turn=inner_turn,
|
||||
)
|
||||
|
||||
_llm_stream_t0 = time.monotonic()
|
||||
self._stream_task = asyncio.create_task(_do_stream())
|
||||
logger.debug("[_run_single_turn] inner_turn=%d: Stream task created, waiting...", inner_turn)
|
||||
|
||||
@@ -219,17 +219,6 @@ class NodeSpec(BaseModel):
|
||||
),
|
||||
)
|
||||
|
||||
# Structured thinking tags — stripped from client-facing output but kept in
|
||||
# conversation history so the LLM sees its own reasoning on subsequent turns.
|
||||
thinking_tags: list[str] | None = Field(
|
||||
default=None,
|
||||
description=(
|
||||
"XML tag names stripped from client output but kept in conversation "
|
||||
"history. e.g. ['situation', 'monologue'] strips <situation>...</situation> "
|
||||
"from the user-facing stream while preserving it for the LLM."
|
||||
),
|
||||
)
|
||||
|
||||
model_config = {"extra": "allow", "arbitrary_types_allowed": True}
|
||||
|
||||
def is_queen_node(self) -> bool:
|
||||
@@ -579,9 +568,6 @@ class NodeContext:
|
||||
# the queen to record the current phase per iteration.
|
||||
iteration_metadata_provider: Any = None # Callable[[], dict] | None
|
||||
|
||||
# Structured thinking tags — propagated from NodeSpec.thinking_tags.
|
||||
thinking_tags: list[str] | None = None
|
||||
|
||||
@property
|
||||
def is_queen_stream(self) -> bool:
|
||||
"""Return True when this context belongs to the queen conversation."""
|
||||
|
||||
@@ -2,6 +2,8 @@ import ReactMarkdown from "react-markdown";
|
||||
import remarkGfm from "remark-gfm";
|
||||
import type { Components } from "react-markdown";
|
||||
import { cn } from "@/lib/utils";
|
||||
import { parseThinkingTags } from "@/lib/thinking-tags";
|
||||
import ThinkingBlock from "./ThinkingBlock";
|
||||
|
||||
const components: Components = {
|
||||
// Headers: same size as body text, just bold — keeps chat bubbles compact
|
||||
@@ -87,11 +89,30 @@ interface MarkdownContentProps {
|
||||
}
|
||||
|
||||
export default function MarkdownContent({ content, className }: MarkdownContentProps) {
|
||||
const segments = parseThinkingTags(content);
|
||||
|
||||
// Fast path: no thinking tags — render as before
|
||||
if (segments.length === 1 && segments[0].type === "text") {
|
||||
return (
|
||||
<div className={cn("break-words text-foreground", className)}>
|
||||
<ReactMarkdown remarkPlugins={remarkPlugins} components={components}>
|
||||
{content}
|
||||
</ReactMarkdown>
|
||||
</div>
|
||||
);
|
||||
}
|
||||
|
||||
return (
|
||||
<div className={cn("break-words text-foreground", className)}>
|
||||
<ReactMarkdown remarkPlugins={remarkPlugins} components={components}>
|
||||
{content}
|
||||
</ReactMarkdown>
|
||||
{segments.map((seg, i) =>
|
||||
seg.type === "thinking" ? (
|
||||
<ThinkingBlock key={i} content={seg.content} />
|
||||
) : (
|
||||
<ReactMarkdown key={i} remarkPlugins={remarkPlugins} components={components}>
|
||||
{seg.content}
|
||||
</ReactMarkdown>
|
||||
),
|
||||
)}
|
||||
</div>
|
||||
);
|
||||
}
|
||||
|
||||
@@ -0,0 +1,31 @@
|
||||
import { useState } from "react";
|
||||
import { ChevronRight, ChevronDown } from "lucide-react";
|
||||
import MarkdownContent from "./MarkdownContent";
|
||||
|
||||
interface ThinkingBlockProps {
|
||||
content: string;
|
||||
defaultExpanded?: boolean;
|
||||
}
|
||||
|
||||
/**
 * Collapsible "Thinking" panel for LLM reasoning content.
 * Starts collapsed unless `defaultExpanded` is set; the header button
 * toggles visibility of the markdown-rendered body.
 */
export default function ThinkingBlock({ content, defaultExpanded = false }: ThinkingBlockProps) {
  const [expanded, setExpanded] = useState(defaultExpanded);

  // Pick the chevron component once per render instead of branching in JSX.
  const Chevron = expanded ? ChevronDown : ChevronRight;

  return (
    <div className="my-1.5 rounded-lg border border-border/30 bg-muted/15 overflow-hidden">
      <button
        onClick={() => setExpanded((prev) => !prev)}
        className="flex items-center gap-1.5 w-full px-2.5 py-1.5 text-xs text-muted-foreground hover:text-foreground transition-colors cursor-pointer"
      >
        <Chevron className="w-3 h-3 shrink-0" />
        <span className="font-medium">Thinking</span>
      </button>
      {expanded ? (
        <div className="px-2.5 pb-2 text-xs text-muted-foreground border-t border-border/15">
          <MarkdownContent content={content} />
        </div>
      ) : null}
    </div>
  );
}
|
||||
@@ -0,0 +1,98 @@
|
||||
/**
|
||||
* Parse XML-style thinking tags from LLM output into structured segments.
|
||||
* All thinking tags are merged into unified "thinking" segments regardless
|
||||
* of the original tag name (situation, monologue, execution_plan, etc.).
|
||||
*/
|
||||
|
||||
const THINKING_TAGS = ["situation", "monologue", "execution_plan"];
|
||||
|
||||
export interface TextSegment {
|
||||
type: "text" | "thinking";
|
||||
content: string;
|
||||
}
|
||||
|
||||
const TAG_REGEX = new RegExp(
|
||||
`<(/?)(${THINKING_TAGS.join("|")})>`,
|
||||
"g",
|
||||
);
|
||||
|
||||
/**
|
||||
* Parse text containing XML-style thinking tags into segments.
|
||||
*
|
||||
* Adjacent thinking blocks are merged into a single segment so they
|
||||
* render as one collapsible "Reasoning" block in the UI.
|
||||
*/
|
||||
export function parseThinkingTags(text: string): TextSegment[] {
|
||||
const raw: TextSegment[] = [];
|
||||
let lastIndex = 0;
|
||||
let insideTag: string | null = null;
|
||||
let tagContentStart = 0;
|
||||
|
||||
TAG_REGEX.lastIndex = 0;
|
||||
let match: RegExpExecArray | null;
|
||||
|
||||
while ((match = TAG_REGEX.exec(text)) !== null) {
|
||||
const isClosing = match[1] === "/";
|
||||
const tagName = match[2];
|
||||
|
||||
if (!isClosing && insideTag === null) {
|
||||
const before = text.slice(lastIndex, match.index);
|
||||
if (before) {
|
||||
raw.push({ type: "text", content: before });
|
||||
}
|
||||
insideTag = tagName;
|
||||
tagContentStart = match.index + match[0].length;
|
||||
} else if (isClosing && tagName === insideTag) {
|
||||
const inner = text.slice(tagContentStart, match.index);
|
||||
if (inner.trim()) {
|
||||
raw.push({ type: "thinking", content: inner });
|
||||
}
|
||||
insideTag = null;
|
||||
lastIndex = match.index + match[0].length;
|
||||
}
|
||||
}
|
||||
|
||||
if (insideTag !== null) {
|
||||
const inner = text.slice(tagContentStart);
|
||||
if (inner.trim()) {
|
||||
raw.push({ type: "thinking", content: inner });
|
||||
}
|
||||
} else {
|
||||
const tail = text.slice(lastIndex);
|
||||
if (tail) {
|
||||
raw.push({ type: "text", content: tail });
|
||||
}
|
||||
}
|
||||
|
||||
if (raw.length === 0) {
|
||||
return [{ type: "text", content: text }];
|
||||
}
|
||||
|
||||
// Merge adjacent thinking segments into one.
|
||||
// Whitespace-only text between thinking blocks does not break adjacency —
|
||||
// e.g. </situation>\n\n<monologue> should produce one "Thinking" block.
|
||||
const merged: TextSegment[] = [];
|
||||
for (let i = 0; i < raw.length; i++) {
|
||||
const seg = raw[i];
|
||||
const prev = merged[merged.length - 1];
|
||||
|
||||
// Skip whitespace-only text segments that sit between two thinking segments
|
||||
if (
|
||||
seg.type === "text" && !seg.content.trim() &&
|
||||
prev?.type === "thinking" &&
|
||||
raw[i + 1]?.type === "thinking"
|
||||
) {
|
||||
continue;
|
||||
}
|
||||
|
||||
if (prev && prev.type === "thinking" && seg.type === "thinking") {
|
||||
prev.content += "\n" + seg.content;
|
||||
} else if (prev && prev.type === seg.type) {
|
||||
prev.content += seg.content;
|
||||
} else {
|
||||
merged.push({ ...seg });
|
||||
}
|
||||
}
|
||||
|
||||
return merged;
|
||||
}
|
||||
Reference in New Issue
Block a user