Compare commits

3 Commits

| Author | SHA1 | Date |
|---|---|---|
|  | 2ab2876580 |  |
|  | 172ba2d7ad |  |
|  | 423f5c829c |  |
Publish Containers workflow:

@@ -3,7 +3,7 @@ name: Publish Containers
 on:
   push:
     branches:
       - main
       - main-1.x
   release:
     types: [published]
   workflow_dispatch:

@@ -92,4 +92,4 @@ jobs:
       with:
         subject-name: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME}}
         subject-digest: ${{ steps.push.outputs.digest }}
         push-to-registry: true
.vscode/settings.json (vendored, +1):

@@ -1,4 +1,5 @@
 {
+  "python.defaultInterpreterPath": "${workspaceFolder}/.venv/bin/python",
   "python.testing.pytestArgs": [
     "tests"
   ],
src/graph/nodes.py (+55, -9):

@@ -4,8 +4,10 @@
 import json
 import logging
 import os
+import re
 from functools import partial
 from typing import Annotated, Any, Literal
+from pydantic import ValidationError

 from langchain_core.messages import AIMessage, HumanMessage, SystemMessage, ToolMessage
 from langchain_core.runnables import RunnableConfig
@@ -495,8 +497,9 @@ def human_feedback_node(
     )

     # if the plan is accepted, run the following node
-    plan_iterations = state["plan_iterations"] if state.get("plan_iterations", 0) else 0
+    plan_iterations = (state.get("plan_iterations") or 0) + 1
     goto = "research_team"
+    configurable = Configuration.from_runnable_config(config)
     try:
         # Safely extract plan content from different types (string, AIMessage, dict)
         original_plan = current_plan
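The new expression folds the increment into the assignment; the old form returned the stored count unchanged (or 0 when missing) and relied on a separate `plan_iterations += 1` later in the `try` block, which never ran when parsing failed. A minimal standalone sketch (not project code) of the difference:

```python
# Standalone sketch: how the old and new expressions evaluate when the key is
# missing, zero, or set. The `or 0` also covers an explicit None value.
for state in ({}, {"plan_iterations": 0}, {"plan_iterations": 2}):
    old = state["plan_iterations"] if state.get("plan_iterations", 0) else 0
    new = (state.get("plan_iterations") or 0) + 1
    print(state, old, new)  # -> 0, 1 / 0, 1 / 2, 3
```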
@@ -507,18 +510,55 @@ def human_feedback_node(
             current_plan = json.loads(current_plan)
         current_plan_content = extract_plan_content(current_plan)

-        # increment the plan iterations
-        plan_iterations += 1
         # parse the plan
         new_plan = json.loads(repair_json_output(current_plan_content))

+        # Some models may return only a raw steps list instead of a full plan object.
+        # Normalize to Plan schema to avoid ValidationError in Plan.model_validate().
+        if isinstance(new_plan, list):
+            logger.warning("Planner returned plan as list; normalizing to dict with inferred metadata")
+            new_plan = {
+                "locale": state.get("locale", "en-US"),
+                "has_enough_context": False,
+                "thought": "",
+                "title": state.get("research_topic") or "Research Plan",
+                "steps": new_plan,
+            }
+        elif not isinstance(new_plan, dict):
+            raise ValueError(f"Unsupported plan type after parsing: {type(new_plan).__name__}")
+
+        # Fill required fields if partially missing.
+        new_plan.setdefault("locale", state.get("locale", "en-US"))
+        new_plan.setdefault("has_enough_context", False)
+        new_plan.setdefault("thought", "")
+        if not new_plan.get("title"):
+            new_plan["title"] = state.get("research_topic") or "Research Plan"
+        if "steps" not in new_plan or new_plan.get("steps") is None:
+            new_plan["steps"] = []
+
+        # Validate and fix plan to ensure web search requirements are met
-        configurable = Configuration.from_runnable_config(config)
-        new_plan = validate_and_fix_plan(new_plan, configurable.enforce_web_search, configurable.enable_web_search)
-    except (json.JSONDecodeError, AttributeError, ValueError) as e:
+        # after normalization so list-shaped plans are also enforced.
+        new_plan = validate_and_fix_plan(
+            new_plan,
+            configurable.enforce_web_search,
+            configurable.enable_web_search,
+        )
+
+        validated_plan = Plan.model_validate(new_plan)
+
+    except (json.JSONDecodeError, AttributeError, ValueError, ValidationError) as e:
         logger.warning(f"Failed to parse plan: {str(e)}. Plan data type: {type(current_plan).__name__}")
         if isinstance(current_plan, dict) and "content" in original_plan:
             logger.warning(f"Plan appears to be an AIMessage object with content field")
-        if plan_iterations > 1:  # the plan_iterations is increased before this check
+        if plan_iterations < configurable.max_plan_iterations:
+            return Command(
+                update={
+                    "plan_iterations": plan_iterations,
+                    **preserve_state_meta_fields(state),
+                },
+                goto="planner"
+            )
+        if plan_iterations > 1:
             return Command(
                 update=preserve_state_meta_fields(state),
                 goto="reporter"
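The normalization path is the core of this hunk: a planner that returns a bare steps list is wrapped into a full plan dict before validation. A standalone sketch of just that path (stdlib only; `state` is a stand-in for the graph state, field names mirror the diff):

```python
import json

# A plan that arrived as a raw JSON list of steps rather than a plan object.
state = {"locale": "en-US", "research_topic": "Quantum batteries"}
new_plan = json.loads('[{"title": "Only Step", "description": "Collect baseline info"}]')

if isinstance(new_plan, list):
    # Wrap the steps list into a dict matching the Plan schema, inferring
    # metadata from the graph state exactly as the diff does.
    new_plan = {
        "locale": state.get("locale", "en-US"),
        "has_enough_context": False,
        "thought": "",
        "title": state.get("research_topic") or "Research Plan",
        "steps": new_plan,
    }
elif not isinstance(new_plan, dict):
    raise ValueError(f"Unsupported plan type after parsing: {type(new_plan).__name__}")

assert new_plan["title"] == "Quantum batteries"
assert len(new_plan["steps"]) == 1
```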
@@ -531,7 +571,7 @@ def human_feedback_node(

     # Build update dict with safe locale handling
     update_dict = {
-        "current_plan": Plan.model_validate(new_plan),
+        "current_plan": validated_plan,
         "plan_iterations": plan_iterations,
         **preserve_state_meta_fields(state),
     }
@@ -900,7 +940,13 @@ def reporter_node(state: State, config: RunnableConfig):
     logger.debug(f"Current invoke messages: {invoke_messages}")
     response = get_llm_by_type(AGENT_LLM_MAP["reporter"]).invoke(invoke_messages)
     response_content = response.content
-    logger.info(f"reporter response: {response_content}")
+    # Strip <think>...</think> tags that some models (e.g. QwQ, DeepSeek) embed
+    # directly in content instead of using the reasoning_content field (#781)
+    if isinstance(response_content, str) and "<think>" in response_content:
+        response_content = re.sub(
+            r"<think>[\s\S]*?</think>", "", response_content
+        ).strip()
+    logger.debug(f"reporter response length: {len(response_content)}")

     return {
         "final_report": response_content,
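For reference, the stripping regex works without `re.DOTALL` because `[\s\S]` already matches any character including newlines, and the non-greedy `*?` removes each block separately instead of everything between the first `<think>` and the last `</think>`. A quick standalone check:

```python
import re

# Two separate think blocks: non-greedy matching removes each one on its own.
text = "<think>First</think>\nParagraph 1.\n<think>Second</think>\nParagraph 2."
cleaned = re.sub(r"<think>[\s\S]*?</think>", "", text).strip()
print(cleaned)  # -> "Paragraph 1.\n\nParagraph 2."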
src/server/app.py:

@@ -6,6 +6,7 @@ import base64
 import json
 import logging
 import os
+import re
 from typing import Annotated, Any, List, Optional, cast
 from uuid import uuid4

@@ -423,6 +424,11 @@ def _create_event_stream_message(
     if not isinstance(content, str):
         content = json.dumps(content, ensure_ascii=False)

+    # Strip <think>...</think> tags that some models (e.g. DeepSeek-R1, QwQ via ollama)
+    # embed directly in content instead of using the reasoning_content field (#781)
+    if isinstance(content, str) and "<think>" in content:
+        content = re.sub(r"<think>[\s\S]*?</think>", "", content).strip()
+
     event_stream_message = {
         "thread_id": thread_id,
         "agent": agent_name,
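One property worth noting (an observation about the regex itself, not something the diff addresses): only closed `<think>...</think>` blocks are removed, so a tag that is split across stream chunks passes through untouched until the closing tag arrives:

```python
import re

PATTERN = r"<think>[\s\S]*?</think>"

# A closed block is removed; an unterminated one (as can happen when a tag is
# split across stream chunks) is left as-is, since *? requires the closing tag.
print(re.sub(PATTERN, "", "<think>done</think>answer"))  # -> "answer"
print(re.sub(PATTERN, "", "<think>still streaming..."))  # -> unchanged
```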
Tests for src/graph/nodes.py:

@@ -2,6 +2,7 @@ import json
 from collections import namedtuple
 from unittest.mock import MagicMock, patch

+from pydantic import ValidationError
 import pytest

 from src.graph.nodes import (
@@ -825,12 +826,102 @@ def test_human_feedback_node_json_decode_error_first_iteration(
     state = dict(mock_state_base)
     state["auto_accepted_plan"] = True
     state["plan_iterations"] = 0
-    with patch(
-        "src.graph.nodes.json.loads", side_effect=json.JSONDecodeError("err", "doc", 0)
+    mock_configurable = MagicMock()
+    mock_configurable.max_plan_iterations = 3
+    with (
+        patch(
+            "src.graph.nodes.Configuration.from_runnable_config",
+            return_value=mock_configurable,
+        ),
+        patch(
+            "src.graph.nodes.json.loads",
+            side_effect=json.JSONDecodeError("err", "doc", 0),
+        ),
     ):
         result = human_feedback_node(state, mock_config)
     assert isinstance(result, Command)
-    assert result.goto == "__end__"
+    assert result.goto == "planner"
+    assert result.update["plan_iterations"] == 1
+
+def test_human_feedback_node_model_validate_error(mock_state_base, mock_config):
+    # Plan.model_validate raises ValidationError, should enter error handling path
+    from pydantic import BaseModel
+
+    state = dict(mock_state_base)
+    state["auto_accepted_plan"] = True
+    state["plan_iterations"] = 0
+
+    # Build a real ValidationError instance from pydantic
+    class DummyModel(BaseModel):
+        value: int
+
+    try:
+        DummyModel.model_validate({"value": "not_an_int"})
+    except ValidationError as validation_error:
+        raised_validation_error = validation_error
+
+    mock_configurable = MagicMock()
+    mock_configurable.max_plan_iterations = 3
+    mock_configurable.enforce_web_search = False
+    mock_configurable.enable_web_search = True
+
+    with (
+        patch(
+            "src.graph.nodes.Configuration.from_runnable_config",
+            return_value=mock_configurable,
+        ),
+        patch(
+            "src.graph.nodes.Plan.model_validate",
+            side_effect=raised_validation_error,
+        ),
+    ):
+        result = human_feedback_node(state, mock_config)
+    assert isinstance(result, Command)
+    assert result.goto == "planner"
+    assert result.update["plan_iterations"] == 1
+
+def test_human_feedback_node_list_plan_runs_enforcement_after_normalization(
+    mock_state_base, mock_config
+):
+    # Regression: when plan content is a list, normalization happens first,
+    # then validate_and_fix_plan must still run on the normalized dict.
+    raw_list_plan = [
+        {
+            "need_search": False,
+            "title": "Only Step",
+            "description": "Collect baseline info",
+            # intentionally missing step_type
+        }
+    ]
+
+    state = dict(mock_state_base)
+    state["auto_accepted_plan"] = True
+    state["plan_iterations"] = 0
+    state["current_plan"] = json.dumps({"content": [json.dumps(raw_list_plan)]})
+
+    mock_configurable = MagicMock()
+    mock_configurable.max_plan_iterations = 3
+    mock_configurable.enforce_web_search = True
+    mock_configurable.enable_web_search = True
+
+    with patch(
+        "src.graph.nodes.Configuration.from_runnable_config",
+        return_value=mock_configurable,
+    ):
+        result = human_feedback_node(state, mock_config)
+
+    assert isinstance(result, Command)
+    assert result.goto == "research_team"
+    assert result.update["plan_iterations"] == 1
+
+    normalized_plan = result.update["current_plan"]
+    assert isinstance(normalized_plan, dict)
+    assert isinstance(normalized_plan.get("steps"), list)
+    assert len(normalized_plan["steps"]) == 1
+
+    # validate_and_fix_plan effects should be visible after normalization
+    assert normalized_plan["steps"][0]["step_type"] == "research"
+    assert normalized_plan["steps"][0]["need_search"] is True


 def test_human_feedback_node_json_decode_error_second_iteration(
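The `DummyModel` construction in the second test above is the usual way to obtain a real `ValidationError`, since pydantic does not support building one from a bare message (pydantic v2 also exposes `ValidationError.from_exception_data`, though provoking one via `model_validate` is simpler and version-agnostic). The same trick as a compact reusable helper, a hedged sketch rather than project code:

```python
from pydantic import BaseModel, ValidationError

class _Dummy(BaseModel):
    value: int

def make_validation_error() -> ValidationError:
    # Provoke a genuine ValidationError by validating deliberately bad data.
    try:
        _Dummy.model_validate({"value": "not_an_int"})
    except ValidationError as exc:
        return exc
    raise AssertionError("expected validation to fail")
```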
@@ -2823,3 +2914,115 @@ async def test_execute_agent_step_no_tool_calls_still_works():

     # Verify step execution result is set
     assert state["current_plan"].steps[0].execution_res == "Based on my knowledge, here is the answer without needing to search."
+
+
+class TestReporterNodeThinkTagStripping:
+    """Tests for stripping <think> tags from reporter_node output (#781).
+
+    Some models (e.g. DeepSeek-R1, QwQ via ollama) embed reasoning in
+    content using <think>...</think> tags instead of the separate
+    reasoning_content field.
+    """
+
+    def _make_mock_state(self):
+        plan = MagicMock()
+        plan.title = "Test Plan"
+        plan.thought = "Test Thought"
+        return {
+            "current_plan": plan,
+            "observations": [],
+            "citations": [],
+            "locale": "en-US",
+        }
+
+    def _run_reporter_node(self, response_content):
+        state = self._make_mock_state()
+        mock_response = MagicMock()
+        mock_response.content = response_content
+
+        mock_configurable = MagicMock()
+
+        with (
+            patch(
+                "src.graph.nodes.Configuration.from_runnable_config",
+                return_value=mock_configurable,
+            ),
+            patch(
+                "src.graph.nodes.apply_prompt_template",
+                return_value=[{"role": "user", "content": "test"}],
+            ),
+            patch("src.graph.nodes.get_llm_by_type") as mock_get_llm,
+            patch("src.graph.nodes.get_llm_token_limit_by_type", return_value=4096),
+            patch("src.graph.nodes.AGENT_LLM_MAP", {"reporter": "basic"}),
+            patch(
+                "src.graph.nodes.ContextManager"
+            ) as mock_ctx_mgr,
+        ):
+            mock_ctx_mgr.return_value.compress_messages.return_value = {"messages": []}
+            mock_llm = MagicMock()
+            mock_llm.invoke.return_value = mock_response
+            mock_get_llm.return_value = mock_llm
+
+            result = reporter_node(state, MagicMock())
+        return result
+
+    def test_strips_think_tag_at_beginning(self):
+        result = self._run_reporter_node(
+            "<think>\nLet me analyze...\n</think>\n\n# Report\n\nContent here."
+        )
+        assert "<think>" not in result["final_report"]
+        assert "# Report" in result["final_report"]
+        assert "Content here." in result["final_report"]
+
+    def test_strips_multiple_think_blocks(self):
+        result = self._run_reporter_node(
+            "<think>First thought</think>\nParagraph 1.\n<think>Second thought</think>\nParagraph 2."
+        )
+        assert "<think>" not in result["final_report"]
+        assert "Paragraph 1." in result["final_report"]
+        assert "Paragraph 2." in result["final_report"]
+
+    def test_preserves_content_without_think_tags(self):
+        result = self._run_reporter_node("Normal content without think tags.")
+        assert result["final_report"] == "Normal content without think tags."
+
+    def test_empty_content_after_stripping(self):
+        result = self._run_reporter_node(
+            "<think>Only thinking, no real content</think>"
+        )
+        assert "<think>" not in result["final_report"]
+
+    def test_non_string_content_passes_through(self):
+        """Verify non-string content is not broken by the stripping logic."""
+        state = self._make_mock_state()
+        mock_response = MagicMock()
+        # Simulate non-string content (e.g. list from multimodal model)
+        mock_response.content = ["some", "list"]
+
+        mock_configurable = MagicMock()
+
+        with (
+            patch(
+                "src.graph.nodes.Configuration.from_runnable_config",
+                return_value=mock_configurable,
+            ),
+            patch(
+                "src.graph.nodes.apply_prompt_template",
+                return_value=[{"role": "user", "content": "test"}],
+            ),
+            patch("src.graph.nodes.get_llm_by_type") as mock_get_llm,
+            patch("src.graph.nodes.get_llm_token_limit_by_type", return_value=4096),
+            patch("src.graph.nodes.AGENT_LLM_MAP", {"reporter": "basic"}),
+            patch(
+                "src.graph.nodes.ContextManager"
+            ) as mock_ctx_mgr,
+        ):
+            mock_ctx_mgr.return_value.compress_messages.return_value = {"messages": []}
+            mock_llm = MagicMock()
+            mock_llm.invoke.return_value = mock_response
+            mock_get_llm.return_value = mock_llm
+
+            result = reporter_node(state, MagicMock())
+
+        # Non-string content should pass through unchanged
+        assert result["final_report"] == ["some", "list"]
Tests for src/server/app.py:

@@ -16,6 +16,7 @@ from langgraph.types import Command
 from src.config.report_style import ReportStyle
 from src.server.app import (
     _astream_workflow_generator,
+    _create_event_stream_message,
     _create_interrupt_event,
     _make_event,
     _stream_graph_events,
@@ -1680,3 +1681,53 @@ class TestGlobalConnectionPoolUsage:
         """Helper to create an empty async generator."""
         if False:
             yield
+
+
+class TestCreateEventStreamMessageThinkTagStripping:
+    """Tests for stripping <think> tags from streamed content (#781).
+
+    Some models (e.g. DeepSeek-R1, QwQ via ollama) embed reasoning in
+    content using <think>...</think> tags instead of the separate
+    reasoning_content field.
+    """
+
+    def _make_mock_chunk(self, content):
+        chunk = AIMessageChunk(content=content)
+        chunk.id = "msg_test"
+        chunk.response_metadata = {}
+        return chunk
+
+    def test_strips_think_tag_at_beginning(self):
+        chunk = self._make_mock_chunk(
+            "<think>\nLet me analyze...\n</think>\n\n# Report\n\nContent here."
+        )
+        result = _create_event_stream_message(chunk, {}, "thread-1", "reporter")
+        assert "<think>" not in result["content"]
+        assert "# Report" in result["content"]
+        assert "Content here." in result["content"]
+
+    def test_strips_multiple_think_blocks(self):
+        chunk = self._make_mock_chunk(
+            "<think>First thought</think>\nParagraph 1.\n<think>Second thought</think>\nParagraph 2."
+        )
+        result = _create_event_stream_message(chunk, {}, "thread-1", "coordinator")
+        assert "<think>" not in result["content"]
+        assert "Paragraph 1." in result["content"]
+        assert "Paragraph 2." in result["content"]
+
+    def test_preserves_content_without_think_tags(self):
+        chunk = self._make_mock_chunk("Normal content without think tags.")
+        result = _create_event_stream_message(chunk, {}, "thread-1", "planner")
+        assert result["content"] == "Normal content without think tags."
+
+    def test_empty_content_after_stripping(self):
+        chunk = self._make_mock_chunk("<think>Only thinking, no real content</think>")
+        result = _create_event_stream_message(chunk, {}, "thread-1", "reporter")
+        assert "<think>" not in result["content"]
+
+    def test_preserves_reasoning_content_field(self):
+        chunk = self._make_mock_chunk("Actual content")
+        chunk.additional_kwargs["reasoning_content"] = "This is reasoning"
+        result = _create_event_stream_message(chunk, {}, "thread-1", "planner")
+        assert result["content"] == "Actual content"
+        assert result["reasoning_content"] == "This is reasoning"