fix: honor configured max_tokens for queen and worker output

Richard Tang
2026-05-03 11:45:37 -07:00
parent eee55ea8c7
commit feabf32768
2 changed files with 12 additions and 3 deletions
+7 -1
@@ -359,6 +359,7 @@ async def create_queen(
         queen_goal,
         queen_loop_config as _base_loop_config,
     )
+    from framework.config import get_max_tokens as _get_max_tokens
     from framework.agents.queen.nodes import (
         _QUEEN_INCUBATING_TOOLS,
         _QUEEN_INDEPENDENT_TOOLS,
@@ -982,7 +983,12 @@ async def create_queen(
         llm=session.llm,
         available_tools=queen_tools,
         goal_context=queen_goal.to_prompt_context(),
-        max_tokens=lc.get("max_tokens", 8192),
+        # Honor configuration.json (llm.max_tokens) instead of
+        # hard-defaulting to 8192. The legacy fallback ignored both
+        # the user's saved ceiling AND the model's actual output
+        # capacity (e.g. glm-5.1 / kimi-k2.5 both support 32k out),
+        # which silently truncated long tool-emitting turns.
+        max_tokens=lc.get("max_tokens", _get_max_tokens()),
         stream_id="queen",
         execution_id=session.id,
         dynamic_tools_provider=phase_state.get_current_tools,
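get_max_tokens() is imported from framework.config, but its body is not part of this diff. A minimal sketch of what such a helper could look like, assuming a configuration.json with an llm.max_tokens key and the legacy 8192 fallback named in the hunk comments; the file path and schema are inferred, not confirmed by the commit:

    # Hypothetical reconstruction of framework.config.get_max_tokens();
    # the real implementation is not shown in this commit.
    import json
    from pathlib import Path

    CONFIG_PATH = Path("configuration.json")  # assumed location
    DEFAULT_MAX_TOKENS = 8192  # legacy fallback named in the hunk comments

    def get_max_tokens() -> int:
        """Return the saved output-token ceiling (llm.max_tokens),
        falling back to the legacy default when unset or unreadable."""
        try:
            config = json.loads(CONFIG_PATH.read_text())
        except (OSError, json.JSONDecodeError):
            return DEFAULT_MAX_TOKENS
        return int(config.get("llm", {}).get("max_tokens", DEFAULT_MAX_TOKENS))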
+5 -2
@@ -19,7 +19,7 @@ from datetime import datetime
 from pathlib import Path
 from typing import Any, Literal

-from framework.config import QUEENS_DIR
+from framework.config import QUEENS_DIR, get_max_tokens
 from framework.host.triggers import TriggerDefinition

 logger = logging.getLogger(__name__)
@@ -700,7 +700,10 @@ class SessionManager:
             available_tools=all_tools,
             goal_context=goal.to_prompt_context(),
             goal=goal,
-            max_tokens=8192,
+            # Worker output cap — pull from configuration.json instead of
+            # hard-coding 8192. glm-5.1/kimi-k2.5 both support 32k out, and
+            # capping at 8k silently truncates long worker turns mid-tool.
+            max_tokens=get_max_tokens(),
             stream_id=worker_name,
             execution_id=worker_name,
             identity_prompt=worker_data.get("identity_prompt", ""),
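Note the asymmetry between the two call sites: the queen path still lets a per-loop override in lc take precedence via lc.get("max_tokens", _get_max_tokens()), while the worker path reads the global ceiling directly. A small sketch of the resulting precedence, using the hypothetical get_max_tokens() sketched above:

    # Queen: a per-loop override wins when present.
    lc = {"max_tokens": 16384}
    queen_cap = lc.get("max_tokens", get_max_tokens())  # -> 16384

    # Queen with no override, and any worker: configuration.json value.
    lc = {}
    queen_cap = lc.get("max_tokens", get_max_tokens())  # -> llm.max_tokens
    worker_cap = get_max_tokens()                       # -> llm.max_tokens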