fix: worker context token
@@ -359,6 +359,7 @@ async def create_queen(
     queen_goal,
     queen_loop_config as _base_loop_config,
 )
+from framework.config import get_max_tokens as _get_max_tokens
 from framework.agents.queen.nodes import (
     _QUEEN_INCUBATING_TOOLS,
     _QUEEN_INDEPENDENT_TOOLS,
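The body of `framework.config.get_max_tokens` is not part of this diff. A minimal sketch consistent with the hunk comments below (an `llm.max_tokens` ceiling read from configuration.json, falling back to the old 8192 default) might look like this; the file location and key names are assumptions, not something the diff shows:

```python
# Hypothetical sketch only -- the real implementation is not in the diff.
# Assumes configuration.json carries an "llm" section whose optional
# "max_tokens" field is the user's saved output ceiling.
import json
from pathlib import Path

CONFIG_PATH = Path("configuration.json")  # assumed location


def get_max_tokens(default: int = 8192) -> int:
    """Return the configured output-token ceiling, or `default` if unset."""
    try:
        config = json.loads(CONFIG_PATH.read_text())
    except (OSError, json.JSONDecodeError):
        return default  # missing or unreadable config: keep the safe default
    return int(config.get("llm", {}).get("max_tokens", default))
```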
@@ -982,7 +983,12 @@ async def create_queen(
         llm=session.llm,
         available_tools=queen_tools,
         goal_context=queen_goal.to_prompt_context(),
-        max_tokens=lc.get("max_tokens", 8192),
+        # Honor configuration.json (llm.max_tokens) instead of
+        # hard-defaulting to 8192. The legacy fallback ignored both
+        # the user's saved ceiling AND the model's actual output
+        # capacity (e.g. glm-5.1 / kimi-k2.5 both support 32k out),
+        # which silently truncated long tool-emitting turns.
+        max_tokens=lc.get("max_tokens", _get_max_tokens()),
         stream_id="queen",
         execution_id=session.id,
         dynamic_tools_provider=phase_state.get_current_tools,
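With this hunk the queen's ceiling resolves in two steps: an explicit `max_tokens` in the queen's per-loop config (`lc`) still wins, and the global configuration value applies only when that key is absent. Note that Python evaluates `dict.get`'s default argument eagerly, so `_get_max_tokens()` runs on every call even when the override is present, which is harmless as long as the lookup is cheap. A quick illustration of the fallback chain, with made-up values:

```python
# Illustration of the fallback chain (values made up for the example).
lc = {"temperature": 0.2}            # loop config without an explicit ceiling
print(lc.get("max_tokens", 32768))   # -> 32768, the global value applies

lc = {"max_tokens": 4096}            # per-queen override present
print(lc.get("max_tokens", 32768))   # -> 4096, the override wins
```

The remaining hunks apply the same change to the session manager's worker path.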
@@ -19,7 +19,7 @@ from datetime import datetime
 from pathlib import Path
 from typing import Any, Literal
 
-from framework.config import QUEENS_DIR
+from framework.config import QUEENS_DIR, get_max_tokens
 from framework.host.triggers import TriggerDefinition
 
 logger = logging.getLogger(__name__)
@@ -700,7 +700,10 @@ class SessionManager:
             available_tools=all_tools,
             goal_context=goal.to_prompt_context(),
             goal=goal,
-            max_tokens=8192,
+            # Worker output cap — pull from configuration.json instead of
+            # hard-coding 8192. glm-5.1/kimi-k2.5 both support 32k out, and
+            # capping at 8k silently truncates long worker turns mid-tool.
+            max_tokens=get_max_tokens(),
             stream_id=worker_name,
             execution_id=worker_name,
             identity_prompt=worker_data.get("identity_prompt", ""),
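Unlike the queen path, the worker path has no per-worker loop config, so workers take the global ceiling directly. Under the configuration layout assumed in the sketch above (the diff itself never shows the file), lifting the cap for a 32k-output model would be a one-line change to configuration.json:

```json
{
  "llm": {
    "max_tokens": 32768
  }
}
```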