From 934c4245103f4d18a02486498c02eb983eb204e1 Mon Sep 17 00:00:00 2001 From: Richard Tang Date: Fri, 3 Apr 2026 18:29:04 -0700 Subject: [PATCH] feat: handle gemini tool call tags --- .hive/browser-logs/browser-2026-04-04.jsonl | 1 + core/framework/llm/litellm.py | 80 +++++++++++++++++++++ 2 files changed, 81 insertions(+) diff --git a/.hive/browser-logs/browser-2026-04-04.jsonl b/.hive/browser-logs/browser-2026-04-04.jsonl index cad80c46..a748c778 100644 --- a/.hive/browser-logs/browser-2026-04-04.jsonl +++ b/.hive/browser-logs/browser-2026-04-04.jsonl @@ -6,3 +6,4 @@ {"type": "connection", "event": "disconnect", "ts": "2026-04-04T01:15:12.826042+00:00", "profile": "default"} {"type": "connection", "event": "connect", "ts": "2026-04-04T01:15:30.842533+00:00", "profile": "default"} {"type": "connection", "event": "hello", "details": {"version": "1.0"}, "ts": "2026-04-04T01:15:30.845025+00:00", "profile": "default"} +{"type": "tool_call", "tool": "browser_stop", "params": {"profile": "gcu-browser-worker:3"}, "result": {"ok": true, "status": "not_running", "profile": "gcu-browser-worker:3"}, "ok": true, "duration_ms": 0.01, "ts": "2026-04-04T01:29:04.294954+00:00", "profile": "default"} diff --git a/core/framework/llm/litellm.py b/core/framework/llm/litellm.py index 8a11569a..20ac7d65 100644 --- a/core/framework/llm/litellm.py +++ b/core/framework/llm/litellm.py @@ -458,6 +458,57 @@ def _is_stream_transient_error(exc: BaseException) -> bool: return isinstance(exc, transient_types) +def _extract_text_tool_calls( + text: str, +) -> tuple[list["ToolCallEvent"], str]: + """Extract hallucinated tool calls from ```` blocks in LLM text. + + Some models (notably Gemini) emit tool invocations as text instead of using + the structured function-calling API. This function parses those blocks and + returns ``(tool_call_events, cleaned_text)`` where *cleaned_text* has the + ```` blocks removed. + + Expected format:: + + + { + "tool_name": { ...args } + } + + """ + from framework.llm.stream_events import ToolCallEvent + + pattern = re.compile(r"\s*(.*?)\s*", re.DOTALL) + events: list[ToolCallEvent] = [] + cleaned = text + + for match in pattern.finditer(text): + raw = match.group(1).strip() + try: + payload = json.loads(raw) + except json.JSONDecodeError: + logger.warning("[_extract_text_tool_calls] failed to parse JSON: %s", raw[:200]) + continue + + if not isinstance(payload, dict): + continue + + for tool_name, tool_args in payload.items(): + call_id = f"synth_{hashlib.md5(f'{tool_name}:{json.dumps(tool_args, sort_keys=True)}'.encode()).hexdigest()[:12]}" + events.append( + ToolCallEvent( + tool_use_id=call_id, + tool_name=tool_name, + tool_input=tool_args if isinstance(tool_args, dict) else {}, + ) + ) + + if events: + cleaned = pattern.sub("", text).strip() + + return events, cleaned + + class LiteLLMProvider(LLMProvider): """ LiteLLM-based LLM provider for multi-provider support. @@ -1918,6 +1969,35 @@ class LiteLLMProvider(LLMProvider): f"(last_role={last_role}). Returning empty result." ) + # Gemini sometimes outputs tool calls as text in + # {"name": {...args}} blocks + # instead of using the function-calling API. Extract + # these as real ToolCallEvents and strip them from the + # text so the rest of the system treats them normally. + if accumulated_text and "" in accumulated_text: + extracted, cleaned = _extract_text_tool_calls(accumulated_text) + if extracted: + logger.info( + "[stream] extracted %d hallucinated tool call(s) from text", + len(extracted), + ) + accumulated_text = cleaned + # Emit a corrected TextDeltaEvent so the caller's + # accumulated_text is overwritten with the cleaned text. + yield TextDeltaEvent(content="", snapshot=cleaned) + # Insert synthetic ToolCallEvents before FinishEvent. + finish_idx = next( + (i for i, ev in enumerate(tail_events) if isinstance(ev, FinishEvent)), + len(tail_events), + ) + for tc_ev in reversed(extracted): + tail_events.insert(finish_idx, tc_ev) + # Update TextEndEvent if present. + for _i, _ev in enumerate(tail_events): + if isinstance(_ev, TextEndEvent): + tail_events[_i] = TextEndEvent(full_text=cleaned) + break + # Success (or empty after exhausted retries) — flush events. for event in tail_events: yield event