fix: harden codex responses transport
@@ -108,10 +108,15 @@ class CodexResponsesAdapter:
         system_messages: list[dict[str, Any]] = []
         if system:
             chunks = self.chunk_system_prompt(system)
-            if len(chunks) > 1 or len(chunks[0]) > _CODEX_SYSTEM_CHUNK_CHARS:
-                system_messages.append({"role": "system", "content": _CODEX_SYSTEM_PREAMBLE})
-            for chunk in chunks:
-                system_messages.append({"role": "system", "content": chunk})
+            if chunks:
+                if len(chunks) > 1 or len(chunks[0]) > _CODEX_SYSTEM_CHUNK_CHARS:
+                    system_messages.append({"role": "system", "content": _CODEX_SYSTEM_PREAMBLE})
+                for chunk in chunks:
+                    system_messages.append({"role": "system", "content": chunk})
+            else:
+                system_messages.append(
+                    {"role": "system", "content": "You are a helpful assistant."}
+                )
         else:
             system_messages.append({"role": "system", "content": "You are a helpful assistant."})
@@ -24,7 +24,7 @@ def is_codex_api_base(api_base: str | None) -> bool:
     parsed = urlparse(api_base)
     path = parsed.path.rstrip("/")
     return (
-        parsed.scheme in {"http", "https"}
+        parsed.scheme == "https"
         and parsed.hostname == _CODEX_HOST
         and (path == _CODEX_PATH or path == f"{_CODEX_PATH}/responses")
     )
@@ -16,6 +16,7 @@ import os
 import re
 import time
 from collections.abc import AsyncIterator
+from contextlib import contextmanager
 from datetime import datetime
 from pathlib import Path
 from typing import Any
@@ -557,18 +558,6 @@ class LiteLLMProvider(LLMProvider):
                 "LiteLLM is not installed. Please install it with: uv pip install litellm"
             )
 
-        # The Codex ChatGPT backend is a Responses API endpoint at
-        # chatgpt.com/backend-api/codex/responses. LiteLLM's model registry
-        # marks legacy codex models (gpt-5.3-codex) with mode="responses",
-        # but newer models like gpt-5.4 default to mode="chat". Force
-        # mode="responses" so litellm routes through the responses_api_bridge.
-        if self._codex_backend and litellm is not None:
-            _strip = self.model.removeprefix("openai/")
-            _entry = litellm.model_cost.get(_strip, {})
-            if _entry.get("mode") != "responses":
-                litellm.model_cost.setdefault(_strip, {})
-                litellm.model_cost[_strip]["mode"] = "responses"
-
     @staticmethod
     def _default_api_base_for_model(model: str) -> str | None:
         """Return provider-specific default API base when required."""
@@ -592,6 +581,33 @@ class LiteLLMProvider(LLMProvider):
         """Break large system prompts into smaller Codex-friendly chunks."""
         return self._codex_adapter.chunk_system_prompt(system)
 
+    @contextmanager
+    def _codex_responses_mode_override(self, model: str | None = None):
+        """Temporarily route Codex requests through LiteLLM's Responses bridge."""
+        if not self._codex_backend or litellm is None:
+            yield
+            return
+
+        stripped_model = (model or self.model).removeprefix("openai/")
+        previous_entry = litellm.model_cost.get(stripped_model)
+        previous_mode = previous_entry.get("mode") if isinstance(previous_entry, dict) else None
+        if previous_mode == "responses":
+            yield
+            return
+
+        if previous_entry is None:
+            litellm.model_cost[stripped_model] = {"mode": "responses"}
+        else:
+            litellm.model_cost[stripped_model] = {**previous_entry, "mode": "responses"}
+
+        try:
+            yield
+        finally:
+            if previous_entry is None:
+                litellm.model_cost.pop(stripped_model, None)
+            else:
+                litellm.model_cost[stripped_model] = previous_entry
+
     def _build_request_messages(
         self,
         messages: list[dict[str, Any]],
@@ -719,7 +735,8 @@ class LiteLLMProvider(LLMProvider):
         retries = max_retries if max_retries is not None else RATE_LIMIT_MAX_RETRIES
         for attempt in range(retries + 1):
             try:
-                response = litellm.completion(**kwargs)  # type: ignore[union-attr]
+                with self._codex_responses_mode_override(model):
+                    response = litellm.completion(**kwargs)  # type: ignore[union-attr]
 
                 # Some providers (e.g. Gemini) return 200 with empty content on
                 # rate limit / quota exhaustion instead of a proper 429. Treat
@@ -899,7 +916,8 @@ class LiteLLMProvider(LLMProvider):
         retries = max_retries if max_retries is not None else RATE_LIMIT_MAX_RETRIES
         for attempt in range(retries + 1):
             try:
-                response = await litellm.acompletion(**kwargs)  # type: ignore[union-attr]
+                with self._codex_responses_mode_override(model):
+                    response = await litellm.acompletion(**kwargs)  # type: ignore[union-attr]
 
                 content = response.choices[0].message.content if response.choices else None
                 has_tool_calls = bool(response.choices and response.choices[0].message.tool_calls)
@@ -1824,10 +1842,15 @@ class LiteLLMProvider(LLMProvider):
         """Try a non-stream completion when Codex returns an empty stream."""
         if not self._codex_backend:
             return None
+
+        async def _codex_acompletion(**fallback_kwargs: Any) -> Any:
+            with self._codex_responses_mode_override(fallback_kwargs.get("model")):
+                return await litellm.acompletion(**fallback_kwargs)  # type: ignore[union-attr]
+
         return await self._codex_adapter.recover_empty_stream(
             kwargs,
             last_role=last_role,
-            acompletion=litellm.acompletion,  # type: ignore[union-attr]
+            acompletion=_codex_acompletion,
         )
 
     def _merge_tool_call_chunk(
@@ -1914,7 +1937,8 @@ class LiteLLMProvider(LLMProvider):
         stream_finish_reason: str | None = None
 
         try:
-            response = await litellm.acompletion(**kwargs)  # type: ignore[union-attr]
+            with self._codex_responses_mode_override(kwargs.get("model")):
+                response = await litellm.acompletion(**kwargs)  # type: ignore[union-attr]
 
             async for chunk in response:
                 # Capture usage from the trailing usage-only chunk that
@@ -115,6 +115,7 @@ class TestCodexConfig:
     def test_codex_api_base_detection_requires_real_chatgpt_origin(self):
        assert is_codex_api_base("https://chatgpt.com/backend-api/codex")
         assert is_codex_api_base("https://chatgpt.com/backend-api/codex/responses")
+        assert not is_codex_api_base("http://chatgpt.com/backend-api/codex")
         assert not is_codex_api_base(
             "https://proxy.example/v1?target=https://chatgpt.com/backend-api/codex"
         )
@@ -918,8 +918,8 @@ class TestCodexEmptyStreamRecovery:
 
 
 class TestCodexRequestHardening:
-    def test_codex_backend_forces_responses_mode_for_newer_models(self):
-        """Codex backend should force LiteLLM through the Responses bridge."""
+    def test_codex_backend_forces_responses_mode_per_request(self):
+        """Codex backend should scope the Responses override to the active request."""
         import litellm
 
         original = litellm.model_cost.get("gpt-5.4")
@@ -932,7 +932,27 @@ class TestCodexRequestHardening:
                 api_base="https://chatgpt.com/backend-api/codex",
             )
             assert provider._codex_backend is True
-            assert litellm.model_cost["gpt-5.4"]["mode"] == "responses"
+
+            async def _fake_acompletion(**kwargs):
+                assert kwargs["model"] == "openai/gpt-5.4"
+                assert litellm.model_cost["gpt-5.4"]["mode"] == "responses"
+                response = MagicMock()
+                response.choices = [MagicMock()]
+                response.choices[0].message.content = "ok"
+                response.choices[0].message.tool_calls = []
+                response.choices[0].finish_reason = "stop"
+                return response
+
+            with patch("litellm.acompletion", new=_fake_acompletion):
+                response = asyncio.run(
+                    provider._acompletion_with_rate_limit_retry(
+                        model=provider.model,
+                        messages=[{"role": "user", "content": "hi"}],
+                    )
+                )
+
+            assert response.choices[0].message.content == "ok"
+            assert litellm.model_cost["gpt-5.4"]["mode"] == "chat"
         finally:
             if original is None:
                 litellm.model_cost.pop("gpt-5.4", None)
@@ -973,6 +993,18 @@ class TestCodexRequestHardening:
         assert kwargs["api_base"] == "https://chatgpt.com/backend-api/codex"
         assert "store" in kwargs["allowed_openai_params"]
 
+    def test_codex_build_system_messages_handles_whitespace_only_prompt(self):
+        """Whitespace-only system prompts should fall back to the default prompt."""
+        provider = LiteLLMProvider(
+            model="openai/gpt-5.4",
+            api_key="test-key",
+            api_base="https://chatgpt.com/backend-api/codex",
+        )
+
+        messages = provider._codex_adapter.build_system_messages("\n \n", json_mode=False)
+
+        assert messages == [{"role": "system", "content": "You are a helpful assistant."}]
+
     def test_codex_merge_tool_call_chunk_handles_parallel_calls_with_broken_indexes(self):
         """Codex chunk merging should survive index=0 for multiple parallel tool calls."""
         from types import SimpleNamespace