core: introduce litellm provider

Mohamed Awnallah
2026-01-21 07:24:35 +02:00
parent 967318da3e
commit 86d11bbf39
6 changed files with 499 additions and 7 deletions
+2 -1
View File
@@ -2,5 +2,6 @@
 from framework.llm.provider import LLMProvider, LLMResponse
 from framework.llm.anthropic import AnthropicProvider
+from framework.llm.litellm import LiteLLMProvider
-__all__ = ["LLMProvider", "LLMResponse", "AnthropicProvider"]
+__all__ = ["LLMProvider", "LLMResponse", "AnthropicProvider", "LiteLLMProvider"]
+248
View File
@@ -0,0 +1,248 @@
"""LiteLLM provider for pluggable multi-provider LLM support.
LiteLLM provides a unified, OpenAI-compatible interface that supports
multiple LLM providers including OpenAI, Anthropic, Gemini, Mistral,
Groq, and local models.
See: https://docs.litellm.ai/docs/providers
"""
import json
from typing import Any
import litellm
from framework.llm.provider import LLMProvider, LLMResponse, Tool, ToolUse, ToolResult
class LiteLLMProvider(LLMProvider):
"""
LiteLLM-based LLM provider for multi-provider support.
Supports any model that LiteLLM supports, including:
- OpenAI: gpt-4o, gpt-4o-mini, gpt-4-turbo, gpt-3.5-turbo
- Anthropic: claude-3-opus, claude-3-sonnet, claude-3-haiku
- Google: gemini-pro, gemini-1.5-pro, gemini-1.5-flash
- Mistral: mistral-large, mistral-medium, mistral-small
- Groq: llama3-70b, mixtral-8x7b
- Local: ollama/llama3, ollama/mistral
- And many more...
Usage:
# OpenAI
provider = LiteLLMProvider(model="gpt-4o-mini")
# Anthropic
provider = LiteLLMProvider(model="claude-3-haiku-20240307")
# Google Gemini
provider = LiteLLMProvider(model="gemini/gemini-1.5-flash")
# Local Ollama
provider = LiteLLMProvider(model="ollama/llama3")
# With custom API base
provider = LiteLLMProvider(
model="gpt-4o-mini",
api_base="https://my-proxy.com/v1"
)
"""
def __init__(
self,
model: str = "gpt-4o-mini",
api_key: str | None = None,
api_base: str | None = None,
**kwargs: Any,
):
"""
Initialize the LiteLLM provider.
Args:
model: Model identifier (e.g., "gpt-4o-mini", "claude-3-haiku-20240307")
LiteLLM auto-detects the provider from the model name.
api_key: API key for the provider. If not provided, LiteLLM will
look for the appropriate env var (OPENAI_API_KEY,
ANTHROPIC_API_KEY, etc.)
api_base: Custom API base URL (for proxies or local deployments)
**kwargs: Additional arguments passed to litellm.completion()
"""
self.model = model
self.api_key = api_key
self.api_base = api_base
self.extra_kwargs = kwargs
def complete(
self,
messages: list[dict[str, Any]],
system: str = "",
tools: list[Tool] | None = None,
max_tokens: int = 1024,
) -> LLMResponse:
"""Generate a completion using LiteLLM."""
# Prepare messages with system prompt
full_messages = []
if system:
full_messages.append({"role": "system", "content": system})
full_messages.extend(messages)
# Build kwargs
kwargs: dict[str, Any] = {
"model": self.model,
"messages": full_messages,
"max_tokens": max_tokens,
**self.extra_kwargs,
}
if self.api_key:
kwargs["api_key"] = self.api_key
if self.api_base:
kwargs["api_base"] = self.api_base
# Add tools if provided
if tools:
kwargs["tools"] = [self._tool_to_openai_format(t) for t in tools]
# Make the call
response = litellm.completion(**kwargs)
# Extract content
content = response.choices[0].message.content or ""
# Get usage info
usage = response.usage
input_tokens = usage.prompt_tokens if usage else 0
output_tokens = usage.completion_tokens if usage else 0
return LLMResponse(
content=content,
model=response.model or self.model,
input_tokens=input_tokens,
output_tokens=output_tokens,
stop_reason=response.choices[0].finish_reason or "",
raw_response=response,
)
def complete_with_tools(
self,
messages: list[dict[str, Any]],
system: str,
tools: list[Tool],
tool_executor: callable,
max_iterations: int = 10,
) -> LLMResponse:
"""Run a tool-use loop until the LLM produces a final response."""
# Prepare messages with system prompt
current_messages = []
if system:
current_messages.append({"role": "system", "content": system})
current_messages.extend(messages)
total_input_tokens = 0
total_output_tokens = 0
# Convert tools to OpenAI format
openai_tools = [self._tool_to_openai_format(t) for t in tools]
for _ in range(max_iterations):
# Build kwargs
kwargs: dict[str, Any] = {
"model": self.model,
"messages": current_messages,
"max_tokens": 1024,
"tools": openai_tools,
**self.extra_kwargs,
}
if self.api_key:
kwargs["api_key"] = self.api_key
if self.api_base:
kwargs["api_base"] = self.api_base
response = litellm.completion(**kwargs)
# Track tokens
usage = response.usage
if usage:
total_input_tokens += usage.prompt_tokens
total_output_tokens += usage.completion_tokens
choice = response.choices[0]
message = choice.message
# Check if we're done (no tool calls)
if choice.finish_reason == "stop" or not message.tool_calls:
return LLMResponse(
content=message.content or "",
model=response.model or self.model,
input_tokens=total_input_tokens,
output_tokens=total_output_tokens,
stop_reason=choice.finish_reason or "stop",
raw_response=response,
)
# Process tool calls.
# Add assistant message with tool calls.
current_messages.append({
"role": "assistant",
"content": message.content,
"tool_calls": [
{
"id": tc.id,
"type": "function",
"function": {
"name": tc.function.name,
"arguments": tc.function.arguments,
},
}
for tc in message.tool_calls
],
})
# Execute tools and add results.
for tool_call in message.tool_calls:
# Parse arguments
try:
args = json.loads(tool_call.function.arguments)
except json.JSONDecodeError:
args = {}
tool_use = ToolUse(
id=tool_call.id,
name=tool_call.function.name,
input=args,
)
result = tool_executor(tool_use)
# Add tool result message
current_messages.append({
"role": "tool",
"tool_call_id": result.tool_use_id,
"content": result.content,
})
# Max iterations reached
return LLMResponse(
content="Max tool iterations reached",
model=self.model,
input_tokens=total_input_tokens,
output_tokens=total_output_tokens,
stop_reason="max_iterations",
raw_response=None,
)
def _tool_to_openai_format(self, tool: Tool) -> dict[str, Any]:
"""Convert Tool to OpenAI function calling format."""
return {
"type": "function",
"function": {
"name": tool.name,
"description": tool.description,
"parameters": {
"type": "object",
"properties": tool.parameters.get("properties", {}),
"required": tool.parameters.get("required", []),
},
},
}
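
For reference, here is a minimal sketch of driving the tool-use loop above from the caller's side. The echo tool and its executor are hypothetical, and a real run needs an OPENAI_API_KEY (or whichever key matches the chosen model) in the environment:

# Hypothetical single-tool setup exercising complete_with_tools().
from framework.llm import LiteLLMProvider
from framework.llm.provider import Tool, ToolUse, ToolResult

provider = LiteLLMProvider(model="gpt-4o-mini")

echo_tool = Tool(
    name="echo",
    description="Echo the given text back to the model",
    parameters={
        "properties": {"text": {"type": "string"}},
        "required": ["text"],
    },
)

def tool_executor(tool_use: ToolUse) -> ToolResult:
    # Only one tool is registered, so no dispatch on tool_use.name is needed.
    return ToolResult(
        tool_use_id=tool_use.id,
        content=tool_use.input.get("text", ""),
        is_error=False,
    )

response = provider.complete_with_tools(
    messages=[{"role": "user", "content": "Use the echo tool on 'hello'."}],
    system="You are a terse assistant.",
    tools=[echo_tool],
    tool_executor=tool_executor,
)
print(response.content, response.stop_reason)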
+8 -6
View File
@@ -12,6 +12,7 @@ from framework.graph.edge import GraphSpec, EdgeSpec, EdgeCondition
 from framework.graph.node import NodeSpec
 from framework.graph.executor import GraphExecutor, ExecutionResult
 from framework.llm.provider import LLMProvider, Tool, ToolResult, ToolUse
+from framework.llm.litellm import LiteLLMProvider
 from framework.runner.tool_registry import ToolRegistry
 from framework.runtime.core import Runtime
@@ -182,7 +183,8 @@ class AgentRunner:
             goal: Loaded Goal object
             mock_mode: If True, use mock LLM responses
             storage_path: Path for runtime storage (defaults to temp)
-            model: Anthropic model to use
+            model: Model to use - any LiteLLM-compatible model name
+                (e.g., "claude-sonnet-4-20250514", "gpt-4o-mini", "gemini/gemini-pro")
         """
         self.agent_path = agent_path
         self.graph = graph
@@ -366,11 +368,11 @@ class AgentRunner:
         # Create runtime
         self._runtime = Runtime(storage_path=self._storage_path)

-        # Create LLM provider (if not mock mode and API key available)
-        if not self.mock_mode and os.environ.get("ANTHROPIC_API_KEY"):
-            from framework.llm.anthropic import AnthropicProvider
-            self._llm = AnthropicProvider(model=self.model)
+        # Create LLM provider (if not mock mode)
+        # Use LiteLLM as the unified backend for all providers
+        if not self.mock_mode:
+            # LiteLLM auto-detects the provider from the model name and finds the right API key
+            self._llm = LiteLLMProvider(model=self.model)

         # Create executor
         self._executor = GraphExecutor(
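
The practical effect of this change: provider creation is no longer gated on ANTHROPIC_API_KEY, because LiteLLM resolves credentials from whichever env var matches the model's provider. A minimal sketch with a placeholder key (construction is lazy and makes no network call):

import os

from framework.llm import LiteLLMProvider

# LiteLLM maps gemini/* model names to GEMINI_API_KEY.
os.environ["GEMINI_API_KEY"] = "placeholder-key"  # hypothetical value
provider = LiteLLMProvider(model="gemini/gemini-1.5-flash")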
+1
View File
@@ -7,6 +7,7 @@ requires-python = ">=3.11"
 dependencies = [
     "pydantic>=2.0",
     "anthropic>=0.40.0",
+    "litellm>=1.81.0",
 ]

 [project.optional-dependencies]
+1
View File
@@ -2,6 +2,7 @@
 pydantic>=2.0
 anthropic>=0.40.0
 httpx>=0.27.0
+litellm>=1.81.0

 # MCP server dependencies
 mcp
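
A quick sanity check that the installed litellm satisfies the new pin, using stdlib package metadata rather than any litellm-specific attribute:

# Prints the installed version; should be >= 1.81.0 per the pin above.
from importlib.metadata import version

print(version("litellm"))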
+239
View File
@@ -0,0 +1,239 @@
"""Tests for LiteLLM provider.
Run with:
cd core
pip install litellm pytest
pytest tests/test_litellm_provider.py -v
For live tests (requires API keys):
OPENAI_API_KEY=sk-... pytest tests/test_litellm_provider.py -v -m live
"""
import os
import pytest
from unittest.mock import Mock, patch, MagicMock
from framework.llm.litellm import LiteLLMProvider
from framework.llm.anthropic import AnthropicProvider
from framework.llm.provider import LLMProvider, Tool, ToolUse, ToolResult
class TestLiteLLMProviderInit:
"""Test LiteLLMProvider initialization."""
def test_init_with_defaults(self):
"""Test initialization with default parameters."""
with patch.dict(os.environ, {"OPENAI_API_KEY": "test-key"}):
provider = LiteLLMProvider()
assert provider.model == "gpt-4o-mini"
assert provider.api_key is None
assert provider.api_base is None
def test_init_with_custom_model(self):
"""Test initialization with custom model."""
with patch.dict(os.environ, {"ANTHROPIC_API_KEY": "test-key"}):
provider = LiteLLMProvider(model="claude-3-haiku-20240307")
assert provider.model == "claude-3-haiku-20240307"
def test_init_with_api_key(self):
"""Test initialization with explicit API key."""
provider = LiteLLMProvider(model="gpt-4o-mini", api_key="my-api-key")
assert provider.api_key == "my-api-key"
def test_init_with_api_base(self):
"""Test initialization with custom API base."""
provider = LiteLLMProvider(
model="gpt-4o-mini",
api_key="my-key",
api_base="https://my-proxy.com/v1"
)
assert provider.api_base == "https://my-proxy.com/v1"
def test_init_ollama_no_key_needed(self):
"""Test that Ollama models don't require API key."""
with patch.dict(os.environ, {}, clear=True):
# Should not raise.
provider = LiteLLMProvider(model="ollama/llama3")
assert provider.model == "ollama/llama3"
class TestLiteLLMProviderComplete:
"""Test LiteLLMProvider.complete() method."""
@patch("litellm.completion")
def test_complete_basic(self, mock_completion):
"""Test basic completion call."""
# Mock response
mock_response = MagicMock()
mock_response.choices = [MagicMock()]
mock_response.choices[0].message.content = "Hello! I'm an AI assistant."
mock_response.choices[0].finish_reason = "stop"
mock_response.model = "gpt-4o-mini"
mock_response.usage.prompt_tokens = 10
mock_response.usage.completion_tokens = 20
mock_completion.return_value = mock_response
provider = LiteLLMProvider(model="gpt-4o-mini", api_key="test-key")
result = provider.complete(
messages=[{"role": "user", "content": "Hello"}]
)
assert result.content == "Hello! I'm an AI assistant."
assert result.model == "gpt-4o-mini"
assert result.input_tokens == 10
assert result.output_tokens == 20
assert result.stop_reason == "stop"
# Verify litellm.completion was called correctly
mock_completion.assert_called_once()
call_kwargs = mock_completion.call_args[1]
assert call_kwargs["model"] == "gpt-4o-mini"
assert call_kwargs["api_key"] == "test-key"
@patch("litellm.completion")
def test_complete_with_system_prompt(self, mock_completion):
"""Test completion with system prompt."""
mock_response = MagicMock()
mock_response.choices = [MagicMock()]
mock_response.choices[0].message.content = "Response"
mock_response.choices[0].finish_reason = "stop"
mock_response.model = "gpt-4o-mini"
mock_response.usage.prompt_tokens = 15
mock_response.usage.completion_tokens = 5
mock_completion.return_value = mock_response
provider = LiteLLMProvider(model="gpt-4o-mini", api_key="test-key")
provider.complete(
messages=[{"role": "user", "content": "Hello"}],
system="You are a helpful assistant."
)
call_kwargs = mock_completion.call_args[1]
messages = call_kwargs["messages"]
assert messages[0]["role"] == "system"
assert messages[0]["content"] == "You are a helpful assistant."
@patch("litellm.completion")
def test_complete_with_tools(self, mock_completion):
"""Test completion with tools."""
mock_response = MagicMock()
mock_response.choices = [MagicMock()]
mock_response.choices[0].message.content = "Response"
mock_response.choices[0].finish_reason = "stop"
mock_response.model = "gpt-4o-mini"
mock_response.usage.prompt_tokens = 20
mock_response.usage.completion_tokens = 10
mock_completion.return_value = mock_response
provider = LiteLLMProvider(model="gpt-4o-mini", api_key="test-key")
tools = [
Tool(
name="get_weather",
description="Get the weather for a location",
parameters={
"properties": {
"location": {"type": "string", "description": "City name"}
},
"required": ["location"]
}
)
]
provider.complete(
messages=[{"role": "user", "content": "What's the weather?"}],
tools=tools
)
call_kwargs = mock_completion.call_args[1]
assert "tools" in call_kwargs
assert call_kwargs["tools"][0]["type"] == "function"
assert call_kwargs["tools"][0]["function"]["name"] == "get_weather"
class TestLiteLLMProviderToolUse:
"""Test LiteLLMProvider.complete_with_tools() method."""
@patch("litellm.completion")
def test_complete_with_tools_single_iteration(self, mock_completion):
"""Test tool use with single iteration."""
# First response: tool call
tool_call_response = MagicMock()
tool_call_response.choices = [MagicMock()]
tool_call_response.choices[0].message.content = None
tool_call_response.choices[0].message.tool_calls = [MagicMock()]
tool_call_response.choices[0].message.tool_calls[0].id = "call_123"
tool_call_response.choices[0].message.tool_calls[0].function.name = "get_weather"
tool_call_response.choices[0].message.tool_calls[0].function.arguments = '{"location": "London"}'
tool_call_response.choices[0].finish_reason = "tool_calls"
tool_call_response.model = "gpt-4o-mini"
tool_call_response.usage.prompt_tokens = 20
tool_call_response.usage.completion_tokens = 15
# Second response: final answer
final_response = MagicMock()
final_response.choices = [MagicMock()]
final_response.choices[0].message.content = "The weather in London is sunny."
final_response.choices[0].message.tool_calls = None
final_response.choices[0].finish_reason = "stop"
final_response.model = "gpt-4o-mini"
final_response.usage.prompt_tokens = 30
final_response.usage.completion_tokens = 10
mock_completion.side_effect = [tool_call_response, final_response]
provider = LiteLLMProvider(model="gpt-4o-mini", api_key="test-key")
tools = [
Tool(
name="get_weather",
description="Get the weather",
parameters={"properties": {"location": {"type": "string"}}, "required": ["location"]}
)
]
def tool_executor(tool_use: ToolUse) -> ToolResult:
return ToolResult(
tool_use_id=tool_use.id,
content="Sunny, 22C",
is_error=False
)
result = provider.complete_with_tools(
messages=[{"role": "user", "content": "What's the weather in London?"}],
system="You are a weather assistant.",
tools=tools,
tool_executor=tool_executor
)
assert result.content == "The weather in London is sunny."
assert result.input_tokens == 50 # 20 + 30
assert result.output_tokens == 25 # 15 + 10
assert mock_completion.call_count == 2
class TestToolConversion:
"""Test tool format conversion."""
def test_tool_to_openai_format(self):
"""Test converting Tool to OpenAI format."""
provider = LiteLLMProvider(model="gpt-4o-mini", api_key="test-key")
tool = Tool(
name="search",
description="Search the web",
parameters={
"properties": {
"query": {"type": "string", "description": "Search query"}
},
"required": ["query"]
}
)
result = provider._tool_to_openai_format(tool)
assert result["type"] == "function"
assert result["function"]["name"] == "search"
assert result["function"]["description"] == "Search the web"
assert result["function"]["parameters"]["properties"]["query"]["type"] == "string"
assert result["function"]["parameters"]["required"] == ["query"]