feat(community): add Serper web search provider (#2630)
* feat(community): add Serper web search provider Add a new community search provider backed by the Serper Google Search API (https://serper.dev). Serper returns real-time Google results via a simple JSON API and requires only an API key — no extra Python package. Changes: - backend/packages/harness/deerflow/community/serper/__init__.py - backend/packages/harness/deerflow/community/serper/tools.py Implements web_search_tool using httpx (already a project dependency). API key is read from config.yaml `api_key` field or SERPER_API_KEY env var. Follows the same interface / output shape as the existing ddg_search provider. Exposes max_results parameter (default 5) with config override logic. - backend/tests/test_serper_tools.py Unit tests covering API key resolution, config overrides, HTTP errors, empty results, and parameter passing. - config.example.yaml: add commented-out Serper example alongside other providers - .env.example: add SERPER_API_KEY placeholder Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com> * Fix the lint error * Fix the lint error --------- Co-authored-by: Claude Sonnet 4.6 <noreply@anthropic.com> Co-authored-by: Willem Jiang <willem.jiang@gmail.com>
This commit is contained in:
@@ -1,3 +1,6 @@
|
|||||||
|
# Serper API Key (Google Search) - https://serper.dev
|
||||||
|
SERPER_API_KEY=your-serper-api-key
|
||||||
|
|
||||||
# TAVILY API Key
|
# TAVILY API Key
|
||||||
TAVILY_API_KEY=your-tavily-api-key
|
TAVILY_API_KEY=your-tavily-api-key
|
||||||
|
|
||||||
|
|||||||
@@ -0,0 +1,3 @@
|
|||||||
|
from .tools import web_search_tool
|
||||||
|
|
||||||
|
__all__ = ["web_search_tool"]
|
||||||
@@ -0,0 +1,95 @@
|
|||||||
|
"""
|
||||||
|
Web Search Tool - Search the web using Serper (Google Search API).
|
||||||
|
|
||||||
|
Serper provides real-time Google Search results via a JSON API.
|
||||||
|
An API key is required. Sign up at https://serper.dev to get one.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import json
|
||||||
|
import logging
|
||||||
|
import os
|
||||||
|
|
||||||
|
import httpx
|
||||||
|
from langchain.tools import tool
|
||||||
|
|
||||||
|
from deerflow.config import get_app_config
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
_SERPER_ENDPOINT = "https://google.serper.dev/search"
|
||||||
|
_api_key_warned = False
|
||||||
|
|
||||||
|
|
||||||
|
def _get_api_key() -> str | None:
    """Resolve the Serper API key.

    The ``api_key`` entry of the ``web_search`` tool config takes precedence;
    the ``SERPER_API_KEY`` environment variable is the fallback.

    Returns:
        The key with surrounding whitespace removed, or ``None`` when neither
        source provides a non-blank string.
    """
    config = get_app_config().get_tool_config("web_search")
    # pydantic exposes model_extra as None when the model does not allow
    # extra fields, so normalise to an empty mapping before reading from it.
    extra = (config.model_extra or {}) if config is not None else {}

    for candidate in (extra.get("api_key"), os.getenv("SERPER_API_KEY")):
        if isinstance(candidate, str):
            cleaned = candidate.strip()
            if cleaned:
                # Return the stripped value: stray whitespace/newlines would
                # otherwise end up inside the X-API-KEY request header.
                return cleaned
    return None
|
||||||
|
|
||||||
|
|
||||||
|
def _coerce_max_results(value: object, default: int = 5) -> int:
    """Return *value* as a positive int, or *default* when it is not usable."""
    try:
        count = int(value)
    except (TypeError, ValueError):
        return default
    return count if count > 0 else default


# NOTE: the docstring below is parsed by the ``@tool`` decorator
# (parse_docstring=True), so it is kept verbatim.
# The function always returns a JSON string: either
# {"query", "total_results", "results": [{"title", "url", "content"}, ...]}
# or {"error", "query"} when the key is missing, the request fails, or
# nothing is found. It never raises to the caller.
@tool("web_search", parse_docstring=True)
def web_search_tool(query: str, max_results: int = 5) -> str:
    """Search the web for information using Google Search via Serper.

    Args:
        query: Search keywords describing what you want to find. Be specific for better results.
        max_results: Maximum number of search results to return. Default is 5.
    """
    global _api_key_warned

    tool_config = get_app_config().get_tool_config("web_search")
    # model_extra may be None when the config model does not allow extras.
    extra = (tool_config.model_extra or {}) if tool_config is not None else {}

    # Sanitise the limit: a negative value would turn the slice below into
    # "all but the last N", and None / strings would be sent to the API as-is.
    limit = _coerce_max_results(max_results)
    if "max_results" in extra:
        # A configured value overrides the call-time argument; an unusable
        # configured value falls back to the (sanitised) argument.
        limit = _coerce_max_results(extra["max_results"], default=limit)

    api_key = _get_api_key()
    if not api_key:
        # Warn only once per process so repeated tool calls do not flood the log.
        if not _api_key_warned:
            _api_key_warned = True
            logger.warning("Serper API key is not set. Set SERPER_API_KEY in your environment or provide api_key in config.yaml. Sign up at https://serper.dev")
        return json.dumps(
            {"error": "SERPER_API_KEY is not configured", "query": query},
            ensure_ascii=False,
        )

    headers = {
        "X-API-KEY": api_key,
        "Content-Type": "application/json",
    }
    payload = {"q": query, "num": limit}

    try:
        with httpx.Client(timeout=30) as client:
            response = client.post(_SERPER_ENDPOINT, headers=headers, json=payload)
            response.raise_for_status()
            data = response.json()
    except httpx.HTTPStatusError as e:
        logger.error("Serper API returned HTTP %s: %s", e.response.status_code, e.response.text)
        return json.dumps(
            {"error": f"Serper API error: HTTP {e.response.status_code}", "query": query},
            ensure_ascii=False,
        )
    except Exception as e:
        # Tool boundary: any failure (network, timeout, invalid JSON) is
        # reported back to the caller as structured JSON instead of raising.
        logger.error("Serper search failed: %s: %s", type(e).__name__, e)
        return json.dumps({"error": str(e), "query": query}, ensure_ascii=False)

    # Be defensive about the response shape: only a dict carrying a list of
    # dict entries under "organic" is usable.
    organic = data.get("organic") if isinstance(data, dict) else None
    entries = [entry for entry in organic if isinstance(entry, dict)] if isinstance(organic, list) else []
    if not entries:
        return json.dumps({"error": "No results found", "query": query}, ensure_ascii=False)

    normalized_results = [
        {
            "title": entry.get("title", ""),
            "url": entry.get("link", ""),
            "content": entry.get("snippet", ""),
        }
        for entry in entries[:limit]
    ]

    output = {
        "query": query,
        "total_results": len(normalized_results),
        "results": normalized_results,
    }
    return json.dumps(output, indent=2, ensure_ascii=False)
|
||||||
@@ -0,0 +1,308 @@
|
|||||||
|
"""Unit tests for the Serper community web search tool."""
|
||||||
|
|
||||||
|
import json
|
||||||
|
from unittest.mock import MagicMock, patch
|
||||||
|
|
||||||
|
import httpx
|
||||||
|
import pytest
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture(autouse=True)
def reset_api_key_warned():
    """Clear the module's warn-once flag around every test.

    The flag is reset both before the test runs and again afterwards so that
    no test observes, or leaks, a previously emitted warning.
    """
    from deerflow.community.serper import tools as serper_tools

    def _clear_flag() -> None:
        serper_tools._api_key_warned = False

    _clear_flag()
    yield
    _clear_flag()
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture
def mock_config_with_key():
    """Patch ``get_app_config`` so the tool config carries an API key and ``max_results`` of 5.

    Yields the patched ``get_app_config`` mock so tests can adjust it further.
    """
    configured = MagicMock(model_extra={"api_key": "test-serper-key", "max_results": 5})
    with patch("deerflow.community.serper.tools.get_app_config") as get_config:
        get_config.return_value.get_tool_config.return_value = configured
        yield get_config
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture
def mock_config_no_key():
    """Patch ``get_app_config`` so the tool config exists but has no extra fields.

    Yields the patched ``get_app_config`` mock.
    """
    bare = MagicMock(model_extra={})
    with patch("deerflow.community.serper.tools.get_app_config") as get_config:
        get_config.return_value.get_tool_config.return_value = bare
        yield get_config
|
||||||
|
|
||||||
|
|
||||||
|
def _make_serper_response(organic: list) -> MagicMock:
|
||||||
|
mock_resp = MagicMock()
|
||||||
|
mock_resp.json.return_value = {"organic": organic}
|
||||||
|
mock_resp.raise_for_status = MagicMock()
|
||||||
|
return mock_resp
|
||||||
|
|
||||||
|
|
||||||
|
class TestGetApiKey:
    """Precedence checks for ``_get_api_key``: config ``api_key`` first, then ``SERPER_API_KEY``."""

    @staticmethod
    def _resolve(tool_config, env=None, *, wipe_env=False):
        """Call ``_get_api_key`` with the tool config and environment stubbed.

        ``tool_config`` is what ``get_tool_config`` returns; ``env`` entries are
        added to ``os.environ`` for the duration of the call, and ``wipe_env``
        clears the environment first.
        """
        from deerflow.community.serper.tools import _get_api_key

        with patch("deerflow.community.serper.tools.get_app_config") as get_config:
            get_config.return_value.get_tool_config.return_value = tool_config
            with patch.dict("os.environ", env or {}, clear=wipe_env):
                return _get_api_key()

    @staticmethod
    def _config_with(api_key):
        """Return a stand-in tool config whose extras hold only ``api_key``."""
        return MagicMock(model_extra={"api_key": api_key})

    def test_returns_config_key_when_present(self):
        assert self._resolve(self._config_with("from-config")) == "from-config"

    def test_falls_back_to_env_when_config_key_empty(self):
        resolved = self._resolve(self._config_with(""), {"SERPER_API_KEY": "env-key"})
        assert resolved == "env-key"

    def test_falls_back_to_env_when_config_key_whitespace(self):
        resolved = self._resolve(self._config_with(" "), {"SERPER_API_KEY": "env-key"})
        assert resolved == "env-key"

    def test_falls_back_to_env_when_config_key_null(self):
        resolved = self._resolve(self._config_with(None), {"SERPER_API_KEY": "env-key"})
        assert resolved == "env-key"

    def test_falls_back_to_env_when_no_config(self):
        resolved = self._resolve(None, {"SERPER_API_KEY": "env-only"})
        assert resolved == "env-only"

    def test_returns_none_when_no_key_anywhere(self):
        # Clearing the patched environment already removes SERPER_API_KEY.
        assert self._resolve(None, wipe_env=True) is None
|
||||||
|
|
||||||
|
|
||||||
|
class TestWebSearchTool:
    """Behaviour of ``web_search_tool`` with the config and HTTP layers stubbed out."""

    @staticmethod
    def _numbered_results(count):
        """Build ``count`` synthetic organic entries (R0/S0, R1/S1, ...)."""
        return [{"title": f"R{i}", "link": f"https://x.com/{i}", "snippet": f"S{i}"} for i in range(count)]

    @staticmethod
    def _run_tool(arguments, *, response=None, failure=None):
        """Invoke the tool with ``httpx.Client`` patched.

        ``response`` is returned by the mocked ``post``; when ``failure`` is
        given it is raised by ``post`` instead. Returns the raw tool output
        and the ``post`` mock so callers can inspect the request.
        """
        from deerflow.community.serper.tools import web_search_tool

        with patch("deerflow.community.serper.tools.httpx.Client") as client_cls:
            post = client_cls.return_value.__enter__.return_value.post
            if failure is None:
                post.return_value = response
            else:
                post.side_effect = failure
            return web_search_tool.invoke(arguments), post

    def test_basic_search_returns_normalized_results(self, mock_config_with_key):
        hits = [
            {"title": "Result 1", "link": "https://example.com/1", "snippet": "Snippet 1"},
            {"title": "Result 2", "link": "https://example.com/2", "snippet": "Snippet 2"},
        ]

        raw, _ = self._run_tool({"query": "python tutorial"}, response=_make_serper_response(hits))
        parsed = json.loads(raw)

        assert parsed["query"] == "python tutorial"
        assert parsed["total_results"] == 2
        first = parsed["results"][0]
        assert first["title"] == "Result 1"
        assert first["url"] == "https://example.com/1"
        assert first["content"] == "Snippet 1"

    def test_respects_max_results_from_config(self, mock_config_with_key):
        mock_config_with_key.return_value.get_tool_config.return_value.model_extra = {
            "api_key": "test-key",
            "max_results": 3,
        }

        raw, _ = self._run_tool({"query": "test"}, response=_make_serper_response(self._numbered_results(10)))
        parsed = json.loads(raw)

        assert parsed["total_results"] == 3
        assert len(parsed["results"]) == 3

    def test_max_results_parameter_accepted(self, mock_config_no_key):
        """Tool accepts max_results as a call parameter when config does not override it."""
        fake = _make_serper_response(self._numbered_results(10))

        with patch.dict("os.environ", {"SERPER_API_KEY": "env-key"}):
            raw, _ = self._run_tool({"query": "test", "max_results": 2}, response=fake)

        assert json.loads(raw)["total_results"] == 2

    def test_config_max_results_overrides_parameter(self):
        """Config max_results overrides the parameter passed at call time, matching ddg_search behaviour."""
        fake = _make_serper_response(self._numbered_results(10))

        with patch("deerflow.community.serper.tools.get_app_config") as get_config:
            get_config.return_value.get_tool_config.return_value = MagicMock(model_extra={"api_key": "test-key", "max_results": 3})
            raw, _ = self._run_tool({"query": "test", "max_results": 8}, response=fake)

        assert json.loads(raw)["total_results"] == 3

    def test_empty_organic_returns_error_json(self, mock_config_with_key):
        """Empty organic list returns structured error, matching ddg_search convention."""
        raw, _ = self._run_tool({"query": "no results"}, response=_make_serper_response([]))
        parsed = json.loads(raw)

        assert "error" in parsed
        assert parsed["error"] == "No results found"
        assert parsed["query"] == "no results"

    def test_missing_api_key_returns_error_json(self, mock_config_no_key):
        from deerflow.community.serper.tools import web_search_tool

        # Clearing the patched environment guarantees SERPER_API_KEY is absent.
        with patch.dict("os.environ", {}, clear=True):
            parsed = json.loads(web_search_tool.invoke({"query": "test"}))

        assert "error" in parsed
        assert "SERPER_API_KEY" in parsed["error"]

    def test_missing_api_key_logs_warning_once(self, mock_config_no_key, caplog):
        import logging

        from deerflow.community.serper.tools import web_search_tool

        with patch.dict("os.environ", {}, clear=True):
            with caplog.at_level(logging.WARNING, logger="deerflow.community.serper.tools"):
                for query in ("q1", "q2"):
                    web_search_tool.invoke({"query": query})

        warning_records = [record for record in caplog.records if record.levelno == logging.WARNING]
        assert len(warning_records) == 1

    def test_http_error_returns_structured_error(self, mock_config_with_key):
        forbidden = MagicMock(status_code=403, text="Forbidden")
        failure = httpx.HTTPStatusError("403", request=MagicMock(), response=forbidden)

        raw, _ = self._run_tool({"query": "test"}, failure=failure)
        parsed = json.loads(raw)

        assert "error" in parsed
        assert "403" in parsed["error"]

    def test_network_exception_returns_error_json(self, mock_config_with_key):
        raw, _ = self._run_tool({"query": "test"}, failure=Exception("timeout"))

        assert "error" in json.loads(raw)

    def test_sends_correct_headers_and_payload(self, mock_config_with_key):
        fake = _make_serper_response([{"title": "T", "link": "https://x.com", "snippet": "S"}])

        _, post = self._run_tool({"query": "hello world"}, response=fake)
        sent = post.call_args.kwargs

        assert sent["headers"]["X-API-KEY"] == "test-serper-key"
        assert sent["json"]["q"] == "hello world"
        assert sent["json"]["num"] == 5

    def test_uses_env_key_when_config_absent(self):
        fake = _make_serper_response([{"title": "T", "link": "https://x.com", "snippet": "S"}])

        with patch("deerflow.community.serper.tools.get_app_config") as get_config:
            get_config.return_value.get_tool_config.return_value = None
            with patch.dict("os.environ", {"SERPER_API_KEY": "env-only-key"}):
                _, post = self._run_tool({"query": "env key test"}, response=fake)

        assert post.call_args.kwargs["headers"]["X-API-KEY"] == "env-only-key"

    def test_partial_fields_in_organic_result(self, mock_config_with_key):
        """Missing title/link/snippet should default to empty string."""
        raw, _ = self._run_tool({"query": "test"}, response=_make_serper_response([{}]))

        assert json.loads(raw)["results"][0] == {"title": "", "url": "", "content": ""}
|
||||||
@@ -373,6 +373,16 @@ tools:
|
|||||||
use: deerflow.community.ddg_search.tools:web_search_tool
|
use: deerflow.community.ddg_search.tools:web_search_tool
|
||||||
max_results: 5
|
max_results: 5
|
||||||
|
|
||||||
|
# Web search tool (uses Serper - Google Search API, requires SERPER_API_KEY)
|
||||||
|
# Serper provides real-time Google Search results. Sign up at https://serper.dev
|
||||||
|
# Note: set SERPER_API_KEY in your environment before starting the app, or
|
||||||
|
# uncomment and fill in api_key below (the $VAR syntax is resolved at startup).
|
||||||
|
# - name: web_search
|
||||||
|
# group: web
|
||||||
|
# use: deerflow.community.serper.tools:web_search_tool
|
||||||
|
# max_results: 5
|
||||||
|
# # api_key: $SERPER_API_KEY # Optional if SERPER_API_KEY env var is set
|
||||||
|
|
||||||
# Web search tool (requires Tavily API key)
|
# Web search tool (requires Tavily API key)
|
||||||
# - name: web_search
|
# - name: web_search
|
||||||
# group: web
|
# group: web
|
||||||
|
|||||||
Reference in New Issue
Block a user