diff --git a/core/framework/config.py b/core/framework/config.py
index 236f9be6..a393683d 100644
--- a/core/framework/config.py
+++ b/core/framework/config.py
@@ -186,6 +186,8 @@ def get_worker_llm_extra_kwargs() -> dict[str, Any]:
             "store": False,
             "allowed_openai_params": ["store"],
         }
+    if worker_llm.get("provider") == "ollama":
+        return {"num_ctx": worker_llm.get("num_ctx", 16384)}
     return {}
 
 
@@ -432,6 +434,11 @@ def get_llm_extra_kwargs() -> dict[str, Any]:
             "store": False,
             "allowed_openai_params": ["store"],
         }
+    if llm.get("provider") == "ollama":
+        # Pass num_ctx to Ollama so it doesn't silently truncate the ~9.5k Queen prompt.
+        # Ollama's default num_ctx is only 2048; we default to 16384 (a config num_ctx
+        # overrides it) so LiteLLM passes it through as a provider-specific option.
+        return {"num_ctx": llm.get("num_ctx", 16384)}
     return {}
 
 
diff --git a/core/framework/llm/litellm.py b/core/framework/llm/litellm.py
index 7697cdd8..f995ec69 100644
--- a/core/framework/llm/litellm.py
+++ b/core/framework/llm/litellm.py
@@ -159,6 +159,26 @@ if litellm is not None:
     # (e.g. stream_options for Anthropic) instead of forwarding them verbatim.
     litellm.drop_params = True
 
+
+def _is_ollama_model(model: str) -> bool:
+    """Return True for any Ollama model string (ollama/ or ollama_chat/ prefix)."""
+    return model.startswith("ollama/") or model.startswith("ollama_chat/")
+
+
+def _ensure_ollama_chat_prefix(model: str) -> str:
+    """Normalise Ollama model strings to use the ollama_chat/ prefix.
+
+    LiteLLM requires the ``ollama_chat/`` prefix (not ``ollama/``) to enable
+    native function-calling support. With ``ollama/``, LiteLLM falls back to
+    JSON-mode tool calls, which the framework cannot parse as real tool calls.
+
+    See: https://docs.litellm.ai/docs/providers/ollama#example-usage---tool-calling
+    """
+    if model.startswith("ollama/"):
+        return "ollama_chat/" + model[len("ollama/") :]
+    return model
+
+
 RATE_LIMIT_MAX_RETRIES = 10
 RATE_LIMIT_BACKOFF_BASE = 2  # seconds
 RATE_LIMIT_MAX_DELAY = 120  # seconds - cap to prevent absurd waits
@@ -499,7 +519,9 @@ class LiteLLMProvider(LLMProvider):
         # Translate kimi/ prefix to anthropic/ so litellm uses the Anthropic
         # Messages API handler and routes to that endpoint — no special headers needed.
         _original_model = model
-        if model.lower().startswith("kimi/"):
+        if _is_ollama_model(model):
+            model = _ensure_ollama_chat_prefix(model)
+        elif model.lower().startswith("kimi/"):
             model = "anthropic/" + model[len("kimi/") :]
             # Normalise api_base: litellm's Anthropic handler appends /v1/messages,
             # so the base must be https://api.kimi.com/coding (no /v1 suffix).
@@ -722,6 +744,10 @@ class LiteLLMProvider(LLMProvider):
         # Add tools if provided
         if tools:
             kwargs["tools"] = [self._tool_to_openai_format(t) for t in tools]
+            if _is_ollama_model(self.model):
+                # Ollama needs tool_choice=auto set explicitly for function calling;
+                # setdefault keeps any caller-supplied value.
+                kwargs.setdefault("tool_choice", "auto")
 
         # Add response_format for structured output
         # LiteLLM passes this through to the underlying provider
@@ -919,6 +945,10 @@ class LiteLLMProvider(LLMProvider):
             kwargs["api_base"] = self.api_base
         if tools:
             kwargs["tools"] = [self._tool_to_openai_format(t) for t in tools]
+            if _is_ollama_model(self.model):
+                # Ollama needs tool_choice=auto set explicitly for function calling;
+                # setdefault keeps any caller-supplied value.
+                kwargs.setdefault("tool_choice", "auto")
         if response_format:
             kwargs["response_format"] = response_format
 
@@ -1620,6 +1650,10 @@ class LiteLLMProvider(LLMProvider):
             kwargs["api_base"] = self.api_base
         if tools:
             kwargs["tools"] = [self._tool_to_openai_format(t) for t in tools]
+            if _is_ollama_model(self.model):
+                # Ollama needs tool_choice=auto set explicitly for function calling;
+                # setdefault keeps any caller-supplied value.
+                kwargs.setdefault("tool_choice", "auto")
         if response_format:
             kwargs["response_format"] = response_format
         # The Codex ChatGPT backend (Responses API) rejects several params.
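
Taken together, the config and provider hunks above mean an Ollama request now leaves the framework roughly as sketched below. This is an illustrative sketch, not code from this PR: the llama3 model name, the prompt, and the echo tool are made up, and it assumes a local Ollama server on the default port.

import litellm

# Sketch: the request the provider now effectively builds for an Ollama model.
model = "ollama/llama3"                              # as configured
if model.startswith("ollama/"):                      # what _ensure_ollama_chat_prefix does
    model = "ollama_chat/" + model[len("ollama/"):]

echo_tool = {                                        # hypothetical tool, for illustration
    "type": "function",
    "function": {
        "name": "echo",
        "description": "Echo the input back.",
        "parameters": {
            "type": "object",
            "properties": {"text": {"type": "string"}},
            "required": ["text"],
        },
    },
}

response = litellm.completion(
    model=model,                                     # "ollama_chat/llama3": native tool calls
    messages=[{"role": "user", "content": "Say hi via the echo tool."}],
    tools=[echo_tool],
    tool_choice="auto",                              # set explicitly, per the hunks above
    api_base="http://localhost:11434",               # assumed local Ollama server
    num_ctx=16384,                                   # injected via get_llm_extra_kwargs()
)
print(response.choices[0].message)
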
+ kwargs.setdefault("tool_choice", "auto") if response_format: kwargs["response_format"] = response_format @@ -1620,6 +1650,10 @@ class LiteLLMProvider(LLMProvider): kwargs["api_base"] = self.api_base if tools: kwargs["tools"] = [self._tool_to_openai_format(t) for t in tools] + if _is_ollama_model(self.model): + # Ollama requires explicit tool_choice=auto for function calling + # so future readers don't have to guess. + kwargs.setdefault("tool_choice", "auto") if response_format: kwargs["response_format"] = response_format # The Codex ChatGPT backend (Responses API) rejects several params. diff --git a/core/tests/test_litellm_provider.py b/core/tests/test_litellm_provider.py index 6024f355..2fa9a2af 100644 --- a/core/tests/test_litellm_provider.py +++ b/core/tests/test_litellm_provider.py @@ -18,11 +18,14 @@ from unittest.mock import AsyncMock, MagicMock, patch import pytest +from framework.config import get_llm_extra_kwargs from framework.llm.anthropic import AnthropicProvider from framework.llm.litellm import ( OPENROUTER_TOOL_COMPAT_MODEL_CACHE, LiteLLMProvider, _compute_retry_delay, + _ensure_ollama_chat_prefix, + _is_ollama_model, ) from framework.llm.provider import LLMProvider, LLMResponse, Tool @@ -93,9 +96,9 @@ class TestLiteLLMProviderInit: def test_init_ollama_no_key_needed(self): """Test that Ollama models don't require API key.""" with patch.dict(os.environ, {}, clear=True): - # Should not raise. + # Should not raise; ollama/ is normalised to ollama_chat/ for tool-call support. provider = LiteLLMProvider(model="ollama/llama3") - assert provider.model == "ollama/llama3" + assert provider.model == "ollama_chat/llama3" class TestLiteLLMProviderComplete: @@ -1084,3 +1087,103 @@ class TestIsLocalModel: from framework.runner.runner import AgentRunner assert AgentRunner._is_local_model(model) is False + + +# --------------------------------------------------------------------------- +# Ollama helper functions +# --------------------------------------------------------------------------- + + +class TestIsOllamaModel: + """Tests for _is_ollama_model().""" + + @pytest.mark.parametrize( + "model", + [ + "ollama/llama3", + "ollama/mistral:7b", + "ollama_chat/llama3", + "ollama_chat/qwen2.5:72b", + ], + ) + def test_ollama_models_return_true(self, model): + assert _is_ollama_model(model) is True + + @pytest.mark.parametrize( + "model", + [ + "gpt-4o-mini", + "anthropic/claude-3-haiku", + "openai/gpt-4o", + "gemini/gemini-1.5-flash", + "llama3", + "", + ], + ) + def test_non_ollama_models_return_false(self, model): + assert _is_ollama_model(model) is False + + +class TestEnsureOllamaChatPrefix: + """Tests for _ensure_ollama_chat_prefix().""" + + @pytest.mark.parametrize( + ("input_model", "expected"), + [ + ("ollama/llama3", "ollama_chat/llama3"), + ("ollama/mistral:7b", "ollama_chat/mistral:7b"), + ("ollama/qwen2.5:72b-instruct", "ollama_chat/qwen2.5:72b-instruct"), + ], + ) + def test_rewrites_ollama_to_ollama_chat(self, input_model, expected): + assert _ensure_ollama_chat_prefix(input_model) == expected + + @pytest.mark.parametrize( + "model", + [ + "ollama_chat/llama3", + "gpt-4o-mini", + "anthropic/claude-3-haiku", + "gemini/gemini-1.5-flash", + "", + ], + ) + def test_leaves_non_ollama_prefix_unchanged(self, model): + assert _ensure_ollama_chat_prefix(model) == model + + +class TestGetLlmExtraKwargsOllama: + """Tests for num_ctx injection via get_llm_extra_kwargs() for Ollama.""" + + def test_ollama_provider_returns_num_ctx(self): + """Ollama config should inject num_ctx with default 
16384.""" + config = { + "llm": {"provider": "ollama", "model": "ollama/llama3"}, + } + with patch("framework.config.get_hive_config", return_value=config): + result = get_llm_extra_kwargs() + assert result == {"num_ctx": 16384} + + def test_ollama_provider_respects_custom_num_ctx(self): + """User-specified num_ctx in config should take precedence.""" + config = { + "llm": {"provider": "ollama", "model": "ollama/llama3", "num_ctx": 32768}, + } + with patch("framework.config.get_hive_config", return_value=config): + result = get_llm_extra_kwargs() + assert result == {"num_ctx": 32768} + + def test_non_ollama_provider_returns_empty(self): + """Non-Ollama provider without subscriptions should return empty dict.""" + config = { + "llm": {"provider": "anthropic", "model": "claude-3-haiku"}, + } + with patch("framework.config.get_hive_config", return_value=config): + result = get_llm_extra_kwargs() + assert result == {} + + def test_empty_config_returns_empty(self): + """Missing config should return empty dict.""" + with patch("framework.config.get_hive_config", return_value={}): + result = get_llm_extra_kwargs() + assert result == {} diff --git a/quickstart.ps1 b/quickstart.ps1 index 858d2e89..b8bbd9a2 100644 --- a/quickstart.ps1 +++ b/quickstart.ps1 @@ -1035,6 +1035,12 @@ $ProviderMenuUrls = @( "https://openrouter.ai/keys" ) +$OllamaDetected = $false +try { + $null = & ollama list 2>$null + if ($LASTEXITCODE -eq 0) { $OllamaDetected = $true } +} catch { } + # ── Read previous configuration (if any) ────────────────────── $PrevProvider = "" $PrevModel = "" @@ -1071,7 +1077,9 @@ if ($PrevSubMode -or $PrevProvider) { "kimi_code" { if ($KimiCredDetected) { $prevCredValid = $true } } "hive_llm" { if ($HiveCredDetected) { $prevCredValid = $true } } default { - if ($PrevEnvVar) { + if ($PrevProvider -eq "ollama") { + $prevCredValid = $true + } elseif ($PrevEnvVar) { $envVal = [System.Environment]::GetEnvironmentVariable($PrevEnvVar, "Process") if (-not $envVal) { $envVal = [System.Environment]::GetEnvironmentVariable($PrevEnvVar, "User") } if ($envVal) { $prevCredValid = $true } @@ -1095,6 +1103,7 @@ if ($PrevSubMode -or $PrevProvider) { "groq" { $DefaultChoice = "10" } "cerebras" { $DefaultChoice = "11" } "openrouter" { $DefaultChoice = "12" } + "ollama" { $DefaultChoice = "13" } "minimax" { $DefaultChoice = "4" } "kimi" { $DefaultChoice = "5" } } @@ -1163,7 +1172,17 @@ for ($idx = 0; $idx -lt $ProviderMenuEnvVars.Count; $idx++) { if ($envVal) { Write-Color -Text " (credential detected)" -Color Green } else { Write-Host "" } } -$SkipChoice = 7 + $ProviderMenuEnvVars.Count +# 13) Local (Ollama) — no API key needed +Write-Host " " -NoNewline +Write-Color -Text "13" -Color Cyan -NoNewline +if ($OllamaDetected) { + Write-Host ") Local (Ollama) - No API key needed " -NoNewline + Write-Color -Text "(ollama detected)" -Color Green +} else { + Write-Host ") Local (Ollama) - No API key needed" +} + +$SkipChoice = 7 + $ProviderMenuEnvVars.Count + 1 Write-Host " " -NoNewline Write-Color -Text "$SkipChoice" -Color Cyan -NoNewline Write-Host ") Skip for now" @@ -1383,6 +1402,75 @@ switch ($num) { } } } + 13 { + # Local (Ollama) + if (-not $OllamaDetected) { + Write-Host "" + Write-Warn "Ollama depends on a local Ollama server, but 'ollama list' failed." + Write-Host " Please install Ollama (https://ollama.com) and start the server," + Write-Host " then run this quickstart again." 
+ Write-Host "" + exit 1 + } + $SelectedProviderId = "ollama" + Write-Host "" + Write-Ok "Using Local (Ollama)" + Write-Host "" + + # Fetch available models + $ollamaModels = @() + try { + $listOutput = & ollama list 2>$null + if ($listOutput.Count -gt 1) { + for ($i = 1; $i -lt $listOutput.Count; $i++) { + $line = $listOutput[$i].Trim() + if ($line) { + $mName = ($line -split '\s+')[0] + if ($mName) { $ollamaModels += $mName } + } + } + } + } catch { } + + if ($ollamaModels.Count -eq 0) { + Write-Warn "No Ollama models found." + Write-Host " Please open another terminal, run 'ollama run ' (e.g. 'ollama run llama3')," + Write-Host " and then run this quickstart again." + Write-Host "" + exit 1 + } + + # Show model picker + Write-Host " Select an Ollama model:" + Write-Host "" + $defaultIdx = "1" + for ($i = 0; $i -lt $ollamaModels.Count; $i++) { + Write-Color -Text " $($i + 1)" -Color Cyan -NoNewline + Write-Host ") $($ollamaModels[$i])" + if ($PrevProvider -eq "ollama" -and $PrevModel -eq $ollamaModels[$i]) { + $defaultIdx = [string]($i + 1) + } + } + Write-Host "" + + while ($true) { + $raw = Read-Host "Enter choice (1-$($ollamaModels.Count)) [$defaultIdx]" + if ([string]::IsNullOrWhiteSpace($raw)) { $raw = $defaultIdx } + if ($raw -match '^\d+$') { + $num = [int]$raw + if ($num -ge 1 -and $num -le $ollamaModels.Count) { + $SelectedModel = $ollamaModels[$num - 1] + Write-Host "" + Write-Ok "Model: $SelectedModel" + $SelectedMaxTokens = 8192 + $SelectedMaxContextTokens = 16384 + $SelectedApiBase = "http://localhost:11434" + break + } + } + Write-Color -Text "Invalid choice. Please enter 1-$($ollamaModels.Count)" -Color Red + } + } { $_ -eq $SkipChoice } { Write-Host "" Write-Warn "Skipped. An LLM API key is required to test and use worker agents." @@ -1701,8 +1789,13 @@ if ($SelectedProviderId) { } elseif ($SelectedProviderId -eq "openrouter") { $config.llm["api_base"] = "https://openrouter.ai/api/v1" $config.llm["api_key_env_var"] = $SelectedEnvVar - } else { + } elseif ($SelectedProviderId -eq "ollama") { + $config.llm["api_base"] = "http://localhost:11434" + $config.llm.Remove("api_key_env_var") + } elseif ($SelectedEnvVar) { $config.llm["api_key_env_var"] = $SelectedEnvVar + } else { + $config.llm.Remove("api_key_env_var") } $config | ConvertTo-Json -Depth 4 | Set-Content -Path $HiveConfigFile -Encoding UTF8 diff --git a/quickstart.sh b/quickstart.sh index e59cf760..c4669eb5 100755 --- a/quickstart.sh +++ b/quickstart.sh @@ -912,8 +912,9 @@ config["llm"] = { "model": model, "max_tokens": int(max_tokens), "max_context_tokens": int(max_context_tokens), - "api_key_env_var": env_var, } +if env_var: + config["llm"]["api_key_env_var"] = env_var config["created_at"] = created_at if use_claude_code_sub == "true": @@ -1024,6 +1025,11 @@ elif [ -f "$HOME/.hive/antigravity-accounts.json" ]; then ANTIGRAVITY_CRED_DETECTED=true fi +OLLAMA_DETECTED=false +if ollama list >/dev/null 2>&1; then + OLLAMA_DETECTED=true +fi + # Detect API key providers if [ "$USE_ASSOC_ARRAYS" = true ]; then for env_var in "${!PROVIDER_NAMES[@]}"; do @@ -1056,9 +1062,12 @@ try: with open(cfg_path, encoding="utf-8-sig") as f: c = json.load(f) llm = c.get("llm", {}) - print(f"PREV_PROVIDER={llm.get(\"provider\", \"\")}") - print(f"PREV_MODEL={llm.get(\"model\", \"\")}") - print(f"PREV_ENV_VAR={llm.get(\"api_key_env_var\", \"\")}") + prov = llm.get("provider", "") + mod = llm.get("model", "") + env = llm.get("api_key_env_var", "") + print(f"PREV_PROVIDER='{prov}'") + print(f"PREV_MODEL='{mod}'") + 
print(f"PREV_ENV_VAR='{env}'") sub = "" if llm.get("use_claude_code_subscription"): sub = "claude_code" @@ -1093,8 +1102,12 @@ if [ -n "$PREV_SUB_MODE" ] || [ -n "$PREV_PROVIDER" ]; then hive_llm) [ "$HIVE_CRED_DETECTED" = true ] && PREV_CRED_VALID=true ;; antigravity) [ "$ANTIGRAVITY_CRED_DETECTED" = true ] && PREV_CRED_VALID=true ;; *) - # API key provider — check if the env var is set - if [ -n "$PREV_ENV_VAR" ] && [ -n "${!PREV_ENV_VAR}" ]; then + # API key provider — check if the env var is set; ollama uses local runtime detection + if [ "$PREV_PROVIDER" = "ollama" ]; then + if [ "$OLLAMA_DETECTED" = true ]; then + PREV_CRED_VALID=true + fi + elif [ -n "$PREV_ENV_VAR" ] && [ -n "${!PREV_ENV_VAR}" ]; then PREV_CRED_VALID=true fi ;; @@ -1118,6 +1131,7 @@ if [ -n "$PREV_SUB_MODE" ] || [ -n "$PREV_PROVIDER" ]; then groq) DEFAULT_CHOICE=11 ;; cerebras) DEFAULT_CHOICE=12 ;; openrouter) DEFAULT_CHOICE=13 ;; + ollama) DEFAULT_CHOICE=14 ;; minimax) DEFAULT_CHOICE=4 ;; kimi) DEFAULT_CHOICE=5 ;; hive) DEFAULT_CHOICE=6 ;; @@ -1196,7 +1210,14 @@ for idx in "${!PROVIDER_MENU_ENVS[@]}"; do fi done -SKIP_CHOICE=$((8 + ${#PROVIDER_MENU_ENVS[@]})) +# 14) Local (Ollama) — no API key needed +if [ "$OLLAMA_DETECTED" = true ]; then + echo -e " ${CYAN}14)${NC} Local (Ollama) - No API key needed ${GREEN}(ollama detected)${NC}" +else + echo -e " ${CYAN}14)${NC} Local (Ollama) - No API key needed" +fi + +SKIP_CHOICE=$((8 + ${#PROVIDER_MENU_ENVS[@]} + 1)) echo -e " ${CYAN}$SKIP_CHOICE)${NC} Skip for now" echo "" @@ -1414,6 +1435,56 @@ case $choice in PROVIDER_NAME="OpenRouter" SIGNUP_URL="https://openrouter.ai/keys" ;; + 14) + # Local (Ollama) — no API key; pick model from ollama list + if [ "$OLLAMA_DETECTED" != true ]; then + echo "" + echo -e "${YELLOW}Ollama depends on a local Ollama server, but 'ollama list' failed.${NC}" + echo -e " Please install Ollama (https://ollama.com) and start the server," + echo -e " then run this quickstart again." + echo "" + exit 1 + fi + SELECTED_PROVIDER_ID="ollama" + SELECTED_ENV_VAR="" + SELECTED_MAX_TOKENS=8192 + SELECTED_MAX_CONTEXT_TOKENS=16384 + OLLAMA_MODELS=() + while IFS= read -r line; do + [ -n "$line" ] && OLLAMA_MODELS+=("$line") + done < <(ollama list 2>/dev/null | tail -n +2 | awk '{print $1}') + if [ ${#OLLAMA_MODELS[@]} -gt 0 ]; then + echo "" + echo -e "${BOLD}Select an Ollama model:${NC}" + echo "" + for idx in "${!OLLAMA_MODELS[@]}"; do + num=$((idx + 1)) + echo -e " ${CYAN}$num)${NC} ${OLLAMA_MODELS[$idx]}" + done + echo "" + while true; do + read -r -p "Enter choice (1-${#OLLAMA_MODELS[@]}): " model_choice + if [[ "$model_choice" =~ ^[0-9]+$ ]] && [ "$model_choice" -ge 1 ] && [ "$model_choice" -le ${#OLLAMA_MODELS[@]} ]; then + SELECTED_MODEL="${OLLAMA_MODELS[$((model_choice - 1))]}" + SELECTED_API_BASE="http://localhost:11434" + break + fi + echo -e "${RED}Invalid choice. Please enter 1-${#OLLAMA_MODELS[@]}${NC}" + done + echo "" + echo -e "${GREEN}⬢${NC} Using Ollama with model ${DIM}$SELECTED_MODEL${NC}" + echo -e "${YELLOW} ⚠ Note: The framework uses a ~9,500 token system prompt and requires strong tool use.${NC}" + echo -e "${YELLOW} For best results, use models like qwen2.5:72b+ or mistral-large.${NC}" + echo "" + else + echo "" + echo -e "${RED}No Ollama models found.${NC}" + echo -e " Please open another terminal, run ${CYAN}ollama pull llama3${NC} (or another model)," + echo -e " and then run this quickstart again." 
+ echo "" + exit 1 + fi + ;; "$SKIP_CHOICE") echo "" echo -e "${YELLOW}Skipped.${NC} An LLM API key is required to test and use worker agents." @@ -1584,6 +1655,10 @@ if [ -n "$SELECTED_PROVIDER_ID" ]; then save_configuration "$SELECTED_PROVIDER_ID" "$SELECTED_ENV_VAR" "$SELECTED_MODEL" "$SELECTED_MAX_TOKENS" "$SELECTED_MAX_CONTEXT_TOKENS" "" "$SELECTED_API_BASE" > /dev/null || SAVE_OK=false elif [ "$SELECTED_PROVIDER_ID" = "openrouter" ]; then save_configuration "$SELECTED_PROVIDER_ID" "$SELECTED_ENV_VAR" "$SELECTED_MODEL" "$SELECTED_MAX_TOKENS" "$SELECTED_MAX_CONTEXT_TOKENS" "" "$SELECTED_API_BASE" > /dev/null || SAVE_OK=false + elif [ "$SELECTED_PROVIDER_ID" = "ollama" ]; then + # Pass api_base explicitly — LiteLLM requires this to route ollama/* models + # to the local Ollama server instead of trying to reach a remote endpoint. + save_configuration "ollama" "" "$SELECTED_MODEL" "$SELECTED_MAX_TOKENS" "$SELECTED_MAX_CONTEXT_TOKENS" "" "http://localhost:11434" > /dev/null || SAVE_OK=false else save_configuration "$SELECTED_PROVIDER_ID" "$SELECTED_ENV_VAR" "$SELECTED_MODEL" "$SELECTED_MAX_TOKENS" "$SELECTED_MAX_CONTEXT_TOKENS" > /dev/null || SAVE_OK=false fi @@ -1859,6 +1934,9 @@ if [ -n "$SELECTED_PROVIDER_ID" ]; then elif [ "$SELECTED_PROVIDER_ID" = "openrouter" ]; then echo -e " ${GREEN}⬢${NC} OpenRouter API Key → ${DIM}$SELECTED_MODEL${NC}" echo -e " ${DIM}API: openrouter.ai/api/v1 (OpenAI-compatible)${NC}" + elif [ "$SELECTED_PROVIDER_ID" = "ollama" ]; then + echo -e " ${GREEN}⬢${NC} Local (Ollama) → ${DIM}$SELECTED_MODEL${NC}" + echo -e " ${DIM}No API key required (runs locally via http://localhost:11434)${NC}" else echo -e " ${CYAN}$SELECTED_PROVIDER_ID${NC} → ${DIM}$SELECTED_MODEL${NC}" fi