fix(core): add zero-config local LLM support, fix AgentRunner crash (#3994), and add docs

vakrahul
2026-02-22 22:59:11 +05:30
parent 9c781ed78e
commit a0d14b8a25
2 changed files with 161 additions and 21 deletions
+43 -21
@@ -788,31 +788,38 @@ class AgentRunner:
                 extra_headers={"authorization": f"Bearer {api_key}"},
             )
         else:
-            # Fall back to environment variable
-            # First check api_key_env_var from config (set by quickstart)
-            api_key_env = llm_config.get("api_key_env_var") or self._get_api_key_env_var(
-                self.model
-            )
-            if api_key_env and os.environ.get(api_key_env):
+            # Local models (e.g. Ollama) don't need an API key
+            if self._is_local_model(self.model):
                 self._llm = LiteLLMProvider(
                     model=self.model,
-                    api_key=os.environ[api_key_env],
                     api_base=api_base,
                 )
             else:
-                # Fall back to credential store
-                api_key = self._get_api_key_from_credential_store()
-                if api_key:
+                # Fall back to environment variable
+                # First check api_key_env_var from config (set by quickstart)
+                api_key_env = llm_config.get("api_key_env_var") or self._get_api_key_env_var(
+                    self.model
+                )
+                if api_key_env and os.environ.get(api_key_env):
                     self._llm = LiteLLMProvider(
-                        model=self.model, api_key=api_key, api_base=api_base
+                        model=self.model,
+                        api_key=os.environ[api_key_env],
+                        api_base=api_base,
                     )
-                    # Set env var so downstream code (e.g. cleanup LLM in
-                    # node._extract_json) can also find it
-                    if api_key_env:
-                        os.environ[api_key_env] = api_key
-                elif api_key_env:
-                    print(f"Warning: {api_key_env} not set. LLM calls will fail.")
-                    print(f"Set it with: export {api_key_env}=your-api-key")
+                else:
+                    # Fall back to credential store
+                    api_key = self._get_api_key_from_credential_store()
+                    if api_key:
+                        self._llm = LiteLLMProvider(
+                            model=self.model, api_key=api_key, api_base=api_base
+                        )
+                        # Set env var so downstream code (e.g. cleanup LLM in
+                        # node._extract_json) can also find it
+                        if api_key_env:
+                            os.environ[api_key_env] = api_key
+                    elif api_key_env:
+                        print(f"Warning: {api_key_env} not set. LLM calls will fail.")
+                        print(f"Set it with: export {api_key_env}=your-api-key")

         # Fail fast if the agent needs an LLM but none was configured
         if self._llm is None:
@@ -866,8 +873,8 @@ class AgentRunner:
return "MISTRAL_API_KEY"
elif model_lower.startswith("groq/"):
return "GROQ_API_KEY"
elif model_lower.startswith("ollama/"):
return None # Ollama doesn't need an API key (local)
elif self._is_local_model(model_lower):
return None # Local models don't need an API key
elif model_lower.startswith("azure/"):
return "AZURE_API_KEY"
elif model_lower.startswith("cohere/"):
@@ -907,6 +914,22 @@ class AgentRunner:
         except Exception:
             return None

+    @staticmethod
+    def _is_local_model(model: str) -> bool:
+        """Check if a model is a local model that doesn't require an API key.
+
+        Local providers like Ollama run on the user's machine and do not
+        need any authentication credentials.
+        """
+        LOCAL_PREFIXES = (
+            "ollama/",
+            "ollama_chat/",
+            "vllm/",
+            "lm_studio/",
+            "llamacpp/",
+        )
+        return model.lower().startswith(LOCAL_PREFIXES)
+
     def _setup_agent_runtime(
         self, tools: list, tool_executor: Callable | None, accounts_prompt: str = ""
     ) -> None:
@@ -980,7 +1003,6 @@ class AgentRunner:
             checkpoint_config=checkpoint_config,
             config=runtime_config,
             graph_id=self.graph.id or self.agent_path.name,
-            accounts_prompt=accounts_prompt,
         )

         # Pass intro_message through for TUI display
+118
@@ -0,0 +1,118 @@
# Roadmap: First-Class Local LLM Support
Local LLMs (Ollama, vLLM, LM Studio, Llama.cpp) let developers run agents
entirely on their own hardware — no API keys, no cloud costs, full data privacy.
This roadmap tracks the work to make local models first-class citizens in the
Aden Agent Framework.
> [!IMPORTANT]
> Related: [Bug #3994](https://github.com/aden-hive/hive/issues/3994) —
> AgentRunner crashes with local LLMs,
> [Feature #5154](https://github.com/aden-hive/hive/issues/5154) —
> First-Class Local LLM Support.
---
## Supported Local Providers
| Prefix | Provider | Status |
|---|---|---|
| `ollama/` | [Ollama](https://ollama.com) | ✅ Supported |
| `ollama_chat/` | Ollama (chat mode) | ✅ Supported |
| `vllm/` | [vLLM](https://vllm.ai) | ✅ Supported |
| `lm_studio/` | [LM Studio](https://lmstudio.ai) | ✅ Supported |
| `llamacpp/` | [Llama.cpp](https://github.com/ggerganov/llama.cpp) | ✅ Supported |
All of the above are recognized by `AgentRunner._is_local_model()`, so the runner
initializes `LiteLLMProvider` **without requiring an API key**.
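The check is just a case-insensitive prefix match. The snippet below is illustrative only; `_is_local_model()` is a private static helper, called directly here purely to show the behavior:
```python
from framework.runner import AgentRunner

# _is_local_model() lowercases the model name, so the prefix match is case-insensitive
assert AgentRunner._is_local_model("ollama/llama3") is True
assert AgentRunner._is_local_model("Ollama_chat/llama3") is True
assert AgentRunner._is_local_model("gpt-4o") is False  # cloud model, API key still required
```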
---
## Phase 1: Core Support ✅
- [x] **Zero-config initialization** — `AgentRunner._setup()` detects local
  model prefixes and creates `LiteLLMProvider` without an API key
- [x] **`_is_local_model()` helper** — centralized check for all local
provider prefixes, used by both `_setup()` and `_get_api_key_env_var()`
- [x] **No API key warnings** — `_get_api_key_env_var()` returns `None` for
  local models, so no spurious "missing API key" warnings are shown
### Quick Start
```python
import asyncio
from pathlib import Path

from framework.runner import AgentRunner

async def main() -> None:
    # Just set the model — no API key, no env vars, no config needed
    runner = AgentRunner.load(
        agent_path=Path("./my_agent"),
        model="ollama/llama3",  # or vllm/mistral, lm_studio/phi3, etc.
    )
    result = await runner.run({"query": "Hello!"})
    print(result)

asyncio.run(main())
```
---
## Phase 2: Enhanced Local Experience
- [ ] **Auto-detect local server** — ping `localhost:11434` (Ollama) or
common ports to confirm the service is running before execution
- [ ] **Custom `api_base` from config** — read `api_base` per-provider from
`~/.hive/configuration.json` for non-default ports/hosts
- [ ] **Connection health check** — pre-flight connectivity test with clear
error message ("Ollama is not running — start it with `ollama serve`")
- [ ] **Model availability check** — verify the requested model is pulled
  locally (`ollama list`) before attempting completion; a rough sketch follows this list
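None of the Phase 2 checks exist yet. The sketch below is one possible shape for the Ollama pre-flight, using Ollama's `/api/tags` endpoint on its default port 11434; the function name and error wording are placeholders, not framework code:
```python
import json
import urllib.error
import urllib.request

def check_ollama(model: str, api_base: str = "http://localhost:11434") -> None:
    """Pre-flight check: is Ollama reachable, and is `model` pulled locally?"""
    try:
        # GET /api/tags lists the models pulled on the local Ollama server
        with urllib.request.urlopen(f"{api_base}/api/tags", timeout=2) as resp:
            tags = json.load(resp)
    except (urllib.error.URLError, OSError) as exc:
        raise RuntimeError("Ollama is not running — start it with `ollama serve`") from exc

    # "ollama/llama3" -> "llama3"; Ollama reports names like "llama3:latest"
    short = model.split("/", 1)[-1]
    pulled = {m["name"] for m in tags.get("models", [])}
    if not any(name == short or name.startswith(short + ":") for name in pulled):
        raise RuntimeError(f"Model {short!r} is not pulled; run `ollama pull {short}`")
```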
---
## Phase 3: Performance & DX
- [ ] **Local model benchmarking** — built-in timing for local inference to
help users compare model speed
- [ ] **GPU/CPU detection** — log available hardware (CUDA, Metal, CPU) to
  help users optimize model selection (see the sketch after this list)
- [ ] **Model recommendation engine** — suggest the best local model based
on agent complexity and available hardware
- [ ] **Offline mode** — graceful fallback when no internet is available,
using only local models and cached tools
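The hardware-detection item could start from simple heuristics like the ones below; this is a dependency-free sketch, and the heuristics are assumptions rather than current framework behavior:
```python
import platform
import shutil

def detect_backend() -> str:
    """Best-effort guess at the local inference backend: CUDA, Metal, or CPU."""
    if shutil.which("nvidia-smi"):  # NVIDIA driver present
        return "CUDA"
    if platform.system() == "Darwin" and platform.machine() == "arm64":  # Apple Silicon
        return "Metal"
    return "CPU"

print(f"Local inference backend: {detect_backend()}")
```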
---
## Phase 4: Advanced Local Features
- [ ] **Model management CLI** — `hive model pull ollama/llama3`,
  `hive model list` for managing local models from the Hive CLI
- [ ] **Hybrid routing** — route simple tasks to local models and complex
  tasks to cloud models automatically based on configurable rules (sketched below)
- [ ] **Local model fine-tuning integration** — support for LoRA adapters
and custom fine-tuned local models
- [ ] **Multi-GPU support** — distribute inference across multiple GPUs
for larger local models (vLLM, Llama.cpp)
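Hybrid routing could begin life as a single configurable rule. The model names and threshold below are placeholders, not framework defaults:
```python
def pick_model(prompt: str,
               local: str = "ollama/llama3",
               cloud: str = "gpt-4o",
               max_local_chars: int = 4_000) -> str:
    """Route short prompts to the local model and long ones to the cloud model."""
    return local if len(prompt) <= max_local_chars else cloud

# Example: a short query stays local
assert pick_model("Summarize this sentence.") == "ollama/llama3"
```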
---
## Adding a New Local Provider
To add support for a new local LLM provider:
1. Add the provider prefix to `LOCAL_PREFIXES` in
   `AgentRunner._is_local_model()` (`framework/runner/runner.py`)
2. That's it — `_setup()` and `_get_api_key_env_var()` both delegate to
`_is_local_model()`, so the new provider will be auto-detected
```python
# framework/runner/runner.py — AgentRunner._is_local_model()
LOCAL_PREFIXES = (
"ollama/",
"ollama_chat/",
"vllm/",
"lm_studio/",
"llamacpp/",
"your_provider/", # ← add here
)
```
> [!TIP]
> The provider must be supported by [LiteLLM](https://docs.litellm.ai/docs/providers)
> for completion calls to work. Check LiteLLM's provider list before adding.
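For a quick end-to-end smoke test outside the framework, LiteLLM can also be called directly; this assumes Ollama is running on its default port with `llama3` already pulled:
```python
from litellm import completion

response = completion(
    model="ollama/llama3",
    messages=[{"role": "user", "content": "Say hello in one word."}],
    api_base="http://localhost:11434",  # Ollama's default local endpoint
)
print(response.choices[0].message.content)
```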