refactor: remove deprecated storage/backend.py (267 lines) (#6849)
* refactor: remove deprecated storage/backend.py (267 lines) Delete the fully deprecated FileStorage class and inline its 5 still-active methods (_validate_key, _load_run_sync, _load_summary_sync, _delete_run_sync, _list_all_runs_sync) directly into ConcurrentStorage. Changes: - Delete core/framework/storage/backend.py (267 lines of no-op/deprecated code) - Inline active read methods into ConcurrentStorage (no new FileStorage dep) - Remove deprecated index operations (get_runs_by_goal, get_runs_by_status, get_runs_by_node, list_all_goals) and their associated locking - Update __init__.py to export ConcurrentStorage instead of FileStorage - Update runtime/core.py to use ConcurrentStorage directly - Fix Runtime.end_run() to call save_run_sync() (sync wrapper) instead of the async save_run(), which was silently dropping the coroutine - Update test_path_traversal_fix.py to test ConcurrentStorage._validate_key() - Clean up test_storage.py — remove all FileStorage test classes, un-skip ConcurrentStorage tests now that it's self-contained - Remove stale FileStorage references from testing/test_storage.py docstring, testing/debug_tool.py docstring, and test_runtime.py skip reasons All 44 tests pass, ruff check and ruff format clean. Fixes #6797 * fix(core): address CodeRabbitAI PR review feedback - Fix critical no-op in ConcurrentStorage._save_run_sync by implementing atomic persistence to runs/{run_id}.json. - Update test_path_traversal_fix.py to test ConcurrentStorage directly and use real file paths for end-to-end validation. - Unskip test_run_saved_on_end and assert actual run file persistence. - Fix debug_tool.py to use load_run_sync() instead of the async load_run(). * fix(core): address round 2 of CodeRabbitAI reviews - Add _validate_key to _save_run_sync and _load_summary_sync to enforce path traversal protections on the lowest level APIs. - Invalidate summary cache and refresh run cache in save_run_sync() to match the async save_run() cache coherence behavior. 
- Add tests for load_summary and save_run_sync path traversal rejection.
This commit is contained in:
@@ -16,7 +16,7 @@ from typing import Any
|
||||
from framework.observability import set_trace_context
|
||||
from framework.schemas.decision import Decision, DecisionType, Option, Outcome
|
||||
from framework.schemas.run import Run, RunStatus
|
||||
from framework.storage.backend import FileStorage
|
||||
from framework.storage.concurrent import ConcurrentStorage
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
@@ -62,7 +62,7 @@ class Runtime:
|
||||
logger.warning(f"Storage path does not exist, creating: {path}")
|
||||
path.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
self.storage = FileStorage(storage_path)
|
||||
self.storage = ConcurrentStorage(storage_path)
|
||||
self._current_run: Run | None = None
|
||||
self._current_node: str = "unknown"
|
||||
|
||||
@@ -132,8 +132,8 @@ class Runtime:
|
||||
self._current_run.output_data = output_data or {}
|
||||
self._current_run.complete(status, narrative)
|
||||
|
||||
# Save to storage
|
||||
self.storage.save_run(self._current_run)
|
||||
# Save to storage (sync — Runtime methods are not async)
|
||||
self.storage.save_run_sync(self._current_run)
|
||||
self._current_run = None
|
||||
|
||||
def set_node(self, node_id: str) -> None:
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
"""Storage backends for runtime data."""
|
||||
|
||||
from framework.storage.backend import FileStorage
|
||||
from framework.storage.concurrent import ConcurrentStorage
|
||||
from framework.storage.conversation_store import FileConversationStore
|
||||
|
||||
__all__ = ["FileStorage", "FileConversationStore"]
|
||||
__all__ = ["ConcurrentStorage", "FileConversationStore"]
|
||||
|
||||
@@ -1,266 +0,0 @@
|
||||
"""
|
||||
File-based storage backend for runtime data.
|
||||
|
||||
DEPRECATED: This storage backend is deprecated for new sessions.
|
||||
New sessions use unified storage at sessions/{session_id}/state.json.
|
||||
This module is kept for backward compatibility with old run data only.
|
||||
|
||||
Uses Pydantic's built-in serialization.
|
||||
"""
|
||||
|
||||
import json
|
||||
from pathlib import Path
|
||||
|
||||
from framework.schemas.run import Run, RunStatus, RunSummary
|
||||
from framework.utils.io import atomic_write
|
||||
|
||||
|
||||
class FileStorage:
|
||||
"""
|
||||
DEPRECATED: File-based storage for old runs only.
|
||||
|
||||
New sessions use unified storage at sessions/{session_id}/state.json.
|
||||
This class is kept for backward compatibility with old run data.
|
||||
|
||||
Old directory structure (deprecated):
|
||||
{base_path}/
|
||||
runs/ # DEPRECATED - no longer written
|
||||
{run_id}.json
|
||||
summaries/ # DEPRECATED - no longer written
|
||||
{run_id}.json
|
||||
indexes/ # DEPRECATED - no longer written or read
|
||||
by_goal/
|
||||
{goal_id}.json
|
||||
by_status/
|
||||
{status}.json
|
||||
by_node/
|
||||
{node_id}.json
|
||||
"""
|
||||
|
||||
def __init__(self, base_path: str | Path):
|
||||
self.base_path = Path(base_path)
|
||||
self._ensure_dirs()
|
||||
|
||||
def _ensure_dirs(self) -> None:
|
||||
"""Create directory structure if it doesn't exist.
|
||||
|
||||
DEPRECATED: All directories (runs/, summaries/, indexes/) are deprecated.
|
||||
New sessions use unified storage at sessions/{session_id}/state.json.
|
||||
This method is now a no-op. Tests should not rely on this.
|
||||
"""
|
||||
# No-op: do not create deprecated directories
|
||||
pass
|
||||
|
||||
def _validate_key(self, key: str) -> None:
|
||||
"""
|
||||
Validate key to prevent path traversal attacks.
|
||||
|
||||
Args:
|
||||
key: The key to validate
|
||||
|
||||
Raises:
|
||||
ValueError: If key contains path traversal or dangerous patterns
|
||||
"""
|
||||
if not key or key.strip() == "":
|
||||
raise ValueError("Key cannot be empty")
|
||||
|
||||
# Block path separators
|
||||
if "/" in key or "\\" in key:
|
||||
raise ValueError(f"Invalid key format: path separators not allowed in '{key}'")
|
||||
|
||||
# Block parent directory references
|
||||
if ".." in key or key.startswith("."):
|
||||
raise ValueError(f"Invalid key format: path traversal detected in '{key}'")
|
||||
|
||||
# Block absolute paths
|
||||
if key.startswith("/") or (len(key) > 1 and key[1] == ":"):
|
||||
raise ValueError(f"Invalid key format: absolute paths not allowed in '{key}'")
|
||||
|
||||
# Block null bytes (Unix path injection)
|
||||
if "\x00" in key:
|
||||
raise ValueError("Invalid key format: null bytes not allowed")
|
||||
|
||||
# Block other dangerous special characters
|
||||
dangerous_chars = {"<", ">", "|", "&", "$", "`", "'", '"'}
|
||||
if any(char in key for char in dangerous_chars):
|
||||
raise ValueError(f"Invalid key format: contains dangerous characters in '{key}'")
|
||||
|
||||
# === RUN OPERATIONS ===
|
||||
|
||||
def save_run(self, run: Run) -> None:
|
||||
"""Save a run to storage.
|
||||
|
||||
DEPRECATED: This method is now a no-op.
|
||||
New sessions use unified storage at sessions/{session_id}/state.json.
|
||||
Tests should not rely on FileStorage - use unified session storage instead.
|
||||
"""
|
||||
import warnings
|
||||
|
||||
warnings.warn(
|
||||
"FileStorage.save_run() is deprecated. "
|
||||
"New sessions use unified storage at sessions/{session_id}/state.json. "
|
||||
"This write has been skipped.",
|
||||
DeprecationWarning,
|
||||
stacklevel=2,
|
||||
)
|
||||
# No-op: do not write to deprecated locations
|
||||
|
||||
def load_run(self, run_id: str) -> Run | None:
|
||||
"""Load a run from storage."""
|
||||
run_path = self.base_path / "runs" / f"{run_id}.json"
|
||||
if not run_path.exists():
|
||||
return None
|
||||
with open(run_path, encoding="utf-8") as f:
|
||||
return Run.model_validate_json(f.read())
|
||||
|
||||
def load_summary(self, run_id: str) -> RunSummary | None:
|
||||
"""Load just the summary (faster than full run)."""
|
||||
summary_path = self.base_path / "summaries" / f"{run_id}.json"
|
||||
if not summary_path.exists():
|
||||
# Fall back to computing from full run
|
||||
run = self.load_run(run_id)
|
||||
if run:
|
||||
return RunSummary.from_run(run)
|
||||
return None
|
||||
|
||||
with open(summary_path, encoding="utf-8") as f:
|
||||
return RunSummary.model_validate_json(f.read())
|
||||
|
||||
def delete_run(self, run_id: str) -> bool:
|
||||
"""Delete a run from storage."""
|
||||
run_path = self.base_path / "runs" / f"{run_id}.json"
|
||||
summary_path = self.base_path / "summaries" / f"{run_id}.json"
|
||||
|
||||
if not run_path.exists():
|
||||
return False
|
||||
|
||||
# Load run to get index keys
|
||||
run = self.load_run(run_id)
|
||||
if run:
|
||||
self._remove_from_index("by_goal", run.goal_id, run_id)
|
||||
self._remove_from_index("by_status", run.status.value, run_id)
|
||||
for node_id in run.metrics.nodes_executed:
|
||||
self._remove_from_index("by_node", node_id, run_id)
|
||||
|
||||
run_path.unlink()
|
||||
if summary_path.exists():
|
||||
summary_path.unlink()
|
||||
|
||||
return True
|
||||
|
||||
# === QUERY OPERATIONS ===
|
||||
|
||||
def get_runs_by_goal(self, goal_id: str) -> list[str]:
|
||||
"""Get all run IDs for a goal.
|
||||
|
||||
DEPRECATED: Indexes are deprecated. For new sessions, scan sessions/*/state.json instead.
|
||||
This method only returns old run IDs from deprecated indexes.
|
||||
"""
|
||||
import warnings
|
||||
|
||||
warnings.warn(
|
||||
"FileStorage.get_runs_by_goal() is deprecated. "
|
||||
"For new sessions, scan sessions/*/state.json instead.",
|
||||
DeprecationWarning,
|
||||
stacklevel=2,
|
||||
)
|
||||
return self._get_index("by_goal", goal_id)
|
||||
|
||||
def get_runs_by_status(self, status: str | RunStatus) -> list[str]:
|
||||
"""Get all run IDs with a status.
|
||||
|
||||
DEPRECATED: Indexes are deprecated. For new sessions, scan sessions/*/state.json instead.
|
||||
This method only returns old run IDs from deprecated indexes.
|
||||
"""
|
||||
import warnings
|
||||
|
||||
warnings.warn(
|
||||
"FileStorage.get_runs_by_status() is deprecated. "
|
||||
"For new sessions, scan sessions/*/state.json instead.",
|
||||
DeprecationWarning,
|
||||
stacklevel=2,
|
||||
)
|
||||
if isinstance(status, RunStatus):
|
||||
status = status.value
|
||||
return self._get_index("by_status", status)
|
||||
|
||||
def get_runs_by_node(self, node_id: str) -> list[str]:
|
||||
"""Get all run IDs that executed a node.
|
||||
|
||||
DEPRECATED: Indexes are deprecated. For new sessions, scan sessions/*/state.json instead.
|
||||
This method only returns old run IDs from deprecated indexes.
|
||||
"""
|
||||
import warnings
|
||||
|
||||
warnings.warn(
|
||||
"FileStorage.get_runs_by_node() is deprecated. "
|
||||
"For new sessions, scan sessions/*/state.json instead.",
|
||||
DeprecationWarning,
|
||||
stacklevel=2,
|
||||
)
|
||||
return self._get_index("by_node", node_id)
|
||||
|
||||
def list_all_runs(self) -> list[str]:
|
||||
"""List all run IDs."""
|
||||
runs_dir = self.base_path / "runs"
|
||||
return [f.stem for f in runs_dir.glob("*.json")]
|
||||
|
||||
def list_all_goals(self) -> list[str]:
|
||||
"""List all goal IDs that have runs.
|
||||
|
||||
DEPRECATED: Indexes are deprecated. For new sessions, scan sessions/*/state.json instead.
|
||||
This method only returns goals from old run IDs in deprecated indexes.
|
||||
"""
|
||||
import warnings
|
||||
|
||||
warnings.warn(
|
||||
"FileStorage.list_all_goals() is deprecated. "
|
||||
"For new sessions, scan sessions/*/state.json instead.",
|
||||
DeprecationWarning,
|
||||
stacklevel=2,
|
||||
)
|
||||
goals_dir = self.base_path / "indexes" / "by_goal"
|
||||
if not goals_dir.exists():
|
||||
return []
|
||||
return [f.stem for f in goals_dir.glob("*.json")]
|
||||
|
||||
# === INDEX OPERATIONS ===
|
||||
|
||||
def _get_index(self, index_type: str, key: str) -> list[str]:
|
||||
"""Get values from an index."""
|
||||
self._validate_key(key) # Prevent path traversal
|
||||
index_path = self.base_path / "indexes" / index_type / f"{key}.json"
|
||||
if not index_path.exists():
|
||||
return []
|
||||
with open(index_path, encoding="utf-8") as f:
|
||||
return json.load(f)
|
||||
|
||||
def _add_to_index(self, index_type: str, key: str, value: str) -> None:
|
||||
"""Add a value to an index."""
|
||||
self._validate_key(key) # Prevent path traversal
|
||||
index_path = self.base_path / "indexes" / index_type / f"{key}.json"
|
||||
values = self._get_index(index_type, key) # Already validated in _get_index
|
||||
if value not in values:
|
||||
values.append(value)
|
||||
with atomic_write(index_path) as f:
|
||||
json.dump(values, f, indent=2)
|
||||
|
||||
def _remove_from_index(self, index_type: str, key: str, value: str) -> None:
|
||||
"""Remove a value from an index."""
|
||||
self._validate_key(key) # Prevent path traversal
|
||||
index_path = self.base_path / "indexes" / index_type / f"{key}.json"
|
||||
values = self._get_index(index_type, key) # Already validated in _get_index
|
||||
if value in values:
|
||||
values.remove(value)
|
||||
with atomic_write(index_path) as f:
|
||||
json.dump(values, f, indent=2)
|
||||
|
||||
# === UTILITY ===
|
||||
|
||||
def get_stats(self) -> dict:
|
||||
"""Get storage statistics."""
|
||||
return {
|
||||
"total_runs": len(self.list_all_runs()),
|
||||
"total_goals": len(self.list_all_goals()),
|
||||
"storage_path": str(self.base_path),
|
||||
}
|
||||
@@ -1,7 +1,7 @@
|
||||
"""
|
||||
Concurrent Storage - Thread-safe storage backend with file locking.
|
||||
|
||||
Wraps FileStorage with:
|
||||
Provides:
|
||||
- Async file locking for atomic writes
|
||||
- Write batching for performance
|
||||
- Read caching for concurrent access
|
||||
@@ -16,8 +16,8 @@ from pathlib import Path
|
||||
from typing import Any
|
||||
from weakref import WeakValueDictionary
|
||||
|
||||
from framework.schemas.run import Run, RunStatus, RunSummary
|
||||
from framework.storage.backend import FileStorage
|
||||
from framework.schemas.run import Run, RunSummary
|
||||
from framework.utils.io import atomic_write
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
@@ -41,7 +41,6 @@ class ConcurrentStorage:
|
||||
- Async file locking to prevent concurrent write corruption
|
||||
- Write batching to reduce I/O overhead
|
||||
- Read caching for frequently accessed data
|
||||
- Compatible API with FileStorage
|
||||
|
||||
Example:
|
||||
storage = ConcurrentStorage("/path/to/storage")
|
||||
@@ -75,7 +74,6 @@ class ConcurrentStorage:
|
||||
max_locks: Maximum number of active file locks to track strongly
|
||||
"""
|
||||
self.base_path = Path(base_path)
|
||||
self._base_storage = FileStorage(base_path)
|
||||
|
||||
# Caching
|
||||
self._cache: dict[str, CacheEntry] = {}
|
||||
@@ -157,6 +155,93 @@ class ConcurrentStorage:
|
||||
|
||||
return lock
|
||||
|
||||
# === KEY VALIDATION ===
|
||||
|
||||
@staticmethod
|
||||
def _validate_key(key: str) -> None:
|
||||
"""Validate key to prevent path traversal attacks.
|
||||
|
||||
Args:
|
||||
key: The key to validate
|
||||
|
||||
Raises:
|
||||
ValueError: If key contains path traversal or dangerous patterns
|
||||
"""
|
||||
if not key or key.strip() == "":
|
||||
raise ValueError("Key cannot be empty")
|
||||
|
||||
if "/" in key or "\\" in key:
|
||||
raise ValueError(f"Invalid key format: path separators not allowed in '{key}'")
|
||||
|
||||
if ".." in key or key.startswith("."):
|
||||
raise ValueError(f"Invalid key format: path traversal detected in '{key}'")
|
||||
|
||||
if key.startswith("/") or (len(key) > 1 and key[1] == ":"):
|
||||
raise ValueError(f"Invalid key format: absolute paths not allowed in '{key}'")
|
||||
|
||||
if "\x00" in key:
|
||||
raise ValueError("Invalid key format: null bytes not allowed")
|
||||
|
||||
dangerous_chars = {"<", ">", "|", "&", "$", "`", "'", '"'}
|
||||
if any(char in key for char in dangerous_chars):
|
||||
raise ValueError(f"Invalid key format: contains dangerous characters in '{key}'")
|
||||
|
||||
# === FILE OPERATIONS (formerly in FileStorage) ===
|
||||
|
||||
def _save_run_sync(self, run: Run) -> None:
|
||||
"""Persist a run to disk as ``runs/{run_id}.json``.
|
||||
|
||||
Uses an atomic write (temp-file + rename) so a mid-write crash
|
||||
never leaves a partially written file on disk.
|
||||
"""
|
||||
self._validate_key(run.id)
|
||||
runs_dir = self.base_path / "runs"
|
||||
runs_dir.mkdir(parents=True, exist_ok=True)
|
||||
run_path = runs_dir / f"{run.id}.json"
|
||||
with atomic_write(run_path) as f:
|
||||
f.write(run.model_dump_json(indent=2))
|
||||
|
||||
def _load_run_sync(self, run_id: str) -> Run | None:
|
||||
"""Load a run from storage."""
|
||||
run_path = self.base_path / "runs" / f"{run_id}.json"
|
||||
if not run_path.exists():
|
||||
return None
|
||||
with open(run_path, encoding="utf-8") as f:
|
||||
return Run.model_validate_json(f.read())
|
||||
|
||||
def _load_summary_sync(self, run_id: str) -> RunSummary | None:
|
||||
"""Load just the summary (faster than full run)."""
|
||||
self._validate_key(run_id)
|
||||
summary_path = self.base_path / "summaries" / f"{run_id}.json"
|
||||
if not summary_path.exists():
|
||||
run = self._load_run_sync(run_id)
|
||||
if run:
|
||||
return RunSummary.from_run(run)
|
||||
return None
|
||||
with open(summary_path, encoding="utf-8") as f:
|
||||
return RunSummary.model_validate_json(f.read())
|
||||
|
||||
def _delete_run_sync(self, run_id: str) -> bool:
|
||||
"""Delete a run from storage."""
|
||||
run_path = self.base_path / "runs" / f"{run_id}.json"
|
||||
summary_path = self.base_path / "summaries" / f"{run_id}.json"
|
||||
|
||||
if not run_path.exists():
|
||||
return False
|
||||
|
||||
run_path.unlink()
|
||||
if summary_path.exists():
|
||||
summary_path.unlink()
|
||||
|
||||
return True
|
||||
|
||||
def _list_all_runs_sync(self) -> list[str]:
|
||||
"""List all run IDs."""
|
||||
runs_dir = self.base_path / "runs"
|
||||
if not runs_dir.exists():
|
||||
return []
|
||||
return [f.stem for f in runs_dir.glob("*.json")]
|
||||
|
||||
# === RUN OPERATIONS (Async, Thread-Safe) ===
|
||||
|
||||
async def save_run(self, run: Run, immediate: bool = False) -> None:
|
||||
@@ -180,40 +265,17 @@ class ConcurrentStorage:
|
||||
await self._write_queue.put(("run", run))
|
||||
|
||||
async def _save_run_locked(self, run: Run) -> None:
|
||||
"""Save a run with file locking, including index locks."""
|
||||
"""Save a run with file locking."""
|
||||
lock_key = f"run:{run.id}"
|
||||
|
||||
# Helper to get lock
|
||||
async def get_lock(k):
|
||||
return await self._get_lock(k)
|
||||
|
||||
# Acquire main lock
|
||||
run_lock = await get_lock(lock_key)
|
||||
run_lock = await self._get_lock(lock_key)
|
||||
|
||||
async with run_lock:
|
||||
# 2. Acquire index locks
|
||||
index_lock_keys = [
|
||||
f"index:by_goal:{run.goal_id}",
|
||||
f"index:by_status:{run.status.value}",
|
||||
]
|
||||
for node_id in run.metrics.nodes_executed:
|
||||
index_lock_keys.append(f"index:by_node:{node_id}")
|
||||
|
||||
# Collect index locks
|
||||
index_locks = [await get_lock(k) for k in index_lock_keys]
|
||||
|
||||
# Recursive acquisition
|
||||
async def with_locks(locks, callback):
|
||||
if not locks:
|
||||
return await callback()
|
||||
async with locks[0]:
|
||||
return await with_locks(locks[1:], callback)
|
||||
|
||||
async def perform_save():
|
||||
loop = asyncio.get_event_loop()
|
||||
await loop.run_in_executor(None, self._base_storage.save_run, run)
|
||||
await loop.run_in_executor(None, self._save_run_sync, run)
|
||||
|
||||
await with_locks(index_locks, perform_save)
|
||||
await perform_save()
|
||||
|
||||
async def load_run(self, run_id: str, use_cache: bool = True) -> Run | None:
|
||||
"""
|
||||
@@ -225,7 +287,11 @@ class ConcurrentStorage:
|
||||
|
||||
Returns:
|
||||
Run object or None if not found
|
||||
|
||||
Raises:
|
||||
ValueError: If run_id contains path traversal characters.
|
||||
"""
|
||||
self._validate_key(run_id)
|
||||
if use_cache:
|
||||
cache_key = f"run:{run_id}"
|
||||
cached = self._cache.get(cache_key)
|
||||
@@ -240,7 +306,7 @@ class ConcurrentStorage:
|
||||
lock_key = f"run:{run_id}"
|
||||
async with await self._get_lock(lock_key):
|
||||
loop = asyncio.get_event_loop()
|
||||
run = await loop.run_in_executor(None, self._base_storage.load_run, run_id)
|
||||
run = await loop.run_in_executor(None, self._load_run_sync, run_id)
|
||||
|
||||
# Update cache
|
||||
if run:
|
||||
@@ -249,7 +315,12 @@ class ConcurrentStorage:
|
||||
return run
|
||||
|
||||
async def load_summary(self, run_id: str, use_cache: bool = True) -> RunSummary | None:
|
||||
"""Load just the summary (faster than full run)."""
|
||||
"""Load just the summary (faster than full run).
|
||||
|
||||
Raises:
|
||||
ValueError: If run_id contains path traversal characters.
|
||||
"""
|
||||
self._validate_key(run_id)
|
||||
cache_key = f"summary:{run_id}"
|
||||
|
||||
# Check cache
|
||||
@@ -262,7 +333,7 @@ class ConcurrentStorage:
|
||||
lock_key = f"summary:{run_id}"
|
||||
async with await self._get_lock(lock_key):
|
||||
loop = asyncio.get_event_loop()
|
||||
summary = await loop.run_in_executor(None, self._base_storage.load_summary, run_id)
|
||||
summary = await loop.run_in_executor(None, self._load_summary_sync, run_id)
|
||||
|
||||
# Update cache
|
||||
if summary:
|
||||
@@ -271,11 +342,16 @@ class ConcurrentStorage:
|
||||
return summary
|
||||
|
||||
async def delete_run(self, run_id: str) -> bool:
|
||||
"""Delete a run from storage."""
|
||||
"""Delete a run from storage.
|
||||
|
||||
Raises:
|
||||
ValueError: If run_id contains path traversal characters.
|
||||
"""
|
||||
self._validate_key(run_id)
|
||||
lock_key = f"run:{run_id}"
|
||||
async with await self._get_lock(lock_key):
|
||||
loop = asyncio.get_event_loop()
|
||||
result = await loop.run_in_executor(None, self._base_storage.delete_run, run_id)
|
||||
result = await loop.run_in_executor(None, self._delete_run_sync, run_id)
|
||||
|
||||
# Clear cache
|
||||
self._cache.pop(f"run:{run_id}", None)
|
||||
@@ -283,37 +359,10 @@ class ConcurrentStorage:
|
||||
|
||||
return result
|
||||
|
||||
# === QUERY OPERATIONS (Async, with Locking) ===
|
||||
|
||||
async def get_runs_by_goal(self, goal_id: str) -> list[str]:
|
||||
"""Get all run IDs for a goal."""
|
||||
async with await self._get_lock(f"index:by_goal:{goal_id}"):
|
||||
loop = asyncio.get_event_loop()
|
||||
return await loop.run_in_executor(None, self._base_storage.get_runs_by_goal, goal_id)
|
||||
|
||||
async def get_runs_by_status(self, status: str | RunStatus) -> list[str]:
|
||||
"""Get all run IDs with a status."""
|
||||
if isinstance(status, RunStatus):
|
||||
status = status.value
|
||||
async with await self._get_lock(f"index:by_status:{status}"):
|
||||
loop = asyncio.get_event_loop()
|
||||
return await loop.run_in_executor(None, self._base_storage.get_runs_by_status, status)
|
||||
|
||||
async def get_runs_by_node(self, node_id: str) -> list[str]:
|
||||
"""Get all run IDs that executed a node."""
|
||||
async with await self._get_lock(f"index:by_node:{node_id}"):
|
||||
loop = asyncio.get_event_loop()
|
||||
return await loop.run_in_executor(None, self._base_storage.get_runs_by_node, node_id)
|
||||
|
||||
async def list_all_runs(self) -> list[str]:
|
||||
"""List all run IDs."""
|
||||
loop = asyncio.get_event_loop()
|
||||
return await loop.run_in_executor(None, self._base_storage.list_all_runs)
|
||||
|
||||
async def list_all_goals(self) -> list[str]:
|
||||
"""List all goal IDs that have runs."""
|
||||
loop = asyncio.get_event_loop()
|
||||
return await loop.run_in_executor(None, self._base_storage.list_all_goals)
|
||||
return await loop.run_in_executor(None, self._list_all_runs_sync)
|
||||
|
||||
# === BATCH OPERATIONS ===
|
||||
|
||||
@@ -411,10 +460,11 @@ class ConcurrentStorage:
|
||||
async def get_stats(self) -> dict:
|
||||
"""Get storage statistics."""
|
||||
loop = asyncio.get_event_loop()
|
||||
base_stats = await loop.run_in_executor(None, self._base_storage.get_stats)
|
||||
all_runs = await loop.run_in_executor(None, self._list_all_runs_sync)
|
||||
|
||||
return {
|
||||
**base_stats,
|
||||
"total_runs": len(all_runs),
|
||||
"storage_path": str(self.base_path),
|
||||
"cache": self.get_cache_stats(),
|
||||
"pending_writes": self._write_queue.qsize(),
|
||||
"running": self._running,
|
||||
@@ -423,10 +473,21 @@ class ConcurrentStorage:
|
||||
# === SYNC API (for backward compatibility) ===
|
||||
|
||||
def save_run_sync(self, run: Run) -> None:
|
||||
"""Synchronous save (uses base storage directly with lock)."""
|
||||
# Use threading lock for sync operations
|
||||
self._base_storage.save_run(run)
|
||||
"""Synchronous save — persists a run to disk immediately."""
|
||||
self._validate_key(run.id)
|
||||
# Invalidate summary cache since the run data is changing
|
||||
self._cache.pop(f"summary:{run.id}", None)
|
||||
|
||||
self._save_run_sync(run)
|
||||
|
||||
# Refresh run cache
|
||||
self._cache[f"run:{run.id}"] = CacheEntry(run, time.time())
|
||||
|
||||
def load_run_sync(self, run_id: str) -> Run | None:
|
||||
"""Synchronous load (uses base storage directly)."""
|
||||
return self._base_storage.load_run(run_id)
|
||||
"""Synchronous load.
|
||||
|
||||
Raises:
|
||||
ValueError: If run_id contains path traversal characters.
|
||||
"""
|
||||
self._validate_key(run_id)
|
||||
return self._load_run_sync(run_id)
|
||||
|
||||
@@ -73,7 +73,9 @@ class DebugTool:
|
||||
|
||||
Args:
|
||||
test_storage: Storage for test and result data
|
||||
runtime_storage: Optional FileStorage for Runtime data
|
||||
runtime_storage: Optional storage backend for Runtime data.
|
||||
Must expose a synchronous ``load_run_sync(run_id)`` method
|
||||
(e.g. ``ConcurrentStorage``).
|
||||
"""
|
||||
self.test_storage = test_storage
|
||||
self.runtime_storage = runtime_storage
|
||||
@@ -233,7 +235,9 @@ class DebugTool:
|
||||
return {}
|
||||
|
||||
try:
|
||||
run = self.runtime_storage.load_run(run_id)
|
||||
# Use the synchronous loader — _get_runtime_data is not async
|
||||
# and ConcurrentStorage.load_run() is a coroutine.
|
||||
run = self.runtime_storage.load_run_sync(run_id)
|
||||
if not run:
|
||||
return {"error": f"Run {run_id} not found"}
|
||||
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
"""
|
||||
File-based storage backend for test data.
|
||||
|
||||
Follows the same pattern as framework/storage/backend.py (FileStorage),
|
||||
Follows the same pattern as framework/storage/concurrent.py (ConcurrentStorage),
|
||||
storing tests as JSON files with indexes for efficient querying.
|
||||
"""
|
||||
|
||||
|
||||
@@ -1,7 +1,8 @@
|
||||
"""
|
||||
Tests for path traversal vulnerability fix in FileStorage.
|
||||
Tests for path traversal vulnerability protection in ConcurrentStorage.
|
||||
|
||||
Verifies that the _validate_key() method properly blocks path traversal attempts.
|
||||
Verifies that the _validate_key() method properly blocks path traversal
|
||||
attempts and that the public storage API enforces these checks end-to-end.
|
||||
"""
|
||||
|
||||
import tempfile
|
||||
@@ -9,23 +10,22 @@ from pathlib import Path
|
||||
|
||||
import pytest
|
||||
|
||||
from framework.storage.backend import FileStorage
|
||||
from framework.storage.concurrent import ConcurrentStorage
|
||||
|
||||
|
||||
class TestPathTraversalProtection:
|
||||
"""Tests for path traversal vulnerability protection."""
|
||||
"""Tests for path traversal vulnerability protection in ConcurrentStorage."""
|
||||
|
||||
@pytest.fixture
|
||||
def storage(self):
|
||||
"""Create a temporary storage instance for testing."""
|
||||
"""Create a temporary ConcurrentStorage instance for testing."""
|
||||
with tempfile.TemporaryDirectory() as tmpdir:
|
||||
yield FileStorage(tmpdir)
|
||||
yield ConcurrentStorage(tmpdir)
|
||||
|
||||
# === VALID KEYS (should pass validation) ===
|
||||
|
||||
def test_valid_alphanumeric_key(self, storage):
|
||||
"""Alphanumeric keys should be allowed."""
|
||||
# Should not raise
|
||||
storage._validate_key("goal_123")
|
||||
storage._validate_key("run_abc_def")
|
||||
storage._validate_key("status_completed")
|
||||
@@ -40,7 +40,6 @@ class TestPathTraversalProtection:
|
||||
|
||||
def test_blocks_parent_directory_traversal(self, storage):
|
||||
"""Block .. path traversal attempts."""
|
||||
# These all have path separators which are blocked first
|
||||
with pytest.raises(ValueError):
|
||||
storage._validate_key("../../../etc/passwd")
|
||||
|
||||
@@ -55,13 +54,12 @@ class TestPathTraversalProtection:
|
||||
with pytest.raises(ValueError, match="path traversal detected"):
|
||||
storage._validate_key(".env")
|
||||
|
||||
# This also has path separator which is caught first
|
||||
# Also has a path separator which is caught first
|
||||
with pytest.raises(ValueError):
|
||||
storage._validate_key(".ssh/id_rsa")
|
||||
|
||||
def test_blocks_absolute_paths_unix(self, storage):
|
||||
"""Block absolute paths (Unix)."""
|
||||
# These have path separators which are blocked first
|
||||
with pytest.raises(ValueError):
|
||||
storage._validate_key("/etc/passwd")
|
||||
|
||||
@@ -70,7 +68,6 @@ class TestPathTraversalProtection:
|
||||
|
||||
def test_blocks_absolute_paths_windows(self, storage):
|
||||
"""Block absolute paths (Windows)."""
|
||||
# These have path separators which are blocked first
|
||||
with pytest.raises(ValueError):
|
||||
storage._validate_key("C:\\Windows\\System32")
|
||||
|
||||
@@ -115,68 +112,76 @@ class TestPathTraversalProtection:
|
||||
with pytest.raises(ValueError, match="empty"):
|
||||
storage._validate_key(" ")
|
||||
|
||||
# === END-TO-END TESTS ===
|
||||
# === END-TO-END TESTS (public API enforces validation) ===
|
||||
|
||||
def test_get_runs_by_goal_blocks_traversal(self, storage):
|
||||
"""get_runs_by_goal() should block path traversal."""
|
||||
@pytest.mark.asyncio
|
||||
async def test_load_run_blocks_traversal(self, storage):
|
||||
"""load_run() must reject path traversal in the run_id."""
|
||||
with pytest.raises(ValueError):
|
||||
storage.get_runs_by_goal("../../../.env")
|
||||
await storage.load_run("../../../.env")
|
||||
|
||||
def test_get_runs_by_node_blocks_traversal(self, storage):
|
||||
"""get_runs_by_node() should block path traversal."""
|
||||
@pytest.mark.asyncio
|
||||
async def test_load_run_valid_id_returns_none(self, storage):
|
||||
"""A valid but nonexistent run_id returns None, not an error."""
|
||||
result = await storage.load_run("legitimate_run_id", use_cache=False)
|
||||
assert result is None
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_delete_run_blocks_traversal(self, storage):
|
||||
"""delete_run() must reject path traversal in the run_id."""
|
||||
with pytest.raises(ValueError):
|
||||
storage.get_runs_by_node("/etc/passwd")
|
||||
await storage.delete_run("../etc/passwd")
|
||||
|
||||
def test_get_runs_by_status_blocks_traversal(self, storage):
|
||||
"""get_runs_by_status() should block path traversal."""
|
||||
@pytest.mark.asyncio
|
||||
async def test_load_summary_blocks_traversal(self, storage):
|
||||
"""load_summary() must reject path traversal in the run_id."""
|
||||
with pytest.raises(ValueError):
|
||||
storage.get_runs_by_status("..\\..\\windows\\system32")
|
||||
await storage.load_summary("../../../.env")
|
||||
|
||||
def test_valid_queries_still_work(self, storage):
|
||||
"""Valid queries should work after fix."""
|
||||
# These should return empty list, not raise errors
|
||||
result = storage.get_runs_by_goal("legitimate_goal")
|
||||
assert result == []
|
||||
|
||||
result = storage.get_runs_by_node("legitimate_node")
|
||||
assert result == []
|
||||
|
||||
result = storage.get_runs_by_status("completed")
|
||||
assert result == []
|
||||
|
||||
# === REAL-WORLD ATTACK SCENARIOS ===
|
||||
|
||||
def test_blocks_env_file_escape(self, storage):
|
||||
"""Block attempts to access .env files."""
|
||||
def test_load_run_sync_blocks_traversal(self, storage):
|
||||
"""load_run_sync() must reject path traversal in the run_id."""
|
||||
with pytest.raises(ValueError):
|
||||
storage.get_runs_by_goal("../../../.env")
|
||||
storage.load_run_sync("../../../.env")
|
||||
|
||||
def test_blocks_config_file_escape(self, storage):
|
||||
"""Block attempts to access config files."""
|
||||
with pytest.raises(ValueError):
|
||||
storage.get_runs_by_goal("../../../../etc/aden/database.yaml")
|
||||
def test_save_run_sync_blocks_traversal(self, storage):
|
||||
"""save_run_sync() must reject path traversal in the run_id."""
|
||||
from framework.schemas.run import Run
|
||||
|
||||
def test_blocks_web_shell_creation(self, storage):
|
||||
"""Block attempts to create web shells."""
|
||||
run = Run(id="../../../.env", goal_id="test", goal_description="", input_data={})
|
||||
with pytest.raises(ValueError):
|
||||
storage._add_to_index("by_goal", "../../var/www/html/shell", "malicious_code")
|
||||
storage.save_run_sync(run)
|
||||
|
||||
def test_blocks_cron_injection(self, storage):
|
||||
"""Block attempts to create cron jobs."""
|
||||
with pytest.raises(ValueError):
|
||||
storage._add_to_index("by_node", "../../../etc/cron.d/backdoor", "reverse_shell")
|
||||
def test_load_run_sync_valid_id_returns_none(self, storage):
|
||||
"""load_run_sync with a legitimate nonexistent ID returns None."""
|
||||
result = storage.load_run_sync("legitimate_run_id")
|
||||
assert result is None
|
||||
|
||||
def test_blocks_sudoers_modification(self, storage):
|
||||
"""Block attempts to modify sudoers file."""
|
||||
# === REAL-WORLD ATTACK SCENARIOS (end-to-end) ===
|
||||
|
||||
def test_blocks_env_file_escape_via_load_sync(self, storage):
|
||||
"""Block attempts to read .env files via load_run_sync."""
|
||||
with pytest.raises(ValueError):
|
||||
storage._add_to_index("by_status", "../../../../etc/sudoers", "ALL=(ALL) NOPASSWD:ALL")
|
||||
storage.load_run_sync("../../../.env")
|
||||
|
||||
def test_blocks_config_file_escape_via_load_sync(self, storage):
|
||||
"""Block attempts to access config files via load_run_sync."""
|
||||
with pytest.raises(ValueError):
|
||||
storage.load_run_sync("../../../../etc/aden/database.yaml")
|
||||
|
||||
def test_blocks_arbitrary_write_via_save_sync(self, storage):
|
||||
"""Block attempts to write arbitrary files via save_run_sync."""
|
||||
from framework.schemas.run import Run
|
||||
|
||||
run = Run(id="../../var/www/html/shell", goal_id="test", goal_description="", input_data={})
|
||||
with pytest.raises(ValueError):
|
||||
storage.save_run_sync(run)
|
||||
|
||||
|
||||
class TestPathTraversalWithActualFiles:
|
||||
"""Test path traversal protection with actual file operations."""
|
||||
|
||||
def test_cannot_escape_storage_directory(self):
|
||||
"""Verify that even with path traversal, we can't escape storage dir."""
|
||||
"""Verify that path traversal is caught before any filesystem access."""
|
||||
with tempfile.TemporaryDirectory() as tmpdir:
|
||||
tmpdir_path = Path(tmpdir)
|
||||
storage_dir = tmpdir_path / "storage"
|
||||
@@ -186,31 +191,38 @@ class TestPathTraversalWithActualFiles:
|
||||
secret_file = tmpdir_path / "secret.txt"
|
||||
secret_file.write_text("SENSITIVE_DATA", encoding="utf-8")
|
||||
|
||||
storage = FileStorage(storage_dir)
|
||||
storage = ConcurrentStorage(storage_dir)
|
||||
|
||||
# Attempt to read the secret file via path traversal
|
||||
# Attempt to read the secret file via path traversal — must raise
|
||||
with pytest.raises(ValueError):
|
||||
storage.get_runs_by_goal("../secret")
|
||||
storage.load_run_sync("../secret")
|
||||
|
||||
# Verify the secret file was not accessed (still contains original data)
|
||||
# Verify the secret file was not accessed
|
||||
assert secret_file.read_text(encoding="utf-8") == "SENSITIVE_DATA"
|
||||
|
||||
def test_cannot_write_outside_storage(self):
|
||||
"""Verify that we can't write files outside storage directory."""
|
||||
with tempfile.TemporaryDirectory() as tmpdir:
|
||||
tmpdir_path = Path(tmpdir)
|
||||
storage_dir = tmpdir_path / "storage"
|
||||
storage_dir.mkdir()
|
||||
def test_save_and_load_roundtrip(self, tmp_path):
|
||||
"""Verify save_run_sync/load_run_sync roundtrip works correctly."""
|
||||
from framework.schemas.run import Run, RunStatus
|
||||
|
||||
storage = FileStorage(storage_dir)
|
||||
storage = ConcurrentStorage(tmp_path)
|
||||
run = Run(
|
||||
id="run_test_123",
|
||||
goal_id="goal_abc",
|
||||
goal_description="Integration test",
|
||||
input_data={},
|
||||
)
|
||||
run.complete(RunStatus.COMPLETED, "done")
|
||||
|
||||
# Attempt to write outside storage directory
|
||||
with pytest.raises(ValueError):
|
||||
storage._add_to_index("by_goal", "../../malicious", "payload")
|
||||
storage.save_run_sync(run)
|
||||
|
||||
# Verify no file was created outside storage
|
||||
malicious_file = tmpdir_path / "malicious.json"
|
||||
assert not malicious_file.exists()
|
||||
loaded = storage.load_run_sync("run_test_123")
|
||||
assert loaded is not None
|
||||
assert loaded.id == "run_test_123"
|
||||
assert loaded.status == RunStatus.COMPLETED
|
||||
|
||||
# Verify the file is at the expected path
|
||||
run_file = tmp_path / "runs" / "run_test_123.json"
|
||||
assert run_file.exists()
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
||||
@@ -37,20 +37,21 @@ class TestRuntimeBasics:
|
||||
runtime.end_run(success=True)
|
||||
assert runtime.current_run is None
|
||||
|
||||
@pytest.mark.skip(
|
||||
reason="FileStorage.save_run() is deprecated and now a no-op. "
|
||||
"New sessions use unified storage at sessions/{session_id}/state.json"
|
||||
)
|
||||
def test_run_saved_on_end(self, tmp_path: Path):
|
||||
"""Run is saved to storage when ended."""
|
||||
"""Run is persisted to disk when ended.
|
||||
|
||||
ConcurrentStorage.save_run_sync() writes to runs/{run_id}.json
|
||||
via an atomic temp-file+rename. This is the primary guardrail
|
||||
ensuring end_run() does not silently discard completed runs.
|
||||
"""
|
||||
runtime = Runtime(tmp_path)
|
||||
|
||||
run_id = runtime.start_run("test_goal", "Test")
|
||||
runtime.end_run(success=True)
|
||||
|
||||
# Check file exists
|
||||
# ConcurrentStorage writes to {base_path}/runs/{run_id}.json
|
||||
run_file = tmp_path / "runs" / f"{run_id}.json"
|
||||
assert run_file.exists()
|
||||
assert run_file.exists(), f"Expected persisted run at {run_file}"
|
||||
|
||||
|
||||
class TestDecisionRecording:
|
||||
@@ -346,7 +347,7 @@ class TestNarrativeGeneration:
|
||||
"""Test automatic narrative generation."""
|
||||
|
||||
@pytest.mark.skip(
|
||||
reason="FileStorage.save_run() and get_runs_by_goal() are deprecated. "
|
||||
reason="save_run() and get_runs_by_goal() are deprecated. "
|
||||
"New sessions use unified storage at sessions/{session_id}/state.json"
|
||||
)
|
||||
def test_default_narrative_success(self, tmp_path: Path):
|
||||
@@ -369,7 +370,7 @@ class TestNarrativeGeneration:
|
||||
assert "completed successfully" in run.narrative
|
||||
|
||||
@pytest.mark.skip(
|
||||
reason="FileStorage.save_run() and get_runs_by_goal() are deprecated. "
|
||||
reason="save_run() and get_runs_by_goal() are deprecated. "
|
||||
"New sessions use unified storage at sessions/{session_id}/state.json"
|
||||
)
|
||||
def test_default_narrative_failure(self, tmp_path: Path):
|
||||
|
||||
+3
-496
@@ -1,18 +1,16 @@
|
||||
"""Tests for the storage module - FileStorage and ConcurrentStorage backends.
|
||||
"""Tests for the storage module - ConcurrentStorage backend.
|
||||
|
||||
DEPRECATED: FileStorage and ConcurrentStorage are deprecated.
|
||||
DEPRECATED: FileStorage has been removed.
|
||||
New sessions use unified storage at sessions/{session_id}/state.json.
|
||||
These tests are kept for backward compatibility verification only.
|
||||
These tests are kept for backward compatibility verification of ConcurrentStorage only.
|
||||
"""
|
||||
|
||||
import json
|
||||
import time
|
||||
from pathlib import Path
|
||||
|
||||
import pytest
|
||||
|
||||
from framework.schemas.run import Run, RunMetrics, RunStatus
|
||||
from framework.storage.backend import FileStorage
|
||||
from framework.storage.concurrent import CacheEntry, ConcurrentStorage
|
||||
|
||||
# === HELPER FUNCTIONS ===
|
||||
@@ -40,277 +38,6 @@ def create_test_run(
|
||||
)
|
||||
|
||||
|
||||
# === FILESTORAGE TESTS ===
|
||||
|
||||
|
||||
@pytest.mark.skip(reason="FileStorage is deprecated - use unified session storage")
|
||||
class TestFileStorageBasics:
|
||||
"""Test basic FileStorage operations."""
|
||||
|
||||
def test_init_creates_directories(self, tmp_path: Path):
|
||||
"""FileStorage should create the directory structure on init."""
|
||||
FileStorage(tmp_path)
|
||||
|
||||
assert (tmp_path / "runs").exists()
|
||||
assert (tmp_path / "summaries").exists()
|
||||
assert (tmp_path / "indexes" / "by_goal").exists()
|
||||
assert (tmp_path / "indexes" / "by_status").exists()
|
||||
assert (tmp_path / "indexes" / "by_node").exists()
|
||||
|
||||
def test_init_with_string_path(self, tmp_path: Path):
|
||||
"""FileStorage should accept string paths."""
|
||||
storage = FileStorage(str(tmp_path))
|
||||
assert storage.base_path == tmp_path
|
||||
|
||||
|
||||
@pytest.mark.skip(reason="FileStorage is deprecated - use unified session storage")
|
||||
class TestFileStorageRunOperations:
|
||||
"""Test FileStorage run CRUD operations."""
|
||||
|
||||
def test_save_and_load_run(self, tmp_path: Path):
|
||||
"""Test saving and loading a run."""
|
||||
storage = FileStorage(tmp_path)
|
||||
run = create_test_run()
|
||||
|
||||
storage.save_run(run)
|
||||
loaded = storage.load_run(run.id)
|
||||
|
||||
assert loaded is not None
|
||||
assert loaded.id == run.id
|
||||
assert loaded.goal_id == run.goal_id
|
||||
assert loaded.status == run.status
|
||||
|
||||
def test_load_nonexistent_run_returns_none(self, tmp_path: Path):
|
||||
"""Loading a nonexistent run should return None."""
|
||||
storage = FileStorage(tmp_path)
|
||||
|
||||
result = storage.load_run("nonexistent_id")
|
||||
assert result is None
|
||||
|
||||
def test_save_creates_json_file(self, tmp_path: Path):
|
||||
"""Saving a run should create a JSON file."""
|
||||
storage = FileStorage(tmp_path)
|
||||
run = create_test_run(run_id="my_run")
|
||||
|
||||
storage.save_run(run)
|
||||
|
||||
run_file = tmp_path / "runs" / "my_run.json"
|
||||
assert run_file.exists()
|
||||
|
||||
# Verify it's valid JSON
|
||||
with open(run_file, encoding="utf-8") as f:
|
||||
data = json.load(f)
|
||||
assert data["id"] == "my_run"
|
||||
|
||||
def test_save_creates_summary(self, tmp_path: Path):
|
||||
"""Saving a run should also create a summary file."""
|
||||
storage = FileStorage(tmp_path)
|
||||
run = create_test_run(run_id="my_run")
|
||||
|
||||
storage.save_run(run)
|
||||
|
||||
summary_file = tmp_path / "summaries" / "my_run.json"
|
||||
assert summary_file.exists()
|
||||
|
||||
def test_load_summary(self, tmp_path: Path):
|
||||
"""Test loading a run summary."""
|
||||
storage = FileStorage(tmp_path)
|
||||
run = create_test_run()
|
||||
|
||||
storage.save_run(run)
|
||||
summary = storage.load_summary(run.id)
|
||||
|
||||
assert summary is not None
|
||||
assert summary.run_id == run.id
|
||||
assert summary.goal_id == run.goal_id
|
||||
assert summary.status == run.status
|
||||
|
||||
def test_load_summary_fallback_to_run(self, tmp_path: Path):
|
||||
"""If summary file is missing, load_summary should compute from run."""
|
||||
storage = FileStorage(tmp_path)
|
||||
run = create_test_run()
|
||||
|
||||
storage.save_run(run)
|
||||
|
||||
# Delete the summary file
|
||||
summary_file = tmp_path / "summaries" / f"{run.id}.json"
|
||||
summary_file.unlink()
|
||||
|
||||
# Should still work by computing from run
|
||||
summary = storage.load_summary(run.id)
|
||||
assert summary is not None
|
||||
assert summary.run_id == run.id
|
||||
|
||||
def test_delete_run(self, tmp_path: Path):
|
||||
"""Test deleting a run."""
|
||||
storage = FileStorage(tmp_path)
|
||||
run = create_test_run()
|
||||
|
||||
storage.save_run(run)
|
||||
assert storage.load_run(run.id) is not None
|
||||
|
||||
result = storage.delete_run(run.id)
|
||||
|
||||
assert result is True
|
||||
assert storage.load_run(run.id) is None
|
||||
|
||||
def test_delete_nonexistent_run_returns_false(self, tmp_path: Path):
|
||||
"""Deleting a nonexistent run should return False."""
|
||||
storage = FileStorage(tmp_path)
|
||||
|
||||
result = storage.delete_run("nonexistent")
|
||||
assert result is False
|
||||
|
||||
|
||||
@pytest.mark.skip(reason="FileStorage is deprecated - use unified session storage")
|
||||
class TestFileStorageIndexing:
|
||||
"""Test FileStorage index operations."""
|
||||
|
||||
def test_index_by_goal(self, tmp_path: Path):
|
||||
"""Runs should be indexed by goal_id."""
|
||||
storage = FileStorage(tmp_path)
|
||||
|
||||
run1 = create_test_run(run_id="run_1", goal_id="goal_a")
|
||||
run2 = create_test_run(run_id="run_2", goal_id="goal_a")
|
||||
run3 = create_test_run(run_id="run_3", goal_id="goal_b")
|
||||
|
||||
storage.save_run(run1)
|
||||
storage.save_run(run2)
|
||||
storage.save_run(run3)
|
||||
|
||||
goal_a_runs = storage.get_runs_by_goal("goal_a")
|
||||
goal_b_runs = storage.get_runs_by_goal("goal_b")
|
||||
|
||||
assert len(goal_a_runs) == 2
|
||||
assert "run_1" in goal_a_runs
|
||||
assert "run_2" in goal_a_runs
|
||||
assert len(goal_b_runs) == 1
|
||||
assert "run_3" in goal_b_runs
|
||||
|
||||
def test_index_by_status(self, tmp_path: Path):
|
||||
"""Runs should be indexed by status."""
|
||||
storage = FileStorage(tmp_path)
|
||||
|
||||
run1 = create_test_run(run_id="run_1", status=RunStatus.COMPLETED)
|
||||
run2 = create_test_run(run_id="run_2", status=RunStatus.FAILED)
|
||||
run3 = create_test_run(run_id="run_3", status=RunStatus.COMPLETED)
|
||||
|
||||
storage.save_run(run1)
|
||||
storage.save_run(run2)
|
||||
storage.save_run(run3)
|
||||
|
||||
completed = storage.get_runs_by_status(RunStatus.COMPLETED)
|
||||
failed = storage.get_runs_by_status(RunStatus.FAILED)
|
||||
|
||||
assert len(completed) == 2
|
||||
assert len(failed) == 1
|
||||
|
||||
def test_index_by_status_string(self, tmp_path: Path):
|
||||
"""get_runs_by_status should accept string status."""
|
||||
storage = FileStorage(tmp_path)
|
||||
|
||||
run = create_test_run(status=RunStatus.RUNNING)
|
||||
storage.save_run(run)
|
||||
|
||||
runs = storage.get_runs_by_status("running")
|
||||
assert len(runs) == 1
|
||||
|
||||
def test_index_by_node(self, tmp_path: Path):
|
||||
"""Runs should be indexed by executed nodes."""
|
||||
storage = FileStorage(tmp_path)
|
||||
|
||||
run1 = create_test_run(run_id="run_1", nodes_executed=["node_a", "node_b"])
|
||||
run2 = create_test_run(run_id="run_2", nodes_executed=["node_a", "node_c"])
|
||||
|
||||
storage.save_run(run1)
|
||||
storage.save_run(run2)
|
||||
|
||||
node_a_runs = storage.get_runs_by_node("node_a")
|
||||
node_b_runs = storage.get_runs_by_node("node_b")
|
||||
node_c_runs = storage.get_runs_by_node("node_c")
|
||||
|
||||
assert len(node_a_runs) == 2
|
||||
assert len(node_b_runs) == 1
|
||||
assert len(node_c_runs) == 1
|
||||
|
||||
def test_delete_removes_from_indexes(self, tmp_path: Path):
|
||||
"""Deleting a run should remove it from all indexes."""
|
||||
storage = FileStorage(tmp_path)
|
||||
|
||||
run = create_test_run(
|
||||
run_id="run_1",
|
||||
goal_id="goal_a",
|
||||
status=RunStatus.COMPLETED,
|
||||
nodes_executed=["node_1"],
|
||||
)
|
||||
storage.save_run(run)
|
||||
|
||||
# Verify indexed
|
||||
assert "run_1" in storage.get_runs_by_goal("goal_a")
|
||||
assert "run_1" in storage.get_runs_by_status(RunStatus.COMPLETED)
|
||||
assert "run_1" in storage.get_runs_by_node("node_1")
|
||||
|
||||
# Delete
|
||||
storage.delete_run("run_1")
|
||||
|
||||
# Verify removed from indexes
|
||||
assert "run_1" not in storage.get_runs_by_goal("goal_a")
|
||||
assert "run_1" not in storage.get_runs_by_status(RunStatus.COMPLETED)
|
||||
assert "run_1" not in storage.get_runs_by_node("node_1")
|
||||
|
||||
def test_empty_index_returns_empty_list(self, tmp_path: Path):
|
||||
"""Querying an empty index should return empty list."""
|
||||
storage = FileStorage(tmp_path)
|
||||
|
||||
assert storage.get_runs_by_goal("nonexistent") == []
|
||||
assert storage.get_runs_by_status("nonexistent") == []
|
||||
assert storage.get_runs_by_node("nonexistent") == []
|
||||
|
||||
|
||||
@pytest.mark.skip(reason="FileStorage is deprecated - use unified session storage")
|
||||
class TestFileStorageListOperations:
|
||||
"""Test FileStorage list operations."""
|
||||
|
||||
def test_list_all_runs(self, tmp_path: Path):
|
||||
"""Test listing all run IDs."""
|
||||
storage = FileStorage(tmp_path)
|
||||
|
||||
storage.save_run(create_test_run(run_id="run_1"))
|
||||
storage.save_run(create_test_run(run_id="run_2"))
|
||||
storage.save_run(create_test_run(run_id="run_3"))
|
||||
|
||||
all_runs = storage.list_all_runs()
|
||||
|
||||
assert len(all_runs) == 3
|
||||
assert set(all_runs) == {"run_1", "run_2", "run_3"}
|
||||
|
||||
def test_list_all_goals(self, tmp_path: Path):
|
||||
"""Test listing all goal IDs that have runs."""
|
||||
storage = FileStorage(tmp_path)
|
||||
|
||||
storage.save_run(create_test_run(run_id="run_1", goal_id="goal_a"))
|
||||
storage.save_run(create_test_run(run_id="run_2", goal_id="goal_b"))
|
||||
storage.save_run(create_test_run(run_id="run_3", goal_id="goal_a"))
|
||||
|
||||
all_goals = storage.list_all_goals()
|
||||
|
||||
assert len(all_goals) == 2
|
||||
assert set(all_goals) == {"goal_a", "goal_b"}
|
||||
|
||||
def test_get_stats(self, tmp_path: Path):
|
||||
"""Test getting storage statistics."""
|
||||
storage = FileStorage(tmp_path)
|
||||
|
||||
storage.save_run(create_test_run(run_id="run_1", goal_id="goal_a"))
|
||||
storage.save_run(create_test_run(run_id="run_2", goal_id="goal_b"))
|
||||
|
||||
stats = storage.get_stats()
|
||||
|
||||
assert stats["total_runs"] == 2
|
||||
assert stats["total_goals"] == 2
|
||||
assert stats["storage_path"] == str(tmp_path)
|
||||
|
||||
|
||||
# === CACHE ENTRY TESTS ===
|
||||
|
||||
|
||||
@@ -332,7 +59,6 @@ class TestCacheEntry:
|
||||
# === CONCURRENTSTORAGE TESTS ===
|
||||
|
||||
|
||||
@pytest.mark.skip(reason="ConcurrentStorage is deprecated - wraps deprecated FileStorage")
|
||||
class TestConcurrentStorageBasics:
|
||||
"""Test basic ConcurrentStorage operations."""
|
||||
|
||||
@@ -377,168 +103,6 @@ class TestConcurrentStorageBasics:
|
||||
assert storage._running is False
|
||||
|
||||
|
||||
@pytest.mark.skip(reason="ConcurrentStorage is deprecated - wraps deprecated FileStorage")
|
||||
class TestConcurrentStorageRunOperations:
|
||||
"""Test ConcurrentStorage run operations."""
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_save_and_load_run(self, tmp_path: Path):
|
||||
"""Test async save and load of a run."""
|
||||
storage = ConcurrentStorage(tmp_path)
|
||||
await storage.start()
|
||||
|
||||
try:
|
||||
run = create_test_run()
|
||||
await storage.save_run(run, immediate=True)
|
||||
|
||||
loaded = await storage.load_run(run.id)
|
||||
|
||||
assert loaded is not None
|
||||
assert loaded.id == run.id
|
||||
assert loaded.goal_id == run.goal_id
|
||||
finally:
|
||||
await storage.stop()
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_load_run_uses_cache(self, tmp_path: Path):
|
||||
"""Second load should use cached value."""
|
||||
storage = ConcurrentStorage(tmp_path)
|
||||
await storage.start()
|
||||
|
||||
try:
|
||||
run = create_test_run()
|
||||
await storage.save_run(run, immediate=True)
|
||||
|
||||
# First load
|
||||
loaded1 = await storage.load_run(run.id)
|
||||
# Second load (should use cache)
|
||||
loaded2 = await storage.load_run(run.id, use_cache=True)
|
||||
|
||||
assert loaded1 is not None
|
||||
assert loaded2 is not None
|
||||
# Cache should return same object
|
||||
assert loaded1 is loaded2
|
||||
finally:
|
||||
await storage.stop()
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_load_run_bypass_cache(self, tmp_path: Path):
|
||||
"""Load with use_cache=False should bypass cache."""
|
||||
storage = ConcurrentStorage(tmp_path)
|
||||
await storage.start()
|
||||
|
||||
try:
|
||||
run = create_test_run()
|
||||
await storage.save_run(run, immediate=True)
|
||||
|
||||
loaded1 = await storage.load_run(run.id)
|
||||
loaded2 = await storage.load_run(run.id, use_cache=False)
|
||||
|
||||
assert loaded1 is not None
|
||||
assert loaded2 is not None
|
||||
# Fresh load should be different object
|
||||
assert loaded1 is not loaded2
|
||||
finally:
|
||||
await storage.stop()
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_delete_run(self, tmp_path: Path):
|
||||
"""Test async delete of a run."""
|
||||
storage = ConcurrentStorage(tmp_path)
|
||||
await storage.start()
|
||||
|
||||
try:
|
||||
run = create_test_run()
|
||||
await storage.save_run(run, immediate=True)
|
||||
|
||||
result = await storage.delete_run(run.id)
|
||||
|
||||
assert result is True
|
||||
loaded = await storage.load_run(run.id)
|
||||
assert loaded is None
|
||||
finally:
|
||||
await storage.stop()
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_delete_clears_cache(self, tmp_path: Path):
|
||||
"""Deleting a run should clear it from cache."""
|
||||
storage = ConcurrentStorage(tmp_path)
|
||||
await storage.start()
|
||||
|
||||
try:
|
||||
run = create_test_run()
|
||||
await storage.save_run(run, immediate=True)
|
||||
|
||||
# Load to populate cache
|
||||
await storage.load_run(run.id)
|
||||
assert f"run:{run.id}" in storage._cache
|
||||
|
||||
# Delete
|
||||
await storage.delete_run(run.id)
|
||||
|
||||
# Cache should be cleared
|
||||
assert f"run:{run.id}" not in storage._cache
|
||||
finally:
|
||||
await storage.stop()
|
||||
|
||||
|
||||
@pytest.mark.skip(reason="ConcurrentStorage is deprecated - wraps deprecated FileStorage")
|
||||
class TestConcurrentStorageQueryOperations:
|
||||
"""Test ConcurrentStorage query operations."""
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_get_runs_by_goal(self, tmp_path: Path):
|
||||
"""Test async query by goal."""
|
||||
storage = ConcurrentStorage(tmp_path)
|
||||
await storage.start()
|
||||
|
||||
try:
|
||||
run1 = create_test_run(run_id="run_1", goal_id="goal_a")
|
||||
run2 = create_test_run(run_id="run_2", goal_id="goal_a")
|
||||
|
||||
await storage.save_run(run1, immediate=True)
|
||||
await storage.save_run(run2, immediate=True)
|
||||
|
||||
runs = await storage.get_runs_by_goal("goal_a")
|
||||
|
||||
assert len(runs) == 2
|
||||
finally:
|
||||
await storage.stop()
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_get_runs_by_status(self, tmp_path: Path):
|
||||
"""Test async query by status."""
|
||||
storage = ConcurrentStorage(tmp_path)
|
||||
await storage.start()
|
||||
|
||||
try:
|
||||
run = create_test_run(status=RunStatus.FAILED)
|
||||
await storage.save_run(run, immediate=True)
|
||||
|
||||
runs = await storage.get_runs_by_status(RunStatus.FAILED)
|
||||
|
||||
assert len(runs) == 1
|
||||
finally:
|
||||
await storage.stop()
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_list_all_runs(self, tmp_path: Path):
|
||||
"""Test async list all runs."""
|
||||
storage = ConcurrentStorage(tmp_path)
|
||||
await storage.start()
|
||||
|
||||
try:
|
||||
await storage.save_run(create_test_run(run_id="run_1"), immediate=True)
|
||||
await storage.save_run(create_test_run(run_id="run_2"), immediate=True)
|
||||
|
||||
runs = await storage.list_all_runs()
|
||||
|
||||
assert len(runs) == 2
|
||||
finally:
|
||||
await storage.stop()
|
||||
|
||||
|
||||
@pytest.mark.skip(reason="ConcurrentStorage is deprecated - wraps deprecated FileStorage")
|
||||
class TestConcurrentStorageCacheManagement:
|
||||
"""Test ConcurrentStorage cache management."""
|
||||
|
||||
@@ -576,60 +140,3 @@ class TestConcurrentStorageCacheManagement:
|
||||
assert stats["total_entries"] == 2
|
||||
assert stats["expired_entries"] == 1
|
||||
assert stats["valid_entries"] == 1
|
||||
|
||||
|
||||
@pytest.mark.skip(reason="ConcurrentStorage is deprecated - wraps deprecated FileStorage")
|
||||
class TestConcurrentStorageSyncAPI:
|
||||
"""Test ConcurrentStorage synchronous API for backward compatibility."""
|
||||
|
||||
def test_save_run_sync(self, tmp_path: Path):
|
||||
"""Test synchronous save."""
|
||||
storage = ConcurrentStorage(tmp_path)
|
||||
run = create_test_run()
|
||||
|
||||
storage.save_run_sync(run)
|
||||
|
||||
# Verify saved
|
||||
loaded = storage.load_run_sync(run.id)
|
||||
assert loaded is not None
|
||||
assert loaded.id == run.id
|
||||
|
||||
def test_load_run_sync(self, tmp_path: Path):
|
||||
"""Test synchronous load."""
|
||||
storage = ConcurrentStorage(tmp_path)
|
||||
run = create_test_run()
|
||||
|
||||
storage.save_run_sync(run)
|
||||
loaded = storage.load_run_sync(run.id)
|
||||
|
||||
assert loaded is not None
|
||||
|
||||
def test_load_run_sync_nonexistent(self, tmp_path: Path):
|
||||
"""Synchronous load of nonexistent run returns None."""
|
||||
storage = ConcurrentStorage(tmp_path)
|
||||
|
||||
loaded = storage.load_run_sync("nonexistent")
|
||||
assert loaded is None
|
||||
|
||||
|
||||
@pytest.mark.skip(reason="ConcurrentStorage is deprecated - wraps deprecated FileStorage")
|
||||
class TestConcurrentStorageStats:
|
||||
"""Test ConcurrentStorage statistics."""
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_get_stats(self, tmp_path: Path):
|
||||
"""Test getting async storage stats."""
|
||||
storage = ConcurrentStorage(tmp_path)
|
||||
await storage.start()
|
||||
|
||||
try:
|
||||
await storage.save_run(create_test_run(), immediate=True)
|
||||
|
||||
stats = await storage.get_stats()
|
||||
|
||||
assert stats["total_runs"] == 1
|
||||
assert "cache" in stats
|
||||
assert "pending_writes" in stats
|
||||
assert stats["running"] is True
|
||||
finally:
|
||||
await storage.stop()
|
||||
|
||||
Reference in New Issue
Block a user