Files
deer-flow/backend/packages/harness/deerflow/skills/installer.py
T
DanielWalnut 707ed328dd fix(skills): scan skill archives before install (#2561)
* fix(skills): scan skill archives before install

Fixes #2536

* fix(skills): scan archive support files before install

* style(skills): format archive installer

* fix(skills): address archive install review comments
2026-04-28 11:56:11 +08:00

291 lines
10 KiB
Python

"""Shared skill archive installation logic.
Pure business logic — no FastAPI/HTTP dependencies.
Both Gateway and Client delegate to these functions.
"""
import asyncio
import concurrent.futures
import logging
import posixpath
import shutil
import stat
import tempfile
import zipfile
from pathlib import Path, PurePosixPath, PureWindowsPath
from deerflow.skills.loader import get_skills_root_path
from deerflow.skills.security_scanner import scan_skill_content
from deerflow.skills.validation import _validate_skill_frontmatter
logger = logging.getLogger(__name__)
_PROMPT_INPUT_DIRS = {"references", "templates"}
_PROMPT_INPUT_SUFFIXES = frozenset({".json", ".markdown", ".md", ".rst", ".txt", ".yaml", ".yml"})
class SkillAlreadyExistsError(ValueError):
"""Raised when a skill with the same name is already installed."""
class SkillSecurityScanError(ValueError):
"""Raised when a skill archive fails security scanning."""
def is_unsafe_zip_member(info: zipfile.ZipInfo) -> bool:
"""Return True if the zip member path is absolute or attempts directory traversal."""
name = info.filename
if not name:
return False
normalized = name.replace("\\", "/")
if normalized.startswith("/"):
return True
path = PurePosixPath(normalized)
if path.is_absolute():
return True
if PureWindowsPath(name).is_absolute():
return True
if ".." in path.parts:
return True
return False
def is_symlink_member(info: zipfile.ZipInfo) -> bool:
"""Detect symlinks based on the external attributes stored in the ZipInfo."""
mode = info.external_attr >> 16
return stat.S_ISLNK(mode)
def should_ignore_archive_entry(path: Path) -> bool:
"""Return True for macOS metadata dirs and dotfiles."""
return path.name.startswith(".") or path.name == "__MACOSX"
def resolve_skill_dir_from_archive(temp_path: Path) -> Path:
"""Locate the skill root directory from extracted archive contents.
Filters out macOS metadata (__MACOSX) and dotfiles (.DS_Store).
Returns:
Path to the skill directory.
Raises:
ValueError: If the archive is empty after filtering.
"""
items = [p for p in temp_path.iterdir() if not should_ignore_archive_entry(p)]
if not items:
raise ValueError("Skill archive is empty")
if len(items) == 1 and items[0].is_dir():
return items[0]
return temp_path
def safe_extract_skill_archive(
zip_ref: zipfile.ZipFile,
dest_path: Path,
max_total_size: int = 512 * 1024 * 1024,
) -> None:
"""Safely extract a skill archive with security protections.
Protections:
- Reject absolute paths and directory traversal (..).
- Skip symlink entries instead of materialising them.
- Enforce a hard limit on total uncompressed size (zip bomb defence).
Raises:
ValueError: If unsafe members or size limit exceeded.
"""
dest_root = dest_path.resolve()
total_written = 0
for info in zip_ref.infolist():
if is_unsafe_zip_member(info):
raise ValueError(f"Archive contains unsafe member path: {info.filename!r}")
if is_symlink_member(info):
logger.warning("Skipping symlink entry in skill archive: %s", info.filename)
continue
normalized_name = posixpath.normpath(info.filename.replace("\\", "/"))
member_path = dest_root.joinpath(*PurePosixPath(normalized_name).parts)
if not member_path.resolve().is_relative_to(dest_root):
raise ValueError(f"Zip entry escapes destination: {info.filename!r}")
member_path.parent.mkdir(parents=True, exist_ok=True)
if info.is_dir():
member_path.mkdir(parents=True, exist_ok=True)
continue
with zip_ref.open(info) as src, member_path.open("wb") as dst:
while chunk := src.read(65536):
total_written += len(chunk)
if total_written > max_total_size:
raise ValueError("Skill archive is too large or appears highly compressed.")
dst.write(chunk)
def _is_script_support_file(rel_path: Path) -> bool:
return bool(rel_path.parts) and rel_path.parts[0] == "scripts"
def _should_scan_support_file(rel_path: Path) -> bool:
if _is_script_support_file(rel_path):
return True
return bool(rel_path.parts) and rel_path.parts[0] in _PROMPT_INPUT_DIRS and rel_path.suffix.lower() in _PROMPT_INPUT_SUFFIXES
def _move_staged_skill_into_reserved_target(staging_target: Path, target: Path) -> None:
installed = False
reserved = False
try:
target.mkdir(mode=0o700)
reserved = True
for child in staging_target.iterdir():
shutil.move(str(child), target / child.name)
installed = True
except FileExistsError as e:
raise SkillAlreadyExistsError(f"Skill '{target.name}' already exists") from e
finally:
if reserved and not installed and target.exists():
shutil.rmtree(target)
async def _scan_skill_file_or_raise(skill_dir: Path, path: Path, skill_name: str, *, executable: bool) -> None:
rel_path = path.relative_to(skill_dir).as_posix()
location = f"{skill_name}/{rel_path}"
try:
content = path.read_text(encoding="utf-8")
except UnicodeDecodeError as e:
raise SkillSecurityScanError(f"Security scan failed for skill '{skill_name}': {location} must be valid UTF-8") from e
try:
result = await scan_skill_content(content, executable=executable, location=location)
except Exception as e:
raise SkillSecurityScanError(f"Security scan failed for {location}: {e}") from e
decision = getattr(result, "decision", None)
reason = str(getattr(result, "reason", "") or "No reason provided.")
if decision == "block":
if rel_path == "SKILL.md":
raise SkillSecurityScanError(f"Security scan blocked skill '{skill_name}': {reason}")
raise SkillSecurityScanError(f"Security scan blocked {location}: {reason}")
if executable and decision != "allow":
raise SkillSecurityScanError(f"Security scan rejected executable {location}: {reason}")
if decision not in {"allow", "warn"}:
raise SkillSecurityScanError(f"Security scan failed for {location}: invalid scanner decision {decision!r}")
async def _scan_skill_archive_contents_or_raise(skill_dir: Path, skill_name: str) -> None:
"""Run the skill security scanner against all installable text and script files."""
skill_md = skill_dir / "SKILL.md"
await _scan_skill_file_or_raise(skill_dir, skill_md, skill_name, executable=False)
for path in sorted(skill_dir.rglob("*")):
if not path.is_file():
continue
rel_path = path.relative_to(skill_dir)
if rel_path == Path("SKILL.md"):
continue
if path.name == "SKILL.md":
raise SkillSecurityScanError(f"Security scan failed for skill '{skill_name}': nested SKILL.md is not allowed at {skill_name}/{rel_path.as_posix()}")
if not _should_scan_support_file(rel_path):
continue
await _scan_skill_file_or_raise(skill_dir, path, skill_name, executable=_is_script_support_file(rel_path))
async def ainstall_skill_from_archive(
zip_path: str | Path,
*,
skills_root: Path | None = None,
) -> dict:
"""Install a skill from a .skill archive (ZIP).
Args:
zip_path: Path to the .skill file.
skills_root: Override the skills root directory. If None, uses
the default from config.
Returns:
Dict with success, skill_name, message.
Raises:
FileNotFoundError: If the file does not exist.
ValueError: If the file is invalid (wrong extension, bad ZIP,
invalid frontmatter, duplicate name).
"""
logger.info("Installing skill from %s", zip_path)
path = Path(zip_path)
if not path.is_file():
if not path.exists():
raise FileNotFoundError(f"Skill file not found: {zip_path}")
raise ValueError(f"Path is not a file: {zip_path}")
if path.suffix != ".skill":
raise ValueError("File must have .skill extension")
if skills_root is None:
skills_root = get_skills_root_path()
custom_dir = skills_root / "custom"
custom_dir.mkdir(parents=True, exist_ok=True)
with tempfile.TemporaryDirectory() as tmp:
tmp_path = Path(tmp)
try:
zf = zipfile.ZipFile(path, "r")
except FileNotFoundError:
raise FileNotFoundError(f"Skill file not found: {zip_path}") from None
except (zipfile.BadZipFile, IsADirectoryError):
raise ValueError("File is not a valid ZIP archive") from None
with zf:
safe_extract_skill_archive(zf, tmp_path)
skill_dir = resolve_skill_dir_from_archive(tmp_path)
is_valid, message, skill_name = _validate_skill_frontmatter(skill_dir)
if not is_valid:
raise ValueError(f"Invalid skill: {message}")
if not skill_name or "/" in skill_name or "\\" in skill_name or ".." in skill_name:
raise ValueError(f"Invalid skill name: {skill_name}")
target = custom_dir / skill_name
if target.exists():
raise SkillAlreadyExistsError(f"Skill '{skill_name}' already exists")
await _scan_skill_archive_contents_or_raise(skill_dir, skill_name)
with tempfile.TemporaryDirectory(prefix=f".installing-{skill_name}-", dir=custom_dir) as staging_root:
staging_target = Path(staging_root) / skill_name
shutil.copytree(skill_dir, staging_target)
_move_staged_skill_into_reserved_target(staging_target, target)
logger.info("Skill %r installed to %s", skill_name, target)
return {
"success": True,
"skill_name": skill_name,
"message": f"Skill '{skill_name}' installed successfully",
}
def _run_async_install(coro):
try:
loop = asyncio.get_running_loop()
except RuntimeError:
loop = None
if loop is not None and loop.is_running():
with concurrent.futures.ThreadPoolExecutor(max_workers=1) as executor:
return executor.submit(asyncio.run, coro).result()
return asyncio.run(coro)
def install_skill_from_archive(
zip_path: str | Path,
*,
skills_root: Path | None = None,
) -> dict:
"""Install a skill from a .skill archive (ZIP)."""
return _run_async_install(ainstall_skill_from_archive(zip_path, skills_root=skills_root))