Compare commits
1216 Commits
`@@ -3,6 +3,7 @@ Dockerfile`

```
.dockerignore
.git
.gitignore
docker/

# Python
__pycache__/
```

`@@ -51,3 +52,20 @@ examples/`

```
assets/
tests/
*.log

# Exclude directories not needed in Docker context
# Frontend build only needs frontend/
# Backend build only needs backend/
scripts/
logs/
docker/
skills/
frontend/.next
frontend/node_modules
backend/.venv
backend/htmlcov
backend/.coverage
*.md
!README.md
!frontend/README.md
!backend/README.md
```
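These two hunks appear to extend the repository's `.dockerignore`. One way to sanity-check what actually reaches the Docker build context after a change like this is a throwaway build that copies the context in and lists it — a sketch, assuming Docker with BuildKit is installed; the `-maxdepth 2` limit is just for readable output:

```bash
# Throwaway image whose only job is to print the build context the daemon
# received, so the effect of the .dockerignore rules can be inspected.
# The Dockerfile is read from stdin (-f -) and `.` is the build context.
docker build --no-cache --progress=plain -f - . <<'EOF'
FROM busybox
COPY . /ctx
RUN find /ctx -maxdepth 2 | sort
EOF
```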
+13 -32

`@@ -1,35 +1,16 @@`

```
# Application Settings
DEBUG=True
APP_ENV=development
# TAVILY API Key
TAVILY_API_KEY=your-tavily-api-key

# docker build args
NEXT_PUBLIC_API_URL="http://localhost:8000/api"
# Jina API Key
JINA_API_KEY=your-jina-api-key

AGENT_RECURSION_LIMIT=30
# CORS Origins (comma-separated) - e.g., http://localhost:3000,http://localhost:3001
# CORS_ORIGINS=http://localhost:3000

# Search Engine, Supported values: tavily (recommended), duckduckgo, brave_search, arxiv
SEARCH_API=tavily
TAVILY_API_KEY=tvly-xxx
# BRAVE_SEARCH_API_KEY=xxx # Required only if SEARCH_API is brave_search
# JINA_API_KEY=jina_xxx # Optional, default is None

# Optional, RAG provider
# RAG_PROVIDER=ragflow
# RAGFLOW_API_URL="http://localhost:9388"
# RAGFLOW_API_KEY="ragflow-xxx"
# RAGFLOW_RETRIEVAL_SIZE=10

# Optional, volcengine TTS for generating podcast
VOLCENGINE_TTS_APPID=xxx
VOLCENGINE_TTS_ACCESS_TOKEN=xxx
# VOLCENGINE_TTS_CLUSTER=volcano_tts # Optional, default is volcano_tts
# VOLCENGINE_TTS_VOICE_TYPE=BV700_V2_streaming # Optional, default is BV700_V2_streaming

# Option, for langsmith tracing and monitoring
# LANGSMITH_TRACING=true
# LANGSMITH_ENDPOINT="https://api.smith.langchain.com"
# LANGSMITH_API_KEY="xxx"
# LANGSMITH_PROJECT="xxx"

# [!NOTE]
# For model settings and other configurations, please refer to `docs/configuration_guide.md`
# Optional:
# FIRECRAWL_API_KEY=your-firecrawl-api-key
# VOLCENGINE_API_KEY=your-volcengine-api-key
# OPENAI_API_KEY=your-openai-api-key
# GEMINI_API_KEY=your-gemini-api-key
# DEEPSEEK_API_KEY=your-deepseek-api-key
# NOVITA_API_KEY=your-novita-api-key # OpenAI-compatible, see https://novita.ai
```
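This hunk trims the `.env` template. A common way to try the resulting template locally is to copy it and export its variables into the current shell — a sketch; `SEARCH_API` is just one of the variables shown above:

```bash
# Copy the template, fill in real keys, then export everything it defines.
# `set -a` marks every variable assigned by `source` for export.
cp .env.example .env
set -a
source .env
set +a
echo "$SEARCH_API"   # should print: tavily
```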
`@@ -0,0 +1,43 @@`

```
# Normalize line endings to LF for all text files
* text=auto eol=lf

# Shell scripts and makefiles must always use LF
*.sh text eol=lf
Makefile text eol=lf
**/Makefile text eol=lf

# Common config/source files
*.yml text eol=lf
*.yaml text eol=lf
*.toml text eol=lf
*.json text eol=lf
*.md text eol=lf
*.py text eol=lf
*.ts text eol=lf
*.tsx text eol=lf
*.js text eol=lf
*.jsx text eol=lf
*.css text eol=lf
*.scss text eol=lf
*.html text eol=lf
*.env text eol=lf

# Windows scripts
*.bat text eol=crlf
*.cmd text eol=crlf

# Binary assets
*.png binary
*.jpg binary
*.jpeg binary
*.gif binary
*.webp binary
*.ico binary
*.pdf binary
*.zip binary
*.tar binary
*.gz binary
*.mp4 binary
*.mov binary
*.woff binary
*.woff2 binary
```
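A new `.gitattributes` only takes effect for files touched from here on; files already in the index keep their stored line endings. A typical follow-up (not part of this diff) is to renormalize the existing tree:

```bash
# Re-run line-ending normalization over already-tracked files so the new
# `* text=auto eol=lf` rule applies to the whole repository.
git add --renormalize .
git status          # review which files were rewritten
git commit -m "chore: normalize line endings to LF"
```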
`@@ -0,0 +1,39 @@`

```yaml
name: Unit Tests

on:
  pull_request:
    types: [opened, synchronize, reopened, ready_for_review]

concurrency:
  group: unit-tests-${{ github.event.pull_request.number || github.ref }}
  cancel-in-progress: true

jobs:
  backend-unit-tests:
    if: github.event.pull_request.draft == false
    runs-on: ubuntu-latest
    timeout-minutes: 15

    steps:
      - name: Checkout
        uses: actions/checkout@v6

      - name: Set up Python
        uses: actions/setup-python@v6
        with:
          python-version: '3.12'

      - name: Install uv
        uses: astral-sh/setup-uv@v7

      - name: Install backend dependencies
        working-directory: backend
        run: uv sync --group dev

      - name: Lint backend
        working-directory: backend
        run: make lint

      - name: Run unit tests of backend
        working-directory: backend
        run: make test
```
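Since this workflow only runs `make lint` and `make test` inside `backend/`, the same checks can be reproduced on a local checkout before opening a PR — a sketch, assuming Python 3.12 and `uv` are installed:

```bash
# Mirror the CI steps from the workflow above.
cd backend
uv sync --group dev   # same dependency install as the CI job
make lint             # lint step
make test             # unit-test step
```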
`@@ -1,31 +0,0 @@`

```yaml
name: Lint Check

on:
  push:
    branches: [ 'main' ]
  pull_request:
    branches: [ '*' ]

permissions:
  contents: read

jobs:
  lint:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v3

      - name: Install the latest version of uv
        uses: astral-sh/setup-uv@v5
        with:
          version: "latest"

      - name: Install dependencies
        run: |
          uv venv --python 3.12
          uv pip install -e ".[dev]"

      - name: Run linters
        run: |
          source .venv/bin/activate
          make lint
```
`@@ -1,48 +0,0 @@`

```yaml
name: Test Cases Check

on:
  push:
    branches: [ 'main' ]
  pull_request:
    branches: [ '*' ]

permissions:
  contents: read

jobs:
  test:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v3

      - name: Install the latest version of uv
        uses: astral-sh/setup-uv@v5
        with:
          version: "latest"

      - name: Install dependencies
        run: |
          uv venv --python 3.12
          uv pip install -e ".[dev]"
          uv pip install -e ".[test]"

      - name: Run test cases with coverage
        run: |
          source .venv/bin/activate
          TAVILY_API_KEY=mock-key make coverage

      - name: Generate HTML Coverage Report
        run: |
          source .venv/bin/activate
          python -m coverage html -d coverage_html

      - name: Upload Coverage Report
        uses: actions/upload-artifact@v4
        with:
          name: coverage-report
          path: coverage_html/

      - name: Display Coverage Summary
        run: |
          source .venv/bin/activate
          python -m coverage report
```
+36 -13

`@@ -1,13 +1,18 @@`

```
# Python-generated files
# DeerFlow docker image cache
docker/.cache/
# OS generated files
.DS_Store
*.local
._*
.Spotlight-V100
.Trashes
ehthumbs.db
Thumbs.db

# Python cache
__pycache__/
*.py[oc]
build/
dist/
wheels/
*.egg-info
.coverage
agent_history.gif
static/browser_history/*.gif
*.pyc
*.pyo

# Virtual environments
.venv
```

`@@ -16,12 +21,30 @@ venv/`

```
# Environment variables
.env

# user conf
conf.yaml
# Configuration files
config.yaml
mcp_config.json
extensions_config.json

# IDE
.idea/
.langgraph_api/
.vscode/

# coverage report
# Coverage report
coverage.xml
coverage/
.deer-flow/
.claude/
skills/custom/*
logs/
log/

# Local git hooks (keep only on this machine, do not push)
.githooks/

# pnpm
.pnpm-store
sandbox_image_cache.tar

# ignore the legacy `web` folder
web/
```
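Ignoring a path does not untrack files already committed. If any of the newly ignored paths (for example the legacy `web/` folder) were previously tracked, a cleanup along these lines would be needed — a hedged sketch, not part of this diff:

```bash
# Drop an already-tracked, now-ignored path from the index while leaving
# the working-tree copy in place.
git rm -r --cached web/
git commit -m "chore: stop tracking the legacy web folder"
```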
Vendored -60

`@@ -1,60 +0,0 @@`

```json
{
  "version": "0.2.0",
  "configurations": [
    {
      "name": "Python: 当前文件",
      "type": "debugpy",
      "request": "launch",
      "program": "${file}",
      "console": "integratedTerminal",
      "justMyCode": true
    },
    {
      "name": "Python: main.py",
      "type": "debugpy",
      "request": "launch",
      "program": "${workspaceFolder}/main.py",
      "console": "integratedTerminal",
      "justMyCode": false,
      "env": {
        "PYTHONPATH": "${workspaceFolder}"
      },
      "args": [
        "--debug", "--max_plan_iterations", "1", "--max_step_num", "1"
      ]
    },
    {
      "name": "Python: llm.py",
      "type": "debugpy",
      "request": "launch",
      "program": "${workspaceFolder}/src/llms/llm.py",
      "console": "integratedTerminal",
      "justMyCode": true,
      "env": {
        "PYTHONPATH": "${workspaceFolder}"
      }
    },
    {
      "name": "Python: server.py",
      "type": "debugpy",
      "request": "launch",
      "program": "${workspaceFolder}/server.py",
      "console": "integratedTerminal",
      "justMyCode": false,
      "env": {
        "PYTHONPATH": "${workspaceFolder}"
      }
    },
    {
      "name": "Python: graph.py",
      "type": "debugpy",
      "request": "launch",
      "program": "${workspaceFolder}/src/ppt/graph/builder.py",
      "console": "integratedTerminal",
      "justMyCode": false,
      "env": {
        "PYTHONPATH": "${workspaceFolder}"
      }
    },
  ]
}
```
-133

`@@ -1,133 +0,0 @@`

````markdown
# Contributing to DeerFlow

Thank you for your interest in contributing to DeerFlow! We welcome contributions of all kinds from the community.

## Ways to Contribute

There are many ways you can contribute to DeerFlow:

- **Code Contributions**: Add new features, fix bugs, or improve performance
- **Documentation**: Improve README, add code comments, or create examples
- **Bug Reports**: Submit detailed bug reports through issues
- **Feature Requests**: Suggest new features or improvements
- **Code Reviews**: Review pull requests from other contributors
- **Community Support**: Help others in discussions and issues

## Development Setup

1. Fork the repository
2. Clone your fork:
   ```bash
   git clone https://github.com/bytedance/deer-flow.git
   cd deer-flow
   ```
3. Set up your development environment:
   ```bash
   # Install dependencies, uv will take care of the python interpreter and venv creation
   uv sync

   # For development, install additional dependencies
   uv pip install -e ".[dev]"
   uv pip install -e ".[test]"
   ```
4. Configure pre-commit hooks:
   ```bash
   chmod +x pre-commit
   ln -s ../../pre-commit .git/hooks/pre-commit
   ```

## Development Process

1. Create a new branch:
   ```bash
   git checkout -b feature/amazing-feature
   ```

2. Make your changes following our coding standards:
   - Write clear, documented code
   - Follow PEP 8 style guidelines
   - Add tests for new features
   - Update documentation as needed

3. Run tests and checks:
   ```bash
   make test      # Run tests
   make lint      # Run linting
   make format    # Format code
   make coverage  # Check test coverage
   ```

4. Commit your changes:
   ```bash
   git commit -m 'Add some amazing feature'
   ```

5. Push to your fork:
   ```bash
   git push origin feature/amazing-feature
   ```

6. Open a Pull Request

## Pull Request Guidelines

- Fill in the pull request template completely
- Include tests for new features
- Update documentation as needed
- Ensure all tests pass and there are no linting errors
- Keep pull requests focused on a single feature or fix
- Reference any related issues

## Code Style

- Follow PEP 8 guidelines
- Use type hints where possible
- Write descriptive docstrings
- Keep functions and methods focused and single-purpose
- Comment complex logic
- Python version requirement: >= 3.12

## Testing

Run the test suite:
```bash
# Run all tests
make test

# Run specific test file
pytest tests/integration/test_workflow.py

# Run with coverage
make coverage
```

## Code Quality

```bash
# Run linting
make lint

# Format code
make format
```

## Community Guidelines

- Be respectful and inclusive
- Follow our code of conduct
- Help others learn and grow
- Give constructive feedback
- Stay focused on improving the project

## Need Help?

If you need help with anything:
- Check existing issues and discussions
- Join our community channels
- Ask questions in discussions

## License

By contributing to DeerFlow, you agree that your contributions will be licensed under the MIT License.

We appreciate your contributions to making DeerFlow better!
````
+270

`@@ -0,0 +1,270 @@`

````markdown
# Contributing to DeerFlow

Thank you for your interest in contributing to DeerFlow! This guide will help you set up your development environment and understand our development workflow.

## Development Environment Setup

We offer two development environments. **Docker is recommended** for the most consistent and hassle-free experience.

### Option 1: Docker Development (Recommended)

Docker provides a consistent, isolated environment with all dependencies pre-configured. No need to install Node.js, Python, or nginx on your local machine.

#### Prerequisites

- Docker Desktop or Docker Engine
- pnpm (for caching optimization)

#### Setup Steps

1. **Configure the application**:
   ```bash
   # Copy example configuration
   cp config.example.yaml config.yaml

   # Set your API keys
   export OPENAI_API_KEY="your-key-here"
   # or edit config.yaml directly
   ```

2. **Initialize Docker environment** (first time only):
   ```bash
   make docker-init
   ```
   This will:
   - Build Docker images
   - Install frontend dependencies (pnpm)
   - Install backend dependencies (uv)
   - Share pnpm cache with host for faster builds

3. **Start development services**:
   ```bash
   make docker-start
   ```
   `make docker-start` reads `config.yaml` and starts `provisioner` only for provisioner/Kubernetes sandbox mode.

   All services will start with hot-reload enabled:
   - Frontend changes are automatically reloaded
   - Backend changes trigger automatic restart
   - LangGraph server supports hot-reload

4. **Access the application**:
   - Web Interface: http://localhost:2026
   - API Gateway: http://localhost:2026/api/*
   - LangGraph: http://localhost:2026/api/langgraph/*

#### Docker Commands

```bash
# Build the custom k3s image (with pre-cached sandbox image)
make docker-init
# Start Docker services (mode-aware, localhost:2026)
make docker-start
# Stop Docker development services
make docker-stop
# View Docker development logs
make docker-logs
# View Docker frontend logs
make docker-logs-frontend
# View Docker gateway logs
make docker-logs-gateway
```

#### Docker Architecture

```
Host Machine
    ↓
Docker Compose (deer-flow-dev)
    ├→ nginx (port 2026)      ← Reverse proxy
    ├→ web (port 3000)        ← Frontend with hot-reload
    ├→ api (port 8001)        ← Gateway API with hot-reload
    ├→ langgraph (port 2024)  ← LangGraph server with hot-reload
    └→ provisioner (optional, port 8002) ← Started only in provisioner/K8s sandbox mode
```

**Benefits of Docker Development**:
- ✅ Consistent environment across different machines
- ✅ No need to install Node.js, Python, or nginx locally
- ✅ Isolated dependencies and services
- ✅ Easy cleanup and reset
- ✅ Hot-reload for all services
- ✅ Production-like environment

### Option 2: Local Development

If you prefer to run services directly on your machine:

#### Prerequisites

Check that you have all required tools installed:

```bash
make check
```

Required tools:
- Node.js 22+
- pnpm
- uv (Python package manager)
- nginx

#### Setup Steps

1. **Configure the application** (same as Docker setup above)

2. **Install dependencies**:
   ```bash
   make install
   ```

3. **Run development server** (starts all services with nginx):
   ```bash
   make dev
   ```

4. **Access the application**:
   - Web Interface: http://localhost:2026
   - All API requests are automatically proxied through nginx

#### Manual Service Control

If you need to start services individually:

1. **Start backend services**:
   ```bash
   # Terminal 1: Start LangGraph Server (port 2024)
   cd backend
   make dev

   # Terminal 2: Start Gateway API (port 8001)
   cd backend
   make gateway

   # Terminal 3: Start Frontend (port 3000)
   cd frontend
   pnpm dev
   ```

2. **Start nginx**:
   ```bash
   make nginx
   # or directly: nginx -c $(pwd)/docker/nginx/nginx.local.conf -g 'daemon off;'
   ```

3. **Access the application**:
   - Web Interface: http://localhost:2026

#### Nginx Configuration

The nginx configuration provides:
- Unified entry point on port 2026
- Routes `/api/langgraph/*` to LangGraph Server (2024)
- Routes other `/api/*` endpoints to Gateway API (8001)
- Routes non-API requests to Frontend (3000)
- Centralized CORS handling
- SSE/streaming support for real-time agent responses
- Optimized timeouts for long-running operations

## Project Structure

```
deer-flow/
├── config.example.yaml             # Configuration template
├── extensions_config.example.json  # MCP and Skills configuration template
├── Makefile                        # Build and development commands
├── scripts/
│   └── docker.sh                   # Docker management script
├── docker/
│   ├── docker-compose-dev.yaml     # Docker Compose configuration
│   └── nginx/
│       ├── nginx.conf              # Nginx config for Docker
│       └── nginx.local.conf        # Nginx config for local dev
├── backend/                        # Backend application
│   ├── src/
│   │   ├── gateway/                # Gateway API (port 8001)
│   │   ├── agents/                 # LangGraph agents (port 2024)
│   │   ├── mcp/                    # Model Context Protocol integration
│   │   ├── skills/                 # Skills system
│   │   └── sandbox/                # Sandbox execution
│   ├── docs/                       # Backend documentation
│   └── Makefile                    # Backend commands
├── frontend/                       # Frontend application
│   └── Makefile                    # Frontend commands
└── skills/                         # Agent skills
    ├── public/                     # Public skills
    └── custom/                     # Custom skills
```

## Architecture

```
Browser
    ↓
Nginx (port 2026)                   ← Unified entry point
    ├→ Frontend (port 3000)         ← / (non-API requests)
    ├→ Gateway API (port 8001)      ← /api/models, /api/mcp, /api/skills, /api/threads/*/artifacts
    └→ LangGraph Server (port 2024) ← /api/langgraph/* (agent interactions)
```

## Development Workflow

1. **Create a feature branch**:
   ```bash
   git checkout -b feature/your-feature-name
   ```

2. **Make your changes** with hot-reload enabled

3. **Test your changes** thoroughly

4. **Commit your changes**:
   ```bash
   git add .
   git commit -m "feat: description of your changes"
   ```

5. **Push and create a Pull Request**:
   ```bash
   git push origin feature/your-feature-name
   ```

## Testing

```bash
# Backend tests
cd backend
uv run pytest

# Frontend tests
cd frontend
pnpm test
```

### PR Regression Checks

Every pull request runs the backend regression workflow at [.github/workflows/backend-unit-tests.yml](.github/workflows/backend-unit-tests.yml), including:

- `tests/test_provisioner_kubeconfig.py`
- `tests/test_docker_sandbox_mode_detection.py`

## Code Style

- **Backend (Python)**: We use `ruff` for linting and formatting
- **Frontend (TypeScript)**: We use ESLint and Prettier

## Documentation

- [Configuration Guide](backend/docs/CONFIGURATION.md) - Setup and configuration
- [Architecture Overview](backend/CLAUDE.md) - Technical architecture
- [MCP Setup Guide](MCP_SETUP.md) - Model Context Protocol configuration

## Need Help?

- Check existing [Issues](https://github.com/bytedance/deer-flow/issues)
- Read the [Documentation](backend/docs/)
- Ask questions in [Discussions](https://github.com/bytedance/deer-flow/discussions)

## License

By contributing to DeerFlow, you agree that your contributions will be licensed under the [MIT License](./LICENSE).
````
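The two regression tests named in the new guide can also be run directly, using the same `uv run pytest` invocation the guide documents — a sketch, assuming a synced `backend/` environment:

```bash
cd backend
uv run pytest tests/test_provisioner_kubeconfig.py \
              tests/test_docker_sandbox_mode_detection.py
```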
-24

`@@ -1,24 +0,0 @@`

```dockerfile
FROM ghcr.io/astral-sh/uv:python3.12-bookworm-slim

# Install uv.
COPY --from=ghcr.io/astral-sh/uv:latest /uv /bin/uv

WORKDIR /app

# Pre-cache the application dependencies.
RUN --mount=type=cache,target=/root/.cache/uv \
    --mount=type=bind,source=uv.lock,target=uv.lock \
    --mount=type=bind,source=pyproject.toml,target=pyproject.toml \
    uv sync --locked --no-install-project

# Copy the application into the container.
COPY . /app

# Install the application dependencies.
RUN --mount=type=cache,target=/root/.cache/uv \
    uv sync --locked

EXPOSE 8000

# Run the application.
CMD ["uv", "run", "python", "server.py", "--host", "0.0.0.0", "--port", "8000"]
```
`@@ -1,6 +1,7 @@`

```
MIT License

Copyright (c) 2025 Bytedance Ltd. and/or its affiliates
Copyright (c) 2025-2026 DeerFlow Authors

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
```

`@@ -18,4 +19,4 @@ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE`

```
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
SOFTWARE.
```
@@ -1,22 +1,273 @@
|
||||
.PHONY: lint format install-dev serve test coverage
|
||||
# DeerFlow - Unified Development Environment
|
||||
|
||||
install-dev:
|
||||
uv pip install -e ".[dev]" && uv pip install -e ".[test]"
|
||||
.PHONY: help config check install dev stop clean docker-init docker-start docker-stop docker-logs docker-logs-frontend docker-logs-gateway
|
||||
|
||||
format:
|
||||
uv run black --preview .
|
||||
help:
|
||||
@echo "DeerFlow Development Commands:"
|
||||
@echo " make config - Generate local config files (aborts if config already exists)"
|
||||
@echo " make check - Check if all required tools are installed"
|
||||
@echo " make install - Install all dependencies (frontend + backend)"
|
||||
@echo " make setup-sandbox - Pre-pull sandbox container image (recommended)"
|
||||
@echo " make dev - Start all services (frontend + backend + nginx on localhost:2026)"
|
||||
@echo " make stop - Stop all running services"
|
||||
@echo " make clean - Clean up processes and temporary files"
|
||||
@echo ""
|
||||
@echo "Docker Development Commands:"
|
||||
@echo " make docker-init - Build the custom k3s image (with pre-cached sandbox image)"
|
||||
@echo " make docker-start - Start Docker services (mode-aware from config.yaml, localhost:2026)"
|
||||
@echo " make docker-stop - Stop Docker development services"
|
||||
@echo " make docker-logs - View Docker development logs"
|
||||
@echo " make docker-logs-frontend - View Docker frontend logs"
|
||||
@echo " make docker-logs-gateway - View Docker gateway logs"
|
||||
|
||||
lint:
|
||||
uv run black --check .
|
||||
config:
|
||||
@if [ -f config.yaml ] || [ -f config.yml ] || [ -f configure.yml ]; then \
|
||||
echo "Error: configuration file already exists (config.yaml/config.yml/configure.yml). Aborting."; \
|
||||
exit 1; \
|
||||
fi
|
||||
@cp config.example.yaml config.yaml
|
||||
@test -f .env || cp .env.example .env
|
||||
@test -f frontend/.env || cp frontend/.env.example frontend/.env
|
||||
|
||||
serve:
|
||||
uv run server.py --reload
|
||||
# Check required tools
|
||||
check:
|
||||
@echo "=========================================="
|
||||
@echo " Checking Required Dependencies"
|
||||
@echo "=========================================="
|
||||
@echo ""
|
||||
@FAILED=0; \
|
||||
echo "Checking Node.js..."; \
|
||||
if command -v node >/dev/null 2>&1; then \
|
||||
NODE_VERSION=$$(node -v | sed 's/v//'); \
|
||||
NODE_MAJOR=$$(echo $$NODE_VERSION | cut -d. -f1); \
|
||||
if [ $$NODE_MAJOR -ge 22 ]; then \
|
||||
echo " ✓ Node.js $$NODE_VERSION (>= 22 required)"; \
|
||||
else \
|
||||
echo " ✗ Node.js $$NODE_VERSION found, but version 22+ is required"; \
|
||||
echo " Install from: https://nodejs.org/"; \
|
||||
FAILED=1; \
|
||||
fi; \
|
||||
else \
|
||||
echo " ✗ Node.js not found (version 22+ required)"; \
|
||||
echo " Install from: https://nodejs.org/"; \
|
||||
FAILED=1; \
|
||||
fi; \
|
||||
echo ""; \
|
||||
echo "Checking pnpm..."; \
|
||||
if command -v pnpm >/dev/null 2>&1; then \
|
||||
PNPM_VERSION=$$(pnpm -v); \
|
||||
echo " ✓ pnpm $$PNPM_VERSION"; \
|
||||
else \
|
||||
echo " ✗ pnpm not found"; \
|
||||
echo " Install: npm install -g pnpm"; \
|
||||
echo " Or visit: https://pnpm.io/installation"; \
|
||||
FAILED=1; \
|
||||
fi; \
|
||||
echo ""; \
|
||||
echo "Checking uv..."; \
|
||||
if command -v uv >/dev/null 2>&1; then \
|
||||
UV_VERSION=$$(uv --version | awk '{print $$2}'); \
|
||||
echo " ✓ uv $$UV_VERSION"; \
|
||||
else \
|
||||
echo " ✗ uv not found"; \
|
||||
echo " Install: curl -LsSf https://astral.sh/uv/install.sh | sh"; \
|
||||
echo " Or visit: https://docs.astral.sh/uv/getting-started/installation/"; \
|
||||
FAILED=1; \
|
||||
fi; \
|
||||
echo ""; \
|
||||
echo "Checking nginx..."; \
|
||||
if command -v nginx >/dev/null 2>&1; then \
|
||||
NGINX_VERSION=$$(nginx -v 2>&1 | awk -F'/' '{print $$2}'); \
|
||||
echo " ✓ nginx $$NGINX_VERSION"; \
|
||||
else \
|
||||
echo " ✗ nginx not found"; \
|
||||
echo " macOS: brew install nginx"; \
|
||||
echo " Ubuntu: sudo apt install nginx"; \
|
||||
echo " Or visit: https://nginx.org/en/download.html"; \
|
||||
FAILED=1; \
|
||||
fi; \
|
||||
echo ""; \
|
||||
if [ $$FAILED -eq 0 ]; then \
|
||||
echo "=========================================="; \
|
||||
echo " ✓ All dependencies are installed!"; \
|
||||
echo "=========================================="; \
|
||||
echo ""; \
|
||||
echo "You can now run:"; \
|
||||
echo " make install - Install project dependencies"; \
|
||||
echo " make dev - Start development server"; \
|
||||
else \
|
||||
echo "=========================================="; \
|
||||
echo " ✗ Some dependencies are missing"; \
|
||||
echo "=========================================="; \
|
||||
echo ""; \
|
||||
echo "Please install the missing tools and run 'make check' again."; \
|
||||
exit 1; \
|
||||
fi
|
||||
|
||||
test:
|
||||
uv run pytest tests/
|
||||
# Install all dependencies
|
||||
install:
|
||||
@echo "Installing backend dependencies..."
|
||||
@cd backend && uv sync
|
||||
@echo "Installing frontend dependencies..."
|
||||
@cd frontend && pnpm install
|
||||
@echo "✓ All dependencies installed"
|
||||
@echo ""
|
||||
@echo "=========================================="
|
||||
@echo " Optional: Pre-pull Sandbox Image"
|
||||
@echo "=========================================="
|
||||
@echo ""
|
||||
@echo "If you plan to use Docker/Container-based sandbox, you can pre-pull the image:"
|
||||
@echo " make setup-sandbox"
|
||||
@echo ""
|
||||
|
||||
langgraph-dev:
|
||||
uvx --refresh --from "langgraph-cli[inmem]" --with-editable . --python 3.12 langgraph dev --allow-blocking
|
||||
# Pre-pull sandbox Docker image (optional but recommended)
|
||||
setup-sandbox:
|
||||
@echo "=========================================="
|
||||
@echo " Pre-pulling Sandbox Container Image"
|
||||
@echo "=========================================="
|
||||
@echo ""
|
||||
@IMAGE=$$(grep -A 20 "# sandbox:" config.yaml 2>/dev/null | grep "image:" | awk '{print $$2}' | head -1); \
|
||||
if [ -z "$$IMAGE" ]; then \
|
||||
IMAGE="enterprise-public-cn-beijing.cr.volces.com/vefaas-public/all-in-one-sandbox:latest"; \
|
||||
echo "Using default image: $$IMAGE"; \
|
||||
else \
|
||||
echo "Using configured image: $$IMAGE"; \
|
||||
fi; \
|
||||
echo ""; \
|
||||
if command -v container >/dev/null 2>&1 && [ "$$(uname)" = "Darwin" ]; then \
|
||||
echo "Detected Apple Container on macOS, pulling image..."; \
|
||||
container pull "$$IMAGE" || echo "⚠ Apple Container pull failed, will try Docker"; \
|
||||
fi; \
	if command -v docker >/dev/null 2>&1; then \
		echo "Pulling image using Docker..."; \
		docker pull "$$IMAGE"; \
		echo ""; \
		echo "✓ Sandbox image pulled successfully"; \
	else \
		echo "✗ Neither Docker nor Apple Container is available"; \
		echo "  Please install Docker: https://docs.docker.com/get-docker/"; \
		exit 1; \
	fi

coverage:
	uv run pytest --cov=src tests/ --cov-report=term-missing --cov-report=xml

# Start all services
dev:
	@echo "Stopping existing services if any..."
	@-pkill -f "langgraph dev" 2>/dev/null || true
	@-pkill -f "uvicorn src.gateway.app:app" 2>/dev/null || true
	@-pkill -f "next dev" 2>/dev/null || true
	@-nginx -c $(PWD)/docker/nginx/nginx.local.conf -p $(PWD) -s quit 2>/dev/null || true
	@sleep 1
	@-pkill -9 nginx 2>/dev/null || true
	@-./scripts/cleanup-containers.sh deer-flow-sandbox 2>/dev/null || true
	@sleep 1
	@echo ""
	@echo "=========================================="
	@echo " Starting DeerFlow Development Server"
	@echo "=========================================="
	@echo ""
	@echo "Services starting up..."
	@echo " → Backend: LangGraph + Gateway"
	@echo " → Frontend: Next.js"
	@echo " → Nginx: Reverse Proxy"
	@echo ""
	@cleanup() { \
		trap - INT TERM; \
		echo ""; \
		echo "Shutting down services..."; \
		pkill -f "langgraph dev" 2>/dev/null || true; \
		pkill -f "uvicorn src.gateway.app:app" 2>/dev/null || true; \
		pkill -f "next dev" 2>/dev/null || true; \
		nginx -c $(PWD)/docker/nginx/nginx.local.conf -p $(PWD) -s quit 2>/dev/null || true; \
		sleep 1; \
		pkill -9 nginx 2>/dev/null || true; \
		echo "Cleaning up sandbox containers..."; \
		./scripts/cleanup-containers.sh deer-flow-sandbox 2>/dev/null || true; \
		echo "✓ All services stopped"; \
		exit 0; \
	}; \
	trap cleanup INT TERM; \
	mkdir -p logs; \
	echo "Starting LangGraph server..."; \
	cd backend && NO_COLOR=1 uv run langgraph dev --no-browser --allow-blocking --no-reload > ../logs/langgraph.log 2>&1 & \
	sleep 3; \
	echo "✓ LangGraph server started on localhost:2024"; \
	echo "Starting Gateway API..."; \
	cd backend && uv run uvicorn src.gateway.app:app --host 0.0.0.0 --port 8001 > ../logs/gateway.log 2>&1 & \
	sleep 3; \
	if ! lsof -i :8001 -sTCP:LISTEN -t >/dev/null 2>&1; then \
		echo "✗ Gateway API failed to start. Last log output:"; \
		tail -30 logs/gateway.log; \
		cleanup; \
	fi; \
	echo "✓ Gateway API started on localhost:8001"; \
	echo "Starting Frontend..."; \
	cd frontend && pnpm run dev > ../logs/frontend.log 2>&1 & \
	sleep 3; \
	echo "✓ Frontend started on localhost:3000"; \
	echo "Starting Nginx reverse proxy..."; \
	mkdir -p logs && nginx -g 'daemon off;' -c $(PWD)/docker/nginx/nginx.local.conf -p $(PWD) > logs/nginx.log 2>&1 & \
	sleep 2; \
	echo "✓ Nginx started on localhost:2026"; \
	echo ""; \
	echo "=========================================="; \
	echo " DeerFlow is ready!"; \
	echo "=========================================="; \
	echo ""; \
	echo " 🌐 Application: http://localhost:2026"; \
	echo " 📡 API Gateway: http://localhost:2026/api/*"; \
	echo " 🤖 LangGraph: http://localhost:2026/api/langgraph/*"; \
	echo ""; \
	echo " 📋 Logs:"; \
	echo " - LangGraph: logs/langgraph.log"; \
	echo " - Gateway: logs/gateway.log"; \
	echo " - Frontend: logs/frontend.log"; \
	echo " - Nginx: logs/nginx.log"; \
	echo ""; \
	echo "Press Ctrl+C to stop all services"; \
	echo ""; \
	wait

# Stop all services
stop:
	@echo "Stopping all services..."
	@-pkill -f "langgraph dev" 2>/dev/null || true
	@-pkill -f "uvicorn src.gateway.app:app" 2>/dev/null || true
	@-pkill -f "next dev" 2>/dev/null || true
	@-nginx -c $(PWD)/docker/nginx/nginx.local.conf -p $(PWD) -s quit 2>/dev/null || true
	@sleep 1
	@-pkill -9 nginx 2>/dev/null || true
	@echo "Cleaning up sandbox containers..."
	@-./scripts/cleanup-containers.sh deer-flow-sandbox 2>/dev/null || true
	@echo "✓ All services stopped"

# Clean up
clean: stop
	@echo "Cleaning up..."
	@-rm -rf logs/*.log 2>/dev/null || true
	@echo "✓ Cleanup complete"

# ==========================================
# Docker Development Commands
# ==========================================

# Initialize Docker containers and install dependencies
docker-init:
	@./scripts/docker.sh init

# Start Docker development environment
docker-start:
	@./scripts/docker.sh start

# Stop Docker development environment
docker-stop:
	@./scripts/docker.sh stop

# View Docker development logs
docker-logs:
	@./scripts/docker.sh logs

# View frontend-only Docker logs
docker-logs-frontend:
	@./scripts/docker.sh logs --frontend

# View gateway-only Docker logs
docker-logs-gateway:
	@./scripts/docker.sh logs --gateway
# 🦌 DeerFlow - 2.0

[](https://www.python.org/downloads/)
[](https://opensource.org/licenses/MIT)
[](https://deepwiki.com/bytedance/deer-flow)
<!-- DeepWiki badge generated by https://deepwiki.ryoppippi.com/ -->
<a href="https://trendshift.io/repositories/14699" target="_blank"><img src="https://trendshift.io/api/badge/repositories/14699" alt="bytedance%2Fdeer-flow | Trendshift" style="width: 250px; height: 55px;" width="250" height="55"/></a>

> On February 28th, 2026, DeerFlow claimed the 🏆 #1 spot on GitHub Trending following the launch of version 2. Thanks a million to our incredible community — you made this happen! 💪🔥

DeerFlow (**D**eep **E**xploration and **E**fficient **R**esearch **Flow**) is an open-source **super agent harness** that orchestrates **sub-agents**, **memory**, and **sandboxes** to do almost anything — powered by **extensible skills**.

[English](./README.md) | [简体中文](./README_zh.md) | [日本語](./README_ja.md) | [Deutsch](./README_de.md) | [Español](./README_es.md) | [Русский](./README_ru.md) | [Portuguese](./README_pt.md)

https://github.com/user-attachments/assets/a8bcadc4-e040-4cf2-8fda-dd768b999c18

> Originated from Open Source, give back to Open Source.

> [!NOTE]
> **DeerFlow 2.0 is a ground-up rewrite.** It shares no code with v1. If you're looking for the original Deep Research framework, it's maintained on the [`1.x` branch](https://github.com/bytedance/deer-flow/tree/main-1.x) — contributions there are still welcome. Active development has moved to 2.0.

## Official Website

Learn more and see **real demos** on our official website: **[deerflow.tech](https://deerflow.tech/)**.

## Demo

### Video

https://github.com/user-attachments/assets/f3786598-1f2a-4d07-919e-8b99dfa1de3e

In this demo, we showcase how to use DeerFlow to:

- Seamlessly integrate with MCP services
- Conduct the Deep Research process and produce a comprehensive report with images
- Create podcast audio based on the generated report

### Replays

- [How tall is the Eiffel Tower compared to the tallest building?](https://deerflow.tech/chat?replay=eiffel-tower-vs-tallest-building)
- [What are the top trending repositories on GitHub?](https://deerflow.tech/chat?replay=github-top-trending-repo)
- [Write an article about Nanjing's traditional dishes](https://deerflow.tech/chat?replay=nanjing-traditional-dishes)
- [How to decorate a rental apartment?](https://deerflow.tech/chat?replay=rental-apartment-decoration)
- [Visit our official website to explore more replays.](https://deerflow.tech/#case-studies)

---

## Table of Contents

- [🦌 DeerFlow - 2.0](#-deerflow---20)
- [Official Website](#official-website)
- [Demo](#demo)
- [Table of Contents](#table-of-contents)
- [Quick Start](#quick-start)
- [Supported Search Engines](#supported-search-engines)
- [Features](#features)
- [Architecture](#architecture)
- [Text-to-Speech Integration](#text-to-speech-integration)
- [Development](#development)
- [Docker](#docker)
- [Examples](#examples)
- [Configuration](#configuration)
- [Running the Application](#running-the-application)
  - [Option 1: Docker (Recommended)](#option-1-docker-recommended)
  - [Option 2: Local Development](#option-2-local-development)
- [Advanced](#advanced)
  - [Sandbox Mode](#sandbox-mode)
  - [MCP Server](#mcp-server)
- [From Deep Research to Super Agent Harness](#from-deep-research-to-super-agent-harness)
- [Core Features](#core-features)
  - [Skills \& Tools](#skills--tools)
  - [Sub-Agents](#sub-agents)
  - [Sandbox \& File System](#sandbox--file-system)
  - [Context Engineering](#context-engineering)
  - [Long-Term Memory](#long-term-memory)
- [Recommended Models](#recommended-models)
- [Embedded Python Client](#embedded-python-client)
- [FAQ](#faq)
- [Documentation](#documentation)
- [Contributing](#contributing)
- [License](#license)
- [Acknowledgments](#acknowledgments)

## Quick Start

DeerFlow is developed in Python and comes with a web UI written in Node.js. To ensure a smooth setup process, we recommend using the following tools:

### Recommended Tools

- **[`uv`](https://docs.astral.sh/uv/getting-started/installation/):**
  Simplifies Python environment and dependency management. `uv` automatically creates a virtual environment in the root directory and installs all required packages for you; there is no need to install a Python environment manually.

- **[`nvm`](https://github.com/nvm-sh/nvm):**
  Manage multiple versions of the Node.js runtime effortlessly.

- **[`pnpm`](https://pnpm.io/installation):**
  Install and manage dependencies of the Node.js project.

### Environment Requirements

Make sure your system meets the following minimum requirements:

- **[Python](https://www.python.org/downloads/):** Version `3.12+`
- **[Node.js](https://nodejs.org/en/download/):** Version `22+`

### Installation

```bash
# Clone the repository
git clone https://github.com/bytedance/deer-flow.git
cd deer-flow

# Install dependencies; uv will take care of the Python interpreter, venv creation, and installing the required packages
uv sync

# Configure .env with your API keys
# Tavily: https://app.tavily.com/home
# Brave Search: https://brave.com/search/api/
# volcengine TTS: Add your TTS credentials if you have them
cp .env.example .env

# See the 'Supported Search Engines' and 'Text-to-Speech Integration' sections below for all available options

# Configure conf.yaml for your LLM model and API keys
# Please refer to 'docs/configuration_guide.md' for more details
cp conf.yaml.example conf.yaml

# Install marp for PPT generation
# https://github.com/marp-team/marp-cli?tab=readme-ov-file#use-package-manager
brew install marp-cli
```

Optionally, install web UI dependencies via [pnpm](https://pnpm.io/installation):

```bash
cd deer-flow/web
pnpm install
```

### Configurations

Please refer to the [Configuration Guide](docs/configuration_guide.md) for more details.

> [!NOTE]
> Before you start the project, read the guide carefully and update the configurations to match your specific settings and requirements.

### Console UI

The quickest way to run the project is to use the console UI.

```bash
# Run the project in a bash-like shell
uv run main.py
```

### Web UI

This project also includes a Web UI, offering a more dynamic and engaging interactive experience.

> [!NOTE]
> You need to install the dependencies of the web UI first.

```bash
# Run both the backend and frontend servers in development mode
# On macOS/Linux
./bootstrap.sh -d

# On Windows
bootstrap.bat -d
```

Open your browser and visit [`http://localhost:3000`](http://localhost:3000) to explore the web UI.

Explore more details in the [`web`](./web/) directory.

## Supported Search Engines

DeerFlow supports multiple search engines that can be configured in your `.env` file using the `SEARCH_API` variable:

- **Tavily** (default): A specialized search API for AI applications

  - Requires `TAVILY_API_KEY` in your `.env` file
  - Sign up at: https://app.tavily.com/home

- **DuckDuckGo**: Privacy-focused search engine

  - No API key required

- **Brave Search**: Privacy-focused search engine with advanced features

  - Requires `BRAVE_SEARCH_API_KEY` in your `.env` file
  - Sign up at: https://brave.com/search/api/

- **Arxiv**: Scientific paper search for academic research

  - No API key required
  - Specialized for scientific and academic papers

To configure your preferred search engine, set the `SEARCH_API` variable in your `.env` file:

```bash
# Choose one: tavily, duckduckgo, brave_search, arxiv
SEARCH_API=tavily
```
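
Conceptually, `SEARCH_API` is just a dispatch key. A minimal sketch of that idea, assuming nothing about DeerFlow's internals (the function names below are hypothetical placeholders, not the project's API):

```python
import os

# Illustrative only: shows how a SEARCH_API value might select a backend.
def tavily_search(query: str) -> str:
    return f"[tavily] results for {query!r}"  # a real impl would call the Tavily API

def duckduckgo_search(query: str) -> str:
    return f"[duckduckgo] results for {query!r}"  # no API key required

_BACKENDS = {
    "tavily": tavily_search,
    "duckduckgo": duckduckgo_search,
}

def search(query: str) -> str:
    api = os.getenv("SEARCH_API", "tavily")  # same default as documented above
    try:
        return _BACKENDS[api](query)
    except KeyError:
        raise ValueError(f"Unsupported SEARCH_API: {api}") from None

if __name__ == "__main__":
    print(search("quantum computing"))
```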

## Features

### Core Capabilities

- 🤖 **LLM Integration**
  - Supports integration of most models through [litellm](https://docs.litellm.ai/docs/providers)
  - Support for open-source models like Qwen
  - OpenAI-compatible API interface
  - Multi-tier LLM system for different task complexities
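
Because model access goes through litellm, switching providers is usually just a model-string change. A minimal sketch (the model strings and keys are placeholders, not project defaults):

```python
from litellm import completion

# litellm normalizes many providers behind one OpenAI-style call;
# the model strings below are illustrative placeholders.
response = completion(
    model="openai/gpt-4o",  # or e.g. "ollama/qwen2.5" for a local Qwen model
    messages=[{"role": "user", "content": "Summarize DeerFlow in one sentence."}],
)
print(response.choices[0].message.content)
```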

### Tools and MCP Integrations

- 🔍 **Search and Retrieval**

  - Web search via Tavily, Brave Search, and more
  - Crawling with Jina
  - Advanced content extraction

- 📃 **RAG Integration**

  - Supports mentioning files from [RAGFlow](https://github.com/infiniflow/ragflow) within the input box. [Start up a RAGFlow server](https://ragflow.io/docs/dev/) first.

    ```bash
    # .env
    RAG_PROVIDER=ragflow
    RAGFLOW_API_URL="http://localhost:9388"
    RAGFLOW_API_KEY="ragflow-xxx"
    RAGFLOW_RETRIEVAL_SIZE=10
    ```

- 🔗 **Seamless MCP Integration**
  - Expands capabilities for private domain access, knowledge graphs, web browsing, and more
  - Facilitates integration of diverse research tools and methodologies

### Human Collaboration

- 🧠 **Human-in-the-loop**

  - Supports interactive modification of research plans using natural language
  - Supports auto-acceptance of research plans

- 📝 **Report Post-Editing**

  - Supports Notion-like block editing
  - Allows AI refinements, including AI-assisted polishing, sentence shortening, and expansion
  - Powered by [tiptap](https://tiptap.dev/)

### Content Creation

- 🎙️ **Podcast and Presentation Generation**
  - AI-powered podcast script generation and audio synthesis
  - Automated creation of simple PowerPoint presentations
  - Customizable templates for tailored content

## Architecture

DeerFlow implements a modular multi-agent system architecture designed for automated research and code analysis. The system is built on LangGraph, enabling a flexible state-based workflow where components communicate through a well-defined message-passing system.

> See it live at [deerflow.tech](https://deerflow.tech/#multi-agent-architecture)

The system employs a streamlined workflow with the following components:

1. **Coordinator**: The entry point that manages the workflow lifecycle

   - Initiates the research process based on user input
   - Delegates tasks to the planner when appropriate
   - Acts as the primary interface between the user and the system

2. **Planner**: Strategic component for task decomposition and planning

   - Analyzes research objectives and creates structured execution plans
   - Determines if enough context is available or if more research is needed
   - Manages the research flow and decides when to generate the final report

3. **Research Team**: A collection of specialized agents that execute the plan:

   - **Researcher**: Conducts web searches and information gathering using tools like web search engines, crawling, and even MCP services.
   - **Coder**: Handles code analysis, execution, and technical tasks using the Python REPL tool.

   Each agent has access to specific tools optimized for its role and operates within the LangGraph framework.

4. **Reporter**: Final-stage processor for research outputs
   - Aggregates findings from the research team
   - Processes and structures the collected information
   - Generates comprehensive research reports

## Text-to-Speech Integration

DeerFlow includes a Text-to-Speech (TTS) feature that converts research reports to speech. It uses the volcengine TTS API to generate high-quality audio from text; speed, volume, and pitch are all customizable.

### Using the TTS API

You can access the TTS functionality through the `/api/tts` endpoint:

```bash
# Example API call using curl
curl --location 'http://localhost:8000/api/tts' \
--header 'Content-Type: application/json' \
--data '{
    "text": "This is a test of the text-to-speech functionality.",
    "speed_ratio": 1.0,
    "volume_ratio": 1.0,
    "pitch_ratio": 1.0
}' \
--output speech.mp3
```
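
The same call from Python, as a minimal sketch using `requests` (the payload fields mirror the curl example above; error handling is kept deliberately small):

```python
import requests

# Mirrors the curl example above; assumes the server runs on localhost:8000.
payload = {
    "text": "This is a test of the text-to-speech functionality.",
    "speed_ratio": 1.0,
    "volume_ratio": 1.0,
    "pitch_ratio": 1.0,
}

resp = requests.post("http://localhost:8000/api/tts", json=payload, timeout=60)
resp.raise_for_status()

# The endpoint returns MP3 bytes, which we write to disk.
with open("speech.mp3", "wb") as f:
    f.write(resp.content)
```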

## Development

### Testing

Run the test suite:

```bash
# Run all tests
make test

# Run a specific test file
pytest tests/integration/test_workflow.py

# Run with coverage
make coverage
```

### Code Quality

```bash
# Run linting
make lint

# Format code
make format
```

### Debugging with LangGraph Studio

DeerFlow uses LangGraph for its workflow architecture. You can use LangGraph Studio to debug and visualize the workflow in real time.

#### Running LangGraph Studio Locally

DeerFlow includes a `langgraph.json` configuration file that defines the graph structure and dependencies for LangGraph Studio. This file points to the workflow graphs defined in the project and automatically loads environment variables from the `.env` file.

##### Mac

```bash
# Install the uv package manager if you don't have it
curl -LsSf https://astral.sh/uv/install.sh | sh

# Install dependencies and start the LangGraph server
uvx --refresh --from "langgraph-cli[inmem]" --with-editable . --python 3.12 langgraph dev --allow-blocking
```

##### Windows / Linux

```bash
# Install dependencies
pip install -e .
pip install -U "langgraph-cli[inmem]"

# Start the LangGraph server
langgraph dev
```

After starting the LangGraph server, you'll see several URLs in the terminal:

- API: http://127.0.0.1:2024
- Studio UI: https://smith.langchain.com/studio/?baseUrl=http://127.0.0.1:2024
- API Docs: http://127.0.0.1:2024/docs

Open the Studio UI link in your browser to access the debugging interface.
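
You can also talk to the same server programmatically. A small sketch using the `langgraph_sdk` client (assumes the default port above; the assistant listing is just a connectivity check):

```python
import asyncio

from langgraph_sdk import get_client

async def main() -> None:
    # Connects to the locally running `langgraph dev` server from above.
    client = get_client(url="http://127.0.0.1:2024")
    assistants = await client.assistants.search()
    for assistant in assistants:
        print(assistant["assistant_id"], assistant["graph_id"])

asyncio.run(main())
```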

#### Using LangGraph Studio

In the Studio UI, you can:

1. Visualize the workflow graph and see how components connect
2. Trace execution in real time to see how data flows through the system
3. Inspect the state at each step of the workflow
4. Debug issues by examining the inputs and outputs of each component
5. Provide feedback during the planning phase to refine research plans

When you submit a research topic in the Studio UI, you'll be able to see the entire workflow execution, including:

- The planning phase where the research plan is created
- The feedback loop where you can modify the plan
- The research and writing phases for each section
- The final report generation

### Enabling LangSmith Tracing

DeerFlow supports LangSmith tracing to help you debug and monitor your workflows. To enable LangSmith tracing:

1. Make sure your `.env` file has the following configurations (see `.env.example`):

   ```bash
   LANGSMITH_TRACING=true
   LANGSMITH_ENDPOINT="https://api.smith.langchain.com"
   LANGSMITH_API_KEY="xxx"
   LANGSMITH_PROJECT="xxx"
   ```

2. Start tracing and visualize the graph locally with LangSmith by running:

   ```bash
   langgraph dev
   ```

This will enable trace visualization in LangGraph Studio and send your traces to LangSmith for monitoring and analysis.

## Docker

You can also run this project with Docker.

First, read the [configuration guide](docs/configuration_guide.md) mentioned above, and make sure the `.env` and `conf.yaml` files are ready.

Second, build a Docker image of your own web server:

```bash
docker build -t deer-flow-api .
```

Finally, start a Docker container running the web server:

```bash
# Replace deer-flow-api-app with your preferred container name
docker run -d -t -p 8000:8000 --env-file .env --name deer-flow-api-app deer-flow-api

# stop the server
docker stop deer-flow-api-app
```

### Docker Compose (includes both backend and frontend)

DeerFlow provides a docker-compose setup to easily run both the backend and frontend together:

```bash
# build the docker image
docker compose build

# start the server
docker compose up
```

## Examples

The following examples demonstrate the capabilities of DeerFlow:

### Research Reports

1. **OpenAI Sora Report** - Analysis of OpenAI's Sora AI tool

   - Discusses features, access, prompt engineering, limitations, and ethical considerations
   - [View full report](examples/openai_sora_report.md)

2. **Google's Agent to Agent Protocol Report** - Overview of Google's Agent to Agent (A2A) protocol

   - Discusses its role in AI agent communication and its relationship with Anthropic's Model Context Protocol (MCP)
   - [View full report](examples/what_is_agent_to_agent_protocol.md)

3. **What is MCP?** - A comprehensive analysis of the term "MCP" across multiple contexts

   - Explores Model Context Protocol in AI, Monocalcium Phosphate in chemistry, and Micro-channel Plate in electronics
   - [View full report](examples/what_is_mcp.md)

4. **Bitcoin Price Fluctuations** - Analysis of recent Bitcoin price movements

   - Examines market trends, regulatory influences, and technical indicators
   - Provides recommendations based on historical data
   - [View full report](examples/bitcoin_price_fluctuation.md)

5. **What is LLM?** - An in-depth exploration of Large Language Models

   - Discusses architecture, training, applications, and ethical considerations
   - [View full report](examples/what_is_llm.md)

6. **How to Use Claude for Deep Research?** - Best practices and workflows for using Claude in deep research

   - Covers prompt engineering, data analysis, and integration with other tools
   - [View full report](examples/how_to_use_claude_deep_research.md)

7. **AI Adoption in Healthcare: Influencing Factors** - Analysis of factors driving AI adoption in healthcare

   - Discusses AI technologies, data quality, ethical considerations, economic evaluations, organizational readiness, and digital infrastructure
   - [View full report](examples/AI_adoption_in_healthcare.md)

8. **Quantum Computing Impact on Cryptography** - Analysis of quantum computing's impact on cryptography

   - Discusses vulnerabilities of classical cryptography, post-quantum cryptography, and quantum-resistant cryptographic solutions
   - [View full report](examples/Quantum_Computing_Impact_on_Cryptography.md)

9. **Cristiano Ronaldo's Performance Highlights** - Analysis of Cristiano Ronaldo's performance highlights

   - Discusses his career achievements, international goals, and performance in various matches
   - [View full report](examples/Cristiano_Ronaldo's_Performance_Highlights.md)

To run these examples or create your own research reports, you can use the following commands:

```bash
# Run with a specific query
uv run main.py "What factors are influencing AI adoption in healthcare?"

# Run with custom planning parameters
uv run main.py --max_plan_iterations 3 "How does quantum computing impact cryptography?"

# Run in interactive mode with built-in questions
uv run main.py --interactive

# Or run with a basic interactive prompt
uv run main.py

# View all available options
uv run main.py --help
```

### Interactive Mode

The application also supports an interactive mode with built-in questions in both English and Chinese:

1. Launch the interactive mode:

   ```bash
   uv run main.py --interactive
   ```

2. Select your preferred language (English or 中文)

3. Choose from a list of built-in questions or select the option to ask your own question

4. The system will process your question and generate a comprehensive research report

### Human in the Loop

DeerFlow includes a human-in-the-loop mechanism that allows you to review, edit, and approve research plans before they are executed:

1. **Plan Review**: When human in the loop is enabled, the system will present the generated research plan for your review before execution

2. **Providing Feedback**: You can:

   - Accept the plan by responding with `[ACCEPTED]`
   - Edit the plan by providing feedback (e.g., `[EDIT PLAN] Add more steps about technical implementation`)
   - The system will incorporate your feedback and generate a revised plan

3. **Auto-acceptance**: You can enable auto-acceptance to skip the review process:

   - Via API: Set `auto_accepted_plan: true` in your request

4. **API Integration**: When using the API, you can provide feedback through the `feedback` parameter:

   ```json
   {
     "messages": [{ "role": "user", "content": "What is quantum computing?" }],
     "thread_id": "my_thread_id",
     "auto_accepted_plan": false,
     "feedback": "[EDIT PLAN] Include more about quantum algorithms"
   }
   ```
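
The same request can also be sent from Python. A hedged sketch using `requests`: the endpoint path here is an assumption for illustration (the Docker section above runs the API server on `localhost:8000`), so check your deployment's API docs before relying on it:

```python
import requests

# Assumed endpoint path, for illustration only.
url = "http://localhost:8000/api/chat/stream"

payload = {
    "messages": [{"role": "user", "content": "What is quantum computing?"}],
    "thread_id": "my_thread_id",
    "auto_accepted_plan": False,
    "feedback": "[EDIT PLAN] Include more about quantum algorithms",
}

# The chat endpoint streams events, so iterate over lines as they arrive.
with requests.post(url, json=payload, stream=True, timeout=300) as resp:
    resp.raise_for_status()
    for line in resp.iter_lines():
        if line:
            print(line.decode("utf-8"))
```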
### Command Line Arguments

The application supports several command-line arguments to customize its behavior:

- **query**: The research query to process (can be multiple words)
- **--interactive**: Run in interactive mode with built-in questions
- **--max_plan_iterations**: Maximum number of planning cycles (default: 1)
- **--max_step_num**: Maximum number of steps in a research plan (default: 3)
- **--debug**: Enable detailed debug logging

## Configuration

For DeerFlow 2.0, setup is driven by the project `Makefile` and `config.yaml`:

1. **Clone the DeerFlow repository**

   ```bash
   git clone https://github.com/bytedance/deer-flow.git
   cd deer-flow
   ```

2. **Generate local configuration files**

   From the project root directory (`deer-flow/`), run:

   ```bash
   make config
   ```

   This command creates local configuration files based on the provided example templates.

3. **Configure your preferred model(s)**

   Edit `config.yaml` and define at least one model:

   ```yaml
   models:
     - name: gpt-4 # Internal identifier
       display_name: GPT-4 # Human-readable name
       use: langchain_openai:ChatOpenAI # LangChain class path
       model: gpt-4 # Model identifier for API
       api_key: $OPENAI_API_KEY # API key (recommended: use env var)
       max_tokens: 4096 # Maximum tokens per request
       temperature: 0.7 # Sampling temperature
   ```

4. **Set API keys for your configured model(s)**

   Choose one of the following methods:

   - Option A: Edit the `.env` file in the project root (Recommended)

     ```bash
     TAVILY_API_KEY=your-tavily-api-key
     OPENAI_API_KEY=your-openai-api-key
     # Add other provider keys as needed
     ```

   - Option B: Export environment variables in your shell

     ```bash
     export OPENAI_API_KEY=your-openai-api-key
     ```

   - Option C: Edit `config.yaml` directly (Not recommended for production)

     ```yaml
     models:
       - name: gpt-4
         api_key: your-actual-api-key-here # Replace placeholder
     ```

## Running the Application

### Option 1: Docker (Recommended)

The fastest way to get started with a consistent environment:

1. **Initialize and start**:

   ```bash
   make docker-init  # Pull sandbox image (only once, or when the image updates)
   make docker-start # Start services (auto-detects sandbox mode from config.yaml)
   ```

   `make docker-start` starts `provisioner` only when `config.yaml` uses provisioner mode (`sandbox.use: src.community.aio_sandbox:AioSandboxProvider` with `provisioner_url`).

2. **Access**: http://localhost:2026

See [CONTRIBUTING.md](CONTRIBUTING.md) for a detailed Docker development guide.

### Option 2: Local Development

If you prefer running services locally:

1. **Check prerequisites**:

   ```bash
   make check # Verifies Node.js 22+, pnpm, uv, nginx
   ```

2. **Install dependencies**:

   ```bash
   make install # Install backend + frontend dependencies
   ```

3. **(Optional) Pre-pull the sandbox image**:

   ```bash
   # Recommended if using a Docker/Container-based sandbox
   make setup-sandbox
   ```

4. **Start services**:

   ```bash
   make dev
   ```

5. **Access**: http://localhost:2026

## Advanced

### Sandbox Mode

DeerFlow supports multiple sandbox execution modes:

- **Local Execution** (runs sandbox code directly on the host machine)
- **Docker Execution** (runs sandbox code in isolated Docker containers)
- **Docker Execution with Kubernetes** (runs sandbox code in Kubernetes pods via the provisioner service)

For Docker development, service startup follows the `config.yaml` sandbox mode; in Local and Docker modes, `provisioner` is not started. A sketch of this detection idea follows below.

See the [Sandbox Configuration Guide](backend/docs/CONFIGURATION.md#sandbox) to configure your preferred mode.
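
A minimal sketch of how such mode detection could work, assuming the `config.yaml` shape quoted in the Docker section above (`sandbox.use` plus an optional `provisioner_url`); the real logic lives in the Docker helper scripts, not in this snippet:

```python
import yaml  # PyYAML

AIO_PROVIDER = "src.community.aio_sandbox:AioSandboxProvider"

def needs_provisioner(config_path: str = "config.yaml") -> bool:
    """Return True when config.yaml selects provisioner-backed sandboxing."""
    with open(config_path, encoding="utf-8") as f:
        config = yaml.safe_load(f) or {}
    sandbox = config.get("sandbox") or {}
    # Provisioner mode = AioSandboxProvider plus a provisioner_url entry.
    return sandbox.get("use") == AIO_PROVIDER and bool(sandbox.get("provisioner_url"))

if __name__ == "__main__":
    print("start provisioner:", needs_provisioner())
```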

### MCP Server

DeerFlow supports configurable MCP servers and skills to extend its capabilities. For HTTP/SSE MCP servers, OAuth token flows are supported (`client_credentials`, `refresh_token`). See the [MCP Server Guide](backend/docs/MCP_SERVER.md) for detailed instructions.

## From Deep Research to Super Agent Harness

DeerFlow started as a Deep Research framework — and the community ran with it. Since launch, developers have pushed it far beyond research: building data pipelines, generating slide decks, spinning up dashboards, automating content workflows. Things we never anticipated.

That told us something important: DeerFlow wasn't just a research tool. It was a **harness** — a runtime that gives agents the infrastructure to actually get work done.

So we rebuilt it from scratch.

DeerFlow 2.0 is no longer a framework you wire together. It's a super agent harness — batteries included, fully extensible. Built on LangGraph and LangChain, it ships with everything an agent needs out of the box: a filesystem, memory, skills, sandboxed execution, and the ability to plan and spawn sub-agents for complex, multi-step tasks.

Use it as-is. Or tear it apart and make it yours.

## Core Features

### Skills & Tools

Skills are what make DeerFlow do *almost anything*.

A standard Agent Skill is a structured capability module — a Markdown file that defines a workflow, best practices, and references to supporting resources. DeerFlow ships with built-in skills for research, report generation, slide creation, web pages, image and video generation, and more. But the real power is extensibility: add your own skills, replace the built-in ones, or combine them into compound workflows.

Skills are loaded progressively — only when the task needs them, not all at once. This keeps the context window lean and makes DeerFlow work well even with token-sensitive models.

Tools follow the same philosophy. DeerFlow comes with a core toolset — web search, web fetch, file operations, bash execution — and supports custom tools via MCP servers and Python functions. Swap anything. Add anything.

```
# Paths inside the sandbox container
/mnt/skills/public
├── research/SKILL.md
├── report-generation/SKILL.md
├── slide-creation/SKILL.md
├── web-page/SKILL.md
└── image-generation/SKILL.md

/mnt/skills/custom
└── your-custom-skill/SKILL.md ← yours
```
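
To make "loaded progressively" concrete, here is a minimal sketch of lazy skill discovery over that layout. The directory paths come from the tree above; the function and variable names are illustrative, not DeerFlow's internal API:

```python
from pathlib import Path

SKILL_ROOTS = [Path("/mnt/skills/public"), Path("/mnt/skills/custom")]

def list_skills() -> dict[str, Path]:
    """Index available skills by directory name without reading their bodies."""
    index: dict[str, Path] = {}
    for root in SKILL_ROOTS:
        if not root.is_dir():
            continue
        for skill_md in root.glob("*/SKILL.md"):
            index[skill_md.parent.name] = skill_md
    return index

def load_skill(name: str, index: dict[str, Path]) -> str:
    """Read a skill's Markdown only when a task actually needs it."""
    return index[name].read_text(encoding="utf-8")

# Only the index is built up front; SKILL.md bodies stay out of the
# context window until load_skill() is called for a specific task.
skills = list_skills()
```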

### Sub-Agents

Complex tasks rarely fit in a single pass. DeerFlow decomposes them.

The lead agent can spawn sub-agents on the fly — each with its own scoped context, tools, and termination conditions. Sub-agents run in parallel when possible and report back structured results, which the lead agent synthesizes into a coherent output.

This is how DeerFlow handles tasks that take minutes to hours: a research task might fan out into a dozen sub-agents, each exploring a different angle, then converge into a single report — or a website, or a slide deck with generated visuals. One harness, many hands.
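
The fan-out/converge shape is easy to picture in plain Python. A toy sketch (all names here are hypothetical; real sub-agents carry scoped context, tools, and termination conditions rather than bare strings):

```python
from concurrent.futures import ThreadPoolExecutor

def run_subagent(angle: str) -> str:
    # Toy stand-in for a sub-agent run: one research angle in, one result out.
    return f"findings on {angle}"

def lead_agent(task: str, angles: list[str]) -> str:
    # Fan out: one scoped sub-agent per angle, run in parallel when possible.
    with ThreadPoolExecutor() as pool:
        results = list(pool.map(run_subagent, angles))
    # Converge: the lead agent synthesizes structured results into one output.
    return f"{task}:\n" + "\n".join(f"- {r}" for r in results)

print(lead_agent("quantum computing report", ["hardware", "algorithms", "industry"]))
```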

### Sandbox & File System

DeerFlow doesn't just *talk* about doing things. It has its own computer.

Each task runs inside an isolated Docker container with a full filesystem — skills, workspace, uploads, outputs. The agent reads, writes, and edits files. It executes bash commands and code. It views images. All sandboxed, all auditable, zero contamination between sessions.

This is the difference between a chatbot with tool access and an agent with an actual execution environment.

```
# Paths inside the sandbox container
/mnt/user-data/
├── uploads/    ← your files
├── workspace/  ← agents' working directory
└── outputs/    ← final deliverables
```

### Context Engineering

**Isolated Sub-Agent Context**: Each sub-agent runs in its own isolated context; it cannot see the context of the main agent or of other sub-agents. This keeps every sub-agent focused on its own task instead of being distracted by conversation history that isn't its concern.

**Summarization**: Within a session, DeerFlow manages context aggressively — summarizing completed sub-tasks, offloading intermediate results to the filesystem, compressing what's no longer immediately relevant. This lets it stay sharp across long, multi-step tasks without blowing the context window.
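
A minimal sketch of the offload-and-summarize idea, assuming the workspace path from the sandbox layout above (the summarizer here is a trivial placeholder for what would really be an LLM call):

```python
from pathlib import Path

# Assumes the sandbox container's filesystem layout shown earlier.
WORKSPACE = Path("/mnt/user-data/workspace")

def summarize(text: str) -> str:
    # Placeholder: a real harness would ask an LLM for a compact summary.
    return text[:200] + ("..." if len(text) > 200 else "")

def offload_subtask(name: str, full_result: str) -> str:
    """Write the full result to disk; keep only a short summary in context."""
    path = WORKSPACE / f"{name}.md"
    path.write_text(full_result, encoding="utf-8")
    return f"{summarize(full_result)} (full result: {path})"

# The context window keeps the one-liner; the filesystem keeps everything.
context_note = offload_subtask("market-research", "long intermediate findings...")
```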

### Long-Term Memory

Most agents forget everything the moment a conversation ends. DeerFlow remembers.

Across sessions, DeerFlow builds a persistent memory of your profile, preferences, and accumulated knowledge. The more you use it, the better it knows you — your writing style, your technical stack, your recurring workflows. Memory is stored locally and stays under your control.

## Recommended Models

DeerFlow is model-agnostic — it works with any LLM that implements the OpenAI-compatible API. That said, it performs best with models that support:

- **Long context windows** (100k+ tokens) for deep research and multi-step tasks
- **Reasoning capabilities** for adaptive planning and complex decomposition
- **Multimodal inputs** for image understanding and video comprehension
- **Strong tool-use** for reliable function calling and structured outputs

## Embedded Python Client

DeerFlow can be used as an embedded Python library without running the full HTTP services. The `DeerFlowClient` provides direct in-process access to all agent and Gateway capabilities, returning the same response schemas as the HTTP Gateway API:

```python
from src.client import DeerFlowClient

client = DeerFlowClient()

# Chat
response = client.chat("Analyze this paper for me", thread_id="my-thread")

# Streaming (LangGraph SSE protocol: values, messages-tuple, end)
for event in client.stream("hello"):
    if event.type == "messages-tuple" and event.data.get("type") == "ai":
        print(event.data["content"])

# Configuration & management — returns Gateway-aligned dicts
models = client.list_models()  # {"models": [...]}
skills = client.list_skills()  # {"skills": [...]}
client.update_skill("web-search", enabled=True)
client.upload_files("thread-1", ["./report.pdf"])  # {"success": True, "files": [...]}
```

All dict-returning methods are validated against the Gateway's Pydantic response models in CI (`TestGatewayConformance`), ensuring the embedded client stays in sync with the HTTP API schemas. See `backend/src/client.py` for full API documentation.

## FAQ

Please refer to [FAQ.md](docs/FAQ.md) for more details.

## Documentation

- [Contributing Guide](CONTRIBUTING.md) - Development environment setup and workflow
- [Configuration Guide](backend/docs/CONFIGURATION.md) - Setup and configuration instructions
- [Architecture Overview](backend/CLAUDE.md) - Technical architecture details
- [Backend Architecture](backend/README.md) - Backend architecture and API reference

## Contributing

We welcome contributions! Please see [CONTRIBUTING.md](CONTRIBUTING.md) for development setup, workflow, and guidelines.

Regression coverage includes Docker sandbox mode detection and provisioner kubeconfig-path handling tests in `backend/tests/`.

## License

This project is open source and available under the [MIT License](./LICENSE).

## Acknowledgments

We would like to extend our sincere appreciation to the following projects for their contributions:

- **[LangChain](https://github.com/langchain-ai/langchain)**: Their exceptional framework powers our LLM interactions and chains, enabling seamless integration and functionality.
- **[LangGraph](https://github.com/langchain-ai/langgraph)**: Their innovative approach to multi-agent orchestration has been instrumental in enabling DeerFlow's sophisticated workflows.
- **[Novel](https://github.com/steven-tey/novel)**: Their Notion-style WYSIWYG editor supports our report editing and AI-assisted rewriting.
- **[RAGFlow](https://github.com/infiniflow/ragflow)**: We have achieved support for research on users' private knowledge bases through integration with RAGFlow.

These projects exemplify the transformative power of open-source collaboration, and we are proud to build upon their foundations.
-495
@@ -1,495 +0,0 @@
|
||||
# 🦌 DeerFlow
|
||||
|
||||
[](https://www.python.org/downloads/)
|
||||
[](https://opensource.org/licenses/MIT)
|
||||
[](https://deepwiki.com/bytedance/deer-flow)
|
||||
<!-- DeepWiki badge generated by https://deepwiki.ryoppippi.com/ -->
|
||||
|
||||
[English](./README.md) | [简体中文](./README_zh.md) | [日本語](./README_ja.md) | [Deutsch](./README_de.md) | [Español](./README_es.md) | [Русский](./README_ru.md) | [Portuguese](./README_pt.md)
|
||||
|
||||
> Aus Open Source entstanden, an Open Source zurückgeben.
|
||||
|
||||
**DeerFlow** (**D**eep **E**xploration and **E**fficient **R**esearch **Flow**) ist ein Community-getriebenes Framework für tiefgehende Recherche, das auf der großartigen Arbeit der Open-Source-Community aufbaut. Unser Ziel ist es, Sprachmodelle mit spezialisierten Werkzeugen für Aufgaben wie Websuche, Crawling und Python-Code-Ausführung zu kombinieren und gleichzeitig der Community, die dies möglich gemacht hat, etwas zurückzugeben.
|
||||
|
||||
Besuchen Sie [unsere offizielle Website](https://deerflow.tech/) für weitere Details.
|
||||
|
||||
## Demo
|
||||
|
||||
### Video
|
||||
|
||||
https://github.com/user-attachments/assets/f3786598-1f2a-4d07-919e-8b99dfa1de3e
|
||||
|
||||
In dieser Demo zeigen wir, wie man DeerFlow nutzt, um:
|
||||
- Nahtlos mit MCP-Diensten zu integrieren
|
||||
- Den Prozess der tiefgehenden Recherche durchzuführen und einen umfassenden Bericht mit Bildern zu erstellen
|
||||
- Podcast-Audio basierend auf dem generierten Bericht zu erstellen
|
||||
|
||||
### Wiedergaben
|
||||
|
||||
- [Wie hoch ist der Eiffelturm im Vergleich zum höchsten Gebäude?](https://deerflow.tech/chat?replay=eiffel-tower-vs-tallest-building)
|
||||
- [Was sind die angesagtesten Repositories auf GitHub?](https://deerflow.tech/chat?replay=github-top-trending-repo)
|
||||
- [Einen Artikel über traditionelle Gerichte aus Nanjing schreiben](https://deerflow.tech/chat?replay=nanjing-traditional-dishes)
|
||||
- [Wie dekoriert man eine Mietwohnung?](https://deerflow.tech/chat?replay=rental-apartment-decoration)
|
||||
- [Besuchen Sie unsere offizielle Website, um weitere Wiedergaben zu entdecken.](https://deerflow.tech/#case-studies)
|
||||
|
||||
---
|
||||
|
||||
|
||||
## 📑 Inhaltsverzeichnis
|
||||
|
||||
- [🚀 Schnellstart](#schnellstart)
|
||||
- [🌟 Funktionen](#funktionen)
|
||||
- [🏗️ Architektur](#architektur)
|
||||
- [🛠️ Entwicklung](#entwicklung)
|
||||
- [🗣️ Text-zu-Sprache-Integration](#text-zu-sprache-integration)
|
||||
- [📚 Beispiele](#beispiele)
|
||||
- [❓ FAQ](#faq)
|
||||
- [📜 Lizenz](#lizenz)
|
||||
- [💖 Danksagungen](#danksagungen)
|
||||
- [⭐ Star-Verlauf](#star-verlauf)
|
||||
|
||||
|
||||
## Schnellstart
|
||||
|
||||
DeerFlow ist in Python entwickelt und kommt mit einer in Node.js geschriebenen Web-UI. Um einen reibungslosen Einrichtungsprozess zu gewährleisten, empfehlen wir die Verwendung der folgenden Tools:
|
||||
|
||||
### Empfohlene Tools
|
||||
- **[`uv`](https://docs.astral.sh/uv/getting-started/installation/):**
|
||||
Vereinfacht die Verwaltung von Python-Umgebungen und Abhängigkeiten. `uv` erstellt automatisch eine virtuelle Umgebung im Stammverzeichnis und installiert alle erforderlichen Pakete für Sie—keine manuelle Installation von Python-Umgebungen notwendig.
|
||||
|
||||
- **[`nvm`](https://github.com/nvm-sh/nvm):**
|
||||
Verwalten Sie mühelos mehrere Versionen der Node.js-Laufzeit.
|
||||
|
||||
- **[`pnpm`](https://pnpm.io/installation):**
|
||||
Installieren und verwalten Sie Abhängigkeiten des Node.js-Projekts.
|
||||
|
||||
### Umgebungsanforderungen
|
||||
Stellen Sie sicher, dass Ihr System die folgenden Mindestanforderungen erfüllt:
|
||||
- **[Python](https://www.python.org/downloads/):** Version `3.12+`
|
||||
- **[Node.js](https://nodejs.org/en/download/):** Version `22+`
|
||||
|
||||
### Installation
|
||||
```bash
|
||||
# Repository klonen
|
||||
git clone https://github.com/bytedance/deer-flow.git
|
||||
cd deer-flow
|
||||
|
||||
# Abhängigkeiten installieren, uv kümmert sich um den Python-Interpreter und die Erstellung der venv sowie die Installation der erforderlichen Pakete
|
||||
uv sync
|
||||
|
||||
# Konfigurieren Sie .env mit Ihren API-Schlüsseln
|
||||
# Tavily: https://app.tavily.com/home
|
||||
# Brave_SEARCH: https://brave.com/search/api/
|
||||
# volcengine TTS: Fügen Sie Ihre TTS-Anmeldedaten hinzu, falls vorhanden
|
||||
cp .env.example .env
|
||||
|
||||
# Siehe die Abschnitte 'Unterstützte Suchmaschinen' und 'Text-zu-Sprache-Integration' unten für alle verfügbaren Optionen
|
||||
|
||||
# Konfigurieren Sie conf.yaml für Ihr LLM-Modell und API-Schlüssel
|
||||
# Weitere Details finden Sie unter 'docs/configuration_guide.md'
|
||||
cp conf.yaml.example conf.yaml
|
||||
|
||||
# Installieren Sie marp für PPT-Generierung
|
||||
# https://github.com/marp-team/marp-cli?tab=readme-ov-file#use-package-manager
|
||||
brew install marp-cli
|
||||
```
|
||||
|
||||
Optional können Sie Web-UI-Abhängigkeiten über [pnpm](https://pnpm.io/installation) installieren:
|
||||
|
||||
```bash
|
||||
cd deer-flow/web
|
||||
pnpm install
|
||||
```
|
||||
|
||||
### Konfigurationen
|
||||
|
||||
Weitere Informationen finden Sie im [Konfigurationsleitfaden](docs/configuration_guide.md).
|
||||
|
||||
> [!HINWEIS]
|
||||
> Lesen Sie den Leitfaden sorgfältig, bevor Sie das Projekt starten, und aktualisieren Sie die Konfigurationen entsprechend Ihren spezifischen Einstellungen und Anforderungen.
|
||||
|
||||
### Konsolen-UI
|
||||
|
||||
Der schnellste Weg, um das Projekt auszuführen, ist die Verwendung der Konsolen-UI.
|
||||
|
||||
```bash
|
||||
# Führen Sie das Projekt in einer bash-ähnlichen Shell aus
|
||||
uv run main.py
|
||||
```
|
||||
|
||||
### Web-UI
|
||||
|
||||
Dieses Projekt enthält auch eine Web-UI, die ein dynamischeres und ansprechenderes interaktives Erlebnis bietet.
|
||||
> [!HINWEIS]
|
||||
> Sie müssen zuerst die Abhängigkeiten der Web-UI installieren.
|
||||
|
||||
```bash
|
||||
# Führen Sie sowohl den Backend- als auch den Frontend-Server im Entwicklungsmodus aus
|
||||
# Unter macOS/Linux
|
||||
./bootstrap.sh -d
|
||||
|
||||
# Unter Windows
|
||||
bootstrap.bat -d
|
||||
```
|
||||
|
||||
Öffnen Sie Ihren Browser und besuchen Sie [`http://localhost:3000`](http://localhost:3000), um die Web-UI zu erkunden.
|
||||
|
||||
Weitere Details finden Sie im Verzeichnis [`web`](./web/).
|
||||
|
||||
|
||||
## Unterstützte Suchmaschinen
|
||||
|
||||
DeerFlow unterstützt mehrere Suchmaschinen, die in Ihrer `.env`-Datei über die Variable `SEARCH_API` konfiguriert werden können:
|
||||
|
||||
- **Tavily** (Standard): Eine spezialisierte Such-API für KI-Anwendungen
|
||||
- Erfordert `TAVILY_API_KEY` in Ihrer `.env`-Datei
|
||||
- Registrieren Sie sich unter: https://app.tavily.com/home
|
||||
|
||||
- **DuckDuckGo**: Datenschutzorientierte Suchmaschine
|
||||
- Kein API-Schlüssel erforderlich
|
||||
|
||||
- **Brave Search**: Datenschutzorientierte Suchmaschine mit erweiterten Funktionen
|
||||
- Erfordert `BRAVE_SEARCH_API_KEY` in Ihrer `.env`-Datei
|
||||
- Registrieren Sie sich unter: https://brave.com/search/api/
|
||||
|
||||
- **Arxiv**: Wissenschaftliche Papiersuche für akademische Forschung
|
||||
- Kein API-Schlüssel erforderlich
|
||||
- Spezialisiert auf wissenschaftliche und akademische Papiere
|
||||
|
||||
Um Ihre bevorzugte Suchmaschine zu konfigurieren, setzen Sie die Variable `SEARCH_API` in Ihrer `.env`-Datei:
|
||||
|
||||
```bash
|
||||
# Wählen Sie eine: tavily, duckduckgo, brave_search, arxiv
|
||||
SEARCH_API=tavily
|
||||
```
|
||||
|
||||
## Funktionen
|
||||
|
||||
### Kernfähigkeiten
|
||||
|
||||
- 🤖 **LLM-Integration**
|
||||
- Unterstützt die Integration der meisten Modelle über [litellm](https://docs.litellm.ai/docs/providers).
|
||||
- Unterstützung für Open-Source-Modelle wie Qwen
|
||||
- OpenAI-kompatible API-Schnittstelle
|
||||
- Mehrstufiges LLM-System für unterschiedliche Aufgabenkomplexitäten
|
||||
|
||||
### Tools und MCP-Integrationen
|
||||
|
||||
- 🔍 **Suche und Abruf**
|
||||
- Websuche über Tavily, Brave Search und mehr
|
||||
- Crawling mit Jina
|
||||
- Fortgeschrittene Inhaltsextraktion
|
||||
|
||||
- 🔗 **MCP Nahtlose Integration**
|
||||
- Erweiterte Fähigkeiten für privaten Domänenzugriff, Wissensgraphen, Webbrowsing und mehr
|
||||
- Erleichtert die Integration verschiedener Forschungswerkzeuge und -methoden
|
||||
|
||||
### Menschliche Zusammenarbeit
|
||||
|
||||
- 🧠 **Mensch-in-der-Schleife**
|
||||
- Unterstützt interaktive Modifikation von Forschungsplänen mit natürlicher Sprache
|
||||
- Unterstützt automatische Akzeptanz von Forschungsplänen
|
||||
|
||||
- 📝 **Bericht-Nachbearbeitung**
|
||||
- Unterstützt Notion-ähnliche Blockbearbeitung
|
||||
- Ermöglicht KI-Verfeinerungen, einschließlich KI-unterstützter Polierung, Satzkürzung und -erweiterung
|
||||
- Angetrieben von [tiptap](https://tiptap.dev/)
|
||||
|
||||
### Inhaltserstellung
|
||||
|
||||
- 🎙️ **Podcast- und Präsentationserstellung**
|
||||
- KI-gestützte Podcast-Skripterstellung und Audiosynthese
|
||||
- Automatisierte Erstellung einfacher PowerPoint-Präsentationen
|
||||
- Anpassbare Vorlagen für maßgeschneiderte Inhalte
|
||||
|
||||
|
||||
## Architektur
|
||||
|
||||
DeerFlow implementiert eine modulare Multi-Agenten-Systemarchitektur, die für automatisierte Forschung und Codeanalyse konzipiert ist. Das System basiert auf LangGraph und ermöglicht einen flexiblen zustandsbasierten Workflow, bei dem Komponenten über ein klar definiertes Nachrichtenübermittlungssystem kommunizieren.
|
||||
|
||||

|
||||
> Sehen Sie es live auf [deerflow.tech](https://deerflow.tech/#multi-agent-architecture)
|
||||
|
||||
Das System verwendet einen optimierten Workflow mit den folgenden Komponenten:
|
||||
|
||||
1. **Koordinator**: Der Einstiegspunkt, der den Workflow-Lebenszyklus verwaltet
|
||||
- Initiiert den Forschungsprozess basierend auf Benutzereingaben
|
||||
- Delegiert Aufgaben bei Bedarf an den Planer
|
||||
- Fungiert als primäre Schnittstelle zwischen dem Benutzer und dem System
|
||||
|
||||
2. **Planer**: Strategische Komponente für Aufgabenzerlegung und -planung
|
||||
- Analysiert Forschungsziele und erstellt strukturierte Ausführungspläne
|
||||
- Bestimmt, ob ausreichend Kontext verfügbar ist oder ob weitere Forschung benötigt wird
|
||||
- Verwaltet den Forschungsablauf und entscheidet, wann der endgültige Bericht erstellt wird
|
||||
|
||||
3. **Forschungsteam**: Eine Sammlung spezialisierter Agenten, die den Plan ausführen:
|
||||
- **Forscher**: Führt Websuchen und Informationssammlung mit Tools wie Websuchmaschinen, Crawling und sogar MCP-Diensten durch.
|
||||
- **Codierer**: Behandelt Codeanalyse, -ausführung und technische Aufgaben mit dem Python REPL Tool.
|
||||
Jeder Agent hat Zugriff auf spezifische Tools, die für seine Rolle optimiert sind, und operiert innerhalb des LangGraph-Frameworks
|
||||
|
||||
4. **Reporter**: Endphasenprozessor für Forschungsergebnisse
|
||||
- Aggregiert Erkenntnisse vom Forschungsteam
|
||||
- Verarbeitet und strukturiert die gesammelten Informationen
|
||||
- Erstellt umfassende Forschungsberichte
|
||||
|
||||
## Text-zu-Sprache-Integration
|
||||
|
||||
DeerFlow enthält jetzt eine Text-zu-Sprache (TTS)-Funktion, mit der Sie Forschungsberichte in Sprache umwandeln können. Diese Funktion verwendet die volcengine TTS API, um hochwertige Audios aus Text zu generieren. Funktionen wie Geschwindigkeit, Lautstärke und Tonhöhe können ebenfalls angepasst werden.
|
||||
|
||||
### Verwendung der TTS API
|
||||
|
||||
Sie können auf die TTS-Funktionalität über den Endpunkt `/api/tts` zugreifen:
|
||||
|
||||
```bash
|
||||
# Beispiel API-Aufruf mit curl
|
||||
curl --location 'http://localhost:8000/api/tts' \
|
||||
--header 'Content-Type: application/json' \
|
||||
--data '{
|
||||
"text": "Dies ist ein Test der Text-zu-Sprache-Funktionalität.",
|
||||
"speed_ratio": 1.0,
|
||||
"volume_ratio": 1.0,
|
||||
"pitch_ratio": 1.0
|
||||
}' \
|
||||
--output speech.mp3
|
||||
```
|
||||
|
||||
|
||||
## Entwicklung
|
||||
|
||||
### Testen
|
||||
|
||||
Führen Sie die Testsuite aus:
|
||||
|
||||
```bash
|
||||
# Alle Tests ausführen
|
||||
make test
|
||||
|
||||
# Spezifische Testdatei ausführen
|
||||
pytest tests/integration/test_workflow.py
|
||||
|
||||
# Mit Abdeckung ausführen
|
||||
make coverage
|
||||
```
|
||||
|
||||
### Codequalität
|
||||
|
||||
```bash
|
||||
# Lint ausführen
|
||||
make lint
|
||||
|
||||
# Code formatieren
|
||||
make format
|
||||
```
|
||||
|
||||
### Debugging mit LangGraph Studio
|
||||
|
||||
DeerFlow verwendet LangGraph für seine Workflow-Architektur. Sie können LangGraph Studio verwenden, um den Workflow in Echtzeit zu debuggen und zu visualisieren.
|
||||
|
||||
#### LangGraph Studio lokal ausführen
|
||||
|
||||
DeerFlow enthält eine `langgraph.json`-Konfigurationsdatei, die die Graphstruktur und Abhängigkeiten für das LangGraph Studio definiert. Diese Datei verweist auf die im Projekt definierten Workflow-Graphen und lädt automatisch Umgebungsvariablen aus der `.env`-Datei.
|
||||
|
||||
##### Mac
|
||||
|
||||
```bash
|
||||
# Installieren Sie den uv-Paketmanager, wenn Sie ihn noch nicht haben
|
||||
curl -LsSf https://astral.sh/uv/install.sh | sh
|
||||
|
||||
# Installieren Sie Abhängigkeiten und starten Sie den LangGraph-Server
|
||||
uvx --refresh --from "langgraph-cli[inmem]" --with-editable . --python 3.12 langgraph dev --allow-blocking
|
||||
```
|
||||
|
||||
##### Windows / Linux
|
||||
|
||||
```bash
|
||||
# Abhängigkeiten installieren
|
||||
pip install -e .
|
||||
pip install -U "langgraph-cli[inmem]"
|
||||
|
||||
# LangGraph-Server starten
|
||||
langgraph dev
|
||||
```
|
||||
|
||||
Nach dem Start des LangGraph-Servers sehen Sie mehrere URLs im Terminal:
|
||||
- API: http://127.0.0.1:2024
|
||||
- Studio UI: https://smith.langchain.com/studio/?baseUrl=http://127.0.0.1:2024
|
||||
- API-Dokumentation: http://127.0.0.1:2024/docs
|
||||
|
||||
Öffnen Sie den Studio UI-Link in Ihrem Browser, um auf die Debugging-Schnittstelle zuzugreifen.
|
||||
|
||||
#### Verwendung von LangGraph Studio
|
||||
|
||||
In der Studio UI können Sie:
|
||||
|
||||
1. Den Workflow-Graphen visualisieren und sehen, wie Komponenten verbunden sind
|
||||
2. Die Ausführung in Echtzeit verfolgen, um zu sehen, wie Daten durch das System fließen
|
||||
3. Den Zustand in jedem Schritt des Workflows inspizieren
|
||||
4. Probleme durch Untersuchung von Ein- und Ausgaben jeder Komponente debuggen
|
||||
5. Feedback während der Planungsphase geben, um Forschungspläne zu verfeinern
|
||||
|
||||
Wenn Sie ein Forschungsthema in der Studio UI einreichen, können Sie die gesamte Workflow-Ausführung sehen, einschließlich:
|
||||
- Die Planungsphase, in der der Forschungsplan erstellt wird
|
||||
- Die Feedback-Schleife, in der Sie den Plan ändern können
|
||||
- Die Forschungs- und Schreibphasen für jeden Abschnitt
|
||||
- Die Erstellung des endgültigen Berichts
|
||||
|
||||
### Aktivieren von LangSmith-Tracing
|
||||
|
||||
DeerFlow unterstützt LangSmith-Tracing, um Ihnen beim Debuggen und Überwachen Ihrer Workflows zu helfen. Um LangSmith-Tracing zu aktivieren:
|
||||
|
||||
1. Stellen Sie sicher, dass Ihre `.env`-Datei die folgenden Konfigurationen enthält (siehe `.env.example`):
|
||||
```bash
|
||||
LANGSMITH_TRACING=true
|
||||
LANGSMITH_ENDPOINT="https://api.smith.langchain.com"
|
||||
LANGSMITH_API_KEY="xxx"
|
||||
LANGSMITH_PROJECT="xxx"
|
||||
```
|
||||
|
||||
2. Starten Sie das Tracing mit LangSmith lokal, indem Sie folgenden Befehl ausführen:
|
||||
```bash
|
||||
langgraph dev
|
||||
```
|
||||
|
||||
Dies aktiviert die Trace-Visualisierung in LangGraph Studio und sendet Ihre Traces zur Überwachung und Analyse an LangSmith.
|
||||
|
||||
## Beispiele
|
||||
|
||||
Die folgenden Beispiele demonstrieren die Fähigkeiten von DeerFlow:
|
||||
|
||||
### Forschungsberichte
|
||||
|
||||
1. **OpenAI Sora Report** - Analysis of OpenAI's Sora AI tool

   - Discusses features, access, prompt engineering, limitations, and ethical considerations
   - [View full report](examples/openai_sora_report.md)

2. **Google's Agent to Agent Protocol Report** - Overview of Google's Agent to Agent (A2A) protocol

   - Discusses its role in AI agent communication and its relationship to Anthropic's Model Context Protocol (MCP)
   - [View full report](examples/what_is_agent_to_agent_protocol.md)

3. **What is MCP?** - A comprehensive analysis of the term "MCP" across multiple contexts

   - Explores Model Context Protocol in AI, Monocalcium Phosphate in chemistry, and Micro-channel Plate in electronics
   - [View full report](examples/what_is_mcp.md)

4. **Bitcoin Price Fluctuations** - Analysis of recent Bitcoin price movements

   - Examines market trends, regulatory influences, and technical indicators
   - Provides recommendations based on historical data
   - [View full report](examples/bitcoin_price_fluctuation.md)

5. **What is LLM?** - An in-depth exploration of Large Language Models

   - Discusses architecture, training, applications, and ethical considerations
   - [View full report](examples/what_is_llm.md)

6. **How to Use Claude for Deep Research?** - Best practices and workflows for using Claude in deep research

   - Covers prompt engineering, data analysis, and integration with other tools
   - [View full report](examples/how_to_use_claude_deep_research.md)

7. **AI Adoption in Healthcare: Influencing Factors** - Analysis of the factors driving AI adoption in healthcare

   - Discusses AI technologies, data quality, ethical considerations, economic evaluations, organizational readiness, and digital infrastructure
   - [View full report](examples/AI_adoption_in_healthcare.md)

8. **Quantum Computing's Impact on Cryptography** - Analysis of quantum computing's impact on cryptography

   - Discusses classical cryptography's vulnerabilities, post-quantum cryptography, and quantum-resistant cryptographic solutions
   - [View full report](examples/Quantum_Computing_Impact_on_Cryptography.md)

9. **Cristiano Ronaldo's Performance Highlights** - Analysis of Cristiano Ronaldo's performance highlights

   - Discusses his career achievements, international goals, and performances across various matches
   - [View full report](examples/Cristiano_Ronaldo's_Performance_Highlights.md)
To run these examples or create your own research reports, you can use the following commands:

```bash
# Run with a specific query
uv run main.py "What factors are influencing AI adoption in healthcare?"

# Run with custom planning parameters
uv run main.py --max_plan_iterations 3 "How does quantum computing impact cryptography?"

# Run in interactive mode with built-in questions
uv run main.py --interactive

# Or run with the basic interactive prompt
uv run main.py

# View all available options
uv run main.py --help
```
### Interactive Mode

The application now supports an interactive mode with built-in questions in English and Chinese:

1. Launch the interactive mode:

```bash
uv run main.py --interactive
```

2. Select your preferred language (English or 中文)

3. Choose from a list of built-in questions or select the option to ask your own question

4. The system will process your question and generate a comprehensive research report
### Human in the Loop

DeerFlow includes a human-in-the-loop mechanism that allows you to review, edit, and approve research plans before they are executed:

1. **Plan Review**: When human in the loop is enabled, the system presents the generated research plan for review before execution

2. **Providing Feedback**: You can:

   - Accept the plan by responding with `[ACCEPTED]`
   - Edit the plan by providing feedback (e.g., `[EDIT PLAN] Add more steps about technical implementation`)
   - The system will incorporate your feedback and generate a revised plan

3. **Auto-Acceptance**: You can enable auto-acceptance to skip the review process:

   - Via API: Set `auto_accepted_plan: true` in your request

4. **API Integration**: When using the API, you can provide feedback through the `feedback` parameter (a Python sketch follows the JSON example):

```json
{
  "messages": [{ "role": "user", "content": "What is quantum computing?" }],
  "thread_id": "my_thread_id",
  "auto_accepted_plan": false,
  "feedback": "[EDIT PLAN] Include more about quantum algorithms"
}
```
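
For illustration, the same payload can be sent from Python. The sketch below assumes the local API server is listening on port 8000 and that the chat endpoint is `/api/chat/stream`; the endpoint path is an assumption rather than a documented contract, so check the server's API docs for the actual route.

```python
# Hypothetical sketch: POST the feedback payload above to the local server.
import requests

payload = {
    "messages": [{"role": "user", "content": "What is quantum computing?"}],
    "thread_id": "my_thread_id",
    "auto_accepted_plan": False,
    "feedback": "[EDIT PLAN] Include more about quantum algorithms",
}

# stream=True keeps the connection open so incremental workflow events print
with requests.post(
    "http://localhost:8000/api/chat/stream", json=payload, stream=True, timeout=300
) as resp:
    resp.raise_for_status()
    for line in resp.iter_lines(decode_unicode=True):
        if line:  # skip keep-alive blank lines in the event stream
            print(line)
```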
### Command Line Arguments

The application supports several command-line arguments to customize its behavior (a sketch follows the list):

- **query**: The research query to process (can be multiple words)
- **--interactive**: Run in interactive mode with built-in questions
- **--max_plan_iterations**: Maximum number of planning cycles (default: 1)
- **--max_step_num**: Maximum number of steps in a research plan (default: 3)
- **--debug**: Enable detailed debug logging
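
A hypothetical `argparse` declaration mirroring these flags and their documented defaults is sketched below; it shows the CLI surface only and is not DeerFlow's actual `main.py`.

```python
# Sketch: declare the documented CLI flags with argparse.
import argparse

parser = argparse.ArgumentParser(description="Run the research workflow.")
parser.add_argument("query", nargs="*", help="research query (may be multiple words)")
parser.add_argument("--interactive", action="store_true",
                    help="run in interactive mode with built-in questions")
parser.add_argument("--max_plan_iterations", type=int, default=1,
                    help="maximum number of planning cycles")
parser.add_argument("--max_step_num", type=int, default=3,
                    help="maximum number of steps in a research plan")
parser.add_argument("--debug", action="store_true",
                    help="enable detailed debug logging")

args = parser.parse_args()
print(" ".join(args.query) or "<interactive>", args.max_plan_iterations)
```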
## FAQ

Please refer to [FAQ.md](docs/FAQ.md) for more details.

## License

This project is open source and available under the [MIT License](./LICENSE).
## Acknowledgments

DeerFlow is built upon the incredible work of the open-source community. We are deeply grateful to all the projects and contributors whose efforts have made DeerFlow possible. Truly, we stand on the shoulders of giants.

We would like to extend our sincere appreciation to the following projects for their invaluable contributions:

- **[LangChain](https://github.com/langchain-ai/langchain)**: Their exceptional framework powers our LLM interactions and chains, enabling seamless integration and functionality.
- **[LangGraph](https://github.com/langchain-ai/langgraph)**: Their innovative approach to multi-agent orchestration has been instrumental in enabling DeerFlow's sophisticated workflows.

These projects exemplify the transformative power of open-source collaboration, and we are proud to build upon their foundations.

### Key Contributors

A heartfelt thank you goes out to the core authors of `DeerFlow`, whose vision, passion, and dedication have brought this project to life:

- **[Daniel Walnut](https://github.com/hetaoBackend/)**
- **[Henry Li](https://github.com/magiccube/)**

Your unwavering commitment and expertise have been the driving force behind DeerFlow's success. We are honored to have you at the helm of this journey.

## Star History

[](https://star-history.com/#bytedance/deer-flow&Date)
-554
@@ -1,554 +0,0 @@
# 🦌 DeerFlow

[](https://www.python.org/downloads/)
[](https://opensource.org/licenses/MIT)
[](https://deepwiki.com/bytedance/deer-flow)
<!-- DeepWiki badge generated by https://deepwiki.ryoppippi.com/ -->

[English](./README.md) | [简体中文](./README_zh.md) | [日本語](./README_ja.md) | [Deutsch](./README_de.md) | [Español](./README_es.md) | [Русский](./README_ru.md) | [Portuguese](./README_pt.md)

> Originated from Open Source, giving back to Open Source.

**DeerFlow** (**D**eep **E**xploration and **E**fficient **R**esearch **Flow**) is a community-driven Deep Research framework that builds upon the incredible work of the open-source community. Our goal is to combine language models with specialized tools for tasks like web search, crawling, and Python code execution, while giving back to the community that made this possible.

Please visit [our official website](https://deerflow.tech/) for more details.

## Demo

### Video

https://github.com/user-attachments/assets/f3786598-1f2a-4d07-919e-8b99dfa1de3e

In this demo, we showcase how to use DeerFlow to:

- Seamlessly integrate with MCP services
- Conduct the Deep Research process and produce a comprehensive report with images
- Create podcast audio based on the generated report

### Replays

- [How tall is the Eiffel Tower compared to the tallest building?](https://deerflow.tech/chat?replay=eiffel-tower-vs-tallest-building)
- [What are the top trending repositories on GitHub?](https://deerflow.tech/chat?replay=github-top-trending-repo)
- [Write an article about Nanjing's traditional dishes](https://deerflow.tech/chat?replay=nanjing-traditional-dishes)
- [How to decorate a rental apartment?](https://deerflow.tech/chat?replay=rental-apartment-decoration)
- [Visit our official website to explore more replays.](https://deerflow.tech/#case-studies)

---
## 📑 Table of Contents

- [🚀 Quick Start](#quick-start)
- [🌟 Features](#features)
- [🏗️ Architecture](#architecture)
- [🛠️ Development](#development)
- [🐳 Docker](#docker)
- [🗣️ Text-to-Speech Integration](#text-to-speech-integration)
- [📚 Examples](#examples)
- [❓ FAQ](#faq)
- [📜 License](#license)
- [💖 Acknowledgments](#acknowledgments)
- [⭐ Star History](#star-history)
## Quick Start

DeerFlow is developed in Python and comes with a web UI written in Node.js. To ensure a smooth setup process, we recommend using the following tools:

### Recommended Tools

- **[`uv`](https://docs.astral.sh/uv/getting-started/installation/):**
  Simplifies Python environment and dependency management. `uv` automatically creates a virtual environment in the root directory and installs all required packages for you, with no need to install Python environments manually.

- **[`nvm`](https://github.com/nvm-sh/nvm):**
  Manages multiple versions of the Node.js runtime effortlessly.

- **[`pnpm`](https://pnpm.io/installation):**
  Installs and manages dependencies of the Node.js project.

### Environment Requirements

Make sure your system meets the following minimum requirements:

- **[Python](https://www.python.org/downloads/):** Version `3.12+`
- **[Node.js](https://nodejs.org/en/download/):** Version `22+`
### Installation

```bash
# Clone the repository
git clone https://github.com/bytedance/deer-flow.git
cd deer-flow

# Install dependencies, uv will take care of the python interpreter and venv creation, and install the required packages
uv sync

# Configure .env with your API keys
# Tavily: https://app.tavily.com/home
# Brave_SEARCH: https://brave.com/search/api/
# volcengine TTS: Add your TTS credentials, if you have them
cp .env.example .env

# See the 'Supported Search Engines' and 'Text-to-Speech Integration' sections below for all available options

# Configure conf.yaml for your LLM model and API keys
# Please refer to 'docs/configuration_guide.md' for more details
cp conf.yaml.example conf.yaml

# Install marp for ppt generation
# https://github.com/marp-team/marp-cli?tab=readme-ov-file#use-package-manager
brew install marp-cli
```

Optionally, install web UI dependencies via [pnpm](https://pnpm.io/installation):

```bash
cd deer-flow/web
pnpm install
```
### Configurations

Please refer to the [Configuration Guide](docs/configuration_guide.md) for more details.

> [!NOTE]
> Before you start the project, read the guide carefully, and update the configurations to match your specific settings and requirements.

### Console UI

The quickest way to run the project is to use the console UI.

```bash
# Run the project in a bash-like shell
uv run main.py
```

### Web UI

This project also includes a Web UI, offering a more dynamic and engaging interactive experience.

> [!NOTE]
> You need to install the web UI dependencies first.

```bash
# Run both the backend and frontend servers in development mode
# On macOS/Linux
./bootstrap.sh -d

# On Windows
bootstrap.bat -d
```

Open your browser and visit [`http://localhost:3000`](http://localhost:3000) to explore the web UI.

Explore more details in the [`web`](./web/) directory.
## Supported Search Engines

DeerFlow supports multiple search engines that can be configured in your `.env` file using the `SEARCH_API` variable:

- **Tavily** (default): A specialized search API for AI applications

  - Requires `TAVILY_API_KEY` in your `.env` file
  - Sign up at: https://app.tavily.com/home

- **DuckDuckGo**: Privacy-focused search engine

  - No API key required

- **Brave Search**: Privacy-focused search engine with advanced features

  - Requires `BRAVE_SEARCH_API_KEY` in your `.env` file
  - Sign up at: https://brave.com/search/api/

- **Arxiv**: Scientific paper search for academic research

  - No API key required
  - Specialized for scientific and academic papers

To configure your preferred search engine, set the `SEARCH_API` variable in your `.env` file:

```bash
# Choose one: tavily, duckduckgo, brave_search, arxiv
SEARCH_API=tavily
```
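
As a quick illustration of the key requirements listed above, the following standalone sketch checks that the configured engine has the API key it needs. This is purely an example; DeerFlow performs its own configuration handling.

```python
# Sketch: validate that SEARCH_API has its required API key, per the list above.
import os

REQUIRED_KEYS = {
    "tavily": "TAVILY_API_KEY",
    "brave_search": "BRAVE_SEARCH_API_KEY",
    "duckduckgo": None,  # no API key required
    "arxiv": None,       # no API key required
}

engine = os.getenv("SEARCH_API", "tavily")
required = REQUIRED_KEYS.get(engine)
if required and not os.getenv(required):
    raise SystemExit(f"SEARCH_API={engine} requires {required} in your .env file")
print(f"Search engine '{engine}' is configured.")
```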
## Features

### Core Capabilities

- 🤖 **LLM Integration**
  - Supports the integration of most models through [litellm](https://docs.litellm.ai/docs/providers).
  - Support for open-source models like Qwen
  - OpenAI-compatible API interface
  - Multi-tier LLM system for different task complexities

### Tools and MCP Integrations

- 🔍 **Search and Retrieval**

  - Web search via Tavily, Brave Search, and more
  - Crawling with Jina
  - Advanced content extraction

- 🔗 **Seamless MCP Integration**
  - Expands capabilities for private domain access, knowledge graph, web browsing, and more
  - Facilitates integration of diverse research tools and methodologies

### Human Collaboration

- 🧠 **Human in the Loop**

  - Supports interactive modification of research plans using natural language
  - Supports auto-acceptance of research plans

- 📝 **Report Post-Editing**
  - Supports Notion-like block editing
  - Allows AI refinements, including AI-assisted polishing, sentence shortening, and expansion
  - Powered by [tiptap](https://tiptap.dev/)

### Content Creation

- 🎙️ **Podcast and Presentation Generation**
  - AI-powered podcast script generation and audio synthesis
  - Automated creation of simple PowerPoint presentations
  - Customizable templates for tailored content
## Architecture

DeerFlow implements a modular multi-agent system architecture designed for automated research and code analysis. The system is built on LangGraph, enabling a flexible state-based workflow where components communicate through a well-defined message-passing system.

> See it live at [deerflow.tech](https://deerflow.tech/#multi-agent-architecture)

The system employs a streamlined workflow with the following components (a minimal wiring sketch follows this list):

1. **Coordinator**: The entry point that manages the workflow lifecycle

   - Initiates the research process based on user input
   - Delegates tasks to the planner when appropriate
   - Acts as the primary interface between the user and the system

2. **Planner**: Strategic component for task decomposition and planning

   - Analyzes research objectives and creates structured execution plans
   - Determines whether enough context is available or if more research is needed
   - Manages the research flow and decides when to generate the final report

3. **Research Team**: A collection of specialized agents that execute the plan:

   - **Researcher**: Conducts web searches and information gathering using tools like web search engines, crawling, and even MCP services.
   - **Coder**: Handles code analysis, execution, and technical tasks using the Python REPL tool.
     Each agent has access to specific tools optimized for its role and operates within the LangGraph framework

4. **Reporter**: Final-stage processor for research outputs
   - Aggregates findings from the research team
   - Processes and structures the collected information
   - Generates comprehensive research reports
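
To make the message-passing idea concrete, here is a minimal, illustrative sketch of how such a coordinator → planner → research team → reporter pipeline could be wired with LangGraph. Node names, state fields, and node bodies are hypothetical stand-ins, not DeerFlow's actual graph definition.

```python
# Sketch: a linear four-node research workflow on a shared typed state.
from typing import TypedDict

from langgraph.graph import StateGraph, START, END


class ResearchState(TypedDict, total=False):
    topic: str      # user input handled by the coordinator
    plan: str       # structured plan produced by the planner
    findings: str   # aggregated output of the research team
    report: str     # final report assembled by the reporter


def coordinator(state: ResearchState) -> dict:
    # Entry point: kick off the research process from the user's input.
    return {"topic": state["topic"].strip()}


def planner(state: ResearchState) -> dict:
    # Decompose the goal into an execution plan (an LLM call in practice).
    return {"plan": f"1. Search the web for: {state['topic']}\n2. Summarize."}


def research_team(state: ResearchState) -> dict:
    # Researcher/coder agents would execute each plan step with their tools.
    return {"findings": f"Findings gathered for plan:\n{state['plan']}"}


def reporter(state: ResearchState) -> dict:
    # Aggregate findings into the final, structured report.
    return {"report": f"# Report on {state['topic']}\n\n{state['findings']}"}


builder = StateGraph(ResearchState)
builder.add_node("coordinator", coordinator)
builder.add_node("planner", planner)
builder.add_node("research_team", research_team)
builder.add_node("reporter", reporter)
builder.add_edge(START, "coordinator")
builder.add_edge("coordinator", "planner")
builder.add_edge("planner", "research_team")
builder.add_edge("research_team", "reporter")
builder.add_edge("reporter", END)

graph = builder.compile()
print(graph.invoke({"topic": "quantum computing"})["report"])
```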
## Text-to-Speech Integration

DeerFlow now includes a Text-to-Speech (TTS) feature that allows you to convert research reports to speech. This feature uses the volcengine TTS API to generate high-quality audio from text. Characteristics like speed, volume, and pitch are also customizable.

### Using the TTS API

You can access the TTS functionality through the `/api/tts` endpoint:

```bash
# Example API call using curl
curl --location 'http://localhost:8000/api/tts' \
--header 'Content-Type: application/json' \
--data '{
    "text": "This is a test of the text-to-speech functionality.",
    "speed_ratio": 1.0,
    "volume_ratio": 1.0,
    "pitch_ratio": 1.0
}' \
--output speech.mp3
```
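
The same call can be made from Python; the sketch below mirrors the documented payload with the `requests` library and writes the returned MP3 bytes to disk.

```python
# Sketch: the curl example above, expressed with requests.
import requests

resp = requests.post(
    "http://localhost:8000/api/tts",
    json={
        "text": "This is a test of the text-to-speech functionality.",
        "speed_ratio": 1.0,   # playback speed
        "volume_ratio": 1.0,  # loudness
        "pitch_ratio": 1.0,   # voice pitch
    },
    timeout=60,
)
resp.raise_for_status()
with open("speech.mp3", "wb") as f:
    f.write(resp.content)  # save the returned MP3 audio
```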
## Development

### Testing

Run the test suite:

```bash
# Run all tests
make test

# Run a specific test file
pytest tests/integration/test_workflow.py

# Run with coverage
make coverage
```

### Code Quality

```bash
# Run linting
make lint

# Format code
make format
```
### Debugging with LangGraph Studio

DeerFlow uses LangGraph for its workflow architecture. You can use LangGraph Studio to debug and visualize the workflow in real time.

#### Running LangGraph Studio Locally

DeerFlow includes a `langgraph.json` configuration file that defines the graph structure and dependencies for LangGraph Studio. This file points to the workflow graphs defined in the project and automatically loads environment variables from the `.env` file.

##### Mac

```bash
# Install the uv package manager if you don't have it
curl -LsSf https://astral.sh/uv/install.sh | sh

# Install dependencies and start the LangGraph server
uvx --refresh --from "langgraph-cli[inmem]" --with-editable . --python 3.12 langgraph dev --allow-blocking
```

##### Windows / Linux

```bash
# Install dependencies
pip install -e .
pip install -U "langgraph-cli[inmem]"

# Start the LangGraph server
langgraph dev
```

After starting the LangGraph server, you'll see several URLs in the terminal:

- API: http://127.0.0.1:2024
- Studio UI: https://smith.langchain.com/studio/?baseUrl=http://127.0.0.1:2024
- API Docs: http://127.0.0.1:2024/docs

Open the Studio UI link in your browser to access the debugging interface.
#### Using LangGraph Studio

In the Studio UI, you can:

1. Visualize the workflow graph and see how the components connect
2. Trace execution in real time to see how data flows through the system
3. Inspect the state at each step of the workflow
4. Debug issues by examining the inputs and outputs of each component
5. Provide feedback during the planning phase to refine research plans

When you submit a research topic in the Studio UI, you'll be able to see the entire workflow execution, including:

- The planning phase, where the research plan is created
- The feedback loop, where you can modify the plan
- The research and writing phases for each section
- The final report generation
### Enabling LangSmith Tracing

DeerFlow supports LangSmith tracing to help you debug and monitor your workflows. To enable LangSmith tracing:

1. Make sure your `.env` file has the following configuration (see `.env.example`):

```bash
LANGSMITH_TRACING=true
LANGSMITH_ENDPOINT="https://api.smith.langchain.com"
LANGSMITH_API_KEY="xxx"
LANGSMITH_PROJECT="xxx"
```

2. Start tracing and visualize the graph locally with LangSmith by running:

```bash
langgraph dev
```

This will enable trace visualization in LangGraph Studio and send your traces to LangSmith for monitoring and analysis.
## Docker

You can also run this project with Docker.

First, you need to read the [configuration](docs/configuration_guide.md) below. Make sure the `.env` and `.conf.yaml` files are ready.

Second, to build a Docker image of your own web server:

```bash
docker build -t deer-flow-api .
```

Finally, start a Docker container running the web server:

```bash
# Replace deer-flow-api-app with your preferred container name
docker run -d -t -p 8000:8000 --env-file .env --name deer-flow-api-app deer-flow-api

# stop the server
docker stop deer-flow-api-app
```

### Docker Compose (include both backend and frontend)

DeerFlow provides a docker-compose setup to easily run both the backend and frontend together:

```bash
# build docker image
docker compose build

# start the server
docker compose up
```
## Examples

The following examples demonstrate the capabilities of DeerFlow:

### Research Reports

1. **OpenAI Sora Report** - Analysis of OpenAI's Sora AI tool

   - Discusses features, access, prompt engineering, limitations, and ethical considerations
   - [View full report](examples/openai_sora_report.md)

2. **Google's Agent to Agent Protocol Report** - Overview of Google's Agent to Agent (A2A) protocol

   - Discusses its role in AI agent communication and its relationship to Anthropic's Model Context Protocol (MCP)
   - [View full report](examples/what_is_agent_to_agent_protocol.md)

3. **What is MCP?** - A comprehensive analysis of the term "MCP" across multiple contexts

   - Explores Model Context Protocol in AI, Monocalcium Phosphate in chemistry, and Micro-channel Plate in electronics
   - [View full report](examples/what_is_mcp.md)

4. **Bitcoin Price Fluctuations** - Analysis of recent Bitcoin price movements

   - Examines market trends, regulatory influences, and technical indicators
   - Provides recommendations based on historical data
   - [View full report](examples/bitcoin_price_fluctuation.md)

5. **What is LLM?** - An in-depth exploration of Large Language Models

   - Discusses architecture, training, applications, and ethical considerations
   - [View full report](examples/what_is_llm.md)

6. **How to Use Claude for Deep Research?** - Best practices and workflows for using Claude in deep research

   - Covers prompt engineering, data analysis, and integration with other tools
   - [View full report](examples/how_to_use_claude_deep_research.md)

7. **AI Adoption in Healthcare: Influencing Factors** - Analysis of the factors driving AI adoption in healthcare

   - Discusses AI technologies, data quality, ethical considerations, economic evaluations, organizational readiness, and digital infrastructure
   - [View full report](examples/AI_adoption_in_healthcare.md)

8. **Quantum Computing's Impact on Cryptography** - Analysis of quantum computing's impact on cryptography

   - Discusses classical cryptography's vulnerabilities, post-quantum cryptography, and quantum-resistant cryptographic solutions
   - [View full report](examples/Quantum_Computing_Impact_on_Cryptography.md)

9. **Cristiano Ronaldo's Performance Highlights** - Analysis of Cristiano Ronaldo's performance highlights

   - Discusses his career achievements, international goals, and performances across various matches
   - [View full report](examples/Cristiano_Ronaldo's_Performance_Highlights.md)
To run these examples or create your own research reports, you can use the following commands:

```bash
# Run with a specific query
uv run main.py "What factors are influencing AI adoption in healthcare?"

# Run with custom planning parameters
uv run main.py --max_plan_iterations 3 "How does quantum computing impact cryptography?"

# Run in interactive mode with built-in questions
uv run main.py --interactive

# Or run with the basic interactive prompt
uv run main.py

# View all available options
uv run main.py --help
```
### Interactive Mode

The application now supports an interactive mode with built-in questions in both English and Chinese:

1. Launch the interactive mode:

```bash
uv run main.py --interactive
```

2. Select your preferred language (English or 中文)

3. Choose from a list of built-in questions or select the option to ask your own question

4. The system will process your question and generate a comprehensive research report
### Human in the Loop

DeerFlow includes a human-in-the-loop mechanism that allows you to review, edit, and approve research plans before they are executed:

1. **Plan Review**: When human in the loop is enabled, the system will present the generated research plan for your review before execution

2. **Providing Feedback**: You can:

   - Accept the plan by responding with `[ACCEPTED]`
   - Edit the plan by providing feedback (e.g., `[EDIT PLAN] Add more steps about technical implementation`)
   - The system will incorporate your feedback and generate a revised plan

3. **Auto-Acceptance**: You can enable auto-acceptance to skip the review process:

   - Via API: Set `auto_accepted_plan: true` in your request

4. **API Integration**: When using the API, you can provide feedback through the `feedback` parameter:

```json
{
  "messages": [{ "role": "user", "content": "What is quantum computing?" }],
  "thread_id": "my_thread_id",
  "auto_accepted_plan": false,
  "feedback": "[EDIT PLAN] Include more about quantum algorithms"
}
```
### Command Line Arguments

The application supports several command-line arguments to customize its behavior:

- **query**: The research query to process (can be multiple words)
- **--interactive**: Run in interactive mode with built-in questions
- **--max_plan_iterations**: Maximum number of planning cycles (default: 1)
- **--max_step_num**: Maximum number of steps in a research plan (default: 3)
- **--debug**: Enable detailed debug logging
## FAQ

Please refer to [FAQ.md](docs/FAQ.md) for more details.

## License

This project is open source and available under the [MIT License](./LICENSE).
## Acknowledgments

DeerFlow is built upon the incredible work of the open-source community. We are deeply grateful to all the projects and contributors whose efforts have made DeerFlow possible. Truly, we stand on the shoulders of giants.

We would like to extend our sincere appreciation to the following projects for their invaluable contributions:

- **[LangChain](https://github.com/langchain-ai/langchain)**: Their exceptional framework powers our LLM interactions and chains, enabling seamless integration and functionality.
- **[LangGraph](https://github.com/langchain-ai/langgraph)**: Their innovative approach to multi-agent orchestration has been instrumental in enabling DeerFlow's sophisticated workflows.

These projects exemplify the transformative power of open-source collaboration, and we are proud to build upon their foundations.

### Key Contributors

A heartfelt thank you goes out to the core authors of `DeerFlow`, whose vision, passion, and dedication have brought this project to life:

- **[Daniel Walnut](https://github.com/hetaoBackend/)**
- **[Henry Li](https://github.com/magiccube/)**

Your unwavering commitment and expertise have been the driving force behind DeerFlow's success. We are honored to have you at the helm of this journey.

## Star History

[](https://star-history.com/#bytedance/deer-flow&Date)
-553
@@ -1,553 +0,0 @@
# 🦌 DeerFlow

[](https://www.python.org/downloads/)
[](https://opensource.org/licenses/MIT)

[English](./README.md) | [简体中文](./README_zh.md) | [日本語](./README_ja.md) | [Deutsch](./README_de.md) | [Español](./README_es.md) | [Русский](./README_ru.md) | [Portuguese](./README_pt.md)

> Originated from Open Source, giving back to Open Source.

**DeerFlow** (**D**eep **E**xploration and **E**fficient **R**esearch **Flow**) is a community-driven Deep Research framework that builds upon the incredible work of the open-source community. Our goal is to combine language models with specialized tools for tasks like web search, crawling, and Python code execution, while giving back to the community that made this possible.

Please visit [DeerFlow's official website](https://deerflow.tech/) for more details.

## Demo

### Video

https://github.com/user-attachments/assets/f3786598-1f2a-4d07-919e-8b99dfa1de3e

In this demo, we showcase how to use DeerFlow to:

- Seamlessly integrate with MCP services
- Conduct the Deep Research process and produce a comprehensive report with images
- Create podcast audio based on the generated report

### Replays

- [How tall is the Eiffel Tower compared to the tallest building?](https://deerflow.tech/chat?replay=eiffel-tower-vs-tallest-building)
- [What are the top trending repositories on GitHub?](https://deerflow.tech/chat?replay=github-top-trending-repo)
- [Write an article about Nanjing's traditional dishes](https://deerflow.tech/chat?replay=nanjing-traditional-dishes)
- [How to decorate a rental apartment?](https://deerflow.tech/chat?replay=rental-apartment-decoration)
- [Visit our official website to explore more replays.](https://deerflow.tech/#case-studies)

---
## 📑 Table of Contents

- [🚀 Quick Start](#quick-start)
- [🌟 Features](#features)
- [🏗️ Architecture](#architecture)
- [🛠️ Development](#development)
- [🐳 Docker](#docker)
- [🗣️ Text-to-Speech Integration](#text-to-speech-integration)
- [📚 Examples](#examples)
- [❓ FAQ](#faq)
- [📜 License](#license)
- [💖 Acknowledgments](#acknowledgments)
- [⭐ Star History](#star-history)
## Quick Start

DeerFlow is developed in Python and comes with a web UI written in Node.js. To ensure a smooth setup process, we recommend using the following tools:

### Recommended Tools

- **[`uv`](https://docs.astral.sh/uv/getting-started/installation/):**
  Simplifies Python environment and dependency management. `uv` automatically creates a virtual environment in the root directory and installs all required packages for you, with no need to install Python environments manually.

- **[`nvm`](https://github.com/nvm-sh/nvm):**
  Manages multiple versions of the Node.js runtime effortlessly.

- **[`pnpm`](https://pnpm.io/installation):**
  Installs and manages dependencies of the Node.js project.

### Environment Requirements

Make sure your system meets the following minimum requirements:

- **[Python](https://www.python.org/downloads/):** Version `3.12+`
- **[Node.js](https://nodejs.org/en/download/):** Version `22+`
### Installation

```bash
# Clone the repository
git clone https://github.com/bytedance/deer-flow.git
cd deer-flow

# Install dependencies, uv will take care of the python interpreter and venv creation, and install the required packages
uv sync

# Configure .env with your API keys
# Tavily: https://app.tavily.com/home
# Brave_SEARCH: https://brave.com/search/api/
# volcengine TTS: Add your TTS credentials, if you have them
cp .env.example .env

# See the 'Supported Search Engines' and 'Text-to-Speech Integration' sections below for all available options

# Configure conf.yaml for your LLM model and API keys
# Please refer to 'docs/configuration_guide.md' for more details
cp conf.yaml.example conf.yaml

# Install marp for ppt generation
# https://github.com/marp-team/marp-cli?tab=readme-ov-file#use-package-manager
brew install marp-cli
```

Optionally, install web UI dependencies via [pnpm](https://pnpm.io/installation):

```bash
cd deer-flow/web
pnpm install
```
### Configurations

Please refer to the [Configuration Guide](docs/configuration_guide.md) for more details.

> [!NOTE]
> Before you start the project, read the guide carefully, and update the configurations to match your specific settings and requirements.

### Console UI

The quickest way to run the project is to use the console UI.

```bash
# Run the project in a bash-like shell
uv run main.py
```

### Web UI

This project also includes a Web UI, offering a more dynamic and engaging interactive experience.

> [!NOTE]
> You need to install the web UI dependencies first.

```bash
# Run both the backend and frontend servers in development mode
# On macOS/Linux
./bootstrap.sh -d

# On Windows
bootstrap.bat -d
```

Open your browser and visit [`http://localhost:3000`](http://localhost:3000) to explore the web UI.

Explore more details in the [`web`](./web/) directory.
## Supported Search Engines

DeerFlow supports multiple search engines that can be configured in your `.env` file using the `SEARCH_API` variable:

- **Tavily** (default): A specialized search API for AI applications

  - Requires `TAVILY_API_KEY` in your `.env` file
  - Sign up at: https://app.tavily.com/home

- **DuckDuckGo**: Privacy-focused search engine

  - No API key required

- **Brave Search**: Privacy-focused search engine with advanced features

  - Requires `BRAVE_SEARCH_API_KEY` in your `.env` file
  - Sign up at: https://brave.com/search/api/

- **Arxiv**: Scientific paper search for academic research

  - No API key required
  - Specialized for scientific and academic papers

To configure your preferred search engine, set the `SEARCH_API` variable in your `.env` file:

```bash
# Choose one: tavily, duckduckgo, brave_search, arxiv
SEARCH_API=tavily
```
## Features

### Core Capabilities

- 🤖 **LLM Integration**
  - Supports the integration of most models through [litellm](https://docs.litellm.ai/docs/providers)
  - Support for open-source models like Qwen
  - OpenAI-compatible API interface
  - Multi-tier LLM system for different task complexities

### Tools and MCP Integrations

- 🔍 **Search and Retrieval**

  - Web search via Tavily, Brave Search, and more
  - Crawling with Jina
  - Advanced content extraction

- 🔗 **Seamless MCP Integration**
  - Expands capabilities for private domain access, knowledge graph, web browsing, and more
  - Facilitates integration of diverse research tools and methodologies

### Human Collaboration

- 🧠 **Human in the Loop**

  - Supports interactive modification of research plans using natural language
  - Supports auto-acceptance of research plans

- 📝 **Report Post-Editing**
  - Supports Notion-like block editing
  - Allows AI refinements, including AI-assisted polishing, sentence shortening, and expansion
  - Powered by [tiptap](https://tiptap.dev/)

### Content Creation

- 🎙️ **Podcast and Presentation Generation**
  - AI-powered podcast script generation and audio synthesis
  - Automated creation of simple PowerPoint presentations
  - Customizable templates for tailored content
## Architecture

DeerFlow implements a modular multi-agent system architecture designed for automated research and code analysis. The system is built on LangGraph, enabling a flexible state-based workflow where components communicate through a well-defined message-passing system.

> See it live at [deerflow.tech](https://deerflow.tech/#multi-agent-architecture)

The system employs a streamlined workflow with the following components:

1. **Coordinator**: The entry point that manages the workflow lifecycle

   - Initiates the research process based on user input
   - Delegates tasks to the planner when appropriate
   - Acts as the primary interface between the user and the system

2. **Planner**: Strategic component for task decomposition and planning

   - Analyzes research objectives and creates structured execution plans
   - Determines whether enough context is available or if more research is needed
   - Manages the research flow and decides when to generate the final report

3. **Research Team**: A collection of specialized agents that execute the plan:

   - **Researcher**: Conducts web searches and information gathering using tools like web search engines, crawling, and even MCP services.
   - **Coder**: Handles code analysis, execution, and technical tasks using the Python REPL tool.
     Each agent has access to specific tools optimized for its role and operates within the LangGraph framework

4. **Reporter**: Final-stage processor for research outputs
   - Aggregates findings from the research team
   - Processes and structures the collected information
   - Generates comprehensive research reports
## Development

### Testing

Run the test suite:

```bash
# Run all tests
make test

# Run a specific test file
pytest tests/integration/test_workflow.py

# Run with coverage
make coverage
```

### Code Quality

```bash
# Run linting
make lint

# Format code
make format
```
### Debugging with LangGraph Studio

DeerFlow uses LangGraph for its workflow architecture. You can use LangGraph Studio to debug and visualize the workflow in real time.

#### Running LangGraph Studio Locally

DeerFlow includes a `langgraph.json` configuration file that defines the graph structure and dependencies for LangGraph Studio. This file points to the workflow graphs defined in the project and automatically loads environment variables from the `.env` file.

##### Mac

```bash
# Install the uv package manager if you don't have it
curl -LsSf https://astral.sh/uv/install.sh | sh

# Install dependencies and start the LangGraph server
uvx --refresh --from "langgraph-cli[inmem]" --with-editable . --python 3.12 langgraph dev --allow-blocking
```

##### Windows / Linux

```bash
# Install dependencies
pip install -e .
pip install -U "langgraph-cli[inmem]"

# Start the LangGraph server
langgraph dev
```

After starting the LangGraph server, you'll see several URLs in the terminal:

- API: http://127.0.0.1:2024
- Studio UI: https://smith.langchain.com/studio/?baseUrl=http://127.0.0.1:2024
- API Docs: http://127.0.0.1:2024/docs

Open the Studio UI link in your browser to access the debugging interface.
#### Using LangGraph Studio

In the Studio UI, you can:

1. Visualize the workflow graph and see how the components connect
2. Trace execution in real time to understand how data flows through the system
3. Inspect the state at each step of the workflow
4. Debug issues by examining the inputs and outputs of each component
5. Provide feedback during the planning phase to refine research plans

When you submit a research topic in the Studio UI, you'll be able to see the entire workflow execution, including:

- The planning phase, where the research plan is created
- The feedback loop, where you can modify the plan
- The research and writing phases for each section
- The final report generation
### Enabling LangSmith Tracing

DeerFlow supports LangSmith tracing to help you debug and monitor your workflows. To enable LangSmith tracing:

1. Make sure your `.env` file has the following configuration (see `.env.example`):

```bash
LANGSMITH_TRACING=true
LANGSMITH_ENDPOINT="https://api.smith.langchain.com"
LANGSMITH_API_KEY="xxx"
LANGSMITH_PROJECT="xxx"
```

2. Start LangSmith tracing by running the following command:

```bash
langgraph dev
```

This will enable trace visualization in LangGraph Studio and send your traces to LangSmith for monitoring and analysis.
## Docker

You can also run this project with Docker.

First, read the [Configurations](#configurations) section below. Make sure the `.env` and `.conf.yaml` files are ready.

Second, to build a Docker image of your own web server:

```bash
docker build -t deer-flow-api .
```

Finally, start a Docker container running the web server:

```bash
# Replace deer-flow-api-app with your preferred container name
docker run -d -t -p 8000:8000 --env-file .env --name deer-flow-api-app deer-flow-api

# stop the server
docker stop deer-flow-api-app
```

### Docker Compose

This project can also be set up with docker compose:

```bash
# build docker image
docker compose build

# start the server
docker compose up
```
## Text-to-Speech Integration

DeerFlow now includes a Text-to-Speech (TTS) feature that allows you to convert research reports to speech. This feature uses the volcengine TTS API to generate high-quality audio from text. Characteristics like speed, volume, and pitch are also customizable.

### Using the TTS API

You can access the TTS functionality through the `/api/tts` endpoint:

```bash
# Example API call using curl
curl --location 'http://localhost:8000/api/tts' \
--header 'Content-Type: application/json' \
--data '{
    "text": "This is a test of the text-to-speech functionality.",
    "speed_ratio": 1.0,
    "volume_ratio": 1.0,
    "pitch_ratio": 1.0
}' \
--output speech.mp3
```
## Examples

The following examples demonstrate the capabilities of DeerFlow:

### Research Reports

1. **OpenAI Sora Report** - Analysis of OpenAI's Sora AI tool

   - Discusses features, access, prompt engineering, limitations, and ethical considerations
   - [View full report](examples/openai_sora_report.md)

2. **Google's Agent to Agent Protocol Report** - Overview of Google's Agent to Agent (A2A) protocol

   - Discusses its role in AI agent communication and its relationship to Anthropic's Model Context Protocol (MCP)
   - [View full report](examples/what_is_agent_to_agent_protocol.md)

3. **What is MCP?** - A comprehensive analysis of the term "MCP" across multiple contexts

   - Explores Model Context Protocol in AI, Monocalcium Phosphate in chemistry, and Micro-channel Plate in electronics
   - [View full report](examples/what_is_mcp.md)

4. **Bitcoin Price Fluctuations** - Analysis of recent Bitcoin price movements

   - Examines market trends, regulatory influences, and technical indicators
   - Provides recommendations based on historical data
   - [View full report](examples/bitcoin_price_fluctuation.md)

5. **What is LLM?** - An in-depth exploration of Large Language Models

   - Discusses architecture, training, applications, and ethical considerations
   - [View full report](examples/what_is_llm.md)

6. **How to Use Claude for Deep Research?** - Best practices and workflows for using Claude in deep research

   - Covers prompt engineering, data analysis, and integration with other tools
   - [View full report](examples/how_to_use_claude_deep_research.md)

7. **AI Adoption in Healthcare: Influencing Factors** - Analysis of the factors driving AI adoption in healthcare

   - Discusses AI technologies, data quality, ethical considerations, economic evaluations, organizational readiness, and digital infrastructure
   - [View full report](examples/AI_adoption_in_healthcare.md)

8. **Quantum Computing's Impact on Cryptography** - Analysis of quantum computing's impact on cryptography

   - Discusses classical cryptography's vulnerabilities, post-quantum cryptography, and quantum-resistant cryptographic solutions
   - [View full report](examples/Quantum_Computing_Impact_on_Cryptography.md)

9. **Cristiano Ronaldo's Performance Highlights** - Analysis of Cristiano Ronaldo's performance highlights

   - Discusses his career achievements, international goals, and performances across various matches
   - [View full report](examples/Cristiano_Ronaldo's_Performance_Highlights.md)
To run these examples or create your own research reports, you can use the following commands:

```bash
# Run with a specific query
uv run main.py "What factors are influencing AI adoption in healthcare?"

# Run with custom planning parameters
uv run main.py --max_plan_iterations 3 "How does quantum computing impact cryptography?"

# Run in interactive mode with built-in questions
uv run main.py --interactive

# Or run with the basic interactive prompt
uv run main.py

# View all available options
uv run main.py --help
```
### Interactive Mode

The application now supports an interactive mode with built-in questions in both English and Chinese:

1. Launch the interactive mode:

```bash
uv run main.py --interactive
```

2. Select your preferred language (English or 中文)

3. Choose from a list of built-in questions or select the option to ask your own question

4. The system will process your question and generate a comprehensive research report
### Human in the Loop

DeerFlow includes a human-in-the-loop mechanism that allows you to review, edit, and approve research plans before they are executed:

1. **Plan Review**: When human in the loop is enabled, the system presents the generated research plan before execution

2. **Providing Feedback**: You can:

   - Accept the plan by responding with `[ACCEPTED]`
   - Edit the plan by providing feedback (e.g., `[EDIT PLAN] Add more steps about technical implementation`)
   - The system will incorporate your feedback and generate a revised plan

3. **Auto-Acceptance**: You can enable auto-acceptance to skip the review process:

   - Via API: Set `auto_accepted_plan: true` in your request

4. **API Integration**: When using the API, you can provide feedback through the `feedback` parameter:

```json
{
  "messages": [{ "role": "user", "content": "What is quantum computing?" }],
  "thread_id": "my_thread_id",
  "auto_accepted_plan": false,
  "feedback": "[EDIT PLAN] Include more about quantum algorithms"
}
```
### Command Line Arguments

The application supports several command-line arguments to customize its behavior:

- **query**: The research query to process (can be multiple words)
- **--interactive**: Run in interactive mode with built-in questions
- **--max_plan_iterations**: Maximum number of planning cycles (default: 1)
- **--max_step_num**: Maximum number of steps in a research plan (default: 3)
- **--debug**: Enable detailed debug logging
## FAQ

Please refer to [FAQ.md](docs/FAQ.md) for more details.

## License

This project is open source and available under the [MIT License](./LICENSE).
## Acknowledgments

DeerFlow is built upon the incredible work of the open-source community. We are deeply grateful to all the projects and contributors whose efforts have made DeerFlow possible. Truly, we stand on the shoulders of giants.

We would like to extend our sincere appreciation to the following projects:

- **[LangChain](https://github.com/langchain-ai/langchain)**: Their exceptional framework powers our LLM interactions and chains, enabling seamless integration and functionality.
- **[LangGraph](https://github.com/langchain-ai/langgraph)**: Their innovative approach to multi-agent orchestration has been instrumental in enabling DeerFlow's sophisticated workflows.

These projects exemplify the transformative power of open-source collaboration, and we are proud to build upon their foundations.

### Key Contributors

A heartfelt thank you goes out to the core authors of `DeerFlow`, whose vision, passion, and dedication have brought this project to life:

- **[Daniel Walnut](https://github.com/hetaoBackend/)**
- **[Henry Li](https://github.com/magiccube/)**

Your unwavering commitment and expertise have been the driving force behind DeerFlow's success. We are honored to have you at the helm of this journey.

## Star History

[](https://star-history.com/#bytedance/deer-flow&Date)
-545
@@ -1,545 +0,0 @@
# 🦌 DeerFlow

[](https://www.python.org/downloads/)
[](https://opensource.org/licenses/MIT)
[](https://deepwiki.com/bytedance/deer-flow)

<!-- DeepWiki badge generated by https://deepwiki.ryoppippi.com/ -->

[English](./README.md) | [简体中文](./README_zh.md) | [日本語](./README_ja.md) | [Deutsch](./README_de.md) | [Español](./README_es.md) | [Русский](./README_ru.md) | [Portuguese](./README_pt.md)

> Originated from Open Source, giving back to Open Source.

**DeerFlow** (**D**eep **E**xploration and **E**fficient **R**esearch **Flow**) is a community-driven Deep Research framework that builds upon the incredible work of the open-source community. Our goal is to combine language models with specialized tools for tasks like web search, crawling, and Python code execution, while giving back to the community that made this possible.

Please visit [our official website](https://deerflow.tech/) for more details.

## Demo

### Video

https://github.com/user-attachments/assets/f3786598-1f2a-4d07-919e-8b99dfa1de3e

In this demo, we showcase how to use DeerFlow to:

- Seamlessly integrate with MCP services
- Conduct the Deep Research process and produce a comprehensive report with images
- Create podcast audio based on the generated report

### Replays

- [How tall is the Eiffel Tower compared to the tallest building?](https://deerflow.tech/chat?replay=eiffel-tower-vs-tallest-building)
- [What are the top trending repositories on GitHub?](https://deerflow.tech/chat?replay=github-top-trending-repo)
- [Write an article about Nanjing's traditional dishes](https://deerflow.tech/chat?replay=nanjing-traditional-dishes)
- [How to decorate a rental apartment?](https://deerflow.tech/chat?replay=rental-apartment-decoration)
- [Visit our official website to explore more replays.](https://deerflow.tech/#case-studies)

---
## 📑 Table of Contents

- [🚀 Quick Start](#quick-start)
- [🌟 Features](#features)
- [🏗️ Architecture](#architecture)
- [🛠️ Development](#development)
- [🐳 Docker](#docker)
- [🗣️ Text-to-Speech Integration](#text-to-speech-integration)
- [📚 Examples](#examples)
- [❓ FAQ](#faq)
- [📜 License](#license)
- [💖 Acknowledgments](#acknowledgments)
- [🏆 Key Contributors](#key-contributors)
- [⭐ Star History](#star-history)
## Quick Start

DeerFlow is developed in Python, and comes with a web UI written in Node.js. To ensure a smooth setup process, we recommend using the following tools:

### Recommended Tools

- **[`uv`](https://docs.astral.sh/uv/getting-started/installation/):**
  Simplifies Python environment and dependency management. `uv` automatically creates a virtual environment in the root directory and installs all required packages, so there is no need to install Python environments manually.

- **[`nvm`](https://github.com/nvm-sh/nvm):**
  Manages multiple versions of the Node.js runtime effortlessly.

- **[`pnpm`](https://pnpm.io/installation):**
  Installs and manages dependencies of the Node.js project.

### Environment Requirements

Make sure your system meets the following minimum requirements:

- **[Python](https://www.python.org/downloads/):** Version `3.12+`
- **[Node.js](https://nodejs.org/en/download/):** Version `22+`
### Installation

```bash
# Clone the repository
git clone https://github.com/bytedance/deer-flow.git
cd deer-flow

# Install dependencies, uv will take care of the python interpreter and venv creation, and install the required packages
uv sync

# Configure .env with your API keys
# Tavily: https://app.tavily.com/home
# Brave_SEARCH: https://brave.com/search/api/
# volcengine TTS: Add your TTS credentials, if you have them
cp .env.example .env

# See the 'Supported Search Engines' and 'Text-to-Speech Integration' sections below for all available options

# Configure conf.yaml for your LLM model and API keys
# Please refer to 'docs/configuration_guide.md' for more details
cp conf.yaml.example conf.yaml

# Install marp for ppt generation
# https://github.com/marp-team/marp-cli?tab=readme-ov-file#use-package-manager
brew install marp-cli
```

Optionally, install web UI dependencies via [pnpm](https://pnpm.io/installation):

```bash
cd deer-flow/web
pnpm install
```
### Configurations

Please refer to the [Configuration Guide](docs/configuration_guide.md) for more details.

> [!NOTE]
> Before you start the project, read the guide carefully, and update the configurations to match your specific settings and requirements.

### Console UI

The quickest way to run the project is to use the console UI.

```bash
# Run the project in a bash-like shell
uv run main.py
```

### Web UI

This project also includes a Web UI, offering a more dynamic, engaging interactive experience.

> [!NOTE]
> You need to install the web UI dependencies first.

```bash
# Run both the backend and frontend servers in development mode
# On macOS/Linux
./bootstrap.sh -d

# On Windows
bootstrap.bat -d
```

Open your browser and visit [`http://localhost:3000`](http://localhost:3000) to explore the web UI.

Explore more details in the [`web`](./web/) directory.
## Supported Search Engines

DeerFlow supports multiple search engines that can be configured in your `.env` file using the `SEARCH_API` variable:

- **Tavily** (default): A specialized search API for AI applications

  - Requires `TAVILY_API_KEY` in your `.env` file
  - Sign up at: https://app.tavily.com/home

- **DuckDuckGo**: Privacy-focused search engine

  - No API key required

- **Brave Search**: Privacy-focused search engine with advanced features

  - Requires `BRAVE_SEARCH_API_KEY` in your `.env` file
  - Sign up at: https://brave.com/search/api/

- **Arxiv**: Scientific paper search for academic research

  - No API key required
  - Specialized for scientific and academic papers

To configure your preferred search engine, set the `SEARCH_API` variable in your `.env` file:

```bash
# Choose one: tavily, duckduckgo, brave_search, arxiv
SEARCH_API=tavily
```
## Features

### Core Capabilities

- 🤖 **LLM Integration**

  - Supports the integration of most models through [litellm](https://docs.litellm.ai/docs/providers).
  - Support for open-source models like Qwen
  - OpenAI-compatible API interface
  - Multi-tier LLM system for different task complexities

### Tools and MCP Integrations

- 🔍 **Search and Retrieval**

  - Web search with Tavily, Brave Search, and more
  - Crawling with Jina
  - Advanced content extraction

- 🔗 **Seamless MCP Integration**

  - Expands capabilities for private domain access, knowledge graph, web browsing, and more
  - Facilitates integration of diverse research tools and methodologies

### Human Collaboration

- 🧠 **Human in the Loop**

  - Supports interactive modification of research plans using natural language
  - Supports auto-acceptance of research plans

- 📝 **Report Post-Editing**
  - Supports Notion-like block editing
  - Allows AI refinements, including AI-assisted polishing, sentence shortening, and expansion
  - Powered by [tiptap](https://tiptap.dev/)

### Content Creation

- 🎙️ **Podcast and Presentation Generation**

  - AI-powered podcast script generation and audio synthesis
  - Automated creation of simple PowerPoint presentations
  - Customizable templates for tailored content
## Arquitetura
|
||||
|
||||
DeerFlow implementa uma arquitetura de sistema multi-agente modular designada para pesquisa e análise de código automatizada. O sistema é construído em LangGraph, possibilitando um fluxo de trabalho flexível baseado-em-estado onde os componentes se comunicam através de um sistema de transmissão de mensagens bem-definido.
|
||||
|
||||

|
||||
|
||||
> Veja ao vivo em [deerflow.tech](https://deerflow.tech/#multi-agent-architecture)
|
||||
|
||||
O sistema emprega um fluxo de trabalho simplificado com os seguintes componentes:
|
||||
|
||||
1. **Coordenador**: O ponto de entrada que gerencia o ciclo de vida do fluxo de trabalho
|
||||
|
||||
- Inicia o processo de pesquisa baseado na entrada do usuário
|
||||
- Delega tarefas so planejador quando apropriado
|
||||
- Atua como a interface primária entre o usuário e o sistema
|
||||
|
||||
2. **Planejador**: Componente estratégico para a decomposição e planejamento
|
||||
|
||||
- Analisa objetivos de pesquisa e cria planos de execução estruturados
|
||||
- Determina se há contexto suficiente disponível ou se mais pesquisa é necessária
|
||||
- Gerencia o fluxo de pesquisa e decide quando gerar o relatório final
|
||||
|
||||
3. **Time de Pesquisa**: Uma coleção de agentes especializados que executam o plano:
|
||||
|
||||
- **Pesquisador**: Conduz buscas web e coleta informações utilizando ferramentas como mecanismos de busca web, crawling e mesmo serviços MCP.
|
||||
- **Programador**: Lida com a análise de código, execução e tarefas técnicas como usar a ferramenta Python REPL.
|
||||
Cada agente tem acesso à ferramentas específicas otimizadas para seu papel e opera dentro do fluxo de trabalho LangGraph.
|
||||
|
||||
4. **Repórter**: Estágio final do processador de estágio para saídas de pesquisa
|
||||
- Resultados agregados do time de pesquisa
|
||||
- Processa e estrutura as informações coletadas
|
||||
- Gera relatórios abrangentes de pesquisas
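
Below is a minimal, illustrative LangGraph sketch of this four-component topology. It is a simplified model for orientation only; the node functions are stubs, and DeerFlow's real graph contains richer routing logic:

```python
from typing import TypedDict

from langgraph.graph import StateGraph, START, END


class ResearchState(TypedDict):
    topic: str
    plan: str
    findings: list[str]
    report: str


def coordinator(state: ResearchState) -> dict:
    # Entry point: in DeerFlow this decides whether to hand off to the planner.
    return {}

def planner(state: ResearchState) -> dict:
    # Strategic component: produces a structured execution plan.
    return {"plan": f"plan for {state['topic']}"}

def research_team(state: ResearchState) -> dict:
    # Stand-in for the researcher/coder agents executing the plan.
    return {"findings": [f"finding about {state['topic']}"]}

def reporter(state: ResearchState) -> dict:
    # Final stage: aggregates findings into a report.
    return {"report": "\n".join(state["findings"])}


builder = StateGraph(ResearchState)
builder.add_node("coordinator", coordinator)
builder.add_node("planner", planner)
builder.add_node("research_team", research_team)
builder.add_node("reporter", reporter)

builder.add_edge(START, "coordinator")
builder.add_edge("coordinator", "planner")
builder.add_edge("planner", "research_team")
builder.add_edge("research_team", "reporter")
builder.add_edge("reporter", END)

graph = builder.compile()
print(graph.invoke({"topic": "quantum computing", "plan": "", "findings": [], "report": ""}))
```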

## Text-to-Speech Integration

DeerFlow now includes a Text-to-Speech (TTS) feature that lets you convert research reports to speech. This feature uses the volcengine TTS API to generate high-quality audio from text. Characteristics such as speed, volume, and pitch are also customizable.

### Using the TTS API

You can access the TTS functionality through the `/api/tts` endpoint:

```bash
# Example API call using curl
curl --location 'http://localhost:8000/api/tts' \
--header 'Content-Type: application/json' \
--data '{
    "text": "This is a test of the text-to-speech functionality.",
    "speed_ratio": 1.0,
    "volume_ratio": 1.0,
    "pitch_ratio": 1.0
}' \
--output speech.mp3
```
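
The same endpoint can also be called from Python. A minimal sketch, assuming the backend is running locally on port 8000 and the `requests` package is available:

```python
import requests

payload = {
    "text": "This is a test of the text-to-speech functionality.",
    "speed_ratio": 1.0,   # playback speed
    "volume_ratio": 1.0,  # loudness
    "pitch_ratio": 1.0,   # voice pitch
}

response = requests.post("http://localhost:8000/api/tts", json=payload, timeout=60)
response.raise_for_status()

# The endpoint returns the synthesized audio as binary MP3 data.
with open("speech.mp3", "wb") as f:
    f.write(response.content)
```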

## Development

### Testing

Run the test suite:

```bash
# Run all tests
make test

# Run a specific test file
pytest tests/integration/test_workflow.py

# Run with coverage
make coverage
```

### Code Quality

```bash
# Run linting
make lint

# Format code
make format
```

### Debugging with LangGraph Studio

DeerFlow uses LangGraph for its workflow architecture. You can use LangGraph Studio to debug and visualize the workflow in real time.

#### Running LangGraph Studio Locally

DeerFlow includes a `langgraph.json` configuration file that defines the graph structure and dependencies for LangGraph Studio. This file points to the workflow graph defined in the project and automatically loads environment variables from the `.env` file.

##### Mac

```bash
# Install the uv package manager if you don't have it
curl -LsSf https://astral.sh/uv/install.sh | sh

# Install dependencies and start the LangGraph server
uvx --refresh --from "langgraph-cli[inmem]" --with-editable . --python 3.12 langgraph dev --allow-blocking
```

##### Windows / Linux

```bash
# Install dependencies
pip install -e .
pip install -U "langgraph-cli[inmem]"

# Start the LangGraph server
langgraph dev
```

After starting the LangGraph server, you will see several URLs in your terminal:

- API: http://127.0.0.1:2024
- Studio UI: https://smith.langchain.com/studio/?baseUrl=http://127.0.0.1:2024
- API Docs: http://127.0.0.1:2024/docs

Open the Studio UI link in your browser to access the debugging interface.

#### Using LangGraph Studio

In the Studio UI, you can:

1. Visualize the workflow graph and see how its components connect
2. Trace execution in real time and see how data flows through the system
3. Inspect the state at each step of the workflow
4. Debug issues by examining the inputs and outputs of each component
5. Provide feedback during the planning phase to refine research plans

When you submit a research topic in the Studio UI, you will be able to see the entire workflow execution, including:

- The planning phase, where the research plan is created
- The feedback loop, where you can modify the plan
- The research and writing phases for each section
- The final report generation

## Docker

You can also run this project via Docker.

First, read the [configuration](#configuration) section below. Make sure the `.env` and `.conf.yaml` files are ready.

Second, build your own Docker image of the web server:

```bash
docker build -t deer-flow-api .
```

Finally, start a Docker container running the web server:

```bash
# Replace deer-flow-api-app with your preferred container name
docker run -d -t -p 8000:8000 --env-file .env --name deer-flow-api-app deer-flow-api

# stop the server
docker stop deer-flow-api-app
```

### Docker Compose (includes both backend and frontend)

DeerFlow provides a docker-compose setup to easily run both the backend and frontend together:

```bash
# build the docker image
docker compose build

# start the server
docker compose up
```

## Examples

The following examples demonstrate DeerFlow's capabilities:

### Research Reports

1. **OpenAI Sora Report** - Analysis of OpenAI's Sora AI tool

   - Discusses features, access, prompt engineering, limitations, and ethical considerations
   - [View the full report](examples/openai_sora_report.md)

2. **Google's Agent-to-Agent Protocol Report** - Overview of Google's Agent-to-Agent (A2A) protocol

   - Discusses its role in AI agent communication and its relationship with Anthropic's Model Context Protocol (MCP)
   - [View the full report](examples/what_is_agent_to_agent_protocol.md)

3. **What Is MCP?** - A comprehensive analysis of the term "MCP" across multiple contexts

   - Explores the Model Context Protocol in AI, monocalcium phosphate in chemistry, and micro-channel plates in electronics
   - [View the full report](examples/what_is_mcp.md)

4. **Bitcoin Price Fluctuations** - Analysis of recent Bitcoin price movements

   - Examines market trends, regulatory influences, and technical indicators
   - Provides recommendations based on historical data
   - [View the full report](examples/bitcoin_price_fluctuation.md)

5. **What Is an LLM?** - An in-depth exploration of large language models

   - Discusses architecture, training, applications, and ethical considerations
   - [View the full report](examples/what_is_llm.md)

6. **How to Use Claude for Deep Research?** - Best practices and workflows for using Claude in deep research

   - Covers prompt engineering, data analysis, and integration with other tools
   - [View the full report](examples/how_to_use_claude_deep_research.md)

7. **AI Adoption in Healthcare: Influencing Factors** - Analysis of the factors driving AI adoption in healthcare

   - Discusses AI technologies, data quality, ethical considerations, economic evaluations, organizational readiness, and digital infrastructure
   - [View the full report](examples/AI_adoption_in_healthcare.md)

8. **Quantum Computing Impact on Cryptography** - Analysis of the impact of quantum computing on cryptography

   - Discusses the vulnerabilities of classical cryptography, post-quantum cryptography, and quantum-resistant cryptographic solutions
   - [View the full report](examples/Quantum_Computing_Impact_on_Cryptography.md)

9. **Cristiano Ronaldo's Performance Highlights** - Analysis of Cristiano Ronaldo's performance highlights

   - Discusses his career achievements, international goals, and performance in various matches
   - [View the full report](examples/Cristiano_Ronaldo's_Performance_Highlights.md)

To run these examples or create your own research reports, you can use the following commands:

```bash
# Run with a specific query
uv run main.py "What factors are influencing AI adoption in healthcare?"

# Run with custom planning parameters
uv run main.py --max_plan_iterations 3 "How does quantum computing impact cryptography?"

# Run in interactive mode with built-in questions
uv run main.py --interactive

# Or run with a basic interactive prompt
uv run main.py

# See all available options
uv run main.py --help
```

### Interactive Mode

The application now supports an interactive mode with built-in questions in both English and Chinese:

1. Launch interactive mode:

   ```bash
   uv run main.py --interactive
   ```

2. Select your preferred language (English or 中文)

3. Choose from the list of built-in questions, or select the option to ask your own question

4. The system will process your question and generate a comprehensive research report

### Human in the Loop

DeerFlow includes a human-in-the-loop mechanism that allows you to review, edit, and approve research plans before they are executed:

1. **Plan Review**: When human-in-the-loop is enabled, the system presents the generated research plan for your review before execution

2. **Providing Feedback**: You can:

   - Accept the plan by responding with `[ACCEPTED]`
   - Edit the plan by providing feedback (e.g., `[EDIT PLAN] Add more steps about the technical implementation`)
   - The system will incorporate your feedback and generate a revised plan

3. **Auto-acceptance**: You can enable auto-acceptance to skip the review process:

   - Via API: set `auto_accepted_plan: true` in your request

4. **API Integration**: When using the API, you can provide feedback through the `feedback` parameter:

   ```json
   {
     "messages": [{ "role": "user", "content": "What is quantum computing?" }],
     "thread_id": "my_thread_id",
     "auto_accepted_plan": false,
     "feedback": "[EDIT PLAN] Include more about quantum algorithms"
   }
   ```
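
For illustration, the same request can be sent from Python. Note that the chat endpoint path below is an assumption made for this sketch, not something documented in this README, so check the API docs served by the backend for the real route:

```python
import requests

# NOTE: the endpoint path is an assumption for illustration; consult the API
# docs exposed by the backend (e.g. http://localhost:8000/docs) for the real route.
url = "http://localhost:8000/api/chat/stream"

payload = {
    "messages": [{"role": "user", "content": "What is quantum computing?"}],
    "thread_id": "my_thread_id",
    "auto_accepted_plan": False,
    "feedback": "[EDIT PLAN] Include more about quantum algorithms",
}

with requests.post(url, json=payload, stream=True, timeout=300) as response:
    response.raise_for_status()
    for line in response.iter_lines():
        if line:
            print(line.decode("utf-8"))
```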

### Command-Line Arguments

The application supports several command-line arguments to customize its behavior (an illustrative parser sketch follows the list):

- **query**: The research query to process (can be multiple words)
- **--interactive**: Run in interactive mode with built-in questions
- **--max_plan_iterations**: Maximum number of planning cycles (default: 1)
- **--max_step_num**: Maximum number of steps in a research plan (default: 3)
- **--debug**: Enable detailed debug logging
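
A minimal sketch of how such a command line could be declared with Python's standard `argparse` module; the actual parser lives in `main.py`, and this is not its code:

```python
import argparse

# Illustrative parser mirroring the documented flags; not DeerFlow's actual code.
parser = argparse.ArgumentParser(description="Run a DeerFlow research workflow.")
parser.add_argument("query", nargs="*", help="the research query to process")
parser.add_argument("--interactive", action="store_true",
                    help="run in interactive mode with built-in questions")
parser.add_argument("--max_plan_iterations", type=int, default=1,
                    help="maximum number of planning cycles")
parser.add_argument("--max_step_num", type=int, default=3,
                    help="maximum number of steps in a research plan")
parser.add_argument("--debug", action="store_true",
                    help="enable detailed debug logging")

args = parser.parse_args()
print(args)
```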

## FAQ

Please refer to [FAQ.md](docs/FAQ.md) for more details.

## License

This project is open source and available under the [MIT License](./LICENSE).

## Acknowledgments

DeerFlow is built upon the incredible work of the open-source community. We are deeply grateful to all the projects and contributors whose efforts made DeerFlow possible. Truly, we stand on the shoulders of giants.

We would like to extend our sincere thanks to the following projects for their invaluable contributions:

- **[LangChain](https://github.com/langchain-ai/langchain)**: Their exceptional framework powers our LLM interactions and chains, enabling seamless integration and functionality.
- **[LangGraph](https://github.com/langchain-ai/langgraph)**: Their innovative approach to multi-agent orchestration has been instrumental in enabling DeerFlow's sophisticated workflows.

These projects exemplify the transformative power of open-source collaboration, and we are proud to build upon their foundations.

### Key Contributors

A heartfelt thank-you goes to the core authors of `DeerFlow`, whose vision, passion, and dedication brought this project to life:

- **[Daniel Walnut](https://github.com/hetaoBackend/)**
- **[Henry Li](https://github.com/magiccube/)**

Your unwavering commitment and expertise have been the driving force behind DeerFlow's success. We are honored to have you leading this journey.

## Star History

[![Star History Chart](https://api.star-history.com/svg?repos=bytedance/deer-flow&type=Date)](https://star-history.com/#bytedance/deer-flow&Date)

-554
@@ -1,554 +0,0 @@

# 🦌 DeerFlow

[![Python 3.12+](https://img.shields.io/badge/python-3.12+-blue.svg)](https://www.python.org/downloads/)
[![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT)
[![Ask DeepWiki](https://deepwiki.com/badge.svg)](https://deepwiki.com/bytedance/deer-flow)
<!-- DeepWiki badge generated by https://deepwiki.ryoppippi.com/ -->

[English](./README.md) | [简体中文](./README_zh.md) | [日本語](./README_ja.md) | [Deutsch](./README_de.md) | [Español](./README_es.md) | [Русский](./README_ru.md) | [Portuguese](./README_pt.md)

> Originated from open source, give back to open source.

**DeerFlow** (**D**eep **E**xploration and **E**fficient **R**esearch **Flow**) is a community-driven deep research framework built on the outstanding work of the open-source community. Our goal is to combine language models with specialized tools for tasks such as web search, crawling, and Python code execution, while giving back to the community that made this possible.

Please visit [our official website](https://deerflow.tech/) for more details.

## Demo

### Video

https://github.com/user-attachments/assets/f3786598-1f2a-4d07-919e-8b99dfa1de3e

In this demo, we show how to use DeerFlow to:

- Seamlessly integrate with MCP services
- Conduct a deep research process and produce a comprehensive report with images
- Create podcast audio based on the generated report

### Replays

- [How tall is the Eiffel Tower compared to the tallest building?](https://deerflow.tech/chat?replay=eiffel-tower-vs-tallest-building)
- [What are the most popular repositories on GitHub?](https://deerflow.tech/chat?replay=github-top-trending-repo)
- [Write an article about Nanjing's traditional dishes](https://deerflow.tech/chat?replay=nanjing-traditional-dishes)
- [How to decorate a rental apartment?](https://deerflow.tech/chat?replay=rental-apartment-decoration)
- [Visit our official website to explore more replays.](https://deerflow.tech/#case-studies)

---

## 📑 Table of Contents

- [🚀 Quick Start](#quick-start)
- [🌟 Features](#features)
- [🏗️ Architecture](#architecture)
- [🛠️ Development](#development)
- [🐳 Docker](#docker)
- [🗣️ Text-to-Speech Integration](#text-to-speech-integration)
- [📚 Examples](#examples)
- [❓ FAQ](#faq)
- [📜 License](#license)
- [💖 Acknowledgments](#acknowledgments)
- [⭐ Star History](#star-history)

## Quick Start

DeerFlow is developed in Python and comes with a web UI written in Node.js. To ensure a smooth setup process, we recommend the following tools:

### Recommended Tools

- **[`uv`](https://docs.astral.sh/uv/getting-started/installation/):**
  Simplifies Python environment and dependency management. `uv` automatically creates a virtual environment in the root directory and installs all required packages for you, with no need to manually install Python environments.

- **[`nvm`](https://github.com/nvm-sh/nvm):**
  Easily manage multiple versions of the Node.js runtime.

- **[`pnpm`](https://pnpm.io/installation):**
  Install and manage dependencies of the Node.js project.

### Environment Requirements

Make sure your system meets the following minimum requirements:

- **[Python](https://www.python.org/downloads/):** Version `3.12+`
- **[Node.js](https://nodejs.org/en/download/):** Version `22+`

### Installation

```bash
# Clone the repository
git clone https://github.com/bytedance/deer-flow.git
cd deer-flow

# Install dependencies; uv will take care of the Python interpreter, venv creation, and package installation
uv sync

# Configure .env with your API keys
# Tavily: https://app.tavily.com/home
# Brave_SEARCH: https://brave.com/search/api/
# volcengine TTS: Add your TTS credentials if you have them
cp .env.example .env

# See the 'Supported Search Engines' and 'Text-to-Speech Integration' sections below for all available options

# Configure conf.yaml for your LLM model and API keys
# Please refer to 'docs/configuration_guide.md' for more details
cp conf.yaml.example conf.yaml

# Install marp for presentation generation
# https://github.com/marp-team/marp-cli?tab=readme-ov-file#use-package-manager
brew install marp-cli
```

Optionally, install the web UI dependencies via [pnpm](https://pnpm.io/installation):

```bash
cd deer-flow/web
pnpm install
```

### Configuration

Please refer to the [Configuration Guide](docs/configuration_guide.md) for more details.

> [!NOTE]
> Before you start the project, read the guide carefully and update the configurations to match your specific settings and requirements.

### Console UI

The quickest way to run the project is to use the console UI.

```bash
# Run the project in a bash-like shell
uv run main.py
```

### Web UI

This project also includes a web UI, offering a more dynamic and engaging interactive experience.

> [!NOTE]
> You need to install the web UI dependencies first.

```bash
# Run both the backend and frontend servers in development mode
# On macOS/Linux
./bootstrap.sh -d

# On Windows
bootstrap.bat -d
```

Open your browser and visit [`http://localhost:3000`](http://localhost:3000) to explore the web UI.

Explore more details in the [`web`](./web/) directory.

## Supported Search Engines

DeerFlow supports multiple search engines, which can be configured in the `.env` file via the `SEARCH_API` variable:

- **Tavily** (default): A specialized search API for AI applications

  - Requires `TAVILY_API_KEY` in your `.env` file
  - Sign up at: https://app.tavily.com/home

- **DuckDuckGo**: Privacy-focused search engine

  - No API key required

- **Brave Search**: Privacy-focused search engine with advanced features

  - Requires `BRAVE_SEARCH_API_KEY` in your `.env` file
  - Sign up at: https://brave.com/search/api/

- **Arxiv**: Scientific paper search for academic research

  - No API key required
  - Specializes in scientific and academic papers

To configure your preferred search engine, set the `SEARCH_API` variable in your `.env` file:

```bash
# Choose one: tavily, duckduckgo, brave_search, arxiv
SEARCH_API=tavily
```

## Features

### Core Capabilities

- 🤖 **LLM Integration**

  - Supports integration of most models via [litellm](https://docs.litellm.ai/docs/providers).
  - Support for open-source models such as Qwen
  - OpenAI-compatible API interface
  - Multi-tier LLM system for tasks of varying complexity

### Tools and MCP Integrations

- 🔍 **Search and Retrieval**

  - Web search via Tavily, Brave Search, and more
  - Crawling with Jina
  - Advanced content extraction

- 🔗 **Seamless MCP Integration**

  - Expands capabilities for private domain access, knowledge graphs, web browsing, and more
  - Facilitates the integration of diverse research tools and methodologies

### Human Collaboration

- 🧠 **Human-in-the-Loop**

  - Supports interactive modification of research plans using natural language
  - Supports auto-acceptance of research plans

- 📝 **Report Post-Editing**

  - Supports Notion-style block editing
  - Allows AI refinements, including polishing, sentence shortening, and expansion
  - Powered by [tiptap](https://tiptap.dev/)

### Content Creation

- 🎙️ **Podcast and Presentation Generation**

  - AI-powered podcast script generation and audio synthesis
  - Automatic creation of simple PowerPoint presentations
  - Customizable templates for tailored content

## Architecture

DeerFlow implements a modular multi-agent system architecture designed for automated research and code analysis. The system is built on LangGraph, enabling a flexible state-based workflow where components communicate through a well-defined message-passing system.

![Architecture Diagram](./assets/architecture.png)

> See it live at [deerflow.tech](https://deerflow.tech/#multi-agent-architecture)

The system employs a streamlined workflow with the following components:

1. **Coordinator**: The entry point that manages the workflow lifecycle

   - Initiates the research process based on user input
   - Delegates tasks to the planner when necessary
   - Acts as the primary interface between the user and the system

2. **Planner**: Strategic component for task decomposition and planning

   - Analyzes research objectives and creates structured execution plans
   - Determines whether sufficient context is available or more research is needed
   - Manages the research flow and decides when to generate the final report

3. **Research Team**: A collection of specialized agents that execute the plan:

   - **Researcher**: Conducts web searches and information gathering using tools such as search engines, crawling, and even MCP services.
   - **Coder**: Handles code analysis, execution, and technical tasks using the Python REPL tool.

   Each agent has access to specific tools optimized for its role and operates within the LangGraph framework.

4. **Reporter**: Final-stage processor for research outputs

   - Aggregates the research team's findings
   - Processes and structures the collected information
   - Generates comprehensive research reports

## Text-to-Speech Integration

DeerFlow now includes a text-to-speech (TTS) feature that allows you to convert research reports to speech. This feature uses the volcengine TTS API to generate high-quality audio from text. Parameters such as speed, volume, and pitch are also customizable.

### Using the TTS API

You can access the TTS functionality through the `/api/tts` endpoint:

```bash
# Example API call using curl
curl --location 'http://localhost:8000/api/tts' \
--header 'Content-Type: application/json' \
--data '{
    "text": "This is a test of the text-to-speech functionality.",
    "speed_ratio": 1.0,
    "volume_ratio": 1.0,
    "pitch_ratio": 1.0
}' \
--output speech.mp3
```

## Development

### Testing

Run the test suite:

```bash
# Run all tests
make test

# Run a specific test file
pytest tests/integration/test_workflow.py

# Run with coverage
make coverage
```

### Code Quality

```bash
# Run linting
make lint

# Format code
make format
```

### Debugging with LangGraph Studio

DeerFlow uses LangGraph for its workflow architecture. You can use LangGraph Studio to debug and visualize the workflow in real time.

#### Running LangGraph Studio Locally

DeerFlow includes a `langgraph.json` configuration file that defines the graph structure and dependencies for LangGraph Studio. This file points to the workflow graphs defined in the project and automatically loads environment variables from the `.env` file.

##### Mac

```bash
# Install the uv package manager if you don't have it
curl -LsSf https://astral.sh/uv/install.sh | sh

# Install dependencies and start the LangGraph server
uvx --refresh --from "langgraph-cli[inmem]" --with-editable . --python 3.12 langgraph dev --allow-blocking
```

##### Windows / Linux

```bash
# Install dependencies
pip install -e .
pip install -U "langgraph-cli[inmem]"

# Start the LangGraph server
langgraph dev
```

After starting the LangGraph server, you will see several URLs in your terminal:

- API: http://127.0.0.1:2024
- Studio UI: https://smith.langchain.com/studio/?baseUrl=http://127.0.0.1:2024
- API Docs: http://127.0.0.1:2024/docs

Open the Studio UI link in your browser to access the debugging interface.

#### Using LangGraph Studio

In the Studio UI, you can:

1. Visualize the workflow graph and see how the components connect
2. Track execution in real time to see how data flows through the system
3. Inspect the state at each step of the workflow
4. Debug issues by examining the inputs and outputs of each component
5. Provide feedback during the planning phase to refine research plans

When you submit a research topic in the Studio UI, you will be able to see the entire workflow execution, including:

- The planning phase, where the research plan is created
- The feedback loop, where you can modify the plan
- The research and writing phases for each section
- The final report generation

### Enabling LangSmith Tracing

DeerFlow supports LangSmith tracing to help you debug and monitor your workflows. To enable LangSmith tracing:

1. Make sure your `.env` file has the following configuration (see `.env.example`):

   ```bash
   LANGSMITH_TRACING=true
   LANGSMITH_ENDPOINT="https://api.smith.langchain.com"
   LANGSMITH_API_KEY="xxx"
   LANGSMITH_PROJECT="xxx"
   ```

2. Start tracing and visualize the graph locally with LangSmith by running:

   ```bash
   langgraph dev
   ```

This will enable trace visualization in LangGraph Studio and send your traces to LangSmith for monitoring and analysis.
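
As a quick sanity check, a small sketch (assuming the `python-dotenv` package) that verifies these variables are visible to the process before you launch `langgraph dev`:

```python
import os

from dotenv import load_dotenv  # assumes the python-dotenv package

load_dotenv()  # pull the LANGSMITH_* settings from .env into the environment

REQUIRED = ["LANGSMITH_TRACING", "LANGSMITH_ENDPOINT", "LANGSMITH_API_KEY", "LANGSMITH_PROJECT"]
missing = [name for name in REQUIRED if not os.getenv(name)]

if missing:
    raise SystemExit(f"Missing LangSmith settings: {', '.join(missing)}")
print("LangSmith tracing is configured for project:", os.environ["LANGSMITH_PROJECT"])
```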

## Docker

You can also run this project with Docker.

First, read the [configuration](docs/configuration_guide.md) section below. Make sure the `.env` and `.conf.yaml` files are ready.

Second, build a Docker image of your own web server:

```bash
docker build -t deer-flow-api .
```

Finally, start a Docker container running the web server:

```bash
# Replace deer-flow-api-app with your preferred container name
docker run -d -t -p 8000:8000 --env-file .env --name deer-flow-api-app deer-flow-api

# stop the server
docker stop deer-flow-api-app
```

### Docker Compose (includes both backend and frontend)

DeerFlow provides a docker-compose setup to easily run the backend and frontend together:

```bash
# build the docker image
docker compose build

# start the server
docker compose up
```

## Examples

The following examples demonstrate DeerFlow's capabilities:

### Research Reports

1. **OpenAI Sora Report** - Analysis of OpenAI's Sora AI tool

   - Discusses features, access, prompt engineering, limitations, and ethical considerations
   - [View the full report](examples/openai_sora_report.md)

2. **Google's Agent-to-Agent Protocol Report** - Overview of Google's Agent-to-Agent (A2A) protocol

   - Discusses its role in AI agent communication and its relationship with Anthropic's Model Context Protocol (MCP)
   - [View the full report](examples/what_is_agent_to_agent_protocol.md)

3. **What Is MCP?** - A comprehensive analysis of the term "MCP" across multiple contexts

   - Explores the Model Context Protocol in AI, monocalcium phosphate in chemistry, and micro-channel plates in electronics
   - [View the full report](examples/what_is_mcp.md)

4. **Bitcoin Price Fluctuations** - Analysis of recent Bitcoin price movements

   - Examines market trends, regulatory influences, and technical indicators
   - Provides recommendations based on historical data
   - [View the full report](examples/bitcoin_price_fluctuation.md)

5. **What Is an LLM?** - An in-depth exploration of large language models

   - Discusses architecture, training, applications, and ethical considerations
   - [View the full report](examples/what_is_llm.md)

6. **How to Use Claude for Deep Research?** - Best practices and workflows for using Claude in deep research

   - Covers prompt engineering, data analysis, and integration with other tools
   - [View the full report](examples/how_to_use_claude_deep_research.md)

7. **AI Adoption in Healthcare: Influencing Factors** - Analysis of the factors driving AI adoption in healthcare

   - Discusses AI technologies, data quality, ethical considerations, economic evaluations, organizational readiness, and digital infrastructure
   - [View the full report](examples/AI_adoption_in_healthcare.md)

8. **Quantum Computing Impact on Cryptography** - Analysis of the impact of quantum computing on cryptography

   - Discusses the vulnerabilities of classical cryptography, post-quantum cryptography, and quantum-resistant cryptographic solutions
   - [View the full report](examples/Quantum_Computing_Impact_on_Cryptography.md)

9. **Cristiano Ronaldo's Performance Highlights** - Analysis of Cristiano Ronaldo's performance highlights

   - Discusses his career achievements, international goals, and performance in various matches
   - [View the full report](examples/Cristiano_Ronaldo's_Performance_Highlights.md)

To run these examples or create your own research reports, you can use the following commands:

```bash
# Run with a specific query
uv run main.py "What factors are influencing AI adoption in healthcare?"

# Run with custom planning parameters
uv run main.py --max_plan_iterations 3 "How does quantum computing impact cryptography?"

# Run in interactive mode with built-in questions
uv run main.py --interactive

# Or run with a basic interactive prompt
uv run main.py

# See all available options
uv run main.py --help
```

### Interactive Mode

The application now supports an interactive mode with built-in questions in both English and Chinese:

1. Launch interactive mode:

   ```bash
   uv run main.py --interactive
   ```

2. Select your preferred language (English or 中文)

3. Choose from the list of built-in questions, or select the option to ask your own question

4. The system will process your question and generate a comprehensive research report

### Human in the Loop

DeerFlow includes a human-in-the-loop mechanism that allows you to review, edit, and approve research plans before they are executed:

1. **Plan Review**: When human-in-the-loop is enabled, the system presents the generated research plan for your review before execution

2. **Providing Feedback**: You can:

   - Accept the plan by responding with `[ACCEPTED]`
   - Edit the plan by providing feedback (e.g., `[EDIT PLAN] Add more steps about the technical implementation`)
   - The system will incorporate your feedback and generate a revised plan

3. **Auto-acceptance**: You can enable auto-acceptance to skip the review process:

   - Via API: set `auto_accepted_plan: true` in your request

4. **API Integration**: When using the API, you can provide feedback through the `feedback` parameter:

   ```json
   {
     "messages": [{ "role": "user", "content": "What is quantum computing?" }],
     "thread_id": "my_thread_id",
     "auto_accepted_plan": false,
     "feedback": "[EDIT PLAN] Include more about quantum algorithms"
   }
   ```

### Command-Line Arguments

The application supports several command-line arguments to customize its behavior:

- **query**: The research query to process (can be multiple words)
- **--interactive**: Run in interactive mode with built-in questions
- **--max_plan_iterations**: Maximum number of planning cycles (default: 1)
- **--max_step_num**: Maximum number of steps in a research plan (default: 3)
- **--debug**: Enable detailed debug logging

## FAQ

Please refer to [FAQ.md](docs/FAQ.md) for more details.

## License

This project is open source and available under the [MIT License](./LICENSE).

## Acknowledgments

DeerFlow is built on the incredible work of the open-source community. We are deeply grateful to all the projects and contributors whose efforts made DeerFlow possible. Truly, we stand on the shoulders of giants.

We would like to express our sincere appreciation to the following projects for their invaluable contributions:

- **[LangChain](https://github.com/langchain-ai/langchain)**: Their exceptional framework powers our LLM interactions and chains, enabling seamless integration and functionality.
- **[LangGraph](https://github.com/langchain-ai/langgraph)**: Their innovative approach to multi-agent orchestration has been instrumental in enabling DeerFlow's sophisticated workflows.

These projects exemplify the transformative power of open-source collaboration, and we are proud to build on their foundations.

### Key Contributors

A heartfelt thank-you to the core authors of `DeerFlow`, whose vision, passion, and dedication brought this project to life:

- **[Daniel Walnut](https://github.com/hetaoBackend/)**
- **[Henry Li](https://github.com/magiccube/)**

Your unwavering commitment and expertise have been the driving force behind DeerFlow's success. We are honored to have you leading this journey.

## Star History

[![Star History Chart](https://api.star-history.com/svg?repos=bytedance/deer-flow&type=Date)](https://star-history.com/#bytedance/deer-flow&Date)

-551
@@ -1,551 +0,0 @@

# 🦌 DeerFlow

[![Python 3.12+](https://img.shields.io/badge/python-3.12+-blue.svg)](https://www.python.org/downloads/)
[![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT)

[English](./README.md) | [简体中文](./README_zh.md) | [日本語](./README_ja.md) | [Deutsch](./README_de.md) | [Español](./README_es.md) | [Русский](./README_ru.md) | [Portuguese](./README_pt.md)

> Originated from open source, give back to open source.

**DeerFlow** (**D**eep **E**xploration and **E**fficient **R**esearch **Flow**) is a community-driven deep research framework built on the outstanding work of the open-source community. Our goal is to combine language models with specialized tools (such as web search, crawling, and Python code execution) while giving back to the community that made this possible.

Please visit [DeerFlow's official website](https://deerflow.tech/) for more details.

## Demo

### Video

https://github.com/user-attachments/assets/f3786598-1f2a-4d07-919e-8b99dfa1de3e

In this demo, we show how to use DeerFlow to:

- Seamlessly integrate MCP services
- Conduct a deep research process and generate a comprehensive report with images
- Create podcast audio based on the generated report

### Replay Examples

- [How tall is the Eiffel Tower compared to the tallest building?](https://deerflow.tech/chat?replay=eiffel-tower-vs-tallest-building)
- [What are the most popular repositories on GitHub?](https://deerflow.tech/chat?replay=github-top-trending-repo)
- [Write an article about Nanjing's traditional dishes](https://deerflow.tech/chat?replay=nanjing-traditional-dishes)
- [How to decorate a rental apartment?](https://deerflow.tech/chat?replay=rental-apartment-decoration)
- [Visit our official website to explore more replay examples.](https://deerflow.tech/#case-studies)

---

## 📑 Table of Contents

- [🚀 Quick Start](#quick-start)
- [🌟 Features](#features)
- [🏗️ Architecture](#architecture)
- [🛠️ Development](#development)
- [🗣️ Text-to-Speech Integration](#text-to-speech-integration)
- [📚 Examples](#examples)
- [❓ FAQ](#faq)
- [📜 License](#license)
- [💖 Acknowledgments](#acknowledgments)
- [⭐ Star History](#star-history)

## Quick Start

DeerFlow is developed in Python and comes with a web UI written in Node.js. To ensure a smooth setup process, we recommend the following tools:

### Recommended Tools

- **[`uv`](https://docs.astral.sh/uv/getting-started/installation/):**
  Simplifies Python environment and dependency management. `uv` automatically creates a virtual environment in the root directory and installs all required packages for you, with no need to manually install a Python environment.

- **[`nvm`](https://github.com/nvm-sh/nvm):**
  Easily manage multiple Node.js runtime versions.

- **[`pnpm`](https://pnpm.io/installation):**
  Install and manage dependencies of the Node.js project.

### Environment Requirements

Make sure your system meets the following minimum requirements:

- **[Python](https://www.python.org/downloads/):** Version `3.12+`
- **[Node.js](https://nodejs.org/en/download/):** Version `22+`

### Installation

```bash
# Clone the repository
git clone https://github.com/bytedance/deer-flow.git
cd deer-flow

# Install dependencies; uv will take care of the Python interpreter, venv creation, and package installation
uv sync

# Configure .env with your API keys
# Tavily: https://app.tavily.com/home
# Brave_SEARCH: https://brave.com/search/api/
# volcengine TTS: Add your TTS credentials if you have them
cp .env.example .env

# See the "Supported Search Engines" and "Text-to-Speech Integration" sections below for all available options

# Configure conf.yaml for your LLM model and API keys
# Please refer to 'docs/configuration_guide.md' for more details
cp conf.yaml.example conf.yaml

# Install marp for PPT generation
# https://github.com/marp-team/marp-cli?tab=readme-ov-file#use-package-manager
brew install marp-cli
```

Optionally, install the web UI dependencies via [pnpm](https://pnpm.io/installation):

```bash
cd deer-flow/web
pnpm install
```

### Configuration

Please refer to the [Configuration Guide](docs/configuration_guide.md) for more details.

> [!NOTE]
> Before you start the project, read the guide carefully and update the configurations to match your specific settings and requirements.

### Console UI

The quickest way to run the project is to use the console UI.

```bash
# Run the project in a bash-like shell
uv run main.py
```

### Web UI

This project also includes a web UI, offering a more dynamic and engaging interactive experience.

> [!NOTE]
> You need to install the web UI dependencies first.

```bash
# Run both the backend and frontend servers in development mode
# On macOS/Linux
./bootstrap.sh -d

# On Windows
bootstrap.bat -d
```

Open your browser and visit [`http://localhost:3000`](http://localhost:3000) to explore the web UI.

Explore more details in the [`web`](./web/) directory.

## Supported Search Engines

DeerFlow supports multiple search engines, which can be configured in the `.env` file via the `SEARCH_API` variable:

- **Tavily** (default): A specialized search API designed for AI applications

  - Requires `TAVILY_API_KEY` in your `.env` file
  - Sign up at: https://app.tavily.com/home

- **DuckDuckGo**: Privacy-focused search engine

  - No API key required

- **Brave Search**: Privacy-focused search engine with advanced features

  - Requires `BRAVE_SEARCH_API_KEY` in your `.env` file
  - Sign up at: https://brave.com/search/api/

- **Arxiv**: Scientific paper search for academic research

  - No API key required
  - Designed for scientific and academic papers

To configure your preferred search engine, set the `SEARCH_API` variable in your `.env` file:

```bash
# Choose one: tavily, duckduckgo, brave_search, arxiv
SEARCH_API=tavily
```

## Features

### Core Capabilities

- 🤖 **LLM Integration**

  - Supports integration of most models via [litellm](https://docs.litellm.ai/docs/providers)
  - Support for open-source models such as Qwen
  - OpenAI-compatible API interface
  - Multi-tier LLM system for tasks of varying complexity

### Tools and MCP Integrations

- 🔍 **Search and Retrieval**

  - Web search via Tavily, Brave Search, and more
  - Crawling with Jina
  - Advanced content extraction

- 🔗 **Seamless MCP Integration**

  - Expands capabilities for private domain access, knowledge graphs, web browsing, and more
  - Facilitates the integration of diverse research tools and methodologies

### Human Collaboration

- 🧠 **Human-in-the-Loop**

  - Supports interactive modification of research plans using natural language
  - Supports auto-acceptance of research plans

- 📝 **Report Post-Editing**

  - Supports Notion-style block editing
  - Allows AI refinements, including AI-assisted polishing, sentence shortening, and expansion
  - Powered by [tiptap](https://tiptap.dev/)

### Content Creation

- 🎙️ **Podcast and Presentation Generation**

  - AI-driven podcast script generation and audio synthesis
  - Automatic creation of simple PowerPoint presentations
  - Customizable templates for personalized content

## Architecture

DeerFlow implements a modular multi-agent system architecture designed for automated research and code analysis. The system is built on LangGraph, enabling a flexible state-based workflow where components communicate through a well-defined message-passing system.

![Architecture Diagram](./assets/architecture.png)

> See it live at [deerflow.tech](https://deerflow.tech/#multi-agent-architecture)

The system employs a streamlined workflow with the following components:

1. **Coordinator**: The entry point that manages the workflow lifecycle

   - Initiates the research process based on user input
   - Delegates tasks to the planner when appropriate
   - Acts as the primary interface between the user and the system

2. **Planner**: Strategic component for task decomposition and planning

   - Analyzes research objectives and creates structured execution plans
   - Determines whether sufficient context is available or more research is needed
   - Manages the research flow and decides when to generate the final report

3. **Research Team**: A collection of specialized agents that execute the plan:

   - **Researcher**: Conducts web searches and information gathering using tools such as web search engines, crawling, and even MCP services.
   - **Coder**: Handles code analysis, execution, and technical tasks using the Python REPL tool.

   Each agent has access to specific tools optimized for its role and operates within the LangGraph framework.

4. **Reporter**: Final-stage processor for research outputs

   - Aggregates the research team's findings
   - Processes and organizes the collected information
   - Generates comprehensive research reports

## Development

### Testing

Run the test suite:

```bash
# Run all tests
make test

# Run a specific test file
pytest tests/integration/test_workflow.py

# Run with coverage
make coverage
```

### Code Quality

```bash
# Run linting
make lint

# Format code
make format
```

### Debugging with LangGraph Studio

DeerFlow uses LangGraph for its workflow architecture. You can use LangGraph Studio to debug and visualize the workflow in real time.

#### Running LangGraph Studio Locally

DeerFlow includes a `langgraph.json` configuration file that defines the graph structure and dependencies for LangGraph Studio. This file points to the workflow graph defined in the project and automatically loads environment variables from the `.env` file.

##### Mac

```bash
# Install the uv package manager if you don't have it
curl -LsSf https://astral.sh/uv/install.sh | sh

# Install dependencies and start the LangGraph server
uvx --refresh --from "langgraph-cli[inmem]" --with-editable . --python 3.12 langgraph dev --allow-blocking
```

##### Windows / Linux

```bash
# Install dependencies
pip install -e .
pip install -U "langgraph-cli[inmem]"

# Start the LangGraph server
langgraph dev
```

After starting the LangGraph server, you will see several URLs in your terminal:

- API: http://127.0.0.1:2024
- Studio UI: https://smith.langchain.com/studio/?baseUrl=http://127.0.0.1:2024
- API Docs: http://127.0.0.1:2024/docs

Open the Studio UI link in your browser to access the debugging interface.

#### Using LangGraph Studio

In the Studio UI, you can:

1. Visualize the workflow graph and see how the components connect
2. Track execution in real time to understand how data flows through the system
3. Inspect the state at each step of the workflow
4. Debug issues by examining the inputs and outputs of each component
5. Provide feedback during the planning phase to refine research plans

When you submit a research topic in the Studio UI, you will be able to see the entire workflow execution, including:

- The planning phase, where the research plan is created
- The feedback loop, where you can modify the plan
- The research and writing phases for each section
- The final report generation

### Enabling LangSmith Tracing

DeerFlow supports LangSmith tracing to help you debug and monitor your workflows. To enable LangSmith tracing:

1. Make sure your `.env` file has the following configuration (see `.env.example`):

   ```bash
   LANGSMITH_TRACING=true
   LANGSMITH_ENDPOINT="https://api.smith.langchain.com"
   LANGSMITH_API_KEY="xxx"
   LANGSMITH_PROJECT="xxx"
   ```

2. Start LangSmith tracing locally by running:

   ```bash
   langgraph dev
   ```

This will enable trace visualization in LangGraph Studio and send your traces to LangSmith for monitoring and analysis.

## Docker

You can also run this project with Docker.

First, you need to read the [configuration](#configuration) section below. Make sure the `.env` and `.conf.yaml` files are ready.

Second, build your own Docker image of the web server:

```bash
docker build -t deer-flow-api .
```

Finally, start a Docker container running the web server:

```bash
# Replace deer-flow-api-app with your preferred container name
docker run -d -t -p 8000:8000 --env-file .env --name deer-flow-api-app deer-flow-api

# stop the server
docker stop deer-flow-api-app
```

### Docker Compose

You can also set up this project using docker compose:

```bash
# build the docker image
docker compose build

# start the server
docker compose up
```

## Text-to-Speech Integration

DeerFlow now includes a text-to-speech (TTS) feature that allows you to convert research reports to speech. This feature uses the volcengine TTS API to generate high-quality audio from text. Characteristics such as speed, volume, and pitch are also customizable.

### Using the TTS API

You can access the TTS functionality through the `/api/tts` endpoint:

```bash
# Example API call using curl
curl --location 'http://localhost:8000/api/tts' \
--header 'Content-Type: application/json' \
--data '{
    "text": "This is a test of the text-to-speech functionality.",
    "speed_ratio": 1.0,
    "volume_ratio": 1.0,
    "pitch_ratio": 1.0
}' \
--output speech.mp3
```

## Examples

The following examples demonstrate DeerFlow's capabilities:

### Research Reports

1. **OpenAI Sora Report** - Analysis of OpenAI's Sora AI tool

   - Discusses features, access, prompt engineering, limitations, and ethical considerations
   - [View the full report](examples/openai_sora_report.md)

2. **Google's Agent-to-Agent Protocol Report** - Overview of Google's Agent-to-Agent (A2A) protocol

   - Discusses its role in AI agent communication and its relationship with Anthropic's Model Context Protocol (MCP)
   - [View the full report](examples/what_is_agent_to_agent_protocol.md)

3. **What Is MCP?** - A comprehensive analysis of the term "MCP" across multiple contexts

   - Explores the Model Context Protocol in AI, monocalcium phosphate in chemistry, and micro-channel plates in electronics
   - [View the full report](examples/what_is_mcp.md)

4. **Bitcoin Price Fluctuations** - Analysis of recent Bitcoin price movements

   - Examines market trends, regulatory influences, and technical indicators
   - Provides recommendations based on historical data
   - [View the full report](examples/bitcoin_price_fluctuation.md)

5. **What Is an LLM?** - An in-depth exploration of large language models

   - Discusses architecture, training, applications, and ethical considerations
   - [View the full report](examples/what_is_llm.md)

6. **How to Use Claude for Deep Research?** - Best practices and workflows for using Claude in deep research

   - Covers prompt engineering, data analysis, and integration with other tools
   - [View the full report](examples/how_to_use_claude_deep_research.md)

7. **AI Adoption in Healthcare: Influencing Factors** - Analysis of the factors influencing AI adoption in healthcare

   - Discusses AI technologies, data quality, ethical considerations, economic evaluations, organizational readiness, and digital infrastructure
   - [View the full report](examples/AI_adoption_in_healthcare.md)

8. **Quantum Computing Impact on Cryptography** - Analysis of the impact of quantum computing on cryptography

   - Discusses the vulnerabilities of classical cryptography, post-quantum cryptography, and quantum-resistant cryptographic solutions
   - [View the full report](examples/Quantum_Computing_Impact_on_Cryptography.md)

9. **Cristiano Ronaldo's Performance Highlights** - Analysis of Cristiano Ronaldo's performance highlights

   - Discusses his career achievements, international goals, and performance in various matches
   - [View the full report](examples/Cristiano_Ronaldo's_Performance_Highlights.md)

To run these examples or create your own research reports, you can use the following commands:

```bash
# Run with a specific query
uv run main.py "What factors are influencing AI adoption in healthcare?"

# Run with custom planning parameters
uv run main.py --max_plan_iterations 3 "How does quantum computing impact cryptography?"

# Run in interactive mode with built-in questions
uv run main.py --interactive

# Or run with a basic interactive prompt
uv run main.py

# See all available options
uv run main.py --help
```

### Interactive Mode

The application now supports an interactive mode with built-in questions in both English and Chinese:

1. Launch interactive mode:

   ```bash
   uv run main.py --interactive
   ```

2. Select your preferred language (English or 中文)

3. Choose from the list of built-in questions, or select the option to ask your own question

4. The system will process your question and generate a comprehensive research report

### Human in the Loop

DeerFlow includes a human-in-the-loop mechanism that allows you to review, edit, and approve research plans before they are executed:

1. **Plan Review**: When human-in-the-loop is enabled, the system presents the generated research plan for your review before execution

2. **Providing Feedback**: You can:

   - Accept the plan by responding with `[ACCEPTED]`
   - Edit the plan by providing feedback (e.g., `[EDIT PLAN] Add more steps about the technical implementation`)
   - The system will incorporate your feedback and generate a revised plan

3. **Auto-acceptance**: You can enable auto-acceptance to skip the review process:

   - Via API: set `auto_accepted_plan: true` in your request

4. **API Integration**: When using the API, you can provide feedback through the `feedback` parameter:

   ```json
   {
     "messages": [{ "role": "user", "content": "What is quantum computing?" }],
     "thread_id": "my_thread_id",
     "auto_accepted_plan": false,
     "feedback": "[EDIT PLAN] Include more about quantum algorithms"
   }
   ```

### Command-Line Arguments

The application supports several command-line arguments to customize its behavior:

- **query**: The research query to process (can be multiple words)
- **--interactive**: Run in interactive mode with built-in questions
- **--max_plan_iterations**: Maximum number of planning cycles (default: 1)
- **--max_step_num**: Maximum number of steps in a research plan (default: 3)
- **--debug**: Enable detailed debug logging

## FAQ

Please refer to [FAQ.md](docs/FAQ.md) for more details.

## License

This project is open source and released under the [MIT License](./LICENSE).

## Acknowledgments

DeerFlow is built on the outstanding work of the open-source community. We are deeply grateful to all the projects and contributors that made DeerFlow possible. Truly, we stand on the shoulders of giants.

We would like to express our sincere thanks to the following projects for their invaluable contributions:

- **[LangChain](https://github.com/langchain-ai/langchain)**: Their exceptional framework powers our LLM interactions and chains, enabling seamless integration and functionality.
- **[LangGraph](https://github.com/langchain-ai/langgraph)**: Their innovative approach to multi-agent orchestration has been essential to enabling DeerFlow's complex workflows.

These projects demonstrate the transformative power of open-source collaboration, and we are proud to build on their foundations.

### Core Contributors

Heartfelt thanks to the core authors of `DeerFlow`, whose vision, passion, and dedication made this project a reality:

- **[Daniel Walnut](https://github.com/hetaoBackend/)**
- **[Henry Li](https://github.com/magiccube/)**

Your unwavering commitment and expertise have been the driving force behind DeerFlow's success. We are honored to have you leading this journey.

## Star History

[![Star History Chart](https://api.star-history.com/svg?repos=bytedance/deer-flow&type=Date)](https://star-history.com/#bytedance/deer-flow&Date)

+12
@@ -0,0 +1,12 @@

# Security Policy

## Supported Versions

As deer-flow doesn't provide an official release yet, please use the latest version for security updates.
Currently we maintain two branches:
* main branch for deer-flow 2.x
* main-1.x branch for deer-flow 1.x

## Reporting a Vulnerability

Please go to https://github.com/bytedance/deer-flow/security to report any vulnerability you find.

Binary file not shown.
Before Width: | Height: | Size: 151 KiB |
@@ -0,0 +1,28 @@

# Python-generated files
__pycache__/
*.py[oc]
build/
dist/
wheels/
*.egg-info
.coverage
.coverage.*
.ruff_cache
agent_history.gif
static/browser_history/*.gif

log/
log/*

# Virtual environments
.venv
venv/

# User config file
config.yaml

# Langgraph
.langgraph_api

# Claude Code settings
.claude/settings.local.json

Vendored
+3
@@ -0,0 +1,3 @@

{
  "recommendations": ["charliermarsh.ruff"]
}

Vendored
+11
@@ -0,0 +1,11 @@

{
  "window.title": "${activeEditorShort}${separator}${separator}deer-flow/backend",
  "[python]": {
    "editor.formatOnSave": true,
    "editor.codeActionsOnSave": {
      "source.fixAll": "explicit",
      "source.organizeImports": "explicit"
    },
    "editor.defaultFormatter": "charliermarsh.ruff"
  }
}

@@ -0,0 +1,2 @@

For the backend architecture and design patterns:
@./CLAUDE.md

@@ -0,0 +1,442 @@

# CLAUDE.md

This file provides guidance to Claude Code (claude.ai/code) when working with code in this repository.

## Project Overview

DeerFlow is a LangGraph-based AI super agent system with a full-stack architecture. The backend provides a "super agent" with sandbox execution, persistent memory, subagent delegation, and extensible tool integration - all operating in per-thread isolated environments.

**Architecture**:
- **LangGraph Server** (port 2024): Agent runtime and workflow execution
- **Gateway API** (port 8001): REST API for models, MCP, skills, memory, artifacts, and uploads
- **Frontend** (port 3000): Next.js web interface
- **Nginx** (port 2026): Unified reverse proxy entry point
- **Provisioner** (port 8002, optional in Docker dev): Started only when sandbox is configured for provisioner/Kubernetes mode

**Project Structure**:
```
deer-flow/
├── Makefile                 # Root commands (check, install, dev, stop)
├── config.yaml              # Main application configuration
├── extensions_config.json   # MCP servers and skills configuration
├── backend/                 # Backend application (this directory)
│   ├── Makefile             # Backend-only commands (dev, gateway, lint)
│   ├── langgraph.json       # LangGraph server configuration
│   ├── src/
│   │   ├── agents/          # LangGraph agent system
│   │   │   ├── lead_agent/  # Main agent (factory + system prompt)
│   │   │   ├── middlewares/ # 10 middleware components
│   │   │   ├── memory/      # Memory extraction, queue, prompts
│   │   │   └── thread_state.py  # ThreadState schema
│   │   ├── gateway/         # FastAPI Gateway API
│   │   │   ├── app.py       # FastAPI application
│   │   │   └── routers/     # 6 route modules
│   │   ├── sandbox/         # Sandbox execution system
│   │   │   ├── local/       # Local filesystem provider
│   │   │   ├── sandbox.py   # Abstract Sandbox interface
│   │   │   ├── tools.py     # bash, ls, read/write/str_replace
│   │   │   └── middleware.py  # Sandbox lifecycle management
│   │   ├── subagents/       # Subagent delegation system
│   │   │   ├── builtins/    # general-purpose, bash agents
│   │   │   ├── executor.py  # Background execution engine
│   │   │   └── registry.py  # Agent registry
│   │   ├── tools/builtins/  # Built-in tools (present_files, ask_clarification, view_image)
│   │   ├── mcp/             # MCP integration (tools, cache, client)
│   │   ├── models/          # Model factory with thinking/vision support
│   │   ├── skills/          # Skills discovery, loading, parsing
│   │   ├── config/          # Configuration system (app, model, sandbox, tool, etc.)
│   │   ├── community/       # Community tools (tavily, jina_ai, firecrawl, image_search, aio_sandbox)
│   │   ├── reflection/      # Dynamic module loading (resolve_variable, resolve_class)
│   │   ├── utils/           # Utilities (network, readability)
│   │   └── client.py        # Embedded Python client (DeerFlowClient)
│   ├── tests/               # Test suite
│   └── docs/                # Documentation
├── frontend/                # Next.js frontend application
└── skills/                  # Agent skills directory
    ├── public/              # Public skills (committed)
    └── custom/              # Custom skills (gitignored)
```
|
||||
|
||||
## Important Development Guidelines
|
||||
|
||||
### Documentation Update Policy
|
||||
**CRITICAL: Always update README.md and CLAUDE.md after every code change**
|
||||
|
||||
When making code changes, you MUST update the relevant documentation:
|
||||
- Update `README.md` for user-facing changes (features, setup, usage instructions)
|
||||
- Update `CLAUDE.md` for development changes (architecture, commands, workflows, internal systems)
|
||||
- Keep documentation synchronized with the codebase at all times
|
||||
- Ensure accuracy and timeliness of all documentation
|
||||
|
||||
## Commands
|
||||
|
||||
**Root directory** (for full application):
|
||||
```bash
|
||||
make check # Check system requirements
|
||||
make install # Install all dependencies (frontend + backend)
|
||||
make dev # Start all services (LangGraph + Gateway + Frontend + Nginx)
|
||||
make stop # Stop all services
|
||||
```
|
||||
|
||||
**Backend directory** (for backend development only):
|
||||
```bash
|
||||
make install # Install backend dependencies
|
||||
make dev # Run LangGraph server only (port 2024)
|
||||
make gateway # Run Gateway API only (port 8001)
|
||||
make test # Run all backend tests
|
||||
make lint # Lint with ruff
|
||||
make format # Format code with ruff
|
||||
```
|
||||
|
||||
Regression tests related to Docker/provisioner behavior:
|
||||
- `tests/test_docker_sandbox_mode_detection.py` (mode detection from `config.yaml`)
|
||||
- `tests/test_provisioner_kubeconfig.py` (kubeconfig file/directory handling)
|
||||
|
||||
CI runs these regression tests for every pull request via [.github/workflows/backend-unit-tests.yml](../.github/workflows/backend-unit-tests.yml).
|
||||
|
||||
## Architecture
|
||||
|
||||
### Agent System
|
||||
|
||||
**Lead Agent** (`src/agents/lead_agent/agent.py`):
|
||||
- Entry point: `make_lead_agent(config: RunnableConfig)` registered in `langgraph.json`
|
||||
- Dynamic model selection via `create_chat_model()` with thinking/vision support
|
||||
- Tools loaded via `get_available_tools()` - combines sandbox, built-in, MCP, community, and subagent tools
|
||||
- System prompt generated by `apply_prompt_template()` with skills, memory, and subagent instructions
|
||||
|
||||
**ThreadState** (`src/agents/thread_state.py`):
|
||||
- Extends `AgentState` with: `sandbox`, `thread_data`, `title`, `artifacts`, `todos`, `uploaded_files`, `viewed_images`
|
||||
- Uses custom reducers: `merge_artifacts` (deduplicate), `merge_viewed_images` (merge/clear) - see the sketch below
|
||||
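A minimal sketch of what a deduplicating reducer such as `merge_artifacts` could look like (the artifact shape and the `path` key are assumptions; the real reducers live in `src/agents/thread_state.py`):

```python
# Hypothetical sketch of a deduplicating state reducer.
# Assumes each artifact is a dict carrying a unique "path" key.
def merge_artifacts(existing: list[dict] | None, new: list[dict] | None) -> list[dict]:
    merged = {artifact["path"]: artifact for artifact in (existing or [])}
    for artifact in new or []:
        merged[artifact["path"]] = artifact  # newer entries win
    return list(merged.values())

# Wired into the state schema roughly as:
#   artifacts: Annotated[list[dict], merge_artifacts]
```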
|
||||
**Runtime Configuration** (via `config.configurable`):
|
||||
- `thinking_enabled` - Enable model's extended thinking
|
||||
- `model_name` - Select specific LLM model
|
||||
- `is_plan_mode` - Enable TodoList middleware
|
||||
- `subagent_enabled` - Enable task delegation tool
|
||||
|
||||
### Middleware Chain
|
||||
|
||||
Middlewares execute in strict order in `src/agents/lead_agent/agent.py`:
|
||||
|
||||
1. **ThreadDataMiddleware** - Creates per-thread directories (`backend/.deer-flow/threads/{thread_id}/user-data/{workspace,uploads,outputs}`)
|
||||
2. **UploadsMiddleware** - Tracks and injects newly uploaded files into conversation
|
||||
3. **SandboxMiddleware** - Acquires sandbox, stores `sandbox_id` in state
|
||||
4. **DanglingToolCallMiddleware** - Injects placeholder ToolMessages for AIMessage tool_calls that lack responses (e.g., due to user interruption)
|
||||
5. **SummarizationMiddleware** - Context reduction when approaching token limits (optional, if enabled)
|
||||
6. **TodoListMiddleware** - Task tracking with `write_todos` tool (optional, if plan_mode)
|
||||
7. **TitleMiddleware** - Auto-generates thread title after first complete exchange
|
||||
8. **MemoryMiddleware** - Queues conversations for async memory update (filters to user + final AI responses)
|
||||
9. **ViewImageMiddleware** - Injects base64 image data before LLM call (conditional on vision support)
|
||||
10. **SubagentLimitMiddleware** - Truncates excess `task` tool calls from model response to enforce `MAX_CONCURRENT_SUBAGENTS` limit (optional, if subagent_enabled)
|
||||
11. **ClarificationMiddleware** - Intercepts `ask_clarification` tool calls, interrupts via `Command(goto=END)` (must be last)
|
||||
|
||||
### Configuration System
|
||||
|
||||
**Main Configuration** (`config.yaml`):
|
||||
|
||||
Setup: Copy `config.example.yaml` to `config.yaml` in the **project root** directory.
|
||||
|
||||
Configuration priority:
|
||||
1. Explicit `config_path` argument
|
||||
2. `DEER_FLOW_CONFIG_PATH` environment variable
|
||||
3. `config.yaml` in current directory (backend/)
|
||||
4. `config.yaml` in parent directory (project root - **recommended location**)
|
||||
|
||||
Config values starting with `$` are resolved as environment variables (e.g., `$OPENAI_API_KEY`).
|
||||
|
||||
**Extensions Configuration** (`extensions_config.json`):
|
||||
|
||||
MCP servers and skills are configured together in `extensions_config.json` in project root:
|
||||
|
||||
Configuration priority:
|
||||
1. Explicit `config_path` argument
|
||||
2. `DEER_FLOW_EXTENSIONS_CONFIG_PATH` environment variable
|
||||
3. `extensions_config.json` in current directory (backend/)
|
||||
4. `extensions_config.json` in parent directory (project root - **recommended location**)
|
||||
|
||||
### Gateway API (`src/gateway/`)
|
||||
|
||||
FastAPI application on port 8001 with health check at `GET /health`.
|
||||
|
||||
**Routers**:
|
||||
|
||||
| Router | Endpoints |
|
||||
|--------|-----------|
|
||||
| **Models** (`/api/models`) | `GET /` - list models; `GET /{name}` - model details |
|
||||
| **MCP** (`/api/mcp`) | `GET /config` - get config; `PUT /config` - update config (saves to extensions_config.json) |
|
||||
| **Skills** (`/api/skills`) | `GET /` - list skills; `GET /{name}` - details; `PUT /{name}` - update enabled; `POST /install` - install from .skill archive |
|
||||
| **Memory** (`/api/memory`) | `GET /` - memory data; `POST /reload` - force reload; `GET /config` - config; `GET /status` - config + data |
|
||||
| **Uploads** (`/api/threads/{id}/uploads`) | `POST /` - upload files (auto-converts PDF/PPT/Excel/Word); `GET /list` - list; `DELETE /{filename}` - delete |
|
||||
| **Artifacts** (`/api/threads/{id}/artifacts`) | `GET /{path}` - serve artifacts; `?download=true` for file download |
|
||||
|
||||
Proxied through nginx: `/api/langgraph/*` → LangGraph, all other `/api/*` → Gateway.
|
||||
|
||||
### Sandbox System (`src/sandbox/`)
|
||||
|
||||
**Interface**: Abstract `Sandbox` with `execute_command`, `read_file`, `write_file`, `list_dir`
|
||||
**Provider Pattern**: `SandboxProvider` with `acquire`, `get`, `release` lifecycle
|
||||
**Implementations**:
|
||||
- `LocalSandboxProvider` - Singleton local filesystem execution with path mappings
|
||||
- `AioSandboxProvider` (`src/community/`) - Docker-based isolation
|
||||
|
||||
**Virtual Path System**:
|
||||
- Agent sees: `/mnt/user-data/{workspace,uploads,outputs}`, `/mnt/skills`
|
||||
- Physical: `backend/.deer-flow/threads/{thread_id}/user-data/...`, `deer-flow/skills/`
|
||||
- Translation: `replace_virtual_path()` / `replace_virtual_paths_in_command()` (sketched below)
|
||||
- Detection: `is_local_sandbox()` checks `sandbox_id == "local"`
|
||||
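A minimal sketch of the prefix translation performed by `replace_virtual_path()` (the mapping table and exact semantics are assumptions; see `src/sandbox/` for the real implementation):

```python
from pathlib import Path

# Hypothetical per-thread mapping of virtual prefixes to physical locations.
def virtual_path_mappings(thread_id: str) -> dict[str, Path]:
    thread_root = Path("backend/.deer-flow/threads") / thread_id / "user-data"
    return {
        "/mnt/user-data": thread_root,
        "/mnt/skills": Path("skills"),
    }

def replace_virtual_path(path: str, thread_id: str) -> str:
    """Translate an agent-visible virtual path into a physical path."""
    for prefix, physical in virtual_path_mappings(thread_id).items():
        if path == prefix or path.startswith(prefix + "/"):
            return str(physical / path[len(prefix):].lstrip("/"))
    return path  # non-virtual paths pass through unchanged

# replace_virtual_path("/mnt/user-data/outputs/report.md", "t1")
# -> "backend/.deer-flow/threads/t1/user-data/outputs/report.md"
```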
|
||||
**Sandbox Tools** (in `src/sandbox/tools.py`):
|
||||
- `bash` - Execute commands with path translation and error handling
|
||||
- `ls` - Directory listing (tree format, max 2 levels)
|
||||
- `read_file` - Read file contents with optional line range
|
||||
- `write_file` - Write/append to files, creates directories
|
||||
- `str_replace` - Substring replacement (single or all occurrences)
|
||||
|
||||
### Subagent System (`src/subagents/`)
|
||||
|
||||
**Built-in Agents**: `general-purpose` (all tools except `task`) and `bash` (command specialist)
|
||||
**Execution**: Dual thread pool - `_scheduler_pool` (3 workers) + `_execution_pool` (3 workers)
|
||||
**Concurrency**: `MAX_CONCURRENT_SUBAGENTS = 3` enforced by `SubagentLimitMiddleware` (truncates excess tool calls in `after_model`), 15-minute timeout
|
||||
**Flow**: `task()` tool → `SubagentExecutor` → background thread → poll 5s → SSE events → result
|
||||
**Events**: `task_started`, `task_running`, `task_completed`/`task_failed`/`task_timed_out`
|
||||
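A heavily simplified sketch of this poll-based background execution (the callback shape and error handling are assumptions; the real engine is `src/subagents/executor.py`):

```python
import time
from concurrent.futures import ThreadPoolExecutor

_execution_pool = ThreadPoolExecutor(max_workers=3)   # MAX_CONCURRENT_SUBAGENTS
SUBAGENT_TIMEOUT_SECONDS = 15 * 60

def run_subagent(run, prompt: str, on_event) -> str:
    """Submit a subagent run, then poll every 5s until completion or timeout."""
    future = _execution_pool.submit(run, prompt)
    on_event("task_started")
    deadline = time.monotonic() + SUBAGENT_TIMEOUT_SECONDS
    while not future.done():
        if time.monotonic() > deadline:
            future.cancel()
            on_event("task_timed_out")
            raise TimeoutError("subagent exceeded the 15-minute limit")
        on_event("task_running")
        time.sleep(5)
    try:
        result = future.result()
    except Exception:
        on_event("task_failed")
        raise
    on_event("task_completed")
    return result
```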
|
||||
### Tool System (`src/tools/`)
|
||||
|
||||
`get_available_tools(groups, include_mcp, model_name, subagent_enabled)` assembles:
|
||||
1. **Config-defined tools** - Resolved from `config.yaml` via `resolve_variable()`
|
||||
2. **MCP tools** - From enabled MCP servers (lazy initialized, cached with mtime invalidation)
|
||||
3. **Built-in tools**:
|
||||
- `present_files` - Make output files visible to user (only `/mnt/user-data/outputs`)
|
||||
- `ask_clarification` - Request clarification (intercepted by ClarificationMiddleware → interrupts)
|
||||
- `view_image` - Read image as base64 (added only if model supports vision)
|
||||
4. **Subagent tool** (if enabled):
|
||||
- `task` - Delegate to subagent (description, prompt, subagent_type, max_turns)
|
||||
|
||||
**Community tools** (`src/community/`):
|
||||
- `tavily/` - Web search (5 results default) and web fetch (4KB limit)
|
||||
- `jina_ai/` - Web fetch via Jina reader API with readability extraction
|
||||
- `firecrawl/` - Web scraping via Firecrawl API
|
||||
- `image_search/` - Image search via DuckDuckGo
|
||||
|
||||
### MCP System (`src/mcp/`)
|
||||
|
||||
- Uses `langchain-mcp-adapters` `MultiServerMCPClient` for multi-server management
|
||||
- **Lazy initialization**: Tools loaded on first use via `get_cached_mcp_tools()`
|
||||
- **Cache invalidation**: Detects config file changes via mtime comparison
|
||||
- **Transports**: stdio (command-based), SSE, HTTP
|
||||
- **OAuth (HTTP/SSE)**: Supports token endpoint flows (`client_credentials`, `refresh_token`) with automatic token refresh + Authorization header injection
|
||||
- **Runtime updates**: Gateway API saves to extensions_config.json; LangGraph detects via mtime
|
||||
|
||||
### Skills System (`src/skills/`)
|
||||
|
||||
- **Location**: `deer-flow/skills/{public,custom}/`
|
||||
- **Format**: Directory with `SKILL.md` (YAML frontmatter: name, description, license, allowed-tools) - parsing sketched below
|
||||
- **Loading**: `load_skills()` recursively scans `skills/{public,custom}` for `SKILL.md`, parses metadata, and reads enabled state from extensions_config.json
|
||||
- **Injection**: Enabled skills listed in agent system prompt with container paths
|
||||
- **Installation**: `POST /api/skills/install` extracts .skill ZIP archive to custom/ directory
|
||||
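A minimal sketch of parsing one `SKILL.md` (assumes PyYAML; the real loader lives in `src/skills/`):

```python
from pathlib import Path

import yaml  # assumption: PyYAML is available

def parse_skill(skill_md: Path) -> dict:
    """Split a SKILL.md into its YAML frontmatter and instruction body."""
    text = skill_md.read_text(encoding="utf-8")
    _, frontmatter, body = text.split("---", 2)  # expects a leading '---' fence
    meta = yaml.safe_load(frontmatter)
    return {
        "name": meta.get("name"),
        "description": meta.get("description"),
        "license": meta.get("license"),
        "allowed_tools": meta.get("allowed-tools", []),
        "content": body.strip(),
    }
```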
|
||||
### Model Factory (`src/models/factory.py`)
|
||||
|
||||
- `create_chat_model(name, thinking_enabled)` instantiates LLM from config via reflection
|
||||
- Supports `thinking_enabled` flag with per-model `when_thinking_enabled` overrides
|
||||
- Supports `supports_vision` flag for image understanding models
|
||||
- Config values starting with `$` resolved as environment variables
|
||||
- Missing provider modules surface actionable install hints from reflection resolvers (for example `uv add langchain-google-genai`)
|
||||
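A minimal sketch of this factory behavior (the set of non-parameter keys and the override merge are assumptions; config entries follow the `models[]` schema described under Config Schema):

```python
import importlib
import os

# Assumption: these keys describe the model but are not constructor parameters.
NON_PARAM_KEYS = {"name", "display_name", "use", "supports_thinking",
                  "supports_vision", "when_thinking_enabled"}

def create_chat_model(model_config: dict, thinking_enabled: bool = False):
    """Instantiate the class named by `use` with the remaining config fields."""
    module_path, _, class_name = model_config["use"].partition(":")
    cls = getattr(importlib.import_module(module_path), class_name)

    params = {}
    for key, value in model_config.items():
        if key in NON_PARAM_KEYS:
            continue
        if isinstance(value, str) and value.startswith("$"):
            value = os.environ.get(value[1:])  # `$VAR` resolves to an env var
        params[key] = value

    if thinking_enabled:
        params.update(model_config.get("when_thinking_enabled", {}))
    return cls(**params)
```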
|
||||
### Memory System (`src/agents/memory/`)
|
||||
|
||||
**Components**:
|
||||
- `updater.py` - LLM-based memory updates with fact extraction and atomic file I/O
|
||||
- `queue.py` - Debounced update queue (per-thread deduplication, configurable wait time)
|
||||
- `prompt.py` - Prompt templates for memory updates
|
||||
|
||||
**Data Structure** (stored in `backend/.deer-flow/memory.json`):
|
||||
- **User Context**: `workContext`, `personalContext`, `topOfMind` (1-3 sentence summaries)
|
||||
- **History**: `recentMonths`, `earlierContext`, `longTermBackground`
|
||||
- **Facts**: Discrete facts with `id`, `content`, `category` (preference/knowledge/context/behavior/goal), `confidence` (0-1), `createdAt`, `source`
|
||||
|
||||
**Workflow**:
|
||||
1. `MemoryMiddleware` filters messages (user inputs + final AI responses) and queues conversation
|
||||
2. Queue debounces (30s default), batches updates, deduplicates per-thread (sketched below)
|
||||
3. Background thread invokes LLM to extract context updates and facts
|
||||
4. Applies updates atomically (temp file + rename) with cache invalidation
|
||||
5. Next interaction injects top 15 facts + context into `<memory>` tags in system prompt
|
||||
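A minimal sketch of the per-thread debouncing in step 2 (timer-based; the real queue in `queue.py` may differ):

```python
import threading

class DebouncedMemoryQueue:
    """Each new conversation for a thread resets that thread's timer,
    so bursts of activity collapse into a single memory update."""

    def __init__(self, process, debounce_seconds: float = 30.0):
        self._process = process              # callback that runs the LLM update
        self._debounce = debounce_seconds
        self._timers: dict[str, threading.Timer] = {}
        self._lock = threading.Lock()

    def enqueue(self, thread_id: str, conversation) -> None:
        with self._lock:
            if (timer := self._timers.get(thread_id)) is not None:
                timer.cancel()               # deduplicate the pending update
            timer = threading.Timer(self._debounce, self._process,
                                    args=(thread_id, conversation))
            timer.daemon = True
            self._timers[thread_id] = timer
            timer.start()
```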
|
||||
**Configuration** (`config.yaml` → `memory`):
|
||||
- `enabled` / `injection_enabled` - Master switches
|
||||
- `storage_path` - Path to memory.json
|
||||
- `debounce_seconds` - Wait time before processing (default: 30)
|
||||
- `model_name` - LLM for updates (null = default model)
|
||||
- `max_facts` / `fact_confidence_threshold` - Fact storage limits (100 / 0.7)
|
||||
- `max_injection_tokens` - Token limit for prompt injection (2000)
|
||||
|
||||
### Reflection System (`src/reflection/`)
|
||||
|
||||
- `resolve_variable(path)` - Import module and return variable (e.g., `module.path:variable_name`)
|
||||
- `resolve_class(path, base_class)` - Import and validate class against base class
|
||||
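Both map closely onto `importlib`; a minimal sketch (the error handling is an assumption):

```python
import importlib

def resolve_variable(path: str):
    """Resolve 'module.path:variable_name' to the named object."""
    module_path, _, name = path.partition(":")
    return getattr(importlib.import_module(module_path), name)

def resolve_class(path: str, base_class: type) -> type:
    """Resolve a class path and check it subclasses `base_class`."""
    cls = resolve_variable(path)
    if not (isinstance(cls, type) and issubclass(cls, base_class)):
        raise TypeError(f"{path} is not a subclass of {base_class.__name__}")
    return cls
```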
|
||||
### Config Schema
|
||||
|
||||
**`config.yaml`** key sections:
|
||||
- `models[]` - LLM configs with `use` class path, `supports_thinking`, `supports_vision`, provider-specific fields
|
||||
- `tools[]` - Tool configs with `use` variable path and `group`
|
||||
- `tool_groups[]` - Logical groupings for tools
|
||||
- `sandbox.use` - Sandbox provider class path
|
||||
- `skills.path` / `skills.container_path` - Host and container paths to skills directory
|
||||
- `title` - Auto-title generation (enabled, max_words, max_chars, prompt_template)
|
||||
- `summarization` - Context summarization (enabled, trigger conditions, keep policy)
|
||||
- `subagents.enabled` - Master switch for subagent delegation
|
||||
- `memory` - Memory system (enabled, storage_path, debounce_seconds, model_name, max_facts, fact_confidence_threshold, injection_enabled, max_injection_tokens)
|
||||
|
||||
**`extensions_config.json`**:
|
||||
- `mcpServers` - Map of server name → config (enabled, type, command, args, env, url, headers, oauth, description)
|
||||
- `skills` - Map of skill name → state (enabled)
|
||||
|
||||
Both can be modified at runtime via Gateway API endpoints or `DeerFlowClient` methods.
|
||||
|
||||
### Embedded Client (`src/client.py`)
|
||||
|
||||
`DeerFlowClient` provides direct in-process access to all DeerFlow capabilities without HTTP services. All return types align with the Gateway API response schemas, so consumer code works identically in HTTP and embedded modes.
|
||||
|
||||
**Architecture**: Imports the same `src/` modules that LangGraph Server and Gateway API use. Shares the same config files and data directories. No FastAPI dependency.
|
||||
|
||||
**Agent Conversation** (replaces LangGraph Server):
|
||||
- `chat(message, thread_id)` — synchronous, returns final text
|
||||
- `stream(message, thread_id)` — yields `StreamEvent` aligned with LangGraph SSE protocol:
|
||||
- `"values"` — full state snapshot (title, messages, artifacts)
|
||||
- `"messages-tuple"` — per-message update (AI text, tool calls, tool results)
|
||||
- `"end"` — stream finished
|
||||
- Agent created lazily via `create_agent()` + `_build_middlewares()`, same as `make_lead_agent`
|
||||
- Supports `checkpointer` parameter for state persistence across turns
|
||||
- `reset_agent()` forces agent recreation (e.g. after memory or skill changes)
|
||||
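A short usage sketch of the conversation API above (the constructor arguments and the `StreamEvent` attribute names `event` and `data` are assumptions):

```python
from src.client import DeerFlowClient

client = DeerFlowClient()  # assumption: default construction

# Synchronous one-shot call
answer = client.chat("Summarize the repo layout", thread_id="demo-thread")
print(answer)

# Streaming: event kinds follow the list above
for event in client.stream("Now list the middlewares", thread_id="demo-thread"):
    if event.event == "messages-tuple":
        print(event.data)
    elif event.event == "end":
        break
```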
|
||||
**Gateway Equivalent Methods** (replaces Gateway API):
|
||||
|
||||
| Category | Methods | Return format |
|
||||
|----------|---------|---------------|
|
||||
| Models | `list_models()`, `get_model(name)` | `{"models": [...]}`, `{name, display_name, ...}` |
|
||||
| MCP | `get_mcp_config()`, `update_mcp_config(servers)` | `{"mcp_servers": {...}}` |
|
||||
| Skills | `list_skills()`, `get_skill(name)`, `update_skill(name, enabled)`, `install_skill(path)` | `{"skills": [...]}` |
|
||||
| Memory | `get_memory()`, `reload_memory()`, `get_memory_config()`, `get_memory_status()` | dict |
|
||||
| Uploads | `upload_files(thread_id, files)`, `list_uploads(thread_id)`, `delete_upload(thread_id, filename)` | `{"success": true, "files": [...]}`, `{"files": [...], "count": N}` |
|
||||
| Artifacts | `get_artifact(thread_id, path)` → `(bytes, mime_type)` | tuple |
|
||||
|
||||
**Key difference from Gateway**: Upload accepts local `Path` objects instead of HTTP `UploadFile`. Artifact returns `(bytes, mime_type)` instead of HTTP Response. `update_mcp_config()` and `update_skill()` automatically invalidate the cached agent.
|
||||
|
||||
**Tests**: `tests/test_client.py` (77 unit tests including `TestGatewayConformance`), `tests/test_client_live.py` (live integration tests, requires config.yaml)
|
||||
|
||||
**Gateway Conformance Tests** (`TestGatewayConformance`): Validate that every dict-returning client method conforms to the corresponding Gateway Pydantic response model. Each test parses the client output through the Gateway model — if Gateway adds a required field that the client doesn't provide, Pydantic raises `ValidationError` and CI catches the drift. Covers: `ModelsListResponse`, `ModelResponse`, `SkillsListResponse`, `SkillResponse`, `SkillInstallResponse`, `McpConfigResponse`, `UploadResponse`, `MemoryConfigResponse`, `MemoryStatusResponse`.
|
||||
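A sketch of one such check (the response-model import path and the `client` fixture are assumptions):

```python
from src.gateway.routers.models import ModelsListResponse  # hypothetical path

def test_list_models_conforms_to_gateway_schema(client):
    payload = client.list_models()             # returns {"models": [...]}
    # Raises pydantic.ValidationError if the client output drifts
    ModelsListResponse.model_validate(payload)
```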
|
||||
## Development Workflow
|
||||
|
||||
### Test-Driven Development (TDD) — MANDATORY
|
||||
|
||||
**Every new feature or bug fix MUST be accompanied by unit tests. No exceptions.**
|
||||
|
||||
- Write tests in `backend/tests/` following the existing naming convention `test_<feature>.py`
|
||||
- Run the full suite before and after your change: `make test`
|
||||
- Tests must pass before a feature is considered complete
|
||||
- For lightweight config/utility modules, prefer pure unit tests with no external dependencies
|
||||
- If a module causes circular import issues in tests, add a `sys.modules` mock in `tests/conftest.py` (see existing example for `src.subagents.executor`); a sketch follows the commands below
|
||||
|
||||
```bash
|
||||
# Run all tests
|
||||
make test
|
||||
|
||||
# Run a specific test file
|
||||
PYTHONPATH=. uv run pytest tests/test_<feature>.py -v
|
||||
```
|
||||
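A minimal sketch of that `conftest.py` mock (`MagicMock` usage is an assumption; the module name follows the existing example):

```python
# tests/conftest.py - stub a module that would otherwise trigger a
# circular import during test collection.
import sys
from unittest.mock import MagicMock

sys.modules.setdefault("src.subagents.executor", MagicMock())
```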
|
||||
### Running the Full Application
|
||||
|
||||
From the **project root** directory:
|
||||
```bash
|
||||
make dev
|
||||
```
|
||||
|
||||
This starts all services and makes the application available at `http://localhost:2026`.
|
||||
|
||||
**Nginx routing**:
|
||||
- `/api/langgraph/*` → LangGraph Server (2024)
|
||||
- `/api/*` (other) → Gateway API (8001)
|
||||
- `/` (non-API) → Frontend (3000)
|
||||
|
||||
### Running Backend Services Separately
|
||||
|
||||
From the **backend** directory:
|
||||
|
||||
```bash
|
||||
# Terminal 1: LangGraph server
|
||||
make dev
|
||||
|
||||
# Terminal 2: Gateway API
|
||||
make gateway
|
||||
```
|
||||
|
||||
Direct access (without nginx):
|
||||
- LangGraph: `http://localhost:2024`
|
||||
- Gateway: `http://localhost:8001`
|
||||
|
||||
### Frontend Configuration
|
||||
|
||||
The frontend uses environment variables to connect to backend services:
|
||||
- `NEXT_PUBLIC_LANGGRAPH_BASE_URL` - Defaults to `/api/langgraph` (through nginx)
|
||||
- `NEXT_PUBLIC_BACKEND_BASE_URL` - Defaults to empty string (through nginx)
|
||||
|
||||
When using `make dev` from root, the frontend automatically connects through nginx.
|
||||
|
||||
## Key Features
|
||||
|
||||
### File Upload
|
||||
|
||||
Multi-file upload with automatic document conversion:
|
||||
- Endpoint: `POST /api/threads/{thread_id}/uploads`
|
||||
- Supports: PDF, PPT, Excel, Word documents (converted via `markitdown`)
|
||||
- Files stored in thread-isolated directories
|
||||
- Agent receives uploaded file list via `UploadsMiddleware`
|
||||
|
||||
See [docs/FILE_UPLOAD.md](docs/FILE_UPLOAD.md) for details.
|
||||
|
||||
### Plan Mode
|
||||
|
||||
TodoList middleware for complex multi-step tasks:
|
||||
- Controlled via runtime config: `config.configurable.is_plan_mode = True`
|
||||
- Provides `write_todos` tool for task tracking
|
||||
- One task in_progress at a time, real-time updates
|
||||
|
||||
See [docs/plan_mode_usage.md](docs/plan_mode_usage.md) for details.
|
||||
|
||||
### Context Summarization
|
||||
|
||||
Automatic conversation summarization when approaching token limits:
|
||||
- Configured in `config.yaml` under `summarization` key
|
||||
- Trigger types: tokens, messages, or fraction of max input
|
||||
- Keeps recent messages while summarizing older ones
|
||||
|
||||
See [docs/summarization.md](docs/summarization.md) for details.
|
||||
|
||||
### Vision Support
|
||||
|
||||
For models with `supports_vision: true`:
|
||||
- `ViewImageMiddleware` processes images in conversation
|
||||
- `view_image_tool` added to agent's toolset
|
||||
- Images automatically converted to base64 and injected into state
|
||||
|
||||
## Code Style
|
||||
|
||||
- Uses `ruff` for linting and formatting
|
||||
- Line length: 240 characters
|
||||
- Python 3.12+ with type hints
|
||||
- Double quotes, space indentation
|
||||
|
||||
## Documentation
|
||||
|
||||
See `docs/` directory for detailed documentation:
|
||||
- [CONFIGURATION.md](docs/CONFIGURATION.md) - Configuration options
|
||||
- [ARCHITECTURE.md](docs/ARCHITECTURE.md) - Architecture details
|
||||
- [API.md](docs/API.md) - API reference
|
||||
- [SETUP.md](docs/SETUP.md) - Setup guide
|
||||
- [FILE_UPLOAD.md](docs/FILE_UPLOAD.md) - File upload feature
|
||||
- [PATH_EXAMPLES.md](docs/PATH_EXAMPLES.md) - Path types and usage
|
||||
- [summarization.md](docs/summarization.md) - Context summarization
|
||||
- [plan_mode_usage.md](docs/plan_mode_usage.md) - Plan mode with TodoList
|
||||
@@ -0,0 +1,426 @@
|
||||
# Contributing to DeerFlow Backend
|
||||
|
||||
Thank you for your interest in contributing to DeerFlow! This document provides guidelines and instructions for contributing to the backend codebase.
|
||||
|
||||
## Table of Contents
|
||||
|
||||
- [Getting Started](#getting-started)
|
||||
- [Development Setup](#development-setup)
|
||||
- [Project Structure](#project-structure)
|
||||
- [Code Style](#code-style)
|
||||
- [Making Changes](#making-changes)
|
||||
- [Testing](#testing)
|
||||
- [Pull Request Process](#pull-request-process)
|
||||
- [Architecture Guidelines](#architecture-guidelines)
|
||||
|
||||
## Getting Started
|
||||
|
||||
### Prerequisites
|
||||
|
||||
- Python 3.12 or higher
|
||||
- [uv](https://docs.astral.sh/uv/) package manager
|
||||
- Git
|
||||
- Docker (optional, for Docker sandbox testing)
|
||||
|
||||
### Fork and Clone
|
||||
|
||||
1. Fork the repository on GitHub
|
||||
2. Clone your fork locally:
|
||||
```bash
|
||||
git clone https://github.com/YOUR_USERNAME/deer-flow.git
|
||||
cd deer-flow
|
||||
```
|
||||
|
||||
## Development Setup
|
||||
|
||||
### Install Dependencies
|
||||
|
||||
```bash
|
||||
# From project root
|
||||
cp config.example.yaml config.yaml
|
||||
|
||||
# Install backend dependencies
|
||||
cd backend
|
||||
make install
|
||||
```
|
||||
|
||||
### Configure Environment
|
||||
|
||||
Set up your API keys for testing:
|
||||
|
||||
```bash
|
||||
export OPENAI_API_KEY="your-api-key"
|
||||
# Add other keys as needed
|
||||
```
|
||||
|
||||
### Run the Development Server
|
||||
|
||||
```bash
|
||||
# Terminal 1: LangGraph server
|
||||
make dev
|
||||
|
||||
# Terminal 2: Gateway API
|
||||
make gateway
|
||||
```
|
||||
|
||||
## Project Structure
|
||||
|
||||
```
|
||||
backend/src/
|
||||
├── agents/ # Agent system
|
||||
│ ├── lead_agent/ # Main agent implementation
|
||||
│ │ └── agent.py # Agent factory and creation
|
||||
│ ├── middlewares/ # Agent middlewares
|
||||
│ │ ├── thread_data_middleware.py
|
||||
│ │ ├── sandbox_middleware.py
|
||||
│ │ ├── title_middleware.py
|
||||
│ │ ├── uploads_middleware.py
|
||||
│ │ ├── view_image_middleware.py
|
||||
│ │ └── clarification_middleware.py
|
||||
│ └── thread_state.py # Thread state definition
|
||||
│
|
||||
├── gateway/ # FastAPI Gateway
|
||||
│ ├── app.py # FastAPI application
|
||||
│ └── routers/ # Route handlers
|
||||
│ ├── models.py # /api/models endpoints
|
||||
│ ├── mcp.py # /api/mcp endpoints
|
||||
│ ├── skills.py # /api/skills endpoints
|
||||
│ ├── artifacts.py # /api/threads/.../artifacts
|
||||
│ └── uploads.py # /api/threads/.../uploads
|
||||
│
|
||||
├── sandbox/ # Sandbox execution
|
||||
│ ├── __init__.py # Sandbox interface
|
||||
│ ├── local.py # Local sandbox provider
|
||||
│ └── tools.py # Sandbox tools (bash, file ops)
|
||||
│
|
||||
├── tools/ # Agent tools
|
||||
│ └── builtins/ # Built-in tools
|
||||
│ ├── present_file_tool.py
|
||||
│ ├── ask_clarification_tool.py
|
||||
│ └── view_image_tool.py
|
||||
│
|
||||
├── mcp/ # MCP integration
|
||||
│ └── manager.py # MCP server management
|
||||
│
|
||||
├── models/ # Model system
|
||||
│ └── factory.py # Model factory
|
||||
│
|
||||
├── skills/ # Skills system
|
||||
│ └── loader.py # Skills loader
|
||||
│
|
||||
├── config/ # Configuration
|
||||
│ ├── app_config.py # Main app config
|
||||
│ ├── extensions_config.py # Extensions config
|
||||
│ └── summarization_config.py
|
||||
│
|
||||
├── community/ # Community tools
|
||||
│ ├── tavily/ # Tavily web search
|
||||
│ ├── jina/ # Jina web fetch
|
||||
│ ├── firecrawl/ # Firecrawl scraping
|
||||
│ └── aio_sandbox/ # Docker sandbox
|
||||
│
|
||||
├── reflection/ # Dynamic loading
|
||||
│ └── __init__.py # Module resolution
|
||||
│
|
||||
└── utils/ # Utilities
|
||||
└── __init__.py
|
||||
```
|
||||
|
||||
## Code Style
|
||||
|
||||
### Linting and Formatting
|
||||
|
||||
We use `ruff` for both linting and formatting:
|
||||
|
||||
```bash
|
||||
# Check for issues
|
||||
make lint
|
||||
|
||||
# Auto-fix and format
|
||||
make format
|
||||
```
|
||||
|
||||
### Style Guidelines
|
||||
|
||||
- **Line length**: 240 characters maximum
|
||||
- **Python version**: 3.12+ features allowed
|
||||
- **Type hints**: Use type hints for function signatures
|
||||
- **Quotes**: Double quotes for strings
|
||||
- **Indentation**: 4 spaces (no tabs)
|
||||
- **Imports**: Group by standard library, third-party, local
|
||||
|
||||
### Docstrings
|
||||
|
||||
Use docstrings for public functions and classes:
|
||||
|
||||
```python
|
||||
def create_chat_model(name: str, thinking_enabled: bool = False) -> BaseChatModel:
|
||||
"""Create a chat model instance from configuration.
|
||||
|
||||
Args:
|
||||
name: The model name as defined in config.yaml
|
||||
thinking_enabled: Whether to enable extended thinking
|
||||
|
||||
Returns:
|
||||
A configured LangChain chat model instance
|
||||
|
||||
Raises:
|
||||
ValueError: If the model name is not found in configuration
|
||||
"""
|
||||
...
|
||||
```
|
||||
|
||||
## Making Changes
|
||||
|
||||
### Branch Naming
|
||||
|
||||
Use descriptive branch names:
|
||||
|
||||
- `feature/add-new-tool` - New features
|
||||
- `fix/sandbox-timeout` - Bug fixes
|
||||
- `docs/update-readme` - Documentation
|
||||
- `refactor/config-system` - Code refactoring
|
||||
|
||||
### Commit Messages
|
||||
|
||||
Write clear, concise commit messages:
|
||||
|
||||
```
|
||||
feat: add support for Claude 3.5 model
|
||||
|
||||
- Add model configuration in config.yaml
|
||||
- Update model factory to handle Claude-specific settings
|
||||
- Add tests for new model
|
||||
```
|
||||
|
||||
Prefix types:
|
||||
- `feat:` - New feature
|
||||
- `fix:` - Bug fix
|
||||
- `docs:` - Documentation
|
||||
- `refactor:` - Code refactoring
|
||||
- `test:` - Tests
|
||||
- `chore:` - Build/config changes
|
||||
|
||||
## Testing
|
||||
|
||||
### Running Tests
|
||||
|
||||
```bash
|
||||
uv run pytest
|
||||
```
|
||||
|
||||
### Writing Tests
|
||||
|
||||
Place tests in the `tests/` directory mirroring the source structure:
|
||||
|
||||
```
|
||||
tests/
|
||||
├── test_models/
|
||||
│ └── test_factory.py
|
||||
├── test_sandbox/
|
||||
│ └── test_local.py
|
||||
└── test_gateway/
|
||||
└── test_models_router.py
|
||||
```
|
||||
|
||||
Example test:
|
||||
|
||||
```python
|
||||
import pytest
|
||||
from src.models.factory import create_chat_model
|
||||
|
||||
def test_create_chat_model_with_valid_name():
|
||||
"""Test that a valid model name creates a model instance."""
|
||||
model = create_chat_model("gpt-4")
|
||||
assert model is not None
|
||||
|
||||
def test_create_chat_model_with_invalid_name():
|
||||
"""Test that an invalid model name raises ValueError."""
|
||||
with pytest.raises(ValueError):
|
||||
create_chat_model("nonexistent-model")
|
||||
```
|
||||
|
||||
## Pull Request Process
|
||||
|
||||
### Before Submitting
|
||||
|
||||
1. **Ensure tests pass**: `uv run pytest`
|
||||
2. **Run linter**: `make lint`
|
||||
3. **Format code**: `make format`
|
||||
4. **Update documentation** if needed
|
||||
|
||||
### PR Description
|
||||
|
||||
Include in your PR description:
|
||||
|
||||
- **What**: Brief description of changes
|
||||
- **Why**: Motivation for the change
|
||||
- **How**: Implementation approach
|
||||
- **Testing**: How you tested the changes
|
||||
|
||||
### Review Process
|
||||
|
||||
1. Submit PR with clear description
|
||||
2. Address review feedback
|
||||
3. Ensure CI passes
|
||||
4. Maintainer will merge when approved
|
||||
|
||||
## Architecture Guidelines
|
||||
|
||||
### Adding New Tools
|
||||
|
||||
1. Create tool in `src/tools/builtins/` or `src/community/`:
|
||||
|
||||
```python
|
||||
# src/tools/builtins/my_tool.py
|
||||
from langchain_core.tools import tool
|
||||
|
||||
@tool
|
||||
def my_tool(param: str) -> str:
|
||||
"""Tool description for the agent.
|
||||
|
||||
Args:
|
||||
param: Description of the parameter
|
||||
|
||||
Returns:
|
||||
Description of return value
|
||||
"""
|
||||
return f"Result: {param}"
|
||||
```
|
||||
|
||||
2. Register in `config.yaml`:
|
||||
|
||||
```yaml
|
||||
tools:
|
||||
- name: my_tool
|
||||
group: my_group
|
||||
use: src.tools.builtins.my_tool:my_tool
|
||||
```
|
||||
|
||||
### Adding New Middleware
|
||||
|
||||
1. Create middleware in `src/agents/middlewares/`:
|
||||
|
||||
```python
|
||||
# src/agents/middlewares/my_middleware.py
|
||||
from langchain.agents.middleware import BaseMiddleware
|
||||
from langchain_core.runnables import RunnableConfig
|
||||
|
||||
class MyMiddleware(BaseMiddleware):
|
||||
"""Middleware description."""
|
||||
|
||||
def transform_state(self, state: dict, config: RunnableConfig) -> dict:
|
||||
"""Transform the state before agent execution."""
|
||||
# Modify state as needed
|
||||
return state
|
||||
```
|
||||
|
||||
2. Register in `src/agents/lead_agent/agent.py`:
|
||||
|
||||
```python
|
||||
middlewares = [
|
||||
ThreadDataMiddleware(),
|
||||
SandboxMiddleware(),
|
||||
MyMiddleware(), # Add your middleware
|
||||
TitleMiddleware(),
|
||||
ClarificationMiddleware(),
|
||||
]
|
||||
```
|
||||
|
||||
### Adding New API Endpoints
|
||||
|
||||
1. Create router in `src/gateway/routers/`:
|
||||
|
||||
```python
|
||||
# src/gateway/routers/my_router.py
|
||||
from fastapi import APIRouter
|
||||
|
||||
router = APIRouter(prefix="/my-endpoint", tags=["my-endpoint"])
|
||||
|
||||
@router.get("/")
|
||||
async def get_items():
|
||||
"""Get all items."""
|
||||
return {"items": []}
|
||||
|
||||
@router.post("/")
|
||||
async def create_item(data: dict):
|
||||
"""Create a new item."""
|
||||
return {"created": data}
|
||||
```
|
||||
|
||||
2. Register in `src/gateway/app.py`:
|
||||
|
||||
```python
|
||||
from src.gateway.routers import my_router
|
||||
|
||||
app.include_router(my_router.router)
|
||||
```
|
||||
|
||||
### Configuration Changes
|
||||
|
||||
When adding new configuration options:
|
||||
|
||||
1. Update `src/config/app_config.py` with new fields
|
||||
2. Add default values in `config.example.yaml`
|
||||
3. Document in `docs/CONFIGURATION.md`
|
||||
|
||||
### MCP Server Integration
|
||||
|
||||
To add support for a new MCP server:
|
||||
|
||||
1. Add configuration in `extensions_config.json`:
|
||||
|
||||
```json
|
||||
{
|
||||
"mcpServers": {
|
||||
"my-server": {
|
||||
"enabled": true,
|
||||
"type": "stdio",
|
||||
"command": "npx",
|
||||
"args": ["-y", "@my-org/mcp-server"],
|
||||
"description": "My MCP Server"
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
2. Update `extensions_config.example.json` with the new server
|
||||
|
||||
### Skills Development
|
||||
|
||||
To create a new skill:
|
||||
|
||||
1. Create directory in `skills/public/` or `skills/custom/`:
|
||||
|
||||
```
|
||||
skills/public/my-skill/
|
||||
└── SKILL.md
|
||||
```
|
||||
|
||||
2. Write `SKILL.md` with YAML front matter:
|
||||
|
||||
```markdown
|
||||
---
|
||||
name: My Skill
|
||||
description: What this skill does
|
||||
license: MIT
|
||||
allowed-tools:
|
||||
- read_file
|
||||
- write_file
|
||||
- bash
|
||||
---
|
||||
|
||||
# My Skill
|
||||
|
||||
Instructions for the agent when this skill is enabled...
|
||||
```
|
||||
|
||||
## Questions?
|
||||
|
||||
If you have questions about contributing:
|
||||
|
||||
1. Check existing documentation in `docs/`
|
||||
2. Look for similar issues or PRs on GitHub
|
||||
3. Open a discussion or issue on GitHub
|
||||
|
||||
Thank you for contributing to DeerFlow!
|
||||
@@ -0,0 +1,28 @@
|
||||
# Backend Development Dockerfile
|
||||
FROM python:3.12-slim
|
||||
|
||||
# Install system dependencies
|
||||
RUN apt-get update && apt-get install -y \
|
||||
curl \
|
||||
build-essential \
|
||||
&& rm -rf /var/lib/apt/lists/*
|
||||
|
||||
# Install uv
|
||||
RUN curl -LsSf https://astral.sh/uv/install.sh | sh
|
||||
ENV PATH="/root/.local/bin:$PATH"
|
||||
|
||||
# Set working directory
|
||||
WORKDIR /app
|
||||
|
||||
# Copy backend source code
|
||||
COPY backend ./backend
|
||||
|
||||
# Install dependencies with cache mount
|
||||
RUN --mount=type=cache,target=/root/.cache/uv \
|
||||
sh -c "cd backend && uv sync"
|
||||
|
||||
# Expose ports (gateway: 8001, langgraph: 2024)
|
||||
EXPOSE 8001 2024
|
||||
|
||||
# Default command (can be overridden in docker-compose)
|
||||
CMD ["sh", "-c", "uv run uvicorn src.gateway.app:app --host 0.0.0.0 --port 8001"]
|
||||
@@ -0,0 +1,17 @@
|
||||
install:
|
||||
uv sync
|
||||
|
||||
dev:
|
||||
uv run langgraph dev --no-browser --allow-blocking --no-reload
|
||||
|
||||
gateway:
|
||||
uv run uvicorn src.gateway.app:app --host 0.0.0.0 --port 8001
|
||||
|
||||
test:
|
||||
PYTHONPATH=. uv run pytest tests/ -v
|
||||
|
||||
lint:
|
||||
uvx ruff check .
|
||||
|
||||
format:
|
||||
uvx ruff check . --fix && uvx ruff format .
|
||||
@@ -0,0 +1,360 @@
|
||||
# DeerFlow Backend
|
||||
|
||||
DeerFlow is a LangGraph-based AI super agent with sandbox execution, persistent memory, and extensible tool integration. The backend enables AI agents to execute code, browse the web, manage files, delegate tasks to subagents, and retain context across conversations - all in isolated, per-thread environments.
|
||||
|
||||
---
|
||||
|
||||
## Architecture
|
||||
|
||||
```
|
||||
┌──────────────────────────────────────┐
|
||||
│ Nginx (Port 2026) │
|
||||
│ Unified reverse proxy │
|
||||
└───────┬──────────────────┬───────────┘
|
||||
│ │
|
||||
/api/langgraph/* │ │ /api/* (other)
|
||||
▼ ▼
|
||||
┌────────────────────┐ ┌────────────────────────┐
|
||||
│ LangGraph Server │ │ Gateway API (8001) │
|
||||
│ (Port 2024) │ │ FastAPI REST │
|
||||
│ │ │ │
|
||||
│ ┌────────────────┐ │ │ Models, MCP, Skills, │
|
||||
│ │ Lead Agent │ │ │ Memory, Uploads, │
|
||||
│ │ ┌──────────┐ │ │ │ Artifacts │
|
||||
│ │ │Middleware│ │ │ └────────────────────────┘
|
||||
│ │ │ Chain │ │ │
|
||||
│ │ └──────────┘ │ │
|
||||
│ │ ┌──────────┐ │ │
|
||||
│ │ │ Tools │ │ │
|
||||
│ │ └──────────┘ │ │
|
||||
│ │ ┌──────────┐ │ │
|
||||
│ │ │Subagents │ │ │
|
||||
│ │ └──────────┘ │ │
|
||||
│ └────────────────┘ │
|
||||
└────────────────────┘
|
||||
```
|
||||
|
||||
**Request Routing** (via Nginx):
|
||||
- `/api/langgraph/*` → LangGraph Server - agent interactions, threads, streaming
|
||||
- `/api/*` (other) → Gateway API - models, MCP, skills, memory, artifacts, uploads
|
||||
- `/` (non-API) → Frontend - Next.js web interface
|
||||
|
||||
---
|
||||
|
||||
## Core Components
|
||||
|
||||
### Lead Agent
|
||||
|
||||
The single LangGraph agent (`lead_agent`) is the runtime entry point, created via `make_lead_agent(config)`. It combines:
|
||||
|
||||
- **Dynamic model selection** with thinking and vision support
|
||||
- **Middleware chain** for cross-cutting concerns (9 middlewares)
|
||||
- **Tool system** with sandbox, MCP, community, and built-in tools
|
||||
- **Subagent delegation** for parallel task execution
|
||||
- **System prompt** with skills injection, memory context, and working directory guidance
|
||||
|
||||
### Middleware Chain
|
||||
|
||||
Middlewares execute in strict order, each handling a specific concern:
|
||||
|
||||
| # | Middleware | Purpose |
|
||||
|---|-----------|---------|
|
||||
| 1 | **ThreadDataMiddleware** | Creates per-thread isolated directories (workspace, uploads, outputs) |
|
||||
| 2 | **UploadsMiddleware** | Injects newly uploaded files into conversation context |
|
||||
| 3 | **SandboxMiddleware** | Acquires sandbox environment for code execution |
|
||||
| 4 | **SummarizationMiddleware** | Reduces context when approaching token limits (optional) |
|
||||
| 5 | **TodoListMiddleware** | Tracks multi-step tasks in plan mode (optional) |
|
||||
| 6 | **TitleMiddleware** | Auto-generates conversation titles after first exchange |
|
||||
| 7 | **MemoryMiddleware** | Queues conversations for async memory extraction |
|
||||
| 8 | **ViewImageMiddleware** | Injects image data for vision-capable models (conditional) |
|
||||
| 9 | **ClarificationMiddleware** | Intercepts clarification requests and interrupts execution (must be last) |
|
||||
|
||||
### Sandbox System
|
||||
|
||||
Per-thread isolated execution with virtual path translation:
|
||||
|
||||
- **Abstract interface**: `execute_command`, `read_file`, `write_file`, `list_dir`
|
||||
- **Providers**: `LocalSandboxProvider` (filesystem) and `AioSandboxProvider` (Docker, in community/)
|
||||
- **Virtual paths**: `/mnt/user-data/{workspace,uploads,outputs}` → thread-specific physical directories
|
||||
- **Skills path**: `/mnt/skills` → `deer-flow/skills/` directory
|
||||
- **Skills loading**: Recursively discovers nested `SKILL.md` files under `skills/{public,custom}` and preserves nested container paths
|
||||
- **Tools**: `bash`, `ls`, `read_file`, `write_file`, `str_replace`
|
||||
|
||||
### Subagent System
|
||||
|
||||
Async task delegation with concurrent execution:
|
||||
|
||||
- **Built-in agents**: `general-purpose` (full toolset) and `bash` (command specialist)
|
||||
- **Concurrency**: Max 3 subagents per turn, 15-minute timeout
|
||||
- **Execution**: Background thread pools with status tracking and SSE events
|
||||
- **Flow**: Agent calls `task()` tool → executor runs subagent in background → polls for completion → returns result
|
||||
|
||||
### Memory System
|
||||
|
||||
LLM-powered persistent context retention across conversations:
|
||||
|
||||
- **Automatic extraction**: Analyzes conversations for user context, facts, and preferences
|
||||
- **Structured storage**: User context (work, personal, top-of-mind), history, and confidence-scored facts
|
||||
- **Debounced updates**: Batches updates to minimize LLM calls (configurable wait time)
|
||||
- **System prompt injection**: Top facts + context injected into agent prompts
|
||||
- **Storage**: JSON file with mtime-based cache invalidation
|
||||
|
||||
### Tool Ecosystem
|
||||
|
||||
| Category | Tools |
|
||||
|----------|-------|
|
||||
| **Sandbox** | `bash`, `ls`, `read_file`, `write_file`, `str_replace` |
|
||||
| **Built-in** | `present_files`, `ask_clarification`, `view_image`, `task` (subagent) |
|
||||
| **Community** | Tavily (web search), Jina AI (web fetch), Firecrawl (scraping), DuckDuckGo (image search) |
|
||||
| **MCP** | Any Model Context Protocol server (stdio, SSE, HTTP transports) |
|
||||
| **Skills** | Domain-specific workflows injected via system prompt |
|
||||
|
||||
### Gateway API
|
||||
|
||||
FastAPI application providing REST endpoints for frontend integration:
|
||||
|
||||
| Route | Purpose |
|
||||
|-------|---------|
|
||||
| `GET /api/models` | List available LLM models |
|
||||
| `GET/PUT /api/mcp/config` | Manage MCP server configurations |
|
||||
| `GET/PUT /api/skills` | List and manage skills |
|
||||
| `POST /api/skills/install` | Install skill from `.skill` archive |
|
||||
| `GET /api/memory` | Retrieve memory data |
|
||||
| `POST /api/memory/reload` | Force memory reload |
|
||||
| `GET /api/memory/config` | Memory configuration |
|
||||
| `GET /api/memory/status` | Combined config + data |
|
||||
| `POST /api/threads/{id}/uploads` | Upload files (auto-converts PDF/PPT/Excel/Word to Markdown) |
|
||||
| `GET /api/threads/{id}/uploads/list` | List uploaded files |
|
||||
| `GET /api/threads/{id}/artifacts/{path}` | Serve generated artifacts |
|
||||
|
||||
---
|
||||
|
||||
## Quick Start
|
||||
|
||||
### Prerequisites
|
||||
|
||||
- Python 3.12+
|
||||
- [uv](https://docs.astral.sh/uv/) package manager
|
||||
- API keys for your chosen LLM provider
|
||||
|
||||
### Installation
|
||||
|
||||
```bash
|
||||
cd deer-flow
|
||||
|
||||
# Copy configuration files
|
||||
cp config.example.yaml config.yaml
|
||||
|
||||
# Install backend dependencies
|
||||
cd backend
|
||||
make install
|
||||
```
|
||||
|
||||
### Configuration
|
||||
|
||||
Edit `config.yaml` in the project root:
|
||||
|
||||
```yaml
|
||||
models:
|
||||
- name: gpt-4o
|
||||
display_name: GPT-4o
|
||||
use: langchain_openai:ChatOpenAI
|
||||
model: gpt-4o
|
||||
api_key: $OPENAI_API_KEY
|
||||
supports_thinking: false
|
||||
supports_vision: true
|
||||
```
|
||||
|
||||
Set your API keys:
|
||||
|
||||
```bash
|
||||
export OPENAI_API_KEY="your-api-key-here"
|
||||
```
|
||||
|
||||
### Running
|
||||
|
||||
**Full Application** (from project root):
|
||||
|
||||
```bash
|
||||
make dev # Starts LangGraph + Gateway + Frontend + Nginx
|
||||
```
|
||||
|
||||
Access at: http://localhost:2026
|
||||
|
||||
**Backend Only** (from backend directory):
|
||||
|
||||
```bash
|
||||
# Terminal 1: LangGraph server
|
||||
make dev
|
||||
|
||||
# Terminal 2: Gateway API
|
||||
make gateway
|
||||
```
|
||||
|
||||
Direct access: LangGraph at http://localhost:2024, Gateway at http://localhost:8001
|
||||
|
||||
---
|
||||
|
||||
## Project Structure
|
||||
|
||||
```
|
||||
backend/
|
||||
├── src/
|
||||
│ ├── agents/ # Agent system
|
||||
│ │ ├── lead_agent/ # Main agent (factory, prompts)
|
||||
│ │ ├── middlewares/ # 9 middleware components
|
||||
│ │ ├── memory/ # Memory extraction & storage
|
||||
│ │ └── thread_state.py # ThreadState schema
|
||||
│ ├── gateway/ # FastAPI Gateway API
|
||||
│ │ ├── app.py # Application setup
|
||||
│ │ └── routers/ # 6 route modules
|
||||
│ ├── sandbox/ # Sandbox execution
|
||||
│ │ ├── local/ # Local filesystem provider
|
||||
│ │ ├── sandbox.py # Abstract interface
|
||||
│ │ ├── tools.py # bash, ls, read/write/str_replace
|
||||
│ │ └── middleware.py # Sandbox lifecycle
|
||||
│ ├── subagents/ # Subagent delegation
|
||||
│ │ ├── builtins/ # general-purpose, bash agents
|
||||
│ │ ├── executor.py # Background execution engine
|
||||
│ │ └── registry.py # Agent registry
|
||||
│ ├── tools/builtins/ # Built-in tools
|
||||
│ ├── mcp/ # MCP protocol integration
|
||||
│ ├── models/ # Model factory
|
||||
│ ├── skills/ # Skill discovery & loading
|
||||
│ ├── config/ # Configuration system
|
||||
│ ├── community/ # Community tools & providers
|
||||
│ ├── reflection/ # Dynamic module loading
|
||||
│ └── utils/ # Utilities
|
||||
├── docs/ # Documentation
|
||||
├── tests/ # Test suite
|
||||
├── langgraph.json # LangGraph server configuration
|
||||
├── pyproject.toml # Python dependencies
|
||||
├── Makefile # Development commands
|
||||
└── Dockerfile # Container build
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Configuration
|
||||
|
||||
### Main Configuration (`config.yaml`)
|
||||
|
||||
Place in project root. Config values starting with `$` resolve as environment variables.
|
||||
|
||||
Key sections:
|
||||
- `models` - LLM configurations with class paths, API keys, thinking/vision flags
|
||||
- `tools` - Tool definitions with module paths and groups
|
||||
- `tool_groups` - Logical tool groupings
|
||||
- `sandbox` - Execution environment provider
|
||||
- `skills` - Skills directory paths
|
||||
- `title` - Auto-title generation settings
|
||||
- `summarization` - Context summarization settings
|
||||
- `subagents` - Subagent system (enabled/disabled)
|
||||
- `memory` - Memory system settings (enabled, storage, debounce, facts limits)
|
||||
|
||||
Provider note:
|
||||
- `models[*].use` references provider classes by module path (for example `langchain_openai:ChatOpenAI`).
|
||||
- If a provider module is missing, DeerFlow now returns an actionable error with install guidance (for example `uv add langchain-google-genai`).
|
||||
|
||||
### Extensions Configuration (`extensions_config.json`)
|
||||
|
||||
MCP servers and skill states in a single file:
|
||||
|
||||
```json
|
||||
{
|
||||
"mcpServers": {
|
||||
"github": {
|
||||
"enabled": true,
|
||||
"type": "stdio",
|
||||
"command": "npx",
|
||||
"args": ["-y", "@modelcontextprotocol/server-github"],
|
||||
"env": {"GITHUB_TOKEN": "$GITHUB_TOKEN"}
|
||||
},
|
||||
"secure-http": {
|
||||
"enabled": true,
|
||||
"type": "http",
|
||||
"url": "https://api.example.com/mcp",
|
||||
"oauth": {
|
||||
"enabled": true,
|
||||
"token_url": "https://auth.example.com/oauth/token",
|
||||
"grant_type": "client_credentials",
|
||||
"client_id": "$MCP_OAUTH_CLIENT_ID",
|
||||
"client_secret": "$MCP_OAUTH_CLIENT_SECRET"
|
||||
}
|
||||
}
|
||||
},
|
||||
"skills": {
|
||||
"pdf-processing": {"enabled": true}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
### Environment Variables
|
||||
|
||||
- `DEER_FLOW_CONFIG_PATH` - Override config.yaml location
|
||||
- `DEER_FLOW_EXTENSIONS_CONFIG_PATH` - Override extensions_config.json location
|
||||
- Model API keys: `OPENAI_API_KEY`, `ANTHROPIC_API_KEY`, `DEEPSEEK_API_KEY`, etc.
|
||||
- Tool API keys: `TAVILY_API_KEY`, `GITHUB_TOKEN`, etc.
|
||||
|
||||
---
|
||||
|
||||
## Development
|
||||
|
||||
### Commands
|
||||
|
||||
```bash
|
||||
make install # Install dependencies
|
||||
make dev # Run LangGraph server (port 2024)
|
||||
make gateway # Run Gateway API (port 8001)
|
||||
make lint # Run linter (ruff)
|
||||
make format # Format code (ruff)
|
||||
```
|
||||
|
||||
### Code Style
|
||||
|
||||
- **Linter/Formatter**: `ruff`
|
||||
- **Line length**: 240 characters
|
||||
- **Python**: 3.12+ with type hints
|
||||
- **Quotes**: Double quotes
|
||||
- **Indentation**: 4 spaces
|
||||
|
||||
### Testing
|
||||
|
||||
```bash
|
||||
uv run pytest
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Technology Stack
|
||||
|
||||
- **LangGraph** (1.0.6+) - Agent framework and multi-agent orchestration
|
||||
- **LangChain** (1.2.3+) - LLM abstractions and tool system
|
||||
- **FastAPI** (0.115.0+) - Gateway REST API
|
||||
- **langchain-mcp-adapters** - Model Context Protocol support
|
||||
- **agent-sandbox** - Sandboxed code execution
|
||||
- **markitdown** - Multi-format document conversion
|
||||
- **tavily-python** / **firecrawl-py** - Web search and scraping
|
||||
|
||||
---
|
||||
|
||||
## Documentation
|
||||
|
||||
- [Configuration Guide](docs/CONFIGURATION.md)
|
||||
- [Architecture Details](docs/ARCHITECTURE.md)
|
||||
- [API Reference](docs/API.md)
|
||||
- [File Upload](docs/FILE_UPLOAD.md)
|
||||
- [Path Examples](docs/PATH_EXAMPLES.md)
|
||||
- [Context Summarization](docs/summarization.md)
|
||||
- [Plan Mode](docs/plan_mode_usage.md)
|
||||
- [Setup Guide](docs/SETUP.md)
|
||||
|
||||
---
|
||||
|
||||
## License
|
||||
|
||||
See the [LICENSE](../LICENSE) file in the project root.
|
||||
|
||||
## Contributing
|
||||
|
||||
See [CONTRIBUTING.md](CONTRIBUTING.md) for contribution guidelines.
|
||||
@@ -0,0 +1,92 @@
|
||||
#!/usr/bin/env python
|
||||
"""
|
||||
Debug script for lead_agent.
|
||||
Run this file directly in VS Code with breakpoints.
|
||||
|
||||
Usage:
|
||||
1. Set breakpoints in agent.py or other files
|
||||
2. Press F5 or use "Run and Debug" panel
|
||||
3. Input messages in the terminal to interact with the agent
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
import logging
|
||||
import os
|
||||
import sys
|
||||
|
||||
# Ensure we can import from src
|
||||
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
|
||||
|
||||
# Load environment variables
|
||||
from dotenv import load_dotenv
|
||||
from langchain_core.messages import HumanMessage
|
||||
|
||||
from src.agents import make_lead_agent
|
||||
|
||||
load_dotenv()
|
||||
|
||||
# Configure logging
|
||||
logging.basicConfig(
|
||||
level=logging.INFO,
|
||||
format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
|
||||
datefmt="%Y-%m-%d %H:%M:%S",
|
||||
)
|
||||
|
||||
|
||||
async def main():
|
||||
# Initialize MCP tools at startup
|
||||
try:
|
||||
from src.mcp import initialize_mcp_tools
|
||||
|
||||
await initialize_mcp_tools()
|
||||
except Exception as e:
|
||||
print(f"Warning: Failed to initialize MCP tools: {e}")
|
||||
|
||||
# Create agent with default config
|
||||
config = {
|
||||
"configurable": {
|
||||
"thread_id": "debug-thread-001",
|
||||
"thinking_enabled": True,
|
||||
"is_plan_mode": True,
|
||||
# Optionally select a specific model
|
||||
"model_name": "kimi-k2.5",
|
||||
}
|
||||
}
|
||||
|
||||
agent = make_lead_agent(config)
|
||||
|
||||
print("=" * 50)
|
||||
print("Lead Agent Debug Mode")
|
||||
print("Type 'quit' or 'exit' to stop")
|
||||
print("=" * 50)
|
||||
|
||||
while True:
|
||||
try:
|
||||
user_input = input("\nYou: ").strip()
|
||||
if not user_input:
|
||||
continue
|
||||
if user_input.lower() in ("quit", "exit"):
|
||||
print("Goodbye!")
|
||||
break
|
||||
|
||||
# Invoke the agent
|
||||
state = {"messages": [HumanMessage(content=user_input)]}
|
||||
result = await agent.ainvoke(state, config=config, context={"thread_id": "debug-thread-001"})
|
||||
|
||||
# Print the response
|
||||
if result.get("messages"):
|
||||
last_message = result["messages"][-1]
|
||||
print(f"\nAgent: {last_message.content}")
|
||||
|
||||
except KeyboardInterrupt:
|
||||
print("\nInterrupted. Goodbye!")
|
||||
break
|
||||
except Exception as e:
|
||||
print(f"\nError: {e}")
|
||||
import traceback
|
||||
|
||||
traceback.print_exc()
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
asyncio.run(main())
|
||||
@@ -0,0 +1,607 @@
|
||||
# API Reference
|
||||
|
||||
This document provides a complete reference for the DeerFlow backend APIs.
|
||||
|
||||
## Overview
|
||||
|
||||
DeerFlow backend exposes two sets of APIs:
|
||||
|
||||
1. **LangGraph API** - Agent interactions, threads, and streaming (`/api/langgraph/*`)
|
||||
2. **Gateway API** - Models, MCP, skills, uploads, and artifacts (`/api/*`)
|
||||
|
||||
All APIs are accessed through the Nginx reverse proxy at port 2026.
|
||||
|
||||
## LangGraph API
|
||||
|
||||
Base URL: `/api/langgraph`
|
||||
|
||||
The LangGraph API is provided by the LangGraph server and follows the LangGraph SDK conventions.
|
||||
|
||||
### Threads
|
||||
|
||||
#### Create Thread
|
||||
|
||||
```http
|
||||
POST /api/langgraph/threads
|
||||
Content-Type: application/json
|
||||
```
|
||||
|
||||
**Request Body:**
|
||||
```json
|
||||
{
|
||||
"metadata": {}
|
||||
}
|
||||
```
|
||||
|
||||
**Response:**
|
||||
```json
|
||||
{
|
||||
"thread_id": "abc123",
|
||||
"created_at": "2024-01-15T10:30:00Z",
|
||||
"metadata": {}
|
||||
}
|
||||
```
|
||||
|
||||
#### Get Thread State
|
||||
|
||||
```http
|
||||
GET /api/langgraph/threads/{thread_id}/state
|
||||
```
|
||||
|
||||
**Response:**
|
||||
```json
|
||||
{
|
||||
"values": {
|
||||
"messages": [...],
|
||||
"sandbox": {...},
|
||||
"artifacts": [...],
|
||||
"thread_data": {...},
|
||||
"title": "Conversation Title"
|
||||
},
|
||||
"next": [],
|
||||
"config": {...}
|
||||
}
|
||||
```
|
||||
|
||||
### Runs
|
||||
|
||||
#### Create Run
|
||||
|
||||
Execute the agent with input.
|
||||
|
||||
```http
|
||||
POST /api/langgraph/threads/{thread_id}/runs
|
||||
Content-Type: application/json
|
||||
```
|
||||
|
||||
**Request Body:**
|
||||
```json
|
||||
{
|
||||
"input": {
|
||||
"messages": [
|
||||
{
|
||||
"role": "user",
|
||||
"content": "Hello, can you help me?"
|
||||
}
|
||||
]
|
||||
},
|
||||
"config": {
|
||||
"configurable": {
|
||||
"model_name": "gpt-4",
|
||||
"thinking_enabled": false,
|
||||
"is_plan_mode": false
|
||||
}
|
||||
},
|
||||
"stream_mode": ["values", "messages"]
|
||||
}
|
||||
```
|
||||
|
||||
**Configurable Options:**
|
||||
- `model_name` (string): Override the default model
|
||||
- `thinking_enabled` (boolean): Enable extended thinking for supported models
|
||||
- `is_plan_mode` (boolean): Enable TodoList middleware for task tracking
|
||||
|
||||
**Response:** Server-Sent Events (SSE) stream
|
||||
|
||||
```
|
||||
event: values
|
||||
data: {"messages": [...], "title": "..."}
|
||||
|
||||
event: messages
|
||||
data: {"content": "Hello! I'd be happy to help.", "role": "assistant"}
|
||||
|
||||
event: end
|
||||
data: {}
|
||||
```
|
||||
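A minimal client-side sketch of consuming this stream (`httpx` is an assumption; any HTTP client with response streaming works):

```python
import httpx  # assumption: any streaming-capable HTTP client works

url = "http://localhost:2026/api/langgraph/threads/abc123/runs"
payload = {
    "input": {"messages": [{"role": "user", "content": "Hello"}]},
    "stream_mode": ["values", "messages"],
}

current_event = ""
with httpx.stream("POST", url, json=payload, timeout=None) as response:
    for line in response.iter_lines():
        if line.startswith("event:"):
            current_event = line.removeprefix("event:").strip()
        elif line.startswith("data:") and current_event == "messages":
            print(line.removeprefix("data:").strip())
```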
|
||||
#### Get Run History
|
||||
|
||||
```http
|
||||
GET /api/langgraph/threads/{thread_id}/runs
|
||||
```
|
||||
|
||||
**Response:**
|
||||
```json
|
||||
{
|
||||
"runs": [
|
||||
{
|
||||
"run_id": "run123",
|
||||
"status": "success",
|
||||
"created_at": "2024-01-15T10:30:00Z"
|
||||
}
|
||||
]
|
||||
}
|
||||
```
|
||||
|
||||
#### Stream Run
|
||||
|
||||
Stream responses in real-time.
|
||||
|
||||
```http
|
||||
POST /api/langgraph/threads/{thread_id}/runs/stream
|
||||
Content-Type: application/json
|
||||
```
|
||||
|
||||
Same request body as Create Run. Returns SSE stream.
|
||||
|
||||
---
|
||||
|
||||
## Gateway API
|
||||
|
||||
Base URL: `/api`
|
||||
|
||||
### Models
|
||||
|
||||
#### List Models
|
||||
|
||||
Get all available LLM models from configuration.
|
||||
|
||||
```http
|
||||
GET /api/models
|
||||
```
|
||||
|
||||
**Response:**
|
||||
```json
|
||||
{
|
||||
"models": [
|
||||
{
|
||||
"name": "gpt-4",
|
||||
"display_name": "GPT-4",
|
||||
"supports_thinking": false,
|
||||
"supports_vision": true
|
||||
},
|
||||
{
|
||||
"name": "claude-3-opus",
|
||||
"display_name": "Claude 3 Opus",
|
||||
"supports_thinking": false,
|
||||
"supports_vision": true
|
||||
},
|
||||
{
|
||||
"name": "deepseek-v3",
|
||||
"display_name": "DeepSeek V3",
|
||||
"supports_thinking": true,
|
||||
"supports_vision": false
|
||||
}
|
||||
]
|
||||
}
|
||||
```
|
||||
|
||||
#### Get Model Details
|
||||
|
||||
```http
|
||||
GET /api/models/{model_name}
|
||||
```
|
||||
|
||||
**Response:**
|
||||
```json
|
||||
{
|
||||
"name": "gpt-4",
|
||||
"display_name": "GPT-4",
|
||||
"model": "gpt-4",
|
||||
"max_tokens": 4096,
|
||||
"supports_thinking": false,
|
||||
"supports_vision": true
|
||||
}
|
||||
```
|
||||
|
||||
### MCP Configuration
|
||||
|
||||
#### Get MCP Config
|
||||
|
||||
Get current MCP server configurations.
|
||||
|
||||
```http
|
||||
GET /api/mcp/config
|
||||
```
|
||||
|
||||
**Response:**
|
||||
```json
|
||||
{
|
||||
"mcpServers": {
|
||||
"github": {
|
||||
"enabled": true,
|
||||
"type": "stdio",
|
||||
"command": "npx",
|
||||
"args": ["-y", "@modelcontextprotocol/server-github"],
|
||||
"env": {
|
||||
"GITHUB_TOKEN": "***"
|
||||
},
|
||||
"description": "GitHub operations"
|
||||
},
|
||||
"filesystem": {
|
||||
"enabled": false,
|
||||
"type": "stdio",
|
||||
"command": "npx",
|
||||
"args": ["-y", "@modelcontextprotocol/server-filesystem"],
|
||||
"description": "File system access"
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
#### Update MCP Config
|
||||
|
||||
Update MCP server configurations.
|
||||
|
||||
```http
|
||||
PUT /api/mcp/config
|
||||
Content-Type: application/json
|
||||
```
|
||||
|
||||
**Request Body:**
|
||||
```json
|
||||
{
|
||||
"mcpServers": {
|
||||
"github": {
|
||||
"enabled": true,
|
||||
"type": "stdio",
|
||||
"command": "npx",
|
||||
"args": ["-y", "@modelcontextprotocol/server-github"],
|
||||
"env": {
|
||||
"GITHUB_TOKEN": "$GITHUB_TOKEN"
|
||||
},
|
||||
"description": "GitHub operations"
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
**Response:**
|
||||
```json
|
||||
{
|
||||
"success": true,
|
||||
"message": "MCP configuration updated"
|
||||
}
|
||||
```
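
A short read-modify-write sketch against this endpoint, assuming the `requests` package and the gateway reachable at `localhost:2026`:

```python
# Toggle an MCP server via GET + PUT /api/mcp/config (assumes requests).
import requests

base = "http://localhost:2026"
config = requests.get(f"{base}/api/mcp/config").json()

# Enable the filesystem server, then write the whole config back.
config["mcpServers"]["filesystem"]["enabled"] = True
result = requests.put(f"{base}/api/mcp/config", json=config).json()
print(result["message"])
```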

### Skills

#### List Skills

Get all available skills.

```http
GET /api/skills
```

**Response:**
```json
{
  "skills": [
    {
      "name": "pdf-processing",
      "display_name": "PDF Processing",
      "description": "Handle PDF documents efficiently",
      "enabled": true,
      "license": "MIT",
      "path": "public/pdf-processing"
    },
    {
      "name": "frontend-design",
      "display_name": "Frontend Design",
      "description": "Design and build frontend interfaces",
      "enabled": false,
      "license": "MIT",
      "path": "public/frontend-design"
    }
  ]
}
```

#### Get Skill Details

```http
GET /api/skills/{skill_name}
```

**Response:**
```json
{
  "name": "pdf-processing",
  "display_name": "PDF Processing",
  "description": "Handle PDF documents efficiently",
  "enabled": true,
  "license": "MIT",
  "path": "public/pdf-processing",
  "allowed_tools": ["read_file", "write_file", "bash"],
  "content": "# PDF Processing\n\nInstructions for the agent..."
}
```

#### Enable Skill

```http
POST /api/skills/{skill_name}/enable
```

**Response:**
```json
{
  "success": true,
  "message": "Skill 'pdf-processing' enabled"
}
```

#### Disable Skill

```http
POST /api/skills/{skill_name}/disable
```

**Response:**
```json
{
  "success": true,
  "message": "Skill 'pdf-processing' disabled"
}
```

#### Install Skill

Install a skill from a `.skill` file.

```http
POST /api/skills/install
Content-Type: multipart/form-data
```

**Request Body:**
- `file`: The `.skill` file to install

**Response:**
```json
{
  "success": true,
  "message": "Skill 'my-skill' installed successfully",
  "skill": {
    "name": "my-skill",
    "display_name": "My Skill",
    "path": "custom/my-skill"
  }
}
```

### File Uploads

#### Upload Files

Upload one or more files to a thread.

```http
POST /api/threads/{thread_id}/uploads
Content-Type: multipart/form-data
```

**Request Body:**
- `files`: One or more files to upload

**Response:**
```json
{
  "success": true,
  "files": [
    {
      "filename": "document.pdf",
      "size": 1234567,
      "path": ".deer-flow/threads/abc123/user-data/uploads/document.pdf",
      "virtual_path": "/mnt/user-data/uploads/document.pdf",
      "artifact_url": "/api/threads/abc123/artifacts/mnt/user-data/uploads/document.pdf",
      "markdown_file": "document.md",
      "markdown_path": ".deer-flow/threads/abc123/user-data/uploads/document.md",
      "markdown_virtual_path": "/mnt/user-data/uploads/document.md",
      "markdown_artifact_url": "/api/threads/abc123/artifacts/mnt/user-data/uploads/document.md"
    }
  ],
  "message": "Successfully uploaded 1 file(s)"
}
```

**Supported Document Formats** (auto-converted to Markdown):
- PDF (`.pdf`)
- PowerPoint (`.ppt`, `.pptx`)
- Excel (`.xls`, `.xlsx`)
- Word (`.doc`, `.docx`)

#### List Uploaded Files

```http
GET /api/threads/{thread_id}/uploads/list
```

**Response:**
```json
{
  "files": [
    {
      "filename": "document.pdf",
      "size": 1234567,
      "path": ".deer-flow/threads/abc123/user-data/uploads/document.pdf",
      "virtual_path": "/mnt/user-data/uploads/document.pdf",
      "artifact_url": "/api/threads/abc123/artifacts/mnt/user-data/uploads/document.pdf",
      "extension": ".pdf",
      "modified": 1705997600.0
    }
  ],
  "count": 1
}
```

#### Delete File

```http
DELETE /api/threads/{thread_id}/uploads/{filename}
```

**Response:**
```json
{
  "success": true,
  "message": "Deleted document.pdf"
}
```

### Artifacts

#### Get Artifact

Download or view an artifact generated by the agent.

```http
GET /api/threads/{thread_id}/artifacts/{path}
```

**Path Examples:**
- `/api/threads/abc123/artifacts/mnt/user-data/outputs/result.txt`
- `/api/threads/abc123/artifacts/mnt/user-data/uploads/document.pdf`

**Query Parameters:**
- `download` (boolean): If `true`, force download with Content-Disposition header

**Response:** File content with appropriate Content-Type
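
A brief Python sketch of saving an artifact to disk, assuming the `requests` package and the example thread/path above:

```python
# Download an artifact with the `download` flag (assumes requests).
import requests

url = "http://localhost:2026/api/threads/abc123/artifacts/mnt/user-data/outputs/result.txt"
response = requests.get(url, params={"download": "true"})
response.raise_for_status()

with open("result.txt", "wb") as f:
    f.write(response.content)
```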

---

## Error Responses

All APIs return errors in a consistent format:

```json
{
  "detail": "Error message describing what went wrong"
}
```

**HTTP Status Codes:**
- `400` - Bad Request: Invalid input
- `404` - Not Found: Resource not found
- `422` - Validation Error: Request validation failed
- `500` - Internal Server Error: Server-side error

---

## Authentication

Currently, DeerFlow does not implement authentication. All APIs are accessible without credentials.

Note: This applies only to DeerFlow's own API. Outbound MCP connections can still use OAuth for configured HTTP/SSE MCP servers.

For production deployments, it is recommended to:
1. Use Nginx for basic auth or OAuth integration
2. Deploy behind a VPN or private network
3. Implement custom authentication middleware (see the sketch after this list)
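
A minimal sketch of option 3, assuming the gateway is a FastAPI app and using a hypothetical `X-API-Key` header scheme (the header name and environment variable are illustrative, not part of DeerFlow):

```python
# Hypothetical API-key middleware for the FastAPI gateway.
import os

from fastapi import FastAPI, Request
from fastapi.responses import JSONResponse

app = FastAPI()
API_KEY = os.environ.get("DEER_FLOW_API_KEY", "")


@app.middleware("http")
async def require_api_key(request: Request, call_next):
    # Reject any request that does not carry the expected key.
    if API_KEY and request.headers.get("X-API-Key") != API_KEY:
        return JSONResponse(status_code=401, content={"detail": "Unauthorized"})
    return await call_next(request)
```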

---

## Rate Limiting

No rate limiting is implemented by default. For production deployments, configure rate limiting in Nginx:

```nginx
limit_req_zone $binary_remote_addr zone=api:10m rate=10r/s;

location /api/ {
    limit_req zone=api burst=20 nodelay;
    proxy_pass http://backend;
}
```

---

## WebSocket Support

The LangGraph server supports WebSocket connections for real-time streaming. Connect to:

```
ws://localhost:2026/api/langgraph/threads/{thread_id}/runs/stream
```
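
A minimal async client sketch, assuming the `websockets` package and that this endpoint accepts a plain WebSocket upgrade (message handling below is illustrative):

```python
# WebSocket streaming sketch (assumes the `websockets` package).
import asyncio

import websockets


async def stream(thread_id: str) -> None:
    url = f"ws://localhost:2026/api/langgraph/threads/{thread_id}/runs/stream"
    async with websockets.connect(url) as ws:
        async for message in ws:
            print(message)  # Raw event payloads from the server


asyncio.run(stream("abc123"))
```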

---

## SDK Usage

### Python (LangGraph SDK)

```python
from langgraph_sdk import get_client

client = get_client(url="http://localhost:2026/api/langgraph")

# Create thread (run these awaits inside an async function)
thread = await client.threads.create()

# Run agent
async for event in client.runs.stream(
    thread["thread_id"],
    "lead_agent",
    input={"messages": [{"role": "user", "content": "Hello"}]},
    config={"configurable": {"model_name": "gpt-4"}},
    stream_mode=["values", "messages"],
):
    print(event)
```

### JavaScript/TypeScript

```typescript
// Using fetch for Gateway API
const response = await fetch('/api/models');
const data = await response.json();
console.log(data.models);

// Streaming runs: the stream endpoint is a POST, so EventSource
// (which only issues GET requests) cannot be used directly.
// Read the SSE body from a fetch response instead.
const streamResponse = await fetch(
  `/api/langgraph/threads/${threadId}/runs/stream`,
  {
    method: 'POST',
    headers: { 'Content-Type': 'application/json' },
    body: JSON.stringify({
      input: { messages: [{ role: 'user', content: 'Hello' }] },
    }),
  },
);
const reader = streamResponse.body!.getReader();
const decoder = new TextDecoder();
while (true) {
  const { done, value } = await reader.read();
  if (done) break;
  console.log(decoder.decode(value));
}
```

### cURL Examples

```bash
# List models
curl http://localhost:2026/api/models

# Get MCP config
curl http://localhost:2026/api/mcp/config

# Upload file
curl -X POST http://localhost:2026/api/threads/abc123/uploads \
  -F "files=@document.pdf"

# Enable skill
curl -X POST http://localhost:2026/api/skills/pdf-processing/enable

# Create thread and run agent
curl -X POST http://localhost:2026/api/langgraph/threads \
  -H "Content-Type: application/json" \
  -d '{}'

curl -X POST http://localhost:2026/api/langgraph/threads/abc123/runs \
  -H "Content-Type: application/json" \
  -d '{
    "input": {"messages": [{"role": "user", "content": "Hello"}]},
    "config": {"configurable": {"model_name": "gpt-4"}}
  }'
```

# Apple Container Support

DeerFlow now supports Apple Container as the preferred container runtime on macOS, with automatic fallback to Docker.

## Overview

Starting with this version, DeerFlow automatically detects and uses Apple Container on macOS when available, falling back to Docker when:
- Apple Container is not installed
- Running on non-macOS platforms

This provides better performance on Apple Silicon Macs while maintaining compatibility across all platforms.

## Benefits

### On Apple Silicon Macs with Apple Container:
- **Better Performance**: Native ARM64 execution without Rosetta 2 translation
- **Lower Resource Usage**: Lighter weight than Docker Desktop
- **Native Integration**: Uses macOS Virtualization.framework

### Fallback to Docker:
- Full backward compatibility
- Works on all platforms (macOS, Linux, Windows)
- No configuration changes needed

## Requirements

### For Apple Container (macOS only):
- macOS 15.0 or later
- Apple Silicon (M1/M2/M3/M4)
- Apple Container CLI installed

### Installation:
```bash
# Download from GitHub releases
# https://github.com/apple/container/releases

# Verify installation
container --version

# Start the service
container system start
```

### For Docker (all platforms):
- Docker Desktop or Docker Engine

## How It Works

### Automatic Detection

The `AioSandboxProvider` automatically detects the available container runtime (a minimal sketch follows the list):

1. On macOS: Try `container --version`
   - Success → Use Apple Container
   - Failure → Fall back to Docker

2. On other platforms: Use Docker directly
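
A sketch of that detection, assuming a `subprocess`-based probe (the function name mirrors the provider's `_detect_container_runtime()`, but the body is illustrative):

```python
# Illustrative runtime detection (mirrors _detect_container_runtime()).
import platform
import subprocess


def detect_container_runtime() -> str:
    if platform.system() == "Darwin":
        try:
            subprocess.run(
                ["container", "--version"],
                check=True,
                capture_output=True,
            )
            return "container"  # Apple Container is available
        except (FileNotFoundError, subprocess.CalledProcessError):
            pass  # Fall through to Docker
    return "docker"
```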

### Runtime Differences

Both runtimes use nearly identical command syntax:

**Container Startup:**
```bash
# Apple Container
container run --rm -d -p 8080:8080 -v /host:/container -e KEY=value image

# Docker
docker run --rm -d -p 8080:8080 -v /host:/container -e KEY=value image
```

**Container Cleanup:**
```bash
# Apple Container (with --rm flag)
container stop <id>  # Auto-removes due to --rm

# Docker (with --rm flag)
docker stop <id>  # Auto-removes due to --rm
```

### Implementation Details

The implementation is in `backend/src/community/aio_sandbox/aio_sandbox_provider.py`:

- `_detect_container_runtime()`: Detects available runtime at startup
- `_start_container()`: Uses detected runtime, skips Docker-specific options for Apple Container
- `_stop_container()`: Uses appropriate stop command for the runtime

## Configuration

No configuration changes are needed! The system works automatically.

However, you can verify the runtime in use by checking the logs:

```
INFO:src.community.aio_sandbox.aio_sandbox_provider:Detected Apple Container: container version 0.1.0
INFO:src.community.aio_sandbox.aio_sandbox_provider:Starting sandbox container using container: ...
```

Or for Docker:
```
INFO:src.community.aio_sandbox.aio_sandbox_provider:Apple Container not available, falling back to Docker
INFO:src.community.aio_sandbox.aio_sandbox_provider:Starting sandbox container using docker: ...
```

## Container Images

Both runtimes use OCI-compatible images. The default image works with both:

```yaml
sandbox:
  use: src.community.aio_sandbox:AioSandboxProvider
  image: enterprise-public-cn-beijing.cr.volces.com/vefaas-public/all-in-one-sandbox:latest  # Default image
```

Make sure your images are available for the appropriate architecture:
- ARM64 for Apple Container on Apple Silicon
- AMD64 for Docker on Intel Macs
- Multi-arch images work on both

### Pre-pulling Images (Recommended)

**Important**: Container images are typically large (500MB+) and are pulled on first use, which can cause a long wait time without clear feedback.

**Best Practice**: Pre-pull the image during setup:

```bash
# From project root
make setup-sandbox
```

This command will:
1. Read the configured image from `config.yaml` (or use default)
2. Detect available runtime (Apple Container or Docker)
3. Pull the image with progress indication
4. Verify the image is ready for use

**Manual pre-pull**:

```bash
# Using Apple Container
container pull enterprise-public-cn-beijing.cr.volces.com/vefaas-public/all-in-one-sandbox:latest

# Using Docker
docker pull enterprise-public-cn-beijing.cr.volces.com/vefaas-public/all-in-one-sandbox:latest
```

If you skip pre-pulling, the image will be automatically pulled on first agent execution, which may take several minutes depending on your network speed.

## Cleanup Scripts

The project includes a unified cleanup script that handles both runtimes:

**Script:** `scripts/cleanup-containers.sh`

**Usage:**
```bash
# Clean up all DeerFlow sandbox containers
./scripts/cleanup-containers.sh deer-flow-sandbox

# Custom prefix
./scripts/cleanup-containers.sh my-prefix
```

**Makefile Integration:**

All cleanup commands in `Makefile` automatically handle both runtimes:
```bash
make stop    # Stops all services and cleans up containers
make clean   # Full cleanup including logs
```

## Testing

Test the container runtime detection:

```bash
cd backend
python test_container_runtime.py
```

This will:
1. Detect the available runtime
2. Optionally start a test container
3. Verify connectivity
4. Clean up

## Troubleshooting

### Apple Container not detected on macOS

1. Check if installed:
   ```bash
   which container
   container --version
   ```

2. Check if service is running:
   ```bash
   container system start
   ```

3. Check logs for detection:
   ```bash
   # Look for detection message in application logs
   grep "container runtime" logs/*.log
   ```

### Containers not cleaning up

1. Manually check running containers:
   ```bash
   # Apple Container
   container list

   # Docker
   docker ps
   ```

2. Run cleanup script manually:
   ```bash
   ./scripts/cleanup-containers.sh deer-flow-sandbox
   ```

### Performance issues

- Apple Container should be faster on Apple Silicon
- If experiencing issues, you can force Docker by temporarily renaming the `container` command:
  ```bash
  # Temporary workaround - not recommended for permanent use
  sudo mv /opt/homebrew/bin/container /opt/homebrew/bin/container.bak
  ```

## References

- [Apple Container GitHub](https://github.com/apple/container)
- [Apple Container Documentation](https://github.com/apple/container/blob/main/docs/)
- [OCI Image Spec](https://github.com/opencontainers/image-spec)

# Architecture Overview

This document provides a comprehensive overview of the DeerFlow backend architecture.

## System Architecture

```
┌──────────────────────────────────────────────────────────────────────────┐
│                             Client (Browser)                             │
└─────────────────────────────────┬────────────────────────────────────────┘
                                  │
                                  ▼
┌──────────────────────────────────────────────────────────────────────────┐
│                            Nginx (Port 2026)                             │
│                   Unified Reverse Proxy Entry Point                      │
│  ┌────────────────────────────────────────────────────────────────────┐  │
│  │  /api/langgraph/*  →  LangGraph Server (2024)                      │  │
│  │  /api/*            →  Gateway API (8001)                           │  │
│  │  /*                →  Frontend (3000)                              │  │
│  └────────────────────────────────────────────────────────────────────┘  │
└─────────────────────────────────┬────────────────────────────────────────┘
                                  │
          ┌───────────────────────┼───────────────────────┐
          │                       │                       │
          ▼                       ▼                       ▼
┌─────────────────────┐ ┌─────────────────────┐ ┌─────────────────────┐
│  LangGraph Server   │ │     Gateway API     │ │      Frontend       │
│     (Port 2024)     │ │     (Port 8001)     │ │     (Port 3000)     │
│                     │ │                     │ │                     │
│  - Agent Runtime    │ │  - Models API       │ │  - Next.js App      │
│  - Thread Mgmt      │ │  - MCP Config       │ │  - React UI         │
│  - SSE Streaming    │ │  - Skills Mgmt      │ │  - Chat Interface   │
│  - Checkpointing    │ │  - File Uploads     │ │                     │
│                     │ │  - Artifacts        │ │                     │
└─────────────────────┘ └─────────────────────┘ └─────────────────────┘
          │                       │
          │       ┌───────────────┘
          │       │
          ▼       ▼
┌──────────────────────────────────────────────────────────────────────────┐
│                          Shared Configuration                            │
│  ┌─────────────────────────┐  ┌────────────────────────────────────────┐ │
│  │  config.yaml            │  │  extensions_config.json                │ │
│  │  - Models               │  │  - MCP Servers                         │ │
│  │  - Tools                │  │  - Skills State                        │ │
│  │  - Sandbox              │  │                                        │ │
│  │  - Summarization        │  │                                        │ │
│  └─────────────────────────┘  └────────────────────────────────────────┘ │
└──────────────────────────────────────────────────────────────────────────┘
```

## Component Details

### LangGraph Server

The LangGraph server is the core agent runtime, built on LangGraph for robust multi-agent workflow orchestration.

**Entry Point**: `src/agents/lead_agent/agent.py:make_lead_agent`

**Key Responsibilities**:
- Agent creation and configuration
- Thread state management
- Middleware chain execution
- Tool execution orchestration
- SSE streaming for real-time responses

**Configuration**: `langgraph.json`

```json
{
  "agent": {
    "type": "agent",
    "path": "src.agents:make_lead_agent"
  }
}
```

### Gateway API

FastAPI application providing REST endpoints for non-agent operations.

**Entry Point**: `src/gateway/app.py`

**Routers**:
- `models.py` - `/api/models` - Model listing and details
- `mcp.py` - `/api/mcp` - MCP server configuration
- `skills.py` - `/api/skills` - Skills management
- `uploads.py` - `/api/threads/{id}/uploads` - File upload
- `artifacts.py` - `/api/threads/{id}/artifacts` - Artifact serving

### Agent Architecture

```
┌─────────────────────────────────────────────────────────────────────────┐
│                        make_lead_agent(config)                          │
└────────────────────────────────────┬────────────────────────────────────┘
                                     │
                                     ▼
┌─────────────────────────────────────────────────────────────────────────┐
│                            Middleware Chain                             │
│  ┌───────────────────────────────────────────────────────────────────┐  │
│  │ 1. ThreadDataMiddleware    - Initialize workspace/uploads/outputs │  │
│  │ 2. UploadsMiddleware       - Process uploaded files               │  │
│  │ 3. SandboxMiddleware       - Acquire sandbox environment          │  │
│  │ 4. SummarizationMiddleware - Context reduction (if enabled)       │  │
│  │ 5. TitleMiddleware         - Auto-generate titles                 │  │
│  │ 6. TodoListMiddleware      - Task tracking (if plan_mode)         │  │
│  │ 7. ViewImageMiddleware     - Vision model support                 │  │
│  │ 8. ClarificationMiddleware - Handle clarifications                │  │
│  └───────────────────────────────────────────────────────────────────┘  │
└────────────────────────────────────┬────────────────────────────────────┘
                                     │
                                     ▼
┌─────────────────────────────────────────────────────────────────────────┐
│                               Agent Core                                │
│  ┌──────────────────┐  ┌──────────────────┐  ┌──────────────────────┐   │
│  │      Model       │  │      Tools       │  │    System Prompt     │   │
│  │  (from factory)  │  │  (configured +   │  │    (with skills)     │   │
│  │                  │  │  MCP + builtin)  │  │                      │   │
│  └──────────────────┘  └──────────────────┘  └──────────────────────┘   │
└─────────────────────────────────────────────────────────────────────────┘
```

### Thread State

The `ThreadState` extends LangGraph's `AgentState` with additional fields:

```python
class ThreadState(AgentState):
    # Core state from AgentState
    messages: list[BaseMessage]

    # DeerFlow extensions
    sandbox: dict          # Sandbox environment info
    artifacts: list[str]   # Generated file paths
    thread_data: dict      # {workspace, uploads, outputs} paths
    title: str | None      # Auto-generated conversation title
    todos: list[dict]      # Task tracking (plan mode)
    viewed_images: dict    # Vision model image data
```

### Sandbox System

```
┌─────────────────────────────────────────────────────────────────────────┐
│                           Sandbox Architecture                          │
└─────────────────────────────────────────────────────────────────────────┘

                    ┌─────────────────────────┐
                    │     SandboxProvider     │  (Abstract)
                    │  - acquire()            │
                    │  - get()                │
                    │  - release()            │
                    └────────────┬────────────┘
                                 │
            ┌────────────────────┼────────────────────┐
            │                                         │
            ▼                                         ▼
┌─────────────────────────┐             ┌─────────────────────────┐
│  LocalSandboxProvider   │             │   AioSandboxProvider    │
│  (src/sandbox/local.py) │             │   (src/community/)      │
│                         │             │                         │
│  - Singleton instance   │             │  - Docker-based         │
│  - Direct execution     │             │  - Isolated containers  │
│  - Development use      │             │  - Production use       │
└─────────────────────────┘             └─────────────────────────┘

                    ┌─────────────────────────┐
                    │         Sandbox         │  (Abstract)
                    │  - execute_command()    │
                    │  - read_file()          │
                    │  - write_file()         │
                    │  - list_dir()           │
                    └─────────────────────────┘
```

**Virtual Path Mapping**:

| Virtual Path | Physical Path |
|--------------|---------------|
| `/mnt/user-data/workspace` | `backend/.deer-flow/threads/{thread_id}/user-data/workspace` |
| `/mnt/user-data/uploads` | `backend/.deer-flow/threads/{thread_id}/user-data/uploads` |
| `/mnt/user-data/outputs` | `backend/.deer-flow/threads/{thread_id}/user-data/outputs` |
| `/mnt/skills` | `deer-flow/skills/` |

### Tool System

```
┌─────────────────────────────────────────────────────────────────────────┐
│                              Tool Sources                               │
└─────────────────────────────────────────────────────────────────────────┘

┌─────────────────────┐  ┌─────────────────────┐  ┌─────────────────────┐
│   Built-in Tools    │  │  Configured Tools   │  │      MCP Tools      │
│   (src/tools/)      │  │   (config.yaml)     │  │  (extensions.json)  │
├─────────────────────┤  ├─────────────────────┤  ├─────────────────────┤
│ - present_file      │  │ - web_search        │  │ - github            │
│ - ask_clarification │  │ - web_fetch         │  │ - filesystem        │
│ - view_image        │  │ - bash              │  │ - postgres          │
│                     │  │ - read_file         │  │ - brave-search      │
│                     │  │ - write_file        │  │ - puppeteer         │
│                     │  │ - str_replace       │  │ - ...               │
│                     │  │ - ls                │  │                     │
└─────────────────────┘  └─────────────────────┘  └─────────────────────┘
           │                        │                        │
           └────────────────────────┴────────────────────────┘
                                    │
                                    ▼
                      ┌─────────────────────────┐
                      │  get_available_tools()  │
                      │  (src/tools/__init__)   │
                      └─────────────────────────┘
```

### Model Factory

```
┌─────────────────────────────────────────────────────────────────────────┐
│                              Model Factory                              │
│                         (src/models/factory.py)                         │
└─────────────────────────────────────────────────────────────────────────┘

config.yaml:
┌─────────────────────────────────────────────────────────────────────────┐
│  models:                                                                │
│    - name: gpt-4                                                        │
│      display_name: GPT-4                                                │
│      use: langchain_openai:ChatOpenAI                                   │
│      model: gpt-4                                                       │
│      api_key: $OPENAI_API_KEY                                           │
│      max_tokens: 4096                                                   │
│      supports_thinking: false                                           │
│      supports_vision: true                                              │
└─────────────────────────────────────────────────────────────────────────┘
                                    │
                                    ▼
                      ┌─────────────────────────┐
                      │   create_chat_model()   │
                      │   - name: str           │
                      │   - thinking_enabled    │
                      └────────────┬────────────┘
                                   │
                                   ▼
                      ┌─────────────────────────┐
                      │     resolve_class()     │
                      │   (reflection system)   │
                      └────────────┬────────────┘
                                   │
                                   ▼
                      ┌─────────────────────────┐
                      │      BaseChatModel      │
                      │   (LangChain instance)  │
                      └─────────────────────────┘
```

**Supported Providers**:
- OpenAI (`langchain_openai:ChatOpenAI`)
- Anthropic (`langchain_anthropic:ChatAnthropic`)
- DeepSeek (`langchain_deepseek:ChatDeepSeek`)
- Custom via LangChain integrations
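
The `use: module:Class` strings in the config are resolved by reflection. A minimal sketch of that resolution, assuming only the `module:Class` convention shown above (the function body is illustrative, not the project's exact implementation):

```python
# Illustrative resolver for "module:Class" strings from config.yaml.
import importlib


def resolve_class(path: str) -> type:
    module_name, class_name = path.split(":", 1)
    module = importlib.import_module(module_name)
    return getattr(module, class_name)


# e.g. resolve_class("langchain_openai:ChatOpenAI") -> ChatOpenAI
```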

### MCP Integration

```
┌─────────────────────────────────────────────────────────────────────────┐
│                             MCP Integration                             │
│                           (src/mcp/manager.py)                          │
└─────────────────────────────────────────────────────────────────────────┘

extensions_config.json:
┌─────────────────────────────────────────────────────────────────────────┐
│  {                                                                      │
│    "mcpServers": {                                                      │
│      "github": {                                                        │
│        "enabled": true,                                                 │
│        "type": "stdio",                                                 │
│        "command": "npx",                                                │
│        "args": ["-y", "@modelcontextprotocol/server-github"],           │
│        "env": {"GITHUB_TOKEN": "$GITHUB_TOKEN"}                         │
│      }                                                                  │
│    }                                                                    │
│  }                                                                      │
└─────────────────────────────────────────────────────────────────────────┘
                                    │
                                    ▼
                      ┌─────────────────────────┐
                      │  MultiServerMCPClient   │
                      │ (langchain-mcp-adapters)│
                      └────────────┬────────────┘
                                   │
              ┌────────────────────┼────────────────────┐
              │                    │                    │
              ▼                    ▼                    ▼
        ┌───────────┐        ┌───────────┐        ┌───────────┐
        │   stdio   │        │    SSE    │        │   HTTP    │
        │ transport │        │ transport │        │ transport │
        └───────────┘        └───────────┘        └───────────┘
```

### Skills System

```
┌─────────────────────────────────────────────────────────────────────────┐
│                              Skills System                              │
│                          (src/skills/loader.py)                         │
└─────────────────────────────────────────────────────────────────────────┘

Directory Structure:
┌─────────────────────────────────────────────────────────────────────────┐
│  skills/                                                                │
│  ├── public/                    # Public skills (committed)             │
│  │   ├── pdf-processing/                                                │
│  │   │   └── SKILL.md                                                   │
│  │   ├── frontend-design/                                               │
│  │   │   └── SKILL.md                                                   │
│  │   └── ...                                                            │
│  └── custom/                    # Custom skills (gitignored)            │
│      └── user-installed/                                                │
│          └── SKILL.md                                                   │
└─────────────────────────────────────────────────────────────────────────┘

SKILL.md Format:
┌─────────────────────────────────────────────────────────────────────────┐
│  ---                                                                    │
│  name: PDF Processing                                                   │
│  description: Handle PDF documents efficiently                          │
│  license: MIT                                                           │
│  allowed-tools:                                                         │
│    - read_file                                                          │
│    - write_file                                                         │
│    - bash                                                               │
│  ---                                                                    │
│                                                                         │
│  # Skill Instructions                                                   │
│  Content injected into system prompt...                                 │
└─────────────────────────────────────────────────────────────────────────┘
```

### Request Flow

```
┌─────────────────────────────────────────────────────────────────────────┐
│                           Request Flow Example                          │
│                       User sends message to agent                       │
└─────────────────────────────────────────────────────────────────────────┘

1. Client → Nginx
   POST /api/langgraph/threads/{thread_id}/runs
   {"input": {"messages": [{"role": "user", "content": "Hello"}]}}

2. Nginx → LangGraph Server (2024)
   Proxied to LangGraph server

3. LangGraph Server
   a. Load/create thread state
   b. Execute middleware chain:
      - ThreadDataMiddleware: Set up paths
      - UploadsMiddleware: Inject file list
      - SandboxMiddleware: Acquire sandbox
      - SummarizationMiddleware: Check token limits
      - TitleMiddleware: Generate title if needed
      - TodoListMiddleware: Load todos (if plan mode)
      - ViewImageMiddleware: Process images
      - ClarificationMiddleware: Check for clarifications

   c. Execute agent:
      - Model processes messages
      - May call tools (bash, web_search, etc.)
      - Tools execute via sandbox
      - Results added to messages

   d. Stream response via SSE

4. Client receives streaming response
```

## Data Flow

### File Upload Flow

```
1. Client uploads file
   POST /api/threads/{thread_id}/uploads
   Content-Type: multipart/form-data

2. Gateway receives file
   - Validates file
   - Stores in .deer-flow/threads/{thread_id}/user-data/uploads/
   - If document: converts to Markdown via markitdown

3. Returns response
   {
     "files": [{
       "filename": "doc.pdf",
       "path": ".deer-flow/.../uploads/doc.pdf",
       "virtual_path": "/mnt/user-data/uploads/doc.pdf",
       "artifact_url": "/api/threads/.../artifacts/mnt/.../doc.pdf"
     }]
   }

4. Next agent run
   - UploadsMiddleware lists files
   - Injects file list into messages
   - Agent can access via virtual_path
```

### Configuration Reload

```
1. Client updates MCP config
   PUT /api/mcp/config

2. Gateway writes extensions_config.json
   - Updates mcpServers section
   - File mtime changes

3. MCP Manager detects change
   - get_cached_mcp_tools() checks mtime
   - If changed: reinitializes MCP client
   - Loads updated server configurations

4. Next agent run uses new tools
```
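
A minimal sketch of the mtime-based invalidation described in step 3, assuming only the behavior above (names other than `get_cached_mcp_tools()` are illustrative):

```python
# Illustrative mtime-based cache for MCP tools.
import os

_CONFIG_PATH = "extensions_config.json"
_cached_mtime: float | None = None
_cached_tools: list | None = None


def _load_mcp_tools() -> list:
    # Placeholder: reinitialize MultiServerMCPClient and collect its tools.
    return []


def get_cached_mcp_tools() -> list:
    global _cached_mtime, _cached_tools
    mtime = os.path.getmtime(_CONFIG_PATH)
    if _cached_tools is None or mtime != _cached_mtime:
        _cached_tools = _load_mcp_tools()  # Config changed: rebuild
        _cached_mtime = mtime
    return _cached_tools
```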

## Security Considerations

### Sandbox Isolation

- Agent code executes within sandbox boundaries
- Local sandbox: Direct execution (development only)
- Docker sandbox: Container isolation (production recommended)
- Path traversal prevention in file operations

### API Security

- Thread isolation: Each thread has separate data directories
- File validation: Uploads checked for path safety
- Environment variable resolution: Secrets not stored in config

### MCP Security

- Each MCP server runs in its own process
- Environment variables resolved at runtime
- Servers can be enabled/disabled independently

## Performance Considerations

### Caching

- MCP tools cached with file mtime invalidation
- Configuration loaded once, reloaded on file change
- Skills parsed once at startup, cached in memory

### Streaming

- SSE used for real-time response streaming
- Reduces time to first token
- Enables progress visibility for long operations

### Context Management

- Summarization middleware reduces context when limits approached
- Configurable triggers: tokens, messages, or fraction
- Preserves recent messages while summarizing older ones

# Automatic Thread Title Generation

## Overview

Automatically generates a title for a conversation thread, triggered after the user's first question receives a reply.

## Implementation

The `TitleMiddleware` runs in the `after_agent` hook and:
1. Detects whether this is the first exchange (one user message + one assistant reply)
2. Checks whether the state already contains a title
3. Calls an LLM to generate a concise title (at most 6 words by default)
4. Stores the title in `ThreadState` (persisted by the checkpointer)

## ⚠️ Important: Storage Mechanism

### Where the Title Is Stored

The title is stored in **`ThreadState.title`**, not in thread metadata:

```python
class ThreadState(AgentState):
    sandbox: SandboxState | None = None
    title: str | None = None  # ✅ Title stored here
```

### Persistence Notes

| Deployment | Persisted | Notes |
|------------|-----------|-------|
| **LangGraph Studio (local)** | ❌ No | In-memory only; lost on restart |
| **LangGraph Platform** | ✅ Yes | Automatically persisted to the database |
| **Custom + Checkpointer** | ✅ Yes | Requires a PostgreSQL/SQLite checkpointer |

### Enabling Persistence

To persist titles during local development as well, configure a checkpointer:

```python
# Create checkpointer.py next to langgraph.json
from langgraph.checkpoint.postgres import PostgresSaver

checkpointer = PostgresSaver.from_conn_string(
    "postgresql://user:pass@localhost/dbname"
)
```

Then reference it in `langgraph.json`:

```json
{
  "graphs": {
    "lead_agent": "src.agents:lead_agent"
  },
  "checkpointer": "checkpointer:checkpointer"
}
```

## Configuration

Add to `config.yaml` (optional):

```yaml
title:
  enabled: true
  max_words: 6
  max_chars: 60
  model_name: null  # Use the default model
```

Or configure in code:

```python
from src.config.title_config import TitleConfig, set_title_config

set_title_config(TitleConfig(
    enabled=True,
    max_words=8,
    max_chars=80,
))
```

## Client Usage

### Reading the Thread Title

```typescript
// `client` is a LangGraph SDK client instance.
// Option 1: read from the thread state
const state = await client.threads.getState(threadId);
const title = state.values.title || "New Conversation";

// Option 2: listen for stream events
for await (const chunk of client.runs.stream(threadId, assistantId, {
  input: { messages: [{ role: "user", content: "Hello" }] }
})) {
  if (chunk.event === "values" && chunk.data.title) {
    console.log("Title:", chunk.data.title);
  }
}
```

### Displaying the Title

```typescript
import { useEffect, useState } from "react";

// Show titles in a conversation list
function ConversationList() {
  const [threads, setThreads] = useState([]);

  useEffect(() => {
    async function loadThreads() {
      const allThreads = await client.threads.list();

      // Fetch each thread's state to read its title
      const threadsWithTitles = await Promise.all(
        allThreads.map(async (t) => {
          const state = await client.threads.getState(t.thread_id);
          return {
            id: t.thread_id,
            title: state.values.title || "New Conversation",
            updatedAt: t.updated_at,
          };
        })
      );

      setThreads(threadsWithTitles);
    }
    loadThreads();
  }, []);

  return (
    <ul>
      {threads.map(thread => (
        <li key={thread.id}>
          <a href={`/chat/${thread.id}`}>{thread.title}</a>
        </li>
      ))}
    </ul>
  );
}
```

## Workflow

```mermaid
sequenceDiagram
    participant User
    participant Client
    participant LangGraph
    participant TitleMiddleware
    participant LLM
    participant Checkpointer

    User->>Client: Send first message
    Client->>LangGraph: POST /threads/{id}/runs
    LangGraph->>Agent: Process message
    Agent-->>LangGraph: Return reply
    LangGraph->>TitleMiddleware: after_agent()
    TitleMiddleware->>TitleMiddleware: Check whether a title is needed
    TitleMiddleware->>LLM: Generate title
    LLM-->>TitleMiddleware: Return title
    TitleMiddleware->>LangGraph: return {"title": "..."}
    LangGraph->>Checkpointer: Save state (including title)
    LangGraph-->>Client: Return response
    Client->>Client: Read from state.values.title
```

## Advantages

✅ **Reliable persistence** - Uses LangGraph's state mechanism; persisted automatically
✅ **Fully backend-driven** - No extra client-side logic required
✅ **Automatic trigger** - Generated after the first exchange
✅ **Configurable** - Custom length, model, and more
✅ **Fault tolerant** - Falls back to a simple strategy on failure
✅ **Architecturally consistent** - Matches the existing SandboxMiddleware pattern

## Caveats

1. **Different read path**: The title lives at `state.values.title`, not `thread.metadata.title`
2. **Performance**: Title generation adds roughly 0.5-1 s of latency; a faster model reduces this
3. **Concurrency**: The middleware runs after the agent finishes and does not block the main flow
4. **Fallback strategy**: If the LLM call fails, the first few words of the user message are used as the title (a sketch follows this list)
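
A minimal sketch of that fallback, assuming only the behavior described in item 4 (the helper name is illustrative):

```python
# Illustrative fallback: derive a title from the user's first message.
def fallback_title(user_message: str, max_words: int = 6, max_chars: int = 60) -> str:
    title = " ".join(user_message.split()[:max_words])
    return title[:max_chars] or "New Conversation"
```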

## Testing

```python
# Test title generation
import pytest
from src.agents.title_middleware import TitleMiddleware


def test_title_generation():
    # TODO: add unit tests
    pass
```

## Troubleshooting

### No title is generated

1. Check that the feature is enabled: `get_title_config().enabled == True`
2. Check the logs for "Generated thread title" or error messages
3. Confirm it is the first exchange: generation only triggers with exactly one user message and one assistant reply

### A title is generated but the client cannot see it

1. Confirm the read path: read from `state.values.title`, not `thread.metadata.title`
2. Check the API response: confirm the state contains a `title` field
3. Try refetching the state: `client.threads.getState(threadId)`

### Titles are lost after a restart

1. Check that a checkpointer is configured (required for local development)
2. Confirm the deployment mode: LangGraph Platform persists automatically
3. Inspect the database: confirm the checkpointer is working

## Design Rationale

### Why State Instead of Metadata?

| Property | State | Metadata |
|----------|-------|----------|
| **Persistence** | ✅ Automatic (via checkpointer) | ⚠️ Implementation-dependent |
| **Versioning** | ✅ Supports time travel | ❌ Not supported |
| **Type safety** | ✅ Defined via TypedDict | ❌ Arbitrary dict |
| **Traceability** | ✅ Every update is recorded | ⚠️ Only the latest value |
| **Standardization** | ✅ Core LangGraph mechanism | ⚠️ Extension feature |

### Implementation Details

```python
# Core TitleMiddleware logic
@override
def after_agent(self, state: TitleMiddlewareState, runtime: Runtime) -> dict | None:
    """Generate and set thread title after the first agent response."""
    if self._should_generate_title(state, runtime):
        title = self._generate_title(runtime)
        print(f"Generated thread title: {title}")

        # ✅ Return a state update; the checkpointer persists it automatically
        return {"title": title}

    return None
```

## Related Files

- [`src/agents/thread_state.py`](../src/agents/thread_state.py) - ThreadState definition
- [`src/agents/title_middleware.py`](../src/agents/title_middleware.py) - TitleMiddleware implementation
- [`src/config/title_config.py`](../src/config/title_config.py) - Configuration management
- [`config.yaml`](../config.yaml) - Configuration file
- [`src/agents/lead_agent/agent.py`](../src/agents/lead_agent/agent.py) - Middleware registration

## References

- [LangGraph checkpointer documentation](https://langchain-ai.github.io/langgraph/concepts/persistence/)
- [LangGraph state management](https://langchain-ai.github.io/langgraph/concepts/low_level/#state)
- [LangGraph middleware](https://langchain-ai.github.io/langgraph/concepts/middleware/)

# Configuration Guide

This guide explains how to configure DeerFlow for your environment.

## Configuration Sections

### Models

Configure the LLM models available to the agent:

```yaml
models:
  - name: gpt-4                        # Internal identifier
    display_name: GPT-4                # Human-readable name
    use: langchain_openai:ChatOpenAI   # LangChain class path
    model: gpt-4                       # Model identifier for API
    api_key: $OPENAI_API_KEY           # API key (use env var)
    max_tokens: 4096                   # Max tokens per request
    temperature: 0.7                   # Sampling temperature
```

**Supported Providers**:
- OpenAI (`langchain_openai:ChatOpenAI`)
- Anthropic (`langchain_anthropic:ChatAnthropic`)
- DeepSeek (`langchain_deepseek:ChatDeepSeek`)
- Any LangChain-compatible provider

For OpenAI-compatible gateways (for example Novita), keep using `langchain_openai:ChatOpenAI` and set `base_url`:

```yaml
models:
  - name: novita-deepseek-v3.2
    display_name: Novita DeepSeek V3.2
    use: langchain_openai:ChatOpenAI
    model: deepseek/deepseek-v3.2
    api_key: $NOVITA_API_KEY
    base_url: https://api.novita.ai/openai
    supports_thinking: true
    when_thinking_enabled:
      extra_body:
        thinking:
          type: enabled
```

**Thinking Models**:
Some models support "thinking" mode for complex reasoning:

```yaml
models:
  - name: deepseek-v3
    supports_thinking: true
    when_thinking_enabled:
      extra_body:
        thinking:
          type: enabled
```

### Tool Groups

Organize tools into logical groups:

```yaml
tool_groups:
  - name: web         # Web browsing and search
  - name: file:read   # Read-only file operations
  - name: file:write  # Write file operations
  - name: bash        # Shell command execution
```

### Tools

Configure specific tools available to the agent:

```yaml
tools:
  - name: web_search
    group: web
    use: src.community.tavily.tools:web_search_tool
    max_results: 5
    # api_key: $TAVILY_API_KEY  # Optional
```

**Built-in Tools**:
- `web_search` - Search the web (Tavily)
- `web_fetch` - Fetch web pages (Jina AI)
- `ls` - List directory contents
- `read_file` - Read file contents
- `write_file` - Write file contents
- `str_replace` - String replacement in files
- `bash` - Execute bash commands

### Sandbox

DeerFlow supports multiple sandbox execution modes. Configure your preferred mode in `config.yaml`:

**Local Execution** (runs sandbox code directly on the host machine):
```yaml
sandbox:
  use: src.sandbox.local:LocalSandboxProvider  # Local execution
```

**Docker Execution** (runs sandbox code in isolated Docker containers):
```yaml
sandbox:
  use: src.community.aio_sandbox:AioSandboxProvider  # Docker-based sandbox
```

**Docker Execution with Kubernetes** (runs sandbox code in Kubernetes pods via provisioner service):

This mode runs each sandbox in an isolated Kubernetes Pod on your **host machine's cluster**. Requires Docker Desktop K8s, OrbStack, or similar local K8s setup.

```yaml
sandbox:
  use: src.community.aio_sandbox:AioSandboxProvider
  provisioner_url: http://provisioner:8002
```

When using Docker development (`make docker-start`), DeerFlow starts the `provisioner` service only if this provisioner mode is configured. In local or plain Docker sandbox modes, `provisioner` is skipped.

See [Provisioner Setup Guide](docker/provisioner/README.md) for detailed configuration, prerequisites, and troubleshooting.

The local sandbox is the default and the simplest to set up; the Docker sandbox provides stronger isolation and accepts additional options:

```yaml
sandbox:
  use: src.community.aio_sandbox:AioSandboxProvider
  port: 8080
  auto_start: true
  container_prefix: deer-flow-sandbox

  # Optional: Additional mounts
  mounts:
    - host_path: /path/on/host
      container_path: /path/in/container
      read_only: false
```

### Skills

Configure the skills directory for specialized workflows:

```yaml
skills:
  # Host path (optional, default: ../skills)
  path: /custom/path/to/skills

  # Container mount path (default: /mnt/skills)
  container_path: /mnt/skills
```

**How Skills Work**:
- Skills are stored in `deer-flow/skills/{public,custom}/`
- Each skill has a `SKILL.md` file with metadata
- Skills are automatically discovered and loaded
- Available in both local and Docker sandbox via path mapping

### Title Generation

Automatic conversation title generation:

```yaml
title:
  enabled: true
  max_words: 6
  max_chars: 60
  model_name: null  # Use first model in list
```

## Environment Variables

DeerFlow supports environment variable substitution using the `$` prefix:

```yaml
models:
  - api_key: $OPENAI_API_KEY  # Reads from environment
```
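
A minimal sketch of how such `$`-prefixed values can be resolved, assuming only the convention shown above (the helper name is illustrative, not DeerFlow's exact implementation):

```python
# Illustrative resolver for $-prefixed config values.
import os


def resolve_env(value: str) -> str:
    if isinstance(value, str) and value.startswith("$"):
        return os.environ.get(value[1:], "")
    return value


# e.g. resolve_env("$OPENAI_API_KEY") -> contents of OPENAI_API_KEY
```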

**Common Environment Variables**:
- `OPENAI_API_KEY` - OpenAI API key
- `ANTHROPIC_API_KEY` - Anthropic API key
- `DEEPSEEK_API_KEY` - DeepSeek API key
- `NOVITA_API_KEY` - Novita API key (OpenAI-compatible endpoint)
- `TAVILY_API_KEY` - Tavily search API key
- `DEER_FLOW_CONFIG_PATH` - Custom config file path

## Configuration Location

The configuration file should be placed in the **project root directory** (`deer-flow/config.yaml`), not in the backend directory.

## Configuration Priority

DeerFlow searches for configuration in this order (a sketch of the lookup follows the list):

1. Path specified in code via `config_path` argument
2. Path from `DEER_FLOW_CONFIG_PATH` environment variable
3. `config.yaml` in current working directory (typically `backend/` when running)
4. `config.yaml` in parent directory (project root: `deer-flow/`)
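
A minimal sketch of that lookup order, assuming only the list above (the helper name is illustrative):

```python
# Illustrative config lookup following the priority list above.
import os
from pathlib import Path


def find_config(config_path: str | None = None) -> Path | None:
    candidates = [
        config_path,                              # 1. Explicit argument
        os.environ.get("DEER_FLOW_CONFIG_PATH"),  # 2. Environment variable
        "config.yaml",                            # 3. Current working directory
        "../config.yaml",                         # 4. Parent directory (project root)
    ]
    for candidate in candidates:
        if candidate and Path(candidate).is_file():
            return Path(candidate)
    return None
```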

## Best Practices

1. **Place `config.yaml` in project root** - Not in `backend/` directory
2. **Never commit `config.yaml`** - It's already in `.gitignore`
3. **Use environment variables for secrets** - Don't hardcode API keys
4. **Keep `config.example.yaml` updated** - Document all new options
5. **Test configuration changes locally** - Before deploying
6. **Use Docker sandbox for production** - Better isolation and security

## Troubleshooting

### "Config file not found"
- Ensure `config.yaml` exists in the **project root** directory (`deer-flow/config.yaml`)
- The backend searches the parent directory by default, so the root location is preferred
- Alternatively, set the `DEER_FLOW_CONFIG_PATH` environment variable to a custom location

### "Invalid API key"
- Verify environment variables are set correctly
- Check that the `$` prefix is used for env var references

### "Skills not loading"
- Check that the `deer-flow/skills/` directory exists
- Verify skills have valid `SKILL.md` files
- Check the `skills.path` configuration if using a custom path

### "Docker sandbox fails to start"
- Ensure Docker is running
- Check that port 8080 (or the configured port) is available
- Verify the Docker image is accessible

## Examples

See `config.example.yaml` for complete examples of all configuration options.

# File Uploads

## Overview

The DeerFlow backend provides full file upload support, including multi-file uploads and automatic conversion of Office documents and PDFs to Markdown.

## Features

- ✅ Multiple files per upload
- ✅ Automatic document-to-Markdown conversion (PDF, PPT, Excel, Word)
- ✅ Files stored in thread-isolated directories
- ✅ The agent is automatically made aware of uploaded files
- ✅ File listing and deletion

## API Endpoints

### 1. Upload Files
```
POST /api/threads/{thread_id}/uploads
```

**Request Body:** `multipart/form-data`
- `files`: One or more files

**Response:**
```json
{
  "success": true,
  "files": [
    {
      "filename": "document.pdf",
      "size": 1234567,
      "path": ".deer-flow/threads/{thread_id}/user-data/uploads/document.pdf",
      "virtual_path": "/mnt/user-data/uploads/document.pdf",
      "artifact_url": "/api/threads/{thread_id}/artifacts/mnt/user-data/uploads/document.pdf",
      "markdown_file": "document.md",
      "markdown_path": ".deer-flow/threads/{thread_id}/user-data/uploads/document.md",
      "markdown_virtual_path": "/mnt/user-data/uploads/document.md",
      "markdown_artifact_url": "/api/threads/{thread_id}/artifacts/mnt/user-data/uploads/document.md"
    }
  ],
  "message": "Successfully uploaded 1 file(s)"
}
```

**Path fields:**
- `path`: actual filesystem path (relative to the `backend/` directory)
- `virtual_path`: virtual path the agent uses inside the sandbox
- `artifact_url`: URL the frontend uses to access the file over HTTP

### 2. List Uploaded Files
```
GET /api/threads/{thread_id}/uploads/list
```

**Response:**
```json
{
  "files": [
    {
      "filename": "document.pdf",
      "size": 1234567,
      "path": ".deer-flow/threads/{thread_id}/user-data/uploads/document.pdf",
      "virtual_path": "/mnt/user-data/uploads/document.pdf",
      "artifact_url": "/api/threads/{thread_id}/artifacts/mnt/user-data/uploads/document.pdf",
      "extension": ".pdf",
      "modified": 1705997600.0
    }
  ],
  "count": 1
}
```

### 3. Delete a File
```
DELETE /api/threads/{thread_id}/uploads/{filename}
```

**Response:**
```json
{
  "success": true,
  "message": "Deleted document.pdf"
}
```

## Supported Document Formats

The following formats are automatically converted to Markdown:
- PDF (`.pdf`)
- PowerPoint (`.ppt`, `.pptx`)
- Excel (`.xls`, `.xlsx`)
- Word (`.doc`, `.docx`)

The converted Markdown file is saved in the same directory, named after the original file with a `.md` extension.

## Agent Integration

### Automatic File Listing

On every request, the agent automatically receives a list of uploaded files in this format:

```xml
<uploaded_files>
The following files have been uploaded and are available for use:

- document.pdf (1.2 MB)
  Path: /mnt/user-data/uploads/document.pdf

- document.md (45.3 KB)
  Path: /mnt/user-data/uploads/document.md

You can read these files using the `read_file` tool with the paths shown above.
</uploaded_files>
```

### Using Uploaded Files

The agent runs inside the sandbox and accesses files through virtual paths. It can read uploads directly with the `read_file` tool:

```python
# Read the original PDF (if supported)
read_file(path="/mnt/user-data/uploads/document.pdf")

# Read the converted Markdown (recommended)
read_file(path="/mnt/user-data/uploads/document.md")
```

**Path mapping:**
- Agent uses: `/mnt/user-data/uploads/document.pdf` (virtual path)
- Actual storage: `backend/.deer-flow/threads/{thread_id}/user-data/uploads/document.pdf`
- Frontend access: `/api/threads/{thread_id}/artifacts/mnt/user-data/uploads/document.pdf` (HTTP URL)

The upload flow uses a "thread directory first" strategy:
- Files are first written to `backend/.deer-flow/threads/{thread_id}/user-data/uploads/` as the authoritative store
- The local sandbox (`sandbox_id=local`) uses the thread directory contents directly
- Non-local sandboxes additionally sync files to `/mnt/user-data/uploads/*` so they are visible at runtime

## Testing Examples

### With curl

```bash
# 1. Upload a single file
curl -X POST http://localhost:2026/api/threads/test-thread/uploads \
  -F "files=@/path/to/document.pdf"

# 2. Upload multiple files
curl -X POST http://localhost:2026/api/threads/test-thread/uploads \
  -F "files=@/path/to/document.pdf" \
  -F "files=@/path/to/presentation.pptx" \
  -F "files=@/path/to/spreadsheet.xlsx"

# 3. List uploaded files
curl http://localhost:2026/api/threads/test-thread/uploads/list

# 4. Delete a file
curl -X DELETE http://localhost:2026/api/threads/test-thread/uploads/document.pdf
```

### With Python

```python
import requests

thread_id = "test-thread"
base_url = "http://localhost:2026"

# Upload files
files = [
    ("files", open("document.pdf", "rb")),
    ("files", open("presentation.pptx", "rb")),
]
response = requests.post(
    f"{base_url}/api/threads/{thread_id}/uploads",
    files=files
)
print(response.json())

# List files
response = requests.get(f"{base_url}/api/threads/{thread_id}/uploads/list")
print(response.json())

# Delete a file
response = requests.delete(
    f"{base_url}/api/threads/{thread_id}/uploads/document.pdf"
)
print(response.json())
```

## Storage Layout

```
backend/.deer-flow/threads/
└── {thread_id}/
    └── user-data/
        └── uploads/
            ├── document.pdf       # Original file
            ├── document.md        # Converted Markdown
            ├── presentation.pptx
            ├── presentation.md
            └── ...
```

## Limits

- Maximum file size: 100 MB (configurable via `client_max_body_size` in nginx.conf)
- Filename safety: file paths are validated to prevent directory traversal attacks
- Thread isolation: uploads are isolated per thread and cannot be accessed across threads

## Implementation

### Components

1. **Upload Router** (`src/gateway/routers/uploads.py`)
   - Handles upload, list, and delete requests
   - Converts documents with markitdown

2. **Uploads Middleware** (`src/agents/middlewares/uploads_middleware.py`)
   - Injects the file list before each agent request
   - Generates the formatted file-list message automatically

3. **Nginx configuration** (`nginx.conf`)
   - Routes upload requests to the Gateway API
   - Configures large-file upload support

### Dependencies

- `markitdown>=0.0.1a2` - Document conversion
- `python-multipart>=0.0.20` - File upload handling

## Troubleshooting

### Uploads fail

1. Check whether the file exceeds the size limit
2. Check that the Gateway API is running
3. Check that there is enough disk space
4. Inspect the gateway logs: `make gateway`

### Document conversion fails

1. Verify markitdown is installed: `uv run python -c "import markitdown"`
2. Check the logs for the specific error
3. Some corrupted or encrypted documents cannot be converted, but the original file is still saved

### The agent cannot see uploaded files

1. Confirm UploadsMiddleware is registered in agent.py
2. Check that the thread_id is correct
3. Confirm the files actually landed in `backend/.deer-flow/threads/{thread_id}/user-data/uploads/`
4. For non-local sandboxes, confirm the upload request succeeded (the sandbox sync must complete)

## Development Notes

### Frontend Integration

```typescript
// Upload files example
async function uploadFiles(threadId: string, files: File[]) {
  const formData = new FormData();
  files.forEach(file => {
    formData.append('files', file);
  });

  const response = await fetch(
    `/api/threads/${threadId}/uploads`,
    {
      method: 'POST',
      body: formData,
    }
  );

  return response.json();
}

// List files
async function listFiles(threadId: string) {
  const response = await fetch(
    `/api/threads/${threadId}/uploads/list`
  );
  return response.json();
}
```

### Possible Extensions

1. **File preview**: add a preview endpoint so files can be viewed directly in the browser
2. **Batch deletion**: delete multiple files in one request
3. **File search**: search by filename or type
4. **Versioning**: keep multiple versions of a file
5. **Archive support**: automatically extract zip files
6. **Image OCR**: run OCR on uploaded images
@@ -0,0 +1,65 @@
|
||||
# MCP (Model Context Protocol) Configuration

DeerFlow supports configurable MCP servers and skills to extend its capabilities. Both are loaded from a dedicated `extensions_config.json` file in the project root directory.

## Setup

1. Copy `extensions_config.example.json` to `extensions_config.json` in the project root directory.
   ```bash
   # Copy example configuration
   cp extensions_config.example.json extensions_config.json
   ```
2. Enable the desired MCP servers or skills by setting `"enabled": true` (a minimal entry is sketched below).
3. Configure each server's command, arguments, and environment variables as needed.
4. Restart the application to load and register MCP tools.

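For comparison with the OAuth example below, a minimal stdio server entry might look like this. The `command`/`args`/`env` field names follow the common MCP configuration shape and should be checked against `extensions_config.example.json`:

```json
{
  "mcpServers": {
    "filesystem": {
      "enabled": true,
      "type": "stdio",
      "command": "npx",
      "args": ["-y", "@modelcontextprotocol/server-filesystem", "/tmp"],
      "env": {}
    }
  }
}
```
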
## OAuth Support (HTTP/SSE MCP Servers)

For `http` and `sse` MCP servers, DeerFlow supports OAuth token acquisition and automatic token refresh.

- Supported grants: `client_credentials`, `refresh_token`
- Configure a per-server `oauth` block in `extensions_config.json`
- Secrets should be provided via environment variables (for example: `$MCP_OAUTH_CLIENT_SECRET`)

Example:

```json
{
  "mcpServers": {
    "secure-http-server": {
      "enabled": true,
      "type": "http",
      "url": "https://api.example.com/mcp",
      "oauth": {
        "enabled": true,
        "token_url": "https://auth.example.com/oauth/token",
        "grant_type": "client_credentials",
        "client_id": "$MCP_OAUTH_CLIENT_ID",
        "client_secret": "$MCP_OAUTH_CLIENT_SECRET",
        "scope": "mcp.read",
        "refresh_skew_seconds": 60
      }
    }
  }
}
```

## How It Works

MCP servers expose tools that are automatically discovered and integrated into DeerFlow's agent system at runtime. Once enabled, these tools become available to agents without additional code changes.

## Example Capabilities

MCP servers can provide access to:

- **File systems**
- **Databases** (e.g., PostgreSQL)
- **External APIs** (e.g., GitHub, Brave Search)
- **Browser automation** (e.g., Puppeteer)
- **Custom MCP server implementations**

## Learn More

For detailed documentation about the Model Context Protocol, visit:
https://modelcontextprotocol.io

@@ -0,0 +1,281 @@
# Memory System Improvements

This document describes recent improvements to the memory system's fact injection mechanism.

## Overview

Two major improvements have been made to the `format_memory_for_injection` function:

1. **Similarity-Based Fact Retrieval**: Uses TF-IDF to select facts most relevant to the current conversation context
2. **Accurate Token Counting**: Uses tiktoken for precise token estimation instead of a rough character-based approximation

## 1. Similarity-Based Fact Retrieval

### Problem

The original implementation selected facts based solely on confidence scores, taking the top 15 highest-confidence facts regardless of their relevance to the current conversation. This could result in injecting irrelevant facts while omitting contextually important ones.

### Solution

The new implementation uses **TF-IDF (Term Frequency-Inverse Document Frequency)** vectorization with cosine similarity to measure how relevant each fact is to the current conversation context.

**Scoring Formula** (illustrated in code below):
```
final_score = (similarity × 0.6) + (confidence × 0.4)
```

- **Similarity (60% weight)**: Cosine similarity between fact content and current context
- **Confidence (40% weight)**: LLM-assigned confidence score (0-1)

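For illustration, the scoring above can be reproduced with scikit-learn directly. The sketch below assumes a simple `{"content", "confidence"}` fact shape and is not the exact DeerFlow implementation:

```python
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

def rank_facts(facts: list[dict], context: str,
               sim_w: float = 0.6, conf_w: float = 0.4) -> list[dict]:
    """Order facts by blended TF-IDF similarity and confidence (sketch)."""
    # Vectorize the context together with all fact texts so they share a vocabulary.
    texts = [context] + [f["content"] for f in facts]
    matrix = TfidfVectorizer().fit_transform(texts)
    sims = cosine_similarity(matrix[0:1], matrix[1:]).ravel()
    scored = [(sim_w * s + conf_w * f.get("confidence", 0.0), f)
              for s, f in zip(sims, facts)]
    return [f for _, f in sorted(scored, key=lambda t: t[0], reverse=True)]
```
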
### Benefits

- **Context-Aware**: Prioritizes facts relevant to what the user is currently discussing
- **Dynamic**: Different facts surface based on the conversation topic
- **Balanced**: Considers both relevance and reliability
- **Fallback**: Gracefully degrades to confidence-only ranking if context is unavailable

### Example

Given facts about Python, React, and Docker:
- User asks: *"How should I write Python tests?"*
  - Prioritizes: Python testing, type hints, pytest
- User asks: *"How to optimize my Next.js app?"*
  - Prioritizes: React/Next.js experience, performance optimization

### Configuration

Customize the weights in `config.yaml` (optional):
```yaml
memory:
  similarity_weight: 0.6  # Weight for TF-IDF similarity (0-1)
  confidence_weight: 0.4  # Weight for confidence score (0-1)
```

**Note**: Weights should sum to 1.0 for best results.

## 2. Accurate Token Counting

### Problem

The original implementation estimated tokens using a simple formula:
```python
max_chars = max_tokens * 4
```

This assumes ~4 characters per token, which:
- Is inaccurate for many languages and content types
- Can lead to over-injection (exceeding token limits)
- Can lead to under-injection (wasting available budget)

### Solution

The new implementation uses **tiktoken**, OpenAI's official tokenizer library, to count tokens accurately:

```python
import tiktoken

def _count_tokens(text: str, encoding_name: str = "cl100k_base") -> int:
    encoding = tiktoken.get_encoding(encoding_name)
    return len(encoding.encode(text))
```

- Uses the `cl100k_base` encoding (GPT-4, GPT-3.5, text-embedding-ada-002)
- Provides exact token counts for budget management
- Falls back to character-based estimation if tiktoken fails (see the sketch below)

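The fallback mentioned in the last bullet can be a thin wrapper; a sketch (the real error handling may differ):

```python
def count_tokens_safe(text: str) -> int:
    """Exact count via tiktoken, falling back to the old 4-chars/token estimate."""
    try:
        return _count_tokens(text)
    except Exception:
        return len(text) // 4
```
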
### Benefits

- **Precision**: Exact token counts match what the model sees
- **Budget Optimization**: Maximizes use of the available token budget
- **No Overflows**: Prevents exceeding the `max_injection_tokens` limit
- **Better Planning**: Each section's token cost is known precisely

### Example

```python
text = "This is a test string to count tokens accurately using tiktoken."

# Old method
char_count = len(text)         # 64 characters
old_estimate = char_count // 4  # 16 tokens (overestimate)

# New method
accurate_count = _count_tokens(text)  # 13 tokens (exact)
```

**Result**: a 3-token difference (18.75% error rate)

In production, errors can be much larger for:
- Code snippets (more tokens per character)
- Non-English text (variable token ratios)
- Technical jargon (often multi-token words)

## Implementation Details

### Function Signature

```python
def format_memory_for_injection(
    memory_data: dict[str, Any],
    max_tokens: int = 2000,
    current_context: str | None = None,
) -> str:
```

**New Parameter**:
- `current_context`: Optional string containing recent conversation messages for similarity calculation

### Backward Compatibility

The function remains **100% backward compatible**:
- If `current_context` is `None` or empty, it falls back to confidence-only ranking
- Existing callers without the parameter work exactly as before
- Token counting is always accurate (a transparent improvement)

### Integration Point

Memory is **dynamically injected** via `MemoryMiddleware.before_model()`:

```python
# src/agents/middlewares/memory_middleware.py

def _extract_conversation_context(messages: list, max_turns: int = 3) -> str:
    """Extract recent conversation (user input + final responses only)."""
    context_parts = []
    turn_count = 0

    for msg in reversed(messages):
        if msg.type == "human":
            # Always include user messages
            context_parts.append(extract_text(msg))
            turn_count += 1
            if turn_count >= max_turns:
                break

        elif msg.type == "ai" and not msg.tool_calls:
            # Only include final AI responses (no tool_calls)
            context_parts.append(extract_text(msg))

        # Skip tool messages and AI messages with tool_calls

    return " ".join(reversed(context_parts))


class MemoryMiddleware:
    def before_model(self, state, runtime):
        """Inject memory before EACH LLM call (not just before_agent)."""

        # Get recent conversation context (filtered)
        conversation_context = _extract_conversation_context(
            state["messages"],
            max_turns=3,
        )

        # Load memory with context-aware fact selection
        memory_data = get_memory_data()
        memory_content = format_memory_for_injection(
            memory_data,
            max_tokens=config.max_injection_tokens,
            current_context=conversation_context,  # ✅ Clean conversation only
        )

        # Inject as a system message
        memory_message = SystemMessage(
            content=f"<memory>\n{memory_content}\n</memory>",
            name="memory_context",
        )

        return {"messages": [memory_message] + state["messages"]}
```

### How It Works

1. **The user continues the conversation**:
   ```
   Turn 1: "I'm working on a Python project"
   Turn 2: "It uses FastAPI and SQLAlchemy"
   Turn 3: "How do I write tests?"  ← Current query
   ```

2. **Extract recent context**: the last 3 turns combined:
   ```
   "I'm working on a Python project. It uses FastAPI and SQLAlchemy. How do I write tests?"
   ```

3. **TF-IDF scoring**: ranks facts by relevance to this context
   - High score: "Prefers pytest for testing" (testing + Python)
   - High score: "Likes type hints in Python" (Python related)
   - High score: "Expert in Python and FastAPI" (Python + FastAPI)
   - Low score: "Uses Docker for containerization" (less relevant)

4. **Injection**: top-ranked facts are injected into the system prompt's `<memory>` section

5. **Agent sees**: the full system prompt with relevant memory context

### Benefits of Dynamic System Prompt

- **Multi-Turn Context**: Uses the last 3 turns, not just the current question
  - Captures the ongoing conversation flow
  - Better understanding of the user's current focus
- **Query-Specific Facts**: Different facts surface based on the conversation topic
- **Clean Architecture**: No middleware message manipulation
- **LangChain Native**: Uses built-in dynamic system prompt support
- **Runtime Flexibility**: Memory is regenerated for each agent invocation

## Dependencies

New dependencies added to `pyproject.toml`:
```toml
dependencies = [
    # ... existing dependencies ...
    "tiktoken>=0.8.0",      # Accurate token counting
    "scikit-learn>=1.6.1",  # TF-IDF vectorization
]
```

Install with:
```bash
cd backend
uv sync
```

## Testing

Run the test script to verify the improvements:
```bash
cd backend
python test_memory_improvement.py
```

The expected output shows:
- Different fact ordering based on context
- Accurate token counts vs. the old estimates
- Budget-respecting fact selection

## Performance Impact

### Computational Cost
- **TF-IDF Calculation**: O(n × m) where n = number of facts, m = vocabulary size
  - Negligible for typical fact counts (10-100 facts)
  - Caching opportunities if the context doesn't change
- **Token Counting**: ~10-100µs per call
  - Slower than the old character heuristic, but trivial in absolute terms
  - Minimal overhead compared to LLM inference

### Memory Usage
- **TF-IDF Vectorizer**: ~1-5MB for a typical vocabulary
  - Instantiated once per injection call
  - Garbage collected after use
- **Tiktoken Encoding**: ~1MB (cached singleton)
  - Loaded once per process lifetime

### Recommendations
- The current implementation is optimized for accuracy over caching
- For high-throughput scenarios, consider:
  - Pre-computing fact embeddings (store them in memory.json)
  - Caching the TF-IDF vectorizer between calls
  - Using approximate nearest-neighbor search for >1000 facts

## Summary

| Aspect | Before | After |
|--------|--------|-------|
| Fact Selection | Top 15 by confidence only | Relevance-based (similarity + confidence) |
| Token Counting | `len(text) // 4` | `tiktoken.encode(text)` |
| Context Awareness | None | TF-IDF cosine similarity |
| Accuracy | ±25% token estimate | Exact token count |
| Configuration | Fixed weights | Customizable similarity/confidence weights |

These improvements result in:
- **More relevant** facts injected into context
- **Better utilization** of the available token budget
- **Fewer hallucinations** due to focused context
- **Higher quality** agent responses

@@ -0,0 +1,260 @@
# Memory System Improvements - Summary

## Overview of the Changes

Both of the issues you raised have been addressed:
1. ✅ **Rough token estimation** (`character count * 4`) → precise counting with tiktoken
2. ✅ **No similarity-based recall** → TF-IDF over the recent conversation context

## Core Improvements

### 1. Context-Aware Fact Recall

**Before**:
- Only sorted by confidence, taking the top 15
- The same facts were injected no matter what the user was discussing

**Now**:
- Extracts the most recent **3 turns** (human + AI messages) as context
- Uses **TF-IDF cosine similarity** to score each fact's relevance to the conversation
- Combined score: `similarity (60%) + confidence (40%)`
- Dynamically selects the most relevant facts

**Example**:
```
Conversation history:
  Turn 1: "I'm working on a Python project"
  Turn 2: "It uses FastAPI and SQLAlchemy"
  Turn 3: "How do I write tests?"

Context: "I'm working on a Python project It uses FastAPI and SQLAlchemy How do I write tests?"

Highly relevant facts:
  ✓ "Prefers pytest for testing" (Python + testing)
  ✓ "Expert in Python and FastAPI" (Python + FastAPI)
  ✓ "Likes type hints in Python" (Python)

Less relevant facts:
  ✗ "Uses Docker for containerization" (unrelated)
```

### 2. Precise Token Counting

**Before**:
```python
max_chars = max_tokens * 4  # rough estimate
```

**Now**:
```python
import tiktoken

def _count_tokens(text: str) -> int:
    encoding = tiktoken.get_encoding("cl100k_base")  # GPT-4/3.5
    return len(encoding.encode(text))
```

**Comparison**:
```python
text = "This is a test string to count tokens accurately."
# Old method: len(text) // 4 = 12 tokens (estimate)
# New method: tiktoken.encode = 10 tokens (exact)
# Error: 20%
```

### 3. Multi-Turn Conversation Context

**The earlier concern**:
> "Is passing only the most recent human message enough context?"

**The solution now**:
- Extract the most recent **3 turns** (configurable)
- Include both human and AI messages
- A more complete conversation context

**Example**:
```
Single message: "How do I write tests?"
  → Lacks context; the project is unknown

3 turns: "Python project + FastAPI + how do I write tests?"
  → Full context; more relevant facts can be selected
```

## Implementation

### Dynamic Injection via Middleware

The `before_model` hook injects memory **before every LLM call**:

```python
# src/agents/middlewares/memory_middleware.py

def _extract_conversation_context(messages: list, max_turns: int = 3) -> str:
    """Extract the last 3 turns (user input and final responses only)."""
    context_parts = []
    turn_count = 0

    for msg in reversed(messages):
        msg_type = getattr(msg, "type", None)

        if msg_type == "human":
            # ✅ Always include user messages
            content = extract_text(msg)
            if content:
                context_parts.append(content)
            turn_count += 1
            if turn_count >= max_turns:
                break

        elif msg_type == "ai":
            # ✅ Only include AI messages without tool_calls (final responses)
            tool_calls = getattr(msg, "tool_calls", None)
            if not tool_calls:
                content = extract_text(msg)
                if content:
                    context_parts.append(content)

        # ✅ Skip tool messages and AI messages with tool_calls

    return " ".join(reversed(context_parts))


class MemoryMiddleware:
    def before_model(self, state, runtime):
        """Inject memory before every LLM call (not before_agent)."""

        # 1. Extract the last 3 turns (tool calls filtered out)
        messages = state["messages"]
        conversation_context = _extract_conversation_context(messages, max_turns=3)

        # 2. Select relevant facts using the clean conversation context
        memory_data = get_memory_data()
        memory_content = format_memory_for_injection(
            memory_data,
            max_tokens=config.max_injection_tokens,
            current_context=conversation_context,  # ✅ real conversation content only
        )

        # 3. Build the system message
        memory_message = SystemMessage(
            content=f"<memory>\n{memory_content}\n</memory>",
            name="memory_context",  # used for de-duplication
        )

        # 4. Prepend it to the message list
        updated_messages = [memory_message] + messages
        return {"messages": updated_messages}
```

### Why This Design?

Based on your three key observations:

1. **Use `before_model`, not `before_agent`**
   - ✅ `before_agent` runs only once when the whole agent starts
   - ✅ `before_model` runs **before every LLM call**
   - ✅ Every LLM inference therefore sees the latest relevant memory

2. **The messages array contains only human/ai/tool messages, no system message**
   - ✅ Although uncommon, LangChain allows inserting a system message mid-conversation
   - ✅ Middleware may modify the messages array
   - ✅ `name="memory_context"` prevents duplicate injection

3. **Strip tool-calling AI messages; pass only user input and final output**
   - ✅ AI messages with `tool_calls` (intermediate steps) are filtered out
   - ✅ Only human messages (user input) and AI messages without tool_calls (final responses) are kept
   - ✅ The cleaner context makes the TF-IDF similarity more accurate

## Configuration Options

Adjustable in `config.yaml`:

```yaml
memory:
  enabled: true
  max_injection_tokens: 2000   # ✅ uses precise token counting

  # Advanced settings (optional)
  # max_context_turns: 3       # number of turns (default 3)
  # similarity_weight: 0.6     # similarity weight
  # confidence_weight: 0.4     # confidence weight
```

## Dependency Changes

New dependencies:
```toml
dependencies = [
    "tiktoken>=0.8.0",      # precise token counting
    "scikit-learn>=1.6.1",  # TF-IDF vectorization
]
```

Install:
```bash
cd backend
uv sync
```

## Performance Impact

- **TF-IDF computation**: O(n × m), n = number of facts, m = vocabulary size
  - Typical case (10-100 facts): < 10ms
- **Token counting**: ~100µs per call
  - Cheap enough to run on every call
- **Total overhead**: negligible compared to LLM inference

## Backward Compatibility

✅ Fully backward compatible:
- Without `current_context`, it degrades to confidence-based sorting
- All existing configuration keeps working
- No other features are affected

## Changed Files

1. **Core functionality**
   - `src/agents/memory/prompt.py` - adds TF-IDF recall and precise token counting
   - `src/agents/lead_agent/prompt.py` - dynamic system prompt
   - `src/agents/lead_agent/agent.py` - passes a function instead of a string

2. **Dependencies**
   - `pyproject.toml` - adds tiktoken and scikit-learn

3. **Documentation**
   - `docs/MEMORY_IMPROVEMENTS.md` - detailed technical documentation
   - `docs/MEMORY_IMPROVEMENTS_SUMMARY.md` - improvement summary (this file)
   - `CLAUDE.md` - updated architecture notes
   - `config.example.yaml` - configuration notes

## Verification

Run the project:
```bash
cd backend
make dev
```

Test within a conversation:
1. Discuss different topics (Python, React, Docker, etc.)
2. Observe whether different conversations inject different facts
3. Check that the token budget is enforced accurately

## Summary

| Issue | Before | Now |
|------|------|------|
| Token counting | `len(text) // 4` (±25% error) | `tiktoken.encode()` (exact) |
| Fact selection | Fixed confidence ordering | TF-IDF similarity + confidence |
| Context | None | Last 3 turns |
| Mechanism | Static system prompt | Dynamic system prompt function |
| Configurability | Limited | Adjustable turns and weights |

All improvements are in place, and they:
- ✅ Do not modify the messages array
- ✅ Use multi-turn conversation context
- ✅ Count tokens precisely
- ✅ Recall facts by similarity
- ✅ Remain fully backward compatible

@@ -0,0 +1,289 @@
# File Path Usage Examples

## Three Path Types

DeerFlow's file upload system returns three different paths, each used in a different scenario:

### 1. Actual Filesystem Path (path)

```
.deer-flow/threads/{thread_id}/user-data/uploads/document.pdf
```

**Purpose:**
- The file's actual location on the server filesystem
- Relative to the `backend/` directory
- Used for direct filesystem access, backups, debugging, etc.

**Example:**
```python
# Direct access from Python code
from pathlib import Path
file_path = Path("backend/.deer-flow/threads/abc123/user-data/uploads/document.pdf")
content = file_path.read_bytes()
```

### 2. Virtual Path (virtual_path)

```
/mnt/user-data/uploads/document.pdf
```

**Purpose:**
- The path the agent uses inside the sandbox environment
- The sandbox maps it to the actual path automatically
- All of the agent's file tools use this path

**Example:**
Used by the agent in conversation:
```python
# Agent uses the read_file tool
read_file(path="/mnt/user-data/uploads/document.pdf")

# Agent uses the bash tool
bash(command="cat /mnt/user-data/uploads/document.pdf")
```

### 3. HTTP Access URL (artifact_url)

```
/api/threads/{thread_id}/artifacts/mnt/user-data/uploads/document.pdf
```

**Purpose:**
- Frontend access to files over HTTP
- Used to download and preview files
- Can be opened directly in the browser

**Example:**
```typescript
// Frontend TypeScript/JavaScript code
const threadId = 'abc123';
const filename = 'document.pdf';

// Download the file
const downloadUrl = `/api/threads/${threadId}/artifacts/mnt/user-data/uploads/${filename}?download=true`;
window.open(downloadUrl);

// Preview in a new window
const viewUrl = `/api/threads/${threadId}/artifacts/mnt/user-data/uploads/${filename}`;
window.open(viewUrl, '_blank');

// Fetch it with the fetch API
const response = await fetch(viewUrl);
const blob = await response.blob();
```

## Complete Workflow Example

### Scenario: the frontend uploads a file and asks the agent to process it

```typescript
// 1. Upload the file from the frontend
async function uploadAndProcess(threadId: string, file: File) {
  // Upload the file
  const formData = new FormData();
  formData.append('files', file);

  const uploadResponse = await fetch(
    `/api/threads/${threadId}/uploads`,
    {
      method: 'POST',
      body: formData
    }
  );

  const uploadData = await uploadResponse.json();
  const fileInfo = uploadData.files[0];

  console.log('File info:', fileInfo);
  // {
  //   filename: "report.pdf",
  //   path: ".deer-flow/threads/abc123/user-data/uploads/report.pdf",
  //   virtual_path: "/mnt/user-data/uploads/report.pdf",
  //   artifact_url: "/api/threads/abc123/artifacts/mnt/user-data/uploads/report.pdf",
  //   markdown_file: "report.md",
  //   markdown_path: ".deer-flow/threads/abc123/user-data/uploads/report.md",
  //   markdown_virtual_path: "/mnt/user-data/uploads/report.md",
  //   markdown_artifact_url: "/api/threads/abc123/artifacts/mnt/user-data/uploads/report.md"
  // }

  // 2. Send a message to the agent
  await sendMessage(threadId, "Please analyze the PDF I just uploaded");

  // The agent automatically sees the file list, including:
  // - report.pdf (virtual path: /mnt/user-data/uploads/report.pdf)
  // - report.md (virtual path: /mnt/user-data/uploads/report.md)

  // 3. The frontend can access the converted Markdown directly
  const mdResponse = await fetch(fileInfo.markdown_artifact_url);
  const markdownContent = await mdResponse.text();
  console.log('Markdown content:', markdownContent);

  // 4. Or download the original PDF
  const downloadLink = document.createElement('a');
  downloadLink.href = fileInfo.artifact_url + '?download=true';
  downloadLink.download = fileInfo.filename;
  downloadLink.click();
}
```

## Path Conversion Table

| Scenario | Path type | Example |
|------|---------------|------|
| Server-side backend access | `path` | `.deer-flow/threads/abc123/user-data/uploads/file.pdf` |
| Agent tool calls | `virtual_path` | `/mnt/user-data/uploads/file.pdf` |
| Frontend download/preview | `artifact_url` | `/api/threads/abc123/artifacts/mnt/user-data/uploads/file.pdf` |
| Backup scripts | `path` | `.deer-flow/threads/abc123/user-data/uploads/file.pdf` |
| Logging | `path` | `.deer-flow/threads/abc123/user-data/uploads/file.pdf` |

## Code Example Collection

### Python - Backend Processing

```python
from pathlib import Path
from src.agents.middlewares.thread_data_middleware import THREAD_DATA_BASE_DIR

def process_uploaded_file(thread_id: str, filename: str):
    # Use the actual path
    base_dir = Path.cwd() / THREAD_DATA_BASE_DIR / thread_id / "user-data" / "uploads"
    file_path = base_dir / filename

    # Read it directly
    with open(file_path, 'rb') as f:
        content = f.read()

    return content
```

### JavaScript - Frontend Access

```javascript
// List uploaded files
async function listUploadedFiles(threadId) {
  const response = await fetch(`/api/threads/${threadId}/uploads/list`);
  const data = await response.json();

  // Create download links for each file
  data.files.forEach(file => {
    console.log(`File: ${file.filename}`);
    console.log(`Download: ${file.artifact_url}?download=true`);
    console.log(`Preview: ${file.artifact_url}`);

    // Documents also have a Markdown version
    if (file.markdown_artifact_url) {
      console.log(`Markdown: ${file.markdown_artifact_url}`);
    }
  });

  return data.files;
}

// Delete a file
async function deleteFile(threadId, filename) {
  const response = await fetch(
    `/api/threads/${threadId}/uploads/${filename}`,
    { method: 'DELETE' }
  );
  return response.json();
}
```

### React Component Example

```tsx
import React, { useState, useEffect } from 'react';

interface UploadedFile {
  filename: string;
  size: number;
  path: string;
  virtual_path: string;
  artifact_url: string;
  extension: string;
  modified: number;
  markdown_artifact_url?: string;
}

function FileUploadList({ threadId }: { threadId: string }) {
  const [files, setFiles] = useState<UploadedFile[]>([]);

  useEffect(() => {
    fetchFiles();
  }, [threadId]);

  async function fetchFiles() {
    const response = await fetch(`/api/threads/${threadId}/uploads/list`);
    const data = await response.json();
    setFiles(data.files);
  }

  async function handleUpload(event: React.ChangeEvent<HTMLInputElement>) {
    const fileList = event.target.files;
    if (!fileList) return;

    const formData = new FormData();
    Array.from(fileList).forEach(file => {
      formData.append('files', file);
    });

    await fetch(`/api/threads/${threadId}/uploads`, {
      method: 'POST',
      body: formData
    });

    fetchFiles(); // Refresh the list
  }

  async function handleDelete(filename: string) {
    await fetch(`/api/threads/${threadId}/uploads/${filename}`, {
      method: 'DELETE'
    });
    fetchFiles(); // Refresh the list
  }

  return (
    <div>
      <input type="file" multiple onChange={handleUpload} />

      <ul>
        {files.map(file => (
          <li key={file.filename}>
            <span>{file.filename}</span>
            <a href={file.artifact_url} target="_blank">Preview</a>
            <a href={`${file.artifact_url}?download=true`}>Download</a>
            {file.markdown_artifact_url && (
              <a href={file.markdown_artifact_url} target="_blank">Markdown</a>
            )}
            <button onClick={() => handleDelete(file.filename)}>Delete</button>
          </li>
        ))}
      </ul>
    </div>
  );
}
```

## Notes

1. **Path safety**
   - The actual path (`path`) includes the thread ID, ensuring isolation
   - The API validates paths to prevent directory traversal attacks
   - The frontend should not use `path` directly; use `artifact_url` instead

2. **Agent usage**
   - The agent only sees and uses `virtual_path`
   - The sandbox maps it to the actual path automatically
   - The agent does not need to know the real filesystem layout

3. **Frontend integration**
   - Always use `artifact_url` to access files
   - Do not try to access filesystem paths directly
   - Use the `?download=true` parameter to force a download

4. **Markdown conversion**
   - On successful conversion, extra `markdown_*` fields are returned
   - Prefer the Markdown version (easier to process)
   - The original file is always kept

@@ -0,0 +1,53 @@
# Documentation

This directory contains detailed documentation for the DeerFlow backend.

## Quick Links

| Document | Description |
|----------|-------------|
| [ARCHITECTURE.md](ARCHITECTURE.md) | System architecture overview |
| [API.md](API.md) | Complete API reference |
| [CONFIGURATION.md](CONFIGURATION.md) | Configuration options |
| [SETUP.md](SETUP.md) | Quick setup guide |

## Feature Documentation

| Document | Description |
|----------|-------------|
| [FILE_UPLOAD.md](FILE_UPLOAD.md) | File upload functionality |
| [PATH_EXAMPLES.md](PATH_EXAMPLES.md) | Path types and usage examples |
| [summarization.md](summarization.md) | Context summarization feature |
| [plan_mode_usage.md](plan_mode_usage.md) | Plan mode with TodoList |
| [AUTO_TITLE_GENERATION.md](AUTO_TITLE_GENERATION.md) | Automatic title generation |

## Development

| Document | Description |
|----------|-------------|
| [TODO.md](TODO.md) | Planned features and known issues |

## Getting Started

1. **New to DeerFlow?** Start with [SETUP.md](SETUP.md) for quick installation
2. **Configuring the system?** See [CONFIGURATION.md](CONFIGURATION.md)
3. **Understanding the architecture?** Read [ARCHITECTURE.md](ARCHITECTURE.md)
4. **Building integrations?** Check [API.md](API.md) for the API reference

## Document Organization

```
docs/
├── README.md                           # This file
├── ARCHITECTURE.md                     # System architecture
├── API.md                              # API reference
├── CONFIGURATION.md                    # Configuration guide
├── SETUP.md                            # Setup instructions
├── FILE_UPLOAD.md                      # File upload feature
├── PATH_EXAMPLES.md                    # Path usage examples
├── summarization.md                    # Summarization feature
├── plan_mode_usage.md                  # Plan mode feature
├── AUTO_TITLE_GENERATION.md            # Title generation
├── TITLE_GENERATION_IMPLEMENTATION.md  # Title implementation details
└── TODO.md                             # Roadmap and issues
```

@@ -0,0 +1,92 @@
# Setup Guide

Quick setup instructions for DeerFlow.

## Configuration Setup

DeerFlow uses a YAML configuration file that should be placed in the **project root directory**.

### Steps

1. **Navigate to the project root**:
   ```bash
   cd /path/to/deer-flow
   ```

2. **Copy the example configuration**:
   ```bash
   cp config.example.yaml config.yaml
   ```

3. **Edit the configuration**:
   ```bash
   # Option A: Set environment variables (recommended)
   export OPENAI_API_KEY="your-key-here"

   # Option B: Edit config.yaml directly
   vim config.yaml  # or your preferred editor
   ```

4. **Verify the configuration**:
   ```bash
   cd backend
   python -c "from src.config import get_app_config; print('✓ Config loaded:', get_app_config().models[0].name)"
   ```

## Important Notes

- **Location**: `config.yaml` belongs in `deer-flow/` (the project root), not `deer-flow/backend/`
- **Git**: `config.yaml` is automatically ignored by git (it contains secrets)
- **Priority**: If both `backend/config.yaml` and `../config.yaml` exist, the backend version takes precedence

## Configuration File Locations

The backend searches for `config.yaml` in this order (sketched in code below):

1. `DEER_FLOW_CONFIG_PATH` environment variable (if set)
2. `backend/config.yaml` (the current directory when running from backend/)
3. `deer-flow/config.yaml` (the parent directory - **recommended location**)

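A sketch of that lookup order in Python; the real logic lives in `AppConfig.resolve_config_path()` (used in the troubleshooting section below) and may differ in detail:

```python
import os
from pathlib import Path

def resolve_config_path() -> Path | None:
    """Mirror the documented search order (sketch)."""
    # 1. Explicit override via environment variable
    if env_path := os.environ.get("DEER_FLOW_CONFIG_PATH"):
        return Path(env_path)
    # 2. backend/config.yaml, then 3. ../config.yaml (project root)
    for candidate in (Path("config.yaml"), Path("../config.yaml")):
        if candidate.exists():
            return candidate.resolve()
    return None
```
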
**Recommended**: Place `config.yaml` in the project root (`deer-flow/config.yaml`).

## Sandbox Setup (Optional but Recommended)

If you plan to use the Docker/container-based sandbox (configured in `config.yaml` under `sandbox.use: src.community.aio_sandbox:AioSandboxProvider`), it is highly recommended to pre-pull the container image:

```bash
# From the project root
make setup-sandbox
```

**Why pre-pull?**
- The sandbox image (~500MB+) is pulled on first use, causing a long wait
- Pre-pulling provides a clear progress indication
- Avoids confusion when first using the agent

If you skip this step, the image will be pulled automatically on the first agent execution, which may take several minutes depending on your network speed.

## Troubleshooting

### Config file not found

```bash
# Check where the backend is looking
cd deer-flow/backend
python -c "from src.config.app_config import AppConfig; print(AppConfig.resolve_config_path())"
```

If it can't find the config:
1. Ensure you've copied `config.example.yaml` to `config.yaml`
2. Verify you're in the correct directory
3. Check that the file exists: `ls -la ../config.yaml`

### Permission denied

```bash
chmod 600 ../config.yaml  # Protect sensitive configuration
```

## See Also

- [Configuration Guide](docs/CONFIGURATION.md) - Detailed configuration options
- [Architecture Overview](CLAUDE.md) - System architecture

@@ -0,0 +1,222 @@
# Automatic Title Generation - Implementation Summary

## ✅ Completed Work

### 1. Core Implementation Files

#### [`src/agents/thread_state.py`](../src/agents/thread_state.py)
- ✅ Added a `title: str | None = None` field to `ThreadState`

#### [`src/config/title_config.py`](../src/config/title_config.py) (new)
- ✅ Created the `TitleConfig` configuration class
- ✅ Supports the options: enabled, max_words, max_chars, model_name, prompt_template
- ✅ Provides the `get_title_config()` and `set_title_config()` functions
- ✅ Provides `load_title_config_from_dict()` to load from the configuration file

#### [`src/agents/title_middleware.py`](../src/agents/title_middleware.py) (new)
- ✅ Created the `TitleMiddleware` class
- ✅ Implemented `_should_generate_title()` to check whether generation is needed
- ✅ Implemented `_generate_title()` to call the LLM to generate a title
- ✅ Implemented the `after_agent()` hook that fires automatically after the first exchange
- ✅ Includes a fallback strategy (uses the first few words of the user message if the LLM fails)

#### [`src/config/app_config.py`](../src/config/app_config.py)
- ✅ Imports `load_title_config_from_dict`
- ✅ Loads the title configuration in `from_file()`

#### [`src/agents/lead_agent/agent.py`](../src/agents/lead_agent/agent.py)
- ✅ Imports `TitleMiddleware`
- ✅ Registered in the `middleware` list: `[SandboxMiddleware(), TitleMiddleware()]`

### 2. Configuration

#### [`config.yaml`](../config.yaml)
- ✅ Added the title section:
  ```yaml
  title:
    enabled: true
    max_words: 6
    max_chars: 60
    model_name: null
  ```

### 3. Documentation

#### [`docs/AUTO_TITLE_GENERATION.md`](../docs/AUTO_TITLE_GENERATION.md) (new)
- ✅ Complete feature documentation
- ✅ Implementation approach and architecture design
- ✅ Configuration notes
- ✅ Client usage examples (TypeScript)
- ✅ Workflow diagram (Mermaid)
- ✅ Troubleshooting guide
- ✅ State vs. Metadata comparison

#### [`BACKEND_TODO.md`](../BACKEND_TODO.md)
- ✅ Recorded the completed feature

### 4. Tests

#### [`tests/test_title_generation.py`](../tests/test_title_generation.py) (new)
- ✅ Configuration class tests
- ✅ Middleware initialization tests
- ✅ TODO: integration tests (requires mocking the Runtime)

---

## 🎯 Core Design Decisions

### Why State Instead of Metadata?

| Aspect | State (✅ chosen) | Metadata (❌ not chosen) |
|------|----------------|---------------------|
| **Persistence** | Automatic (via the checkpointer) | Implementation-dependent, unreliable |
| **Versioning** | Supports time travel | Not supported |
| **Type safety** | Defined as a TypedDict | Arbitrary dict |
| **Standardization** | A core LangGraph mechanism | An extension feature |

### Workflow

```
User sends the first message
        ↓
Agent processes it and replies
        ↓
TitleMiddleware.after_agent() fires
        ↓
Checks: first exchange? title already set?
        ↓
Calls the LLM to generate a title
        ↓
Returns {"title": "..."} to update the state
        ↓
Checkpointer persists it automatically (if configured)
        ↓
Client reads it from state.values.title
```

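Putting the pieces above together, the hook could look roughly like this. `_should_generate_title` and `_generate_title` are the helpers named above; everything else, including the hook signature, is a sketch rather than the actual implementation:

```python
class TitleMiddleware:
    def after_agent(self, state, runtime):
        # Only act on the first completed exchange, and only once per thread.
        if not self._should_generate_title(state):
            return None

        try:
            title = self._generate_title(state["messages"])
        except Exception:
            # Fallback: first few words of the first user message
            first_user = next(m for m in state["messages"] if m.type == "human")
            title = " ".join(first_user.content.split()[:6])

        # Returned keys are merged into ThreadState and persisted by the checkpointer.
        return {"title": title}
```
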
---

## 📋 Usage Guide

### Backend Configuration

1. **Enable/disable the feature**
   ```yaml
   # config.yaml
   title:
     enabled: true  # set to false to disable
   ```

2. **Customize the configuration**
   ```yaml
   title:
     enabled: true
     max_words: 8      # at most 8 words in the title
     max_chars: 80     # at most 80 characters
     model_name: null  # use the default model
   ```

3. **Persistence (optional)**

   To persist titles during local development:

   ```python
   # checkpointer.py
   from langgraph.checkpoint.sqlite import SqliteSaver

   checkpointer = SqliteSaver.from_conn_string("checkpoints.db")
   ```

   ```json
   // langgraph.json
   {
     "graphs": {
       "lead_agent": "src.agents:lead_agent"
     },
     "checkpointer": "checkpointer:checkpointer"
   }
   ```

### Client Usage

```typescript
// Fetch the thread title
const state = await client.threads.getState(threadId);
const title = state.values.title || "New Conversation";

// Show it in the conversation list
<li>{title}</li>
```

**⚠️ Note**: The title lives at `state.values.title`, not `thread.metadata.title`.

---

## 🧪 Testing

```bash
# Run the tests
pytest tests/test_title_generation.py -v

# Run all tests
pytest
```

---

## 🔍 Troubleshooting

### No title generated?

1. Check the configuration: `title.enabled = true`
2. Check the logs: search for "Generated thread title"
3. Confirm it is the first exchange (one user message + one assistant reply)

### Title generated but not visible?

1. Confirm where you read it: `state.values.title` (not `thread.metadata.title`)
2. Check whether the API response includes the title
3. Re-fetch the state

### Title lost after a restart?

1. Local development requires a configured checkpointer
2. LangGraph Platform persists automatically
3. Inspect the database to confirm the checkpointer is working

---

## 📊 Performance Impact

- **Added latency**: about 0.5-1 second (one LLM call)
- **Concurrency safety**: runs in `after_agent` and does not block the main flow
- **Resource cost**: generated only once per thread

### Optimization Tips

1. Use a faster model (e.g., `gpt-3.5-turbo`)
2. Reduce `max_words` and `max_chars`
3. Make the prompt more concise

---

## 🚀 Next Steps

- [ ] Add integration tests (requires mocking the LangGraph Runtime)
- [ ] Support custom prompt templates
- [ ] Support multilingual title generation
- [ ] Add a way to regenerate titles
- [ ] Monitor title generation success rate and latency

---

## 📚 Related Resources

- [Full documentation](../docs/AUTO_TITLE_GENERATION.md)
- [LangGraph Middleware](https://langchain-ai.github.io/langgraph/concepts/middleware/)
- [LangGraph State Management](https://langchain-ai.github.io/langgraph/concepts/low_level/#state)
- [LangGraph Checkpointer](https://langchain-ai.github.io/langgraph/concepts/persistence/)

---

*Implementation completed: 2026-01-14*

@@ -0,0 +1,27 @@
# TODO List

## Completed Features

- [x] Launch the sandbox only after the first file system or bash tool is called
- [x] Add Clarification Process for the whole process
- [x] Implement Context Summarization Mechanism to avoid context explosion
- [x] Integrate MCP (Model Context Protocol) for extensible tools
- [x] Add file upload support with automatic document conversion
- [x] Implement automatic thread title generation
- [x] Add Plan Mode with TodoList middleware
- [x] Add vision model support with ViewImageMiddleware
- [x] Skills system with SKILL.md format

## Planned Features

- [ ] Pooling the sandbox resources to reduce the number of sandbox containers
- [ ] Add authentication/authorization layer
- [ ] Implement rate limiting
- [ ] Add metrics and monitoring
- [ ] Support for more document formats in upload
- [ ] Skill marketplace / remote skill installation

## Resolved Issues

- [x] Make sure that no duplicated files appear in `state.artifacts`
- [x] Long thinking but with empty content (answer inside the thinking process)

@@ -0,0 +1,204 @@
# Plan Mode with TodoList Middleware

This document describes how to enable and use the Plan Mode feature with TodoList middleware in DeerFlow 2.0.

## Overview

Plan Mode adds a TodoList middleware to the agent, which provides a `write_todos` tool that helps the agent:
- Break down complex tasks into smaller, manageable steps
- Track progress as work proceeds
- Give users visibility into what is being done

The TodoList middleware is built on LangChain's `TodoListMiddleware`.

## Configuration

### Enabling Plan Mode

Plan mode is controlled via **runtime configuration** through the `is_plan_mode` parameter in the `configurable` section of `RunnableConfig`. This allows you to dynamically enable or disable plan mode on a per-request basis.

```python
from langchain_core.runnables import RunnableConfig
from src.agents.lead_agent.agent import make_lead_agent

# Enable plan mode via runtime configuration
config = RunnableConfig(
    configurable={
        "thread_id": "example-thread",
        "thinking_enabled": True,
        "is_plan_mode": True,  # Enable plan mode
    }
)

# Create agent with plan mode enabled
agent = make_lead_agent(config)
```

### Configuration Options

- **is_plan_mode** (bool): Whether to enable plan mode with the TodoList middleware. Default: `False`
  - Read via `config.get("configurable", {}).get("is_plan_mode", False)`
  - Can be set dynamically for each agent invocation
  - No global configuration needed

## Default Behavior

When plan mode is enabled with default settings, the agent has access to a `write_todos` tool with the following behavior:

### When to Use the TodoList

The agent will use the todo list for:
1. Complex multi-step tasks (3+ distinct steps)
2. Non-trivial tasks requiring careful planning
3. When the user explicitly requests a todo list
4. When the user provides multiple tasks

### When NOT to Use the TodoList

The agent will skip the todo list for:
1. Single, straightforward tasks
2. Trivial tasks (< 3 steps)
3. Purely conversational or informational requests

### Task States

- **pending**: Task not yet started
- **in_progress**: Currently being worked on (multiple parallel tasks are allowed)
- **completed**: Task finished successfully

## Usage Examples

### Basic Usage

```python
from langchain_core.runnables import RunnableConfig
from src.agents.lead_agent.agent import make_lead_agent

# Create agent with plan mode ENABLED
config_with_plan_mode = RunnableConfig(
    configurable={
        "thread_id": "example-thread",
        "thinking_enabled": True,
        "is_plan_mode": True,  # TodoList middleware will be added
    }
)
agent_with_todos = make_lead_agent(config_with_plan_mode)

# Create agent with plan mode DISABLED (default)
config_without_plan_mode = RunnableConfig(
    configurable={
        "thread_id": "another-thread",
        "thinking_enabled": True,
        "is_plan_mode": False,  # No TodoList middleware
    }
)
agent_without_todos = make_lead_agent(config_without_plan_mode)
```

### Dynamic Plan Mode per Request

You can enable or disable plan mode dynamically for different conversations or tasks:

```python
from langchain_core.runnables import RunnableConfig
from src.agents.lead_agent.agent import make_lead_agent

def create_agent_for_task(task_complexity: str):
    """Create an agent with plan mode based on task complexity."""
    is_complex = task_complexity in ["high", "very_high"]

    config = RunnableConfig(
        configurable={
            "thread_id": f"task-{task_complexity}",
            "thinking_enabled": True,
            "is_plan_mode": is_complex,  # Enable only for complex tasks
        }
    )

    return make_lead_agent(config)

# Simple task - no TodoList needed
simple_agent = create_agent_for_task("low")

# Complex task - TodoList enabled for better tracking
complex_agent = create_agent_for_task("high")
```

## How It Works

1. When `make_lead_agent(config)` is called, it extracts `is_plan_mode` from `config.configurable`
2. The config is passed to `_build_middlewares(config)`
3. `_build_middlewares()` reads `is_plan_mode` and calls `_create_todo_list_middleware(is_plan_mode)` (sketched below)
4. If `is_plan_mode=True`, a `TodoListMiddleware` instance is created and added to the middleware chain
5. The middleware automatically adds a `write_todos` tool to the agent's toolset
6. The agent can use this tool to manage tasks during execution
7. The middleware handles the todo list state and provides it to the agent

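A sketch of that factory under the description above; the import path and the omitted custom prompts are assumptions:

```python
from langchain.agents.middleware import TodoListMiddleware  # import path assumed

def _create_todo_list_middleware(is_plan_mode: bool) -> TodoListMiddleware | None:
    """Return a TodoListMiddleware only when plan mode is requested (sketch)."""
    if not is_plan_mode:
        return None
    # The real implementation passes DeerFlow-style system_prompt and
    # tool_description here (see "Custom Prompts" below).
    return TodoListMiddleware()
```
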
## Architecture

```
make_lead_agent(config)
    │
    ├─> Extracts: is_plan_mode = config.configurable.get("is_plan_mode", False)
    │
    └─> _build_middlewares(config)
            │
            ├─> ThreadDataMiddleware
            ├─> SandboxMiddleware
            ├─> SummarizationMiddleware (if enabled via global config)
            ├─> TodoListMiddleware (if is_plan_mode=True)  ← NEW
            ├─> TitleMiddleware
            └─> ClarificationMiddleware
```

## Implementation Details

### Agent Module
- **Location**: `src/agents/lead_agent/agent.py`
- **Function**: `_create_todo_list_middleware(is_plan_mode: bool)` - Creates the TodoListMiddleware if plan mode is enabled
- **Function**: `_build_middlewares(config: RunnableConfig)` - Builds the middleware chain based on the runtime config
- **Function**: `make_lead_agent(config: RunnableConfig)` - Creates the agent with the appropriate middlewares

### Runtime Configuration
Plan mode is controlled via the `is_plan_mode` parameter in `RunnableConfig.configurable`:
```python
config = RunnableConfig(
    configurable={
        "is_plan_mode": True,  # Enable plan mode
        # ... other configurable options
    }
)
```

## Key Benefits

1. **Dynamic Control**: Enable or disable plan mode per request without global state
2. **Flexibility**: Different conversations can have different plan mode settings
3. **Simplicity**: No need for global configuration management
4. **Context-Aware**: The plan mode decision can be based on task complexity, user preferences, etc.

## Custom Prompts

DeerFlow uses a custom `system_prompt` and `tool_description` for the TodoListMiddleware that match the overall DeerFlow prompt style:

### System Prompt Features
- Uses XML tags (`<todo_list_system>`) for structural consistency with DeerFlow's main prompt
- Emphasizes CRITICAL rules and best practices
- Clear "When to Use" vs. "When NOT to Use" guidelines
- Focuses on real-time updates and immediate task completion

### Tool Description Features
- Detailed usage scenarios with examples
- Strong emphasis on NOT using it for simple tasks
- Clear task state definitions (pending, in_progress, completed)
- A comprehensive best-practices section
- Task completion requirements to prevent premature marking

The custom prompts are defined in `_create_todo_list_middleware()` in `backend/src/agents/lead_agent/agent.py:57`.

## Notes

- The TodoList middleware uses LangChain's built-in `TodoListMiddleware` with **custom DeerFlow-style prompts**
- Plan mode is **disabled by default** (`is_plan_mode=False`) to maintain backward compatibility
- The middleware is positioned before `ClarificationMiddleware` to allow todo management during clarification flows
- The custom prompts emphasize the same principles as DeerFlow's main system prompt (clarity, action orientation, critical rules)

@@ -0,0 +1,353 @@
# Conversation Summarization

DeerFlow includes automatic conversation summarization to handle long conversations that approach model token limits. When enabled, the system automatically condenses older messages while preserving recent context.

## Overview

The summarization feature uses LangChain's `SummarizationMiddleware` to monitor conversation history and trigger summarization based on configurable thresholds. When activated, it:

1. Monitors message token counts in real time
2. Triggers summarization when thresholds are met
3. Keeps recent messages intact while summarizing older exchanges
4. Keeps AI/Tool message pairs together for context continuity
5. Injects the summary back into the conversation

## Configuration

Summarization is configured in `config.yaml` under the `summarization` key:

```yaml
summarization:
  enabled: true
  model_name: null  # Use the default model or specify a lightweight model

  # Trigger conditions (OR logic - any condition triggers summarization)
  trigger:
    - type: tokens
      value: 4000
    # Additional triggers (optional)
    # - type: messages
    #   value: 50
    # - type: fraction
    #   value: 0.8  # 80% of the model's max input tokens

  # Context retention policy
  keep:
    type: messages
    value: 20

  # Token trimming for the summarization call
  trim_tokens_to_summarize: 4000

  # Custom summary prompt (optional)
  summary_prompt: null
```

### Configuration Options

#### `enabled`
- **Type**: Boolean
- **Default**: `false`
- **Description**: Enable or disable automatic summarization

#### `model_name`
- **Type**: String or null
- **Default**: `null` (uses the default model)
- **Description**: Model to use for generating summaries. A lightweight, cost-effective model like `gpt-4o-mini` or equivalent is recommended.

#### `trigger`
- **Type**: A single `ContextSize` or a list of `ContextSize` objects
- **Required**: At least one trigger must be specified when enabled
- **Description**: Thresholds that trigger summarization. Uses OR logic - summarization runs when ANY threshold is met.

**ContextSize Types:**

1. **Token-based trigger**: Activates when the token count reaches the specified value
   ```yaml
   trigger:
     type: tokens
     value: 4000
   ```

2. **Message-based trigger**: Activates when the message count reaches the specified value
   ```yaml
   trigger:
     type: messages
     value: 50
   ```

3. **Fraction-based trigger**: Activates when token usage reaches a percentage of the model's maximum input tokens
   ```yaml
   trigger:
     type: fraction
     value: 0.8  # 80% of max input tokens
   ```

**Multiple Triggers:**
```yaml
trigger:
  - type: tokens
    value: 4000
  - type: messages
    value: 50
```

#### `keep`
- **Type**: `ContextSize` object
- **Default**: `{type: messages, value: 20}`
- **Description**: Specifies how much recent conversation history to preserve after summarization.

**Examples:**
```yaml
# Keep the most recent 20 messages
keep:
  type: messages
  value: 20

# Keep the most recent 3000 tokens
keep:
  type: tokens
  value: 3000

# Keep the most recent 30% of the model's max input tokens
keep:
  type: fraction
  value: 0.3
```

#### `trim_tokens_to_summarize`
- **Type**: Integer or null
- **Default**: `4000`
- **Description**: Maximum tokens to include when preparing messages for the summarization call itself. Set to `null` to skip trimming (not recommended for very long conversations).

#### `summary_prompt`
- **Type**: String or null
- **Default**: `null` (uses LangChain's default prompt)
- **Description**: Custom prompt template for generating summaries. The prompt should guide the model to extract the most important context.

**Default Prompt Behavior:**
The default LangChain prompt instructs the model to:
- Extract the highest-quality, most relevant context
- Focus on information critical to the overall goal
- Avoid repeating completed actions
- Return only the extracted context

## How It Works

### Summarization Flow

1. **Monitoring**: Before each model call, the middleware counts the tokens in the message history
2. **Trigger Check**: If any configured threshold is met, summarization is triggered
3. **Message Partitioning**: Messages are split into:
   - Messages to summarize (older messages beyond the `keep` threshold)
   - Messages to preserve (recent messages within the `keep` threshold)
4. **Summary Generation**: The model generates a concise summary of the older messages
5. **Context Replacement**: The message history is updated:
   - All old messages are removed
   - A single summary message is added
   - Recent messages are preserved
6. **AI/Tool Pair Protection**: The system ensures AI messages and their corresponding tool messages stay together

### Token Counting

- Uses approximate token counting based on character count
- For Anthropic models: ~3.3 characters per token
- For other models: uses LangChain's default estimation
- Can be customized with a custom `token_counter` function (a sketch of the default-style counter follows below)

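The approximation is simple enough to state as code. A minimal sketch of a character-based counter like the one described, where the 3.3 ratio is the Anthropic heuristic mentioned above:

```python
def approx_token_count(text: str, chars_per_token: float = 3.3) -> int:
    """Approximate token count from character length (sketch)."""
    return max(1, int(len(text) / chars_per_token))
```
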
### Message Preservation
|
||||
|
||||
The middleware intelligently preserves message context:
|
||||
|
||||
- **Recent Messages**: Always kept intact based on `keep` configuration
|
||||
- **AI/Tool Pairs**: Never split - if a cutoff point falls within tool messages, the system adjusts to keep the entire AI + Tool message sequence together
|
||||
- **Summary Format**: Summary is injected as a HumanMessage with the format:
|
||||
```
|
||||
Here is a summary of the conversation to date:
|
||||
|
||||
[Generated summary text]
|
||||
```
|
||||

## Best Practices

### Choosing Trigger Thresholds

1. **Token-based triggers**: Recommended for most use cases
   - Set to 60-80% of your model's context window
   - Example: For 8K context, use 4000-6000 tokens

2. **Message-based triggers**: Useful for controlling conversation length
   - Good for applications with many short messages
   - Example: 50-100 messages depending on average message length

3. **Fraction-based triggers**: Ideal when using multiple models
   - Automatically adapts to each model's capacity
   - Example: 0.8 (80% of model's max input tokens)

### Choosing Retention Policy (`keep`)

1. **Message-based retention**: Best for most scenarios
   - Preserves natural conversation flow
   - Recommended: 15-25 messages

2. **Token-based retention**: Use when precise control is needed
   - Good for managing exact token budgets
   - Recommended: 2000-4000 tokens

3. **Fraction-based retention**: For multi-model setups
   - Automatically scales with model capacity
   - Recommended: 0.2-0.4 (20-40% of max input)

### Model Selection

- **Recommended**: Use a lightweight, cost-effective model for summaries
  - Examples: `gpt-4o-mini`, `claude-haiku`, or equivalent
  - Summaries don't require the most powerful models
  - Significant cost savings on high-volume applications

- **Default**: If `model_name` is `null`, uses the default model
  - May be more expensive but ensures consistency
  - Good for simple setups

### Optimization Tips

1. **Balance triggers**: Combine token and message triggers for robust handling

   ```yaml
   trigger:
     - type: tokens
       value: 4000
     - type: messages
       value: 50
   ```

2. **Conservative retention**: Keep more messages initially, adjust based on performance

   ```yaml
   keep:
     type: messages
     value: 25 # Start higher, reduce if needed
   ```

3. **Trim strategically**: Limit tokens sent to the summarization model

   ```yaml
   trim_tokens_to_summarize: 4000 # Prevents expensive summarization calls
   ```

4. **Monitor and iterate**: Track summary quality and adjust configuration

## Troubleshooting

### Summary Quality Issues

**Problem**: Summaries losing important context

**Solutions**:
1. Increase `keep` value to preserve more messages
2. Decrease trigger thresholds to summarize earlier
3. Customize `summary_prompt` to emphasize key information
4. Use a more capable model for summarization

### Performance Issues

**Problem**: Summarization calls taking too long

**Solutions**:
1. Use a faster model for summaries (e.g., `gpt-4o-mini`)
2. Reduce `trim_tokens_to_summarize` to send less context
3. Increase trigger thresholds to summarize less frequently

### Token Limit Errors

**Problem**: Still hitting token limits despite summarization

**Solutions**:
1. Lower trigger thresholds to summarize earlier
2. Reduce `keep` value to preserve fewer messages
3. Check if individual messages are very large
4. Consider using fraction-based triggers

## Implementation Details

### Code Structure

- **Configuration**: `src/config/summarization_config.py`
- **Integration**: `src/agents/lead_agent/agent.py`
- **Middleware**: Uses `langchain.agents.middleware.SummarizationMiddleware`

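As a rough sketch of how the integration wires this up: the tuple forms below mirror the `to_tuple()` conversion in `summarization_config.py`; treat the exact parameter shapes as assumptions rather than a definitive API reference:

```python
from langchain.agents.middleware import SummarizationMiddleware

# Sketch: construct the middleware from a parsed config (assumed tuple forms)
summarizer = SummarizationMiddleware(
    model="gpt-4o-mini",       # lightweight summarization model
    trigger=("tokens", 4000),  # summarize once history exceeds ~4000 tokens
    keep=("messages", 20),     # always preserve the 20 most recent messages
)
```
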
### Middleware Order

Summarization runs after ThreadData and Sandbox initialization but before Title and Clarification:

1. ThreadDataMiddleware
2. SandboxMiddleware
3. **SummarizationMiddleware** ← Runs here
4. TitleMiddleware
5. ClarificationMiddleware

### State Management

- Summarization is stateless - configuration is loaded once at startup
- Summaries are added as regular messages in the conversation history
- The checkpointer persists the summarized history automatically

## Example Configurations

### Minimal Configuration

```yaml
summarization:
  enabled: true
  trigger:
    type: tokens
    value: 4000
  keep:
    type: messages
    value: 20
```

### Production Configuration

```yaml
summarization:
  enabled: true
  model_name: gpt-4o-mini # Lightweight model for cost efficiency
  trigger:
    - type: tokens
      value: 6000
    - type: messages
      value: 75
  keep:
    type: messages
    value: 25
  trim_tokens_to_summarize: 5000
```

### Multi-Model Configuration

```yaml
summarization:
  enabled: true
  model_name: gpt-4o-mini
  trigger:
    type: fraction
    value: 0.7 # 70% of model's max input
  keep:
    type: fraction
    value: 0.3 # Keep 30% of max input
  trim_tokens_to_summarize: 4000
```

### Conservative Configuration (High Quality)

```yaml
summarization:
  enabled: true
  model_name: gpt-4 # Use full model for high-quality summaries
  trigger:
    type: tokens
    value: 8000
  keep:
    type: messages
    value: 40 # Keep more context
  trim_tokens_to_summarize: null # No trimming
```

## References

- [LangChain Summarization Middleware Documentation](https://docs.langchain.com/oss/python/langchain/middleware/built-in#summarization)
- [LangChain Source Code](https://github.com/langchain-ai/langchain)
@@ -0,0 +1,174 @@
# Task Tool Improvements

## Overview

The task tool has been improved to eliminate wasteful LLM polling. Previously, when using background tasks, the LLM had to repeatedly call `task_status` to poll for completion, causing unnecessary API requests.

## Changes Made

### 1. Removed `run_in_background` Parameter

The `run_in_background` parameter has been removed from the `task` tool. All subagent tasks still run asynchronously, but the tool now handles completion automatically.

**Before:**
```python
# LLM had to manage polling
task_id = task(
    subagent_type="bash",
    prompt="Run tests",
    description="Run tests",
    run_in_background=True
)
# Then the LLM had to poll repeatedly:
while True:
    status = task_status(task_id)
    if status.completed:
        break
```

**After:**
```python
# Tool blocks until complete; polling happens in the backend
result = task(
    subagent_type="bash",
    prompt="Run tests",
    description="Run tests"
)
# Result is available immediately after the call returns
```

### 2. Backend Polling

The `task_tool` now:
- Starts the subagent task asynchronously
- Polls for completion in the backend (every 2 seconds)
- Blocks the tool call until completion
- Returns the final result directly

This means:
- ✅ LLM makes only ONE tool call
- ✅ No wasteful LLM polling requests
- ✅ Backend handles all status checking
- ✅ Timeout protection (5 minutes max)

### 3. Removed `task_status` from LLM Tools

The `task_status_tool` is no longer exposed to the LLM. It's kept in the codebase for potential internal/debugging use, but the LLM cannot call it.

### 4. Updated Documentation

- Updated `SUBAGENT_SECTION` in `prompt.py` to remove all references to background tasks and polling
- Simplified usage examples
- Made it clear that the tool automatically waits for completion

## Implementation Details

### Polling Logic

Located in `src/tools/builtins/task_tool.py`:

```python
import time

# Start background execution
task_id = executor.execute_async(prompt)

# Poll for task completion in the backend
poll_count = 0
while True:
    result = get_background_task_result(task_id)

    # Check whether the task completed or failed
    if result.status == SubagentStatus.COMPLETED:
        return f"[Subagent: {subagent_type}]\n\n{result.result}"
    elif result.status == SubagentStatus.FAILED:
        return f"[Subagent: {subagent_type}] Task failed: {result.error}"

    # Timeout protection: 150 polls x 2 seconds = 5 minutes
    poll_count += 1
    if poll_count > 150:
        return "Task timed out after 5 minutes"

    # Wait before the next poll
    time.sleep(2)
```

### Execution Timeout

In addition to the polling timeout, subagent execution now has a built-in timeout mechanism:

**Configuration** (`src/subagents/config.py`):
```python
@dataclass
class SubagentConfig:
    # ...
    timeout_seconds: int = 300  # 5 minutes default
```

**Thread Pool Architecture**:

To avoid nested thread pools and resource waste, we use two dedicated thread pools:

1. **Scheduler Pool** (`_scheduler_pool`):
   - Max workers: 4
   - Purpose: Orchestrates background task execution
   - Runs the `run_task()` function that manages the task lifecycle

2. **Execution Pool** (`_execution_pool`):
   - Max workers: 8 (larger to avoid blocking)
   - Purpose: Actual subagent execution with timeout support
   - Runs the `execute()` method that invokes the agent
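
For concreteness, the two pools could be declared as below. This is a sketch: the worker counts come from the description above, but the thread-name prefixes are assumptions:

```python
from concurrent.futures import ThreadPoolExecutor

# Scheduler pool: orchestrates task lifecycles (small, cheap work)
_scheduler_pool = ThreadPoolExecutor(max_workers=4, thread_name_prefix="subagent-scheduler")

# Execution pool: runs the agents themselves (larger, to avoid blocking)
_execution_pool = ThreadPoolExecutor(max_workers=8, thread_name_prefix="subagent-exec")
```

Keeping the pools separate means a slow agent can never starve the scheduler of workers.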

**How it works**:
```python
# In execute_async():
_scheduler_pool.submit(run_task)  # Submit orchestration task

# In run_task():
future = _execution_pool.submit(self.execute, task)  # Submit execution
exec_result = future.result(timeout=timeout_seconds)  # Wait with timeout
```

**Benefits**:
- ✅ Clean separation of concerns (scheduling vs execution)
- ✅ No nested thread pools
- ✅ Timeout enforcement at the right level
- ✅ Better resource utilization

**Two-Level Timeout Protection**:
1. **Execution Timeout**: Subagent execution itself has a 5-minute timeout (configurable in `SubagentConfig`)
2. **Polling Timeout**: Tool polling has a 5-minute timeout (150 polls × 2 seconds)

This ensures that even if subagent execution hangs, the system won't wait indefinitely.
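
When the execution timeout fires, `future.result` raises `concurrent.futures.TimeoutError`; a sketch of the handling follows (the `SubagentResult` shape here is an assumption, not the repo's actual type):

```python
from concurrent.futures import TimeoutError as FuturesTimeoutError

try:
    exec_result = future.result(timeout=timeout_seconds)
except FuturesTimeoutError:
    # Convert a hung execution into an explicit failure instead of waiting forever
    exec_result = SubagentResult(status=SubagentStatus.FAILED, error="execution timed out")
```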

### Benefits

1. **Reduced API Costs**: No more repeated LLM requests for polling
2. **Simpler UX**: The LLM doesn't need to manage polling logic
3. **Better Reliability**: Backend handles all status checking consistently
4. **Timeout Protection**: Two-level timeout prevents infinite waiting (execution + polling)

## Testing

To verify the changes work correctly:

1. Start a subagent task that takes a few seconds
2. Verify the tool call blocks until completion
3. Verify the result is returned directly
4. Verify no `task_status` calls are made

Example test scenario:
```python
# This should block for ~10 seconds, then return the result
result = task(
    subagent_type="bash",
    prompt="sleep 10 && echo 'Done'",
    description="Test task"
)
# result should contain "Done"
```

## Migration Notes

For users/code that previously used `run_in_background=True`:
- Simply remove the parameter
- Remove any polling logic
- The tool will automatically wait for completion

No other changes are needed: aside from the removed parameter, the API is unchanged.
@@ -0,0 +1,10 @@
{
  "$schema": "https://langgra.ph/schema.json",
  "dependencies": [
    "."
  ],
  "env": ".env",
  "graphs": {
    "lead_agent": "src.agents:make_lead_agent"
  }
}
@@ -0,0 +1,38 @@
[project]
name = "deer-flow"
version = "0.1.0"
description = "LangGraph-based AI agent system with sandbox execution capabilities"
readme = "README.md"
requires-python = ">=3.12"
dependencies = [
    "agent-sandbox>=0.0.19",
    "dotenv>=0.9.9",
    "fastapi>=0.115.0",
    "httpx>=0.28.0",
    "kubernetes>=30.0.0",
    "langchain>=1.2.3",
    "langchain-deepseek>=1.0.1",
    "langchain-mcp-adapters>=0.1.0",
    "langchain-openai>=1.1.7",
    "langgraph>=1.0.6",
    "langgraph-api>=0.7.0,<0.8.0",
    "langgraph-cli>=0.4.14",
    "langgraph-runtime-inmem>=0.22.1",
    "markdownify>=1.2.2",
    "markitdown[all,xlsx]>=0.0.1a2",
    "pydantic>=2.12.5",
    "python-multipart>=0.0.20",
    "pyyaml>=6.0.3",
    "readabilipy>=0.3.0",
    "sse-starlette>=2.1.0",
    "tavily-python>=0.7.17",
    "firecrawl-py>=1.15.0",
    "tiktoken>=0.8.0",
    "uvicorn[standard]>=0.34.0",
    "ddgs>=9.10.0",
    "duckdb>=1.4.4",
    "langchain-google-genai>=4.2.1",
]

[dependency-groups]
dev = ["pytest>=8.0.0", "ruff>=0.14.11"]
@@ -0,0 +1,10 @@
line-length = 240
target-version = "py312"

[lint]
select = ["E", "F", "I", "UP"]
ignore = []

[format]
quote-style = "double"
indent-style = "space"
@@ -0,0 +1,4 @@
from .lead_agent import make_lead_agent
from .thread_state import SandboxState, ThreadState

__all__ = ["make_lead_agent", "SandboxState", "ThreadState"]
@@ -0,0 +1,3 @@
from .agent import make_lead_agent

__all__ = ["make_lead_agent"]
@@ -0,0 +1,331 @@
import logging

from langchain.agents import create_agent
from langchain.agents.middleware import SummarizationMiddleware, TodoListMiddleware
from langchain_core.runnables import RunnableConfig

from src.agents.lead_agent.prompt import apply_prompt_template
from src.agents.middlewares.clarification_middleware import ClarificationMiddleware
from src.agents.middlewares.dangling_tool_call_middleware import DanglingToolCallMiddleware
from src.agents.middlewares.memory_middleware import MemoryMiddleware
from src.agents.middlewares.subagent_limit_middleware import SubagentLimitMiddleware
from src.agents.middlewares.thread_data_middleware import ThreadDataMiddleware
from src.agents.middlewares.title_middleware import TitleMiddleware
from src.agents.middlewares.uploads_middleware import UploadsMiddleware
from src.agents.middlewares.view_image_middleware import ViewImageMiddleware
from src.agents.thread_state import ThreadState
from src.config.agents_config import load_agent_config
from src.config.app_config import get_app_config
from src.config.summarization_config import get_summarization_config
from src.models import create_chat_model
from src.sandbox.middleware import SandboxMiddleware

logger = logging.getLogger(__name__)


def _resolve_model_name(requested_model_name: str | None = None) -> str:
    """Resolve a runtime model name safely, falling back to the default if invalid. Raises ValueError if no models are configured."""
    app_config = get_app_config()
    default_model_name = app_config.models[0].name if app_config.models else None
    if default_model_name is None:
        raise ValueError("No chat models are configured. Please configure at least one model in config.yaml.")

    if requested_model_name and app_config.get_model_config(requested_model_name):
        return requested_model_name

    if requested_model_name and requested_model_name != default_model_name:
        logger.warning(f"Model '{requested_model_name}' not found in config; fallback to default model '{default_model_name}'.")
    return default_model_name


def _create_summarization_middleware() -> SummarizationMiddleware | None:
    """Create and configure the summarization middleware from config."""
    config = get_summarization_config()

    if not config.enabled:
        return None

    # Prepare trigger parameter
    trigger = None
    if config.trigger is not None:
        if isinstance(config.trigger, list):
            trigger = [t.to_tuple() for t in config.trigger]
        else:
            trigger = config.trigger.to_tuple()

    # Prepare keep parameter
    keep = config.keep.to_tuple()

    # Prepare model parameter
    if config.model_name:
        model = config.model_name
    else:
        # No summarization model configured: fall back to the default chat model
        # (thinking disabled, since summaries do not need reasoning traces)
        model = create_chat_model(thinking_enabled=False)

    # Prepare kwargs
    kwargs = {
        "model": model,
        "trigger": trigger,
        "keep": keep,
    }

    if config.trim_tokens_to_summarize is not None:
        kwargs["trim_tokens_to_summarize"] = config.trim_tokens_to_summarize

    if config.summary_prompt is not None:
        kwargs["summary_prompt"] = config.summary_prompt

    return SummarizationMiddleware(**kwargs)


def _create_todo_list_middleware(is_plan_mode: bool) -> TodoListMiddleware | None:
    """Create and configure the TodoList middleware.

    Args:
        is_plan_mode: Whether to enable plan mode with TodoList middleware.

    Returns:
        TodoListMiddleware instance if plan mode is enabled, None otherwise.
    """
    if not is_plan_mode:
        return None

    # Custom prompts matching DeerFlow's style
    system_prompt = """
<todo_list_system>
You have access to the `write_todos` tool to help you manage and track complex multi-step objectives.

**CRITICAL RULES:**
- Mark todos as completed IMMEDIATELY after finishing each step - do NOT batch completions
- Keep EXACTLY ONE task as `in_progress` at any time (unless tasks can run in parallel)
- Update the todo list in REAL-TIME as you work - this gives users visibility into your progress
- DO NOT use this tool for simple tasks (< 3 steps) - just complete them directly

**When to Use:**
This tool is designed for complex objectives that require systematic tracking:
- Complex multi-step tasks requiring 3+ distinct steps
- Non-trivial tasks needing careful planning and execution
- User explicitly requests a todo list
- User provides multiple tasks (numbered or comma-separated list)
- The plan may need revisions based on intermediate results

**When NOT to Use:**
- Single, straightforward tasks
- Trivial tasks (< 3 steps)
- Purely conversational or informational requests
- Simple tool calls where the approach is obvious

**Best Practices:**
- Break down complex tasks into smaller, actionable steps
- Use clear, descriptive task names
- Remove tasks that become irrelevant
- Add new tasks discovered during implementation
- Don't be afraid to revise the todo list as you learn more

**Task Management:**
Writing todos takes time and tokens - use it when helpful for managing complex problems, not for simple requests.
</todo_list_system>
"""

    tool_description = """Use this tool to create and manage a structured task list for complex work sessions.

**IMPORTANT: Only use this tool for complex tasks (3+ steps). For simple requests, just do the work directly.**

## When to Use

Use this tool in these scenarios:
1. **Complex multi-step tasks**: When a task requires 3 or more distinct steps or actions
2. **Non-trivial tasks**: Tasks requiring careful planning or multiple operations
3. **User explicitly requests todo list**: When the user directly asks you to track tasks
4. **Multiple tasks**: When users provide a list of things to be done
5. **Dynamic planning**: When the plan may need updates based on intermediate results

## When NOT to Use

Skip this tool when:
1. The task is straightforward and takes less than 3 steps
2. The task is trivial and tracking provides no benefit
3. The task is purely conversational or informational
4. It's clear what needs to be done and you can just do it

## How to Use

1. **Starting a task**: Mark it as `in_progress` BEFORE beginning work
2. **Completing a task**: Mark it as `completed` IMMEDIATELY after finishing
3. **Updating the list**: Add new tasks, remove irrelevant ones, or update descriptions as needed
4. **Multiple updates**: You can make several updates at once (e.g., complete one task and start the next)

## Task States

- `pending`: Task not yet started
- `in_progress`: Currently working on (can have multiple if tasks run in parallel)
- `completed`: Task finished successfully

## Task Completion Requirements

**CRITICAL: Only mark a task as completed when you have FULLY accomplished it.**

Never mark a task as completed if:
- There are unresolved issues or errors
- Work is partial or incomplete
- You encountered blockers preventing completion
- You couldn't find necessary resources or dependencies
- Quality standards haven't been met

If blocked, keep the task as `in_progress` and create a new task describing what needs to be resolved.

## Best Practices

- Create specific, actionable items
- Break complex tasks into smaller, manageable steps
- Use clear, descriptive task names
- Update task status in real-time as you work
- Mark tasks complete IMMEDIATELY after finishing (don't batch completions)
- Remove tasks that are no longer relevant
- **IMPORTANT**: When you write the todo list, mark your first task(s) as `in_progress` immediately
- **IMPORTANT**: Unless all tasks are completed, always have at least one task `in_progress` to show progress

Being proactive with task management demonstrates thoroughness and ensures all requirements are completed successfully.

**Remember**: If you only need a few tool calls to complete a task and it's clear what to do, it's better to just do the task directly and NOT use this tool at all.
"""

    return TodoListMiddleware(system_prompt=system_prompt, tool_description=tool_description)


# ThreadDataMiddleware must be before SandboxMiddleware to ensure thread_id is available
# UploadsMiddleware should be after ThreadDataMiddleware to access thread_id
# DanglingToolCallMiddleware patches missing ToolMessages before the model sees the history
# SummarizationMiddleware should be early to reduce context before other processing
# TodoListMiddleware should be before ClarificationMiddleware to allow todo management
# TitleMiddleware generates the title after the first exchange
# MemoryMiddleware queues the conversation for memory update (after TitleMiddleware)
# ViewImageMiddleware should be before ClarificationMiddleware to inject image details before the LLM
# ClarificationMiddleware should be last to intercept clarification requests after model calls
def _build_middlewares(config: RunnableConfig, model_name: str | None, agent_name: str | None = None):
    """Build middleware chain based on runtime configuration.

    Args:
        config: Runtime configuration containing configurable options like is_plan_mode.
        model_name: Resolved model name, used to decide whether vision-dependent middleware is added.
        agent_name: If provided, MemoryMiddleware will use per-agent memory storage.

    Returns:
        List of middleware instances.
    """
    middlewares = [ThreadDataMiddleware(), UploadsMiddleware(), SandboxMiddleware(), DanglingToolCallMiddleware()]

    # Add summarization middleware if enabled
    summarization_middleware = _create_summarization_middleware()
    if summarization_middleware is not None:
        middlewares.append(summarization_middleware)

    # Add TodoList middleware if plan mode is enabled
    is_plan_mode = config.get("configurable", {}).get("is_plan_mode", False)
    todo_list_middleware = _create_todo_list_middleware(is_plan_mode)
    if todo_list_middleware is not None:
        middlewares.append(todo_list_middleware)

    # Add TitleMiddleware
    middlewares.append(TitleMiddleware())

    # Add MemoryMiddleware (after TitleMiddleware)
    middlewares.append(MemoryMiddleware(agent_name=agent_name))

    # Add ViewImageMiddleware only if the current model supports vision.
    # Use the resolved runtime model_name from make_lead_agent to avoid stale config values.
    app_config = get_app_config()
    model_config = app_config.get_model_config(model_name) if model_name else None
    if model_config is not None and model_config.supports_vision:
        middlewares.append(ViewImageMiddleware())

    # Add SubagentLimitMiddleware to truncate excess parallel task calls
    subagent_enabled = config.get("configurable", {}).get("subagent_enabled", False)
    if subagent_enabled:
        max_concurrent_subagents = config.get("configurable", {}).get("max_concurrent_subagents", 3)
        middlewares.append(SubagentLimitMiddleware(max_concurrent=max_concurrent_subagents))

    # ClarificationMiddleware should always be last
    middlewares.append(ClarificationMiddleware())
    return middlewares


def make_lead_agent(config: RunnableConfig):
    # Lazy import to avoid circular dependency
    from src.tools import get_available_tools
    from src.tools.builtins import setup_agent

    cfg = config.get("configurable", {})

    thinking_enabled = cfg.get("thinking_enabled", True)
    reasoning_effort = cfg.get("reasoning_effort", None)
    requested_model_name: str | None = cfg.get("model_name") or cfg.get("model")
    is_plan_mode = cfg.get("is_plan_mode", False)
    subagent_enabled = cfg.get("subagent_enabled", False)
    max_concurrent_subagents = cfg.get("max_concurrent_subagents", 3)
    is_bootstrap = cfg.get("is_bootstrap", False)
    agent_name = cfg.get("agent_name")

    agent_config = load_agent_config(agent_name) if not is_bootstrap else None
    # Custom agent model or fallback to global/default model resolution
    agent_model_name = agent_config.model if agent_config and agent_config.model else _resolve_model_name()

    # Final model name resolution with request override, then agent config, then global default
    model_name = requested_model_name or agent_model_name

    app_config = get_app_config()
    model_config = app_config.get_model_config(model_name) if model_name else None

    if model_config is None:
        raise ValueError("No chat model could be resolved. Please configure at least one model in config.yaml or provide a valid 'model_name'/'model' in the request.")
    if thinking_enabled and not model_config.supports_thinking:
        logger.warning(f"Thinking mode is enabled but model '{model_name}' does not support it; fallback to non-thinking mode.")
        thinking_enabled = False

    logger.info(
        "Create Agent(%s) -> thinking_enabled: %s, reasoning_effort: %s, model_name: %s, is_plan_mode: %s, subagent_enabled: %s, max_concurrent_subagents: %s",
        agent_name or "default",
        thinking_enabled,
        reasoning_effort,
        model_name,
        is_plan_mode,
        subagent_enabled,
        max_concurrent_subagents,
    )

    # Inject run metadata for LangSmith trace tagging
    if "metadata" not in config:
        config["metadata"] = {}

    config["metadata"].update(
        {
            "agent_name": agent_name or "default",
            "model_name": model_name or "default",
            "thinking_enabled": thinking_enabled,
            "reasoning_effort": reasoning_effort,
            "is_plan_mode": is_plan_mode,
            "subagent_enabled": subagent_enabled,
        }
    )

    if is_bootstrap:
        # Special bootstrap agent with minimal prompt for initial custom agent creation flow
        system_prompt = apply_prompt_template(subagent_enabled=subagent_enabled, max_concurrent_subagents=max_concurrent_subagents, available_skills=set(["bootstrap"]))

        return create_agent(
            model=create_chat_model(name=model_name, thinking_enabled=thinking_enabled),
            tools=get_available_tools(model_name=model_name, subagent_enabled=subagent_enabled) + [setup_agent],
            middleware=_build_middlewares(config, model_name=model_name),
            system_prompt=system_prompt,
            state_schema=ThreadState,
        )

    # Default lead agent (unchanged behavior)
    return create_agent(
        model=create_chat_model(name=model_name, thinking_enabled=thinking_enabled, reasoning_effort=reasoning_effort),
        tools=get_available_tools(model_name=model_name, groups=agent_config.tool_groups if agent_config else None, subagent_enabled=subagent_enabled),
        middleware=_build_middlewares(config, model_name=model_name, agent_name=agent_name),
        system_prompt=apply_prompt_template(subagent_enabled=subagent_enabled, max_concurrent_subagents=max_concurrent_subagents, agent_name=agent_name),
        state_schema=ThreadState,
    )
@@ -0,0 +1,409 @@
from datetime import datetime

from src.config.agents_config import load_agent_soul
from src.skills import load_skills


def _build_subagent_section(max_concurrent: int) -> str:
    """Build the subagent system prompt section with dynamic concurrency limit.

    Args:
        max_concurrent: Maximum number of concurrent subagent calls allowed per response.

    Returns:
        Formatted subagent section string.
    """
    n = max_concurrent
    return f"""<subagent_system>
**🚀 SUBAGENT MODE ACTIVE - DECOMPOSE, DELEGATE, SYNTHESIZE**

You are running with subagent capabilities enabled. Your role is to be a **task orchestrator**:
1. **DECOMPOSE**: Break complex tasks into parallel sub-tasks
2. **DELEGATE**: Launch multiple subagents simultaneously using parallel `task` calls
3. **SYNTHESIZE**: Collect and integrate results into a coherent answer

**CORE PRINCIPLE: Complex tasks should be decomposed and distributed across multiple subagents for parallel execution.**

**⛔ HARD CONCURRENCY LIMIT: MAXIMUM {n} `task` CALLS PER RESPONSE. THIS IS NOT OPTIONAL.**
- Each response, you may include **at most {n}** `task` tool calls. Any excess calls are **silently discarded** by the system — you will lose that work.
- **Before launching subagents, you MUST count your sub-tasks in your thinking:**
  - If count ≤ {n}: Launch all in this response.
  - If count > {n}: **Pick the {n} most important/foundational sub-tasks for this turn.** Save the rest for the next turn.
- **Multi-batch execution** (for >{n} sub-tasks):
  - Turn 1: Launch sub-tasks 1-{n} in parallel → wait for results
  - Turn 2: Launch next batch in parallel → wait for results
  - ... continue until all sub-tasks are complete
  - Final turn: Synthesize ALL results into a coherent answer
- **Example thinking pattern**: "I identified 6 sub-tasks. Since the limit is {n} per turn, I will launch the first {n} now, and the rest in the next turn."

**Available Subagents:**
- **general-purpose**: For ANY non-trivial task - web research, code exploration, file operations, analysis, etc.
- **bash**: For command execution (git, build, test, deploy operations)

**Your Orchestration Strategy:**

✅ **DECOMPOSE + PARALLEL EXECUTION (Preferred Approach):**

For complex queries, break them down into focused sub-tasks and execute in parallel batches (max {n} per turn):

**Example 1: "Why is Tencent's stock price declining?" (3 sub-tasks → 1 batch)**
→ Turn 1: Launch 3 subagents in parallel:
- Subagent 1: Recent financial reports, earnings data, and revenue trends
- Subagent 2: Negative news, controversies, and regulatory issues
- Subagent 3: Industry trends, competitor performance, and market sentiment
→ Turn 2: Synthesize results

**Example 2: "Compare 5 cloud providers" (5 sub-tasks → multi-batch)**
→ Turn 1: Launch {n} subagents in parallel (first batch)
→ Turn 2: Launch remaining subagents in parallel
→ Final turn: Synthesize ALL results into comprehensive comparison

**Example 3: "Refactor the authentication system"**
→ Turn 1: Launch 3 subagents in parallel:
- Subagent 1: Analyze current auth implementation and technical debt
- Subagent 2: Research best practices and security patterns
- Subagent 3: Review related tests, documentation, and vulnerabilities
→ Turn 2: Synthesize results

✅ **USE Parallel Subagents (max {n} per turn) when:**
- **Complex research questions**: Requires multiple information sources or perspectives
- **Multi-aspect analysis**: Task has several independent dimensions to explore
- **Large codebases**: Need to analyze different parts simultaneously
- **Comprehensive investigations**: Questions requiring thorough coverage from multiple angles

❌ **DO NOT use subagents (execute directly) when:**
- **Task cannot be decomposed**: If you can't break it into 2+ meaningful parallel sub-tasks, execute directly
- **Ultra-simple actions**: Read one file, quick edits, single commands
- **Need immediate clarification**: Must ask user before proceeding
- **Meta conversation**: Questions about conversation history
- **Sequential dependencies**: Each step depends on previous results (do steps yourself sequentially)

**CRITICAL WORKFLOW** (STRICTLY follow this before EVERY action):
1. **COUNT**: In your thinking, list all sub-tasks and count them explicitly: "I have N sub-tasks"
2. **PLAN BATCHES**: If N > {n}, explicitly plan which sub-tasks go in which batch:
   - "Batch 1 (this turn): first {n} sub-tasks"
   - "Batch 2 (next turn): next batch of sub-tasks"
3. **EXECUTE**: Launch ONLY the current batch (max {n} `task` calls). Do NOT launch sub-tasks from future batches.
4. **REPEAT**: After results return, launch the next batch. Continue until all batches complete.
5. **SYNTHESIZE**: After ALL batches are done, synthesize all results.
6. **Cannot decompose** → Execute directly using available tools (bash, read_file, web_search, etc.)

**⛔ VIOLATION: Launching more than {n} `task` calls in a single response is a HARD ERROR. The system WILL discard excess calls and you WILL lose work. Always batch.**

**Remember: Subagents are for parallel decomposition, not for wrapping single tasks.**

**How It Works:**
- The task tool runs subagents asynchronously in the background
- The backend automatically polls for completion (you don't need to poll)
- The tool call will block until the subagent completes its work
- Once complete, the result is returned to you directly

**Usage Example 1 - Single Batch (≤{n} sub-tasks):**

```python
# User asks: "Why is Tencent's stock price declining?"
# Thinking: 3 sub-tasks → fits in 1 batch

# Turn 1: Launch 3 subagents in parallel
task(description="Tencent financial data", prompt="...", subagent_type="general-purpose")
task(description="Tencent news & regulation", prompt="...", subagent_type="general-purpose")
task(description="Industry & market trends", prompt="...", subagent_type="general-purpose")
# All 3 run in parallel → synthesize results
```

**Usage Example 2 - Multiple Batches (>{n} sub-tasks):**

```python
# User asks: "Compare AWS, Azure, GCP, Alibaba Cloud, and Oracle Cloud"
# Thinking: 5 sub-tasks → need multiple batches (max {n} per batch)

# Turn 1: Launch first batch of {n}
task(description="AWS analysis", prompt="...", subagent_type="general-purpose")
task(description="Azure analysis", prompt="...", subagent_type="general-purpose")
task(description="GCP analysis", prompt="...", subagent_type="general-purpose")

# Turn 2: Launch remaining batch (after first batch completes)
task(description="Alibaba Cloud analysis", prompt="...", subagent_type="general-purpose")
task(description="Oracle Cloud analysis", prompt="...", subagent_type="general-purpose")

# Turn 3: Synthesize ALL results from both batches
```

**Counter-Example - Direct Execution (NO subagents):**

```python
# User asks: "Run the tests"
# Thinking: Cannot decompose into parallel sub-tasks
# → Execute directly

bash("npm test")  # Direct execution, not task()
```

**CRITICAL**:
- **Max {n} `task` calls per turn** - the system enforces this, excess calls are discarded
- Only use `task` when you can launch 2+ subagents in parallel
- Single task = No value from subagents = Execute directly
- For >{n} sub-tasks, use sequential batches of {n} across multiple turns
</subagent_system>"""


SYSTEM_PROMPT_TEMPLATE = """
<role>
You are {agent_name}, an open-source super agent.
</role>

{soul}
{memory_context}

<thinking_style>
- Think concisely and strategically about the user's request BEFORE taking action
- Break down the task: What is clear? What is ambiguous? What is missing?
- **PRIORITY CHECK: If anything is unclear, missing, or has multiple interpretations, you MUST ask for clarification FIRST - do NOT proceed with work**
{subagent_thinking}- Never write down your full final answer or report in thinking process, but only outline
- CRITICAL: After thinking, you MUST provide your actual response to the user. Thinking is for planning, the response is for delivery.
- Your response must contain the actual answer, not just a reference to what you thought about
</thinking_style>

<clarification_system>
**WORKFLOW PRIORITY: CLARIFY → PLAN → ACT**
1. **FIRST**: Analyze the request in your thinking - identify what's unclear, missing, or ambiguous
2. **SECOND**: If clarification is needed, call `ask_clarification` tool IMMEDIATELY - do NOT start working
3. **THIRD**: Only after all clarifications are resolved, proceed with planning and execution

**CRITICAL RULE: Clarification ALWAYS comes BEFORE action. Never start working and clarify mid-execution.**

**MANDATORY Clarification Scenarios - You MUST call ask_clarification BEFORE starting work when:**

1. **Missing Information** (`missing_info`): Required details not provided
   - Example: User says "create a web scraper" but doesn't specify the target website
   - Example: "Deploy the app" without specifying environment
   - **REQUIRED ACTION**: Call ask_clarification to get the missing information

2. **Ambiguous Requirements** (`ambiguous_requirement`): Multiple valid interpretations exist
   - Example: "Optimize the code" could mean performance, readability, or memory usage
   - Example: "Make it better" is unclear what aspect to improve
   - **REQUIRED ACTION**: Call ask_clarification to clarify the exact requirement

3. **Approach Choices** (`approach_choice`): Several valid approaches exist
   - Example: "Add authentication" could use JWT, OAuth, session-based, or API keys
   - Example: "Store data" could use database, files, cache, etc.
   - **REQUIRED ACTION**: Call ask_clarification to let user choose the approach

4. **Risky Operations** (`risk_confirmation`): Destructive actions need confirmation
   - Example: Deleting files, modifying production configs, database operations
   - Example: Overwriting existing code or data
   - **REQUIRED ACTION**: Call ask_clarification to get explicit confirmation

5. **Suggestions** (`suggestion`): You have a recommendation but want approval
   - Example: "I recommend refactoring this code. Should I proceed?"
   - **REQUIRED ACTION**: Call ask_clarification to get approval

**STRICT ENFORCEMENT:**
- ❌ DO NOT start working and then ask for clarification mid-execution - clarify FIRST
- ❌ DO NOT skip clarification for "efficiency" - accuracy matters more than speed
- ❌ DO NOT make assumptions when information is missing - ALWAYS ask
- ❌ DO NOT proceed with guesses - STOP and call ask_clarification first
- ✅ Analyze the request in thinking → Identify unclear aspects → Ask BEFORE any action
- ✅ If you identify the need for clarification in your thinking, you MUST call the tool IMMEDIATELY
- ✅ After calling ask_clarification, execution will be interrupted automatically
- ✅ Wait for user response - do NOT continue with assumptions

**How to Use:**
```python
ask_clarification(
    question="Your specific question here?",
    clarification_type="missing_info",  # or other type
    context="Why you need this information",  # optional but recommended
    options=["option1", "option2"]  # optional, for choices
)
```

**Example:**
User: "Deploy the application"
You (thinking): Missing environment info - I MUST ask for clarification
You (action): ask_clarification(
    question="Which environment should I deploy to?",
    clarification_type="approach_choice",
    context="I need to know the target environment for proper configuration",
    options=["development", "staging", "production"]
)
[Execution stops - wait for user response]

User: "staging"
You: "Deploying to staging..." [proceed]
</clarification_system>

{skills_section}

{subagent_section}

<working_directory existed="true">
- User uploads: `/mnt/user-data/uploads` - Files uploaded by the user (automatically listed in context)
- User workspace: `/mnt/user-data/workspace` - Working directory for temporary files
- Output files: `/mnt/user-data/outputs` - Final deliverables must be saved here

**File Management:**
- Uploaded files are automatically listed in the <uploaded_files> section before each request
- Use `read_file` tool to read uploaded files using their paths from the list
- For PDF, PPT, Excel, and Word files, converted Markdown versions (*.md) are available alongside originals
- All temporary work happens in `/mnt/user-data/workspace`
- Final deliverables must be copied to `/mnt/user-data/outputs` and presented using `present_file` tool
</working_directory>

<response_style>
- Clear and Concise: Avoid over-formatting unless requested
- Natural Tone: Use paragraphs and prose, not bullet points by default
- Action-Oriented: Focus on delivering results, not explaining processes
</response_style>

<citations>
- When to Use: After web_search, include citations if applicable
- Format: Use Markdown link format `[citation:TITLE](URL)`
- Example:
  ```markdown
  The key AI trends for 2026 include enhanced reasoning capabilities and multimodal integration
  [citation:AI Trends 2026](https://techcrunch.com/ai-trends).
  Recent breakthroughs in language models have also accelerated progress
  [citation:OpenAI Research](https://openai.com/research).
  ```
</citations>

<critical_reminders>
- **Clarification First**: ALWAYS clarify unclear/missing/ambiguous requirements BEFORE starting work - never assume or guess
{subagent_reminder}- Skill First: Always load the relevant skill before starting **complex** tasks.
- Progressive Loading: Load resources incrementally as referenced in skills
- Output Files: Final deliverables must be in `/mnt/user-data/outputs`
- Clarity: Be direct and helpful, avoid unnecessary meta-commentary
- Including Images and Mermaid: Images and Mermaid diagrams are always welcomed in the Markdown format, and you're encouraged to use `\n\n` or "```mermaid" to display images in response or Markdown files
- Multi-task: Better utilize parallel tool calling to call multiple tools at one time for better performance
- Language Consistency: Keep using the same language as user's
- Always Respond: Your thinking is internal. You MUST always provide a visible response to the user after thinking.
</critical_reminders>
"""


def _get_memory_context(agent_name: str | None = None) -> str:
    """Get memory context for injection into system prompt.

    Args:
        agent_name: If provided, loads per-agent memory. If None, loads global memory.

    Returns:
        Formatted memory context string wrapped in XML tags, or empty string if disabled.
    """
    try:
        from src.agents.memory import format_memory_for_injection, get_memory_data
        from src.config.memory_config import get_memory_config

        config = get_memory_config()
        if not config.enabled or not config.injection_enabled:
            return ""

        memory_data = get_memory_data(agent_name)
        memory_content = format_memory_for_injection(memory_data, max_tokens=config.max_injection_tokens)

        if not memory_content.strip():
            return ""

        return f"""<memory>
{memory_content}
</memory>
"""
    except Exception as e:
        print(f"Failed to load memory context: {e}")
        return ""


def get_skills_prompt_section(available_skills: set[str] | None = None) -> str:
    """Generate the skills prompt section with available skills list.

    Returns the <skill_system>...</skill_system> block listing all enabled skills,
    suitable for injection into any agent's system prompt.
    """
    skills = load_skills(enabled_only=True)

    try:
        from src.config import get_app_config

        config = get_app_config()
        container_base_path = config.skills.container_path
    except Exception:
        container_base_path = "/mnt/skills"

    if not skills:
        return ""

    if available_skills is not None:
        skills = [skill for skill in skills if skill.name in available_skills]

    skill_items = "\n".join(
        f"  <skill>\n    <name>{skill.name}</name>\n    <description>{skill.description}</description>\n    <location>{skill.get_container_file_path(container_base_path)}</location>\n  </skill>" for skill in skills
    )
    skills_list = f"<available_skills>\n{skill_items}\n</available_skills>"

    return f"""<skill_system>
You have access to skills that provide optimized workflows for specific tasks. Each skill contains best practices, frameworks, and references to additional resources.

**Progressive Loading Pattern:**
1. When a user query matches a skill's use case, immediately call `read_file` on the skill's main file using the path attribute provided in the skill tag below
2. Read and understand the skill's workflow and instructions
3. The skill file contains references to external resources under the same folder
4. Load referenced resources only when needed during execution
5. Follow the skill's instructions precisely

**Skills are located at:** {container_base_path}

{skills_list}

</skill_system>"""


def get_agent_soul(agent_name: str | None) -> str:
    # Append SOUL.md (agent personality) if present
    soul = load_agent_soul(agent_name)
    return f"<soul>\n{soul}\n</soul>\n" if soul else ""


def apply_prompt_template(subagent_enabled: bool = False, max_concurrent_subagents: int = 3, *, agent_name: str | None = None, available_skills: set[str] | None = None) -> str:
    # Get memory context
    memory_context = _get_memory_context(agent_name)

    # Include subagent section only if enabled (from runtime parameter)
    n = max_concurrent_subagents
    subagent_section = _build_subagent_section(n) if subagent_enabled else ""

    # Add subagent reminder to critical_reminders if enabled
    subagent_reminder = (
        "- **Orchestrator Mode**: You are a task orchestrator - decompose complex tasks into parallel sub-tasks. "
        f"**HARD LIMIT: max {n} `task` calls per response.** "
        f"If >{n} sub-tasks, split into sequential batches of ≤{n}. Synthesize after ALL batches complete.\n"
        if subagent_enabled
        else ""
    )

    # Add subagent thinking guidance if enabled
    subagent_thinking = (
        "- **DECOMPOSITION CHECK: Can this task be broken into 2+ parallel sub-tasks? If YES, COUNT them. "
        f"If count > {n}, you MUST plan batches of ≤{n} and only launch the FIRST batch now. "
        f"NEVER launch more than {n} `task` calls in one response.**\n"
        if subagent_enabled
        else ""
    )

    # Get skills section
    skills_section = get_skills_prompt_section(available_skills)

    # Format the prompt with dynamic skills and memory
    prompt = SYSTEM_PROMPT_TEMPLATE.format(
        agent_name=agent_name or "DeerFlow 2.0",
        soul=get_agent_soul(agent_name),
        skills_section=skills_section,
        memory_context=memory_context,
        subagent_section=subagent_section,
        subagent_reminder=subagent_reminder,
        subagent_thinking=subagent_thinking,
    )

    return prompt + f"\n<current_date>{datetime.now().strftime('%Y-%m-%d, %A')}</current_date>"
@@ -0,0 +1,44 @@
"""Memory module for DeerFlow.

This module provides a global memory mechanism that:
- Stores user context and conversation history in memory.json
- Uses LLM to summarize and extract facts from conversations
- Injects relevant memory into system prompts for personalized responses
"""

from src.agents.memory.prompt import (
    FACT_EXTRACTION_PROMPT,
    MEMORY_UPDATE_PROMPT,
    format_conversation_for_update,
    format_memory_for_injection,
)
from src.agents.memory.queue import (
    ConversationContext,
    MemoryUpdateQueue,
    get_memory_queue,
    reset_memory_queue,
)
from src.agents.memory.updater import (
    MemoryUpdater,
    get_memory_data,
    reload_memory_data,
    update_memory_from_conversation,
)

__all__ = [
    # Prompt utilities
    "MEMORY_UPDATE_PROMPT",
    "FACT_EXTRACTION_PROMPT",
    "format_memory_for_injection",
    "format_conversation_for_update",
    # Queue
    "ConversationContext",
    "MemoryUpdateQueue",
    "get_memory_queue",
    "reset_memory_queue",
    # Updater
    "MemoryUpdater",
    "get_memory_data",
    "reload_memory_data",
    "update_memory_from_conversation",
]
@@ -0,0 +1,275 @@
|
||||
"""Prompt templates for memory update and injection."""
|
||||
|
||||
import re
|
||||
from typing import Any
|
||||
|
||||
try:
|
||||
import tiktoken
|
||||
|
||||
TIKTOKEN_AVAILABLE = True
|
||||
except ImportError:
|
||||
TIKTOKEN_AVAILABLE = False
|
||||
|
||||
# Prompt template for updating memory based on conversation
|
||||
MEMORY_UPDATE_PROMPT = """You are a memory management system. Your task is to analyze a conversation and update the user's memory profile.
|
||||
|
||||
Current Memory State:
|
||||
<current_memory>
|
||||
{current_memory}
|
||||
</current_memory>
|
||||
|
||||
New Conversation to Process:
|
||||
<conversation>
|
||||
{conversation}
|
||||
</conversation>
|
||||
|
||||
Instructions:
|
||||
1. Analyze the conversation for important information about the user
|
||||
2. Extract relevant facts, preferences, and context with specific details (numbers, names, technologies)
|
||||
3. Update the memory sections as needed following the detailed length guidelines below
|
||||
|
||||
Memory Section Guidelines:
|
||||
|
||||
**User Context** (Current state - concise summaries):
|
||||
- workContext: Professional role, company, key projects, main technologies (2-3 sentences)
|
||||
Example: Core contributor, project names with metrics (16k+ stars), technical stack
|
||||
- personalContext: Languages, communication preferences, key interests (1-2 sentences)
|
||||
Example: Bilingual capabilities, specific interest areas, expertise domains
|
||||
- topOfMind: Multiple ongoing focus areas and priorities (3-5 sentences, detailed paragraph)
|
||||
Example: Primary project work, parallel technical investigations, ongoing learning/tracking
|
||||
Include: Active implementation work, troubleshooting issues, market/research interests
|
||||
Note: This captures SEVERAL concurrent focus areas, not just one task
|
||||
|
||||
**History** (Temporal context - rich paragraphs):
|
||||
- recentMonths: Detailed summary of recent activities (4-6 sentences or 1-2 paragraphs)
  Timeline: Last 1-3 months of interactions
  Include: Technologies explored, projects worked on, problems solved, interests demonstrated
- earlierContext: Important historical patterns (3-5 sentences or 1 paragraph)
  Timeline: 3-12 months ago
  Include: Past projects, learning journeys, established patterns
- longTermBackground: Persistent background and foundational context (2-4 sentences)
  Timeline: Overall/foundational information
  Include: Core expertise, longstanding interests, fundamental working style

**Facts Extraction**:
- Extract specific, quantifiable details (e.g., "16k+ GitHub stars", "200+ datasets")
- Include proper nouns (company names, project names, technology names)
- Preserve technical terminology and version numbers
- Categories:
  * preference: Tools, styles, approaches the user prefers or dislikes
  * knowledge: Specific expertise, technologies mastered, domain knowledge
  * context: Background facts (job title, projects, locations, languages)
  * behavior: Working patterns, communication habits, problem-solving approaches
  * goal: Stated objectives, learning targets, project ambitions
- Confidence levels:
  * 0.9-1.0: Explicitly stated facts ("I work on X", "My role is Y")
  * 0.7-0.8: Strongly implied from actions/discussions
  * 0.5-0.6: Inferred patterns (use sparingly, only for clear patterns)

**What Goes Where**:
- workContext: Current job, active projects, primary tech stack
- personalContext: Languages, personality, interests outside direct work tasks
- topOfMind: Multiple ongoing priorities and focus areas the user has cared about recently (updated most frequently)
  Should capture 3-5 concurrent themes: main work, side explorations, learning/tracking interests
- recentMonths: Detailed account of recent technical explorations and work
- earlierContext: Patterns from slightly older interactions that are still relevant
- longTermBackground: Unchanging foundational facts about the user

**Multilingual Content**:
- Preserve the original language for proper nouns and company names
- Keep technical terms in their original form (DeepSeek, LangGraph, etc.)
- Note language capabilities in personalContext

Output Format (JSON):
{{
  "user": {{
    "workContext": {{ "summary": "...", "shouldUpdate": true/false }},
    "personalContext": {{ "summary": "...", "shouldUpdate": true/false }},
    "topOfMind": {{ "summary": "...", "shouldUpdate": true/false }}
  }},
  "history": {{
    "recentMonths": {{ "summary": "...", "shouldUpdate": true/false }},
    "earlierContext": {{ "summary": "...", "shouldUpdate": true/false }},
    "longTermBackground": {{ "summary": "...", "shouldUpdate": true/false }}
  }},
  "newFacts": [
    {{ "content": "...", "category": "preference|knowledge|context|behavior|goal", "confidence": 0.0-1.0 }}
  ],
  "factsToRemove": ["fact_id_1", "fact_id_2"]
}}

Important Rules:
- Only set shouldUpdate=true if there is meaningful new information
- Follow length guidelines: workContext/personalContext are concise (1-3 sentences); topOfMind and the history sections are detailed (paragraphs)
- Include specific metrics, version numbers, and proper nouns in facts
- Only add facts that are clearly stated (0.9+) or strongly implied (0.7+)
- Remove facts that are contradicted by new information
- When updating topOfMind, integrate new focus areas while removing completed or abandoned ones
  Keep 3-5 concurrent focus themes that are still active and relevant
- For history sections, integrate new information chronologically into the appropriate time period
- Preserve technical accuracy - keep the exact names of technologies, companies, and projects
- Focus on information useful for future interactions and personalization
- IMPORTANT: Do NOT record file upload events in memory. Uploaded files are
  session-specific and ephemeral — they will not be accessible in future sessions.
  Recording upload events causes confusion in subsequent conversations.

Return ONLY valid JSON, no explanation or markdown."""
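
# Illustrative only: a minimal model response that conforms to the schema above.
# The field values are made up; only the shape matters.
#
# {
#   "user": {
#     "workContext": { "summary": "Backend engineer building a RAG service.", "shouldUpdate": true },
#     "personalContext": { "summary": "", "shouldUpdate": false },
#     "topOfMind": { "summary": "Debugging retrieval latency; evaluating LangGraph.", "shouldUpdate": true }
#   },
#   "history": {
#     "recentMonths": { "summary": "", "shouldUpdate": false },
#     "earlierContext": { "summary": "", "shouldUpdate": false },
#     "longTermBackground": { "summary": "", "shouldUpdate": false }
#   },
#   "newFacts": [
#     { "content": "Uses LangGraph for agent orchestration", "category": "knowledge", "confidence": 0.8 }
#   ],
#   "factsToRemove": []
# }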


# Prompt template for extracting facts from a single message
FACT_EXTRACTION_PROMPT = """Extract factual information about the user from this message.

Message:
{message}

Extract facts in this JSON format:
{{
  "facts": [
    {{ "content": "...", "category": "preference|knowledge|context|behavior|goal", "confidence": 0.0-1.0 }}
  ]
}}

Categories:
- preference: User preferences (likes/dislikes, styles, tools)
- knowledge: User's expertise or knowledge areas
- context: Background context (location, job, projects)
- behavior: Behavioral patterns
- goal: User's goals or objectives

Rules:
- Only extract clear, specific facts
- Confidence should reflect certainty (explicit statement = 0.9+, implied = 0.6-0.8)
- Skip vague or temporary information

Return ONLY valid JSON."""


def _count_tokens(text: str, encoding_name: str = "cl100k_base") -> int:
    """Count tokens in text using tiktoken.

    Args:
        text: The text to count tokens for.
        encoding_name: The encoding to use (default: cl100k_base for GPT-4/3.5).

    Returns:
        The number of tokens in the text.
    """
    if not TIKTOKEN_AVAILABLE:
        # Fall back to character-based estimation if tiktoken is not available
        return len(text) // 4

    try:
        encoding = tiktoken.get_encoding(encoding_name)
        return len(encoding.encode(text))
    except Exception:
        # Fall back to character-based estimation on error
        return len(text) // 4
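
# Rough sanity check (illustrative): without tiktoken installed, the heuristic
# assumes ~4 characters per token, so _count_tokens("hello world") == 11 // 4 == 2;
# with tiktoken available, the exact cl100k_base BPE count is returned instead.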


def format_memory_for_injection(memory_data: dict[str, Any], max_tokens: int = 2000) -> str:
    """Format memory data for injection into the system prompt.

    Args:
        memory_data: The memory data dictionary.
        max_tokens: Maximum tokens to use (counted via tiktoken for accuracy).

    Returns:
        Formatted memory string for system prompt injection.
    """
    if not memory_data:
        return ""

    sections = []

    # Format user context
    user_data = memory_data.get("user", {})
    if user_data:
        user_sections = []

        work_ctx = user_data.get("workContext", {})
        if work_ctx.get("summary"):
            user_sections.append(f"Work: {work_ctx['summary']}")

        personal_ctx = user_data.get("personalContext", {})
        if personal_ctx.get("summary"):
            user_sections.append(f"Personal: {personal_ctx['summary']}")

        top_of_mind = user_data.get("topOfMind", {})
        if top_of_mind.get("summary"):
            user_sections.append(f"Current Focus: {top_of_mind['summary']}")

        if user_sections:
            sections.append("User Context:\n" + "\n".join(f"- {s}" for s in user_sections))

    # Format history
    history_data = memory_data.get("history", {})
    if history_data:
        history_sections = []

        recent = history_data.get("recentMonths", {})
        if recent.get("summary"):
            history_sections.append(f"Recent: {recent['summary']}")

        earlier = history_data.get("earlierContext", {})
        if earlier.get("summary"):
            history_sections.append(f"Earlier: {earlier['summary']}")

        if history_sections:
            sections.append("History:\n" + "\n".join(f"- {s}" for s in history_sections))

    if not sections:
        return ""

    result = "\n\n".join(sections)

    # Use accurate token counting with tiktoken
    token_count = _count_tokens(result)
    if token_count > max_tokens:
        # Truncate to fit within the token limit.
        # Estimate characters to remove based on the token ratio.
        char_per_token = len(result) / token_count
        target_chars = int(max_tokens * char_per_token * 0.95)  # 95% to leave margin
        result = result[:target_chars] + "\n..."

    return result
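
# Example of the injected format (illustrative values):
#
#   User Context:
#   - Work: Backend engineer on a RAG service
#   - Current Focus: Reducing retrieval latency
#
#   History:
#   - Recent: Explored LangGraph middleware hooks
#
# Note: longTermBackground is not included by this formatter.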


def format_conversation_for_update(messages: list[Any]) -> str:
    """Format conversation messages for the memory update prompt.

    Args:
        messages: List of conversation messages.

    Returns:
        Formatted conversation string.
    """
    lines = []
    for msg in messages:
        role = getattr(msg, "type", "unknown")
        content = getattr(msg, "content", str(msg))

        # Handle content that might be a list (multimodal)
        if isinstance(content, list):
            text_parts = [p.get("text", "") for p in content if isinstance(p, dict) and "text" in p]
            content = " ".join(text_parts) if text_parts else str(content)

        # Strip uploaded_files tags from human messages to avoid persisting
        # ephemeral file path info into long-term memory. Skip the turn entirely
        # when nothing remains after stripping (upload-only message).
        if role == "human":
            content = re.sub(
                r"<uploaded_files>[\s\S]*?</uploaded_files>\n*", "", str(content)
            ).strip()
            if not content:
                continue

        # Truncate very long messages
        if len(str(content)) > 1000:
            content = str(content)[:1000] + "..."

        if role == "human":
            lines.append(f"User: {content}")
        elif role == "ai":
            lines.append(f"Assistant: {content}")

    return "\n\n".join(lines)
@@ -0,0 +1,195 @@
"""Memory update queue with debounce mechanism."""

import threading
import time
from dataclasses import dataclass, field
from datetime import datetime
from typing import Any

from src.config.memory_config import get_memory_config


@dataclass
class ConversationContext:
    """Context for a conversation to be processed for memory update."""

    thread_id: str
    messages: list[Any]
    timestamp: datetime = field(default_factory=datetime.utcnow)
    agent_name: str | None = None


class MemoryUpdateQueue:
    """Queue for memory updates with a debounce mechanism.

    This queue collects conversation contexts and processes them after
    a configurable debounce period. Multiple conversations received within
    the debounce window are batched together.
    """

    def __init__(self):
        """Initialize the memory update queue."""
        self._queue: list[ConversationContext] = []
        self._lock = threading.Lock()
        self._timer: threading.Timer | None = None
        self._processing = False

    def add(self, thread_id: str, messages: list[Any], agent_name: str | None = None) -> None:
        """Add a conversation to the update queue.

        Args:
            thread_id: The thread ID.
            messages: The conversation messages.
            agent_name: If provided, memory is stored per-agent. If None, uses global memory.
        """
        config = get_memory_config()
        if not config.enabled:
            return

        context = ConversationContext(
            thread_id=thread_id,
            messages=messages,
            agent_name=agent_name,
        )

        with self._lock:
            # Check if this thread already has a pending update.
            # If so, replace it with the newer one.
            self._queue = [c for c in self._queue if c.thread_id != thread_id]
            self._queue.append(context)

            # Reset or start the debounce timer
            self._reset_timer()

            print(f"Memory update queued for thread {thread_id}, queue size: {len(self._queue)}")

    def _reset_timer(self) -> None:
        """Reset the debounce timer."""
        config = get_memory_config()

        # Cancel the existing timer if any
        if self._timer is not None:
            self._timer.cancel()

        # Start a new timer
        self._timer = threading.Timer(
            config.debounce_seconds,
            self._process_queue,
        )
        self._timer.daemon = True
        self._timer.start()

        print(f"Memory update timer set for {config.debounce_seconds}s")

    def _process_queue(self) -> None:
        """Process all queued conversation contexts."""
        # Import here to avoid circular dependency
        from src.agents.memory.updater import MemoryUpdater

        with self._lock:
            if self._processing:
                # Already processing, reschedule
                self._reset_timer()
                return

            if not self._queue:
                return

            self._processing = True
            contexts_to_process = self._queue.copy()
            self._queue.clear()
            self._timer = None

        print(f"Processing {len(contexts_to_process)} queued memory updates")

        try:
            updater = MemoryUpdater()

            for context in contexts_to_process:
                try:
                    print(f"Updating memory for thread {context.thread_id}")
                    success = updater.update_memory(
                        messages=context.messages,
                        thread_id=context.thread_id,
                        agent_name=context.agent_name,
                    )
                    if success:
                        print(f"Memory updated successfully for thread {context.thread_id}")
                    else:
                        print(f"Memory update skipped/failed for thread {context.thread_id}")
                except Exception as e:
                    print(f"Error updating memory for thread {context.thread_id}: {e}")

                # Small delay between updates to avoid rate limiting
                if len(contexts_to_process) > 1:
                    time.sleep(0.5)

        finally:
            with self._lock:
                self._processing = False

    def flush(self) -> None:
        """Force immediate processing of the queue.

        This is useful for testing or graceful shutdown.
        """
        with self._lock:
            if self._timer is not None:
                self._timer.cancel()
                self._timer = None

        self._process_queue()

    def clear(self) -> None:
        """Clear the queue without processing.

        This is useful for testing.
        """
        with self._lock:
            if self._timer is not None:
                self._timer.cancel()
                self._timer = None
            self._queue.clear()
            self._processing = False

    @property
    def pending_count(self) -> int:
        """Get the number of pending updates."""
        with self._lock:
            return len(self._queue)

    @property
    def is_processing(self) -> bool:
        """Check if the queue is currently being processed."""
        with self._lock:
            return self._processing


# Global singleton instance
_memory_queue: MemoryUpdateQueue | None = None
_queue_lock = threading.Lock()


def get_memory_queue() -> MemoryUpdateQueue:
    """Get the global memory update queue singleton.

    Returns:
        The memory update queue instance.
    """
    global _memory_queue
    with _queue_lock:
        if _memory_queue is None:
            _memory_queue = MemoryUpdateQueue()
        return _memory_queue


def reset_memory_queue() -> None:
    """Reset the global memory queue.

    This is useful for testing.
    """
    global _memory_queue
    with _queue_lock:
        if _memory_queue is not None:
            _memory_queue.clear()
        _memory_queue = None
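

# Debounce behaviour in brief (illustrative): repeated add() calls for the same
# thread within the debounce window collapse into a single pending update, and
# the timer restarts on every call, so processing runs debounce_seconds after
# the *last* add(). For tests or shutdown:
#
#   queue = get_memory_queue()
#   queue.add(thread_id="t1", messages=msgs)
#   queue.flush()  # process immediately instead of waiting for the timer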
@@ -0,0 +1,388 @@
"""Memory updater for reading, writing, and updating memory data."""

import json
import re
import uuid
from datetime import datetime
from pathlib import Path
from typing import Any

from src.agents.memory.prompt import (
    MEMORY_UPDATE_PROMPT,
    format_conversation_for_update,
)
from src.config.memory_config import get_memory_config
from src.config.paths import get_paths
from src.models import create_chat_model


def _get_memory_file_path(agent_name: str | None = None) -> Path:
    """Get the path to the memory file.

    Args:
        agent_name: If provided, returns the per-agent memory file path.
            If None, returns the global memory file path.

    Returns:
        Path to the memory file.
    """
    if agent_name is not None:
        return get_paths().agent_memory_file(agent_name)

    config = get_memory_config()
    if config.storage_path:
        p = Path(config.storage_path)
        # Absolute path: use as-is; relative path: resolve against base_dir
        return p if p.is_absolute() else get_paths().base_dir / p
    return get_paths().memory_file


def _create_empty_memory() -> dict[str, Any]:
    """Create an empty memory structure."""
    return {
        "version": "1.0",
        "lastUpdated": datetime.utcnow().isoformat() + "Z",
        "user": {
            "workContext": {"summary": "", "updatedAt": ""},
            "personalContext": {"summary": "", "updatedAt": ""},
            "topOfMind": {"summary": "", "updatedAt": ""},
        },
        "history": {
            "recentMonths": {"summary": "", "updatedAt": ""},
            "earlierContext": {"summary": "", "updatedAt": ""},
            "longTermBackground": {"summary": "", "updatedAt": ""},
        },
        "facts": [],
    }


# Per-agent memory cache: keyed by agent_name (None = global)
# Value: (memory_data, file_mtime)
_memory_cache: dict[str | None, tuple[dict[str, Any], float | None]] = {}


def get_memory_data(agent_name: str | None = None) -> dict[str, Any]:
    """Get the current memory data (cached with a file modification time check).

    The cache is automatically invalidated if the memory file has been modified
    since the last load, ensuring fresh data is always returned.

    Args:
        agent_name: If provided, loads per-agent memory. If None, loads global memory.

    Returns:
        The memory data dictionary.
    """
    file_path = _get_memory_file_path(agent_name)

    # Get the current file modification time
    try:
        current_mtime = file_path.stat().st_mtime if file_path.exists() else None
    except OSError:
        current_mtime = None

    cached = _memory_cache.get(agent_name)

    # Invalidate the cache if the file has been modified or doesn't exist
    if cached is None or cached[1] != current_mtime:
        memory_data = _load_memory_from_file(agent_name)
        _memory_cache[agent_name] = (memory_data, current_mtime)
        return memory_data

    return cached[0]


def reload_memory_data(agent_name: str | None = None) -> dict[str, Any]:
    """Reload memory data from file, forcing cache invalidation.

    Args:
        agent_name: If provided, reloads per-agent memory. If None, reloads global memory.

    Returns:
        The reloaded memory data dictionary.
    """
    file_path = _get_memory_file_path(agent_name)
    memory_data = _load_memory_from_file(agent_name)

    try:
        mtime = file_path.stat().st_mtime if file_path.exists() else None
    except OSError:
        mtime = None

    _memory_cache[agent_name] = (memory_data, mtime)
    return memory_data


def _load_memory_from_file(agent_name: str | None = None) -> dict[str, Any]:
    """Load memory data from file.

    Args:
        agent_name: If provided, loads the per-agent memory file. If None, loads global.

    Returns:
        The memory data dictionary.
    """
    file_path = _get_memory_file_path(agent_name)

    if not file_path.exists():
        return _create_empty_memory()

    try:
        with open(file_path, encoding="utf-8") as f:
            data = json.load(f)
        return data
    except (json.JSONDecodeError, OSError) as e:
        print(f"Failed to load memory file: {e}")
        return _create_empty_memory()


# Matches sentences that describe a file-upload *event* rather than general
# file-related work. Deliberately narrow to avoid removing legitimate facts
# such as "User works with CSV files" or "prefers PDF export".
_UPLOAD_SENTENCE_RE = re.compile(
    r"[^.!?]*\b(?:"
    r"upload(?:ed|ing)?(?:\s+\w+){0,3}\s+(?:file|files?|document|documents?|attachment|attachments?)"
    r"|file\s+upload"
    r"|/mnt/user-data/uploads/"
    r"|<uploaded_files>"
    r")[^.!?]*[.!?]?\s*",
    re.IGNORECASE,
)
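
# Illustrative matches: the pattern removes event sentences such as
# "User uploaded three files yesterday." or anything referencing
# /mnt/user-data/uploads/, while "User works with CSV files daily."
# is kept because "works with ... files" contains no upload verb.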


def _strip_upload_mentions_from_memory(memory_data: dict[str, Any]) -> dict[str, Any]:
    """Remove sentences about file uploads from all memory summaries and facts.

    Uploaded files are session-scoped; persisting upload events in long-term
    memory causes the agent to search for non-existent files in future sessions.
    """
    # Scrub summaries in the user/history sections
    for section in ("user", "history"):
        section_data = memory_data.get(section, {})
        for _key, val in section_data.items():
            if isinstance(val, dict) and "summary" in val:
                cleaned = _UPLOAD_SENTENCE_RE.sub("", val["summary"]).strip()
                cleaned = re.sub(r" +", " ", cleaned)
                val["summary"] = cleaned

    # Also remove any facts that describe upload events
    facts = memory_data.get("facts", [])
    if facts:
        memory_data["facts"] = [
            f for f in facts if not _UPLOAD_SENTENCE_RE.search(f.get("content", ""))
        ]

    return memory_data


def _save_memory_to_file(memory_data: dict[str, Any], agent_name: str | None = None) -> bool:
    """Save memory data to file and update the cache.

    Args:
        memory_data: The memory data to save.
        agent_name: If provided, saves to the per-agent memory file. If None, saves to global.

    Returns:
        True if successful, False otherwise.
    """
    file_path = _get_memory_file_path(agent_name)

    try:
        # Ensure the directory exists
        file_path.parent.mkdir(parents=True, exist_ok=True)

        # Update the lastUpdated timestamp
        memory_data["lastUpdated"] = datetime.utcnow().isoformat() + "Z"

        # Write atomically using a temp file
        temp_path = file_path.with_suffix(".tmp")
        with open(temp_path, "w", encoding="utf-8") as f:
            json.dump(memory_data, f, indent=2, ensure_ascii=False)

        # Rename the temp file to the actual file (atomic on most systems)
        temp_path.replace(file_path)

        # Update the cache and file modification time
        try:
            mtime = file_path.stat().st_mtime
        except OSError:
            mtime = None

        _memory_cache[agent_name] = (memory_data, mtime)

        print(f"Memory saved to {file_path}")
        return True
    except OSError as e:
        print(f"Failed to save memory file: {e}")
        return False


class MemoryUpdater:
    """Updates memory using an LLM based on conversation context."""

    def __init__(self, model_name: str | None = None):
        """Initialize the memory updater.

        Args:
            model_name: Optional model name to use. If None, uses config or default.
        """
        self._model_name = model_name

    def _get_model(self):
        """Get the model for memory updates."""
        config = get_memory_config()
        model_name = self._model_name or config.model_name
        return create_chat_model(name=model_name, thinking_enabled=False)

    def update_memory(self, messages: list[Any], thread_id: str | None = None, agent_name: str | None = None) -> bool:
        """Update memory based on conversation messages.

        Args:
            messages: List of conversation messages.
            thread_id: Optional thread ID for tracking the source.
            agent_name: If provided, updates per-agent memory. If None, updates global memory.

        Returns:
            True if the update was successful, False otherwise.
        """
        config = get_memory_config()
        if not config.enabled:
            return False

        if not messages:
            return False

        try:
            # Get the current memory
            current_memory = get_memory_data(agent_name)

            # Format the conversation for the prompt
            conversation_text = format_conversation_for_update(messages)

            if not conversation_text.strip():
                return False

            # Build the prompt
            prompt = MEMORY_UPDATE_PROMPT.format(
                current_memory=json.dumps(current_memory, indent=2),
                conversation=conversation_text,
            )

            # Call the LLM
            model = self._get_model()
            response = model.invoke(prompt)
            response_text = str(response.content).strip()

            # Parse the response.
            # Remove markdown code blocks if present.
            if response_text.startswith("```"):
                lines = response_text.split("\n")
                response_text = "\n".join(lines[1:-1] if lines[-1] == "```" else lines[1:])

            update_data = json.loads(response_text)

            # Apply the updates
            updated_memory = self._apply_updates(current_memory, update_data, thread_id)

            # Strip file-upload mentions from all summaries before saving.
            # Uploaded files are session-scoped and won't exist in future sessions,
            # so recording upload events in long-term memory causes the agent to
            # try (and fail) to locate those files in subsequent conversations.
            updated_memory = _strip_upload_mentions_from_memory(updated_memory)

            # Save
            return _save_memory_to_file(updated_memory, agent_name)

        except json.JSONDecodeError as e:
            print(f"Failed to parse LLM response for memory update: {e}")
            return False
        except Exception as e:
            print(f"Memory update failed: {e}")
            return False

    def _apply_updates(
        self,
        current_memory: dict[str, Any],
        update_data: dict[str, Any],
        thread_id: str | None = None,
    ) -> dict[str, Any]:
        """Apply LLM-generated updates to memory.

        Args:
            current_memory: Current memory data.
            update_data: Updates from the LLM.
            thread_id: Optional thread ID for tracking.

        Returns:
            Updated memory data.
        """
        config = get_memory_config()
        now = datetime.utcnow().isoformat() + "Z"

        # Update user sections
        user_updates = update_data.get("user", {})
        for section in ["workContext", "personalContext", "topOfMind"]:
            section_data = user_updates.get(section, {})
            if section_data.get("shouldUpdate") and section_data.get("summary"):
                current_memory["user"][section] = {
                    "summary": section_data["summary"],
                    "updatedAt": now,
                }

        # Update history sections
        history_updates = update_data.get("history", {})
        for section in ["recentMonths", "earlierContext", "longTermBackground"]:
            section_data = history_updates.get(section, {})
            if section_data.get("shouldUpdate") and section_data.get("summary"):
                current_memory["history"][section] = {
                    "summary": section_data["summary"],
                    "updatedAt": now,
                }

        # Remove facts
        facts_to_remove = set(update_data.get("factsToRemove", []))
        if facts_to_remove:
            current_memory["facts"] = [
                f for f in current_memory.get("facts", []) if f.get("id") not in facts_to_remove
            ]

        # Add new facts (setdefault guards against memory files saved without a "facts" key)
        facts = current_memory.setdefault("facts", [])
        new_facts = update_data.get("newFacts", [])
        for fact in new_facts:
            confidence = fact.get("confidence", 0.5)
            if confidence >= config.fact_confidence_threshold:
                fact_entry = {
                    "id": f"fact_{uuid.uuid4().hex[:8]}",
                    "content": fact.get("content", ""),
                    "category": fact.get("category", "context"),
                    "confidence": confidence,
                    "createdAt": now,
                    "source": thread_id or "unknown",
                }
                facts.append(fact_entry)

        # Enforce the max facts limit
        if len(current_memory["facts"]) > config.max_facts:
            # Sort by confidence and keep the top ones
            current_memory["facts"] = sorted(
                current_memory["facts"],
                key=lambda f: f.get("confidence", 0),
                reverse=True,
            )[: config.max_facts]

        return current_memory


def update_memory_from_conversation(messages: list[Any], thread_id: str | None = None, agent_name: str | None = None) -> bool:
    """Convenience function to update memory from a conversation.

    Args:
        messages: List of conversation messages.
        thread_id: Optional thread ID.
        agent_name: If provided, updates per-agent memory. If None, updates global memory.

    Returns:
        True if successful, False otherwise.
    """
    updater = MemoryUpdater()
    return updater.update_memory(messages, thread_id, agent_name)
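

# End-to-end flow in brief (illustrative): the queue calls this after the
# debounce window elapses. A synchronous call looks like:
#
#   ok = update_memory_from_conversation(filtered_messages, thread_id="t1")
#   # ok is False when memory is disabled, the conversation is empty, or the
#   # LLM response fails JSON parsing; True once the file is written atomically.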
@@ -0,0 +1,173 @@
"""Middleware for intercepting clarification requests and presenting them to the user."""

from collections.abc import Awaitable, Callable
from typing import override

from langchain.agents import AgentState
from langchain.agents.middleware import AgentMiddleware
from langchain_core.messages import ToolMessage
from langgraph.graph import END
from langgraph.prebuilt.tool_node import ToolCallRequest
from langgraph.types import Command


class ClarificationMiddlewareState(AgentState):
    """Compatible with the `ThreadState` schema."""

    pass


class ClarificationMiddleware(AgentMiddleware[ClarificationMiddlewareState]):
    """Intercepts clarification tool calls and interrupts execution to present questions to the user.

    When the model calls the `ask_clarification` tool, this middleware:
    1. Intercepts the tool call before execution
    2. Extracts the clarification question and metadata
    3. Formats a user-friendly message
    4. Returns a Command that interrupts execution and presents the question
    5. Waits for the user response before continuing

    This replaces the tool-based approach where clarification continued the conversation flow.
    """

    state_schema = ClarificationMiddlewareState

    def _is_chinese(self, text: str) -> bool:
        """Check if text contains Chinese characters.

        Args:
            text: Text to check

        Returns:
            True if text contains Chinese characters
        """
        return any("\u4e00" <= char <= "\u9fff" for char in text)

    def _format_clarification_message(self, args: dict) -> str:
        """Format the clarification arguments into a user-friendly message.

        Args:
            args: The tool call arguments containing clarification details

        Returns:
            Formatted message string
        """
        question = args.get("question", "")
        clarification_type = args.get("clarification_type", "missing_info")
        context = args.get("context")
        options = args.get("options", [])

        # Type-specific icons
        type_icons = {
            "missing_info": "❓",
            "ambiguous_requirement": "🤔",
            "approach_choice": "🔀",
            "risk_confirmation": "⚠️",
            "suggestion": "💡",
        }

        icon = type_icons.get(clarification_type, "❓")

        # Build the message naturally
        message_parts = []

        # Add the icon and question together for a more natural flow
        if context:
            # If there's context, present it first as background
            message_parts.append(f"{icon} {context}")
            message_parts.append(f"\n{question}")
        else:
            # Just the question with the icon
            message_parts.append(f"{icon} {question}")

        # Add options in a cleaner format
        if options and len(options) > 0:
            message_parts.append("")  # blank line for spacing
            for i, option in enumerate(options, 1):
                message_parts.append(f" {i}. {option}")

        return "\n".join(message_parts)

    def _handle_clarification(self, request: ToolCallRequest) -> Command:
        """Handle a clarification request and return a command to interrupt execution.

        Args:
            request: Tool call request

        Returns:
            Command that interrupts execution with the formatted clarification message
        """
        # Extract the clarification arguments
        args = request.tool_call.get("args", {})
        question = args.get("question", "")

        print("[ClarificationMiddleware] Intercepted clarification request")
        print(f"[ClarificationMiddleware] Question: {question}")

        # Format the clarification message
        formatted_message = self._format_clarification_message(args)

        # Get the tool call ID
        tool_call_id = request.tool_call.get("id", "")

        # Create a ToolMessage with the formatted question.
        # This will be added to the message history.
        tool_message = ToolMessage(
            content=formatted_message,
            tool_call_id=tool_call_id,
            name="ask_clarification",
        )

        # Return a Command that:
        # 1. Adds the formatted tool message
        # 2. Interrupts execution by going to __end__
        # Note: We don't add an extra AIMessage here - the frontend will detect
        # and display ask_clarification tool messages directly
        return Command(
            update={"messages": [tool_message]},
            goto=END,
        )

    @override
    def wrap_tool_call(
        self,
        request: ToolCallRequest,
        handler: Callable[[ToolCallRequest], ToolMessage | Command],
    ) -> ToolMessage | Command:
        """Intercept ask_clarification tool calls and interrupt execution (sync version).

        Args:
            request: Tool call request
            handler: Original tool execution handler

        Returns:
            Command that interrupts execution with the formatted clarification message
        """
        # Check if this is an ask_clarification tool call
        if request.tool_call.get("name") != "ask_clarification":
            # Not a clarification call, execute normally
            return handler(request)

        return self._handle_clarification(request)

    @override
    async def awrap_tool_call(
        self,
        request: ToolCallRequest,
        handler: Callable[[ToolCallRequest], Awaitable[ToolMessage | Command]],
    ) -> ToolMessage | Command:
        """Intercept ask_clarification tool calls and interrupt execution (async version).

        Args:
            request: Tool call request
            handler: Original tool execution handler (async)

        Returns:
            Command that interrupts execution with the formatted clarification message
        """
        # Check if this is an ask_clarification tool call
        if request.tool_call.get("name") != "ask_clarification":
            # Not a clarification call, execute normally
            return await handler(request)

        return self._handle_clarification(request)
@@ -0,0 +1,110 @@
"""Middleware to fix dangling tool calls in message history.

A dangling tool call occurs when an AIMessage contains tool_calls but there are
no corresponding ToolMessages in the history (e.g., due to user interruption or
request cancellation). This causes LLM errors due to incomplete message format.

This middleware intercepts the model call to detect and patch such gaps by
inserting synthetic ToolMessages with an error indicator immediately after the
AIMessage that made the tool calls, ensuring correct message ordering.

Note: Uses wrap_model_call instead of before_model to ensure patches are inserted
at the correct positions (immediately after each dangling AIMessage), not appended
to the end of the message list as before_model + the add_messages reducer would do.
"""

import logging
from collections.abc import Awaitable, Callable
from typing import override

from langchain.agents import AgentState
from langchain.agents.middleware import AgentMiddleware
from langchain.agents.middleware.types import ModelCallResult, ModelRequest, ModelResponse
from langchain_core.messages import ToolMessage

logger = logging.getLogger(__name__)


class DanglingToolCallMiddleware(AgentMiddleware[AgentState]):
    """Inserts placeholder ToolMessages for dangling tool calls before model invocation.

    Scans the message history for AIMessages whose tool_calls lack corresponding
    ToolMessages, and injects synthetic error responses immediately after the
    offending AIMessage so the LLM receives a well-formed conversation.
    """

    def _build_patched_messages(self, messages: list) -> list | None:
        """Return a new message list with patches inserted at the correct positions.

        For each AIMessage with dangling tool_calls (no corresponding ToolMessage),
        a synthetic ToolMessage is inserted immediately after that AIMessage.
        Returns None if no patches are needed.
        """
        # Collect the IDs of all existing ToolMessages
        existing_tool_msg_ids: set[str] = set()
        for msg in messages:
            if isinstance(msg, ToolMessage):
                existing_tool_msg_ids.add(msg.tool_call_id)

        # Check whether any patching is needed
        needs_patch = False
        for msg in messages:
            if getattr(msg, "type", None) != "ai":
                continue
            for tc in getattr(msg, "tool_calls", None) or []:
                tc_id = tc.get("id")
                if tc_id and tc_id not in existing_tool_msg_ids:
                    needs_patch = True
                    break
            if needs_patch:
                break

        if not needs_patch:
            return None

        # Build a new list with patches inserted right after each dangling AIMessage
        patched: list = []
        patched_ids: set[str] = set()
        patch_count = 0
        for msg in messages:
            patched.append(msg)
            if getattr(msg, "type", None) != "ai":
                continue
            for tc in getattr(msg, "tool_calls", None) or []:
                tc_id = tc.get("id")
                if tc_id and tc_id not in existing_tool_msg_ids and tc_id not in patched_ids:
                    patched.append(
                        ToolMessage(
                            content="[Tool call was interrupted and did not return a result.]",
                            tool_call_id=tc_id,
                            name=tc.get("name", "unknown"),
                            status="error",
                        )
                    )
                    patched_ids.add(tc_id)
                    patch_count += 1

        logger.warning(f"Injecting {patch_count} placeholder ToolMessage(s) for dangling tool calls")
        return patched
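
    # Illustrative before/after: given [AIMessage(tool_calls=[{"id": "c1", ...}]),
    # HumanMessage("stop")] with no ToolMessage for "c1", the patched history
    # becomes [AIMessage(...), ToolMessage(tool_call_id="c1", status="error"),
    # HumanMessage("stop")], restoring the call/response pairing the LLM expects.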

    @override
    def wrap_model_call(
        self,
        request: ModelRequest,
        handler: Callable[[ModelRequest], ModelResponse],
    ) -> ModelCallResult:
        patched = self._build_patched_messages(request.messages)
        if patched is not None:
            request = request.override(messages=patched)
        return handler(request)

    @override
    async def awrap_model_call(
        self,
        request: ModelRequest,
        handler: Callable[[ModelRequest], Awaitable[ModelResponse]],
    ) -> ModelCallResult:
        patched = self._build_patched_messages(request.messages)
        if patched is not None:
            request = request.override(messages=patched)
        return await handler(request)
@@ -0,0 +1,153 @@
"""Middleware for memory mechanism."""

import re
from copy import copy
from typing import Any, override

from langchain.agents import AgentState
from langchain.agents.middleware import AgentMiddleware
from langgraph.runtime import Runtime

from src.agents.memory.queue import get_memory_queue
from src.config.memory_config import get_memory_config

# Matches the ephemeral <uploaded_files> block injected by UploadsMiddleware.
_UPLOAD_BLOCK_RE = re.compile(
    r"<uploaded_files>[\s\S]*?</uploaded_files>\n*", re.IGNORECASE
)


class MemoryMiddlewareState(AgentState):
    """Compatible with the `ThreadState` schema."""

    pass


def _filter_messages_for_memory(messages: list[Any]) -> list[Any]:
    """Filter messages to keep only user inputs and final assistant responses.

    This filters out:
    - Tool messages (intermediate tool call results)
    - AI messages with tool_calls (intermediate steps, not final responses)
    - The <uploaded_files> block injected by UploadsMiddleware into human messages
      (file paths are session-scoped and must not persist in long-term memory).
      The user's actual question is preserved; only turns whose content is entirely
      the upload block (nothing remains after stripping) are dropped along with
      their paired assistant response.

    Only keeps:
    - Human messages (with the ephemeral upload block removed)
    - AI messages without tool_calls (final assistant responses), unless the
      paired human turn was upload-only and had no real user text.

    Args:
        messages: List of all conversation messages.

    Returns:
        Filtered list containing only user inputs and final assistant responses.
    """
    filtered = []
    skip_next_ai = False
    for msg in messages:
        msg_type = getattr(msg, "type", None)

        if msg_type == "human":
            content = getattr(msg, "content", "")
            if isinstance(content, list):
                content = " ".join(
                    p.get("text", "") for p in content if isinstance(p, dict)
                )
            content_str = str(content)
            if "<uploaded_files>" in content_str:
                # Strip the ephemeral upload block; keep the user's real question.
                stripped = _UPLOAD_BLOCK_RE.sub("", content_str).strip()
                if not stripped:
                    # Nothing left — the entire turn was upload bookkeeping;
                    # skip it and the paired assistant response.
                    skip_next_ai = True
                    continue
                # Rebuild the message with cleaned content so the user's question
                # is still available for memory summarisation.
                clean_msg = copy(msg)
                clean_msg.content = stripped
                filtered.append(clean_msg)
                skip_next_ai = False
            else:
                filtered.append(msg)
                skip_next_ai = False
        elif msg_type == "ai":
            tool_calls = getattr(msg, "tool_calls", None)
            if not tool_calls:
                if skip_next_ai:
                    skip_next_ai = False
                    continue
                filtered.append(msg)
        # Tool messages and AI messages with tool_calls are skipped

    return filtered
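
# Illustrative filtering: a turn of [human "<uploaded_files>...</uploaded_files>",
# ai(tool_calls=[...]), tool, ai "Done."] collapses to [], since the upload-only
# human turn and its paired final answer are both dropped; a human message with
# real text after the block keeps that text.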


class MemoryMiddleware(AgentMiddleware[MemoryMiddlewareState]):
    """Middleware that queues the conversation for a memory update after agent execution.

    This middleware:
    1. After each agent execution, queues the conversation for a memory update
    2. Only includes user inputs and final assistant responses (ignores tool calls)
    3. The queue uses debouncing to batch multiple updates together
    4. Memory is updated asynchronously via LLM summarization
    """

    state_schema = MemoryMiddlewareState

    def __init__(self, agent_name: str | None = None):
        """Initialize the MemoryMiddleware.

        Args:
            agent_name: If provided, memory is stored per-agent. If None, uses global memory.
        """
        super().__init__()
        self._agent_name = agent_name

    @override
    def after_agent(self, state: MemoryMiddlewareState, runtime: Runtime) -> dict | None:
        """Queue the conversation for a memory update after the agent completes.

        Args:
            state: The current agent state.
            runtime: The runtime context.

        Returns:
            None (no state changes needed from this middleware).
        """
        config = get_memory_config()
        if not config.enabled:
            return None

        # Get the thread ID from the runtime context
        thread_id = runtime.context.get("thread_id")
        if not thread_id:
            print("MemoryMiddleware: No thread_id in context, skipping memory update")
            return None

        # Get messages from state
        messages = state.get("messages", [])
        if not messages:
            print("MemoryMiddleware: No messages in state, skipping memory update")
            return None

        # Filter to only keep user inputs and final assistant responses
        filtered_messages = _filter_messages_for_memory(messages)

        # Only queue if there's a meaningful conversation:
        # at minimum one user message and one assistant response.
        user_messages = [m for m in filtered_messages if getattr(m, "type", None) == "human"]
        assistant_messages = [m for m in filtered_messages if getattr(m, "type", None) == "ai"]

        if not user_messages or not assistant_messages:
            return None

        # Queue the filtered conversation for a memory update
        queue = get_memory_queue()
        queue.add(thread_id=thread_id, messages=filtered_messages, agent_name=self._agent_name)

        return None
@@ -0,0 +1,75 @@
"""Middleware to enforce maximum concurrent subagent tool calls per model response."""

import logging
from typing import override

from langchain.agents import AgentState
from langchain.agents.middleware import AgentMiddleware
from langgraph.runtime import Runtime

from src.subagents.executor import MAX_CONCURRENT_SUBAGENTS

logger = logging.getLogger(__name__)

# Valid range for max_concurrent_subagents
MIN_SUBAGENT_LIMIT = 2
MAX_SUBAGENT_LIMIT = 4


def _clamp_subagent_limit(value: int) -> int:
    """Clamp the subagent limit to the valid range [2, 4]."""
    return max(MIN_SUBAGENT_LIMIT, min(MAX_SUBAGENT_LIMIT, value))
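
# Quick check (illustrative): _clamp_subagent_limit(1) == 2,
# _clamp_subagent_limit(3) == 3, and _clamp_subagent_limit(10) == 4.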


class SubagentLimitMiddleware(AgentMiddleware[AgentState]):
    """Truncates excess 'task' tool calls from a single model response.

    When an LLM generates more than max_concurrent parallel task tool calls
    in one response, this middleware keeps only the first max_concurrent and
    discards the rest. This is more reliable than prompt-based limits.

    Args:
        max_concurrent: Maximum number of concurrent subagent calls allowed.
            Defaults to MAX_CONCURRENT_SUBAGENTS (3). Clamped to [2, 4].
    """

    def __init__(self, max_concurrent: int = MAX_CONCURRENT_SUBAGENTS):
        super().__init__()
        self.max_concurrent = _clamp_subagent_limit(max_concurrent)

    def _truncate_task_calls(self, state: AgentState) -> dict | None:
        messages = state.get("messages", [])
        if not messages:
            return None

        last_msg = messages[-1]
        if getattr(last_msg, "type", None) != "ai":
            return None

        tool_calls = getattr(last_msg, "tool_calls", None)
        if not tool_calls:
            return None

        # Count task tool calls
        task_indices = [i for i, tc in enumerate(tool_calls) if tc.get("name") == "task"]
        if len(task_indices) <= self.max_concurrent:
            return None

        # Build the set of indices to drop (excess task calls beyond the limit)
        indices_to_drop = set(task_indices[self.max_concurrent :])
        truncated_tool_calls = [tc for i, tc in enumerate(tool_calls) if i not in indices_to_drop]

        dropped_count = len(indices_to_drop)
        logger.warning(
            f"Truncated {dropped_count} excess task tool call(s) from model response (limit: {self.max_concurrent})"
        )

        # Replace the AIMessage with truncated tool_calls (same id triggers replacement)
        updated_msg = last_msg.model_copy(update={"tool_calls": truncated_tool_calls})
        return {"messages": [updated_msg]}

    @override
    def after_model(self, state: AgentState, runtime: Runtime) -> dict | None:
        return self._truncate_task_calls(state)

    @override
    async def aafter_model(self, state: AgentState, runtime: Runtime) -> dict | None:
        return self._truncate_task_calls(state)
@@ -0,0 +1,90 @@
from typing import NotRequired, override

from langchain.agents import AgentState
from langchain.agents.middleware import AgentMiddleware
from langgraph.runtime import Runtime

from src.agents.thread_state import ThreadDataState
from src.config.paths import Paths, get_paths


class ThreadDataMiddlewareState(AgentState):
    """Compatible with the `ThreadState` schema."""

    thread_data: NotRequired[ThreadDataState | None]


class ThreadDataMiddleware(AgentMiddleware[ThreadDataMiddlewareState]):
    """Create thread data directories for each thread execution.

    Creates the following directory structure:
    - {base_dir}/threads/{thread_id}/user-data/workspace
    - {base_dir}/threads/{thread_id}/user-data/uploads
    - {base_dir}/threads/{thread_id}/user-data/outputs

    Lifecycle Management:
    - With lazy_init=True (default): Only compute paths; directories are created on demand
    - With lazy_init=False: Eagerly create directories in before_agent()
    """

    state_schema = ThreadDataMiddlewareState

    def __init__(self, base_dir: str | None = None, lazy_init: bool = True):
        """Initialize the middleware.

        Args:
            base_dir: Base directory for thread data. Defaults to Paths resolution.
            lazy_init: If True, defer directory creation until needed.
                If False, create directories eagerly in before_agent().
                Default is True for optimal performance.
        """
        super().__init__()
        self._paths = Paths(base_dir) if base_dir else get_paths()
        self._lazy_init = lazy_init

    def _get_thread_paths(self, thread_id: str) -> dict[str, str]:
        """Get the paths for a thread's data directories.

        Args:
            thread_id: The thread ID.

        Returns:
            Dictionary with workspace_path, uploads_path, and outputs_path.
        """
        return {
            "workspace_path": str(self._paths.sandbox_work_dir(thread_id)),
            "uploads_path": str(self._paths.sandbox_uploads_dir(thread_id)),
            "outputs_path": str(self._paths.sandbox_outputs_dir(thread_id)),
        }

    def _create_thread_directories(self, thread_id: str) -> dict[str, str]:
        """Create the thread data directories.

        Args:
            thread_id: The thread ID.

        Returns:
            Dictionary with the created directory paths.
        """
        self._paths.ensure_thread_dirs(thread_id)
        return self._get_thread_paths(thread_id)

    @override
    def before_agent(self, state: ThreadDataMiddlewareState, runtime: Runtime) -> dict | None:
        thread_id = runtime.context.get("thread_id")
        if thread_id is None:
            raise ValueError("Thread ID is required in the context")

        if self._lazy_init:
            # Lazy initialization: only compute paths, don't create directories
            paths = self._get_thread_paths(thread_id)
        else:
            # Eager initialization: create directories immediately
            paths = self._create_thread_directories(thread_id)
            print(f"Created thread data directories for thread {thread_id}")

        return {
            "thread_data": {
                **paths,
            }
        }
@@ -0,0 +1,93 @@
"""Middleware for automatic thread title generation."""

from typing import NotRequired, override

from langchain.agents import AgentState
from langchain.agents.middleware import AgentMiddleware
from langgraph.runtime import Runtime

from src.config.title_config import get_title_config
from src.models import create_chat_model


class TitleMiddlewareState(AgentState):
    """Compatible with the `ThreadState` schema."""

    title: NotRequired[str | None]


class TitleMiddleware(AgentMiddleware[TitleMiddlewareState]):
    """Automatically generate a title for the thread after the first user message."""

    state_schema = TitleMiddlewareState

    def _should_generate_title(self, state: TitleMiddlewareState) -> bool:
        """Check if we should generate a title for this thread."""
        config = get_title_config()
        if not config.enabled:
            return False

        # Check if the thread already has a title in state
        if state.get("title"):
            return False

        # Check if this is the first turn (has at least one user message and one assistant response)
        messages = state.get("messages", [])
        if len(messages) < 2:
            return False

        # Count user and assistant messages
        user_messages = [m for m in messages if m.type == "human"]
        assistant_messages = [m for m in messages if m.type == "ai"]

        # Generate a title after the first complete exchange
        return len(user_messages) == 1 and len(assistant_messages) >= 1

    def _generate_title(self, state: TitleMiddlewareState) -> str:
        """Generate a concise title based on the conversation."""
        config = get_title_config()
        messages = state.get("messages", [])

        # Get the first user message and the first assistant response
        user_msg_content = next((m.content for m in messages if m.type == "human"), "")
        assistant_msg_content = next((m.content for m in messages if m.type == "ai"), "")

        # Ensure content is a string (LangChain messages can have list content)
        user_msg = str(user_msg_content) if user_msg_content else ""
        assistant_msg = str(assistant_msg_content) if assistant_msg_content else ""

        # Use a lightweight model to generate the title
        model = create_chat_model(thinking_enabled=False)

        prompt = config.prompt_template.format(
            max_words=config.max_words,
            user_msg=user_msg[:500],
            assistant_msg=assistant_msg[:500],
        )

        try:
            response = model.invoke(prompt)
            # Ensure the response content is a string
            title_content = str(response.content) if response.content else ""
            title = title_content.strip().strip('"').strip("'")
            # Limit to the maximum number of characters
            return title[: config.max_chars] if len(title) > config.max_chars else title
        except Exception as e:
            print(f"Failed to generate title: {e}")
            # Fallback: use the first part of the user message (by character count)
            fallback_chars = min(config.max_chars, 50)  # Use max_chars or 50, whichever is smaller
            if len(user_msg) > fallback_chars:
                return user_msg[:fallback_chars].rstrip() + "..."
            return user_msg if user_msg else "New Conversation"
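
    # Fallback behaviour (illustrative): with max_chars >= 50, a 120-character
    # first user message becomes its first 50 characters plus "...", while an
    # empty conversation yields the literal "New Conversation".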

    @override
    def after_agent(self, state: TitleMiddlewareState, runtime: Runtime) -> dict | None:
        """Generate and set the thread title after the first agent response."""
        if self._should_generate_title(state):
            title = self._generate_title(state)
            print(f"Generated thread title: {title}")

            # Store the title in state (persisted by the checkpointer if configured)
            return {"title": title}

        return None
@@ -0,0 +1,204 @@
"""Middleware to inject uploaded files information into agent context."""

import logging
from pathlib import Path
from typing import NotRequired, override

from langchain.agents import AgentState
from langchain.agents.middleware import AgentMiddleware
from langchain_core.messages import HumanMessage
from langgraph.runtime import Runtime

from src.config.paths import Paths, get_paths

logger = logging.getLogger(__name__)


class UploadsMiddlewareState(AgentState):
    """State schema for uploads middleware."""

    uploaded_files: NotRequired[list[dict] | None]


class UploadsMiddleware(AgentMiddleware[UploadsMiddlewareState]):
    """Middleware to inject uploaded files information into the agent context.

    Reads file metadata from the current message's additional_kwargs.files
    (set by the frontend after upload) and prepends an <uploaded_files> block
    to the last human message so the model knows which files are available.
    """

    state_schema = UploadsMiddlewareState

    def __init__(self, base_dir: str | None = None):
        """Initialize the middleware.

        Args:
            base_dir: Base directory for thread data. Defaults to Paths resolution.
        """
        super().__init__()
        self._paths = Paths(base_dir) if base_dir else get_paths()

    def _create_files_message(self, new_files: list[dict], historical_files: list[dict]) -> str:
        """Create a formatted message listing uploaded files.

        Args:
            new_files: Files uploaded in the current message.
            historical_files: Files uploaded in previous messages.

        Returns:
            Formatted string inside <uploaded_files> tags.
        """
        lines = ["<uploaded_files>"]

        lines.append("The following files were uploaded in this message:")
        lines.append("")
        if new_files:
            for file in new_files:
                size_kb = file["size"] / 1024
                size_str = f"{size_kb:.1f} KB" if size_kb < 1024 else f"{size_kb / 1024:.1f} MB"
                lines.append(f"- {file['filename']} ({size_str})")
                lines.append(f" Path: {file['path']}")
            lines.append("")
        else:
            lines.append("(empty)")

        if historical_files:
            lines.append("The following files were uploaded in previous messages and are still available:")
            lines.append("")
            for file in historical_files:
                size_kb = file["size"] / 1024
                size_str = f"{size_kb:.1f} KB" if size_kb < 1024 else f"{size_kb / 1024:.1f} MB"
                lines.append(f"- {file['filename']} ({size_str})")
                lines.append(f" Path: {file['path']}")
            lines.append("")

        lines.append("You can read these files using the `read_file` tool with the paths shown above.")
        lines.append("</uploaded_files>")

        return "\n".join(lines)
|
||||
|
||||
    def _files_from_kwargs(self, message: HumanMessage, uploads_dir: Path | None = None) -> list[dict] | None:
        """Extract file info from message additional_kwargs.files.

        The frontend sends uploaded file metadata in additional_kwargs.files
        after a successful upload. Each entry has: filename, size (bytes),
        path (virtual path), status.

        Args:
            message: The human message to inspect.
            uploads_dir: Physical uploads directory used to verify file existence.
                When provided, entries whose files no longer exist are skipped.

        Returns:
            List of file dicts with virtual paths, or None if the field is absent or empty.
        """
        kwargs_files = (message.additional_kwargs or {}).get("files")
        if not isinstance(kwargs_files, list) or not kwargs_files:
            return None

        files = []
        for f in kwargs_files:
            if not isinstance(f, dict):
                continue
            filename = f.get("filename") or ""
            if not filename or Path(filename).name != filename:
                continue
            if uploads_dir is not None and not (uploads_dir / filename).is_file():
                continue
            files.append(
                {
                    "filename": filename,
                    "size": int(f.get("size") or 0),
                    "path": f"/mnt/user-data/uploads/{filename}",
                    "extension": Path(filename).suffix,
                }
            )
        return files if files else None

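    # For illustration only (not part of the original module): a frontend payload
    # such as
    #
    #   HumanMessage(
    #       content="Summarize this file",
    #       additional_kwargs={
    #           "files": [{"filename": "report.pdf", "size": 240128, "status": "uploaded"}]
    #       },
    #   )
    #
    # would yield [{"filename": "report.pdf", "size": 240128,
    # "path": "/mnt/user-data/uploads/report.pdf", "extension": ".pdf"}],
    # assuming report.pdf actually exists in uploads_dir when one is given.
    # The filename and size are hypothetical.
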
    @override
    def before_agent(self, state: UploadsMiddlewareState, runtime: Runtime) -> dict | None:
        """Inject uploaded files information before agent execution.

        New files come from the current message's additional_kwargs.files.
        Historical files are scanned from the thread's uploads directory,
        excluding the new ones.

        Prepends <uploaded_files> context to the last human message content.
        The original additional_kwargs (including files metadata) is preserved
        on the updated message so the frontend can read it from the stream.

        Args:
            state: Current agent state.
            runtime: Runtime context containing thread_id.

        Returns:
            State updates including uploaded files list.
        """
        messages = list(state.get("messages", []))
        if not messages:
            return None

        last_message_index = len(messages) - 1
        last_message = messages[last_message_index]

        if not isinstance(last_message, HumanMessage):
            return None

        # Resolve uploads directory for existence checks
        thread_id = runtime.context.get("thread_id")
        uploads_dir = self._paths.sandbox_uploads_dir(thread_id) if thread_id else None

        # Get newly uploaded files from the current message's additional_kwargs.files
        new_files = self._files_from_kwargs(last_message, uploads_dir) or []

        # Collect historical files from the uploads directory (all except the new ones)
        new_filenames = {f["filename"] for f in new_files}
        historical_files: list[dict] = []
        if uploads_dir and uploads_dir.exists():
            for file_path in sorted(uploads_dir.iterdir()):
                if file_path.is_file() and file_path.name not in new_filenames:
                    stat = file_path.stat()
                    historical_files.append(
                        {
                            "filename": file_path.name,
                            "size": stat.st_size,
                            "path": f"/mnt/user-data/uploads/{file_path.name}",
                            "extension": file_path.suffix,
                        }
                    )

        if not new_files and not historical_files:
            return None

        logger.debug(f"New files: {[f['filename'] for f in new_files]}, historical: {[f['filename'] for f in historical_files]}")

        # Create files message and prepend to the last human message content
        files_message = self._create_files_message(new_files, historical_files)

        # Extract original content - handle both string and list formats
        original_content = ""
        if isinstance(last_message.content, str):
            original_content = last_message.content
        elif isinstance(last_message.content, list):
            text_parts = []
            for block in last_message.content:
                if isinstance(block, dict) and block.get("type") == "text":
                    text_parts.append(block.get("text", ""))
            original_content = "\n".join(text_parts)

        # Create new message with combined content.
        # Preserve additional_kwargs (including files metadata) so the frontend
        # can read structured file info from the streamed message.
        updated_message = HumanMessage(
            content=f"{files_message}\n\n{original_content}",
            id=last_message.id,
            additional_kwargs=last_message.additional_kwargs,
        )

        messages[last_message_index] = updated_message

        return {
            "uploaded_files": new_files,
            "messages": messages,
        }
@@ -0,0 +1,221 @@
"""Middleware for injecting image details into the conversation before the LLM call."""

import logging
from typing import NotRequired, override

from langchain.agents import AgentState
from langchain.agents.middleware import AgentMiddleware
from langchain_core.messages import AIMessage, HumanMessage, ToolMessage
from langgraph.runtime import Runtime

from src.agents.thread_state import ViewedImageData

logger = logging.getLogger(__name__)


class ViewImageMiddlewareState(AgentState):
    """Compatible with the `ThreadState` schema."""

    viewed_images: NotRequired[dict[str, ViewedImageData] | None]


class ViewImageMiddleware(AgentMiddleware[ViewImageMiddlewareState]):
    """Injects image details as a human message before LLM calls when view_image tools have completed.

    This middleware:
    1. Runs before each LLM call
    2. Checks if the last assistant message contains view_image tool calls
    3. Verifies all tool calls in that message have been completed (have corresponding ToolMessages)
    4. If conditions are met, creates a human message with all viewed image details (including base64 data)
    5. Adds the message to state so the LLM can see and analyze the images

    This enables the LLM to automatically receive and analyze images that were loaded via the
    view_image tool, without requiring explicit user prompts to describe the images.
    """

    state_schema = ViewImageMiddlewareState

    def _get_last_assistant_message(self, messages: list) -> AIMessage | None:
        """Get the last assistant message from the message list.

        Args:
            messages: List of messages

        Returns:
            Last AIMessage or None if not found
        """
        for msg in reversed(messages):
            if isinstance(msg, AIMessage):
                return msg
        return None

    def _has_view_image_tool(self, message: AIMessage) -> bool:
        """Check if the assistant message contains view_image tool calls.

        Args:
            message: Assistant message to check

        Returns:
            True if the message contains view_image tool calls
        """
        if not hasattr(message, "tool_calls") or not message.tool_calls:
            return False

        return any(tool_call.get("name") == "view_image" for tool_call in message.tool_calls)

    def _all_tools_completed(self, messages: list, assistant_msg: AIMessage) -> bool:
        """Check if all tool calls in the assistant message have been completed.

        Args:
            messages: List of all messages
            assistant_msg: The assistant message containing tool calls

        Returns:
            True if all tool calls have corresponding ToolMessages
        """
        if not hasattr(assistant_msg, "tool_calls") or not assistant_msg.tool_calls:
            return False

        # Get all tool call IDs from the assistant message
        tool_call_ids = {tool_call.get("id") for tool_call in assistant_msg.tool_calls if tool_call.get("id")}

        # Find the index of the assistant message
        try:
            assistant_idx = messages.index(assistant_msg)
        except ValueError:
            return False

        # Get all ToolMessages after the assistant message
        completed_tool_ids = set()
        for msg in messages[assistant_idx + 1 :]:
            if isinstance(msg, ToolMessage) and msg.tool_call_id:
                completed_tool_ids.add(msg.tool_call_id)

        # Check if all tool calls have been completed
        return tool_call_ids.issubset(completed_tool_ids)

    def _create_image_details_message(self, state: ViewImageMiddlewareState) -> list[str | dict]:
        """Create a formatted message with all viewed image details.

        Args:
            state: Current state containing viewed_images

        Returns:
            List of content blocks (text and images) for the HumanMessage
        """
        viewed_images = state.get("viewed_images", {})
        if not viewed_images:
            return ["No images have been viewed."]

        # Build the message with image information
        content_blocks: list[str | dict] = [{"type": "text", "text": "Here are the images you've viewed:"}]

        for image_path, image_data in viewed_images.items():
            mime_type = image_data.get("mime_type", "unknown")
            base64_data = image_data.get("base64", "")

            # Add text description
            content_blocks.append({"type": "text", "text": f"\n- **{image_path}** ({mime_type})"})

            # Add the actual image data so the LLM can "see" it
            if base64_data:
                content_blocks.append(
                    {
                        "type": "image_url",
                        "image_url": {"url": f"data:{mime_type};base64,{base64_data}"},
                    }
                )

        return content_blocks

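    # For illustration only (not part of the original module): with a single
    # viewed image, the returned content blocks would look like the following,
    # where the path and base64 prefix are hypothetical:
    #
    #   [
    #       {"type": "text", "text": "Here are the images you've viewed:"},
    #       {"type": "text", "text": "\n- **/mnt/user-data/uploads/chart.png** (image/png)"},
    #       {"type": "image_url", "image_url": {"url": "data:image/png;base64,iVBORw0..."}},
    #   ]
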
    def _should_inject_image_message(self, state: ViewImageMiddlewareState) -> bool:
        """Determine if we should inject an image details message.

        Args:
            state: Current state

        Returns:
            True if we should inject the message
        """
        messages = state.get("messages", [])
        if not messages:
            return False

        # Get the last assistant message
        last_assistant_msg = self._get_last_assistant_message(messages)
        if not last_assistant_msg:
            return False

        # Check if it has view_image tool calls
        if not self._has_view_image_tool(last_assistant_msg):
            return False

        # Check if all tools have been completed
        if not self._all_tools_completed(messages, last_assistant_msg):
            return False

        # Check if we've already added an image details message:
        # look for a human message after the last assistant message that contains image details
        assistant_idx = messages.index(last_assistant_msg)
        for msg in messages[assistant_idx + 1 :]:
            if isinstance(msg, HumanMessage):
                content_str = str(msg.content)
                if "Here are the images you've viewed" in content_str or "Here are the details of the images you've viewed" in content_str:
                    # Already added, don't add again
                    return False

        return True

    def _inject_image_message(self, state: ViewImageMiddlewareState) -> dict | None:
        """Internal helper to inject the image details message.

        Args:
            state: Current state

        Returns:
            State update with additional human message, or None if no update needed
        """
        if not self._should_inject_image_message(state):
            return None

        # Create the image details message with text and image content
        image_content = self._create_image_details_message(state)

        # Create a new human message with mixed content (text + images)
        human_msg = HumanMessage(content=image_content)

        logger.debug("Injecting image details message with images before LLM call")

        # Return state update with the new message
        return {"messages": [human_msg]}

    @override
    def before_model(self, state: ViewImageMiddlewareState, runtime: Runtime) -> dict | None:
        """Inject image details message before the LLM call if view_image tools have completed (sync version).

        This runs before each LLM call, checking whether the previous turn included view_image
        tool calls that have all completed. If so, it injects a human message with the image
        details so the LLM can see and analyze the images.

        Args:
            state: Current state
            runtime: Runtime context (unused but required by interface)

        Returns:
            State update with additional human message, or None if no update needed
        """
        return self._inject_image_message(state)

    @override
    async def abefore_model(self, state: ViewImageMiddlewareState, runtime: Runtime) -> dict | None:
        """Inject image details message before the LLM call if view_image tools have completed (async version).

        This runs before each LLM call, checking whether the previous turn included view_image
        tool calls that have all completed. If so, it injects a human message with the image
        details so the LLM can see and analyze the images.

        Args:
            state: Current state
            runtime: Runtime context (unused but required by interface)

        Returns:
            State update with additional human message, or None if no update needed
        """
        return self._inject_image_message(state)
@@ -0,0 +1,55 @@
from typing import Annotated, NotRequired, TypedDict

from langchain.agents import AgentState


class SandboxState(TypedDict):
    sandbox_id: NotRequired[str | None]


class ThreadDataState(TypedDict):
    workspace_path: NotRequired[str | None]
    uploads_path: NotRequired[str | None]
    outputs_path: NotRequired[str | None]


class ViewedImageData(TypedDict):
    base64: str
    mime_type: str


def merge_artifacts(existing: list[str] | None, new: list[str] | None) -> list[str]:
    """Reducer for the artifacts list - merges and deduplicates artifacts."""
    if existing is None:
        return new or []
    if new is None:
        return existing
    # Use dict.fromkeys to deduplicate while preserving order
    return list(dict.fromkeys(existing + new))


def merge_viewed_images(existing: dict[str, ViewedImageData] | None, new: dict[str, ViewedImageData] | None) -> dict[str, ViewedImageData]:
    """Reducer for the viewed_images dict - merges image dictionaries.

    Special case: If new is an empty dict {}, it clears the existing images.
    This allows middlewares to clear the viewed_images state after processing.
    """
    if existing is None:
        return new or {}
    if new is None:
        return existing
    # Special case: empty dict means clear all viewed images
    if len(new) == 0:
        return {}
    # Merge dictionaries; new values override existing ones for the same keys
    return {**existing, **new}


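# Illustrative reducer semantics (not part of the original module):
#
#   merge_artifacts(["a.txt"], ["b.txt", "a.txt"])           # -> ["a.txt", "b.txt"]
#   merge_viewed_images({"a.png": img_a}, {"b.png": img_b})  # -> both keys kept
#   merge_viewed_images({"a.png": img_a}, {})                # -> {} (empty dict clears)
#   merge_viewed_images({"a.png": img_a}, None)              # -> {"a.png": img_a}
#
# Here img_a / img_b stand for hypothetical ViewedImageData dicts
# ({"base64": ..., "mime_type": ...}).
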
class ThreadState(AgentState):
    sandbox: NotRequired[SandboxState | None]
    thread_data: NotRequired[ThreadDataState | None]
    title: NotRequired[str | None]
    artifacts: Annotated[list[str], merge_artifacts]
    todos: NotRequired[list | None]
    uploaded_files: NotRequired[list[dict] | None]
    viewed_images: Annotated[dict[str, ViewedImageData], merge_viewed_images]  # image_path -> {base64, mime_type}
@@ -0,0 +1,875 @@
"""DeerFlowClient — Embedded Python client for the DeerFlow agent system.

Provides direct programmatic access to DeerFlow's agent capabilities
without requiring LangGraph Server or Gateway API processes.

Usage:
    from src.client import DeerFlowClient

    client = DeerFlowClient()
    response = client.chat("Analyze this paper for me", thread_id="my-thread")
    print(response)

    # Streaming
    for event in client.stream("hello"):
        print(event)
"""

import asyncio
import json
import logging
import mimetypes
import re
import shutil
import tempfile
import uuid
import zipfile
from collections.abc import Generator
from dataclasses import dataclass, field
from pathlib import Path
from typing import Any

from langchain.agents import create_agent
from langchain_core.messages import AIMessage, HumanMessage, SystemMessage, ToolMessage
from langchain_core.runnables import RunnableConfig

from src.agents.lead_agent.agent import _build_middlewares
from src.agents.lead_agent.prompt import apply_prompt_template
from src.agents.thread_state import ThreadState
from src.config.app_config import get_app_config, reload_app_config
from src.config.extensions_config import ExtensionsConfig, SkillStateConfig, get_extensions_config, reload_extensions_config
from src.config.paths import get_paths
from src.models import create_chat_model

logger = logging.getLogger(__name__)


@dataclass
class StreamEvent:
    """A single event from the streaming agent response.

    Event types align with the LangGraph SSE protocol:
    - ``"values"``: Full state snapshot (title, messages, artifacts).
    - ``"messages-tuple"``: Per-message update (AI text, tool calls, tool results).
    - ``"end"``: Stream finished.

    Attributes:
        type: Event type.
        data: Event payload. Contents vary by type.
    """

    type: str
    data: dict[str, Any] = field(default_factory=dict)


class DeerFlowClient:
    """Embedded Python client for the DeerFlow agent system.

    Provides direct programmatic access to DeerFlow's agent capabilities
    without requiring LangGraph Server or Gateway API processes.

    Note:
        Multi-turn conversations require a ``checkpointer``. Without one,
        each ``stream()`` / ``chat()`` call is stateless — ``thread_id``
        is only used for file isolation (uploads / artifacts).

        The system prompt (including date, memory, and skills context) is
        generated when the internal agent is first created and cached until
        the configuration key changes. Call :meth:`reset_agent` to force
        a refresh in long-running processes.

    Example::

        from src.client import DeerFlowClient

        client = DeerFlowClient()

        # Simple one-shot
        print(client.chat("hello"))

        # Streaming
        for event in client.stream("hello"):
            print(event.type, event.data)

        # Configuration queries
        print(client.list_models())
        print(client.list_skills())
    """

    def __init__(
        self,
        config_path: str | None = None,
        checkpointer=None,
        *,
        model_name: str | None = None,
        thinking_enabled: bool = True,
        subagent_enabled: bool = False,
        plan_mode: bool = False,
    ):
        """Initialize the client.

        Loads configuration but defers agent creation to first use.

        Args:
            config_path: Path to config.yaml. Uses default resolution if None.
            checkpointer: LangGraph checkpointer instance for state persistence.
                Required for multi-turn conversations on the same thread_id.
                Without a checkpointer, each call is stateless.
            model_name: Override the default model name from config.
            thinking_enabled: Enable the model's extended thinking.
            subagent_enabled: Enable subagent delegation.
            plan_mode: Enable the TodoList middleware for plan mode.
        """
        if config_path is not None:
            reload_app_config(config_path)
        self._app_config = get_app_config()

        self._checkpointer = checkpointer
        self._model_name = model_name
        self._thinking_enabled = thinking_enabled
        self._subagent_enabled = subagent_enabled
        self._plan_mode = plan_mode

        # Lazy agent — created on first call, recreated when config changes.
        self._agent = None
        self._agent_config_key: tuple | None = None

    def reset_agent(self) -> None:
        """Force the internal agent to be recreated on the next call.

        Use this after external changes (e.g. memory updates, skill
        installations) that should be reflected in the system prompt
        or tool set.
        """
        self._agent = None
        self._agent_config_key = None

    # ------------------------------------------------------------------
    # Internal helpers
    # ------------------------------------------------------------------

    @staticmethod
    def _atomic_write_json(path: Path, data: dict) -> None:
        """Write JSON to *path* atomically (temp file + replace)."""
        fd = tempfile.NamedTemporaryFile(
            mode="w", dir=path.parent, suffix=".tmp", delete=False,
        )
        try:
            json.dump(data, fd, indent=2)
            fd.close()
            Path(fd.name).replace(path)
        except BaseException:
            fd.close()
            Path(fd.name).unlink(missing_ok=True)
            raise

    def _get_runnable_config(self, thread_id: str, **overrides) -> RunnableConfig:
        """Build a RunnableConfig for agent invocation."""
        configurable = {
            "thread_id": thread_id,
            "model_name": overrides.get("model_name", self._model_name),
            "thinking_enabled": overrides.get("thinking_enabled", self._thinking_enabled),
            "is_plan_mode": overrides.get("plan_mode", self._plan_mode),
            "subagent_enabled": overrides.get("subagent_enabled", self._subagent_enabled),
        }
        return RunnableConfig(
            configurable=configurable,
            recursion_limit=overrides.get("recursion_limit", 100),
        )

    def _ensure_agent(self, config: RunnableConfig):
        """Create (or recreate) the agent when config-dependent params change."""
        cfg = config.get("configurable", {})
        key = (
            cfg.get("model_name"),
            cfg.get("thinking_enabled"),
            cfg.get("is_plan_mode"),
            cfg.get("subagent_enabled"),
        )

        if self._agent is not None and self._agent_config_key == key:
            return

        thinking_enabled = cfg.get("thinking_enabled", True)
        model_name = cfg.get("model_name")
        subagent_enabled = cfg.get("subagent_enabled", False)
        max_concurrent_subagents = cfg.get("max_concurrent_subagents", 3)

        kwargs: dict[str, Any] = {
            "model": create_chat_model(name=model_name, thinking_enabled=thinking_enabled),
            "tools": self._get_tools(model_name=model_name, subagent_enabled=subagent_enabled),
            "middleware": _build_middlewares(config, model_name=model_name),
            "system_prompt": apply_prompt_template(
                subagent_enabled=subagent_enabled,
                max_concurrent_subagents=max_concurrent_subagents,
            ),
            "state_schema": ThreadState,
        }
        if self._checkpointer is not None:
            kwargs["checkpointer"] = self._checkpointer

        self._agent = create_agent(**kwargs)
        self._agent_config_key = key
        logger.info("Agent created: model=%s, thinking=%s", model_name, thinking_enabled)

    @staticmethod
    def _get_tools(*, model_name: str | None, subagent_enabled: bool):
        """Lazy import to avoid a circular dependency at module level."""
        from src.tools import get_available_tools

        return get_available_tools(model_name=model_name, subagent_enabled=subagent_enabled)

    @staticmethod
    def _serialize_message(msg) -> dict:
        """Serialize a LangChain message to a plain dict for values events."""
        if isinstance(msg, AIMessage):
            d: dict[str, Any] = {"type": "ai", "content": msg.content, "id": getattr(msg, "id", None)}
            if msg.tool_calls:
                d["tool_calls"] = [{"name": tc["name"], "args": tc["args"], "id": tc.get("id")} for tc in msg.tool_calls]
            return d
        if isinstance(msg, ToolMessage):
            return {
                "type": "tool",
                "content": msg.content if isinstance(msg.content, str) else str(msg.content),
                "name": getattr(msg, "name", None),
                "tool_call_id": getattr(msg, "tool_call_id", None),
                "id": getattr(msg, "id", None),
            }
        if isinstance(msg, HumanMessage):
            return {"type": "human", "content": msg.content, "id": getattr(msg, "id", None)}
        if isinstance(msg, SystemMessage):
            return {"type": "system", "content": msg.content, "id": getattr(msg, "id", None)}
        return {"type": "unknown", "content": str(msg), "id": getattr(msg, "id", None)}

    @staticmethod
    def _extract_text(content) -> str:
        """Extract plain text from AIMessage content (str or list of blocks)."""
        if isinstance(content, str):
            return content
        if isinstance(content, list):
            parts = []
            for block in content:
                if isinstance(block, str):
                    parts.append(block)
                elif isinstance(block, dict) and block.get("type") == "text":
                    parts.append(block["text"])
            return "\n".join(parts) if parts else ""
        return str(content)

    # ------------------------------------------------------------------
    # Public API — conversation
    # ------------------------------------------------------------------

    def stream(
        self,
        message: str,
        *,
        thread_id: str | None = None,
        **kwargs,
    ) -> Generator[StreamEvent, None, None]:
        """Stream a conversation turn, yielding events incrementally.

        Each call sends one user message and yields events until the agent
        finishes its turn. A ``checkpointer`` must be provided at init time
        for multi-turn context to be preserved across calls.

        Event types align with the LangGraph SSE protocol so that
        consumers can switch between HTTP streaming and embedded mode
        without changing their event-handling logic.

        Args:
            message: User message text.
            thread_id: Thread ID for conversation context. Auto-generated if None.
            **kwargs: Override client defaults (model_name, thinking_enabled,
                plan_mode, subagent_enabled, recursion_limit).

        Yields:
            StreamEvent with one of:
            - type="values" data={"title": str|None, "messages": [...], "artifacts": [...]}
            - type="messages-tuple" data={"type": "ai", "content": str, "id": str}
            - type="messages-tuple" data={"type": "ai", "content": "", "id": str, "tool_calls": [...]}
            - type="messages-tuple" data={"type": "tool", "content": str, "name": str, "tool_call_id": str, "id": str}
            - type="end" data={}
        """
        if thread_id is None:
            thread_id = str(uuid.uuid4())

        config = self._get_runnable_config(thread_id, **kwargs)
        self._ensure_agent(config)

        state: dict[str, Any] = {"messages": [HumanMessage(content=message)]}
        context = {"thread_id": thread_id}

        seen_ids: set[str] = set()

        for chunk in self._agent.stream(state, config=config, context=context, stream_mode="values"):
            messages = chunk.get("messages", [])

            for msg in messages:
                msg_id = getattr(msg, "id", None)
                if msg_id and msg_id in seen_ids:
                    continue
                if msg_id:
                    seen_ids.add(msg_id)

                if isinstance(msg, AIMessage):
                    if msg.tool_calls:
                        yield StreamEvent(
                            type="messages-tuple",
                            data={
                                "type": "ai",
                                "content": "",
                                "id": msg_id,
                                "tool_calls": [
                                    {"name": tc["name"], "args": tc["args"], "id": tc.get("id")}
                                    for tc in msg.tool_calls
                                ],
                            },
                        )

                    text = self._extract_text(msg.content)
                    if text:
                        yield StreamEvent(
                            type="messages-tuple",
                            data={"type": "ai", "content": text, "id": msg_id},
                        )

                elif isinstance(msg, ToolMessage):
                    yield StreamEvent(
                        type="messages-tuple",
                        data={
                            "type": "tool",
                            "content": msg.content if isinstance(msg.content, str) else str(msg.content),
                            "name": getattr(msg, "name", None),
                            "tool_call_id": getattr(msg, "tool_call_id", None),
                            "id": msg_id,
                        },
                    )

            # Emit a values event for each state snapshot
            yield StreamEvent(
                type="values",
                data={
                    "title": chunk.get("title"),
                    "messages": [self._serialize_message(m) for m in messages],
                    "artifacts": chunk.get("artifacts", []),
                },
            )

        yield StreamEvent(type="end", data={})

    def chat(self, message: str, *, thread_id: str | None = None, **kwargs) -> str:
        """Send a message and return the final text response.

        Convenience wrapper around :meth:`stream` that returns only the
        **last** AI text from ``messages-tuple`` events. If the agent emits
        multiple text segments in one turn, intermediate segments are
        discarded. Use :meth:`stream` directly to capture all events.

        Args:
            message: User message text.
            thread_id: Thread ID for conversation context. Auto-generated if None.
            **kwargs: Override client defaults (same as stream()).

        Returns:
            The last AI message text, or an empty string if no response.
        """
        last_text = ""
        for event in self.stream(message, thread_id=thread_id, **kwargs):
            if event.type == "messages-tuple" and event.data.get("type") == "ai":
                content = event.data.get("content", "")
                if content:
                    last_text = content
        return last_text

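    # Illustrative sketch (not part of the original module): collecting every AI
    # text segment with stream(), versus keeping only the last one with chat():
    #
    #   client = DeerFlowClient()
    #   segments = [
    #       e.data["content"]
    #       for e in client.stream("hello")
    #       if e.type == "messages-tuple" and e.data.get("type") == "ai" and e.data.get("content")
    #   ]
    #   final = client.chat("hello")  # roughly segments[-1] when segments is non-empty
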
    # ------------------------------------------------------------------
    # Public API — configuration queries
    # ------------------------------------------------------------------

    def list_models(self) -> dict:
        """List available models from configuration.

        Returns:
            Dict with a "models" key containing a list of model info dicts,
            matching the Gateway API ``ModelsListResponse`` schema.
        """
        return {
            "models": [
                {
                    "name": model.name,
                    "display_name": getattr(model, "display_name", None),
                    "description": getattr(model, "description", None),
                    "supports_thinking": getattr(model, "supports_thinking", False),
                    "supports_reasoning_effort": getattr(model, "supports_reasoning_effort", False),
                }
                for model in self._app_config.models
            ]
        }

    def list_skills(self, enabled_only: bool = False) -> dict:
        """List available skills.

        Args:
            enabled_only: If True, only return enabled skills.

        Returns:
            Dict with a "skills" key containing a list of skill info dicts,
            matching the Gateway API ``SkillsListResponse`` schema.
        """
        from src.skills.loader import load_skills

        return {
            "skills": [
                {
                    "name": s.name,
                    "description": s.description,
                    "license": s.license,
                    "category": s.category,
                    "enabled": s.enabled,
                }
                for s in load_skills(enabled_only=enabled_only)
            ]
        }

    def get_memory(self) -> dict:
        """Get current memory data.

        Returns:
            Memory data dict (see src/agents/memory/updater.py for structure).
        """
        from src.agents.memory.updater import get_memory_data

        return get_memory_data()

    def get_model(self, name: str) -> dict | None:
        """Get a specific model's configuration by name.

        Args:
            name: Model name.

        Returns:
            Model info dict matching the Gateway API ``ModelResponse``
            schema, or None if not found.
        """
        model = self._app_config.get_model_config(name)
        if model is None:
            return None
        return {
            "name": model.name,
            "display_name": getattr(model, "display_name", None),
            "description": getattr(model, "description", None),
            "supports_thinking": getattr(model, "supports_thinking", False),
            "supports_reasoning_effort": getattr(model, "supports_reasoning_effort", False),
        }

    # ------------------------------------------------------------------
    # Public API — MCP configuration
    # ------------------------------------------------------------------

    def get_mcp_config(self) -> dict:
        """Get MCP server configurations.

        Returns:
            Dict with an "mcp_servers" key mapping server name to config,
            matching the Gateway API ``McpConfigResponse`` schema.
        """
        config = get_extensions_config()
        return {"mcp_servers": {name: server.model_dump() for name, server in config.mcp_servers.items()}}

    def update_mcp_config(self, mcp_servers: dict[str, dict]) -> dict:
        """Update MCP server configurations.

        Writes to extensions_config.json and reloads the cache.

        Args:
            mcp_servers: Dict mapping server name to config dict.
                Each value should contain keys like enabled, type, command, args, env, url, etc.

        Returns:
            Dict with an "mcp_servers" key, matching the Gateway API
            ``McpConfigResponse`` schema.

        Raises:
            OSError: If the config file cannot be written.
        """
        config_path = ExtensionsConfig.resolve_config_path()
        if config_path is None:
            raise FileNotFoundError(
                "Cannot locate extensions_config.json. "
                "Set DEER_FLOW_EXTENSIONS_CONFIG_PATH or ensure it exists in the project root."
            )

        current_config = get_extensions_config()

        config_data = {
            "mcpServers": mcp_servers,
            "skills": {name: {"enabled": skill.enabled} for name, skill in current_config.skills.items()},
        }

        self._atomic_write_json(config_path, config_data)

        self._agent = None
        reloaded = reload_extensions_config()
        return {"mcp_servers": {name: server.model_dump() for name, server in reloaded.mcp_servers.items()}}

    # ------------------------------------------------------------------
    # Public API — skills management
    # ------------------------------------------------------------------

    def get_skill(self, name: str) -> dict | None:
        """Get a specific skill by name.

        Args:
            name: Skill name.

        Returns:
            Skill info dict, or None if not found.
        """
        from src.skills.loader import load_skills

        skill = next((s for s in load_skills(enabled_only=False) if s.name == name), None)
        if skill is None:
            return None
        return {
            "name": skill.name,
            "description": skill.description,
            "license": skill.license,
            "category": skill.category,
            "enabled": skill.enabled,
        }

    def update_skill(self, name: str, *, enabled: bool) -> dict:
        """Update a skill's enabled status.

        Args:
            name: Skill name.
            enabled: New enabled status.

        Returns:
            Updated skill info dict.

        Raises:
            ValueError: If the skill is not found.
            OSError: If the config file cannot be written.
        """
        from src.skills.loader import load_skills

        skills = load_skills(enabled_only=False)
        skill = next((s for s in skills if s.name == name), None)
        if skill is None:
            raise ValueError(f"Skill '{name}' not found")

        config_path = ExtensionsConfig.resolve_config_path()
        if config_path is None:
            raise FileNotFoundError(
                "Cannot locate extensions_config.json. "
                "Set DEER_FLOW_EXTENSIONS_CONFIG_PATH or ensure it exists in the project root."
            )

        extensions_config = get_extensions_config()
        extensions_config.skills[name] = SkillStateConfig(enabled=enabled)

        config_data = {
            "mcpServers": {n: s.model_dump() for n, s in extensions_config.mcp_servers.items()},
            "skills": {n: {"enabled": sc.enabled} for n, sc in extensions_config.skills.items()},
        }

        self._atomic_write_json(config_path, config_data)

        self._agent = None
        reload_extensions_config()

        updated = next((s for s in load_skills(enabled_only=False) if s.name == name), None)
        if updated is None:
            raise RuntimeError(f"Skill '{name}' disappeared after update")
        return {
            "name": updated.name,
            "description": updated.description,
            "license": updated.license,
            "category": updated.category,
            "enabled": updated.enabled,
        }

    def install_skill(self, skill_path: str | Path) -> dict:
        """Install a skill from a .skill archive (ZIP).

        Args:
            skill_path: Path to the .skill file.

        Returns:
            Dict with success, skill_name, message.

        Raises:
            FileNotFoundError: If the file does not exist.
            ValueError: If the file is invalid.
        """
        from src.gateway.routers.skills import _validate_skill_frontmatter
        from src.skills.loader import get_skills_root_path

        path = Path(skill_path)
        if not path.exists():
            raise FileNotFoundError(f"Skill file not found: {skill_path}")
        if not path.is_file():
            raise ValueError(f"Path is not a file: {skill_path}")
        if path.suffix != ".skill":
            raise ValueError("File must have .skill extension")
        if not zipfile.is_zipfile(path):
            raise ValueError("File is not a valid ZIP archive")

        skills_root = get_skills_root_path()
        custom_dir = skills_root / "custom"
        custom_dir.mkdir(parents=True, exist_ok=True)

        with tempfile.TemporaryDirectory() as tmp:
            tmp_path = Path(tmp)
            with zipfile.ZipFile(path, "r") as zf:
                total_size = sum(info.file_size for info in zf.infolist())
                if total_size > 100 * 1024 * 1024:
                    raise ValueError("Skill archive too large when extracted (>100MB)")
                for info in zf.infolist():
                    # Reject absolute paths and parent-directory traversal (zip-slip)
                    if Path(info.filename).is_absolute() or ".." in Path(info.filename).parts:
                        raise ValueError(f"Unsafe path in archive: {info.filename}")
                zf.extractall(tmp_path)
            # Drop any symlinks extracted from the archive
            for p in tmp_path.rglob("*"):
                if p.is_symlink():
                    p.unlink()

            items = list(tmp_path.iterdir())
            if not items:
                raise ValueError("Skill archive is empty")

            skill_dir = items[0] if len(items) == 1 and items[0].is_dir() else tmp_path

            is_valid, message, skill_name = _validate_skill_frontmatter(skill_dir)
            if not is_valid:
                raise ValueError(f"Invalid skill: {message}")
            if not re.fullmatch(r"[a-zA-Z0-9_-]+", skill_name):
                raise ValueError(f"Invalid skill name: {skill_name}")

            target = custom_dir / skill_name
            if target.exists():
                raise ValueError(f"Skill '{skill_name}' already exists")

            shutil.copytree(skill_dir, target)

        return {"success": True, "skill_name": skill_name, "message": f"Skill '{skill_name}' installed successfully"}

    # ------------------------------------------------------------------
    # Public API — memory management
    # ------------------------------------------------------------------

    def reload_memory(self) -> dict:
        """Reload memory data from file, forcing cache invalidation.

        Returns:
            The reloaded memory data dict.
        """
        from src.agents.memory.updater import reload_memory_data

        return reload_memory_data()

    def get_memory_config(self) -> dict:
        """Get memory system configuration.

        Returns:
            Memory config dict.
        """
        from src.config.memory_config import get_memory_config

        config = get_memory_config()
        return {
            "enabled": config.enabled,
            "storage_path": config.storage_path,
            "debounce_seconds": config.debounce_seconds,
            "max_facts": config.max_facts,
            "fact_confidence_threshold": config.fact_confidence_threshold,
            "injection_enabled": config.injection_enabled,
            "max_injection_tokens": config.max_injection_tokens,
        }

    def get_memory_status(self) -> dict:
        """Get memory status: config + current data.

        Returns:
            Dict with "config" and "data" keys.
        """
        return {
            "config": self.get_memory_config(),
            "data": self.get_memory(),
        }

    # ------------------------------------------------------------------
    # Public API — file uploads
    # ------------------------------------------------------------------

    @staticmethod
    def _get_uploads_dir(thread_id: str) -> Path:
        """Get (and create) the uploads directory for a thread."""
        base = get_paths().sandbox_uploads_dir(thread_id)
        base.mkdir(parents=True, exist_ok=True)
        return base

    def upload_files(self, thread_id: str, files: list[str | Path]) -> dict:
        """Upload local files into a thread's uploads directory.

        PDF, PPT, Excel, and Word files are also converted to Markdown.

        Args:
            thread_id: Target thread ID.
            files: List of local file paths to upload.

        Returns:
            Dict with success, files, message — matching the Gateway API
            ``UploadResponse`` schema.

        Raises:
            FileNotFoundError: If any file does not exist.
        """
        from src.gateway.routers.uploads import CONVERTIBLE_EXTENSIONS, convert_file_to_markdown

        # Validate all files upfront to avoid partial uploads.
        resolved_files = []
        for f in files:
            p = Path(f)
            if not p.exists():
                raise FileNotFoundError(f"File not found: {f}")
            resolved_files.append(p)

        uploads_dir = self._get_uploads_dir(thread_id)
        uploaded_files: list[dict] = []

        for src_path in resolved_files:
            dest = uploads_dir / src_path.name
            shutil.copy2(src_path, dest)

            info: dict[str, Any] = {
                "filename": src_path.name,
                "size": str(dest.stat().st_size),
                "path": str(dest),
                "virtual_path": f"/mnt/user-data/uploads/{src_path.name}",
                "artifact_url": f"/api/threads/{thread_id}/artifacts/mnt/user-data/uploads/{src_path.name}",
            }

            if src_path.suffix.lower() in CONVERTIBLE_EXTENSIONS:
                try:
                    try:
                        # Already inside an event loop: run the coroutine in a worker thread.
                        asyncio.get_running_loop()
                        import concurrent.futures
                        with concurrent.futures.ThreadPoolExecutor() as pool:
                            md_path = pool.submit(lambda: asyncio.run(convert_file_to_markdown(dest))).result()
                    except RuntimeError:
                        # No running loop: safe to call asyncio.run directly.
                        md_path = asyncio.run(convert_file_to_markdown(dest))
                except Exception:
                    logger.warning("Failed to convert %s to markdown", src_path.name, exc_info=True)
                    md_path = None

                if md_path is not None:
                    info["markdown_file"] = md_path.name
                    info["markdown_virtual_path"] = f"/mnt/user-data/uploads/{md_path.name}"
                    info["markdown_artifact_url"] = f"/api/threads/{thread_id}/artifacts/mnt/user-data/uploads/{md_path.name}"

            uploaded_files.append(info)

        return {
            "success": True,
            "files": uploaded_files,
            "message": f"Successfully uploaded {len(uploaded_files)} file(s)",
        }

    def list_uploads(self, thread_id: str) -> dict:
        """List files in a thread's uploads directory.

        Args:
            thread_id: Thread ID.

        Returns:
            Dict with "files" and "count" keys, matching the Gateway API
            ``list_uploaded_files`` response.
        """
        uploads_dir = self._get_uploads_dir(thread_id)
        if not uploads_dir.exists():
            return {"files": [], "count": 0}

        files = []
        for fp in sorted(uploads_dir.iterdir()):
            if fp.is_file():
                stat = fp.stat()
                files.append({
                    "filename": fp.name,
                    "size": str(stat.st_size),
                    "path": str(fp),
                    "virtual_path": f"/mnt/user-data/uploads/{fp.name}",
                    "artifact_url": f"/api/threads/{thread_id}/artifacts/mnt/user-data/uploads/{fp.name}",
                    "extension": fp.suffix,
                    "modified": stat.st_mtime,
                })
        return {"files": files, "count": len(files)}

    def delete_upload(self, thread_id: str, filename: str) -> dict:
        """Delete a file from a thread's uploads directory.

        Args:
            thread_id: Thread ID.
            filename: Filename to delete.

        Returns:
            Dict with success and message, matching the Gateway API
            ``delete_uploaded_file`` response.

        Raises:
            FileNotFoundError: If the file does not exist.
            PermissionError: If path traversal is detected.
        """
        uploads_dir = self._get_uploads_dir(thread_id)
        file_path = (uploads_dir / filename).resolve()

        try:
            file_path.relative_to(uploads_dir.resolve())
        except ValueError as exc:
            raise PermissionError("Access denied: path traversal detected") from exc

        if not file_path.is_file():
            raise FileNotFoundError(f"File not found: {filename}")

        file_path.unlink()
        return {"success": True, "message": f"Deleted {filename}"}

    # ------------------------------------------------------------------
    # Public API — artifacts
    # ------------------------------------------------------------------

    def get_artifact(self, thread_id: str, path: str) -> tuple[bytes, str]:
        """Read an artifact file produced by the agent.

        Args:
            thread_id: Thread ID.
            path: Virtual path (e.g. "mnt/user-data/outputs/file.txt").

        Returns:
            Tuple of (file_bytes, mime_type).

        Raises:
            FileNotFoundError: If the artifact does not exist.
            ValueError: If the path is invalid.
        """
        virtual_prefix = "mnt/user-data"
        clean_path = path.lstrip("/")
        if not clean_path.startswith(virtual_prefix):
            raise ValueError(f"Path must start with /{virtual_prefix}")

        relative = clean_path[len(virtual_prefix):].lstrip("/")
        base_dir = get_paths().sandbox_user_data_dir(thread_id)
        actual = (base_dir / relative).resolve()

        try:
            actual.relative_to(base_dir.resolve())
        except ValueError as exc:
            raise PermissionError("Access denied: path traversal detected") from exc
        if not actual.exists():
            raise FileNotFoundError(f"Artifact not found: {path}")
        if not actual.is_file():
            raise ValueError(f"Path is not a file: {path}")

        mime_type, _ = mimetypes.guess_type(actual)
        return actual.read_bytes(), mime_type or "application/octet-stream"
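
    # Illustrative sketch (not part of the original module): reading an artifact
    # produced by the agent and saving it locally. The thread ID and file name
    # are hypothetical.
    #
    #   data, mime = client.get_artifact("my-thread", "/mnt/user-data/outputs/report.pdf")
    #   Path("report.pdf").write_bytes(data)  # mime would be "application/pdf"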
@@ -0,0 +1,19 @@
from .aio_sandbox import AioSandbox
from .aio_sandbox_provider import AioSandboxProvider
from .backend import SandboxBackend
from .file_state_store import FileSandboxStateStore
from .local_backend import LocalContainerBackend
from .remote_backend import RemoteSandboxBackend
from .sandbox_info import SandboxInfo
from .state_store import SandboxStateStore

__all__ = [
    "AioSandbox",
    "AioSandboxProvider",
    "FileSandboxStateStore",
    "LocalContainerBackend",
    "RemoteSandboxBackend",
    "SandboxBackend",
    "SandboxInfo",
    "SandboxStateStore",
]
@@ -0,0 +1,128 @@
import base64
import logging

from agent_sandbox import Sandbox as AioSandboxClient

from src.sandbox.sandbox import Sandbox

logger = logging.getLogger(__name__)


class AioSandbox(Sandbox):
    """Sandbox implementation using the agent-infra/sandbox Docker container.

    This sandbox connects to a running AIO sandbox container via HTTP API.
    """

    def __init__(self, id: str, base_url: str, home_dir: str | None = None):
        """Initialize the AIO sandbox.

        Args:
            id: Unique identifier for this sandbox instance.
            base_url: URL of the sandbox API (e.g., http://localhost:8080).
            home_dir: Home directory inside the sandbox. If None, it will be fetched from the sandbox.
        """
        super().__init__(id)
        self._base_url = base_url
        self._client = AioSandboxClient(base_url=base_url, timeout=600)
        self._home_dir = home_dir

    @property
    def base_url(self) -> str:
        return self._base_url

    @property
    def home_dir(self) -> str:
        """Get the home directory inside the sandbox."""
        if self._home_dir is None:
            context = self._client.sandbox.get_context()
            self._home_dir = context.home_dir
        return self._home_dir

    def execute_command(self, command: str) -> str:
        """Execute a shell command in the sandbox.

        Args:
            command: The command to execute.

        Returns:
            The output of the command.
        """
        try:
            result = self._client.shell.exec_command(command=command)
            output = result.data.output if result.data else ""
            return output if output else "(no output)"
        except Exception as e:
            logger.error(f"Failed to execute command in sandbox: {e}")
            return f"Error: {e}"

    def read_file(self, path: str) -> str:
        """Read the content of a file in the sandbox.

        Args:
            path: The absolute path of the file to read.

        Returns:
            The content of the file.
        """
        try:
            result = self._client.file.read_file(file=path)
            return result.data.content if result.data else ""
        except Exception as e:
            logger.error(f"Failed to read file in sandbox: {e}")
            return f"Error: {e}"

    def list_dir(self, path: str, max_depth: int = 2) -> list[str]:
        """List the contents of a directory in the sandbox.

        Args:
            path: The absolute path of the directory to list.
            max_depth: The maximum depth to traverse. Default is 2.

        Returns:
            The contents of the directory.
        """
        try:
            # Use a shell command to list the directory;
            # find's -maxdepth flag limits the traversal depth.
            result = self._client.shell.exec_command(command=f"find {path} -maxdepth {max_depth} -type f -o -type d 2>/dev/null | head -500")
            output = result.data.output if result.data else ""
            if output:
                return [line.strip() for line in output.strip().split("\n") if line.strip()]
            return []
        except Exception as e:
            logger.error(f"Failed to list directory in sandbox: {e}")
            return []

    def write_file(self, path: str, content: str, append: bool = False) -> None:
        """Write content to a file in the sandbox.

        Args:
            path: The absolute path of the file to write to.
            content: The text content to write to the file.
            append: Whether to append the content to the file.
        """
        try:
            if append:
                # Read existing content first and append
                existing = self.read_file(path)
                if not existing.startswith("Error:"):
                    content = existing + content
            self._client.file.write_file(file=path, content=content)
        except Exception as e:
            logger.error(f"Failed to write file in sandbox: {e}")
            raise

    def update_file(self, path: str, content: bytes) -> None:
        """Update a file with binary content in the sandbox.

        Args:
            path: The absolute path of the file to update.
            content: The binary content to write to the file.
        """
        try:
            base64_content = base64.b64encode(content).decode("utf-8")
            self._client.file.write_file(file=path, content=base64_content, encoding="base64")
        except Exception as e:
            logger.error(f"Failed to update file in sandbox: {e}")
            raise
@@ -0,0 +1,493 @@
|
||||
"""AIO Sandbox Provider — orchestrates sandbox lifecycle with pluggable backends.
|
||||
|
||||
This provider composes two abstractions:
|
||||
- SandboxBackend: how sandboxes are provisioned (local container vs remote/K8s)
|
||||
- SandboxStateStore: how thread→sandbox mappings are persisted (file vs Redis)
|
||||
|
||||
The provider itself handles:
|
||||
- In-process caching for fast repeated access
|
||||
- Thread-safe locking (in-process + cross-process via state store)
|
||||
- Idle timeout management
|
||||
- Graceful shutdown with signal handling
|
||||
- Mount computation (thread-specific, skills)
|
||||
"""
|
||||
|
||||
import atexit
|
||||
import hashlib
|
||||
import logging
|
||||
import os
|
||||
import signal
|
||||
import threading
|
||||
import time
|
||||
import uuid
|
||||
|
||||
from src.config import get_app_config
|
||||
from src.config.paths import VIRTUAL_PATH_PREFIX, get_paths
|
||||
from src.sandbox.sandbox import Sandbox
|
||||
from src.sandbox.sandbox_provider import SandboxProvider
|
||||
|
||||
from .aio_sandbox import AioSandbox
|
||||
from .backend import SandboxBackend, wait_for_sandbox_ready
|
||||
from .file_state_store import FileSandboxStateStore
|
||||
from .local_backend import LocalContainerBackend
|
||||
from .remote_backend import RemoteSandboxBackend
|
||||
from .sandbox_info import SandboxInfo
|
||||
from .state_store import SandboxStateStore
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# Default configuration
|
||||
DEFAULT_IMAGE = "enterprise-public-cn-beijing.cr.volces.com/vefaas-public/all-in-one-sandbox:latest"
|
||||
DEFAULT_PORT = 8080
|
||||
DEFAULT_CONTAINER_PREFIX = "deer-flow-sandbox"
|
||||
DEFAULT_IDLE_TIMEOUT = 600 # 10 minutes in seconds
|
||||
IDLE_CHECK_INTERVAL = 60 # Check every 60 seconds
|
||||
|
||||
|
||||
class AioSandboxProvider(SandboxProvider):
|
||||
"""Sandbox provider that manages containers running the AIO sandbox.
|
||||
|
||||
    Architecture:
        This provider composes a SandboxBackend (how to provision) and a
        SandboxStateStore (how to persist state), enabling:
        - Local Docker/Apple Container mode (auto-start containers)
        - Remote/K8s mode (connect to pre-existing sandbox URL)
        - Cross-process consistency via file-based or Redis state stores

    Configuration options in config.yaml under sandbox:
        use: src.community.aio_sandbox:AioSandboxProvider
        image: <container image>
        port: 8080                    # Base port for local containers
        base_url: http://...          # If set, uses remote backend (K8s/external)
        auto_start: true              # Whether to auto-start local containers
        container_prefix: deer-flow-sandbox
        idle_timeout: 600             # Idle timeout in seconds (0 to disable)
        mounts:                       # Volume mounts for local containers
          - host_path: /path/on/host
            container_path: /path/in/container
            read_only: false
        environment:                  # Environment variables for containers
          NODE_ENV: production
          API_KEY: $MY_API_KEY
    """

    def __init__(self):
        self._lock = threading.Lock()
        self._sandboxes: dict[str, AioSandbox] = {}  # sandbox_id -> AioSandbox instance
        self._sandbox_infos: dict[str, SandboxInfo] = {}  # sandbox_id -> SandboxInfo (for destroy)
        self._thread_sandboxes: dict[str, str] = {}  # thread_id -> sandbox_id
        self._thread_locks: dict[str, threading.Lock] = {}  # thread_id -> in-process lock
        self._last_activity: dict[str, float] = {}  # sandbox_id -> last activity timestamp
        self._shutdown_called = False
        self._idle_checker_stop = threading.Event()
        self._idle_checker_thread: threading.Thread | None = None

        self._config = self._load_config()
        self._backend: SandboxBackend = self._create_backend()
        self._state_store: SandboxStateStore = self._create_state_store()

        # Register shutdown handler
        atexit.register(self.shutdown)
        self._register_signal_handlers()

        # Start idle checker if enabled
        if self._config.get("idle_timeout", DEFAULT_IDLE_TIMEOUT) > 0:
            self._start_idle_checker()

    # ── Factory methods ──────────────────────────────────────────────────

    def _create_backend(self) -> SandboxBackend:
        """Create the appropriate backend based on configuration.

        Selection logic (checked in order):
        1. ``provisioner_url`` set → RemoteSandboxBackend (provisioner mode)
           Provisioner dynamically creates Pods + Services in k3s.
        2. ``auto_start`` → LocalContainerBackend (Docker / Apple Container)
        """
        provisioner_url = self._config.get("provisioner_url")
        if provisioner_url:
            logger.info(f"Using remote sandbox backend with provisioner at {provisioner_url}")
            return RemoteSandboxBackend(provisioner_url=provisioner_url)

        if not self._config.get("auto_start", True):
            raise RuntimeError("auto_start is disabled and no base_url is configured")

        logger.info("Using local container sandbox backend")
        return LocalContainerBackend(
            image=self._config["image"],
            base_port=self._config["port"],
            container_prefix=self._config["container_prefix"],
            config_mounts=self._config["mounts"],
            environment=self._config["environment"],
        )

    def _create_state_store(self) -> SandboxStateStore:
        """Create the state store for cross-process sandbox mapping persistence.

        Currently uses file-based store. For distributed multi-host deployments,
        a Redis-based store can be plugged in here.
        """
        # TODO: Support RedisSandboxStateStore for distributed deployments.
        # Configuration would be:
        #   sandbox:
        #     state_store: redis
        #     redis_url: redis://localhost:6379/0
        # This would enable cross-host sandbox discovery (e.g., multiple K8s pods
        # without shared PVC, or multi-node Docker Swarm).
        return FileSandboxStateStore(base_dir=str(get_paths().base_dir))

    # ── Configuration ────────────────────────────────────────────────────

    def _load_config(self) -> dict:
        """Load sandbox configuration from app config."""
        config = get_app_config()
        sandbox_config = config.sandbox

        return {
            "image": sandbox_config.image or DEFAULT_IMAGE,
            "port": sandbox_config.port or DEFAULT_PORT,
            "base_url": sandbox_config.base_url,
            "auto_start": sandbox_config.auto_start if sandbox_config.auto_start is not None else True,
            "container_prefix": sandbox_config.container_prefix or DEFAULT_CONTAINER_PREFIX,
            "idle_timeout": getattr(sandbox_config, "idle_timeout", None) or DEFAULT_IDLE_TIMEOUT,
            "mounts": sandbox_config.mounts or [],
            "environment": self._resolve_env_vars(sandbox_config.environment or {}),
            # provisioner URL for dynamic pod management (e.g. http://provisioner:8002)
            "provisioner_url": getattr(sandbox_config, "provisioner_url", None) or "",
        }

    @staticmethod
    def _resolve_env_vars(env_config: dict[str, str]) -> dict[str, str]:
        """Resolve environment variable references (values starting with $)."""
        resolved = {}
        for key, value in env_config.items():
            if isinstance(value, str) and value.startswith("$"):
                env_name = value[1:]
                resolved[key] = os.environ.get(env_name, "")
            else:
                resolved[key] = str(value)
        return resolved
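
    # Illustration (comment only; values hypothetical): plain strings pass
    # through unchanged, while values starting with "$" are read from the
    # process environment at load time:
    #
    #     _resolve_env_vars({"NODE_ENV": "production", "API_KEY": "$MY_API_KEY"})
    #     # -> {"NODE_ENV": "production", "API_KEY": os.environ.get("MY_API_KEY", "")}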

    # ── Deterministic ID ─────────────────────────────────────────────────

    @staticmethod
    def _deterministic_sandbox_id(thread_id: str) -> str:
        """Generate a deterministic sandbox ID from a thread ID.

        Ensures all processes derive the same sandbox_id for a given thread,
        enabling cross-process sandbox discovery without shared memory.
        """
        return hashlib.sha256(thread_id.encode()).hexdigest()[:8]
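
    # Illustration (comment only): the ID is the first 8 hex characters of
    # sha256(thread_id), so every process (and every host) derives the same
    # sandbox ID for a given thread with no shared state or coordination.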

    # ── Mount helpers ────────────────────────────────────────────────────

    def _get_extra_mounts(self, thread_id: str | None) -> list[tuple[str, str, bool]]:
        """Collect all extra mounts for a sandbox (thread-specific + skills)."""
        mounts: list[tuple[str, str, bool]] = []

        if thread_id:
            mounts.extend(self._get_thread_mounts(thread_id))
            logger.info(f"Adding thread mounts for thread {thread_id}: {mounts}")

        skills_mount = self._get_skills_mount()
        if skills_mount:
            mounts.append(skills_mount)
            logger.info(f"Adding skills mount: {skills_mount}")

        return mounts

    @staticmethod
    def _get_thread_mounts(thread_id: str) -> list[tuple[str, str, bool]]:
        """Get volume mounts for a thread's data directories.

        Creates directories if they don't exist (lazy initialization).
        """
        paths = get_paths()
        paths.ensure_thread_dirs(thread_id)

        mounts = [
            (str(paths.sandbox_work_dir(thread_id)), f"{VIRTUAL_PATH_PREFIX}/workspace", False),
            (str(paths.sandbox_uploads_dir(thread_id)), f"{VIRTUAL_PATH_PREFIX}/uploads", False),
            (str(paths.sandbox_outputs_dir(thread_id)), f"{VIRTUAL_PATH_PREFIX}/outputs", False),
        ]

        return mounts

    @staticmethod
    def _get_skills_mount() -> tuple[str, str, bool] | None:
        """Get the skills directory mount configuration."""
        try:
            config = get_app_config()
            skills_path = config.skills.get_skills_path()
            container_path = config.skills.container_path

            if skills_path.exists():
                return (str(skills_path), container_path, True)  # Read-only for security
        except Exception as e:
            logger.warning(f"Could not setup skills mount: {e}")
        return None

    # ── Idle timeout management ──────────────────────────────────────────

    def _start_idle_checker(self) -> None:
        """Start the background thread that checks for idle sandboxes."""
        self._idle_checker_thread = threading.Thread(
            target=self._idle_checker_loop,
            name="sandbox-idle-checker",
            daemon=True,
        )
        self._idle_checker_thread.start()
        logger.info(f"Started idle checker thread (timeout: {self._config.get('idle_timeout', DEFAULT_IDLE_TIMEOUT)}s)")

    def _idle_checker_loop(self) -> None:
        idle_timeout = self._config.get("idle_timeout", DEFAULT_IDLE_TIMEOUT)
        while not self._idle_checker_stop.wait(timeout=IDLE_CHECK_INTERVAL):
            try:
                self._cleanup_idle_sandboxes(idle_timeout)
            except Exception as e:
                logger.error(f"Error in idle checker loop: {e}")

    def _cleanup_idle_sandboxes(self, idle_timeout: float) -> None:
        current_time = time.time()
        sandboxes_to_release = []

        with self._lock:
            for sandbox_id, last_activity in self._last_activity.items():
                idle_duration = current_time - last_activity
                if idle_duration > idle_timeout:
                    sandboxes_to_release.append(sandbox_id)
                    logger.info(f"Sandbox {sandbox_id} idle for {idle_duration:.1f}s, marking for release")

        for sandbox_id in sandboxes_to_release:
            try:
                logger.info(f"Releasing idle sandbox {sandbox_id}")
                self.release(sandbox_id)
            except Exception as e:
                logger.error(f"Failed to release idle sandbox {sandbox_id}: {e}")

    # ── Signal handling ──────────────────────────────────────────────────

    def _register_signal_handlers(self) -> None:
        """Register signal handlers for graceful shutdown."""
        self._original_sigterm = signal.getsignal(signal.SIGTERM)
        self._original_sigint = signal.getsignal(signal.SIGINT)

        def signal_handler(signum, frame):
            self.shutdown()
            original = self._original_sigterm if signum == signal.SIGTERM else self._original_sigint
            if callable(original):
                original(signum, frame)
            elif original == signal.SIG_DFL:
                signal.signal(signum, signal.SIG_DFL)
                signal.raise_signal(signum)

        try:
            signal.signal(signal.SIGTERM, signal_handler)
            signal.signal(signal.SIGINT, signal_handler)
        except ValueError:
            logger.debug("Could not register signal handlers (not main thread)")

    # ── Thread locking (in-process) ──────────────────────────────────────

    def _get_thread_lock(self, thread_id: str) -> threading.Lock:
        """Get or create an in-process lock for a specific thread_id."""
        with self._lock:
            if thread_id not in self._thread_locks:
                self._thread_locks[thread_id] = threading.Lock()
            return self._thread_locks[thread_id]

    # ── Core: acquire / get / release / shutdown ─────────────────────────

    def acquire(self, thread_id: str | None = None) -> str:
        """Acquire a sandbox environment and return its ID.

        For the same thread_id, this method will return the same sandbox_id
        across multiple turns, multiple processes, and (with shared storage)
        multiple pods.

        Thread-safe with both in-process and cross-process locking.

        Args:
            thread_id: Optional thread ID for thread-specific configurations.

        Returns:
            The ID of the acquired sandbox environment.
        """
        if thread_id:
            thread_lock = self._get_thread_lock(thread_id)
            with thread_lock:
                return self._acquire_internal(thread_id)
        else:
            return self._acquire_internal(thread_id)

    def _acquire_internal(self, thread_id: str | None) -> str:
        """Internal sandbox acquisition with three-layer consistency.

        Layer 1: In-process cache (fastest, covers same-process repeated access)
        Layer 2: Cross-process state store + file lock (covers multi-process)
        Layer 3: Backend discovery (covers containers started by other processes)
        """
        # ── Layer 1: In-process cache (fast path) ──
        if thread_id:
            with self._lock:
                if thread_id in self._thread_sandboxes:
                    existing_id = self._thread_sandboxes[thread_id]
                    if existing_id in self._sandboxes:
                        logger.info(f"Reusing in-process sandbox {existing_id} for thread {thread_id}")
                        self._last_activity[existing_id] = time.time()
                        return existing_id
                    else:
                        del self._thread_sandboxes[thread_id]

        # Deterministic ID for thread-specific, random for anonymous
        sandbox_id = self._deterministic_sandbox_id(thread_id) if thread_id else str(uuid.uuid4())[:8]

        # ── Layer 2 & 3: Cross-process recovery + creation ──
        if thread_id:
            with self._state_store.lock(thread_id):
                # Try to recover from persisted state or discover existing container
                recovered_id = self._try_recover(thread_id)
                if recovered_id is not None:
                    return recovered_id
                # Nothing to recover — create new sandbox (still under cross-process lock)
                return self._create_sandbox(thread_id, sandbox_id)
        else:
            return self._create_sandbox(thread_id, sandbox_id)

    def _try_recover(self, thread_id: str) -> str | None:
        """Try to recover a sandbox from persisted state or backend discovery.

        Called under cross-process lock for the given thread_id.

        Args:
            thread_id: The thread ID.

        Returns:
            The sandbox_id if recovery succeeded, None otherwise.
        """
        info = self._state_store.load(thread_id)
        if info is None:
            return None

        # Re-discover: verifies sandbox is alive and gets current connection info
        # (handles cases like port changes after container restart)
        discovered = self._backend.discover(info.sandbox_id)
        if discovered is None:
            logger.info(f"Persisted sandbox {info.sandbox_id} for thread {thread_id} could not be recovered")
            self._state_store.remove(thread_id)
            return None

        # Adopt into this process's memory
        sandbox = AioSandbox(id=discovered.sandbox_id, base_url=discovered.sandbox_url)
        with self._lock:
            self._sandboxes[discovered.sandbox_id] = sandbox
            self._sandbox_infos[discovered.sandbox_id] = discovered
            self._last_activity[discovered.sandbox_id] = time.time()
            self._thread_sandboxes[thread_id] = discovered.sandbox_id

        # Update state if connection info changed
        if discovered.sandbox_url != info.sandbox_url:
            self._state_store.save(thread_id, discovered)

        logger.info(f"Recovered sandbox {discovered.sandbox_id} for thread {thread_id} at {discovered.sandbox_url}")
        return discovered.sandbox_id

    def _create_sandbox(self, thread_id: str | None, sandbox_id: str) -> str:
        """Create a new sandbox via the backend.

        Args:
            thread_id: Optional thread ID.
            sandbox_id: The sandbox ID to use.

        Returns:
            The sandbox_id.

        Raises:
            RuntimeError: If sandbox creation or readiness check fails.
        """
        extra_mounts = self._get_extra_mounts(thread_id)

        info = self._backend.create(thread_id, sandbox_id, extra_mounts=extra_mounts or None)

        # Wait for sandbox to be ready
        if not wait_for_sandbox_ready(info.sandbox_url, timeout=60):
            self._backend.destroy(info)
            raise RuntimeError(f"Sandbox {sandbox_id} failed to become ready within timeout at {info.sandbox_url}")

        sandbox = AioSandbox(id=sandbox_id, base_url=info.sandbox_url)
        with self._lock:
            self._sandboxes[sandbox_id] = sandbox
            self._sandbox_infos[sandbox_id] = info
            self._last_activity[sandbox_id] = time.time()
            if thread_id:
                self._thread_sandboxes[thread_id] = sandbox_id

        # Persist for cross-process discovery
        if thread_id:
            self._state_store.save(thread_id, info)

        logger.info(f"Created sandbox {sandbox_id} for thread {thread_id} at {info.sandbox_url}")
        return sandbox_id

    def get(self, sandbox_id: str) -> Sandbox | None:
        """Get a sandbox by ID. Updates last activity timestamp.

        Args:
            sandbox_id: The ID of the sandbox.

        Returns:
            The sandbox instance if found, None otherwise.
        """
        with self._lock:
            sandbox = self._sandboxes.get(sandbox_id)
            if sandbox is not None:
                self._last_activity[sandbox_id] = time.time()
            return sandbox

    def release(self, sandbox_id: str) -> None:
        """Release a sandbox: clean up in-memory state, persisted state, and backend resources.

        Args:
            sandbox_id: The ID of the sandbox to release.
        """
        info = None
        thread_ids_to_remove: list[str] = []

        with self._lock:
            self._sandboxes.pop(sandbox_id, None)
            info = self._sandbox_infos.pop(sandbox_id, None)
            thread_ids_to_remove = [tid for tid, sid in self._thread_sandboxes.items() if sid == sandbox_id]
            for tid in thread_ids_to_remove:
                del self._thread_sandboxes[tid]
            self._last_activity.pop(sandbox_id, None)

        # Clean up persisted state (outside lock, involves file I/O)
        for tid in thread_ids_to_remove:
            self._state_store.remove(tid)

        # Destroy backend resources (stop container, release port, etc.)
        if info:
            self._backend.destroy(info)

        logger.info(f"Released sandbox {sandbox_id}")

    def shutdown(self) -> None:
        """Shutdown all sandboxes. Thread-safe and idempotent."""
        with self._lock:
            if self._shutdown_called:
                return
            self._shutdown_called = True
            sandbox_ids = list(self._sandboxes.keys())

        # Stop idle checker
        self._idle_checker_stop.set()
        if self._idle_checker_thread is not None and self._idle_checker_thread.is_alive():
            self._idle_checker_thread.join(timeout=5)
            logger.info("Stopped idle checker thread")

        logger.info(f"Shutting down {len(sandbox_ids)} sandbox(es)")

        for sandbox_id in sandbox_ids:
            try:
                self.release(sandbox_id)
            except Exception as e:
                logger.error(f"Failed to release sandbox {sandbox_id} during shutdown: {e}")
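

if __name__ == "__main__":
    # Minimal smoke-test sketch (not part of the provider's public surface).
    # Assumes a working container runtime and a valid app config; acquiring
    # the same thread_id twice returns the same sandbox_id.
    provider = AioSandboxProvider()
    first = provider.acquire(thread_id="demo-thread")
    second = provider.acquire(thread_id="demo-thread")
    assert first == second
    print(f"acquired sandbox {first}; alive: {provider.get(first) is not None}")
    provider.release(first)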
@@ -0,0 +1,98 @@
"""Abstract base class for sandbox provisioning backends."""

from __future__ import annotations

import logging
import time
from abc import ABC, abstractmethod

import requests

from .sandbox_info import SandboxInfo

logger = logging.getLogger(__name__)


def wait_for_sandbox_ready(sandbox_url: str, timeout: int = 30) -> bool:
    """Poll sandbox health endpoint until ready or timeout.

    Args:
        sandbox_url: URL of the sandbox (e.g. http://k3s:30001).
        timeout: Maximum time to wait in seconds.

    Returns:
        True if sandbox is ready, False otherwise.
    """
    start_time = time.time()
    while time.time() - start_time < timeout:
        try:
            response = requests.get(f"{sandbox_url}/v1/sandbox", timeout=5)
            if response.status_code == 200:
                return True
        except requests.exceptions.RequestException:
            pass
        time.sleep(1)
    return False
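
# Usage sketch (illustrative only): callers typically poll a freshly started
# sandbox before handing it out, e.g.
#
#     if not wait_for_sandbox_ready("http://localhost:8080", timeout=10):
#         raise RuntimeError("sandbox did not become ready")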

class SandboxBackend(ABC):
    """Abstract base for sandbox provisioning backends.

    Two implementations:
    - LocalContainerBackend: starts Docker/Apple Container locally, manages ports
    - RemoteSandboxBackend: connects to a pre-existing URL (K8s service, external)
    """

    @abstractmethod
    def create(self, thread_id: str, sandbox_id: str, extra_mounts: list[tuple[str, str, bool]] | None = None) -> SandboxInfo:
        """Create/provision a new sandbox.

        Args:
            thread_id: Thread ID for which the sandbox is being created. Useful for backends that want to organize sandboxes by thread.
            sandbox_id: Deterministic sandbox identifier.
            extra_mounts: Additional volume mounts as (host_path, container_path, read_only) tuples.
                Ignored by backends that don't manage containers (e.g., remote).

        Returns:
            SandboxInfo with connection details.
        """
        ...

    @abstractmethod
    def destroy(self, info: SandboxInfo) -> None:
        """Destroy/cleanup a sandbox and release its resources.

        Args:
            info: The sandbox metadata to destroy.
        """
        ...

    @abstractmethod
    def is_alive(self, info: SandboxInfo) -> bool:
        """Quick check whether a sandbox is still alive.

        This should be a lightweight check (e.g., container inspect)
        rather than a full health check.

        Args:
            info: The sandbox metadata to check.

        Returns:
            True if the sandbox appears to be alive.
        """
        ...

    @abstractmethod
    def discover(self, sandbox_id: str) -> SandboxInfo | None:
        """Try to discover an existing sandbox by its deterministic ID.

        Used for cross-process recovery: when another process started a sandbox,
        this process can discover it by the deterministic container name or URL.

        Args:
            sandbox_id: The deterministic sandbox ID to look for.

        Returns:
            SandboxInfo if found and healthy, None otherwise.
        """
        ...
@@ -0,0 +1,102 @@
"""File-based sandbox state store.

Uses JSON files for persistence and fcntl file locking for cross-process
mutual exclusion. Works across processes on the same machine or across
K8s pods with a shared PVC mount.
"""

from __future__ import annotations

import fcntl
import json
import logging
import os
from collections.abc import Generator
from contextlib import contextmanager
from pathlib import Path

from src.config.paths import Paths

from .sandbox_info import SandboxInfo
from .state_store import SandboxStateStore

logger = logging.getLogger(__name__)

SANDBOX_STATE_FILE = "sandbox.json"
SANDBOX_LOCK_FILE = "sandbox.lock"


class FileSandboxStateStore(SandboxStateStore):
    """File-based state store using JSON files and fcntl file locking.

    State is stored at: {base_dir}/threads/{thread_id}/sandbox.json
    Lock files at: {base_dir}/threads/{thread_id}/sandbox.lock

    This works across processes on the same machine sharing a filesystem.
    For K8s multi-pod scenarios, requires a shared PVC mount at base_dir.
    """

    def __init__(self, base_dir: str):
        """Initialize the file-based state store.

        Args:
            base_dir: Root directory for state files (typically Paths.base_dir).
        """
        self._paths = Paths(base_dir)

    def _thread_dir(self, thread_id: str) -> Path:
        """Get the directory for a thread's state files."""
        return self._paths.thread_dir(thread_id)

    def save(self, thread_id: str, info: SandboxInfo) -> None:
        thread_dir = self._thread_dir(thread_id)
        os.makedirs(thread_dir, exist_ok=True)
        state_file = thread_dir / SANDBOX_STATE_FILE
        try:
            state_file.write_text(json.dumps(info.to_dict()))
            logger.info(f"Saved sandbox state for thread {thread_id}: {info.sandbox_id}")
        except OSError as e:
            logger.warning(f"Failed to save sandbox state for thread {thread_id}: {e}")

    def load(self, thread_id: str) -> SandboxInfo | None:
        state_file = self._thread_dir(thread_id) / SANDBOX_STATE_FILE
        if not state_file.exists():
            return None
        try:
            data = json.loads(state_file.read_text())
            return SandboxInfo.from_dict(data)
        except (OSError, json.JSONDecodeError, KeyError) as e:
            logger.warning(f"Failed to load sandbox state for thread {thread_id}: {e}")
            return None

    def remove(self, thread_id: str) -> None:
        state_file = self._thread_dir(thread_id) / SANDBOX_STATE_FILE
        try:
            if state_file.exists():
                state_file.unlink()
                logger.info(f"Removed sandbox state for thread {thread_id}")
        except OSError as e:
            logger.warning(f"Failed to remove sandbox state for thread {thread_id}: {e}")

    @contextmanager
    def lock(self, thread_id: str) -> Generator[None, None, None]:
        """Acquire a cross-process file lock using fcntl.flock.

        The lock is held for the duration of the context manager.
        Only one process can hold the lock at a time for a given thread_id.

        Note: fcntl.flock is available on macOS and Linux.
        """
        thread_dir = self._thread_dir(thread_id)
        os.makedirs(thread_dir, exist_ok=True)
        lock_path = thread_dir / SANDBOX_LOCK_FILE
        lock_file = open(lock_path, "w")
        try:
            fcntl.flock(lock_file.fileno(), fcntl.LOCK_EX)
            yield
        finally:
            try:
                fcntl.flock(lock_file.fileno(), fcntl.LOCK_UN)
                lock_file.close()
            except OSError:
                pass
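

if __name__ == "__main__":
    # Cross-process locking sketch (assumes Paths accepts an arbitrary base
    # directory, as __init__ above suggests). Run this from two shells at
    # once: the second process blocks in store.lock() until the first
    # releases the flock.
    import tempfile
    import time

    store = FileSandboxStateStore(base_dir=tempfile.mkdtemp())
    with store.lock("demo-thread"):
        store.save("demo-thread", SandboxInfo(sandbox_id="a1b2c3d4", sandbox_url="http://localhost:8080"))
        time.sleep(2)  # hold the lock long enough to observe blocking
    print(store.load("demo-thread"))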
@@ -0,0 +1,294 @@
"""Local container backend for sandbox provisioning.

Manages sandbox containers using Docker or Apple Container on the local machine.
Handles container lifecycle, port allocation, and cross-process container discovery.
"""

from __future__ import annotations

import logging
import subprocess

from src.utils.network import get_free_port, release_port

from .backend import SandboxBackend, wait_for_sandbox_ready
from .sandbox_info import SandboxInfo

logger = logging.getLogger(__name__)


class LocalContainerBackend(SandboxBackend):
    """Backend that manages sandbox containers locally using Docker or Apple Container.

    On macOS, automatically prefers Apple Container if available, otherwise falls back to Docker.
    On other platforms, uses Docker.

    Features:
    - Deterministic container naming for cross-process discovery
    - Port allocation with thread-safe utilities
    - Container lifecycle management (start/stop with --rm)
    - Support for volume mounts and environment variables
    """

    def __init__(
        self,
        *,
        image: str,
        base_port: int,
        container_prefix: str,
        config_mounts: list,
        environment: dict[str, str],
    ):
        """Initialize the local container backend.

        Args:
            image: Container image to use.
            base_port: Base port number to start searching for free ports.
            container_prefix: Prefix for container names (e.g., "deer-flow-sandbox").
            config_mounts: Volume mount configurations from config (list of VolumeMountConfig).
            environment: Environment variables to inject into containers.
        """
        self._image = image
        self._base_port = base_port
        self._container_prefix = container_prefix
        self._config_mounts = config_mounts
        self._environment = environment
        self._runtime = self._detect_runtime()

    @property
    def runtime(self) -> str:
        """The detected container runtime ("docker" or "container")."""
        return self._runtime

    def _detect_runtime(self) -> str:
        """Detect which container runtime to use.

        On macOS, prefer Apple Container if available, otherwise fall back to Docker.
        On other platforms, use Docker.

        Returns:
            "container" for Apple Container, "docker" for Docker.
        """
        import platform

        if platform.system() == "Darwin":
            try:
                result = subprocess.run(
                    ["container", "--version"],
                    capture_output=True,
                    text=True,
                    check=True,
                    timeout=5,
                )
                logger.info(f"Detected Apple Container: {result.stdout.strip()}")
                return "container"
            except (FileNotFoundError, subprocess.CalledProcessError, subprocess.TimeoutExpired):
                logger.info("Apple Container not available, falling back to Docker")

        return "docker"

    # ── SandboxBackend interface ──────────────────────────────────────────

    def create(self, thread_id: str, sandbox_id: str, extra_mounts: list[tuple[str, str, bool]] | None = None) -> SandboxInfo:
        """Start a new container and return its connection info.

        Args:
            thread_id: Thread ID for which the sandbox is being created. Useful for backends that want to organize sandboxes by thread.
            sandbox_id: Deterministic sandbox identifier (used in container name).
            extra_mounts: Additional volume mounts as (host_path, container_path, read_only) tuples.

        Returns:
            SandboxInfo with container details.

        Raises:
            RuntimeError: If the container fails to start.
        """
        container_name = f"{self._container_prefix}-{sandbox_id}"
        port = get_free_port(start_port=self._base_port)
        try:
            container_id = self._start_container(container_name, port, extra_mounts)
        except Exception:
            release_port(port)
            raise

        return SandboxInfo(
            sandbox_id=sandbox_id,
            sandbox_url=f"http://localhost:{port}",
            container_name=container_name,
            container_id=container_id,
        )

    def destroy(self, info: SandboxInfo) -> None:
        """Stop the container and release its port."""
        if info.container_id:
            self._stop_container(info.container_id)
        # Extract port from sandbox_url for release
        try:
            from urllib.parse import urlparse

            port = urlparse(info.sandbox_url).port
            if port:
                release_port(port)
        except Exception:
            pass

    def is_alive(self, info: SandboxInfo) -> bool:
        """Check if the container is still running (lightweight, no HTTP)."""
        if info.container_name:
            return self._is_container_running(info.container_name)
        return False

    def discover(self, sandbox_id: str) -> SandboxInfo | None:
        """Discover an existing container by its deterministic name.

        Checks if a container with the expected name is running, retrieves its
        port, and verifies it responds to health checks.

        Args:
            sandbox_id: The deterministic sandbox ID (determines container name).

        Returns:
            SandboxInfo if container found and healthy, None otherwise.
        """
        container_name = f"{self._container_prefix}-{sandbox_id}"

        if not self._is_container_running(container_name):
            return None

        port = self._get_container_port(container_name)
        if port is None:
            return None

        sandbox_url = f"http://localhost:{port}"
        if not wait_for_sandbox_ready(sandbox_url, timeout=5):
            return None

        return SandboxInfo(
            sandbox_id=sandbox_id,
            sandbox_url=sandbox_url,
            container_name=container_name,
        )

    # ── Container operations ─────────────────────────────────────────────

    def _start_container(
        self,
        container_name: str,
        port: int,
        extra_mounts: list[tuple[str, str, bool]] | None = None,
    ) -> str:
        """Start a new container.

        Args:
            container_name: Name for the container.
            port: Host port to map to container port 8080.
            extra_mounts: Additional volume mounts.

        Returns:
            The container ID.

        Raises:
            RuntimeError: If container fails to start.
        """
        cmd = [self._runtime, "run"]

        # Docker-specific security options
        if self._runtime == "docker":
            cmd.extend(["--security-opt", "seccomp=unconfined"])

        cmd.extend(
            [
                "--rm",
                "-d",
                "-p",
                f"{port}:8080",
                "--name",
                container_name,
            ]
        )

        # Environment variables
        for key, value in self._environment.items():
            cmd.extend(["-e", f"{key}={value}"])

        # Config-level volume mounts
        for mount in self._config_mounts:
            mount_spec = f"{mount.host_path}:{mount.container_path}"
            if mount.read_only:
                mount_spec += ":ro"
            cmd.extend(["-v", mount_spec])

        # Extra mounts (thread-specific, skills, etc.)
        if extra_mounts:
            for host_path, container_path, read_only in extra_mounts:
                mount_spec = f"{host_path}:{container_path}"
                if read_only:
                    mount_spec += ":ro"
                cmd.extend(["-v", mount_spec])

        cmd.append(self._image)

        logger.info(f"Starting container using {self._runtime}: {' '.join(cmd)}")

        try:
            result = subprocess.run(cmd, capture_output=True, text=True, check=True)
            container_id = result.stdout.strip()
            logger.info(f"Started container {container_name} (ID: {container_id}) using {self._runtime}")
            return container_id
        except subprocess.CalledProcessError as e:
            logger.error(f"Failed to start container using {self._runtime}: {e.stderr}")
            raise RuntimeError(f"Failed to start sandbox container: {e.stderr}")

    def _stop_container(self, container_id: str) -> None:
        """Stop a container (--rm ensures automatic removal)."""
        try:
            subprocess.run(
                [self._runtime, "stop", container_id],
                capture_output=True,
                text=True,
                check=True,
            )
            logger.info(f"Stopped container {container_id} using {self._runtime}")
        except subprocess.CalledProcessError as e:
            logger.warning(f"Failed to stop container {container_id}: {e.stderr}")

    def _is_container_running(self, container_name: str) -> bool:
        """Check if a named container is currently running.

        This enables cross-process container discovery — any process can detect
        containers started by another process via the deterministic container name.
        """
        try:
            result = subprocess.run(
                [self._runtime, "inspect", "-f", "{{.State.Running}}", container_name],
                capture_output=True,
                text=True,
                timeout=5,
            )
            return result.returncode == 0 and result.stdout.strip().lower() == "true"
        except (subprocess.CalledProcessError, subprocess.TimeoutExpired):
            return False

    def _get_container_port(self, container_name: str) -> int | None:
        """Get the host port of a running container.

        Args:
            container_name: The container name to inspect.

        Returns:
            The host port mapped to container port 8080, or None if not found.
        """
        try:
            result = subprocess.run(
                [self._runtime, "port", container_name, "8080"],
                capture_output=True,
                text=True,
                timeout=5,
            )
            if result.returncode == 0 and result.stdout.strip():
                # Output format: "0.0.0.0:PORT" or ":::PORT"
                port_str = result.stdout.strip().split(":")[-1]
                return int(port_str)
        except (subprocess.CalledProcessError, subprocess.TimeoutExpired, ValueError):
            pass
        return None
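

if __name__ == "__main__":
    # Illustration of the parsing in _get_container_port: ``docker port`` (and
    # ``container port``) print "0.0.0.0:PORT" for IPv4 or ":::PORT" for IPv6;
    # splitting on the last ":" recovers the host port in both forms.
    for sample in ("0.0.0.0:32817", ":::32817"):
        print(sample, "->", int(sample.strip().split(":")[-1]))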
@@ -0,0 +1,156 @@
"""Remote sandbox backend — delegates Pod lifecycle to the provisioner service.

The provisioner dynamically creates per-sandbox-id Pods + NodePort Services
in k3s. The backend accesses sandbox pods directly via ``k3s:{NodePort}``.

Architecture:
    ┌────────────┐  HTTP    ┌─────────────┐  K8s API  ┌──────────┐
    │ this file  │ ──────▸  │ provisioner │ ────────▸ │   k3s    │
    │ (backend)  │          │    :8002    │           │  :6443   │
    └────────────┘          └─────────────┘           └─────┬────┘
                                                            │ creates
    ┌─────────────┐                                   ┌─────▼──────┐
    │   backend   │ ───────────────────────────────▸  │  sandbox   │
    │             │    direct k3s:NPort               │   Pod(s)   │
    └─────────────┘                                   └────────────┘
"""

from __future__ import annotations

import logging

import requests

from .backend import SandboxBackend
from .sandbox_info import SandboxInfo

logger = logging.getLogger(__name__)


class RemoteSandboxBackend(SandboxBackend):
    """Backend that delegates sandbox lifecycle to the provisioner service.

    All Pod creation, destruction, and discovery are handled by the
    provisioner. This backend is a thin HTTP client.

    Typical config.yaml::

        sandbox:
          use: src.community.aio_sandbox:AioSandboxProvider
          provisioner_url: http://provisioner:8002
    """

    def __init__(self, provisioner_url: str):
        """Initialize with the provisioner service URL.

        Args:
            provisioner_url: URL of the provisioner service
                (e.g., ``http://provisioner:8002``).
        """
        self._provisioner_url = provisioner_url.rstrip("/")

    @property
    def provisioner_url(self) -> str:
        return self._provisioner_url

    # ── SandboxBackend interface ──────────────────────────────────────────

    def create(
        self,
        thread_id: str,
        sandbox_id: str,
        extra_mounts: list[tuple[str, str, bool]] | None = None,
    ) -> SandboxInfo:
        """Create a sandbox Pod + Service via the provisioner.

        Calls ``POST /api/sandboxes`` which creates a dedicated Pod +
        NodePort Service in k3s.
        """
        return self._provisioner_create(thread_id, sandbox_id, extra_mounts)

    def destroy(self, info: SandboxInfo) -> None:
        """Destroy a sandbox Pod + Service via the provisioner."""
        self._provisioner_destroy(info.sandbox_id)

    def is_alive(self, info: SandboxInfo) -> bool:
        """Check whether the sandbox Pod is running."""
        return self._provisioner_is_alive(info.sandbox_id)

    def discover(self, sandbox_id: str) -> SandboxInfo | None:
        """Discover an existing sandbox via the provisioner.

        Calls ``GET /api/sandboxes/{sandbox_id}`` and returns info if
        the Pod exists.
        """
        return self._provisioner_discover(sandbox_id)

    # ── Provisioner API calls ─────────────────────────────────────────────

    def _provisioner_create(self, thread_id: str, sandbox_id: str, extra_mounts: list[tuple[str, str, bool]] | None = None) -> SandboxInfo:
        """POST /api/sandboxes → create Pod + Service."""
        try:
            resp = requests.post(
                f"{self._provisioner_url}/api/sandboxes",
                json={
                    "sandbox_id": sandbox_id,
                    "thread_id": thread_id,
                },
                timeout=30,
            )
            resp.raise_for_status()
            data = resp.json()
            logger.info(f"Provisioner created sandbox {sandbox_id}: sandbox_url={data['sandbox_url']}")
            return SandboxInfo(
                sandbox_id=sandbox_id,
                sandbox_url=data["sandbox_url"],
            )
        except requests.RequestException as exc:
            logger.error(f"Provisioner create failed for {sandbox_id}: {exc}")
            raise RuntimeError(f"Provisioner create failed: {exc}") from exc

    def _provisioner_destroy(self, sandbox_id: str) -> None:
        """DELETE /api/sandboxes/{sandbox_id} → destroy Pod + Service."""
        try:
            resp = requests.delete(
                f"{self._provisioner_url}/api/sandboxes/{sandbox_id}",
                timeout=15,
            )
            if resp.ok:
                logger.info(f"Provisioner destroyed sandbox {sandbox_id}")
            else:
                logger.warning(f"Provisioner destroy returned {resp.status_code}: {resp.text}")
        except requests.RequestException as exc:
            logger.warning(f"Provisioner destroy failed for {sandbox_id}: {exc}")

    def _provisioner_is_alive(self, sandbox_id: str) -> bool:
        """GET /api/sandboxes/{sandbox_id} → check Pod phase."""
        try:
            resp = requests.get(
                f"{self._provisioner_url}/api/sandboxes/{sandbox_id}",
                timeout=10,
            )
            if resp.ok:
                data = resp.json()
                return data.get("status") == "Running"
            return False
        except requests.RequestException:
            return False

    def _provisioner_discover(self, sandbox_id: str) -> SandboxInfo | None:
        """GET /api/sandboxes/{sandbox_id} → discover existing sandbox."""
        try:
            resp = requests.get(
                f"{self._provisioner_url}/api/sandboxes/{sandbox_id}",
                timeout=10,
            )
            if resp.status_code == 404:
                return None
            resp.raise_for_status()
            data = resp.json()
            return SandboxInfo(
                sandbox_id=sandbox_id,
                sandbox_url=data["sandbox_url"],
            )
        except requests.RequestException as exc:
            logger.debug(f"Provisioner discover failed for {sandbox_id}: {exc}")
            return None
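
# Provisioner API contract assumed by the calls above (the provisioner service
# itself is not part of this diff; shapes are inferred from the client code):
#
#     POST   /api/sandboxes                {"sandbox_id": ..., "thread_id": ...}
#                                          -> 2xx {"sandbox_url": "http://k3s:30001"}
#     GET    /api/sandboxes/{sandbox_id}   -> 200 {"sandbox_url": ..., "status": "Running"}
#                                             404 if no such sandbox exists
#     DELETE /api/sandboxes/{sandbox_id}   -> 2xx on successful teardown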
@@ -0,0 +1,41 @@
"""Sandbox metadata for cross-process discovery and state persistence."""

from __future__ import annotations

import time
from dataclasses import dataclass, field


@dataclass
class SandboxInfo:
    """Persisted sandbox metadata that enables cross-process discovery.

    This dataclass holds all the information needed to reconnect to an
    existing sandbox from a different process (e.g., gateway vs langgraph,
    multiple workers, or across K8s pods with shared storage).
    """

    sandbox_id: str
    sandbox_url: str  # e.g. http://localhost:8080 or http://k3s:30001
    container_name: str | None = None  # Only for local container backend
    container_id: str | None = None  # Only for local container backend
    created_at: float = field(default_factory=time.time)

    def to_dict(self) -> dict:
        return {
            "sandbox_id": self.sandbox_id,
            "sandbox_url": self.sandbox_url,
            "container_name": self.container_name,
            "container_id": self.container_id,
            "created_at": self.created_at,
        }

    @classmethod
    def from_dict(cls, data: dict) -> SandboxInfo:
        return cls(
            sandbox_id=data["sandbox_id"],
            sandbox_url=data.get("sandbox_url", data.get("base_url", "")),
            container_name=data.get("container_name"),
            container_id=data.get("container_id"),
            created_at=data.get("created_at", time.time()),
        )
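

if __name__ == "__main__":
    # Round-trip sketch: to_dict()/from_dict() are inverses, and from_dict()
    # also tolerates the legacy "base_url" key found in older persisted files.
    original = SandboxInfo(sandbox_id="a1b2c3d4", sandbox_url="http://localhost:8080")
    assert SandboxInfo.from_dict(original.to_dict()) == original
    legacy = SandboxInfo.from_dict({"sandbox_id": "a1b2c3d4", "base_url": "http://localhost:8080"})
    assert legacy.sandbox_url == "http://localhost:8080"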
@@ -0,0 +1,70 @@
"""Abstract base class for sandbox state persistence.

The state store handles cross-process persistence of thread_id → sandbox mappings,
enabling different processes (gateway, langgraph, multiple workers) to find the same
sandbox for a given thread.
"""

from __future__ import annotations

from abc import ABC, abstractmethod
from collections.abc import Generator
from contextlib import contextmanager

from .sandbox_info import SandboxInfo


class SandboxStateStore(ABC):
    """Abstract base for persisting thread_id → sandbox mappings across processes.

    Implementations:
    - FileSandboxStateStore: JSON files + fcntl file locking (single-host)
    - TODO: RedisSandboxStateStore: Redis-based for distributed multi-host deployments
    """

    @abstractmethod
    def save(self, thread_id: str, info: SandboxInfo) -> None:
        """Save sandbox state for a thread.

        Args:
            thread_id: The thread ID.
            info: Sandbox metadata to persist.
        """
        ...

    @abstractmethod
    def load(self, thread_id: str) -> SandboxInfo | None:
        """Load sandbox state for a thread.

        Args:
            thread_id: The thread ID.

        Returns:
            SandboxInfo if found, None otherwise.
        """
        ...

    @abstractmethod
    def remove(self, thread_id: str) -> None:
        """Remove sandbox state for a thread.

        Args:
            thread_id: The thread ID.
        """
        ...

    @abstractmethod
    @contextmanager
    def lock(self, thread_id: str) -> Generator[None, None, None]:
        """Acquire a cross-process lock for a thread's sandbox operations.

        Ensures only one process can create/modify a sandbox for a given
        thread_id at a time, preventing duplicate sandbox creation.

        Args:
            thread_id: The thread ID to lock.

        Yields:
            None — use as a context manager.
        """
        ...
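
# A minimal in-memory implementation sketch (comment only; requires
# ``import threading``). Illustrative for single-process tests, with none of
# the cross-process guarantees the file- or Redis-based stores provide:
#
#     class InMemorySandboxStateStore(SandboxStateStore):
#         def __init__(self):
#             self._data: dict[str, SandboxInfo] = {}
#             self._locks: dict[str, threading.Lock] = {}
#             self._guard = threading.Lock()
#
#         def save(self, thread_id, info):
#             self._data[thread_id] = info
#
#         def load(self, thread_id):
#             return self._data.get(thread_id)
#
#         def remove(self, thread_id):
#             self._data.pop(thread_id, None)
#
#         @contextmanager
#         def lock(self, thread_id):
#             with self._guard:
#                 per_thread = self._locks.setdefault(thread_id, threading.Lock())
#             with per_thread:
#                 yield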
@@ -0,0 +1,73 @@
import json

from firecrawl import FirecrawlApp
from langchain.tools import tool

from src.config import get_app_config


def _get_firecrawl_client() -> FirecrawlApp:
    config = get_app_config().get_tool_config("web_search")
    api_key = None
    if config is not None:
        api_key = config.model_extra.get("api_key")
    return FirecrawlApp(api_key=api_key)  # type: ignore[arg-type]


@tool("web_search", parse_docstring=True)
def web_search_tool(query: str) -> str:
    """Search the web.

    Args:
        query: The query to search for.
    """
    try:
        config = get_app_config().get_tool_config("web_search")
        max_results = 5
        if config is not None:
            max_results = config.model_extra.get("max_results", max_results)

        client = _get_firecrawl_client()
        result = client.search(query, limit=max_results)

        # result.web contains list of SearchResultWeb objects
        web_results = result.web or []
        normalized_results = [
            {
                "title": getattr(item, "title", "") or "",
                "url": getattr(item, "url", "") or "",
                "snippet": getattr(item, "description", "") or "",
            }
            for item in web_results
        ]
        json_results = json.dumps(normalized_results, indent=2, ensure_ascii=False)
        return json_results
    except Exception as e:
        return f"Error: {str(e)}"


@tool("web_fetch", parse_docstring=True)
def web_fetch_tool(url: str) -> str:
    """Fetch the contents of a web page at a given URL.
    Only fetch EXACT URLs that have been provided directly by the user or have been returned in results from the web_search and web_fetch tools.
    This tool can NOT access content that requires authentication, such as private Google Docs or pages behind login walls.
    Do NOT add www. to URLs that do NOT have them.
    URLs must include the scheme: https://example.com is a valid URL while example.com is an invalid URL.

    Args:
        url: The URL to fetch the contents of.
    """
    try:
        client = _get_firecrawl_client()
        result = client.scrape(url, formats=["markdown"])

        markdown_content = result.markdown or ""
        metadata = result.metadata
        title = metadata.title if metadata and metadata.title else "Untitled"

        if not markdown_content:
            return "Error: No content found"
    except Exception as e:
        return f"Error: {str(e)}"

    return f"# {title}\n\n{markdown_content[:4096]}"
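

if __name__ == "__main__":
    # Manual check (requires a Firecrawl API key in config and network access;
    # LangChain @tool objects are invoked with a dict of their arguments).
    print(web_search_tool.invoke({"query": "open-source agent frameworks"}))
    print(web_fetch_tool.invoke({"url": "https://example.com"}))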
@@ -0,0 +1,3 @@
from .tools import image_search_tool

__all__ = ["image_search_tool"]
@@ -0,0 +1,135 @@
"""
Image Search Tool - Search images using DuckDuckGo for reference in image generation.
"""

import json
import logging

from langchain.tools import tool

from src.config import get_app_config

logger = logging.getLogger(__name__)


def _search_images(
    query: str,
    max_results: int = 5,
    region: str = "wt-wt",
    safesearch: str = "moderate",
    size: str | None = None,
    color: str | None = None,
    type_image: str | None = None,
    layout: str | None = None,
    license_image: str | None = None,
) -> list[dict]:
    """
    Execute image search using DuckDuckGo.

    Args:
        query: Search keywords
        max_results: Maximum number of results
        region: Search region
        safesearch: Safe search level
        size: Image size (Small/Medium/Large/Wallpaper)
        color: Color filter
        type_image: Image type (photo/clipart/gif/transparent/line)
        layout: Layout (Square/Tall/Wide)
        license_image: License filter

    Returns:
        List of search results
    """
    try:
        from ddgs import DDGS
    except ImportError:
        logger.error("ddgs library not installed. Run: pip install ddgs")
        return []

    ddgs = DDGS(timeout=30)

    try:
        kwargs = {
            "region": region,
            "safesearch": safesearch,
            "max_results": max_results,
        }

        if size:
            kwargs["size"] = size
        if color:
            kwargs["color"] = color
        if type_image:
            kwargs["type_image"] = type_image
        if layout:
            kwargs["layout"] = layout
        if license_image:
            kwargs["license_image"] = license_image

        results = ddgs.images(query, **kwargs)
        return list(results) if results else []

    except Exception as e:
        logger.error(f"Failed to search images: {e}")
        return []


@tool("image_search", parse_docstring=True)
def image_search_tool(
    query: str,
    max_results: int = 5,
    size: str | None = None,
    type_image: str | None = None,
    layout: str | None = None,
) -> str:
    """Search for images online. Use this tool BEFORE image generation to find reference images for characters, portraits, objects, scenes, or any content requiring visual accuracy.

    **When to use:**
    - Before generating character/portrait images: search for similar poses, expressions, styles
    - Before generating specific objects/products: search for accurate visual references
    - Before generating scenes/locations: search for architectural or environmental references
    - Before generating fashion/clothing: search for style and detail references

    The returned image URLs can be used as reference images in image generation to significantly improve quality.

    Args:
        query: Search keywords describing the images you want to find. Be specific for better results (e.g., "Japanese woman street photography 1990s" instead of just "woman").
        max_results: Maximum number of images to return. Default is 5.
        size: Image size filter. Options: "Small", "Medium", "Large", "Wallpaper". Use "Large" for reference images.
        type_image: Image type filter. Options: "photo", "clipart", "gif", "transparent", "line". Use "photo" for realistic references.
        layout: Layout filter. Options: "Square", "Tall", "Wide". Choose based on your generation needs.
    """
    config = get_app_config().get_tool_config("image_search")

    # Override max_results from config if set
    if config is not None and "max_results" in config.model_extra:
        max_results = config.model_extra.get("max_results", max_results)

    results = _search_images(
        query=query,
        max_results=max_results,
        size=size,
        type_image=type_image,
        layout=layout,
    )

    if not results:
        return json.dumps({"error": "No images found", "query": query}, ensure_ascii=False)

    normalized_results = [
        {
            "title": r.get("title", ""),
            "image_url": r.get("image", ""),  # full-size image; ddgs returns both "image" and "thumbnail"
            "thumbnail_url": r.get("thumbnail", ""),
        }
        for r in results
    ]

    output = {
        "query": query,
        "total_results": len(normalized_results),
        "results": normalized_results,
        "usage_hint": "Use the 'image_url' values as reference images in image generation. Download them first if needed.",
    }

    return json.dumps(output, indent=2, ensure_ascii=False)
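

if __name__ == "__main__":
    # Manual check (hits DuckDuckGo; requires the ddgs package and network).
    print(image_search_tool.invoke({"query": "golden gate bridge at sunset", "max_results": 3, "type_image": "photo"}))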
@@ -0,0 +1,38 @@
import logging
import os

import requests

logger = logging.getLogger(__name__)


class JinaClient:
    def crawl(self, url: str, return_format: str = "html", timeout: int = 10) -> str:
        headers = {
            "Content-Type": "application/json",
            "X-Return-Format": return_format,
            "X-Timeout": str(timeout),
        }
        if os.getenv("JINA_API_KEY"):
            headers["Authorization"] = f"Bearer {os.getenv('JINA_API_KEY')}"
        else:
            logger.warning("Jina API key is not set. Provide your own key to access a higher rate limit. See https://jina.ai/reader for more information.")
        data = {"url": url}
        try:
            # Client-side timeout slightly above Jina's own X-Timeout so the
            # request cannot hang indefinitely.
            response = requests.post("https://r.jina.ai/", headers=headers, json=data, timeout=timeout + 5)

            if response.status_code != 200:
                error_message = f"Jina API returned status {response.status_code}: {response.text}"
                logger.error(error_message)
                return f"Error: {error_message}"

            if not response.text or not response.text.strip():
                error_message = "Jina API returned empty response"
                logger.error(error_message)
                return f"Error: {error_message}"

            return response.text
        except Exception as e:
            error_message = f"Request to Jina API failed: {str(e)}"
            logger.error(error_message)
            return f"Error: {error_message}"
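

if __name__ == "__main__":
    # Manual check (network access required; set JINA_API_KEY for a higher
    # rate limit). Jina's reader also supports "markdown" and "text" formats.
    client = JinaClient()
    print(client.crawl("https://example.com", return_format="markdown")[:500])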
@@ -0,0 +1,28 @@
from langchain.tools import tool

from src.community.jina_ai.jina_client import JinaClient
from src.config import get_app_config
from src.utils.readability import ReadabilityExtractor

readability_extractor = ReadabilityExtractor()


@tool("web_fetch", parse_docstring=True)
def web_fetch_tool(url: str) -> str:
    """Fetch the contents of a web page at a given URL.
    Only fetch EXACT URLs that have been provided directly by the user or have been returned in results from the web_search and web_fetch tools.
    This tool can NOT access content that requires authentication, such as private Google Docs or pages behind login walls.
    Do NOT add www. to URLs that do NOT have them.
    URLs must include the scheme: https://example.com is a valid URL while example.com is an invalid URL.

    Args:
        url: The URL to fetch the contents of.
    """
    jina_client = JinaClient()
    timeout = 10
    config = get_app_config().get_tool_config("web_fetch")
    if config is not None and "timeout" in config.model_extra:
        timeout = config.model_extra.get("timeout")
    html_content = jina_client.crawl(url, return_format="html", timeout=timeout)
    article = readability_extractor.extract_article(html_content)
    return article.to_markdown()[:4096]
@@ -0,0 +1,62 @@
import json

from langchain.tools import tool
from tavily import TavilyClient

from src.config import get_app_config


def _get_tavily_client() -> TavilyClient:
    config = get_app_config().get_tool_config("web_search")
    api_key = None
    if config is not None and "api_key" in config.model_extra:
        api_key = config.model_extra.get("api_key")
    return TavilyClient(api_key=api_key)


@tool("web_search", parse_docstring=True)
def web_search_tool(query: str) -> str:
    """Search the web.

    Args:
        query: The query to search for.
    """
    config = get_app_config().get_tool_config("web_search")
    max_results = 5
    if config is not None and "max_results" in config.model_extra:
        max_results = config.model_extra.get("max_results")

    client = _get_tavily_client()
    res = client.search(query, max_results=max_results)
    normalized_results = [
        {
            "title": result["title"],
            "url": result["url"],
            "snippet": result["content"],
        }
        for result in res["results"]
    ]
    json_results = json.dumps(normalized_results, indent=2, ensure_ascii=False)
    return json_results


@tool("web_fetch", parse_docstring=True)
def web_fetch_tool(url: str) -> str:
    """Fetch the contents of a web page at a given URL.
    Only fetch EXACT URLs that have been provided directly by the user or have been returned in results from the web_search and web_fetch tools.
    This tool can NOT access content that requires authentication, such as private Google Docs or pages behind login walls.
    Do NOT add www. to URLs that do NOT have them.
    URLs must include the scheme: https://example.com is a valid URL while example.com is an invalid URL.

    Args:
        url: The URL to fetch the contents of.
    """
    client = _get_tavily_client()
    res = client.extract([url])
    if "failed_results" in res and len(res["failed_results"]) > 0:
        return f"Error: {res['failed_results'][0]['error']}"
    elif "results" in res and len(res["results"]) > 0:
        result = res["results"][0]
        return f"# {result['title']}\n\n{result['raw_content'][:4096]}"
    else:
        return "Error: No results found"
@@ -0,0 +1,19 @@
from .app_config import get_app_config
from .extensions_config import ExtensionsConfig, get_extensions_config
from .memory_config import MemoryConfig, get_memory_config
from .paths import Paths, get_paths
from .skills_config import SkillsConfig
from .tracing_config import get_tracing_config, is_tracing_enabled

__all__ = [
    "get_app_config",
    "Paths",
    "get_paths",
    "SkillsConfig",
    "ExtensionsConfig",
    "get_extensions_config",
    "MemoryConfig",
    "get_memory_config",
    "get_tracing_config",
    "is_tracing_enabled",
]
@@ -0,0 +1,120 @@
"""Configuration and loaders for custom agents."""

import logging
import re
from typing import Any

import yaml
from pydantic import BaseModel

from src.config.paths import get_paths

logger = logging.getLogger(__name__)

SOUL_FILENAME = "SOUL.md"
AGENT_NAME_PATTERN = re.compile(r"^[A-Za-z0-9-]+$")


class AgentConfig(BaseModel):
    """Configuration for a custom agent."""

    name: str
    description: str = ""
    model: str | None = None
    tool_groups: list[str] | None = None


def load_agent_config(name: str | None) -> AgentConfig | None:
    """Load the custom or default agent's config from its directory.

    Args:
        name: The agent name.

    Returns:
        AgentConfig instance, or None if `name` is None.

    Raises:
        FileNotFoundError: If the agent directory or config.yaml does not exist.
        ValueError: If the agent name is invalid or config.yaml cannot be parsed.
    """
    if name is None:
        return None

    if not AGENT_NAME_PATTERN.match(name):
        raise ValueError(f"Invalid agent name '{name}'. Must match pattern: {AGENT_NAME_PATTERN.pattern}")

    agent_dir = get_paths().agent_dir(name)
    config_file = agent_dir / "config.yaml"

    if not agent_dir.exists():
        raise FileNotFoundError(f"Agent directory not found: {agent_dir}")

    if not config_file.exists():
        raise FileNotFoundError(f"Agent config not found: {config_file}")

    try:
        with open(config_file, encoding="utf-8") as f:
            data: dict[str, Any] = yaml.safe_load(f) or {}
    except yaml.YAMLError as e:
        raise ValueError(f"Failed to parse agent config {config_file}: {e}") from e

    # Ensure name is set from directory name if not in file
    if "name" not in data:
        data["name"] = name

    # Strip unknown fields before passing to Pydantic (e.g. legacy prompt_file)
    known_fields = set(AgentConfig.model_fields.keys())
    data = {k: v for k, v in data.items() if k in known_fields}

    return AgentConfig(**data)


def load_agent_soul(agent_name: str | None) -> str | None:
    """Read the SOUL.md file for a custom agent, if it exists.

    SOUL.md defines the agent's personality, values, and behavioral guardrails.
    It is injected into the lead agent's system prompt as additional context.

    Args:
        agent_name: The name of the agent, or None for the default agent.

    Returns:
        The SOUL.md content as a string, or None if the file does not exist.
    """
    agent_dir = get_paths().agent_dir(agent_name) if agent_name else get_paths().base_dir
    soul_path = agent_dir / SOUL_FILENAME
    if not soul_path.exists():
        return None
    content = soul_path.read_text(encoding="utf-8").strip()
    return content or None


def list_custom_agents() -> list[AgentConfig]:
    """Scan the agents directory and return all valid custom agents.

    Returns:
        List of AgentConfig for each valid agent directory found.
    """
    agents_dir = get_paths().agents_dir

    if not agents_dir.exists():
        return []

    agents: list[AgentConfig] = []

    for entry in sorted(agents_dir.iterdir()):
        if not entry.is_dir():
            continue

        config_file = entry / "config.yaml"
        if not config_file.exists():
            logger.debug(f"Skipping {entry.name}: no config.yaml")
            continue

        try:
            agent_cfg = load_agent_config(entry.name)
            agents.append(agent_cfg)
        except Exception as e:
            logger.warning(f"Skipping agent '{entry.name}': {e}")

    return agents
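For illustration (not part of this diff): a minimal sketch of what a custom agent directory might contain and how it would be loaded. The agent name, field values, and the module path `src.config.agents_config` are assumptions, not confirmed by the diff.

# Hypothetical layout under {base_dir}/agents/researcher/:
#   config.yaml -> name/description/model/tool_groups fields of AgentConfig
#   SOUL.md     -> personality text injected alongside the lead prompt
from src.config.agents_config import load_agent_config, load_agent_soul  # module path assumed

cfg = load_agent_config("researcher")   # raises FileNotFoundError if the directory is missing
soul = load_agent_soul("researcher")    # None when SOUL.md is absent or empty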
@@ -0,0 +1,214 @@
import os
from pathlib import Path
from typing import Any, Self

import yaml
from dotenv import load_dotenv
from pydantic import BaseModel, ConfigDict, Field

from src.config.extensions_config import ExtensionsConfig
from src.config.memory_config import load_memory_config_from_dict
from src.config.model_config import ModelConfig
from src.config.sandbox_config import SandboxConfig
from src.config.skills_config import SkillsConfig
from src.config.subagents_config import load_subagents_config_from_dict
from src.config.summarization_config import load_summarization_config_from_dict
from src.config.title_config import load_title_config_from_dict
from src.config.tool_config import ToolConfig, ToolGroupConfig

load_dotenv()


class AppConfig(BaseModel):
    """Config for the DeerFlow application."""

    models: list[ModelConfig] = Field(default_factory=list, description="Available models")
    sandbox: SandboxConfig = Field(description="Sandbox configuration")
    tools: list[ToolConfig] = Field(default_factory=list, description="Available tools")
    tool_groups: list[ToolGroupConfig] = Field(default_factory=list, description="Available tool groups")
    skills: SkillsConfig = Field(default_factory=SkillsConfig, description="Skills configuration")
    extensions: ExtensionsConfig = Field(default_factory=ExtensionsConfig, description="Extensions configuration (MCP servers and skills state)")

    model_config = ConfigDict(extra="allow", frozen=False)

    @classmethod
    def resolve_config_path(cls, config_path: str | None = None) -> Path:
        """Resolve the config file path.

        Priority:
        1. If the `config_path` argument is provided, use it.
        2. If the `DEER_FLOW_CONFIG_PATH` environment variable is set, use it.
        3. Otherwise, first check for `config.yaml` in the current directory, then fall back to `config.yaml` in the parent directory.
        """
        if config_path:
            path = Path(config_path)
            if not path.exists():
                raise FileNotFoundError(f"Config file specified by param `config_path` not found at {path}")
            return path
        elif env_path := os.getenv("DEER_FLOW_CONFIG_PATH"):
            path = Path(env_path)
            if not path.exists():
                raise FileNotFoundError(f"Config file specified by environment variable `DEER_FLOW_CONFIG_PATH` not found at {path}")
            return path
        else:
            # Check if config.yaml is in the current directory
            path = Path(os.getcwd()) / "config.yaml"
            if not path.exists():
                # Check if config.yaml is in the parent directory of CWD
                path = Path(os.getcwd()).parent / "config.yaml"
                if not path.exists():
                    raise FileNotFoundError("`config.yaml` file not found in the current directory or its parent directory")
            return path

    @classmethod
    def from_file(cls, config_path: str | None = None) -> Self:
        """Load config from a YAML file.

        See `resolve_config_path` for more details.

        Args:
            config_path: Path to the config file.

        Returns:
            AppConfig: The loaded config.
        """
        resolved_path = cls.resolve_config_path(config_path)
        with open(resolved_path, encoding="utf-8") as f:
            config_data = yaml.safe_load(f) or {}  # guard against an empty file
        config_data = cls.resolve_env_variables(config_data)

        # Load title config if present
        if "title" in config_data:
            load_title_config_from_dict(config_data["title"])

        # Load summarization config if present
        if "summarization" in config_data:
            load_summarization_config_from_dict(config_data["summarization"])

        # Load memory config if present
        if "memory" in config_data:
            load_memory_config_from_dict(config_data["memory"])

        # Load subagents config if present
        if "subagents" in config_data:
            load_subagents_config_from_dict(config_data["subagents"])

        # Load extensions config separately (it lives in a different file)
        extensions_config = ExtensionsConfig.from_file()
        config_data["extensions"] = extensions_config.model_dump()

        result = cls.model_validate(config_data)
        return result

    @classmethod
    def resolve_env_variables(cls, config: Any) -> Any:
        """Recursively resolve environment variables in the config.

        Environment variables are resolved using `os.getenv`. Example: $OPENAI_API_KEY

        Args:
            config: The config to resolve environment variables in.

        Returns:
            The config with environment variables resolved.
        """
        if isinstance(config, str):
            if config.startswith("$"):
                env_value = os.getenv(config[1:])
                if env_value is None:
                    raise ValueError(f"Environment variable {config[1:]} not found for config value {config}")
                return env_value
            return config
        elif isinstance(config, dict):
            return {k: cls.resolve_env_variables(v) for k, v in config.items()}
        elif isinstance(config, list):
            return [cls.resolve_env_variables(item) for item in config]
        return config

    def get_model_config(self, name: str) -> ModelConfig | None:
        """Get the model config by name.

        Args:
            name: The name of the model to get the config for.

        Returns:
            The model config if found, otherwise None.
        """
        return next((model for model in self.models if model.name == name), None)

    def get_tool_config(self, name: str) -> ToolConfig | None:
        """Get the tool config by name.

        Args:
            name: The name of the tool to get the config for.

        Returns:
            The tool config if found, otherwise None.
        """
        return next((tool for tool in self.tools if tool.name == name), None)

    def get_tool_group_config(self, name: str) -> ToolGroupConfig | None:
        """Get the tool group config by name.

        Args:
            name: The name of the tool group to get the config for.

        Returns:
            The tool group config if found, otherwise None.
        """
        return next((group for group in self.tool_groups if group.name == name), None)


_app_config: AppConfig | None = None


def get_app_config() -> AppConfig:
    """Get the DeerFlow config instance.

    Returns a cached singleton instance. Use `reload_app_config()` to reload
    from file, or `reset_app_config()` to clear the cache.
    """
    global _app_config
    if _app_config is None:
        _app_config = AppConfig.from_file()
    return _app_config


def reload_app_config(config_path: str | None = None) -> AppConfig:
    """Reload the config from file and update the cached instance.

    This is useful when the config file has been modified and you want
    to pick up the changes without restarting the application.

    Args:
        config_path: Optional path to config file. If not provided,
            uses the default resolution strategy.

    Returns:
        The newly loaded AppConfig instance.
    """
    global _app_config
    _app_config = AppConfig.from_file(config_path)
    return _app_config


def reset_app_config() -> None:
    """Reset the cached config instance.

    This clears the singleton cache, causing the next call to
    `get_app_config()` to reload from file. Useful for testing
    or when switching between different configurations.
    """
    global _app_config
    _app_config = None


def set_app_config(config: AppConfig) -> None:
    """Set a custom config instance.

    This allows injecting a custom or mock config for testing purposes.

    Args:
        config: The AppConfig instance to use.
    """
    global _app_config
    _app_config = config
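For illustration (not part of this diff): a minimal sketch of the `$ENV` resolution performed by `resolve_env_variables`, assuming the module is importable as `src.config.app_config` (consistent with the relative import in the package's `__init__`). The key names and demo value are hypothetical.

# Sketch: strings starting with "$" are replaced by host environment values.
import os
from src.config.app_config import AppConfig

os.environ.setdefault("OPENAI_API_KEY", "sk-demo")  # demo value for this sketch only
raw = {"models": [{"name": "gpt", "api_key": "$OPENAI_API_KEY"}]}
resolved = AppConfig.resolve_env_variables(raw)
assert resolved["models"][0]["api_key"] == os.environ["OPENAI_API_KEY"]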
@@ -0,0 +1,250 @@
"""Unified extensions configuration for MCP servers and skills."""

import json
import os
from pathlib import Path
from typing import Any, Literal

from pydantic import BaseModel, ConfigDict, Field


class McpOAuthConfig(BaseModel):
    """OAuth configuration for an MCP server (HTTP/SSE transports)."""

    enabled: bool = Field(default=True, description="Whether OAuth token injection is enabled")
    token_url: str = Field(description="OAuth token endpoint URL")
    grant_type: Literal["client_credentials", "refresh_token"] = Field(
        default="client_credentials",
        description="OAuth grant type",
    )
    client_id: str | None = Field(default=None, description="OAuth client ID")
    client_secret: str | None = Field(default=None, description="OAuth client secret")
    refresh_token: str | None = Field(default=None, description="OAuth refresh token (for refresh_token grant)")
    scope: str | None = Field(default=None, description="OAuth scope")
    audience: str | None = Field(default=None, description="OAuth audience (provider-specific)")
    token_field: str = Field(default="access_token", description="Field name containing access token in token response")
    token_type_field: str = Field(default="token_type", description="Field name containing token type in token response")
    expires_in_field: str = Field(default="expires_in", description="Field name containing expiry (seconds) in token response")
    default_token_type: str = Field(default="Bearer", description="Default token type when missing in token response")
    refresh_skew_seconds: int = Field(default=60, description="Refresh token this many seconds before expiry")
    extra_token_params: dict[str, str] = Field(default_factory=dict, description="Additional form params sent to token endpoint")

    model_config = ConfigDict(extra="allow")


class McpServerConfig(BaseModel):
    """Configuration for a single MCP server."""

    enabled: bool = Field(default=True, description="Whether this MCP server is enabled")
    type: str = Field(default="stdio", description="Transport type: 'stdio', 'sse', or 'http'")
    command: str | None = Field(default=None, description="Command to execute to start the MCP server (for stdio type)")
    args: list[str] = Field(default_factory=list, description="Arguments to pass to the command (for stdio type)")
    env: dict[str, str] = Field(default_factory=dict, description="Environment variables for the MCP server")
    url: str | None = Field(default=None, description="URL of the MCP server (for sse or http type)")
    headers: dict[str, str] = Field(default_factory=dict, description="HTTP headers to send (for sse or http type)")
    oauth: McpOAuthConfig | None = Field(default=None, description="OAuth configuration (for sse or http type)")
    description: str = Field(default="", description="Human-readable description of what this MCP server provides")

    model_config = ConfigDict(extra="allow")


class SkillStateConfig(BaseModel):
    """Configuration for a single skill's state."""

    enabled: bool = Field(default=True, description="Whether this skill is enabled")


class ExtensionsConfig(BaseModel):
    """Unified configuration for MCP servers and skills."""

    mcp_servers: dict[str, McpServerConfig] = Field(
        default_factory=dict,
        description="Map of MCP server name to configuration",
        alias="mcpServers",
    )
    skills: dict[str, SkillStateConfig] = Field(
        default_factory=dict,
        description="Map of skill name to state configuration",
    )

    model_config = ConfigDict(extra="allow", populate_by_name=True)

    @classmethod
    def resolve_config_path(cls, config_path: str | None = None) -> Path | None:
        """Resolve the extensions config file path.

        Priority:
        1. If the `config_path` argument is provided, use it.
        2. If the `DEER_FLOW_EXTENSIONS_CONFIG_PATH` environment variable is set, use it.
        3. Otherwise, check for `extensions_config.json` in the current directory, then in the parent directory.
        4. For backward compatibility, also check for `mcp_config.json` if `extensions_config.json` is not found.
        5. If not found, return None (extensions are optional).

        Args:
            config_path: Optional path to extensions config file.

        Returns:
            Path to the extensions config file if found, otherwise None.
        """
        if config_path:
            path = Path(config_path)
            if not path.exists():
                raise FileNotFoundError(f"Extensions config file specified by param `config_path` not found at {path}")
            return path
        elif env_path := os.getenv("DEER_FLOW_EXTENSIONS_CONFIG_PATH"):
            path = Path(env_path)
            if not path.exists():
                raise FileNotFoundError(f"Extensions config file specified by environment variable `DEER_FLOW_EXTENSIONS_CONFIG_PATH` not found at {path}")
            return path
        else:
            # Check if extensions_config.json is in the current directory
            path = Path(os.getcwd()) / "extensions_config.json"
            if path.exists():
                return path

            # Check if extensions_config.json is in the parent directory of CWD
            path = Path(os.getcwd()).parent / "extensions_config.json"
            if path.exists():
                return path

            # Backward compatibility: check for mcp_config.json
            path = Path(os.getcwd()) / "mcp_config.json"
            if path.exists():
                return path

            path = Path(os.getcwd()).parent / "mcp_config.json"
            if path.exists():
                return path

            # Extensions are optional, so return None if not found
            return None

    @classmethod
    def from_file(cls, config_path: str | None = None) -> "ExtensionsConfig":
        """Load extensions config from a JSON file.

        See `resolve_config_path` for more details.

        Args:
            config_path: Path to the extensions config file.

        Returns:
            ExtensionsConfig: The loaded config, or an empty config if the file is not found.
        """
        resolved_path = cls.resolve_config_path(config_path)
        if resolved_path is None:
            # Return empty config if extensions config file is not found
            return cls(mcp_servers={}, skills={})

        with open(resolved_path, encoding="utf-8") as f:
            config_data = json.load(f)

        config_data = cls.resolve_env_variables(config_data)
        return cls.model_validate(config_data)

    @classmethod
    def resolve_env_variables(cls, config: dict[str, Any]) -> dict[str, Any]:
        """Recursively resolve environment variables in the config.

        Environment variables are resolved using `os.getenv`. Example: $OPENAI_API_KEY

        Args:
            config: The config to resolve environment variables in.

        Returns:
            The config with environment variables resolved (mutated in place).
        """
        for key, value in config.items():
            if isinstance(value, str):
                if value.startswith("$"):
                    env_value = os.getenv(value[1:])
                    if env_value is None:
                        raise ValueError(f"Environment variable {value[1:]} not found for config value {value}")
                    config[key] = env_value
            elif isinstance(value, dict):
                config[key] = cls.resolve_env_variables(value)
            elif isinstance(value, list):
                config[key] = [cls.resolve_env_variables(item) if isinstance(item, dict) else item for item in value]
        return config

    def get_enabled_mcp_servers(self) -> dict[str, McpServerConfig]:
        """Get only the enabled MCP servers.

        Returns:
            Dictionary of enabled MCP servers.
        """
        return {name: config for name, config in self.mcp_servers.items() if config.enabled}

    def is_skill_enabled(self, skill_name: str, skill_category: str) -> bool:
        """Check if a skill is enabled.

        Args:
            skill_name: Name of the skill.
            skill_category: Category of the skill.

        Returns:
            True if enabled, False otherwise.
        """
        skill_config = self.skills.get(skill_name)
        if skill_config is None:
            # Default to enabled for public & custom skills
            return skill_category in ("public", "custom")
        return skill_config.enabled


_extensions_config: ExtensionsConfig | None = None


def get_extensions_config() -> ExtensionsConfig:
    """Get the extensions config instance.

    Returns a cached singleton instance. Use `reload_extensions_config()` to reload
    from file, or `reset_extensions_config()` to clear the cache.

    Returns:
        The cached ExtensionsConfig instance.
    """
    global _extensions_config
    if _extensions_config is None:
        _extensions_config = ExtensionsConfig.from_file()
    return _extensions_config


def reload_extensions_config(config_path: str | None = None) -> ExtensionsConfig:
    """Reload the extensions config from file and update the cached instance.

    This is useful when the config file has been modified and you want
    to pick up the changes without restarting the application.

    Args:
        config_path: Optional path to extensions config file. If not provided,
            uses the default resolution strategy.

    Returns:
        The newly loaded ExtensionsConfig instance.
    """
    global _extensions_config
    _extensions_config = ExtensionsConfig.from_file(config_path)
    return _extensions_config


def reset_extensions_config() -> None:
    """Reset the cached extensions config instance.

    This clears the singleton cache, causing the next call to
    `get_extensions_config()` to reload from file. Useful for testing
    or when switching between different configurations.
    """
    global _extensions_config
    _extensions_config = None


def set_extensions_config(config: ExtensionsConfig) -> None:
    """Set a custom extensions config instance.

    This allows injecting a custom or mock config for testing purposes.

    Args:
        config: The ExtensionsConfig instance to use.
    """
    global _extensions_config
    _extensions_config = config
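For illustration (not part of this diff): a minimal sketch validating an in-memory extensions config and querying it; server and skill names are hypothetical.

from src.config.extensions_config import ExtensionsConfig

data = {
    "mcpServers": {
        "github": {"type": "http", "url": "https://example.com/mcp", "enabled": True},
        "legacy": {"type": "stdio", "command": "old-server", "enabled": False},
    },
    "skills": {"pdf": {"enabled": False}},
}
cfg = ExtensionsConfig.model_validate(data)
assert set(cfg.get_enabled_mcp_servers()) == {"github"}  # disabled servers are filtered out
assert cfg.is_skill_enabled("docx", "public")            # unlisted public skills default to enabled
assert not cfg.is_skill_enabled("pdf", "public")         # an explicit state wins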
@@ -0,0 +1,78 @@
"""Configuration for memory mechanism."""

from pydantic import BaseModel, Field


class MemoryConfig(BaseModel):
    """Configuration for global memory mechanism."""

    enabled: bool = Field(
        default=True,
        description="Whether to enable memory mechanism",
    )
    storage_path: str = Field(
        default="",
        description=(
            "Path to store memory data. "
            "If empty, defaults to `{base_dir}/memory.json` (see Paths.memory_file). "
            "Absolute paths are used as-is. "
            "Relative paths are resolved against `Paths.base_dir` "
            "(not the backend working directory). "
            "Note: if you previously set this to `.deer-flow/memory.json`, "
            "the file will now be resolved as `{base_dir}/.deer-flow/memory.json`; "
            "migrate existing data or use an absolute path to preserve the old location."
        ),
    )
    debounce_seconds: int = Field(
        default=30,
        ge=1,
        le=300,
        description="Seconds to wait before processing queued updates (debounce)",
    )
    model_name: str | None = Field(
        default=None,
        description="Model name to use for memory updates (None = use default model)",
    )
    max_facts: int = Field(
        default=100,
        ge=10,
        le=500,
        description="Maximum number of facts to store",
    )
    fact_confidence_threshold: float = Field(
        default=0.7,
        ge=0.0,
        le=1.0,
        description="Minimum confidence threshold for storing facts",
    )
    injection_enabled: bool = Field(
        default=True,
        description="Whether to inject memory into system prompt",
    )
    max_injection_tokens: int = Field(
        default=2000,
        ge=100,
        le=8000,
        description="Maximum tokens to use for memory injection",
    )


# Global configuration instance
_memory_config: MemoryConfig = MemoryConfig()


def get_memory_config() -> MemoryConfig:
    """Get the current memory configuration."""
    return _memory_config


def set_memory_config(config: MemoryConfig) -> None:
    """Set the memory configuration."""
    global _memory_config
    _memory_config = config


def load_memory_config_from_dict(config_dict: dict) -> None:
    """Load memory configuration from a dictionary."""
    global _memory_config
    _memory_config = MemoryConfig(**config_dict)
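For illustration (not part of this diff): a minimal sketch of loading the memory section from a dict; values are hypothetical, and out-of-range values (e.g. max_facts=5, below the ge=10 bound) would raise a Pydantic ValidationError.

from src.config.memory_config import get_memory_config, load_memory_config_from_dict

load_memory_config_from_dict({"enabled": True, "max_facts": 50, "debounce_seconds": 10})
assert get_memory_config().max_facts == 50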
@@ -0,0 +1,22 @@
from pydantic import BaseModel, ConfigDict, Field


class ModelConfig(BaseModel):
    """Config section for a model."""

    name: str = Field(..., description="Unique name for the model")
    display_name: str | None = Field(default=None, description="Display name for the model")
    description: str | None = Field(default=None, description="Description for the model")
    use: str = Field(
        ...,
        description="Class path of the model provider (e.g. langchain_openai.ChatOpenAI)",
    )
    model: str = Field(..., description="Model name")
    supports_thinking: bool = Field(default=False, description="Whether the model supports thinking")
    supports_reasoning_effort: bool = Field(default=False, description="Whether the model supports reasoning effort")
    when_thinking_enabled: dict | None = Field(
        default=None,
        description="Extra settings to be passed to the model when thinking is enabled",
    )
    supports_vision: bool = Field(default=False, description="Whether the model supports vision/image inputs")

    model_config = ConfigDict(extra="allow")
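Note: the optional fields above originally combined `Field(..., default_factory=...)`, which Pydantic rejects (a field cannot be both required and have a factory); they are rewritten with plain defaults. For illustration (not part of this diff), a minimal sketch where the model values are hypothetical; `extra="allow"` lets provider-specific keys pass through validation untouched:

from src.config.model_config import ModelConfig

m = ModelConfig(
    name="default",
    use="langchain_openai.ChatOpenAI",
    model="gpt-4o",
    api_key="$OPENAI_API_KEY",  # extra field, kept thanks to extra="allow"
)
assert m.display_name is None and m.supports_vision is False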
@@ -0,0 +1,178 @@
import os
import re
from pathlib import Path

# Virtual path prefix seen by agents inside the sandbox
VIRTUAL_PATH_PREFIX = "/mnt/user-data"

_SAFE_THREAD_ID_RE = re.compile(r"^[A-Za-z0-9_\-]+$")


class Paths:
    """
    Centralized path configuration for DeerFlow application data.

    Directory layout (host side):
        {base_dir}/
        ├── memory.json
        ├── USER.md                 <-- global user profile (injected into all agents)
        ├── agents/
        │   └── {agent_name}/
        │       ├── config.yaml
        │       ├── SOUL.md         <-- agent personality/identity (injected alongside lead prompt)
        │       └── memory.json
        └── threads/
            └── {thread_id}/
                └── user-data/      <-- mounted as /mnt/user-data/ inside sandbox
                    ├── workspace/  <-- /mnt/user-data/workspace/
                    ├── uploads/    <-- /mnt/user-data/uploads/
                    └── outputs/    <-- /mnt/user-data/outputs/

    BaseDir resolution (in priority order):
    1. Constructor argument `base_dir`
    2. DEER_FLOW_HOME environment variable
    3. Local dev fallback: cwd/.deer-flow (when cwd is the backend/ dir)
    4. Default: $HOME/.deer-flow
    """

    def __init__(self, base_dir: str | Path | None = None) -> None:
        self._base_dir = Path(base_dir).resolve() if base_dir is not None else None

    @property
    def base_dir(self) -> Path:
        """Root directory for all application data."""
        if self._base_dir is not None:
            return self._base_dir

        if env_home := os.getenv("DEER_FLOW_HOME"):
            return Path(env_home).resolve()

        cwd = Path.cwd()
        if cwd.name == "backend" or (cwd / "pyproject.toml").exists():
            return cwd / ".deer-flow"

        return Path.home() / ".deer-flow"

    @property
    def memory_file(self) -> Path:
        """Path to the persisted memory file: `{base_dir}/memory.json`."""
        return self.base_dir / "memory.json"

    @property
    def user_md_file(self) -> Path:
        """Path to the global user profile file: `{base_dir}/USER.md`."""
        return self.base_dir / "USER.md"

    @property
    def agents_dir(self) -> Path:
        """Root directory for all custom agents: `{base_dir}/agents/`."""
        return self.base_dir / "agents"

    def agent_dir(self, name: str) -> Path:
        """Directory for a specific agent: `{base_dir}/agents/{name}/`."""
        return self.agents_dir / name.lower()

    def agent_memory_file(self, name: str) -> Path:
        """Per-agent memory file: `{base_dir}/agents/{name}/memory.json`."""
        return self.agent_dir(name) / "memory.json"

    def thread_dir(self, thread_id: str) -> Path:
        """
        Host path for a thread's data: `{base_dir}/threads/{thread_id}/`

        This directory contains a `user-data/` subdirectory that is mounted
        as `/mnt/user-data/` inside the sandbox.

        Raises:
            ValueError: If `thread_id` contains unsafe characters (path separators
                or `..`) that could cause directory traversal.
        """
        if not _SAFE_THREAD_ID_RE.match(thread_id):
            raise ValueError(f"Invalid thread_id {thread_id!r}: only alphanumeric characters, hyphens, and underscores are allowed.")
        return self.base_dir / "threads" / thread_id

    def sandbox_work_dir(self, thread_id: str) -> Path:
        """
        Host path for the agent's workspace directory.
        Host: `{base_dir}/threads/{thread_id}/user-data/workspace/`
        Sandbox: `/mnt/user-data/workspace/`
        """
        return self.thread_dir(thread_id) / "user-data" / "workspace"

    def sandbox_uploads_dir(self, thread_id: str) -> Path:
        """
        Host path for user-uploaded files.
        Host: `{base_dir}/threads/{thread_id}/user-data/uploads/`
        Sandbox: `/mnt/user-data/uploads/`
        """
        return self.thread_dir(thread_id) / "user-data" / "uploads"

    def sandbox_outputs_dir(self, thread_id: str) -> Path:
        """
        Host path for agent-generated artifacts.
        Host: `{base_dir}/threads/{thread_id}/user-data/outputs/`
        Sandbox: `/mnt/user-data/outputs/`
        """
        return self.thread_dir(thread_id) / "user-data" / "outputs"

    def sandbox_user_data_dir(self, thread_id: str) -> Path:
        """
        Host path for the user-data root.
        Host: `{base_dir}/threads/{thread_id}/user-data/`
        Sandbox: `/mnt/user-data/`
        """
        return self.thread_dir(thread_id) / "user-data"

    def ensure_thread_dirs(self, thread_id: str) -> None:
        """Create all standard sandbox directories for a thread."""
        self.sandbox_work_dir(thread_id).mkdir(parents=True, exist_ok=True)
        self.sandbox_uploads_dir(thread_id).mkdir(parents=True, exist_ok=True)
        self.sandbox_outputs_dir(thread_id).mkdir(parents=True, exist_ok=True)

    def resolve_virtual_path(self, thread_id: str, virtual_path: str) -> Path:
        """Resolve a sandbox virtual path to the actual host filesystem path.

        Args:
            thread_id: The thread ID.
            virtual_path: Virtual path as seen inside the sandbox, e.g.
                ``/mnt/user-data/outputs/report.pdf``.
                Leading slashes are stripped before matching.

        Returns:
            The resolved absolute host filesystem path.

        Raises:
            ValueError: If the path does not start with the expected virtual
                prefix or a path-traversal attempt is detected.
        """
        stripped = virtual_path.lstrip("/")
        prefix = VIRTUAL_PATH_PREFIX.lstrip("/")

        # Require an exact segment-boundary match to avoid prefix confusion
        # (e.g. reject paths like "mnt/user-dataX/...").
        if stripped != prefix and not stripped.startswith(prefix + "/"):
            raise ValueError(f"Path must start with /{prefix}")

        relative = stripped[len(prefix) :].lstrip("/")
        base = self.sandbox_user_data_dir(thread_id).resolve()
        actual = (base / relative).resolve()

        try:
            actual.relative_to(base)
        except ValueError:
            raise ValueError("Access denied: path traversal detected")

        return actual


# ── Singleton ────────────────────────────────────────────────────────────

_paths: Paths | None = None


def get_paths() -> Paths:
    """Return the global Paths singleton (lazy-initialized)."""
    global _paths
    if _paths is None:
        _paths = Paths()
    return _paths
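For illustration (not part of this diff): a minimal sketch of virtual-path resolution against a throwaway base_dir, showing both the happy path and traversal rejection. The thread ID and file name are hypothetical.

import tempfile
from src.config.paths import Paths

with tempfile.TemporaryDirectory() as tmp:
    paths = Paths(base_dir=tmp)
    paths.ensure_thread_dirs("thread-1")
    host = paths.resolve_virtual_path("thread-1", "/mnt/user-data/outputs/report.pdf")
    assert host == paths.sandbox_outputs_dir("thread-1").resolve() / "report.pdf"
    try:
        paths.resolve_virtual_path("thread-1", "/mnt/user-data/../../etc/passwd")
    except ValueError:
        pass  # traversal attempts escape the base dir and are rejected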
@@ -0,0 +1,66 @@
from pydantic import BaseModel, ConfigDict, Field


class VolumeMountConfig(BaseModel):
    """Configuration for a volume mount."""

    host_path: str = Field(..., description="Path on the host machine")
    container_path: str = Field(..., description="Path inside the container")
    read_only: bool = Field(default=False, description="Whether the mount is read-only")


class SandboxConfig(BaseModel):
    """Config section for a sandbox.

    Common options:
        use: Class path of the sandbox provider (required)

    AioSandboxProvider specific options:
        image: Docker image to use (default: enterprise-public-cn-beijing.cr.volces.com/vefaas-public/all-in-one-sandbox:latest)
        port: Base port for sandbox containers (default: 8080)
        base_url: If set, uses existing sandbox instead of starting new container
        auto_start: Whether to automatically start Docker container (default: true)
        container_prefix: Prefix for container names (default: deer-flow-sandbox)
        idle_timeout: Idle timeout in seconds before sandbox is released (default: 600 = 10 minutes). Set to 0 to disable.
        mounts: List of volume mounts to share directories with the container
        environment: Environment variables to inject into the container (values starting with $ are resolved from host env)
    """

    use: str = Field(
        ...,
        description="Class path of the sandbox provider (e.g. src.sandbox.local:LocalSandboxProvider)",
    )
    image: str | None = Field(
        default=None,
        description="Docker image to use for the sandbox container",
    )
    port: int | None = Field(
        default=None,
        description="Base port for sandbox containers",
    )
    base_url: str | None = Field(
        default=None,
        description="If set, uses existing sandbox at this URL instead of starting new container",
    )
    auto_start: bool | None = Field(
        default=None,
        description="Whether to automatically start Docker container",
    )
    container_prefix: str | None = Field(
        default=None,
        description="Prefix for container names",
    )
    idle_timeout: int | None = Field(
        default=None,
        description="Idle timeout in seconds before sandbox is released (default: 600 = 10 minutes). Set to 0 to disable.",
    )
    mounts: list[VolumeMountConfig] = Field(
        default_factory=list,
        description="List of volume mounts to share directories between host and container",
    )
    environment: dict[str, str] = Field(
        default_factory=dict,
        description="Environment variables to inject into the sandbox container. Values starting with $ will be resolved from host environment variables.",
    )

    model_config = ConfigDict(extra="allow")
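For illustration (not part of this diff): a minimal sketch of a sandbox section; the provider path matches the example in the field description, while the mount paths are hypothetical.

from src.config.sandbox_config import SandboxConfig, VolumeMountConfig

sandbox = SandboxConfig(
    use="src.sandbox.local:LocalSandboxProvider",
    idle_timeout=0,  # 0 disables idle release
    mounts=[VolumeMountConfig(host_path="/srv/data", container_path="/mnt/data", read_only=True)],
    environment={"HTTP_PROXY": "$HTTP_PROXY"},  # resolved from the host env at load time
)
assert sandbox.mounts[0].read_only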
@@ -0,0 +1,49 @@
from pathlib import Path

from pydantic import BaseModel, Field


class SkillsConfig(BaseModel):
    """Configuration for skills system"""

    path: str | None = Field(
        default=None,
        description="Path to skills directory. If not specified, defaults to ../skills relative to backend directory",
    )
    container_path: str = Field(
        default="/mnt/skills",
        description="Path where skills are mounted in the sandbox container",
    )

    def get_skills_path(self) -> Path:
        """
        Get the resolved skills directory path.

        Returns:
            Path to the skills directory
        """
        if self.path:
            # Use configured path (can be absolute or relative)
            path = Path(self.path)
            if not path.is_absolute():
                # If relative, resolve from current working directory
                path = Path.cwd() / path
            return path.resolve()
        else:
            # Default: ../skills relative to backend directory
            from src.skills.loader import get_skills_root_path

            return get_skills_root_path()

    def get_skill_container_path(self, skill_name: str, category: str = "public") -> str:
        """
        Get the full container path for a specific skill.

        Args:
            skill_name: Name of the skill (directory name)
            category: Category of the skill (public or custom)

        Returns:
            Full path to the skill in the container
        """
        return f"{self.container_path}/{category}/{skill_name}"
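For illustration (not part of this diff): container-path layout for a skill; the skill names are hypothetical.

from src.config.skills_config import SkillsConfig

skills = SkillsConfig()  # default container_path="/mnt/skills"
assert skills.get_skill_container_path("pdf") == "/mnt/skills/public/pdf"
assert skills.get_skill_container_path("report", category="custom") == "/mnt/skills/custom/report"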
@@ -0,0 +1,65 @@
"""Configuration for the subagent system loaded from config.yaml."""

import logging

from pydantic import BaseModel, Field

logger = logging.getLogger(__name__)


class SubagentOverrideConfig(BaseModel):
    """Per-agent configuration overrides."""

    timeout_seconds: int | None = Field(
        default=None,
        ge=1,
        description="Timeout in seconds for this subagent (None = use global default)",
    )


class SubagentsAppConfig(BaseModel):
    """Configuration for the subagent system."""

    timeout_seconds: int = Field(
        default=900,
        ge=1,
        description="Default timeout in seconds for all subagents (default: 900 = 15 minutes)",
    )
    agents: dict[str, SubagentOverrideConfig] = Field(
        default_factory=dict,
        description="Per-agent configuration overrides keyed by agent name",
    )

    def get_timeout_for(self, agent_name: str) -> int:
        """Get the effective timeout for a specific agent.

        Args:
            agent_name: The name of the subagent.

        Returns:
            The timeout in seconds, using the per-agent override if set, otherwise the global default.
        """
        override = self.agents.get(agent_name)
        if override is not None and override.timeout_seconds is not None:
            return override.timeout_seconds
        return self.timeout_seconds


_subagents_config: SubagentsAppConfig = SubagentsAppConfig()


def get_subagents_app_config() -> SubagentsAppConfig:
    """Get the current subagents configuration."""
    return _subagents_config


def load_subagents_config_from_dict(config_dict: dict) -> None:
    """Load subagents configuration from a dictionary."""
    global _subagents_config
    _subagents_config = SubagentsAppConfig(**config_dict)

    overrides_summary = {name: f"{override.timeout_seconds}s" for name, override in _subagents_config.agents.items() if override.timeout_seconds is not None}
    if overrides_summary:
        logger.info(f"Subagents config loaded: default timeout={_subagents_config.timeout_seconds}s, per-agent overrides={overrides_summary}")
    else:
        logger.info(f"Subagents config loaded: default timeout={_subagents_config.timeout_seconds}s, no per-agent overrides")
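For illustration (not part of this diff): a minimal sketch of per-agent timeout overrides; the agent names are hypothetical.

from src.config.subagents_config import get_subagents_app_config, load_subagents_config_from_dict

load_subagents_config_from_dict({
    "timeout_seconds": 600,
    "agents": {"coder": {"timeout_seconds": 1800}},
})
cfg = get_subagents_app_config()
assert cfg.get_timeout_for("coder") == 1800   # per-agent override
assert cfg.get_timeout_for("writer") == 600   # falls back to the global default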
@@ -0,0 +1,74 @@
"""Configuration for conversation summarization."""

from typing import Literal

from pydantic import BaseModel, Field

ContextSizeType = Literal["fraction", "tokens", "messages"]


class ContextSize(BaseModel):
    """Context size specification for trigger or keep parameters."""

    type: ContextSizeType = Field(description="Type of context size specification")
    value: int | float = Field(description="Value for the context size specification")

    def to_tuple(self) -> tuple[ContextSizeType, int | float]:
        """Convert to the tuple format expected by SummarizationMiddleware."""
        return (self.type, self.value)


class SummarizationConfig(BaseModel):
    """Configuration for automatic conversation summarization."""

    enabled: bool = Field(
        default=False,
        description="Whether to enable automatic conversation summarization",
    )
    model_name: str | None = Field(
        default=None,
        description="Model name to use for summarization (None = use a lightweight model)",
    )
    trigger: ContextSize | list[ContextSize] | None = Field(
        default=None,
        description="One or more thresholds that trigger summarization. When any threshold is met, summarization runs. "
        "Examples: {'type': 'messages', 'value': 50} triggers at 50 messages, "
        "{'type': 'tokens', 'value': 4000} triggers at 4000 tokens, "
        "{'type': 'fraction', 'value': 0.8} triggers at 80% of model's max input tokens",
    )
    keep: ContextSize = Field(
        default_factory=lambda: ContextSize(type="messages", value=20),
        description="Context retention policy after summarization. Specifies how much history to preserve. "
        "Examples: {'type': 'messages', 'value': 20} keeps 20 messages, "
        "{'type': 'tokens', 'value': 3000} keeps 3000 tokens, "
        "{'type': 'fraction', 'value': 0.3} keeps 30% of model's max input tokens",
    )
    trim_tokens_to_summarize: int | None = Field(
        default=4000,
        description="Maximum tokens to keep when preparing messages for summarization. Pass null to skip trimming.",
    )
    summary_prompt: str | None = Field(
        default=None,
        description="Custom prompt template for generating summaries. If not provided, uses the default LangChain prompt.",
    )


# Global configuration instance
_summarization_config: SummarizationConfig = SummarizationConfig()


def get_summarization_config() -> SummarizationConfig:
    """Get the current summarization configuration."""
    return _summarization_config


def set_summarization_config(config: SummarizationConfig) -> None:
    """Set the summarization configuration."""
    global _summarization_config
    _summarization_config = config


def load_summarization_config_from_dict(config_dict: dict) -> None:
    """Load summarization configuration from a dictionary."""
    global _summarization_config
    _summarization_config = SummarizationConfig(**config_dict)
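For illustration (not part of this diff): a minimal sketch of a summarization section that triggers at 50 messages or 80% of the context window and keeps the last 20 messages; the values are illustrative.

from src.config.summarization_config import SummarizationConfig

cfg = SummarizationConfig.model_validate({
    "enabled": True,
    "trigger": [{"type": "messages", "value": 50}, {"type": "fraction", "value": 0.8}],
    "keep": {"type": "messages", "value": 20},
})
assert cfg.keep.to_tuple() == ("messages", 20)  # tuple form consumed by the middleware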
@@ -0,0 +1,53 @@
"""Configuration for automatic thread title generation."""

from pydantic import BaseModel, Field


class TitleConfig(BaseModel):
    """Configuration for automatic thread title generation."""

    enabled: bool = Field(
        default=True,
        description="Whether to enable automatic title generation",
    )
    max_words: int = Field(
        default=6,
        ge=1,
        le=20,
        description="Maximum number of words in the generated title",
    )
    max_chars: int = Field(
        default=60,
        ge=10,
        le=200,
        description="Maximum number of characters in the generated title",
    )
    model_name: str | None = Field(
        default=None,
        description="Model name to use for title generation (None = use default model)",
    )
    prompt_template: str = Field(
        default=("Generate a concise title (max {max_words} words) for this conversation.\nUser: {user_msg}\nAssistant: {assistant_msg}\n\nReturn ONLY the title, no quotes, no explanation."),
        description="Prompt template for title generation",
    )


# Global configuration instance
_title_config: TitleConfig = TitleConfig()


def get_title_config() -> TitleConfig:
    """Get the current title configuration."""
    return _title_config


def set_title_config(config: TitleConfig) -> None:
    """Set the title configuration."""
    global _title_config
    _title_config = config


def load_title_config_from_dict(config_dict: dict) -> None:
    """Load title configuration from a dictionary."""
    global _title_config
    _title_config = TitleConfig(**config_dict)
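For illustration (not part of this diff): how the default prompt template expands; the conversation messages are hypothetical.

from src.config.title_config import TitleConfig

cfg = TitleConfig()
prompt = cfg.prompt_template.format(
    max_words=cfg.max_words,
    user_msg="How do I mount a volume into the sandbox?",
    assistant_msg="Add a mounts entry to the sandbox config...",
)
assert "max 6 words" in prompt  # default max_words=6 is substituted into the template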
@@ -0,0 +1,20 @@
from pydantic import BaseModel, ConfigDict, Field


class ToolGroupConfig(BaseModel):
    """Config section for a tool group"""

    name: str = Field(..., description="Unique name for the tool group")

    model_config = ConfigDict(extra="allow")


class ToolConfig(BaseModel):
    """Config section for a tool"""

    name: str = Field(..., description="Unique name for the tool")
    group: str = Field(..., description="Group name for the tool")
    use: str = Field(
        ...,
        description="Variable name of the tool provider (e.g. src.sandbox.tools:bash_tool)",
    )

    model_config = ConfigDict(extra="allow")
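For illustration (not part of this diff): a minimal sketch pairing a tool with its group; the provider path matches the example in the field description, and the names are hypothetical.

from src.config.tool_config import ToolConfig, ToolGroupConfig

group = ToolGroupConfig(name="sandbox")
tool = ToolConfig(name="bash", group="sandbox", use="src.sandbox.tools:bash_tool")
assert tool.group == group.name  # tools reference their group by name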
Some files were not shown because too many files have changed in this diff.