feat(tools): add youtube transcript integration via youtube-transcript-api

This commit is contained in:
Aneesh
2026-02-04 19:24:40 +05:30
parent 48b1e0e038
commit f2f0b4fc61
4 changed files with 146 additions and 0 deletions
+1
View File
@@ -28,6 +28,7 @@ dependencies = [
"fastmcp>=2.0.0",
"diff-match-patch>=20230430",
"python-dotenv>=1.0.0",
"youtube-transcript-api>=0.6.0",
]
[project.optional-dependencies]
+3
View File
@@ -38,6 +38,7 @@ from .file_system_toolkits.write_to_file import register_tools as register_write
from .pdf_read_tool import register_tools as register_pdf_read
from .web_scrape_tool import register_tools as register_web_scrape
from .web_search_tool import register_tools as register_web_search
from .youtube_transcript_tool.tool import register as register_youtube
def register_all_tools(
@@ -59,6 +60,7 @@ def register_all_tools(
register_example(mcp)
register_web_scrape(mcp)
register_pdf_read(mcp)
register_youtube(mcp)
# Tools that need credentials (pass credentials if provided)
# web_search supports multiple providers (Google, Brave) with auto-detection
@@ -80,6 +82,7 @@ def register_all_tools(
"web_search",
"web_scrape",
"pdf_read",
"get_youtube_transcript",
"view_file",
"write_to_file",
"list_dir",
@@ -0,0 +1,71 @@
from fastmcp import FastMCP
from youtube_transcript_api import YouTubeTranscriptApi
from youtube_transcript_api._errors import TranscriptsDisabled, NoTranscriptFound
import re
def get_youtube_transcript(video_url: str) -> str:
    """
    Fetch the transcript for a YouTube video as one plain-text string.

    This function never raises: every failure is reported to the caller as a
    string that starts with "Error:".

    Args:
        video_url: The URL of the YouTube video.

    Returns:
        The transcript entries joined by single spaces, or an "Error: ..."
        message when the URL is invalid or the transcript cannot be retrieved.
    """
    try:
        # Extract video ID from URL
        video_id = extract_video_id(video_url)
        if not video_id:
            return "Error: Invalid YouTube URL"

        # Older youtube-transcript-api releases expose a static
        # get_transcript(); 1.x releases replaced it with an instance-level
        # fetch() and later dropped the static method. Use whichever exists so
        # the tool works across the whole ">=0.6.0" dependency range.
        legacy_get_transcript = getattr(YouTubeTranscriptApi, "get_transcript", None)
        if legacy_get_transcript is not None:
            entries = legacy_get_transcript(video_id)
        else:
            # to_raw_data() yields the same list-of-dicts shape the legacy
            # call returned, so the join below serves both code paths.
            entries = YouTubeTranscriptApi().fetch(video_id).to_raw_data()

        # Join transcript entries into a single string
        return " ".join(entry['text'] for entry in entries)
    except TranscriptsDisabled:
        return "Error: Transcripts are disabled for this video"
    except NoTranscriptFound:
        return "Error: No transcript available"
    except Exception as e:
        # Tool boundary: surface unexpected failures as text instead of raising.
        return f"Error: {str(e)}"
# Compiled once at import time. The leading look-behind stops lookalike hosts
# such as "not-youtube.com" or "myyoutu.be" from matching, and the lazy
# optional group only accepts "v=" when it is a real query parameter (first
# in the query string, or preceded by "&" / ";").
_VIDEO_ID_RE = re.compile(
    r"(?<![\w-])"
    r"(?:"
    r"youtube(?:-nocookie)?\.com/"
    r"(?:watch\?(?:[^#\s]*?[&;])??v=|embed/|v/|shorts/|live/)"
    r"|youtu\.be/"
    r")"
    r"([a-zA-Z0-9_-]{11})"
)


def extract_video_id(url: str) -> str:
    """
    Extract the 11-character video ID from a YouTube URL.

    Supports:
    - https://www.youtube.com/watch?v=VIDEO_ID (also with other query
      parameters before or after ``v``)
    - https://youtu.be/VIDEO_ID
    - https://www.youtube.com/embed/VIDEO_ID
    - https://www.youtube-nocookie.com/embed/VIDEO_ID
    - https://www.youtube.com/v/VIDEO_ID
    - https://www.youtube.com/shorts/VIDEO_ID
    - https://www.youtube.com/live/VIDEO_ID

    Subdomains (``www.``, ``m.``, ``music.``) and scheme-less URLs are
    accepted. Hosts that merely end in "youtube.com" or "youtu.be" (for
    example "not-youtube.com") are rejected.

    Args:
        url: YouTube video URL

    Returns:
        The video ID, or empty string if not found (including when ``url`` is
        empty or not a string)
    """
    if not url or not isinstance(url, str):
        return ""
    match = _VIDEO_ID_RE.search(url)
    return match.group(1) if match else ""
def register(mcp: FastMCP):
    """Expose ``get_youtube_transcript`` as a tool on the given FastMCP server."""
    as_tool = mcp.tool()
    as_tool(get_youtube_transcript)
+71
View File
@@ -0,0 +1,71 @@
import pytest
from unittest.mock import patch, MagicMock
from aden_tools.tools.youtube_transcript_tool.tool import get_youtube_transcript
from youtube_transcript_api import YouTubeTranscriptApi
from youtube_transcript_api._errors import TranscriptsDisabled, NoTranscriptFound
def test_get_transcript_success():
    """Entries are joined with single spaces and the parsed video ID is forwarded."""
    fake_entries = [
        {'text': 'Hello', 'start': 0},
        {'text': 'World', 'start': 1},
    ]
    # create=True lets the patch succeed even when the installed library
    # does not define get_transcript on the class.
    fetch_patch = patch.object(
        YouTubeTranscriptApi,
        'get_transcript',
        return_value=fake_entries,
        create=True,
    )
    with fetch_patch as fake_fetch:
        transcript = get_youtube_transcript('https://www.youtube.com/watch?v=dQw4w9WgXcQ')

        # The two entries collapse into one space-separated string.
        assert transcript == "Hello World"
        # Only the ID portion of the URL reaches the API.
        fake_fetch.assert_called_once_with('dQw4w9WgXcQ')
def test_get_transcript_transcripts_disabled():
    """TranscriptsDisabled is converted into an error string rather than propagated."""
    failure = TranscriptsDisabled('video_id')
    fetch_patch = patch.object(
        YouTubeTranscriptApi,
        'get_transcript',
        side_effect=failure,
        create=True,
    )
    with fetch_patch:
        message = get_youtube_transcript('https://www.youtube.com/watch?v=dQw4w9WgXcQ')

        # The exception must be swallowed and reported as text.
        assert message.startswith('Error:')
        assert 'disabled' in message.lower()
def test_get_transcript_no_transcript_found():
    """NoTranscriptFound is converted into the 'No transcript available' error string."""
    failure = NoTranscriptFound('video_id', [], 'message')
    fetch_patch = patch.object(
        YouTubeTranscriptApi,
        'get_transcript',
        side_effect=failure,
        create=True,
    )
    with fetch_patch:
        message = get_youtube_transcript('https://www.youtube.com/watch?v=dQw4w9WgXcQ')

        # The exception must be swallowed and reported as text.
        assert message.startswith('Error:')
        assert 'No transcript available' in message
def test_invalid_url():
    """A URL that is not a YouTube link yields an 'invalid' error string."""
    message = get_youtube_transcript('https://not-youtube.com/video')

    assert message.startswith('Error:')
    # Accept either capitalisation of the keyword in the message.
    assert any(keyword in message for keyword in ('Invalid', 'invalid'))
def test_generic_exception():
    """Any unexpected exception from the API is reported as an error string."""
    failure = Exception('Some unexpected error')
    fetch_patch = patch.object(
        YouTubeTranscriptApi,
        'get_transcript',
        side_effect=failure,
        create=True,
    )
    with fetch_patch:
        message = get_youtube_transcript('https://www.youtube.com/watch?v=dQw4w9WgXcQ')

        # The exception must be swallowed and reported as text.
        assert message.startswith('Error:')