diff --git a/src/backend/base/langflow/components/tools/youtube_transcripts.py b/src/backend/base/langflow/components/tools/youtube_transcripts.py index 0dc9247cd..0becb5770 100644 --- a/src/backend/base/langflow/components/tools/youtube_transcripts.py +++ b/src/backend/base/langflow/components/tools/youtube_transcripts.py @@ -1,49 +1,25 @@ -from langchain.tools import StructuredTool + from langchain_community.document_loaders import YoutubeLoader from langchain_community.document_loaders.youtube import TranscriptFormat -from langchain_core.tools import ToolException -from pydantic import BaseModel, Field -from langflow.base.langchain_utilities.model import LCToolComponent -from langflow.field_typing import Tool +from langflow.custom import Component from langflow.inputs import DropdownInput, IntInput, MultilineInput from langflow.schema import Data from langflow.template import Output -class YoutubeApiSchema(BaseModel): - """Schema to define the input structure for the tool.""" - - url: str = Field(..., description="The YouTube URL to get transcripts from.") - transcript_format: TranscriptFormat = Field( - TranscriptFormat.TEXT, - description="The format of the transcripts. Either 'text' for a single " - "text output or 'chunks' for timestamped chunks.", - ) - chunk_size_seconds: int = Field( - 120, - description="The size of each transcript chunk in seconds. Only " - "applicable when 'Transcript Format' is set to 'chunks'.", - ) - language: str = Field( - "", - description="A comma-separated list of language codes in descending " "priority. Leave empty for default.", - ) - translation: str = Field( - "", description="Translate the transcripts to the specified language. " "Leave empty for no translation." - ) - - -class YouTubeTranscriptsComponent(LCToolComponent): +class YouTubeTranscriptsComponent(Component): """A component that extracts spoken content from YouTube videos as transcripts.""" display_name: str = "YouTube Transcripts" description: str = "Extracts spoken content from YouTube videos as transcripts." icon: str = "YouTube" + name="YouTubeTranscripts" inputs = [ MultilineInput( - name="url", display_name="Video URL", info="Enter the YouTube video URL to get transcripts from." + name="url", display_name="Video URL", info="Enter the YouTube video URL to get transcripts from.", + tool_mode=True, ), DropdownInput( name="transcript_format", @@ -52,6 +28,7 @@ class YouTubeTranscriptsComponent(LCToolComponent): value="text", info="The format of the transcripts. Either 'text' for a single output " "or 'chunks' for timestamped chunks.", + advanced=True, ), IntInput( name="chunk_size_seconds", @@ -61,10 +38,23 @@ class YouTubeTranscriptsComponent(LCToolComponent): info="The size of each transcript chunk in seconds. Only applicable when " "'Transcript Format' is set to 'chunks'.", ), - MultilineInput( + DropdownInput( name="language", display_name="Language", - info="A comma-separated list of language codes in descending priority. " "Leave empty for default.", + options = [ + "af", "ak", "sq", "am", "ar", "hy", "as", "ay", "az", "bn", "eu", "be", "bho", + "bs", "bg", "my", "ca", "ceb", "zh", "zh-HK", "zh-CN", "zh-SG", "zh-TW", + "zh-Hans", "zh-Hant", "hak-TW", "nan-TW", "co", "hr", "cs", "da", "dv", "nl", + "en", "en-US", "eo", "et", "ee", "fil", "fi", "fr", "gl", "lg", "ka", "de", + "el", "gn", "gu", "ht", "ha", "haw", "iw", "hi", "hmn", "hu", "is", "ig", "id", + "ga", "it", "ja", "jv", "kn", "kk", "km", "rw", "ko", "kri", "ku", "ky", "lo", + "la", "lv", "ln", "lt", "lb", "mk", "mg", "ms", "ml", "mt", "mi", "mr", "mn", + "ne", "nso", "no", "ny", "or", "om", "ps", "fa", "pl", "pt", "pa", "qu", "ro", + "ru", "sm", "sa", "gd", "sr", "sn", "sd", "si", "sk", "sl", "so", "st", "es", + "su", "sw", "sv", "tg", "ta", "tt", "te", "th", "ti", "ts", "tr", "tk", "uk", + "ur", "ug", "uz", "vi", "cy", "fy", "xh", "yi", "yo", "zu"], + value="en", + info="Specify to make sure the transcripts are retrieved in your desired language. Defaults to English: 'en'", ), DropdownInput( name="translation", @@ -77,7 +67,6 @@ class YouTubeTranscriptsComponent(LCToolComponent): outputs = [ Output(name="transcripts", display_name="Data", method="build_youtube_transcripts"), - Output(name="transcripts_tool", display_name="Tool", method="build_youtube_tool"), ] def build_youtube_transcripts(self) -> Data | list[Data]: @@ -94,7 +83,7 @@ class YouTubeTranscriptsComponent(LCToolComponent): if self.transcript_format == "text" else TranscriptFormat.CHUNKS, chunk_size_seconds=self.chunk_size_seconds, - language=self.language.split(",") if self.language else ["en"], + language=[self.language], translation=self.translation or None, ) @@ -130,44 +119,19 @@ class YouTubeTranscriptsComponent(LCToolComponent): Returns: Data | list[Data]: Video transcripts as single Data or list of Data. """ - try: - if isinstance(transcript_format, str): - transcript_format = TranscriptFormat(transcript_format) - loader = YoutubeLoader.from_youtube_url( - url, - transcript_format=TranscriptFormat.TEXT - if transcript_format == TranscriptFormat.TEXT - else TranscriptFormat.CHUNKS, - chunk_size_seconds=chunk_size_seconds, - language=language.split(",") if language else ["en"], - translation=translation or None, - ) + if isinstance(transcript_format, str): + transcript_format = TranscriptFormat(transcript_format) + loader = YoutubeLoader.from_youtube_url( + url, + transcript_format=TranscriptFormat.TEXT + if transcript_format == TranscriptFormat.TEXT + else TranscriptFormat.CHUNKS, + chunk_size_seconds=chunk_size_seconds, + language=language.split(",") if language else ["en"], + translation=translation or None, + ) - transcripts = loader.load() - if transcript_format == TranscriptFormat.TEXT and len(transcripts) > 0: - return Data(data={"transcript": transcripts[0].page_content}) - return [Data(data={"content": doc.page_content, "metadata": doc.metadata}) for doc in transcripts] - except Exception as exc: - msg = f"Failed to get YouTube transcripts: {exc!s}" - raise ToolException(msg) from exc - - def build_youtube_tool(self) -> Tool: - """Method to build the transcripts tool. - - Returns: - Tool: A structured tool that uses the transcripts method. - - Raises: - RuntimeError: If tool creation fails. - """ - try: - return StructuredTool.from_function( - name="youtube_transcripts", - description="Get transcripts from YouTube videos.", - func=self.youtube_transcripts, - args_schema=YoutubeApiSchema, - ) - - except Exception as exc: - msg = f"Failed to build the YouTube transcripts tool: {exc!s}" - raise RuntimeError(msg) from exc + transcripts = loader.load() + if transcript_format == TranscriptFormat.TEXT and len(transcripts) > 0: + return Data(data={"transcript": transcripts[0].page_content}) + return [Data(data={"content": doc.page_content, "metadata": doc.metadata}) for doc in transcripts] \ No newline at end of file