diff --git a/src/backend/base/langflow/components/tools/__init__.py b/src/backend/base/langflow/components/tools/__init__.py index 1487e3a9c..09723a2a9 100644 --- a/src/backend/base/langflow/components/tools/__init__.py +++ b/src/backend/base/langflow/components/tools/__init__.py @@ -19,6 +19,7 @@ from .tavily_search import TavilySearchToolComponent from .wikipedia_api import WikipediaAPIComponent from .wolfram_alpha_api import WolframAlphaAPIComponent from .yahoo_finance import YfinanceToolComponent +from .youtube_transcripts import YouTubeTranscriptsComponent with warnings.catch_warnings(): warnings.simplefilter("ignore", LangChainDeprecationWarning) @@ -45,4 +46,5 @@ __all__ = [ "WikipediaAPIComponent", "WolframAlphaAPIComponent", "YfinanceToolComponent", + "YouTubeTranscriptsComponent", ] diff --git a/src/backend/base/langflow/components/tools/youtube_transcripts.py b/src/backend/base/langflow/components/tools/youtube_transcripts.py new file mode 100644 index 000000000..ee7c1b155 --- /dev/null +++ b/src/backend/base/langflow/components/tools/youtube_transcripts.py @@ -0,0 +1,173 @@ +from langchain.tools import StructuredTool +from langchain_community.document_loaders import YoutubeLoader +from langchain_community.document_loaders.youtube import TranscriptFormat +from langchain_core.tools import ToolException +from pydantic import BaseModel, Field + +from langflow.base.langchain_utilities.model import LCToolComponent +from langflow.field_typing import Tool +from langflow.inputs import DropdownInput, IntInput, MultilineInput +from langflow.schema import Data +from langflow.template import Output + + +class YoutubeApiSchema(BaseModel): + """Schema to define the input structure for the tool.""" + + url: str = Field(..., description="The YouTube URL to get transcripts from.") + transcript_format: TranscriptFormat = Field( + TranscriptFormat.TEXT, + description="The format of the transcripts. 
Either 'text' for a single " + "text output or 'chunks' for timestamped chunks.", + ) + chunk_size_seconds: int = Field( + 120, + description="The size of each transcript chunk in seconds. Only " + "applicable when 'Transcript Format' is set to 'chunks'.", + ) + language: str = Field( + "", + description="A comma-separated list of language codes in descending " "priority. Leave empty for default.", + ) + translation: str = Field( + "", description="Translate the transcripts to the specified language. " "Leave empty for no translation." + ) + + +class YouTubeTranscriptsComponent(LCToolComponent): + """A component that extracts spoken content from YouTube videos as transcripts.""" + + display_name: str = "YouTube Transcripts" + description: str = "Extracts spoken content from YouTube videos as transcripts." + icon: str = "YouTube" + + inputs = [ + MultilineInput( + name="url", display_name="Video URL", info="Enter the YouTube video URL to get transcripts from." + ), + DropdownInput( + name="transcript_format", + display_name="Transcript Format", + options=["text", "chunks"], + value="text", + info="The format of the transcripts. Either 'text' for a single output " + "or 'chunks' for timestamped chunks.", + ), + IntInput( + name="chunk_size_seconds", + display_name="Chunk Size (seconds)", + value=60, + advanced=True, + info="The size of each transcript chunk in seconds. Only applicable when " + "'Transcript Format' is set to 'chunks'.", + ), + MultilineInput( + name="language", + display_name="Language", + info="A comma-separated list of language codes in descending priority. " "Leave empty for default.", + ), + DropdownInput( + name="translation", + display_name="Translation Language", + advanced=True, + options=["", "en", "es", "fr", "de", "it", "pt", "ru", "ja", "ko", "hi", "ar", "id"], + info="Translate the transcripts to the specified language. 
" "Leave empty for no translation.", + ), + ] + + outputs = [ + Output(name="transcripts", display_name="Data", method="build_youtube_transcripts"), + Output(name="transcripts_tool", display_name="Tool", method="build_youtube_tool"), + ] + + def build_youtube_transcripts(self) -> Data | list[Data]: + """Method to build transcripts from the provided YouTube URL. + + Returns: + Data | list[Data]: The transcripts of the video, either as a single + Data object or a list of Data objects. + """ + try: + loader = YoutubeLoader.from_youtube_url( + self.url, + transcript_format=TranscriptFormat.TEXT + if self.transcript_format == "text" + else TranscriptFormat.CHUNKS, + chunk_size_seconds=self.chunk_size_seconds, + language=self.language.split(",") if self.language else ["en"], + translation=self.translation if self.translation else None, + ) + + transcripts = loader.load() + + if self.transcript_format == "text": + # Extract only the page_content from the Document + return Data(data={"transcripts": transcripts[0].page_content}) + # For chunks, extract page_content and metadata separately + return [Data(data={"content": doc.page_content, "metadata": doc.metadata}) for doc in transcripts] + + except Exception as exc: # noqa: BLE001 + # Using a specific error type for the return value + return Data(data={"error": f"Failed to get YouTube transcripts: {exc!s}"}) + + def youtube_transcripts( + self, + url: str = "", + transcript_format: TranscriptFormat = TranscriptFormat.TEXT, + chunk_size_seconds: int = 120, + language: str = "", + translation: str = "", + ) -> Data | list[Data]: + """Helper method to handle transcripts outside of component calls. + + Args: + url: The YouTube URL to get transcripts from. + transcript_format: Format of transcripts ('text' or 'chunks'). + chunk_size_seconds: Size of each transcript chunk in seconds. + language: Comma-separated list of language codes. + translation: Target language for translation. 
+ + Returns: + Data | list[Data]: Video transcripts as single Data or list of Data. + """ + try: + if isinstance(transcript_format, str): + transcript_format = TranscriptFormat(transcript_format) + loader = YoutubeLoader.from_youtube_url( + url, + transcript_format=TranscriptFormat.TEXT + if transcript_format == TranscriptFormat.TEXT + else TranscriptFormat.CHUNKS, + chunk_size_seconds=chunk_size_seconds, + language=language.split(",") if language else ["en"], + translation=translation if translation else None, + ) + + transcripts = loader.load() + if transcript_format == TranscriptFormat.TEXT and len(transcripts) > 0: + return Data(data={"transcript": transcripts[0].page_content}) + return [Data(data={"content": doc.page_content, "metadata": doc.metadata}) for doc in transcripts] + except Exception as exc: + msg = f"Failed to get YouTube transcripts: {exc!s}" + raise ToolException(msg) from exc + + def build_youtube_tool(self) -> Tool: + """Method to build the transcripts tool. + + Returns: + Tool: A structured tool that uses the transcripts method. + + Raises: + RuntimeError: If tool creation fails. 
+ """ + try: + return StructuredTool.from_function( + name="youtube_transcripts", + description="Get transcripts from YouTube videos.", + func=self.youtube_transcripts, + args_schema=YoutubeApiSchema, + ) + + except Exception as exc: + msg = f"Failed to build the YouTube transcripts tool: {exc!s}" + raise RuntimeError(msg) from exc diff --git a/src/frontend/src/icons/Youtube/index.tsx b/src/frontend/src/icons/Youtube/index.tsx new file mode 100644 index 000000000..6e49c4dd2 --- /dev/null +++ b/src/frontend/src/icons/Youtube/index.tsx @@ -0,0 +1,9 @@ +import React, { forwardRef } from "react"; +import YouTubeIcon from "./youtube"; + +export const YouTubeSvgIcon = forwardRef< + SVGSVGElement, + React.PropsWithChildren<{}> +>((props, ref) => { + return ; +}); diff --git a/src/frontend/src/icons/Youtube/youtube.jsx b/src/frontend/src/icons/Youtube/youtube.jsx new file mode 100644 index 000000000..bd1067549 --- /dev/null +++ b/src/frontend/src/icons/Youtube/youtube.jsx @@ -0,0 +1,5952 @@ +import React from "react"; + +const YouTubeIcon = (props) => ( + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +); + +export default YouTubeIcon; diff --git a/src/frontend/src/icons/Youtube/youtube.svg b/src/frontend/src/icons/Youtube/youtube.svg new file mode 100644 index 000000000..6e3dbd13e --- /dev/null +++ b/src/frontend/src/icons/Youtube/youtube.svg @@ -0,0 +1,3732 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/src/frontend/src/utils/styleUtils.ts b/src/frontend/src/utils/styleUtils.ts index a092abe67..f396fe40b 100644 --- a/src/frontend/src/utils/styleUtils.ts +++ b/src/frontend/src/utils/styleUtils.ts @@ 
-6,6 +6,7 @@ import { MilvusIcon } from "@/icons/Milvus";
 import Perplexity from "@/icons/Perplexity/Perplexity";
 import { TavilyIcon } from "@/icons/Tavily";
 import { UnstructuredIcon } from "@/icons/Unstructured";
+import YouTubeIcon from "@/icons/Youtube/youtube";
 import { ZepMemoryIcon } from "@/icons/ZepMemory";
 import { AthenaIcon } from "@/icons/athena/index";
 import { freezeAllIcon } from "@/icons/freezeAll";
@@ -658,6 +659,7 @@ export const nodeIconsLucide: iconsType = {
   GithubIcon,
   FaGithub,
   FaApple,
+  YouTube: YouTubeIcon,
   Milvus: MilvusIcon,
   ExaSearch: ExaIcon,
   ZepMemory: ZepMemoryIcon,
diff --git a/src/frontend/tests/core/unit/codeAreaModalComponent.spec.ts b/src/frontend/tests/core/unit/codeAreaModalComponent.spec.ts
index 6d60ccec6..4948c02b3 100644
--- a/src/frontend/tests/core/unit/codeAreaModalComponent.spec.ts
+++ b/src/frontend/tests/core/unit/codeAreaModalComponent.spec.ts
@@ -40,6 +40,8 @@ test("CodeAreaModalComponent", async ({ page }) => {
   await page.getByTestId("sidebar-legacy-switch").isVisible({ timeout: 5000 });
   await page.getByTestId("sidebar-legacy-switch").click();
 
+  await page.waitForTimeout(1000);
+
   await page
     .getByTestId("prototypesPython Function")
     .dragTo(page.locator('//*[@id="react-flow-id"]'));
diff --git a/src/frontend/tests/extended/integrations/youtube-transcripts.spec.ts b/src/frontend/tests/extended/integrations/youtube-transcripts.spec.ts
new file mode 100644
index 000000000..e3ceaab21
--- /dev/null
+++ b/src/frontend/tests/extended/integrations/youtube-transcripts.spec.ts
@@ -0,0 +1,78 @@
+import { expect, test } from "@playwright/test";
+
+test("user should be able to use youtube transcripts component", async ({
+  page,
+}) => {
+  await page.goto("/");
+  await page.waitForSelector('[data-testid="mainpage_title"]', {
+    timeout: 30000,
+  });
+
+  await page.waitForSelector('[id="new-project-btn"]', {
+    timeout: 30000,
+  });
+
+  let modalCount = 0;
+  try {
+    const modalTitleElement = await page?.getByTestId("modal-title");
+    if (modalTitleElement) {
+      modalCount = await modalTitleElement.count();
+    }
+  } catch (error) {
+    modalCount = 0;
+  }
+
+  while (modalCount === 0) {
+    await page.getByText("New Flow", { exact: true }).click();
+    await page.waitForTimeout(3000);
+    modalCount = await page.getByTestId("modal-title")?.count();
+  }
+
+  await page.getByTestId("blank-flow").click();
+  await page.getByTestId("sidebar-search-input").click();
+  await page.getByTestId("sidebar-search-input").fill("youtube");
+
+  // Wait for the filtered sidebar entry itself rather than a fixed delay.
+  await page.waitForSelector('//*[@id="toolsYouTube Transcripts"]', {
+    timeout: 30000,
+  });
+
+  await page
+    .locator('//*[@id="toolsYouTube Transcripts"]')
+    .dragTo(page.locator('//*[@id="react-flow-id"]'));
+  await page.mouse.up();
+  await page.mouse.down();
+  await page.getByTestId("fit_view").click();
+
+  let outdatedComponents = await page.getByTestId("icon-AlertTriangle").count();
+
+  while (outdatedComponents > 0) {
+    await page.getByTestId("icon-AlertTriangle").first().click();
+    await page.waitForTimeout(1000);
+    outdatedComponents = await page.getByTestId("icon-AlertTriangle").count();
+  }
+
+  await page
+    .getByTestId("textarea_str_url")
+    .fill("https://www.youtube.com/watch?v=VqhCQZaH4Vs");
+
+  await page.getByTestId("textarea_str_language").fill("en");
+
+  await page.getByTestId("button_run_youtube transcripts").click();
+
+  await page.waitForSelector("text=built successfully", { timeout: 30000 });
+
+  await page.getByTestId("output-inspection-data").first().click();
+
+  // Web-first assertion: retries until the output grid has rendered a cell.
+  await expect(page.getByRole("gridcell").first()).toBeVisible();
+
+  await page.getByRole("gridcell").first().click();
+
+  const value = await page.getByPlaceholder("Empty").inputValue();
+  expect(value.length).toBeGreaterThan(10);
+  expect(value.toLowerCase()).toContain("i see trees of green");
+  expect(value.toLowerCase()).toContain(
+    "and i think to myself what a wonderful world",
+  );
+});