diff --git a/src/backend/base/langflow/components/tools/__init__.py b/src/backend/base/langflow/components/tools/__init__.py
index 1487e3a9c..09723a2a9 100644
--- a/src/backend/base/langflow/components/tools/__init__.py
+++ b/src/backend/base/langflow/components/tools/__init__.py
@@ -19,6 +19,7 @@ from .tavily_search import TavilySearchToolComponent
from .wikipedia_api import WikipediaAPIComponent
from .wolfram_alpha_api import WolframAlphaAPIComponent
from .yahoo_finance import YfinanceToolComponent
+from .youtube_transcripts import YouTubeTranscriptsComponent
with warnings.catch_warnings():
warnings.simplefilter("ignore", LangChainDeprecationWarning)
@@ -45,4 +46,5 @@ __all__ = [
"WikipediaAPIComponent",
"WolframAlphaAPIComponent",
"YfinanceToolComponent",
+ "YouTubeTranscriptsComponent",
]
diff --git a/src/backend/base/langflow/components/tools/youtube_transcripts.py b/src/backend/base/langflow/components/tools/youtube_transcripts.py
new file mode 100644
index 000000000..ee7c1b155
--- /dev/null
+++ b/src/backend/base/langflow/components/tools/youtube_transcripts.py
@@ -0,0 +1,173 @@
+from langchain.tools import StructuredTool
+from langchain_community.document_loaders import YoutubeLoader
+from langchain_community.document_loaders.youtube import TranscriptFormat
+from langchain_core.tools import ToolException
+from pydantic import BaseModel, Field
+
+from langflow.base.langchain_utilities.model import LCToolComponent
+from langflow.field_typing import Tool
+from langflow.inputs import DropdownInput, IntInput, MultilineInput
+from langflow.schema import Data
+from langflow.template import Output
+
+
+class YoutubeApiSchema(BaseModel):
+ """Schema to define the input structure for the tool."""
+
+ url: str = Field(..., description="The YouTube URL to get transcripts from.")
+ transcript_format: TranscriptFormat = Field(
+ TranscriptFormat.TEXT,
+ description="The format of the transcripts. Either 'text' for a single "
+ "text output or 'chunks' for timestamped chunks.",
+ )
+ chunk_size_seconds: int = Field(
+ 120,
+ description="The size of each transcript chunk in seconds. Only "
+ "applicable when 'Transcript Format' is set to 'chunks'.",
+ )
+ language: str = Field(
+ "",
+ description="A comma-separated list of language codes in descending " "priority. Leave empty for default.",
+ )
+ translation: str = Field(
+ "", description="Translate the transcripts to the specified language. " "Leave empty for no translation."
+ )
+
+
+class YouTubeTranscriptsComponent(LCToolComponent):
+ """A component that extracts spoken content from YouTube videos as transcripts."""
+
+ display_name: str = "YouTube Transcripts"
+ description: str = "Extracts spoken content from YouTube videos as transcripts."
+ icon: str = "YouTube"
+
+ inputs = [
+ MultilineInput(
+ name="url", display_name="Video URL", info="Enter the YouTube video URL to get transcripts from."
+ ),
+ DropdownInput(
+ name="transcript_format",
+ display_name="Transcript Format",
+ options=["text", "chunks"],
+ value="text",
+ info="The format of the transcripts. Either 'text' for a single output "
+ "or 'chunks' for timestamped chunks.",
+ ),
+ IntInput(
+ name="chunk_size_seconds",
+ display_name="Chunk Size (seconds)",
+ value=60,
+ advanced=True,
+ info="The size of each transcript chunk in seconds. Only applicable when "
+ "'Transcript Format' is set to 'chunks'.",
+ ),
+ MultilineInput(
+ name="language",
+ display_name="Language",
+ info="A comma-separated list of language codes in descending priority. " "Leave empty for default.",
+ ),
+ DropdownInput(
+ name="translation",
+ display_name="Translation Language",
+ advanced=True,
+ options=["", "en", "es", "fr", "de", "it", "pt", "ru", "ja", "ko", "hi", "ar", "id"],
+ info="Translate the transcripts to the specified language. " "Leave empty for no translation.",
+ ),
+ ]
+
+ outputs = [
+ Output(name="transcripts", display_name="Data", method="build_youtube_transcripts"),
+ Output(name="transcripts_tool", display_name="Tool", method="build_youtube_tool"),
+ ]
+
+ def build_youtube_transcripts(self) -> Data | list[Data]:
+ """Method to build transcripts from the provided YouTube URL.
+
+ Returns:
+ Data | list[Data]: The transcripts of the video, either as a single
+ Data object or a list of Data objects.
+ """
+ try:
+ loader = YoutubeLoader.from_youtube_url(
+ self.url,
+ transcript_format=TranscriptFormat.TEXT
+ if self.transcript_format == "text"
+ else TranscriptFormat.CHUNKS,
+ chunk_size_seconds=self.chunk_size_seconds,
+ language=self.language.split(",") if self.language else ["en"],
+ translation=self.translation if self.translation else None,
+ )
+
+ transcripts = loader.load()
+
+ if self.transcript_format == "text":
+ # Extract only the page_content from the Document
+ return Data(data={"transcripts": transcripts[0].page_content})
+ # For chunks, extract page_content and metadata separately
+ return [Data(data={"content": doc.page_content, "metadata": doc.metadata}) for doc in transcripts]
+
+ except Exception as exc: # noqa: BLE001
+ # Using a specific error type for the return value
+ return Data(data={"error": f"Failed to get YouTube transcripts: {exc!s}"})
+
+ def youtube_transcripts(
+ self,
+ url: str = "",
+ transcript_format: TranscriptFormat = TranscriptFormat.TEXT,
+ chunk_size_seconds: int = 120,
+ language: str = "",
+ translation: str = "",
+ ) -> Data | list[Data]:
+ """Helper method to handle transcripts outside of component calls.
+
+ Args:
+ url: The YouTube URL to get transcripts from.
+ transcript_format: Format of transcripts ('text' or 'chunks').
+ chunk_size_seconds: Size of each transcript chunk in seconds.
+ language: Comma-separated list of language codes.
+ translation: Target language for translation.
+
+ Returns:
+ Data | list[Data]: Video transcripts as single Data or list of Data.
+ """
+ try:
+ if isinstance(transcript_format, str):
+ transcript_format = TranscriptFormat(transcript_format)
+ loader = YoutubeLoader.from_youtube_url(
+ url,
+ transcript_format=TranscriptFormat.TEXT
+ if transcript_format == TranscriptFormat.TEXT
+ else TranscriptFormat.CHUNKS,
+ chunk_size_seconds=chunk_size_seconds,
+ language=language.split(",") if language else ["en"],
+ translation=translation if translation else None,
+ )
+
+ transcripts = loader.load()
+ if transcript_format == TranscriptFormat.TEXT and len(transcripts) > 0:
+ return Data(data={"transcript": transcripts[0].page_content})
+ return [Data(data={"content": doc.page_content, "metadata": doc.metadata}) for doc in transcripts]
+ except Exception as exc:
+ msg = f"Failed to get YouTube transcripts: {exc!s}"
+ raise ToolException(msg) from exc
+
+ def build_youtube_tool(self) -> Tool:
+ """Method to build the transcripts tool.
+
+ Returns:
+ Tool: A structured tool that uses the transcripts method.
+
+ Raises:
+ RuntimeError: If tool creation fails.
+ """
+ try:
+ return StructuredTool.from_function(
+ name="youtube_transcripts",
+ description="Get transcripts from YouTube videos.",
+ func=self.youtube_transcripts,
+ args_schema=YoutubeApiSchema,
+ )
+
+ except Exception as exc:
+ msg = f"Failed to build the YouTube transcripts tool: {exc!s}"
+ raise RuntimeError(msg) from exc
diff --git a/src/frontend/src/icons/Youtube/index.tsx b/src/frontend/src/icons/Youtube/index.tsx
new file mode 100644
index 000000000..6e49c4dd2
--- /dev/null
+++ b/src/frontend/src/icons/Youtube/index.tsx
@@ -0,0 +1,9 @@
+import React, { forwardRef } from "react";
+import YouTubeIcon from "./youtube";
+
+/**
+ * Ref-forwarding wrapper around the YouTube icon.
+ *
+ * Passes the received props and the forwarded ref on to `YouTubeIcon`.
+ */
+export const YouTubeSvgIcon = forwardRef<
+  SVGSVGElement,
+  React.PropsWithChildren<{}>
+>((props, ref) => {
+  return <YouTubeIcon ref={ref} {...props} />;
+});
diff --git a/src/frontend/src/icons/Youtube/youtube.jsx b/src/frontend/src/icons/Youtube/youtube.jsx
new file mode 100644
index 000000000..bd1067549
--- /dev/null
+++ b/src/frontend/src/icons/Youtube/youtube.jsx
@@ -0,0 +1,5952 @@
+import React from "react";
+
+const YouTubeIcon = (props) => (
+
+);
+
+export default YouTubeIcon;
diff --git a/src/frontend/src/icons/Youtube/youtube.svg b/src/frontend/src/icons/Youtube/youtube.svg
new file mode 100644
index 000000000..6e3dbd13e
--- /dev/null
+++ b/src/frontend/src/icons/Youtube/youtube.svg
@@ -0,0 +1,3732 @@
+
\ No newline at end of file
diff --git a/src/frontend/src/utils/styleUtils.ts b/src/frontend/src/utils/styleUtils.ts
index a092abe67..f396fe40b 100644
--- a/src/frontend/src/utils/styleUtils.ts
+++ b/src/frontend/src/utils/styleUtils.ts
@@ -6,6 +6,7 @@ import { MilvusIcon } from "@/icons/Milvus";
import Perplexity from "@/icons/Perplexity/Perplexity";
import { TavilyIcon } from "@/icons/Tavily";
import { UnstructuredIcon } from "@/icons/Unstructured";
+import YouTubeIcon from "@/icons/Youtube/youtube";
import { ZepMemoryIcon } from "@/icons/ZepMemory";
import { AthenaIcon } from "@/icons/athena/index";
import { freezeAllIcon } from "@/icons/freezeAll";
@@ -658,6 +659,7 @@ export const nodeIconsLucide: iconsType = {
GithubIcon,
FaGithub,
FaApple,
+ YouTube: YouTubeIcon,
Milvus: MilvusIcon,
ExaSearch: ExaIcon,
ZepMemory: ZepMemoryIcon,
diff --git a/src/frontend/tests/core/unit/codeAreaModalComponent.spec.ts b/src/frontend/tests/core/unit/codeAreaModalComponent.spec.ts
index 6d60ccec6..4948c02b3 100644
--- a/src/frontend/tests/core/unit/codeAreaModalComponent.spec.ts
+++ b/src/frontend/tests/core/unit/codeAreaModalComponent.spec.ts
@@ -40,6 +40,8 @@ test("CodeAreaModalComponent", async ({ page }) => {
await page.getByTestId("sidebar-legacy-switch").isVisible({ timeout: 5000 });
await page.getByTestId("sidebar-legacy-switch").click();
+ await page.waitForTimeout(1000);
+
await page
.getByTestId("prototypesPython Function")
.dragTo(page.locator('//*[@id="react-flow-id"]'));
diff --git a/src/frontend/tests/extended/integrations/youtube-transcripts.spec.ts b/src/frontend/tests/extended/integrations/youtube-transcripts.spec.ts
new file mode 100644
index 000000000..e3ceaab21
--- /dev/null
+++ b/src/frontend/tests/extended/integrations/youtube-transcripts.spec.ts
@@ -0,0 +1,74 @@
+import { expect, test } from "@playwright/test";
+
+test("user should be able to use youtube transcripts component", async ({
+  page,
+}) => {
+  // Locator factories for the test ids that are queried more than once.
+  const modalTitle = () => page.getByTestId("modal-title");
+  const outdatedBadges = () => page.getByTestId("icon-AlertTriangle");
+  const searchInput = () => page.getByTestId("sidebar-search-input");
+
+  // Open the app and wait for the main page and the new-project button.
+  await page.goto("/");
+  await page.waitForSelector('[data-testid="mainpage_title"]', {
+    timeout: 30000,
+  });
+  await page.waitForSelector('[id="new-project-btn"]', {
+    timeout: 30000,
+  });
+
+  // Count modals that are already open; a failing lookup counts as none.
+  let openModals = 0;
+  try {
+    openModals = await modalTitle().count();
+  } catch (error) {
+    openModals = 0;
+  }
+
+  // Click "New Flow" until the modal is present.
+  while (openModals === 0) {
+    await page.getByText("New Flow", { exact: true }).click();
+    await page.waitForTimeout(3000);
+    openModals = await modalTitle().count();
+  }
+
+  // Start from a blank flow and search the sidebar for the component.
+  await page.getByTestId("blank-flow").click();
+  await searchInput().click();
+  await searchInput().fill("youtube");
+
+  await page.waitForTimeout(1000);
+
+  // Drag the component onto the canvas, then fit the view.
+  const canvas = page.locator('//*[@id="react-flow-id"]');
+  await page.locator('//*[@id="toolsYouTube Transcripts"]').dragTo(canvas);
+  await page.mouse.up();
+  await page.mouse.down();
+  await page.getByTestId("fit_view").click();
+
+  // Click every alert-triangle marker until none is left.
+  for (
+    let remaining = await outdatedBadges().count();
+    remaining > 0;
+    remaining = await outdatedBadges().count()
+  ) {
+    await outdatedBadges().first().click();
+    await page.waitForTimeout(1000);
+  }
+
+  // Fill in the video URL and the language, then run the component.
+  await page
+    .getByTestId("textarea_str_url")
+    .fill("https://www.youtube.com/watch?v=VqhCQZaH4Vs");
+  await page.getByTestId("textarea_str_language").fill("en");
+  await page.getByTestId("button_run_youtube transcripts").click();
+
+  await page.waitForSelector("text=built successfully", { timeout: 30000 });
+
+  // Open the output inspection and select the first grid cell.
+  await page.getByTestId("output-inspection-data").first().click();
+  await page.waitForTimeout(1000);
+  await page.getByRole("gridcell").first().click();
+
+  // The inspected value must contain the expected transcript lines.
+  const value = await page.getByPlaceholder("Empty").inputValue();
+  const lowered = value.toLowerCase();
+  expect(value.length).toBeGreaterThan(10);
+  expect(lowered).toContain("i see trees of green");
+  expect(lowered).toContain(
+    "and i think to myself what a wonderful world",
+  );
+});