diff --git a/src/backend/base/langflow/base/data/base_file.py b/src/backend/base/langflow/base/data/base_file.py index 7c14e31e2..46f4ab415 100644 --- a/src/backend/base/langflow/base/data/base_file.py +++ b/src/backend/base/langflow/base/data/base_file.py @@ -8,6 +8,7 @@ from zipfile import ZipFile, is_zipfile from langflow.custom import Component from langflow.io import BoolInput, FileInput, HandleInput, Output from langflow.schema import Data +from langflow.schema.message import Message class BaseFileComponent(Component, ABC): @@ -100,8 +101,6 @@ class BaseFileComponent(Component, ABC): SERVER_FILE_PATH_FIELDNAME = "file_path" SUPPORTED_BUNDLE_EXTENSIONS = ["zip", "tar", "tgz", "bz2", "gz"] - file_path: list[Data] | None = None - def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) # Dynamically update FileInput to include valid extensions and bundles @@ -125,11 +124,12 @@ class BaseFileComponent(Component, ABC): name="file_path", display_name="Server File Path", info=( - f"Data object with a '{SERVER_FILE_PATH_FIELDNAME}' property pointing to server file. " - "Supercedes 'Path'. " + f"Data object with a '{SERVER_FILE_PATH_FIELDNAME}' property pointing to server file" + " or a Message object with a path to the file. Supercedes 'Path' but supports same file types." ), required=False, - input_types=["Data"], + input_types=["Data", "Message"], + is_list=True, ), BoolInput( name="silent_errors", @@ -284,6 +284,39 @@ class BaseFileComponent(Component, ABC): return updated_base_files + def _file_path_as_list(self) -> list[Data]: + file_path = self.file_path + if not file_path: + return [] + + def _message_to_data(message: Message) -> Data: + return Data(**{self.SERVER_FILE_PATH_FIELDNAME: message.text}) + + if isinstance(file_path, Data): + file_path = [file_path] + elif isinstance(file_path, Message): + file_path = [_message_to_data(file_path)] + elif not isinstance(file_path, list): + msg = f"Expected list of Data objects in file_path but got {type(file_path)}." + self.log(msg) + if not self.silent_errors: + raise ValueError(msg) + return [] + + file_paths = [] + for obj in file_path: + data_obj = _message_to_data(obj) if isinstance(obj, Message) else obj + + if not isinstance(data_obj, Data): + msg = f"Expected Data object in file_path but got {type(data_obj)}." + self.log(msg) + if not self.silent_errors: + raise ValueError(msg) + continue + file_paths.append(data_obj) + + return file_paths + def _validate_and_resolve_paths(self) -> list[BaseFile]: """Validate that all input paths exist and are valid, and create BaseFile instances. @@ -307,28 +340,14 @@ class BaseFileComponent(Component, ABC): BaseFileComponent.BaseFile(data, resolved_path, delete_after_processing=delete_after_processing) ) - if self.path and (not hasattr(self, "file_path") or not self.file_path): + file_path = self._file_path_as_list() + + if self.path and not file_path: # Wrap self.path into a Data object - data_obj = Data(data={"file_path": self.path}) + data_obj = Data(data={self.SERVER_FILE_PATH_FIELDNAME: self.path}) add_file(data=data_obj, path=self.path, delete_after_processing=False) - elif hasattr(self, "file_path") and self.file_path: - if isinstance(self.file_path, Data): - self.file_path = [self.file_path] - elif not isinstance(self.file_path, list): - msg = f"Expected list of Data objects in file_path but got {type(self.file_path)}." - self.log(msg) - if not self.silent_errors: - raise ValueError(msg) - return [] - - for obj in self.file_path: - if not isinstance(obj, Data): - msg = f"Expected Data object in file_path but got {type(obj)}." - self.log(msg) - if not self.silent_errors: - raise ValueError(msg) - continue - + elif file_path: + for obj in file_path: server_file_path = obj.data.get(self.SERVER_FILE_PATH_FIELDNAME) if server_file_path: add_file( diff --git a/src/backend/base/langflow/components/tools/youtube_transcripts.py b/src/backend/base/langflow/components/tools/youtube_transcripts.py index 0becb5770..a276b785e 100644 --- a/src/backend/base/langflow/components/tools/youtube_transcripts.py +++ b/src/backend/base/langflow/components/tools/youtube_transcripts.py @@ -1,4 +1,3 @@ - from langchain_community.document_loaders import YoutubeLoader from langchain_community.document_loaders.youtube import TranscriptFormat @@ -14,11 +13,13 @@ class YouTubeTranscriptsComponent(Component): display_name: str = "YouTube Transcripts" description: str = "Extracts spoken content from YouTube videos as transcripts." icon: str = "YouTube" - name="YouTubeTranscripts" + name = "YouTubeTranscripts" inputs = [ MultilineInput( - name="url", display_name="Video URL", info="Enter the YouTube video URL to get transcripts from.", + name="url", + display_name="Video URL", + info="Enter the YouTube video URL to get transcripts from.", tool_mode=True, ), DropdownInput( @@ -41,20 +42,146 @@ class YouTubeTranscriptsComponent(Component): DropdownInput( name="language", display_name="Language", - options = [ - "af", "ak", "sq", "am", "ar", "hy", "as", "ay", "az", "bn", "eu", "be", "bho", - "bs", "bg", "my", "ca", "ceb", "zh", "zh-HK", "zh-CN", "zh-SG", "zh-TW", - "zh-Hans", "zh-Hant", "hak-TW", "nan-TW", "co", "hr", "cs", "da", "dv", "nl", - "en", "en-US", "eo", "et", "ee", "fil", "fi", "fr", "gl", "lg", "ka", "de", - "el", "gn", "gu", "ht", "ha", "haw", "iw", "hi", "hmn", "hu", "is", "ig", "id", - "ga", "it", "ja", "jv", "kn", "kk", "km", "rw", "ko", "kri", "ku", "ky", "lo", - "la", "lv", "ln", "lt", "lb", "mk", "mg", "ms", "ml", "mt", "mi", "mr", "mn", - "ne", "nso", "no", "ny", "or", "om", "ps", "fa", "pl", "pt", "pa", "qu", "ro", - "ru", "sm", "sa", "gd", "sr", "sn", "sd", "si", "sk", "sl", "so", "st", "es", - "su", "sw", "sv", "tg", "ta", "tt", "te", "th", "ti", "ts", "tr", "tk", "uk", - "ur", "ug", "uz", "vi", "cy", "fy", "xh", "yi", "yo", "zu"], + options=[ + "af", + "ak", + "sq", + "am", + "ar", + "hy", + "as", + "ay", + "az", + "bn", + "eu", + "be", + "bho", + "bs", + "bg", + "my", + "ca", + "ceb", + "zh", + "zh-HK", + "zh-CN", + "zh-SG", + "zh-TW", + "zh-Hans", + "zh-Hant", + "hak-TW", + "nan-TW", + "co", + "hr", + "cs", + "da", + "dv", + "nl", + "en", + "en-US", + "eo", + "et", + "ee", + "fil", + "fi", + "fr", + "gl", + "lg", + "ka", + "de", + "el", + "gn", + "gu", + "ht", + "ha", + "haw", + "iw", + "hi", + "hmn", + "hu", + "is", + "ig", + "id", + "ga", + "it", + "ja", + "jv", + "kn", + "kk", + "km", + "rw", + "ko", + "kri", + "ku", + "ky", + "lo", + "la", + "lv", + "ln", + "lt", + "lb", + "mk", + "mg", + "ms", + "ml", + "mt", + "mi", + "mr", + "mn", + "ne", + "nso", + "no", + "ny", + "or", + "om", + "ps", + "fa", + "pl", + "pt", + "pa", + "qu", + "ro", + "ru", + "sm", + "sa", + "gd", + "sr", + "sn", + "sd", + "si", + "sk", + "sl", + "so", + "st", + "es", + "su", + "sw", + "sv", + "tg", + "ta", + "tt", + "te", + "th", + "ti", + "ts", + "tr", + "tk", + "uk", + "ur", + "ug", + "uz", + "vi", + "cy", + "fy", + "xh", + "yi", + "yo", + "zu", + ], value="en", - info="Specify to make sure the transcripts are retrieved in your desired language. Defaults to English: 'en'", + info=( + "Specify to make sure the transcripts are retrieved in your desired language. " + "Defaults to English: 'en'" + ), ), DropdownInput( name="translation", @@ -134,4 +261,4 @@ class YouTubeTranscriptsComponent(Component): transcripts = loader.load() if transcript_format == TranscriptFormat.TEXT and len(transcripts) > 0: return Data(data={"transcript": transcripts[0].page_content}) - return [Data(data={"content": doc.page_content, "metadata": doc.metadata}) for doc in transcripts] \ No newline at end of file + return [Data(data={"content": doc.page_content, "metadata": doc.metadata}) for doc in transcripts]