allowing Server File Path to be Message or Data (#4995)

* allowing Server File Path to be Message or Data

* [autofix.ci] apply automated fixes

* Ruff check updates

---------

Co-authored-by: autofix-ci[bot] <114827586+autofix-ci[bot]@users.noreply.github.com>
Co-authored-by: Eric Hare <ericrhare@gmail.com>
This commit is contained in:
Phil Miesle 2024-12-02 16:27:48 +00:00 committed by GitHub
commit 269bc8fa63
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
2 changed files with 188 additions and 42 deletions

View file

@ -8,6 +8,7 @@ from zipfile import ZipFile, is_zipfile
from langflow.custom import Component
from langflow.io import BoolInput, FileInput, HandleInput, Output
from langflow.schema import Data
from langflow.schema.message import Message
class BaseFileComponent(Component, ABC):
@ -100,8 +101,6 @@ class BaseFileComponent(Component, ABC):
SERVER_FILE_PATH_FIELDNAME = "file_path"
SUPPORTED_BUNDLE_EXTENSIONS = ["zip", "tar", "tgz", "bz2", "gz"]
file_path: list[Data] | None = None
def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)
# Dynamically update FileInput to include valid extensions and bundles
@ -125,11 +124,12 @@ class BaseFileComponent(Component, ABC):
name="file_path",
display_name="Server File Path",
info=(
f"Data object with a '{SERVER_FILE_PATH_FIELDNAME}' property pointing to server file. "
"Supercedes 'Path'. "
f"Data object with a '{SERVER_FILE_PATH_FIELDNAME}' property pointing to server file"
" or a Message object with a path to the file. Supercedes 'Path' but supports same file types."
),
required=False,
input_types=["Data"],
input_types=["Data", "Message"],
is_list=True,
),
BoolInput(
name="silent_errors",
@ -284,6 +284,39 @@ class BaseFileComponent(Component, ABC):
return updated_base_files
def _file_path_as_list(self) -> list[Data]:
    """Normalize ``self.file_path`` into a list of ``Data`` objects.

    Accepted shapes of ``self.file_path``:

    * falsy (``None``, empty list, ...) -> an empty list is returned;
    * a single ``Data`` -> wrapped in a one-element list;
    * a single ``Message`` -> converted to ``Data`` and wrapped in a list;
    * a list -> each ``Message`` element is converted to ``Data``; every
      other element is kept as-is if it is a ``Data`` instance.

    A ``Message`` is converted by building a ``Data`` whose
    ``SERVER_FILE_PATH_FIELDNAME`` keyword is set to the message's ``text``.

    Returns:
        list[Data]: The normalized entries, in their original order.

    Raises:
        ValueError: If ``self.file_path`` is neither ``Data``, ``Message`` nor
            a list, or if a list element is not a ``Data``/``Message`` --
            but only when ``self.silent_errors`` is falsy. With silent errors
            enabled the problem is logged and the offending value is dropped
            (a wrong container type yields an empty list).
    """
    raw = self.file_path
    if not raw:
        return []

    def _wrap(message: Message) -> Data:
        # Carry the message text under the server-file-path key.
        return Data(**{self.SERVER_FILE_PATH_FIELDNAME: message.text})

    # Bring every accepted input shape down to a plain list of candidates.
    if isinstance(raw, Data):
        candidates = [raw]
    elif isinstance(raw, Message):
        candidates = [_wrap(raw)]
    elif isinstance(raw, list):
        candidates = raw
    else:
        msg = f"Expected list of Data objects in file_path but got {type(raw)}."
        self.log(msg)
        if not self.silent_errors:
            raise ValueError(msg)
        return []

    normalized = []
    for item in candidates:
        entry = _wrap(item) if isinstance(item, Message) else item
        if isinstance(entry, Data):
            normalized.append(entry)
            continue
        # Anything that is still not a Data object is reported and skipped.
        msg = f"Expected Data object in file_path but got {type(entry)}."
        self.log(msg)
        if not self.silent_errors:
            raise ValueError(msg)
    return normalized
def _validate_and_resolve_paths(self) -> list[BaseFile]:
"""Validate that all input paths exist and are valid, and create BaseFile instances.
@ -307,28 +340,14 @@ class BaseFileComponent(Component, ABC):
BaseFileComponent.BaseFile(data, resolved_path, delete_after_processing=delete_after_processing)
)
if self.path and (not hasattr(self, "file_path") or not self.file_path):
file_path = self._file_path_as_list()
if self.path and not file_path:
# Wrap self.path into a Data object
data_obj = Data(data={"file_path": self.path})
data_obj = Data(data={self.SERVER_FILE_PATH_FIELDNAME: self.path})
add_file(data=data_obj, path=self.path, delete_after_processing=False)
elif hasattr(self, "file_path") and self.file_path:
if isinstance(self.file_path, Data):
self.file_path = [self.file_path]
elif not isinstance(self.file_path, list):
msg = f"Expected list of Data objects in file_path but got {type(self.file_path)}."
self.log(msg)
if not self.silent_errors:
raise ValueError(msg)
return []
for obj in self.file_path:
if not isinstance(obj, Data):
msg = f"Expected Data object in file_path but got {type(obj)}."
self.log(msg)
if not self.silent_errors:
raise ValueError(msg)
continue
elif file_path:
for obj in file_path:
server_file_path = obj.data.get(self.SERVER_FILE_PATH_FIELDNAME)
if server_file_path:
add_file(

View file

@ -1,4 +1,3 @@
from langchain_community.document_loaders import YoutubeLoader
from langchain_community.document_loaders.youtube import TranscriptFormat
@ -14,11 +13,13 @@ class YouTubeTranscriptsComponent(Component):
display_name: str = "YouTube Transcripts"
description: str = "Extracts spoken content from YouTube videos as transcripts."
icon: str = "YouTube"
name="YouTubeTranscripts"
name = "YouTubeTranscripts"
inputs = [
MultilineInput(
name="url", display_name="Video URL", info="Enter the YouTube video URL to get transcripts from.",
name="url",
display_name="Video URL",
info="Enter the YouTube video URL to get transcripts from.",
tool_mode=True,
),
DropdownInput(
@ -41,20 +42,146 @@ class YouTubeTranscriptsComponent(Component):
DropdownInput(
name="language",
display_name="Language",
options = [
"af", "ak", "sq", "am", "ar", "hy", "as", "ay", "az", "bn", "eu", "be", "bho",
"bs", "bg", "my", "ca", "ceb", "zh", "zh-HK", "zh-CN", "zh-SG", "zh-TW",
"zh-Hans", "zh-Hant", "hak-TW", "nan-TW", "co", "hr", "cs", "da", "dv", "nl",
"en", "en-US", "eo", "et", "ee", "fil", "fi", "fr", "gl", "lg", "ka", "de",
"el", "gn", "gu", "ht", "ha", "haw", "iw", "hi", "hmn", "hu", "is", "ig", "id",
"ga", "it", "ja", "jv", "kn", "kk", "km", "rw", "ko", "kri", "ku", "ky", "lo",
"la", "lv", "ln", "lt", "lb", "mk", "mg", "ms", "ml", "mt", "mi", "mr", "mn",
"ne", "nso", "no", "ny", "or", "om", "ps", "fa", "pl", "pt", "pa", "qu", "ro",
"ru", "sm", "sa", "gd", "sr", "sn", "sd", "si", "sk", "sl", "so", "st", "es",
"su", "sw", "sv", "tg", "ta", "tt", "te", "th", "ti", "ts", "tr", "tk", "uk",
"ur", "ug", "uz", "vi", "cy", "fy", "xh", "yi", "yo", "zu"],
options=[
"af",
"ak",
"sq",
"am",
"ar",
"hy",
"as",
"ay",
"az",
"bn",
"eu",
"be",
"bho",
"bs",
"bg",
"my",
"ca",
"ceb",
"zh",
"zh-HK",
"zh-CN",
"zh-SG",
"zh-TW",
"zh-Hans",
"zh-Hant",
"hak-TW",
"nan-TW",
"co",
"hr",
"cs",
"da",
"dv",
"nl",
"en",
"en-US",
"eo",
"et",
"ee",
"fil",
"fi",
"fr",
"gl",
"lg",
"ka",
"de",
"el",
"gn",
"gu",
"ht",
"ha",
"haw",
"iw",
"hi",
"hmn",
"hu",
"is",
"ig",
"id",
"ga",
"it",
"ja",
"jv",
"kn",
"kk",
"km",
"rw",
"ko",
"kri",
"ku",
"ky",
"lo",
"la",
"lv",
"ln",
"lt",
"lb",
"mk",
"mg",
"ms",
"ml",
"mt",
"mi",
"mr",
"mn",
"ne",
"nso",
"no",
"ny",
"or",
"om",
"ps",
"fa",
"pl",
"pt",
"pa",
"qu",
"ro",
"ru",
"sm",
"sa",
"gd",
"sr",
"sn",
"sd",
"si",
"sk",
"sl",
"so",
"st",
"es",
"su",
"sw",
"sv",
"tg",
"ta",
"tt",
"te",
"th",
"ti",
"ts",
"tr",
"tk",
"uk",
"ur",
"ug",
"uz",
"vi",
"cy",
"fy",
"xh",
"yi",
"yo",
"zu",
],
value="en",
info="Specify to make sure the transcripts are retrieved in your desired language. Defaults to English: 'en'",
info=(
"Specify to make sure the transcripts are retrieved in your desired language. "
"Defaults to English: 'en'"
),
),
DropdownInput(
name="translation",
@ -134,4 +261,4 @@ class YouTubeTranscriptsComponent(Component):
transcripts = loader.load()
if transcript_format == TranscriptFormat.TEXT and len(transcripts) > 0:
return Data(data={"transcript": transcripts[0].page_content})
return [Data(data={"content": doc.page_content, "metadata": doc.metadata}) for doc in transcripts]
return [Data(data={"content": doc.page_content, "metadata": doc.metadata}) for doc in transcripts]