From f5ec26e26fb4bbbcb2faebc67f6433e1f324020a Mon Sep 17 00:00:00 2001 From: Gabriel Luiz Freitas Almeida Date: Thu, 28 Sep 2023 08:47:53 -0300 Subject: [PATCH] =?UTF-8?q?=F0=9F=90=9B=20fix(loading.py):=20pass=20node?= =?UTF-8?q?=5Ftype=20argument=20to=20instantiate=5Fdocumentloader=20functi?= =?UTF-8?q?on=20to=20improve=20flexibility=20and=20readability=20=E2=9C=A8?= =?UTF-8?q?=20feat(loading.py):=20add=20support=20for=20WebBaseLoader=20no?= =?UTF-8?q?de=20type=20to=20handle=20web=5Fpath=20parameter=20and=20conver?= =?UTF-8?q?t=20it=20to=20web=5Fpaths=20parameter?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/backend/langflow/interface/initialize/loading.py | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/src/backend/langflow/interface/initialize/loading.py b/src/backend/langflow/interface/initialize/loading.py index 82075cd2e..7b3ad4f6f 100644 --- a/src/backend/langflow/interface/initialize/loading.py +++ b/src/backend/langflow/interface/initialize/loading.py @@ -111,7 +111,7 @@ def instantiate_based_on_type(class_object, base_type, node_type, params, user_i elif base_type == "vectorstores": return instantiate_vectorstore(class_object, params) elif base_type == "documentloaders": - return instantiate_documentloader(class_object, params) + return instantiate_documentloader(node_type, class_object, params) elif base_type == "textsplitters": return instantiate_textsplitter(class_object, params) elif base_type == "utilities": @@ -321,7 +321,9 @@ def instantiate_vectorstore(class_object: Type[VectorStore], params: Dict): return vecstore -def instantiate_documentloader(class_object: Type[BaseLoader], params: Dict): +def instantiate_documentloader( + node_type: str, class_object: Type[BaseLoader], params: Dict +): if "file_filter" in params: # file_filter will be a string but we need a function # that will be used to filter the files using file_filter @@ -341,6 +343,11 @@ def instantiate_documentloader(class_object: Type[BaseLoader], params: Dict): raise ValueError( "The metadata you provided is not a valid JSON string." ) from exc + + if node_type == "WebBaseLoader": + if web_path := params.pop("web_path", None): + params["web_paths"] = [web_path] + docs = class_object(**params).load() # Now if metadata is an empty dict, we will not add it to the documents if metadata: