From 7364ba41f80144af91f147c041f275a65dd3e6d7 Mon Sep 17 00:00:00 2001 From: Gabriel Luiz Freitas Almeida Date: Wed, 28 Jun 2023 08:59:31 -0300 Subject: [PATCH] =?UTF-8?q?=F0=9F=90=9B=20fix(loading.py):=20handle=20case?= =?UTF-8?q?=20where=20metadata=20is=20an=20empty=20dict=20and=20document?= =?UTF-8?q?=20already=20has=20metadata=20=F0=9F=90=9B=20fix(loading.py):?= =?UTF-8?q?=20handle=20case=20where=20separator=5Ftype=20is=20not=20provid?= =?UTF-8?q?ed=20in=20params=20The=20first=20fix=20ensures=20that=20if=20th?= =?UTF-8?q?e=20metadata=20is=20an=20empty=20dict,=20it=20will=20not=20be?= =?UTF-8?q?=20added=20to=20the=20documents=20if=20they=20already=20have=20?= =?UTF-8?q?metadata.=20This=20prevents=20overwriting=20existing=20metadata?= =?UTF-8?q?.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The second fix handles the case where the separator_type is not provided in the params. In this case, the text_splitter will be instantiated using the class_object and the params as is. --- .../langflow/interface/initialize/loading.py | 29 ++++++++++++------- 1 file changed, 18 insertions(+), 11 deletions(-) diff --git a/src/backend/langflow/interface/initialize/loading.py b/src/backend/langflow/interface/initialize/loading.py index cfa5b5717..14642828a 100644 --- a/src/backend/langflow/interface/initialize/loading.py +++ b/src/backend/langflow/interface/initialize/loading.py @@ -188,18 +188,22 @@ def instantiate_documentloader(class_object: Type[BaseLoader], params: Dict): extension.strip() in x for extension in extensions ) metadata = params.pop("metadata", None) + if metadata and isinstance(metadata, str): + try: + metadata = json.loads(metadata) + except json.JSONDecodeError as exc: + raise ValueError( + "The metadata you provided is not a valid JSON string." + ) from exc docs = class_object(**params).load() + # Now if metadata is an empty dict, we will not add it to the documents if metadata: - if isinstance(metadata, str): - try: - metadata = json.loads(metadata) - except json.JSONDecodeError as exc: - raise ValueError( - "The metadata you provided is not a valid JSON string." - ) from exc - for doc in docs: - doc.metadata = metadata + # If the document already has metadata, we will not overwrite it + if not doc.metadata: + doc.metadata = metadata + else: + doc.metadata.update(metadata) return docs @@ -216,13 +220,16 @@ def instantiate_textsplitter( "Try changing the chunk_size of the Text Splitter." ) from exc - if "separator_type" in params and params["separator_type"] == "Text": + if ( + "separator_type" in params + and params["separator_type"] == "Text" + or "separator_type" not in params + ): text_splitter = class_object(**params) else: params["language"] = params.pop("separator_type", None) params.pop("separators", None) text_splitter = class_object.from_language(**params) - return text_splitter.split_documents(documents)