🐛 fix(loading.py): handle case where metadata is an empty dict and document already has metadata

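🐛 fix(loading.py): handle case where separator_type is not provided in params. The first fix decodes a JSON-string metadata value before checking whether it is empty, so metadata that turns out to be an empty dict is no longer applied to the loaded documents. When non-empty metadata is provided, it is assigned to documents that have no metadata and merged into the existing metadata of documents that already have some (via `dict.update`), instead of replacing it wholesale; note that keys present in both are still overwritten by the provided values. The second fix handles the case where separator_type is not provided in the params: the text_splitter is then instantiated by calling class_object directly with the params as is, rather than through `from_language`.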
2023-06-28 08:59:31 -03:00 · 2023-06-28 08:59:31 -03:00 · 7364ba41f8
commit 7364ba41f8
parent 8aeb7c5fb1
1 changed files with 18 additions and 11 deletions
--- a/src/backend/langflow/interface/initialize/loading.py
+++ b/src/backend/langflow/interface/initialize/loading.py
@ -188,18 +188,22 @@ def instantiate_documentloader(class_object: Type[BaseLoader], params: Dict):
            extension.strip() in x for extension in extensions
        )
    metadata = params.pop("metadata", None)
+    if metadata and isinstance(metadata, str):
+        try:
+            metadata = json.loads(metadata)
+        except json.JSONDecodeError as exc:
+            raise ValueError(
+                "The metadata you provided is not a valid JSON string."
+            ) from exc
    docs = class_object(**params).load()
+    # Now if metadata is an empty dict, we will not add it to the documents
    if metadata:
-        if isinstance(metadata, str):
-            try:
-                metadata = json.loads(metadata)
-            except json.JSONDecodeError as exc:
-                raise ValueError(
-                    "The metadata you provided is not a valid JSON string."
-                ) from exc
-
        for doc in docs:
-            doc.metadata = metadata
+            # If the document already has metadata, we will not overwrite it
+            if not doc.metadata:
+                doc.metadata = metadata
+            else:
+                doc.metadata.update(metadata)

    return docs

@ -216,13 +220,16 @@ def instantiate_textsplitter(
            "Try changing the chunk_size of the Text Splitter."
        ) from exc

-    if "separator_type" in params and params["separator_type"] == "Text":
+    if (
+        "separator_type" in params
+        and params["separator_type"] == "Text"
+        or "separator_type" not in params
+    ):
        text_splitter = class_object(**params)
    else:
        params["language"] = params.pop("separator_type", None)
        params.pop("separators", None)
        text_splitter = class_object.from_language(**params)
-
    return text_splitter.split_documents(documents)