From 59a1370c2d4bf6c358de59f9de63a0919724da00 Mon Sep 17 00:00:00 2001
From: anovazzi1
Date: Mon, 15 Jan 2024 18:34:22 -0300
Subject: [PATCH] Refactor document loaders to support metadata

---
 .../components/documentloaders/AZLyricsLoader.py | 12 ++++++++----
 .../documentloaders/AirbyteJSONLoader.py         | 16 ++++++++++------
 2 files changed, 18 insertions(+), 10 deletions(-)

diff --git a/src/backend/langflow/components/documentloaders/AZLyricsLoader.py b/src/backend/langflow/components/documentloaders/AZLyricsLoader.py
index cef344234..f0a9236c1 100644
--- a/src/backend/langflow/components/documentloaders/AZLyricsLoader.py
+++ b/src/backend/langflow/components/documentloaders/AZLyricsLoader.py
@@ -16,7 +16,11 @@ class AZLyricsLoaderComponent(CustomComponent):
         }
 
     def build(self, metadata: Optional[Dict] = None, web_path: str = "") -> Document:
-        # Assuming there is a class AZLyricsLoader that takes metadata and web_path as parameters
-        # and returns a Document object. Replace AZLyricsLoader with the actual class name if different.
-        # The import statement for AZLyricsLoader is assumed to be added above. 
-        return AZLyricsLoader(metadata=metadata, web_path=web_path)
+        documents = AZLyricsLoader(web_path=web_path).load()
+        if(metadata):
+            for document in documents:
+                if not document.metadata:
+                    document.metadata = metadata
+                else:
+                    document.metadata.update(metadata)
+        return documents
\ No newline at end of file
diff --git a/src/backend/langflow/components/documentloaders/AirbyteJSONLoader.py b/src/backend/langflow/components/documentloaders/AirbyteJSONLoader.py
index f1f22e45c..89a271945 100644
--- a/src/backend/langflow/components/documentloaders/AirbyteJSONLoader.py
+++ b/src/backend/langflow/components/documentloaders/AirbyteJSONLoader.py
@@ -1,6 +1,7 @@
 from langflow import CustomComponent
 from langflow.field_typing import Document
 from typing import Optional, Dict
+from langchain_community.document_loaders.airbyte_json import AirbyteJSONLoader
 
 
 class AirbyteJSONLoaderComponent(CustomComponent):
@@ -14,8 +15,7 @@ class AirbyteJSONLoaderComponent(CustomComponent):
         return {
             "file_path": {
                 "display_name": "File Path",
-                "type": "file",
-                "fileTypes": ["json"],
+                "file_types": [".json"],
                 "required": True,
                 "field_type": "file",
             },
@@ -27,7 +27,11 @@ class AirbyteJSONLoaderComponent(CustomComponent):
         }
 
     def build(self, file_path: str, metadata: Optional[Dict] = None) -> Document:
-        # Assuming there is a function or class named AirbyteJSONLoader that takes file_path and metadata as parameters
-        # and returns a Document object. Replace AirbyteJSONLoader with the actual class or function name.
-        # The actual implementation here is a placeholder and should be adapted to the real AirbyteJSONLoader class or function.
-        return AirbyteJSONLoader(file_path=file_path, metadata=metadata)
+        documents = AirbyteJSONLoader(file_path=file_path).load()
+        if(metadata):
+            for document in documents:
+                if not document.metadata:
+                    document.metadata = metadata
+                else:
+                    document.metadata.update(metadata)
+        return documents