From 0881f0600d7e760cd0a19dcddb67d5ce8ee5aee6 Mon Sep 17 00:00:00 2001 From: anovazzi1 Date: Mon, 15 Jan 2024 19:57:39 -0300 Subject: [PATCH] Refactor document loaders to support metadata --- .../CollegeConfidentialLoader.py | 12 +++++++++--- .../documentloaders/FacebookChatLoader.py | 18 ++++++++++-------- 2 files changed, 19 insertions(+), 11 deletions(-) diff --git a/src/backend/langflow/components/documentloaders/CollegeConfidentialLoader.py b/src/backend/langflow/components/documentloaders/CollegeConfidentialLoader.py index 654932fd5..44fc8c9ab 100644 --- a/src/backend/langflow/components/documentloaders/CollegeConfidentialLoader.py +++ b/src/backend/langflow/components/documentloaders/CollegeConfidentialLoader.py @@ -2,6 +2,7 @@ from langflow import CustomComponent from langchain.docstore.document import Document from typing import Optional +from langchain_community.document_loaders.college_confidential import CollegeConfidentialLoader class CollegeConfidentialLoaderComponent(CustomComponent): display_name = "CollegeConfidentialLoader" @@ -19,6 +20,11 @@ class CollegeConfidentialLoaderComponent(CustomComponent): web_path: str, metadata: Optional[dict] = {} ) -> Document: - # Assuming there is a loader class `CollegeConfidentialLoader` that takes `metadata` and `web_path` as arguments - # Replace `CollegeConfidentialLoader` with the actual class name if different - return CollegeConfidentialLoader(web_path=web_path, metadata=metadata) + documents = CollegeConfidentialLoader(web_path=web_path).load() + if(metadata): + for document in documents: + if not document.metadata: + document.metadata = metadata + else: + document.metadata.update(metadata) + return documents diff --git a/src/backend/langflow/components/documentloaders/FacebookChatLoader.py b/src/backend/langflow/components/documentloaders/FacebookChatLoader.py index d5b3c1dd1..deb636eac 100644 --- a/src/backend/langflow/components/documentloaders/FacebookChatLoader.py +++ b/src/backend/langflow/components/documentloaders/FacebookChatLoader.py @@ -2,7 +2,7 @@ from langflow import CustomComponent from langchain.docstore.document import Document from typing import Optional, Dict - +from langchain_community.document_loaders.facebook_chat import FacebookChatLoader class FacebookChatLoaderComponent(CustomComponent): display_name = "FacebookChatLoader" description = "Load `Facebook Chat` messages directory dump." @@ -13,8 +13,7 @@ class FacebookChatLoaderComponent(CustomComponent): "file_path": { "display_name": "File Path", "required": True, - "suffixes": [".json"], - "file_types": ["json"], + "file_types": [".json"], "field_type": "file", }, "metadata": { @@ -25,8 +24,11 @@ class FacebookChatLoaderComponent(CustomComponent): } def build(self, file_path: str, metadata: Optional[Dict] = None) -> Document: - # Assuming there is a class named FacebookChatLoader that takes file_path and metadata as parameters - # and returns a Document object. Replace 'FacebookChatLoader' with the actual class name. - # As per the JSON, the output type is 'Document', which is part of langchain.documents. - # Therefore, the 'FacebookChatLoader' should be imported or defined elsewhere in the codebase. - return FacebookChatLoader(file_path=file_path, metadata=metadata) \ No newline at end of file + documents = FacebookChatLoader(file_path=file_path).load() + if(metadata): + for document in documents: + if not document.metadata: + document.metadata = metadata + else: + document.metadata.update(metadata) + return documents \ No newline at end of file