Refactor document loaders to support metadata

This commit is contained in:
anovazzi1 2024-01-15 19:57:39 -03:00
commit 0881f0600d
2 changed files with 19 additions and 11 deletions

View file

@ -2,6 +2,7 @@
from langflow import CustomComponent
from langchain.docstore.document import Document
from typing import Optional
from langchain_community.document_loaders.college_confidential import CollegeConfidentialLoader
class CollegeConfidentialLoaderComponent(CustomComponent):
display_name = "CollegeConfidentialLoader"
@ -19,6 +20,11 @@ class CollegeConfidentialLoaderComponent(CustomComponent):
web_path: str,
metadata: Optional[dict] = {}
) -> Document:
# Assuming there is a loader class `CollegeConfidentialLoader` that takes `metadata` and `web_path` as arguments
# Replace `CollegeConfidentialLoader` with the actual class name if different
return CollegeConfidentialLoader(web_path=web_path, metadata=metadata)
documents = CollegeConfidentialLoader(web_path=web_path).load()
if(metadata):
for document in documents:
if not document.metadata:
document.metadata = metadata
else:
document.metadata.update(metadata)
return documents

View file

@ -2,7 +2,7 @@
from langflow import CustomComponent
from langchain.docstore.document import Document
from typing import Optional, Dict
from langchain_community.document_loaders.facebook_chat import FacebookChatLoader
class FacebookChatLoaderComponent(CustomComponent):
display_name = "FacebookChatLoader"
description = "Load `Facebook Chat` messages directory dump."
@ -13,8 +13,7 @@ class FacebookChatLoaderComponent(CustomComponent):
"file_path": {
"display_name": "File Path",
"required": True,
"suffixes": [".json"],
"file_types": ["json"],
"file_types": [".json"],
"field_type": "file",
},
"metadata": {
@ -25,8 +24,11 @@ class FacebookChatLoaderComponent(CustomComponent):
}
def build(self, file_path: str, metadata: Optional[Dict] = None) -> Document:
    """Load a Facebook Chat dump and attach optional extra metadata.

    Args:
        file_path: Path of the Facebook Chat file handed to
            ``FacebookChatLoader``.
        metadata: Optional key/value pairs merged into the metadata of
            every loaded document. On key collisions these values
            override the ones produced by the loader.

    Returns:
        Whatever ``FacebookChatLoader(...).load()`` returns, after the
        metadata merge.
        NOTE(review): the result is iterated below as a collection of
        documents while the annotation says ``Document``; the annotation
        is left untouched for interface compatibility -- confirm whether
        it should become a list type.
    """
    # Metadata is merged after loading instead of being passed to the
    # loader constructor. The path is passed positionally so the call does
    # not depend on the name of the loader's path parameter.
    documents = FacebookChatLoader(file_path).load()
    if metadata:
        for document in documents:
            # Build a fresh dict per document so that documents never share
            # (or later mutate) the caller's ``metadata`` dict.
            document.metadata = {**(document.metadata or {}), **metadata}
    return documents