From 3e5c41666c71c5e041d73b7bec63fa85b0bb4037 Mon Sep 17 00:00:00 2001 From: Gabriel Luiz Freitas Almeida Date: Fri, 1 Mar 2024 09:23:22 -0300 Subject: [PATCH] Refactor FileLoaderComponent to use Record and add Record import --- .../components/documentloaders/FileLoader.py | 20 +++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) diff --git a/src/backend/langflow/components/documentloaders/FileLoader.py b/src/backend/langflow/components/documentloaders/FileLoader.py index 945ac3f64..2dcd0d98a 100644 --- a/src/backend/langflow/components/documentloaders/FileLoader.py +++ b/src/backend/langflow/components/documentloaders/FileLoader.py @@ -1,6 +1,7 @@ -from langchain_core.documents import Document +from typing import List from langflow import CustomComponent +from langflow.schema import Record from langflow.utils.constants import LOADERS_INFO @@ -10,7 +11,9 @@ class FileLoaderComponent(CustomComponent): beta = True def build_config(self): - loader_options = ["Automatic"] + [loader_info["name"] for loader_info in LOADERS_INFO] + loader_options = ["Automatic"] + [ + loader_info["name"] for loader_info in LOADERS_INFO + ] file_types = [] suffixes = [] @@ -74,7 +77,7 @@ class FileLoaderComponent(CustomComponent): "code": {"show": False}, } - def build(self, file_path: str, loader: str) -> Document: + def build(self, file_path: str, loader: str) -> List[Record]: file_type = file_path.split(".")[-1] # Map the loader to the correct loader class @@ -102,7 +105,9 @@ class FileLoaderComponent(CustomComponent): if isinstance(selected_loader_info, dict): loader_import: str = selected_loader_info["import"] else: - raise ValueError(f"Loader info for {loader} is not a dict\nLoader info:\n{selected_loader_info}") + raise ValueError( + f"Loader info for {loader} is not a dict\nLoader info:\n{selected_loader_info}" + ) module_name, class_name = loader_import.rsplit(".", 1) try: @@ -110,7 +115,10 @@ class FileLoaderComponent(CustomComponent): loader_module = __import__(module_name, fromlist=[class_name]) loader_instance = getattr(loader_module, class_name) except ImportError as e: - raise ValueError(f"Loader {loader} could not be imported\nLoader info:\n{selected_loader_info}") from e + raise ValueError( + f"Loader {loader} could not be imported\nLoader info:\n{selected_loader_info}" + ) from e result = loader_instance(file_path=file_path) - return result.load() + docs = result.load() + return self.to_records(docs, text_key="page_content", data_key="metadata")