fix: modified DirectoryComponent to support user-defined custom file types (#4017)

test_data_components.py: Updated 'test_directory_component_build_with_multithreading' to expect the additional `types` argument now passed to `retrieve_file_paths`
This commit is contained in:
EDllT 2024-10-07 14:28:12 +03:00 committed by GitHub
commit 79a1257b7d
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
2 changed files with 8 additions and 6 deletions

View file

@ -1,4 +1,4 @@
from langflow.base.data.utils import parallel_load_data, parse_text_file_to_data, retrieve_file_paths
from langflow.base.data.utils import TEXT_FILE_TYPES, parallel_load_data, parse_text_file_to_data, retrieve_file_paths
from langflow.custom import Component
from langflow.io import BoolInput, IntInput, MessageTextInput
from langflow.schema import Data
@ -20,7 +20,7 @@ class DirectoryComponent(Component):
MessageTextInput(
name="types",
display_name="Types",
info="File types to load. Leave empty to load all types.",
info="File types to load. Leave empty to load all default supported types.",
is_list=True,
),
IntInput(
@ -68,7 +68,9 @@ class DirectoryComponent(Component):
def load_directory(self) -> list[Data]:
path = self.path
types = self.types or [] # self.types is already a list due to is_list=True
types = (
self.types if self.types and self.types != [""] else TEXT_FILE_TYPES
) # self.types is already a list due to is_list=True
depth = self.depth
max_concurrency = self.max_concurrency
load_hidden = self.load_hidden
@ -77,7 +79,7 @@ class DirectoryComponent(Component):
use_multithreading = self.use_multithreading
resolved_path = self.resolve_path(path)
file_paths = retrieve_file_paths(resolved_path, load_hidden, recursive, depth)
file_paths = retrieve_file_paths(resolved_path, load_hidden, recursive, depth, types)
if types:
file_paths = [fp for fp in file_paths if any(fp.endswith(ext) for ext in types)]

View file

@ -1,7 +1,7 @@
import os
import tempfile
from pathlib import Path
from unittest.mock import Mock, patch
from unittest.mock import Mock, patch, ANY
import httpx
import pytest
@ -158,7 +158,7 @@ def test_directory_component_build_with_multithreading(
# Assert
mock_resolve_path.assert_called_once_with(path)
mock_retrieve_file_paths.assert_called_once_with(path, load_hidden, recursive, depth)
mock_retrieve_file_paths.assert_called_once_with(path, load_hidden, recursive, depth, ANY)
mock_parallel_load_data.assert_called_once_with(
mock_retrieve_file_paths.return_value, silent_errors, max_concurrency
)