🔧 refactor(documentloaders.py): extract build_directory_loader_fields() function to improve code readability and maintainability

🔧 refactor(documentloaders.py): remove redundant code for adding fields in DocumentLoaderFrontNode class
The code in the DocumentLoaderFrontNode class has been refactored to extract the logic for building directory loader fields into a separate function called build_directory_loader_fields(). This improves code readability and maintainability by separating concerns and reducing duplication. The redundant code for adding fields in the DocumentLoaderFrontNode class has also been removed.
This commit is contained in:
Gabriel Luiz Freitas Almeida 2023-07-01 17:08:23 -03:00
commit d2609141ba
2 changed files with 121 additions and 14 deletions

View file

@ -2,6 +2,7 @@ import os
import yaml
from pydantic import BaseSettings, root_validator
from langflow.utils.logger import logger
class Settings(BaseSettings):
@ -20,10 +21,24 @@ class Settings(BaseSettings):
textsplitters: dict = {}
utilities: dict = {}
dev: bool = False
database_url: str = "sqlite:///./langflow.db"
database_url: str
cache: str = "InMemoryCache"
remove_api_keys: bool = False
# Create a root validator pre that will add the default
# sqlite database_url if not provided
# but check the DATABASE_URL env variable first
@root_validator(pre=True)
def set_database_url(cls, values):
if "database_url" not in values:
logger.debug("No database_url provided, trying DATABASE_URL env variable")
if database_url := os.getenv("DATABASE_URL"):
values["database_url"] = database_url
else:
logger.debug("No DATABASE_URL env variable, using sqlite database")
values["database_url"] = "sqlite:///./langflow.db"
return values
class Config:
validate_assignment = True
extra = "ignore"

View file

@ -120,29 +120,23 @@ class DocumentLoaderFrontNode(FrontendNode):
"DirectoryLoader",
"ReadTheDocsLoader",
"NotionDirectoryLoader",
"PyPDFDirectoryLoader",
}:
name = "path"
display_name = "Local directory"
if name:
self.template.add_field(
TemplateField(
field_type="str",
required=True,
show=True,
name=name,
value="",
display_name=display_name,
)
)
if self.template.type_name in {"DirectoryLoader"}:
for field in build_directory_loader_fields():
self.template.add_field(field)
else:
self.template.add_field(
TemplateField(
field_type="str",
required=True,
show=True,
name="glob",
value="**/*.txt",
display_name="glob",
name=name,
value="",
display_name=display_name,
)
)
# add a metadata field of type dict
@ -165,3 +159,101 @@ class DocumentLoaderFrontNode(FrontendNode):
field.show = True
field.advanced = False
field.show = True
def build_directory_loader_fields():
# if loader_kwargs is None:
# loader_kwargs = {}
# self.path = path
# self.glob = glob
# self.load_hidden = load_hidden
# self.loader_cls = loader_cls
# self.loader_kwargs = loader_kwargs
# self.silent_errors = silent_errors
# self.recursive = recursive
# self.show_progress = show_progress
# self.use_multithreading = use_multithreading
# self.max_concurrency = max_concurrency
# Based on the above fields, we can build the following fields:
# path, glob, load_hidden, silent_errors, recursive, show_progress, use_multithreading, max_concurrency
# path
path = TemplateField(
field_type="str",
required=True,
show=True,
name="path",
value="",
display_name="Local directory",
advanced=False,
)
# glob
glob = TemplateField(
field_type="str",
required=True,
show=True,
name="glob",
value="**/*.txt",
display_name="glob",
advanced=False,
)
# load_hidden
load_hidden = TemplateField(
field_type="bool",
required=False,
show=True,
name="load_hidden",
value="False",
display_name="Load hidden files",
advanced=True,
)
# silent_errors
silent_errors = TemplateField(
field_type="bool",
required=False,
show=True,
name="silent_errors",
value="False",
display_name="Silent errors",
advanced=True,
)
# recursive
recursive = TemplateField(
field_type="bool",
required=False,
show=True,
name="recursive",
value="True",
display_name="Recursive",
advanced=True,
)
# use_multithreading
use_multithreading = TemplateField(
field_type="bool",
required=False,
show=True,
name="use_multithreading",
value="True",
display_name="Use multithreading",
advanced=True,
)
# max_concurrency
max_concurrency = TemplateField(
field_type="int",
required=False,
show=True,
name="max_concurrency",
value=10,
display_name="Max concurrency",
advanced=True,
)
return (
path,
glob,
load_hidden,
silent_errors,
recursive,
use_multithreading,
max_concurrency,
)