Add PyPDFDirectoryLoader and improving DirectoryLoader (#586)

This commit is contained in:
Gabriel Luiz Freitas Almeida 2023-07-01 17:11:32 -03:00 committed by GitHub
commit d6326979e6
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
7 changed files with 157 additions and 35 deletions

View file

@ -271,7 +271,7 @@ def run_langflow(host, port, log_level, options, app):
except KeyboardInterrupt:
pass
except Exception as e:
logger.error(e)
logger.exception(e)
sys.exit(1)

View file

@ -59,6 +59,8 @@ documentloaders:
documentation: "https://python.langchain.com/docs/modules/data_connection/document_loaders/how_to/html"
UnstructuredMarkdownLoader:
documentation: "https://python.langchain.com/docs/modules/data_connection/document_loaders/how_to/markdown"
PyPDFDirectoryLoader:
documentation: "https://python.langchain.com/docs/modules/data_connection/document_loaders/how_to/pdf"
PyPDFLoader:
documentation: "https://python.langchain.com/docs/modules/data_connection/document_loaders/how_to/pdf"
UnstructuredPowerPointLoader:

View file

@ -1,6 +1,6 @@
from langflow.settings import settings
from sqlmodel import SQLModel, Session, create_engine
from langflow.utils.logger import logger
if settings.database_url.startswith("sqlite"):
connect_args = {"check_same_thread": False}
@ -10,7 +10,9 @@ engine = create_engine(settings.database_url, connect_args=connect_args)
def create_db_and_tables():
logger.debug("Creating database and tables")
SQLModel.metadata.create_all(engine)
logger.debug("Database and tables created")
def get_session():

View file

@ -2,6 +2,7 @@ import os
import yaml
from pydantic import BaseSettings, root_validator
from langflow.utils.logger import logger
class Settings(BaseSettings):
@ -20,10 +21,21 @@ class Settings(BaseSettings):
textsplitters: dict = {}
utilities: dict = {}
dev: bool = False
database_url: str = "sqlite:///./langflow.db"
database_url: str
cache: str = "InMemoryCache"
remove_api_keys: bool = False
@root_validator(pre=True)
def set_database_url(cls, values):
if "database_url" not in values:
logger.debug("No database_url provided, trying DATABASE_URL env variable")
if database_url := os.getenv("DATABASE_URL"):
values["database_url"] = database_url
else:
logger.debug("No DATABASE_URL env variable, using sqlite database")
values["database_url"] = "sqlite:///./langflow.db"
return values
class Config:
validate_assignment = True
extra = "ignore"

View file

@ -120,29 +120,23 @@ class DocumentLoaderFrontNode(FrontendNode):
"DirectoryLoader",
"ReadTheDocsLoader",
"NotionDirectoryLoader",
"PyPDFDirectoryLoader",
}:
name = "path"
display_name = "Local directory"
if name:
self.template.add_field(
TemplateField(
field_type="str",
required=True,
show=True,
name=name,
value="",
display_name=display_name,
)
)
if self.template.type_name in {"DirectoryLoader"}:
for field in build_directory_loader_fields():
self.template.add_field(field)
else:
self.template.add_field(
TemplateField(
field_type="str",
required=True,
show=True,
name="glob",
value="**/*.txt",
display_name="glob",
name=name,
value="",
display_name=display_name,
)
)
# add a metadata field of type dict
@ -165,3 +159,101 @@ class DocumentLoaderFrontNode(FrontendNode):
field.show = True
field.advanced = False
field.show = True
def build_directory_loader_fields():
# if loader_kwargs is None:
# loader_kwargs = {}
# self.path = path
# self.glob = glob
# self.load_hidden = load_hidden
# self.loader_cls = loader_cls
# self.loader_kwargs = loader_kwargs
# self.silent_errors = silent_errors
# self.recursive = recursive
# self.show_progress = show_progress
# self.use_multithreading = use_multithreading
# self.max_concurrency = max_concurrency
# Based on the above fields, we can build the following fields:
# path, glob, load_hidden, silent_errors, recursive, show_progress, use_multithreading, max_concurrency
# path
path = TemplateField(
field_type="str",
required=True,
show=True,
name="path",
value="",
display_name="Local directory",
advanced=False,
)
# glob
glob = TemplateField(
field_type="str",
required=True,
show=True,
name="glob",
value="**/*.txt",
display_name="glob",
advanced=False,
)
# load_hidden
load_hidden = TemplateField(
field_type="bool",
required=False,
show=True,
name="load_hidden",
value="False",
display_name="Load hidden files",
advanced=True,
)
# silent_errors
silent_errors = TemplateField(
field_type="bool",
required=False,
show=True,
name="silent_errors",
value="False",
display_name="Silent errors",
advanced=True,
)
# recursive
recursive = TemplateField(
field_type="bool",
required=False,
show=True,
name="recursive",
value="True",
display_name="Recursive",
advanced=True,
)
# use_multithreading
use_multithreading = TemplateField(
field_type="bool",
required=False,
show=True,
name="use_multithreading",
value="True",
display_name="Use multithreading",
advanced=True,
)
# max_concurrency
max_concurrency = TemplateField(
field_type="int",
required=False,
show=True,
name="max_concurrency",
value=10,
display_name="Max concurrency",
advanced=True,
)
return (
path,
glob,
load_hidden,
silent_errors,
recursive,
use_multithreading,
max_concurrency,
)

View file

@ -51,7 +51,11 @@ export default function App() {
useEffect(() => {
// If there is an error alert open with data, add it to the alertsList
if (errorOpen && errorData) {
if(alertsList.length > 0 && JSON.stringify(alertsList[alertsList.length - 1].data)===JSON.stringify(errorData)){
if (
alertsList.length > 0 &&
JSON.stringify(alertsList[alertsList.length - 1].data) ===
JSON.stringify(errorData)
) {
return;
}
setErrorOpen(false);
@ -65,7 +69,11 @@ export default function App() {
}
// If there is a notice alert open with data, add it to the alertsList
else if (noticeOpen && noticeData) {
if(alertsList.length > 0 && JSON.stringify(alertsList[alertsList.length - 1].data)===JSON.stringify(noticeData)){
if (
alertsList.length > 0 &&
JSON.stringify(alertsList[alertsList.length - 1].data) ===
JSON.stringify(noticeData)
) {
return;
}
setNoticeOpen(false);
@ -79,7 +87,11 @@ export default function App() {
}
// If there is a success alert open with data, add it to the alertsList
else if (successOpen && successData) {
if(alertsList.length > 0 && JSON.stringify(alertsList[alertsList.length - 1].data)===JSON.stringify(successData)){
if (
alertsList.length > 0 &&
JSON.stringify(alertsList[alertsList.length - 1].data) ===
JSON.stringify(successData)
) {
return;
}
setSuccessOpen(false);
@ -161,4 +173,4 @@ export default function App() {
</div>
</div>
);
}
}

View file

@ -205,25 +205,27 @@ export default function ChatModal({
handleOnClose(event);
};
newWs.onerror = (ev) => {
getHealth().then((res) => {
if (res.status === 200) {
connectWS();
}
}).catch((err) => {
setErrorData({
// message when the backend failed
title: "The backend is not responding. Please try again later.",
// possible solution list
list: [
"Check your internet connection.",
"Check if the backend is running."
],
getHealth()
.then((res) => {
if (res.status === 200) {
connectWS();
}
})
.catch((err) => {
setErrorData({
// message when the backend failed
title: "The backend is not responding. Please try again later.",
// possible solution list
list: [
"Check your internet connection.",
"Check if the backend is running.",
],
});
});
})
};
ws.current = newWs;
} catch (error) {
connectWS();
connectWS();
console.log(error);
}
}