Add PyPDFDirectoryLoader and improve DirectoryLoader (#586)
This commit is contained in:
commit
d6326979e6
7 changed files with 157 additions and 35 deletions
|
|
@ -271,7 +271,7 @@ def run_langflow(host, port, log_level, options, app):
|
|||
except KeyboardInterrupt:
|
||||
pass
|
||||
except Exception as e:
|
||||
logger.error(e)
|
||||
logger.exception(e)
|
||||
sys.exit(1)
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -59,6 +59,8 @@ documentloaders:
|
|||
documentation: "https://python.langchain.com/docs/modules/data_connection/document_loaders/how_to/html"
|
||||
UnstructuredMarkdownLoader:
|
||||
documentation: "https://python.langchain.com/docs/modules/data_connection/document_loaders/how_to/markdown"
|
||||
PyPDFDirectoryLoader:
|
||||
documentation: "https://python.langchain.com/docs/modules/data_connection/document_loaders/how_to/pdf"
|
||||
PyPDFLoader:
|
||||
documentation: "https://python.langchain.com/docs/modules/data_connection/document_loaders/how_to/pdf"
|
||||
UnstructuredPowerPointLoader:
|
||||
|
|
|
|||
|
|
@ -1,6 +1,6 @@
|
|||
from langflow.settings import settings
|
||||
from sqlmodel import SQLModel, Session, create_engine
|
||||
|
||||
from langflow.utils.logger import logger
|
||||
|
||||
if settings.database_url.startswith("sqlite"):
|
||||
connect_args = {"check_same_thread": False}
|
||||
|
|
@ -10,7 +10,9 @@ engine = create_engine(settings.database_url, connect_args=connect_args)
|
|||
|
||||
|
||||
def create_db_and_tables():
    """Create the tables registered on ``SQLModel.metadata`` using ``engine``.

    A debug message is logged before and after the call so the step can be
    followed in the logs.
    """
    logger.debug("Creating database and tables")
    metadata = SQLModel.metadata
    metadata.create_all(engine)
    logger.debug("Database and tables created")
|
||||
|
||||
|
||||
def get_session():
|
||||
|
|
|
|||
|
|
@ -2,6 +2,7 @@ import os
|
|||
|
||||
import yaml
|
||||
from pydantic import BaseSettings, root_validator
|
||||
from langflow.utils.logger import logger
|
||||
|
||||
|
||||
class Settings(BaseSettings):
|
||||
|
|
@ -20,10 +21,21 @@ class Settings(BaseSettings):
|
|||
textsplitters: dict = {}
|
||||
utilities: dict = {}
|
||||
dev: bool = False
|
||||
database_url: str = "sqlite:///./langflow.db"
|
||||
database_url: str
|
||||
cache: str = "InMemoryCache"
|
||||
remove_api_keys: bool = False
|
||||
|
||||
@root_validator(pre=True)
def set_database_url(cls, values):
    """Fill in ``database_url`` when the incoming values do not carry one.

    Resolution order:
      1. an explicit ``database_url`` entry in ``values`` (left untouched);
      2. a non-empty ``DATABASE_URL`` environment variable;
      3. the local sqlite file ``sqlite:///./langflow.db``.

    Args:
        values: the raw mapping of field values handed to the pre-validator.

    Returns:
        The same ``values`` mapping, with ``database_url`` set.
    """
    # Guard clause: an explicitly supplied URL always wins.
    if "database_url" in values:
        return values

    logger.debug("No database_url provided, trying DATABASE_URL env variable")
    env_url = os.getenv("DATABASE_URL")
    if env_url:
        values["database_url"] = env_url
    else:
        # An unset or empty env variable falls through to the sqlite default.
        logger.debug("No DATABASE_URL env variable, using sqlite database")
        values["database_url"] = "sqlite:///./langflow.db"
    return values
|
||||
|
||||
class Config:
|
||||
validate_assignment = True
|
||||
extra = "ignore"
|
||||
|
|
|
|||
|
|
@ -120,29 +120,23 @@ class DocumentLoaderFrontNode(FrontendNode):
|
|||
"DirectoryLoader",
|
||||
"ReadTheDocsLoader",
|
||||
"NotionDirectoryLoader",
|
||||
"PyPDFDirectoryLoader",
|
||||
}:
|
||||
name = "path"
|
||||
display_name = "Local directory"
|
||||
if name:
|
||||
self.template.add_field(
|
||||
TemplateField(
|
||||
field_type="str",
|
||||
required=True,
|
||||
show=True,
|
||||
name=name,
|
||||
value="",
|
||||
display_name=display_name,
|
||||
)
|
||||
)
|
||||
if self.template.type_name in {"DirectoryLoader"}:
|
||||
for field in build_directory_loader_fields():
|
||||
self.template.add_field(field)
|
||||
else:
|
||||
self.template.add_field(
|
||||
TemplateField(
|
||||
field_type="str",
|
||||
required=True,
|
||||
show=True,
|
||||
name="glob",
|
||||
value="**/*.txt",
|
||||
display_name="glob",
|
||||
name=name,
|
||||
value="",
|
||||
display_name=display_name,
|
||||
)
|
||||
)
|
||||
# add a metadata field of type dict
|
||||
|
|
@ -165,3 +159,101 @@ class DocumentLoaderFrontNode(FrontendNode):
|
|||
field.show = True
|
||||
field.advanced = False
|
||||
field.show = True
|
||||
|
||||
|
||||
def build_directory_loader_fields():
    """Build the template fields shown for the ``DirectoryLoader`` node.

    One field is created for each of these loader arguments: ``path``,
    ``glob``, ``load_hidden``, ``silent_errors``, ``recursive``,
    ``use_multithreading`` and ``max_concurrency``. No field is built for
    ``loader_cls``, ``loader_kwargs`` or ``show_progress``.

    Returns:
        A 7-tuple of ``TemplateField`` objects, in the order listed above.
    """

    def _text_field(name, label, default):
        # Required, always-visible string input shown in the basic section.
        return TemplateField(
            field_type="str",
            required=True,
            show=True,
            name=name,
            value=default,
            display_name=label,
            advanced=False,
        )

    def _toggle_field(name, label, default):
        # Optional boolean switch placed in the advanced section.
        return TemplateField(
            field_type="bool",
            required=False,
            show=True,
            name=name,
            value=default,
            display_name=label,
            advanced=True,
        )

    directory = _text_field("path", "Local directory", "")
    pattern = _text_field("glob", "glob", "**/*.txt")

    # NOTE(review): the boolean defaults are the strings "False"/"True", not
    # real booleans. They are kept as-is; confirm that the consumers of these
    # fields expect strings.
    hidden_files = _toggle_field("load_hidden", "Load hidden files", "False")
    quiet_errors = _toggle_field("silent_errors", "Silent errors", "False")
    descend = _toggle_field("recursive", "Recursive", "True")
    threaded = _toggle_field("use_multithreading", "Use multithreading", "True")

    # The only integer field, so it is spelled out instead of using a factory.
    concurrency = TemplateField(
        field_type="int",
        required=False,
        show=True,
        name="max_concurrency",
        value=10,
        display_name="Max concurrency",
        advanced=True,
    )

    return (
        directory,
        pattern,
        hidden_files,
        quiet_errors,
        descend,
        threaded,
        concurrency,
    )
|
||||
|
|
|
|||
|
|
@ -51,7 +51,11 @@ export default function App() {
|
|||
useEffect(() => {
|
||||
// If there is an error alert open with data, add it to the alertsList
|
||||
if (errorOpen && errorData) {
|
||||
if(alertsList.length > 0 && JSON.stringify(alertsList[alertsList.length - 1].data)===JSON.stringify(errorData)){
|
||||
if (
|
||||
alertsList.length > 0 &&
|
||||
JSON.stringify(alertsList[alertsList.length - 1].data) ===
|
||||
JSON.stringify(errorData)
|
||||
) {
|
||||
return;
|
||||
}
|
||||
setErrorOpen(false);
|
||||
|
|
@ -65,7 +69,11 @@ export default function App() {
|
|||
}
|
||||
// If there is a notice alert open with data, add it to the alertsList
|
||||
else if (noticeOpen && noticeData) {
|
||||
if(alertsList.length > 0 && JSON.stringify(alertsList[alertsList.length - 1].data)===JSON.stringify(noticeData)){
|
||||
if (
|
||||
alertsList.length > 0 &&
|
||||
JSON.stringify(alertsList[alertsList.length - 1].data) ===
|
||||
JSON.stringify(noticeData)
|
||||
) {
|
||||
return;
|
||||
}
|
||||
setNoticeOpen(false);
|
||||
|
|
@ -79,7 +87,11 @@ export default function App() {
|
|||
}
|
||||
// If there is a success alert open with data, add it to the alertsList
|
||||
else if (successOpen && successData) {
|
||||
if(alertsList.length > 0 && JSON.stringify(alertsList[alertsList.length - 1].data)===JSON.stringify(successData)){
|
||||
if (
|
||||
alertsList.length > 0 &&
|
||||
JSON.stringify(alertsList[alertsList.length - 1].data) ===
|
||||
JSON.stringify(successData)
|
||||
) {
|
||||
return;
|
||||
}
|
||||
setSuccessOpen(false);
|
||||
|
|
@ -161,4 +173,4 @@ export default function App() {
|
|||
</div>
|
||||
</div>
|
||||
);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -205,25 +205,27 @@ export default function ChatModal({
|
|||
handleOnClose(event);
|
||||
};
|
||||
newWs.onerror = (ev) => {
|
||||
getHealth().then((res) => {
|
||||
if (res.status === 200) {
|
||||
connectWS();
|
||||
}
|
||||
}).catch((err) => {
|
||||
setErrorData({
|
||||
// message when the backend failed
|
||||
title: "The backend is not responding. Please try again later.",
|
||||
// possible solution list
|
||||
list: [
|
||||
"Check your internet connection.",
|
||||
"Check if the backend is running."
|
||||
],
|
||||
getHealth()
|
||||
.then((res) => {
|
||||
if (res.status === 200) {
|
||||
connectWS();
|
||||
}
|
||||
})
|
||||
.catch((err) => {
|
||||
setErrorData({
|
||||
// message when the backend failed
|
||||
title: "The backend is not responding. Please try again later.",
|
||||
// possible solution list
|
||||
list: [
|
||||
"Check your internet connection.",
|
||||
"Check if the backend is running.",
|
||||
],
|
||||
});
|
||||
});
|
||||
})
|
||||
};
|
||||
ws.current = newWs;
|
||||
} catch (error) {
|
||||
connectWS();
|
||||
connectWS();
|
||||
console.log(error);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue