Implemented add_extra_fields method for the documentloaders (#408)
This commit is contained in:
commit
b02ed92b84
6 changed files with 148 additions and 156 deletions
|
|
@ -53,7 +53,7 @@ llms:
|
|||
# - AzureOpenAI
|
||||
# - AzureChatOpenAI
|
||||
- ChatOpenAI
|
||||
- LlamaCpp
|
||||
- LlamaCpp
|
||||
- CTransformers
|
||||
- Cohere
|
||||
- Anthropic
|
||||
|
|
@ -69,7 +69,7 @@ prompts:
|
|||
- ZeroShotPrompt
|
||||
textsplitters:
|
||||
- CharacterTextSplitter
|
||||
# - RecursiveCharacterTextSplitter
|
||||
- RecursiveCharacterTextSplitter
|
||||
# - LatexTextSplitter
|
||||
# - PythonCodeTextSplitter
|
||||
toolkits:
|
||||
|
|
|
|||
|
|
@ -1,30 +1,20 @@
|
|||
from typing import Dict, List, Optional
|
||||
from typing import Dict, List, Optional, Type
|
||||
|
||||
from langflow.interface.base import LangChainTypeCreator
|
||||
from langflow.template.frontend_node.documentloaders import DocumentLoaderFrontNode
|
||||
from langflow.interface.custom_lists import documentloaders_type_to_cls_dict
|
||||
from langflow.settings import settings
|
||||
from langflow.utils.logger import logger
|
||||
from langflow.utils.util import build_template_from_class
|
||||
|
||||
|
||||
def build_file_path_template(
    suffixes: list, fileTypes: list, name: str = "file_path"
) -> Dict:
    """Return the template entry describing a required file-upload field.

    Args:
        suffixes: Accepted file suffixes, stored under the "suffixes" key.
        fileTypes: Accepted file types, stored under the "fileTypes" key.
        name: Value stored under the "name" key; defaults to "file_path".

    Returns:
        A dict with the keys "type", "required", "show", "name", "value",
        "suffixes" and "fileTypes", in that order.
    """
    # Fixed attributes first so the key order matches the documented layout.
    template: Dict = dict(type="file", required=True, show=True)
    template["name"] = name
    template["value"] = ""
    # The caller's lists are stored as-is (not copied).
    template.update(suffixes=suffixes, fileTypes=fileTypes)
    return template
|
||||
|
||||
|
||||
class DocumentLoaderCreator(LangChainTypeCreator):
|
||||
type_name: str = "documentloaders"
|
||||
|
||||
@property
|
||||
def frontend_node_class(self) -> Type[DocumentLoaderFrontNode]:
|
||||
return DocumentLoaderFrontNode
|
||||
|
||||
@property
|
||||
def type_to_loader_dict(self) -> Dict:
|
||||
return documentloaders_type_to_cls_dict
|
||||
|
|
@ -32,106 +22,7 @@ class DocumentLoaderCreator(LangChainTypeCreator):
|
|||
def get_signature(self, name: str) -> Optional[Dict]:
|
||||
"""Get the signature of a document loader."""
|
||||
try:
|
||||
signature = build_template_from_class(
|
||||
name, documentloaders_type_to_cls_dict
|
||||
)
|
||||
|
||||
file_path_templates = {
|
||||
"AirbyteJSONLoader": build_file_path_template(
|
||||
suffixes=[".json"], fileTypes=["json"]
|
||||
),
|
||||
"CoNLLULoader": build_file_path_template(
|
||||
suffixes=[".csv"], fileTypes=["csv"]
|
||||
),
|
||||
"CSVLoader": build_file_path_template(
|
||||
suffixes=[".csv"], fileTypes=["csv"]
|
||||
),
|
||||
"UnstructuredEmailLoader": build_file_path_template(
|
||||
suffixes=[".eml"], fileTypes=["eml"]
|
||||
),
|
||||
"EverNoteLoader": build_file_path_template(
|
||||
suffixes=[".xml"], fileTypes=["xml"]
|
||||
),
|
||||
"FacebookChatLoader": build_file_path_template(
|
||||
suffixes=[".json"], fileTypes=["json"]
|
||||
),
|
||||
"GutenbergLoader": build_file_path_template(
|
||||
suffixes=[".txt"], fileTypes=["txt"]
|
||||
),
|
||||
"BSHTMLLoader": build_file_path_template(
|
||||
suffixes=[".html"], fileTypes=["html"]
|
||||
),
|
||||
"UnstructuredHTMLLoader": build_file_path_template(
|
||||
suffixes=[".html"], fileTypes=["html"]
|
||||
),
|
||||
"UnstructuredImageLoader": build_file_path_template(
|
||||
suffixes=[".jpg", ".jpeg", ".png", ".gif", ".bmp"],
|
||||
fileTypes=["jpg", "jpeg", "png", "gif", "bmp"],
|
||||
),
|
||||
"UnstructuredMarkdownLoader": build_file_path_template(
|
||||
suffixes=[".md"], fileTypes=["md"]
|
||||
),
|
||||
"PyPDFLoader": build_file_path_template(
|
||||
suffixes=[".pdf"], fileTypes=["pdf"]
|
||||
),
|
||||
"UnstructuredPowerPointLoader": build_file_path_template(
|
||||
suffixes=[".pptx", ".ppt"], fileTypes=["pptx", "ppt"]
|
||||
),
|
||||
"SRTLoader": build_file_path_template(
|
||||
suffixes=[".srt"], fileTypes=["srt"]
|
||||
),
|
||||
"TelegramChatLoader": build_file_path_template(
|
||||
suffixes=[".json"], fileTypes=["json"]
|
||||
),
|
||||
"TextLoader": build_file_path_template(
|
||||
suffixes=[".txt"], fileTypes=["txt"]
|
||||
),
|
||||
"UnstructuredWordDocumentLoader": build_file_path_template(
|
||||
suffixes=[".docx", ".doc"], fileTypes=["docx", "doc"]
|
||||
),
|
||||
"SlackDirectoryLoader": build_file_path_template(
|
||||
suffixes=[".zip"], fileTypes=["zip"]
|
||||
),
|
||||
}
|
||||
|
||||
if name in file_path_templates:
|
||||
signature["template"]["file_path"] = file_path_templates[name]
|
||||
elif name in {
|
||||
"WebBaseLoader",
|
||||
"AZLyricsLoader",
|
||||
"CollegeConfidentialLoader",
|
||||
"HNLoader",
|
||||
"IFixitLoader",
|
||||
"IMSDbLoader",
|
||||
}:
|
||||
signature["template"]["web_path"] = {
|
||||
"type": "str",
|
||||
"required": True,
|
||||
"show": True,
|
||||
"name": "web_path",
|
||||
"value": "",
|
||||
"display_name": "Web Page",
|
||||
}
|
||||
elif name in {"GitbookLoader"}:
|
||||
signature["template"]["web_page"] = {
|
||||
"type": "str",
|
||||
"required": True,
|
||||
"show": True,
|
||||
"name": "web_page",
|
||||
"value": "",
|
||||
"display_name": "Web Page",
|
||||
}
|
||||
elif name in {"ReadTheDocsLoader", "NotionDirectoryLoader"}:
|
||||
signature["template"]["path"] = {
|
||||
"type": "str",
|
||||
"required": True,
|
||||
"show": True,
|
||||
"name": "path",
|
||||
"value": "",
|
||||
"display_name": "Web Page",
|
||||
}
|
||||
|
||||
return signature
|
||||
return build_template_from_class(name, documentloaders_type_to_cls_dict)
|
||||
except ValueError as exc:
|
||||
raise ValueError(f"Documment Loader {name} not found") from exc
|
||||
except AttributeError as exc:
|
||||
|
|
|
|||
|
|
@ -1,6 +1,7 @@
|
|||
from typing import Dict, List, Optional
|
||||
from typing import Dict, List, Optional, Type
|
||||
|
||||
from langflow.interface.base import LangChainTypeCreator
|
||||
from langflow.template.frontend_node.textsplitters import TextSplittersFrontendNode
|
||||
from langflow.interface.custom_lists import textsplitter_type_to_cls_dict
|
||||
from langflow.settings import settings
|
||||
from langflow.utils.logger import logger
|
||||
|
|
@ -10,6 +11,10 @@ from langflow.utils.util import build_template_from_class
|
|||
class TextSplitterCreator(LangChainTypeCreator):
|
||||
type_name: str = "textsplitters"
|
||||
|
||||
@property
|
||||
def frontend_node_class(self) -> Type[TextSplittersFrontendNode]:
|
||||
return TextSplittersFrontendNode
|
||||
|
||||
@property
|
||||
def type_to_loader_dict(self) -> Dict:
|
||||
return textsplitter_type_to_cls_dict
|
||||
|
|
@ -17,43 +22,7 @@ class TextSplitterCreator(LangChainTypeCreator):
|
|||
def get_signature(self, name: str) -> Optional[Dict]:
|
||||
"""Get the signature of a text splitter."""
|
||||
try:
|
||||
signature = build_template_from_class(name, textsplitter_type_to_cls_dict)
|
||||
|
||||
signature["template"]["documents"] = {
|
||||
"type": "BaseLoader",
|
||||
"required": True,
|
||||
"show": True,
|
||||
"name": "documents",
|
||||
}
|
||||
|
||||
signature["template"]["separator"] = {
|
||||
"type": "str",
|
||||
"required": True,
|
||||
"show": True,
|
||||
"value": ".",
|
||||
"name": "separator",
|
||||
"display_name": "Separator",
|
||||
}
|
||||
|
||||
signature["template"]["chunk_size"] = {
|
||||
"type": "int",
|
||||
"required": True,
|
||||
"show": True,
|
||||
"value": 1000,
|
||||
"name": "chunk_size",
|
||||
"display_name": "Chunk Size",
|
||||
}
|
||||
|
||||
signature["template"]["chunk_overlap"] = {
|
||||
"type": "int",
|
||||
"required": True,
|
||||
"show": True,
|
||||
"value": 200,
|
||||
"name": "chunk_overlap",
|
||||
"display_name": "Chunk Overlap",
|
||||
}
|
||||
|
||||
return signature
|
||||
return build_template_from_class(name, textsplitter_type_to_cls_dict)
|
||||
except ValueError as exc:
|
||||
raise ValueError(f"Text Splitter {name} not found") from exc
|
||||
except AttributeError as exc:
|
||||
|
|
|
|||
|
|
@ -7,6 +7,8 @@ from langflow.template.frontend_node import (
|
|||
prompts,
|
||||
tools,
|
||||
vectorstores,
|
||||
documentloaders,
|
||||
textsplitters,
|
||||
)
|
||||
|
||||
__all__ = [
|
||||
|
|
@ -18,4 +20,6 @@ __all__ = [
|
|||
"llms",
|
||||
"prompts",
|
||||
"vectorstores",
|
||||
"documentloaders",
|
||||
"textsplitters",
|
||||
]
|
||||
|
|
|
|||
|
|
@ -0,0 +1,79 @@
|
|||
from langflow.template.field.base import TemplateField
|
||||
from langflow.template.frontend_node.base import FrontendNode
|
||||
|
||||
|
||||
def _make_file_field(
    suffixes: list, fileTypes: list, name: str = "file_path"
) -> TemplateField:
    """Build the required, visible file-upload field for a document loader.

    Kept at module level so it can be called while the class body below is
    still executing: a ``staticmethod`` object is only directly callable from
    Python 3.10 onward, so calling the decorated method inside the class body
    fails with ``TypeError`` on older interpreters.
    """
    return TemplateField(
        field_type="file",
        required=True,
        show=True,
        name=name,
        value="",
        suffixes=suffixes,
        fileTypes=fileTypes,
    )


class DocumentLoaderFrontNode(FrontendNode):
    """Frontend node for document loaders.

    ``add_extra_fields`` appends one extra input field to the node template,
    chosen by the loader's ``type_name``: a file-upload field for the loaders
    listed in ``file_path_templates``, or a plain string field for the
    web/path based loaders.
    """

    @staticmethod
    def build_template(
        suffixes: list, fileTypes: list, name: str = "file_path"
    ) -> TemplateField:
        """Build a template field for a document loader.

        Args:
            suffixes: Accepted file suffixes (e.g. ``[".json"]``).
            fileTypes: Accepted file types (e.g. ``["json"]``).
            name: Name of the field; defaults to ``"file_path"``.

        Returns:
            A required, visible ``TemplateField`` of type ``"file"`` with an
            empty default value.
        """
        return _make_file_field(suffixes, fileTypes, name)

    # Loader type name -> file-upload field added for that loader.
    # NOTE(review): the pre-refactor DocumentLoaderCreator.get_signature also
    # mapped "SlackDirectoryLoader" to a ".zip" file field; it is not listed
    # here - confirm the omission is intentional.
    file_path_templates = {
        "AirbyteJSONLoader": _make_file_field(
            suffixes=[".json"], fileTypes=["json"]
        ),
        "CoNLLULoader": _make_file_field(suffixes=[".csv"], fileTypes=["csv"]),
        "CSVLoader": _make_file_field(suffixes=[".csv"], fileTypes=["csv"]),
        "UnstructuredEmailLoader": _make_file_field(
            suffixes=[".eml"], fileTypes=["eml"]
        ),
        "EverNoteLoader": _make_file_field(suffixes=[".xml"], fileTypes=["xml"]),
        "FacebookChatLoader": _make_file_field(
            suffixes=[".json"], fileTypes=["json"]
        ),
        "GutenbergLoader": _make_file_field(suffixes=[".txt"], fileTypes=["txt"]),
        "BSHTMLLoader": _make_file_field(suffixes=[".html"], fileTypes=["html"]),
        "UnstructuredHTMLLoader": _make_file_field(
            suffixes=[".html"], fileTypes=["html"]
        ),
        "UnstructuredImageLoader": _make_file_field(
            suffixes=[".jpg", ".jpeg", ".png", ".gif", ".bmp"],
            fileTypes=["jpg", "jpeg", "png", "gif", "bmp"],
        ),
        "UnstructuredMarkdownLoader": _make_file_field(
            suffixes=[".md"], fileTypes=["md"]
        ),
        "PyPDFLoader": _make_file_field(suffixes=[".pdf"], fileTypes=["pdf"]),
        "UnstructuredPowerPointLoader": _make_file_field(
            suffixes=[".pptx", ".ppt"], fileTypes=["pptx", "ppt"]
        ),
        "SRTLoader": _make_file_field(suffixes=[".srt"], fileTypes=["srt"]),
        "TelegramChatLoader": _make_file_field(
            suffixes=[".json"], fileTypes=["json"]
        ),
        "TextLoader": _make_file_field(suffixes=[".txt"], fileTypes=["txt"]),
        "UnstructuredWordDocumentLoader": _make_file_field(
            suffixes=[".docx", ".doc"], fileTypes=["docx", "doc"]
        ),
    }

    def add_extra_fields(self) -> None:
        """Add the loader-specific input field to ``self.template``.

        File-based loaders get their pre-built file field from
        ``file_path_templates``. Web/path based loaders get a required string
        field named ``web_path``, ``web_page`` or ``path``. Loader types that
        match none of the cases get no extra field.
        """
        name = None
        if self.template.type_name in self.file_path_templates:
            self.template.add_field(self.file_path_templates[self.template.type_name])
        elif self.template.type_name in {
            "WebBaseLoader",
            "AZLyricsLoader",
            "CollegeConfidentialLoader",
            "HNLoader",
            "IFixitLoader",
            "IMSDbLoader",
        }:
            name = "web_path"
        elif self.template.type_name in {"GitbookLoader"}:
            name = "web_page"
        # NOTE(review): the pre-refactor code also gave "NotionDirectoryLoader"
        # a "path" field - confirm it was dropped on purpose.
        elif self.template.type_name in {"ReadTheDocsLoader"}:
            name = "path"

        # Only the string-field branches set ``name``; the file branch has
        # already added its field above.
        if name:
            self.template.add_field(
                TemplateField(
                    field_type="str",
                    required=True,
                    show=True,
                    name=name,
                    value="",
                    # NOTE(review): the same label is used for the "path"
                    # field as well; verify that is the intended wording.
                    display_name="Web Page",
                )
            )
|
||||
49
src/backend/langflow/template/frontend_node/textsplitters.py
Normal file
49
src/backend/langflow/template/frontend_node/textsplitters.py
Normal file
|
|
@ -0,0 +1,49 @@
|
|||
from langflow.template.field.base import TemplateField
|
||||
from langflow.template.frontend_node.base import FrontendNode
|
||||
|
||||
|
||||
class TextSplittersFrontendNode(FrontendNode):
    """Frontend node for text splitters."""

    def add_extra_fields(self) -> None:
        """Append the documents, separator and chunking fields to the template.

        Fields are added in this order: ``documents``, the separator field
        (named ``separators`` for ``RecursiveCharacterTextSplitter`` and
        ``separator`` for every other type), ``chunk_size`` and
        ``chunk_overlap``.
        """
        # Input documents the splitter operates on.
        self.template.add_field(
            TemplateField(
                name="documents",
                field_type="BaseLoader",
                required=True,
                show=True,
            )
        )

        # Only the recursive splitter uses the plural field name.
        if self.template.type_name == "RecursiveCharacterTextSplitter":
            separator_field = "separators"
        else:
            separator_field = "separator"
        self.template.add_field(
            TemplateField(
                name=separator_field,
                display_name="Separator",
                field_type="str",
                value=".",
                required=True,
                show=True,
            )
        )

        # Integer settings share everything except name, label and default.
        int_settings = (
            ("chunk_size", "Chunk Size", 1000),
            ("chunk_overlap", "Chunk Overlap", 200),
        )
        for field_name, label, default in int_settings:
            self.template.add_field(
                TemplateField(
                    name=field_name,
                    display_name=label,
                    field_type="int",
                    value=default,
                    required=True,
                    show=True,
                )
            )
|
||||
Loading…
Add table
Add a link
Reference in a new issue