Added `add_extra_fields` method to the text splitters frontend node

This commit is contained in:
Alexandre Henrique 2023-05-31 15:40:09 -03:00
commit cd2944bac6
5 changed files with 83 additions and 130 deletions

View file

@ -10,7 +10,6 @@ from langflow.utils.util import build_template_from_class
# Assuming necessary imports for Field, Template, and FrontendNode classes
class ChainCreator(LangChainTypeCreator):
type_name: str = "chains"

View file

@ -1,30 +1,28 @@
from typing import Dict, List, Optional, Type
from langflow.interface.base import LangChainTypeCreator
from langflow.template.field.base import TemplateField
from langflow.template.frontend_node.documentloaders import DocumentLoaderFrontNode
from langflow.interface.custom_lists import documentloaders_type_to_cls_dict
from langflow.settings import settings
from langflow.utils.logger import logger
from langflow.utils.util import build_template_from_class
class DocumentLoaderCreator(LangChainTypeCreator):
type_name: str = "documentloaders"
# Read-only property that returns the DocumentLoaderFrontNode class itself
# (the class object, not an instance).
# NOTE(review): the `def` line appears twice with identical text; presumably
# these are the pre- and post-commit sides of a whitespace-only change rendered
# without +/- markers -- confirm against the repository before relying on it.
@property
def frontend_node_class(self) -> Type[DocumentLoaderFrontNode]:
def frontend_node_class(self) -> Type[DocumentLoaderFrontNode]:
return DocumentLoaderFrontNode
# Read-only property that returns documentloaders_type_to_cls_dict (imported
# from langflow.interface.custom_lists) as-is, without copying it.
@property
def type_to_loader_dict(self) -> Dict:
return documentloaders_type_to_cls_dict
def get_signature(self, name: str) -> Optional[Dict]:
"""Get the signature of a document loader."""
try:
return build_template_from_class(
name, documentloaders_type_to_cls_dict
)
return build_template_from_class(name, documentloaders_type_to_cls_dict)
except ValueError as exc:
raise ValueError(f"Documment Loader {name} not found") from exc
except AttributeError as exc:

View file

@ -1,7 +1,7 @@
from typing import Dict, List, Optional, Type
from langflow.interface.base import LangChainTypeCreator
from langflow.template.frontend_node.textsplitters import VectorStoreFrontendNode
from langflow.template.frontend_node.textsplitters import TextSplittersFrontendNode
from langflow.interface.custom_lists import textsplitter_type_to_cls_dict
from langflow.settings import settings
from langflow.utils.logger import logger
@ -12,8 +12,8 @@ class TextSplitterCreator(LangChainTypeCreator):
type_name: str = "textsplitters"
# Read-only property that returns the frontend node class used for text
# splitters (the class object, not an instance).
# NOTE(review): two definitions are shown back to back. The first returns
# VectorStoreFrontendNode, the second TextSplittersFrontendNode; presumably the
# second is the post-commit version, matching the renamed import shown in the
# same hunk -- confirm against the repository.
@property
def frontend_node_class(self) -> Type[VectorStoreFrontendNode]:
return VectorStoreFrontendNode
def frontend_node_class(self) -> Type[TextSplittersFrontendNode]:
return TextSplittersFrontendNode
@property
def type_to_loader_dict(self) -> Dict:

View file

@ -1,49 +1,32 @@
from typing import Dict, List, Optional, Type
from langflow.template.field.base import TemplateField
from langflow.template.frontend_node.base import FrontendNode
class DocumentLoaderFrontNode(FrontendNode):
class DocumentLoaderFrontNode(FrontendNode):
# Static helper that builds a TemplateField of field_type "file" which is
# required, shown, has an empty-string value, and carries the given
# `suffixes` and `fileTypes` lists. `name` defaults to "file_path".
# NOTE(review): the definition appears twice with the same tokens but a
# different line wrapping of the signature; presumably the first copy is the
# pre-commit text and the second the reformatted post-commit text -- confirm.
# NOTE(review): file_path_templates calls build_template(...) directly; if that
# happens inside the class body, the name is bound to the staticmethod object,
# which is only callable on Python 3.10+ -- confirm the supported versions.
@staticmethod
def build_template(suffixes: list, fileTypes: list, name: str = "file_path"
) -> TemplateField:
"""Build a template field for a document loader."""
return TemplateField(
field_type="file",
required=True,
show=True,
name=name,
value="",
suffixes=suffixes,
fileTypes=fileTypes,
)
# Second copy of the same definition (signature wrapped across three lines).
def build_template(
suffixes: list, fileTypes: list, name: str = "file_path"
) -> TemplateField:
"""Build a template field for a document loader."""
return TemplateField(
field_type="file",
required=True,
show=True,
name=name,
value="",
suffixes=suffixes,
fileTypes=fileTypes,
)
file_path_templates = {
"AirbyteJSONLoader": build_template(
suffixes=[".json"], fileTypes=["json"]
),
"CoNLLULoader": build_template(
suffixes=[".csv"], fileTypes=["csv"]
),
"CSVLoader": build_template(
suffixes=[".csv"], fileTypes=["csv"]
),
"UnstructuredEmailLoader": build_template(
suffixes=[".eml"], fileTypes=["eml"]
),
"EverNoteLoader": build_template(
suffixes=[".xml"], fileTypes=["xml"]
),
"FacebookChatLoader": build_template(
suffixes=[".json"], fileTypes=["json"]
),
"GutenbergLoader": build_template(
suffixes=[".txt"], fileTypes=["txt"]
),
"BSHTMLLoader": build_template(
suffixes=[".html"], fileTypes=["html"]
),
"AirbyteJSONLoader": build_template(suffixes=[".json"], fileTypes=["json"]),
"CoNLLULoader": build_template(suffixes=[".csv"], fileTypes=["csv"]),
"CSVLoader": build_template(suffixes=[".csv"], fileTypes=["csv"]),
"UnstructuredEmailLoader": build_template(suffixes=[".eml"], fileTypes=["eml"]),
"EverNoteLoader": build_template(suffixes=[".xml"], fileTypes=["xml"]),
"FacebookChatLoader": build_template(suffixes=[".json"], fileTypes=["json"]),
"GutenbergLoader": build_template(suffixes=[".txt"], fileTypes=["txt"]),
"BSHTMLLoader": build_template(suffixes=[".html"], fileTypes=["html"]),
"UnstructuredHTMLLoader": build_template(
suffixes=[".html"], fileTypes=["html"]
),
@ -54,28 +37,21 @@ class DocumentLoaderFrontNode(FrontendNode):
"UnstructuredMarkdownLoader": build_template(
suffixes=[".md"], fileTypes=["md"]
),
"PyPDFLoader": build_template(
suffixes=[".pdf"], fileTypes=["pdf"]
),
"PyPDFLoader": build_template(suffixes=[".pdf"], fileTypes=["pdf"]),
"UnstructuredPowerPointLoader": build_template(
suffixes=[".pptx", ".ppt"], fileTypes=["pptx", "ppt"]
),
"SRTLoader": build_template(
suffixes=[".srt"], fileTypes=["srt"]
),
"TelegramChatLoader": build_template(
suffixes=[".json"], fileTypes=["json"]
),
"TextLoader": build_template(
suffixes=[".txt"], fileTypes=["txt"]
),
"SRTLoader": build_template(suffixes=[".srt"], fileTypes=["srt"]),
"TelegramChatLoader": build_template(suffixes=[".json"], fileTypes=["json"]),
"TextLoader": build_template(suffixes=[".txt"], fileTypes=["txt"]),
"UnstructuredWordDocumentLoader": build_template(
suffixes=[".docx", ".doc"], fileTypes=["docx", "doc"]
),
}
def add_extra_fields(self) -> None:
if self.template.type_name in self.file_path_templates:
def add_extra_fields(self) -> None:
name = None
if self.template.type_name in self.file_path_templates:
self.template.add_field(self.file_path_templates[self.template.type_name])
elif self.template.type_name in {
"WebBaseLoader",
@ -85,37 +61,19 @@ class DocumentLoaderFrontNode(FrontendNode):
"IFixitLoader",
"IMSDbLoader",
}:
self.template.add_field(
TemplateField(
field_type="str",
required=True,
show=True,
name="web_path",
value="",
display_name="Web Page",
)
)
name = "web_path"
elif self.template.type_name in {"GitbookLoader"}:
self.template.add_field(
TemplateField(
field_type="str",
required=True,
show=True,
name="web_page",
value="",
display_name="Web Page",
)
)
name = "web_page"
elif self.template.type_name in {"ReadTheDocsLoader"}:
name = "path"
if name:
self.template.add_field(
TemplateField(
field_type="str",
required=True,
show=True,
name="path",
name=name,
value="",
display_name="Web Page",
)
)

View file

@ -1,46 +1,44 @@
from typing import Dict, List, Optional, Type
from langflow.template.field.base import TemplateField
from langflow.template.frontend_node.base import FrontendNode
# Frontend node for text splitters. add_extra_fields registers four template
# fields through self.template.add_field: "documents", "separator",
# "chunk_size" and "chunk_overlap"; every one is required and shown.
# NOTE(review): two class headers are shown back to back. Presumably
# VectorStoreFrontendNode is the pre-commit name and TextSplittersFrontendNode
# the post-commit name (the hunk is rendered without +/- markers and with
# indentation stripped) -- confirm against the repository.
class VectorStoreFrontendNode(FrontendNode):
class TextSplittersFrontendNode(FrontendNode):
def add_extra_fields(self) -> None:
# NOTE(review): `pass` and the triple-quoted block below look like the
# pre-commit body. The string holds dict-style assignments into
# signature["template"] for the same four field names; its chunk_size value is
# 4000, whereas the add_field call further down uses 1000 -- confirm which
# lines survive the commit.
pass
"""
signature["template"]["documents"] = {
"type": "BaseLoader",
"required": True,
"show": True,
"name": "documents",
}
signature["template"]["separator"] = {
"type": "str",
"required": True,
"show": True,
"value": ".",
"name": "separator",
"display_name": "Separator",
}
signature["template"]["chunk_size"] = {
"type": "int",
"required": True,
"show": True,
"value": 4000,
"name": "chunk_size",
"display_name": "Chunk Size",
}
signature["template"]["chunk_overlap"] = {
"type": "int",
"required": True,
"show": True,
"value": 200,
"name": "chunk_overlap",
"display_name": "Chunk Overlap",
}
"""
# "documents": field of type BaseLoader; no value or display name is given.
self.template.add_field(
TemplateField(
field_type="BaseLoader",
required=True,
show=True,
name="documents",
)
)
# "separator": str field with value ".", displayed as "Separator".
self.template.add_field(
TemplateField(
field_type="str",
required=True,
show=True,
value=".",
name="separator",
display_name="Separator",
)
)
# "chunk_size": int field with value 1000, displayed as "Chunk Size".
self.template.add_field(
TemplateField(
field_type="int",
required=True,
show=True,
value=1000,
name="chunk_size",
display_name="Chunk Size",
)
)
# "chunk_overlap": int field with value 200, displayed as "Chunk Overlap".
self.template.add_field(
TemplateField(
field_type="int",
required=True,
show=True,
value=200,
name="chunk_overlap",
display_name="Chunk Overlap",
)
)