added add_extra_fields method to textsplitters
This commit is contained in:
parent
5291e981e2
commit
cd2944bac6
5 changed files with 83 additions and 130 deletions
|
|
@ -10,7 +10,6 @@ from langflow.utils.util import build_template_from_class
|
|||
|
||||
# Assuming necessary imports for Field, Template, and FrontendNode classes
|
||||
|
||||
|
||||
class ChainCreator(LangChainTypeCreator):
|
||||
type_name: str = "chains"
|
||||
|
||||
|
|
|
|||
|
|
@ -1,30 +1,28 @@
|
|||
from typing import Dict, List, Optional, Type
|
||||
|
||||
from langflow.interface.base import LangChainTypeCreator
|
||||
from langflow.template.field.base import TemplateField
|
||||
from langflow.template.frontend_node.documentloaders import DocumentLoaderFrontNode
|
||||
from langflow.interface.custom_lists import documentloaders_type_to_cls_dict
|
||||
from langflow.settings import settings
|
||||
from langflow.utils.logger import logger
|
||||
from langflow.utils.util import build_template_from_class
|
||||
|
||||
|
||||
class DocumentLoaderCreator(LangChainTypeCreator):
|
||||
type_name: str = "documentloaders"
|
||||
|
||||
@property
|
||||
def frontend_node_class(self) -> Type[DocumentLoaderFrontNode]:
|
||||
def frontend_node_class(self) -> Type[DocumentLoaderFrontNode]:
|
||||
return DocumentLoaderFrontNode
|
||||
|
||||
@property
|
||||
def type_to_loader_dict(self) -> Dict:
|
||||
return documentloaders_type_to_cls_dict
|
||||
|
||||
|
||||
def get_signature(self, name: str) -> Optional[Dict]:
|
||||
"""Get the signature of a document loader."""
|
||||
try:
|
||||
return build_template_from_class(
|
||||
name, documentloaders_type_to_cls_dict
|
||||
)
|
||||
return build_template_from_class(name, documentloaders_type_to_cls_dict)
|
||||
except ValueError as exc:
|
||||
raise ValueError(f"Documment Loader {name} not found") from exc
|
||||
except AttributeError as exc:
|
||||
|
|
|
|||
|
|
@ -1,7 +1,7 @@
|
|||
from typing import Dict, List, Optional, Type
|
||||
|
||||
from langflow.interface.base import LangChainTypeCreator
|
||||
from langflow.template.frontend_node.textsplitters import VectorStoreFrontendNode
|
||||
from langflow.template.frontend_node.textsplitters import TextSplittersFrontendNode
|
||||
from langflow.interface.custom_lists import textsplitter_type_to_cls_dict
|
||||
from langflow.settings import settings
|
||||
from langflow.utils.logger import logger
|
||||
|
|
@ -12,8 +12,8 @@ class TextSplitterCreator(LangChainTypeCreator):
|
|||
type_name: str = "textsplitters"
|
||||
|
||||
@property
|
||||
def frontend_node_class(self) -> Type[VectorStoreFrontendNode]:
|
||||
return VectorStoreFrontendNode
|
||||
def frontend_node_class(self) -> Type[TextSplittersFrontendNode]:
|
||||
return TextSplittersFrontendNode
|
||||
|
||||
@property
|
||||
def type_to_loader_dict(self) -> Dict:
|
||||
|
|
|
|||
|
|
@ -1,49 +1,32 @@
|
|||
from typing import Dict, List, Optional, Type
|
||||
|
||||
from langflow.template.field.base import TemplateField
|
||||
from langflow.template.frontend_node.base import FrontendNode
|
||||
|
||||
class DocumentLoaderFrontNode(FrontendNode):
|
||||
|
||||
class DocumentLoaderFrontNode(FrontendNode):
|
||||
@staticmethod
|
||||
def build_template(suffixes: list, fileTypes: list, name: str = "file_path"
|
||||
) -> TemplateField:
|
||||
"""Build a template field for a document loader."""
|
||||
return TemplateField(
|
||||
field_type="file",
|
||||
required=True,
|
||||
show=True,
|
||||
name=name,
|
||||
value="",
|
||||
suffixes=suffixes,
|
||||
fileTypes=fileTypes,
|
||||
)
|
||||
|
||||
def build_template(
|
||||
suffixes: list, fileTypes: list, name: str = "file_path"
|
||||
) -> TemplateField:
|
||||
"""Build a template field for a document loader."""
|
||||
return TemplateField(
|
||||
field_type="file",
|
||||
required=True,
|
||||
show=True,
|
||||
name=name,
|
||||
value="",
|
||||
suffixes=suffixes,
|
||||
fileTypes=fileTypes,
|
||||
)
|
||||
|
||||
file_path_templates = {
|
||||
"AirbyteJSONLoader": build_template(
|
||||
suffixes=[".json"], fileTypes=["json"]
|
||||
),
|
||||
"CoNLLULoader": build_template(
|
||||
suffixes=[".csv"], fileTypes=["csv"]
|
||||
),
|
||||
"CSVLoader": build_template(
|
||||
suffixes=[".csv"], fileTypes=["csv"]
|
||||
),
|
||||
"UnstructuredEmailLoader": build_template(
|
||||
suffixes=[".eml"], fileTypes=["eml"]
|
||||
),
|
||||
"EverNoteLoader": build_template(
|
||||
suffixes=[".xml"], fileTypes=["xml"]
|
||||
),
|
||||
"FacebookChatLoader": build_template(
|
||||
suffixes=[".json"], fileTypes=["json"]
|
||||
),
|
||||
"GutenbergLoader": build_template(
|
||||
suffixes=[".txt"], fileTypes=["txt"]
|
||||
),
|
||||
"BSHTMLLoader": build_template(
|
||||
suffixes=[".html"], fileTypes=["html"]
|
||||
),
|
||||
"AirbyteJSONLoader": build_template(suffixes=[".json"], fileTypes=["json"]),
|
||||
"CoNLLULoader": build_template(suffixes=[".csv"], fileTypes=["csv"]),
|
||||
"CSVLoader": build_template(suffixes=[".csv"], fileTypes=["csv"]),
|
||||
"UnstructuredEmailLoader": build_template(suffixes=[".eml"], fileTypes=["eml"]),
|
||||
"EverNoteLoader": build_template(suffixes=[".xml"], fileTypes=["xml"]),
|
||||
"FacebookChatLoader": build_template(suffixes=[".json"], fileTypes=["json"]),
|
||||
"GutenbergLoader": build_template(suffixes=[".txt"], fileTypes=["txt"]),
|
||||
"BSHTMLLoader": build_template(suffixes=[".html"], fileTypes=["html"]),
|
||||
"UnstructuredHTMLLoader": build_template(
|
||||
suffixes=[".html"], fileTypes=["html"]
|
||||
),
|
||||
|
|
@ -54,28 +37,21 @@ class DocumentLoaderFrontNode(FrontendNode):
|
|||
"UnstructuredMarkdownLoader": build_template(
|
||||
suffixes=[".md"], fileTypes=["md"]
|
||||
),
|
||||
"PyPDFLoader": build_template(
|
||||
suffixes=[".pdf"], fileTypes=["pdf"]
|
||||
),
|
||||
"PyPDFLoader": build_template(suffixes=[".pdf"], fileTypes=["pdf"]),
|
||||
"UnstructuredPowerPointLoader": build_template(
|
||||
suffixes=[".pptx", ".ppt"], fileTypes=["pptx", "ppt"]
|
||||
),
|
||||
"SRTLoader": build_template(
|
||||
suffixes=[".srt"], fileTypes=["srt"]
|
||||
),
|
||||
"TelegramChatLoader": build_template(
|
||||
suffixes=[".json"], fileTypes=["json"]
|
||||
),
|
||||
"TextLoader": build_template(
|
||||
suffixes=[".txt"], fileTypes=["txt"]
|
||||
),
|
||||
"SRTLoader": build_template(suffixes=[".srt"], fileTypes=["srt"]),
|
||||
"TelegramChatLoader": build_template(suffixes=[".json"], fileTypes=["json"]),
|
||||
"TextLoader": build_template(suffixes=[".txt"], fileTypes=["txt"]),
|
||||
"UnstructuredWordDocumentLoader": build_template(
|
||||
suffixes=[".docx", ".doc"], fileTypes=["docx", "doc"]
|
||||
),
|
||||
}
|
||||
|
||||
def add_extra_fields(self) -> None:
|
||||
if self.template.type_name in self.file_path_templates:
|
||||
|
||||
def add_extra_fields(self) -> None:
|
||||
name = None
|
||||
if self.template.type_name in self.file_path_templates:
|
||||
self.template.add_field(self.file_path_templates[self.template.type_name])
|
||||
elif self.template.type_name in {
|
||||
"WebBaseLoader",
|
||||
|
|
@ -85,37 +61,19 @@ class DocumentLoaderFrontNode(FrontendNode):
|
|||
"IFixitLoader",
|
||||
"IMSDbLoader",
|
||||
}:
|
||||
self.template.add_field(
|
||||
TemplateField(
|
||||
field_type="str",
|
||||
required=True,
|
||||
show=True,
|
||||
name="web_path",
|
||||
value="",
|
||||
display_name="Web Page",
|
||||
)
|
||||
)
|
||||
name = "web_path"
|
||||
elif self.template.type_name in {"GitbookLoader"}:
|
||||
self.template.add_field(
|
||||
TemplateField(
|
||||
field_type="str",
|
||||
required=True,
|
||||
show=True,
|
||||
name="web_page",
|
||||
value="",
|
||||
display_name="Web Page",
|
||||
)
|
||||
)
|
||||
name = "web_page"
|
||||
elif self.template.type_name in {"ReadTheDocsLoader"}:
|
||||
name = "path"
|
||||
if name:
|
||||
self.template.add_field(
|
||||
TemplateField(
|
||||
field_type="str",
|
||||
required=True,
|
||||
show=True,
|
||||
name="path",
|
||||
name=name,
|
||||
value="",
|
||||
display_name="Web Page",
|
||||
)
|
||||
)
|
||||
|
||||
|
||||
|
|
@ -1,46 +1,44 @@
|
|||
from typing import Dict, List, Optional, Type
|
||||
|
||||
from langflow.template.field.base import TemplateField
|
||||
from langflow.template.frontend_node.base import FrontendNode
|
||||
|
||||
class VectorStoreFrontendNode(FrontendNode):
|
||||
|
||||
|
||||
class TextSplittersFrontendNode(FrontendNode):
|
||||
def add_extra_fields(self) -> None:
|
||||
pass
|
||||
|
||||
"""
|
||||
signature["template"]["documents"] = {
|
||||
"type": "BaseLoader",
|
||||
"required": True,
|
||||
"show": True,
|
||||
"name": "documents",
|
||||
}
|
||||
|
||||
signature["template"]["separator"] = {
|
||||
"type": "str",
|
||||
"required": True,
|
||||
"show": True,
|
||||
"value": ".",
|
||||
"name": "separator",
|
||||
"display_name": "Separator",
|
||||
}
|
||||
|
||||
signature["template"]["chunk_size"] = {
|
||||
"type": "int",
|
||||
"required": True,
|
||||
"show": True,
|
||||
"value": 4000,
|
||||
"name": "chunk_size",
|
||||
"display_name": "Chunk Size",
|
||||
}
|
||||
|
||||
signature["template"]["chunk_overlap"] = {
|
||||
"type": "int",
|
||||
"required": True,
|
||||
"show": True,
|
||||
"value": 200,
|
||||
"name": "chunk_overlap",
|
||||
"display_name": "Chunk Overlap",
|
||||
}
|
||||
"""
|
||||
|
||||
self.template.add_field(
|
||||
TemplateField(
|
||||
field_type="BaseLoader",
|
||||
required=True,
|
||||
show=True,
|
||||
name="documents",
|
||||
)
|
||||
)
|
||||
self.template.add_field(
|
||||
TemplateField(
|
||||
field_type="str",
|
||||
required=True,
|
||||
show=True,
|
||||
value=".",
|
||||
name="separator",
|
||||
display_name="Separator",
|
||||
)
|
||||
)
|
||||
self.template.add_field(
|
||||
TemplateField(
|
||||
field_type="int",
|
||||
required=True,
|
||||
show=True,
|
||||
value=1000,
|
||||
name="chunk_size",
|
||||
display_name="Chunk Size",
|
||||
)
|
||||
)
|
||||
self.template.add_field(
|
||||
TemplateField(
|
||||
field_type="int",
|
||||
required=True,
|
||||
show=True,
|
||||
value=200,
|
||||
name="chunk_overlap",
|
||||
display_name="Chunk Overlap",
|
||||
)
|
||||
)
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue