✨ feat(documentloaders.py): add metadata field to document loader template
The function build_template was renamed to build_file_field to improve naming consistency. A metadata field was added to the document loader template so that additional information can be passed along with the document; it is meant to hold a dict, but is declared as a required "code"-type field whose default value is "{}". The format_field method was updated to show the metadata field when it is present.
🔨 refactor(documentloaders.py): rename build_template function to build_file_field for better semantics
This commit is contained in:
parent
b8141dca7d
commit
796ce298f6
1 changed file with 39 additions and 18 deletions
|
|
@ -2,7 +2,7 @@ from langflow.template.field.base import TemplateField
|
|||
from langflow.template.frontend_node.base import FrontendNode
|
||||
|
||||
|
||||
def build_template(
|
||||
def build_file_field(
|
||||
suffixes: list, fileTypes: list, name: str = "file_path"
|
||||
) -> TemplateField:
|
||||
"""Build a template field for a document loader."""
|
||||
|
|
@ -19,32 +19,34 @@ def build_template(
|
|||
|
||||
class DocumentLoaderFrontNode(FrontendNode):
|
||||
file_path_templates = {
|
||||
"AirbyteJSONLoader": build_template(suffixes=[".json"], fileTypes=["json"]),
|
||||
"CoNLLULoader": build_template(suffixes=[".csv"], fileTypes=["csv"]),
|
||||
"CSVLoader": build_template(suffixes=[".csv"], fileTypes=["csv"]),
|
||||
"UnstructuredEmailLoader": build_template(suffixes=[".eml"], fileTypes=["eml"]),
|
||||
"EverNoteLoader": build_template(suffixes=[".xml"], fileTypes=["xml"]),
|
||||
"FacebookChatLoader": build_template(suffixes=[".json"], fileTypes=["json"]),
|
||||
"GutenbergLoader": build_template(suffixes=[".txt"], fileTypes=["txt"]),
|
||||
"BSHTMLLoader": build_template(suffixes=[".html"], fileTypes=["html"]),
|
||||
"UnstructuredHTMLLoader": build_template(
|
||||
"AirbyteJSONLoader": build_file_field(suffixes=[".json"], fileTypes=["json"]),
|
||||
"CoNLLULoader": build_file_field(suffixes=[".csv"], fileTypes=["csv"]),
|
||||
"CSVLoader": build_file_field(suffixes=[".csv"], fileTypes=["csv"]),
|
||||
"UnstructuredEmailLoader": build_file_field(
|
||||
suffixes=[".eml"], fileTypes=["eml"]
|
||||
),
|
||||
"EverNoteLoader": build_file_field(suffixes=[".xml"], fileTypes=["xml"]),
|
||||
"FacebookChatLoader": build_file_field(suffixes=[".json"], fileTypes=["json"]),
|
||||
"GutenbergLoader": build_file_field(suffixes=[".txt"], fileTypes=["txt"]),
|
||||
"BSHTMLLoader": build_file_field(suffixes=[".html"], fileTypes=["html"]),
|
||||
"UnstructuredHTMLLoader": build_file_field(
|
||||
suffixes=[".html"], fileTypes=["html"]
|
||||
),
|
||||
"UnstructuredImageLoader": build_template(
|
||||
"UnstructuredImageLoader": build_file_field(
|
||||
suffixes=[".jpg", ".jpeg", ".png", ".gif", ".bmp"],
|
||||
fileTypes=["jpg", "jpeg", "png", "gif", "bmp"],
|
||||
),
|
||||
"UnstructuredMarkdownLoader": build_template(
|
||||
"UnstructuredMarkdownLoader": build_file_field(
|
||||
suffixes=[".md"], fileTypes=["md"]
|
||||
),
|
||||
"PyPDFLoader": build_template(suffixes=[".pdf"], fileTypes=["pdf"]),
|
||||
"UnstructuredPowerPointLoader": build_template(
|
||||
"PyPDFLoader": build_file_field(suffixes=[".pdf"], fileTypes=["pdf"]),
|
||||
"UnstructuredPowerPointLoader": build_file_field(
|
||||
suffixes=[".pptx", ".ppt"], fileTypes=["pptx", "ppt"]
|
||||
),
|
||||
"SRTLoader": build_template(suffixes=[".srt"], fileTypes=["srt"]),
|
||||
"TelegramChatLoader": build_template(suffixes=[".json"], fileTypes=["json"]),
|
||||
"TextLoader": build_template(suffixes=[".txt"], fileTypes=["txt"]),
|
||||
"UnstructuredWordDocumentLoader": build_template(
|
||||
"SRTLoader": build_file_field(suffixes=[".srt"], fileTypes=["srt"]),
|
||||
"TelegramChatLoader": build_file_field(suffixes=[".json"], fileTypes=["json"]),
|
||||
"TextLoader": build_file_field(suffixes=[".txt"], fileTypes=["txt"]),
|
||||
"UnstructuredWordDocumentLoader": build_file_field(
|
||||
suffixes=[".docx", ".doc"], fileTypes=["docx", "doc"]
|
||||
),
|
||||
}
|
||||
|
|
@ -90,3 +92,22 @@ class DocumentLoaderFrontNode(FrontendNode):
|
|||
display_name="glob",
|
||||
)
|
||||
)
|
||||
# add a metadata field of type dict
|
||||
self.template.add_field(
|
||||
TemplateField(
|
||||
field_type="code",
|
||||
required=True,
|
||||
show=True,
|
||||
name="metadata",
|
||||
value="{}",
|
||||
display_name="Metadata",
|
||||
multiline=False,
|
||||
)
|
||||
)
|
||||
|
||||
@staticmethod
|
||||
def format_field(field: TemplateField, name: str | None = None) -> None:
|
||||
FrontendNode.format_field(field, name)
|
||||
if field.name == "metadata":
|
||||
field.show = True
|
||||
field.advanced = False
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue