Refactor SplitTextComponent class to accept inputs of type Record and Text

This commit is contained in:
Gabriel Luiz Freitas Almeida 2024-04-04 00:22:37 -03:00
commit 5cade5dca8

View file

@ -1,14 +1,11 @@
from typing import Optional
from langchain.text_splitter import (
RecursiveCharacterTextSplitter,
CharacterTextSplitter,
)
from langchain.text_splitter import CharacterTextSplitter, RecursiveCharacterTextSplitter
from langchain_core.documents import Document
from langflow.field_typing import Text
from langflow.interface.custom.custom_component import CustomComponent
from langflow.schema import Record
from langflow.field_typing import Text
from langflow.utils.util import unescape_string
@ -18,10 +15,10 @@ class SplitTextComponent(CustomComponent):
def build_config(self):
return {
"texts": {
"display_name": "Texts",
"inputs": {
"display_name": "Inputs",
"info": "Texts to split.",
"input_types": ["Text"],
"input_types": ["Record", "Text"],
},
"separators": {
"display_name": "Separators",
@ -48,7 +45,7 @@ class SplitTextComponent(CustomComponent):
def build(
self,
texts: list[Text],
inputs: list[Text],
separators: Optional[list[str]] = [" "],
chunk_size: Optional[int] = 1000,
chunk_overlap: Optional[int] = 200,
@ -77,9 +74,11 @@ class SplitTextComponent(CustomComponent):
)
documents = []
for _text in texts:
# documents.append(_input.to_lc_document())
documents.append(Document(page_content=_text))
for _input in inputs:
if isinstance(_input, Record):
documents.append(_input.to_lc_document())
else:
documents.append(Document(page_content=_input))
records = self.to_records(splitter.split_documents(documents))
self.status = records