diff --git a/src/backend/base/langflow/components/experimental/SplitText.py b/src/backend/base/langflow/components/experimental/SplitText.py
index 3d0b18e5a..d43f3c872 100644
--- a/src/backend/base/langflow/components/experimental/SplitText.py
+++ b/src/backend/base/langflow/components/experimental/SplitText.py
@@ -1,49 +1,89 @@
-from typing import Optional
+from typing import List
 
-from langflow.custom import CustomComponent
-from langflow.field_typing import Text
+from langflow.custom import Component
+from langflow.inputs import IntInput, StrInput, HandleInput
 from langflow.schema import Data
+from langflow.template import Output
 from langflow.utils.util import unescape_string
 
 
-class SplitTextComponent(CustomComponent):
-    display_name: str = "Split Text"
-    description: str = "Split text into chunks of a specified length."
+class SplitContentComponent(Component):
+    """Split the text held by Data objects into smaller Data chunks."""
+
+    display_name: str = "Split Content"
+    description: str = "Split textual content into chunks of a specified length."
+    icon = "split"
 
-    def build_config(self):
-        return {
-            "inputs": {
-                "display_name": "Inputs",
-                "info": "Texts to split.",
-                "input_types": ["Data", "Text"],
-            },
-            "separator": {
-                "display_name": "Separator",
-                "info": 'The character to split on. Defaults to " ".',
-            },
-            "truncate_size": {
-                "display_name": "Truncate Size",
-                "info": "The maximum length (in number of characters) of each chunk to keep. Defaults to 0 (no truncation).",
-            },
-        }
+    inputs = [
+        HandleInput(
+            name="data",
+            display_name="Data",
+            info="Data with text to split.",
+            input_types=["Data"],
+        ),
+        StrInput(
+            name="content_key",
+            display_name="Content Key",
+            info="The key to access the text content in the Data object.",
+            value="content",
+        ),
+        StrInput(
+            name="separator",
+            display_name="Separator",
+            info='The character to split on. Defaults to "\\n".',
+            value="\n",
+            advanced=True,
+        ),
+        IntInput(
+            name="chunk_size",
+            display_name="Chunk Size",
+            info="The maximum length (in number of characters) of each chunk. Defaults to 0 (no chunking).",
+            value=0,
+            advanced=True,
+        ),
+    ]
 
-    def build(
-        self,
-        inputs: list[Text],
-        separator: str = " ",
-        truncate_size: Optional[int] = 0,
-    ) -> list[Data]:
-        separator = unescape_string(separator)
+    outputs = [
+        Output(display_name="Chunks", name="chunks", method="split_text"),
+    ]
 
-        outputs = []
-        for text in inputs:
-            chunks = text.split(separator)
+    def split_text(self) -> List[Data]:
+        """Split the text of every input Data object into chunks.
+
+        Reads the text stored under ``content_key`` in each input. When
+        ``chunk_size`` is positive the text is cut into consecutive slices of
+        at most that many characters; otherwise it is split on ``separator``.
+        Whitespace-only chunks are dropped.
+
+        Returns:
+            One Data per kept chunk, holding the full source text under
+            ``"parent"`` and the chunk under ``"text"``.
+        """
+        data = self.data if isinstance(self.data, list) else [self.data]
+        content_key = self.content_key
+        # Guard against unset (None) inputs so the checks below cannot fail.
+        separator = unescape_string(self.separator or "")
+        chunk_size = self.chunk_size or 0
+        results = []
 
-            if truncate_size:
-                chunks = [chunk[:truncate_size] for chunk in chunks]
+        for row in data:
+            if row is None:
+                continue
+            # `or ""` also covers a key that is present but holds None.
+            content = row.data.get(content_key) or ""
+            if not isinstance(content, str):
+                content = str(content)
+            if chunk_size > 0:
+                chunks = [content[i:i + chunk_size] for i in range(0, len(content), chunk_size)]
+            elif separator:
+                chunks = content.split(separator)
+            else:
+                # str.split("") raises ValueError; keep the text as a single chunk.
+                chunks = [content]
 
             for chunk in chunks:
-                outputs.append(Data(data={"parent": text, "text": chunk}))
+                if chunk.strip():
+                    results.append(Data(data={"parent": content, "text": chunk}))
 
-        self.status = outputs
-        return outputs
+        self.status = results
+        return results
diff --git a/src/backend/base/langflow/components/experimental/__init__.py b/src/backend/base/langflow/components/experimental/__init__.py
index 3c922da33..d67a125c8 100644
--- a/src/backend/base/langflow/components/experimental/__init__.py
+++ b/src/backend/base/langflow/components/experimental/__init__.py
@@ -16,14 +16,14 @@ __all__ = [
     "ClearMessageHistoryComponent",
     "ExtractKeyFromDataComponent",
     "FlowToolComponent",
-    "ListFlowsComponent",
     "ListenComponent",
+    "ListFlowsComponent",
     "MergeDataComponent",
     "NotifyComponent",
     "PythonFunctionComponent",
     "RunFlowComponent",
     "RunnableExecComponent",
+    "SplitContentComponent",
     "SQLExecutorComponent",
     "SubFlowComponent",
-    "PythonFunctionComponent",
 ]