refactor: Refactor SplitText component to SplitContentComponent

This commit is contained in:
Rodrigo 2024-06-15 22:47:35 -03:00
commit 4f0f8f3e0e
2 changed files with 58 additions and 39 deletions

View file

@ -1,49 +1,67 @@
from typing import Optional
from typing import List
from langflow.custom import CustomComponent
from langflow.field_typing import Text
from langflow.custom import Component
from langflow.inputs import IntInput, StrInput, HandleInput
from langflow.schema import Data
from langflow.template import Output
from langflow.utils.util import unescape_string
class SplitContentComponent(Component):
    """Split the text stored in one or more ``Data`` objects into chunks.

    The text is read from ``row.data[content_key]`` of every incoming row.
    Two mutually exclusive strategies are supported:

    * ``chunk_size > 0``: the text is cut into consecutive slices of at most
      ``chunk_size`` characters; the separator is ignored.
    * ``chunk_size == 0`` (default): the text is split on ``separator``.

    Chunks that contain only whitespace are dropped. Every kept chunk is
    emitted as ``Data(data={"parent": <full text>, "text": <chunk>})``.
    """

    display_name: str = "Split Content"
    description: str = "Split textual content into chunks of a specified length."
    icon = "split"

    inputs = [
        HandleInput(
            name="data",
            display_name="Data",
            info="Data with text to split.",
            input_types=["Data"],
        ),
        StrInput(
            name="content_key",
            display_name="Content Key",
            info="The key to access the text content in the Data object.",
            value="content",
        ),
        StrInput(
            name="separator",
            display_name="Separator",
            # The backslash is escaped so the help text shows the two
            # characters \n instead of containing a real line break.
            info='The character to split on. Defaults to "\\n".',
            value="\n",
            advanced=True,
        ),
        IntInput(
            name="chunk_size",
            display_name="Chunk Size",
            info=(
                "The maximum length (in number of characters) of each chunk. "
                "When greater than 0, the content is cut into fixed-size chunks "
                "and the separator is ignored. Defaults to 0 (split on the "
                "separator)."
            ),
            value=0,
            advanced=True,
        ),
    ]

    outputs = [
        Output(display_name="Chunks", name="chunks", method="split_text"),
    ]

    @staticmethod
    def _chunk_content(content: str, separator: str, chunk_size: int) -> List[str]:
        """Return the raw chunks of ``content`` (whitespace-only ones included).

        Args:
            content: Text to split.
            separator: Delimiter used when ``chunk_size`` is not positive.
            chunk_size: Maximum chunk length; values <= 0 select separator mode.

        Raises:
            ValueError: If separator mode is selected with an empty separator.
        """
        if chunk_size > 0:
            return [
                content[start:start + chunk_size]
                for start in range(0, len(content), chunk_size)
            ]
        if not separator:
            # str.split("") raises a bare "empty separator" ValueError; keep
            # the exception type but say which field is wrong.
            raise ValueError("Separator must not be empty when Chunk Size is 0.")
        return content.split(separator)

    def split_text(self) -> List[Data]:
        """Split every incoming row's text and return the non-blank chunks.

        Reads ``self.data`` (a single row or a list of rows),
        ``self.content_key``, ``self.separator`` and ``self.chunk_size``.
        The result list is also stored on ``self.status``.

        Returns:
            One ``Data`` per kept chunk, with keys ``"parent"`` (the full text
            of the row) and ``"text"`` (the chunk).

        Raises:
            ValueError: If ``chunk_size`` is 0 and the separator is empty.
        """
        incoming = self.data
        if incoming is None:
            rows = []
        elif isinstance(incoming, list):
            rows = incoming
        else:
            rows = [incoming]

        content_key = self.content_key
        # NOTE(review): unescape_string presumably converts a typed escape
        # such as \n into the real character - confirm in langflow.utils.util.
        separator = unescape_string(self.separator)
        # A cleared numeric field may arrive as None; treat it like the default.
        chunk_size = self.chunk_size or 0

        results = []
        for row in rows:
            if row is None:
                continue
            value = row.data.get(content_key, "")
            # The key may exist but hold None or a non-string value.
            content = "" if value is None else str(value)
            results.extend(
                Data(data={"parent": content, "text": chunk})
                for chunk in self._chunk_content(content, separator, chunk_size)
                if chunk.strip()
            )

        self.status = results
        return results

View file

@ -16,14 +16,15 @@ __all__ = [
"ClearMessageHistoryComponent",
"ExtractKeyFromDataComponent",
"FlowToolComponent",
"ListFlowsComponent",
"ListenComponent",
"ListFlowsComponent",
"MergeDataComponent",
"NotifyComponent",
"PythonFunctionComponent",
"PythonFunctionComponent",
"RunFlowComponent",
"RunnableExecComponent",
"SQLExecutorComponent",
"SubFlowComponent",
"PythonFunctionComponent",
"SplitContentComponent",
]