refactor: Refactor SplitText component to SplitContentComponent
This commit is contained in:
parent
0d609b9376
commit
4f0f8f3e0e
2 changed files with 58 additions and 39 deletions
|
|
@@ -1,49 +1,67 @@
|
|||
from typing import Optional
|
||||
from typing import List
|
||||
|
||||
from langflow.custom import CustomComponent
|
||||
from langflow.field_typing import Text
|
||||
from langflow.custom import Component
|
||||
from langflow.inputs import IntInput, StrInput, HandleInput
|
||||
from langflow.schema import Data
|
||||
from langflow.template import Output
|
||||
from langflow.utils.util import unescape_string
|
||||
|
||||
|
||||
class SplitTextComponent(CustomComponent):
|
||||
display_name: str = "Split Text"
|
||||
description: str = "Split text into chunks of a specified length."
|
||||
class SplitContentComponent(Component):
|
||||
display_name: str = "Split Content"
|
||||
description: str = "Split textual content into chunks of a specified length."
|
||||
icon = "split"
|
||||
|
||||
def build_config(self):
|
||||
return {
|
||||
"inputs": {
|
||||
"display_name": "Inputs",
|
||||
"info": "Texts to split.",
|
||||
"input_types": ["Data", "Text"],
|
||||
},
|
||||
"separator": {
|
||||
"display_name": "Separator",
|
||||
"info": 'The character to split on. Defaults to " ".',
|
||||
},
|
||||
"truncate_size": {
|
||||
"display_name": "Truncate Size",
|
||||
"info": "The maximum length (in number of characters) of each chunk to keep. Defaults to 0 (no truncation).",
|
||||
},
|
||||
}
|
||||
inputs = [
|
||||
HandleInput(
|
||||
name="data",
|
||||
display_name="Data",
|
||||
info="Data with text to split.",
|
||||
input_types=["Data"]
|
||||
),
|
||||
StrInput(
|
||||
name="content_key",
|
||||
display_name="Content Key",
|
||||
info="The key to access the text content in the Data object.",
|
||||
value="content",
|
||||
),
|
||||
StrInput(
|
||||
name="separator",
|
||||
display_name="Separator",
|
||||
info='The character to split on. Defaults to "\n".',
|
||||
value="\n",
|
||||
advanced=True
|
||||
),
|
||||
IntInput(
|
||||
name="chunk_size",
|
||||
display_name="Chunk Size",
|
||||
info="The maximum length (in number of characters) of each chunk. Defaults to 0 (no chunking).",
|
||||
value=0,
|
||||
advanced=True
|
||||
),
|
||||
]
|
||||
|
||||
def build(
|
||||
self,
|
||||
inputs: list[Text],
|
||||
separator: str = " ",
|
||||
truncate_size: Optional[int] = 0,
|
||||
) -> list[Data]:
|
||||
separator = unescape_string(separator)
|
||||
outputs = [
|
||||
Output(display_name="Chunks", name="chunks", method="split_text"),
|
||||
]
|
||||
|
||||
outputs = []
|
||||
for text in inputs:
|
||||
chunks = text.split(separator)
|
||||
def split_text(self) -> List[Data]:
|
||||
data = self.data if isinstance(self.data, list) else [self.data]
|
||||
content_key = self.content_key
|
||||
separator = unescape_string(self.separator)
|
||||
chunk_size = self.chunk_size
|
||||
results = []
|
||||
|
||||
if truncate_size:
|
||||
chunks = [chunk[:truncate_size] for chunk in chunks]
|
||||
for row in data:
|
||||
content = row.data.get(content_key, '')
|
||||
if chunk_size > 0:
|
||||
chunks = [content[i:i + chunk_size] for i in range(0, len(content), chunk_size)]
|
||||
else:
|
||||
chunks = content.split(separator)
|
||||
|
||||
for chunk in chunks:
|
||||
outputs.append(Data(data={"parent": text, "text": chunk}))
|
||||
if chunk.strip():
|
||||
results.append(Data(data={"parent": content, "text": chunk}))
|
||||
|
||||
self.status = outputs
|
||||
return outputs
|
||||
self.status = results
|
||||
return results
|
||||
|
|
|
|||
|
|
@@ -16,14 +16,15 @@ __all__ = [
|
|||
"ClearMessageHistoryComponent",
|
||||
"ExtractKeyFromDataComponent",
|
||||
"FlowToolComponent",
|
||||
"ListFlowsComponent",
|
||||
"ListenComponent",
|
||||
"ListFlowsComponent",
|
||||
"MergeDataComponent",
|
||||
"NotifyComponent",
|
||||
"PythonFunctionComponent",
|
||||
"PythonFunctionComponent",
|
||||
"RunFlowComponent",
|
||||
"RunnableExecComponent",
|
||||
"SQLExecutorComponent",
|
||||
"SubFlowComponent",
|
||||
"PythonFunctionComponent",
|
||||
"SplitContentComponent",
|
||||
]
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue