refactor: Rename SplitContentComponent to SplitTextComponent and update related variables and descriptions
This commit is contained in:
parent
ef7e36ee14
commit
6b5d4798ac
1 changed file with 25 additions and 26 deletions
|
|
@@ -7,18 +7,18 @@ from langflow.template import Output
|
|||
from langflow.utils.util import unescape_string
|
||||
|
||||
|
||||
class SplitContentComponent(Component):
|
||||
display_name: str = "Split Content"
|
||||
description: str = "Split textual content into chunks based on specified criteria."
|
||||
class SplitTextComponent(Component):
|
||||
display_name: str = "Split Text"
|
||||
description: str = "Split text into chunks based on specified criteria."
|
||||
icon = "split"
|
||||
|
||||
inputs = [
|
||||
HandleInput(name="data", display_name="Data", info="Data with text to split.", input_types=["Data"]),
|
||||
TextInput(
|
||||
name="content_key",
|
||||
display_name="Content Key",
|
||||
name="text_key",
|
||||
display_name="Text Key",
|
||||
info="The key to access the text content in the Data object.",
|
||||
value="content",
|
||||
value="text",
|
||||
),
|
||||
TextInput(
|
||||
name="separator",
|
||||
|
|
@@ -27,25 +27,18 @@ class SplitContentComponent(Component):
|
|||
value="\n",
|
||||
advanced=True,
|
||||
),
|
||||
IntInput(
|
||||
name="chunk_size",
|
||||
display_name="Chunk Size",
|
||||
info="The target length (in number of characters) of each chunk.",
|
||||
value=0,
|
||||
advanced=True,
|
||||
),
|
||||
IntInput(
|
||||
name="min_chunk_size",
|
||||
display_name="Minimum Chunk Size",
|
||||
info="The minimum size of chunks. Smaller chunks will be merged.",
|
||||
value=0,
|
||||
value=10,
|
||||
advanced=True,
|
||||
),
|
||||
IntInput(
|
||||
name="max_chunk_size",
|
||||
display_name="Maximum Chunk Size",
|
||||
info="The maximum size of chunks. Larger chunks will be split.",
|
||||
value=0,
|
||||
value=200,
|
||||
advanced=True,
|
||||
),
|
||||
]
|
||||
|
|
@@ -56,33 +49,39 @@ class SplitContentComponent(Component):
|
|||
|
||||
def split_text(self) -> List[Data]:
|
||||
data = self.data if isinstance(self.data, list) else [self.data]
|
||||
content_key = self.content_key
|
||||
text_key = self.text_key
|
||||
separator = unescape_string(self.separator)
|
||||
chunk_size = self.chunk_size
|
||||
min_chunk_size = self.min_chunk_size
|
||||
max_chunk_size = self.max_chunk_size
|
||||
results = []
|
||||
|
||||
if not separator:
|
||||
raise ValueError("Separator cannot be empty.")
|
||||
if max_chunk_size < 10:
|
||||
raise ValueError("Maximum chunk size cannot be less than 10 characters.")
|
||||
if min_chunk_size < 10:
|
||||
raise ValueError("Minimum chunk size cannot be less than 10 characters.")
|
||||
if max_chunk_size < min_chunk_size:
|
||||
raise ValueError("Maximum chunk size cannot be less than minimum chunk size.")
|
||||
|
||||
buffer = ""
|
||||
|
||||
for row in data:
|
||||
content = row.data.get(content_key, "")
|
||||
if chunk_size > 0:
|
||||
chunks = [content[i : i + chunk_size] for i in range(0, len(content), chunk_size)]
|
||||
else:
|
||||
chunks = content.split(separator)
|
||||
text = row.data.get(text_key, "")
|
||||
chunks = text.split(separator)
|
||||
|
||||
for chunk in chunks:
|
||||
buffer += chunk
|
||||
while len(buffer) >= max_chunk_size:
|
||||
results.append(Data(data={"parent": content, "chunk": buffer[:max_chunk_size]}))
|
||||
results.append(Data(data={"parent": text, "chunk": buffer[:max_chunk_size]}))
|
||||
buffer = buffer[max_chunk_size:]
|
||||
if len(buffer) >= min_chunk_size:
|
||||
results.append(Data(data={"parent": content, "chunk": buffer}))
|
||||
results.append(Data(data={"parent": text, "chunk": buffer}))
|
||||
buffer = ""
|
||||
|
||||
# Handle any remaining content that may not meet the min_chunk_size requirement
|
||||
# Handle any remaining text that may not meet the min_chunk_size requirement
|
||||
if buffer:
|
||||
results.append(Data(data={"parent": content, "chunk": buffer}))
|
||||
results.append(Data(data={"parent": text, "chunk": buffer}))
|
||||
|
||||
self.status = results
|
||||
return results
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue