Refactor SplitText (#1836)

Refactor LCModelComponent status message formatting for better readability
This commit is contained in:
Rodrigo Nader 2024-05-08 09:59:43 -03:00 committed by GitHub
commit 03f781c0f4
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
3 changed files with 51 additions and 89 deletions

View file

@ -57,7 +57,7 @@ class LCModelComponent(CustomComponent):
prompt_tokens = token_usage["prompt_tokens"]
total_tokens = token_usage["total_tokens"]
finish_reason = response_metadata["finish_reason"]
status_message = f"Tokens:\n- Input: {prompt_tokens}\nOutput: {completion_tokens}\nTotal Tokens: {total_tokens}\nStop Reason: {finish_reason}\nResponse: {content}"
status_message = f"Tokens:\nInput: {prompt_tokens}\nOutput: {completion_tokens}\nTotal Tokens: {total_tokens}\nStop Reason: {finish_reason}\nResponse: {content}"
elif all(key in response_metadata for key in anthropic_keys) and all(
key in response_metadata["usage"] for key in inner_anthropic_keys
):
@ -65,7 +65,7 @@ class LCModelComponent(CustomComponent):
input_tokens = usage["input_tokens"]
output_tokens = usage["output_tokens"]
stop_reason = response_metadata["stop_reason"]
status_message = f"Tokens:\n- Input: {input_tokens}\n- Output: {output_tokens}\nStop Reason: {stop_reason}\nResponse: {content}"
status_message = f"Tokens:\nInput: {input_tokens}\nOutput: {output_tokens}\nStop Reason: {stop_reason}\nResponse: {content}"
else:
status_message = f"Response: {content}"
else:

View file

@ -0,0 +1,49 @@
from typing import Optional
from langflow.field_typing import Text
from langflow.interface.custom.custom_component import CustomComponent
from langflow.schema import Record
from langflow.utils.util import unescape_string
class SplitTextComponent(CustomComponent):
    """Split every input text on a separator and emit one Record per chunk.

    Each produced Record carries the chunk as its ``text`` and the text it
    was cut from under the ``"parent"`` data key.
    """

    display_name: str = "Split Text"
    description: str = "Split text into chunks of a specified length."

    def build_config(self):
        """Return the UI configuration for the ``build`` parameters."""
        return {
            "inputs": {
                "display_name": "Inputs",
                "info": "Texts to split.",
                "input_types": ["Record", "Text"],
            },
            "separator": {
                "display_name": "Separator",
                "info": 'The character to split on. Defaults to " ".',
            },
            "truncate_size": {
                "display_name": "Truncate Size",
                "info": "The maximum length (in number of characters) of each chunk to keep. Defaults to 0 (no truncation).",
            },
        }

    def build(
        self,
        inputs: list[Text],
        separator: str = " ",
        truncate_size: Optional[int] = 0,
    ) -> list[Record]:
        """Split ``inputs`` on ``separator`` and wrap each chunk in a Record.

        Args:
            inputs: Texts to split. ``Record`` items are accepted as well,
                since ``build_config`` advertises "Record" as an input type.
            separator: String to split on; escape sequences are resolved via
                ``unescape_string``. An empty value falls back to ``" "``.
            truncate_size: Maximum number of characters kept per chunk.
                ``0``/``None`` disables truncation. A numeric string is
                converted to ``int``.

        Returns:
            One Record per chunk, in input order. Also stored in
            ``self.status``.
        """
        # str.split raises ValueError on an empty separator, so fall back to
        # the documented default of a single space.
        separator = unescape_string(separator or " ") or " "

        # The value may arrive as a string; slicing needs an int.
        if isinstance(truncate_size, str):
            truncate_size = int(truncate_size) if truncate_size.strip() else 0

        outputs = []
        for item in inputs:
            if isinstance(item, Record):
                # NOTE(review): relies on Record.to_lc_document() returning a
                # document whose page_content is the record text - confirm
                # against langflow.schema.
                text = item.to_lc_document().page_content
            else:
                text = item

            chunks = text.split(separator)
            if truncate_size:
                chunks = [chunk[:truncate_size] for chunk in chunks]
            outputs.extend(
                Record(text=chunk, data={"parent": text}) for chunk in chunks
            )

        self.status = outputs
        return outputs

View file

@ -1,87 +0,0 @@
from typing import Optional, Union
from langchain.text_splitter import CharacterTextSplitter, RecursiveCharacterTextSplitter
from langchain_core.documents import Document
from langflow.field_typing import Text
from langflow.interface.custom.custom_component import CustomComponent
from langflow.schema import Record
from langflow.utils.util import unescape_string
class SplitTextComponent(CustomComponent):
    """Split input texts/records into chunks using a langchain text splitter."""

    display_name: str = "Split Text"
    description: str = "Split text into chunks of a specified length."

    def build_config(self):
        """Return the UI configuration for the ``build`` parameters."""
        return {
            "inputs": {
                "display_name": "Inputs",
                "info": "Texts to split.",
                "input_types": ["Record", "Text"],
            },
            "separators": {
                "display_name": "Separators",
                "info": 'The characters to split on. Defaults to [" "].',
                "is_list": True,
            },
            "chunk_size": {
                "display_name": "Max Chunk Size",
                "info": "The maximum length (in number of characters) of each chunk.",
                "field_type": "int",
                "value": 1000,
            },
            "chunk_overlap": {
                "display_name": "Chunk Overlap",
                "info": "The amount of character overlap between chunks.",
                "field_type": "int",
                "value": 200,
            },
            "recursive": {
                "display_name": "Recursive",
            },
            "code": {"show": False},
        }

    def build(
        self,
        inputs: list[Text],
        # The default list is never mutated below (a new list is built), so
        # sharing it across calls is harmless; kept for interface stability.
        separators: Optional[list[str]] = [" "],
        chunk_size: Optional[int] = 1000,
        chunk_overlap: Optional[int] = 200,
        recursive: bool = False,
    ) -> list[Record]:
        """Split ``inputs`` into chunks and return them as records.

        Args:
            inputs: Texts or ``Record`` objects to split.
            separators: Strings to split on; escape sequences are resolved
                via ``unescape_string``. Only the first one is used when
                ``recursive`` is false.
            chunk_size: Maximum chunk length; a string is converted to int.
            chunk_overlap: Overlap between chunks; a string is converted to
                int.
            recursive: Use ``RecursiveCharacterTextSplitter`` with all
                separators instead of ``CharacterTextSplitter`` with the
                first one.

        Returns:
            The records produced by ``self.to_records`` from the split
            documents. Also stored in ``self.status``.
        """
        if separators is None:
            separators = []
        separators = [unescape_string(x) for x in separators]

        # Make sure chunk_size and chunk_overlap are ints
        if isinstance(chunk_size, str):
            chunk_size = int(chunk_size)
        if isinstance(chunk_overlap, str):
            chunk_overlap = int(chunk_overlap)

        splitter: Optional[Union[CharacterTextSplitter, RecursiveCharacterTextSplitter]] = None
        if recursive:
            splitter = RecursiveCharacterTextSplitter(
                separators=separators,
                chunk_size=chunk_size,
                chunk_overlap=chunk_overlap,
            )
        else:
            splitter = CharacterTextSplitter(
                # An empty/None separator list used to raise IndexError here;
                # fall back to the documented default of a single space.
                separator=separators[0] if separators else " ",
                chunk_size=chunk_size,
                chunk_overlap=chunk_overlap,
            )

        documents = []
        for _input in inputs:
            if isinstance(_input, Record):
                documents.append(_input.to_lc_document())
            else:
                documents.append(Document(page_content=_input))

        records = self.to_records(splitter.split_documents(documents))
        self.status = records
        return records