diff --git a/src/backend/base/langflow/base/models/model.py b/src/backend/base/langflow/base/models/model.py index 977090818..b38d275f9 100644 --- a/src/backend/base/langflow/base/models/model.py +++ b/src/backend/base/langflow/base/models/model.py @@ -57,7 +57,7 @@ class LCModelComponent(CustomComponent): prompt_tokens = token_usage["prompt_tokens"] total_tokens = token_usage["total_tokens"] finish_reason = response_metadata["finish_reason"] - status_message = f"Tokens:\n- Input: {prompt_tokens}\nOutput: {completion_tokens}\nTotal Tokens: {total_tokens}\nStop Reason: {finish_reason}\nResponse: {content}" + status_message = f"Tokens:\nInput: {prompt_tokens}\nOutput: {completion_tokens}\nTotal Tokens: {total_tokens}\nStop Reason: {finish_reason}\nResponse: {content}" elif all(key in response_metadata for key in anthropic_keys) and all( key in response_metadata["usage"] for key in inner_anthropic_keys ): @@ -65,7 +65,7 @@ class LCModelComponent(CustomComponent): input_tokens = usage["input_tokens"] output_tokens = usage["output_tokens"] stop_reason = response_metadata["stop_reason"] - status_message = f"Tokens:\n- Input: {input_tokens}\n- Output: {output_tokens}\nStop Reason: {stop_reason}\nResponse: {content}" + status_message = f"Tokens:\nInput: {input_tokens}\nOutput: {output_tokens}\nStop Reason: {stop_reason}\nResponse: {content}" else: status_message = f"Response: {content}" else: diff --git a/src/backend/base/langflow/components/experimental/SplitText.py b/src/backend/base/langflow/components/experimental/SplitText.py new file mode 100644 index 000000000..bd2bc921f --- /dev/null +++ b/src/backend/base/langflow/components/experimental/SplitText.py @@ -0,0 +1,49 @@ +from typing import Optional + +from langflow.field_typing import Text +from langflow.interface.custom.custom_component import CustomComponent +from langflow.schema import Record +from langflow.utils.util import unescape_string + + +class SplitTextComponent(CustomComponent): + display_name: str = "Split Text" + description: str = "Split text into chunks of a specified length." + + def build_config(self): + return { + "inputs": { + "display_name": "Inputs", + "info": "Texts to split.", + "input_types": ["Record", "Text"], + }, + "separator": { + "display_name": "Separator", + "info": 'The character to split on. Defaults to " ".', + }, + "truncate_size": { + "display_name": "Truncate Size", + "info": "The maximum length (in number of characters) of each chunk to keep. Defaults to 0 (no truncation).", + }, + } + + def build( + self, + inputs: list[Text], + separator: str = " ", + truncate_size: Optional[int] = 0, + ) -> list[Record]: + separator = unescape_string(separator) + + outputs = [] + for text in inputs: + chunks = text.split(separator) + + if truncate_size: + chunks = [chunk[:truncate_size] for chunk in chunks] + + for chunk in chunks: + outputs.append(Record(text=chunk, data={"parent": text})) + + self.status = outputs + return outputs diff --git a/src/backend/base/langflow/components/helpers/SplitText.py b/src/backend/base/langflow/components/helpers/SplitText.py deleted file mode 100644 index 565ee1ffc..000000000 --- a/src/backend/base/langflow/components/helpers/SplitText.py +++ /dev/null @@ -1,87 +0,0 @@ -from typing import Optional, Union - -from langchain.text_splitter import CharacterTextSplitter, RecursiveCharacterTextSplitter -from langchain_core.documents import Document - -from langflow.field_typing import Text -from langflow.interface.custom.custom_component import CustomComponent -from langflow.schema import Record -from langflow.utils.util import unescape_string - - -class SplitTextComponent(CustomComponent): - display_name: str = "Split Text" - description: str = "Split text into chunks of a specified length." - - def build_config(self): - return { - "inputs": { - "display_name": "Inputs", - "info": "Texts to split.", - "input_types": ["Record", "Text"], - }, - "separators": { - "display_name": "Separators", - "info": 'The characters to split on. Defaults to [" "].', - "is_list": True, - }, - "chunk_size": { - "display_name": "Max Chunk Size", - "info": "The maximum length (in number of characters) of each chunk.", - "field_type": "int", - "value": 1000, - }, - "chunk_overlap": { - "display_name": "Chunk Overlap", - "info": "The amount of character overlap between chunks.", - "field_type": "int", - "value": 200, - }, - "recursive": { - "display_name": "Recursive", - }, - "code": {"show": False}, - } - - def build( - self, - inputs: list[Text], - separators: Optional[list[str]] = [" "], - chunk_size: Optional[int] = 1000, - chunk_overlap: Optional[int] = 200, - recursive: bool = False, - ) -> list[Record]: - if separators is None: - separators = [] - separators = [unescape_string(x) for x in separators] - - # Make sure chunk_size and chunk_overlap are ints - if isinstance(chunk_size, str): - chunk_size = int(chunk_size) - if isinstance(chunk_overlap, str): - chunk_overlap = int(chunk_overlap) - splitter: Optional[Union[CharacterTextSplitter, RecursiveCharacterTextSplitter]] = None - if recursive: - splitter = RecursiveCharacterTextSplitter( - separators=separators, - chunk_size=chunk_size, - chunk_overlap=chunk_overlap, - ) - - else: - splitter = CharacterTextSplitter( - separator=separators[0], - chunk_size=chunk_size, - chunk_overlap=chunk_overlap, - ) - - documents = [] - for _input in inputs: - if isinstance(_input, Record): - documents.append(_input.to_lc_document()) - else: - documents.append(Document(page_content=_input)) - - records = self.to_records(splitter.split_documents(documents)) - self.status = records - return records