Refactor SplitText (#1836)
Refactor LCModelComponent status message formatting for better readability
This commit is contained in:
parent
5a21263272
commit
03f781c0f4
3 changed files with 51 additions and 89 deletions
|
|
@ -57,7 +57,7 @@ class LCModelComponent(CustomComponent):
|
|||
prompt_tokens = token_usage["prompt_tokens"]
|
||||
total_tokens = token_usage["total_tokens"]
|
||||
finish_reason = response_metadata["finish_reason"]
|
||||
status_message = f"Tokens:\n- Input: {prompt_tokens}\nOutput: {completion_tokens}\nTotal Tokens: {total_tokens}\nStop Reason: {finish_reason}\nResponse: {content}"
|
||||
status_message = f"Tokens:\nInput: {prompt_tokens}\nOutput: {completion_tokens}\nTotal Tokens: {total_tokens}\nStop Reason: {finish_reason}\nResponse: {content}"
|
||||
elif all(key in response_metadata for key in anthropic_keys) and all(
|
||||
key in response_metadata["usage"] for key in inner_anthropic_keys
|
||||
):
|
||||
|
|
@ -65,7 +65,7 @@ class LCModelComponent(CustomComponent):
|
|||
input_tokens = usage["input_tokens"]
|
||||
output_tokens = usage["output_tokens"]
|
||||
stop_reason = response_metadata["stop_reason"]
|
||||
status_message = f"Tokens:\n- Input: {input_tokens}\n- Output: {output_tokens}\nStop Reason: {stop_reason}\nResponse: {content}"
|
||||
status_message = f"Tokens:\nInput: {input_tokens}\nOutput: {output_tokens}\nStop Reason: {stop_reason}\nResponse: {content}"
|
||||
else:
|
||||
status_message = f"Response: {content}"
|
||||
else:
|
||||
|
|
|
|||
|
|
@ -0,0 +1,49 @@
|
|||
from typing import Optional
|
||||
|
||||
from langflow.field_typing import Text
|
||||
from langflow.interface.custom.custom_component import CustomComponent
|
||||
from langflow.schema import Record
|
||||
from langflow.utils.util import unescape_string
|
||||
|
||||
|
||||
class SplitTextComponent(CustomComponent):
    """Split each input text on a separator and emit one Record per chunk.

    Every produced Record carries the chunk as its text and the text it was
    cut from under the ``"parent"`` key of its data.
    """

    display_name: str = "Split Text"
    # NOTE(review): build() splits on a separator and only optionally truncates;
    # the wording below predates that behaviour and may deserve an update.
    description: str = "Split text into chunks of a specified length."

    def build_config(self):
        """Return the field configuration for ``inputs``, ``separator`` and ``truncate_size``."""
        return {
            "inputs": {
                "display_name": "Inputs",
                "info": "Texts to split.",
                "input_types": ["Record", "Text"],
            },
            "separator": {
                "display_name": "Separator",
                "info": 'The character to split on. Defaults to " ".',
            },
            "truncate_size": {
                "display_name": "Truncate Size",
                "info": "The maximum length (in number of characters) of each chunk to keep. Defaults to 0 (no truncation).",
            },
        }

    def build(
        self,
        inputs: list[Text],
        separator: str = " ",
        truncate_size: Optional[int] = 0,
    ) -> list[Record]:
        """Split every input on ``separator`` and wrap the chunks in Records.

        Args:
            inputs: Texts (or Records) to split.
            separator: String to split on; escape sequences are resolved with
                ``unescape_string``. An empty separator falls back to ``" "``.
            truncate_size: Maximum number of characters kept per chunk. ``0``,
                ``None`` or a negative value disables truncation.

        Returns:
            One Record per chunk, in input order. The list is also stored in
            ``self.status``.
        """
        separator = unescape_string(separator)
        if not separator:
            # str.split("") raises ValueError; use the documented default instead.
            separator = " "

        # Presumably the UI can deliver numeric fields as strings (an earlier
        # version of this component coerced them the same way) — TODO confirm.
        if isinstance(truncate_size, str):
            truncate_size = int(truncate_size) if truncate_size.strip() else 0

        outputs = []
        for item in inputs:
            # build_config accepts Record inputs as well as plain text.
            # NOTE(review): assumes Record exposes its content as `.text`
            # (it is constructed with `text=` below) — confirm against the schema.
            text = item.text if isinstance(item, Record) else item
            chunks = text.split(separator)

            # Only a positive size is a meaningful cap; a negative slice bound
            # would silently drop trailing characters instead.
            if truncate_size and truncate_size > 0:
                chunks = [chunk[:truncate_size] for chunk in chunks]

            outputs.extend(Record(text=chunk, data={"parent": text}) for chunk in chunks)

        self.status = outputs
        return outputs
|
||||
|
|
@ -1,87 +0,0 @@
|
|||
from typing import Optional, Union
|
||||
|
||||
from langchain.text_splitter import CharacterTextSplitter, RecursiveCharacterTextSplitter
|
||||
from langchain_core.documents import Document
|
||||
|
||||
from langflow.field_typing import Text
|
||||
from langflow.interface.custom.custom_component import CustomComponent
|
||||
from langflow.schema import Record
|
||||
from langflow.utils.util import unescape_string
|
||||
|
||||
|
||||
class SplitTextComponent(CustomComponent):
    """Split input texts or Records into chunks using LangChain text splitters."""

    display_name: str = "Split Text"
    description: str = "Split text into chunks of a specified length."

    def build_config(self):
        """Return the field configuration for the component's inputs."""
        return {
            "inputs": {
                "display_name": "Inputs",
                "info": "Texts to split.",
                "input_types": ["Record", "Text"],
            },
            "separators": {
                "display_name": "Separators",
                "info": 'The characters to split on. Defaults to [" "].',
                "is_list": True,
            },
            "chunk_size": {
                "display_name": "Max Chunk Size",
                "info": "The maximum length (in number of characters) of each chunk.",
                "field_type": "int",
                "value": 1000,
            },
            "chunk_overlap": {
                "display_name": "Chunk Overlap",
                "info": "The amount of character overlap between chunks.",
                "field_type": "int",
                "value": 200,
            },
            "recursive": {
                "display_name": "Recursive",
            },
            "code": {"show": False},
        }

    def build(
        self,
        inputs: list[Text],
        # The list default is never mutated (it is rebound below), so sharing
        # it across calls is harmless; kept to leave the signature unchanged.
        separators: Optional[list[str]] = [" "],
        chunk_size: Optional[int] = 1000,
        chunk_overlap: Optional[int] = 200,
        recursive: bool = False,
    ) -> list[Record]:
        """Split the inputs into chunks and return them as Records.

        Args:
            inputs: Texts or Records to split.
            separators: Separators to split on; escape sequences are resolved
                with ``unescape_string``. ``None`` is treated as an empty list.
            chunk_size: Maximum chunk length passed to the splitter.
            chunk_overlap: Character overlap passed to the splitter.
            recursive: Use ``RecursiveCharacterTextSplitter`` with all
                separators when true; otherwise ``CharacterTextSplitter`` with
                the first separator only.

        Returns:
            The records produced by ``self.to_records`` from the split
            documents. The same list is stored in ``self.status``.
        """
        separators = [unescape_string(sep) for sep in (separators or [])]

        # Make sure chunk_size and chunk_overlap are ints
        if isinstance(chunk_size, str):
            chunk_size = int(chunk_size)
        if isinstance(chunk_overlap, str):
            chunk_overlap = int(chunk_overlap)

        splitter: Optional[Union[CharacterTextSplitter, RecursiveCharacterTextSplitter]] = None
        if recursive:
            splitter = RecursiveCharacterTextSplitter(
                separators=separators,
                chunk_size=chunk_size,
                chunk_overlap=chunk_overlap,
            )
        else:
            # Indexing an empty list would raise IndexError; fall back to the
            # documented default separator instead.
            first_separator = separators[0] if separators else " "
            splitter = CharacterTextSplitter(
                separator=first_separator,
                chunk_size=chunk_size,
                chunk_overlap=chunk_overlap,
            )

        # Records are converted to LangChain documents; anything else is
        # wrapped as the page content of a new Document.
        documents = [
            _input.to_lc_document() if isinstance(_input, Record) else Document(page_content=_input)
            for _input in inputs
        ]

        records = self.to_records(splitter.split_documents(documents))
        self.status = records
        return records
|
||||
Loading…
Add table
Add a link
Reference in a new issue