diff --git a/src/backend/base/langflow/components/data/URL.py b/src/backend/base/langflow/components/data/URL.py index e20b526d7..f8fc12bbf 100644 --- a/src/backend/base/langflow/components/data/URL.py +++ b/src/backend/base/langflow/components/data/URL.py @@ -18,7 +18,6 @@ class URLComponent(Component): name="urls", display_name="URLs", info="Enter one or more URLs, separated by commas.", - value="", is_list=True, ), ] diff --git a/src/backend/base/langflow/components/experimental/SplitText.py b/src/backend/base/langflow/components/experimental/SplitText.py index 0789d1422..5e0d7f178 100644 --- a/src/backend/base/langflow/components/experimental/SplitText.py +++ b/src/backend/base/langflow/components/experimental/SplitText.py @@ -1,45 +1,42 @@ from typing import List +from langchain_text_splitters import CharacterTextSplitter from langflow.custom import Component -from langflow.inputs import HandleInput, IntInput, TextInput +from langflow.inputs import IntInput, TextInput, HandleInput from langflow.schema import Data from langflow.template import Output from langflow.utils.util import unescape_string - class SplitTextComponent(Component): display_name: str = "Split Text" description: str = "Split text into chunks based on specified criteria." icon = "scissors-line-dashed" inputs = [ - HandleInput(name="data", display_name="Data", info="Data with text to split.", input_types=["Data"]), - TextInput( - name="text_key", - display_name="Text Key", - info="The key to access the text content in the Data object.", - value="text", + HandleInput( + name="data_inputs", + display_name="Data Inputs", + info="The data to split.", + input_types=["Data"], + is_list=True, + ), + IntInput( + name="chunk_overlap", + display_name="Chunk Overlap", + info="Number of characters to overlap between chunks.", + value=200, + ), + IntInput( + name="chunk_size", + display_name="Chunk Size", + info="The maximum number of characters in each chunk.", + value=1000, ), TextInput( name="separator", display_name="Separator", - info='The character to split on. Defaults to "\n".', + info="The character to split on. Defaults to newline.", value="\n", - advanced=True, - ), - IntInput( - name="min_chunk_size", - display_name="Minimum Chunk Size", - info="The minimum size of chunks. Smaller chunks will be merged.", - value=10, - advanced=True, - ), - IntInput( - name="max_chunk_size", - display_name="Maximum Chunk Size", - info="The maximum size of chunks. Larger chunks will be split.", - value=200, - advanced=True, ), ] @@ -47,41 +44,26 @@ class SplitTextComponent(Component): Output(display_name="Chunks", name="chunks", method="split_text"), ] + def _docs_to_data(self, docs): + data = [] + for doc in docs: + data.append(Data(text=doc.page_content, data=doc.metadata)) + return data + def split_text(self) -> List[Data]: - data = self.data if isinstance(self.data, list) else [self.data] - text_key = self.text_key separator = unescape_string(self.separator) - min_chunk_size = self.min_chunk_size - max_chunk_size = self.max_chunk_size - results = [] - if not separator: - raise ValueError("Separator cannot be empty.") - if max_chunk_size < 10: - raise ValueError("Maximum chunk size cannot be less than 10 characters.") - if min_chunk_size < 10: - raise ValueError("Minimum chunk size cannot be less than 10 characters.") - if max_chunk_size < min_chunk_size: - raise ValueError("Maximum chunk size cannot be less than minimum chunk size.") + documents = [] + for _input in self.data_inputs: + if isinstance(_input, Data): + documents.append(_input.to_lc_document()) - buffer = "" - - for row in data: - parent = row.data.get(text_key, "") - chunks = parent.split(separator) - - for chunk in chunks: - buffer += chunk - while len(buffer) >= max_chunk_size: - results.append(Data(data={"parent": parent, "text": buffer[:max_chunk_size]})) - buffer = buffer[max_chunk_size:] - if len(buffer) >= min_chunk_size: - results.append(Data(data={"parent": parent, "text": buffer})) - buffer = "" - - # Handle any remaining text that may not meet the min_chunk_size requirement - if buffer: - results.append(Data(parent=parent, text=buffer)) - - self.status = results - return results + splitter = CharacterTextSplitter( + chunk_overlap=self.chunk_overlap, + chunk_size=self.chunk_size, + separator=separator, + ) + docs = splitter.split_documents(documents) + data = self._docs_to_data(docs) + self.status = data + return data diff --git a/src/backend/base/langflow/components/helpers/Memory.py b/src/backend/base/langflow/components/helpers/Memory.py index 8c2231d79..21b2770f0 100644 --- a/src/backend/base/langflow/components/helpers/Memory.py +++ b/src/backend/base/langflow/components/helpers/Memory.py @@ -4,12 +4,13 @@ from langflow.memory import get_messages from langflow.schema import Data from langflow.schema.message import Message from langflow.template import Output +from langflow.helpers.data import data_to_text class MemoryComponent(Component): - display_name = "Memory" + display_name = "Chat Memory" description = "Retrieves stored chat messages." - icon = "history" + icon = "message-square-more" inputs = [ DropdownInput( @@ -47,11 +48,18 @@ class MemoryComponent(Component): info="Order of the messages.", advanced=True, ), + MultilineInput( + name="template", + display_name="Template", + info="The template to use for formatting the data. It can contain the keys {text}, {sender} or any other key in the message data.", + value="{sender_name}: {text}", + advanced=True, + ), ] outputs = [ - Output(display_name="Message Data", name="messages", method="retrieve_messages"), - Output(display_name="Parsed", name="messages_text", method="retrieve_messages_as_text"), + Output(display_name="Chat History", name="messages", method="retrieve_messages"), + Output(display_name="Messages (Text)", name="messages_text", method="retrieve_messages_as_text"), ] def retrieve_messages(self) -> Data: @@ -75,7 +83,6 @@ class MemoryComponent(Component): return messages def retrieve_messages_as_text(self) -> Message: - messages = self.retrieve_messages() - messages_text = "\n".join(["{sender_name}: {text}".format(**message.data) for message in messages]) + messages_text = data_to_text(self.template, self.retrieve_messages()) self.status = messages_text return Message(text=messages_text) diff --git a/src/backend/base/langflow/components/helpers/ParseData.py b/src/backend/base/langflow/components/helpers/ParseData.py index 375830a02..c1a3c7101 100644 --- a/src/backend/base/langflow/components/helpers/ParseData.py +++ b/src/backend/base/langflow/components/helpers/ParseData.py @@ -22,7 +22,7 @@ class ParseDataComponent(Component): name="sep", display_name="Separator", advanced=True, - value='---' + value='\n' ) ] diff --git a/src/backend/base/langflow/components/models/AnthropicModel.py b/src/backend/base/langflow/components/models/AnthropicModel.py index 24aec2384..716afc4d9 100644 --- a/src/backend/base/langflow/components/models/AnthropicModel.py +++ b/src/backend/base/langflow/components/models/AnthropicModel.py @@ -19,6 +19,7 @@ class AnthropicModelComponent(LCModelComponent): name="max_tokens", display_name="Max Tokens", advanced=True, + value=4096, info="The maximum number of tokens to generate. Set to 0 for unlimited tokens.", ), DropdownInput( diff --git a/src/backend/base/langflow/components/vectorstores/Chroma.py b/src/backend/base/langflow/components/vectorstores/Chroma.py index 8a7e34d98..ac3629e13 100644 --- a/src/backend/base/langflow/components/vectorstores/Chroma.py +++ b/src/backend/base/langflow/components/vectorstores/Chroma.py @@ -37,7 +37,6 @@ class ChromaVectorStoreComponent(LCVectorStoreComponent): TextInput( name="search_query", display_name="Search Query", - is_list=True, ), DataInput( name="ingest_data", @@ -146,11 +145,12 @@ class ChromaVectorStoreComponent(LCVectorStoreComponent): self.status = "" return + + _stored_documents_without_id = [] if self.allow_duplicates: stored_data = [] else: stored_data = chroma_collection_to_data(vector_store.get(self.limit)) - _stored_documents_without_id = [] for value in deepcopy(stored_data): del value.id _stored_documents_without_id.append(value) @@ -192,4 +192,5 @@ class ChromaVectorStoreComponent(LCVectorStoreComponent): search_results = self.search_with_vector_store( self.search_query, self.search_type, vector_store, k=self.number_of_results ) + self.status = search_results return search_results diff --git a/src/backend/base/langflow/inputs/inputs.py b/src/backend/base/langflow/inputs/inputs.py index bc3a65339..10163740c 100644 --- a/src/backend/base/langflow/inputs/inputs.py +++ b/src/backend/base/langflow/inputs/inputs.py @@ -123,7 +123,8 @@ class TextInput(StrInput): input_types (list[str]): A list of input types that this component supports. In this case, it supports the "Message" input type. """ - input_types: list[str] = ["Message"] + # ! adding str since it's checked on line 143 + input_types: list[str] = ["Message", "str"] @staticmethod def _validate_value(v: Any, _info):