refactor: Update URLComponent and ParseDataComponent
This commit updates the URLComponent and ParseDataComponent classes in the URL.py and ParseData.py files, respectively. In URLComponent, the redundant empty 'value' attribute in the 'urls' input has been removed, improving code clarity. In ParseDataComponent, the default 'value' of the 'sep' input has been changed from '---' to '\n', so parsed data entries are now separated by newlines by default. These changes simplify the component definitions and keep default values consistent across the codebase.
This commit is contained in:
parent
bac6a8cdff
commit
40fd956c9c
7 changed files with 60 additions and 69 deletions
|
|
@ -18,7 +18,6 @@ class URLComponent(Component):
|
|||
name="urls",
|
||||
display_name="URLs",
|
||||
info="Enter one or more URLs, separated by commas.",
|
||||
value="",
|
||||
is_list=True,
|
||||
),
|
||||
]
|
||||
|
|
|
|||
|
|
@ -1,45 +1,42 @@
|
|||
from typing import List
|
||||
|
||||
from langchain_text_splitters import CharacterTextSplitter
|
||||
from langflow.custom import Component
|
||||
from langflow.inputs import HandleInput, IntInput, TextInput
|
||||
from langflow.inputs import IntInput, TextInput, HandleInput
|
||||
from langflow.schema import Data
|
||||
from langflow.template import Output
|
||||
from langflow.utils.util import unescape_string
|
||||
|
||||
|
||||
class SplitTextComponent(Component):
|
||||
display_name: str = "Split Text"
|
||||
description: str = "Split text into chunks based on specified criteria."
|
||||
icon = "scissors-line-dashed"
|
||||
|
||||
inputs = [
|
||||
HandleInput(name="data", display_name="Data", info="Data with text to split.", input_types=["Data"]),
|
||||
TextInput(
|
||||
name="text_key",
|
||||
display_name="Text Key",
|
||||
info="The key to access the text content in the Data object.",
|
||||
value="text",
|
||||
HandleInput(
|
||||
name="data_inputs",
|
||||
display_name="Data Inputs",
|
||||
info="The data to split.",
|
||||
input_types=["Data"],
|
||||
is_list=True,
|
||||
),
|
||||
IntInput(
|
||||
name="chunk_overlap",
|
||||
display_name="Chunk Overlap",
|
||||
info="Number of characters to overlap between chunks.",
|
||||
value=200,
|
||||
),
|
||||
IntInput(
|
||||
name="chunk_size",
|
||||
display_name="Chunk Size",
|
||||
info="The maximum number of characters in each chunk.",
|
||||
value=1000,
|
||||
),
|
||||
TextInput(
|
||||
name="separator",
|
||||
display_name="Separator",
|
||||
info='The character to split on. Defaults to "\n".',
|
||||
info="The character to split on. Defaults to newline.",
|
||||
value="\n",
|
||||
advanced=True,
|
||||
),
|
||||
IntInput(
|
||||
name="min_chunk_size",
|
||||
display_name="Minimum Chunk Size",
|
||||
info="The minimum size of chunks. Smaller chunks will be merged.",
|
||||
value=10,
|
||||
advanced=True,
|
||||
),
|
||||
IntInput(
|
||||
name="max_chunk_size",
|
||||
display_name="Maximum Chunk Size",
|
||||
info="The maximum size of chunks. Larger chunks will be split.",
|
||||
value=200,
|
||||
advanced=True,
|
||||
),
|
||||
]
|
||||
|
||||
|
|
@ -47,41 +44,26 @@ class SplitTextComponent(Component):
|
|||
Output(display_name="Chunks", name="chunks", method="split_text"),
|
||||
]
|
||||
|
||||
def _docs_to_data(self, docs):
|
||||
data = []
|
||||
for doc in docs:
|
||||
data.append(Data(text=doc.page_content, data=doc.metadata))
|
||||
return data
|
||||
|
||||
def split_text(self) -> List[Data]:
|
||||
data = self.data if isinstance(self.data, list) else [self.data]
|
||||
text_key = self.text_key
|
||||
separator = unescape_string(self.separator)
|
||||
min_chunk_size = self.min_chunk_size
|
||||
max_chunk_size = self.max_chunk_size
|
||||
results = []
|
||||
|
||||
if not separator:
|
||||
raise ValueError("Separator cannot be empty.")
|
||||
if max_chunk_size < 10:
|
||||
raise ValueError("Maximum chunk size cannot be less than 10 characters.")
|
||||
if min_chunk_size < 10:
|
||||
raise ValueError("Minimum chunk size cannot be less than 10 characters.")
|
||||
if max_chunk_size < min_chunk_size:
|
||||
raise ValueError("Maximum chunk size cannot be less than minimum chunk size.")
|
||||
documents = []
|
||||
for _input in self.data_inputs:
|
||||
if isinstance(_input, Data):
|
||||
documents.append(_input.to_lc_document())
|
||||
|
||||
buffer = ""
|
||||
|
||||
for row in data:
|
||||
parent = row.data.get(text_key, "")
|
||||
chunks = parent.split(separator)
|
||||
|
||||
for chunk in chunks:
|
||||
buffer += chunk
|
||||
while len(buffer) >= max_chunk_size:
|
||||
results.append(Data(data={"parent": parent, "text": buffer[:max_chunk_size]}))
|
||||
buffer = buffer[max_chunk_size:]
|
||||
if len(buffer) >= min_chunk_size:
|
||||
results.append(Data(data={"parent": parent, "text": buffer}))
|
||||
buffer = ""
|
||||
|
||||
# Handle any remaining text that may not meet the min_chunk_size requirement
|
||||
if buffer:
|
||||
results.append(Data(parent=parent, text=buffer))
|
||||
|
||||
self.status = results
|
||||
return results
|
||||
splitter = CharacterTextSplitter(
|
||||
chunk_overlap=self.chunk_overlap,
|
||||
chunk_size=self.chunk_size,
|
||||
separator=separator,
|
||||
)
|
||||
docs = splitter.split_documents(documents)
|
||||
data = self._docs_to_data(docs)
|
||||
self.status = data
|
||||
return data
|
||||
|
|
|
|||
|
|
@ -4,12 +4,13 @@ from langflow.memory import get_messages
|
|||
from langflow.schema import Data
|
||||
from langflow.schema.message import Message
|
||||
from langflow.template import Output
|
||||
from langflow.helpers.data import data_to_text
|
||||
|
||||
|
||||
class MemoryComponent(Component):
|
||||
display_name = "Memory"
|
||||
display_name = "Chat Memory"
|
||||
description = "Retrieves stored chat messages."
|
||||
icon = "history"
|
||||
icon = "message-square-more"
|
||||
|
||||
inputs = [
|
||||
DropdownInput(
|
||||
|
|
@ -47,11 +48,18 @@ class MemoryComponent(Component):
|
|||
info="Order of the messages.",
|
||||
advanced=True,
|
||||
),
|
||||
MultilineInput(
|
||||
name="template",
|
||||
display_name="Template",
|
||||
info="The template to use for formatting the data. It can contain the keys {text}, {sender} or any other key in the message data.",
|
||||
value="{sender_name}: {text}",
|
||||
advanced=True,
|
||||
),
|
||||
]
|
||||
|
||||
outputs = [
|
||||
Output(display_name="Message Data", name="messages", method="retrieve_messages"),
|
||||
Output(display_name="Parsed", name="messages_text", method="retrieve_messages_as_text"),
|
||||
Output(display_name="Chat History", name="messages", method="retrieve_messages"),
|
||||
Output(display_name="Messages (Text)", name="messages_text", method="retrieve_messages_as_text"),
|
||||
]
|
||||
|
||||
def retrieve_messages(self) -> Data:
|
||||
|
|
@ -75,7 +83,6 @@ class MemoryComponent(Component):
|
|||
return messages
|
||||
|
||||
def retrieve_messages_as_text(self) -> Message:
|
||||
messages = self.retrieve_messages()
|
||||
messages_text = "\n".join(["{sender_name}: {text}".format(**message.data) for message in messages])
|
||||
messages_text = data_to_text(self.template, self.retrieve_messages())
|
||||
self.status = messages_text
|
||||
return Message(text=messages_text)
|
||||
|
|
|
|||
|
|
@ -22,7 +22,7 @@ class ParseDataComponent(Component):
|
|||
name="sep",
|
||||
display_name="Separator",
|
||||
advanced=True,
|
||||
value='---'
|
||||
value='\n'
|
||||
)
|
||||
]
|
||||
|
||||
|
|
|
|||
|
|
@ -19,6 +19,7 @@ class AnthropicModelComponent(LCModelComponent):
|
|||
name="max_tokens",
|
||||
display_name="Max Tokens",
|
||||
advanced=True,
|
||||
value=4096,
|
||||
info="The maximum number of tokens to generate. Set to 0 for unlimited tokens.",
|
||||
),
|
||||
DropdownInput(
|
||||
|
|
|
|||
|
|
@ -37,7 +37,6 @@ class ChromaVectorStoreComponent(LCVectorStoreComponent):
|
|||
TextInput(
|
||||
name="search_query",
|
||||
display_name="Search Query",
|
||||
is_list=True,
|
||||
),
|
||||
DataInput(
|
||||
name="ingest_data",
|
||||
|
|
@ -146,11 +145,12 @@ class ChromaVectorStoreComponent(LCVectorStoreComponent):
|
|||
self.status = ""
|
||||
return
|
||||
|
||||
|
||||
_stored_documents_without_id = []
|
||||
if self.allow_duplicates:
|
||||
stored_data = []
|
||||
else:
|
||||
stored_data = chroma_collection_to_data(vector_store.get(self.limit))
|
||||
_stored_documents_without_id = []
|
||||
for value in deepcopy(stored_data):
|
||||
del value.id
|
||||
_stored_documents_without_id.append(value)
|
||||
|
|
@ -192,4 +192,5 @@ class ChromaVectorStoreComponent(LCVectorStoreComponent):
|
|||
search_results = self.search_with_vector_store(
|
||||
self.search_query, self.search_type, vector_store, k=self.number_of_results
|
||||
)
|
||||
self.status = search_results
|
||||
return search_results
|
||||
|
|
|
|||
|
|
@ -123,7 +123,8 @@ class TextInput(StrInput):
|
|||
input_types (list[str]): A list of input types that this component supports. In this case, it supports the "Message" input type.
|
||||
"""
|
||||
|
||||
input_types: list[str] = ["Message"]
|
||||
# ! adding str since it's checked on line 143
|
||||
input_types: list[str] = ["Message", "str"]
|
||||
|
||||
@staticmethod
|
||||
def _validate_value(v: Any, _info):
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue