refactor: Update URLComponent to include text_key in Data objects

This commit is contained in:
Rodrigo 2024-06-17 22:04:04 -03:00
commit 168a0db5ab
6 changed files with 50 additions and 52 deletions

View file

@ -62,6 +62,6 @@ class URLComponent(Component):
urls = [self.ensure_url(url.strip()) for url in self.urls if url.strip()]
loader = WebBaseLoader(web_paths=urls, encoding="utf-8")
docs = loader.load()
data = [Data(text_key="text", content=doc.page_content, **doc.metadata) for doc in docs]
data = [Data(text=doc.page_content, **doc.metadata) for doc in docs]
self.status = data
return data

View file

@ -67,21 +67,21 @@ class SplitTextComponent(Component):
buffer = ""
for row in data:
text = row.data.get(text_key, "")
chunks = text.split(separator)
parent = row.data.get(text_key, "")
chunks = parent.split(separator)
for chunk in chunks:
buffer += chunk
while len(buffer) >= max_chunk_size:
results.append(Data(data={"parent": text, "text": buffer[:max_chunk_size]}))
results.append(Data(data={"parent": parent, "text": buffer[:max_chunk_size]}))
buffer = buffer[max_chunk_size:]
if len(buffer) >= min_chunk_size:
results.append(Data(data={"parent": text, "text": buffer}))
results.append(Data(data={"parent": parent, "text": buffer}))
buffer = ""
# Handle any remaining text that may not meet the min_chunk_size requirement
if buffer:
results.append(Data(data={"parent": text, "text": buffer}))
results.append(Data(parent=parent, text=buffer))
self.status = results
return results

View file

@ -7,7 +7,7 @@ from loguru import logger
from langflow.base.vectorstores.utils import chroma_collection_to_data
from langflow.components.vectorstores.base.model import LCVectorStoreComponent
from langflow.inputs import BoolInput, DropdownInput, HandleInput, IntInput, StrInput
from langflow.inputs import BoolInput, DropdownInput, HandleInput, IntInput, StrInput, MessageInput, DataInput
from langflow.schema import Data
if TYPE_CHECKING:
@ -39,12 +39,15 @@ class ChromaVectorStoreComponent(LCVectorStoreComponent):
display_name="Code",
advanced=True,
),
StrInput(
name="vector_store_inputs",
display_name="Vector Store Inputs",
input_types=["Document", "Data"],
MessageInput(
name="search_query",
display_name="Search Query",
is_list=True,
),
DataInput(
name="ingest_data",
display_name="Ingest Data",
),
HandleInput(name="embedding", display_name="Embedding", input_types=["Embeddings"]),
StrInput(
name="chroma_server_cors_allow_origins",
@ -77,27 +80,19 @@ class ChromaVectorStoreComponent(LCVectorStoreComponent):
advanced=True,
info="If false, will not add documents that are already in the Vector Store.",
),
BoolInput(
name="add_to_vector_store",
display_name="Add to Vector Store",
info="If true, the Vector Store Inputs will be added to the Vector Store.",
),
StrInput(
name="search_input",
display_name="Search Input",
),
DropdownInput(
name="search_type",
display_name="Search Type",
options=["Similarity", "MMR"],
value="Similarity",
advanced=True
),
IntInput(
name="number_of_results",
display_name="Number of Results",
info="Number of results to return.",
advanced=True,
value=4,
value=10,
),
IntInput(
name="limit",
@ -144,9 +139,6 @@ class ChromaVectorStoreComponent(LCVectorStoreComponent):
collection_name=self.collection_name,
)
if self.add_to_vector_store:
self._add_documents_to_vector_store(chroma)
self.status = chroma_collection_to_data(chroma.get(self.limit))
return chroma
@ -154,6 +146,10 @@ class ChromaVectorStoreComponent(LCVectorStoreComponent):
"""
Adds documents to the Vector Store.
"""
if not self.ingest_data:
self.status = ""
return
if self.allow_duplicates:
stored_data = []
else:
@ -164,7 +160,7 @@ class ChromaVectorStoreComponent(LCVectorStoreComponent):
_stored_documents_without_id.append(value)
documents = []
for _input in self.vector_store_inputs or []:
for _input in self.ingest_data or []:
if isinstance(_input, Data):
if _input not in _stored_documents_without_id:
documents.append(_input.to_lc_document())
@ -181,16 +177,23 @@ class ChromaVectorStoreComponent(LCVectorStoreComponent):
"""
Search for documents in the Chroma vector store.
"""
if not self.search_input:
if not self.search_query.text:
self.status = ""
return
vector_store = self._build_chroma()
vector_store = self.build_vector_store()
logger.debug(f"Search input: {self.search_input}")
logger.debug(f"Search input: {self.search_query}")
logger.debug(f"Search type: {self.search_type}")
logger.debug(f"Number of results: {self.number_of_results}")
if isinstance(self.search_query, list):
if len(self.search_query) > 1:
raise ValueError("Input value must be a single-item list.")
else:
self.search_query = self.search_query[0]
search_results = self.search_with_vector_store(
self.input_value, self.search_type, vector_store, k=self.number_of_results
self.search_query.text, self.search_type, vector_store, k=self.number_of_results
)
return search_results

View file

@ -1,25 +1,19 @@
from typing import List, Union
from langchain_core.documents import Document
from langchain_core.retrievers import BaseRetriever
from langchain_core.vectorstores import VectorStore
from langflow.custom import Component
from langflow.field_typing import Text
from langflow.helpers.data import docs_to_data
from langflow.schema import Data
from langflow.template import Output
from langflow.field_typing import BaseRetriever, VectorStore
class LCVectorStoreComponent(Component):
outputs = [
Output(
display_name="Vector Store",
name="vector_store",
method="build_vector_store",
),
Output(
display_name="Base Retriever",
display_name="Retriever",
name="base_retriever",
method="build_base_retriever",
),

View file

@ -1,5 +1,6 @@
from .inputs import (
BoolInput,
DataInput,
DictInput,
DropdownInput,
FileInput,
@ -16,18 +17,19 @@ from .inputs import (
)
__all__ = [
"BoolInput",
"DataInput",
"DictInput",
"DropdownInput",
"FileInput",
"FloatInput",
"HandleInput",
"IntInput",
"MessageInput",
"MultilineInput",
"NestedDictInput",
"PromptInput",
"SecretStrInput",
"StrInput",
"PromptInput",
"MultilineInput",
"HandleInput",
"TextInput",
"BoolInput",
"DropdownInput",
"FloatInput",
"IntInput",
"DictInput",
"MessageInput",
"NestedDictInput",
"FileInput",
]

View file

@ -23,9 +23,8 @@ class HandleInput(BaseInputMixin, ListableInputMixin):
field_type: Optional[SerializableFieldTypes] = FieldTypes.OTHER
# class DataInput(HandleInput):
# input_types: list[str] = ["Data"]
# ! Let's add this?
class DataInput(HandleInput):
input_types: list[str] = ["Data"]
class PromptInput(BaseInputMixin, ListableInputMixin):
@ -136,7 +135,7 @@ class FileInput(BaseInputMixin, ListableInputMixin, FileMixin):
InputTypes = Union[
BoolInput,
# DataInput, # ! Let's add this
DataInput,
DictInput,
DropdownInput,
FileInput,