From 168a0db5ab0a38b4df4f01bbf9cf28ea119549a7 Mon Sep 17 00:00:00 2001 From: Rodrigo Date: Mon, 17 Jun 2024 22:04:04 -0300 Subject: [PATCH] refactor: Update URLComponent to include text_key in Data objects --- .../base/langflow/components/data/URL.py | 2 +- .../components/experimental/SplitText.py | 10 ++-- .../components/vectorstores/Chroma.py | 49 ++++++++++--------- .../components/vectorstores/base/model.py | 10 +--- src/backend/base/langflow/inputs/__init__.py | 24 ++++----- src/backend/base/langflow/inputs/inputs.py | 7 ++- 6 files changed, 50 insertions(+), 52 deletions(-) diff --git a/src/backend/base/langflow/components/data/URL.py b/src/backend/base/langflow/components/data/URL.py index 0b10a9820..e20b526d7 100644 --- a/src/backend/base/langflow/components/data/URL.py +++ b/src/backend/base/langflow/components/data/URL.py @@ -62,6 +62,6 @@ class URLComponent(Component): urls = [self.ensure_url(url.strip()) for url in self.urls if url.strip()] loader = WebBaseLoader(web_paths=urls, encoding="utf-8") docs = loader.load() - data = [Data(text_key="text", content=doc.page_content, **doc.metadata) for doc in docs] + data = [Data(text=doc.page_content, **doc.metadata) for doc in docs] self.status = data return data diff --git a/src/backend/base/langflow/components/experimental/SplitText.py b/src/backend/base/langflow/components/experimental/SplitText.py index 211c26338..0789d1422 100644 --- a/src/backend/base/langflow/components/experimental/SplitText.py +++ b/src/backend/base/langflow/components/experimental/SplitText.py @@ -67,21 +67,21 @@ class SplitTextComponent(Component): buffer = "" for row in data: - text = row.data.get(text_key, "") - chunks = text.split(separator) + parent = row.data.get(text_key, "") + chunks = parent.split(separator) for chunk in chunks: buffer += chunk while len(buffer) >= max_chunk_size: - results.append(Data(data={"parent": text, "text": buffer[:max_chunk_size]})) + results.append(Data(data={"parent": parent, "text": buffer[:max_chunk_size]})) buffer = buffer[max_chunk_size:] if len(buffer) >= min_chunk_size: - results.append(Data(data={"parent": text, "text": buffer})) + results.append(Data(data={"parent": parent, "text": buffer})) buffer = "" # Handle any remaining text that may not meet the min_chunk_size requirement if buffer: - results.append(Data(data={"parent": text, "text": buffer})) + results.append(Data(parent=parent, text=buffer)) self.status = results return results diff --git a/src/backend/base/langflow/components/vectorstores/Chroma.py b/src/backend/base/langflow/components/vectorstores/Chroma.py index 1db41a159..7ab97fe08 100644 --- a/src/backend/base/langflow/components/vectorstores/Chroma.py +++ b/src/backend/base/langflow/components/vectorstores/Chroma.py @@ -7,7 +7,7 @@ from loguru import logger from langflow.base.vectorstores.utils import chroma_collection_to_data from langflow.components.vectorstores.base.model import LCVectorStoreComponent -from langflow.inputs import BoolInput, DropdownInput, HandleInput, IntInput, StrInput +from langflow.inputs import BoolInput, DropdownInput, HandleInput, IntInput, StrInput, MessageInput, DataInput from langflow.schema import Data if TYPE_CHECKING: @@ -39,12 +39,15 @@ class ChromaVectorStoreComponent(LCVectorStoreComponent): display_name="Code", advanced=True, ), - StrInput( - name="vector_store_inputs", - display_name="Vector Store Inputs", - input_types=["Document", "Data"], + MessageInput( + name="search_query", + display_name="Search Query", is_list=True, ), + DataInput( + name="ingest_data", + display_name="Ingest Data", + ), HandleInput(name="embedding", display_name="Embedding", input_types=["Embeddings"]), StrInput( name="chroma_server_cors_allow_origins", @@ -77,27 +80,19 @@ class ChromaVectorStoreComponent(LCVectorStoreComponent): advanced=True, info="If false, will not add documents that are already in the Vector Store.", ), - BoolInput( - name="add_to_vector_store", - display_name="Add to Vector Store", - info="If true, the Vector Store Inputs will be added to the Vector Store.", - ), - StrInput( - name="search_input", - display_name="Search Input", - ), DropdownInput( name="search_type", display_name="Search Type", options=["Similarity", "MMR"], value="Similarity", + advanced=True ), IntInput( name="number_of_results", display_name="Number of Results", info="Number of results to return.", advanced=True, - value=4, + value=10, ), IntInput( name="limit", @@ -144,9 +139,6 @@ class ChromaVectorStoreComponent(LCVectorStoreComponent): collection_name=self.collection_name, ) - if self.add_to_vector_store: - self._add_documents_to_vector_store(chroma) - self.status = chroma_collection_to_data(chroma.get(self.limit)) return chroma @@ -154,6 +146,10 @@ class ChromaVectorStoreComponent(LCVectorStoreComponent): """ Adds documents to the Vector Store. """ + if not self.ingest_data: + self.status = "" + return + if self.allow_duplicates: stored_data = [] else: @@ -164,7 +160,7 @@ class ChromaVectorStoreComponent(LCVectorStoreComponent): _stored_documents_without_id.append(value) documents = [] - for _input in self.vector_store_inputs or []: + for _input in self.ingest_data or []: if isinstance(_input, Data): if _input not in _stored_documents_without_id: documents.append(_input.to_lc_document()) @@ -181,16 +177,23 @@ class ChromaVectorStoreComponent(LCVectorStoreComponent): """ Search for documents in the Chroma vector store. """ - if not self.search_input: + if not self.search_query.text: + self.status = "" return - vector_store = self._build_chroma() + vector_store = self.build_vector_store() - logger.debug(f"Search input: {self.search_input}") + logger.debug(f"Search input: {self.search_query}") logger.debug(f"Search type: {self.search_type}") logger.debug(f"Number of results: {self.number_of_results}") + if isinstance(self.search_query, list): + if len(self.search_query) > 1: + raise ValueError("Input value must be a single-item list.") + else: + self.search_query = self.search_query[0] + search_results = self.search_with_vector_store( - self.input_value, self.search_type, vector_store, k=self.number_of_results + self.search_query.text, self.search_type, vector_store, k=self.number_of_results ) return search_results diff --git a/src/backend/base/langflow/components/vectorstores/base/model.py b/src/backend/base/langflow/components/vectorstores/base/model.py index 608b234b6..c4bbe83ea 100644 --- a/src/backend/base/langflow/components/vectorstores/base/model.py +++ b/src/backend/base/langflow/components/vectorstores/base/model.py @@ -1,25 +1,19 @@ from typing import List, Union from langchain_core.documents import Document -from langchain_core.retrievers import BaseRetriever -from langchain_core.vectorstores import VectorStore from langflow.custom import Component from langflow.field_typing import Text from langflow.helpers.data import docs_to_data from langflow.schema import Data from langflow.template import Output +from langflow.field_typing import BaseRetriever, VectorStore class LCVectorStoreComponent(Component): outputs = [ Output( - display_name="Vector Store", - name="vector_store", - method="build_vector_store", - ), - Output( - display_name="Base Retriever", + display_name="Retriever", name="base_retriever", method="build_base_retriever", ), diff --git a/src/backend/base/langflow/inputs/__init__.py b/src/backend/base/langflow/inputs/__init__.py index 403adda82..41f03c9d2 100644 --- a/src/backend/base/langflow/inputs/__init__.py +++ b/src/backend/base/langflow/inputs/__init__.py @@ -1,5 +1,6 @@ from .inputs import ( BoolInput, + DataInput, DictInput, DropdownInput, FileInput, @@ -16,18 +17,19 @@ from .inputs import ( ) __all__ = [ + "BoolInput", + "DataInput", + "DictInput", + "DropdownInput", + "FileInput", + "FloatInput", + "HandleInput", + "IntInput", + "MessageInput", + "MultilineInput", + "NestedDictInput", + "PromptInput", "SecretStrInput", "StrInput", - "PromptInput", - "MultilineInput", - "HandleInput", "TextInput", - "BoolInput", - "DropdownInput", - "FloatInput", - "IntInput", - "DictInput", - "MessageInput", - "NestedDictInput", - "FileInput", ] diff --git a/src/backend/base/langflow/inputs/inputs.py b/src/backend/base/langflow/inputs/inputs.py index c01f25130..e34c6910f 100644 --- a/src/backend/base/langflow/inputs/inputs.py +++ b/src/backend/base/langflow/inputs/inputs.py @@ -23,9 +23,8 @@ class HandleInput(BaseInputMixin, ListableInputMixin): field_type: Optional[SerializableFieldTypes] = FieldTypes.OTHER -# class DataInput(HandleInput): -# input_types: list[str] = ["Data"] -# ! Let's add this? +class DataInput(HandleInput): + input_types: list[str] = ["Data"] class PromptInput(BaseInputMixin, ListableInputMixin): @@ -136,7 +135,7 @@ class FileInput(BaseInputMixin, ListableInputMixin, FileMixin): InputTypes = Union[ BoolInput, - # DataInput, # ! Let's add this + DataInput, DictInput, DropdownInput, FileInput,