From 783475e50dd902fd2409048e9ba26602110e4f93 Mon Sep 17 00:00:00 2001 From: Cezar Vasconcelos Date: Mon, 24 Jun 2024 00:57:36 +0000 Subject: [PATCH] refactor: Update VectaraVectorStoreComponent to handle embedding input and add documents to vector store --- .../components/vectorstores/Vectara.py | 89 ++++++++++++------- 1 file changed, 59 insertions(+), 30 deletions(-) diff --git a/src/backend/base/langflow/components/vectorstores/Vectara.py b/src/backend/base/langflow/components/vectorstores/Vectara.py index 674a35ff5..198baeb8d 100644 --- a/src/backend/base/langflow/components/vectorstores/Vectara.py +++ b/src/backend/base/langflow/components/vectorstores/Vectara.py @@ -1,30 +1,45 @@ -from typing import List +from typing import List, TYPE_CHECKING -from langchain_community.embeddings import FakeEmbeddings from langchain_community.vectorstores import Vectara +from loguru import logger from langflow.base.vectorstores.model import LCVectorStoreComponent from langflow.helpers.data import docs_to_data -from langflow.io import IntInput, StrInput, SecretStrInput, DataInput, MultilineInput +from langflow.io import HandleInput, IntInput, Output, SecretStrInput, StrInput, TextInput from langflow.schema import Data +if TYPE_CHECKING: + from langchain_community.vectorstores import Vectara class VectaraVectorStoreComponent(LCVectorStoreComponent): - display_name = "Vectara" - description = "Vectara Vector Store with search capabilities" - documentation = "https://python.langchain.com/v0.2/docs/integrations/vectorstores/vectara/" + """ + Vectara Vector Store with search capabilities + """ + + display_name: str = "Vectara" + description: str = "Vectara Vector Store with search capabilities" + documentation = "https://python.langchain.com/docs/modules/data_connection/vectorstores/integrations/vectara" icon = "Vectara" inputs = [ StrInput(name="vectara_customer_id", display_name="Vectara Customer ID", required=True), StrInput(name="vectara_corpus_id", display_name="Vectara Corpus ID", required=True), SecretStrInput(name="vectara_api_key", display_name="Vectara API Key", required=True), - MultilineInput(name="search_query", display_name="Search Query"), - DataInput( + HandleInput( + name="embedding", + display_name="Embedding", + input_types=["Embeddings"], + ), + HandleInput( name="ingest_data", - display_name="Vector Store Inputs", + display_name="Ingest Data", + input_types=["Document", "Data"], is_list=True, ), + TextInput( + name="search_query", + display_name="Search Query", + ), IntInput( name="number_of_results", display_name="Number of Results", @@ -34,11 +49,33 @@ class VectaraVectorStoreComponent(LCVectorStoreComponent): ), ] - def build_vector_store(self) -> Vectara: - return self._build_vectara() + def build_vector_store(self) -> "Vectara": + """ + Builds the Vectara object. + """ + try: + from langchain_community.vectorstores import Vectara + except ImportError: + raise ImportError( + "Could not import Vectara. Please install it with `pip install langchain-community`." + ) - def _build_vectara(self) -> Vectara: - source = "Langflow" + vectara = Vectara( + vectara_customer_id=self.vectara_customer_id, + vectara_corpus_id=self.vectara_corpus_id, + vectara_api_key=self.vectara_api_key, + ) + + self._add_documents_to_vector_store(vectara) + return vectara + + def _add_documents_to_vector_store(self, vector_store: "Vectara") -> None: + """ + Adds documents to the Vector Store. + """ + if not self.ingest_data: + self.status = "No documents to add to Vectara" + return documents = [] for _input in self.ingest_data or []: @@ -48,24 +85,15 @@ class VectaraVectorStoreComponent(LCVectorStoreComponent): documents.append(_input) if documents: - return Vectara.from_documents( - documents=documents, - embedding=FakeEmbeddings(size=768), - vectara_customer_id=self.vectara_customer_id, - vectara_corpus_id=self.vectara_corpus_id, - vectara_api_key=self.vectara_api_key, - source=source, - ) - - return Vectara( - vectara_customer_id=self.vectara_customer_id, - vectara_corpus_id=self.vectara_corpus_id, - vectara_api_key=self.vectara_api_key, - source=source, - ) + logger.debug(f"Adding {len(documents)} documents to Vectara.") + vector_store.add_documents(documents) + self.status = f"Added {len(documents)} documents to Vectara" + else: + logger.debug("No documents to add to Vectara.") + self.status = "No valid documents to add to Vectara" def search_documents(self) -> List[Data]: - vector_store = self._build_vectara() + vector_store = self.build_vector_store() if self.search_query and isinstance(self.search_query, str) and self.search_query.strip(): docs = vector_store.similarity_search( @@ -74,7 +102,8 @@ class VectaraVectorStoreComponent(LCVectorStoreComponent): ) data = docs_to_data(docs) - self.status = data + self.status = f"Found {len(data)} results for the query: {self.search_query}" return data else: + self.status = "No search query provided" return []