refactor: Update VectaraVectorStoreComponent to handle embedding input and add documents to vector store

This commit is contained in:
Cezar Vasconcelos 2024-06-24 00:57:36 +00:00 committed by Gabriel Luiz Freitas Almeida
commit 783475e50d

View file

@ -1,30 +1,45 @@
from typing import List
from typing import List, TYPE_CHECKING
from langchain_community.embeddings import FakeEmbeddings
from langchain_community.vectorstores import Vectara
from loguru import logger
from langflow.base.vectorstores.model import LCVectorStoreComponent
from langflow.helpers.data import docs_to_data
from langflow.io import IntInput, StrInput, SecretStrInput, DataInput, MultilineInput
from langflow.io import HandleInput, IntInput, Output, SecretStrInput, StrInput, TextInput
from langflow.schema import Data
if TYPE_CHECKING:
from langchain_community.vectorstores import Vectara
class VectaraVectorStoreComponent(LCVectorStoreComponent):
display_name = "Vectara"
description = "Vectara Vector Store with search capabilities"
documentation = "https://python.langchain.com/v0.2/docs/integrations/vectorstores/vectara/"
"""
Vectara Vector Store with search capabilities
"""
display_name: str = "Vectara"
description: str = "Vectara Vector Store with search capabilities"
documentation = "https://python.langchain.com/docs/modules/data_connection/vectorstores/integrations/vectara"
icon = "Vectara"
inputs = [
StrInput(name="vectara_customer_id", display_name="Vectara Customer ID", required=True),
StrInput(name="vectara_corpus_id", display_name="Vectara Corpus ID", required=True),
SecretStrInput(name="vectara_api_key", display_name="Vectara API Key", required=True),
MultilineInput(name="search_query", display_name="Search Query"),
DataInput(
HandleInput(
name="embedding",
display_name="Embedding",
input_types=["Embeddings"],
),
HandleInput(
name="ingest_data",
display_name="Vector Store Inputs",
display_name="Ingest Data",
input_types=["Document", "Data"],
is_list=True,
),
TextInput(
name="search_query",
display_name="Search Query",
),
IntInput(
name="number_of_results",
display_name="Number of Results",
@ -34,11 +49,33 @@ class VectaraVectorStoreComponent(LCVectorStoreComponent):
),
]
def build_vector_store(self) -> Vectara:
return self._build_vectara()
def build_vector_store(self) -> "Vectara":
"""
Builds the Vectara object.
"""
try:
from langchain_community.vectorstores import Vectara
except ImportError:
raise ImportError(
"Could not import Vectara. Please install it with `pip install langchain-community`."
)
def _build_vectara(self) -> Vectara:
source = "Langflow"
vectara = Vectara(
vectara_customer_id=self.vectara_customer_id,
vectara_corpus_id=self.vectara_corpus_id,
vectara_api_key=self.vectara_api_key,
)
self._add_documents_to_vector_store(vectara)
return vectara
def _add_documents_to_vector_store(self, vector_store: "Vectara") -> None:
"""
Adds documents to the Vector Store.
"""
if not self.ingest_data:
self.status = "No documents to add to Vectara"
return
documents = []
for _input in self.ingest_data or []:
@ -48,24 +85,15 @@ class VectaraVectorStoreComponent(LCVectorStoreComponent):
documents.append(_input)
if documents:
return Vectara.from_documents(
documents=documents,
embedding=FakeEmbeddings(size=768),
vectara_customer_id=self.vectara_customer_id,
vectara_corpus_id=self.vectara_corpus_id,
vectara_api_key=self.vectara_api_key,
source=source,
)
return Vectara(
vectara_customer_id=self.vectara_customer_id,
vectara_corpus_id=self.vectara_corpus_id,
vectara_api_key=self.vectara_api_key,
source=source,
)
logger.debug(f"Adding {len(documents)} documents to Vectara.")
vector_store.add_documents(documents)
self.status = f"Added {len(documents)} documents to Vectara"
else:
logger.debug("No documents to add to Vectara.")
self.status = "No valid documents to add to Vectara"
def search_documents(self) -> List[Data]:
vector_store = self._build_vectara()
vector_store = self.build_vector_store()
if self.search_query and isinstance(self.search_query, str) and self.search_query.strip():
docs = vector_store.similarity_search(
@ -74,7 +102,8 @@ class VectaraVectorStoreComponent(LCVectorStoreComponent):
)
data = docs_to_data(docs)
self.status = data
self.status = f"Found {len(data)} results for the query: {self.search_query}"
return data
else:
self.status = "No search query provided"
return []