From ed692530fbab7cbf74f236fc7707a3aca4f5f9a9 Mon Sep 17 00:00:00 2001 From: Irshad Ahamed M B Date: Wed, 6 Nov 2024 17:02:03 +0530 Subject: [PATCH] fix: correct issue with handling float32 (#4312) * Update pinecone.py Error building Component Pinecone: Invalid type for variable '0'. Required value type is float and passed type was float64 at ['values'][0] Fixed the issue by forcing float32 * [autofix.ci] apply automated fixes * Lint Recommendation Handled Lint Recommendation Handled * [autofix.ci] apply automated fixes * refactor: Simplify Float32Embeddings implementation Streamline the Float32Embeddings class by removing unnecessary parent linkage and consolidating the embedding logic, ensuring consistent float32 output for embeddings. * refactor: Re-arranging the code. Co-Authored-By: Gabriel Luiz Freitas Almeida * [autofix.ci] apply automated fixes --------- Co-authored-by: autofix-ci[bot] <114827586+autofix-ci[bot]@users.noreply.github.com> Co-authored-by: Gabriel Luiz Freitas Almeida --- .../components/vectorstores/pinecone.py | 102 ++++++++++++------ 1 file changed, 67 insertions(+), 35 deletions(-) diff --git a/src/backend/base/langflow/components/vectorstores/pinecone.py b/src/backend/base/langflow/components/vectorstores/pinecone.py index fd7ed81f3..17eafefe5 100644 --- a/src/backend/base/langflow/components/vectorstores/pinecone.py +++ b/src/backend/base/langflow/components/vectorstores/pinecone.py @@ -1,16 +1,9 @@ +import numpy as np from langchain_pinecone import Pinecone from langflow.base.vectorstores.model import LCVectorStoreComponent, check_cached_vector_store from langflow.helpers.data import docs_to_data -from langflow.io import ( - DataInput, - DropdownInput, - HandleInput, - IntInput, - MultilineInput, - SecretStrInput, - StrInput, -) +from langflow.io import DataInput, DropdownInput, HandleInput, IntInput, MultilineInput, SecretStrInput, StrInput from langflow.schema import Data @@ -20,7 +13,6 @@ class 
PineconeVectorStoreComponent(LCVectorStoreComponent): documentation = "https://python.langchain.com/v0.2/docs/integrations/vectorstores/pinecone/" name = "Pinecone" icon = "Pinecone" - inputs = [ StrInput(name="index_name", display_name="Index Name", required=True), StrInput(name="namespace", display_name="Namespace", info="Namespace for the index."), @@ -57,42 +49,82 @@ class PineconeVectorStoreComponent(LCVectorStoreComponent): @check_cached_vector_store def build_vector_store(self) -> Pinecone: - from langchain_pinecone._utilities import DistanceStrategy - from langchain_pinecone.vectorstores import Pinecone + """Build and return a Pinecone vector store instance.""" + try: + from langchain_pinecone._utilities import DistanceStrategy - distance_strategy = self.distance_strategy.replace(" ", "_").upper() - _distance_strategy = DistanceStrategy[distance_strategy] + # Wrap the embedding model to ensure float32 output + wrapped_embeddings = Float32Embeddings(self.embedding) - pinecone = Pinecone( - index_name=self.index_name, - embedding=self.embedding, - text_key=self.text_key, - namespace=self.namespace, - distance_strategy=_distance_strategy, - pinecone_api_key=self.pinecone_api_key, - ) + # Convert distance strategy + distance_strategy = self.distance_strategy.replace(" ", "_").upper() + distance_strategy = DistanceStrategy[distance_strategy] - documents = [] - for _input in self.ingest_data or []: - if isinstance(_input, Data): - documents.append(_input.to_lc_document()) - else: - documents.append(_input) + # Initialize Pinecone instance with wrapped embeddings + pinecone = Pinecone( + index_name=self.index_name, + embedding=wrapped_embeddings, # Use wrapped embeddings + text_key=self.text_key, + namespace=self.namespace, + distance_strategy=distance_strategy, + pinecone_api_key=self.pinecone_api_key, + ) + except Exception as e: + error_msg = "Error building Pinecone vector store" + raise ValueError(error_msg) from e + else: + # Process documents if any + 
documents = [] + if self.ingest_data: + for doc in self.ingest_data: + if isinstance(doc, Data): + documents.append(doc.to_lc_document()) + else: + documents.append(doc) - if documents: - pinecone.add_documents(documents) - return pinecone + if documents: + pinecone.add_documents(documents) + + return pinecone def search_documents(self) -> list[Data]: - vector_store = self.build_vector_store() + """Search documents in the vector store.""" + try: + if not self.search_query or not isinstance(self.search_query, str) or not self.search_query.strip(): + return [] - if self.search_query and isinstance(self.search_query, str) and self.search_query.strip(): + vector_store = self.build_vector_store() docs = vector_store.similarity_search( query=self.search_query, k=self.number_of_results, ) - + except Exception as e: + error_msg = "Error searching documents" + raise ValueError(error_msg) from e + else: data = docs_to_data(docs) self.status = data return data - return [] + + +class Float32Embeddings: + """Wrapper class to ensure float32 embeddings.""" + + def __init__(self, base_embeddings): + self.base_embeddings = base_embeddings + + def embed_documents(self, texts): + embeddings = self.base_embeddings.embed_documents(texts) + if isinstance(embeddings, np.ndarray): + return [[self._force_float32(x) for x in vec] for vec in embeddings] + return [[self._force_float32(x) for x in vec] for vec in embeddings] + + def embed_query(self, text): + embedding = self.base_embeddings.embed_query(text) + if isinstance(embedding, np.ndarray): + return [self._force_float32(x) for x in embedding] + return [self._force_float32(x) for x in embedding] + + def _force_float32(self, value): + """Convert any numeric type to Python float.""" + return float(np.float32(value))