fix: correct issue with handling float32 (#4312)

* Update pinecone.py

Error building Component Pinecone: Invalid type for variable '0'. Required value type is float and passed type was float64 at ['values'][0]

Fixed the issue by forcing float32

* [autofix.ci] apply automated fixes

* Lint recommendation handled

Lint recommendation handled

* [autofix.ci] apply automated fixes

* refactor: Simplify Float32Embeddings implementation

Streamline the Float32Embeddings class by removing unnecessary parent linkage and consolidating the embedding logic, ensuring consistent float32 output for embeddings.

* refactor: Re-arranging the code.

Co-Authored-By: Gabriel Luiz Freitas Almeida <gabriel@langflow.org>

* [autofix.ci] apply automated fixes

---------

Co-authored-by: autofix-ci[bot] <114827586+autofix-ci[bot]@users.noreply.github.com>
Co-authored-by: Gabriel Luiz Freitas Almeida <gabriel@langflow.org>
This commit is contained in:
Irshad Ahamed M B 2024-11-06 17:02:03 +05:30 committed by GitHub
commit ed692530fb
No known key found for this signature in database
GPG key ID: B5690EEEBB952194

View file

@ -1,16 +1,9 @@
import numpy as np
from langchain_pinecone import Pinecone
from langflow.base.vectorstores.model import LCVectorStoreComponent, check_cached_vector_store
from langflow.helpers.data import docs_to_data
from langflow.io import (
DataInput,
DropdownInput,
HandleInput,
IntInput,
MultilineInput,
SecretStrInput,
StrInput,
)
from langflow.io import DataInput, DropdownInput, HandleInput, IntInput, MultilineInput, SecretStrInput, StrInput
from langflow.schema import Data
@ -20,7 +13,6 @@ class PineconeVectorStoreComponent(LCVectorStoreComponent):
documentation = "https://python.langchain.com/v0.2/docs/integrations/vectorstores/pinecone/"
name = "Pinecone"
icon = "Pinecone"
inputs = [
StrInput(name="index_name", display_name="Index Name", required=True),
StrInput(name="namespace", display_name="Namespace", info="Namespace for the index."),
@ -57,42 +49,82 @@ class PineconeVectorStoreComponent(LCVectorStoreComponent):
@check_cached_vector_store
def build_vector_store(self) -> Pinecone:
from langchain_pinecone._utilities import DistanceStrategy
from langchain_pinecone.vectorstores import Pinecone
"""Build and return a Pinecone vector store instance."""
try:
from langchain_pinecone._utilities import DistanceStrategy
distance_strategy = self.distance_strategy.replace(" ", "_").upper()
_distance_strategy = DistanceStrategy[distance_strategy]
# Wrap the embedding model to ensure float32 output
wrapped_embeddings = Float32Embeddings(self.embedding)
pinecone = Pinecone(
index_name=self.index_name,
embedding=self.embedding,
text_key=self.text_key,
namespace=self.namespace,
distance_strategy=_distance_strategy,
pinecone_api_key=self.pinecone_api_key,
)
# Convert distance strategy
distance_strategy = self.distance_strategy.replace(" ", "_").upper()
distance_strategy = DistanceStrategy[distance_strategy]
documents = []
for _input in self.ingest_data or []:
if isinstance(_input, Data):
documents.append(_input.to_lc_document())
else:
documents.append(_input)
# Initialize Pinecone instance with wrapped embeddings
pinecone = Pinecone(
index_name=self.index_name,
embedding=wrapped_embeddings, # Use wrapped embeddings
text_key=self.text_key,
namespace=self.namespace,
distance_strategy=distance_strategy,
pinecone_api_key=self.pinecone_api_key,
)
except Exception as e:
error_msg = "Error building Pinecone vector store"
raise ValueError(error_msg) from e
else:
# Process documents if any
documents = []
if self.ingest_data:
for doc in self.ingest_data:
if isinstance(doc, Data):
documents.append(doc.to_lc_document())
else:
documents.append(doc)
if documents:
pinecone.add_documents(documents)
return pinecone
if documents:
pinecone.add_documents(documents)
return pinecone
def search_documents(self) -> list[Data]:
vector_store = self.build_vector_store()
"""Search documents in the vector store."""
try:
if not self.search_query or not isinstance(self.search_query, str) or not self.search_query.strip():
return []
if self.search_query and isinstance(self.search_query, str) and self.search_query.strip():
vector_store = self.build_vector_store()
docs = vector_store.similarity_search(
query=self.search_query,
k=self.number_of_results,
)
except Exception as e:
error_msg = "Error searching documents"
raise ValueError(error_msg) from e
else:
data = docs_to_data(docs)
self.status = data
return data
return []
class Float32Embeddings:
    """Wrap an embeddings object so its vectors come back at float32 precision.

    The wrapped object must provide ``embed_documents(texts)`` and
    ``embed_query(text)``. Whatever numeric type those calls produce (plain
    floats, NumPy ``float64`` scalars, NumPy arrays), this wrapper returns
    plain Python ``float`` values that have been rounded through
    ``numpy.float32``.
    """

    def __init__(self, base_embeddings):
        """Store the embeddings object whose output will be converted."""
        self.base_embeddings = base_embeddings

    def embed_documents(self, texts):
        """Embed ``texts`` and return one list of float32-rounded floats per vector."""
        embeddings = self.base_embeddings.embed_documents(texts)
        return [self._to_float32_list(vector) for vector in embeddings]

    def embed_query(self, text):
        """Embed ``text`` and return a single list of float32-rounded floats."""
        embedding = self.base_embeddings.embed_query(text)
        return self._to_float32_list(embedding)

    @staticmethod
    def _to_float32_list(vector):
        """Cast one vector to float32 and return it as a list of Python floats."""
        # np.asarray cannot consume generators or other lazy iterables directly,
        # so materialise anything that is not already a concrete sequence.
        if not isinstance(vector, (list, tuple, np.ndarray)):
            vector = list(vector)
        # One vectorised cast per vector; ``tolist`` unboxes the NumPy scalars
        # into built-in floats.
        return np.asarray(vector, dtype=np.float32).tolist()

    def _force_float32(self, value):
        """Round one numeric value to float32 precision and return it as a Python float."""
        return float(np.float32(value))