fix: correct issue with handling float32 (#4312)
* Update pinecone.py: fixes "Error building Component Pinecone: Invalid type for variable '0'. Required value type is float and passed type was float64 at ['values'][0]" by forcing float32. * [autofix.ci] apply automated fixes * Lint recommendation handled. * [autofix.ci] apply automated fixes * refactor: Simplify Float32Embeddings implementation. Streamline the Float32Embeddings class by removing unnecessary parent linkage and consolidating the embedding logic, ensuring consistent float32 output for embeddings. * refactor: Re-arrange the code. Co-Authored-By: Gabriel Luiz Freitas Almeida <gabriel@langflow.org> * [autofix.ci] apply automated fixes --------- Co-authored-by: autofix-ci[bot] <114827586+autofix-ci[bot]@users.noreply.github.com> Co-authored-by: Gabriel Luiz Freitas Almeida <gabriel@langflow.org>
This commit is contained in:
parent
e853a13d57
commit
ed692530fb
1 changed files with 67 additions and 35 deletions
|
|
@ -1,16 +1,9 @@
|
|||
import numpy as np
|
||||
from langchain_pinecone import Pinecone
|
||||
|
||||
from langflow.base.vectorstores.model import LCVectorStoreComponent, check_cached_vector_store
|
||||
from langflow.helpers.data import docs_to_data
|
||||
from langflow.io import (
|
||||
DataInput,
|
||||
DropdownInput,
|
||||
HandleInput,
|
||||
IntInput,
|
||||
MultilineInput,
|
||||
SecretStrInput,
|
||||
StrInput,
|
||||
)
|
||||
from langflow.io import DataInput, DropdownInput, HandleInput, IntInput, MultilineInput, SecretStrInput, StrInput
|
||||
from langflow.schema import Data
|
||||
|
||||
|
||||
|
|
@ -20,7 +13,6 @@ class PineconeVectorStoreComponent(LCVectorStoreComponent):
|
|||
documentation = "https://python.langchain.com/v0.2/docs/integrations/vectorstores/pinecone/"
|
||||
name = "Pinecone"
|
||||
icon = "Pinecone"
|
||||
|
||||
inputs = [
|
||||
StrInput(name="index_name", display_name="Index Name", required=True),
|
||||
StrInput(name="namespace", display_name="Namespace", info="Namespace for the index."),
|
||||
|
|
@ -57,42 +49,82 @@ class PineconeVectorStoreComponent(LCVectorStoreComponent):
|
|||
|
||||
@check_cached_vector_store
|
||||
def build_vector_store(self) -> Pinecone:
|
||||
from langchain_pinecone._utilities import DistanceStrategy
|
||||
from langchain_pinecone.vectorstores import Pinecone
|
||||
"""Build and return a Pinecone vector store instance."""
|
||||
try:
|
||||
from langchain_pinecone._utilities import DistanceStrategy
|
||||
|
||||
distance_strategy = self.distance_strategy.replace(" ", "_").upper()
|
||||
_distance_strategy = DistanceStrategy[distance_strategy]
|
||||
# Wrap the embedding model to ensure float32 output
|
||||
wrapped_embeddings = Float32Embeddings(self.embedding)
|
||||
|
||||
pinecone = Pinecone(
|
||||
index_name=self.index_name,
|
||||
embedding=self.embedding,
|
||||
text_key=self.text_key,
|
||||
namespace=self.namespace,
|
||||
distance_strategy=_distance_strategy,
|
||||
pinecone_api_key=self.pinecone_api_key,
|
||||
)
|
||||
# Convert distance strategy
|
||||
distance_strategy = self.distance_strategy.replace(" ", "_").upper()
|
||||
distance_strategy = DistanceStrategy[distance_strategy]
|
||||
|
||||
documents = []
|
||||
for _input in self.ingest_data or []:
|
||||
if isinstance(_input, Data):
|
||||
documents.append(_input.to_lc_document())
|
||||
else:
|
||||
documents.append(_input)
|
||||
# Initialize Pinecone instance with wrapped embeddings
|
||||
pinecone = Pinecone(
|
||||
index_name=self.index_name,
|
||||
embedding=wrapped_embeddings, # Use wrapped embeddings
|
||||
text_key=self.text_key,
|
||||
namespace=self.namespace,
|
||||
distance_strategy=distance_strategy,
|
||||
pinecone_api_key=self.pinecone_api_key,
|
||||
)
|
||||
except Exception as e:
|
||||
error_msg = "Error building Pinecone vector store"
|
||||
raise ValueError(error_msg) from e
|
||||
else:
|
||||
# Process documents if any
|
||||
documents = []
|
||||
if self.ingest_data:
|
||||
for doc in self.ingest_data:
|
||||
if isinstance(doc, Data):
|
||||
documents.append(doc.to_lc_document())
|
||||
else:
|
||||
documents.append(doc)
|
||||
|
||||
if documents:
|
||||
pinecone.add_documents(documents)
|
||||
return pinecone
|
||||
if documents:
|
||||
pinecone.add_documents(documents)
|
||||
|
||||
return pinecone
|
||||
|
||||
def search_documents(self) -> list[Data]:
|
||||
vector_store = self.build_vector_store()
|
||||
"""Search documents in the vector store."""
|
||||
try:
|
||||
if not self.search_query or not isinstance(self.search_query, str) or not self.search_query.strip():
|
||||
return []
|
||||
|
||||
if self.search_query and isinstance(self.search_query, str) and self.search_query.strip():
|
||||
vector_store = self.build_vector_store()
|
||||
docs = vector_store.similarity_search(
|
||||
query=self.search_query,
|
||||
k=self.number_of_results,
|
||||
)
|
||||
|
||||
except Exception as e:
|
||||
error_msg = "Error searching documents"
|
||||
raise ValueError(error_msg) from e
|
||||
else:
|
||||
data = docs_to_data(docs)
|
||||
self.status = data
|
||||
return data
|
||||
return []
|
||||
|
||||
|
||||
class Float32Embeddings:
    """Wrap an embedding model so its output is float32-precision Python floats.

    The wrapped model may return NumPy arrays or lists holding ``float64``
    (or other numeric) values. Every value is rounded to float32 precision and
    handed back as a built-in ``float``, so downstream consumers only ever see
    plain Python floats.

    NOTE(review): per the accompanying commit message this exists because the
    Pinecone client rejected ``float64`` values -- confirm against the client.
    """

    def __init__(self, base_embeddings):
        """Store the embedding model whose results will be converted.

        Args:
            base_embeddings: Object exposing ``embed_documents(texts)`` and
                ``embed_query(text)``.
        """
        self.base_embeddings = base_embeddings

    def embed_documents(self, texts) -> list[list[float]]:
        """Embed several texts and return one float32-precision vector per text.

        Args:
            texts: Passed through unchanged to the wrapped model's
                ``embed_documents``.

        Returns:
            A list of vectors, each a list of built-in ``float`` values.
        """
        embeddings = self.base_embeddings.embed_documents(texts)
        # Arrays and lists iterate the same way, so no type dispatch is needed.
        return [self._to_float32_list(vec) for vec in embeddings]

    def embed_query(self, text) -> list[float]:
        """Embed a single query text and return a float32-precision vector.

        Args:
            text: Passed through unchanged to the wrapped model's
                ``embed_query``.

        Returns:
            A list of built-in ``float`` values.
        """
        embedding = self.base_embeddings.embed_query(text)
        return self._to_float32_list(embedding)

    def _force_float32(self, value) -> float:
        """Round one numeric value to float32 precision and return a Python float."""
        return float(np.float32(value))

    @staticmethod
    def _to_float32_list(vector) -> list[float]:
        """Convert one vector (array or sequence) in a single NumPy cast.

        ``tolist()`` yields built-in floats, matching what ``_force_float32``
        produces element by element.
        """
        return np.asarray(vector, dtype=np.float32).tolist()
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue