Fixes Pinecone implementation (#1856)

* ⬆️ (pyproject.toml): upgrade langchain-pinecone dependency to version 0.1.0

* 📝 (Pinecone.py): Update Pinecone component to use new DistanceStrategy enum for distance calculation and improve configuration options for Pinecone vector store creation.

* ⬆️ (pyproject.toml): upgrade package version from 0.6.18 to 0.6.19
This commit is contained in:
Gabriel Luiz Freitas Almeida 2024-05-07 18:29:42 -03:00 committed by GitHub
commit 731eec3dc5
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
3 changed files with 662 additions and 581 deletions

1112
poetry.lock generated

File diff suppressed because it is too large Load diff

View file

@ -1,6 +1,6 @@
[tool.poetry]
name = "langflow"
version = "0.6.18"
version = "0.6.19"
description = "A Python package with a built-in web application"
authors = ["Logspace <contact@logspace.ai>"]
maintainers = [
@ -105,6 +105,7 @@ llama-index = "0.9.48"
langchain-openai = "^0.0.6"
urllib3 = "<2"
langchain-anthropic = "^0.1.4"
langchain-pinecone = "^0.1.0"
[tool.poetry.group.dev.dependencies]
pytest-asyncio = "^0.23.1"

View file

@ -1,63 +1,145 @@
import os
from typing import List, Optional, Union
import pinecone # type: ignore
from langchain.schema import BaseRetriever
from langchain_community.vectorstores import VectorStore
from langchain_community.vectorstores.pinecone import Pinecone
from langflow import CustomComponent
from langflow.field_typing import Document, Embeddings
from langchain_core.documents import Document
from langchain_pinecone._utilities import DistanceStrategy
from langchain_pinecone.vectorstores import PineconeVectorStore
from langflow.field_typing import Embeddings
from langflow.interface.custom.custom_component import CustomComponent
class PineconeComponent(CustomComponent):
    """Langflow component that builds a ``PineconeVectorStore``.

    ``build`` always binds the store to the index named ``index_name``; when
    documents are supplied they are added to that index before the store is
    returned.
    """

    display_name = "Pinecone"
    description = "Construct Pinecone wrapper from raw documents."
    icon = "Pinecone"
    field_order = ["index_name", "namespace", "distance_strategy", "pinecone_api_key", "documents", "embedding"]

    def build_config(self):
        """Return the per-field display configuration for this component."""
        # Show the enum values in title case with spaces instead of
        # underscores; ``build`` reverses this formatting to find the member.
        distance_options = [e.value.title().replace("_", " ") for e in DistanceStrategy]
        distance_value = distance_options[0]
        return {
            "documents": {"display_name": "Documents"},
            "inputs": {"display_name": "Input", "input_types": ["Document", "Record"]},
            "embedding": {"display_name": "Embedding"},
            "index_name": {"display_name": "Index Name"},
            "namespace": {"display_name": "Namespace"},
            "text_key": {"display_name": "Text Key"},
            "distance_strategy": {
                "display_name": "Distance Strategy",
                "options": distance_options,
                "advanced": True,
                "value": distance_value,
            },
            "pinecone_api_key": {
                "display_name": "Pinecone API Key",
                "default": "",
                "password": True,
                "required": True,
            },
            "pool_threads": {
                "display_name": "Pool Threads",
                "default": 1,
                "advanced": True,
            },
        }

    def from_existing_index(
        self,
        index_name: str,
        embedding: Embeddings,
        pinecone_api_key: Optional[str],
        text_key: str = "text",
        namespace: Optional[str] = None,
        distance_strategy: DistanceStrategy = DistanceStrategy.COSINE,
        pool_threads: int = 4,
    ) -> PineconeVectorStore:
        """Load a Pinecone vector store bound to the index ``index_name``.

        Args:
            index_name: Name of the Pinecone index to look up.
            embedding: Embedding model handed to the vector store.
            pinecone_api_key: API key forwarded to
                ``PineconeVectorStore.get_pinecone_index``; may be ``None``.
            text_key: Passed to the vector store as ``text_key``.
            namespace: Passed to the vector store as ``namespace``.
            distance_strategy: Passed to the vector store as
                ``distance_strategy``.
            pool_threads: Forwarded to ``get_pinecone_index``.

        Returns:
            A ``PineconeVectorStore`` wrapping the looked-up index.
        """
        pinecone_index = PineconeVectorStore.get_pinecone_index(
            index_name, pool_threads, pinecone_api_key=pinecone_api_key
        )
        return PineconeVectorStore(
            index=pinecone_index,
            embedding=embedding,
            text_key=text_key,
            namespace=namespace,
            distance_strategy=distance_strategy,
        )

    def from_documents(
        self,
        documents: List[Document],
        embedding: Embeddings,
        index_name: str,
        pinecone_api_key: Optional[str],
        text_key: str = "text",
        namespace: Optional[str] = None,
        pool_threads: int = 4,
        distance_strategy: DistanceStrategy = DistanceStrategy.COSINE,
        batch_size: int = 32,
        upsert_kwargs: Optional[dict] = None,
        embeddings_chunk_size: int = 1000,
    ) -> PineconeVectorStore:
        """Create a Pinecone vector store and add ``documents`` to it.

        The store is obtained through ``from_existing_index`` and the page
        content and metadata of every document are then passed to its
        ``add_texts`` method.

        Args:
            documents: Documents whose ``page_content`` and ``metadata`` are
                added to the store.
            embedding: Embedding model handed to the vector store.
            index_name: Name of the Pinecone index to look up.
            pinecone_api_key: API key used to look up the index; may be
                ``None``.
            text_key: Passed to the vector store as ``text_key``.
            namespace: Namespace used for the store and for the added texts.
            pool_threads: Forwarded to the index lookup.
            distance_strategy: Passed to the vector store as
                ``distance_strategy``.
            batch_size: Forwarded to ``add_texts`` as ``batch_size``.
            upsert_kwargs: Extra keyword arguments forwarded to ``add_texts``;
                ``None`` means no extra arguments.
            embeddings_chunk_size: Forwarded to ``add_texts`` as
                ``embedding_chunk_size``.

        Returns:
            The ``PineconeVectorStore`` the texts were added to.
        """
        texts = [d.page_content for d in documents]
        metadatas = [d.metadata for d in documents]
        vector_store = self.from_existing_index(
            index_name=index_name,
            embedding=embedding,
            pinecone_api_key=pinecone_api_key,
            text_key=text_key,
            namespace=namespace,
            distance_strategy=distance_strategy,
            pool_threads=pool_threads,
        )
        vector_store.add_texts(
            texts,
            metadatas=metadatas,
            ids=None,
            namespace=namespace,
            batch_size=batch_size,
            embedding_chunk_size=embeddings_chunk_size,
            **(upsert_kwargs or {}),
        )
        return vector_store

    def build(
        self,
        embedding: Embeddings,
        distance_strategy: str,
        documents: Optional[List[Document]] = None,
        text_key: str = "text",
        pool_threads: int = 4,
        index_name: Optional[str] = None,
        pinecone_api_key: Optional[str] = None,
        namespace: Optional[str] = "default",
    ) -> Union[VectorStore, BaseRetriever]:
        """Build the Pinecone vector store for this component.

        Args:
            embedding: Embedding model handed to the vector store.
            distance_strategy: Display label of a ``DistanceStrategy`` member,
                in the format produced by ``build_config``.
            documents: Documents to add to the index; when empty or ``None``
                the existing index is used as-is.
            text_key: Passed to the vector store as ``text_key``.
            pool_threads: Forwarded to the index lookup.
            index_name: Name of the Pinecone index; required.
            pinecone_api_key: API key used to look up the index.
            namespace: Namespace used for the store.

        Returns:
            The vector store produced by ``from_documents`` or
            ``from_existing_index``.

        Raises:
            KeyError: If ``distance_strategy`` does not name a
                ``DistanceStrategy`` member.
            ValueError: If ``index_name`` is empty or ``None``.
        """
        # Reverse the display formatting from build_config ("Some Name" ->
        # "SOME_NAME") and look the member up by name.
        strategy_name = distance_strategy.replace(" ", "_").upper()
        _distance_strategy = DistanceStrategy[strategy_name]
        if not index_name:
            raise ValueError("Index Name is required.")

        if documents:
            return self.from_documents(
                documents=documents,
                embedding=embedding,
                index_name=index_name,
                pinecone_api_key=pinecone_api_key,
                text_key=text_key,
                namespace=namespace,
                distance_strategy=_distance_strategy,
                pool_threads=pool_threads,
            )

        return self.from_existing_index(
            index_name=index_name,
            embedding=embedding,
            pinecone_api_key=pinecone_api_key,
            text_key=text_key,
            namespace=namespace,
            distance_strategy=_distance_strategy,
            pool_threads=pool_threads,
        )