Fixes Pinecone implementation (#1856)
* ⬆️ (pyproject.toml): upgrade langchain-pinecone dependency to version 0.1.0
* 📝 (Pinecone.py): update the Pinecone component to use the new DistanceStrategy enum for distance calculation and improve the configuration options for Pinecone vector store creation
* ⬆️ (pyproject.toml): upgrade package version from 0.6.18 to 0.6.19
This commit is contained in:
parent
83dfa4d160
commit
731eec3dc5
3 changed files with 662 additions and 581 deletions
1112
poetry.lock
generated
1112
poetry.lock
generated
File diff suppressed because it is too large
Load diff
|
|
@ -1,6 +1,6 @@
|
|||
[tool.poetry]
|
||||
name = "langflow"
|
||||
version = "0.6.18"
|
||||
version = "0.6.19"
|
||||
description = "A Python package with a built-in web application"
|
||||
authors = ["Logspace <contact@logspace.ai>"]
|
||||
maintainers = [
|
||||
|
|
@ -105,6 +105,7 @@ llama-index = "0.9.48"
|
|||
langchain-openai = "^0.0.6"
|
||||
urllib3 = "<2"
|
||||
langchain-anthropic = "^0.1.4"
|
||||
langchain-pinecone = "^0.1.0"
|
||||
|
||||
[tool.poetry.group.dev.dependencies]
|
||||
pytest-asyncio = "^0.23.1"
|
||||
|
|
|
|||
|
|
@ -1,63 +1,145 @@
|
|||
import os
|
||||
from typing import List, Optional, Union
|
||||
|
||||
import pinecone # type: ignore
|
||||
from langchain.schema import BaseRetriever
|
||||
from langchain_community.vectorstores import VectorStore
|
||||
from langchain_community.vectorstores.pinecone import Pinecone
|
||||
from langflow import CustomComponent
|
||||
from langflow.field_typing import Document, Embeddings
|
||||
from langchain_core.documents import Document
|
||||
from langchain_pinecone._utilities import DistanceStrategy
|
||||
from langchain_pinecone.vectorstores import PineconeVectorStore
|
||||
|
||||
from langflow.field_typing import Embeddings
|
||||
from langflow.interface.custom.custom_component import CustomComponent
|
||||
|
||||
|
||||
class PineconeComponent(CustomComponent):
|
||||
display_name = "Pinecone"
|
||||
description = "Construct Pinecone wrapper from raw documents."
|
||||
icon = "Pinecone"
|
||||
field_order = ["index_name", "namespace", "distance_strategy", "pinecone_api_key", "documents", "embedding"]
|
||||
|
||||
def build_config(self):
|
||||
distance_options = [e.value.title().replace("_", " ") for e in DistanceStrategy]
|
||||
distance_value = distance_options[0]
|
||||
return {
|
||||
"documents": {"display_name": "Documents"},
|
||||
"inputs": {"display_name": "Input", "input_types": ["Document", "Record"]},
|
||||
"embedding": {"display_name": "Embedding"},
|
||||
"index_name": {"display_name": "Index Name"},
|
||||
"namespace": {"display_name": "Namespace"},
|
||||
"pinecone_api_key": {"display_name": "Pinecone API Key", "default": "", "password": True, "required": True},
|
||||
"pinecone_env": {"display_name": "Pinecone Environment", "default": "", "required": True},
|
||||
"search_kwargs": {"display_name": "Search Kwargs", "default": "{}"},
|
||||
"pool_threads": {"display_name": "Pool Threads", "default": 1, "advanced": True},
|
||||
"text_key": {"display_name": "Text Key"},
|
||||
"distance_strategy": {
|
||||
"display_name": "Distance Strategy",
|
||||
# get values from enum
|
||||
# and make them title case for display
|
||||
"options": distance_options,
|
||||
"advanced": True,
|
||||
"value": distance_value,
|
||||
},
|
||||
"pinecone_api_key": {
|
||||
"display_name": "Pinecone API Key",
|
||||
"default": "",
|
||||
"password": True,
|
||||
"required": True,
|
||||
},
|
||||
"pool_threads": {
|
||||
"display_name": "Pool Threads",
|
||||
"default": 1,
|
||||
"advanced": True,
|
||||
},
|
||||
}
|
||||
|
||||
def from_existing_index(
    self,
    index_name: str,
    embedding: Embeddings,
    # Optional[...] rather than `str | None`: it matches the other
    # annotations in this component and does not require Python 3.10+ to
    # be evaluated when the method is defined.
    pinecone_api_key: Optional[str],
    text_key: str = "text",
    namespace: Optional[str] = None,
    distance_strategy: DistanceStrategy = DistanceStrategy.COSINE,
    pool_threads: int = 4,
) -> PineconeVectorStore:
    """Load a Pinecone vector store backed by an existing index.

    Args:
        index_name: Name of the Pinecone index to look up.
        embedding: Embeddings object handed to the vector store.
        pinecone_api_key: API key forwarded to the index lookup; may be None.
        text_key: Passed through as the store's ``text_key``.
        namespace: Passed through as the store's ``namespace``.
        distance_strategy: Passed through as the store's
            ``distance_strategy``.
        pool_threads: Forwarded to the index lookup.

    Returns:
        A ``PineconeVectorStore`` wrapping the looked-up index.
    """
    pinecone_index = PineconeVectorStore.get_pinecone_index(
        index_name, pool_threads, pinecone_api_key=pinecone_api_key
    )
    return PineconeVectorStore(
        index=pinecone_index,
        embedding=embedding,
        text_key=text_key,
        namespace=namespace,
        distance_strategy=distance_strategy,
    )
|
||||
|
||||
def from_documents(
    self,
    documents: List[Document],
    embedding: Embeddings,
    index_name: str,
    # Optional[...] rather than `str | None`: it matches the other
    # annotations in this component and does not require Python 3.10+ to
    # be evaluated when the method is defined.
    pinecone_api_key: Optional[str],
    text_key: str = "text",
    namespace: Optional[str] = None,
    pool_threads: int = 4,
    distance_strategy: DistanceStrategy = DistanceStrategy.COSINE,
    batch_size: int = 32,
    upsert_kwargs: Optional[dict] = None,
    embeddings_chunk_size: int = 1000,
) -> PineconeVectorStore:
    """Add documents to an existing Pinecone index and return the store.

    The store is obtained through ``from_existing_index`` and the
    documents' texts and metadata are then written with ``add_texts``.

    Args:
        documents: Documents whose ``page_content`` and ``metadata`` are
            written to the store.
        embedding: Embeddings object handed to the vector store.
        index_name: Name of the Pinecone index to attach to.
        pinecone_api_key: API key forwarded to the index lookup; may be None.
        text_key: Passed through as the store's ``text_key``.
        namespace: Used both for the store and for the ``add_texts`` call.
        pool_threads: Forwarded to the index lookup.
        distance_strategy: Passed through as the store's
            ``distance_strategy``.
        batch_size: Forwarded to ``add_texts``.
        upsert_kwargs: Extra keyword arguments forwarded to ``add_texts``;
            ``None`` means no extras.
        embeddings_chunk_size: Forwarded to ``add_texts`` as
            ``embedding_chunk_size``.

    Returns:
        The ``PineconeVectorStore`` the documents were added to.
    """
    texts = [d.page_content for d in documents]
    metadatas = [d.metadata for d in documents]
    # Named `vector_store` rather than `pinecone` so the local does not
    # reuse the name of the `pinecone` client module.
    vector_store = self.from_existing_index(
        index_name=index_name,
        embedding=embedding,
        pinecone_api_key=pinecone_api_key,
        text_key=text_key,
        namespace=namespace,
        distance_strategy=distance_strategy,
        pool_threads=pool_threads,
    )
    vector_store.add_texts(
        texts,
        metadatas=metadatas,
        ids=None,
        namespace=namespace,
        batch_size=batch_size,
        embedding_chunk_size=embeddings_chunk_size,
        **(upsert_kwargs or {}),
    )
    return vector_store
|
||||
|
||||
def build(
|
||||
self,
|
||||
embedding: Embeddings,
|
||||
pinecone_env: str,
|
||||
documents: List[Document],
|
||||
distance_strategy: str,
|
||||
documents: Optional[List[Document]] = None,
|
||||
text_key: str = "text",
|
||||
pool_threads: int = 4,
|
||||
index_name: Optional[str] = None,
|
||||
pinecone_api_key: Optional[str] = None,
|
||||
namespace: Optional[str] = "default",
|
||||
) -> Union[VectorStore, Pinecone, BaseRetriever]:
|
||||
if pinecone_api_key is None or pinecone_env is None:
|
||||
raise ValueError("Pinecone API Key and Environment are required.")
|
||||
if os.getenv("PINECONE_API_KEY") is None and pinecone_api_key is None:
|
||||
raise ValueError("Pinecone API Key is required.")
|
||||
|
||||
pinecone.init(api_key=pinecone_api_key, environment=pinecone_env) # type: ignore
|
||||
) -> Union[VectorStore, BaseRetriever]:
|
||||
# get distance strategy from string
|
||||
distance_strategy = distance_strategy.replace(" ", "_").upper()
|
||||
_distance_strategy = DistanceStrategy[distance_strategy]
|
||||
if not index_name:
|
||||
raise ValueError("Index Name is required.")
|
||||
|
||||
if documents:
|
||||
return Pinecone.from_documents(
|
||||
return self.from_documents(
|
||||
documents=documents,
|
||||
embedding=embedding,
|
||||
index_name=index_name,
|
||||
pool_threads=pool_threads,
|
||||
namespace=namespace,
|
||||
pinecone_api_key=pinecone_api_key,
|
||||
text_key=text_key,
|
||||
namespace=namespace,
|
||||
distance_strategy=_distance_strategy,
|
||||
pool_threads=pool_threads,
|
||||
)
|
||||
|
||||
return Pinecone.from_existing_index(
|
||||
return self.from_existing_index(
|
||||
index_name=index_name,
|
||||
embedding=embedding,
|
||||
pinecone_api_key=pinecone_api_key,
|
||||
text_key=text_key,
|
||||
namespace=namespace,
|
||||
distance_strategy=_distance_strategy,
|
||||
pool_threads=pool_threads,
|
||||
)
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue