From 21be1268aef70c3a5a7cd4b65ad1b997987940f1 Mon Sep 17 00:00:00 2001 From: Gabriel Luiz Freitas Almeida Date: Tue, 7 May 2024 17:46:54 -0300 Subject: [PATCH] Update langchain-pinecone dependency and add Pinecone API key (#1854) * Update langchain-pinecone dependency and related code * Add Pinecone API key to environment variables * Fix commented out code in codeTabsComponent, parameterComponent, and editNodeModal * Add PineconeSearchComponent to PineconeSearch.py * Update PineconeComponent to accept pinecone_api_key as optional parameter --- docs/docs/migration/global-variables.mdx | 1 + poetry.lock | 18 ++- pyproject.toml | 1 + .../components/vectorsearch/PineconeSearch.py | 30 +++-- .../components/vectorstores/Pinecone.py | 107 ++++++++++++++---- .../langflow/services/settings/constants.py | 1 + .../components/codeTabsComponent/index.tsx | 2 +- .../components/parameterComponent/index.tsx | 2 +- .../src/modals/editNodeModal/index.tsx | 2 +- 9 files changed, 131 insertions(+), 33 deletions(-) diff --git a/docs/docs/migration/global-variables.mdx b/docs/docs/migration/global-variables.mdx index 17ec50ab5..630b9f858 100644 --- a/docs/docs/migration/global-variables.mdx +++ b/docs/docs/migration/global-variables.mdx @@ -101,6 +101,7 @@ The default list at the moment is: - GOOGLE_API_KEY - HUGGINGFACEHUB_API_TOKEN - OPENAI_API_KEY +- PINECONE_API_KEY - SEARCHAPI_API_KEY - SERPAPI_API_KEY - VECTARA_CUSTOMER_ID diff --git a/poetry.lock b/poetry.lock index ef35c9efb..704cc6070 100644 --- a/poetry.lock +++ b/poetry.lock @@ -4166,6 +4166,22 @@ langchain-core = ">=0.1.46,<0.2.0" openai = ">=1.24.0,<2.0.0" tiktoken = ">=0.5.2,<1" +[[package]] +name = "langchain-pinecone" +version = "0.1.0" +description = "An integration package connecting Pinecone and LangChain" +optional = false +python-versions = "<3.13,>=3.8.1" +files = [ + {file = "langchain_pinecone-0.1.0-py3-none-any.whl", hash = "sha256:d957f27b1cceab425c3e8603c7a32533d4593ce8705242e78f6dc03aa71cf417"}, + {file = "langchain_pinecone-0.1.0.tar.gz", hash = "sha256:93f81e7c3926027cc6a87b001ee4d2e02a432a916709dbd395162b342bf84586"}, +] + +[package.dependencies] +langchain-core = ">=0.1.40,<0.2.0" +numpy = ">=1,<2" +pinecone-client = ">=3.2.2,<4.0.0" + [[package]] name = "langchain-text-splitters" version = "0.0.1" @@ -10670,4 +10686,4 @@ local = ["ctransformers", "llama-cpp-python", "sentence-transformers"] [metadata] lock-version = "2.0" python-versions = ">=3.10,<3.12" -content-hash = "f3ff2734049f016f662c61e65efb63aee34455df7ee7d89b24ce49ea523db629" +content-hash = "136ea07bf3269774c4808a162f8cb5c98162bddf04870e0dd20194a886ade53d" diff --git a/pyproject.toml b/pyproject.toml index ad8c839cf..612e55460 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -83,6 +83,7 @@ langchain-astradb = "^0.1.0" langchain-openai = "^0.1.1" zep-python = { version = "^2.0.0rc5", allow-prereleases = true } langchain-google-vertexai = "^1.0.3" +langchain-pinecone = "^0.1.0" [tool.poetry.group.dev.dependencies] types-redis = "^4.6.0.5" diff --git a/src/backend/base/langflow/components/vectorsearch/PineconeSearch.py b/src/backend/base/langflow/components/vectorsearch/PineconeSearch.py index 3f318c748..d4818f354 100644 --- a/src/backend/base/langflow/components/vectorsearch/PineconeSearch.py +++ b/src/backend/base/langflow/components/vectorsearch/PineconeSearch.py @@ -1,5 +1,7 @@ from typing import List, Optional +from langchain_pinecone._utilities import DistanceStrategy + from langflow.components.vectorstores.base.model import LCVectorStoreComponent from langflow.components.vectorstores.Pinecone import PineconeComponent from langflow.field_typing import Embeddings, Text @@ -11,8 +13,11 @@ class PineconeSearchComponent(PineconeComponent, LCVectorStoreComponent): display_name = "Pinecone Search" description = "Search a Pinecone Vector Store for similar documents." icon = "Pinecone" + field_order = ["index_name", "namespace", "distance_strategy", "pinecone_api_key", "input_value", "embedding"] def build_config(self): + distance_options = [e.value.title().replace("_", " ") for e in DistanceStrategy] + distance_value = distance_options[0] return { "search_type": { "display_name": "Search Type", @@ -21,17 +26,19 @@ class PineconeSearchComponent(PineconeComponent, LCVectorStoreComponent): "input_value": {"display_name": "Input"}, "embedding": {"display_name": "Embedding"}, "index_name": {"display_name": "Index Name"}, - "namespace": {"display_name": "Namespace"}, + "namespace": {"display_name": "Namespace", "advanced": True}, + "distance_strategy": { + "display_name": "Distance Strategy", + # get values from enum + # and make them title case for display + "options": distance_options, + "advanced": True, + "value": distance_value, + }, "pinecone_api_key": { "display_name": "Pinecone API Key", "default": "", "password": True, - "required": True, - }, - "pinecone_env": { - "display_name": "Pinecone Environment", - "default": "", - "required": True, }, "pool_threads": { "display_name": "Pool Threads", @@ -43,13 +50,18 @@ class PineconeSearchComponent(PineconeComponent, LCVectorStoreComponent): "info": "Number of results to return.", "advanced": True, }, + "text_key": { + "display_name": "Text Key", + "info": "Key in the record to use as text.", + "advanced": True, + }, } def build( # type: ignore[override] self, input_value: Text, embedding: Embeddings, - pinecone_env: str, + distance_strategy: str, text_key: str = "text", number_of_results: int = 4, pool_threads: int = 4, @@ -61,7 +73,7 @@ class PineconeSearchComponent(PineconeComponent, LCVectorStoreComponent): ) -> List[Record]: # type: ignore[override] vector_store = super().build( embedding=embedding, - pinecone_env=pinecone_env, + distance_strategy=distance_strategy, inputs=[], text_key=text_key, pool_threads=pool_threads, diff --git a/src/backend/base/langflow/components/vectorstores/Pinecone.py b/src/backend/base/langflow/components/vectorstores/Pinecone.py index f596320b7..b25bb6086 100644 --- a/src/backend/base/langflow/components/vectorstores/Pinecone.py +++ b/src/backend/base/langflow/components/vectorstores/Pinecone.py @@ -1,10 +1,10 @@ -import os from typing import List, Optional, Union -import pinecone # type: ignore from langchain.schema import BaseRetriever from langchain_community.vectorstores import VectorStore -from langchain_community.vectorstores.pinecone import Pinecone +from langchain_core.documents import Document +from langchain_pinecone._utilities import DistanceStrategy +from langchain_pinecone.vectorstores import PineconeVectorStore from langflow.field_typing import Embeddings from langflow.interface.custom.custom_component import CustomComponent @@ -15,24 +15,31 @@ class PineconeComponent(CustomComponent): display_name = "Pinecone" description = "Construct Pinecone wrapper from raw documents." icon = "Pinecone" + field_order = ["index_name", "namespace", "distance_strategy", "pinecone_api_key", "documents", "embedding"] def build_config(self): + distance_options = [e.value.title().replace("_", " ") for e in DistanceStrategy] + distance_value = distance_options[0] return { "inputs": {"display_name": "Input", "input_types": ["Document", "Record"]}, "embedding": {"display_name": "Embedding"}, "index_name": {"display_name": "Index Name"}, "namespace": {"display_name": "Namespace"}, + "text_key": {"display_name": "Text Key"}, + "distance_strategy": { + "display_name": "Distance Strategy", + # get values from enum + # and make them title case for display + "options": distance_options, + "advanced": True, + "value": distance_value, + }, "pinecone_api_key": { "display_name": "Pinecone API Key", "default": "", "password": True, "required": True, }, - "pinecone_env": { - "display_name": "Pinecone Environment", - "default": "", - "required": True, - }, "pool_threads": { "display_name": "Pool Threads", "default": 1, @@ -40,23 +47,79 @@ class PineconeComponent(CustomComponent): }, } + def from_existing_index( + self, + index_name: str, + embedding: Embeddings, + pinecone_api_key: str | None, + text_key: str = "text", + namespace: Optional[str] = None, + distance_strategy: DistanceStrategy = DistanceStrategy.COSINE, + pool_threads: int = 4, + ) -> PineconeVectorStore: + """Load pinecone vectorstore from index name.""" + pinecone_index = PineconeVectorStore.get_pinecone_index( + index_name, pool_threads, pinecone_api_key=pinecone_api_key + ) + return PineconeVectorStore( + index=pinecone_index, + embedding=embedding, + text_key=text_key, + namespace=namespace, + distance_strategy=distance_strategy, + ) + + def from_documents( + self, + documents: List[Document], + embedding: Embeddings, + index_name: str, + pinecone_api_key: str | None, + text_key: str = "text", + namespace: Optional[str] = None, + pool_threads: int = 4, + distance_strategy: DistanceStrategy = DistanceStrategy.COSINE, + batch_size: int = 32, + upsert_kwargs: Optional[dict] = None, + embeddings_chunk_size: int = 1000, + ) -> PineconeVectorStore: + """Create a new pinecone vectorstore from documents.""" + texts = [d.page_content for d in documents] + metadatas = [d.metadata for d in documents] + pinecone = self.from_existing_index( + index_name=index_name, + embedding=embedding, + pinecone_api_key=pinecone_api_key, + text_key=text_key, + namespace=namespace, + distance_strategy=distance_strategy, + pool_threads=pool_threads, + ) + pinecone.add_texts( + texts, + metadatas=metadatas, + ids=None, + namespace=namespace, + batch_size=batch_size, + embedding_chunk_size=embeddings_chunk_size, + **(upsert_kwargs or {}), + ) + return pinecone + def build( self, embedding: Embeddings, - pinecone_env: str, + distance_strategy: str, inputs: Optional[List[Record]] = None, text_key: str = "text", pool_threads: int = 4, index_name: Optional[str] = None, pinecone_api_key: Optional[str] = None, namespace: Optional[str] = "default", - ) -> Union[VectorStore, Pinecone, BaseRetriever]: - if pinecone_api_key is None or pinecone_env is None: - raise ValueError("Pinecone API Key and Environment are required.") - if os.getenv("PINECONE_API_KEY") is None and pinecone_api_key is None: - raise ValueError("Pinecone API Key is required.") - - pinecone.init(api_key=pinecone_api_key, environment=pinecone_env) # type: ignore + ) -> Union[VectorStore, BaseRetriever]: + # get distance strategy from string + distance_strategy = distance_strategy.replace(" ", "_").upper() + _distance_strategy = DistanceStrategy[distance_strategy] if not index_name: raise ValueError("Index Name is required.") documents = [] @@ -66,19 +129,23 @@ class PineconeComponent(CustomComponent): else: documents.append(_input) if documents: - return Pinecone.from_documents( + return self.from_documents( documents=documents, embedding=embedding, index_name=index_name, - pool_threads=pool_threads, - namespace=namespace, + pinecone_api_key=pinecone_api_key, text_key=text_key, + namespace=namespace, + distance_strategy=_distance_strategy, + pool_threads=pool_threads, ) - return Pinecone.from_existing_index( + return self.from_existing_index( index_name=index_name, embedding=embedding, + pinecone_api_key=pinecone_api_key, text_key=text_key, namespace=namespace, + distance_strategy=_distance_strategy, pool_threads=pool_threads, ) diff --git a/src/backend/base/langflow/services/settings/constants.py b/src/backend/base/langflow/services/settings/constants.py index 323f53a01..663810311 100644 --- a/src/backend/base/langflow/services/settings/constants.py +++ b/src/backend/base/langflow/services/settings/constants.py @@ -13,6 +13,7 @@ VARIABLES_TO_GET_FROM_ENVIRONMENT = [ "ASTRA_DB_API_ENDPOINT", "COHERE_API_KEY", "HUGGINGFACEHUB_API_TOKEN", + "PINECONE_API_KEY", "SEARCHAPI_API_KEY", "SERPAPI_API_KEY", "VECTARA_CUSTOMER_ID", diff --git a/src/frontend/src/components/codeTabsComponent/index.tsx b/src/frontend/src/components/codeTabsComponent/index.tsx index 3d1c3af82..355ddb1ec 100644 --- a/src/frontend/src/components/codeTabsComponent/index.tsx +++ b/src/frontend/src/components/codeTabsComponent/index.tsx @@ -749,7 +749,7 @@ export default function CodeTabsComponent({ templateField ].value.toString() === "{}" ? { - yourkey: "value", + // yourkey: "value", } : node.data.node! .template[ diff --git a/src/frontend/src/customNodes/genericNode/components/parameterComponent/index.tsx b/src/frontend/src/customNodes/genericNode/components/parameterComponent/index.tsx index 3fe99b89e..e392c3064 100644 --- a/src/frontend/src/customNodes/genericNode/components/parameterComponent/index.tsx +++ b/src/frontend/src/customNodes/genericNode/components/parameterComponent/index.tsx @@ -715,7 +715,7 @@ export default function ParameterComponent({ !data.node!.template[name].value || data.node!.template[name].value?.toString() === "{}" ? { - yourkey: "value", + // yourkey: "value", } : data.node!.template[name].value } diff --git a/src/frontend/src/modals/editNodeModal/index.tsx b/src/frontend/src/modals/editNodeModal/index.tsx index a97afae09..b853ecfab 100644 --- a/src/frontend/src/modals/editNodeModal/index.tsx +++ b/src/frontend/src/modals/editNodeModal/index.tsx @@ -285,7 +285,7 @@ const EditNodeModal = forwardRef( templateParam ]?.value?.toString() === "{}" ? { - yourkey: "value", + // yourkey: "value", } : myData.node!.template[templateParam] .value