feat(vector_store.py): add support for initializing Pinecone and ChromaDB objects from existing indexes

The code has been refactored to improve readability by adding type hints and reformatting. The initialize_pinecone and initialize_chroma functions now support initializing objects from existing indexes: if params contains no documents or texts (or only empty lists), the functions return an existing index; if params does contain documents or texts, the functions create a new index. The behavior of the initialize_qdrant function has not been changed; only type hints were added to its signature.
🔨 refactor(vector_store.py): improve code readability by adding type hints and reformatting code
This commit is contained in:
Gabriel Luiz Freitas Almeida 2023-06-22 19:03:53 -03:00
commit bfef9495f1

View file

@ -1,15 +1,20 @@
from typing import Any
from typing import Type
from langchain.vectorstores import Pinecone, Qdrant, Chroma
def docs_in_params(params: dict) -> bool:
    """Report whether params carries a non-empty "documents" or "texts" entry.

    Args:
        params: Parameter dictionary handed to the initialize_* helpers.

    Returns:
        True if "documents" or "texts" is present with a truthy value
        (e.g. a non-empty list); False if both are missing or empty.
    """
    # bool() keeps the declared return type honest: a bare `and`/`or` chain
    # would hand back the operand itself (the list, [] or None), not a bool.
    return bool(params.get("documents") or params.get("texts"))
def initialize_pinecone(class_object: Any, params: dict):
def initialize_pinecone(class_object: Type[Pinecone], params: dict):
"""Initialize pinecone and return the class object"""
import pinecone
PINECONE_API_KEY = params.get("pinecone_api_key")
PINECONE_ENV = params.get("pinecone_env")
@ -18,23 +23,31 @@ def initialize_pinecone(class_object: Any, params: dict):
"Pinecone API key and environment must be provided in the params"
)
if not docs_in_params(params):
import pinecone
# initialize pinecone
pinecone.init(
api_key=PINECONE_API_KEY, # find at app.pinecone.io
environment=PINECONE_ENV, # next to api key in console
)
# initialize pinecone
pinecone.init(
api_key=PINECONE_API_KEY, # find at app.pinecone.io
environment=PINECONE_ENV, # next to api key in console
)
# If there are docs in the index, delete them
return class_object.from_existing_index(**params)
# If there are no docs in the params, return an existing index
# but first remove any texts or docs keys from the params
if not docs_in_params(params):
existing_index_params = {
"embedding": params.pop("embedding"),
}
if "index_name" in params:
existing_index_params["index_name"] = params.pop("index_name")
if "namespace" in params:
existing_index_params["namespace"] = params.pop("namespace")
return class_object.from_existing_index(**existing_index_params)
# If there are docs in the params, create a new index
if "texts" in params:
params["documents"] = params.pop("texts")
return class_object.from_documents(**params)
def initialize_chroma(class_object, params):
def initialize_chroma(class_object: Type[Chroma], params: dict):
"""Initialize a ChromaDB object from the params"""
persist = params.pop("persist", False)
if not docs_in_params(params):
@ -54,7 +67,7 @@ def initialize_chroma(class_object, params):
return chromadb
def initialize_qdrant(class_object, params):
def initialize_qdrant(class_object: Type[Qdrant], params: dict):
if not docs_in_params(params):
if "location" not in params and "api_key" not in params:
raise ValueError("Location and API key must be provided in the params")