tests: fix up the astra integ tests and add vectorize tests (#2616)
* fix up the astra integ tests and add vectorize tests
* [autofix.ci] apply automated fixes

---------

Co-authored-by: autofix-ci[bot] <114827586+autofix-ci[bot]@users.noreply.github.com>
This commit is contained in:
parent
34c1a4d469
commit
46966d164d
4 changed files with 151 additions and 16 deletions
|
|
@ -88,7 +88,7 @@ class LCVectorStoreComponent(Component):
|
|||
|
||||
def search_documents(self) -> List[Data]:
|
||||
"""
|
||||
Search for documents in the Chroma vector store.
|
||||
Search for documents in the vector store.
|
||||
"""
|
||||
search_query: str = self.search_query
|
||||
if not search_query:
|
||||
|
|
|
|||
|
|
@ -4,7 +4,7 @@ from langflow.inputs.inputs import DictInput, SecretStrInput, MessageTextInput,
|
|||
from langflow.template.field.base import Output
|
||||
|
||||
|
||||
class AstraVectorize(Component):
|
||||
class AstraVectorizeComponent(Component):
|
||||
display_name: str = "Astra Vectorize"
|
||||
description: str = "Configuration options for Astra Vectorize server-side embeddings."
|
||||
documentation: str = "https://docs.datastax.com/en/astra-db-serverless/databases/embedding-generation.html"
|
||||
|
|
@ -92,7 +92,7 @@ class AstraVectorize(Component):
|
|||
|
||||
def build_options(self) -> dict[str, Any]:
|
||||
provider_value = self.VECTORIZE_PROVIDERS_MAPPING[self.provider][0]
|
||||
authentication = {**self.authentication}
|
||||
authentication = {**(self.authentication or {})}
|
||||
api_key_name = self.api_key_name
|
||||
if api_key_name:
|
||||
authentication["providerKey"] = api_key_name
|
||||
|
|
@ -102,7 +102,7 @@ class AstraVectorize(Component):
|
|||
"provider": provider_value,
|
||||
"modelName": self.model_name,
|
||||
"authentication": authentication,
|
||||
"parameters": self.model_parameters,
|
||||
"parameters": self.model_parameters or {},
|
||||
},
|
||||
"collection_embedding_api_key": self.provider_api_key,
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,4 +1,5 @@
|
|||
from .AmazonBedrockEmbeddings import AmazonBedrockEmbeddingsComponent
|
||||
from .AstraVectorize import AstraVectorizeComponent
|
||||
from .AzureOpenAIEmbeddings import AzureOpenAIEmbeddingsComponent
|
||||
from .CohereEmbeddings import CohereEmbeddingsComponent
|
||||
from .HuggingFaceEmbeddings import HuggingFaceEmbeddingsComponent
|
||||
|
|
@ -9,6 +10,7 @@ from .VertexAIEmbeddings import VertexAIEmbeddingsComponent
|
|||
|
||||
__all__ = [
|
||||
"AmazonBedrockEmbeddingsComponent",
|
||||
"AstraVectorizeComponent",
|
||||
"AzureOpenAIEmbeddingsComponent",
|
||||
"CohereEmbeddingsComponent",
|
||||
"HuggingFaceEmbeddingsComponent",
|
||||
|
|
|
|||
|
|
@ -1,5 +1,7 @@
|
|||
import os
|
||||
|
||||
from langchain_astradb import AstraDBVectorStore, CollectionVectorServiceOptions
|
||||
from langflow.components.embeddings.AstraVectorize import AstraVectorizeComponent
|
||||
import pytest
|
||||
from integration.utils import MockEmbeddings, check_env_vars
|
||||
from langchain_core.documents import Document
|
||||
|
|
@ -11,7 +13,10 @@ from langflow.schema.data import Data
|
|||
|
||||
COLLECTION = "test_basic"
|
||||
SEARCH_COLLECTION = "test_search"
|
||||
MEMORY_COLLECTION = "test_memory"
|
||||
# MEMORY_COLLECTION = "test_memory"
|
||||
VECTORIZE_COLLECTION = "test_vectorize"
|
||||
VECTORIZE_COLLECTION_OPENAI = "test_vectorize_openai"
|
||||
VECTORIZE_COLLECTION_OPENAI_WITH_AUTH = "test_vectorize_openai_auth"
|
||||
|
||||
|
||||
@pytest.fixture()
|
||||
|
|
@ -77,24 +82,152 @@ def test_astra_embeds_and_search(astra_fixture):
|
|||
api_endpoint=api_endpoint,
|
||||
collection_name=SEARCH_COLLECTION,
|
||||
embedding=embedding,
|
||||
inputs=records,
|
||||
add_to_vector_store=True,
|
||||
)
|
||||
component.build_vector_store()
|
||||
|
||||
component.build(
|
||||
token=application_token,
|
||||
api_endpoint=api_endpoint,
|
||||
collection_name=SEARCH_COLLECTION,
|
||||
embedding=embedding,
|
||||
input_value="test1",
|
||||
ingest_data=records,
|
||||
search_input="test1",
|
||||
number_of_results=1,
|
||||
)
|
||||
component.build_vector_store()
|
||||
records = component.search_documents()
|
||||
|
||||
assert len(records) == 1
|
||||
|
||||
|
||||
@pytest.mark.skipif(
    not check_env_vars("ASTRA_DB_APPLICATION_TOKEN", "ASTRA_DB_API_ENDPOINT"),
    reason="missing astra env vars",
)
def test_astra_vectorize():
    """Ingest two records through a vectorize-configured collection and search them.

    A collection is first created directly with ``AstraDBVectorStore`` using
    the ``nvidia`` / ``NV-Embed-QA`` vector service options. The options dict
    produced by ``AstraVectorizeComponent.build_options()`` is then handed to
    ``AstraVectorStoreComponent`` as its ``embedding``, and the search is
    expected to return both ingested records. The collection is deleted in
    ``finally`` whenever the store object was created.
    """
    store = None
    try:
        # Read the credentials once; they are used for both the raw store and the component.
        token = os.getenv("ASTRA_DB_APPLICATION_TOKEN")
        endpoint = os.getenv("ASTRA_DB_API_ENDPOINT")

        service_options = CollectionVectorServiceOptions.from_dict(
            {"provider": "nvidia", "modelName": "NV-Embed-QA", "parameters": {}, "authentication": {}}
        )
        store = AstraDBVectorStore(
            collection_name=VECTORIZE_COLLECTION,
            api_endpoint=endpoint,
            token=token,
            collection_vector_service_options=service_options,
        )

        ingest = [Data.from_document(Document(page_content=text)) for text in ("test1", "test2")]

        vectorize = AstraVectorizeComponent()
        vectorize.build(provider="NVIDIA", model_name="NV-Embed-QA")

        component = AstraVectorStoreComponent()
        component.build(
            token=token,
            api_endpoint=endpoint,
            collection_name=VECTORIZE_COLLECTION,
            ingest_data=ingest,
            embedding=vectorize.build_options(),
            search_input="test",
            number_of_results=2,
        )
        component.build_vector_store()
        found = component.search_documents()

        assert len(found) == 2
    finally:
        # Only clean up when the collection handle was actually obtained.
        if store is not None:
            store.delete_collection()
|
||||
|
||||
|
||||
@pytest.mark.skipif(
    not check_env_vars("ASTRA_DB_APPLICATION_TOKEN", "ASTRA_DB_API_ENDPOINT", "OPENAI_API_KEY"),
    reason="missing env vars",
)
def test_astra_vectorize_with_provider_api_key():
    """Exercise vectorize with OpenAI, supplying the key as ``provider_api_key``.

    The collection is created directly with ``AstraDBVectorStore`` (passing
    ``OPENAI_API_KEY`` as ``collection_embedding_api_key``). The same key is
    given to ``AstraVectorizeComponent`` and the resulting options are used as
    the ``embedding`` of ``AstraVectorStoreComponent``. Both ingested records
    must come back from the search. The collection is deleted in ``finally``
    whenever the store object was created.
    """
    store = None
    try:
        token = os.getenv("ASTRA_DB_APPLICATION_TOKEN")
        endpoint = os.getenv("ASTRA_DB_API_ENDPOINT")

        service_options = CollectionVectorServiceOptions.from_dict(
            {"provider": "openai", "modelName": "text-embedding-3-small", "parameters": {}, "authentication": {}}
        )
        store = AstraDBVectorStore(
            collection_name=VECTORIZE_COLLECTION_OPENAI,
            api_endpoint=endpoint,
            token=token,
            collection_vector_service_options=service_options,
            collection_embedding_api_key=os.getenv("OPENAI_API_KEY"),
        )

        ingest = [Data.from_document(Document(page_content=text)) for text in ("test1", "test2")]

        vectorize = AstraVectorizeComponent()
        vectorize.build(
            provider="OpenAI",
            model_name="text-embedding-3-small",
            provider_api_key=os.getenv("OPENAI_API_KEY"),
        )

        component = AstraVectorStoreComponent()
        component.build(
            token=token,
            api_endpoint=endpoint,
            collection_name=VECTORIZE_COLLECTION_OPENAI,
            ingest_data=ingest,
            embedding=vectorize.build_options(),
            search_input="test",
        )
        component.build_vector_store()
        found = component.search_documents()

        assert len(found) == 2
    finally:
        # Only clean up when the collection handle was actually obtained.
        if store is not None:
            store.delete_collection()
|
||||
|
||||
|
||||
@pytest.mark.skipif(
    not check_env_vars("ASTRA_DB_APPLICATION_TOKEN", "ASTRA_DB_API_ENDPOINT", "OPENAI_API_KEY"),
    reason="missing env vars",
)
def test_astra_vectorize_passes_authentication():
    """Exercise vectorize with OpenAI, using the ``authentication`` parameter.

    The collection is created directly with ``AstraDBVectorStore`` using
    ``{"providerKey": "providerKey"}`` as the authentication block. The same
    mapping is passed to ``AstraVectorizeComponent`` and the resulting options
    are used as the ``embedding`` of ``AstraVectorStoreComponent``. Both
    ingested records must come back from the search. The collection is deleted
    in ``finally`` whenever the store object was created.
    """
    store = None
    try:
        token = os.getenv("ASTRA_DB_APPLICATION_TOKEN")
        endpoint = os.getenv("ASTRA_DB_API_ENDPOINT")

        service_options = CollectionVectorServiceOptions.from_dict(
            {
                "provider": "openai",
                "modelName": "text-embedding-3-small",
                "parameters": {},
                "authentication": {"providerKey": "providerKey"},
            }
        )
        store = AstraDBVectorStore(
            collection_name=VECTORIZE_COLLECTION_OPENAI_WITH_AUTH,
            api_endpoint=endpoint,
            token=token,
            collection_vector_service_options=service_options,
        )

        ingest = [Data.from_document(Document(page_content=text)) for text in ("test1", "test2")]

        vectorize = AstraVectorizeComponent()
        vectorize.build(
            provider="OpenAI",
            model_name="text-embedding-3-small",
            authentication={"providerKey": "providerKey"},
        )

        component = AstraVectorStoreComponent()
        component.build(
            token=token,
            api_endpoint=endpoint,
            collection_name=VECTORIZE_COLLECTION_OPENAI_WITH_AUTH,
            ingest_data=ingest,
            embedding=vectorize.build_options(),
            search_input="test",
        )
        component.build_vector_store()
        found = component.search_documents()

        assert len(found) == 2
    finally:
        # Only clean up when the collection handle was actually obtained.
        if store is not None:
            store.delete_collection()
|
||||
|
||||
|
||||
# @pytest.mark.skipif(
|
||||
# not check_env_vars("ASTRA_DB_APPLICATION_TOKEN", "ASTRA_DB_API_ENDPOINT"),
|
||||
# reason="missing astra env vars",
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue