From 46966d164daf89e45d9697d23614f972d8dfc033 Mon Sep 17 00:00:00 2001
From: Jordan Frazier <122494242+jordanrfrazier@users.noreply.github.com>
Date: Wed, 10 Jul 2024 02:32:34 -0700
Subject: [PATCH] tests: fix up the astra integ tests and add vectorize tests
 (#2616)

* fix up the astra integ tests and add vectorize tests

* [autofix.ci] apply automated fixes

---------

Co-authored-by: autofix-ci[bot] <114827586+autofix-ci[bot]@users.noreply.github.com>
---
 .../base/langflow/base/vectorstores/model.py  |   2 +-
 .../components/embeddings/AstraVectorize.py   |   6 +-
 .../components/embeddings/__init__.py         |   2 +
 .../integration/astra/test_astra_component.py | 157 ++++++++++++++++--
 4 files changed, 151 insertions(+), 16 deletions(-)

diff --git a/src/backend/base/langflow/base/vectorstores/model.py b/src/backend/base/langflow/base/vectorstores/model.py
index a00b56e99..ef48a1d35 100644
--- a/src/backend/base/langflow/base/vectorstores/model.py
+++ b/src/backend/base/langflow/base/vectorstores/model.py
@@ -88,7 +88,7 @@ class LCVectorStoreComponent(Component):
 
     def search_documents(self) -> List[Data]:
         """
-        Search for documents in the Chroma vector store.
+        Search for documents in the vector store.
         """
         search_query: str = self.search_query
         if not search_query:
diff --git a/src/backend/base/langflow/components/embeddings/AstraVectorize.py b/src/backend/base/langflow/components/embeddings/AstraVectorize.py
index 4c8914665..b68159cc9 100644
--- a/src/backend/base/langflow/components/embeddings/AstraVectorize.py
+++ b/src/backend/base/langflow/components/embeddings/AstraVectorize.py
@@ -4,7 +4,7 @@ from langflow.inputs.inputs import DictInput, SecretStrInput, MessageTextInput,
 from langflow.template.field.base import Output
 
 
-class AstraVectorize(Component):
+class AstraVectorizeComponent(Component):
     display_name: str = "Astra Vectorize"
     description: str = "Configuration options for Astra Vectorize server-side embeddings."
     documentation: str = "https://docs.datastax.com/en/astra-db-serverless/databases/embedding-generation.html"
@@ -92,7 +92,7 @@ class AstraVectorize(Component):
 
     def build_options(self) -> dict[str, Any]:
         provider_value = self.VECTORIZE_PROVIDERS_MAPPING[self.provider][0]
-        authentication = {**self.authentication}
+        authentication = {**(self.authentication or {})}
         api_key_name = self.api_key_name
         if api_key_name:
             authentication["providerKey"] = api_key_name
@@ -102,7 +102,7 @@ class AstraVectorize(Component):
                 "provider": provider_value,
                 "modelName": self.model_name,
                 "authentication": authentication,
-                "parameters": self.model_parameters,
+                "parameters": self.model_parameters or {},
             },
             "collection_embedding_api_key": self.provider_api_key,
         }
diff --git a/src/backend/base/langflow/components/embeddings/__init__.py b/src/backend/base/langflow/components/embeddings/__init__.py
index a55a13ffe..79f8173d1 100644
--- a/src/backend/base/langflow/components/embeddings/__init__.py
+++ b/src/backend/base/langflow/components/embeddings/__init__.py
@@ -1,4 +1,5 @@
 from .AmazonBedrockEmbeddings import AmazonBedrockEmbeddingsComponent
+from .AstraVectorize import AstraVectorizeComponent
 from .AzureOpenAIEmbeddings import AzureOpenAIEmbeddingsComponent
 from .CohereEmbeddings import CohereEmbeddingsComponent
 from .HuggingFaceEmbeddings import HuggingFaceEmbeddingsComponent
@@ -9,6 +10,7 @@ from .VertexAIEmbeddings import VertexAIEmbeddingsComponent
 
 __all__ = [
     "AmazonBedrockEmbeddingsComponent",
+    "AstraVectorizeComponent",
     "AzureOpenAIEmbeddingsComponent",
     "CohereEmbeddingsComponent",
     "HuggingFaceEmbeddingsComponent",
diff --git a/tests/integration/astra/test_astra_component.py b/tests/integration/astra/test_astra_component.py
index e8982c797..8dd7c6965 100644
--- a/tests/integration/astra/test_astra_component.py
+++ b/tests/integration/astra/test_astra_component.py
@@ -1,5 +1,7 @@
 import os
 
+from langchain_astradb import AstraDBVectorStore, CollectionVectorServiceOptions
+from langflow.components.embeddings.AstraVectorize import AstraVectorizeComponent
 import pytest
 from integration.utils import MockEmbeddings, check_env_vars
 from langchain_core.documents import Document
@@ -11,7 +13,10 @@ from langflow.schema.data import Data
 
 COLLECTION = "test_basic"
 SEARCH_COLLECTION = "test_search"
-MEMORY_COLLECTION = "test_memory"
+# MEMORY_COLLECTION = "test_memory"
+VECTORIZE_COLLECTION = "test_vectorize"
+VECTORIZE_COLLECTION_OPENAI = "test_vectorize_openai"
+VECTORIZE_COLLECTION_OPENAI_WITH_AUTH = "test_vectorize_openai_auth"
 
 
 @pytest.fixture()
@@ -77,24 +82,152 @@ def test_astra_embeds_and_search(astra_fixture):
         api_endpoint=api_endpoint,
         collection_name=SEARCH_COLLECTION,
         embedding=embedding,
-        inputs=records,
-        add_to_vector_store=True,
-    )
-    component.build_vector_store()
-
-    component.build(
-        token=application_token,
-        api_endpoint=api_endpoint,
-        collection_name=SEARCH_COLLECTION,
-        embedding=embedding,
-        input_value="test1",
+        ingest_data=records,
+        search_input="test1",
         number_of_results=1,
     )
+    component.build_vector_store()
     records = component.search_documents()
 
     assert len(records) == 1
 
 
+@pytest.mark.skipif(
+    not check_env_vars("ASTRA_DB_APPLICATION_TOKEN", "ASTRA_DB_API_ENDPOINT"),
+    reason="missing astra env vars",
+)
+def test_astra_vectorize():
+    """Tests vectorize with the NVIDIA provider, where no provider api key is passed."""
+    store = None  # remains None if AstraDBVectorStore() raises, so the finally block skips cleanup
+    try:
+        application_token = os.getenv("ASTRA_DB_APPLICATION_TOKEN")
+        api_endpoint = os.getenv("ASTRA_DB_API_ENDPOINT")
+        options = {"provider": "nvidia", "modelName": "NV-Embed-QA", "parameters": {}, "authentication": {}}
+        store = AstraDBVectorStore(
+            collection_name=VECTORIZE_COLLECTION,
+            api_endpoint=api_endpoint,
+            token=application_token,
+            collection_vector_service_options=CollectionVectorServiceOptions.from_dict(options),
+        )
+
+        documents = [Document(page_content="test1"), Document(page_content="test2")]
+        records = [Data.from_document(d) for d in documents]
+
+        vectorize = AstraVectorizeComponent()
+        vectorize.build(provider="NVIDIA", model_name="NV-Embed-QA")
+        vectorize_options = vectorize.build_options()
+
+        component = AstraVectorStoreComponent()
+        component.build(
+            token=application_token,
+            api_endpoint=api_endpoint,
+            collection_name=VECTORIZE_COLLECTION,
+            ingest_data=records,
+            embedding=vectorize_options,
+            search_input="test",
+            number_of_results=2,
+        )
+        component.build_vector_store()
+        results = component.search_documents()
+
+        assert len(results) == 2  # expects as many results as documents ingested above
+    finally:
+        if store is not None:
+            store.delete_collection()  # runs whether or not the assertion passed
+
+
+@pytest.mark.skipif(
+    not check_env_vars("ASTRA_DB_APPLICATION_TOKEN", "ASTRA_DB_API_ENDPOINT", "OPENAI_API_KEY"),
+    reason="missing env vars",
+)
+def test_astra_vectorize_with_provider_api_key():
+    """Tests vectorize with the OpenAI provider, passing OPENAI_API_KEY as the provider api key."""
+    store = None  # remains None if AstraDBVectorStore() raises, so the finally block skips cleanup
+    try:
+        application_token = os.getenv("ASTRA_DB_APPLICATION_TOKEN")
+        api_endpoint = os.getenv("ASTRA_DB_API_ENDPOINT")
+        options = {"provider": "openai", "modelName": "text-embedding-3-small", "parameters": {}, "authentication": {}}
+        store = AstraDBVectorStore(
+            collection_name=VECTORIZE_COLLECTION_OPENAI,
+            api_endpoint=api_endpoint,
+            token=application_token,
+            collection_vector_service_options=CollectionVectorServiceOptions.from_dict(options),
+            collection_embedding_api_key=os.getenv("OPENAI_API_KEY"),
+        )
+        documents = [Document(page_content="test1"), Document(page_content="test2")]
+        records = [Data.from_document(d) for d in documents]
+
+        vectorize = AstraVectorizeComponent()
+        vectorize.build(
+            provider="OpenAI", model_name="text-embedding-3-small", provider_api_key=os.getenv("OPENAI_API_KEY")
+        )
+        vectorize_options = vectorize.build_options()
+
+        component = AstraVectorStoreComponent()
+        component.build(
+            token=application_token,
+            api_endpoint=api_endpoint,
+            collection_name=VECTORIZE_COLLECTION_OPENAI,
+            ingest_data=records,
+            embedding=vectorize_options,
+            search_input="test",
+        )
+        component.build_vector_store()
+        records = component.search_documents()  # rebinds records from the ingested data to the search results
+        assert len(records) == 2  # expects as many results as documents ingested above
+    finally:
+        if store is not None:
+            store.delete_collection()  # runs whether or not the assertion passed
+
+
+@pytest.mark.skipif(
+    not check_env_vars("ASTRA_DB_APPLICATION_TOKEN", "ASTRA_DB_API_ENDPOINT", "OPENAI_API_KEY"),
+    reason="missing env vars",
+)
+def test_astra_vectorize_passes_authentication():
+    """Tests vectorize with the OpenAI provider, configured through the authentication parameter."""
+    store = None  # remains None if AstraDBVectorStore() raises, so the finally block skips cleanup
+    try:
+        application_token = os.getenv("ASTRA_DB_APPLICATION_TOKEN")
+        api_endpoint = os.getenv("ASTRA_DB_API_ENDPOINT")
+        options = {
+            "provider": "openai",
+            "modelName": "text-embedding-3-small",
+            "parameters": {},
+            "authentication": {"providerKey": "providerKey"},
+        }
+        store = AstraDBVectorStore(
+            collection_name=VECTORIZE_COLLECTION_OPENAI_WITH_AUTH,
+            api_endpoint=api_endpoint,
+            token=application_token,
+            collection_vector_service_options=CollectionVectorServiceOptions.from_dict(options),
+        )
+        documents = [Document(page_content="test1"), Document(page_content="test2")]
+        records = [Data.from_document(d) for d in documents]
+
+        vectorize = AstraVectorizeComponent()
+        vectorize.build(  # no provider_api_key here; only the authentication dict is supplied
+            provider="OpenAI", model_name="text-embedding-3-small", authentication={"providerKey": "providerKey"}
+        )
+        vectorize_options = vectorize.build_options()
+
+        component = AstraVectorStoreComponent()
+        component.build(
+            token=application_token,
+            api_endpoint=api_endpoint,
+            collection_name=VECTORIZE_COLLECTION_OPENAI_WITH_AUTH,
+            ingest_data=records,
+            embedding=vectorize_options,
+            search_input="test",
+        )
+        component.build_vector_store()
+        records = component.search_documents()  # rebinds records from the ingested data to the search results
+        assert len(records) == 2  # expects as many results as documents ingested above
+    finally:
+        if store is not None:
+            store.delete_collection()  # runs whether or not the assertion passed
+
+
 # @pytest.mark.skipif(
 #     not check_env_vars("ASTRA_DB_APPLICATION_TOKEN", "ASTRA_DB_API_ENDPOINT"),
 #     reason="missing astra env vars",