build: fix astra integration tests (#3518)

2024-08-26 09:44:27 -07:00 · 2024-08-26 09:44:27 -07:00 · 75520729de
commit 75520729de
parent f4e96b113e
3 changed files with 33 additions and 90 deletions
--- a/.github/workflows/scheduled_integration_test.yml
+++ b/.github/workflows/scheduled_integration_test.yml
@ -14,14 +14,20 @@ env:
  POETRY_VERSION: "1.8.2"

 jobs:
-  setup-environment:
+  test-integration:
+    name: Run Integration Tests
    runs-on: ubuntu-latest
    strategy:
+      max-parallel: 1 # Currently, we can only run at a time for collection-per-db-constraints
      matrix:
        python-version:
          - "3.12"
          - "3.11"
          - "3.10"
+    env:
+      OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
+      ASTRA_DB_API_ENDPOINT: ${{ secrets.ASTRA_DB_API_ENDPOINT }}
+      ASTRA_DB_APPLICATION_TOKEN: ${{ secrets.ASTRA_DB_APPLICATION_TOKEN }}
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
@ -37,26 +43,6 @@ jobs:
        run: |
          poetry env use ${{ matrix.python-version }}
          poetry install
-
-  test-integration:
-    needs: setup-environment
-    name: Run Integration Tests
-    runs-on: ubuntu-latest
-    strategy:
-      matrix:
-        python-version:
-          - "3.12"
-          - "3.11"
-          - "3.10"
-    env:
-      OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
-      ASTRA_DB_API_KEY: ${{ secrets.ASTRA_DB_API_KEY }}
-      ASTRA_DB_APPLICATION_TOKEN: ${{ secrets.ASTRA_DB_APPLICATION_TOKEN }}
-    steps:
-      - name: Checkout code
-        uses: actions/checkout@v4
-        with:
-          ref: ${{ inputs.branch || github.ref }}
      - name: Run integration tests
        timeout-minutes: 12
        run: |
--- a/src/backend/tests/integration/astra/test_astra_component.py
+++ b/src/backend/tests/integration/astra/test_astra_component.py
@ -1,7 +1,7 @@
 import os

 import pytest
-from integration.utils import MockEmbeddings, check_env_vars
+from integration.utils import MockEmbeddings, check_env_vars, valid_nvidia_vectorize_region
 from langchain_core.documents import Document

 # from langflow.components.memories.AstraDBMessageReader import AstraDBMessageReaderComponent
@ -91,8 +91,9 @@ def test_astra_embeds_and_search(astra_fixture):


@pytest.mark.skipif(
-    not check_env_vars("ASTRA_DB_APPLICATION_TOKEN", "ASTRA_DB_API_ENDPOINT"),
-    reason="missing astra env vars",
+    not check_env_vars("ASTRA_DB_APPLICATION_TOKEN", "ASTRA_DB_API_ENDPOINT")
+    or not valid_nvidia_vectorize_region(os.getenv("ASTRA_DB_API_ENDPOINT")),
+    reason="missing env vars or invalid region for nvidia vectorize",
 )
 def test_astra_vectorize():
    from langchain_astradb import AstraDBVectorStore, CollectionVectorServiceOptions
@ -101,7 +102,7 @@ def test_astra_vectorize():

    store = None
    try:
-        options = {"provider": "nvidia", "modelName": "NV-Embed-QA", "parameters": {}, "authentication": {}}
+        options = {"provider": "nvidia", "modelName": "NV-Embed-QA"}
        store = AstraDBVectorStore(
            collection_name=VECTORIZE_COLLECTION,
            api_endpoint=os.getenv("ASTRA_DB_API_ENDPOINT"),
@ -177,6 +178,7 @@ def test_astra_vectorize_with_provider_api_key():
            ingest_data=records,
            embedding=vectorize_options,
            search_input="test",
+            number_of_results=4,
        )
        component.build_vector_store()
        records = component.search_documents()
@ -187,7 +189,7 @@ def test_astra_vectorize_with_provider_api_key():


@pytest.mark.skipif(
-    not check_env_vars("ASTRA_DB_APPLICATION_TOKEN", "ASTRA_DB_API_ENDPOINT", "OPENAI_API_KEY"),
+    not check_env_vars("ASTRA_DB_APPLICATION_TOKEN", "ASTRA_DB_API_ENDPOINT"),
    reason="missing env vars",
 )
 def test_astra_vectorize_passes_authentication():
@ -204,7 +206,7 @@ def test_astra_vectorize_passes_authentication():
            "provider": "openai",
            "modelName": "text-embedding-3-small",
            "parameters": {},
-            "authentication": {"providerKey": "providerKey"},
+            "authentication": {"providerKey": "apikey"},
        }
        store = AstraDBVectorStore(
            collection_name=VECTORIZE_COLLECTION_OPENAI_WITH_AUTH,
@ -217,7 +219,7 @@ def test_astra_vectorize_passes_authentication():

        vectorize = AstraVectorizeComponent()
        vectorize.build(
-            provider="OpenAI", model_name="text-embedding-3-small", authentication={"providerKey": "providerKey"}
+            provider="OpenAI", model_name="text-embedding-3-small", authentication={"providerKey": "apikey"}
        )
        vectorize_options = vectorize.build_options()

@ -236,65 +238,3 @@ def test_astra_vectorize_passes_authentication():
    finally:
        if store is not None:
            store.delete_collection()
-
-
-# @pytest.mark.skipif(
-#     not check_env_vars("ASTRA_DB_APPLICATION_TOKEN", "ASTRA_DB_API_ENDPOINT"),
-#     reason="missing astra env vars",
-# )
-# def test_astra_memory():
-#     application_token = os.getenv("ASTRA_DB_APPLICATION_TOKEN")
-#     api_endpoint = os.getenv("ASTRA_DB_API_ENDPOINT")
-
-#     writer = AstraDBMessageWriterComponent()
-#     reader = AstraDBMessageReaderComponent()
-
-#     input_value = Data.from_document(
-#         Document(
-#             page_content="memory1",
-#             metadata={"session_id": 1, "sender": "human", "sender_name": "Bob"},
-#         )
-#     )
-#     writer.build(
-#         input_value=input_value,
-#         session_id=1,
-#         token=application_token,
-#         api_endpoint=api_endpoint,
-#         collection_name=MEMORY_COLLECTION,
-#     )
-
-#     # verify reading w/ same session id pulls the same record
-#     records = reader.build(
-#         session_id=1,
-#         token=application_token,
-#         api_endpoint=api_endpoint,
-#         collection_name=MEMORY_COLLECTION,
-#     )
-#     assert len(records) == 1
-#     assert isinstance(records[0], Data)
-#     content = records[0].get_text()
-#     assert content == "memory1"
-
-#     # verify reading w/ different session id does not pull the same record
-#     records = reader.build(
-#         session_id=2,
-#         token=application_token,
-#         api_endpoint=api_endpoint,
-#         collection_name=MEMORY_COLLECTION,
-#     )
-#     assert len(records) == 0
-
-#     # Cleanup store - doing here rather than fixture (see https://github.com/langchain-ai/langchain-datastax/pull/36)
-#     try:
-#         from langchain_astradb import AstraDBVectorStore
-#     except ImportError:
-#         raise ImportError(
-#             "Could not import langchain Astra DB integration package. Please install it with `pip install langchain-astradb`."
-#         )
-#     store = AstraDBVectorStore(
-#         collection_name=MEMORY_COLLECTION,
-#         embedding=MockEmbeddings(),
-#         api_endpoint=api_endpoint,
-#         token=application_token,
-#     )
-#     store.delete_collection()
--- a/src/backend/tests/integration/utils.py
+++ b/src/backend/tests/integration/utils.py
@ -1,6 +1,7 @@
 import os
 from typing import List

+from astrapy.admin import parse_api_endpoint
 from langflow.field_typing import Embeddings


@ -17,6 +18,22 @@ def check_env_vars(*vars):
    return all(os.getenv(var) for var in vars)


+def valid_nvidia_vectorize_region(api_endpoint: str) -> bool:
+    """
+    Check if the specified region is valid.
+
+    Args:
+    region (str): The region to check.
+
+    Returns:
+    bool: True if the region is contains hosted nvidia models, False otherwise.
+    """
+    parsed_endpoint = parse_api_endpoint(api_endpoint)
+    if not parsed_endpoint:
+        raise ValueError("Invalid ASTRA_DB_API_ENDPOINT")
+    return parsed_endpoint.region in ["us-east-2"]
+
+
 class MockEmbeddings(Embeddings):
    def __init__(self):
        self.embedded_documents = None