chore: refactor and add components integration tests (#3607)

* improve integration tests
* add fixes
* [autofix.ci] apply automated fixes

Co-authored-by: autofix-ci[bot] <114827586+autofix-ci[bot]@users.noreply.github.com>
Parent: 4ee25359a5 · Commit: 96872f3aa5
32 changed files with 523 additions and 131 deletions
.github/workflows/python_test.yml (vendored) — 24 lines changed

```diff
@@ -32,8 +32,6 @@ jobs:
         python-version: ${{ fromJson(inputs.python-versions || '["3.10", "3.11", "3.12"]' ) }}
         splitCount: [5]
         group: [1, 2, 3, 4, 5]
-    env:
-      OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
     steps:
       - uses: actions/checkout@v4
         with:
@@ -59,6 +57,28 @@ jobs:
           timeout_minutes: 12
           max_attempts: 2
           command: make unit_tests async=false args="--splits ${{ matrix.splitCount }} --group ${{ matrix.group }}"
+  integration-tests:
+    name: Integration Tests - Python ${{ matrix.python-version }}
+    runs-on: ubuntu-latest
+    strategy:
+      matrix:
+        python-version: ${{ fromJson(inputs.python-versions || '["3.10", "3.11", "3.12"]' ) }}
+    steps:
+      - uses: actions/checkout@v4
+        with:
+          ref: ${{ inputs.branch || github.ref }}
+      - name: Set up Python ${{ matrix.python-version }} + Poetry ${{ env.POETRY_VERSION }}
+        uses: "./.github/actions/poetry_caching"
+        with:
+          python-version: ${{ matrix.python-version }}
+          poetry-version: ${{ env.POETRY_VERSION }}
+          cache-key: ${{ runner.os }}-poetry-${{ env.POETRY_VERSION }}-${{ hashFiles('**/poetry.lock') }}
+      - name: Install Python dependencies
+        run: |
+          poetry env use ${{ matrix.python-version }}
+          poetry install
+      - name: Run integration tests
+        run: make integration_tests_no_api_keys
+
   test-cli:
     name: Test CLI - Python ${{ matrix.python-version }}
```
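The new integration-tests job runs `make integration_tests_no_api_keys`, which (see the Makefile change below) deselects everything marked `api_key_required`, so this job needs no secrets. A minimal sketch of how such a marker is declared so `-m` filters work without "unknown marker" warnings — hypothetical, since the repo may already register it in its pytest configuration:

```python
# conftest.py — a sketch, assuming the marker is not yet declared elsewhere


def pytest_configure(config):
    # registers the api_key_required marker used throughout these tests
    config.addinivalue_line(
        "markers",
        "api_key_required: test needs real provider credentials (e.g. OPENAI_API_KEY)",
    )
```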
Workflow step running integration tests with API keys:

```diff
@@ -43,7 +43,7 @@ jobs:
         run: |
           poetry env use ${{ matrix.python-version }}
           poetry install
-      - name: Run integration tests
-        timeout-minutes: 12
+      - name: Run integration tests with api keys
+        timeout-minutes: 20
         run: |
-          make integration_tests
+          make integration_tests_api_keys
```
Makefile — 12 lines changed

```diff
@@ -148,11 +148,21 @@ else
 	$(args)
 endif

-integration_tests: ## run integration tests
+integration_tests:
 	poetry run pytest src/backend/tests/integration \
 		--instafail -ra \
 		$(args)

+integration_tests_no_api_keys:
+	poetry run pytest src/backend/tests/integration \
+		--instafail -ra -m "not api_key_required" \
+		$(args)
+
+integration_tests_api_keys:
+	poetry run pytest src/backend/tests/integration \
+		--instafail -ra -m "api_key_required" \
+		$(args)
+
 tests: ## run unit, integration, coverage tests
 	@echo 'Running Unit Tests...'
 	make unit_tests
```
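The two new targets split the integration suite on the `api_key_required` marker: `-m "not api_key_required"` collects only credential-free tests, `-m "api_key_required"` collects the rest. A sketch of how a test lands in one bucket or the other (hypothetical test names):

```python
import pytest


@pytest.mark.api_key_required
def test_with_credentials():
    """Collected by `make integration_tests_api_keys` (-m "api_key_required")."""


def test_without_credentials():
    """Collected by `make integration_tests_no_api_keys` (-m "not api_key_required")."""
```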
poetry.lock (generated) — 23 lines changed

```diff
@@ -1,4 +1,4 @@
-# This file is automatically @generated by Poetry 1.8.2 and should not be changed by hand.
+# This file is automatically @generated by Poetry 1.8.3 and should not be changed by hand.

 [[package]]
 name = "aenum"
@@ -586,6 +586,20 @@ urllib3 = {version = ">=1.25.4,<2.2.0 || >2.2.0,<3", markers = "python_version >
 [package.extras]
 crt = ["awscrt (==0.21.2)"]

+[[package]]
+name = "bson"
+version = "0.5.10"
+description = "BSON codec for Python"
+optional = false
+python-versions = "*"
+files = [
+    {file = "bson-0.5.10.tar.gz", hash = "sha256:d6511b2ab051139a9123c184de1a04227262173ad593429d21e443d6462d6590"},
+]
+
+[package.dependencies]
+python-dateutil = ">=2.4.0"
+six = ">=1.9.0"
+
 [[package]]
 name = "build"
 version = "1.2.1"
@@ -10417,6 +10431,11 @@ files = [
     {file = "triton-3.0.0-1-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:34e509deb77f1c067d8640725ef00c5cbfcb2052a1a3cb6a6d343841f92624eb"},
     {file = "triton-3.0.0-1-cp38-cp38-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:bcbf3b1c48af6a28011a5c40a5b3b9b5330530c3827716b5fbf6d7adcc1e53e9"},
     {file = "triton-3.0.0-1-cp39-cp39-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:6e5727202f7078c56f91ff13ad0c1abab14a0e7f2c87e91b12b6f64f3e8ae609"},
+    {file = "triton-3.0.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:39b052da883351fdf6be3d93cedae6db3b8e3988d3b09ed221bccecfa9612230"},
+    {file = "triton-3.0.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:cd34f19a8582af96e6291d4afce25dac08cb2a5d218c599163761e8e0827208e"},
+    {file = "triton-3.0.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0d5e10de8c011adeb7c878c6ce0dd6073b14367749e34467f1cff2bde1b78253"},
+    {file = "triton-3.0.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e8903767951bf86ec960b4fe4e21bc970055afc65e9d57e916d79ae3c93665e3"},
+    {file = "triton-3.0.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:41004fb1ae9a53fcb3e970745feb87f0e3c94c6ce1ba86e95fa3b8537894bef7"},
 ]

 [package.dependencies]
@@ -11807,4 +11826,4 @@ local = ["ctransformers", "llama-cpp-python", "sentence-transformers"]
 [metadata]
 lock-version = "2.0"
 python-versions = ">=3.10,<3.13"
-content-hash = "333638ccf5ed00e21c85a3572486cb62bb7012b1e548d8c7dc2f9e89d1900beb"
+content-hash = "d6fd6b327ba3ded7e8eefd2505c0cc6f15d4a5a9f1fd34020dc25324e9f13be1"
```
Poetry project dependencies:

```diff
@@ -105,6 +105,7 @@ astra-assistants = "^2.1.0.10"
 composio-langchain = "^0.5.8"
 spider-client = "^0.0.27"
 nltk = "^3.9.1"
+bson = "^0.5.10"


 [tool.poetry.group.dev.dependencies]
```
Component class (custom component):

```diff
@@ -263,7 +263,7 @@ class Component(CustomComponent):
         for input_ in inputs:
             if input_.name is None:
                 raise ValueError("Input name cannot be None.")
-            self._inputs[input_.name] = input_
+            self._inputs[input_.name] = deepcopy(input_)

     def validate(self, params: dict):
         """
@@ -496,6 +496,8 @@ class Component(CustomComponent):
         #! works and then update this later
         field_config = self.get_template_config(self)
         frontend_node = ComponentFrontendNode.from_inputs(**field_config)
+        for key, value in self._inputs.items():
+            frontend_node.set_field_load_from_db_in_template(key, False)
         self._map_parameters_on_frontend_node(frontend_node)

         frontend_node_dict = frontend_node.to_dict(keep_name=False)
@@ -532,7 +534,9 @@ class Component(CustomComponent):
             "data": {
                 "node": frontend_node.to_dict(keep_name=False),
                 "type": self.name or self.__class__.__name__,
-            }
+                "id": self._id,
+            },
+            "id": self._id,
         }
         return data
```
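Storing a deepcopy of each input means two component instances built from the same input definitions no longer share mutable state. A self-contained sketch of the aliasing bug this avoids (illustrative `Input` class, not Langflow's actual one):

```python
from copy import deepcopy


class Input:
    """Stand-in for a component input definition (illustrative only)."""

    def __init__(self, name: str):
        self.name = name
        self.value = None


shared = Input("api_key")

# Without deepcopy, two components alias one object, so a value set
# through one silently leaks into the other:
a_inputs = {shared.name: shared}
b_inputs = {shared.name: shared}
a_inputs["api_key"].value = "secret"
assert b_inputs["api_key"].value == "secret"  # leaked state

# With deepcopy (the behavior after this change), each component owns a copy:
c_inputs = {shared.name: deepcopy(shared)}
c_inputs["api_key"].value = "other"
assert shared.value == "secret"  # the original definition is untouched
```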
Graph class:

```diff
@@ -202,33 +202,35 @@ class Graph:
         self._edges = self._graph_data["edges"]
         self.initialize()

-    def add_component(self, _id: str, component: "Component"):
-        if _id in self.vertex_map:
-            return
+    def add_component(self, component: "Component", component_id: Optional[str] = None) -> str:
+        component_id = component_id or component._id
+        if component_id in self.vertex_map:
+            return component_id
+        component._id = component_id
+        if component_id in self.vertex_map:
+            raise ValueError(f"Component ID {component_id} already exists")
         frontend_node = component.to_frontend_node()
-        frontend_node["data"]["id"] = _id
-        frontend_node["id"] = _id
         self._vertices.append(frontend_node)
         vertex = self._create_vertex(frontend_node)
         vertex.add_component_instance(component)
-        self.vertices.append(vertex)
-        self.vertex_map[_id] = vertex
+        self._add_vertex(vertex)
+        if component._edges:
+            for edge in component._edges:
+                self._add_edge(edge)

         if component._components:
             for _component in component._components:
-                self.add_component(_component._id, _component)
+                self.add_component(_component)

+        return component_id

     def _set_start_and_end(self, start: "Component", end: "Component"):
         if not hasattr(start, "to_frontend_node"):
             raise TypeError(f"start must be a Component. Got {type(start)}")
         if not hasattr(end, "to_frontend_node"):
             raise TypeError(f"end must be a Component. Got {type(end)}")
-        self.add_component(start._id, start)
-        self.add_component(end._id, end)
+        self.add_component(start, start._id)
+        self.add_component(end, end._id)

     def add_component_edge(self, source_id: str, output_input_tuple: tuple[str, str], target_id: str):
         source_vertex = self.get_vertex(source_id)
@@ -242,6 +244,18 @@ class Graph:
             raise ValueError(f"Source vertex {source_id} does not have a custom component.")
         if target_vertex._custom_component is None:
             raise ValueError(f"Target vertex {target_id} does not have a custom component.")
+
+        try:
+            input_field = target_vertex.get_input(input_name)
+            input_types = input_field.input_types
+            input_field_type = str(input_field.field_type)
+        except ValueError:
+            input_field = target_vertex.data.get("node", {}).get("template", {}).get(input_name)
+            if not input_field:
+                raise ValueError(f"Input field {input_name} not found in target vertex {target_id}")
+            input_types = input_field.get("input_types", [])
+            input_field_type = input_field.get("type", "")

         edge_data: EdgeData = {
             "source": source_id,
             "target": target_id,
@@ -256,8 +270,8 @@ class Graph:
             "targetHandle": {
                 "fieldName": input_name,
                 "id": target_vertex.id,
-                "inputTypes": target_vertex.get_input(input_name).input_types,
-                "type": str(target_vertex.get_input(input_name).field_type),
+                "inputTypes": input_types,
+                "type": input_field_type,
             },
         },
     }
@@ -1397,7 +1411,7 @@ class Graph:
             tasks.append(task)
             vertex_task_run_count[vertex_id] = vertex_task_run_count.get(vertex_id, 0) + 1

-        logger.debug(f"Running layer {layer_index} with {len(tasks)} tasks")
+        logger.debug(f"Running layer {layer_index} with {len(tasks)} tasks, {current_batch}")
         try:
             next_runnable_vertices = await self._execute_tasks(tasks, lock=lock)
         except Exception as e:
@@ -1463,6 +1477,8 @@ class Graph:
                 # This could usually happen with input vertices like ChatInput
                 self.run_manager.remove_vertex_from_runnables(v.id)

+            logger.debug(f"Vertex {v.id}, result: {v._built_result}, object: {v._built_object}")
+
         for v in vertices:
             next_runnable_vertices = await self.get_next_runnable_vertices(lock, vertex=v, cache=False)
             results.extend(next_runnable_vertices)
```
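`add_component` now takes the component first, derives the ID from the instance when none is given, and returns it; registering a component under an ID that already exists is a no-op that returns the existing ID. A sketch of the new call pattern, mirroring the updated tests later in this diff:

```python
from langflow.components.inputs import ChatInput
from langflow.graph import Graph

graph = Graph()
input_id = graph.add_component(ChatInput())  # ID derived from the instance's _id
# Re-adding under an already-registered ID returns that ID unchanged:
assert graph.add_component(ChatInput(), input_id) == input_id
```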
update_params_with_load_from_db_fields:

```diff
@@ -112,8 +112,8 @@ def update_params_with_load_from_db_fields(
         try:
             key = custom_component.variables(params[field], field)
         except ValueError as e:
-            # check if "User id is not set" is in the error message
-            if "User id is not set" in str(e) and not fallback_to_env_vars:
+            # check if "User id is not set" is in the error message, this is an internal bug
+            if "User id is not set" in str(e):
                 raise e
             logger.debug(str(e))
         if fallback_to_env_vars and key is None:
```
Application startup (create_app):

```diff
@@ -25,7 +25,6 @@ from langflow.initial_setup.setup import (
     create_or_update_starter_projects,
     initialize_super_user_if_needed,
     load_flows_from_directory,
-    download_nltk_resources,
 )
 from langflow.interface.types import get_and_cache_all_types_dict
 from langflow.interface.utils import setup_llm_caching
@@ -182,7 +181,7 @@ def create_app():
         FastAPIInstrumentor.instrument_app(app)

     # Get necessary NLTK packages
-    download_nltk_resources()
+    # download_nltk_resources()

     return app
```
FrontendNode model:

```diff
@@ -184,3 +184,9 @@ class FrontendNode(BaseModel):
             if field.name == field_name:
                 field.value = value
                 break
+
+    def set_field_load_from_db_in_template(self, field_name, value):
+        for field in self.template.fields:
+            if field.name == field_name and hasattr(field, "load_from_db"):
+                field.load_from_db = value
+                break
```
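This is the setter `Component.to_frontend_node` (in the Component diff above) calls for every input, so exported nodes default to literal values rather than database-stored variables. A sketch of the observable effect, assuming the standard exported-node layout with template entries carrying a `load_from_db` flag:

```python
from langflow.components.inputs import ChatInput

# after this change, every templated input in a freshly exported node
# should have load_from_db forced off
node = ChatInput().to_frontend_node()
template = node["data"]["node"]["template"]
assert all(
    not field.get("load_from_db", False)
    for field in template.values()
    if isinstance(field, dict)
)
```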
src/backend/tests/api_keys.py (new file, 34 lines)

```python
import os.path

# we need to import tmpdir


def get_required_env_var(var: str) -> str:
    """
    Get the value of the specified environment variable.

    Args:
        var (str): The environment variable to get.

    Returns:
        str: The value of the environment variable.

    Raises:
        ValueError: If the environment variable is not set.
    """
    value = os.getenv(var)
    if not value:
        raise ValueError(f"Environment variable {var} is not set")
    return value


def get_openai_api_key() -> str:
    return get_required_env_var("OPENAI_API_KEY")


def get_astradb_application_token() -> str:
    return get_required_env_var("ASTRA_DB_APPLICATION_TOKEN")


def get_astradb_api_endpoint() -> str:
    return get_required_env_var("ASTRA_DB_API_ENDPOINT")
```
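These helpers fail fast with a descriptive error instead of letting a test run against an empty credential; combined with the `api_key_required` marker, a keyless CI job never even collects them. A sketch of typical use (hypothetical test name):

```python
import pytest

from tests.api_keys import get_openai_api_key


@pytest.mark.api_key_required
def test_needs_openai():  # hypothetical
    api_key = get_openai_api_key()  # raises ValueError if OPENAI_API_KEY is unset
    assert api_key
```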
Test fixtures (get_starter_project):

```diff
@@ -27,6 +27,7 @@ from langflow.services.database.models.folder.model import Folder
 from langflow.services.database.models.user.model import User, UserCreate
 from langflow.services.database.utils import session_getter
 from langflow.services.deps import get_db_service
+from tests.api_keys import get_openai_api_key

 if TYPE_CHECKING:
     from langflow.services.database.service import DatabaseService
@@ -463,6 +464,8 @@ def get_starter_project(active_user):
     if not flow:
         raise ValueError("No starter project found")

+    # ensure openai api key is set
+    get_openai_api_key()
     new_flow_create = FlowCreate(
         name=flow.name,
         description=flow.description,
```
New integration test — runs the "Basic Prompting" starter flow exported from release 1.0.15 (new file, 16 lines)

```python
import pytest
from langflow.schema.message import Message
from tests.api_keys import get_openai_api_key
from tests.integration.utils import download_flow_from_github, run_json_flow


@pytest.mark.asyncio
@pytest.mark.api_key_required
async def test_1_0_15_basic_prompting():
    api_key = get_openai_api_key()
    json_flow = download_flow_from_github("Basic Prompting (Hello, World)", "1.0.15")
    json_flow.set_value(json_flow.get_component_by_type("OpenAIModel"), "api_key", api_key)
    outputs = await run_json_flow(json_flow, run_input="my name is bob, say hello!")
    assert isinstance(outputs["message"], Message)
    response = outputs["message"].text.lower()
    assert "arr" in response or "ahoy" in response
```
Astra DB vector store integration tests — rewritten to use the shared helpers, run_single_component, and the api_key_required marker:

```diff
@@ -1,118 +1,129 @@
 import os
+from typing import List

+from astrapy.db import AstraDB
 import pytest
 from integration.utils import MockEmbeddings, check_env_vars, valid_nvidia_vectorize_region

+from langflow.components.embeddings import OpenAIEmbeddingsComponent
+from langflow.custom import Component
+from langflow.inputs import StrInput
+from langflow.template import Output
+from tests.api_keys import get_astradb_application_token, get_astradb_api_endpoint, get_openai_api_key
+from tests.integration.utils import ComponentInputHandle
 from langchain_core.documents import Document

 # from langflow.components.memories.AstraDBMessageReader import AstraDBMessageReaderComponent
 # from langflow.components.memories.AstraDBMessageWriter import AstraDBMessageWriterComponent

 from langflow.components.vectorstores.AstraDB import AstraVectorStoreComponent
 from langflow.schema.data import Data
+from tests.integration.utils import run_single_component

-COLLECTION = "test_basic"
+BASIC_COLLECTION = "test_basic"
 SEARCH_COLLECTION = "test_search"
 # MEMORY_COLLECTION = "test_memory"
 VECTORIZE_COLLECTION = "test_vectorize"
 VECTORIZE_COLLECTION_OPENAI = "test_vectorize_openai"
 VECTORIZE_COLLECTION_OPENAI_WITH_AUTH = "test_vectorize_openai_auth"
+ALL_COLLECTIONS = [
+    BASIC_COLLECTION,
+    SEARCH_COLLECTION,
+    # MEMORY_COLLECTION,
+    VECTORIZE_COLLECTION,
+    VECTORIZE_COLLECTION_OPENAI,
+    VECTORIZE_COLLECTION_OPENAI_WITH_AUTH,
+]


 @pytest.fixture()
-def astra_fixture(request):
-    """
-    Sets up the astra collection and cleans up after
-    """
-    try:
-        from langchain_astradb import AstraDBVectorStore
-    except ImportError:
-        raise ImportError(
-            "Could not import langchain Astra DB integration package. Please install it with `pip install langchain-astradb`."
-        )
-
-    store = AstraDBVectorStore(
-        collection_name=request.param,
-        embedding=MockEmbeddings(),
-        api_endpoint=os.getenv("ASTRA_DB_API_ENDPOINT"),
-        token=os.getenv("ASTRA_DB_APPLICATION_TOKEN"),
-    )
-
-    yield
-
-    store.delete_collection()
+def astradb_client(request):
+    client = AstraDB(api_endpoint=get_astradb_api_endpoint(), token=get_astradb_application_token())
+    yield client
+    for collection in ALL_COLLECTIONS:
+        client.delete_collection(collection)


-@pytest.mark.skipif(
-    not check_env_vars("ASTRA_DB_APPLICATION_TOKEN", "ASTRA_DB_API_ENDPOINT"),
-    reason="missing astra env vars",
-)
-@pytest.mark.parametrize("astra_fixture", [COLLECTION], indirect=True)
-def test_astra_setup(astra_fixture):
-    application_token = os.getenv("ASTRA_DB_APPLICATION_TOKEN")
-    api_endpoint = os.getenv("ASTRA_DB_API_ENDPOINT")
-    embedding = MockEmbeddings()
-
-    component = AstraVectorStoreComponent()
-    component.build(
-        token=application_token,
-        api_endpoint=api_endpoint,
-        collection_name=COLLECTION,
-        embedding=embedding,
-    )
-    component.build_vector_store()
+@pytest.mark.api_key_required
+@pytest.mark.asyncio
+async def test_base(astradb_client: AstraDB):
+    from langflow.components.embeddings import OpenAIEmbeddingsComponent
+
+    application_token = get_astradb_application_token()
+    api_endpoint = get_astradb_api_endpoint()
+
+    results = await run_single_component(
+        AstraVectorStoreComponent,
+        inputs={
+            "token": application_token,
+            "api_endpoint": api_endpoint,
+            "collection_name": BASIC_COLLECTION,
+            "embedding": ComponentInputHandle(
+                clazz=OpenAIEmbeddingsComponent,
+                inputs={"openai_api_key": get_openai_api_key()},
+                output_name="embeddings",
+            ),
+        },
+    )
+    from langchain_core.vectorstores import VectorStoreRetriever
+
+    assert isinstance(results["base_retriever"], VectorStoreRetriever)
+    assert results["vector_store"] is not None
+    assert results["search_results"] == []
+    assert astradb_client.collection(BASIC_COLLECTION)


-@pytest.mark.skipif(
-    not check_env_vars("ASTRA_DB_APPLICATION_TOKEN", "ASTRA_DB_API_ENDPOINT"),
-    reason="missing astra env vars",
-)
-@pytest.mark.parametrize("astra_fixture", [SEARCH_COLLECTION], indirect=True)
-def test_astra_embeds_and_search(astra_fixture):
-    application_token = os.getenv("ASTRA_DB_APPLICATION_TOKEN")
-    api_endpoint = os.getenv("ASTRA_DB_API_ENDPOINT")
-    embedding = MockEmbeddings()
-
-    documents = [Document(page_content="test1"), Document(page_content="test2")]
-    records = [Data.from_document(d) for d in documents]
-
-    component = AstraVectorStoreComponent()
-    component.build(
-        token=application_token,
-        api_endpoint=api_endpoint,
-        collection_name=SEARCH_COLLECTION,
-        embedding=embedding,
-        ingest_data=records,
-        search_input="test1",
-        number_of_results=1,
-    )
-    component.build_vector_store()
-    records = component.search_documents()
-
-    assert len(records) == 1
+class TextToData(Component):
+    inputs = [StrInput(name="text_data", is_list=True)]
+    outputs = [Output(name="data", display_name="Data", method="create_data")]
+
+    def create_data(self) -> List[Data]:
+        return [Data(text=t) for t in self.text_data]
+
+
+@pytest.mark.api_key_required
+@pytest.mark.asyncio
+async def test_astra_embeds_and_search():
+    application_token = get_astradb_application_token()
+    api_endpoint = get_astradb_api_endpoint()
+
+    results = await run_single_component(
+        AstraVectorStoreComponent,
+        inputs={
+            "token": application_token,
+            "api_endpoint": api_endpoint,
+            "collection_name": BASIC_COLLECTION,
+            "number_of_results": 1,
+            "search_input": "test1",
+            "ingest_data": ComponentInputHandle(
+                clazz=TextToData, inputs={"text_data": ["test1", "test2"]}, output_name="data"
+            ),
+            "embedding": ComponentInputHandle(
+                clazz=OpenAIEmbeddingsComponent,
+                inputs={"openai_api_key": get_openai_api_key()},
+                output_name="embeddings",
+            ),
+        },
+    )
+    assert len(results["search_results"]) == 1


 @pytest.mark.skipif(
     not check_env_vars("ASTRA_DB_APPLICATION_TOKEN", "ASTRA_DB_API_ENDPOINT")
     or not valid_nvidia_vectorize_region(os.getenv("ASTRA_DB_API_ENDPOINT")),
     reason="missing env vars or invalid region for nvidia vectorize",
 )
+@pytest.mark.api_key_required
 def test_astra_vectorize():
     from langchain_astradb import AstraDBVectorStore, CollectionVectorServiceOptions

     from langflow.components.embeddings.AstraVectorize import AstraVectorizeComponent

+    application_token = get_astradb_application_token()
+    api_endpoint = get_astradb_api_endpoint()
+
     store = None
     try:
         options = {"provider": "nvidia", "modelName": "NV-Embed-QA"}
         store = AstraDBVectorStore(
             collection_name=VECTORIZE_COLLECTION,
-            api_endpoint=os.getenv("ASTRA_DB_API_ENDPOINT"),
-            token=os.getenv("ASTRA_DB_APPLICATION_TOKEN"),
+            api_endpoint=api_endpoint,
+            token=application_token,
             collection_vector_service_options=CollectionVectorServiceOptions.from_dict(options),
         )

-        application_token = os.getenv("ASTRA_DB_APPLICATION_TOKEN")
-        api_endpoint = os.getenv("ASTRA_DB_API_ENDPOINT")
-
         documents = [Document(page_content="test1"), Document(page_content="test2")]
         records = [Data.from_document(d) for d in documents]
@@ -139,20 +150,18 @@ def test_astra_vectorize():
         store.delete_collection()


-@pytest.mark.skipif(
-    not check_env_vars("ASTRA_DB_APPLICATION_TOKEN", "ASTRA_DB_API_ENDPOINT", "OPENAI_API_KEY"),
-    reason="missing env vars",
-)
+@pytest.mark.api_key_required
 def test_astra_vectorize_with_provider_api_key():
     """tests vectorize using an openai api key"""
     from langchain_astradb import AstraDBVectorStore, CollectionVectorServiceOptions

     from langflow.components.embeddings.AstraVectorize import AstraVectorizeComponent

+    application_token = get_astradb_application_token()
+    api_endpoint = get_astradb_api_endpoint()
+
     store = None
     try:
-        application_token = os.getenv("ASTRA_DB_APPLICATION_TOKEN")
-        api_endpoint = os.getenv("ASTRA_DB_API_ENDPOINT")
         options = {"provider": "openai", "modelName": "text-embedding-3-small", "parameters": {}, "authentication": {}}
         store = AstraDBVectorStore(
             collection_name=VECTORIZE_COLLECTION_OPENAI,
@@ -188,10 +197,7 @@ def test_astra_vectorize_with_provider_api_key():
         store.delete_collection()


-@pytest.mark.skipif(
-    not check_env_vars("ASTRA_DB_APPLICATION_TOKEN", "ASTRA_DB_API_ENDPOINT"),
-    reason="missing env vars",
-)
+@pytest.mark.api_key_required
 def test_astra_vectorize_passes_authentication():
     """tests vectorize using the authentication parameter"""
     from langchain_astradb import AstraDBVectorStore, CollectionVectorServiceOptions
@@ -200,8 +206,8 @@ def test_astra_vectorize_passes_authentication():

     store = None
     try:
-        application_token = os.getenv("ASTRA_DB_APPLICATION_TOKEN")
-        api_endpoint = os.getenv("ASTRA_DB_API_ENDPOINT")
+        application_token = get_astradb_application_token()
+        api_endpoint = get_astradb_api_endpoint()
         options = {
             "provider": "openai",
             "modelName": "text-embedding-3-small",
```
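ComponentInputHandle (defined in the test utilities later in this diff) wraps a dependency so run_single_component can build it as its own upstream vertex and wire its named output into the target input, rather than passing a live Component instance directly. Roughly what the embedding handle above expands to — a sketch, with a placeholder credential:

```python
import uuid

from langflow.components.embeddings import OpenAIEmbeddingsComponent
from langflow.components.vectorstores.AstraDB import AstraVectorStoreComponent
from langflow.graph import Graph

# sketch of the wiring run_single_component performs for the handle above;
# "sk-..." is a placeholder, not a real key
graph = Graph(user_id=str(uuid.uuid4()), flow_id=str(uuid.uuid4()))
embeddings_id = graph.add_component(OpenAIEmbeddingsComponent(openai_api_key="sk-..."))
store_id = graph.add_component(AstraVectorStoreComponent(collection_name="test_basic"))
graph.add_component_edge(embeddings_id, ("embeddings", "embedding"), store_id)
```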
New integration tests for the ChatInput component (new file, 55 lines)

```python
from langflow.memory import get_messages
from langflow.schema.message import Message
from tests.integration.utils import run_single_component

from langflow.components.inputs import ChatInput
import pytest


@pytest.mark.asyncio
async def test_default():
    outputs = await run_single_component(ChatInput, run_input="hello")
    assert isinstance(outputs["message"], Message)
    assert outputs["message"].text == "hello"
    assert outputs["message"].sender == "User"
    assert outputs["message"].sender_name == "User"

    outputs = await run_single_component(ChatInput, run_input="")
    assert isinstance(outputs["message"], Message)
    assert outputs["message"].text == ""
    assert outputs["message"].sender == "User"
    assert outputs["message"].sender_name == "User"


@pytest.mark.asyncio
async def test_sender():
    outputs = await run_single_component(
        ChatInput, inputs={"sender": "Machine", "sender_name": "AI"}, run_input="hello"
    )
    assert isinstance(outputs["message"], Message)
    assert outputs["message"].text == "hello"
    assert outputs["message"].sender == "Machine"
    assert outputs["message"].sender_name == "AI"


@pytest.mark.asyncio
async def test_do_not_store_messages():
    session_id = "test-session-id"
    outputs = await run_single_component(
        ChatInput, inputs={"should_store_message": True}, run_input="hello", session_id=session_id
    )
    assert isinstance(outputs["message"], Message)
    assert outputs["message"].text == "hello"
    assert outputs["message"].session_id == session_id

    assert len(get_messages(session_id=session_id)) == 1

    session_id = "test-session-id-another"
    outputs = await run_single_component(
        ChatInput, inputs={"should_store_message": False}, run_input="hello", session_id=session_id
    )
    assert isinstance(outputs["message"], Message)
    assert outputs["message"].text == "hello"
    assert outputs["message"].session_id == session_id

    assert len(get_messages(session_id=session_id)) == 0
```
New integration tests for the ChatOutput component (new file, 45 lines)

```python
from langflow.components.outputs import ChatOutput
from langflow.memory import get_messages
from langflow.schema.message import Message
from tests.integration.utils import run_single_component

import pytest


@pytest.mark.asyncio
async def test_string():
    outputs = await run_single_component(ChatOutput, inputs={"input_value": "hello"})
    assert isinstance(outputs["message"], Message)
    assert outputs["message"].text == "hello"
    assert outputs["message"].sender == "Machine"
    assert outputs["message"].sender_name == "AI"


@pytest.mark.asyncio
async def test_message():
    outputs = await run_single_component(ChatOutput, inputs={"input_value": Message(text="hello")})
    assert isinstance(outputs["message"], Message)
    assert outputs["message"].text == "hello"
    assert outputs["message"].sender == "Machine"
    assert outputs["message"].sender_name == "AI"


@pytest.mark.asyncio
async def test_do_not_store_message():
    session_id = "test-session-id"
    outputs = await run_single_component(
        ChatOutput, inputs={"input_value": "hello", "should_store_message": True}, session_id=session_id
    )
    assert isinstance(outputs["message"], Message)
    assert outputs["message"].text == "hello"

    assert len(get_messages(session_id=session_id)) == 1
    session_id = "test-session-id-another"

    outputs = await run_single_component(
        ChatOutput, inputs={"input_value": "hello", "should_store_message": False}, session_id=session_id
    )
    assert isinstance(outputs["message"], Message)
    assert outputs["message"].text == "hello"

    assert len(get_messages(session_id=session_id)) == 0
```
New integration test for the Prompt component (new file, 13 lines)

```python
from langflow.components.prompts import PromptComponent
from langflow.schema.message import Message
from tests.integration.utils import run_single_component

import pytest


@pytest.mark.asyncio
async def test():
    outputs = await run_single_component(PromptComponent, inputs={"template": "test {var1}", "var1": "from the var"})
    print(outputs)
    assert isinstance(outputs["prompt"], Message)
    assert outputs["prompt"].text == "test from the var"
```
src/backend/tests/integration/flows/test_basic_prompting.py (new file, 22 lines)

```python
import pytest

from langflow.components.inputs import ChatInput
from langflow.components.outputs import ChatOutput
from langflow.components.prompts import PromptComponent
from langflow.graph import Graph
from langflow.schema.message import Message
from tests.integration.utils import run_flow


@pytest.mark.asyncio
async def test_simple_no_llm():
    graph = Graph()
    input = graph.add_component(ChatInput())
    output = graph.add_component(ChatOutput())
    component = PromptComponent(template="This is the message: {var1}", var1="")
    prompt = graph.add_component(component)
    graph.add_component_edge(input, ("message", "var1"), prompt)
    graph.add_component_edge(prompt, ("prompt", "input_value"), output)
    outputs = await run_flow(graph, run_input="hello!")
    assert isinstance(outputs["message"], Message)
    assert outputs["message"].text == "This is the message: hello!"
```
Integration-test utilities — imports:

```diff
@@ -1,7 +1,16 @@
+import dataclasses
 import os
+import uuid
+from typing import Optional, Any

+from astrapy.admin import parse_api_endpoint
+
+from langflow.api.v1.schemas import InputValueRequest
+from langflow.custom import Component
 from langflow.field_typing import Embeddings
+from langflow.graph import Graph
+from langflow.processing.process import run_graph_internal
+import requests


 def check_env_vars(*vars):
```
Integration-test utilities — new helpers:

```diff
@@ -49,3 +58,115 @@ class MockEmbeddings(Embeddings):
     def embed_query(self, text: str) -> list[float]:
         self.embedded_query = text
         return self.mock_embedding(text)
+
+
+@dataclasses.dataclass
+class JSONFlow:
+    json: dict
+
+    def get_components_by_type(self, component_type):
+        result = []
+        for node in self.json["data"]["nodes"]:
+            if node["data"]["type"] == component_type:
+                result.append(node["id"])
+        if not result:
+            raise ValueError(
+                f"Component of type {component_type} not found, available types: {', '.join(set(node['data']['type'] for node in self.json['data']['nodes']))}"
+            )
+        return result
+
+    def get_component_by_type(self, component_type):
+        components = self.get_components_by_type(component_type)
+        if len(components) > 1:
+            raise ValueError(f"Multiple components of type {component_type} found")
+        return components[0]
+
+    def set_value(self, component_id, key, value):
+        done = False
+        for node in self.json["data"]["nodes"]:
+            if node["id"] == component_id:
+                if key not in node["data"]["node"]["template"]:
+                    raise ValueError(f"Component {component_id} does not have input {key}")
+                node["data"]["node"]["template"][key]["value"] = value
+                node["data"]["node"]["template"][key]["load_from_db"] = False
+                done = True
+                break
+        if not done:
+            raise ValueError(f"Component {component_id} not found")
+
+
+def download_flow_from_github(name: str, version: str) -> JSONFlow:
+    response = requests.get(
+        f"https://raw.githubusercontent.com/langflow-ai/langflow/v{version}/src/backend/base/langflow/initial_setup/starter_projects/{name}.json"
+    )
+    response.raise_for_status()
+    as_json = response.json()
+    return JSONFlow(json=as_json)
+
+
+async def run_json_flow(
+    json_flow: JSONFlow, run_input: Optional[Any] = None, session_id: Optional[str] = None
+) -> dict[str, Any]:
+    graph = Graph.from_payload(json_flow.json)
+    return await run_flow(graph, run_input, session_id)
+
+
+async def run_flow(graph: Graph, run_input: Optional[Any] = None, session_id: Optional[str] = None) -> dict[str, Any]:
+    graph.prepare()
+    if run_input:
+        graph_run_inputs = [InputValueRequest(input_value=run_input, type="chat")]
+    else:
+        graph_run_inputs = []
+
+    flow_id = str(uuid.uuid4())
+
+    results, _ = await run_graph_internal(graph, flow_id, session_id=session_id, inputs=graph_run_inputs)
+    outputs = {}
+    for r in results:
+        for out in r.outputs:
+            outputs |= out.results
+    return outputs
+
+
+@dataclasses.dataclass
+class ComponentInputHandle:
+    clazz: type
+    inputs: dict
+    output_name: str
+
+
+async def run_single_component(
+    clazz: type, inputs: dict = None, run_input: Optional[Any] = None, session_id: Optional[str] = None
+) -> dict[str, Any]:
+    user_id = str(uuid.uuid4())
+    flow_id = str(uuid.uuid4())
+    graph = Graph(user_id=user_id, flow_id=flow_id)
+
+    def _add_component(clazz: type, inputs: Optional[dict] = None) -> str:
+        raw_inputs = {}
+        if inputs:
+            for key, value in inputs.items():
+                if not isinstance(value, ComponentInputHandle):
+                    raw_inputs[key] = value
+                if isinstance(value, Component):
+                    raise ValueError("Component inputs must be wrapped in ComponentInputHandle")
+        component = clazz(**raw_inputs, _user_id=user_id)
+        component_id = graph.add_component(component)
+        if inputs:
+            for input_name, handle in inputs.items():
+                if isinstance(handle, ComponentInputHandle):
+                    handle_component_id = _add_component(handle.clazz, handle.inputs)
+                    graph.add_component_edge(handle_component_id, (handle.output_name, input_name), component_id)
+        return component_id
+
+    component_id = _add_component(clazz, inputs)
+    graph.prepare()
+    if run_input:
+        graph_run_inputs = [InputValueRequest(input_value=run_input, type="chat")]
+    else:
+        graph_run_inputs = []
+
+    _, _ = await run_graph_internal(
+        graph, flow_id, session_id=session_id, inputs=graph_run_inputs, outputs=[component_id]
+    )
+    return graph.get_vertex(component_id)._built_object
```
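JSONFlow.set_value assumes the standard Langflow export layout: each node keeps its inputs under `data.node.template`, and each template entry holds a `value` plus a `load_from_db` flag. A minimal sketch of the structure it mutates, trimmed to the fields the helper touches (the node ID is a hypothetical placeholder):

```python
from tests.integration.utils import JSONFlow

flow = JSONFlow(
    json={
        "data": {
            "nodes": [
                {
                    "id": "OpenAIModel-abc12",  # hypothetical node ID
                    "data": {
                        "type": "OpenAIModel",
                        "node": {"template": {"api_key": {"value": None, "load_from_db": True}}},
                    },
                }
            ]
        }
    }
)
node_id = flow.get_component_by_type("OpenAIModel")
flow.set_value(node_id, "api_key", "sk-test")  # sets value, flips load_from_db to False
```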
Graph unit tests updated to the new add_component API:

```diff
@@ -21,8 +21,8 @@ async def test_graph_not_prepared():
     chat_input = ChatInput()
     chat_output = ChatOutput()
     graph = Graph()
-    graph.add_component("chat_input", chat_input)
-    graph.add_component("chat_output", chat_output)
+    graph.add_component(chat_input)
+    graph.add_component(chat_output)
     with pytest.raises(ValueError):
         await graph.astep()

@@ -32,8 +32,8 @@ async def test_graph():
     chat_input = ChatInput()
     chat_output = ChatOutput()
     graph = Graph()
-    graph.add_component("chat_input", chat_input)
-    graph.add_component("chat_output", chat_output)
+    graph.add_component(chat_input)
+    graph.add_component(chat_output)
     with pytest.warns(UserWarning, match="Graph has vertices but no edges"):
         graph.prepare()

@@ -43,18 +43,20 @@ async def test_graph_with_edge():
     chat_input = ChatInput()
     chat_output = ChatOutput()
     graph = Graph()
-    graph.add_component("chat_input", chat_input)
-    graph.add_component("chat_output", chat_output)
-    graph.add_component_edge("chat_input", (chat_input.outputs[0].name, chat_input.inputs[0].name), "chat_output")
+    input_id = graph.add_component(chat_input)
+    output_id = graph.add_component(chat_output)
+    graph.add_component_edge(input_id, (chat_input.outputs[0].name, chat_input.inputs[0].name), output_id)
     graph.prepare()
-    assert graph._run_queue == deque(["chat_input"])
+    # ensure prepare is idempotent
+    graph.prepare()
+    assert graph._run_queue == deque([input_id])
     await graph.astep()
-    assert graph._run_queue == deque(["chat_output"])
+    assert graph._run_queue == deque([output_id])

-    assert graph.vertices[0].id == "chat_input"
-    assert graph.vertices[1].id == "chat_output"
-    assert graph.edges[0].source_id == "chat_input"
-    assert graph.edges[0].target_id == "chat_output"
+    assert graph.vertices[0].id == input_id
+    assert graph.vertices[1].id == output_id
+    assert graph.edges[0].source_id == input_id
+    assert graph.edges[0].target_id == output_id


 @pytest.mark.asyncio
```
Vector store RAG graph dump test:

```diff
@@ -135,6 +135,7 @@ def test_vector_store_rag_dump_components_and_edges(ingestion_graph, rag_graph):

     ingestion_data = ingestion_graph_dump["data"]
     ingestion_nodes = ingestion_data["nodes"]
+    assert len(ingestion_nodes) == 4
     ingestion_edges = ingestion_data["edges"]

     # Sort nodes by id to check components
```
API run endpoint tests:

```diff
@@ -427,7 +427,6 @@ def test_build_vertex_invalid_vertex_id(client, added_flow_with_prompt_and_histo
     assert response.status_code == 500


-
 @pytest.mark.api_key_required
 def test_successful_run_no_payload(client, simple_api_test, created_api_key):
     headers = {"x-api-key": created_api_key.api_key}
     flow_id = simple_api_test["id"]
```