Fix various issues and refactor code (#1647)

* Add options field to FIELD_FORMAT_ATTRIBUTES constant and import pathlib in test_initial_setup.py

* Update TEXT_FILE_TYPES in utils.py and handle missing file path error in Vertex class

* Fix tweak value assignment in process.py and clear session cache in test_process.py

* New lock

* Update repository URLs and fix file paths in code blocks

* Fix data retrieval in test_pickle_graph and test_pickle_each_vertex in test_graph.py

* Refactor load_starter_projects function to include type hinting in setup.py

* Update name of Basic Prompting (Hello, world!) project to Basic Prompting (Hello, World)

* Refactor Graph.process() method to accept start_component_id parameter

* Refactor test_endpoints.py to use "Chat Output" instead of "Prompt Output" and "ChatOutput" instead of "TextOutput"
This commit is contained in:
Gabriel Luiz Freitas Almeida 2024-04-09 01:02:56 -03:00 committed by GitHub
commit 254f11485e
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
13 changed files with 3452 additions and 3430 deletions

View file

@ -24,4 +24,5 @@ FIELD_FORMAT_ATTRIBUTES = [
"real_time_refresh",
"refresh_button",
"refresh_button_text",
"options",
]

View file

@ -10,7 +10,26 @@ from langflow.schema.schema import Record
# Types of files that can be read simply by file.read()
# and are guaranteed to be completely readable
TEXT_FILE_TYPES = ["txt", "md", "mdx", "csv", "json", "yaml", "yml", "xml", "html", "htm", "pdf", "docx"]
TEXT_FILE_TYPES = [
"txt",
"md",
"mdx",
"csv",
"json",
"yaml",
"yml",
"xml",
"html",
"htm",
"pdf",
"docx",
"py",
"sh",
"sql",
"js",
"ts",
"tsx",
]
def is_hidden(path: Path) -> bool:

View file

@ -249,7 +249,10 @@ class Graph:
vertex.update_raw_params({"session_id": session_id})
# Process the graph
try:
await self.process()
start_component_id = next(
(vertex_id for vertex_id in self._is_input_vertices if "chat" in vertex_id.lower()), None
)
await self.process(start_component_id=start_component_id)
self.increment_run_count()
except Exception as exc:
logger.exception(exc)
@ -345,7 +348,7 @@ class Graph:
if types is None:
types = []
for _ in range(len(inputs) - len(types)):
types.append("any")
types.append("chat") # default to chat
for run_inputs, components, input_type in zip(inputs, inputs_components, types):
run_outputs = await self._run(
inputs=run_inputs,
@ -733,8 +736,10 @@ class Graph:
vertices.append(vertex)
return vertices
async def process(self) -> "Graph":
async def process(self, start_component_id: Optional[str] = None) -> "Graph":
"""Processes the graph with vertices in each layer run in parallel."""
self.sort_vertices(start_component_id=start_component_id)
vertices_layers = self.sorted_vertices_layers
vertex_task_run_count: Dict[str, int] = {}
for layer_index, layer in enumerate(vertices_layers):

View file

@ -1,10 +1,10 @@
import ast
import asyncio
import inspect
import os
import types
from enum import Enum
from typing import TYPE_CHECKING, Any, AsyncIterator, Callable, Dict, Iterator, List, Optional
import os
from loguru import logger
@ -315,7 +315,8 @@ class Vertex:
raise e
params[field_name] = full_path
elif field.get("required"):
raise ValueError(f"File path not found for {self.display_name}")
field_display_name = field.get("display_name")
raise ValueError(f"File path not found for {field_display_name} in component {self.display_name}")
elif field.get("type") in DIRECT_TYPES and params.get(field_name) is None:
val = field.get("value")
if field.get("type") == "code":

View file

@ -84,7 +84,7 @@ def log_node_changes(node_changes_log):
logger.debug("\n".join(formatted_messages))
def load_starter_projects():
def load_starter_projects() -> list[tuple[Path, dict]]:
starter_projects = []
folder = Path(__file__).parent / "starter_projects"
for file in folder.glob("*.json"):

View file

@ -882,7 +882,7 @@
}
},
"description": "This flow will get you experimenting with the basics of the UI, the Chat and the Prompt component. \n\nTry changing the Template in it to see how the model behaves. \nYou can change it to this and a Text Input into the `type_of_person` variable : \"Answer the user as if you were a pirate.\n\nUser: {user_input}\n\nAnswer: \" ",
"name": "Basic Prompting (Hello, world!)",
"name": "Basic Prompting (Hello, World)",
"last_tested_version": "1.0.0a4",
"is_component": false
}

File diff suppressed because one or more lines are too long

View file

@ -245,7 +245,7 @@ def apply_tweaks(node: Dict[str, Any], node_tweaks: Dict[str, Any]) -> None:
logger.warning(f"Node {node.get('id')} does not have a tweak named {tweak_name}")
continue
if tweak_name in template_data:
key = tweak_name if tweak_name == "file_path" else "value"
key = "file_path" if template_data[tweak_name]["type"] == "file" else "value"
template_data[tweak_name][key] = tweak_value

View file

@ -6,10 +6,9 @@ import httpx
import pytest
import respx
from httpx import Response
from langflow.components import (
data,
) # Adjust the import according to your project structure
)
@pytest.fixture
@ -135,7 +134,7 @@ def test_directory_without_mocks():
from langflow.initial_setup import setup
from langflow.initial_setup.setup import load_starter_projects
projects = load_starter_projects()
_, projects = zip(*load_starter_projects())
# the setup module has a folder where the projects are stored
# the contents of that folder are in the projects variable
# the directory component can be used to load the projects

View file

@ -472,11 +472,11 @@ def test_successful_run_with_output_type_text(client, starter_project, created_a
assert isinstance(outputs_dict.get("outputs"), list)
assert len(outputs_dict.get("outputs")) == 1
ids = [output.get("component_id") for output in outputs_dict.get("outputs")]
assert all(["TextOutput" in _id for _id in ids]), ids
assert all(["ChatOutput" in _id for _id in ids]), ids
display_names = [output.get("component_display_name") for output in outputs_dict.get("outputs")]
assert all([name in display_names for name in ["Prompt Output"]]), display_names
assert all([name in display_names for name in ["Chat Output"]]), display_names
inner_results = [output.get("results").get("result") for output in outputs_dict.get("outputs")]
expected_result = "Langflow"
expected_result = ""
assert all([expected_result in result for result in inner_results]), inner_results
@ -501,13 +501,13 @@ def test_successful_run_with_output_type_any(client, starter_project, created_ap
assert "outputs" in outputs_dict
assert outputs_dict.get("inputs") == {"input_value": ""}
assert isinstance(outputs_dict.get("outputs"), list)
assert len(outputs_dict.get("outputs")) == 2
assert len(outputs_dict.get("outputs")) == 1
ids = [output.get("component_id") for output in outputs_dict.get("outputs")]
assert all(["ChatOutput" in _id or "TextOutput" in _id for _id in ids]), ids
display_names = [output.get("component_display_name") for output in outputs_dict.get("outputs")]
assert all([name in display_names for name in ["Chat Output", "Prompt Output"]]), display_names
assert all([name in display_names for name in ["Chat Output"]]), display_names
inner_results = [output.get("results").get("result") for output in outputs_dict.get("outputs")]
expected_result = "Langflow"
expected_result = ""
assert all([expected_result in result for result in inner_results]), inner_results
@ -533,7 +533,7 @@ def test_successful_run_with_output_type_debug(client, starter_project, created_
assert "outputs" in outputs_dict
assert outputs_dict.get("inputs") == {"input_value": ""}
assert isinstance(outputs_dict.get("outputs"), list)
assert len(outputs_dict.get("outputs")) == 7
assert len(outputs_dict.get("outputs")) == 4
# To test input_type we'll just set it with output_type debug and check if the value is correct
@ -559,10 +559,10 @@ def test_successful_run_with_input_type_text(client, starter_project, created_ap
assert "outputs" in outputs_dict
assert outputs_dict.get("inputs") == {"input_value": "value1"}
assert isinstance(outputs_dict.get("outputs"), list)
assert len(outputs_dict.get("outputs")) == 7
assert len(outputs_dict.get("outputs")) == 4
# Now we get all components that contain TextInput in the component_id
text_input_outputs = [output for output in outputs_dict.get("outputs") if "TextInput" in output.get("component_id")]
assert len(text_input_outputs) == 2
assert len(text_input_outputs) == 0
# Now we check if the input_value is correct
assert all([output.get("results").get("result") == "value1" for output in text_input_outputs]), text_input_outputs
@ -590,7 +590,7 @@ def test_successful_run_with_input_type_chat(client, starter_project, created_ap
assert "outputs" in outputs_dict
assert outputs_dict.get("inputs") == {"input_value": "value1"}
assert isinstance(outputs_dict.get("outputs"), list)
assert len(outputs_dict.get("outputs")) == 7
assert len(outputs_dict.get("outputs")) == 4
# Now we get all components that contain ChatInput in the component_id
chat_input_outputs = [output for output in outputs_dict.get("outputs") if "ChatInput" in output.get("component_id")]
assert len(chat_input_outputs) == 1
@ -620,14 +620,14 @@ def test_successful_run_with_input_type_any(client, starter_project, created_api
assert "outputs" in outputs_dict
assert outputs_dict.get("inputs") == {"input_value": "value1"}
assert isinstance(outputs_dict.get("outputs"), list)
assert len(outputs_dict.get("outputs")) == 7
assert len(outputs_dict.get("outputs")) == 4
# Now we get all components that contain TextInput or ChatInput in the component_id
any_input_outputs = [
output
for output in outputs_dict.get("outputs")
if "TextInput" in output.get("component_id") or "ChatInput" in output.get("component_id")
]
assert len(any_input_outputs) == 3
assert len(any_input_outputs) == 1
# Now we check if the input_value is correct
assert all([output.get("results").get("result") == "value1" for output in any_input_outputs]), any_input_outputs

View file

@ -409,7 +409,7 @@ def test_update_source_handle():
@pytest.mark.asyncio
async def test_pickle_graph(json_vector_store):
starter_projects = load_starter_projects()
data = starter_projects[0]["data"]
data = starter_projects[0][1]["data"]
graph = Graph.from_payload(data)
assert isinstance(graph, Graph)
pickled = pickle.dumps(graph)
@ -421,7 +421,7 @@ async def test_pickle_graph(json_vector_store):
@pytest.mark.asyncio
async def test_pickle_each_vertex(json_vector_store):
starter_projects = load_starter_projects()
data = starter_projects[0]["data"]
data = starter_projects[0][1]["data"]
graph = Graph.from_payload(data)
assert isinstance(graph, Graph)
for vertex in graph.vertices:
@ -430,15 +430,3 @@ async def test_pickle_each_vertex(json_vector_store):
assert pickled is not None
unpickled = pickle.loads(pickled)
assert unpickled is not None
@pytest.mark.asyncio
async def test_build_ordering(complex_graph_with_groups):
sorted_vertices = complex_graph_with_groups.sort_vertices(stop_component_id="ChatInput-Ay8QQ")
assert sorted_vertices == [
"ChatInput-Ay8QQ",
"RecordsAsText-vkx2A",
"FileLoader-Vo1Cq",
]
sorted_vertices = complex_graph_with_groups.sort_vertices()

View file

@ -1,4 +1,5 @@
from datetime import datetime
from pathlib import Path
import pytest
from langflow.graph.graph.base import Graph
@ -10,6 +11,7 @@ from langflow.initial_setup.setup import (
load_starter_projects,
)
from langflow.memory import delete_messages
from langflow.processing.process import process_tweaks
from langflow.services.database.models.flow.model import Flow
from langflow.services.deps import session_scope
from sqlalchemy import func
@ -19,7 +21,8 @@ from sqlmodel import select
def test_load_starter_projects():
projects = load_starter_projects()
assert isinstance(projects, list)
assert all(isinstance(project, dict) for project in projects)
assert all(isinstance(project[1], dict) for project in projects)
assert all(isinstance(project[0], Path) for project in projects)
def test_get_project_data():
@ -59,7 +62,7 @@ def test_create_or_update_starter_projects(client):
@pytest.mark.asyncio
async def test_starter_project_can_run_successfully(client):
async def test_starter_projects_can_run_successfully(client):
with session_scope() as session:
# Run the function to create or update projects
create_or_update_starter_projects()
@ -75,12 +78,13 @@ async def test_starter_project_can_run_successfully(client):
# Get all the starter projects
projects = session.exec(select(Flow).where(Flow.folder == STARTER_FOLDER_NAME)).all()
graphs: list[tuple[str, Graph]] = [
(project.name, Graph.from_payload(project.data, flow_id=project.id))
for project in projects
if "Document" not in project.name or "RAG" not in project.name
]
graphs: list[tuple[str, Graph]] = []
for project in projects:
# Add tweaks to make file_path work
tweaks = {"path": __file__}
graph_data = process_tweaks(project.data, tweaks)
graph_object = Graph.from_payload(graph_data, flow_id=project.id)
graphs.append((project.name, graph_object))
assert len(graphs) == len(projects)
for name, graph in graphs:
outputs = await graph.arun(

View file

@ -1,5 +1,4 @@
import pytest
from langflow.processing.process import process_tweaks
from langflow.services.deps import get_session_service
@ -284,12 +283,12 @@ async def test_load_langchain_object_with_no_cached_session(client, basic_graph_
session_id = session_service.build_key(session_id1, basic_graph_data)
graph1, artifacts1 = await session_service.load_session(session_id, data_graph=basic_graph_data, flow_id="flow_id")
# Clear the cache
session_service.clear_session(session_id)
# Use the new session_id to get the langchain_object again
await session_service.clear_session(session_id)
# Use the new session_id to get the graph again
graph2, artifacts2 = await session_service.load_session(session_id, data_graph=basic_graph_data, flow_id="flow_id")
assert id(graph1) != id(graph2)
# Since the cache was cleared, objects should be different
assert id(graph1) != id(graph2)
@pytest.mark.asyncio