class ParseJSONDataComponent(Component):
    """Normalise the input to JSON and filter it with a jq query.

    Each incoming item is turned into text, passed through ``repair_json``,
    parsed with ``json.loads`` and collected into a single JSON array. The jq
    program given in ``query`` is then evaluated against that array.
    """

    display_name = "Parse JSON"
    description = "Convert and extract JSON fields."
    icon = "braces"
    name = "ParseJSONData"

    inputs = [
        HandleInput(
            name="input_value",
            display_name="Input",
            info="Data object to filter.",
            required=True,
            input_types=["Message", "Data"],
        ),
        MessageTextInput(
            name="query",
            display_name="JQ Query",
            info="JQ Query to filter the data. The input is always a JSON list.",
            required=True,
        ),
    ]

    outputs = [
        Output(display_name="Filtered Data", name="filtered_data", method="filter_data"),
    ]

    def _parse_data(self, input_value) -> str:
        """Return the text representation of a single input item.

        Args:
            input_value: One item received on ``input_value``.

        Returns:
            The message text for a ``Message`` whose ``text`` is a string, the
            JSON dump of ``.data`` for a ``Data``, and ``str(input_value)``
            for anything else.
        """
        if isinstance(input_value, Message) and isinstance(input_value.text, str):
            return input_value.text
        if isinstance(input_value, Data):
            return json.dumps(input_value.data)
        return str(input_value)

    @staticmethod
    def _load_json(text: str):
        """Repair ``text`` with ``repair_json`` and parse it as JSON.

        Args:
            text: Possibly malformed JSON text.

        Returns:
            The Python value decoded from the repaired text.

        Raises:
            ValueError: If the text still cannot be decoded after a second
                repair pass. The ``JSONDecodeError`` is chained as the cause.
        """
        repaired = repair_json(text)
        try:
            return json.loads(repaired)
        except JSONDecodeError:
            # Give the repair one more pass before giving up.
            try:
                return json.loads(repair_json(repaired))
            except JSONDecodeError as e:
                raise ValueError(f"Invalid JSON: {e}") from e

    def filter_data(self) -> list[Data]:
        """Run the jq query over the input and wrap every result in ``Data``.

        Returns:
            An empty list when ``input_value`` is falsy. Otherwise one ``Data``
            per jq result: dict results become ``Data(data=...)``, every other
            result becomes ``Data(text=str(result))``.

        Raises:
            ValueError: If an input item cannot be parsed as JSON even after
                repair. Errors raised by ``jq`` itself are not caught here.
        """
        raw_input = self.input_value
        if not raw_input:
            return []

        items = raw_input if isinstance(raw_input, list) else [raw_input]
        parsed_items = [self._load_json(self._parse_data(item)) for item in items]

        # jq always receives a JSON array, even when there is a single input
        # item, which is why queries address elements as ``.[0]``.
        results = jq.compile(self.query).input_text(json.dumps(parsed_items)).all()
        return [Data(data=value) if isinstance(value, dict) else Data(text=str(value)) for value in results]
MESSAGE_SENDER_NAME_AI, MESSAGE_SENDER_USER, MESSAGE_SENDER_AI\n\n\nclass ChatOutput(ChatComponent):\n display_name = \"Chat Output\"\n description = \"Display a chat message in the Playground.\"\n icon = \"ChatOutput\"\n name = \"ChatOutput\"\n\n inputs = [\n MessageTextInput(\n name=\"input_value\",\n display_name=\"Text\",\n info=\"Message to be passed as output.\",\n ),\n BoolInput(\n name=\"should_store_message\",\n display_name=\"Store Messages\",\n info=\"Store the message in the history.\",\n value=True,\n advanced=True,\n ),\n DropdownInput(\n name=\"sender\",\n display_name=\"Sender Type\",\n options=[MESSAGE_SENDER_AI, MESSAGE_SENDER_USER],\n value=MESSAGE_SENDER_AI,\n advanced=True,\n info=\"Type of sender.\",\n ),\n MessageTextInput(\n name=\"sender_name\",\n display_name=\"Sender Name\",\n info=\"Name of the sender.\",\n value=MESSAGE_SENDER_NAME_AI,\n advanced=True,\n ),\n MessageTextInput(\n name=\"session_id\",\n display_name=\"Session ID\",\n info=\"The session ID of the chat. If empty, the current session ID parameter will be used.\",\n advanced=True,\n ),\n MessageTextInput(\n name=\"data_template\",\n display_name=\"Data Template\",\n value=\"{text}\",\n advanced=True,\n info=\"Template to convert Data to Text. 
If left empty, it will be dynamically set to the Data's text key.\",\n ),\n ]\n outputs = [\n Output(display_name=\"Message\", name=\"message\", method=\"message_response\"),\n ]\n\n def message_response(self) -> Message:\n message = Message(\n text=self.input_value,\n sender=self.sender,\n sender_name=self.sender_name,\n session_id=self.session_id,\n )\n if (\n self.session_id\n and isinstance(message, Message)\n and isinstance(message.text, str)\n and self.should_store_message\n ):\n store_message(\n message,\n flow_id=self.graph.flow_id,\n )\n self.message.value = message\n\n self.status = message\n return message\n" + "value": "from langflow.base.io.chat import ChatComponent\nfrom langflow.inputs import BoolInput\nfrom langflow.io import DropdownInput, MessageTextInput, Output\nfrom langflow.memory import store_message\nfrom langflow.schema.message import Message\nfrom langflow.utils.constants import MESSAGE_SENDER_AI, MESSAGE_SENDER_NAME_AI, MESSAGE_SENDER_USER\n\n\nclass ChatOutput(ChatComponent):\n display_name = \"Chat Output\"\n description = \"Display a chat message in the Playground.\"\n icon = \"ChatOutput\"\n name = \"ChatOutput\"\n\n inputs = [\n MessageTextInput(\n name=\"input_value\",\n display_name=\"Text\",\n info=\"Message to be passed as output.\",\n ),\n BoolInput(\n name=\"should_store_message\",\n display_name=\"Store Messages\",\n info=\"Store the message in the history.\",\n value=True,\n advanced=True,\n ),\n DropdownInput(\n name=\"sender\",\n display_name=\"Sender Type\",\n options=[MESSAGE_SENDER_AI, MESSAGE_SENDER_USER],\n value=MESSAGE_SENDER_AI,\n advanced=True,\n info=\"Type of sender.\",\n ),\n MessageTextInput(\n name=\"sender_name\",\n display_name=\"Sender Name\",\n info=\"Name of the sender.\",\n value=MESSAGE_SENDER_NAME_AI,\n advanced=True,\n ),\n MessageTextInput(\n name=\"session_id\",\n display_name=\"Session ID\",\n info=\"The session ID of the chat. 
If empty, the current session ID parameter will be used.\",\n advanced=True,\n ),\n MessageTextInput(\n name=\"data_template\",\n display_name=\"Data Template\",\n value=\"{text}\",\n advanced=True,\n info=\"Template to convert Data to Text. If left empty, it will be dynamically set to the Data's text key.\",\n ),\n ]\n outputs = [\n Output(display_name=\"Message\", name=\"message\", method=\"message_response\"),\n ]\n\n def message_response(self) -> Message:\n message = Message(\n text=self.input_value,\n sender=self.sender,\n sender_name=self.sender_name,\n session_id=self.session_id,\n )\n if (\n self.session_id\n and isinstance(message, Message)\n and isinstance(message.text, str)\n and self.should_store_message\n ):\n store_message(\n message,\n flow_id=self.graph.flow_id,\n )\n self.message.value = message\n\n self.status = message\n return message\n" }, "data_template": { "_input_type": "MessageTextInput", diff --git a/src/backend/base/langflow/initial_setup/starter_projects/Travel Planning Agents.json b/src/backend/base/langflow/initial_setup/starter_projects/Travel Planning Agents.json index e89f43ac1..a64f3af61 100644 --- a/src/backend/base/langflow/initial_setup/starter_projects/Travel Planning Agents.json +++ b/src/backend/base/langflow/initial_setup/starter_projects/Travel Planning Agents.json @@ -653,7 +653,7 @@ "show": true, "title_case": false, "type": "code", - "value": "from langflow.base.io.chat import ChatComponent\nfrom langflow.inputs import BoolInput\nfrom langflow.io import DropdownInput, MessageTextInput, Output\nfrom langflow.memory import store_message\nfrom langflow.schema.message import Message\nfrom langflow.utils.constants import MESSAGE_SENDER_NAME_AI, MESSAGE_SENDER_USER, MESSAGE_SENDER_AI\n\n\nclass ChatOutput(ChatComponent):\n display_name = \"Chat Output\"\n description = \"Display a chat message in the Playground.\"\n icon = \"ChatOutput\"\n name = \"ChatOutput\"\n\n inputs = [\n MessageTextInput(\n name=\"input_value\",\n 
display_name=\"Text\",\n info=\"Message to be passed as output.\",\n ),\n BoolInput(\n name=\"should_store_message\",\n display_name=\"Store Messages\",\n info=\"Store the message in the history.\",\n value=True,\n advanced=True,\n ),\n DropdownInput(\n name=\"sender\",\n display_name=\"Sender Type\",\n options=[MESSAGE_SENDER_AI, MESSAGE_SENDER_USER],\n value=MESSAGE_SENDER_AI,\n advanced=True,\n info=\"Type of sender.\",\n ),\n MessageTextInput(\n name=\"sender_name\",\n display_name=\"Sender Name\",\n info=\"Name of the sender.\",\n value=MESSAGE_SENDER_NAME_AI,\n advanced=True,\n ),\n MessageTextInput(\n name=\"session_id\",\n display_name=\"Session ID\",\n info=\"The session ID of the chat. If empty, the current session ID parameter will be used.\",\n advanced=True,\n ),\n MessageTextInput(\n name=\"data_template\",\n display_name=\"Data Template\",\n value=\"{text}\",\n advanced=True,\n info=\"Template to convert Data to Text. If left empty, it will be dynamically set to the Data's text key.\",\n ),\n ]\n outputs = [\n Output(display_name=\"Message\", name=\"message\", method=\"message_response\"),\n ]\n\n def message_response(self) -> Message:\n message = Message(\n text=self.input_value,\n sender=self.sender,\n sender_name=self.sender_name,\n session_id=self.session_id,\n )\n if (\n self.session_id\n and isinstance(message, Message)\n and isinstance(message.text, str)\n and self.should_store_message\n ):\n store_message(\n message,\n flow_id=self.graph.flow_id,\n )\n self.message.value = message\n\n self.status = message\n return message\n" + "value": "from langflow.base.io.chat import ChatComponent\nfrom langflow.inputs import BoolInput\nfrom langflow.io import DropdownInput, MessageTextInput, Output\nfrom langflow.memory import store_message\nfrom langflow.schema.message import Message\nfrom langflow.utils.constants import MESSAGE_SENDER_AI, MESSAGE_SENDER_NAME_AI, MESSAGE_SENDER_USER\n\n\nclass ChatOutput(ChatComponent):\n display_name = \"Chat 
Output\"\n description = \"Display a chat message in the Playground.\"\n icon = \"ChatOutput\"\n name = \"ChatOutput\"\n\n inputs = [\n MessageTextInput(\n name=\"input_value\",\n display_name=\"Text\",\n info=\"Message to be passed as output.\",\n ),\n BoolInput(\n name=\"should_store_message\",\n display_name=\"Store Messages\",\n info=\"Store the message in the history.\",\n value=True,\n advanced=True,\n ),\n DropdownInput(\n name=\"sender\",\n display_name=\"Sender Type\",\n options=[MESSAGE_SENDER_AI, MESSAGE_SENDER_USER],\n value=MESSAGE_SENDER_AI,\n advanced=True,\n info=\"Type of sender.\",\n ),\n MessageTextInput(\n name=\"sender_name\",\n display_name=\"Sender Name\",\n info=\"Name of the sender.\",\n value=MESSAGE_SENDER_NAME_AI,\n advanced=True,\n ),\n MessageTextInput(\n name=\"session_id\",\n display_name=\"Session ID\",\n info=\"The session ID of the chat. If empty, the current session ID parameter will be used.\",\n advanced=True,\n ),\n MessageTextInput(\n name=\"data_template\",\n display_name=\"Data Template\",\n value=\"{text}\",\n advanced=True,\n info=\"Template to convert Data to Text. 
If left empty, it will be dynamically set to the Data's text key.\",\n ),\n ]\n outputs = [\n Output(display_name=\"Message\", name=\"message\", method=\"message_response\"),\n ]\n\n def message_response(self) -> Message:\n message = Message(\n text=self.input_value,\n sender=self.sender,\n sender_name=self.sender_name,\n session_id=self.session_id,\n )\n if (\n self.session_id\n and isinstance(message, Message)\n and isinstance(message.text, str)\n and self.should_store_message\n ):\n store_message(\n message,\n flow_id=self.graph.flow_id,\n )\n self.message.value = message\n\n self.status = message\n return message\n" }, "data_template": { "_input_type": "MessageTextInput", diff --git a/src/backend/tests/integration/components/astra/test_astra_component.py b/src/backend/tests/integration/components/astra/test_astra_component.py index f169e287f..5bbb25dbf 100644 --- a/src/backend/tests/integration/components/astra/test_astra_component.py +++ b/src/backend/tests/integration/components/astra/test_astra_component.py @@ -1,14 +1,11 @@ import os -from typing import List from astrapy.db import AstraDB import pytest from langflow.components.embeddings import OpenAIEmbeddingsComponent -from langflow.custom import Component -from langflow.inputs import StrInput -from langflow.template import Output from tests.api_keys import get_astradb_application_token, get_astradb_api_endpoint, get_openai_api_key +from tests.integration.components.mock_components import TextToData from tests.integration.utils import ComponentInputHandle from langchain_core.documents import Document @@ -70,14 +67,6 @@ async def test_base(astradb_client: AstraDB): assert astradb_client.collection(BASIC_COLLECTION) -class TextToData(Component): - inputs = [StrInput(name="text_data", is_list=True)] - outputs = [Output(name="data", display_name="Data", method="create_data")] - - def create_data(self) -> List[Data]: - return [Data(text=t) for t in self.text_data] - - @pytest.mark.api_key_required 
# --- src/backend/tests/integration/components/helpers/test_parse_json_data.py ---


@pytest.mark.asyncio
async def test_from_data():
    """ParseJSONDataComponent filters JSON that arrives as ``Data`` objects."""
    flat_source = ComponentInputHandle(
        clazz=TextToData,
        inputs={"text_data": ['{"key":"value1"}'], "is_json": True},
        output_name="from_text",
    )
    flat_result = await run_single_component(
        ParseJSONDataComponent,
        inputs={"input_value": flat_source, "query": ".[0].key"},
    )
    assert flat_result["filtered_data"] == [Data(text="value1")]

    nested_source = ComponentInputHandle(
        clazz=TextToData,
        inputs={"text_data": ['{"key":[{"field1": 1, "field2": 2}]}'], "is_json": True},
        output_name="from_text",
    )
    nested_result = await run_single_component(
        ParseJSONDataComponent,
        inputs={"input_value": nested_source, "query": ".[0].key.[0].field2"},
    )
    # Non-dict jq results are expected back as their string form.
    assert nested_result["filtered_data"] == [Data(text="2")]


@pytest.mark.asyncio
async def test_from_message():
    """ParseJSONDataComponent filters JSON that arrives as a chat message."""
    flat_result = await run_single_component(
        ParseJSONDataComponent,
        inputs={
            "input_value": ComponentInputHandle(clazz=ChatInput, inputs={}, output_name="message"),
            "query": ".[0].key",
        },
        # Single-quoted on purpose: not strict JSON.
        run_input="{'key':'value1'}",
    )
    assert flat_result["filtered_data"] == [Data(text="value1")]

    nested_result = await run_single_component(
        ParseJSONDataComponent,
        inputs={
            "input_value": ComponentInputHandle(clazz=ChatInput, inputs={}, output_name="message"),
            "query": ".[0].key.[0].field2",
        },
        run_input='{"key":[{"field1": 1, "field2": 2}]}',
    )
    assert nested_result["filtered_data"] == [Data(text="2")]


# --- src/backend/tests/integration/components/mock_components.py ---


class TextToData(Component):
    """Test helper component that turns a list of strings into ``Data`` objects."""

    inputs = [
        StrInput(name="text_data", is_list=True),
        BoolInput(name="is_json", info="Parse text_data as json and fill the data object."),
    ]
    outputs = [
        Output(name="from_text", display_name="From text", method="create_data"),
    ]

    def _to_data(self, text: str) -> Data:
        """Wrap ``text`` in a ``Data``: parsed into ``data`` when ``is_json`` is set, else stored as ``text``."""
        return Data(data=json.loads(text)) if self.is_json else Data(text=text)

    def create_data(self) -> List[Data]:
        """Return one ``Data`` per entry of ``text_data``, in order."""
        return [self._to_data(entry) for entry in self.text_data]