feat: add Parse JSON component (#3167)

* feat: add Parse JSON component

* feat: Update ParseJSONDataComponent to handle JSON decoding errors

* fix tests

* [autofix.ci] apply automated fixes

* add string check in _parse_data

---------

Co-authored-by: Gabriel Luiz Freitas Almeida <gabriel@langflow.org>
Co-authored-by: autofix-ci[bot] <114827586+autofix-ci[bot]@users.noreply.github.com>
This commit is contained in:
Nicolò Boschi 2024-09-03 18:36:30 +02:00 committed by GitHub
commit 1c7ef6ee60
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
9 changed files with 162 additions and 16 deletions

2
poetry.lock generated
View file

@ -11843,4 +11843,4 @@ local = ["ctransformers", "llama-cpp-python", "sentence-transformers"]
[metadata]
lock-version = "2.0"
python-versions = ">=3.10,<3.13"
content-hash = "0be9d1ea13484a0ccf92511c188edaab862ab3b883813efaca2f9bfbbfccd2a8"
content-hash = "29303d9f5e4beb16bbd62e80810294df24508972f8e38a71938c4531fc33a900"

View file

@ -107,6 +107,7 @@ spider-client = "^0.0.27"
nltk = "^3.9.1"
bson = "^0.5.10"
lark = "^1.2.2"
jq = "^1.8.0"
[tool.poetry.group.dev.dependencies]

View file

@ -0,0 +1,74 @@
import json
from json import JSONDecodeError
import jq
from json_repair import repair_json
from langflow.custom import Component
from langflow.inputs import HandleInput, MessageTextInput
from langflow.io import Output
from langflow.schema import Data
from langflow.schema.message import Message
class ParseJSONDataComponent(Component):
display_name = "Parse JSON"
description = "Convert and extract JSON fields."
icon = "braces"
name = "ParseJSONData"
inputs = [
HandleInput(
name="input_value",
display_name="Input",
info="Data object to filter.",
required=True,
input_types=["Message", "Data"],
),
MessageTextInput(
name="query",
display_name="JQ Query",
info="JQ Query to filter the data. The input is always a JSON list.",
required=True,
),
]
outputs = [
Output(display_name="Filtered Data", name="filtered_data", method="filter_data"),
]
def _parse_data(self, input_value) -> str:
if isinstance(input_value, Message) and isinstance(input_value.text, str):
return input_value.text
if isinstance(input_value, Data):
return json.dumps(input_value.data)
return str(input_value)
def filter_data(self) -> list[Data]:
to_filter = self.input_value
if not to_filter:
return []
if isinstance(to_filter, list):
to_filter = [self._parse_data(f) for f in to_filter]
else:
to_filter = [self._parse_data(to_filter)]
to_filter = [repair_json(f) for f in to_filter]
to_filter_as_dict = []
for f in to_filter:
try:
to_filter_as_dict.append(json.loads(f))
except JSONDecodeError:
try:
to_filter_as_dict.append(json.loads(repair_json(f)))
except JSONDecodeError as e:
raise ValueError(f"Invalid JSON: {e}")
full_filter_str = json.dumps(to_filter_as_dict)
print("to_filter: ", to_filter)
results = jq.compile(self.query).input_text(full_filter_str).all()
print("results: ", results)
docs = [Data(data=value) if isinstance(value, dict) else Data(text=str(value)) for value in results]
return docs

View file

@ -414,7 +414,7 @@
"show": true,
"title_case": false,
"type": "code",
"value": "from langflow.base.io.chat import ChatComponent\nfrom langflow.inputs import BoolInput\nfrom langflow.io import DropdownInput, MessageTextInput, Output\nfrom langflow.memory import store_message\nfrom langflow.schema.message import Message\nfrom langflow.utils.constants import MESSAGE_SENDER_NAME_AI, MESSAGE_SENDER_USER, MESSAGE_SENDER_AI\n\n\nclass ChatOutput(ChatComponent):\n display_name = \"Chat Output\"\n description = \"Display a chat message in the Playground.\"\n icon = \"ChatOutput\"\n name = \"ChatOutput\"\n\n inputs = [\n MessageTextInput(\n name=\"input_value\",\n display_name=\"Text\",\n info=\"Message to be passed as output.\",\n ),\n BoolInput(\n name=\"should_store_message\",\n display_name=\"Store Messages\",\n info=\"Store the message in the history.\",\n value=True,\n advanced=True,\n ),\n DropdownInput(\n name=\"sender\",\n display_name=\"Sender Type\",\n options=[MESSAGE_SENDER_AI, MESSAGE_SENDER_USER],\n value=MESSAGE_SENDER_AI,\n advanced=True,\n info=\"Type of sender.\",\n ),\n MessageTextInput(\n name=\"sender_name\",\n display_name=\"Sender Name\",\n info=\"Name of the sender.\",\n value=MESSAGE_SENDER_NAME_AI,\n advanced=True,\n ),\n MessageTextInput(\n name=\"session_id\",\n display_name=\"Session ID\",\n info=\"The session ID of the chat. If empty, the current session ID parameter will be used.\",\n advanced=True,\n ),\n MessageTextInput(\n name=\"data_template\",\n display_name=\"Data Template\",\n value=\"{text}\",\n advanced=True,\n info=\"Template to convert Data to Text. If left empty, it will be dynamically set to the Data's text key.\",\n ),\n ]\n outputs = [\n Output(display_name=\"Message\", name=\"message\", method=\"message_response\"),\n ]\n\n def message_response(self) -> Message:\n message = Message(\n text=self.input_value,\n sender=self.sender,\n sender_name=self.sender_name,\n session_id=self.session_id,\n )\n if (\n self.session_id\n and isinstance(message, Message)\n and isinstance(message.text, str)\n and self.should_store_message\n ):\n store_message(\n message,\n flow_id=self.graph.flow_id,\n )\n self.message.value = message\n\n self.status = message\n return message\n"
"value": "from langflow.base.io.chat import ChatComponent\nfrom langflow.inputs import BoolInput\nfrom langflow.io import DropdownInput, MessageTextInput, Output\nfrom langflow.memory import store_message\nfrom langflow.schema.message import Message\nfrom langflow.utils.constants import MESSAGE_SENDER_AI, MESSAGE_SENDER_NAME_AI, MESSAGE_SENDER_USER\n\n\nclass ChatOutput(ChatComponent):\n display_name = \"Chat Output\"\n description = \"Display a chat message in the Playground.\"\n icon = \"ChatOutput\"\n name = \"ChatOutput\"\n\n inputs = [\n MessageTextInput(\n name=\"input_value\",\n display_name=\"Text\",\n info=\"Message to be passed as output.\",\n ),\n BoolInput(\n name=\"should_store_message\",\n display_name=\"Store Messages\",\n info=\"Store the message in the history.\",\n value=True,\n advanced=True,\n ),\n DropdownInput(\n name=\"sender\",\n display_name=\"Sender Type\",\n options=[MESSAGE_SENDER_AI, MESSAGE_SENDER_USER],\n value=MESSAGE_SENDER_AI,\n advanced=True,\n info=\"Type of sender.\",\n ),\n MessageTextInput(\n name=\"sender_name\",\n display_name=\"Sender Name\",\n info=\"Name of the sender.\",\n value=MESSAGE_SENDER_NAME_AI,\n advanced=True,\n ),\n MessageTextInput(\n name=\"session_id\",\n display_name=\"Session ID\",\n info=\"The session ID of the chat. If empty, the current session ID parameter will be used.\",\n advanced=True,\n ),\n MessageTextInput(\n name=\"data_template\",\n display_name=\"Data Template\",\n value=\"{text}\",\n advanced=True,\n info=\"Template to convert Data to Text. If left empty, it will be dynamically set to the Data's text key.\",\n ),\n ]\n outputs = [\n Output(display_name=\"Message\", name=\"message\", method=\"message_response\"),\n ]\n\n def message_response(self) -> Message:\n message = Message(\n text=self.input_value,\n sender=self.sender,\n sender_name=self.sender_name,\n session_id=self.session_id,\n )\n if (\n self.session_id\n and isinstance(message, Message)\n and isinstance(message.text, str)\n and self.should_store_message\n ):\n store_message(\n message,\n flow_id=self.graph.flow_id,\n )\n self.message.value = message\n\n self.status = message\n return message\n"
},
"data_template": {
"_input_type": "MessageTextInput",

View file

@ -653,7 +653,7 @@
"show": true,
"title_case": false,
"type": "code",
"value": "from langflow.base.io.chat import ChatComponent\nfrom langflow.inputs import BoolInput\nfrom langflow.io import DropdownInput, MessageTextInput, Output\nfrom langflow.memory import store_message\nfrom langflow.schema.message import Message\nfrom langflow.utils.constants import MESSAGE_SENDER_NAME_AI, MESSAGE_SENDER_USER, MESSAGE_SENDER_AI\n\n\nclass ChatOutput(ChatComponent):\n display_name = \"Chat Output\"\n description = \"Display a chat message in the Playground.\"\n icon = \"ChatOutput\"\n name = \"ChatOutput\"\n\n inputs = [\n MessageTextInput(\n name=\"input_value\",\n display_name=\"Text\",\n info=\"Message to be passed as output.\",\n ),\n BoolInput(\n name=\"should_store_message\",\n display_name=\"Store Messages\",\n info=\"Store the message in the history.\",\n value=True,\n advanced=True,\n ),\n DropdownInput(\n name=\"sender\",\n display_name=\"Sender Type\",\n options=[MESSAGE_SENDER_AI, MESSAGE_SENDER_USER],\n value=MESSAGE_SENDER_AI,\n advanced=True,\n info=\"Type of sender.\",\n ),\n MessageTextInput(\n name=\"sender_name\",\n display_name=\"Sender Name\",\n info=\"Name of the sender.\",\n value=MESSAGE_SENDER_NAME_AI,\n advanced=True,\n ),\n MessageTextInput(\n name=\"session_id\",\n display_name=\"Session ID\",\n info=\"The session ID of the chat. If empty, the current session ID parameter will be used.\",\n advanced=True,\n ),\n MessageTextInput(\n name=\"data_template\",\n display_name=\"Data Template\",\n value=\"{text}\",\n advanced=True,\n info=\"Template to convert Data to Text. If left empty, it will be dynamically set to the Data's text key.\",\n ),\n ]\n outputs = [\n Output(display_name=\"Message\", name=\"message\", method=\"message_response\"),\n ]\n\n def message_response(self) -> Message:\n message = Message(\n text=self.input_value,\n sender=self.sender,\n sender_name=self.sender_name,\n session_id=self.session_id,\n )\n if (\n self.session_id\n and isinstance(message, Message)\n and isinstance(message.text, str)\n and self.should_store_message\n ):\n store_message(\n message,\n flow_id=self.graph.flow_id,\n )\n self.message.value = message\n\n self.status = message\n return message\n"
"value": "from langflow.base.io.chat import ChatComponent\nfrom langflow.inputs import BoolInput\nfrom langflow.io import DropdownInput, MessageTextInput, Output\nfrom langflow.memory import store_message\nfrom langflow.schema.message import Message\nfrom langflow.utils.constants import MESSAGE_SENDER_AI, MESSAGE_SENDER_NAME_AI, MESSAGE_SENDER_USER\n\n\nclass ChatOutput(ChatComponent):\n display_name = \"Chat Output\"\n description = \"Display a chat message in the Playground.\"\n icon = \"ChatOutput\"\n name = \"ChatOutput\"\n\n inputs = [\n MessageTextInput(\n name=\"input_value\",\n display_name=\"Text\",\n info=\"Message to be passed as output.\",\n ),\n BoolInput(\n name=\"should_store_message\",\n display_name=\"Store Messages\",\n info=\"Store the message in the history.\",\n value=True,\n advanced=True,\n ),\n DropdownInput(\n name=\"sender\",\n display_name=\"Sender Type\",\n options=[MESSAGE_SENDER_AI, MESSAGE_SENDER_USER],\n value=MESSAGE_SENDER_AI,\n advanced=True,\n info=\"Type of sender.\",\n ),\n MessageTextInput(\n name=\"sender_name\",\n display_name=\"Sender Name\",\n info=\"Name of the sender.\",\n value=MESSAGE_SENDER_NAME_AI,\n advanced=True,\n ),\n MessageTextInput(\n name=\"session_id\",\n display_name=\"Session ID\",\n info=\"The session ID of the chat. If empty, the current session ID parameter will be used.\",\n advanced=True,\n ),\n MessageTextInput(\n name=\"data_template\",\n display_name=\"Data Template\",\n value=\"{text}\",\n advanced=True,\n info=\"Template to convert Data to Text. If left empty, it will be dynamically set to the Data's text key.\",\n ),\n ]\n outputs = [\n Output(display_name=\"Message\", name=\"message\", method=\"message_response\"),\n ]\n\n def message_response(self) -> Message:\n message = Message(\n text=self.input_value,\n sender=self.sender,\n sender_name=self.sender_name,\n session_id=self.session_id,\n )\n if (\n self.session_id\n and isinstance(message, Message)\n and isinstance(message.text, str)\n and self.should_store_message\n ):\n store_message(\n message,\n flow_id=self.graph.flow_id,\n )\n self.message.value = message\n\n self.status = message\n return message\n"
},
"data_template": {
"_input_type": "MessageTextInput",

View file

@ -1,14 +1,11 @@
import os
from typing import List
from astrapy.db import AstraDB
import pytest
from langflow.components.embeddings import OpenAIEmbeddingsComponent
from langflow.custom import Component
from langflow.inputs import StrInput
from langflow.template import Output
from tests.api_keys import get_astradb_application_token, get_astradb_api_endpoint, get_openai_api_key
from tests.integration.components.mock_components import TextToData
from tests.integration.utils import ComponentInputHandle
from langchain_core.documents import Document
@ -70,14 +67,6 @@ async def test_base(astradb_client: AstraDB):
assert astradb_client.collection(BASIC_COLLECTION)
class TextToData(Component):
inputs = [StrInput(name="text_data", is_list=True)]
outputs = [Output(name="data", display_name="Data", method="create_data")]
def create_data(self) -> List[Data]:
return [Data(text=t) for t in self.text_data]
@pytest.mark.api_key_required
@pytest.mark.asyncio
async def test_astra_embeds_and_search():
@ -93,7 +82,7 @@ async def test_astra_embeds_and_search():
"number_of_results": 1,
"search_input": "test1",
"ingest_data": ComponentInputHandle(
clazz=TextToData, inputs={"text_data": ["test1", "test2"]}, output_name="data"
clazz=TextToData, inputs={"text_data": ["test1", "test2"]}, output_name="from_text"
),
"embedding": ComponentInputHandle(
clazz=OpenAIEmbeddingsComponent,

View file

@ -0,0 +1,57 @@
import pytest
from langflow.components.helpers.ParseJSONData import ParseJSONDataComponent
from langflow.components.inputs import ChatInput
from langflow.schema import Data
from tests.integration.components.mock_components import TextToData
from tests.integration.utils import run_single_component, ComponentInputHandle
@pytest.mark.asyncio
async def test_from_data():
outputs = await run_single_component(
ParseJSONDataComponent,
inputs={
"input_value": ComponentInputHandle(
clazz=TextToData, inputs={"text_data": ['{"key":"value1"}'], "is_json": True}, output_name="from_text"
),
"query": ".[0].key",
},
)
assert outputs["filtered_data"] == [Data(text="value1")]
outputs = await run_single_component(
ParseJSONDataComponent,
inputs={
"input_value": ComponentInputHandle(
clazz=TextToData,
inputs={"text_data": ['{"key":[{"field1": 1, "field2": 2}]}'], "is_json": True},
output_name="from_text",
),
"query": ".[0].key.[0].field2",
},
)
assert outputs["filtered_data"] == [Data(text="2")]
@pytest.mark.asyncio
async def test_from_message():
outputs = await run_single_component(
ParseJSONDataComponent,
inputs={
"input_value": ComponentInputHandle(clazz=ChatInput, inputs={}, output_name="message"),
"query": ".[0].key",
},
run_input="{'key':'value1'}",
)
assert outputs["filtered_data"] == [Data(text="value1")]
outputs = await run_single_component(
ParseJSONDataComponent,
inputs={
"input_value": ComponentInputHandle(clazz=ChatInput, inputs={}, output_name="message"),
"query": ".[0].key.[0].field2",
},
run_input='{"key":[{"field1": 1, "field2": 2}]}',
)
assert outputs["filtered_data"] == [Data(text="2")]

View file

@ -0,0 +1,25 @@
import json
from typing import List
from langflow.custom import Component
from langflow.inputs import StrInput, BoolInput
from langflow.schema import Data
from langflow.template import Output
class TextToData(Component):
inputs = [
StrInput(name="text_data", is_list=True),
BoolInput(name="is_json", info="Parse text_data as json and fill the data object."),
]
outputs = [
Output(name="from_text", display_name="From text", method="create_data"),
]
def _to_data(self, text: str) -> Data:
if self.is_json:
return Data(data=json.loads(text))
return Data(text=text)
def create_data(self) -> List[Data]:
return [self._to_data(t) for t in self.text_data]