feat: add Parse JSON component (#3167)
* feat: add Parse JSON component * feat: Update ParseJSONDataComponent to handle JSON decoding errors * fix tests * [autofix.ci] apply automated fixes * add string check in _parse_data --------- Co-authored-by: Gabriel Luiz Freitas Almeida <gabriel@langflow.org> Co-authored-by: autofix-ci[bot] <114827586+autofix-ci[bot]@users.noreply.github.com>
This commit is contained in:
parent
439565413e
commit
1c7ef6ee60
9 changed files with 162 additions and 16 deletions
2
poetry.lock
generated
2
poetry.lock
generated
|
|
@ -11843,4 +11843,4 @@ local = ["ctransformers", "llama-cpp-python", "sentence-transformers"]
|
|||
[metadata]
|
||||
lock-version = "2.0"
|
||||
python-versions = ">=3.10,<3.13"
|
||||
content-hash = "0be9d1ea13484a0ccf92511c188edaab862ab3b883813efaca2f9bfbbfccd2a8"
|
||||
content-hash = "29303d9f5e4beb16bbd62e80810294df24508972f8e38a71938c4531fc33a900"
|
||||
|
|
|
|||
|
|
@ -107,6 +107,7 @@ spider-client = "^0.0.27"
|
|||
nltk = "^3.9.1"
|
||||
bson = "^0.5.10"
|
||||
lark = "^1.2.2"
|
||||
jq = "^1.8.0"
|
||||
|
||||
|
||||
[tool.poetry.group.dev.dependencies]
|
||||
|
|
|
|||
|
|
@ -0,0 +1,74 @@
|
|||
import json
|
||||
from json import JSONDecodeError
|
||||
|
||||
import jq
|
||||
from json_repair import repair_json
|
||||
|
||||
from langflow.custom import Component
|
||||
from langflow.inputs import HandleInput, MessageTextInput
|
||||
from langflow.io import Output
|
||||
from langflow.schema import Data
|
||||
from langflow.schema.message import Message
|
||||
|
||||
|
||||
class ParseJSONDataComponent(Component):
    """Convert incoming ``Message``/``Data`` objects to JSON and filter them with jq.

    Every input item is serialised to a JSON string, passed through
    ``repair_json``, decoded, and collected into a single list. The jq program
    given in ``query`` is then evaluated against that list.
    """

    display_name = "Parse JSON"
    description = "Convert and extract JSON fields."
    icon = "braces"
    name = "ParseJSONData"

    inputs = [
        HandleInput(
            name="input_value",
            display_name="Input",
            info="Data object to filter.",
            required=True,
            input_types=["Message", "Data"],
        ),
        MessageTextInput(
            name="query",
            display_name="JQ Query",
            info="JQ Query to filter the data. The input is always a JSON list.",
            required=True,
        ),
    ]

    outputs = [
        Output(display_name="Filtered Data", name="filtered_data", method="filter_data"),
    ]

    def _parse_data(self, input_value) -> str:
        """Return the textual (ideally JSON) form of a single input item.

        Args:
            input_value: A ``Message``, a ``Data`` object, or any other value.

        Returns:
            The message text when ``input_value`` is a ``Message`` whose text is
            a string, the JSON dump of ``.data`` when it is a ``Data`` object,
            and ``str(input_value)`` otherwise.
        """
        if isinstance(input_value, Message) and isinstance(input_value.text, str):
            return input_value.text
        if isinstance(input_value, Data):
            return json.dumps(input_value.data)
        return str(input_value)

    def filter_data(self) -> list[Data]:
        """Apply the jq ``query`` to the JSON form of ``input_value``.

        Returns:
            One ``Data`` per jq result: dict results become ``Data(data=...)``,
            every other result becomes ``Data(text=str(result))``. An empty
            list is returned when ``input_value`` is falsy.

        Raises:
            ValueError: If an item still cannot be decoded as JSON after being
                repaired; the original ``JSONDecodeError`` is chained as the
                cause.
        """
        to_filter = self.input_value
        if not to_filter:
            return []

        # The jq program always receives a list, so wrap a single item.
        items = to_filter if isinstance(to_filter, list) else [to_filter]
        raw_texts = [self._parse_data(item) for item in items]
        repaired_texts = [repair_json(text) for text in raw_texts]

        to_filter_as_dict = []
        for text in repaired_texts:
            try:
                to_filter_as_dict.append(json.loads(text))
            except JSONDecodeError:
                # Second chance: repair the already-repaired text once more
                # before giving up on this item.
                try:
                    to_filter_as_dict.append(json.loads(repair_json(text)))
                except JSONDecodeError as e:
                    raise ValueError(f"Invalid JSON: {e}") from e

        full_filter_str = json.dumps(to_filter_as_dict)
        results = jq.compile(self.query).input_text(full_filter_str).all()

        return [Data(data=value) if isinstance(value, dict) else Data(text=str(value)) for value in results]
|
||||
|
|
@ -414,7 +414,7 @@
|
|||
"show": true,
|
||||
"title_case": false,
|
||||
"type": "code",
|
||||
"value": "from langflow.base.io.chat import ChatComponent\nfrom langflow.inputs import BoolInput\nfrom langflow.io import DropdownInput, MessageTextInput, Output\nfrom langflow.memory import store_message\nfrom langflow.schema.message import Message\nfrom langflow.utils.constants import MESSAGE_SENDER_NAME_AI, MESSAGE_SENDER_USER, MESSAGE_SENDER_AI\n\n\nclass ChatOutput(ChatComponent):\n display_name = \"Chat Output\"\n description = \"Display a chat message in the Playground.\"\n icon = \"ChatOutput\"\n name = \"ChatOutput\"\n\n inputs = [\n MessageTextInput(\n name=\"input_value\",\n display_name=\"Text\",\n info=\"Message to be passed as output.\",\n ),\n BoolInput(\n name=\"should_store_message\",\n display_name=\"Store Messages\",\n info=\"Store the message in the history.\",\n value=True,\n advanced=True,\n ),\n DropdownInput(\n name=\"sender\",\n display_name=\"Sender Type\",\n options=[MESSAGE_SENDER_AI, MESSAGE_SENDER_USER],\n value=MESSAGE_SENDER_AI,\n advanced=True,\n info=\"Type of sender.\",\n ),\n MessageTextInput(\n name=\"sender_name\",\n display_name=\"Sender Name\",\n info=\"Name of the sender.\",\n value=MESSAGE_SENDER_NAME_AI,\n advanced=True,\n ),\n MessageTextInput(\n name=\"session_id\",\n display_name=\"Session ID\",\n info=\"The session ID of the chat. If empty, the current session ID parameter will be used.\",\n advanced=True,\n ),\n MessageTextInput(\n name=\"data_template\",\n display_name=\"Data Template\",\n value=\"{text}\",\n advanced=True,\n info=\"Template to convert Data to Text. 
If left empty, it will be dynamically set to the Data's text key.\",\n ),\n ]\n outputs = [\n Output(display_name=\"Message\", name=\"message\", method=\"message_response\"),\n ]\n\n def message_response(self) -> Message:\n message = Message(\n text=self.input_value,\n sender=self.sender,\n sender_name=self.sender_name,\n session_id=self.session_id,\n )\n if (\n self.session_id\n and isinstance(message, Message)\n and isinstance(message.text, str)\n and self.should_store_message\n ):\n store_message(\n message,\n flow_id=self.graph.flow_id,\n )\n self.message.value = message\n\n self.status = message\n return message\n"
|
||||
"value": "from langflow.base.io.chat import ChatComponent\nfrom langflow.inputs import BoolInput\nfrom langflow.io import DropdownInput, MessageTextInput, Output\nfrom langflow.memory import store_message\nfrom langflow.schema.message import Message\nfrom langflow.utils.constants import MESSAGE_SENDER_AI, MESSAGE_SENDER_NAME_AI, MESSAGE_SENDER_USER\n\n\nclass ChatOutput(ChatComponent):\n display_name = \"Chat Output\"\n description = \"Display a chat message in the Playground.\"\n icon = \"ChatOutput\"\n name = \"ChatOutput\"\n\n inputs = [\n MessageTextInput(\n name=\"input_value\",\n display_name=\"Text\",\n info=\"Message to be passed as output.\",\n ),\n BoolInput(\n name=\"should_store_message\",\n display_name=\"Store Messages\",\n info=\"Store the message in the history.\",\n value=True,\n advanced=True,\n ),\n DropdownInput(\n name=\"sender\",\n display_name=\"Sender Type\",\n options=[MESSAGE_SENDER_AI, MESSAGE_SENDER_USER],\n value=MESSAGE_SENDER_AI,\n advanced=True,\n info=\"Type of sender.\",\n ),\n MessageTextInput(\n name=\"sender_name\",\n display_name=\"Sender Name\",\n info=\"Name of the sender.\",\n value=MESSAGE_SENDER_NAME_AI,\n advanced=True,\n ),\n MessageTextInput(\n name=\"session_id\",\n display_name=\"Session ID\",\n info=\"The session ID of the chat. If empty, the current session ID parameter will be used.\",\n advanced=True,\n ),\n MessageTextInput(\n name=\"data_template\",\n display_name=\"Data Template\",\n value=\"{text}\",\n advanced=True,\n info=\"Template to convert Data to Text. 
If left empty, it will be dynamically set to the Data's text key.\",\n ),\n ]\n outputs = [\n Output(display_name=\"Message\", name=\"message\", method=\"message_response\"),\n ]\n\n def message_response(self) -> Message:\n message = Message(\n text=self.input_value,\n sender=self.sender,\n sender_name=self.sender_name,\n session_id=self.session_id,\n )\n if (\n self.session_id\n and isinstance(message, Message)\n and isinstance(message.text, str)\n and self.should_store_message\n ):\n store_message(\n message,\n flow_id=self.graph.flow_id,\n )\n self.message.value = message\n\n self.status = message\n return message\n"
|
||||
},
|
||||
"data_template": {
|
||||
"_input_type": "MessageTextInput",
|
||||
|
|
|
|||
|
|
@ -653,7 +653,7 @@
|
|||
"show": true,
|
||||
"title_case": false,
|
||||
"type": "code",
|
||||
"value": "from langflow.base.io.chat import ChatComponent\nfrom langflow.inputs import BoolInput\nfrom langflow.io import DropdownInput, MessageTextInput, Output\nfrom langflow.memory import store_message\nfrom langflow.schema.message import Message\nfrom langflow.utils.constants import MESSAGE_SENDER_NAME_AI, MESSAGE_SENDER_USER, MESSAGE_SENDER_AI\n\n\nclass ChatOutput(ChatComponent):\n display_name = \"Chat Output\"\n description = \"Display a chat message in the Playground.\"\n icon = \"ChatOutput\"\n name = \"ChatOutput\"\n\n inputs = [\n MessageTextInput(\n name=\"input_value\",\n display_name=\"Text\",\n info=\"Message to be passed as output.\",\n ),\n BoolInput(\n name=\"should_store_message\",\n display_name=\"Store Messages\",\n info=\"Store the message in the history.\",\n value=True,\n advanced=True,\n ),\n DropdownInput(\n name=\"sender\",\n display_name=\"Sender Type\",\n options=[MESSAGE_SENDER_AI, MESSAGE_SENDER_USER],\n value=MESSAGE_SENDER_AI,\n advanced=True,\n info=\"Type of sender.\",\n ),\n MessageTextInput(\n name=\"sender_name\",\n display_name=\"Sender Name\",\n info=\"Name of the sender.\",\n value=MESSAGE_SENDER_NAME_AI,\n advanced=True,\n ),\n MessageTextInput(\n name=\"session_id\",\n display_name=\"Session ID\",\n info=\"The session ID of the chat. If empty, the current session ID parameter will be used.\",\n advanced=True,\n ),\n MessageTextInput(\n name=\"data_template\",\n display_name=\"Data Template\",\n value=\"{text}\",\n advanced=True,\n info=\"Template to convert Data to Text. 
If left empty, it will be dynamically set to the Data's text key.\",\n ),\n ]\n outputs = [\n Output(display_name=\"Message\", name=\"message\", method=\"message_response\"),\n ]\n\n def message_response(self) -> Message:\n message = Message(\n text=self.input_value,\n sender=self.sender,\n sender_name=self.sender_name,\n session_id=self.session_id,\n )\n if (\n self.session_id\n and isinstance(message, Message)\n and isinstance(message.text, str)\n and self.should_store_message\n ):\n store_message(\n message,\n flow_id=self.graph.flow_id,\n )\n self.message.value = message\n\n self.status = message\n return message\n"
|
||||
"value": "from langflow.base.io.chat import ChatComponent\nfrom langflow.inputs import BoolInput\nfrom langflow.io import DropdownInput, MessageTextInput, Output\nfrom langflow.memory import store_message\nfrom langflow.schema.message import Message\nfrom langflow.utils.constants import MESSAGE_SENDER_AI, MESSAGE_SENDER_NAME_AI, MESSAGE_SENDER_USER\n\n\nclass ChatOutput(ChatComponent):\n display_name = \"Chat Output\"\n description = \"Display a chat message in the Playground.\"\n icon = \"ChatOutput\"\n name = \"ChatOutput\"\n\n inputs = [\n MessageTextInput(\n name=\"input_value\",\n display_name=\"Text\",\n info=\"Message to be passed as output.\",\n ),\n BoolInput(\n name=\"should_store_message\",\n display_name=\"Store Messages\",\n info=\"Store the message in the history.\",\n value=True,\n advanced=True,\n ),\n DropdownInput(\n name=\"sender\",\n display_name=\"Sender Type\",\n options=[MESSAGE_SENDER_AI, MESSAGE_SENDER_USER],\n value=MESSAGE_SENDER_AI,\n advanced=True,\n info=\"Type of sender.\",\n ),\n MessageTextInput(\n name=\"sender_name\",\n display_name=\"Sender Name\",\n info=\"Name of the sender.\",\n value=MESSAGE_SENDER_NAME_AI,\n advanced=True,\n ),\n MessageTextInput(\n name=\"session_id\",\n display_name=\"Session ID\",\n info=\"The session ID of the chat. If empty, the current session ID parameter will be used.\",\n advanced=True,\n ),\n MessageTextInput(\n name=\"data_template\",\n display_name=\"Data Template\",\n value=\"{text}\",\n advanced=True,\n info=\"Template to convert Data to Text. 
If left empty, it will be dynamically set to the Data's text key.\",\n ),\n ]\n outputs = [\n Output(display_name=\"Message\", name=\"message\", method=\"message_response\"),\n ]\n\n def message_response(self) -> Message:\n message = Message(\n text=self.input_value,\n sender=self.sender,\n sender_name=self.sender_name,\n session_id=self.session_id,\n )\n if (\n self.session_id\n and isinstance(message, Message)\n and isinstance(message.text, str)\n and self.should_store_message\n ):\n store_message(\n message,\n flow_id=self.graph.flow_id,\n )\n self.message.value = message\n\n self.status = message\n return message\n"
|
||||
},
|
||||
"data_template": {
|
||||
"_input_type": "MessageTextInput",
|
||||
|
|
|
|||
|
|
@ -1,14 +1,11 @@
|
|||
import os
|
||||
from typing import List
|
||||
|
||||
from astrapy.db import AstraDB
|
||||
import pytest
|
||||
|
||||
from langflow.components.embeddings import OpenAIEmbeddingsComponent
|
||||
from langflow.custom import Component
|
||||
from langflow.inputs import StrInput
|
||||
from langflow.template import Output
|
||||
from tests.api_keys import get_astradb_application_token, get_astradb_api_endpoint, get_openai_api_key
|
||||
from tests.integration.components.mock_components import TextToData
|
||||
from tests.integration.utils import ComponentInputHandle
|
||||
from langchain_core.documents import Document
|
||||
|
||||
|
|
@ -70,14 +67,6 @@ async def test_base(astradb_client: AstraDB):
|
|||
assert astradb_client.collection(BASIC_COLLECTION)
|
||||
|
||||
|
||||
class TextToData(Component):
    """Test helper component that wraps each input string in a ``Data`` object."""

    inputs = [StrInput(name="text_data", is_list=True)]
    outputs = [Output(name="data", display_name="Data", method="create_data")]

    def create_data(self) -> List[Data]:
        """Return one ``Data(text=...)`` for every entry of ``text_data``."""
        wrapped = []
        for entry in self.text_data:
            wrapped.append(Data(text=entry))
        return wrapped
|
||||
|
||||
|
||||
@pytest.mark.api_key_required
|
||||
@pytest.mark.asyncio
|
||||
async def test_astra_embeds_and_search():
|
||||
|
|
@ -93,7 +82,7 @@ async def test_astra_embeds_and_search():
|
|||
"number_of_results": 1,
|
||||
"search_input": "test1",
|
||||
"ingest_data": ComponentInputHandle(
|
||||
clazz=TextToData, inputs={"text_data": ["test1", "test2"]}, output_name="data"
|
||||
clazz=TextToData, inputs={"text_data": ["test1", "test2"]}, output_name="from_text"
|
||||
),
|
||||
"embedding": ComponentInputHandle(
|
||||
clazz=OpenAIEmbeddingsComponent,
|
||||
|
|
|
|||
|
|
@ -0,0 +1,57 @@
|
|||
import pytest
|
||||
|
||||
from langflow.components.helpers.ParseJSONData import ParseJSONDataComponent
|
||||
from langflow.components.inputs import ChatInput
|
||||
from langflow.schema import Data
|
||||
from tests.integration.components.mock_components import TextToData
|
||||
from tests.integration.utils import run_single_component, ComponentInputHandle
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_from_data():
    """Feed ``Data`` objects built by ``TextToData`` into Parse JSON and check the jq output."""
    # Case 1: flat object, the query selects a top-level key of the first element.
    flat_source = ComponentInputHandle(
        clazz=TextToData,
        inputs={"text_data": ['{"key":"value1"}'], "is_json": True},
        output_name="from_text",
    )
    flat_outputs = await run_single_component(
        ParseJSONDataComponent,
        inputs={"input_value": flat_source, "query": ".[0].key"},
    )
    assert flat_outputs["filtered_data"] == [Data(text="value1")]

    # Case 2: nested list of objects, the query drills down to a numeric field.
    nested_source = ComponentInputHandle(
        clazz=TextToData,
        inputs={"text_data": ['{"key":[{"field1": 1, "field2": 2}]}'], "is_json": True},
        output_name="from_text",
    )
    nested_outputs = await run_single_component(
        ParseJSONDataComponent,
        inputs={"input_value": nested_source, "query": ".[0].key.[0].field2"},
    )
    assert nested_outputs["filtered_data"] == [Data(text="2")]
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_from_message():
    """Feed chat messages into Parse JSON and check the jq output."""
    # Case 1: the run input uses single quotes, i.e. it is not strict JSON.
    # NOTE(review): presumably this exercises the repair_json step - confirm.
    quoted_inputs = {
        "input_value": ComponentInputHandle(clazz=ChatInput, inputs={}, output_name="message"),
        "query": ".[0].key",
    }
    quoted_outputs = await run_single_component(
        ParseJSONDataComponent,
        inputs=quoted_inputs,
        run_input="{'key':'value1'}",
    )
    assert quoted_outputs["filtered_data"] == [Data(text="value1")]

    # Case 2: well-formed nested JSON, the query drills down to a numeric field.
    nested_inputs = {
        "input_value": ComponentInputHandle(clazz=ChatInput, inputs={}, output_name="message"),
        "query": ".[0].key.[0].field2",
    }
    nested_outputs = await run_single_component(
        ParseJSONDataComponent,
        inputs=nested_inputs,
        run_input='{"key":[{"field1": 1, "field2": 2}]}',
    )
    assert nested_outputs["filtered_data"] == [Data(text="2")]
|
||||
25
src/backend/tests/integration/components/mock_components.py
Normal file
25
src/backend/tests/integration/components/mock_components.py
Normal file
|
|
@ -0,0 +1,25 @@
|
|||
import json
|
||||
from typing import List
|
||||
|
||||
from langflow.custom import Component
|
||||
from langflow.inputs import StrInput, BoolInput
|
||||
from langflow.schema import Data
|
||||
from langflow.template import Output
|
||||
|
||||
|
||||
class TextToData(Component):
    """Mock component for integration tests that turns strings into ``Data`` objects."""

    inputs = [
        StrInput(name="text_data", is_list=True),
        BoolInput(name="is_json", info="Parse text_data as json and fill the data object."),
    ]
    outputs = [
        Output(name="from_text", display_name="From text", method="create_data"),
    ]

    def _to_data(self, text: str) -> Data:
        """Build a ``Data`` from ``text``.

        When ``is_json`` is truthy the text is decoded with ``json.loads`` and
        stored as ``data``; otherwise it is stored unchanged as ``text``.
        """
        if not self.is_json:
            return Data(text=text)
        return Data(data=json.loads(text))

    def create_data(self) -> List[Data]:
        """Convert every entry of ``text_data`` with ``_to_data``."""
        converted = []
        for entry in self.text_data:
            converted.append(self._to_data(entry))
        return converted
|
||||
Loading…
Add table
Add a link
Reference in a new issue