From 229ba19a6699a5bdf9a1af161467f2979a49d54d Mon Sep 17 00:00:00 2001 From: Edwin Jose Date: Wed, 22 Jan 2025 13:30:01 -0500 Subject: [PATCH] feat: add WikiData Component and deprecate the WikiData API tool component (#5872) * update * [autofix.ci] apply automated fixes * Update test_wikidata_api.py * [autofix.ci] apply automated fixes * Update wikidata.py --------- Co-authored-by: autofix-ci[bot] <114827586+autofix-ci[bot]@users.noreply.github.com> --- .../langflow/components/tools/__init__.py | 2 + .../langflow/components/tools/wikidata.py | 86 ++++++++++ .../langflow/components/tools/wikidata_api.py | 139 ++++++++------- .../components/tools/test_wikidata_api.py | 161 ++++++++++-------- 4 files changed, 252 insertions(+), 136 deletions(-) create mode 100644 src/backend/base/langflow/components/tools/wikidata.py diff --git a/src/backend/base/langflow/components/tools/__init__.py b/src/backend/base/langflow/components/tools/__init__.py index 2ff6f3c49..78820ed0f 100644 --- a/src/backend/base/langflow/components/tools/__init__.py +++ b/src/backend/base/langflow/components/tools/__init__.py @@ -24,6 +24,7 @@ from .serp import SerpComponent from .serp_api import SerpAPIComponent from .tavily import TavilySearchComponent from .tavily_search import TavilySearchToolComponent +from .wikidata import WikidataComponent from .wikidata_api import WikidataAPIComponent from .wikipedia import WikipediaComponent from .wikipedia_api import WikipediaAPIComponent @@ -62,6 +63,7 @@ __all__ = [ "TavilySearchComponent", "TavilySearchToolComponent", "WikidataAPIComponent", + "WikidataComponent", "WikipediaAPIComponent", "WikipediaComponent", "WolframAlphaAPIComponent", diff --git a/src/backend/base/langflow/components/tools/wikidata.py b/src/backend/base/langflow/components/tools/wikidata.py new file mode 100644 index 000000000..5fd0709e0 --- /dev/null +++ b/src/backend/base/langflow/components/tools/wikidata.py @@ -0,0 +1,86 @@ +import httpx +from httpx import HTTPError +from 
langchain_core.tools import ToolException + +from langflow.custom import Component +from langflow.helpers.data import data_to_text +from langflow.io import MultilineInput, Output +from langflow.schema import Data +from langflow.schema.message import Message + + +class WikidataComponent(Component): + display_name = "Wikidata" + description = "Performs a search using the Wikidata API." + icon = "Wikipedia" + + inputs = [ + MultilineInput( + name="query", + display_name="Query", + info="The text query for similarity search on Wikidata.", + required=True, + tool_mode=True, + ), + ] + + outputs = [ + Output(display_name="Data", name="data", method="fetch_content"), + Output(display_name="Message", name="text", method="fetch_content_text"), + ] + + def fetch_content(self) -> list[Data]: + try: + # Define request parameters for Wikidata API + params = { + "action": "wbsearchentities", + "format": "json", + "search": self.query, + "language": "en", + } + + # Send request to Wikidata API + wikidata_api_url = "https://www.wikidata.org/w/api.php" + response = httpx.get(wikidata_api_url, params=params) + response.raise_for_status() + response_json = response.json() + + # Extract search results + results = response_json.get("search", []) + + if not results: + return [Data(data={"error": "No search results found for the given query."})] + + # Transform the API response into Data objects + data = [ + Data( + text=f"{result['label']}: {result.get('description', '')}", + data={ + "label": result["label"], + "id": result.get("id"), + "url": result.get("url"), + "description": result.get("description", ""), + "concepturi": result.get("concepturi"), + }, + ) + for result in results + ] + + self.status = data + except HTTPError as e: + error_message = f"HTTP Error in Wikidata Search API: {e!s}" + raise ToolException(error_message) from None + except KeyError as e: + error_message = f"Data parsing error in Wikidata API response: {e!s}" + raise ToolException(error_message) from None + 
except ValueError as e: + error_message = f"Value error in Wikidata API: {e!s}" + raise ToolException(error_message) from None + else: + return data + + def fetch_content_text(self) -> Message: + data = self.fetch_content() + result_string = data_to_text("{text}", data) + self.status = result_string + return Message(text=result_string) diff --git a/src/backend/base/langflow/components/tools/wikidata_api.py b/src/backend/base/langflow/components/tools/wikidata_api.py index ae3db0944..eb9d0a286 100644 --- a/src/backend/base/langflow/components/tools/wikidata_api.py +++ b/src/backend/base/langflow/components/tools/wikidata_api.py @@ -1,19 +1,63 @@ +from typing import Any + import httpx -from httpx import HTTPError -from langchain_core.tools import ToolException +from langchain_core.tools import StructuredTool, ToolException +from pydantic import BaseModel, Field -from langflow.custom import Component -from langflow.helpers.data import data_to_text -from langflow.io import MultilineInput, Output +from langflow.base.langchain_utilities.model import LCToolComponent +from langflow.field_typing import Tool +from langflow.inputs import MultilineInput from langflow.schema import Data -from langflow.schema.message import Message -class WikidataAPIComponent(Component): - display_name = "Wikidata API" +class WikidataSearchSchema(BaseModel): + query: str = Field(..., description="The search query for Wikidata") + + +class WikidataAPIWrapper(BaseModel): + """Wrapper around Wikidata API.""" + + wikidata_api_url: str = "https://www.wikidata.org/w/api.php" + + def results(self, query: str) -> list[dict[str, Any]]: + # Define request parameters for Wikidata API + params = { + "action": "wbsearchentities", + "format": "json", + "search": query, + "language": "en", + } + + # Send request to Wikidata API + response = httpx.get(self.wikidata_api_url, params=params) + response.raise_for_status() + response_json = response.json() + + # Extract and return search results + return 
response_json.get("search", []) + + def run(self, query: str) -> list[dict[str, Any]]: + try: + results = self.results(query) + if results: + return results + + error_message = "No search results found for the given query." + + raise ToolException(error_message) + + except Exception as e: + error_message = f"Error in Wikidata Search API: {e!s}" + + raise ToolException(error_message) from e + + +class WikidataAPIComponent(LCToolComponent): + display_name = "Wikidata API [Deprecated]" description = "Performs a search using the Wikidata API." name = "WikidataAPI" icon = "Wikipedia" + legacy = True inputs = [ MultilineInput( @@ -21,67 +65,38 @@ class WikidataAPIComponent(Component): display_name="Query", info="The text query for similarity search on Wikidata.", required=True, - tool_mode=True, ), ] - outputs = [ - Output(display_name="Data", name="data", method="fetch_content"), - Output(display_name="Message", name="text", method="fetch_content_text"), - ] + def build_tool(self) -> Tool: + wrapper = WikidataAPIWrapper() - def fetch_content(self) -> list[Data]: - try: - # Define request parameters for Wikidata API - params = { - "action": "wbsearchentities", - "format": "json", - "search": self.query, - "language": "en", - } + # Define the tool using StructuredTool and wrapper's run method + tool = StructuredTool.from_function( + name="wikidata_search_api", + description="Perform similarity search on Wikidata API", + func=wrapper.run, + args_schema=WikidataSearchSchema, + ) - # Send request to Wikidata API - wikidata_api_url = "https://www.wikidata.org/w/api.php" - response = httpx.get(wikidata_api_url, params=params) - response.raise_for_status() - response_json = response.json() + self.status = "Wikidata Search API Tool for Langchain" - # Extract search results - results = response_json.get("search", []) + return tool - if not results: - return [Data(data={"error": "No search results found for the given query."})] + def run_model(self) -> list[Data]: + tool = 
self.build_tool() - # Transform the API response into Data objects - data = [ - Data( - text=f"{result['label']}: {result.get('description', '')}", - data={ - "label": result["label"], - "id": result.get("id"), - "url": result.get("url"), - "description": result.get("description", ""), - "concepturi": result.get("concepturi"), - }, - ) - for result in results - ] + results = tool.run({"query": self.query}) - self.status = data - except HTTPError as e: - error_message = f"HTTP Error in Wikidata Search API: {e!s}" - raise ToolException(error_message) from None - except KeyError as e: - error_message = f"Data parsing error in Wikidata API response: {e!s}" - raise ToolException(error_message) from None - except ValueError as e: - error_message = f"Value error in Wikidata API: {e!s}" - raise ToolException(error_message) from None - else: - return data + # Transform the API response into Data objects + data = [ + Data( + text=result["label"], + metadata=result, + ) + for result in results + ] - def fetch_content_text(self) -> Message: - data = self.fetch_content() - result_string = data_to_text("{text}", data) - self.status = result_string - return Message(text=result_string) + self.status = data # type: ignore[assignment] + + return data diff --git a/src/backend/tests/unit/components/tools/test_wikidata_api.py b/src/backend/tests/unit/components/tools/test_wikidata_api.py index b7a2a1ab3..0e432b5ad 100644 --- a/src/backend/tests/unit/components/tools/test_wikidata_api.py +++ b/src/backend/tests/unit/components/tools/test_wikidata_api.py @@ -3,104 +3,117 @@ from unittest.mock import MagicMock, patch import httpx import pytest from langchain_core.tools import ToolException -from langflow.components.tools import WikidataAPIComponent +from langflow.components.tools import WikidataComponent from langflow.custom import Component from langflow.custom.utils import build_custom_component_template from langflow.schema import Data from langflow.schema.message import Message - -def 
test_wikidata_initialization(): - component = WikidataAPIComponent() - assert component.display_name == "Wikidata API" - assert component.description == "Performs a search using the Wikidata API." - assert component.icon == "Wikipedia" +# Import the base test class +from tests.base import ComponentTestBaseWithoutClient -def test_wikidata_template(): - wikidata = WikidataAPIComponent() - component = Component(_code=wikidata._code) - frontend_node, _ = build_custom_component_template(component) +class TestWikidataComponent(ComponentTestBaseWithoutClient): + @pytest.fixture + def component_class(self): + """Fixture to create a WikidataComponent instance.""" + return WikidataComponent - # Verify basic structure - assert isinstance(frontend_node, dict) + @pytest.fixture + def file_names_mapping(self): + """Return an empty list since this component doesn't have version-specific files.""" + return [] - # Verify inputs - assert "template" in frontend_node - input_names = [input_["name"] for input_ in frontend_node["template"].values() if isinstance(input_, dict)] - assert "query" in input_names + @pytest.fixture + def mock_query(self): + """Fixture to provide a default query.""" + return "test query" + def test_wikidata_initialization(self, component_class): + component = component_class() + assert component.display_name == "Wikidata" + assert component.description == "Performs a search using the Wikidata API." 
+ assert component.icon == "Wikipedia" -@patch("langflow.components.tools.wikidata_api.httpx.get") -def test_fetch_content_success(mock_httpx): - component = WikidataAPIComponent() - component.query = "test query" + def test_wikidata_template(self, component_class): + component = component_class() + frontend_node, _ = build_custom_component_template(Component(_code=component._code)) - # Mock successful API response - mock_response = MagicMock() - mock_response.json.return_value = { - "search": [ - { - "label": "Test Label", - "id": "Q123", - "url": "https://test.com", - "description": "Test Description", - "concepturi": "https://test.com/concept", - } - ] - } - mock_httpx.return_value = mock_response + # Verify basic structure + assert isinstance(frontend_node, dict) - result = component.fetch_content() + # Verify inputs + assert "template" in frontend_node + input_names = [input_["name"] for input_ in frontend_node["template"].values() if isinstance(input_, dict)] + assert "query" in input_names - assert isinstance(result, list) - assert len(result) == 1 - assert result[0].text == "Test Label: Test Description" - assert result[0].data["label"] == "Test Label" - assert result[0].data["id"] == "Q123" + @patch("langflow.components.tools.wikidata_api.httpx.get") + def test_fetch_content_success(self, mock_httpx, component_class, mock_query): + component = component_class() + component.query = mock_query + # Mock successful API response + mock_response = MagicMock() + mock_response.json.return_value = { + "search": [ + { + "label": "Test Label", + "id": "Q123", + "url": "https://test.com", + "description": "Test Description", + "concepturi": "https://test.com/concept", + } + ] + } + mock_httpx.return_value = mock_response -@patch("langflow.components.tools.wikidata_api.httpx.get") -def test_fetch_content_empty_response(mock_httpx): - component = WikidataAPIComponent() - component.query = "test query" + result = component.fetch_content() - # Mock empty API response - 
mock_response = MagicMock() - mock_response.json.return_value = {"search": []} - mock_httpx.return_value = mock_response + assert isinstance(result, list) + assert len(result) == 1 + assert result[0].text == "Test Label: Test Description" + assert result[0].data["label"] == "Test Label" + assert result[0].data["id"] == "Q123" - result = component.fetch_content() + @patch("langflow.components.tools.wikidata_api.httpx.get") + def test_fetch_content_empty_response(self, mock_httpx, component_class, mock_query): + component = component_class() + component.query = mock_query - assert isinstance(result, list) - assert len(result) == 1 - assert "error" in result[0].data - assert "No search results found" in result[0].data["error"] + # Mock empty API response + mock_response = MagicMock() + mock_response.json.return_value = {"search": []} + mock_httpx.return_value = mock_response + result = component.fetch_content() -@patch("langflow.components.tools.wikidata_api.httpx.get") -def test_fetch_content_error_handling(mock_httpx): - component = WikidataAPIComponent() - component.query = "test query" + assert isinstance(result, list) + assert len(result) == 1 + assert "error" in result[0].data + assert "No search results found" in result[0].data["error"] - # Mock HTTP error - mock_httpx.side_effect = httpx.HTTPError("API Error") + @patch("langflow.components.tools.wikidata_api.httpx.get") + def test_fetch_content_error_handling(self, mock_httpx, component_class, mock_query): + component = component_class() + component.query = mock_query - with pytest.raises(ToolException): - component.fetch_content() + # Mock HTTP error + mock_httpx.side_effect = httpx.HTTPError("API Error") + with pytest.raises(ToolException): + component.fetch_content() -def test_fetch_content_text(): - component = WikidataAPIComponent() - component.fetch_content = MagicMock( - return_value=[ - Data(text="First result", data={"label": "Label 1"}), - Data(text="Second result", data={"label": "Label 2"}), - ] - 
) + def test_fetch_content_text(self, component_class): + component = component_class() + component.fetch_content = MagicMock( + return_value=[ + Data(text="First result", data={"label": "Label 1"}), + Data(text="Second result", data={"label": "Label 2"}), + ] + ) - result = component.fetch_content_text() + result = component.fetch_content_text() - assert isinstance(result, Message) - assert "First result" in result.text - assert "Second result" in result.text + assert isinstance(result, Message) + assert "First result" in result.text + assert "Second result" in result.text