From 8c8be151e5509d579a4d1e66d8aba1294fae62df Mon Sep 17 00:00:00 2001 From: Cristhian Zanforlin Lousa Date: Tue, 17 Sep 2024 16:39:17 -0300 Subject: [PATCH] feat: add duckduckgo search component (#3798) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * 🔧 (pyproject.toml): add duckduckgo-search dependency to the project ✨ (DuckDuckGoSearchRun.py): introduce DuckDuckGoSearchComponent for performing web searches using DuckDuckGo search engine ✨ (DuckDuckGo.jsx): add SVG icon for DuckDuckGo ✨ (index.tsx): create DuckDuckGoIcon component for displaying DuckDuckGo icon 🔧 (styleUtils.ts): import DuckDuckGoIcon for nodeIconsLucide in styleUtils * 📝 (DuckDuckGoSearchRun.py): remove unnecessary whitespace to improve code readability and consistency * ✨ (DuckDuckGoSearchRun.py): Add retry logic to DuckDuckGo search component for rate-limited requests 📝 (DuckDuckGoSearchRun.py): Update component description to reflect the addition of retry logic 📝 (DuckDuckGoSearchRun.py): Add new inputs for max_retries and initial_delay to configure retry behavior 📝 (DuckDuckGoSearchRun.py): Update search_response method to use search_with_retry method with retry logic 📝 (DuckDuckGoSearchRun.py): Update format_results method to handle formatted results 📝 (DuckDuckGoSearchRun.py): Add search_with_retry method to handle search with retry logic 📝 (DuckDuckGoSearchRun.py): Update search_response method to use search_with_retry method 📝 (DuckDuckGoSearchRun.py): Update search_response method to set status messages 📝 (DuckDuckGoSearchRun.py): Handle exceptions and set appropriate status messages in search_response method ✨ (duckduckgo.spec.ts): Add integration test for DuckDuckGo search component in frontend * 📝 (DuckDuckGoSearchRun.py): add newline at the end of the file to follow best practices and avoid potential issues with some tools that expect it * [autofix.ci] apply automated fixes * updating duckduckgo * [autofix.ci] apply automated fixes * ✨ 
(DuckDuckGoSearchRun.py): Refactor DuckDuckGoSearchComponent to use pydantic BaseModel for schema definition and improve code structure for better readability and maintainability. Add support for result limiting in search functionality. * 🔧 (DuckDuckGoSearchRun.py): Remove unnecessary import and update status message for DuckDuckGo Search Tool to improve clarity * [autofix.ci] apply automated fixes * ✨ (duckduckgo.spec.ts): update test selectors for duckduckgo search component to match changes in the frontend code and improve test reliability * 🐛 (linkComponent.spec.ts): fix an issue where the key combination for selecting all text was not working correctly on Mac devices. Updated the key combination to use the correct modifier key based on the user's operating system. * 📝 (frontend): mark is-unicode-supported package as extraneous in package-lock.json * rollback lock file --------- Co-authored-by: autofix-ci[bot] <114827586+autofix-ci[bot]@users.noreply.github.com> --- poetry.lock | 41 ++++++++++- pyproject.toml | 1 + .../components/tools/DuckDuckGoSearchRun.py | 69 +++++++++++++++++++ .../src/icons/DuckDuckGo/DuckDuckGo.jsx | 59 ++++++++++++++++ .../src/icons/DuckDuckGo/duckduckgo-icon.svg | 1 + src/frontend/src/icons/DuckDuckGo/index.tsx | 9 +++ src/frontend/src/utils/styleUtils.ts | 2 + .../tests/core/unit/linkComponent.spec.ts | 6 +- .../extended/integrations/duckduckgo.spec.ts | 67 ++++++++++++++++++ 9 files changed, 253 insertions(+), 2 deletions(-) create mode 100644 src/backend/base/langflow/components/tools/DuckDuckGoSearchRun.py create mode 100644 src/frontend/src/icons/DuckDuckGo/DuckDuckGo.jsx create mode 100644 src/frontend/src/icons/DuckDuckGo/duckduckgo-icon.svg create mode 100644 src/frontend/src/icons/DuckDuckGo/index.tsx create mode 100644 src/frontend/tests/extended/integrations/duckduckgo.spec.ts diff --git a/poetry.lock b/poetry.lock index 144c50167..a72ed562d 100644 --- a/poetry.lock +++ b/poetry.lock @@ -2079,6 +2079,25 @@ files = [ {file = 
"duckdb-1.1.0.tar.gz", hash = "sha256:b4d4c12b1f98732151bd31377753e0da1a20f6423016d2d097d2e31953ec7c23"}, ] +[[package]] +name = "duckduckgo-search" +version = "6.2.11" +description = "Search for words, documents, images, news, maps and text translation using the DuckDuckGo.com search engine." +optional = false +python-versions = ">=3.8" +files = [ + {file = "duckduckgo_search-6.2.11-py3-none-any.whl", hash = "sha256:6fb7069b79e8928f487001de6859034ade19201bdcd257ec198802430e374bfe"}, + {file = "duckduckgo_search-6.2.11.tar.gz", hash = "sha256:6b6ef1b552c5e67f23e252025d2504caf6f9fc14f70e86c6dd512200f386c673"}, +] + +[package.dependencies] +click = ">=8.1.7" +primp = ">=0.6.1" + +[package.extras] +dev = ["mypy (>=1.11.1)", "pytest (>=8.3.1)", "pytest-asyncio (>=0.23.8)", "ruff (>=0.6.1)"] +lxml = ["lxml (>=5.2.2)"] + [[package]] name = "e2b" version = "0.17.1" @@ -7248,6 +7267,26 @@ nodeenv = ">=0.11.1" pyyaml = ">=5.1" virtualenv = ">=20.10.0" +[[package]] +name = "primp" +version = "0.6.1" +description = "HTTP client that can impersonate web browsers, mimicking their headers and `TLS/JA3/JA4/HTTP2` fingerprints" +optional = false +python-versions = ">=3.8" +files = [ + {file = "primp-0.6.1-cp38-abi3-macosx_10_12_x86_64.whl", hash = "sha256:60cfe95e0bdf154b0f9036d38acaddc9aef02d6723ed125839b01449672d3946"}, + {file = "primp-0.6.1-cp38-abi3-macosx_11_0_arm64.whl", hash = "sha256:e1e92433ecf32639f9e800bc3a5d58b03792bdec99421b7fb06500e2fae63c85"}, + {file = "primp-0.6.1-cp38-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6e02353f13f07fb5a6f91df9e2f4d8ec9f41312de95088744dce1c9729a3865d"}, + {file = "primp-0.6.1-cp38-abi3-manylinux_2_34_aarch64.whl", hash = "sha256:c5a2ccfdf488b17be225a529a31e2b22724b2e22fba8e1ae168a222f857c2dc0"}, + {file = "primp-0.6.1-cp38-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:f335c2ace907800a23bbb7bc6e15acc7fff659b86a2d5858817f6ed79cea07cf"}, + {file = "primp-0.6.1-cp38-abi3-musllinux_1_2_x86_64.whl", hash = 
"sha256:5dc15bd9d47ded7bc356fcb5d8321972dcbeba18e7d3b7250e12bb7365447b2b"}, + {file = "primp-0.6.1-cp38-abi3-win_amd64.whl", hash = "sha256:eebf0412ebba4089547b16b97b765d83f69f1433d811bb02b02cdcdbca20f672"}, + {file = "primp-0.6.1.tar.gz", hash = "sha256:64b3c12e3d463a887518811c46f3ec37cca02e6af1ddf1287e548342de436301"}, +] + +[package.extras] +dev = ["certifi", "pytest (>=8.1.1)"] + [[package]] name = "prometheus-client" version = "0.20.0" @@ -12114,4 +12153,4 @@ local = ["ctransformers", "llama-cpp-python", "sentence-transformers"] [metadata] lock-version = "2.0" python-versions = ">=3.10,<3.13" -content-hash = "64cce31a5fc6e0dbb35aebf34f34bf437469f213f9059be6ac31f7877a02cbea" +content-hash = "304221b658b52d3de30e52ebbc45b4730832fb087ce4f15b68c858d0a5af5efb" diff --git a/pyproject.toml b/pyproject.toml index 175c6ae35..d0ce8d588 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -112,6 +112,7 @@ clickhouse-connect = {version = "0.7.19", optional = true, extras = ["clickhouse langchain-unstructured = "^0.1.2" pydantic-settings = "2.4.0" ragstack-ai-knowledge-store = "^0.2.1" +duckduckgo-search = "^6.2.11" [tool.poetry.group.dev.dependencies] diff --git a/src/backend/base/langflow/components/tools/DuckDuckGoSearchRun.py b/src/backend/base/langflow/components/tools/DuckDuckGoSearchRun.py new file mode 100644 index 000000000..a2505724f --- /dev/null +++ b/src/backend/base/langflow/components/tools/DuckDuckGoSearchRun.py @@ -0,0 +1,69 @@ +from typing import Dict, Any, List +from pydantic import BaseModel, Field +from langchain_community.tools import DuckDuckGoSearchRun +from langflow.base.langchain_utilities.model import LCToolComponent +from langflow.inputs import MessageTextInput, IntInput +from langflow.schema import Data +from langflow.field_typing import Tool +from langchain.tools import StructuredTool + + +class DuckDuckGoSearchComponent(LCToolComponent): + display_name: str = "DuckDuckGo Search" + description: str = "Perform web searches using the DuckDuckGo 
search engine with result limiting" + name = "DuckDuckGoSearch" + documentation: str = "https://python.langchain.com/docs/integrations/tools/ddg" + icon: str = "DuckDuckGo" + inputs = [ + MessageTextInput( + name="input_value", + display_name="Search Query", + required=True, + ), + IntInput(name="max_results", display_name="Max Results", value=5, advanced=True), + IntInput(name="max_snippet_length", display_name="Max Snippet Length", value=100, advanced=True), + ] + + class DuckDuckGoSearchSchema(BaseModel): + query: str = Field(..., description="The search query") + max_results: int = Field(5, description="Maximum number of results to return") + max_snippet_length: int = Field(100, description="Maximum length of each result snippet") + + def _build_wrapper(self): + return DuckDuckGoSearchRun() + + def build_tool(self) -> Tool: + wrapper = self._build_wrapper() + + def search_func(query: str, max_results: int = 5, max_snippet_length: int = 100) -> List[Dict[str, Any]]: + full_results = wrapper.run(f"{query} (site:*)") + result_list = full_results.split("\n")[:max_results] + limited_results = [] + for result in result_list: + limited_result = { + "snippet": result[:max_snippet_length], + } + limited_results.append(limited_result) + return limited_results + + tool = StructuredTool.from_function( + name="duckduckgo_search", + description="Search for recent results using DuckDuckGo with result limiting", + func=search_func, + args_schema=self.DuckDuckGoSearchSchema, + ) + self.status = "DuckDuckGo Search Tool created" + return tool + + def run_model(self) -> List[Data]: + tool = self.build_tool() + results = tool.run( + { + "query": self.input_value, + "max_results": self.max_results, + "max_snippet_length": self.max_snippet_length, + } + ) + data_list = [Data(data=result, text=result.get("snippet", "")) for result in results] + self.status = data_list + return data_list diff --git a/src/frontend/src/icons/DuckDuckGo/DuckDuckGo.jsx 
b/src/frontend/src/icons/DuckDuckGo/DuckDuckGo.jsx new file mode 100644 index 000000000..a2dc61b91 --- /dev/null +++ b/src/frontend/src/icons/DuckDuckGo/DuckDuckGo.jsx @@ -0,0 +1,59 @@ +const SvgDuckDuckGo = ({ ...props }) => ( + + + + + {"duckduckgo"} + + + + + + + + + +); +export default SvgDuckDuckGo; diff --git a/src/frontend/src/icons/DuckDuckGo/duckduckgo-icon.svg b/src/frontend/src/icons/DuckDuckGo/duckduckgo-icon.svg new file mode 100644 index 000000000..8215a918f --- /dev/null +++ b/src/frontend/src/icons/DuckDuckGo/duckduckgo-icon.svg @@ -0,0 +1 @@ +duckduckgo \ No newline at end of file diff --git a/src/frontend/src/icons/DuckDuckGo/index.tsx b/src/frontend/src/icons/DuckDuckGo/index.tsx new file mode 100644 index 000000000..9bca0851f --- /dev/null +++ b/src/frontend/src/icons/DuckDuckGo/index.tsx @@ -0,0 +1,9 @@ +import React, { forwardRef } from "react"; +import SvgDuckDuckGo from "./DuckDuckGo"; + +export const DuckDuckGoIcon = forwardRef< + SVGSVGElement, + React.PropsWithChildren<{ color?: string }> +>((props, ref) => { + return ; +}); diff --git a/src/frontend/src/utils/styleUtils.ts b/src/frontend/src/utils/styleUtils.ts index fad9dd6df..bda6dd279 100644 --- a/src/frontend/src/utils/styleUtils.ts +++ b/src/frontend/src/utils/styleUtils.ts @@ -1,4 +1,5 @@ import { AIMLIcon } from "@/icons/AIML"; +import { DuckDuckGoIcon } from "@/icons/DuckDuckGo"; import Perplexity from "@/icons/Perplexity/Perplexity"; import { UnstructuredIcon } from "@/icons/Unstructured"; import { AthenaIcon } from "@/icons/athena/index"; @@ -627,4 +628,5 @@ export const nodeIconsLucide: iconsType = { OptionIcon: OptionIcon, Option: OptionIcon, Perplexity, + DuckDuckGo: DuckDuckGoIcon, }; diff --git a/src/frontend/tests/core/unit/linkComponent.spec.ts b/src/frontend/tests/core/unit/linkComponent.spec.ts index a5a00bd55..11c59897f 100644 --- a/src/frontend/tests/core/unit/linkComponent.spec.ts +++ b/src/frontend/tests/core/unit/linkComponent.spec.ts @@ -31,6 +31,10 @@ test("user 
should interact with link component", async ({ context, page }) => { const userAgentInfo = uaParser(getUA); let control = "Control"; + if (userAgentInfo.os.name.includes("Mac")) { + control = "Meta"; + } + await page.waitForSelector('[data-testid="blank-flow"]', { timeout: 30000, }); @@ -80,7 +84,7 @@ test("user should interact with link component", async ({ context, page }) => { LinkInput(name="link", display_name="BUTTON", value="https://www.datastax.com", text="Click me"),`, ); - await page.locator("textarea").last().press(`Meta+a`); + await page.locator("textarea").last().press(`${control}+a`); await page.keyboard.press("Backspace"); await page.locator("textarea").last().fill(cleanCode); await page.locator('//*[@id="checkAndSaveBtn"]').click(); diff --git a/src/frontend/tests/extended/integrations/duckduckgo.spec.ts b/src/frontend/tests/extended/integrations/duckduckgo.spec.ts new file mode 100644 index 000000000..4f59b7076 --- /dev/null +++ b/src/frontend/tests/extended/integrations/duckduckgo.spec.ts @@ -0,0 +1,67 @@ +import { expect, test } from "@playwright/test"; + +test("user should be able to use duckduckgo search component", async ({ + page, +}) => { + await page.goto("/"); + await page.waitForSelector('[data-testid="mainpage_title"]', { + timeout: 30000, + }); + + await page.waitForSelector('[id="new-project-btn"]', { + timeout: 30000, + }); + + let modalCount = 0; + try { + const modalTitleElement = await page?.getByTestId("modal-title"); + if (modalTitleElement) { + modalCount = await modalTitleElement.count(); + } + } catch (error) { + modalCount = 0; + } + + while (modalCount === 0) { + await page.getByText("New Project", { exact: true }).click(); + await page.waitForTimeout(3000); + modalCount = await page.getByTestId("modal-title")?.count(); + } + + await page.getByTestId("blank-flow").click(); + await page.waitForSelector('[data-testid="extended-disclosure"]', { + timeout: 30000, + }); + await page.getByTestId("extended-disclosure").click(); + 
await page.getByPlaceholder("Search").click(); + await page.getByPlaceholder("Search").fill("duck"); + + await page.waitForTimeout(1000); + + await page + .locator('//*[@id="toolsDuckDuckGo Search"]') + .dragTo(page.locator('//*[@id="react-flow-id"]')); + await page.mouse.up(); + await page.mouse.down(); + await page.getByTitle("fit view").click(); + + await page + .getByTestId("popover-anchor-input-input_value") + .fill("what is langflow?"); + + await page.getByTestId("button_run_duckduckgo search").click(); + + await page.getByTitle("fit view").click(); + + await page.waitForSelector("text=built successfully", { timeout: 30000 }); + + await page.waitForTimeout(1000); + + await page.getByTestId("output-inspection-data").first().click(); + + await page.getByRole("gridcell").first().click(); + + const searchResults = await page.getByPlaceholder("Empty").inputValue(); + expect(searchResults.length).toBeGreaterThan(10); + expect(searchResults.toLowerCase()).toContain("langflow"); +});