feat: Add Tavily search API parameters and Tavily Extract API component (#7644)

* Add Tavily search parameters and Extract API component

* [autofix.ci] apply automated fixes

* Fix style issues flagged by Ruff

* fix Ruff Style Check issues

* [autofix.ci] apply automated fixes

* Fix linting issue reported by Ruff

* fix include and exclude domains

* Fix linting issue reported by Ruff

* fix: update descriptions and input types across multiple starter project JSON files

- Changed "Tavily AI" to "Tavily Search" in descriptions for consistency.
- Updated input types for several API keys to be empty arrays instead of containing "Message".
- Added new fields such as "chunks_per_source", "days", "exclude_domains", "include_domains", and "include_raw_content" to enhance functionality in various projects.
- Adjusted the "description" field in the "Youtube Analysis" project for clarity.

---------

Co-authored-by: autofix-ci[bot] <114827586+autofix-ci[bot]@users.noreply.github.com>
Co-authored-by: Edwin Jose <edwin.jose@datastax.com>
Co-authored-by: Gabriel Luiz Freitas Almeida <gabriel@langflow.org>
This commit is contained in:
May Ramati Kroitero 2025-05-07 01:38:14 +03:00 committed by GitHub
commit 8cabedae55
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
20 changed files with 1255 additions and 357 deletions

View file

@ -22,8 +22,9 @@ from .search_api import SearchAPIComponent
from .searxng import SearXNGToolComponent
from .serp import SerpComponent
from .serp_api import SerpAPIComponent
from .tavily import TavilySearchComponent
from .tavily_search import TavilySearchToolComponent
from .tavily_extract import TavilyExtractComponent
from .tavily_search import TavilySearchComponent
from .tavily_search_tool import TavilySearchToolComponent
from .wikidata import WikidataComponent
from .wikidata_api import WikidataAPIComponent
from .wikipedia import WikipediaComponent
@ -60,6 +61,7 @@ __all__ = [
"SearchComponent",
"SerpAPIComponent",
"SerpComponent",
"TavilyExtractComponent",
"TavilySearchComponent",
"TavilySearchToolComponent",
"WikidataAPIComponent",

View file

@ -1,148 +0,0 @@
import httpx
from loguru import logger
from langflow.custom import Component
from langflow.helpers.data import data_to_text
from langflow.io import BoolInput, DropdownInput, IntInput, MessageTextInput, Output, SecretStrInput
from langflow.schema import Data
from langflow.schema.message import Message
class TavilySearchComponent(Component):
    """Langflow component that queries the Tavily Search API.

    Sends the configured query to https://api.tavily.com/search and exposes
    the results both as a list of Data objects and as a flattened text
    Message.
    """

    display_name = "Tavily AI Search"
    description = """**Tavily AI** is a search engine optimized for LLMs and RAG, \
aimed at efficient, quick, and persistent search results."""
    icon = "TavilyIcon"

    inputs = [
        SecretStrInput(
            name="api_key",
            display_name="Tavily API Key",
            required=True,
            info="Your Tavily API Key.",
        ),
        MessageTextInput(
            name="query",
            display_name="Search Query",
            info="The search query you want to execute with Tavily.",
            tool_mode=True,
        ),
        DropdownInput(
            name="search_depth",
            display_name="Search Depth",
            info="The depth of the search.",
            options=["basic", "advanced"],
            value="advanced",
            advanced=True,
        ),
        DropdownInput(
            name="topic",
            display_name="Search Topic",
            info="The category of the search.",
            options=["general", "news"],
            value="general",
            advanced=True,
        ),
        DropdownInput(
            name="time_range",
            display_name="Time Range",
            info="The time range back from the current date to include in the search results.",
            options=["day", "week", "month", "year"],
            value=None,  # no time filter by default; combobox allows free entry
            advanced=True,
            combobox=True,
        ),
        IntInput(
            name="max_results",
            display_name="Max Results",
            info="The maximum number of search results to return.",
            value=5,
            advanced=True,
        ),
        BoolInput(
            name="include_images",
            display_name="Include Images",
            info="Include a list of query-related images in the response.",
            value=True,
            advanced=True,
        ),
        BoolInput(
            name="include_answer",
            display_name="Include Answer",
            info="Include a short answer to original query.",
            value=True,
            advanced=True,
        ),
    ]

    outputs = [
        Output(display_name="Data", name="data", method="fetch_content"),
        Output(display_name="Text", name="text", method="fetch_content_text"),
    ]

    def fetch_content(self) -> list[Data]:
        """Call the Tavily search endpoint and map the JSON response to Data.

        Returns, in order: the short answer (when requested and present), one
        Data per search hit, and an images entry (when requested and present).
        On any HTTP, request, or parse failure a single error Data is returned
        instead of raising.
        """
        try:
            url = "https://api.tavily.com/search"
            headers = {
                "content-type": "application/json",
                "accept": "application/json",
            }
            # Legacy auth style: the API key travels in the JSON body rather
            # than in an Authorization header.
            payload = {
                "api_key": self.api_key,
                "query": self.query,
                "search_depth": self.search_depth,
                "topic": self.topic,
                "max_results": self.max_results,
                "include_images": self.include_images,
                "include_answer": self.include_answer,
                "time_range": self.time_range,
            }

            with httpx.Client() as client:
                response = client.post(url, json=payload, headers=headers)
                response.raise_for_status()

            search_results = response.json()

            data_results = []

            # Short answer first, when requested and returned.
            if self.include_answer and search_results.get("answer"):
                data_results.append(Data(text=search_results["answer"]))

            for result in search_results.get("results", []):
                content = result.get("content", "")
                data_results.append(
                    Data(
                        text=content,
                        data={
                            "title": result.get("title"),
                            "url": result.get("url"),
                            "content": content,
                            "score": result.get("score"),
                        },
                    )
                )

            # Trailing entry bundling all image URLs, when requested.
            if self.include_images and search_results.get("images"):
                data_results.append(Data(text="Images found", data={"images": search_results["images"]}))
        except httpx.HTTPStatusError as exc:
            error_message = f"HTTP error occurred: {exc.response.status_code} - {exc.response.text}"
            logger.error(error_message)
            return [Data(text=error_message, data={"error": error_message})]
        except httpx.RequestError as exc:
            error_message = f"Request error occurred: {exc}"
            logger.error(error_message)
            return [Data(text=error_message, data={"error": error_message})]
        except ValueError as exc:
            error_message = f"Invalid response format: {exc}"
            logger.error(error_message)
            return [Data(text=error_message, data={"error": error_message})]
        else:
            # Success path: surface results in the component status, too.
            self.status = data_results
            return data_results

    def fetch_content_text(self) -> Message:
        """Flatten fetch_content() results into a single text Message."""
        data = self.fetch_content()
        result_string = data_to_text("{text}", data)
        self.status = result_string
        return Message(text=result_string)

View file

@ -0,0 +1,119 @@
import httpx
from loguru import logger
from langflow.custom import Component
from langflow.helpers.data import data_to_text
from langflow.io import BoolInput, DropdownInput, MessageTextInput, Output, SecretStrInput
from langflow.schema import Data
from langflow.schema.message import Message
class TavilyExtractComponent(Component):
    """Langflow component wrapping the Tavily Extract endpoint.

    Takes a comma-separated list of URLs and returns the raw content (and
    optionally images) extracted from each one, either as Data objects or as
    a single flattened text Message.
    """

    display_name = "Tavily Extract API"
    # NOTE(review): user-facing description has a grammar slip
    # ("extract" should read "extracts") — fix would change a runtime string.
    description = """**Tavily Extract** extract raw content from URLs."""
    icon = "TavilyIcon"

    inputs = [
        SecretStrInput(
            name="api_key",
            display_name="Tavily API Key",
            required=True,
            info="Your Tavily API Key.",
        ),
        MessageTextInput(
            name="urls",
            display_name="URLs",
            info="Comma-separated list of URLs to extract content from.",
            required=True,
        ),
        DropdownInput(
            name="extract_depth",
            display_name="Extract Depth",
            info="The depth of the extraction process.",
            options=["basic", "advanced"],
            value="basic",
            advanced=True,
        ),
        BoolInput(
            name="include_images",
            display_name="Include Images",
            info="Include a list of images extracted from the URLs.",
            value=False,
            advanced=True,
        ),
    ]

    outputs = [
        Output(display_name="Data", name="data", method="fetch_content"),
        Output(display_name="Text", name="text", method="fetch_content_text"),
    ]

    def fetch_content(self) -> list[Data]:
        """Fetches and processes extracted content into a list of Data objects.

        Successful extractions become one Data per URL; failed extractions are
        bundled into a trailing "Failed extractions" entry. Any error yields a
        single error Data instead of raising.
        """
        try:
            # Split URLs by comma and clean them; empty segments are dropped.
            urls = [url.strip() for url in (self.urls or "").split(",") if url.strip()]
            if not urls:
                error_message = "No valid URLs provided"
                logger.error(error_message)
                return [Data(text=error_message, data={"error": error_message})]

            url = "https://api.tavily.com/extract"
            # Bearer-token auth (unlike the legacy search component, which
            # sends the key in the request body).
            headers = {
                "content-type": "application/json",
                "accept": "application/json",
                "Authorization": f"Bearer {self.api_key}",
            }
            payload = {
                "urls": urls,
                "extract_depth": self.extract_depth,
                "include_images": self.include_images,
            }

            # Extraction of large pages can be slow, hence the generous timeout.
            with httpx.Client(timeout=90.0) as client:
                response = client.post(url, json=payload, headers=headers)
                response.raise_for_status()
        except httpx.TimeoutException as exc:
            error_message = f"Request timed out (90s): {exc}"
            logger.error(error_message)
            return [Data(text=error_message, data={"error": error_message})]
        except httpx.HTTPStatusError as exc:
            error_message = f"HTTP error occurred: {exc.response.status_code} - {exc.response.text}"
            logger.error(error_message)
            return [Data(text=error_message, data={"error": error_message})]
        except (ValueError, KeyError, AttributeError, httpx.RequestError) as exc:
            error_message = f"Data processing error: {exc}"
            logger.error(error_message)
            return [Data(text=error_message, data={"error": error_message})]
        else:
            extract_results = response.json()
            data_results = []

            # Process successful extractions
            for result in extract_results.get("results", []):
                raw_content = result.get("raw_content", "")
                images = result.get("images", [])
                result_data = {"url": result.get("url"), "raw_content": raw_content, "images": images}
                data_results.append(Data(text=raw_content, data=result_data))

            # Process failed extractions
            if extract_results.get("failed_results"):
                data_results.append(
                    Data(
                        text="Failed extractions",
                        data={"failed_results": extract_results["failed_results"]},
                    )
                )

            self.status = data_results
            return data_results

    def fetch_content_text(self) -> Message:
        """Flatten fetch_content() results into a single text Message."""
        data = self.fetch_content()
        result_string = data_to_text("{text}", data)
        self.status = result_string
        return Message(text=result_string)

View file

@ -1,47 +1,18 @@
from enum import Enum
import httpx
from langchain.tools import StructuredTool
from langchain_core.tools import ToolException
from loguru import logger
from pydantic import BaseModel, Field
from langflow.base.langchain_utilities.model import LCToolComponent
from langflow.field_typing import Tool
from langflow.inputs import BoolInput, DropdownInput, IntInput, MessageTextInput, SecretStrInput
from langflow.custom import Component
from langflow.helpers.data import data_to_text
from langflow.io import BoolInput, DropdownInput, IntInput, MessageTextInput, Output, SecretStrInput
from langflow.schema import Data
from langflow.schema.message import Message
class TavilySearchDepth(Enum):
BASIC = "basic"
ADVANCED = "advanced"
class TavilySearchTopic(Enum):
GENERAL = "general"
NEWS = "news"
class TavilySearchSchema(BaseModel):
query: str = Field(..., description="The search query you want to execute with Tavily.")
search_depth: TavilySearchDepth = Field(TavilySearchDepth.BASIC, description="The depth of the search.")
topic: TavilySearchTopic = Field(TavilySearchTopic.GENERAL, description="The category of the search.")
max_results: int = Field(5, description="The maximum number of search results to return.")
include_images: bool = Field(default=False, description="Include a list of query-related images in the response.")
include_answer: bool = Field(default=False, description="Include a short answer to original query.")
class TavilySearchToolComponent(LCToolComponent):
display_name = "Tavily AI Search [DEPRECATED]"
description = """**Tavily AI** is a search engine optimized for LLMs and RAG, \
aimed at efficient, quick, and persistent search results. It can be used independently or as an agent tool.
Note: Check 'Advanced' for all options.
"""
class TavilySearchComponent(Component):
display_name = "Tavily Search API"
description = """**Tavily Search** is a search engine optimized for LLMs and RAG, \
aimed at efficient, quick, and persistent search results."""
icon = "TavilyIcon"
name = "TavilyAISearch"
documentation = "https://docs.tavily.com/"
legacy = True
inputs = [
SecretStrInput(
@ -54,21 +25,36 @@ Note: Check 'Advanced' for all options.
name="query",
display_name="Search Query",
info="The search query you want to execute with Tavily.",
tool_mode=True,
),
DropdownInput(
name="search_depth",
display_name="Search Depth",
info="The depth of the search.",
options=list(TavilySearchDepth),
value=TavilySearchDepth.ADVANCED,
options=["basic", "advanced"],
value="advanced",
advanced=True,
),
IntInput(
name="chunks_per_source",
display_name="Chunks Per Source",
info=("The number of content chunks to retrieve from each source (1-3). Only works with advanced search."),
value=3,
advanced=True,
),
DropdownInput(
name="topic",
display_name="Search Topic",
info="The category of the search.",
options=list(TavilySearchTopic),
value=TavilySearchTopic.GENERAL,
options=["general", "news"],
value="general",
advanced=True,
),
IntInput(
name="days",
display_name="Days",
info="Number of days back from current date to include. Only available with news topic.",
value=7,
advanced=True,
),
IntInput(
@ -78,13 +64,6 @@ Note: Check 'Advanced' for all options.
value=5,
advanced=True,
),
BoolInput(
name="include_images",
display_name="Include Images",
info="Include a list of query-related images in the response.",
value=True,
advanced=True,
),
BoolInput(
name="include_answer",
display_name="Include Answer",
@ -92,115 +71,145 @@ Note: Check 'Advanced' for all options.
value=True,
advanced=True,
),
DropdownInput(
name="time_range",
display_name="Time Range",
info="The time range back from the current date to filter results.",
options=["day", "week", "month", "year"],
value=None, # Default to None to make it optional
advanced=True,
),
BoolInput(
name="include_images",
display_name="Include Images",
info="Include a list of query-related images in the response.",
value=True,
advanced=True,
),
MessageTextInput(
name="include_domains",
display_name="Include Domains",
info="Comma-separated list of domains to include in the search results.",
advanced=True,
),
MessageTextInput(
name="exclude_domains",
display_name="Exclude Domains",
info="Comma-separated list of domains to exclude from the search results.",
advanced=True,
),
BoolInput(
name="include_raw_content",
display_name="Include Raw Content",
info="Include the cleaned and parsed HTML content of each search result.",
value=False,
advanced=True,
),
]
def run_model(self) -> list[Data]:
# Convert string values to enum instances with validation
outputs = [
Output(display_name="Data", name="data", method="fetch_content"),
Output(display_name="Text", name="text", method="fetch_content_text"),
]
def fetch_content(self) -> list[Data]:
try:
search_depth_enum = (
self.search_depth
if isinstance(self.search_depth, TavilySearchDepth)
else TavilySearchDepth(str(self.search_depth).lower())
)
except ValueError as e:
error_message = f"Invalid search depth value: {e!s}"
self.status = error_message
return [Data(data={"error": error_message})]
# Only process domains if they're provided
include_domains = None
exclude_domains = None
try:
topic_enum = (
self.topic if isinstance(self.topic, TavilySearchTopic) else TavilySearchTopic(str(self.topic).lower())
)
except ValueError as e:
error_message = f"Invalid topic value: {e!s}"
self.status = error_message
return [Data(data={"error": error_message})]
if self.include_domains:
include_domains = [domain.strip() for domain in self.include_domains.split(",") if domain.strip()]
return self._tavily_search(
self.query,
search_depth=search_depth_enum,
topic=topic_enum,
max_results=self.max_results,
include_images=self.include_images,
include_answer=self.include_answer,
)
if self.exclude_domains:
exclude_domains = [domain.strip() for domain in self.exclude_domains.split(",") if domain.strip()]
def build_tool(self) -> Tool:
return StructuredTool.from_function(
name="tavily_search",
description="Perform a web search using the Tavily API.",
func=self._tavily_search,
args_schema=TavilySearchSchema,
)
def _tavily_search(
self,
query: str,
*,
search_depth: TavilySearchDepth = TavilySearchDepth.BASIC,
topic: TavilySearchTopic = TavilySearchTopic.GENERAL,
max_results: int = 5,
include_images: bool = False,
include_answer: bool = False,
) -> list[Data]:
# Validate enum values
if not isinstance(search_depth, TavilySearchDepth):
msg = f"Invalid search_depth value: {search_depth}"
raise TypeError(msg)
if not isinstance(topic, TavilySearchTopic):
msg = f"Invalid topic value: {topic}"
raise TypeError(msg)
try:
url = "https://api.tavily.com/search"
headers = {
"content-type": "application/json",
"accept": "application/json",
}
payload = {
"api_key": self.api_key,
"query": query,
"search_depth": search_depth.value,
"topic": topic.value,
"max_results": max_results,
"include_images": include_images,
"include_answer": include_answer,
"query": self.query,
"search_depth": self.search_depth,
"topic": self.topic,
"max_results": self.max_results,
"include_images": self.include_images,
"include_answer": self.include_answer,
"include_raw_content": self.include_raw_content,
"days": self.days,
"time_range": self.time_range,
}
with httpx.Client() as client:
# Only add domains to payload if they exist and have values
if include_domains:
payload["include_domains"] = include_domains
if exclude_domains:
payload["exclude_domains"] = exclude_domains
# Add conditional parameters only if they should be included
if self.search_depth == "advanced" and self.chunks_per_source:
payload["chunks_per_source"] = self.chunks_per_source
if self.topic == "news" and self.days:
payload["days"] = int(self.days) # Ensure days is an integer
# Add time_range if it's set
if hasattr(self, "time_range") and self.time_range:
payload["time_range"] = self.time_range
# Add timeout handling
with httpx.Client(timeout=90.0) as client:
response = client.post(url, json=payload, headers=headers)
response.raise_for_status()
search_results = response.json()
data_results = [
Data(
data={
"title": result.get("title"),
"url": result.get("url"),
"content": result.get("content"),
"score": result.get("score"),
}
)
for result in search_results.get("results", [])
]
data_results = []
if include_answer and search_results.get("answer"):
data_results.insert(0, Data(data={"answer": search_results["answer"]}))
if self.include_answer and search_results.get("answer"):
data_results.append(Data(text=search_results["answer"]))
if include_images and search_results.get("images"):
data_results.append(Data(data={"images": search_results["images"]}))
for result in search_results.get("results", []):
content = result.get("content", "")
result_data = {
"title": result.get("title"),
"url": result.get("url"),
"content": content,
"score": result.get("score"),
}
if self.include_raw_content:
result_data["raw_content"] = result.get("raw_content")
self.status = data_results # type: ignore[assignment]
data_results.append(Data(text=content, data=result_data))
except httpx.HTTPStatusError as e:
error_message = f"HTTP error: {e.response.status_code} - {e.response.text}"
logger.debug(error_message)
self.status = error_message
raise ToolException(error_message) from e
except Exception as e:
error_message = f"Unexpected error: {e}"
logger.opt(exception=True).debug("Error running Tavily Search")
self.status = error_message
raise ToolException(error_message) from e
return data_results
if self.include_images and search_results.get("images"):
data_results.append(Data(text="Images found", data={"images": search_results["images"]}))
except httpx.TimeoutException:
error_message = "Request timed out (90s). Please try again or adjust parameters."
logger.error(error_message)
return [Data(text=error_message, data={"error": error_message})]
except httpx.HTTPStatusError as exc:
error_message = f"HTTP error occurred: {exc.response.status_code} - {exc.response.text}"
logger.error(error_message)
return [Data(text=error_message, data={"error": error_message})]
except httpx.RequestError as exc:
error_message = f"Request error occurred: {exc}"
logger.error(error_message)
return [Data(text=error_message, data={"error": error_message})]
except ValueError as exc:
error_message = f"Invalid response format: {exc}"
logger.error(error_message)
return [Data(text=error_message, data={"error": error_message})]
else:
self.status = data_results
return data_results
def fetch_content_text(self) -> Message:
data = self.fetch_content()
result_string = data_to_text("{text}", data)
self.status = result_string
return Message(text=result_string)

View file

@ -0,0 +1,344 @@
from enum import Enum
import httpx
from langchain.tools import StructuredTool
from langchain_core.tools import ToolException
from loguru import logger
from pydantic import BaseModel, Field
from langflow.base.langchain_utilities.model import LCToolComponent
from langflow.field_typing import Tool
from langflow.inputs import BoolInput, DropdownInput, IntInput, MessageTextInput, SecretStrInput
from langflow.schema import Data
# Upper bound Tavily accepts for chunks_per_source (advanced search only).
MAX_CHUNKS_PER_SOURCE = 3


# Allowed values for the Tavily `search_depth` request parameter.
TavilySearchDepth = Enum("TavilySearchDepth", {"BASIC": "basic", "ADVANCED": "advanced"})
# Allowed values for the Tavily `topic` request parameter.
TavilySearchTopic = Enum("TavilySearchTopic", {"GENERAL": "general", "NEWS": "news"})
# Allowed values for the optional Tavily `time_range` filter.
TavilySearchTimeRange = Enum(
    "TavilySearchTimeRange",
    {"DAY": "day", "WEEK": "week", "MONTH": "month", "YEAR": "year"},
)
class TavilySearchSchema(BaseModel):
    """Pydantic argument schema for the `tavily_search` StructuredTool."""

    # Required free-text query; everything else has a default.
    query: str = Field(..., description="The search query you want to execute with Tavily.")
    search_depth: TavilySearchDepth = Field(TavilySearchDepth.BASIC, description="The depth of the search.")
    topic: TavilySearchTopic = Field(TavilySearchTopic.GENERAL, description="The category of the search.")
    max_results: int = Field(5, description="The maximum number of search results to return.")
    include_images: bool = Field(default=False, description="Include a list of query-related images in the response.")
    include_answer: bool = Field(default=False, description="Include a short answer to original query.")
    # Bounded to 1..MAX_CHUNKS_PER_SOURCE; only honored for advanced search.
    chunks_per_source: int = Field(
        default=MAX_CHUNKS_PER_SOURCE,
        description=(
            "The number of content chunks to retrieve from each source (max 500 chars each). Only for advanced search."
        ),
        ge=1,
        le=MAX_CHUNKS_PER_SOURCE,
    )
    include_domains: list[str] = Field(
        default=[],
        description="A list of domains to specifically include in the search results.",
    )
    exclude_domains: list[str] = Field(
        default=[],
        description="A list of domains to specifically exclude from the search results.",
    )
    include_raw_content: bool = Field(
        default=False,
        description="Include the cleaned and parsed HTML content of each search result.",
    )
    # Only meaningful when topic == news; must be >= 1.
    days: int = Field(
        default=7,
        description="Number of days back from the current date to include. Only available if topic is news.",
        ge=1,
    )
    # None means "no time filter".
    time_range: TavilySearchTimeRange | None = Field(
        default=None,
        description="The time range back from the current date to filter results.",
    )
class TavilySearchToolComponent(LCToolComponent):
    """Deprecated (legacy) Tavily search component, usable standalone or as an agent tool."""

    display_name = "Tavily Search API"
    description = """**Tavily Search API** is a search engine optimized for LLMs and RAG, \
aimed at efficient, quick, and persistent search results. It can be used independently or as an agent tool.
Note: Check 'Advanced' for all options.
"""
    icon = "TavilyIcon"
    name = "TavilyAISearch"
    documentation = "https://docs.tavily.com/"
    legacy = True  # kept only for backward compatibility with existing flows

    inputs = [
        SecretStrInput(
            name="api_key",
            display_name="Tavily API Key",
            required=True,
            info="Your Tavily API Key.",
        ),
        MessageTextInput(
            name="query",
            display_name="Search Query",
            info="The search query you want to execute with Tavily.",
        ),
        DropdownInput(
            name="search_depth",
            display_name="Search Depth",
            info="The depth of the search.",
            options=list(TavilySearchDepth),
            value=TavilySearchDepth.ADVANCED,
            advanced=True,
        ),
        IntInput(
            name="chunks_per_source",
            display_name="Chunks Per Source",
            info=("The number of content chunks to retrieve from each source (1-3). Only works with advanced search."),
            value=MAX_CHUNKS_PER_SOURCE,
            advanced=True,
        ),
        DropdownInput(
            name="topic",
            display_name="Search Topic",
            info="The category of the search.",
            options=list(TavilySearchTopic),
            value=TavilySearchTopic.GENERAL,
            advanced=True,
        ),
        IntInput(
            name="days",
            display_name="Days",
            info="Number of days back from current date to include. Only available with news topic.",
            value=7,
            advanced=True,
        ),
        IntInput(
            name="max_results",
            display_name="Max Results",
            info="The maximum number of search results to return.",
            value=5,
            advanced=True,
        ),
        BoolInput(
            name="include_answer",
            display_name="Include Answer",
            info="Include a short answer to original query.",
            value=True,
            advanced=True,
        ),
        DropdownInput(
            name="time_range",
            display_name="Time Range",
            info="The time range back from the current date to filter results.",
            options=list(TavilySearchTimeRange),
            value=None,  # optional: no time filter by default
            advanced=True,
        ),
        BoolInput(
            name="include_images",
            display_name="Include Images",
            info="Include a list of query-related images in the response.",
            value=True,
            advanced=True,
        ),
        MessageTextInput(
            name="include_domains",
            display_name="Include Domains",
            info="Comma-separated list of domains to include in the search results.",
            advanced=True,
        ),
        MessageTextInput(
            name="exclude_domains",
            display_name="Exclude Domains",
            info="Comma-separated list of domains to exclude from the search results.",
            advanced=True,
        ),
        BoolInput(
            name="include_raw_content",
            display_name="Include Raw Content",
            info="Include the cleaned and parsed HTML content of each search result.",
            value=False,
            advanced=True,
        ),
    ]

    def run_model(self) -> list[Data]:
        """Normalize UI inputs (strings → enums, CSV → lists) and run the search.

        Invalid dropdown values are reported as a single error Data instead of
        raising, so a flow fails gracefully.
        """
        # Convert string values to enum instances with validation
        try:
            search_depth_enum = (
                self.search_depth
                if isinstance(self.search_depth, TavilySearchDepth)
                else TavilySearchDepth(str(self.search_depth).lower())
            )
        except ValueError as e:
            error_message = f"Invalid search depth value: {e!s}"
            self.status = error_message
            return [Data(data={"error": error_message})]

        try:
            topic_enum = (
                self.topic if isinstance(self.topic, TavilySearchTopic) else TavilySearchTopic(str(self.topic).lower())
            )
        except ValueError as e:
            error_message = f"Invalid topic value: {e!s}"
            self.status = error_message
            return [Data(data={"error": error_message})]

        try:
            # time_range is optional: empty/None maps to None (no filter).
            time_range_enum = (
                self.time_range
                if isinstance(self.time_range, TavilySearchTimeRange)
                else TavilySearchTimeRange(str(self.time_range).lower())
                if self.time_range
                else None
            )
        except ValueError as e:
            error_message = f"Invalid time range value: {e!s}"
            self.status = error_message
            return [Data(data={"error": error_message})]

        # Initialize domain variables as None
        include_domains = None
        exclude_domains = None

        # Only process domains if they're provided (comma-separated strings).
        if self.include_domains:
            include_domains = [domain.strip() for domain in self.include_domains.split(",") if domain.strip()]
        if self.exclude_domains:
            exclude_domains = [domain.strip() for domain in self.exclude_domains.split(",") if domain.strip()]

        return self._tavily_search(
            self.query,
            search_depth=search_depth_enum,
            topic=topic_enum,
            max_results=self.max_results,
            include_images=self.include_images,
            include_answer=self.include_answer,
            chunks_per_source=self.chunks_per_source,
            include_domains=include_domains,
            exclude_domains=exclude_domains,
            include_raw_content=self.include_raw_content,
            days=self.days,
            time_range=time_range_enum,
        )

    def build_tool(self) -> Tool:
        """Expose the search as a LangChain StructuredTool for agent use."""
        return StructuredTool.from_function(
            name="tavily_search",
            description="Perform a web search using the Tavily API.",
            func=self._tavily_search,
            args_schema=TavilySearchSchema,
        )

    def _tavily_search(
        self,
        query: str,
        *,
        search_depth: TavilySearchDepth = TavilySearchDepth.BASIC,
        topic: TavilySearchTopic = TavilySearchTopic.GENERAL,
        max_results: int = 5,
        include_images: bool = False,
        include_answer: bool = False,
        chunks_per_source: int = MAX_CHUNKS_PER_SOURCE,
        include_domains: list[str] | None = None,
        exclude_domains: list[str] | None = None,
        include_raw_content: bool = False,
        days: int = 7,
        time_range: TavilySearchTimeRange | None = None,
    ) -> list[Data]:
        """Perform the HTTP call to Tavily and convert the response to Data.

        Raises ToolException (wrapping the original error) on timeout, HTTP
        error, or any unexpected failure so agent frameworks can surface it.
        Raises TypeError/ValueError on invalid arguments before any request.
        """
        # Validate enum values
        if not isinstance(search_depth, TavilySearchDepth):
            msg = f"Invalid search_depth value: {search_depth}"
            raise TypeError(msg)
        if not isinstance(topic, TavilySearchTopic):
            msg = f"Invalid topic value: {topic}"
            raise TypeError(msg)

        # Validate chunks_per_source range
        if not 1 <= chunks_per_source <= MAX_CHUNKS_PER_SOURCE:
            msg = f"chunks_per_source must be between 1 and {MAX_CHUNKS_PER_SOURCE}, got {chunks_per_source}"
            raise ValueError(msg)

        # Validate days is positive
        if days < 1:
            msg = f"days must be greater than or equal to 1, got {days}"
            raise ValueError(msg)

        try:
            url = "https://api.tavily.com/search"
            headers = {
                "content-type": "application/json",
                "accept": "application/json",
            }
            # Legacy auth style: API key in the JSON body.
            # NOTE(review): inapplicable parameters are serialized as explicit
            # nulls rather than omitted — confirm the API tolerates nulls.
            payload = {
                "api_key": self.api_key,
                "query": query,
                "search_depth": search_depth.value,
                "topic": topic.value,
                "max_results": max_results,
                "include_images": include_images,
                "include_answer": include_answer,
                "chunks_per_source": chunks_per_source if search_depth == TavilySearchDepth.ADVANCED else None,
                "include_domains": include_domains if include_domains else None,
                "exclude_domains": exclude_domains if exclude_domains else None,
                "include_raw_content": include_raw_content,
                "days": days if topic == TavilySearchTopic.NEWS else None,
                "time_range": time_range.value if time_range else None,
            }

            with httpx.Client(timeout=90.0) as client:
                response = client.post(url, json=payload, headers=headers)
                response.raise_for_status()

            search_results = response.json()

            data_results = [
                Data(
                    data={
                        "title": result.get("title"),
                        "url": result.get("url"),
                        "content": result.get("content"),
                        "score": result.get("score"),
                        "raw_content": result.get("raw_content") if include_raw_content else None,
                    }
                )
                for result in search_results.get("results", [])
            ]

            # Short answer is prepended so it appears first in the output.
            if include_answer and search_results.get("answer"):
                data_results.insert(0, Data(data={"answer": search_results["answer"]}))

            if include_images and search_results.get("images"):
                data_results.append(Data(data={"images": search_results["images"]}))

            self.status = data_results  # type: ignore[assignment]
        except httpx.TimeoutException as e:
            error_message = "Request timed out (90s). Please try again or adjust parameters."
            logger.error(f"Timeout error: {e}")
            self.status = error_message
            raise ToolException(error_message) from e
        except httpx.HTTPStatusError as e:
            error_message = f"HTTP error: {e.response.status_code} - {e.response.text}"
            logger.debug(error_message)
            self.status = error_message
            raise ToolException(error_message) from e
        except Exception as e:
            error_message = f"Unexpected error: {e}"
            logger.opt(exception=True).debug("Error running Tavily Search")
            self.status = error_message
            raise ToolException(error_message) from e
        return data_results

View file

@ -410,7 +410,7 @@
"custom_fields": {
"template": []
},
"description": "",
"description": "Create a prompt template with dynamic variables.",
"display_name": "Prompt",
"documentation": "",
"edited": false,

File diff suppressed because one or more lines are too long

View file

@ -1484,9 +1484,7 @@
"display_name": "Composio API Key",
"dynamic": false,
"info": "Refer to https://docs.composio.dev/faq/api_key/api_key",
"input_types": [
"Message"
],
"input_types": [],
"load_from_db": true,
"name": "api_key",
"password": true,

File diff suppressed because one or more lines are too long

View file

@ -735,9 +735,7 @@
"display_name": "Needle API Key",
"dynamic": false,
"info": "Your Needle API key.",
"input_types": [
"Message"
],
"input_types": [],
"load_from_db": false,
"name": "needle_api_key",
"password": true,

File diff suppressed because one or more lines are too long

View file

@ -232,9 +232,7 @@
"display_name": "API Key",
"dynamic": false,
"info": "Your AgentQL API key from dev.agentql.com",
"input_types": [
"Message"
],
"input_types": [],
"load_from_db": true,
"name": "api_key",
"password": true,
@ -301,7 +299,7 @@
},
"is_stealth_mode_enabled": {
"_input_type": "BoolInput",
"advanced": false,
"advanced": true,
"display_name": "Enable Stealth Mode (Beta)",
"dynamic": false,
"info": "Enable experimental anti-bot evasion strategies. May not work for all websites at all times.",

View file

@ -938,7 +938,7 @@
"multiline": true,
"name": "curl",
"placeholder": "",
"real_time_refresh": false,
"real_time_refresh": true,
"required": false,
"show": true,
"title_case": false,

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

View file

@ -131,9 +131,7 @@
"display_name": "ScrapeGraph API Key",
"dynamic": false,
"info": "The API key to use ScrapeGraph API.",
"input_types": [
"Message"
],
"input_types": [],
"load_from_db": true,
"name": "api_key",
"password": true,

File diff suppressed because one or more lines are too long

View file

@ -205,9 +205,7 @@
"display_name": "Apify Token",
"dynamic": false,
"info": "The API token for the Apify account.",
"input_types": [
"Message"
],
"input_types": [],
"load_from_db": true,
"name": "apify_token",
"password": true,
@ -409,9 +407,7 @@
"display_name": "Apify Token",
"dynamic": false,
"info": "The API token for the Apify account.",
"input_types": [
"Message"
],
"input_types": [],
"load_from_db": true,
"name": "apify_token",
"password": true,

View file

@ -1435,9 +1435,7 @@
"display_name": "SearchAPI API Key",
"dynamic": false,
"info": "",
"input_types": [
"Message"
],
"input_types": [],
"load_from_db": false,
"name": "api_key",
"password": true,

View file

@ -323,7 +323,7 @@
"category": "helpers",
"conditional_paths": [],
"custom_fields": {},
"description": "Runs a language model over each row of a DataFrame's text column and returns a new DataFrame with two columns: 'text_input' (the original text) and 'model_response' containing the model's response.",
"description": "Runs an LLM over each row of a DataFrame's column. If no column is set, the entire row is passed.",
"display_name": "Batch Run",
"documentation": "",
"edited": false,