feat: Add Tavily search API parameters and Tavily Extract API component (#7644)
* Add Tavily search parameters and Extract API component * [autofix.ci] apply automated fixes * Fix style issues flagged by Ruff * fix Ruff Style Check issues * [autofix.ci] apply automated fixes * Fix linting issue reported by Ruff * fix include and exclude domains * Fix linting issue reported by Ruff * fix: update descriptions and input types across multiple starter project JSON files - Changed "Tavily AI" to "Tavily Search" in descriptions for consistency. - Updated input types for several API keys to be empty arrays instead of containing "Message". - Added new fields such as "chunks_per_source", "days", "exclude_domains", "include_domains", and "include_raw_content" to enhance functionality in various projects. - Adjusted the "description" field in the "Youtube Analysis" project for clarity. --------- Co-authored-by: autofix-ci[bot] <114827586+autofix-ci[bot]@users.noreply.github.com> Co-authored-by: Edwin Jose <edwin.jose@datastax.com> Co-authored-by: Gabriel Luiz Freitas Almeida <gabriel@langflow.org>
This commit is contained in:
parent
793e5784d5
commit
8cabedae55
20 changed files with 1255 additions and 357 deletions
|
|
@ -22,8 +22,9 @@ from .search_api import SearchAPIComponent
|
|||
from .searxng import SearXNGToolComponent
|
||||
from .serp import SerpComponent
|
||||
from .serp_api import SerpAPIComponent
|
||||
from .tavily import TavilySearchComponent
|
||||
from .tavily_search import TavilySearchToolComponent
|
||||
from .tavily_extract import TavilyExtractComponent
|
||||
from .tavily_search import TavilySearchComponent
|
||||
from .tavily_search_tool import TavilySearchToolComponent
|
||||
from .wikidata import WikidataComponent
|
||||
from .wikidata_api import WikidataAPIComponent
|
||||
from .wikipedia import WikipediaComponent
|
||||
|
|
@ -60,6 +61,7 @@ __all__ = [
|
|||
"SearchComponent",
|
||||
"SerpAPIComponent",
|
||||
"SerpComponent",
|
||||
"TavilyExtractComponent",
|
||||
"TavilySearchComponent",
|
||||
"TavilySearchToolComponent",
|
||||
"WikidataAPIComponent",
|
||||
|
|
|
|||
|
|
@ -1,148 +0,0 @@
|
|||
import httpx
|
||||
from loguru import logger
|
||||
|
||||
from langflow.custom import Component
|
||||
from langflow.helpers.data import data_to_text
|
||||
from langflow.io import BoolInput, DropdownInput, IntInput, MessageTextInput, Output, SecretStrInput
|
||||
from langflow.schema import Data
|
||||
from langflow.schema.message import Message
|
||||
|
||||
|
||||
class TavilySearchComponent(Component):
    """Langflow component wrapping the Tavily Search API (POST /search).

    Exposes two outputs: ``fetch_content`` returns the results as a list of
    ``Data`` records, and ``fetch_content_text`` flattens them into a single
    ``Message``.
    """

    display_name = "Tavily AI Search"
    description = """**Tavily AI** is a search engine optimized for LLMs and RAG, \
        aimed at efficient, quick, and persistent search results."""
    icon = "TavilyIcon"

    # UI inputs; each value is read back at run time as a same-named
    # attribute (self.api_key, self.query, ...).
    inputs = [
        SecretStrInput(
            name="api_key",
            display_name="Tavily API Key",
            required=True,
            info="Your Tavily API Key.",
        ),
        MessageTextInput(
            name="query",
            display_name="Search Query",
            info="The search query you want to execute with Tavily.",
            tool_mode=True,
        ),
        DropdownInput(
            name="search_depth",
            display_name="Search Depth",
            info="The depth of the search.",
            options=["basic", "advanced"],
            value="advanced",
            advanced=True,
        ),
        DropdownInput(
            name="topic",
            display_name="Search Topic",
            info="The category of the search.",
            options=["general", "news"],
            value="general",
            advanced=True,
        ),
        DropdownInput(
            name="time_range",
            display_name="Time Range",
            info="The time range back from the current date to include in the search results.",
            options=["day", "week", "month", "year"],
            value=None,
            advanced=True,
            combobox=True,
        ),
        IntInput(
            name="max_results",
            display_name="Max Results",
            info="The maximum number of search results to return.",
            value=5,
            advanced=True,
        ),
        BoolInput(
            name="include_images",
            display_name="Include Images",
            info="Include a list of query-related images in the response.",
            value=True,
            advanced=True,
        ),
        BoolInput(
            name="include_answer",
            display_name="Include Answer",
            info="Include a short answer to original query.",
            value=True,
            advanced=True,
        ),
    ]

    outputs = [
        Output(display_name="Data", name="data", method="fetch_content"),
        Output(display_name="Text", name="text", method="fetch_content_text"),
    ]

    def fetch_content(self) -> list[Data]:
        """Call the Tavily /search endpoint and map the response to Data records.

        Returns:
            On success: the short answer first (when requested and present),
            then one ``Data`` per search result, then an images record (when
            requested and present). On HTTP, request, or JSON-parsing errors
            a single-element list with an error ``Data`` record is returned
            instead of raising.
        """
        try:
            url = "https://api.tavily.com/search"
            headers = {
                "content-type": "application/json",
                "accept": "application/json",
            }
            # NOTE(review): time_range is sent even when None — presumably the
            # API ignores null values; confirm against the Tavily docs.
            payload = {
                "api_key": self.api_key,
                "query": self.query,
                "search_depth": self.search_depth,
                "topic": self.topic,
                "max_results": self.max_results,
                "include_images": self.include_images,
                "include_answer": self.include_answer,
                "time_range": self.time_range,
            }

            with httpx.Client() as client:
                response = client.post(url, json=payload, headers=headers)

            response.raise_for_status()
            search_results = response.json()

            data_results = []

            # Surface the synthesized answer ahead of the raw results.
            if self.include_answer and search_results.get("answer"):
                data_results.append(Data(text=search_results["answer"]))

            for result in search_results.get("results", []):
                content = result.get("content", "")
                data_results.append(
                    Data(
                        text=content,
                        data={
                            "title": result.get("title"),
                            "url": result.get("url"),
                            "content": content,
                            "score": result.get("score"),
                        },
                    )
                )

            if self.include_images and search_results.get("images"):
                data_results.append(Data(text="Images found", data={"images": search_results["images"]}))
        except httpx.HTTPStatusError as exc:
            error_message = f"HTTP error occurred: {exc.response.status_code} - {exc.response.text}"
            logger.error(error_message)
            return [Data(text=error_message, data={"error": error_message})]
        except httpx.RequestError as exc:
            error_message = f"Request error occurred: {exc}"
            logger.error(error_message)
            return [Data(text=error_message, data={"error": error_message})]
        except ValueError as exc:
            # response.json() raises ValueError on a non-JSON body.
            error_message = f"Invalid response format: {exc}"
            logger.error(error_message)
            return [Data(text=error_message, data={"error": error_message})]
        else:
            # Success path: expose the results on the component status for the UI.
            self.status = data_results
            return data_results

    def fetch_content_text(self) -> Message:
        """Render the fetched Data records into a single text Message via data_to_text."""
        data = self.fetch_content()
        result_string = data_to_text("{text}", data)
        self.status = result_string
        return Message(text=result_string)
|
||||
119
src/backend/base/langflow/components/tools/tavily_extract.py
Normal file
119
src/backend/base/langflow/components/tools/tavily_extract.py
Normal file
|
|
@ -0,0 +1,119 @@
|
|||
import httpx
|
||||
from loguru import logger
|
||||
|
||||
from langflow.custom import Component
|
||||
from langflow.helpers.data import data_to_text
|
||||
from langflow.io import BoolInput, DropdownInput, MessageTextInput, Output, SecretStrInput
|
||||
from langflow.schema import Data
|
||||
from langflow.schema.message import Message
|
||||
|
||||
|
||||
class TavilyExtractComponent(Component):
    """Separate component specifically for Tavily Extract functionality.

    Wraps the Tavily Extract API (POST /extract): takes a comma-separated
    list of URLs and returns their extracted raw content as ``Data`` records
    (``fetch_content``) or as a single flattened ``Message``
    (``fetch_content_text``).
    """

    display_name = "Tavily Extract API"
    description = """**Tavily Extract** extract raw content from URLs."""
    icon = "TavilyIcon"

    # UI inputs; each value is read back at run time as a same-named attribute.
    inputs = [
        SecretStrInput(
            name="api_key",
            display_name="Tavily API Key",
            required=True,
            info="Your Tavily API Key.",
        ),
        MessageTextInput(
            name="urls",
            display_name="URLs",
            info="Comma-separated list of URLs to extract content from.",
            required=True,
        ),
        DropdownInput(
            name="extract_depth",
            display_name="Extract Depth",
            info="The depth of the extraction process.",
            options=["basic", "advanced"],
            value="basic",
            advanced=True,
        ),
        BoolInput(
            name="include_images",
            display_name="Include Images",
            info="Include a list of images extracted from the URLs.",
            value=False,
            advanced=True,
        ),
    ]

    outputs = [
        Output(display_name="Data", name="data", method="fetch_content"),
        Output(display_name="Text", name="text", method="fetch_content_text"),
    ]

    def fetch_content(self) -> list[Data]:
        """Fetches and processes extracted content into a list of Data objects.

        Returns:
            One ``Data`` record per successful extraction, plus a single
            record summarizing any failed extractions. All failure modes
            (no URLs, timeout, HTTP error, request error, malformed response)
            are returned as a one-element error ``Data`` list rather than
            raised.
        """
        try:
            # Split the comma-separated input and drop empty entries.
            urls = [url.strip() for url in (self.urls or "").split(",") if url.strip()]
            if not urls:
                error_message = "No valid URLs provided"
                logger.error(error_message)
                return [Data(text=error_message, data={"error": error_message})]

            url = "https://api.tavily.com/extract"
            headers = {
                "content-type": "application/json",
                "accept": "application/json",
                "Authorization": f"Bearer {self.api_key}",
            }
            payload = {
                "urls": urls,
                "extract_depth": self.extract_depth,
                "include_images": self.include_images,
            }

            with httpx.Client(timeout=90.0) as client:
                response = client.post(url, json=payload, headers=headers)
                response.raise_for_status()

            # Fix: parse the body inside the try block. Previously this ran in
            # the `else` clause, so a malformed/non-JSON response raised an
            # unhandled ValueError instead of producing an error Data record.
            extract_results = response.json()

        except httpx.TimeoutException as exc:
            # Must precede httpx.RequestError (it is a subclass of it).
            error_message = f"Request timed out (90s): {exc}"
            logger.error(error_message)
            return [Data(text=error_message, data={"error": error_message})]
        except httpx.HTTPStatusError as exc:
            error_message = f"HTTP error occurred: {exc.response.status_code} - {exc.response.text}"
            logger.error(error_message)
            return [Data(text=error_message, data={"error": error_message})]
        except (ValueError, KeyError, AttributeError, httpx.RequestError) as exc:
            error_message = f"Data processing error: {exc}"
            logger.error(error_message)
            return [Data(text=error_message, data={"error": error_message})]
        else:
            data_results = []

            # Process successful extractions
            for result in extract_results.get("results", []):
                raw_content = result.get("raw_content", "")
                images = result.get("images", [])
                result_data = {"url": result.get("url"), "raw_content": raw_content, "images": images}
                data_results.append(Data(text=raw_content, data=result_data))

            # Process failed extractions
            if extract_results.get("failed_results"):
                data_results.append(
                    Data(
                        text="Failed extractions",
                        data={"failed_results": extract_results["failed_results"]},
                    )
                )

            # Expose the results on the component status for the UI.
            self.status = data_results
            return data_results

    def fetch_content_text(self) -> Message:
        """Flatten the extracted Data records into a single Message text."""
        data = self.fetch_content()
        result_string = data_to_text("{text}", data)
        self.status = result_string
        return Message(text=result_string)
|
||||
|
|
@ -1,47 +1,18 @@
|
|||
from enum import Enum
|
||||
|
||||
import httpx
|
||||
from langchain.tools import StructuredTool
|
||||
from langchain_core.tools import ToolException
|
||||
from loguru import logger
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
from langflow.base.langchain_utilities.model import LCToolComponent
|
||||
from langflow.field_typing import Tool
|
||||
from langflow.inputs import BoolInput, DropdownInput, IntInput, MessageTextInput, SecretStrInput
|
||||
from langflow.custom import Component
|
||||
from langflow.helpers.data import data_to_text
|
||||
from langflow.io import BoolInput, DropdownInput, IntInput, MessageTextInput, Output, SecretStrInput
|
||||
from langflow.schema import Data
|
||||
from langflow.schema.message import Message
|
||||
|
||||
|
||||
class TavilySearchDepth(Enum):
|
||||
BASIC = "basic"
|
||||
ADVANCED = "advanced"
|
||||
|
||||
|
||||
class TavilySearchTopic(Enum):
|
||||
GENERAL = "general"
|
||||
NEWS = "news"
|
||||
|
||||
|
||||
class TavilySearchSchema(BaseModel):
|
||||
query: str = Field(..., description="The search query you want to execute with Tavily.")
|
||||
search_depth: TavilySearchDepth = Field(TavilySearchDepth.BASIC, description="The depth of the search.")
|
||||
topic: TavilySearchTopic = Field(TavilySearchTopic.GENERAL, description="The category of the search.")
|
||||
max_results: int = Field(5, description="The maximum number of search results to return.")
|
||||
include_images: bool = Field(default=False, description="Include a list of query-related images in the response.")
|
||||
include_answer: bool = Field(default=False, description="Include a short answer to original query.")
|
||||
|
||||
|
||||
class TavilySearchToolComponent(LCToolComponent):
|
||||
display_name = "Tavily AI Search [DEPRECATED]"
|
||||
description = """**Tavily AI** is a search engine optimized for LLMs and RAG, \
|
||||
aimed at efficient, quick, and persistent search results. It can be used independently or as an agent tool.
|
||||
|
||||
Note: Check 'Advanced' for all options.
|
||||
"""
|
||||
class TavilySearchComponent(Component):
|
||||
display_name = "Tavily Search API"
|
||||
description = """**Tavily Search** is a search engine optimized for LLMs and RAG, \
|
||||
aimed at efficient, quick, and persistent search results."""
|
||||
icon = "TavilyIcon"
|
||||
name = "TavilyAISearch"
|
||||
documentation = "https://docs.tavily.com/"
|
||||
legacy = True
|
||||
|
||||
inputs = [
|
||||
SecretStrInput(
|
||||
|
|
@ -54,21 +25,36 @@ Note: Check 'Advanced' for all options.
|
|||
name="query",
|
||||
display_name="Search Query",
|
||||
info="The search query you want to execute with Tavily.",
|
||||
tool_mode=True,
|
||||
),
|
||||
DropdownInput(
|
||||
name="search_depth",
|
||||
display_name="Search Depth",
|
||||
info="The depth of the search.",
|
||||
options=list(TavilySearchDepth),
|
||||
value=TavilySearchDepth.ADVANCED,
|
||||
options=["basic", "advanced"],
|
||||
value="advanced",
|
||||
advanced=True,
|
||||
),
|
||||
IntInput(
|
||||
name="chunks_per_source",
|
||||
display_name="Chunks Per Source",
|
||||
info=("The number of content chunks to retrieve from each source (1-3). Only works with advanced search."),
|
||||
value=3,
|
||||
advanced=True,
|
||||
),
|
||||
DropdownInput(
|
||||
name="topic",
|
||||
display_name="Search Topic",
|
||||
info="The category of the search.",
|
||||
options=list(TavilySearchTopic),
|
||||
value=TavilySearchTopic.GENERAL,
|
||||
options=["general", "news"],
|
||||
value="general",
|
||||
advanced=True,
|
||||
),
|
||||
IntInput(
|
||||
name="days",
|
||||
display_name="Days",
|
||||
info="Number of days back from current date to include. Only available with news topic.",
|
||||
value=7,
|
||||
advanced=True,
|
||||
),
|
||||
IntInput(
|
||||
|
|
@ -78,13 +64,6 @@ Note: Check 'Advanced' for all options.
|
|||
value=5,
|
||||
advanced=True,
|
||||
),
|
||||
BoolInput(
|
||||
name="include_images",
|
||||
display_name="Include Images",
|
||||
info="Include a list of query-related images in the response.",
|
||||
value=True,
|
||||
advanced=True,
|
||||
),
|
||||
BoolInput(
|
||||
name="include_answer",
|
||||
display_name="Include Answer",
|
||||
|
|
@ -92,115 +71,145 @@ Note: Check 'Advanced' for all options.
|
|||
value=True,
|
||||
advanced=True,
|
||||
),
|
||||
DropdownInput(
|
||||
name="time_range",
|
||||
display_name="Time Range",
|
||||
info="The time range back from the current date to filter results.",
|
||||
options=["day", "week", "month", "year"],
|
||||
value=None, # Default to None to make it optional
|
||||
advanced=True,
|
||||
),
|
||||
BoolInput(
|
||||
name="include_images",
|
||||
display_name="Include Images",
|
||||
info="Include a list of query-related images in the response.",
|
||||
value=True,
|
||||
advanced=True,
|
||||
),
|
||||
MessageTextInput(
|
||||
name="include_domains",
|
||||
display_name="Include Domains",
|
||||
info="Comma-separated list of domains to include in the search results.",
|
||||
advanced=True,
|
||||
),
|
||||
MessageTextInput(
|
||||
name="exclude_domains",
|
||||
display_name="Exclude Domains",
|
||||
info="Comma-separated list of domains to exclude from the search results.",
|
||||
advanced=True,
|
||||
),
|
||||
BoolInput(
|
||||
name="include_raw_content",
|
||||
display_name="Include Raw Content",
|
||||
info="Include the cleaned and parsed HTML content of each search result.",
|
||||
value=False,
|
||||
advanced=True,
|
||||
),
|
||||
]
|
||||
|
||||
def run_model(self) -> list[Data]:
|
||||
# Convert string values to enum instances with validation
|
||||
outputs = [
|
||||
Output(display_name="Data", name="data", method="fetch_content"),
|
||||
Output(display_name="Text", name="text", method="fetch_content_text"),
|
||||
]
|
||||
|
||||
def fetch_content(self) -> list[Data]:
|
||||
try:
|
||||
search_depth_enum = (
|
||||
self.search_depth
|
||||
if isinstance(self.search_depth, TavilySearchDepth)
|
||||
else TavilySearchDepth(str(self.search_depth).lower())
|
||||
)
|
||||
except ValueError as e:
|
||||
error_message = f"Invalid search depth value: {e!s}"
|
||||
self.status = error_message
|
||||
return [Data(data={"error": error_message})]
|
||||
# Only process domains if they're provided
|
||||
include_domains = None
|
||||
exclude_domains = None
|
||||
|
||||
try:
|
||||
topic_enum = (
|
||||
self.topic if isinstance(self.topic, TavilySearchTopic) else TavilySearchTopic(str(self.topic).lower())
|
||||
)
|
||||
except ValueError as e:
|
||||
error_message = f"Invalid topic value: {e!s}"
|
||||
self.status = error_message
|
||||
return [Data(data={"error": error_message})]
|
||||
if self.include_domains:
|
||||
include_domains = [domain.strip() for domain in self.include_domains.split(",") if domain.strip()]
|
||||
|
||||
return self._tavily_search(
|
||||
self.query,
|
||||
search_depth=search_depth_enum,
|
||||
topic=topic_enum,
|
||||
max_results=self.max_results,
|
||||
include_images=self.include_images,
|
||||
include_answer=self.include_answer,
|
||||
)
|
||||
if self.exclude_domains:
|
||||
exclude_domains = [domain.strip() for domain in self.exclude_domains.split(",") if domain.strip()]
|
||||
|
||||
def build_tool(self) -> Tool:
|
||||
return StructuredTool.from_function(
|
||||
name="tavily_search",
|
||||
description="Perform a web search using the Tavily API.",
|
||||
func=self._tavily_search,
|
||||
args_schema=TavilySearchSchema,
|
||||
)
|
||||
|
||||
def _tavily_search(
|
||||
self,
|
||||
query: str,
|
||||
*,
|
||||
search_depth: TavilySearchDepth = TavilySearchDepth.BASIC,
|
||||
topic: TavilySearchTopic = TavilySearchTopic.GENERAL,
|
||||
max_results: int = 5,
|
||||
include_images: bool = False,
|
||||
include_answer: bool = False,
|
||||
) -> list[Data]:
|
||||
# Validate enum values
|
||||
if not isinstance(search_depth, TavilySearchDepth):
|
||||
msg = f"Invalid search_depth value: {search_depth}"
|
||||
raise TypeError(msg)
|
||||
if not isinstance(topic, TavilySearchTopic):
|
||||
msg = f"Invalid topic value: {topic}"
|
||||
raise TypeError(msg)
|
||||
|
||||
try:
|
||||
url = "https://api.tavily.com/search"
|
||||
headers = {
|
||||
"content-type": "application/json",
|
||||
"accept": "application/json",
|
||||
}
|
||||
|
||||
payload = {
|
||||
"api_key": self.api_key,
|
||||
"query": query,
|
||||
"search_depth": search_depth.value,
|
||||
"topic": topic.value,
|
||||
"max_results": max_results,
|
||||
"include_images": include_images,
|
||||
"include_answer": include_answer,
|
||||
"query": self.query,
|
||||
"search_depth": self.search_depth,
|
||||
"topic": self.topic,
|
||||
"max_results": self.max_results,
|
||||
"include_images": self.include_images,
|
||||
"include_answer": self.include_answer,
|
||||
"include_raw_content": self.include_raw_content,
|
||||
"days": self.days,
|
||||
"time_range": self.time_range,
|
||||
}
|
||||
|
||||
with httpx.Client() as client:
|
||||
# Only add domains to payload if they exist and have values
|
||||
if include_domains:
|
||||
payload["include_domains"] = include_domains
|
||||
if exclude_domains:
|
||||
payload["exclude_domains"] = exclude_domains
|
||||
|
||||
# Add conditional parameters only if they should be included
|
||||
if self.search_depth == "advanced" and self.chunks_per_source:
|
||||
payload["chunks_per_source"] = self.chunks_per_source
|
||||
|
||||
if self.topic == "news" and self.days:
|
||||
payload["days"] = int(self.days) # Ensure days is an integer
|
||||
|
||||
# Add time_range if it's set
|
||||
if hasattr(self, "time_range") and self.time_range:
|
||||
payload["time_range"] = self.time_range
|
||||
|
||||
# Add timeout handling
|
||||
with httpx.Client(timeout=90.0) as client:
|
||||
response = client.post(url, json=payload, headers=headers)
|
||||
|
||||
response.raise_for_status()
|
||||
search_results = response.json()
|
||||
|
||||
data_results = [
|
||||
Data(
|
||||
data={
|
||||
"title": result.get("title"),
|
||||
"url": result.get("url"),
|
||||
"content": result.get("content"),
|
||||
"score": result.get("score"),
|
||||
}
|
||||
)
|
||||
for result in search_results.get("results", [])
|
||||
]
|
||||
data_results = []
|
||||
|
||||
if include_answer and search_results.get("answer"):
|
||||
data_results.insert(0, Data(data={"answer": search_results["answer"]}))
|
||||
if self.include_answer and search_results.get("answer"):
|
||||
data_results.append(Data(text=search_results["answer"]))
|
||||
|
||||
if include_images and search_results.get("images"):
|
||||
data_results.append(Data(data={"images": search_results["images"]}))
|
||||
for result in search_results.get("results", []):
|
||||
content = result.get("content", "")
|
||||
result_data = {
|
||||
"title": result.get("title"),
|
||||
"url": result.get("url"),
|
||||
"content": content,
|
||||
"score": result.get("score"),
|
||||
}
|
||||
if self.include_raw_content:
|
||||
result_data["raw_content"] = result.get("raw_content")
|
||||
|
||||
self.status = data_results # type: ignore[assignment]
|
||||
data_results.append(Data(text=content, data=result_data))
|
||||
|
||||
except httpx.HTTPStatusError as e:
|
||||
error_message = f"HTTP error: {e.response.status_code} - {e.response.text}"
|
||||
logger.debug(error_message)
|
||||
self.status = error_message
|
||||
raise ToolException(error_message) from e
|
||||
except Exception as e:
|
||||
error_message = f"Unexpected error: {e}"
|
||||
logger.opt(exception=True).debug("Error running Tavily Search")
|
||||
self.status = error_message
|
||||
raise ToolException(error_message) from e
|
||||
return data_results
|
||||
if self.include_images and search_results.get("images"):
|
||||
data_results.append(Data(text="Images found", data={"images": search_results["images"]}))
|
||||
|
||||
except httpx.TimeoutException:
|
||||
error_message = "Request timed out (90s). Please try again or adjust parameters."
|
||||
logger.error(error_message)
|
||||
return [Data(text=error_message, data={"error": error_message})]
|
||||
except httpx.HTTPStatusError as exc:
|
||||
error_message = f"HTTP error occurred: {exc.response.status_code} - {exc.response.text}"
|
||||
logger.error(error_message)
|
||||
return [Data(text=error_message, data={"error": error_message})]
|
||||
except httpx.RequestError as exc:
|
||||
error_message = f"Request error occurred: {exc}"
|
||||
logger.error(error_message)
|
||||
return [Data(text=error_message, data={"error": error_message})]
|
||||
except ValueError as exc:
|
||||
error_message = f"Invalid response format: {exc}"
|
||||
logger.error(error_message)
|
||||
return [Data(text=error_message, data={"error": error_message})]
|
||||
else:
|
||||
self.status = data_results
|
||||
return data_results
|
||||
|
||||
def fetch_content_text(self) -> Message:
|
||||
data = self.fetch_content()
|
||||
result_string = data_to_text("{text}", data)
|
||||
self.status = result_string
|
||||
return Message(text=result_string)
|
||||
|
|
|
|||
344
src/backend/base/langflow/components/tools/tavily_search_tool.py
Normal file
344
src/backend/base/langflow/components/tools/tavily_search_tool.py
Normal file
|
|
@ -0,0 +1,344 @@
|
|||
from enum import Enum
|
||||
|
||||
import httpx
|
||||
from langchain.tools import StructuredTool
|
||||
from langchain_core.tools import ToolException
|
||||
from loguru import logger
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
from langflow.base.langchain_utilities.model import LCToolComponent
|
||||
from langflow.field_typing import Tool
|
||||
from langflow.inputs import BoolInput, DropdownInput, IntInput, MessageTextInput, SecretStrInput
|
||||
from langflow.schema import Data
|
||||
|
||||
# Upper bound Tavily accepts for chunks_per_source (advanced search only);
# used both as the schema default and as the validation ceiling below.
MAX_CHUNKS_PER_SOURCE = 3
|
||||
|
||||
|
||||
class TavilySearchDepth(Enum):
    """Search depth options accepted by the Tavily /search endpoint."""

    BASIC = "basic"
    ADVANCED = "advanced"
|
||||
|
||||
|
||||
class TavilySearchTopic(Enum):
    """Search category options accepted by the Tavily /search endpoint."""

    GENERAL = "general"
    NEWS = "news"
|
||||
|
||||
|
||||
class TavilySearchTimeRange(Enum):
    """Relative time windows (back from the current date) for filtering results."""

    DAY = "day"
    WEEK = "week"
    MONTH = "month"
    YEAR = "year"
|
||||
|
||||
|
||||
class TavilySearchSchema(BaseModel):
    """Argument schema for the ``tavily_search`` structured tool."""

    query: str = Field(..., description="The search query you want to execute with Tavily.")
    search_depth: TavilySearchDepth = Field(TavilySearchDepth.BASIC, description="The depth of the search.")
    topic: TavilySearchTopic = Field(TavilySearchTopic.GENERAL, description="The category of the search.")
    max_results: int = Field(5, description="The maximum number of search results to return.")
    include_images: bool = Field(default=False, description="Include a list of query-related images in the response.")
    include_answer: bool = Field(default=False, description="Include a short answer to original query.")
    chunks_per_source: int = Field(
        default=MAX_CHUNKS_PER_SOURCE,
        description=(
            "The number of content chunks to retrieve from each source (max 500 chars each). Only for advanced search."
        ),
        ge=1,
        le=MAX_CHUNKS_PER_SOURCE,
    )
    # Fix: use default_factory so each model instance gets its own list
    # instead of a shared mutable default (pydantic-recommended pattern).
    include_domains: list[str] = Field(
        default_factory=list,
        description="A list of domains to specifically include in the search results.",
    )
    exclude_domains: list[str] = Field(
        default_factory=list,
        description="A list of domains to specifically exclude from the search results.",
    )
    include_raw_content: bool = Field(
        default=False,
        description="Include the cleaned and parsed HTML content of each search result.",
    )
    days: int = Field(
        default=7,
        description="Number of days back from the current date to include. Only available if topic is news.",
        ge=1,
    )
    time_range: TavilySearchTimeRange | None = Field(
        default=None,
        description="The time range back from the current date to filter results.",
    )
|
||||
|
||||
|
||||
class TavilySearchToolComponent(LCToolComponent):
|
||||
display_name = "Tavily Search API"
|
||||
description = """**Tavily Search API** is a search engine optimized for LLMs and RAG, \
|
||||
aimed at efficient, quick, and persistent search results. It can be used independently or as an agent tool.
|
||||
|
||||
Note: Check 'Advanced' for all options.
|
||||
"""
|
||||
icon = "TavilyIcon"
|
||||
name = "TavilyAISearch"
|
||||
documentation = "https://docs.tavily.com/"
|
||||
legacy = True
|
||||
|
||||
inputs = [
|
||||
SecretStrInput(
|
||||
name="api_key",
|
||||
display_name="Tavily API Key",
|
||||
required=True,
|
||||
info="Your Tavily API Key.",
|
||||
),
|
||||
MessageTextInput(
|
||||
name="query",
|
||||
display_name="Search Query",
|
||||
info="The search query you want to execute with Tavily.",
|
||||
),
|
||||
DropdownInput(
|
||||
name="search_depth",
|
||||
display_name="Search Depth",
|
||||
info="The depth of the search.",
|
||||
options=list(TavilySearchDepth),
|
||||
value=TavilySearchDepth.ADVANCED,
|
||||
advanced=True,
|
||||
),
|
||||
IntInput(
|
||||
name="chunks_per_source",
|
||||
display_name="Chunks Per Source",
|
||||
info=("The number of content chunks to retrieve from each source (1-3). Only works with advanced search."),
|
||||
value=MAX_CHUNKS_PER_SOURCE,
|
||||
advanced=True,
|
||||
),
|
||||
DropdownInput(
|
||||
name="topic",
|
||||
display_name="Search Topic",
|
||||
info="The category of the search.",
|
||||
options=list(TavilySearchTopic),
|
||||
value=TavilySearchTopic.GENERAL,
|
||||
advanced=True,
|
||||
),
|
||||
IntInput(
|
||||
name="days",
|
||||
display_name="Days",
|
||||
info="Number of days back from current date to include. Only available with news topic.",
|
||||
value=7,
|
||||
advanced=True,
|
||||
),
|
||||
IntInput(
|
||||
name="max_results",
|
||||
display_name="Max Results",
|
||||
info="The maximum number of search results to return.",
|
||||
value=5,
|
||||
advanced=True,
|
||||
),
|
||||
BoolInput(
|
||||
name="include_answer",
|
||||
display_name="Include Answer",
|
||||
info="Include a short answer to original query.",
|
||||
value=True,
|
||||
advanced=True,
|
||||
),
|
||||
DropdownInput(
|
||||
name="time_range",
|
||||
display_name="Time Range",
|
||||
info="The time range back from the current date to filter results.",
|
||||
options=list(TavilySearchTimeRange),
|
||||
value=None,
|
||||
advanced=True,
|
||||
),
|
||||
BoolInput(
|
||||
name="include_images",
|
||||
display_name="Include Images",
|
||||
info="Include a list of query-related images in the response.",
|
||||
value=True,
|
||||
advanced=True,
|
||||
),
|
||||
MessageTextInput(
|
||||
name="include_domains",
|
||||
display_name="Include Domains",
|
||||
info="Comma-separated list of domains to include in the search results.",
|
||||
advanced=True,
|
||||
),
|
||||
MessageTextInput(
|
||||
name="exclude_domains",
|
||||
display_name="Exclude Domains",
|
||||
info="Comma-separated list of domains to exclude from the search results.",
|
||||
advanced=True,
|
||||
),
|
||||
BoolInput(
|
||||
name="include_raw_content",
|
||||
display_name="Include Raw Content",
|
||||
info="Include the cleaned and parsed HTML content of each search result.",
|
||||
value=False,
|
||||
advanced=True,
|
||||
),
|
||||
]
|
||||
|
||||
def run_model(self) -> list[Data]:
|
||||
# Convert string values to enum instances with validation
|
||||
try:
|
||||
search_depth_enum = (
|
||||
self.search_depth
|
||||
if isinstance(self.search_depth, TavilySearchDepth)
|
||||
else TavilySearchDepth(str(self.search_depth).lower())
|
||||
)
|
||||
except ValueError as e:
|
||||
error_message = f"Invalid search depth value: {e!s}"
|
||||
self.status = error_message
|
||||
return [Data(data={"error": error_message})]
|
||||
|
||||
try:
|
||||
topic_enum = (
|
||||
self.topic if isinstance(self.topic, TavilySearchTopic) else TavilySearchTopic(str(self.topic).lower())
|
||||
)
|
||||
except ValueError as e:
|
||||
error_message = f"Invalid topic value: {e!s}"
|
||||
self.status = error_message
|
||||
return [Data(data={"error": error_message})]
|
||||
|
||||
try:
|
||||
time_range_enum = (
|
||||
self.time_range
|
||||
if isinstance(self.time_range, TavilySearchTimeRange)
|
||||
else TavilySearchTimeRange(str(self.time_range).lower())
|
||||
if self.time_range
|
||||
else None
|
||||
)
|
||||
except ValueError as e:
|
||||
error_message = f"Invalid time range value: {e!s}"
|
||||
self.status = error_message
|
||||
return [Data(data={"error": error_message})]
|
||||
|
||||
# Initialize domain variables as None
|
||||
include_domains = None
|
||||
exclude_domains = None
|
||||
|
||||
# Only process domains if they're provided
|
||||
if self.include_domains:
|
||||
include_domains = [domain.strip() for domain in self.include_domains.split(",") if domain.strip()]
|
||||
|
||||
if self.exclude_domains:
|
||||
exclude_domains = [domain.strip() for domain in self.exclude_domains.split(",") if domain.strip()]
|
||||
|
||||
return self._tavily_search(
|
||||
self.query,
|
||||
search_depth=search_depth_enum,
|
||||
topic=topic_enum,
|
||||
max_results=self.max_results,
|
||||
include_images=self.include_images,
|
||||
include_answer=self.include_answer,
|
||||
chunks_per_source=self.chunks_per_source,
|
||||
include_domains=include_domains,
|
||||
exclude_domains=exclude_domains,
|
||||
include_raw_content=self.include_raw_content,
|
||||
days=self.days,
|
||||
time_range=time_range_enum,
|
||||
)
|
||||
|
||||
def build_tool(self) -> Tool:
    """Expose the Tavily search as a LangChain StructuredTool.

    The returned tool delegates to ``self._tavily_search`` and validates
    its arguments against ``TavilySearchSchema``.
    """
    tool_description = "Perform a web search using the Tavily API."
    return StructuredTool.from_function(
        func=self._tavily_search,
        name="tavily_search",
        description=tool_description,
        args_schema=TavilySearchSchema,
    )
||||
def _tavily_search(
    self,
    query: str,
    *,
    search_depth: TavilySearchDepth = TavilySearchDepth.BASIC,
    topic: TavilySearchTopic = TavilySearchTopic.GENERAL,
    max_results: int = 5,
    include_images: bool = False,
    include_answer: bool = False,
    chunks_per_source: int = MAX_CHUNKS_PER_SOURCE,
    include_domains: list[str] | None = None,
    exclude_domains: list[str] | None = None,
    include_raw_content: bool = False,
    days: int = 7,
    time_range: TavilySearchTimeRange | None = None,
) -> list[Data]:
    """Query the Tavily /search endpoint and wrap each hit in a ``Data`` object.

    Invalid arguments raise ``TypeError``/``ValueError`` directly; transport
    and HTTP failures are reported to the agent as ``ToolException`` (with
    ``self.status`` mirroring the error message).
    """
    # Guard against raw (non-enum) values arriving via tool invocation.
    if not isinstance(search_depth, TavilySearchDepth):
        msg = f"Invalid search_depth value: {search_depth}"
        raise TypeError(msg)
    if not isinstance(topic, TavilySearchTopic):
        msg = f"Invalid topic value: {topic}"
        raise TypeError(msg)

    # Tavily caps how many content chunks may be returned per source.
    if not 1 <= chunks_per_source <= MAX_CHUNKS_PER_SOURCE:
        msg = f"chunks_per_source must be between 1 and {MAX_CHUNKS_PER_SOURCE}, got {chunks_per_source}"
        raise ValueError(msg)

    # The news look-back window must span at least one day.
    if days < 1:
        msg = f"days must be greater than or equal to 1, got {days}"
        raise ValueError(msg)

    try:
        request_headers = {
            "content-type": "application/json",
            "accept": "application/json",
        }
        # chunks_per_source only applies to advanced search; days only to the
        # news topic — the API ignores nulls for the other combinations.
        request_payload = {
            "api_key": self.api_key,
            "query": query,
            "search_depth": search_depth.value,
            "topic": topic.value,
            "max_results": max_results,
            "include_images": include_images,
            "include_answer": include_answer,
            "chunks_per_source": chunks_per_source if search_depth == TavilySearchDepth.ADVANCED else None,
            "include_domains": include_domains or None,
            "exclude_domains": exclude_domains or None,
            "include_raw_content": include_raw_content,
            "days": days if topic == TavilySearchTopic.NEWS else None,
            "time_range": time_range.value if time_range else None,
        }

        with httpx.Client(timeout=90.0) as client:
            response = client.post(
                "https://api.tavily.com/search", json=request_payload, headers=request_headers
            )

        response.raise_for_status()
        body = response.json()

        results: list[Data] = []
        for hit in body.get("results", []):
            results.append(
                Data(
                    data={
                        "title": hit.get("title"),
                        "url": hit.get("url"),
                        "content": hit.get("content"),
                        "score": hit.get("score"),
                        "raw_content": hit.get("raw_content") if include_raw_content else None,
                    }
                )
            )

        # The synthesized answer (if any) leads the list; image results trail it.
        if include_answer and body.get("answer"):
            results.insert(0, Data(data={"answer": body["answer"]}))
        if include_images and body.get("images"):
            results.append(Data(data={"images": body["images"]}))

        self.status = results  # type: ignore[assignment]
    except httpx.TimeoutException as e:
        error_message = "Request timed out (90s). Please try again or adjust parameters."
        logger.error(f"Timeout error: {e}")
        self.status = error_message
        raise ToolException(error_message) from e
    except httpx.HTTPStatusError as e:
        error_message = f"HTTP error: {e.response.status_code} - {e.response.text}"
        logger.debug(error_message)
        self.status = error_message
        raise ToolException(error_message) from e
    except Exception as e:
        error_message = f"Unexpected error: {e}"
        logger.opt(exception=True).debug("Error running Tavily Search")
        self.status = error_message
        raise ToolException(error_message) from e
    return results
|
|
@ -410,7 +410,7 @@
|
|||
"custom_fields": {
|
||||
"template": []
|
||||
},
|
||||
"description": "",
|
||||
"description": "Create a prompt template with dynamic variables.",
|
||||
"display_name": "Prompt",
|
||||
"documentation": "",
|
||||
"edited": false,
|
||||
|
|
|
|||
File diff suppressed because one or more lines are too long
|
|
@ -1484,9 +1484,7 @@
|
|||
"display_name": "Composio API Key",
|
||||
"dynamic": false,
|
||||
"info": "Refer to https://docs.composio.dev/faq/api_key/api_key",
|
||||
"input_types": [
|
||||
"Message"
|
||||
],
|
||||
"input_types": [],
|
||||
"load_from_db": true,
|
||||
"name": "api_key",
|
||||
"password": true,
|
||||
|
|
|
|||
File diff suppressed because one or more lines are too long
|
|
@ -735,9 +735,7 @@
|
|||
"display_name": "Needle API Key",
|
||||
"dynamic": false,
|
||||
"info": "Your Needle API key.",
|
||||
"input_types": [
|
||||
"Message"
|
||||
],
|
||||
"input_types": [],
|
||||
"load_from_db": false,
|
||||
"name": "needle_api_key",
|
||||
"password": true,
|
||||
|
|
|
|||
File diff suppressed because one or more lines are too long
|
|
@ -232,9 +232,7 @@
|
|||
"display_name": "API Key",
|
||||
"dynamic": false,
|
||||
"info": "Your AgentQL API key from dev.agentql.com",
|
||||
"input_types": [
|
||||
"Message"
|
||||
],
|
||||
"input_types": [],
|
||||
"load_from_db": true,
|
||||
"name": "api_key",
|
||||
"password": true,
|
||||
|
|
@ -301,7 +299,7 @@
|
|||
},
|
||||
"is_stealth_mode_enabled": {
|
||||
"_input_type": "BoolInput",
|
||||
"advanced": false,
|
||||
"advanced": true,
|
||||
"display_name": "Enable Stealth Mode (Beta)",
|
||||
"dynamic": false,
|
||||
"info": "Enable experimental anti-bot evasion strategies. May not work for all websites at all times.",
|
||||
|
|
|
|||
|
|
@ -938,7 +938,7 @@
|
|||
"multiline": true,
|
||||
"name": "curl",
|
||||
"placeholder": "",
|
||||
"real_time_refresh": false,
|
||||
"real_time_refresh": true,
|
||||
"required": false,
|
||||
"show": true,
|
||||
"title_case": false,
|
||||
|
|
|
|||
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
|
|
@ -131,9 +131,7 @@
|
|||
"display_name": "ScrapeGraph API Key",
|
||||
"dynamic": false,
|
||||
"info": "The API key to use ScrapeGraph API.",
|
||||
"input_types": [
|
||||
"Message"
|
||||
],
|
||||
"input_types": [],
|
||||
"load_from_db": true,
|
||||
"name": "api_key",
|
||||
"password": true,
|
||||
|
|
|
|||
File diff suppressed because one or more lines are too long
|
|
@ -205,9 +205,7 @@
|
|||
"display_name": "Apify Token",
|
||||
"dynamic": false,
|
||||
"info": "The API token for the Apify account.",
|
||||
"input_types": [
|
||||
"Message"
|
||||
],
|
||||
"input_types": [],
|
||||
"load_from_db": true,
|
||||
"name": "apify_token",
|
||||
"password": true,
|
||||
|
|
@ -409,9 +407,7 @@
|
|||
"display_name": "Apify Token",
|
||||
"dynamic": false,
|
||||
"info": "The API token for the Apify account.",
|
||||
"input_types": [
|
||||
"Message"
|
||||
],
|
||||
"input_types": [],
|
||||
"load_from_db": true,
|
||||
"name": "apify_token",
|
||||
"password": true,
|
||||
|
|
|
|||
|
|
@ -1435,9 +1435,7 @@
|
|||
"display_name": "SearchAPI API Key",
|
||||
"dynamic": false,
|
||||
"info": "",
|
||||
"input_types": [
|
||||
"Message"
|
||||
],
|
||||
"input_types": [],
|
||||
"load_from_db": false,
|
||||
"name": "api_key",
|
||||
"password": true,
|
||||
|
|
|
|||
|
|
@ -323,7 +323,7 @@
|
|||
"category": "helpers",
|
||||
"conditional_paths": [],
|
||||
"custom_fields": {},
|
||||
"description": "Runs a language model over each row of a DataFrame's text column and returns a new DataFrame with two columns: 'text_input' (the original text) and 'model_response' containing the model's response.",
|
||||
"description": "Runs an LLM over each row of a DataFrame's column. If no column is set, the entire row is passed.",
|
||||
"display_name": "Batch Run",
|
||||
"documentation": "",
|
||||
"edited": false,
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue