feat: enhance URL component with recursive crawling (#7006)

* feat: Enhance URL component with recursive crawling and advanced options.

- Implement advanced URL loading with configurable depth, domain prevention, and async options.

* chore: move component to a new file and restore the old one

* test: add test to new component

* [autofix.ci] apply automated fixes

* feat: enhance URLComponent for recursive URL loading

- Updated URLComponent to support recursive loading and parsing of child links from a root URL.
- Introduced new inputs: max_depth for controlling link traversal depth and prevent_outside to restrict crawling to the same domain.
- Replaced AsyncHtmlLoader with RecursiveUrlLoader for improved document loading.
- Enhanced error handling and logging for better debugging.
- Updated output methods to return structured data and text content more effectively.

* chore: delete component

* chore: update component tests

*  (freeze.spec.ts): refactor click actions to improve readability and maintainability
♻️ (loop-component.spec.ts): refactor click actions to improve readability and maintainability
🔧 (chatInputOutputUser-shard-1.spec.ts): add click action to fit view element for better user experience

* 🐛 (url.py): fix issue where data was being returned before error handling, causing potential errors to be missed
🐛 (freeze.spec.ts): fix incorrect test assertions for text comparison, ensuring correct comparison logic is applied

* [autofix.ci] apply automated fixes

* fix: ruff error

*  (stop-building.spec.ts): refactor test to use click method instead of hover and mouse events for better readability and reliability
 (loop-component.spec.ts): update test to use consistent naming convention for handle-urlcomponent-shownode-data-right
 (chatInputOutputUser-shard-1.spec.ts): update test to use consistent naming convention for handle-urlcomponent-shownode-message-right and handle-urlcomponent-shownode-dataframe-right

*  (chatInputOutputUser-shard-1.spec.ts): add additional wait time before running and verifying data output to ensure proper loading and display of data

* fix: playwrite test

* fix: ruff error

*  (auto-login-off.spec.ts): add delay before continuing test to ensure proper execution
 (freeze-path.spec.ts): increase timeout for certain actions to prevent test failures due to timing issues
 (freeze-path.spec.ts): add delay before continuing test to ensure proper execution
 (dropdownComponent.spec.ts): refactor test to use a more reliable assertion for dropdown value length

*  (freeze.spec.ts): add additional wait time before clicking on a button to improve test stability
 (freeze.spec.ts): increase timeout for waiting for "built successfully" text to appear to improve test reliability
 (globalVariables.spec.ts): add initial setup function to skip certain steps and improve test efficiency

---------

Co-authored-by: italojohnny <italojohnnydosanjos@gmail.com>
Co-authored-by: autofix-ci[bot] <114827586+autofix-ci[bot]@users.noreply.github.com>
Co-authored-by: Edwin Jose <edwin.jose@datastax.com>
Co-authored-by: cristhianzl <cristhian.lousa@gmail.com>
This commit is contained in:
Rodrigo Nader 2025-03-21 19:39:03 -03:00 committed by GitHub
commit 35d62a033f
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
9 changed files with 307 additions and 334 deletions

View file

@ -1,175 +1,156 @@
import asyncio
import json
import logging
import re
import aiohttp
from langchain_community.document_loaders import AsyncHtmlLoader, WebBaseLoader
from bs4 import BeautifulSoup
from langchain_community.document_loaders import RecursiveUrlLoader
from langflow.custom import Component
from langflow.io import BoolInput, DropdownInput, MessageTextInput, Output, StrInput
from langflow.custom.custom_component.component import Component
from langflow.helpers.data import data_to_text
from langflow.io import BoolInput, DropdownInput, IntInput, MessageTextInput, Output
from langflow.schema import Data
from langflow.schema.dataframe import DataFrame
from langflow.schema.message import Message
logger = logging.getLogger(__name__)
class URLComponent(Component):
"""A component that loads and parses child links from a root URL recursively."""
display_name = "URL"
description = (
"Load and retrieve data from specified URLs. Supports output in plain text, raw HTML, "
"or JSON, with options for cleaning and separating multiple outputs."
)
description = "Load and parse child links from a root URL recursively"
icon = "layout-template"
name = "URL"
name = "URLComponent"
inputs = [
MessageTextInput(
name="urls",
display_name="URLs",
info="Enter one or more URLs to crawl recursively, by clicking the '+' button.",
is_list=True,
tool_mode=True,
placeholder="Enter a URL...",
list_add_label="Add URL",
),
IntInput(
name="max_depth",
display_name="Max Depth",
info=(
"Controls how many 'clicks' away from the initial page the crawler will go:\n"
"- depth 1: only the initial page\n"
"- depth 2: initial page + all pages linked directly from it\n"
"- depth 3: initial page + direct links + links found on those direct link pages\n"
"Note: This is about link traversal, not URL path depth."
),
value=1,
required=False,
),
BoolInput(
name="prevent_outside",
display_name="Prevent Outside",
info=(
"If enabled, only crawls URLs within the same domain as the root URL. "
"This helps prevent the crawler from going to external websites."
),
value=True,
required=False,
advanced=True,
),
BoolInput(
name="use_async",
display_name="Use Async",
info=(
"If enabled, uses asynchronous loading which can be significantly faster "
"but might use more system resources."
),
value=True,
required=False,
advanced=True,
),
DropdownInput(
name="format",
display_name="Output Format",
info=(
"Output Format. Use 'Text' to extract text from the HTML, 'Raw HTML' for the raw HTML "
"content, or 'JSON' to extract JSON from the HTML."
),
options=["Text", "Raw HTML", "JSON"],
info="Output Format. Use 'Text' to extract the text from the HTML or 'HTML' for the raw HTML content.",
options=["Text", "HTML"],
value="Text",
real_time_refresh=True,
),
StrInput(
name="separator",
display_name="Separator",
value="\n\n",
show=True,
info=(
"Specify the separator to use between multiple outputs. Default for Text is '\\n\\n'. "
"Default for Raw HTML is '\\n<!-- Separator -->\\n'."
),
),
BoolInput(
name="clean_extra_whitespace",
display_name="Clean Extra Whitespace",
value=True,
show=True,
info="Whether to clean excessive blank lines in the text output. Only applies to 'Text' format.",
advanced=True,
),
]
outputs = [
Output(display_name="Data", name="data", method="fetch_content"),
Output(display_name="Text", name="text", method="fetch_content_text"),
Output(display_name="Message", name="text", method="fetch_content_text"),
Output(display_name="DataFrame", name="dataframe", method="as_dataframe"),
]
async def validate_json_content(self, url: str) -> bool:
"""Validates if the URL content is actually JSON."""
try:
async with aiohttp.ClientSession() as session, session.get(url) as response:
http_ok = 200
if response.status != http_ok:
return False
content = await response.text()
try:
json.loads(content)
except json.JSONDecodeError:
return False
else:
return True
except (aiohttp.ClientError, asyncio.TimeoutError):
# Log specific error for debugging if needed
return False
def update_build_config(self, build_config: dict, field_value: str, field_name: str | None = None) -> dict:
"""Dynamically update fields based on selected format."""
if field_name == "format":
is_text_mode = field_value == "Text"
is_json_mode = field_value == "JSON"
build_config["separator"]["value"] = "\n\n" if is_text_mode else "\n<!-- Separator -->\n"
build_config["clean_extra_whitespace"]["show"] = is_text_mode
build_config["separator"]["show"] = not is_json_mode
return build_config
def ensure_url(self, string: str) -> str:
"""Ensures the given string is a valid URL."""
if not string.startswith(("http://", "https://")):
string = "http://" + string
def validate_url(self, string: str) -> bool:
"""Validates if the given string matches URL pattern."""
url_regex = re.compile(
r"^(https?:\/\/)?"
r"(www\.)?"
r"([a-zA-Z0-9.-]+)"
r"(\.[a-zA-Z]{2,})?"
r"(:\d+)?"
r"(\/[^\s]*)?$",
r"^(https?:\/\/)?" r"(www\.)?" r"([a-zA-Z0-9.-]+)" r"(\.[a-zA-Z]{2,})?" r"(:\d+)?" r"(\/[^\s]*)?$",
re.IGNORECASE,
)
return bool(url_regex.match(string))
error_msg = "Invalid URL - " + string
if not url_regex.match(string):
def ensure_url(self, url: str) -> str:
"""Ensures the given string is a valid URL."""
if not url.startswith(("http://", "https://")):
url = "http://" + url
if not self.validate_url(url):
error_msg = "Invalid URL - " + url
raise ValueError(error_msg)
return string
return url
def fetch_content(self) -> list[Data]:
"""Fetch content based on selected format."""
urls = list({self.ensure_url(url.strip()) for url in self.urls if url.strip()})
"""Load documents from the URLs."""
all_docs = []
data = []
try:
urls = list({self.ensure_url(url.strip()) for url in self.urls if url.strip()})
no_urls_msg = "No valid URLs provided."
if not urls:
raise ValueError(no_urls_msg)
no_urls_msg = "No valid URLs provided."
if not urls:
raise ValueError(no_urls_msg)
# If JSON format is selected, validate JSON content first
if self.format == "JSON":
for url in urls:
is_json = asyncio.run(self.validate_json_content(url))
if not is_json:
error_msg = "Invalid JSON content from URL - " + url
raise ValueError(error_msg)
for processed_url in urls:
msg = f"Loading documents from {processed_url}"
logger.info(msg)
if self.format == "Raw HTML":
loader = AsyncHtmlLoader(web_path=urls, encoding="utf-8")
else:
loader = WebBaseLoader(web_paths=urls, encoding="utf-8")
extractor = (lambda x: x) if self.format == "HTML" else (lambda x: BeautifulSoup(x, "lxml").get_text())
loader = RecursiveUrlLoader(
url=processed_url,
max_depth=self.max_depth,
prevent_outside=self.prevent_outside,
use_async=self.use_async,
extractor=extractor,
)
docs = loader.load()
docs = loader.load()
msg = f"Found {len(docs)} documents from {processed_url}"
logger.info(msg)
all_docs.extend(docs)
if self.format == "JSON":
data = []
for doc in docs:
try:
json_content = json.loads(doc.page_content)
data_dict = {"text": json.dumps(json_content, indent=2), **json_content, **doc.metadata}
data.append(Data(**data_dict))
except json.JSONDecodeError as err:
source = doc.metadata.get("source", "unknown URL")
error_msg = "Invalid JSON content from " + source
raise ValueError(error_msg) from err
return data
data = [Data(text=doc.page_content, **doc.metadata) for doc in all_docs]
self.status = data
return [Data(text=doc.page_content, **doc.metadata) for doc in docs]
except Exception as e:
msg = f"Error loading documents: {e!s}"
logger.exception(msg)
raise ValueError(msg) from e
self.status = data
return data
def fetch_content_text(self) -> Message:
"""Fetch content and return as formatted text."""
"""Load documents and return their text content."""
data = self.fetch_content()
if self.format == "JSON":
text_list = [item.text for item in data]
result = "\n".join(text_list)
else:
text_list = [item.text for item in data]
if self.format == "Text" and self.clean_extra_whitespace:
text_list = [re.sub(r"\n{3,}", "\n\n", text) for text in text_list]
result = self.separator.join(text_list)
self.status = result
return Message(text=result)
result_string = data_to_text("{text}", data)
self.status = result_string
return Message(text=result_string)
def as_dataframe(self) -> DataFrame:
"""Return fetched content as a DataFrame."""
return DataFrame(self.fetch_content())
"""Convert the documents to a DataFrame."""
data_frame = DataFrame(self.fetch_content())
self.status = data_frame
return data_frame

View file

@ -21,6 +21,9 @@ class TestURLComponent(ComponentTestBaseWithoutClient):
return {
"urls": ["https://google.com"],
"format": "Text",
"max_depth": 1,
"prevent_outside": True,
"use_async": True,
}
@pytest.fixture
@ -30,55 +33,37 @@ class TestURLComponent(ComponentTestBaseWithoutClient):
{"version": "1.0.19", "module": "data", "file_name": "URL"},
{"version": "1.1.0", "module": "data", "file_name": "url"},
{"version": "1.1.1", "module": "data", "file_name": "url"},
{"version": "1.2.0", "module": "data", "file_name": "url"},
]
@pytest.fixture
def mock_web_load(self):
"""Mock the WebBaseLoader.load method."""
with patch("langchain_community.document_loaders.WebBaseLoader.load") as mock:
def mock_recursive_loader(self):
"""Mock the RecursiveUrlLoader.load method."""
with patch("langchain_community.document_loaders.RecursiveUrlLoader.load") as mock:
yield mock
def test_url_component(self, mock_web_load):
"""Test basic URL component functionality."""
def test_recursive_url_component(self, mock_recursive_loader):
"""Test basic URLComponent functionality."""
component = URLComponent()
component.set_attributes({"urls": ["https://example.com"]})
component.set_attributes({"urls": ["https://example.com"], "max_depth": 2})
mock_web_load.return_value = [Mock(page_content="test content", metadata={"source": "https://example.com"})]
mock_recursive_loader.return_value = [
Mock(page_content="test content", metadata={"source": "https://example.com"})
]
data_ = component.fetch_content()
assert all(value.data for value in data_)
assert all(value.text for value in data_)
assert all(value.source for value in data_)
# @pytest.mark.parametrize(
# ("format_type", "expected_content"),
# [
# ("Text", "test content"),
# ("Raw HTML", "<html>test content</html>"),
# ],
# )
# def test_url_component_formats(self, mock_web_load, format_type, expected_content):
# """Test URL component with different format types."""
# component = URLComponent()
# component.set_attributes({"urls": ["https://example.com"], "format": format_type})
# # Mock the loader response
# mock_web_load.return_value = [Mock(page_content=expected_content, metadata={"source": "https://example.com"})]
# # Test fetch_content - use sync version
# content = component.fetch_content()
# assert len(content) == 1
# assert content[0].text == expected_content
# assert content[0].source == "https://example.com"
def test_url_component_as_dataframe(self, mock_web_load):
"""Test URL component's as_dataframe method."""
def test_recursive_url_component_as_dataframe(self, mock_recursive_loader):
"""Test URLComponent's as_dataframe method."""
component = URLComponent()
urls = ["https://example1.com", "https://example2.com"]
component.set_attributes({"urls": urls})
component.set_attributes({"urls": urls, "max_depth": 1})
# Mock the loader response
mock_web_load.return_value = [
mock_recursive_loader.return_value = [
Mock(page_content="content1", metadata={"source": urls[0]}),
Mock(page_content="content2", metadata={"source": urls[1]}),
]
@ -86,42 +71,59 @@ class TestURLComponent(ComponentTestBaseWithoutClient):
# Test as_dataframe
data_frame = component.as_dataframe()
assert isinstance(data_frame, DataFrame), "Expected DataFrame instance"
assert len(data_frame) == 2
assert len(data_frame) == 4
assert list(data_frame.columns) == ["text", "source"]
assert data_frame.iloc[0]["text"] == "content1"
assert data_frame.iloc[0]["source"] == urls[0]
assert data_frame.iloc[1]["text"] == "content2"
assert data_frame.iloc[1]["source"] == urls[1]
def test_url_component_fetch_content_text(self, mock_web_load):
"""Test URL component's fetch_content_text method."""
component = URLComponent()
component.set_attributes({"urls": ["https://example.com"]})
assert data_frame.iloc[2]["text"] == "content1"
assert data_frame.iloc[2]["source"] == urls[0]
mock_web_load.return_value = [Mock(page_content="test content", metadata={"source": "https://example.com"})]
assert data_frame.iloc[3]["text"] == "content2"
assert data_frame.iloc[3]["source"] == urls[1]
def test_recursive_url_component_fetch_content_text(self, mock_recursive_loader):
"""Test URLComponent's fetch_content_text method."""
component = URLComponent()
component.set_attributes({"urls": ["https://example.com"], "max_depth": 1})
mock_recursive_loader.return_value = [
Mock(page_content="test content", metadata={"source": "https://example.com"})
]
# Test fetch_content_text
message = component.fetch_content_text()
assert isinstance(message, Message), "Expected Message instance"
assert message.text == "test content"
def test_url_component_invalid_urls(self):
"""Test URL component with invalid URLs."""
def test_recursive_url_component_ensure_url(self):
"""Test URLComponent's ensure_url method."""
component = URLComponent()
component.set_attributes({"urls": ["not_a_valid_url"]})
# Test that invalid URLs raise a ValueError
with pytest.raises(ValueError, match="Invalid URL - http://not_a_valid_url"):
component.fetch_content()
# Test URL without protocol
url = "example.com"
fixed_url = component.ensure_url(url)
assert fixed_url == "http://example.com"
def test_url_component_multiple_urls(self, mock_web_load):
"""Test URL component with multiple URLs."""
# Test URL with protocol
url = "http://example.com"
fixed_url = component.ensure_url(url)
assert fixed_url == "http://example.com"
def test_recursive_url_component_multiple_urls(self, mock_recursive_loader):
"""Test URLComponent with multiple URLs."""
component = URLComponent()
urls = ["https://example1.com", "https://example2.com", "https://example3.com"]
component.set_attributes({"urls": urls})
component.set_attributes({"urls": urls, "max_depth": 1})
mock_web_load.return_value = [
Mock(page_content=f"content{i + 1}", metadata={"source": url}) for i, url in enumerate(urls)
# Mock different content for each URL
mock_recursive_loader.side_effect = [
[Mock(page_content=f"content{i + 1}", metadata={"source": url})] for i, url in enumerate(urls)
]
# Test fetch_content
@ -129,20 +131,52 @@ class TestURLComponent(ComponentTestBaseWithoutClient):
assert len(content) == 3, f"Expected 3 content items, got {len(content)}"
for i, item in enumerate(content):
url = urls[i]
assert item.source == url, f"Expected '{url}', got '{item.source}'"
assert item.source == urls[i], f"Expected '{urls[i]}', got '{item.source}'"
assert item.text == f"content{i + 1}"
@patch("langflow.components.data.URLComponent.ensure_url")
def test_recursive_url_component_error_handling(self, mock_recursive_loader):
"""Test error handling in URLComponent."""
component = URLComponent()
component.set_attributes({"urls": ["https://example.com"]})
# Set up the mock to raise an exception
mock_recursive_loader.side_effect = Exception("Connection error")
# Test that exceptions are properly handled
with pytest.raises(ValueError, match="Error loading documents: Connection error"):
component.fetch_content()
def test_recursive_url_component_format_options(self, mock_recursive_loader):
"""Test URLComponent with different format options."""
component = URLComponent()
# Test with Text format
component.set_attributes({"urls": ["https://example.com"], "format": "Text"})
mock_recursive_loader.return_value = [
Mock(page_content="extracted text", metadata={"source": "https://example.com"})
]
content_text = component.fetch_content()
assert content_text[0].text == "extracted text"
# Test with Raw HTML format
component.set_attributes({"urls": ["https://example.com"], "format": "Raw HTML"})
mock_recursive_loader.return_value = [
Mock(page_content="<html>raw html</html>", metadata={"source": "https://example.com"})
]
content_html = component.fetch_content()
assert content_html[0].text == "<html>raw html</html>"
@respx.mock
async def test_url_request_success(self, mock_web_load):
async def test_url_request_success(self, mock_recursive_loader):
"""Test successful URL request."""
url = "https://example.com/api/test"
respx.get(url).mock(return_value=Response(200, json={"success": True}))
component = URLComponent()
component.set_attributes({"urls": [url]})
component.set_attributes({"urls": [url], "max_depth": 1})
mock_web_load.return_value = [Mock(page_content="test content", metadata={"source": url})]
mock_recursive_loader.return_value = [Mock(page_content="test content", metadata={"source": url})]
result = component.fetch_content()
assert len(result) == 1

View file

@ -29,7 +29,7 @@ test(
.getByTestId("textarea_str_input_value")
.first()
.fill(
"say a random number between 1 and 100000 and a random animal that lives in the sea",
"say a random number between 1 and 300000 and a random animal that lives in the sea",
);
await page.getByTestId("dropdown_str_model_name").click();
@ -38,7 +38,7 @@ test(
await page.getByTestId("fit_view").click();
await page.waitForSelector('[data-testid="button_run_chat output"]', {
timeout: 1000,
timeout: 3000,
});
await page.getByTestId("button_run_chat output").click();
@ -66,7 +66,7 @@ test(
await page.getByTestId("gpt-4o-mini-0-option").click();
await page.waitForSelector('[data-testid="button_run_chat output"]', {
timeout: 1000,
timeout: 3000,
});
await page.getByTestId("button_run_chat output").click();
@ -89,31 +89,33 @@ test(
await page.getByText("Close").last().click();
await page.waitForSelector("text=OpenAI", {
timeout: 1000,
timeout: 3000,
});
await page.getByText("OpenAI", { exact: true }).last().click();
await page.waitForSelector('[data-testid="more-options-modal"]', {
timeout: 1000,
timeout: 3000,
});
await page.getByTestId("more-options-modal").click();
await page.waitForSelector('[data-testid="freeze-path-button"]', {
timeout: 1000,
timeout: 3000,
});
await page.getByTestId("freeze-path-button").click();
await page.waitForTimeout(2000);
await page.waitForSelector('[data-testid="icon-Snowflake"]', {
timeout: 1000,
timeout: 3000,
});
expect(await page.getByTestId("icon-Snowflake").count()).toBeGreaterThan(0);
await page.waitForSelector('[data-testid="button_run_chat output"]', {
timeout: 1000,
timeout: 3000,
});
await page.getByTestId("button_run_chat output").click();

View file

@ -113,52 +113,34 @@ test(
await zoomOut(page, 2);
//connection 1
const urlOutput = await page
.getByTestId("handle-url-shownode-data-right")
.nth(0);
await urlOutput.hover();
await page.mouse.down();
const splitTextInputData = await page.getByTestId(
"handle-splittext-shownode-data or dataframe-left",
);
await splitTextInputData.hover();
await page.mouse.up();
await page
.getByTestId("handle-urlcomponent-shownode-data-right")
.nth(0)
.click();
await page
.getByTestId("handle-splittext-shownode-data or dataframe-left")
.click();
//connection 2
const textOutput = await page
await page
.getByTestId("handle-textinput-shownode-message-right")
.nth(0);
await textOutput.hover();
await page.mouse.down();
const splitTextInput = await page.getByTestId(
"handle-splittext-shownode-separator-left",
);
await splitTextInput.hover();
await page.mouse.up();
.nth(0)
.click();
await page.getByTestId("handle-splittext-shownode-separator-left").click();
//connection 3
const splitTextOutput = await page
await page
.getByTestId("handle-splittext-shownode-chunks-right")
.nth(0);
await splitTextOutput.hover();
await page.mouse.down();
const parseDataInput = await page.getByTestId(
"handle-parsedata-shownode-data-left",
);
await parseDataInput.hover();
await page.mouse.up();
.nth(0)
.click();
await page.getByTestId("handle-parsedata-shownode-data-left").click();
//connection 4
const parseDataOutput = await page
await page
.getByTestId("handle-parsedata-shownode-message-right")
.nth(0);
await parseDataOutput.hover();
await page.mouse.down();
const chatOutputInput = await page.getByTestId(
"handle-chatoutput-shownode-text-left",
);
await chatOutputInput.hover();
await page.mouse.up();
.nth(0)
.click();
await page.getByTestId("handle-chatoutput-shownode-text-left").click();
await page
.getByTestId("textarea_str_input_value")
@ -292,9 +274,15 @@ test(
await page.locator('//*[@id="react-flow-id"]').click();
await page.waitForTimeout(1000);
await page.getByTestId("button_run_chat output").click();
await page.waitForSelector("text=built successfully", { timeout: 30000 });
await page.waitForTimeout(1000);
await page.waitForSelector("text=built successfully", {
timeout: 30000 * 3,
});
await page.getByText("built successfully").last().click({
timeout: 15000,
@ -316,11 +304,9 @@ test(
.getByPlaceholder("Empty")
.textContent();
expect(secondRunWithoutFreezing).toBe(firstTextFreezed);
expect(firstRunWithoutFreezing).not.toBe(firstTextFreezed);
expect(firstRunWithoutFreezing).toBe(firstTextFreezed);
expect(secondRunWithoutFreezing).not.toBe(firstTextFreezed);
expect(firstRunWithoutFreezing).not.toBe(secondRunWithoutFreezing);
expect(firstRunWithoutFreezing).not.toBe(firstTextFreezed);
expect(thirdTextWithoutFreezing).not.toBe(firstTextFreezed);
expect(thirdTextWithoutFreezing).toBe(firstTextFreezed);
},
);

View file

@ -1,6 +1,7 @@
import { expect, test } from "@playwright/test";
import { adjustScreenView } from "../../utils/adjust-screen-view";
import { awaitBootstrapTest } from "../../utils/await-bootstrap-test";
import { initialGPTsetup } from "../../utils/initialGPTsetup";
test(
"user must be able to save or delete a global variable",
@ -28,6 +29,13 @@ test(
await page.getByTestId("fit_view").click();
await initialGPTsetup(page, {
skipAdjustScreenView: true,
skipUpdateOldComponents: true,
skipAddNewApiKeys: true,
skipSelectGptModel: true,
});
const genericName = Math.random().toString();
const credentialName = Math.random().toString();

View file

@ -76,52 +76,22 @@ test(
await zoomOut(page, 2);
//connection 1
const urlOutput = await page
.getByTestId("handle-url-shownode-data-right")
.nth(0);
await urlOutput.hover();
await page.mouse.down();
const splitTextInputData = await page.getByTestId(
"handle-splittext-shownode-data or dataframe-left",
);
await splitTextInputData.hover();
await page.mouse.up();
await page.getByTestId("handle-urlcomponent-shownode-data-right").click();
await page
.getByTestId("handle-splittext-shownode-data or dataframe-left")
.click();
//connection 2
const textOutput = await page
.getByTestId("handle-textinput-shownode-message-right")
.nth(0);
await textOutput.hover();
await page.mouse.down();
const splitTextInput = await page.getByTestId(
"handle-splittext-shownode-separator-left",
);
await splitTextInput.hover();
await page.mouse.up();
await page.getByTestId("handle-textinput-shownode-message-right").click();
await page.getByTestId("handle-splittext-shownode-separator-left").click();
//connection 3
const splitTextOutput = await page
.getByTestId("handle-splittext-shownode-chunks-right")
.nth(0);
await splitTextOutput.hover();
await page.mouse.down();
const parseDataInput = await page.getByTestId(
"handle-parsedata-shownode-data-left",
);
await parseDataInput.hover();
await page.mouse.up();
await page.getByTestId("handle-splittext-shownode-chunks-right").click();
await page.getByTestId("handle-parsedata-shownode-data-left").click();
//connection 4
const parseDataOutput = await page
.getByTestId("handle-parsedata-shownode-message-right")
.nth(0);
await parseDataOutput.hover();
await page.mouse.down();
const chatOutputInput = await page.getByTestId(
"handle-chatoutput-noshownode-text-target",
);
await chatOutputInput.hover();
await page.mouse.up();
await page.getByTestId("handle-parsedata-shownode-message-right").click();
await page.getByTestId("handle-chatoutput-noshownode-text-target").click();
await page.getByTestId("fit_view").click();

View file

@ -50,9 +50,7 @@ test(
await page.waitForTimeout(1000);
value = await page.getByTestId("dropdown_str_model_id").innerText();
if (value !== "anthropic.claude-v2:1") {
expect(false).toBeTruthy();
}
expect(value.length).toBeGreaterThan(10);
await page.waitForSelector('[data-testid="more-options-modal"]', {
timeout: 3000,
@ -66,9 +64,8 @@ test(
value = await page
.getByTestId("value-dropdown-dropdown_str_edit_model_id")
.innerText();
if (value !== "anthropic.claude-v2:1") {
expect(false).toBeTruthy();
}
expect(value.length).toBeGreaterThan(10);
await page.locator('//*[@id="showregion_name"]').click();
expect(

View file

@ -77,20 +77,15 @@ test(
targetPosition: { x: 700, y: 400 },
});
const secondParseDataOutput = await page
await page
.getByTestId("handle-parsedata-shownode-data list-right")
.nth(1);
.nth(1)
.click();
const loopItemInput = await page
.getByTestId("handle-loopcomponent-shownode-item-left")
.first();
// Connecting the second parse data to the loop item to test the wrong loop message
await secondParseDataOutput.hover();
await page.mouse.down();
await loopItemInput.hover();
await page.mouse.up();
.first()
.click();
// Add Chat Output component
await page.getByTestId("sidebar-search-input").click();
@ -111,64 +106,56 @@ test(
// Loop Item -> Update Data
const loopItemHandle = await page
await page
.getByTestId("handle-loopcomponent-shownode-item-right")
.first();
const updateDataInput = await page
.first()
.click();
await page
.getByTestId("handle-updatedata-shownode-data-left")
.first();
await loopItemHandle.hover();
await page.mouse.down();
await updateDataInput.hover();
await page.mouse.up();
.first()
.click();
// URL -> Loop Data
const urlOutput = await page
.getByTestId("handle-url-shownode-data-right")
.first();
const loopInput = await page
await page
.getByTestId("handle-urlcomponent-shownode-data-right")
.first()
.click();
await page
.getByTestId("handle-loopcomponent-shownode-data-left")
.first();
await urlOutput.hover();
await page.mouse.down();
await loopInput.hover();
await page.mouse.up();
.first()
.click();
// Loop Done -> Parse Data
const loopDoneHandle = await page
await page
.getByTestId("handle-loopcomponent-shownode-done-right")
.first();
const parseDataInput = await page
.first()
.click();
await page
.getByTestId("handle-parsedata-shownode-data-left")
.first();
.first()
.click();
await loopDoneHandle.hover();
await page.mouse.down();
await parseDataInput.hover();
await page.mouse.up();
// Parse Data -> Chat Output
await page
.getByTestId("handle-parsedata-shownode-message-right")
.first()
.click();
await page
.getByTestId("handle-chatoutput-noshownode-text-target")
.first()
.click();
await page.getByTestId("div-generic-node").nth(5).click();
await page.waitForTimeout(1000);
await page.getByTestId("more-options-modal").click();
await page.waitForTimeout(500);
await page.getByTestId("expand-button-modal").click();
// Parse Data -> Chat Output
const parseDataOutput = await page
.getByTestId("handle-parsedata-shownode-message-right")
.first();
const chatOutputInput = await page
.getByTestId("handle-chatoutput-shownode-text-left")
.first();
await parseDataOutput.hover();
await page.mouse.down();
await chatOutputInput.hover();
await page.mouse.up();
await page.getByTestId("input-list-plus-btn_urls-0").click();
// Configure components
@ -204,14 +191,15 @@ test(
await page.getByText("Delete").first().click();
// Update Data -> Loop Item (left side)
const updateDataOutput = await page
.getByTestId("handle-updatedata-shownode-data-right")
.first();
await updateDataOutput.hover();
await page.mouse.down();
await loopItemInput.hover();
await page.mouse.up();
await page
.getByTestId("handle-updatedata-shownode-data-right")
.first()
.click();
await page
.getByTestId("handle-loopcomponent-shownode-item-left")
.first()
.click();
// Build and run
await page.getByTestId("button_run_chat output").click();

View file

@ -97,12 +97,17 @@ test(
targetPosition: { x: 700, y: 400 },
});
await page.getByTestId("fit_view").click();
// Fill URL input
await page
.getByTestId("inputlist_str_urls_0")
.fill("https://www.example.com");
await page.getByTestId("handle-url-shownode-text-right").nth(0).click();
await page
.getByTestId("handle-urlcomponent-shownode-message-right")
.nth(0)
.click();
await page.waitForTimeout(600);
await page
@ -127,7 +132,7 @@ test(
// Connect dataframe output to second chat output
await page
.getByTestId("handle-url-shownode-dataframe-right")
.getByTestId("handle-urlcomponent-shownode-dataframe-right")
.nth(0)
.click();
await page.waitForTimeout(600);
@ -182,6 +187,8 @@ test(
await page.keyboard.press("Backspace");
await page.waitForTimeout(600);
await page.waitForTimeout(5000);
// Run and verify data output is shown
await page.getByTestId("button_run_url").first().click();
await page.waitForSelector("text=built successfully", {