fix: url component output types and id generator component (#3740)

* Updated URL component to include text

* Update starter project

* Updated ID Generator with the new component specification

* [autofix.ci] apply automated fixes

* Fixed type errors

* updated tests

* [autofix.ci] apply automated fixes

* Fixed tests

* [autofix.ci] apply automated fixes

---------

Co-authored-by: autofix-ci[bot] <114827586+autofix-ci[bot]@users.noreply.github.com>
This commit is contained in:
Lucas Oliveira 2024-09-10 09:33:08 -03:00 committed by GitHub
commit 0088b46aac
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
4 changed files with 50 additions and 21 deletions

View file

@ -2,9 +2,11 @@ import re
from langchain_community.document_loaders.web_base import WebBaseLoader
from langflow.helpers.data import data_to_text
from langflow.custom import Component
from langflow.io import MessageTextInput, Output
from langflow.schema import Data
from langflow.schema.message import Message
class URLComponent(Component):
@ -24,6 +26,7 @@ class URLComponent(Component):
outputs = [
Output(display_name="Data", name="data", method="fetch_content"),
Output(display_name="Text", name="text", method="fetch_content_text"),
]
def ensure_url(self, string: str) -> str:
@ -66,3 +69,10 @@ class URLComponent(Component):
data = [Data(text=doc.page_content, **doc.metadata) for doc in docs]
self.status = data
return data
def fetch_content_text(self) -> Message:
    """Return the fetched URL content as a single text ``Message``.

    Calls ``fetch_content`` and renders its result through ``data_to_text``
    with the ``"{text}"`` template. The rendered string is stored on
    ``self.status`` and wrapped in the returned ``Message``.
    """
    rendered = data_to_text("{text}", self.fetch_content())
    self.status = rendered
    return Message(text=rendered)

View file

@ -1,29 +1,37 @@
import uuid
from typing import Any, Optional
from langflow.custom import CustomComponent
from langflow.schema.dotdict import dotdict
from langflow.custom import Component
from langflow.io import MessageTextInput, Output
from langflow.schema import dotdict
from langflow.schema.message import Message
class IDGeneratorComponent(CustomComponent):
class IDGeneratorComponent(Component):
display_name = "ID Generator"
description = "Generates a unique ID."
icon = "fingerprint"
name = "IDGenerator"
def update_build_config( # type: ignore
self, build_config: dotdict, field_value: Any, field_name: Optional[str] = None
):
inputs = [
MessageTextInput(
name="unique_id",
display_name="Value",
info="The generated unique ID.",
refresh_button=True,
),
]
outputs = [
Output(display_name="ID", name="id", method="generate_id"),
]
def update_build_config(self, build_config: dotdict, field_value: Any, field_name: Optional[str] = None):
    """Write a fresh UUID into the ``unique_id`` field of ``build_config``.

    Args:
        build_config: Mapping of field names to per-field settings; it is
            updated in place.
        field_value: Not used by this method.
        field_name: Name of the field being updated. Only ``"unique_id"``
            causes a change.

    Returns:
        The same ``build_config`` object that was passed in.
    """
    # Any other field leaves the configuration untouched.
    if field_name != "unique_id":
        return build_config

    fresh_id = str(uuid.uuid4())
    build_config[field_name]["value"] = fresh_id
    return build_config
def build_config(self):
    """Return the field configuration mapping for this component.

    The mapping has a single ``"unique_id"`` entry whose display name is
    ``"Value"`` and whose ``refresh_button`` flag is ``True``.
    """
    unique_id_field = {"display_name": "Value", "refresh_button": True}
    return {"unique_id": unique_id_field}
def build(self, unique_id: str) -> str:
    """Return the supplied ``unique_id`` unchanged."""
    passthrough = unique_id
    return passthrough
def generate_id(self) -> Message:
    """Return the component's ID wrapped in a ``Message``.

    Uses ``self.unique_id`` when it is truthy; otherwise a new UUID4 string
    is generated. ``self.status`` is set to ``"Generated ID: <id>"`` before
    the ``Message`` is returned.
    """
    identifier = self.unique_id
    if not identifier:
        # Nothing usable in the input field, so mint an ID on the spot.
        identifier = str(uuid.uuid4())
    self.status = f"Generated ID: {identifier}"
    return Message(text=identifier)

View file

@ -168,6 +168,17 @@
"Data"
],
"value": "__UNDEFINED__"
},
{
"cache": true,
"display_name": "Text",
"method": "fetch_content_text",
"name": "text",
"selected": "Message",
"types": [
"Message"
],
"value": "__UNDEFINED__"
}
],
"pinned": false,
@ -189,7 +200,7 @@
"show": true,
"title_case": false,
"type": "code",
"value": "import re\n\nfrom langchain_community.document_loaders.web_base import WebBaseLoader\n\nfrom langflow.custom import Component\nfrom langflow.io import MessageTextInput, Output\nfrom langflow.schema import Data\n\n\nclass URLComponent(Component):\n display_name = \"URL\"\n description = \"Fetch content from one or more URLs.\"\n icon = \"layout-template\"\n name = \"URL\"\n\n inputs = [\n MessageTextInput(\n name=\"urls\",\n display_name=\"URLs\",\n info=\"Enter one or more URLs, by clicking the '+' button.\",\n is_list=True,\n ),\n ]\n\n outputs = [\n Output(display_name=\"Data\", name=\"data\", method=\"fetch_content\"),\n ]\n\n def ensure_url(self, string: str) -> str:\n \"\"\"\n Ensures the given string is a URL by adding 'http://' if it doesn't start with 'http://' or 'https://'.\n Raises an error if the string is not a valid URL.\n\n Parameters:\n string (str): The string to be checked and possibly modified.\n\n Returns:\n str: The modified string that is ensured to be a URL.\n\n Raises:\n ValueError: If the string is not a valid URL.\n \"\"\"\n if not string.startswith((\"http://\", \"https://\")):\n string = \"http://\" + string\n\n # Basic URL validation regex\n url_regex = re.compile(\n r\"^(https?:\\/\\/)?\" # optional protocol\n r\"(www\\.)?\" # optional www\n r\"([a-zA-Z0-9.-]+)\" # domain\n r\"(\\.[a-zA-Z]{2,})?\" # top-level domain\n r\"(:\\d+)?\" # optional port\n r\"(\\/[^\\s]*)?$\", # optional path\n re.IGNORECASE,\n )\n\n if not url_regex.match(string):\n raise ValueError(f\"Invalid URL: {string}\")\n\n return string\n\n def fetch_content(self) -> list[Data]:\n urls = [self.ensure_url(url.strip()) for url in self.urls if url.strip()]\n loader = WebBaseLoader(web_paths=urls, encoding=\"utf-8\")\n docs = loader.load()\n data = [Data(text=doc.page_content, **doc.metadata) for doc in docs]\n self.status = data\n return data\n"
"value": "import re\n\nfrom langchain_community.document_loaders.web_base import WebBaseLoader\n\nfrom langflow.helpers.data import data_to_text\nfrom langflow.custom import Component\nfrom langflow.io import MessageTextInput, Output\nfrom langflow.schema import Data\nfrom langflow.schema.message import Message\n\n\nclass URLComponent(Component):\n display_name = \"URL\"\n description = \"Fetch content from one or more URLs.\"\n icon = \"layout-template\"\n name = \"URL\"\n\n inputs = [\n MessageTextInput(\n name=\"urls\",\n display_name=\"URLs\",\n info=\"Enter one or more URLs, by clicking the '+' button.\",\n is_list=True,\n ),\n ]\n\n outputs = [\n Output(display_name=\"Data\", name=\"data\", method=\"fetch_content\"),\n Output(display_name=\"Text\", name=\"text\", method=\"fetch_content_text\"),\n ]\n\n def ensure_url(self, string: str) -> str:\n \"\"\"\n Ensures the given string is a URL by adding 'http://' if it doesn't start with 'http://' or 'https://'.\n Raises an error if the string is not a valid URL.\n\n Parameters:\n string (str): The string to be checked and possibly modified.\n\n Returns:\n str: The modified string that is ensured to be a URL.\n\n Raises:\n ValueError: If the string is not a valid URL.\n \"\"\"\n if not string.startswith((\"http://\", \"https://\")):\n string = \"http://\" + string\n\n # Basic URL validation regex\n url_regex = re.compile(\n r\"^(https?:\\/\\/)?\" # optional protocol\n r\"(www\\.)?\" # optional www\n r\"([a-zA-Z0-9.-]+)\" # domain\n r\"(\\.[a-zA-Z]{2,})?\" # top-level domain\n r\"(:\\d+)?\" # optional port\n r\"(\\/[^\\s]*)?$\", # optional path\n re.IGNORECASE,\n )\n\n if not url_regex.match(string):\n raise ValueError(f\"Invalid URL: {string}\")\n\n return string\n\n def fetch_content(self) -> list[Data]:\n urls = [self.ensure_url(url.strip()) for url in self.urls if url.strip()]\n loader = WebBaseLoader(web_paths=urls, encoding=\"utf-8\")\n docs = loader.load()\n data = [Data(text=doc.page_content, **doc.metadata) 
for doc in docs]\n self.status = data\n return data\n\n def fetch_content_text(self) -> Message:\n content = self.fetch_content()\n data = content if isinstance(content, list) else [content]\n\n result_string = data_to_text(\"{text}\", data)\n self.status = result_string\n return Message(text=result_string)\n"
},
"urls": {
"advanced": false,

View file

@ -1,6 +1,7 @@
from langflow.components import helpers
from langflow.custom.utils import build_custom_component_template
from langflow.schema import Data
from langflow.schema.message import Message
import pytest
@ -47,13 +48,12 @@ def test_uuid_generator_component():
build_config = frontend_node.get("template")
field_name = "unique_id"
build_config = uuid_generator_component.update_build_config(build_config, None, field_name)
unique_id = build_config["unique_id"]["value"]
result = uuid_generator_component.build(unique_id)
result = uuid_generator_component.generate_id()
# Assert
# UUID should be a string of length 36
assert isinstance(result, str)
assert len(result) == 36
assert isinstance(result, Message)
assert len(result.text) == 36
def test_data_as_text_component():