fix: problem with saving files from APIRequestComponent (#5627)

* fix problem with saving files from APIRequestComponent

* moving api_request component test to appropriate subdir
This commit is contained in:
Phil Miesle 2025-01-10 18:22:04 +00:00 committed by GitHub
commit 517961d281
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
3 changed files with 188 additions and 39 deletions

View file

@ -3,11 +3,10 @@ import json
import mimetypes
import re
import tempfile
from datetime import datetime
from datetime import datetime, timezone
from pathlib import Path
from typing import Any
from urllib.parse import parse_qsl, urlencode, urlparse, urlunparse
from zoneinfo import ZoneInfo
import httpx
import validators
@ -177,12 +176,13 @@ class APIRequestComponent(Component):
)
redirection_history = [
{"url": str(redirect.url), "status_code": redirect.status_code} for redirect in response.history
{
"url": redirect.headers.get("Location", str(redirect.url)),
"status_code": redirect.status_code,
}
for redirect in response.history
]
if response.is_redirect:
redirection_history.append({"url": str(response.url), "status_code": response.status_code})
is_binary, file_path = self._response_info(response, with_file_path=save_to_file)
response_headers = self._headers_to_dict(response.headers)
@ -196,11 +196,12 @@ class APIRequestComponent(Component):
if file_path:
async with async_open(file_path, mode, encoding=encoding) as f:
await f.write(response.content if is_binary else response.text)
await f.flush() # Ensure the file is flushed to disk
metadata["file_path"] = str(file_path)
if include_httpx_metadata:
metadata.update(
{
"file_path": str(file_path),
"headers": headers,
"status_code": response.status_code,
"response_headers": response_headers,
@ -347,7 +348,7 @@ class APIRequestComponent(Component):
extracted_filename = filename_match.group(1)
# Ensure the filename is unique
if (component_temp_dir / extracted_filename).exists():
timestamp = datetime.now(ZoneInfo("UTC")).strftime("%Y%m%d%H%M%S%f")
timestamp = datetime.now(timezone.utc).strftime("%Y%m%d%H%M%S%f")
filename = f"{timestamp}-{extracted_filename}"
else:
filename = extracted_filename
@ -355,7 +356,7 @@ class APIRequestComponent(Component):
# Step 3: Infer file extension or use part of the request URL if no filename
if not filename:
# Extract the last segment of the URL path
url_path = urlparse(str(response.request.url)).path
url_path = urlparse(str(response.request.url) if response.request else "").path
base_name = Path(url_path).name # Get the last segment of the path
if not base_name: # If the path ends with a slash or is empty
base_name = "response"
@ -366,7 +367,7 @@ class APIRequestComponent(Component):
extension = ".bin" if is_binary else ".txt" # Default extensions
# Combine the base name with timestamp and extension
timestamp = datetime.now(ZoneInfo("UTC")).strftime("%Y%m%d%H%M%S%f")
timestamp = datetime.now(timezone.utc).strftime("%Y%m%d%H%M%S%f")
filename = f"{timestamp}-{base_name}{extension}"
# Step 4: Define the full file path

View file

@ -0,0 +1,177 @@
import tempfile
from pathlib import Path
from unittest.mock import Mock
import aiofiles.os
import httpx
import pytest
import respx
from aiofile import async_open
from httpx import Response
from langflow.components import data
@pytest.fixture
def api_request():
    """Build a new ``APIRequestComponent`` for the requesting test."""
    component = data.APIRequestComponent()
    return component
def test_parse_curl(api_request):
    """``parse_curl`` should populate method, URLs, headers and body from a curl command."""
    # Arrange: the curl command to parse and an empty build configuration
    curl_command = (
        "curl -X GET https://example.com/api/test -H 'Content-Type: application/json' -d '{\"key\": \"value\"}'"
    )
    empty_config = {
        "method": {"value": ""},
        "urls": {"value": []},
        "headers": {},
        "body": {},
    }
    # Expected "value" entry for each build-config field after parsing
    expected_values = {
        "method": "GET",
        "urls": ["https://example.com/api/test"],
        "headers": {"Content-Type": "application/json"},
        "body": {"key": "value"},
    }

    # Act: parse into a (shallow) copy of the configuration
    parsed_config = api_request.parse_curl(curl_command, empty_config.copy())

    # Assert: every field carries the value extracted from the command
    for field, expected in expected_values.items():
        assert parsed_config[field]["value"] == expected
# HTTPx Metadata testing
@pytest.mark.parametrize(
    ("include_metadata", "expected_properties"),
    [
        (False, {"source", "result"}),
        (True, {"source", "result", "headers", "status_code", "response_headers", "redirection_history"}),
    ],
)
@respx.mock
async def test_httpx_metadata_behavior(api_request, include_metadata, expected_properties):
    """Verify the keys ``make_request`` returns with and without httpx metadata.

    respx mocks a 303 redirect from ``url`` to ``redirected_url`` followed by a
    200 JSON response. Without ``include_httpx_metadata`` only ``source`` and
    ``result`` are expected in the returned data; with it, the request headers,
    status code, response headers and redirection history are expected as well.
    """
    # Mocking a successful GET request with headers and a redirection
    url = "https://example.com/api/test"
    redirected_url = "https://example.com/api/redirect"
    response_content = {"key": "value"}

    respx.get(url).mock(return_value=Response(303, headers={"Location": redirected_url}))
    respx.get(redirected_url).mock(
        return_value=Response(200, json=response_content, headers={"Custom-Header": "HeaderValue"})
    )

    # Open the client as a context manager so it is closed once the request
    # has completed instead of being left open by the test.
    async with httpx.AsyncClient() as client:
        result = await api_request.make_request(
            client=client,
            method="GET",
            url=url,
            save_to_file=False,
            include_httpx_metadata=include_metadata,
        )

    # Check returned metadata
    metadata = result.data
    assert set(metadata.keys()) == expected_properties, f"Unexpected properties: {set(metadata.keys())}"

    if include_metadata:
        # Validate individual fields
        assert metadata["source"] == url
        # No request headers are passed to make_request above
        assert metadata["headers"] is None
        assert metadata["status_code"] == 200
        assert metadata["response_headers"]["custom-header"] == "HeaderValue"

        # Validate redirection history: one 303 hop pointing at the redirect target
        assert metadata["redirection_history"] == [
            {"url": redirected_url, "status_code": 303}
        ], "Redirection history is incorrect"

    # Validate result (present in both parametrized cases)
    assert metadata["result"] == response_content, "Response content mismatch"
# Save to File testing
@pytest.mark.parametrize(
    ("save_to_file", "expected_properties"),
    [
        (False, {"source", "result"}),
        (True, {"source", "file_path"}),
    ],
)
@respx.mock
async def test_save_to_file_behavior(api_request, save_to_file, expected_properties):
    """Verify ``make_request`` output when the response is or is not saved to a file.

    With ``save_to_file=True`` the returned data is expected to hold ``source``
    and ``file_path``, and the file at that path must contain the response body.
    With ``save_to_file=False`` the data is expected to hold ``source`` and
    ``result``, where ``result`` is the UTF-8 encoded response body.
    """
    # Mocking a successful GET request with a response body
    url = "https://example.com/api/test"
    response_content = "Test response content"

    respx.get(url).mock(return_value=Response(200, content=response_content))

    # Open the client as a context manager so it is closed once the request
    # has completed instead of being left open by the test.
    async with httpx.AsyncClient() as client:
        result = await api_request.make_request(
            client=client,
            method="GET",
            url=url,
            save_to_file=save_to_file,
        )

    # Check returned metadata
    metadata = result.data
    assert set(metadata.keys()) == expected_properties, f"Unexpected properties: {set(metadata.keys())}"

    if save_to_file:
        # Validate that file_path exists in metadata
        assert "file_path" in metadata, "file_path is missing in metadata"
        file_path = metadata["file_path"]

        try:
            # Validate that the file exists and its content matches the response
            assert await aiofiles.os.path.exists(file_path), "Saved file does not exist"
            async with async_open(file_path, "r") as f:
                file_content = await f.read()
            assert file_content == response_content, "File content does not match response content"
        finally:
            # Cleanup the file even when an assertion above failed, so a
            # failing run does not leave files behind; the existence check
            # keeps a missing file from masking the original failure.
            if await aiofiles.os.path.exists(file_path):
                await aiofiles.os.remove(file_path)
    else:
        # Validate that result exists in metadata
        assert "result" in metadata, "result is missing in metadata"
        assert metadata["result"] == response_content.encode("utf-8"), "Response content mismatch in metadata"
def test_response_info_binary_content(api_request):
    """An ``application/octet-stream`` response is binary; no path is built when not saving."""
    fake_response = Mock(headers={"Content-Type": "application/octet-stream"})

    binary_flag, saved_path = api_request._response_info(fake_response, with_file_path=False)

    assert binary_flag is True
    assert saved_path is None
def test_response_info_non_binary_content(api_request):
    """A ``text/plain`` response is not binary; no path is built when not saving."""
    fake_response = Mock(headers={"Content-Type": "text/plain"})

    binary_flag, saved_path = api_request._response_info(fake_response, with_file_path=False)

    assert binary_flag is False
    assert saved_path is None
def test_response_info_filename_from_content_disposition(api_request):
    """The filename from ``Content-Disposition`` is used for the generated file path."""
    fake_response = Mock(
        headers={
            "Content-Disposition": 'attachment; filename="thisfile.txt"',
            "Content-Type": "text/plain",
        },
        request=Mock(url="https://example.com/testfile"),
    )

    binary_flag, saved_path = api_request._response_info(fake_response, with_file_path=True)

    assert binary_flag is False
    # The file is expected inside the component's folder under the system temp dir
    expected_dir = Path(tempfile.gettempdir()) / "APIRequestComponent"
    assert saved_path.parent == expected_dir
    # Only the suffix is checked, so a prefixed file name is accepted as well
    assert saved_path.name.endswith("thisfile.txt")
def test_response_info_default_filename(api_request):
    """Without ``Content-Disposition`` the file name ends with the URL's last segment plus ``.txt``."""
    fake_response = Mock(
        headers={"Content-Type": "text/plain"},
        request=Mock(url="https://example.com/testfile"),
    )

    binary_flag, saved_path = api_request._response_info(fake_response, with_file_path=True)

    assert binary_flag is False
    # The file is expected inside the component's folder under the system temp dir
    expected_dir = Path(tempfile.gettempdir()) / "APIRequestComponent"
    assert saved_path.parent == expected_dir
    # Only the suffix is checked, so a prefixed file name is accepted as well
    assert saved_path.name.endswith("testfile.txt")

View file

@ -1,29 +0,0 @@
import pytest
from langflow.components import data
@pytest.fixture
def api_request():
    """Return an ``APIRequestComponent`` instance for a test case to use."""
    return data.APIRequestComponent()
def test_parse_curl(api_request):
    """Parsing a curl command should fill in method, URLs, headers and body."""
    # Arrange
    command = (
        "curl -X GET https://example.com/api/test -H 'Content-Type: application/json' -d '{\"key\": \"value\"}'"
    )
    starting_config = {
        "method": {"value": ""},
        "urls": {"value": []},
        "headers": {},
        "body": {},
    }

    # Act
    updated = api_request.parse_curl(command, starting_config.copy())

    def value_of(field):
        # Each build-config field stores its parsed content under "value"
        return updated[field]["value"]

    # Assert
    assert value_of("method") == "GET"
    assert value_of("urls") == ["https://example.com/api/test"]
    assert value_of("headers") == {"Content-Type": "application/json"}
    assert value_of("body") == {"key": "value"}