From f85d447748e6d310d976e0a2d0d358d53e86efbf Mon Sep 17 00:00:00 2001 From: Phil Miesle Date: Mon, 13 Jan 2025 13:19:46 +0000 Subject: [PATCH] fix: address api_request issues on Python 3.11+ (#5643) Co-authored-by: Gabriel Luiz Freitas Almeida --- .../langflow/components/data/api_request.py | 62 ++++++++++++------- .../data/test_api_request_component.py | 24 +++---- 2 files changed, 52 insertions(+), 34 deletions(-) diff --git a/src/backend/base/langflow/components/data/api_request.py b/src/backend/base/langflow/components/data/api_request.py index ee947fc99..e085886c9 100644 --- a/src/backend/base/langflow/components/data/api_request.py +++ b/src/backend/base/langflow/components/data/api_request.py @@ -1,6 +1,5 @@ import asyncio import json -import mimetypes import re import tempfile from datetime import datetime, timezone @@ -8,9 +7,10 @@ from pathlib import Path from typing import Any from urllib.parse import parse_qsl, urlencode, urlparse, urlunparse +import aiofiles +import aiofiles.os as aiofiles_os import httpx import validators -from aiofile import async_open from langflow.base.curl.parse import parse_context from langflow.custom import Component @@ -183,7 +183,7 @@ class APIRequestComponent(Component): for redirect in response.history ] - is_binary, file_path = self._response_info(response, with_file_path=save_to_file) + is_binary, file_path = await self._response_info(response, with_file_path=save_to_file) response_headers = self._headers_to_dict(response.headers) metadata: dict[str, Any] = { @@ -194,9 +194,16 @@ class APIRequestComponent(Component): mode = "wb" if is_binary else "w" encoding = response.encoding if mode == "w" else None if file_path: - async with async_open(file_path, mode, encoding=encoding) as f: - await f.write(response.content if is_binary else response.text) - await f.flush() # Ensure the file is flushed to disk + # Ensure parent directory exists + await aiofiles_os.makedirs(file_path.parent, exist_ok=True) + if is_binary: + async with aiofiles.open(file_path, "wb") as f: + await f.write(response.content) + await f.flush() + else: + async with aiofiles.open(file_path, "w", encoding=encoding) as f: + await f.write(response.text) + await f.flush() metadata["file_path"] = str(file_path) if include_httpx_metadata: @@ -315,7 +322,9 @@ class APIRequestComponent(Component): self.status = results return results - def _response_info(self, response: httpx.Response, *, with_file_path: bool = False) -> tuple[bool, Path | None]: + async def _response_info( + self, response: httpx.Response, *, with_file_path: bool = False + ) -> tuple[bool, Path | None]: """Determine the file path and whether the response content is binary. Args: @@ -335,23 +344,18 @@ class APIRequestComponent(Component): # Step 1: Set up a subdirectory for the component in the OS temp directory component_temp_dir = Path(tempfile.gettempdir()) / self.__class__.__name__ - component_temp_dir.mkdir(parents=True, exist_ok=True) + + # Create directory asynchronously + await aiofiles_os.makedirs(component_temp_dir, exist_ok=True) # Step 2: Extract filename from Content-Disposition filename = None if "Content-Disposition" in response.headers: content_disposition = response.headers["Content-Disposition"] filename_match = re.search(r'filename="(.+?)"', content_disposition) - if not filename_match: # Try to match RFC 5987 style - filename_match = re.search(r"filename\*=(?:UTF-8'')?(.+)", content_disposition) if filename_match: extracted_filename = filename_match.group(1) - # Ensure the filename is unique - if (component_temp_dir / extracted_filename).exists(): - timestamp = datetime.now(timezone.utc).strftime("%Y%m%d%H%M%S%f") - filename = f"{timestamp}-{extracted_filename}" - else: - filename = extracted_filename + filename = extracted_filename # Step 3: Infer file extension or use part of the request URL if no filename if not filename: @@ -362,17 +366,29 @@ class APIRequestComponent(Component): base_name = "response" # Infer file extension - extension = mimetypes.guess_extension(content_type.split(";")[0]) if content_type else None - if not extension: - extension = ".bin" if is_binary else ".txt" # Default extensions - - # Combine the base name with timestamp and extension - timestamp = datetime.now(timezone.utc).strftime("%Y%m%d%H%M%S%f") - filename = f"{timestamp}-{base_name}{extension}" + content_type_to_extension = { + "text/plain": ".txt", + "application/json": ".json", + "image/jpeg": ".jpg", + "image/png": ".png", + "application/octet-stream": ".bin", + } + extension = content_type_to_extension.get(content_type, ".bin" if is_binary else ".txt") + filename = f"{base_name}{extension}" # Step 4: Define the full file path file_path = component_temp_dir / filename + # Step 5: Check if file exists asynchronously and handle accordingly + try: + # Try to create the file exclusively (x mode) to check existence + async with aiofiles.open(file_path, "x") as _: + pass # File created successfully, we can use this path + except FileExistsError: + # If file exists, append a timestamp to the filename + timestamp = datetime.now(timezone.utc).strftime("%Y%m%d%H%M%S%f") + file_path = component_temp_dir / f"{timestamp}-{filename}" + return is_binary, file_path def _headers_to_dict(self, headers: httpx.Headers) -> dict[str, str]: diff --git a/src/backend/tests/unit/components/data/test_api_request_component.py b/src/backend/tests/unit/components/data/test_api_request_component.py index fb0811dcd..b4d21e43a 100644 --- a/src/backend/tests/unit/components/data/test_api_request_component.py +++ b/src/backend/tests/unit/components/data/test_api_request_component.py @@ -2,11 +2,11 @@ import tempfile from pathlib import Path from unittest.mock import Mock +import aiofiles import aiofiles.os import httpx import pytest import respx -from aiofile import async_open from httpx import Response from langflow.components import data @@ -111,7 +111,9 @@ async def test_save_to_file_behavior(api_request, save_to_file, expected_propert # Check returned metadata metadata = result.data - assert set(metadata.keys()) == expected_properties, f"Unexpected properties: {set(metadata.keys())}" + assert ( + set(metadata.keys()) == expected_properties + ), f"Unexpected properties: {set(metadata.keys())}. Raw result: {result.data}" if save_to_file: # Validate that file_path exists in metadata @@ -120,7 +122,7 @@ async def test_save_to_file_behavior(api_request, save_to_file, expected_propert # Validate that the file exists and its content matches the response assert await aiofiles.os.path.exists(file_path), "Saved file does not exist" - async with async_open(file_path, "r") as f: + async with aiofiles.open(file_path) as f: file_content = await f.read() assert file_content == response_content, "File content does not match response content" @@ -132,23 +134,23 @@ async def test_save_to_file_behavior(api_request, save_to_file, expected_propert assert metadata["result"] == response_content.encode("utf-8"), "Response content mismatch in metadata" -def test_response_info_binary_content(api_request): +async def test_response_info_binary_content(api_request): response = Mock() response.headers = {"Content-Type": "application/octet-stream"} - is_binary, file_path = api_request._response_info(response, with_file_path=False) + is_binary, file_path = await api_request._response_info(response, with_file_path=False) assert is_binary is True assert file_path is None -def test_response_info_non_binary_content(api_request): +async def test_response_info_non_binary_content(api_request): response = Mock() response.headers = {"Content-Type": "text/plain"} - is_binary, file_path = api_request._response_info(response, with_file_path=False) + is_binary, file_path = await api_request._response_info(response, with_file_path=False) assert is_binary is False assert file_path is None -def test_response_info_filename_from_content_disposition(api_request): +async def test_response_info_filename_from_content_disposition(api_request): response = Mock() response.headers = { "Content-Disposition": 'attachment; filename="thisfile.txt"', @@ -157,20 +159,20 @@ def test_response_info_filename_from_content_disposition(api_request): response.request = Mock() response.request.url = "https://example.com/testfile" - is_binary, file_path = api_request._response_info(response, with_file_path=True) + is_binary, file_path = await api_request._response_info(response, with_file_path=True) assert is_binary is False assert file_path.parent == Path(tempfile.gettempdir()) / "APIRequestComponent" assert file_path.name.endswith("thisfile.txt") -def test_response_info_default_filename(api_request): +async def test_response_info_default_filename(api_request): response = Mock() response.headers = {"Content-Type": "text/plain"} response.request = Mock() response.request.url = "https://example.com/testfile" - is_binary, file_path = api_request._response_info(response, with_file_path=True) + is_binary, file_path = await api_request._response_info(response, with_file_path=True) assert is_binary is False assert file_path.parent == Path(tempfile.gettempdir()) / "APIRequestComponent"