fix: address api_request issues on Python 3.11+ (#5643)

Co-authored-by: Gabriel Luiz Freitas Almeida <gabriel@langflow.org>
This commit is contained in:
Phil Miesle 2025-01-13 13:19:46 +00:00 committed by GitHub
commit f85d447748
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
2 changed files with 52 additions and 34 deletions

View file

@ -1,6 +1,5 @@
import asyncio
import json
import mimetypes
import re
import tempfile
from datetime import datetime, timezone
@ -8,9 +7,10 @@ from pathlib import Path
from typing import Any
from urllib.parse import parse_qsl, urlencode, urlparse, urlunparse
import aiofiles
import aiofiles.os as aiofiles_os
import httpx
import validators
from aiofile import async_open
from langflow.base.curl.parse import parse_context
from langflow.custom import Component
@ -183,7 +183,7 @@ class APIRequestComponent(Component):
for redirect in response.history
]
is_binary, file_path = self._response_info(response, with_file_path=save_to_file)
is_binary, file_path = await self._response_info(response, with_file_path=save_to_file)
response_headers = self._headers_to_dict(response.headers)
metadata: dict[str, Any] = {
@ -194,9 +194,16 @@ class APIRequestComponent(Component):
mode = "wb" if is_binary else "w"
encoding = response.encoding if mode == "w" else None
if file_path:
async with async_open(file_path, mode, encoding=encoding) as f:
await f.write(response.content if is_binary else response.text)
await f.flush() # Ensure the file is flushed to disk
# Ensure parent directory exists
await aiofiles_os.makedirs(file_path.parent, exist_ok=True)
if is_binary:
async with aiofiles.open(file_path, "wb") as f:
await f.write(response.content)
await f.flush()
else:
async with aiofiles.open(file_path, "w", encoding=encoding) as f:
await f.write(response.text)
await f.flush()
metadata["file_path"] = str(file_path)
if include_httpx_metadata:
@ -315,7 +322,9 @@ class APIRequestComponent(Component):
self.status = results
return results
def _response_info(self, response: httpx.Response, *, with_file_path: bool = False) -> tuple[bool, Path | None]:
async def _response_info(
self, response: httpx.Response, *, with_file_path: bool = False
) -> tuple[bool, Path | None]:
"""Determine the file path and whether the response content is binary.
Args:
@ -335,23 +344,18 @@ class APIRequestComponent(Component):
# Step 1: Set up a subdirectory for the component in the OS temp directory
component_temp_dir = Path(tempfile.gettempdir()) / self.__class__.__name__
component_temp_dir.mkdir(parents=True, exist_ok=True)
# Create directory asynchronously
await aiofiles_os.makedirs(component_temp_dir, exist_ok=True)
# Step 2: Extract filename from Content-Disposition
filename = None
if "Content-Disposition" in response.headers:
content_disposition = response.headers["Content-Disposition"]
filename_match = re.search(r'filename="(.+?)"', content_disposition)
if not filename_match: # Try to match RFC 5987 style
filename_match = re.search(r"filename\*=(?:UTF-8'')?(.+)", content_disposition)
if filename_match:
extracted_filename = filename_match.group(1)
# Ensure the filename is unique
if (component_temp_dir / extracted_filename).exists():
timestamp = datetime.now(timezone.utc).strftime("%Y%m%d%H%M%S%f")
filename = f"{timestamp}-{extracted_filename}"
else:
filename = extracted_filename
filename = extracted_filename
# Step 3: Infer file extension or use part of the request URL if no filename
if not filename:
@ -362,17 +366,29 @@ class APIRequestComponent(Component):
base_name = "response"
# Infer file extension
extension = mimetypes.guess_extension(content_type.split(";")[0]) if content_type else None
if not extension:
extension = ".bin" if is_binary else ".txt" # Default extensions
# Combine the base name with timestamp and extension
timestamp = datetime.now(timezone.utc).strftime("%Y%m%d%H%M%S%f")
filename = f"{timestamp}-{base_name}{extension}"
content_type_to_extension = {
"text/plain": ".txt",
"application/json": ".json",
"image/jpeg": ".jpg",
"image/png": ".png",
"application/octet-stream": ".bin",
}
extension = content_type_to_extension.get(content_type, ".bin" if is_binary else ".txt")
filename = f"{base_name}{extension}"
# Step 4: Define the full file path
file_path = component_temp_dir / filename
# Step 5: Check if file exists asynchronously and handle accordingly
try:
# Try to create the file exclusively (x mode) to check existence
async with aiofiles.open(file_path, "x") as _:
pass # File created successfully, we can use this path
except FileExistsError:
# If file exists, append a timestamp to the filename
timestamp = datetime.now(timezone.utc).strftime("%Y%m%d%H%M%S%f")
file_path = component_temp_dir / f"{timestamp}-{filename}"
return is_binary, file_path
def _headers_to_dict(self, headers: httpx.Headers) -> dict[str, str]:

View file

@ -2,11 +2,11 @@ import tempfile
from pathlib import Path
from unittest.mock import Mock
import aiofiles
import aiofiles.os
import httpx
import pytest
import respx
from aiofile import async_open
from httpx import Response
from langflow.components import data
@ -111,7 +111,9 @@ async def test_save_to_file_behavior(api_request, save_to_file, expected_propert
# Check returned metadata
metadata = result.data
assert set(metadata.keys()) == expected_properties, f"Unexpected properties: {set(metadata.keys())}"
assert (
set(metadata.keys()) == expected_properties
), f"Unexpected properties: {set(metadata.keys())}. Raw result: {result.data}"
if save_to_file:
# Validate that file_path exists in metadata
@ -120,7 +122,7 @@ async def test_save_to_file_behavior(api_request, save_to_file, expected_propert
# Validate that the file exists and its content matches the response
assert await aiofiles.os.path.exists(file_path), "Saved file does not exist"
async with async_open(file_path, "r") as f:
async with aiofiles.open(file_path) as f:
file_content = await f.read()
assert file_content == response_content, "File content does not match response content"
@ -132,23 +134,23 @@ async def test_save_to_file_behavior(api_request, save_to_file, expected_propert
assert metadata["result"] == response_content.encode("utf-8"), "Response content mismatch in metadata"
def test_response_info_binary_content(api_request):
async def test_response_info_binary_content(api_request):
response = Mock()
response.headers = {"Content-Type": "application/octet-stream"}
is_binary, file_path = api_request._response_info(response, with_file_path=False)
is_binary, file_path = await api_request._response_info(response, with_file_path=False)
assert is_binary is True
assert file_path is None
def test_response_info_non_binary_content(api_request):
async def test_response_info_non_binary_content(api_request):
response = Mock()
response.headers = {"Content-Type": "text/plain"}
is_binary, file_path = api_request._response_info(response, with_file_path=False)
is_binary, file_path = await api_request._response_info(response, with_file_path=False)
assert is_binary is False
assert file_path is None
def test_response_info_filename_from_content_disposition(api_request):
async def test_response_info_filename_from_content_disposition(api_request):
response = Mock()
response.headers = {
"Content-Disposition": 'attachment; filename="thisfile.txt"',
@ -157,20 +159,20 @@ def test_response_info_filename_from_content_disposition(api_request):
response.request = Mock()
response.request.url = "https://example.com/testfile"
is_binary, file_path = api_request._response_info(response, with_file_path=True)
is_binary, file_path = await api_request._response_info(response, with_file_path=True)
assert is_binary is False
assert file_path.parent == Path(tempfile.gettempdir()) / "APIRequestComponent"
assert file_path.name.endswith("thisfile.txt")
def test_response_info_default_filename(api_request):
async def test_response_info_default_filename(api_request):
response = Mock()
response.headers = {"Content-Type": "text/plain"}
response.request = Mock()
response.request.url = "https://example.com/testfile"
is_binary, file_path = api_request._response_info(response, with_file_path=True)
is_binary, file_path = await api_request._response_info(response, with_file_path=True)
assert is_binary is False
assert file_path.parent == Path(tempfile.gettempdir()) / "APIRequestComponent"