fix: make files be saved in unique path (#8965)

Co-authored-by: Gabriel Luiz Freitas Almeida <gabriel@langflow.org>
This commit is contained in:
Lucas Oliveira 2025-07-10 08:42:03 -03:00 committed by GitHub
commit 2fec8437b8
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
2 changed files with 281 additions and 10 deletions

View file

@ -105,20 +105,14 @@ async def upload_user_file(
detail=f"File size is larger than the maximum file size {max_file_size_upload}MB.",
)
# Read file content and create a unique file name
try:
file_id, file_name = await save_file_routine(file, storage_service, current_user)
except Exception as e:
raise HTTPException(status_code=500, detail=f"Error saving file: {e}") from e
# Create a new database record for the uploaded file.
try:
# Enforce unique constraint on name, except for the special _mcp_servers file
new_filename = file.filename
try:
root_filename, _ = new_filename.rsplit(".", 1)
root_filename, file_extension = new_filename.rsplit(".", 1)
except ValueError:
root_filename, _ = new_filename, ""
root_filename, file_extension = new_filename, ""
# Special handling for the MCP servers config file: always keep the same root filename
if root_filename == MCP_SERVERS_FILE:
@ -126,6 +120,7 @@ async def upload_user_file(
existing_mcp_file = await get_file_by_name(root_filename, current_user, session)
if existing_mcp_file:
await delete_file(existing_mcp_file.id, current_user, session, storage_service)
unique_filename = new_filename
else:
# For normal files, ensure unique name by appending a count if necessary
stmt = select(UserFile).where(col(UserFile.name).like(f"{root_filename}%"))
@ -144,10 +139,21 @@ async def upload_user_file(
count = max(counts) if counts else 0
root_filename = f"{root_filename} ({count + 1})"
# Create the unique filename with extension for storage
unique_filename = f"{root_filename}.{file_extension}" if file_extension else root_filename
# Read file content and save with unique filename
try:
file_id, stored_file_name = await save_file_routine(
file, storage_service, current_user, file_name=unique_filename
)
except Exception as e:
raise HTTPException(status_code=500, detail=f"Error saving file: {e}") from e
# Compute the file size based on the path
file_size = await storage_service.get_file_size(
flow_id=str(current_user.id),
file_name=file_name,
file_name=stored_file_name,
)
# Create a new file record
@ -155,7 +161,7 @@ async def upload_user_file(
id=file_id,
user_id=current_user.id,
name=root_filename,
path=f"{current_user.id}/{file_name}",
path=f"{current_user.id}/{stored_file_name}",
size=file_size,
)
session.add(new_file)

View file

@ -256,3 +256,268 @@ async def test_upload_list_delete_and_validate_files(files_client, files_created
assert file2["name"] in file_names
assert file2["id"] in file_ids
assert len(files) == 1
async def test_upload_files_with_same_name_creates_unique_names(files_client, files_created_api_key):
"""Test that uploading files with the same name creates unique filenames."""
headers = {"x-api-key": files_created_api_key.api_key}
# Upload first file
response1 = await files_client.post(
"api/v2/files",
files={"file": ("duplicate.txt", b"content1")},
headers=headers,
)
assert response1.status_code == 201
file1 = response1.json()
assert file1["name"] == "duplicate"
# Upload second file with same name
response2 = await files_client.post(
"api/v2/files",
files={"file": ("duplicate.txt", b"content2")},
headers=headers,
)
assert response2.status_code == 201
file2 = response2.json()
assert file2["name"] == "duplicate (1)"
# Upload third file with same name
response3 = await files_client.post(
"api/v2/files",
files={"file": ("duplicate.txt", b"content3")},
headers=headers,
)
assert response3.status_code == 201
file3 = response3.json()
assert file3["name"] == "duplicate (2)"
# Verify all files can be downloaded with their unique content
download1 = await files_client.get(f"api/v2/files/{file1['id']}", headers=headers)
assert download1.status_code == 200
assert download1.content == b"content1"
download2 = await files_client.get(f"api/v2/files/{file2['id']}", headers=headers)
assert download2.status_code == 200
assert download2.content == b"content2"
download3 = await files_client.get(f"api/v2/files/{file3['id']}", headers=headers)
assert download3.status_code == 200
assert download3.content == b"content3"
# List files and verify all three are present with unique names
response = await files_client.get("api/v2/files", headers=headers)
assert response.status_code == 200
files = response.json()
file_names = [f["name"] for f in files]
assert "duplicate" in file_names
assert "duplicate (1)" in file_names
assert "duplicate (2)" in file_names
assert len(files) == 3
async def test_upload_files_without_extension_creates_unique_names(files_client, files_created_api_key):
"""Test that uploading files without extensions also creates unique filenames."""
headers = {"x-api-key": files_created_api_key.api_key}
# Upload first file without extension
response1 = await files_client.post(
"api/v2/files",
files={"file": ("noextension", b"content1")},
headers=headers,
)
assert response1.status_code == 201
file1 = response1.json()
assert file1["name"] == "noextension"
# Upload second file with same name
response2 = await files_client.post(
"api/v2/files",
files={"file": ("noextension", b"content2")},
headers=headers,
)
assert response2.status_code == 201
file2 = response2.json()
assert file2["name"] == "noextension (1)"
# Verify both files can be downloaded
download1 = await files_client.get(f"api/v2/files/{file1['id']}", headers=headers)
assert download1.status_code == 200
assert download1.content == b"content1"
download2 = await files_client.get(f"api/v2/files/{file2['id']}", headers=headers)
assert download2.status_code == 200
assert download2.content == b"content2"
async def test_upload_files_with_different_extensions_same_name(files_client, files_created_api_key):
"""Test that files with same root name but different extensions create unique names."""
headers = {"x-api-key": files_created_api_key.api_key}
# Upload file with .txt extension
response1 = await files_client.post(
"api/v2/files",
files={"file": ("document.txt", b"text content")},
headers=headers,
)
assert response1.status_code == 201
file1 = response1.json()
assert file1["name"] == "document"
# Upload file with .md extension and same root name
response2 = await files_client.post(
"api/v2/files",
files={"file": ("document.md", b"markdown content")},
headers=headers,
)
assert response2.status_code == 201
file2 = response2.json()
assert file2["name"] == "document (1)"
# Upload another .txt file with same root name
response3 = await files_client.post(
"api/v2/files",
files={"file": ("document.txt", b"more text content")},
headers=headers,
)
assert response3.status_code == 201
file3 = response3.json()
assert file3["name"] == "document (2)"
async def test_mcp_servers_file_replacement(files_client, files_created_api_key):
"""Test that _mcp_servers file gets replaced instead of creating unique names."""
headers = {"x-api-key": files_created_api_key.api_key}
# Upload first _mcp_servers file
response1 = await files_client.post(
"api/v2/files",
files={"file": ("_mcp_servers.json", b'{"servers": ["server1"]}')},
headers=headers,
)
assert response1.status_code == 201
file1 = response1.json()
assert file1["name"] == "_mcp_servers"
# Upload second _mcp_servers file - should replace the first one
response2 = await files_client.post(
"api/v2/files",
files={"file": ("_mcp_servers.json", b'{"servers": ["server2"]}')},
headers=headers,
)
assert response2.status_code == 201
file2 = response2.json()
assert file2["name"] == "_mcp_servers"
# Note: _mcp_servers files are filtered out from the regular file list
# This is expected behavior since they're managed separately
response = await files_client.get("api/v2/files", headers=headers)
assert response.status_code == 200
files = response.json()
mcp_files = [f for f in files if f["name"] == "_mcp_servers"]
assert len(mcp_files) == 0 # MCP servers files are filtered out from regular list
# Verify the second file can be downloaded with the updated content
download2 = await files_client.get(f"api/v2/files/{file2['id']}", headers=headers)
assert download2.status_code == 200
assert download2.content == b'{"servers": ["server2"]}'
# Verify the first file no longer exists (should return 404)
download1 = await files_client.get(f"api/v2/files/{file1['id']}", headers=headers)
assert download1.status_code == 404
# Verify the file IDs are different (new file replaced old one)
assert file1["id"] != file2["id"]
async def test_unique_filename_counter_handles_gaps(files_client, files_created_api_key):
"""Test that the unique filename counter properly handles gaps in sequence."""
headers = {"x-api-key": files_created_api_key.api_key}
# Upload original file
response1 = await files_client.post(
"api/v2/files",
files={"file": ("gaptest.txt", b"content1")},
headers=headers,
)
assert response1.status_code == 201
file1 = response1.json()
assert file1["name"] == "gaptest"
# Upload second file (should be gaptest (1))
response2 = await files_client.post(
"api/v2/files",
files={"file": ("gaptest.txt", b"content2")},
headers=headers,
)
assert response2.status_code == 201
file2 = response2.json()
assert file2["name"] == "gaptest (1)"
# Upload third file (should be gaptest (2))
response3 = await files_client.post(
"api/v2/files",
files={"file": ("gaptest.txt", b"content3")},
headers=headers,
)
assert response3.status_code == 201
file3 = response3.json()
assert file3["name"] == "gaptest (2)"
# Delete the middle file (gaptest (1))
delete_response = await files_client.delete(f"api/v2/files/{file2['id']}", headers=headers)
assert delete_response.status_code == 200
# Upload another file - should be gaptest (3), not filling the gap
response4 = await files_client.post(
"api/v2/files",
files={"file": ("gaptest.txt", b"content4")},
headers=headers,
)
assert response4.status_code == 201
file4 = response4.json()
assert file4["name"] == "gaptest (3)"
# Verify final state
response = await files_client.get("api/v2/files", headers=headers)
assert response.status_code == 200
files = response.json()
file_names = [f["name"] for f in files]
assert "gaptest" in file_names
assert "gaptest (1)" not in file_names # deleted
assert "gaptest (2)" in file_names
assert "gaptest (3)" in file_names
assert len([name for name in file_names if name.startswith("gaptest")]) == 3
async def test_unique_filename_path_storage(files_client, files_created_api_key):
"""Test that files with unique names are stored with unique paths."""
headers = {"x-api-key": files_created_api_key.api_key}
# Upload two files with same name
response1 = await files_client.post(
"api/v2/files",
files={"file": ("pathtest.txt", b"path content 1")},
headers=headers,
)
assert response1.status_code == 201
file1 = response1.json()
response2 = await files_client.post(
"api/v2/files",
files={"file": ("pathtest.txt", b"path content 2")},
headers=headers,
)
assert response2.status_code == 201
file2 = response2.json()
# Verify both files have different paths and can be downloaded independently
assert file1["path"] != file2["path"]
download1 = await files_client.get(f"api/v2/files/{file1['id']}", headers=headers)
assert download1.status_code == 200
assert download1.content == b"path content 1"
download2 = await files_client.get(f"api/v2/files/{file2['id']}", headers=headers)
assert download2.status_code == 200
assert download2.content == b"path content 2"