From 2fec8437b8b6f926317a4f9f26e200168a3ec83b Mon Sep 17 00:00:00 2001 From: Lucas Oliveira <62335616+lucaseduoli@users.noreply.github.com> Date: Thu, 10 Jul 2025 08:42:03 -0300 Subject: [PATCH] fix: make files be saved in unique path (#8965) Co-authored-by: Gabriel Luiz Freitas Almeida --- src/backend/base/langflow/api/v2/files.py | 26 +- src/backend/tests/unit/api/v2/test_files.py | 265 ++++++++++++++++++++ 2 files changed, 281 insertions(+), 10 deletions(-) diff --git a/src/backend/base/langflow/api/v2/files.py b/src/backend/base/langflow/api/v2/files.py index dbc62229d..3777a5da6 100644 --- a/src/backend/base/langflow/api/v2/files.py +++ b/src/backend/base/langflow/api/v2/files.py @@ -105,20 +105,14 @@ async def upload_user_file( detail=f"File size is larger than the maximum file size {max_file_size_upload}MB.", ) - # Read file content and create a unique file name - try: - file_id, file_name = await save_file_routine(file, storage_service, current_user) - except Exception as e: - raise HTTPException(status_code=500, detail=f"Error saving file: {e}") from e - # Create a new database record for the uploaded file. try: # Enforce unique constraint on name, except for the special _mcp_servers file new_filename = file.filename try: - root_filename, _ = new_filename.rsplit(".", 1) + root_filename, file_extension = new_filename.rsplit(".", 1) except ValueError: - root_filename, _ = new_filename, "" + root_filename, file_extension = new_filename, "" # Special handling for the MCP servers config file: always keep the same root filename if root_filename == MCP_SERVERS_FILE: @@ -126,6 +120,7 @@ async def upload_user_file( existing_mcp_file = await get_file_by_name(root_filename, current_user, session) if existing_mcp_file: await delete_file(existing_mcp_file.id, current_user, session, storage_service) + unique_filename = new_filename else: # For normal files, ensure unique name by appending a count if necessary stmt = select(UserFile).where(col(UserFile.name).like(f"{root_filename}%")) @@ -144,10 +139,21 @@ async def upload_user_file( count = max(counts) if counts else 0 root_filename = f"{root_filename} ({count + 1})" + # Create the unique filename with extension for storage + unique_filename = f"{root_filename}.{file_extension}" if file_extension else root_filename + + # Read file content and save with unique filename + try: + file_id, stored_file_name = await save_file_routine( + file, storage_service, current_user, file_name=unique_filename + ) + except Exception as e: + raise HTTPException(status_code=500, detail=f"Error saving file: {e}") from e + # Compute the file size based on the path file_size = await storage_service.get_file_size( flow_id=str(current_user.id), - file_name=file_name, + file_name=stored_file_name, ) # Create a new file record @@ -155,7 +161,7 @@ async def upload_user_file( id=file_id, user_id=current_user.id, name=root_filename, - path=f"{current_user.id}/{file_name}", + path=f"{current_user.id}/{stored_file_name}", size=file_size, ) session.add(new_file) diff --git a/src/backend/tests/unit/api/v2/test_files.py b/src/backend/tests/unit/api/v2/test_files.py index 128212cd4..e44c4c0cd 100644 --- a/src/backend/tests/unit/api/v2/test_files.py +++ b/src/backend/tests/unit/api/v2/test_files.py @@ -256,3 +256,268 @@ async def test_upload_list_delete_and_validate_files(files_client, files_created assert file2["name"] in file_names assert file2["id"] in file_ids assert len(files) == 1 + + +async def test_upload_files_with_same_name_creates_unique_names(files_client, files_created_api_key): + """Test that uploading files with the same name creates unique filenames.""" + headers = {"x-api-key": files_created_api_key.api_key} + + # Upload first file + response1 = await files_client.post( + "api/v2/files", + files={"file": ("duplicate.txt", b"content1")}, + headers=headers, + ) + assert response1.status_code == 201 + file1 = response1.json() + assert file1["name"] == "duplicate" + + # Upload second file with same name + response2 = await files_client.post( + "api/v2/files", + files={"file": ("duplicate.txt", b"content2")}, + headers=headers, + ) + assert response2.status_code == 201 + file2 = response2.json() + assert file2["name"] == "duplicate (1)" + + # Upload third file with same name + response3 = await files_client.post( + "api/v2/files", + files={"file": ("duplicate.txt", b"content3")}, + headers=headers, + ) + assert response3.status_code == 201 + file3 = response3.json() + assert file3["name"] == "duplicate (2)" + + # Verify all files can be downloaded with their unique content + download1 = await files_client.get(f"api/v2/files/{file1['id']}", headers=headers) + assert download1.status_code == 200 + assert download1.content == b"content1" + + download2 = await files_client.get(f"api/v2/files/{file2['id']}", headers=headers) + assert download2.status_code == 200 + assert download2.content == b"content2" + + download3 = await files_client.get(f"api/v2/files/{file3['id']}", headers=headers) + assert download3.status_code == 200 + assert download3.content == b"content3" + + # List files and verify all three are present with unique names + response = await files_client.get("api/v2/files", headers=headers) + assert response.status_code == 200 + files = response.json() + file_names = [f["name"] for f in files] + assert "duplicate" in file_names + assert "duplicate (1)" in file_names + assert "duplicate (2)" in file_names + assert len(files) == 3 + + +async def test_upload_files_without_extension_creates_unique_names(files_client, files_created_api_key): + """Test that uploading files without extensions also creates unique filenames.""" + headers = {"x-api-key": files_created_api_key.api_key} + + # Upload first file without extension + response1 = await files_client.post( + "api/v2/files", + files={"file": ("noextension", b"content1")}, + headers=headers, + ) + assert response1.status_code == 201 + file1 = response1.json() + assert file1["name"] == "noextension" + + # Upload second file with same name + response2 = await files_client.post( + "api/v2/files", + files={"file": ("noextension", b"content2")}, + headers=headers, + ) + assert response2.status_code == 201 + file2 = response2.json() + assert file2["name"] == "noextension (1)" + + # Verify both files can be downloaded + download1 = await files_client.get(f"api/v2/files/{file1['id']}", headers=headers) + assert download1.status_code == 200 + assert download1.content == b"content1" + + download2 = await files_client.get(f"api/v2/files/{file2['id']}", headers=headers) + assert download2.status_code == 200 + assert download2.content == b"content2" + + +async def test_upload_files_with_different_extensions_same_name(files_client, files_created_api_key): + """Test that files with same root name but different extensions create unique names.""" + headers = {"x-api-key": files_created_api_key.api_key} + + # Upload file with .txt extension + response1 = await files_client.post( + "api/v2/files", + files={"file": ("document.txt", b"text content")}, + headers=headers, + ) + assert response1.status_code == 201 + file1 = response1.json() + assert file1["name"] == "document" + + # Upload file with .md extension and same root name + response2 = await files_client.post( + "api/v2/files", + files={"file": ("document.md", b"markdown content")}, + headers=headers, + ) + assert response2.status_code == 201 + file2 = response2.json() + assert file2["name"] == "document (1)" + + # Upload another .txt file with same root name + response3 = await files_client.post( + "api/v2/files", + files={"file": ("document.txt", b"more text content")}, + headers=headers, + ) + assert response3.status_code == 201 + file3 = response3.json() + assert file3["name"] == "document (2)" + + +async def test_mcp_servers_file_replacement(files_client, files_created_api_key): + """Test that _mcp_servers file gets replaced instead of creating unique names.""" + headers = {"x-api-key": files_created_api_key.api_key} + + # Upload first _mcp_servers file + response1 = await files_client.post( + "api/v2/files", + files={"file": ("_mcp_servers.json", b'{"servers": ["server1"]}')}, + headers=headers, + ) + assert response1.status_code == 201 + file1 = response1.json() + assert file1["name"] == "_mcp_servers" + + # Upload second _mcp_servers file - should replace the first one + response2 = await files_client.post( + "api/v2/files", + files={"file": ("_mcp_servers.json", b'{"servers": ["server2"]}')}, + headers=headers, + ) + assert response2.status_code == 201 + file2 = response2.json() + assert file2["name"] == "_mcp_servers" + + # Note: _mcp_servers files are filtered out from the regular file list + # This is expected behavior since they're managed separately + response = await files_client.get("api/v2/files", headers=headers) + assert response.status_code == 200 + files = response.json() + mcp_files = [f for f in files if f["name"] == "_mcp_servers"] + assert len(mcp_files) == 0 # MCP servers files are filtered out from regular list + + # Verify the second file can be downloaded with the updated content + download2 = await files_client.get(f"api/v2/files/{file2['id']}", headers=headers) + assert download2.status_code == 200 + assert download2.content == b'{"servers": ["server2"]}' + + # Verify the first file no longer exists (should return 404) + download1 = await files_client.get(f"api/v2/files/{file1['id']}", headers=headers) + assert download1.status_code == 404 + + # Verify the file IDs are different (new file replaced old one) + assert file1["id"] != file2["id"] + + +async def test_unique_filename_counter_handles_gaps(files_client, files_created_api_key): + """Test that the unique filename counter properly handles gaps in sequence.""" + headers = {"x-api-key": files_created_api_key.api_key} + + # Upload original file + response1 = await files_client.post( + "api/v2/files", + files={"file": ("gaptest.txt", b"content1")}, + headers=headers, + ) + assert response1.status_code == 201 + file1 = response1.json() + assert file1["name"] == "gaptest" + + # Upload second file (should be gaptest (1)) + response2 = await files_client.post( + "api/v2/files", + files={"file": ("gaptest.txt", b"content2")}, + headers=headers, + ) + assert response2.status_code == 201 + file2 = response2.json() + assert file2["name"] == "gaptest (1)" + + # Upload third file (should be gaptest (2)) + response3 = await files_client.post( + "api/v2/files", + files={"file": ("gaptest.txt", b"content3")}, + headers=headers, + ) + assert response3.status_code == 201 + file3 = response3.json() + assert file3["name"] == "gaptest (2)" + + # Delete the middle file (gaptest (1)) + delete_response = await files_client.delete(f"api/v2/files/{file2['id']}", headers=headers) + assert delete_response.status_code == 200 + + # Upload another file - should be gaptest (3), not filling the gap + response4 = await files_client.post( + "api/v2/files", + files={"file": ("gaptest.txt", b"content4")}, + headers=headers, + ) + assert response4.status_code == 201 + file4 = response4.json() + assert file4["name"] == "gaptest (3)" + + # Verify final state + response = await files_client.get("api/v2/files", headers=headers) + assert response.status_code == 200 + files = response.json() + file_names = [f["name"] for f in files] + assert "gaptest" in file_names + assert "gaptest (1)" not in file_names # deleted + assert "gaptest (2)" in file_names + assert "gaptest (3)" in file_names + assert len([name for name in file_names if name.startswith("gaptest")]) == 3 + + +async def test_unique_filename_path_storage(files_client, files_created_api_key): + """Test that files with unique names are stored with unique paths.""" + headers = {"x-api-key": files_created_api_key.api_key} + + # Upload two files with same name + response1 = await files_client.post( + "api/v2/files", + files={"file": ("pathtest.txt", b"path content 1")}, + headers=headers, + ) + assert response1.status_code == 201 + file1 = response1.json() + + response2 = await files_client.post( + "api/v2/files", + files={"file": ("pathtest.txt", b"path content 2")}, + headers=headers, + ) + assert response2.status_code == 201 + file2 = response2.json() + + # Verify both files have different paths and can be downloaded independently + assert file1["path"] != file2["path"] + + download1 = await files_client.get(f"api/v2/files/{file1['id']}", headers=headers) + assert download1.status_code == 200 + assert download1.content == b"path content 1" + + download2 = await files_client.get(f"api/v2/files/{file2['id']}", headers=headers) + assert download2.status_code == 200 + assert download2.content == b"path content 2"