diff --git a/src/backend/langflow/api/v1/endpoints.py b/src/backend/langflow/api/v1/endpoints.py index 10f2f4692..a8e5e5b76 100644 --- a/src/backend/langflow/api/v1/endpoints.py +++ b/src/backend/langflow/api/v1/endpoints.py @@ -62,7 +62,7 @@ async def predict_flow( async def create_upload_file(file: UploadFile, flow_id: str): # Cache file try: - file_path = save_uploaded_file(file.file, file_name=flow_id) + file_path = save_uploaded_file(file.file, folder_name=flow_id) return UploadFileResponse( flowId=flow_id, diff --git a/src/backend/langflow/cache/utils.py b/src/backend/langflow/cache/utils.py index 3fa95a3d6..3e456c3d4 100644 --- a/src/backend/langflow/cache/utils.py +++ b/src/backend/langflow/cache/utils.py @@ -8,15 +8,17 @@ import tempfile from collections import OrderedDict from pathlib import Path from typing import Any, Dict - +from appdirs import user_cache_dir CACHE: Dict[str, Any] = {} +CACHE_DIR = user_cache_dir("langflow", "langflow") + def create_cache_folder(func): def wrapper(*args, **kwargs): # Get the destination folder - cache_path = Path(tempfile.gettempdir()) / PREFIX + cache_path = Path(CACHE_DIR) / PREFIX # Create the destination folder if it doesn't exist os.makedirs(cache_path, exist_ok=True) @@ -118,7 +120,7 @@ def save_binary_file(content: str, file_name: str, accepted_types: list[str]) -> raise ValueError(f"File {file_name} is not accepted") # Get the destination folder - cache_path = Path(tempfile.gettempdir()) / PREFIX + cache_path = Path(CACHE_DIR) / PREFIX if not content: raise ValueError("Please, reload the file in the loader.") data = content.split(",")[1] @@ -135,23 +137,44 @@ def save_binary_file(content: str, file_name: str, accepted_types: list[str]) -> @create_cache_folder -def save_uploaded_file(file, file_name): +def save_uploaded_file(file, folder_name): """ - Save an uploaded file to the specified folder. + Save an uploaded file to the specified folder with a hash of its content as the file name. Args: file: The uploaded file object. - file_name: The name of the file, including its extension. + folder_name: The name of the folder to save the file in. Returns: The path to the saved file. """ - cache_path = Path(tempfile.gettempdir()) / PREFIX - file_path = cache_path / file_name + cache_path = Path(CACHE_DIR) + folder_path = cache_path / folder_name + # Create the folder if it doesn't exist + if not folder_path.exists(): + folder_path.mkdir() + + # Create a hash of the file content + sha256_hash = hashlib.sha256() + # Reset the file cursor to the beginning of the file + file.seek(0) + # Iterate over the uploaded file in small chunks to conserve memory + while chunk := file.read(8192): # Read 8KB at a time (adjust as needed) + sha256_hash.update(chunk) + + # Use the hex digest of the hash as the file name + hex_dig = sha256_hash.hexdigest() + file_extension = Path(file.filename).suffix + file_name = f"{hex_dig}{file_extension}" + + # Reset the file cursor to the beginning of the file + file.seek(0) + + # Save the file with the hash as its name + file_path = folder_path / file_name with open(file_path, "wb") as new_file: - # Iterate over the uploaded file in small chunks to conserve memory - while chunk := file.read(8192): # Read 8KB at a time (adjust as needed) + while chunk := file.read(8192): new_file.write(chunk) return file_path diff --git a/src/backend/langflow/interface/loading.py b/src/backend/langflow/interface/loading.py index af1b51678..a765d3b9b 100644 --- a/src/backend/langflow/interface/loading.py +++ b/src/backend/langflow/interface/loading.py @@ -11,7 +11,6 @@ from langchain.agents.load_tools import ( _EXTRA_OPTIONAL_TOOLS, _LLM_TOOLS, ) -from pathlib import Path from langchain.agents.loading import load_agent_from_config from langchain.agents.tools import Tool from langchain.base_language import BaseLanguageModel @@ -171,13 +170,7 @@ def instantiate_vectorstore(class_object, params): def instantiate_documentloader(class_object, params): - documents = class_object(**params).load() - # now that the file is loaded, we can remove the path - for value in params.values(): - path = Path(value) - if path.exists(): - path.unlink() - return documents + return class_object(**params).load() def instantiate_textsplitter(class_object, params):