🔒 chore(loading.py): remove code that deletes files after loading in instantiate_documentloader function
The save_uploaded_file function now uses the folder_name parameter instead of file_name to improve semantics. The appdirs library is now used to get the user cache directory and a folder for langflow cache is created. The sha256 hash of the file content is now used as the file name to avoid collisions and improve security. A folder is now created for each flow_id in the save_uploaded_file function. The code that deletes files after loading in the instantiate_documentloader function has been removed as it is unnecessary and can cause issues. 🐛 fix(endpoints.py): change file_name parameter to folder_name in save_uploaded_file function 🔒 chore(utils.py): use appdirs to get user cache directory and create a folder for langflow cache 🔒 chore(utils.py): use sha256 hash of file content as file name to avoid collisions and improve security 🔒 chore(utils.py): create folder for each flow_id in save_uploaded_file function
This commit is contained in:
parent
59deed4009
commit
bdd2076deb
3 changed files with 35 additions and 19 deletions
|
|
@@ -62,7 +62,7 @@ async def predict_flow(
|
|||
async def create_upload_file(file: UploadFile, flow_id: str):
|
||||
# Cache file
|
||||
try:
|
||||
file_path = save_uploaded_file(file.file, file_name=flow_id)
|
||||
file_path = save_uploaded_file(file.file, folder_name=flow_id)
|
||||
|
||||
return UploadFileResponse(
|
||||
flowId=flow_id,
|
||||
|
|
|
|||
43
src/backend/langflow/cache/utils.py
vendored
43
src/backend/langflow/cache/utils.py
vendored
|
|
@@ -8,15 +8,17 @@ import tempfile
|
|||
from collections import OrderedDict
|
||||
from pathlib import Path
|
||||
from typing import Any, Dict
|
||||
|
||||
from appdirs import user_cache_dir
|
||||
|
||||
CACHE: Dict[str, Any] = {}
|
||||
|
||||
CACHE_DIR = user_cache_dir("langflow", "langflow")
|
||||
|
||||
|
||||
def create_cache_folder(func):
|
||||
def wrapper(*args, **kwargs):
|
||||
# Get the destination folder
|
||||
cache_path = Path(tempfile.gettempdir()) / PREFIX
|
||||
cache_path = Path(CACHE_DIR) / PREFIX
|
||||
|
||||
# Create the destination folder if it doesn't exist
|
||||
os.makedirs(cache_path, exist_ok=True)
|
||||
|
|
@@ -118,7 +120,7 @@ def save_binary_file(content: str, file_name: str, accepted_types: list[str]) ->
|
|||
raise ValueError(f"File {file_name} is not accepted")
|
||||
|
||||
# Get the destination folder
|
||||
cache_path = Path(tempfile.gettempdir()) / PREFIX
|
||||
cache_path = Path(CACHE_DIR) / PREFIX
|
||||
if not content:
|
||||
raise ValueError("Please, reload the file in the loader.")
|
||||
data = content.split(",")[1]
|
||||
|
|
@@ -135,23 +137,44 @@ def save_binary_file(content: str, file_name: str, accepted_types: list[str]) ->
|
|||
|
||||
|
||||
@create_cache_folder
def save_uploaded_file(file, folder_name):
    """
    Save an uploaded file to the specified folder with a hash of its content as the file name.

    Hashing the content (rather than trusting the client-supplied name)
    avoids collisions between different uploads and prevents attacker-chosen
    file names from landing in the cache folder.

    Args:
        file: The uploaded file object (must support seek/read and expose
            a ``filename`` attribute, e.g. FastAPI's ``UploadFile.file`` wrapper).
        folder_name: The name of the folder to save the file in (one folder
            per flow_id).

    Returns:
        The path to the saved file.
    """
    cache_path = Path(CACHE_DIR)
    folder_path = cache_path / folder_name

    # Create the folder if it doesn't exist. exist_ok avoids the TOCTOU race
    # of a separate exists() check, and parents=True tolerates a missing
    # cache directory on first use.
    folder_path.mkdir(parents=True, exist_ok=True)

    # Create a hash of the file content.
    sha256_hash = hashlib.sha256()
    # Reset the file cursor to the beginning of the file in case the caller
    # already read from it.
    file.seek(0)
    # Iterate over the uploaded file in small chunks to conserve memory.
    while chunk := file.read(8192):  # Read 8KB at a time (adjust as needed)
        sha256_hash.update(chunk)

    # Use the hex digest of the hash as the file name, keeping the original
    # extension so downstream loaders can still infer the file type.
    hex_dig = sha256_hash.hexdigest()
    file_extension = Path(file.filename).suffix
    file_name = f"{hex_dig}{file_extension}"

    # Reset the file cursor again before copying the content to disk.
    file.seek(0)

    # Save the file with the hash as its name.
    file_path = folder_path / file_name
    with open(file_path, "wb") as new_file:
        while chunk := file.read(8192):
            new_file.write(chunk)

    return file_path
|
||||
|
|
|
|||
|
|
@ -11,7 +11,6 @@ from langchain.agents.load_tools import (
|
|||
_EXTRA_OPTIONAL_TOOLS,
|
||||
_LLM_TOOLS,
|
||||
)
|
||||
from pathlib import Path
|
||||
from langchain.agents.loading import load_agent_from_config
|
||||
from langchain.agents.tools import Tool
|
||||
from langchain.base_language import BaseLanguageModel
|
||||
|
|
@@ -171,13 +170,7 @@ def instantiate_vectorstore(class_object, params):
|
|||
|
||||
|
||||
def instantiate_documentloader(class_object, params):
    """Build a document loader from *params* and return the documents it loads.

    The source files referenced by *params* are deliberately left on disk so
    they remain available for subsequent loads.
    """
    loader = class_object(**params)
    return loader.load()
|
||||
|
||||
|
||||
def instantiate_textsplitter(class_object, params):
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue