From d8643d13846014bdf6c599c89bca43c6dd7854db Mon Sep 17 00:00:00 2001 From: Jackie Moo Date: Wed, 5 Jun 2024 19:47:40 +0800 Subject: [PATCH] Correctly Handle Text File Encoding to Avoid 'GBK' Codec Errors (#2072) --- src/backend/base/langflow/base/data/utils.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/src/backend/base/langflow/base/data/utils.py b/src/backend/base/langflow/base/data/utils.py index 7026d4968..2aaf3b23d 100644 --- a/src/backend/base/langflow/base/data/utils.py +++ b/src/backend/base/langflow/base/data/utils.py @@ -3,7 +3,7 @@ import xml.etree.ElementTree as ET from concurrent import futures from pathlib import Path from typing import Callable, List, Optional, Text - +import chardet import yaml from langflow.schema.schema import Record @@ -89,7 +89,12 @@ def retrieve_file_paths( def read_text_file(file_path: str) -> str: - with open(file_path, "r") as f: + with open(file_path, "rb") as f: + raw_data = f.read() + result = chardet.detect(raw_data) + encoding = result['encoding'] + + with open(file_path, "r", encoding=encoding) as f: return f.read()