Correctly Handle Text File Encoding to Avoid 'GBK' Codec Errors (#2072)

This commit is contained in:
Jackie Moo 2024-06-05 19:47:40 +08:00 committed by GitHub
commit d8643d1384
No known key found for this signature in database
GPG key ID: B5690EEEBB952194

View file

@ -3,7 +3,7 @@ import xml.etree.ElementTree as ET
from concurrent import futures
from pathlib import Path
from typing import Callable, List, Optional, Text
import chardet
import yaml
from langflow.schema.schema import Record
@ -89,7 +89,12 @@ def retrieve_file_paths(
def read_text_file(file_path: str) -> str:
    """Read a text file and return its contents as a string.

    The file is first read as raw bytes so that ``chardet`` can guess its
    encoding, then reopened in text mode with that encoding. This avoids
    decoding with the platform's locale encoding, which raises codec errors
    (for example with 'gbk') when the file was written in another encoding.

    Args:
        file_path: Path of the file to read.

    Returns:
        The decoded text of the file.

    Raises:
        OSError: If the file cannot be opened or read.
        UnicodeDecodeError: If the content cannot be decoded with the
            detected (or fallback) encoding.
    """
    with open(file_path, "rb") as f:
        raw_data = f.read()

    # chardet reports an encoding of None when it cannot make a guess (an
    # empty file, for instance). Passing None to open() would silently select
    # the locale default again, so fall back to UTF-8 explicitly.
    encoding = chardet.detect(raw_data)["encoding"] or "utf-8"

    # Reopen in text mode rather than decoding raw_data directly, so that
    # newline translation is still applied to the returned text.
    with open(file_path, "r", encoding=encoding) as f:
        return f.read()