Refactor text splitter components to use unescape_string utility
This commit is contained in:
parent
b8639977ae
commit
d777df3dbf
2 changed files with 7 additions and 4 deletions
|
|
@@ -4,6 +4,7 @@ from langchain.text_splitter import CharacterTextSplitter
|
|||
|
||||
from langflow import CustomComponent
|
||||
from langflow.schema.schema import Record
|
||||
from langflow.utils.util import unescape_string
|
||||
|
||||
|
||||
class CharacterTextSplitterComponent(CustomComponent):
|
||||
|
|
@@ -26,7 +27,7 @@ class CharacterTextSplitterComponent(CustomComponent):
|
|||
separator: str = "\n",
|
||||
) -> List[Record]:
|
||||
# separator may come escaped from the frontend
|
||||
separator = separator.encode().decode("unicode_escape")
|
||||
separator = unescape_string(separator)
|
||||
documents = []
|
||||
for _input in inputs:
|
||||
if isinstance(_input, Record):
|
||||
|
|
|
|||
|
|
@@ -5,13 +5,15 @@ from langchain_core.documents import Document
|
|||
|
||||
from langflow import CustomComponent
|
||||
from langflow.schema import Record
|
||||
from langflow.utils.util import build_loader_repr_from_records
|
||||
from langflow.utils.util import build_loader_repr_from_records, unescape_string
|
||||
|
||||
|
||||
class RecursiveCharacterTextSplitterComponent(CustomComponent):
|
||||
display_name: str = "Recursive Character Text Splitter"
|
||||
description: str = "Split text into chunks of a specified length."
|
||||
documentation: str = "https://docs.langflow.org/components/text-splitters#recursivecharactertextsplitter"
|
||||
documentation: str = (
|
||||
"https://docs.langflow.org/components/text-splitters#recursivecharactertextsplitter"
|
||||
)
|
||||
|
||||
def build_config(self):
|
||||
return {
|
||||
|
|
@@ -65,7 +67,7 @@ class RecursiveCharacterTextSplitterComponent(CustomComponent):
|
|||
elif separators:
|
||||
# check if the separators list has escaped characters
|
||||
# if there are escaped characters, unescape them
|
||||
separators = [x.encode().decode("unicode-escape") for x in separators]
|
||||
separators = [unescape_string(x) for x in separators]
|
||||
|
||||
# Make sure chunk_size and chunk_overlap are ints
|
||||
if isinstance(chunk_size, str):
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue