Refactor text splitter components to use unescape_string utility

This commit is contained in:
Gabriel Luiz Freitas Almeida 2024-03-08 10:20:41 -03:00
commit d777df3dbf
2 changed files with 7 additions and 4 deletions

View file

@@ -4,6 +4,7 @@ from langchain.text_splitter import CharacterTextSplitter
from langflow import CustomComponent
from langflow.schema.schema import Record
+from langflow.utils.util import unescape_string
class CharacterTextSplitterComponent(CustomComponent):
@@ -26,7 +27,7 @@ class CharacterTextSplitterComponent(CustomComponent):
separator: str = "\n",
) -> List[Record]:
# separator may come escaped from the frontend
-separator = separator.encode().decode("unicode_escape")
+separator = unescape_string(separator)
documents = []
for _input in inputs:
if isinstance(_input, Record):

View file

@@ -5,13 +5,15 @@ from langchain_core.documents import Document
from langflow import CustomComponent
from langflow.schema import Record
-from langflow.utils.util import build_loader_repr_from_records
+from langflow.utils.util import build_loader_repr_from_records, unescape_string
class RecursiveCharacterTextSplitterComponent(CustomComponent):
display_name: str = "Recursive Character Text Splitter"
description: str = "Split text into chunks of a specified length."
-documentation: str = "https://docs.langflow.org/components/text-splitters#recursivecharactertextsplitter"
+documentation: str = (
+"https://docs.langflow.org/components/text-splitters#recursivecharactertextsplitter"
+)
def build_config(self):
return {
@@ -65,7 +67,7 @@ class RecursiveCharacterTextSplitterComponent(CustomComponent):
elif separators:
# check if the separators list has escaped characters
# if there are escaped characters, unescape them
-separators = [x.encode().decode("unicode-escape") for x in separators]
+separators = [unescape_string(x) for x in separators]
# Make sure chunk_size and chunk_overlap are ints
if isinstance(chunk_size, str):