From a28df1f08fe5d84f2687d2b6a9baec6ab755fd32 Mon Sep 17 00:00:00 2001 From: Gabriel Luiz Freitas Almeida Date: Thu, 24 Aug 2023 22:52:16 -0300 Subject: [PATCH] =?UTF-8?q?=F0=9F=93=9D=20docs(LanguageRecursiveTextSplitt?= =?UTF-8?q?er.py):=20update=20documentation=20link=20to=20point=20to=20the?= =?UTF-8?q?=20correct=20URL=20=F0=9F=94=A8=20refactor(LanguageRecursiveTex?= =?UTF-8?q?tSplitter.py):=20change=20type=20hints=20for=20the=20'documents?= =?UTF-8?q?'=20parameter=20from=20Document=20to=20list[Document]=20and=20c?= =?UTF-8?q?hange=20return=20type=20from=20Document=20to=20list[Document]?= =?UTF-8?q?=20to=20improve=20clarity=20and=20consistency=20=F0=9F=94=A8=20?= =?UTF-8?q?refactor(LanguageRecursiveTextSplitter.py):=20change=20type=20h?= =?UTF-8?q?ints=20for=20the=20'chunk=5Foverlap'=20parameter=20from=20Optio?= =?UTF-8?q?nal[int]=20to=20Optional[str]=20and=20handle=20conversion=20to?= =?UTF-8?q?=20int=20to=20improve=20flexibility=20and=20error=20handling=20?= =?UTF-8?q?=F0=9F=94=A8=20refactor(LanguageRecursiveTextSplitter.py):=20ch?= =?UTF-8?q?ange=20type=20hints=20for=20the=20'chunk=5Fsize'=20parameter=20?= =?UTF-8?q?from=20Optional[int]=20to=20Optional[str]=20and=20handle=20conv?= =?UTF-8?q?ersion=20to=20int=20to=20improve=20flexibility=20and=20error=20?= =?UTF-8?q?handling=20=F0=9F=94=A8=20refactor(RecursiveCharacterTextSplitt?= =?UTF-8?q?er.py):=20change=20type=20hints=20for=20the=20'documents'=20par?= =?UTF-8?q?ameter=20from=20Document=20to=20list[Document]=20and=20change?= =?UTF-8?q?=20return=20type=20from=20Document=20to=20list[Document]=20to?= =?UTF-8?q?=20improve=20clarity=20and=20consistency=20=F0=9F=94=A8=20refac?= =?UTF-8?q?tor(RecursiveCharacterTextSplitter.py):=20change=20type=20hints?= =?UTF-8?q?=20for=20the=20'separators'=20parameter=20from=20Optional[str]?= =?UTF-8?q?=20to=20Optional[list[str]]=20and=20handle=20conversion=20to=20?= =?UTF-8?q?list=20of=20escaped=20characters=20to=20improve=20flexibility?= =?UTF-8?q?=20and=20error=20handling=20=F0=9F=94=A8=20refactor(RecursiveCh?= =?UTF-8?q?aracterTextSplitter.py):=20change=20type=20hints=20for=20the=20?= =?UTF-8?q?'chunk=5Foverlap'=20parameter=20from=20Optional[int]=20to=20Opt?= =?UTF-8?q?ional[str]=20and=20handle=20conversion=20to=20int=20to=20improv?= =?UTF-8?q?e=20flexibility=20and=20error=20handling=20=F0=9F=94=A8=20refac?= =?UTF-8?q?tor(RecursiveCharacterTextSplitter.py):=20change=20type=20hints?= =?UTF-8?q?=20for=20the=20'chunk=5Fsize'=20parameter=20from=20Optional[int?= =?UTF-8?q?]=20to=20Optional[str]=20and=20handle=20conversion=20to=20int?= =?UTF-8?q?=20to=20improve=20flexibility=20and=20error=20handling?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../LanguageRecursiveTextSplitter.py | 15 ++++++--------- .../RecursiveCharacterTextSplitter.py | 14 +++++--------- 2 files changed, 11 insertions(+), 18 deletions(-) diff --git a/src/backend/langflow/components/textsplitters/LanguageRecursiveTextSplitter.py b/src/backend/langflow/components/textsplitters/LanguageRecursiveTextSplitter.py index 6b4373971..da7c0dd73 100644 --- a/src/backend/langflow/components/textsplitters/LanguageRecursiveTextSplitter.py +++ b/src/backend/langflow/components/textsplitters/LanguageRecursiveTextSplitter.py @@ -8,7 +8,7 @@ from langflow.utils.util import build_loader_repr_from_documents class LanguageRecursiveTextSplitterComponent(CustomComponent): display_name: str = "Language Recursive Text Splitter" description: str = "Split text into chunks of a specified length based on language." - documentation: str = "https://docs.langflow.org/components/text-splitters#recursivecharactertextsplitter" + documentation: str = "https://docs.langflow.org/components/text-splitters#languagerecursivetextsplitter" def build_config(self): options = [x.value for x in Language] @@ -46,11 +46,11 @@ class LanguageRecursiveTextSplitterComponent(CustomComponent): def build( self, - documents: Document, + documents: list[Document], chunk_size: Optional[int] = 1000, chunk_overlap: Optional[int] = 200, separator_type: Optional[str] = "Python", - ) -> Document: + ) -> list[Document]: """ Split text into chunks of a specified length. @@ -66,14 +66,11 @@ class LanguageRecursiveTextSplitterComponent(CustomComponent): from langchain.text_splitter import RecursiveCharacterTextSplitter # Make sure chunk_size and chunk_overlap are ints - try: + if isinstance(chunk_size, str): chunk_size = int(chunk_size) + if isinstance(chunk_overlap, str): chunk_overlap = int(chunk_overlap) - except Exception as e: - raise ValueError( - "chunk_size and chunk_overlap must be integers." - " Received chunk_size={chunk_size} and chunk_overlap={chunk_overlap}." - ) from e + splitter = RecursiveCharacterTextSplitter.from_language( language=Language(separator_type), chunk_size=chunk_size, diff --git a/src/backend/langflow/components/textsplitters/RecursiveCharacterTextSplitter.py b/src/backend/langflow/components/textsplitters/RecursiveCharacterTextSplitter.py index 3b1f70815..58b061f2f 100644 --- a/src/backend/langflow/components/textsplitters/RecursiveCharacterTextSplitter.py +++ b/src/backend/langflow/components/textsplitters/RecursiveCharacterTextSplitter.py @@ -36,11 +36,11 @@ class RecursiveCharacterTextSplitterComponent(CustomComponent): def build( self, - documents: Document, - separators: Optional[str] = None, + documents: list[Document], + separators: Optional[list[str]] = None, chunk_size: Optional[int] = 1000, chunk_overlap: Optional[int] = 200, - ) -> Document: + ) -> list[Document]: """ Split text into chunks of a specified length. @@ -63,14 +63,10 @@ class RecursiveCharacterTextSplitterComponent(CustomComponent): separators = [x.encode().decode("unicode-escape") for x in separators] # Make sure chunk_size and chunk_overlap are ints - try: + if isinstance(chunk_size, str): chunk_size = int(chunk_size) + if isinstance(chunk_overlap, str): chunk_overlap = int(chunk_overlap) - except Exception as e: - raise ValueError( - "chunk_size and chunk_overlap must be integers." - " Received chunk_size={chunk_size} and chunk_overlap={chunk_overlap}." - ) from e splitter = RecursiveCharacterTextSplitter( separators=separators, chunk_size=chunk_size,