📝 docs(LanguageRecursiveTextSplitter.py): update documentation link to point to the correct URL
🔨 refactor(LanguageRecursiveTextSplitter.py): change type hints for the 'documents' parameter from Document to list[Document] and change return type from Document to list[Document] to improve clarity and consistency
🔨 refactor(LanguageRecursiveTextSplitter.py): change type hints for the 'chunk_overlap' parameter from Optional[int] to Optional[str] and handle conversion to int to improve flexibility and error handling
🔨 refactor(LanguageRecursiveTextSplitter.py): change type hints for the 'chunk_size' parameter from Optional[int] to Optional[str] and handle conversion to int to improve flexibility and error handling
🔨 refactor(RecursiveCharacterTextSplitter.py): change type hints for the 'documents' parameter from Document to list[Document] and change return type from Document to list[Document] to improve clarity and consistency
🔨 refactor(RecursiveCharacterTextSplitter.py): change type hints for the 'separators' parameter from Optional[str] to Optional[list[str]] and handle conversion to list of escaped characters to improve flexibility and error handling
🔨 refactor(RecursiveCharacterTextSplitter.py): change type hints for the 'chunk_overlap' parameter from Optional[int] to Optional[str] and handle conversion to int to improve flexibility and error handling
🔨 refactor(RecursiveCharacterTextSplitter.py): change type hints for the 'chunk_size' parameter from Optional[int] to Optional[str] and handle conversion to int to improve flexibility and error handling
This commit is contained in:
parent
c9f4969080
commit
a28df1f08f
2 changed files with 11 additions and 18 deletions
|
|
@ -8,7 +8,7 @@ from langflow.utils.util import build_loader_repr_from_documents
|
|||
class LanguageRecursiveTextSplitterComponent(CustomComponent):
|
||||
display_name: str = "Language Recursive Text Splitter"
|
||||
description: str = "Split text into chunks of a specified length based on language."
|
||||
documentation: str = "https://docs.langflow.org/components/text-splitters#recursivecharactertextsplitter"
|
||||
documentation: str = "https://docs.langflow.org/components/text-splitters#languagerecursivetextsplitter"
|
||||
|
||||
def build_config(self):
|
||||
options = [x.value for x in Language]
|
||||
|
|
@ -46,11 +46,11 @@ class LanguageRecursiveTextSplitterComponent(CustomComponent):
|
|||
|
||||
def build(
|
||||
self,
|
||||
documents: Document,
|
||||
documents: list[Document],
|
||||
chunk_size: Optional[int] = 1000,
|
||||
chunk_overlap: Optional[int] = 200,
|
||||
separator_type: Optional[str] = "Python",
|
||||
) -> Document:
|
||||
) -> list[Document]:
|
||||
"""
|
||||
Split text into chunks of a specified length.
|
||||
|
||||
|
|
@ -66,14 +66,11 @@ class LanguageRecursiveTextSplitterComponent(CustomComponent):
|
|||
from langchain.text_splitter import RecursiveCharacterTextSplitter
|
||||
|
||||
# Make sure chunk_size and chunk_overlap are ints
|
||||
try:
|
||||
if isinstance(chunk_size, str):
|
||||
chunk_size = int(chunk_size)
|
||||
if isinstance(chunk_overlap, str):
|
||||
chunk_overlap = int(chunk_overlap)
|
||||
except Exception as e:
|
||||
raise ValueError(
|
||||
"chunk_size and chunk_overlap must be integers."
|
||||
" Received chunk_size={chunk_size} and chunk_overlap={chunk_overlap}."
|
||||
) from e
|
||||
|
||||
splitter = RecursiveCharacterTextSplitter.from_language(
|
||||
language=Language(separator_type),
|
||||
chunk_size=chunk_size,
|
||||
|
|
|
|||
|
|
@ -36,11 +36,11 @@ class RecursiveCharacterTextSplitterComponent(CustomComponent):
|
|||
|
||||
def build(
|
||||
self,
|
||||
documents: Document,
|
||||
separators: Optional[str] = None,
|
||||
documents: list[Document],
|
||||
separators: Optional[list[str]] = None,
|
||||
chunk_size: Optional[int] = 1000,
|
||||
chunk_overlap: Optional[int] = 200,
|
||||
) -> Document:
|
||||
) -> list[Document]:
|
||||
"""
|
||||
Split text into chunks of a specified length.
|
||||
|
||||
|
|
@ -63,14 +63,10 @@ class RecursiveCharacterTextSplitterComponent(CustomComponent):
|
|||
separators = [x.encode().decode("unicode-escape") for x in separators]
|
||||
|
||||
# Make sure chunk_size and chunk_overlap are ints
|
||||
try:
|
||||
if isinstance(chunk_size, str):
|
||||
chunk_size = int(chunk_size)
|
||||
if isinstance(chunk_overlap, str):
|
||||
chunk_overlap = int(chunk_overlap)
|
||||
except Exception as e:
|
||||
raise ValueError(
|
||||
"chunk_size and chunk_overlap must be integers."
|
||||
" Received chunk_size={chunk_size} and chunk_overlap={chunk_overlap}."
|
||||
) from e
|
||||
splitter = RecursiveCharacterTextSplitter(
|
||||
separators=separators,
|
||||
chunk_size=chunk_size,
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue