chore: Update RecursiveCharacterTextSplitterComponent method name to split_data
This commit is contained in:
parent
8d25605de4
commit
2cecfb009f
3 changed files with 4 additions and 4 deletions
2
docs/static/data/AstraDB-RAG-Flows.json
vendored
2
docs/static/data/AstraDB-RAG-Flows.json
vendored
|
|
@ -1902,7 +1902,7 @@
|
|||
"show": true,
|
||||
"title_case": false,
|
||||
"type": "code",
|
||||
"value": "from langchain_text_splitters import RecursiveCharacterTextSplitter\n\nfrom langflow.custom import Component\nfrom langflow.inputs.inputs import DataInput, IntInput, TextInput\nfrom langflow.schema import Data\nfrom langflow.template.field.base import Output\nfrom langflow.utils.util import build_loader_repr_from_data, unescape_string\n\n\nclass RecursiveCharacterTextSplitterComponent(Component):\n display_name: str = \"Recursive Character Text Splitter\"\n description: str = \"Split text into chunks of a specified length.\"\n documentation: str = \"https://docs.langflow.org/components/text-splitters#recursivecharactertextsplitter\"\n\n inputs = [\n IntInput(\n name=\"chunk_size\",\n display_name=\"Chunk Size\",\n info=\"The maximum length of each chunk.\",\n value=1000,\n ),\n IntInput(\n name=\"chunk_overlap\",\n display_name=\"Chunk Overlap\",\n info=\"The amount of overlap between chunks.\",\n value=200,\n ),\n DataInput(\n name=\"data_input\",\n display_name=\"Input\",\n info=\"The texts to split.\",\n input_types=[\"Document\", \"Data\"],\n ),\n TextInput(\n name=\"separators\",\n display_name=\"Separators\",\n info='The characters to split on.\\nIf left empty defaults to [\"\\\\n\\\\n\", \"\\\\n\", \" \", \"\"].',\n is_list=True,\n ),\n ]\n outputs = [\n Output(display_name=\"Data\", name=\"data\", method=\"build\"),\n ]\n\n def build(self) -> list[Data]:\n \"\"\"\n Split text into chunks of a specified length.\n\n Args:\n separators (list[str]): The characters to split on.\n chunk_size (int): The maximum length of each chunk.\n chunk_overlap (int): The amount of overlap between chunks.\n\n Returns:\n list[str]: The chunks of text.\n \"\"\"\n\n if self.separators == \"\":\n self.separators = None\n elif self.separators:\n # check if the separators list has escaped characters\n # if there are escaped characters, unescape them\n self.separators = [unescape_string(x) for x in self.separators]\n\n # Make sure chunk_size and chunk_overlap are ints\n if isinstance(self.chunk_size, str):\n self.chunk_size = int(self.chunk_size)\n if isinstance(self.chunk_overlap, str):\n self.chunk_overlap = int(self.chunk_overlap)\n splitter = RecursiveCharacterTextSplitter(\n separators=self.separators,\n chunk_size=self.chunk_size,\n chunk_overlap=self.chunk_overlap,\n )\n documents = []\n if not isinstance(self.data_input, list):\n self.data_input = [self.data_input]\n for _input in self.data_input:\n if isinstance(_input, Data):\n documents.append(_input.to_lc_document())\n else:\n documents.append(_input)\n docs = splitter.split_documents(documents)\n data = self.to_data(docs)\n self.repr_value = build_loader_repr_from_data(data)\n return data\n"
|
||||
"value": "from langchain_text_splitters import RecursiveCharacterTextSplitter\n\nfrom langflow.custom import Component\nfrom langflow.inputs.inputs import DataInput, IntInput, TextInput\nfrom langflow.schema import Data\nfrom langflow.template.field.base import Output\nfrom langflow.utils.util import build_loader_repr_from_data, unescape_string\n\n\nclass RecursiveCharacterTextSplitterComponent(Component):\n display_name: str = \"Recursive Character Text Splitter\"\n description: str = \"Split text into chunks of a specified length.\"\n documentation: str = \"https://docs.langflow.org/components/text-splitters#recursivecharactertextsplitter\"\n\n inputs = [\n IntInput(\n name=\"chunk_size\",\n display_name=\"Chunk Size\",\n info=\"The maximum length of each chunk.\",\n value=1000,\n ),\n IntInput(\n name=\"chunk_overlap\",\n display_name=\"Chunk Overlap\",\n info=\"The amount of overlap between chunks.\",\n value=200,\n ),\n DataInput(\n name=\"data_input\",\n display_name=\"Input\",\n info=\"The texts to split.\",\n input_types=[\"Document\", \"Data\"],\n ),\n TextInput(\n name=\"separators\",\n display_name=\"Separators\",\n info='The characters to split on.\\nIf left empty defaults to [\"\\\\n\\\\n\", \"\\\\n\", \" \", \"\"].',\n is_list=True,\n ),\n ]\n outputs = [\n Output(display_name=\"Data\", name=\"data\", method=\"split_data\"),\n ]\n\n def split_data(self) -> list[Data]:\n \"\"\"\n Split text into chunks of a specified length.\n\n Args:\n separators (list[str]): The characters to split on.\n chunk_size (int): The maximum length of each chunk.\n chunk_overlap (int): The amount of overlap between chunks.\n\n Returns:\n list[str]: The chunks of text.\n \"\"\"\n\n if self.separators == \"\":\n self.separators = None\n elif self.separators:\n # check if the separators list has escaped characters\n # if there are escaped characters, unescape them\n self.separators = [unescape_string(x) for x in self.separators]\n\n # Make sure chunk_size and chunk_overlap are ints\n if isinstance(self.chunk_size, str):\n self.chunk_size = int(self.chunk_size)\n if isinstance(self.chunk_overlap, str):\n self.chunk_overlap = int(self.chunk_overlap)\n splitter = RecursiveCharacterTextSplitter(\n separators=self.separators,\n chunk_size=self.chunk_size,\n chunk_overlap=self.chunk_overlap,\n )\n documents = []\n if not isinstance(self.data_input, list):\n self.data_input = [self.data_input]\n for _input in self.data_input:\n if isinstance(_input, Data):\n documents.append(_input.to_lc_document())\n else:\n documents.append(_input)\n docs = splitter.split_documents(documents)\n data = self.to_data(docs)\n self.repr_value = build_loader_repr_from_data(data)\n return data\n"
|
||||
},
|
||||
"data_input": {
|
||||
"advanced": false,
|
||||
|
|
|
|||
|
|
@ -39,10 +39,10 @@ class RecursiveCharacterTextSplitterComponent(Component):
|
|||
),
|
||||
]
|
||||
outputs = [
|
||||
Output(display_name="Data", name="data", method="build"),
|
||||
Output(display_name="Data", name="data", method="split_data"),
|
||||
]
|
||||
|
||||
def build(self) -> list[Data]:
|
||||
def split_data(self) -> list[Data]:
|
||||
"""
|
||||
Split text into chunks of a specified length.
|
||||
|
||||
|
|
|
|||
|
|
@ -2206,7 +2206,7 @@
|
|||
"show": true,
|
||||
"title_case": false,
|
||||
"type": "code",
|
||||
"value": "from langchain_text_splitters import RecursiveCharacterTextSplitter\n\nfrom langflow.custom import Component\nfrom langflow.inputs.inputs import DataInput, IntInput, TextInput\nfrom langflow.schema import Data\nfrom langflow.template.field.base import Output\nfrom langflow.utils.util import build_loader_repr_from_data, unescape_string\n\n\nclass RecursiveCharacterTextSplitterComponent(Component):\n display_name: str = \"Recursive Character Text Splitter\"\n description: str = \"Split text into chunks of a specified length.\"\n documentation: str = \"https://docs.langflow.org/components/text-splitters#recursivecharactertextsplitter\"\n\n inputs = [\n IntInput(\n name=\"chunk_size\",\n display_name=\"Chunk Size\",\n info=\"The maximum length of each chunk.\",\n value=1000,\n ),\n IntInput(\n name=\"chunk_overlap\",\n display_name=\"Chunk Overlap\",\n info=\"The amount of overlap between chunks.\",\n value=200,\n ),\n DataInput(\n name=\"data_input\",\n display_name=\"Input\",\n info=\"The texts to split.\",\n input_types=[\"Document\", \"Data\"],\n ),\n TextInput(\n name=\"separators\",\n display_name=\"Separators\",\n info='The characters to split on.\\nIf left empty defaults to [\"\\\\n\\\\n\", \"\\\\n\", \" \", \"\"].',\n is_list=True,\n ),\n ]\n outputs = [\n Output(display_name=\"Data\", name=\"data\", method=\"build\"),\n ]\n\n def build(self) -> list[Data]:\n \"\"\"\n Split text into chunks of a specified length.\n\n Args:\n separators (list[str]): The characters to split on.\n chunk_size (int): The maximum length of each chunk.\n chunk_overlap (int): The amount of overlap between chunks.\n\n Returns:\n list[str]: The chunks of text.\n \"\"\"\n\n if self.separators == \"\":\n self.separators = None\n elif self.separators:\n # check if the separators list has escaped characters\n # if there are escaped characters, unescape them\n self.separators = [unescape_string(x) for x in self.separators]\n\n # Make sure chunk_size and chunk_overlap are ints\n if isinstance(self.chunk_size, str):\n self.chunk_size = int(self.chunk_size)\n if isinstance(self.chunk_overlap, str):\n self.chunk_overlap = int(self.chunk_overlap)\n splitter = RecursiveCharacterTextSplitter(\n separators=self.separators,\n chunk_size=self.chunk_size,\n chunk_overlap=self.chunk_overlap,\n )\n documents = []\n if not isinstance(self.data_input, list):\n self.data_input = [self.data_input]\n for _input in self.data_input:\n if isinstance(_input, Data):\n documents.append(_input.to_lc_document())\n else:\n documents.append(_input)\n docs = splitter.split_documents(documents)\n data = self.to_data(docs)\n self.repr_value = build_loader_repr_from_data(data)\n return data\n"
|
||||
"value": "from langchain_text_splitters import RecursiveCharacterTextSplitter\n\nfrom langflow.custom import Component\nfrom langflow.inputs.inputs import DataInput, IntInput, TextInput\nfrom langflow.schema import Data\nfrom langflow.template.field.base import Output\nfrom langflow.utils.util import build_loader_repr_from_data, unescape_string\n\n\nclass RecursiveCharacterTextSplitterComponent(Component):\n display_name: str = \"Recursive Character Text Splitter\"\n description: str = \"Split text into chunks of a specified length.\"\n documentation: str = \"https://docs.langflow.org/components/text-splitters#recursivecharactertextsplitter\"\n\n inputs = [\n IntInput(\n name=\"chunk_size\",\n display_name=\"Chunk Size\",\n info=\"The maximum length of each chunk.\",\n value=1000,\n ),\n IntInput(\n name=\"chunk_overlap\",\n display_name=\"Chunk Overlap\",\n info=\"The amount of overlap between chunks.\",\n value=200,\n ),\n DataInput(\n name=\"data_input\",\n display_name=\"Input\",\n info=\"The texts to split.\",\n input_types=[\"Document\", \"Data\"],\n ),\n TextInput(\n name=\"separators\",\n display_name=\"Separators\",\n info='The characters to split on.\\nIf left empty defaults to [\"\\\\n\\\\n\", \"\\\\n\", \" \", \"\"].',\n is_list=True,\n ),\n ]\n outputs = [\n Output(display_name=\"Data\", name=\"data\", method=\"split_data\"),\n ]\n\n def split_data(self) -> list[Data]:\n \"\"\"\n Split text into chunks of a specified length.\n\n Args:\n separators (list[str]): The characters to split on.\n chunk_size (int): The maximum length of each chunk.\n chunk_overlap (int): The amount of overlap between chunks.\n\n Returns:\n list[str]: The chunks of text.\n \"\"\"\n\n if self.separators == \"\":\n self.separators = None\n elif self.separators:\n # check if the separators list has escaped characters\n # if there are escaped characters, unescape them\n self.separators = [unescape_string(x) for x in self.separators]\n\n # Make sure chunk_size and chunk_overlap are ints\n if isinstance(self.chunk_size, str):\n self.chunk_size = int(self.chunk_size)\n if isinstance(self.chunk_overlap, str):\n self.chunk_overlap = int(self.chunk_overlap)\n splitter = RecursiveCharacterTextSplitter(\n separators=self.separators,\n chunk_size=self.chunk_size,\n chunk_overlap=self.chunk_overlap,\n )\n documents = []\n if not isinstance(self.data_input, list):\n self.data_input = [self.data_input]\n for _input in self.data_input:\n if isinstance(_input, Data):\n documents.append(_input.to_lc_document())\n else:\n documents.append(_input)\n docs = splitter.split_documents(documents)\n data = self.to_data(docs)\n self.repr_value = build_loader_repr_from_data(data)\n return data\n"
|
||||
},
|
||||
"data_input": {
|
||||
"advanced": false,
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue