From 2cecfb009fb7ec73aec314b139771f53cc4b5d3e Mon Sep 17 00:00:00 2001 From: Gabriel Luiz Freitas Almeida Date: Sat, 22 Jun 2024 14:19:30 -0300 Subject: [PATCH] chore: Update RecursiveCharacterTextSplitterComponent method name to split_data --- docs/static/data/AstraDB-RAG-Flows.json | 2 +- .../textsplitters/RecursiveCharacterTextSplitter.py | 4 ++-- .../initial_setup/starter_projects/VectorStore-RAG-Flows.json | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/docs/static/data/AstraDB-RAG-Flows.json b/docs/static/data/AstraDB-RAG-Flows.json index b23d97a57..61464da86 100644 --- a/docs/static/data/AstraDB-RAG-Flows.json +++ b/docs/static/data/AstraDB-RAG-Flows.json @@ -1902,7 +1902,7 @@ "show": true, "title_case": false, "type": "code", - "value": "from langchain_text_splitters import RecursiveCharacterTextSplitter\n\nfrom langflow.custom import Component\nfrom langflow.inputs.inputs import DataInput, IntInput, TextInput\nfrom langflow.schema import Data\nfrom langflow.template.field.base import Output\nfrom langflow.utils.util import build_loader_repr_from_data, unescape_string\n\n\nclass RecursiveCharacterTextSplitterComponent(Component):\n display_name: str = \"Recursive Character Text Splitter\"\n description: str = \"Split text into chunks of a specified length.\"\n documentation: str = \"https://docs.langflow.org/components/text-splitters#recursivecharactertextsplitter\"\n\n inputs = [\n IntInput(\n name=\"chunk_size\",\n display_name=\"Chunk Size\",\n info=\"The maximum length of each chunk.\",\n value=1000,\n ),\n IntInput(\n name=\"chunk_overlap\",\n display_name=\"Chunk Overlap\",\n info=\"The amount of overlap between chunks.\",\n value=200,\n ),\n DataInput(\n name=\"data_input\",\n display_name=\"Input\",\n info=\"The texts to split.\",\n input_types=[\"Document\", \"Data\"],\n ),\n TextInput(\n name=\"separators\",\n display_name=\"Separators\",\n info='The characters to split on.\\nIf left empty defaults to [\"\\\\n\\\\n\", \"\\\\n\", \" \", \"\"].',\n is_list=True,\n ),\n ]\n outputs = [\n Output(display_name=\"Data\", name=\"data\", method=\"build\"),\n ]\n\n def build(self) -> list[Data]:\n \"\"\"\n Split text into chunks of a specified length.\n\n Args:\n separators (list[str]): The characters to split on.\n chunk_size (int): The maximum length of each chunk.\n chunk_overlap (int): The amount of overlap between chunks.\n\n Returns:\n list[str]: The chunks of text.\n \"\"\"\n\n if self.separators == \"\":\n self.separators = None\n elif self.separators:\n # check if the separators list has escaped characters\n # if there are escaped characters, unescape them\n self.separators = [unescape_string(x) for x in self.separators]\n\n # Make sure chunk_size and chunk_overlap are ints\n if isinstance(self.chunk_size, str):\n self.chunk_size = int(self.chunk_size)\n if isinstance(self.chunk_overlap, str):\n self.chunk_overlap = int(self.chunk_overlap)\n splitter = RecursiveCharacterTextSplitter(\n separators=self.separators,\n chunk_size=self.chunk_size,\n chunk_overlap=self.chunk_overlap,\n )\n documents = []\n if not isinstance(self.data_input, list):\n self.data_input = [self.data_input]\n for _input in self.data_input:\n if isinstance(_input, Data):\n documents.append(_input.to_lc_document())\n else:\n documents.append(_input)\n docs = splitter.split_documents(documents)\n data = self.to_data(docs)\n self.repr_value = build_loader_repr_from_data(data)\n return data\n" + "value": "from langchain_text_splitters import RecursiveCharacterTextSplitter\n\nfrom langflow.custom import Component\nfrom langflow.inputs.inputs import DataInput, IntInput, TextInput\nfrom langflow.schema import Data\nfrom langflow.template.field.base import Output\nfrom langflow.utils.util import build_loader_repr_from_data, unescape_string\n\n\nclass RecursiveCharacterTextSplitterComponent(Component):\n display_name: str = \"Recursive Character Text Splitter\"\n description: str = \"Split text into chunks of a specified length.\"\n documentation: str = \"https://docs.langflow.org/components/text-splitters#recursivecharactertextsplitter\"\n\n inputs = [\n IntInput(\n name=\"chunk_size\",\n display_name=\"Chunk Size\",\n info=\"The maximum length of each chunk.\",\n value=1000,\n ),\n IntInput(\n name=\"chunk_overlap\",\n display_name=\"Chunk Overlap\",\n info=\"The amount of overlap between chunks.\",\n value=200,\n ),\n DataInput(\n name=\"data_input\",\n display_name=\"Input\",\n info=\"The texts to split.\",\n input_types=[\"Document\", \"Data\"],\n ),\n TextInput(\n name=\"separators\",\n display_name=\"Separators\",\n info='The characters to split on.\\nIf left empty defaults to [\"\\\\n\\\\n\", \"\\\\n\", \" \", \"\"].',\n is_list=True,\n ),\n ]\n outputs = [\n Output(display_name=\"Data\", name=\"data\", method=\"split_data\"),\n ]\n\n def split_data(self) -> list[Data]:\n \"\"\"\n Split text into chunks of a specified length.\n\n Args:\n separators (list[str]): The characters to split on.\n chunk_size (int): The maximum length of each chunk.\n chunk_overlap (int): The amount of overlap between chunks.\n\n Returns:\n list[str]: The chunks of text.\n \"\"\"\n\n if self.separators == \"\":\n self.separators = None\n elif self.separators:\n # check if the separators list has escaped characters\n # if there are escaped characters, unescape them\n self.separators = [unescape_string(x) for x in self.separators]\n\n # Make sure chunk_size and chunk_overlap are ints\n if isinstance(self.chunk_size, str):\n self.chunk_size = int(self.chunk_size)\n if isinstance(self.chunk_overlap, str):\n self.chunk_overlap = int(self.chunk_overlap)\n splitter = RecursiveCharacterTextSplitter(\n separators=self.separators,\n chunk_size=self.chunk_size,\n chunk_overlap=self.chunk_overlap,\n )\n documents = []\n if not isinstance(self.data_input, list):\n self.data_input = [self.data_input]\n for _input in self.data_input:\n if isinstance(_input, Data):\n documents.append(_input.to_lc_document())\n else:\n documents.append(_input)\n docs = splitter.split_documents(documents)\n data = self.to_data(docs)\n self.repr_value = build_loader_repr_from_data(data)\n return data\n" }, "data_input": { "advanced": false, diff --git a/src/backend/base/langflow/components/textsplitters/RecursiveCharacterTextSplitter.py b/src/backend/base/langflow/components/textsplitters/RecursiveCharacterTextSplitter.py index ab4308afe..f5e44c5e2 100644 --- a/src/backend/base/langflow/components/textsplitters/RecursiveCharacterTextSplitter.py +++ b/src/backend/base/langflow/components/textsplitters/RecursiveCharacterTextSplitter.py @@ -39,10 +39,10 @@ class RecursiveCharacterTextSplitterComponent(Component): ), ] outputs = [ - Output(display_name="Data", name="data", method="build"), + Output(display_name="Data", name="data", method="split_data"), ] - def build(self) -> list[Data]: + def split_data(self) -> list[Data]: """ Split text into chunks of a specified length. diff --git a/src/backend/base/langflow/initial_setup/starter_projects/VectorStore-RAG-Flows.json b/src/backend/base/langflow/initial_setup/starter_projects/VectorStore-RAG-Flows.json index ad307e2e9..178f3266e 100644 --- a/src/backend/base/langflow/initial_setup/starter_projects/VectorStore-RAG-Flows.json +++ b/src/backend/base/langflow/initial_setup/starter_projects/VectorStore-RAG-Flows.json @@ -2206,7 +2206,7 @@ "show": true, "title_case": false, "type": "code", - "value": "from langchain_text_splitters import RecursiveCharacterTextSplitter\n\nfrom langflow.custom import Component\nfrom langflow.inputs.inputs import DataInput, IntInput, TextInput\nfrom langflow.schema import Data\nfrom langflow.template.field.base import Output\nfrom langflow.utils.util import build_loader_repr_from_data, unescape_string\n\n\nclass RecursiveCharacterTextSplitterComponent(Component):\n display_name: str = \"Recursive Character Text Splitter\"\n description: str = \"Split text into chunks of a specified length.\"\n documentation: str = \"https://docs.langflow.org/components/text-splitters#recursivecharactertextsplitter\"\n\n inputs = [\n IntInput(\n name=\"chunk_size\",\n display_name=\"Chunk Size\",\n info=\"The maximum length of each chunk.\",\n value=1000,\n ),\n IntInput(\n name=\"chunk_overlap\",\n display_name=\"Chunk Overlap\",\n info=\"The amount of overlap between chunks.\",\n value=200,\n ),\n DataInput(\n name=\"data_input\",\n display_name=\"Input\",\n info=\"The texts to split.\",\n input_types=[\"Document\", \"Data\"],\n ),\n TextInput(\n name=\"separators\",\n display_name=\"Separators\",\n info='The characters to split on.\\nIf left empty defaults to [\"\\\\n\\\\n\", \"\\\\n\", \" \", \"\"].',\n is_list=True,\n ),\n ]\n outputs = [\n Output(display_name=\"Data\", name=\"data\", method=\"build\"),\n ]\n\n def build(self) -> list[Data]:\n \"\"\"\n Split text into chunks of a specified length.\n\n Args:\n separators (list[str]): The characters to split on.\n chunk_size (int): The maximum length of each chunk.\n chunk_overlap (int): The amount of overlap between chunks.\n\n Returns:\n list[str]: The chunks of text.\n \"\"\"\n\n if self.separators == \"\":\n self.separators = None\n elif self.separators:\n # check if the separators list has escaped characters\n # if there are escaped characters, unescape them\n self.separators = [unescape_string(x) for x in self.separators]\n\n # Make sure chunk_size and chunk_overlap are ints\n if isinstance(self.chunk_size, str):\n self.chunk_size = int(self.chunk_size)\n if isinstance(self.chunk_overlap, str):\n self.chunk_overlap = int(self.chunk_overlap)\n splitter = RecursiveCharacterTextSplitter(\n separators=self.separators,\n chunk_size=self.chunk_size,\n chunk_overlap=self.chunk_overlap,\n )\n documents = []\n if not isinstance(self.data_input, list):\n self.data_input = [self.data_input]\n for _input in self.data_input:\n if isinstance(_input, Data):\n documents.append(_input.to_lc_document())\n else:\n documents.append(_input)\n docs = splitter.split_documents(documents)\n data = self.to_data(docs)\n self.repr_value = build_loader_repr_from_data(data)\n return data\n" + "value": "from langchain_text_splitters import RecursiveCharacterTextSplitter\n\nfrom langflow.custom import Component\nfrom langflow.inputs.inputs import DataInput, IntInput, TextInput\nfrom langflow.schema import Data\nfrom langflow.template.field.base import Output\nfrom langflow.utils.util import build_loader_repr_from_data, unescape_string\n\n\nclass RecursiveCharacterTextSplitterComponent(Component):\n display_name: str = \"Recursive Character Text Splitter\"\n description: str = \"Split text into chunks of a specified length.\"\n documentation: str = \"https://docs.langflow.org/components/text-splitters#recursivecharactertextsplitter\"\n\n inputs = [\n IntInput(\n name=\"chunk_size\",\n display_name=\"Chunk Size\",\n info=\"The maximum length of each chunk.\",\n value=1000,\n ),\n IntInput(\n name=\"chunk_overlap\",\n display_name=\"Chunk Overlap\",\n info=\"The amount of overlap between chunks.\",\n value=200,\n ),\n DataInput(\n name=\"data_input\",\n display_name=\"Input\",\n info=\"The texts to split.\",\n input_types=[\"Document\", \"Data\"],\n ),\n TextInput(\n name=\"separators\",\n display_name=\"Separators\",\n info='The characters to split on.\\nIf left empty defaults to [\"\\\\n\\\\n\", \"\\\\n\", \" \", \"\"].',\n is_list=True,\n ),\n ]\n outputs = [\n Output(display_name=\"Data\", name=\"data\", method=\"split_data\"),\n ]\n\n def split_data(self) -> list[Data]:\n \"\"\"\n Split text into chunks of a specified length.\n\n Args:\n separators (list[str]): The characters to split on.\n chunk_size (int): The maximum length of each chunk.\n chunk_overlap (int): The amount of overlap between chunks.\n\n Returns:\n list[str]: The chunks of text.\n \"\"\"\n\n if self.separators == \"\":\n self.separators = None\n elif self.separators:\n # check if the separators list has escaped characters\n # if there are escaped characters, unescape them\n self.separators = [unescape_string(x) for x in self.separators]\n\n # Make sure chunk_size and chunk_overlap are ints\n if isinstance(self.chunk_size, str):\n self.chunk_size = int(self.chunk_size)\n if isinstance(self.chunk_overlap, str):\n self.chunk_overlap = int(self.chunk_overlap)\n splitter = RecursiveCharacterTextSplitter(\n separators=self.separators,\n chunk_size=self.chunk_size,\n chunk_overlap=self.chunk_overlap,\n )\n documents = []\n if not isinstance(self.data_input, list):\n self.data_input = [self.data_input]\n for _input in self.data_input:\n if isinstance(_input, Data):\n documents.append(_input.to_lc_document())\n else:\n documents.append(_input)\n docs = splitter.split_documents(documents)\n data = self.to_data(docs)\n self.repr_value = build_loader_repr_from_data(data)\n return data\n" }, "data_input": { "advanced": false,