diff --git a/src/backend/base/langflow/initial_setup/starter_projects/VectorStore-RAG-Flows.json b/src/backend/base/langflow/initial_setup/starter_projects/VectorStore-RAG-Flows.json index 097fdbbc2..c629cdb3e 100644 --- a/src/backend/base/langflow/initial_setup/starter_projects/VectorStore-RAG-Flows.json +++ b/src/backend/base/langflow/initial_setup/starter_projects/VectorStore-RAG-Flows.json @@ -358,7 +358,7 @@ "list": false, "show": true, "multiline": true, - "value": "from typing import Any, Dict, List, Optional\n\nfrom langchain_openai.embeddings.base import OpenAIEmbeddings\nfrom pydantic.v1 import SecretStr\n\nfrom langflow.field_typing import Embeddings, NestedDict\nfrom langflow.interface.custom.custom_component import CustomComponent\n\n\nclass OpenAIEmbeddingsComponent(CustomComponent):\n display_name = \"OpenAI Embeddings\"\n description = \"Generate embeddings using OpenAI models.\"\n\n def build_config(self):\n return {\n \"allowed_special\": {\n \"display_name\": \"Allowed Special\",\n \"advanced\": True,\n \"field_type\": \"str\",\n \"is_list\": True,\n },\n \"default_headers\": {\n \"display_name\": \"Default Headers\",\n \"advanced\": True,\n \"field_type\": \"dict\",\n },\n \"default_query\": {\n \"display_name\": \"Default Query\",\n \"advanced\": True,\n \"field_type\": \"NestedDict\",\n },\n \"disallowed_special\": {\n \"display_name\": \"Disallowed Special\",\n \"advanced\": True,\n \"field_type\": \"str\",\n \"is_list\": True,\n },\n \"chunk_size\": {\"display_name\": \"Chunk Size\", \"advanced\": True},\n \"client\": {\"display_name\": \"Client\", \"advanced\": True},\n \"deployment\": {\"display_name\": \"Deployment\", \"advanced\": True},\n \"embedding_ctx_length\": {\n \"display_name\": \"Embedding Context Length\",\n \"advanced\": True,\n },\n \"max_retries\": {\"display_name\": \"Max Retries\", \"advanced\": True},\n \"model\": {\n \"display_name\": \"Model\",\n \"advanced\": False,\n \"options\": [\n \"text-embedding-3-small\",\n \"text-embedding-3-large\",\n \"text-embedding-ada-002\",\n ],\n },\n \"model_kwargs\": {\"display_name\": \"Model Kwargs\", \"advanced\": True},\n \"openai_api_base\": {\n \"display_name\": \"OpenAI API Base\",\n \"password\": True,\n \"advanced\": True,\n },\n \"openai_api_key\": {\"display_name\": \"OpenAI API Key\", \"password\": True},\n \"openai_api_type\": {\n \"display_name\": \"OpenAI API Type\",\n \"advanced\": True,\n \"password\": True,\n },\n \"openai_api_version\": {\n \"display_name\": \"OpenAI API Version\",\n \"advanced\": True,\n },\n \"openai_organization\": {\n \"display_name\": \"OpenAI Organization\",\n \"advanced\": True,\n },\n \"openai_proxy\": {\"display_name\": \"OpenAI Proxy\", \"advanced\": True},\n \"request_timeout\": {\"display_name\": \"Request Timeout\", \"advanced\": True},\n \"show_progress_bar\": {\n \"display_name\": \"Show Progress Bar\",\n \"advanced\": True,\n },\n \"skip_empty\": {\"display_name\": \"Skip Empty\", \"advanced\": True},\n \"tiktoken_model_name\": {\n \"display_name\": \"TikToken Model Name\",\n \"advanced\": True,\n },\n \"tiktoken_enable\": {\"display_name\": \"TikToken Enable\", \"advanced\": True},\n }\n\n def build(\n self,\n openai_api_key: str,\n default_headers: Optional[Dict[str, str]] = None,\n default_query: Optional[NestedDict] = {},\n allowed_special: List[str] = [],\n disallowed_special: List[str] = [\"all\"],\n chunk_size: int = 1000,\n client: Optional[Any] = None,\n deployment: str = \"text-embedding-ada-002\",\n embedding_ctx_length: int = 8191,\n max_retries: int = 6,\n model: str = \"text-embedding-ada-002\",\n model_kwargs: NestedDict = {},\n openai_api_base: Optional[str] = None,\n openai_api_type: Optional[str] = None,\n openai_api_version: Optional[str] = None,\n openai_organization: Optional[str] = None,\n openai_proxy: Optional[str] = None,\n request_timeout: Optional[float] = None,\n show_progress_bar: bool = False,\n skip_empty: bool = False,\n tiktoken_enable: bool = True,\n tiktoken_model_name: Optional[str] = None,\n ) -> Embeddings:\n # This is to avoid errors with Vector Stores (e.g Chroma)\n if disallowed_special == [\"all\"]:\n disallowed_special = \"all\" # type: ignore\n if openai_api_key:\n api_key = SecretStr(openai_api_key)\n else:\n api_key = None\n\n return OpenAIEmbeddings(\n tiktoken_enabled=tiktoken_enable,\n default_headers=default_headers,\n default_query=default_query,\n allowed_special=set(allowed_special),\n disallowed_special=\"all\",\n chunk_size=chunk_size,\n client=client,\n deployment=deployment,\n embedding_ctx_length=embedding_ctx_length,\n max_retries=max_retries,\n model=model,\n model_kwargs=model_kwargs,\n base_url=openai_api_base,\n api_key=api_key,\n openai_api_type=openai_api_type,\n api_version=openai_api_version,\n organization=openai_organization,\n openai_proxy=openai_proxy,\n timeout=request_timeout,\n show_progress_bar=show_progress_bar,\n skip_empty=skip_empty,\n tiktoken_model_name=tiktoken_model_name,\n )\n", + "value": "from typing import Dict, List, Optional\n\nfrom langchain_openai.embeddings.base import OpenAIEmbeddings\nfrom pydantic.v1 import SecretStr\n\nfrom langflow.custom import CustomComponent\nfrom langflow.field_typing import Embeddings, NestedDict\n\n\nclass OpenAIEmbeddingsComponent(CustomComponent):\n display_name = \"OpenAI Embeddings\"\n description = \"Generate embeddings using OpenAI models.\"\n\n def build_config(self):\n return {\n \"allowed_special\": {\n \"display_name\": \"Allowed Special\",\n \"advanced\": True,\n \"field_type\": \"str\",\n \"is_list\": True,\n },\n \"default_headers\": {\n \"display_name\": \"Default Headers\",\n \"advanced\": True,\n \"field_type\": \"dict\",\n },\n \"default_query\": {\n \"display_name\": \"Default Query\",\n \"advanced\": True,\n \"field_type\": \"NestedDict\",\n },\n \"disallowed_special\": {\n \"display_name\": \"Disallowed Special\",\n \"advanced\": True,\n \"field_type\": \"str\",\n \"is_list\": True,\n },\n \"chunk_size\": {\"display_name\": \"Chunk Size\", \"advanced\": True},\n \"client\": {\"display_name\": \"Client\", \"advanced\": True},\n \"deployment\": {\"display_name\": \"Deployment\", \"advanced\": True},\n \"embedding_ctx_length\": {\n \"display_name\": \"Embedding Context Length\",\n \"advanced\": True,\n },\n \"max_retries\": {\"display_name\": \"Max Retries\", \"advanced\": True},\n \"model\": {\n \"display_name\": \"Model\",\n \"advanced\": False,\n \"options\": [\n \"text-embedding-3-small\",\n \"text-embedding-3-large\",\n \"text-embedding-ada-002\",\n ],\n },\n \"model_kwargs\": {\"display_name\": \"Model Kwargs\", \"advanced\": True},\n \"openai_api_base\": {\n \"display_name\": \"OpenAI API Base\",\n \"password\": True,\n \"advanced\": True,\n },\n \"openai_api_key\": {\"display_name\": \"OpenAI API Key\", \"password\": True},\n \"openai_api_type\": {\n \"display_name\": \"OpenAI API Type\",\n \"advanced\": True,\n \"password\": True,\n },\n \"openai_api_version\": {\n \"display_name\": \"OpenAI API Version\",\n \"advanced\": True,\n },\n \"openai_organization\": {\n \"display_name\": \"OpenAI Organization\",\n \"advanced\": True,\n },\n \"openai_proxy\": {\"display_name\": \"OpenAI Proxy\", \"advanced\": True},\n \"request_timeout\": {\"display_name\": \"Request Timeout\", \"advanced\": True},\n \"show_progress_bar\": {\n \"display_name\": \"Show Progress Bar\",\n \"advanced\": True,\n },\n \"skip_empty\": {\"display_name\": \"Skip Empty\", \"advanced\": True},\n \"tiktoken_model_name\": {\n \"display_name\": \"TikToken Model Name\",\n \"advanced\": True,\n },\n \"tiktoken_enable\": {\"display_name\": \"TikToken Enable\", \"advanced\": True},\n }\n\n def build(\n self,\n openai_api_key: str,\n default_headers: Optional[Dict[str, str]] = None,\n default_query: Optional[NestedDict] = {},\n allowed_special: List[str] = [],\n disallowed_special: List[str] = [\"all\"],\n chunk_size: int = 1000,\n deployment: str = \"text-embedding-ada-002\",\n embedding_ctx_length: int = 8191,\n max_retries: int = 6,\n model: str = \"text-embedding-ada-002\",\n model_kwargs: NestedDict = {},\n openai_api_base: Optional[str] = None,\n openai_api_type: Optional[str] = None,\n openai_api_version: Optional[str] = None,\n openai_organization: Optional[str] = None,\n openai_proxy: Optional[str] = None,\n request_timeout: Optional[float] = None,\n show_progress_bar: bool = False,\n skip_empty: bool = False,\n tiktoken_enable: bool = True,\n tiktoken_model_name: Optional[str] = None,\n ) -> Embeddings:\n # This is to avoid errors with Vector Stores (e.g Chroma)\n if disallowed_special == [\"all\"]:\n disallowed_special = \"all\" # type: ignore\n if openai_api_key:\n api_key = SecretStr(openai_api_key)\n else:\n api_key = None\n\n return OpenAIEmbeddings(\n tiktoken_enabled=tiktoken_enable,\n default_headers=default_headers,\n default_query=default_query,\n allowed_special=set(allowed_special),\n disallowed_special=\"all\",\n chunk_size=chunk_size,\n deployment=deployment,\n embedding_ctx_length=embedding_ctx_length,\n max_retries=max_retries,\n model=model,\n model_kwargs=model_kwargs,\n base_url=openai_api_base,\n api_key=api_key,\n openai_api_type=openai_api_type,\n api_version=openai_api_version,\n organization=openai_organization,\n openai_proxy=openai_proxy,\n timeout=request_timeout,\n show_progress_bar=show_progress_bar,\n skip_empty=skip_empty,\n tiktoken_model_name=tiktoken_model_name,\n )\n", "fileTypes": [], "file_path": "", "password": false, @@ -851,7 +851,7 @@ "list": false, "show": true, "multiline": true, - "value": "from typing import Optional\n\nfrom langchain_openai import ChatOpenAI\nfrom pydantic.v1 import SecretStr\n\nfrom langflow.base.constants import STREAM_INFO_TEXT\nfrom langflow.base.models.model import LCModelComponent\nfrom langflow.base.models.openai_constants import MODEL_NAMES\nfrom langflow.field_typing import NestedDict, Text\n\n\nclass OpenAIModelComponent(LCModelComponent):\n display_name = \"OpenAI\"\n description = \"Generates text using OpenAI LLMs.\"\n icon = \"OpenAI\"\n\n field_order = [\n \"max_tokens\",\n \"model_kwargs\",\n \"model_name\",\n \"openai_api_base\",\n \"openai_api_key\",\n \"temperature\",\n \"input_value\",\n \"system_message\",\n \"stream\",\n ]\n\n def build_config(self):\n return {\n \"input_value\": {\"display_name\": \"Input\"},\n \"max_tokens\": {\n \"display_name\": \"Max Tokens\",\n \"advanced\": True,\n },\n \"model_kwargs\": {\n \"display_name\": \"Model Kwargs\",\n \"advanced\": True,\n },\n \"model_name\": {\n \"display_name\": \"Model Name\",\n \"advanced\": False,\n \"options\": MODEL_NAMES,\n },\n \"openai_api_base\": {\n \"display_name\": \"OpenAI API Base\",\n \"advanced\": True,\n \"info\": (\n \"The base URL of the OpenAI API. Defaults to https://api.openai.com/v1.\\n\\n\"\n \"You can change this to use other APIs like JinaChat, LocalAI and Prem.\"\n ),\n },\n \"openai_api_key\": {\n \"display_name\": \"OpenAI API Key\",\n \"info\": \"The OpenAI API Key to use for the OpenAI model.\",\n \"advanced\": False,\n \"password\": True,\n },\n \"temperature\": {\n \"display_name\": \"Temperature\",\n \"advanced\": False,\n \"value\": 0.1,\n },\n \"stream\": {\n \"display_name\": \"Stream\",\n \"info\": STREAM_INFO_TEXT,\n \"advanced\": True,\n },\n \"system_message\": {\n \"display_name\": \"System Message\",\n \"info\": \"System message to pass to the model.\",\n \"advanced\": True,\n },\n }\n\n def build(\n self,\n input_value: Text,\n openai_api_key: str,\n temperature: float,\n model_name: str = \"gpt-4o\",\n max_tokens: Optional[int] = 256,\n model_kwargs: NestedDict = {},\n openai_api_base: Optional[str] = None,\n stream: bool = False,\n system_message: Optional[str] = None,\n ) -> Text:\n if not openai_api_base:\n openai_api_base = \"https://api.openai.com/v1\"\n if openai_api_key:\n api_key = SecretStr(openai_api_key)\n else:\n api_key = None\n\n output = ChatOpenAI(\n max_tokens=max_tokens,\n model_kwargs=model_kwargs,\n model=model_name,\n base_url=openai_api_base,\n api_key=api_key,\n temperature=temperature,\n )\n\n return self.get_chat_result(output, stream, input_value, system_message)\n", + "value": "from typing import Optional\n\nfrom langchain_openai import ChatOpenAI\nfrom pydantic.v1 import SecretStr\n\nfrom langflow.base.constants import STREAM_INFO_TEXT\nfrom langflow.base.models.model import LCModelComponent\nfrom langflow.base.models.openai_constants import MODEL_NAMES\nfrom langflow.field_typing import NestedDict, Text\n\n\nclass OpenAIModelComponent(LCModelComponent):\n display_name = \"OpenAI\"\n description = \"Generates text using OpenAI LLMs.\"\n icon = \"OpenAI\"\n\n field_order = [\n \"max_tokens\",\n \"model_kwargs\",\n \"model_name\",\n \"openai_api_base\",\n \"openai_api_key\",\n \"temperature\",\n \"input_value\",\n \"system_message\",\n \"stream\",\n ]\n\n def build_config(self):\n return {\n \"input_value\": {\"display_name\": \"Input\"},\n \"max_tokens\": {\n \"display_name\": \"Max Tokens\",\n \"advanced\": True,\n \"info\": \"The maximum number of tokens to generate. Set to 0 for unlimited tokens.\",\n },\n \"model_kwargs\": {\n \"display_name\": \"Model Kwargs\",\n \"advanced\": True,\n },\n \"model_name\": {\n \"display_name\": \"Model Name\",\n \"advanced\": False,\n \"options\": MODEL_NAMES,\n },\n \"openai_api_base\": {\n \"display_name\": \"OpenAI API Base\",\n \"advanced\": True,\n \"info\": (\n \"The base URL of the OpenAI API. Defaults to https://api.openai.com/v1.\\n\\n\"\n \"You can change this to use other APIs like JinaChat, LocalAI and Prem.\"\n ),\n },\n \"openai_api_key\": {\n \"display_name\": \"OpenAI API Key\",\n \"info\": \"The OpenAI API Key to use for the OpenAI model.\",\n \"advanced\": False,\n \"password\": True,\n },\n \"temperature\": {\n \"display_name\": \"Temperature\",\n \"advanced\": False,\n \"value\": 0.1,\n },\n \"stream\": {\n \"display_name\": \"Stream\",\n \"info\": STREAM_INFO_TEXT,\n \"advanced\": True,\n },\n \"system_message\": {\n \"display_name\": \"System Message\",\n \"info\": \"System message to pass to the model.\",\n \"advanced\": True,\n },\n }\n\n def build(\n self,\n input_value: Text,\n openai_api_key: str,\n temperature: float,\n model_name: str = \"gpt-4o\",\n max_tokens: Optional[int] = 256,\n model_kwargs: NestedDict = {},\n openai_api_base: Optional[str] = None,\n stream: bool = False,\n system_message: Optional[str] = None,\n ) -> Text:\n if not openai_api_base:\n openai_api_base = \"https://api.openai.com/v1\"\n if openai_api_key:\n api_key = SecretStr(openai_api_key)\n else:\n api_key = None\n\n output = ChatOpenAI(\n max_tokens=max_tokens or None,\n model_kwargs=model_kwargs,\n model=model_name,\n base_url=openai_api_base,\n api_key=api_key,\n temperature=temperature,\n )\n\n return self.get_chat_result(output, stream, input_value, system_message)\n", "fileTypes": [], "file_path": "", "password": false, @@ -877,7 +877,7 @@ "display_name": "Max Tokens", "advanced": true, "dynamic": false, - "info": "", + "info": "The maximum number of tokens to generate. Set to 0 for unlimited tokens.", "load_from_db": false, "title_case": false }, @@ -1106,7 +1106,7 @@ "list": false, "show": true, "multiline": true, - "value": "from langchain_core.prompts import PromptTemplate\n\nfrom langflow.field_typing import Prompt, TemplateField, Text\nfrom langflow.interface.custom.custom_component import CustomComponent\n\n\nclass PromptComponent(CustomComponent):\n display_name: str = \"Prompt\"\n description: str = \"Create a prompt template with dynamic variables.\"\n icon = \"prompts\"\n\n def build_config(self):\n return {\n \"template\": TemplateField(display_name=\"Template\"),\n \"code\": TemplateField(advanced=True),\n }\n\n def build(\n self,\n template: Prompt,\n **kwargs,\n ) -> Text:\n from langflow.base.prompts.utils import dict_values_to_string\n\n prompt_template = PromptTemplate.from_template(Text(template))\n kwargs = dict_values_to_string(kwargs)\n kwargs = {k: \"\\n\".join(v) if isinstance(v, list) else v for k, v in kwargs.items()}\n try:\n formated_prompt = prompt_template.format(**kwargs)\n except Exception as exc:\n raise ValueError(f\"Error formatting prompt: {exc}\") from exc\n self.status = f'Prompt:\\n\"{formated_prompt}\"'\n return formated_prompt\n", + "value": "from langchain_core.prompts import PromptTemplate\n\nfrom langflow.custom import CustomComponent\nfrom langflow.field_typing import Prompt, TemplateField, Text\n\n\nclass PromptComponent(CustomComponent):\n display_name: str = \"Prompt\"\n description: str = \"Create a prompt template with dynamic variables.\"\n icon = \"prompts\"\n\n def build_config(self):\n return {\n \"template\": TemplateField(display_name=\"Template\"),\n \"code\": TemplateField(advanced=True),\n }\n\n def build(\n self,\n template: Prompt,\n **kwargs,\n ) -> Text:\n from langflow.base.prompts.utils import dict_values_to_string\n\n prompt_template = PromptTemplate.from_template(Text(template))\n kwargs = dict_values_to_string(kwargs)\n kwargs = {k: \"\\n\".join(v) if isinstance(v, list) else v for k, v in kwargs.items()}\n try:\n formated_prompt = prompt_template.format(**kwargs)\n except Exception as exc:\n raise ValueError(f\"Error formatting prompt: {exc}\") from exc\n self.status = f'Prompt:\\n\"{formated_prompt}\"'\n return formated_prompt\n", "fileTypes": [], "file_path": "", "password": false, @@ -1491,7 +1491,7 @@ "list": false, "show": true, "multiline": true, - "value": "from pathlib import Path\nfrom typing import Any, Dict\n\nfrom langflow.base.data.utils import TEXT_FILE_TYPES, parse_text_file_to_record\nfrom langflow.interface.custom.custom_component import CustomComponent\nfrom langflow.schema import Record\n\n\nclass FileComponent(CustomComponent):\n display_name = \"File\"\n description = \"A generic file loader.\"\n icon = \"file-text\"\n\n def build_config(self) -> Dict[str, Any]:\n return {\n \"path\": {\n \"display_name\": \"Path\",\n \"field_type\": \"file\",\n \"file_types\": TEXT_FILE_TYPES,\n \"info\": f\"Supported file types: {', '.join(TEXT_FILE_TYPES)}\",\n },\n \"silent_errors\": {\n \"display_name\": \"Silent Errors\",\n \"advanced\": True,\n \"info\": \"If true, errors will not raise an exception.\",\n },\n }\n\n def load_file(self, path: str, silent_errors: bool = False) -> Record:\n resolved_path = self.resolve_path(path)\n path_obj = Path(resolved_path)\n extension = path_obj.suffix[1:].lower()\n if extension == \"doc\":\n raise ValueError(\"doc files are not supported. Please save as .docx\")\n if extension not in TEXT_FILE_TYPES:\n raise ValueError(f\"Unsupported file type: {extension}\")\n record = parse_text_file_to_record(resolved_path, silent_errors)\n self.status = record if record else \"No data\"\n return record or Record()\n\n def build(\n self,\n path: str,\n silent_errors: bool = False,\n ) -> Record:\n record = self.load_file(path, silent_errors)\n self.status = record\n return record\n", + "value": "from pathlib import Path\nfrom typing import Any, Dict\n\nfrom langflow.base.data.utils import TEXT_FILE_TYPES, parse_text_file_to_record\nfrom langflow.custom import CustomComponent\nfrom langflow.schema import Record\n\n\nclass FileComponent(CustomComponent):\n display_name = \"File\"\n description = \"A generic file loader.\"\n icon = \"file-text\"\n\n def build_config(self) -> Dict[str, Any]:\n return {\n \"path\": {\n \"display_name\": \"Path\",\n \"field_type\": \"file\",\n \"file_types\": TEXT_FILE_TYPES,\n \"info\": f\"Supported file types: {', '.join(TEXT_FILE_TYPES)}\",\n },\n \"silent_errors\": {\n \"display_name\": \"Silent Errors\",\n \"advanced\": True,\n \"info\": \"If true, errors will not raise an exception.\",\n },\n }\n\n def load_file(self, path: str, silent_errors: bool = False) -> Record:\n resolved_path = self.resolve_path(path)\n path_obj = Path(resolved_path)\n extension = path_obj.suffix[1:].lower()\n if extension == \"doc\":\n raise ValueError(\"doc files are not supported. Please save as .docx\")\n if extension not in TEXT_FILE_TYPES:\n raise ValueError(f\"Unsupported file type: {extension}\")\n record = parse_text_file_to_record(resolved_path, silent_errors)\n self.status = record if record else \"No data\"\n return record or Record()\n\n def build(\n self,\n path: str,\n silent_errors: bool = False,\n ) -> Record:\n record = self.load_file(path, silent_errors)\n self.status = record\n return record\n", "fileTypes": [], "file_path": "", "password": false, @@ -1631,7 +1631,7 @@ "list": false, "show": true, "multiline": true, - "value": "from typing import Optional\nfrom langchain_core.documents import Document\n\nfrom langflow.interface.custom.custom_component import CustomComponent\nfrom langflow.schema import Record\nfrom langflow.utils.util import build_loader_repr_from_records, unescape_string\nfrom langchain_text_splitters import RecursiveCharacterTextSplitter\n\n\nclass RecursiveCharacterTextSplitterComponent(CustomComponent):\n display_name: str = \"Recursive Character Text Splitter\"\n description: str = \"Split text into chunks of a specified length.\"\n documentation: str = \"https://docs.langflow.org/components/text-splitters#recursivecharactertextsplitter\"\n\n def build_config(self):\n return {\n \"inputs\": {\n \"display_name\": \"Input\",\n \"info\": \"The texts to split.\",\n \"input_types\": [\"Document\", \"Record\"],\n },\n \"separators\": {\n \"display_name\": \"Separators\",\n \"info\": 'The characters to split on.\\nIf left empty defaults to [\"\\\\n\\\\n\", \"\\\\n\", \" \", \"\"].',\n \"is_list\": True,\n },\n \"chunk_size\": {\n \"display_name\": \"Chunk Size\",\n \"info\": \"The maximum length of each chunk.\",\n \"field_type\": \"int\",\n \"value\": 1000,\n },\n \"chunk_overlap\": {\n \"display_name\": \"Chunk Overlap\",\n \"info\": \"The amount of overlap between chunks.\",\n \"field_type\": \"int\",\n \"value\": 200,\n },\n \"code\": {\"show\": False},\n }\n\n def build(\n self,\n inputs: list[Document],\n separators: Optional[list[str]] = None,\n chunk_size: Optional[int] = 1000,\n chunk_overlap: Optional[int] = 200,\n ) -> list[Record]:\n \"\"\"\n Split text into chunks of a specified length.\n\n Args:\n separators (list[str]): The characters to split on.\n chunk_size (int): The maximum length of each chunk.\n chunk_overlap (int): The amount of overlap between chunks.\n length_function (function): The function to use to calculate the length of the text.\n\n Returns:\n list[str]: The chunks of text.\n \"\"\"\n\n if separators == \"\":\n separators = None\n elif separators:\n # check if the separators list has escaped characters\n # if there are escaped characters, unescape them\n separators = [unescape_string(x) for x in separators]\n\n # Make sure chunk_size and chunk_overlap are ints\n if isinstance(chunk_size, str):\n chunk_size = int(chunk_size)\n if isinstance(chunk_overlap, str):\n chunk_overlap = int(chunk_overlap)\n splitter = RecursiveCharacterTextSplitter(\n separators=separators,\n chunk_size=chunk_size,\n chunk_overlap=chunk_overlap,\n )\n documents = []\n for _input in inputs:\n if isinstance(_input, Record):\n documents.append(_input.to_lc_document())\n else:\n documents.append(_input)\n docs = splitter.split_documents(documents)\n records = self.to_records(docs)\n self.repr_value = build_loader_repr_from_records(records)\n return records\n", + "value": "from typing import Optional\n\nfrom langchain_core.documents import Document\nfrom langchain_text_splitters import RecursiveCharacterTextSplitter\n\nfrom langflow.custom import CustomComponent\nfrom langflow.schema import Record\nfrom langflow.utils.util import build_loader_repr_from_records, unescape_string\n\n\nclass RecursiveCharacterTextSplitterComponent(CustomComponent):\n display_name: str = \"Recursive Character Text Splitter\"\n description: str = \"Split text into chunks of a specified length.\"\n documentation: str = \"https://docs.langflow.org/components/text-splitters#recursivecharactertextsplitter\"\n\n def build_config(self):\n return {\n \"inputs\": {\n \"display_name\": \"Input\",\n \"info\": \"The texts to split.\",\n \"input_types\": [\"Document\", \"Record\"],\n },\n \"separators\": {\n \"display_name\": \"Separators\",\n \"info\": 'The characters to split on.\\nIf left empty defaults to [\"\\\\n\\\\n\", \"\\\\n\", \" \", \"\"].',\n \"is_list\": True,\n },\n \"chunk_size\": {\n \"display_name\": \"Chunk Size\",\n \"info\": \"The maximum length of each chunk.\",\n \"field_type\": \"int\",\n \"value\": 1000,\n },\n \"chunk_overlap\": {\n \"display_name\": \"Chunk Overlap\",\n \"info\": \"The amount of overlap between chunks.\",\n \"field_type\": \"int\",\n \"value\": 200,\n },\n \"code\": {\"show\": False},\n }\n\n def build(\n self,\n inputs: list[Document],\n separators: Optional[list[str]] = None,\n chunk_size: Optional[int] = 1000,\n chunk_overlap: Optional[int] = 200,\n ) -> list[Record]:\n \"\"\"\n Split text into chunks of a specified length.\n\n Args:\n separators (list[str]): The characters to split on.\n chunk_size (int): The maximum length of each chunk.\n chunk_overlap (int): The amount of overlap between chunks.\n length_function (function): The function to use to calculate the length of the text.\n\n Returns:\n list[str]: The chunks of text.\n \"\"\"\n\n if separators == \"\":\n separators = None\n elif separators:\n # check if the separators list has escaped characters\n # if there are escaped characters, unescape them\n separators = [unescape_string(x) for x in separators]\n\n # Make sure chunk_size and chunk_overlap are ints\n if isinstance(chunk_size, str):\n chunk_size = int(chunk_size)\n if isinstance(chunk_overlap, str):\n chunk_overlap = int(chunk_overlap)\n splitter = RecursiveCharacterTextSplitter(\n separators=separators,\n chunk_size=chunk_size,\n chunk_overlap=chunk_overlap,\n )\n documents = []\n for _input in inputs:\n if isinstance(_input, Record):\n documents.append(_input.to_lc_document())\n else:\n documents.append(_input)\n docs = splitter.split_documents(documents)\n records = self.to_records(docs)\n self.repr_value = build_loader_repr_from_records(records)\n return records\n", "fileTypes": [], "file_path": "", "password": false, @@ -2632,7 +2632,7 @@ "list": false, "show": true, "multiline": true, - "value": "from typing import Any, Dict, List, Optional\n\nfrom langchain_openai.embeddings.base import OpenAIEmbeddings\nfrom pydantic.v1 import SecretStr\n\nfrom langflow.field_typing import Embeddings, NestedDict\nfrom langflow.interface.custom.custom_component import CustomComponent\n\n\nclass OpenAIEmbeddingsComponent(CustomComponent):\n display_name = \"OpenAI Embeddings\"\n description = \"Generate embeddings using OpenAI models.\"\n\n def build_config(self):\n return {\n \"allowed_special\": {\n \"display_name\": \"Allowed Special\",\n \"advanced\": True,\n \"field_type\": \"str\",\n \"is_list\": True,\n },\n \"default_headers\": {\n \"display_name\": \"Default Headers\",\n \"advanced\": True,\n \"field_type\": \"dict\",\n },\n \"default_query\": {\n \"display_name\": \"Default Query\",\n \"advanced\": True,\n \"field_type\": \"NestedDict\",\n },\n \"disallowed_special\": {\n \"display_name\": \"Disallowed Special\",\n \"advanced\": True,\n \"field_type\": \"str\",\n \"is_list\": True,\n },\n \"chunk_size\": {\"display_name\": \"Chunk Size\", \"advanced\": True},\n \"client\": {\"display_name\": \"Client\", \"advanced\": True},\n \"deployment\": {\"display_name\": \"Deployment\", \"advanced\": True},\n \"embedding_ctx_length\": {\n \"display_name\": \"Embedding Context Length\",\n \"advanced\": True,\n },\n \"max_retries\": {\"display_name\": \"Max Retries\", \"advanced\": True},\n \"model\": {\n \"display_name\": \"Model\",\n \"advanced\": False,\n \"options\": [\n \"text-embedding-3-small\",\n \"text-embedding-3-large\",\n \"text-embedding-ada-002\",\n ],\n },\n \"model_kwargs\": {\"display_name\": \"Model Kwargs\", \"advanced\": True},\n \"openai_api_base\": {\n \"display_name\": \"OpenAI API Base\",\n \"password\": True,\n \"advanced\": True,\n },\n \"openai_api_key\": {\"display_name\": \"OpenAI API Key\", \"password\": True},\n \"openai_api_type\": {\n \"display_name\": \"OpenAI API Type\",\n \"advanced\": True,\n \"password\": True,\n },\n \"openai_api_version\": {\n \"display_name\": \"OpenAI API Version\",\n \"advanced\": True,\n },\n \"openai_organization\": {\n \"display_name\": \"OpenAI Organization\",\n \"advanced\": True,\n },\n \"openai_proxy\": {\"display_name\": \"OpenAI Proxy\", \"advanced\": True},\n \"request_timeout\": {\"display_name\": \"Request Timeout\", \"advanced\": True},\n \"show_progress_bar\": {\n \"display_name\": \"Show Progress Bar\",\n \"advanced\": True,\n },\n \"skip_empty\": {\"display_name\": \"Skip Empty\", \"advanced\": True},\n \"tiktoken_model_name\": {\n \"display_name\": \"TikToken Model Name\",\n \"advanced\": True,\n },\n \"tiktoken_enable\": {\"display_name\": \"TikToken Enable\", \"advanced\": True},\n }\n\n def build(\n self,\n openai_api_key: str,\n default_headers: Optional[Dict[str, str]] = None,\n default_query: Optional[NestedDict] = {},\n allowed_special: List[str] = [],\n disallowed_special: List[str] = [\"all\"],\n chunk_size: int = 1000,\n client: Optional[Any] = None,\n deployment: str = \"text-embedding-ada-002\",\n embedding_ctx_length: int = 8191,\n max_retries: int = 6,\n model: str = \"text-embedding-ada-002\",\n model_kwargs: NestedDict = {},\n openai_api_base: Optional[str] = None,\n openai_api_type: Optional[str] = None,\n openai_api_version: Optional[str] = None,\n openai_organization: Optional[str] = None,\n openai_proxy: Optional[str] = None,\n request_timeout: Optional[float] = None,\n show_progress_bar: bool = False,\n skip_empty: bool = False,\n tiktoken_enable: bool = True,\n tiktoken_model_name: Optional[str] = None,\n ) -> Embeddings:\n # This is to avoid errors with Vector Stores (e.g Chroma)\n if disallowed_special == [\"all\"]:\n disallowed_special = \"all\" # type: ignore\n if openai_api_key:\n api_key = SecretStr(openai_api_key)\n else:\n api_key = None\n\n return OpenAIEmbeddings(\n tiktoken_enabled=tiktoken_enable,\n default_headers=default_headers,\n default_query=default_query,\n allowed_special=set(allowed_special),\n disallowed_special=\"all\",\n chunk_size=chunk_size,\n client=client,\n deployment=deployment,\n embedding_ctx_length=embedding_ctx_length,\n max_retries=max_retries,\n model=model,\n model_kwargs=model_kwargs,\n base_url=openai_api_base,\n api_key=api_key,\n openai_api_type=openai_api_type,\n api_version=openai_api_version,\n organization=openai_organization,\n openai_proxy=openai_proxy,\n timeout=request_timeout,\n show_progress_bar=show_progress_bar,\n skip_empty=skip_empty,\n tiktoken_model_name=tiktoken_model_name,\n )\n", + "value": "from typing import Dict, List, Optional\n\nfrom langchain_openai.embeddings.base import OpenAIEmbeddings\nfrom pydantic.v1 import SecretStr\n\nfrom langflow.custom import CustomComponent\nfrom langflow.field_typing import Embeddings, NestedDict\n\n\nclass OpenAIEmbeddingsComponent(CustomComponent):\n display_name = \"OpenAI Embeddings\"\n description = \"Generate embeddings using OpenAI models.\"\n\n def build_config(self):\n return {\n \"allowed_special\": {\n \"display_name\": \"Allowed Special\",\n \"advanced\": True,\n \"field_type\": \"str\",\n \"is_list\": True,\n },\n \"default_headers\": {\n \"display_name\": \"Default Headers\",\n \"advanced\": True,\n \"field_type\": \"dict\",\n },\n \"default_query\": {\n \"display_name\": \"Default Query\",\n \"advanced\": True,\n \"field_type\": \"NestedDict\",\n },\n \"disallowed_special\": {\n \"display_name\": \"Disallowed Special\",\n \"advanced\": True,\n \"field_type\": \"str\",\n \"is_list\": True,\n },\n \"chunk_size\": {\"display_name\": \"Chunk Size\", \"advanced\": True},\n \"client\": {\"display_name\": \"Client\", \"advanced\": True},\n \"deployment\": {\"display_name\": \"Deployment\", \"advanced\": True},\n \"embedding_ctx_length\": {\n \"display_name\": \"Embedding Context Length\",\n \"advanced\": True,\n },\n \"max_retries\": {\"display_name\": \"Max Retries\", \"advanced\": True},\n \"model\": {\n \"display_name\": \"Model\",\n \"advanced\": False,\n \"options\": [\n \"text-embedding-3-small\",\n \"text-embedding-3-large\",\n \"text-embedding-ada-002\",\n ],\n },\n \"model_kwargs\": {\"display_name\": \"Model Kwargs\", \"advanced\": True},\n \"openai_api_base\": {\n \"display_name\": \"OpenAI API Base\",\n \"password\": True,\n \"advanced\": True,\n },\n \"openai_api_key\": {\"display_name\": \"OpenAI API Key\", \"password\": True},\n \"openai_api_type\": {\n \"display_name\": \"OpenAI API Type\",\n \"advanced\": True,\n \"password\": True,\n },\n \"openai_api_version\": {\n \"display_name\": \"OpenAI API Version\",\n \"advanced\": True,\n },\n \"openai_organization\": {\n \"display_name\": \"OpenAI Organization\",\n \"advanced\": True,\n },\n \"openai_proxy\": {\"display_name\": \"OpenAI Proxy\", \"advanced\": True},\n \"request_timeout\": {\"display_name\": \"Request Timeout\", \"advanced\": True},\n \"show_progress_bar\": {\n \"display_name\": \"Show Progress Bar\",\n \"advanced\": True,\n },\n \"skip_empty\": {\"display_name\": \"Skip Empty\", \"advanced\": True},\n \"tiktoken_model_name\": {\n \"display_name\": \"TikToken Model Name\",\n \"advanced\": True,\n },\n \"tiktoken_enable\": {\"display_name\": \"TikToken Enable\", \"advanced\": True},\n }\n\n def build(\n self,\n openai_api_key: str,\n default_headers: Optional[Dict[str, str]] = None,\n default_query: Optional[NestedDict] = {},\n allowed_special: List[str] = [],\n disallowed_special: List[str] = [\"all\"],\n chunk_size: int = 1000,\n deployment: str = \"text-embedding-ada-002\",\n embedding_ctx_length: int = 8191,\n max_retries: int = 6,\n model: str = \"text-embedding-ada-002\",\n model_kwargs: NestedDict = {},\n openai_api_base: Optional[str] = None,\n openai_api_type: Optional[str] = None,\n openai_api_version: Optional[str] = None,\n openai_organization: Optional[str] = None,\n openai_proxy: Optional[str] = None,\n request_timeout: Optional[float] = None,\n show_progress_bar: bool = False,\n skip_empty: bool = False,\n tiktoken_enable: bool = True,\n tiktoken_model_name: Optional[str] = None,\n ) -> Embeddings:\n # This is to avoid errors with Vector Stores (e.g Chroma)\n if disallowed_special == [\"all\"]:\n disallowed_special = \"all\" # type: ignore\n if openai_api_key:\n api_key = SecretStr(openai_api_key)\n else:\n api_key = None\n\n return OpenAIEmbeddings(\n tiktoken_enabled=tiktoken_enable,\n default_headers=default_headers,\n default_query=default_query,\n allowed_special=set(allowed_special),\n disallowed_special=\"all\",\n chunk_size=chunk_size,\n deployment=deployment,\n embedding_ctx_length=embedding_ctx_length,\n max_retries=max_retries,\n model=model,\n model_kwargs=model_kwargs,\n base_url=openai_api_base,\n api_key=api_key,\n openai_api_type=openai_api_type,\n api_version=openai_api_version,\n organization=openai_organization,\n openai_proxy=openai_proxy,\n timeout=request_timeout,\n show_progress_bar=show_progress_bar,\n skip_empty=skip_empty,\n tiktoken_model_name=tiktoken_model_name,\n )\n", "fileTypes": [], "file_path": "", "password": false,