From dce9901222bd4dd9ca9f8fa14d492e08648732bf Mon Sep 17 00:00:00 2001 From: Gabriel Luiz Freitas Almeida Date: Tue, 5 Mar 2024 18:46:18 -0300 Subject: [PATCH] Remove "documents" configuration from vector store components --- .../textsplitters/CharacterTextSplitter.py | 20 +++++++++----- .../LanguageRecursiveTextSplitter.py | 27 +++++++++++-------- .../RecursiveCharacterTextSplitter.py | 27 ++++++++++++------- .../components/vectorstores/Chroma.py | 9 ++++--- .../components/vectorstores/ChromaSearch.py | 1 - .../langflow/components/vectorstores/FAISS.py | 13 ++++++--- .../components/vectorstores/FAISSSearch.py | 1 - .../vectorstores/MongoDBAtlasVector.py | 21 +++++++++++---- .../components/vectorstores/Pinecone.py | 13 ++++++--- .../components/vectorstores/Qdrant.py | 24 +++++++++++++---- .../langflow/components/vectorstores/Redis.py | 19 +++++++++---- .../components/vectorstores/RedisSearch.py | 2 +- .../vectorstores/SupabaseVectorStore.py | 20 ++++++++++---- .../components/vectorstores/Vectara.py | 23 +++++++++++----- .../components/vectorstores/VectaraSearch.py | 4 --- .../components/vectorstores/Weaviate.py | 19 +++++++++---- .../components/vectorstores/WeaviateSearch.py | 1 - .../components/vectorstores/pgvector.py | 17 +++++++++--- 18 files changed, 183 insertions(+), 78 deletions(-) diff --git a/src/backend/langflow/components/textsplitters/CharacterTextSplitter.py b/src/backend/langflow/components/textsplitters/CharacterTextSplitter.py index d165f47fd..96576a4a3 100644 --- a/src/backend/langflow/components/textsplitters/CharacterTextSplitter.py +++ b/src/backend/langflow/components/textsplitters/CharacterTextSplitter.py @@ -1,8 +1,9 @@ from typing import List from langchain.text_splitter import CharacterTextSplitter -from langchain_core.documents.base import Document + from langflow import CustomComponent +from langflow.schema.schema import Record class CharacterTextSplitterComponent(CustomComponent): @@ -11,7 +12,7 @@ class CharacterTextSplitterComponent(CustomComponent): def build_config(self): return { - "documents": {"display_name": "Documents"}, + "inputs": {"display_name": "Input", "input_types": ["Document", "Record"]}, "chunk_overlap": {"display_name": "Chunk Overlap", "default": 200}, "chunk_size": {"display_name": "Chunk Size", "default": 1000}, "separator": {"display_name": "Separator", "default": "\n"}, @@ -19,17 +20,24 @@ class CharacterTextSplitterComponent(CustomComponent): def build( self, - documents: List[Document], + inputs: List[Record], chunk_overlap: int = 200, chunk_size: int = 1000, separator: str = "\n", - ) -> List[Document]: + ) -> List[Record]: # separator may come escaped from the frontend separator = separator.encode().decode("unicode_escape") + documents = [] + for _input in inputs: + if isinstance(_input, Record): + documents.append(_input.to_lc_document()) + else: + documents.append(_input) docs = CharacterTextSplitter( chunk_overlap=chunk_overlap, chunk_size=chunk_size, separator=separator, ).split_documents(documents) - self.status = docs - return docs + records = self.to_records(docs) + self.status = records + return records diff --git a/src/backend/langflow/components/textsplitters/LanguageRecursiveTextSplitter.py b/src/backend/langflow/components/textsplitters/LanguageRecursiveTextSplitter.py index d1494f4d0..b86b834d8 100644 --- a/src/backend/langflow/components/textsplitters/LanguageRecursiveTextSplitter.py +++ b/src/backend/langflow/components/textsplitters/LanguageRecursiveTextSplitter.py @@ -1,23 +1,22 @@ -from typing import Optional +from typing import List, Optional from langchain.text_splitter import Language -from langchain_core.documents import Document from langflow import CustomComponent +from langflow.schema.schema import Record class LanguageRecursiveTextSplitterComponent(CustomComponent): display_name: str = "Language Recursive Text Splitter" description: str = "Split text into chunks of a specified length based on language." - documentation: str = "https://docs.langflow.org/components/text-splitters#languagerecursivetextsplitter" + documentation: str = ( + "https://docs.langflow.org/components/text-splitters#languagerecursivetextsplitter" + ) def build_config(self): options = [x.value for x in Language] return { - "documents": { - "display_name": "Documents", - "info": "The documents to split.", - }, + "inputs": {"display_name": "Input", "input_types": ["Document", "Record"]}, "separator_type": { "display_name": "Separator Type", "info": "The type of separator to use.", @@ -47,11 +46,11 @@ class LanguageRecursiveTextSplitterComponent(CustomComponent): def build( self, - documents: list[Document], + inputs: List[Record], chunk_size: Optional[int] = 1000, chunk_overlap: Optional[int] = 200, separator_type: str = "Python", - ) -> list[Document]: + ) -> list[Record]: """ Split text into chunks of a specified length. @@ -77,6 +76,12 @@ class LanguageRecursiveTextSplitterComponent(CustomComponent): chunk_size=chunk_size, chunk_overlap=chunk_overlap, ) - + documents = [] + for _input in inputs: + if isinstance(_input, Record): + documents.append(_input.to_lc_document()) + else: + documents.append(_input) docs = splitter.split_documents(documents) - return docs + records = self.to_records(docs) + return records diff --git a/src/backend/langflow/components/textsplitters/RecursiveCharacterTextSplitter.py b/src/backend/langflow/components/textsplitters/RecursiveCharacterTextSplitter.py index d07ae3ebe..a15600394 100644 --- a/src/backend/langflow/components/textsplitters/RecursiveCharacterTextSplitter.py +++ b/src/backend/langflow/components/textsplitters/RecursiveCharacterTextSplitter.py @@ -1,22 +1,26 @@ from typing import Optional +from langchain.text_splitter import RecursiveCharacterTextSplitter from langchain_core.documents import Document from langflow import CustomComponent +from langflow.schema import Record from langflow.utils.util import build_loader_repr_from_documents -from langchain.text_splitter import RecursiveCharacterTextSplitter class RecursiveCharacterTextSplitterComponent(CustomComponent): display_name: str = "Recursive Character Text Splitter" description: str = "Split text into chunks of a specified length." - documentation: str = "https://docs.langflow.org/components/text-splitters#recursivecharactertextsplitter" + documentation: str = ( + "https://docs.langflow.org/components/text-splitters#recursivecharactertextsplitter" + ) def build_config(self): return { - "documents": { - "display_name": "Documents", - "info": "The documents to split.", + "inputs": { + "display_name": "Input", + "info": "The texts to split.", + "input_types": ["Document", "Record"], }, "separators": { "display_name": "Separators", @@ -40,11 +44,11 @@ class RecursiveCharacterTextSplitterComponent(CustomComponent): def build( self, - documents: list[Document], + inputs: list[Document], separators: Optional[list[str]] = None, chunk_size: Optional[int] = 1000, chunk_overlap: Optional[int] = 200, - ) -> list[Document]: + ) -> list[Record]: """ Split text into chunks of a specified length. @@ -75,7 +79,12 @@ class RecursiveCharacterTextSplitterComponent(CustomComponent): chunk_size=chunk_size, chunk_overlap=chunk_overlap, ) - + documents = [] + for _input in inputs: + if isinstance(_input, Record): + documents.append(_input.to_lc_document()) + else: + documents.append(_input) docs = splitter.split_documents(documents) self.repr_value = build_loader_repr_from_documents(docs) - return docs + return self.to_records(docs) diff --git a/src/backend/langflow/components/vectorstores/Chroma.py b/src/backend/langflow/components/vectorstores/Chroma.py index b1756e777..139a877e9 100644 --- a/src/backend/langflow/components/vectorstores/Chroma.py +++ b/src/backend/langflow/components/vectorstores/Chroma.py @@ -31,7 +31,7 @@ class ChromaComponent(CustomComponent): "collection_name": {"display_name": "Collection Name", "value": "langflow"}, "index_directory": {"display_name": "Persist Directory"}, "code": {"advanced": True, "display_name": "Code"}, - "documents": {"display_name": "Documents", "is_list": True}, + "inputs": {"display_name": "Input", "input_types": ["Document", "Record"]}, "embedding": {"display_name": "Embedding"}, "chroma_server_cors_allow_origins": { "display_name": "Server CORS Allow Origins", @@ -84,7 +84,8 @@ class ChromaComponent(CustomComponent): if chroma_server_host is not None: chroma_settings = chromadb.config.Settings( - chroma_server_cors_allow_origins=chroma_server_cors_allow_origins or None, + chroma_server_cors_allow_origins=chroma_server_cors_allow_origins + or None, chroma_server_host=chroma_server_host, chroma_server_port=chroma_server_port or None, chroma_server_grpc_port=chroma_server_grpc_port or None, @@ -99,7 +100,9 @@ class ChromaComponent(CustomComponent): if documents is not None and embedding is not None: if len(documents) == 0: - raise ValueError("If documents are provided, there must be at least one document.") + raise ValueError( + "If documents are provided, there must be at least one document." + ) chroma = Chroma.from_documents( documents=documents, # type: ignore persist_directory=index_directory, diff --git a/src/backend/langflow/components/vectorstores/ChromaSearch.py b/src/backend/langflow/components/vectorstores/ChromaSearch.py index 3a6d283b3..cf98ee987 100644 --- a/src/backend/langflow/components/vectorstores/ChromaSearch.py +++ b/src/backend/langflow/components/vectorstores/ChromaSearch.py @@ -35,7 +35,6 @@ class ChromaSearchComponent(LCVectorStoreComponent): # "persist": {"display_name": "Persist"}, "index_directory": {"display_name": "Index Directory"}, "code": {"show": False, "display_name": "Code"}, - "documents": {"display_name": "Documents", "is_list": True}, "embedding": { "display_name": "Embedding", "info": "Embedding model to vectorize inputs (make sure to use same as index)", diff --git a/src/backend/langflow/components/vectorstores/FAISS.py b/src/backend/langflow/components/vectorstores/FAISS.py index a0324456e..7cdadccdb 100644 --- a/src/backend/langflow/components/vectorstores/FAISS.py +++ b/src/backend/langflow/components/vectorstores/FAISS.py @@ -5,7 +5,8 @@ from langchain_community.vectorstores import VectorStore from langchain_community.vectorstores.faiss import FAISS from langflow import CustomComponent -from langflow.field_typing import Document, Embeddings +from langflow.field_typing import Embeddings +from langflow.schema.schema import Record class FAISSComponent(CustomComponent): @@ -15,7 +16,7 @@ class FAISSComponent(CustomComponent): def build_config(self): return { - "documents": {"display_name": "Documents"}, + "inputs": {"display_name": "Input", "input_types": ["Document", "Record"]}, "embedding": {"display_name": "Embedding"}, "folder_path": { "display_name": "Folder Path", @@ -27,10 +28,16 @@ class FAISSComponent(CustomComponent): def build( self, embedding: Embeddings, - documents: List[Document], + inputs: List[Record], folder_path: str, index_name: str = "langflow_index", ) -> Union[VectorStore, FAISS, BaseRetriever]: + documents = [] + for _input in inputs: + if isinstance(_input, Record): + documents.append(_input.to_lc_document()) + else: + documents.append(_input) vector_store = FAISS.from_documents(documents=documents, embedding=embedding) if not folder_path: raise ValueError("Folder path is required to save the FAISS index.") diff --git a/src/backend/langflow/components/vectorstores/FAISSSearch.py b/src/backend/langflow/components/vectorstores/FAISSSearch.py index f6ddf4f7a..4544c300b 100644 --- a/src/backend/langflow/components/vectorstores/FAISSSearch.py +++ b/src/backend/langflow/components/vectorstores/FAISSSearch.py @@ -14,7 +14,6 @@ class FAISSSearchComponent(LCVectorStoreComponent): def build_config(self): return { - "documents": {"display_name": "Documents"}, "embedding": {"display_name": "Embedding"}, "folder_path": { "display_name": "Folder Path", diff --git a/src/backend/langflow/components/vectorstores/MongoDBAtlasVector.py b/src/backend/langflow/components/vectorstores/MongoDBAtlasVector.py index e15368f7d..4488e4e92 100644 --- a/src/backend/langflow/components/vectorstores/MongoDBAtlasVector.py +++ b/src/backend/langflow/components/vectorstores/MongoDBAtlasVector.py @@ -3,17 +3,20 @@ from typing import List, Optional from langchain_community.vectorstores.mongodb_atlas import MongoDBAtlasVectorSearch from langflow import CustomComponent -from langflow.field_typing import Document, Embeddings, NestedDict +from langflow.field_typing import Embeddings, NestedDict +from langflow.schema.schema import Record class MongoDBAtlasComponent(CustomComponent): display_name = "MongoDB Atlas" - description = "Construct a `MongoDB Atlas Vector Search` vector store from raw documents." + description = ( + "Construct a `MongoDB Atlas Vector Search` vector store from raw documents." + ) icon = "MongoDB" def build_config(self): return { - "documents": {"display_name": "Documents"}, + "inputs": {"display_name": "Input", "input_types": ["Document", "Record"]}, "embedding": {"display_name": "Embedding"}, "collection_name": {"display_name": "Collection Name"}, "db_name": {"display_name": "Database Name"}, @@ -25,7 +28,7 @@ class MongoDBAtlasComponent(CustomComponent): def build( self, embedding: Embeddings, - documents: List[Document], + inputs: List[Record], collection_name: str = "", db_name: str = "", index_name: str = "", @@ -36,12 +39,20 @@ class MongoDBAtlasComponent(CustomComponent): try: from pymongo import MongoClient except ImportError: - raise ImportError("Please install pymongo to use MongoDB Atlas Vector Store") + raise ImportError( + "Please install pymongo to use MongoDB Atlas Vector Store" + ) try: mongo_client: MongoClient = MongoClient(mongodb_atlas_cluster_uri) collection = mongo_client[db_name][collection_name] except Exception as e: raise ValueError(f"Failed to connect to MongoDB Atlas: {e}") + documents = [] + for _input in inputs: + if isinstance(_input, Record): + documents.append(_input.to_lc_document()) + else: + documents.append(_input) if documents: vector_store = MongoDBAtlasVectorSearch.from_documents( documents=documents, diff --git a/src/backend/langflow/components/vectorstores/Pinecone.py b/src/backend/langflow/components/vectorstores/Pinecone.py index 54222b133..c71048266 100644 --- a/src/backend/langflow/components/vectorstores/Pinecone.py +++ b/src/backend/langflow/components/vectorstores/Pinecone.py @@ -7,7 +7,8 @@ from langchain_community.vectorstores import VectorStore from langchain_community.vectorstores.pinecone import Pinecone from langflow import CustomComponent -from langflow.field_typing import Document, Embeddings +from langflow.field_typing import Embeddings +from langflow.schema.schema import Record class PineconeComponent(CustomComponent): @@ -17,7 +18,7 @@ class PineconeComponent(CustomComponent): def build_config(self): return { - "documents": {"display_name": "Documents"}, + "inputs": {"display_name": "Input", "input_types": ["Document", "Record"]}, "embedding": {"display_name": "Embedding"}, "index_name": {"display_name": "Index Name"}, "namespace": {"display_name": "Namespace"}, @@ -44,7 +45,7 @@ class PineconeComponent(CustomComponent): self, embedding: Embeddings, pinecone_env: str, - documents: List[Document], + inputs: List[Record], text_key: str = "text", pool_threads: int = 4, index_name: Optional[str] = None, @@ -59,6 +60,12 @@ class PineconeComponent(CustomComponent): pinecone.init(api_key=pinecone_api_key, environment=pinecone_env) # type: ignore if not index_name: raise ValueError("Index Name is required.") + documents = [] + for _input in inputs: + if isinstance(_input, Record): + documents.append(_input.to_lc_document()) + else: + documents.append(_input) if documents: return Pinecone.from_documents( documents=documents, diff --git a/src/backend/langflow/components/vectorstores/Qdrant.py b/src/backend/langflow/components/vectorstores/Qdrant.py index 23ee70b11..e1773268b 100644 --- a/src/backend/langflow/components/vectorstores/Qdrant.py +++ b/src/backend/langflow/components/vectorstores/Qdrant.py @@ -3,8 +3,10 @@ from typing import Optional, Union from langchain.schema import BaseRetriever from langchain_community.vectorstores import VectorStore from langchain_community.vectorstores.qdrant import Qdrant + from langflow import CustomComponent -from langflow.field_typing import Document, Embeddings, NestedDict +from langflow.field_typing import Embeddings, NestedDict +from langflow.schema.schema import Record class QdrantComponent(CustomComponent): @@ -14,17 +16,23 @@ class QdrantComponent(CustomComponent): def build_config(self): return { - "documents": {"display_name": "Documents"}, + "inputs": {"display_name": "Input", "input_types": ["Document", "Record"]}, "embedding": {"display_name": "Embedding"}, "api_key": {"display_name": "API Key", "password": True, "advanced": True}, "collection_name": {"display_name": "Collection Name"}, - "content_payload_key": {"display_name": "Content Payload Key", "advanced": True}, + "content_payload_key": { + "display_name": "Content Payload Key", + "advanced": True, + }, "distance_func": {"display_name": "Distance Function", "advanced": True}, "grpc_port": {"display_name": "gRPC Port", "advanced": True}, "host": {"display_name": "Host", "advanced": True}, "https": {"display_name": "HTTPS", "advanced": True}, "location": {"display_name": "Location", "advanced": True}, - "metadata_payload_key": {"display_name": "Metadata Payload Key", "advanced": True}, + "metadata_payload_key": { + "display_name": "Metadata Payload Key", + "advanced": True, + }, "path": {"display_name": "Path", "advanced": True}, "port": {"display_name": "Port", "advanced": True}, "prefer_grpc": {"display_name": "Prefer gRPC", "advanced": True}, @@ -38,7 +46,7 @@ class QdrantComponent(CustomComponent): self, embedding: Embeddings, collection_name: str, - documents: Optional[Document] = None, + inputs: Optional[Record] = None, api_key: Optional[str] = None, content_payload_key: str = "page_content", distance_func: str = "Cosine", @@ -55,6 +63,12 @@ class QdrantComponent(CustomComponent): timeout: Optional[int] = None, url: Optional[str] = None, ) -> Union[VectorStore, Qdrant, BaseRetriever]: + documents = [] + for _input in inputs: + if isinstance(_input, Record): + documents.append(_input.to_lc_document()) + else: + documents.append(_input) if documents is None: from qdrant_client import QdrantClient diff --git a/src/backend/langflow/components/vectorstores/Redis.py b/src/backend/langflow/components/vectorstores/Redis.py index b2d7e4542..bbf04f1a4 100644 --- a/src/backend/langflow/components/vectorstores/Redis.py +++ b/src/backend/langflow/components/vectorstores/Redis.py @@ -3,9 +3,10 @@ from typing import Optional, Union from langchain.embeddings.base import Embeddings from langchain_community.vectorstores import VectorStore from langchain_community.vectorstores.redis import Redis -from langchain_core.documents import Document from langchain_core.retrievers import BaseRetriever + from langflow import CustomComponent +from langflow.schema.schema import Record class RedisComponent(CustomComponent): @@ -28,7 +29,7 @@ class RedisComponent(CustomComponent): return { "index_name": {"display_name": "Index Name", "value": "your_index"}, "code": {"show": False, "display_name": "Code"}, - "documents": {"display_name": "Documents", "is_list": True}, + "inputs": {"display_name": "Input", "input_types": ["Document", "Record"]}, "embedding": {"display_name": "Embedding"}, "schema": {"display_name": "Schema", "file_types": [".yaml"]}, "redis_server_url": { @@ -44,7 +45,7 @@ class RedisComponent(CustomComponent): redis_server_url: str, redis_index_name: str, schema: Optional[str] = None, - documents: Optional[Document] = None, + inputs: Optional[Record] = None, ) -> Union[VectorStore, BaseRetriever]: """ Builds the Vector Store or BaseRetriever object. @@ -58,9 +59,17 @@ class RedisComponent(CustomComponent): Returns: - VectorStore: The Vector Store object. """ - if documents is None: + documents = [] + for _input in inputs: + if isinstance(_input, Record): + documents.append(_input.to_lc_document()) + else: + documents.append(_input) + if not documents: if schema is None: - raise ValueError("If no documents are provided, a schema must be provided.") + raise ValueError( + "If no documents are provided, a schema must be provided." + ) redis_vs = Redis.from_existing_index( embedding=embedding, index_name=redis_index_name, diff --git a/src/backend/langflow/components/vectorstores/RedisSearch.py b/src/backend/langflow/components/vectorstores/RedisSearch.py index 4089d4f47..63fc46ddb 100644 --- a/src/backend/langflow/components/vectorstores/RedisSearch.py +++ b/src/backend/langflow/components/vectorstores/RedisSearch.py @@ -33,7 +33,7 @@ class RedisSearchComponent(RedisComponent, LCVectorStoreComponent): "input_value": {"display_name": "Input"}, "index_name": {"display_name": "Index Name", "value": "your_index"}, "code": {"show": False, "display_name": "Code"}, - "documents": {"display_name": "Documents", "is_list": True}, + "embedding": {"display_name": "Embedding"}, "schema": {"display_name": "Schema", "file_types": [".yaml"]}, "redis_server_url": { diff --git a/src/backend/langflow/components/vectorstores/SupabaseVectorStore.py b/src/backend/langflow/components/vectorstores/SupabaseVectorStore.py index 2ec6dfabc..7f6296c03 100644 --- a/src/backend/langflow/components/vectorstores/SupabaseVectorStore.py +++ b/src/backend/langflow/components/vectorstores/SupabaseVectorStore.py @@ -3,10 +3,12 @@ from typing import List, Union from langchain.schema import BaseRetriever from langchain_community.vectorstores import VectorStore from langchain_community.vectorstores.supabase import SupabaseVectorStore -from langflow import CustomComponent -from langflow.field_typing import Document, Embeddings, NestedDict from supabase.client import Client, create_client +from langflow import CustomComponent +from langflow.field_typing import Embeddings, NestedDict +from langflow.schema.schema import Record + class SupabaseComponent(CustomComponent): display_name = "Supabase" @@ -14,7 +16,7 @@ class SupabaseComponent(CustomComponent): def build_config(self): return { - "documents": {"display_name": "Documents"}, + "inputs": {"display_name": "Input", "input_types": ["Document", "Record"]}, "embedding": {"display_name": "Embedding"}, "query_name": {"display_name": "Query Name"}, "search_kwargs": {"display_name": "Search Kwargs", "advanced": True}, @@ -26,14 +28,22 @@ class SupabaseComponent(CustomComponent): def build( self, embedding: Embeddings, - documents: List[Document], + inputs: List[Record], query_name: str = "", search_kwargs: NestedDict = {}, supabase_service_key: str = "", supabase_url: str = "", table_name: str = "", ) -> Union[VectorStore, SupabaseVectorStore, BaseRetriever]: - supabase: Client = create_client(supabase_url, supabase_key=supabase_service_key) + supabase: Client = create_client( + supabase_url, supabase_key=supabase_service_key + ) + documents = [] + for _input in inputs: + if isinstance(_input, Record): + documents.append(_input.to_lc_document()) + else: + documents.append(_input) return SupabaseVectorStore.from_documents( documents=documents, embedding=embedding, diff --git a/src/backend/langflow/components/vectorstores/Vectara.py b/src/backend/langflow/components/vectorstores/Vectara.py index 0a396918c..8f1f7250e 100644 --- a/src/backend/langflow/components/vectorstores/Vectara.py +++ b/src/backend/langflow/components/vectorstores/Vectara.py @@ -8,13 +8,16 @@ from langchain_community.vectorstores.vectara import Vectara from langchain_core.vectorstores import VectorStore from langflow import CustomComponent -from langflow.field_typing import BaseRetriever, Document +from langflow.field_typing import BaseRetriever +from langflow.schema.schema import Record class VectaraComponent(CustomComponent): display_name: str = "Vectara" description: str = "Implementation of Vector Store using Vectara" - documentation = "https://python.langchain.com/docs/integrations/vectorstores/vectara" + documentation = ( + "https://python.langchain.com/docs/integrations/vectorstores/vectara" + ) beta = True icon = "Vectara" field_config = { @@ -28,8 +31,9 @@ class VectaraComponent(CustomComponent): "display_name": "Vectara API Key", "password": True, }, - "documents": { - "display_name": "Documents", + "inputs": { + "display_name": "Input", + "input_types": ["Document", "Record"], "info": "If provided, will be upserted to corpus (optional)", }, "files_url": { @@ -44,11 +48,18 @@ class VectaraComponent(CustomComponent): vectara_corpus_id: str, vectara_api_key: str, files_url: Optional[List[str]] = None, - documents: Optional[Document] = None, + inputs: Optional[Record] = None, ) -> Union[VectorStore, BaseRetriever]: source = "Langflow" - if documents is not None: + documents = [] + for _input in inputs: + if isinstance(_input, Record): + documents.append(_input.to_lc_document()) + else: + documents.append(_input) + + if documents: return Vectara.from_documents( documents=documents, # type: ignore embedding=FakeEmbeddings(size=768), diff --git a/src/backend/langflow/components/vectorstores/VectaraSearch.py b/src/backend/langflow/components/vectorstores/VectaraSearch.py index ae2d442be..cbc876f86 100644 --- a/src/backend/langflow/components/vectorstores/VectaraSearch.py +++ b/src/backend/langflow/components/vectorstores/VectaraSearch.py @@ -33,10 +33,6 @@ class VectaraSearchComponent(VectaraComponent, LCVectorStoreComponent): "display_name": "Vectara API Key", "password": True, }, - "documents": { - "display_name": "Documents", - "info": "If provided, will be upserted to corpus (optional)", - }, "files_url": { "display_name": "Files Url", "info": "Make vectara object using url of files (optional)", diff --git a/src/backend/langflow/components/vectorstores/Weaviate.py b/src/backend/langflow/components/vectorstores/Weaviate.py index 3d804255a..a85febcd5 100644 --- a/src/backend/langflow/components/vectorstores/Weaviate.py +++ b/src/backend/langflow/components/vectorstores/Weaviate.py @@ -2,16 +2,19 @@ from typing import Optional, Union import weaviate # type: ignore from langchain.embeddings.base import Embeddings -from langchain.schema import BaseRetriever, Document +from langchain.schema import BaseRetriever from langchain_community.vectorstores import VectorStore, Weaviate from langflow import CustomComponent +from langflow.schema.schema import Record class WeaviateVectorStoreComponent(CustomComponent): display_name: str = "Weaviate" description: str = "Implementation of Vector Store using Weaviate" - documentation = "https://python.langchain.com/docs/integrations/vectorstores/weaviate" + documentation = ( + "https://python.langchain.com/docs/integrations/vectorstores/weaviate" + ) beta = True field_config = { "url": {"display_name": "Weaviate URL", "value": "http://localhost:8080"}, @@ -30,7 +33,7 @@ class WeaviateVectorStoreComponent(CustomComponent): "advanced": True, "value": "text", }, - "documents": {"display_name": "Documents", "is_list": True}, + "inputs": {"display_name": "Input", "input_types": ["Document", "Record"]}, "embedding": {"display_name": "Embedding"}, "attributes": { "display_name": "Attributes", @@ -55,7 +58,7 @@ class WeaviateVectorStoreComponent(CustomComponent): index_name: Optional[str] = None, text_key: str = "text", embedding: Optional[Embeddings] = None, - documents: Optional[Document] = None, + inputs: Optional[Record] = None, attributes: Optional[list] = None, ) -> Union[VectorStore, BaseRetriever]: if api_key: @@ -78,8 +81,14 @@ class WeaviateVectorStoreComponent(CustomComponent): return pascal_case_word index_name = _to_pascal_case(index_name) if index_name else None + documents = [] + for _input in inputs: + if isinstance(_input, Record): + documents.append(_input.to_lc_document()) + else: + documents.append(_input) - if documents is not None and embedding is not None: + if documents and embedding is not None: return Weaviate.from_documents( client=client, index_name=index_name, diff --git a/src/backend/langflow/components/vectorstores/WeaviateSearch.py b/src/backend/langflow/components/vectorstores/WeaviateSearch.py index 6eee202c9..9b14ca779 100644 --- a/src/backend/langflow/components/vectorstores/WeaviateSearch.py +++ b/src/backend/langflow/components/vectorstores/WeaviateSearch.py @@ -39,7 +39,6 @@ class WeaviateSearchVectorStore(WeaviateVectorStoreComponent, LCVectorStoreCompo "advanced": True, "value": "text", }, - "documents": {"display_name": "Documents", "is_list": True}, "embedding": {"display_name": "Embedding"}, "attributes": { "display_name": "Attributes", diff --git a/src/backend/langflow/components/vectorstores/pgvector.py b/src/backend/langflow/components/vectorstores/pgvector.py index 2baf6dae6..e3b528313 100644 --- a/src/backend/langflow/components/vectorstores/pgvector.py +++ b/src/backend/langflow/components/vectorstores/pgvector.py @@ -3,9 +3,10 @@ from typing import Optional, Union from langchain.embeddings.base import Embeddings from langchain_community.vectorstores import VectorStore from langchain_community.vectorstores.pgvector import PGVector -from langchain_core.documents import Document from langchain_core.retrievers import BaseRetriever + from langflow import CustomComponent +from langflow.schema.schema import Record class PGVectorComponent(CustomComponent): @@ -15,7 +16,9 @@ class PGVectorComponent(CustomComponent): display_name: str = "PGVector" description: str = "Implementation of Vector Store using PostgreSQL" - documentation = "https://python.langchain.com/docs/integrations/vectorstores/pgvector" + documentation = ( + "https://python.langchain.com/docs/integrations/vectorstores/pgvector" + ) def build_config(self): """ @@ -26,7 +29,7 @@ class PGVectorComponent(CustomComponent): """ return { "code": {"show": False}, - "documents": {"display_name": "Documents", "is_list": True}, + "inputs": {"display_name": "Input", "input_types": ["Document", "Record"]}, "embedding": {"display_name": "Embedding"}, "pg_server_url": { "display_name": "PostgreSQL Server Connection String", @@ -40,7 +43,7 @@ class PGVectorComponent(CustomComponent): embedding: Embeddings, pg_server_url: str, collection_name: str, - documents: Optional[Document] = None, + inputs: Optional[Record] = None, ) -> Union[VectorStore, BaseRetriever]: """ Builds the Vector Store or BaseRetriever object. @@ -55,6 +58,12 @@ class PGVectorComponent(CustomComponent): - VectorStore: The Vector Store object. """ + documents = [] + for _input in inputs: + if isinstance(_input, Record): + documents.append(_input.to_lc_document()) + else: + documents.append(_input) try: if documents is None: vector_store = PGVector.from_existing_index(