Remove "documents" configuration from vector store components

This commit is contained in:
Gabriel Luiz Freitas Almeida 2024-03-05 18:46:18 -03:00
commit dce9901222
18 changed files with 183 additions and 78 deletions

View file

@ -1,8 +1,9 @@
from typing import List
from langchain.text_splitter import CharacterTextSplitter
from langchain_core.documents.base import Document
from langflow import CustomComponent
from langflow.schema.schema import Record
class CharacterTextSplitterComponent(CustomComponent):
@ -11,7 +12,7 @@ class CharacterTextSplitterComponent(CustomComponent):
def build_config(self):
return {
"documents": {"display_name": "Documents"},
"inputs": {"display_name": "Input", "input_types": ["Document", "Record"]},
"chunk_overlap": {"display_name": "Chunk Overlap", "default": 200},
"chunk_size": {"display_name": "Chunk Size", "default": 1000},
"separator": {"display_name": "Separator", "default": "\n"},
@ -19,17 +20,24 @@ class CharacterTextSplitterComponent(CustomComponent):
def build(
self,
documents: List[Document],
inputs: List[Record],
chunk_overlap: int = 200,
chunk_size: int = 1000,
separator: str = "\n",
) -> List[Document]:
) -> List[Record]:
# separator may come escaped from the frontend
separator = separator.encode().decode("unicode_escape")
documents = []
for _input in inputs:
if isinstance(_input, Record):
documents.append(_input.to_lc_document())
else:
documents.append(_input)
docs = CharacterTextSplitter(
chunk_overlap=chunk_overlap,
chunk_size=chunk_size,
separator=separator,
).split_documents(documents)
self.status = docs
return docs
records = self.to_records(docs)
self.status = records
return records

View file

@ -1,23 +1,22 @@
from typing import Optional
from typing import List, Optional
from langchain.text_splitter import Language
from langchain_core.documents import Document
from langflow import CustomComponent
from langflow.schema.schema import Record
class LanguageRecursiveTextSplitterComponent(CustomComponent):
display_name: str = "Language Recursive Text Splitter"
description: str = "Split text into chunks of a specified length based on language."
documentation: str = "https://docs.langflow.org/components/text-splitters#languagerecursivetextsplitter"
documentation: str = (
"https://docs.langflow.org/components/text-splitters#languagerecursivetextsplitter"
)
def build_config(self):
options = [x.value for x in Language]
return {
"documents": {
"display_name": "Documents",
"info": "The documents to split.",
},
"inputs": {"display_name": "Input", "input_types": ["Document", "Record"]},
"separator_type": {
"display_name": "Separator Type",
"info": "The type of separator to use.",
@ -47,11 +46,11 @@ class LanguageRecursiveTextSplitterComponent(CustomComponent):
def build(
self,
documents: list[Document],
inputs: List[Record],
chunk_size: Optional[int] = 1000,
chunk_overlap: Optional[int] = 200,
separator_type: str = "Python",
) -> list[Document]:
) -> list[Record]:
"""
Split text into chunks of a specified length.
@ -77,6 +76,12 @@ class LanguageRecursiveTextSplitterComponent(CustomComponent):
chunk_size=chunk_size,
chunk_overlap=chunk_overlap,
)
documents = []
for _input in inputs:
if isinstance(_input, Record):
documents.append(_input.to_lc_document())
else:
documents.append(_input)
docs = splitter.split_documents(documents)
return docs
records = self.to_records(docs)
return records

View file

@ -1,22 +1,26 @@
from typing import Optional
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_core.documents import Document
from langflow import CustomComponent
from langflow.schema import Record
from langflow.utils.util import build_loader_repr_from_documents
from langchain.text_splitter import RecursiveCharacterTextSplitter
class RecursiveCharacterTextSplitterComponent(CustomComponent):
display_name: str = "Recursive Character Text Splitter"
description: str = "Split text into chunks of a specified length."
documentation: str = "https://docs.langflow.org/components/text-splitters#recursivecharactertextsplitter"
documentation: str = (
"https://docs.langflow.org/components/text-splitters#recursivecharactertextsplitter"
)
def build_config(self):
return {
"documents": {
"display_name": "Documents",
"info": "The documents to split.",
"inputs": {
"display_name": "Input",
"info": "The texts to split.",
"input_types": ["Document", "Record"],
},
"separators": {
"display_name": "Separators",
@ -40,11 +44,11 @@ class RecursiveCharacterTextSplitterComponent(CustomComponent):
def build(
self,
documents: list[Document],
inputs: list[Document],
separators: Optional[list[str]] = None,
chunk_size: Optional[int] = 1000,
chunk_overlap: Optional[int] = 200,
) -> list[Document]:
) -> list[Record]:
"""
Split text into chunks of a specified length.
@ -75,7 +79,12 @@ class RecursiveCharacterTextSplitterComponent(CustomComponent):
chunk_size=chunk_size,
chunk_overlap=chunk_overlap,
)
documents = []
for _input in inputs:
if isinstance(_input, Record):
documents.append(_input.to_lc_document())
else:
documents.append(_input)
docs = splitter.split_documents(documents)
self.repr_value = build_loader_repr_from_documents(docs)
return docs
return self.to_records(docs)

View file

@ -31,7 +31,7 @@ class ChromaComponent(CustomComponent):
"collection_name": {"display_name": "Collection Name", "value": "langflow"},
"index_directory": {"display_name": "Persist Directory"},
"code": {"advanced": True, "display_name": "Code"},
"documents": {"display_name": "Documents", "is_list": True},
"inputs": {"display_name": "Input", "input_types": ["Document", "Record"]},
"embedding": {"display_name": "Embedding"},
"chroma_server_cors_allow_origins": {
"display_name": "Server CORS Allow Origins",
@ -84,7 +84,8 @@ class ChromaComponent(CustomComponent):
if chroma_server_host is not None:
chroma_settings = chromadb.config.Settings(
chroma_server_cors_allow_origins=chroma_server_cors_allow_origins or None,
chroma_server_cors_allow_origins=chroma_server_cors_allow_origins
or None,
chroma_server_host=chroma_server_host,
chroma_server_port=chroma_server_port or None,
chroma_server_grpc_port=chroma_server_grpc_port or None,
@ -99,7 +100,9 @@ class ChromaComponent(CustomComponent):
if documents is not None and embedding is not None:
if len(documents) == 0:
raise ValueError("If documents are provided, there must be at least one document.")
raise ValueError(
"If documents are provided, there must be at least one document."
)
chroma = Chroma.from_documents(
documents=documents, # type: ignore
persist_directory=index_directory,

View file

@ -35,7 +35,6 @@ class ChromaSearchComponent(LCVectorStoreComponent):
# "persist": {"display_name": "Persist"},
"index_directory": {"display_name": "Index Directory"},
"code": {"show": False, "display_name": "Code"},
"documents": {"display_name": "Documents", "is_list": True},
"embedding": {
"display_name": "Embedding",
"info": "Embedding model to vectorize inputs (make sure to use same as index)",

View file

@ -5,7 +5,8 @@ from langchain_community.vectorstores import VectorStore
from langchain_community.vectorstores.faiss import FAISS
from langflow import CustomComponent
from langflow.field_typing import Document, Embeddings
from langflow.field_typing import Embeddings
from langflow.schema.schema import Record
class FAISSComponent(CustomComponent):
@ -15,7 +16,7 @@ class FAISSComponent(CustomComponent):
def build_config(self):
return {
"documents": {"display_name": "Documents"},
"inputs": {"display_name": "Input", "input_types": ["Document", "Record"]},
"embedding": {"display_name": "Embedding"},
"folder_path": {
"display_name": "Folder Path",
@ -27,10 +28,16 @@ class FAISSComponent(CustomComponent):
def build(
self,
embedding: Embeddings,
documents: List[Document],
inputs: List[Record],
folder_path: str,
index_name: str = "langflow_index",
) -> Union[VectorStore, FAISS, BaseRetriever]:
documents = []
for _input in inputs:
if isinstance(_input, Record):
documents.append(_input.to_lc_document())
else:
documents.append(_input)
vector_store = FAISS.from_documents(documents=documents, embedding=embedding)
if not folder_path:
raise ValueError("Folder path is required to save the FAISS index.")

View file

@ -14,7 +14,6 @@ class FAISSSearchComponent(LCVectorStoreComponent):
def build_config(self):
return {
"documents": {"display_name": "Documents"},
"embedding": {"display_name": "Embedding"},
"folder_path": {
"display_name": "Folder Path",

View file

@ -3,17 +3,20 @@ from typing import List, Optional
from langchain_community.vectorstores.mongodb_atlas import MongoDBAtlasVectorSearch
from langflow import CustomComponent
from langflow.field_typing import Document, Embeddings, NestedDict
from langflow.field_typing import Embeddings, NestedDict
from langflow.schema.schema import Record
class MongoDBAtlasComponent(CustomComponent):
display_name = "MongoDB Atlas"
description = "Construct a `MongoDB Atlas Vector Search` vector store from raw documents."
description = (
"Construct a `MongoDB Atlas Vector Search` vector store from raw documents."
)
icon = "MongoDB"
def build_config(self):
return {
"documents": {"display_name": "Documents"},
"inputs": {"display_name": "Input", "input_types": ["Document", "Record"]},
"embedding": {"display_name": "Embedding"},
"collection_name": {"display_name": "Collection Name"},
"db_name": {"display_name": "Database Name"},
@ -25,7 +28,7 @@ class MongoDBAtlasComponent(CustomComponent):
def build(
self,
embedding: Embeddings,
documents: List[Document],
inputs: List[Record],
collection_name: str = "",
db_name: str = "",
index_name: str = "",
@ -36,12 +39,20 @@ class MongoDBAtlasComponent(CustomComponent):
try:
from pymongo import MongoClient
except ImportError:
raise ImportError("Please install pymongo to use MongoDB Atlas Vector Store")
raise ImportError(
"Please install pymongo to use MongoDB Atlas Vector Store"
)
try:
mongo_client: MongoClient = MongoClient(mongodb_atlas_cluster_uri)
collection = mongo_client[db_name][collection_name]
except Exception as e:
raise ValueError(f"Failed to connect to MongoDB Atlas: {e}")
documents = []
for _input in inputs:
if isinstance(_input, Record):
documents.append(_input.to_lc_document())
else:
documents.append(_input)
if documents:
vector_store = MongoDBAtlasVectorSearch.from_documents(
documents=documents,

View file

@ -7,7 +7,8 @@ from langchain_community.vectorstores import VectorStore
from langchain_community.vectorstores.pinecone import Pinecone
from langflow import CustomComponent
from langflow.field_typing import Document, Embeddings
from langflow.field_typing import Embeddings
from langflow.schema.schema import Record
class PineconeComponent(CustomComponent):
@ -17,7 +18,7 @@ class PineconeComponent(CustomComponent):
def build_config(self):
return {
"documents": {"display_name": "Documents"},
"inputs": {"display_name": "Input", "input_types": ["Document", "Record"]},
"embedding": {"display_name": "Embedding"},
"index_name": {"display_name": "Index Name"},
"namespace": {"display_name": "Namespace"},
@ -44,7 +45,7 @@ class PineconeComponent(CustomComponent):
self,
embedding: Embeddings,
pinecone_env: str,
documents: List[Document],
inputs: List[Record],
text_key: str = "text",
pool_threads: int = 4,
index_name: Optional[str] = None,
@ -59,6 +60,12 @@ class PineconeComponent(CustomComponent):
pinecone.init(api_key=pinecone_api_key, environment=pinecone_env) # type: ignore
if not index_name:
raise ValueError("Index Name is required.")
documents = []
for _input in inputs:
if isinstance(_input, Record):
documents.append(_input.to_lc_document())
else:
documents.append(_input)
if documents:
return Pinecone.from_documents(
documents=documents,

View file

@ -3,8 +3,10 @@ from typing import Optional, Union
from langchain.schema import BaseRetriever
from langchain_community.vectorstores import VectorStore
from langchain_community.vectorstores.qdrant import Qdrant
from langflow import CustomComponent
from langflow.field_typing import Document, Embeddings, NestedDict
from langflow.field_typing import Embeddings, NestedDict
from langflow.schema.schema import Record
class QdrantComponent(CustomComponent):
@ -14,17 +16,23 @@ class QdrantComponent(CustomComponent):
def build_config(self):
return {
"documents": {"display_name": "Documents"},
"inputs": {"display_name": "Input", "input_types": ["Document", "Record"]},
"embedding": {"display_name": "Embedding"},
"api_key": {"display_name": "API Key", "password": True, "advanced": True},
"collection_name": {"display_name": "Collection Name"},
"content_payload_key": {"display_name": "Content Payload Key", "advanced": True},
"content_payload_key": {
"display_name": "Content Payload Key",
"advanced": True,
},
"distance_func": {"display_name": "Distance Function", "advanced": True},
"grpc_port": {"display_name": "gRPC Port", "advanced": True},
"host": {"display_name": "Host", "advanced": True},
"https": {"display_name": "HTTPS", "advanced": True},
"location": {"display_name": "Location", "advanced": True},
"metadata_payload_key": {"display_name": "Metadata Payload Key", "advanced": True},
"metadata_payload_key": {
"display_name": "Metadata Payload Key",
"advanced": True,
},
"path": {"display_name": "Path", "advanced": True},
"port": {"display_name": "Port", "advanced": True},
"prefer_grpc": {"display_name": "Prefer gRPC", "advanced": True},
@ -38,7 +46,7 @@ class QdrantComponent(CustomComponent):
self,
embedding: Embeddings,
collection_name: str,
documents: Optional[Document] = None,
inputs: Optional[Record] = None,
api_key: Optional[str] = None,
content_payload_key: str = "page_content",
distance_func: str = "Cosine",
@ -55,6 +63,12 @@ class QdrantComponent(CustomComponent):
timeout: Optional[int] = None,
url: Optional[str] = None,
) -> Union[VectorStore, Qdrant, BaseRetriever]:
documents = []
for _input in inputs:
if isinstance(_input, Record):
documents.append(_input.to_lc_document())
else:
documents.append(_input)
if documents is None:
from qdrant_client import QdrantClient

View file

@ -3,9 +3,10 @@ from typing import Optional, Union
from langchain.embeddings.base import Embeddings
from langchain_community.vectorstores import VectorStore
from langchain_community.vectorstores.redis import Redis
from langchain_core.documents import Document
from langchain_core.retrievers import BaseRetriever
from langflow import CustomComponent
from langflow.schema.schema import Record
class RedisComponent(CustomComponent):
@ -28,7 +29,7 @@ class RedisComponent(CustomComponent):
return {
"index_name": {"display_name": "Index Name", "value": "your_index"},
"code": {"show": False, "display_name": "Code"},
"documents": {"display_name": "Documents", "is_list": True},
"inputs": {"display_name": "Input", "input_types": ["Document", "Record"]},
"embedding": {"display_name": "Embedding"},
"schema": {"display_name": "Schema", "file_types": [".yaml"]},
"redis_server_url": {
@ -44,7 +45,7 @@ class RedisComponent(CustomComponent):
redis_server_url: str,
redis_index_name: str,
schema: Optional[str] = None,
documents: Optional[Document] = None,
inputs: Optional[Record] = None,
) -> Union[VectorStore, BaseRetriever]:
"""
Builds the Vector Store or BaseRetriever object.
@ -58,9 +59,17 @@ class RedisComponent(CustomComponent):
Returns:
- VectorStore: The Vector Store object.
"""
if documents is None:
documents = []
for _input in inputs:
if isinstance(_input, Record):
documents.append(_input.to_lc_document())
else:
documents.append(_input)
if not documents:
if schema is None:
raise ValueError("If no documents are provided, a schema must be provided.")
raise ValueError(
"If no documents are provided, a schema must be provided."
)
redis_vs = Redis.from_existing_index(
embedding=embedding,
index_name=redis_index_name,

View file

@ -33,7 +33,7 @@ class RedisSearchComponent(RedisComponent, LCVectorStoreComponent):
"input_value": {"display_name": "Input"},
"index_name": {"display_name": "Index Name", "value": "your_index"},
"code": {"show": False, "display_name": "Code"},
"documents": {"display_name": "Documents", "is_list": True},
"embedding": {"display_name": "Embedding"},
"schema": {"display_name": "Schema", "file_types": [".yaml"]},
"redis_server_url": {

View file

@ -3,10 +3,12 @@ from typing import List, Union
from langchain.schema import BaseRetriever
from langchain_community.vectorstores import VectorStore
from langchain_community.vectorstores.supabase import SupabaseVectorStore
from langflow import CustomComponent
from langflow.field_typing import Document, Embeddings, NestedDict
from supabase.client import Client, create_client
from langflow import CustomComponent
from langflow.field_typing import Embeddings, NestedDict
from langflow.schema.schema import Record
class SupabaseComponent(CustomComponent):
display_name = "Supabase"
@ -14,7 +16,7 @@ class SupabaseComponent(CustomComponent):
def build_config(self):
return {
"documents": {"display_name": "Documents"},
"inputs": {"display_name": "Input", "input_types": ["Document", "Record"]},
"embedding": {"display_name": "Embedding"},
"query_name": {"display_name": "Query Name"},
"search_kwargs": {"display_name": "Search Kwargs", "advanced": True},
@ -26,14 +28,22 @@ class SupabaseComponent(CustomComponent):
def build(
self,
embedding: Embeddings,
documents: List[Document],
inputs: List[Record],
query_name: str = "",
search_kwargs: NestedDict = {},
supabase_service_key: str = "",
supabase_url: str = "",
table_name: str = "",
) -> Union[VectorStore, SupabaseVectorStore, BaseRetriever]:
supabase: Client = create_client(supabase_url, supabase_key=supabase_service_key)
supabase: Client = create_client(
supabase_url, supabase_key=supabase_service_key
)
documents = []
for _input in inputs:
if isinstance(_input, Record):
documents.append(_input.to_lc_document())
else:
documents.append(_input)
return SupabaseVectorStore.from_documents(
documents=documents,
embedding=embedding,

View file

@ -8,13 +8,16 @@ from langchain_community.vectorstores.vectara import Vectara
from langchain_core.vectorstores import VectorStore
from langflow import CustomComponent
from langflow.field_typing import BaseRetriever, Document
from langflow.field_typing import BaseRetriever
from langflow.schema.schema import Record
class VectaraComponent(CustomComponent):
display_name: str = "Vectara"
description: str = "Implementation of Vector Store using Vectara"
documentation = "https://python.langchain.com/docs/integrations/vectorstores/vectara"
documentation = (
"https://python.langchain.com/docs/integrations/vectorstores/vectara"
)
beta = True
icon = "Vectara"
field_config = {
@ -28,8 +31,9 @@ class VectaraComponent(CustomComponent):
"display_name": "Vectara API Key",
"password": True,
},
"documents": {
"display_name": "Documents",
"inputs": {
"display_name": "Input",
"input_types": ["Document", "Record"],
"info": "If provided, will be upserted to corpus (optional)",
},
"files_url": {
@ -44,11 +48,18 @@ class VectaraComponent(CustomComponent):
vectara_corpus_id: str,
vectara_api_key: str,
files_url: Optional[List[str]] = None,
documents: Optional[Document] = None,
inputs: Optional[Record] = None,
) -> Union[VectorStore, BaseRetriever]:
source = "Langflow"
if documents is not None:
documents = []
for _input in inputs:
if isinstance(_input, Record):
documents.append(_input.to_lc_document())
else:
documents.append(_input)
if documents:
return Vectara.from_documents(
documents=documents, # type: ignore
embedding=FakeEmbeddings(size=768),

View file

@ -33,10 +33,6 @@ class VectaraSearchComponent(VectaraComponent, LCVectorStoreComponent):
"display_name": "Vectara API Key",
"password": True,
},
"documents": {
"display_name": "Documents",
"info": "If provided, will be upserted to corpus (optional)",
},
"files_url": {
"display_name": "Files Url",
"info": "Make vectara object using url of files (optional)",

View file

@ -2,16 +2,19 @@ from typing import Optional, Union
import weaviate # type: ignore
from langchain.embeddings.base import Embeddings
from langchain.schema import BaseRetriever, Document
from langchain.schema import BaseRetriever
from langchain_community.vectorstores import VectorStore, Weaviate
from langflow import CustomComponent
from langflow.schema.schema import Record
class WeaviateVectorStoreComponent(CustomComponent):
display_name: str = "Weaviate"
description: str = "Implementation of Vector Store using Weaviate"
documentation = "https://python.langchain.com/docs/integrations/vectorstores/weaviate"
documentation = (
"https://python.langchain.com/docs/integrations/vectorstores/weaviate"
)
beta = True
field_config = {
"url": {"display_name": "Weaviate URL", "value": "http://localhost:8080"},
@ -30,7 +33,7 @@ class WeaviateVectorStoreComponent(CustomComponent):
"advanced": True,
"value": "text",
},
"documents": {"display_name": "Documents", "is_list": True},
"inputs": {"display_name": "Input", "input_types": ["Document", "Record"]},
"embedding": {"display_name": "Embedding"},
"attributes": {
"display_name": "Attributes",
@ -55,7 +58,7 @@ class WeaviateVectorStoreComponent(CustomComponent):
index_name: Optional[str] = None,
text_key: str = "text",
embedding: Optional[Embeddings] = None,
documents: Optional[Document] = None,
inputs: Optional[Record] = None,
attributes: Optional[list] = None,
) -> Union[VectorStore, BaseRetriever]:
if api_key:
@ -78,8 +81,14 @@ class WeaviateVectorStoreComponent(CustomComponent):
return pascal_case_word
index_name = _to_pascal_case(index_name) if index_name else None
documents = []
for _input in inputs:
if isinstance(_input, Record):
documents.append(_input.to_lc_document())
else:
documents.append(_input)
if documents is not None and embedding is not None:
if documents and embedding is not None:
return Weaviate.from_documents(
client=client,
index_name=index_name,

View file

@ -39,7 +39,6 @@ class WeaviateSearchVectorStore(WeaviateVectorStoreComponent, LCVectorStoreCompo
"advanced": True,
"value": "text",
},
"documents": {"display_name": "Documents", "is_list": True},
"embedding": {"display_name": "Embedding"},
"attributes": {
"display_name": "Attributes",

View file

@ -3,9 +3,10 @@ from typing import Optional, Union
from langchain.embeddings.base import Embeddings
from langchain_community.vectorstores import VectorStore
from langchain_community.vectorstores.pgvector import PGVector
from langchain_core.documents import Document
from langchain_core.retrievers import BaseRetriever
from langflow import CustomComponent
from langflow.schema.schema import Record
class PGVectorComponent(CustomComponent):
@ -15,7 +16,9 @@ class PGVectorComponent(CustomComponent):
display_name: str = "PGVector"
description: str = "Implementation of Vector Store using PostgreSQL"
documentation = "https://python.langchain.com/docs/integrations/vectorstores/pgvector"
documentation = (
"https://python.langchain.com/docs/integrations/vectorstores/pgvector"
)
def build_config(self):
"""
@ -26,7 +29,7 @@ class PGVectorComponent(CustomComponent):
"""
return {
"code": {"show": False},
"documents": {"display_name": "Documents", "is_list": True},
"inputs": {"display_name": "Input", "input_types": ["Document", "Record"]},
"embedding": {"display_name": "Embedding"},
"pg_server_url": {
"display_name": "PostgreSQL Server Connection String",
@ -40,7 +43,7 @@ class PGVectorComponent(CustomComponent):
embedding: Embeddings,
pg_server_url: str,
collection_name: str,
documents: Optional[Document] = None,
inputs: Optional[Record] = None,
) -> Union[VectorStore, BaseRetriever]:
"""
Builds the Vector Store or BaseRetriever object.
@ -55,6 +58,12 @@ class PGVectorComponent(CustomComponent):
- VectorStore: The Vector Store object.
"""
documents = []
for _input in inputs:
if isinstance(_input, Record):
documents.append(_input.to_lc_document())
else:
documents.append(_input)
try:
if documents is None:
vector_store = PGVector.from_existing_index(