Refactor vector store components

This commit is contained in:
Gabriel Luiz Freitas Almeida 2024-02-28 09:20:54 -03:00
commit 9d7f3dda5e
13 changed files with 556 additions and 50 deletions

View file

@ -0,0 +1,56 @@
from typing import List, Optional
from langchain_community.vectorstores.mongodb_atlas import MongoDBAtlasVectorSearch
from langflow import CustomComponent
from langflow.field_typing import Document, Embeddings, NestedDict
class MongoDBAtlasComponent(CustomComponent):
    """Langflow component that builds a ``MongoDBAtlasVectorSearch`` store.

    The store is bound to one collection of a MongoDB Atlas cluster. When
    documents are supplied they are embedded and inserted into that
    collection; otherwise the store is simply attached to the existing
    collection.
    """

    display_name = "MongoDB Atlas"
    description = (
        "Construct a `MongoDB Atlas Vector Search` vector store from raw documents."
    )

    def build_config(self):
        """Return the UI field configuration, keyed by ``build`` parameter name."""
        return {
            "documents": {"display_name": "Documents"},
            "embedding": {"display_name": "Embedding"},
            "collection_name": {"display_name": "Collection Name"},
            "db_name": {"display_name": "Database Name"},
            "index_name": {"display_name": "Index Name"},
            "mongodb_atlas_cluster_uri": {"display_name": "MongoDB Atlas Cluster URI"},
            "search_kwargs": {"display_name": "Search Kwargs", "advanced": True},
        }

    def build(
        self,
        embedding: Embeddings,
        documents: Optional[List[Document]] = None,
        collection_name: str = "",
        db_name: str = "",
        index_name: str = "",
        mongodb_atlas_cluster_uri: str = "",
        search_kwargs: Optional[NestedDict] = None,
    ) -> MongoDBAtlasVectorSearch:
        """Create the vector store and optionally load documents into it.

        Args:
            embedding: Embedding model used to vectorize documents and queries.
            documents: Documents to insert into the collection. When empty or
                ``None`` nothing is written.
            collection_name: Name of the MongoDB collection backing the store.
            db_name: Name of the MongoDB database that holds the collection.
            index_name: Name of the Atlas search index. An empty value is
                treated as "not provided" and the library default is used.
            mongodb_atlas_cluster_uri: Connection string of the Atlas cluster.
            search_kwargs: Accepted for backward compatibility with existing
                callers. It is not forwarded: the ``MongoDBAtlasVectorSearch``
                constructor has no such parameter, so passing it raised a
                ``TypeError``.

        Returns:
            The ``MongoDBAtlasVectorSearch`` instance bound to the collection.

        Raises:
            ValueError: If the cluster URI, database name or collection name
                is missing.
        """
        required_fields = (
            ("mongodb_atlas_cluster_uri", mongodb_atlas_cluster_uri),
            ("db_name", db_name),
            ("collection_name", collection_name),
        )
        missing = [label for label, value in required_fields if not value]
        if missing:
            raise ValueError(
                "Missing required MongoDB Atlas settings: " + ", ".join(missing)
            )

        # Only override the index name when the caller actually provided one.
        store_kwargs = {"index_name": index_name} if index_name else {}

        # The constructor expects a pymongo collection, not connection details,
        # so let the library resolve the collection from the connection string.
        vector_store = MongoDBAtlasVectorSearch.from_connection_string(
            connection_string=mongodb_atlas_cluster_uri,
            namespace=f"{db_name}.{collection_name}",
            embedding=embedding,
            **store_kwargs,
        )
        if documents:
            vector_store.add_documents(documents)
        return vector_store

View file

@ -1,22 +1,22 @@
from typing import List, Optional
from langchain_community.vectorstores import MongoDBAtlasVectorSearch
from langflow import CustomComponent
from langflow.field_typing import (
Document,
Embeddings,
NestedDict,
)
from langflow.components.vectorstores.base.model import LCVectorStoreComponent
from langflow.components.vectorstores.MongoDBAtlasVector import MongoDBAtlasComponent
from langflow.field_typing import Embeddings, NestedDict
from langflow.schema import Record
class MongoDBAtlasComponent(CustomComponent):
display_name = "MongoDB Atlas"
description = "Construct a `MongoDB Atlas Vector Search` vector store from raw documents."
class MongoDBAtlasSearchComponent(MongoDBAtlasComponent, LCVectorStoreComponent):
display_name = "MongoDB Atlas Search"
description = "Search a MongoDB Atlas Vector Store for similar documents."
def build_config(self):
return {
"documents": {"display_name": "Documents"},
"search_type": {
"display_name": "Search Type",
"options": ["Similarity", "MMR"],
},
"input_value": {"display_name": "Input"},
"embedding": {"display_name": "Embedding"},
"collection_name": {"display_name": "Collection Name"},
"db_name": {"display_name": "Database Name"},
@ -27,17 +27,16 @@ class MongoDBAtlasComponent(CustomComponent):
def build(
self,
documents: List[Document],
input_value: str,
search_type: str,
embedding: Embeddings,
collection_name: str = "",
db_name: str = "",
index_name: str = "",
mongodb_atlas_cluster_uri: str = "",
search_kwargs: Optional[NestedDict] = None,
) -> MongoDBAtlasVectorSearch:
search_kwargs = search_kwargs or {}
return MongoDBAtlasVectorSearch(
documents=documents,
) -> List[Record]:
vector_store = super().build(
embedding=embedding,
collection_name=collection_name,
db_name=db_name,
@ -45,3 +44,8 @@ class MongoDBAtlasComponent(CustomComponent):
mongodb_atlas_cluster_uri=mongodb_atlas_cluster_uri,
search_kwargs=search_kwargs,
)
if not vector_store:
raise ValueError("Failed to create MongoDB Atlas Vector Store")
return self.search_with_vector_store(
vector_store=vector_store, input_value=input_value, search_type=search_type
)

View file

@ -5,6 +5,7 @@ import pinecone # type: ignore
from langchain.schema import BaseRetriever
from langchain_community.vectorstores import VectorStore
from langchain_community.vectorstores.pinecone import Pinecone
from langflow import CustomComponent
from langflow.field_typing import Document, Embeddings
@ -12,6 +13,7 @@ from langflow.field_typing import Document, Embeddings
class PineconeComponent(CustomComponent):
display_name = "Pinecone"
description = "Construct Pinecone wrapper from raw documents."
icon = "Pinecone"
def build_config(self):
return {
@ -19,10 +21,23 @@ class PineconeComponent(CustomComponent):
"embedding": {"display_name": "Embedding"},
"index_name": {"display_name": "Index Name"},
"namespace": {"display_name": "Namespace"},
"pinecone_api_key": {"display_name": "Pinecone API Key", "default": "", "password": True, "required": True},
"pinecone_env": {"display_name": "Pinecone Environment", "default": "", "required": True},
"pinecone_api_key": {
"display_name": "Pinecone API Key",
"default": "",
"password": True,
"required": True,
},
"pinecone_env": {
"display_name": "Pinecone Environment",
"default": "",
"required": True,
},
"search_kwargs": {"display_name": "Search Kwargs", "default": "{}"},
"pool_threads": {"display_name": "Pool Threads", "default": 1, "advanced": True},
"pool_threads": {
"display_name": "Pool Threads",
"default": 1,
"advanced": True,
},
}
def build(

View file

@ -0,0 +1,70 @@
from typing import List, Optional
from langflow.components.vectorstores.base.model import LCVectorStoreComponent
from langflow.components.vectorstores.Pinecone import PineconeComponent
from langflow.field_typing import Embeddings
from langflow.schema import Record
class PineconeSearchComponent(PineconeComponent, LCVectorStoreComponent):
    """Langflow component that runs a query against a Pinecone vector store.

    The store itself is obtained from the parent ``build`` (with an empty
    document list), then searched with the inherited
    ``search_with_vector_store`` helper.
    """

    display_name = "Pinecone Search"
    description = "Search a Pinecone Vector Store for similar documents."
    icon = "Pinecone"

    def build_config(self):
        """Return the UI field configuration for this component."""
        fields = {
            "search_type": {
                "display_name": "Search Type",
                "options": ["Similarity", "MMR"],
            },
            "input_value": {"display_name": "Input"},
            "embedding": {"display_name": "Embedding"},
            "index_name": {"display_name": "Index Name"},
            "namespace": {"display_name": "Namespace"},
            "pinecone_api_key": {
                "display_name": "Pinecone API Key",
                "default": "",
                "password": True,
                "required": True,
            },
            "pinecone_env": {
                "display_name": "Pinecone Environment",
                "default": "",
                "required": True,
            },
            "search_kwargs": {"display_name": "Search Kwargs", "default": "{}"},
            "pool_threads": {
                "display_name": "Pool Threads",
                "default": 1,
                "advanced": True,
            },
        }
        return fields

    def build(
        self,
        input_value: str,
        embedding: Embeddings,
        pinecone_env: str,
        text_key: str = "text",
        pool_threads: int = 4,
        index_name: Optional[str] = None,
        pinecone_api_key: Optional[str] = None,
        namespace: Optional[str] = "default",
        search_type: str = "similarity",
    ) -> List[Record]:
        """Build the Pinecone store via the parent class and search it.

        Args:
            input_value: Query text to search for.
            embedding: Embedding model forwarded to the parent ``build``.
            pinecone_env: Pinecone environment name.
            text_key: Forwarded to the parent ``build`` unchanged.
            pool_threads: Forwarded to the parent ``build`` unchanged.
            index_name: Name of the Pinecone index.
            pinecone_api_key: API key used to reach Pinecone.
            namespace: Pinecone namespace to query.
            search_type: Search strategy passed to the search helper.

        Returns:
            The records returned by ``search_with_vector_store``.

        Raises:
            ValueError: If the parent ``build`` returns a falsy value.
        """
        # No documents are passed: this component only queries the index.
        parent_args = {
            "embedding": embedding,
            "pinecone_env": pinecone_env,
            "documents": [],
            "text_key": text_key,
            "pool_threads": pool_threads,
            "index_name": index_name,
            "pinecone_api_key": pinecone_api_key,
            "namespace": namespace,
        }
        store = super().build(**parent_args)
        if store:
            return self.search_with_vector_store(
                vector_store=store,
                input_value=input_value,
                search_type=search_type,
            )
        raise ValueError("Failed to load the Pinecone index.")

View file

@ -0,0 +1,91 @@
from typing import List, Optional
from langflow.components.vectorstores.base.model import LCVectorStoreComponent
from langflow.components.vectorstores.Qdrant import QdrantComponent
from langflow.field_typing import Embeddings, NestedDict
from langflow.schema import Record
class QdrantSearchComponent(QdrantComponent, LCVectorStoreComponent):
    """Langflow component that runs a query against a Qdrant vector store.

    The store is obtained from the parent ``build`` and then searched with
    the inherited ``search_with_vector_store`` helper.
    """

    # Named and described as a search component, matching the other
    # "<store> Search" components, so it is distinguishable from the
    # store-building Qdrant component.
    display_name = "Qdrant Search"
    description = "Search a Qdrant Vector Store for similar documents."

    def build_config(self):
        """Return the UI field configuration for this component."""
        return {
            "search_type": {
                "display_name": "Search Type",
                "options": ["Similarity", "MMR"],
            },
            "input_value": {"display_name": "Input"},
            "embedding": {"display_name": "Embedding"},
            "api_key": {"display_name": "API Key", "password": True, "advanced": True},
            "collection_name": {"display_name": "Collection Name"},
            "content_payload_key": {
                "display_name": "Content Payload Key",
                "advanced": True,
            },
            "distance_func": {"display_name": "Distance Function", "advanced": True},
            "grpc_port": {"display_name": "gRPC Port", "advanced": True},
            "host": {"display_name": "Host", "advanced": True},
            "https": {"display_name": "HTTPS", "advanced": True},
            "location": {"display_name": "Location", "advanced": True},
            "metadata_payload_key": {
                "display_name": "Metadata Payload Key",
                "advanced": True,
            },
            "path": {"display_name": "Path", "advanced": True},
            "port": {"display_name": "Port", "advanced": True},
            "prefer_grpc": {"display_name": "Prefer gRPC", "advanced": True},
            "prefix": {"display_name": "Prefix", "advanced": True},
            "search_kwargs": {"display_name": "Search Kwargs", "advanced": True},
            "timeout": {"display_name": "Timeout", "advanced": True},
            "url": {"display_name": "URL", "advanced": True},
        }

    def build(
        self,
        input_value: str,
        embedding: Embeddings,
        collection_name: str,
        search_type: str = "similarity",
        api_key: Optional[str] = None,
        content_payload_key: str = "page_content",
        distance_func: str = "Cosine",
        grpc_port: int = 6334,
        https: bool = False,
        host: Optional[str] = None,
        location: Optional[str] = None,
        metadata_payload_key: str = "metadata",
        path: Optional[str] = None,
        port: Optional[int] = 6333,
        prefer_grpc: bool = False,
        prefix: Optional[str] = None,
        search_kwargs: Optional[NestedDict] = None,
        timeout: Optional[int] = None,
        url: Optional[str] = None,
    ) -> List[Record]:
        """Build the Qdrant store via the parent class and search it.

        Args:
            input_value: Query text to search for.
            embedding: Embedding model forwarded to the parent ``build``.
            collection_name: Name of the Qdrant collection.
            search_type: Search strategy passed to the search helper.

        Every other parameter is a connection or storage setting that is
        forwarded unchanged to the parent ``build``.

        Returns:
            The records returned by ``search_with_vector_store``.

        Raises:
            ValueError: If the parent ``build`` returns a falsy value.
        """
        vector_store = super().build(
            embedding=embedding,
            collection_name=collection_name,
            api_key=api_key,
            content_payload_key=content_payload_key,
            distance_func=distance_func,
            grpc_port=grpc_port,
            https=https,
            host=host,
            location=location,
            metadata_payload_key=metadata_payload_key,
            path=path,
            port=port,
            prefer_grpc=prefer_grpc,
            prefix=prefix,
            search_kwargs=search_kwargs,
            timeout=timeout,
            url=url,
        )
        if not vector_store:
            raise ValueError("Failed to load the Qdrant index.")

        return self.search_with_vector_store(
            vector_store=vector_store, input_value=input_value, search_type=search_type
        )

View file

@ -0,0 +1,77 @@
from typing import List, Optional
from langchain.embeddings.base import Embeddings
from langflow.components.vectorstores.base.model import LCVectorStoreComponent
from langflow.components.vectorstores.Redis import RedisComponent
from langflow.schema import Record
class RedisSearchComponent(RedisComponent, LCVectorStoreComponent):
    """
    A custom component that searches a Redis-backed Vector Store.
    """

    display_name: str = "Redis Search"
    description: str = "Search a Redis Vector Store for similar documents."
    documentation = "https://python.langchain.com/docs/integrations/vectorstores/redis"
    beta = True

    def build_config(self):
        """
        Describes the fields shown for this component.

        Returns:
        - dict: Field name mapped to its display options.
        """
        config = {
            "search_type": {
                "display_name": "Search Type",
                "options": ["Similarity", "MMR"],
            },
            "input_value": {"display_name": "Input"},
            "index_name": {"display_name": "Index Name", "value": "your_index"},
            "code": {"show": False, "display_name": "Code"},
            "documents": {"display_name": "Documents", "is_list": True},
            "embedding": {"display_name": "Embedding"},
            "schema": {"display_name": "Schema", "file_types": [".yaml"]},
            "redis_server_url": {
                "display_name": "Redis Server Connection String",
                "advanced": False,
            },
            "redis_index_name": {"display_name": "Redis Index", "advanced": False},
        }
        return config

    def build(
        self,
        input_value: str,
        search_type: str,
        embedding: Embeddings,
        redis_server_url: str,
        redis_index_name: str,
        schema: Optional[str] = None,
    ) -> List[Record]:
        """
        Obtains the Redis Vector Store from the parent class and searches it.

        Args:
        - input_value (str): The query text to search for.
        - search_type (str): The search strategy passed to the search helper.
        - embedding (Embeddings): The embeddings to use for the Vector Store.
        - redis_server_url (str): The URL for the Redis server.
        - redis_index_name (str): The name of the Redis index.
        - schema (Optional[str]): Forwarded unchanged to the parent build.

        Returns:
        - List[Record]: The records returned by search_with_vector_store.

        Raises:
        - ValueError: If the parent build returns a falsy value.
        """
        store = super().build(
            embedding=embedding,
            redis_server_url=redis_server_url,
            redis_index_name=redis_index_name,
            schema=schema,
        )
        if store:
            return self.search_with_vector_store(
                input_value=input_value,
                search_type=search_type,
                vector_store=store,
            )
        raise ValueError("Failed to load the Redis index.")

View file

@ -0,0 +1,49 @@
from typing import List
from langchain_community.vectorstores.supabase import SupabaseVectorStore
from supabase.client import Client, create_client
from langflow.components.vectorstores.base.model import LCVectorStoreComponent
from langflow.field_typing import Embeddings
from langflow.schema import Record
class SupabaseSearchComponent(LCVectorStoreComponent):
    """Langflow component that runs a query against a Supabase vector store.

    A Supabase client is created from the URL and service key, wrapped in a
    ``SupabaseVectorStore``, and searched with the inherited
    ``search_with_vector_store`` helper.
    """

    display_name = "Supabase Search"
    description = "Search a Supabase Vector Store for similar documents."

    def build_config(self):
        """Return the UI field configuration for this component."""
        return {
            "search_type": {
                "display_name": "Search Type",
                "options": ["Similarity", "MMR"],
            },
            "input_value": {"display_name": "Input"},
            "embedding": {"display_name": "Embedding"},
            "query_name": {"display_name": "Query Name"},
            "search_kwargs": {"display_name": "Search Kwargs", "advanced": True},
            "supabase_service_key": {"display_name": "Supabase Service Key"},
            "supabase_url": {"display_name": "Supabase URL"},
            "table_name": {"display_name": "Table Name", "advanced": True},
        }

    def build(
        self,
        input_value: str,
        search_type: str,
        embedding: Embeddings,
        query_name: str = "",
        supabase_service_key: str = "",
        supabase_url: str = "",
        table_name: str = "",
    ) -> List[Record]:
        """Connect to Supabase, wrap the table as a vector store and search it.

        Args:
            input_value: Query text to search for.
            search_type: Search strategy passed to the search helper.
            embedding: Embedding model handed to ``SupabaseVectorStore``.
            query_name: Query name handed to ``SupabaseVectorStore``.
            supabase_service_key: Key used to create the Supabase client.
            supabase_url: URL used to create the Supabase client.
            table_name: Table name handed to ``SupabaseVectorStore``.

        Returns:
            The records returned by ``search_with_vector_store``.
        """
        supabase: Client = create_client(
            supabase_url, supabase_key=supabase_service_key
        )
        vector_store = SupabaseVectorStore(
            client=supabase,
            embedding=embedding,
            table_name=table_name,
            query_name=query_name,
        )
        # Keyword arguments, like the other search components, so the call
        # does not depend on the helper's positional parameter order.
        return self.search_with_vector_store(
            vector_store=vector_store,
            input_value=input_value,
            search_type=search_type,
        )

View file

@ -8,12 +8,15 @@ from langchain_community.vectorstores.vectara import Vectara
from langchain_core.vectorstores import VectorStore
from langflow import CustomComponent
from langflow.field_typing import BaseRetriever, Document
from langchain_community.vectorstores.vectara import Vectara
class VectaraComponent(CustomComponent):
display_name: str = "Vectara"
description: str = "Implementation of Vector Store using Vectara"
documentation = "https://python.langchain.com/docs/integrations/vectorstores/vectara"
documentation = (
"https://python.langchain.com/docs/integrations/vectorstores/vectara"
)
beta = True
field_config = {
"vectara_customer_id": {
@ -26,7 +29,10 @@ class VectaraComponent(CustomComponent):
"display_name": "Vectara API Key",
"password": True,
},
"documents": {"display_name": "Documents", "info": "If provided, will be upserted to corpus (optional)"},
"documents": {
"display_name": "Documents",
"info": "If provided, will be upserted to corpus (optional)",
},
"files_url": {
"display_name": "Files Url",
"info": "Make vectara object using url of files (optional)",

View file

@ -0,0 +1,64 @@
from typing import List
from langchain_community.vectorstores.vectara import Vectara
from langflow.components.vectorstores.base.model import LCVectorStoreComponent
from langflow.components.vectorstores.Vectara import VectaraComponent
from langflow.schema import Record
class VectaraSearchComponent(VectaraComponent, LCVectorStoreComponent):
    """Langflow component that runs a query against a Vectara corpus.

    A ``Vectara`` store is created directly from the credentials and then
    searched with the inherited ``search_with_vector_store`` helper.
    """

    display_name: str = "Vectara Search"
    description: str = "Search a Vectara Vector Store for similar documents."
    documentation = "https://python.langchain.com/docs/integrations/vectorstores/vectara"
    beta = True

    # UI field configuration for this component.
    field_config = {
        "search_type": {
            "display_name": "Search Type",
            "options": ["Similarity", "MMR"],
        },
        "input_value": {"display_name": "Input"},
        "vectara_customer_id": {"display_name": "Vectara Customer ID"},
        "vectara_corpus_id": {"display_name": "Vectara Corpus ID"},
        "vectara_api_key": {"display_name": "Vectara API Key", "password": True},
        "documents": {
            "display_name": "Documents",
            "info": "If provided, will be upserted to corpus (optional)",
        },
        "files_url": {
            "display_name": "Files Url",
            "info": "Make vectara object using url of files (optional)",
        },
    }

    def build(
        self,
        input_value: str,
        search_type: str,
        vectara_customer_id: str,
        vectara_corpus_id: str,
        vectara_api_key: str,
    ) -> List[Record]:
        """Create a Vectara store from the credentials and search it.

        Args:
            input_value: Query text to search for.
            search_type: Search strategy passed to the search helper.
            vectara_customer_id: Vectara customer identifier.
            vectara_corpus_id: Identifier of the corpus to query.
            vectara_api_key: API key used to reach Vectara.

        Returns:
            The records returned by ``search_with_vector_store``.

        Raises:
            ValueError: If the created store is falsy.
        """
        store = Vectara(
            vectara_customer_id=vectara_customer_id,
            vectara_corpus_id=vectara_corpus_id,
            vectara_api_key=vectara_api_key,
            source="Langflow",
        )
        if store:
            return self.search_with_vector_store(
                vector_store=store,
                input_value=input_value,
                search_type=search_type,
            )
        raise ValueError("Failed to create Vectara Vector Store")

View file

@ -8,10 +8,12 @@ from langchain_community.vectorstores import VectorStore, Weaviate
from langflow import CustomComponent
class WeaviateVectorStore(CustomComponent):
class WeaviateVectorStoreComponent(CustomComponent):
display_name: str = "Weaviate"
description: str = "Implementation of Vector Store using Weaviate"
documentation = "https://python.langchain.com/docs/integrations/vectorstores/weaviate"
documentation = (
"https://python.langchain.com/docs/integrations/vectorstores/weaviate"
)
beta = True
field_config = {
"url": {"display_name": "Weaviate URL", "value": "http://localhost:8080"},
@ -24,7 +26,12 @@ class WeaviateVectorStore(CustomComponent):
"display_name": "Index name",
"required": False,
},
"text_key": {"display_name": "Text Key", "required": False, "advanced": True, "value": "text"},
"text_key": {
"display_name": "Text Key",
"required": False,
"advanced": True,
"value": "text",
},
"documents": {"display_name": "Documents", "is_list": True},
"embedding": {"display_name": "Embedding"},
"attributes": {
@ -34,7 +41,11 @@ class WeaviateVectorStore(CustomComponent):
"field_type": "str",
"advanced": True,
},
"search_by_text": {"display_name": "Search By Text", "field_type": "bool", "advanced": True},
"search_by_text": {
"display_name": "Search By Text",
"field_type": "bool",
"advanced": True,
},
"code": {"show": False},
}

View file

@ -0,0 +1,82 @@
from typing import List, Optional
from langchain.embeddings.base import Embeddings
from langflow.components.vectorstores.base.model import LCVectorStoreComponent
from langflow.components.vectorstores.Weaviate import WeaviateVectorStoreComponent
from langflow.schema import Record
class WeaviateSearchVectorStore(WeaviateVectorStoreComponent, LCVectorStoreComponent):
    """Langflow component that runs a query against a Weaviate vector store.

    The store is obtained from the parent ``build`` and then searched with
    the inherited ``search_with_vector_store`` helper.
    """

    display_name: str = "Weaviate Search"
    description: str = "Search a Weaviate Vector Store for similar documents."
    documentation = "https://python.langchain.com/docs/integrations/vectorstores/weaviate"
    beta = True

    # UI field configuration for this component.
    field_config = {
        "search_type": {
            "display_name": "Search Type",
            "options": ["Similarity", "MMR"],
        },
        "input_value": {"display_name": "Input"},
        "url": {"display_name": "Weaviate URL", "value": "http://localhost:8080"},
        "api_key": {"display_name": "API Key", "password": True, "required": False},
        "index_name": {"display_name": "Index name", "required": False},
        "text_key": {
            "display_name": "Text Key",
            "required": False,
            "advanced": True,
            "value": "text",
        },
        "documents": {"display_name": "Documents", "is_list": True},
        "embedding": {"display_name": "Embedding"},
        "attributes": {
            "display_name": "Attributes",
            "required": False,
            "is_list": True,
            "field_type": "str",
            "advanced": True,
        },
        "search_by_text": {
            "display_name": "Search By Text",
            "field_type": "bool",
            "advanced": True,
        },
        "code": {"show": False},
    }

    def build(
        self,
        input_value: str,
        search_type: str,
        url: str,
        search_by_text: bool = False,
        api_key: Optional[str] = None,
        index_name: Optional[str] = None,
        text_key: str = "text",
        embedding: Optional[Embeddings] = None,
        attributes: Optional[list] = None,
    ) -> List[Record]:
        """Build the Weaviate store via the parent class and search it.

        Args:
            input_value: Query text to search for.
            search_type: Search strategy passed to the search helper.
            url: Weaviate server URL.
            search_by_text: Forwarded unchanged to the parent ``build``.
            api_key: Optional API key for the Weaviate server.
            index_name: Name of the Weaviate index.
            text_key: Forwarded unchanged to the parent ``build``.
            embedding: Embedding model forwarded to the parent ``build``.
            attributes: Forwarded unchanged to the parent ``build``.

        Returns:
            The records returned by ``search_with_vector_store``.

        Raises:
            ValueError: If the parent ``build`` returns a falsy value.
        """
        parent_args = {
            "url": url,
            "api_key": api_key,
            "index_name": index_name,
            "text_key": text_key,
            "embedding": embedding,
            "attributes": attributes,
            "search_by_text": search_by_text,
        }
        store = super().build(**parent_args)
        if store:
            return self.search_with_vector_store(
                vector_store=store,
                input_value=input_value,
                search_type=search_type,
            )
        raise ValueError("Failed to load the Weaviate index.")

View file

@ -1,13 +1,13 @@
from typing import List, Optional
from langchain.embeddings.base import Embeddings
from langchain_community.vectorstores.pgvector import PGVector
from langflow.components.vectorstores.base.model import LCVectorStoreComponent
from langflow.components.vectorstores.pgvector import PGVectorComponent
from langflow.schema import Record
class PGVectorSearchComponent(LCVectorStoreComponent):
class PGVectorSearchComponent(PGVectorComponent, LCVectorStoreComponent):
"""
A custom component for implementing a Vector Store using PostgreSQL.
"""
@ -60,14 +60,12 @@ class PGVectorSearchComponent(LCVectorStoreComponent):
Returns:
- VectorStore: The Vector Store object.
"""
try:
vector_store = PGVector.from_existing_index(
vector_store = super().build(
embedding=embedding,
pg_server_url=pg_server_url,
collection_name=collection_name,
connection_string=pg_server_url,
)
except Exception as e:
raise RuntimeError(f"Failed to build PGVector: {e}")
return self.search_with_vector_store(

View file

@ -218,24 +218,7 @@ retrievers:
# https://github.com/supabase-community/supabase-py/issues/482
# ZepRetriever:
# documentation: "https://python.langchain.com/docs/modules/data_connection/retrievers/integrations/zep_memorystore"
vectorstores:
# Chroma:
# documentation: "https://python.langchain.com/docs/modules/data_connection/vectorstores/integrations/chroma"
Qdrant:
documentation: "https://python.langchain.com/docs/modules/data_connection/vectorstores/integrations/qdrant"
FAISS:
documentation: "https://python.langchain.com/docs/modules/data_connection/vectorstores/integrations/faiss"
Pinecone:
documentation: "https://python.langchain.com/docs/modules/data_connection/vectorstores/integrations/pinecone"
ElasticsearchStore:
documentation: "https://python.langchain.com/docs/modules/data_connection/vectorstores/integrations/elasticsearch"
SupabaseVectorStore:
documentation: "https://python.langchain.com/docs/modules/data_connection/vectorstores/integrations/supabase"
MongoDBAtlasVectorSearch:
documentation: "https://python.langchain.com/docs/modules/data_connection/vectorstores/integrations/mongodb_atlas"
# Requires docarray >=0.32.0 but langchain-serve requires jina 3.15.2 which doesn't support docarray >=0.32.0
# DocArrayInMemorySearch:
# documentation: "https://python.langchain.com/docs/modules/data_connection/vectorstores/integrations/docarray_in_memory"
wrappers:
RequestsWrapper:
documentation: ""