Add support for Couchbase vector store (#1901)

* add couchbase vector store support

* add docs + minor changes

* Fix lint issues

* remove stray lines

* Add required validation and minor changes

* Address Comments

---------

Co-authored-by: Gabriel Luiz Freitas Almeida <gabriel@langflow.org>
This commit is contained in:
Prajwal Pai 2024-05-23 18:36:21 +05:30 committed by GitHub
commit 19680bb137
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
11 changed files with 281 additions and 4 deletions

View file

@ -26,7 +26,8 @@ The `Astra DB` initializes a vector store using Astra DB from records. It create
- **Collection Indexing Policy:** Indexing policy for the collection.
<Admonition type="note" title="Note">
Ensure you configure the necessary Astra DB token and API endpoint before starting.
Ensure you configure the necessary Astra DB token and API endpoint before
starting.
</Admonition>
---
@ -96,6 +97,44 @@ For detailed documentation and integration guides, please refer to the [Chroma C
---
### Couchbase
`Couchbase` builds a Couchbase vector store from records, streamlining the storage and retrieval of documents.
**Parameters:**
- **Embedding:** Model used by Couchbase.
- **Input:** Documents or records.
- **Couchbase Cluster Connection String:** Connection string for the Couchbase cluster.
- **Couchbase Cluster Username:** Cluster Username.
- **Couchbase Cluster Password:** Cluster Password.
- **Bucket Name:** Bucket identifier in Couchbase.
- **Scope Name:** Scope identifier in Couchbase.
- **Collection Name:** Collection identifier in Couchbase.
- **Index Name:** Index identifier.
For detailed documentation and integration guides, please refer to the [Couchbase Component Documentation](https://python.langchain.com/docs/integrations/vectorstores/couchbase).
---
### Couchbase Search
`CouchbaseSearch` leverages the Couchbase component to search for documents based on a similarity metric.
**Parameters:**
- **Input:** Search query.
- **Embedding:** Model used in the Vector Store.
- **Couchbase Cluster Connection String:** Connection string for the Couchbase cluster.
- **Couchbase Cluster Username:** Cluster Username.
- **Couchbase Cluster Password:** Cluster Password.
- **Bucket Name:** Bucket identifier.
- **Scope Name:** Scope identifier.
- **Collection Name:** Collection identifier in Couchbase.
- **Index Name:** Index identifier.
---
### FAISS
The `FAISS` component manages document ingestion into a FAISS Vector Store, optimizing document indexing and retrieval.
@ -278,7 +317,8 @@ For more details, see the [PGVector Component Documentation](https://python.lang
For detailed documentation, refer to the [Redis Documentation](https://python.langchain.com/docs/integrations/vectorstores/redis).
<Admonition type="note" title="Note">
Ensure the Redis server URL and index name are configured correctly. Provide a schema if no documents are available.
Ensure the Redis server URL and index name are configured correctly. Provide a
schema if no documents are available.
</Admonition>
---
@ -389,7 +429,8 @@ For more information, consult the [Vectara Component Documentation](https://pyth
For more details, see the [Weaviate Component Documentation](https://python.langchain.com/docs/integrations/vectorstores/weaviate).
<Admonition type="note" title="Note">
Ensure Weaviate instance is running and accessible. Verify API key, index name, text key, and attributes are set correctly.
Ensure Weaviate instance is running and accessible. Verify API key, index
name, text key, and attributes are set correctly.
</Admonition>
---

36
poetry.lock generated
View file

@ -1259,6 +1259,40 @@ files = [
test = ["PyYAML", "mock", "pytest"]
yaml = ["PyYAML"]
[[package]]
name = "couchbase"
version = "4.2.1"
description = "Python Client for Couchbase"
optional = false
python-versions = ">=3.7"
files = [
{file = "couchbase-4.2.1-cp310-cp310-macosx_10_15_x86_64.whl", hash = "sha256:7ad4c4462879f456a9067ac1788e62d852509439bac3538b9bc459a754666481"},
{file = "couchbase-4.2.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:06d91891c599ba0f5052e594ac025a2ca6ab7885e528b854ac9c125df7c74146"},
{file = "couchbase-4.2.1-cp310-cp310-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:0191d4a631ead533551cb9a214704ad5f3dfff2029e21a23b57725a0b5666b25"},
{file = "couchbase-4.2.1-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:b206790d6834a18c5e457f9a70f44774f476f3acccf9f22e8c1b5283a5bd03fa"},
{file = "couchbase-4.2.1-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:c5ca571b9ce017ecbd447de12cd46e213f93e0664bec6fca0a06e1768db1a4f8"},
{file = "couchbase-4.2.1-cp310-cp310-win_amd64.whl", hash = "sha256:675c615cfd4b04e73e94cf03c786da5105d94527f5c3a087813dba477a1379e9"},
{file = "couchbase-4.2.1-cp311-cp311-macosx_10_15_x86_64.whl", hash = "sha256:4cd09eedf162dc28386d9c6490e832c25068406c0f5d70a0417c0b1445394651"},
{file = "couchbase-4.2.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:dfebb11551c6d947ce6297ab02b5006b1ac8739dda3e10d41896db0dc8672915"},
{file = "couchbase-4.2.1-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:39e742ccfe90a0e59e6e1b0e12f0fe224a736c0207b218ef48048052f926e1c6"},
{file = "couchbase-4.2.1-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:f9ba24efddf47f30603275f5433434d8759a55233c78b3e4bc613c502ac429e9"},
{file = "couchbase-4.2.1-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:adfca3929f07fb4385dc52f08d3a60634012f364b176f95ab023cdd1bb7fe9c0"},
{file = "couchbase-4.2.1-cp311-cp311-win_amd64.whl", hash = "sha256:e1c68b28c6f0475961afb9fe626ad2bac8a5643b53f719675386f060db4b6e19"},
{file = "couchbase-4.2.1-cp38-cp38-macosx_10_15_x86_64.whl", hash = "sha256:137512462426cd495954c1815d78115d109308a4d9f8843b638285104388a359"},
{file = "couchbase-4.2.1-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:5987e5edcce7696e5f75b35be91f44fa69fb5eb95dba0957ad66f789affcdb36"},
{file = "couchbase-4.2.1-cp38-cp38-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:080cb0fc333bd4a641ede4ee14ff0c7dbe95067fbb280826ea546681e0b9f9e3"},
{file = "couchbase-4.2.1-cp38-cp38-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:e317c2628a4a917083e8e7ce8e2662432b6a12ebac65fc00de6da2b37ab5975c"},
{file = "couchbase-4.2.1-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:de7f8699ae344e2e96706ee0eac67e96bfdd3412fb18dcfb81d8ba5837dd3dfb"},
{file = "couchbase-4.2.1-cp38-cp38-win_amd64.whl", hash = "sha256:82b9deb8b1fe8e8d7dde9c232ac5f4c11ff0f067930837af0e7769706e6a9453"},
{file = "couchbase-4.2.1-cp39-cp39-macosx_10_15_x86_64.whl", hash = "sha256:44502d069ea17a8d692b7c88d84bc0df2cf4e944cde337c8eb3175bc0b835bb9"},
{file = "couchbase-4.2.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:c0f131b816a7d91b755232872ba10f6d6ca5a715e595ee9534478bc97a518ae8"},
{file = "couchbase-4.2.1-cp39-cp39-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:e9b9deb312bbe5f9a8e63828f9de877714c4b09b7d88f7dc87b60e5ffb2a13e6"},
{file = "couchbase-4.2.1-cp39-cp39-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:71e8da251850d795975c3569c01d35ba1a556825dc7d9549ff9918d148255804"},
{file = "couchbase-4.2.1-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:d04492144ce520c612a2f8f265278c9f0cdf62fdd6f703e7a3210a7476b228f6"},
{file = "couchbase-4.2.1-cp39-cp39-win_amd64.whl", hash = "sha256:3f91b7699ea7b8253cf34c9fb6e191de9b2edfd7aa4d6f97b29c10b9a1670444"},
{file = "couchbase-4.2.1.tar.gz", hash = "sha256:dc1c60d3f2fc179db8225aac4cc30d601d73cf2535aaf023d607e86be2d7dd78"},
]
[[package]]
name = "coverage"
version = "7.5.1"
@ -10224,5 +10258,5 @@ local = ["ctransformers", "llama-cpp-python", "sentence-transformers"]
[metadata]
lock-version = "2.0"
python-versions = ">=3.10,<3.13"
content-hash = "9e4b7f3e920342839dd51795994bdd6b993b6d0313fc715c900c4e0409acceef"
content-hash = "c68a14e273363466c1cea2bacec1798a6894d2bb18cf79ba443dc65c9344c57d"

View file

@ -83,6 +83,7 @@ langchain-google-vertexai = "^1.0.3"
langchain-groq = "^0.1.3"
langchain-pinecone = "^0.1.0"
langchain-mistralai = "^0.1.6"
couchbase = "^4.2.1"
[tool.poetry.group.dev.dependencies]

View file

@ -0,0 +1,73 @@
from typing import List, Optional
from langflow.components.vectorstores.base.model import LCVectorStoreComponent
from langflow.components.vectorstores.Couchbase import CouchbaseComponent
from langflow.field_typing import Embeddings, NestedDict, Text
from langflow.schema import Record
class CouchbaseSearchComponent(LCVectorStoreComponent):
    """Component that runs a similarity search against a Couchbase vector store.

    The vector store is obtained by delegating to ``CouchbaseComponent().build``
    with the connection and location settings passed to ``build``; the query is
    then executed through ``search_with_vector_store``.
    """

    display_name = "Couchbase Search"
    description = "Search a Couchbase Vector Store for similar documents."
    documentation = "https://python.langchain.com/docs/integrations/vectorstores/couchbase"
    icon = "Couchbase"

    field_order = [
        "couchbase_connection_string",
        "couchbase_username",
        "couchbase_password",
        "bucket_name",
        "scope_name",
        "collection_name",
        "index_name",
    ]

    def build_config(self):
        """Return the per-field configuration (labels, required/password/advanced flags)."""

        def _required(label):
            # Most fields differ only in their label, so build them in one place.
            return {"display_name": label, "required": True}

        return {
            "input_value": {"display_name": "Input"},
            "embedding": {"display_name": "Embedding"},
            "couchbase_connection_string": _required("Couchbase Cluster connection string"),
            "couchbase_username": _required("Couchbase username"),
            "couchbase_password": {
                "display_name": "Couchbase password",
                "password": True,
                "required": True,
            },
            "bucket_name": _required("Bucket Name"),
            "scope_name": _required("Scope Name"),
            "collection_name": _required("Collection Name"),
            "index_name": _required("Index Name"),
            "number_of_results": {
                "display_name": "Number of Results",
                "info": "Number of results to return.",
                "advanced": True,
            },
        }

    def build(  # type: ignore[override]
        self,
        input_value: Text,
        embedding: Embeddings,
        number_of_results: int = 4,
        bucket_name: str = "",
        scope_name: str = "",
        collection_name: str = "",
        index_name: str = "",
        couchbase_connection_string: str = "",
        couchbase_username: str = "",
        couchbase_password: str = "",
    ) -> List[Record]:
        """Build the Couchbase vector store and run a similarity search on it.

        Args:
            input_value: Query passed to the search.
            embedding: Embeddings object forwarded to the vector store builder.
            number_of_results: Forwarded to the search as ``k``.
            bucket_name: Forwarded to ``CouchbaseComponent().build``.
            scope_name: Forwarded to ``CouchbaseComponent().build``.
            collection_name: Forwarded to ``CouchbaseComponent().build``.
            index_name: Forwarded to ``CouchbaseComponent().build``.
            couchbase_connection_string: Forwarded to ``CouchbaseComponent().build``.
            couchbase_username: Forwarded to ``CouchbaseComponent().build``.
            couchbase_password: Forwarded to ``CouchbaseComponent().build``.

        Returns:
            Whatever ``search_with_vector_store`` returns for the query.

        Raises:
            ValueError: If the builder returns a falsy vector store.
        """
        # No ``inputs`` are passed, so the builder is called without any documents.
        store = CouchbaseComponent().build(
            embedding=embedding,
            couchbase_connection_string=couchbase_connection_string,
            couchbase_username=couchbase_username,
            couchbase_password=couchbase_password,
            bucket_name=bucket_name,
            scope_name=scope_name,
            collection_name=collection_name,
            index_name=index_name,
        )

        if store:
            return self.search_with_vector_store(
                vector_store=store,
                input_value=input_value,
                search_type="similarity",
                k=number_of_results,
            )

        raise ValueError("Failed to create Couchbase Vector Store")

View file

@ -9,10 +9,12 @@ from .SupabaseVectorStoreSearch import SupabaseSearchComponent
from .VectaraSearch import VectaraSearchComponent
from .WeaviateSearch import WeaviateSearchVectorStore
from .pgvectorSearch import PGVectorSearchComponent
from .Couchbase import CouchbaseSearchComponent # type: ignore
__all__ = [
"AstraDBSearchComponent",
"ChromaSearchComponent",
"CouchbaseSearchComponent",
"FAISSSearchComponent",
"MongoDBAtlasSearchComponent",
"PineconeSearchComponent",

View file

@ -0,0 +1,95 @@
from typing import List, Optional, Union
from langchain.schema import BaseRetriever
from langchain_community.vectorstores import CouchbaseVectorStore
from langflow.custom import CustomComponent
from langflow.field_typing import Embeddings, VectorStore
from langflow.schema import Record
from datetime import timedelta
from couchbase.auth import PasswordAuthenticator # type: ignore
from couchbase.cluster import Cluster # type: ignore
from couchbase.options import ClusterOptions # type: ignore
class CouchbaseComponent(CustomComponent):
    """Component that constructs a ``CouchbaseVectorStore`` from optional input documents.

    ``build`` first connects to the cluster, then either creates the store from
    the supplied documents or instantiates it with no documents.
    """

    display_name = "Couchbase"
    description = "Construct a `Couchbase Vector Search` vector store from raw documents."
    documentation = "https://python.langchain.com/docs/integrations/vectorstores/couchbase"
    icon = "Couchbase"

    field_order = [
        "couchbase_connection_string",
        "couchbase_username",
        "couchbase_password",
        "bucket_name",
        "scope_name",
        "collection_name",
        "index_name",
    ]

    def build_config(self):
        """Return the per-field configuration (labels, required/password flags)."""
        return {
            "inputs": {"display_name": "Input", "input_types": ["Document", "Record"]},
            "embedding": {"display_name": "Embedding"},
            "couchbase_connection_string": {
                "display_name": "Couchbase Cluster connection string",
                "required": True,
            },
            "couchbase_username": {"display_name": "Couchbase username", "required": True},
            "couchbase_password": {
                "display_name": "Couchbase password",
                "password": True,
                "required": True,
            },
            "bucket_name": {"display_name": "Bucket Name", "required": True},
            "scope_name": {"display_name": "Scope Name", "required": True},
            "collection_name": {"display_name": "Collection Name", "required": True},
            "index_name": {"display_name": "Index Name", "required": True},
        }

    def build(
        self,
        embedding: Embeddings,
        inputs: Optional[List[Record]] = None,
        bucket_name: str = "",
        scope_name: str = "",
        collection_name: str = "",
        index_name: str = "",
        couchbase_connection_string: str = "",
        couchbase_username: str = "",
        couchbase_password: str = "",
    ) -> Union[VectorStore, BaseRetriever]:
        """Connect to Couchbase and return a vector store.

        Args:
            embedding: Embeddings object passed to the vector store.
            inputs: Optional items to add; ``Record`` instances are converted
                with ``to_lc_document()``, anything else is passed through as-is.
            bucket_name: Bucket passed to the vector store.
            scope_name: Scope passed to the vector store.
            collection_name: Collection passed to the vector store.
            index_name: Index name passed to the vector store.
            couchbase_connection_string: Connection string passed to ``Cluster``.
            couchbase_username: Username for ``PasswordAuthenticator``.
            couchbase_password: Password for ``PasswordAuthenticator``.

        Returns:
            The store from ``CouchbaseVectorStore.from_documents`` when there
            are documents, otherwise a ``CouchbaseVectorStore`` created
            directly.

        Raises:
            ValueError: If creating the cluster connection or waiting for it to
                become ready raises any exception; the original exception is
                attached as ``__cause__``.
        """
        try:
            auth = PasswordAuthenticator(couchbase_username, couchbase_password)
            options = ClusterOptions(auth)
            cluster = Cluster(couchbase_connection_string, options)
            # Give the cluster up to 5 seconds to become ready before failing.
            cluster.wait_until_ready(timedelta(seconds=5))
        except Exception as e:
            # Chain explicitly so the underlying SDK error stays in the traceback
            # as the direct cause rather than an incidental context.
            raise ValueError(f"Failed to connect to Couchbase: {e}") from e

        documents = [
            _input.to_lc_document() if isinstance(_input, Record) else _input
            for _input in inputs or []
        ]

        if documents:
            return CouchbaseVectorStore.from_documents(
                documents=documents,
                cluster=cluster,
                bucket_name=bucket_name,
                scope_name=scope_name,
                collection_name=collection_name,
                embedding=embedding,
                index_name=index_name,
            )

        # No documents supplied: instantiate the store directly for this index.
        return CouchbaseVectorStore(
            cluster=cluster,
            bucket_name=bucket_name,
            scope_name=scope_name,
            collection_name=collection_name,
            embedding=embedding,
            index_name=index_name,
        )

View file

@ -9,10 +9,12 @@ from .SupabaseVectorStore import SupabaseComponent
from .Vectara import VectaraComponent
from .Weaviate import WeaviateVectorStoreComponent
from .pgvector import PGVectorComponent
from .Couchbase import CouchbaseComponent
__all__ = [
"AstraDBVectorStoreComponent",
"ChromaComponent",
"CouchbaseComponent",
"FAISSComponent",
"MongoDBAtlasComponent",
"PineconeComponent",

View file

@ -0,0 +1,17 @@
const SvgCouchbaseIcon = (props) => (
<svg
xmlns="http://www.w3.org/2000/svg"
width="1em"
height="1em"
preserveAspectRatio="xMidYMid"
viewBox="0 0 256 256"
{...props}
>
<path
fill="#ED2226"
d="M128 0C57.426 0 0 57.233 0 128c0 70.574 57.233 128 128 128 70.574 0 128-57.233 128-128S198.574 0 128 0zm86.429 150.429c0 7.734-4.447 14.502-13.148 16.048-15.082 2.707-46.792 4.254-73.281 4.254-26.49 0-58.2-1.547-73.281-4.254-8.7-1.546-13.148-8.314-13.148-16.048v-49.885c0-7.734 5.994-14.888 13.148-16.049 4.447-.773 14.888-1.546 23.01-1.546 3.093 0 5.606 2.32 5.606 5.994v34.997l44.858-.967 44.858.967V88.943c0-3.674 2.514-5.994 5.608-5.994 8.12 0 18.562.773 23.009 1.546 7.347 1.16 13.148 8.315 13.148 16.049-.387 16.435-.387 33.257-.387 49.885z"
/>
</svg>
);
export default SvgCouchbaseIcon;

View file

@ -0,0 +1 @@
<svg xmlns="http://www.w3.org/2000/svg" width="2500" height="2500" preserveAspectRatio="xMidYMid" viewBox="0 0 256 256" id="couchbase"><path fill="#ED2226" d="M128 0C57.426 0 0 57.233 0 128c0 70.574 57.233 128 128 128 70.574 0 128-57.233 128-128S198.574 0 128 0zm86.429 150.429c0 7.734-4.447 14.502-13.148 16.048-15.082 2.707-46.792 4.254-73.281 4.254-26.49 0-58.2-1.547-73.281-4.254-8.7-1.546-13.148-8.314-13.148-16.048v-49.885c0-7.734 5.994-14.888 13.148-16.049 4.447-.773 14.888-1.546 23.01-1.546 3.093 0 5.606 2.32 5.606 5.994v34.997l44.858-.967 44.858.967V88.943c0-3.674 2.514-5.994 5.608-5.994 8.12 0 18.562.773 23.009 1.546 7.347 1.16 13.148 8.315 13.148 16.049-.387 16.435-.387 33.257-.387 49.885z"></path></svg>

After

Width:  |  Height:  |  Size: 720 B

View file

@ -0,0 +1,9 @@
import React, { forwardRef } from "react";
import SvgCouchbaseIcon from "./Couchbase";
/**
 * Ref-forwarding wrapper around the Couchbase SVG icon.
 *
 * Passes the received ref and all props straight through to SvgCouchbaseIcon.
 */
export const CouchbaseIcon = forwardRef<
  SVGSVGElement,
  React.PropsWithChildren<{}>
>((props, ref) => <SvgCouchbaseIcon ref={ref} {...props} />);

View file

@ -153,6 +153,7 @@ import { AzureIcon } from "../icons/Azure";
import { BingIcon } from "../icons/Bing";
import { BotMessageSquareIcon } from "../icons/BotMessageSquare";
import { ChromaIcon } from "../icons/ChromaIcon";
import { CouchbaseIcon } from "../icons/Couchbase";
import { CohereIcon } from "../icons/Cohere";
import { ElasticsearchIcon } from "../icons/ElasticsearchStore";
import { EvernoteIcon } from "../icons/Evernote";
@ -324,6 +325,7 @@ export const nodeIconsLucide: iconsType = {
Vectara: VectaraIcon,
ArrowUpToLine: ArrowUpToLine,
Chroma: ChromaIcon,
Couchbase: CouchbaseIcon,
AirbyteJSONLoader: AirbyteIcon,
AmazonBedrockEmbeddings: AWSIcon,
Amazon: AWSIcon,