# Provenance: contents of src/backend/langflow/components/vectorstores/ChromaSearch.py
# as introduced by commit 1f5f5d7006f20e0a4f86e6dec926f33891304618
# ("Add ChromaSearch component for implementing a Vector Store using Chroma",
# Gabriel Luiz Freitas Almeida, Wed, 7 Feb 2024 19:42:47 -0300).
from typing import List, Optional

import chromadb  # type: ignore
from langchain_community.vectorstores.chroma import Chroma
from langflow import CustomComponent
from langflow.field_typing import Document, Embeddings, Text


class ChromaComponent(CustomComponent):
    """
    A custom component that runs a search against a Chroma Vector Store.

    The store is either created from the supplied documents or opened from an
    existing collection, and is then queried with the given input text.
    """

    display_name: str = "Chroma"
    description: str = "Implementation of Vector Store using Chroma"
    documentation = "https://python.langchain.com/docs/integrations/vectorstores/chroma"
    beta: bool = True

    def build_config(self):
        """
        Builds the configuration for the component.

        Returns:
        - dict: A dictionary mapping each ``build`` parameter name to its field
          options (display name, default value, selectable options, and whether
          the field is advanced or hidden).
        """
        return {
            "inputs": {"display_name": "Input"},
            "search_type": {"display_name": "Search Type", "options": ["Similarity", "MMR"]},
            "collection_name": {"display_name": "Collection Name", "value": "langflow"},
            "persist": {"display_name": "Persist"},
            "persist_directory": {"display_name": "Persist Directory"},
            "code": {"show": False, "display_name": "Code"},
            "documents": {"display_name": "Documents", "is_list": True},
            "embedding": {"display_name": "Embedding"},
            "chroma_server_cors_allow_origins": {
                "display_name": "Server CORS Allow Origins",
                "advanced": True,
            },
            "chroma_server_host": {"display_name": "Server Host", "advanced": True},
            "chroma_server_port": {"display_name": "Server Port", "advanced": True},
            "chroma_server_grpc_port": {
                "display_name": "Server gRPC Port",
                "advanced": True,
            },
            "chroma_server_ssl_enabled": {
                "display_name": "Server SSL Enabled",
                "advanced": True,
            },
        }

    def build(
        self,
        inputs: Text,
        search_type: str,
        collection_name: str,
        persist: bool,
        embedding: Embeddings,
        chroma_server_ssl_enabled: bool,
        persist_directory: Optional[str] = None,
        documents: Optional[List[Document]] = None,
        chroma_server_cors_allow_origins: Optional[str] = None,
        chroma_server_host: Optional[str] = None,
        chroma_server_port: Optional[int] = None,
        chroma_server_grpc_port: Optional[int] = None,
    ) -> List[Document]:
        """
        Searches a Chroma Vector Store and returns the matching documents.

        Args:
        - inputs (Text): The query text to search for. Must be a non-empty string.
        - search_type (str): The search strategy; it is lower-cased before being
          passed to ``Chroma.search`` (the configured options are "Similarity" and "MMR").
        - collection_name (str): The name of the collection.
        - persist (bool): Whether to persist the Vector Store when it is created from documents.
        - embedding (Embeddings): The embeddings to use for the Vector Store.
        - chroma_server_ssl_enabled (bool): Whether to enable SSL for the Chroma server.
        - persist_directory (Optional[str]): The directory to persist the Vector Store to.
        - documents (Optional[List[Document]]): Documents to ingest before searching. When
          omitted, the existing collection is opened instead.
        - chroma_server_cors_allow_origins (Optional[str]): The CORS allow origins for the Chroma server.
        - chroma_server_host (Optional[str]): The host for the Chroma server. Client settings
          are only built when this is not None.
        - chroma_server_port (Optional[int]): The port for the Chroma server.
        - chroma_server_grpc_port (Optional[int]): The gRPC port for the Chroma server.

        Returns:
        - List[Document]: The documents returned by the search.

        Raises:
        - ValueError: If ``inputs`` is empty or not a string, or if ``documents`` is an empty list.
        """

        # Validate the query first so that an invalid request fails before any
        # store is created or any documents are ingested.
        if not inputs or not isinstance(inputs, str):
            raise ValueError("Invalid inputs provided.")

        # Chroma settings
        chroma_settings = None

        if chroma_server_host is not None:
            # NOTE(review): unset optional values are forwarded as None; confirm
            # that chromadb's Settings accepts None for each of these fields.
            chroma_settings = chromadb.config.Settings(
                chroma_server_cors_allow_origins=chroma_server_cors_allow_origins or None,
                chroma_server_host=chroma_server_host,
                chroma_server_port=chroma_server_port or None,
                chroma_server_grpc_port=chroma_server_grpc_port or None,
                chroma_server_ssl_enabled=chroma_server_ssl_enabled,
            )

        # If documents, then we need to create a Chroma instance using .from_documents
        if documents is not None and embedding is not None:
            if not documents:
                raise ValueError("If documents are provided, there must be at least one document.")
            chroma = Chroma.from_documents(
                documents=documents,  # type: ignore
                persist_directory=persist_directory if persist else None,
                collection_name=collection_name,
                embedding=embedding,
                client_settings=chroma_settings,
            )
        else:
            # Open the existing collection. The collection name and embedding
            # function must be passed explicitly; otherwise the query would run
            # against LangChain's default collection with no embedding function
            # instead of the collection configured on this component.
            chroma = Chroma(
                collection_name=collection_name,
                embedding_function=embedding,
                persist_directory=persist_directory,
                client_settings=chroma_settings,
            )

        return chroma.search(query=inputs, search_type=search_type.lower())