From 327af1be86e29ac6322c55724cbec24c32d63181 Mon Sep 17 00:00:00 2001 From: Atharva J Date: Mon, 6 Nov 2023 17:07:20 +0530 Subject: [PATCH 1/4] vectara components --- .../retrievers/VectaraSelfQueryRetriever.py | 73 +++++++++++++++++++ .../components/vectorstores/Vectara.py | 65 +++++++++++++---- 2 files changed, 125 insertions(+), 13 deletions(-) create mode 100644 src/backend/langflow/components/retrievers/VectaraSelfQueryRetriever.py diff --git a/src/backend/langflow/components/retrievers/VectaraSelfQueryRetriever.py b/src/backend/langflow/components/retrievers/VectaraSelfQueryRetriever.py new file mode 100644 index 000000000..0055e6ff2 --- /dev/null +++ b/src/backend/langflow/components/retrievers/VectaraSelfQueryRetriever.py @@ -0,0 +1,73 @@ +from typing import Optional, Union, List +from langflow import CustomComponent +import json +import lark +from langchain.vectorstores import Vectara +from langchain.schema import Document +# from langchain.vectorstores.base import VectorStore +from langchain.schema import BaseRetriever +from langchain.embeddings.base import Embeddings +from langchain.schema.vectorstore import VectorStore +from langchain.base_language import BaseLanguageModel +from langchain.retrievers.self_query.base import SelfQueryRetriever +from langchain.chains.query_constructor.base import AttributeInfo +from langchain.embeddings import FakeEmbeddings + + +class VectaraComponent(CustomComponent): + display_name: str = "Vectara Self Query Retriever for Vectara Vector Store" + description: str = "Implementation of Vectara Self Query Retriever" + documentation = ( + "https://python.langchain.com/docs/integrations/vectorstores/vectara" + ) + beta = True + field_config = { + "code": {"show": False}, + "vectorstore": { + "display_name": "Vectara Vector Store", + "info": "Input Vectara Vectore Store" + }, + "llm": { + "display_name": "LLM", + "info": "For self query retriever" + }, + 'document_content_description':{ + "display_name": "Document Content Description", + "info": "For self query retriever", + }, + "metadata_field_info": { + "display_name": "Metadata Field Info", + "info": "Check json format in documentation for self query retriever", + }, + } + + def build( + self, + vectorstore: VectorStore = None, + document_content_description: str = None, + llm: BaseLanguageModel = None, + metadata_field_info: List[str] = None, + ) -> BaseRetriever: + + metadata_field_obj = [] + + for meta in metadata_field_info: + meta_obj = json.loads(meta) + if 'name' not in meta_obj or 'description' not in meta_obj or 'type' not in meta_obj : + raise Exception('Incorrect metadata field info format.') + attribute_info = AttributeInfo( + name = meta_obj['name'], + description = meta_obj['description'], + type = meta_obj['type'], + ) + metadata_field_obj.append(attribute_info) + + return SelfQueryRetriever.from_llm( + llm, + vectorstore, + document_content_description, + metadata_field_obj, + verbose=True + ) + + \ No newline at end of file diff --git a/src/backend/langflow/components/vectorstores/Vectara.py b/src/backend/langflow/components/vectorstores/Vectara.py index 6f7fcc0bb..1a40e94f1 100644 --- a/src/backend/langflow/components/vectorstores/Vectara.py +++ b/src/backend/langflow/components/vectorstores/Vectara.py @@ -1,10 +1,18 @@ -from typing import Optional, Union +from typing import Optional, Union, List from langflow import CustomComponent - +import tempfile +import urllib.request +import urllib from langchain.vectorstores import Vectara from langchain.schema import Document from langchain.vectorstores.base import VectorStore from langchain.schema import BaseRetriever +from langchain.embeddings.base import Embeddings +from langchain.schema.vectorstore import VectorStore +from langchain.base_language import BaseLanguageModel +from langchain.retrievers.self_query.base import SelfQueryRetriever +from langchain.chains.query_constructor.base import AttributeInfo +from langchain.embeddings import FakeEmbeddings class VectaraComponent(CustomComponent): @@ -14,13 +22,29 @@ class VectaraComponent(CustomComponent): "https://python.langchain.com/docs/integrations/vectorstores/vectara" ) beta = True - # api key should be password = True field_config = { - "vectara_customer_id": {"display_name": "Vectara Customer ID"}, - "vectara_corpus_id": {"display_name": "Vectara Corpus ID"}, - "vectara_api_key": {"display_name": "Vectara API Key", "password": True}, + "vectara_customer_id": { + "display_name": "Vectara Customer ID", + "required": True, + }, + "vectara_corpus_id": { + "display_name": "Vectara Corpus ID", + "required": True, + }, + "vectara_api_key": { + "display_name": "Vectara API Key", + "password": True, + "required": True, + }, "code": {"show": False}, - "documents": {"display_name": "Documents"}, + "documents": { + "display_name": "Documents", + "info": "Pass in either for Self Query Retriever or for making a Vectara Object" + }, + 'files_url':{ + "display_name": "Files Url", + "info": "Make vectara object using url of files(documents not needed)", + }, } def build( @@ -28,21 +52,36 @@ class VectaraComponent(CustomComponent): vectara_customer_id: str, vectara_corpus_id: str, vectara_api_key: str, + files_url: Optional[List[str]] = None, documents: Optional[Document] = None, ) -> Union[VectorStore, BaseRetriever]: - # If documents, then we need to create a Vectara instance using .from_documents - if documents is not None: + + if documents is not None : return Vectara.from_documents( - documents=documents, # type: ignore + documents=documents, + embedding=FakeEmbeddings(size=768), + vectara_customer_id=vectara_customer_id, + vectara_corpus_id=vectara_corpus_id, + vectara_api_key=vectara_api_key, + ) + + if files_url is not None : + files_list = [] + for url in files_url: + name = tempfile.NamedTemporaryFile().name + urllib.request.urlretrieve(url, name) + files_list.append(name) + + return Vectara.from_files( + files=files_list, + embedding=FakeEmbeddings(size=768), vectara_customer_id=vectara_customer_id, vectara_corpus_id=vectara_corpus_id, vectara_api_key=vectara_api_key, - source="langflow", ) return Vectara( vectara_customer_id=vectara_customer_id, vectara_corpus_id=vectara_corpus_id, vectara_api_key=vectara_api_key, - source="langflow", - ) + ) \ No newline at end of file From 6e0afd87c30bcafe828958cbbda2a47ea86d31e4 Mon Sep 17 00:00:00 2001 From: Atharva J Date: Sun, 12 Nov 2023 01:18:41 +0530 Subject: [PATCH 2/4] minor - removal of extra imports --- .../retrievers/VectaraSelfQueryRetriever.py | 13 ++++--------- .../langflow/components/vectorstores/Vectara.py | 4 ---- 2 files changed, 4 insertions(+), 13 deletions(-) diff --git a/src/backend/langflow/components/retrievers/VectaraSelfQueryRetriever.py b/src/backend/langflow/components/retrievers/VectaraSelfQueryRetriever.py index 0055e6ff2..34c9707f3 100644 --- a/src/backend/langflow/components/retrievers/VectaraSelfQueryRetriever.py +++ b/src/backend/langflow/components/retrievers/VectaraSelfQueryRetriever.py @@ -1,17 +1,11 @@ -from typing import Optional, Union, List +from typing import Optional, List from langflow import CustomComponent import json -import lark -from langchain.vectorstores import Vectara -from langchain.schema import Document -# from langchain.vectorstores.base import VectorStore from langchain.schema import BaseRetriever -from langchain.embeddings.base import Embeddings from langchain.schema.vectorstore import VectorStore from langchain.base_language import BaseLanguageModel from langchain.retrievers.self_query.base import SelfQueryRetriever from langchain.chains.query_constructor.base import AttributeInfo -from langchain.embeddings import FakeEmbeddings class VectaraComponent(CustomComponent): @@ -31,13 +25,14 @@ class VectaraComponent(CustomComponent): "display_name": "LLM", "info": "For self query retriever" }, - 'document_content_description':{ + "document_content_description":{ "display_name": "Document Content Description", "info": "For self query retriever", }, "metadata_field_info": { "display_name": "Metadata Field Info", - "info": "Check json format in documentation for self query retriever", + "info": "Check dictionary format in documentation for self query retriever", + "info": "Each metadata field is a json string containing additional search metadata. Example input: {\"name\":\"speech\",\"description\":\"what name of the speech\",\"type\":\"string or list[string]\"}. The keys should remain constant", }, } diff --git a/src/backend/langflow/components/vectorstores/Vectara.py b/src/backend/langflow/components/vectorstores/Vectara.py index 1a40e94f1..b8b2f1021 100644 --- a/src/backend/langflow/components/vectorstores/Vectara.py +++ b/src/backend/langflow/components/vectorstores/Vectara.py @@ -7,11 +7,7 @@ from langchain.vectorstores import Vectara from langchain.schema import Document from langchain.vectorstores.base import VectorStore from langchain.schema import BaseRetriever -from langchain.embeddings.base import Embeddings from langchain.schema.vectorstore import VectorStore -from langchain.base_language import BaseLanguageModel -from langchain.retrievers.self_query.base import SelfQueryRetriever -from langchain.chains.query_constructor.base import AttributeInfo from langchain.embeddings import FakeEmbeddings From 07815de4e5760e1bfa350f56514601bf3e3e119e Mon Sep 17 00:00:00 2001 From: Atharva J Date: Mon, 13 Nov 2023 19:51:36 +0530 Subject: [PATCH 3/4] minor commenting changes --- .../langflow/components/retrievers/VectaraSelfQueryRetriever.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/backend/langflow/components/retrievers/VectaraSelfQueryRetriever.py b/src/backend/langflow/components/retrievers/VectaraSelfQueryRetriever.py index 34c9707f3..dec8efdc2 100644 --- a/src/backend/langflow/components/retrievers/VectaraSelfQueryRetriever.py +++ b/src/backend/langflow/components/retrievers/VectaraSelfQueryRetriever.py @@ -32,7 +32,7 @@ class VectaraComponent(CustomComponent): "metadata_field_info": { "display_name": "Metadata Field Info", "info": "Check dictionary format in documentation for self query retriever", - "info": "Each metadata field is a json string containing additional search metadata. Example input: {\"name\":\"speech\",\"description\":\"what name of the speech\",\"type\":\"string or list[string]\"}. The keys should remain constant", + "info": "Each metadata field is a string in the form of json containing additional search metadata.\nExample input: {\"name\":\"speech\",\"description\":\"what name of the speech\",\"type\":\"string or list[string]\"}.\nThe keys should remain constant", }, } From 68d6fae606967a7e7ac46ac239dd803d8fde891e Mon Sep 17 00:00:00 2001 From: Atharva J Date: Mon, 13 Nov 2023 19:58:43 +0530 Subject: [PATCH 4/4] minor formatting --- .../components/vectorstores/Vectara.py | 31 +++++++++---------- 1 file changed, 15 insertions(+), 16 deletions(-) diff --git a/src/backend/langflow/components/vectorstores/Vectara.py b/src/backend/langflow/components/vectorstores/Vectara.py index b8b2f1021..71e952fa6 100644 --- a/src/backend/langflow/components/vectorstores/Vectara.py +++ b/src/backend/langflow/components/vectorstores/Vectara.py @@ -22,25 +22,25 @@ class VectaraComponent(CustomComponent): "vectara_customer_id": { "display_name": "Vectara Customer ID", "required": True, - }, + }, "vectara_corpus_id": { - "display_name": "Vectara Corpus ID", + "display_name": "Vectara Corpus ID", "required": True, - }, + }, "vectara_api_key": { "display_name": "Vectara API Key", "password": True, "required": True, - }, + }, "code": {"show": False}, "documents": { - "display_name": "Documents", - "info": "Pass in either for Self Query Retriever or for making a Vectara Object" - }, - 'files_url':{ - "display_name": "Files Url", + "display_name": "Documents", + "info": "Pass in either for Self Query Retriever or for making a Vectara Object", + }, + "files_url": { + "display_name": "Files Url", "info": "Make vectara object using url of files(documents not needed)", - }, + }, } def build( @@ -48,11 +48,10 @@ class VectaraComponent(CustomComponent): vectara_customer_id: str, vectara_corpus_id: str, vectara_api_key: str, - files_url: Optional[List[str]] = None, + files_url: Optional[List[str]] = None, documents: Optional[Document] = None, ) -> Union[VectorStore, BaseRetriever]: - - if documents is not None : + if documents is not None: return Vectara.from_documents( documents=documents, embedding=FakeEmbeddings(size=768), @@ -60,8 +59,8 @@ class VectaraComponent(CustomComponent): vectara_corpus_id=vectara_corpus_id, vectara_api_key=vectara_api_key, ) - - if files_url is not None : + + if files_url is not None: files_list = [] for url in files_url: name = tempfile.NamedTemporaryFile().name @@ -80,4 +79,4 @@ class VectaraComponent(CustomComponent): vectara_customer_id=vectara_customer_id, vectara_corpus_id=vectara_corpus_id, vectara_api_key=vectara_api_key, - ) \ No newline at end of file + )