From 327af1be86e29ac6322c55724cbec24c32d63181 Mon Sep 17 00:00:00 2001 From: Atharva J Date: Mon, 6 Nov 2023 17:07:20 +0530 Subject: [PATCH 01/10] vectara components --- .../retrievers/VectaraSelfQueryRetriever.py | 73 +++++++++++++++++++ .../components/vectorstores/Vectara.py | 65 +++++++++++++---- 2 files changed, 125 insertions(+), 13 deletions(-) create mode 100644 src/backend/langflow/components/retrievers/VectaraSelfQueryRetriever.py diff --git a/src/backend/langflow/components/retrievers/VectaraSelfQueryRetriever.py b/src/backend/langflow/components/retrievers/VectaraSelfQueryRetriever.py new file mode 100644 index 000000000..0055e6ff2 --- /dev/null +++ b/src/backend/langflow/components/retrievers/VectaraSelfQueryRetriever.py @@ -0,0 +1,73 @@ +from typing import Optional, Union, List +from langflow import CustomComponent +import json +import lark +from langchain.vectorstores import Vectara +from langchain.schema import Document +# from langchain.vectorstores.base import VectorStore +from langchain.schema import BaseRetriever +from langchain.embeddings.base import Embeddings +from langchain.schema.vectorstore import VectorStore +from langchain.base_language import BaseLanguageModel +from langchain.retrievers.self_query.base import SelfQueryRetriever +from langchain.chains.query_constructor.base import AttributeInfo +from langchain.embeddings import FakeEmbeddings + + +class VectaraComponent(CustomComponent): + display_name: str = "Vectara Self Query Retriever for Vectara Vector Store" + description: str = "Implementation of Vectara Self Query Retriever" + documentation = ( + "https://python.langchain.com/docs/integrations/vectorstores/vectara" + ) + beta = True + field_config = { + "code": {"show": False}, + "vectorstore": { + "display_name": "Vectara Vector Store", + "info": "Input Vectara Vectore Store" + }, + "llm": { + "display_name": "LLM", + "info": "For self query retriever" + }, + 'document_content_description':{ + "display_name": "Document Content Description", + "info": "For self query retriever", + }, + "metadata_field_info": { + "display_name": "Metadata Field Info", + "info": "Check json format in documentation for self query retriever", + }, + } + + def build( + self, + vectorstore: VectorStore = None, + document_content_description: str = None, + llm: BaseLanguageModel = None, + metadata_field_info: List[str] = None, + ) -> BaseRetriever: + + metadata_field_obj = [] + + for meta in metadata_field_info: + meta_obj = json.loads(meta) + if 'name' not in meta_obj or 'description' not in meta_obj or 'type' not in meta_obj : + raise Exception('Incorrect metadata field info format.') + attribute_info = AttributeInfo( + name = meta_obj['name'], + description = meta_obj['description'], + type = meta_obj['type'], + ) + metadata_field_obj.append(attribute_info) + + return SelfQueryRetriever.from_llm( + llm, + vectorstore, + document_content_description, + metadata_field_obj, + verbose=True + ) + + \ No newline at end of file diff --git a/src/backend/langflow/components/vectorstores/Vectara.py b/src/backend/langflow/components/vectorstores/Vectara.py index 6f7fcc0bb..1a40e94f1 100644 --- a/src/backend/langflow/components/vectorstores/Vectara.py +++ b/src/backend/langflow/components/vectorstores/Vectara.py @@ -1,10 +1,18 @@ -from typing import Optional, Union +from typing import Optional, Union, List from langflow import CustomComponent - +import tempfile +import urllib.request +import urllib from langchain.vectorstores import Vectara from langchain.schema import Document from langchain.vectorstores.base import VectorStore from langchain.schema import BaseRetriever +from langchain.embeddings.base import Embeddings +from langchain.schema.vectorstore import VectorStore +from langchain.base_language import BaseLanguageModel +from langchain.retrievers.self_query.base import SelfQueryRetriever +from langchain.chains.query_constructor.base import AttributeInfo +from langchain.embeddings import FakeEmbeddings class VectaraComponent(CustomComponent): @@ -14,13 +22,29 @@ class VectaraComponent(CustomComponent): "https://python.langchain.com/docs/integrations/vectorstores/vectara" ) beta = True - # api key should be password = True field_config = { - "vectara_customer_id": {"display_name": "Vectara Customer ID"}, - "vectara_corpus_id": {"display_name": "Vectara Corpus ID"}, - "vectara_api_key": {"display_name": "Vectara API Key", "password": True}, + "vectara_customer_id": { + "display_name": "Vectara Customer ID", + "required": True, + }, + "vectara_corpus_id": { + "display_name": "Vectara Corpus ID", + "required": True, + }, + "vectara_api_key": { + "display_name": "Vectara API Key", + "password": True, + "required": True, + }, "code": {"show": False}, - "documents": {"display_name": "Documents"}, + "documents": { + "display_name": "Documents", + "info": "Pass in either for Self Query Retriever or for making a Vectara Object" + }, + 'files_url':{ + "display_name": "Files Url", + "info": "Make vectara object using url of files(documents not needed)", + }, } def build( @@ -28,21 +52,36 @@ class VectaraComponent(CustomComponent): vectara_customer_id: str, vectara_corpus_id: str, vectara_api_key: str, + files_url: Optional[List[str]] = None, documents: Optional[Document] = None, ) -> Union[VectorStore, BaseRetriever]: - # If documents, then we need to create a Vectara instance using .from_documents - if documents is not None: + + if documents is not None : return Vectara.from_documents( - documents=documents, # type: ignore + documents=documents, + embedding=FakeEmbeddings(size=768), + vectara_customer_id=vectara_customer_id, + vectara_corpus_id=vectara_corpus_id, + vectara_api_key=vectara_api_key, + ) + + if files_url is not None : + files_list = [] + for url in files_url: + name = tempfile.NamedTemporaryFile().name + urllib.request.urlretrieve(url, name) + files_list.append(name) + + return Vectara.from_files( + files=files_list, + embedding=FakeEmbeddings(size=768), vectara_customer_id=vectara_customer_id, vectara_corpus_id=vectara_corpus_id, vectara_api_key=vectara_api_key, - source="langflow", ) return Vectara( vectara_customer_id=vectara_customer_id, vectara_corpus_id=vectara_corpus_id, vectara_api_key=vectara_api_key, - source="langflow", - ) + ) \ No newline at end of file From 6e0afd87c30bcafe828958cbbda2a47ea86d31e4 Mon Sep 17 00:00:00 2001 From: Atharva J Date: Sun, 12 Nov 2023 01:18:41 +0530 Subject: [PATCH 02/10] minor - removal of extra imports --- .../retrievers/VectaraSelfQueryRetriever.py | 13 ++++--------- .../langflow/components/vectorstores/Vectara.py | 4 ---- 2 files changed, 4 insertions(+), 13 deletions(-) diff --git a/src/backend/langflow/components/retrievers/VectaraSelfQueryRetriever.py b/src/backend/langflow/components/retrievers/VectaraSelfQueryRetriever.py index 0055e6ff2..34c9707f3 100644 --- a/src/backend/langflow/components/retrievers/VectaraSelfQueryRetriever.py +++ b/src/backend/langflow/components/retrievers/VectaraSelfQueryRetriever.py @@ -1,17 +1,11 @@ -from typing import Optional, Union, List +from typing import Optional, List from langflow import CustomComponent import json -import lark -from langchain.vectorstores import Vectara -from langchain.schema import Document -# from langchain.vectorstores.base import VectorStore from langchain.schema import BaseRetriever -from langchain.embeddings.base import Embeddings from langchain.schema.vectorstore import VectorStore from langchain.base_language import BaseLanguageModel from langchain.retrievers.self_query.base import SelfQueryRetriever from langchain.chains.query_constructor.base import AttributeInfo -from langchain.embeddings import FakeEmbeddings class VectaraComponent(CustomComponent): @@ -31,13 +25,14 @@ class VectaraComponent(CustomComponent): "display_name": "LLM", "info": "For self query retriever" }, - 'document_content_description':{ + "document_content_description":{ "display_name": "Document Content Description", "info": "For self query retriever", }, "metadata_field_info": { "display_name": "Metadata Field Info", - "info": "Check json format in documentation for self query retriever", + "info": "Check dictionary format in documentation for self query retriever", + "info": "Each metadata field is a json string containing additional search metadata. Example input: {\"name\":\"speech\",\"description\":\"what name of the speech\",\"type\":\"string or list[string]\"}. The keys should remain constant", }, } diff --git a/src/backend/langflow/components/vectorstores/Vectara.py b/src/backend/langflow/components/vectorstores/Vectara.py index 1a40e94f1..b8b2f1021 100644 --- a/src/backend/langflow/components/vectorstores/Vectara.py +++ b/src/backend/langflow/components/vectorstores/Vectara.py @@ -7,11 +7,7 @@ from langchain.vectorstores import Vectara from langchain.schema import Document from langchain.vectorstores.base import VectorStore from langchain.schema import BaseRetriever -from langchain.embeddings.base import Embeddings from langchain.schema.vectorstore import VectorStore -from langchain.base_language import BaseLanguageModel -from langchain.retrievers.self_query.base import SelfQueryRetriever -from langchain.chains.query_constructor.base import AttributeInfo from langchain.embeddings import FakeEmbeddings From 07815de4e5760e1bfa350f56514601bf3e3e119e Mon Sep 17 00:00:00 2001 From: Atharva J Date: Mon, 13 Nov 2023 19:51:36 +0530 Subject: [PATCH 03/10] minor commenting changes --- .../langflow/components/retrievers/VectaraSelfQueryRetriever.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/backend/langflow/components/retrievers/VectaraSelfQueryRetriever.py b/src/backend/langflow/components/retrievers/VectaraSelfQueryRetriever.py index 34c9707f3..dec8efdc2 100644 --- a/src/backend/langflow/components/retrievers/VectaraSelfQueryRetriever.py +++ b/src/backend/langflow/components/retrievers/VectaraSelfQueryRetriever.py @@ -32,7 +32,7 @@ class VectaraComponent(CustomComponent): "metadata_field_info": { "display_name": "Metadata Field Info", "info": "Check dictionary format in documentation for self query retriever", - "info": "Each metadata field is a json string containing additional search metadata. Example input: {\"name\":\"speech\",\"description\":\"what name of the speech\",\"type\":\"string or list[string]\"}. The keys should remain constant", + "info": "Each metadata field is a string in the form of json containing additional search metadata.\nExample input: {\"name\":\"speech\",\"description\":\"what name of the speech\",\"type\":\"string or list[string]\"}.\nThe keys should remain constant", }, } From 68d6fae606967a7e7ac46ac239dd803d8fde891e Mon Sep 17 00:00:00 2001 From: Atharva J Date: Mon, 13 Nov 2023 19:58:43 +0530 Subject: [PATCH 04/10] minor formatting --- .../components/vectorstores/Vectara.py | 31 +++++++++---------- 1 file changed, 15 insertions(+), 16 deletions(-) diff --git a/src/backend/langflow/components/vectorstores/Vectara.py b/src/backend/langflow/components/vectorstores/Vectara.py index b8b2f1021..71e952fa6 100644 --- a/src/backend/langflow/components/vectorstores/Vectara.py +++ b/src/backend/langflow/components/vectorstores/Vectara.py @@ -22,25 +22,25 @@ class VectaraComponent(CustomComponent): "vectara_customer_id": { "display_name": "Vectara Customer ID", "required": True, - }, + }, "vectara_corpus_id": { - "display_name": "Vectara Corpus ID", + "display_name": "Vectara Corpus ID", "required": True, - }, + }, "vectara_api_key": { "display_name": "Vectara API Key", "password": True, "required": True, - }, + }, "code": {"show": False}, "documents": { - "display_name": "Documents", - "info": "Pass in either for Self Query Retriever or for making a Vectara Object" - }, - 'files_url':{ - "display_name": "Files Url", + "display_name": "Documents", + "info": "Pass in either for Self Query Retriever or for making a Vectara Object", + }, + "files_url": { + "display_name": "Files Url", "info": "Make vectara object using url of files(documents not needed)", - }, + }, } def build( @@ -48,11 +48,10 @@ class VectaraComponent(CustomComponent): vectara_customer_id: str, vectara_corpus_id: str, vectara_api_key: str, - files_url: Optional[List[str]] = None, + files_url: Optional[List[str]] = None, documents: Optional[Document] = None, ) -> Union[VectorStore, BaseRetriever]: - - if documents is not None : + if documents is not None: return Vectara.from_documents( documents=documents, embedding=FakeEmbeddings(size=768), @@ -60,8 +59,8 @@ class VectaraComponent(CustomComponent): vectara_corpus_id=vectara_corpus_id, vectara_api_key=vectara_api_key, ) - - if files_url is not None : + + if files_url is not None: files_list = [] for url in files_url: name = tempfile.NamedTemporaryFile().name @@ -80,4 +79,4 @@ class VectaraComponent(CustomComponent): vectara_customer_id=vectara_customer_id, vectara_corpus_id=vectara_corpus_id, vectara_api_key=vectara_api_key, - ) \ No newline at end of file + ) From 390767cbf75745a35d6b423f25bd085f76c6d2fc Mon Sep 17 00:00:00 2001 From: Atharva J Date: Sun, 17 Dec 2023 20:17:46 +0530 Subject: [PATCH 05/10] major --- .../langflow/components/vectorstores/Vectara.py | 13 +------------ 1 file changed, 1 insertion(+), 12 deletions(-) diff --git a/src/backend/langflow/components/vectorstores/Vectara.py b/src/backend/langflow/components/vectorstores/Vectara.py index 53afd2718..bdcf9aef5 100644 --- a/src/backend/langflow/components/vectorstores/Vectara.py +++ b/src/backend/langflow/components/vectorstores/Vectara.py @@ -1,24 +1,13 @@ -<<<<<<< HEAD -from typing import Optional, Union - -from langchain.schema import BaseRetriever, Document -======= from typing import Optional, Union, List from langflow import CustomComponent import tempfile import urllib.request import urllib ->>>>>>> 68d6fae606967a7e7ac46ac239dd803d8fde891e from langchain.vectorstores import Vectara +from langchain.schema import Document from langchain.vectorstores.base import VectorStore -<<<<<<< HEAD - -from langflow import CustomComponent -======= from langchain.schema import BaseRetriever -from langchain.schema.vectorstore import VectorStore from langchain.embeddings import FakeEmbeddings ->>>>>>> 68d6fae606967a7e7ac46ac239dd803d8fde891e class VectaraComponent(CustomComponent): From 8fcf54bcb8795f274cdf3f4db21f58589ea92722 Mon Sep 17 00:00:00 2001 From: Atharva J Date: Mon, 18 Dec 2023 01:04:37 +0530 Subject: [PATCH 06/10] minor - test check trial --- tests/test_endpoints.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/test_endpoints.py b/tests/test_endpoints.py index 338224004..f4b65f4f7 100644 --- a/tests/test_endpoints.py +++ b/tests/test_endpoints.py @@ -11,6 +11,7 @@ from langflow.services.database.models.api_key.model import ApiKey from langflow.services.database.utils import session_getter from langflow.services.deps import get_db_service, get_settings_service from langflow.template.frontend_node.chains import TimeTravelGuideChainNode +import os def run_post(client, flow_id, headers, post_data): @@ -49,7 +50,7 @@ PROMPT_REQUEST = { "custom_fields": {}, "output_types": [], "field_formatters": { - "formatters": {"openai_api_key": {}}, + "formatters": {"openai_api_key": os.environ.get("OPENAI_API_KEY")}, "base_formatters": { "kwargs": {}, "optional": {}, From 83e9c089a7913620d4d46543da5c75a03df382e0 Mon Sep 17 00:00:00 2001 From: Atharva J Date: Mon, 18 Dec 2023 01:14:20 +0530 Subject: [PATCH 07/10] yes --- tests/test_endpoints.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/tests/test_endpoints.py b/tests/test_endpoints.py index f4b65f4f7..338224004 100644 --- a/tests/test_endpoints.py +++ b/tests/test_endpoints.py @@ -11,7 +11,6 @@ from langflow.services.database.models.api_key.model import ApiKey from langflow.services.database.utils import session_getter from langflow.services.deps import get_db_service, get_settings_service from langflow.template.frontend_node.chains import TimeTravelGuideChainNode -import os def run_post(client, flow_id, headers, post_data): @@ -50,7 +49,7 @@ PROMPT_REQUEST = { "custom_fields": {}, "output_types": [], "field_formatters": { - "formatters": {"openai_api_key": os.environ.get("OPENAI_API_KEY")}, + "formatters": {"openai_api_key": {}}, "base_formatters": { "kwargs": {}, "optional": {}, From df4f158eac9eb3075999dea0666073ed12c5023c Mon Sep 17 00:00:00 2001 From: Atharva J Date: Mon, 18 Dec 2023 01:16:18 +0530 Subject: [PATCH 08/10] test_endpoint.py from main branch --- tests/test_endpoints.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_endpoints.py b/tests/test_endpoints.py index 338224004..2e2a65b64 100644 --- a/tests/test_endpoints.py +++ b/tests/test_endpoints.py @@ -578,4 +578,4 @@ def test_async_task_processing_vector_store(client, added_vector_store, created_ # Validate that the task completed successfully and the result is as expected assert "result" in task_status_json, task_status_json assert "output" in task_status_json["result"], task_status_json["result"] - assert "Langflow" in task_status_json["result"]["output"], task_status_json["result"] + assert "Langflow" in task_status_json["result"]["output"], task_status_json["result"] \ No newline at end of file From a16fe991620f59c878afa963cf891d6619c22466 Mon Sep 17 00:00:00 2001 From: Atharva Jadhav <93152317+JAtharva22@users.noreply.github.com> Date: Mon, 18 Dec 2023 04:52:25 +0530 Subject: [PATCH 09/10] Delete src/backend/langflow/components/retrievers/VectaraSelfQueryRetriever.py --- .../retrievers/VectaraSelfQueryRetriever.py | 68 ------------------- 1 file changed, 68 deletions(-) delete mode 100644 src/backend/langflow/components/retrievers/VectaraSelfQueryRetriever.py diff --git a/src/backend/langflow/components/retrievers/VectaraSelfQueryRetriever.py b/src/backend/langflow/components/retrievers/VectaraSelfQueryRetriever.py deleted file mode 100644 index dec8efdc2..000000000 --- a/src/backend/langflow/components/retrievers/VectaraSelfQueryRetriever.py +++ /dev/null @@ -1,68 +0,0 @@ -from typing import Optional, List -from langflow import CustomComponent -import json -from langchain.schema import BaseRetriever -from langchain.schema.vectorstore import VectorStore -from langchain.base_language import BaseLanguageModel -from langchain.retrievers.self_query.base import SelfQueryRetriever -from langchain.chains.query_constructor.base import AttributeInfo - - -class VectaraComponent(CustomComponent): - display_name: str = "Vectara Self Query Retriever for Vectara Vector Store" - description: str = "Implementation of Vectara Self Query Retriever" - documentation = ( - "https://python.langchain.com/docs/integrations/vectorstores/vectara" - ) - beta = True - field_config = { - "code": {"show": False}, - "vectorstore": { - "display_name": "Vectara Vector Store", - "info": "Input Vectara Vectore Store" - }, - "llm": { - "display_name": "LLM", - "info": "For self query retriever" - }, - "document_content_description":{ - "display_name": "Document Content Description", - "info": "For self query retriever", - }, - "metadata_field_info": { - "display_name": "Metadata Field Info", - "info": "Check dictionary format in documentation for self query retriever", - "info": "Each metadata field is a string in the form of json containing additional search metadata.\nExample input: {\"name\":\"speech\",\"description\":\"what name of the speech\",\"type\":\"string or list[string]\"}.\nThe keys should remain constant", - }, - } - - def build( - self, - vectorstore: VectorStore = None, - document_content_description: str = None, - llm: BaseLanguageModel = None, - metadata_field_info: List[str] = None, - ) -> BaseRetriever: - - metadata_field_obj = [] - - for meta in metadata_field_info: - meta_obj = json.loads(meta) - if 'name' not in meta_obj or 'description' not in meta_obj or 'type' not in meta_obj : - raise Exception('Incorrect metadata field info format.') - attribute_info = AttributeInfo( - name = meta_obj['name'], - description = meta_obj['description'], - type = meta_obj['type'], - ) - metadata_field_obj.append(attribute_info) - - return SelfQueryRetriever.from_llm( - llm, - vectorstore, - document_content_description, - metadata_field_obj, - verbose=True - ) - - \ No newline at end of file From a5c66836094b42c4c302e6f7e4ad8b7def8b41b3 Mon Sep 17 00:00:00 2001 From: Atharva J Date: Tue, 19 Dec 2023 15:09:29 +0530 Subject: [PATCH 10/10] . --- src/backend/langflow/components/vectorstores/Vectara.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/backend/langflow/components/vectorstores/Vectara.py b/src/backend/langflow/components/vectorstores/Vectara.py index bdcf9aef5..e3555e6f1 100644 --- a/src/backend/langflow/components/vectorstores/Vectara.py +++ b/src/backend/langflow/components/vectorstores/Vectara.py @@ -3,6 +3,7 @@ from langflow import CustomComponent import tempfile import urllib.request import urllib + from langchain.vectorstores import Vectara from langchain.schema import Document from langchain.vectorstores.base import VectorStore