Merge branch 'dev' of https://github.com/JAtharva22/langflowdev into dev

2023-12-18 04:44:02 +05:30 · 2023-12-18 04:44:02 +05:30 · dc656ad402
commit dc656ad402
parent 1c612dc37f 68d6fae606
2 changed files with 121 additions and 9 deletions
--- a/src/backend/langflow/components/retrievers/VectaraSelfQueryRetriever.py
+++ b/src/backend/langflow/components/retrievers/VectaraSelfQueryRetriever.py
@ -0,0 +1,68 @@
+from typing import Optional, List
+from langflow import CustomComponent
+import json
+from langchain.schema import BaseRetriever
+from langchain.schema.vectorstore import VectorStore
+from langchain.base_language import BaseLanguageModel
+from langchain.retrievers.self_query.base import SelfQueryRetriever
+from langchain.chains.query_constructor.base import AttributeInfo
+
+
+class VectaraComponent(CustomComponent):
+    """Langflow component that builds a LangChain ``SelfQueryRetriever``.
+
+    Metadata fields arrive as JSON object strings and become ``AttributeInfo``s.
+    """
+
+    display_name: str = "Vectara Self Query Retriever for Vectara Vector Store"
+    description: str = "Implementation of Vectara Self Query Retriever"
+    documentation = "https://python.langchain.com/docs/integrations/vectorstores/vectara"
+    beta = True
+    field_config = {
+        "code": {"show": False},
+        "vectorstore": {
+            "display_name": "Vectara Vector Store",
+            "info": "Input Vectara Vectore Store",
+        },
+        "llm": {"display_name": "LLM", "info": "For self query retriever"},
+        "document_content_description": {
+            "display_name": "Document Content Description",
+            "info": "For self query retriever",
+        },
+        "metadata_field_info": {
+            "display_name": "Metadata Field Info",
+            "info": "Each metadata field is a string in the form of json containing additional search metadata.\nExample input: {\"name\":\"speech\",\"description\":\"what name of the speech\",\"type\":\"string or list[string]\"}.\nThe keys should remain constant",
+        },
+    }
+
+    def build(
+        self,
+        vectorstore: VectorStore = None,
+        document_content_description: str = None,
+        llm: BaseLanguageModel = None,
+        metadata_field_info: List[str] = None,
+    ) -> BaseRetriever:
+        """Parse the JSON metadata entries and return a ``SelfQueryRetriever``.
+
+        A missing ``metadata_field_info`` is treated as an empty list. Raises
+        ``ValueError`` for an entry that is malformed or lacks a required key.
+        """
+        required_keys = {"name", "description", "type"}
+        metadata_field_obj = []
+        for meta in metadata_field_info or []:
+            meta_obj = json.loads(meta)
+            if not (isinstance(meta_obj, dict) and required_keys.issubset(meta_obj)):
+                raise ValueError("Incorrect metadata field info format.")
+            metadata_field_obj.append(
+                AttributeInfo(**{key: meta_obj[key] for key in required_keys})
+            )
+
+        return SelfQueryRetriever.from_llm(
+            llm,
+            vectorstore,
+            document_content_description,
+            metadata_field_obj,
+            verbose=True,
+        )
+    
+ 
--- a/src/backend/langflow/components/vectorstores/Vectara.py
+++ b/src/backend/langflow/components/vectorstores/Vectara.py
@ -1,10 +1,24 @@
+<<<<<<< HEAD
 from typing import Optional, Union

 from langchain.schema import BaseRetriever, Document
+=======
+from typing import Optional, Union, List
+from langflow import CustomComponent
+import tempfile
+import urllib.request
+import urllib
+>>>>>>> 68d6fae606967a7e7ac46ac239dd803d8fde891e
 from langchain.vectorstores import Vectara
 from langchain.vectorstores.base import VectorStore
+<<<<<<< HEAD

 from langflow import CustomComponent
+=======
+from langchain.schema import BaseRetriever
+from langchain.schema.vectorstore import VectorStore
+from langchain.embeddings import FakeEmbeddings
+>>>>>>> 68d6fae606967a7e7ac46ac239dd803d8fde891e


 class VectaraComponent(CustomComponent):
@ -12,13 +26,29 @@ class VectaraComponent(CustomComponent):
    description: str = "Implementation of Vector Store using Vectara"
    documentation = "https://python.langchain.com/docs/integrations/vectorstores/vectara"
    beta = True
-    # api key should be password = True
    field_config = {
-        "vectara_customer_id": {"display_name": "Vectara Customer ID"},
-        "vectara_corpus_id": {"display_name": "Vectara Corpus ID"},
-        "vectara_api_key": {"display_name": "Vectara API Key", "password": True},
+        "vectara_customer_id": {
+            "display_name": "Vectara Customer ID",
+            "required": True,
+        },
+        "vectara_corpus_id": {
+            "display_name": "Vectara Corpus ID",
+            "required": True,
+        },
+        "vectara_api_key": {
+            "display_name": "Vectara API Key",
+            "password": True,
+            "required": True,
+        },
        "code": {"show": False},
-        "documents": {"display_name": "Documents"},
+        "documents": {
+            "display_name": "Documents",
+            "info": "Pass in either for Self Query Retriever or for making a Vectara Object",
+        },
+        "files_url": {
+            "display_name": "Files Url",
+            "info": "Make vectara object using url of files(documents not needed)",
+        },
    }

    def build(
@ -26,21 +56,35 @@ class VectaraComponent(CustomComponent):
        vectara_customer_id: str,
        vectara_corpus_id: str,
        vectara_api_key: str,
+        files_url: Optional[List[str]] = None,
        documents: Optional[Document] = None,
    ) -> Union[VectorStore, BaseRetriever]:
-        # If documents, then we need to create a Vectara instance using .from_documents
        if documents is not None:
            return Vectara.from_documents(
-                documents=documents,  # type: ignore
+                documents=documents,
+                embedding=FakeEmbeddings(size=768),
+                vectara_customer_id=vectara_customer_id,
+                vectara_corpus_id=vectara_corpus_id,
+                vectara_api_key=vectara_api_key,
+            )
+
+        if files_url is not None:
+            files_list = []
+            for url in files_url:
+                name = tempfile.NamedTemporaryFile().name
+                urllib.request.urlretrieve(url, name)
+                files_list.append(name)
+
+            return Vectara.from_files(
+                files=files_list,
+                embedding=FakeEmbeddings(size=768),
                vectara_customer_id=vectara_customer_id,
                vectara_corpus_id=vectara_corpus_id,
                vectara_api_key=vectara_api_key,
-                source="langflow",
            )

        return Vectara(
            vectara_customer_id=vectara_customer_id,
            vectara_corpus_id=vectara_corpus_id,
            vectara_api_key=vectara_api_key,
-            source="langflow",
        )