This commit is contained in:
Atharva J 2023-12-18 04:44:02 +05:30
commit dc656ad402
2 changed files with 121 additions and 9 deletions

View file

@ -0,0 +1,68 @@
from typing import Optional, List
from langflow import CustomComponent
import json
from langchain.schema import BaseRetriever
from langchain.schema.vectorstore import VectorStore
from langchain.base_language import BaseLanguageModel
from langchain.retrievers.self_query.base import SelfQueryRetriever
from langchain.chains.query_constructor.base import AttributeInfo
class VectaraComponent(CustomComponent):
    """Langflow component that wraps a vector store in a self-query retriever.

    Each metadata field description arrives as a JSON string; ``build`` turns
    those strings into ``AttributeInfo`` objects and passes them, together with
    the LLM, the vector store and the document content description, to
    ``SelfQueryRetriever.from_llm``.
    """

    display_name: str = "Vectara Self Query Retriever for Vectara Vector Store"
    description: str = "Implementation of Vectara Self Query Retriever"
    documentation = (
        "https://python.langchain.com/docs/integrations/vectorstores/vectara"
    )
    beta = True
    field_config = {
        "code": {"show": False},
        "vectorstore": {
            "display_name": "Vectara Vector Store",
            "info": "Input Vectara Vectore Store",
        },
        "llm": {
            "display_name": "LLM",
            "info": "For self query retriever",
        },
        "document_content_description": {
            "display_name": "Document Content Description",
            "info": "For self query retriever",
        },
        "metadata_field_info": {
            "display_name": "Metadata Field Info",
            # The dict literal used to carry two "info" keys; only the last
            # one ever took effect, so that is the one kept here.
            "info": "Each metadata field is a string in the form of json containing additional search metadata.\nExample input: {\"name\":\"speech\",\"description\":\"what name of the speech\",\"type\":\"string or list[string]\"}.\nThe keys should remain constant",
        },
    }

    def build(
        self,
        vectorstore: VectorStore = None,
        document_content_description: str = None,
        llm: BaseLanguageModel = None,
        metadata_field_info: List[str] = None,
    ) -> BaseRetriever:
        """Create a ``SelfQueryRetriever`` from the configured inputs.

        Args:
            vectorstore: Vector store the retriever queries.
            document_content_description: Description of the stored documents,
                forwarded to ``SelfQueryRetriever.from_llm``.
            llm: Language model forwarded to ``SelfQueryRetriever.from_llm``.
            metadata_field_info: JSON strings, each an object with the keys
                ``name``, ``description`` and ``type``. ``None`` is treated as
                "no metadata fields".

        Returns:
            The retriever produced by ``SelfQueryRetriever.from_llm``.

        Raises:
            ValueError: If an entry is not a JSON object containing all of
                ``name``, ``description`` and ``type``. Malformed JSON raises
                ``json.JSONDecodeError``, which is itself a ``ValueError``.
        """
        required_keys = ("name", "description", "type")
        metadata_field_obj = []
        # The parameter defaults to None; iterate an empty list in that case
        # instead of failing with "NoneType is not iterable".
        for meta in metadata_field_info or []:
            meta_obj = json.loads(meta)
            # Reject non-object JSON (lists, strings, numbers) up front so the
            # caller gets the format error rather than a TypeError on indexing.
            if not isinstance(meta_obj, dict) or any(
                key not in meta_obj for key in required_keys
            ):
                raise ValueError('Incorrect metadata field info format.')
            metadata_field_obj.append(
                AttributeInfo(
                    name=meta_obj["name"],
                    description=meta_obj["description"],
                    type=meta_obj["type"],
                )
            )
        return SelfQueryRetriever.from_llm(
            llm,
            vectorstore,
            document_content_description,
            metadata_field_obj,
            verbose=True,
        )

View file

@ -1,10 +1,24 @@
import tempfile
import urllib
import urllib.request
from typing import List, Optional, Union

from langchain.embeddings import FakeEmbeddings
from langchain.schema import BaseRetriever, Document
from langchain.schema.vectorstore import VectorStore
from langchain.vectorstores import Vectara
from langchain.vectorstores.base import VectorStore
from langflow import CustomComponent
class VectaraComponent(CustomComponent):
    """Langflow component that builds a Vectara vector store.

    Depending on which optional inputs are supplied, ``build`` indexes the
    given documents, downloads and indexes files from URLs, or simply returns
    a ``Vectara`` object for the configured corpus.
    """

    # NOTE(review): the diff this class was recovered from elides one line
    # directly after the class header (presumably the ``display_name``
    # attribute) — restore it from the full file.
    description: str = "Implementation of Vector Store using Vectara"
    documentation = "https://python.langchain.com/docs/integrations/vectorstores/vectara"
    beta = True
    # The API key is flagged as a password field below.
    field_config = {
        "vectara_customer_id": {
            "display_name": "Vectara Customer ID",
            "required": True,
        },
        "vectara_corpus_id": {
            "display_name": "Vectara Corpus ID",
            "required": True,
        },
        "vectara_api_key": {
            "display_name": "Vectara API Key",
            "password": True,
            "required": True,
        },
        "code": {"show": False},
        "documents": {
            "display_name": "Documents",
            "info": "Pass in either for Self Query Retriever or for making a Vectara Object",
        },
        "files_url": {
            "display_name": "Files Url",
            "info": "Make vectara object using url of files(documents not needed)",
        },
    }

    def build(
        self,
        vectara_customer_id: str,
        vectara_corpus_id: str,
        vectara_api_key: str,
        files_url: Optional[List[str]] = None,
        documents: Optional[Document] = None,
    ) -> Union[VectorStore, BaseRetriever]:
        """Return a Vectara vector store for the configured corpus.

        Args:
            vectara_customer_id: Vectara customer ID.
            vectara_corpus_id: Vectara corpus ID.
            vectara_api_key: Vectara API key.
            files_url: Optional URLs of files to download and index. Only
                consulted when ``documents`` is ``None``.
            documents: Optional documents to index; takes precedence over
                ``files_url``.

        Returns:
            The object returned by ``Vectara.from_documents``,
            ``Vectara.from_files`` or the ``Vectara`` constructor, depending
            on which inputs were provided.
        """
        # If documents, then we need to create a Vectara instance using .from_documents
        if documents is not None:
            return Vectara.from_documents(
                documents=documents,  # type: ignore
                # NOTE(review): placeholder embedding — presumably Vectara
                # computes embeddings server-side; confirm against the
                # langchain Vectara docs.
                embedding=FakeEmbeddings(size=768),
                vectara_customer_id=vectara_customer_id,
                vectara_corpus_id=vectara_corpus_id,
                vectara_api_key=vectara_api_key,
                # Same source tag as the other two branches.
                source="langflow",
            )

        if files_url is not None:
            # Download into a private temporary directory. The previous code
            # used NamedTemporaryFile().name, which only yields the name of a
            # file whose object is immediately discarded (racy), and it never
            # removed the downloaded copies.
            with tempfile.TemporaryDirectory() as download_dir:
                files_list = []
                for url in files_url:
                    # delete=False: reserve a unique path, close the handle,
                    # then let urlretrieve write to it. The directory cleanup
                    # removes the file afterwards.
                    with tempfile.NamedTemporaryFile(
                        dir=download_dir, delete=False
                    ) as reserved:
                        name = reserved.name
                    urllib.request.urlretrieve(url, name)
                    files_list.append(name)
                # NOTE(review): assumes from_files uploads the files before
                # returning, so they can be deleted on exit — confirm.
                return Vectara.from_files(
                    files=files_list,
                    embedding=FakeEmbeddings(size=768),
                    vectara_customer_id=vectara_customer_id,
                    vectara_corpus_id=vectara_corpus_id,
                    vectara_api_key=vectara_api_key,
                    source="langflow",
                )

        # Nothing to ingest: just connect to the existing corpus.
        return Vectara(
            vectara_customer_id=vectara_customer_id,
            vectara_corpus_id=vectara_corpus_id,
            vectara_api_key=vectara_api_key,
            source="langflow",
        )