vectara components

This commit is contained in:
Atharva J 2023-11-06 17:07:20 +05:30
commit 327af1be86
2 changed files with 125 additions and 13 deletions

View file

@ -0,0 +1,73 @@
from typing import Optional, Union, List
from langflow import CustomComponent
import json
import lark
from langchain.vectorstores import Vectara
from langchain.schema import Document
# from langchain.vectorstores.base import VectorStore
from langchain.schema import BaseRetriever
from langchain.embeddings.base import Embeddings
from langchain.schema.vectorstore import VectorStore
from langchain.base_language import BaseLanguageModel
from langchain.retrievers.self_query.base import SelfQueryRetriever
from langchain.chains.query_constructor.base import AttributeInfo
from langchain.embeddings import FakeEmbeddings
class VectaraComponent(CustomComponent):
    """Custom component that wraps a vector store in a ``SelfQueryRetriever``.

    The component takes a vector store, an LLM, a description of the document
    contents and a list of JSON-encoded metadata field descriptions, and
    returns the retriever produced by ``SelfQueryRetriever.from_llm``.
    """

    display_name: str = "Vectara Self Query Retriever for Vectara Vector Store"
    description: str = "Implementation of Vectara Self Query Retriever"
    documentation = (
        "https://python.langchain.com/docs/integrations/vectorstores/vectara"
    )
    beta = True

    # Per-field UI configuration, keyed by the ``build`` parameter names.
    field_config = {
        "code": {"show": False},
        "vectorstore": {
            "display_name": "Vectara Vector Store",
            "info": "Input Vectara Vectore Store",
        },
        "llm": {
            "display_name": "LLM",
            "info": "For self query retriever",
        },
        "document_content_description": {
            "display_name": "Document Content Description",
            "info": "For self query retriever",
        },
        "metadata_field_info": {
            "display_name": "Metadata Field Info",
            "info": "Check json format in documentation for self query retriever",
        },
    }

    def build(
        self,
        vectorstore: VectorStore = None,
        document_content_description: str = None,
        llm: BaseLanguageModel = None,
        metadata_field_info: List[str] = None,
    ) -> BaseRetriever:
        """Build a self-query retriever on top of ``vectorstore``.

        Args:
            vectorstore: Vector store the retriever queries.
            document_content_description: Description of the document
                contents, forwarded to ``SelfQueryRetriever.from_llm``.
            llm: Language model forwarded to ``SelfQueryRetriever.from_llm``.
            metadata_field_info: JSON strings, each decoding to an object
                with ``name``, ``description`` and ``type`` keys. ``None``
                is treated as an empty list.

        Returns:
            The retriever created by ``SelfQueryRetriever.from_llm``.

        Raises:
            ValueError: If an entry is not a JSON object or lacks one of the
                required keys. Malformed JSON surfaces as
                ``json.JSONDecodeError``, which is also a ``ValueError``.
        """
        required_keys = ("name", "description", "type")
        metadata_field_obj = []
        # The parameter defaults to None; iterating it directly would raise
        # TypeError when the field is left empty.
        for meta in metadata_field_info or []:
            meta_obj = json.loads(meta)
            # Valid JSON that is not an object (e.g. a number or a list)
            # cannot carry the required keys, so reject it the same way.
            if not isinstance(meta_obj, dict) or any(
                key not in meta_obj for key in required_keys
            ):
                raise ValueError('Incorrect metadata field info format.')
            metadata_field_obj.append(
                AttributeInfo(
                    name=meta_obj["name"],
                    description=meta_obj["description"],
                    type=meta_obj["type"],
                )
            )
        return SelfQueryRetriever.from_llm(
            llm,
            vectorstore,
            document_content_description,
            metadata_field_obj,
            verbose=True,
        )

View file

@ -1,10 +1,18 @@
from typing import Optional, Union
from typing import Optional, Union, List
from langflow import CustomComponent
import tempfile
import urllib.request
import urllib
from langchain.vectorstores import Vectara
from langchain.schema import Document
from langchain.vectorstores.base import VectorStore
from langchain.schema import BaseRetriever
from langchain.embeddings.base import Embeddings
from langchain.schema.vectorstore import VectorStore
from langchain.base_language import BaseLanguageModel
from langchain.retrievers.self_query.base import SelfQueryRetriever
from langchain.chains.query_constructor.base import AttributeInfo
from langchain.embeddings import FakeEmbeddings
class VectaraComponent(CustomComponent):
@ -14,13 +22,29 @@ class VectaraComponent(CustomComponent):
"https://python.langchain.com/docs/integrations/vectorstores/vectara"
)
beta = True
# api key should be password = True
field_config = {
"vectara_customer_id": {"display_name": "Vectara Customer ID"},
"vectara_corpus_id": {"display_name": "Vectara Corpus ID"},
"vectara_api_key": {"display_name": "Vectara API Key", "password": True},
"vectara_customer_id": {
"display_name": "Vectara Customer ID",
"required": True,
},
"vectara_corpus_id": {
"display_name": "Vectara Corpus ID",
"required": True,
},
"vectara_api_key": {
"display_name": "Vectara API Key",
"password": True,
"required": True,
},
"code": {"show": False},
"documents": {"display_name": "Documents"},
"documents": {
"display_name": "Documents",
"info": "Pass in either for Self Query Retriever or for making a Vectara Object"
},
'files_url':{
"display_name": "Files Url",
"info": "Make vectara object using url of files(documents not needed)",
},
}
def build(
@ -28,21 +52,36 @@ class VectaraComponent(CustomComponent):
vectara_customer_id: str,
vectara_corpus_id: str,
vectara_api_key: str,
files_url: Optional[List[str]] = None,
documents: Optional[Document] = None,
) -> Union[VectorStore, BaseRetriever]:
# If documents, then we need to create a Vectara instance using .from_documents
if documents is not None:
if documents is not None :
return Vectara.from_documents(
documents=documents, # type: ignore
documents=documents,
embedding=FakeEmbeddings(size=768),
vectara_customer_id=vectara_customer_id,
vectara_corpus_id=vectara_corpus_id,
vectara_api_key=vectara_api_key,
)
if files_url is not None :
files_list = []
for url in files_url:
name = tempfile.NamedTemporaryFile().name
urllib.request.urlretrieve(url, name)
files_list.append(name)
return Vectara.from_files(
files=files_list,
embedding=FakeEmbeddings(size=768),
vectara_customer_id=vectara_customer_id,
vectara_corpus_id=vectara_corpus_id,
vectara_api_key=vectara_api_key,
source="langflow",
)
return Vectara(
vectara_customer_id=vectara_customer_id,
vectara_corpus_id=vectara_corpus_id,
vectara_api_key=vectara_api_key,
source="langflow",
)
)