Vectara Component Update - feature enhancement (#1219)

### Pull Request for Issue #1217 
[feature request](https://github.com/logspace-ai/langflow/labels/feature%20request)
[python](https://github.com/logspace-ai/langflow/labels/python)

**Description**
This pull request addresses issue #1217, which concerns the ability to
add files directly to the Vectara vector store using the indexing API's
file upload feature.

**Changes Made**
I have implemented the ability to add files directly to the Vectara
vector store by providing URLs of documents online. The `add_files()`
method (as well as `from_files()`) has been added to facilitate this
functionality.

**Files Modified/Added**
Modified: vectara.py
This commit is contained in:
Gabriel Luiz Freitas Almeida 2023-12-24 20:04:38 -03:00 committed by GitHub
commit a27c7221bb
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
2 changed files with 50 additions and 16 deletions

View file

@ -1,10 +1,14 @@
from typing import Optional, Union
from langchain.schema import BaseRetriever, Document
from langchain.vectorstores import Vectara
from langchain.vectorstores.base import VectorStore
from typing import Optional, Union, List
from langflow import CustomComponent
import tempfile
import urllib.request
import urllib
from langchain.vectorstores import Vectara
from langchain.schema import Document
from langchain.vectorstores.base import VectorStore
from langchain.schema import BaseRetriever
from langchain.embeddings import FakeEmbeddings
class VectaraComponent(CustomComponent):
@ -12,13 +16,29 @@ class VectaraComponent(CustomComponent):
description: str = "Implementation of Vector Store using Vectara"
documentation = "https://python.langchain.com/docs/integrations/vectorstores/vectara"
beta = True
# api key should be password = True
field_config = {
"vectara_customer_id": {"display_name": "Vectara Customer ID"},
"vectara_corpus_id": {"display_name": "Vectara Corpus ID"},
"vectara_api_key": {"display_name": "Vectara API Key", "password": True},
"vectara_customer_id": {
"display_name": "Vectara Customer ID",
"required": True,
},
"vectara_corpus_id": {
"display_name": "Vectara Corpus ID",
"required": True,
},
"vectara_api_key": {
"display_name": "Vectara API Key",
"password": True,
"required": True,
},
"code": {"show": False},
"documents": {"display_name": "Documents"},
"documents": {
"display_name": "Documents",
"info": "Pass in either for Self Query Retriever or for making a Vectara Object",
},
"files_url": {
"display_name": "Files Url",
"info": "Make vectara object using url of files(documents not needed)",
},
}
def build(
@ -26,21 +46,35 @@ class VectaraComponent(CustomComponent):
vectara_customer_id: str,
vectara_corpus_id: str,
vectara_api_key: str,
files_url: Optional[List[str]] = None,
documents: Optional[Document] = None,
) -> Union[VectorStore, BaseRetriever]:
# If documents, then we need to create a Vectara instance using .from_documents
if documents is not None:
return Vectara.from_documents(
documents=documents, # type: ignore
documents=documents,
embedding=FakeEmbeddings(size=768),
vectara_customer_id=vectara_customer_id,
vectara_corpus_id=vectara_corpus_id,
vectara_api_key=vectara_api_key,
)
if files_url is not None:
files_list = []
for url in files_url:
name = tempfile.NamedTemporaryFile().name
urllib.request.urlretrieve(url, name)
files_list.append(name)
return Vectara.from_files(
files=files_list,
embedding=FakeEmbeddings(size=768),
vectara_customer_id=vectara_customer_id,
vectara_corpus_id=vectara_corpus_id,
vectara_api_key=vectara_api_key,
source="langflow",
)
return Vectara(
vectara_customer_id=vectara_customer_id,
vectara_corpus_id=vectara_corpus_id,
vectara_api_key=vectara_api_key,
source="langflow",
)

View file

@ -578,4 +578,4 @@ def test_async_task_processing_vector_store(client, added_vector_store, created_
# Validate that the task completed successfully and the result is as expected
assert "result" in task_status_json, task_status_json
assert "output" in task_status_json["result"], task_status_json["result"]
assert "Langflow" in task_status_json["result"]["output"], task_status_json["result"]
assert "Langflow" in task_status_json["result"]["output"], task_status_json["result"]