feat: Google Drive Search Component (#3319)

* feat: Google Drive Search Component

feat: Google Drive Search Component
Adds the ability to search Google Drive and return the IDs or URLs of the matching documents.

* Updated Google Drive Search.py
This commit is contained in:
Edwin Jose 2024-08-14 10:11:58 -04:00 committed by GitHub
commit ac4442d221
No known key found for this signature in database
GPG key ID: B5690EEEBB952194

View file

@ -0,0 +1,157 @@
import json
from typing import List
from google.oauth2.credentials import Credentials
from googleapiclient.discovery import build
from langflow.custom import Component
from langflow.inputs import MessageTextInput, DropdownInput
from langflow.io import SecretStrInput
from langflow.template import Output
from langflow.schema import Data
class GoogleDriveSearchComponent(Component):
    """Search Google Drive and expose the matching files' URLs, IDs and titles.

    The search query is either taken verbatim from the editable
    ``query_string`` input or, when that is blank, built from the
    ``query_item`` / ``valid_operator`` / ``search_term`` inputs.

    NOTE: every output method calls :meth:`search_files` itself, so each
    connected output issues its own Drive API request.
    """

    display_name = "Google Drive Search"
    description = "Searches Google Drive files using provided credentials and query parameters."
    icon = "Google"

    inputs = [
        SecretStrInput(
            name="token_string",
            display_name="Token String",
            info="JSON string containing OAuth 2.0 access token information for service account access",
            required=True,
        ),
        DropdownInput(
            name="query_item",
            display_name="Query Item",
            options=[
                "name",
                "fullText",
                "mimeType",
                "modifiedTime",
                "viewedByMeTime",
                "trashed",
                "starred",
                "parents",
                "owners",
                "writers",
                "readers",
                "sharedWithMe",
                "createdTime",
                "properties",
                "appProperties",
                "visibility",
                "shortcutDetails.targetId",
            ],
            info="The field to query.",
            required=True,
        ),
        DropdownInput(
            name="valid_operator",
            display_name="Valid Operator",
            options=["contains", "=", "!=", "<=", "<", ">", ">=", "in", "has"],
            info="Operator to use in the query.",
            required=True,
        ),
        MessageTextInput(
            name="search_term",
            display_name="Search Term",
            info="The value to search for in the specified query item.",
            required=True,
        ),
        MessageTextInput(
            name="query_string",
            display_name="Query String",
            info="The query string used for searching. You can edit this manually.",
            value="",  # Overwritten with the generated query when left blank
        ),
    ]

    outputs = [
        Output(display_name="Document URLs", name="doc_urls", method="search_doc_urls"),
        Output(display_name="Document IDs", name="doc_ids", method="search_doc_ids"),
        Output(display_name="Document Titles", name="doc_titles", method="search_doc_titles"),
        Output(display_name="Data", name="Data", method="search_data"),
    ]

    def generate_query_string(self) -> str:
        """Build a Drive ``q`` expression from the structured inputs.

        The search term is treated as a raw value: backslashes and single
        quotes are escaped before it is wrapped in single quotes, so a term
        such as ``Valentine's Day`` produces a well-formed query.

        Special cases that follow the Drive query syntax:

        * ``in`` takes the value on the left: ``'value' in parents``.
        * Boolean fields compare against the bare literals ``true``/``false``.
        * A ``has`` clause written as ``{ key='k' and value='v' }`` is passed
          through unquoted.

        The result is also stored in ``self.query_string`` so the editable
        input reflects the query that was generated.

        Returns:
            The generated query expression.
        """
        field = self.query_item
        operator = self.valid_operator
        term = str(self.search_term)
        stripped = term.strip()

        boolean_fields = ("trashed", "starred", "sharedWithMe")

        if field in boolean_fields and stripped.lower() in ("true", "false"):
            # Boolean literals must not be quoted (e.g. ``trashed = true``).
            query = f"{field} {operator} {stripped.lower()}"
        elif operator == "has" and stripped.startswith("{"):
            # The clause already carries its own quoting; escaping or
            # wrapping it would corrupt it.
            query = f"{field} {operator} {stripped}"
        else:
            # Escape backslashes first so the quote escapes added next are
            # not themselves doubled.
            escaped = term.replace("\\", "\\\\").replace("'", "\\'")
            if operator == "in":
                query = f"'{escaped}' in {field}"
            else:
                query = f"{field} {operator} '{escaped}'"

        # Update the editable query string input with the generated query.
        self.query_string = query
        return query

    def on_inputs_changed(self):
        """Regenerate the query string from the current structured inputs."""
        self.generate_query_string()

    def generate_file_url(self, file_id: str, mime_type: str) -> str:
        """Return the browser URL for a Drive file based on its MIME type.

        Google Docs, Sheets, Slides and Drawings get their editor URL; every
        other MIME type (PDFs included) gets the generic Drive viewer URL.

        Args:
            file_id: The Drive file ID.
            mime_type: The file's MIME type as reported by the Drive API.

        Returns:
            The URL for the file.
        """
        editor_paths = {
            "application/vnd.google-apps.document": "document",
            "application/vnd.google-apps.spreadsheet": "spreadsheets",
            "application/vnd.google-apps.presentation": "presentation",
            "application/vnd.google-apps.drawing": "drawings",
        }
        editor = editor_paths.get(mime_type)
        if editor is not None:
            return f"https://docs.google.com/{editor}/d/{file_id}/edit"
        return f"https://drive.google.com/file/d/{file_id}/view?usp=drivesdk"

    def search_files(self) -> dict:
        """Run the Drive search and collect the results.

        Uses ``self.query_string`` when it is non-blank (it may have been
        edited by the user); otherwise the query is generated from the
        structured inputs. At most 5 files are requested.

        Returns:
            A dict with the keys ``doc_urls``, ``doc_ids``, ``doc_titles``
            (parallel lists of strings) and ``doc_titles_urls`` (a list of
            ``{"title": ..., "url": ...}`` dicts).

        Raises:
            json.JSONDecodeError: If ``token_string`` is not valid JSON.
        """
        # Load the token information from the JSON string.
        token_info = json.loads(self.token_string)
        creds = Credentials.from_authorized_user_info(token_info)

        # A blank or whitespace-only manual query falls back to the
        # generated one instead of being sent to the API as-is.
        query = str(self.query_string or "").strip() or self.generate_query_string()

        service = build("drive", "v3", credentials=creds)
        response = (
            service.files()
            .list(q=query, pageSize=5, fields="nextPageToken, files(id, name, mimeType)")
            .execute()
        )

        doc_urls = []
        doc_ids = []
        doc_titles = []
        doc_titles_urls = []
        for item in response.get("files", []):
            file_id = item["id"]
            file_title = item["name"]
            file_url = self.generate_file_url(file_id, item.get("mimeType", ""))

            doc_urls.append(file_url)
            doc_ids.append(file_id)
            doc_titles.append(file_title)
            doc_titles_urls.append({"title": file_title, "url": file_url})

        return {
            "doc_urls": doc_urls,
            "doc_ids": doc_ids,
            "doc_titles_urls": doc_titles_urls,
            "doc_titles": doc_titles,
        }

    def search_doc_ids(self) -> List[str]:
        """Return the IDs of the files matching the search."""
        return self.search_files()["doc_ids"]

    def search_doc_urls(self) -> List[str]:
        """Return the URLs of the files matching the search."""
        return self.search_files()["doc_urls"]

    def search_doc_titles(self) -> List[str]:
        """Return the titles of the files matching the search."""
        return self.search_files()["doc_titles"]

    def search_data(self) -> Data:
        """Return the title/URL pairs of the matching files wrapped in ``Data``."""
        return Data(data={"text": self.search_files()["doc_titles_urls"]})