# Provenance (from the original patch header):
#   Commit:  ac4442d221d68fb434e4c2059c7ef9156a3c72a7
#   Author:  Edwin Jose
#   Date:    Wed, 14 Aug 2024 10:11:58 -0400
#   Subject: feat: Google Drive Search Component (#3319)
#            Ability to search Google Drive and get back the relevant
#            Doc id or Doc urls
#   File:    src/backend/base/langflow/components/data/GoogleDriveSearch.py

import json
from typing import List

from google.oauth2.credentials import Credentials
from googleapiclient.discovery import build

from langflow.custom import Component
from langflow.inputs import MessageTextInput, DropdownInput
from langflow.io import SecretStrInput
from langflow.template import Output
from langflow.schema import Data

# Number of files requested from the Drive API per search. Only the first
# page of results is used; the returned nextPageToken is not followed.
_PAGE_SIZE = 5

# Fallback URL for any MIME type without a dedicated Google editor.
_DEFAULT_URL_TEMPLATE = "https://drive.google.com/file/d/{file_id}/view?usp=drivesdk"

# Editor URL templates keyed by Google Workspace MIME type.
_EDITOR_URL_TEMPLATES = {
    "application/vnd.google-apps.document": "https://docs.google.com/document/d/{file_id}/edit",
    "application/vnd.google-apps.spreadsheet": "https://docs.google.com/spreadsheets/d/{file_id}/edit",
    "application/vnd.google-apps.presentation": "https://docs.google.com/presentation/d/{file_id}/edit",
    "application/vnd.google-apps.drawing": "https://docs.google.com/drawings/d/{file_id}/edit",
}

# Query fields whose value is the bare literal true/false rather than a
# quoted string in the Drive query language.
_BOOLEAN_QUERY_FIELDS = frozenset({"trashed", "starred", "sharedWithMe"})


def _quote_query_value(value: str) -> str:
    """Return *value* as a single-quoted Drive query string literal.

    Backslashes and single quotes are backslash-escaped so that a value such
    as ``Valentine's Day`` cannot terminate the literal early.
    """
    # Backslashes must be escaped first, otherwise the backslashes added for
    # the quotes would themselves be doubled.
    escaped = str(value).replace("\\", "\\\\").replace("'", "\\'")
    return f"'{escaped}'"


def _build_query(query_item: str, valid_operator: str, search_term: str) -> str:
    """Assemble one Drive search clause from a field, an operator and a value."""
    term = str(search_term)
    stripped = term.strip()

    # Collection membership is written value-first: 'user@example.com' in writers
    if valid_operator == "in":
        return f"{_quote_query_value(term)} in {query_item}"

    # Property lookups take a brace expression, not a string literal:
    #   properties has { key='k' and value='v' }
    if valid_operator == "has" and stripped.startswith("{") and stripped.endswith("}"):
        return f"{query_item} has {stripped}"

    # Boolean fields compare against the unquoted literals true / false.
    if query_item in _BOOLEAN_QUERY_FIELDS and stripped.lower() in ("true", "false"):
        return f"{query_item} {valid_operator} {stripped.lower()}"

    return f"{query_item} {valid_operator} {_quote_query_value(term)}"


class GoogleDriveSearchComponent(Component):
    """Component that searches Google Drive and exposes the matching files.

    A query is either taken verbatim from the ``query_string`` input or built
    from ``query_item``, ``valid_operator`` and ``search_term``. Results are
    offered as URLs, IDs, titles, or a combined ``Data`` object.
    """

    display_name = "Google Drive Search"
    description = "Searches Google Drive files using provided credentials and query parameters."
    icon = "Google"

    inputs = [
        # NOTE(review): the token is parsed with
        # Credentials.from_authorized_user_info in search_files, while the
        # info text below mentions service account access - confirm which
        # credential type is intended.
        SecretStrInput(
            name="token_string",
            display_name="Token String",
            info="JSON string containing OAuth 2.0 access token information for service account access",
            required=True,
        ),
        DropdownInput(
            name="query_item",
            display_name="Query Item",
            options=[
                "name",
                "fullText",
                "mimeType",
                "modifiedTime",
                "viewedByMeTime",
                "trashed",
                "starred",
                "parents",
                "owners",
                "writers",
                "readers",
                "sharedWithMe",
                "createdTime",
                "properties",
                "appProperties",
                "visibility",
                "shortcutDetails.targetId",
            ],
            info="The field to query.",
            required=True,
        ),
        DropdownInput(
            name="valid_operator",
            display_name="Valid Operator",
            options=["contains", "=", "!=", "<=", "<", ">", ">=", "in", "has"],
            info="Operator to use in the query.",
            required=True,
        ),
        MessageTextInput(
            name="search_term",
            display_name="Search Term",
            info="The value to search for in the specified query item.",
            required=True,
        ),
        MessageTextInput(
            name="query_string",
            display_name="Query String",
            info="The query string used for searching. You can edit this manually.",
            value="",  # This will be updated with the generated query string
        ),
    ]

    outputs = [
        Output(display_name="Document URLs", name="doc_urls", method="search_doc_urls"),
        Output(display_name="Document IDs", name="doc_ids", method="search_doc_ids"),
        Output(display_name="Document Titles", name="doc_titles", method="search_doc_titles"),
        Output(display_name="Data", name="Data", method="search_data"),
    ]

    def generate_query_string(self) -> str:
        """Build the Drive query from the current inputs and store it.

        The search term is escaped and placed according to the operator
        (value-first for ``in``, unquoted for boolean fields and ``has``
        brace expressions, quoted otherwise).

        Returns:
            The generated query, which is also written to ``self.query_string``.
        """
        query = _build_query(self.query_item, self.valid_operator, self.search_term)

        # Update the editable query string input with the generated query
        self.query_string = query

        return query

    def on_inputs_changed(self):
        """Regenerate the stored query string from the current inputs.

        Presumably invoked by the framework when an input changes; that call
        site is not visible in this file.
        """
        self.generate_query_string()

    def generate_file_url(self, file_id: str, mime_type: str) -> str:
        """Return the Google Drive URL for a file based on its MIME type.

        Google Docs, Sheets, Slides and Drawings get their editor URL; every
        other MIME type (including PDF) gets the generic Drive viewer URL.
        """
        template = _EDITOR_URL_TEMPLATES.get(mime_type, _DEFAULT_URL_TEMPLATE)
        return template.format(file_id=file_id)

    def search_files(self) -> dict:
        """Run the search against the Drive v3 API and collect the results.

        Returns:
            A dict with the keys ``doc_urls``, ``doc_ids``, ``doc_titles``
            (parallel lists of strings) and ``doc_titles_urls`` (a list of
            ``{"title", "url"}`` dicts). All lists are empty when nothing
            matches.

        Raises:
            ValueError: If ``token_string`` is not valid JSON.
        """
        # Load the token information from the JSON string
        try:
            token_info = json.loads(self.token_string)
        except json.JSONDecodeError as err:
            # JSONDecodeError subclasses ValueError, so existing handlers still match.
            raise ValueError(
                "Token String must be a valid JSON document containing OAuth 2.0 credentials."
            ) from err
        creds = Credentials.from_authorized_user_info(token_info)

        # Prefer the (possibly user-edited) query string; otherwise generate one.
        query = self.query_string or self.generate_query_string()

        # Initialize the Google Drive API service
        service = build("drive", "v3", credentials=creds)

        # Perform the search
        results = (
            service.files()
            .list(q=query, pageSize=_PAGE_SIZE, fields="nextPageToken, files(id, name, mimeType)")
            .execute()
        )
        items = results.get("files", [])

        doc_urls = []
        doc_ids = []
        doc_titles = []
        doc_titles_urls = []

        for item in items:
            file_id = item["id"]
            file_title = item["name"]
            file_url = self.generate_file_url(file_id, item["mimeType"])

            doc_urls.append(file_url)
            doc_ids.append(file_id)
            doc_titles.append(file_title)
            doc_titles_urls.append({"title": file_title, "url": file_url})

        return {
            "doc_urls": doc_urls,
            "doc_ids": doc_ids,
            "doc_titles_urls": doc_titles_urls,
            "doc_titles": doc_titles,
        }

    def search_doc_ids(self) -> List[str]:
        """Run the search and return the IDs of the matching files."""
        return self.search_files()["doc_ids"]

    def search_doc_urls(self) -> List[str]:
        """Run the search and return the URLs of the matching files."""
        return self.search_files()["doc_urls"]

    def search_doc_titles(self) -> List[str]:
        """Run the search and return the titles of the matching files."""
        return self.search_files()["doc_titles"]

    def search_data(self) -> Data:
        """Run the search and return title/URL pairs wrapped in a ``Data`` object."""
        return Data(data={"text": self.search_files()["doc_titles_urls"]})