feat: Google Drive Search Component (#3319)

* feat: Google Drive Search Component. Adds the ability to search Google Drive and get back the relevant document IDs or document URLs. * Updated Google Drive Search.py
2024-08-14 10:11:58 -04:00 · 2024-08-14 10:11:58 -04:00 · ac4442d221
commit ac4442d221
parent 17337b4748
1 changed files with 157 additions and 0 deletions
--- a/src/backend/base/langflow/components/data/GoogleDriveSearch.py
+++ b/src/backend/base/langflow/components/data/GoogleDriveSearch.py
@ -0,0 +1,157 @@
+import json
+from typing import List
+from google.oauth2.credentials import Credentials
+from googleapiclient.discovery import build
+from langflow.custom import Component
+from langflow.inputs import MessageTextInput, DropdownInput
+from langflow.io import SecretStrInput
+from langflow.template import Output
+from langflow.schema import Data
+
+
+class GoogleDriveSearchComponent(Component):
+    """Component that searches Google Drive and exposes the matching files.
+
+    The Drive query is taken verbatim from the editable ``query_string`` input
+    when it is non-empty; otherwise it is generated from ``query_item``,
+    ``valid_operator`` and ``search_term``. The matches are offered as URLs,
+    IDs, titles, or a ``Data`` object holding title/URL pairs.
+    """
+
+    display_name = "Google Drive Search"
+    description = "Searches Google Drive files using provided credentials and query parameters."
+    icon = "Google"
+
+    inputs = [
+        SecretStrInput(
+            name="token_string",
+            display_name="Token String",
+            info="JSON string containing OAuth 2.0 access token information for service account access",
+            required=True,
+        ),
+        DropdownInput(
+            name="query_item",
+            display_name="Query Item",
+            options=[
+                "name",
+                "fullText",
+                "mimeType",
+                "modifiedTime",
+                "viewedByMeTime",
+                "trashed",
+                "starred",
+                "parents",
+                "owners",
+                "writers",
+                "readers",
+                "sharedWithMe",
+                "createdTime",
+                "properties",
+                "appProperties",
+                "visibility",
+                "shortcutDetails.targetId",
+            ],
+            info="The field to query.",
+            required=True,
+        ),
+        DropdownInput(
+            name="valid_operator",
+            display_name="Valid Operator",
+            options=["contains", "=", "!=", "<=", "<", ">", ">=", "in", "has"],
+            info="Operator to use in the query.",
+            required=True,
+        ),
+        MessageTextInput(
+            name="search_term",
+            display_name="Search Term",
+            info="The value to search for in the specified query item.",
+            required=True,
+        ),
+        MessageTextInput(
+            name="query_string",
+            display_name="Query String",
+            info="The query string used for searching. You can edit this manually.",
+            value="",  # This will be updated with the generated query string
+        ),
+    ]
+
+    outputs = [
+        Output(display_name="Document URLs", name="doc_urls", method="search_doc_urls"),
+        Output(display_name="Document IDs", name="doc_ids", method="search_doc_ids"),
+        Output(display_name="Document Titles", name="doc_titles", method="search_doc_titles"),
+        Output(display_name="Data", name="Data", method="search_data"),
+    ]
+
+    @staticmethod
+    def _quote_term(term) -> str:
+        """Return ``term`` as a single-quoted Drive query literal.
+
+        Backslashes and single quotes are backslash-escaped so that a value
+        such as ``John's notes`` cannot terminate the literal early.
+        """
+        # Escape backslashes first so the ones added for quotes are not doubled.
+        escaped = str(term).replace("\\", "\\\\").replace("'", "\\'")
+        return f"'{escaped}'"
+
+    def generate_query_string(self) -> str:
+        """Build a Drive query from the selected item, operator and search term.
+
+        The generated query is also stored in ``self.query_string`` so the
+        editable input reflects it.
+
+        Returns:
+            The generated query string.
+        """
+        query_item = self.query_item
+        valid_operator = self.valid_operator
+        search_term = self.search_term
+        bare_term = str(search_term).strip()
+
+        if valid_operator == "in":
+            # Drive puts the value first for collection fields: 'id' in parents.
+            query = f"{self._quote_term(search_term)} in {query_item}"
+        elif valid_operator == "has" and bare_term.startswith("{"):
+            # ``has`` takes a ``{ key='...' and value='...' }`` clause, which
+            # must not be wrapped in quotes; pass it through as written.
+            query = f"{query_item} has {bare_term}"
+        elif query_item in ("trashed", "starred", "sharedWithMe") and bare_term.lower() in ("true", "false"):
+            # Boolean fields compare against bare true/false literals.
+            query = f"{query_item} {valid_operator} {bare_term.lower()}"
+        else:
+            query = f"{query_item} {valid_operator} {self._quote_term(search_term)}"
+
+        # Update the editable query string input with the generated query
+        self.query_string = query
+
+        return query
+
+    def on_inputs_changed(self):
+        """Regenerate the query string from the current input values."""
+        self.generate_query_string()
+
+    def generate_file_url(self, file_id: str, mime_type: str) -> str:
+        """Return the browser URL for a Drive file based on its MIME type.
+
+        Google Docs, Sheets, Slides and Drawings get their editor URL; every
+        other MIME type (PDFs included) gets the generic Drive viewer URL.
+        """
+        editor_paths = {
+            "application/vnd.google-apps.document": "document",
+            "application/vnd.google-apps.spreadsheet": "spreadsheets",
+            "application/vnd.google-apps.presentation": "presentation",
+            "application/vnd.google-apps.drawing": "drawings",
+        }
+        editor = editor_paths.get(mime_type)
+        if editor is not None:
+            return f"https://docs.google.com/{editor}/d/{file_id}/edit"
+        return f"https://drive.google.com/file/d/{file_id}/view?usp=drivesdk"
+
+    def search_files(self, page_size: int = 5) -> dict:
+        """Run the Drive search and collect the matching files.
+
+        Args:
+            page_size: Maximum number of files requested from the API.
+
+        Returns:
+            A dict with the parallel lists ``doc_urls``, ``doc_ids`` and
+            ``doc_titles``, plus ``doc_titles_urls``, a list of
+            ``{"title": ..., "url": ...}`` dicts.
+
+        Raises:
+            json.JSONDecodeError: If ``token_string`` is not valid JSON.
+        """
+        # Load the token information from the JSON string
+        try:
+            token_info = json.loads(self.token_string)
+        except json.JSONDecodeError as exc:
+            # Same exception type, but the message names the offending input.
+            raise json.JSONDecodeError(f"Token String is not valid JSON: {exc.msg}", exc.doc, exc.pos) from exc
+        creds = Credentials.from_authorized_user_info(token_info)
+
+        # Use the query string from the input (which might have been edited by the user)
+        query = self.query_string if self.query_string else self.generate_query_string()
+
+        # Initialize the Google Drive API service
+        service = build("drive", "v3", credentials=creds)
+
+        # Perform the search
+        results = (
+            service.files()
+            .list(q=query, pageSize=page_size, fields="nextPageToken, files(id, name, mimeType)")
+            .execute()
+        )
+        items = results.get("files", [])
+
+        doc_urls = []
+        doc_ids = []
+        doc_titles_urls = []
+        doc_titles = []
+
+        for item in items:
+            # Directly use the file ID, title, and MIME type to generate the URL
+            file_id = item["id"]
+            file_title = item["name"]
+            file_url = self.generate_file_url(file_id, item["mimeType"])
+
+            # Store the URL, ID, and title+URL in their respective lists
+            doc_urls.append(file_url)
+            doc_ids.append(file_id)
+            doc_titles.append(file_title)
+            doc_titles_urls.append({"title": file_title, "url": file_url})
+
+        return {"doc_urls": doc_urls, "doc_ids": doc_ids, "doc_titles_urls": doc_titles_urls, "doc_titles": doc_titles}
+
+    def search_doc_ids(self) -> List[str]:
+        """Return the IDs of the files matching the query."""
+        return self.search_files()["doc_ids"]
+
+    def search_doc_urls(self) -> List[str]:
+        """Return the URLs of the files matching the query."""
+        return self.search_files()["doc_urls"]
+
+    def search_doc_titles(self) -> List[str]:
+        """Return the titles of the files matching the query."""
+        return self.search_files()["doc_titles"]
+
+    def search_data(self) -> Data:
+        """Return the title/URL pairs of the matching files wrapped in ``Data``."""
+        return Data(data={"text": self.search_files()["doc_titles_urls"]})