feat: New custom component to execute google BigQuery(BQ) SQLs (#7922)
* adding a google bq component and unit tests * [autofix.ci] apply automated fixes * resolved conflicts * Update test_google_bq_sql_executor_component.py * [autofix.ci] apply automated fixes * dataframe output to component * Update google_bq_sql_executor.py * [autofix.ci] apply automated fixes * replacing tests for dataframes * [autofix.ci] apply automated fixes * adding capability to extract SQL statement from a text blob * resolving test errors * [autofix.ci] apply automated fixes * resolving conflicts * resolve conflicts * resolving conflicts * issue with json input test * [autofix.ci] apply automated fixes * fix format issues ruff * Update google_bq_sql_executor.py * Update google_bq_sql_executor.py * Updating unit tests after clean query changes * [autofix.ci] apply automated fixes --------- Co-authored-by: autofix-ci[bot] <114827586+autofix-ci[bot]@users.noreply.github.com> Co-authored-by: Edwin Jose <edwin.jose@datastax.com>
This commit is contained in:
parent
2efb7a6cad
commit
1b4b53d60d
4 changed files with 731 additions and 0 deletions
|
|
@ -1,9 +1,11 @@
|
|||
from .gmail import GmailLoaderComponent
|
||||
from .google_bq_sql_executor import BigQueryExecutorComponent
|
||||
from .google_drive import GoogleDriveComponent
|
||||
from .google_drive_search import GoogleDriveSearchComponent
|
||||
from .google_oauth_token import GoogleOAuthToken
|
||||
|
||||
__all__ = [
|
||||
"BigQueryExecutorComponent",
|
||||
"GmailLoaderComponent",
|
||||
"GoogleDriveComponent",
|
||||
"GoogleDriveSearchComponent",
|
||||
|
|
|
|||
|
|
@ -0,0 +1,157 @@
|
|||
import json
|
||||
import re
|
||||
from pathlib import Path
|
||||
|
||||
from google.auth.exceptions import RefreshError
|
||||
from google.cloud import bigquery
|
||||
from google.oauth2.service_account import Credentials
|
||||
|
||||
from langflow.custom import Component
|
||||
from langflow.io import BoolInput, FileInput, MessageTextInput, Output
|
||||
from langflow.schema.dataframe import DataFrame
|
||||
|
||||
|
||||
class BigQueryExecutorComponent(Component):
|
||||
display_name = "BigQuery"
|
||||
description = "Execute SQL queries on Google BigQuery."
|
||||
name = "BigQueryExecutor"
|
||||
icon = "Google"
|
||||
beta: bool = True
|
||||
|
||||
inputs = [
|
||||
FileInput(
|
||||
name="service_account_json_file",
|
||||
display_name="Upload Service Account JSON",
|
||||
info="Upload the JSON file containing Google Cloud service account credentials.",
|
||||
file_types=["json"],
|
||||
required=True,
|
||||
),
|
||||
MessageTextInput(
|
||||
name="query",
|
||||
display_name="SQL Query",
|
||||
info="The SQL query to execute on BigQuery.",
|
||||
required=True,
|
||||
tool_mode=True,
|
||||
),
|
||||
BoolInput(
|
||||
name="clean_query",
|
||||
display_name="Clean Query",
|
||||
info="When enabled, this will automatically clean up your SQL query.",
|
||||
value=False,
|
||||
advanced=True,
|
||||
),
|
||||
]
|
||||
|
||||
outputs = [
|
||||
Output(display_name="Query Results", name="query_results", method="execute_sql"),
|
||||
]
|
||||
|
||||
def _clean_sql_query(self, query: str) -> str:
|
||||
"""Clean SQL query by removing surrounding quotes and whitespace.
|
||||
|
||||
Also extracts SQL statements from text that might contain other content.
|
||||
|
||||
Args:
|
||||
query: The SQL query to clean
|
||||
|
||||
Returns:
|
||||
The cleaned SQL query
|
||||
"""
|
||||
# First, try to extract SQL from code blocks
|
||||
sql_pattern = r"```(?:sql)?\s*([\s\S]*?)\s*```"
|
||||
sql_matches = re.findall(sql_pattern, query, re.IGNORECASE)
|
||||
|
||||
if sql_matches:
|
||||
# If we found SQL in code blocks, use the first match
|
||||
query = sql_matches[0]
|
||||
else:
|
||||
# If no code block, try to find SQL statements
|
||||
# Look for common SQL keywords at the start of lines
|
||||
sql_keywords = r"(?i)(SELECT|INSERT|UPDATE|DELETE|CREATE|ALTER|DROP|WITH|MERGE)"
|
||||
lines = query.split("\n")
|
||||
sql_lines = []
|
||||
in_sql = False
|
||||
|
||||
for _line in lines:
|
||||
line = _line.strip()
|
||||
if re.match(sql_keywords, line):
|
||||
in_sql = True
|
||||
if in_sql:
|
||||
sql_lines.append(line)
|
||||
if line.endswith(";"):
|
||||
in_sql = False
|
||||
|
||||
if sql_lines:
|
||||
query = "\n".join(sql_lines)
|
||||
|
||||
# Remove any backticks that might be at the start or end
|
||||
query = query.strip("`")
|
||||
|
||||
# Then remove surrounding quotes (single or double) if they exist
|
||||
query = query.strip()
|
||||
if (query.startswith('"') and query.endswith('"')) or (query.startswith("'") and query.endswith("'")):
|
||||
query = query[1:-1]
|
||||
|
||||
# Finally, clean up any remaining whitespace and ensure no backticks remain
|
||||
query = query.strip()
|
||||
# Remove any remaining backticks, but preserve them if they're part of a table/column name
|
||||
# This regex will remove backticks that are not part of a valid identifier
|
||||
return re.sub(r"`(?![a-zA-Z0-9_])|(?<![a-zA-Z0-9_])`", "", query)
|
||||
|
||||
def execute_sql(self) -> DataFrame:
|
||||
try:
|
||||
# First try to read the file
|
||||
try:
|
||||
service_account_path = Path(self.service_account_json_file)
|
||||
with service_account_path.open() as f:
|
||||
credentials_json = json.load(f)
|
||||
project_id = credentials_json.get("project_id")
|
||||
if not project_id:
|
||||
msg = "No project_id found in service account credentials file."
|
||||
raise ValueError(msg)
|
||||
except FileNotFoundError as e:
|
||||
msg = f"Service account file not found: {e}"
|
||||
raise ValueError(msg) from e
|
||||
except json.JSONDecodeError as e:
|
||||
msg = "Invalid JSON string for service account credentials"
|
||||
raise ValueError(msg) from e
|
||||
|
||||
# Then try to load credentials
|
||||
try:
|
||||
credentials = Credentials.from_service_account_file(self.service_account_json_file)
|
||||
except Exception as e:
|
||||
msg = f"Error loading service account credentials: {e}"
|
||||
raise ValueError(msg) from e
|
||||
|
||||
except ValueError:
|
||||
raise
|
||||
except Exception as e:
|
||||
msg = f"Error executing BigQuery SQL query: {e}"
|
||||
raise ValueError(msg) from e
|
||||
|
||||
try:
|
||||
client = bigquery.Client(credentials=credentials, project=project_id)
|
||||
|
||||
# Check for empty or whitespace-only query before cleaning
|
||||
if not str(self.query).strip():
|
||||
msg = "No valid SQL query found in input text."
|
||||
raise ValueError(msg)
|
||||
|
||||
# Always clean the query if it contains code block markers, quotes, or if clean_query is enabled
|
||||
if "```" in str(self.query) or '"' in str(self.query) or "'" in str(self.query) or self.clean_query:
|
||||
sql_query = self._clean_sql_query(str(self.query))
|
||||
else:
|
||||
sql_query = str(self.query).strip() # At minimum, strip whitespace
|
||||
|
||||
query_job = client.query(sql_query)
|
||||
results = query_job.result()
|
||||
output_dict = [dict(row) for row in results]
|
||||
|
||||
except RefreshError as e:
|
||||
msg = "Authentication error: Unable to refresh authentication token. Please try to reauthenticate."
|
||||
raise ValueError(msg) from e
|
||||
except Exception as e:
|
||||
msg = f"Error executing BigQuery SQL query: {e}"
|
||||
raise ValueError(msg) from e
|
||||
|
||||
return DataFrame(output_dict)
|
||||
|
|
@ -0,0 +1 @@
|
|||
"""Google components test package."""
|
||||
|
|
@ -0,0 +1,571 @@
|
|||
"""Tests for BigQueryExecutorComponent."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
from unittest.mock import MagicMock, mock_open, patch
|
||||
|
||||
import pytest
|
||||
from google.auth.exceptions import RefreshError
|
||||
from google.oauth2.service_account import Credentials
|
||||
from langflow.components.google.google_bq_sql_executor import BigQueryExecutorComponent
|
||||
from pandas import DataFrame
|
||||
|
||||
from tests.base import ComponentTestBaseWithoutClient
|
||||
|
||||
|
||||
class TestBigQueryExecutorComponent(ComponentTestBaseWithoutClient):
|
||||
@pytest.fixture
|
||||
def component_class(self):
|
||||
"""Return the component class to test."""
|
||||
return BigQueryExecutorComponent
|
||||
|
||||
@pytest.fixture
|
||||
def mock_credentials_json(self):
|
||||
"""Return a valid service account JSON string."""
|
||||
return json.dumps(
|
||||
{
|
||||
"type": "service_account",
|
||||
"project_id": "test-project",
|
||||
"private_key_id": "fake-key-id",
|
||||
"private_key": "-----BEGIN PRIVATE KEY-----\nfake-key\n-----END PRIVATE KEY-----\n",
|
||||
"client_email": "test@project.iam.gserviceaccount.com",
|
||||
"client_id": "123456789",
|
||||
"auth_uri": "https://accounts.google.com/o/oauth2/auth",
|
||||
"token_uri": "https://oauth2.googleapis.com/token",
|
||||
"auth_provider_x509_cert_url": "https://www.googleapis.com/oauth2/v1/certs",
|
||||
"client_x509_cert_url": (
|
||||
"https://www.googleapis.com/robot/v1/metadata/x509/test@project.iam.gserviceaccount.com"
|
||||
),
|
||||
}
|
||||
)
|
||||
|
||||
@pytest.fixture
|
||||
def service_account_file(self, tmp_path, mock_credentials_json):
|
||||
"""Write service account JSON to a temp file and return its path."""
|
||||
f = tmp_path / "sa.json"
|
||||
f.write_text(mock_credentials_json)
|
||||
return str(f)
|
||||
|
||||
@pytest.fixture
|
||||
def default_kwargs(self, service_account_file):
|
||||
"""Return default kwargs for component instantiation."""
|
||||
return {
|
||||
"service_account_json_file": service_account_file,
|
||||
"query": "SELECT 1",
|
||||
}
|
||||
|
||||
@pytest.fixture
|
||||
def file_names_mapping(self):
|
||||
"""No version-specific files for this component."""
|
||||
return []
|
||||
|
||||
@patch.object(Credentials, "from_service_account_file")
|
||||
@patch("langflow.components.google.google_bq_sql_executor.bigquery.Client")
|
||||
def test_execute_sql_success(self, mock_client_cls, mock_from_file, component_class, default_kwargs):
|
||||
"""Test successful SQL execution and component side-effects."""
|
||||
# Arrange mocks
|
||||
mock_creds = MagicMock(spec=Credentials)
|
||||
mock_from_file.return_value = mock_creds
|
||||
|
||||
# Create a mock row that can be converted to a dict
|
||||
mock_row = MagicMock()
|
||||
mock_row.items.return_value = [("column1", "value1")]
|
||||
mock_row.__iter__.return_value = iter([("column1", "value1")])
|
||||
mock_row.keys.return_value = ["column1"]
|
||||
mock_row.to_numpy.return_value = ["value1"] # Changed from values to to_numpy
|
||||
mock_row.__getitem__.return_value = "value1"
|
||||
|
||||
# Create mock result with the mock row
|
||||
mock_result = MagicMock()
|
||||
mock_result.__iter__.return_value = iter([mock_row])
|
||||
|
||||
# Create mock job with the mock result
|
||||
mock_job = MagicMock()
|
||||
mock_job.result.return_value = mock_result
|
||||
|
||||
# Create mock client with the mock job
|
||||
mock_client = MagicMock()
|
||||
mock_client.query.return_value = mock_job
|
||||
mock_client_cls.return_value = mock_client
|
||||
|
||||
# Instantiate component with defaults
|
||||
component = component_class(**default_kwargs)
|
||||
|
||||
# Execute
|
||||
result = component.execute_sql()
|
||||
|
||||
# Verify the result
|
||||
assert isinstance(result, DataFrame)
|
||||
assert len(result) == 1 # Check number of rows
|
||||
assert "column1" in result.columns # Check column exists
|
||||
assert result.iloc[0]["column1"] == "value1" # Check value
|
||||
|
||||
# Verify the mocks were called correctly
|
||||
mock_from_file.assert_called_once_with(default_kwargs["service_account_json_file"])
|
||||
mock_client_cls.assert_called_once_with(credentials=mock_creds, project="test-project")
|
||||
mock_client.query.assert_called_once_with(default_kwargs["query"])
|
||||
|
||||
@pytest.mark.parametrize("q", ["", " \n\t "])
|
||||
@patch.object(Credentials, "from_service_account_file")
|
||||
@patch("langflow.components.google.google_bq_sql_executor.bigquery.Client")
|
||||
def test_empty_query_raises(self, mock_client_cls, mock_from_file, component_class, service_account_file, q):
|
||||
"""Empty or whitespace-only queries should raise a ValueError."""
|
||||
# Create a proper mock credentials object
|
||||
mock_creds = MagicMock(spec=Credentials)
|
||||
mock_from_file.return_value = mock_creds
|
||||
|
||||
# Mock the BigQuery client
|
||||
mock_client = MagicMock()
|
||||
mock_client_cls.return_value = mock_client
|
||||
|
||||
# Create component with empty/whitespace query
|
||||
component = component_class(
|
||||
service_account_json_file=service_account_file,
|
||||
query=q,
|
||||
)
|
||||
|
||||
# Verify that execute_sql raises ValueError for empty/whitespace queries
|
||||
expected_error = "No valid SQL query found in input text."
|
||||
with pytest.raises(ValueError, match=expected_error):
|
||||
component.execute_sql()
|
||||
|
||||
# Verify that the BigQuery client was not called
|
||||
mock_client.query.assert_not_called()
|
||||
|
||||
def test_missing_service_account_file(self, component_class):
|
||||
"""Non-existent service account file should raise a ValueError."""
|
||||
component = component_class(
|
||||
service_account_json_file="/no/such/file.json",
|
||||
query="SELECT 1",
|
||||
)
|
||||
expected_error = "Service account file not found"
|
||||
with pytest.raises(ValueError, match=expected_error):
|
||||
component.execute_sql()
|
||||
|
||||
def test_invalid_service_account_json(self, component_class):
|
||||
"""Invalid JSON in service account file should raise a ValueError."""
|
||||
with patch("pathlib.Path.open", mock_open(read_data="invalid json")):
|
||||
component = component_class(
|
||||
service_account_json_file="ignored.json",
|
||||
query="SELECT 1",
|
||||
)
|
||||
expected_error = "Invalid JSON string for service account credentials"
|
||||
with pytest.raises(ValueError, match=expected_error):
|
||||
component.execute_sql()
|
||||
|
||||
@patch.object(Credentials, "from_service_account_file")
|
||||
@patch("langflow.components.google.google_bq_sql_executor.bigquery.Client")
|
||||
def test_execute_sql_invalid_query(self, mock_client_cls, mock_from_file, component_class, default_kwargs):
|
||||
"""SQL execution errors should be wrapped in ValueError."""
|
||||
mock_from_file.return_value = MagicMock()
|
||||
fake_client = MagicMock()
|
||||
mock_client_cls.return_value = fake_client
|
||||
fake_client.query.side_effect = Exception("Invalid query syntax")
|
||||
|
||||
component = component_class(**default_kwargs)
|
||||
with pytest.raises(ValueError, match="Error executing BigQuery SQL query: Invalid query syntax"):
|
||||
component.execute_sql()
|
||||
|
||||
@patch.object(Credentials, "from_service_account_file")
|
||||
@patch("langflow.components.google.google_bq_sql_executor.bigquery.Client")
|
||||
def test_refresh_error_handling(self, mock_client_cls, mock_from_file, component_class, default_kwargs):
|
||||
"""RefreshError should produce an authentication ValueError."""
|
||||
mock_from_file.return_value = MagicMock()
|
||||
fake_client = MagicMock()
|
||||
mock_client_cls.return_value = fake_client
|
||||
fake_client.query.side_effect = RefreshError("Token expired")
|
||||
|
||||
component = component_class(**default_kwargs)
|
||||
with pytest.raises(ValueError, match="Authentication error: Unable to refresh authentication token."):
|
||||
component.execute_sql()
|
||||
|
||||
@patch.object(Credentials, "from_service_account_file")
|
||||
@patch("langflow.components.google.google_bq_sql_executor.bigquery.Client")
|
||||
def test_complex_query_result(self, mock_client_cls, mock_from_file, component_class, default_kwargs):
|
||||
"""Complex row structures should be correctly serialized to DataFrame."""
|
||||
# Arrange mocks
|
||||
mock_creds = MagicMock(spec=Credentials)
|
||||
mock_from_file.return_value = mock_creds
|
||||
|
||||
# Create mock rows with complex data
|
||||
mock_row1 = MagicMock()
|
||||
mock_row1.items.return_value = [("id", 1), ("name", "Test 1"), ("value", 10.5), ("active", True)]
|
||||
mock_row1.__iter__.return_value = iter([("id", 1), ("name", "Test 1"), ("value", 10.5), ("active", True)])
|
||||
mock_row1.keys.return_value = ["id", "name", "value", "active"]
|
||||
mock_row1.to_numpy.return_value = [1, "Test 1", 10.5, True] # Changed from values to to_numpy
|
||||
mock_row1.__getitem__.side_effect = lambda key: {"id": 1, "name": "Test 1", "value": 10.5, "active": True}[key]
|
||||
|
||||
mock_row2 = MagicMock()
|
||||
mock_row2.items.return_value = [("id", 2), ("name", "Test 2"), ("value", 20.75), ("active", False)]
|
||||
mock_row2.__iter__.return_value = iter([("id", 2), ("name", "Test 2"), ("value", 20.75), ("active", False)])
|
||||
mock_row2.keys.return_value = ["id", "name", "value", "active"]
|
||||
mock_row2.to_numpy.return_value = [2, "Test 2", 20.75, False] # Changed from values to to_numpy
|
||||
mock_row2.__getitem__.side_effect = lambda key: {"id": 2, "name": "Test 2", "value": 20.75, "active": False}[
|
||||
key
|
||||
]
|
||||
|
||||
# Create mock result with the mock rows
|
||||
mock_result = MagicMock()
|
||||
mock_result.__iter__.return_value = iter([mock_row1, mock_row2])
|
||||
|
||||
# Create mock job with the mock result
|
||||
mock_job = MagicMock()
|
||||
mock_job.result.return_value = mock_result
|
||||
|
||||
# Create mock client with the mock job
|
||||
mock_client = MagicMock()
|
||||
mock_client.query.return_value = mock_job
|
||||
mock_client_cls.return_value = mock_client
|
||||
|
||||
# Instantiate component with defaults
|
||||
component = component_class(**default_kwargs)
|
||||
|
||||
# Execute
|
||||
result = component.execute_sql()
|
||||
|
||||
# Verify the result
|
||||
assert isinstance(result, DataFrame)
|
||||
assert len(result) == 2 # Check number of rows
|
||||
assert list(result.columns) == ["id", "name", "value", "active"] # Check columns
|
||||
|
||||
# Convert DataFrame to dictionary for easier comparison
|
||||
result_dict = result.to_dict(orient="records")
|
||||
|
||||
# Verify first row
|
||||
assert result_dict[0]["id"] == 1
|
||||
assert result_dict[0]["name"] == "Test 1"
|
||||
assert result_dict[0]["value"] == 10.5
|
||||
assert result_dict[0]["active"] is True
|
||||
|
||||
# Verify second row
|
||||
assert result_dict[1]["id"] == 2
|
||||
assert result_dict[1]["name"] == "Test 2"
|
||||
assert result_dict[1]["value"] == 20.75
|
||||
assert result_dict[1]["active"] is False
|
||||
|
||||
# Verify the mocks were called correctly
|
||||
mock_from_file.assert_called_once_with(default_kwargs["service_account_json_file"])
|
||||
mock_client_cls.assert_called_once_with(credentials=mock_creds, project="test-project")
|
||||
mock_client.query.assert_called_once_with(default_kwargs["query"])
|
||||
|
||||
@patch.object(Credentials, "from_service_account_file")
|
||||
@patch("langflow.components.google.google_bq_sql_executor.bigquery.Client")
|
||||
def test_query_with_sql_code_block(self, mock_client_cls, mock_from_file, component_class, default_kwargs):
|
||||
"""Test that queries with SQL code blocks are properly handled."""
|
||||
mock_from_file.return_value = MagicMock()
|
||||
fake_client = MagicMock()
|
||||
mock_client_cls.return_value = fake_client
|
||||
|
||||
query_with_code_block = "```sql\nSELECT * FROM table\n```"
|
||||
component = component_class(**{**default_kwargs, "query": query_with_code_block, "clean_query": True})
|
||||
|
||||
result = component.execute_sql()
|
||||
|
||||
# Verify the query was properly cleaned (code block markers removed)
|
||||
fake_client.query.assert_called_once_with("SELECT * FROM table")
|
||||
assert isinstance(result, DataFrame)
|
||||
|
||||
@patch.object(Credentials, "from_service_account_file")
|
||||
@patch("langflow.components.google.google_bq_sql_executor.bigquery.Client")
|
||||
def test_query_with_whitespace(self, mock_client_cls, mock_from_file, component_class, default_kwargs):
|
||||
"""Test that queries with extra whitespace are properly handled."""
|
||||
# Arrange mocks
|
||||
mock_creds = MagicMock(spec=Credentials)
|
||||
mock_from_file.return_value = mock_creds
|
||||
|
||||
# Create a mock row that can be converted to a dict
|
||||
mock_row = MagicMock()
|
||||
mock_row.items.return_value = [("column1", "value1")]
|
||||
mock_row.__iter__.return_value = iter([("column1", "value1")])
|
||||
mock_row.keys.return_value = ["column1"]
|
||||
mock_row.to_numpy.return_value = ["value1"] # Changed from values to to_numpy
|
||||
mock_row.__getitem__.return_value = "value1"
|
||||
|
||||
# Create mock result with the mock row
|
||||
mock_result = MagicMock()
|
||||
mock_result.__iter__.return_value = iter([mock_row])
|
||||
|
||||
# Create mock job with the mock result
|
||||
mock_job = MagicMock()
|
||||
mock_job.result.return_value = mock_result
|
||||
|
||||
# Create mock client with the mock job
|
||||
mock_client = MagicMock()
|
||||
mock_client.query.return_value = mock_job
|
||||
mock_client_cls.return_value = mock_client
|
||||
|
||||
query_with_whitespace = " SELECT * FROM table "
|
||||
component = component_class(**{**default_kwargs, "query": query_with_whitespace, "clean_query": True})
|
||||
|
||||
result = component.execute_sql()
|
||||
|
||||
# Verify the query was properly stripped
|
||||
mock_client.query.assert_called_once_with("SELECT * FROM table")
|
||||
assert isinstance(result, DataFrame)
|
||||
assert len(result) == 1 # Check number of rows
|
||||
assert "column1" in result.columns # Check column exists
|
||||
assert result.iloc[0]["column1"] == "value1" # Check value
|
||||
|
||||
@patch.object(Credentials, "from_service_account_file")
|
||||
@patch("langflow.components.google.google_bq_sql_executor.bigquery.Client")
|
||||
def test_query_with_special_characters(self, mock_client_cls, mock_from_file, component_class, default_kwargs):
|
||||
"""Test that queries with special characters are properly handled."""
|
||||
# Arrange mocks
|
||||
mock_creds = MagicMock(spec=Credentials)
|
||||
mock_from_file.return_value = mock_creds
|
||||
|
||||
# Create a mock row that can be converted to a dict
|
||||
mock_row = MagicMock()
|
||||
mock_row.items.return_value = [("name", "test_value")]
|
||||
mock_row.__iter__.return_value = iter([("name", "test_value")])
|
||||
mock_row.keys.return_value = ["name"]
|
||||
mock_row.to_numpy.return_value = ["test_value"] # Changed from values to to_numpy
|
||||
mock_row.__getitem__.return_value = "test_value"
|
||||
|
||||
# Create mock result with the mock row
|
||||
mock_result = MagicMock()
|
||||
mock_result.__iter__.return_value = iter([mock_row])
|
||||
|
||||
# Create mock job with the mock result
|
||||
mock_job = MagicMock()
|
||||
mock_job.result.return_value = mock_result
|
||||
|
||||
# Create mock client with the mock job
|
||||
mock_client = MagicMock()
|
||||
mock_client.query.return_value = mock_job
|
||||
mock_client_cls.return_value = mock_client
|
||||
|
||||
query_with_special_chars = "SELECT * FROM project.dataset.table WHERE name LIKE '%test%'"
|
||||
component = component_class(**{**default_kwargs, "query": query_with_special_chars})
|
||||
|
||||
result = component.execute_sql()
|
||||
|
||||
# Verify the query with special characters was passed correctly
|
||||
mock_client.query.assert_called_once_with(query_with_special_chars)
|
||||
assert isinstance(result, DataFrame)
|
||||
assert len(result) == 1 # Check number of rows
|
||||
assert "name" in result.columns # Check column exists
|
||||
assert result.iloc[0]["name"] == "test_value" # Check value
|
||||
|
||||
@patch.object(Credentials, "from_service_account_file")
|
||||
@patch("langflow.components.google.google_bq_sql_executor.bigquery.Client")
|
||||
def test_query_with_multiple_statements(self, mock_client_cls, mock_from_file, component_class, default_kwargs):
|
||||
"""Test that queries with multiple statements are properly handled."""
|
||||
# Arrange mocks
|
||||
mock_creds = MagicMock(spec=Credentials)
|
||||
mock_from_file.return_value = mock_creds
|
||||
|
||||
# Create a mock row that can be converted to a dict
|
||||
mock_row = MagicMock()
|
||||
mock_row.items.return_value = [("id", 1)]
|
||||
mock_row.__iter__.return_value = iter([("id", 1)])
|
||||
mock_row.keys.return_value = ["id"]
|
||||
mock_row.to_numpy.return_value = [1] # Changed from values to to_numpy
|
||||
mock_row.__getitem__.return_value = 1
|
||||
|
||||
# Create mock result with the mock row
|
||||
mock_result = MagicMock()
|
||||
mock_result.__iter__.return_value = iter([mock_row])
|
||||
|
||||
# Create mock job with the mock result
|
||||
mock_job = MagicMock()
|
||||
mock_job.result.return_value = mock_result
|
||||
|
||||
# Create mock client with the mock job
|
||||
mock_client = MagicMock()
|
||||
mock_client.query.return_value = mock_job
|
||||
mock_client_cls.return_value = mock_client
|
||||
|
||||
multi_statement_query = (
|
||||
"CREATE TABLE IF NOT EXISTS test_table (id INT64);\n"
|
||||
"INSERT INTO test_table VALUES (1);\n"
|
||||
"SELECT * FROM test_table;"
|
||||
)
|
||||
component = component_class(**{**default_kwargs, "query": multi_statement_query})
|
||||
|
||||
result = component.execute_sql()
|
||||
|
||||
# Verify the multi-statement query was passed correctly
|
||||
mock_client.query.assert_called_once_with(multi_statement_query)
|
||||
assert isinstance(result, DataFrame)
|
||||
assert len(result) == 1 # Check number of rows
|
||||
assert "id" in result.columns # Check column exists
|
||||
assert result.iloc[0]["id"] == 1 # Check value
|
||||
|
||||
@patch.object(Credentials, "from_service_account_file")
|
||||
@patch("langflow.components.google.google_bq_sql_executor.bigquery.Client")
|
||||
def test_query_with_parameters(self, mock_client_cls, mock_from_file, component_class, default_kwargs):
|
||||
"""Test that queries with parameters are properly handled."""
|
||||
# Arrange mocks
|
||||
mock_creds = MagicMock(spec=Credentials)
|
||||
mock_from_file.return_value = mock_creds
|
||||
|
||||
# Create a mock row that can be converted to a dict
|
||||
mock_row = MagicMock()
|
||||
mock_row.items.return_value = [("id", 1), ("name", "test_name")]
|
||||
mock_row.__iter__.return_value = iter([("id", 1), ("name", "test_name")])
|
||||
mock_row.keys.return_value = ["id", "name"]
|
||||
mock_row.to_numpy.return_value = [1, "test_name"] # Changed from values to to_numpy
|
||||
mock_row.__getitem__.side_effect = lambda key: {"id": 1, "name": "test_name"}[key]
|
||||
|
||||
# Create mock result with the mock row
|
||||
mock_result = MagicMock()
|
||||
mock_result.__iter__.return_value = iter([mock_row])
|
||||
|
||||
# Create mock job with the mock result
|
||||
mock_job = MagicMock()
|
||||
mock_job.result.return_value = mock_result
|
||||
|
||||
# Create mock client with the mock job
|
||||
mock_client = MagicMock()
|
||||
mock_client.query.return_value = mock_job
|
||||
mock_client_cls.return_value = mock_client
|
||||
|
||||
query_with_params = "SELECT * FROM table WHERE id = @id AND name = @name"
|
||||
component = component_class(**{**default_kwargs, "query": query_with_params})
|
||||
|
||||
result = component.execute_sql()
|
||||
|
||||
# Verify the parameterized query was passed correctly
|
||||
mock_client.query.assert_called_once_with(query_with_params)
|
||||
assert isinstance(result, DataFrame)
|
||||
assert len(result) == 1 # Check number of rows
|
||||
assert list(result.columns) == ["id", "name"] # Check columns
|
||||
assert result.iloc[0]["id"] == 1 # Check id value
|
||||
assert result.iloc[0]["name"] == "test_name" # Check name value
|
||||
|
||||
def test_missing_project_id_in_credentials(self, component_class, tmp_path):
|
||||
"""Test that missing project_id in credentials raises an error."""
|
||||
# Create a service account JSON without project_id
|
||||
invalid_credentials = {
|
||||
"type": "service_account",
|
||||
"private_key_id": "fake-key-id",
|
||||
"private_key": "-----BEGIN PRIVATE KEY-----\nfake-key\n-----END PRIVATE KEY-----\n",
|
||||
"client_email": "test@project.iam.gserviceaccount.com",
|
||||
"client_id": "123456789",
|
||||
"auth_uri": "https://accounts.google.com/o/oauth2/auth",
|
||||
"token_uri": "https://oauth2.googleapis.com/token",
|
||||
"auth_provider_x509_cert_url": "https://www.googleapis.com/oauth2/v1/certs",
|
||||
"client_x509_cert_url": "https://www.googleapis.com/robot/v1/metadata/x509/test@project.iam.gserviceaccount.com",
|
||||
}
|
||||
|
||||
# Write invalid credentials to a temp file
|
||||
f = tmp_path / "invalid_sa.json"
|
||||
f.write_text(json.dumps(invalid_credentials))
|
||||
|
||||
component = component_class(
|
||||
service_account_json_file=str(f),
|
||||
query="SELECT 1",
|
||||
)
|
||||
|
||||
with pytest.raises(ValueError, match="No project_id found in service account credentials file"):
|
||||
component.execute_sql()
|
||||
|
||||
@patch.object(Credentials, "from_service_account_file")
|
||||
@patch("langflow.components.google.google_bq_sql_executor.bigquery.Client")
|
||||
def test_query_with_quotes(self, mock_client_cls, mock_from_file, component_class, default_kwargs):
|
||||
"""Test that queries wrapped in quotes are properly handled."""
|
||||
# Arrange mocks
|
||||
mock_creds = MagicMock(spec=Credentials)
|
||||
mock_from_file.return_value = mock_creds
|
||||
|
||||
# Create a mock row that can be converted to a dict
|
||||
mock_row = MagicMock()
|
||||
mock_row.items.return_value = [("column1", "value1")]
|
||||
mock_row.__iter__.return_value = iter([("column1", "value1")])
|
||||
mock_row.keys.return_value = ["column1"]
|
||||
mock_row.to_numpy.return_value = ["value1"] # Changed from values to to_numpy
|
||||
mock_row.__getitem__.return_value = "value1"
|
||||
|
||||
# Create mock result with the mock row
|
||||
mock_result = MagicMock()
|
||||
mock_result.__iter__.return_value = iter([mock_row])
|
||||
|
||||
# Create mock job with the mock result
|
||||
mock_job = MagicMock()
|
||||
mock_job.result.return_value = mock_result
|
||||
|
||||
# Create mock client with the mock job
|
||||
mock_client = MagicMock()
|
||||
mock_client.query.return_value = mock_job
|
||||
mock_client_cls.return_value = mock_client
|
||||
|
||||
# Test with double quotes
|
||||
query_with_double_quotes = '"SELECT * FROM table"'
|
||||
component = component_class(**{**default_kwargs, "query": query_with_double_quotes, "clean_query": True})
|
||||
result = component.execute_sql()
|
||||
mock_client.query.assert_called_once_with("SELECT * FROM table")
|
||||
assert isinstance(result, DataFrame)
|
||||
|
||||
# Reset mocks for next test
|
||||
mock_client.reset_mock()
|
||||
|
||||
# Test with single quotes
|
||||
query_with_single_quotes = "'SELECT * FROM table'"
|
||||
component = component_class(**{**default_kwargs, "query": query_with_single_quotes, "clean_query": True})
|
||||
result = component.execute_sql()
|
||||
mock_client.query.assert_called_once_with("SELECT * FROM table")
|
||||
assert isinstance(result, DataFrame)
|
||||
|
||||
# Reset mocks for next test
|
||||
mock_client.reset_mock()
|
||||
|
||||
# Test with SQL code block
|
||||
query_with_code_block = "```sql\nSELECT * FROM table\n```"
|
||||
component = component_class(**{**default_kwargs, "query": query_with_code_block, "clean_query": True})
|
||||
result = component.execute_sql()
|
||||
mock_client.query.assert_called_once_with("SELECT * FROM table")
|
||||
assert isinstance(result, DataFrame)
|
||||
|
||||
# Reset mocks for next test
|
||||
mock_client.reset_mock()
|
||||
|
||||
# Test with SQL code block and quotes
|
||||
query_with_code_block_and_quotes = '```sql\n"SELECT * FROM table"\n```'
|
||||
component = component_class(
|
||||
**{**default_kwargs, "query": query_with_code_block_and_quotes, "clean_query": True}
|
||||
)
|
||||
result = component.execute_sql()
|
||||
mock_client.query.assert_called_once_with("SELECT * FROM table")
|
||||
assert isinstance(result, DataFrame)
|
||||
|
||||
# Reset mocks for next test
|
||||
mock_client.reset_mock()
|
||||
|
||||
# Test with just backticks
|
||||
query_with_backticks = "`SELECT * FROM table`"
|
||||
component = component_class(**{**default_kwargs, "query": query_with_backticks, "clean_query": True})
|
||||
result = component.execute_sql()
|
||||
mock_client.query.assert_called_once_with("SELECT * FROM table")
|
||||
assert isinstance(result, DataFrame)
|
||||
|
||||
# Reset mocks for next test
|
||||
mock_client.reset_mock()
|
||||
|
||||
# Test with mixed markers
|
||||
query_with_mixed = '```sql\n`"SELECT * FROM table"`\n```'
|
||||
component = component_class(**{**default_kwargs, "query": query_with_mixed, "clean_query": True})
|
||||
result = component.execute_sql()
|
||||
mock_client.query.assert_called_once_with("SELECT * FROM table")
|
||||
assert isinstance(result, DataFrame)
|
||||
|
||||
# Reset mocks for next test
|
||||
mock_client.reset_mock()
|
||||
|
||||
# Test with backticks in the middle of the query
|
||||
query_with_middle_backticks = "SELECT * FROM project.dataset.table"
|
||||
component = component_class(**{**default_kwargs, "query": query_with_middle_backticks, "clean_query": True})
|
||||
result = component.execute_sql()
|
||||
mock_client.query.assert_called_once_with("SELECT * FROM project.dataset.table")
|
||||
assert isinstance(result, DataFrame)
|
||||
|
||||
# Reset mocks for next test
|
||||
mock_client.reset_mock()
|
||||
|
||||
# Test with multiple backticks in the query
|
||||
query_with_multiple_backticks = "SELECT * FROM project.dataset.table WHERE column = 'value'"
|
||||
component = component_class(**{**default_kwargs, "query": query_with_multiple_backticks, "clean_query": True})
|
||||
result = component.execute_sql()
|
||||
mock_client.query.assert_called_once_with("SELECT * FROM project.dataset.table WHERE column = 'value'")
|
||||
assert isinstance(result, DataFrame)
|
||||
Loading…
Add table
Add a link
Reference in a new issue