diff --git a/setup.py b/setup.py
index 5474c9e962..508b358794 100644
--- a/setup.py
+++ b/setup.py
@@ -106,7 +106,11 @@ setup(
     ],
     extras_require={
         "athena": ["pyathena>=1.10.8,<1.11"],
-        "bigquery": ["pandas_gbq>=0.10.0", "pybigquery>=0.4.10"],
+        "bigquery": [
+            "pandas_gbq>=0.10.0",
+            "pybigquery>=0.4.10",
+            "google-cloud-bigquery>=2.4.0",
+        ],
         "clickhouse": ["clickhouse-sqlalchemy>= 0.1.4, <0.2"],
         "cockroachdb": ["cockroachdb>=0.3.5, <0.4"],
         "cors": ["flask-cors>=2.0.0"],
diff --git a/superset/db_engine_specs/base.py b/superset/db_engine_specs/base.py
index c143d962da..43f185c431 100644
--- a/superset/db_engine_specs/base.py
+++ b/superset/db_engine_specs/base.py
@@ -156,6 +156,7 @@ class BaseEngineSpec:  # pylint: disable=too-many-public-methods
     arraysize = 0
     max_column_name_length = 0
     try_remove_schema_from_table_name = True  # pylint: disable=invalid-name
+    run_multiple_statements_as_one = False

     # default matching patterns for identifying column types
     db_column_types: Dict[utils.DbColumnType, Tuple[Pattern[Any], ...]] = {
@@ -454,7 +455,7 @@ class BaseEngineSpec:  # pylint: disable=too-many-public-methods

     @staticmethod
     def csv_to_df(**kwargs: Any) -> pd.DataFrame:
-        """ Read csv into Pandas DataFrame
+        """Read csv into Pandas DataFrame
         :param kwargs: params to be passed to DataFrame.read_csv
         :return: Pandas DataFrame containing data from csv
         """
@@ -466,7 +467,7 @@ class BaseEngineSpec:  # pylint: disable=too-many-public-methods

     @classmethod
     def df_to_sql(cls, df: pd.DataFrame, **kwargs: Any) -> None:
-        """ Upload data from a Pandas DataFrame to a database. For
+        """Upload data from a Pandas DataFrame to a database. For
         regular engines this calls the DataFrame.to_sql() method. Can be
         overridden for engines that don't work well with to_sql(), e.g.
         BigQuery.
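Note: the new `run_multiple_statements_as_one` class attribute on `BaseEngineSpec` defaults to `False`, so existing engine specs keep splitting multi-statement queries as before. A third-party engine spec whose backend also loses context between cursor executions could opt in by overriding the attribute. A minimal sketch, assuming a hypothetical `MyEngineSpec` class and dialect name (not part of this change):

```python
from superset.db_engine_specs.base import BaseEngineSpec


class MyEngineSpec(BaseEngineSpec):
    """Hypothetical engine spec for a backend that loses session context
    between cursor.execute() calls."""

    engine = "myengine"  # assumed SQLAlchemy dialect name
    engine_name = "My Engine"

    # Opt in to the behaviour added in this PR: SQL Lab will send the
    # rendered query as-is instead of splitting it into statements.
    run_multiple_statements_as_one = True
```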
diff --git a/superset/db_engine_specs/bigquery.py b/superset/db_engine_specs/bigquery.py
index 7c783a2f8e..574a439cb8 100644
--- a/superset/db_engine_specs/bigquery.py
+++ b/superset/db_engine_specs/bigquery.py
@@ -39,6 +39,10 @@ class BigQueryEngineSpec(BaseEngineSpec):
     engine_name = "Google BigQuery"
     max_column_name_length = 128

+    # BigQuery doesn't maintain context when running multiple statements in the
+    # same cursor, so we need to run all statements at once
+    run_multiple_statements_as_one = True
+
     """
     https://www.python.org/dev/peps/pep-0249/#arraysize
     raw_connections bypass the pybigquery query execution context and deal with
diff --git a/superset/sql_lab.py b/superset/sql_lab.py
index 21aa5b1fa8..8001498894 100644
--- a/superset/sql_lab.py
+++ b/superset/sql_lab.py
@@ -296,7 +296,7 @@ def _serialize_and_expand_data(
     return (data, selected_columns, all_columns, expanded_columns)


-def execute_sql_statements(  # pylint: disable=too-many-arguments, too-many-locals, too-many-statements
+def execute_sql_statements(  # pylint: disable=too-many-arguments, too-many-locals, too-many-statements, too-many-branches
     query_id: int,
     rendered_query: str,
     return_results: bool,
@@ -322,9 +322,15 @@ def execute_sql_statements(  # pylint: disable=too-many-arguments, too-many-loca
         raise SqlLabException("Results backend isn't configured.")

     # Breaking down into multiple statements
-    parsed_query = ParsedQuery(rendered_query)
-    statements = parsed_query.get_statements()
-    logger.info("Query %s: Executing %i statement(s)", str(query_id), len(statements))
+    if not db_engine_spec.run_multiple_statements_as_one:
+        parsed_query = ParsedQuery(rendered_query)
+        statements = parsed_query.get_statements()
+        logger.info(
+            "Query %s: Executing %i statement(s)", str(query_id), len(statements)
+        )
+    else:
+        statements = [rendered_query]
+        logger.info("Query %s: Executing query as a single statement", str(query_id))

     logger.info("Query %s: Set query to 'running'", str(query_id))
     query.status = QueryStatus.RUNNING
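For context, the branch added to `execute_sql_statements` chooses between splitting the rendered SQL with `ParsedQuery` and passing it through untouched. A simplified, standalone sketch of that decision (not the actual Superset function; `db_engine_spec` here is any object exposing the new flag):

```python
from typing import Any, List

from superset.sql_parse import ParsedQuery


def split_statements(rendered_query: str, db_engine_spec: Any) -> List[str]:
    """Return the list of statements SQL Lab should execute.

    Engine specs that set run_multiple_statements_as_one (e.g. BigQuery,
    which loses context between statements on the same cursor) get the
    whole query back as a single item.
    """
    if db_engine_spec.run_multiple_statements_as_one:
        return [rendered_query]
    return ParsedQuery(rendered_query).get_statements()
```

For example, a query such as `SET @foo = 1; SELECT @foo;` would be returned as two statements for most engines, but as a single string when the flag is set, so both statements run in one `cursor.execute()` call.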