mirror of https://github.com/apache/superset.git
feat: run BQ queries as single statement (#11904)
* feat: run BQ queries as single statement
* Update deps
* Fix lint
* Update superset/sql_lab.py

Co-authored-by: Ville Brofeldt <33317356+villebro@users.noreply.github.com>
Co-authored-by: Ville Brofeldt <33317356+villebro@users.noreply.github.com>
This commit is contained in:
parent
04f993e222
commit
54bf70733f
6
setup.py
6
setup.py
|
@ -106,7 +106,11 @@ setup(
|
||||||
],
|
],
|
||||||
extras_require={
|
extras_require={
|
||||||
"athena": ["pyathena>=1.10.8,<1.11"],
|
"athena": ["pyathena>=1.10.8,<1.11"],
|
||||||
"bigquery": ["pandas_gbq>=0.10.0", "pybigquery>=0.4.10"],
|
"bigquery": [
|
||||||
|
"pandas_gbq>=0.10.0",
|
||||||
|
"pybigquery>=0.4.10",
|
||||||
|
"google-cloud-bigquery>=2.4.0",
|
||||||
|
],
|
||||||
"clickhouse": ["clickhouse-sqlalchemy>= 0.1.4, <0.2"],
|
"clickhouse": ["clickhouse-sqlalchemy>= 0.1.4, <0.2"],
|
||||||
"cockroachdb": ["cockroachdb>=0.3.5, <0.4"],
|
"cockroachdb": ["cockroachdb>=0.3.5, <0.4"],
|
||||||
"cors": ["flask-cors>=2.0.0"],
|
"cors": ["flask-cors>=2.0.0"],
|
||||||
|
|
|
@ -156,6 +156,7 @@ class BaseEngineSpec: # pylint: disable=too-many-public-methods
|
||||||
arraysize = 0
|
arraysize = 0
|
||||||
max_column_name_length = 0
|
max_column_name_length = 0
|
||||||
try_remove_schema_from_table_name = True # pylint: disable=invalid-name
|
try_remove_schema_from_table_name = True # pylint: disable=invalid-name
|
||||||
|
run_multiple_statements_as_one = False
|
||||||
|
|
||||||
# default matching patterns for identifying column types
|
# default matching patterns for identifying column types
|
||||||
db_column_types: Dict[utils.DbColumnType, Tuple[Pattern[Any], ...]] = {
|
db_column_types: Dict[utils.DbColumnType, Tuple[Pattern[Any], ...]] = {
|
||||||
|
@ -454,7 +455,7 @@ class BaseEngineSpec: # pylint: disable=too-many-public-methods
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def csv_to_df(**kwargs: Any) -> pd.DataFrame:
|
def csv_to_df(**kwargs: Any) -> pd.DataFrame:
|
||||||
""" Read csv into Pandas DataFrame
|
"""Read csv into Pandas DataFrame
|
||||||
:param kwargs: params to be passed to DataFrame.read_csv
|
:param kwargs: params to be passed to DataFrame.read_csv
|
||||||
:return: Pandas DataFrame containing data from csv
|
:return: Pandas DataFrame containing data from csv
|
||||||
"""
|
"""
|
||||||
|
@ -466,7 +467,7 @@ class BaseEngineSpec: # pylint: disable=too-many-public-methods
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def df_to_sql(cls, df: pd.DataFrame, **kwargs: Any) -> None:
|
def df_to_sql(cls, df: pd.DataFrame, **kwargs: Any) -> None:
|
||||||
""" Upload data from a Pandas DataFrame to a database. For
|
"""Upload data from a Pandas DataFrame to a database. For
|
||||||
regular engines this calls the DataFrame.to_sql() method. Can be
|
regular engines this calls the DataFrame.to_sql() method. Can be
|
||||||
overridden for engines that don't work well with to_sql(), e.g.
|
overridden for engines that don't work well with to_sql(), e.g.
|
||||||
BigQuery.
|
BigQuery.
|
||||||
|
|
|
@ -39,6 +39,10 @@ class BigQueryEngineSpec(BaseEngineSpec):
|
||||||
engine_name = "Google BigQuery"
|
engine_name = "Google BigQuery"
|
||||||
max_column_name_length = 128
|
max_column_name_length = 128
|
||||||
|
|
||||||
|
# BigQuery doesn't maintain context when running multiple statements in the
|
||||||
|
# same cursor, so we need to run all statements at once
|
||||||
|
run_multiple_statements_as_one = True
|
||||||
|
|
||||||
"""
|
"""
|
||||||
https://www.python.org/dev/peps/pep-0249/#arraysize
|
https://www.python.org/dev/peps/pep-0249/#arraysize
|
||||||
raw_connections bypass the pybigquery query execution context and deal with
|
raw_connections bypass the pybigquery query execution context and deal with
|
||||||
|
|
|
@ -296,7 +296,7 @@ def _serialize_and_expand_data(
|
||||||
return (data, selected_columns, all_columns, expanded_columns)
|
return (data, selected_columns, all_columns, expanded_columns)
|
||||||
|
|
||||||
|
|
||||||
def execute_sql_statements( # pylint: disable=too-many-arguments, too-many-locals, too-many-statements
|
def execute_sql_statements( # pylint: disable=too-many-arguments, too-many-locals, too-many-statements, too-many-branches
|
||||||
query_id: int,
|
query_id: int,
|
||||||
rendered_query: str,
|
rendered_query: str,
|
||||||
return_results: bool,
|
return_results: bool,
|
||||||
|
@ -322,9 +322,15 @@ def execute_sql_statements( # pylint: disable=too-many-arguments, too-many-loca
|
||||||
raise SqlLabException("Results backend isn't configured.")
|
raise SqlLabException("Results backend isn't configured.")
|
||||||
|
|
||||||
# Breaking down into multiple statements
|
# Breaking down into multiple statements
|
||||||
parsed_query = ParsedQuery(rendered_query)
|
if not db_engine_spec.run_multiple_statements_as_one:
|
||||||
statements = parsed_query.get_statements()
|
parsed_query = ParsedQuery(rendered_query)
|
||||||
logger.info("Query %s: Executing %i statement(s)", str(query_id), len(statements))
|
statements = parsed_query.get_statements()
|
||||||
|
logger.info(
|
||||||
|
"Query %s: Executing %i statement(s)", str(query_id), len(statements)
|
||||||
|
)
|
||||||
|
else:
|
||||||
|
statements = [rendered_query]
|
||||||
|
logger.info("Query %s: Executing query as a single statement", str(query_id))
|
||||||
|
|
||||||
logger.info("Query %s: Set query to 'running'", str(query_id))
|
logger.info("Query %s: Set query to 'running'", str(query_id))
|
||||||
query.status = QueryStatus.RUNNING
|
query.status = QueryStatus.RUNNING
|
||||||
|
|
Loading…
Reference in New Issue