Fetch a batch of rows from bigquery (#5632)

* Fetch a batch of rows from bigquery

* unused const

* review comments
This commit is contained in:
Sumedh Sakdeo 2018-08-14 21:44:04 -07:00 committed by Maxime Beauchemin
parent 46f89f4516
commit c9bd5a6167
1 changed file with 15 additions and 0 deletions

View File

@ -102,6 +102,7 @@ class BaseEngineSpec(object):
inner_joins = True
allows_subquery = True
consistent_case_sensitivity = True # do results have same case as qry for col names?
arraysize = None
@classmethod
def get_time_grains(cls):
@ -115,6 +116,8 @@ class BaseEngineSpec(object):
@classmethod
def fetch_data(cls, cursor, limit):
    """Retrieve result rows from a DBAPI cursor.

    Applies the engine spec's ``arraysize`` (PEP 249 batch-fetch hint)
    to the cursor when one is configured, then fetches either up to
    ``limit`` rows or the full result set depending on
    ``cls.limit_method``.
    """
    if cls.arraysize:
        # Tune how many rows each underlying fetch round-trip pulls.
        cursor.arraysize = cls.arraysize
    use_fetchmany = cls.limit_method == LimitMethod.FETCH_MANY
    return cursor.fetchmany(limit) if use_fetchmany else cursor.fetchall()
@ -1367,6 +1370,18 @@ class BQEngineSpec(BaseEngineSpec):
As contributed by @mxmzdlv on issue #945"""
engine = 'bigquery'
"""
https://www.python.org/dev/peps/pep-0249/#arraysize
raw_connections bypass the pybigquery query execution context and deal with
raw dbapi connection directly.
If this value is not set, the default value is set to 1, as described here,
https://googlecloudplatform.github.io/google-cloud-python/latest/_modules/google/cloud/bigquery/dbapi/cursor.html#Cursor
The default value of 5000 is derived from the pybigquery.
https://github.com/mxmzdlv/pybigquery/blob/d214bb089ca0807ca9aaa6ce4d5a01172d40264e/pybigquery/sqlalchemy_bigquery.py#L102
"""
arraysize = 5000
time_grain_functions = {
None: '{col}',
'PT1S': 'TIMESTAMP_TRUNC({col}, SECOND)',