feat: view presto row objects in data grid (#7445)

* Merge lastest from master into lyft-release-sp8 (#7405) * filter out all nan series (#7313) * improve not rich tooltip (#7345) * Create issue_label_bot.yaml (#7341) * fix: do not save colors without a color scheme (#7347) * [wtforms] Strip leading/trailing whitespace (#7084) * [schema] Updating the datasources schema (#5451) * limit tables/views returned if schema is not provided (#7358) * limit tables/views returned if schema is not provided * fix typo * improve code performance * handle the case when table name or view name does not present a schema * Add type anno (#7342) * Updated local dev instructions to include missing step * First pass at type annotations * [schema] Updating the base column schema (#5452) * Update 937d04c16b64_update_datasources.py (#7361) * Feature flag for client cache (#7348) * Feature flag for client cache * Fix integration test * Revert "Fix integration test" This reverts commit 58434ab98a. * Feature flag for client cache * Fix integration tests * Add feature flag to config.py * Add another feature check * Fix more integration tests * Fix raw HTML in SliceAdder (#7338) * remove backendSync.json (#7331) * [bubbles] issue when using duplicated metrics (#7087) * SUPERSET-7: Docker compose config version breaks on Ubuntu 16.04 (#7359) * SUPERSET-8: Update text in docs copyright footer (#7360) * SUPERSET-7: Docker compose config version breaks on Ubuntu 16.04 * SUPERSET-8: Extra text in docs copyright footer * [schema] Adding commits and removing unnecessary foreign-key definitions (#7371) * Store last selected dashboard in sessionStorage (#7181) * Store last selected dashboard in sessionStorage * Fix tests * [schema] Updating the base metric schema (#5453) * Fix NoneType bug & fill the test recipients with original recipients if empty (#7365) * feat: see Presto row and array data types (#7391) * feat: see Presto row and array data types * fix: address PR comments * fix: lint and build issues * fix: add types * Incorporate feedback from initial PR (prematurely merged to lyft-release-sp8) (#7415) * add stronger type hints where possible * fix: lint issues and add select_star func in Hive * add missing pkg init * fix: build issues * fix: pylint issues * fix: use logging instead of print * feat: view presto row objects in data grid * fix: address feedback * fix: spacing
2019-05-03 16:29:57 -07:00 · 2019-05-03 16:29:57 -07:00 · 88e6ec992c
parent c8bb7e0fe4
commit 88e6ec992c
4 changed files with 118 additions and 6 deletions
--- a/superset/assets/src/components/FilterableTable/FilterableTable.jsx
+++ b/superset/assets/src/components/FilterableTable/FilterableTable.jsx
@ -138,7 +138,7 @@ export default class FilterableTable extends PureComponent {
  headerRenderer({ dataKey, label, sortBy, sortDirection }) {
    return (
-      <div>
+      <div className="header-style">
        {label}
        {sortBy === dataKey &&
          <SortIndicator sortDirection={sortDirection} />
--- a/superset/assets/src/components/FilterableTable/FilterableTableStyles.css
+++ b/superset/assets/src/components/FilterableTable/FilterableTableStyles.css
@ -72,3 +72,8 @@
 }
 .even-row { background: #f2f2f2; }
 .odd-row { background: #ffffff; }
 .header-style {
  overflow: hidden;
  text-overflow: ellipsis;
  white-space: nowrap;
 }
--- a/superset/db_engine_specs.py
+++ b/superset/db_engine_specs.py
@ -49,6 +49,7 @@ from sqlalchemy.engine.reflection import Inspector
 from sqlalchemy.engine.result import RowProxy
 from sqlalchemy.engine.url import make_url
 from sqlalchemy.sql import quoted_name, text
 from sqlalchemy.sql.expression import ColumnClause
 from sqlalchemy.sql.expression import TextAsFrom
 from sqlalchemy.types import String, UnicodeText
 import sqlparse
@ -980,19 +981,98 @@ class PrestoEngineSpec(BaseEngineSpec):
            result.append(cls._create_column_info(column, column.Column, column_type))
        return result
    @classmethod
    def _is_column_name_quoted(cls, column_name: str) -> bool:
        """
        Check if column name is in quotes
        :param column_name: column name
        :return: boolean
        """
        return column_name.startswith('"') and column_name.endswith('"')
    @classmethod
    def _get_fields(cls, cols: List[dict]) -> List[ColumnClause]:
        """
        Format column clauses where names are in quotes and labels are specified
        :param cols: columns
        :return: column clauses
        """
        column_clauses = []
        # Column names are separated by periods. This regex will find periods in a string
        # if they are not enclosed in quotes because if a period is enclosed in quotes,
        # then that period is part of a column name.
        dot_pattern = r"""\.                # split on period
                          (?=               # look ahead
                          (?:               # create non-capture group
                          [^\"]*\"[^\"]*\"  # two quotes
                          )*[^\"]*$)        # end regex"""
        dot_regex = re.compile(dot_pattern, re.VERBOSE)
        for col in cols:
            # get individual column names
            col_names = re.split(dot_regex, col['name'])
            # quote each column name if it is not already quoted
            for index, col_name in enumerate(col_names):
                if not cls._is_column_name_quoted(col_name):
                    col_names[index] = '"{}"'.format(col_name)
            quoted_col_name = '.'.join(
                col_name if cls._is_column_name_quoted(col_name) else f'"{col_name}"'
                for col_name in col_names)
            # create column clause in the format "name"."name" AS "name.name"
            column_clause = sqla.literal_column(quoted_col_name).label(col['name'])
            column_clauses.append(column_clause)
        return column_clauses
    @classmethod
    def _filter_presto_cols(cls, cols: List[dict]) -> List[dict]:
        """
        We want to filter out columns that correspond to array content because expanding
        arrays would require us to use unnest and join. This can lead to a large,
        complicated, and slow query.
        Example: select array_content
                 from TABLE
                 cross join UNNEST(array_column) as t(array_content);
        We know which columns to skip because cols is a list provided to us in a specific
        order where a structural column is positioned right before its content.
        Example: Column Name: ColA, Column Data Type: array(row(nest_obj int))
                 cols = [ ..., ColA, ColA.nest_obj, ... ]
        When we run across an array, check if subsequent column names start with the
        array name and skip them.
        :param cols: columns
        :return: filtered list of columns
        """
        filtered_cols = []
        curr_array_col_name = ''
        for col in cols:
            # col corresponds to an array's content and should be skipped
            if curr_array_col_name and col['name'].startswith(curr_array_col_name):
                continue
            # col is an array so we need to check if subsequent
            # columns correspond to the array's contents
            elif str(col['type']) == 'ARRAY':
                curr_array_col_name = col['name']
                filtered_cols.append(col)
            else:
                curr_array_col_name = ''
                filtered_cols.append(col)
        return filtered_cols
    @classmethod
    def select_star(cls, my_db, table_name: str, engine: Engine, schema: str = None,
                    limit: int = 100, show_cols: bool = False, indent: bool = True,
                    latest_partition: bool = True, cols: List[dict] = []) -> str:
        """
-        Temporary method until we have a function that can handle row and array columns
+        Include selecting properties of row objects. We cannot easily break arrays into
        rows, so render the whole array in its own row and skip columns that correspond
        to an array's contents.
        """
        presto_cols = cols
        if show_cols:
-            dot_regex = r'\.(?=(?:[^\"]*\"[^\"]*\")*[^\"]*$)'
+            presto_cols = cls._filter_presto_cols(cols)
-            presto_cols = [
+        return super(PrestoEngineSpec, cls).select_star(
                col for col in presto_cols if re.search(dot_regex, col['name']) is None]
        return BaseEngineSpec.select_star(
            my_db, table_name, engine, schema, limit,
            show_cols, indent, latest_partition, presto_cols,
        )
@ -1526,6 +1606,10 @@ class HiveEngineSpec(PrestoEngineSpec):
                    return qry.where(Column(col_name) == value)
        return False
    @classmethod
    def _get_fields(cls, cols: List[dict]) -> List[ColumnClause]:
        return BaseEngineSpec._get_fields(cols)
    @classmethod
    def latest_sub_partition(cls, table_name, schema, database, **kwargs):
        # TODO(bogdan): implement`
--- a/tests/db_engine_specs_test.py
+++ b/tests/db_engine_specs_test.py
@ -383,6 +383,29 @@ class DbEngineSpecsTestCase(SupersetTestCase):
            ('column_name.nested_obj', 'FLOAT')]
        self.verify_presto_column(presto_column, expected_results)
    def test_presto_get_fields(self):
        cols = [
            {'name': 'column'},
            {'name': 'column.nested_obj'},
            {'name': 'column."quoted.nested obj"'}]
        actual_results = PrestoEngineSpec._get_fields(cols)
        expected_results = [
            {'name': '"column"', 'label': 'column'},
            {'name': '"column"."nested_obj"', 'label': 'column.nested_obj'},
            {'name': '"column"."quoted.nested obj"',
             'label': 'column."quoted.nested obj"'}]
        for actual_result, expected_result in zip(actual_results, expected_results):
            self.assertEqual(actual_result.element.name, expected_result['name'])
            self.assertEqual(actual_result.name, expected_result['label'])
    def test_presto_filter_presto_cols(self):
        cols = [
            {'name': 'column', 'type': 'ARRAY'},
            {'name': 'column.nested_obj', 'type': 'FLOAT'}]
        actual_results = PrestoEngineSpec._filter_presto_cols(cols)
        expected_results = [cols[0]]
        self.assertEqual(actual_results, expected_results)
    def test_hive_get_view_names_return_empty_list(self):
        self.assertEquals([], HiveEngineSpec.get_view_names(mock.ANY, mock.ANY))