From 88e6ec992c6ad826a963d5ac3b37d550c4a3f136 Mon Sep 17 00:00:00 2001 From: Kim Truong <47833996+khtruong@users.noreply.github.com> Date: Fri, 3 May 2019 16:29:57 -0700 Subject: [PATCH] feat: view presto row objects in data grid (#7445) * Merge lastest from master into lyft-release-sp8 (#7405) * filter out all nan series (#7313) * improve not rich tooltip (#7345) * Create issue_label_bot.yaml (#7341) * fix: do not save colors without a color scheme (#7347) * [wtforms] Strip leading/trailing whitespace (#7084) * [schema] Updating the datasources schema (#5451) * limit tables/views returned if schema is not provided (#7358) * limit tables/views returned if schema is not provided * fix typo * improve code performance * handle the case when table name or view name does not present a schema * Add type anno (#7342) * Updated local dev instructions to include missing step * First pass at type annotations * [schema] Updating the base column schema (#5452) * Update 937d04c16b64_update_datasources.py (#7361) * Feature flag for client cache (#7348) * Feature flag for client cache * Fix integration test * Revert "Fix integration test" This reverts commit 58434ab98a015d6e96db4a97f26255aa282d989d. * Feature flag for client cache * Fix integration tests * Add feature flag to config.py * Add another feature check * Fix more integration tests * Fix raw HTML in SliceAdder (#7338) * remove backendSync.json (#7331) * [bubbles] issue when using duplicated metrics (#7087) * SUPERSET-7: Docker compose config version breaks on Ubuntu 16.04 (#7359) * SUPERSET-8: Update text in docs copyright footer (#7360) * SUPERSET-7: Docker compose config version breaks on Ubuntu 16.04 * SUPERSET-8: Extra text in docs copyright footer * [schema] Adding commits and removing unnecessary foreign-key definitions (#7371) * Store last selected dashboard in sessionStorage (#7181) * Store last selected dashboard in sessionStorage * Fix tests * [schema] Updating the base metric schema (#5453) * Fix NoneType bug & fill the test recipients with original recipients if empty (#7365) * feat: see Presto row and array data types (#7391) * feat: see Presto row and array data types * fix: address PR comments * fix: lint and build issues * fix: add types * Incorporate feedback from initial PR (prematurely merged to lyft-release-sp8) (#7415) * add stronger type hints where possible * fix: lint issues and add select_star func in Hive * add missing pkg init * fix: build issues * fix: pylint issues * fix: use logging instead of print * feat: view presto row objects in data grid * fix: address feedback * fix: spacing --- .../FilterableTable/FilterableTable.jsx | 2 +- .../FilterableTable/FilterableTableStyles.css | 5 + superset/db_engine_specs.py | 94 ++++++++++++++++++- tests/db_engine_specs_test.py | 23 +++++ 4 files changed, 118 insertions(+), 6 deletions(-) diff --git a/superset/assets/src/components/FilterableTable/FilterableTable.jsx b/superset/assets/src/components/FilterableTable/FilterableTable.jsx index 72a22c7ba2..3ef03ba8ae 100644 --- a/superset/assets/src/components/FilterableTable/FilterableTable.jsx +++ b/superset/assets/src/components/FilterableTable/FilterableTable.jsx @@ -138,7 +138,7 @@ export default class FilterableTable extends PureComponent { headerRenderer({ dataKey, label, sortBy, sortDirection }) { return ( -
+
{label} {sortBy === dataKey && diff --git a/superset/assets/src/components/FilterableTable/FilterableTableStyles.css b/superset/assets/src/components/FilterableTable/FilterableTableStyles.css index 5be4a36949..7a0d3ba0ea 100644 --- a/superset/assets/src/components/FilterableTable/FilterableTableStyles.css +++ b/superset/assets/src/components/FilterableTable/FilterableTableStyles.css @@ -72,3 +72,8 @@ } .even-row { background: #f2f2f2; } .odd-row { background: #ffffff; } +.header-style { + overflow: hidden; + text-overflow: ellipsis; + white-space: nowrap; +} \ No newline at end of file diff --git a/superset/db_engine_specs.py b/superset/db_engine_specs.py index 32fc1aa79c..5fed480491 100644 --- a/superset/db_engine_specs.py +++ b/superset/db_engine_specs.py @@ -49,6 +49,7 @@ from sqlalchemy.engine.reflection import Inspector from sqlalchemy.engine.result import RowProxy from sqlalchemy.engine.url import make_url from sqlalchemy.sql import quoted_name, text +from sqlalchemy.sql.expression import ColumnClause from sqlalchemy.sql.expression import TextAsFrom from sqlalchemy.types import String, UnicodeText import sqlparse @@ -980,19 +981,98 @@ class PrestoEngineSpec(BaseEngineSpec): result.append(cls._create_column_info(column, column.Column, column_type)) return result + @classmethod + def _is_column_name_quoted(cls, column_name: str) -> bool: + """ + Check if column name is in quotes + :param column_name: column name + :return: boolean + """ + return column_name.startswith('"') and column_name.endswith('"') + + @classmethod + def _get_fields(cls, cols: List[dict]) -> List[ColumnClause]: + """ + Format column clauses where names are in quotes and labels are specified + :param cols: columns + :return: column clauses + """ + column_clauses = [] + # Column names are separated by periods. This regex will find periods in a string + # if they are not enclosed in quotes because if a period is enclosed in quotes, + # then that period is part of a column name. + dot_pattern = r"""\. # split on period + (?= # look ahead + (?: # create non-capture group + [^\"]*\"[^\"]*\" # two quotes + )*[^\"]*$) # end regex""" + dot_regex = re.compile(dot_pattern, re.VERBOSE) + for col in cols: + # get individual column names + col_names = re.split(dot_regex, col['name']) + # quote each column name if it is not already quoted + for index, col_name in enumerate(col_names): + if not cls._is_column_name_quoted(col_name): + col_names[index] = '"{}"'.format(col_name) + quoted_col_name = '.'.join( + col_name if cls._is_column_name_quoted(col_name) else f'"{col_name}"' + for col_name in col_names) + # create column clause in the format "name"."name" AS "name.name" + column_clause = sqla.literal_column(quoted_col_name).label(col['name']) + column_clauses.append(column_clause) + return column_clauses + + @classmethod + def _filter_presto_cols(cls, cols: List[dict]) -> List[dict]: + """ + We want to filter out columns that correspond to array content because expanding + arrays would require us to use unnest and join. This can lead to a large, + complicated, and slow query. + + Example: select array_content + from TABLE + cross join UNNEST(array_column) as t(array_content); + + We know which columns to skip because cols is a list provided to us in a specific + order where a structural column is positioned right before its content. + + Example: Column Name: ColA, Column Data Type: array(row(nest_obj int)) + cols = [ ..., ColA, ColA.nest_obj, ... ] + + When we run across an array, check if subsequent column names start with the + array name and skip them. + :param cols: columns + :return: filtered list of columns + """ + filtered_cols = [] + curr_array_col_name = '' + for col in cols: + # col corresponds to an array's content and should be skipped + if curr_array_col_name and col['name'].startswith(curr_array_col_name): + continue + # col is an array so we need to check if subsequent + # columns correspond to the array's contents + elif str(col['type']) == 'ARRAY': + curr_array_col_name = col['name'] + filtered_cols.append(col) + else: + curr_array_col_name = '' + filtered_cols.append(col) + return filtered_cols + @classmethod def select_star(cls, my_db, table_name: str, engine: Engine, schema: str = None, limit: int = 100, show_cols: bool = False, indent: bool = True, latest_partition: bool = True, cols: List[dict] = []) -> str: """ - Temporary method until we have a function that can handle row and array columns + Include selecting properties of row objects. We cannot easily break arrays into + rows, so render the whole array in its own row and skip columns that correspond + to an array's contents. """ presto_cols = cols if show_cols: - dot_regex = r'\.(?=(?:[^\"]*\"[^\"]*\")*[^\"]*$)' - presto_cols = [ - col for col in presto_cols if re.search(dot_regex, col['name']) is None] - return BaseEngineSpec.select_star( + presto_cols = cls._filter_presto_cols(cols) + return super(PrestoEngineSpec, cls).select_star( my_db, table_name, engine, schema, limit, show_cols, indent, latest_partition, presto_cols, ) @@ -1526,6 +1606,10 @@ class HiveEngineSpec(PrestoEngineSpec): return qry.where(Column(col_name) == value) return False + @classmethod + def _get_fields(cls, cols: List[dict]) -> List[ColumnClause]: + return BaseEngineSpec._get_fields(cols) + @classmethod def latest_sub_partition(cls, table_name, schema, database, **kwargs): # TODO(bogdan): implement` diff --git a/tests/db_engine_specs_test.py b/tests/db_engine_specs_test.py index ef9d6bc17d..2a22c590ed 100644 --- a/tests/db_engine_specs_test.py +++ b/tests/db_engine_specs_test.py @@ -383,6 +383,29 @@ class DbEngineSpecsTestCase(SupersetTestCase): ('column_name.nested_obj', 'FLOAT')] self.verify_presto_column(presto_column, expected_results) + def test_presto_get_fields(self): + cols = [ + {'name': 'column'}, + {'name': 'column.nested_obj'}, + {'name': 'column."quoted.nested obj"'}] + actual_results = PrestoEngineSpec._get_fields(cols) + expected_results = [ + {'name': '"column"', 'label': 'column'}, + {'name': '"column"."nested_obj"', 'label': 'column.nested_obj'}, + {'name': '"column"."quoted.nested obj"', + 'label': 'column."quoted.nested obj"'}] + for actual_result, expected_result in zip(actual_results, expected_results): + self.assertEqual(actual_result.element.name, expected_result['name']) + self.assertEqual(actual_result.name, expected_result['label']) + + def test_presto_filter_presto_cols(self): + cols = [ + {'name': 'column', 'type': 'ARRAY'}, + {'name': 'column.nested_obj', 'type': 'FLOAT'}] + actual_results = PrestoEngineSpec._filter_presto_cols(cols) + expected_results = [cols[0]] + self.assertEqual(actual_results, expected_results) + def test_hive_get_view_names_return_empty_list(self): self.assertEquals([], HiveEngineSpec.get_view_names(mock.ANY, mock.ANY))