Better support for Druid cardinality estimation metrics (#613)

* added recognition of thetaSketch and HLL metrics * made sure the name agrees with SQL convention
2024-09-12 08:39:45 -04:00 · 2016-06-13 20:49:51 -07:00 · 2016-06-13 20:49:51 -07:00 · 347c39b8e9
commit 347c39b8e9
parent bc58c5d031
1 changed file with 24 additions and 10 deletions
--- a/caravel/models.py
+++ b/caravel/models.py
@@ -1078,6 +1078,8 @@ class DruidDatasource(Model, AuditMixinNullable, Queryable):
            if datatype == "STRING":
                col_obj.groupby = True
                col_obj.filterable = True
+            if datatype == "hyperUnique" or datatype == "thetaSketch":
+                col_obj.count_distinct = True
            if col_obj:
                col_obj.type = cols[col]['type']
            session.flush()
@@ -1447,17 +1449,29 @@ class DruidColumn(Model, AuditMixinNullable):
                    'type': mt, 'name': name, 'fieldName': self.column_name})
            ))
        if self.count_distinct:
-            mt = 'count_distinct'
            name = 'count_distinct__' + self.column_name
-            metrics.append(DruidMetric(
-                metric_name=name,
-                verbose_name='COUNT(DISTINCT {})'.format(self.column_name),
-                metric_type='count_distinct',
-                json=json.dumps({
-                    'type': 'cardinality',
-                    'name': name,
-                    'fieldNames': [self.column_name]})
-            ))
+            if self.type == 'hyperUnique' or self.type == 'thetaSketch':
+                metrics.append(DruidMetric(
+                    metric_name=name,
+                    verbose_name='COUNT(DISTINCT {})'.format(self.column_name),
+                    metric_type=self.type,
+                    json=json.dumps({
+                        'type': self.type,
+                        'name': name,
+                        'fieldName': self.column_name
+                    })
+                ))
+            else:
+                mt = 'count_distinct'
+                metrics.append(DruidMetric(
+                    metric_name=name,
+                    verbose_name='COUNT(DISTINCT {})'.format(self.column_name),
+                    metric_type='count_distinct',
+                    json=json.dumps({
+                        'type': 'cardinality',
+                        'name': name,
+                        'fieldNames': [self.column_name]})
+                ))
        session = get_session()
        new_metrics = []
        for metric in metrics: