2018-08-14 16:01:28 -04:00
|
|
|
import numpy as np
|
|
|
|
|
2018-06-28 00:35:12 -04:00
|
|
|
from superset.dataframe import dedup, SupersetDataFrame
|
|
|
|
from superset.db_engine_specs import BaseEngineSpec
|
|
|
|
from .base_tests import SupersetTestCase
|
|
|
|
|
|
|
|
|
|
|
|
class SupersetDataFrameTestCase(SupersetTestCase):
    """Tests for ``dedup`` and ``SupersetDataFrame`` column handling."""

    def test_dedup(self):
        """Check the ``__N`` suffixes ``dedup`` gives to repeated names."""
        # No duplicates: the list comes back unchanged.
        self.assertEqual(
            dedup(['foo', 'bar']),
            ['foo', 'bar'],
        )
        # Default comparison is case sensitive, so 'Foo' is not a
        # duplicate of 'foo' and keeps its name.
        self.assertEqual(
            dedup(['foo', 'bar', 'foo', 'bar', 'Foo']),
            ['foo', 'bar', 'foo__1', 'bar__1', 'Foo'],
        )
        # The suffix counter increases for each further repeat.
        self.assertEqual(
            dedup(['foo', 'bar', 'bar', 'bar', 'Bar']),
            ['foo', 'bar', 'bar__1', 'bar__2', 'Bar'],
        )
        # With case_sensitive=False, 'Bar' counts as another 'bar' but
        # keeps its original casing in the output.
        self.assertEqual(
            dedup(['foo', 'bar', 'bar', 'bar', 'Bar'], case_sensitive=False),
            ['foo', 'bar', 'bar__1', 'bar__2', 'Bar__3'],
        )

    def test_get_columns_basic(self):
        """String columns are expected to be non-date dimensions."""
        data = [
            ('a1', 'b1', 'c1'),
            ('a2', 'b2', 'c2'),
        ]
        cursor_descr = (
            ('a', 'string'),
            ('b', 'string'),
            ('c', 'string'),
        )
        cdf = SupersetDataFrame(data, cursor_descr, BaseEngineSpec)
        self.assertEqual(
            cdf.columns,
            [
                {
                    'is_date': False,
                    'type': 'STRING',
                    'name': 'a',
                    'is_dim': True,
                }, {
                    'is_date': False,
                    'type': 'STRING',
                    'name': 'b',
                    'is_dim': True,
                }, {
                    'is_date': False,
                    'type': 'STRING',
                    'name': 'c',
                    'is_dim': True,
                },
            ],
        )

    def test_get_columns_with_int(self):
        """An int column is expected to be a non-dimension with a sum agg."""
        data = [
            ('a1', 1),
            ('a2', 2),
        ]
        cursor_descr = (
            ('a', 'string'),
            ('b', 'int'),
        )
        cdf = SupersetDataFrame(data, cursor_descr, BaseEngineSpec)
        self.assertEqual(
            cdf.columns,
            [
                {
                    'is_date': False,
                    'type': 'STRING',
                    'name': 'a',
                    'is_dim': True,
                }, {
                    'is_date': False,
                    'type': 'INT',
                    'name': 'b',
                    'is_dim': False,
                    'agg': 'sum',
                },
            ],
        )

    def test_get_columns_type_inference(self):
        """Column types are expected to be inferred when the cursor gives none."""
        data = [
            (1.2, 1),
            (3.14, 2),
        ]
        # The cursor description carries no type information (None), so
        # the expected FLOAT / INT types must come from the data itself.
        cursor_descr = (
            ('a', None),
            ('b', None),
        )
        cdf = SupersetDataFrame(data, cursor_descr, BaseEngineSpec)
        self.assertEqual(
            cdf.columns,
            [
                {
                    'is_date': False,
                    'type': 'FLOAT',
                    'name': 'a',
                    'is_dim': False,
                    'agg': 'sum',
                }, {
                    'is_date': False,
                    'type': 'INT',
                    'name': 'b',
                    'is_dim': False,
                    'agg': 'sum',
                },
            ],
        )

    def test_is_date(self):
        """Check ``SupersetDataFrame.is_date`` for dtype / db-type pairs."""
        is_date = SupersetDataFrame.is_date

        # Expected to be a date: datetime dtype, or a DATETIME/TIMESTAMP
        # db type regardless of the dtype (including no dtype at all).
        self.assertEqual(is_date(np.dtype('M'), ''), True)
        self.assertEqual(is_date(np.dtype('f'), 'DATETIME'), True)
        self.assertEqual(is_date(np.dtype('i'), 'TIMESTAMP'), True)
        self.assertEqual(is_date(None, 'DATETIME'), True)
        self.assertEqual(is_date(None, 'TIMESTAMP'), True)

        # Expected not to be a date: no date signal from either argument.
        self.assertEqual(is_date(None, ''), False)
        self.assertEqual(is_date(np.dtype(np.int32), ''), False)

    def test_dedup_with_data(self):
        """Duplicate cursor column names are expected to be deduplicated."""
        data = [
            ('a', 1),
            ('a', 2),
        ]
        cursor_descr = (
            ('a', 'string'),
            ('a', 'string'),
        )
        cdf = SupersetDataFrame(data, cursor_descr, BaseEngineSpec)
        self.assertListEqual(cdf.column_names, ['a', 'a__1'])
|