superset/tests/dataframe_test.py

139 lines
4.0 KiB
Python

import numpy as np
from superset.dataframe import dedup, SupersetDataFrame
from superset.db_engine_specs import BaseEngineSpec
from .base_tests import SupersetTestCase
class SupersetDataFrameTestCase(SupersetTestCase):
def test_dedup(self):
self.assertEquals(
dedup(['foo', 'bar']),
['foo', 'bar'],
)
self.assertEquals(
dedup(['foo', 'bar', 'foo', 'bar', 'Foo']),
['foo', 'bar', 'foo__1', 'bar__1', 'Foo'],
)
self.assertEquals(
dedup(['foo', 'bar', 'bar', 'bar', 'Bar']),
['foo', 'bar', 'bar__1', 'bar__2', 'Bar'],
)
self.assertEquals(
dedup(['foo', 'bar', 'bar', 'bar', 'Bar'], case_sensitive=False),
['foo', 'bar', 'bar__1', 'bar__2', 'Bar__3'],
)
def test_get_columns_basic(self):
data = [
('a1', 'b1', 'c1'),
('a2', 'b2', 'c2'),
]
cursor_descr = (
('a', 'string'),
('b', 'string'),
('c', 'string'),
)
cdf = SupersetDataFrame(data, cursor_descr, BaseEngineSpec)
self.assertEqual(
cdf.columns,
[
{
'is_date': False,
'type': 'STRING',
'name': 'a',
'is_dim': True,
}, {
'is_date': False,
'type': 'STRING',
'name': 'b',
'is_dim': True,
}, {
'is_date': False,
'type': 'STRING',
'name': 'c',
'is_dim': True,
},
],
)
def test_get_columns_with_int(self):
data = [
('a1', 1),
('a2', 2),
]
cursor_descr = (
('a', 'string'),
('b', 'int'),
)
cdf = SupersetDataFrame(data, cursor_descr, BaseEngineSpec)
self.assertEqual(
cdf.columns,
[
{
'is_date': False,
'type': 'STRING',
'name': 'a',
'is_dim': True,
}, {
'is_date': False,
'type': 'INT',
'name': 'b',
'is_dim': False,
'agg': 'sum',
},
],
)
def test_get_columns_type_inference(self):
data = [
(1.2, 1),
(3.14, 2),
]
cursor_descr = (
('a', None),
('b', None),
)
cdf = SupersetDataFrame(data, cursor_descr, BaseEngineSpec)
self.assertEqual(
cdf.columns,
[
{
'is_date': False,
'type': 'FLOAT',
'name': 'a',
'is_dim': False,
'agg': 'sum',
}, {
'is_date': False,
'type': 'INT',
'name': 'b',
'is_dim': False,
'agg': 'sum',
},
],
)
def test_is_date(self):
f = SupersetDataFrame.is_date
self.assertEquals(f(np.dtype('M'), ''), True)
self.assertEquals(f(np.dtype('f'), 'DATETIME'), True)
self.assertEquals(f(np.dtype('i'), 'TIMESTAMP'), True)
self.assertEquals(f(None, 'DATETIME'), True)
self.assertEquals(f(None, 'TIMESTAMP'), True)
self.assertEquals(f(None, ''), False)
self.assertEquals(f(np.dtype(np.int32), ''), False)
def test_dedup_with_data(self):
data = [
('a', 1),
('a', 2),
]
cursor_descr = (
('a', 'string'),
('a', 'string'),
)
cdf = SupersetDataFrame(data, cursor_descr, BaseEngineSpec)
self.assertListEqual(cdf.column_names, ['a', 'a__1'])