diff --git a/superset/data/airports.csv.gz b/superset/data/airports.csv.gz deleted file mode 100644 index 3043486664..0000000000 Binary files a/superset/data/airports.csv.gz and /dev/null differ diff --git a/superset/data/bart-lines.json.gz b/superset/data/bart-lines.json.gz deleted file mode 100644 index 91f50fbe6a..0000000000 Binary files a/superset/data/bart-lines.json.gz and /dev/null differ diff --git a/superset/data/bart_lines.py b/superset/data/bart_lines.py index 3244a0a9cb..f4e0b1f09c 100644 --- a/superset/data/bart_lines.py +++ b/superset/data/bart_lines.py @@ -14,9 +14,7 @@ # KIND, either express or implied. See the License for the # specific language governing permissions and limitations # under the License. -import gzip import json -import os import pandas as pd import polyline @@ -24,16 +22,17 @@ from sqlalchemy import String, Text from superset import db from superset.utils.core import get_or_create_main_db -from .helpers import DATA_FOLDER, TBL +from .helpers import TBL, get_example_data def load_bart_lines(): tbl_name = 'bart_lines' - with gzip.open(os.path.join(DATA_FOLDER, 'bart-lines.json.gz')) as f: - df = pd.read_json(f, encoding='latin-1') - df['path_json'] = df.path.map(json.dumps) - df['polyline'] = df.path.map(polyline.encode) - del df['path'] + content = get_example_data('bart-lines.json.gz') + df = pd.read_json(content, encoding='latin-1') + df['path_json'] = df.path.map(json.dumps) + df['polyline'] = df.path.map(polyline.encode) + del df['path'] + df.to_sql( tbl_name, db.engine, diff --git a/superset/data/birth_france_data_for_country_map.csv b/superset/data/birth_france_data_for_country_map.csv deleted file mode 100644 index 5de8d4551f..0000000000 --- a/superset/data/birth_france_data_for_country_map.csv +++ /dev/null @@ -1,97 +0,0 @@ -DEPT_ID,2003,2004,2005,2006,2007,2008,2009,2010,2011,2012,2013,2014 -FR-01,6866,6706,6976,7228,6949,7323,7157,7282,7265,7242,7296,7354 -FR-02,6841,6761,6889,7041,6847,7012,6941,7050,6939,6755,6559,6468 -FR-03,3391,3335,3363,3503,3277,3289,3308,3402,3196,3288,3198,3152 -FR-04,1460,1522,1514,1536,1569,1569,1513,1547,1578,1561,1629,1538 -FR-05,1408,1403,1395,1461,1448,1441,1513,1470,1399,1441,1406,1383 -FR-06,11144,11514,11631,11754,11633,12275,11949,12257,11999,12087,12149,12170 -FR-07,3367,3176,3414,3484,3484,3447,3307,3380,3360,3405,3179,3254 -FR-08,3532,3422,3420,3343,3552,3522,3312,3254,3137,3258,3021,2966 -FR-09,1350,1412,1389,1499,1570,1493,1452,1473,1404,1425,1413,1364 -FR-10,3428,3553,3692,3685,3619,3721,3745,3722,3635,3587,3436,3377 -FR-11,3421,3321,3502,3661,3723,3778,3797,3770,3789,3669,3618,3516 -FR-12,2558,2614,2701,2829,2769,2748,2640,2694,2682,2615,2475,2555 -FR-13,23908,24056,24411,25371,25126,25412,25547,26410,25889,26328,26762,26384 -FR-14,8231,8257,8251,8531,8310,8183,8304,8111,8041,7833,7644,7466 -FR-15,1344,1396,1391,1398,1357,1300,1377,1274,1237,1230,1290,1214 -FR-16,3401,3514,3570,3653,3618,3666,3408,3564,3459,3490,3472,3378 -FR-17,5935,5900,6069,6089,5903,6136,6209,6185,6065,5916,5778,5846 -FR-18,3301,3271,3313,3231,3341,3303,3229,3341,3159,3120,3128,3097 -FR-19,2133,2250,2319,2327,2245,2263,2231,2247,2196,2163,2055,2094 -FR-21,6079,6052,5844,5986,6015,5960,5852,5963,5906,5905,5769,5779 -FR-22,6413,6317,6287,6743,6473,6494,6559,6438,6221,6184,5927,5790 -FR-23,1011,957,1054,1038,1013,1029,1044,919,967,998,897,879 -FR-24,3607,3690,3662,3758,3760,3832,3672,3665,3645,3547,3486,3479 -FR-25,6529,6798,6782,6993,6804,7097,6914,7105,6826,6778,6732,6659 -FR-26,5525,5703,5579,5945,5833,5927,5846,5915,5978,5912,6026,5965 -FR-27,7213,7220,7386,7402,7471,7717,7714,7715,7738,7676,7352,7242 -FR-28,5370,5363,5585,5632,5440,5677,5573,5716,5540,5548,5312,5295 -FR-29,9900,9963,9851,10184,9962,10040,9733,9823,9615,9597,9277,9088 -FR-2A,1232,1228,1348,1337,1284,1370,1422,1408,1422,1398,1317,1371 -FR-2B,1455,1444,1525,1474,1564,1569,1580,1591,1662,1612,1599,1616 -FR-30,7446,7777,7901,8384,8190,8449,8354,8494,8467,8196,8427,8216 -FR-31,13989,13900,14233,14957,14968,15415,15317,15770,16031,16347,16290,16641 -FR-32,1635,1625,1666,1580,1669,1689,1718,1671,1587,1668,1648,1643 -FR-33,15610,15819,15722,16539,16514,16636,17072,17271,17098,17097,17265,17303 -FR-34,11380,11562,11636,12191,12252,12564,12531,12658,13000,12902,12899,13008 -FR-35,12134,12072,12405,12687,12606,12837,12917,12876,13033,12892,12729,12555 -FR-36,2312,2314,2394,2283,2341,2371,2178,2221,2137,2136,2006,2030 -FR-37,6620,6594,6644,6813,6434,6811,6828,6886,6696,6796,6594,6718 -FR-38,14885,15356,15447,15830,15646,15999,15916,16136,15739,15948,15724,15664 -FR-39,2964,3017,2924,3021,3037,3045,2897,2865,2758,2741,2675,2637 -FR-40,3477,3621,3574,3755,3953,3862,3914,3993,3853,3880,3864,3696 -FR-41,3617,3678,3724,3815,3752,3847,3786,3777,3667,3704,3581,3517 -FR-42,8804,8906,8975,9184,9222,9357,9174,9403,9357,9473,9086,9183 -FR-43,2458,2416,2485,2426,2301,2398,2390,2348,2300,2244,2247,2157 -FR-44,15795,15988,16301,16530,16664,16763,16766,17159,16747,16821,16822,16700 -FR-45,8265,8424,8200,8635,8644,8524,8499,8757,8686,8689,8526,8355 -FR-46,1537,1430,1477,1563,1511,1555,1435,1506,1423,1487,1345,1415 -FR-47,3173,3245,3341,3426,3399,3378,3445,3359,3397,3332,3361,3347 -FR-48,768,772,760,784,781,779,798,736,695,711,663,651 -FR-49,10018,10085,10148,10548,10227,10270,10165,10312,10320,10061,10016,9781 -FR-50,5490,5487,5538,5448,5356,5384,5231,5238,5193,5282,4998,4911 -FR-51,6916,6979,7108,7118,6932,7065,7061,7182,7070,6761,7000,6887 -FR-52,2100,2095,2029,2104,2062,2037,1944,1889,1916,1847,1923,1881 -FR-53,3846,3932,3981,4118,3835,3912,3897,3962,3733,3750,3656,3456 -FR-54,8398,8671,8542,8743,8421,8559,8487,8536,8499,8387,8197,8135 -FR-55,2218,2287,2158,2294,2296,2220,2122,2221,2119,2107,2070,1928 -FR-56,7817,8036,7802,8221,7968,8288,7942,8029,7894,7909,7645,7554 -FR-57,11710,11970,12048,12114,11853,12012,11831,11856,11474,11579,11421,11385 -FR-58,2123,2181,2115,2137,2151,2049,1986,1982,1999,1942,1850,1801 -FR-59,36099,36257,35960,36858,36531,36572,36508,36703,36678,36513,36354,35923 -FR-60,10696,10630,10753,11144,11097,11162,11013,10960,11032,10941,10814,10802 -FR-61,3323,3243,3117,3276,3316,3185,3248,3192,3105,2933,2834,2810 -FR-62,18888,19304,19407,19780,19668,19902,19661,19784,19720,19017,19054,18809 -FR-63,6576,6632,6701,6902,6896,6865,6774,7131,6828,6933,6699,6908 -FR-64,6436,6338,6395,6680,6288,6455,6652,6569,6459,6490,6269,6497 -FR-65,2144,2186,2095,2284,2266,2095,2161,2149,2110,2201,2057,2111 -FR-66,4456,4320,4563,4779,4638,4756,4837,4869,4843,4943,4914,4800 -FR-67,13024,12828,13195,13388,13152,13231,13218,13346,13030,12895,13043,13262 -FR-68,9045,8945,8912,9324,8941,8909,8938,9177,8927,8818,8713,8826 -FR-69,23376,23796,24270,24808,24465,25120,25528,25973,25921,26294,25914,26712 -FR-70,2675,2773,2827,2975,2888,2755,2785,2761,2643,2609,2510,2458 -FR-71,5717,5709,5789,5876,5736,5860,5838,5865,5811,5752,5514,5552 -FR-72,6871,6935,6770,7133,6808,6909,6957,6942,6810,6703,6645,6664 -FR-73,4687,4736,4795,4903,5000,4971,4863,5074,4917,4786,4762,4798 -FR-74,8839,8753,8967,9124,8939,9333,9271,9521,9476,9829,9893,9982 -FR-75,31493,31817,31378,31748,30820,30623,31063,31447,30094,29291,28945,29134 -FR-76,15862,15650,15691,16004,16066,16041,15947,16338,16146,16014,15574,15199 -FR-77,17501,17729,18317,18986,18978,19240,19331,19712,19824,19678,19331,19708 -FR-78,19937,19431,19766,20438,19899,19895,19868,20312,19886,19827,19886,19525 -FR-79,3994,4100,4191,4057,4037,4331,4157,4060,4006,4029,3986,3718 -FR-80,7134,7035,7024,7021,6939,7094,6838,7103,6989,6843,6743,6506 -FR-81,3579,3611,3837,3933,3869,4056,4030,3925,4006,3939,3829,3831 -FR-82,2398,2591,2590,2823,2858,2932,2935,2926,2978,2940,2827,2829 -FR-83,10388,10622,10646,10889,10938,11131,10955,11159,11146,11240,10917,11123 -FR-84,6547,6629,6608,6805,6694,7000,7014,6967,7008,7107,7171,7058 -FR-85,6874,7062,7299,7589,7647,7629,7718,7601,7442,7436,7164,7070 -FR-86,4594,4568,4725,4850,4753,4909,4953,5006,4885,4880,4708,4686 -FR-87,3449,3659,3834,3754,3829,3891,3985,3848,3907,3825,3723,3724 -FR-88,4291,4264,4310,4416,4274,4215,4252,4057,3883,3715,3796,3679 -FR-89,3710,3844,3821,3929,3917,4045,3991,3842,3699,3729,3780,3621 -FR-90,1896,1766,1837,1888,1880,1818,1822,1802,1794,1763,1675,1707 -FR-91,17122,17614,17753,18281,17932,18134,18040,18509,18493,18506,18510,18903 -FR-92,24607,24649,24588,25426,24937,25217,25192,25194,25083,24790,24614,24675 -FR-93,25868,26313,26760,27916,27743,28062,28313,28513,28362,28675,28687,29471 -FR-94,19637,19866,19947,20948,20331,20736,21022,21391,20991,20967,20748,21566 -FR-95,17346,17863,18012,19015,18624,18761,18728,19506,19551,19495,19550,19737 \ No newline at end of file diff --git a/superset/data/birth_names.json.gz b/superset/data/birth_names.json.gz deleted file mode 100644 index 2652cf7242..0000000000 Binary files a/superset/data/birth_names.json.gz and /dev/null differ diff --git a/superset/data/birth_names.py b/superset/data/birth_names.py index 379fdc806c..4f11ac5b7d 100644 --- a/superset/data/birth_names.py +++ b/superset/data/birth_names.py @@ -14,9 +14,7 @@ # KIND, either express or implied. See the License for the # specific language governing permissions and limitations # under the License. -import gzip import json -import os import textwrap import pandas as pd @@ -28,7 +26,7 @@ from superset.utils.core import get_or_create_main_db from .helpers import ( config, Dash, - DATA_FOLDER, + get_example_data, get_slice_json, merge_slice, Slice, @@ -39,8 +37,8 @@ from .helpers import ( def load_birth_names(): """Loading birth name dataset from a zip file in the repo""" - with gzip.open(os.path.join(DATA_FOLDER, 'birth_names.json.gz')) as f: - pdf = pd.read_json(f) + data = get_example_data('birth_names.json.gz') + pdf = pd.read_json(data) pdf.ds = pd.to_datetime(pdf.ds, unit='ms') pdf.to_sql( 'birth_names', diff --git a/superset/data/countries.json.gz b/superset/data/countries.json.gz deleted file mode 100644 index 6c71c0c432..0000000000 Binary files a/superset/data/countries.json.gz and /dev/null differ diff --git a/superset/data/country_map.py b/superset/data/country_map.py index c1c2b417b5..e74638bfbc 100644 --- a/superset/data/country_map.py +++ b/superset/data/country_map.py @@ -15,7 +15,6 @@ # specific language governing permissions and limitations # under the License. import datetime -import os import pandas as pd from sqlalchemy import BigInteger, Date, String @@ -24,7 +23,7 @@ from superset import db from superset.connectors.sqla.models import SqlMetric from superset.utils import core as utils from .helpers import ( - DATA_FOLDER, + get_example_data, get_slice_json, merge_slice, misc_dash_slices, @@ -35,8 +34,9 @@ from .helpers import ( def load_country_map_data(): """Loading data for map with country map""" - csv_path = os.path.join(DATA_FOLDER, 'birth_france_data_for_country_map.csv') - data = pd.read_csv(csv_path, encoding='utf-8') + csv_bytes = get_example_data( + 'birth_france_data_for_country_map.csv', is_gzip=False, make_bytes=True) + data = pd.read_csv(csv_bytes, encoding='utf-8') data['dttm'] = datetime.datetime.now().date() data.to_sql( # pylint: disable=no-member 'birth_france_by_region', diff --git a/superset/data/energy.json.gz b/superset/data/energy.json.gz deleted file mode 100644 index 624d71db68..0000000000 Binary files a/superset/data/energy.json.gz and /dev/null differ diff --git a/superset/data/energy.py b/superset/data/energy.py index c04eb46c48..e1d48e76a7 100644 --- a/superset/data/energy.py +++ b/superset/data/energy.py @@ -16,8 +16,6 @@ # under the License. """Loads datasets, dashboards and slices in a new superset instance""" # pylint: disable=C,R,W -import gzip -import os import textwrap import pandas as pd @@ -26,14 +24,16 @@ from sqlalchemy import Float, String from superset import db from superset.connectors.sqla.models import SqlMetric from superset.utils import core as utils -from .helpers import DATA_FOLDER, merge_slice, misc_dash_slices, Slice, TBL +from .helpers import ( + DATA_FOLDER, get_example_data, merge_slice, misc_dash_slices, Slice, TBL, +) def load_energy(): """Loads an energy related dataset to use with sankey and graphs""" tbl_name = 'energy_usage' - with gzip.open(os.path.join(DATA_FOLDER, 'energy.json.gz')) as f: - pdf = pd.read_json(f) + data = get_example_data('energy.json.gz') + pdf = pd.read_json(data) pdf.to_sql( tbl_name, db.engine, diff --git a/superset/data/flight_data.csv.gz b/superset/data/flight_data.csv.gz deleted file mode 100644 index bbdebdfafc..0000000000 Binary files a/superset/data/flight_data.csv.gz and /dev/null differ diff --git a/superset/data/flights.py b/superset/data/flights.py index 1ed575b44d..25112444fc 100644 --- a/superset/data/flights.py +++ b/superset/data/flights.py @@ -14,26 +14,23 @@ # KIND, either express or implied. See the License for the # specific language governing permissions and limitations # under the License. -import gzip -import os - import pandas as pd from sqlalchemy import DateTime from superset import db from superset.utils import core as utils -from .helpers import DATA_FOLDER, TBL +from .helpers import get_example_data, TBL def load_flights(): """Loading random time series data from a zip file in the repo""" tbl_name = 'flights' - with gzip.open(os.path.join(DATA_FOLDER, 'flight_data.csv.gz')) as f: - pdf = pd.read_csv(f, encoding='latin-1') + data = get_example_data('flight_data.csv.gz', make_bytes=True) + pdf = pd.read_csv(data, encoding='latin-1') # Loading airports info to join and get lat/long - with gzip.open(os.path.join(DATA_FOLDER, 'airports.csv.gz')) as f: - airports = pd.read_csv(f, encoding='latin-1') + airports_bytes = get_example_data('airports.csv.gz', make_bytes=True) + airports = pd.read_csv(airports_bytes, encoding='latin-1') airports = airports.set_index('IATA_CODE') pdf['ds'] = pdf.YEAR.map(str) + '-0' + pdf.MONTH.map(str) + '-0' + pdf.DAY.map(str) diff --git a/superset/data/helpers.py b/superset/data/helpers.py index d6192b64eb..f876dc9105 100644 --- a/superset/data/helpers.py +++ b/superset/data/helpers.py @@ -16,13 +16,19 @@ # under the License. """Loads datasets, dashboards and slices in a new superset instance""" # pylint: disable=C,R,W +from io import BytesIO import json import os +import zlib + +import requests from superset import app, db from superset.connectors.connector_registry import ConnectorRegistry from superset.models import core as models +BASE_URL = 'https://github.com/apache-superset/examples-data/blob/master/' + # Shortcuts DB = models.Database Slice = models.Slice @@ -60,3 +66,12 @@ def get_slice_json(defaults, **kwargs): d = defaults.copy() d.update(kwargs) return json.dumps(d, indent=4, sort_keys=True) + + +def get_example_data(filepath, is_gzip=True, make_bytes=False): + content = requests.get(f'{BASE_URL}{filepath}?raw=true').content + if is_gzip: + content = zlib.decompress(content, zlib.MAX_WBITS|16) + if make_bytes: + content = BytesIO(content) + return content diff --git a/superset/data/long_lat.py b/superset/data/long_lat.py index 40895d5404..18f477cfa4 100644 --- a/superset/data/long_lat.py +++ b/superset/data/long_lat.py @@ -15,8 +15,6 @@ # specific language governing permissions and limitations # under the License. import datetime -import gzip -import os import random import geohash @@ -26,7 +24,7 @@ from sqlalchemy import DateTime, Float, String from superset import db from superset.utils import core as utils from .helpers import ( - DATA_FOLDER, + get_example_data, get_slice_json, merge_slice, misc_dash_slices, @@ -37,8 +35,8 @@ from .helpers import ( def load_long_lat_data(): """Loading lat/long data from a csv file in the repo""" - with gzip.open(os.path.join(DATA_FOLDER, 'san_francisco.csv.gz')) as f: - pdf = pd.read_csv(f, encoding='utf-8') + data = get_example_data('san_francisco.csv.gz', make_bytes=True) + pdf = pd.read_csv(data, encoding='utf-8') start = datetime.datetime.now().replace( hour=0, minute=0, second=0, microsecond=0) pdf['datetime'] = [ diff --git a/superset/data/multiformat_time_series.json.gz b/superset/data/multiformat_time_series.json.gz deleted file mode 100644 index e0877b707d..0000000000 Binary files a/superset/data/multiformat_time_series.json.gz and /dev/null differ diff --git a/superset/data/multiformat_time_series.py b/superset/data/multiformat_time_series.py index 5dec85ab01..58ff7fbb0d 100644 --- a/superset/data/multiformat_time_series.py +++ b/superset/data/multiformat_time_series.py @@ -14,8 +14,6 @@ # KIND, either express or implied. See the License for the # specific language governing permissions and limitations # under the License. -import gzip -import os import pandas as pd from sqlalchemy import BigInteger, Date, DateTime, String @@ -24,7 +22,7 @@ from superset import db from superset.utils import core as utils from .helpers import ( config, - DATA_FOLDER, + get_example_data, get_slice_json, merge_slice, misc_dash_slices, @@ -35,8 +33,9 @@ from .helpers import ( def load_multiformat_time_series(): """Loading time series data from a zip file in the repo""" - with gzip.open(os.path.join(DATA_FOLDER, 'multiformat_time_series.json.gz')) as f: - pdf = pd.read_json(f) + data = get_example_data('multiformat_time_series.json.gz') + pdf = pd.read_json(data) + pdf.ds = pd.to_datetime(pdf.ds, unit='s') pdf.ds2 = pd.to_datetime(pdf.ds2, unit='s') pdf.to_sql( diff --git a/superset/data/paris.py b/superset/data/paris.py index e32588da1c..2ed3f8eaea 100644 --- a/superset/data/paris.py +++ b/superset/data/paris.py @@ -14,24 +14,22 @@ # KIND, either express or implied. See the License for the # specific language governing permissions and limitations # under the License. -import gzip import json -import os import pandas as pd from sqlalchemy import String, Text from superset import db from superset.utils import core as utils -from .helpers import DATA_FOLDER, TBL +from .helpers import TBL, get_example_data def load_paris_iris_geojson(): tbl_name = 'paris_iris_mapping' - with gzip.open(os.path.join(DATA_FOLDER, 'paris_iris.json.gz')) as f: - df = pd.read_json(f) - df['features'] = df.features.map(json.dumps) + data = get_example_data('paris_iris.json.gz') + df = pd.read_json(data) + df['features'] = df.features.map(json.dumps) df.to_sql( tbl_name, diff --git a/superset/data/paris_iris.json.gz b/superset/data/paris_iris.json.gz deleted file mode 100644 index 4a964c94f7..0000000000 Binary files a/superset/data/paris_iris.json.gz and /dev/null differ diff --git a/superset/data/random_time_series.json.gz b/superset/data/random_time_series.json.gz deleted file mode 100644 index 5275d5571d..0000000000 Binary files a/superset/data/random_time_series.json.gz and /dev/null differ diff --git a/superset/data/random_time_series.py b/superset/data/random_time_series.py index cfc13e131e..ee7450a634 100644 --- a/superset/data/random_time_series.py +++ b/superset/data/random_time_series.py @@ -14,8 +14,6 @@ # KIND, either express or implied. See the License for the # specific language governing permissions and limitations # under the License. -import gzip -import os import pandas as pd from sqlalchemy import DateTime @@ -24,7 +22,7 @@ from superset import db from superset.utils import core as utils from .helpers import ( config, - DATA_FOLDER, + get_example_data, get_slice_json, merge_slice, Slice, @@ -34,8 +32,8 @@ from .helpers import ( def load_random_time_series_data(): """Loading random time series data from a zip file in the repo""" - with gzip.open(os.path.join(DATA_FOLDER, 'random_time_series.json.gz')) as f: - pdf = pd.read_json(f) + data = get_example_data('random_time_series.json.gz') + pdf = pd.read_json(data) pdf.ds = pd.to_datetime(pdf.ds, unit='s') pdf.to_sql( 'random_time_series', diff --git a/superset/data/san_francisco.csv.gz b/superset/data/san_francisco.csv.gz deleted file mode 100644 index 1d977a4a1a..0000000000 Binary files a/superset/data/san_francisco.csv.gz and /dev/null differ diff --git a/superset/data/sf_population.json.gz b/superset/data/sf_population.json.gz deleted file mode 100644 index 53ba13acd7..0000000000 Binary files a/superset/data/sf_population.json.gz and /dev/null differ diff --git a/superset/data/sf_population_polygons.py b/superset/data/sf_population_polygons.py index 6da85f728b..2248a48daf 100644 --- a/superset/data/sf_population_polygons.py +++ b/superset/data/sf_population_polygons.py @@ -14,24 +14,22 @@ # KIND, either express or implied. See the License for the # specific language governing permissions and limitations # under the License. -import gzip import json -import os import pandas as pd from sqlalchemy import BigInteger, Text from superset import db from superset.utils import core as utils -from .helpers import DATA_FOLDER, TBL +from .helpers import TBL, get_example_data def load_sf_population_polygons(): tbl_name = 'sf_population_polygons' - with gzip.open(os.path.join(DATA_FOLDER, 'sf_population.json.gz')) as f: - df = pd.read_json(f) - df['contour'] = df.contour.map(json.dumps) + data = get_example_data('sf_population.json.gz') + df = pd.read_json(data) + df['contour'] = df.contour.map(json.dumps) df.to_sql( tbl_name, diff --git a/superset/data/unicode_test_data.py b/superset/data/unicode_test_data.py index 42e6bdb1e3..03c00a7b07 100644 --- a/superset/data/unicode_test_data.py +++ b/superset/data/unicode_test_data.py @@ -16,7 +16,6 @@ # under the License. import datetime import json -import os import random import pandas as pd @@ -27,7 +26,7 @@ from superset.utils import core as utils from .helpers import ( config, Dash, - DATA_FOLDER, + get_example_data, get_slice_json, merge_slice, Slice, @@ -38,8 +37,9 @@ from .helpers import ( def load_unicode_test_data(): """Loading unicode test dataset from a csv file in the repo""" - df = pd.read_csv(os.path.join(DATA_FOLDER, 'unicode_utf8_unixnl_test.csv'), - encoding='utf-8') + data = get_example_data( + 'unicode_utf8_unixnl_test.csv', is_gzip=False, make_bytes=True) + df = pd.read_csv(data, encoding='utf-8') # generate date/numeric data df['dttm'] = datetime.datetime.now().date() df['value'] = [random.randint(1, 100) for _ in range(len(df))] diff --git a/superset/data/unicode_utf8_unixnl_test.csv b/superset/data/unicode_utf8_unixnl_test.csv deleted file mode 100644 index 9b0235b0dd..0000000000 --- a/superset/data/unicode_utf8_unixnl_test.csv +++ /dev/null @@ -1,42 +0,0 @@ -phrase,short_phrase,with_missing -"Под южно дърво, цъфтящо в синьо, бягаше малко пухкаво зайче.",Под южно д,Fam hx-cardiovas dis NEC -Příliš žluťoučký kůň úpěl ďábelské ódy.,Příliš žlu, -視野無限廣,窗外有藍天,視野無限廣,窗外有藍,Sparganosis -微風迎客,軟語伴茶,微風迎客,軟語伴茶,Var mgr NEC wo ntc mgr -中国智造,慧及全球,中国智造,慧及全球,Mech prob w internal org -"Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen Walther spillede på xylofon.",Quizdeltag,Corneal dystrophy NOS -Pa’s wijze lynx bezag vroom het fikse aquaduct.,Pa’s wijze,Edema in preg-unspec -Eĥoŝanĝo ĉiuĵaŭde.,Eĥoŝanĝo ĉ, -See väike mölder jõuab rongile hüpata,See väike ,Twin NOS-nonhosp -Viekas kettu punaturkki laiskan koiran takaa kurkki.,Viekas ket,Postgastric surgery synd -Voix ambiguë d’un cœur qui au zéphyr préfère les jattes de kiwis.,Voix ambig,Loose body-mult joints -Portez ce vieux whisky au juge blond qui fume.,Portez ce ,Late eff acc poisoning -Zwölf Boxkämpfer jagen Viktor quer über den großen Sylter Deich,Zwölf Boxk,Opn brain inj w/o coma -Franz jagt im komplett verwahrlosten Taxi quer durch Bayern.,Franz jagt,TB of ear-unspec -Θέλει αρετή και τόλμη η ελευθερία. (Ανδρέας Κάλβος),Θέλει αρετ,Chr peptic ulcer w perf -Ο καλύμνιος σφουγγαράς ψιθύρισε πως θα βουτήξει χωρίς να διστάζει.,Ο καλύμνιο,Cns TB NEC-cult dx -דג סקרן שט לו בים זך אך לפתע פגש חבורה נחמדה שצצה כך.,דג סקרן שט,Polyhydramnios-delivered -Árvíztűrő tükörfúrógép,Árvíztűrő ,Malign neopl scrotum -"Egy hűtlen vejét fülöncsípő, dühös mexikói úr Wesselényinél mázol Quitóban.",Egy hűtlen,Tubal/broad lig anom NOS -Saya lihat foto Hamengkubuwono XV bersama enam zebra purba cantik yang jatuh dari Al Quranmu.,Saya lihat,Ben carcinoid duodenum -"Ma la volpe, col suo balzo, ha raggiunto il quieto Fido.",Ma la volp,Ch leu un cl wo ach rmsn -いろはにほへと ちりぬるを わかよたれそ つねならむ うゐのおくやま けふこえて あさきゆめみし ゑひもせす,いろはにほへと ちり,Mycotic arthritis-pelvis -다람쥐 헌 쳇바퀴에 타고파,다람쥐 헌 쳇바퀴에,Paral polio NEC-type 1 -Sarkanās jūrascūciņas peld pa jūru.,Sarkanās j,Fx larynx/trachea-open -En god stil må først og fremst være klar. Den må være passende. Aristoteles.,En god sti,Dermatophytosis site NOS -Pchnąć w tę łódź jeża lub ośm skrzyń fig,Pchnąć w t,Anxiety disorder oth dis -A rápida raposa castanha salta por cima do cão lento.,A rápida r,Adenoid vegetations -A ligeira raposa marrom ataca o cão preguiçoso.,A ligeira ,Consanguinity -Zebras caolhas de Java querem passar fax para moças gigantes de New York,Zebras cao,"Hypotony NOS, eye" -Agera vulpe maronie sare peste câinele cel leneş.,Agera vulp,Urethral syndrome NOS -Съешь ещё этих мягких французских булок да выпей же чаю,Съешь ещё ,Coccidioidomycosis NOS -Чешће цeђење мрeжастим џаком побољшава фертилизацију генских хибрида.,Чешће цeђе, -Češće ceđenje mrežastim džakom poboljšava fertilizaciju genskih hibrida.,Češće ceđe,Scrn-hemoglobinopath NEC -Kŕdeľ šťastných ďatľov učí pri ústí Váhu mĺkveho koňa obhrýzať kôru a žrať čerstvé mäso.,Kŕdeľ šťas, -V kožuščku hudobnega fanta stopiclja mizar in kliče 0619872345.,V kožuščku, -El veloz murciélago hindú comía feliz cardillo y kiwi. La cigüeña tocaba el saxofón detrás del palenque de paja.,El veloz m,Cervical syndrome NEC -Flygande bäckasiner söka hwila på mjuka tuvor,Flygande b,Letterer-siwe dis abdom -เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะ ๆ จ๋า ๆ น่าฟังเอยฯ,เป็นมนุษย์,Balantidiasis -"Pijamalı hasta, yağız şoföre çabucak güvendi",Pijamalı h,Epilepsy-delivered w p/p -زۆھرەگۈل ئابدۇۋاجىت فرانسىيەنىڭ پارىژدىكى خېلى بىشەم ئوقۇغۇچى.,زۆھرەگۈل ئ,Fit/adj non-vsc cath NEC -ئاۋۇ بىر جۈپ خوراز فرانسىيەنىڭ پارىژ شەھرىگە يېقىن تاغقا كۆچەلمىدى.,ئاۋۇ بىر ج,Sat cerv smr-no trnsfrm diff --git a/superset/data/world_bank.py b/superset/data/world_bank.py index 910b3389e5..16aa0cb24c 100644 --- a/superset/data/world_bank.py +++ b/superset/data/world_bank.py @@ -16,7 +16,6 @@ # under the License. """Loads datasets, dashboards and slices in a new superset instance""" # pylint: disable=C,R,W -import gzip import json import os import textwrap @@ -31,6 +30,7 @@ from .helpers import ( config, Dash, DATA_FOLDER, + get_example_data, get_slice_json, merge_slice, misc_dash_slices, @@ -43,8 +43,8 @@ from .helpers import ( def load_world_bank_health_n_pop(): """Loads the world bank health dataset, slices and a dashboard""" tbl_name = 'wb_health_population' - with gzip.open(os.path.join(DATA_FOLDER, 'countries.json.gz')) as f: - pdf = pd.read_json(f) + data = get_example_data('countries.json.gz') + pdf = pd.read_json(data) pdf.columns = [col.replace('.', '_') for col in pdf.columns] pdf.year = pd.to_datetime(pdf.year) pdf.to_sql(