mirror of https://github.com/apache/superset.git
fix: edit pyarrow stringify to better handle emojis and accents (#22881)
This commit is contained in:
parent
c839d0daf5
commit
f2b61fca15
|
@ -72,7 +72,12 @@ def stringify_values(array: NDArray[Any]) -> NDArray[Any]:
|
|||
# pandas <NA> type cannot be converted to string
|
||||
obj[na_obj] = None # type: ignore
|
||||
else:
|
||||
obj[...] = stringify(obj) # type: ignore
|
||||
try:
|
||||
# for simple string conversions
|
||||
# this handles odd character types better
|
||||
obj[...] = obj.astype(str) # type: ignore
|
||||
except ValueError:
|
||||
obj[...] = stringify(obj) # type: ignore
|
||||
|
||||
return result
|
||||
|
||||
|
|
|
@ -57,10 +57,10 @@ def boxplot(
|
|||
"""
|
||||
|
||||
def quartile1(series: Series) -> float:
|
||||
return np.nanpercentile(series, 25, interpolation="midpoint") # type: ignore
|
||||
return np.nanpercentile(series, 25, method="midpoint")
|
||||
|
||||
def quartile3(series: Series) -> float:
|
||||
return np.nanpercentile(series, 75, interpolation="midpoint") # type: ignore
|
||||
return np.nanpercentile(series, 75, method="midpoint")
|
||||
|
||||
if whisker_type == PostProcessingBoxplotWhiskerType.TUKEY:
|
||||
|
||||
|
|
|
@ -169,13 +169,13 @@ class TestSupersetResultSet(SupersetTestCase):
|
|||
"id": 4,
|
||||
"dict_arr": '[{"table_name": "unicode_test", "database_id": 1}]',
|
||||
"num_arr": "[1, 2, 3]",
|
||||
"map_col": '{"chart_name": "scatter"}',
|
||||
"map_col": "{'chart_name': 'scatter'}",
|
||||
},
|
||||
{
|
||||
"id": 3,
|
||||
"dict_arr": '[{"table_name": "birth_names", "database_id": 1}]',
|
||||
"num_arr": "[4, 5, 6]",
|
||||
"map_col": '{"chart_name": "plot"}',
|
||||
"map_col": "{'chart_name': 'plot'}",
|
||||
},
|
||||
],
|
||||
)
|
||||
|
|
|
@ -55,7 +55,87 @@ def test_df_to_records_NaT_type() -> None:
|
|||
|
||||
assert df_to_records(df) == [
|
||||
{"date": None},
|
||||
{"date": '"2023-01-06T20:50:31.749000+00:00"'},
|
||||
{"date": "2023-01-06 20:50:31.749000+00:00"},
|
||||
]
|
||||
|
||||
|
||||
def test_df_to_records_mixed_emoji_type() -> None:
    """Check ``df_to_records`` on a string column mixing emoji text and ``NaT``.

    Builds a four-row result set whose ``response`` column holds plain text,
    two emoji-bearing strings and one ``NaT``, converts it to a DataFrame and
    asserts that the records keep the strings verbatim and report the ``NaT``
    entry as ``None``.
    """
    from superset.db_engine_specs import BaseEngineSpec
    from superset.result_set import SupersetResultSet

    question = "What's up?"

    # Raw cursor rows: (question, response, count), count running 1..4.
    responses = [
        "This is a string text",
        "This is a string with an 😍 added",
        NaT,
        "Last emoji 😁",
    ]
    rows = [
        (question, response, position)
        for position, response in enumerate(responses, start=1)
    ]

    # DB-API style column descriptions; only name and type code are populated.
    description: DbapiDescription = [
        (column, type_code, None, None, None, None, False)
        for column, type_code in (
            ("question", "varchar"),
            ("response", "varchar"),
            ("count", "integer"),
        )
    ]

    frame = SupersetResultSet(rows, description, BaseEngineSpec).to_pandas_df()

    # Expected responses are spelled out explicitly: the NaT row becomes None.
    expected_responses = [
        "This is a string text",
        "This is a string with an 😍 added",
        None,
        "Last emoji 😁",
    ]
    expected = [
        {"question": question, "response": answer, "count": position}
        for position, answer in enumerate(expected_responses, start=1)
    ]

    assert df_to_records(frame) == expected
|
||||
|
||||
|
||||
def test_df_to_records_mixed_accent_type() -> None:
    """Check ``df_to_records`` on a string column mixing accented text and ``NaT``.

    Builds a four-row result set whose ``response`` column holds plain text,
    two strings with accented characters and one ``NaT``, converts it to a
    DataFrame and asserts that the records keep the strings verbatim and
    report the ``NaT`` entry as ``None``.
    """
    from superset.db_engine_specs import BaseEngineSpec
    from superset.result_set import SupersetResultSet

    question = "What's up?"

    # Raw cursor rows: (question, response, count), count running 1..4.
    responses = [
        "This is a string text",
        "This is a string with áccent",
        NaT,
        "móre áccent",
    ]
    rows = [
        (question, response, position)
        for position, response in enumerate(responses, start=1)
    ]

    # DB-API style column descriptions; only name and type code are populated.
    description: DbapiDescription = [
        (column, type_code, None, None, None, None, False)
        for column, type_code in (
            ("question", "varchar"),
            ("response", "varchar"),
            ("count", "integer"),
        )
    ]

    frame = SupersetResultSet(rows, description, BaseEngineSpec).to_pandas_df()

    # Expected responses are spelled out explicitly: the NaT row becomes None.
    expected_responses = [
        "This is a string text",
        "This is a string with áccent",
        None,
        "móre áccent",
    ]
    expected = [
        {"question": question, "response": answer, "count": position}
        for position, answer in enumerate(expected_responses, start=1)
    ]

    assert df_to_records(frame) == expected
|
||||
|
||||
|
||||
|
|
|
@ -98,10 +98,10 @@ def test_stringify_with_null_integers():
|
|||
|
||||
expected = np.array(
|
||||
[
|
||||
array(['"foo"', '"foo"', '"foo"'], dtype=object),
|
||||
array(['"bar"', '"bar"', '"bar"'], dtype=object),
|
||||
array(["foo", "foo", "foo"], dtype=object),
|
||||
array(["bar", "bar", "bar"], dtype=object),
|
||||
array([None, None, None], dtype=object),
|
||||
array([None, "true", None], dtype=object),
|
||||
array([None, "True", None], dtype=object),
|
||||
]
|
||||
)
|
||||
|
||||
|
@ -132,10 +132,10 @@ def test_stringify_with_null_timestamps():
|
|||
|
||||
expected = np.array(
|
||||
[
|
||||
array(['"foo"', '"foo"', '"foo"'], dtype=object),
|
||||
array(['"bar"', '"bar"', '"bar"'], dtype=object),
|
||||
array(["foo", "foo", "foo"], dtype=object),
|
||||
array(["bar", "bar", "bar"], dtype=object),
|
||||
array([None, None, None], dtype=object),
|
||||
array([None, "true", None], dtype=object),
|
||||
array([None, "True", None], dtype=object),
|
||||
]
|
||||
)
|
||||
|
||||
|
|
Loading…
Reference in New Issue