fix(sql): allow filtering on text values that contain quotes (#17881)

This commit is contained in:
Stephen Liu 2022-01-28 21:49:22 +08:00 committed by GitHub
parent 1f6f3d7d06
commit 9671384efe
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 116 additions and 62 deletions

View File

@ -398,7 +398,7 @@ class BaseDatasource(
): ):
return datetime.utcfromtimestamp(value / 1000) return datetime.utcfromtimestamp(value / 1000)
if isinstance(value, str): if isinstance(value, str):
value = value.strip("\t\n'\"") value = value.strip("\t\n")
if target_column_type == utils.GenericDataType.NUMERIC: if target_column_type == utils.GenericDataType.NUMERIC:
# For backwards compatibility and edge cases # For backwards compatibility and edge cases

View File

@ -359,7 +359,7 @@ class TestDruidFunc(SupersetTestCase):
col = DruidColumn(column_name="A") col = DruidColumn(column_name="A")
column_dict = {"A": col} column_dict = {"A": col}
res = DruidDatasource.get_filters([filtr], [], column_dict) res = DruidDatasource.get_filters([filtr], [], column_dict)
self.assertEqual("a", res.filter["filter"]["value"]) self.assertEqual('"a"', res.filter["filter"]["value"])
@unittest.skipUnless( @unittest.skipUnless(
SupersetTestCase.is_module_installed("pydruid"), "pydruid not installed" SupersetTestCase.is_module_installed("pydruid"), "pydruid not installed"

View File

@ -364,7 +364,7 @@ class TestDruidFunc(SupersetTestCase):
col = DruidColumn(column_name="A") col = DruidColumn(column_name="A")
column_dict = {"A": col} column_dict = {"A": col}
res = DruidDatasource.get_filters([filtr], [], column_dict) res = DruidDatasource.get_filters([filtr], [], column_dict)
self.assertEqual("a", res.filter["filter"]["value"]) self.assertEqual('"a"', res.filter["filter"]["value"])
@unittest.skipUnless( @unittest.skipUnless(
SupersetTestCase.is_module_installed("pydruid"), "pydruid not installed" SupersetTestCase.is_module_installed("pydruid"), "pydruid not installed"

View File

@ -380,7 +380,7 @@ class TestQueryContext(SupersetTestCase):
assert re.search(r'[`"\[]?num[`"\]]? IS NOT NULL', sql_text) assert re.search(r'[`"\[]?num[`"\]]? IS NOT NULL', sql_text)
assert re.search( assert re.search(
r"""NOT \([`"\[]?name[`"\]]? IS NULL[\s\n]* """ r"""NOT \([`"\[]?name[`"\]]? IS NULL[\s\n]* """
r"""OR [`"\[]?name[`"\]]? IN \('abc'\)\)""", r"""OR [`"\[]?name[`"\]]? IN \('"abc"'\)\)""",
sql_text, sql_text,
) )

View File

@ -47,6 +47,7 @@ from tests.integration_tests.fixtures.birth_names_dashboard import (
load_birth_names_dashboard_with_slices, load_birth_names_dashboard_with_slices,
load_birth_names_data, load_birth_names_data,
) )
from tests.integration_tests.test_app import app
from .base_tests import SupersetTestCase from .base_tests import SupersetTestCase
@ -475,80 +476,133 @@ class TestDatabaseModel(SupersetTestCase):
db.session.delete(database) db.session.delete(database)
db.session.commit() db.session.commit()
def test_values_for_column(self):
@pytest.fixture
def text_column_table():
with app.app_context():
table = SqlaTable( table = SqlaTable(
table_name="test_null_in_column", table_name="text_column_table",
sql=( sql=(
"SELECT 'foo' as foo " "SELECT 'foo' as foo "
"UNION SELECT '' " "UNION SELECT '' "
"UNION SELECT NULL " "UNION SELECT NULL "
"UNION SELECT 'null'" "UNION SELECT 'null' "
"UNION SELECT '\"text in double quotes\"' "
"UNION SELECT '''text in single quotes''' "
"UNION SELECT 'double quotes \" in text' "
"UNION SELECT 'single quotes '' in text' "
), ),
database=get_example_database(), database=get_example_database(),
) )
TableColumn(column_name="foo", type="VARCHAR(255)", table=table) TableColumn(column_name="foo", type="VARCHAR(255)", table=table)
SqlMetric(metric_name="count", expression="count(*)", table=table) SqlMetric(metric_name="count", expression="count(*)", table=table)
yield table
# null value, empty string and text should be retrieved
with_null = table.values_for_column(column_name="foo", limit=10000)
assert None in with_null
assert len(with_null) == 4
# null value should be replaced def test_values_for_column_on_text_column(text_column_table):
result_object = table.query( # null value, empty string and text should be retrieved
{ with_null = text_column_table.values_for_column(column_name="foo", limit=10000)
"metrics": ["count"], assert None in with_null
"filter": [{"col": "foo", "val": [NULL_STRING], "op": "IN"}], assert len(with_null) == 8
"is_timeseries": False,
}
)
assert result_object.df["count"][0] == 1
# also accept None value
result_object = table.query(
{
"metrics": ["count"],
"filter": [{"col": "foo", "val": [None], "op": "IN"}],
"is_timeseries": False,
}
)
assert result_object.df["count"][0] == 1
# empty string should be replaced def test_filter_on_text_column(text_column_table):
result_object = table.query( table = text_column_table
{ # null value should be replaced
"metrics": ["count"], result_object = table.query(
"filter": [{"col": "foo", "val": [EMPTY_STRING], "op": "IN"}], {
"is_timeseries": False, "metrics": ["count"],
} "filter": [{"col": "foo", "val": [NULL_STRING], "op": "IN"}],
) "is_timeseries": False,
assert result_object.df["count"][0] == 1 }
)
assert result_object.df["count"][0] == 1
# also accept "" string # also accept None value
result_object = table.query( result_object = table.query(
{ {
"metrics": ["count"], "metrics": ["count"],
"filter": [{"col": "foo", "val": [""], "op": "IN"}], "filter": [{"col": "foo", "val": [None], "op": "IN"}],
"is_timeseries": False, "is_timeseries": False,
} }
) )
assert result_object.df["count"][0] == 1 assert result_object.df["count"][0] == 1
# both replaced # empty string should be replaced
result_object = table.query( result_object = table.query(
{ {
"metrics": ["count"], "metrics": ["count"],
"filter": [ "filter": [{"col": "foo", "val": [EMPTY_STRING], "op": "IN"}],
{ "is_timeseries": False,
"col": "foo", }
"val": [EMPTY_STRING, NULL_STRING, "null", "foo"], )
"op": "IN", assert result_object.df["count"][0] == 1
}
], # also accept "" string
"is_timeseries": False, result_object = table.query(
} {
) "metrics": ["count"],
assert result_object.df["count"][0] == 4 "filter": [{"col": "foo", "val": [""], "op": "IN"}],
"is_timeseries": False,
}
)
assert result_object.df["count"][0] == 1
# both replaced
result_object = table.query(
{
"metrics": ["count"],
"filter": [
{
"col": "foo",
"val": [EMPTY_STRING, NULL_STRING, "null", "foo"],
"op": "IN",
}
],
"is_timeseries": False,
}
)
assert result_object.df["count"][0] == 4
# should filter text in double quotes
result_object = table.query(
{
"metrics": ["count"],
"filter": [{"col": "foo", "val": ['"text in double quotes"'], "op": "IN",}],
"is_timeseries": False,
}
)
assert result_object.df["count"][0] == 1
# should filter text in single quotes
result_object = table.query(
{
"metrics": ["count"],
"filter": [{"col": "foo", "val": ["'text in single quotes'"], "op": "IN",}],
"is_timeseries": False,
}
)
assert result_object.df["count"][0] == 1
# should filter text with double quote
result_object = table.query(
{
"metrics": ["count"],
"filter": [{"col": "foo", "val": ['double quotes " in text'], "op": "IN",}],
"is_timeseries": False,
}
)
assert result_object.df["count"][0] == 1
# should filter text with single quote
result_object = table.query(
{
"metrics": ["count"],
"filter": [{"col": "foo", "val": ["single quotes ' in text"], "op": "IN",}],
"is_timeseries": False,
}
)
assert result_object.df["count"][0] == 1
@pytest.mark.parametrize( @pytest.mark.parametrize(