mirror of https://github.com/apache/superset.git
fix: remove character set and collate column info by default (#9316)
* fix: remove character set and collate column info by default
* lint
* remove collation and charset info before compile
This commit is contained in:
parent
6cf36c91ea
commit
982c234a50
|
@ -161,6 +161,7 @@ class BaseEngineSpec: # pylint: disable=too-many-public-methods
|
||||||
utils.DbColumnType.STRING: (
|
utils.DbColumnType.STRING: (
|
||||||
re.compile(r".*CHAR.*", re.IGNORECASE),
|
re.compile(r".*CHAR.*", re.IGNORECASE),
|
||||||
re.compile(r".*STRING.*", re.IGNORECASE),
|
re.compile(r".*STRING.*", re.IGNORECASE),
|
||||||
|
re.compile(r".*TEXT.*", re.IGNORECASE),
|
||||||
),
|
),
|
||||||
utils.DbColumnType.TEMPORAL: (
|
utils.DbColumnType.TEMPORAL: (
|
||||||
re.compile(r".*DATE.*", re.IGNORECASE),
|
re.compile(r".*DATE.*", re.IGNORECASE),
|
||||||
|
@ -911,13 +912,18 @@ class BaseEngineSpec: # pylint: disable=too-many-public-methods
|
||||||
) -> str:
|
) -> str:
|
||||||
"""
|
"""
|
||||||
Convert sqlalchemy column type to string representation.
|
Convert sqlalchemy column type to string representation.
|
||||||
Can be overridden to remove unnecessary details, especially
|
By default removes collation and character encoding info to avoid unnecessarily
|
||||||
collation info (see mysql, mssql).
|
long datatypes.
|
||||||
|
|
||||||
:param sqla_column_type: SqlAlchemy column type
|
:param sqla_column_type: SqlAlchemy column type
|
||||||
:param dialect: Sqlalchemy dialect
|
:param dialect: Sqlalchemy dialect
|
||||||
:return: Compiled column type
|
:return: Compiled column type
|
||||||
"""
|
"""
|
||||||
|
sqla_column_type = sqla_column_type.copy()
|
||||||
|
if hasattr(sqla_column_type, "collation"):
|
||||||
|
sqla_column_type.collation = None
|
||||||
|
if hasattr(sqla_column_type, "charset"):
|
||||||
|
sqla_column_type.charset = None
|
||||||
return sqla_column_type.compile(dialect=dialect).upper()
|
return sqla_column_type.compile(dialect=dialect).upper()
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
|
|
|
@ -18,7 +18,6 @@ import re
|
||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
from typing import Any, List, Optional, Tuple
|
from typing import Any, List, Optional, Tuple
|
||||||
|
|
||||||
from sqlalchemy.engine.interfaces import Dialect
|
|
||||||
from sqlalchemy.types import String, TypeEngine, UnicodeText
|
from sqlalchemy.types import String, TypeEngine, UnicodeText
|
||||||
|
|
||||||
from superset.db_engine_specs.base import BaseEngineSpec, LimitMethod
|
from superset.db_engine_specs.base import BaseEngineSpec, LimitMethod
|
||||||
|
@ -66,10 +65,10 @@ class MssqlEngineSpec(BaseEngineSpec):
|
||||||
# Lists of `pyodbc.Row` need to be unpacked further
|
# Lists of `pyodbc.Row` need to be unpacked further
|
||||||
return cls.pyodbc_rows_to_tuples(data)
|
return cls.pyodbc_rows_to_tuples(data)
|
||||||
|
|
||||||
column_types = [
|
column_types = (
|
||||||
(String(), re.compile(r"^(?<!N)((VAR){0,1}CHAR|TEXT|STRING)", re.IGNORECASE)),
|
(String(), re.compile(r"^(?<!N)((VAR){0,1}CHAR|TEXT|STRING)", re.IGNORECASE)),
|
||||||
(UnicodeText(), re.compile(r"^N((VAR){0,1}CHAR|TEXT)", re.IGNORECASE)),
|
(UnicodeText(), re.compile(r"^N((VAR){0,1}CHAR|TEXT)", re.IGNORECASE)),
|
||||||
]
|
)
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def get_sqla_column_type(cls, type_: str) -> Optional[TypeEngine]:
|
def get_sqla_column_type(cls, type_: str) -> Optional[TypeEngine]:
|
||||||
|
@ -77,16 +76,3 @@ class MssqlEngineSpec(BaseEngineSpec):
|
||||||
if regex.match(type_):
|
if regex.match(type_):
|
||||||
return sqla_type
|
return sqla_type
|
||||||
return None
|
return None
|
||||||
|
|
||||||
@classmethod
|
|
||||||
def column_datatype_to_string(
|
|
||||||
cls, sqla_column_type: TypeEngine, dialect: Dialect
|
|
||||||
) -> str:
|
|
||||||
datatype = super().column_datatype_to_string(sqla_column_type, dialect)
|
|
||||||
# MSSQL returns long overflowing datatype
|
|
||||||
# as in 'VARCHAR(255) COLLATE SQL_LATIN1_GENERAL_CP1_CI_AS'
|
|
||||||
# and we don't need the verbose collation type
|
|
||||||
str_cutoff = " COLLATE "
|
|
||||||
if str_cutoff in datatype:
|
|
||||||
datatype = datatype.split(str_cutoff)[0]
|
|
||||||
return datatype
|
|
||||||
|
|
|
@ -18,9 +18,7 @@ from datetime import datetime
|
||||||
from typing import Any, Dict, Optional
|
from typing import Any, Dict, Optional
|
||||||
from urllib import parse
|
from urllib import parse
|
||||||
|
|
||||||
from sqlalchemy.engine.interfaces import Dialect
|
|
||||||
from sqlalchemy.engine.url import URL
|
from sqlalchemy.engine.url import URL
|
||||||
from sqlalchemy.types import TypeEngine
|
|
||||||
|
|
||||||
from superset.db_engine_specs.base import BaseEngineSpec
|
from superset.db_engine_specs.base import BaseEngineSpec
|
||||||
|
|
||||||
|
@ -97,16 +95,3 @@ class MySQLEngineSpec(BaseEngineSpec):
|
||||||
except Exception: # pylint: disable=broad-except
|
except Exception: # pylint: disable=broad-except
|
||||||
pass
|
pass
|
||||||
return message
|
return message
|
||||||
|
|
||||||
@classmethod
|
|
||||||
def column_datatype_to_string(
|
|
||||||
cls, sqla_column_type: TypeEngine, dialect: Dialect
|
|
||||||
) -> str:
|
|
||||||
datatype = super().column_datatype_to_string(sqla_column_type, dialect)
|
|
||||||
# MySQL dialect started returning long overflowing datatype
|
|
||||||
# as in 'VARCHAR(255) COLLATE UTF8MB4_GENERAL_CI'
|
|
||||||
# and we don't need the verbose collation type
|
|
||||||
str_cutoff = " COLLATE "
|
|
||||||
if str_cutoff in datatype:
|
|
||||||
datatype = datatype.split(str_cutoff)[0]
|
|
||||||
return datatype
|
|
||||||
|
|
|
@ -18,6 +18,7 @@ import unittest.mock as mock
|
||||||
|
|
||||||
from sqlalchemy import column, table
|
from sqlalchemy import column, table
|
||||||
from sqlalchemy.dialects import mssql
|
from sqlalchemy.dialects import mssql
|
||||||
|
from sqlalchemy.dialects.mssql import DATE, NTEXT, NVARCHAR, TEXT, VARCHAR
|
||||||
from sqlalchemy.sql import select
|
from sqlalchemy.sql import select
|
||||||
from sqlalchemy.types import String, UnicodeText
|
from sqlalchemy.types import String, UnicodeText
|
||||||
|
|
||||||
|
@ -75,22 +76,24 @@ class MssqlEngineSpecTest(DbEngineSpecTestCase):
|
||||||
|
|
||||||
def test_convert_dttm(self):
|
def test_convert_dttm(self):
|
||||||
dttm = self.get_dttm()
|
dttm = self.get_dttm()
|
||||||
|
test_cases = (
|
||||||
self.assertEqual(
|
(
|
||||||
MssqlEngineSpec.convert_dttm("DATE", dttm),
|
MssqlEngineSpec.convert_dttm("DATE", dttm),
|
||||||
"CONVERT(DATE, '2019-01-02', 23)",
|
"CONVERT(DATE, '2019-01-02', 23)",
|
||||||
)
|
),
|
||||||
|
(
|
||||||
self.assertEqual(
|
|
||||||
MssqlEngineSpec.convert_dttm("DATETIME", dttm),
|
MssqlEngineSpec.convert_dttm("DATETIME", dttm),
|
||||||
"CONVERT(DATETIME, '2019-01-02T03:04:05.678', 126)",
|
"CONVERT(DATETIME, '2019-01-02T03:04:05.678', 126)",
|
||||||
)
|
),
|
||||||
|
(
|
||||||
self.assertEqual(
|
|
||||||
MssqlEngineSpec.convert_dttm("SMALLDATETIME", dttm),
|
MssqlEngineSpec.convert_dttm("SMALLDATETIME", dttm),
|
||||||
"CONVERT(SMALLDATETIME, '2019-01-02 03:04:05', 20)",
|
"CONVERT(SMALLDATETIME, '2019-01-02 03:04:05', 20)",
|
||||||
|
),
|
||||||
)
|
)
|
||||||
|
|
||||||
|
for actual, expected in test_cases:
|
||||||
|
self.assertEqual(actual, expected)
|
||||||
|
|
||||||
@mock.patch.object(
|
@mock.patch.object(
|
||||||
MssqlEngineSpec, "pyodbc_rows_to_tuples", return_value="converted"
|
MssqlEngineSpec, "pyodbc_rows_to_tuples", return_value="converted"
|
||||||
)
|
)
|
||||||
|
@ -102,3 +105,19 @@ class MssqlEngineSpecTest(DbEngineSpecTestCase):
|
||||||
result = MssqlEngineSpec.fetch_data(None, 0)
|
result = MssqlEngineSpec.fetch_data(None, 0)
|
||||||
mock_pyodbc_rows_to_tuples.assert_called_once_with(data)
|
mock_pyodbc_rows_to_tuples.assert_called_once_with(data)
|
||||||
self.assertEqual(result, "converted")
|
self.assertEqual(result, "converted")
|
||||||
|
|
||||||
|
def test_column_datatype_to_string(self):
|
||||||
|
test_cases = (
|
||||||
|
(DATE(), "DATE"),
|
||||||
|
(VARCHAR(length=255), "VARCHAR(255)"),
|
||||||
|
(VARCHAR(length=255, collation="utf8_general_ci"), "VARCHAR(255)"),
|
||||||
|
(NVARCHAR(length=128), "NVARCHAR(128)"),
|
||||||
|
(TEXT(), "TEXT"),
|
||||||
|
(NTEXT(collation="utf8_general_ci"), "NTEXT"),
|
||||||
|
)
|
||||||
|
|
||||||
|
for original, expected in test_cases:
|
||||||
|
actual = MssqlEngineSpec.column_datatype_to_string(
|
||||||
|
original, mssql.dialect()
|
||||||
|
)
|
||||||
|
self.assertEqual(actual, expected)
|
||||||
|
|
|
@ -16,6 +16,9 @@
|
||||||
# under the License.
|
# under the License.
|
||||||
import unittest
|
import unittest
|
||||||
|
|
||||||
|
from sqlalchemy.dialects import mysql
|
||||||
|
from sqlalchemy.dialects.mysql import DATE, NVARCHAR, TEXT, VARCHAR
|
||||||
|
|
||||||
from superset.db_engine_specs.mysql import MySQLEngineSpec
|
from superset.db_engine_specs.mysql import MySQLEngineSpec
|
||||||
from tests.db_engine_specs.base_tests import DbEngineSpecTestCase
|
from tests.db_engine_specs.base_tests import DbEngineSpecTestCase
|
||||||
|
|
||||||
|
@ -41,3 +44,21 @@ class MySQLEngineSpecsTestCase(DbEngineSpecTestCase):
|
||||||
MySQLEngineSpec.convert_dttm("DATETIME", dttm),
|
MySQLEngineSpec.convert_dttm("DATETIME", dttm),
|
||||||
"STR_TO_DATE('2019-01-02 03:04:05.678900', '%Y-%m-%d %H:%i:%s.%f')",
|
"STR_TO_DATE('2019-01-02 03:04:05.678900', '%Y-%m-%d %H:%i:%s.%f')",
|
||||||
)
|
)
|
||||||
|
|
||||||
|
def test_column_datatype_to_string(self):
|
||||||
|
test_cases = (
|
||||||
|
(DATE(), "DATE"),
|
||||||
|
(VARCHAR(length=255), "VARCHAR(255)"),
|
||||||
|
(
|
||||||
|
VARCHAR(length=255, charset="latin1", collation="utf8mb4_general_ci"),
|
||||||
|
"VARCHAR(255)",
|
||||||
|
),
|
||||||
|
(NVARCHAR(length=128), "NATIONAL VARCHAR(128)"),
|
||||||
|
(TEXT(), "TEXT"),
|
||||||
|
)
|
||||||
|
|
||||||
|
for original, expected in test_cases:
|
||||||
|
actual = MySQLEngineSpec.column_datatype_to_string(
|
||||||
|
original, mysql.dialect()
|
||||||
|
)
|
||||||
|
self.assertEqual(actual, expected)
|
||||||
|
|
|
@ -16,6 +16,7 @@
|
||||||
# under the License.
|
# under the License.
|
||||||
from sqlalchemy import column
|
from sqlalchemy import column
|
||||||
from sqlalchemy.dialects import oracle
|
from sqlalchemy.dialects import oracle
|
||||||
|
from sqlalchemy.dialects.oracle import DATE, NVARCHAR, VARCHAR
|
||||||
|
|
||||||
from superset.db_engine_specs.oracle import OracleEngineSpec
|
from superset.db_engine_specs.oracle import OracleEngineSpec
|
||||||
from tests.db_engine_specs.base_tests import DbEngineSpecTestCase
|
from tests.db_engine_specs.base_tests import DbEngineSpecTestCase
|
||||||
|
@ -39,17 +40,31 @@ class OracleTestCase(DbEngineSpecTestCase):
|
||||||
def test_convert_dttm(self):
|
def test_convert_dttm(self):
|
||||||
dttm = self.get_dttm()
|
dttm = self.get_dttm()
|
||||||
|
|
||||||
self.assertEqual(
|
test_cases = (
|
||||||
|
(
|
||||||
OracleEngineSpec.convert_dttm("DATE", dttm),
|
OracleEngineSpec.convert_dttm("DATE", dttm),
|
||||||
"TO_DATE('2019-01-02', 'YYYY-MM-DD')",
|
"TO_DATE('2019-01-02', 'YYYY-MM-DD')",
|
||||||
)
|
),
|
||||||
|
(
|
||||||
self.assertEqual(
|
|
||||||
OracleEngineSpec.convert_dttm("DATETIME", dttm),
|
OracleEngineSpec.convert_dttm("DATETIME", dttm),
|
||||||
"""TO_DATE('2019-01-02T03:04:05', 'YYYY-MM-DD"T"HH24:MI:SS')""",
|
"""TO_DATE('2019-01-02T03:04:05', 'YYYY-MM-DD"T"HH24:MI:SS')""",
|
||||||
)
|
),
|
||||||
|
(
|
||||||
self.assertEqual(
|
|
||||||
OracleEngineSpec.convert_dttm("TIMESTAMP", dttm),
|
OracleEngineSpec.convert_dttm("TIMESTAMP", dttm),
|
||||||
"""TO_TIMESTAMP('2019-01-02T03:04:05.678900', 'YYYY-MM-DD"T"HH24:MI:SS.ff6')""",
|
"""TO_TIMESTAMP('2019-01-02T03:04:05.678900', 'YYYY-MM-DD"T"HH24:MI:SS.ff6')""",
|
||||||
|
),
|
||||||
)
|
)
|
||||||
|
|
||||||
|
def test_column_datatype_to_string(self):
|
||||||
|
test_cases = (
|
||||||
|
(DATE(), "DATE"),
|
||||||
|
(VARCHAR(length=255), "VARCHAR(255 CHAR)"),
|
||||||
|
(VARCHAR(length=255, collation="utf8"), "VARCHAR(255 CHAR)"),
|
||||||
|
(NVARCHAR(length=128), "NVARCHAR2(128)"),
|
||||||
|
)
|
||||||
|
|
||||||
|
for original, expected in test_cases:
|
||||||
|
actual = OracleEngineSpec.column_datatype_to_string(
|
||||||
|
original, oracle.dialect()
|
||||||
|
)
|
||||||
|
self.assertEqual(actual, expected)
|
||||||
|
|
|
@ -46,6 +46,8 @@ class DatabaseModelTestCase(SupersetTestCase):
|
||||||
"VARCHAR": DbColumnType.STRING,
|
"VARCHAR": DbColumnType.STRING,
|
||||||
"NVARCHAR": DbColumnType.STRING,
|
"NVARCHAR": DbColumnType.STRING,
|
||||||
"STRING": DbColumnType.STRING,
|
"STRING": DbColumnType.STRING,
|
||||||
|
"TEXT": DbColumnType.STRING,
|
||||||
|
"NTEXT": DbColumnType.STRING,
|
||||||
# numeric
|
# numeric
|
||||||
"INT": DbColumnType.NUMERIC,
|
"INT": DbColumnType.NUMERIC,
|
||||||
"BIGINT": DbColumnType.NUMERIC,
|
"BIGINT": DbColumnType.NUMERIC,
|
||||||
|
|
Loading…
Reference in New Issue