mirror of https://github.com/apache/superset.git
fix: excel sheet upload is not working (#10450)
* remove conflicts with csv upload * revert StringField * change description * remove redundant space * apply string approach
This commit is contained in:
parent
c716f7afe8
commit
473fe1003a
|
@ -438,10 +438,7 @@ class BaseEngineSpec: # pylint: disable=too-many-public-methods
|
|||
"""
|
||||
kwargs["encoding"] = "utf-8"
|
||||
kwargs["iterator"] = True
|
||||
chunks = pd.io.excel.read_excel(
|
||||
io=kwargs["filepath_or_buffer"], sheet_name=kwargs["sheet_name"]
|
||||
)
|
||||
df = pd.concat(chunk for chunk in chunks.values())
|
||||
df = pd.read_excel(**kwargs)
|
||||
return df
|
||||
|
||||
@staticmethod
|
||||
|
@ -513,7 +510,7 @@ class BaseEngineSpec: # pylint: disable=too-many-public-methods
|
|||
Create table from contents of a excel. Note: this method does not create
|
||||
metadata for the table.
|
||||
"""
|
||||
df = cls.excel_to_df(filepath_or_buffer=filename, **excel_to_df_kwargs,)
|
||||
df = cls.excel_to_df(io=filename, **excel_to_df_kwargs,)
|
||||
engine = cls.get_engine(database)
|
||||
if table.schema:
|
||||
# only add schema when it is preset and non empty
|
||||
|
|
|
@ -180,20 +180,18 @@ class CsvToDatabaseForm(DynamicForm):
|
|||
)
|
||||
skip_blank_lines = BooleanField(
|
||||
_("Skip Blank Lines"),
|
||||
description=_(
|
||||
"Skip blank lines rather than interpreting them " "as NaN values."
|
||||
),
|
||||
description=_("Skip blank lines rather than interpreting them as NaN values."),
|
||||
)
|
||||
parse_dates = CommaSeparatedListField(
|
||||
_("Parse Dates"),
|
||||
description=_(
|
||||
"A comma separated list of columns that should be " "parsed as dates."
|
||||
"A comma separated list of columns that should be parsed as dates."
|
||||
),
|
||||
filters=[filter_not_empty_values],
|
||||
)
|
||||
infer_datetime_format = BooleanField(
|
||||
_("Infer Datetime Format"),
|
||||
description=_("Use Pandas to interpret the datetime format " "automatically."),
|
||||
description=_("Use Pandas to interpret the datetime format automatically."),
|
||||
)
|
||||
decimal = StringField(
|
||||
_("Decimal Character"),
|
||||
|
@ -228,16 +226,16 @@ class CsvToDatabaseForm(DynamicForm):
|
|||
|
||||
class ExcelToDatabaseForm(DynamicForm):
|
||||
# pylint: disable=E0211
|
||||
def excel_allowed_dbs(): # type: ignore
|
||||
excel_allowed_dbs = []
|
||||
def excel_allowed_dbs() -> List[Database]: # type: ignore
|
||||
# TODO: change allow_csv_upload to allow_file_upload
|
||||
excel_enabled_dbs = (
|
||||
db.session.query(Database).filter_by(allow_csv_upload=True).all()
|
||||
)
|
||||
for excel_enabled_db in excel_enabled_dbs:
|
||||
if ExcelToDatabaseForm.at_least_one_schema_is_allowed(excel_enabled_db):
|
||||
excel_allowed_dbs.append(excel_enabled_db)
|
||||
return excel_allowed_dbs
|
||||
return [
|
||||
excel_enabled_db
|
||||
for excel_enabled_db in excel_enabled_dbs
|
||||
if ExcelToDatabaseForm.at_least_one_schema_is_allowed(excel_enabled_db)
|
||||
]
|
||||
|
||||
@staticmethod
|
||||
def at_least_one_schema_is_allowed(database: Database) -> bool:
|
||||
|
@ -265,10 +263,7 @@ class ExcelToDatabaseForm(DynamicForm):
|
|||
b) if database supports schema
|
||||
user is able to upload to schema in schemas_allowed_for_csv_upload
|
||||
"""
|
||||
if (
|
||||
security_manager.database_access(database)
|
||||
or security_manager.all_datasource_access()
|
||||
):
|
||||
if security_manager.can_access_database(database):
|
||||
return True
|
||||
schemas = database.get_schema_access_for_csv_upload()
|
||||
if schemas and security_manager.schemas_accessible_by_user(
|
||||
|
@ -304,7 +299,10 @@ class ExcelToDatabaseForm(DynamicForm):
|
|||
)
|
||||
|
||||
sheet_name = StringField(
|
||||
_("Sheet Name"), description="Sheet Name", validators=[Optional()]
|
||||
_("Sheet Name"),
|
||||
description=_("Strings used for sheet names (default is the first sheet)."),
|
||||
validators=[Optional()],
|
||||
widget=BS3TextFieldWidget(),
|
||||
)
|
||||
|
||||
con = QuerySelectField(
|
||||
|
@ -356,9 +354,6 @@ class ExcelToDatabaseForm(DynamicForm):
|
|||
_("Mangle Duplicate Columns"),
|
||||
description=_('Specify duplicate columns as "X.0, X.1".'),
|
||||
)
|
||||
skipinitialspace = BooleanField(
|
||||
_("Skip Initial Space"), description=_("Skip spaces after delimiter.")
|
||||
)
|
||||
skiprows = IntegerField(
|
||||
_("Skip Rows"),
|
||||
description=_("Number of rows to skip at start of file."),
|
||||
|
@ -371,6 +366,13 @@ class ExcelToDatabaseForm(DynamicForm):
|
|||
validators=[Optional(), NumberRange(min=0)],
|
||||
widget=BS3TextFieldWidget(),
|
||||
)
|
||||
parse_dates = CommaSeparatedListField(
|
||||
_("Parse Dates"),
|
||||
description=_(
|
||||
"A comma separated list of columns that should be parsed as dates."
|
||||
),
|
||||
filters=[filter_not_empty_values],
|
||||
)
|
||||
decimal = StringField(
|
||||
_("Decimal Character"),
|
||||
default=".",
|
||||
|
|
|
@ -263,10 +263,9 @@ class ExcelToDatabaseView(SimpleFormView):
|
|||
def form_get(self, form: ExcelToDatabaseForm) -> None:
|
||||
form.header.data = 0
|
||||
form.mangle_dupe_cols.data = True
|
||||
form.skipinitialspace.data = False
|
||||
form.decimal.data = "."
|
||||
form.if_exists.data = "fail"
|
||||
form.sheet_name = None
|
||||
form.sheet_name.data = ""
|
||||
|
||||
def form_post(self, form: ExcelToDatabaseForm) -> Response:
|
||||
database = form.con.data
|
||||
|
@ -307,16 +306,23 @@ class ExcelToDatabaseView(SimpleFormView):
|
|||
database = (
|
||||
db.session.query(models.Database).filter_by(id=con.data.get("id")).one()
|
||||
)
|
||||
|
||||
# some params are not supported by pandas.read_excel (e.g. chunksize).
|
||||
# More can be found here:
|
||||
# https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.read_excel.html
|
||||
excel_to_df_kwargs = {
|
||||
"header": form.header.data if form.header.data else 0,
|
||||
"index_col": form.index_col.data,
|
||||
"mangle_dupe_cols": form.mangle_dupe_cols.data,
|
||||
"skipinitialspace": form.skipinitialspace.data,
|
||||
"skiprows": form.skiprows.data,
|
||||
"nrows": form.nrows.data,
|
||||
"sheet_name": form.sheet_name.data,
|
||||
"chunksize": 1000,
|
||||
"sheet_name": form.sheet_name.data if form.sheet_name.data else 0,
|
||||
"parse_dates": form.parse_dates.data,
|
||||
}
|
||||
if form.null_values.data:
|
||||
excel_to_df_kwargs["na_values"] = form.null_values.data
|
||||
excel_to_df_kwargs["keep_default_na"] = False
|
||||
|
||||
df_to_sql_kwargs = {
|
||||
"name": excel_table.table,
|
||||
"if_exists": form.if_exists.data,
|
||||
|
@ -336,7 +342,7 @@ class ExcelToDatabaseView(SimpleFormView):
|
|||
# E.g. if hive was used to upload a excel, presto will be a better option
|
||||
# to explore the table.
|
||||
expore_database = database
|
||||
explore_database_id = database.get_extra().get("explore_database_id", None)
|
||||
explore_database_id = database.explore_database_id
|
||||
if explore_database_id:
|
||||
expore_database = (
|
||||
db.session.query(models.Database)
|
||||
|
|
Loading…
Reference in New Issue