mirror of https://github.com/apache/superset.git
[fix] Improve csv upload functionality (#8457)
* [fix] csv upload when table metadata present * Remove table from hive spec * Move upload before table metadata creation * Refine upload logic, dd unit tests and fix translations * Use ALLOWED_EXTENSIONS from config * Address review comments * Fix error message grammar * Add return type to hive csv upload and replace first with one_or_none
This commit is contained in:
parent
397e1e3d40
commit
49ea232c3a
|
@ -288,7 +288,7 @@ SUPERSET_WEBSERVER_DOMAINS = None
|
|||
|
||||
# Allowed format types for upload on Database view
|
||||
# TODO: Add processing of other spreadsheet formats (xls, xlsx etc)
|
||||
ALLOWED_EXTENSIONS = set(["csv"])
|
||||
ALLOWED_EXTENSIONS = {"csv", "tsv"}
|
||||
|
||||
# CSV Options: key/value pairs that will be passed as argument to DataFrame.to_csv
|
||||
# method.
|
||||
|
|
|
@ -37,7 +37,7 @@ from sqlalchemy.sql.expression import ColumnClause, ColumnElement, Select, TextA
|
|||
from sqlalchemy.types import TypeEngine
|
||||
from werkzeug.utils import secure_filename
|
||||
|
||||
from superset import app, db, sql_parse
|
||||
from superset import app, sql_parse
|
||||
from superset.utils import core as utils
|
||||
|
||||
if TYPE_CHECKING:
|
||||
|
@ -388,15 +388,17 @@ class BaseEngineSpec: # pylint: disable=too-many-public-methods
|
|||
df.to_sql(**kwargs)
|
||||
|
||||
@classmethod
|
||||
def create_table_from_csv(cls, form, table):
|
||||
""" Create table (including metadata in backend) from contents of a csv.
|
||||
def create_table_from_csv(cls, form) -> None:
|
||||
"""
|
||||
Create table from contents of a csv. Note: this method does not create
|
||||
metadata for the table.
|
||||
|
||||
:param form: Parameters defining how to process data
|
||||
:param table: Metadata of new table to be created
|
||||
"""
|
||||
|
||||
def _allowed_file(filename: str) -> bool:
|
||||
# Only allow specific file extensions as specified in the config
|
||||
extension = os.path.splitext(filename)[1]
|
||||
extension = os.path.splitext(filename)[1].lower()
|
||||
return (
|
||||
extension is not None and extension[1:] in config["ALLOWED_EXTENSIONS"]
|
||||
)
|
||||
|
@ -432,12 +434,6 @@ class BaseEngineSpec: # pylint: disable=too-many-public-methods
|
|||
}
|
||||
cls.df_to_sql(**df_to_sql_kwargs)
|
||||
|
||||
table.user_id = g.user.id
|
||||
table.schema = form.schema.data
|
||||
table.fetch_metadata()
|
||||
db.session.add(table)
|
||||
db.session.commit()
|
||||
|
||||
@classmethod
|
||||
def convert_dttm(cls, target_type: str, dttm: datetime) -> Optional[str]:
|
||||
"""
|
||||
|
|
|
@ -98,7 +98,7 @@ class HiveEngineSpec(PrestoEngineSpec):
|
|||
return []
|
||||
|
||||
@classmethod
|
||||
def create_table_from_csv(cls, form, table): # pylint: disable=too-many-locals
|
||||
def create_table_from_csv(cls, form) -> None: # pylint: disable=too-many-locals
|
||||
"""Uploads a csv file and creates a superset datasource in Hive."""
|
||||
|
||||
def convert_to_hive_type(col_type):
|
||||
|
|
|
@ -4669,7 +4669,7 @@ msgid "CSV to Database configuration"
|
|||
msgstr ""
|
||||
|
||||
#: superset/views/core.py:375
|
||||
msgid "CSV file \"{0}\" uploaded to table \"{1}\" in database \"{2}\""
|
||||
msgid "CSV file \"%(csv_filename)s\" uploaded to table \"%(table_name)s\" in database \"%(db_name)s\""
|
||||
msgstr ""
|
||||
|
||||
#: superset/views/core.py:401 superset/views/core.py:667
|
||||
|
@ -7765,4 +7765,3 @@ msgstr ""
|
|||
|
||||
#~ msgid "Saved Queries"
|
||||
#~ msgstr ""
|
||||
|
||||
|
|
|
@ -3124,7 +3124,7 @@
|
|||
"CSV to Database configuration": [
|
||||
""
|
||||
],
|
||||
"CSV file \"{0}\" uploaded to table \"{1}\" in database \"{2}\"": [
|
||||
"CSV file \"%(csv_filename)s\" uploaded to table \"%(table_name)s\" in database \"%(db_name)s\"": [
|
||||
""
|
||||
],
|
||||
"User": [
|
||||
|
|
|
@ -4880,8 +4880,8 @@ msgid "CSV to Database configuration"
|
|||
msgstr "CSV vers la configuration de la base de données"
|
||||
|
||||
#: superset/views/core.py:375
|
||||
msgid "CSV file \"{0}\" uploaded to table \"{1}\" in database \"{2}\""
|
||||
msgstr "Fichier CSV \"{0}\" chargé dans la table \"{1}\" de la base de données \"{2}\""
|
||||
msgid "CSV file \"%(csv_filename)s\" uploaded to table \"%(table_name)s\" in database \"%(db_name)s\""
|
||||
msgstr "Fichier CSV \"%(csv_filename)s\" chargé dans la table \"%(table_name)s\" de la base de données \"%(db_name)s\""
|
||||
|
||||
#: superset/views/core.py:401 superset/views/core.py:667
|
||||
#: superset/views/sql_lab.py:23 superset/views/sql_lab.py:60
|
||||
|
|
|
@ -2923,7 +2923,7 @@
|
|||
"CSV to Database configuration": [
|
||||
""
|
||||
],
|
||||
"CSV file \"{0}\" uploaded to table \"{1}\" in database \"{2}\"": [
|
||||
"CSV file \"%(csv_filename)s\" uploaded to table \"%(table_name)s\" in database \"%(db_name)s\"": [
|
||||
""
|
||||
],
|
||||
"User": [
|
||||
|
|
|
@ -4432,7 +4432,7 @@ msgid "CSV to Database configuration"
|
|||
msgstr ""
|
||||
|
||||
#: superset/views/core.py:363
|
||||
msgid "CSV file \"{0}\" uploaded to table \"{1}\" in database \"{2}\""
|
||||
msgid "CSV file \"%(csv_filename)s\" uploaded to table \"%(table_name)s\" in database \"%(db_name)s\""
|
||||
msgstr ""
|
||||
|
||||
#: superset/views/core.py:389 superset/views/core.py:655
|
||||
|
@ -6461,4 +6461,3 @@ msgstr "Query salvate"
|
|||
|
||||
#~ msgid "Slice"
|
||||
#~ msgstr "Slice"
|
||||
|
||||
|
|
|
@ -3788,7 +3788,7 @@ msgstr "대시보드 가져오기"
|
|||
msgid "CSV to Database configuration"
|
||||
msgstr ""
|
||||
|
||||
msgid "CSV file \"{0}\" uploaded to table \"{1}\" in database \"{2}\""
|
||||
msgid "CSV file \"%(csv_filename)s\" uploaded to table \"%(table_name)s\" in database \"%(db_name)s\""
|
||||
msgstr ""
|
||||
|
||||
msgid "User"
|
||||
|
|
|
@ -4669,7 +4669,7 @@ msgid "CSV to Database configuration"
|
|||
msgstr ""
|
||||
|
||||
#: superset/views/core.py:375
|
||||
msgid "CSV file \"{0}\" uploaded to table \"{1}\" in database \"{2}\""
|
||||
msgid "CSV file \"%(csv_filename)s\" uploaded to table \"%(table_name)s\" in database \"%(db_name)s\""
|
||||
msgstr ""
|
||||
|
||||
#: superset/views/core.py:401 superset/views/core.py:667
|
||||
|
@ -4967,4 +4967,3 @@ msgstr ""
|
|||
#: superset/views/sql_lab.py:86
|
||||
msgid "Saved Queries"
|
||||
msgstr ""
|
||||
|
||||
|
|
|
@ -2923,8 +2923,8 @@
|
|||
"CSV to Database configuration": [
|
||||
"Настройка CSV для БД"
|
||||
],
|
||||
"CSV file \"{0}\" uploaded to table \"{1}\" in database \"{2}\"": [
|
||||
"CSV-файл \"{0}\" загружен в таблицу \"{1}\" базы данных \"{2}\""
|
||||
"CSV file \"%(csv_filename)s\" uploaded to table \"%(table_name)s\" in database \"%(db_name)s\"": [
|
||||
"CSV-файл \"%(csv_filename)s\" загружен в таблицу \"%(table_name)s\" базы данных \"%(db_name)s\""
|
||||
],
|
||||
"User": [
|
||||
"Пользователь"
|
||||
|
|
|
@ -4544,8 +4544,8 @@ msgid "CSV to Database configuration"
|
|||
msgstr "Настройка CSV для БД"
|
||||
|
||||
#: superset/views/core.py:363
|
||||
msgid "CSV file \"{0}\" uploaded to table \"{1}\" in database \"{2}\""
|
||||
msgstr "CSV-файл \"{0}\" загружен в таблицу \"{1}\" базы данных \"{2}\""
|
||||
msgid "CSV file \"%(csv_filename)s\" uploaded to table \"%(table_name)s\" in database \"%(db_name)s\""
|
||||
msgstr "CSV-файл \"%(csv_filename)s\" загружен в таблицу \"%(table_name)s\" базы данных \"%(db_name)s\""
|
||||
|
||||
#: superset/views/core.py:389 superset/views/core.py:655
|
||||
#: superset/views/sql_lab.py:16 superset/views/sql_lab.py:53
|
||||
|
|
|
@ -3124,8 +3124,8 @@
|
|||
"CSV to Database configuration": [
|
||||
"csv 到数据库配置"
|
||||
],
|
||||
"CSV file \"{0}\" uploaded to table \"{1}\" in database \"{2}\"": [
|
||||
"csv 文件 \"{0}\" 上传到数据库 \"{2}\" 中的表 \"{1}\""
|
||||
"CSV file \"%(csv_filename)s\" uploaded to table \"%(table_name)s\" in database \"%(db_name)s\"": [
|
||||
"csv 文件 \"%(csv_filename)s\" 上传到数据库 \"%(db_name)s\" 中的表 \"%(table_name)s\""
|
||||
],
|
||||
"User": [
|
||||
"用户"
|
||||
|
|
|
@ -4746,8 +4746,8 @@ msgid "CSV to Database configuration"
|
|||
msgstr "csv 到数据库配置"
|
||||
|
||||
#: superset/views/core.py:375
|
||||
msgid "CSV file \"{0}\" uploaded to table \"{1}\" in database \"{2}\""
|
||||
msgstr "csv 文件 \"{0}\" 上传到数据库 \"{2}\" 中的表 \"{1}\""
|
||||
msgid "CSV file \"%(csv_filename)s\" uploaded to table \"%(table_name)s\" in database \"%(db_name)s\""
|
||||
msgstr "csv 文件 \"%(csv_filename)s\" 上传到数据库 \"%(db_name)s\" 中的表 \"%(table_name)s\""
|
||||
|
||||
#: superset/views/core.py:401 superset/views/core.py:667
|
||||
#: superset/views/sql_lab.py:23 superset/views/sql_lab.py:60
|
||||
|
|
|
@ -3049,7 +3049,7 @@ class Superset(BaseSupersetView):
|
|||
except Exception:
|
||||
return json_error_response(
|
||||
"Failed to fetch schemas allowed for csv upload in this database! "
|
||||
"Please contact Superset Admin!",
|
||||
"Please contact your Superset Admin!",
|
||||
stacktrace=utils.get_stacktrace(),
|
||||
)
|
||||
|
||||
|
|
|
@ -90,7 +90,17 @@ class CsvToDatabaseForm(DynamicForm):
|
|||
csv_file = FileField(
|
||||
_("CSV File"),
|
||||
description=_("Select a CSV file to be uploaded to a database."),
|
||||
validators=[FileRequired(), FileAllowed(["csv"], _("CSV Files Only!"))],
|
||||
validators=[
|
||||
FileRequired(),
|
||||
FileAllowed(
|
||||
config["ALLOWED_EXTENSIONS"],
|
||||
_(
|
||||
"Only the following file extensions are allowed: "
|
||||
"%(allowed_extensions)s",
|
||||
allowed_extensions=", ".join(config["ALLOWED_EXTENSIONS"]),
|
||||
),
|
||||
),
|
||||
],
|
||||
)
|
||||
con = QuerySelectField(
|
||||
_("Database"),
|
||||
|
|
|
@ -17,7 +17,7 @@
|
|||
# pylint: disable=C,R,W
|
||||
import os
|
||||
|
||||
from flask import flash, redirect
|
||||
from flask import flash, g, redirect
|
||||
from flask_appbuilder import SimpleFormView
|
||||
from flask_appbuilder.forms import DynamicForm
|
||||
from flask_appbuilder.models.sqla.interface import SQLAInterface
|
||||
|
@ -28,7 +28,7 @@ from wtforms.fields import StringField
|
|||
from wtforms.validators import ValidationError
|
||||
|
||||
import superset.models.core as models
|
||||
from superset import app, appbuilder, security_manager
|
||||
from superset import app, appbuilder, db, security_manager
|
||||
from superset.connectors.sqla.models import SqlaTable
|
||||
from superset.utils import core as utils
|
||||
from superset.views.base import DeleteMixin, SupersetModelView, YamlExportMixin
|
||||
|
@ -102,10 +102,10 @@ class CsvToDatabaseView(SimpleFormView):
|
|||
|
||||
if not self.is_schema_allowed(database, schema_name):
|
||||
message = _(
|
||||
'Database "{0}" Schema "{1}" is not allowed for csv uploads. '
|
||||
"Please contact Superset Admin".format(
|
||||
database.database_name, schema_name
|
||||
)
|
||||
'Database "%(database_name)s" schema "%(schema_name)s" '
|
||||
"is not allowed for csv uploads. Please contact your Superset Admin.",
|
||||
database_name=database.database_name,
|
||||
schema_name=schema_name,
|
||||
)
|
||||
flash(message, "danger")
|
||||
return redirect("/csvtodatabaseview/form")
|
||||
|
@ -117,32 +117,58 @@ class CsvToDatabaseView(SimpleFormView):
|
|||
try:
|
||||
utils.ensure_path_exists(config["UPLOAD_FOLDER"])
|
||||
csv_file.save(path)
|
||||
table = SqlaTable(table_name=form.name.data)
|
||||
table.database = form.data.get("con")
|
||||
table.database_id = table.database.id
|
||||
table.database.db_engine_spec.create_table_from_csv(form, table)
|
||||
table_name = form.name.data
|
||||
database = form.data.get("con")
|
||||
database.db_engine_spec.create_table_from_csv(form)
|
||||
|
||||
table = (
|
||||
db.session.query(SqlaTable)
|
||||
.filter_by(
|
||||
table_name=table_name,
|
||||
schema=form.schema.data,
|
||||
database_id=database.id,
|
||||
)
|
||||
.one_or_none()
|
||||
)
|
||||
if table:
|
||||
table.fetch_metadata()
|
||||
if not table:
|
||||
table = SqlaTable(table_name=table_name)
|
||||
table.database = database
|
||||
table.database_id = database.id
|
||||
table.user_id = g.user.id
|
||||
table.schema = form.schema.data
|
||||
table.fetch_metadata()
|
||||
db.session.add(table)
|
||||
db.session.commit()
|
||||
except Exception as e:
|
||||
db.session.rollback()
|
||||
try:
|
||||
os.remove(path)
|
||||
except OSError:
|
||||
pass
|
||||
message = (
|
||||
"Table name {} already exists. Please pick another".format(
|
||||
form.name.data
|
||||
)
|
||||
if isinstance(e, IntegrityError)
|
||||
else str(e)
|
||||
message = _(
|
||||
'Unable to upload CSV file "%(filename)s" to table '
|
||||
'"%(table_name)s" in database "%(db_name)s". '
|
||||
"Error message: %(error_msg)s",
|
||||
filename=csv_filename,
|
||||
table_name=form.name.data,
|
||||
db_name=database.database_name,
|
||||
error_msg=str(e),
|
||||
)
|
||||
|
||||
flash(message, "danger")
|
||||
stats_logger.incr("failed_csv_upload")
|
||||
return redirect("/csvtodatabaseview/form")
|
||||
|
||||
os.remove(path)
|
||||
# Go back to welcome page / splash screen
|
||||
db_name = table.database.database_name
|
||||
message = _(
|
||||
'CSV file "{0}" uploaded to table "{1}" in '
|
||||
'database "{2}"'.format(csv_filename, form.name.data, db_name)
|
||||
'CSV file "%(csv_filename)s" uploaded to table "%(table_name)s" in '
|
||||
'database "%(db_name)s"',
|
||||
csv_filename=csv_filename,
|
||||
table_name=form.name.data,
|
||||
db_name=table.database.database_name,
|
||||
)
|
||||
flash(message, "info")
|
||||
stats_logger.incr("successful_csv_upload")
|
||||
|
|
|
@ -598,42 +598,100 @@ class CoreTests(SupersetTestCase):
|
|||
|
||||
def test_import_csv(self):
|
||||
self.login(username="admin")
|
||||
filename = "testCSV.csv"
|
||||
table_name = "".join(random.choice(string.ascii_uppercase) for _ in range(5))
|
||||
|
||||
test_file = open(filename, "w+")
|
||||
test_file.write("a,b\n")
|
||||
test_file.write("john,1\n")
|
||||
test_file.write("paul,2\n")
|
||||
test_file.close()
|
||||
filename_1 = "testCSV.csv"
|
||||
test_file_1 = open(filename_1, "w+")
|
||||
test_file_1.write("a,b\n")
|
||||
test_file_1.write("john,1\n")
|
||||
test_file_1.write("paul,2\n")
|
||||
test_file_1.close()
|
||||
|
||||
filename_2 = "testCSV2.csv"
|
||||
test_file_2 = open(filename_2, "w+")
|
||||
test_file_2.write("b,c,d\n")
|
||||
test_file_2.write("john,1,x\n")
|
||||
test_file_2.write("paul,2,y\n")
|
||||
test_file_2.close()
|
||||
|
||||
example_db = utils.get_example_database()
|
||||
example_db.allow_csv_upload = True
|
||||
db_id = example_db.id
|
||||
db.session.commit()
|
||||
test_file = open(filename, "rb")
|
||||
form_data = {
|
||||
"csv_file": test_file,
|
||||
"csv_file": open(filename_1, "rb"),
|
||||
"sep": ",",
|
||||
"name": table_name,
|
||||
"con": db_id,
|
||||
"if_exists": "append",
|
||||
"if_exists": "fail",
|
||||
"index_label": "test_label",
|
||||
"mangle_dupe_cols": False,
|
||||
}
|
||||
url = "/databaseview/list/"
|
||||
add_datasource_page = self.get_resp(url)
|
||||
assert "Upload a CSV" in add_datasource_page
|
||||
self.assertIn("Upload a CSV", add_datasource_page)
|
||||
|
||||
url = "/csvtodatabaseview/form"
|
||||
form_get = self.get_resp(url)
|
||||
assert "CSV to Database configuration" in form_get
|
||||
self.assertIn("CSV to Database configuration", form_get)
|
||||
|
||||
try:
|
||||
# ensure uploaded successfully
|
||||
# initial upload with fail mode
|
||||
resp = self.get_resp(url, data=form_data)
|
||||
assert 'CSV file "testCSV.csv" uploaded to table' in resp
|
||||
self.assertIn(
|
||||
f'CSV file "{filename_1}" uploaded to table "{table_name}"', resp
|
||||
)
|
||||
|
||||
# upload again with fail mode; should fail
|
||||
form_data["csv_file"] = open(filename_1, "rb")
|
||||
resp = self.get_resp(url, data=form_data)
|
||||
self.assertIn(
|
||||
f'Unable to upload CSV file "{filename_1}" to table "{table_name}"',
|
||||
resp,
|
||||
)
|
||||
|
||||
# upload again with append mode
|
||||
form_data["csv_file"] = open(filename_1, "rb")
|
||||
form_data["if_exists"] = "append"
|
||||
resp = self.get_resp(url, data=form_data)
|
||||
self.assertIn(
|
||||
f'CSV file "{filename_1}" uploaded to table "{table_name}"', resp
|
||||
)
|
||||
|
||||
# upload again with replace mode
|
||||
form_data["csv_file"] = open(filename_1, "rb")
|
||||
form_data["if_exists"] = "replace"
|
||||
resp = self.get_resp(url, data=form_data)
|
||||
self.assertIn(
|
||||
f'CSV file "{filename_1}" uploaded to table "{table_name}"', resp
|
||||
)
|
||||
|
||||
# try to append to table from file with different schema
|
||||
form_data["csv_file"] = open(filename_2, "rb")
|
||||
form_data["if_exists"] = "append"
|
||||
resp = self.get_resp(url, data=form_data)
|
||||
self.assertIn(
|
||||
f'Unable to upload CSV file "{filename_2}" to table "{table_name}"',
|
||||
resp,
|
||||
)
|
||||
|
||||
# replace table from file with different schema
|
||||
form_data["csv_file"] = open(filename_2, "rb")
|
||||
form_data["if_exists"] = "replace"
|
||||
resp = self.get_resp(url, data=form_data)
|
||||
self.assertIn(
|
||||
f'CSV file "{filename_2}" uploaded to table "{table_name}"', resp
|
||||
)
|
||||
table = (
|
||||
db.session.query(SqlaTable)
|
||||
.filter_by(table_name=table_name, database_id=db_id)
|
||||
.first()
|
||||
)
|
||||
# make sure the new column name is reflected in the table metadata
|
||||
self.assertIn("d", table.column_names)
|
||||
finally:
|
||||
os.remove(filename)
|
||||
os.remove(filename_1)
|
||||
os.remove(filename_2)
|
||||
|
||||
def test_dataframe_timezone(self):
|
||||
tz = psycopg2.tz.FixedOffsetTimezone(offset=60, name=None)
|
||||
|
|
Loading…
Reference in New Issue