[fix] Improve csv upload functionality (#8457)

* [fix] csv upload when table metadata present

* Remove table from hive spec

* Move upload before table metadata creation

* Refine upload logic, dd unit tests and fix translations

* Use ALLOWED_EXTENSIONS from config

* Address review comments

* Fix error message grammar

* Add return type to hive csv upload and replace first with one_or_none
This commit is contained in:
Ville Brofeldt 2019-11-07 20:03:42 +02:00 committed by GitHub
parent 397e1e3d40
commit 49ea232c3a
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
18 changed files with 156 additions and 69 deletions

View File

@ -288,7 +288,7 @@ SUPERSET_WEBSERVER_DOMAINS = None
# Allowed format types for upload on Database view
# TODO: Add processing of other spreadsheet formats (xls, xlsx etc)
ALLOWED_EXTENSIONS = set(["csv"])
ALLOWED_EXTENSIONS = {"csv", "tsv"}
# CSV Options: key/value pairs that will be passed as argument to DataFrame.to_csv
# method.

View File

@ -37,7 +37,7 @@ from sqlalchemy.sql.expression import ColumnClause, ColumnElement, Select, TextA
from sqlalchemy.types import TypeEngine
from werkzeug.utils import secure_filename
from superset import app, db, sql_parse
from superset import app, sql_parse
from superset.utils import core as utils
if TYPE_CHECKING:
@ -388,15 +388,17 @@ class BaseEngineSpec: # pylint: disable=too-many-public-methods
df.to_sql(**kwargs)
@classmethod
def create_table_from_csv(cls, form, table):
""" Create table (including metadata in backend) from contents of a csv.
def create_table_from_csv(cls, form) -> None:
"""
Create table from contents of a csv. Note: this method does not create
metadata for the table.
:param form: Parameters defining how to process data
:param table: Metadata of new table to be created
"""
def _allowed_file(filename: str) -> bool:
# Only allow specific file extensions as specified in the config
extension = os.path.splitext(filename)[1]
extension = os.path.splitext(filename)[1].lower()
return (
extension is not None and extension[1:] in config["ALLOWED_EXTENSIONS"]
)
@ -432,12 +434,6 @@ class BaseEngineSpec: # pylint: disable=too-many-public-methods
}
cls.df_to_sql(**df_to_sql_kwargs)
table.user_id = g.user.id
table.schema = form.schema.data
table.fetch_metadata()
db.session.add(table)
db.session.commit()
@classmethod
def convert_dttm(cls, target_type: str, dttm: datetime) -> Optional[str]:
"""

View File

@ -98,7 +98,7 @@ class HiveEngineSpec(PrestoEngineSpec):
return []
@classmethod
def create_table_from_csv(cls, form, table): # pylint: disable=too-many-locals
def create_table_from_csv(cls, form) -> None: # pylint: disable=too-many-locals
"""Uploads a csv file and creates a superset datasource in Hive."""
def convert_to_hive_type(col_type):

View File

@ -4669,7 +4669,7 @@ msgid "CSV to Database configuration"
msgstr ""
#: superset/views/core.py:375
msgid "CSV file \"{0}\" uploaded to table \"{1}\" in database \"{2}\""
msgid "CSV file \"%(csv_filename)s\" uploaded to table \"%(table_name)s\" in database \"%(db_name)s\""
msgstr ""
#: superset/views/core.py:401 superset/views/core.py:667
@ -7765,4 +7765,3 @@ msgstr ""
#~ msgid "Saved Queries"
#~ msgstr ""

View File

@ -3124,7 +3124,7 @@
"CSV to Database configuration": [
""
],
"CSV file \"{0}\" uploaded to table \"{1}\" in database \"{2}\"": [
"CSV file \"%(csv_filename)s\" uploaded to table \"%(table_name)s\" in database \"%(db_name)s\"": [
""
],
"User": [

View File

@ -4880,8 +4880,8 @@ msgid "CSV to Database configuration"
msgstr "CSV vers la configuration de la base de données"
#: superset/views/core.py:375
msgid "CSV file \"{0}\" uploaded to table \"{1}\" in database \"{2}\""
msgstr "Fichier CSV \"{0}\" chargé dans la table \"{1}\" de la base de données \"{2}\""
msgid "CSV file \"%(csv_filename)s\" uploaded to table \"%(table_name)s\" in database \"%(db_name)s\""
msgstr "Fichier CSV \"%(csv_filename)s\" chargé dans la table \"%(table_name)s\" de la base de données \"%(db_name)s\""
#: superset/views/core.py:401 superset/views/core.py:667
#: superset/views/sql_lab.py:23 superset/views/sql_lab.py:60

View File

@ -2923,7 +2923,7 @@
"CSV to Database configuration": [
""
],
"CSV file \"{0}\" uploaded to table \"{1}\" in database \"{2}\"": [
"CSV file \"%(csv_filename)s\" uploaded to table \"%(table_name)s\" in database \"%(db_name)s\"": [
""
],
"User": [
@ -3132,4 +3132,4 @@
]
}
}
}
}

View File

@ -4432,7 +4432,7 @@ msgid "CSV to Database configuration"
msgstr ""
#: superset/views/core.py:363
msgid "CSV file \"{0}\" uploaded to table \"{1}\" in database \"{2}\""
msgid "CSV file \"%(csv_filename)s\" uploaded to table \"%(table_name)s\" in database \"%(db_name)s\""
msgstr ""
#: superset/views/core.py:389 superset/views/core.py:655
@ -6461,4 +6461,3 @@ msgstr "Query salvate"
#~ msgid "Slice"
#~ msgstr "Slice"

View File

@ -3788,7 +3788,7 @@ msgstr "대시보드 가져오기"
msgid "CSV to Database configuration"
msgstr ""
msgid "CSV file \"{0}\" uploaded to table \"{1}\" in database \"{2}\""
msgid "CSV file \"%(csv_filename)s\" uploaded to table \"%(table_name)s\" in database \"%(db_name)s\""
msgstr ""
msgid "User"

View File

@ -4669,7 +4669,7 @@ msgid "CSV to Database configuration"
msgstr ""
#: superset/views/core.py:375
msgid "CSV file \"{0}\" uploaded to table \"{1}\" in database \"{2}\""
msgid "CSV file \"%(csv_filename)s\" uploaded to table \"%(table_name)s\" in database \"%(db_name)s\""
msgstr ""
#: superset/views/core.py:401 superset/views/core.py:667
@ -4967,4 +4967,3 @@ msgstr ""
#: superset/views/sql_lab.py:86
msgid "Saved Queries"
msgstr ""

View File

@ -2923,8 +2923,8 @@
"CSV to Database configuration": [
"Настройка CSV для БД"
],
"CSV file \"{0}\" uploaded to table \"{1}\" in database \"{2}\"": [
"CSV-файл \"{0}\" загружен в таблицу \"{1}\" базы данных \"{2}\""
"CSV file \"%(csv_filename)s\" uploaded to table \"%(table_name)s\" in database \"%(db_name)s\"": [
"CSV-файл \"%(csv_filename)s\" загружен в таблицу \"%(table_name)s\" базы данных \"%(db_name)s\""
],
"User": [
"Пользователь"
@ -3132,4 +3132,4 @@
]
}
}
}
}

View File

@ -4544,8 +4544,8 @@ msgid "CSV to Database configuration"
msgstr "Настройка CSV для БД"
#: superset/views/core.py:363
msgid "CSV file \"{0}\" uploaded to table \"{1}\" in database \"{2}\""
msgstr "CSV-файл \"{0}\" загружен в таблицу \"{1}\" базы данных \"{2}\""
msgid "CSV file \"%(csv_filename)s\" uploaded to table \"%(table_name)s\" in database \"%(db_name)s\""
msgstr "CSV-файл \"%(csv_filename)s\" загружен в таблицу \"%(table_name)s\" базы данных \"%(db_name)s\""
#: superset/views/core.py:389 superset/views/core.py:655
#: superset/views/sql_lab.py:16 superset/views/sql_lab.py:53

View File

@ -3124,8 +3124,8 @@
"CSV to Database configuration": [
"csv 到数据库配置"
],
"CSV file \"{0}\" uploaded to table \"{1}\" in database \"{2}\"": [
"csv 文件 \"{0}\" 上传到数据库 \"{2}\" 中的表 \"{1}\""
"CSV file \"%(csv_filename)s\" uploaded to table \"%(table_name)s\" in database \"%(db_name)s\"": [
"csv 文件 \"%(csv_filename)s\" 上传到数据库 \"%(db_name)s\" 中的表 \"%(table_name)s\""
],
"User": [
"用户"

View File

@ -4746,8 +4746,8 @@ msgid "CSV to Database configuration"
msgstr "csv 到数据库配置"
#: superset/views/core.py:375
msgid "CSV file \"{0}\" uploaded to table \"{1}\" in database \"{2}\""
msgstr "csv 文件 \"{0}\" 上传到数据库 \"{2}\" 中的表 \"{1}\""
msgid "CSV file \"%(csv_filename)s\" uploaded to table \"%(table_name)s\" in database \"%(db_name)s\""
msgstr "csv 文件 \"%(csv_filename)s\" 上传到数据库 \"%(db_name)s\" 中的表 \"%(table_name)s\""
#: superset/views/core.py:401 superset/views/core.py:667
#: superset/views/sql_lab.py:23 superset/views/sql_lab.py:60

View File

@ -3049,7 +3049,7 @@ class Superset(BaseSupersetView):
except Exception:
return json_error_response(
"Failed to fetch schemas allowed for csv upload in this database! "
"Please contact Superset Admin!",
"Please contact your Superset Admin!",
stacktrace=utils.get_stacktrace(),
)

View File

@ -90,7 +90,17 @@ class CsvToDatabaseForm(DynamicForm):
csv_file = FileField(
_("CSV File"),
description=_("Select a CSV file to be uploaded to a database."),
validators=[FileRequired(), FileAllowed(["csv"], _("CSV Files Only!"))],
validators=[
FileRequired(),
FileAllowed(
config["ALLOWED_EXTENSIONS"],
_(
"Only the following file extensions are allowed: "
"%(allowed_extensions)s",
allowed_extensions=", ".join(config["ALLOWED_EXTENSIONS"]),
),
),
],
)
con = QuerySelectField(
_("Database"),

View File

@ -17,7 +17,7 @@
# pylint: disable=C,R,W
import os
from flask import flash, redirect
from flask import flash, g, redirect
from flask_appbuilder import SimpleFormView
from flask_appbuilder.forms import DynamicForm
from flask_appbuilder.models.sqla.interface import SQLAInterface
@ -28,7 +28,7 @@ from wtforms.fields import StringField
from wtforms.validators import ValidationError
import superset.models.core as models
from superset import app, appbuilder, security_manager
from superset import app, appbuilder, db, security_manager
from superset.connectors.sqla.models import SqlaTable
from superset.utils import core as utils
from superset.views.base import DeleteMixin, SupersetModelView, YamlExportMixin
@ -102,10 +102,10 @@ class CsvToDatabaseView(SimpleFormView):
if not self.is_schema_allowed(database, schema_name):
message = _(
'Database "{0}" Schema "{1}" is not allowed for csv uploads. '
"Please contact Superset Admin".format(
database.database_name, schema_name
)
'Database "%(database_name)s" schema "%(schema_name)s" '
"is not allowed for csv uploads. Please contact your Superset Admin.",
database_name=database.database_name,
schema_name=schema_name,
)
flash(message, "danger")
return redirect("/csvtodatabaseview/form")
@ -117,32 +117,58 @@ class CsvToDatabaseView(SimpleFormView):
try:
utils.ensure_path_exists(config["UPLOAD_FOLDER"])
csv_file.save(path)
table = SqlaTable(table_name=form.name.data)
table.database = form.data.get("con")
table.database_id = table.database.id
table.database.db_engine_spec.create_table_from_csv(form, table)
table_name = form.name.data
database = form.data.get("con")
database.db_engine_spec.create_table_from_csv(form)
table = (
db.session.query(SqlaTable)
.filter_by(
table_name=table_name,
schema=form.schema.data,
database_id=database.id,
)
.one_or_none()
)
if table:
table.fetch_metadata()
if not table:
table = SqlaTable(table_name=table_name)
table.database = database
table.database_id = database.id
table.user_id = g.user.id
table.schema = form.schema.data
table.fetch_metadata()
db.session.add(table)
db.session.commit()
except Exception as e:
db.session.rollback()
try:
os.remove(path)
except OSError:
pass
message = (
"Table name {} already exists. Please pick another".format(
form.name.data
)
if isinstance(e, IntegrityError)
else str(e)
message = _(
'Unable to upload CSV file "%(filename)s" to table '
'"%(table_name)s" in database "%(db_name)s". '
"Error message: %(error_msg)s",
filename=csv_filename,
table_name=form.name.data,
db_name=database.database_name,
error_msg=str(e),
)
flash(message, "danger")
stats_logger.incr("failed_csv_upload")
return redirect("/csvtodatabaseview/form")
os.remove(path)
# Go back to welcome page / splash screen
db_name = table.database.database_name
message = _(
'CSV file "{0}" uploaded to table "{1}" in '
'database "{2}"'.format(csv_filename, form.name.data, db_name)
'CSV file "%(csv_filename)s" uploaded to table "%(table_name)s" in '
'database "%(db_name)s"',
csv_filename=csv_filename,
table_name=form.name.data,
db_name=table.database.database_name,
)
flash(message, "info")
stats_logger.incr("successful_csv_upload")

View File

@ -598,42 +598,100 @@ class CoreTests(SupersetTestCase):
def test_import_csv(self):
self.login(username="admin")
filename = "testCSV.csv"
table_name = "".join(random.choice(string.ascii_uppercase) for _ in range(5))
test_file = open(filename, "w+")
test_file.write("a,b\n")
test_file.write("john,1\n")
test_file.write("paul,2\n")
test_file.close()
filename_1 = "testCSV.csv"
test_file_1 = open(filename_1, "w+")
test_file_1.write("a,b\n")
test_file_1.write("john,1\n")
test_file_1.write("paul,2\n")
test_file_1.close()
filename_2 = "testCSV2.csv"
test_file_2 = open(filename_2, "w+")
test_file_2.write("b,c,d\n")
test_file_2.write("john,1,x\n")
test_file_2.write("paul,2,y\n")
test_file_2.close()
example_db = utils.get_example_database()
example_db.allow_csv_upload = True
db_id = example_db.id
db.session.commit()
test_file = open(filename, "rb")
form_data = {
"csv_file": test_file,
"csv_file": open(filename_1, "rb"),
"sep": ",",
"name": table_name,
"con": db_id,
"if_exists": "append",
"if_exists": "fail",
"index_label": "test_label",
"mangle_dupe_cols": False,
}
url = "/databaseview/list/"
add_datasource_page = self.get_resp(url)
assert "Upload a CSV" in add_datasource_page
self.assertIn("Upload a CSV", add_datasource_page)
url = "/csvtodatabaseview/form"
form_get = self.get_resp(url)
assert "CSV to Database configuration" in form_get
self.assertIn("CSV to Database configuration", form_get)
try:
# ensure uploaded successfully
# initial upload with fail mode
resp = self.get_resp(url, data=form_data)
assert 'CSV file "testCSV.csv" uploaded to table' in resp
self.assertIn(
f'CSV file "{filename_1}" uploaded to table "{table_name}"', resp
)
# upload again with fail mode; should fail
form_data["csv_file"] = open(filename_1, "rb")
resp = self.get_resp(url, data=form_data)
self.assertIn(
f'Unable to upload CSV file "{filename_1}" to table "{table_name}"',
resp,
)
# upload again with append mode
form_data["csv_file"] = open(filename_1, "rb")
form_data["if_exists"] = "append"
resp = self.get_resp(url, data=form_data)
self.assertIn(
f'CSV file "{filename_1}" uploaded to table "{table_name}"', resp
)
# upload again with replace mode
form_data["csv_file"] = open(filename_1, "rb")
form_data["if_exists"] = "replace"
resp = self.get_resp(url, data=form_data)
self.assertIn(
f'CSV file "{filename_1}" uploaded to table "{table_name}"', resp
)
# try to append to table from file with different schema
form_data["csv_file"] = open(filename_2, "rb")
form_data["if_exists"] = "append"
resp = self.get_resp(url, data=form_data)
self.assertIn(
f'Unable to upload CSV file "{filename_2}" to table "{table_name}"',
resp,
)
# replace table from file with different schema
form_data["csv_file"] = open(filename_2, "rb")
form_data["if_exists"] = "replace"
resp = self.get_resp(url, data=form_data)
self.assertIn(
f'CSV file "{filename_2}" uploaded to table "{table_name}"', resp
)
table = (
db.session.query(SqlaTable)
.filter_by(table_name=table_name, database_id=db_id)
.first()
)
# make sure the new column name is reflected in the table metadata
self.assertIn("d", table.column_names)
finally:
os.remove(filename)
os.remove(filename_1)
os.remove(filename_2)
def test_dataframe_timezone(self):
tz = psycopg2.tz.FixedOffsetTimezone(offset=60, name=None)