DI-1113. ADDENDUM. Authentication: Enable user impersonation for Superset to HiveServer2 using hive.server2.proxy.user (a.fernandez) (#3697)

This commit is contained in:
Alejandro Fernandez 2017-11-06 10:20:38 -08:00 committed by Grace Guo
parent 13c17e1526
commit b059506afa
4 changed files with 54 additions and 48 deletions

View File

@ -198,15 +198,16 @@ class BaseEngineSpec(object):
url.username = username
@classmethod
def get_uri_for_impersonation(cls, uri, impersonate_user, username):
def get_configuration_for_impersonation(cls, uri, impersonate_user, username):
"""
Return a new URI string that allows for user impersonation.
Return a configuration dictionary that can be merged with other configs
that can set the correct properties for impersonating users
:param uri: URI string
:param impersonate_user: Bool indicating if impersonation is enabled
:param impersonate_user: Bool indicating if impersonation is enabled
:param username: Effective username
:return: New URI string
:return: Dictionary with configs required for impersonation
"""
return uri
return {}
class PostgresEngineSpec(BaseEngineSpec):
@ -701,7 +702,6 @@ class HiveEngineSpec(PrestoEngineSpec):
hive.constants = patched_constants
hive.ttypes = patched_ttypes
hive.Cursor.fetch_logs = patched_hive.fetch_logs
hive.Connection = patched_hive.ConnectionProxyUser
@classmethod
@cache_util.memoized_func(
@ -863,27 +863,29 @@ class HiveEngineSpec(PrestoEngineSpec):
:param impersonate_user: Bool indicating if impersonation is enabled
:param username: Effective username
"""
if impersonate_user is not None and "auth" in url.query.keys() and username is not None:
url.query["hive_server2_proxy_user"] = username
# Do nothing to the URL object here; impersonation is instead set through
# the configuration dictionary. See get_configuration_for_impersonation
pass
@classmethod
def get_uri_for_impersonation(cls, uri, impersonate_user, username):
def get_configuration_for_impersonation(cls, uri, impersonate_user, username):
"""
Return a new URI string that allows for user impersonation.
Return a configuration dictionary that can be merged with other configs
that can set the correct properties for impersonating users
:param uri: URI string
:param impersonate_user: Bool indicating if impersonation is enabled
:param impersonate_user: Bool indicating if impersonation is enabled
:param username: Effective username
:return: New URI string
:return: Dictionary with configs required for impersonation
"""
new_uri = uri
configuration = {}
url = make_url(uri)
backend_name = url.get_backend_name()
# Must be Hive connection, enable impersonation, and set param auth=LDAP|KERBEROS
if backend_name == "hive" and "auth" in url.query.keys() and\
if backend_name == "hive" and "auth" in url.query.keys() and \
impersonate_user is True and username is not None:
new_uri += "&hive_server2_proxy_user={0}".format(username)
return new_uri
configuration["hive.server2.proxy.user"] = username
return configuration
class MssqlEngineSpec(BaseEngineSpec):
engine = 'mssql'

View File

@ -3,27 +3,6 @@ from TCLIService import ttypes
from thrift import Thrift
old_Connection = hive.Connection
# TODO
# Monkey-patch of PyHive project's pyhive/hive.py which needed to change the constructor.
# Submitted a pull request on October 13, 2017 and waiting for it to be merged.
# https://github.com/dropbox/PyHive/pull/165
class ConnectionProxyUser(hive.Connection):
def __init__(self, host=None, port=None, username=None, database='default', auth=None,
configuration=None, kerberos_service_name=None, password=None,
thrift_transport=None, hive_server2_proxy_user=None):
configuration = configuration or {}
if auth is not None and auth in ('LDAP', 'KERBEROS'):
if hive_server2_proxy_user is not None:
configuration["hive.server2.proxy.user"] = hive_server2_proxy_user
# restore the old connection class, otherwise, will recurse on its own __init__ method
hive.Connection = old_Connection
hive.Connection.__init__(self, host=host, port=port, username=username, database=database, auth=auth,
configuration=configuration, kerberos_service_name=kerberos_service_name, password=password,
thrift_transport=thrift_transport)
# TODO: contribute back to pyhive.
def fetch_logs(self, max_rows=1024,

View File

@ -620,23 +620,35 @@ class Database(Model, AuditMixinNullable):
effective_username = url.username
if user_name:
effective_username = user_name
elif hasattr(g, 'user') and g.user.username:
elif hasattr(g, 'user') and hasattr(g.user, 'username') and g.user.username is not None:
effective_username = g.user.username
return effective_username
def get_sqla_engine(self, schema=None, nullpool=False, user_name=None):
extra = self.get_extra()
url = make_url(self.sqlalchemy_uri_decrypted)
params = extra.get('engine_params', {})
if nullpool:
params['poolclass'] = NullPool
url = self.db_engine_spec.adjust_database_uri(url, schema)
effective_username = self.get_effective_user(url, user_name)
# If using MySQL or Presto for example, will set url.username
# If using Hive, will not do anything yet since that relies on a configuration parameter instead.
self.db_engine_spec.modify_url_for_impersonation(url, self.impersonate_user, effective_username)
masked_url = self.get_password_masked_url(url)
logging.info("Database.get_sqla_engine(). Masked URL: {0}".format(masked_url))
params = extra.get('engine_params', {})
if nullpool:
params['poolclass'] = NullPool
# If using Hive, this will set hive.server2.proxy.user=$effective_username
configuration = {}
configuration.update(
self.db_engine_spec.get_configuration_for_impersonation(str(url),
self.impersonate_user,
effective_username))
if configuration:
params["connect_args"] = {"configuration": configuration}
return create_engine(url, **params)
def get_reserved_words(self):

View File

@ -1433,6 +1433,7 @@ class Superset(BaseSupersetView):
uri = request.json.get('uri')
db_name = request.json.get('name')
impersonate_user = request.json.get('impersonate_user')
database = None
if db_name:
database = (
db.session
@ -1444,20 +1445,32 @@ class Superset(BaseSupersetView):
# the password-masked uri was passed
# use the URI associated with this database
uri = database.sqlalchemy_uri_decrypted
url = make_url(uri)
db_engine = models.Database.get_db_engine_spec_for_backend(url.get_backend_name())
db_engine.patch()
uri = db_engine.get_uri_for_impersonation(uri, impersonate_user, username)
masked_url = database.get_password_masked_url_from_uri(uri)
logging.info("Superset.testconn(). Masked URL: {0}".format(masked_url))
configuration = {}
if database and uri:
url = make_url(uri)
db_engine = models.Database.get_db_engine_spec_for_backend(url.get_backend_name())
db_engine.patch()
masked_url = database.get_password_masked_url_from_uri(uri)
logging.info("Superset.testconn(). Masked URL: {0}".format(masked_url))
configuration.update(
db_engine.get_configuration_for_impersonation(uri,
impersonate_user,
username)
)
connect_args = (
request.json
.get('extras', {})
.get('engine_params', {})
.get('connect_args', {}))
if configuration:
connect_args["configuration"] = configuration
engine = create_engine(uri, connect_args=connect_args)
engine.connect()
return json_success(json.dumps(engine.table_names(), indent=4))