[cache] Using the query as the basis of the cache key (#4016)

This commit is contained in:
John Bodley 2018-01-12 12:05:12 -08:00 committed by Grace Guo
parent 8069d6221d
commit a7a6678d5c
4 changed files with 62 additions and 59 deletions

View File

View File

@ -214,8 +214,6 @@ class BaseViz(object):
@property @property
def cache_timeout(self): def cache_timeout(self):
if self.form_data.get('cache_timeout'):
return int(self.form_data.get('cache_timeout'))
if self.datasource.cache_timeout: if self.datasource.cache_timeout:
return self.datasource.cache_timeout return self.datasource.cache_timeout
if ( if (
@ -229,44 +227,50 @@ class BaseViz(object):
self.get_payload(force), self.get_payload(force),
default=utils.json_int_dttm_ser, ignore_nan=True) default=utils.json_int_dttm_ser, ignore_nan=True)
@property def cache_key(self, query_obj):
def cache_key(self): """
form_data = self.form_data.copy() The cache key is the datasource/query string tuple associated with the
merge_extra_filters(form_data) object which needs to be fully deterministic.
s = str([(k, form_data[k]) for k in sorted(form_data.keys())]) """
return hashlib.md5(s.encode('utf-8')).hexdigest()
return hashlib.md5(
json.dumps((
self.datasource.id,
self.datasource.get_query_str(query_obj),
)).encode('utf-8'),
).hexdigest()
def get_payload(self, force=False): def get_payload(self, force=False):
"""Handles caching around the json payload retrieval""" """Handles caching around the json payload retrieval"""
cache_key = self.cache_key query_obj = self.query_obj()
payload = None cache_key = self.cache_key(query_obj)
cached_dttm = None
data = None
stacktrace = None
rowcount = None
if not force and cache: if not force and cache:
payload = cache.get(cache_key) cache_value = cache.get(cache_key)
if cache_value:
if payload:
stats_logger.incr('loaded_from_cache') stats_logger.incr('loaded_from_cache')
is_cached = True is_cached = True
try: try:
cached_data = zlib.decompress(payload) cache_value = zlib.decompress(cache_value)
if PY3: if PY3:
cached_data = cached_data.decode('utf-8') cache_value = cache_value.decode('utf-8')
payload = json.loads(cached_data) cache_value = json.loads(cache_value)
data = cache_value['data']
cached_dttm = cache_value['dttm']
except Exception as e: except Exception as e:
logging.error('Error reading cache: ' + logging.error('Error reading cache: ' +
utils.error_msg_from_exception(e)) utils.error_msg_from_exception(e))
payload = None data = None
return []
logging.info('Serving from cache') logging.info('Serving from cache')
if not payload: if not data:
stats_logger.incr('loaded_from_source') stats_logger.incr('loaded_from_source')
data = None
is_cached = False is_cached = False
cache_timeout = self.cache_timeout
stacktrace = None
rowcount = None
try: try:
df = self.get_df() df = self.get_df(query_obj)
if not self.error_message: if not self.error_message:
data = self.get_data(df) data = self.get_data(df)
rowcount = len(df.index) if df is not None else 0 rowcount = len(df.index) if df is not None else 0
@ -277,37 +281,40 @@ class BaseViz(object):
self.status = utils.QueryStatus.FAILED self.status = utils.QueryStatus.FAILED
data = None data = None
stacktrace = traceback.format_exc() stacktrace = traceback.format_exc()
payload = {
'cache_key': cache_key, if data and cache and self.status != utils.QueryStatus.FAILED:
'cache_timeout': cache_timeout, cached_dttm = datetime.utcnow().isoformat().split('.')[0]
'data': data,
'error': self.error_message,
'form_data': self.form_data,
'query': self.query,
'status': self.status,
'stacktrace': stacktrace,
'rowcount': rowcount,
}
payload['cached_dttm'] = datetime.utcnow().isoformat().split('.')[0]
logging.info('Caching for the next {} seconds'.format(
cache_timeout))
data = self.json_dumps(payload)
if PY3:
data = bytes(data, 'utf-8')
if cache and self.status != utils.QueryStatus.FAILED:
try: try:
cache_value = json.dumps({
'data': data,
'dttm': cached_dttm,
})
if PY3:
cache_value = bytes(cache_value, 'utf-8')
cache.set( cache.set(
cache_key, cache_key,
zlib.compress(data), zlib.compress(cache_value),
timeout=cache_timeout) timeout=self.cache_timeout)
except Exception as e: except Exception as e:
# cache.set call can fail if the backend is down or if # cache.set call can fail if the backend is down or if
# the key is too large or whatever other reasons # the key is too large or whatever other reasons
logging.warning('Could not cache key {}'.format(cache_key)) logging.warning('Could not cache key {}'.format(cache_key))
logging.exception(e) logging.exception(e)
cache.delete(cache_key) cache.delete(cache_key)
payload['is_cached'] = is_cached
return payload return {
'cache_key': cache_key,
'cached_dttm': cached_dttm,
'cache_timeout': self.cache_timeout,
'data': data,
'error': self.error_message,
'form_data': self.form_data,
'is_cached': is_cached,
'query': self.query,
'status': self.status,
'stacktrace': stacktrace,
'rowcount': rowcount,
}
def json_dumps(self, obj): def json_dumps(self, obj):
return json.dumps(obj, default=utils.json_int_dttm_ser, ignore_nan=True) return json.dumps(obj, default=utils.json_int_dttm_ser, ignore_nan=True)

View File

@ -340,7 +340,6 @@ class CoreTests(SupersetTestCase):
slc = self.get_slice('Girls', db.session) slc = self.get_slice('Girls', db.session)
data = self.get_json_resp( data = self.get_json_resp(
'/superset/warm_up_cache?slice_id={}'.format(slc.id)) '/superset/warm_up_cache?slice_id={}'.format(slc.id))
assert data == [{'slice_id': slc.id, 'slice_name': slc.slice_name}] assert data == [{'slice_id': slc.id, 'slice_name': slc.slice_name}]
data = self.get_json_resp( data = self.get_json_resp(

View File

@ -101,11 +101,8 @@ class BaseVizTestCase(unittest.TestCase):
def test_cache_timeout(self): def test_cache_timeout(self):
datasource = Mock() datasource = Mock()
form_data = {'cache_timeout': '10'}
test_viz = viz.BaseViz(datasource, form_data)
self.assertEqual(10, test_viz.cache_timeout)
del form_data['cache_timeout']
datasource.cache_timeout = 156 datasource.cache_timeout = 156
test_viz = viz.BaseViz(datasource, form_data={})
self.assertEqual(156, test_viz.cache_timeout) self.assertEqual(156, test_viz.cache_timeout)
datasource.cache_timeout = None datasource.cache_timeout = None
datasource.database = Mock() datasource.database = Mock()