# ------------------------------------------------------------------------
# Data Utilities (altair/vegalite/v2/api.py)

def _dataset_name(values):
    """Build a deterministic dataset name from the content of ``values``.

    Parameters
    ----------
    values : list, dict, or core.InlineDataset
        The data values. An ``InlineDataset`` is first converted with its
        ``to_dict()`` method.

    Returns
    -------
    name : string
        ``'data-'`` followed by the MD5 hex digest of ``values`` serialized
        as JSON with sorted keys.
    """
    if isinstance(values, core.InlineDataset):
        values = values.to_dict()
    # sort_keys makes the serialization independent of dict key order
    serialized = json.dumps(values, sort_keys=True).encode()
    return 'data-' + hashlib.md5(serialized).hexdigest()


def _consolidate_data(data, context):
    """Replace unnamed inline data with a named reference.

    When ``data`` is a ``core.InlineData`` or a dict that carries values but
    no name, the values are stored in ``context['datasets']`` under a name
    derived from their hash, and a ``core.NamedData`` pointing at that name
    is returned. Any other input is returned unchanged.

    Parameters
    ----------
    data : object
        The data specification to inspect.
    context : dict
        Modified in-place: gains/updates a ``'datasets'`` entry when
        consolidation takes place.

    Returns
    -------
    data : object
        A new ``core.NamedData`` if values were moved, otherwise the input.
    """
    extracted = Undefined
    extras = {}

    if isinstance(data, core.InlineData):
        if data.name is Undefined and data.values is not Undefined:
            extracted = data.values
            extras = {'format': data.format}
    elif isinstance(data, dict):
        if 'name' not in data and 'values' in data:
            extracted = data['values']
            # carry every other key (e.g. format) over to the named data
            extras = {key: val for key, val in data.items()
                      if key != 'values'}

    if extracted is Undefined:
        return data

    name = _dataset_name(extracted)
    # build the reference first so a failure here leaves context untouched
    named = core.NamedData(name=name, **extras)
    context.setdefault('datasets', {})[name] = extracted
    return named


def _prepare_data(data, context):
    """Normalize a chart's data argument before serialization.

    The steps, in order: ``Undefined`` is returned as-is; a pandas DataFrame
    is piped through the transformer returned by ``data_transformers.get()``;
    a string is wrapped in ``core.UrlData``; and, if
    ``data_transformers.consolidate_datasets`` is set, inline values are moved
    into ``context`` via ``_consolidate_data``.

    Parameters
    ----------
    data : object
        The data supplied for the chart.
    context : dict
        Passed to ``_consolidate_data``, which may modify it in-place.

    Returns
    -------
    data : object
        The prepared data. A warning is issued (and the value is still
        returned) if its type is not one of the recognized data types.
    """
    if data is Undefined:
        return data

    # dataframes go through the active data transformer
    if isinstance(data, pd.DataFrame):
        data = pipe(data, data_transformers.get())

    # a bare string is wrapped as URL data
    if isinstance(data, six.string_types):
        data = core.UrlData(data)

    # optionally move inline values to the top-level datasets
    if data_transformers.consolidate_datasets:
        data = _consolidate_data(data, context)

    # anything still unrecognized triggers a warning but is passed through
    recognized = (dict, core.Data, core.UrlData,
                  core.InlineData, core.NamedData)
    if not isinstance(data, recognized):
        warnings.warn("data of type {0} not recognized".format(type(data)))

    return data


# ------------------------------------------------------------------------
# Tests (altair/vegalite/v2/tests/test_api.py)

def test_consolidate_InlineData():
    """InlineData keeps its format when consolidated; named data is untouched."""
    def render(chart, consolidate):
        # serialize the chart with consolidation switched on or off
        with alt.data_transformers.enable(consolidate_datasets=consolidate):
            return chart.to_dict()

    # unnamed inline data with an explicit format
    data = alt.InlineData(
        values=[{'a': 1, 'b': 1}, {'a': 2, 'b': 2}],
        format={'type': 'csv'},
    )
    chart = alt.Chart(data).mark_point()

    dct = render(chart, False)
    assert dct['data']['format'] == data.format
    assert dct['data']['values'] == data.values

    dct = render(chart, True)
    assert dct['data']['format'] == data.format
    stored = list(dct['datasets'].values())
    assert stored[0] == data.values

    # inline data that already has a name must be left as-is
    data = alt.InlineData(values=[], name='runtime_data')
    chart = alt.Chart(data).mark_point()

    dct = render(chart, False)
    assert dct['data'] == data.to_dict()

    dct = render(chart, True)
    assert dct['data'] == data.to_dict()