Skip to content

Commit

Permalink
Added support for reverse sort of tabular data (#1843)
Browse files Browse the repository at this point in the history
  • Loading branch information
philippjfr committed Sep 12, 2017
1 parent c85915a commit 45a25f4
Show file tree
Hide file tree
Showing 10 changed files with 59 additions and 15 deletions.
7 changes: 5 additions & 2 deletions examples/user_guide/07-Tabular_Datasets.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -463,7 +463,7 @@
"cell_type": "markdown",
"metadata": {},
"source": [
"Once data is in columnar form, it is simple to apply a variety of operations. For instance, Dataset can be sorted by their dimensions using the ``.sort()`` method. By default, this method will sort by the key dimensions, but any other dimension(s) can be sorted by providing them as an argument list to the sort method:"
"Once data is in columnar form, it is simple to apply a variety of operations. For instance, Datasets can be sorted by their dimensions using the ``.sort()`` method. By default, this method will sort by the key dimensions in ascending order, but any other dimension(s) can be sorted by providing them as an argument list to the sort method. The ``reverse`` argument also allows sorting in descending order:"
]
},
{
Expand All @@ -473,7 +473,10 @@
"outputs": [],
"source": [
"bars = hv.Bars((['C', 'A', 'B', 'D'], [2, 7, 3, 4]))\n",
"bars + bars.sort() + bars.sort(['y'])"
"(bars +\n",
" bars.sort().relabel('sorted') +\n",
" bars.sort(['y']).relabel('y-sorted') +\n",
" bars.sort(reverse=True).relabel('reverse sorted')).cols(2)"
]
},
{
Expand Down
4 changes: 2 additions & 2 deletions holoviews/core/data/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -218,14 +218,14 @@ def closest(self, coords=[], **kwargs):
return [xs[idx] for idx in idxs]


def sort(self, by=[]):
def sort(self, by=[], reverse=False):
"""
Sorts the data by the values along the supplied dimensions.
"""
if not by: by = self.kdims
if not isinstance(by, list): by = [by]

sorted_columns = self.interface.sort(self, by)
sorted_columns = self.interface.sort(self, by, reverse)
return self.clone(sorted_columns)


Expand Down
5 changes: 3 additions & 2 deletions holoviews/core/data/array.py
Original file line number Diff line number Diff line change
Expand Up @@ -98,7 +98,7 @@ def concat(cls, dataset_objs):


@classmethod
def sort(cls, dataset, by=[]):
def sort(cls, dataset, by=[], reverse=False):
data = dataset.data
if len(by) == 1:
sorting = cls.values(dataset, by[0]).argsort()
Expand All @@ -107,7 +107,8 @@ def sort(cls, dataset, by=[]):
sort_fields = tuple(dataset.get_dimension(d).name for d in by)
sorting = dataset.data.view(dtypes, np.recarray).T
sorting = sorting.argsort(order=sort_fields)[0]
return data[sorting]
sorted_data = data[sorting]
return sorted_data[::-1] if reverse else sorted_data


@classmethod
Expand Down
2 changes: 1 addition & 1 deletion holoviews/core/data/dask.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@ def range(cls, columns, dimension):
return dd.compute(column.min(), column.max())

@classmethod
def sort(cls, columns, by=[]):
def sort(cls, columns, by=[], reverse=False):
columns.warning('Dask dataframes do not support sorting')
return columns.data

Expand Down
5 changes: 3 additions & 2 deletions holoviews/core/data/dictionary.py
Original file line number Diff line number Diff line change
Expand Up @@ -153,14 +153,15 @@ def concat(cls, dataset_objs):


@classmethod
def sort(cls, dataset, by=[]):
def sort(cls, dataset, by=[], reverse=False):
by = [dataset.get_dimension(d).name for d in by]
if len(by) == 1:
sorting = cls.values(dataset, by[0]).argsort()
else:
arrays = [dataset.dimension_values(d) for d in by]
sorting = util.arglexsort(arrays)
return OrderedDict([(d, v[sorting]) for d, v in dataset.data.items()])
return OrderedDict([(d, v[sorting][::-1] if reverse else v[sorting])
for d, v in dataset.data.items()])


@classmethod
Expand Down
2 changes: 1 addition & 1 deletion holoviews/core/data/grid.py
Original file line number Diff line number Diff line change
Expand Up @@ -429,7 +429,7 @@ def add_dimension(cls, dataset, dimension, dim_pos, values, vdim):


@classmethod
def sort(cls, dataset, by=[]):
def sort(cls, dataset, by=[], reverse=False):
if not by or by in [dataset.kdims, dataset.dimensions()]:
return dataset.data
else:
Expand Down
2 changes: 1 addition & 1 deletion holoviews/core/data/iris.py
Original file line number Diff line number Diff line change
Expand Up @@ -259,7 +259,7 @@ def length(cls, dataset):


@classmethod
def sort(cls, columns, by=[]):
def sort(cls, columns, by=[], reverse=False):
"""
Cubes are assumed to be sorted by default.
"""
Expand Down
6 changes: 3 additions & 3 deletions holoviews/core/data/pandas.py
Original file line number Diff line number Diff line change
Expand Up @@ -177,14 +177,14 @@ def redim(cls, dataset, dimensions):


@classmethod
def sort(cls, columns, by=[]):
def sort(cls, columns, by=[], reverse=False):
import pandas as pd
cols = [columns.get_dimension(d, strict=True).name for d in by]

if (not isinstance(columns.data, pd.DataFrame) or
LooseVersion(pd.__version__) < '0.17.0'):
return columns.data.sort(columns=cols)
return columns.data.sort_values(by=cols)
return columns.data.sort(columns=cols, ascending=not reverse)
return columns.data.sort_values(by=cols, ascending=not reverse)


@classmethod
Expand Down
2 changes: 1 addition & 1 deletion holoviews/core/data/xarray.py
Original file line number Diff line number Diff line change
Expand Up @@ -220,7 +220,7 @@ def reindex(cls, dataset, kdims=None, vdims=None):
return dataset.data

@classmethod
def sort(cls, dataset, by=[]):
def sort(cls, dataset, by=[], reverse=False):
return dataset

@classmethod
Expand Down
39 changes: 39 additions & 0 deletions tests/testdataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -122,6 +122,13 @@ def test_dataset_sort_hm(self):
kdims=['x', 'y'], vdims=['z'])
self.assertEqual(ds.sort(), ds_sorted)

def test_dataset_sort_reverse_hm(self):
ds = Dataset(([2, 1, 2, 1], [2, 2, 1, 1], [0.1, 0.2, 0.3, 0.4]),
kdims=['x', 'y'], vdims=['z'])
ds_sorted = Dataset(([2, 2, 1, 1], [2, 1, 2, 1], [0.1, 0.3, 0.2, 0.4]),
kdims=['x', 'y'], vdims=['z'])
self.assertEqual(ds.sort(reverse=True), ds_sorted)

def test_dataset_sort_vdim_hm(self):
xs_2 = np.array(self.xs_2)
dataset = Dataset(np.column_stack([self.xs, -xs_2]),
Expand All @@ -130,6 +137,14 @@ def test_dataset_sort_vdim_hm(self):
kdims=['x'], vdims=['y'])
self.assertEqual(dataset.sort('y'), dataset_sorted)

def test_dataset_sort_reverse_vdim_hm(self):
xs_2 = np.array(self.xs_2)
dataset = Dataset(np.column_stack([self.xs, -xs_2]),
kdims=['x'], vdims=['y'])
dataset_sorted = Dataset(np.column_stack([self.xs, -xs_2]),
kdims=['x'], vdims=['y'])
self.assertEqual(dataset.sort('y', reverse=True), dataset_sorted)

def test_dataset_sort_vdim_hm_alias(self):
xs_2 = np.array(self.xs_2)
dataset = Dataset(np.column_stack([self.xs, -xs_2]),
Expand Down Expand Up @@ -782,6 +797,9 @@ def test_dataset_2D_aggregate_spread_fn_with_duplicates(self):
def test_dataset_sort_hm(self):
raise SkipTest("Not supported")

def test_dataset_sort_reverse_hm(self):
raise SkipTest("Not supported")

def test_dataset_sort_vdim_ht(self):
raise SkipTest("Not supported")

Expand Down Expand Up @@ -1030,12 +1048,21 @@ def test_dataset_dataframe_init_hm_alias(self):
def test_dataset_sort_hm(self):
raise SkipTest("Not supported")

def test_dataset_sort_reverse_hm(self):
raise SkipTest("Not supported")

def test_dataset_sort_vdim_hm(self):
exception = ('Compressed format cannot be sorted, either instantiate '
'in the desired order or use the expanded format.')
with self.assertRaisesRegexp(Exception, exception):
self.dataset_hm.sort('y')

def test_dataset_sort_reverse_vdim_hm(self):
exception = ('Compressed format cannot be sorted, either instantiate '
'in the desired order or use the expanded format.')
with self.assertRaisesRegexp(Exception, exception):
self.dataset_hm.sort('y', reverse=True)

def test_dataset_sort_vdim_hm_alias(self):
exception = ('Compressed format cannot be sorted, either instantiate '
'in the desired order or use the expanded format.')
Expand Down Expand Up @@ -1194,9 +1221,15 @@ def test_dataset_add_dimensions_values_hm_alias(self):
def test_dataset_sort_hm(self):
raise SkipTest("Not supported")

def test_dataset_sort_reverse_hm(self):
raise SkipTest("Not supported")

def test_dataset_sort_vdim_hm(self):
raise SkipTest("Not supported")

def test_dataset_sort_reverse_vdim_hm(self):
raise SkipTest("Not supported")

def test_dataset_sort_vdim_hm_alias(self):
raise SkipTest("Not supported")

Expand Down Expand Up @@ -1271,12 +1304,18 @@ def test_dataset_add_dimensions_values_hm(self):
def test_dataset_sort_hm(self):
raise SkipTest("Not supported")

def test_dataset_sort_reverse_hm(self):
raise SkipTest("Not supported")

def test_dataset_sort_vdim_hm_alias(self):
raise SkipTest("Not supported")

def test_dataset_sort_vdim_hm(self):
raise SkipTest("Not supported")

def test_dataset_sort_reverse_vdim_hm(self):
raise SkipTest("Not supported")

def test_dataset_sample_hm(self):
raise SkipTest("Not supported")

Expand Down

0 comments on commit 45a25f4

Please sign in to comment.