Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Added support for reverse sort of tabular data #1843

Merged
merged 2 commits into from
Sep 4, 2017
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 5 additions & 2 deletions examples/user_guide/07-Tabular_Datasets.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -463,7 +463,7 @@
"cell_type": "markdown",
"metadata": {},
"source": [
"Once data is in columnar form, it is simple to apply a variety of operations. For instance, Dataset can be sorted by their dimensions using the ``.sort()`` method. By default, this method will sort by the key dimensions, but any other dimension(s) can be sorted by providing them as an argument list to the sort method:"
"Once data is in columnar form, it is simple to apply a variety of operations. For instance, a Dataset can be sorted by its dimensions using the ``.sort()`` method. By default, this method sorts by the key dimensions in ascending order, but the data can instead be sorted by any other dimension(s) by passing them as a list to the sort method. Setting the ``reverse`` argument to ``True`` sorts in descending order instead:"
]
},
{
Expand All @@ -473,7 +473,10 @@
"outputs": [],
"source": [
"bars = hv.Bars((['C', 'A', 'B', 'D'], [2, 7, 3, 4]))\n",
"bars + bars.sort() + bars.sort(['y'])"
"(bars +\n",
" bars.sort().relabel('sorted') +\n",
" bars.sort(['y']).relabel('y-sorted') +\n",
" bars.sort(reverse=True).relabel('reverse sorted')).cols(2)"
]
},
{
Expand Down
4 changes: 2 additions & 2 deletions holoviews/core/data/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -218,14 +218,14 @@ def closest(self, coords=[], **kwargs):
return [xs[idx] for idx in idxs]


def sort(self, by=[]):
def sort(self, by=[], reverse=False):
"""
Sorts the data by the values along the supplied dimensions.
"""
if not by: by = self.kdims
if not isinstance(by, list): by = [by]

sorted_columns = self.interface.sort(self, by)
sorted_columns = self.interface.sort(self, by, reverse)
return self.clone(sorted_columns)


Expand Down
5 changes: 3 additions & 2 deletions holoviews/core/data/array.py
Original file line number Diff line number Diff line change
Expand Up @@ -98,7 +98,7 @@ def concat(cls, dataset_objs):


@classmethod
def sort(cls, dataset, by=[]):
def sort(cls, dataset, by=[], reverse=False):
data = dataset.data
if len(by) == 1:
sorting = cls.values(dataset, by[0]).argsort()
Expand All @@ -107,7 +107,8 @@ def sort(cls, dataset, by=[]):
sort_fields = tuple(dataset.get_dimension(d).name for d in by)
sorting = dataset.data.view(dtypes, np.recarray).T
sorting = sorting.argsort(order=sort_fields)[0]
return data[sorting]
sorted_data = data[sorting]
return sorted_data[::-1] if reverse else sorted_data


@classmethod
Expand Down
2 changes: 1 addition & 1 deletion holoviews/core/data/dask.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@ def range(cls, columns, dimension):
return dd.compute(column.min(), column.max())

@classmethod
def sort(cls, columns, by=[]):
def sort(cls, columns, by=[], reverse=False):
columns.warning('Dask dataframes do not support sorting')
return columns.data

Expand Down
5 changes: 3 additions & 2 deletions holoviews/core/data/dictionary.py
Original file line number Diff line number Diff line change
Expand Up @@ -153,14 +153,15 @@ def concat(cls, dataset_objs):


@classmethod
def sort(cls, dataset, by=[]):
def sort(cls, dataset, by=[], reverse=False):
by = [dataset.get_dimension(d).name for d in by]
if len(by) == 1:
sorting = cls.values(dataset, by[0]).argsort()
else:
arrays = [dataset.dimension_values(d) for d in by]
sorting = util.arglexsort(arrays)
return OrderedDict([(d, v[sorting]) for d, v in dataset.data.items()])
return OrderedDict([(d, v[sorting][::-1] if reverse else v[sorting])
for d, v in dataset.data.items()])


@classmethod
Expand Down
2 changes: 1 addition & 1 deletion holoviews/core/data/grid.py
Original file line number Diff line number Diff line change
Expand Up @@ -429,7 +429,7 @@ def add_dimension(cls, dataset, dimension, dim_pos, values, vdim):


@classmethod
def sort(cls, dataset, by=[]):
def sort(cls, dataset, by=[], reverse=False):
if not by or by in [dataset.kdims, dataset.dimensions()]:
return dataset.data
else:
Expand Down
2 changes: 1 addition & 1 deletion holoviews/core/data/iris.py
Original file line number Diff line number Diff line change
Expand Up @@ -259,7 +259,7 @@ def length(cls, dataset):


@classmethod
def sort(cls, columns, by=[]):
def sort(cls, columns, by=[], reverse=False):
"""
Cubes are assumed to be sorted by default.
"""
Expand Down
6 changes: 3 additions & 3 deletions holoviews/core/data/pandas.py
Original file line number Diff line number Diff line change
Expand Up @@ -177,14 +177,14 @@ def redim(cls, dataset, dimensions):


@classmethod
def sort(cls, columns, by=[]):
def sort(cls, columns, by=[], reverse=False):
import pandas as pd
cols = [columns.get_dimension(d, strict=True).name for d in by]

if (not isinstance(columns.data, pd.DataFrame) or
LooseVersion(pd.__version__) < '0.17.0'):
return columns.data.sort(columns=cols)
return columns.data.sort_values(by=cols)
return columns.data.sort(columns=cols, ascending=not reverse)
return columns.data.sort_values(by=cols, ascending=not reverse)


@classmethod
Expand Down
2 changes: 1 addition & 1 deletion holoviews/core/data/xarray.py
Original file line number Diff line number Diff line change
Expand Up @@ -220,7 +220,7 @@ def reindex(cls, dataset, kdims=None, vdims=None):
return dataset.data

@classmethod
def sort(cls, dataset, by=[]):
def sort(cls, dataset, by=[], reverse=False):
return dataset

@classmethod
Expand Down
39 changes: 39 additions & 0 deletions tests/testdataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -122,6 +122,13 @@ def test_dataset_sort_hm(self):
kdims=['x', 'y'], vdims=['z'])
self.assertEqual(ds.sort(), ds_sorted)

def test_dataset_sort_reverse_hm(self):
ds = Dataset(([2, 1, 2, 1], [2, 2, 1, 1], [0.1, 0.2, 0.3, 0.4]),
kdims=['x', 'y'], vdims=['z'])
ds_sorted = Dataset(([2, 2, 1, 1], [2, 1, 2, 1], [0.1, 0.3, 0.2, 0.4]),
kdims=['x', 'y'], vdims=['z'])
self.assertEqual(ds.sort(reverse=True), ds_sorted)

def test_dataset_sort_vdim_hm(self):
xs_2 = np.array(self.xs_2)
dataset = Dataset(np.column_stack([self.xs, -xs_2]),
Expand All @@ -130,6 +137,14 @@ def test_dataset_sort_vdim_hm(self):
kdims=['x'], vdims=['y'])
self.assertEqual(dataset.sort('y'), dataset_sorted)

def test_dataset_sort_reverse_vdim_hm(self):
xs_2 = np.array(self.xs_2)
dataset = Dataset(np.column_stack([self.xs, -xs_2]),
kdims=['x'], vdims=['y'])
dataset_sorted = Dataset(np.column_stack([self.xs, -xs_2]),
kdims=['x'], vdims=['y'])
self.assertEqual(dataset.sort('y', reverse=True), dataset_sorted)

def test_dataset_sort_vdim_hm_alias(self):
xs_2 = np.array(self.xs_2)
dataset = Dataset(np.column_stack([self.xs, -xs_2]),
Expand Down Expand Up @@ -782,6 +797,9 @@ def test_dataset_2D_aggregate_spread_fn_with_duplicates(self):
def test_dataset_sort_hm(self):
raise SkipTest("Not supported")

def test_dataset_sort_reverse_hm(self):
raise SkipTest("Not supported")

def test_dataset_sort_vdim_ht(self):
raise SkipTest("Not supported")

Expand Down Expand Up @@ -1030,12 +1048,21 @@ def test_dataset_dataframe_init_hm_alias(self):
def test_dataset_sort_hm(self):
raise SkipTest("Not supported")

def test_dataset_sort_reverse_hm(self):
raise SkipTest("Not supported")

def test_dataset_sort_vdim_hm(self):
exception = ('Compressed format cannot be sorted, either instantiate '
'in the desired order or use the expanded format.')
with self.assertRaisesRegexp(Exception, exception):
self.dataset_hm.sort('y')

def test_dataset_sort_reverse_vdim_hm(self):
exception = ('Compressed format cannot be sorted, either instantiate '
'in the desired order or use the expanded format.')
with self.assertRaisesRegexp(Exception, exception):
self.dataset_hm.sort('y', reverse=True)

def test_dataset_sort_vdim_hm_alias(self):
exception = ('Compressed format cannot be sorted, either instantiate '
'in the desired order or use the expanded format.')
Expand Down Expand Up @@ -1194,9 +1221,15 @@ def test_dataset_add_dimensions_values_hm_alias(self):
def test_dataset_sort_hm(self):
raise SkipTest("Not supported")

def test_dataset_sort_reverse_hm(self):
raise SkipTest("Not supported")

def test_dataset_sort_vdim_hm(self):
raise SkipTest("Not supported")

def test_dataset_sort_reverse_vdim_hm(self):
raise SkipTest("Not supported")

def test_dataset_sort_vdim_hm_alias(self):
raise SkipTest("Not supported")

Expand Down Expand Up @@ -1271,12 +1304,18 @@ def test_dataset_add_dimensions_values_hm(self):
def test_dataset_sort_hm(self):
raise SkipTest("Not supported")

def test_dataset_sort_reverse_hm(self):
raise SkipTest("Not supported")

def test_dataset_sort_vdim_hm_alias(self):
raise SkipTest("Not supported")

def test_dataset_sort_vdim_hm(self):
raise SkipTest("Not supported")

def test_dataset_sort_reverse_vdim_hm(self):
raise SkipTest("Not supported")

def test_dataset_sample_hm(self):
raise SkipTest("Not supported")

Expand Down