Skip to content

Commit

Permalink
Fix DataArray.stack() with non-unique coordinates on pandas 0.23 (#2168)
Browse files Browse the repository at this point in the history
  • Loading branch information
shoyer authored May 26, 2018
1 parent 04df50e commit a28aab0
Show file tree
Hide file tree
Showing 4 changed files with 30 additions and 7 deletions.
4 changes: 4 additions & 0 deletions doc/whats-new.rst
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,10 @@ Bug fixes
dimension were improperly skipped.
By `Stephan Hoyer <https://github.com/shoyer>`_

- Fix :meth:`~DataArray.stack` with non-unique coordinates on pandas 0.23
(:issue:`2160`).
By `Stephan Hoyer <https://github.com/shoyer>`_

- Selecting data indexed by a length-1 ``CFTimeIndex`` with a slice of strings
now behaves as it does when using a length-1 ``DatetimeIndex`` (i.e. it no
longer falsely returns an empty array when the slice includes the value in
Expand Down
14 changes: 8 additions & 6 deletions xarray/core/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -76,22 +76,24 @@ def safe_cast_to_index(array):
def multiindex_from_product_levels(levels, names=None):
    """Create a MultiIndex from the Cartesian product of levels.

    Each input index is factorized on its own, so a level that holds
    repeated values still produces a MultiIndex whose levels are unique.
    Level values are kept in their order of first appearance instead of
    being sorted, which gives back the original labels when we unstack.

    Parameters
    ----------
    levels : sequence of pd.Index
        Values for each MultiIndex level.
    names : optional sequence of objects
        Names for each level.

    Returns
    -------
    pandas.MultiIndex

    Raises
    ------
    TypeError
        If any element of ``levels`` is not a ``pd.Index``.
    """
    # Materialize once: ``levels`` is iterated twice below, which would
    # silently exhaust a one-shot iterable during the type check.
    levels = list(levels)
    if any(not isinstance(lev, pd.Index) for lev in levels):
        raise TypeError('levels must be a list of pd.Index objects')

    # factorize() yields (integer codes, unique values) for every level, so
    # duplicated coordinate values map onto a single level entry.
    split_labels, levels = zip(*[lev.factorize() for lev in levels])
    # 'ij' indexing makes the first level vary slowest, i.e. product order.
    labels_mesh = np.meshgrid(*split_labels, indexing='ij')
    labels = [x.ravel() for x in labels_mesh]
    # The codes are passed positionally on purpose: the keyword for this
    # argument was renamed from ``labels`` to ``codes`` in pandas 0.24.
    return pd.MultiIndex(levels, labels, sortorder=0, names=names)

Expand Down
7 changes: 7 additions & 0 deletions xarray/tests/test_dataarray.py
Original file line number Diff line number Diff line change
Expand Up @@ -1673,6 +1673,13 @@ def test_unstack_pandas_consistency(self):
actual = DataArray(s, dims='z').unstack('z')
assert_identical(expected, actual)

def test_stack_nonunique_consistency(self):
    """Stacking over a repeated ``y`` label must match pandas' ``stack``."""
    # The ``y`` coordinate deliberately repeats the value 0.
    original = DataArray(
        [[0, 1], [2, 3]],
        dims=['x', 'y'],
        coords={'x': [0, 1], 'y': [0, 0]},
    )
    # Reference result: let pandas do the stacking of the same data.
    expected = DataArray(original.to_pandas().stack(), dims='z')
    actual = original.stack(z=['x', 'y'])
    assert_identical(expected, actual)

def test_transpose(self):
assert_equal(self.dv.variable.transpose(),
self.dv.transpose().variable)
Expand Down
12 changes: 11 additions & 1 deletion xarray/tests/test_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,8 @@ def test_safe_cast_to_index_datetime_datetime(enable_cftimeindex):


def test_multiindex_from_product_levels():
result = utils.multiindex_from_product_levels([['b', 'a'], [1, 3, 2]])
result = utils.multiindex_from_product_levels(
[pd.Index(['b', 'a']), pd.Index([1, 3, 2])])
np.testing.assert_array_equal(
result.labels, [[0, 0, 0, 1, 1, 1], [0, 1, 2, 0, 1, 2]])
np.testing.assert_array_equal(result.levels[0], ['b', 'a'])
Expand All @@ -82,6 +83,15 @@ def test_multiindex_from_product_levels():
np.testing.assert_array_equal(result.values, other.values)


def test_multiindex_from_product_levels_non_unique():
    """A level with a repeated value collapses to unique level entries."""
    first_level = pd.Index(['b', 'a'])
    second_level = pd.Index([1, 1, 2])  # the value 1 appears twice
    result = utils.multiindex_from_product_levels(
        [first_level, second_level])

    # Both occurrences of 1 share label 0 in the second level.
    expected_labels = [[0, 0, 0, 1, 1, 1], [0, 0, 1, 0, 0, 1]]
    np.testing.assert_array_equal(result.labels, expected_labels)
    np.testing.assert_array_equal(result.levels[0], ['b', 'a'])
    np.testing.assert_array_equal(result.levels[1], [1, 2])


class TestArrayEquiv(TestCase):
def test_0d(self):
# verify our work around for pd.isnull not working for 0-dimensional
Expand Down

0 comments on commit a28aab0

Please sign in to comment.