Skip to content

Commit

Permalink
FIX-#2566: Ensure Series.unique does not return a scalar when there is only one unique value (#2567)
Browse files Browse the repository at this point in the history

* FIX-#2566: Ensure unique doesn't return a scalar using np.atleast_1d

Signed-off-by: Richard Lin <[email protected]>

* FIX-#2566: Check array shapes match for test_unique

Signed-off-by: Richard Lin <[email protected]>

* FIX-#2566: Reduce unique dimensions using constructor instead

Signed-off-by: Richard Lin <[email protected]>
  • Loading branch information
richardlin047 authored Dec 23, 2020
1 parent c5aac3e commit db0f18c
Show file tree
Hide file tree
Showing 3 changed files with 15 additions and 1 deletion.
4 changes: 3 additions & 1 deletion modin/pandas/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -1345,7 +1345,9 @@ def truncate(self, before=None, after=None, axis=None, copy=True):
)

def unique(self):
return self._query_compiler.unique().to_numpy().squeeze()
return self.__constructor__(
query_compiler=self._query_compiler.unique()
).to_numpy()

def update(self, other):
if not isinstance(other, Series):
Expand Down
6 changes: 6 additions & 0 deletions modin/pandas/test/test_general.py
Original file line number Diff line number Diff line change
Expand Up @@ -528,10 +528,12 @@ def test_unique():
modin_result = pd.unique([2, 1, 3, 3])
pandas_result = pandas.unique([2, 1, 3, 3])
assert_array_equal(modin_result, pandas_result)
assert modin_result.shape == pandas_result.shape

modin_result = pd.unique(pd.Series([2] + [1] * 5))
pandas_result = pandas.unique(pandas.Series([2] + [1] * 5))
assert_array_equal(modin_result, pandas_result)
assert modin_result.shape == pandas_result.shape

modin_result = pd.unique(
pd.Series([pd.Timestamp("20160101"), pd.Timestamp("20160101")])
Expand All @@ -540,6 +542,7 @@ def test_unique():
pandas.Series([pandas.Timestamp("20160101"), pandas.Timestamp("20160101")])
)
assert_array_equal(modin_result, pandas_result)
assert modin_result.shape == pandas_result.shape

modin_result = pd.unique(
pd.Series(
Expand All @@ -558,6 +561,7 @@ def test_unique():
)
)
assert_array_equal(modin_result, pandas_result)
assert modin_result.shape == pandas_result.shape

modin_result = pd.unique(
pd.Index(
Expand All @@ -576,10 +580,12 @@ def test_unique():
)
)
assert_array_equal(modin_result, pandas_result)
assert modin_result.shape == pandas_result.shape

modin_result = pd.unique(pd.Series(pd.Categorical(list("baabc"))))
pandas_result = pandas.unique(pandas.Series(pandas.Categorical(list("baabc"))))
assert_array_equal(modin_result, pandas_result)
assert modin_result.shape == pandas_result.shape


@pytest.mark.parametrize("normalize, bins, dropna", [(True, 3, False)])
Expand Down
6 changes: 6 additions & 0 deletions modin/pandas/test/test_series.py
Original file line number Diff line number Diff line change
Expand Up @@ -3287,16 +3287,19 @@ def test_unique(data):
modin_result = modin_series.unique()
pandas_result = pandas_series.unique()
assert_array_equal(modin_result, pandas_result)
assert modin_result.shape == pandas_result.shape

modin_result = pd.Series([2, 1, 3, 3], name="A").unique()
pandas_result = pandas.Series([2, 1, 3, 3], name="A").unique()
assert_array_equal(modin_result, pandas_result)
assert modin_result.shape == pandas_result.shape

modin_result = pd.Series([pd.Timestamp("2016-01-01") for _ in range(3)]).unique()
pandas_result = pandas.Series(
[pd.Timestamp("2016-01-01") for _ in range(3)]
).unique()
assert_array_equal(modin_result, pandas_result)
assert modin_result.shape == pandas_result.shape

modin_result = pd.Series(
[pd.Timestamp("2016-01-01", tz="US/Eastern") for _ in range(3)]
Expand All @@ -3305,10 +3308,12 @@ def test_unique(data):
[pd.Timestamp("2016-01-01", tz="US/Eastern") for _ in range(3)]
).unique()
assert_array_equal(modin_result, pandas_result)
assert modin_result.shape == pandas_result.shape

modin_result = pandas.Series(pd.Categorical(list("baabc"))).unique()
pandas_result = pd.Series(pd.Categorical(list("baabc"))).unique()
assert_array_equal(modin_result, pandas_result)
assert modin_result.shape == pandas_result.shape

modin_result = pd.Series(
pd.Categorical(list("baabc"), categories=list("abc"), ordered=True)
Expand All @@ -3317,6 +3322,7 @@ def test_unique(data):
pd.Categorical(list("baabc"), categories=list("abc"), ordered=True)
).unique()
assert_array_equal(modin_result, pandas_result)
assert modin_result.shape == pandas_result.shape


@pytest.mark.parametrize("data", test_data_values, ids=test_data_keys)
Expand Down

0 comments on commit db0f18c

Please sign in to comment.