Skip to content

Commit

Permalink
ENH union_categoricals supports ignore_order GH13410
Browse files Browse the repository at this point in the history
  • Loading branch information
Justin Solinsky authored and Justin Solinsky committed Feb 20, 2017
1 parent 9b827ef commit d278d62
Show file tree
Hide file tree
Showing 4 changed files with 25 additions and 10 deletions.
11 changes: 11 additions & 0 deletions doc/source/categorical.rst
Original file line number Diff line number Diff line change
Expand Up @@ -693,6 +693,17 @@ The below raises ``TypeError`` because the categories are ordered and not identi
Out[3]:
TypeError: to union ordered Categoricals, all categories must be the same
.. versionadded:: 0.20.0

Ordered categoricals with different categories or orderings can be combined by
using the ``ignore_order=True`` argument.

.. ipython:: python
a = pd.Categorical(["a", "b", "c"], ordered=True)
b = pd.Categorical(["c", "b", "a"], ordered=True)
union_categoricals([a, b], ignore_order=True)
``union_categoricals`` also works with a ``CategoricalIndex``, or ``Series`` containing
categorical data, but note that the resulting array will always be a plain ``Categorical``

Expand Down
1 change: 1 addition & 0 deletions doc/source/whatsnew/v0.20.0.txt
Original file line number Diff line number Diff line change
Expand Up @@ -157,6 +157,7 @@ Other enhancements
- HTML table output skips ``colspan`` or ``rowspan`` attribute if equal to 1. (:issue:`15403`)

.. _ISO 8601 duration: https://en.wikipedia.org/wiki/ISO_8601#Durations
- ``ignore_order`` argument added to ``pd.types.concat.union_categoricals``; setting the argument to ``True`` will ignore the ordered attribute of unioned categoricals (:issue:`13410`)

.. _whatsnew_0200.api_breaking:

Expand Down
15 changes: 9 additions & 6 deletions pandas/tests/tools/test_concat.py
Original file line number Diff line number Diff line change
Expand Up @@ -1671,14 +1671,14 @@ def test_union_categoricals_ignore_order(self):
tm.assert_categorical_equal(res, exp)

res = union_categoricals([c1, c1], ignore_order=True)
exp = Categorical([1, 2, 3, 1, 2, 3], ordered=False)
exp = Categorical([1, 2, 3, 1, 2, 3])
tm.assert_categorical_equal(res, exp)

c1 = Categorical([1, 2, 3, np.nan], ordered=True)
c2 = Categorical([3, 2], categories=[1, 2, 3], ordered=True)

res = union_categoricals([c1, c2], ignore_order=True)
exp = Categorical([1, 2, 3, np.nan, 3, 2], ordered=False)
exp = Categorical([1, 2, 3, np.nan, 3, 2])
tm.assert_categorical_equal(res, exp)

c1 = Categorical([1, 2, 3], ordered=True)
Expand All @@ -1688,13 +1688,16 @@ def test_union_categoricals_ignore_order(self):
exp = Categorical([1, 2, 3, 1, 2, 3])
tm.assert_categorical_equal(res, exp)

c1 = Categorical([1, 2, 3], categories=[3, 2, 1], ordered=True)
c2 = Categorical([1, 2, 3], ordered=True)

res = union_categoricals([c1, c2], ignore_order=True, sort_categories=True)
res = union_categoricals([c2, c1], ignore_order=True, sort_categories=True)
exp = Categorical([1, 2, 3, 1, 2, 3], categories=[1, 2, 3])
tm.assert_categorical_equal(res, exp)

c1 = Categorical([1, 2, 3], ordered=True)
c2 = Categorical([4, 5, 6], ordered=True)
result = union_categoricals([c1, c2], ignore_order=True)
expected = Categorical([1, 2, 3, 4, 5, 6])
tm.assert_categorical_equal(result, expected)

def test_union_categoricals_sort(self):
# GH 13846
c1 = Categorical(['x', 'y', 'z'])
Expand Down
8 changes: 4 additions & 4 deletions pandas/types/concat.py
Original file line number Diff line number Diff line change
Expand Up @@ -223,8 +223,8 @@ def union_categoricals(to_union, sort_categories=False, ignore_order=False):
If true, resulting categories will be lexsorted, otherwise
they will be ordered as they appear in the data.
ignore_order: boolean, default False
If true, ordered categories will be ignored. Results in
an unordered categorical.
If true, the ordered attribute of the Categoricals will be ignored.
Results in an unordered categorical.
Returns
-------
Expand All @@ -238,7 +238,7 @@ def union_categoricals(to_union, sort_categories=False, ignore_order=False):
- all inputs are ordered and their categories are not identical
- sort_categories=True and Categoricals are ordered
ValueError
Emmpty list of categoricals passed
Empty list of categoricals passed
"""
from pandas import Index, Categorical, CategoricalIndex, Series

Expand Down Expand Up @@ -275,7 +275,7 @@ def _maybe_unwrap(x):
categories = categories.sort_values()
indexer = categories.get_indexer(first.categories)
new_codes = take_1d(indexer, new_codes, fill_value=-1)
elif ignore_order | all(not c.ordered for c in to_union):
elif ignore_order or all(not c.ordered for c in to_union):
# different categories - union and recode
cats = first.categories.append([c.categories for c in to_union[1:]])
categories = Index(cats.unique())
Expand Down

0 comments on commit d278d62

Please sign in to comment.