pandas-dev · TomAugspurger · Nov 20, 2016 · Nov 20, 2016 · Nov 21, 2016 · jreback
diff --git a/pandas/core/api.py b/pandas/core/api.py
@@ -6,7 +6,7 @@
 
 from pandas.core.algorithms import factorize, match, unique, value_counts
 from pandas.types.missing import isnull, notnull
-from pandas.core.categorical import Categorical
+from pandas.core.categorical import Categorical, CategoricalType
 from pandas.core.groupby import Grouper
 from pandas.formats.format import set_eng_float_format
 from pandas.core.index import (Index, CategoricalIndex, Int64Index,

diff --git a/pandas/core/categorical.py b/pandas/core/categorical.py
@@ -2066,3 +2066,48 @@ def _factorize_from_iterables(iterables):
         # For consistency, it should return a list of 2 lists.
         return [[], []]
     return map(list, lzip(*[_factorize_from_iterable(it) for it in iterables]))
+
+
+class CategoricalType(CategoricalDtype):
+    """
+    Describe categorical data by its categories and orderedness,
+    without holding any values.
+
+    Parameters
+    ----------
+    categories : list or None, default None
+        Categories for the type. ``None`` means the categories are
+        inferred by whatever operation the type is used in.
+    ordered : bool, default False
+        Whether the categories are ordered.
+
+    Examples
+    --------
+    >>> t = CategoricalType(categories=['b', 'a'], ordered=True)
+    >>> s = Series(['a', 'a', 'b', 'b', 'a'])
+    >>> s.astype(t)
+    0    a
+    1    a
+    2    b
+    3    b
+    4    a
+    dtype: category
+    Categories (2, object): [b < a]
+    """
+    dtype = 'category'
+    name = 'category'
+
+    def __new__(cls, categories=None, ordered=False):
+        """Create the type, storing `categories` and `ordered` on it."""
+        self = object.__new__(cls)
+        self.categories = categories
+        self.ordered = ordered
+        # XXX: Categorical kept only for the repr, will move to base type
+        self._categorical = Categorical(None, categories=categories,
+                                        ordered=ordered)
+        return self
+
+    def __repr__(self):
+        """Format as ``<CategoricalType ...>`` via ``_repr_categories``."""
+        return "<CategoricalType {}>".format(
+            self._categorical._repr_categories())
diff --git a/pandas/core/internals.py b/pandas/core/internals.py
@@ -470,6 +470,12 @@ def _astype(self, dtype, copy=False, raise_on_error=True, values=None,
         # may need to convert to categorical
         # this is only called for non-categoricals
         if self.is_categorical_astype(dtype):
+            kwargs = kwargs.copy()
+            categories = getattr(dtype, 'categories', None)
+            ordered = getattr(dtype, 'ordered', False)
+            # should we raise if CategoricalType and passed in kwargs?
+            kwargs.setdefault('categories', categories)
+            kwargs.setdefault('ordered', ordered)
             return self.make_block(Categorical(self.values, **kwargs))
 
         # astype processing

diff --git a/pandas/tests/series/test_dtypes.py b/pandas/tests/series/test_dtypes.py
@@ -8,7 +8,7 @@
 from numpy import nan
 import numpy as np
 
-from pandas import Series
+from pandas import Series, CategoricalType, Categorical
 from pandas.tseries.index import Timestamp
 from pandas.tseries.tdi import Timedelta
 
@@ -149,6 +149,12 @@ def test_astype_dict(self):
         self.assertRaises(KeyError, s.astype, {'abc': str, 'def': str})
         self.assertRaises(KeyError, s.astype, {0: str})
 
+    def test_astype_categorical(self):
+        # non-inferred category order, so the dtype's categories must be used
+        ctype = CategoricalType(['b', 'a'], ordered=True)
+        exp = Series(Categorical(['a', 'b', 'a'], ['b', 'a'], ordered=True))
+        assert_series_equal(Series(['a', 'b', 'a']).astype(ctype), exp)
+
     def test_complexx(self):
         # GH4819
         # complex access for ndarray compat