Skip to content

Commit

Permalink
REGR: NA-values in ctors with string dtype
Browse files Browse the repository at this point in the history
```python
In [1]: import pandas as pd
In [2]: pd.Series([1, 2, None], dtype='str')[2]  # None

```

Closes pandas-dev#21083
  • Loading branch information
TomAugspurger committed Jun 7, 2018
1 parent ab6aaf7 commit d07b238
Show file tree
Hide file tree
Showing 4 changed files with 57 additions and 7 deletions.
8 changes: 8 additions & 0 deletions doc/source/whatsnew/v0.23.1.txt
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,14 @@ and bug fixes. We recommend that all users upgrade to this version.
:local:
:backlinks: none

.. _whatsnew_0231.fixed_regressions:

Fixed Regressions
~~~~~~~~~~~~~~~~~

- Fixed regression in constructors coercing NA values like ``None`` to strings when passing ``dtype=str`` (:issue:`21083`)


.. _whatsnew_0231.enhancements:

New features
Expand Down
15 changes: 14 additions & 1 deletion pandas/core/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -4054,7 +4054,20 @@ def _try_cast(arr, take_fast_path):
isinstance(subarr, np.ndarray))):
subarr = construct_1d_object_array_from_listlike(subarr)
elif not is_extension_type(subarr):
subarr = np.array(subarr, dtype=dtype, copy=copy)
subarr2 = np.array(subarr, dtype=dtype, copy=copy)

if dtype and dtype.kind in ("U", "S"):
# GH-21083
# We can't just return np.array(subarr, dtype='str') since
# NumPy will convert the non-string objects into strings,
# including NA values. So we have to go
# string -> object -> update NA, which requires an
# additional pass over the data.
na_values = isna(subarr)
subarr2 = subarr2.astype(object)
subarr2[na_values] = np.asarray(subarr)[na_values]

subarr = subarr2
except (ValueError, TypeError):
if is_categorical_dtype(dtype):
# We *do* allow casting to categorical, since we know
Expand Down
8 changes: 8 additions & 0 deletions pandas/tests/frame/test_constructors.py
Original file line number Diff line number Diff line change
Expand Up @@ -151,6 +151,14 @@ def test_constructor_complex_dtypes(self):
assert a.dtype == df.a.dtype
assert b.dtype == df.b.dtype

def test_constructor_dtype_str_na_values(self):
# https://github.com/pandas-dev/pandas/issues/21083
df = DataFrame({'A': ['x', None]}, dtype=str)
result = df.isna()
expected = DataFrame({"A": [False, True]})
tm.assert_frame_equal(result, expected)
assert df.iloc[1, 0] is None

def test_constructor_rec(self):
rec = self.frame.to_records(index=False)

Expand Down
33 changes: 27 additions & 6 deletions pandas/tests/series/test_constructors.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,17 @@
from .common import TestData


@pytest.fixture(params=[str, 'str', 'U'])
def string_dtype(request):
    """Parametrized fixture for string dtypes.

    Each test that requests this fixture runs once per spelling:

    * str
    * 'str'
    * 'U'
    """
    return request.param


class TestSeriesConstructors(TestData):

def test_invalid_dtype(self):
Expand Down Expand Up @@ -137,6 +148,14 @@ def test_constructor_no_data_index_order(self):
result = pd.Series(index=['b', 'a', 'c'])
assert result.index.tolist() == ['b', 'a', 'c']

def test_constructor_dtype_str_na_values(self):
# https://github.com/pandas-dev/pandas/issues/21083
ser = Series(['x', None], dtype=str)
result = ser.isna()
expected = Series([False, True])
tm.assert_series_equal(result, expected)
assert ser.iloc[1] is None

def test_constructor_series(self):
index1 = ['d', 'b', 'a', 'c']
index2 = sorted(index1)
Expand Down Expand Up @@ -164,22 +183,24 @@ def test_constructor_list_like(self):

@pytest.mark.parametrize('input_vals', [
    ([1, 2]),
    (['1', '2']),
    (list(pd.date_range('1/1/2011', periods=2, freq='H'))),
    (list(pd.date_range('1/1/2011', periods=2, freq='H',
                        tz='US/Eastern'))),
    ([pd.Interval(left=0, right=5)]),
])
def test_constructor_list_str(self, input_vals, string_dtype):
    """Check that list data is converted to strings for string dtypes.

    For every ``input_vals`` case and every ``string_dtype`` spelling,
    constructing with the dtype must match constructing without it and
    then calling ``astype`` with the same dtype.

    Inputs containing NA values are not listed here; that case is
    covered by ``test_constructor_list_str_na``.
    """
    # GH 16605
    # Ensure that data elements from a list are converted to strings
    # when dtype is str, 'str', or 'U'
    result = Series(input_vals, dtype=string_dtype)
    expected = Series(input_vals).astype(string_dtype)
    assert_series_equal(result, expected)

def test_constructor_list_str_na(self, string_dtype):
result = Series([1.0, 2.0, np.nan], dtype=string_dtype)
expected = Series(['1.0', '2.0', None], dtype=object)
assert_series_equal(result, expected)

def test_constructor_generator(self):
gen = (i for i in range(10))
Expand Down

0 comments on commit d07b238

Please sign in to comment.