Skip to content

Commit

Permalink
Don't store object or unicode numpy arrays in figure. Coerce to lists
Browse files Browse the repository at this point in the history
  • Loading branch information
jonmmease committed Jan 21, 2021
1 parent 340aed3 commit 6cea61d
Show file tree
Hide file tree
Showing 10 changed files with 132 additions and 85 deletions.
69 changes: 25 additions & 44 deletions packages/python/plotly/_plotly_utils/basevalidators.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ def to_scalar_or_list(v):
return v


def copy_to_readonly_numpy_array(v, kind=None, force_numeric=False):
def copy_to_readonly_numpy_array_or_list(v, kind=None, force_numeric=False):
"""
Convert an array-like value into a read-only numpy array or, for object-dtype and other inputs that are not kept as arrays, into a plain list
Expand Down Expand Up @@ -89,7 +89,7 @@ def copy_to_readonly_numpy_array(v, kind=None, force_numeric=False):

# u: unsigned int, i: signed int, f: float
numeric_kinds = {"u", "i", "f"}
kind_default_dtypes = {"u": "uint32", "i": "int32", "f": "float64", "O": "object"}
kind_default_dtypes = {"u": "uint32", "i": "int32", "f": "float64", "O": "object", "U": "U"}

# Handle pandas Series and Index objects
if pd and isinstance(v, (pd.Series, pd.Index)):
Expand All @@ -113,18 +113,12 @@ def copy_to_readonly_numpy_array(v, kind=None, force_numeric=False):
if not isinstance(v, np.ndarray):
# v has its own logic on how to convert itself into a numpy array
if is_numpy_convertable(v):
return copy_to_readonly_numpy_array(
return copy_to_readonly_numpy_array_or_list(
np.array(v), kind=kind, force_numeric=force_numeric
)
else:
# v is not a homogeneous array
v_list = [to_scalar_or_list(e) for e in v]

# Lookup dtype for requested kind, if any
dtype = kind_default_dtypes.get(first_kind, None)

# construct new array from list
new_v = np.array(v_list, order="C", dtype=dtype)
return [to_scalar_or_list(e) for e in v]
elif v.dtype.kind in numeric_kinds:
# v is a homogeneous numeric array
if kind and v.dtype.kind not in kind:
Expand All @@ -135,6 +129,12 @@ def copy_to_readonly_numpy_array(v, kind=None, force_numeric=False):
else:
# Either no kind was requested or requested kind is satisfied
new_v = np.ascontiguousarray(v.copy())
elif v.dtype.kind == "O":
if kind:
dtype = kind_default_dtypes.get(first_kind, None)
return np.array(v, dtype=dtype)
else:
return v.tolist()
else:
# v is a non-numeric homogeneous array
new_v = v.copy()
Expand All @@ -149,12 +149,12 @@ def copy_to_readonly_numpy_array(v, kind=None, force_numeric=False):
if "U" not in kind:
# Force non-numeric arrays to have object type
# --------------------------------------------
# Here we make sure that non-numeric arrays have the object
# datatype. This works around cases like np.array([1, 2, '3']) where
# Here we make sure that non-numeric arrays become lists
# This works around cases like np.array([1, 2, '3']) where
# numpy converts the integers to strings and returns array of dtype
# '<U21'
if new_v.dtype.kind not in ["u", "i", "f", "O", "M"]:
new_v = np.array(v, dtype="object")
return v.tolist()

# Set new array to be read-only
# -----------------------------
Expand Down Expand Up @@ -191,7 +191,7 @@ def is_homogeneous_array(v):
if v_numpy.shape == ():
return False
else:
return True
return True # v_numpy.dtype.kind in ["u", "i", "f", "M", "U"]
return False


Expand Down Expand Up @@ -393,7 +393,7 @@ def validate_coerce(self, v):
# Pass None through
pass
elif is_homogeneous_array(v):
v = copy_to_readonly_numpy_array(v)
v = copy_to_readonly_numpy_array_or_list(v)
elif is_simple_array(v):
v = to_scalar_or_list(v)
else:
Expand Down Expand Up @@ -598,7 +598,7 @@ def validate_coerce(self, v):
self.raise_invalid_elements(invalid_els[:10])

if is_homogeneous_array(v):
v = copy_to_readonly_numpy_array(v)
v = copy_to_readonly_numpy_array_or_list(v)
else:
v = to_scalar_or_list(v)
else:
Expand Down Expand Up @@ -754,7 +754,7 @@ def validate_coerce(self, v):
elif self.array_ok and is_homogeneous_array(v):
np = get_module("numpy")
try:
v_array = copy_to_readonly_numpy_array(v, force_numeric=True)
v_array = copy_to_readonly_numpy_array_or_list(v, force_numeric=True)
except (ValueError, TypeError, OverflowError):
self.raise_invalid_val(v)

Expand Down Expand Up @@ -881,7 +881,7 @@ def validate_coerce(self, v):
pass
elif self.array_ok and is_homogeneous_array(v):
np = get_module("numpy")
v_array = copy_to_readonly_numpy_array(
v_array = copy_to_readonly_numpy_array_or_list(
v, kind=("i", "u"), force_numeric=True
)

Expand Down Expand Up @@ -1042,26 +1042,7 @@ def validate_coerce(self, v):
if invalid_els:
self.raise_invalid_elements(invalid_els)

if is_homogeneous_array(v):
np = get_module("numpy")

# If not strict, let numpy cast elements to strings
v = copy_to_readonly_numpy_array(v, kind="U")

# Check no_blank
if self.no_blank:
invalid_els = v[v == ""][:10].tolist()
if invalid_els:
self.raise_invalid_elements(invalid_els)

# Check values
if self.values:
invalid_inds = np.logical_not(np.isin(v, self.values))
invalid_els = v[invalid_inds][:10].tolist()
if invalid_els:
self.raise_invalid_elements(invalid_els)

elif is_simple_array(v):
if is_simple_array(v) or is_homogeneous_array(v):
if not self.strict:
v = [StringValidator.to_str_or_unicode_or_none(e) for e in v]

Expand Down Expand Up @@ -1338,8 +1319,8 @@ def validate_coerce(self, v, should_raise=True):
# Pass None through
pass
elif self.array_ok and is_homogeneous_array(v):
v = copy_to_readonly_numpy_array(v)
if self.numbers_allowed() and v.dtype.kind in ["u", "i", "f"]:
v = copy_to_readonly_numpy_array_or_list(v)
if not isinstance(v, list) and self.numbers_allowed() and v.dtype.kind in ["u", "i", "f"]:
# Numbers are allowed and we have an array of numbers.
# All good
pass
Expand All @@ -1353,9 +1334,9 @@ def validate_coerce(self, v, should_raise=True):

# ### Check that elements have valid colors types ###
elif self.numbers_allowed() or invalid_els:
v = copy_to_readonly_numpy_array(validated_v, kind="O")
v = copy_to_readonly_numpy_array_or_list(validated_v, kind="O")
else:
v = copy_to_readonly_numpy_array(validated_v, kind="U")
v = copy_to_readonly_numpy_array_or_list(validated_v, kind="U")
elif self.array_ok and is_simple_array(v):
validated_v = [self.validate_coerce(e, should_raise=False) for e in v]

Expand Down Expand Up @@ -1870,7 +1851,7 @@ def validate_coerce(self, v):
self.raise_invalid_elements(invalid_els)

if is_homogeneous_array(v):
v = copy_to_readonly_numpy_array(validated_v, kind="U")
v = copy_to_readonly_numpy_array_or_list(validated_v, kind="U")
else:
v = to_scalar_or_list(v)
else:
Expand Down Expand Up @@ -1918,7 +1899,7 @@ def validate_coerce(self, v):
# Pass None through
pass
elif self.array_ok and is_homogeneous_array(v):
v = copy_to_readonly_numpy_array(v, kind="O")
v = copy_to_readonly_numpy_array_or_list(v, kind="O")
elif self.array_ok and is_simple_array(v):
v = to_scalar_or_list(v)
return v
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -33,14 +33,25 @@ def test_validator_acceptance_simple(val, validator):

@pytest.mark.parametrize(
"val",
[np.array([2, 3, 4]), pd.Series(["a", "b", "c"]), np.array([[1, 2, 3], [4, 5, 6]])],
[np.array([2, 3, 4]), np.array([[1, 2, 3], [4, 5, 6]])],
)
def test_validator_acceptance_homogeneous(val, validator):
coerce_val = validator.validate_coerce(val)
assert isinstance(coerce_val, np.ndarray)
assert np.array_equal(validator.present(coerce_val), val)


# Accept object array as list
@pytest.mark.parametrize(
    "val",
    [
        ["A", "B", "C"],
        np.array(["A", "B", "C"], dtype="object"),
        pd.Series(["a", "b", "c"]),
    ],
)
def test_validator_accept_object_array_as_list(val, validator):
    """Check that string-valued list-like input is coerced to a plain list.

    Each parametrized input (a list, an object-dtype numpy array and a
    pandas Series) must come back from ``validate_coerce`` as a ``list``
    whose elements equal ``list(val)``.
    """
    coerced = validator.validate_coerce(val)

    # The result must be a real list, not a numpy array
    assert isinstance(coerced, list)

    # Element values must survive the coercion unchanged
    assert coerced == list(val)


# ### Rejection ###
@pytest.mark.parametrize("val", ["Hello", 23, set(), {}])
def test_rejection(val, validator):
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -126,7 +126,7 @@ def test_rejection_by_element_aok(val, validator_aok):
[],
["bar12"],
("foo", "bar012", "baz"),
np.array([]),
np.array([], dtype="object"),
np.array(["bar12"]),
np.array(["foo", "bar012", "baz"]),
],
Expand All @@ -135,7 +135,7 @@ def test_acceptance_aok(val, validator_aok_re):
# Values should be accepted and returned unchanged
coerce_val = validator_aok_re.validate_coerce(val)
if isinstance(val, (np.ndarray, pd.Series)):
assert np.array_equal(coerce_val, np.array(val, dtype=coerce_val.dtype))
assert coerce_val == list(np.array(val))
elif isinstance(val, (list, tuple)):
assert validator_aok_re.present(coerce_val) == tuple(val)
else:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -149,13 +149,10 @@ def test_color_validator_object(color_validator, color_object_pandas):
res = color_validator.validate_coerce(color_object_pandas)

# Check type
assert isinstance(res, np.ndarray)

# Check dtype
assert res.dtype == "object"
assert isinstance(res, list)

# Check values
np.testing.assert_array_equal(res, color_object_pandas)
assert res == color_object_pandas.tolist()


def test_color_validator_categorical(color_validator, color_categorical_pandas):
Expand All @@ -164,13 +161,10 @@ def test_color_validator_categorical(color_validator, color_categorical_pandas):

# Check type
assert color_categorical_pandas.dtype == "category"
assert isinstance(res, np.ndarray)

# Check dtype
assert res.dtype == "object"
assert isinstance(res, list)

# Check values
np.testing.assert_array_equal(res, np.array(color_categorical_pandas))
assert res == color_categorical_pandas.tolist()


def test_data_array_validator_dates_series(
Expand All @@ -180,13 +174,10 @@ def test_data_array_validator_dates_series(
res = data_array_validator.validate_coerce(datetime_pandas)

# Check type
assert isinstance(res, np.ndarray)

# Check dtype
assert res.dtype == "object"
assert isinstance(res, list)

# Check values
np.testing.assert_array_equal(res, dates_array)
assert res == dates_array.tolist()


def test_data_array_validator_dates_dataframe(
Expand All @@ -197,10 +188,7 @@ def test_data_array_validator_dates_dataframe(
res = data_array_validator.validate_coerce(df)

# Check type
assert isinstance(res, np.ndarray)

# Check dtype
assert res.dtype == "object"
assert isinstance(res, list)

# Check values
np.testing.assert_array_equal(res, dates_array.reshape(len(dates_array), 1))
assert res == dates_array.reshape(len(dates_array), 1).tolist()
Original file line number Diff line number Diff line change
Expand Up @@ -138,8 +138,7 @@ def test_acceptance_aok_scalars(val, validator_aok):
def test_acceptance_aok_list(val, validator_aok):
coerce_val = validator_aok.validate_coerce(val)
if isinstance(val, np.ndarray):
assert isinstance(coerce_val, np.ndarray)
assert np.array_equal(coerce_val, np.array(val, dtype=coerce_val.dtype))
assert coerce_val == val.tolist()
elif isinstance(val, list):
assert validator_aok.present(val) == tuple(val)
else:
Expand Down Expand Up @@ -178,9 +177,7 @@ def test_rejection_aok_values(val, validator_aok_values):
)
def test_acceptance_no_blanks_aok(val, validator_no_blanks_aok):
coerce_val = validator_no_blanks_aok.validate_coerce(val)
if isinstance(val, np.ndarray):
assert np.array_equal(coerce_val, np.array(val, dtype=coerce_val.dtype))
elif isinstance(val, list):
if isinstance(val, (list, np.ndarray)):
assert validator_no_blanks_aok.present(coerce_val) == tuple(val)
else:
assert coerce_val == val
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -126,10 +126,7 @@ def test_color_validator_object(color_validator, color_object_xarray):
res = color_validator.validate_coerce(color_object_xarray)

# Check type
assert isinstance(res, np.ndarray)

# Check dtype
assert res.dtype == "object"
assert isinstance(res, list)

# Check values
np.testing.assert_array_equal(res, color_object_xarray)
assert res == list(color_object_xarray)
Original file line number Diff line number Diff line change
Expand Up @@ -37,8 +37,8 @@ def test_custom_data_scatter():
hover_data=["petal_length", "petal_width"],
custom_data=["species_id", "species"],
)
assert np.all(fig.data[0].customdata[:, 0] == iris.species_id)
assert fig.data[0].customdata.shape[1] == 4
assert [e[0] for e in fig.data[0].customdata] == iris.species_id.to_list()
assert len(fig.data[0].customdata[0]) == 4
# Hover and custom data, with repeated arguments
fig = px.scatter(
iris,
Expand All @@ -47,8 +47,8 @@ def test_custom_data_scatter():
hover_data=["petal_length", "petal_width", "species_id"],
custom_data=["species_id", "species"],
)
assert np.all(fig.data[0].customdata[:, 0] == iris.species_id)
assert fig.data[0].customdata.shape[1] == 4
assert [e[0] for e in fig.data[0].customdata] == iris.species_id.tolist()
assert len(fig.data[0].customdata[0]) == 4
assert (
fig.data[0].hovertemplate
== "sepal_width=%{x}<br>sepal_length=%{y}<br>petal_length=%{customdata[2]}<br>petal_width=%{customdata[3]}<br>species_id=%{customdata[0]}<extra></extra>"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -229,9 +229,9 @@ def test_sunburst_treemap_with_path_color():
df["hover"] = [el.lower() for el in vendors]
fig = px.sunburst(df, path=path, color="calls", hover_data=["hover"])
custom = fig.data[0].customdata
assert np.all(custom[:8, 0] == df["hover"])
assert np.all(custom[8:, 0] == "(?)")
assert np.all(custom[:8, 1] == df["calls"])
assert [el[0] for el in custom[:8]] == df["hover"].tolist()
assert [el[0] for el in custom[8:]] == ["(?)"] * 7
assert [el[1] for el in custom[:8]] == df["calls"].tolist()

# Discrete color
fig = px.sunburst(df, path=path, color="vendors")
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -126,7 +126,7 @@ def test_repeated_name():
hover_data=["petal_length", "petal_width", "species_id"],
custom_data=["species_id", "species"],
)
assert fig.data[0].customdata.shape[1] == 4
assert len(fig.data[0].customdata[0]) == 4


def test_arrayattrable_numpy():
Expand Down
Loading

0 comments on commit 6cea61d

Please sign in to comment.