Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Better handle returned dask npartitions in tests #1107

Merged
merged 1 commit into from
Aug 9, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
225 changes: 171 additions & 54 deletions datashader/tests/test_dask.py
Original file line number Diff line number Diff line change
Expand Up @@ -702,13 +702,36 @@ def test_line_manual_range(DataFrame, df_kwargs, cvs_kwargs):

agg = cvs.line(ddf, agg=ds.count(), **cvs_kwargs)

sol = np.array([[0, 0, 1, 0, 1, 0, 0],
[0, 1, 0, 0, 0, 1, 0],
[1, 0, 0, 0, 0, 0, 1],
[1, 1, 1, 1, 1, 1, 1],
[1, 0, 0, 0, 0, 0, 1],
[0, 1, 0, 0, 0, 1, 0],
[0, 0, 1, 0, 1, 0, 0]], dtype='i4')
if (ddf.npartitions == 2 and cvs_kwargs.get('axis') == 0 and
isinstance(cvs_kwargs['y'], (list, tuple))):
# GitHub issue #1106.
# When axis == 0, lines/areas that dask splits across partitions are not
# rejoined, so the output has undesirable missing segments.
if isinstance(cvs_kwargs['x'], list):
sol = np.array([[0, 0, 0, 0, 1, 0, 0],
[0, 0, 0, 0, 0, 1, 0],
[0, 0, 0, 0, 0, 0, 1],
[0, 0, 0, 1, 1, 1, 1],
[1, 0, 0, 0, 0, 0, 0],
[0, 1, 0, 0, 0, 0, 0],
[0, 0, 1, 0, 0, 0, 0]], dtype='i4')
else:
sol = np.array([[0, 0, 1, 0, 0, 0, 0],
[0, 1, 0, 0, 0, 0, 0],
[1, 0, 0, 0, 0, 0, 0],
[1, 1, 1, 1, 0, 0, 0],
[1, 0, 0, 0, 0, 0, 0],
[0, 1, 0, 0, 0, 0, 0],
[0, 0, 1, 0, 0, 0, 0]], dtype='i4')
else:
# Ideally all tests would give this solution.
sol = np.array([[0, 0, 1, 0, 1, 0, 0],
[0, 1, 0, 0, 0, 1, 0],
[1, 0, 0, 0, 0, 0, 1],
[1, 1, 1, 1, 1, 1, 1],
[1, 0, 0, 0, 0, 0, 1],
[0, 1, 0, 0, 0, 1, 0],
[0, 0, 1, 0, 1, 0, 0]], dtype='i4')

out = xr.DataArray(sol, coords=[lincoords, lincoords],
dims=['y', 'x'])
Expand Down Expand Up @@ -790,15 +813,41 @@ def test_line_autorange(DataFrame, df_kwargs, cvs_kwargs):

agg = cvs.line(ddf, agg=ds.count(), **cvs_kwargs)

sol = np.array([[0, 0, 0, 0, 3, 0, 0, 0, 0],
[0, 0, 0, 1, 1, 1, 0, 0, 0],
[0, 0, 1, 0, 1, 0, 1, 0, 0],
[0, 1, 0, 0, 1, 0, 0, 1, 0],
[1, 0, 0, 0, 1, 0, 0, 0, 1],
[0, 1, 0, 0, 1, 0, 0, 1, 0],
[0, 0, 1, 0, 1, 0, 1, 0, 0],
[0, 0, 0, 1, 1, 1, 0, 0, 0],
[0, 0, 0, 0, 3, 0, 0, 0, 0]], dtype='i4')
if (ddf.npartitions == 2 and cvs_kwargs.get('axis') == 0 and
isinstance(cvs_kwargs['x'], (list, tuple))):
# GitHub issue #1106.
# When axis == 0, lines/areas that dask splits across partitions are not
# rejoined, so the output has undesirable missing segments.
if isinstance(cvs_kwargs['y'], list):
sol = np.array([[0, 0, 0, 0, 2, 0, 0, 0, 0],
[0, 0, 0, 1, 0, 1, 0, 0, 0],
[0, 0, 1, 0, 0, 0, 1, 0, 0],
[0, 1, 0, 0, 0, 0, 0, 1, 0],
[1, 0, 0, 0, 1, 0, 0, 0, 1],
[0, 0, 0, 0, 1, 0, 0, 0, 0],
[0, 0, 0, 0, 1, 0, 0, 0, 0],
[0, 0, 0, 0, 1, 0, 0, 0, 0],
[0, 0, 0, 0, 1, 0, 0, 0, 0]], dtype='i4')
else:
sol = np.array([[0, 0, 0, 0, 3, 0, 0, 0, 0],
[0, 0, 0, 1, 1, 1, 0, 0, 0],
[0, 0, 1, 0, 1, 0, 1, 0, 0],
[0, 1, 0, 0, 1, 0, 0, 1, 0],
[1, 0, 0, 0, 1, 0, 0, 0, 1],
[0, 0, 0, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 0, 0]], dtype='i4')
else:
sol = np.array([[0, 0, 0, 0, 3, 0, 0, 0, 0],
[0, 0, 0, 1, 1, 1, 0, 0, 0],
[0, 0, 1, 0, 1, 0, 1, 0, 0],
[0, 1, 0, 0, 1, 0, 0, 1, 0],
[1, 0, 0, 0, 1, 0, 0, 0, 1],
[0, 1, 0, 0, 1, 0, 0, 1, 0],
[0, 0, 1, 0, 1, 0, 1, 0, 0],
[0, 0, 0, 1, 1, 1, 0, 0, 0],
[0, 0, 0, 0, 3, 0, 0, 0, 0]], dtype='i4')

out = xr.DataArray(sol, coords=[lincoords, lincoords],
dims=['y', 'x'])
Expand Down Expand Up @@ -933,12 +982,24 @@ def test_area_to_zero_fixedrange(DataFrame, df_kwargs, cvs_kwargs):

agg = cvs.area(ddf, agg=ds.count(), **cvs_kwargs)

sol = np.array([[0, 1, 1, 0, 0, 0, 0, 0, 0],
[1, 1, 1, 1, 0, 0, 0, 0, 0],
[1, 1, 1, 1, 1, 0, 1, 1, 1],
[0, 0, 0, 0, 0, 0, 1, 1, 1],
[0, 0, 0, 0, 0, 0, 1, 1, 0]],
dtype='i4')
if (ddf.npartitions == 2 and cvs_kwargs.get('axis') == 0 and
isinstance(cvs_kwargs['x'], (list, tuple))):
# GitHub issue #1106.
# When axis == 0, lines/areas that dask splits across partitions are not
# rejoined, so the output has undesirable missing segments.
sol = np.array([[0, 1, 1, 0, 0, 0, 0, 0, 0],
[1, 1, 1, 0, 0, 0, 0, 0, 0],
[1, 1, 1, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 0, 0]],
dtype='i4')
else:
sol = np.array([[0, 1, 1, 0, 0, 0, 0, 0, 0],
[1, 1, 1, 1, 0, 0, 0, 0, 0],
[1, 1, 1, 1, 1, 0, 1, 1, 1],
[0, 0, 0, 0, 0, 0, 1, 1, 1],
[0, 0, 0, 0, 0, 0, 1, 1, 0]],
dtype='i4')

out = xr.DataArray(sol, coords=[lincoords_y, lincoords_x],
dims=['y', 'x'])
Expand Down Expand Up @@ -1009,14 +1070,28 @@ def test_area_to_zero_autorange(DataFrame, df_kwargs, cvs_kwargs):
ddf = DataFrame(**df_kwargs)
agg = cvs.area(ddf, agg=ds.count(), **cvs_kwargs)

sol = np.array([[0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0],
[0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0],
[0, 0, 1, 1, 1, 0, 0, 0, 1, 1, 1, 0, 0],
[0, 0, 1, 1, 1, 0, 0, 0, 1, 1, 1, 0, 0],
[0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0],
[0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0],
[1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1]],
dtype='i4')
if (ddf.npartitions == 2 and cvs_kwargs.get('axis') == 0 and
isinstance(cvs_kwargs['x'], (list, tuple))):
# GitHub issue #1106.
# When axis == 0, lines/areas that dask splits across partitions are not
# rejoined, so the output has undesirable missing segments.
sol = np.array([[0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0],
[0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0],
[0, 0, 1, 1, 0, 0, 0, 0, 1, 1, 0, 0, 0],
[0, 0, 1, 1, 0, 0, 0, 0, 1, 1, 0, 0, 0],
[0, 1, 1, 1, 0, 0, 0, 1, 1, 1, 0, 0, 0],
[0, 1, 1, 1, 0, 0, 0, 1, 1, 1, 0, 0, 0],
[1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 0, 0]],
dtype='i4')
else:
sol = np.array([[0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0],
[0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0],
[0, 0, 1, 1, 1, 0, 0, 0, 1, 1, 1, 0, 0],
[0, 0, 1, 1, 1, 0, 0, 0, 1, 1, 1, 0, 0],
[0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0],
[0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0],
[1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1]],
dtype='i4')

out = xr.DataArray(sol, coords=[lincoords_y, lincoords_x],
dims=['y', 'x'])
Expand Down Expand Up @@ -1072,14 +1147,28 @@ def test_area_to_zero_autorange_gap(DataFrame, df_kwargs, cvs_kwargs):

agg = cvs.area(ddf, agg=ds.count(), **cvs_kwargs)

sol = np.array([[0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0],
[0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0],
[0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0],
[1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1],
[0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0],
[0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0]],
dtype='i4')
if (ddf.npartitions == 2 and cvs_kwargs.get('axis') == 0 and
isinstance(cvs_kwargs['x'], (list, tuple))):
# GitHub issue #1106.
# When axis == 0, lines/areas that dask splits across partitions are not
# rejoined, so the output has undesirable missing segments.
sol = np.array([[0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0],
[0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0],
[0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0],
[1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]],
dtype='i4')
else:
sol = np.array([[0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0],
[0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0],
[0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0],
[1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1],
[0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0],
[0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0]],
dtype='i4')

out = xr.DataArray(sol, coords=[lincoords_y, lincoords_x],
dims=['y', 'x'])
Expand Down Expand Up @@ -1161,14 +1250,28 @@ def test_area_to_line_autorange(DataFrame, df_kwargs, cvs_kwargs):
ddf = DataFrame(**df_kwargs)
agg = cvs.area(ddf, agg=ds.count(), **cvs_kwargs)

sol = np.array([[0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0],
[0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0],
[0, 0, 1, 1, 1, 0, 0, 0, 1, 1, 1, 0, 0],
[0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0],
[0, 1, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0],
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]],
dtype='i4')
if (ddf.npartitions == 2 and cvs_kwargs.get('axis') == 0 and
isinstance(cvs_kwargs['x'], (list, tuple))):
# GitHub issue #1106.
# When axis == 0, lines/areas that dask splits across partitions are not
# rejoined, so the output has undesirable missing segments.
sol = np.array([[0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0],
[0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0],
[0, 0, 1, 1, 0, 0, 0, 0, 1, 1, 0, 0, 0],
[0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0],
[0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]],
dtype='i4')
else:
sol = np.array([[0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0],
[0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0],
[0, 0, 1, 1, 1, 0, 0, 0, 1, 1, 1, 0, 0],
[0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0],
[0, 1, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0],
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]],
dtype='i4')

out = xr.DataArray(sol, coords=[lincoords_y, lincoords_x],
dims=['y', 'x'])
Expand Down Expand Up @@ -1236,14 +1339,28 @@ def test_area_to_line_autorange_gap(DataFrame, df_kwargs, cvs_kwargs):
# the fill. So we expect the y=0 line to not be filled.
agg = cvs.area(ddf, agg=ds.count(), **cvs_kwargs)

sol = np.array([[0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0],
[0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0],
[0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0],
[0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0]],
dtype='i4')
if (ddf.npartitions == 2 and cvs_kwargs.get('axis') == 0 and
isinstance(cvs_kwargs['x'], (list, tuple))):
# GitHub issue #1106.
# When axis == 0, lines/areas that dask splits across partitions are not
# rejoined, so the output has undesirable missing segments.
sol = np.array([[0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0],
[0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0],
[0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]],
dtype='i4')
else:
sol = np.array([[0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0],
[0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0],
[0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0],
[0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0]],
dtype='i4')

out = xr.DataArray(sol, coords=[lincoords_y, lincoords_x],
dims=['y', 'x'])
Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
########## dependencies ##########

install_requires = [
'dask',
'dask[complete]',
'datashape >=0.5.1',
'numba >=0.51',
'pandas >=0.24.1',
Expand Down