Skip to content

Commit

Permalink
Add windows CI (#151)
Browse files Browse the repository at this point in the history
* Add windows CI

* Update ci.yaml

* Update ci.yaml

* Make arg input the same as shown in pytest

* Add dtype check

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

* have expected and actual results on the same side

* use np.intp for count expected

* [revert] minimize test

* specify dtypes

* more fixers

* more.

* Fix groupby_reduce

* [revert] only wiindows tests

* more fixes?

* more fixes.

* more fix

* Last fix?

* Update .github/workflows/ci.yaml

* revert

* Better fix

* Revert "revert"

This reverts commit 3b79f6e.

* better comment.

* clean up test

* Revert "Revert "revert""

This reverts commit 38438a2.

* xfail labels dtype test

* Revert "[revert] only wiindows tests"

This reverts commit 232cf15.

* Revert "[revert] minimize test"

This reverts commit f993b31.

* fix bad revert

Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Co-authored-by: dcherian <[email protected]>
Co-authored-by: Deepak Cherian <[email protected]>
  • Loading branch information
4 people authored Nov 5, 2022
1 parent e3ea0e7 commit 9b01c48
Show file tree
Hide file tree
Showing 3 changed files with 54 additions and 43 deletions.
5 changes: 2 additions & 3 deletions .github/workflows/ci.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ jobs:
strategy:
fail-fast: false
matrix:
os: ["ubuntu-latest"]
os: ["ubuntu-latest", "windows-latest"]
python-version: ["3.8", "3.10"]
steps:
- uses: actions/checkout@v3
Expand All @@ -43,8 +43,7 @@ jobs:
python="${{ matrix.python-version }}"
- name: Install flox
run: |
python -m pip install -e .
conda list
python -m pip install --no-deps -e .
- name: Run Tests
run: |
pytest -n auto --cov=./ --cov-report=xml
Expand Down
5 changes: 4 additions & 1 deletion flox/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -1350,7 +1350,10 @@ def dask_groupby_agg(
aggregate=partial(aggregate, expected_groups=index, reindex=True),
)
)
groups_.append(cohort)
# This is done because pandas promotes to 64-bit types when an Index is created
# So we use the index to generate the return value for consistency with "map-reduce"
# This is important on Windows
groups_.append(index.values)

reduced = dask.array.concatenate(reduced_, axis=-1)
groups = (np.concatenate(groups_),)
Expand Down
87 changes: 48 additions & 39 deletions tests/test_core.py
Original file line number Diff line number Diff line change
Expand Up @@ -135,11 +135,11 @@ def test_groupby_reduce(
by = da.from_array(by, chunks=(3,) if by.ndim == 1 else (1, 3))

if func == "mean" or func == "nanmean":
expected_result = np.array(expected, dtype=float)
expected_result = np.array(expected, dtype=np.float64)
elif func == "sum":
expected_result = np.array(expected, dtype=dtype)
elif func == "count":
expected_result = np.array(expected, dtype=int)
expected_result = np.array(expected, dtype=np.int64)

result, groups, = groupby_reduce(
array,
Expand All @@ -149,7 +149,9 @@ def test_groupby_reduce(
fill_value=123,
engine=engine,
)
g_dtype = by.dtype if expected_groups is None else np.asarray(expected_groups).dtype
# we use pd.Index(expected_groups).to_numpy() which is always int64
# for the values in these tests
g_dtype = by.dtype if expected_groups is None else np.int64

assert_equal(groups, np.array([0, 1, 2], g_dtype))
assert_equal(expected_result, result)
Expand Down Expand Up @@ -274,7 +276,7 @@ def test_groupby_reduce_count():
array = np.array([0, 0, np.nan, np.nan, np.nan, 1, 1])
labels = np.array(["a", "b", "b", "b", "c", "c", "c"])
result, _ = groupby_reduce(array, labels, func="count")
assert_equal(result, [1, 1, 2])
assert_equal(result, np.array([1, 1, 2], dtype=np.int64))


def test_func_is_aggregation():
Expand Down Expand Up @@ -383,53 +385,52 @@ def test_groupby_agg_dask(func, shape, array_chunks, group_chunks, add_nan, dtyp
kwargs["expected_groups"] = [0, 2, 1]
with raise_if_dask_computes():
actual, groups = groupby_reduce(array, by, engine=engine, **kwargs, sort=False)
assert_equal(groups, [0, 2, 1])
assert_equal(groups, np.array([0, 2, 1], dtype=np.intp))
assert_equal(expected, actual[..., [0, 2, 1]])

kwargs["expected_groups"] = [0, 2, 1]
with raise_if_dask_computes():
actual, groups = groupby_reduce(array, by, engine=engine, **kwargs, sort=True)
assert_equal(groups, [0, 1, 2])
assert_equal(groups, np.array([0, 1, 2], np.intp))
assert_equal(expected, actual)


def test_numpy_reduce_axis_subset(engine):
# TODO: add NaNs
by = labels2d
array = np.ones_like(by)
array = np.ones_like(by, dtype=np.int64)
kwargs = dict(func="count", engine=engine, fill_value=0)
result, _ = groupby_reduce(array, by, **kwargs, axis=1)
assert_equal(result, [[2, 3], [2, 3]])
assert_equal(result, np.array([[2, 3], [2, 3]], dtype=np.int64))

by = np.broadcast_to(labels2d, (3, *labels2d.shape))
array = np.ones_like(by)
result, _ = groupby_reduce(array, by, **kwargs, axis=1)
subarr = np.array([[1, 1], [1, 1], [0, 2], [1, 1], [1, 1]])
subarr = np.array([[1, 1], [1, 1], [0, 2], [1, 1], [1, 1]], dtype=np.int64)
expected = np.tile(subarr, (3, 1, 1))
assert_equal(result, expected)

result, _ = groupby_reduce(array, by, **kwargs, axis=2)
subarr = np.array([[2, 3], [2, 3]])
subarr = np.array([[2, 3], [2, 3]], dtype=np.int64)
expected = np.tile(subarr, (3, 1, 1))
assert_equal(result, expected)

result, _ = groupby_reduce(array, by, **kwargs, axis=(1, 2))
expected = np.array([[4, 6], [4, 6], [4, 6]])
expected = np.array([[4, 6], [4, 6], [4, 6]], dtype=np.int64)
assert_equal(result, expected)

result, _ = groupby_reduce(array, by, **kwargs, axis=(2, 1))
assert_equal(result, expected)

result, _ = groupby_reduce(array, by[0, ...], **kwargs, axis=(1, 2))
expected = np.array([[4, 6], [4, 6], [4, 6]])
expected = np.array([[4, 6], [4, 6], [4, 6]], dtype=np.int64)
assert_equal(result, expected)


@requires_dask
def test_dask_reduce_axis_subset():

by = labels2d
array = np.ones_like(by)
array = np.ones_like(by, dtype=np.int64)
with raise_if_dask_computes():
result, _ = groupby_reduce(
da.from_array(array, chunks=(2, 3)),
Expand All @@ -438,11 +439,11 @@ def test_dask_reduce_axis_subset():
axis=1,
expected_groups=[0, 2],
)
assert_equal(result, [[2, 3], [2, 3]])
assert_equal(result, np.array([[2, 3], [2, 3]], dtype=np.int64))

by = np.broadcast_to(labels2d, (3, *labels2d.shape))
array = np.ones_like(by)
subarr = np.array([[1, 1], [1, 1], [123, 2], [1, 1], [1, 1]])
subarr = np.array([[1, 1], [1, 1], [123, 2], [1, 1], [1, 1]], dtype=np.int64)
expected = np.tile(subarr, (3, 1, 1))
with raise_if_dask_computes():
result, _ = groupby_reduce(
Expand All @@ -455,7 +456,7 @@ def test_dask_reduce_axis_subset():
)
assert_equal(result, expected)

subarr = np.array([[2, 3], [2, 3]])
subarr = np.array([[2, 3], [2, 3]], dtype=np.int64)
expected = np.tile(subarr, (3, 1, 1))
with raise_if_dask_computes():
result, _ = groupby_reduce(
Expand Down Expand Up @@ -663,7 +664,7 @@ def test_groupby_bins(chunk_labels, chunks, engine, method) -> None:
engine=engine,
method=method,
)
expected = np.array([3, 1, 0])
expected = np.array([3, 1, 0], dtype=np.int64)
for left, right in zip(groups, pd.IntervalIndex.from_arrays([1, 2, 4], [2, 4, 5]).to_numpy()):
assert left == right
assert_equal(actual, expected)
Expand Down Expand Up @@ -780,15 +781,23 @@ def test_dtype_preservation(dtype, func, engine):


@requires_dask
@pytest.mark.parametrize("method", ["split-reduce", "map-reduce", "cohorts"])
def test_cohorts(method):
repeats = [4, 4, 12, 2, 3, 4]
labels = np.repeat(np.arange(6), repeats)
array = dask.array.from_array(labels, chunks=(4, 8, 4, 9, 4))
@pytest.mark.parametrize("dtype", [np.int32, np.int64])
@pytest.mark.parametrize(
"labels_dtype", [pytest.param(np.int32, marks=pytest.mark.xfail), np.int64]
)
@pytest.mark.parametrize("method", ["map-reduce", "cohorts"])
def test_cohorts_map_reduce_consistent_dtypes(method, dtype, labels_dtype):
repeats = np.array([4, 4, 12, 2, 3, 4], dtype=np.int32)
labels = np.repeat(np.arange(6, dtype=labels_dtype), repeats)
array = dask.array.from_array(labels.astype(dtype), chunks=(4, 8, 4, 9, 4))

actual, actual_groups = groupby_reduce(array, labels, func="count", method=method)
assert_equal(actual_groups, np.arange(6))
assert_equal(actual, repeats)
assert_equal(actual_groups, np.arange(6, dtype=labels.dtype))
assert_equal(actual, repeats.astype(np.int64))

actual, actual_groups = groupby_reduce(array, labels, func="sum", method=method)
assert_equal(actual_groups, np.arange(6, dtype=labels.dtype))
assert_equal(actual, np.array([0, 4, 24, 6, 12, 20], dtype))


@requires_dask
Expand All @@ -800,7 +809,7 @@ def test_cohorts_nd_by(func, method, axis, engine):
o2 = dask.array.ones((2, 3), chunks=-1)

array = dask.array.block([[o, 2 * o], [3 * o2, 4 * o2]])
by = array.compute().astype(int)
by = array.compute().astype(np.int64)
by[0, 1] = 30
by[2, 1] = 40
by[0, 4] = 31
Expand All @@ -825,9 +834,9 @@ def test_cohorts_nd_by(func, method, axis, engine):

actual, groups = groupby_reduce(array, by, sort=False, **kwargs)
if method == "map-reduce":
assert_equal(groups, [1, 30, 2, 31, 3, 4, 40])
assert_equal(groups, np.array([1, 30, 2, 31, 3, 4, 40], dtype=np.int64))
else:
assert_equal(groups, [1, 30, 2, 31, 3, 40, 4])
assert_equal(groups, np.array([1, 30, 2, 31, 3, 40, 4], dtype=np.int64))
reindexed = reindex_(actual, groups, pd.Index(sorted_groups))
assert_equal(reindexed, expected)

Expand Down Expand Up @@ -950,7 +959,7 @@ def test_factorize_values_outside_bins():
fastpath=True,
)
actual = vals[0]
expected = np.array([[-1, -1], [-1, 0], [6, 12], [18, 24], [-1, -1]])
expected = np.array([[-1, -1], [-1, 0], [6, 12], [18, 24], [-1, -1]], np.int64)
assert_equal(expected, actual)


Expand All @@ -967,7 +976,7 @@ def test_multiple_groupers() -> None:
reindex=True,
func="count",
)
expected = np.eye(5, 5, dtype=int)
expected = np.eye(5, 5, dtype=np.int64)
assert_equal(expected, actual)


Expand All @@ -979,38 +988,38 @@ def test_factorize_reindex_sorting_strings():
)

expected = factorize_(**kwargs, reindex=True, sort=True)[0]
assert_equal(expected, [0, 1, 4, 2])
assert_equal(expected, np.array([0, 1, 4, 2], dtype=np.int64))

expected = factorize_(**kwargs, reindex=True, sort=False)[0]
assert_equal(expected, [0, 3, 4, 1])
assert_equal(expected, np.array([0, 3, 4, 1], dtype=np.int64))

expected = factorize_(**kwargs, reindex=False, sort=False)[0]
assert_equal(expected, [0, 1, 2, 3])
assert_equal(expected, np.array([0, 1, 2, 3], dtype=np.int64))

expected = factorize_(**kwargs, reindex=False, sort=True)[0]
assert_equal(expected, [0, 1, 3, 2])
assert_equal(expected, np.array([0, 1, 3, 2], dtype=np.int64))


def test_factorize_reindex_sorting_ints():
kwargs = dict(
by=(np.array([-10, 1, 10, 2, 3, 5]),),
axis=-1,
expected_groups=(np.array([0, 1, 2, 3, 4, 5]),),
expected_groups=(np.array([0, 1, 2, 3, 4, 5], np.int64),),
)

expected = factorize_(**kwargs, reindex=True, sort=True)[0]
assert_equal(expected, [6, 1, 6, 2, 3, 5])
assert_equal(expected, np.array([6, 1, 6, 2, 3, 5], dtype=np.int64))

expected = factorize_(**kwargs, reindex=True, sort=False)[0]
assert_equal(expected, [6, 1, 6, 2, 3, 5])
assert_equal(expected, np.array([6, 1, 6, 2, 3, 5], dtype=np.int64))

kwargs["expected_groups"] = (np.arange(5, -1, -1),)

expected = factorize_(**kwargs, reindex=True, sort=True)[0]
assert_equal(expected, [6, 1, 6, 2, 3, 5])
assert_equal(expected, np.array([6, 1, 6, 2, 3, 5], dtype=np.int64))

expected = factorize_(**kwargs, reindex=True, sort=False)[0]
assert_equal(expected, [6, 4, 6, 3, 2, 0])
assert_equal(expected, np.array([6, 4, 6, 3, 2, 0], dtype=np.int64))


@requires_dask
Expand Down

0 comments on commit 9b01c48

Please sign in to comment.