From d0b1bb6f422c0193baf89b7aa08a66813f54fbb1 Mon Sep 17 00:00:00 2001 From: Matt Bauman Date: Fri, 11 Oct 2019 11:06:10 -0500 Subject: [PATCH 1/4] Partitions of all arrays use views Previously only `::Vector` was special-cased to use views. The trade-off here is that we lose the ability to predict the concrete eltype -- since arrays can potentially choose to return something different from `vec` or `view`. Generic iterables still collect their elements into a freshly-allocated `Vector`, like before. --- NEWS.md | 2 ++ base/iterators.jl | 31 +++++++++++++++++++++++++------ 2 files changed, 27 insertions(+), 6 deletions(-) diff --git a/NEWS.md b/NEWS.md index 7ac3d162a924a..034ea5d85fbae 100644 --- a/NEWS.md +++ b/NEWS.md @@ -42,6 +42,8 @@ Standard library changes * Verbose `display` of `Char` (`text/plain` output) now shows the codepoint value in standard-conforming `"U+XXXX"` format ([#33291]). +* `Iterators.partition` now uses views (or smartly re-computed ranges) for partitions of all `AbstractArray`s ([#33533]). + #### Libdl diff --git a/base/iterators.jl b/base/iterators.jl index 9c813d7e21632..4617a139b9b4d 100644 --- a/base/iterators.jl +++ b/base/iterators.jl @@ -923,37 +923,56 @@ Iterate over a collection `n` elements at a time. # Examples ```jldoctest julia> collect(Iterators.partition([1,2,3,4,5], 2)) -3-element Array{Array{Int64,1},1}: +3-element Array{SubArray{Int64,1,Array{Int64,1},Tuple{UnitRange{Int64}},true},1}: [1, 2] [3, 4] [5] ``` """ -partition(c::T, n::Integer) where {T} = PartitionIterator{T}(c, Int(n)) +function partition(c, n::Integer) + n < 1 && throw(ArgumentError("cannot create partitions of length $n")) + return PartitionIterator(c, Int(n)) +end struct PartitionIterator{T} c::T n::Int end +# Partitions are explicitly a linear indexing operation, so reshape to 1-d immediately +PartitionIterator(A::AbstractArray, n::Int) = PartitionIterator(vec(A), n) +PartitionIterator(v::AbstractVector, n::Int) = PartitionIterator{typeof(v)}(v, n) eltype(::Type{PartitionIterator{T}}) where {T} = Vector{eltype(T)} +# Arrays use a generic `view`-of-a-`vec`, so we cannot exactly predict what we'll get back +eltype(::Type{PartitionIterator{T}}) where {T<:AbstractArray} = AbstractVector{eltype(T)} +# But for some common implementations in Base we know the answer exactly +eltype(::Type{PartitionIterator{T}}) where {T<:Vector} = SubArray{eltype(T), 1, T, Tuple{UnitRange{Int}}, true} + +IteratorEltype(::Type{<:PartitionIterator{T}}) where {T} = IteratorEltype(T) +IteratorEltype(::Type{<:PartitionIterator{T}}) where {T<:AbstractArray} = EltypeUnknown() +IteratorEltype(::Type{<:PartitionIterator{T}}) where {T<:Vector} = IteratorEltype(T) + partition_iteratorsize(::HasShape) = HasLength() partition_iteratorsize(isz) = isz function IteratorSize(::Type{PartitionIterator{T}}) where {T} partition_iteratorsize(IteratorSize(T)) end -IteratorEltype(::Type{<:PartitionIterator{T}}) where {T} = IteratorEltype(T) - function length(itr::PartitionIterator) l = length(itr.c) return div(l, itr.n) + ((mod(l, itr.n) > 0) ? 1 : 0) end -function iterate(itr::PartitionIterator{<:Vector}, state=1) +function iterate(itr::PartitionIterator{<:AbstractRange}, state=1) + state > length(itr.c) && return nothing + r = min(state + itr.n - 1, length(itr.c)) + return @inbounds itr.c[state:r], r + 1 +end + +function iterate(itr::PartitionIterator{<:AbstractArray}, state=1) state > length(itr.c) && return nothing r = min(state + itr.n - 1, length(itr.c)) - return view(itr.c, state:r), r + 1 + return @inbounds view(itr.c, state:r), r + 1 end struct IterationCutShort; end From 9f24a06ccd5f47df902354919fd7440ac161ea1d Mon Sep 17 00:00:00 2001 From: Matt Bauman Date: Fri, 11 Oct 2019 11:10:10 -0500 Subject: [PATCH 2/4] Optimize partitioned views of CartesianIndices And also recompute ranges instead of using views for partitions of ranges. Since `Iterators.partition` is so handy for dividing up iteration spaces, it makes sense to optimize this as much as possible. While it is enherently a "linear" operation, it is a batched linear operation that allows us to skip doing all the effective ind2sub work on every single iteration. --- base/multidimensional.jl | 47 +++++++++++++++++++++++- test/iterators.jl | 79 ++++++++++++++++++++++++++++++++++++++++ 2 files changed, 124 insertions(+), 2 deletions(-) diff --git a/base/multidimensional.jl b/base/multidimensional.jl index f3cebeec249ff..1978f45da44e3 100644 --- a/base/multidimensional.jl +++ b/base/multidimensional.jl @@ -8,8 +8,9 @@ module IteratorsMD import .Base: +, -, *, (:) import .Base: simd_outer_range, simd_inner_length, simd_index - using .Base: IndexLinear, IndexCartesian, AbstractCartesianIndex, fill_to_length, tail - using .Base.Iterators: Reverse + using .Base: IndexLinear, IndexCartesian, AbstractCartesianIndex, fill_to_length, tail, + ReshapedArray, ReshapedArrayLF, OneTo + using .Base.Iterators: Reverse, PartitionIterator export CartesianIndex, CartesianIndices @@ -463,6 +464,48 @@ module IteratorsMD iterate(iter::Reverse{<:CartesianIndices{0}}, state=false) = state ? nothing : (CartesianIndex(), true) Base.LinearIndices(inds::CartesianIndices{N,R}) where {N,R} = LinearIndices{N,R}(inds.indices) + + # Views of reshaped CartesianIndices are used for partitions — ensure these are fast + const CartesianPartition{T<:CartesianIndex, P<:CartesianIndices, R<:ReshapedArray{T,1,P}} = SubArray{T,1,R,Tuple{UnitRange{Int}},false} + eltype(::Type{PartitionIterator{T}}) where {T<:ReshapedArrayLF} = SubArray{eltype(T), 1, T, Tuple{UnitRange{Int}}, true} + eltype(::Type{PartitionIterator{T}}) where {T<:ReshapedArray} = SubArray{eltype(T), 1, T, Tuple{UnitRange{Int}}, false} + Iterators.IteratorEltype(::Type{<:PartitionIterator{T}}) where {T<:ReshapedArray} = Iterators.IteratorEltype(T) + + eltype(::Type{PartitionIterator{T}}) where {T<:OneTo} = UnitRange{eltype(T)} + eltype(::Type{PartitionIterator{T}}) where {T<:Union{UnitRange, StepRange, StepRangeLen, LinRange}} = T + Iterators.IteratorEltype(::Type{<:PartitionIterator{T}}) where {T<:Union{OneTo, UnitRange, StepRange, StepRangeLen, LinRange}} = Iterators.IteratorEltype(T) + + + @inline function iterate(iter::CartesianPartition) + isempty(iter) && return nothing + f = first(iter) + return (f, (f, 1)) + end + @inline function iterate(iter::CartesianPartition, (state, n)) + n >= length(iter) && return nothing + I = IteratorsMD.inc(state.I, first(iter.parent.parent).I, last(iter.parent.parent).I) + return I, (I, n+1) + end + + simd_outer_range(iter::CartesianPartition{CartesianIndex{2}}) = @inbounds CartesianIndices((iter[1][2]:iter[end][2],)) + @inline function simd_outer_range(iter::CartesianPartition) + t = tail(iter.parent.parent.indices) + ci = CartesianIndices(t) + li = LinearIndices(t) + return @inbounds view(ci, li[tail(iter[1].I)...]:li[tail(iter[end].I)...]) + end + @inline function simd_inner_length(iter::CartesianPartition, I::CartesianIndex) + inner = iter.parent.parent.indices[1] + @inbounds fi = iter[1].I + @inbounds li = iter[end].I + inner_start = I.I == tail(fi) ? fi[1] : first(inner) + inner_end = I.I == tail(li) ? li[1] : last(inner) + return inner_end - inner_start + 1 + end + @inline function simd_index(iter::CartesianPartition, Ilast::CartesianIndex, I1::Int) + f = @inbounds iter[1] + CartesianIndex((I1+first(iter.parent.parent.indices[1])+(Ilast.I==tail(f.I))*(f[1]-1), Ilast.I...)) + end end # IteratorsMD diff --git a/test/iterators.jl b/test/iterators.jl index eabe5e0905b98..4156942daefda 100644 --- a/test/iterators.jl +++ b/test/iterators.jl @@ -410,6 +410,85 @@ for n in [5,6] [(1,1),(2,2),(3,3),(4,4),(5,5)] end +function iterate_length(iter) + n=0 + for i in iter + n += 1 + end + return n +end +function simd_iterate_length(iter) + n=0 + @simd for i in iter + n += 1 + end + return n +end +function simd_trip_count(iter) + return sum(Base.SimdLoop.simd_inner_length(iter, i) for i in Base.SimdLoop.simd_outer_range(iter)) +end +function iterate_elements(iter) + vals = Vector{eltype(iter)}(undef, length(iter)) + i = 1 + for v in iter + @inbounds vals[i] = v + i += 1 + end + return vals +end +function simd_iterate_elements(iter) + vals = Vector{eltype(iter)}(undef, length(iter)) + i = 1 + @simd for v in iter + @inbounds vals[i] = v + i += 1 + end + return vals +end +function index_elements(iter) + vals = Vector{eltype(iter)}(undef, length(iter)) + i = 1 + for j in eachindex(iter) + @inbounds vals[i] = iter[j] + i += 1 + end + return vals +end + +@testset "CartesianPartition optimizations" for dims in ((1,), (64,), (101,), + (1,1), (8,8), (11, 13), + (1,1,1), (8, 4, 2), (11, 13, 17)), + part in (1, 7, 8, 11, 63, 64, 65, 142, 143, 144) + P = partition(CartesianIndices(dims), part) + for I in P + @test length(I) == iterate_length(I) == simd_iterate_length(I) == simd_trip_count(I) + @test collect(I) == iterate_elements(I) == simd_iterate_elements(I) == index_elements(I) + end + @test all(Base.splat(==), zip(Iterators.flatten(map(collect, P)), CartesianIndices(dims))) +end +@testset "empty/invalid partitions" begin + @test_throws ArgumentError partition(1:10, 0) + @test_throws ArgumentError partition(1:10, -1) + @test_throws ArgumentError partition(1:0, 0) + @test_throws ArgumentError partition(1:0, -1) + @test isempty(partition(1:0, 1)) + @test isempty(partition(CartesianIndices((0,1)), 1)) +end +@testset "exact partition eltypes" for a in (Base.OneTo(24), 1:24, 1:1:24, LinRange(1,10,24), .1:.1:2.4, Vector(1:24), + CartesianIndices((4, 6)), Dict((1:24) .=> (1:24))) + P = partition(a, 2) + @test eltype(P) === typeof(first(P)) + @test Iterators.IteratorEltype(P) == Iterators.HasEltype() + if a isa AbstractArray + P = partition(vec(a), 2) + @test eltype(P) === typeof(first(P)) + P = partition(reshape(a, 6, 4), 2) + @test eltype(P) === typeof(first(P)) + P = partition(reshape(a, 2, 3, 4), 2) + @test eltype(P) === typeof(first(P)) + end +end + @test join(map(x->string(x...), partition("Hello World!", 5)), "|") == "Hello| Worl|d!" From 9e10015337204c0b216011cad583d002574428ca Mon Sep 17 00:00:00 2001 From: Matt Bauman Date: Fri, 11 Oct 2019 11:10:43 -0500 Subject: [PATCH 3/4] Simplify, simdify, and speed up BitArray broadcasts with partitions --- base/broadcast.jl | 21 ++++++++++----------- 1 file changed, 10 insertions(+), 11 deletions(-) diff --git a/base/broadcast.jl b/base/broadcast.jl index 16c0653be515d..ebce0afaba6c1 100644 --- a/base/broadcast.jl +++ b/base/broadcast.jl @@ -919,20 +919,19 @@ end length(dest) < 256 && return invoke(copyto!, Tuple{AbstractArray, Broadcasted{Nothing}}, dest, bc) tmp = Vector{Bool}(undef, bitcache_size) destc = dest.chunks - ind = cind = 1 + cind = 1 bc′ = preprocess(dest, bc) - @simd for I in eachindex(bc′) - @inbounds tmp[ind] = bc′[I] - ind += 1 - if ind > bitcache_size - dumpbitcache(destc, cind, tmp) - cind += bitcache_chunks - ind = 1 + for P in Iterators.partition(eachindex(bc′), bitcache_size) + ind = 1 + @simd for I in P + @inbounds tmp[ind] = bc′[I] + ind += 1 + end + @simd for i in ind:bitcache_size + @inbounds tmp[i] = false end - end - if ind > 1 - @inbounds tmp[ind:bitcache_size] .= false dumpbitcache(destc, cind, tmp) + cind += bitcache_chunks end return dest end From e472b14f50888ed03729685027b9817b5fbda2ab Mon Sep 17 00:00:00 2001 From: Matt Bauman Date: Thu, 31 Oct 2019 15:05:10 -0500 Subject: [PATCH 4/4] NFC: style changes per review shorter lines, more spaces, and comments (because I had already forgotten how this worked myself) --- base/multidimensional.jl | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/base/multidimensional.jl b/base/multidimensional.jl index 1978f45da44e3..3745186f5cc4f 100644 --- a/base/multidimensional.jl +++ b/base/multidimensional.jl @@ -487,13 +487,21 @@ module IteratorsMD return I, (I, n+1) end - simd_outer_range(iter::CartesianPartition{CartesianIndex{2}}) = @inbounds CartesianIndices((iter[1][2]:iter[end][2],)) @inline function simd_outer_range(iter::CartesianPartition) + # In general, the Cartesian Partition might start and stop in the middle of the outer + # dimensions — thus the outer range of a CartesianPartition is itself a + # CartesianPartition. t = tail(iter.parent.parent.indices) ci = CartesianIndices(t) li = LinearIndices(t) return @inbounds view(ci, li[tail(iter[1].I)...]:li[tail(iter[end].I)...]) end + function simd_outer_range(iter::CartesianPartition{CartesianIndex{2}}) + # But for two-dimensional Partitions the above is just a simple one-dimensional range + # over the second dimension; we don't need to worry about non-rectangular staggers in + # higher dimensions. + return @inbounds CartesianIndices((iter[1][2]:iter[end][2],)) + end @inline function simd_inner_length(iter::CartesianPartition, I::CartesianIndex) inner = iter.parent.parent.indices[1] @inbounds fi = iter[1].I @@ -503,8 +511,12 @@ module IteratorsMD return inner_end - inner_start + 1 end @inline function simd_index(iter::CartesianPartition, Ilast::CartesianIndex, I1::Int) + # I1 is the 0-based distance from the first dimension's offest + offset = first(iter.parent.parent.indices[1]) # (this is 1 for 1-based arrays) + # In the first column we need to also add in the iter's starting point (branchlessly) f = @inbounds iter[1] - CartesianIndex((I1+first(iter.parent.parent.indices[1])+(Ilast.I==tail(f.I))*(f[1]-1), Ilast.I...)) + startoffset = (Ilast.I == tail(f.I))*(f[1] - 1) + CartesianIndex((I1 + offset + startoffset, Ilast.I...)) end end # IteratorsMD