Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Make Iterators.partition split arrays into views for faster and easier parallelism #33533

Merged
merged 5 commits into from
Nov 1, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,8 @@ Standard library changes

* Verbose `display` of `Char` (`text/plain` output) now shows the codepoint value in standard-conforming `"U+XXXX"` format ([#33291]).

* `Iterators.partition` now uses views (or smartly re-computed ranges) for partitions of all `AbstractArray`s ([#33533]).

* Sets are now displayed less compactly in the REPL, as a column of elements, like vectors
and dictionaries ([#33300]).

Expand Down
21 changes: 10 additions & 11 deletions base/broadcast.jl
Original file line number Diff line number Diff line change
Expand Up @@ -919,20 +919,19 @@ end
length(dest) < 256 && return invoke(copyto!, Tuple{AbstractArray, Broadcasted{Nothing}}, dest, bc)
KristofferC marked this conversation as resolved.
Show resolved Hide resolved
tmp = Vector{Bool}(undef, bitcache_size)
destc = dest.chunks
ind = cind = 1
cind = 1
bc′ = preprocess(dest, bc)
@simd for I in eachindex(bc′)
@inbounds tmp[ind] = bc′[I]
ind += 1
if ind > bitcache_size
dumpbitcache(destc, cind, tmp)
cind += bitcache_chunks
ind = 1
for P in Iterators.partition(eachindex(bc′), bitcache_size)
ind = 1
@simd for I in P
@inbounds tmp[ind] = bc′[I]
ind += 1
end
@simd for i in ind:bitcache_size
@inbounds tmp[i] = false
end
end
if ind > 1
@inbounds tmp[ind:bitcache_size] .= false
dumpbitcache(destc, cind, tmp)
cind += bitcache_chunks
end
return dest
end
Expand Down
31 changes: 25 additions & 6 deletions base/iterators.jl
Original file line number Diff line number Diff line change
Expand Up @@ -1022,37 +1022,56 @@ Iterate over a collection `n` elements at a time.
# Examples
```jldoctest
julia> collect(Iterators.partition([1,2,3,4,5], 2))
3-element Array{Array{Int64,1},1}:
3-element Array{SubArray{Int64,1,Array{Int64,1},Tuple{UnitRange{Int64}},true},1}:
[1, 2]
[3, 4]
[5]
```
"""
partition(c::T, n::Integer) where {T} = PartitionIterator{T}(c, Int(n))
function partition(c, n::Integer)
    # A partition must hold at least one element; reject anything smaller up front.
    if n < 1
        throw(ArgumentError("cannot create partitions of length $n"))
    end
    # The iterator stores its chunk length as an `Int`.
    chunk = Int(n)
    return PartitionIterator(c, chunk)
end

# Iterator wrapper pairing a collection with a partition length.
struct PartitionIterator{T}
# the wrapped collection
c::T
# the requested number of elements per partition
n::Int
end
# Partitions are explicitly a linear indexing operation, so reshape to 1-d immediately
PartitionIterator(A::AbstractArray, n::Int) = PartitionIterator(vec(A), n)
# Vectors are stored as-is, with the iterator parameterized on the vector's own type
PartitionIterator(v::AbstractVector, n::Int) = PartitionIterator{typeof(v)}(v, n)

eltype(::Type{PartitionIterator{T}}) where {T} = Vector{eltype(T)}
# Arrays use a generic `view`-of-a-`vec`, so we cannot exactly predict what we'll get back
eltype(::Type{PartitionIterator{T}}) where {T<:AbstractArray} = AbstractVector{eltype(T)}
# But for some common implementations in Base we know the answer exactly
eltype(::Type{PartitionIterator{T}}) where {T<:Vector} = SubArray{eltype(T), 1, T, Tuple{UnitRange{Int}}, true}

# By default a partition iterator reports the same `IteratorEltype` trait as the wrapped type
IteratorEltype(::Type{<:PartitionIterator{T}}) where {T} = IteratorEltype(T)
# Generic arrays only declare the abstract `AbstractVector` eltype above, so report it as unknown
IteratorEltype(::Type{<:PartitionIterator{T}}) where {T<:AbstractArray} = EltypeUnknown()
# `Vector` declares an exact `SubArray` eltype above, so it defers to the wrapped type's trait again
IteratorEltype(::Type{<:PartitionIterator{T}}) where {T<:Vector} = IteratorEltype(T)

# Size trait of a partition iterator: the wrapped type's trait is passed through
# unchanged, except that a shaped collection is reported as merely having a length.
partition_iteratorsize(isz) = isz
partition_iteratorsize(::HasShape) = HasLength()
IteratorSize(::Type{PartitionIterator{T}}) where {T} =
    partition_iteratorsize(IteratorSize(T))

IteratorEltype(::Type{<:PartitionIterator{T}}) where {T} = IteratorEltype(T)

function length(itr::PartitionIterator)
    # Number of partitions: the element count divided by the partition length,
    # rounded up so that a trailing partial partition is counted too.
    return cld(length(itr.c), itr.n)
end

function iterate(itr::PartitionIterator{<:Vector}, state=1)
function iterate(itr::PartitionIterator{<:AbstractRange}, state=1)
state > length(itr.c) && return nothing
r = min(state + itr.n - 1, length(itr.c))
return @inbounds itr.c[state:r], r + 1
Copy link
Sponsor Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

OT: Are we missing an abstraction here: should we define that view(::AbstractRange, slice::AbstractRange) isa AbstractRange, or would that confuse consumers of view?

Copy link
Sponsor Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That's #26872 — looks like it got stalled because it was proposed before we really had a handle on minor changes.

end

function iterate(itr::PartitionIterator{<:AbstractArray}, state=1)
state > length(itr.c) && return nothing
r = min(state + itr.n - 1, length(itr.c))
return view(itr.c, state:r), r + 1
return @inbounds view(itr.c, state:r), r + 1
end

struct IterationCutShort; end
Expand Down
59 changes: 57 additions & 2 deletions base/multidimensional.jl
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,9 @@ module IteratorsMD

import .Base: +, -, *, (:)
import .Base: simd_outer_range, simd_inner_length, simd_index
using .Base: IndexLinear, IndexCartesian, AbstractCartesianIndex, fill_to_length, tail
using .Base.Iterators: Reverse
using .Base: IndexLinear, IndexCartesian, AbstractCartesianIndex, fill_to_length, tail,
ReshapedArray, ReshapedArrayLF, OneTo
using .Base.Iterators: Reverse, PartitionIterator

export CartesianIndex, CartesianIndices

Expand Down Expand Up @@ -463,6 +464,60 @@ module IteratorsMD
iterate(iter::Reverse{<:CartesianIndices{0}}, state=false) = state ? nothing : (CartesianIndex(), true)

Base.LinearIndices(inds::CartesianIndices{N,R}) where {N,R} = LinearIndices{N,R}(inds.indices)

# Views of reshaped CartesianIndices are used for partitions — ensure these are fast
# `CartesianPartition` names a `SubArray` (last type parameter `false`) that selects a
# `UnitRange{Int}` out of a 1-d `ReshapedArray` whose parent is a `CartesianIndices`.
const CartesianPartition{T<:CartesianIndex, P<:CartesianIndices, R<:ReshapedArray{T,1,P}} = SubArray{T,1,R,Tuple{UnitRange{Int}},false}
# Exact partition eltypes for reshaped arrays: the final `SubArray` parameter is `true`
# for `ReshapedArrayLF` parents and `false` for any other `ReshapedArray`.
eltype(::Type{PartitionIterator{T}}) where {T<:ReshapedArrayLF} = SubArray{eltype(T), 1, T, Tuple{UnitRange{Int}}, true}
eltype(::Type{PartitionIterator{T}}) where {T<:ReshapedArray} = SubArray{eltype(T), 1, T, Tuple{UnitRange{Int}}, false}
# With an exact eltype declared, forward the wrapped type's `IteratorEltype` trait
Iterators.IteratorEltype(::Type{<:PartitionIterator{T}}) where {T<:ReshapedArray} = Iterators.IteratorEltype(T)

# Ranges: partitions of a `OneTo` are declared as `UnitRange`s, while the other listed
# range types declare partitions of their own type `T`.
eltype(::Type{PartitionIterator{T}}) where {T<:OneTo} = UnitRange{eltype(T)}
eltype(::Type{PartitionIterator{T}}) where {T<:Union{UnitRange, StepRange, StepRangeLen, LinRange}} = T
Iterators.IteratorEltype(::Type{<:PartitionIterator{T}}) where {T<:Union{OneTo, UnitRange, StepRange, StepRangeLen, LinRange}} = Iterators.IteratorEltype(T)


@inline function iterate(iter::CartesianPartition)
    # Nothing to yield from an empty partition.
    if isempty(iter)
        return nothing
    end
    # The state pairs the index just produced with the count of elements produced so far.
    start = first(iter)
    return start, (start, 1)
end
@inline function iterate(iter::CartesianPartition, (state, n))
    # Continue only while fewer than `length(iter)` elements have been produced.
    n < length(iter) || return nothing
    # `inc` is handed the first and last index of the underlying CartesianIndices
    # (the partition's grandparent), not of the partition itself.
    indices = iter.parent.parent
    next = IteratorsMD.inc(state.I, first(indices).I, last(indices).I)
    return next, (next, n + 1)
end

@inline function simd_outer_range(iter::CartesianPartition)
    # A partition may begin and end partway through the outer dimensions, so its outer
    # range is a linear-range view into the outer CartesianIndices: a partition itself.
    outer_axes = tail(iter.parent.parent.indices)
    linear = LinearIndices(outer_axes)
    # Linear positions, within the outer dimensions, of the first and last elements.
    @inbounds start = linear[tail(iter[1].I)...]
    @inbounds stop = linear[tail(iter[end].I)...]
    return @inbounds view(CartesianIndices(outer_axes), start:stop)
end
function simd_outer_range(iter::CartesianPartition{CartesianIndex{2}})
    # With only two dimensions the outer range is a plain one-dimensional span over the
    # second dimension, so there are no non-rectangular staggers to account for.
    @inbounds first_col = iter[1][2]
    @inbounds last_col = iter[end][2]
    return CartesianIndices((first_col:last_col,))
end
@inline function simd_inner_length(iter::CartesianPartition, I::CartesianIndex)
    # Axis of the first (inner) dimension of the underlying CartesianIndices.
    column = iter.parent.parent.indices[1]
    @inbounds head = iter[1].I
    @inbounds foot = iter[end].I
    # By default an inner run spans the whole axis ...
    lo = first(column)
    hi = last(column)
    # ... but it is clipped in the outer position holding the partition's first element
    if I.I == tail(head)
        lo = head[1]
    end
    # ... and in the one holding its last element.
    if I.I == tail(foot)
        hi = foot[1]
    end
    return hi - lo + 1
end
@inline function simd_index(iter::CartesianPartition, Ilast::CartesianIndex, I1::Int)
    # Map the SIMD loop's inner counter `I1` and outer index `Ilast` back to a full
    # CartesianIndex of the underlying CartesianIndices.
    # I1 is the 0-based distance from the first dimension's offset
    offset = first(iter.parent.parent.indices[1]) # (this is 1 for 1-based arrays)
    # In the first column we need to also add in the iter's starting point (branchlessly).
    # The shift is measured from `offset` rather than from 1, so that the first column
    # begins exactly at `f[1]` even when the first axis is not 1-based; for 1-based axes
    # this is the same value as `f[1] - 1`.
    f = @inbounds iter[1]
    startoffset = (Ilast.I == tail(f.I)) * (f[1] - offset)
    return CartesianIndex((I1 + offset + startoffset, Ilast.I...))
end
end # IteratorsMD


Expand Down
79 changes: 79 additions & 0 deletions test/iterators.jl
Original file line number Diff line number Diff line change
Expand Up @@ -433,6 +433,85 @@ for n in [5,6]
[(1,1),(2,2),(3,3),(4,4),(5,5)]
end

function iterate_length(iter)
    # Count elements by driving the `iterate` protocol by hand.
    steps = 0
    next = iterate(iter)
    while next !== nothing
        steps += 1
        next = iterate(iter, next[2])
    end
    return steps
end
function simd_iterate_length(iter)
    # Count the iterations performed by an `@simd` loop over `iter`.
    total = 0
    @simd for item in iter
        total += 1
    end
    return total
end
function simd_trip_count(iter)
    # Sum the inner-loop length reported for every element of the outer range.
    outer = Base.SimdLoop.simd_outer_range(iter)
    return sum(Base.SimdLoop.simd_inner_length(iter, I) for I in outer)
end
function iterate_elements(iter)
    # Gather the elements produced by a plain `for` loop into a preallocated vector.
    out = Vector{eltype(iter)}(undef, length(iter))
    for (slot, v) in enumerate(iter)
        @inbounds out[slot] = v
    end
    return out
end
function simd_iterate_elements(iter)
    # Gather the elements produced by an `@simd` loop into a preallocated vector.
    out = Vector{eltype(iter)}(undef, length(iter))
    filled = 0
    @simd for v in iter
        filled += 1
        @inbounds out[filled] = v
    end
    return out
end
function index_elements(iter)
    # Gather the elements by indexing `iter` at each of its own indices.
    out = Vector{eltype(iter)}(undef, length(iter))
    for (slot, j) in enumerate(eachindex(iter))
        @inbounds out[slot] = iter[j]
    end
    return out
end

@testset "CartesianPartition optimizations" for dims in ((1,), (64,), (101,),
                                                         (1,1), (8,8), (11, 13),
                                                         (1,1,1), (8, 4, 2), (11, 13, 17)),
                                                part in (1, 7, 8, 11, 63, 64, 65, 142, 143, 144)
    inds = CartesianIndices(dims)
    chunks = partition(inds, part)
    for chunk in chunks
        # Plain iteration, `@simd` iteration and the SIMD trip count must all agree
        # with the chunk's length.
        n = length(chunk)
        @test n == iterate_length(chunk) == simd_iterate_length(chunk) == simd_trip_count(chunk)
        # Every way of walking a chunk must visit the same elements in the same order.
        expected = collect(chunk)
        @test expected == iterate_elements(chunk) == simd_iterate_elements(chunk) == index_elements(chunk)
    end
    # Chunks laid end to end must pair up, element by element, with the original indices.
    @test all(Base.splat(==), zip(Iterators.flatten(map(collect, chunks)), inds))
end
@testset "empty/invalid partitions" begin
    # Non-positive partition lengths are rejected for non-empty and empty inputs alike.
    for r in (1:10, 1:0), n in (0, -1)
        @test_throws ArgumentError partition(r, n)
    end
    # Partitioning an empty collection yields no partitions.
    @test isempty(partition(1:0, 1))
    @test isempty(partition(CartesianIndices((0,1)), 1))
end
@testset "exact partition eltypes" for a in (Base.OneTo(24), 1:24, 1:1:24, LinRange(1,10,24), .1:.1:2.4, Vector(1:24),
                                             CartesianIndices((4, 6)), Dict((1:24) .=> (1:24)))
    # The declared eltype must be exactly the type of the partitions actually produced.
    P = partition(a, 2)
    @test eltype(P) === typeof(first(P))
    @test Iterators.IteratorEltype(P) == Iterators.HasEltype()
    if a isa AbstractArray
        # The same must hold after flattening or reshaping the array.
        for reshaped in (vec(a), reshape(a, 6, 4), reshape(a, 2, 3, 4))
            Q = partition(reshaped, 2)
            @test eltype(Q) === typeof(first(Q))
        end
    end
end

# Partitioning a String into groups of 5: each group is splatted back into `string`,
# and the pieces are joined with "|" for comparison.
@test join(map(x->string(x...), partition("Hello World!", 5)), "|") ==
"Hello| Worl|d!"

Expand Down