fixed block diagonal example
SebastianAment committed Nov 10, 2021
1 parent 3547600 commit 1616239
Showing 3 changed files with 23 additions and 25 deletions.
README.md (20 changes: 9 additions & 11 deletions)
@@ -69,24 +69,22 @@ Right above, we saw that BlockFactorizations supports `Diagonal` elements.
Further, it also specializes matrix multiplication in case of a [block diagonal matrix](https://en.wikipedia.org/wiki/Block_matrix#Block_diagonal_matrices):
```julia
d = 512
-n, m = 16, 8
+n = 16
# testing Diagonal BlockFactorization
-A = [Diagonal(randn(d)) for i in 1:n, j in 1:m]
+A = Diagonal([(randn(d, d)) for i in 1:n])

B = BlockFactorization(A)

-x = [randn(d) for _ in 1:m]
-y = [zeros(d) for _ in 1:n]
-@btime A*x
-135.849 μs (321 allocations: 1.29 MiB)
-@btime mul!(y, A, x);
-135.590 μs (320 allocations: 1.29 MiB)
+# this doesn't work in 1.6.2
+# x = [zeros(d) for _ in 1:n]
+# @btime A*x;

-x = randn(d*m)
+x = randn(d*n)
y = zeros(d*n)
@btime B*x;
-58.371 μs (157 allocations: 81.09 KiB)
+1.005 ms (30 allocations: 67.45 KiB)
@btime mul!(y, B, x);
-50.194 μs (156 allocations: 17.05 KiB)
+989.163 μs (28 allocations: 3.38 KiB)
```

## Notes
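For reference, a minimal standalone sketch (not part of this commit) that checks the block-diagonal product from the README hunk above against a plain blockwise reference. It assumes, as the README snippet suggests, that `B * x` on a flat vector of length `d*n` returns the flattened block product; the small sizes `d` and `n` are arbitrary:

```julia
using LinearAlgebra
using BlockFactorizations

d, n = 4, 3                                 # small sizes for a quick check
A = Diagonal([randn(d, d) for i in 1:n])    # block diagonal: n dense d×d blocks
B = BlockFactorization(A)

x = randn(d * n)                            # flat input vector, as in the README
y = B * x                                   # specialized block-diagonal multiply

# blockwise reference: the i-th block of y should equal A[i, i] * (i-th block of x)
y_ref = reduce(vcat, [A[i, i] * x[(i - 1) * d + 1 : i * d] for i in 1:n])
@assert y ≈ y_ref
```
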
examples/diagonal.jl (14 changes: 7 additions & 7 deletions)
@@ -3,17 +3,17 @@ using BlockFactorizations
using BenchmarkTools

d = 512
-n, m = 16, 8
+n = 16
# testing Diagonal BlockFactorization
-A = [Diagonal(randn(d)) for i in 1:n, j in 1:m]
+A = Diagonal([(randn(d, d)) for i in 1:n])
B = BlockFactorization(A)

-x = [randn(d) for _ in 1:m]
-y = [zeros(d) for _ in 1:n]
-@btime A*x
-@btime mul!(y, A, x);
+# x = [randn(d) for _ in 1:n]
+# y = [zeros(d) for _ in 1:n]
+# @btime A*x; # this doesn't work in 1.6
+# @btime mul!(y, A, x);

-x = randn(d*m)
+x = randn(d*n)
y = zeros(d*n)
@btime B*x;
@btime mul!(y, B, x);
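One usage note, not part of the diff: `blockmul!` parallelizes its loop over row blocks with `@threads` (see src/block.jl below), so the timings this script reports depend on how many threads Julia was started with. A quick way to check, using only standard Julia:

```julia
using Base.Threads
@show nthreads()   # number of threads available to blockmul!'s row-block loop
# to benchmark with more threads, start Julia accordingly, e.g.:
#   julia --threads=4 examples/diagonal.jl
```
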
src/block.jl (14 changes: 7 additions & 7 deletions)
@@ -136,7 +136,7 @@ function LinearAlgebra.mul!(Y::AbstractMatrix, B::BlockFactorization, X::Abstrac
end

# carries out multiplication for general BlockFactorization
-function blockmul!(y::AbstractVecOfVecOrMat, G::AbstractMatrix{<:AbstractMatOrFac},
+function blockmul!(y::AbstractVecOfVecOrMat, G::AbstractMatrix,
x::AbstractVecOfVecOrMat, α::Real = 1, β::Real = 0, strided::Val{false} = Val(false))
@threads for i in eachindex(y)
@. y[i] = β * y[i]
@@ -173,7 +173,7 @@ function LinearAlgebra.mul!(Y::AbstractMatrix, B::StridedBlockFactorization, X::
end

# recursively calls mul!, thereby avoiding memory allocation of block-matrix multiplication
-function blockmul!(y::AbstractVecOfVecOrMat, G::AbstractMatrix{<:AbstractMatOrFac},
+function blockmul!(y::AbstractVecOfVecOrMat, G::AbstractMatrix,
x::AbstractVecOfVecOrMat, strided::Val{true}, α::Real = 1, β::Real = 0)
# pre-allocate temporary storage for matrix elements (needs to be done better, "similar"?)
Gijs = [G[1, 1] for _ in 1:nthreads()] # IDEA could be nothing if G is a AbstractMatrix{<:Matrix}
@@ -191,25 +191,25 @@ end
# fallback for generic matrices or factorizations
# does not overwrite Gij in this case, only for more advanced data structures,
# that are not already fully allocated
-function evaluate_block!(Gij, G::AbstractMatrix{<:AbstractMatOrFac}, i::Int, j::Int)
+function evaluate_block!(Gij, G::AbstractMatrix, i::Int, j::Int)
G[i, j]
end

################## specialization for diagonal block factorizations ############
# const DiagonalBlockFactorization{T} = BlockFactorization{T, <:Diagonal}
# carries out multiplication for Diagonal BlockFactorization
-function blockmul!(y::AbstractVecOfVecOrMat, G::Diagonal{<:AbstractMatOrFac},
-x::AbstractVecOfVecOrMat, α::Real = 1, β::Real = 0, strided::Val{false} = Val(false))
+function blockmul!(y::AbstractVecOfVecOrMat, G::Diagonal,
+x::AbstractVecOfVecOrMat, strided::Val{false}, α::Real = 1, β::Real = 0)
@threads for i in eachindex(y)
-@. y[i] = β * y[i] # TODO: y[i] .*= β ?
+@. y[i] = β * y[i] # IDEA: y[i] .*= β ?
Gii = G[i, i] # if it is not strided, we can't pre-allocate memory for blocks
mul!(y[i], Gii, x[i], α, 1) # woodbury still allocates here because of Diagonal
end
return y
end

# recursively calls mul!, thereby avoiding memory allocation of block-matrix multiplication
-function blockmul!(y::AbstractVecOfVecOrMat, G::Diagonal{<:AbstractMatOrFac},
+function blockmul!(y::AbstractVecOfVecOrMat, G::Diagonal,
x::AbstractVecOfVecOrMat, strided::Val{true}, α::Real = 1, β::Real = 0)
# pre-allocate temporary storage for matrix elements (needs to be done better, "similar"?)
Giis = [G[1, 1] for _ in 1:nthreads()]
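The signature changes in this file drop the `{<:AbstractMatOrFac}` element-type restriction and, for the non-strided `Diagonal` method, make `strided` a positional argument placed before `α` and `β`. For readers skimming the diff, here is a simplified standalone sketch of the pattern that method follows. The function `diag_blockmul!` is hypothetical, not the package's actual implementation, and it assumes `y` and `x` are vectors of vectors with block sizes matching `G`:

```julia
using LinearAlgebra
using Base.Threads

# simplified sketch of the diagonal case: y[i] ← α * G[i, i] * x[i] + β * y[i],
# with the loop over row blocks distributed across the available threads
function diag_blockmul!(y::AbstractVector{<:AbstractVector}, G::Diagonal,
                        x::AbstractVector{<:AbstractVector}, α::Real = 1, β::Real = 0)
    @threads for i in eachindex(y)
        y[i] .*= β                       # scale the existing contents of the i-th block
        mul!(y[i], G[i, i], x[i], α, 1)  # accumulate the i-th block product in place
    end
    return y
end

# tiny usage example with arbitrary sizes
d, n = 4, 3
G = Diagonal([randn(d, d) for i in 1:n])
x = [randn(d) for _ in 1:n]
y = [zeros(d) for _ in 1:n]
diag_blockmul!(y, G, x)
@assert all(y[i] ≈ G[i, i] * x[i] for i in 1:n)
```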