fixed block diagonal example
SebastianAment committed Nov 10, 2021
1 parent 3547600 commit 1616239
Showing 3 changed files with 23 additions and 25 deletions.
README.md (20 changes: 9 additions & 11 deletions)
@@ -69,24 +69,22 @@ Right above, we saw that BlockFactorizations supports `Diagonal` elements.
Further, it also specializes matrix multiplication in case of a [block diagonal matrix](https://en.wikipedia.org/wiki/Block_matrix#Block_diagonal_matrices):
```julia
d = 512
-n, m = 16, 8
+n = 16
# testing Diagonal BlockFactorization
-A = [Diagonal(randn(d)) for i in 1:n, j in 1:m]
+A = Diagonal([(randn(d, d)) for i in 1:n])

B = BlockFactorization(A)

-x = [randn(d) for _ in 1:m]
-y = [zeros(d) for _ in 1:n]
-@btime A*x
-135.849 μs (321 allocations: 1.29 MiB)
-@btime mul!(y, A, x);
-135.590 μs (320 allocations: 1.29 MiB)
+# this doesn't work in 1.6.2
+# x = [zeros(d) for _ in 1:n]
+# @btime A*x;

-x = randn(d*m)
+x = randn(d*n)
y = zeros(d*n)
@btime B*x;
-58.371 μs (157 allocations: 81.09 KiB)
+1.005 ms (30 allocations: 67.45 KiB)
@btime mul!(y, B, x);
-50.194 μs (156 allocations: 17.05 KiB)
+989.163 μs (28 allocations: 3.38 KiB)
```

## Notes
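For reference, a minimal standalone sketch (not part of this commit) that checks the block-diagonal product from the README hunk above against a plain blockwise reference. It assumes, as the README snippet suggests, that `B * x` on a flat vector of length `d*n` returns the flattened block product; the small sizes `d` and `n` are arbitrary:

```julia
using LinearAlgebra
using BlockFactorizations

d, n = 4, 3                                 # small sizes for a quick check
A = Diagonal([randn(d, d) for i in 1:n])    # block diagonal: n dense d×d blocks
B = BlockFactorization(A)

x = randn(d * n)                            # flat input vector, as in the README
y = B * x                                   # specialized block-diagonal multiply

# blockwise reference: the i-th block of y should equal A[i, i] * (i-th block of x)
y_ref = reduce(vcat, [A[i, i] * x[(i - 1) * d + 1 : i * d] for i in 1:n])
@assert y ≈ y_ref
```
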
examples/diagonal.jl (14 changes: 7 additions & 7 deletions)
@@ -3,17 +3,17 @@ using BlockFactorizations
using BenchmarkTools

d = 512
-n, m = 16, 8
+n = 16
# testing Diagonal BlockFactorization
-A = [Diagonal(randn(d)) for i in 1:n, j in 1:m]
+A = Diagonal([(randn(d, d)) for i in 1:n])
B = BlockFactorization(A)

-x = [randn(d) for _ in 1:m]
-y = [zeros(d) for _ in 1:n]
-@btime A*x
-@btime mul!(y, A, x);
+# x = [randn(d) for _ in 1:n]
+# y = [zeros(d) for _ in 1:n]
+# @btime A*x; # this doesn't work in 1.6
+# @btime mul!(y, A, x);

-x = randn(d*m)
+x = randn(d*n)
y = zeros(d*n)
@btime B*x;
@btime mul!(y, B, x);
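One usage note, not part of the diff: `blockmul!` parallelizes its loop over row blocks with `@threads` (see src/block.jl below), so the timings this script reports depend on how many threads Julia was started with. A quick way to check, using only standard Julia:

```julia
using Base.Threads
@show nthreads()   # number of threads available to blockmul!'s row-block loop
# to benchmark with more threads, start Julia accordingly, e.g.:
#   julia --threads=4 examples/diagonal.jl
```
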
src/block.jl (14 changes: 7 additions & 7 deletions)
@@ -136,7 +136,7 @@ function LinearAlgebra.mul!(Y::AbstractMatrix, B::BlockFactorization, X::Abstrac
end

# carries out multiplication for general BlockFactorization
-function blockmul!(y::AbstractVecOfVecOrMat, G::AbstractMatrix{<:AbstractMatOrFac},
+function blockmul!(y::AbstractVecOfVecOrMat, G::AbstractMatrix,
x::AbstractVecOfVecOrMat, α::Real = 1, β::Real = 0, strided::Val{false} = Val(false))
@threads for i in eachindex(y)
@. y[i] = β * y[i]
@@ -173,7 +173,7 @@ function LinearAlgebra.mul!(Y::AbstractMatrix, B::StridedBlockFactorization, X::
end

# recursively calls mul!, thereby avoiding memory allocation of block-matrix multiplication
-function blockmul!(y::AbstractVecOfVecOrMat, G::AbstractMatrix{<:AbstractMatOrFac},
+function blockmul!(y::AbstractVecOfVecOrMat, G::AbstractMatrix,
x::AbstractVecOfVecOrMat, strided::Val{true}, α::Real = 1, β::Real = 0)
# pre-allocate temporary storage for matrix elements (needs to be done better, "similar"?)
Gijs = [G[1, 1] for _ in 1:nthreads()] # IDEA could be nothing if G is a AbstractMatrix{<:Matrix}
@@ -191,25 +191,25 @@ end
# fallback for generic matrices or factorizations
# does not overwrite Gij in this case, only for more advanced data structures,
# that are not already fully allocated
-function evaluate_block!(Gij, G::AbstractMatrix{<:AbstractMatOrFac}, i::Int, j::Int)
+function evaluate_block!(Gij, G::AbstractMatrix, i::Int, j::Int)
G[i, j]
end

################## specialization for diagonal block factorizations ############
# const DiagonalBlockFactorization{T} = BlockFactorization{T, <:Diagonal}
# carries out multiplication for Diagonal BlockFactorization
-function blockmul!(y::AbstractVecOfVecOrMat, G::Diagonal{<:AbstractMatOrFac},
-x::AbstractVecOfVecOrMat, α::Real = 1, β::Real = 0, strided::Val{false} = Val(false))
+function blockmul!(y::AbstractVecOfVecOrMat, G::Diagonal,
+x::AbstractVecOfVecOrMat, strided::Val{false}, α::Real = 1, β::Real = 0)
@threads for i in eachindex(y)
-@. y[i] = β * y[i] # TODO: y[i] .*= β ?
+@. y[i] = β * y[i] # IDEA: y[i] .*= β ?
Gii = G[i, i] # if it is not strided, we can't pre-allocate memory for blocks
mul!(y[i], Gii, x[i], α, 1) # woodbury still allocates here because of Diagonal
end
return y
end

# recursively calls mul!, thereby avoiding memory allocation of block-matrix multiplication
-function blockmul!(y::AbstractVecOfVecOrMat, G::Diagonal{<:AbstractMatOrFac},
+function blockmul!(y::AbstractVecOfVecOrMat, G::Diagonal,
x::AbstractVecOfVecOrMat, strided::Val{true}, α::Real = 1, β::Real = 0)
# pre-allocate temporary storage for matrix elements (needs to be done better, "similar"?)
Giis = [G[1, 1] for _ in 1:nthreads()]
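The signature changes in this file drop the `{<:AbstractMatOrFac}` element-type restriction and, for the non-strided `Diagonal` method, make `strided` a positional argument placed before `α` and `β`. For readers skimming the diff, here is a simplified standalone sketch of the pattern that method follows. The function `diag_blockmul!` is hypothetical, not the package's actual implementation, and it assumes `y` and `x` are vectors of vectors with block sizes matching `G`:

```julia
using LinearAlgebra
using Base.Threads

# simplified sketch of the diagonal case: y[i] ← α * G[i, i] * x[i] + β * y[i],
# with the loop over row blocks distributed across the available threads
function diag_blockmul!(y::AbstractVector{<:AbstractVector}, G::Diagonal,
                        x::AbstractVector{<:AbstractVector}, α::Real = 1, β::Real = 0)
    @threads for i in eachindex(y)
        y[i] .*= β                       # scale the existing contents of the i-th block
        mul!(y[i], G[i, i], x[i], α, 1)  # accumulate the i-th block product in place
    end
    return y
end

# tiny usage example with arbitrary sizes
d, n = 4, 3
G = Diagonal([randn(d, d) for i in 1:n])
x = [randn(d) for _ in 1:n]
y = [zeros(d) for _ in 1:n]
diag_blockmul!(y, G, x)
@assert all(y[i] ≈ G[i, i] * x[i] for i in 1:n)
```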