diff --git a/src/Solvers/homogeneous_self_dual/qrchol.jl b/src/Solvers/homogeneous_self_dual/qrchol.jl
index b767e100f..891fb8d47 100644
--- a/src/Solvers/homogeneous_self_dual/qrchol.jl
+++ b/src/Solvers/homogeneous_self_dual/qrchol.jl
@@ -41,10 +41,9 @@ mutable struct QRCholCombinedHSDSystemSolver{T <: HypReal} <: CombinedHSDSystemS
     GQ2
     QpbxGHbz
     Q1pbxGHbz
-    Q2pbxGHbz
+    Q2div
     GQ1x
     HGQ1x
-    Q2div
     HGQ2
     Q2GHGQ2
     Gxi
@@ -118,10 +117,9 @@ mutable struct QRCholCombinedHSDSystemSolver{T <: HypReal} <: CombinedHSDSystemS
         end
         system_solver.QpbxGHbz = Matrix{T}(undef, n, 3)
         system_solver.Q1pbxGHbz = view(system_solver.QpbxGHbz, 1:p, :)
-        system_solver.Q2pbxGHbz = view(system_solver.QpbxGHbz, (p + 1):n, :)
+        system_solver.Q2div = view(system_solver.QpbxGHbz, (p + 1):n, :)
         system_solver.GQ1x = Matrix{T}(undef, q, 3)
         system_solver.HGQ1x = similar(system_solver.GQ1x)
-        system_solver.Q2div = Matrix{T}(undef, nmp, 3)
         system_solver.Gxi = similar(system_solver.GQ1x)
         system_solver.HGxi = similar(system_solver.Gxi)
 
@@ -173,10 +171,9 @@ function get_combined_directions(solver::HSDSolver{T}, system_solver::QRCholComb
     GQ2 = system_solver.GQ2
     QpbxGHbz = system_solver.QpbxGHbz
     Q1pbxGHbz = system_solver.Q1pbxGHbz
-    Q2pbxGHbz = system_solver.Q2pbxGHbz
+    Q2div = system_solver.Q2div
     GQ1x = system_solver.GQ1x
     HGQ1x = system_solver.HGQ1x
-    Q2div = system_solver.Q2div
     HGQ2 = system_solver.HGQ2
     Q2GHGQ2 = system_solver.Q2GHGQ2
     Gxi = system_solver.Gxi
@@ -233,15 +230,14 @@ function get_combined_directions(solver::HSDSolver{T}, system_solver::QRCholComb
 
     ldiv!(model.Ap_R', yi)
 
-    mul!(QpbxGHbz, model.G', zi)
-    @. QpbxGHbz += xi
+    copyto!(QpbxGHbz, xi)
+    mul!(QpbxGHbz, model.G', zi, true, true)
     lmul!(model.Ap_Q', QpbxGHbz)
 
     if !iszero(size(Q2div, 1))
         mul!(GQ1x, GQ1, yi)
         block_hessian_product!(HGQ1x_k, GQ1x_k)
-        mul!(Q2div, GQ2', HGQ1x)
-        @. Q2div = Q2pbxGHbz - Q2div
+        mul!(Q2div, GQ2', HGQ1x, -1, true)
 
         block_hessian_product!(HGQ2_k, GQ2_k)
         mul!(Q2GHGQ2, GQ2', HGQ2)
@@ -280,8 +276,8 @@ function get_combined_directions(solver::HSDSolver{T}, system_solver::QRCholComb
         end
     end
 
-    @. xi1 = yi
-    @. xi2 = Q2div
+    copyto!(xi1, yi)
+    copyto!(xi2, Q2div)
     lmul!(model.Ap_Q, xi)
 
     mul!(Gxi, model.G, xi)
@@ -290,8 +286,8 @@ function get_combined_directions(solver::HSDSolver{T}, system_solver::QRCholComb
     @. zi = HGxi - zi
 
     if !iszero(length(yi))
-        mul!(yi, GQ1', HGxi)
-        @. yi = Q1pbxGHbz - yi
+        copyto!(yi, Q1pbxGHbz)
+        mul!(yi, GQ1', HGxi, -1, true)
         ldiv!(model.Ap_R, yi)
     end
 
@@ -303,8 +299,8 @@ function get_combined_directions(solver::HSDSolver{T}, system_solver::QRCholComb
         @. x += tau_sol * x1
         @. y += tau_sol * y1
         @. z += tau_sol * z1
-        mul!(s, model.G, x)
-        @. s = -s + tau_sol * model.h
+        copyto!(s, model.h)
+        mul!(s, model.G, x, -1, tau_sol)
         kap_sol = -dot(model.c, x) - dot(model.b, y) - dot(model.h, z) - tau_rhs
         return (tau_sol, kap_sol)
     end
diff --git a/src/Solvers/homogeneous_self_dual/solver.jl b/src/Solvers/homogeneous_self_dual/solver.jl
index f8f9b7183..d7763eeb9 100644
--- a/src/Solvers/homogeneous_self_dual/solver.jl
+++ b/src/Solvers/homogeneous_self_dual/solver.jl
@@ -198,10 +198,11 @@ function calc_residual(solver::HSDSolver{T}) where {T <: HypReal}
     # x_residual = -A'*y - G'*z - c*tau
     x_residual = solver.x_residual
     mul!(x_residual, model.G', point.z)
-    x_residual .= -model.A' * point.y - x_residual # TODO remove allocs
+    mul!(x_residual, model.A', point.y, true, true)
     solver.x_norm_res_t = norm(x_residual)
-    @. x_residual -= model.c * solver.tau
+    @. x_residual += model.c * solver.tau
     solver.x_norm_res = norm(x_residual) / solver.tau
+    @. x_residual *= -1
 
     # y_residual = A*x - b*tau
     y_residual = solver.y_residual
diff --git a/src/linearalgebra.jl b/src/linearalgebra.jl
index 64fdd2af4..1c628fe12 100644
--- a/src/linearalgebra.jl
+++ b/src/linearalgebra.jl
@@ -52,61 +52,25 @@ size(A::HypBlockMatrix, d) = (d == 1 ? A.nrows : A.ncols)
 
 adjoint(A::HypBlockMatrix{T}) where {T <: HypReal} = HypBlockMatrix{T}(A.ncols, A.nrows, adjoint.(A.blocks), A.cols, A.rows)
 
-function mul!(y::AbstractVector{T}, A::HypBlockMatrix{T}, x::AbstractVector{T}) where {T <: HypReal}
+# TODO try to speed up by using better logic for alpha and beta (see Julia's 5-arg mul! code)
+# TODO check that this eliminates allocs when using IterativeSolvers methods, and that it is as fast as possible
+function mul!(y::AbstractVector{T}, A::HypBlockMatrix{T}, x::AbstractVector{T}, alpha::Number, beta::Number) where {T <: HypReal}
     @assert size(x, 1) == A.ncols
     @assert size(y, 1) == A.nrows
     @assert size(x, 2) == size(y, 2)
 
-    y .= zero(T)
-
+    @. y *= beta
     for (b, r, c) in zip(A.blocks, A.rows, A.cols)
         if isempty(r) || isempty(c)
             continue
         end
-        # println()
-        # if b isa UniformScaling
-        #     println("I")
-        # else
-        #     println(size(b))
-        # end
-        # println(r, " , ", c)
         xk = view(x, c)
         yk = view(y, r)
-        yk .+= b * xk # TODO need inplace mul+add
-        # mul!(yk, b, xk, α = one(T), β = one(T))
+        mul!(yk, b, xk, alpha, true)
     end
-
     return y
 end
 
-# function mul!(y::AbstractVector{T}, A::Adjoint{T, HypBlockMatrix{T}}, x::AbstractVector{T}) where {T <: HypReal}
-#     @assert size(x, 1) == A.ncols
-#     @assert size(y, 1) == A.nrows
-#     @assert size(x, 2) == size(y, 2)
-#
-#     y .= zero(T)
-#
-#     for (b, r, c) in zip(A.blocks, A.rows, A.cols)
-#         if isempty(r) || isempty(c)
-#             continue
-#         end
-#         # println()
-#         # if b isa UniformScaling
-#         #     println("I")
-#         # else
-#         #     println(size(b))
-#         # end
-#         # println(r, " , ", c)
-#         xk = view(x, r)
-#         yk = view(y, c)
-#         yk .+= b' * xk # TODO need inplace mul+add
-#         # mul!(yk, b', xk)
-#         # mul!(yk, b', xk, α = one(T), β = one(T))
-#     end
-#
-#     return y
-# end
-
 *(A::HypBlockMatrix{T}, x::AbstractVector{T}) where {T <: HypReal} = mul!(similar(x, size(A, 1)), A, x)
 
 -(A::HypBlockMatrix{T}) where {T <: HypReal} = HypBlockMatrix{T}(A.nrows, A.ncols, -A.blocks, A.rows, A.cols)