JuliaMolSim · mfherbst · Sep 28, 2022 · Aug 23, 2022 · Sep 7, 2022 · Sep 13, 2022
diff --git a/Project.toml b/Project.toml
@@ -14,7 +14,7 @@ Dates = "ade2ca70-3891-5945-98fb-dc099432e06a"
 DftFunctionals = "6bd331d2-b28d-4fd3-880e-1a1c7f37947f"
 FFTW = "7a1cc6ca-52ef-59f5-83cd-3a7055c09341"
 ForwardDiff = "f6369f11-7733-5829-9624-2563aa707210"
-GPUArrays = "0c68f7d7-f131-5f86-a1c3-88cf8149b2d7"
+GPUArraysCore = "46192b85-c4d5-4398-a991-12ede77f4527"
 InteratomicPotentials = "a9efe35a-c65d-452d-b8a8-82646cd5cb04"
 Interpolations = "a98d9a8b-a2ab-59e6-89dd-64a1c18fca59"
 IterTools = "c8e1da08-722c-5040-9ed9-7db0dc04731e"

diff --git a/src/eigen/lobpcg_hyper_impl.jl b/src/eigen/lobpcg_hyper_impl.jl
@@ -426,9 +426,7 @@ end
             # cP[Xn_indices,:] .= 0
 
             lenXn = length(Xn_indices)
-            # TODO: two allocations needed for zero(similar(...)). Create a zero_like
-            # function which only does one allocation.
-            e = zero(similar(X, size(cX, 1), M - prev_nlocked))
+            e = zeros_like(X, size(cX, 1), M - prev_nlocked)
             lower_diag = one(similar(X, lenXn, lenXn))
             # e has zeros everywhere except on one of its lower diagonal
             e[Xn_indices[1]:last(Xn_indices), 1:lenXn] = lower_diag

diff --git a/src/workarounds/gpu_arrays.jl b/src/workarounds/gpu_arrays.jl
@@ -1,7 +1,7 @@
 # TODO: remove this when it is implemented in GPUArrays and CUDA
 import LinearAlgebra.dot, LinearAlgebra.eigen
 using LinearAlgebra
-using GPUArrays
+using GPUArraysCore
 using CUDA
 
 # https://github.com/JuliaGPU/CUDA.jl/issues/1565
@@ -17,3 +17,11 @@ function LinearAlgebra.eigen(A::Hermitian{T,AT}) where {T <: Real,AT <: CuArray}
     vals, vects = CUDA.CUSOLVER.syevd!('V','U', A.data)
     (vectors = vects, values = vals)
 end
+
+# Allocate an n×m array with `similar(X, n, m)` and set every entry to zero.
+# One allocation only, as opposed to `zero(similar(X, n, m))`, which takes two.
+function zeros_like(X, n, m)
+    out = similar(X, n, m)
+    fill!(out, 0)
+    return out
+end