Skip to content

Commit

Permalink
Adding ability to skip final host memcopy of eigenvectors. Updating R…
Browse files Browse the repository at this point in the history
…EADME.md to include reference to real eigensolver.
  • Loading branch information
romerojosh committed Oct 3, 2017
1 parent 27af9bd commit d835843
Show file tree
Hide file tree
Showing 3 changed files with 37 additions and 21 deletions.
14 changes: 7 additions & 7 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,24 +2,24 @@
GPU Eigensolver for Quantum ESPRESSO package

###
This library implements a generalized eigensolver for hermetian-definite eigenproblems with functionality similar to
the ZHEGVD/X functions available within LAPACK/MAGMA. This solver has less dependencies on CPU computation
This library implements a generalized eigensolver for symmetric/hermetian-definite eigenproblems with functionality similar to
the DSYGVD/X or ZHEGVD/X functions available within LAPACK/MAGMA. This solver has less dependencies on CPU computation
than comparable implementations within MAGMA, which may be of benefit to systems with limited CPU resources or to
users without access to high-performing CPU LAPACK libraries.

###
This implementation can be considered as a "proof of concept" and has been written to target the Quantum ESPRESSO
code. As such, this implementation is built only to handle one problem configuration of ZHEGVD/X. Specifically, this
code. As such, this implementation is built only to handle one problem configuration of DSYGVD/X or ZHEGVD/X. Specifically, this
solver computes eigenvalues and associated eigenvectors over a specified integer range for a
hermetian-definite eigenproblem in the following form:
symmetric/hermetian-definite eigenproblem in the following form:

A * x = lambda * B * x

where `A` and `B` are hermetian-matrices and `B` is positive definite. The solver expects the upper-triangular parts of the
input `A` and `B` arguments to be populated. This configuration corresponds to calling ZHEGVX within LAPACK with the configuration
where `A` and `B` are symmetric/hermetian-matrices and `B` is positive definite. The solver expects the upper-triangular parts of the
input `A` and `B` arguments to be populated. This configuration corresponds to calling DSYGVX/ZHEGVX within LAPACK with the configuration
arguments `ITYPE = 1`, `JOBZ = 'V'`, `RANGE = 'I'`, and `UPLO = 'U'`.

See comments within `zhegvdx_gpu.F90` for specific details on usage.
See comments within `dsygvdx_gpu.F90` or `zhegvdx_gpu.F90` for specific details on usage.

For additional information about the solver with some performance results, see presentation at the following link: (will be added
once available publically on the GTC On-Demand website)
Expand Down
22 changes: 15 additions & 7 deletions lib_eigsolve/dsygvdx_gpu.F90
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,8 @@ module dsygvdx_gpu
! - lwork_h is an integer specifying length of work_h. lwork_h >= 1 + 6*N + 2*N*N
! - iwork_h is a integer array for integer workspace of length liwork_h.
! - liwork_h is an integer specifying length of iwork_h. liwork_h >= 3 + 5*N
! - (optional) _skip_host_copy is an optional logical argument. If .TRUE., memcopy of final updated eigenvectors from
! device to host will be skipped.
!
! Output:
! On device:
Expand All @@ -61,13 +63,13 @@ module dsygvdx_gpu
! On host:
! - Z_h(ldz, iu - il + 1) is a real(8) matrix on the host. On exit, the first iu - il + 1 columns of Z
! contains normalized eigenvectors corresponding to eigenvalues in the range [il, iu]. This is a copy of the Z
! matrix on the device
! matrix on the device. This is only true if optional argument _skip_host_copy is not provided or is set to .FALSE.
! - w(iu - il + 1) is a real(8) array on the host. On exit, the first iu - il + 1 values of w contain the computed
! eigenvalues. This is a copy of the w array on the host.
! - info is an integer. info will equal zero if the function completes succesfully. Otherwise, there was an error.
!
subroutine dsygvdx_gpu(N, A, lda, B, ldb, Z, ldz, il, iu, w, work, lwork, &
work_h, lwork_h, iwork_h, liwork_h, Z_h, ldz_h, w_h, info)
work_h, lwork_h, iwork_h, liwork_h, Z_h, ldz_h, w_h, info, _skip_host_copy)
use eigsolve_vars
use nvtx_inters
use dsygst_gpu
Expand All @@ -78,6 +80,7 @@ subroutine dsygvdx_gpu(N, A, lda, B, ldb, Z, ldz, il, iu, w, work, lwork, &
real(8), dimension(1:lwork), device :: work
real(8), dimension(1:lwork_h), pinned :: work_h
integer, dimension(1:liwork_h), pinned :: iwork_h
logical, optional :: _skip_host_copy

real(8), dimension(1:lda, 1:N), device :: A
real(8), dimension(1:ldb, 1:N), device :: B
Expand All @@ -88,8 +91,11 @@ subroutine dsygvdx_gpu(N, A, lda, B, ldb, Z, ldz, il, iu, w, work, lwork, &

real(8), parameter :: one = 1.d0
integer :: i, j
logical :: skip_host_copy

info = 0
skip_host_copy = .FALSE.
if(present(_skip_host_copy)) skip_host_copy = _skip_host_copy

! Check workspace sizes
if (lwork < 2*64*64 + 66*N) then
Expand Down Expand Up @@ -150,11 +156,13 @@ subroutine dsygvdx_gpu(N, A, lda, B, ldb, Z, ldz, il, iu, w, work, lwork, &
call nvtxEndRange

! Copy final eigenvectors to host
istat = cudaMemcpy2D(Z_h, ldz_h, Z, ldz, N, m)
if (istat .ne. 0) then
print*, "dsygvdx_gpu error: cudaMemcpy2D failed!"
info = -1
return
if (not(skip_host_copy)) then
istat = cudaMemcpy2D(Z_h, ldz_h, Z, ldz, N, m)
if (istat .ne. 0) then
print*, "dsygvdx_gpu error: cudaMemcpy2D failed!"
info = -1
return
endif
endif

end subroutine dsygvdx_gpu
Expand Down
22 changes: 15 additions & 7 deletions lib_eigsolve/zhegvdx_gpu.F90
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,8 @@ module zhegvdx_gpu
! - lrwork_h is an integer specifying length of rwork_h. lrwork_h >= 1 + 5*N + 2*N*N
! - iwork_h is a integer array for integer workspace of length liwork_h.
! - liwork_h is an integer specifying length of iwork_h. liwork_h >= 3 + 5*N
! - (optional) _skip_host_copy is an optional logical argument. If .TRUE., memcopy of final updated eigenvectors from
! device to host will be skipped.
!
! Output:
! On device:
Expand All @@ -65,13 +67,13 @@ module zhegvdx_gpu
! On host:
! - Z_h(ldz, iu - il + 1) is a double-complex matrix on the host. On exit, the first iu - il + 1 columns of Z
! contains normalized eigenvectors corresponding to eigenvalues in the range [il, iu]. This is a copy of the Z
! matrix on the device
! matrix on the device. This is only true if optional argument _skip_host_copy is not provided or is set to .FALSE.
! - w(iu - il + 1) is a real(8) array on the host. On exit, the first iu - il + 1 values of w contain the computed
! eigenvalues. This is a copy of the w array on the host.
! - info is an integer. info will equal zero if the function completes succesfully. Otherwise, there was an error.
!
subroutine zhegvdx_gpu(N, A, lda, B, ldb, Z, ldz, il, iu, w, work, lwork, rwork, lrwork, &
work_h, lwork_h, rwork_h, lrwork_h, iwork_h, liwork_h, Z_h, ldz_h, w_h, info)
work_h, lwork_h, rwork_h, lrwork_h, iwork_h, liwork_h, Z_h, ldz_h, w_h, info, _skip_host_copy)
use eigsolve_vars
use nvtx_inters
use zhegst_gpu
Expand All @@ -84,6 +86,7 @@ subroutine zhegvdx_gpu(N, A, lda, B, ldb, Z, ldz, il, iu, w, work, lwork, rwork,
complex(8), dimension(1:lwork), device :: work
complex(8), dimension(1:lwork_h), pinned :: work_h
integer, dimension(1:liwork_h), pinned :: iwork_h
logical, optional :: _skip_host_copy

complex(8), dimension(1:lda, 1:N), device :: A
complex(8), dimension(1:ldb, 1:N), device :: B
Expand All @@ -94,8 +97,11 @@ subroutine zhegvdx_gpu(N, A, lda, B, ldb, Z, ldz, il, iu, w, work, lwork, rwork,

complex(8), parameter :: cone = cmplx(1,0,8)
integer :: i, j
logical :: skip_host_copy

info = 0
skip_host_copy = .FALSE.
if(present(_skip_host_copy)) skip_host_copy = _skip_host_copy

! Check workspace sizes
if (lwork < 2*64*64 + 65*N) then
Expand Down Expand Up @@ -164,11 +170,13 @@ subroutine zhegvdx_gpu(N, A, lda, B, ldb, Z, ldz, il, iu, w, work, lwork, rwork,
call nvtxEndRange

! Copy final eigenvectors to host
istat = cudaMemcpy2D(Z_h, ldz_h, Z, ldz, N, m)
if (istat .ne. 0) then
print*, "zhegvdx_gpu error: cudaMemcpy2D failed!"
info = -1
return
if (not(skip_host_copy)) then
istat = cudaMemcpy2D(Z_h, ldz_h, Z, ldz, N, m)
if (istat .ne. 0) then
print*, "zhegvdx_gpu error: cudaMemcpy2D failed!"
info = -1
return
endif
endif

end subroutine zhegvdx_gpu
Expand Down

0 comments on commit d835843

Please sign in to comment.