51subroutine get_hh_vec_&
55(obj, my_prow, nrow, nblk, np_rows, mpi_comm_rows, lr, allreduce_request1, &
56 usenonblockingcollectivesrows, wantdebug, vec_in, vr, tau, vrl)
65#include "../general/precision_kinds.F90"
68 integer(kind=ik),
intent(in) :: nrow, my_prow, nblk, np_rows, lr
69 integer(kind=ik),
intent(in) :: mpi_comm_rows
70 logical,
intent(in) :: useNonBlockingCollectivesRows, wantDebug
71 integer(kind=MPI_KIND) :: mpierr
72 integer(kind=MPI_KIND),
intent(inout) :: allreduce_request1
73 math_datatype(kind=rck):: vr(:), vec_in(:), tau, vrl
74 math_datatype(kind=rck):: aux1(2), xf
75 real(kind=rk):: vnorm2
79 if (my_prow==prow(nrow, nblk, np_rows))
then
80 aux1(1) = dot_product(vec_in(1:lr-1),vec_in(1:lr-1))
83 aux1(1) = dot_product(vec_in(1:lr),vec_in(1:lr))
88 if (usenonblockingcollectivesrows)
then
89 if (wantdebug)
call obj%timer%start(
"mpi_nbc_communication")
90 call mpi_iallreduce(mpi_in_place, aux1, 2_mpi_kind, mpi_math_datatype_precision, &
91 mpi_sum, int(mpi_comm_rows,kind=mpi_kind), &
92 allreduce_request1, mpierr)
94 call mpi_wait(allreduce_request1, mpi_status_ignore, mpierr)
96 if (wantdebug)
call obj%timer%stop(
"mpi_nbc_communication")
99 call mpi_allreduce(mpi_in_place, aux1, 2_mpi_kind, mpi_math_datatype_precision, &
100 mpi_sum, int(mpi_comm_rows,kind=mpi_kind), &
112 vnorm2 = real(aux1(1),kind=rk)
121 (obj, vrl, vnorm2, xf, tau, wantdebug)
124 vr(1:lr) = vec_in(1:lr) * xf
125 if (my_prow==prow(nrow, nblk, np_rows)) vr(lr) = 1.0_rck
134 (obj, max_threads, lr, nbw, mpi_comm_rows, usenonblockingcollectivesrows, allreduce_request3, wantdebug, tau, vr, ex_buff2d)
142 integer(kind=MPI_KIND) :: mpierr
143 integer(kind=ik),
intent(in) :: max_threads, lr, nbw
144 integer(kind=ik),
intent(in) :: mpi_comm_rows
145 logical,
intent(in) :: usenonblockingcollectivesrows, wantdebug
146 integer(kind=MPI_KIND),
intent(inout) :: allreduce_request3
148#include "../general/precision_kinds.F90"
149 math_datatype(kind=rck) :: tau, vr(:), ex_buff2d(:,:)
150 math_datatype(kind=rck) :: tauc
151 math_datatype(kind=rck) :: aux1(nbw)
155 imax=ubound(ex_buff2d,2)
157 if((imax.lt.3).or.(max_threads.gt.1))
then
168 call precision_gemv(blas_trans_or_conj,int(lr,kind=blas_kind),int(imax,kind=blas_kind), &
169 one, ex_buff2d,
size(ex_buff2d,1,kind=blas_kind), vr, 1_blas_kind, zero, aux1, &
172#ifdef WITH_OPENMP_TRADITIONAL
176 aux1(nlc) = dot_product(vr(1:lr),ex_buff2d(1:lr,nlc))
178#ifdef WITH_OPENMP_TRADITIONAL
188 if (usenonblockingcollectivesrows)
then
189 if (wantdebug)
call obj%timer%start(
"mpi_nbc_communication")
191 call mpi_iallreduce(mpi_in_place, aux1, int(imax,kind=mpi_kind), mpi_math_datatype_precision, &
192 mpi_sum, int(mpi_comm_rows,kind=mpi_kind), &
193 allreduce_request3, mpierr)
194 call mpi_wait(allreduce_request3, mpi_status_ignore, mpierr)
196 if (wantdebug)
call obj%timer%stop(
"mpi_nbc_communication")
198 if (wantdebug)
call obj%timer%start(
"mpi_communication")
200 call mpi_allreduce(mpi_in_place, aux1, int(imax,kind=mpi_kind), mpi_math_datatype_precision, &
201 mpi_sum, int(mpi_comm_rows,kind=mpi_kind), &
204 if (wantdebug)
call obj%timer%stop(
"mpi_communication")
217 call precision_gerc(int(lr,kind=blas_kind),int(imax,kind=blas_kind),tauc,vr,1_blas_kind,&
218 aux1,1_blas_kind,ex_buff2d,ubound(ex_buff2d,1,kind=blas_kind))
220#ifdef WITH_OPENMP_TRADITIONAL
224 ex_buff2d(1:lr,nlc) = ex_buff2d(1:lr,nlc) + tauc*aux1(nlc)*vr(1:lr)
226#ifdef WITH_OPENMP_TRADITIONAL