10recursive subroutine merge_recursive_cpu_&
12 (obj, np_off, nprocs, ldq, matrixcols, nblk, &
13 l_col, p_col, l_col_bc, p_col_bc, limits, &
14 np_cols, na, q, d, e, &
15 mpi_comm_all, mpi_comm_rows, mpi_comm_cols, &
16 usegpu, wantdebug, success, max_threads)
27#if defined(WITH_NVIDIA_GPU_VERSION) && defined(WITH_NVTX)
29#elif defined(WITH_AMD_GPU_VERSION) && defined(WITH_ROCTX)
39 integer(kind=ik),
intent(in) :: max_threads
40 integer(kind=ik),
intent(in) :: mpi_comm_all, mpi_comm_rows, mpi_comm_cols
41 integer(kind=ik),
intent(in) :: ldq, matrixcols, nblk, na, np_cols
42 integer(kind=ik),
intent(in) :: l_col_bc(na), p_col_bc(na), l_col(na), p_col(na), &
45 integer(kind=c_intptr_t) :: q_dev
46#ifdef USE_ASSUMED_SIZE
47#ifdef SOLVE_TRIDI_GPU_BUILD
48 real(kind=real_datatype) :: q(ldq,matrixcols)
50 real(kind=real_datatype) :: q(ldq,*)
53 real(kind=real_datatype) :: q(ldq,matrixcols)
57 integer(kind=MPI_KIND) :: mpierr, my_pcolmpi
59 integer(kind=ik) :: my_pcol
60 real(kind=real_datatype),
intent(inout) :: d(na), e(na)
61 integer(kind=ik) :: np_off, nprocs
62 integer(kind=ik) :: np1, np2, noff, nlen, nmid, n
63 logical,
intent(in) :: usegpu, wantdebug
64 logical,
intent(out) :: success
69 call obj%timer%start(
"mpi_communication")
70 call mpi_comm_rank(int(mpi_comm_cols,kind=mpi_kind) ,my_pcolmpi, mpierr)
72 my_pcol = int(my_pcolmpi,kind=c_int)
73 call obj%timer%stop(
"mpi_communication")
78 if (wantdebug)
write(error_unit,*)
"ELPA1_merge_recursive: INTERNAL error merge_recursive: nprocs=",nprocs
90 call merge_recursive_gpu_&
92 (obj, np_off, np1, ldq, matrixcols, nblk, &
93 l_col, p_col, l_col_bc, p_col_bc, limits, &
94 np_cols, na, q_dev, d, e, &
95 mpi_comm_all, mpi_comm_rows, mpi_comm_cols, &
96 usegpu, wantdebug, success, max_threads)
98 call merge_recursive_cpu_&
100 (obj, np_off, np1, ldq, matrixcols, nblk, &
101 l_col, p_col, l_col_bc, p_col_bc, limits, &
102 np_cols, na, q, d, e, &
103 mpi_comm_all, mpi_comm_rows, mpi_comm_cols, &
104 usegpu, wantdebug, success, max_threads)
108 if (.not.(success))
then
109 write(error_unit,*)
"Error in merge_recursive. Aborting..."
115 call merge_recursive_gpu_&
117 (obj, np_off+np1, np2, ldq, matrixcols, nblk, &
118 l_col, p_col, l_col_bc, p_col_bc, limits, &
119 np_cols, na, q_dev, d, e, &
120 mpi_comm_all, mpi_comm_rows, mpi_comm_cols, &
121 usegpu, wantdebug, success, max_threads)
123 call merge_recursive_cpu_&
125 (obj, np_off+np1, np2, ldq, matrixcols, nblk, &
126 l_col, p_col, l_col_bc, p_col_bc, limits, &
127 np_cols, na, q, d, e, &
128 mpi_comm_all, mpi_comm_rows, mpi_comm_cols, &
129 usegpu, wantdebug, success, max_threads)
133 if (.not.(success))
then
134 write(error_unit,*)
"Error in merge_recursice. Aborting..."
138 noff = limits(np_off)
139 nmid = limits(np_off+np1) - noff
140 nlen = limits(np_off+nprocs) - noff
143 call obj%timer%start(
"mpi_communication")
144 if (my_pcol==np_off)
then
145 do n=np_off+np1,np_off+nprocs-1
146 call mpi_send(d(noff+1), int(nmid,kind=mpi_kind), mpi_real_precision, int(n,kind=mpi_kind), 12_mpi_kind, &
147 int(mpi_comm_cols,kind=mpi_kind), mpierr)
150 call obj%timer%stop(
"mpi_communication")
154 if (my_pcol>=np_off+np1 .and. my_pcol<np_off+nprocs)
then
156 call obj%timer%start(
"mpi_communication")
157 call mpi_recv(d(noff+1), int(nmid,kind=mpi_kind), mpi_real_precision, int(np_off,kind=mpi_kind), 12_mpi_kind, &
158 int(mpi_comm_cols,kind=mpi_kind), mpi_status_ignore, mpierr)
159 call obj%timer%stop(
"mpi_communication")
165 if (my_pcol==np_off+np1)
then
166 do n=np_off,np_off+np1-1
168 call obj%timer%start(
"mpi_communication")
169 call mpi_send(d(noff+nmid+1), int(nlen-nmid,kind=mpi_kind), mpi_real_precision, int(n,kind=mpi_kind), &
170 15_mpi_kind, int(mpi_comm_cols,kind=mpi_kind), mpierr)
171 call obj%timer%stop(
"mpi_communication")
178 if (my_pcol>=np_off .and. my_pcol<np_off+np1)
then
180 call obj%timer%start(
"mpi_communication")
181 call mpi_recv(d(noff+nmid+1), int(nlen-nmid,kind=mpi_kind), mpi_real_precision, int(np_off+np1,kind=mpi_kind), &
182 15_mpi_kind, int(mpi_comm_cols,kind=mpi_kind), mpi_status_ignore, mpierr)
183 call obj%timer%stop(
"mpi_communication")
188 if (nprocs == np_cols)
then
193 nvtx_range_push(
"merge_systems_gpu")
194 call merge_systems_gpu_&
196 (obj, nlen, nmid, d(noff+1), e(noff+nmid), q_dev, ldq, noff, &
197 nblk, matrixcols, int(mpi_comm_rows,kind=ik), int(mpi_comm_cols,kind=ik), &
199 l_col_bc, p_col_bc, np_off, nprocs, usegpu, wantdebug, success, max_threads)
200 nvtx_range_pop(
"merge_systems_gpu")
202 call merge_systems_cpu_&
204 (obj, nlen, nmid, d(noff+1), e(noff+nmid), q, ldq, noff, &
205 nblk, matrixcols, int(mpi_comm_rows,kind=ik), int(mpi_comm_cols,kind=ik), &
207 l_col_bc, p_col_bc, np_off, nprocs, usegpu, wantdebug, success, max_threads)
209 if (.not.(success))
then
210 write(error_unit,*)
"Error in merge_systems: Aborting..."
217 nvtx_range_push(
"merge_systems_gpu")
218 call merge_systems_gpu_&
220 (obj, nlen, nmid, d(noff+1), e(noff+nmid), q_dev, ldq, noff, &
221 nblk, matrixcols, int(mpi_comm_rows,kind=ik), int(mpi_comm_cols,kind=ik), &
222 l_col(noff+1), p_col(noff+1), &
223 l_col(noff+1), p_col(noff+1), np_off, nprocs, usegpu, wantdebug, success, &
225 nvtx_range_pop(
"merge_systems_gpu")
227 call merge_systems_cpu_&
229 (obj, nlen, nmid, d(noff+1), e(noff+nmid), q, ldq, noff, &
230 nblk, matrixcols, int(mpi_comm_rows,kind=ik), int(mpi_comm_cols,kind=ik), &
231 l_col(noff+1), p_col(noff+1), &
232 l_col(noff+1), p_col(noff+1), np_off, nprocs, usegpu, wantdebug, success, &
235 if (.not.(success))
then
236 write(error_unit,*)
"Error in merge_systems: Aborting..."