Eigenvalue SoLvers for Petaflop-Applications (ELPA) 2024.05.001
global_gather_template.F90
subroutine global_gather_&
&PRECISION&
&(obj, z, n, mpi_comm_rows, mpi_comm_cols, npc_n, np_prev, np_next, &
  success)
  ! This routine sums up z over all processors.
  ! It should only be used for gathering distributed results,
  ! i.e. z(i) should be nonzero on exactly 1 processor column,
  ! otherwise the results may be numerically different on different columns
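  ! On return, z holds the global sum on every processor column that takes part
  ! in the gather (either via mpi_allreduce or via the ring send further below)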
  use precision
  use elpa_abstract_impl
  use elpa_mpi
  use elpa_utilities
#ifdef WITH_OPENMP_TRADITIONAL
  use elpa_omp
#endif
  implicit none
  class(elpa_abstract_impl_t), intent(inout) :: obj
  integer(kind=ik), intent(in)               :: mpi_comm_cols, mpi_comm_rows
  integer(kind=ik), intent(in)               :: npc_n, np_prev, np_next
#ifdef WITH_MPI
  integer(kind=MPI_KIND)                     :: mpierr, np_rowsMPI, np_colsMPI
#endif
  integer(kind=ik)                           :: n, np_rows, np_cols
  real(kind=REAL_DATATYPE)                   :: z(n)
  real(kind=REAL_DATATYPE)                   :: tmp(n)
  integer(kind=ik)                           :: np
  integer(kind=MPI_KIND)                     :: allreduce_request1, allreduce_request2
  logical                                    :: useNonBlockingCollectivesCols
  logical                                    :: useNonBlockingCollectivesRows
  integer(kind=c_int)                        :: non_blocking_collectives_rows, error, &
                                                non_blocking_collectives_cols
  logical                                    :: success

  success = .true.

  call obj%get("nbc_row_global_gather", non_blocking_collectives_rows, error)
  if (error .ne. ELPA_OK) then
    write(error_unit,*) "Problem getting option for non blocking collectives for rows in global_gather. Aborting..."
    success = .false.
    return
  endif

  call obj%get("nbc_col_global_gather", non_blocking_collectives_cols, error)
  if (error .ne. ELPA_OK) then
    write(error_unit,*) "Problem getting option for non blocking collectives for cols in global_gather. Aborting..."
    success = .false.
    return
  endif

  if (non_blocking_collectives_rows .eq. 1) then
    useNonBlockingCollectivesRows = .true.
  else
    useNonBlockingCollectivesRows = .false.
  endif

  if (non_blocking_collectives_cols .eq. 1) then
    useNonBlockingCollectivesCols = .true.
  else
    useNonBlockingCollectivesCols = .false.
  endif

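  ! Determine the size of the process grid from the row and column communicators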
#ifdef WITH_MPI
  call obj%timer%start("mpi_communication")
  call mpi_comm_size(int(mpi_comm_rows,kind=MPI_KIND), np_rowsMPI, mpierr)
  np_rows = int(np_rowsMPI,kind=c_int)

  !call mpi_comm_rank(int(mpi_comm_cols,kind=MPI_KIND), my_pcolMPI, mpierr)
  call mpi_comm_size(int(mpi_comm_cols,kind=MPI_KIND), np_colsMPI, mpierr)
  np_cols = int(np_colsMPI,kind=c_int)

  call obj%timer%stop("mpi_communication")

#else
#endif
  if (npc_n==1 .and. np_rows==1) return ! nothing to do

  ! Do an mpi_allreduce over processor rows
#ifdef WITH_MPI
  if (useNonBlockingCollectivesRows) then
    call obj%timer%start("mpi_nbc_communication")
    call mpi_iallreduce(z, tmp, int(n,kind=MPI_KIND), MPI_REAL_PRECISION, MPI_SUM, int(mpi_comm_rows,kind=MPI_KIND), &
                        allreduce_request1, mpierr)
    call mpi_wait(allreduce_request1, MPI_STATUS_IGNORE, mpierr)
    call obj%timer%stop("mpi_nbc_communication")
  else
    call obj%timer%start("mpi_communication")
    call mpi_allreduce(z, tmp, int(n,kind=MPI_KIND), MPI_REAL_PRECISION, MPI_SUM, int(mpi_comm_rows,kind=MPI_KIND), &
                       mpierr)
    call obj%timer%stop("mpi_communication")
  endif
#else /* WITH_MPI */
  tmp = z
#endif /* WITH_MPI */
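  ! At this point tmp holds z summed over all processor rows within each column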
  ! If only 1 processor column, we are done
  if (npc_n==1) then
    z(:) = tmp(:)
    return
  endif

  ! If all processor columns are involved, we can use mpi_allreduce
  if (npc_n==np_cols) then
#ifdef WITH_MPI
    if (useNonBlockingCollectivesCols) then
      call obj%timer%start("mpi_nbc_communication")
      call mpi_iallreduce(tmp, z, int(n,kind=MPI_KIND), MPI_REAL_PRECISION, MPI_SUM, int(mpi_comm_cols,kind=MPI_KIND), &
                          allreduce_request2, mpierr)
      call mpi_wait(allreduce_request2, MPI_STATUS_IGNORE, mpierr)
      call obj%timer%stop("mpi_nbc_communication")
    else
      call obj%timer%start("mpi_communication")
      call mpi_allreduce(tmp, z, int(n,kind=MPI_KIND), MPI_REAL_PRECISION, MPI_SUM, int(mpi_comm_cols,kind=MPI_KIND), &
                         mpierr)
      call obj%timer%stop("mpi_communication")
    endif
#else /* WITH_MPI */
    tmp = z
#endif /* WITH_MPI */

    return
  endif

  ! Do a ring send over processor columns
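  ! Only npc_n of the np_cols processor columns take part in this gather, so an
  ! allreduce over mpi_comm_cols cannot be used here; instead the partial sums are
  ! accumulated while being passed around the ring np_prev -> this column -> np_next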
  z(:) = 0
  do np = 1, npc_n
    z(:) = z(:) + tmp(:)
#ifdef WITH_MPI
    call obj%timer%start("mpi_communication")
    call mpi_sendrecv_replace(z, int(n,kind=MPI_KIND), MPI_REAL_PRECISION, int(np_next,kind=MPI_KIND), &
                              1112_MPI_KIND+int(np,kind=MPI_KIND), &
                              int(np_prev,kind=MPI_KIND), 1112_MPI_KIND+int(np,kind=MPI_KIND), &
                              int(mpi_comm_cols,kind=MPI_KIND), MPI_STATUS_IGNORE, mpierr)
    call obj%timer%stop("mpi_communication")
#else /* WITH_MPI */
#endif /* WITH_MPI */
  enddo
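  ! After npc_n send/receive steps each participating column has accumulated the
  ! contribution of every column in the ring, i.e. z now holds the complete sum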
end subroutine global_gather_&
    &PRECISION
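
This file is a preprocessor template: PRECISION, REAL_DATATYPE and MPI_REAL_PRECISION are macros that get substituted when the file is included from a precision-specific wrapper. The sketch below only illustrates that convention; the macro names, values and the precision_macros.h include are assumptions about the usual ELPA template setup, not taken from this file.

! Illustrative sketch only (assumed wrapper, not part of this file): generating a
! double-precision real instance of the template.
#define REALCASE 1
#define DOUBLE_PRECISION 1
#include "../general/precision_macros.h"  /* assumed to define PRECISION, REAL_DATATYPE, MPI_REAL_PRECISION */
#include "global_gather_template.F90"     /* would expand to subroutine global_gather_double(...) */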