Line data Source code
1 : !--------------------------------------------------------------------------------------------------!
2 : ! CP2K: A general program to perform molecular dynamics simulations !
3 : ! Copyright 2000-2025 CP2K developers group <https://cp2k.org> !
4 : ! !
5 : ! SPDX-License-Identifier: GPL-2.0-or-later !
6 : !--------------------------------------------------------------------------------------------------!
7 :
8 : ! **************************************************************************************************
9 : !> \brief communication routines to reshape / replicate / merge tall-and-skinny matrices.
10 : !> \author Patrick Seewald
11 : ! **************************************************************************************************
12 : MODULE dbt_tas_reshape_ops
13 : USE OMP_LIB, ONLY: omp_destroy_lock,&
14 : omp_get_max_threads,&
15 : omp_get_num_threads,&
16 : omp_get_thread_num,&
17 : omp_init_lock,&
18 : omp_lock_kind,&
19 : omp_set_lock,&
20 : omp_unset_lock
21 : USE dbm_api, ONLY: &
22 : dbm_clear, dbm_distribution_col_dist, dbm_distribution_obj, dbm_distribution_row_dist, &
23 : dbm_finalize, dbm_get_col_block_sizes, dbm_get_distribution, dbm_get_name, &
24 : dbm_get_row_block_sizes, dbm_get_stored_coordinates, dbm_iterator, &
25 : dbm_iterator_blocks_left, dbm_iterator_next_block, dbm_iterator_start, dbm_iterator_stop, &
26 : dbm_put_block, dbm_reserve_blocks, dbm_type
27 : USE dbt_tas_base, ONLY: &
28 : dbt_repl_get_stored_coordinates, dbt_tas_blk_sizes, dbt_tas_clear, dbt_tas_create, &
29 : dbt_tas_distribution_new, dbt_tas_finalize, dbt_tas_get_stored_coordinates, dbt_tas_info, &
30 : dbt_tas_iterator_blocks_left, dbt_tas_iterator_next_block, dbt_tas_iterator_start, &
31 : dbt_tas_iterator_stop, dbt_tas_put_block, dbt_tas_reserve_blocks
32 : USE dbt_tas_global, ONLY: dbt_tas_blk_size_arb,&
33 : dbt_tas_blk_size_repl,&
34 : dbt_tas_dist_arb,&
35 : dbt_tas_dist_repl,&
36 : dbt_tas_distribution,&
37 : dbt_tas_rowcol_data
38 : USE dbt_tas_split, ONLY: colsplit,&
39 : dbt_tas_get_split_info,&
40 : rowsplit
41 : USE dbt_tas_types, ONLY: dbt_tas_distribution_type,&
42 : dbt_tas_iterator,&
43 : dbt_tas_split_info,&
44 : dbt_tas_type
45 : USE dbt_tas_util, ONLY: swap
46 : USE kinds, ONLY: dp,&
47 : int_8
48 : USE message_passing, ONLY: mp_cart_type,&
49 : mp_comm_type,&
50 : mp_request_type,&
51 : mp_waitall
52 : #include "../../base/base_uses.f90"
53 :
54 : IMPLICIT NONE
55 : PRIVATE
56 :
57 : CHARACTER(len=*), PARAMETER, PRIVATE :: moduleN = 'dbt_tas_reshape_ops'
58 :
59 : PUBLIC :: &
60 : dbt_tas_merge, &
61 : dbt_tas_replicate, &
62 : dbt_tas_reshape
63 :
64 : TYPE dbt_buffer_type
65 : INTEGER :: nblock = -1
66 : INTEGER(KIND=int_8), DIMENSION(:, :), ALLOCATABLE :: indx
67 : REAL(dp), DIMENSION(:), ALLOCATABLE :: msg
68 : INTEGER :: endpos = -1
69 : END TYPE dbt_buffer_type
70 :
71 : CONTAINS
72 :
73 : ! **************************************************************************************************
74 : !> \brief copy data (involves reshape)
75 : !> \param matrix_in ...
76 : !> \param matrix_out ...
77 : !> \param summation whether matrix_out = matrix_out + matrix_in
78 : !> \param transposed ...
79 : !> \param move_data memory optimization: move data to matrix_out such that matrix_in is empty on return
80 : !> \author Patrick Seewald
81 : ! **************************************************************************************************
82 197498 : RECURSIVE SUBROUTINE dbt_tas_reshape(matrix_in, matrix_out, summation, transposed, move_data)
83 : TYPE(dbt_tas_type), INTENT(INOUT) :: matrix_in, matrix_out
84 : LOGICAL, INTENT(IN), OPTIONAL :: summation, transposed, move_data
85 :
86 : CHARACTER(LEN=*), PARAMETER :: routineN = 'dbt_tas_reshape'
87 :
88 : INTEGER :: a, b, bcount, handle, handle2, iproc, &
89 : nblk, nblk_per_thread, ndata, numnodes
90 197498 : INTEGER(KIND=int_8), ALLOCATABLE, DIMENSION(:, :) :: blks_to_allocate, index_recv
91 : INTEGER(KIND=int_8), DIMENSION(2) :: blk_index
92 : INTEGER(kind=omp_lock_kind), ALLOCATABLE, &
93 197498 : DIMENSION(:) :: locks
94 197498 : INTEGER, ALLOCATABLE, DIMENSION(:) :: num_blocks_recv, num_blocks_send, &
95 197498 : num_entries_recv, num_entries_send, &
96 197498 : num_rec, num_send
97 : INTEGER, DIMENSION(2) :: blk_size
98 : LOGICAL :: move_prv, tr_in
99 197498 : REAL(KIND=dp), DIMENSION(:, :), POINTER :: block
100 197498 : TYPE(dbt_buffer_type), ALLOCATABLE, DIMENSION(:) :: buffer_recv, buffer_send
101 : TYPE(dbt_tas_iterator) :: iter
102 1184988 : TYPE(dbt_tas_split_info) :: info
103 : TYPE(mp_comm_type) :: mp_comm
104 : TYPE(mp_request_type), ALLOCATABLE, &
105 197498 : DIMENSION(:, :) :: req_array
106 :
107 197498 : CALL timeset(routineN, handle)
108 :
109 197498 : IF (PRESENT(summation)) THEN
110 69710 : IF (.NOT. summation) CALL dbm_clear(matrix_out%matrix)
111 : ELSE
112 127788 : CALL dbm_clear(matrix_out%matrix)
113 : END IF
114 :
115 197498 : IF (PRESENT(move_data)) THEN
116 197498 : move_prv = move_data
117 : ELSE
118 : move_prv = .FALSE.
119 : END IF
120 :
121 197498 : IF (PRESENT(transposed)) THEN
122 197498 : tr_in = transposed
123 : ELSE
124 0 : tr_in = .FALSE.
125 : END IF
126 :
127 197498 : IF (.NOT. matrix_out%valid) THEN
128 0 : CPABORT("can not reshape into invalid matrix")
129 : END IF
130 :
131 197498 : info = dbt_tas_info(matrix_in)
132 197498 : mp_comm = info%mp_comm
133 197498 : numnodes = mp_comm%num_pe
134 913996 : ALLOCATE (buffer_send(0:numnodes - 1))
135 716498 : ALLOCATE (buffer_recv(0:numnodes - 1))
136 592494 : ALLOCATE (num_blocks_recv(0:numnodes - 1))
137 394996 : ALLOCATE (num_blocks_send(0:numnodes - 1))
138 394996 : ALLOCATE (num_entries_recv(0:numnodes - 1))
139 394996 : ALLOCATE (num_entries_send(0:numnodes - 1))
140 592494 : ALLOCATE (num_rec(0:2*numnodes - 1))
141 1038000 : ALLOCATE (num_send(0:2*numnodes - 1), SOURCE=0)
142 2668494 : ALLOCATE (req_array(1:numnodes, 4))
143 394996 : ALLOCATE (locks(0:numnodes - 1))
144 519000 : DO iproc = 0, numnodes - 1
145 519000 : CALL omp_init_lock(locks(iproc))
146 : END DO
147 :
148 197498 : CALL timeset(routineN//"_get_coord", handle2)
149 : !$OMP PARALLEL DEFAULT(NONE) SHARED(matrix_in,matrix_out,tr_in,num_send,locks) &
150 197498 : !$OMP PRIVATE(iter,blk_index,blk_size,iproc)
151 : CALL dbt_tas_iterator_start(iter, matrix_in)
152 : DO WHILE (dbt_tas_iterator_blocks_left(iter))
153 : CALL dbt_tas_iterator_next_block(iter, blk_index(1), blk_index(2), &
154 : row_size=blk_size(1), col_size=blk_size(2))
155 : IF (tr_in) THEN
156 : CALL dbt_tas_get_stored_coordinates(matrix_out, blk_index(2), blk_index(1), iproc)
157 : ELSE
158 : CALL dbt_tas_get_stored_coordinates(matrix_out, blk_index(1), blk_index(2), iproc)
159 : END IF
160 : CALL omp_set_lock(locks(iproc))
161 : num_send(2*iproc) = num_send(2*iproc) + PRODUCT(blk_size)
162 : num_send(2*iproc + 1) = num_send(2*iproc + 1) + 1
163 : CALL omp_unset_lock(locks(iproc))
164 : END DO
165 : CALL dbt_tas_iterator_stop(iter)
166 : !$OMP END PARALLEL
167 197498 : CALL timestop(handle2)
168 :
169 197498 : CALL timeset(routineN//"_alltoall", handle2)
170 197498 : CALL mp_comm%alltoall(num_send, num_rec, 2)
171 197498 : CALL timestop(handle2)
172 :
173 197498 : CALL timeset(routineN//"_buffer_fill", handle2)
174 519000 : DO iproc = 0, numnodes - 1
175 321502 : num_entries_recv(iproc) = num_rec(2*iproc)
176 321502 : num_blocks_recv(iproc) = num_rec(2*iproc + 1)
177 321502 : num_entries_send(iproc) = num_send(2*iproc)
178 321502 : num_blocks_send(iproc) = num_send(2*iproc + 1)
179 :
180 321502 : CALL dbt_buffer_create(buffer_send(iproc), num_blocks_send(iproc), num_entries_send(iproc))
181 519000 : CALL dbt_buffer_create(buffer_recv(iproc), num_blocks_recv(iproc), num_entries_recv(iproc))
182 : END DO
183 :
184 : !$OMP PARALLEL DEFAULT(NONE) SHARED(matrix_in,matrix_out,tr_in,buffer_send,locks) &
185 197498 : !$OMP PRIVATE(iter,blk_index,blk_size,block,iproc)
186 : CALL dbt_tas_iterator_start(iter, matrix_in)
187 : DO WHILE (dbt_tas_iterator_blocks_left(iter))
188 : CALL dbt_tas_iterator_next_block(iter, blk_index(1), blk_index(2), block, &
189 : row_size=blk_size(1), col_size=blk_size(2))
190 : IF (tr_in) THEN
191 : CALL dbt_tas_get_stored_coordinates(matrix_out, blk_index(2), blk_index(1), iproc)
192 : ELSE
193 : CALL dbt_tas_get_stored_coordinates(matrix_out, blk_index(1), blk_index(2), iproc)
194 : END IF
195 : CALL omp_set_lock(locks(iproc))
196 : CALL dbt_buffer_add_block(buffer_send(iproc), blk_index, block, transposed=tr_in)
197 : CALL omp_unset_lock(locks(iproc))
198 : END DO
199 : CALL dbt_tas_iterator_stop(iter)
200 : !$OMP END PARALLEL
201 :
202 197498 : IF (move_prv) CALL dbt_tas_clear(matrix_in)
203 197498 : CALL timestop(handle2)
204 :
205 197498 : CALL timeset(routineN//"_communicate_buffer", handle2)
206 197498 : CALL dbt_tas_communicate_buffer(mp_comm, buffer_recv, buffer_send, req_array)
207 :
208 519000 : DO iproc = 0, numnodes - 1
209 321502 : CALL dbt_buffer_destroy(buffer_send(iproc))
210 519000 : CALL omp_destroy_lock(locks(iproc))
211 : END DO
212 197498 : DEALLOCATE (locks)
213 :
214 197498 : CALL timestop(handle2)
215 :
216 197498 : CALL timeset(routineN//"_buffer_obtain", handle2)
217 :
218 : ! Parallel unpack of received blocks.
219 519000 : nblk = SUM(num_blocks_recv)
220 571769 : ALLOCATE (blks_to_allocate(nblk, 2))
221 :
222 197498 : bcount = 0
223 519000 : DO iproc = 0, numnodes - 1
224 321502 : CALL dbt_buffer_get_index(buffer_recv(iproc), index_recv)
225 5973378 : blks_to_allocate(bcount + 1:bcount + SIZE(index_recv, 1), :) = index_recv(:, :)
226 321502 : bcount = bcount + SIZE(index_recv, 1)
227 840502 : DEALLOCATE (index_recv)
228 : END DO
229 :
230 : !TODO: Parallelize creation of block list.
231 197498 : !$OMP PARALLEL DEFAULT(NONE) SHARED(matrix_out,nblk,blks_to_allocate) PRIVATE(nblk_per_thread,A,b)
232 : nblk_per_thread = nblk/omp_get_num_threads() + 1
233 : a = omp_get_thread_num()*nblk_per_thread + 1
234 : b = MIN(a + nblk_per_thread, nblk)
235 : CALL dbt_tas_reserve_blocks(matrix_out, blks_to_allocate(a:b, 1), blks_to_allocate(a:b, 2))
236 : !$OMP END PARALLEL
237 197498 : DEALLOCATE (blks_to_allocate)
238 :
239 : !$OMP PARALLEL DEFAULT(NONE) SHARED(buffer_recv,matrix_out,numnodes,summation) &
240 197498 : !$OMP PRIVATE(iproc,ndata,blk_index,blk_size,block)
241 : !$OMP DO SCHEDULE(DYNAMIC)
242 : DO iproc = 0, numnodes - 1
243 : ! First, we need to get the index to create block
244 : DO WHILE (dbt_buffer_blocks_left(buffer_recv(iproc)))
245 : CALL dbt_buffer_get_next_block(buffer_recv(iproc), ndata, blk_index)
246 : CALL dbt_tas_blk_sizes(matrix_out, blk_index(1), blk_index(2), blk_size(1), blk_size(2))
247 : ALLOCATE (block(blk_size(1), blk_size(2)))
248 : CALL dbt_buffer_get_next_block(buffer_recv(iproc), ndata, blk_index, block)
249 : CALL dbt_tas_put_block(matrix_out, blk_index(1), blk_index(2), block, summation=summation)
250 : DEALLOCATE (block)
251 : END DO
252 : CALL dbt_buffer_destroy(buffer_recv(iproc))
253 : END DO
254 : !$OMP END DO
255 : !$OMP END PARALLEL
256 :
257 197498 : CALL timestop(handle2)
258 :
259 197498 : CALL dbt_tas_finalize(matrix_out)
260 :
261 197498 : CALL timestop(handle)
262 2222988 : END SUBROUTINE dbt_tas_reshape
263 :
264 : ! **************************************************************************************************
265 : !> \brief Replicate matrix_in such that each submatrix of matrix_out is an exact copy of matrix_in
266 : !> \param matrix_in ...
267 : !> \param info ...
268 : !> \param matrix_out ...
269 : !> \param nodata Don't copy data but create matrix_out
270 : !> \param move_data memory optimization: move data to matrix_out such that matrix_in is empty on return
271 : !> \author Patrick Seewald
272 : ! **************************************************************************************************
273 1096140 : SUBROUTINE dbt_tas_replicate(matrix_in, info, matrix_out, nodata, move_data)
274 : TYPE(dbm_type), INTENT(INOUT) :: matrix_in
275 : TYPE(dbt_tas_split_info), INTENT(IN) :: info
276 : TYPE(dbt_tas_type), INTENT(OUT) :: matrix_out
277 : LOGICAL, INTENT(IN), OPTIONAL :: nodata, move_data
278 :
279 : INTEGER :: a, b, nblk_per_thread, nblkcols, nblkrows
280 : INTEGER, DIMENSION(2) :: pdims
281 365380 : INTEGER, DIMENSION(:), POINTER :: col_blk_size, col_dist, row_blk_size, &
282 182690 : row_dist
283 : TYPE(dbm_distribution_obj) :: dbm_dist
284 182690 : TYPE(dbt_tas_dist_arb), TARGET :: dir_dist
285 182690 : TYPE(dbt_tas_dist_repl), TARGET :: repl_dist
286 :
287 365380 : CLASS(dbt_tas_distribution), ALLOCATABLE :: col_dist_obj, row_dist_obj
288 365380 : CLASS(dbt_tas_rowcol_data), ALLOCATABLE :: row_bsize_obj, col_bsize_obj
289 182690 : TYPE(dbt_tas_blk_size_repl), TARGET :: repl_blksize
290 182690 : TYPE(dbt_tas_blk_size_arb), TARGET :: dir_blksize
291 913450 : TYPE(dbt_tas_distribution_type) :: dist
292 : INTEGER :: numnodes, ngroup, max_threads, cache_idx
293 182690 : INTEGER(kind=omp_lock_kind), ALLOCATABLE, DIMENSION(:) :: locks
294 182690 : TYPE(dbt_buffer_type), ALLOCATABLE, DIMENSION(:) :: buffer_recv, buffer_send
295 182690 : INTEGER, ALLOCATABLE, DIMENSION(:) :: num_blocks_recv, num_blocks_send, &
296 182690 : num_entries_recv, num_entries_send, &
297 182690 : num_rec, num_send
298 182690 : TYPE(mp_request_type), ALLOCATABLE, DIMENSION(:, :) :: req_array
299 182690 : INTEGER, ALLOCATABLE, DIMENSION(:, :) :: blks_to_allocate
300 : INTEGER, DIMENSION(2) :: blk_size
301 : INTEGER, DIMENSION(2) :: blk_index
302 : INTEGER(KIND=int_8), DIMENSION(2) :: blk_index_i8
303 : TYPE(dbm_iterator) :: iter
304 : INTEGER :: i, iproc, bcount, nblk
305 182690 : INTEGER, ALLOCATABLE, DIMENSION(:, :) :: iprocs
306 : LOGICAL :: nodata_prv, move_prv
307 182690 : INTEGER(KIND=int_8), ALLOCATABLE, DIMENSION(:, :) :: index_recv
308 : INTEGER :: ndata
309 182690 : TYPE(mp_cart_type) :: mp_comm
310 :
311 182690 : REAL(KIND=dp), DIMENSION(:, :), POINTER :: block
312 :
313 : CHARACTER(LEN=*), PARAMETER :: routineN = 'dbt_tas_replicate'
314 :
315 : INTEGER :: handle, handle2
316 :
317 182690 : NULLIFY (col_blk_size, row_blk_size)
318 :
319 182690 : CALL timeset(routineN, handle)
320 :
321 182690 : IF (PRESENT(nodata)) THEN
322 56720 : nodata_prv = nodata
323 : ELSE
324 : nodata_prv = .FALSE.
325 : END IF
326 :
327 182690 : IF (PRESENT(move_data)) THEN
328 125970 : move_prv = move_data
329 : ELSE
330 : move_prv = .FALSE.
331 : END IF
332 :
333 182690 : row_blk_size => dbm_get_row_block_sizes(matrix_in)
334 182690 : col_blk_size => dbm_get_col_block_sizes(matrix_in)
335 182690 : nblkrows = SIZE(row_blk_size)
336 182690 : nblkcols = SIZE(col_blk_size)
337 182690 : dbm_dist = dbm_get_distribution(matrix_in)
338 182690 : row_dist => dbm_distribution_row_dist(dbm_dist)
339 182690 : col_dist => dbm_distribution_col_dist(dbm_dist)
340 :
341 182690 : mp_comm = info%mp_comm
342 182690 : ngroup = info%ngroup
343 :
344 182690 : numnodes = mp_comm%num_pe
345 548070 : pdims = mp_comm%num_pe_cart
346 :
347 309548 : SELECT CASE (info%split_rowcol)
348 : CASE (rowsplit)
349 126858 : repl_dist = dbt_tas_dist_repl(row_dist, pdims(1), nblkrows, info%ngroup, info%pgrid_split_size)
350 126858 : dir_dist = dbt_tas_dist_arb(col_dist, pdims(2), INT(nblkcols, KIND=int_8))
351 126858 : repl_blksize = dbt_tas_blk_size_repl(row_blk_size, info%ngroup)
352 126858 : dir_blksize = dbt_tas_blk_size_arb(col_blk_size)
353 126858 : ALLOCATE (row_dist_obj, source=repl_dist)
354 126858 : ALLOCATE (col_dist_obj, source=dir_dist)
355 126858 : ALLOCATE (row_bsize_obj, source=repl_blksize)
356 253716 : ALLOCATE (col_bsize_obj, source=dir_blksize)
357 : CASE (colsplit)
358 55832 : dir_dist = dbt_tas_dist_arb(row_dist, pdims(1), INT(nblkrows, KIND=int_8))
359 55832 : repl_dist = dbt_tas_dist_repl(col_dist, pdims(2), nblkcols, info%ngroup, info%pgrid_split_size)
360 55832 : dir_blksize = dbt_tas_blk_size_arb(row_blk_size)
361 55832 : repl_blksize = dbt_tas_blk_size_repl(col_blk_size, info%ngroup)
362 55832 : ALLOCATE (row_dist_obj, source=dir_dist)
363 55832 : ALLOCATE (col_dist_obj, source=repl_dist)
364 55832 : ALLOCATE (row_bsize_obj, source=dir_blksize)
365 857618 : ALLOCATE (col_bsize_obj, source=repl_blksize)
366 : END SELECT
367 :
368 182690 : CALL dbt_tas_distribution_new(dist, mp_comm, row_dist_obj, col_dist_obj, split_info=info)
369 : CALL dbt_tas_create(matrix_out, TRIM(dbm_get_name(matrix_in))//" replicated", &
370 182690 : dist, row_bsize_obj, col_bsize_obj, own_dist=.TRUE.)
371 :
372 182690 : IF (nodata_prv) THEN
373 56720 : CALL dbt_tas_finalize(matrix_out)
374 56720 : CALL timestop(handle)
375 56720 : RETURN
376 : END IF
377 :
378 582188 : ALLOCATE (buffer_send(0:numnodes - 1))
379 456218 : ALLOCATE (buffer_recv(0:numnodes - 1))
380 377910 : ALLOCATE (num_blocks_recv(0:numnodes - 1))
381 251940 : ALLOCATE (num_blocks_send(0:numnodes - 1))
382 251940 : ALLOCATE (num_entries_recv(0:numnodes - 1))
383 251940 : ALLOCATE (num_entries_send(0:numnodes - 1))
384 377910 : ALLOCATE (num_rec(0:2*numnodes - 1))
385 660496 : ALLOCATE (num_send(0:2*numnodes - 1), SOURCE=0)
386 1698902 : ALLOCATE (req_array(1:numnodes, 4))
387 251940 : ALLOCATE (locks(0:numnodes - 1))
388 125970 : max_threads = 1
389 125970 : !$ max_threads = omp_get_max_threads()
390 503880 : ALLOCATE (iprocs(ngroup, max_threads))
391 330248 : DO iproc = 0, numnodes - 1
392 330248 : CALL omp_init_lock(locks(iproc))
393 : END DO
394 :
395 : !$OMP PARALLEL DEFAULT(NONE) SHARED(matrix_in,matrix_out,num_send,ngroup,iprocs,locks) &
396 125970 : !$OMP PRIVATE(iter,blk_index,blk_size,cache_idx,i,iproc)
397 : cache_idx = omp_get_thread_num() + 1
398 : CALL dbm_iterator_start(iter, matrix_in)
399 : DO WHILE (dbm_iterator_blocks_left(iter))
400 : CALL dbm_iterator_next_block(iter, blk_index(1), blk_index(2), &
401 : row_size=blk_size(1), col_size=blk_size(2))
402 : CALL dbt_repl_get_stored_coordinates(matrix_out, blk_index(1), blk_index(2), &
403 : iprocs(:, cache_idx))
404 : DO i = 1, ngroup
405 : iproc = iprocs(i, cache_idx)
406 : CALL omp_set_lock(locks(iproc))
407 : num_send(2*iproc) = num_send(2*iproc) + PRODUCT(blk_size)
408 : num_send(2*iproc + 1) = num_send(2*iproc + 1) + 1
409 : CALL omp_unset_lock(locks(iproc))
410 : END DO
411 : END DO
412 : CALL dbm_iterator_stop(iter)
413 : !$OMP END PARALLEL
414 :
415 125970 : CALL timeset(routineN//"_alltoall", handle2)
416 125970 : CALL mp_comm%alltoall(num_send, num_rec, 2)
417 125970 : CALL timestop(handle2)
418 :
419 330248 : DO iproc = 0, numnodes - 1
420 204278 : num_entries_recv(iproc) = num_rec(2*iproc)
421 204278 : num_blocks_recv(iproc) = num_rec(2*iproc + 1)
422 204278 : num_entries_send(iproc) = num_send(2*iproc)
423 204278 : num_blocks_send(iproc) = num_send(2*iproc + 1)
424 :
425 204278 : CALL dbt_buffer_create(buffer_send(iproc), num_blocks_send(iproc), num_entries_send(iproc))
426 330248 : CALL dbt_buffer_create(buffer_recv(iproc), num_blocks_recv(iproc), num_entries_recv(iproc))
427 : END DO
428 :
429 : !$OMP PARALLEL DEFAULT(NONE) SHARED(matrix_in,matrix_out,buffer_send,locks,ngroup,iprocs) &
430 125970 : !$OMP PRIVATE(iter,blk_index,blk_size,block,cache_idx,i,iproc)
431 : cache_idx = omp_get_thread_num() + 1
432 : CALL dbm_iterator_start(iter, matrix_in)
433 : DO WHILE (dbm_iterator_blocks_left(iter))
434 : CALL dbm_iterator_next_block(iter, blk_index(1), blk_index(2), block, &
435 : row_size=blk_size(1), col_size=blk_size(2))
436 : CALL dbt_repl_get_stored_coordinates(matrix_out, blk_index(1), blk_index(2), &
437 : iprocs(:, cache_idx))
438 : DO i = 1, ngroup
439 : iproc = iprocs(i, cache_idx)
440 : CALL omp_set_lock(locks(iproc))
441 : CALL dbt_buffer_add_block(buffer_send(iproc), INT(blk_index, KIND=int_8), block)
442 : CALL omp_unset_lock(locks(iproc))
443 : END DO
444 : END DO
445 : CALL dbm_iterator_stop(iter)
446 : !$OMP END PARALLEL
447 :
448 125970 : DEALLOCATE (iprocs)
449 :
450 125970 : IF (move_prv) CALL dbm_clear(matrix_in)
451 :
452 125970 : CALL timeset(routineN//"_communicate_buffer", handle2)
453 125970 : CALL dbt_tas_communicate_buffer(mp_comm, buffer_recv, buffer_send, req_array)
454 :
455 330248 : DO iproc = 0, numnodes - 1
456 204278 : CALL dbt_buffer_destroy(buffer_send(iproc))
457 330248 : CALL omp_destroy_lock(locks(iproc))
458 : END DO
459 125970 : DEALLOCATE (locks)
460 :
461 125970 : CALL timestop(handle2)
462 :
463 : ! Parallel unpack of received blocks.
464 330248 : nblk = SUM(num_blocks_recv)
465 377528 : ALLOCATE (blks_to_allocate(nblk, 2))
466 :
467 125970 : bcount = 0
468 330248 : DO iproc = 0, numnodes - 1
469 204278 : CALL dbt_buffer_get_index(buffer_recv(iproc), index_recv)
470 4796132 : blks_to_allocate(bcount + 1:bcount + SIZE(index_recv, 1), :) = INT(index_recv(:, :))
471 204278 : bcount = bcount + SIZE(index_recv, 1)
472 534526 : DEALLOCATE (index_recv)
473 : END DO
474 :
475 : !TODO: Parallelize creation of block list.
476 125970 : !$OMP PARALLEL DEFAULT(NONE) SHARED(matrix_out,nblk,blks_to_allocate) PRIVATE(nblk_per_thread,A,b)
477 : nblk_per_thread = nblk/omp_get_num_threads() + 1
478 : a = omp_get_thread_num()*nblk_per_thread + 1
479 : b = MIN(a + nblk_per_thread, nblk)
480 : CALL dbm_reserve_blocks(matrix_out%matrix, blks_to_allocate(a:b, 1), blks_to_allocate(a:b, 2))
481 : !$OMP END PARALLEL
482 125970 : DEALLOCATE (blks_to_allocate)
483 :
484 : !$OMP PARALLEL DEFAULT(NONE) SHARED(buffer_recv,matrix_out,numnodes) &
485 125970 : !$OMP PRIVATE(iproc,ndata,blk_index_i8,blk_size,block)
486 : !$OMP DO SCHEDULE(DYNAMIC)
487 : DO iproc = 0, numnodes - 1
488 : ! First, we need to get the index to create block
489 : DO WHILE (dbt_buffer_blocks_left(buffer_recv(iproc)))
490 : CALL dbt_buffer_get_next_block(buffer_recv(iproc), ndata, blk_index_i8)
491 : CALL dbt_tas_blk_sizes(matrix_out, blk_index_i8(1), blk_index_i8(2), blk_size(1), blk_size(2))
492 : ALLOCATE (block(blk_size(1), blk_size(2)))
493 : CALL dbt_buffer_get_next_block(buffer_recv(iproc), ndata, blk_index_i8, block)
494 : CALL dbm_put_block(matrix_out%matrix, INT(blk_index_i8(1)), INT(blk_index_i8(2)), block)
495 : DEALLOCATE (block)
496 : END DO
497 :
498 : CALL dbt_buffer_destroy(buffer_recv(iproc))
499 : END DO
500 : !$OMP END DO
501 : !$OMP END PARALLEL
502 :
503 125970 : CALL dbt_tas_finalize(matrix_out)
504 :
505 125970 : CALL timestop(handle)
506 :
507 2122016 : END SUBROUTINE dbt_tas_replicate
508 :
509 : ! **************************************************************************************************
510 : !> \brief Merge submatrices of matrix_in to matrix_out by sum
511 : !> \param matrix_out ...
512 : !> \param matrix_in ...
513 : !> \param summation ...
514 : !> \param move_data memory optimization: move data to matrix_out such that matrix_in is empty on return
515 : !> \author Patrick Seewald
516 : ! **************************************************************************************************
517 56720 : SUBROUTINE dbt_tas_merge(matrix_out, matrix_in, summation, move_data)
518 : TYPE(dbm_type), INTENT(INOUT) :: matrix_out
519 : TYPE(dbt_tas_type), INTENT(INOUT) :: matrix_in
520 : LOGICAL, INTENT(IN), OPTIONAL :: summation, move_data
521 :
522 : CHARACTER(LEN=*), PARAMETER :: routineN = 'dbt_tas_merge'
523 :
524 : INTEGER :: a, b, bcount, handle, handle2, iproc, &
525 : nblk, nblk_per_thread, ndata, numnodes
526 56720 : INTEGER(KIND=int_8), ALLOCATABLE, DIMENSION(:, :) :: index_recv
527 : INTEGER(KIND=int_8), DIMENSION(2) :: blk_index_i8
528 : INTEGER(kind=omp_lock_kind), ALLOCATABLE, &
529 56720 : DIMENSION(:) :: locks
530 56720 : INTEGER, ALLOCATABLE, DIMENSION(:) :: num_blocks_recv, num_blocks_send, &
531 56720 : num_entries_recv, num_entries_send, &
532 56720 : num_rec, num_send
533 56720 : INTEGER, ALLOCATABLE, DIMENSION(:, :) :: blks_to_allocate
534 : INTEGER, DIMENSION(2) :: blk_index, blk_size
535 113440 : INTEGER, DIMENSION(:), POINTER :: col_block_sizes, row_block_sizes
536 : LOGICAL :: move_prv
537 56720 : REAL(dp), DIMENSION(:, :), POINTER :: block
538 : TYPE(dbm_iterator) :: iter
539 56720 : TYPE(dbt_buffer_type), ALLOCATABLE, DIMENSION(:) :: buffer_recv, buffer_send
540 340320 : TYPE(dbt_tas_split_info) :: info
541 56720 : TYPE(mp_cart_type) :: mp_comm
542 : TYPE(mp_request_type), ALLOCATABLE, &
543 56720 : DIMENSION(:, :) :: req_array
544 :
545 56720 : CALL timeset(routineN, handle)
546 :
547 56720 : IF (PRESENT(summation)) THEN
548 0 : IF (.NOT. summation) CALL dbm_clear(matrix_out)
549 : ELSE
550 56720 : CALL dbm_clear(matrix_out)
551 : END IF
552 :
553 56720 : IF (PRESENT(move_data)) THEN
554 56720 : move_prv = move_data
555 : ELSE
556 : move_prv = .FALSE.
557 : END IF
558 :
559 56720 : info = dbt_tas_info(matrix_in)
560 56720 : CALL dbt_tas_get_split_info(info, mp_comm=mp_comm)
561 56720 : numnodes = mp_comm%num_pe
562 :
563 269888 : ALLOCATE (buffer_send(0:numnodes - 1))
564 213168 : ALLOCATE (buffer_recv(0:numnodes - 1))
565 170160 : ALLOCATE (num_blocks_recv(0:numnodes - 1))
566 113440 : ALLOCATE (num_blocks_send(0:numnodes - 1))
567 113440 : ALLOCATE (num_entries_recv(0:numnodes - 1))
568 113440 : ALLOCATE (num_entries_send(0:numnodes - 1))
569 170160 : ALLOCATE (num_rec(0:2*numnodes - 1))
570 312896 : ALLOCATE (num_send(0:2*numnodes - 1), SOURCE=0)
571 795952 : ALLOCATE (req_array(1:numnodes, 4))
572 113440 : ALLOCATE (locks(0:numnodes - 1))
573 156448 : DO iproc = 0, numnodes - 1
574 156448 : CALL omp_init_lock(locks(iproc))
575 : END DO
576 :
577 : !$OMP PARALLEL DEFAULT(NONE) SHARED(matrix_in,matrix_out,num_send,locks) &
578 56720 : !$OMP PRIVATE(iter,blk_index,blk_size,iproc)
579 : CALL dbm_iterator_start(iter, matrix_in%matrix)
580 : DO WHILE (dbm_iterator_blocks_left(iter))
581 : CALL dbm_iterator_next_block(iter, blk_index(1), blk_index(2), &
582 : row_size=blk_size(1), col_size=blk_size(2))
583 : CALL dbm_get_stored_coordinates(matrix_out, blk_index(1), blk_index(2), iproc)
584 : CALL omp_set_lock(locks(iproc))
585 : num_send(2*iproc) = num_send(2*iproc) + PRODUCT(blk_size)
586 : num_send(2*iproc + 1) = num_send(2*iproc + 1) + 1
587 : CALL omp_unset_lock(locks(iproc))
588 : END DO
589 : CALL dbm_iterator_stop(iter)
590 : !$OMP END PARALLEL
591 :
592 56720 : CALL timeset(routineN//"_alltoall", handle2)
593 56720 : CALL mp_comm%alltoall(num_send, num_rec, 2)
594 56720 : CALL timestop(handle2)
595 :
596 156448 : DO iproc = 0, numnodes - 1
597 99728 : num_entries_recv(iproc) = num_rec(2*iproc)
598 99728 : num_blocks_recv(iproc) = num_rec(2*iproc + 1)
599 99728 : num_entries_send(iproc) = num_send(2*iproc)
600 99728 : num_blocks_send(iproc) = num_send(2*iproc + 1)
601 :
602 99728 : CALL dbt_buffer_create(buffer_send(iproc), num_blocks_send(iproc), num_entries_send(iproc))
603 156448 : CALL dbt_buffer_create(buffer_recv(iproc), num_blocks_recv(iproc), num_entries_recv(iproc))
604 : END DO
605 :
606 : !$OMP PARALLEL DEFAULT(NONE) SHARED(matrix_in,matrix_out,buffer_send,locks) &
607 56720 : !$OMP PRIVATE(iter,blk_index,blk_size,block,iproc)
608 : CALL dbm_iterator_start(iter, matrix_in%matrix)
609 : DO WHILE (dbm_iterator_blocks_left(iter))
610 : CALL dbm_iterator_next_block(iter, blk_index(1), blk_index(2), block, &
611 : row_size=blk_size(1), col_size=blk_size(2))
612 : CALL dbm_get_stored_coordinates(matrix_out, blk_index(1), blk_index(2), iproc)
613 : CALL omp_set_lock(locks(iproc))
614 : CALL dbt_buffer_add_block(buffer_send(iproc), INT(blk_index, KIND=int_8), block)
615 : CALL omp_unset_lock(locks(iproc))
616 : END DO
617 : CALL dbm_iterator_stop(iter)
618 : !$OMP END PARALLEL
619 :
620 56720 : IF (move_prv) CALL dbt_tas_clear(matrix_in)
621 :
622 56720 : CALL timeset(routineN//"_communicate_buffer", handle2)
623 56720 : CALL dbt_tas_communicate_buffer(mp_comm, buffer_recv, buffer_send, req_array)
624 :
625 156448 : DO iproc = 0, numnodes - 1
626 99728 : CALL dbt_buffer_destroy(buffer_send(iproc))
627 156448 : CALL omp_destroy_lock(locks(iproc))
628 : END DO
629 56720 : DEALLOCATE (locks)
630 :
631 56720 : CALL timestop(handle2)
632 :
633 : ! Parallel unpack of received blocks.
634 156448 : nblk = SUM(num_blocks_recv)
635 161177 : ALLOCATE (blks_to_allocate(nblk, 2))
636 :
637 56720 : bcount = 0
638 156448 : DO iproc = 0, numnodes - 1
639 99728 : CALL dbt_buffer_get_index(buffer_recv(iproc), index_recv)
640 2252752 : blks_to_allocate(bcount + 1:bcount + SIZE(index_recv, 1), :) = INT(index_recv(:, :))
641 99728 : bcount = bcount + SIZE(index_recv, 1)
642 256176 : DEALLOCATE (index_recv)
643 : END DO
644 :
645 : !TODO: Parallelize creation of block list.
646 56720 : !$OMP PARALLEL DEFAULT(NONE) SHARED(matrix_out,nblk,blks_to_allocate) PRIVATE(nblk_per_thread,A,b)
647 : nblk_per_thread = nblk/omp_get_num_threads() + 1
648 : a = omp_get_thread_num()*nblk_per_thread + 1
649 : b = MIN(a + nblk_per_thread, nblk)
650 : CALL dbm_reserve_blocks(matrix_out, blks_to_allocate(a:b, 1), blks_to_allocate(a:b, 2))
651 : !$OMP END PARALLEL
652 56720 : DEALLOCATE (blks_to_allocate)
653 :
654 56720 : row_block_sizes => dbm_get_row_block_sizes(matrix_out)
655 56720 : col_block_sizes => dbm_get_col_block_sizes(matrix_out)
656 :
657 : !$OMP PARALLEL DEFAULT(NONE) SHARED(buffer_recv,matrix_out,numnodes,row_block_sizes,col_block_sizes) &
658 56720 : !$OMP PRIVATE(iproc,ndata,blk_index_i8,blk_size,block)
659 : !$OMP DO SCHEDULE(DYNAMIC)
660 : DO iproc = 0, numnodes - 1
661 : ! First, we need to get the index to create block
662 : DO WHILE (dbt_buffer_blocks_left(buffer_recv(iproc)))
663 : CALL dbt_buffer_get_next_block(buffer_recv(iproc), ndata, blk_index_i8)
664 : blk_size(1) = row_block_sizes(INT(blk_index_i8(1)))
665 : blk_size(2) = col_block_sizes(INT(blk_index_i8(2)))
666 : ALLOCATE (block(blk_size(1), blk_size(2)))
667 : CALL dbt_buffer_get_next_block(buffer_recv(iproc), ndata, blk_index_i8, block)
668 : CALL dbm_put_block(matrix_out, INT(blk_index_i8(1)), INT(blk_index_i8(2)), block, summation=.TRUE.)
669 : DEALLOCATE (block)
670 : END DO
671 : CALL dbt_buffer_destroy(buffer_recv(iproc))
672 : END DO
673 : !$OMP END DO
674 : !$OMP END PARALLEL
675 :
676 56720 : CALL dbm_finalize(matrix_out)
677 :
678 56720 : CALL timestop(handle)
679 483056 : END SUBROUTINE dbt_tas_merge
680 :
681 : ! **************************************************************************************************
682 : !> \brief get all indices from buffer
683 : !> \param buffer ...
684 : !> \param index ...
685 : !> \author Patrick Seewald
686 : ! **************************************************************************************************
687 625508 : SUBROUTINE dbt_buffer_get_index(buffer, index)
688 : TYPE(dbt_buffer_type), INTENT(IN) :: buffer
689 : INTEGER(KIND=int_8), ALLOCATABLE, &
690 : DIMENSION(:, :), INTENT(OUT) :: index
691 :
692 : CHARACTER(LEN=*), PARAMETER :: routineN = 'dbt_buffer_get_index'
693 :
694 : INTEGER :: handle
695 : INTEGER, DIMENSION(2) :: indx_shape
696 :
697 625508 : CALL timeset(routineN, handle)
698 :
699 3753048 : indx_shape = SHAPE(buffer%indx) - [0, 1]
700 2328827 : ALLOCATE (INDEX(indx_shape(1), indx_shape(2)))
701 13022262 : INDEX(:, :) = buffer%indx(1:indx_shape(1), 1:indx_shape(2))
702 625508 : CALL timestop(handle)
703 625508 : END SUBROUTINE dbt_buffer_get_index
704 :
705 : ! **************************************************************************************************
706 : !> \brief how many blocks left in iterator
707 : !> \param buffer ...
708 : !> \return ...
709 : !> \author Patrick Seewald
710 : ! **************************************************************************************************
711 6198377 : PURE FUNCTION dbt_buffer_blocks_left(buffer)
712 : TYPE(dbt_buffer_type), INTENT(IN) :: buffer
713 : LOGICAL :: dbt_buffer_blocks_left
714 :
715 6198377 : dbt_buffer_blocks_left = buffer%endpos < buffer%nblock
716 6198377 : END FUNCTION dbt_buffer_blocks_left
717 :
718 : ! **************************************************************************************************
719 : !> \brief Create block buffer for MPI communication.
720 : !> \param buffer block buffer
721 : !> \param nblock number of blocks
722 : !> \param ndata total number of block entries
723 : !> \author Patrick Seewald
724 : ! **************************************************************************************************
725 1251016 : SUBROUTINE dbt_buffer_create(buffer, nblock, ndata)
726 : TYPE(dbt_buffer_type), INTENT(OUT) :: buffer
727 : INTEGER, INTENT(IN) :: nblock, ndata
728 :
729 1251016 : buffer%nblock = nblock
730 1251016 : buffer%endpos = 0
731 3406638 : ALLOCATE (buffer%msg(ndata))
732 3406638 : ALLOCATE (buffer%indx(nblock, 3))
733 1251016 : END SUBROUTINE dbt_buffer_create
734 :
735 : ! **************************************************************************************************
736 : !> \brief ...
737 : !> \param buffer ...
738 : !> \author Patrick Seewald
739 : ! **************************************************************************************************
740 1251016 : SUBROUTINE dbt_buffer_destroy(buffer)
741 : TYPE(dbt_buffer_type), INTENT(INOUT) :: buffer
742 :
743 1251016 : DEALLOCATE (buffer%msg)
744 1251016 : DEALLOCATE (buffer%indx)
745 1251016 : buffer%nblock = -1
746 1251016 : buffer%endpos = -1
747 1251016 : END SUBROUTINE dbt_buffer_destroy
748 :
749 : ! **************************************************************************************************
750 : !> \brief insert a block into block buffer (at current iterator position)
751 : !> \param buffer ...
752 : !> \param index index of block
753 : !> \param block ...
754 : !> \param transposed ...
755 : !> \author Patrick Seewald
756 : ! **************************************************************************************************
757 5572869 : SUBROUTINE dbt_buffer_add_block(buffer, index, block, transposed)
758 : TYPE(dbt_buffer_type), INTENT(INOUT) :: buffer
759 : INTEGER(KIND=int_8), DIMENSION(2), INTENT(IN) :: index
760 : REAL(dp), DIMENSION(:, :), INTENT(IN) :: block
761 : LOGICAL, INTENT(IN), OPTIONAL :: transposed
762 :
763 : INTEGER :: ndata, p, p_data
764 : INTEGER(KIND=int_8), DIMENSION(2) :: index_prv
765 : LOGICAL :: tr
766 :
767 5572869 : IF (PRESENT(transposed)) THEN
768 2504436 : tr = transposed
769 : ELSE
770 : tr = .FALSE.
771 : END IF
772 :
773 5572869 : index_prv(:) = INDEX(:)
774 5572869 : IF (tr) THEN
775 831679 : CALL swap(index_prv)
776 : END IF
777 16718607 : ndata = PRODUCT(SHAPE(block))
778 :
779 5572869 : p = buffer%endpos
780 5572869 : IF (p == 0) THEN
781 : p_data = 0
782 : ELSE
783 5120566 : p_data = INT(buffer%indx(p, 3))
784 : END IF
785 :
786 5572869 : IF (tr) THEN
787 90131029 : buffer%msg(p_data + 1:p_data + ndata) = RESHAPE(TRANSPOSE(block), [ndata])
788 : ELSE
789 731386557 : buffer%msg(p_data + 1:p_data + ndata) = RESHAPE(block, [ndata])
790 : END IF
791 :
792 16718607 : buffer%indx(p + 1, 1:2) = index_prv(:)
793 5572869 : IF (p > 0) THEN
794 5120566 : buffer%indx(p + 1, 3) = buffer%indx(p, 3) + INT(ndata, KIND=int_8)
795 : ELSE
796 452303 : buffer%indx(p + 1, 3) = INT(ndata, KIND=int_8)
797 : END IF
798 5572869 : buffer%endpos = buffer%endpos + 1
799 5572869 : END SUBROUTINE dbt_buffer_add_block
800 :
801 : ! **************************************************************************************************
802 : !> \brief get next block from buffer. Iterator is advanced only if block is retrieved or advance_iter.
803 : !> \param buffer ...
804 : !> \param ndata ...
805 : !> \param index ...
806 : !> \param block ...
807 : !> \param advance_iter ...
808 : !> \author Patrick Seewald
809 : ! **************************************************************************************************
810 11145738 : SUBROUTINE dbt_buffer_get_next_block(buffer, ndata, index, block, advance_iter)
811 : TYPE(dbt_buffer_type), INTENT(INOUT) :: buffer
812 : INTEGER, INTENT(OUT) :: ndata
813 : INTEGER(KIND=int_8), DIMENSION(2), INTENT(OUT) :: index
814 : REAL(dp), DIMENSION(:, :), INTENT(OUT), OPTIONAL :: block
815 : LOGICAL, INTENT(IN), OPTIONAL :: advance_iter
816 :
817 : INTEGER :: p, p_data
818 : LOGICAL :: do_advance
819 :
820 11145738 : do_advance = .FALSE.
821 11145738 : IF (PRESENT(advance_iter)) THEN
822 0 : do_advance = advance_iter
823 11145738 : ELSE IF (PRESENT(block)) THEN
824 5572869 : do_advance = .TRUE.
825 : END IF
826 :
827 11145738 : p = buffer%endpos
828 11145738 : IF (p == 0) THEN
829 : p_data = 0
830 : ELSE
831 10241132 : p_data = INT(buffer%indx(p, 3))
832 : END IF
833 :
834 10241132 : IF (p > 0) THEN
835 10241132 : ndata = INT(buffer%indx(p + 1, 3) - buffer%indx(p, 3))
836 : ELSE
837 904606 : ndata = INT(buffer%indx(p + 1, 3))
838 : END IF
839 33437214 : INDEX(:) = buffer%indx(p + 1, 1:2)
840 :
841 11145738 : IF (PRESENT(block)) THEN
842 16718607 : block(:, :) = RESHAPE(buffer%msg(p_data + 1:p_data + ndata), SHAPE(block))
843 : END IF
844 :
845 11145738 : IF (do_advance) buffer%endpos = buffer%endpos + 1
846 11145738 : END SUBROUTINE dbt_buffer_get_next_block
847 :
848 : ! **************************************************************************************************
849 : !> \brief communicate buffer
850 : !> \param mp_comm ...
851 : !> \param buffer_recv ...
852 : !> \param buffer_send ...
853 : !> \param req_array ...
854 : !> \author Patrick Seewald
855 : ! **************************************************************************************************
856 4402972 : SUBROUTINE dbt_tas_communicate_buffer(mp_comm, buffer_recv, buffer_send, req_array)
857 : CLASS(mp_comm_type), INTENT(IN) :: mp_comm
858 : TYPE(dbt_buffer_type), DIMENSION(0:), &
859 : INTENT(INOUT) :: buffer_recv, buffer_send
860 : TYPE(mp_request_type), DIMENSION(:, :), &
861 : INTENT(OUT) :: req_array
862 :
863 : CHARACTER(LEN=*), PARAMETER :: routineN = 'dbt_tas_communicate_buffer'
864 :
865 : INTEGER :: handle, iproc, numnodes, &
866 : rec_counter, send_counter
867 :
868 380188 : CALL timeset(routineN, handle)
869 380188 : numnodes = mp_comm%num_pe
870 :
871 380188 : IF (numnodes > 1) THEN
872 :
873 245320 : send_counter = 0
874 245320 : rec_counter = 0
875 :
876 735960 : DO iproc = 0, numnodes - 1
877 735960 : IF (buffer_recv(iproc)%nblock > 0) THEN
878 335443 : rec_counter = rec_counter + 1
879 335443 : CALL mp_comm%irecv(buffer_recv(iproc)%indx, iproc, req_array(rec_counter, 3), tag=4)
880 335443 : CALL mp_comm%irecv(buffer_recv(iproc)%msg, iproc, req_array(rec_counter, 4), tag=7)
881 : END IF
882 : END DO
883 :
884 735960 : DO iproc = 0, numnodes - 1
885 735960 : IF (buffer_send(iproc)%nblock > 0) THEN
886 335443 : send_counter = send_counter + 1
887 335443 : CALL mp_comm%isend(buffer_send(iproc)%indx, iproc, req_array(send_counter, 1), tag=4)
888 335443 : CALL mp_comm%isend(buffer_send(iproc)%msg, iproc, req_array(send_counter, 2), tag=7)
889 : END IF
890 : END DO
891 :
892 245320 : IF (send_counter > 0) THEN
893 213048 : CALL mp_waitall(req_array(1:send_counter, 1:2))
894 : END IF
895 245320 : IF (rec_counter > 0) THEN
896 233238 : CALL mp_waitall(req_array(1:rec_counter, 3:4))
897 : END IF
898 :
899 : ELSE
900 134868 : IF (buffer_recv(0)%nblock > 0) THEN
901 5166172 : buffer_recv(0)%indx(:, :) = buffer_send(0)%indx(:, :)
902 463201493 : buffer_recv(0)%msg(:) = buffer_send(0)%msg(:)
903 : END IF
904 : END IF
905 380188 : CALL timestop(handle)
906 380188 : END SUBROUTINE dbt_tas_communicate_buffer
907 :
908 254218 : END MODULE dbt_tas_reshape_ops
|