LCOV - code coverage report
Current view: top level - src/dbt/tas - dbt_tas_reshape_ops.F (source / functions) Hit Total Coverage
Test: CP2K Regtests (git:ccc2433) Lines: 355 359 98.9 %
Date: 2024-04-25 07:09:54 Functions: 10 12 83.3 %

          Line data    Source code
       1             : !--------------------------------------------------------------------------------------------------!
       2             : !   CP2K: A general program to perform molecular dynamics simulations                              !
       3             : !   Copyright 2000-2024 CP2K developers group <https://cp2k.org>                                   !
       4             : !                                                                                                  !
       5             : !   SPDX-License-Identifier: GPL-2.0-or-later                                                      !
       6             : !--------------------------------------------------------------------------------------------------!
       7             : 
       8             : ! **************************************************************************************************
       9             : !> \brief communication routines to reshape / replicate / merge tall-and-skinny matrices.
      10             : !> \author Patrick Seewald
      11             : ! **************************************************************************************************
      12             : MODULE dbt_tas_reshape_ops
      13             :    USE OMP_LIB,                         ONLY: omp_get_num_threads,&
      14             :                                               omp_get_thread_num,&
      15             :                                               omp_init_lock,&
      16             :                                               omp_lock_kind,&
      17             :                                               omp_set_lock,&
      18             :                                               omp_unset_lock
      19             :    USE dbm_api,                         ONLY: &
      20             :         dbm_clear, dbm_distribution_col_dist, dbm_distribution_obj, dbm_distribution_row_dist, &
      21             :         dbm_finalize, dbm_get_col_block_sizes, dbm_get_distribution, dbm_get_name, &
      22             :         dbm_get_row_block_sizes, dbm_get_stored_coordinates, dbm_iterator, &
      23             :         dbm_iterator_blocks_left, dbm_iterator_next_block, dbm_iterator_start, dbm_iterator_stop, &
      24             :         dbm_put_block, dbm_reserve_blocks, dbm_type
      25             :    USE dbt_tas_base,                    ONLY: &
      26             :         dbt_repl_get_stored_coordinates, dbt_tas_blk_sizes, dbt_tas_clear, dbt_tas_create, &
      27             :         dbt_tas_distribution_new, dbt_tas_finalize, dbt_tas_get_stored_coordinates, dbt_tas_info, &
      28             :         dbt_tas_iterator_blocks_left, dbt_tas_iterator_next_block, dbt_tas_iterator_start, &
      29             :         dbt_tas_iterator_stop, dbt_tas_put_block, dbt_tas_reserve_blocks
      30             :    USE dbt_tas_global,                  ONLY: dbt_tas_blk_size_arb,&
      31             :                                               dbt_tas_blk_size_repl,&
      32             :                                               dbt_tas_dist_arb,&
      33             :                                               dbt_tas_dist_repl,&
      34             :                                               dbt_tas_distribution,&
      35             :                                               dbt_tas_rowcol_data
      36             :    USE dbt_tas_split,                   ONLY: colsplit,&
      37             :                                               dbt_tas_get_split_info,&
      38             :                                               rowsplit
      39             :    USE dbt_tas_types,                   ONLY: dbt_tas_distribution_type,&
      40             :                                               dbt_tas_iterator,&
      41             :                                               dbt_tas_split_info,&
      42             :                                               dbt_tas_type
      43             :    USE dbt_tas_util,                    ONLY: swap
      44             :    USE kinds,                           ONLY: dp,&
      45             :                                               int_8
      46             :    USE message_passing,                 ONLY: mp_cart_type,&
      47             :                                               mp_comm_type,&
      48             :                                               mp_request_type,&
      49             :                                               mp_waitall
      50             : #include "../../base/base_uses.f90"
      51             : 
      52             :    IMPLICIT NONE
      53             :    PRIVATE
      54             : 
      55             :    CHARACTER(len=*), PARAMETER, PRIVATE :: moduleN = 'dbt_tas_reshape_ops'
      56             : 
      57             :    PUBLIC :: &
      58             :       dbt_tas_merge, &
      59             :       dbt_tas_replicate, &
      60             :       dbt_tas_reshape
      61             : 
      62             :    TYPE dbt_buffer_type
      63             :       INTEGER :: nblock = -1
      64             :       INTEGER(KIND=int_8), DIMENSION(:, :), ALLOCATABLE :: indx
      65             :       REAL(dp), DIMENSION(:), ALLOCATABLE :: msg
      66             :       INTEGER :: endpos = -1
      67             :    END TYPE
      68             : 
      69             : CONTAINS
      70             : 
      71             : ! **************************************************************************************************
      72             : !> \brief copy data (involves reshape)
      73             : !> \param matrix_in ...
      74             : !> \param matrix_out ...
      75             : !> \param summation whether matrix_out = matrix_out + matrix_in
      76             : !> \param transposed ...
      77             : !> \param move_data memory optimization: move data to matrix_out such that matrix_in is empty on return
      78             : !> \author Patrick Seewald
      79             : ! **************************************************************************************************
      80      110172 :    RECURSIVE SUBROUTINE dbt_tas_reshape(matrix_in, matrix_out, summation, transposed, move_data)
      81             :       TYPE(dbt_tas_type), INTENT(INOUT)                  :: matrix_in, matrix_out
      82             :       LOGICAL, INTENT(IN), OPTIONAL                      :: summation, transposed, move_data
      83             : 
      84             :       CHARACTER(LEN=*), PARAMETER                        :: routineN = 'dbt_tas_reshape'
      85             : 
      86             :       INTEGER                                            :: a, b, bcount, handle, handle2, iproc, &
      87             :                                                             nblk, nblk_per_thread, ndata, numnodes
      88      110172 :       INTEGER(KIND=int_8), ALLOCATABLE, DIMENSION(:, :)  :: blks_to_allocate, index_recv
      89             :       INTEGER(KIND=int_8), DIMENSION(2)                  :: blk_index
      90             :       INTEGER(kind=omp_lock_kind), ALLOCATABLE, &
      91      110172 :          DIMENSION(:)                                    :: locks
      92      110172 :       INTEGER, ALLOCATABLE, DIMENSION(:)                 :: num_blocks_recv, num_blocks_send, &
      93      110172 :                                                             num_entries_recv, num_entries_send, &
      94      110172 :                                                             num_rec, num_send
      95             :       INTEGER, DIMENSION(2)                              :: blk_size
      96             :       LOGICAL                                            :: move_prv, tr_in
      97      110172 :       REAL(KIND=dp), DIMENSION(:, :), POINTER            :: block
      98      110172 :       TYPE(dbt_buffer_type), ALLOCATABLE, DIMENSION(:)   :: buffer_recv, buffer_send
      99             :       TYPE(dbt_tas_iterator)                             :: iter
     100      550860 :       TYPE(dbt_tas_split_info)                           :: info
     101             :       TYPE(mp_comm_type)                                 :: mp_comm
     102             :       TYPE(mp_request_type), ALLOCATABLE, &
     103      110172 :          DIMENSION(:, :)                                 :: req_array
     104             : 
     105      110172 :       CALL timeset(routineN, handle)
     106             : 
     107      110172 :       IF (PRESENT(summation)) THEN
     108       38048 :          IF (.NOT. summation) CALL dbm_clear(matrix_out%matrix)
     109             :       ELSE
     110       72124 :          CALL dbm_clear(matrix_out%matrix)
     111             :       END IF
     112             : 
     113      110172 :       IF (PRESENT(move_data)) THEN
     114      110172 :          move_prv = move_data
     115             :       ELSE
     116             :          move_prv = .FALSE.
     117             :       END IF
     118             : 
     119      110172 :       IF (PRESENT(transposed)) THEN
     120      110172 :          tr_in = transposed
     121             :       ELSE
     122           0 :          tr_in = .FALSE.
     123             :       END IF
     124             : 
     125      110172 :       IF (.NOT. matrix_out%valid) THEN
     126           0 :          CPABORT("can not reshape into invalid matrix")
     127             :       END IF
     128             : 
     129      110172 :       info = dbt_tas_info(matrix_in)
     130      110172 :       mp_comm = info%mp_comm
     131      110172 :       numnodes = mp_comm%num_pe
     132      523546 :       ALLOCATE (buffer_send(0:numnodes - 1))
     133      523546 :       ALLOCATE (buffer_recv(0:numnodes - 1))
     134      330516 :       ALLOCATE (num_blocks_recv(0:numnodes - 1))
     135      330516 :       ALLOCATE (num_blocks_send(0:numnodes - 1))
     136      330516 :       ALLOCATE (num_entries_recv(0:numnodes - 1))
     137      330516 :       ALLOCATE (num_entries_send(0:numnodes - 1))
     138      330516 :       ALLOCATE (num_rec(0:2*numnodes - 1))
     139      330516 :       ALLOCATE (num_send(0:2*numnodes - 1))
     140      496232 :       num_send(:) = 0
     141     1543324 :       ALLOCATE (req_array(1:numnodes, 4))
     142      330516 :       ALLOCATE (locks(0:numnodes - 1))
     143      303202 :       DO iproc = 0, numnodes - 1
     144      303202 :          CALL omp_init_lock(locks(iproc))
     145             :       END DO
     146             : 
     147      110172 :       CALL timeset(routineN//"_get_coord", handle2)
     148             : !$OMP PARALLEL DEFAULT(NONE) SHARED(matrix_in,matrix_out,tr_in,num_send) &
     149      110172 : !$OMP PRIVATE(iter,blk_index,blk_size,iproc)
     150             :       CALL dbt_tas_iterator_start(iter, matrix_in)
     151             :       DO WHILE (dbt_tas_iterator_blocks_left(iter))
     152             :          CALL dbt_tas_iterator_next_block(iter, blk_index(1), blk_index(2), &
     153             :                                           row_size=blk_size(1), col_size=blk_size(2))
     154             :          IF (tr_in) THEN
     155             :             CALL dbt_tas_get_stored_coordinates(matrix_out, blk_index(2), blk_index(1), iproc)
     156             :          ELSE
     157             :             CALL dbt_tas_get_stored_coordinates(matrix_out, blk_index(1), blk_index(2), iproc)
     158             :          END IF
     159             : !$OMP ATOMIC
     160             :          num_send(2*iproc) = num_send(2*iproc) + PRODUCT(blk_size)
     161             : !$OMP ATOMIC
     162             :          num_send(2*iproc + 1) = num_send(2*iproc + 1) + 1
     163             :       END DO
     164             :       CALL dbt_tas_iterator_stop(iter)
     165             : !$OMP END PARALLEL
     166      110172 :       CALL timestop(handle2)
     167             : 
     168      110172 :       CALL timeset(routineN//"_alltoall", handle2)
     169      110172 :       CALL mp_comm%alltoall(num_send, num_rec, 2)
     170      110172 :       CALL timestop(handle2)
     171             : 
     172      110172 :       CALL timeset(routineN//"_buffer_fill", handle2)
     173      303202 :       DO iproc = 0, numnodes - 1
     174      193030 :          num_entries_recv(iproc) = num_rec(2*iproc)
     175      193030 :          num_blocks_recv(iproc) = num_rec(2*iproc + 1)
     176      193030 :          num_entries_send(iproc) = num_send(2*iproc)
     177      193030 :          num_blocks_send(iproc) = num_send(2*iproc + 1)
     178             : 
     179      193030 :          CALL dbt_buffer_create(buffer_send(iproc), num_blocks_send(iproc), num_entries_send(iproc))
     180             : 
     181      303202 :          CALL dbt_buffer_create(buffer_recv(iproc), num_blocks_recv(iproc), num_entries_recv(iproc))
     182             : 
     183             :       END DO
     184             : 
     185             : !$OMP PARALLEL DEFAULT(NONE) SHARED(matrix_in,matrix_out,tr_in,buffer_send,locks) &
     186      110172 : !$OMP PRIVATE(iter,blk_index,blk_size,block,iproc)
     187             :       CALL dbt_tas_iterator_start(iter, matrix_in)
     188             :       DO WHILE (dbt_tas_iterator_blocks_left(iter))
     189             :          CALL dbt_tas_iterator_next_block(iter, blk_index(1), blk_index(2), block, &
     190             :                                           row_size=blk_size(1), col_size=blk_size(2))
     191             :          IF (tr_in) THEN
     192             :             CALL dbt_tas_get_stored_coordinates(matrix_out, blk_index(2), blk_index(1), iproc)
     193             :          ELSE
     194             :             CALL dbt_tas_get_stored_coordinates(matrix_out, blk_index(1), blk_index(2), iproc)
     195             :          END IF
     196             :          CALL omp_set_lock(locks(iproc))
     197             :          CALL dbt_buffer_add_block(buffer_send(iproc), blk_index, block, transposed=tr_in)
     198             :          CALL omp_unset_lock(locks(iproc))
     199             :       END DO
     200             :       CALL dbt_tas_iterator_stop(iter)
     201             : !$OMP END PARALLEL
     202             : 
     203      110172 :       IF (move_prv) CALL dbt_tas_clear(matrix_in)
     204             : 
     205      110172 :       CALL timestop(handle2)
     206             : 
     207      110172 :       CALL timeset(routineN//"_communicate_buffer", handle2)
     208      110172 :       CALL dbt_tas_communicate_buffer(mp_comm, buffer_recv, buffer_send, req_array)
     209             : 
     210      303202 :       DO iproc = 0, numnodes - 1
     211      303202 :          CALL dbt_buffer_destroy(buffer_send(iproc))
     212             :       END DO
     213             : 
     214      110172 :       CALL timestop(handle2)
     215             : 
     216      110172 :       CALL timeset(routineN//"_buffer_obtain", handle2)
     217             : 
     218             :       ! TODO Add OpenMP to the buffer unpacking.
     219      303202 :       nblk = SUM(num_blocks_recv)
     220      319047 :       ALLOCATE (blks_to_allocate(nblk, 2))
     221             : 
     222      110172 :       bcount = 0
     223      303202 :       DO iproc = 0, numnodes - 1
     224      193030 :          CALL dbt_buffer_get_index(buffer_recv(iproc), index_recv)
     225     4461072 :          blks_to_allocate(bcount + 1:bcount + SIZE(index_recv, 1), :) = index_recv(:, :)
     226      193030 :          bcount = bcount + SIZE(index_recv, 1)
     227      496232 :          DEALLOCATE (index_recv)
     228             :       END DO
     229             : 
     230             : !TODO: Parallelize creation of block list.
     231      110172 : !$OMP PARALLEL DEFAULT(NONE) SHARED(matrix_out,nblk,blks_to_allocate) PRIVATE(nblk_per_thread,A,b)
     232             :       nblk_per_thread = nblk/omp_get_num_threads() + 1
     233             :       a = omp_get_thread_num()*nblk_per_thread + 1
     234             :       b = MIN(a + nblk_per_thread, nblk)
     235             :       CALL dbt_tas_reserve_blocks(matrix_out, blks_to_allocate(a:b, 1), blks_to_allocate(a:b, 2))
     236             : !$OMP END PARALLEL
     237      110172 :       DEALLOCATE (blks_to_allocate)
     238             : 
     239      303202 :       DO iproc = 0, numnodes - 1
     240             :          ! First, we need to get the index to create block
     241     2134021 :          DO WHILE (dbt_buffer_blocks_left(buffer_recv(iproc)))
     242     1940991 :             CALL dbt_buffer_get_next_block(buffer_recv(iproc), ndata, blk_index)
     243     1940991 :             CALL dbt_tas_blk_sizes(matrix_out, blk_index(1), blk_index(2), blk_size(1), blk_size(2))
     244     7763964 :             ALLOCATE (block(blk_size(1), blk_size(2)))
     245     1940991 :             CALL dbt_buffer_get_next_block(buffer_recv(iproc), ndata, blk_index, block)
     246     1940991 :             CALL dbt_tas_put_block(matrix_out, blk_index(1), blk_index(2), block, summation=summation)
     247     6016003 :             DEALLOCATE (block)
     248             :          END DO
     249      303202 :          CALL dbt_buffer_destroy(buffer_recv(iproc))
     250             :       END DO
     251             : 
     252      110172 :       CALL timestop(handle2)
     253             : 
     254      110172 :       CALL dbt_tas_finalize(matrix_out)
     255             : 
     256      110172 :       CALL timestop(handle)
     257     1157264 :    END SUBROUTINE
     258             : 
     259             : ! **************************************************************************************************
     260             : !> \brief Replicate matrix_in such that each submatrix of matrix_out is an exact copy of matrix_in
     261             : !> \param matrix_in ...
     262             : !> \param info ...
     263             : !> \param matrix_out ...
     264             : !> \param nodata Don't copy data but create matrix_out
     265             : !> \param move_data memory optimization: move data to matrix_out such that matrix_in is empty on return
     266             : !> \author Patrick Seewald
     267             : ! **************************************************************************************************
     268      514830 :    SUBROUTINE dbt_tas_replicate(matrix_in, info, matrix_out, nodata, move_data)
     269             :       TYPE(dbm_type), INTENT(INOUT)                      :: matrix_in
     270             :       TYPE(dbt_tas_split_info), INTENT(IN)               :: info
     271             :       TYPE(dbt_tas_type), INTENT(OUT)                    :: matrix_out
     272             :       LOGICAL, INTENT(IN), OPTIONAL                      :: nodata, move_data
     273             : 
     274             :       INTEGER                                            :: a, b, nblk_per_thread, nblkcols, nblkrows
     275             :       INTEGER, DIMENSION(2)                              :: pdims
     276      205932 :       INTEGER, DIMENSION(:), POINTER                     :: col_blk_size, col_dist, row_blk_size, &
     277      102966 :                                                             row_dist
     278             :       TYPE(dbm_distribution_obj)                         :: dbm_dist
     279      102966 :       TYPE(dbt_tas_dist_arb), TARGET                     :: dir_dist
     280      102966 :       TYPE(dbt_tas_dist_repl), TARGET                    :: repl_dist
     281             : 
     282      205932 :       CLASS(dbt_tas_distribution), ALLOCATABLE :: col_dist_obj, row_dist_obj
     283      205932 :       CLASS(dbt_tas_rowcol_data), ALLOCATABLE :: row_bsize_obj, col_bsize_obj
     284      102966 :       TYPE(dbt_tas_blk_size_repl), TARGET :: repl_blksize
     285      102966 :       TYPE(dbt_tas_blk_size_arb), TARGET :: dir_blksize
     286      514830 :       TYPE(dbt_tas_distribution_type) :: dist
     287             :       INTEGER :: numnodes, ngroup
     288      102966 :       INTEGER(kind=omp_lock_kind), ALLOCATABLE, DIMENSION(:) :: locks
     289      102966 :       TYPE(dbt_buffer_type), ALLOCATABLE, DIMENSION(:) :: buffer_recv, buffer_send
     290      102966 :       INTEGER, ALLOCATABLE, DIMENSION(:)                 :: num_blocks_recv, num_blocks_send, &
     291      102966 :                                                             num_entries_recv, num_entries_send, &
     292      102966 :                                                             num_rec, num_send
     293      102966 :       TYPE(mp_request_type), ALLOCATABLE, DIMENSION(:, :) :: req_array
     294      102966 :       INTEGER, ALLOCATABLE, DIMENSION(:, :) :: blks_to_allocate
     295             :       INTEGER, DIMENSION(2) :: blk_size
     296             :       INTEGER, DIMENSION(2) :: blk_index
     297             :       INTEGER(KIND=int_8), DIMENSION(2) :: blk_index_i8
     298             :       TYPE(dbm_iterator) :: iter
     299             :       INTEGER :: i, iproc, bcount, nblk
     300      102966 :       INTEGER, DIMENSION(:), ALLOCATABLE :: iprocs
     301             :       LOGICAL :: nodata_prv, move_prv
     302      102966 :       INTEGER(KIND=int_8), ALLOCATABLE, DIMENSION(:, :) :: index_recv
     303             :       INTEGER :: ndata
     304      102966 :       TYPE(mp_cart_type) :: mp_comm
     305             : 
     306      102966 :       REAL(KIND=dp), DIMENSION(:, :), POINTER :: block
     307             : 
     308             :       CHARACTER(LEN=*), PARAMETER :: routineN = 'dbt_tas_replicate'
     309             : 
     310             :       INTEGER :: handle, handle2
     311             : 
     312      102966 :       NULLIFY (col_blk_size, row_blk_size)
     313             : 
     314      102966 :       CALL timeset(routineN, handle)
     315             : 
     316      102966 :       IF (PRESENT(nodata)) THEN
     317       32196 :          nodata_prv = nodata
     318             :       ELSE
     319             :          nodata_prv = .FALSE.
     320             :       END IF
     321             : 
     322      102966 :       IF (PRESENT(move_data)) THEN
     323       70770 :          move_prv = move_data
     324             :       ELSE
     325             :          move_prv = .FALSE.
     326             :       END IF
     327             : 
     328      102966 :       row_blk_size => dbm_get_row_block_sizes(matrix_in)
     329      102966 :       col_blk_size => dbm_get_col_block_sizes(matrix_in)
     330      102966 :       nblkrows = SIZE(row_blk_size)
     331      102966 :       nblkcols = SIZE(col_blk_size)
     332      102966 :       dbm_dist = dbm_get_distribution(matrix_in)
     333      102966 :       row_dist => dbm_distribution_row_dist(dbm_dist)
     334      102966 :       col_dist => dbm_distribution_col_dist(dbm_dist)
     335             : 
     336      102966 :       mp_comm = info%mp_comm
     337      102966 :       ngroup = info%ngroup
     338             : 
     339      102966 :       numnodes = mp_comm%num_pe
     340      308898 :       pdims = mp_comm%num_pe_cart
     341             : 
     342      170594 :       SELECT CASE (info%split_rowcol)
     343             :       CASE (rowsplit)
     344       67628 :          repl_dist = dbt_tas_dist_repl(row_dist, pdims(1), nblkrows, info%ngroup, info%pgrid_split_size)
     345       67628 :          dir_dist = dbt_tas_dist_arb(col_dist, pdims(2), INT(nblkcols, KIND=int_8))
     346       67628 :          repl_blksize = dbt_tas_blk_size_repl(row_blk_size, info%ngroup)
     347       67628 :          dir_blksize = dbt_tas_blk_size_arb(col_blk_size)
     348       67628 :          ALLOCATE (row_dist_obj, source=repl_dist)
     349       67628 :          ALLOCATE (col_dist_obj, source=dir_dist)
     350       67628 :          ALLOCATE (row_bsize_obj, source=repl_blksize)
     351       67628 :          ALLOCATE (col_bsize_obj, source=dir_blksize)
     352             :       CASE (colsplit)
     353       35338 :          dir_dist = dbt_tas_dist_arb(row_dist, pdims(1), INT(nblkrows, KIND=int_8))
     354       35338 :          repl_dist = dbt_tas_dist_repl(col_dist, pdims(2), nblkcols, info%ngroup, info%pgrid_split_size)
     355       35338 :          dir_blksize = dbt_tas_blk_size_arb(row_blk_size)
     356       35338 :          repl_blksize = dbt_tas_blk_size_repl(col_blk_size, info%ngroup)
     357       35338 :          ALLOCATE (row_dist_obj, source=dir_dist)
     358       35338 :          ALLOCATE (col_dist_obj, source=repl_dist)
     359       35338 :          ALLOCATE (row_bsize_obj, source=dir_blksize)
     360      479492 :          ALLOCATE (col_bsize_obj, source=repl_blksize)
     361             :       END SELECT
     362             : 
     363      102966 :       CALL dbt_tas_distribution_new(dist, mp_comm, row_dist_obj, col_dist_obj, split_info=info)
     364             :       CALL dbt_tas_create(matrix_out, TRIM(dbm_get_name(matrix_in))//" replicated", &
     365      102966 :                           dist, row_bsize_obj, col_bsize_obj, own_dist=.TRUE.)
     366             : 
     367      102966 :       IF (nodata_prv) THEN
     368       32196 :          CALL dbt_tas_finalize(matrix_out)
     369       32196 :          CALL timestop(handle)
     370       32196 :          RETURN
     371             :       END IF
     372             : 
     373      335472 :       ALLOCATE (buffer_send(0:numnodes - 1))
     374      335472 :       ALLOCATE (buffer_recv(0:numnodes - 1))
     375      212310 :       ALLOCATE (num_blocks_recv(0:numnodes - 1))
     376      212310 :       ALLOCATE (num_blocks_send(0:numnodes - 1))
     377      212310 :       ALLOCATE (num_entries_recv(0:numnodes - 1))
     378      212310 :       ALLOCATE (num_entries_send(0:numnodes - 1))
     379      212310 :       ALLOCATE (num_rec(0:2*numnodes - 1))
     380      212310 :       ALLOCATE (num_send(0:2*numnodes - 1))
     381      317094 :       num_send(:) = 0
     382      988038 :       ALLOCATE (req_array(1:numnodes, 4))
     383      212310 :       ALLOCATE (locks(0:numnodes - 1))
     384      193932 :       DO iproc = 0, numnodes - 1
     385      193932 :          CALL omp_init_lock(locks(iproc))
     386             :       END DO
     387             : 
     388             : !$OMP PARALLEL DEFAULT(NONE) SHARED(matrix_in,matrix_out,num_send,ngroup) &
     389       70770 : !$OMP PRIVATE(iter,blk_index,blk_size,iprocs)
     390             :       ALLOCATE (iprocs(ngroup))
     391             :       CALL dbm_iterator_start(iter, matrix_in)
     392             :       DO WHILE (dbm_iterator_blocks_left(iter))
     393             :          CALL dbm_iterator_next_block(iter, blk_index(1), blk_index(2), &
     394             :                                       row_size=blk_size(1), col_size=blk_size(2))
     395             :          CALL dbt_repl_get_stored_coordinates(matrix_out, blk_index(1), blk_index(2), iprocs)
     396             :          DO i = 1, SIZE(iprocs)
     397             : !$OMP ATOMIC
     398             :             num_send(2*iprocs(i)) = num_send(2*iprocs(i)) + PRODUCT(blk_size)
     399             : !$OMP ATOMIC
     400             :             num_send(2*iprocs(i) + 1) = num_send(2*iprocs(i) + 1) + 1
     401             :          END DO
     402             :       END DO
     403             :       CALL dbm_iterator_stop(iter)
     404             :       DEALLOCATE (iprocs)
     405             : !$OMP END PARALLEL
     406             : 
     407       70770 :       CALL timeset(routineN//"_alltoall", handle2)
     408       70770 :       CALL mp_comm%alltoall(num_send, num_rec, 2)
     409       70770 :       CALL timestop(handle2)
     410             : 
     411      193932 :       DO iproc = 0, numnodes - 1
     412      123162 :          num_entries_recv(iproc) = num_rec(2*iproc)
     413      123162 :          num_blocks_recv(iproc) = num_rec(2*iproc + 1)
     414      123162 :          num_entries_send(iproc) = num_send(2*iproc)
     415      123162 :          num_blocks_send(iproc) = num_send(2*iproc + 1)
     416             : 
     417      123162 :          CALL dbt_buffer_create(buffer_send(iproc), num_blocks_send(iproc), num_entries_send(iproc))
     418             : 
     419      193932 :          CALL dbt_buffer_create(buffer_recv(iproc), num_blocks_recv(iproc), num_entries_recv(iproc))
     420             : 
     421             :       END DO
     422             : 
     423             : !$OMP PARALLEL DEFAULT(NONE) SHARED(matrix_in,matrix_out,buffer_send,locks,ngroup) &
     424       70770 : !$OMP PRIVATE(iter,blk_index,blk_size,block,iprocs)
     425             :       ALLOCATE (iprocs(ngroup))
     426             :       CALL dbm_iterator_start(iter, matrix_in)
     427             :       DO WHILE (dbm_iterator_blocks_left(iter))
     428             :          CALL dbm_iterator_next_block(iter, blk_index(1), blk_index(2), block, &
     429             :                                       row_size=blk_size(1), col_size=blk_size(2))
     430             :          CALL dbt_repl_get_stored_coordinates(matrix_out, blk_index(1), blk_index(2), iprocs)
     431             :          DO i = 1, SIZE(iprocs)
     432             :             CALL omp_set_lock(locks(iprocs(i)))
     433             :             CALL dbt_buffer_add_block(buffer_send(iprocs(i)), INT(blk_index, KIND=int_8), block)
     434             :             CALL omp_unset_lock(locks(iprocs(i)))
     435             :          END DO
     436             :       END DO
     437             :       CALL dbm_iterator_stop(iter)
     438             :       DEALLOCATE (iprocs)
     439             : !$OMP END PARALLEL
     440             : 
     441       70770 :       IF (move_prv) CALL dbm_clear(matrix_in)
     442             : 
     443       70770 :       CALL timeset(routineN//"_communicate_buffer", handle2)
     444       70770 :       CALL dbt_tas_communicate_buffer(mp_comm, buffer_recv, buffer_send, req_array)
     445             : 
     446      193932 :       DO iproc = 0, numnodes - 1
     447      193932 :          CALL dbt_buffer_destroy(buffer_send(iproc))
     448             :       END DO
     449             : 
     450       70770 :       CALL timestop(handle2)
     451             : 
     452             :       ! TODO Add OpenMP to the buffer unpacking.
     453      193932 :       nblk = SUM(num_blocks_recv)
     454      211848 :       ALLOCATE (blks_to_allocate(nblk, 2))
     455             : 
     456       70770 :       bcount = 0
     457      193932 :       DO iproc = 0, numnodes - 1
     458      123162 :          CALL dbt_buffer_get_index(buffer_recv(iproc), index_recv)
     459     4026714 :          blks_to_allocate(bcount + 1:bcount + SIZE(index_recv, 1), :) = INT(index_recv(:, :))
     460      123162 :          bcount = bcount + SIZE(index_recv, 1)
     461      317094 :          DEALLOCATE (index_recv)
     462             :       END DO
     463             : 
     464             : !TODO: Parallelize creation of block list.
     465       70770 : !$OMP PARALLEL DEFAULT(NONE) SHARED(matrix_out,nblk,blks_to_allocate) PRIVATE(nblk_per_thread,A,b)
     466             :       nblk_per_thread = nblk/omp_get_num_threads() + 1
     467             :       a = omp_get_thread_num()*nblk_per_thread + 1
     468             :       b = MIN(a + nblk_per_thread, nblk)
     469             :       CALL dbm_reserve_blocks(matrix_out%matrix, blks_to_allocate(a:b, 1), blks_to_allocate(a:b, 2))
     470             : !$OMP END PARALLEL
     471       70770 :       DEALLOCATE (blks_to_allocate)
     472             : 
     473      193932 :       DO iproc = 0, numnodes - 1
     474             :          ! First, we need to get the index to create block
     475     1951776 :          DO WHILE (dbt_buffer_blocks_left(buffer_recv(iproc)))
     476     1828614 :             CALL dbt_buffer_get_next_block(buffer_recv(iproc), ndata, blk_index_i8)
     477     1828614 :             CALL dbt_tas_blk_sizes(matrix_out, blk_index_i8(1), blk_index_i8(2), blk_size(1), blk_size(2))
     478     7314456 :             ALLOCATE (block(blk_size(1), blk_size(2)))
     479     1828614 :             CALL dbt_buffer_get_next_block(buffer_recv(iproc), ndata, blk_index_i8, block)
     480   298072601 :             CALL dbm_put_block(matrix_out%matrix, INT(blk_index_i8(1)), INT(blk_index_i8(2)), block)
     481     5609004 :             DEALLOCATE (block)
     482             :          END DO
     483             : 
     484      193932 :          CALL dbt_buffer_destroy(buffer_recv(iproc))
     485             :       END DO
     486             : 
     487       70770 :       CALL dbt_tas_finalize(matrix_out)
     488             : 
     489       70770 :       CALL timestop(handle)
     490             : 
     491     1211592 :    END SUBROUTINE
     492             : 
     493             : ! **************************************************************************************************
     494             : !> \brief Merge submatrices of matrix_in to matrix_out by sum
     495             : !> \param matrix_out ...
     496             : !> \param matrix_in ...
     497             : !> \param summation ...
     498             : !> \param move_data memory optimization: move data to matrix_out such that matrix_in is empty on return
     499             : !> \author Patrick Seewald
     500             : ! **************************************************************************************************
     501       32196 :    SUBROUTINE dbt_tas_merge(matrix_out, matrix_in, summation, move_data)
     502             :       TYPE(dbm_type), INTENT(INOUT)                      :: matrix_out
     503             :       TYPE(dbt_tas_type), INTENT(INOUT)                  :: matrix_in
     504             :       LOGICAL, INTENT(IN), OPTIONAL                      :: summation, move_data
     505             : 
     506             :       CHARACTER(LEN=*), PARAMETER                        :: routineN = 'dbt_tas_merge'
     507             : 
     508             :       INTEGER                                            :: a, b, bcount, handle, handle2, iproc, &
     509             :                                                             nblk, nblk_per_thread, ndata, numnodes
     510       32196 :       INTEGER(KIND=int_8), ALLOCATABLE, DIMENSION(:, :)  :: index_recv
     511             :       INTEGER(KIND=int_8), DIMENSION(2)                  :: blk_index_i8
     512             :       INTEGER(kind=omp_lock_kind), ALLOCATABLE, &
     513       32196 :          DIMENSION(:)                                    :: locks
     514       32196 :       INTEGER, ALLOCATABLE, DIMENSION(:)                 :: num_blocks_recv, num_blocks_send, &
     515       32196 :                                                             num_entries_recv, num_entries_send, &
     516       32196 :                                                             num_rec, num_send
     517       32196 :       INTEGER, ALLOCATABLE, DIMENSION(:, :)              :: blks_to_allocate
     518             :       INTEGER, DIMENSION(2)                              :: blk_index, blk_size
     519       64392 :       INTEGER, DIMENSION(:), POINTER                     :: col_block_sizes, row_block_sizes
     520             :       LOGICAL                                            :: move_prv
     521       32196 :       REAL(dp), DIMENSION(:, :), POINTER                 :: block
     522             :       TYPE(dbm_iterator)                                 :: iter
     523       32196 :       TYPE(dbt_buffer_type), ALLOCATABLE, DIMENSION(:)   :: buffer_recv, buffer_send
     524      160980 :       TYPE(dbt_tas_split_info)                           :: info
     525       32196 :       TYPE(mp_cart_type)                                 :: mp_comm
     526             :       TYPE(mp_request_type), ALLOCATABLE, &
     527       32196 :          DIMENSION(:, :)                                 :: req_array
     528             : 
     529             : !!
     530             : 
     531       32196 :       CALL timeset(routineN, handle)
     532             : 
     533       32196 :       IF (PRESENT(summation)) THEN
     534           0 :          IF (.NOT. summation) CALL dbm_clear(matrix_out)
     535             :       ELSE
     536       32196 :          CALL dbm_clear(matrix_out)
     537             :       END IF
     538             : 
     539       32196 :       IF (PRESENT(move_data)) THEN
     540       32196 :          move_prv = move_data
     541             :       ELSE
     542             :          move_prv = .FALSE.
     543             :       END IF
     544             : 
     545       32196 :       info = dbt_tas_info(matrix_in)
     546       32196 :       CALL dbt_tas_get_split_info(info, mp_comm=mp_comm)
     547       32196 :       numnodes = mp_comm%num_pe
     548             : 
     549      156950 :       ALLOCATE (buffer_send(0:numnodes - 1))
     550      156950 :       ALLOCATE (buffer_recv(0:numnodes - 1))
     551       96588 :       ALLOCATE (num_blocks_recv(0:numnodes - 1))
     552       96588 :       ALLOCATE (num_blocks_send(0:numnodes - 1))
     553       96588 :       ALLOCATE (num_entries_recv(0:numnodes - 1))
     554       96588 :       ALLOCATE (num_entries_send(0:numnodes - 1))
     555       96588 :       ALLOCATE (num_rec(0:2*numnodes - 1))
     556       96588 :       ALLOCATE (num_send(0:2*numnodes - 1))
     557      152920 :       num_send(:) = 0
     558      466820 :       ALLOCATE (req_array(1:numnodes, 4))
     559       96588 :       ALLOCATE (locks(0:numnodes - 1))
     560       92558 :       DO iproc = 0, numnodes - 1
     561       92558 :          CALL omp_init_lock(locks(iproc))
     562             :       END DO
     563             : 
     564             : !$OMP PARALLEL DEFAULT(NONE) SHARED(matrix_in,matrix_out,num_send) &
     565       32196 : !$OMP PRIVATE(iter,blk_index,blk_size,iproc)
     566             :       CALL dbm_iterator_start(iter, matrix_in%matrix)
     567             :       DO WHILE (dbm_iterator_blocks_left(iter))
     568             :          CALL dbm_iterator_next_block(iter, blk_index(1), blk_index(2), &
     569             :                                       row_size=blk_size(1), col_size=blk_size(2))
     570             :          CALL dbm_get_stored_coordinates(matrix_out, blk_index(1), blk_index(2), iproc)
     571             : !$OMP ATOMIC
     572             :          num_send(2*iproc) = num_send(2*iproc) + PRODUCT(blk_size)
     573             : !$OMP ATOMIC
     574             :          num_send(2*iproc + 1) = num_send(2*iproc + 1) + 1
     575             :       END DO
     576             :       CALL dbm_iterator_stop(iter)
     577             : !$OMP END PARALLEL
     578             : 
     579       32196 :       CALL timeset(routineN//"_alltoall", handle2)
     580       32196 :       CALL mp_comm%alltoall(num_send, num_rec, 2)
     581       32196 :       CALL timestop(handle2)
     582             : 
     583       92558 :       DO iproc = 0, numnodes - 1
     584       60362 :          num_entries_recv(iproc) = num_rec(2*iproc)
     585       60362 :          num_blocks_recv(iproc) = num_rec(2*iproc + 1)
     586       60362 :          num_entries_send(iproc) = num_send(2*iproc)
     587       60362 :          num_blocks_send(iproc) = num_send(2*iproc + 1)
     588             : 
     589       60362 :          CALL dbt_buffer_create(buffer_send(iproc), num_blocks_send(iproc), num_entries_send(iproc))
     590             : 
     591       92558 :          CALL dbt_buffer_create(buffer_recv(iproc), num_blocks_recv(iproc), num_entries_recv(iproc))
     592             : 
     593             :       END DO
     594             : 
     595             : !$OMP PARALLEL DEFAULT(NONE) SHARED(matrix_in,matrix_out,buffer_send,locks) &
     596       32196 : !$OMP PRIVATE(iter,blk_index,blk_size,block,iproc)
     597             :       CALL dbm_iterator_start(iter, matrix_in%matrix)
     598             :       DO WHILE (dbm_iterator_blocks_left(iter))
     599             :          CALL dbm_iterator_next_block(iter, blk_index(1), blk_index(2), block, &
     600             :                                       row_size=blk_size(1), col_size=blk_size(2))
     601             :          CALL dbm_get_stored_coordinates(matrix_out, blk_index(1), blk_index(2), iproc)
     602             :          CALL omp_set_lock(locks(iproc))
     603             :          CALL dbt_buffer_add_block(buffer_send(iproc), INT(blk_index, KIND=int_8), block)
     604             :          CALL omp_unset_lock(locks(iproc))
     605             :       END DO
     606             :       CALL dbm_iterator_stop(iter)
     607             : !$OMP END PARALLEL
     608             : 
     609       32196 :       IF (move_prv) CALL dbt_tas_clear(matrix_in)
     610             : 
     611       32196 :       CALL timeset(routineN//"_communicate_buffer", handle2)
     612       32196 :       CALL dbt_tas_communicate_buffer(mp_comm, buffer_recv, buffer_send, req_array)
     613             : 
     614       92558 :       DO iproc = 0, numnodes - 1
     615       92558 :          CALL dbt_buffer_destroy(buffer_send(iproc))
     616             :       END DO
     617             : 
     618       32196 :       CALL timestop(handle2)
     619             : 
     620             :       ! TODO Add OpenMP to the buffer unpacking.
     621       92558 :       nblk = SUM(num_blocks_recv)
     622       92920 :       ALLOCATE (blks_to_allocate(nblk, 2))
     623             : 
     624       32196 :       bcount = 0
     625       92558 :       DO iproc = 0, numnodes - 1
     626       60362 :          CALL dbt_buffer_get_index(buffer_recv(iproc), index_recv)
     627     1582156 :          blks_to_allocate(bcount + 1:bcount + SIZE(index_recv, 1), :) = INT(index_recv(:, :))
     628       60362 :          bcount = bcount + SIZE(index_recv, 1)
     629      152920 :          DEALLOCATE (index_recv)
     630             :       END DO
     631             : 
     632             : !TODO: Parallelize creation of block list.
     633       32196 : !$OMP PARALLEL DEFAULT(NONE) SHARED(matrix_out,nblk,blks_to_allocate) PRIVATE(nblk_per_thread,A,b)
     634             :       nblk_per_thread = nblk/omp_get_num_threads() + 1
     635             :       a = omp_get_thread_num()*nblk_per_thread + 1
     636             :       b = MIN(a + nblk_per_thread, nblk)
     637             :       CALL dbm_reserve_blocks(matrix_out, blks_to_allocate(a:b, 1), blks_to_allocate(a:b, 2))
     638             : !$OMP END PARALLEL
     639       32196 :       DEALLOCATE (blks_to_allocate)
     640             : 
     641       92558 :       DO iproc = 0, numnodes - 1
     642             :          ! First, we need to get the index to create block
     643      760897 :          DO WHILE (dbt_buffer_blocks_left(buffer_recv(iproc)))
     644      700535 :             CALL dbt_buffer_get_next_block(buffer_recv(iproc), ndata, blk_index_i8)
     645      700535 :             row_block_sizes => dbm_get_row_block_sizes(matrix_out)
     646      700535 :             col_block_sizes => dbm_get_col_block_sizes(matrix_out)
     647      700535 :             blk_size(1) = row_block_sizes(INT(blk_index_i8(1)))
     648      700535 :             blk_size(2) = col_block_sizes(INT(blk_index_i8(2)))
     649     2802140 :             ALLOCATE (block(blk_size(1), blk_size(2)))
     650      700535 :             CALL dbt_buffer_get_next_block(buffer_recv(iproc), ndata, blk_index_i8, block)
     651    77188366 :             CALL dbm_put_block(matrix_out, INT(blk_index_i8(1)), INT(blk_index_i8(2)), block, summation=.TRUE.)
     652     2161967 :             DEALLOCATE (block)
     653             :          END DO
     654       92558 :          CALL dbt_buffer_destroy(buffer_recv(iproc))
     655             :       END DO
     656             : 
     657       32196 :       CALL dbm_finalize(matrix_out)
     658             : 
     659       32196 :       CALL timestop(handle)
     660      249508 :    END SUBROUTINE
     661             : 
     662             : ! **************************************************************************************************
     663             : !> \brief get all indices from buffer
     664             : !> \param buffer ...
     665             : !> \param index ...
     666             : !> \author Patrick Seewald
     667             : ! **************************************************************************************************
     668      376554 :    SUBROUTINE dbt_buffer_get_index(buffer, index)
     669             :       TYPE(dbt_buffer_type), INTENT(IN)                  :: buffer
     670             :       INTEGER(KIND=int_8), ALLOCATABLE, &
     671             :          DIMENSION(:, :), INTENT(OUT)                    :: index
     672             : 
     673             :       CHARACTER(LEN=*), PARAMETER :: routineN = 'dbt_buffer_get_index'
     674             : 
     675             :       INTEGER                                            :: handle
     676             :       INTEGER, DIMENSION(2)                              :: indx_shape
     677             : 
     678      376554 :       CALL timeset(routineN, handle)
     679             : 
     680     1506216 :       indx_shape = SHAPE(buffer%indx) - [0, 1]
     681     1388283 :       ALLOCATE (INDEX(indx_shape(1), indx_shape(2)))
     682    10069942 :       INDEX(:, :) = buffer%indx(1:indx_shape(1), 1:indx_shape(2))
     683      376554 :       CALL timestop(handle)
     684      376554 :    END SUBROUTINE
     685             : 
     686             : ! **************************************************************************************************
     687             : !> \brief how many blocks left in iterator
     688             : !> \param buffer ...
     689             : !> \return ...
     690             : !> \author Patrick Seewald
     691             : ! **************************************************************************************************
     692     4846694 :    PURE FUNCTION dbt_buffer_blocks_left(buffer)
     693             :       TYPE(dbt_buffer_type), INTENT(IN)                  :: buffer
     694             :       LOGICAL                                            :: dbt_buffer_blocks_left
     695             : 
     696     4846694 :       dbt_buffer_blocks_left = buffer%endpos .LT. buffer%nblock
     697     4846694 :    END FUNCTION
     698             : 
     699             : ! **************************************************************************************************
     700             : !> \brief Create block buffer for MPI communication.
     701             : !> \param buffer block buffer
     702             : !> \param nblock number of blocks
     703             : !> \param ndata total number of block entries
     704             : !> \author Patrick Seewald
     705             : ! **************************************************************************************************
     706      753108 :    SUBROUTINE dbt_buffer_create(buffer, nblock, ndata)
     707             :       TYPE(dbt_buffer_type), INTENT(OUT)                 :: buffer
     708             :       INTEGER, INTENT(IN)                                :: nblock, ndata
     709             : 
     710      753108 :       buffer%nblock = nblock
     711      753108 :       buffer%endpos = 0
     712     2023458 :       ALLOCATE (buffer%msg(ndata))
     713     2023458 :       ALLOCATE (buffer%indx(nblock, 3))
     714      753108 :    END SUBROUTINE
     715             : 
     716             : ! **************************************************************************************************
     717             : !> \brief ...
     718             : !> \param buffer ...
     719             : !> \author Patrick Seewald
     720             : ! **************************************************************************************************
     721      753108 :    SUBROUTINE dbt_buffer_destroy(buffer)
     722             :       TYPE(dbt_buffer_type), INTENT(INOUT)               :: buffer
     723             : 
     724      753108 :       DEALLOCATE (buffer%msg)
     725      753108 :       DEALLOCATE (buffer%indx)
     726      753108 :       buffer%nblock = -1
     727      753108 :       buffer%endpos = -1
     728      753108 :    END SUBROUTINE dbt_buffer_destroy
     729             : 
     730             : ! **************************************************************************************************
     731             : !> \brief insert a block into block buffer (at current iterator position)
     732             : !> \param buffer ...
     733             : !> \param index index of block
     734             : !> \param block ...
     735             : !> \param transposed ...
     736             : !> \author Patrick Seewald
     737             : ! **************************************************************************************************
     738     4470140 :    SUBROUTINE dbt_buffer_add_block(buffer, index, block, transposed)
     739             :       TYPE(dbt_buffer_type), INTENT(INOUT)               :: buffer
     740             :       INTEGER(KIND=int_8), DIMENSION(2), INTENT(IN)      :: index
     741             :       REAL(dp), DIMENSION(:, :), INTENT(IN)              :: block
     742             :       LOGICAL, INTENT(IN), OPTIONAL                      :: transposed
     743             : 
     744             :       INTEGER                                            :: ndata, p, p_data
     745             :       INTEGER(KIND=int_8), DIMENSION(2)                  :: index_prv
     746             :       LOGICAL                                            :: tr
     747             : 
     748     4470140 :       IF (PRESENT(transposed)) THEN
     749     1940991 :          tr = transposed
     750             :       ELSE
     751             :          tr = .FALSE.
     752             :       END IF
     753             : 
     754     4470140 :       index_prv(:) = INDEX(:)
     755     4470140 :       IF (tr) THEN
     756      501055 :          CALL swap(index_prv)
     757             :       END IF
     758    13410420 :       ndata = PRODUCT(SHAPE(block))
     759             : 
     760     4470140 :       p = buffer%endpos
     761     4470140 :       IF (p .EQ. 0) THEN
     762             :          p_data = 0
     763             :       ELSE
     764     4211519 :          p_data = INT(buffer%indx(p, 3))
     765             :       END IF
     766             : 
     767     4470140 :       IF (tr) THEN
     768    73060289 :          buffer%msg(p_data + 1:p_data + ndata) = RESHAPE(TRANSPOSE(block), [ndata])
     769             :       ELSE
     770   624395236 :          buffer%msg(p_data + 1:p_data + ndata) = RESHAPE(block, [ndata])
     771             :       END IF
     772             : 
     773    13410420 :       buffer%indx(p + 1, 1:2) = index_prv(:)
     774     4470140 :       IF (p > 0) THEN
     775     4211519 :          buffer%indx(p + 1, 3) = buffer%indx(p, 3) + INT(ndata, KIND=int_8)
     776             :       ELSE
     777      258621 :          buffer%indx(p + 1, 3) = INT(ndata, KIND=int_8)
     778             :       END IF
     779     4470140 :       buffer%endpos = buffer%endpos + 1
     780     4470140 :    END SUBROUTINE
     781             : 
     782             : ! **************************************************************************************************
     783             : !> \brief get next block from buffer. Iterator is advanced only if block is retrieved or advance_iter.
     784             : !> \param buffer ...
     785             : !> \param ndata ...
     786             : !> \param index ...
     787             : !> \param block ...
     788             : !> \param advance_iter ...
     789             : !> \author Patrick Seewald
     790             : ! **************************************************************************************************
     791     8940280 :    SUBROUTINE dbt_buffer_get_next_block(buffer, ndata, index, block, advance_iter)
     792             :       TYPE(dbt_buffer_type), INTENT(INOUT)               :: buffer
     793             :       INTEGER, INTENT(OUT)                               :: ndata
     794             :       INTEGER(KIND=int_8), DIMENSION(2), INTENT(OUT)     :: index
     795             :       REAL(dp), DIMENSION(:, :), INTENT(OUT), OPTIONAL   :: block
     796             :       LOGICAL, INTENT(IN), OPTIONAL                      :: advance_iter
     797             : 
     798             :       INTEGER                                            :: p, p_data
     799             :       LOGICAL                                            :: do_advance
     800             : 
     801     8940280 :       do_advance = .FALSE.
     802     8940280 :       IF (PRESENT(advance_iter)) THEN
     803           0 :          do_advance = advance_iter
     804     8940280 :       ELSE IF (PRESENT(block)) THEN
     805     4470140 :          do_advance = .TRUE.
     806             :       END IF
     807             : 
     808     8940280 :       p = buffer%endpos
     809     8940280 :       IF (p .EQ. 0) THEN
     810             :          p_data = 0
     811             :       ELSE
     812     8423038 :          p_data = INT(buffer%indx(p, 3))
     813             :       END IF
     814             : 
     815     8940280 :       IF (p > 0) THEN
     816     8423038 :          ndata = INT(buffer%indx(p + 1, 3) - buffer%indx(p, 3))
     817             :       ELSE
     818      517242 :          ndata = INT(buffer%indx(p + 1, 3))
     819             :       END IF
     820    26820840 :       INDEX(:) = buffer%indx(p + 1, 1:2)
     821             : 
     822     8940280 :       IF (PRESENT(block)) THEN
     823    13410420 :          block(:, :) = RESHAPE(buffer%msg(p_data + 1:p_data + ndata), SHAPE(block))
     824             :       END IF
     825             : 
     826     8940280 :       IF (do_advance) buffer%endpos = buffer%endpos + 1
     827     8940280 :    END SUBROUTINE
     828             : 
     829             : ! **************************************************************************************************
     830             : !> \brief communicate buffer
     831             : !> \param mp_comm ...
     832             : !> \param buffer_recv ...
     833             : !> \param buffer_send ...
     834             : !> \param req_array ...
     835             : !> \author Patrick Seewald
     836             : ! **************************************************************************************************
     837     2571906 :    SUBROUTINE dbt_tas_communicate_buffer(mp_comm, buffer_recv, buffer_send, req_array)
     838             :       CLASS(mp_comm_type), INTENT(IN)                     :: mp_comm
     839             :       TYPE(dbt_buffer_type), DIMENSION(0:), &
     840             :          INTENT(INOUT)                                   :: buffer_recv, buffer_send
     841             :       TYPE(mp_request_type), DIMENSION(:, :), &
     842             :          INTENT(OUT)                                     :: req_array
     843             : 
     844             :       CHARACTER(LEN=*), PARAMETER :: routineN = 'dbt_tas_communicate_buffer'
     845             : 
     846             :       INTEGER                                            :: handle, iproc, numnodes, &
     847             :                                                             rec_counter, send_counter
     848             : 
     849      213138 :       CALL timeset(routineN, handle)
     850      213138 :       numnodes = mp_comm%num_pe
     851             : 
     852      213138 :       IF (numnodes > 1) THEN
     853             : 
     854      163416 :          send_counter = 0
     855      163416 :          rec_counter = 0
     856             : 
     857      490248 :          DO iproc = 0, numnodes - 1
     858      490248 :             IF (buffer_recv(iproc)%nblock > 0) THEN
     859      212931 :                rec_counter = rec_counter + 1
     860      212931 :                CALL mp_comm%irecv(buffer_recv(iproc)%indx, iproc, req_array(rec_counter, 3), tag=4)
     861      212931 :                CALL mp_comm%irecv(buffer_recv(iproc)%msg, iproc, req_array(rec_counter, 4), tag=7)
     862             :             END IF
     863             :          END DO
     864             : 
     865      490248 :          DO iproc = 0, numnodes - 1
     866      490248 :             IF (buffer_send(iproc)%nblock > 0) THEN
     867      212931 :                send_counter = send_counter + 1
     868      212931 :                CALL mp_comm%isend(buffer_send(iproc)%indx, iproc, req_array(send_counter, 1), tag=4)
     869      212931 :                CALL mp_comm%isend(buffer_send(iproc)%msg, iproc, req_array(send_counter, 2), tag=7)
     870             :             END IF
     871             :          END DO
     872             : 
     873      163416 :          IF (send_counter > 0) THEN
     874      131305 :             CALL mp_waitall(req_array(1:send_counter, 1:2))
     875             :          END IF
     876      163416 :          IF (rec_counter > 0) THEN
     877      151849 :             CALL mp_waitall(req_array(1:rec_counter, 3:4))
     878             :          END IF
     879             : 
     880             :       ELSE
     881       49722 :          IF (buffer_recv(0)%nblock > 0) THEN
     882     2980491 :             buffer_recv(0)%indx(:, :) = buffer_send(0)%indx(:, :)
     883   409486961 :             buffer_recv(0)%msg(:) = buffer_send(0)%msg(:)
     884             :          END IF
     885             :       END IF
     886      213138 :       CALL timestop(handle)
     887      213138 :    END SUBROUTINE
     888             : 
     889      142368 : END MODULE

Generated by: LCOV version 1.15