LCOV - code coverage report
Current view: top level - src/pw - dct.F (source / functions) Hit Total Coverage
Test: CP2K Regtests (git:9843133) Lines: 505 532 94.9 %
Date: 2024-05-10 06:53:45 Functions: 15 18 83.3 %

          Line data    Source code
       1             : !--------------------------------------------------------------------------------------------------!
       2             : !   CP2K: A general program to perform molecular dynamics simulations                              !
       3             : !   Copyright 2000-2024 CP2K developers group <https://cp2k.org>                                   !
       4             : !                                                                                                  !
       5             : !   SPDX-License-Identifier: GPL-2.0-or-later                                                      !
       6             : !--------------------------------------------------------------------------------------------------!
       7             : 
       8             : ! **************************************************************************************************
       9             : !> \brief the type I Discrete Cosine Transform (DCT-I)
      10             : !> \par History
      11             : !>       07.2014 created [Hossein Bani-Hashemian]
      12             : !>       11.2015 dealt with periodic grids [Hossein Bani-Hashemian]
      13             : !>       03.2016 dct in one or two directions [Hossein Bani-Hashemian]
      14             : !> \author Mohammad Hossein Bani-Hashemian
      15             : ! **************************************************************************************************
      16             : MODULE dct
      17             : 
      18             :    USE kinds,                           ONLY: dp
      19             :    USE message_passing,                 ONLY: mp_comm_type,&
      20             :                                               mp_request_null,&
      21             :                                               mp_request_type,&
      22             :                                               mp_waitall
      23             :    USE pw_grid_types,                   ONLY: pw_grid_type
      24             :    USE pw_grids,                        ONLY: pw_grid_create
      25             :    USE pw_types,                        ONLY: pw_r3d_rs_type
      26             : #include "../base/base_uses.f90"
      27             : 
      28             :    IMPLICIT NONE
      29             :    PRIVATE
      30             :    CHARACTER(len=*), PARAMETER, PRIVATE :: moduleN = 'dct'
      31             : 
      32             :    TYPE :: dct_type
      33             :       INTEGER, DIMENSION(:), POINTER     :: dests_expand => NULL()
      34             :       INTEGER, DIMENSION(:), POINTER     :: srcs_expand => NULL()
      35             :       INTEGER, DIMENSION(:), POINTER     :: flipg_stat => NULL()
      36             :       INTEGER, DIMENSION(:), POINTER     :: dests_shrink => NULL()
      37             :       INTEGER                            :: srcs_shrink = 0
      38             :       INTEGER, DIMENSION(:, :, :), POINTER :: recv_msgs_bnds => NULL()
      39             :       INTEGER, DIMENSION(2, 3)            :: dct_bounds = 0
      40             :       INTEGER, DIMENSION(2, 3)            :: dct_bounds_local = 0
      41             :       INTEGER, DIMENSION(2, 3)            :: bounds_shftd = 0
      42             :       INTEGER, DIMENSION(2, 3)            :: bounds_local_shftd = 0
      43             :    END TYPE dct_type
      44             : 
      45             :    TYPE dct_msg_type
      46             :       PRIVATE
      47             :       REAL(dp), DIMENSION(:, :, :), ALLOCATABLE :: msg
      48             :    END TYPE dct_msg_type
      49             : 
      50             :    PUBLIC dct_type, &
      51             :       dct_type_init, &
      52             :       dct_type_release, &
      53             :       setup_dct_pw_grids, &
      54             :       pw_shrink, &
      55             :       pw_expand
      56             : 
      57             :    INTEGER, PARAMETER, PRIVATE    :: NOT_FLIPPED = 0, &
      58             :                                      UD_FLIPPED = 1, &
      59             :                                      LR_FLIPPED = 2, &
      60             :                                      BF_FLIPPED = 3, &
      61             :                                      ROTATED = 4
      62             : 
      63             :    INTEGER, PARAMETER, PUBLIC     :: neumannXYZ = 111, &
      64             :                                      neumannXY = 110, &
      65             :                                      neumannXZ = 101, &
      66             :                                      neumannYZ = 011, &
      67             :                                      neumannX = 100, &
      68             :                                      neumannY = 010, &
      69             :                                      neumannZ = 001
      70             : 
      71             : CONTAINS
      72             : 
      73             : ! **************************************************************************************************
      74             : !> \brief  Initializes a dct_type
      75             : !> \param pw_grid the original plane wave grid
      76             : !> \param neumann_directions directions in which dct should be performed
      77             : !> \param dct_env dct_type to be initialized
      78             : !> \par History
      79             : !>       08.2014 created [Hossein Bani-Hashemian]
      80             : !> \author Mohammad Hossein Bani-Hashemian
      81             : ! **************************************************************************************************
      82          22 :    SUBROUTINE dct_type_init(pw_grid, neumann_directions, dct_env)
      83             : 
      84             :       TYPE(pw_grid_type), INTENT(IN), POINTER            :: pw_grid
      85             :       INTEGER, INTENT(IN)                                :: neumann_directions
      86             :       TYPE(dct_type), INTENT(INOUT)                      :: dct_env
      87             : 
      88             :       CHARACTER(len=*), PARAMETER                        :: routineN = 'dct_type_init'
      89             : 
      90             :       INTEGER                                            :: handle, maxn_sendrecv
      91             : 
      92          22 :       CALL timeset(routineN, handle)
      93             : 
      94          22 :       SELECT CASE (neumann_directions)
      95             :       CASE (neumannXYZ, neumannXY)
      96           4 :          maxn_sendrecv = 4
      97             :       CASE (neumannX, neumannY, neumannXZ, neumannYZ)
      98           4 :          maxn_sendrecv = 2
      99             :       CASE (neumannZ)
     100           2 :          maxn_sendrecv = 1
     101             :       CASE DEFAULT
     102          22 :          CPABORT("Invalid combination of Neumann and periodic conditions.")
     103             :       END SELECT
     104             : 
     105          44 :       ALLOCATE (dct_env%flipg_stat(maxn_sendrecv))
     106         110 :       ALLOCATE (dct_env%dests_expand(maxn_sendrecv), dct_env%srcs_expand(maxn_sendrecv))
     107          66 :       ALLOCATE (dct_env%dests_shrink(maxn_sendrecv))
     108          44 :       ALLOCATE (dct_env%recv_msgs_bnds(2, 3, maxn_sendrecv))
     109             : 
     110             :       CALL set_dests_srcs_pid(pw_grid, neumann_directions, &
     111             :                               dct_env%dests_expand, dct_env%srcs_expand, dct_env%flipg_stat, &
     112          22 :                               dct_env%dests_shrink, dct_env%srcs_shrink)
     113             :       CALL expansion_bounds(pw_grid, neumann_directions, &
     114             :                             dct_env%srcs_expand, dct_env%flipg_stat, &
     115             :                             dct_env%bounds_shftd, dct_env%bounds_local_shftd, &
     116             :                             dct_env%recv_msgs_bnds, dct_env%dct_bounds, &
     117          22 :                             dct_env%dct_bounds_local)
     118             : 
     119          22 :       CALL timestop(handle)
     120             : 
     121          22 :    END SUBROUTINE dct_type_init
     122             : 
     123             : ! **************************************************************************************************
     124             : !> \brief  Releases a dct_type
     125             : !> \param dct_env dct_type to be released
     126             : !> \par History
     127             : !>       03.2016 created [Hossein Bani-Hashemian]
     128             : !> \author Mohammad Hossein Bani-Hashemian
     129             : ! **************************************************************************************************
     130          22 :    SUBROUTINE dct_type_release(dct_env)
     131             : 
     132             :       TYPE(dct_type), INTENT(INOUT)                      :: dct_env
     133             : 
     134             :       CHARACTER(len=*), PARAMETER                        :: routineN = 'dct_type_release'
     135             : 
     136             :       INTEGER                                            :: handle
     137             : 
     138          22 :       CALL timeset(routineN, handle)
     139             : 
     140          22 :       IF (ASSOCIATED(dct_env%dests_shrink)) DEALLOCATE (dct_env%dests_shrink)
     141          22 :       IF (ASSOCIATED(dct_env%dests_expand)) DEALLOCATE (dct_env%dests_expand)
     142          22 :       IF (ASSOCIATED(dct_env%srcs_expand)) DEALLOCATE (dct_env%srcs_expand)
     143          22 :       IF (ASSOCIATED(dct_env%flipg_stat)) DEALLOCATE (dct_env%flipg_stat)
     144          22 :       IF (ASSOCIATED(dct_env%recv_msgs_bnds)) DEALLOCATE (dct_env%recv_msgs_bnds)
     145             : 
     146          22 :       CALL timestop(handle)
     147             : 
     148          22 :    END SUBROUTINE dct_type_release
     149             : 
     150             : ! **************************************************************************************************
     151             : !> \brief   sets up an extended pw_grid for Discrete Cosine Transform (DCT)
     152             : !>          calculations
     153             : !> \param pw_grid the original plane wave grid
     154             : !> \param cell_hmat cell hmat
     155             : !> \param neumann_directions directions in which dct should be performed
     156             : !> \param dct_pw_grid DCT plane-wave grid
     157             : !> \par History
     158             : !>       07.2014 created [Hossein Bani-Hashemian]
     159             : !> \author Mohammad Hossein Bani-Hashemian
     160             : ! **************************************************************************************************
     161          22 :    SUBROUTINE setup_dct_pw_grids(pw_grid, cell_hmat, neumann_directions, dct_pw_grid)
     162             : 
     163             :       TYPE(pw_grid_type), INTENT(IN), POINTER            :: pw_grid
     164             :       REAL(dp), DIMENSION(3, 3), INTENT(IN)              :: cell_hmat
     165             :       INTEGER, INTENT(IN)                                :: neumann_directions
     166             :       TYPE(pw_grid_type), INTENT(INOUT), POINTER         :: dct_pw_grid
     167             : 
     168             :       CHARACTER(LEN=*), PARAMETER :: routineN = 'setup_dct_pw_grids'
     169             : 
     170             :       INTEGER                                            :: blocked, handle, maxn_sendrecv, &
     171             :                                                             srcs_shrink
     172             :       INTEGER, DIMENSION(2, 3)                           :: bounds_local_new, bounds_local_shftd, &
     173             :                                                             bounds_new, bounds_shftd
     174             :       INTEGER, DIMENSION(:), POINTER                     :: dests_expand, dests_shrink, flipg_stat, &
     175             :                                                             srcs_expand
     176             :       INTEGER, DIMENSION(:, :, :), POINTER               :: recv_msgs_bnds
     177             :       REAL(KIND=dp), DIMENSION(3)                        :: scfac
     178             :       REAL(KIND=dp), DIMENSION(3, 3)                     :: hmat2
     179             : 
     180          22 :       CALL timeset(routineN, handle)
     181             : 
     182          38 :       SELECT CASE (neumann_directions)
     183             :       CASE (neumannXYZ)
     184          16 :          maxn_sendrecv = 4
     185          16 :          scfac = (/2.0_dp, 2.0_dp, 2.0_dp/)
     186             :       CASE (neumannXY)
     187           0 :          maxn_sendrecv = 4
     188           0 :          scfac = (/2.0_dp, 2.0_dp, 1.0_dp/)
     189             :       CASE (neumannXZ)
     190           0 :          maxn_sendrecv = 2
     191           0 :          scfac = (/2.0_dp, 1.0_dp, 2.0_dp/)
     192             :       CASE (neumannYZ)
     193           2 :          maxn_sendrecv = 2
     194           2 :          scfac = (/1.0_dp, 2.0_dp, 2.0_dp/)
     195             :       CASE (neumannX)
     196           2 :          maxn_sendrecv = 2
     197           2 :          scfac = (/2.0_dp, 1.0_dp, 1.0_dp/)
     198             :       CASE (neumannY)
     199           0 :          maxn_sendrecv = 2
     200           0 :          scfac = (/1.0_dp, 2.0_dp, 1.0_dp/)
     201             :       CASE (neumannZ)
     202           2 :          maxn_sendrecv = 1
     203           2 :          scfac = (/1.0_dp, 1.0_dp, 2.0_dp/)
     204             :       CASE DEFAULT
     205          22 :          CPABORT("Invalid combination of Neumann and periodic conditions.")
     206             :       END SELECT
     207             : 
     208          44 :       ALLOCATE (flipg_stat(maxn_sendrecv))
     209         154 :       ALLOCATE (dests_expand(maxn_sendrecv), srcs_expand(maxn_sendrecv), dests_shrink(maxn_sendrecv))
     210          44 :       ALLOCATE (recv_msgs_bnds(2, 3, maxn_sendrecv))
     211             : 
     212             :       CALL set_dests_srcs_pid(pw_grid, neumann_directions, dests_expand, srcs_expand, flipg_stat, &
     213          22 :                               dests_shrink, srcs_shrink)
     214             :       CALL expansion_bounds(pw_grid, neumann_directions, srcs_expand, flipg_stat, &
     215          22 :                             bounds_shftd, bounds_local_shftd, recv_msgs_bnds, bounds_new, bounds_local_new)
     216             : 
     217          22 :       hmat2 = 0.0_dp
     218          22 :       hmat2(1, 1) = scfac(1)*cell_hmat(1, 1)
     219          22 :       hmat2(2, 2) = scfac(2)*cell_hmat(2, 2)
     220          22 :       hmat2(3, 3) = scfac(3)*cell_hmat(3, 3)
     221             : 
     222             :       ! uses bounds_local_new that is 2*n-2 in size....this is only rarely fft-able by fftsg, and needs fftw3,
     223             :       ! where it might use sizes that are not declared available in fft_get_radix.
     224             : 
     225          22 :       IF (pw_grid%para%blocked) THEN
     226           0 :          blocked = 1
     227          22 :       ELSE IF (pw_grid%para%ray_distribution) THEN
     228          22 :          blocked = 0
     229             :       END IF
     230             : 
     231             :       CALL pw_grid_create(dct_pw_grid, pw_grid%para%group, hmat2, &
     232             :                           bounds=bounds_new, &
     233             :                           rs_dims=pw_grid%para%group%num_pe_cart, &
     234             :                           blocked=blocked, &
     235          22 :                           bounds_local=bounds_local_new)
     236             : 
     237          22 :       DEALLOCATE (flipg_stat, dests_expand, srcs_expand, dests_shrink, recv_msgs_bnds)
     238             : 
     239          22 :       CALL timestop(handle)
     240             : 
     241          44 :    END SUBROUTINE setup_dct_pw_grids
     242             : 
     243             : ! **************************************************************************************************
     244             : !> \brief Finds the process ids for mpi_isend destiations and mpi_irecv sources
     245             : !>   for expanding and shrinking a pw_r3d_rs_type data
     246             : !> \param pw_grid the original plane wave grid
     247             : !> \param neumann_directions directions in which dct should be performed
     248             : !> \param dests_expand list of the destination processes (pw_expand)
     249             : !> \param srcs_expand list of the source processes (pw_expand)
     250             : !> \param flipg_stat flipping status for the received data chunks (pw_expand)
     251             : !> \param dests_shrink list of the destination processes (pw_shrink)
     252             : !> \param srcs_shrink list of the source processes (pw_shrink)
     253             : !> \par History
     254             : !>       07.2014 created [Hossein Bani-Hashemian]
     255             : !> \author Mohammad Hossein Bani-Hashemian
     256             : ! **************************************************************************************************
     257          44 :    SUBROUTINE set_dests_srcs_pid(pw_grid, neumann_directions, dests_expand, srcs_expand, &
     258             :                                  flipg_stat, dests_shrink, srcs_shrink)
     259             : 
     260             :       TYPE(pw_grid_type), INTENT(IN), POINTER            :: pw_grid
     261             :       INTEGER, INTENT(IN)                                :: neumann_directions
     262             :       INTEGER, DIMENSION(:), INTENT(INOUT), POINTER      :: dests_expand, srcs_expand, flipg_stat, &
     263             :                                                             dests_shrink
     264             :       INTEGER, INTENT(OUT)                               :: srcs_shrink
     265             : 
     266             :       CHARACTER(LEN=*), PARAMETER :: routineN = 'set_dests_srcs_pid'
     267             : 
     268             :       INTEGER                                            :: group_size, handle, i, j, k, &
     269             :                                                             maxn_sendrecv, rs_dim1, rs_dim2, &
     270             :                                                             rs_mpo, tmp_size1, tmp_size2
     271          44 :       INTEGER, ALLOCATABLE, DIMENSION(:)                 :: src_pos1_onesdd, src_pos2_onesdd, &
     272          44 :                                                             tmp1_arr, tmp2_arr
     273          44 :       INTEGER, ALLOCATABLE, DIMENSION(:, :)              :: dests_shrink_all, src_pos1, src_pos2, &
     274          44 :                                                             srcs_coord, srcs_expand_all
     275             :       INTEGER, DIMENSION(2)                              :: rs_dims, rs_pos
     276             :       TYPE(mp_comm_type)                                 :: rs_group
     277             : 
     278          44 :       CALL timeset(routineN, handle)
     279             : 
     280             : ! example: 3x4 process grid
     281             : ! XYZ or XY
     282             : ! rs_dim1 = 3 -->  src_pos1 = [1  3  -2]
     283             : !                             [2 -3  -1]
     284             : ! rs_dim2 = 4 -->  src_pos2 = [1  3  -4  -2]
     285             : !                             [2  4  -3  -1]
     286             : ! => (1,1) receives from (1,1) ; (1,2) ; (2,1) ; (2,2) | flipping status 0 0 0 0
     287             : !    (2,4) receives from (3,2) ; (3,1) ; (3,2) ; (3,1) | flipping status 2 2 4 4
     288             : ! and so on ...
     289             : ! or equivalently
     290             : ! =>   0   receives from 0 ; 1 ; 4 ; 5 | flipping status 0 0 0 0
     291             : !      7   receives from 9 ; 8 ; 9 ; 8 | flipping status 2 2 4 4
     292             : ! from srcs_coord :
     293             : ! => rs_mpo = 0 -> rs_pos = 0,0 -> srcs_coord = [ 1  1  2  2] -> 0(0) 1(0) 4(0) 5(0)
     294             : !                                               [ 1  2  1  2]
     295             : !    rs_mpo = 7 -> rs_pos = 1,3 -> srcs_coord = [ 3  3 -3 -3] -> 9(2) 8(2) 9(4) 8(4)
     296             : !                                               [-2 -1 -2 -1]
     297             : ! schematically :
     298             : ! ij : coordinates in a 2D process grid (starting from 1 just for demonstration)
     299             : ! () : to be flipped from left to right
     300             : ! [] : to be flipped from up to down
     301             : ! {} : to be rotated 180 degrees
     302             : !    11 | 12 | 13 | 14          11   12  |  13   14  | (14) (13) | (12) (11)
     303             : !    -----------------    ==>   21   22  |  23   24  | (24) (23) | (22) (21)
     304             : !    21 | 22 | 23 | 24         ---------------------------------------------
     305             : !    -----------------          31   32  |  33   34  | (34) (33) | (32) (31)
     306             : !    31 | 32 | 33 | 34         [31] [32] | [33] [34] | {34} {33} | {32} {31}
     307             : !                              ---------------------------------------------
     308             : !                              [21] [22] | [23] [24] | {24} {23} | {22} {21}
     309             : !                              [11] [12] | [13] [14] | {14} {13} | {12} {11}
     310             : ! one(two)-sided :
     311             : ! YZ or Y
     312             : ! rs_dim1 = 3 -->  src_pos1 = [1  2  3]
     313             : ! rs_dim2 = 4 -->  src_pos2 = [1  3  -4  -2]
     314             : !                             [2  4  -3  -1]
     315             : ! XZ or X
     316             : ! rs_dim1 = 3 -->  src_pos1 = [1  3  -2]
     317             : !                             [2 -3  -1]
     318             : ! rs_dim2 = 4 -->  src_pos2 = [1  2   3  4]
     319             : ! Z
     320             : ! rs_dim1 = 3 -->  src_pos1 = [1  2  3]
     321             : ! rs_dim2 = 4 -->  src_pos2 = [1  2   3  4]
     322             : 
     323          44 :       rs_group = pw_grid%para%group
     324          44 :       rs_mpo = pw_grid%para%group%mepos
     325          44 :       group_size = pw_grid%para%group%num_pe
     326         132 :       rs_dims = pw_grid%para%group%num_pe_cart
     327             : 
     328         132 :       rs_pos = pw_grid%para%group%mepos_cart
     329          44 :       rs_dim1 = rs_dims(1); rs_dim2 = rs_dims(2)
     330             : 
     331             : ! prepare srcs_coord
     332          76 :       SELECT CASE (neumann_directions)
     333             :       CASE (neumannXYZ, neumannXY)
     334          32 :          maxn_sendrecv = 4
     335          32 :          ALLOCATE (srcs_coord(2, maxn_sendrecv))
     336             : 
     337          32 :          IF (MOD(rs_dim1, 2) .EQ. 0) THEN
     338             :             tmp_size1 = rs_dim1
     339             :          ELSE
     340           0 :             tmp_size1 = rs_dim1 + 1
     341             :          END IF
     342         160 :          ALLOCATE (tmp1_arr(tmp_size1), src_pos1(2, 0:rs_dim1 - 1))
     343             : 
     344          32 :          IF (MOD(rs_dim1, 2) .EQ. 0) THEN
     345         192 :             tmp1_arr(:) = (/(i, i=1, rs_dim1)/)
     346         416 :             src_pos1(:, :) = RESHAPE((/tmp1_arr, -tmp1_arr(tmp_size1:1:-1)/), (/2, rs_dim1/))
     347             :          ELSE
     348           0 :             tmp1_arr(:) = (/(i, i=1, rs_dim1), -rs_dim1/)
     349           0 :             src_pos1(:, :) = RESHAPE((/tmp1_arr, -tmp1_arr(tmp_size1 - 2:1:-1)/), (/2, rs_dim1/))
     350             :          END IF
     351             : !---
     352          32 :          IF (MOD(rs_dim2, 2) .EQ. 0) THEN
     353             :             tmp_size2 = rs_dim2
     354             :          ELSE
     355          32 :             tmp_size2 = rs_dim2 + 1
     356             :          END IF
     357         160 :          ALLOCATE (tmp2_arr(tmp_size2), src_pos2(2, 0:rs_dim2 - 1))
     358             : 
     359          32 :          IF (MOD(rs_dim2, 2) .EQ. 0) THEN
     360           0 :             tmp2_arr(:) = (/(i, i=1, rs_dim2)/)
     361           0 :             src_pos2(:, :) = RESHAPE((/tmp2_arr, -tmp2_arr(tmp_size2:1:-1)/), (/2, rs_dim2/))
     362             :          ELSE
     363         128 :             tmp2_arr(:) = (/(i, i=1, rs_dim2), -rs_dim2/)
     364         288 :             src_pos2(:, :) = RESHAPE((/tmp2_arr, -tmp2_arr(tmp_size2 - 2:1:-1)/), (/2, rs_dim2/))
     365             :          END IF
     366             : !---
     367          96 :          srcs_coord(:, 1) = (/src_pos1(1, rs_pos(1)), src_pos2(1, rs_pos(2))/)
     368          96 :          srcs_coord(:, 2) = (/src_pos1(1, rs_pos(1)), src_pos2(2, rs_pos(2))/)
     369          96 :          srcs_coord(:, 3) = (/src_pos1(2, rs_pos(1)), src_pos2(1, rs_pos(2))/)
     370          96 :          srcs_coord(:, 4) = (/src_pos1(2, rs_pos(1)), src_pos2(2, rs_pos(2))/)
     371             :       CASE (neumannXZ, neumannX)
     372           4 :          maxn_sendrecv = 2
     373           4 :          ALLOCATE (srcs_coord(2, maxn_sendrecv))
     374             : 
     375           4 :          IF (MOD(rs_dim1, 2) .EQ. 0) THEN
     376             :             tmp_size1 = rs_dim1
     377             :          ELSE
     378           0 :             tmp_size1 = rs_dim1 + 1
     379             :          END IF
     380          20 :          ALLOCATE (tmp1_arr(tmp_size1), src_pos1(2, 0:rs_dim1 - 1))
     381             : 
     382           4 :          IF (MOD(rs_dim1, 2) .EQ. 0) THEN
     383          24 :             tmp1_arr(:) = (/(i, i=1, rs_dim1)/)
     384          52 :             src_pos1(:, :) = RESHAPE((/tmp1_arr, -tmp1_arr(tmp_size1:1:-1)/), (/2, rs_dim1/))
     385             :          ELSE
     386           0 :             tmp1_arr(:) = (/(i, i=1, rs_dim1), -rs_dim1/)
     387           0 :             src_pos1(:, :) = RESHAPE((/tmp1_arr, -tmp1_arr(tmp_size1 - 2:1:-1)/), (/2, rs_dim1/))
     388             :          END IF
     389             : !---
     390          12 :          ALLOCATE (src_pos2_onesdd(0:rs_dim2 - 1))
     391          16 :          src_pos2_onesdd(:) = (/(i, i=1, rs_dim2)/)
     392             : !---
     393          12 :          srcs_coord(:, 1) = (/src_pos1(1, rs_pos(1)), src_pos2_onesdd(rs_pos(2))/)
     394          12 :          srcs_coord(:, 2) = (/src_pos1(2, rs_pos(1)), src_pos2_onesdd(rs_pos(2))/)
     395             :       CASE (neumannYZ, neumannY)
     396           4 :          maxn_sendrecv = 2
     397           4 :          ALLOCATE (srcs_coord(2, maxn_sendrecv))
     398             : 
     399          12 :          ALLOCATE (src_pos1_onesdd(0:rs_dim1 - 1))
     400          24 :          src_pos1_onesdd(:) = (/(i, i=1, rs_dim1)/)
     401             : !---
     402           4 :          IF (MOD(rs_dim2, 2) .EQ. 0) THEN
     403             :             tmp_size2 = rs_dim2
     404             :          ELSE
     405           4 :             tmp_size2 = rs_dim2 + 1
     406             :          END IF
     407          20 :          ALLOCATE (tmp2_arr(tmp_size2), src_pos2(2, 0:rs_dim2 - 1))
     408             : 
     409           4 :          IF (MOD(rs_dim2, 2) .EQ. 0) THEN
     410           0 :             tmp2_arr(:) = (/(i, i=1, rs_dim2)/)
     411           0 :             src_pos2(:, :) = RESHAPE((/tmp2_arr, -tmp2_arr(tmp_size2:1:-1)/), (/2, rs_dim2/))
     412             :          ELSE
     413          16 :             tmp2_arr(:) = (/(i, i=1, rs_dim2), -rs_dim2/)
     414          36 :             src_pos2(:, :) = RESHAPE((/tmp2_arr, -tmp2_arr(tmp_size2 - 2:1:-1)/), (/2, rs_dim2/))
     415             :          END IF
     416             : !---
     417          12 :          srcs_coord(:, 1) = (/src_pos1_onesdd(rs_pos(1)), src_pos2(1, rs_pos(2))/)
     418          12 :          srcs_coord(:, 2) = (/src_pos1_onesdd(rs_pos(1)), src_pos2(2, rs_pos(2))/)
     419             :       CASE (neumannZ)
     420           4 :          maxn_sendrecv = 1
     421           4 :          ALLOCATE (srcs_coord(2, maxn_sendrecv))
     422          12 :          ALLOCATE (src_pos1_onesdd(0:rs_dim1 - 1))
     423          12 :          ALLOCATE (src_pos2_onesdd(0:rs_dim2 - 1))
     424             : 
     425          24 :          src_pos1_onesdd(:) = (/(i, i=1, rs_dim1)/)
     426             : !---
     427          16 :          src_pos2_onesdd(:) = (/(i, i=1, rs_dim2)/)
     428             : !---
     429          56 :          srcs_coord(:, 1) = (/src_pos1_onesdd(rs_pos(1)), src_pos2_onesdd(rs_pos(2))/)
     430             :       END SELECT
     431             : 
     432             : ! default flipping status
     433         192 :       flipg_stat = NOT_FLIPPED
     434             : 
     435         192 :       DO k = 1, maxn_sendrecv
     436             : ! convert srcs_coord to pid
     437         444 :          CALL pw_grid%para%group%rank_cart(ABS(srcs_coord(:, k)) - 1, srcs_expand(k))
     438             : ! find out the flipping status
     439         192 :          IF ((srcs_coord(1, k) .GT. 0) .AND. (srcs_coord(2, k) .GT. 0)) THEN
     440          44 :             flipg_stat(k) = NOT_FLIPPED
     441         104 :          ELSE IF ((srcs_coord(1, k) .LT. 0) .AND. (srcs_coord(2, k) .GT. 0)) THEN
     442          36 :             flipg_stat(k) = UD_FLIPPED
     443          68 :          ELSE IF ((srcs_coord(1, k) .GT. 0) .AND. (srcs_coord(2, k) .LT. 0)) THEN
     444          36 :             flipg_stat(k) = LR_FLIPPED
     445             :          ELSE
     446          32 :             flipg_stat(k) = ROTATED
     447             :          END IF
     448             :       END DO
     449             : 
     450             : ! let all the nodes know about each others srcs_expand list
     451         176 :       ALLOCATE (srcs_expand_all(maxn_sendrecv, group_size))
     452         192 :       CALL rs_group%allgather(srcs_expand, srcs_expand_all)
     453             : ! now scan the srcs_expand_all list and check if I am on the srcs_expand list of the other nodes
     454             : ! if that is the case then I am obliged to send data to those nodes (the nodes are on my dests_expand list)
     455          44 :       k = 1
     456         132 :       DO i = 1, group_size
     457         428 :          DO j = 1, maxn_sendrecv
     458         384 :             IF (srcs_expand_all(j, i) .EQ. rs_mpo) THEN
     459         148 :                dests_expand(k) = i - 1
     460         148 :                k = k + 1
     461             :             END IF
     462             :          END DO
     463             :       END DO
     464             : 
     465             : ! find srcs and dests for the reverse procedure :
     466             : ! initialize dests_shrink and srcs_shrink with invalid process id
     467         192 :       dests_shrink = -1
     468          44 :       srcs_shrink = -1
     469             : ! scan the flipping status of the data that I am supposed to receive
     470             : ! if the flipping status for a process is NOT_FLIPPED that means I will have to resend
     471             : ! data to that process in the reverse procedure (the process is on my dests_shrink list)
     472         192 :       DO i = 1, maxn_sendrecv
     473         192 :          IF (flipg_stat(i) .EQ. NOT_FLIPPED) dests_shrink(i) = srcs_expand(i)
     474             :       END DO
     475             : 
     476             : ! let all the nodes know about each others dests_shrink list
     477         132 :       ALLOCATE (dests_shrink_all(maxn_sendrecv, group_size))
     478         192 :       CALL rs_group%allgather(dests_shrink, dests_shrink_all)
     479             : ! now scan the dests_shrink_all list and check if I am on the dests_shrink list of any other node
     480             : ! if that is the case then I'll receive data from that node (the node is on my srcs_shrink list)
     481             : ! note that in the shrinking procedure I will receive from only one node
     482         132 :       DO i = 1, group_size
     483         314 :          DO j = 1, maxn_sendrecv
     484         270 :             IF (dests_shrink_all(j, i) .EQ. rs_mpo) THEN
     485          44 :                srcs_shrink = i - 1
     486          44 :                EXIT
     487             :             END IF
     488             :          END DO
     489             :       END DO
     490             : 
     491          44 :       CALL timestop(handle)
     492             : 
     493          88 :    END SUBROUTINE set_dests_srcs_pid
     494             : 
     495             : ! **************************************************************************************************
     496             : !> \brief expands a pw_r3d_rs_type data to an evenly symmetric pw_r3d_rs_type data that is 8 times
     497             : !>   larger than the original one:
     498             : !>   the even symmetry for a 1D sequence of length n is defined as:
     499             : !>      1 2 3 ... n-2 n-1 n --> 1 2 3 ... n-2 n-1 n n-1 n-2 ... 3 2
     500             : !>   and is generalized to 3D by applying the same rule in all three directions
     501             : !>
     502             : !> \param neumann_directions directions in which dct should be performed
     503             : !> \param recv_msgs_bnds bounds of the messages to be received
     504             : !> \param dests_expand list of the destination processes
     505             : !> \param srcs_expand list of the source processes
     506             : !> \param flipg_stat flipping status for the received data chunks
     507             : !> \param bounds_shftd bounds of the original grid shifted to have g0 in the middle of the cell
     508             : !> \param pw_in the original plane wave data
     509             : !> \param pw_expanded the pw data after expansion
     510             : !> \par History
     511             : !>       07.2014 created [Hossein Bani-Hashemian]
     512             : !> \author Mohammad Hossein Bani-Hashemian
     513             : ! **************************************************************************************************
     514         690 :    SUBROUTINE pw_expand(neumann_directions, recv_msgs_bnds, dests_expand, srcs_expand, &
     515             :                         flipg_stat, bounds_shftd, pw_in, pw_expanded)
     516             : 
     517             :       INTEGER, INTENT(IN)                                :: neumann_directions
     518             :       INTEGER, DIMENSION(:, :, :), INTENT(IN), POINTER   :: recv_msgs_bnds
     519             :       INTEGER, DIMENSION(:), INTENT(IN), POINTER         :: dests_expand, srcs_expand, flipg_stat
     520             :       INTEGER, DIMENSION(2, 3), INTENT(IN)               :: bounds_shftd
     521             :       TYPE(pw_r3d_rs_type), INTENT(IN)                   :: pw_in
     522             :       TYPE(pw_r3d_rs_type), INTENT(INOUT)                :: pw_expanded
     523             : 
     524             :       CHARACTER(LEN=*), PARAMETER                        :: routineN = 'pw_expand'
     525             : 
     526             :       INTEGER :: group_size, handle, i, ind, lb1, lb1_loc, lb1_new, lb2, lb2_loc, lb2_new, lb3, &
     527             :          lb3_loc, lb3_new, loc, maxn_sendrecv, rs_mpo, ub1, ub1_loc, ub1_new, ub2, ub2_loc, &
     528             :          ub2_new, ub3, ub3_loc, ub3_new
     529         690 :       INTEGER, ALLOCATABLE, DIMENSION(:)                 :: dest_hist, src_hist
     530         690 :       INTEGER, ALLOCATABLE, DIMENSION(:, :, :)           :: pcs_bnds
     531             :       INTEGER, DIMENSION(2, 3)                           :: bounds_local_new
     532         690 :       REAL(dp), ALLOCATABLE, DIMENSION(:, :, :)          :: catd, catd_flipdbf, cr3d_xpndd
     533         690 :       TYPE(dct_msg_type), ALLOCATABLE, DIMENSION(:)      :: pcs, recv_msgs
     534             :       TYPE(mp_comm_type)                                 :: rs_group
     535         690 :       TYPE(mp_request_type), ALLOCATABLE, DIMENSION(:)   :: recv_reqs, send_reqs
     536             :       TYPE(pw_grid_type), POINTER                        :: pw_grid
     537             : 
     538         690 :       CALL timeset(routineN, handle)
     539             : 
     540         690 :       pw_grid => pw_in%pw_grid
     541         690 :       rs_group = pw_grid%para%group
     542         690 :       rs_mpo = pw_grid%para%group%mepos
     543         690 :       group_size = pw_grid%para%group%num_pe
     544             : 
     545        6900 :       bounds_local_new = pw_expanded%pw_grid%bounds_local
     546             : 
     547        1272 :       SELECT CASE (neumann_directions)
     548             :       CASE (neumannXYZ, neumannXY)
     549         582 :          maxn_sendrecv = 4
     550             :       CASE (neumannX, neumannY, neumannXZ, neumannYZ)
     551          76 :          maxn_sendrecv = 2
     552             :       CASE (neumannZ)
     553         690 :          maxn_sendrecv = 1
     554             :       END SELECT
     555             : 
     556        7784 :       ALLOCATE (recv_reqs(maxn_sendrecv), send_reqs(maxn_sendrecv))
     557        2070 :       ALLOCATE (dest_hist(maxn_sendrecv), src_hist(maxn_sendrecv))
     558        1380 :       ALLOCATE (pcs_bnds(2, 3, maxn_sendrecv))
     559             : 
     560        7094 :       ALLOCATE (pcs(maxn_sendrecv), recv_msgs(maxn_sendrecv))
     561             : 
     562        3202 :       send_reqs = mp_request_null
     563        3202 :       recv_reqs = mp_request_null
     564             : 
     565        3202 :       src_hist = -1 ! keeps the history of sources
     566        3202 :       dest_hist = -1 ! keeps the history of destinations
     567             : 
     568        3202 :       DO i = 1, maxn_sendrecv
     569             : ! no need to send to myself or to the destination that I have already sent to
     570        8580 :          IF ((dests_expand(i) .NE. rs_mpo) .AND. .NOT. ANY(dest_hist .EQ. dests_expand(i))) THEN
     571         614 :             CALL rs_group%isend(pw_in%array, dests_expand(i), send_reqs(i))
     572             :          END IF
     573        3202 :          dest_hist(i) = dests_expand(i)
     574             :       END DO
     575             : 
     576        3202 :       DO i = 1, maxn_sendrecv
     577        2512 :          lb1 = recv_msgs_bnds(1, 1, i)
     578        2512 :          ub1 = recv_msgs_bnds(2, 1, i)
     579        2512 :          lb2 = recv_msgs_bnds(1, 2, i)
     580        2512 :          ub2 = recv_msgs_bnds(2, 2, i)
     581        2512 :          lb3 = recv_msgs_bnds(1, 3, i)
     582        2512 :          ub3 = recv_msgs_bnds(2, 3, i)
     583             : ! no need to receive from myself
     584        2512 :          IF (srcs_expand(i) .EQ. rs_mpo) THEN
     585        6580 :             ALLOCATE (recv_msgs(i)%msg(lb1:ub1, lb2:ub2, lb3:ub3))
     586    79891578 :             recv_msgs(i)%msg(:, :, :) = pw_in%array
     587             : ! if I have already received data from the source, just use the one from the last time
     588        4752 :          ELSE IF (ANY(src_hist .EQ. srcs_expand(i))) THEN
     589        3492 :             loc = MINLOC(ABS(src_hist - srcs_expand(i)), 1)
     590         582 :             lb1_loc = recv_msgs_bnds(1, 1, loc)
     591         582 :             ub1_loc = recv_msgs_bnds(2, 1, loc)
     592         582 :             lb2_loc = recv_msgs_bnds(1, 2, loc)
     593         582 :             ub2_loc = recv_msgs_bnds(2, 2, loc)
     594         582 :             lb3_loc = recv_msgs_bnds(1, 3, loc)
     595         582 :             ub3_loc = recv_msgs_bnds(2, 3, loc)
     596        2910 :             ALLOCATE (recv_msgs(i)%msg(lb1_loc:ub1_loc, lb2_loc:ub2_loc, lb3_loc:ub3_loc))
     597    36325643 :             recv_msgs(i)%msg(:, :, :) = recv_msgs(loc)%msg
     598             :          ELSE
     599        3070 :             ALLOCATE (recv_msgs(i)%msg(lb1:ub1, lb2:ub2, lb3:ub3))
     600         614 :             CALL rs_group%irecv(recv_msgs(i)%msg, srcs_expand(i), recv_reqs(i))
     601         614 :             CALL recv_reqs(i)%wait()
     602             :          END IF
     603        3202 :          src_hist(i) = srcs_expand(i)
     604             :       END DO
     605             : ! cleanup mpi_request to prevent memory leak
     606         690 :       CALL mp_waitall(send_reqs(:))
     607             : 
     608             : ! flip the received data according on the flipping status
     609        3202 :       DO i = 1, maxn_sendrecv
     610         690 :          SELECT CASE (flipg_stat(i))
     611             :          CASE (NOT_FLIPPED)
     612         690 :             lb1 = recv_msgs_bnds(1, 1, i)
     613         690 :             ub1 = recv_msgs_bnds(2, 1, i)
     614         690 :             lb2 = recv_msgs_bnds(1, 2, i)
     615         690 :             ub2 = recv_msgs_bnds(2, 2, i)
     616         690 :             lb3 = recv_msgs_bnds(1, 3, i)
     617         690 :             ub3 = recv_msgs_bnds(2, 3, i)
     618        3450 :             ALLOCATE (pcs(i)%msg(lb1:ub1, lb2:ub2, lb3:ub3))
     619    41470061 :             pcs(i)%msg(:, :, :) = recv_msgs(i)%msg
     620             :          CASE (UD_FLIPPED)
     621         614 :             CALL flipud(recv_msgs(i)%msg, pcs(i)%msg, bounds_shftd)
     622             :          CASE (LR_FLIPPED)
     623         626 :             CALL fliplr(recv_msgs(i)%msg, pcs(i)%msg, bounds_shftd)
     624             :          CASE (BF_FLIPPED)
     625           0 :             CALL flipbf(recv_msgs(i)%msg, pcs(i)%msg, bounds_shftd)
     626             :          CASE (ROTATED)
     627        2512 :             CALL rot180(recv_msgs(i)%msg, pcs(i)%msg, bounds_shftd)
     628             :          END SELECT
     629             :       END DO
     630             : ! concatenate the received (flipped) data store the result as catd
     631             : ! need the bounds of the four pieces for concatenation
     632        3202 :       DO i = 1, maxn_sendrecv
     633        2512 :          pcs_bnds(1, 1, i) = LBOUND(pcs(i)%msg, 1)
     634        2512 :          pcs_bnds(2, 1, i) = UBOUND(pcs(i)%msg, 1)
     635        2512 :          pcs_bnds(1, 2, i) = LBOUND(pcs(i)%msg, 2)
     636        2512 :          pcs_bnds(2, 2, i) = UBOUND(pcs(i)%msg, 2)
     637        2512 :          pcs_bnds(1, 3, i) = LBOUND(pcs(i)%msg, 3)
     638        5714 :          pcs_bnds(2, 3, i) = UBOUND(pcs(i)%msg, 3)
     639             :       END DO
     640             : 
     641         690 :       lb1_new = bounds_local_new(1, 1); ub1_new = bounds_local_new(2, 1)
     642         690 :       lb2_new = bounds_local_new(1, 2); ub2_new = bounds_local_new(2, 2)
     643         690 :       lb3_new = bounds_local_new(1, 3); ub3_new = bounds_local_new(2, 3)
     644             : 
     645         658 :       SELECT CASE (neumann_directions)
     646             :       CASE (neumannXYZ, neumannXZ, neumannYZ, neumannZ)
     647         658 :          ind = INT(0.5*(ub3_new + lb3_new + 1))
     648        3290 :          ALLOCATE (catd(lb1_new:ub1_new, lb2_new:ub2_new, lb3_new:ind - 1))
     649             :       CASE (neumannXY, neumannX, neumannY)
     650         818 :          ALLOCATE (catd(lb1_new:ub1_new, lb2_new:ub2_new, lb3_new:ub3_new))
     651             :       END SELECT
     652             : 
     653        3202 :       DO i = 1, maxn_sendrecv
     654             :          catd(pcs_bnds(1, 1, i):pcs_bnds(2, 1, i), &
     655             :               pcs_bnds(1, 2, i):pcs_bnds(2, 2, i), &
     656   154067826 :               pcs_bnds(1, 3, i):pcs_bnds(2, 3, i)) = pcs(i)%msg
     657             :       END DO
     658             : 
     659             : ! flip catd from back to front
     660         690 :       CALL flipbf(catd, catd_flipdbf, bounds_shftd)
     661             : ! concatenate catd and catd_flipdbf to get cr3d_xpndd
     662        3450 :       ALLOCATE (cr3d_xpndd(lb1_new:ub1_new, lb2_new:ub2_new, lb3_new:ub3_new))
     663         658 :       SELECT CASE (neumann_directions)
     664             :       CASE (neumannXYZ, neumannXZ, neumannYZ, neumannZ)
     665   148328500 :          cr3d_xpndd(:, :, lb3_new:ind - 1) = catd
     666   148328500 :          cr3d_xpndd(:, :, ind:ub3_new) = catd_flipdbf
     667             :       CASE (neumannXY, neumannX, neumannY)
     668     2982930 :          cr3d_xpndd(:, :, :) = catd
     669             :       END SELECT
     670             : 
     671   299638614 :       pw_expanded%array = cr3d_xpndd
     672             : 
     673        3202 :       DO i = 1, maxn_sendrecv
     674        2512 :          DEALLOCATE (pcs(i)%msg)
     675        3202 :          DEALLOCATE (recv_msgs(i)%msg)
     676             :       END DO
     677        5714 :       DEALLOCATE (pcs, recv_msgs)
     678         690 :       DEALLOCATE (catd, catd_flipdbf, cr3d_xpndd)
     679             : 
     680         690 :       CALL timestop(handle)
     681             : 
     682        1380 :    END SUBROUTINE pw_expand
     683             : 
     684             : ! **************************************************************************************************
     685             : !> \brief shrinks an evenly symmetric pw_r3d_rs_type data to a pw_r3d_rs_type data that is 8
     686             : !>        times smaller (the reverse procedure of pw_expand).
     687             : !>
     688             : !> \param neumann_directions directions in which dct should be performed
     689             : !> \param dests_shrink list of the destination processes
     690             : !> \param srcs_shrink list of the source processes
     691             : !> \param bounds_local_shftd local bounds of the original grid after shifting
     692             : !> \param pw_in the original plane wave data
     693             : !> \param pw_shrinked the shrunk plane wave data
     694             : !> \par History
     695             : !>       07.2014 created [Hossein Bani-Hashemian]
     696             : !> \author Mohammad Hossein Bani-Hashemian
     697             : ! **************************************************************************************************
     698         320 :    SUBROUTINE pw_shrink(neumann_directions, dests_shrink, srcs_shrink, bounds_local_shftd, &
     699             :                         pw_in, pw_shrinked)
     700             : 
     701             :       INTEGER, INTENT(IN)                                :: neumann_directions
     702             :       INTEGER, DIMENSION(:), INTENT(IN), POINTER         :: dests_shrink
     703             :       INTEGER, INTENT(INOUT)                             :: srcs_shrink
     704             :       INTEGER, DIMENSION(2, 3), INTENT(IN)               :: bounds_local_shftd
     705             :       TYPE(pw_r3d_rs_type), INTENT(IN)                   :: pw_in
     706             :       TYPE(pw_r3d_rs_type), INTENT(INOUT)                :: pw_shrinked
     707             : 
     708             :       CHARACTER(LEN=*), PARAMETER                        :: routineN = 'pw_shrink'
     709             : 
     710             :       INTEGER :: group_size, handle, i, lb1_orig, lb1_xpnd, lb2_orig, lb2_xpnd, lb3_orig, &
     711             :          lb3_xpnd, maxn_sendrecv, rs_mpo, send_lb1, send_lb2, send_lb3, send_ub1, send_ub2, &
     712             :          send_ub3, tag, ub1_orig, ub1_xpnd, ub2_orig, ub2_xpnd, ub3_orig, ub3_xpnd
     713         320 :       INTEGER, ALLOCATABLE, DIMENSION(:, :, :)           :: bounds_local_all
     714             :       INTEGER, DIMENSION(2, 3)                           :: bounds_local_xpnd
     715         320 :       REAL(dp), ALLOCATABLE, DIMENSION(:, :, :)          :: cr3d, send_crmsg
     716             :       TYPE(mp_comm_type)                                 :: rs_group
     717             :       TYPE(pw_grid_type), POINTER                        :: pw_grid_orig
     718             : 
     719         320 :       CALL timeset(routineN, handle)
     720             : 
     721         320 :       pw_grid_orig => pw_shrinked%pw_grid
     722         320 :       rs_group = pw_grid_orig%para%group
     723         320 :       rs_mpo = pw_grid_orig%para%group%mepos
     724         320 :       group_size = pw_grid_orig%para%group%num_pe
     725        3200 :       bounds_local_xpnd = pw_in%pw_grid%bounds_local
     726         320 :       tag = 1
     727             : 
     728         592 :       SELECT CASE (neumann_directions)
     729             :       CASE (neumannXYZ, neumannXY)
     730         272 :          maxn_sendrecv = 4
     731             :       CASE (neumannX, neumannY, neumannXZ, neumannYZ)
     732          32 :          maxn_sendrecv = 2
     733             :       CASE (neumannZ)
     734         320 :          maxn_sendrecv = 1
     735             :       END SELECT
     736             : 
     737             : ! cosine transform is a real transform. The cosine transform of a 3D data must be real and 3D.
     738         320 :       lb1_xpnd = bounds_local_xpnd(1, 1)
     739         320 :       ub1_xpnd = bounds_local_xpnd(2, 1)
     740         320 :       lb2_xpnd = bounds_local_xpnd(1, 2)
     741         320 :       ub2_xpnd = bounds_local_xpnd(2, 2)
     742         320 :       lb3_xpnd = bounds_local_xpnd(1, 3)
     743         320 :       ub3_xpnd = bounds_local_xpnd(2, 3)
     744        1600 :       ALLOCATE (cr3d(lb1_xpnd:ub1_xpnd, lb2_xpnd:ub2_xpnd, lb3_xpnd:ub3_xpnd))
     745   138074880 :       cr3d(:, :, :) = pw_in%array
     746             : 
     747             : ! let all the nodes know about each others shifted local bounds
     748         960 :       ALLOCATE (bounds_local_all(2, 3, group_size))
     749         320 :       CALL rs_group%allgather(bounds_local_shftd, bounds_local_all)
     750             : 
     751        1488 :       DO i = 1, maxn_sendrecv
     752             : ! no need to send to myself or to an invalid destination (pid = -1)
     753        1488 :          IF ((dests_shrink(i) .NE. rs_mpo) .AND. (dests_shrink(i) .NE. -1)) THEN
     754         144 :             send_lb1 = bounds_local_all(1, 1, dests_shrink(i) + 1)
     755         144 :             send_ub1 = bounds_local_all(2, 1, dests_shrink(i) + 1)
     756         144 :             send_lb2 = bounds_local_all(1, 2, dests_shrink(i) + 1)
     757         144 :             send_ub2 = bounds_local_all(2, 2, dests_shrink(i) + 1)
     758         144 :             send_lb3 = bounds_local_all(1, 3, dests_shrink(i) + 1)
     759         144 :             send_ub3 = bounds_local_all(2, 3, dests_shrink(i) + 1)
     760             : 
     761         720 :             ALLOCATE (send_crmsg(send_lb1:send_ub1, send_lb2:send_ub2, send_lb3:send_ub3))
     762     8775044 :             send_crmsg(:, :, :) = cr3d(send_lb1:send_ub1, send_lb2:send_ub2, send_lb3:send_ub3)
     763         144 :             CALL rs_group%send(send_crmsg, dests_shrink(i), tag)
     764         144 :             DEALLOCATE (send_crmsg)
     765             :          END IF
     766             :       END DO
     767             : 
     768         320 :       lb1_orig = bounds_local_shftd(1, 1)
     769         320 :       ub1_orig = bounds_local_shftd(2, 1)
     770         320 :       lb2_orig = bounds_local_shftd(1, 2)
     771         320 :       ub2_orig = bounds_local_shftd(2, 2)
     772         320 :       lb3_orig = bounds_local_shftd(1, 3)
     773         320 :       ub3_orig = bounds_local_shftd(2, 3)
     774             : 
     775             : ! no need to receive from myself
     776         320 :       IF (srcs_shrink .EQ. rs_mpo) THEN
     777    10323616 :          pw_shrinked%array = cr3d(lb1_orig:ub1_orig, lb2_orig:ub2_orig, lb3_orig:ub3_orig)
     778         144 :       ELSE IF (srcs_shrink .EQ. -1) THEN
     779             : ! the source is invalid ... do nothing
     780             :       ELSE
     781         144 :          CALL rs_group%recv(pw_shrinked%array, srcs_shrink, tag)
     782             :       END IF
     783             : 
     784         320 :       DEALLOCATE (bounds_local_all)
     785         320 :       DEALLOCATE (cr3d)
     786         320 :       CALL timestop(handle)
     787             : 
     788         640 :    END SUBROUTINE pw_shrink
     789             : 
     790             : ! **************************************************************************************************
     791             : !> \brief   flips a 3d (real dp) array up to down (the way needed to expand data
     792             : !>          as explained in the description of the afore-defined subroutines)
     793             : !> \param cr3d_in input array
     794             : !> \param cr3d_out output array
     795             : !> \param bounds global lower and upper bounds
     796             : !> \par History
     797             : !>       07.2014 created [Hossein Bani-Hashemian]
     798             : !> \author Mohammad Hossein Bani-Hashemian
     799             : ! **************************************************************************************************
     800         614 :    SUBROUTINE flipud(cr3d_in, cr3d_out, bounds)
     801             : 
     802             :       REAL(dp), ALLOCATABLE, DIMENSION(:, :, :), &
     803             :          INTENT(IN)                                      :: cr3d_in
     804             :       REAL(dp), ALLOCATABLE, DIMENSION(:, :, :), &
     805             :          INTENT(INOUT)                                   :: cr3d_out
     806             :       INTEGER, DIMENSION(2, 3), INTENT(IN)               :: bounds
     807             : 
     808             :       CHARACTER(LEN=*), PARAMETER                        :: routineN = 'flipud'
     809             : 
     810             :       INTEGER :: handle, i, lb1, lb1_glbl, lb1_new, lb2, lb2_glbl, lb2_new, lb3, lb3_glbl, &
     811             :          lb3_new, ub1, ub1_glbl, ub1_new, ub2, ub2_glbl, ub2_new, ub3, ub3_glbl, ub3_new
     812         614 :       INTEGER, ALLOCATABLE, DIMENSION(:)                 :: indx
     813             :       INTEGER, DIMENSION(2, 3)                           :: bndsl, bndsl_new
     814             : 
     815         614 :       CALL timeset(routineN, handle)
     816             : 
     817        1228 :       lb1 = LBOUND(cr3d_in, 1); ub1 = UBOUND(cr3d_in, 1)
     818        1228 :       lb2 = LBOUND(cr3d_in, 2); ub2 = UBOUND(cr3d_in, 2)
     819        1228 :       lb3 = LBOUND(cr3d_in, 3); ub3 = UBOUND(cr3d_in, 3)
     820             : 
     821         614 :       lb1_glbl = bounds(1, 1); ub1_glbl = bounds(2, 1)
     822         614 :       lb2_glbl = bounds(1, 2); ub2_glbl = bounds(2, 2)
     823         614 :       lb3_glbl = bounds(1, 3); ub3_glbl = bounds(2, 3)
     824             : 
     825        4298 :       bndsl = RESHAPE((/lb1, ub1, lb2, ub2, lb3, ub3/), (/2, 3/))
     826         614 :       bndsl_new = flipud_bounds_local(bndsl, bounds)
     827             : 
     828         614 :       lb1_new = bndsl_new(1, 1); ub1_new = bndsl_new(2, 1)
     829         614 :       lb2_new = bndsl_new(1, 2); ub2_new = bndsl_new(2, 2)
     830         614 :       lb3_new = bndsl_new(1, 3); ub3_new = bndsl_new(2, 3)
     831             : 
     832        3070 :       ALLOCATE (cr3d_out(lb1_new:ub1_new, lb2_new:ub2_new, lb3_new:ub3_new))
     833    37849915 :       cr3d_out = 0.0_dp
     834             : 
     835             : ! set the data at the missing grid points (in a periodic grid) equal to the data at
     836             : ! the last existing grid points
     837        1842 :       ALLOCATE (indx(ub1_new - lb1_new + 1))
     838       33766 :       indx(:) = (/(i, i=2*(ub1_glbl + 1) - lb1_new, 2*(ub1_glbl + 1) - ub1_new, -1)/)
     839         614 :       IF (lb1_new .EQ. ub1_glbl + 1) indx(1) = indx(2)
     840    37849915 :       cr3d_out(lb1_new:ub1_new, lb2_new:ub2_new, lb3_new:ub3_new) = cr3d_in(indx, :, :)
     841             : 
     842         614 :       CALL timestop(handle)
     843             : 
     844        1228 :    END SUBROUTINE flipud
     845             : 
     846             : ! **************************************************************************************************
     847             : !> \brief   flips a 3d (real dp) array left to right (the way needed to expand data
     848             : !>          as explained in the description of the afore-defined subroutines)
     849             : !> \param cr3d_in input array
     850             : !> \param cr3d_out output array
     851             : !> \param bounds global lower and upper bounds
     852             : !> \par History
     853             : !>       07.2014 created [Hossein Bani-Hashemian]
     854             : !> \author Mohammad Hossein Bani-Hashemian
     855             : ! **************************************************************************************************
     856         626 :    SUBROUTINE fliplr(cr3d_in, cr3d_out, bounds)
     857             : 
     858             :       REAL(dp), ALLOCATABLE, DIMENSION(:, :, :), &
     859             :          INTENT(IN)                                      :: cr3d_in
     860             :       REAL(dp), ALLOCATABLE, DIMENSION(:, :, :), &
     861             :          INTENT(INOUT)                                   :: cr3d_out
     862             :       INTEGER, DIMENSION(2, 3), INTENT(IN)               :: bounds
     863             : 
     864             :       CHARACTER(LEN=*), PARAMETER                        :: routineN = 'fliplr'
     865             : 
     866             :       INTEGER :: handle, i, lb1, lb1_glbl, lb1_new, lb2, lb2_glbl, lb2_new, lb3, lb3_glbl, &
     867             :          lb3_new, ub1, ub1_glbl, ub1_new, ub2, ub2_glbl, ub2_new, ub3, ub3_glbl, ub3_new
     868         626 :       INTEGER, ALLOCATABLE, DIMENSION(:)                 :: indy
     869             :       INTEGER, DIMENSION(2, 3)                           :: bndsl, bndsl_new
     870             : 
     871         626 :       CALL timeset(routineN, handle)
     872             : 
     873        1252 :       lb1 = LBOUND(cr3d_in, 1); ub1 = UBOUND(cr3d_in, 1)
     874        1252 :       lb2 = LBOUND(cr3d_in, 2); ub2 = UBOUND(cr3d_in, 2)
     875        1252 :       lb3 = LBOUND(cr3d_in, 3); ub3 = UBOUND(cr3d_in, 3)
     876             : 
     877         626 :       lb1_glbl = bounds(1, 1); ub1_glbl = bounds(2, 1)
     878         626 :       lb2_glbl = bounds(1, 2); ub2_glbl = bounds(2, 2)
     879         626 :       lb3_glbl = bounds(1, 3); ub3_glbl = bounds(2, 3)
     880             : 
     881        4382 :       bndsl = RESHAPE((/lb1, ub1, lb2, ub2, lb3, ub3/), (/2, 3/))
     882         626 :       bndsl_new = fliplr_bounds_local(bndsl, bounds)
     883             : 
     884         626 :       lb1_new = bndsl_new(1, 1); ub1_new = bndsl_new(2, 1)
     885         626 :       lb2_new = bndsl_new(1, 2); ub2_new = bndsl_new(2, 2)
     886         626 :       lb3_new = bndsl_new(1, 3); ub3_new = bndsl_new(2, 3)
     887             : 
     888        3130 :       ALLOCATE (cr3d_out(lb1_new:ub1_new, lb2_new:ub2_new, lb3_new:ub3_new))
     889    38421517 :       cr3d_out = 0.0_dp
     890             : 
     891             : ! set the data at the missing grid points (in a periodic grid) equal to the data at
     892             : ! the last existing grid points
     893        1878 :       ALLOCATE (indy(ub2_new - lb2_new + 1))
     894       60912 :       indy(:) = (/(i, i=2*(ub2_glbl + 1) - lb2_new, 2*(ub2_glbl + 1) - ub2_new, -1)/)
     895         626 :       IF (lb2_new .EQ. ub2_glbl + 1) indy(1) = indy(2)
     896    38421517 :       cr3d_out(lb1_new:ub1_new, lb2_new:ub2_new, lb3_new:ub3_new) = cr3d_in(:, indy, :)
     897             : 
     898         626 :       CALL timestop(handle)
     899             : 
     900        1252 :    END SUBROUTINE fliplr
     901             : 
     902             : ! **************************************************************************************************
     903             : !> \brief   flips a 3d (real dp) array back to front (the way needed to expand data
     904             : !>          as explained in the description of the afore-defined subroutines)
     905             : !> \param cr3d_in input array
     906             : !> \param cr3d_out output array
     907             : !> \param bounds global lower and upper bounds
     908             : !> \par History
     909             : !>       07.2014 created [Hossein Bani-Hashemian]
     910             : !> \author Mohammad Hossein Bani-Hashemian
     911             : ! **************************************************************************************************
     912         690 :    SUBROUTINE flipbf(cr3d_in, cr3d_out, bounds)
     913             : 
     914             :       REAL(dp), ALLOCATABLE, DIMENSION(:, :, :), &
     915             :          INTENT(IN)                                      :: cr3d_in
     916             :       REAL(dp), ALLOCATABLE, DIMENSION(:, :, :), &
     917             :          INTENT(INOUT)                                   :: cr3d_out
     918             :       INTEGER, DIMENSION(2, 3), INTENT(IN)               :: bounds
     919             : 
     920             :       CHARACTER(LEN=*), PARAMETER                        :: routineN = 'flipbf'
     921             : 
     922             :       INTEGER :: handle, i, lb1, lb1_glbl, lb1_new, lb2, lb2_glbl, lb2_new, lb3, lb3_glbl, &
     923             :          lb3_new, ub1, ub1_glbl, ub1_new, ub2, ub2_glbl, ub2_new, ub3, ub3_glbl, ub3_new
     924         690 :       INTEGER, ALLOCATABLE, DIMENSION(:)                 :: indz
     925             :       INTEGER, DIMENSION(2, 3)                           :: bndsl, bndsl_new
     926             : 
     927         690 :       CALL timeset(routineN, handle)
     928             : 
     929        1380 :       lb1 = LBOUND(cr3d_in, 1); ub1 = UBOUND(cr3d_in, 1)
     930        1380 :       lb2 = LBOUND(cr3d_in, 2); ub2 = UBOUND(cr3d_in, 2)
     931        1380 :       lb3 = LBOUND(cr3d_in, 3); ub3 = UBOUND(cr3d_in, 3)
     932             : 
     933         690 :       lb1_glbl = bounds(1, 1); ub1_glbl = bounds(2, 1)
     934         690 :       lb2_glbl = bounds(1, 2); ub2_glbl = bounds(2, 2)
     935         690 :       lb3_glbl = bounds(1, 3); ub3_glbl = bounds(2, 3)
     936             : 
     937        4830 :       bndsl = RESHAPE((/lb1, ub1, lb2, ub2, lb3, ub3/), (/2, 3/))
     938         690 :       bndsl_new = flipbf_bounds_local(bndsl, bounds)
     939             : 
     940         690 :       lb1_new = bndsl_new(1, 1); ub1_new = bndsl_new(2, 1)
     941         690 :       lb2_new = bndsl_new(1, 2); ub2_new = bndsl_new(2, 2)
     942         690 :       lb3_new = bndsl_new(1, 3); ub3_new = bndsl_new(2, 3)
     943             : 
     944        3450 :       ALLOCATE (cr3d_out(lb1_new:ub1_new, lb2_new:ub2_new, lb3_new:ub3_new))
     945   151310772 :       cr3d_out = 0.0_dp
     946             : 
     947             : ! set the data at the missing grid points (in a periodic grid) equal to the data at
     948             : ! the last existing grid points
     949        2070 :       ALLOCATE (indz(ub3_new - lb3_new + 1))
     950       64784 :       indz(:) = (/(i, i=2*(ub3_glbl + 1) - lb3_new, 2*(ub3_glbl + 1) - ub3_new, -1)/)
     951         690 :       IF (lb3_new .EQ. ub3_glbl + 1) indz(1) = indz(2)
     952   151310772 :       cr3d_out(lb1_new:ub1_new, lb2_new:ub2_new, lb3_new:ub3_new) = cr3d_in(:, :, indz)
     953             : 
     954         690 :       CALL timestop(handle)
     955             : 
     956        1380 :    END SUBROUTINE flipbf
     957             : 
     958             : ! **************************************************************************************************
     959             : !> \brief   rotates a 3d (real dp) array by 180 degrees (the way needed to expand data
     960             : !>          as explained in the description of the afore-defined subroutines)
     961             : !> \param cr3d_in input array
     962             : !> \param cr3d_out output array
     963             : !> \param bounds global lower and upper bounds
     964             : !> \par History
     965             : !>       07.2014 created [Hossein Bani-Hashemian]
     966             : !> \author Mohammad Hossein Bani-Hashemian
     967             : ! **************************************************************************************************
     968         582 :    SUBROUTINE rot180(cr3d_in, cr3d_out, bounds)
     969             : 
     970             :       REAL(dp), ALLOCATABLE, DIMENSION(:, :, :), &
     971             :          INTENT(IN)                                      :: cr3d_in
     972             :       REAL(dp), ALLOCATABLE, DIMENSION(:, :, :), &
     973             :          INTENT(INOUT)                                   :: cr3d_out
     974             :       INTEGER, DIMENSION(2, 3), INTENT(IN)               :: bounds
     975             : 
     976             :       CHARACTER(LEN=*), PARAMETER                        :: routineN = 'rot180'
     977             : 
     978             :       INTEGER :: handle, i, lb1, lb1_glbl, lb1_new, lb2, lb2_glbl, lb2_new, lb3, lb3_glbl, &
     979             :          lb3_new, ub1, ub1_glbl, ub1_new, ub2, ub2_glbl, ub2_new, ub3, ub3_glbl, ub3_new
     980         582 :       INTEGER, ALLOCATABLE, DIMENSION(:)                 :: indx, indy
     981             :       INTEGER, DIMENSION(2, 3)                           :: bndsl, bndsl_new
     982             : 
     983         582 :       CALL timeset(routineN, handle)
     984             : 
     985        1164 :       lb1 = LBOUND(cr3d_in, 1); ub1 = UBOUND(cr3d_in, 1)
     986        1164 :       lb2 = LBOUND(cr3d_in, 2); ub2 = UBOUND(cr3d_in, 2)
     987        1164 :       lb3 = LBOUND(cr3d_in, 3); ub3 = UBOUND(cr3d_in, 3)
     988             : 
     989         582 :       lb1_glbl = bounds(1, 1); ub1_glbl = bounds(2, 1)
     990         582 :       lb2_glbl = bounds(1, 2); ub2_glbl = bounds(2, 2)
     991         582 :       lb3_glbl = bounds(1, 3); ub3_glbl = bounds(2, 3)
     992             : 
     993        4074 :       bndsl = RESHAPE((/lb1, ub1, lb2, ub2, lb3, ub3/), (/2, 3/))
     994         582 :       bndsl_new = rot180_bounds_local(bndsl, bounds)
     995             : 
     996         582 :       lb1_new = bndsl_new(1, 1); ub1_new = bndsl_new(2, 1)
     997         582 :       lb2_new = bndsl_new(1, 2); ub2_new = bndsl_new(2, 2)
     998         582 :       lb3_new = bndsl_new(1, 3); ub3_new = bndsl_new(2, 3)
     999             : 
    1000        2910 :       ALLOCATE (cr3d_out(lb1_new:ub1_new, lb2_new:ub2_new, lb3_new:ub3_new))
    1001    36325643 :       cr3d_out = 0.0_dp
    1002             : 
    1003             : ! set the data at the missing grid points (in a periodic grid) equal to the data at
    1004             : ! the last existing grid points
    1005        2910 :       ALLOCATE (indx(ub1_new - lb1_new + 1), indy(ub2_new - lb2_new + 1))
    1006       32262 :       indx(:) = (/(i, i=2*(ub1_glbl + 1) - lb1_new, 2*(ub1_glbl + 1) - ub1_new, -1)/)
    1007       56864 :       indy(:) = (/(i, i=2*(ub2_glbl + 1) - lb2_new, 2*(ub2_glbl + 1) - ub2_new, -1)/)
    1008         582 :       IF (lb1_new .EQ. ub1_glbl + 1) indx(1) = indx(2)
    1009         582 :       IF (lb2_new .EQ. ub2_glbl + 1) indy(1) = indy(2)
    1010    36325643 :       cr3d_out(lb1_new:ub1_new, lb2_new:ub2_new, lb3_new:ub3_new) = cr3d_in(indx, indy, :)
    1011             : 
    1012         582 :       CALL timestop(handle)
    1013             : 
    1014        1164 :    END SUBROUTINE rot180
    1015             : 
    1016             : ! **************************************************************************************************
    1017             : !> \brief   calculates the global and local bounds of the expanded data
    1018             : !> \param pw_grid original plane-wave grid
    1019             : !> \param neumann_directions directions in which dct should be performed
    1020             : !> \param srcs_expand list of the source processes (pw_expand)
    1021             : !> \param flipg_stat flipping status for the received data chunks (pw_expand)
    1022             : !> \param bounds_shftd bounds of the original grid shifted to have g0 in the middle of the cell
    1023             : !> \param bounds_local_shftd local bounds of the original grid after shifting
    1024             : !> \param recv_msgs_bnds bounds of the messages to be received (pw_expand)
    1025             : !> \param bounds_new new global lower and upper bounds
    1026             : !> \param bounds_local_new new local lower and upper bounds
    1027             : !> \par History
    1028             : !>       07.2014 created [Hossein Bani-Hashemian]
    1029             : !> \author Mohammad Hossein Bani-Hashemian
    1030             : ! **************************************************************************************************
    1031          44 :    SUBROUTINE expansion_bounds(pw_grid, neumann_directions, srcs_expand, flipg_stat, &
    1032             :                                bounds_shftd, bounds_local_shftd, &
    1033             :                                recv_msgs_bnds, bounds_new, bounds_local_new)
    1034             : 
    1035             :       TYPE(pw_grid_type), INTENT(IN), POINTER            :: pw_grid
    1036             :       INTEGER, INTENT(IN)                                :: neumann_directions
    1037             :       INTEGER, DIMENSION(:), INTENT(IN), POINTER         :: srcs_expand, flipg_stat
    1038             :       INTEGER, DIMENSION(2, 3), INTENT(OUT)              :: bounds_shftd, bounds_local_shftd
    1039             :       INTEGER, DIMENSION(:, :, :), INTENT(INOUT), &
    1040             :          POINTER                                         :: recv_msgs_bnds
    1041             :       INTEGER, DIMENSION(2, 3), INTENT(OUT)              :: bounds_new, bounds_local_new
    1042             : 
    1043             :       CHARACTER(LEN=*), PARAMETER                        :: routineN = 'expansion_bounds'
    1044             : 
    1045             :       INTEGER                                            :: group_size, handle, i, lb1_new, lb2_new, &
    1046             :                                                             lb3_new, loc, maxn_sendrecv, rs_mpo, &
    1047             :                                                             ub1_new, ub2_new, ub3_new
    1048          44 :       INTEGER, ALLOCATABLE, DIMENSION(:)                 :: src_hist
    1049          44 :       INTEGER, ALLOCATABLE, DIMENSION(:, :, :)           :: bounds_local_all, bounds_local_new_all, &
    1050          44 :                                                             pcs_bnds
    1051             :       INTEGER, DIMENSION(2, 3)                           :: bounds, bounds_local
    1052             :       INTEGER, DIMENSION(3)                              :: npts_new, shf_yesno, shift
    1053             :       TYPE(mp_comm_type)                                 :: rs_group
    1054             : 
    1055          44 :       CALL timeset(routineN, handle)
    1056             : 
    1057          44 :       rs_group = pw_grid%para%group
    1058          44 :       rs_mpo = pw_grid%para%group%mepos
    1059          44 :       group_size = pw_grid%para%group%num_pe
    1060         440 :       bounds = pw_grid%bounds
    1061         440 :       bounds_local = pw_grid%bounds_local
    1062             : 
    1063          44 :       SELECT CASE (neumann_directions)
    1064             :       CASE (neumannXYZ)
    1065          44 :          maxn_sendrecv = 4
    1066          44 :          shf_yesno = (/1, 1, 1/)
    1067             :       CASE (neumannXY)
    1068           0 :          maxn_sendrecv = 4
    1069           0 :          shf_yesno = (/1, 1, 0/)
    1070             :       CASE (neumannXZ)
    1071           0 :          maxn_sendrecv = 2
    1072           0 :          shf_yesno = (/1, 0, 1/)
    1073             :       CASE (neumannYZ)
    1074           4 :          maxn_sendrecv = 2
    1075           4 :          shf_yesno = (/0, 1, 1/)
    1076             :       CASE (neumannX)
    1077           4 :          maxn_sendrecv = 2
    1078           4 :          shf_yesno = (/1, 0, 0/)
    1079             :       CASE (neumannY)
    1080           0 :          maxn_sendrecv = 2
    1081           0 :          shf_yesno = (/0, 1, 0/)
    1082             :       CASE (neumannZ)
    1083           4 :          maxn_sendrecv = 1
    1084           4 :          shf_yesno = (/0, 0, 1/)
    1085             :       CASE DEFAULT
    1086           0 :          CPABORT("Unknown neumann direction")
    1087          44 :          shf_yesno = (/0, 0, 0/)
    1088             :       END SELECT
    1089             : 
    1090          88 :       ALLOCATE (pcs_bnds(2, 3, maxn_sendrecv))
    1091          88 :       ALLOCATE (src_hist(maxn_sendrecv))
    1092             : 
    1093             :       ! Note that this is not easily FFT-able ... needed anyway, so link in FFTW.
    1094         176 :       npts_new = 2*pw_grid%npts
    1095         176 :       shift = -npts_new/2
    1096         176 :       shift = shift - bounds(1, :)
    1097         132 :       bounds_shftd(:, 1) = bounds(:, 1) + shf_yesno(1)*shift(1)
    1098         132 :       bounds_shftd(:, 2) = bounds(:, 2) + shf_yesno(2)*shift(2)
    1099         132 :       bounds_shftd(:, 3) = bounds(:, 3) + shf_yesno(3)*shift(3)
    1100         132 :       bounds_local_shftd(:, 1) = bounds_local(:, 1) + shf_yesno(1)*shift(1)
    1101         132 :       bounds_local_shftd(:, 2) = bounds_local(:, 2) + shf_yesno(2)*shift(2)
    1102         132 :       bounds_local_shftd(:, 3) = bounds_local(:, 3) + shf_yesno(3)*shift(3)
    1103             : 
    1104             : ! let all the nodes know about each others local shifted bounds
    1105         132 :       ALLOCATE (bounds_local_all(2, 3, group_size))
    1106          44 :       CALL rs_group%allgather(bounds_local_shftd, bounds_local_all)
    1107             : 
    1108         192 :       src_hist = -1 ! keeps the history of sources
    1109             : 
    1110         192 :       DO i = 1, maxn_sendrecv
    1111             : ! no need to receive from myself
    1112         148 :          IF (srcs_expand(i) .EQ. rs_mpo) THEN
    1113          80 :             recv_msgs_bnds(1, 1, i) = bounds_local_shftd(1, 1)
    1114          80 :             recv_msgs_bnds(2, 1, i) = bounds_local_shftd(2, 1)
    1115          80 :             recv_msgs_bnds(1, 2, i) = bounds_local_shftd(1, 2)
    1116          80 :             recv_msgs_bnds(2, 2, i) = bounds_local_shftd(2, 2)
    1117          80 :             recv_msgs_bnds(1, 3, i) = bounds_local_shftd(1, 3)
    1118          80 :             recv_msgs_bnds(2, 3, i) = bounds_local_shftd(2, 3)
    1119             : ! if I have already received data from the source, just use the one from the last time
    1120         268 :          ELSE IF (ANY(src_hist .EQ. srcs_expand(i))) THEN
    1121         192 :             loc = MINLOC(ABS(src_hist - srcs_expand(i)), 1)
    1122          32 :             recv_msgs_bnds(1, 1, i) = bounds_local_all(1, 1, srcs_expand(loc) + 1)
    1123          32 :             recv_msgs_bnds(2, 1, i) = bounds_local_all(2, 1, srcs_expand(loc) + 1)
    1124          32 :             recv_msgs_bnds(1, 2, i) = bounds_local_all(1, 2, srcs_expand(loc) + 1)
    1125          32 :             recv_msgs_bnds(2, 2, i) = bounds_local_all(2, 2, srcs_expand(loc) + 1)
    1126          32 :             recv_msgs_bnds(1, 3, i) = bounds_local_all(1, 3, srcs_expand(loc) + 1)
    1127          32 :             recv_msgs_bnds(2, 3, i) = bounds_local_all(2, 3, srcs_expand(loc) + 1)
    1128             :          ELSE
    1129          36 :             recv_msgs_bnds(1, 1, i) = bounds_local_all(1, 1, srcs_expand(i) + 1)
    1130          36 :             recv_msgs_bnds(2, 1, i) = bounds_local_all(2, 1, srcs_expand(i) + 1)
    1131          36 :             recv_msgs_bnds(1, 2, i) = bounds_local_all(1, 2, srcs_expand(i) + 1)
    1132          36 :             recv_msgs_bnds(2, 2, i) = bounds_local_all(2, 2, srcs_expand(i) + 1)
    1133          36 :             recv_msgs_bnds(1, 3, i) = bounds_local_all(1, 3, srcs_expand(i) + 1)
    1134          36 :             recv_msgs_bnds(2, 3, i) = bounds_local_all(2, 3, srcs_expand(i) + 1)
    1135             :          END IF
    1136         192 :          src_hist(i) = srcs_expand(i)
    1137             :       END DO
    1138             : 
    1139             : ! flip the received data based on the flipping status
    1140         192 :       DO i = 1, maxn_sendrecv
    1141          44 :          SELECT CASE (flipg_stat(i))
    1142             :          CASE (NOT_FLIPPED)
    1143         440 :             pcs_bnds(:, :, i) = recv_msgs_bnds(:, :, i)
    1144             :          CASE (UD_FLIPPED)
    1145          36 :             pcs_bnds(:, :, i) = flipud_bounds_local(recv_msgs_bnds(:, :, i), bounds_shftd)
    1146             :          CASE (LR_FLIPPED)
    1147          36 :             pcs_bnds(:, :, i) = fliplr_bounds_local(recv_msgs_bnds(:, :, i), bounds_shftd)
    1148             :          CASE (BF_FLIPPED)
    1149           0 :             pcs_bnds(:, :, i) = flipbf_bounds_local(recv_msgs_bnds(:, :, i), bounds_shftd)
    1150             :          CASE (ROTATED)
    1151         148 :             pcs_bnds(:, :, i) = rot180_bounds_local(recv_msgs_bnds(:, :, i), bounds_shftd)
    1152             :          END SELECT
    1153             :       END DO
    1154             : 
    1155         384 :       lb1_new = MINVAL(pcs_bnds(1, 1, :)); ub1_new = MAXVAL(pcs_bnds(2, 1, :))
    1156         384 :       lb2_new = MINVAL(pcs_bnds(1, 2, :)); ub2_new = MAXVAL(pcs_bnds(2, 2, :))
    1157         384 :       lb3_new = MINVAL(pcs_bnds(1, 3, :)); ub3_new = MAXVAL(pcs_bnds(2, 3, :))
    1158             : 
    1159             : ! calculate the new local and global bounds
    1160         192 :       bounds_local_new(1, 1) = MINVAL(pcs_bnds(1, 1, :))
    1161         192 :       bounds_local_new(2, 1) = MAXVAL(pcs_bnds(2, 1, :))
    1162         192 :       bounds_local_new(1, 2) = MINVAL(pcs_bnds(1, 2, :))
    1163         192 :       bounds_local_new(2, 2) = MAXVAL(pcs_bnds(2, 2, :))
    1164         192 :       bounds_local_new(1, 3) = MINVAL(pcs_bnds(1, 3, :))
    1165             :       SELECT CASE (neumann_directions)
    1166             :       CASE (neumannXYZ, neumannXZ, neumannYZ, neumannZ)
    1167         180 :          bounds_local_new(2, 3) = 2*(MAXVAL(pcs_bnds(2, 3, :)) + 1) - bounds_local_new(1, 3) - 1
    1168             :       CASE (neumannXY, neumannX, neumannY)
    1169          56 :          bounds_local_new(2, 3) = MAXVAL(pcs_bnds(2, 3, :))
    1170             :       END SELECT
    1171             : 
    1172          88 :       ALLOCATE (bounds_local_new_all(2, 3, group_size))
    1173          44 :       CALL rs_group%allgather(bounds_local_new, bounds_local_new_all)
    1174         132 :       bounds_new(1, 1) = MINVAL(bounds_local_new_all(1, 1, :))
    1175         132 :       bounds_new(2, 1) = MAXVAL(bounds_local_new_all(2, 1, :))
    1176         132 :       bounds_new(1, 2) = MINVAL(bounds_local_new_all(1, 2, :))
    1177         132 :       bounds_new(2, 2) = MAXVAL(bounds_local_new_all(2, 2, :))
    1178         132 :       bounds_new(1, 3) = MINVAL(bounds_local_new_all(1, 3, :))
    1179         132 :       bounds_new(2, 3) = MAXVAL(bounds_local_new_all(2, 3, :))
    1180             : 
    1181          44 :       DEALLOCATE (bounds_local_all, bounds_local_new_all)
    1182             : 
    1183          44 :       CALL timestop(handle)
    1184             : 
    1185          88 :    END SUBROUTINE expansion_bounds
    1186             : 
    1187             : ! **************************************************************************************************
    1188             : !> \brief   precalculates the local bounds of a 3d array after applying flipud
    1189             : !> \param bndsl_in current local lower and upper bounds
    1190             : !> \param bounds global lower and upper bounds
    1191             : !> \return new local lower and upper bounds
    1192             : !> \par History
    1193             : !>       07.2014 created [Hossein Bani-Hashemian]
    1194             : !> \author Mohammad Hossein Bani-Hashemian
    1195             : ! **************************************************************************************************
    1196         650 :    PURE FUNCTION flipud_bounds_local(bndsl_in, bounds) RESULT(bndsl_out)
    1197             : 
    1198             :       INTEGER, DIMENSION(2, 3), INTENT(IN)               :: bndsl_in, bounds
    1199             :       INTEGER, DIMENSION(2, 3)                           :: bndsl_out
    1200             : 
    1201         650 :       bndsl_out(1, 1) = 2*(bounds(2, 1) + 1) - bndsl_in(2, 1)
    1202         650 :       bndsl_out(2, 1) = 2*(bounds(2, 1) + 1) - bndsl_in(1, 1)
    1203         650 :       IF (bndsl_out(1, 1) .EQ. bounds(2, 1) + 2) bndsl_out(1, 1) = bndsl_out(1, 1) - 1
    1204         650 :       IF (bndsl_out(2, 1) .EQ. 2*(bounds(2, 1) + 1) - bounds(1, 1)) bndsl_out(2, 1) = bndsl_out(2, 1) - 1
    1205             : 
    1206         650 :       bndsl_out(1, 2) = bndsl_in(1, 2)
    1207         650 :       bndsl_out(2, 2) = bndsl_in(2, 2)
    1208             : 
    1209         650 :       bndsl_out(1, 3) = bndsl_in(1, 3)
    1210         650 :       bndsl_out(2, 3) = bndsl_in(2, 3)
    1211             : 
    1212         650 :    END FUNCTION flipud_bounds_local
    1213             : 
    1214             : ! **************************************************************************************************
    1215             : !> \brief   precalculates the local bounds of a 3d array after applying fliplr
    1216             : !> \param bndsl_in current local lower and upper bounds
    1217             : !> \param bounds global lower and upper bounds
    1218             : !> \return new local lower and upper bounds
    1219             : !> \par History
    1220             : !>       07.2014 created [Hossein Bani-Hashemian]
    1221             : !> \author Mohammad Hossein Bani-Hashemian
    1222             : ! **************************************************************************************************
    1223         662 :    PURE FUNCTION fliplr_bounds_local(bndsl_in, bounds) RESULT(bndsl_out)
    1224             : 
    1225             :       INTEGER, DIMENSION(2, 3), INTENT(IN)               :: bndsl_in, bounds
    1226             :       INTEGER, DIMENSION(2, 3)                           :: bndsl_out
    1227             : 
    1228         662 :       bndsl_out(1, 1) = bndsl_in(1, 1)
    1229         662 :       bndsl_out(2, 1) = bndsl_in(2, 1)
    1230             : 
    1231         662 :       bndsl_out(1, 2) = 2*(bounds(2, 2) + 1) - bndsl_in(2, 2)
    1232         662 :       bndsl_out(2, 2) = 2*(bounds(2, 2) + 1) - bndsl_in(1, 2)
    1233         662 :       IF (bndsl_out(1, 2) .EQ. bounds(2, 2) + 2) bndsl_out(1, 2) = bndsl_out(1, 2) - 1
    1234         662 :       IF (bndsl_out(2, 2) .EQ. 2*(bounds(2, 2) + 1) - bounds(1, 2)) bndsl_out(2, 2) = bndsl_out(2, 2) - 1
    1235             : 
    1236         662 :       bndsl_out(1, 3) = bndsl_in(1, 3)
    1237         662 :       bndsl_out(2, 3) = bndsl_in(2, 3)
    1238             : 
    1239         662 :    END FUNCTION fliplr_bounds_local
    1240             : 
    1241             : ! **************************************************************************************************
    1242             : !> \brief   precalculates the local bounds of a 3d array after applying flipbf
    1243             : !> \param bndsl_in current local lower and upper bounds
    1244             : !> \param bounds global lower and upper bounds
    1245             : !> \return new local lower and upper bounds
    1246             : !> \par History
    1247             : !>       07.2014 created [Hossein Bani-Hashemian]
    1248             : !> \author Mohammad Hossein Bani-Hashemian
    1249             : ! **************************************************************************************************
    1250         690 :    PURE FUNCTION flipbf_bounds_local(bndsl_in, bounds) RESULT(bndsl_out)
    1251             : 
    1252             :       INTEGER, DIMENSION(2, 3), INTENT(IN)               :: bndsl_in, bounds
    1253             :       INTEGER, DIMENSION(2, 3)                           :: bndsl_out
    1254             : 
    1255         690 :       bndsl_out(1, 1) = bndsl_in(1, 1)
    1256         690 :       bndsl_out(2, 1) = bndsl_in(2, 1)
    1257             : 
    1258         690 :       bndsl_out(1, 2) = bndsl_in(1, 2)
    1259         690 :       bndsl_out(2, 2) = bndsl_in(2, 2)
    1260             : 
    1261         690 :       bndsl_out(1, 3) = 2*(bounds(2, 3) + 1) - bndsl_in(2, 3)
    1262         690 :       bndsl_out(2, 3) = 2*(bounds(2, 3) + 1) - bndsl_in(1, 3)
    1263         690 :       IF (bndsl_out(1, 3) .EQ. bounds(2, 3) + 2) bndsl_out(1, 3) = bndsl_out(1, 3) - 1
    1264         690 :       IF (bndsl_out(2, 3) .EQ. 2*(bounds(2, 3) + 1) - bounds(1, 3)) bndsl_out(2, 3) = bndsl_out(2, 3) - 1
    1265             : 
    1266         690 :    END FUNCTION flipbf_bounds_local
    1267             : 
    1268             : ! **************************************************************************************************
    1269             : !> \brief   precalculates the local bounds of a 3d array after applying rot180
    1270             : !> \param bndsl_in current local lower and upper bounds
    1271             : !> \param bounds global lower and upper bounds
    1272             : !> \return new local lower and upper bounds
    1273             : !> \par History
    1274             : !>       07.2014 created [Hossein Bani-Hashemian]
    1275             : !> \author Mohammad Hossein Bani-Hashemian
    1276             : ! **************************************************************************************************
    1277         614 :    PURE FUNCTION rot180_bounds_local(bndsl_in, bounds) RESULT(bndsl_out)
    1278             : 
    1279             :       INTEGER, DIMENSION(2, 3), INTENT(IN)               :: bndsl_in, bounds
    1280             :       INTEGER, DIMENSION(2, 3)                           :: bndsl_out
    1281             : 
    1282         614 :       bndsl_out(1, 1) = 2*(bounds(2, 1) + 1) - bndsl_in(2, 1)
    1283         614 :       bndsl_out(2, 1) = 2*(bounds(2, 1) + 1) - bndsl_in(1, 1)
    1284         614 :       IF (bndsl_out(1, 1) .EQ. bounds(2, 1) + 2) bndsl_out(1, 1) = bndsl_out(1, 1) - 1
    1285         614 :       IF (bndsl_out(2, 1) .EQ. 2*(bounds(2, 1) + 1) - bounds(1, 1)) bndsl_out(2, 1) = bndsl_out(2, 1) - 1
    1286             : 
    1287         614 :       bndsl_out(1, 2) = 2*(bounds(2, 2) + 1) - bndsl_in(2, 2)
    1288         614 :       bndsl_out(2, 2) = 2*(bounds(2, 2) + 1) - bndsl_in(1, 2)
    1289         614 :       IF (bndsl_out(1, 2) .EQ. bounds(2, 2) + 2) bndsl_out(1, 2) = bndsl_out(1, 2) - 1
    1290         614 :       IF (bndsl_out(2, 2) .EQ. 2*(bounds(2, 2) + 1) - bounds(1, 2)) bndsl_out(2, 2) = bndsl_out(2, 2) - 1
    1291             : 
    1292         614 :       bndsl_out(1, 3) = bndsl_in(1, 3)
    1293         614 :       bndsl_out(2, 3) = bndsl_in(2, 3)
    1294             : 
    1295         614 :    END FUNCTION rot180_bounds_local
    1296             : 
    1297           0 : END MODULE dct
    1298             : 

Generated by: LCOV version 1.15