LCOV - code coverage report
Current view: top level - src/pw - dct.F (source / functions) Coverage Total Hit
Test: CP2K Regtests (git:42dac4a) Lines: 94.9 % 532 505
Test Date: 2025-07-25 12:55:17 Functions: 83.3 % 18 15

            Line data    Source code
       1              : !--------------------------------------------------------------------------------------------------!
       2              : !   CP2K: A general program to perform molecular dynamics simulations                              !
       3              : !   Copyright 2000-2025 CP2K developers group <https://cp2k.org>                                   !
       4              : !                                                                                                  !
       5              : !   SPDX-License-Identifier: GPL-2.0-or-later                                                      !
       6              : !--------------------------------------------------------------------------------------------------!
       7              : 
       8              : ! **************************************************************************************************
       9              : !> \brief the type I Discrete Cosine Transform (DCT-I)
      10              : !> \par History
      11              : !>       07.2014 created [Hossein Bani-Hashemian]
      12              : !>       11.2015 dealt with periodic grids [Hossein Bani-Hashemian]
      13              : !>       03.2016 dct in one or two directions [Hossein Bani-Hashemian]
      14              : !> \author Mohammad Hossein Bani-Hashemian
      15              : ! **************************************************************************************************
      16              : MODULE dct
      17              : 
      18              :    USE kinds,                           ONLY: dp
      19              :    USE message_passing,                 ONLY: mp_comm_type,&
      20              :                                               mp_request_null,&
      21              :                                               mp_request_type,&
      22              :                                               mp_waitall
      23              :    USE pw_grid_types,                   ONLY: pw_grid_type
      24              :    USE pw_grids,                        ONLY: pw_grid_create
      25              :    USE pw_types,                        ONLY: pw_r3d_rs_type
      26              : #include "../base/base_uses.f90"
      27              : 
      28              :    IMPLICIT NONE
      29              :    PRIVATE
      30              :    CHARACTER(len=*), PARAMETER, PRIVATE :: moduleN = 'dct'
      31              : 
      32              :    TYPE :: dct_type
      33              :       INTEGER, DIMENSION(:), POINTER     :: dests_expand => NULL()
      34              :       INTEGER, DIMENSION(:), POINTER     :: srcs_expand => NULL()
      35              :       INTEGER, DIMENSION(:), POINTER     :: flipg_stat => NULL()
      36              :       INTEGER, DIMENSION(:), POINTER     :: dests_shrink => NULL()
      37              :       INTEGER                            :: srcs_shrink = 0
      38              :       INTEGER, DIMENSION(:, :, :), POINTER :: recv_msgs_bnds => NULL()
      39              :       INTEGER, DIMENSION(2, 3)            :: dct_bounds = 0
      40              :       INTEGER, DIMENSION(2, 3)            :: dct_bounds_local = 0
      41              :       INTEGER, DIMENSION(2, 3)            :: bounds_shftd = 0
      42              :       INTEGER, DIMENSION(2, 3)            :: bounds_local_shftd = 0
      43              :    END TYPE dct_type
      44              : 
      45              :    TYPE dct_msg_type
      46              :       PRIVATE
      47              :       REAL(dp), DIMENSION(:, :, :), ALLOCATABLE :: msg
      48              :    END TYPE dct_msg_type
      49              : 
      50              :    PUBLIC dct_type, &
      51              :       dct_type_init, &
      52              :       dct_type_release, &
      53              :       setup_dct_pw_grids, &
      54              :       pw_shrink, &
      55              :       pw_expand
      56              : 
      57              :    INTEGER, PARAMETER, PRIVATE    :: NOT_FLIPPED = 0, &
      58              :                                      UD_FLIPPED = 1, &
      59              :                                      LR_FLIPPED = 2, &
      60              :                                      BF_FLIPPED = 3, &
      61              :                                      ROTATED = 4
      62              : 
      63              :    INTEGER, PARAMETER, PUBLIC     :: neumannXYZ = 111, &
      64              :                                      neumannXY = 110, &
      65              :                                      neumannXZ = 101, &
      66              :                                      neumannYZ = 011, &
      67              :                                      neumannX = 100, &
      68              :                                      neumannY = 010, &
      69              :                                      neumannZ = 001
      70              : 
      71              : CONTAINS
      72              : 
      73              : ! **************************************************************************************************
      74              : !> \brief  Initializes a dct_type
      75              : !> \param pw_grid the original plane wave grid
      76              : !> \param neumann_directions directions in which dct should be performed
      77              : !> \param dct_env dct_type to be initialized
      78              : !> \par History
      79              : !>       08.2014 created [Hossein Bani-Hashemian]
      80              : !> \author Mohammad Hossein Bani-Hashemian
      81              : ! **************************************************************************************************
      82           22 :    SUBROUTINE dct_type_init(pw_grid, neumann_directions, dct_env)
      83              : 
      84              :       TYPE(pw_grid_type), INTENT(IN), POINTER            :: pw_grid
      85              :       INTEGER, INTENT(IN)                                :: neumann_directions
      86              :       TYPE(dct_type), INTENT(INOUT)                      :: dct_env
      87              : 
      88              :       CHARACTER(len=*), PARAMETER                        :: routineN = 'dct_type_init'
      89              : 
      90              :       INTEGER                                            :: handle, maxn_sendrecv
      91              : 
      92           22 :       CALL timeset(routineN, handle)
      93              : 
      94           22 :       SELECT CASE (neumann_directions)
      95              :       CASE (neumannXYZ, neumannXY)
      96            4 :          maxn_sendrecv = 4
      97              :       CASE (neumannX, neumannY, neumannXZ, neumannYZ)
      98            4 :          maxn_sendrecv = 2
      99              :       CASE (neumannZ)
     100            2 :          maxn_sendrecv = 1
     101              :       CASE DEFAULT
     102           22 :          CPABORT("Invalid combination of Neumann and periodic conditions.")
     103              :       END SELECT
     104              : 
     105           44 :       ALLOCATE (dct_env%flipg_stat(maxn_sendrecv))
     106          110 :       ALLOCATE (dct_env%dests_expand(maxn_sendrecv), dct_env%srcs_expand(maxn_sendrecv))
     107           66 :       ALLOCATE (dct_env%dests_shrink(maxn_sendrecv))
     108           44 :       ALLOCATE (dct_env%recv_msgs_bnds(2, 3, maxn_sendrecv))
     109              : 
     110              :       CALL set_dests_srcs_pid(pw_grid, neumann_directions, &
     111              :                               dct_env%dests_expand, dct_env%srcs_expand, dct_env%flipg_stat, &
     112           22 :                               dct_env%dests_shrink, dct_env%srcs_shrink)
     113              :       CALL expansion_bounds(pw_grid, neumann_directions, &
     114              :                             dct_env%srcs_expand, dct_env%flipg_stat, &
     115              :                             dct_env%bounds_shftd, dct_env%bounds_local_shftd, &
     116              :                             dct_env%recv_msgs_bnds, dct_env%dct_bounds, &
     117           22 :                             dct_env%dct_bounds_local)
     118              : 
     119           22 :       CALL timestop(handle)
     120              : 
     121           22 :    END SUBROUTINE dct_type_init
     122              : 
     123              : ! **************************************************************************************************
     124              : !> \brief  Releases a dct_type
     125              : !> \param dct_env dct_type to be released
     126              : !> \par History
     127              : !>       03.2016 created [Hossein Bani-Hashemian]
     128              : !> \author Mohammad Hossein Bani-Hashemian
     129              : ! **************************************************************************************************
     130           22 :    SUBROUTINE dct_type_release(dct_env)
     131              : 
     132              :       TYPE(dct_type), INTENT(INOUT)                      :: dct_env
     133              : 
     134              :       CHARACTER(len=*), PARAMETER                        :: routineN = 'dct_type_release'
     135              : 
     136              :       INTEGER                                            :: handle
     137              : 
     138           22 :       CALL timeset(routineN, handle)
     139              : 
     140           22 :       IF (ASSOCIATED(dct_env%dests_shrink)) DEALLOCATE (dct_env%dests_shrink)
     141           22 :       IF (ASSOCIATED(dct_env%dests_expand)) DEALLOCATE (dct_env%dests_expand)
     142           22 :       IF (ASSOCIATED(dct_env%srcs_expand)) DEALLOCATE (dct_env%srcs_expand)
     143           22 :       IF (ASSOCIATED(dct_env%flipg_stat)) DEALLOCATE (dct_env%flipg_stat)
     144           22 :       IF (ASSOCIATED(dct_env%recv_msgs_bnds)) DEALLOCATE (dct_env%recv_msgs_bnds)
     145              : 
     146           22 :       CALL timestop(handle)
     147              : 
     148           22 :    END SUBROUTINE dct_type_release
     149              : 
     150              : ! **************************************************************************************************
     151              : !> \brief   sets up an extended pw_grid for Discrete Cosine Transform (DCT)
     152              : !>          calculations
     153              : !> \param pw_grid the original plane wave grid
     154              : !> \param cell_hmat cell hmat
     155              : !> \param neumann_directions directions in which dct should be performed
     156              : !> \param dct_pw_grid DCT plane-wave grid
     157              : !> \par History
     158              : !>       07.2014 created [Hossein Bani-Hashemian]
     159              : !> \author Mohammad Hossein Bani-Hashemian
     160              : ! **************************************************************************************************
     161           22 :    SUBROUTINE setup_dct_pw_grids(pw_grid, cell_hmat, neumann_directions, dct_pw_grid)
     162              : 
     163              :       TYPE(pw_grid_type), INTENT(IN), POINTER            :: pw_grid
     164              :       REAL(dp), DIMENSION(3, 3), INTENT(IN)              :: cell_hmat
     165              :       INTEGER, INTENT(IN)                                :: neumann_directions
     166              :       TYPE(pw_grid_type), INTENT(INOUT), POINTER         :: dct_pw_grid
     167              : 
     168              :       CHARACTER(LEN=*), PARAMETER :: routineN = 'setup_dct_pw_grids'
     169              : 
     170              :       INTEGER                                            :: blocked, handle, maxn_sendrecv, &
     171              :                                                             srcs_shrink
     172              :       INTEGER, DIMENSION(2, 3)                           :: bounds_local_new, bounds_local_shftd, &
     173              :                                                             bounds_new, bounds_shftd
     174              :       INTEGER, DIMENSION(:), POINTER                     :: dests_expand, dests_shrink, flipg_stat, &
     175              :                                                             srcs_expand
     176              :       INTEGER, DIMENSION(:, :, :), POINTER               :: recv_msgs_bnds
     177              :       REAL(KIND=dp), DIMENSION(3)                        :: scfac
     178              :       REAL(KIND=dp), DIMENSION(3, 3)                     :: hmat2
     179              : 
     180           22 :       CALL timeset(routineN, handle)
     181              : 
     182           38 :       SELECT CASE (neumann_directions)
     183              :       CASE (neumannXYZ)
     184           16 :          maxn_sendrecv = 4
     185           16 :          scfac = (/2.0_dp, 2.0_dp, 2.0_dp/)
     186              :       CASE (neumannXY)
     187            0 :          maxn_sendrecv = 4
     188            0 :          scfac = (/2.0_dp, 2.0_dp, 1.0_dp/)
     189              :       CASE (neumannXZ)
     190            0 :          maxn_sendrecv = 2
     191            0 :          scfac = (/2.0_dp, 1.0_dp, 2.0_dp/)
     192              :       CASE (neumannYZ)
     193            2 :          maxn_sendrecv = 2
     194            2 :          scfac = (/1.0_dp, 2.0_dp, 2.0_dp/)
     195              :       CASE (neumannX)
     196            2 :          maxn_sendrecv = 2
     197            2 :          scfac = (/2.0_dp, 1.0_dp, 1.0_dp/)
     198              :       CASE (neumannY)
     199            0 :          maxn_sendrecv = 2
     200            0 :          scfac = (/1.0_dp, 2.0_dp, 1.0_dp/)
     201              :       CASE (neumannZ)
     202            2 :          maxn_sendrecv = 1
     203            2 :          scfac = (/1.0_dp, 1.0_dp, 2.0_dp/)
     204              :       CASE DEFAULT
     205           22 :          CPABORT("Invalid combination of Neumann and periodic conditions.")
     206              :       END SELECT
     207              : 
     208           44 :       ALLOCATE (flipg_stat(maxn_sendrecv))
     209          154 :       ALLOCATE (dests_expand(maxn_sendrecv), srcs_expand(maxn_sendrecv), dests_shrink(maxn_sendrecv))
     210           44 :       ALLOCATE (recv_msgs_bnds(2, 3, maxn_sendrecv))
     211              : 
     212              :       CALL set_dests_srcs_pid(pw_grid, neumann_directions, dests_expand, srcs_expand, flipg_stat, &
     213           22 :                               dests_shrink, srcs_shrink)
     214              :       CALL expansion_bounds(pw_grid, neumann_directions, srcs_expand, flipg_stat, &
     215           22 :                             bounds_shftd, bounds_local_shftd, recv_msgs_bnds, bounds_new, bounds_local_new)
     216              : 
     217           22 :       hmat2 = 0.0_dp
     218           22 :       hmat2(1, 1) = scfac(1)*cell_hmat(1, 1)
     219           22 :       hmat2(2, 2) = scfac(2)*cell_hmat(2, 2)
     220           22 :       hmat2(3, 3) = scfac(3)*cell_hmat(3, 3)
     221              : 
     222              :       ! uses bounds_local_new that is 2*n-2 in size....this is only rarely fft-able by fftsg, and needs fftw3,
     223              :       ! where it might use sizes that are not declared available in fft_get_radix.
     224              : 
     225           22 :       IF (pw_grid%para%blocked) THEN
     226            0 :          blocked = 1
     227           22 :       ELSE IF (pw_grid%para%ray_distribution) THEN
     228           22 :          blocked = 0
     229              :       END IF
     230              : 
     231              :       CALL pw_grid_create(dct_pw_grid, pw_grid%para%group, hmat2, &
     232              :                           bounds=bounds_new, &
     233              :                           rs_dims=pw_grid%para%group%num_pe_cart, &
     234              :                           blocked=blocked, &
     235           22 :                           bounds_local=bounds_local_new)
     236              : 
     237           22 :       DEALLOCATE (flipg_stat, dests_expand, srcs_expand, dests_shrink, recv_msgs_bnds)
     238              : 
     239           22 :       CALL timestop(handle)
     240              : 
     241           44 :    END SUBROUTINE setup_dct_pw_grids
     242              : 
     243              : ! **************************************************************************************************
     244              : !> \brief Finds the process ids for mpi_isend destiations and mpi_irecv sources
     245              : !>   for expanding and shrinking a pw_r3d_rs_type data
     246              : !> \param pw_grid the original plane wave grid
     247              : !> \param neumann_directions directions in which dct should be performed
     248              : !> \param dests_expand list of the destination processes (pw_expand)
     249              : !> \param srcs_expand list of the source processes (pw_expand)
     250              : !> \param flipg_stat flipping status for the received data chunks (pw_expand)
     251              : !> \param dests_shrink list of the destination processes (pw_shrink)
     252              : !> \param srcs_shrink list of the source processes (pw_shrink)
     253              : !> \par History
     254              : !>       07.2014 created [Hossein Bani-Hashemian]
     255              : !> \author Mohammad Hossein Bani-Hashemian
     256              : ! **************************************************************************************************
     257           44 :    SUBROUTINE set_dests_srcs_pid(pw_grid, neumann_directions, dests_expand, srcs_expand, &
     258              :                                  flipg_stat, dests_shrink, srcs_shrink)
     259              : 
     260              :       TYPE(pw_grid_type), INTENT(IN), POINTER            :: pw_grid
     261              :       INTEGER, INTENT(IN)                                :: neumann_directions
     262              :       INTEGER, DIMENSION(:), INTENT(INOUT), POINTER      :: dests_expand, srcs_expand, flipg_stat, &
     263              :                                                             dests_shrink
     264              :       INTEGER, INTENT(OUT)                               :: srcs_shrink
     265              : 
     266              :       CHARACTER(LEN=*), PARAMETER :: routineN = 'set_dests_srcs_pid'
     267              : 
     268              :       INTEGER                                            :: group_size, handle, i, j, k, &
     269              :                                                             maxn_sendrecv, rs_dim1, rs_dim2, &
     270              :                                                             rs_mpo, tmp_size1, tmp_size2
     271           44 :       INTEGER, ALLOCATABLE, DIMENSION(:)                 :: src_pos1_onesdd, src_pos2_onesdd, &
     272           44 :                                                             tmp1_arr, tmp2_arr
     273           44 :       INTEGER, ALLOCATABLE, DIMENSION(:, :)              :: dests_shrink_all, src_pos1, src_pos2, &
     274           44 :                                                             srcs_coord, srcs_expand_all
     275              :       INTEGER, DIMENSION(2)                              :: rs_dims, rs_pos
     276              :       TYPE(mp_comm_type)                                 :: rs_group
     277              : 
     278           44 :       CALL timeset(routineN, handle)
     279              : 
     280              : ! example: 3x4 process grid
     281              : ! XYZ or XY
     282              : ! rs_dim1 = 3 -->  src_pos1 = [1  3  -2]
     283              : !                             [2 -3  -1]
     284              : ! rs_dim2 = 4 -->  src_pos2 = [1  3  -4  -2]
     285              : !                             [2  4  -3  -1]
     286              : ! => (1,1) receives from (1,1) ; (1,2) ; (2,1) ; (2,2) | flipping status 0 0 0 0
     287              : !    (2,4) receives from (3,2) ; (3,1) ; (3,2) ; (3,1) | flipping status 2 2 4 4
     288              : ! and so on ...
     289              : ! or equivalently
     290              : ! =>   0   receives from 0 ; 1 ; 4 ; 5 | flipping status 0 0 0 0
     291              : !      7   receives from 9 ; 8 ; 9 ; 8 | flipping status 2 2 4 4
     292              : ! from srcs_coord :
     293              : ! => rs_mpo = 0 -> rs_pos = 0,0 -> srcs_coord = [ 1  1  2  2] -> 0(0) 1(0) 4(0) 5(0)
     294              : !                                               [ 1  2  1  2]
     295              : !    rs_mpo = 7 -> rs_pos = 1,3 -> srcs_coord = [ 3  3 -3 -3] -> 9(2) 8(2) 9(4) 8(4)
     296              : !                                               [-2 -1 -2 -1]
     297              : ! schematically :
     298              : ! ij : coordinates in a 2D process grid (starting from 1 just for demonstration)
     299              : ! () : to be flipped from left to right
     300              : ! [] : to be flipped from up to down
     301              : ! {} : to be rotated 180 degrees
     302              : !    11 | 12 | 13 | 14          11   12  |  13   14  | (14) (13) | (12) (11)
     303              : !    -----------------    ==>   21   22  |  23   24  | (24) (23) | (22) (21)
     304              : !    21 | 22 | 23 | 24         ---------------------------------------------
     305              : !    -----------------          31   32  |  33   34  | (34) (33) | (32) (31)
     306              : !    31 | 32 | 33 | 34         [31] [32] | [33] [34] | {34} {33} | {32} {31}
     307              : !                              ---------------------------------------------
     308              : !                              [21] [22] | [23] [24] | {24} {23} | {22} {21}
     309              : !                              [11] [12] | [13] [14] | {14} {13} | {12} {11}
     310              : ! one(two)-sided :
     311              : ! YZ or Y
     312              : ! rs_dim1 = 3 -->  src_pos1 = [1  2  3]
     313              : ! rs_dim2 = 4 -->  src_pos2 = [1  3  -4  -2]
     314              : !                             [2  4  -3  -1]
     315              : ! XZ or X
     316              : ! rs_dim1 = 3 -->  src_pos1 = [1  3  -2]
     317              : !                             [2 -3  -1]
     318              : ! rs_dim2 = 4 -->  src_pos2 = [1  2   3  4]
     319              : ! Z
     320              : ! rs_dim1 = 3 -->  src_pos1 = [1  2  3]
     321              : ! rs_dim2 = 4 -->  src_pos2 = [1  2   3  4]
     322              : 
     323           44 :       rs_group = pw_grid%para%group
     324           44 :       rs_mpo = pw_grid%para%group%mepos
     325           44 :       group_size = pw_grid%para%group%num_pe
     326          132 :       rs_dims = pw_grid%para%group%num_pe_cart
     327              : 
     328          132 :       rs_pos = pw_grid%para%group%mepos_cart
     329           44 :       rs_dim1 = rs_dims(1); rs_dim2 = rs_dims(2)
     330              : 
     331              : ! prepare srcs_coord
     332           76 :       SELECT CASE (neumann_directions)
     333              :       CASE (neumannXYZ, neumannXY)
     334           32 :          maxn_sendrecv = 4
     335           32 :          ALLOCATE (srcs_coord(2, maxn_sendrecv))
     336              : 
     337           32 :          IF (MOD(rs_dim1, 2) .EQ. 0) THEN
     338              :             tmp_size1 = rs_dim1
     339              :          ELSE
     340            0 :             tmp_size1 = rs_dim1 + 1
     341              :          END IF
     342          160 :          ALLOCATE (tmp1_arr(tmp_size1), src_pos1(2, 0:rs_dim1 - 1))
     343              : 
     344           32 :          IF (MOD(rs_dim1, 2) .EQ. 0) THEN
     345          192 :             tmp1_arr(:) = (/(i, i=1, rs_dim1)/)
     346          416 :             src_pos1(:, :) = RESHAPE((/tmp1_arr, -tmp1_arr(tmp_size1:1:-1)/), (/2, rs_dim1/))
     347              :          ELSE
     348            0 :             tmp1_arr(:) = (/(i, i=1, rs_dim1), -rs_dim1/)
     349            0 :             src_pos1(:, :) = RESHAPE((/tmp1_arr, -tmp1_arr(tmp_size1 - 2:1:-1)/), (/2, rs_dim1/))
     350              :          END IF
     351              : !---
     352           32 :          IF (MOD(rs_dim2, 2) .EQ. 0) THEN
     353              :             tmp_size2 = rs_dim2
     354              :          ELSE
     355           32 :             tmp_size2 = rs_dim2 + 1
     356              :          END IF
     357          160 :          ALLOCATE (tmp2_arr(tmp_size2), src_pos2(2, 0:rs_dim2 - 1))
     358              : 
     359           32 :          IF (MOD(rs_dim2, 2) .EQ. 0) THEN
     360            0 :             tmp2_arr(:) = (/(i, i=1, rs_dim2)/)
     361            0 :             src_pos2(:, :) = RESHAPE((/tmp2_arr, -tmp2_arr(tmp_size2:1:-1)/), (/2, rs_dim2/))
     362              :          ELSE
     363          128 :             tmp2_arr(:) = (/(i, i=1, rs_dim2), -rs_dim2/)
     364          288 :             src_pos2(:, :) = RESHAPE((/tmp2_arr, -tmp2_arr(tmp_size2 - 2:1:-1)/), (/2, rs_dim2/))
     365              :          END IF
     366              : !---
     367           96 :          srcs_coord(:, 1) = (/src_pos1(1, rs_pos(1)), src_pos2(1, rs_pos(2))/)
     368           96 :          srcs_coord(:, 2) = (/src_pos1(1, rs_pos(1)), src_pos2(2, rs_pos(2))/)
     369           96 :          srcs_coord(:, 3) = (/src_pos1(2, rs_pos(1)), src_pos2(1, rs_pos(2))/)
     370           96 :          srcs_coord(:, 4) = (/src_pos1(2, rs_pos(1)), src_pos2(2, rs_pos(2))/)
     371              :       CASE (neumannXZ, neumannX)
     372            4 :          maxn_sendrecv = 2
     373            4 :          ALLOCATE (srcs_coord(2, maxn_sendrecv))
     374              : 
     375            4 :          IF (MOD(rs_dim1, 2) .EQ. 0) THEN
     376              :             tmp_size1 = rs_dim1
     377              :          ELSE
     378            0 :             tmp_size1 = rs_dim1 + 1
     379              :          END IF
     380           20 :          ALLOCATE (tmp1_arr(tmp_size1), src_pos1(2, 0:rs_dim1 - 1))
     381              : 
     382            4 :          IF (MOD(rs_dim1, 2) .EQ. 0) THEN
     383           24 :             tmp1_arr(:) = (/(i, i=1, rs_dim1)/)
     384           52 :             src_pos1(:, :) = RESHAPE((/tmp1_arr, -tmp1_arr(tmp_size1:1:-1)/), (/2, rs_dim1/))
     385              :          ELSE
     386            0 :             tmp1_arr(:) = (/(i, i=1, rs_dim1), -rs_dim1/)
     387            0 :             src_pos1(:, :) = RESHAPE((/tmp1_arr, -tmp1_arr(tmp_size1 - 2:1:-1)/), (/2, rs_dim1/))
     388              :          END IF
     389              : !---
     390           12 :          ALLOCATE (src_pos2_onesdd(0:rs_dim2 - 1))
     391           16 :          src_pos2_onesdd(:) = (/(i, i=1, rs_dim2)/)
     392              : !---
     393           12 :          srcs_coord(:, 1) = (/src_pos1(1, rs_pos(1)), src_pos2_onesdd(rs_pos(2))/)
     394           12 :          srcs_coord(:, 2) = (/src_pos1(2, rs_pos(1)), src_pos2_onesdd(rs_pos(2))/)
     395              :       CASE (neumannYZ, neumannY)
     396            4 :          maxn_sendrecv = 2
     397            4 :          ALLOCATE (srcs_coord(2, maxn_sendrecv))
     398              : 
     399           12 :          ALLOCATE (src_pos1_onesdd(0:rs_dim1 - 1))
     400           24 :          src_pos1_onesdd(:) = (/(i, i=1, rs_dim1)/)
     401              : !---
     402            4 :          IF (MOD(rs_dim2, 2) .EQ. 0) THEN
     403              :             tmp_size2 = rs_dim2
     404              :          ELSE
     405            4 :             tmp_size2 = rs_dim2 + 1
     406              :          END IF
     407           20 :          ALLOCATE (tmp2_arr(tmp_size2), src_pos2(2, 0:rs_dim2 - 1))
     408              : 
     409            4 :          IF (MOD(rs_dim2, 2) .EQ. 0) THEN
     410            0 :             tmp2_arr(:) = (/(i, i=1, rs_dim2)/)
     411            0 :             src_pos2(:, :) = RESHAPE((/tmp2_arr, -tmp2_arr(tmp_size2:1:-1)/), (/2, rs_dim2/))
     412              :          ELSE
     413           16 :             tmp2_arr(:) = (/(i, i=1, rs_dim2), -rs_dim2/)
     414           36 :             src_pos2(:, :) = RESHAPE((/tmp2_arr, -tmp2_arr(tmp_size2 - 2:1:-1)/), (/2, rs_dim2/))
     415              :          END IF
     416              : !---
     417           12 :          srcs_coord(:, 1) = (/src_pos1_onesdd(rs_pos(1)), src_pos2(1, rs_pos(2))/)
     418           12 :          srcs_coord(:, 2) = (/src_pos1_onesdd(rs_pos(1)), src_pos2(2, rs_pos(2))/)
     419              :       CASE (neumannZ)
     420            4 :          maxn_sendrecv = 1
     421            4 :          ALLOCATE (srcs_coord(2, maxn_sendrecv))
     422           12 :          ALLOCATE (src_pos1_onesdd(0:rs_dim1 - 1))
     423           12 :          ALLOCATE (src_pos2_onesdd(0:rs_dim2 - 1))
     424              : 
     425           24 :          src_pos1_onesdd(:) = (/(i, i=1, rs_dim1)/)
     426              : !---
     427           16 :          src_pos2_onesdd(:) = (/(i, i=1, rs_dim2)/)
     428              : !---
     429           56 :          srcs_coord(:, 1) = (/src_pos1_onesdd(rs_pos(1)), src_pos2_onesdd(rs_pos(2))/)
     430              :       END SELECT
     431              : 
     432              : ! default flipping status
     433          192 :       flipg_stat = NOT_FLIPPED
     434              : 
     435          192 :       DO k = 1, maxn_sendrecv
     436              : ! convert srcs_coord to pid
     437          444 :          CALL pw_grid%para%group%rank_cart(ABS(srcs_coord(:, k)) - 1, srcs_expand(k))
     438              : ! find out the flipping status
     439          192 :          IF ((srcs_coord(1, k) .GT. 0) .AND. (srcs_coord(2, k) .GT. 0)) THEN
     440           44 :             flipg_stat(k) = NOT_FLIPPED
     441          104 :          ELSE IF ((srcs_coord(1, k) .LT. 0) .AND. (srcs_coord(2, k) .GT. 0)) THEN
     442           36 :             flipg_stat(k) = UD_FLIPPED
     443           68 :          ELSE IF ((srcs_coord(1, k) .GT. 0) .AND. (srcs_coord(2, k) .LT. 0)) THEN
     444           36 :             flipg_stat(k) = LR_FLIPPED
     445              :          ELSE
     446           32 :             flipg_stat(k) = ROTATED
     447              :          END IF
     448              :       END DO
     449              : 
     450              : ! let all the nodes know about each others srcs_expand list
     451          176 :       ALLOCATE (srcs_expand_all(maxn_sendrecv, group_size))
     452          192 :       CALL rs_group%allgather(srcs_expand, srcs_expand_all)
     453              : ! now scan the srcs_expand_all list and check if I am on the srcs_expand list of the other nodes
     454              : ! if that is the case then I am obliged to send data to those nodes (the nodes are on my dests_expand list)
     455           44 :       k = 1
     456          132 :       DO i = 1, group_size
     457          428 :          DO j = 1, maxn_sendrecv
     458          384 :             IF (srcs_expand_all(j, i) .EQ. rs_mpo) THEN
     459          148 :                dests_expand(k) = i - 1
     460          148 :                k = k + 1
     461              :             END IF
     462              :          END DO
     463              :       END DO
     464              : 
     465              : ! find srcs and dests for the reverse procedure :
     466              : ! initialize dests_shrink and srcs_shrink with invalid process id
     467          192 :       dests_shrink = -1
     468           44 :       srcs_shrink = -1
     469              : ! scan the flipping status of the data that I am supposed to receive
     470              : ! if the flipping status for a process is NOT_FLIPPED that means I will have to resend
     471              : ! data to that process in the reverse procedure (the process is on my dests_shrink list)
     472          192 :       DO i = 1, maxn_sendrecv
     473          192 :          IF (flipg_stat(i) .EQ. NOT_FLIPPED) dests_shrink(i) = srcs_expand(i)
     474              :       END DO
     475              : 
     476              : ! let all the nodes know about each others dests_shrink list
     477          132 :       ALLOCATE (dests_shrink_all(maxn_sendrecv, group_size))
     478          192 :       CALL rs_group%allgather(dests_shrink, dests_shrink_all)
     479              : ! now scan the dests_shrink_all list and check if I am on the dests_shrink list of any other node
     480              : ! if that is the case then I'll receive data from that node (the node is on my srcs_shrink list)
     481              : ! note that in the shrinking procedure I will receive from only one node
     482          132 :       DO i = 1, group_size
     483          314 :          DO j = 1, maxn_sendrecv
     484          270 :             IF (dests_shrink_all(j, i) .EQ. rs_mpo) THEN
     485           44 :                srcs_shrink = i - 1
     486           44 :                EXIT
     487              :             END IF
     488              :          END DO
     489              :       END DO
     490              : 
     491           44 :       CALL timestop(handle)
     492              : 
     493           88 :    END SUBROUTINE set_dests_srcs_pid
     494              : 
     495              : ! **************************************************************************************************
     496              : !> \brief expands a pw_r3d_rs_type data to an evenly symmetric pw_r3d_rs_type data that is 8 times
     497              : !>   larger than the original one:
     498              : !>   the even symmetry for a 1D sequence of length n is defined as:
     499              : !>      1 2 3 ... n-2 n-1 n --> 1 2 3 ... n-2 n-1 n n-1 n-2 ... 3 2
     500              : !>   and is generalized to 3D by applying the same rule in all three directions
     501              : !>
     502              : !> \param neumann_directions directions in which dct should be performed
     503              : !> \param recv_msgs_bnds bounds of the messages to be received
     504              : !> \param dests_expand list of the destination processes
     505              : !> \param srcs_expand list of the source processes
     506              : !> \param flipg_stat flipping status for the received data chunks
     507              : !> \param bounds_shftd bounds of the original grid shifted to have g0 in the middle of the cell
     508              : !> \param pw_in the original plane wave data
     509              : !> \param pw_expanded the pw data after expansion
     510              : !> \par History
     511              : !>       07.2014 created [Hossein Bani-Hashemian]
     512              : !> \author Mohammad Hossein Bani-Hashemian
     513              : ! **************************************************************************************************
     514          674 :    SUBROUTINE pw_expand(neumann_directions, recv_msgs_bnds, dests_expand, srcs_expand, &
     515              :                         flipg_stat, bounds_shftd, pw_in, pw_expanded)
     516              : 
     517              :       INTEGER, INTENT(IN)                                :: neumann_directions
     518              :       INTEGER, DIMENSION(:, :, :), INTENT(IN), POINTER   :: recv_msgs_bnds
     519              :       INTEGER, DIMENSION(:), INTENT(IN), POINTER         :: dests_expand, srcs_expand, flipg_stat
     520              :       INTEGER, DIMENSION(2, 3), INTENT(IN)               :: bounds_shftd
     521              :       TYPE(pw_r3d_rs_type), INTENT(IN)                   :: pw_in
     522              :       TYPE(pw_r3d_rs_type), INTENT(INOUT)                :: pw_expanded
     523              : 
     524              :       CHARACTER(LEN=*), PARAMETER                        :: routineN = 'pw_expand'
     525              : 
     526              :       INTEGER :: group_size, handle, i, ind, lb1, lb1_loc, lb1_new, lb2, lb2_loc, lb2_new, lb3, &
     527              :          lb3_loc, lb3_new, loc, maxn_sendrecv, rs_mpo, ub1, ub1_loc, ub1_new, ub2, ub2_loc, &
     528              :          ub2_new, ub3, ub3_loc, ub3_new
     529          674 :       INTEGER, ALLOCATABLE, DIMENSION(:)                 :: dest_hist, src_hist
     530          674 :       INTEGER, ALLOCATABLE, DIMENSION(:, :, :)           :: pcs_bnds
     531              :       INTEGER, DIMENSION(2, 3)                           :: bounds_local_new
     532          674 :       REAL(dp), ALLOCATABLE, DIMENSION(:, :, :)          :: catd, catd_flipdbf, cr3d_xpndd
     533          674 :       TYPE(dct_msg_type), ALLOCATABLE, DIMENSION(:)      :: pcs, recv_msgs
     534              :       TYPE(mp_comm_type)                                 :: rs_group
     535          674 :       TYPE(mp_request_type), ALLOCATABLE, DIMENSION(:)   :: recv_reqs, send_reqs
     536              :       TYPE(pw_grid_type), POINTER                        :: pw_grid
     537              : 
     538          674 :       CALL timeset(routineN, handle)
     539              : 
     540          674 :       pw_grid => pw_in%pw_grid
     541          674 :       rs_group = pw_grid%para%group
     542          674 :       rs_mpo = pw_grid%para%group%mepos
     543          674 :       group_size = pw_grid%para%group%num_pe
     544              : 
     545         6740 :       bounds_local_new = pw_expanded%pw_grid%bounds_local
     546              : 
     547         1240 :       SELECT CASE (neumann_directions)
     548              :       CASE (neumannXYZ, neumannXY)
     549          566 :          maxn_sendrecv = 4
     550              :       CASE (neumannX, neumannY, neumannXZ, neumannYZ)
     551           76 :          maxn_sendrecv = 2
     552              :       CASE (neumannZ)
     553          674 :          maxn_sendrecv = 1
     554              :       END SELECT
     555              : 
     556         7592 :       ALLOCATE (recv_reqs(maxn_sendrecv), send_reqs(maxn_sendrecv))
     557         2022 :       ALLOCATE (dest_hist(maxn_sendrecv), src_hist(maxn_sendrecv))
     558         1348 :       ALLOCATE (pcs_bnds(2, 3, maxn_sendrecv))
     559              : 
     560         6918 :       ALLOCATE (pcs(maxn_sendrecv), recv_msgs(maxn_sendrecv))
     561              : 
     562         3122 :       send_reqs = mp_request_null
     563         3122 :       recv_reqs = mp_request_null
     564              : 
     565         3122 :       src_hist = -1 ! keeps the history of sources
     566         3122 :       dest_hist = -1 ! keeps the history of destinations
     567              : 
     568         3122 :       DO i = 1, maxn_sendrecv
     569              : ! no need to send to myself or to the destination that I have already sent to
     570         8356 :          IF ((dests_expand(i) .NE. rs_mpo) .AND. .NOT. ANY(dest_hist .EQ. dests_expand(i))) THEN
     571          598 :             CALL rs_group%isend(pw_in%array, dests_expand(i), send_reqs(i))
     572              :          END IF
     573         3122 :          dest_hist(i) = dests_expand(i)
     574              :       END DO
     575              : 
     576         3122 :       DO i = 1, maxn_sendrecv
     577         2448 :          lb1 = recv_msgs_bnds(1, 1, i)
     578         2448 :          ub1 = recv_msgs_bnds(2, 1, i)
     579         2448 :          lb2 = recv_msgs_bnds(1, 2, i)
     580         2448 :          ub2 = recv_msgs_bnds(2, 2, i)
     581         2448 :          lb3 = recv_msgs_bnds(1, 3, i)
     582         2448 :          ub3 = recv_msgs_bnds(2, 3, i)
     583              : ! no need to receive from myself
     584         2448 :          IF (srcs_expand(i) .EQ. rs_mpo) THEN
     585         6420 :             ALLOCATE (recv_msgs(i)%msg(lb1:ub1, lb2:ub2, lb3:ub3))
     586     77277082 :             recv_msgs(i)%msg(:, :, :) = pw_in%array
     587              : ! if I have already received data from the source, just use the one from the last time
     588         4624 :          ELSE IF (ANY(src_hist .EQ. srcs_expand(i))) THEN
     589         3396 :             loc = MINLOC(ABS(src_hist - srcs_expand(i)), 1)
     590          566 :             lb1_loc = recv_msgs_bnds(1, 1, loc)
     591          566 :             ub1_loc = recv_msgs_bnds(2, 1, loc)
     592          566 :             lb2_loc = recv_msgs_bnds(1, 2, loc)
     593          566 :             ub2_loc = recv_msgs_bnds(2, 2, loc)
     594          566 :             lb3_loc = recv_msgs_bnds(1, 3, loc)
     595          566 :             ub3_loc = recv_msgs_bnds(2, 3, loc)
     596         2830 :             ALLOCATE (recv_msgs(i)%msg(lb1_loc:ub1_loc, lb2_loc:ub2_loc, lb3_loc:ub3_loc))
     597     35018395 :             recv_msgs(i)%msg(:, :, :) = recv_msgs(loc)%msg
     598              :          ELSE
     599         2990 :             ALLOCATE (recv_msgs(i)%msg(lb1:ub1, lb2:ub2, lb3:ub3))
     600          598 :             CALL rs_group%irecv(recv_msgs(i)%msg, srcs_expand(i), recv_reqs(i))
     601          598 :             CALL recv_reqs(i)%wait()
     602              :          END IF
     603         3122 :          src_hist(i) = srcs_expand(i)
     604              :       END DO
     605              : ! cleanup mpi_request to prevent memory leak
     606          674 :       CALL mp_waitall(send_reqs(:))
     607              : 
     608              : ! flip the received data according on the flipping status
     609         3122 :       DO i = 1, maxn_sendrecv
     610          674 :          SELECT CASE (flipg_stat(i))
     611              :          CASE (NOT_FLIPPED)
     612          674 :             lb1 = recv_msgs_bnds(1, 1, i)
     613          674 :             ub1 = recv_msgs_bnds(2, 1, i)
     614          674 :             lb2 = recv_msgs_bnds(1, 2, i)
     615          674 :             ub2 = recv_msgs_bnds(2, 2, i)
     616          674 :             lb3 = recv_msgs_bnds(1, 3, i)
     617          674 :             ub3 = recv_msgs_bnds(2, 3, i)
     618         3370 :             ALLOCATE (pcs(i)%msg(lb1:ub1, lb2:ub2, lb3:ub3))
     619     40162813 :             pcs(i)%msg(:, :, :) = recv_msgs(i)%msg
     620              :          CASE (UD_FLIPPED)
     621          598 :             CALL flipud(recv_msgs(i)%msg, pcs(i)%msg, bounds_shftd)
     622              :          CASE (LR_FLIPPED)
     623          610 :             CALL fliplr(recv_msgs(i)%msg, pcs(i)%msg, bounds_shftd)
     624              :          CASE (BF_FLIPPED)
     625            0 :             CALL flipbf(recv_msgs(i)%msg, pcs(i)%msg, bounds_shftd)
     626              :          CASE (ROTATED)
     627         2448 :             CALL rot180(recv_msgs(i)%msg, pcs(i)%msg, bounds_shftd)
     628              :          END SELECT
     629              :       END DO
     630              : ! concatenate the received (flipped) data store the result as catd
     631              : ! need the bounds of the four pieces for concatenation
     632         3122 :       DO i = 1, maxn_sendrecv
     633         2448 :          pcs_bnds(1, 1, i) = LBOUND(pcs(i)%msg, 1)
     634         2448 :          pcs_bnds(2, 1, i) = UBOUND(pcs(i)%msg, 1)
     635         2448 :          pcs_bnds(1, 2, i) = LBOUND(pcs(i)%msg, 2)
     636         2448 :          pcs_bnds(2, 2, i) = UBOUND(pcs(i)%msg, 2)
     637         2448 :          pcs_bnds(1, 3, i) = LBOUND(pcs(i)%msg, 3)
     638         5570 :          pcs_bnds(2, 3, i) = UBOUND(pcs(i)%msg, 3)
     639              :       END DO
     640              : 
     641          674 :       lb1_new = bounds_local_new(1, 1); ub1_new = bounds_local_new(2, 1)
     642          674 :       lb2_new = bounds_local_new(1, 2); ub2_new = bounds_local_new(2, 2)
     643          674 :       lb3_new = bounds_local_new(1, 3); ub3_new = bounds_local_new(2, 3)
     644              : 
     645          642 :       SELECT CASE (neumann_directions)
     646              :       CASE (neumannXYZ, neumannXZ, neumannYZ, neumannZ)
     647          642 :          ind = INT(0.5*(ub3_new + lb3_new + 1))
     648         3210 :          ALLOCATE (catd(lb1_new:ub1_new, lb2_new:ub2_new, lb3_new:ind - 1))
     649              :       CASE (neumannXY, neumannX, neumannY)
     650          802 :          ALLOCATE (catd(lb1_new:ub1_new, lb2_new:ub2_new, lb3_new:ub3_new))
     651              :       END SELECT
     652              : 
     653         3122 :       DO i = 1, maxn_sendrecv
     654              :          catd(pcs_bnds(1, 1, i):pcs_bnds(2, 1, i), &
     655              :               pcs_bnds(1, 2, i):pcs_bnds(2, 2, i), &
     656    148838818 :               pcs_bnds(1, 3, i):pcs_bnds(2, 3, i)) = pcs(i)%msg
     657              :       END DO
     658              : 
     659              : ! flip catd from back to front
     660          674 :       CALL flipbf(catd, catd_flipdbf, bounds_shftd)
     661              : ! concatenate catd and catd_flipdbf to get cr3d_xpndd
     662         3370 :       ALLOCATE (cr3d_xpndd(lb1_new:ub1_new, lb2_new:ub2_new, lb3_new:ub3_new))
     663          642 :       SELECT CASE (neumann_directions)
     664              :       CASE (neumannXYZ, neumannXZ, neumannYZ, neumannZ)
     665    143195460 :          cr3d_xpndd(:, :, lb3_new:ind - 1) = catd
     666    143195460 :          cr3d_xpndd(:, :, ind:ub3_new) = catd_flipdbf
     667              :       CASE (neumannXY, neumannX, neumannY)
     668      2982914 :          cr3d_xpndd(:, :, :) = catd
     669              :       END SELECT
     670              : 
     671    289372550 :       pw_expanded%array = cr3d_xpndd
     672              : 
     673         3122 :       DO i = 1, maxn_sendrecv
     674         2448 :          DEALLOCATE (pcs(i)%msg)
     675         3122 :          DEALLOCATE (recv_msgs(i)%msg)
     676              :       END DO
     677         5570 :       DEALLOCATE (pcs, recv_msgs)
     678          674 :       DEALLOCATE (catd, catd_flipdbf, cr3d_xpndd)
     679              : 
     680          674 :       CALL timestop(handle)
     681              : 
     682         1348 :    END SUBROUTINE pw_expand
     683              : 
     684              : ! **************************************************************************************************
     685              : !> \brief shrinks an evenly symmetric pw_r3d_rs_type data to a pw_r3d_rs_type data that is 8
     686              : !>        times smaller (the reverse procedure of pw_expand).
     687              : !>
     688              : !> \param neumann_directions directions in which dct should be performed
     689              : !> \param dests_shrink list of the destination processes
     690              : !> \param srcs_shrink list of the source processes
     691              : !> \param bounds_local_shftd local bounds of the original grid after shifting
     692              : !> \param pw_in the original plane wave data
     693              : !> \param pw_shrinked the shrunk plane wave data
     694              : !> \par History
     695              : !>       07.2014 created [Hossein Bani-Hashemian]
     696              : !> \author Mohammad Hossein Bani-Hashemian
     697              : ! **************************************************************************************************
     698          312 :    SUBROUTINE pw_shrink(neumann_directions, dests_shrink, srcs_shrink, bounds_local_shftd, &
     699              :                         pw_in, pw_shrinked)
     700              : 
     701              :       INTEGER, INTENT(IN)                                :: neumann_directions
     702              :       INTEGER, DIMENSION(:), INTENT(IN), POINTER         :: dests_shrink
     703              :       INTEGER, INTENT(INOUT)                             :: srcs_shrink
     704              :       INTEGER, DIMENSION(2, 3), INTENT(IN)               :: bounds_local_shftd
     705              :       TYPE(pw_r3d_rs_type), INTENT(IN)                   :: pw_in
     706              :       TYPE(pw_r3d_rs_type), INTENT(INOUT)                :: pw_shrinked
     707              : 
     708              :       CHARACTER(LEN=*), PARAMETER                        :: routineN = 'pw_shrink'
     709              : 
     710              :       INTEGER :: group_size, handle, i, lb1_orig, lb1_xpnd, lb2_orig, lb2_xpnd, lb3_orig, &
     711              :          lb3_xpnd, maxn_sendrecv, rs_mpo, send_lb1, send_lb2, send_lb3, send_ub1, send_ub2, &
     712              :          send_ub3, tag, ub1_orig, ub1_xpnd, ub2_orig, ub2_xpnd, ub3_orig, ub3_xpnd
     713          312 :       INTEGER, ALLOCATABLE, DIMENSION(:, :, :)           :: bounds_local_all
     714              :       INTEGER, DIMENSION(2, 3)                           :: bounds_local_xpnd
     715          312 :       REAL(dp), ALLOCATABLE, DIMENSION(:, :, :)          :: cr3d, send_crmsg
     716              :       TYPE(mp_comm_type)                                 :: rs_group
     717              :       TYPE(pw_grid_type), POINTER                        :: pw_grid_orig
     718              : 
     719          312 :       CALL timeset(routineN, handle)
     720              : 
     721          312 :       pw_grid_orig => pw_shrinked%pw_grid
     722          312 :       rs_group = pw_grid_orig%para%group
     723          312 :       rs_mpo = pw_grid_orig%para%group%mepos
     724          312 :       group_size = pw_grid_orig%para%group%num_pe
     725         3120 :       bounds_local_xpnd = pw_in%pw_grid%bounds_local
     726          312 :       tag = 1
     727              : 
     728          576 :       SELECT CASE (neumann_directions)
     729              :       CASE (neumannXYZ, neumannXY)
     730          264 :          maxn_sendrecv = 4
     731              :       CASE (neumannX, neumannY, neumannXZ, neumannYZ)
     732           32 :          maxn_sendrecv = 2
     733              :       CASE (neumannZ)
     734          312 :          maxn_sendrecv = 1
     735              :       END SELECT
     736              : 
     737              : ! cosine transform is a real transform. The cosine transform of a 3D data must be real and 3D.
     738          312 :       lb1_xpnd = bounds_local_xpnd(1, 1)
     739          312 :       ub1_xpnd = bounds_local_xpnd(2, 1)
     740          312 :       lb2_xpnd = bounds_local_xpnd(1, 2)
     741          312 :       ub2_xpnd = bounds_local_xpnd(2, 2)
     742          312 :       lb3_xpnd = bounds_local_xpnd(1, 3)
     743          312 :       ub3_xpnd = bounds_local_xpnd(2, 3)
     744         1560 :       ALLOCATE (cr3d(lb1_xpnd:ub1_xpnd, lb2_xpnd:ub2_xpnd, lb3_xpnd:ub3_xpnd))
     745    132941848 :       cr3d(:, :, :) = pw_in%array
     746              : 
     747              : ! let all the nodes know about each others shifted local bounds
     748          936 :       ALLOCATE (bounds_local_all(2, 3, group_size))
     749          312 :       CALL rs_group%allgather(bounds_local_shftd, bounds_local_all)
     750              : 
     751         1448 :       DO i = 1, maxn_sendrecv
     752              : ! no need to send to myself or to an invalid destination (pid = -1)
     753         1448 :          IF ((dests_shrink(i) .NE. rs_mpo) .AND. (dests_shrink(i) .NE. -1)) THEN
     754          140 :             send_lb1 = bounds_local_all(1, 1, dests_shrink(i) + 1)
     755          140 :             send_ub1 = bounds_local_all(2, 1, dests_shrink(i) + 1)
     756          140 :             send_lb2 = bounds_local_all(1, 2, dests_shrink(i) + 1)
     757          140 :             send_ub2 = bounds_local_all(2, 2, dests_shrink(i) + 1)
     758          140 :             send_lb3 = bounds_local_all(1, 3, dests_shrink(i) + 1)
     759          140 :             send_ub3 = bounds_local_all(2, 3, dests_shrink(i) + 1)
     760              : 
     761          700 :             ALLOCATE (send_crmsg(send_lb1:send_ub1, send_lb2:send_ub2, send_lb3:send_ub3))
     762      8448232 :             send_crmsg(:, :, :) = cr3d(send_lb1:send_ub1, send_lb2:send_ub2, send_lb3:send_ub3)
     763          140 :             CALL rs_group%send(send_crmsg, dests_shrink(i), tag)
     764          140 :             DEALLOCATE (send_crmsg)
     765              :          END IF
     766              :       END DO
     767              : 
     768          312 :       lb1_orig = bounds_local_shftd(1, 1)
     769          312 :       ub1_orig = bounds_local_shftd(2, 1)
     770          312 :       lb2_orig = bounds_local_shftd(1, 2)
     771          312 :       ub2_orig = bounds_local_shftd(2, 2)
     772          312 :       lb3_orig = bounds_local_shftd(1, 3)
     773          312 :       ub3_orig = bounds_local_shftd(2, 3)
     774              : 
     775              : ! no need to receive from myself
     776          312 :       IF (srcs_shrink .EQ. rs_mpo) THEN
     777      9996804 :          pw_shrinked%array = cr3d(lb1_orig:ub1_orig, lb2_orig:ub2_orig, lb3_orig:ub3_orig)
     778          140 :       ELSE IF (srcs_shrink .EQ. -1) THEN
     779              : ! the source is invalid ... do nothing
     780              :       ELSE
     781          140 :          CALL rs_group%recv(pw_shrinked%array, srcs_shrink, tag)
     782              :       END IF
     783              : 
     784          312 :       DEALLOCATE (bounds_local_all)
     785          312 :       DEALLOCATE (cr3d)
     786          312 :       CALL timestop(handle)
     787              : 
     788          624 :    END SUBROUTINE pw_shrink
     789              : 
     790              : ! **************************************************************************************************
     791              : !> \brief   flips a 3d (real dp) array up to down (the way needed to expand data
     792              : !>          as explained in the description of the afore-defined subroutines)
     793              : !> \param cr3d_in input array
     794              : !> \param cr3d_out output array
     795              : !> \param bounds global lower and upper bounds
     796              : !> \par History
     797              : !>       07.2014 created [Hossein Bani-Hashemian]
     798              : !> \author Mohammad Hossein Bani-Hashemian
     799              : ! **************************************************************************************************
     800          598 :    SUBROUTINE flipud(cr3d_in, cr3d_out, bounds)
     801              : 
     802              :       REAL(dp), ALLOCATABLE, DIMENSION(:, :, :), &
     803              :          INTENT(IN)                                      :: cr3d_in
     804              :       REAL(dp), ALLOCATABLE, DIMENSION(:, :, :), &
     805              :          INTENT(INOUT)                                   :: cr3d_out
     806              :       INTEGER, DIMENSION(2, 3), INTENT(IN)               :: bounds
     807              : 
     808              :       CHARACTER(LEN=*), PARAMETER                        :: routineN = 'flipud'
     809              : 
     810              :       INTEGER :: handle, i, lb1, lb1_glbl, lb1_new, lb2, lb2_glbl, lb2_new, lb3, lb3_glbl, &
     811              :          lb3_new, ub1, ub1_glbl, ub1_new, ub2, ub2_glbl, ub2_new, ub3, ub3_glbl, ub3_new
     812          598 :       INTEGER, ALLOCATABLE, DIMENSION(:)                 :: indx
     813              :       INTEGER, DIMENSION(2, 3)                           :: bndsl, bndsl_new
     814              : 
     815          598 :       CALL timeset(routineN, handle)
     816              : 
     817         1196 :       lb1 = LBOUND(cr3d_in, 1); ub1 = UBOUND(cr3d_in, 1)
     818         1196 :       lb2 = LBOUND(cr3d_in, 2); ub2 = UBOUND(cr3d_in, 2)
     819         1196 :       lb3 = LBOUND(cr3d_in, 3); ub3 = UBOUND(cr3d_in, 3)
     820              : 
     821          598 :       lb1_glbl = bounds(1, 1); ub1_glbl = bounds(2, 1)
     822          598 :       lb2_glbl = bounds(1, 2); ub2_glbl = bounds(2, 2)
     823          598 :       lb3_glbl = bounds(1, 3); ub3_glbl = bounds(2, 3)
     824              : 
     825         4186 :       bndsl = RESHAPE((/lb1, ub1, lb2, ub2, lb3, ub3/), (/2, 3/))
     826          598 :       bndsl_new = flipud_bounds_local(bndsl, bounds)
     827              : 
     828          598 :       lb1_new = bndsl_new(1, 1); ub1_new = bndsl_new(2, 1)
     829          598 :       lb2_new = bndsl_new(1, 2); ub2_new = bndsl_new(2, 2)
     830          598 :       lb3_new = bndsl_new(1, 3); ub3_new = bndsl_new(2, 3)
     831              : 
     832         2990 :       ALLOCATE (cr3d_out(lb1_new:ub1_new, lb2_new:ub2_new, lb3_new:ub3_new))
     833     36542667 :       cr3d_out = 0.0_dp
     834              : 
     835              : ! set the data at the missing grid points (in a periodic grid) equal to the data at
     836              : ! the last existing grid points
     837         1794 :       ALLOCATE (indx(ub1_new - lb1_new + 1))
     838        32870 :       indx(:) = (/(i, i=2*(ub1_glbl + 1) - lb1_new, 2*(ub1_glbl + 1) - ub1_new, -1)/)
     839          598 :       IF (lb1_new .EQ. ub1_glbl + 1) indx(1) = indx(2)
     840     36542667 :       cr3d_out(lb1_new:ub1_new, lb2_new:ub2_new, lb3_new:ub3_new) = cr3d_in(indx, :, :)
     841              : 
     842          598 :       CALL timestop(handle)
     843              : 
     844         1196 :    END SUBROUTINE flipud
     845              : 
     846              : ! **************************************************************************************************
     847              : !> \brief   flips a 3d (real dp) array left to right (the way needed to expand data
     848              : !>          as explained in the description of the afore-defined subroutines)
     849              : !> \param cr3d_in input array
     850              : !> \param cr3d_out output array
     851              : !> \param bounds global lower and upper bounds
     852              : !> \par History
     853              : !>       07.2014 created [Hossein Bani-Hashemian]
     854              : !> \author Mohammad Hossein Bani-Hashemian
     855              : ! **************************************************************************************************
     856          610 :    SUBROUTINE fliplr(cr3d_in, cr3d_out, bounds)
     857              : 
     858              :       REAL(dp), ALLOCATABLE, DIMENSION(:, :, :), &
     859              :          INTENT(IN)                                      :: cr3d_in
     860              :       REAL(dp), ALLOCATABLE, DIMENSION(:, :, :), &
     861              :          INTENT(INOUT)                                   :: cr3d_out
     862              :       INTEGER, DIMENSION(2, 3), INTENT(IN)               :: bounds
     863              : 
     864              :       CHARACTER(LEN=*), PARAMETER                        :: routineN = 'fliplr'
     865              : 
     866              :       INTEGER :: handle, i, lb1, lb1_glbl, lb1_new, lb2, lb2_glbl, lb2_new, lb3, lb3_glbl, &
     867              :          lb3_new, ub1, ub1_glbl, ub1_new, ub2, ub2_glbl, ub2_new, ub3, ub3_glbl, ub3_new
     868          610 :       INTEGER, ALLOCATABLE, DIMENSION(:)                 :: indy
     869              :       INTEGER, DIMENSION(2, 3)                           :: bndsl, bndsl_new
     870              : 
     871          610 :       CALL timeset(routineN, handle)
     872              : 
     873         1220 :       lb1 = LBOUND(cr3d_in, 1); ub1 = UBOUND(cr3d_in, 1)
     874         1220 :       lb2 = LBOUND(cr3d_in, 2); ub2 = UBOUND(cr3d_in, 2)
     875         1220 :       lb3 = LBOUND(cr3d_in, 3); ub3 = UBOUND(cr3d_in, 3)
     876              : 
     877          610 :       lb1_glbl = bounds(1, 1); ub1_glbl = bounds(2, 1)
     878          610 :       lb2_glbl = bounds(1, 2); ub2_glbl = bounds(2, 2)
     879          610 :       lb3_glbl = bounds(1, 3); ub3_glbl = bounds(2, 3)
     880              : 
     881         4270 :       bndsl = RESHAPE((/lb1, ub1, lb2, ub2, lb3, ub3/), (/2, 3/))
     882          610 :       bndsl_new = fliplr_bounds_local(bndsl, bounds)
     883              : 
     884          610 :       lb1_new = bndsl_new(1, 1); ub1_new = bndsl_new(2, 1)
     885          610 :       lb2_new = bndsl_new(1, 2); ub2_new = bndsl_new(2, 2)
     886          610 :       lb3_new = bndsl_new(1, 3); ub3_new = bndsl_new(2, 3)
     887              : 
     888         3050 :       ALLOCATE (cr3d_out(lb1_new:ub1_new, lb2_new:ub2_new, lb3_new:ub3_new))
     889     37114269 :       cr3d_out = 0.0_dp
     890              : 
     891              : ! set the data at the missing grid points (in a periodic grid) equal to the data at
     892              : ! the last existing grid points
     893         1830 :       ALLOCATE (indy(ub2_new - lb2_new + 1))
     894        59152 :       indy(:) = (/(i, i=2*(ub2_glbl + 1) - lb2_new, 2*(ub2_glbl + 1) - ub2_new, -1)/)
     895          610 :       IF (lb2_new .EQ. ub2_glbl + 1) indy(1) = indy(2)
     896     37114269 :       cr3d_out(lb1_new:ub1_new, lb2_new:ub2_new, lb3_new:ub3_new) = cr3d_in(:, indy, :)
     897              : 
     898          610 :       CALL timestop(handle)
     899              : 
     900         1220 :    END SUBROUTINE fliplr
     901              : 
     902              : ! **************************************************************************************************
     903              : !> \brief   flips a 3d (real dp) array back to front (the way needed to expand data
     904              : !>          as explained in the description of the afore-defined subroutines)
     905              : !> \param cr3d_in input array
     906              : !> \param cr3d_out output array
     907              : !> \param bounds global lower and upper bounds
     908              : !> \par History
     909              : !>       07.2014 created [Hossein Bani-Hashemian]
     910              : !> \author Mohammad Hossein Bani-Hashemian
     911              : ! **************************************************************************************************
     912          674 :    SUBROUTINE flipbf(cr3d_in, cr3d_out, bounds)
     913              : 
     914              :       REAL(dp), ALLOCATABLE, DIMENSION(:, :, :), &
     915              :          INTENT(IN)                                      :: cr3d_in
     916              :       REAL(dp), ALLOCATABLE, DIMENSION(:, :, :), &
     917              :          INTENT(INOUT)                                   :: cr3d_out
     918              :       INTEGER, DIMENSION(2, 3), INTENT(IN)               :: bounds
     919              : 
     920              :       CHARACTER(LEN=*), PARAMETER                        :: routineN = 'flipbf'
     921              : 
     922              :       INTEGER :: handle, i, lb1, lb1_glbl, lb1_new, lb2, lb2_glbl, lb2_new, lb3, lb3_glbl, &
     923              :          lb3_new, ub1, ub1_glbl, ub1_new, ub2, ub2_glbl, ub2_new, ub3, ub3_glbl, ub3_new
     924          674 :       INTEGER, ALLOCATABLE, DIMENSION(:)                 :: indz
     925              :       INTEGER, DIMENSION(2, 3)                           :: bndsl, bndsl_new
     926              : 
     927          674 :       CALL timeset(routineN, handle)
     928              : 
     929         1348 :       lb1 = LBOUND(cr3d_in, 1); ub1 = UBOUND(cr3d_in, 1)
     930         1348 :       lb2 = LBOUND(cr3d_in, 2); ub2 = UBOUND(cr3d_in, 2)
     931         1348 :       lb3 = LBOUND(cr3d_in, 3); ub3 = UBOUND(cr3d_in, 3)
     932              : 
     933          674 :       lb1_glbl = bounds(1, 1); ub1_glbl = bounds(2, 1)
     934          674 :       lb2_glbl = bounds(1, 2); ub2_glbl = bounds(2, 2)
     935          674 :       lb3_glbl = bounds(1, 3); ub3_glbl = bounds(2, 3)
     936              : 
     937         4718 :       bndsl = RESHAPE((/lb1, ub1, lb2, ub2, lb3, ub3/), (/2, 3/))
     938          674 :       bndsl_new = flipbf_bounds_local(bndsl, bounds)
     939              : 
     940          674 :       lb1_new = bndsl_new(1, 1); ub1_new = bndsl_new(2, 1)
     941          674 :       lb2_new = bndsl_new(1, 2); ub2_new = bndsl_new(2, 2)
     942          674 :       lb3_new = bndsl_new(1, 3); ub3_new = bndsl_new(2, 3)
     943              : 
     944         3370 :       ALLOCATE (cr3d_out(lb1_new:ub1_new, lb2_new:ub2_new, lb3_new:ub3_new))
     945    146177732 :       cr3d_out = 0.0_dp
     946              : 
     947              : ! set the data at the missing grid points (in a periodic grid) equal to the data at
     948              : ! the last existing grid points
     949         2022 :       ALLOCATE (indz(ub3_new - lb3_new + 1))
     950        63024 :       indz(:) = (/(i, i=2*(ub3_glbl + 1) - lb3_new, 2*(ub3_glbl + 1) - ub3_new, -1)/)
     951          674 :       IF (lb3_new .EQ. ub3_glbl + 1) indz(1) = indz(2)
     952    146177732 :       cr3d_out(lb1_new:ub1_new, lb2_new:ub2_new, lb3_new:ub3_new) = cr3d_in(:, :, indz)
     953              : 
     954          674 :       CALL timestop(handle)
     955              : 
     956         1348 :    END SUBROUTINE flipbf
     957              : 
     958              : ! **************************************************************************************************
     959              : !> \brief   rotates a 3d (real dp) array by 180 degrees (the way needed to expand data
     960              : !>          as explained in the description of the afore-defined subroutines)
     961              : !> \param cr3d_in input array
     962              : !> \param cr3d_out output array
     963              : !> \param bounds global lower and upper bounds
     964              : !> \par History
     965              : !>       07.2014 created [Hossein Bani-Hashemian]
     966              : !> \author Mohammad Hossein Bani-Hashemian
     967              : ! **************************************************************************************************
     968          566 :    SUBROUTINE rot180(cr3d_in, cr3d_out, bounds)
     969              : 
     970              :       REAL(dp), ALLOCATABLE, DIMENSION(:, :, :), &
     971              :          INTENT(IN)                                      :: cr3d_in
     972              :       REAL(dp), ALLOCATABLE, DIMENSION(:, :, :), &
     973              :          INTENT(INOUT)                                   :: cr3d_out
     974              :       INTEGER, DIMENSION(2, 3), INTENT(IN)               :: bounds
     975              : 
     976              :       CHARACTER(LEN=*), PARAMETER                        :: routineN = 'rot180'
     977              : 
     978              :       INTEGER :: handle, i, lb1, lb1_glbl, lb1_new, lb2, lb2_glbl, lb2_new, lb3, lb3_glbl, &
     979              :          lb3_new, ub1, ub1_glbl, ub1_new, ub2, ub2_glbl, ub2_new, ub3, ub3_glbl, ub3_new
     980          566 :       INTEGER, ALLOCATABLE, DIMENSION(:)                 :: indx, indy
     981              :       INTEGER, DIMENSION(2, 3)                           :: bndsl, bndsl_new
     982              : 
     983          566 :       CALL timeset(routineN, handle)
     984              : 
     985         1132 :       lb1 = LBOUND(cr3d_in, 1); ub1 = UBOUND(cr3d_in, 1)
     986         1132 :       lb2 = LBOUND(cr3d_in, 2); ub2 = UBOUND(cr3d_in, 2)
     987         1132 :       lb3 = LBOUND(cr3d_in, 3); ub3 = UBOUND(cr3d_in, 3)
     988              : 
     989          566 :       lb1_glbl = bounds(1, 1); ub1_glbl = bounds(2, 1)
     990          566 :       lb2_glbl = bounds(1, 2); ub2_glbl = bounds(2, 2)
     991          566 :       lb3_glbl = bounds(1, 3); ub3_glbl = bounds(2, 3)
     992              : 
     993         3962 :       bndsl = RESHAPE((/lb1, ub1, lb2, ub2, lb3, ub3/), (/2, 3/))
     994          566 :       bndsl_new = rot180_bounds_local(bndsl, bounds)
     995              : 
     996          566 :       lb1_new = bndsl_new(1, 1); ub1_new = bndsl_new(2, 1)
     997          566 :       lb2_new = bndsl_new(1, 2); ub2_new = bndsl_new(2, 2)
     998          566 :       lb3_new = bndsl_new(1, 3); ub3_new = bndsl_new(2, 3)
     999              : 
    1000         2830 :       ALLOCATE (cr3d_out(lb1_new:ub1_new, lb2_new:ub2_new, lb3_new:ub3_new))
    1001     35018395 :       cr3d_out = 0.0_dp
    1002              : 
    1003              : ! set the data at the missing grid points (in a periodic grid) equal to the data at
    1004              : ! the last existing grid points
    1005         2830 :       ALLOCATE (indx(ub1_new - lb1_new + 1), indy(ub2_new - lb2_new + 1))
    1006        31366 :       indx(:) = (/(i, i=2*(ub1_glbl + 1) - lb1_new, 2*(ub1_glbl + 1) - ub1_new, -1)/)
    1007        55104 :       indy(:) = (/(i, i=2*(ub2_glbl + 1) - lb2_new, 2*(ub2_glbl + 1) - ub2_new, -1)/)
    1008          566 :       IF (lb1_new .EQ. ub1_glbl + 1) indx(1) = indx(2)
    1009          566 :       IF (lb2_new .EQ. ub2_glbl + 1) indy(1) = indy(2)
    1010     35018395 :       cr3d_out(lb1_new:ub1_new, lb2_new:ub2_new, lb3_new:ub3_new) = cr3d_in(indx, indy, :)
    1011              : 
    1012          566 :       CALL timestop(handle)
    1013              : 
    1014         1132 :    END SUBROUTINE rot180
    1015              : 
    1016              : ! **************************************************************************************************
    1017              : !> \brief   calculates the global and local bounds of the expanded data
    1018              : !> \param pw_grid original plane-wave grid
    1019              : !> \param neumann_directions directions in which dct should be performed
    1020              : !> \param srcs_expand list of the source processes (pw_expand)
    1021              : !> \param flipg_stat flipping status for the received data chunks (pw_expand)
    1022              : !> \param bounds_shftd bounds of the original grid shifted to have g0 in the middle of the cell
    1023              : !> \param bounds_local_shftd local bounds of the original grid after shifting
    1024              : !> \param recv_msgs_bnds bounds of the messages to be received (pw_expand)
    1025              : !> \param bounds_new new global lower and upper bounds
    1026              : !> \param bounds_local_new new local lower and upper bounds
    1027              : !> \par History
    1028              : !>       07.2014 created [Hossein Bani-Hashemian]
    1029              : !> \author Mohammad Hossein Bani-Hashemian
    1030              : ! **************************************************************************************************
    1031           44 :    SUBROUTINE expansion_bounds(pw_grid, neumann_directions, srcs_expand, flipg_stat, &
    1032              :                                bounds_shftd, bounds_local_shftd, &
    1033              :                                recv_msgs_bnds, bounds_new, bounds_local_new)
    1034              : 
    1035              :       TYPE(pw_grid_type), INTENT(IN), POINTER            :: pw_grid
    1036              :       INTEGER, INTENT(IN)                                :: neumann_directions
    1037              :       INTEGER, DIMENSION(:), INTENT(IN), POINTER         :: srcs_expand, flipg_stat
    1038              :       INTEGER, DIMENSION(2, 3), INTENT(OUT)              :: bounds_shftd, bounds_local_shftd
    1039              :       INTEGER, DIMENSION(:, :, :), INTENT(INOUT), &
    1040              :          POINTER                                         :: recv_msgs_bnds
    1041              :       INTEGER, DIMENSION(2, 3), INTENT(OUT)              :: bounds_new, bounds_local_new
    1042              : 
    1043              :       CHARACTER(LEN=*), PARAMETER                        :: routineN = 'expansion_bounds'
    1044              : 
    1045              :       INTEGER                                            :: group_size, handle, i, lb1_new, lb2_new, &
    1046              :                                                             lb3_new, loc, maxn_sendrecv, rs_mpo, &
    1047              :                                                             ub1_new, ub2_new, ub3_new
    1048           44 :       INTEGER, ALLOCATABLE, DIMENSION(:)                 :: src_hist
    1049           44 :       INTEGER, ALLOCATABLE, DIMENSION(:, :, :)           :: bounds_local_all, bounds_local_new_all, &
    1050           44 :                                                             pcs_bnds
    1051              :       INTEGER, DIMENSION(2, 3)                           :: bounds, bounds_local
    1052              :       INTEGER, DIMENSION(3)                              :: npts_new, shf_yesno, shift
    1053              :       TYPE(mp_comm_type)                                 :: rs_group
    1054              : 
    1055           44 :       CALL timeset(routineN, handle)
    1056              : 
    1057           44 :       rs_group = pw_grid%para%group
    1058           44 :       rs_mpo = pw_grid%para%group%mepos
    1059           44 :       group_size = pw_grid%para%group%num_pe
    1060          440 :       bounds = pw_grid%bounds
    1061          440 :       bounds_local = pw_grid%bounds_local
    1062              : 
    1063           44 :       SELECT CASE (neumann_directions)
    1064              :       CASE (neumannXYZ)
    1065           44 :          maxn_sendrecv = 4
    1066           44 :          shf_yesno = (/1, 1, 1/)
    1067              :       CASE (neumannXY)
    1068            0 :          maxn_sendrecv = 4
    1069            0 :          shf_yesno = (/1, 1, 0/)
    1070              :       CASE (neumannXZ)
    1071            0 :          maxn_sendrecv = 2
    1072            0 :          shf_yesno = (/1, 0, 1/)
    1073              :       CASE (neumannYZ)
    1074            4 :          maxn_sendrecv = 2
    1075            4 :          shf_yesno = (/0, 1, 1/)
    1076              :       CASE (neumannX)
    1077            4 :          maxn_sendrecv = 2
    1078            4 :          shf_yesno = (/1, 0, 0/)
    1079              :       CASE (neumannY)
    1080            0 :          maxn_sendrecv = 2
    1081            0 :          shf_yesno = (/0, 1, 0/)
    1082              :       CASE (neumannZ)
    1083            4 :          maxn_sendrecv = 1
    1084            4 :          shf_yesno = (/0, 0, 1/)
    1085              :       CASE DEFAULT
    1086            0 :          CPABORT("Unknown neumann direction")
    1087           44 :          shf_yesno = (/0, 0, 0/)
    1088              :       END SELECT
    1089              : 
    1090           88 :       ALLOCATE (pcs_bnds(2, 3, maxn_sendrecv))
    1091           88 :       ALLOCATE (src_hist(maxn_sendrecv))
    1092              : 
    1093              :       ! Note that this is not easily FFT-able ... needed anyway, so link in FFTW.
    1094          176 :       npts_new = 2*pw_grid%npts
    1095          176 :       shift = -npts_new/2
    1096          176 :       shift = shift - bounds(1, :)
    1097          132 :       bounds_shftd(:, 1) = bounds(:, 1) + shf_yesno(1)*shift(1)
    1098          132 :       bounds_shftd(:, 2) = bounds(:, 2) + shf_yesno(2)*shift(2)
    1099          132 :       bounds_shftd(:, 3) = bounds(:, 3) + shf_yesno(3)*shift(3)
    1100          132 :       bounds_local_shftd(:, 1) = bounds_local(:, 1) + shf_yesno(1)*shift(1)
    1101          132 :       bounds_local_shftd(:, 2) = bounds_local(:, 2) + shf_yesno(2)*shift(2)
    1102          132 :       bounds_local_shftd(:, 3) = bounds_local(:, 3) + shf_yesno(3)*shift(3)
    1103              : 
    1104              : ! let all the nodes know about each others local shifted bounds
    1105          132 :       ALLOCATE (bounds_local_all(2, 3, group_size))
    1106           44 :       CALL rs_group%allgather(bounds_local_shftd, bounds_local_all)
    1107              : 
    1108          192 :       src_hist = -1 ! keeps the history of sources
    1109              : 
    1110          192 :       DO i = 1, maxn_sendrecv
    1111              : ! no need to receive from myself
    1112          148 :          IF (srcs_expand(i) .EQ. rs_mpo) THEN
    1113           80 :             recv_msgs_bnds(1, 1, i) = bounds_local_shftd(1, 1)
    1114           80 :             recv_msgs_bnds(2, 1, i) = bounds_local_shftd(2, 1)
    1115           80 :             recv_msgs_bnds(1, 2, i) = bounds_local_shftd(1, 2)
    1116           80 :             recv_msgs_bnds(2, 2, i) = bounds_local_shftd(2, 2)
    1117           80 :             recv_msgs_bnds(1, 3, i) = bounds_local_shftd(1, 3)
    1118           80 :             recv_msgs_bnds(2, 3, i) = bounds_local_shftd(2, 3)
    1119              : ! if I have already received data from the source, just use the one from the last time
    1120          268 :          ELSE IF (ANY(src_hist .EQ. srcs_expand(i))) THEN
    1121          192 :             loc = MINLOC(ABS(src_hist - srcs_expand(i)), 1)
    1122           32 :             recv_msgs_bnds(1, 1, i) = bounds_local_all(1, 1, srcs_expand(loc) + 1)
    1123           32 :             recv_msgs_bnds(2, 1, i) = bounds_local_all(2, 1, srcs_expand(loc) + 1)
    1124           32 :             recv_msgs_bnds(1, 2, i) = bounds_local_all(1, 2, srcs_expand(loc) + 1)
    1125           32 :             recv_msgs_bnds(2, 2, i) = bounds_local_all(2, 2, srcs_expand(loc) + 1)
    1126           32 :             recv_msgs_bnds(1, 3, i) = bounds_local_all(1, 3, srcs_expand(loc) + 1)
    1127           32 :             recv_msgs_bnds(2, 3, i) = bounds_local_all(2, 3, srcs_expand(loc) + 1)
    1128              :          ELSE
    1129           36 :             recv_msgs_bnds(1, 1, i) = bounds_local_all(1, 1, srcs_expand(i) + 1)
    1130           36 :             recv_msgs_bnds(2, 1, i) = bounds_local_all(2, 1, srcs_expand(i) + 1)
    1131           36 :             recv_msgs_bnds(1, 2, i) = bounds_local_all(1, 2, srcs_expand(i) + 1)
    1132           36 :             recv_msgs_bnds(2, 2, i) = bounds_local_all(2, 2, srcs_expand(i) + 1)
    1133           36 :             recv_msgs_bnds(1, 3, i) = bounds_local_all(1, 3, srcs_expand(i) + 1)
    1134           36 :             recv_msgs_bnds(2, 3, i) = bounds_local_all(2, 3, srcs_expand(i) + 1)
    1135              :          END IF
    1136          192 :          src_hist(i) = srcs_expand(i)
    1137              :       END DO
    1138              : 
    1139              : ! flip the received data based on the flipping status
    1140          192 :       DO i = 1, maxn_sendrecv
    1141           44 :          SELECT CASE (flipg_stat(i))
    1142              :          CASE (NOT_FLIPPED)
    1143          440 :             pcs_bnds(:, :, i) = recv_msgs_bnds(:, :, i)
    1144              :          CASE (UD_FLIPPED)
    1145           36 :             pcs_bnds(:, :, i) = flipud_bounds_local(recv_msgs_bnds(:, :, i), bounds_shftd)
    1146              :          CASE (LR_FLIPPED)
    1147           36 :             pcs_bnds(:, :, i) = fliplr_bounds_local(recv_msgs_bnds(:, :, i), bounds_shftd)
    1148              :          CASE (BF_FLIPPED)
    1149            0 :             pcs_bnds(:, :, i) = flipbf_bounds_local(recv_msgs_bnds(:, :, i), bounds_shftd)
    1150              :          CASE (ROTATED)
    1151          148 :             pcs_bnds(:, :, i) = rot180_bounds_local(recv_msgs_bnds(:, :, i), bounds_shftd)
    1152              :          END SELECT
    1153              :       END DO
    1154              : 
    1155          384 :       lb1_new = MINVAL(pcs_bnds(1, 1, :)); ub1_new = MAXVAL(pcs_bnds(2, 1, :))
    1156          384 :       lb2_new = MINVAL(pcs_bnds(1, 2, :)); ub2_new = MAXVAL(pcs_bnds(2, 2, :))
    1157          384 :       lb3_new = MINVAL(pcs_bnds(1, 3, :)); ub3_new = MAXVAL(pcs_bnds(2, 3, :))
    1158              : 
    1159              : ! calculate the new local and global bounds
    1160          192 :       bounds_local_new(1, 1) = MINVAL(pcs_bnds(1, 1, :))
    1161          192 :       bounds_local_new(2, 1) = MAXVAL(pcs_bnds(2, 1, :))
    1162          192 :       bounds_local_new(1, 2) = MINVAL(pcs_bnds(1, 2, :))
    1163          192 :       bounds_local_new(2, 2) = MAXVAL(pcs_bnds(2, 2, :))
    1164          192 :       bounds_local_new(1, 3) = MINVAL(pcs_bnds(1, 3, :))
    1165              :       SELECT CASE (neumann_directions)
    1166              :       CASE (neumannXYZ, neumannXZ, neumannYZ, neumannZ)
    1167          180 :          bounds_local_new(2, 3) = 2*(MAXVAL(pcs_bnds(2, 3, :)) + 1) - bounds_local_new(1, 3) - 1
    1168              :       CASE (neumannXY, neumannX, neumannY)
    1169           56 :          bounds_local_new(2, 3) = MAXVAL(pcs_bnds(2, 3, :))
    1170              :       END SELECT
    1171              : 
    1172           88 :       ALLOCATE (bounds_local_new_all(2, 3, group_size))
    1173           44 :       CALL rs_group%allgather(bounds_local_new, bounds_local_new_all)
    1174          132 :       bounds_new(1, 1) = MINVAL(bounds_local_new_all(1, 1, :))
    1175          132 :       bounds_new(2, 1) = MAXVAL(bounds_local_new_all(2, 1, :))
    1176          132 :       bounds_new(1, 2) = MINVAL(bounds_local_new_all(1, 2, :))
    1177          132 :       bounds_new(2, 2) = MAXVAL(bounds_local_new_all(2, 2, :))
    1178          132 :       bounds_new(1, 3) = MINVAL(bounds_local_new_all(1, 3, :))
    1179          132 :       bounds_new(2, 3) = MAXVAL(bounds_local_new_all(2, 3, :))
    1180              : 
    1181           44 :       DEALLOCATE (bounds_local_all, bounds_local_new_all)
    1182              : 
    1183           44 :       CALL timestop(handle)
    1184              : 
    1185           88 :    END SUBROUTINE expansion_bounds
    1186              : 
    1187              : ! **************************************************************************************************
    1188              : !> \brief   precalculates the local bounds of a 3d array after applying flipud
    1189              : !> \param bndsl_in current local lower and upper bounds
    1190              : !> \param bounds global lower and upper bounds
    1191              : !> \return new local lower and upper bounds
    1192              : !> \par History
    1193              : !>       07.2014 created [Hossein Bani-Hashemian]
    1194              : !> \author Mohammad Hossein Bani-Hashemian
    1195              : ! **************************************************************************************************
    1196          634 :    PURE FUNCTION flipud_bounds_local(bndsl_in, bounds) RESULT(bndsl_out)
    1197              : 
    1198              :       INTEGER, DIMENSION(2, 3), INTENT(IN)               :: bndsl_in, bounds
    1199              :       INTEGER, DIMENSION(2, 3)                           :: bndsl_out
    1200              : 
    1201          634 :       bndsl_out(1, 1) = 2*(bounds(2, 1) + 1) - bndsl_in(2, 1)
    1202          634 :       bndsl_out(2, 1) = 2*(bounds(2, 1) + 1) - bndsl_in(1, 1)
    1203          634 :       IF (bndsl_out(1, 1) .EQ. bounds(2, 1) + 2) bndsl_out(1, 1) = bndsl_out(1, 1) - 1
    1204          634 :       IF (bndsl_out(2, 1) .EQ. 2*(bounds(2, 1) + 1) - bounds(1, 1)) bndsl_out(2, 1) = bndsl_out(2, 1) - 1
    1205              : 
    1206          634 :       bndsl_out(1, 2) = bndsl_in(1, 2)
    1207          634 :       bndsl_out(2, 2) = bndsl_in(2, 2)
    1208              : 
    1209          634 :       bndsl_out(1, 3) = bndsl_in(1, 3)
    1210          634 :       bndsl_out(2, 3) = bndsl_in(2, 3)
    1211              : 
    1212          634 :    END FUNCTION flipud_bounds_local
    1213              : 
    1214              : ! **************************************************************************************************
    1215              : !> \brief   precalculates the local bounds of a 3d array after applying fliplr
    1216              : !> \param bndsl_in current local lower and upper bounds
    1217              : !> \param bounds global lower and upper bounds
    1218              : !> \return new local lower and upper bounds
    1219              : !> \par History
    1220              : !>       07.2014 created [Hossein Bani-Hashemian]
    1221              : !> \author Mohammad Hossein Bani-Hashemian
    1222              : ! **************************************************************************************************
    1223          646 :    PURE FUNCTION fliplr_bounds_local(bndsl_in, bounds) RESULT(bndsl_out)
    1224              : 
    1225              :       INTEGER, DIMENSION(2, 3), INTENT(IN)               :: bndsl_in, bounds
    1226              :       INTEGER, DIMENSION(2, 3)                           :: bndsl_out
    1227              : 
    1228          646 :       bndsl_out(1, 1) = bndsl_in(1, 1)
    1229          646 :       bndsl_out(2, 1) = bndsl_in(2, 1)
    1230              : 
    1231          646 :       bndsl_out(1, 2) = 2*(bounds(2, 2) + 1) - bndsl_in(2, 2)
    1232          646 :       bndsl_out(2, 2) = 2*(bounds(2, 2) + 1) - bndsl_in(1, 2)
    1233          646 :       IF (bndsl_out(1, 2) .EQ. bounds(2, 2) + 2) bndsl_out(1, 2) = bndsl_out(1, 2) - 1
    1234          646 :       IF (bndsl_out(2, 2) .EQ. 2*(bounds(2, 2) + 1) - bounds(1, 2)) bndsl_out(2, 2) = bndsl_out(2, 2) - 1
    1235              : 
    1236          646 :       bndsl_out(1, 3) = bndsl_in(1, 3)
    1237          646 :       bndsl_out(2, 3) = bndsl_in(2, 3)
    1238              : 
    1239          646 :    END FUNCTION fliplr_bounds_local
    1240              : 
    1241              : ! **************************************************************************************************
    1242              : !> \brief   precalculates the local bounds of a 3d array after applying flipbf
    1243              : !> \param bndsl_in current local lower and upper bounds
    1244              : !> \param bounds global lower and upper bounds
    1245              : !> \return new local lower and upper bounds
    1246              : !> \par History
    1247              : !>       07.2014 created [Hossein Bani-Hashemian]
    1248              : !> \author Mohammad Hossein Bani-Hashemian
    1249              : ! **************************************************************************************************
    1250          674 :    PURE FUNCTION flipbf_bounds_local(bndsl_in, bounds) RESULT(bndsl_out)
    1251              : 
    1252              :       INTEGER, DIMENSION(2, 3), INTENT(IN)               :: bndsl_in, bounds
    1253              :       INTEGER, DIMENSION(2, 3)                           :: bndsl_out
    1254              : 
    1255          674 :       bndsl_out(1, 1) = bndsl_in(1, 1)
    1256          674 :       bndsl_out(2, 1) = bndsl_in(2, 1)
    1257              : 
    1258          674 :       bndsl_out(1, 2) = bndsl_in(1, 2)
    1259          674 :       bndsl_out(2, 2) = bndsl_in(2, 2)
    1260              : 
    1261          674 :       bndsl_out(1, 3) = 2*(bounds(2, 3) + 1) - bndsl_in(2, 3)
    1262          674 :       bndsl_out(2, 3) = 2*(bounds(2, 3) + 1) - bndsl_in(1, 3)
    1263          674 :       IF (bndsl_out(1, 3) .EQ. bounds(2, 3) + 2) bndsl_out(1, 3) = bndsl_out(1, 3) - 1
    1264          674 :       IF (bndsl_out(2, 3) .EQ. 2*(bounds(2, 3) + 1) - bounds(1, 3)) bndsl_out(2, 3) = bndsl_out(2, 3) - 1
    1265              : 
    1266          674 :    END FUNCTION flipbf_bounds_local
    1267              : 
    1268              : ! **************************************************************************************************
    1269              : !> \brief   precalculates the local bounds of a 3d array after applying rot180
    1270              : !> \param bndsl_in current local lower and upper bounds
    1271              : !> \param bounds global lower and upper bounds
    1272              : !> \return new local lower and upper bounds
    1273              : !> \par History
    1274              : !>       07.2014 created [Hossein Bani-Hashemian]
    1275              : !> \author Mohammad Hossein Bani-Hashemian
    1276              : ! **************************************************************************************************
    1277          598 :    PURE FUNCTION rot180_bounds_local(bndsl_in, bounds) RESULT(bndsl_out)
    1278              : 
    1279              :       INTEGER, DIMENSION(2, 3), INTENT(IN)               :: bndsl_in, bounds
    1280              :       INTEGER, DIMENSION(2, 3)                           :: bndsl_out
    1281              : 
    1282          598 :       bndsl_out(1, 1) = 2*(bounds(2, 1) + 1) - bndsl_in(2, 1)
    1283          598 :       bndsl_out(2, 1) = 2*(bounds(2, 1) + 1) - bndsl_in(1, 1)
    1284          598 :       IF (bndsl_out(1, 1) .EQ. bounds(2, 1) + 2) bndsl_out(1, 1) = bndsl_out(1, 1) - 1
    1285          598 :       IF (bndsl_out(2, 1) .EQ. 2*(bounds(2, 1) + 1) - bounds(1, 1)) bndsl_out(2, 1) = bndsl_out(2, 1) - 1
    1286              : 
    1287          598 :       bndsl_out(1, 2) = 2*(bounds(2, 2) + 1) - bndsl_in(2, 2)
    1288          598 :       bndsl_out(2, 2) = 2*(bounds(2, 2) + 1) - bndsl_in(1, 2)
    1289          598 :       IF (bndsl_out(1, 2) .EQ. bounds(2, 2) + 2) bndsl_out(1, 2) = bndsl_out(1, 2) - 1
    1290          598 :       IF (bndsl_out(2, 2) .EQ. 2*(bounds(2, 2) + 1) - bounds(1, 2)) bndsl_out(2, 2) = bndsl_out(2, 2) - 1
    1291              : 
    1292          598 :       bndsl_out(1, 3) = bndsl_in(1, 3)
    1293          598 :       bndsl_out(2, 3) = bndsl_in(2, 3)
    1294              : 
    1295          598 :    END FUNCTION rot180_bounds_local
    1296              : 
    1297            0 : END MODULE dct
    1298              : 
        

Generated by: LCOV version 2.0-1