LCOV - code coverage report
Current view: top level - src/pw - fft_tools.F (source / functions) Hit Total Coverage
Test: CP2K Regtests (git:34ef472) Lines: 717 1357 52.8 %
Date: 2024-04-26 08:30:29 Functions: 18 31 58.1 %

          Line data    Source code
       1             : !--------------------------------------------------------------------------------------------------!
       2             : !   CP2K: A general program to perform molecular dynamics simulations                              !
       3             : !   Copyright 2000-2024 CP2K developers group <https://cp2k.org>                                   !
       4             : !                                                                                                  !
       5             : !   SPDX-License-Identifier: GPL-2.0-or-later                                                      !
       6             : !--------------------------------------------------------------------------------------------------!
       7             : 
       8             : ! **************************************************************************************************
       9             : !> \par History
      10             : !>      JGH (30-Nov-2000): ESSL FFT Library added
      11             : !>      JGH (05-Jan-2001): Added SGI library FFT
      12             : !>      JGH (14-Jan-2001): Added parallel 3d FFT
      13             : !>      JGH (10-Feb-2006): New interface type
      14             : !>      JGH (31-Mar-2008): Remove local allocates and reshapes (performance)
      15             : !>                         Possible problems can be related with setting arrays
      16             : !>                         not to zero
      17             : !>                         Some interfaces could be further simplified by avoiding
      18             : !>                         an initial copy. However, this assumes contiguous arrays
      19             : !>      IAB (15-Oct-2008): Moved mp_cart_sub calls out of cube_tranpose_* and into
      20             : !>                         fft_scratch type, reducing number of calls dramatically
      21             : !>      IAB (05-Dec-2008): Moved all other non-essential MPI calls into scratch type
      22             : !>      IAB (09-Jan-2009): Added fft_plan_type to store FFT data, including cached FFTW plans
      23             : !>      IAB (13-Feb-2009): Extended plan caching to serial 3D FFT (fft3d_s)
      24             : !>      IAB (09-Oct-2009): Added OpenMP directives to parallel 3D FFT
      25             : !>                         (c) The Numerical Algorithms Group (NAG) Ltd, 2008-2009 on behalf of the HECToR project
      26             : !> \author JGH
      27             : ! **************************************************************************************************
      28             : MODULE fft_tools
      29             :    USE ISO_C_BINDING,                   ONLY: C_F_POINTER,&
      30             :                                               C_LOC,&
      31             :                                               C_PTR,&
      32             :                                               C_SIZE_T
      33             :    USE cp_log_handling,                 ONLY: cp_logger_get_default_io_unit
      34             :    USE fft_lib,                         ONLY: &
      35             :         fft_1dm, fft_3d, fft_alloc, fft_create_plan_1dm, fft_create_plan_3d, fft_dealloc, &
      36             :         fft_destroy_plan, fft_do_cleanup, fft_do_init, fft_get_lengths, fft_library
      37             :    USE fft_plan,                        ONLY: fft_plan_type
      38             :    USE kinds,                           ONLY: dp,&
      39             :                                               dp_size,&
      40             :                                               sp
      41             :    USE mathconstants,                   ONLY: z_zero
      42             :    USE message_passing,                 ONLY: mp_cart_type,&
      43             :                                               mp_comm_null,&
      44             :                                               mp_comm_type,&
      45             :                                               mp_request_type,&
      46             :                                               mp_waitall
      47             :    USE offload_api,                     ONLY: offload_free_pinned_mem,&
      48             :                                               offload_malloc_pinned_mem
      49             : 
      50             : !$ USE OMP_LIB, ONLY: omp_get_max_threads, omp_get_thread_num, omp_get_num_threads
      51             : 
      52             : #include "../base/base_uses.f90"
      53             : 
      54             :    IMPLICIT NONE
      55             : 
      56             :    CHARACTER(len=*), PARAMETER, PRIVATE :: moduleN = 'fft_tools'
      57             : 
      58             :    ! Types for the pool of scratch data needed in FFT routines
      59             :    ! keep the subroutine "is_equal" up-to-date
      60             :    ! needs a default initialization
      61             :    TYPE fft_scratch_sizes
      62             :       INTEGER                              :: nx = 0, ny = 0, nz = 0
      63             :       INTEGER                              :: lmax = 0, mmax = 0, nmax = 0
      64             :       INTEGER                              :: mx1 = 0, mx2 = 0, mx3 = 0
      65             :       INTEGER                              :: my1 = 0, my2 = 0, my3 = 0
      66             :       INTEGER                              :: mz1 = 0, mz2 = 0, mz3 = 0
      67             :       INTEGER                              :: mcz1 = 0, mcz2 = 0, mcy3 = 0, mcx2 = 0
      68             :       INTEGER                              :: lg = 0, mg = 0
      69             :       INTEGER                              :: nbx = 0, nbz = 0
      70             :       INTEGER                              :: nmray = 0, nyzray = 0
      71             :       TYPE(mp_comm_type)                   :: gs_group = mp_comm_type()
      72             :       TYPE(mp_cart_type)                   :: rs_group = mp_cart_type()
      73             :       INTEGER, DIMENSION(2)                :: g_pos = 0, r_pos = 0, r_dim = 0
      74             :       INTEGER                              :: numtask = 0
      75             :    END TYPE fft_scratch_sizes
      76             : 
      77             :    TYPE fft_scratch_type
      78             :       INTEGER                              :: fft_scratch_id = 0
      79             :       INTEGER                              :: tf_type = -1
      80             :       LOGICAL                              :: in_use = .TRUE.
      81             :       TYPE(mp_comm_type)                   :: group = mp_comm_type()
      82             :       INTEGER, DIMENSION(3)                :: nfft = -1
      83             :       ! to be used in cube_transpose_* routines
      84             :       TYPE(mp_cart_type), DIMENSION(2)     :: cart_sub_comm = mp_cart_type()
      85             :       INTEGER, DIMENSION(2)                :: dim = -1, pos = -1
      86             :       ! to be used in fft3d_s
      87             :       COMPLEX(KIND=dp), DIMENSION(:, :, :), POINTER, CONTIGUOUS &
      88             :          :: ziptr => NULL(), zoptr => NULL()
      89             :       ! to be used in fft3d_ps : block distribution
      90             :       COMPLEX(KIND=dp), DIMENSION(:, :), CONTIGUOUS, POINTER &
      91             :          :: p1buf => NULL(), p2buf => NULL(), p3buf => NULL(), p4buf => NULL(), &
      92             :             p5buf => NULL(), p6buf => NULL(), p7buf => NULL()
      93             :       ! to be used in fft3d_ps : plane distribution
      94             :       COMPLEX(KIND=dp), DIMENSION(:, :), POINTER, CONTIGUOUS &
      95             :          :: r1buf => NULL(), r2buf => NULL()
      96             :       COMPLEX(KIND=dp), DIMENSION(:, :, :), POINTER, CONTIGUOUS &
      97             :          :: tbuf => NULL()
      98             :       ! to be used in fft3d_pb
      99             :       COMPLEX(KIND=dp), DIMENSION(:, :), POINTER, CONTIGUOUS &
     100             :          :: a1buf => NULL(), a2buf => NULL(), a3buf => NULL(), &
     101             :             a4buf => NULL(), a5buf => NULL(), a6buf => NULL()
     102             :       ! to be used in communication routines
     103             :       INTEGER, DIMENSION(:), CONTIGUOUS, POINTER       :: scount => NULL(), rcount => NULL(), sdispl => NULL(), rdispl => NULL()
     104             :       INTEGER, DIMENSION(:, :), CONTIGUOUS, POINTER     :: pgcube => NULL()
     105             :       INTEGER, DIMENSION(:), CONTIGUOUS, POINTER       :: xzcount => NULL(), yzcount => NULL(), xzdispl => NULL(), yzdispl => NULL()
     106             :       INTEGER                              :: in = 0, mip = -1
     107             :       REAL(KIND=dp)                        :: rsratio = 1.0_dp
     108             :       COMPLEX(KIND=dp), DIMENSION(:), POINTER, CONTIGUOUS &
     109             :          :: xzbuf => NULL(), yzbuf => NULL()
     110             :       COMPLEX(KIND=sp), DIMENSION(:), POINTER, CONTIGUOUS &
     111             :          :: xzbuf_sgl => NULL(), yzbuf_sgl => NULL()
     112             :       COMPLEX(KIND=dp), DIMENSION(:, :), POINTER, CONTIGUOUS &
     113             :          :: rbuf1 => NULL(), rbuf2 => NULL(), rbuf3 => NULL(), rbuf4 => NULL(), &
     114             :             rbuf5 => NULL(), rbuf6 => NULL(), rr => NULL()
     115             :       COMPLEX(KIND=sp), DIMENSION(:, :), POINTER, CONTIGUOUS &
     116             :          :: ss => NULL(), tt => NULL()
     117             :       INTEGER, DIMENSION(:, :), POINTER, CONTIGUOUS     :: pgrid => NULL()
     118             :       INTEGER, DIMENSION(:), POINTER, CONTIGUOUS       :: xcor => NULL(), zcor => NULL(), pzcoord => NULL()
     119             :       TYPE(fft_scratch_sizes)              :: sizes = fft_scratch_sizes()
     120             :       TYPE(fft_plan_type), DIMENSION(6)   :: fft_plan = fft_plan_type()
     121             :       INTEGER                              :: last_tick = -1
     122             :    END TYPE fft_scratch_type
     123             : 
     124             :    TYPE fft_scratch_pool_type
     125             :       TYPE(fft_scratch_type), POINTER       :: fft_scratch => NULL()
     126             :       TYPE(fft_scratch_pool_type), POINTER  :: fft_scratch_next => NULL()
     127             :    END TYPE fft_scratch_pool_type
     128             : 
     129             :    INTEGER, SAVE                           :: init_fft_pool = 0
     130             :    ! the clock for fft pool. Allows to identify the least recently used scratch
     131             :    INTEGER, SAVE                           :: tick_fft_pool = 0
     132             :    ! limit the number of scratch pools to fft_pool_scratch_limit.
     133             :    INTEGER, SAVE                           :: fft_pool_scratch_limit = 15
     134             :    TYPE(fft_scratch_pool_type), POINTER, SAVE:: fft_scratch_first
     135             :    ! END of types for the pool of scratch data needed in FFT routines
     136             : 
     137             :    PRIVATE
     138             :    PUBLIC :: init_fft, fft3d, finalize_fft
     139             :    PUBLIC :: fft_alloc, fft_dealloc
     140             :    PUBLIC :: fft_radix_operations, fft_fw1d
     141             :    PUBLIC :: FWFFT, BWFFT
     142             :    PUBLIC :: FFT_RADIX_CLOSEST, FFT_RADIX_NEXT
     143             :    PUBLIC :: FFT_RADIX_NEXT_ODD
     144             : 
     145             :    INTEGER, PARAMETER :: FWFFT = +1, BWFFT = -1
     146             :    INTEGER, PARAMETER :: FFT_RADIX_CLOSEST = 493, FFT_RADIX_NEXT = 494
     147             :    INTEGER, PARAMETER :: FFT_RADIX_ALLOWED = 495, FFT_RADIX_DISALLOWED = 496
     148             :    INTEGER, PARAMETER :: FFT_RADIX_NEXT_ODD = 497
     149             : 
     150             :    REAL(KIND=dp), PARAMETER :: ratio_sparse_alltoall = 0.5_dp
     151             : 
     152             :    ! these saved variables are FFT globals
     153             :    INTEGER, SAVE :: fft_type = 0
     154             :    LOGICAL, SAVE :: alltoall_sgl = .FALSE.
     155             :    LOGICAL, SAVE :: use_fftsg_sizes = .TRUE.
     156             :    INTEGER, SAVE :: fft_plan_style = 1
     157             : 
     158             :    ! these are only needed for pw_gpu (-D__OFFLOAD)
     159             :    PUBLIC :: get_fft_scratch, release_fft_scratch
     160             :    PUBLIC :: cube_transpose_1, cube_transpose_2
     161             :    PUBLIC :: yz_to_x, x_to_yz, xz_to_yz, yz_to_xz
     162             :    PUBLIC :: fft_scratch_sizes, fft_scratch_type
     163             :    PUBLIC :: fft_type, fft_plan_style
     164             : 
     165             :    INTERFACE fft3d
     166             :       MODULE PROCEDURE fft3d_s, fft3d_ps, fft3d_pb
     167             :    END INTERFACE
     168             : 
     169             : ! **************************************************************************************************
     170             : 
     171             : CONTAINS
     172             : 
     173             : ! **************************************************************************************************
     174             : !> \brief ...
     175             : !> \param fftlib ...
     176             : !> \param alltoall ...
     177             : !> \param fftsg_sizes ...
     178             : !> \param pool_limit ...
     179             : !> \param wisdom_file ...
     180             : !> \param plan_style ...
     181             : !> \author JGH
     182             : ! **************************************************************************************************
     183        9005 :    SUBROUTINE init_fft(fftlib, alltoall, fftsg_sizes, pool_limit, wisdom_file, &
     184             :                        plan_style)
     185             : 
     186             :       CHARACTER(LEN=*), INTENT(IN)                       :: fftlib
     187             :       LOGICAL, INTENT(IN)                                :: alltoall, fftsg_sizes
     188             :       INTEGER, INTENT(IN)                                :: pool_limit
     189             :       CHARACTER(LEN=*), INTENT(IN)                       :: wisdom_file
     190             :       INTEGER, INTENT(IN)                                :: plan_style
     191             : 
     192        9005 :       use_fftsg_sizes = fftsg_sizes
     193        9005 :       alltoall_sgl = alltoall
     194        9005 :       fft_pool_scratch_limit = pool_limit
     195        9005 :       fft_type = fft_library(fftlib)
     196        9005 :       fft_plan_style = plan_style
     197             : 
     198        9005 :       IF (fft_type <= 0) CPABORT("Unknown FFT library: "//TRIM(fftlib))
     199             : 
     200        9005 :       CALL fft_do_init(fft_type, wisdom_file)
     201             : 
     202             :       ! setup the FFT scratch pool, if one is associated, clear first
     203        9005 :       CALL release_fft_scratch_pool()
     204        9005 :       CALL init_fft_scratch_pool()
     205             : 
     206        9005 :    END SUBROUTINE init_fft
     207             : 
     208             : ! **************************************************************************************************
     209             : !> \brief does whatever is needed to finalize the current fft setup
     210             : !> \param para_env ...
     211             : !> \param wisdom_file ...
     212             : !> \par History
     213             : !>      10.2007 created [Joost VandeVondele]
     214             : ! **************************************************************************************************
     215        8795 :    SUBROUTINE finalize_fft(para_env, wisdom_file)
     216             :       CLASS(mp_comm_type)                    :: para_env
     217             :       CHARACTER(LEN=*), INTENT(IN)                       :: wisdom_file
     218             : 
     219             : ! release the FFT scratch pool
     220             : 
     221        8795 :       CALL release_fft_scratch_pool()
     222             : 
     223             :       ! finalize fft libs
     224             : 
     225        8795 :       CALL fft_do_cleanup(fft_type, wisdom_file, para_env%is_source())
     226             : 
     227        8795 :    END SUBROUTINE finalize_fft
     228             : 
     229             : ! **************************************************************************************************
     230             : !> \brief Determine the allowed lengths of FFT's   '''
     231             : !> \param radix_in ...
     232             : !> \param radix_out ...
     233             : !> \param operation ...
     234             : !> \par History
     235             : !>      new library structure (JGH)
     236             : !> \author Ari Seitsonen
     237             : ! **************************************************************************************************
     238      108338 :    SUBROUTINE fft_radix_operations(radix_in, radix_out, operation)
     239             : 
     240             :       INTEGER, INTENT(IN)                                :: radix_in
     241             :       INTEGER, INTENT(OUT)                               :: radix_out
     242             :       INTEGER, INTENT(IN)                                :: operation
     243             : 
     244             :       INTEGER, PARAMETER                                 :: fft_type_sg = 1
     245             : 
     246             :       INTEGER                                            :: i, iloc, ldata
     247      108338 :       INTEGER, ALLOCATABLE, DIMENSION(:)                 :: DATA
     248             : 
     249      108338 :       ldata = 1024
     250      108338 :       ALLOCATE (DATA(ldata))
     251   111046450 :       DATA = -1
     252             : 
     253             :       ! if the user wants to use fftsg sizes go for it
     254      108338 :       IF (use_fftsg_sizes) THEN
     255      107834 :          CALL fft_get_lengths(fft_type_sg, DATA, ldata)
     256             :       ELSE
     257         504 :          CALL fft_get_lengths(fft_type, DATA, ldata)
     258             :       END IF
     259             : 
     260      108338 :       iloc = 0
     261     1199988 :       DO i = 1, ldata
     262     1199988 :          IF (DATA(i) == radix_in) THEN
     263             :             iloc = i
     264             :             EXIT
     265             :          ELSE
     266     1170580 :             IF (OPERATION == FFT_RADIX_ALLOWED) THEN
     267             :                CYCLE
     268     1170580 :             ELSE IF (DATA(i) > radix_in) THEN
     269             :                iloc = i
     270             :                EXIT
     271             :             END IF
     272             :          END IF
     273             :       END DO
     274             : 
     275      108338 :       IF (iloc == 0) THEN
     276           0 :          CPABORT("Index to radix array not found.")
     277             :       END IF
     278             : 
     279      108338 :       IF (OPERATION == FFT_RADIX_ALLOWED) THEN
     280           0 :          IF (DATA(iloc) == radix_in) THEN
     281           0 :             radix_out = FFT_RADIX_ALLOWED
     282             :          ELSE
     283           0 :             radix_out = FFT_RADIX_DISALLOWED
     284             :          END IF
     285             : 
     286      108338 :       ELSE IF (OPERATION == FFT_RADIX_CLOSEST) THEN
     287         270 :          IF (DATA(iloc) == radix_in) THEN
     288         102 :             radix_out = DATA(iloc)
     289             :          ELSE
     290         168 :             IF (ABS(DATA(iloc - 1) - radix_in) <= &
     291             :                 ABS(DATA(iloc) - radix_in)) THEN
     292         162 :                radix_out = DATA(iloc - 1)
     293             :             ELSE
     294           6 :                radix_out = DATA(iloc)
     295             :             END IF
     296             :          END IF
     297             : 
     298      108068 :       ELSE IF (OPERATION == FFT_RADIX_NEXT) THEN
     299      106700 :          radix_out = DATA(iloc)
     300             : 
     301        1368 :       ELSE IF (OPERATION == FFT_RADIX_NEXT_ODD) THEN
     302        3028 :          DO i = iloc, ldata
     303        3028 :             IF (MOD(DATA(i), 2) == 1) THEN
     304        1368 :                radix_out = DATA(i)
     305        1368 :                EXIT
     306             :             END IF
     307             :          END DO
     308        1368 :          IF (MOD(radix_out, 2) == 0) THEN
     309           0 :             CPABORT("No odd radix found.")
     310             :          END IF
     311             : 
     312             :       ELSE
     313           0 :          CPABORT("Disallowed radix operation.")
     314             :       END IF
     315             : 
     316      108338 :       DEALLOCATE (DATA)
     317             : 
     318      108338 :    END SUBROUTINE fft_radix_operations
     319             : 
     320             : ! **************************************************************************************************
     321             : !> \brief Performs m 1-D forward FFT-s of size n.
     322             : !> \param n      size of the FFT
     323             : !> \param m      number of FFT-s
     324             : !> \param trans  shape of input and output arrays: [n x m] if it is FALSE, [m x n] if it is TRUE
     325             : !> \param zin    input array
     326             : !> \param zout   output array
     327             : !> \param scale  scaling factor
     328             : !> \param stat   status of the operation, non-zero code indicates an error
     329             : ! **************************************************************************************************
     330         208 :    SUBROUTINE fft_fw1d(n, m, trans, zin, zout, scale, stat)
     331             :       INTEGER, INTENT(in)                                :: n, m
     332             :       LOGICAL, INTENT(in)                                :: trans
     333             :       COMPLEX(kind=dp), DIMENSION(*), INTENT(inout)      :: zin, zout
     334             :       REAL(kind=dp), INTENT(in)                          :: scale
     335             :       INTEGER, INTENT(out)                               :: stat
     336             : 
     337             :       CHARACTER(len=*), PARAMETER                        :: routineN = 'fft_fw1d'
     338             : 
     339             :       INTEGER                                            :: handle
     340             :       TYPE(fft_plan_type)                                :: fft_plan
     341             : 
     342         208 :       CALL timeset(routineN, handle)
     343             : 
     344         208 :       IF (fft_type == 3) THEN
     345         208 :          CALL fft_create_plan_1dm(fft_plan, fft_type, FWFFT, trans, n, m, zin, zout, fft_plan_style)
     346         208 :          CALL fft_1dm(fft_plan, zin, zout, scale, stat)
     347         208 :          CALL fft_destroy_plan(fft_plan)
     348             :       ELSE
     349             :          CALL cp_warn(__LOCATION__, &
     350           0 :                       "FFT library in use cannot handle transformation of an arbitrary length.")
     351           0 :          stat = 1
     352             :       END IF
     353             : 
     354         208 :       CALL timestop(handle)
     355         832 :    END SUBROUTINE fft_fw1d
     356             : 
     357             : ! **************************************************************************************************
     358             : !> \brief Calls the 3D-FFT function from the initialized library
     359             : !> \param fsign ...
     360             : !> \param n ...
     361             : !> \param zin ...
     362             : !> \param zout ...
     363             : !> \param scale ...
     364             : !> \param status ...
     365             : !> \param debug ...
     366             : !> \par History
     367             : !>      none
     368             : !> \author JGH
     369             : ! **************************************************************************************************
     370      614828 :    SUBROUTINE fft3d_s(fsign, n, zin, zout, scale, status, debug)
     371             : 
     372             :       INTEGER, INTENT(IN)                                :: fsign
     373             :       INTEGER, DIMENSION(:), INTENT(INOUT)               :: n
     374             :       COMPLEX(KIND=dp), CONTIGUOUS, DIMENSION(:, :, :), &
     375             :          INTENT(INOUT)                                   :: zin
     376             :       COMPLEX(KIND=dp), CONTIGUOUS, DIMENSION(:, :, :), &
     377             :          INTENT(INOUT), OPTIONAL, TARGET                 :: zout
     378             :       REAL(KIND=dp), INTENT(IN), OPTIONAL                :: scale
     379             :       INTEGER, INTENT(OUT), OPTIONAL                     :: status
     380             :       LOGICAL, INTENT(IN), OPTIONAL                      :: debug
     381             : 
     382             :       CHARACTER(len=*), PARAMETER                        :: routineN = 'fft3d_s'
     383             : 
     384             :       COMPLEX(KIND=dp), CONTIGUOUS, DIMENSION(:, :, :), &
     385      614828 :          POINTER                                         :: zoptr
     386             :       COMPLEX(KIND=dp), DIMENSION(1, 1, 1), TARGET       :: zdum
     387             :       INTEGER                                            :: handle, ld(3), lo(3), output_unit, sign, &
     388             :                                                             stat
     389             :       LOGICAL                                            :: fft_in_place, test
     390             :       REAL(KIND=dp)                                      :: in_sum, norm, out_sum
     391             :       TYPE(fft_scratch_type), POINTER                    :: fft_scratch
     392             : 
     393      614828 :       CALL timeset(routineN, handle)
     394      614828 :       output_unit = cp_logger_get_default_io_unit()
     395             : 
     396      614828 :       IF (PRESENT(scale)) THEN
     397      577777 :          norm = scale
     398             :       ELSE
     399       37051 :          norm = 1.0_dp
     400             :       END IF
     401             : 
     402      614828 :       IF (PRESENT(debug)) THEN
     403      576905 :          test = debug
     404             :       ELSE
     405             :          test = .FALSE.
     406             :       END IF
     407             : 
     408      614828 :       IF (PRESENT(zout)) THEN
     409             :          fft_in_place = .FALSE.
     410             :       ELSE
     411      614820 :          fft_in_place = .TRUE.
     412             :       END IF
     413             : 
     414      614828 :       IF (test) THEN
     415           0 :          in_sum = SUM(ABS(zin))
     416             :       END IF
     417             : 
     418      614828 :       ld(1) = SIZE(zin, 1)
     419      614828 :       ld(2) = SIZE(zin, 2)
     420      614828 :       ld(3) = SIZE(zin, 3)
     421             : 
     422      614828 :       IF (n(1) /= ld(1) .OR. n(2) /= ld(2) .OR. n(3) /= ld(3)) THEN
     423           0 :          CPABORT("Size and dimension (zin) have to be the same.")
     424             :       END IF
     425             : 
     426      614828 :       sign = fsign
     427      614828 :       CALL get_fft_scratch(fft_scratch, tf_type=400, n=n)
     428             : 
     429      614828 :       IF (fft_in_place) THEN
     430      614820 :          zoptr => zdum
     431      614820 :          IF (fsign == FWFFT) THEN
     432      290340 :             CALL fft_3d(fft_scratch%fft_plan(1), norm, zin, zoptr, stat)
     433             :          ELSE
     434      324480 :             CALL fft_3d(fft_scratch%fft_plan(2), norm, zin, zoptr, stat)
     435             :          END IF
     436             :       ELSE
     437           8 :          IF (fsign == FWFFT) THEN
     438           4 :             CALL fft_3d(fft_scratch%fft_plan(3), norm, zin, zout, stat)
     439             :          ELSE
     440           4 :             CALL fft_3d(fft_scratch%fft_plan(4), norm, zin, zout, stat)
     441             :          END IF
     442             :       END IF
     443             : 
     444      614828 :       CALL release_fft_scratch(fft_scratch)
     445             : 
     446      614828 :       IF (PRESENT(zout)) THEN
     447           8 :          lo(1) = SIZE(zout, 1)
     448           8 :          lo(2) = SIZE(zout, 2)
     449           8 :          lo(3) = SIZE(zout, 3)
     450           8 :          IF (n(1) /= lo(1) .OR. n(2) /= lo(2) .OR. n(3) /= lo(3)) THEN
     451           0 :             CPABORT("Size and dimension (zout) have to be the same.")
     452             :          END IF
     453             :       END IF
     454             : 
     455      614828 :       IF (PRESENT(status)) THEN
     456        8993 :          status = stat
     457             :       END IF
     458             : 
     459      614828 :       IF (test .AND. output_unit > 0) THEN
     460           0 :          IF (PRESENT(zout)) THEN
     461           0 :             out_sum = SUM(ABS(zout))
     462           0 :             WRITE (output_unit, '(A)') "  Out of place 3D FFT (local)  : fft3d_s"
     463           0 :             WRITE (output_unit, '(A,T60,3I7)') "     Transform lengths ", n
     464           0 :             WRITE (output_unit, '(A,T60,3I7)') "     Input array dimensions ", ld
     465           0 :             WRITE (output_unit, '(A,T60,3I7)') "     Output array dimensions ", lo
     466           0 :             WRITE (output_unit, '(A,T61,E20.14)') "     Sum of input data ", in_sum
     467           0 :             WRITE (output_unit, '(A,T61,E20.14)') "     Sum of output data ", out_sum
     468             :          ELSE
     469           0 :             out_sum = SUM(ABS(zin))
     470           0 :             WRITE (output_unit, '(A)') "  In place 3D FFT (local)  : fft3d_s"
     471           0 :             WRITE (output_unit, '(A,T60,3I7)') "     Transform lengths ", n
     472           0 :             WRITE (output_unit, '(A,T60,3I7)') "     Input/output array dimensions ", ld
     473           0 :             WRITE (output_unit, '(A,T61,E20.14)') "     Sum of input data ", in_sum
     474           0 :             WRITE (output_unit, '(A,T61,E20.14)') "     Sum of output data ", out_sum
     475             :          END IF
     476             :       END IF
     477             : 
     478      614828 :       CALL timestop(handle)
     479             : 
     480      614828 :    END SUBROUTINE fft3d_s
     481             : 
     482             : ! **************************************************************************************************
     483             : !> \brief ...
     484             : !> \param fsign ...
     485             : !> \param n ...
     486             : !> \param cin ...
     487             : !> \param gin ...
     488             : !> \param gs_group ...
     489             : !> \param rs_group ...
     490             : !> \param yzp ...
     491             : !> \param nyzray ...
     492             : !> \param bo ...
     493             : !> \param scale ...
     494             : !> \param status ...
     495             : !> \param debug ...
     496             : ! **************************************************************************************************
     497     2615884 :    SUBROUTINE fft3d_ps(fsign, n, cin, gin, gs_group, rs_group, yzp, nyzray, &
     498     2615884 :                        bo, scale, status, debug)
     499             : 
     500             :       INTEGER, INTENT(IN)                                :: fsign
     501             :       INTEGER, DIMENSION(:), INTENT(IN)                  :: n
     502             :       COMPLEX(KIND=dp), CONTIGUOUS, DIMENSION(:, :, :), &
     503             :          INTENT(INOUT)                                   :: cin
     504             :       COMPLEX(KIND=dp), CONTIGUOUS, DIMENSION(:, :), &
     505             :          INTENT(INOUT)                                   :: gin
     506             :       TYPE(mp_comm_type), INTENT(IN)                     :: gs_group
     507             :       TYPE(mp_cart_type), INTENT(IN)                     :: rs_group
     508             :       INTEGER, CONTIGUOUS, DIMENSION(:, :, 0:), &
     509             :          INTENT(IN)                                      :: yzp
     510             :       INTEGER, CONTIGUOUS, DIMENSION(0:), INTENT(IN)     :: nyzray
     511             :       INTEGER, CONTIGUOUS, DIMENSION(:, :, 0:, :), &
     512             :          INTENT(IN)                                      :: bo
     513             :       REAL(KIND=dp), INTENT(IN), OPTIONAL                :: scale
     514             :       INTEGER, INTENT(OUT), OPTIONAL                     :: status
     515             :       LOGICAL, INTENT(IN), OPTIONAL                      :: debug
     516             : 
     517             :       CHARACTER(len=*), PARAMETER                        :: routineN = 'fft3d_ps'
     518             : 
     519             :       COMPLEX(KIND=dp), CONTIGUOUS, DIMENSION(:, :), &
     520     2615884 :          POINTER                                         :: pbuf, qbuf, rbuf, sbuf
     521             :       COMPLEX(KIND=dp), CONTIGUOUS, DIMENSION(:, :, :), &
     522     2615884 :          POINTER                                         :: tbuf
     523             :       INTEGER :: g_pos, handle, lg, lmax, mcx2, mcz1, mcz2, mg, mmax, mx1, mx2, my1, mz2, n1, n2, &
     524             :          nmax, numtask, numtask_g, numtask_r, nx, ny, nz, output_unit, r_dim(2), r_pos(2), rp, &
     525             :          sign, stat
     526     2615884 :       INTEGER, ALLOCATABLE, DIMENSION(:)                 :: p2p
     527             :       LOGICAL                                            :: test
     528             :       REAL(KIND=dp)                                      :: norm, sum_data
     529    18311188 :       TYPE(fft_scratch_sizes)                            :: fft_scratch_size
     530             :       TYPE(fft_scratch_type), POINTER                    :: fft_scratch
     531             : 
     532     2615884 :       CALL timeset(routineN, handle)
     533     2615884 :       output_unit = cp_logger_get_default_io_unit()
     534             : 
     535     2615884 :       IF (PRESENT(debug)) THEN
     536     2615884 :          test = debug
     537             :       ELSE
     538             :          test = .FALSE.
     539             :       END IF
     540             : 
     541     2615884 :       numtask_g = gs_group%num_pe
     542     2615884 :       g_pos = gs_group%mepos
     543     2615884 :       numtask_r = rs_group%num_pe
     544     7847652 :       r_dim = rs_group%num_pe_cart
     545     7847652 :       r_pos = rs_group%mepos_cart
     546     2615884 :       IF (numtask_g /= numtask_r) THEN
     547           0 :          CPABORT("Real space and G space groups are different.")
     548             :       END IF
     549     2615884 :       numtask = numtask_r
     550             : 
     551     2615884 :       IF (PRESENT(scale)) THEN
     552     2615884 :          norm = scale
     553             :       ELSE
     554           0 :          norm = 1.0_dp
     555             :       END IF
     556             : 
     557     2615884 :       sign = fsign
     558             : 
     559     2615884 :       lg = SIZE(gin, 1)
     560     2615884 :       mg = SIZE(gin, 2)
     561             : 
     562     2615884 :       nx = SIZE(cin, 1)
     563     2615884 :       ny = SIZE(cin, 2)
     564     2615884 :       nz = SIZE(cin, 3)
     565             : 
     566     2615884 :       IF (mg == 0) THEN
     567             :          mmax = 1
     568             :       ELSE
     569     2615884 :          mmax = mg
     570             :       END IF
     571     2615884 :       lmax = MAX(lg, (nx*ny*nz)/mmax + 1)
     572             : 
     573     7847652 :       ALLOCATE (p2p(0:numtask - 1))
     574             : 
     575     2615884 :       CALL gs_group%rank_compare(rs_group, p2p)
     576             : 
     577     2615884 :       rp = p2p(g_pos)
     578     2615884 :       mx1 = bo(2, 1, rp, 1) - bo(1, 1, rp, 1) + 1
     579     2615884 :       my1 = bo(2, 2, rp, 1) - bo(1, 2, rp, 1) + 1
     580     2615884 :       mx2 = bo(2, 1, rp, 2) - bo(1, 1, rp, 2) + 1
     581     2615884 :       mz2 = bo(2, 3, rp, 2) - bo(1, 3, rp, 2) + 1
     582             : 
     583     7847652 :       n1 = MAXVAL(bo(2, 1, :, 1) - bo(1, 1, :, 1) + 1)
     584     7847652 :       n2 = MAXVAL(bo(2, 2, :, 1) - bo(1, 2, :, 1) + 1)
     585     2615884 :       nmax = MAX((2*n2)/numtask, 2)*mx2*mz2
     586     7847652 :       nmax = MAX(nmax, n1*MAXVAL(nyzray))
     587     7847652 :       n1 = MAXVAL(bo(2, 1, :, 2))
     588     7847652 :       n2 = MAXVAL(bo(2, 3, :, 2))
     589             : 
     590     2615884 :       fft_scratch_size%nx = nx
     591     2615884 :       fft_scratch_size%ny = ny
     592     2615884 :       fft_scratch_size%nz = nz
     593     2615884 :       fft_scratch_size%lmax = lmax
     594     2615884 :       fft_scratch_size%mmax = mmax
     595     2615884 :       fft_scratch_size%mx1 = mx1
     596     2615884 :       fft_scratch_size%mx2 = mx2
     597     2615884 :       fft_scratch_size%my1 = my1
     598     2615884 :       fft_scratch_size%mz2 = mz2
     599     2615884 :       fft_scratch_size%lg = lg
     600     2615884 :       fft_scratch_size%mg = mg
     601     2615884 :       fft_scratch_size%nbx = n1
     602     2615884 :       fft_scratch_size%nbz = n2
     603     7847652 :       mcz1 = MAXVAL(bo(2, 3, :, 1) - bo(1, 3, :, 1) + 1)
     604     7847652 :       mcx2 = MAXVAL(bo(2, 1, :, 2) - bo(1, 1, :, 2) + 1)
     605     7847652 :       mcz2 = MAXVAL(bo(2, 3, :, 2) - bo(1, 3, :, 2) + 1)
     606     2615884 :       fft_scratch_size%mcz1 = mcz1
     607     2615884 :       fft_scratch_size%mcx2 = mcx2
     608     2615884 :       fft_scratch_size%mcz2 = mcz2
     609     2615884 :       fft_scratch_size%nmax = nmax
     610     7847652 :       fft_scratch_size%nmray = MAXVAL(nyzray)
     611     2615884 :       fft_scratch_size%nyzray = nyzray(g_pos)
     612     2615884 :       fft_scratch_size%gs_group = gs_group
     613     2615884 :       fft_scratch_size%rs_group = rs_group
     614     7847652 :       fft_scratch_size%g_pos = g_pos
     615     7847652 :       fft_scratch_size%r_pos = r_pos
     616     7847652 :       fft_scratch_size%r_dim = r_dim
     617     2615884 :       fft_scratch_size%numtask = numtask
     618             : 
     619     2615884 :       IF (test) THEN
     620           8 :          IF (g_pos == 0 .AND. output_unit > 0) THEN
     621           4 :             WRITE (output_unit, '(A)') "  Parallel 3D FFT : fft3d_ps"
     622           4 :             WRITE (output_unit, '(A,T60,3I7)') "     Transform lengths ", n
     623           4 :             WRITE (output_unit, '(A,T67,2I7)') "     Array dimensions (gin) ", lg, mg
     624           4 :             WRITE (output_unit, '(A,T60,3I7)') "     Array dimensions (cin) ", nx, ny, nz
     625             :          END IF
     626             :       END IF
     627             : 
     628     2615884 :       IF (r_dim(2) > 1) THEN
     629             : 
     630             :          !
     631             :          ! real space is distributed over x and y coordinate
     632             :          ! we have two stages of communication
     633             :          !
     634             : 
     635           0 :          IF (r_dim(1) == 1) THEN
     636           0 :             CPABORT("This processor distribution is not supported.")
     637             :          END IF
     638           0 :          CALL get_fft_scratch(fft_scratch, tf_type=300, n=n, fft_sizes=fft_scratch_size)
     639             : 
     640           0 :          IF (sign == FWFFT) THEN
     641             :             ! cin -> gin
     642             : 
     643           0 :             IF (test) THEN
     644           0 :                sum_data = ABS(SUM(cin))
     645           0 :                CALL gs_group%sum(sum_data)
     646           0 :                IF (g_pos == 0 .AND. output_unit > 0) THEN
     647           0 :                   WRITE (output_unit, '(A)') "  Two step communication algorithm "
     648           0 :                   WRITE (output_unit, '(A,T60,3I7)') "     Transform Z ", n(3), mx1*my1
     649           0 :                   WRITE (output_unit, '(A,T60,3I7)') "     Transform Y ", n(2), mx2*mz2
     650           0 :                   WRITE (output_unit, '(A,T67,2I7)') "     Transform X ", n(1), nyzray(g_pos)
     651           0 :                   WRITE (output_unit, '(A,T61,E20.14)') "     Sum of data(1) ", sum_data
     652             :                END IF
     653             :             END IF
     654             : 
     655           0 :             pbuf => fft_scratch%p1buf
     656           0 :             qbuf => fft_scratch%p2buf
     657             : 
     658             :             ! FFT along z
     659           0 :             CALL fft_1dm(fft_scratch%fft_plan(1), cin, qbuf, norm, stat)
     660             : 
     661           0 :             rbuf => fft_scratch%p3buf
     662             : 
     663           0 :             IF (test) THEN
     664           0 :                sum_data = ABS(SUM(qbuf))
     665           0 :                CALL gs_group%sum(sum_data)
     666           0 :                IF (g_pos == 0 .AND. output_unit > 0) THEN
     667           0 :                   WRITE (output_unit, '(A,T61,E20.14)') "     Sum of data(2) T", sum_data
     668             :                END IF
     669             :             END IF
     670             : 
     671             :             ! Exchange data ( transpose of matrix )
     672           0 :             CALL cube_transpose_2(qbuf, bo(:, :, :, 1), bo(:, :, :, 2), rbuf, fft_scratch)
     673             : 
     674           0 :             IF (test) THEN
     675           0 :                sum_data = ABS(SUM(rbuf))
     676           0 :                CALL gs_group%sum(sum_data)
     677           0 :                IF (g_pos == 0 .AND. output_unit > 0) THEN
     678           0 :                   WRITE (output_unit, '(A,T61,E20.14)') "     Sum of data(3) T", sum_data
     679             :                END IF
     680             :             END IF
     681             : 
     682           0 :             pbuf => fft_scratch%p4buf
     683             : 
     684             :             ! FFT along y
     685           0 :             CALL fft_1dm(fft_scratch%fft_plan(2), rbuf, pbuf, 1.0_dp, stat)
     686             : 
     687           0 :             qbuf => fft_scratch%p5buf
     688             : 
     689           0 :             IF (test) THEN
     690           0 :                sum_data = ABS(SUM(pbuf))
     691           0 :                CALL gs_group%sum(sum_data)
     692           0 :                IF (g_pos == 0 .AND. output_unit > 0) THEN
     693           0 :                   WRITE (output_unit, '(A,T61,E20.14)') "     Sum of data(4) TS", sum_data
     694             :                END IF
     695             :             END IF
     696             : 
     697             :             ! Exchange data ( transpose of matrix ) and sort
     698             :             CALL xz_to_yz(pbuf, rs_group, r_dim, g_pos, p2p, yzp, nyzray, &
     699           0 :                           bo(:, :, :, 2), qbuf, fft_scratch)
     700             : 
     701           0 :             IF (test) THEN
     702           0 :                sum_data = ABS(SUM(qbuf))
     703           0 :                CALL gs_group%sum(sum_data)
     704           0 :                IF (g_pos == 0 .AND. output_unit > 0) THEN
     705           0 :                   WRITE (output_unit, '(A,T61,E20.14)') "     Sum of data(5) TS", sum_data
     706             :                END IF
     707             :             END IF
     708             : 
     709             :             ! FFT along x
     710           0 :             CALL fft_1dm(fft_scratch%fft_plan(3), qbuf, gin, 1.0_dp, stat)
     711             : 
     712           0 :             IF (test) THEN
     713           0 :                sum_data = ABS(SUM(gin))
     714           0 :                CALL gs_group%sum(sum_data)
     715           0 :                IF (g_pos == 0 .AND. output_unit > 0) THEN
     716           0 :                   WRITE (output_unit, '(A,T61,E20.14)') "     Sum of data(6) ", sum_data
     717             :                END IF
     718             :             END IF
     719             : 
     720           0 :          ELSE IF (sign == BWFFT) THEN
     721             :             ! gin -> cin
     722             : 
     723           0 :             IF (test) THEN
     724           0 :                sum_data = ABS(SUM(gin))
     725           0 :                CALL gs_group%sum(sum_data)
     726           0 :                IF (g_pos == 0 .AND. output_unit > 0) THEN
     727           0 :                   WRITE (output_unit, '(A)') "  Two step communication algorithm "
     728           0 :                   WRITE (output_unit, '(A,T67,2I7)') "     Transform X ", n(1), nyzray(g_pos)
     729           0 :                   WRITE (output_unit, '(A,T60,3I7)') "     Transform Y ", n(2), mx2*mz2
     730           0 :                   WRITE (output_unit, '(A,T60,3I7)') "     Transform Z ", n(3), mx1*my1
     731           0 :                   WRITE (output_unit, '(A,T61,E20.14)') "     Sum of data(1) ", sum_data
     732             :                END IF
     733             :             END IF
     734             : 
     735           0 :             pbuf => fft_scratch%p7buf
     736             : 
     737             :             ! FFT along x
     738           0 :             CALL fft_1dm(fft_scratch%fft_plan(4), gin, pbuf, norm, stat)
     739             : 
     740           0 :             qbuf => fft_scratch%p4buf
     741             : 
     742           0 :             IF (test) THEN
     743           0 :                sum_data = ABS(SUM(pbuf))
     744           0 :                CALL gs_group%sum(sum_data)
     745           0 :                IF (g_pos == 0 .AND. output_unit > 0) THEN
     746           0 :                   WRITE (output_unit, '(A,T61,E20.14)') "     Sum of data(2) TS", sum_data
     747             :                END IF
     748             :             END IF
     749             : 
     750             :             ! Exchange data ( transpose of matrix ) and sort
     751             :             CALL yz_to_xz(pbuf, rs_group, r_dim, g_pos, p2p, yzp, nyzray, &
     752           0 :                           bo(:, :, :, 2), qbuf, fft_scratch)
     753             : 
     754           0 :             IF (test) THEN
     755           0 :                sum_data = ABS(SUM(qbuf))
     756           0 :                CALL gs_group%sum(sum_data)
     757           0 :                IF (g_pos == 0 .AND. output_unit > 0) THEN
     758           0 :                   WRITE (output_unit, '(A,T61,E20.14)') "     Sum of data(3) TS", sum_data
     759             :                END IF
     760             :             END IF
     761             : 
     762           0 :             rbuf => fft_scratch%p3buf
     763             : 
     764             :             ! FFT along y
     765           0 :             CALL fft_1dm(fft_scratch%fft_plan(5), qbuf, rbuf, 1.0_dp, stat)
     766             : 
     767           0 :             pbuf => fft_scratch%p2buf
     768             : 
     769           0 :             IF (test) THEN
     770           0 :                sum_data = ABS(SUM(rbuf))
     771           0 :                CALL gs_group%sum(sum_data)
     772           0 :                IF (g_pos == 0 .AND. output_unit > 0) THEN
     773           0 :                   WRITE (output_unit, '(A,T61,E20.14)') "     Sum of data(4) T", sum_data
     774             :                END IF
     775             :             END IF
     776             : 
     777             :             ! Exchange data ( transpose of matrix )
     778           0 :             CALL cube_transpose_1(rbuf, bo(:, :, :, 2), bo(:, :, :, 1), pbuf, fft_scratch)
     779             : 
     780           0 :             IF (test) THEN
     781           0 :                sum_data = ABS(SUM(pbuf))
     782           0 :                CALL gs_group%sum(sum_data)
     783           0 :                IF (g_pos == 0 .AND. output_unit > 0) THEN
     784           0 :                   WRITE (output_unit, '(A,T61,E20.14)') "     Sum of data(5) T", sum_data
     785             :                END IF
     786             :             END IF
     787             : 
     788           0 :             qbuf => fft_scratch%p1buf
     789             : 
     790             :             ! FFT along z
     791           0 :             CALL fft_1dm(fft_scratch%fft_plan(6), pbuf, cin, 1.0_dp, stat)
     792             : 
     793           0 :             IF (test) THEN
     794           0 :                sum_data = ABS(SUM(cin))
     795           0 :                CALL gs_group%sum(sum_data)
     796           0 :                IF (g_pos == 0 .AND. output_unit > 0) THEN
     797           0 :                   WRITE (output_unit, '(A,T61,E20.14)') "     Sum of data(6) ", sum_data
     798             :                END IF
     799             :             END IF
     800             : 
     801             :          ELSE
     802             : 
     803           0 :             CPABORT("Illegal fsign parameter.")
     804             : 
     805             :          END IF
     806             : 
     807           0 :          CALL release_fft_scratch(fft_scratch)
     808             : 
     809             :       ELSE
     810             : 
     811             :          !
     812             :          ! real space is only distributed over x coordinate
     813             :          ! we have one stage of communication, after the transform of
     814             :          ! direction x
     815             :          !
     816             : 
     817     2615884 :          CALL get_fft_scratch(fft_scratch, tf_type=200, n=n, fft_sizes=fft_scratch_size)
     818             : 
     819     2615884 :          sbuf => fft_scratch%r1buf
     820     2615884 :          tbuf => fft_scratch%tbuf
     821             : 
     822 79693857614 :          sbuf = z_zero
     823 80072547383 :          tbuf = z_zero
     824             : 
     825     2615884 :          IF (sign == FWFFT) THEN
     826             :             ! cin -> gin
     827             : 
     828     1291630 :             IF (test) THEN
     829        9284 :                sum_data = ABS(SUM(cin))
     830           4 :                CALL gs_group%sum(sum_data)
     831           4 :                IF (g_pos == 0 .AND. output_unit > 0) THEN
     832           2 :                   WRITE (output_unit, '(A)') "     One step communication algorithm "
     833           2 :                   WRITE (output_unit, '(A,T60,3I7)') "     Transform YZ ", n(2), n(3), nx
     834           2 :                   WRITE (output_unit, '(A,T67,2I7)') "     Transform X ", n(1), nyzray(g_pos)
     835           2 :                   WRITE (output_unit, '(A,T61,E20.14)') "     Sum of data(1) ", sum_data
     836             :                END IF
     837             :             END IF
     838             : 
     839             :             ! FFT along y and z
     840     1291630 :             CALL fft_1dm(fft_scratch%fft_plan(1), cin, sbuf, 1._dp, stat)
     841     1291630 :             CALL fft_1dm(fft_scratch%fft_plan(2), sbuf, tbuf, 1._dp, stat)
     842             : 
     843     1291630 :             IF (test) THEN
     844        8740 :                sum_data = ABS(SUM(tbuf))
     845           4 :                CALL gs_group%sum(sum_data)
     846           4 :                IF (g_pos == 0 .AND. output_unit > 0) THEN
     847           2 :                   WRITE (output_unit, '(A,T61,E20.14)') "     Sum of data(2) TS", sum_data
     848             :                END IF
     849             :             END IF
     850             : 
     851             :             ! Exchange data ( transpose of matrix ) and sort
     852             :             CALL yz_to_x(tbuf, gs_group, g_pos, p2p, yzp, nyzray, &
     853     1291630 :                          bo(:, :, :, 2), sbuf, fft_scratch)
     854             : 
     855     1291630 :             IF (test) THEN
     856        8776 :                sum_data = ABS(SUM(sbuf))
     857           4 :                CALL gs_group%sum(sum_data)
     858           4 :                IF (g_pos == 0 .AND. output_unit > 0) THEN
     859           2 :                   WRITE (output_unit, '(A,T61,E20.14)') "     Sum of data(3) TS", sum_data
     860             :                END IF
     861             :             END IF
     862             :             ! FFT along x
     863     1291630 :             CALL fft_1dm(fft_scratch%fft_plan(3), sbuf, gin, norm, stat)
     864             : 
     865     1291630 :             IF (test) THEN
     866        8708 :                sum_data = ABS(SUM(gin))
     867           4 :                CALL gs_group%sum(sum_data)
     868           4 :                IF (g_pos == 0 .AND. output_unit > 0) THEN
     869           2 :                   WRITE (output_unit, '(A,T61,E20.14)') "     Sum of data(4) ", sum_data
     870             :                END IF
     871             :             END IF
     872             : 
     873     1324254 :          ELSE IF (sign == BWFFT) THEN
     874             :             ! gin -> cin
     875             : 
     876     1324254 :             IF (test) THEN
     877        8708 :                sum_data = ABS(SUM(gin))
     878           4 :                CALL gs_group%sum(sum_data)
     879           4 :                IF (g_pos == 0 .AND. output_unit > 0) THEN
     880           2 :                   WRITE (output_unit, '(A)') "  One step communication algorithm "
     881           2 :                   WRITE (output_unit, '(A,T67,2I7)') "     Transform X ", n(1), nyzray(g_pos)
     882           2 :                   WRITE (output_unit, '(A,T60,3I7)') "     Transform YZ ", n(2), n(3), nx
     883           2 :                   WRITE (output_unit, '(A,T61,E20.14)') "     Sum of data(1) ", sum_data
     884             :                END IF
     885             :             END IF
     886             : 
     887             :             ! FFT along x
     888     1324254 :             CALL fft_1dm(fft_scratch%fft_plan(4), gin, sbuf, norm, stat)
     889             : 
     890     1324254 :             IF (test) THEN
     891        8776 :                sum_data = ABS(SUM(sbuf))
     892           4 :                CALL gs_group%sum(sum_data)
     893           4 :                IF (g_pos == 0 .AND. output_unit > 0) THEN
     894           2 :                   WRITE (output_unit, '(A,T61,E20.14)') "     Sum of data(2) TS", sum_data
     895             :                END IF
     896             :             END IF
     897             : 
     898             :             ! Exchange data ( transpose of matrix ) and sort
     899             :             CALL x_to_yz(sbuf, gs_group, g_pos, p2p, yzp, nyzray, &
     900     1324254 :                          bo(:, :, :, 2), tbuf, fft_scratch)
     901             : 
     902     1324254 :             IF (test) THEN
     903        8740 :                sum_data = ABS(SUM(tbuf))
     904           4 :                CALL gs_group%sum(sum_data)
     905           4 :                IF (g_pos == 0 .AND. output_unit > 0) THEN
     906           2 :                   WRITE (output_unit, '(A,T61,E20.14)') "     Sum of data(3) TS", sum_data
     907             :                END IF
     908             :             END IF
     909             : 
     910             :             ! FFT along y and z
     911     1324254 :             CALL fft_1dm(fft_scratch%fft_plan(5), tbuf, sbuf, 1._dp, stat)
     912     1324254 :             CALL fft_1dm(fft_scratch%fft_plan(6), sbuf, cin, 1._dp, stat)
     913             : 
     914     1324254 :             IF (test) THEN
     915        9284 :                sum_data = ABS(SUM(cin))
     916           4 :                CALL gs_group%sum(sum_data)
     917           4 :                IF (g_pos == 0 .AND. output_unit > 0) THEN
     918           2 :                   WRITE (output_unit, '(A,T61,E20.14)') "     Sum of data(4) ", sum_data
     919             :                END IF
     920             :             END IF
     921             :          ELSE
     922           0 :             CPABORT("Illegal fsign parameter.")
     923             :          END IF
     924             : 
     925     2615884 :          CALL release_fft_scratch(fft_scratch)
     926             : 
     927             :       END IF
     928             : 
     929     2615884 :       DEALLOCATE (p2p)
     930             : 
     931     2615884 :       IF (PRESENT(status)) THEN
     932           0 :          status = stat
     933             :       END IF
     934     2615884 :       CALL timestop(handle)
     935             : 
     936     5231768 :    END SUBROUTINE fft3d_ps
     937             : 
     938             : ! **************************************************************************************************
     939             : !> \brief ...
     940             : !> \param fsign ...
     941             : !> \param n ...
     942             : !> \param zin ...
     943             : !> \param gin ...
     944             : !> \param group ...
     945             : !> \param bo ...
     946             : !> \param scale ...
     947             : !> \param status ...
     948             : !> \param debug ...
     949             : ! **************************************************************************************************
     950         208 :    SUBROUTINE fft3d_pb(fsign, n, zin, gin, group, bo, scale, status, debug)
     951             : 
     952             :       INTEGER, INTENT(IN)                                :: fsign
     953             :       INTEGER, DIMENSION(3), INTENT(IN)                  :: n
     954             :       COMPLEX(KIND=dp), CONTIGUOUS, DIMENSION(:, :, :), &
     955             :          INTENT(INOUT)                                   :: zin
     956             :       COMPLEX(KIND=dp), CONTIGUOUS, DIMENSION(:, :), &
     957             :          INTENT(INOUT)                                   :: gin
     958             :       TYPE(mp_cart_type), INTENT(IN)                     :: group
     959             :       INTEGER, CONTIGUOUS, DIMENSION(:, :, 0:, :), &
     960             :          INTENT(IN)                                      :: bo
     961             :       REAL(KIND=dp), INTENT(IN), OPTIONAL                :: scale
     962             :       INTEGER, INTENT(OUT), OPTIONAL                     :: status
     963             :       LOGICAL, INTENT(IN), OPTIONAL                      :: debug
     964             : 
     965             :       CHARACTER(len=*), PARAMETER                        :: routineN = 'fft3d_pb'
     966             : 
     967             :       COMPLEX(KIND=dp), CONTIGUOUS, DIMENSION(:, :), &
     968         208 :          POINTER                                         :: abuf, bbuf
     969             :       INTEGER                                            :: handle, lg(2), lz(3), mcx2, mcy3, mcz1, &
     970             :                                                             mcz2, mx1, mx2, mx3, my1, my2, my3, &
     971             :                                                             my_pos, mz1, mz2, mz3, output_unit, &
     972             :                                                             sign, stat
     973             :       INTEGER, DIMENSION(2)                              :: dim
     974             :       LOGICAL                                            :: test
     975             :       REAL(KIND=dp)                                      :: norm, sum_data
     976        1456 :       TYPE(fft_scratch_sizes)                            :: fft_scratch_size
     977             :       TYPE(fft_scratch_type), POINTER                    :: fft_scratch
     978             : 
     979             : !------------------------------------------------------------------------------
     980             : ! "Real Space"  1) xyZ      or      1) xYZ
     981             : !               2) xYz      or         not used
     982             : ! "G Space"     3) Xyz      or      3) XYz
     983             : !
     984             : ! There is one communicator (2-dimensional) for all distributions
     985             : ! np = n1 * n2, where np is the total number of processors
     986             : ! If n2 = 1, we have the second case and only one transpose step is needed
     987             : !
     988             : ! Assignment of dimensions to axis for different steps
     989             : ! First case: 1) n1=x; n2=y
     990             : !             2) n1=x; n2=z
     991             : !             3) n1=y; n2=z
     992             : ! Second case 1) n1=x
     993             : !             3) n1=z
     994             : !
     995             : ! The more general case with two communicators for the initial and final
     996             : ! distribution is not covered.
     997             : !------------------------------------------------------------------------------
     998             : 
     999         208 :       CALL timeset(routineN, handle)
    1000         208 :       output_unit = cp_logger_get_default_io_unit()
    1001             : 
    1002         624 :       dim = group%num_pe_cart
    1003         208 :       my_pos = group%mepos
    1004             : 
    1005         208 :       IF (PRESENT(debug)) THEN
    1006         208 :          test = debug
    1007             :       ELSE
    1008             :          test = .FALSE.
    1009             :       END IF
    1010             : 
    1011         208 :       IF (PRESENT(scale)) THEN
    1012         208 :          norm = scale
    1013             :       ELSE
    1014           0 :          norm = 1.0_dp
    1015             :       END IF
    1016             : 
    1017         208 :       sign = fsign
    1018             : 
    1019         208 :       IF (test) THEN
    1020           8 :          lg(1) = SIZE(gin, 1)
    1021           8 :          lg(2) = SIZE(gin, 2)
    1022           8 :          lz(1) = SIZE(zin, 1)
    1023           8 :          lz(2) = SIZE(zin, 2)
    1024           8 :          lz(3) = SIZE(zin, 3)
    1025           8 :          IF (my_pos == 0 .AND. output_unit > 0) THEN
    1026           4 :             WRITE (output_unit, '(A)') "  Parallel 3D FFT : fft3d_pb"
    1027           4 :             WRITE (output_unit, '(A,T60,3I7)') "     Transform lengths ", n
    1028           4 :             WRITE (output_unit, '(A,T67,2I7)') "     Array dimensions (gin) ", lg
    1029           4 :             WRITE (output_unit, '(A,T60,3I7)') "     Array dimensions (cin) ", lz
    1030             :          END IF
    1031             :       END IF
    1032             : 
    1033         208 :       mx1 = bo(2, 1, my_pos, 1) - bo(1, 1, my_pos, 1) + 1
    1034         208 :       my1 = bo(2, 2, my_pos, 1) - bo(1, 2, my_pos, 1) + 1
    1035         208 :       mz1 = bo(2, 3, my_pos, 1) - bo(1, 3, my_pos, 1) + 1
    1036         208 :       mx2 = bo(2, 1, my_pos, 2) - bo(1, 1, my_pos, 2) + 1
    1037         208 :       my2 = bo(2, 2, my_pos, 2) - bo(1, 2, my_pos, 2) + 1
    1038         208 :       mz2 = bo(2, 3, my_pos, 2) - bo(1, 3, my_pos, 2) + 1
    1039         208 :       mx3 = bo(2, 1, my_pos, 3) - bo(1, 1, my_pos, 3) + 1
    1040         208 :       my3 = bo(2, 2, my_pos, 3) - bo(1, 2, my_pos, 3) + 1
    1041         208 :       mz3 = bo(2, 3, my_pos, 3) - bo(1, 3, my_pos, 3) + 1
    1042         208 :       fft_scratch_size%mx1 = mx1
    1043         208 :       fft_scratch_size%mx2 = mx2
    1044         208 :       fft_scratch_size%mx3 = mx3
    1045         208 :       fft_scratch_size%my1 = my1
    1046         208 :       fft_scratch_size%my2 = my2
    1047         208 :       fft_scratch_size%my3 = my3
    1048         208 :       fft_scratch_size%mz1 = mz1
    1049         208 :       fft_scratch_size%mz2 = mz2
    1050         208 :       fft_scratch_size%mz3 = mz3
    1051         624 :       mcz1 = MAXVAL(bo(2, 3, :, 1) - bo(1, 3, :, 1) + 1)
    1052         624 :       mcx2 = MAXVAL(bo(2, 1, :, 2) - bo(1, 1, :, 2) + 1)
    1053         624 :       mcz2 = MAXVAL(bo(2, 3, :, 2) - bo(1, 3, :, 2) + 1)
    1054         624 :       mcy3 = MAXVAL(bo(2, 2, :, 3) - bo(1, 2, :, 3) + 1)
    1055         208 :       fft_scratch_size%mcz1 = mcz1
    1056         208 :       fft_scratch_size%mcx2 = mcx2
    1057         208 :       fft_scratch_size%mcz2 = mcz2
    1058         208 :       fft_scratch_size%mcy3 = mcy3
    1059         208 :       fft_scratch_size%gs_group = group
    1060         208 :       fft_scratch_size%rs_group = group
    1061         624 :       fft_scratch_size%g_pos = my_pos
    1062         208 :       fft_scratch_size%numtask = DIM(1)*DIM(2)
    1063             : 
    1064         208 :       IF (DIM(1) > 1 .AND. DIM(2) > 1) THEN
    1065             : 
    1066             :          !
    1067             :          ! First case; two stages of communication
    1068             :          !
    1069             : 
    1070           0 :          CALL get_fft_scratch(fft_scratch, tf_type=100, n=n, fft_sizes=fft_scratch_size)
    1071             : 
    1072           0 :          IF (sign == FWFFT) THEN
    1073             :             ! Stage 1 -> 3
    1074             : 
    1075           0 :             bbuf => fft_scratch%a2buf
    1076             : 
    1077           0 :             IF (test) THEN
    1078           0 :                sum_data = ABS(SUM(zin))
    1079           0 :                CALL group%sum(sum_data)
    1080           0 :                IF (my_pos == 0 .AND. output_unit > 0) THEN
    1081           0 :                   WRITE (output_unit, '(A)') "  Two step communication algorithm "
    1082           0 :                   WRITE (output_unit, '(A,T67,2I7)') "     Transform Z ", n(3), mx1*my1
    1083           0 :                   WRITE (output_unit, '(A,T61,E20.14)') "     Sum of data(1) ", sum_data
    1084             :                END IF
    1085             :             END IF
    1086             : 
    1087             :             ! FFT along z
    1088           0 :             CALL fft_1dm(fft_scratch%fft_plan(1), zin, bbuf, norm, stat)
    1089             : 
    1090           0 :             abuf => fft_scratch%a3buf
    1091             : 
    1092           0 :             IF (test) THEN
    1093           0 :                sum_data = ABS(SUM(bbuf))
    1094           0 :                CALL group%sum(sum_data)
    1095           0 :                IF (my_pos == 0 .AND. output_unit > 0) THEN
    1096           0 :                   WRITE (output_unit, '(A,T61,E20.14)') "     Sum of data(2) T", sum_data
    1097             :                END IF
    1098             :             END IF
    1099             : 
    1100           0 :             CALL cube_transpose_2(bbuf, bo(:, :, :, 1), bo(:, :, :, 2), abuf, fft_scratch)
    1101             : 
    1102           0 :             bbuf => fft_scratch%a4buf
    1103             : 
    1104           0 :             IF (test) THEN
    1105           0 :                sum_data = ABS(SUM(abuf))
    1106           0 :                CALL group%sum(sum_data)
    1107           0 :                IF (my_pos == 0 .AND. output_unit > 0) THEN
    1108           0 :                   WRITE (output_unit, '(A,T67,2I7)') "     Transform Y ", n(2), mx2*mz2
    1109           0 :                   WRITE (output_unit, '(A,T61,E20.14)') "     Sum of data(3) ", sum_data
    1110             :                END IF
    1111             :             END IF
    1112             : 
    1113             :             ! FFT along y
    1114           0 :             CALL fft_1dm(fft_scratch%fft_plan(2), abuf, bbuf, 1.0_dp, stat)
    1115             : 
    1116           0 :             abuf => fft_scratch%a5buf
    1117             : 
    1118           0 :             IF (test) THEN
    1119           0 :                sum_data = ABS(SUM(bbuf))
    1120           0 :                CALL group%sum(sum_data)
    1121           0 :                IF (my_pos == 0 .AND. output_unit > 0) THEN
    1122           0 :                   WRITE (output_unit, '(A,T61,E20.14)') "     Sum of data(4) T", sum_data
    1123             :                END IF
    1124             :             END IF
    1125             : 
    1126           0 :             CALL cube_transpose_4(bbuf, bo(:, :, :, 2), bo(:, :, :, 3), abuf, fft_scratch)
    1127             : 
    1128           0 :             IF (test) THEN
    1129           0 :                sum_data = ABS(SUM(abuf))
    1130           0 :                CALL group%sum(sum_data)
    1131           0 :                IF (my_pos == 0 .AND. output_unit > 0) THEN
    1132           0 :                   WRITE (output_unit, '(A,T67,2I7)') "     Transform X ", n(1), my3*mz3
    1133           0 :                   WRITE (output_unit, '(A,T61,E20.14)') "     Sum of data(5) ", sum_data
    1134             :                END IF
    1135             :             END IF
    1136             : 
    1137             :             ! FFT along x
    1138           0 :             CALL fft_1dm(fft_scratch%fft_plan(3), abuf, gin, 1.0_dp, stat)
    1139             : 
    1140           0 :             IF (test) THEN
    1141           0 :                sum_data = ABS(SUM(gin))
    1142           0 :                CALL group%sum(sum_data)
    1143           0 :                IF (my_pos == 0 .AND. output_unit > 0) THEN
    1144           0 :                   WRITE (output_unit, '(A,T61,E20.14)') "     Sum of data(6) ", sum_data
    1145             :                END IF
    1146             :             END IF
    1147             : 
    1148           0 :          ELSEIF (sign == BWFFT) THEN
    1149             :             ! Stage 3 -> 1
    1150             : 
    1151           0 :             bbuf => fft_scratch%a5buf
    1152             : 
    1153           0 :             IF (test) THEN
    1154           0 :                sum_data = ABS(SUM(gin))
    1155           0 :                CALL group%sum(sum_data)
    1156           0 :                IF (my_pos == 0 .AND. output_unit > 0) THEN
    1157           0 :                   WRITE (output_unit, '(A)') "  Two step communication algorithm "
    1158           0 :                   WRITE (output_unit, '(A,T67,2I7)') "     Transform X ", n(1), my3*mz3
    1159           0 :                   WRITE (output_unit, '(A,T61,E20.14)') "     Sum of data(1) ", sum_data
    1160             :                END IF
    1161             :             END IF
    1162             : 
    1163             :             ! FFT along x
    1164           0 :             CALL fft_1dm(fft_scratch%fft_plan(4), gin, bbuf, 1.0_dp, stat)
    1165             : 
    1166           0 :             abuf => fft_scratch%a4buf
    1167             : 
    1168           0 :             IF (test) THEN
    1169           0 :                sum_data = ABS(SUM(bbuf))
    1170           0 :                CALL group%sum(sum_data)
    1171           0 :                IF (my_pos == 0 .AND. output_unit > 0) THEN
    1172           0 :                   WRITE (output_unit, '(A,T61,E20.14)') "     Sum of data(2) T", sum_data
    1173             :                END IF
    1174             :             END IF
    1175             : 
    1176           0 :             CALL cube_transpose_3(bbuf, bo(:, :, :, 3), bo(:, :, :, 2), abuf, fft_scratch)
    1177             : 
    1178           0 :             bbuf => fft_scratch%a3buf
    1179             : 
    1180           0 :             IF (test) THEN
    1181           0 :                sum_data = ABS(SUM(abuf))
    1182           0 :                CALL group%sum(sum_data)
    1183           0 :                IF (my_pos == 0 .AND. output_unit > 0) THEN
    1184           0 :                   WRITE (output_unit, '(A,T67,2I7)') "     Transform Y ", n(2), mx2*mz2
    1185           0 :                   WRITE (output_unit, '(A,T61,E20.14)') "     Sum of data(3) ", sum_data
    1186             :                END IF
    1187             :             END IF
    1188             : 
    1189             :             ! FFT along y
    1190           0 :             CALL fft_1dm(fft_scratch%fft_plan(5), abuf, bbuf, 1.0_dp, stat)
    1191             : 
    1192           0 :             abuf => fft_scratch%a2buf
    1193             : 
    1194           0 :             IF (test) THEN
    1195           0 :                sum_data = ABS(SUM(bbuf))
    1196           0 :                CALL group%sum(sum_data)
    1197           0 :                IF (my_pos == 0 .AND. output_unit > 0) THEN
    1198           0 :                   WRITE (output_unit, '(A,T61,E20.14)') "     Sum of data(4) T", sum_data
    1199             :                END IF
    1200             :             END IF
    1201             : 
    1202           0 :             CALL cube_transpose_1(bbuf, bo(:, :, :, 2), bo(:, :, :, 1), abuf, fft_scratch)
    1203             : 
    1204           0 :             IF (test) THEN
    1205           0 :                sum_data = ABS(SUM(abuf))
    1206           0 :                CALL group%sum(sum_data)
    1207           0 :                IF (my_pos == 0 .AND. output_unit > 0) THEN
    1208           0 :                   WRITE (output_unit, '(A,T67,2I7)') "     Transform Z ", n(3), mx1*my1
    1209           0 :                   WRITE (output_unit, '(A,T61,E20.14)') "     Sum of data(5) ", sum_data
    1210             :                END IF
    1211             :             END IF
    1212             : 
    1213             :             ! FFT along z
    1214           0 :             CALL fft_1dm(fft_scratch%fft_plan(6), abuf, zin, norm, stat)
    1215             : 
    1216           0 :             IF (test) THEN
    1217           0 :                sum_data = ABS(SUM(zin))
    1218           0 :                CALL group%sum(sum_data)
    1219           0 :                IF (my_pos == 0 .AND. output_unit > 0) THEN
    1220           0 :                   WRITE (output_unit, '(A,T61,E20.14)') "     Sum of data(6) ", sum_data
    1221             :                END IF
    1222             :             END IF
    1223             : 
    1224             :          ELSE
    1225           0 :             CPABORT("Illegal fsign parameter.")
    1226             :          END IF
    1227             : 
    1228           0 :          CALL release_fft_scratch(fft_scratch)
    1229             : 
    1230         208 :       ELSEIF (DIM(2) == 1) THEN
    1231             : 
    1232             :          !
    1233             :          ! Second case; one stage of communication
    1234             :          !
    1235             : 
    1236         208 :          CALL get_fft_scratch(fft_scratch, tf_type=101, n=n, fft_sizes=fft_scratch_size)
    1237             : 
    1238         208 :          IF (sign == FWFFT) THEN
    1239             :             ! Stage 1 -> 3
    1240             : 
    1241         104 :             IF (test) THEN
    1242        9284 :                sum_data = ABS(SUM(zin))
    1243           4 :                CALL group%sum(sum_data)
    1244           4 :                IF (my_pos == 0 .AND. output_unit > 0) THEN
    1245           2 :                   WRITE (output_unit, '(A)') "  one step communication algorithm "
    1246           2 :                   WRITE (output_unit, '(A,T67,2I7)') "     Transform Z ", n(3), mx1*my1
    1247           2 :                   WRITE (output_unit, '(A,T67,2I7)') "     Transform Y ", n(2), mx1*mz1
    1248           2 :                   WRITE (output_unit, '(A,T61,E20.14)') "     Sum of data(1) ", sum_data
    1249             :                END IF
    1250             :             END IF
    1251             : 
    1252         104 :             abuf => fft_scratch%a3buf
    1253         104 :             bbuf => fft_scratch%a4buf
    1254             :             ! FFT along z and y
    1255         104 :             CALL fft_1dm(fft_scratch%fft_plan(1), zin, abuf, norm, stat)
    1256         104 :             CALL fft_1dm(fft_scratch%fft_plan(2), abuf, bbuf, 1.0_dp, stat)
    1257             : 
    1258         104 :             abuf => fft_scratch%a5buf
    1259             : 
    1260         104 :             IF (test) THEN
    1261        8708 :                sum_data = ABS(SUM(bbuf))
    1262           4 :                CALL group%sum(sum_data)
    1263           4 :                IF (my_pos == 0 .AND. output_unit > 0) THEN
    1264           2 :                   WRITE (output_unit, '(A,T61,E20.14)') "     Sum of data(2) T", sum_data
    1265             :                END IF
    1266             :             END IF
    1267             : 
    1268         104 :             CALL cube_transpose_6(bbuf, group, bo(:, :, :, 1), bo(:, :, :, 3), abuf, fft_scratch)
    1269             : 
    1270         104 :             IF (test) THEN
    1271        8260 :                sum_data = ABS(SUM(abuf))
    1272           4 :                CALL group%sum(sum_data)
    1273           4 :                IF (my_pos == 0 .AND. output_unit > 0) THEN
    1274           2 :                   WRITE (output_unit, '(A,T67,2I7)') "     Transform X ", n(1), my3*mz3
    1275           2 :                   WRITE (output_unit, '(A,T61,E20.14)') "     Sum of data(3) ", sum_data
    1276             :                END IF
    1277             :             END IF
    1278             : 
    1279             :             ! FFT along x
    1280         104 :             CALL fft_1dm(fft_scratch%fft_plan(3), abuf, gin, 1.0_dp, stat)
    1281             : 
    1282         104 :             IF (test) THEN
    1283        8708 :                sum_data = ABS(SUM(gin))
    1284           4 :                CALL group%sum(sum_data)
    1285           4 :                IF (my_pos == 0 .AND. output_unit > 0) THEN
    1286           2 :                   WRITE (output_unit, '(A,T61,E20.14)') "     Sum of data(4) ", sum_data
    1287             :                END IF
    1288             :             END IF
    1289             : 
    1290         104 :          ELSEIF (sign == BWFFT) THEN
    1291             :             ! Stage 3 -> 1
    1292             : 
    1293         104 :             IF (test) THEN
    1294        8708 :                sum_data = ABS(SUM(gin))
    1295           4 :                CALL group%sum(sum_data)
    1296           4 :                IF (my_pos == 0 .AND. output_unit > 0) THEN
    1297           2 :                   WRITE (output_unit, '(A)') "  one step communication algorithm "
    1298           2 :                   WRITE (output_unit, '(A,T67,2I7)') "     Transform X ", n(1), my3*mz3
    1299           2 :                   WRITE (output_unit, '(A,T61,E20.14)') "     Sum of data(1) ", sum_data
    1300             :                END IF
    1301             :             END IF
    1302             : 
    1303         104 :             bbuf => fft_scratch%a5buf
    1304             : 
    1305             :             ! FFT along x
    1306         104 :             CALL fft_1dm(fft_scratch%fft_plan(4), gin, bbuf, 1.0_dp, stat)
    1307             : 
    1308         104 :             abuf => fft_scratch%a4buf
    1309             : 
    1310         104 :             IF (test) THEN
    1311        8260 :                sum_data = ABS(SUM(bbuf))
    1312           4 :                CALL group%sum(sum_data)
    1313           4 :                IF (my_pos == 0 .AND. output_unit > 0) THEN
    1314           2 :                   WRITE (output_unit, '(A,T61,E20.14)') "     Sum of data(2) T", sum_data
    1315             :                END IF
    1316             :             END IF
    1317             : 
    1318         104 :             CALL cube_transpose_5(bbuf, group, bo(:, :, :, 3), bo(:, :, :, 1), abuf, fft_scratch)
    1319             : 
    1320         104 :             bbuf => fft_scratch%a3buf
    1321             : 
    1322         104 :             IF (test) THEN
    1323        8708 :                sum_data = ABS(SUM(abuf))
    1324           4 :                CALL group%sum(sum_data)
    1325           4 :                IF (my_pos == 0 .AND. output_unit > 0) THEN
    1326           2 :                   WRITE (output_unit, '(A,T67,2I7)') "     Transform Y ", n(2), mx1*mz1
    1327           2 :                   WRITE (output_unit, '(A,T67,2I7)') "     Transform Z ", n(3), mx1*my1
    1328           2 :                   WRITE (output_unit, '(A,T61,E20.14)') "     Sum of data(3) ", sum_data
    1329             :                END IF
    1330             :             END IF
    1331             : 
    1332             :             ! FFT along y
    1333         104 :             CALL fft_1dm(fft_scratch%fft_plan(5), abuf, bbuf, 1.0_dp, stat)
    1334             : 
    1335             :             ! FFT along z
    1336         104 :             CALL fft_1dm(fft_scratch%fft_plan(6), bbuf, zin, norm, stat)
    1337             : 
    1338         104 :             IF (test) THEN
    1339        9284 :                sum_data = ABS(SUM(zin))
    1340           4 :                CALL group%sum(sum_data)
    1341           4 :                IF (my_pos == 0 .AND. output_unit > 0) THEN
    1342           2 :                   WRITE (output_unit, '(A,T61,E20.14)') "     Sum of data(4) ", sum_data
    1343             :                END IF
    1344             :             END IF
    1345             : 
    1346             :          ELSE
    1347           0 :             CPABORT("Illegal fsign parameter.")
    1348             :          END IF
    1349             : 
    1350         208 :          CALL release_fft_scratch(fft_scratch)
    1351             : 
    1352             :       ELSE
    1353             : 
    1354           0 :          CPABORT("This partition not implemented.")
    1355             : 
    1356             :       END IF
    1357             : 
    1358         208 :       IF (PRESENT(status)) THEN
    1359           0 :          status = stat
    1360             :       END IF
    1361             : 
    1362         208 :       CALL timestop(handle)
    1363             : 
    1364         416 :    END SUBROUTINE fft3d_pb
    1365             : 
    1366             : ! **************************************************************************************************
    1367             : !> \brief ...
    1368             : !> \param sb ...
    1369             : !> \param group ...
    1370             : !> \param my_pos ...
    1371             : !> \param p2p ...
    1372             : !> \param yzp ...
    1373             : !> \param nray ...
    1374             : !> \param bo ...
    1375             : !> \param tb ...
    1376             : !> \param fft_scratch ...
    1377             : !> \par History
    1378             : !>      15. Feb. 2006 : single precision all_to_all
    1379             : !> \author JGH (14-Jan-2001)
    1380             : ! **************************************************************************************************
    1381     1324254 :    SUBROUTINE x_to_yz(sb, group, my_pos, p2p, yzp, nray, bo, tb, fft_scratch)
    1382             : 
    1383             :       COMPLEX(KIND=dp), CONTIGUOUS, DIMENSION(:, :), &
    1384             :          INTENT(IN)                                      :: sb
    1385             :       TYPE(mp_comm_type), INTENT(IN)                     :: group
    1386             :       INTEGER, INTENT(IN)                                :: my_pos
    1387             :       INTEGER, CONTIGUOUS, DIMENSION(0:), INTENT(IN)     :: p2p
    1388             :       INTEGER, CONTIGUOUS, DIMENSION(:, :, 0:), &
    1389             :          INTENT(IN)                                      :: yzp
    1390             :       INTEGER, CONTIGUOUS, DIMENSION(0:), INTENT(IN)     :: nray
    1391             :       INTEGER, CONTIGUOUS, DIMENSION(:, :, 0:), &
    1392             :          INTENT(IN)                                      :: bo
    1393             :       COMPLEX(KIND=dp), CONTIGUOUS, DIMENSION(:, :, :), &
    1394             :          INTENT(INOUT)                                   :: tb
    1395             :       TYPE(fft_scratch_type), INTENT(IN)                 :: fft_scratch
    1396             : 
    1397             :       CHARACTER(len=*), PARAMETER                        :: routineN = 'x_to_yz'
    1398             : 
    1399             :       COMPLEX(KIND=dp), CONTIGUOUS, DIMENSION(:, :), &
    1400     1324254 :          POINTER                                         :: rr
    1401             :       COMPLEX(KIND=sp), CONTIGUOUS, DIMENSION(:, :), &
    1402     1324254 :          POINTER                                         :: ss, tt
    1403             :       INTEGER                                            :: handle, ip, ir, ix, ixx, iy, iz, mpr, &
    1404             :                                                             nm, np, nr, nx
    1405     1324254 :       INTEGER, CONTIGUOUS, DIMENSION(:), POINTER         :: rcount, rdispl, scount, sdispl
    1406             : 
    1407     1324254 :       CALL timeset(routineN, handle)
    1408             : 
    1409     1324254 :       np = SIZE(p2p)
    1410     1324254 :       scount => fft_scratch%scount
    1411     1324254 :       rcount => fft_scratch%rcount
    1412     1324254 :       sdispl => fft_scratch%sdispl
    1413     1324254 :       rdispl => fft_scratch%rdispl
    1414             : 
    1415     1324254 :       IF (alltoall_sgl) THEN
    1416         230 :          ss => fft_scratch%ss
    1417         230 :          tt => fft_scratch%tt
    1418     3441099 :          ss(:, :) = CMPLX(sb(:, :), KIND=sp)
    1419     3353860 :          tt(:, :) = 0._sp
    1420             :       ELSE
    1421     1324024 :          rr => fft_scratch%rr
    1422             :       END IF
    1423             : 
    1424     1324254 :       mpr = p2p(my_pos)
    1425     3972762 :       nm = MAXVAL(nray(0:np - 1))
    1426     1324254 :       nr = nray(my_pos)
    1427             : !$OMP PARALLEL DO DEFAULT(NONE), &
    1428             : !$OMP             PRIVATE(ix,nx), &
    1429     1324254 : !$OMP             SHARED(np,p2p,bo,nr,scount,sdispl)
    1430             :       DO ip = 0, np - 1
    1431             :          ix = p2p(ip)
    1432             :          nx = bo(2, 1, ix) - bo(1, 1, ix) + 1
    1433             :          scount(ip) = nr*nx
    1434             :          sdispl(ip) = nr*(bo(1, 1, ix) - 1)
    1435             :       END DO
    1436             : !$OMP END PARALLEL DO
    1437     1324254 :       nx = bo(2, 1, mpr) - bo(1, 1, mpr) + 1
    1438             : !$OMP PARALLEL DO DEFAULT(NONE), &
    1439             : !$OMP             PRIVATE(nr), &
    1440     1324254 : !$OMP             SHARED(np,nray,nx,rcount,rdispl,nm)
    1441             :       DO ip = 0, np - 1
    1442             :          nr = nray(ip)
    1443             :          rcount(ip) = nr*nx
    1444             :          rdispl(ip) = nm*nx*ip
    1445             :       END DO
    1446             : !$OMP END PARALLEL DO
    1447     1324254 :       IF (alltoall_sgl) THEN
    1448         230 :          CALL group%alltoall(ss, scount, sdispl, tt, rcount, rdispl)
    1449             :       ELSE
    1450     1324024 :          CALL group%alltoall(sb, scount, sdispl, rr, rcount, rdispl)
    1451             :       END IF
    1452             : 
    1453     1324254 :       nx = bo(2, 1, mpr) - bo(1, 1, mpr) + 1
    1454             : !$OMP PARALLEL DO DEFAULT(NONE) COLLAPSE(2) &
    1455             : !$OMP             PRIVATE(ixx,ir,iy,iz,ix) &
    1456     1324254 : !$OMP             SHARED(np,nray,nx,alltoall_sgl,yzp,tt,rr,tb)
    1457             :       DO ip = 0, np - 1
    1458             :          DO ix = 1, nx
    1459             :             ixx = nray(ip)*(ix - 1)
    1460             :             IF (alltoall_sgl) THEN
    1461             :                DO ir = 1, nray(ip)
    1462             :                   iy = yzp(1, ir, ip)
    1463             :                   iz = yzp(2, ir, ip)
    1464             :                   tb(iy, iz, ix) = tt(ir + ixx, ip)
    1465             :                END DO
    1466             :             ELSE
    1467             :                DO ir = 1, nray(ip)
    1468             :                   iy = yzp(1, ir, ip)
    1469             :                   iz = yzp(2, ir, ip)
    1470             :                   tb(iy, iz, ix) = rr(ir + ixx, ip)
    1471             :                END DO
    1472             :             END IF
    1473             :          END DO
    1474             :       END DO
    1475             : !$OMP END PARALLEL DO
    1476             : 
    1477     1324254 :       CALL timestop(handle)
    1478             : 
    1479     1324254 :    END SUBROUTINE x_to_yz
    1480             : 
    1481             : ! **************************************************************************************************
    1482             : !> \brief ...
    1483             : !> \param tb ...
    1484             : !> \param group ...
    1485             : !> \param my_pos ...
    1486             : !> \param p2p ...
    1487             : !> \param yzp ...
    1488             : !> \param nray ...
    1489             : !> \param bo ...
    1490             : !> \param sb ...
    1491             : !> \param fft_scratch ...
    1492             : !> \par History
    1493             : !>      15. Feb. 2006 : single precision all_to_all
    1494             : !> \author JGH (14-Jan-2001)
    1495             : ! **************************************************************************************************
    1496     1291630 :    SUBROUTINE yz_to_x(tb, group, my_pos, p2p, yzp, nray, bo, sb, fft_scratch)
    1497             : 
    1498             :       COMPLEX(KIND=dp), CONTIGUOUS, DIMENSION(:, :, :), &
    1499             :          INTENT(IN)                                      :: tb
    1500             :       TYPE(mp_comm_type), INTENT(IN)                     :: group
    1501             :       INTEGER, INTENT(IN)                                :: my_pos
    1502             :       INTEGER, CONTIGUOUS, DIMENSION(0:), INTENT(IN)     :: p2p
    1503             :       INTEGER, CONTIGUOUS, DIMENSION(:, :, 0:), &
    1504             :          INTENT(IN)                                      :: yzp
    1505             :       INTEGER, CONTIGUOUS, DIMENSION(0:), INTENT(IN)     :: nray
    1506             :       INTEGER, DIMENSION(:, :, 0:), INTENT(IN)           :: bo
    1507             :       COMPLEX(KIND=dp), CONTIGUOUS, DIMENSION(:, :), &
    1508             :          INTENT(INOUT)                                   :: sb
    1509             :       TYPE(fft_scratch_type), INTENT(IN)                 :: fft_scratch
    1510             : 
    1511             :       CHARACTER(len=*), PARAMETER                        :: routineN = 'yz_to_x'
    1512             : 
    1513             :       COMPLEX(KIND=dp), CONTIGUOUS, DIMENSION(:, :), &
    1514     1291630 :          POINTER                                         :: rr
    1515             :       COMPLEX(KIND=sp), CONTIGUOUS, DIMENSION(:, :), &
    1516     1291630 :          POINTER                                         :: ss, tt
    1517             :       INTEGER                                            :: handle, ip, ir, ix, ixx, iy, iz, mpr, &
    1518             :                                                             nm, np, nr, nx
    1519     1291630 :       INTEGER, CONTIGUOUS, DIMENSION(:), POINTER         :: rcount, rdispl, scount, sdispl
    1520             : 
    1521     1291630 :       CALL timeset(routineN, handle)
    1522             : 
    1523     1291630 :       np = SIZE(p2p)
    1524     1291630 :       mpr = p2p(my_pos)
    1525     1291630 :       scount => fft_scratch%scount
    1526     1291630 :       rcount => fft_scratch%rcount
    1527     1291630 :       sdispl => fft_scratch%sdispl
    1528     1291630 :       rdispl => fft_scratch%rdispl
    1529             : 
    1530     1291630 :       IF (alltoall_sgl) THEN
    1531         238 :          ss => fft_scratch%ss
    1532         238 :          tt => fft_scratch%tt
    1533     3703835 :          ss = 0._sp
    1534     3609884 :          tt = 0._sp
    1535             :       ELSE
    1536     1291392 :          rr => fft_scratch%rr
    1537             :       END IF
    1538             : 
    1539     1291630 :       nx = bo(2, 1, mpr) - bo(1, 1, mpr) + 1
    1540             : !$OMP PARALLEL DO DEFAULT(NONE) COLLAPSE(2) &
    1541             : !$OMP             PRIVATE(ip, ixx, ir, iy, iz, ix) &
    1542     1291630 : !$OMP             SHARED(np,nray,nx,alltoall_sgl,yzp,tb,tt,rr)
    1543             :       DO ip = 0, np - 1
    1544             :          DO ix = 1, nx
    1545             :             ixx = nray(ip)*(ix - 1)
    1546             :             IF (alltoall_sgl) THEN
    1547             :                DO ir = 1, nray(ip)
    1548             :                   iy = yzp(1, ir, ip)
    1549             :                   iz = yzp(2, ir, ip)
    1550             :                   tt(ir + ixx, ip) = CMPLX(tb(iy, iz, ix), KIND=sp)
    1551             :                END DO
    1552             :             ELSE
    1553             :                DO ir = 1, nray(ip)
    1554             :                   iy = yzp(1, ir, ip)
    1555             :                   iz = yzp(2, ir, ip)
    1556             :                   rr(ir + ixx, ip) = tb(iy, iz, ix)
    1557             :                END DO
    1558             :             END IF
    1559             :          END DO
    1560             :       END DO
    1561             : !$OMP END PARALLEL DO
    1562     3874890 :       nm = MAXVAL(nray(0:np - 1))
    1563     1291630 :       nr = nray(my_pos)
    1564             : !$OMP PARALLEL DO DEFAULT(NONE), &
    1565             : !$OMP             PRIVATE(ix,nx), &
    1566     1291630 : !$OMP             SHARED(np,p2p,bo,rcount,rdispl,nr)
    1567             :       DO ip = 0, np - 1
    1568             :          ix = p2p(ip)
    1569             :          nx = bo(2, 1, ix) - bo(1, 1, ix) + 1
    1570             :          rcount(ip) = nr*nx
    1571             :          rdispl(ip) = nr*(bo(1, 1, ix) - 1)
    1572             :       END DO
    1573             : !$OMP END PARALLEL DO
    1574     1291630 :       nx = bo(2, 1, mpr) - bo(1, 1, mpr) + 1
    1575             : !$OMP PARALLEL DO DEFAULT(NONE), &
    1576             : !$OMP             PRIVATE(nr), &
    1577     1291630 : !$OMP             SHARED(np,nray,scount,sdispl,nx,nm)
    1578             :       DO ip = 0, np - 1
    1579             :          nr = nray(ip)
    1580             :          scount(ip) = nr*nx
    1581             :          sdispl(ip) = nm*nx*ip
    1582             :       END DO
    1583             : !$OMP END PARALLEL DO
    1584             : 
    1585     1291630 :       IF (alltoall_sgl) THEN
    1586         238 :          CALL group%alltoall(tt, scount, sdispl, ss, rcount, rdispl)
    1587     3703835 :          sb = ss
    1588             :       ELSE
    1589     1291392 :          CALL group%alltoall(rr, scount, sdispl, sb, rcount, rdispl)
    1590             :       END IF
    1591             : 
    1592     1291630 :       CALL timestop(handle)
    1593             : 
    1594     1291630 :    END SUBROUTINE yz_to_x
    1595             : 
    1596             : ! **************************************************************************************************
    1597             : !> \brief ...
    1598             : !> \param sb ...
    1599             : !> \param group ...
    1600             : !> \param dims ...
    1601             : !> \param my_pos ...
    1602             : !> \param p2p ...
    1603             : !> \param yzp ...
    1604             : !> \param nray ...
    1605             : !> \param bo ...
    1606             : !> \param tb ...
    1607             : !> \param fft_scratch ...
    1608             : !> \par History
    1609             : !>      15. Feb. 2006 : single precision all_to_all
    1610             : !> \author JGH (18-Jan-2001)
    1611             : ! **************************************************************************************************
    1612           0 :    SUBROUTINE yz_to_xz(sb, group, dims, my_pos, p2p, yzp, nray, bo, tb, fft_scratch)
    1613             : 
    1614             :       COMPLEX(KIND=dp), CONTIGUOUS, DIMENSION(:, :), &
    1615             :          INTENT(IN)                                      :: sb
    1616             : 
    1617             :       CLASS(mp_comm_type), INTENT(IN)                     :: group
    1618             :       INTEGER, DIMENSION(2), INTENT(IN)                  :: dims
    1619             :       INTEGER, INTENT(IN)                                :: my_pos
    1620             :       INTEGER, CONTIGUOUS, DIMENSION(0:), INTENT(IN)                 :: p2p
    1621             :       INTEGER, CONTIGUOUS, DIMENSION(:, :, 0:), INTENT(IN)           :: yzp
    1622             :       INTEGER, CONTIGUOUS, DIMENSION(0:), INTENT(IN)                 :: nray
    1623             :       INTEGER, CONTIGUOUS, DIMENSION(:, :, 0:), INTENT(IN)           :: bo
    1624             :       COMPLEX(KIND=dp), DIMENSION(:, :), INTENT(INOUT), CONTIGUOUS   :: tb
    1625             :       TYPE(fft_scratch_type), INTENT(INOUT)                 :: fft_scratch
    1626             : 
    1627             :       CHARACTER(len=*), PARAMETER                        :: routineN = 'yz_to_xz'
    1628             : 
    1629           0 :       COMPLEX(KIND=dp), DIMENSION(:), POINTER, CONTIGUOUS            :: xzbuf, yzbuf
    1630           0 :       COMPLEX(KIND=sp), DIMENSION(:), POINTER, CONTIGUOUS            :: xzbuf_sgl, yzbuf_sgl
    1631             :       INTEGER                                            :: handle, icrs, ip, ipl, ipr, ir, ix, iz, &
    1632             :                                                             jj, jx, jy, jz, myx, myz, np, npx, &
    1633             :                                                             npz, nx, nz, rs_pos
    1634           0 :       INTEGER, DIMENSION(:), POINTER, CONTIGUOUS                     :: pzcoord, rcount, rdispl, scount, sdispl, &
    1635           0 :                                                                         xcor, zcor
    1636           0 :       INTEGER, DIMENSION(:, :), CONTIGUOUS, POINTER                  :: pgrid
    1637             : 
    1638           0 :       CALL timeset(routineN, handle)
    1639             : 
    1640           0 :       np = SIZE(p2p)
    1641             : 
    1642           0 :       rs_pos = p2p(my_pos)
    1643             : 
    1644           0 :       IF (alltoall_sgl) THEN
    1645           0 :          yzbuf_sgl => fft_scratch%yzbuf_sgl
    1646           0 :          xzbuf_sgl => fft_scratch%xzbuf_sgl
    1647             :       ELSE
    1648           0 :          yzbuf => fft_scratch%yzbuf
    1649           0 :          xzbuf => fft_scratch%xzbuf
    1650             :       END IF
    1651           0 :       npx = dims(1)
    1652           0 :       npz = dims(2)
    1653           0 :       pgrid => fft_scratch%pgrid
    1654           0 :       xcor => fft_scratch%xcor
    1655           0 :       zcor => fft_scratch%zcor
    1656           0 :       pzcoord => fft_scratch%pzcoord
    1657           0 :       scount => fft_scratch%scount
    1658           0 :       rcount => fft_scratch%rcount
    1659           0 :       sdispl => fft_scratch%sdispl
    1660           0 :       rdispl => fft_scratch%rdispl
    1661             : 
    1662           0 :       nx = SIZE(sb, 2)
    1663             : 
    1664             : ! If the send and recv counts are not already cached, then
    1665             : ! calculate and store them
    1666           0 :       IF (fft_scratch%in == 0) THEN
    1667             : 
    1668           0 :          scount = 0
    1669             : 
    1670           0 :          DO ix = 0, npx - 1
    1671           0 :             ip = pgrid(ix, 0)
    1672           0 :             xcor(bo(1, 1, ip):bo(2, 1, ip)) = ix
    1673             :          END DO
    1674           0 :          DO iz = 0, npz - 1
    1675           0 :             ip = pgrid(0, iz)
    1676           0 :             zcor(bo(1, 3, ip):bo(2, 3, ip)) = iz
    1677             :          END DO
    1678           0 :          DO jx = 1, nx
    1679           0 :             IF (alltoall_sgl) THEN
    1680           0 :                DO ir = 1, nray(my_pos)
    1681           0 :                   jy = yzp(1, ir, my_pos)
    1682           0 :                   jz = yzp(2, ir, my_pos)
    1683           0 :                   ip = pgrid(xcor(jx), zcor(jz))
    1684           0 :                   scount(ip) = scount(ip) + 1
    1685             :                END DO
    1686             :             ELSE
    1687           0 :                DO ir = 1, nray(my_pos)
    1688           0 :                   jy = yzp(1, ir, my_pos)
    1689           0 :                   jz = yzp(2, ir, my_pos)
    1690           0 :                   ip = pgrid(xcor(jx), zcor(jz))
    1691           0 :                   scount(ip) = scount(ip) + 1
    1692             :                END DO
    1693             :             END IF
    1694             :          END DO
    1695             : 
    1696           0 :          CALL group%alltoall(scount, rcount, 1)
    1697           0 :          fft_scratch%yzcount = scount
    1698           0 :          fft_scratch%xzcount = rcount
    1699             : 
    1700             :          ! Work out the correct displacements in the buffers
    1701           0 :          sdispl(0) = 0
    1702           0 :          rdispl(0) = 0
    1703           0 :          DO ip = 1, np - 1
    1704           0 :             sdispl(ip) = sdispl(ip - 1) + scount(ip - 1)
    1705           0 :             rdispl(ip) = rdispl(ip - 1) + rcount(ip - 1)
    1706             :          END DO
    1707             : 
    1708           0 :          fft_scratch%yzdispl = sdispl
    1709           0 :          fft_scratch%xzdispl = rdispl
    1710             : 
    1711           0 :          icrs = 0
    1712           0 :          DO ip = 0, np - 1
    1713           0 :             IF (scount(ip) /= 0) icrs = icrs + 1
    1714           0 :             IF (rcount(ip) /= 0) icrs = icrs + 1
    1715             :          END DO
    1716           0 :          CALL group%sum(icrs)
    1717           0 :          fft_scratch%rsratio = REAL(icrs, KIND=dp)/(REAL(2*np, KIND=dp)*REAL(np, KIND=dp))
    1718             : 
    1719           0 :          fft_scratch%in = 1
    1720             :       ELSE
    1721           0 :          scount = fft_scratch%yzcount
    1722           0 :          rcount = fft_scratch%xzcount
    1723           0 :          sdispl = fft_scratch%yzdispl
    1724           0 :          rdispl = fft_scratch%xzdispl
    1725             :       END IF
    1726             : 
    1727             : ! Do the actual packing
    1728             : !$OMP PARALLEL DO DEFAULT(NONE), &
    1729             : !$OMP             PRIVATE(ipl,jj,nx,ir,jx,jy,jz),&
    1730             : !$OMP             SHARED(np,p2p,pzcoord,bo,nray,yzp,zcor),&
    1731             : !$OMP             SHARED(yzbuf,sb,scount,sdispl,my_pos),&
    1732           0 : !$OMP             SHARED(yzbuf_sgl,alltoall_sgl)
    1733             :       DO ip = 0, np - 1
    1734             :          IF (scount(ip) == 0) CYCLE
    1735             :          ipl = p2p(ip)
    1736             :          jj = 0
    1737             :          nx = bo(2, 1, ipl) - bo(1, 1, ipl) + 1
    1738             :          DO ir = 1, nray(my_pos)
    1739             :             jz = yzp(2, ir, my_pos)
    1740             :             IF (zcor(jz) == pzcoord(ipl)) THEN
    1741             :                jj = jj + 1
    1742             :                jy = yzp(1, ir, my_pos)
    1743             :                IF (alltoall_sgl) THEN
    1744             :                   DO jx = 0, nx - 1
    1745             :                      yzbuf_sgl(sdispl(ip) + jj + jx*scount(ip)/nx) = CMPLX(sb(ir, jx + bo(1, 1, ipl)), KIND=sp)
    1746             :                   END DO
    1747             :                ELSE
    1748             :                   DO jx = 0, nx - 1
    1749             :                      yzbuf(sdispl(ip) + jj + jx*scount(ip)/nx) = sb(ir, jx + bo(1, 1, ipl))
    1750             :                   END DO
    1751             :                END IF
    1752             :             END IF
    1753             :          END DO
    1754             :       END DO
    1755             : !$OMP END PARALLEL DO
    1756             : 
    1757           0 :       IF (alltoall_sgl) THEN
    1758           0 :          CALL group%alltoall(yzbuf_sgl, scount, sdispl, xzbuf_sgl, rcount, rdispl)
    1759             :       ELSE
    1760           0 :          IF (fft_scratch%rsratio < ratio_sparse_alltoall) THEN
    1761           0 :             CALL sparse_alltoall(yzbuf, scount, sdispl, xzbuf, rcount, rdispl, group)
    1762             :          ELSE
    1763           0 :             CALL group%alltoall(yzbuf, scount, sdispl, xzbuf, rcount, rdispl)
    1764             :          END IF
    1765             :       END IF
    1766             : 
    1767           0 :       myx = fft_scratch%sizes%r_pos(1)
    1768           0 :       myz = fft_scratch%sizes%r_pos(2)
    1769           0 :       nz = bo(2, 3, rs_pos) - bo(1, 3, rs_pos) + 1
    1770             : 
    1771             : !$OMP PARALLEL DO DEFAULT(NONE), &
    1772             : !$OMP             PRIVATE(ipr,jj,ir,jx,jy,jz),&
    1773             : !$OMP             SHARED(tb,np,p2p,bo,rs_pos,nray),&
    1774             : !$OMP             SHARED(yzp,alltoall_sgl,zcor,myz),&
    1775           0 : !$OMP             SHARED(xzbuf,xzbuf_sgl,nz,rdispl)
    1776             :       DO ip = 0, np - 1
    1777             :          ipr = p2p(ip)
    1778             :          jj = 0
    1779             :          DO jx = 0, bo(2, 1, rs_pos) - bo(1, 1, rs_pos)
    1780             :             DO ir = 1, nray(ip)
    1781             :                jz = yzp(2, ir, ip)
    1782             :                IF (alltoall_sgl) THEN
    1783             :                   IF (zcor(jz) == myz) THEN
    1784             :                      jj = jj + 1
    1785             :                      jy = yzp(1, ir, ip)
    1786             :                      jz = jz - bo(1, 3, rs_pos) + 1
    1787             :                      tb(jy, jz + jx*nz) = xzbuf_sgl(jj + rdispl(ipr))
    1788             :                   END IF
    1789             :                ELSE
    1790             :                   IF (zcor(jz) == myz) THEN
    1791             :                      jj = jj + 1
    1792             :                      jy = yzp(1, ir, ip)
    1793             :                      jz = jz - bo(1, 3, rs_pos) + 1
    1794             :                      tb(jy, jz + jx*nz) = xzbuf(jj + rdispl(ipr))
    1795             :                   END IF
    1796             :                END IF
    1797             :             END DO
    1798             :          END DO
    1799             :       END DO
    1800             : !$OMP END PARALLEL DO
    1801             : 
    1802           0 :       CALL timestop(handle)
    1803             : 
    1804           0 :    END SUBROUTINE yz_to_xz
    1805             : 
    1806             : ! **************************************************************************************************
    1807             : !> \brief ...
    1808             : !> \param sb ...
    1809             : !> \param group ...
    1810             : !> \param dims ...
    1811             : !> \param my_pos ...
    1812             : !> \param p2p ...
    1813             : !> \param yzp ...
    1814             : !> \param nray ...
    1815             : !> \param bo ...
    1816             : !> \param tb ...
    1817             : !> \param fft_scratch ...
    1818             : !> \par History
    1819             : !>      15. Feb. 2006 : single precision all_to_all
    1820             : !> \author JGH (19-Jan-2001)
    1821             : ! **************************************************************************************************
    1822           0 :    SUBROUTINE xz_to_yz(sb, group, dims, my_pos, p2p, yzp, nray, bo, tb, fft_scratch)
    1823             : 
    1824             :       COMPLEX(KIND=dp), CONTIGUOUS, DIMENSION(:, :), &
    1825             :          INTENT(IN)                                      :: sb
    1826             : 
    1827             :       CLASS(mp_comm_type), INTENT(IN)                     :: group
    1828             :       INTEGER, DIMENSION(2), INTENT(IN)                  :: dims
    1829             :       INTEGER, INTENT(IN)                                :: my_pos
    1830             :       INTEGER, CONTIGUOUS, DIMENSION(0:), INTENT(IN)                 :: p2p
    1831             :       INTEGER, CONTIGUOUS, DIMENSION(:, :, 0:), INTENT(IN)           :: yzp
    1832             :       INTEGER, CONTIGUOUS, DIMENSION(0:), INTENT(IN)                 :: nray
    1833             :       INTEGER, CONTIGUOUS, DIMENSION(:, :, 0:), INTENT(IN)           :: bo
    1834             :       COMPLEX(KIND=dp), DIMENSION(:, :), INTENT(INOUT), CONTIGUOUS   :: tb
    1835             :       TYPE(fft_scratch_type), INTENT(INOUT)              :: fft_scratch
    1836             : 
    1837             :       CHARACTER(len=*), PARAMETER                        :: routineN = 'xz_to_yz'
    1838             : 
    1839           0 :       COMPLEX(KIND=dp), DIMENSION(:), POINTER, CONTIGUOUS            :: xzbuf, yzbuf
    1840           0 :       COMPLEX(KIND=sp), DIMENSION(:), POINTER, CONTIGUOUS            :: xzbuf_sgl, yzbuf_sgl
    1841             :       INTEGER                                            :: handle, icrs, ip, ipl, ir, ix, ixx, iz, &
    1842             :                                                             jj, jx, jy, jz, mp, myx, myz, np, npx, &
    1843             :                                                             npz, nx, nz
    1844           0 :       INTEGER, DIMENSION(:), POINTER, CONTIGUOUS                     :: pzcoord, rcount, rdispl, scount, sdispl, &
    1845           0 :                                                                         xcor, zcor
    1846           0 :       INTEGER, DIMENSION(:, :), CONTIGUOUS, POINTER                  :: pgrid
    1847             : 
    1848           0 :       CALL timeset(routineN, handle)
    1849             : 
    1850           0 :       np = SIZE(p2p)
    1851             : 
    1852           0 :       IF (alltoall_sgl) THEN
    1853           0 :          yzbuf_sgl => fft_scratch%yzbuf_sgl
    1854           0 :          xzbuf_sgl => fft_scratch%xzbuf_sgl
    1855             :       ELSE
    1856           0 :          yzbuf => fft_scratch%yzbuf
    1857           0 :          xzbuf => fft_scratch%xzbuf
    1858             :       END IF
    1859           0 :       npx = dims(1)
    1860           0 :       npz = dims(2)
    1861           0 :       pgrid => fft_scratch%pgrid
    1862           0 :       xcor => fft_scratch%xcor
    1863           0 :       zcor => fft_scratch%zcor
    1864           0 :       pzcoord => fft_scratch%pzcoord
    1865           0 :       scount => fft_scratch%scount
    1866           0 :       rcount => fft_scratch%rcount
    1867           0 :       sdispl => fft_scratch%sdispl
    1868           0 :       rdispl => fft_scratch%rdispl
    1869             : 
    1870             : ! If the send and recv counts are not already cached, then
    1871             : ! calculate and store them
    1872           0 :       IF (fft_scratch%in == 0) THEN
    1873             : 
    1874           0 :          rcount = 0
    1875           0 :          nx = MAXVAL(bo(2, 1, :))
    1876             : 
    1877           0 :          DO ix = 0, npx - 1
    1878           0 :             ip = pgrid(ix, 0)
    1879           0 :             xcor(bo(1, 1, ip):bo(2, 1, ip)) = ix
    1880             :          END DO
    1881           0 :          DO iz = 0, npz - 1
    1882           0 :             ip = pgrid(0, iz)
    1883           0 :             zcor(bo(1, 3, ip):bo(2, 3, ip)) = iz
    1884             :          END DO
    1885           0 :          DO jx = 1, nx
    1886           0 :             DO ir = 1, nray(my_pos)
    1887           0 :                jy = yzp(1, ir, my_pos)
    1888           0 :                jz = yzp(2, ir, my_pos)
    1889           0 :                ip = pgrid(xcor(jx), zcor(jz))
    1890           0 :                rcount(ip) = rcount(ip) + 1
    1891             :             END DO
    1892             :          END DO
    1893             : 
    1894           0 :          CALL group%alltoall(rcount, scount, 1)
    1895           0 :          fft_scratch%xzcount = scount
    1896           0 :          fft_scratch%yzcount = rcount
    1897             : 
    1898             :          ! Work out the correct displacements in the buffers
    1899           0 :          sdispl(0) = 0
    1900           0 :          rdispl(0) = 0
    1901           0 :          DO ip = 1, np - 1
    1902           0 :             sdispl(ip) = sdispl(ip - 1) + scount(ip - 1)
    1903           0 :             rdispl(ip) = rdispl(ip - 1) + rcount(ip - 1)
    1904             :          END DO
    1905             : 
    1906           0 :          fft_scratch%xzdispl = sdispl
    1907           0 :          fft_scratch%yzdispl = rdispl
    1908             : 
    1909           0 :          icrs = 0
    1910           0 :          DO ip = 0, np - 1
    1911           0 :             IF (scount(ip) /= 0) icrs = icrs + 1
    1912           0 :             IF (rcount(ip) /= 0) icrs = icrs + 1
    1913             :          END DO
    1914           0 :          CALL group%sum(icrs)
    1915           0 :          fft_scratch%rsratio = REAL(icrs, KIND=dp)/(REAL(2*np, KIND=dp)*REAL(np, KIND=dp))
    1916             : 
    1917           0 :          fft_scratch%in = 1
    1918             :       ELSE
    1919           0 :          scount = fft_scratch%xzcount
    1920           0 :          rcount = fft_scratch%yzcount
    1921           0 :          sdispl = fft_scratch%xzdispl
    1922           0 :          rdispl = fft_scratch%yzdispl
    1923             :       END IF
    1924             : 
    1925             : ! Now do the actual packing
    1926           0 :       myx = fft_scratch%sizes%r_pos(1)
    1927           0 :       myz = fft_scratch%sizes%r_pos(2)
    1928           0 :       mp = p2p(my_pos)
    1929           0 :       nz = bo(2, 3, mp) - bo(1, 3, mp) + 1
    1930           0 :       nx = bo(2, 1, mp) - bo(1, 1, mp) + 1
    1931             : 
    1932             : !$OMP PARALLEL DO DEFAULT(NONE), &
    1933             : !$OMP             PRIVATE(jj,ipl,ir,jx,jy,jz,ixx),&
    1934             : !$OMP             SHARED(np,p2p,nray,yzp,zcor,myz,bo,mp),&
    1935             : !$OMP             SHARED(alltoall_sgl,nx,scount,sdispl),&
    1936           0 : !$OMP             SHARED(xzbuf,xzbuf_sgl,sb,nz)
    1937             :       DO ip = 0, np - 1
    1938             :          jj = 0
    1939             :          ipl = p2p(ip)
    1940             :          DO ir = 1, nray(ip)
    1941             :             jz = yzp(2, ir, ip)
    1942             :             IF (zcor(jz) == myz) THEN
    1943             :                jj = jj + 1
    1944             :                jy = yzp(1, ir, ip)
    1945             :                jz = yzp(2, ir, ip) - bo(1, 3, mp) + 1
    1946             :                IF (alltoall_sgl) THEN
    1947             :                   DO jx = 0, nx - 1
    1948             :                      ixx = jj + jx*scount(ipl)/nx
    1949             :                      xzbuf_sgl(ixx + sdispl(ipl)) = CMPLX(sb(jy, jz + jx*nz), KIND=sp)
    1950             :                   END DO
    1951             :                ELSE
    1952             :                   DO jx = 0, nx - 1
    1953             :                      ixx = jj + jx*scount(ipl)/nx
    1954             :                      xzbuf(ixx + sdispl(ipl)) = sb(jy, jz + jx*nz)
    1955             :                   END DO
    1956             :                END IF
    1957             :             END IF
    1958             :          END DO
    1959             :       END DO
    1960             : !$OMP END PARALLEL DO
    1961             : 
    1962           0 :       IF (alltoall_sgl) THEN
    1963           0 :          CALL group%alltoall(xzbuf_sgl, scount, sdispl, yzbuf_sgl, rcount, rdispl)
    1964             :       ELSE
    1965           0 :          IF (fft_scratch%rsratio < ratio_sparse_alltoall) THEN
    1966           0 :             CALL sparse_alltoall(xzbuf, scount, sdispl, yzbuf, rcount, rdispl, group)
    1967             :          ELSE
    1968           0 :             CALL group%alltoall(xzbuf, scount, sdispl, yzbuf, rcount, rdispl)
    1969             :          END IF
    1970             :       END IF
    1971             : 
    1972             : !$OMP PARALLEL DO DEFAULT(NONE), &
    1973             : !$OMP             PRIVATE(ipl,jj,nx,ir,jx,jy,jz),&
    1974             : !$OMP             SHARED(p2p,pzcoord,bo,nray,my_pos,yzp),&
    1975             : !$OMP             SHARED(rcount,rdispl,tb,yzbuf,zcor),&
    1976           0 : !$OMP             SHARED(yzbuf_sgl,alltoall_sgl,np)
    1977             :       DO ip = 0, np - 1
    1978             :          IF (rcount(ip) == 0) CYCLE
    1979             :          ipl = p2p(ip)
    1980             :          jj = 0
    1981             :          nx = bo(2, 1, ipl) - bo(1, 1, ipl) + 1
    1982             :          DO ir = 1, nray(my_pos)
    1983             :             jz = yzp(2, ir, my_pos)
    1984             :             IF (zcor(jz) == pzcoord(ipl)) THEN
    1985             :                jj = jj + 1
    1986             :                jy = yzp(1, ir, my_pos)
    1987             :                IF (alltoall_sgl) THEN
    1988             :                   DO jx = 0, nx - 1
    1989             :                      tb(ir, jx + bo(1, 1, ipl)) = yzbuf_sgl(rdispl(ip) + jj + jx*rcount(ip)/nx)
    1990             :                   END DO
    1991             :                ELSE
    1992             :                   DO jx = 0, nx - 1
    1993             :                      tb(ir, jx + bo(1, 1, ipl)) = yzbuf(rdispl(ip) + jj + jx*rcount(ip)/nx)
    1994             :                   END DO
    1995             :                END IF
    1996             :             END IF
    1997             :          END DO
    1998             :       END DO
    1999             : !$OMP END PARALLEL DO
    2000             : 
    2001           0 :       CALL timestop(handle)
    2002             : 
    2003           0 :    END SUBROUTINE xz_to_yz
    2004             : 
    2005             : ! **************************************************************************************************
    2006             : !> \brief ...
    2007             : !> \param cin ...
    2008             : !> \param boin ...
    2009             : !> \param boout ...
    2010             : !> \param sout ...
    2011             : !> \param fft_scratch ...
    2012             : !> \par History
    2013             : !>      none
    2014             : !> \author JGH (20-Jan-2001)
    2015             : ! **************************************************************************************************
    2016           0 :    SUBROUTINE cube_transpose_1(cin, boin, boout, sout, fft_scratch)
    2017             : 
    2018             :       COMPLEX(KIND=dp), CONTIGUOUS, DIMENSION(:, :), &
    2019             :          INTENT(IN)                                      :: cin
    2020             :       INTEGER, CONTIGUOUS, DIMENSION(:, :, 0:), &
    2021             :          INTENT(IN)                                      :: boin, boout
    2022             :       COMPLEX(KIND=dp), CONTIGUOUS, DIMENSION(:, :), &
    2023             :          INTENT(OUT)                                     :: sout
    2024             :       TYPE(fft_scratch_type), INTENT(IN)                 :: fft_scratch
    2025             : 
    2026             :       CHARACTER(len=*), PARAMETER                        :: routineN = 'cube_transpose_1'
    2027             : 
    2028             :       COMPLEX(KIND=dp), CONTIGUOUS, DIMENSION(:, :), &
    2029           0 :          POINTER                                         :: rbuf
    2030             :       INTEGER                                            :: handle, ip, ipl, ir, is, ixy, iz, mip, &
    2031             :                                                             mz, np, nx, ny, nz
    2032           0 :       INTEGER, CONTIGUOUS, DIMENSION(:), POINTER         :: rcount, rdispl, scount, sdispl
    2033           0 :       INTEGER, CONTIGUOUS, DIMENSION(:, :), POINTER      :: pgrid
    2034             :       INTEGER, DIMENSION(2)                              :: dim, pos
    2035             : 
    2036           0 :       CALL timeset(routineN, handle)
    2037             : 
    2038           0 :       mip = fft_scratch%mip
    2039           0 :       dim = fft_scratch%dim
    2040           0 :       pos = fft_scratch%pos
    2041           0 :       scount => fft_scratch%scount
    2042           0 :       rcount => fft_scratch%rcount
    2043           0 :       sdispl => fft_scratch%sdispl
    2044           0 :       rdispl => fft_scratch%rdispl
    2045           0 :       pgrid => fft_scratch%pgcube
    2046           0 :       np = DIM(2)
    2047             : 
    2048           0 :       nx = boin(2, 1, mip) - boin(1, 1, mip) + 1
    2049           0 :       nz = boin(2, 3, mip) - boin(1, 3, mip) + 1
    2050             : 
    2051             : !$OMP PARALLEL DO DEFAULT(NONE), &
    2052             : !$OMP             PRIVATE(ipl,ny), &
    2053           0 : !$OMP             SHARED(np,pgrid,boout,scount,sdispl,nx,nz)
    2054             :       DO ip = 0, np - 1
    2055             :          ipl = pgrid(ip, 2)
    2056             :          ny = boout(2, 2, ipl) - boout(1, 2, ipl) + 1
    2057             :          scount(ip) = nx*nz*ny
    2058             :          sdispl(ip) = nx*nz*(boout(1, 2, ipl) - 1)
    2059             :       END DO
    2060             : !$OMP END PARALLEL DO
    2061           0 :       ny = boout(2, 2, mip) - boout(1, 2, mip) + 1
    2062           0 :       mz = MAXVAL(boin(2, 3, :) - boin(1, 3, :) + 1)
    2063             : !$OMP PARALLEL DO DEFAULT(NONE), &
    2064             : !$OMP             PRIVATE(ipl,nz), &
    2065           0 : !$OMP             SHARED(np,pgrid,boin,nx,ny,rcount,rdispl,mz)
    2066             :       DO ip = 0, np - 1
    2067             :          ipl = pgrid(ip, 2)
    2068             :          nz = boin(2, 3, ipl) - boin(1, 3, ipl) + 1
    2069             :          rcount(ip) = nx*nz*ny
    2070             :          rdispl(ip) = nx*ny*mz*ip
    2071             :       END DO
    2072             : !$OMP END PARALLEL DO
    2073             : 
    2074           0 :       rbuf => fft_scratch%rbuf1
    2075             : 
    2076           0 :       CALL fft_scratch%cart_sub_comm(2)%alltoall(cin, scount, sdispl, rbuf, rcount, rdispl)
    2077             : 
    2078             : !$OMP PARALLEL DO DEFAULT(NONE) COLLAPSE(2) &
    2079             : !$OMP             PRIVATE(ip,ipl,nz,iz,is,ir) &
    2080           0 : !$OMP             SHARED(nx,ny,np,pgrid,boin,sout,rbuf)
    2081             :       DO ixy = 1, nx*ny
    2082             :          DO ip = 0, np - 1
    2083             :             ipl = pgrid(ip, 2)
    2084             :             nz = boin(2, 3, ipl) - boin(1, 3, ipl) + 1
    2085             :             DO iz = 1, nz
    2086             :                is = boin(1, 3, ipl) + iz - 1
    2087             :                ir = iz + nz*(ixy - 1)
    2088             :                sout(is, ixy) = rbuf(ir, ip)
    2089             :             END DO
    2090             :          END DO
    2091             :       END DO
    2092             : !$OMP END PARALLEL DO
    2093             : 
    2094           0 :       CALL timestop(handle)
    2095             : 
    2096           0 :    END SUBROUTINE cube_transpose_1
    2097             : 
    2098             : ! **************************************************************************************************
    2099             : !> \brief ...
    2100             : !> \param cin ...
    2101             : !> \param boin ...
    2102             : !> \param boout ...
    2103             : !> \param sout ...
    2104             : !> \param fft_scratch ...
    2105             : ! **************************************************************************************************
    2106           0 :    SUBROUTINE cube_transpose_2(cin, boin, boout, sout, fft_scratch)
    2107             : 
    2108             :       COMPLEX(KIND=dp), CONTIGUOUS, DIMENSION(:, :), &
    2109             :          INTENT(IN)                                      :: cin
    2110             :       INTEGER, CONTIGUOUS, DIMENSION(:, :, 0:), &
    2111             :          INTENT(IN)                                      :: boin, boout
    2112             :       COMPLEX(KIND=dp), CONTIGUOUS, DIMENSION(:, :), &
    2113             :          INTENT(OUT)                                     :: sout
    2114             :       TYPE(fft_scratch_type), INTENT(IN)                 :: fft_scratch
    2115             : 
    2116             :       CHARACTER(len=*), PARAMETER                        :: routineN = 'cube_transpose_2'
    2117             : 
    2118             :       COMPLEX(KIND=dp), CONTIGUOUS, DIMENSION(:, :), &
    2119           0 :          POINTER                                         :: rbuf
    2120             :       INTEGER                                            :: handle, ip, ipl, ir, ixy, iz, mip, mz, &
    2121             :                                                             np, nx, ny, nz
    2122           0 :       INTEGER, CONTIGUOUS, DIMENSION(:), POINTER         :: rcount, rdispl, scount, sdispl
    2123           0 :       INTEGER, CONTIGUOUS, DIMENSION(:, :), POINTER      :: pgrid
    2124             :       INTEGER, DIMENSION(2)                              :: dim, pos
    2125             :       TYPE(mp_comm_type)                                 :: sub_group
    2126             : 
    2127           0 :       CALL timeset(routineN, handle)
    2128             : 
    2129           0 :       sub_group = fft_scratch%cart_sub_comm(2)
    2130           0 :       mip = fft_scratch%mip
    2131           0 :       dim = fft_scratch%dim
    2132           0 :       pos = fft_scratch%pos
    2133           0 :       scount => fft_scratch%scount
    2134           0 :       rcount => fft_scratch%rcount
    2135           0 :       sdispl => fft_scratch%sdispl
    2136           0 :       rdispl => fft_scratch%rdispl
    2137           0 :       pgrid => fft_scratch%pgcube
    2138           0 :       np = DIM(2)
    2139             : 
    2140           0 :       nx = boin(2, 1, mip) - boin(1, 1, mip) + 1
    2141           0 :       ny = boin(2, 2, mip) - boin(1, 2, mip) + 1
    2142           0 :       mz = MAXVAL(boout(2, 3, :) - boout(1, 3, :) + 1)
    2143             : 
    2144           0 :       rbuf => fft_scratch%rbuf2
    2145             : 
    2146             : !$OMP PARALLEL DEFAULT(NONE), &
    2147             : !$OMP          PRIVATE(ip,ipl,nz,iz,ir), &
    2148           0 : !$OMP          SHARED(nx,ny,np,pgrid,boout,rbuf,cin,scount,sdispl,mz)
    2149             : !$OMP DO COLLAPSE(2)
    2150             :       DO ixy = 1, nx*ny
    2151             :          DO ip = 0, np - 1
    2152             :             ipl = pgrid(ip, 2)
    2153             :             nz = boout(2, 3, ipl) - boout(1, 3, ipl) + 1
    2154             :             DO iz = boout(1, 3, ipl), boout(2, 3, ipl)
    2155             :                ir = iz - boout(1, 3, ipl) + 1 + (ixy - 1)*nz
    2156             :                rbuf(ir, ip) = cin(iz, ixy)
    2157             :             END DO
    2158             :          END DO
    2159             :       END DO
    2160             : !$OMP END DO
    2161             : !$OMP DO
    2162             :       DO ip = 0, np - 1
    2163             :          ipl = pgrid(ip, 2)
    2164             :          nz = boout(2, 3, ipl) - boout(1, 3, ipl) + 1
    2165             :          scount(ip) = nx*ny*nz
    2166             :          sdispl(ip) = nx*ny*mz*ip
    2167             :       END DO
    2168             : !$OMP END DO
    2169             : !$OMP END PARALLEL
    2170           0 :       nz = boout(2, 3, mip) - boout(1, 3, mip) + 1
    2171             : !$OMP PARALLEL DO DEFAULT(NONE), &
    2172             : !$OMP             PRIVATE(ipl,ny), &
    2173           0 : !$OMP             SHARED(np,pgrid,boin,nx,nz,rcount,rdispl)
    2174             :       DO ip = 0, np - 1
    2175             :          ipl = pgrid(ip, 2)
    2176             :          ny = boin(2, 2, ipl) - boin(1, 2, ipl) + 1
    2177             :          rcount(ip) = nx*ny*nz
    2178             :          rdispl(ip) = nx*nz*(boin(1, 2, ipl) - 1)
    2179             :       END DO
    2180             : !$OMP END PARALLEL DO
    2181             : 
    2182           0 :       CALL sub_group%alltoall(rbuf, scount, sdispl, sout, rcount, rdispl)
    2183             : 
    2184           0 :       CALL timestop(handle)
    2185             : 
    2186           0 :    END SUBROUTINE cube_transpose_2
    2187             : 
    2188             : ! **************************************************************************************************
    2189             : !> \brief ...
    2190             : !> \param cin ...
    2191             : !> \param boin ...
    2192             : !> \param boout ...
    2193             : !> \param sout ...
    2194             : !> \param fft_scratch ...
    2195             : ! **************************************************************************************************
    2196           0 :    SUBROUTINE cube_transpose_3(cin, boin, boout, sout, fft_scratch)
    2197             : 
    2198             :       COMPLEX(KIND=dp), CONTIGUOUS, DIMENSION(:, :), &
    2199             :          INTENT(IN)                                      :: cin
    2200             :       INTEGER, CONTIGUOUS, DIMENSION(:, :, 0:), &
    2201             :          INTENT(IN)                                      :: boin, boout
    2202             :       COMPLEX(KIND=dp), CONTIGUOUS, DIMENSION(:, :), &
    2203             :          INTENT(OUT)                                     :: sout
    2204             :       TYPE(fft_scratch_type), INTENT(IN)                 :: fft_scratch
    2205             : 
    2206             :       CHARACTER(len=*), PARAMETER                        :: routineN = 'cube_transpose_3'
    2207             : 
    2208             :       COMPLEX(KIND=dp), CONTIGUOUS, DIMENSION(:, :), &
    2209           0 :          POINTER                                         :: rbuf
    2210             :       INTEGER                                            :: handle, ip, ipl, ir, is, ixz, iy, lb, &
    2211             :                                                             mip, my, my_id, np, num_threads, nx, &
    2212             :                                                             ny, nz, ub
    2213           0 :       INTEGER, CONTIGUOUS, DIMENSION(:), POINTER         :: rcount, rdispl, scount, sdispl
    2214           0 :       INTEGER, CONTIGUOUS, DIMENSION(:, :), POINTER      :: pgrid
    2215             :       INTEGER, DIMENSION(2)                              :: dim, pos
    2216             :       TYPE(mp_comm_type)                                 :: sub_group
    2217             : 
    2218           0 :       CALL timeset(routineN, handle)
    2219             : 
    2220           0 :       sub_group = fft_scratch%cart_sub_comm(1)
    2221           0 :       mip = fft_scratch%mip
    2222           0 :       dim = fft_scratch%dim
    2223           0 :       pos = fft_scratch%pos
    2224           0 :       np = DIM(1)
    2225           0 :       scount => fft_scratch%scount
    2226           0 :       rcount => fft_scratch%rcount
    2227           0 :       sdispl => fft_scratch%sdispl
    2228           0 :       rdispl => fft_scratch%rdispl
    2229           0 :       pgrid => fft_scratch%pgcube
    2230             : 
    2231           0 :       ny = boin(2, 2, mip) - boin(1, 2, mip) + 1
    2232           0 :       nz = boin(2, 3, mip) - boin(1, 3, mip) + 1
    2233             : !$OMP PARALLEL DO DEFAULT(NONE), &
    2234             : !$OMP             PRIVATE(ipl, nx), &
    2235           0 : !$OMP             SHARED(np,pgrid,boout,ny,nz,scount,sdispl)
    2236             :       DO ip = 0, np - 1
    2237             :          ipl = pgrid(ip, 1)
    2238             :          nx = boout(2, 1, ipl) - boout(1, 1, ipl) + 1
    2239             :          scount(ip) = nx*nz*ny
    2240             :          sdispl(ip) = ny*nz*(boout(1, 1, ipl) - 1)
    2241             :       END DO
    2242             : !$OMP END PARALLEL DO
    2243           0 :       nx = boout(2, 1, mip) - boout(1, 1, mip) + 1
    2244           0 :       my = MAXVAL(boin(2, 2, :) - boin(1, 2, :) + 1)
    2245             : !$OMP PARALLEL DO DEFAULT(NONE), &
    2246             : !$OMP             PRIVATE(ipl, ny), &
    2247           0 : !$OMP             SHARED(np,pgrid,boin,nx,nz,my,rcount,rdispl)
    2248             :       DO ip = 0, np - 1
    2249             :          ipl = pgrid(ip, 1)
    2250             :          ny = boin(2, 2, ipl) - boin(1, 2, ipl) + 1
    2251             :          rcount(ip) = nx*nz*ny
    2252             :          rdispl(ip) = nx*my*nz*ip
    2253             :       END DO
    2254             : !$OMP END PARALLEL DO
    2255             : 
    2256           0 :       rbuf => fft_scratch%rbuf3
    2257           0 :       num_threads = 1
    2258           0 :       my_id = 0
    2259             : !$OMP PARALLEL DEFAULT(NONE), &
    2260             : !$OMP          PRIVATE(NUM_THREADS, my_id, lb, ub) &
    2261           0 : !$OMP          SHARED(rbuf)
    2262             : !$    num_threads = MIN(omp_get_max_threads(), SIZE(rbuf, 2))
    2263             : !$    my_id = omp_get_thread_num()
    2264             :       IF (my_id < num_threads) THEN
    2265             :          lb = (SIZE(rbuf, 2)*my_id)/num_threads
    2266             :          ub = (SIZE(rbuf, 2)*(my_id + 1))/num_threads - 1
    2267             :          rbuf(:, lb:ub) = 0.0_dp
    2268             :       END IF
    2269             : !$OMP END PARALLEL
    2270             : 
    2271           0 :       CALL sub_group%alltoall(cin, scount, sdispl, rbuf, rcount, rdispl)
    2272             : 
    2273             : !$OMP PARALLEL DO DEFAULT(NONE) COLLAPSE(2) &
    2274             : !$OMP             PRIVATE(ip,ipl,ny,iy,is,ir) &
    2275           0 : !$OMP             SHARED(nx,nz,np,pgrid,boin,rbuf,sout)
    2276             :       DO ixz = 1, nx*nz
    2277             :          DO ip = 0, np - 1
    2278             :             ipl = pgrid(ip, 1)
    2279             :             ny = boin(2, 2, ipl) - boin(1, 2, ipl) + 1
    2280             :             DO iy = 1, ny
    2281             :                is = boin(1, 2, ipl) + iy - 1
    2282             :                ir = iy + ny*(ixz - 1)
    2283             :                sout(is, ixz) = rbuf(ir, ip)
    2284             :             END DO
    2285             :          END DO
    2286             :       END DO
    2287             : !$OMP END PARALLEL DO
    2288             : 
    2289           0 :       CALL timestop(handle)
    2290             : 
    2291           0 :    END SUBROUTINE cube_transpose_3
    2292             : 
    2293             : ! **************************************************************************************************
    2294             : !> \brief ...
    2295             : !> \param cin ...
    2296             : !> \param boin ...
    2297             : !> \param boout ...
    2298             : !> \param sout ...
    2299             : !> \param fft_scratch ...
    2300             : ! **************************************************************************************************
    2301           0 :    SUBROUTINE cube_transpose_4(cin, boin, boout, sout, fft_scratch)
    2302             : 
    2303             :       COMPLEX(KIND=dp), CONTIGUOUS, DIMENSION(:, :), &
    2304             :          INTENT(IN)                                      :: cin
    2305             :       INTEGER, CONTIGUOUS, DIMENSION(:, :, 0:), &
    2306             :          INTENT(IN)                                      :: boin, boout
    2307             :       COMPLEX(KIND=dp), CONTIGUOUS, DIMENSION(:, :), &
    2308             :          INTENT(OUT)                                     :: sout
    2309             :       TYPE(fft_scratch_type), INTENT(IN)                 :: fft_scratch
    2310             : 
    2311             :       CHARACTER(len=*), PARAMETER                        :: routineN = 'cube_transpose_4'
    2312             : 
    2313             :       COMPLEX(KIND=dp), CONTIGUOUS, DIMENSION(:, :), &
    2314           0 :          POINTER                                         :: rbuf
    2315             :       INTEGER                                            :: handle, ip, ipl, ir, iy, izx, lb, mip, &
    2316             :                                                             my, my_id, np, num_threads, nx, ny, &
    2317             :                                                             nz, ub
    2318           0 :       INTEGER, CONTIGUOUS, DIMENSION(:), POINTER         :: rcount, rdispl, scount, sdispl
    2319           0 :       INTEGER, CONTIGUOUS, DIMENSION(:, :), POINTER      :: pgrid
    2320             :       INTEGER, DIMENSION(2)                              :: dim, pos
    2321             :       TYPE(mp_comm_type)                                 :: sub_group
    2322             : 
    2323           0 :       CALL timeset(routineN, handle)
    2324             : 
    2325           0 :       sub_group = fft_scratch%cart_sub_comm(1)
    2326           0 :       mip = fft_scratch%mip
    2327           0 :       dim = fft_scratch%dim
    2328           0 :       pos = fft_scratch%pos
    2329           0 :       np = DIM(1)
    2330           0 :       scount => fft_scratch%scount
    2331           0 :       rcount => fft_scratch%rcount
    2332           0 :       sdispl => fft_scratch%sdispl
    2333           0 :       rdispl => fft_scratch%rdispl
    2334           0 :       pgrid => fft_scratch%pgcube
    2335             : 
    2336           0 :       nx = boin(2, 1, mip) - boin(1, 1, mip) + 1
    2337           0 :       nz = boin(2, 3, mip) - boin(1, 3, mip) + 1
    2338           0 :       my = MAXVAL(boout(2, 2, :) - boout(1, 2, :) + 1)
    2339             : 
    2340           0 :       rbuf => fft_scratch%rbuf4
    2341           0 :       num_threads = 1
    2342           0 :       my_id = 0
    2343             : !$OMP PARALLEL DEFAULT(NONE), &
    2344             : !$OMP          PRIVATE(NUM_THREADS,my_id,lb,ub,ip,ipl,ny,iy,ir), &
    2345           0 : !$OMP          SHARED(rbuf,nz,nx,np,pgrid,boout,cin,my,scount,sdispl)
    2346             : !$    num_threads = MIN(omp_get_max_threads(), SIZE(rbuf, 2))
    2347             : !$    my_id = omp_get_thread_num()
    2348             :       IF (my_id < num_threads) THEN
    2349             :          lb = (SIZE(rbuf, 2)*my_id)/num_threads
    2350             :          ub = (SIZE(rbuf, 2)*(my_id + 1))/num_threads - 1
    2351             :          rbuf(:, lb:ub) = 0.0_dp
    2352             :       END IF
    2353             : !$OMP BARRIER
    2354             : 
    2355             : !$OMP DO COLLAPSE(2)
    2356             :       DO izx = 1, nz*nx
    2357             :          DO ip = 0, np - 1
    2358             :             ipl = pgrid(ip, 1)
    2359             :             ny = boout(2, 2, ipl) - boout(1, 2, ipl) + 1
    2360             :             DO iy = boout(1, 2, ipl), boout(2, 2, ipl)
    2361             :                ir = iy - boout(1, 2, ipl) + 1 + (izx - 1)*ny
    2362             :                rbuf(ir, ip) = cin(iy, izx)
    2363             :             END DO
    2364             :          END DO
    2365             :       END DO
    2366             : !$OMP END DO
    2367             : !$OMP DO
    2368             :       DO ip = 0, np - 1
    2369             :          ipl = pgrid(ip, 1)
    2370             :          ny = boout(2, 2, ipl) - boout(1, 2, ipl) + 1
    2371             :          scount(ip) = nx*ny*nz
    2372             :          sdispl(ip) = nx*nz*my*ip
    2373             :       END DO
    2374             : !$OMP END DO
    2375             : !$OMP END PARALLEL
    2376           0 :       ny = boout(2, 2, mip) - boout(1, 2, mip) + 1
    2377             : !$OMP PARALLEL DO DEFAULT(NONE), &
    2378             : !$OMP             PRIVATE(ipl,nx), &
    2379           0 : !$OMP             SHARED(np,pgrid,boin,rcount,rdispl,ny,nz)
    2380             :       DO ip = 0, np - 1
    2381             :          ipl = pgrid(ip, 1)
    2382             :          nx = boin(2, 1, ipl) - boin(1, 1, ipl) + 1
    2383             :          rcount(ip) = nx*ny*nz
    2384             :          rdispl(ip) = ny*nz*(boin(1, 1, ipl) - 1)
    2385             :       END DO
    2386             : !$OMP END PARALLEL DO
    2387             : 
    2388           0 :       CALL sub_group%alltoall(rbuf, scount, sdispl, sout, rcount, rdispl)
    2389             : 
    2390           0 :       CALL timestop(handle)
    2391             : 
    2392           0 :    END SUBROUTINE cube_transpose_4
    2393             : 
    2394             : ! **************************************************************************************************
    2395             : !> \brief ...
    2396             : !> \param cin ...
    2397             : !> \param group ...
    2398             : !> \param boin ...
    2399             : !> \param boout ...
    2400             : !> \param sout ...
    2401             : !> \param fft_scratch ...
    2402             : ! **************************************************************************************************
    2403         104 :    SUBROUTINE cube_transpose_5(cin, group, boin, boout, sout, fft_scratch)
    2404             : 
    2405             :       COMPLEX(KIND=dp), CONTIGUOUS, DIMENSION(:, :), &
    2406             :          INTENT(IN)                                      :: cin
    2407             : 
    2408             :       CLASS(mp_comm_type), INTENT(IN)                     :: group
    2409             :       INTEGER, CONTIGUOUS, DIMENSION(:, :, 0:), INTENT(IN)           :: boin, boout
    2410             :       COMPLEX(KIND=dp), DIMENSION(:, :), INTENT(OUT), CONTIGUOUS     :: sout
    2411             :       TYPE(fft_scratch_type), INTENT(IN)                :: fft_scratch
    2412             : 
    2413             :       CHARACTER(len=*), PARAMETER                        :: routineN = 'cube_transpose_5'
    2414             : 
    2415         104 :       COMPLEX(KIND=dp), DIMENSION(:, :), POINTER, CONTIGUOUS         :: rbuf
    2416             :       INTEGER                                            :: handle, ip, ir, is, ixz, iy, lb, mip, &
    2417             :                                                             my, my_id, np, num_threads, nx, ny, &
    2418             :                                                             nz, ub
    2419         104 :       INTEGER, DIMENSION(:), POINTER, CONTIGUOUS                     :: rcount, rdispl, scount, sdispl
    2420             : 
    2421         104 :       CALL timeset(routineN, handle)
    2422             : 
    2423         104 :       np = fft_scratch%sizes%numtask
    2424         104 :       mip = fft_scratch%mip
    2425         104 :       scount => fft_scratch%scount
    2426         104 :       rcount => fft_scratch%rcount
    2427         104 :       sdispl => fft_scratch%sdispl
    2428         104 :       rdispl => fft_scratch%rdispl
    2429             : 
    2430         104 :       ny = boin(2, 2, mip) - boin(1, 2, mip) + 1
    2431         104 :       nz = boin(2, 3, mip) - boin(1, 3, mip) + 1
    2432             : !$OMP PARALLEL DO DEFAULT(NONE), &
    2433             : !$OMP             PRIVATE(nx), &
    2434         104 : !$OMP             SHARED(np,boout,ny,nz,scount,sdispl)
    2435             :       DO ip = 0, np - 1
    2436             :          nx = boout(2, 1, ip) - boout(1, 1, ip) + 1
    2437             :          scount(ip) = nx*nz*ny
    2438             :          sdispl(ip) = ny*nz*(boout(1, 1, ip) - 1)
    2439             :       END DO
    2440             : !$OMP END PARALLEL DO
    2441         104 :       nx = boout(2, 1, mip) - boout(1, 1, mip) + 1
    2442         312 :       my = MAXVAL(boin(2, 2, :) - boin(1, 2, :) + 1)
    2443             : !$OMP PARALLEL DO DEFAULT(NONE), &
    2444             : !$OMP             PRIVATE(ny), &
    2445         104 : !$OMP             SHARED(np,boin,nx,nz,rcount,rdispl,my)
    2446             :       DO ip = 0, np - 1
    2447             :          ny = boin(2, 2, ip) - boin(1, 2, ip) + 1
    2448             :          rcount(ip) = nx*nz*ny
    2449             :          rdispl(ip) = nx*my*nz*ip
    2450             :       END DO
    2451             : !$OMP END PARALLEL DO
    2452             : 
    2453         104 :       rbuf => fft_scratch%rbuf5
    2454         104 :       num_threads = 1
    2455         104 :       my_id = 0
    2456             : !$OMP PARALLEL DEFAULT(NONE), &
    2457             : !$OMP          PRIVATE(NUM_THREADS, my_id, lb, ub), &
    2458         104 : !$OMP          SHARED(rbuf)
    2459             : !$    num_threads = MIN(omp_get_max_threads(), SIZE(rbuf, 2))
    2460             : !$    my_id = omp_get_thread_num()
    2461             :       IF (my_id < num_threads) THEN
    2462             :          lb = (SIZE(rbuf, 2)*my_id)/num_threads
    2463             :          ub = (SIZE(rbuf, 2)*(my_id + 1))/num_threads - 1
    2464             :          rbuf(:, lb:ub) = 0.0_dp
    2465             :       END IF
    2466             : !$OMP END PARALLEL
    2467             : 
    2468         104 :       CALL group%alltoall(cin, scount, sdispl, rbuf, rcount, rdispl)
    2469             : 
    2470             : !$OMP PARALLEL DO DEFAULT(NONE) COLLAPSE(2) &
    2471             : !$OMP             PRIVATE(ip,ny,iy,is,ir) &
    2472         104 : !$OMP             SHARED(nx,nz,np,boin,sout,rbuf)
    2473             :       DO ixz = 1, nx*nz
    2474             :          DO ip = 0, np - 1
    2475             :             ny = boin(2, 2, ip) - boin(1, 2, ip) + 1
    2476             :             DO iy = 1, ny
    2477             :                is = boin(1, 2, ip) + iy - 1
    2478             :                ir = iy + ny*(ixz - 1)
    2479             :                sout(is, ixz) = rbuf(ir, ip)
    2480             :             END DO
    2481             :          END DO
    2482             :       END DO
    2483             : !$OMP END PARALLEL DO
    2484             : 
    2485         104 :       CALL timestop(handle)
    2486             : 
    2487         104 :    END SUBROUTINE cube_transpose_5
    2488             : 
    2489             : ! **************************************************************************************************
    2490             : !> \brief ...
    2491             : !> \param cin ...
    2492             : !> \param group ...
    2493             : !> \param boin ...
    2494             : !> \param boout ...
    2495             : !> \param sout ...
    2496             : !> \param fft_scratch ...
    2497             : ! **************************************************************************************************
    2498         104 :    SUBROUTINE cube_transpose_6(cin, group, boin, boout, sout, fft_scratch)
    2499             : 
    2500             :       COMPLEX(KIND=dp), CONTIGUOUS, DIMENSION(:, :), &
    2501             :          INTENT(IN)                                      :: cin
    2502             : 
    2503             :       CLASS(mp_comm_type), INTENT(IN)                     :: group
    2504             :       INTEGER, CONTIGUOUS, DIMENSION(:, :, 0:), INTENT(IN)           :: boin, boout
    2505             :       COMPLEX(KIND=dp), DIMENSION(:, :), INTENT(OUT), CONTIGUOUS     :: sout
    2506             :       TYPE(fft_scratch_type), INTENT(IN)                 :: fft_scratch
    2507             : 
    2508             :       CHARACTER(len=*), PARAMETER                        :: routineN = 'cube_transpose_6'
    2509             : 
    2510         104 :       COMPLEX(KIND=dp), DIMENSION(:, :), POINTER, CONTIGUOUS         :: rbuf
    2511             :       INTEGER                                            :: handle, ip, ir, iy, izx, lb, mip, my, &
    2512             :                                                             my_id, np, num_threads, nx, ny, nz, ub
    2513         104 :       INTEGER, DIMENSION(:), POINTER, CONTIGUOUS                     :: rcount, rdispl, scount, sdispl
    2514             : 
    2515         104 :       CALL timeset(routineN, handle)
    2516             : 
    2517         104 :       np = fft_scratch%sizes%numtask
    2518         104 :       mip = fft_scratch%mip
    2519         104 :       scount => fft_scratch%scount
    2520         104 :       rcount => fft_scratch%rcount
    2521         104 :       sdispl => fft_scratch%sdispl
    2522         104 :       rdispl => fft_scratch%rdispl
    2523             : 
    2524         104 :       nx = boin(2, 1, mip) - boin(1, 1, mip) + 1
    2525         104 :       nz = boin(2, 3, mip) - boin(1, 3, mip) + 1
    2526         312 :       my = MAXVAL(boout(2, 2, :) - boout(1, 2, :) + 1)
    2527             : 
    2528         104 :       rbuf => fft_scratch%rbuf5
    2529         104 :       num_threads = 1
    2530         104 :       my_id = 0
    2531             : !$OMP PARALLEL DEFAULT(NONE), &
    2532             : !$OMP          PRIVATE(NUM_THREADS,my_id,lb,ub,ip,ny,iy,ir), &
    2533         104 : !$OMP          SHARED(rbuf,nx,nz,np,boout,cin,my,scount,sdispl)
    2534             : !$    num_threads = MIN(omp_get_max_threads(), SIZE(rbuf, 2))
    2535             : !$    my_id = omp_get_thread_num()
    2536             :       IF (my_id < num_threads) THEN
    2537             :          lb = (SIZE(rbuf, 2)*my_id)/num_threads
    2538             :          ub = (SIZE(rbuf, 2)*(my_id + 1))/num_threads - 1
    2539             :          rbuf(:, lb:ub) = 0.0_dp
    2540             :       END IF
    2541             : !$OMP BARRIER
    2542             : 
    2543             : !$OMP DO COLLAPSE(2)
    2544             :       DO izx = 1, nz*nx
    2545             :          DO ip = 0, np - 1
    2546             :             ny = boout(2, 2, ip) - boout(1, 2, ip) + 1
    2547             :             DO iy = boout(1, 2, ip), boout(2, 2, ip)
    2548             :                ir = iy - boout(1, 2, ip) + 1 + (izx - 1)*ny
    2549             :                rbuf(ir, ip) = cin(iy, izx)
    2550             :             END DO
    2551             :          END DO
    2552             :       END DO
    2553             : !$OMP END DO
    2554             : !$OMP DO
    2555             :       DO ip = 0, np - 1
    2556             :          ny = boout(2, 2, ip) - boout(1, 2, ip) + 1
    2557             :          scount(ip) = nx*ny*nz
    2558             :          sdispl(ip) = nx*nz*my*ip
    2559             :       END DO
    2560             : !$OMP END DO
    2561             : !$OMP END PARALLEL
    2562         104 :       ny = boout(2, 2, mip) - boout(1, 2, mip) + 1
    2563             : !$OMP PARALLEL DO DEFAULT(NONE), &
    2564             : !$OMP             PRIVATE(nx), &
    2565         104 : !$OMP             SHARED(np,boin,rcount,rdispl,nz,ny)
    2566             :       DO ip = 0, np - 1
    2567             :          nx = boin(2, 1, ip) - boin(1, 1, ip) + 1
    2568             :          rcount(ip) = nx*ny*nz
    2569             :          rdispl(ip) = ny*nz*(boin(1, 1, ip) - 1)
    2570             :       END DO
    2571             : !$OMP END PARALLEL DO
    2572             : 
    2573         104 :       CALL group%alltoall(rbuf, scount, sdispl, sout, rcount, rdispl)
    2574             : 
    2575         104 :       CALL timestop(handle)
    2576             : 
    2577         104 :    END SUBROUTINE cube_transpose_6
    2578             : 
    2579             : ! **************************************************************************************************
    2580             : !> \brief ...
    2581             : ! **************************************************************************************************
    2582        9005 :    SUBROUTINE init_fft_scratch_pool()
    2583             : 
    2584        9005 :       CALL release_fft_scratch_pool()
    2585             : 
    2586             :       ! Allocate first scratch and mark it as used
    2587        9005 :       ALLOCATE (fft_scratch_first)
    2588      261145 :       ALLOCATE (fft_scratch_first%fft_scratch)
    2589             :       ! this is a very special scratch, it seems, we always keep it 'most - recent' so we will never delete it
    2590        9005 :       fft_scratch_first%fft_scratch%last_tick = HUGE(fft_scratch_first%fft_scratch%last_tick)
    2591             : 
    2592        9005 :       init_fft_pool = init_fft_pool + 1
    2593             : 
    2594        9005 :    END SUBROUTINE init_fft_scratch_pool
    2595             : 
    2596             : ! **************************************************************************************************
    2597             : !> \brief ...
    2598             : !> \param fft_scratch ...
    2599             : ! **************************************************************************************************
    2600       44164 :    SUBROUTINE deallocate_fft_scratch_type(fft_scratch)
    2601             :       TYPE(fft_scratch_type), INTENT(INOUT)    :: fft_scratch
    2602             : 
    2603             : #if defined(__OFFLOAD) && !defined(__NO_OFFLOAD_PW)
    2604             :       INTEGER                   :: ierr
    2605             :       COMPLEX(KIND=dp), POINTER :: dummy_ptr_z
    2606             : #endif
    2607             : 
    2608             :       ! deallocate structures
    2609       44164 :       IF (ASSOCIATED(fft_scratch%ziptr)) THEN
    2610       10233 :          CALL fft_dealloc(fft_scratch%ziptr)
    2611             :       END IF
    2612       44164 :       IF (ASSOCIATED(fft_scratch%zoptr)) THEN
    2613       10233 :          CALL fft_dealloc(fft_scratch%zoptr)
    2614             :       END IF
    2615       44164 :       IF (ASSOCIATED(fft_scratch%p1buf)) THEN
    2616           0 :          CALL fft_dealloc(fft_scratch%p1buf)
    2617             :       END IF
    2618       44164 :       IF (ASSOCIATED(fft_scratch%p2buf)) THEN
    2619             : #if defined(__OFFLOAD) && !defined(__NO_OFFLOAD_PW)
    2620             :          dummy_ptr_z => fft_scratch%p2buf(1, 1)
    2621             :          ierr = offload_free_pinned_mem(c_loc(dummy_ptr_z))
    2622             : #else
    2623           0 :          CALL fft_dealloc(fft_scratch%p2buf)
    2624             : #endif
    2625             :       END IF
    2626       44164 :       IF (ASSOCIATED(fft_scratch%p3buf)) THEN
    2627             : #if defined(__OFFLOAD) && !defined(__NO_OFFLOAD_PW)
    2628             :          dummy_ptr_z => fft_scratch%p3buf(1, 1)
    2629             :          ierr = offload_free_pinned_mem(c_loc(dummy_ptr_z))
    2630             : #else
    2631           0 :          CALL fft_dealloc(fft_scratch%p3buf)
    2632             : #endif
    2633             :       END IF
    2634       44164 :       IF (ASSOCIATED(fft_scratch%p4buf)) THEN
    2635             : #if defined(__OFFLOAD) && !defined(__NO_OFFLOAD_PW)
    2636             :          dummy_ptr_z => fft_scratch%p4buf(1, 1)
    2637             :          ierr = offload_free_pinned_mem(c_loc(dummy_ptr_z))
    2638             : #else
    2639           0 :          CALL fft_dealloc(fft_scratch%p4buf)
    2640             : #endif
    2641             :       END IF
    2642       44164 :       IF (ASSOCIATED(fft_scratch%p5buf)) THEN
    2643             : #if defined(__OFFLOAD) && !defined(__NO_OFFLOAD_PW)
    2644             :          dummy_ptr_z => fft_scratch%p5buf(1, 1)
    2645             :          ierr = offload_free_pinned_mem(c_loc(dummy_ptr_z))
    2646             : #else
    2647           0 :          CALL fft_dealloc(fft_scratch%p5buf)
    2648             : #endif
    2649             :       END IF
    2650       44164 :       IF (ASSOCIATED(fft_scratch%p6buf)) THEN
    2651           0 :          CALL fft_dealloc(fft_scratch%p6buf)
    2652             :       END IF
    2653       44164 :       IF (ASSOCIATED(fft_scratch%p7buf)) THEN
    2654             : #if defined(__OFFLOAD) && !defined(__NO_OFFLOAD_PW)
    2655             :          dummy_ptr_z => fft_scratch%p7buf(1, 1)
    2656             :          ierr = offload_free_pinned_mem(c_loc(dummy_ptr_z))
    2657             : #else
    2658           0 :          CALL fft_dealloc(fft_scratch%p7buf)
    2659             : #endif
    2660             :       END IF
    2661       44164 :       IF (ASSOCIATED(fft_scratch%r1buf)) THEN
    2662             : #if defined(__OFFLOAD) && !defined(__NO_OFFLOAD_PW)
    2663             :          dummy_ptr_z => fft_scratch%r1buf(1, 1)
    2664             :          ierr = offload_free_pinned_mem(c_loc(dummy_ptr_z))
    2665             : #else
    2666       24918 :          CALL fft_dealloc(fft_scratch%r1buf)
    2667             : #endif
    2668             :       END IF
    2669       44164 :       IF (ASSOCIATED(fft_scratch%r2buf)) THEN
    2670       24918 :          CALL fft_dealloc(fft_scratch%r2buf)
    2671             :       END IF
    2672       44164 :       IF (ASSOCIATED(fft_scratch%tbuf)) THEN
    2673             : #if defined(__OFFLOAD) && !defined(__NO_OFFLOAD_PW)
    2674             :          dummy_ptr_z => fft_scratch%tbuf(1, 1, 1)
    2675             :          ierr = offload_free_pinned_mem(c_loc(dummy_ptr_z))
    2676             : #else
    2677       24918 :          CALL fft_dealloc(fft_scratch%tbuf)
    2678             : #endif
    2679             :       END IF
    2680       44164 :       IF (ASSOCIATED(fft_scratch%a1buf)) THEN
    2681           8 :          CALL fft_dealloc(fft_scratch%a1buf)
    2682             :       END IF
    2683       44164 :       IF (ASSOCIATED(fft_scratch%a2buf)) THEN
    2684           8 :          CALL fft_dealloc(fft_scratch%a2buf)
    2685             :       END IF
    2686       44164 :       IF (ASSOCIATED(fft_scratch%a3buf)) THEN
    2687           8 :          CALL fft_dealloc(fft_scratch%a3buf)
    2688             :       END IF
    2689       44164 :       IF (ASSOCIATED(fft_scratch%a4buf)) THEN
    2690           8 :          CALL fft_dealloc(fft_scratch%a4buf)
    2691             :       END IF
    2692       44164 :       IF (ASSOCIATED(fft_scratch%a5buf)) THEN
    2693           8 :          CALL fft_dealloc(fft_scratch%a5buf)
    2694             :       END IF
    2695       44164 :       IF (ASSOCIATED(fft_scratch%a6buf)) THEN
    2696           8 :          CALL fft_dealloc(fft_scratch%a6buf)
    2697             :       END IF
    2698       44164 :       IF (ASSOCIATED(fft_scratch%scount)) THEN
    2699           0 :          DEALLOCATE (fft_scratch%scount, fft_scratch%rcount, &
    2700       24926 :                      fft_scratch%sdispl, fft_scratch%rdispl)
    2701             :       END IF
    2702       44164 :       IF (ASSOCIATED(fft_scratch%rr)) THEN
    2703       24910 :          DEALLOCATE (fft_scratch%rr)
    2704             :       END IF
    2705       44164 :       IF (ASSOCIATED(fft_scratch%xzbuf)) THEN
    2706           0 :          DEALLOCATE (fft_scratch%xzbuf)
    2707             :       END IF
    2708       44164 :       IF (ASSOCIATED(fft_scratch%yzbuf)) THEN
    2709           0 :          DEALLOCATE (fft_scratch%yzbuf)
    2710             :       END IF
    2711       44164 :       IF (ASSOCIATED(fft_scratch%xzbuf_sgl)) THEN
    2712           0 :          DEALLOCATE (fft_scratch%xzbuf_sgl)
    2713             :       END IF
    2714       44164 :       IF (ASSOCIATED(fft_scratch%yzbuf_sgl)) THEN
    2715           0 :          DEALLOCATE (fft_scratch%yzbuf_sgl)
    2716             :       END IF
    2717       44164 :       IF (ASSOCIATED(fft_scratch%ss)) THEN
    2718           8 :          DEALLOCATE (fft_scratch%ss)
    2719             :       END IF
    2720       44164 :       IF (ASSOCIATED(fft_scratch%tt)) THEN
    2721           8 :          DEALLOCATE (fft_scratch%tt)
    2722             :       END IF
    2723       44164 :       IF (ASSOCIATED(fft_scratch%pgrid)) THEN
    2724           0 :          DEALLOCATE (fft_scratch%pgrid)
    2725             :       END IF
    2726       44164 :       IF (ASSOCIATED(fft_scratch%pgcube)) THEN
    2727       24926 :          DEALLOCATE (fft_scratch%pgcube)
    2728             :       END IF
    2729       44164 :       IF (ASSOCIATED(fft_scratch%xcor)) THEN
    2730           0 :          DEALLOCATE (fft_scratch%xcor, fft_scratch%zcor)
    2731             :       END IF
    2732       44164 :       IF (ASSOCIATED(fft_scratch%pzcoord)) THEN
    2733           0 :          DEALLOCATE (fft_scratch%pzcoord)
    2734             :       END IF
    2735       44164 :       IF (ASSOCIATED(fft_scratch%xzcount)) THEN
    2736           0 :          DEALLOCATE (fft_scratch%xzcount, fft_scratch%yzcount)
    2737           0 :          DEALLOCATE (fft_scratch%xzdispl, fft_scratch%yzdispl)
    2738           0 :          fft_scratch%in = 0
    2739           0 :          fft_scratch%rsratio = 1._dp
    2740             :       END IF
    2741       44164 :       IF (ASSOCIATED(fft_scratch%rbuf1)) THEN
    2742           0 :          DEALLOCATE (fft_scratch%rbuf1)
    2743             :       END IF
    2744       44164 :       IF (ASSOCIATED(fft_scratch%rbuf2)) THEN
    2745           0 :          DEALLOCATE (fft_scratch%rbuf2)
    2746             :       END IF
    2747       44164 :       IF (ASSOCIATED(fft_scratch%rbuf3)) THEN
    2748           0 :          DEALLOCATE (fft_scratch%rbuf3)
    2749             :       END IF
    2750       44164 :       IF (ASSOCIATED(fft_scratch%rbuf4)) THEN
    2751           0 :          DEALLOCATE (fft_scratch%rbuf4)
    2752             :       END IF
    2753       44164 :       IF (ASSOCIATED(fft_scratch%rbuf5)) THEN
    2754           8 :          DEALLOCATE (fft_scratch%rbuf5)
    2755             :       END IF
    2756       44164 :       IF (ASSOCIATED(fft_scratch%rbuf6)) THEN
    2757           8 :          DEALLOCATE (fft_scratch%rbuf6)
    2758             :       END IF
    2759             : 
    2760       44164 :       IF (fft_scratch%cart_sub_comm(1) /= mp_comm_null) THEN
    2761           0 :          CALL fft_scratch%cart_sub_comm(1)%free()
    2762             :       END IF
    2763       44164 :       IF (fft_scratch%cart_sub_comm(2) /= mp_comm_null) THEN
    2764           0 :          CALL fft_scratch%cart_sub_comm(2)%free()
    2765             :       END IF
    2766             : 
    2767       44164 :       CALL fft_destroy_plan(fft_scratch%fft_plan(1))
    2768       44164 :       CALL fft_destroy_plan(fft_scratch%fft_plan(2))
    2769       44164 :       CALL fft_destroy_plan(fft_scratch%fft_plan(3))
    2770       44164 :       CALL fft_destroy_plan(fft_scratch%fft_plan(4))
    2771       44164 :       CALL fft_destroy_plan(fft_scratch%fft_plan(5))
    2772       44164 :       CALL fft_destroy_plan(fft_scratch%fft_plan(6))
    2773             : 
    2774       44164 :    END SUBROUTINE deallocate_fft_scratch_type
    2775             : 
    2776             : ! **************************************************************************************************
    2777             : !> \brief ...
    2778             : ! **************************************************************************************************
    2779       26805 :    SUBROUTINE release_fft_scratch_pool()
    2780             : 
    2781             :       TYPE(fft_scratch_pool_type), POINTER               :: fft_scratch, fft_scratch_current
    2782             : 
    2783       26805 :       IF (init_fft_pool == 0) NULLIFY (fft_scratch_first)
    2784             : 
    2785       26805 :       fft_scratch => fft_scratch_first
    2786       42458 :       DO
    2787       69263 :          IF (ASSOCIATED(fft_scratch)) THEN
    2788       42458 :             fft_scratch_current => fft_scratch
    2789       42458 :             fft_scratch => fft_scratch_current%fft_scratch_next
    2790       42458 :             NULLIFY (fft_scratch_current%fft_scratch_next)
    2791             : 
    2792       42458 :             CALL deallocate_fft_scratch_type(fft_scratch_current%fft_scratch)
    2793             : 
    2794      169832 :             DEALLOCATE (fft_scratch_current%fft_scratch)
    2795       42458 :             DEALLOCATE (fft_scratch_current)
    2796             :          ELSE
    2797             :             EXIT
    2798             :          END IF
    2799             :       END DO
    2800             : 
    2801       26805 :       init_fft_pool = 0
    2802             : 
    2803       26805 :    END SUBROUTINE release_fft_scratch_pool
    2804             : 
    2805             : ! **************************************************************************************************
    2806             : !> \brief ...
    2807             : ! **************************************************************************************************
    2808     3230920 :    SUBROUTINE resize_fft_scratch_pool()
    2809             : 
    2810             :       INTEGER                                            :: last_tick, nscratch
    2811             :       TYPE(fft_scratch_pool_type), POINTER               :: fft_scratch_current, fft_scratch_old
    2812             : 
    2813     3230920 :       nscratch = 0
    2814             : 
    2815     3230920 :       last_tick = HUGE(last_tick)
    2816     3230920 :       NULLIFY (fft_scratch_old)
    2817             : 
    2818             :       ! start at the global pool, count, and find a deletion candidate
    2819     3230920 :       fft_scratch_current => fft_scratch_first
    2820    23679378 :       DO
    2821    26910298 :          IF (ASSOCIATED(fft_scratch_current)) THEN
    2822    23679378 :             nscratch = nscratch + 1
    2823             :             ! is this a candidate for deletion (i.e. least recently used, and not in use)
    2824    23679378 :             IF (.NOT. fft_scratch_current%fft_scratch%in_use) THEN
    2825    20448458 :                IF (fft_scratch_current%fft_scratch%last_tick < last_tick) THEN
    2826     4001787 :                   last_tick = fft_scratch_current%fft_scratch%last_tick
    2827     4001787 :                   fft_scratch_old => fft_scratch_current
    2828             :                END IF
    2829             :             END IF
    2830    23679378 :             fft_scratch_current => fft_scratch_current%fft_scratch_next
    2831             :          ELSE
    2832             :             EXIT
    2833             :          END IF
    2834             :       END DO
    2835             : 
    2836             :       ! we should delete a scratch
    2837     3230920 :       IF (nscratch > fft_pool_scratch_limit) THEN
    2838             :          ! note that we never deallocate the first (special) element of the list
    2839        1706 :          IF (ASSOCIATED(fft_scratch_old)) THEN
    2840             :             fft_scratch_current => fft_scratch_first
    2841             :             DO
    2842       29002 :                IF (ASSOCIATED(fft_scratch_current)) THEN
    2843             :                   ! should we delete the next in the list?
    2844       27296 :                   IF (ASSOCIATED(fft_scratch_current%fft_scratch_next, fft_scratch_old)) THEN
    2845             :                      ! fix the linked list
    2846        1706 :                      fft_scratch_current%fft_scratch_next => fft_scratch_old%fft_scratch_next
    2847             : 
    2848             :                      ! deallocate the element
    2849        1706 :                      CALL deallocate_fft_scratch_type(fft_scratch_old%fft_scratch)
    2850        6824 :                      DEALLOCATE (fft_scratch_old%fft_scratch)
    2851        1706 :                      DEALLOCATE (fft_scratch_old)
    2852             : 
    2853             :                   ELSE
    2854             :                      fft_scratch_current => fft_scratch_current%fft_scratch_next
    2855             :                   END IF
    2856             :                ELSE
    2857             :                   EXIT
    2858             :                END IF
    2859             :             END DO
    2860             : 
    2861             :          ELSE
    2862           0 :             CPWARN("The number of the scratches exceeded the limit, but none could be deallocated")
    2863             :          END IF
    2864             :       END IF
    2865             : 
    2866     3230920 :    END SUBROUTINE resize_fft_scratch_pool
    2867             : 
    2868             : ! **************************************************************************************************
    2869             : !> \brief ...
    2870             : !> \param fft_scratch ...
    2871             : !> \param tf_type ...
    2872             : !> \param n ...
    2873             : !> \param fft_sizes ...
    2874             : ! **************************************************************************************************
    2875     3230920 :    SUBROUTINE get_fft_scratch(fft_scratch, tf_type, n, fft_sizes)
    2876             :       TYPE(fft_scratch_type), POINTER          :: fft_scratch
    2877             :       INTEGER, INTENT(IN)                      :: tf_type
    2878             :       INTEGER, DIMENSION(:), INTENT(IN)        :: n
    2879             :       TYPE(fft_scratch_sizes), INTENT(IN), &
    2880             :          OPTIONAL                               :: fft_sizes
    2881             : 
    2882             :       CHARACTER(len=*), PARAMETER :: routineN = 'get_fft_scratch'
    2883             : 
    2884             :       INTEGER :: coord(2), DIM(2), handle, i, ix, iz, lg, lmax, m1, m2, &
    2885             :                  mcx2, mcy3, mcz1, mcz2, mg, mmax, mx1, mx2, my1, my3, mz1, mz2, mz3, &
    2886             :                  nbx, nbz, nm, nmax, nmray, np, nx, ny, nyzray, nz, pos(2)
    2887             :       INTEGER, DIMENSION(3)                    :: pcoord
    2888             :       LOGICAL                                  :: equal
    2889             :       LOGICAL, DIMENSION(2)                    :: dims
    2890             :       TYPE(fft_scratch_pool_type), POINTER     :: fft_scratch_current, &
    2891             :                                                   fft_scratch_last, &
    2892             :                                                   fft_scratch_new
    2893             : #if defined(__OFFLOAD) && !defined(__NO_OFFLOAD_PW)
    2894             :       INTEGER                   :: ierr
    2895             :       INTEGER(KIND=C_SIZE_T)    :: length
    2896             :       TYPE(C_PTR)               :: cptr_r1buf, cptr_tbuf, &
    2897             :                                    cptr_p2buf, cptr_p3buf, cptr_p4buf, cptr_p5buf, cptr_p7buf
    2898             : #endif
    2899     3230920 :       CALL timeset(routineN, handle)
    2900             : 
    2901             :       ! this is the place to check that the scratch_pool does not grow without limits
    2902             :       ! before we add a new scratch check the size of the pool and release some of the list if needed
    2903     3230920 :       CALL resize_fft_scratch_pool()
    2904             : 
    2905             :       ! get the required scratch
    2906     3230920 :       tick_fft_pool = tick_fft_pool + 1
    2907     3230920 :       fft_scratch_current => fft_scratch_first
    2908             :       DO
    2909    15698363 :          IF (ASSOCIATED(fft_scratch_current)) THEN
    2910    15663204 :             IF (fft_scratch_current%fft_scratch%in_use) THEN
    2911     3230920 :                fft_scratch_last => fft_scratch_current
    2912     3230920 :                fft_scratch_current => fft_scratch_current%fft_scratch_next
    2913     3230920 :                CYCLE
    2914             :             END IF
    2915    12432284 :             IF (tf_type /= fft_scratch_current%fft_scratch%tf_type) THEN
    2916     4460914 :                fft_scratch_last => fft_scratch_current
    2917     4460914 :                fft_scratch_current => fft_scratch_current%fft_scratch_next
    2918     4460914 :                CYCLE
    2919             :             END IF
    2920    18509951 :             IF (.NOT. ALL(n == fft_scratch_current%fft_scratch%nfft)) THEN
    2921     4458939 :                fft_scratch_last => fft_scratch_current
    2922     4458939 :                fft_scratch_current => fft_scratch_current%fft_scratch_next
    2923     4458939 :                CYCLE
    2924             :             END IF
    2925     3512431 :             IF (PRESENT(fft_sizes)) THEN
    2926     2907836 :                IF (fft_sizes%gs_group /= fft_scratch_current%fft_scratch%group) THEN
    2927      312566 :                   fft_scratch_last => fft_scratch_current
    2928      312566 :                   fft_scratch_current => fft_scratch_current%fft_scratch_next
    2929      312566 :                   CYCLE
    2930             :                END IF
    2931     2595270 :                CALL is_equal(fft_sizes, fft_scratch_current%fft_scratch%sizes, equal)
    2932     2595270 :                IF (.NOT. equal) THEN
    2933        4104 :                   fft_scratch_last => fft_scratch_current
    2934        4104 :                   fft_scratch_current => fft_scratch_current%fft_scratch_next
    2935        4104 :                   CYCLE
    2936             :                END IF
    2937             :             END IF
    2938             :             ! Success
    2939     3195761 :             fft_scratch => fft_scratch_current%fft_scratch
    2940     3195761 :             fft_scratch_current%fft_scratch%in_use = .TRUE.
    2941     3195761 :             EXIT
    2942             :          ELSE
    2943             :             ! We cannot find the scratch type in this pool
    2944             :             ! Generate a new scratch set
    2945       35159 :             ALLOCATE (fft_scratch_new)
    2946      914134 :             ALLOCATE (fft_scratch_new%fft_scratch)
    2947             : 
    2948       35159 :             IF (tf_type .NE. 400) THEN
    2949       24926 :                fft_scratch_new%fft_scratch%sizes = fft_sizes
    2950       24926 :                np = fft_sizes%numtask
    2951             :                ALLOCATE (fft_scratch_new%fft_scratch%scount(0:np - 1), fft_scratch_new%fft_scratch%rcount(0:np - 1), &
    2952             :                          fft_scratch_new%fft_scratch%sdispl(0:np - 1), fft_scratch_new%fft_scratch%rdispl(0:np - 1), &
    2953      299112 :                          fft_scratch_new%fft_scratch%pgcube(0:np - 1, 2))
    2954             :             END IF
    2955             : 
    2956           0 :             SELECT CASE (tf_type)
    2957             :             CASE DEFAULT
    2958           0 :                CPABORT("")
    2959             :             CASE (100) ! fft3d_pb: full cube distribution
    2960           0 :                mx1 = fft_sizes%mx1
    2961           0 :                my1 = fft_sizes%my1
    2962           0 :                mx2 = fft_sizes%mx2
    2963           0 :                mz2 = fft_sizes%mz2
    2964           0 :                my3 = fft_sizes%my3
    2965           0 :                mz3 = fft_sizes%mz3
    2966           0 :                CALL fft_alloc(fft_scratch_new%fft_scratch%a1buf, [mx1*my1, n(3)])
    2967           0 :                CALL fft_alloc(fft_scratch_new%fft_scratch%a2buf, [n(3), mx1*my1])
    2968           0 :                CALL fft_alloc(fft_scratch_new%fft_scratch%a3buf, [mx2*mz2, n(2)])
    2969           0 :                CALL fft_alloc(fft_scratch_new%fft_scratch%a4buf, [n(2), mx2*mz2])
    2970           0 :                CALL fft_alloc(fft_scratch_new%fft_scratch%a5buf, [my3*mz3, n(1)])
    2971           0 :                CALL fft_alloc(fft_scratch_new%fft_scratch%a6buf, [n(1), my3*mz3])
    2972           0 :                fft_scratch_new%fft_scratch%group = fft_sizes%gs_group
    2973             : 
    2974           0 :                dim = fft_sizes%rs_group%num_pe_cart
    2975           0 :                pos = fft_sizes%rs_group%mepos_cart
    2976           0 :                fft_scratch_new%fft_scratch%mip = fft_sizes%rs_group%mepos
    2977           0 :                fft_scratch_new%fft_scratch%dim = dim
    2978           0 :                fft_scratch_new%fft_scratch%pos = pos
    2979           0 :                mcz1 = fft_sizes%mcz1
    2980           0 :                mcx2 = fft_sizes%mcx2
    2981           0 :                mcz2 = fft_sizes%mcz2
    2982           0 :                mcy3 = fft_sizes%mcy3
    2983           0 :                ALLOCATE (fft_scratch_new%fft_scratch%rbuf1(mx2*my1*mcz2, 0:DIM(2) - 1))
    2984           0 :                ALLOCATE (fft_scratch_new%fft_scratch%rbuf2(mx1*my1*mcz2, 0:DIM(2) - 1))
    2985           0 :                ALLOCATE (fft_scratch_new%fft_scratch%rbuf3(mx2*mz3*mcy3, 0:DIM(1) - 1))
    2986           0 :                ALLOCATE (fft_scratch_new%fft_scratch%rbuf4(mx2*mz2*mcy3, 0:DIM(1) - 1))
    2987             : 
    2988           0 :                dims = (/.TRUE., .FALSE./)
    2989           0 :                CALL fft_scratch_new%fft_scratch%cart_sub_comm(1)%from_sub(fft_sizes%rs_group, dims)
    2990           0 :                dims = (/.FALSE., .TRUE./)
    2991           0 :                CALL fft_scratch_new%fft_scratch%cart_sub_comm(2)%from_sub(fft_sizes%rs_group, dims)
    2992             : 
    2993             :                !initialise pgcube
    2994           0 :                DO i = 0, DIM(1) - 1
    2995           0 :                   coord = (/i, pos(2)/)
    2996           0 :                   CALL fft_sizes%rs_group%rank_cart(coord, fft_scratch_new%fft_scratch%pgcube(i, 1))
    2997             :                END DO
    2998           0 :                DO i = 0, DIM(2) - 1
    2999           0 :                   coord = (/pos(1), i/)
    3000           0 :                   CALL fft_sizes%rs_group%rank_cart(coord, fft_scratch_new%fft_scratch%pgcube(i, 2))
    3001             :                END DO
    3002             : 
    3003             :                !set up fft plans
    3004             :                CALL fft_create_plan_1dm(fft_scratch_new%fft_scratch%fft_plan(1), fft_type, FWFFT, .TRUE., n(3), mx1*my1, &
    3005           0 :                                         fft_scratch_new%fft_scratch%a1buf, fft_scratch_new%fft_scratch%a2buf, fft_plan_style)
    3006             :                CALL fft_create_plan_1dm(fft_scratch_new%fft_scratch%fft_plan(2), fft_type, FWFFT, .TRUE., n(2), mx2*mz2, &
    3007           0 :                                         fft_scratch_new%fft_scratch%a3buf, fft_scratch_new%fft_scratch%a4buf, fft_plan_style)
    3008             :                CALL fft_create_plan_1dm(fft_scratch_new%fft_scratch%fft_plan(3), fft_type, FWFFT, .TRUE., n(1), my3*mz3, &
    3009           0 :                                         fft_scratch_new%fft_scratch%a5buf, fft_scratch_new%fft_scratch%a6buf, fft_plan_style)
    3010             :                CALL fft_create_plan_1dm(fft_scratch_new%fft_scratch%fft_plan(4), fft_type, BWFFT, .TRUE., n(1), my3*mz3, &
    3011           0 :                                         fft_scratch_new%fft_scratch%a6buf, fft_scratch_new%fft_scratch%a5buf, fft_plan_style)
    3012             :                CALL fft_create_plan_1dm(fft_scratch_new%fft_scratch%fft_plan(5), fft_type, BWFFT, .TRUE., n(2), mx2*mz2, &
    3013           0 :                                         fft_scratch_new%fft_scratch%a4buf, fft_scratch_new%fft_scratch%a3buf, fft_plan_style)
    3014             :                CALL fft_create_plan_1dm(fft_scratch_new%fft_scratch%fft_plan(6), fft_type, BWFFT, .TRUE., n(3), mx1*my1, &
    3015           0 :                                         fft_scratch_new%fft_scratch%a2buf, fft_scratch_new%fft_scratch%a1buf, fft_plan_style)
    3016             : 
    3017             :             CASE (101) ! fft3d_pb: full cube distribution (dim 1)
    3018           8 :                mx1 = fft_sizes%mx1
    3019           8 :                my1 = fft_sizes%my1
    3020           8 :                mz1 = fft_sizes%mz1
    3021           8 :                my3 = fft_sizes%my3
    3022           8 :                mz3 = fft_sizes%mz3
    3023          24 :                CALL fft_alloc(fft_scratch_new%fft_scratch%a1buf, [mx1*my1, n(3)])
    3024          24 :                CALL fft_alloc(fft_scratch_new%fft_scratch%a2buf, [n(3), mx1*my1])
    3025           8 :                fft_scratch_new%fft_scratch%group = fft_sizes%gs_group
    3026          24 :                CALL fft_alloc(fft_scratch_new%fft_scratch%a3buf, [mx1*mz1, n(2)])
    3027          24 :                CALL fft_alloc(fft_scratch_new%fft_scratch%a4buf, [n(2), mx1*mz1])
    3028          24 :                CALL fft_alloc(fft_scratch_new%fft_scratch%a5buf, [my3*mz3, n(1)])
    3029          24 :                CALL fft_alloc(fft_scratch_new%fft_scratch%a6buf, [n(1), my3*mz3])
    3030             : 
    3031          24 :                dim = fft_sizes%rs_group%num_pe_cart
    3032          24 :                pos = fft_sizes%rs_group%mepos_cart
    3033           8 :                fft_scratch_new%fft_scratch%mip = fft_sizes%rs_group%mepos
    3034          24 :                fft_scratch_new%fft_scratch%dim = dim
    3035          24 :                fft_scratch_new%fft_scratch%pos = pos
    3036           8 :                mcy3 = fft_sizes%mcy3
    3037          32 :                ALLOCATE (fft_scratch_new%fft_scratch%rbuf5(mx1*mz3*mcy3, 0:DIM(1) - 1))
    3038          32 :                ALLOCATE (fft_scratch_new%fft_scratch%rbuf6(mx1*mz1*mcy3, 0:DIM(1) - 1))
    3039             : 
    3040             :                !set up fft plans
    3041             :                CALL fft_create_plan_1dm(fft_scratch_new%fft_scratch%fft_plan(1), fft_type, FWFFT, .TRUE., n(3), mx1*my1, &
    3042           8 :                                         fft_scratch_new%fft_scratch%a1buf, fft_scratch_new%fft_scratch%a3buf, fft_plan_style)
    3043             :                CALL fft_create_plan_1dm(fft_scratch_new%fft_scratch%fft_plan(2), fft_type, FWFFT, .TRUE., n(2), mx1*mz1, &
    3044           8 :                                         fft_scratch_new%fft_scratch%a3buf, fft_scratch_new%fft_scratch%a4buf, fft_plan_style)
    3045             :                CALL fft_create_plan_1dm(fft_scratch_new%fft_scratch%fft_plan(3), fft_type, FWFFT, .TRUE., n(1), my3*mz3, &
    3046           8 :                                         fft_scratch_new%fft_scratch%a5buf, fft_scratch_new%fft_scratch%a6buf, fft_plan_style)
    3047             :                CALL fft_create_plan_1dm(fft_scratch_new%fft_scratch%fft_plan(4), fft_type, BWFFT, .TRUE., n(1), my3*mz3, &
    3048           8 :                                         fft_scratch_new%fft_scratch%a6buf, fft_scratch_new%fft_scratch%a5buf, fft_plan_style)
    3049             :                CALL fft_create_plan_1dm(fft_scratch_new%fft_scratch%fft_plan(5), fft_type, BWFFT, .TRUE., n(2), mx1*mz1, &
    3050           8 :                                         fft_scratch_new%fft_scratch%a4buf, fft_scratch_new%fft_scratch%a3buf, fft_plan_style)
    3051             :                CALL fft_create_plan_1dm(fft_scratch_new%fft_scratch%fft_plan(6), fft_type, BWFFT, .TRUE., n(3), mx1*my1, &
    3052           8 :                                         fft_scratch_new%fft_scratch%a3buf, fft_scratch_new%fft_scratch%a1buf, fft_plan_style)
    3053             : 
    3054             :             CASE (200) ! fft3d_ps: plane distribution
    3055       24918 :                nx = fft_sizes%nx
    3056       24918 :                ny = fft_sizes%ny
    3057       24918 :                nz = fft_sizes%nz
    3058       24918 :                mx2 = fft_sizes%mx2
    3059       24918 :                lmax = fft_sizes%lmax
    3060       24918 :                mmax = fft_sizes%mmax
    3061       24918 :                lg = fft_sizes%lg
    3062       24918 :                mg = fft_sizes%mg
    3063       24918 :                np = fft_sizes%numtask
    3064       24918 :                nmray = fft_sizes%nmray
    3065       24918 :                nyzray = fft_sizes%nyzray
    3066             : #if defined(__OFFLOAD) && !defined(__NO_OFFLOAD_PW)
    3067             :                length = INT(2*dp_size*MAX(mmax, 1)*MAX(lmax, 1), KIND=C_SIZE_T)
    3068             :                ierr = offload_malloc_pinned_mem(cptr_r1buf, length)
    3069             :                CPASSERT(ierr == 0)
    3070             :                CALL c_f_pointer(cptr_r1buf, fft_scratch_new%fft_scratch%r1buf, (/MAX(mmax, 1), MAX(lmax, 1)/))
    3071             :                length = INT(2*dp_size*MAX(ny, 1)*MAX(nz, 1)*MAX(nx, 1), KIND=C_SIZE_T)
    3072             :                ierr = offload_malloc_pinned_mem(cptr_tbuf, length)
    3073             :                CPASSERT(ierr == 0)
    3074             :                CALL c_f_pointer(cptr_tbuf, fft_scratch_new%fft_scratch%tbuf, (/MAX(ny, 1), MAX(nz, 1), MAX(nx, 1)/))
    3075             : #else
    3076       74754 :                CALL fft_alloc(fft_scratch_new%fft_scratch%r1buf, [mmax, lmax])
    3077       99672 :                CALL fft_alloc(fft_scratch_new%fft_scratch%tbuf, [ny, nz, nx])
    3078             : #endif
    3079       24918 :                fft_scratch_new%fft_scratch%group = fft_sizes%gs_group
    3080       74754 :                CALL fft_alloc(fft_scratch_new%fft_scratch%r2buf, [lg, mg])
    3081       24918 :                nm = nmray*mx2
    3082       24918 :                IF (alltoall_sgl) THEN
    3083          32 :                   ALLOCATE (fft_scratch_new%fft_scratch%ss(mmax, lmax))
    3084          32 :                   ALLOCATE (fft_scratch_new%fft_scratch%tt(nm, 0:np - 1))
    3085             :                ELSE
    3086       99640 :                   ALLOCATE (fft_scratch_new%fft_scratch%rr(nm, 0:np - 1))
    3087             :                END IF
    3088             : 
    3089             :                !set up fft plans
    3090             :                CALL fft_create_plan_1dm(fft_scratch_new%fft_scratch%fft_plan(1), fft_type, FWFFT, .TRUE., nz, nx*ny, &
    3091       24918 :                                         fft_scratch_new%fft_scratch%tbuf, fft_scratch_new%fft_scratch%r1buf, fft_plan_style)
    3092             :                CALL fft_create_plan_1dm(fft_scratch_new%fft_scratch%fft_plan(2), fft_type, FWFFT, .TRUE., ny, nx*nz, &
    3093       24918 :                                         fft_scratch_new%fft_scratch%r1buf, fft_scratch_new%fft_scratch%tbuf, fft_plan_style)
    3094             :                CALL fft_create_plan_1dm(fft_scratch_new%fft_scratch%fft_plan(3), fft_type, FWFFT, .TRUE., n(1), nyzray, &
    3095       24918 :                                         fft_scratch_new%fft_scratch%r1buf, fft_scratch_new%fft_scratch%r2buf, fft_plan_style)
    3096             :                CALL fft_create_plan_1dm(fft_scratch_new%fft_scratch%fft_plan(4), fft_type, BWFFT, .TRUE., n(1), nyzray, &
    3097       24918 :                                         fft_scratch_new%fft_scratch%r2buf, fft_scratch_new%fft_scratch%r1buf, fft_plan_style)
    3098             :                CALL fft_create_plan_1dm(fft_scratch_new%fft_scratch%fft_plan(5), fft_type, BWFFT, .TRUE., ny, nx*nz, &
    3099       24918 :                                         fft_scratch_new%fft_scratch%tbuf, fft_scratch_new%fft_scratch%r1buf, fft_plan_style)
    3100             :                CALL fft_create_plan_1dm(fft_scratch_new%fft_scratch%fft_plan(6), fft_type, BWFFT, .TRUE., nz, nx*ny, &
    3101       24918 :                                         fft_scratch_new%fft_scratch%r1buf, fft_scratch_new%fft_scratch%tbuf, fft_plan_style)
    3102             : 
    3103             :             CASE (300) ! fft3d_ps: block distribution
    3104           0 :                mx1 = fft_sizes%mx1
    3105           0 :                mx2 = fft_sizes%mx2
    3106           0 :                my1 = fft_sizes%my1
    3107           0 :                mz2 = fft_sizes%mz2
    3108           0 :                mcx2 = fft_sizes%mcx2
    3109           0 :                lg = fft_sizes%lg
    3110           0 :                mg = fft_sizes%mg
    3111           0 :                nmax = fft_sizes%nmax
    3112           0 :                nmray = fft_sizes%nmray
    3113           0 :                nyzray = fft_sizes%nyzray
    3114           0 :                m1 = fft_sizes%r_dim(1)
    3115           0 :                m2 = fft_sizes%r_dim(2)
    3116           0 :                nbx = fft_sizes%nbx
    3117           0 :                nbz = fft_sizes%nbz
    3118           0 :                CALL fft_alloc(fft_scratch_new%fft_scratch%p1buf, [mx1*my1, n(3)])
    3119           0 :                CALL fft_alloc(fft_scratch_new%fft_scratch%p6buf, [lg, mg])
    3120             : #if defined(__OFFLOAD) && !defined(__NO_OFFLOAD_PW)
    3121             :                length = INT(2*dp_size*MAX(n(3), 1)*MAX(mx1*my1, 1), KIND=C_SIZE_T)
    3122             :                ierr = offload_malloc_pinned_mem(cptr_p2buf, length)
    3123             :                CPASSERT(ierr == 0)
    3124             :                CALL c_f_pointer(cptr_p2buf, fft_scratch_new%fft_scratch%p2buf, (/MAX(n(3), 1), MAX(mx1*my1, 1)/))
    3125             :                length = INT(2*dp_size*MAX(mx2*mz2, 1)*MAX(n(2), 1), KIND=C_SIZE_T)
    3126             :                ierr = offload_malloc_pinned_mem(cptr_p3buf, length)
    3127             :                CPASSERT(ierr == 0)
    3128             :                CALL c_f_pointer(cptr_p3buf, fft_scratch_new%fft_scratch%p3buf, (/MAX(mx2*mz2, 1), MAX(n(2), 1)/))
    3129             :                length = INT(2*dp_size*MAX(n(2), 1)*MAX(mx2*mz2, 1), KIND=C_SIZE_T)
    3130             :                ierr = offload_malloc_pinned_mem(cptr_p4buf, length)
    3131             :                CPASSERT(ierr == 0)
    3132             :                CALL c_f_pointer(cptr_p4buf, fft_scratch_new%fft_scratch%p4buf, (/MAX(n(2), 1), MAX(mx2*mz2, 1)/))
    3133             :                length = INT(2*dp_size*MAX(nyzray, 1)*MAX(n(1), 1), KIND=C_SIZE_T)
    3134             :                ierr = offload_malloc_pinned_mem(cptr_p5buf, length)
    3135             :                CPASSERT(ierr == 0)
    3136             :                CALL c_f_pointer(cptr_p5buf, fft_scratch_new%fft_scratch%p5buf, (/MAX(nyzray, 1), MAX(n(1), 1)/))
    3137             :                length = INT(2*dp_size*MAX(mg, 1)*MAX(lg, 1), KIND=C_SIZE_T)
    3138             :                ierr = offload_malloc_pinned_mem(cptr_p7buf, length)
    3139             :                CPASSERT(ierr == 0)
    3140             :                CALL c_f_pointer(cptr_p7buf, fft_scratch_new%fft_scratch%p7buf, (/MAX(mg, 1), MAX(lg, 1)/))
    3141             : #else
    3142           0 :                CALL fft_alloc(fft_scratch_new%fft_scratch%p2buf, [n(3), mx1*my1])
    3143           0 :                CALL fft_alloc(fft_scratch_new%fft_scratch%p3buf, [mx2*mz2, n(2)])
    3144           0 :                CALL fft_alloc(fft_scratch_new%fft_scratch%p4buf, [n(2), mx2*mz2])
    3145           0 :                CALL fft_alloc(fft_scratch_new%fft_scratch%p5buf, [nyzray, n(1)])
    3146           0 :                CALL fft_alloc(fft_scratch_new%fft_scratch%p7buf, [mg, lg])
    3147             : #endif
    3148           0 :                IF (alltoall_sgl) THEN
    3149           0 :                   ALLOCATE (fft_scratch_new%fft_scratch%yzbuf_sgl(mg*lg))
    3150           0 :                   ALLOCATE (fft_scratch_new%fft_scratch%xzbuf_sgl(n(2)*mx2*mz2))
    3151             :                ELSE
    3152           0 :                   ALLOCATE (fft_scratch_new%fft_scratch%yzbuf(mg*lg))
    3153           0 :                   ALLOCATE (fft_scratch_new%fft_scratch%xzbuf(n(2)*mx2*mz2))
    3154             :                END IF
    3155           0 :                ALLOCATE (fft_scratch_new%fft_scratch%pgrid(0:m1 - 1, 0:m2 - 1))
    3156           0 :                ALLOCATE (fft_scratch_new%fft_scratch%xcor(nbx))
    3157           0 :                ALLOCATE (fft_scratch_new%fft_scratch%zcor(nbz))
    3158           0 :                ALLOCATE (fft_scratch_new%fft_scratch%pzcoord(0:np - 1))
    3159             :                ALLOCATE (fft_scratch_new%fft_scratch%xzcount(0:np - 1), &
    3160           0 :                          fft_scratch_new%fft_scratch%yzcount(0:np - 1))
    3161             :                ALLOCATE (fft_scratch_new%fft_scratch%xzdispl(0:np - 1), &
    3162           0 :                          fft_scratch_new%fft_scratch%yzdispl(0:np - 1))
    3163           0 :                fft_scratch_new%fft_scratch%group = fft_sizes%gs_group
    3164             : 
    3165           0 :                dim = fft_sizes%rs_group%num_pe_cart
    3166           0 :                pos = fft_sizes%rs_group%mepos_cart
    3167           0 :                fft_scratch_new%fft_scratch%mip = fft_sizes%rs_group%mepos
    3168           0 :                fft_scratch_new%fft_scratch%dim = dim
    3169           0 :                fft_scratch_new%fft_scratch%pos = pos
    3170           0 :                mcz1 = fft_sizes%mcz1
    3171           0 :                mcz2 = fft_sizes%mcz2
    3172           0 :                ALLOCATE (fft_scratch_new%fft_scratch%rbuf1(mx2*my1*mcz2, 0:DIM(2) - 1))
    3173           0 :                ALLOCATE (fft_scratch_new%fft_scratch%rbuf2(mx1*my1*mcz2, 0:DIM(2) - 1))
    3174             : 
    3175           0 :                dims = (/.FALSE., .TRUE./)
    3176           0 :                CALL fft_scratch_new%fft_scratch%cart_sub_comm(2)%from_sub(fft_sizes%rs_group, dims)
    3177             : 
    3178             :                !initialise pgcube
    3179           0 :                DO i = 0, DIM(2) - 1
    3180           0 :                   coord = (/pos(1), i/)
    3181           0 :                   CALL fft_sizes%rs_group%rank_cart(coord, fft_scratch_new%fft_scratch%pgcube(i, 2))
    3182             :                END DO
    3183             : 
    3184             :                !initialise pgrid
    3185           0 :                DO ix = 0, m1 - 1
    3186           0 :                   DO iz = 0, m2 - 1
    3187           0 :                      coord = (/ix, iz/)
    3188           0 :                      CALL fft_sizes%rs_group%rank_cart(coord, fft_scratch_new%fft_scratch%pgrid(ix, iz))
    3189             :                   END DO
    3190             :                END DO
    3191             : 
    3192             :                !initialise pzcoord
    3193           0 :                DO i = 0, np - 1
    3194           0 :                   CALL fft_sizes%rs_group%coords(i, pcoord)
    3195           0 :                   fft_scratch_new%fft_scratch%pzcoord(i) = pcoord(2)
    3196             :                END DO
    3197             : 
    3198             :                !set up fft plans
    3199             :                CALL fft_create_plan_1dm(fft_scratch_new%fft_scratch%fft_plan(1), fft_type, FWFFT, .TRUE., n(3), mx1*my1, &
    3200           0 :                                         fft_scratch_new%fft_scratch%p1buf, fft_scratch_new%fft_scratch%p2buf, fft_plan_style)
    3201             :                CALL fft_create_plan_1dm(fft_scratch_new%fft_scratch%fft_plan(2), fft_type, FWFFT, .TRUE., n(2), mx2*mz2, &
    3202           0 :                                         fft_scratch_new%fft_scratch%p3buf, fft_scratch_new%fft_scratch%p4buf, fft_plan_style)
    3203             :                CALL fft_create_plan_1dm(fft_scratch_new%fft_scratch%fft_plan(3), fft_type, FWFFT, .TRUE., n(1), nyzray, &
    3204           0 :                                         fft_scratch_new%fft_scratch%p5buf, fft_scratch_new%fft_scratch%p6buf, fft_plan_style)
    3205             :                CALL fft_create_plan_1dm(fft_scratch_new%fft_scratch%fft_plan(4), fft_type, BWFFT, .TRUE., n(1), nyzray, &
    3206           0 :                                         fft_scratch_new%fft_scratch%p6buf, fft_scratch_new%fft_scratch%p7buf, fft_plan_style)
    3207             :                CALL fft_create_plan_1dm(fft_scratch_new%fft_scratch%fft_plan(5), fft_type, BWFFT, .TRUE., n(2), mx2*mz2, &
    3208           0 :                                         fft_scratch_new%fft_scratch%p4buf, fft_scratch_new%fft_scratch%p3buf, fft_plan_style)
    3209             :                CALL fft_create_plan_1dm(fft_scratch_new%fft_scratch%fft_plan(6), fft_type, BWFFT, .TRUE., n(3), mx1*my1, &
    3210           0 :                                         fft_scratch_new%fft_scratch%p3buf, fft_scratch_new%fft_scratch%p1buf, fft_plan_style)
    3211             : 
    3212             :             CASE (400) ! serial FFT
    3213       10233 :                np = 0
    3214       10233 :                CALL fft_alloc(fft_scratch_new%fft_scratch%ziptr, n)
    3215       10233 :                CALL fft_alloc(fft_scratch_new%fft_scratch%zoptr, n)
    3216             : 
    3217             :                !in place plans
    3218             :                CALL fft_create_plan_3d(fft_scratch_new%fft_scratch%fft_plan(1), fft_type, .TRUE., FWFFT, n, &
    3219       10233 :                                        fft_scratch_new%fft_scratch%ziptr, fft_scratch_new%fft_scratch%ziptr, fft_plan_style)
    3220             :                CALL fft_create_plan_3d(fft_scratch_new%fft_scratch%fft_plan(2), fft_type, .TRUE., BWFFT, n, &
    3221       10233 :                                        fft_scratch_new%fft_scratch%ziptr, fft_scratch_new%fft_scratch%ziptr, fft_plan_style)
    3222             :                ! out of place plans
    3223             :                CALL fft_create_plan_3d(fft_scratch_new%fft_scratch%fft_plan(3), fft_type, .FALSE., FWFFT, n, &
    3224       10233 :                                        fft_scratch_new%fft_scratch%ziptr, fft_scratch_new%fft_scratch%zoptr, fft_plan_style)
    3225             :                CALL fft_create_plan_3d(fft_scratch_new%fft_scratch%fft_plan(4), fft_type, .FALSE., BWFFT, n, &
    3226       45392 :                                        fft_scratch_new%fft_scratch%ziptr, fft_scratch_new%fft_scratch%zoptr, fft_plan_style)
    3227             : 
    3228             :             END SELECT
    3229             : 
    3230       35159 :             NULLIFY (fft_scratch_new%fft_scratch_next)
    3231             :             fft_scratch_new%fft_scratch%fft_scratch_id = &
    3232       35159 :                fft_scratch_last%fft_scratch%fft_scratch_id + 1
    3233       35159 :             fft_scratch_new%fft_scratch%in_use = .TRUE.
    3234      140636 :             fft_scratch_new%fft_scratch%nfft = n
    3235       35159 :             fft_scratch_last%fft_scratch_next => fft_scratch_new
    3236       35159 :             fft_scratch_new%fft_scratch%tf_type = tf_type
    3237       35159 :             fft_scratch => fft_scratch_new%fft_scratch
    3238       35159 :             EXIT
    3239             : 
    3240             :          END IF
    3241             :       END DO
    3242             : 
    3243     3230920 :       fft_scratch%last_tick = tick_fft_pool
    3244             : 
    3245     3230920 :       CALL timestop(handle)
    3246             : 
    3247     3230920 :    END SUBROUTINE get_fft_scratch
    3248             : 
    3249             : ! **************************************************************************************************
    3250             : !> \brief ...
    3251             : !> \param fft_scratch ...
    3252             : ! **************************************************************************************************
    3253     3230920 :    SUBROUTINE release_fft_scratch(fft_scratch)
    3254             : 
    3255             :       TYPE(fft_scratch_type), POINTER                    :: fft_scratch
    3256             : 
    3257             :       INTEGER                                            :: scratch_id
    3258             :       TYPE(fft_scratch_pool_type), POINTER               :: fft_scratch_current
    3259             : 
    3260     3230920 :       scratch_id = fft_scratch%fft_scratch_id
    3261             : 
    3262     3230920 :       fft_scratch_current => fft_scratch_first
    3263    12467443 :       DO
    3264    15698363 :          IF (ASSOCIATED(fft_scratch_current)) THEN
    3265    15698363 :             IF (scratch_id == fft_scratch_current%fft_scratch%fft_scratch_id) THEN
    3266     3230920 :                fft_scratch%in_use = .FALSE.
    3267     3230920 :                NULLIFY (fft_scratch)
    3268     3230920 :                EXIT
    3269             :             END IF
    3270    12467443 :             fft_scratch_current => fft_scratch_current%fft_scratch_next
    3271             :          ELSE
    3272             :             ! We cannot find the scratch type in this pool
    3273           0 :             CPABORT("")
    3274           0 :             EXIT
    3275             :          END IF
    3276             :       END DO
    3277             : 
    3278     3230920 :    END SUBROUTINE release_fft_scratch
    3279             : 
    3280             : ! **************************************************************************************************
    3281             : !> \brief ...
    3282             : !> \param rs ...
    3283             : !> \param scount ...
    3284             : !> \param sdispl ...
    3285             : !> \param rq ...
    3286             : !> \param rcount ...
    3287             : !> \param rdispl ...
    3288             : !> \param group ...
    3289             : ! **************************************************************************************************
    3290           0 :    SUBROUTINE sparse_alltoall(rs, scount, sdispl, rq, rcount, rdispl, group)
    3291             :       COMPLEX(KIND=dp), DIMENSION(:), POINTER            :: rs
    3292             :       INTEGER, DIMENSION(:), POINTER                     :: scount, sdispl
    3293             :       COMPLEX(KIND=dp), DIMENSION(:), POINTER            :: rq
    3294             :       INTEGER, DIMENSION(:), POINTER                     :: rcount, rdispl
    3295             : 
    3296             :       CLASS(mp_comm_type), INTENT(IN)                     :: group
    3297             : 
    3298           0 :       COMPLEX(KIND=dp), DIMENSION(:), POINTER            :: msgin, msgout
    3299             :       INTEGER                                            :: ip, n, nr, ns, pos
    3300           0 :       TYPE(mp_request_type), ALLOCATABLE, DIMENSION(:)   :: rreq, sreq
    3301             : 
    3302           0 :       CALL group%sync()
    3303           0 :       n = group%num_pe
    3304           0 :       pos = group%mepos
    3305           0 :       ALLOCATE (sreq(0:n - 1))
    3306           0 :       ALLOCATE (rreq(0:n - 1))
    3307           0 :       nr = 0
    3308           0 :       DO ip = 0, n - 1
    3309           0 :          IF (rcount(ip) == 0) CYCLE
    3310           0 :          IF (ip == pos) CYCLE
    3311           0 :          msgout => rq(rdispl(ip) + 1:rdispl(ip) + rcount(ip))
    3312           0 :          CALL group%irecv(msgout, ip, rreq(nr))
    3313           0 :          nr = nr + 1
    3314             :       END DO
    3315           0 :       ns = 0
    3316           0 :       DO ip = 0, n - 1
    3317           0 :          IF (scount(ip) == 0) CYCLE
    3318           0 :          IF (ip == pos) CYCLE
    3319           0 :          msgin => rs(sdispl(ip) + 1:sdispl(ip) + scount(ip))
    3320           0 :          CALL group%isend(msgin, ip, sreq(ns))
    3321           0 :          ns = ns + 1
    3322             :       END DO
    3323           0 :       IF (rcount(pos) /= 0) THEN
    3324           0 :          IF (rcount(pos) /= scount(pos)) CPABORT("")
    3325           0 :          rq(rdispl(pos) + 1:rdispl(pos) + rcount(pos)) = rs(sdispl(pos) + 1:sdispl(pos) + scount(pos))
    3326             :       END IF
    3327           0 :       CALL mp_waitall(sreq(0:ns - 1))
    3328           0 :       CALL mp_waitall(rreq(0:nr - 1))
    3329           0 :       DEALLOCATE (sreq)
    3330           0 :       DEALLOCATE (rreq)
    3331           0 :       CALL group%sync()
    3332             : 
    3333           0 :    END SUBROUTINE sparse_alltoall
    3334             : 
    3335             : ! **************************************************************************************************
    3336             : !> \brief  test data structures for equality. It is assumed that if they are
    3337             : !>         different for one mpi task they are different for all (??)
    3338             : !> \param fft_size_1 ...
    3339             : !> \param fft_size_2 ...
    3340             : !> \param equal ...
    3341             : ! **************************************************************************************************
    3342     2595270 :    SUBROUTINE is_equal(fft_size_1, fft_size_2, equal)
    3343             :       TYPE(fft_scratch_sizes)                            :: fft_size_1, fft_size_2
    3344             :       LOGICAL                                            :: equal
    3345             : 
    3346             :       equal = .TRUE.
    3347             : 
    3348     2595270 :       equal = equal .AND. fft_size_1%nx == fft_size_2%nx
    3349     2595270 :       equal = equal .AND. fft_size_1%ny == fft_size_2%ny
    3350     2595270 :       equal = equal .AND. fft_size_1%nz == fft_size_2%nz
    3351             : 
    3352     2595270 :       equal = equal .AND. fft_size_1%lmax == fft_size_2%lmax
    3353     2595270 :       equal = equal .AND. fft_size_1%mmax == fft_size_2%mmax
    3354     2595270 :       equal = equal .AND. fft_size_1%nmax == fft_size_2%nmax
    3355             : 
    3356     2595270 :       equal = equal .AND. fft_size_1%mx1 == fft_size_2%mx1
    3357     2595270 :       equal = equal .AND. fft_size_1%mx2 == fft_size_2%mx2
    3358     2595270 :       equal = equal .AND. fft_size_1%mx3 == fft_size_2%mx3
    3359             : 
    3360     2595270 :       equal = equal .AND. fft_size_1%my1 == fft_size_2%my1
    3361     2595270 :       equal = equal .AND. fft_size_1%my2 == fft_size_2%my2
    3362     2595270 :       equal = equal .AND. fft_size_1%my3 == fft_size_2%my3
    3363             : 
    3364     2595270 :       equal = equal .AND. fft_size_1%mcz1 == fft_size_2%mcz1
    3365     2595270 :       equal = equal .AND. fft_size_1%mcx2 == fft_size_2%mcx2
    3366     2595270 :       equal = equal .AND. fft_size_1%mcz2 == fft_size_2%mcz2
    3367     2595270 :       equal = equal .AND. fft_size_1%mcy3 == fft_size_2%mcy3
    3368             : 
    3369     2595270 :       equal = equal .AND. fft_size_1%lg == fft_size_2%lg
    3370     2595270 :       equal = equal .AND. fft_size_1%mg == fft_size_2%mg
    3371             : 
    3372     2595270 :       equal = equal .AND. fft_size_1%nbx == fft_size_2%nbx
    3373     2595270 :       equal = equal .AND. fft_size_1%nbz == fft_size_2%nbz
    3374             : 
    3375     2595270 :       equal = equal .AND. fft_size_1%nmray == fft_size_2%nmray
    3376     2595270 :       equal = equal .AND. fft_size_1%nyzray == fft_size_2%nyzray
    3377             : 
    3378     2595270 :       equal = equal .AND. fft_size_1%gs_group == fft_size_2%gs_group
    3379     2595270 :       equal = equal .AND. fft_size_1%rs_group == fft_size_2%rs_group
    3380             : 
    3381     7785810 :       equal = equal .AND. ALL(fft_size_1%g_pos == fft_size_2%g_pos)
    3382     7785810 :       equal = equal .AND. ALL(fft_size_1%r_pos == fft_size_2%r_pos)
    3383     7785810 :       equal = equal .AND. ALL(fft_size_1%r_dim == fft_size_2%r_dim)
    3384             : 
    3385     2595270 :       equal = equal .AND. fft_size_1%numtask == fft_size_2%numtask
    3386             : 
    3387     2595270 :    END SUBROUTINE is_equal
    3388             : 
    3389           0 : END MODULE fft_tools

Generated by: LCOV version 1.15