LCOV - code coverage report
Current view: top level - src/pw - fft_tools.F (source / functions) Coverage Total Hit
Test: CP2K Regtests (git:936074a) Lines: 52.2 % 1360 710
Test Date: 2025-12-04 06:27:48 Functions: 54.8 % 31 17

            Line data    Source code
       1              : !--------------------------------------------------------------------------------------------------!
       2              : !   CP2K: A general program to perform molecular dynamics simulations                              !
       3              : !   Copyright 2000-2025 CP2K developers group <https://cp2k.org>                                   !
       4              : !                                                                                                  !
       5              : !   SPDX-License-Identifier: GPL-2.0-or-later                                                      !
       6              : !--------------------------------------------------------------------------------------------------!
       7              : 
       8              : ! **************************************************************************************************
       9              : !> \par History
      10              : !>      JGH (30-Nov-2000): ESSL FFT Library added
      11              : !>      JGH (05-Jan-2001): Added SGI library FFT
      12              : !>      JGH (14-Jan-2001): Added parallel 3d FFT
      13              : !>      JGH (10-Feb-2006): New interface type
      14              : !>      JGH (31-Mar-2008): Remove local allocates and reshapes (performance)
      15              : !>                         Possible problems can be related with setting arrays
      16              : !>                         not to zero
      17              : !>                         Some interfaces could be further simplified by avoiding
      18              : !>                         an initial copy. However, this assumes contiguous arrays
      19              : !>      IAB (15-Oct-2008): Moved mp_cart_sub calls out of cube_tranpose_* and into
      20              : !>                         fft_scratch type, reducing number of calls dramatically
      21              : !>      IAB (05-Dec-2008): Moved all other non-essential MPI calls into scratch type
      22              : !>      IAB (09-Jan-2009): Added fft_plan_type to store FFT data, including cached FFTW plans
      23              : !>      IAB (13-Feb-2009): Extended plan caching to serial 3D FFT (fft3d_s)
      24              : !>      IAB (09-Oct-2009): Added OpenMP directives to parallel 3D FFT
      25              : !>                         (c) The Numerical Algorithms Group (NAG) Ltd, 2008-2009 on behalf of the HECToR project
      26              : !>      HFP (17-Oct-2024): Thread-safety insurance (CPASSERT), and OMP ATOMIC (tick_fft_pool)
      27              : !> \author JGH
      28              : ! **************************************************************************************************
      29              : MODULE fft_tools
      30              :    USE ISO_C_BINDING,                   ONLY: C_F_POINTER,&
      31              :                                               C_LOC,&
      32              :                                               C_PTR,&
      33              :                                               C_SIZE_T
      34              :    USE cp_log_handling,                 ONLY: cp_logger_get_default_io_unit
      35              :    USE fft_lib,                         ONLY: &
      36              :         fft_1dm, fft_3d, fft_alloc, fft_create_plan_1dm, fft_create_plan_3d, fft_dealloc, &
      37              :         fft_destroy_plan, fft_do_cleanup, fft_do_init, fft_get_lengths, fft_library
      38              :    USE fft_plan,                        ONLY: fft_plan_type
      39              :    USE kinds,                           ONLY: dp,&
      40              :                                               dp_size,&
      41              :                                               sp
      42              :    USE mathconstants,                   ONLY: z_zero
      43              :    USE message_passing,                 ONLY: mp_cart_type,&
      44              :                                               mp_comm_null,&
      45              :                                               mp_comm_type,&
      46              :                                               mp_request_type,&
      47              :                                               mp_waitall
      48              :    USE offload_api,                     ONLY: offload_free_pinned_mem,&
      49              :                                               offload_malloc_pinned_mem
      50              : 
      51              : !$ USE OMP_LIB, ONLY: omp_get_max_threads, omp_get_thread_num, omp_in_parallel
      52              : 
      53              : #include "../base/base_uses.f90"
      54              : 
      55              :    IMPLICIT NONE
      56              : 
      57              :    CHARACTER(len=*), PARAMETER, PRIVATE :: moduleN = 'fft_tools'
      58              : 
      59              :    ! Types for the pool of scratch data needed in FFT routines
      60              :    ! keep the subroutine "is_equal" up-to-date
      61              :    ! needs a default initialization
      62              :    TYPE fft_scratch_sizes
      63              :       INTEGER                              :: nx = 0, ny = 0, nz = 0
      64              :       INTEGER                              :: lmax = 0, mmax = 0, nmax = 0
      65              :       INTEGER                              :: mx1 = 0, mx2 = 0, mx3 = 0
      66              :       INTEGER                              :: my1 = 0, my2 = 0, my3 = 0
      67              :       INTEGER                              :: mz1 = 0, mz2 = 0, mz3 = 0
      68              :       INTEGER                              :: mcz1 = 0, mcz2 = 0, mcy3 = 0, mcx2 = 0
      69              :       INTEGER                              :: lg = 0, mg = 0
      70              :       INTEGER                              :: nbx = 0, nbz = 0
      71              :       INTEGER                              :: nmray = 0, nyzray = 0
      72              :       TYPE(mp_cart_type)                   :: rs_group = mp_cart_type()
      73              :       INTEGER, DIMENSION(2)                :: g_pos = 0, r_pos = 0, r_dim = 0
      74              :       INTEGER                              :: numtask = 0
      75              :    END TYPE fft_scratch_sizes
      76              : 
      77              :    TYPE fft_scratch_type
      78              :       INTEGER                              :: fft_scratch_id = 0
      79              :       INTEGER                              :: tf_type = -1
      80              :       LOGICAL                              :: in_use = .TRUE.
      81              :       TYPE(mp_comm_type)                   :: group = mp_comm_type()
      82              :       INTEGER, DIMENSION(3)                :: nfft = -1
      83              :       ! to be used in cube_transpose_* routines
      84              :       TYPE(mp_cart_type), DIMENSION(2)     :: cart_sub_comm = mp_cart_type()
      85              :       INTEGER, DIMENSION(2)                :: dim = -1, pos = -1
      86              :       ! to be used in fft3d_s
      87              :       COMPLEX(KIND=dp), DIMENSION(:, :, :), POINTER, CONTIGUOUS &
      88              :          :: ziptr => NULL(), zoptr => NULL()
      89              :       ! to be used in fft3d_ps : block distribution
      90              :       COMPLEX(KIND=dp), DIMENSION(:, :), CONTIGUOUS, POINTER &
      91              :          :: p1buf => NULL(), p2buf => NULL(), p3buf => NULL(), p4buf => NULL(), &
      92              :             p5buf => NULL(), p6buf => NULL(), p7buf => NULL()
      93              :       ! to be used in fft3d_ps : plane distribution
      94              :       COMPLEX(KIND=dp), DIMENSION(:, :), POINTER, CONTIGUOUS &
      95              :          :: r1buf => NULL(), r2buf => NULL()
      96              :       COMPLEX(KIND=dp), DIMENSION(:, :, :), POINTER, CONTIGUOUS &
      97              :          :: tbuf => NULL()
      98              :       ! to be used in fft3d_pb
      99              :       COMPLEX(KIND=dp), DIMENSION(:, :), POINTER, CONTIGUOUS &
     100              :          :: a1buf => NULL(), a2buf => NULL(), a3buf => NULL(), &
     101              :             a4buf => NULL(), a5buf => NULL(), a6buf => NULL()
     102              :       ! to be used in communication routines
     103              :       INTEGER, DIMENSION(:), CONTIGUOUS, POINTER    :: scount => NULL(), rcount => NULL(), sdispl => NULL(), rdispl => NULL()
     104              :       INTEGER, DIMENSION(:, :), CONTIGUOUS, POINTER :: pgcube => NULL()
     105              :       INTEGER, DIMENSION(:), CONTIGUOUS, POINTER    :: xzcount => NULL(), yzcount => NULL(), xzdispl => NULL(), yzdispl => NULL()
     106              :       INTEGER                                       :: in = 0, mip = -1
     107              :       REAL(KIND=dp)                                 :: rsratio = 1.0_dp
     108              :       COMPLEX(KIND=dp), DIMENSION(:), POINTER, CONTIGUOUS &
     109              :          :: xzbuf => NULL(), yzbuf => NULL()
     110              :       COMPLEX(KIND=sp), DIMENSION(:), POINTER, CONTIGUOUS &
     111              :          :: xzbuf_sgl => NULL(), yzbuf_sgl => NULL()
     112              :       COMPLEX(KIND=dp), DIMENSION(:, :), POINTER, CONTIGUOUS &
     113              :          :: rbuf1 => NULL(), rbuf2 => NULL(), rbuf3 => NULL(), rbuf4 => NULL(), &
     114              :             rbuf5 => NULL(), rbuf6 => NULL(), rr => NULL()
     115              :       COMPLEX(KIND=sp), DIMENSION(:, :), POINTER, CONTIGUOUS &
     116              :          :: ss => NULL(), tt => NULL()
     117              :       INTEGER, DIMENSION(:, :), POINTER, CONTIGUOUS :: pgrid => NULL()
     118              :       INTEGER, DIMENSION(:), POINTER, CONTIGUOUS    :: xcor => NULL(), zcor => NULL(), pzcoord => NULL()
     119              :       TYPE(fft_scratch_sizes)                       :: sizes = fft_scratch_sizes()
     120              :       TYPE(fft_plan_type), DIMENSION(6)             :: fft_plan = fft_plan_type()
     121              :       INTEGER                                       :: last_tick = -1
     122              :    END TYPE fft_scratch_type
     123              : 
     124              :    TYPE fft_scratch_pool_type
     125              :       TYPE(fft_scratch_type), POINTER       :: fft_scratch => NULL()
     126              :       TYPE(fft_scratch_pool_type), POINTER  :: fft_scratch_next => NULL()
     127              :    END TYPE fft_scratch_pool_type
     128              : 
     129              :    INTEGER, SAVE                              :: init_fft_pool = 0
     130              :    ! the clock for fft pool. Allows to identify the least recently used scratch
     131              :    INTEGER, SAVE                              :: tick_fft_pool = 0
     132              :    ! limit the number of scratch pools to fft_pool_scratch_limit.
     133              :    INTEGER, SAVE                              :: fft_pool_scratch_limit = 15
     134              :    TYPE(fft_scratch_pool_type), POINTER, SAVE :: fft_scratch_first
     135              :    ! END of types for the pool of scratch data needed in FFT routines
     136              : 
     137              :    PRIVATE
     138              :    PUBLIC :: init_fft, fft3d, finalize_fft
     139              :    PUBLIC :: fft_alloc, fft_dealloc
     140              :    PUBLIC :: init_fft_scratch_pool
     141              :    PUBLIC :: fft_radix_operations, fft_fw1d
     142              :    PUBLIC :: FWFFT, BWFFT
     143              :    PUBLIC :: FFT_RADIX_CLOSEST, FFT_RADIX_NEXT
     144              :    PUBLIC :: FFT_RADIX_NEXT_ODD
     145              : 
     146              :    INTEGER, PARAMETER :: FWFFT = +1, BWFFT = -1
     147              :    INTEGER, PARAMETER :: FFT_RADIX_CLOSEST = 493, FFT_RADIX_NEXT = 494
     148              :    INTEGER, PARAMETER :: FFT_RADIX_ALLOWED = 495, FFT_RADIX_DISALLOWED = 496
     149              :    INTEGER, PARAMETER :: FFT_RADIX_NEXT_ODD = 497
     150              : 
     151              :    REAL(KIND=dp), PARAMETER :: ratio_sparse_alltoall = 0.5_dp
     152              : 
     153              :    ! these saved variables are FFT globals
     154              :    INTEGER, SAVE :: fft_type = 0
     155              :    LOGICAL, SAVE :: alltoall_sgl = .FALSE.
     156              :    LOGICAL, SAVE :: use_fftsg_sizes = .TRUE.
     157              :    INTEGER, SAVE :: fft_plan_style = 1
     158              : 
     159              :    ! these are only needed for pw_gpu (-D__OFFLOAD)
     160              :    PUBLIC :: get_fft_scratch, release_fft_scratch
     161              :    PUBLIC :: cube_transpose_1, cube_transpose_2
     162              :    PUBLIC :: yz_to_x, x_to_yz, xz_to_yz, yz_to_xz
     163              :    PUBLIC :: fft_scratch_sizes, fft_scratch_type
     164              :    PUBLIC :: fft_type, fft_plan_style
     165              : 
     166              :    INTERFACE fft3d
     167              :       MODULE PROCEDURE fft3d_s, fft3d_ps, fft3d_pb
     168              :    END INTERFACE
     169              : 
     170              : ! **************************************************************************************************
     171              : 
     172              : CONTAINS
     173              : 
     174              : ! **************************************************************************************************
     175              : !> \brief ...
     176              : !> \param fftlib ...
     177              : !> \param alltoall ...
     178              : !> \param fftsg_sizes ...
     179              : !> \param pool_limit ...
     180              : !> \param wisdom_file ...
     181              : !> \param plan_style ...
     182              : !> \author JGH
     183              : ! **************************************************************************************************
     184         9897 :    SUBROUTINE init_fft(fftlib, alltoall, fftsg_sizes, pool_limit, wisdom_file, &
     185              :                        plan_style)
     186              : 
     187              :       CHARACTER(LEN=*), INTENT(IN)                       :: fftlib
     188              :       LOGICAL, INTENT(IN)                                :: alltoall, fftsg_sizes
     189              :       INTEGER, INTENT(IN)                                :: pool_limit
     190              :       CHARACTER(LEN=*), INTENT(IN)                       :: wisdom_file
     191              :       INTEGER, INTENT(IN)                                :: plan_style
     192              : 
     193         9897 :       use_fftsg_sizes = fftsg_sizes
     194         9897 :       alltoall_sgl = alltoall
     195         9897 :       fft_pool_scratch_limit = pool_limit
     196         9897 :       fft_type = fft_library(fftlib)
     197         9897 :       fft_plan_style = plan_style
     198              : 
     199         9897 :       IF (fft_type <= 0) CPABORT("Unknown FFT library: "//TRIM(fftlib))
     200              : 
     201         9897 :       CALL fft_do_init(fft_type, wisdom_file)
     202              : 
     203              :       ! setup the FFT scratch pool, if one is associated, clear first
     204         9897 :       CALL init_fft_scratch_pool() ! CALLs release_fft_scratch_pool()
     205              : 
     206         9897 :    END SUBROUTINE init_fft
     207              : 
     208              : ! **************************************************************************************************
     209              : !> \brief does whatever is needed to finalize the current fft setup
     210              : !> \param para_env ...
     211              : !> \param wisdom_file ...
     212              : !> \par History
     213              : !>      10.2007 created [Joost VandeVondele]
     214              : ! **************************************************************************************************
     215         9687 :    SUBROUTINE finalize_fft(para_env, wisdom_file)
     216              :       CLASS(mp_comm_type)                    :: para_env
     217              :       CHARACTER(LEN=*), INTENT(IN)           :: wisdom_file
     218              : 
     219              : ! release the FFT scratch pool
     220              : 
     221         9687 :       CALL release_fft_scratch_pool()
     222              : 
     223              :       ! finalize fft libs
     224              : 
     225         9687 :       CALL fft_do_cleanup(fft_type, wisdom_file, para_env%is_source())
     226              : 
     227         9687 :    END SUBROUTINE finalize_fft
     228              : 
     229              : ! **************************************************************************************************
     230              : !> \brief Determine the allowed lengths of FFT's   '''
     231              : !> \param radix_in ...
     232              : !> \param radix_out ...
     233              : !> \param operation ...
     234              : !> \par History
     235              : !>      new library structure (JGH)
     236              : !> \author Ari Seitsonen
     237              : ! **************************************************************************************************
     238       115694 :    SUBROUTINE fft_radix_operations(radix_in, radix_out, operation)
     239              : 
     240              :       INTEGER, INTENT(IN)                                :: radix_in
     241              :       INTEGER, INTENT(OUT)                               :: radix_out
     242              :       INTEGER, INTENT(IN)                                :: operation
     243              : 
     244              :       INTEGER, PARAMETER                                 :: fft_type_sg = 1
     245              : 
     246              :       INTEGER                                            :: i, iloc, ldata
     247       115694 :       INTEGER, ALLOCATABLE, DIMENSION(:)                 :: DATA
     248              : 
     249       115694 :       ldata = 1024
     250       115694 :       ALLOCATE (DATA(ldata))
     251    118586350 :       DATA = -1
     252              : 
     253              :       ! if the user wants to use fftsg sizes go for it
     254       115694 :       IF (use_fftsg_sizes) THEN
     255       115166 :          CALL fft_get_lengths(fft_type_sg, DATA, ldata)
     256              :       ELSE
     257          528 :          CALL fft_get_lengths(fft_type, DATA, ldata)
     258              :       END IF
     259              : 
     260       115694 :       iloc = 0
     261      1271136 :       DO i = 1, ldata
     262      1271136 :          IF (DATA(i) == radix_in) THEN
     263              :             iloc = i
     264              :             EXIT
     265              :          ELSE
     266      1241134 :             IF (OPERATION == FFT_RADIX_ALLOWED) THEN
     267              :                CYCLE
     268      1241134 :             ELSE IF (DATA(i) > radix_in) THEN
     269              :                iloc = i
     270              :                EXIT
     271              :             END IF
     272              :          END IF
     273              :       END DO
     274              : 
     275       115694 :       IF (iloc == 0) THEN
     276            0 :          CPABORT("Index to radix array not found.")
     277              :       END IF
     278              : 
     279       115694 :       IF (OPERATION == FFT_RADIX_ALLOWED) THEN
     280            0 :          IF (DATA(iloc) == radix_in) THEN
     281            0 :             radix_out = FFT_RADIX_ALLOWED
     282              :          ELSE
     283            0 :             radix_out = FFT_RADIX_DISALLOWED
     284              :          END IF
     285              : 
     286       115694 :       ELSE IF (OPERATION == FFT_RADIX_CLOSEST) THEN
     287          270 :          IF (DATA(iloc) == radix_in) THEN
     288          102 :             radix_out = DATA(iloc)
     289              :          ELSE
     290          168 :             IF (ABS(DATA(iloc - 1) - radix_in) <= &
     291              :                 ABS(DATA(iloc) - radix_in)) THEN
     292          162 :                radix_out = DATA(iloc - 1)
     293              :             ELSE
     294            6 :                radix_out = DATA(iloc)
     295              :             END IF
     296              :          END IF
     297              : 
     298       115424 :       ELSE IF (OPERATION == FFT_RADIX_NEXT) THEN
     299       113762 :          radix_out = DATA(iloc)
     300              : 
     301         1662 :       ELSE IF (OPERATION == FFT_RADIX_NEXT_ODD) THEN
     302         3788 :          DO i = iloc, ldata
     303         3788 :             IF (MOD(DATA(i), 2) == 1) THEN
     304         1662 :                radix_out = DATA(i)
     305         1662 :                EXIT
     306              :             END IF
     307              :          END DO
     308         1662 :          IF (MOD(radix_out, 2) == 0) THEN
     309            0 :             CPABORT("No odd radix found.")
     310              :          END IF
     311              : 
     312              :       ELSE
     313            0 :          CPABORT("Disallowed radix operation.")
     314              :       END IF
     315              : 
     316       115694 :       DEALLOCATE (DATA)
     317              : 
     318       115694 :    END SUBROUTINE fft_radix_operations
     319              : 
     320              : ! **************************************************************************************************
     321              : !> \brief Performs m 1-D forward FFT-s of size n.
     322              : !> \param n      size of the FFT
     323              : !> \param m      number of FFT-s
     324              : !> \param trans  shape of input and output arrays: [n x m] if it is FALSE, [m x n] if it is TRUE
     325              : !> \param zin    input array
     326              : !> \param zout   output array
     327              : !> \param scale  scaling factor
     328              : !> \param stat   status of the operation, non-zero code indicates an error
     329              : ! **************************************************************************************************
     330            0 :    SUBROUTINE fft_fw1d(n, m, trans, zin, zout, scale, stat)
     331              :       INTEGER, INTENT(in)                                :: n, m
     332              :       LOGICAL, INTENT(in)                                :: trans
     333              :       COMPLEX(kind=dp), DIMENSION(*), INTENT(inout)      :: zin, zout
     334              :       REAL(kind=dp), INTENT(in)                          :: scale
     335              :       INTEGER, INTENT(out)                               :: stat
     336              : 
     337              :       CHARACTER(len=*), PARAMETER                        :: routineN = 'fft_fw1d'
     338              : 
     339              :       INTEGER                                            :: handle
     340              :       TYPE(fft_plan_type)                                :: fft_plan
     341              : 
     342            0 :       CALL timeset(routineN, handle)
     343              : 
     344            0 :       IF (fft_type == 3) THEN
     345            0 :          CALL fft_create_plan_1dm(fft_plan, fft_type, FWFFT, trans, n, m, zin, zout, fft_plan_style)
     346            0 :          CALL fft_1dm(fft_plan, zin, zout, scale, stat)
     347            0 :          CALL fft_destroy_plan(fft_plan)
     348              :       ELSE
     349              :          CALL cp_warn(__LOCATION__, &
     350            0 :                       "FFT library in use cannot handle transformation of an arbitrary length.")
     351            0 :          stat = 1
     352              :       END IF
     353              : 
     354            0 :       CALL timestop(handle)
     355            0 :    END SUBROUTINE fft_fw1d
     356              : 
     357              : ! **************************************************************************************************
     358              : !> \brief Calls the 3D-FFT function from the initialized library
     359              : !> \param fsign ...
     360              : !> \param n ...
     361              : !> \param zin ...
     362              : !> \param zout ...
     363              : !> \param status ...
     364              : !> \param debug ...
     365              : !> \par History
     366              : !>      none
     367              : !> \author JGH
     368              : ! **************************************************************************************************
     369       653108 :    SUBROUTINE fft3d_s(fsign, n, zin, zout, status, debug)
     370              : 
     371              :       INTEGER, INTENT(IN)                                :: fsign
     372              :       INTEGER, DIMENSION(:), INTENT(INOUT)               :: n
     373              :       COMPLEX(KIND=dp), CONTIGUOUS, DIMENSION(:, :, :), &
     374              :          INTENT(INOUT)                                   :: zin
     375              :       COMPLEX(KIND=dp), CONTIGUOUS, DIMENSION(:, :, :), &
     376              :          INTENT(INOUT), OPTIONAL, TARGET                 :: zout
     377              :       INTEGER, INTENT(OUT), OPTIONAL                     :: status
     378              :       LOGICAL, INTENT(IN), OPTIONAL                      :: debug
     379              : 
     380              :       CHARACTER(len=*), PARAMETER                        :: routineN = 'fft3d_s'
     381              : 
     382              :       COMPLEX(KIND=dp), CONTIGUOUS, DIMENSION(:, :, :), &
     383       653108 :          POINTER                                         :: zoptr
     384              :       COMPLEX(KIND=dp), DIMENSION(1, 1, 1), TARGET       :: zdum
     385              :       INTEGER                                            :: handle, ld(3), lo(3), output_unit, sign, &
     386              :                                                             stat
     387              :       LOGICAL                                            :: fft_in_place, test
     388              :       REAL(KIND=dp)                                      :: in_sum, norm, out_sum
     389              :       TYPE(fft_scratch_type), POINTER                    :: fft_scratch
     390              : 
     391       653108 :       CALL timeset(routineN, handle)
     392       653108 :       output_unit = cp_logger_get_default_io_unit()
     393              : 
     394       653108 :       IF (fsign == FWFFT) THEN
     395      1241436 :          norm = 1.0_dp/REAL(PRODUCT(n), KIND=dp)
     396       342749 :       ELSE IF (fsign == BWFFT) THEN
     397       342749 :          norm = 1.0_dp
     398              :       ELSE
     399            0 :          CPABORT("Unknown FFT direction!")
     400              :       END IF
     401              : 
     402       653108 :       IF (PRESENT(debug)) THEN
     403       615505 :          test = debug
     404              :       ELSE
     405              :          test = .FALSE.
     406              :       END IF
     407              : 
     408       653108 :       IF (PRESENT(zout)) THEN
     409              :          fft_in_place = .FALSE.
     410              :       ELSE
     411       653100 :          fft_in_place = .TRUE.
     412              :       END IF
     413              : 
     414       653108 :       IF (test) THEN
     415            0 :          in_sum = SUM(ABS(zin))
     416              :       END IF
     417              : 
     418       653108 :       ld(1) = SIZE(zin, 1)
     419       653108 :       ld(2) = SIZE(zin, 2)
     420       653108 :       ld(3) = SIZE(zin, 3)
     421              : 
     422       653108 :       IF (n(1) /= ld(1) .OR. n(2) /= ld(2) .OR. n(3) /= ld(3)) THEN
     423            0 :          CPABORT("Size and dimension (zin) have to be the same.")
     424              :       END IF
     425              : 
     426       653108 :       sign = fsign
     427       653108 :       CALL get_fft_scratch(fft_scratch, tf_type=400, n=n)
     428              : 
     429       653108 :       IF (fft_in_place) THEN
     430       653100 :          zoptr => zdum
     431       653100 :          IF (fsign == FWFFT) THEN
     432       310355 :             CALL fft_3d(fft_scratch%fft_plan(1), norm, zin, zoptr, stat)
     433              :          ELSE
     434       342745 :             CALL fft_3d(fft_scratch%fft_plan(2), norm, zin, zoptr, stat)
     435              :          END IF
     436              :       ELSE
     437            8 :          IF (fsign == FWFFT) THEN
     438            4 :             CALL fft_3d(fft_scratch%fft_plan(3), norm, zin, zout, stat)
     439              :          ELSE
     440            4 :             CALL fft_3d(fft_scratch%fft_plan(4), norm, zin, zout, stat)
     441              :          END IF
     442              :       END IF
     443              : 
     444       653108 :       CALL release_fft_scratch(fft_scratch)
     445              : 
     446       653108 :       IF (PRESENT(zout)) THEN
     447            8 :          lo(1) = SIZE(zout, 1)
     448            8 :          lo(2) = SIZE(zout, 2)
     449            8 :          lo(3) = SIZE(zout, 3)
     450            8 :          IF (n(1) /= lo(1) .OR. n(2) /= lo(2) .OR. n(3) /= lo(3)) THEN
     451            0 :             CPABORT("Size and dimension (zout) have to be the same.")
     452              :          END IF
     453              :       END IF
     454              : 
     455       653108 :       IF (PRESENT(status)) THEN
     456         9885 :          status = stat
     457              :       END IF
     458              : 
     459       653108 :       IF (test .AND. output_unit > 0) THEN
     460            0 :          IF (PRESENT(zout)) THEN
     461            0 :             out_sum = SUM(ABS(zout))
     462            0 :             WRITE (output_unit, '(A)') "  Out of place 3D FFT (local)  : fft3d_s"
     463            0 :             WRITE (output_unit, '(A,T60,3I7)') "     Transform lengths ", n
     464            0 :             WRITE (output_unit, '(A,T60,3I7)') "     Input array dimensions ", ld
     465            0 :             WRITE (output_unit, '(A,T60,3I7)') "     Output array dimensions ", lo
     466            0 :             WRITE (output_unit, '(A,T61,E20.14)') "     Sum of input data ", in_sum
     467            0 :             WRITE (output_unit, '(A,T61,E20.14)') "     Sum of output data ", out_sum
     468              :          ELSE
     469            0 :             out_sum = SUM(ABS(zin))
     470            0 :             WRITE (output_unit, '(A)') "  In place 3D FFT (local)  : fft3d_s"
     471            0 :             WRITE (output_unit, '(A,T60,3I7)') "     Transform lengths ", n
     472            0 :             WRITE (output_unit, '(A,T60,3I7)') "     Input/output array dimensions ", ld
     473            0 :             WRITE (output_unit, '(A,T61,E20.14)') "     Sum of input data ", in_sum
     474            0 :             WRITE (output_unit, '(A,T61,E20.14)') "     Sum of output data ", out_sum
     475              :          END IF
     476              :       END IF
     477              : 
     478       653108 :       CALL timestop(handle)
     479              : 
     480       653108 :    END SUBROUTINE fft3d_s
     481              : 
     482              : ! **************************************************************************************************
     483              : !> \brief ...
     484              : !> \param fsign ...
     485              : !> \param n ...
     486              : !> \param cin ...
     487              : !> \param gin ...
     488              : !> \param rs_group ...
     489              : !> \param yzp ...
     490              : !> \param nyzray ...
     491              : !> \param bo ...
     492              : !> \param status ...
     493              : !> \param debug ...
     494              : ! **************************************************************************************************
     495      2819134 :    SUBROUTINE fft3d_ps(fsign, n, cin, gin, rs_group, yzp, nyzray, &
     496      2819134 :                        bo, status, debug)
     497              : 
     498              :       INTEGER, INTENT(IN)                                :: fsign
     499              :       INTEGER, DIMENSION(:), INTENT(IN)                  :: n
     500              :       COMPLEX(KIND=dp), CONTIGUOUS, DIMENSION(:, :, :), &
     501              :          INTENT(INOUT)                                   :: cin
     502              :       COMPLEX(KIND=dp), CONTIGUOUS, DIMENSION(:, :), &
     503              :          INTENT(INOUT)                                   :: gin
     504              :       TYPE(mp_cart_type), INTENT(IN)                     :: rs_group
     505              :       INTEGER, CONTIGUOUS, DIMENSION(:, :, 0:), &
     506              :          INTENT(IN)                                      :: yzp
     507              :       INTEGER, CONTIGUOUS, DIMENSION(0:), INTENT(IN)     :: nyzray
     508              :       INTEGER, CONTIGUOUS, DIMENSION(:, :, 0:, :), &
     509              :          INTENT(IN)                                      :: bo
     510              :       INTEGER, INTENT(OUT), OPTIONAL                     :: status
     511              :       LOGICAL, INTENT(IN), OPTIONAL                      :: debug
     512              : 
     513              :       CHARACTER(len=*), PARAMETER                        :: routineN = 'fft3d_ps'
     514              : 
     515              :       COMPLEX(KIND=dp), CONTIGUOUS, DIMENSION(:, :), &
     516      2819134 :          POINTER                                         :: pbuf, qbuf, rbuf, sbuf
     517              :       COMPLEX(KIND=dp), CONTIGUOUS, DIMENSION(:, :, :), &
     518      2819134 :          POINTER                                         :: tbuf
     519              :       INTEGER :: g_pos, handle, lg, lmax, mcx2, mcz1, mcz2, mg, mmax, mx1, mx2, my1, mz2, n1, n2, &
     520              :          nmax, numtask, nx, ny, nz, output_unit, r_dim(2), r_pos(2), rp, sign, stat
     521      2819134 :       INTEGER, ALLOCATABLE, DIMENSION(:)                 :: p2p
     522              :       LOGICAL                                            :: test
     523              :       REAL(KIND=dp)                                      :: norm, sum_data
     524     19733938 :       TYPE(fft_scratch_sizes)                            :: fft_scratch_size
     525              :       TYPE(fft_scratch_type), POINTER                    :: fft_scratch
     526              : 
     527      2819134 :       CALL timeset(routineN, handle)
     528      2819134 :       output_unit = cp_logger_get_default_io_unit()
     529              : 
     530      2819134 :       IF (PRESENT(debug)) THEN
     531      2819134 :          test = debug
     532              :       ELSE
     533              :          test = .FALSE.
     534              :       END IF
     535              : 
     536      2819134 :       g_pos = rs_group%mepos
     537      2819134 :       numtask = rs_group%num_pe
     538      8457402 :       r_dim = rs_group%num_pe_cart
     539      8457402 :       r_pos = rs_group%mepos_cart
     540              : 
     541      2819134 :       IF (fsign == FWFFT) THEN
     542      5599136 :          norm = 1.0_dp/REAL(PRODUCT(n), KIND=dp)
     543      1419350 :       ELSE IF (fsign == BWFFT) THEN
     544      1419350 :          norm = 1.0_dp
     545              :       ELSE
     546            0 :          CPABORT("Unknown FFT direction!")
     547              :       END IF
     548              : 
     549      2819134 :       sign = fsign
     550              : 
     551      2819134 :       lg = SIZE(gin, 1)
     552      2819134 :       mg = SIZE(gin, 2)
     553              : 
     554      2819134 :       nx = SIZE(cin, 1)
     555      2819134 :       ny = SIZE(cin, 2)
     556      2819134 :       nz = SIZE(cin, 3)
     557              : 
     558      2819134 :       IF (mg == 0) THEN
     559              :          mmax = 1
     560              :       ELSE
     561      2819134 :          mmax = mg
     562              :       END IF
     563      2819134 :       lmax = MAX(lg, (nx*ny*nz)/mmax + 1)
     564              : 
     565      8457402 :       ALLOCATE (p2p(0:numtask - 1))
     566              : 
     567      2819134 :       CALL rs_group%rank_compare(rs_group, p2p)
     568              : 
     569      2819134 :       rp = p2p(g_pos)
     570      2819134 :       mx1 = bo(2, 1, rp, 1) - bo(1, 1, rp, 1) + 1
     571      2819134 :       my1 = bo(2, 2, rp, 1) - bo(1, 2, rp, 1) + 1
     572      2819134 :       mx2 = bo(2, 1, rp, 2) - bo(1, 1, rp, 2) + 1
     573      2819134 :       mz2 = bo(2, 3, rp, 2) - bo(1, 3, rp, 2) + 1
     574              : 
     575      8457402 :       n1 = MAXVAL(bo(2, 1, :, 1) - bo(1, 1, :, 1) + 1)
     576      8457402 :       n2 = MAXVAL(bo(2, 2, :, 1) - bo(1, 2, :, 1) + 1)
     577      2819134 :       nmax = MAX((2*n2)/numtask, 2)*mx2*mz2
     578      8457402 :       nmax = MAX(nmax, n1*MAXVAL(nyzray))
     579      8457402 :       n1 = MAXVAL(bo(2, 1, :, 2))
     580      8457402 :       n2 = MAXVAL(bo(2, 3, :, 2))
     581              : 
     582      2819134 :       fft_scratch_size%nx = nx
     583      2819134 :       fft_scratch_size%ny = ny
     584      2819134 :       fft_scratch_size%nz = nz
     585      2819134 :       fft_scratch_size%lmax = lmax
     586      2819134 :       fft_scratch_size%mmax = mmax
     587      2819134 :       fft_scratch_size%mx1 = mx1
     588      2819134 :       fft_scratch_size%mx2 = mx2
     589      2819134 :       fft_scratch_size%my1 = my1
     590      2819134 :       fft_scratch_size%mz2 = mz2
     591      2819134 :       fft_scratch_size%lg = lg
     592      2819134 :       fft_scratch_size%mg = mg
     593      2819134 :       fft_scratch_size%nbx = n1
     594      2819134 :       fft_scratch_size%nbz = n2
     595      8457402 :       mcz1 = MAXVAL(bo(2, 3, :, 1) - bo(1, 3, :, 1) + 1)
     596      8457402 :       mcx2 = MAXVAL(bo(2, 1, :, 2) - bo(1, 1, :, 2) + 1)
     597      8457402 :       mcz2 = MAXVAL(bo(2, 3, :, 2) - bo(1, 3, :, 2) + 1)
     598      2819134 :       fft_scratch_size%mcz1 = mcz1
     599      2819134 :       fft_scratch_size%mcx2 = mcx2
     600      2819134 :       fft_scratch_size%mcz2 = mcz2
     601      2819134 :       fft_scratch_size%nmax = nmax
     602      8457402 :       fft_scratch_size%nmray = MAXVAL(nyzray)
     603      2819134 :       fft_scratch_size%nyzray = nyzray(g_pos)
     604      2819134 :       fft_scratch_size%rs_group = rs_group
     605      8457402 :       fft_scratch_size%g_pos = g_pos
     606      8457402 :       fft_scratch_size%r_pos = r_pos
     607      8457402 :       fft_scratch_size%r_dim = r_dim
     608      2819134 :       fft_scratch_size%numtask = numtask
     609              : 
     610      2819134 :       IF (test) THEN
     611            8 :          IF (g_pos == 0 .AND. output_unit > 0) THEN
     612            4 :             WRITE (output_unit, '(A)') "  Parallel 3D FFT : fft3d_ps"
     613            4 :             WRITE (output_unit, '(A,T60,3I7)') "     Transform lengths ", n
     614            4 :             WRITE (output_unit, '(A,T67,2I7)') "     Array dimensions (gin) ", lg, mg
     615            4 :             WRITE (output_unit, '(A,T60,3I7)') "     Array dimensions (cin) ", nx, ny, nz
     616              :          END IF
     617              :       END IF
     618              : 
     619      2819134 :       IF (r_dim(2) > 1) THEN
     620              : 
     621              :          !
     622              :          ! real space is distributed over x and y coordinate
     623              :          ! we have two stages of communication
     624              :          !
     625              : 
     626            0 :          IF (r_dim(1) == 1) THEN
     627            0 :             CPABORT("This processor distribution is not supported.")
     628              :          END IF
     629            0 :          CALL get_fft_scratch(fft_scratch, tf_type=300, n=n, fft_sizes=fft_scratch_size)
     630              : 
     631            0 :          IF (sign == FWFFT) THEN
     632              :             ! cin -> gin
     633              : 
     634            0 :             IF (test) THEN
     635            0 :                sum_data = ABS(SUM(cin))
     636            0 :                CALL rs_group%sum(sum_data)
     637            0 :                IF (g_pos == 0 .AND. output_unit > 0) THEN
     638            0 :                   WRITE (output_unit, '(A)') "  Two step communication algorithm "
     639            0 :                   WRITE (output_unit, '(A,T60,3I7)') "     Transform Z ", n(3), mx1*my1
     640            0 :                   WRITE (output_unit, '(A,T60,3I7)') "     Transform Y ", n(2), mx2*mz2
     641            0 :                   WRITE (output_unit, '(A,T67,2I7)') "     Transform X ", n(1), nyzray(g_pos)
     642            0 :                   WRITE (output_unit, '(A,T61,E20.14)') "     Sum of data(1) ", sum_data
     643              :                END IF
     644              :             END IF
     645              : 
     646            0 :             pbuf => fft_scratch%p1buf
     647            0 :             qbuf => fft_scratch%p2buf
     648              : 
     649              :             ! FFT along z
     650            0 :             CALL fft_1dm(fft_scratch%fft_plan(1), cin, qbuf, norm, stat)
     651              : 
     652            0 :             rbuf => fft_scratch%p3buf
     653              : 
     654            0 :             IF (test) THEN
     655            0 :                sum_data = ABS(SUM(qbuf))
     656            0 :                CALL rs_group%sum(sum_data)
     657            0 :                IF (g_pos == 0 .AND. output_unit > 0) THEN
     658            0 :                   WRITE (output_unit, '(A,T61,E20.14)') "     Sum of data(2) T", sum_data
     659              :                END IF
     660              :             END IF
     661              : 
     662              :             ! Exchange data ( transpose of matrix )
     663            0 :             CALL cube_transpose_2(qbuf, bo(:, :, :, 1), bo(:, :, :, 2), rbuf, fft_scratch)
     664              : 
     665            0 :             IF (test) THEN
     666            0 :                sum_data = ABS(SUM(rbuf))
     667            0 :                CALL rs_group%sum(sum_data)
     668            0 :                IF (g_pos == 0 .AND. output_unit > 0) THEN
     669            0 :                   WRITE (output_unit, '(A,T61,E20.14)') "     Sum of data(3) T", sum_data
     670              :                END IF
     671              :             END IF
     672              : 
     673            0 :             pbuf => fft_scratch%p4buf
     674              : 
     675              :             ! FFT along y
     676            0 :             CALL fft_1dm(fft_scratch%fft_plan(2), rbuf, pbuf, 1.0_dp, stat)
     677              : 
     678            0 :             qbuf => fft_scratch%p5buf
     679              : 
     680            0 :             IF (test) THEN
     681            0 :                sum_data = ABS(SUM(pbuf))
     682            0 :                CALL rs_group%sum(sum_data)
     683            0 :                IF (g_pos == 0 .AND. output_unit > 0) THEN
     684            0 :                   WRITE (output_unit, '(A,T61,E20.14)') "     Sum of data(4) TS", sum_data
     685              :                END IF
     686              :             END IF
     687              : 
     688              :             ! Exchange data ( transpose of matrix ) and sort
     689              :             CALL xz_to_yz(pbuf, rs_group, r_dim, g_pos, p2p, yzp, nyzray, &
     690            0 :                           bo(:, :, :, 2), qbuf, fft_scratch)
     691              : 
     692            0 :             IF (test) THEN
     693            0 :                sum_data = ABS(SUM(qbuf))
     694            0 :                CALL rs_group%sum(sum_data)
     695            0 :                IF (g_pos == 0 .AND. output_unit > 0) THEN
     696            0 :                   WRITE (output_unit, '(A,T61,E20.14)') "     Sum of data(5) TS", sum_data
     697              :                END IF
     698              :             END IF
     699              : 
     700              :             ! FFT along x
     701            0 :             CALL fft_1dm(fft_scratch%fft_plan(3), qbuf, gin, 1.0_dp, stat)
     702              : 
     703            0 :             IF (test) THEN
     704            0 :                sum_data = ABS(SUM(gin))
     705            0 :                CALL rs_group%sum(sum_data)
     706            0 :                IF (g_pos == 0 .AND. output_unit > 0) THEN
     707            0 :                   WRITE (output_unit, '(A,T61,E20.14)') "     Sum of data(6) ", sum_data
     708              :                END IF
     709              :             END IF
     710              : 
     711            0 :          ELSE IF (sign == BWFFT) THEN
     712              :             ! gin -> cin
     713              : 
     714            0 :             IF (test) THEN
     715            0 :                sum_data = ABS(SUM(gin))
     716            0 :                CALL rs_group%sum(sum_data)
     717            0 :                IF (g_pos == 0 .AND. output_unit > 0) THEN
     718            0 :                   WRITE (output_unit, '(A)') "  Two step communication algorithm "
     719            0 :                   WRITE (output_unit, '(A,T67,2I7)') "     Transform X ", n(1), nyzray(g_pos)
     720            0 :                   WRITE (output_unit, '(A,T60,3I7)') "     Transform Y ", n(2), mx2*mz2
     721            0 :                   WRITE (output_unit, '(A,T60,3I7)') "     Transform Z ", n(3), mx1*my1
     722            0 :                   WRITE (output_unit, '(A,T61,E20.14)') "     Sum of data(1) ", sum_data
     723              :                END IF
     724              :             END IF
     725              : 
     726            0 :             pbuf => fft_scratch%p7buf
     727              : 
     728              :             ! FFT along x
     729            0 :             CALL fft_1dm(fft_scratch%fft_plan(4), gin, pbuf, norm, stat)
     730              : 
     731            0 :             qbuf => fft_scratch%p4buf
     732              : 
     733            0 :             IF (test) THEN
     734            0 :                sum_data = ABS(SUM(pbuf))
     735            0 :                CALL rs_group%sum(sum_data)
     736            0 :                IF (g_pos == 0 .AND. output_unit > 0) THEN
     737            0 :                   WRITE (output_unit, '(A,T61,E20.14)') "     Sum of data(2) TS", sum_data
     738              :                END IF
     739              :             END IF
     740              : 
     741              :             ! Exchange data ( transpose of matrix ) and sort
     742              :             CALL yz_to_xz(pbuf, rs_group, r_dim, g_pos, p2p, yzp, nyzray, &
     743            0 :                           bo(:, :, :, 2), qbuf, fft_scratch)
     744              : 
     745            0 :             IF (test) THEN
     746            0 :                sum_data = ABS(SUM(qbuf))
     747            0 :                CALL rs_group%sum(sum_data)
     748            0 :                IF (g_pos == 0 .AND. output_unit > 0) THEN
     749            0 :                   WRITE (output_unit, '(A,T61,E20.14)') "     Sum of data(3) TS", sum_data
     750              :                END IF
     751              :             END IF
     752              : 
     753            0 :             rbuf => fft_scratch%p3buf
     754              : 
     755              :             ! FFT along y
     756            0 :             CALL fft_1dm(fft_scratch%fft_plan(5), qbuf, rbuf, 1.0_dp, stat)
     757              : 
     758            0 :             pbuf => fft_scratch%p2buf
     759              : 
     760            0 :             IF (test) THEN
     761            0 :                sum_data = ABS(SUM(rbuf))
     762            0 :                CALL rs_group%sum(sum_data)
     763            0 :                IF (g_pos == 0 .AND. output_unit > 0) THEN
     764            0 :                   WRITE (output_unit, '(A,T61,E20.14)') "     Sum of data(4) T", sum_data
     765              :                END IF
     766              :             END IF
     767              : 
     768              :             ! Exchange data ( transpose of matrix )
     769            0 :             CALL cube_transpose_1(rbuf, bo(:, :, :, 2), bo(:, :, :, 1), pbuf, fft_scratch)
     770              : 
     771            0 :             IF (test) THEN
     772            0 :                sum_data = ABS(SUM(pbuf))
     773            0 :                CALL rs_group%sum(sum_data)
     774            0 :                IF (g_pos == 0 .AND. output_unit > 0) THEN
     775            0 :                   WRITE (output_unit, '(A,T61,E20.14)') "     Sum of data(5) T", sum_data
     776              :                END IF
     777              :             END IF
     778              : 
     779            0 :             qbuf => fft_scratch%p1buf
     780              : 
     781              :             ! FFT along z
     782            0 :             CALL fft_1dm(fft_scratch%fft_plan(6), pbuf, cin, 1.0_dp, stat)
     783              : 
     784            0 :             IF (test) THEN
     785            0 :                sum_data = ABS(SUM(cin))
     786            0 :                CALL rs_group%sum(sum_data)
     787            0 :                IF (g_pos == 0 .AND. output_unit > 0) THEN
     788            0 :                   WRITE (output_unit, '(A,T61,E20.14)') "     Sum of data(6) ", sum_data
     789              :                END IF
     790              :             END IF
     791              : 
     792              :          ELSE
     793              : 
     794            0 :             CPABORT("Illegal fsign parameter.")
     795              : 
     796              :          END IF
     797              : 
     798            0 :          CALL release_fft_scratch(fft_scratch)
     799              : 
     800              :       ELSE
     801              : 
     802              :          !
     803              :          ! real space is only distributed over x coordinate
     804              :          ! we have one stage of communication, after the transform of
     805              :          ! direction x
     806              :          !
     807              : 
     808      2819134 :          CALL get_fft_scratch(fft_scratch, tf_type=200, n=n, fft_sizes=fft_scratch_size)
     809              : 
     810      2819134 :          sbuf => fft_scratch%r1buf
     811      2819134 :          tbuf => fft_scratch%tbuf
     812              : 
     813  87721421601 :          sbuf = z_zero
     814  88122418666 :          tbuf = z_zero
     815              : 
     816      2819134 :          IF (sign == FWFFT) THEN
     817              :             ! cin -> gin
     818              : 
     819      1399784 :             IF (test) THEN
     820         9284 :                sum_data = ABS(SUM(cin))
     821            4 :                CALL rs_group%sum(sum_data)
     822            4 :                IF (g_pos == 0 .AND. output_unit > 0) THEN
     823            2 :                   WRITE (output_unit, '(A)') "     One step communication algorithm "
     824            2 :                   WRITE (output_unit, '(A,T60,3I7)') "     Transform YZ ", n(2), n(3), nx
     825            2 :                   WRITE (output_unit, '(A,T67,2I7)') "     Transform X ", n(1), nyzray(g_pos)
     826            2 :                   WRITE (output_unit, '(A,T61,E20.14)') "     Sum of data(1) ", sum_data
     827              :                END IF
     828              :             END IF
     829              : 
     830              :             ! FFT along y and z
     831      1399784 :             CALL fft_1dm(fft_scratch%fft_plan(1), cin, sbuf, 1._dp, stat)
     832      1399784 :             CALL fft_1dm(fft_scratch%fft_plan(2), sbuf, tbuf, 1._dp, stat)
     833              : 
     834      1399784 :             IF (test) THEN
     835         8740 :                sum_data = ABS(SUM(tbuf))
     836            4 :                CALL rs_group%sum(sum_data)
     837            4 :                IF (g_pos == 0 .AND. output_unit > 0) THEN
     838            2 :                   WRITE (output_unit, '(A,T61,E20.14)') "     Sum of data(2) TS", sum_data
     839              :                END IF
     840              :             END IF
     841              : 
     842              :             ! Exchange data ( transpose of matrix ) and sort
     843              :             CALL yz_to_x(tbuf, rs_group, g_pos, p2p, yzp, nyzray, &
     844      1399784 :                          bo(:, :, :, 2), sbuf, fft_scratch)
     845              : 
     846      1399784 :             IF (test) THEN
     847         8776 :                sum_data = ABS(SUM(sbuf))
     848            4 :                CALL rs_group%sum(sum_data)
     849            4 :                IF (g_pos == 0 .AND. output_unit > 0) THEN
     850            2 :                   WRITE (output_unit, '(A,T61,E20.14)') "     Sum of data(3) TS", sum_data
     851              :                END IF
     852              :             END IF
     853              :             ! FFT along x
     854      1399784 :             CALL fft_1dm(fft_scratch%fft_plan(3), sbuf, gin, norm, stat)
     855              : 
     856      1399784 :             IF (test) THEN
     857         8708 :                sum_data = ABS(SUM(gin))
     858            4 :                CALL rs_group%sum(sum_data)
     859            4 :                IF (g_pos == 0 .AND. output_unit > 0) THEN
     860            2 :                   WRITE (output_unit, '(A,T61,E20.14)') "     Sum of data(4) ", sum_data
     861              :                END IF
     862              :             END IF
     863              : 
     864      1419350 :          ELSE IF (sign == BWFFT) THEN
     865              :             ! gin -> cin
     866              : 
     867      1419350 :             IF (test) THEN
     868         8708 :                sum_data = ABS(SUM(gin))
     869            4 :                CALL rs_group%sum(sum_data)
     870            4 :                IF (g_pos == 0 .AND. output_unit > 0) THEN
     871            2 :                   WRITE (output_unit, '(A)') "  One step communication algorithm "
     872            2 :                   WRITE (output_unit, '(A,T67,2I7)') "     Transform X ", n(1), nyzray(g_pos)
     873            2 :                   WRITE (output_unit, '(A,T60,3I7)') "     Transform YZ ", n(2), n(3), nx
     874            2 :                   WRITE (output_unit, '(A,T61,E20.14)') "     Sum of data(1) ", sum_data
     875              :                END IF
     876              :             END IF
     877              : 
     878              :             ! FFT along x
     879      1419350 :             CALL fft_1dm(fft_scratch%fft_plan(4), gin, sbuf, norm, stat)
     880              : 
     881      1419350 :             IF (test) THEN
     882         8776 :                sum_data = ABS(SUM(sbuf))
     883            4 :                CALL rs_group%sum(sum_data)
     884            4 :                IF (g_pos == 0 .AND. output_unit > 0) THEN
     885            2 :                   WRITE (output_unit, '(A,T61,E20.14)') "     Sum of data(2) TS", sum_data
     886              :                END IF
     887              :             END IF
     888              : 
     889              :             ! Exchange data ( transpose of matrix ) and sort
     890              :             CALL x_to_yz(sbuf, rs_group, g_pos, p2p, yzp, nyzray, &
     891      1419350 :                          bo(:, :, :, 2), tbuf, fft_scratch)
     892              : 
     893      1419350 :             IF (test) THEN
     894         8740 :                sum_data = ABS(SUM(tbuf))
     895            4 :                CALL rs_group%sum(sum_data)
     896            4 :                IF (g_pos == 0 .AND. output_unit > 0) THEN
     897            2 :                   WRITE (output_unit, '(A,T61,E20.14)') "     Sum of data(3) TS", sum_data
     898              :                END IF
     899              :             END IF
     900              : 
     901              :             ! FFT along y and z
     902      1419350 :             CALL fft_1dm(fft_scratch%fft_plan(5), tbuf, sbuf, 1._dp, stat)
     903      1419350 :             CALL fft_1dm(fft_scratch%fft_plan(6), sbuf, cin, 1._dp, stat)
     904              : 
     905      1419350 :             IF (test) THEN
     906         9284 :                sum_data = ABS(SUM(cin))
     907            4 :                CALL rs_group%sum(sum_data)
     908            4 :                IF (g_pos == 0 .AND. output_unit > 0) THEN
     909            2 :                   WRITE (output_unit, '(A,T61,E20.14)') "     Sum of data(4) ", sum_data
     910              :                END IF
     911              :             END IF
     912              :          ELSE
     913            0 :             CPABORT("Illegal fsign parameter.")
     914              :          END IF
     915              : 
     916      2819134 :          CALL release_fft_scratch(fft_scratch)
     917              : 
     918              :       END IF
     919              : 
     920      2819134 :       DEALLOCATE (p2p)
     921              : 
     922      2819134 :       IF (PRESENT(status)) THEN
     923            0 :          status = stat
     924              :       END IF
     925      2819134 :       CALL timestop(handle)
     926              : 
     927      5638268 :    END SUBROUTINE fft3d_ps
     928              : 
     929              : ! **************************************************************************************************
     930              : !> \brief ...
     931              : !> \param fsign ...
     932              : !> \param n ...
     933              : !> \param zin ...
     934              : !> \param gin ...
     935              : !> \param group ...
     936              : !> \param bo ...
     937              : !> \param status ...
     938              : !> \param debug ...
     939              : ! **************************************************************************************************
     940          208 :    SUBROUTINE fft3d_pb(fsign, n, zin, gin, group, bo, status, debug)
     941              : 
     942              :       INTEGER, INTENT(IN)                                :: fsign
     943              :       INTEGER, DIMENSION(3), INTENT(IN)                  :: n
     944              :       COMPLEX(KIND=dp), CONTIGUOUS, DIMENSION(:, :, :), &
     945              :          INTENT(INOUT)                                   :: zin
     946              :       COMPLEX(KIND=dp), CONTIGUOUS, DIMENSION(:, :), &
     947              :          INTENT(INOUT)                                   :: gin
     948              :       TYPE(mp_cart_type), INTENT(IN)                     :: group
     949              :       INTEGER, CONTIGUOUS, DIMENSION(:, :, 0:, :), &
     950              :          INTENT(IN)                                      :: bo
     951              :       INTEGER, INTENT(OUT), OPTIONAL                     :: status
     952              :       LOGICAL, INTENT(IN), OPTIONAL                      :: debug
     953              : 
     954              :       CHARACTER(len=*), PARAMETER                        :: routineN = 'fft3d_pb'
     955              : 
     956              :       COMPLEX(KIND=dp), CONTIGUOUS, DIMENSION(:, :), &
     957          208 :          POINTER                                         :: abuf, bbuf
     958              :       INTEGER                                            :: handle, lg(2), lz(3), mcx2, mcy3, mcz1, &
     959              :                                                             mcz2, mx1, mx2, mx3, my1, my2, my3, &
     960              :                                                             my_pos, mz1, mz2, mz3, output_unit, &
     961              :                                                             sign, stat
     962              :       INTEGER, DIMENSION(2)                              :: dim
     963              :       LOGICAL                                            :: test
     964              :       REAL(KIND=dp)                                      :: norm, sum_data
     965         1456 :       TYPE(fft_scratch_sizes)                            :: fft_scratch_size
     966              :       TYPE(fft_scratch_type), POINTER                    :: fft_scratch
     967              : 
     968              : !------------------------------------------------------------------------------
     969              : ! "Real Space"  1) xyZ      or      1) xYZ
     970              : !               2) xYz      or         not used
     971              : ! "G Space"     3) Xyz      or      3) XYz
     972              : !
     973              : ! There is one communicator (2-dimensional) for all distributions
     974              : ! np = n1 * n2, where np is the total number of processors
     975              : ! If n2 = 1, we have the second case and only one transpose step is needed
     976              : !
     977              : ! Assignment of dimensions to axis for different steps
     978              : ! First case: 1) n1=x; n2=y
     979              : !             2) n1=x; n2=z
     980              : !             3) n1=y; n2=z
     981              : ! Second case 1) n1=x
     982              : !             3) n1=z
     983              : !
     984              : ! The more general case with two communicators for the initial and final
     985              : ! distribution is not covered.
     986              : !------------------------------------------------------------------------------
     987              : 
     988          208 :       CALL timeset(routineN, handle)
     989          208 :       output_unit = cp_logger_get_default_io_unit()
     990              : 
     991          624 :       dim = group%num_pe_cart
     992          208 :       my_pos = group%mepos
     993              : 
     994          208 :       IF (PRESENT(debug)) THEN
     995          208 :          test = debug
     996              :       ELSE
     997              :          test = .FALSE.
     998              :       END IF
     999              : 
    1000          208 :       IF (fsign == FWFFT) THEN
    1001          416 :          norm = 1.0_dp/REAL(PRODUCT(n), KIND=dp)
    1002          104 :       ELSE IF (fsign == BWFFT) THEN
    1003          104 :          norm = 1.0_dp
    1004              :       ELSE
    1005            0 :          CPABORT("Unknown FFT direction!")
    1006              :       END IF
    1007              : 
    1008          208 :       sign = fsign
    1009              : 
    1010          208 :       IF (test) THEN
    1011            8 :          lg(1) = SIZE(gin, 1)
    1012            8 :          lg(2) = SIZE(gin, 2)
    1013            8 :          lz(1) = SIZE(zin, 1)
    1014            8 :          lz(2) = SIZE(zin, 2)
    1015            8 :          lz(3) = SIZE(zin, 3)
    1016            8 :          IF (my_pos == 0 .AND. output_unit > 0) THEN
    1017            4 :             WRITE (output_unit, '(A)') "  Parallel 3D FFT : fft3d_pb"
    1018            4 :             WRITE (output_unit, '(A,T60,3I7)') "     Transform lengths ", n
    1019            4 :             WRITE (output_unit, '(A,T67,2I7)') "     Array dimensions (gin) ", lg
    1020            4 :             WRITE (output_unit, '(A,T60,3I7)') "     Array dimensions (cin) ", lz
    1021              :          END IF
    1022              :       END IF
    1023              : 
    1024          208 :       mx1 = bo(2, 1, my_pos, 1) - bo(1, 1, my_pos, 1) + 1
    1025          208 :       my1 = bo(2, 2, my_pos, 1) - bo(1, 2, my_pos, 1) + 1
    1026          208 :       mz1 = bo(2, 3, my_pos, 1) - bo(1, 3, my_pos, 1) + 1
    1027          208 :       mx2 = bo(2, 1, my_pos, 2) - bo(1, 1, my_pos, 2) + 1
    1028          208 :       my2 = bo(2, 2, my_pos, 2) - bo(1, 2, my_pos, 2) + 1
    1029          208 :       mz2 = bo(2, 3, my_pos, 2) - bo(1, 3, my_pos, 2) + 1
    1030          208 :       mx3 = bo(2, 1, my_pos, 3) - bo(1, 1, my_pos, 3) + 1
    1031          208 :       my3 = bo(2, 2, my_pos, 3) - bo(1, 2, my_pos, 3) + 1
    1032          208 :       mz3 = bo(2, 3, my_pos, 3) - bo(1, 3, my_pos, 3) + 1
    1033          208 :       fft_scratch_size%mx1 = mx1
    1034          208 :       fft_scratch_size%mx2 = mx2
    1035          208 :       fft_scratch_size%mx3 = mx3
    1036          208 :       fft_scratch_size%my1 = my1
    1037          208 :       fft_scratch_size%my2 = my2
    1038          208 :       fft_scratch_size%my3 = my3
    1039          208 :       fft_scratch_size%mz1 = mz1
    1040          208 :       fft_scratch_size%mz2 = mz2
    1041          208 :       fft_scratch_size%mz3 = mz3
    1042          624 :       mcz1 = MAXVAL(bo(2, 3, :, 1) - bo(1, 3, :, 1) + 1)
    1043          624 :       mcx2 = MAXVAL(bo(2, 1, :, 2) - bo(1, 1, :, 2) + 1)
    1044          624 :       mcz2 = MAXVAL(bo(2, 3, :, 2) - bo(1, 3, :, 2) + 1)
    1045          624 :       mcy3 = MAXVAL(bo(2, 2, :, 3) - bo(1, 2, :, 3) + 1)
    1046          208 :       fft_scratch_size%mcz1 = mcz1
    1047          208 :       fft_scratch_size%mcx2 = mcx2
    1048          208 :       fft_scratch_size%mcz2 = mcz2
    1049          208 :       fft_scratch_size%mcy3 = mcy3
    1050          208 :       fft_scratch_size%rs_group = group
    1051          624 :       fft_scratch_size%g_pos = my_pos
    1052          208 :       fft_scratch_size%numtask = DIM(1)*DIM(2)
    1053              : 
    1054          208 :       IF (DIM(1) > 1 .AND. DIM(2) > 1) THEN
    1055              : 
    1056              :          !
    1057              :          ! First case; two stages of communication
    1058              :          !
    1059              : 
    1060            0 :          CALL get_fft_scratch(fft_scratch, tf_type=100, n=n, fft_sizes=fft_scratch_size)
    1061              : 
    1062            0 :          IF (sign == FWFFT) THEN
    1063              :             ! Stage 1 -> 3
    1064              : 
    1065            0 :             bbuf => fft_scratch%a2buf
    1066              : 
    1067            0 :             IF (test) THEN
    1068            0 :                sum_data = ABS(SUM(zin))
    1069            0 :                CALL group%sum(sum_data)
    1070            0 :                IF (my_pos == 0 .AND. output_unit > 0) THEN
    1071            0 :                   WRITE (output_unit, '(A)') "  Two step communication algorithm "
    1072            0 :                   WRITE (output_unit, '(A,T67,2I7)') "     Transform Z ", n(3), mx1*my1
    1073            0 :                   WRITE (output_unit, '(A,T61,E20.14)') "     Sum of data(1) ", sum_data
    1074              :                END IF
    1075              :             END IF
    1076              : 
    1077              :             ! FFT along z
    1078            0 :             CALL fft_1dm(fft_scratch%fft_plan(1), zin, bbuf, norm, stat)
    1079              : 
    1080            0 :             abuf => fft_scratch%a3buf
    1081              : 
    1082            0 :             IF (test) THEN
    1083            0 :                sum_data = ABS(SUM(bbuf))
    1084            0 :                CALL group%sum(sum_data)
    1085            0 :                IF (my_pos == 0 .AND. output_unit > 0) THEN
    1086            0 :                   WRITE (output_unit, '(A,T61,E20.14)') "     Sum of data(2) T", sum_data
    1087              :                END IF
    1088              :             END IF
    1089              : 
    1090            0 :             CALL cube_transpose_2(bbuf, bo(:, :, :, 1), bo(:, :, :, 2), abuf, fft_scratch)
    1091              : 
    1092            0 :             bbuf => fft_scratch%a4buf
    1093              : 
    1094            0 :             IF (test) THEN
    1095            0 :                sum_data = ABS(SUM(abuf))
    1096            0 :                CALL group%sum(sum_data)
    1097            0 :                IF (my_pos == 0 .AND. output_unit > 0) THEN
    1098            0 :                   WRITE (output_unit, '(A,T67,2I7)') "     Transform Y ", n(2), mx2*mz2
    1099            0 :                   WRITE (output_unit, '(A,T61,E20.14)') "     Sum of data(3) ", sum_data
    1100              :                END IF
    1101              :             END IF
    1102              : 
    1103              :             ! FFT along y
    1104            0 :             CALL fft_1dm(fft_scratch%fft_plan(2), abuf, bbuf, 1.0_dp, stat)
    1105              : 
    1106            0 :             abuf => fft_scratch%a5buf
    1107              : 
    1108            0 :             IF (test) THEN
    1109            0 :                sum_data = ABS(SUM(bbuf))
    1110            0 :                CALL group%sum(sum_data)
    1111            0 :                IF (my_pos == 0 .AND. output_unit > 0) THEN
    1112            0 :                   WRITE (output_unit, '(A,T61,E20.14)') "     Sum of data(4) T", sum_data
    1113              :                END IF
    1114              :             END IF
    1115              : 
    1116            0 :             CALL cube_transpose_4(bbuf, bo(:, :, :, 2), bo(:, :, :, 3), abuf, fft_scratch)
    1117              : 
    1118            0 :             IF (test) THEN
    1119            0 :                sum_data = ABS(SUM(abuf))
    1120            0 :                CALL group%sum(sum_data)
    1121            0 :                IF (my_pos == 0 .AND. output_unit > 0) THEN
    1122            0 :                   WRITE (output_unit, '(A,T67,2I7)') "     Transform X ", n(1), my3*mz3
    1123            0 :                   WRITE (output_unit, '(A,T61,E20.14)') "     Sum of data(5) ", sum_data
    1124              :                END IF
    1125              :             END IF
    1126              : 
    1127              :             ! FFT along x
    1128            0 :             CALL fft_1dm(fft_scratch%fft_plan(3), abuf, gin, 1.0_dp, stat)
    1129              : 
    1130            0 :             IF (test) THEN
    1131            0 :                sum_data = ABS(SUM(gin))
    1132            0 :                CALL group%sum(sum_data)
    1133            0 :                IF (my_pos == 0 .AND. output_unit > 0) THEN
    1134            0 :                   WRITE (output_unit, '(A,T61,E20.14)') "     Sum of data(6) ", sum_data
    1135              :                END IF
    1136              :             END IF
    1137              : 
    1138            0 :          ELSEIF (sign == BWFFT) THEN
    1139              :             ! Stage 3 -> 1
    1140              : 
    1141            0 :             bbuf => fft_scratch%a5buf
    1142              : 
    1143            0 :             IF (test) THEN
    1144            0 :                sum_data = ABS(SUM(gin))
    1145            0 :                CALL group%sum(sum_data)
    1146            0 :                IF (my_pos == 0 .AND. output_unit > 0) THEN
    1147            0 :                   WRITE (output_unit, '(A)') "  Two step communication algorithm "
    1148            0 :                   WRITE (output_unit, '(A,T67,2I7)') "     Transform X ", n(1), my3*mz3
    1149            0 :                   WRITE (output_unit, '(A,T61,E20.14)') "     Sum of data(1) ", sum_data
    1150              :                END IF
    1151              :             END IF
    1152              : 
    1153              :             ! FFT along x
    1154            0 :             CALL fft_1dm(fft_scratch%fft_plan(4), gin, bbuf, 1.0_dp, stat)
    1155              : 
    1156            0 :             abuf => fft_scratch%a4buf
    1157              : 
    1158            0 :             IF (test) THEN
    1159            0 :                sum_data = ABS(SUM(bbuf))
    1160            0 :                CALL group%sum(sum_data)
    1161            0 :                IF (my_pos == 0 .AND. output_unit > 0) THEN
    1162            0 :                   WRITE (output_unit, '(A,T61,E20.14)') "     Sum of data(2) T", sum_data
    1163              :                END IF
    1164              :             END IF
    1165              : 
    1166            0 :             CALL cube_transpose_3(bbuf, bo(:, :, :, 3), bo(:, :, :, 2), abuf, fft_scratch)
    1167              : 
    1168            0 :             bbuf => fft_scratch%a3buf
    1169              : 
    1170            0 :             IF (test) THEN
    1171            0 :                sum_data = ABS(SUM(abuf))
    1172            0 :                CALL group%sum(sum_data)
    1173            0 :                IF (my_pos == 0 .AND. output_unit > 0) THEN
    1174            0 :                   WRITE (output_unit, '(A,T67,2I7)') "     Transform Y ", n(2), mx2*mz2
    1175            0 :                   WRITE (output_unit, '(A,T61,E20.14)') "     Sum of data(3) ", sum_data
    1176              :                END IF
    1177              :             END IF
    1178              : 
    1179              :             ! FFT along y
    1180            0 :             CALL fft_1dm(fft_scratch%fft_plan(5), abuf, bbuf, 1.0_dp, stat)
    1181              : 
    1182            0 :             abuf => fft_scratch%a2buf
    1183              : 
    1184            0 :             IF (test) THEN
    1185            0 :                sum_data = ABS(SUM(bbuf))
    1186            0 :                CALL group%sum(sum_data)
    1187            0 :                IF (my_pos == 0 .AND. output_unit > 0) THEN
    1188            0 :                   WRITE (output_unit, '(A,T61,E20.14)') "     Sum of data(4) T", sum_data
    1189              :                END IF
    1190              :             END IF
    1191              : 
    1192            0 :             CALL cube_transpose_1(bbuf, bo(:, :, :, 2), bo(:, :, :, 1), abuf, fft_scratch)
    1193              : 
    1194            0 :             IF (test) THEN
    1195            0 :                sum_data = ABS(SUM(abuf))
    1196            0 :                CALL group%sum(sum_data)
    1197            0 :                IF (my_pos == 0 .AND. output_unit > 0) THEN
    1198            0 :                   WRITE (output_unit, '(A,T67,2I7)') "     Transform Z ", n(3), mx1*my1
    1199            0 :                   WRITE (output_unit, '(A,T61,E20.14)') "     Sum of data(5) ", sum_data
    1200              :                END IF
    1201              :             END IF
    1202              : 
    1203              :             ! FFT along z
    1204            0 :             CALL fft_1dm(fft_scratch%fft_plan(6), abuf, zin, norm, stat)
    1205              : 
    1206            0 :             IF (test) THEN
    1207            0 :                sum_data = ABS(SUM(zin))
    1208            0 :                CALL group%sum(sum_data)
    1209            0 :                IF (my_pos == 0 .AND. output_unit > 0) THEN
    1210            0 :                   WRITE (output_unit, '(A,T61,E20.14)') "     Sum of data(6) ", sum_data
    1211              :                END IF
    1212              :             END IF
    1213              : 
    1214              :          ELSE
    1215            0 :             CPABORT("Illegal fsign parameter.")
    1216              :          END IF
    1217              : 
    1218            0 :          CALL release_fft_scratch(fft_scratch)
    1219              : 
    1220          208 :       ELSEIF (DIM(2) == 1) THEN
    1221              : 
    1222              :          !
    1223              :          ! Second case; one stage of communication
    1224              :          !
    1225              : 
    1226          208 :          CALL get_fft_scratch(fft_scratch, tf_type=101, n=n, fft_sizes=fft_scratch_size)
    1227              : 
    1228          208 :          IF (sign == FWFFT) THEN
    1229              :             ! Stage 1 -> 3
    1230              : 
    1231          104 :             IF (test) THEN
    1232         9284 :                sum_data = ABS(SUM(zin))
    1233            4 :                CALL group%sum(sum_data)
    1234            4 :                IF (my_pos == 0 .AND. output_unit > 0) THEN
    1235            2 :                   WRITE (output_unit, '(A)') "  one step communication algorithm "
    1236            2 :                   WRITE (output_unit, '(A,T67,2I7)') "     Transform Z ", n(3), mx1*my1
    1237            2 :                   WRITE (output_unit, '(A,T67,2I7)') "     Transform Y ", n(2), mx1*mz1
    1238            2 :                   WRITE (output_unit, '(A,T61,E20.14)') "     Sum of data(1) ", sum_data
    1239              :                END IF
    1240              :             END IF
    1241              : 
    1242          104 :             abuf => fft_scratch%a3buf
    1243          104 :             bbuf => fft_scratch%a4buf
    1244              :             ! FFT along z and y
    1245          104 :             CALL fft_1dm(fft_scratch%fft_plan(1), zin, abuf, norm, stat)
    1246          104 :             CALL fft_1dm(fft_scratch%fft_plan(2), abuf, bbuf, 1.0_dp, stat)
    1247              : 
    1248          104 :             abuf => fft_scratch%a5buf
    1249              : 
    1250          104 :             IF (test) THEN
    1251         8708 :                sum_data = ABS(SUM(bbuf))
    1252            4 :                CALL group%sum(sum_data)
    1253            4 :                IF (my_pos == 0 .AND. output_unit > 0) THEN
    1254            2 :                   WRITE (output_unit, '(A,T61,E20.14)') "     Sum of data(2) T", sum_data
    1255              :                END IF
    1256              :             END IF
    1257              : 
    1258          104 :             CALL cube_transpose_6(bbuf, group, bo(:, :, :, 1), bo(:, :, :, 3), abuf, fft_scratch)
    1259              : 
    1260          104 :             IF (test) THEN
    1261         8260 :                sum_data = ABS(SUM(abuf))
    1262            4 :                CALL group%sum(sum_data)
    1263            4 :                IF (my_pos == 0 .AND. output_unit > 0) THEN
    1264            2 :                   WRITE (output_unit, '(A,T67,2I7)') "     Transform X ", n(1), my3*mz3
    1265            2 :                   WRITE (output_unit, '(A,T61,E20.14)') "     Sum of data(3) ", sum_data
    1266              :                END IF
    1267              :             END IF
    1268              : 
    1269              :             ! FFT along x
    1270          104 :             CALL fft_1dm(fft_scratch%fft_plan(3), abuf, gin, 1.0_dp, stat)
    1271              : 
    1272          104 :             IF (test) THEN
    1273         8708 :                sum_data = ABS(SUM(gin))
    1274            4 :                CALL group%sum(sum_data)
    1275            4 :                IF (my_pos == 0 .AND. output_unit > 0) THEN
    1276            2 :                   WRITE (output_unit, '(A,T61,E20.14)') "     Sum of data(4) ", sum_data
    1277              :                END IF
    1278              :             END IF
    1279              : 
    1280          104 :          ELSEIF (sign == BWFFT) THEN
    1281              :             ! Stage 3 -> 1
    1282              : 
    1283          104 :             IF (test) THEN
    1284         8708 :                sum_data = ABS(SUM(gin))
    1285            4 :                CALL group%sum(sum_data)
    1286            4 :                IF (my_pos == 0 .AND. output_unit > 0) THEN
    1287            2 :                   WRITE (output_unit, '(A)') "  one step communication algorithm "
    1288            2 :                   WRITE (output_unit, '(A,T67,2I7)') "     Transform X ", n(1), my3*mz3
    1289            2 :                   WRITE (output_unit, '(A,T61,E20.14)') "     Sum of data(1) ", sum_data
    1290              :                END IF
    1291              :             END IF
    1292              : 
    1293          104 :             bbuf => fft_scratch%a5buf
    1294              : 
    1295              :             ! FFT along x
    1296          104 :             CALL fft_1dm(fft_scratch%fft_plan(4), gin, bbuf, 1.0_dp, stat)
    1297              : 
    1298          104 :             abuf => fft_scratch%a4buf
    1299              : 
    1300          104 :             IF (test) THEN
    1301         8260 :                sum_data = ABS(SUM(bbuf))
    1302            4 :                CALL group%sum(sum_data)
    1303            4 :                IF (my_pos == 0 .AND. output_unit > 0) THEN
    1304            2 :                   WRITE (output_unit, '(A,T61,E20.14)') "     Sum of data(2) T", sum_data
    1305              :                END IF
    1306              :             END IF
    1307              : 
    1308          104 :             CALL cube_transpose_5(bbuf, group, bo(:, :, :, 3), bo(:, :, :, 1), abuf, fft_scratch)
    1309              : 
    1310          104 :             bbuf => fft_scratch%a3buf
    1311              : 
    1312          104 :             IF (test) THEN
    1313         8708 :                sum_data = ABS(SUM(abuf))
    1314            4 :                CALL group%sum(sum_data)
    1315            4 :                IF (my_pos == 0 .AND. output_unit > 0) THEN
    1316            2 :                   WRITE (output_unit, '(A,T67,2I7)') "     Transform Y ", n(2), mx1*mz1
    1317            2 :                   WRITE (output_unit, '(A,T67,2I7)') "     Transform Z ", n(3), mx1*my1
    1318            2 :                   WRITE (output_unit, '(A,T61,E20.14)') "     Sum of data(3) ", sum_data
    1319              :                END IF
    1320              :             END IF
    1321              : 
    1322              :             ! FFT along y
    1323          104 :             CALL fft_1dm(fft_scratch%fft_plan(5), abuf, bbuf, 1.0_dp, stat)
    1324              : 
    1325              :             ! FFT along z
    1326          104 :             CALL fft_1dm(fft_scratch%fft_plan(6), bbuf, zin, norm, stat)
    1327              : 
    1328          104 :             IF (test) THEN
    1329         9284 :                sum_data = ABS(SUM(zin))
    1330            4 :                CALL group%sum(sum_data)
    1331            4 :                IF (my_pos == 0 .AND. output_unit > 0) THEN
    1332            2 :                   WRITE (output_unit, '(A,T61,E20.14)') "     Sum of data(4) ", sum_data
    1333              :                END IF
    1334              :             END IF
    1335              : 
    1336              :          ELSE
    1337            0 :             CPABORT("Illegal fsign parameter.")
    1338              :          END IF
    1339              : 
    1340          208 :          CALL release_fft_scratch(fft_scratch)
    1341              : 
    1342              :       ELSE
    1343              : 
    1344            0 :          CPABORT("Partition not implemented.")
    1345              : 
    1346              :       END IF
    1347              : 
    1348          208 :       IF (PRESENT(status)) THEN
    1349            0 :          status = stat
    1350              :       END IF
    1351              : 
    1352          208 :       CALL timestop(handle)
    1353              : 
    1354          416 :    END SUBROUTINE fft3d_pb
    1355              : 
    1356              : ! **************************************************************************************************
    1357              : !> \brief ...
    1358              : !> \param sb ...
    1359              : !> \param group ...
    1360              : !> \param my_pos ...
    1361              : !> \param p2p ...
    1362              : !> \param yzp ...
    1363              : !> \param nray ...
    1364              : !> \param bo ...
    1365              : !> \param tb ...
    1366              : !> \param fft_scratch ...
    1367              : !> \par History
    1368              : !>      15. Feb. 2006 : single precision all_to_all
    1369              : !> \author JGH (14-Jan-2001)
    1370              : ! **************************************************************************************************
    1371      1419350 :    SUBROUTINE x_to_yz(sb, group, my_pos, p2p, yzp, nray, bo, tb, fft_scratch)
    1372              : 
    1373              :       COMPLEX(KIND=dp), CONTIGUOUS, DIMENSION(:, :), &
    1374              :          INTENT(IN)                                      :: sb
    1375              : 
    1376              :       CLASS(mp_comm_type), INTENT(IN)                    :: group
    1377              :       INTEGER, INTENT(IN)                                :: my_pos
    1378              :       INTEGER, CONTIGUOUS, DIMENSION(0:), INTENT(IN)     :: p2p
    1379              :       INTEGER, CONTIGUOUS, DIMENSION(:, :, 0:), &
    1380              :          INTENT(IN)                                      :: yzp
    1381              :       INTEGER, CONTIGUOUS, DIMENSION(0:), INTENT(IN)     :: nray
    1382              :       INTEGER, CONTIGUOUS, DIMENSION(:, :, 0:), &
    1383              :          INTENT(IN)                                      :: bo
    1384              :       COMPLEX(KIND=dp), CONTIGUOUS, DIMENSION(:, :, :), &
    1385              :          INTENT(INOUT)                                   :: tb
    1386              :       TYPE(fft_scratch_type), INTENT(IN)                 :: fft_scratch
    1387              : 
    1388              :       CHARACTER(len=*), PARAMETER                        :: routineN = 'x_to_yz'
    1389              : 
    1390              :       COMPLEX(KIND=dp), CONTIGUOUS, DIMENSION(:, :), &
    1391      1419350 :          POINTER                                         :: rr
    1392              :       COMPLEX(KIND=sp), CONTIGUOUS, DIMENSION(:, :), &
    1393      1419350 :          POINTER                                         :: ss, tt
    1394              :       INTEGER                                            :: handle, ip, ir, ix, ixx, iy, iz, mpr, &
    1395              :                                                             nm, np, nr, nx
    1396      1419350 :       INTEGER, CONTIGUOUS, DIMENSION(:), POINTER         :: rcount, rdispl, scount, sdispl
    1397              : 
    1398      1419350 :       CALL timeset(routineN, handle)
    1399              : 
    1400      1419350 :       np = SIZE(p2p)
    1401      1419350 :       scount => fft_scratch%scount
    1402      1419350 :       rcount => fft_scratch%rcount
    1403      1419350 :       sdispl => fft_scratch%sdispl
    1404      1419350 :       rdispl => fft_scratch%rdispl
    1405              : 
    1406      1419350 :       IF (alltoall_sgl) THEN
    1407          230 :          ss => fft_scratch%ss
    1408          230 :          tt => fft_scratch%tt
    1409      3441099 :          ss(:, :) = CMPLX(sb(:, :), KIND=sp)
    1410      3353860 :          tt(:, :) = 0._sp
    1411              :       ELSE
    1412      1419120 :          rr => fft_scratch%rr
    1413              :       END IF
    1414              : 
    1415      1419350 :       mpr = p2p(my_pos)
    1416      4258050 :       nm = MAXVAL(nray(0:np - 1))
    1417      1419350 :       nr = nray(my_pos)
    1418              : !$OMP PARALLEL DO DEFAULT(NONE), &
    1419              : !$OMP             PRIVATE(ix,nx), &
    1420      1419350 : !$OMP             SHARED(np,p2p,bo,nr,scount,sdispl)
    1421              :       DO ip = 0, np - 1
    1422              :          ix = p2p(ip)
    1423              :          nx = bo(2, 1, ix) - bo(1, 1, ix) + 1
    1424              :          scount(ip) = nr*nx
    1425              :          sdispl(ip) = nr*(bo(1, 1, ix) - 1)
    1426              :       END DO
    1427              : !$OMP END PARALLEL DO
    1428      1419350 :       nx = bo(2, 1, mpr) - bo(1, 1, mpr) + 1
    1429              : !$OMP PARALLEL DO DEFAULT(NONE), &
    1430              : !$OMP             PRIVATE(nr), &
    1431      1419350 : !$OMP             SHARED(np,nray,nx,rcount,rdispl,nm)
    1432              :       DO ip = 0, np - 1
    1433              :          nr = nray(ip)
    1434              :          rcount(ip) = nr*nx
    1435              :          rdispl(ip) = nm*nx*ip
    1436              :       END DO
    1437              : !$OMP END PARALLEL DO
    1438      1419350 :       IF (alltoall_sgl) THEN
    1439          230 :          CALL group%alltoall(ss, scount, sdispl, tt, rcount, rdispl)
    1440              :       ELSE
    1441      1419120 :          CALL group%alltoall(sb, scount, sdispl, rr, rcount, rdispl)
    1442              :       END IF
    1443              : 
    1444      1419350 :       nx = bo(2, 1, mpr) - bo(1, 1, mpr) + 1
    1445              : !$OMP PARALLEL DO DEFAULT(NONE) COLLAPSE(2) &
    1446              : !$OMP             PRIVATE(ixx,ir,iy,iz,ix) &
    1447      1419350 : !$OMP             SHARED(np,nray,nx,alltoall_sgl,yzp,tt,rr,tb)
    1448              :       DO ip = 0, np - 1
    1449              :          DO ix = 1, nx
    1450              :             ixx = nray(ip)*(ix - 1)
    1451              :             IF (alltoall_sgl) THEN
    1452              :                DO ir = 1, nray(ip)
    1453              :                   iy = yzp(1, ir, ip)
    1454              :                   iz = yzp(2, ir, ip)
    1455              :                   tb(iy, iz, ix) = tt(ir + ixx, ip)
    1456              :                END DO
    1457              :             ELSE
    1458              :                DO ir = 1, nray(ip)
    1459              :                   iy = yzp(1, ir, ip)
    1460              :                   iz = yzp(2, ir, ip)
    1461              :                   tb(iy, iz, ix) = rr(ir + ixx, ip)
    1462              :                END DO
    1463              :             END IF
    1464              :          END DO
    1465              :       END DO
    1466              : !$OMP END PARALLEL DO
    1467              : 
    1468      1419350 :       CALL timestop(handle)
    1469              : 
    1470      1419350 :    END SUBROUTINE x_to_yz
    1471              : 
    1472              : ! **************************************************************************************************
    1473              : !> \brief ...
    1474              : !> \param tb ...
    1475              : !> \param group ...
    1476              : !> \param my_pos ...
    1477              : !> \param p2p ...
    1478              : !> \param yzp ...
    1479              : !> \param nray ...
    1480              : !> \param bo ...
    1481              : !> \param sb ...
    1482              : !> \param fft_scratch ...
    1483              : !> \par History
    1484              : !>      15. Feb. 2006 : single precision all_to_all
    1485              : !> \author JGH (14-Jan-2001)
    1486              : ! **************************************************************************************************
    1487      1399784 :    SUBROUTINE yz_to_x(tb, group, my_pos, p2p, yzp, nray, bo, sb, fft_scratch)
    1488              : 
    1489              :       COMPLEX(KIND=dp), CONTIGUOUS, DIMENSION(:, :, :), &
    1490              :          INTENT(IN)                                      :: tb
    1491              : 
    1492              :       CLASS(mp_comm_type), INTENT(IN)                    :: group
    1493              :       INTEGER, INTENT(IN)                                :: my_pos
    1494              :       INTEGER, CONTIGUOUS, DIMENSION(0:), INTENT(IN)     :: p2p
    1495              :       INTEGER, CONTIGUOUS, DIMENSION(:, :, 0:), &
    1496              :          INTENT(IN)                                      :: yzp
    1497              :       INTEGER, CONTIGUOUS, DIMENSION(0:), INTENT(IN)     :: nray
    1498              :       INTEGER, DIMENSION(:, :, 0:), INTENT(IN)           :: bo
    1499              :       COMPLEX(KIND=dp), CONTIGUOUS, DIMENSION(:, :), &
    1500              :          INTENT(INOUT)                                   :: sb
    1501              :       TYPE(fft_scratch_type), INTENT(IN)                 :: fft_scratch
    1502              : 
    1503              :       CHARACTER(len=*), PARAMETER                        :: routineN = 'yz_to_x'
    1504              : 
    1505              :       COMPLEX(KIND=dp), CONTIGUOUS, DIMENSION(:, :), &
    1506      1399784 :          POINTER                                         :: rr
    1507              :       COMPLEX(KIND=sp), CONTIGUOUS, DIMENSION(:, :), &
    1508      1399784 :          POINTER                                         :: ss, tt
    1509              :       INTEGER                                            :: handle, ip, ir, ix, ixx, iy, iz, mpr, &
    1510              :                                                             nm, np, nr, nx
    1511      1399784 :       INTEGER, CONTIGUOUS, DIMENSION(:), POINTER         :: rcount, rdispl, scount, sdispl
    1512              : 
    1513      1399784 :       CALL timeset(routineN, handle)
    1514              : 
    1515      1399784 :       np = SIZE(p2p)
    1516      1399784 :       mpr = p2p(my_pos)
    1517      1399784 :       scount => fft_scratch%scount
    1518      1399784 :       rcount => fft_scratch%rcount
    1519      1399784 :       sdispl => fft_scratch%sdispl
    1520      1399784 :       rdispl => fft_scratch%rdispl
    1521              : 
    1522      1399784 :       IF (alltoall_sgl) THEN
    1523          238 :          ss => fft_scratch%ss
    1524          238 :          tt => fft_scratch%tt
    1525      3703835 :          ss = 0._sp
    1526      3609884 :          tt = 0._sp
    1527              :       ELSE
    1528      1399546 :          rr => fft_scratch%rr
    1529              :       END IF
    1530              : 
    1531      1399784 :       nx = bo(2, 1, mpr) - bo(1, 1, mpr) + 1
    1532              : !$OMP PARALLEL DO DEFAULT(NONE) COLLAPSE(2) &
    1533              : !$OMP             PRIVATE(ip, ixx, ir, iy, iz, ix) &
    1534      1399784 : !$OMP             SHARED(np,nray,nx,alltoall_sgl,yzp,tb,tt,rr)
    1535              :       DO ip = 0, np - 1
    1536              :          DO ix = 1, nx
    1537              :             ixx = nray(ip)*(ix - 1)
    1538              :             IF (alltoall_sgl) THEN
    1539              :                DO ir = 1, nray(ip)
    1540              :                   iy = yzp(1, ir, ip)
    1541              :                   iz = yzp(2, ir, ip)
    1542              :                   tt(ir + ixx, ip) = CMPLX(tb(iy, iz, ix), KIND=sp)
    1543              :                END DO
    1544              :             ELSE
    1545              :                DO ir = 1, nray(ip)
    1546              :                   iy = yzp(1, ir, ip)
    1547              :                   iz = yzp(2, ir, ip)
    1548              :                   rr(ir + ixx, ip) = tb(iy, iz, ix)
    1549              :                END DO
    1550              :             END IF
    1551              :          END DO
    1552              :       END DO
    1553              : !$OMP END PARALLEL DO
    1554      4199352 :       nm = MAXVAL(nray(0:np - 1))
    1555      1399784 :       nr = nray(my_pos)
    1556              : !$OMP PARALLEL DO DEFAULT(NONE), &
    1557              : !$OMP             PRIVATE(ix,nx), &
    1558      1399784 : !$OMP             SHARED(np,p2p,bo,rcount,rdispl,nr)
    1559              :       DO ip = 0, np - 1
    1560              :          ix = p2p(ip)
    1561              :          nx = bo(2, 1, ix) - bo(1, 1, ix) + 1
    1562              :          rcount(ip) = nr*nx
    1563              :          rdispl(ip) = nr*(bo(1, 1, ix) - 1)
    1564              :       END DO
    1565              : !$OMP END PARALLEL DO
    1566      1399784 :       nx = bo(2, 1, mpr) - bo(1, 1, mpr) + 1
    1567              : !$OMP PARALLEL DO DEFAULT(NONE), &
    1568              : !$OMP             PRIVATE(nr), &
    1569      1399784 : !$OMP             SHARED(np,nray,scount,sdispl,nx,nm)
    1570              :       DO ip = 0, np - 1
    1571              :          nr = nray(ip)
    1572              :          scount(ip) = nr*nx
    1573              :          sdispl(ip) = nm*nx*ip
    1574              :       END DO
    1575              : !$OMP END PARALLEL DO
    1576              : 
    1577      1399784 :       IF (alltoall_sgl) THEN
    1578          238 :          CALL group%alltoall(tt, scount, sdispl, ss, rcount, rdispl)
    1579      3703835 :          sb = ss
    1580              :       ELSE
    1581      1399546 :          CALL group%alltoall(rr, scount, sdispl, sb, rcount, rdispl)
    1582              :       END IF
    1583              : 
    1584      1399784 :       CALL timestop(handle)
    1585              : 
    1586      1399784 :    END SUBROUTINE yz_to_x
    1587              : 
    1588              : ! **************************************************************************************************
    1589              : !> \brief ...
    1590              : !> \param sb ...
    1591              : !> \param group ...
    1592              : !> \param dims ...
    1593              : !> \param my_pos ...
    1594              : !> \param p2p ...
    1595              : !> \param yzp ...
    1596              : !> \param nray ...
    1597              : !> \param bo ...
    1598              : !> \param tb ...
    1599              : !> \param fft_scratch ...
    1600              : !> \par History
    1601              : !>      15. Feb. 2006 : single precision all_to_all
    1602              : !> \author JGH (18-Jan-2001)
    1603              : ! **************************************************************************************************
    1604            0 :    SUBROUTINE yz_to_xz(sb, group, dims, my_pos, p2p, yzp, nray, bo, tb, fft_scratch)
    1605              : 
    1606              :       COMPLEX(KIND=dp), CONTIGUOUS, DIMENSION(:, :), &
    1607              :          INTENT(IN)                                      :: sb
    1608              : 
    1609              :       CLASS(mp_comm_type), INTENT(IN)                    :: group
    1610              :       INTEGER, DIMENSION(2), INTENT(IN)                  :: dims
    1611              :       INTEGER, INTENT(IN)                                :: my_pos
    1612              :       INTEGER, CONTIGUOUS, DIMENSION(0:), INTENT(IN)                 :: p2p
    1613              :       INTEGER, CONTIGUOUS, DIMENSION(:, :, 0:), INTENT(IN)           :: yzp
    1614              :       INTEGER, CONTIGUOUS, DIMENSION(0:), INTENT(IN)                 :: nray
    1615              :       INTEGER, CONTIGUOUS, DIMENSION(:, :, 0:), INTENT(IN)           :: bo
    1616              :       COMPLEX(KIND=dp), DIMENSION(:, :), INTENT(INOUT), CONTIGUOUS   :: tb
    1617              :       TYPE(fft_scratch_type), INTENT(INOUT)              :: fft_scratch
    1618              : 
    1619              :       CHARACTER(len=*), PARAMETER                        :: routineN = 'yz_to_xz'
    1620              : 
    1621            0 :       COMPLEX(KIND=dp), DIMENSION(:), POINTER, CONTIGUOUS            :: xzbuf, yzbuf
    1622            0 :       COMPLEX(KIND=sp), DIMENSION(:), POINTER, CONTIGUOUS            :: xzbuf_sgl, yzbuf_sgl
    1623              :       INTEGER                                            :: handle, icrs, ip, ipl, ipr, ir, ix, iz, &
    1624              :                                                             jj, jx, jy, jz, myx, myz, np, npx, &
    1625              :                                                             npz, nx, nz, rs_pos
    1626            0 :       INTEGER, DIMENSION(:), POINTER, CONTIGUOUS                     :: pzcoord, rcount, rdispl, scount, sdispl, &
    1627            0 :                                                                         xcor, zcor
    1628            0 :       INTEGER, DIMENSION(:, :), CONTIGUOUS, POINTER                  :: pgrid
    1629              : 
    1630            0 :       CALL timeset(routineN, handle)
    1631              : 
    1632            0 :       np = SIZE(p2p)
    1633              : 
    1634            0 :       rs_pos = p2p(my_pos)
    1635              : 
    1636            0 :       IF (alltoall_sgl) THEN
    1637            0 :          yzbuf_sgl => fft_scratch%yzbuf_sgl
    1638            0 :          xzbuf_sgl => fft_scratch%xzbuf_sgl
    1639              :       ELSE
    1640            0 :          yzbuf => fft_scratch%yzbuf
    1641            0 :          xzbuf => fft_scratch%xzbuf
    1642              :       END IF
    1643            0 :       npx = dims(1)
    1644            0 :       npz = dims(2)
    1645            0 :       pgrid => fft_scratch%pgrid
    1646            0 :       xcor => fft_scratch%xcor
    1647            0 :       zcor => fft_scratch%zcor
    1648            0 :       pzcoord => fft_scratch%pzcoord
    1649            0 :       scount => fft_scratch%scount
    1650            0 :       rcount => fft_scratch%rcount
    1651            0 :       sdispl => fft_scratch%sdispl
    1652            0 :       rdispl => fft_scratch%rdispl
    1653              : 
    1654            0 :       nx = SIZE(sb, 2)
    1655              : 
    1656              : ! If the send and recv counts are not already cached, then
    1657              : ! calculate and store them
    1658            0 :       IF (fft_scratch%in == 0) THEN
    1659              : 
    1660            0 :          scount = 0
    1661              : 
    1662            0 :          DO ix = 0, npx - 1
    1663            0 :             ip = pgrid(ix, 0)
    1664            0 :             xcor(bo(1, 1, ip):bo(2, 1, ip)) = ix
    1665              :          END DO
    1666            0 :          DO iz = 0, npz - 1
    1667            0 :             ip = pgrid(0, iz)
    1668            0 :             zcor(bo(1, 3, ip):bo(2, 3, ip)) = iz
    1669              :          END DO
    1670            0 :          DO jx = 1, nx
    1671            0 :             IF (alltoall_sgl) THEN
    1672            0 :                DO ir = 1, nray(my_pos)
    1673            0 :                   jy = yzp(1, ir, my_pos)
    1674            0 :                   jz = yzp(2, ir, my_pos)
    1675            0 :                   ip = pgrid(xcor(jx), zcor(jz))
    1676            0 :                   scount(ip) = scount(ip) + 1
    1677              :                END DO
    1678              :             ELSE
    1679            0 :                DO ir = 1, nray(my_pos)
    1680            0 :                   jy = yzp(1, ir, my_pos)
    1681            0 :                   jz = yzp(2, ir, my_pos)
    1682            0 :                   ip = pgrid(xcor(jx), zcor(jz))
    1683            0 :                   scount(ip) = scount(ip) + 1
    1684              :                END DO
    1685              :             END IF
    1686              :          END DO
    1687              : 
    1688            0 :          CALL group%alltoall(scount, rcount, 1)
    1689            0 :          fft_scratch%yzcount = scount
    1690            0 :          fft_scratch%xzcount = rcount
    1691              : 
    1692              :          ! Work out the correct displacements in the buffers
    1693            0 :          sdispl(0) = 0
    1694            0 :          rdispl(0) = 0
    1695            0 :          DO ip = 1, np - 1
    1696            0 :             sdispl(ip) = sdispl(ip - 1) + scount(ip - 1)
    1697            0 :             rdispl(ip) = rdispl(ip - 1) + rcount(ip - 1)
    1698              :          END DO
    1699              : 
    1700            0 :          fft_scratch%yzdispl = sdispl
    1701            0 :          fft_scratch%xzdispl = rdispl
    1702              : 
    1703            0 :          icrs = 0
    1704            0 :          DO ip = 0, np - 1
    1705            0 :             IF (scount(ip) /= 0) icrs = icrs + 1
    1706            0 :             IF (rcount(ip) /= 0) icrs = icrs + 1
    1707              :          END DO
    1708            0 :          CALL group%sum(icrs)
    1709            0 :          fft_scratch%rsratio = REAL(icrs, KIND=dp)/(REAL(2*np, KIND=dp)*REAL(np, KIND=dp))
    1710              : 
    1711            0 :          fft_scratch%in = 1
    1712              :       ELSE
    1713            0 :          scount = fft_scratch%yzcount
    1714            0 :          rcount = fft_scratch%xzcount
    1715            0 :          sdispl = fft_scratch%yzdispl
    1716            0 :          rdispl = fft_scratch%xzdispl
    1717              :       END IF
    1718              : 
    1719              : ! Do the actual packing
    1720              : !$OMP PARALLEL DO DEFAULT(NONE), &
    1721              : !$OMP             PRIVATE(ipl,jj,nx,ir,jx,jy,jz),&
    1722              : !$OMP             SHARED(np,p2p,pzcoord,bo,nray,yzp,zcor),&
    1723              : !$OMP             SHARED(yzbuf,sb,scount,sdispl,my_pos),&
    1724            0 : !$OMP             SHARED(yzbuf_sgl,alltoall_sgl)
    1725              :       DO ip = 0, np - 1
    1726              :          IF (scount(ip) == 0) CYCLE
    1727              :          ipl = p2p(ip)
    1728              :          jj = 0
    1729              :          nx = bo(2, 1, ipl) - bo(1, 1, ipl) + 1
    1730              :          DO ir = 1, nray(my_pos)
    1731              :             jz = yzp(2, ir, my_pos)
    1732              :             IF (zcor(jz) == pzcoord(ipl)) THEN
    1733              :                jj = jj + 1
    1734              :                jy = yzp(1, ir, my_pos)
    1735              :                IF (alltoall_sgl) THEN
    1736              :                   DO jx = 0, nx - 1
    1737              :                      yzbuf_sgl(sdispl(ip) + jj + jx*scount(ip)/nx) = CMPLX(sb(ir, jx + bo(1, 1, ipl)), KIND=sp)
    1738              :                   END DO
    1739              :                ELSE
    1740              :                   DO jx = 0, nx - 1
    1741              :                      yzbuf(sdispl(ip) + jj + jx*scount(ip)/nx) = sb(ir, jx + bo(1, 1, ipl))
    1742              :                   END DO
    1743              :                END IF
    1744              :             END IF
    1745              :          END DO
    1746              :       END DO
    1747              : !$OMP END PARALLEL DO
    1748              : 
    1749            0 :       IF (alltoall_sgl) THEN
    1750            0 :          CALL group%alltoall(yzbuf_sgl, scount, sdispl, xzbuf_sgl, rcount, rdispl)
    1751              :       ELSE
    1752            0 :          IF (fft_scratch%rsratio < ratio_sparse_alltoall) THEN
    1753            0 :             CALL sparse_alltoall(yzbuf, scount, sdispl, xzbuf, rcount, rdispl, group)
    1754              :          ELSE
    1755            0 :             CALL group%alltoall(yzbuf, scount, sdispl, xzbuf, rcount, rdispl)
    1756              :          END IF
    1757              :       END IF
    1758              : 
    1759            0 :       myx = fft_scratch%sizes%r_pos(1)
    1760            0 :       myz = fft_scratch%sizes%r_pos(2)
    1761            0 :       nz = bo(2, 3, rs_pos) - bo(1, 3, rs_pos) + 1
    1762              : 
    1763              : !$OMP PARALLEL DO DEFAULT(NONE), &
    1764              : !$OMP             PRIVATE(ipr,jj,ir,jx,jy,jz),&
    1765              : !$OMP             SHARED(tb,np,p2p,bo,rs_pos,nray),&
    1766              : !$OMP             SHARED(yzp,alltoall_sgl,zcor,myz),&
    1767            0 : !$OMP             SHARED(xzbuf,xzbuf_sgl,nz,rdispl)
    1768              :       DO ip = 0, np - 1
    1769              :          ipr = p2p(ip)
    1770              :          jj = 0
    1771              :          DO jx = 0, bo(2, 1, rs_pos) - bo(1, 1, rs_pos)
    1772              :             DO ir = 1, nray(ip)
    1773              :                jz = yzp(2, ir, ip)
    1774              :                IF (alltoall_sgl) THEN
    1775              :                   IF (zcor(jz) == myz) THEN
    1776              :                      jj = jj + 1
    1777              :                      jy = yzp(1, ir, ip)
    1778              :                      jz = jz - bo(1, 3, rs_pos) + 1
    1779              :                      tb(jy, jz + jx*nz) = xzbuf_sgl(jj + rdispl(ipr))
    1780              :                   END IF
    1781              :                ELSE
    1782              :                   IF (zcor(jz) == myz) THEN
    1783              :                      jj = jj + 1
    1784              :                      jy = yzp(1, ir, ip)
    1785              :                      jz = jz - bo(1, 3, rs_pos) + 1
    1786              :                      tb(jy, jz + jx*nz) = xzbuf(jj + rdispl(ipr))
    1787              :                   END IF
    1788              :                END IF
    1789              :             END DO
    1790              :          END DO
    1791              :       END DO
    1792              : !$OMP END PARALLEL DO
    1793              : 
    1794            0 :       CALL timestop(handle)
    1795              : 
    1796            0 :    END SUBROUTINE yz_to_xz
    1797              : 
    1798              : ! **************************************************************************************************
    1799              : !> \brief ...
    1800              : !> \param sb ...
    1801              : !> \param group ...
    1802              : !> \param dims ...
    1803              : !> \param my_pos ...
    1804              : !> \param p2p ...
    1805              : !> \param yzp ...
    1806              : !> \param nray ...
    1807              : !> \param bo ...
    1808              : !> \param tb ...
    1809              : !> \param fft_scratch ...
    1810              : !> \par History
    1811              : !>      15. Feb. 2006 : single precision all_to_all
    1812              : !> \author JGH (19-Jan-2001)
    1813              : ! **************************************************************************************************
    1814            0 :    SUBROUTINE xz_to_yz(sb, group, dims, my_pos, p2p, yzp, nray, bo, tb, fft_scratch)
    1815              : 
    1816              :       COMPLEX(KIND=dp), CONTIGUOUS, DIMENSION(:, :), &
    1817              :          INTENT(IN)                                      :: sb
    1818              : 
    1819              :       CLASS(mp_comm_type), INTENT(IN)                    :: group
    1820              :       INTEGER, DIMENSION(2), INTENT(IN)                  :: dims
    1821              :       INTEGER, INTENT(IN)                                :: my_pos
    1822              :       INTEGER, CONTIGUOUS, DIMENSION(0:), INTENT(IN)                 :: p2p
    1823              :       INTEGER, CONTIGUOUS, DIMENSION(:, :, 0:), INTENT(IN)           :: yzp
    1824              :       INTEGER, CONTIGUOUS, DIMENSION(0:), INTENT(IN)                 :: nray
    1825              :       INTEGER, CONTIGUOUS, DIMENSION(:, :, 0:), INTENT(IN)           :: bo
    1826              :       COMPLEX(KIND=dp), DIMENSION(:, :), INTENT(INOUT), CONTIGUOUS   :: tb
    1827              :       TYPE(fft_scratch_type), INTENT(INOUT)              :: fft_scratch
    1828              : 
    1829              :       CHARACTER(len=*), PARAMETER                        :: routineN = 'xz_to_yz'
    1830              : 
    1831            0 :       COMPLEX(KIND=dp), DIMENSION(:), POINTER, CONTIGUOUS            :: xzbuf, yzbuf
    1832            0 :       COMPLEX(KIND=sp), DIMENSION(:), POINTER, CONTIGUOUS            :: xzbuf_sgl, yzbuf_sgl
    1833              :       INTEGER                                            :: handle, icrs, ip, ipl, ir, ix, ixx, iz, &
    1834              :                                                             jj, jx, jy, jz, mp, myx, myz, np, npx, &
    1835              :                                                             npz, nx, nz
    1836            0 :       INTEGER, DIMENSION(:), POINTER, CONTIGUOUS                     :: pzcoord, rcount, rdispl, scount, sdispl, &
    1837            0 :                                                                         xcor, zcor
    1838            0 :       INTEGER, DIMENSION(:, :), CONTIGUOUS, POINTER                  :: pgrid
    1839              : 
    1840            0 :       CALL timeset(routineN, handle)
    1841              : 
    1842            0 :       np = SIZE(p2p)
    1843              : 
    1844            0 :       IF (alltoall_sgl) THEN
    1845            0 :          yzbuf_sgl => fft_scratch%yzbuf_sgl
    1846            0 :          xzbuf_sgl => fft_scratch%xzbuf_sgl
    1847              :       ELSE
    1848            0 :          yzbuf => fft_scratch%yzbuf
    1849            0 :          xzbuf => fft_scratch%xzbuf
    1850              :       END IF
    1851            0 :       npx = dims(1)
    1852            0 :       npz = dims(2)
    1853            0 :       pgrid => fft_scratch%pgrid
    1854            0 :       xcor => fft_scratch%xcor
    1855            0 :       zcor => fft_scratch%zcor
    1856            0 :       pzcoord => fft_scratch%pzcoord
    1857            0 :       scount => fft_scratch%scount
    1858            0 :       rcount => fft_scratch%rcount
    1859            0 :       sdispl => fft_scratch%sdispl
    1860            0 :       rdispl => fft_scratch%rdispl
    1861              : 
    1862              : ! If the send and recv counts are not already cached, then
    1863              : ! calculate and store them
    1864            0 :       IF (fft_scratch%in == 0) THEN
    1865              : 
    1866            0 :          rcount = 0
    1867            0 :          nx = MAXVAL(bo(2, 1, :))
    1868              : 
    1869            0 :          DO ix = 0, npx - 1
    1870            0 :             ip = pgrid(ix, 0)
    1871            0 :             xcor(bo(1, 1, ip):bo(2, 1, ip)) = ix
    1872              :          END DO
    1873            0 :          DO iz = 0, npz - 1
    1874            0 :             ip = pgrid(0, iz)
    1875            0 :             zcor(bo(1, 3, ip):bo(2, 3, ip)) = iz
    1876              :          END DO
    1877            0 :          DO jx = 1, nx
    1878            0 :             DO ir = 1, nray(my_pos)
    1879            0 :                jy = yzp(1, ir, my_pos)
    1880            0 :                jz = yzp(2, ir, my_pos)
    1881            0 :                ip = pgrid(xcor(jx), zcor(jz))
    1882            0 :                rcount(ip) = rcount(ip) + 1
    1883              :             END DO
    1884              :          END DO
    1885              : 
    1886            0 :          CALL group%alltoall(rcount, scount, 1)
    1887            0 :          fft_scratch%xzcount = scount
    1888            0 :          fft_scratch%yzcount = rcount
    1889              : 
    1890              :          ! Work out the correct displacements in the buffers
    1891            0 :          sdispl(0) = 0
    1892            0 :          rdispl(0) = 0
    1893            0 :          DO ip = 1, np - 1
    1894            0 :             sdispl(ip) = sdispl(ip - 1) + scount(ip - 1)
    1895            0 :             rdispl(ip) = rdispl(ip - 1) + rcount(ip - 1)
    1896              :          END DO
    1897              : 
    1898            0 :          fft_scratch%xzdispl = sdispl
    1899            0 :          fft_scratch%yzdispl = rdispl
    1900              : 
    1901            0 :          icrs = 0
    1902            0 :          DO ip = 0, np - 1
    1903            0 :             IF (scount(ip) /= 0) icrs = icrs + 1
    1904            0 :             IF (rcount(ip) /= 0) icrs = icrs + 1
    1905              :          END DO
    1906            0 :          CALL group%sum(icrs)
    1907            0 :          fft_scratch%rsratio = REAL(icrs, KIND=dp)/(REAL(2*np, KIND=dp)*REAL(np, KIND=dp))
    1908              : 
    1909            0 :          fft_scratch%in = 1
    1910              :       ELSE
    1911            0 :          scount = fft_scratch%xzcount
    1912            0 :          rcount = fft_scratch%yzcount
    1913            0 :          sdispl = fft_scratch%xzdispl
    1914            0 :          rdispl = fft_scratch%yzdispl
    1915              :       END IF
    1916              : 
    1917              : ! Now do the actual packing
    1918            0 :       myx = fft_scratch%sizes%r_pos(1)
    1919            0 :       myz = fft_scratch%sizes%r_pos(2)
    1920            0 :       mp = p2p(my_pos)
    1921            0 :       nz = bo(2, 3, mp) - bo(1, 3, mp) + 1
    1922            0 :       nx = bo(2, 1, mp) - bo(1, 1, mp) + 1
    1923              : 
    1924              : !$OMP PARALLEL DO DEFAULT(NONE), &
    1925              : !$OMP             PRIVATE(jj,ipl,ir,jx,jy,jz,ixx),&
    1926              : !$OMP             SHARED(np,p2p,nray,yzp,zcor,myz,bo,mp),&
    1927              : !$OMP             SHARED(alltoall_sgl,nx,scount,sdispl),&
    1928            0 : !$OMP             SHARED(xzbuf,xzbuf_sgl,sb,nz)
    1929              :       DO ip = 0, np - 1
    1930              :          jj = 0
    1931              :          ipl = p2p(ip)
    1932              :          DO ir = 1, nray(ip)
    1933              :             jz = yzp(2, ir, ip)
    1934              :             IF (zcor(jz) == myz) THEN
    1935              :                jj = jj + 1
    1936              :                jy = yzp(1, ir, ip)
    1937              :                jz = yzp(2, ir, ip) - bo(1, 3, mp) + 1
    1938              :                IF (alltoall_sgl) THEN
    1939              :                   DO jx = 0, nx - 1
    1940              :                      ixx = jj + jx*scount(ipl)/nx
    1941              :                      xzbuf_sgl(ixx + sdispl(ipl)) = CMPLX(sb(jy, jz + jx*nz), KIND=sp)
    1942              :                   END DO
    1943              :                ELSE
    1944              :                   DO jx = 0, nx - 1
    1945              :                      ixx = jj + jx*scount(ipl)/nx
    1946              :                      xzbuf(ixx + sdispl(ipl)) = sb(jy, jz + jx*nz)
    1947              :                   END DO
    1948              :                END IF
    1949              :             END IF
    1950              :          END DO
    1951              :       END DO
    1952              : !$OMP END PARALLEL DO
    1953              : 
    1954            0 :       IF (alltoall_sgl) THEN
    1955            0 :          CALL group%alltoall(xzbuf_sgl, scount, sdispl, yzbuf_sgl, rcount, rdispl)
    1956              :       ELSE
    1957            0 :          IF (fft_scratch%rsratio < ratio_sparse_alltoall) THEN
    1958            0 :             CALL sparse_alltoall(xzbuf, scount, sdispl, yzbuf, rcount, rdispl, group)
    1959              :          ELSE
    1960            0 :             CALL group%alltoall(xzbuf, scount, sdispl, yzbuf, rcount, rdispl)
    1961              :          END IF
    1962              :       END IF
    1963              : 
    1964              : !$OMP PARALLEL DO DEFAULT(NONE), &
    1965              : !$OMP             PRIVATE(ipl,jj,nx,ir,jx,jy,jz),&
    1966              : !$OMP             SHARED(p2p,pzcoord,bo,nray,my_pos,yzp),&
    1967              : !$OMP             SHARED(rcount,rdispl,tb,yzbuf,zcor),&
    1968            0 : !$OMP             SHARED(yzbuf_sgl,alltoall_sgl,np)
    1969              :       DO ip = 0, np - 1
    1970              :          IF (rcount(ip) == 0) CYCLE
    1971              :          ipl = p2p(ip)
    1972              :          jj = 0
    1973              :          nx = bo(2, 1, ipl) - bo(1, 1, ipl) + 1
    1974              :          DO ir = 1, nray(my_pos)
    1975              :             jz = yzp(2, ir, my_pos)
    1976              :             IF (zcor(jz) == pzcoord(ipl)) THEN
    1977              :                jj = jj + 1
    1978              :                jy = yzp(1, ir, my_pos)
    1979              :                IF (alltoall_sgl) THEN
    1980              :                   DO jx = 0, nx - 1
    1981              :                      tb(ir, jx + bo(1, 1, ipl)) = yzbuf_sgl(rdispl(ip) + jj + jx*rcount(ip)/nx)
    1982              :                   END DO
    1983              :                ELSE
    1984              :                   DO jx = 0, nx - 1
    1985              :                      tb(ir, jx + bo(1, 1, ipl)) = yzbuf(rdispl(ip) + jj + jx*rcount(ip)/nx)
    1986              :                   END DO
    1987              :                END IF
    1988              :             END IF
    1989              :          END DO
    1990              :       END DO
    1991              : !$OMP END PARALLEL DO
    1992              : 
    1993            0 :       CALL timestop(handle)
    1994              : 
    1995            0 :    END SUBROUTINE xz_to_yz
    1996              : 
    1997              : ! **************************************************************************************************
    1998              : !> \brief ...
    1999              : !> \param cin ...
    2000              : !> \param boin ...
    2001              : !> \param boout ...
    2002              : !> \param sout ...
    2003              : !> \param fft_scratch ...
    2004              : !> \par History
    2005              : !>      none
    2006              : !> \author JGH (20-Jan-2001)
    2007              : ! **************************************************************************************************
    2008            0 :    SUBROUTINE cube_transpose_1(cin, boin, boout, sout, fft_scratch)
    2009              : 
    2010              :       COMPLEX(KIND=dp), CONTIGUOUS, DIMENSION(:, :), &
    2011              :          INTENT(IN)                                      :: cin
    2012              :       INTEGER, CONTIGUOUS, DIMENSION(:, :, 0:), &
    2013              :          INTENT(IN)                                      :: boin, boout
    2014              :       COMPLEX(KIND=dp), CONTIGUOUS, DIMENSION(:, :), &
    2015              :          INTENT(OUT)                                     :: sout
    2016              :       TYPE(fft_scratch_type), INTENT(IN)                 :: fft_scratch
    2017              : 
    2018              :       CHARACTER(len=*), PARAMETER                        :: routineN = 'cube_transpose_1'
    2019              : 
    2020              :       COMPLEX(KIND=dp), CONTIGUOUS, DIMENSION(:, :), &
    2021            0 :          POINTER                                         :: rbuf
    2022              :       INTEGER                                            :: handle, ip, ipl, ir, is, ixy, iz, mip, &
    2023              :                                                             mz, np, nx, ny, nz
    2024            0 :       INTEGER, CONTIGUOUS, DIMENSION(:), POINTER         :: rcount, rdispl, scount, sdispl
    2025            0 :       INTEGER, CONTIGUOUS, DIMENSION(:, :), POINTER      :: pgrid
    2026              :       INTEGER, DIMENSION(2)                              :: dim, pos
    2027              : 
    2028            0 :       CALL timeset(routineN, handle)
    2029              : 
    2030            0 :       mip = fft_scratch%mip
    2031            0 :       dim = fft_scratch%dim
    2032            0 :       pos = fft_scratch%pos
    2033            0 :       scount => fft_scratch%scount
    2034            0 :       rcount => fft_scratch%rcount
    2035            0 :       sdispl => fft_scratch%sdispl
    2036            0 :       rdispl => fft_scratch%rdispl
    2037            0 :       pgrid => fft_scratch%pgcube
    2038            0 :       np = DIM(2)
    2039              : 
    2040            0 :       nx = boin(2, 1, mip) - boin(1, 1, mip) + 1
    2041            0 :       nz = boin(2, 3, mip) - boin(1, 3, mip) + 1
    2042              : 
    2043              : !$OMP PARALLEL DO DEFAULT(NONE), &
    2044              : !$OMP             PRIVATE(ipl,ny), &
    2045            0 : !$OMP             SHARED(np,pgrid,boout,scount,sdispl,nx,nz)
    2046              :       DO ip = 0, np - 1
    2047              :          ipl = pgrid(ip, 2)
    2048              :          ny = boout(2, 2, ipl) - boout(1, 2, ipl) + 1
    2049              :          scount(ip) = nx*nz*ny
    2050              :          sdispl(ip) = nx*nz*(boout(1, 2, ipl) - 1)
    2051              :       END DO
    2052              : !$OMP END PARALLEL DO
    2053            0 :       ny = boout(2, 2, mip) - boout(1, 2, mip) + 1
    2054            0 :       mz = MAXVAL(boin(2, 3, :) - boin(1, 3, :) + 1)
    2055              : !$OMP PARALLEL DO DEFAULT(NONE), &
    2056              : !$OMP             PRIVATE(ipl,nz), &
    2057            0 : !$OMP             SHARED(np,pgrid,boin,nx,ny,rcount,rdispl,mz)
    2058              :       DO ip = 0, np - 1
    2059              :          ipl = pgrid(ip, 2)
    2060              :          nz = boin(2, 3, ipl) - boin(1, 3, ipl) + 1
    2061              :          rcount(ip) = nx*nz*ny
    2062              :          rdispl(ip) = nx*ny*mz*ip
    2063              :       END DO
    2064              : !$OMP END PARALLEL DO
    2065              : 
    2066            0 :       rbuf => fft_scratch%rbuf1
    2067              : 
    2068            0 :       CALL fft_scratch%cart_sub_comm(2)%alltoall(cin, scount, sdispl, rbuf, rcount, rdispl)
    2069              : 
    2070              : !$OMP PARALLEL DO DEFAULT(NONE) COLLAPSE(2) &
    2071              : !$OMP             PRIVATE(ip,ipl,nz,iz,is,ir) &
    2072            0 : !$OMP             SHARED(nx,ny,np,pgrid,boin,sout,rbuf)
    2073              :       DO ixy = 1, nx*ny
    2074              :          DO ip = 0, np - 1
    2075              :             ipl = pgrid(ip, 2)
    2076              :             nz = boin(2, 3, ipl) - boin(1, 3, ipl) + 1
    2077              :             DO iz = 1, nz
    2078              :                is = boin(1, 3, ipl) + iz - 1
    2079              :                ir = iz + nz*(ixy - 1)
    2080              :                sout(is, ixy) = rbuf(ir, ip)
    2081              :             END DO
    2082              :          END DO
    2083              :       END DO
    2084              : !$OMP END PARALLEL DO
    2085              : 
    2086            0 :       CALL timestop(handle)
    2087              : 
    2088            0 :    END SUBROUTINE cube_transpose_1
    2089              : 
    2090              : ! **************************************************************************************************
    2091              : !> \brief ...
    2092              : !> \param cin ...
    2093              : !> \param boin ...
    2094              : !> \param boout ...
    2095              : !> \param sout ...
    2096              : !> \param fft_scratch ...
    2097              : ! **************************************************************************************************
    2098            0 :    SUBROUTINE cube_transpose_2(cin, boin, boout, sout, fft_scratch)
    2099              : 
    2100              :       COMPLEX(KIND=dp), CONTIGUOUS, DIMENSION(:, :), &
    2101              :          INTENT(IN)                                      :: cin
    2102              :       INTEGER, CONTIGUOUS, DIMENSION(:, :, 0:), &
    2103              :          INTENT(IN)                                      :: boin, boout
    2104              :       COMPLEX(KIND=dp), CONTIGUOUS, DIMENSION(:, :), &
    2105              :          INTENT(OUT)                                     :: sout
    2106              :       TYPE(fft_scratch_type), INTENT(IN)                 :: fft_scratch
    2107              : 
    2108              :       CHARACTER(len=*), PARAMETER                        :: routineN = 'cube_transpose_2'
    2109              : 
    2110              :       COMPLEX(KIND=dp), CONTIGUOUS, DIMENSION(:, :), &
    2111            0 :          POINTER                                         :: rbuf
    2112              :       INTEGER                                            :: handle, ip, ipl, ir, ixy, iz, mip, mz, &
    2113              :                                                             np, nx, ny, nz
    2114            0 :       INTEGER, CONTIGUOUS, DIMENSION(:), POINTER         :: rcount, rdispl, scount, sdispl
    2115            0 :       INTEGER, CONTIGUOUS, DIMENSION(:, :), POINTER      :: pgrid
    2116              :       INTEGER, DIMENSION(2)                              :: dim, pos
    2117              :       TYPE(mp_comm_type)                                 :: sub_group
    2118              : 
    2119            0 :       CALL timeset(routineN, handle)
    2120              : 
    2121            0 :       sub_group = fft_scratch%cart_sub_comm(2)
    2122            0 :       mip = fft_scratch%mip
    2123            0 :       dim = fft_scratch%dim
    2124            0 :       pos = fft_scratch%pos
    2125            0 :       scount => fft_scratch%scount
    2126            0 :       rcount => fft_scratch%rcount
    2127            0 :       sdispl => fft_scratch%sdispl
    2128            0 :       rdispl => fft_scratch%rdispl
    2129            0 :       pgrid => fft_scratch%pgcube
    2130            0 :       np = DIM(2)
    2131              : 
    2132            0 :       nx = boin(2, 1, mip) - boin(1, 1, mip) + 1
    2133            0 :       ny = boin(2, 2, mip) - boin(1, 2, mip) + 1
    2134            0 :       mz = MAXVAL(boout(2, 3, :) - boout(1, 3, :) + 1)
    2135              : 
    2136            0 :       rbuf => fft_scratch%rbuf2
    2137              : 
    2138              : !$OMP PARALLEL DEFAULT(NONE), &
    2139              : !$OMP          PRIVATE(ip,ipl,nz,iz,ir), &
    2140            0 : !$OMP          SHARED(nx,ny,np,pgrid,boout,rbuf,cin,scount,sdispl,mz)
    2141              : !$OMP DO COLLAPSE(2)
    2142              :       DO ixy = 1, nx*ny
    2143              :          DO ip = 0, np - 1
    2144              :             ipl = pgrid(ip, 2)
    2145              :             nz = boout(2, 3, ipl) - boout(1, 3, ipl) + 1
    2146              :             DO iz = boout(1, 3, ipl), boout(2, 3, ipl)
    2147              :                ir = iz - boout(1, 3, ipl) + 1 + (ixy - 1)*nz
    2148              :                rbuf(ir, ip) = cin(iz, ixy)
    2149              :             END DO
    2150              :          END DO
    2151              :       END DO
    2152              : !$OMP END DO
    2153              : !$OMP DO
    2154              :       DO ip = 0, np - 1
    2155              :          ipl = pgrid(ip, 2)
    2156              :          nz = boout(2, 3, ipl) - boout(1, 3, ipl) + 1
    2157              :          scount(ip) = nx*ny*nz
    2158              :          sdispl(ip) = nx*ny*mz*ip
    2159              :       END DO
    2160              : !$OMP END DO
    2161              : !$OMP END PARALLEL
    2162            0 :       nz = boout(2, 3, mip) - boout(1, 3, mip) + 1
    2163              : !$OMP PARALLEL DO DEFAULT(NONE), &
    2164              : !$OMP             PRIVATE(ipl,ny), &
    2165            0 : !$OMP             SHARED(np,pgrid,boin,nx,nz,rcount,rdispl)
    2166              :       DO ip = 0, np - 1
    2167              :          ipl = pgrid(ip, 2)
    2168              :          ny = boin(2, 2, ipl) - boin(1, 2, ipl) + 1
    2169              :          rcount(ip) = nx*ny*nz
    2170              :          rdispl(ip) = nx*nz*(boin(1, 2, ipl) - 1)
    2171              :       END DO
    2172              : !$OMP END PARALLEL DO
    2173              : 
    2174            0 :       CALL sub_group%alltoall(rbuf, scount, sdispl, sout, rcount, rdispl)
    2175              : 
    2176            0 :       CALL timestop(handle)
    2177              : 
    2178            0 :    END SUBROUTINE cube_transpose_2
    2179              : 
    2180              : ! **************************************************************************************************
    2181              : !> \brief ...
    2182              : !> \param cin ...
    2183              : !> \param boin ...
    2184              : !> \param boout ...
    2185              : !> \param sout ...
    2186              : !> \param fft_scratch ...
    2187              : ! **************************************************************************************************
    2188            0 :    SUBROUTINE cube_transpose_3(cin, boin, boout, sout, fft_scratch)
    2189              : 
    2190              :       COMPLEX(KIND=dp), CONTIGUOUS, DIMENSION(:, :), &
    2191              :          INTENT(IN)                                      :: cin
    2192              :       INTEGER, CONTIGUOUS, DIMENSION(:, :, 0:), &
    2193              :          INTENT(IN)                                      :: boin, boout
    2194              :       COMPLEX(KIND=dp), CONTIGUOUS, DIMENSION(:, :), &
    2195              :          INTENT(OUT)                                     :: sout
    2196              :       TYPE(fft_scratch_type), INTENT(IN)                 :: fft_scratch
    2197              : 
    2198              :       CHARACTER(len=*), PARAMETER                        :: routineN = 'cube_transpose_3'
    2199              : 
    2200              :       COMPLEX(KIND=dp), CONTIGUOUS, DIMENSION(:, :), &
    2201            0 :          POINTER                                         :: rbuf
    2202              :       INTEGER                                            :: handle, ip, ipl, ir, is, ixz, iy, lb, &
    2203              :                                                             mip, my, my_id, np, num_threads, nx, &
    2204              :                                                             ny, nz, ub
    2205            0 :       INTEGER, CONTIGUOUS, DIMENSION(:), POINTER         :: rcount, rdispl, scount, sdispl
    2206            0 :       INTEGER, CONTIGUOUS, DIMENSION(:, :), POINTER      :: pgrid
    2207              :       INTEGER, DIMENSION(2)                              :: dim, pos
    2208              :       TYPE(mp_comm_type)                                 :: sub_group
    2209              : 
    2210            0 :       CALL timeset(routineN, handle)
    2211              : 
    2212            0 :       sub_group = fft_scratch%cart_sub_comm(1)
    2213            0 :       mip = fft_scratch%mip
    2214            0 :       dim = fft_scratch%dim
    2215            0 :       pos = fft_scratch%pos
    2216            0 :       np = DIM(1)
    2217            0 :       scount => fft_scratch%scount
    2218            0 :       rcount => fft_scratch%rcount
    2219            0 :       sdispl => fft_scratch%sdispl
    2220            0 :       rdispl => fft_scratch%rdispl
    2221            0 :       pgrid => fft_scratch%pgcube
    2222              : 
    2223            0 :       ny = boin(2, 2, mip) - boin(1, 2, mip) + 1
    2224            0 :       nz = boin(2, 3, mip) - boin(1, 3, mip) + 1
    2225              : !$OMP PARALLEL DO DEFAULT(NONE), &
    2226              : !$OMP             PRIVATE(ipl, nx), &
    2227            0 : !$OMP             SHARED(np,pgrid,boout,ny,nz,scount,sdispl)
    2228              :       DO ip = 0, np - 1
    2229              :          ipl = pgrid(ip, 1)
    2230              :          nx = boout(2, 1, ipl) - boout(1, 1, ipl) + 1
    2231              :          scount(ip) = nx*nz*ny
    2232              :          sdispl(ip) = ny*nz*(boout(1, 1, ipl) - 1)
    2233              :       END DO
    2234              : !$OMP END PARALLEL DO
    2235            0 :       nx = boout(2, 1, mip) - boout(1, 1, mip) + 1
    2236            0 :       my = MAXVAL(boin(2, 2, :) - boin(1, 2, :) + 1)
    2237              : !$OMP PARALLEL DO DEFAULT(NONE), &
    2238              : !$OMP             PRIVATE(ipl, ny), &
    2239            0 : !$OMP             SHARED(np,pgrid,boin,nx,nz,my,rcount,rdispl)
    2240              :       DO ip = 0, np - 1
    2241              :          ipl = pgrid(ip, 1)
    2242              :          ny = boin(2, 2, ipl) - boin(1, 2, ipl) + 1
    2243              :          rcount(ip) = nx*nz*ny
    2244              :          rdispl(ip) = nx*my*nz*ip
    2245              :       END DO
    2246              : !$OMP END PARALLEL DO
    2247              : 
    2248            0 :       rbuf => fft_scratch%rbuf3
    2249            0 :       num_threads = 1
    2250            0 :       my_id = 0
    2251              : !$OMP PARALLEL DEFAULT(NONE), &
    2252              : !$OMP          PRIVATE(NUM_THREADS, my_id, lb, ub) &
    2253            0 : !$OMP          SHARED(rbuf)
    2254              : !$    num_threads = MIN(omp_get_max_threads(), SIZE(rbuf, 2))
    2255              : !$    my_id = omp_get_thread_num()
    2256              :       IF (my_id < num_threads) THEN
    2257              :          lb = (SIZE(rbuf, 2)*my_id)/num_threads
    2258              :          ub = (SIZE(rbuf, 2)*(my_id + 1))/num_threads - 1
    2259              :          rbuf(:, lb:ub) = 0.0_dp
    2260              :       END IF
    2261              : !$OMP END PARALLEL
    2262              : 
    2263            0 :       CALL sub_group%alltoall(cin, scount, sdispl, rbuf, rcount, rdispl)
    2264              : 
    2265              : !$OMP PARALLEL DO DEFAULT(NONE) COLLAPSE(2) &
    2266              : !$OMP             PRIVATE(ip,ipl,ny,iy,is,ir) &
    2267            0 : !$OMP             SHARED(nx,nz,np,pgrid,boin,rbuf,sout)
    2268              :       DO ixz = 1, nx*nz
    2269              :          DO ip = 0, np - 1
    2270              :             ipl = pgrid(ip, 1)
    2271              :             ny = boin(2, 2, ipl) - boin(1, 2, ipl) + 1
    2272              :             DO iy = 1, ny
    2273              :                is = boin(1, 2, ipl) + iy - 1
    2274              :                ir = iy + ny*(ixz - 1)
    2275              :                sout(is, ixz) = rbuf(ir, ip)
    2276              :             END DO
    2277              :          END DO
    2278              :       END DO
    2279              : !$OMP END PARALLEL DO
    2280              : 
    2281            0 :       CALL timestop(handle)
    2282              : 
    2283            0 :    END SUBROUTINE cube_transpose_3
    2284              : 
    2285              : ! **************************************************************************************************
    2286              : !> \brief ...
    2287              : !> \param cin ...
    2288              : !> \param boin ...
    2289              : !> \param boout ...
    2290              : !> \param sout ...
    2291              : !> \param fft_scratch ...
    2292              : ! **************************************************************************************************
    2293            0 :    SUBROUTINE cube_transpose_4(cin, boin, boout, sout, fft_scratch)
    2294              : 
    2295              :       COMPLEX(KIND=dp), CONTIGUOUS, DIMENSION(:, :), &
    2296              :          INTENT(IN)                                      :: cin
    2297              :       INTEGER, CONTIGUOUS, DIMENSION(:, :, 0:), &
    2298              :          INTENT(IN)                                      :: boin, boout
    2299              :       COMPLEX(KIND=dp), CONTIGUOUS, DIMENSION(:, :), &
    2300              :          INTENT(OUT)                                     :: sout
    2301              :       TYPE(fft_scratch_type), INTENT(IN)                 :: fft_scratch
    2302              : 
    2303              :       CHARACTER(len=*), PARAMETER                        :: routineN = 'cube_transpose_4'
    2304              : 
    2305              :       COMPLEX(KIND=dp), CONTIGUOUS, DIMENSION(:, :), &
    2306            0 :          POINTER                                         :: rbuf
    2307              :       INTEGER                                            :: handle, ip, ipl, ir, iy, izx, lb, mip, &
    2308              :                                                             my, my_id, np, num_threads, nx, ny, &
    2309              :                                                             nz, ub
    2310            0 :       INTEGER, CONTIGUOUS, DIMENSION(:), POINTER         :: rcount, rdispl, scount, sdispl
    2311            0 :       INTEGER, CONTIGUOUS, DIMENSION(:, :), POINTER      :: pgrid
    2312              :       INTEGER, DIMENSION(2)                              :: dim, pos
    2313              :       TYPE(mp_comm_type)                                 :: sub_group
    2314              : 
    2315            0 :       CALL timeset(routineN, handle)
    2316              : 
    2317            0 :       sub_group = fft_scratch%cart_sub_comm(1)
    2318            0 :       mip = fft_scratch%mip
    2319            0 :       dim = fft_scratch%dim
    2320            0 :       pos = fft_scratch%pos
    2321            0 :       np = DIM(1)
    2322            0 :       scount => fft_scratch%scount
    2323            0 :       rcount => fft_scratch%rcount
    2324            0 :       sdispl => fft_scratch%sdispl
    2325            0 :       rdispl => fft_scratch%rdispl
    2326            0 :       pgrid => fft_scratch%pgcube
    2327              : 
    2328            0 :       nx = boin(2, 1, mip) - boin(1, 1, mip) + 1
    2329            0 :       nz = boin(2, 3, mip) - boin(1, 3, mip) + 1
    2330            0 :       my = MAXVAL(boout(2, 2, :) - boout(1, 2, :) + 1)
    2331              : 
    2332            0 :       rbuf => fft_scratch%rbuf4
    2333            0 :       num_threads = 1
    2334            0 :       my_id = 0
    2335              : !$OMP PARALLEL DEFAULT(NONE), &
    2336              : !$OMP          PRIVATE(NUM_THREADS,my_id,lb,ub,ip,ipl,ny,iy,ir), &
    2337            0 : !$OMP          SHARED(rbuf,nz,nx,np,pgrid,boout,cin,my,scount,sdispl)
    2338              : !$    num_threads = MIN(omp_get_max_threads(), SIZE(rbuf, 2))
    2339              : !$    my_id = omp_get_thread_num()
    2340              :       IF (my_id < num_threads) THEN
    2341              :          lb = (SIZE(rbuf, 2)*my_id)/num_threads
    2342              :          ub = (SIZE(rbuf, 2)*(my_id + 1))/num_threads - 1
    2343              :          rbuf(:, lb:ub) = 0.0_dp
    2344              :       END IF
    2345              : !$OMP BARRIER
    2346              : 
    2347              : !$OMP DO COLLAPSE(2)
    2348              :       DO izx = 1, nz*nx
    2349              :          DO ip = 0, np - 1
    2350              :             ipl = pgrid(ip, 1)
    2351              :             ny = boout(2, 2, ipl) - boout(1, 2, ipl) + 1
    2352              :             DO iy = boout(1, 2, ipl), boout(2, 2, ipl)
    2353              :                ir = iy - boout(1, 2, ipl) + 1 + (izx - 1)*ny
    2354              :                rbuf(ir, ip) = cin(iy, izx)
    2355              :             END DO
    2356              :          END DO
    2357              :       END DO
    2358              : !$OMP END DO
    2359              : !$OMP DO
    2360              :       DO ip = 0, np - 1
    2361              :          ipl = pgrid(ip, 1)
    2362              :          ny = boout(2, 2, ipl) - boout(1, 2, ipl) + 1
    2363              :          scount(ip) = nx*ny*nz
    2364              :          sdispl(ip) = nx*nz*my*ip
    2365              :       END DO
    2366              : !$OMP END DO
    2367              : !$OMP END PARALLEL
    2368            0 :       ny = boout(2, 2, mip) - boout(1, 2, mip) + 1
    2369              : !$OMP PARALLEL DO DEFAULT(NONE), &
    2370              : !$OMP             PRIVATE(ipl,nx), &
    2371            0 : !$OMP             SHARED(np,pgrid,boin,rcount,rdispl,ny,nz)
    2372              :       DO ip = 0, np - 1
    2373              :          ipl = pgrid(ip, 1)
    2374              :          nx = boin(2, 1, ipl) - boin(1, 1, ipl) + 1
    2375              :          rcount(ip) = nx*ny*nz
    2376              :          rdispl(ip) = ny*nz*(boin(1, 1, ipl) - 1)
    2377              :       END DO
    2378              : !$OMP END PARALLEL DO
    2379              : 
    2380            0 :       CALL sub_group%alltoall(rbuf, scount, sdispl, sout, rcount, rdispl)
    2381              : 
    2382            0 :       CALL timestop(handle)
    2383              : 
    2384            0 :    END SUBROUTINE cube_transpose_4
    2385              : 
    2386              : ! **************************************************************************************************
    2387              : !> \brief ...
    2388              : !> \param cin ...
    2389              : !> \param group ...
    2390              : !> \param boin ...
    2391              : !> \param boout ...
    2392              : !> \param sout ...
    2393              : !> \param fft_scratch ...
    2394              : ! **************************************************************************************************
    2395          104 :    SUBROUTINE cube_transpose_5(cin, group, boin, boout, sout, fft_scratch)
    2396              : 
    2397              :       COMPLEX(KIND=dp), CONTIGUOUS, DIMENSION(:, :), &
    2398              :          INTENT(IN)                                      :: cin
    2399              : 
    2400              :       CLASS(mp_comm_type), INTENT(IN)                    :: group
    2401              :       INTEGER, CONTIGUOUS, DIMENSION(:, :, 0:), INTENT(IN)           :: boin, boout
    2402              :       COMPLEX(KIND=dp), DIMENSION(:, :), INTENT(OUT), CONTIGUOUS     :: sout
    2403              :       TYPE(fft_scratch_type), INTENT(IN)                 :: fft_scratch
    2404              : 
    2405              :       CHARACTER(len=*), PARAMETER                        :: routineN = 'cube_transpose_5'
    2406              : 
    2407          104 :       COMPLEX(KIND=dp), DIMENSION(:, :), POINTER, CONTIGUOUS         :: rbuf
    2408              :       INTEGER                                            :: handle, ip, ir, is, ixz, iy, lb, mip, &
    2409              :                                                             my, my_id, np, num_threads, nx, ny, &
    2410              :                                                             nz, ub
    2411          104 :       INTEGER, DIMENSION(:), POINTER, CONTIGUOUS                     :: rcount, rdispl, scount, sdispl
    2412              : 
    2413          104 :       CALL timeset(routineN, handle)
    2414              : 
    2415          104 :       np = fft_scratch%sizes%numtask
    2416          104 :       mip = fft_scratch%mip
    2417          104 :       scount => fft_scratch%scount
    2418          104 :       rcount => fft_scratch%rcount
    2419          104 :       sdispl => fft_scratch%sdispl
    2420          104 :       rdispl => fft_scratch%rdispl
    2421              : 
    2422          104 :       ny = boin(2, 2, mip) - boin(1, 2, mip) + 1
    2423          104 :       nz = boin(2, 3, mip) - boin(1, 3, mip) + 1
    2424              : !$OMP PARALLEL DO DEFAULT(NONE), &
    2425              : !$OMP             PRIVATE(nx), &
    2426          104 : !$OMP             SHARED(np,boout,ny,nz,scount,sdispl)
    2427              :       DO ip = 0, np - 1
    2428              :          nx = boout(2, 1, ip) - boout(1, 1, ip) + 1
    2429              :          scount(ip) = nx*nz*ny
    2430              :          sdispl(ip) = ny*nz*(boout(1, 1, ip) - 1)
    2431              :       END DO
    2432              : !$OMP END PARALLEL DO
    2433          104 :       nx = boout(2, 1, mip) - boout(1, 1, mip) + 1
    2434          312 :       my = MAXVAL(boin(2, 2, :) - boin(1, 2, :) + 1)
    2435              : !$OMP PARALLEL DO DEFAULT(NONE), &
    2436              : !$OMP             PRIVATE(ny), &
    2437          104 : !$OMP             SHARED(np,boin,nx,nz,rcount,rdispl,my)
    2438              :       DO ip = 0, np - 1
    2439              :          ny = boin(2, 2, ip) - boin(1, 2, ip) + 1
    2440              :          rcount(ip) = nx*nz*ny
    2441              :          rdispl(ip) = nx*my*nz*ip
    2442              :       END DO
    2443              : !$OMP END PARALLEL DO
    2444              : 
    2445          104 :       rbuf => fft_scratch%rbuf5
    2446          104 :       num_threads = 1
    2447          104 :       my_id = 0
    2448              : !$OMP PARALLEL DEFAULT(NONE), &
    2449              : !$OMP          PRIVATE(NUM_THREADS, my_id, lb, ub), &
    2450          104 : !$OMP          SHARED(rbuf)
    2451              : !$    num_threads = MIN(omp_get_max_threads(), SIZE(rbuf, 2))
    2452              : !$    my_id = omp_get_thread_num()
    2453              :       IF (my_id < num_threads) THEN
    2454              :          lb = (SIZE(rbuf, 2)*my_id)/num_threads
    2455              :          ub = (SIZE(rbuf, 2)*(my_id + 1))/num_threads - 1
    2456              :          rbuf(:, lb:ub) = 0.0_dp
    2457              :       END IF
    2458              : !$OMP END PARALLEL
    2459              : 
    2460          104 :       CALL group%alltoall(cin, scount, sdispl, rbuf, rcount, rdispl)
    2461              : 
    2462              : !$OMP PARALLEL DO DEFAULT(NONE) COLLAPSE(2) &
    2463              : !$OMP             PRIVATE(ip,ny,iy,is,ir) &
    2464          104 : !$OMP             SHARED(nx,nz,np,boin,sout,rbuf)
    2465              :       DO ixz = 1, nx*nz
    2466              :          DO ip = 0, np - 1
    2467              :             ny = boin(2, 2, ip) - boin(1, 2, ip) + 1
    2468              :             DO iy = 1, ny
    2469              :                is = boin(1, 2, ip) + iy - 1
    2470              :                ir = iy + ny*(ixz - 1)
    2471              :                sout(is, ixz) = rbuf(ir, ip)
    2472              :             END DO
    2473              :          END DO
    2474              :       END DO
    2475              : !$OMP END PARALLEL DO
    2476              : 
    2477          104 :       CALL timestop(handle)
    2478              : 
    2479          104 :    END SUBROUTINE cube_transpose_5
    2480              : 
    2481              : ! **************************************************************************************************
    2482              : !> \brief ...
    2483              : !> \param cin ...
    2484              : !> \param group ...
    2485              : !> \param boin ...
    2486              : !> \param boout ...
    2487              : !> \param sout ...
    2488              : !> \param fft_scratch ...
    2489              : ! **************************************************************************************************
    2490          104 :    SUBROUTINE cube_transpose_6(cin, group, boin, boout, sout, fft_scratch)
    2491              : 
    2492              :       COMPLEX(KIND=dp), CONTIGUOUS, DIMENSION(:, :), &
    2493              :          INTENT(IN)                                      :: cin
    2494              : 
    2495              :       CLASS(mp_comm_type), INTENT(IN)                    :: group
    2496              :       INTEGER, CONTIGUOUS, DIMENSION(:, :, 0:), INTENT(IN)           :: boin, boout
    2497              :       COMPLEX(KIND=dp), DIMENSION(:, :), INTENT(OUT), CONTIGUOUS     :: sout
    2498              :       TYPE(fft_scratch_type), INTENT(IN)                 :: fft_scratch
    2499              : 
    2500              :       CHARACTER(len=*), PARAMETER                        :: routineN = 'cube_transpose_6'
    2501              : 
    2502          104 :       COMPLEX(KIND=dp), DIMENSION(:, :), POINTER, CONTIGUOUS         :: rbuf
    2503              :       INTEGER                                            :: handle, ip, ir, iy, izx, lb, mip, my, &
    2504              :                                                             my_id, np, num_threads, nx, ny, nz, ub
    2505          104 :       INTEGER, DIMENSION(:), POINTER, CONTIGUOUS                     :: rcount, rdispl, scount, sdispl
    2506              : 
    2507          104 :       CALL timeset(routineN, handle)
    2508              : 
    2509          104 :       np = fft_scratch%sizes%numtask
    2510          104 :       mip = fft_scratch%mip
    2511          104 :       scount => fft_scratch%scount
    2512          104 :       rcount => fft_scratch%rcount
    2513          104 :       sdispl => fft_scratch%sdispl
    2514          104 :       rdispl => fft_scratch%rdispl
    2515              : 
    2516          104 :       nx = boin(2, 1, mip) - boin(1, 1, mip) + 1
    2517          104 :       nz = boin(2, 3, mip) - boin(1, 3, mip) + 1
    2518          312 :       my = MAXVAL(boout(2, 2, :) - boout(1, 2, :) + 1)
    2519              : 
    2520          104 :       rbuf => fft_scratch%rbuf5
    2521          104 :       num_threads = 1
    2522          104 :       my_id = 0
    2523              : !$OMP PARALLEL DEFAULT(NONE), &
    2524              : !$OMP          PRIVATE(NUM_THREADS,my_id,lb,ub,ip,ny,iy,ir), &
    2525          104 : !$OMP          SHARED(rbuf,nx,nz,np,boout,cin,my,scount,sdispl)
    2526              : !$    num_threads = MIN(omp_get_max_threads(), SIZE(rbuf, 2))
    2527              : !$    my_id = omp_get_thread_num()
    2528              :       IF (my_id < num_threads) THEN
    2529              :          lb = (SIZE(rbuf, 2)*my_id)/num_threads
    2530              :          ub = (SIZE(rbuf, 2)*(my_id + 1))/num_threads - 1
    2531              :          rbuf(:, lb:ub) = 0.0_dp
    2532              :       END IF
    2533              : !$OMP BARRIER
    2534              : 
    2535              : !$OMP DO COLLAPSE(2)
    2536              :       DO izx = 1, nz*nx
    2537              :          DO ip = 0, np - 1
    2538              :             ny = boout(2, 2, ip) - boout(1, 2, ip) + 1
    2539              :             DO iy = boout(1, 2, ip), boout(2, 2, ip)
    2540              :                ir = iy - boout(1, 2, ip) + 1 + (izx - 1)*ny
    2541              :                rbuf(ir, ip) = cin(iy, izx)
    2542              :             END DO
    2543              :          END DO
    2544              :       END DO
    2545              : !$OMP END DO
    2546              : !$OMP DO
    2547              :       DO ip = 0, np - 1
    2548              :          ny = boout(2, 2, ip) - boout(1, 2, ip) + 1
    2549              :          scount(ip) = nx*ny*nz
    2550              :          sdispl(ip) = nx*nz*my*ip
    2551              :       END DO
    2552              : !$OMP END DO
    2553              : !$OMP END PARALLEL
    2554          104 :       ny = boout(2, 2, mip) - boout(1, 2, mip) + 1
    2555              : !$OMP PARALLEL DO DEFAULT(NONE), &
    2556              : !$OMP             PRIVATE(nx), &
    2557          104 : !$OMP             SHARED(np,boin,rcount,rdispl,nz,ny)
    2558              :       DO ip = 0, np - 1
    2559              :          nx = boin(2, 1, ip) - boin(1, 1, ip) + 1
    2560              :          rcount(ip) = nx*ny*nz
    2561              :          rdispl(ip) = ny*nz*(boin(1, 1, ip) - 1)
    2562              :       END DO
    2563              : !$OMP END PARALLEL DO
    2564              : 
    2565          104 :       CALL group%alltoall(rbuf, scount, sdispl, sout, rcount, rdispl)
    2566              : 
    2567          104 :       CALL timestop(handle)
    2568              : 
    2569          104 :    END SUBROUTINE cube_transpose_6
    2570              : 
    2571              : ! **************************************************************************************************
    2572              : !> \brief ...
    2573              : ! **************************************************************************************************
    2574        19607 :    SUBROUTINE init_fft_scratch_pool()
    2575              : 
    2576        19607 :       CALL release_fft_scratch_pool()
    2577              : 
    2578              :       ! Allocate first scratch and mark it as used
    2579        19607 :       ALLOCATE (fft_scratch_first)
    2580       568603 :       ALLOCATE (fft_scratch_first%fft_scratch)
    2581              :       ! this is a very special scratch, it seems, we always keep it 'most - recent' so we will never delete it
    2582        19607 :       fft_scratch_first%fft_scratch%last_tick = HUGE(fft_scratch_first%fft_scratch%last_tick)
    2583              : 
    2584        19607 :       init_fft_pool = init_fft_pool + 1
    2585              : 
    2586        19607 :    END SUBROUTINE init_fft_scratch_pool
    2587              : 
    2588              : ! **************************************************************************************************
    2589              : !> \brief ...
    2590              : !> \param fft_scratch ...
    2591              : ! **************************************************************************************************
    2592        60958 :    SUBROUTINE deallocate_fft_scratch_type(fft_scratch)
    2593              :       TYPE(fft_scratch_type), INTENT(INOUT)    :: fft_scratch
    2594              : 
    2595              : #if defined(__OFFLOAD) && !defined(__NO_OFFLOAD_PW)
    2596              :       INTEGER                   :: ierr
    2597              :       COMPLEX(KIND=dp), POINTER :: dummy_ptr_z
    2598              : #endif
    2599              : 
    2600              :       ! deallocate structures
    2601        60958 :       IF (ASSOCIATED(fft_scratch%ziptr)) THEN
    2602        14109 :          CALL fft_dealloc(fft_scratch%ziptr)
    2603              :       END IF
    2604        60958 :       IF (ASSOCIATED(fft_scratch%zoptr)) THEN
    2605        14109 :          CALL fft_dealloc(fft_scratch%zoptr)
    2606              :       END IF
    2607        60958 :       IF (ASSOCIATED(fft_scratch%p1buf)) THEN
    2608            0 :          CALL fft_dealloc(fft_scratch%p1buf)
    2609              :       END IF
    2610        60958 :       IF (ASSOCIATED(fft_scratch%p2buf)) THEN
    2611              : #if defined(__OFFLOAD) && !defined(__NO_OFFLOAD_PW)
    2612              :          dummy_ptr_z => fft_scratch%p2buf(1, 1)
    2613              :          ierr = offload_free_pinned_mem(c_loc(dummy_ptr_z))
    2614              : #else
    2615            0 :          CALL fft_dealloc(fft_scratch%p2buf)
    2616              : #endif
    2617              :       END IF
    2618        60958 :       IF (ASSOCIATED(fft_scratch%p3buf)) THEN
    2619              : #if defined(__OFFLOAD) && !defined(__NO_OFFLOAD_PW)
    2620              :          dummy_ptr_z => fft_scratch%p3buf(1, 1)
    2621              :          ierr = offload_free_pinned_mem(c_loc(dummy_ptr_z))
    2622              : #else
    2623            0 :          CALL fft_dealloc(fft_scratch%p3buf)
    2624              : #endif
    2625              :       END IF
    2626        60958 :       IF (ASSOCIATED(fft_scratch%p4buf)) THEN
    2627              : #if defined(__OFFLOAD) && !defined(__NO_OFFLOAD_PW)
    2628              :          dummy_ptr_z => fft_scratch%p4buf(1, 1)
    2629              :          ierr = offload_free_pinned_mem(c_loc(dummy_ptr_z))
    2630              : #else
    2631            0 :          CALL fft_dealloc(fft_scratch%p4buf)
    2632              : #endif
    2633              :       END IF
    2634        60958 :       IF (ASSOCIATED(fft_scratch%p5buf)) THEN
    2635              : #if defined(__OFFLOAD) && !defined(__NO_OFFLOAD_PW)
    2636              :          dummy_ptr_z => fft_scratch%p5buf(1, 1)
    2637              :          ierr = offload_free_pinned_mem(c_loc(dummy_ptr_z))
    2638              : #else
    2639            0 :          CALL fft_dealloc(fft_scratch%p5buf)
    2640              : #endif
    2641              :       END IF
    2642        60958 :       IF (ASSOCIATED(fft_scratch%p6buf)) THEN
    2643            0 :          CALL fft_dealloc(fft_scratch%p6buf)
    2644              :       END IF
    2645        60958 :       IF (ASSOCIATED(fft_scratch%p7buf)) THEN
    2646              : #if defined(__OFFLOAD) && !defined(__NO_OFFLOAD_PW)
    2647              :          dummy_ptr_z => fft_scratch%p7buf(1, 1)
    2648              :          ierr = offload_free_pinned_mem(c_loc(dummy_ptr_z))
    2649              : #else
    2650            0 :          CALL fft_dealloc(fft_scratch%p7buf)
    2651              : #endif
    2652              :       END IF
    2653        60958 :       IF (ASSOCIATED(fft_scratch%r1buf)) THEN
    2654              : #if defined(__OFFLOAD) && !defined(__NO_OFFLOAD_PW)
    2655              :          dummy_ptr_z => fft_scratch%r1buf(1, 1)
    2656              :          ierr = offload_free_pinned_mem(c_loc(dummy_ptr_z))
    2657              : #else
    2658        27234 :          CALL fft_dealloc(fft_scratch%r1buf)
    2659              : #endif
    2660              :       END IF
    2661        60958 :       IF (ASSOCIATED(fft_scratch%r2buf)) THEN
    2662        27234 :          CALL fft_dealloc(fft_scratch%r2buf)
    2663              :       END IF
    2664        60958 :       IF (ASSOCIATED(fft_scratch%tbuf)) THEN
    2665              : #if defined(__OFFLOAD) && !defined(__NO_OFFLOAD_PW)
    2666              :          dummy_ptr_z => fft_scratch%tbuf(1, 1, 1)
    2667              :          ierr = offload_free_pinned_mem(c_loc(dummy_ptr_z))
    2668              : #else
    2669        27234 :          CALL fft_dealloc(fft_scratch%tbuf)
    2670              : #endif
    2671              :       END IF
    2672        60958 :       IF (ASSOCIATED(fft_scratch%a1buf)) THEN
    2673            8 :          CALL fft_dealloc(fft_scratch%a1buf)
    2674              :       END IF
    2675        60958 :       IF (ASSOCIATED(fft_scratch%a2buf)) THEN
    2676            8 :          CALL fft_dealloc(fft_scratch%a2buf)
    2677              :       END IF
    2678        60958 :       IF (ASSOCIATED(fft_scratch%a3buf)) THEN
    2679            8 :          CALL fft_dealloc(fft_scratch%a3buf)
    2680              :       END IF
    2681        60958 :       IF (ASSOCIATED(fft_scratch%a4buf)) THEN
    2682            8 :          CALL fft_dealloc(fft_scratch%a4buf)
    2683              :       END IF
    2684        60958 :       IF (ASSOCIATED(fft_scratch%a5buf)) THEN
    2685            8 :          CALL fft_dealloc(fft_scratch%a5buf)
    2686              :       END IF
    2687        60958 :       IF (ASSOCIATED(fft_scratch%a6buf)) THEN
    2688            8 :          CALL fft_dealloc(fft_scratch%a6buf)
    2689              :       END IF
    2690        60958 :       IF (ASSOCIATED(fft_scratch%scount)) THEN
    2691            0 :          DEALLOCATE (fft_scratch%scount, fft_scratch%rcount, &
    2692        27242 :                      fft_scratch%sdispl, fft_scratch%rdispl)
    2693              :       END IF
    2694        60958 :       IF (ASSOCIATED(fft_scratch%rr)) THEN
    2695        27226 :          DEALLOCATE (fft_scratch%rr)
    2696              :       END IF
    2697        60958 :       IF (ASSOCIATED(fft_scratch%xzbuf)) THEN
    2698            0 :          DEALLOCATE (fft_scratch%xzbuf)
    2699              :       END IF
    2700        60958 :       IF (ASSOCIATED(fft_scratch%yzbuf)) THEN
    2701            0 :          DEALLOCATE (fft_scratch%yzbuf)
    2702              :       END IF
    2703        60958 :       IF (ASSOCIATED(fft_scratch%xzbuf_sgl)) THEN
    2704            0 :          DEALLOCATE (fft_scratch%xzbuf_sgl)
    2705              :       END IF
    2706        60958 :       IF (ASSOCIATED(fft_scratch%yzbuf_sgl)) THEN
    2707            0 :          DEALLOCATE (fft_scratch%yzbuf_sgl)
    2708              :       END IF
    2709        60958 :       IF (ASSOCIATED(fft_scratch%ss)) THEN
    2710            8 :          DEALLOCATE (fft_scratch%ss)
    2711              :       END IF
    2712        60958 :       IF (ASSOCIATED(fft_scratch%tt)) THEN
    2713            8 :          DEALLOCATE (fft_scratch%tt)
    2714              :       END IF
    2715        60958 :       IF (ASSOCIATED(fft_scratch%pgrid)) THEN
    2716            0 :          DEALLOCATE (fft_scratch%pgrid)
    2717              :       END IF
    2718        60958 :       IF (ASSOCIATED(fft_scratch%pgcube)) THEN
    2719        27242 :          DEALLOCATE (fft_scratch%pgcube)
    2720              :       END IF
    2721        60958 :       IF (ASSOCIATED(fft_scratch%xcor)) THEN
    2722            0 :          DEALLOCATE (fft_scratch%xcor, fft_scratch%zcor)
    2723              :       END IF
    2724        60958 :       IF (ASSOCIATED(fft_scratch%pzcoord)) THEN
    2725            0 :          DEALLOCATE (fft_scratch%pzcoord)
    2726              :       END IF
    2727        60958 :       IF (ASSOCIATED(fft_scratch%xzcount)) THEN
    2728            0 :          DEALLOCATE (fft_scratch%xzcount, fft_scratch%yzcount)
    2729            0 :          DEALLOCATE (fft_scratch%xzdispl, fft_scratch%yzdispl)
    2730            0 :          fft_scratch%in = 0
    2731            0 :          fft_scratch%rsratio = 1._dp
    2732              :       END IF
    2733        60958 :       IF (ASSOCIATED(fft_scratch%rbuf1)) THEN
    2734            0 :          DEALLOCATE (fft_scratch%rbuf1)
    2735              :       END IF
    2736        60958 :       IF (ASSOCIATED(fft_scratch%rbuf2)) THEN
    2737            0 :          DEALLOCATE (fft_scratch%rbuf2)
    2738              :       END IF
    2739        60958 :       IF (ASSOCIATED(fft_scratch%rbuf3)) THEN
    2740            0 :          DEALLOCATE (fft_scratch%rbuf3)
    2741              :       END IF
    2742        60958 :       IF (ASSOCIATED(fft_scratch%rbuf4)) THEN
    2743            0 :          DEALLOCATE (fft_scratch%rbuf4)
    2744              :       END IF
    2745        60958 :       IF (ASSOCIATED(fft_scratch%rbuf5)) THEN
    2746            8 :          DEALLOCATE (fft_scratch%rbuf5)
    2747              :       END IF
    2748        60958 :       IF (ASSOCIATED(fft_scratch%rbuf6)) THEN
    2749            8 :          DEALLOCATE (fft_scratch%rbuf6)
    2750              :       END IF
    2751              : 
    2752        60958 :       IF (fft_scratch%cart_sub_comm(1) /= mp_comm_null) THEN
    2753            0 :          CALL fft_scratch%cart_sub_comm(1)%free()
    2754              :       END IF
    2755        60958 :       IF (fft_scratch%cart_sub_comm(2) /= mp_comm_null) THEN
    2756            0 :          CALL fft_scratch%cart_sub_comm(2)%free()
    2757              :       END IF
    2758              : 
    2759        60958 :       CALL fft_destroy_plan(fft_scratch%fft_plan(1))
    2760        60958 :       CALL fft_destroy_plan(fft_scratch%fft_plan(2))
    2761        60958 :       CALL fft_destroy_plan(fft_scratch%fft_plan(3))
    2762        60958 :       CALL fft_destroy_plan(fft_scratch%fft_plan(4))
    2763        60958 :       CALL fft_destroy_plan(fft_scratch%fft_plan(5))
    2764        60958 :       CALL fft_destroy_plan(fft_scratch%fft_plan(6))
    2765              : 
    2766        60958 :    END SUBROUTINE deallocate_fft_scratch_type
    2767              : 
    2768              : ! **************************************************************************************************
    2769              : !> \brief ...
    2770              : ! **************************************************************************************************
    2771        29294 :    SUBROUTINE release_fft_scratch_pool()
    2772              :       TYPE(fft_scratch_pool_type), POINTER               :: fft_scratch, fft_scratch_current
    2773              : 
    2774        29294 : !$    CPASSERT(.NOT. omp_in_parallel() .OR. 0 == omp_get_thread_num())
    2775              : 
    2776        29294 :       IF (init_fft_pool == 0) NULLIFY (fft_scratch_first)
    2777              : 
    2778        29294 :       fft_scratch => fft_scratch_first
    2779        60824 :       DO
    2780        90118 :          IF (ASSOCIATED(fft_scratch)) THEN
    2781        60824 :             fft_scratch_current => fft_scratch
    2782        60824 :             fft_scratch => fft_scratch_current%fft_scratch_next
    2783        60824 :             NULLIFY (fft_scratch_current%fft_scratch_next)
    2784              : 
    2785        60824 :             CALL deallocate_fft_scratch_type(fft_scratch_current%fft_scratch)
    2786              : 
    2787       243296 :             DEALLOCATE (fft_scratch_current%fft_scratch)
    2788        60824 :             DEALLOCATE (fft_scratch_current)
    2789              :          ELSE
    2790              :             EXIT
    2791              :          END IF
    2792              :       END DO
    2793              : 
    2794        29294 :       init_fft_pool = 0
    2795              : 
    2796        29294 :    END SUBROUTINE release_fft_scratch_pool
    2797              : 
    2798              : ! **************************************************************************************************
    2799              : !> \brief ...
    2800              : ! **************************************************************************************************
    2801      3472450 :    SUBROUTINE resize_fft_scratch_pool()
    2802              : 
    2803              :       INTEGER                                            :: last_tick, nscratch
    2804              :       TYPE(fft_scratch_pool_type), POINTER               :: fft_scratch_current, fft_scratch_old
    2805              : 
    2806      3472450 :       nscratch = 0
    2807              : 
    2808      3472450 :       last_tick = HUGE(last_tick)
    2809      3472450 :       NULLIFY (fft_scratch_old)
    2810              : 
    2811              :       ! start at the global pool, count, and find a deletion candidate
    2812      3472450 :       fft_scratch_current => fft_scratch_first
    2813     18719328 :       DO
    2814     22191778 :          IF (ASSOCIATED(fft_scratch_current)) THEN
    2815     18719328 :             nscratch = nscratch + 1
    2816              :             ! is this a candidate for deletion (i.e. least recently used, and not in use)
    2817     18719328 :             IF (.NOT. fft_scratch_current%fft_scratch%in_use) THEN
    2818     15246878 :                IF (fft_scratch_current%fft_scratch%last_tick < last_tick) THEN
    2819      7124542 :                   last_tick = fft_scratch_current%fft_scratch%last_tick
    2820      7124542 :                   fft_scratch_old => fft_scratch_current
    2821              :                END IF
    2822              :             END IF
    2823     18719328 :             fft_scratch_current => fft_scratch_current%fft_scratch_next
    2824              :          ELSE
    2825              :             EXIT
    2826              :          END IF
    2827              :       END DO
    2828              : 
    2829              :       ! we should delete a scratch
    2830      3472450 :       IF (nscratch > fft_pool_scratch_limit) THEN
    2831              :          ! note that we never deallocate the first (special) element of the list
    2832          134 :          IF (ASSOCIATED(fft_scratch_old)) THEN
    2833              :             fft_scratch_current => fft_scratch_first
    2834              :             DO
    2835         2278 :                IF (ASSOCIATED(fft_scratch_current)) THEN
    2836              :                   ! should we delete the next in the list?
    2837         2144 :                   IF (ASSOCIATED(fft_scratch_current%fft_scratch_next, fft_scratch_old)) THEN
    2838              :                      ! fix the linked list
    2839          134 :                      fft_scratch_current%fft_scratch_next => fft_scratch_old%fft_scratch_next
    2840              : 
    2841              :                      ! deallocate the element
    2842          134 :                      CALL deallocate_fft_scratch_type(fft_scratch_old%fft_scratch)
    2843          536 :                      DEALLOCATE (fft_scratch_old%fft_scratch)
    2844          134 :                      DEALLOCATE (fft_scratch_old)
    2845              : 
    2846              :                   ELSE
    2847              :                      fft_scratch_current => fft_scratch_current%fft_scratch_next
    2848              :                   END IF
    2849              :                ELSE
    2850              :                   EXIT
    2851              :                END IF
    2852              :             END DO
    2853              : 
    2854              :          ELSE
    2855            0 :             CPWARN("The number of the scratches exceeded the limit, but none could be deallocated")
    2856              :          END IF
    2857              :       END IF
    2858              : 
    2859      3472450 :    END SUBROUTINE resize_fft_scratch_pool
    2860              : 
    2861              : ! **************************************************************************************************
    2862              : !> \brief ...
    2863              : !> \param fft_scratch ...
    2864              : !> \param tf_type ...
    2865              : !> \param n ...
    2866              : !> \param fft_sizes ...
    2867              : ! **************************************************************************************************
    2868      3472450 :    SUBROUTINE get_fft_scratch(fft_scratch, tf_type, n, fft_sizes)
    2869              :       TYPE(fft_scratch_type), POINTER          :: fft_scratch
    2870              :       INTEGER, INTENT(IN)                      :: tf_type
    2871              :       INTEGER, DIMENSION(:), INTENT(IN)        :: n
    2872              :       TYPE(fft_scratch_sizes), INTENT(IN), &
    2873              :          OPTIONAL                              :: fft_sizes
    2874              : 
    2875              :       CHARACTER(len=*), PARAMETER :: routineN = 'get_fft_scratch'
    2876              : 
    2877              :       INTEGER :: coord(2), DIM(2), handle, i, ix, iz, lg, lmax, m1, m2, &
    2878              :                  mcx2, mcy3, mcz1, mcz2, mg, mmax, mx1, mx2, my1, my3, mz1, mz2, mz3, &
    2879              :                  nbx, nbz, nm, nmax, nmray, np, nx, ny, nyzray, nz, pos(2)
    2880              :       INTEGER, DIMENSION(3)                    :: pcoord
    2881              :       LOGICAL                                  :: equal
    2882              :       LOGICAL, DIMENSION(2)                    :: dims
    2883              :       TYPE(fft_scratch_pool_type), POINTER     :: fft_scratch_current, &
    2884              :                                                   fft_scratch_last, &
    2885              :                                                   fft_scratch_new
    2886              : #if defined(__OFFLOAD) && !defined(__NO_OFFLOAD_PW)
    2887              :       INTEGER                   :: ierr
    2888              :       INTEGER(KIND=C_SIZE_T)    :: length
    2889              :       TYPE(C_PTR)               :: cptr_r1buf, cptr_tbuf, &
    2890              :                                    cptr_p2buf, cptr_p3buf, cptr_p4buf, cptr_p5buf, cptr_p7buf
    2891              : #endif
    2892      3472450 :       CALL timeset(routineN, handle)
    2893              : 
    2894              :       ! this is the place to check that the scratch_pool does not grow without limits
    2895              :       ! before we add a new scratch check the size of the pool and release some of the list if needed
    2896      3472450 :       CALL resize_fft_scratch_pool()
    2897              : 
    2898              :       ! get the required scratch
    2899      3472450 : !$OMP ATOMIC
    2900              :       tick_fft_pool = tick_fft_pool + 1
    2901      3472450 :       fft_scratch_current => fft_scratch_first
    2902              :       DO
    2903     10829728 :          IF (ASSOCIATED(fft_scratch_current)) THEN
    2904     10788377 :             IF (fft_scratch_current%fft_scratch%in_use) THEN
    2905      3472450 :                fft_scratch_last => fft_scratch_current
    2906      3472450 :                fft_scratch_current => fft_scratch_current%fft_scratch_next
    2907      3472450 :                CYCLE
    2908              :             END IF
    2909      7315927 :             IF (tf_type /= fft_scratch_current%fft_scratch%tf_type) THEN
    2910       441744 :                fft_scratch_last => fft_scratch_current
    2911       441744 :                fft_scratch_current => fft_scratch_current%fft_scratch_next
    2912       441744 :                CYCLE
    2913              :             END IF
    2914     17414602 :             IF (.NOT. ALL(n == fft_scratch_current%fft_scratch%nfft)) THEN
    2915      3361550 :                fft_scratch_last => fft_scratch_current
    2916      3361550 :                fft_scratch_current => fft_scratch_current%fft_scratch_next
    2917      3361550 :                CYCLE
    2918              :             END IF
    2919      3512633 :             IF (PRESENT(fft_sizes)) THEN
    2920      2873634 :                IF (fft_sizes%rs_group /= fft_scratch_current%fft_scratch%group) THEN
    2921        81214 :                   fft_scratch_last => fft_scratch_current
    2922        81214 :                   fft_scratch_current => fft_scratch_current%fft_scratch_next
    2923        81214 :                   CYCLE
    2924              :                END IF
    2925      2792420 :                CALL is_equal(fft_sizes, fft_scratch_current%fft_scratch%sizes, equal)
    2926      2792420 :                IF (.NOT. equal) THEN
    2927          320 :                   fft_scratch_last => fft_scratch_current
    2928          320 :                   fft_scratch_current => fft_scratch_current%fft_scratch_next
    2929          320 :                   CYCLE
    2930              :                END IF
    2931              :             END IF
    2932              :             ! Success
    2933      3431099 :             fft_scratch => fft_scratch_current%fft_scratch
    2934      3431099 :             fft_scratch_current%fft_scratch%in_use = .TRUE.
    2935      3431099 :             EXIT
    2936              :          ELSE
    2937              :             ! We cannot find the scratch type in this pool
    2938              :             ! Generate a new scratch set
    2939        41351 :             ALLOCATE (fft_scratch_new)
    2940      1075126 :             ALLOCATE (fft_scratch_new%fft_scratch)
    2941              : 
    2942        41351 :             IF (tf_type /= 400) THEN
    2943        27242 :                fft_scratch_new%fft_scratch%sizes = fft_sizes
    2944        27242 :                np = fft_sizes%numtask
    2945              :                ALLOCATE (fft_scratch_new%fft_scratch%scount(0:np - 1), fft_scratch_new%fft_scratch%rcount(0:np - 1), &
    2946              :                          fft_scratch_new%fft_scratch%sdispl(0:np - 1), fft_scratch_new%fft_scratch%rdispl(0:np - 1), &
    2947       245178 :                          fft_scratch_new%fft_scratch%pgcube(0:np - 1, 2))
    2948              :             END IF
    2949              : 
    2950            0 :             SELECT CASE (tf_type)
    2951              :             CASE DEFAULT
    2952            0 :                CPABORT("Invalid scratch type.")
    2953              :             CASE (100) ! fft3d_pb: full cube distribution
    2954            0 :                CPASSERT(PRESENT(fft_sizes))
    2955            0 :                mx1 = fft_sizes%mx1
    2956            0 :                my1 = fft_sizes%my1
    2957            0 :                mx2 = fft_sizes%mx2
    2958            0 :                mz2 = fft_sizes%mz2
    2959            0 :                my3 = fft_sizes%my3
    2960            0 :                mz3 = fft_sizes%mz3
    2961            0 :                CALL fft_alloc(fft_scratch_new%fft_scratch%a1buf, [mx1*my1, n(3)])
    2962            0 :                CALL fft_alloc(fft_scratch_new%fft_scratch%a2buf, [n(3), mx1*my1])
    2963            0 :                CALL fft_alloc(fft_scratch_new%fft_scratch%a3buf, [mx2*mz2, n(2)])
    2964            0 :                CALL fft_alloc(fft_scratch_new%fft_scratch%a4buf, [n(2), mx2*mz2])
    2965            0 :                CALL fft_alloc(fft_scratch_new%fft_scratch%a5buf, [my3*mz3, n(1)])
    2966            0 :                CALL fft_alloc(fft_scratch_new%fft_scratch%a6buf, [n(1), my3*mz3])
    2967            0 :                fft_scratch_new%fft_scratch%group = fft_sizes%rs_group
    2968              : 
    2969            0 :                dim = fft_sizes%rs_group%num_pe_cart
    2970            0 :                pos = fft_sizes%rs_group%mepos_cart
    2971            0 :                fft_scratch_new%fft_scratch%mip = fft_sizes%rs_group%mepos
    2972            0 :                fft_scratch_new%fft_scratch%dim = dim
    2973            0 :                fft_scratch_new%fft_scratch%pos = pos
    2974            0 :                mcz1 = fft_sizes%mcz1
    2975            0 :                mcx2 = fft_sizes%mcx2
    2976            0 :                mcz2 = fft_sizes%mcz2
    2977            0 :                mcy3 = fft_sizes%mcy3
    2978            0 :                ALLOCATE (fft_scratch_new%fft_scratch%rbuf1(mx2*my1*mcz2, 0:DIM(2) - 1))
    2979            0 :                ALLOCATE (fft_scratch_new%fft_scratch%rbuf2(mx1*my1*mcz2, 0:DIM(2) - 1))
    2980            0 :                ALLOCATE (fft_scratch_new%fft_scratch%rbuf3(mx2*mz3*mcy3, 0:DIM(1) - 1))
    2981            0 :                ALLOCATE (fft_scratch_new%fft_scratch%rbuf4(mx2*mz2*mcy3, 0:DIM(1) - 1))
    2982              : 
    2983            0 :                dims = [.TRUE., .FALSE.]
    2984            0 :                CALL fft_scratch_new%fft_scratch%cart_sub_comm(1)%from_sub(fft_sizes%rs_group, dims)
    2985            0 :                dims = [.FALSE., .TRUE.]
    2986            0 :                CALL fft_scratch_new%fft_scratch%cart_sub_comm(2)%from_sub(fft_sizes%rs_group, dims)
    2987              : 
    2988              :                !initialise pgcube
    2989            0 :                DO i = 0, DIM(1) - 1
    2990            0 :                   coord = [i, pos(2)]
    2991            0 :                   CALL fft_sizes%rs_group%rank_cart(coord, fft_scratch_new%fft_scratch%pgcube(i, 1))
    2992              :                END DO
    2993            0 :                DO i = 0, DIM(2) - 1
    2994            0 :                   coord = [pos(1), i]
    2995            0 :                   CALL fft_sizes%rs_group%rank_cart(coord, fft_scratch_new%fft_scratch%pgcube(i, 2))
    2996              :                END DO
    2997              : 
    2998              :                !set up fft plans
    2999              :                CALL fft_create_plan_1dm(fft_scratch_new%fft_scratch%fft_plan(1), fft_type, FWFFT, .TRUE., n(3), mx1*my1, &
    3000            0 :                                         fft_scratch_new%fft_scratch%a1buf, fft_scratch_new%fft_scratch%a2buf, fft_plan_style)
    3001              :                CALL fft_create_plan_1dm(fft_scratch_new%fft_scratch%fft_plan(2), fft_type, FWFFT, .TRUE., n(2), mx2*mz2, &
    3002            0 :                                         fft_scratch_new%fft_scratch%a3buf, fft_scratch_new%fft_scratch%a4buf, fft_plan_style)
    3003              :                CALL fft_create_plan_1dm(fft_scratch_new%fft_scratch%fft_plan(3), fft_type, FWFFT, .TRUE., n(1), my3*mz3, &
    3004            0 :                                         fft_scratch_new%fft_scratch%a5buf, fft_scratch_new%fft_scratch%a6buf, fft_plan_style)
    3005              :                CALL fft_create_plan_1dm(fft_scratch_new%fft_scratch%fft_plan(4), fft_type, BWFFT, .TRUE., n(1), my3*mz3, &
    3006            0 :                                         fft_scratch_new%fft_scratch%a6buf, fft_scratch_new%fft_scratch%a5buf, fft_plan_style)
    3007              :                CALL fft_create_plan_1dm(fft_scratch_new%fft_scratch%fft_plan(5), fft_type, BWFFT, .TRUE., n(2), mx2*mz2, &
    3008            0 :                                         fft_scratch_new%fft_scratch%a4buf, fft_scratch_new%fft_scratch%a3buf, fft_plan_style)
    3009              :                CALL fft_create_plan_1dm(fft_scratch_new%fft_scratch%fft_plan(6), fft_type, BWFFT, .TRUE., n(3), mx1*my1, &
    3010            0 :                                         fft_scratch_new%fft_scratch%a2buf, fft_scratch_new%fft_scratch%a1buf, fft_plan_style)
    3011              : 
    3012              :             CASE (101) ! fft3d_pb: full cube distribution (dim 1)
    3013            8 :                CPASSERT(PRESENT(fft_sizes))
    3014            8 :                mx1 = fft_sizes%mx1
    3015            8 :                my1 = fft_sizes%my1
    3016            8 :                mz1 = fft_sizes%mz1
    3017            8 :                my3 = fft_sizes%my3
    3018            8 :                mz3 = fft_sizes%mz3
    3019           24 :                CALL fft_alloc(fft_scratch_new%fft_scratch%a1buf, [mx1*my1, n(3)])
    3020           24 :                CALL fft_alloc(fft_scratch_new%fft_scratch%a2buf, [n(3), mx1*my1])
    3021            8 :                fft_scratch_new%fft_scratch%group = fft_sizes%rs_group
    3022           24 :                CALL fft_alloc(fft_scratch_new%fft_scratch%a3buf, [mx1*mz1, n(2)])
    3023           24 :                CALL fft_alloc(fft_scratch_new%fft_scratch%a4buf, [n(2), mx1*mz1])
    3024           24 :                CALL fft_alloc(fft_scratch_new%fft_scratch%a5buf, [my3*mz3, n(1)])
    3025           24 :                CALL fft_alloc(fft_scratch_new%fft_scratch%a6buf, [n(1), my3*mz3])
    3026              : 
    3027           24 :                dim = fft_sizes%rs_group%num_pe_cart
    3028           24 :                pos = fft_sizes%rs_group%mepos_cart
    3029            8 :                fft_scratch_new%fft_scratch%mip = fft_sizes%rs_group%mepos
    3030           24 :                fft_scratch_new%fft_scratch%dim = dim
    3031           24 :                fft_scratch_new%fft_scratch%pos = pos
    3032            8 :                mcy3 = fft_sizes%mcy3
    3033           32 :                ALLOCATE (fft_scratch_new%fft_scratch%rbuf5(mx1*mz3*mcy3, 0:DIM(1) - 1))
    3034           32 :                ALLOCATE (fft_scratch_new%fft_scratch%rbuf6(mx1*mz1*mcy3, 0:DIM(1) - 1))
    3035              : 
    3036              :                !set up fft plans
    3037              :                CALL fft_create_plan_1dm(fft_scratch_new%fft_scratch%fft_plan(1), fft_type, FWFFT, .TRUE., n(3), mx1*my1, &
    3038            8 :                                         fft_scratch_new%fft_scratch%a1buf, fft_scratch_new%fft_scratch%a3buf, fft_plan_style)
    3039              :                CALL fft_create_plan_1dm(fft_scratch_new%fft_scratch%fft_plan(2), fft_type, FWFFT, .TRUE., n(2), mx1*mz1, &
    3040            8 :                                         fft_scratch_new%fft_scratch%a3buf, fft_scratch_new%fft_scratch%a4buf, fft_plan_style)
    3041              :                CALL fft_create_plan_1dm(fft_scratch_new%fft_scratch%fft_plan(3), fft_type, FWFFT, .TRUE., n(1), my3*mz3, &
    3042            8 :                                         fft_scratch_new%fft_scratch%a5buf, fft_scratch_new%fft_scratch%a6buf, fft_plan_style)
    3043              :                CALL fft_create_plan_1dm(fft_scratch_new%fft_scratch%fft_plan(4), fft_type, BWFFT, .TRUE., n(1), my3*mz3, &
    3044            8 :                                         fft_scratch_new%fft_scratch%a6buf, fft_scratch_new%fft_scratch%a5buf, fft_plan_style)
    3045              :                CALL fft_create_plan_1dm(fft_scratch_new%fft_scratch%fft_plan(5), fft_type, BWFFT, .TRUE., n(2), mx1*mz1, &
    3046            8 :                                         fft_scratch_new%fft_scratch%a4buf, fft_scratch_new%fft_scratch%a3buf, fft_plan_style)
    3047              :                CALL fft_create_plan_1dm(fft_scratch_new%fft_scratch%fft_plan(6), fft_type, BWFFT, .TRUE., n(3), mx1*my1, &
    3048            8 :                                         fft_scratch_new%fft_scratch%a3buf, fft_scratch_new%fft_scratch%a1buf, fft_plan_style)
    3049              : 
    3050              :             CASE (200) ! fft3d_ps: plane distribution
    3051        27234 :                CPASSERT(PRESENT(fft_sizes))
    3052        27234 :                nx = fft_sizes%nx
    3053        27234 :                ny = fft_sizes%ny
    3054        27234 :                nz = fft_sizes%nz
    3055        27234 :                mx2 = fft_sizes%mx2
    3056        27234 :                lmax = fft_sizes%lmax
    3057        27234 :                mmax = fft_sizes%mmax
    3058        27234 :                lg = fft_sizes%lg
    3059        27234 :                mg = fft_sizes%mg
    3060        27234 :                np = fft_sizes%numtask
    3061        27234 :                nmray = fft_sizes%nmray
    3062        27234 :                nyzray = fft_sizes%nyzray
    3063              : #if defined(__OFFLOAD) && !defined(__NO_OFFLOAD_PW)
    3064              :                length = INT(2*dp_size*MAX(mmax, 1)*MAX(lmax, 1), KIND=C_SIZE_T)
    3065              :                ierr = offload_malloc_pinned_mem(cptr_r1buf, length)
    3066              :                CPASSERT(ierr == 0)
    3067              :                CALL c_f_pointer(cptr_r1buf, fft_scratch_new%fft_scratch%r1buf, [MAX(mmax, 1), MAX(lmax, 1)])
    3068              :                length = INT(2*dp_size*MAX(ny, 1)*MAX(nz, 1)*MAX(nx, 1), KIND=C_SIZE_T)
    3069              :                ierr = offload_malloc_pinned_mem(cptr_tbuf, length)
    3070              :                CPASSERT(ierr == 0)
    3071              :                CALL c_f_pointer(cptr_tbuf, fft_scratch_new%fft_scratch%tbuf, [MAX(ny, 1), MAX(nz, 1), MAX(nx, 1)])
    3072              : #else
    3073        81702 :                CALL fft_alloc(fft_scratch_new%fft_scratch%r1buf, [mmax, lmax])
    3074       108936 :                CALL fft_alloc(fft_scratch_new%fft_scratch%tbuf, [ny, nz, nx])
    3075              : #endif
    3076        27234 :                fft_scratch_new%fft_scratch%group = fft_sizes%rs_group
    3077        81702 :                CALL fft_alloc(fft_scratch_new%fft_scratch%r2buf, [lg, mg])
    3078        27234 :                nm = nmray*mx2
    3079        27234 :                IF (alltoall_sgl) THEN
    3080           32 :                   ALLOCATE (fft_scratch_new%fft_scratch%ss(mmax, lmax))
    3081           32 :                   ALLOCATE (fft_scratch_new%fft_scratch%tt(nm, 0:np - 1))
    3082              :                ELSE
    3083       108904 :                   ALLOCATE (fft_scratch_new%fft_scratch%rr(nm, 0:np - 1))
    3084              :                END IF
    3085              : 
    3086              :                !set up fft plans
    3087              :                CALL fft_create_plan_1dm(fft_scratch_new%fft_scratch%fft_plan(1), fft_type, FWFFT, .TRUE., nz, nx*ny, &
    3088        27234 :                                         fft_scratch_new%fft_scratch%tbuf, fft_scratch_new%fft_scratch%r1buf, fft_plan_style)
    3089              :                CALL fft_create_plan_1dm(fft_scratch_new%fft_scratch%fft_plan(2), fft_type, FWFFT, .TRUE., ny, nx*nz, &
    3090        27234 :                                         fft_scratch_new%fft_scratch%r1buf, fft_scratch_new%fft_scratch%tbuf, fft_plan_style)
    3091              :                CALL fft_create_plan_1dm(fft_scratch_new%fft_scratch%fft_plan(3), fft_type, FWFFT, .TRUE., n(1), nyzray, &
    3092        27234 :                                         fft_scratch_new%fft_scratch%r1buf, fft_scratch_new%fft_scratch%r2buf, fft_plan_style)
    3093              :                CALL fft_create_plan_1dm(fft_scratch_new%fft_scratch%fft_plan(4), fft_type, BWFFT, .TRUE., n(1), nyzray, &
    3094        27234 :                                         fft_scratch_new%fft_scratch%r2buf, fft_scratch_new%fft_scratch%r1buf, fft_plan_style)
    3095              :                CALL fft_create_plan_1dm(fft_scratch_new%fft_scratch%fft_plan(5), fft_type, BWFFT, .TRUE., ny, nx*nz, &
    3096        27234 :                                         fft_scratch_new%fft_scratch%tbuf, fft_scratch_new%fft_scratch%r1buf, fft_plan_style)
    3097              :                CALL fft_create_plan_1dm(fft_scratch_new%fft_scratch%fft_plan(6), fft_type, BWFFT, .TRUE., nz, nx*ny, &
    3098        27234 :                                         fft_scratch_new%fft_scratch%r1buf, fft_scratch_new%fft_scratch%tbuf, fft_plan_style)
    3099              : 
    3100              :             CASE (300) ! fft3d_ps: block distribution
    3101            0 :                CPASSERT(PRESENT(fft_sizes))
    3102            0 :                mx1 = fft_sizes%mx1
    3103            0 :                mx2 = fft_sizes%mx2
    3104            0 :                my1 = fft_sizes%my1
    3105            0 :                mz2 = fft_sizes%mz2
    3106            0 :                mcx2 = fft_sizes%mcx2
    3107            0 :                lg = fft_sizes%lg
    3108            0 :                mg = fft_sizes%mg
    3109            0 :                nmax = fft_sizes%nmax
    3110            0 :                nmray = fft_sizes%nmray
    3111            0 :                nyzray = fft_sizes%nyzray
    3112            0 :                m1 = fft_sizes%r_dim(1)
    3113            0 :                m2 = fft_sizes%r_dim(2)
    3114            0 :                nbx = fft_sizes%nbx
    3115            0 :                nbz = fft_sizes%nbz
    3116            0 :                CALL fft_alloc(fft_scratch_new%fft_scratch%p1buf, [mx1*my1, n(3)])
    3117            0 :                CALL fft_alloc(fft_scratch_new%fft_scratch%p6buf, [lg, mg])
    3118              : #if defined(__OFFLOAD) && !defined(__NO_OFFLOAD_PW)
    3119              :                length = INT(2*dp_size*MAX(n(3), 1)*MAX(mx1*my1, 1), KIND=C_SIZE_T)
    3120              :                ierr = offload_malloc_pinned_mem(cptr_p2buf, length)
    3121              :                CPASSERT(ierr == 0)
    3122              :                CALL c_f_pointer(cptr_p2buf, fft_scratch_new%fft_scratch%p2buf, [MAX(n(3), 1), MAX(mx1*my1, 1)])
    3123              :                length = INT(2*dp_size*MAX(mx2*mz2, 1)*MAX(n(2), 1), KIND=C_SIZE_T)
    3124              :                ierr = offload_malloc_pinned_mem(cptr_p3buf, length)
    3125              :                CPASSERT(ierr == 0)
    3126              :                CALL c_f_pointer(cptr_p3buf, fft_scratch_new%fft_scratch%p3buf, [MAX(mx2*mz2, 1), MAX(n(2), 1)])
    3127              :                length = INT(2*dp_size*MAX(n(2), 1)*MAX(mx2*mz2, 1), KIND=C_SIZE_T)
    3128              :                ierr = offload_malloc_pinned_mem(cptr_p4buf, length)
    3129              :                CPASSERT(ierr == 0)
    3130              :                CALL c_f_pointer(cptr_p4buf, fft_scratch_new%fft_scratch%p4buf, [MAX(n(2), 1), MAX(mx2*mz2, 1)])
    3131              :                length = INT(2*dp_size*MAX(nyzray, 1)*MAX(n(1), 1), KIND=C_SIZE_T)
    3132              :                ierr = offload_malloc_pinned_mem(cptr_p5buf, length)
    3133              :                CPASSERT(ierr == 0)
    3134              :                CALL c_f_pointer(cptr_p5buf, fft_scratch_new%fft_scratch%p5buf, [MAX(nyzray, 1), MAX(n(1), 1)])
    3135              :                length = INT(2*dp_size*MAX(mg, 1)*MAX(lg, 1), KIND=C_SIZE_T)
    3136              :                ierr = offload_malloc_pinned_mem(cptr_p7buf, length)
    3137              :                CPASSERT(ierr == 0)
    3138              :                CALL c_f_pointer(cptr_p7buf, fft_scratch_new%fft_scratch%p7buf, [MAX(mg, 1), MAX(lg, 1)])
    3139              : #else
    3140            0 :                CALL fft_alloc(fft_scratch_new%fft_scratch%p2buf, [n(3), mx1*my1])
    3141            0 :                CALL fft_alloc(fft_scratch_new%fft_scratch%p3buf, [mx2*mz2, n(2)])
    3142            0 :                CALL fft_alloc(fft_scratch_new%fft_scratch%p4buf, [n(2), mx2*mz2])
    3143            0 :                CALL fft_alloc(fft_scratch_new%fft_scratch%p5buf, [nyzray, n(1)])
    3144            0 :                CALL fft_alloc(fft_scratch_new%fft_scratch%p7buf, [mg, lg])
    3145              : #endif
    3146            0 :                IF (alltoall_sgl) THEN
    3147            0 :                   ALLOCATE (fft_scratch_new%fft_scratch%yzbuf_sgl(mg*lg))
    3148            0 :                   ALLOCATE (fft_scratch_new%fft_scratch%xzbuf_sgl(n(2)*mx2*mz2))
    3149              :                ELSE
    3150            0 :                   ALLOCATE (fft_scratch_new%fft_scratch%yzbuf(mg*lg))
    3151            0 :                   ALLOCATE (fft_scratch_new%fft_scratch%xzbuf(n(2)*mx2*mz2))
    3152              :                END IF
    3153            0 :                ALLOCATE (fft_scratch_new%fft_scratch%pgrid(0:m1 - 1, 0:m2 - 1))
    3154            0 :                ALLOCATE (fft_scratch_new%fft_scratch%xcor(nbx))
    3155            0 :                ALLOCATE (fft_scratch_new%fft_scratch%zcor(nbz))
    3156            0 :                ALLOCATE (fft_scratch_new%fft_scratch%pzcoord(0:np - 1))
    3157              :                ALLOCATE (fft_scratch_new%fft_scratch%xzcount(0:np - 1), &
    3158            0 :                          fft_scratch_new%fft_scratch%yzcount(0:np - 1))
    3159              :                ALLOCATE (fft_scratch_new%fft_scratch%xzdispl(0:np - 1), &
    3160            0 :                          fft_scratch_new%fft_scratch%yzdispl(0:np - 1))
    3161            0 :                fft_scratch_new%fft_scratch%group = fft_sizes%rs_group
    3162              : 
    3163            0 :                dim = fft_sizes%rs_group%num_pe_cart
    3164            0 :                pos = fft_sizes%rs_group%mepos_cart
    3165            0 :                fft_scratch_new%fft_scratch%mip = fft_sizes%rs_group%mepos
    3166            0 :                fft_scratch_new%fft_scratch%dim = dim
    3167            0 :                fft_scratch_new%fft_scratch%pos = pos
    3168            0 :                mcz1 = fft_sizes%mcz1
    3169            0 :                mcz2 = fft_sizes%mcz2
    3170            0 :                ALLOCATE (fft_scratch_new%fft_scratch%rbuf1(mx2*my1*mcz2, 0:DIM(2) - 1))
    3171            0 :                ALLOCATE (fft_scratch_new%fft_scratch%rbuf2(mx1*my1*mcz2, 0:DIM(2) - 1))
    3172              : 
    3173            0 :                dims = [.FALSE., .TRUE.]
    3174            0 :                CALL fft_scratch_new%fft_scratch%cart_sub_comm(2)%from_sub(fft_sizes%rs_group, dims)
    3175              : 
    3176              :                !initialise pgcube
    3177            0 :                DO i = 0, DIM(2) - 1
    3178            0 :                   coord = [pos(1), i]
    3179            0 :                   CALL fft_sizes%rs_group%rank_cart(coord, fft_scratch_new%fft_scratch%pgcube(i, 2))
    3180              :                END DO
    3181              : 
    3182              :                !initialise pgrid
    3183            0 :                DO ix = 0, m1 - 1
    3184            0 :                   DO iz = 0, m2 - 1
    3185            0 :                      coord = [ix, iz]
    3186            0 :                      CALL fft_sizes%rs_group%rank_cart(coord, fft_scratch_new%fft_scratch%pgrid(ix, iz))
    3187              :                   END DO
    3188              :                END DO
    3189              : 
    3190              :                !initialise pzcoord
    3191            0 :                DO i = 0, np - 1
    3192            0 :                   CALL fft_sizes%rs_group%coords(i, pcoord)
    3193            0 :                   fft_scratch_new%fft_scratch%pzcoord(i) = pcoord(2)
    3194              :                END DO
    3195              : 
    3196              :                !set up fft plans
    3197              :                CALL fft_create_plan_1dm(fft_scratch_new%fft_scratch%fft_plan(1), fft_type, FWFFT, .TRUE., n(3), mx1*my1, &
    3198            0 :                                         fft_scratch_new%fft_scratch%p1buf, fft_scratch_new%fft_scratch%p2buf, fft_plan_style)
    3199              :                CALL fft_create_plan_1dm(fft_scratch_new%fft_scratch%fft_plan(2), fft_type, FWFFT, .TRUE., n(2), mx2*mz2, &
    3200            0 :                                         fft_scratch_new%fft_scratch%p3buf, fft_scratch_new%fft_scratch%p4buf, fft_plan_style)
    3201              :                CALL fft_create_plan_1dm(fft_scratch_new%fft_scratch%fft_plan(3), fft_type, FWFFT, .TRUE., n(1), nyzray, &
    3202            0 :                                         fft_scratch_new%fft_scratch%p5buf, fft_scratch_new%fft_scratch%p6buf, fft_plan_style)
    3203              :                CALL fft_create_plan_1dm(fft_scratch_new%fft_scratch%fft_plan(4), fft_type, BWFFT, .TRUE., n(1), nyzray, &
    3204            0 :                                         fft_scratch_new%fft_scratch%p6buf, fft_scratch_new%fft_scratch%p7buf, fft_plan_style)
    3205              :                CALL fft_create_plan_1dm(fft_scratch_new%fft_scratch%fft_plan(5), fft_type, BWFFT, .TRUE., n(2), mx2*mz2, &
    3206            0 :                                         fft_scratch_new%fft_scratch%p4buf, fft_scratch_new%fft_scratch%p3buf, fft_plan_style)
    3207              :                CALL fft_create_plan_1dm(fft_scratch_new%fft_scratch%fft_plan(6), fft_type, BWFFT, .TRUE., n(3), mx1*my1, &
    3208            0 :                                         fft_scratch_new%fft_scratch%p3buf, fft_scratch_new%fft_scratch%p1buf, fft_plan_style)
    3209              : 
    3210              :             CASE (400) ! serial FFT
    3211        14109 :                np = 0
    3212        14109 :                CALL fft_alloc(fft_scratch_new%fft_scratch%ziptr, n)
    3213        14109 :                CALL fft_alloc(fft_scratch_new%fft_scratch%zoptr, n)
    3214              : 
    3215              :                !in place plans
    3216              :                CALL fft_create_plan_3d(fft_scratch_new%fft_scratch%fft_plan(1), fft_type, .TRUE., FWFFT, n, &
    3217        14109 :                                        fft_scratch_new%fft_scratch%ziptr, fft_scratch_new%fft_scratch%ziptr, fft_plan_style)
    3218              :                CALL fft_create_plan_3d(fft_scratch_new%fft_scratch%fft_plan(2), fft_type, .TRUE., BWFFT, n, &
    3219        14109 :                                        fft_scratch_new%fft_scratch%ziptr, fft_scratch_new%fft_scratch%ziptr, fft_plan_style)
    3220              :                ! out of place plans
    3221              :                CALL fft_create_plan_3d(fft_scratch_new%fft_scratch%fft_plan(3), fft_type, .FALSE., FWFFT, n, &
    3222        14109 :                                        fft_scratch_new%fft_scratch%ziptr, fft_scratch_new%fft_scratch%zoptr, fft_plan_style)
    3223              :                CALL fft_create_plan_3d(fft_scratch_new%fft_scratch%fft_plan(4), fft_type, .FALSE., BWFFT, n, &
    3224        55460 :                                        fft_scratch_new%fft_scratch%ziptr, fft_scratch_new%fft_scratch%zoptr, fft_plan_style)
    3225              : 
    3226              :             END SELECT
    3227              : 
    3228        41351 :             NULLIFY (fft_scratch_new%fft_scratch_next)
    3229              :             fft_scratch_new%fft_scratch%fft_scratch_id = &
    3230        41351 :                fft_scratch_last%fft_scratch%fft_scratch_id + 1
    3231        41351 :             fft_scratch_new%fft_scratch%in_use = .TRUE.
    3232       165404 :             fft_scratch_new%fft_scratch%nfft = n
    3233        41351 :             fft_scratch_last%fft_scratch_next => fft_scratch_new
    3234        41351 :             fft_scratch_new%fft_scratch%tf_type = tf_type
    3235        41351 :             fft_scratch => fft_scratch_new%fft_scratch
    3236        41351 :             EXIT
    3237              : 
    3238              :          END IF
    3239              :       END DO
    3240              : 
    3241      3472450 : !$OMP ATOMIC READ
    3242              :       fft_scratch%last_tick = tick_fft_pool
    3243              : 
    3244      3472450 :       CALL timestop(handle)
    3245              : 
    3246      3472450 :    END SUBROUTINE get_fft_scratch
    3247              : 
    3248              : ! **************************************************************************************************
    3249              : !> \brief ...
    3250              : !> \param fft_scratch ...
    3251              : ! **************************************************************************************************
    3252      3472450 :    SUBROUTINE release_fft_scratch(fft_scratch)
    3253              : 
    3254              :       TYPE(fft_scratch_type), POINTER                    :: fft_scratch
    3255              : 
    3256              :       INTEGER                                            :: scratch_id
    3257              :       TYPE(fft_scratch_pool_type), POINTER               :: fft_scratch_current
    3258              : 
    3259      3472450 :       scratch_id = fft_scratch%fft_scratch_id
    3260              : 
    3261      3472450 :       fft_scratch_current => fft_scratch_first
    3262      7357278 :       DO
    3263     10829728 :          IF (ASSOCIATED(fft_scratch_current)) THEN
    3264     10829728 :             IF (scratch_id == fft_scratch_current%fft_scratch%fft_scratch_id) THEN
    3265      3472450 :                fft_scratch%in_use = .FALSE.
    3266      3472450 :                NULLIFY (fft_scratch)
    3267      3472450 :                EXIT
    3268              :             END IF
    3269      7357278 :             fft_scratch_current => fft_scratch_current%fft_scratch_next
    3270              :          ELSE
    3271              :             ! We cannot find the scratch type in this pool
    3272            0 :             CPABORT("Invalid scratch type.")
    3273            0 :             EXIT
    3274              :          END IF
    3275              :       END DO
    3276              : 
    3277      3472450 :    END SUBROUTINE release_fft_scratch
    3278              : 
    3279              : ! **************************************************************************************************
    3280              : !> \brief ...
    3281              : !> \param rs ...
    3282              : !> \param scount ...
    3283              : !> \param sdispl ...
    3284              : !> \param rq ...
    3285              : !> \param rcount ...
    3286              : !> \param rdispl ...
    3287              : !> \param group ...
    3288              : ! **************************************************************************************************
    3289            0 :    SUBROUTINE sparse_alltoall(rs, scount, sdispl, rq, rcount, rdispl, group)
    3290              :       COMPLEX(KIND=dp), DIMENSION(:), POINTER            :: rs
    3291              :       INTEGER, DIMENSION(:), POINTER                     :: scount, sdispl
    3292              :       COMPLEX(KIND=dp), DIMENSION(:), POINTER            :: rq
    3293              :       INTEGER, DIMENSION(:), POINTER                     :: rcount, rdispl
    3294              : 
    3295              :       CLASS(mp_comm_type), INTENT(IN)                    :: group
    3296              : 
    3297            0 :       COMPLEX(KIND=dp), DIMENSION(:), POINTER            :: msgin, msgout
    3298              :       INTEGER                                            :: ip, n, nr, ns, pos
    3299            0 :       TYPE(mp_request_type), ALLOCATABLE, DIMENSION(:)   :: rreq, sreq
    3300              : 
    3301            0 :       CALL group%sync()
    3302            0 :       n = group%num_pe
    3303            0 :       pos = group%mepos
    3304            0 :       ALLOCATE (sreq(0:n - 1))
    3305            0 :       ALLOCATE (rreq(0:n - 1))
    3306            0 :       nr = 0
    3307            0 :       DO ip = 0, n - 1
    3308            0 :          IF (rcount(ip) == 0) CYCLE
    3309            0 :          IF (ip == pos) CYCLE
    3310            0 :          msgout => rq(rdispl(ip) + 1:rdispl(ip) + rcount(ip))
    3311            0 :          CALL group%irecv(msgout, ip, rreq(nr))
    3312            0 :          nr = nr + 1
    3313              :       END DO
    3314            0 :       ns = 0
    3315            0 :       DO ip = 0, n - 1
    3316            0 :          IF (scount(ip) == 0) CYCLE
    3317            0 :          IF (ip == pos) CYCLE
    3318            0 :          msgin => rs(sdispl(ip) + 1:sdispl(ip) + scount(ip))
    3319            0 :          CALL group%isend(msgin, ip, sreq(ns))
    3320            0 :          ns = ns + 1
    3321              :       END DO
    3322            0 :       IF (rcount(pos) /= 0) THEN
    3323            0 :          IF (rcount(pos) /= scount(pos)) CPABORT("Invalid count.")
    3324            0 :          rq(rdispl(pos) + 1:rdispl(pos) + rcount(pos)) = rs(sdispl(pos) + 1:sdispl(pos) + scount(pos))
    3325              :       END IF
    3326            0 :       CALL mp_waitall(sreq(0:ns - 1))
    3327            0 :       CALL mp_waitall(rreq(0:nr - 1))
    3328            0 :       DEALLOCATE (sreq)
    3329            0 :       DEALLOCATE (rreq)
    3330            0 :       CALL group%sync()
    3331              : 
    3332            0 :    END SUBROUTINE sparse_alltoall
    3333              : 
    3334              : ! **************************************************************************************************
    3335              : !> \brief  test data structures for equality. It is assumed that if they are
    3336              : !>         different for one mpi task they are different for all (??)
    3337              : !> \param fft_size_1 ...
    3338              : !> \param fft_size_2 ...
    3339              : !> \param equal ...
    3340              : ! **************************************************************************************************
    3341      2792420 :    SUBROUTINE is_equal(fft_size_1, fft_size_2, equal)
    3342              :       TYPE(fft_scratch_sizes)                            :: fft_size_1, fft_size_2
    3343              :       LOGICAL                                            :: equal
    3344              : 
    3345              :       equal = .TRUE.
    3346              : 
    3347      2792420 :       equal = equal .AND. fft_size_1%nx == fft_size_2%nx
    3348      2792420 :       equal = equal .AND. fft_size_1%ny == fft_size_2%ny
    3349      2792420 :       equal = equal .AND. fft_size_1%nz == fft_size_2%nz
    3350              : 
    3351      2792420 :       equal = equal .AND. fft_size_1%lmax == fft_size_2%lmax
    3352      2792420 :       equal = equal .AND. fft_size_1%mmax == fft_size_2%mmax
    3353      2792420 :       equal = equal .AND. fft_size_1%nmax == fft_size_2%nmax
    3354              : 
    3355      2792420 :       equal = equal .AND. fft_size_1%mx1 == fft_size_2%mx1
    3356      2792420 :       equal = equal .AND. fft_size_1%mx2 == fft_size_2%mx2
    3357      2792420 :       equal = equal .AND. fft_size_1%mx3 == fft_size_2%mx3
    3358              : 
    3359      2792420 :       equal = equal .AND. fft_size_1%my1 == fft_size_2%my1
    3360      2792420 :       equal = equal .AND. fft_size_1%my2 == fft_size_2%my2
    3361      2792420 :       equal = equal .AND. fft_size_1%my3 == fft_size_2%my3
    3362              : 
    3363      2792420 :       equal = equal .AND. fft_size_1%mcz1 == fft_size_2%mcz1
    3364      2792420 :       equal = equal .AND. fft_size_1%mcx2 == fft_size_2%mcx2
    3365      2792420 :       equal = equal .AND. fft_size_1%mcz2 == fft_size_2%mcz2
    3366      2792420 :       equal = equal .AND. fft_size_1%mcy3 == fft_size_2%mcy3
    3367              : 
    3368      2792420 :       equal = equal .AND. fft_size_1%lg == fft_size_2%lg
    3369      2792420 :       equal = equal .AND. fft_size_1%mg == fft_size_2%mg
    3370              : 
    3371      2792420 :       equal = equal .AND. fft_size_1%nbx == fft_size_2%nbx
    3372      2792420 :       equal = equal .AND. fft_size_1%nbz == fft_size_2%nbz
    3373              : 
    3374      2792420 :       equal = equal .AND. fft_size_1%nmray == fft_size_2%nmray
    3375      2792420 :       equal = equal .AND. fft_size_1%nyzray == fft_size_2%nyzray
    3376              : 
    3377      2792420 :       equal = equal .AND. fft_size_1%rs_group == fft_size_2%rs_group
    3378              : 
    3379      8377260 :       equal = equal .AND. ALL(fft_size_1%g_pos == fft_size_2%g_pos)
    3380      8377260 :       equal = equal .AND. ALL(fft_size_1%r_pos == fft_size_2%r_pos)
    3381      8377260 :       equal = equal .AND. ALL(fft_size_1%r_dim == fft_size_2%r_dim)
    3382              : 
    3383      2792420 :       equal = equal .AND. fft_size_1%numtask == fft_size_2%numtask
    3384              : 
    3385      2792420 :    END SUBROUTINE is_equal
    3386              : 
    3387            0 : END MODULE fft_tools
        

Generated by: LCOV version 2.0-1