LCOV - code coverage report
Current view: top level - src/pw/fft - fftw3_lib.F (source / functions) Coverage Total Hit
Test: CP2K Regtests (git:936074a) Lines: 74.2 % 357 265
Test Date: 2025-12-04 06:27:48 Functions: 68.0 % 25 17

            Line data    Source code
       1              : !--------------------------------------------------------------------------------------------------!
       2              : !   CP2K: A general program to perform molecular dynamics simulations                              !
       3              : !   Copyright 2000-2025 CP2K developers group <https://cp2k.org>                                   !
       4              : !                                                                                                  !
       5              : !   SPDX-License-Identifier: GPL-2.0-or-later                                                      !
       6              : !--------------------------------------------------------------------------------------------------!
       7              : MODULE fftw3_lib
       8              :    USE ISO_C_BINDING, ONLY: C_ASSOCIATED, &
       9              :                             C_CHAR, &
      10              :                             C_DOUBLE, &
      11              :                             C_DOUBLE_COMPLEX, &
      12              :                             C_INT, &
      13              :                             C_PTR
      14              : #if defined(__FFTW3)
      15              :    USE ISO_C_BINDING, ONLY: &
      16              :       C_FLOAT, &
      17              :       C_FLOAT_COMPLEX, &
      18              :       C_FUNPTR, &
      19              :       C_INT32_T, &
      20              :       C_INTPTR_T, &
      21              :       C_LOC, &
      22              :       C_NULL_CHAR, &
      23              :       C_SIZE_T, C_F_POINTER
      24              :    USE mathconstants, ONLY: z_zero
      25              : #endif
      26              :    USE cp_files, ONLY: get_unit_number
      27              :    USE fft_kinds, ONLY: dp
      28              :    USE fft_plan, ONLY: fft_plan_type
      29              : 
      30              : !$ USE OMP_LIB, ONLY: omp_get_max_threads, omp_get_thread_num, omp_get_num_threads
      31              : 
      32              : #include "../../base/base_uses.f90"
      33              : 
      34              :    IMPLICIT NONE
      35              :    PRIVATE
      36              : 
      37              :    PUBLIC :: fftw3_do_init, fftw3_do_cleanup, fftw3_get_lengths, fftw33d, fftw31dm
      38              :    PUBLIC :: fftw3_destroy_plan, fftw3_create_plan_1dm, fftw3_create_plan_3d
      39              :    PUBLIC :: fftw_alloc, fftw_dealloc
      40              : 
      41              : #if defined(__FFTW3)
      42              : #include "fftw3.f03"
      43              : #endif
      44              : 
      45              :    INTERFACE fftw_alloc
      46              :       MODULE PROCEDURE :: fftw_alloc_complex_1d
      47              :       MODULE PROCEDURE :: fftw_alloc_complex_2d
      48              :       MODULE PROCEDURE :: fftw_alloc_complex_3d
      49              :    END INTERFACE fftw_alloc
      50              : 
      51              :    INTERFACE fftw_dealloc
      52              :       MODULE PROCEDURE :: fftw_dealloc_complex_1d
      53              :       MODULE PROCEDURE :: fftw_dealloc_complex_2d
      54              :       MODULE PROCEDURE :: fftw_dealloc_complex_3d
      55              :    END INTERFACE fftw_dealloc
      56              : 
      57              : CONTAINS
      58              : 
      59              :    #:set maxdim = 3
      60              :    #:for dim in range(1, maxdim+1)
      61              : ! Concatenate the components of the dimensions passed to this function to use it if FFTW3 is not used
      62              :       #:set dim_extended = ", ".join(["n("+str(i)+")" for i in range(1, dim+1)])
      63       110004 :       SUBROUTINE fftw_alloc_complex_${dim}$d(array, n)
      64              :          COMPLEX(C_DOUBLE_COMPLEX), DIMENSION(:${", :"*(dim-1)}$), CONTIGUOUS, POINTER, INTENT(OUT) :: array
      65              :          INTEGER, DIMENSION(${dim}$), INTENT(IN) :: n
      66              : 
      67              : #if defined(__FFTW3)
      68              :          TYPE(C_PTR) :: data_ptr
      69       385428 :          data_ptr = fftw_alloc_complex(INT(PRODUCT(n), KIND=C_SIZE_T))
      70       385428 :          CALL C_F_POINTER(data_ptr, array, n)
      71              : #else
      72              : ! Just allocate the array
      73              :          ALLOCATE (array(${dim_extended}$))
      74              : #endif
      75              : 
      76       110004 :       END SUBROUTINE fftw_alloc_complex_${dim}$d
      77              : 
      78       110004 :       SUBROUTINE fftw_dealloc_complex_${dim}$d(array)
      79              :          COMPLEX(C_DOUBLE_COMPLEX), DIMENSION(:${", :"*(dim-1)}$), CONTIGUOUS, POINTER, INTENT(INOUT) :: array
      80              : 
      81              : #if defined(__FFTW3)
      82       110004 :          CALL fftw_free(C_LOC(array))
      83       110004 :          NULLIFY (array)
      84              : #else
      85              : ! Just deallocate the array
      86              :          DEALLOCATE (array)
      87              : #endif
      88              : 
      89       110004 :       END SUBROUTINE fftw_dealloc_complex_${dim}$d
      90              :    #:endfor
      91              : 
      92              : #if defined(__FFTW3)
      93              : ! **************************************************************************************************
      94              : !> \brief A workaround that allows us to compile with -Werror=unused-parameter
      95              : ! **************************************************************************************************
      96            0 :    SUBROUTINE dummy_routine_to_call_mark_used()
      97              :       MARK_USED(FFTW_R2HC)
      98              :       MARK_USED(FFTW_HC2R)
      99              :       MARK_USED(FFTW_DHT)
     100              :       MARK_USED(FFTW_REDFT00)
     101              :       MARK_USED(FFTW_REDFT01)
     102              :       MARK_USED(FFTW_REDFT10)
     103              :       MARK_USED(FFTW_REDFT11)
     104              :       MARK_USED(FFTW_RODFT00)
     105              :       MARK_USED(FFTW_RODFT01)
     106              :       MARK_USED(FFTW_RODFT10)
     107              :       MARK_USED(FFTW_RODFT11)
     108              :       MARK_USED(FFTW_FORWARD)
     109              :       MARK_USED(FFTW_BACKWARD)
     110              :       MARK_USED(FFTW_MEASURE)
     111              :       MARK_USED(FFTW_DESTROY_INPUT)
     112              :       MARK_USED(FFTW_UNALIGNED)
     113              :       MARK_USED(FFTW_CONSERVE_MEMORY)
     114              :       MARK_USED(FFTW_EXHAUSTIVE)
     115              :       MARK_USED(FFTW_PRESERVE_INPUT)
     116              :       MARK_USED(FFTW_PATIENT)
     117              :       MARK_USED(FFTW_ESTIMATE)
     118              :       MARK_USED(FFTW_WISDOM_ONLY)
     119              :       MARK_USED(FFTW_ESTIMATE_PATIENT)
     120              :       MARK_USED(FFTW_BELIEVE_PCOST)
     121              :       MARK_USED(FFTW_NO_DFT_R2HC)
     122              :       MARK_USED(FFTW_NO_NONTHREADED)
     123              :       MARK_USED(FFTW_NO_BUFFERING)
     124              :       MARK_USED(FFTW_NO_INDIRECT_OP)
     125              :       MARK_USED(FFTW_ALLOW_LARGE_GENERIC)
     126              :       MARK_USED(FFTW_NO_RANK_SPLITS)
     127              :       MARK_USED(FFTW_NO_VRANK_SPLITS)
     128              :       MARK_USED(FFTW_NO_VRECURSE)
     129              :       MARK_USED(FFTW_NO_SIMD)
     130              :       MARK_USED(FFTW_NO_SLOW)
     131              :       MARK_USED(FFTW_NO_FIXED_RADIX_LARGE_N)
     132              :       MARK_USED(FFTW_ALLOW_PRUNING)
     133            0 :    END SUBROUTINE dummy_routine_to_call_mark_used
     134              : #endif
     135              : 
     136              : ! **************************************************************************************************
     137              : !> \brief ...
     138              : !> \param wisdom_file ...
     139              : !> \param ionode ...
     140              : ! **************************************************************************************************
     141         9675 :    SUBROUTINE fftw3_do_cleanup(wisdom_file, ionode)
     142              : 
     143              :       CHARACTER(LEN=*), INTENT(IN)             :: wisdom_file
     144              :       LOGICAL                                  :: ionode
     145              : 
     146              : #if defined(__FFTW3)
     147         9675 :       CHARACTER(LEN=1, KIND=C_CHAR), DIMENSION(:), ALLOCATABLE :: wisdom_file_name_c
     148              :       INTEGER                                  :: file_name_length, i, iunit, istat
     149              :       INTEGER(KIND=C_INT)                      :: isuccess
     150              :       ! Write out FFTW3 wisdom to file (if we can)
     151              :       ! only the ionode updates the wisdom
     152         9675 :       IF (ionode) THEN
     153         4941 :          iunit = get_unit_number()
     154              :          ! Check whether the file can be opened in the necessary manner
     155         4941 :          OPEN (UNIT=iunit, FILE=wisdom_file, STATUS="UNKNOWN", FORM="FORMATTED", ACTION="WRITE", IOSTAT=istat)
     156         4941 :          IF (istat == 0) THEN
     157            2 :             CLOSE (iunit)
     158            2 :             file_name_length = LEN_TRIM(wisdom_file)
     159            6 :             ALLOCATE (wisdom_file_name_c(file_name_length + 1))
     160           18 :             DO i = 1, file_name_length
     161           18 :                wisdom_file_name_c(i) = wisdom_file(i:i)
     162              :             END DO
     163            2 :             wisdom_file_name_c(file_name_length + 1) = C_NULL_CHAR
     164            2 :             isuccess = fftw_export_wisdom_to_filename(wisdom_file_name_c)
     165            2 :             IF (isuccess == 0) &
     166              :                CALL cp_warn(__LOCATION__, "Error exporting wisdom to file "//TRIM(wisdom_file)//". "// &
     167            0 :                             "Wisdom was not exported.")
     168              :          END IF
     169              :       END IF
     170              : 
     171         9675 :       CALL fftw_cleanup()
     172              : #else
     173              :       MARK_USED(wisdom_file)
     174              :       MARK_USED(ionode)
     175              : #endif
     176              : 
     177         9675 :    END SUBROUTINE fftw3_do_cleanup
     178              : 
     179              : ! **************************************************************************************************
     180              : !> \brief ...
     181              : !> \param wisdom_file ...
     182              : ! **************************************************************************************************
     183         9885 :    SUBROUTINE fftw3_do_init(wisdom_file)
     184              : 
     185              :       CHARACTER(LEN=*), INTENT(IN)             :: wisdom_file
     186              : 
     187              : #if defined(__FFTW3)
     188         9885 :       CHARACTER(LEN=1, KIND=C_CHAR), DIMENSION(:), ALLOCATABLE :: wisdom_file_name_c
     189              :       INTEGER                                  :: file_name_length, i, istat, iunit
     190              :       INTEGER(KIND=C_INT)                      :: isuccess
     191              :       LOGICAL :: file_exists
     192              : 
     193         9885 :       isuccess = fftw_init_threads()
     194         9885 :       IF (isuccess == 0) &
     195            0 :          CPABORT("Error initializing FFTW with threads")
     196              : 
     197              :       ! Read FFTW wisdom (if available)
     198              :       ! all nodes are opening the file here...
     199         9885 :       INQUIRE (FILE=wisdom_file, exist=file_exists)
     200         9885 :       IF (file_exists) THEN
     201            2 :          iunit = get_unit_number()
     202            2 :          file_name_length = LEN_TRIM(wisdom_file)
     203              :          ! Check whether the file can be opened in the necessary manner
     204              :          OPEN (UNIT=iunit, FILE=wisdom_file, STATUS="OLD", FORM="FORMATTED", POSITION="REWIND", &
     205            2 :                ACTION="READ", IOSTAT=istat)
     206            2 :          IF (istat == 0) THEN
     207            2 :             CLOSE (iunit)
     208            2 :             file_name_length = LEN_TRIM(wisdom_file)
     209            6 :             ALLOCATE (wisdom_file_name_c(file_name_length + 1))
     210           18 :             DO i = 1, file_name_length
     211           18 :                wisdom_file_name_c(i) = wisdom_file(i:i)
     212              :             END DO
     213            2 :             wisdom_file_name_c(file_name_length + 1) = C_NULL_CHAR
     214            2 :             isuccess = fftw_import_wisdom_from_filename(wisdom_file_name_c)
     215            2 :             IF (isuccess == 0) &
     216              :                CALL cp_warn(__LOCATION__, "Error importing wisdom from file "//TRIM(wisdom_file)//". "// &
     217              :                             "Maybe the file was created with a different configuration than CP2K is run with. "// &
     218            0 :                             "CP2K continues without importing wisdom.")
     219              :          END IF
     220              :       END IF
     221              : #else
     222              :       MARK_USED(wisdom_file)
     223              : #endif
     224              : 
     225         9885 :    END SUBROUTINE fftw3_do_init
     226              : 
     227              : ! **************************************************************************************************
     228              : !> \brief ...
     229              : !> \param DATA ...
     230              : !> \param max_length ...
     231              : !> \par History
     232              : !>      JGH 23-Jan-2006 : initial version
     233              : !>      Adapted for new interface
     234              : !>      IAB 09-Jan-2009 : Modified to cache plans in fft_plan_type
     235              : !>                        (c) The Numerical Algorithms Group (NAG) Ltd, 2009 on behalf of the HECToR project
     236              : !>      IAB 09-Oct-2009 : Added OpenMP directives to 1D FFT, and planning routines
     237              : !>                        (c) The Numerical Algorithms Group (NAG) Ltd, 2009 on behalf of the HECToR project
     238              : !>      IAB 11-Sep-2012 : OpenMP parallel 3D FFT (Ruyman Reyes, PRACE)
     239              : !> \author JGH
     240              : ! **************************************************************************************************
     241          528 :    SUBROUTINE fftw3_get_lengths(DATA, max_length)
     242              : 
     243              :       INTEGER, DIMENSION(*)                              :: DATA
     244              :       INTEGER, INTENT(INOUT)                             :: max_length
     245              : 
     246              :       INTEGER                                            :: h, i, j, k, m, maxn, maxn_elevens, &
     247              :                                                             maxn_fives, maxn_sevens, &
     248              :                                                             maxn_thirteens, maxn_threes, &
     249              :                                                             maxn_twos, ndata, nmax, number
     250          528 :       INTEGER, ALLOCATABLE, DIMENSION(:)                 :: dlocal, idx
     251              : 
     252              : !------------------------------------------------------------------------------
     253              : ! compute ndata
     254              : !! FFTW can do arbitrary(?) lengths, maybe you want to limit them to some
     255              : !!    powers of small prime numbers though...
     256              : 
     257          528 :       maxn_twos = 15
     258          528 :       maxn_threes = 3
     259          528 :       maxn_fives = 2
     260          528 :       maxn_sevens = 1
     261          528 :       maxn_elevens = 1
     262          528 :       maxn_thirteens = 0
     263          528 :       maxn = 37748736
     264              : 
     265          528 :       ndata = 0
     266         8976 :       DO h = 0, maxn_twos
     267         8448 :          nmax = HUGE(0)/2**h
     268        42768 :          DO i = 0, maxn_threes
     269       143616 :             DO j = 0, maxn_fives
     270       337920 :                DO k = 0, maxn_sevens
     271       709632 :                   DO m = 0, maxn_elevens
     272       405504 :                      number = (3**i)*(5**j)*(7**k)*(11**m)
     273              : 
     274       405504 :                      IF (number > nmax) CYCLE
     275              : 
     276       405504 :                      number = number*2**h
     277       405504 :                      IF (number >= maxn) CYCLE
     278              : 
     279       608256 :                      ndata = ndata + 1
     280              :                   END DO
     281              :                END DO
     282              :             END DO
     283              :          END DO
     284              :       END DO
     285              : 
     286         2112 :       ALLOCATE (dlocal(ndata), idx(ndata))
     287              : 
     288          528 :       ndata = 0
     289       389136 :       dlocal(:) = 0
     290         8976 :       DO h = 0, maxn_twos
     291         8448 :          nmax = HUGE(0)/2**h
     292        42768 :          DO i = 0, maxn_threes
     293       143616 :             DO j = 0, maxn_fives
     294       337920 :                DO k = 0, maxn_sevens
     295       709632 :                   DO m = 0, maxn_elevens
     296       405504 :                      number = (3**i)*(5**j)*(7**k)*(11**m)
     297              : 
     298       405504 :                      IF (number > nmax) CYCLE
     299              : 
     300       405504 :                      number = number*2**h
     301       405504 :                      IF (number >= maxn) CYCLE
     302              : 
     303       388608 :                      ndata = ndata + 1
     304       608256 :                      dlocal(ndata) = number
     305              :                   END DO
     306              :                END DO
     307              :             END DO
     308              :          END DO
     309              :       END DO
     310              : 
     311          528 :       CALL sortint(dlocal, ndata, idx)
     312          528 :       ndata = MIN(ndata, max_length)
     313       389136 :       DATA(1:ndata) = dlocal(1:ndata)
     314          528 :       max_length = ndata
     315              : 
     316          528 :       DEALLOCATE (dlocal, idx)
     317              : 
     318          528 :    END SUBROUTINE fftw3_get_lengths
     319              : 
     320              : ! **************************************************************************************************
     321              : !> \brief ...
     322              : !> \param iarr ...
     323              : !> \param n ...
     324              : !> \param index ...
     325              : ! **************************************************************************************************
     326          528 :    SUBROUTINE sortint(iarr, n, index)
     327              : 
     328              :       INTEGER, INTENT(IN)                                :: n
     329              :       INTEGER, INTENT(INOUT)                             :: iarr(1:n)
     330              :       INTEGER, INTENT(OUT)                               :: INDEX(1:n)
     331              : 
     332              :       INTEGER, PARAMETER                                 :: m = 7, nstack = 50
     333              : 
     334              :       INTEGER                                            :: a, i, ib, ir, istack(1:nstack), itemp, &
     335              :                                                             j, jstack, k, l, temp
     336              : 
     337              : !------------------------------------------------------------------------------
     338              : 
     339       389136 :       DO i = 1, n
     340       389136 :          INDEX(i) = i
     341              :       END DO
     342              :       jstack = 0
     343              :       l = 1
     344              :       ir = n
     345              :       DO WHILE (.TRUE.)
     346       146256 :       IF (ir - l < m) THEN
     347       315744 :          DO j = l + 1, ir
     348       242352 :             a = iarr(j)
     349       242352 :             ib = INDEX(j)
     350       575520 :             DO i = j - 1, 0, -1
     351       575520 :                IF (i == 0) EXIT
     352       574464 :                IF (iarr(i) <= a) EXIT
     353       333168 :                iarr(i + 1) = iarr(i)
     354       575520 :                INDEX(i + 1) = INDEX(i)
     355              :             END DO
     356       242352 :             iarr(i + 1) = a
     357       315744 :             INDEX(i + 1) = ib
     358              :          END DO
     359        73392 :          IF (jstack == 0) RETURN
     360        72864 :          ir = istack(jstack)
     361        72864 :          l = istack(jstack - 1)
     362        72864 :          jstack = jstack - 2
     363              :       ELSE
     364        72864 :          k = (l + ir)/2
     365        72864 :          temp = iarr(k)
     366        72864 :          iarr(k) = iarr(l + 1)
     367        72864 :          iarr(l + 1) = temp
     368        72864 :          itemp = INDEX(k)
     369        72864 :          INDEX(k) = INDEX(l + 1)
     370        72864 :          INDEX(l + 1) = itemp
     371        72864 :          IF (iarr(l + 1) > iarr(ir)) THEN
     372        34320 :             temp = iarr(l + 1)
     373        34320 :             iarr(l + 1) = iarr(ir)
     374        34320 :             iarr(ir) = temp
     375        34320 :             itemp = INDEX(l + 1)
     376        34320 :             INDEX(l + 1) = INDEX(ir)
     377        34320 :             INDEX(ir) = itemp
     378              :          END IF
     379        72864 :          IF (iarr(l) > iarr(ir)) THEN
     380        27456 :             temp = iarr(l)
     381        27456 :             iarr(l) = iarr(ir)
     382        27456 :             iarr(ir) = temp
     383        27456 :             itemp = INDEX(l)
     384        27456 :             INDEX(l) = INDEX(ir)
     385        27456 :             INDEX(ir) = itemp
     386              :          END IF
     387        72864 :          IF (iarr(l + 1) > iarr(l)) THEN
     388        27456 :             temp = iarr(l + 1)
     389        27456 :             iarr(l + 1) = iarr(l)
     390        27456 :             iarr(l) = temp
     391        27456 :             itemp = INDEX(l + 1)
     392        27456 :             INDEX(l + 1) = INDEX(l)
     393        27456 :             INDEX(l) = itemp
     394              :          END IF
     395        72864 :          i = l + 1
     396        72864 :          j = ir
     397        72864 :          a = iarr(l)
     398        72864 :          ib = INDEX(l)
     399       464112 :          DO WHILE (.TRUE.)
     400       536976 :             i = i + 1
     401      1742928 :             DO WHILE (iarr(i) < a)
     402      1205952 :                i = i + 1
     403              :             END DO
     404       536976 :             j = j - 1
     405      1330560 :             DO WHILE (iarr(j) > a)
     406       793584 :                j = j - 1
     407              :             END DO
     408       536976 :             IF (j < i) EXIT
     409       464112 :             temp = iarr(i)
     410       464112 :             iarr(i) = iarr(j)
     411       464112 :             iarr(j) = temp
     412       464112 :             itemp = INDEX(i)
     413       464112 :             INDEX(i) = INDEX(j)
     414       464112 :             INDEX(j) = itemp
     415              :          END DO
     416        72864 :          iarr(l) = iarr(j)
     417        72864 :          iarr(j) = a
     418        72864 :          INDEX(l) = INDEX(j)
     419        72864 :          INDEX(j) = ib
     420        72864 :          jstack = jstack + 2
     421        72864 :          IF (jstack > nstack) CPABORT("Nstack too small in sortint")
     422        72864 :          IF (ir - i + 1 >= j - l) THEN
     423        37488 :             istack(jstack) = ir
     424        37488 :             istack(jstack - 1) = i
     425        37488 :             ir = j - 1
     426              :          ELSE
     427        35376 :             istack(jstack) = j - 1
     428        35376 :             istack(jstack - 1) = l
     429        35376 :             l = i
     430              :          END IF
     431              :       END IF
     432              : 
     433              :       END DO
     434              : 
     435              :    END SUBROUTINE sortint
     436              : 
     437              : ! **************************************************************************************************
     438              : 
     439              : ! **************************************************************************************************
     440              : !> \brief ...
     441              : !> \param plan ...
     442              : !> \param fft_rank ...
     443              : !> \param dim_n ...
     444              : !> \param dim_istride ...
     445              : !> \param dim_ostride ...
     446              : !> \param hm_rank ...
     447              : !> \param hm_n ...
     448              : !> \param hm_istride ...
     449              : !> \param hm_ostride ...
     450              : !> \param zin ...
     451              : !> \param zout ...
     452              : !> \param fft_direction ...
     453              : !> \param fftw_plan_type ...
     454              : !> \param valid ...
     455              : ! **************************************************************************************************
     456        56292 :    SUBROUTINE fftw3_create_guru_plan(plan, fft_rank, dim_n, &
     457              :                                      dim_istride, dim_ostride, hm_rank, &
     458              :                                      hm_n, hm_istride, hm_ostride, &
     459              :                                      zin, zout, fft_direction, fftw_plan_type, &
     460              :                                      valid)
     461              : 
     462              :       IMPLICIT NONE
     463              : 
     464              :       TYPE(C_PTR), INTENT(INOUT)                         :: plan
     465              :       COMPLEX(KIND=dp), DIMENSION(*), INTENT(INOUT)      :: zin, zout
     466              :       INTEGER, INTENT(IN) :: dim_n(2), dim_istride(2), dim_ostride(2), &
     467              :                              hm_n(2), hm_istride(2), hm_ostride(2), fft_rank, &
     468              :                              fft_direction, fftw_plan_type, hm_rank
     469              :       LOGICAL, INTENT(OUT)                               :: valid
     470              : 
     471              : #if defined(__FFTW3)
     472              :       TYPE(fftw_iodim) :: dim(2), hm(2)
     473              :       INTEGER :: i
     474              : 
     475       168876 :       DO i = 1, 2
     476       112584 :          DIM(i) = fftw_iodim(dim_n(i), dim_istride(i), dim_ostride(i))
     477       168876 :          hm(i) = fftw_iodim(hm_n(i), hm_istride(i), hm_ostride(i))
     478              :       END DO
     479              : 
     480              :       plan = fftw_plan_guru_dft(fft_rank, &
     481              :                                 dim, hm_rank, hm, &
     482              :                                 zin, zout, &
     483        56292 :                                 fft_direction, fftw_plan_type)
     484              : 
     485        56292 :       valid = C_ASSOCIATED(plan)
     486              : 
     487              : #else
     488              :       MARK_USED(plan)
     489              :       MARK_USED(fft_rank)
     490              :       MARK_USED(dim_n)
     491              :       MARK_USED(dim_istride)
     492              :       MARK_USED(dim_ostride)
     493              :       MARK_USED(hm_rank)
     494              :       MARK_USED(hm_n)
     495              :       MARK_USED(hm_istride)
     496              :       MARK_USED(hm_ostride)
     497              :       MARK_USED(fft_direction)
     498              :       MARK_USED(fftw_plan_type)
     499              :       !MARK_USED does not work with assumed size arguments
     500              :       IF (.FALSE.) THEN; DO; IF (ABS(zin(1)) > ABS(zout(1))) EXIT; END DO; END IF
     501              :       valid = .FALSE.
     502              : 
     503              : #endif
     504              : 
     505        56292 :    END SUBROUTINE fftw3_create_guru_plan
     506              : 
     507              : ! **************************************************************************************************
     508              : 
     509              : ! **************************************************************************************************
     510              : !> \brief Attempt to create a plan with the guru interface for a 2d sub-space.
     511              : !>        If this fails, fall back to the FFTW3 threaded 3D transform instead
     512              : !>        of the hand-optimised version.
     513              : !> \return ...
     514              : ! **************************************************************************************************
     515        56292 :    FUNCTION fftw3_is_guru_supported() RESULT(guru_supported)
     516              : 
     517              :       IMPLICIT NONE
     518              : 
     519              :       LOGICAL :: guru_supported
     520              : #if defined(__FFTW3)
     521              :       INTEGER :: dim_n(2), dim_istride(2), dim_ostride(2), &
     522              :                  howmany_n(2), howmany_istride(2), howmany_ostride(2)
     523              :       TYPE(C_PTR)                          :: test_plan
     524              :       COMPLEX(KIND=dp), DIMENSION(1, 1, 1) :: zin
     525              : 
     526        56292 :       dim_n(1) = 1
     527        56292 :       dim_n(2) = 1
     528        56292 :       dim_istride(1) = 1
     529        56292 :       dim_istride(2) = 1
     530        56292 :       dim_ostride(1) = 1
     531        56292 :       dim_ostride(2) = 1
     532        56292 :       howmany_n(1) = 1
     533        56292 :       howmany_n(2) = 1
     534        56292 :       howmany_istride(1) = 1
     535        56292 :       howmany_istride(2) = 1
     536        56292 :       howmany_ostride(1) = 1
     537        56292 :       howmany_ostride(2) = 1
     538        56292 :       zin = z_zero
     539              :       CALL fftw3_create_guru_plan(test_plan, 1, &
     540              :                                   dim_n, dim_istride, dim_ostride, &
     541              :                                   2, howmany_n, howmany_istride, howmany_ostride, &
     542              :                                   zin, zin, &
     543        56292 :                                   FFTW_FORWARD, FFTW_ESTIMATE, guru_supported)
     544        56292 :       IF (guru_supported) THEN
     545        56292 :          CALL fftw_destroy_plan(test_plan)
     546              :       END IF
     547              : 
     548              : #else
     549              :       guru_supported = .FALSE.
     550              : #endif
     551              : 
     552        56292 :    END FUNCTION fftw3_is_guru_supported
     553              : 
     554              : ! **************************************************************************************************
     555              : 
     556              : ! **************************************************************************************************
     557              : !> \brief ...
     558              : !> \param nrows ...
     559              : !> \param nt ...
     560              : !> \param rows_per_thread ...
     561              : !> \param rows_per_thread_r ...
     562              : !> \param th_planA ...
     563              : !> \param th_planB ...
     564              : ! **************************************************************************************************
     565            0 :    SUBROUTINE fftw3_compute_rows_per_th(nrows, nt, rows_per_thread, rows_per_thread_r, &
     566              :                                         th_planA, th_planB)
     567              : 
     568              :       INTEGER, INTENT(IN)                                :: nrows, nt
     569              :       INTEGER, INTENT(OUT)                               :: rows_per_thread, rows_per_thread_r, &
     570              :                                                             th_planA, th_planB
     571              : 
     572            0 :       IF (MOD(nrows, nt) == 0) THEN
     573            0 :          rows_per_thread = nrows/nt
     574            0 :          rows_per_thread_r = 0
     575            0 :          th_planA = nt
     576            0 :          th_planB = 0
     577              :       ELSE
     578            0 :          rows_per_thread = nrows/nt + 1
     579            0 :          rows_per_thread_r = nrows/nt
     580            0 :          th_planA = MOD(nrows, nt)
     581            0 :          th_planB = nt - th_planA
     582              :       END IF
     583              : 
     584            0 :    END SUBROUTINE fftw3_compute_rows_per_th
     585              : 
     586              : ! **************************************************************************************************
     587              : 
     588              : ! **************************************************************************************************
     589              : !> \brief ...
     590              : !> \param plan ...
     591              : !> \param plan_r ...
     592              : !> \param dim_n ...
     593              : !> \param dim_istride ...
     594              : !> \param dim_ostride ...
     595              : !> \param hm_n ...
     596              : !> \param hm_istride ...
     597              : !> \param hm_ostride ...
     598              : !> \param input ...
     599              : !> \param output ...
     600              : !> \param fft_direction ...
     601              : !> \param fftw_plan_type ...
     602              : !> \param rows_per_th ...
     603              : !> \param rows_per_th_r ...
     604              : ! **************************************************************************************************
     605            0 :    SUBROUTINE fftw3_create_3d_plans(plan, plan_r, dim_n, dim_istride, dim_ostride, &
     606              :                                     hm_n, hm_istride, hm_ostride, &
     607              :                                     input, output, &
     608              :                                     fft_direction, fftw_plan_type, rows_per_th, &
     609              :                                     rows_per_th_r)
     610              : 
     611              :       TYPE(C_PTR), INTENT(INOUT)                         :: plan, plan_r
     612              :       INTEGER, INTENT(INOUT)                             :: dim_n(2), dim_istride(2), &
     613              :                                                             dim_ostride(2), hm_n(2), &
     614              :                                                             hm_istride(2), hm_ostride(2)
     615              :       COMPLEX(KIND=dp), DIMENSION(*), INTENT(INOUT)      :: input, output
     616              :       INTEGER, INTENT(INOUT)                             :: fft_direction, fftw_plan_type
     617              :       INTEGER, INTENT(IN)                                :: rows_per_th, rows_per_th_r
     618              : 
     619              :       LOGICAL                                            :: valid
     620              : 
     621              : ! First plans will have an additional row
     622              : 
     623            0 :       hm_n(2) = rows_per_th
     624              :       CALL fftw3_create_guru_plan(plan, 1, &
     625              :                                   dim_n, dim_istride, dim_ostride, &
     626              :                                   2, hm_n, hm_istride, hm_ostride, &
     627              :                                   input, output, &
     628            0 :                                   fft_direction, fftw_plan_type, valid)
     629              : 
     630            0 :       IF (.NOT. valid) THEN
     631            0 :          CPABORT("fftw3_create_plan")
     632              :       END IF
     633              : 
     634              :       !!!! Remainder
     635            0 :       hm_n(2) = rows_per_th_r
     636              :       CALL fftw3_create_guru_plan(plan_r, 1, &
     637              :                                   dim_n, dim_istride, dim_ostride, &
     638              :                                   2, hm_n, hm_istride, hm_ostride, &
     639              :                                   input, output, &
     640            0 :                                   fft_direction, fftw_plan_type, valid)
     641            0 :       IF (.NOT. valid) THEN
     642            0 :          CPABORT("fftw3_create_plan (remaining)")
     643              :       END IF
     644              : 
     645            0 :    END SUBROUTINE fftw3_create_3d_plans
     646              : 
     647              : ! **************************************************************************************************
     648              : 
     649              : ! **************************************************************************************************
     650              : !> \brief ...
     651              : !> \param plan ...
     652              : !> \param zin ...
     653              : !> \param zout ...
     654              : !> \param plan_style ...
     655              : ! **************************************************************************************************
     656        56292 :    SUBROUTINE fftw3_create_plan_3d(plan, zin, zout, plan_style)
     657              : 
     658              :       TYPE(fft_plan_type), INTENT(INOUT)              :: plan
     659              :       COMPLEX(KIND=dp), DIMENSION(*), INTENT(INOUT)      :: zin
     660              :       COMPLEX(KIND=dp), DIMENSION(*), INTENT(INOUT)      :: zout
     661              :       INTEGER                                            :: plan_style
     662              : #if defined(__FFTW3)
     663              :       INTEGER                                            :: n1, n2, n3
     664              :       INTEGER                                            :: nt
     665              :       INTEGER                                            :: rows_per_th
     666              :       INTEGER                                            :: rows_per_th_r
     667              :       INTEGER                                            :: fft_direction
     668              :       INTEGER                                            :: th_planA, th_planB
     669        56292 :       COMPLEX(KIND=dp), ALLOCATABLE                      :: tmp(:)
     670              : 
     671              :       ! GURU Interface
     672              :       INTEGER :: dim_n(2), dim_istride(2), dim_ostride(2), &
     673              :                  howmany_n(2), howmany_istride(2), howmany_ostride(2)
     674              : 
     675              :       INTEGER :: fftw_plan_type
     676       112544 :       SELECT CASE (plan_style)
     677              :       CASE (1)
     678        56252 :          fftw_plan_type = FFTW_ESTIMATE
     679              :       CASE (2)
     680            8 :          fftw_plan_type = FFTW_MEASURE
     681              :       CASE (3)
     682           24 :          fftw_plan_type = FFTW_PATIENT
     683              :       CASE (4)
     684            8 :          fftw_plan_type = FFTW_EXHAUSTIVE
     685              :       CASE DEFAULT
     686        56292 :          CPABORT("fftw3_create_plan_3d")
     687              :       END SELECT
     688              : 
     689              : #if defined(__FFTW3_UNALIGNED)
     690              :       fftw_plan_type = fftw_plan_type + FFTW_UNALIGNED
     691              : #endif
     692              : 
     693        56292 :       IF (plan%fsign == +1) THEN
     694        28146 :          fft_direction = FFTW_FORWARD
     695              :       ELSE
     696        28146 :          fft_direction = FFTW_BACKWARD
     697              :       END IF
     698              : 
     699        56292 :       n1 = plan%n_3d(1)
     700        56292 :       n2 = plan%n_3d(2)
     701        56292 :       n3 = plan%n_3d(3)
     702              : 
     703        56292 :       nt = 1
     704        56292 : !$OMP PARALLEL DEFAULT(NONE) SHARED(nt)
     705              : !$OMP MASTER
     706              : !$    nt = omp_get_num_threads()
     707              : !$OMP END MASTER
     708              : !$OMP END PARALLEL
     709              : 
     710              :       IF ((.NOT. fftw3_is_guru_supported()) .OR. &
     711        56292 :           (.NOT. plan_style == 1) .OR. &
     712              :           (n1 < 256 .AND. n2 < 256 .AND. n3 < 256 .AND. nt == 1)) THEN
     713              :          ! If the plan type is MEASURE, PATIENT and EXHAUSTIVE or
     714              :          ! the grid size is small (and we are single-threaded) then
     715              :          ! FFTW3 does a better job than handmade optimization
     716              :          ! so plan a single 3D FFT which will execute using all the threads
     717              : 
     718        56292 :          plan%separated_plans = .FALSE.
     719        56292 : !$       CALL fftw_plan_with_nthreads(nt)
     720              : 
     721        56292 :          IF (plan%fft_in_place) THEN
     722        28146 :             plan%fftw_plan = fftw_plan_dft_3d(n3, n2, n1, zin, zin, fft_direction, fftw_plan_type)
     723              :          ELSE
     724        28146 :             plan%fftw_plan = fftw_plan_dft_3d(n3, n2, n1, zin, zout, fft_direction, fftw_plan_type)
     725              :          END IF
     726              :       ELSE
     727            0 :          ALLOCATE (tmp(n1*n2*n3))
     728              :          ! ************************* PLANS WITH TRANSPOSITIONS ****************************
     729              :          !  In the cases described above, we manually thread each stage of the 3D FFT.
     730              :          !
     731              :          !  The following plans replace the 3D FFT call by running 1D FFTW across all
     732              :          !  3 directions of the array.
     733              :          !
     734              :          !  Output of FFTW is transposed to ensure that the next round of FFTW access
     735              :          !  contiguous information.
     736              :          !
     737              :          !  Assuming the input matrix is M(n3,n2,n1), FFTW/Transp are :
     738              :          !  M(n3,n2,n1) -> fftw(x) -> M(n3,n1,n2) -> fftw(y) -> M(n1,n2,n3) -> fftw(z) -> M(n1,n2,n3)
     739              :          !  Notice that last matrix is transposed in the Z axis. A DO-loop in the execute routine
     740              :          !  will perform the final transposition. Performance evaluation showed that using an external
     741              :          !  DO loop to do the final transposition performed better than directly transposing the output.
     742              :          !  However, this might vary depending on the compiler/platform, so a potential tuning spot
     743              :          !  is to perform the final transposition within the fftw library rather than using the external loop
     744              :          !  See comments below in Z-FFT for how to transpose the output to avoid the final DO loop.
     745              :          !
     746              :          !  Doc. for the Guru interface is in http://www.fftw.org/doc/Guru-Interface.html
     747              :          !
     748              :          !  OpenMP : Work is distributed on the Z plane.
     749              :          !           All transpositions are out-of-place to facilitate multi-threading
     750              :          !
     751              :          !!!! Plan for X : M(n3,n2,n1) -> fftw(x) -> M(n3,n1,n2)
     752              :          CALL fftw3_compute_rows_per_th(n3, nt, rows_per_th, rows_per_th_r, &
     753            0 :                                         th_planA, th_planB)
     754              : 
     755            0 :          dim_n(1) = n1
     756            0 :          dim_istride(1) = 1
     757            0 :          dim_ostride(1) = n2
     758            0 :          howmany_n(1) = n2
     759            0 :          howmany_n(2) = rows_per_th
     760            0 :          howmany_istride(1) = n1
     761            0 :          howmany_istride(2) = n1*n2
     762            0 :          howmany_ostride(1) = 1
     763            0 :          howmany_ostride(2) = n1*n2
     764              :          CALL fftw3_create_3d_plans(plan%fftw_plan_nx, plan%fftw_plan_nx_r, &
     765              :                                     dim_n, dim_istride, dim_ostride, howmany_n, &
     766              :                                     howmany_istride, howmany_ostride, &
     767              :                                     zin, tmp, &
     768              :                                     fft_direction, fftw_plan_type, rows_per_th, &
     769            0 :                                     rows_per_th_r)
     770              : 
     771              :          !!!! Plan for Y : M(n3,n1,n2) -> fftw(y) -> M(n1,n2,n3)
     772              :          CALL fftw3_compute_rows_per_th(n3, nt, rows_per_th, rows_per_th_r, &
     773            0 :                                         th_planA, th_planB)
     774            0 :          dim_n(1) = n2
     775              :          dim_istride(1) = 1
     776            0 :          dim_ostride(1) = n3
     777            0 :          howmany_n(1) = n1
     778            0 :          howmany_n(2) = rows_per_th
     779            0 :          howmany_istride(1) = n2
     780              :          howmany_istride(2) = n1*n2
     781              :          !!! transposed Z axis on output
     782            0 :          howmany_ostride(1) = n2*n3
     783            0 :          howmany_ostride(2) = 1
     784              : 
     785              :          CALL fftw3_create_3d_plans(plan%fftw_plan_ny, plan%fftw_plan_ny_r, &
     786              :                                     dim_n, dim_istride, dim_ostride, &
     787              :                                     howmany_n, howmany_istride, howmany_ostride, &
     788              :                                     tmp, zin, &
     789              :                                     fft_direction, fftw_plan_type, rows_per_th, &
     790            0 :                                     rows_per_th_r)
     791              : 
     792              :          !!!! Plan for Z : M(n1,n2,n3) -> fftw(z) -> M(n1,n2,n3)
     793              :          CALL fftw3_compute_rows_per_th(n1, nt, rows_per_th, rows_per_th_r, &
     794            0 :                                         th_planA, th_planB)
     795            0 :          dim_n(1) = n3
     796              :          dim_istride(1) = 1
     797            0 :          dim_ostride(1) = 1          ! To transpose: n2*n1
     798            0 :          howmany_n(1) = n2
     799            0 :          howmany_n(2) = rows_per_th
     800            0 :          howmany_istride(1) = n3
     801            0 :          howmany_istride(2) = n2*n3
     802            0 :          howmany_ostride(1) = n3     ! To transpose: n1
     803            0 :          howmany_ostride(2) = n2*n3  ! To transpose: 1
     804              : 
     805              :          CALL fftw3_create_3d_plans(plan%fftw_plan_nz, plan%fftw_plan_nz_r, &
     806              :                                     dim_n, dim_istride, dim_ostride, &
     807              :                                     howmany_n, howmany_istride, howmany_ostride, &
     808              :                                     zin, tmp, &
     809              :                                     fft_direction, fftw_plan_type, rows_per_th, &
     810            0 :                                     rows_per_th_r)
     811              : 
     812            0 :          plan%separated_plans = .TRUE.
     813              : 
     814            0 :          DEALLOCATE (tmp)
     815              :       END IF
     816              : 
     817              : #else
     818              :       MARK_USED(plan)
     819              :       MARK_USED(plan_style)
     820              :       !MARK_USED does not work with assumed size arguments
     821              :       IF (.FALSE.) THEN; DO; IF (ABS(zin(1)) > ABS(zout(1))) EXIT; END DO; END IF
     822              : #endif
     823              : 
     824        56292 :    END SUBROUTINE fftw3_create_plan_3d
     825              : 
     826              : ! **************************************************************************************************
     827              : 
     828              : ! **************************************************************************************************
     829              : !> \brief ...
     830              : !> \param plan ...
     831              : !> \param plan_r ...
     832              : !> \param split_dim ...
     833              : !> \param nt ...
     834              : !> \param tid ...
     835              : !> \param input ...
     836              : !> \param istride ...
     837              : !> \param output ...
     838              : !> \param ostride ...
     839              : ! **************************************************************************************************
     840            0 :    SUBROUTINE fftw3_workshare_execute_dft(plan, plan_r, split_dim, nt, tid, &
     841              :                                           input, istride, output, ostride)
     842              : 
     843              :       INTEGER, INTENT(IN)                           :: split_dim, nt, tid
     844              :       INTEGER, INTENT(IN)                           :: istride, ostride
     845              :       COMPLEX(KIND=dp), DIMENSION(*), INTENT(INOUT) :: input, output
     846              :       TYPE(C_PTR)                                   :: plan, plan_r
     847              : #if defined(__FFTW3)
     848              :       INTEGER                                     :: i_off, o_off
     849              :       INTEGER                                     :: th_planA, th_planB
     850              :       INTEGER :: rows_per_thread, rows_per_thread_r
     851              : 
     852              :       CALL fftw3_compute_rows_per_th(split_dim, nt, rows_per_thread, &
     853              :                                      rows_per_thread_r, &
     854            0 :                                      th_planA, th_planB)
     855              : 
     856            0 :       IF (th_planB > 0) THEN
     857            0 :          IF (tid < th_planA) THEN
     858            0 :             i_off = (tid)*(istride*(rows_per_thread)) + 1
     859            0 :             o_off = (tid)*(ostride*(rows_per_thread)) + 1
     860            0 :             IF (rows_per_thread > 0) THEN
     861              :                CALL fftw_execute_dft(plan, input(i_off), &
     862            0 :                                      output(o_off))
     863              :             END IF
     864            0 :          ELSE IF ((tid - th_planA) < th_planB) THEN
     865              : 
     866              :             i_off = (th_planA)*istride*(rows_per_thread) + &
     867            0 :                     (tid - th_planA)*istride*(rows_per_thread_r) + 1
     868              :             o_off = (th_planA)*ostride*(rows_per_thread) + &
     869            0 :                     (tid - th_planA)*ostride*(rows_per_thread_r) + 1
     870              : 
     871              :             CALL fftw_execute_dft(plan_r, input(i_off), &
     872            0 :                                   output(o_off))
     873              :          END IF
     874              : 
     875              :       ELSE
     876            0 :          i_off = (tid)*(istride*(rows_per_thread)) + 1
     877            0 :          o_off = (tid)*(ostride*(rows_per_thread)) + 1
     878              : 
     879              :          CALL fftw_execute_dft(plan, input(i_off), &
     880            0 :                                output(o_off))
     881              : 
     882              :       END IF
     883              : #else
     884              :       MARK_USED(plan)
     885              :       MARK_USED(plan_r)
     886              :       MARK_USED(split_dim)
     887              :       MARK_USED(nt)
     888              :       MARK_USED(tid)
     889              :       MARK_USED(istride)
     890              :       MARK_USED(ostride)
     891              :       !MARK_USED does not work with assumed size arguments
     892              :       IF (.FALSE.) THEN; DO; IF (ABS(input(1)) > ABS(output(1))) EXIT; END DO; END IF
     893              : #endif
     894              : 
     895            0 :    END SUBROUTINE fftw3_workshare_execute_dft
     896              : 
     897              : ! **************************************************************************************************
     898              : 
     899              : ! **************************************************************************************************
     900              : !> \brief ...
     901              : !> \param plan ...
     902              : !> \param scale ...
     903              : !> \param zin ...
     904              : !> \param zout ...
     905              : !> \param stat ...
     906              : ! **************************************************************************************************
     907       650224 :    SUBROUTINE fftw33d(plan, scale, zin, zout, stat)
     908              : 
     909              :       TYPE(fft_plan_type), INTENT(IN)                      :: plan
     910              :       REAL(KIND=dp), INTENT(IN)                            :: scale
     911              :       COMPLEX(KIND=dp), DIMENSION(*), INTENT(INOUT), TARGET:: zin
     912              :       COMPLEX(KIND=dp), DIMENSION(*), INTENT(INOUT), TARGET:: zout
     913              :       INTEGER, INTENT(OUT)                                 :: stat
     914              : #if defined(__FFTW3)
     915       650224 :       COMPLEX(KIND=dp), POINTER                            :: xout(:)
     916       650224 :       COMPLEX(KIND=dp), ALLOCATABLE                        :: tmp1(:)
     917              :       INTEGER                                              :: n1, n2, n3
     918              :       INTEGER                                              :: tid, nt
     919              :       INTEGER                                              :: i, j, k
     920              : 
     921       650224 :       n1 = plan%n_3d(1)
     922       650224 :       n2 = plan%n_3d(2)
     923       650224 :       n3 = plan%n_3d(3)
     924              : 
     925       650224 :       stat = 1
     926              : 
     927              :       ! We use a POINTER to the output array to avoid duplicating code
     928       650224 :       IF (plan%fft_in_place) THEN
     929       650220 :          xout => zin(:n1*n2*n3)
     930              :       ELSE
     931            4 :          xout => zout(:n1*n2*n3)
     932              :       END IF
     933              : 
     934              :       ! Either compute the full 3D FFT using a multithreaded plan
     935       650224 :       IF (.NOT. plan%separated_plans) THEN
     936       650224 :          CALL fftw_execute_dft(plan%fftw_plan, zin, xout)
     937              :       ELSE
     938              :          ! Or use the 3 stage FFT scheme described in fftw3_create_plan_3d
     939            0 :          ALLOCATE (tmp1(n1*n2*n3))   ! Temporary vector used for transpositions
     940            0 :          !$OMP PARALLEL DEFAULT(NONE) PRIVATE(tid,nt,i,j,k) SHARED(zin,tmp1,n1,n2,n3,plan,xout)
     941              :          tid = 0
     942              :          nt = 1
     943              : 
     944              : !$       tid = omp_get_thread_num()
     945              : !$       nt = omp_get_num_threads()
     946              :          CALL fftw3_workshare_execute_dft(plan%fftw_plan_nx, plan%fftw_plan_nx_r, &
     947              :                                           n3, nt, tid, &
     948              :                                           zin, n1*n2, tmp1, n1*n2)
     949              : 
     950              :          !$OMP BARRIER
     951              :          CALL fftw3_workshare_execute_dft(plan%fftw_plan_ny, plan%fftw_plan_ny_r, &
     952              :                                           n3, nt, tid, &
     953              :                                           tmp1, n1*n2, xout, 1)
     954              :          !$OMP BARRIER
     955              :          CALL fftw3_workshare_execute_dft(plan%fftw_plan_nz, plan%fftw_plan_nz_r, &
     956              :                                           n1, nt, tid, &
     957              :                                           xout, n2*n3, tmp1, n2*n3)
     958              :          !$OMP BARRIER
     959              : 
     960              :          !$OMP DO COLLAPSE(3)
     961              :          DO i = 1, n1
     962              :             DO j = 1, n2
     963              :                DO k = 1, n3
     964              :                   xout((i - 1) + (j - 1)*n1 + (k - 1)*n1*n2 + 1) = &
     965              :                      tmp1((k - 1) + (j - 1)*n3 + (i - 1)*n3*n2 + 1)
     966              :                END DO
     967              :             END DO
     968              :          END DO
     969              :          !$OMP END DO
     970              : 
     971              :          !$OMP END PARALLEL
     972              :       END IF
     973              : 
     974       650224 :       IF (scale /= 1.0_dp) THEN
     975       308909 :          CALL zdscal(n1*n2*n3, scale, xout, 1)
     976              :       END IF
     977              : 
     978              : #else
     979              :       MARK_USED(plan)
     980              :       MARK_USED(scale)
     981              :       !MARK_USED does not work with assumed size arguments
     982              :       IF (.FALSE.) THEN; DO; IF (ABS(zin(1)) > ABS(zout(1))) EXIT; END DO; END IF
     983              :       stat = 0
     984              : 
     985              : #endif
     986              : 
     987       650224 :    END SUBROUTINE fftw33d
     988              : 
     989              : ! **************************************************************************************************
     990              : 
     991              : ! **************************************************************************************************
     992              : !> \brief ...
     993              : !> \param plan ...
     994              : !> \param zin ...
     995              : !> \param zout ...
     996              : !> \param plan_style ...
     997              : ! **************************************************************************************************
     998       163260 :    SUBROUTINE fftw3_create_plan_1dm(plan, zin, zout, plan_style)
     999              : 
    1000              :       IMPLICIT NONE
    1001              : 
    1002              :       TYPE(fft_plan_type), INTENT(INOUT)              :: plan
    1003              :       COMPLEX(KIND=dp), DIMENSION(*), INTENT(IN)         :: zin
    1004              :       COMPLEX(KIND=dp), DIMENSION(*), INTENT(IN)         :: zout
    1005              :       INTEGER, INTENT(IN)                                :: plan_style
    1006              : #if defined(__FFTW3)
    1007              :       INTEGER                                            :: istride, idist, ostride, odist, num_threads, num_rows
    1008              : 
    1009              :       INTEGER :: fftw_plan_type
    1010       326304 :       SELECT CASE (plan_style)
    1011              :       CASE (1)
    1012       163044 :          fftw_plan_type = FFTW_ESTIMATE
    1013              :       CASE (2)
    1014           48 :          fftw_plan_type = FFTW_MEASURE
    1015              :       CASE (3)
    1016          156 :          fftw_plan_type = FFTW_PATIENT
    1017              :       CASE (4)
    1018           12 :          fftw_plan_type = FFTW_EXHAUSTIVE
    1019              :       CASE DEFAULT
    1020       163260 :          CPABORT("fftw3_create_plan_1dm")
    1021              :       END SELECT
    1022              : 
    1023              : #if defined(__FFTW3_UNALIGNED)
    1024              :       fftw_plan_type = fftw_plan_type + FFTW_UNALIGNED
    1025              : #endif
    1026       163260 :       num_threads = 1
    1027       163260 :       plan%separated_plans = .FALSE.
    1028              : !$OMP PARALLEL DEFAULT(NONE), &
    1029       163260 : !$OMP          SHARED(NUM_THREADS)
    1030              : !$OMP MASTER
    1031              : !$    num_threads = omp_get_num_threads()
    1032              : !$OMP END MASTER
    1033              : !$OMP END PARALLEL
    1034              : 
    1035       163260 :       num_rows = plan%m/num_threads
    1036       163260 : !$    plan%num_threads_needed = num_threads
    1037              : 
    1038              : ! Check for number of rows less than num_threads
    1039       163260 : !$    IF (plan%m < num_threads) THEN
    1040            0 : !$       num_rows = 1
    1041            0 : !$       plan%num_threads_needed = plan%m
    1042              : !$    END IF
    1043              : 
    1044              : ! Check for total number of rows not divisible by num_threads
    1045       163260 : !$    IF (num_rows*plan%num_threads_needed /= plan%m) THEN
    1046            0 : !$       plan%need_alt_plan = .TRUE.
    1047              : !$    END IF
    1048              : 
    1049       163260 : !$    plan%num_rows = num_rows
    1050       163260 :       istride = 1
    1051       163260 :       idist = plan%n
    1052       163260 :       ostride = 1
    1053       163260 :       odist = plan%n
    1054       163260 :       IF (plan%fsign == +1 .AND. plan%trans) THEN
    1055        81618 :          istride = plan%m
    1056        81618 :          idist = 1
    1057        81642 :       ELSEIF (plan%fsign == -1 .AND. plan%trans) THEN
    1058        81618 :          ostride = plan%m
    1059        81618 :          odist = 1
    1060              :       END IF
    1061              : 
    1062       163260 :       IF (plan%fsign == +1) THEN
    1063              :          CALL dfftw_plan_many_dft(plan%fftw_plan, 1, plan%n, num_rows, zin, 0, istride, idist, &
    1064        81630 :                                   zout, 0, ostride, odist, FFTW_FORWARD, fftw_plan_type)
    1065              :       ELSE
    1066              :          CALL dfftw_plan_many_dft(plan%fftw_plan, 1, plan%n, num_rows, zin, 0, istride, idist, &
    1067        81630 :                                   zout, 0, ostride, odist, FFTW_BACKWARD, fftw_plan_type)
    1068              :       END IF
    1069              : 
    1070       163260 : !$    IF (plan%need_alt_plan) THEN
    1071            0 : !$       plan%alt_num_rows = plan%m - (plan%num_threads_needed - 1)*num_rows
    1072            0 : !$       IF (plan%fsign == +1) THEN
    1073              : !$          CALL dfftw_plan_many_dft(plan%alt_fftw_plan, 1, plan%n, plan%alt_num_rows, zin, 0, istride, idist, &
    1074            0 : !$                                   zout, 0, ostride, odist, FFTW_FORWARD, fftw_plan_type)
    1075              : !$       ELSE
    1076              : !$          CALL dfftw_plan_many_dft(plan%alt_fftw_plan, 1, plan%n, plan%alt_num_rows, zin, 0, istride, idist, &
    1077            0 : !$                                   zout, 0, ostride, odist, FFTW_BACKWARD, fftw_plan_type)
    1078              : !$       END IF
    1079              : !$    END IF
    1080              : 
    1081              : #else
    1082              :       MARK_USED(plan)
    1083              :       MARK_USED(plan_style)
    1084              :       !MARK_USED does not work with assumed size arguments
    1085              :       IF (.FALSE.) THEN; DO; IF (ABS(zin(1)) > ABS(zout(1))) EXIT; END DO; END IF
    1086              : #endif
    1087              : 
    1088       163260 :    END SUBROUTINE fftw3_create_plan_1dm
    1089              : 
    1090              : ! **************************************************************************************************
    1091              : !> \brief ...
    1092              : !> \param plan ...
    1093              : ! **************************************************************************************************
    1094       219552 :    SUBROUTINE fftw3_destroy_plan(plan)
    1095              : 
    1096              :       TYPE(fft_plan_type), INTENT(INOUT)   :: plan
    1097              : 
    1098              : #if defined(__FFTW3)
    1099       219552 : !$    IF (plan%need_alt_plan) THEN
    1100            0 : !$       CALL fftw_destroy_plan(plan%alt_fftw_plan)
    1101              : !$    END IF
    1102              : 
    1103       219552 :       IF (.NOT. plan%separated_plans) THEN
    1104       219552 :          CALL fftw_destroy_plan(plan%fftw_plan)
    1105              :       ELSE
    1106              :          ! If it is a separated plan then we have to destroy
    1107              :          ! each dim plan individually
    1108            0 :          CALL fftw_destroy_plan(plan%fftw_plan_nx)
    1109            0 :          CALL fftw_destroy_plan(plan%fftw_plan_ny)
    1110            0 :          CALL fftw_destroy_plan(plan%fftw_plan_nz)
    1111            0 :          CALL fftw_destroy_plan(plan%fftw_plan_nx_r)
    1112            0 :          CALL fftw_destroy_plan(plan%fftw_plan_ny_r)
    1113            0 :          CALL fftw_destroy_plan(plan%fftw_plan_nz_r)
    1114              :       END IF
    1115              : 
    1116              : #else
    1117              :       MARK_USED(plan)
    1118              : #endif
    1119              : 
    1120       219552 :    END SUBROUTINE fftw3_destroy_plan
    1121              : 
    1122              : ! **************************************************************************************************
    1123              : !> \brief ...
    1124              : !> \param plan ...
    1125              : !> \param zin ...
    1126              : !> \param zout ...
    1127              : !> \param scale ...
    1128              : !> \param stat ...
    1129              : ! **************************************************************************************************
    1130      8441592 :    SUBROUTINE fftw31dm(plan, zin, zout, scale, stat)
    1131              :       TYPE(fft_plan_type), INTENT(IN)                    :: plan
    1132              :       COMPLEX(KIND=dp), DIMENSION(*), INTENT(IN), TARGET :: zin
    1133              :       COMPLEX(KIND=dp), DIMENSION(*), INTENT(INOUT), &
    1134              :          TARGET                                          :: zout
    1135              :       REAL(KIND=dp), INTENT(IN)                          :: scale
    1136              :       INTEGER, INTENT(OUT)                               :: stat
    1137              : 
    1138              :       INTEGER                                            :: in_offset, my_id, num_rows, out_offset, &
    1139              :                                                             scal_offset
    1140              :       TYPE(C_PTR)                                        :: fftw_plan
    1141              : 
    1142              : !------------------------------------------------------------------------------
    1143              : 
    1144      8441592 :       my_id = 0
    1145      8441592 :       num_rows = plan%m
    1146              : 
    1147              : !$OMP PARALLEL DEFAULT(NONE), &
    1148              : !$OMP          PRIVATE(my_id,num_rows,in_offset,out_offset,scal_offset,fftw_plan), &
    1149              : !$OMP          SHARED(zin,zout), &
    1150      8441592 : !$OMP          SHARED(plan,scale,stat)
    1151              : !$    my_id = omp_get_thread_num()
    1152              : 
    1153              : !$    if (my_id < plan%num_threads_needed) then
    1154              : 
    1155              :          fftw_plan = plan%fftw_plan
    1156              : 
    1157              :          in_offset = 1
    1158              :          out_offset = 1
    1159              :          scal_offset = 1
    1160              : 
    1161              : !$       in_offset = 1 + plan%num_rows*my_id*plan%n
    1162              : !$       out_offset = 1 + plan%num_rows*my_id*plan%n
    1163              : !$       IF (plan%fsign == +1 .AND. plan%trans) THEN
    1164              : !$          in_offset = 1 + plan%num_rows*my_id
    1165              : !$       ELSEIF (plan%fsign == -1 .AND. plan%trans) THEN
    1166              : !$          out_offset = 1 + plan%num_rows*my_id
    1167              : !$       END IF
    1168              : !$       scal_offset = 1 + plan%n*plan%num_rows*my_id
    1169              : !$       IF (plan%need_alt_plan .AND. my_id == plan%num_threads_needed - 1) THEN
    1170              : !$          num_rows = plan%alt_num_rows
    1171              : !$          fftw_plan = plan%alt_fftw_plan
    1172              : !$       ELSE
    1173              : !$          num_rows = plan%num_rows
    1174              : !$       END IF
    1175              : 
    1176              : #if defined(__FFTW3)
    1177              : !$OMP MASTER
    1178              :          stat = 1
    1179              : !$OMP END MASTER
    1180              :          CALL dfftw_execute_dft(fftw_plan, zin(in_offset:in_offset), zout(out_offset:out_offset))
    1181              : !$    end if
    1182              : ! all theads need to meet at this barrier
    1183              : !$OMP BARRIER
    1184              : !$    if (my_id < plan%num_threads_needed) then
    1185              :          IF (scale /= 1.0_dp) CALL zdscal(plan%n*num_rows, scale, zout(scal_offset:scal_offset), 1)
    1186              : !$    end if
    1187              : 
    1188              : #else
    1189              :       MARK_USED(plan)
    1190              :       MARK_USED(scale)
    1191              :       !MARK_USED does not work with assumed size arguments
    1192              :       IF (.FALSE.) THEN; DO; IF (ABS(zin(1)) > ABS(zout(1))) EXIT; END DO; END IF
    1193              :       stat = 0
    1194              : 
    1195              : !$    else
    1196              : !$    end if
    1197              : 
    1198              : #endif
    1199              : 
    1200              : !$OMP END PARALLEL
    1201              : 
    1202      8441592 :       END SUBROUTINE fftw31dm
    1203              : 
    1204            0 :    END MODULE fftw3_lib
        

Generated by: LCOV version 2.0-1