LCOV - code coverage report
Current view: top level - src/pw/fft - fftw3_lib.F (source / functions) Coverage Total Hit
Test: CP2K Regtests (git:8ebf9ad) Lines: 74.2 % 357 265
Test Date: 2026-01-22 06:43:13 Functions: 68.0 % 25 17

            Line data    Source code
       1              : !--------------------------------------------------------------------------------------------------!
       2              : !   CP2K: A general program to perform molecular dynamics simulations                              !
       3              : !   Copyright 2000-2026 CP2K developers group <https://cp2k.org>                                   !
       4              : !                                                                                                  !
       5              : !   SPDX-License-Identifier: GPL-2.0-or-later                                                      !
       6              : !--------------------------------------------------------------------------------------------------!
       7              : MODULE fftw3_lib
       8              :    USE ISO_C_BINDING, ONLY: C_ASSOCIATED, &
       9              :                             C_CHAR, &
      10              :                             C_DOUBLE, &
      11              :                             C_DOUBLE_COMPLEX, &
      12              :                             C_INT, &
      13              :                             C_PTR
      14              : #if defined(__FFTW3)
      15              :    USE ISO_C_BINDING, ONLY: &
      16              :       C_FLOAT, &
      17              :       C_FLOAT_COMPLEX, &
      18              :       C_FUNPTR, &
      19              :       C_INT32_T, &
      20              :       C_INTPTR_T, &
      21              :       C_LOC, &
      22              :       C_NULL_CHAR, &
      23              :       C_SIZE_T, C_F_POINTER
      24              :    USE mathconstants, ONLY: z_zero
      25              : #endif
      26              :    USE cp_files, ONLY: get_unit_number
      27              :    USE fft_kinds, ONLY: dp
      28              :    USE fft_plan, ONLY: fft_plan_type
      29              : 
      30              : !$ USE OMP_LIB, ONLY: omp_get_max_threads, omp_get_thread_num, omp_get_num_threads
      31              : 
      32              : #include "../../base/base_uses.f90"
      33              : 
      34              :    IMPLICIT NONE
      35              :    PRIVATE
      36              : 
      37              :    PUBLIC :: fftw3_do_init, fftw3_do_cleanup, fftw3_get_lengths, fftw33d, fftw31dm
      38              :    PUBLIC :: fftw3_destroy_plan, fftw3_create_plan_1dm, fftw3_create_plan_3d
      39              :    PUBLIC :: fftw_alloc, fftw_dealloc
      40              : 
      41              : #if defined(__FFTW3)
      42              : #include "fftw3.f03"
      43              : #endif
      44              : 
      45              :    INTERFACE fftw_alloc
      46              :       MODULE PROCEDURE :: fftw_alloc_complex_1d
      47              :       MODULE PROCEDURE :: fftw_alloc_complex_2d
      48              :       MODULE PROCEDURE :: fftw_alloc_complex_3d
      49              :    END INTERFACE fftw_alloc
      50              : 
      51              :    INTERFACE fftw_dealloc
      52              :       MODULE PROCEDURE :: fftw_dealloc_complex_1d
      53              :       MODULE PROCEDURE :: fftw_dealloc_complex_2d
      54              :       MODULE PROCEDURE :: fftw_dealloc_complex_3d
      55              :    END INTERFACE fftw_dealloc
      56              : 
      57              : CONTAINS
      58              : 
      59              :    #:set maxdim = 3
      60              :    #:for dim in range(1, maxdim+1)
      61              : ! Concatenate the components of the dimensions passed to this function to use it if FFTW3 is not used
      62              :       #:set dim_extended = ", ".join(["n("+str(i)+")" for i in range(1, dim+1)])
      63       110382 :       SUBROUTINE fftw_alloc_complex_${dim}$d(array, n)
      64              :          COMPLEX(C_DOUBLE_COMPLEX), DIMENSION(:${", :"*(dim-1)}$), CONTIGUOUS, POINTER, INTENT(OUT) :: array
      65              :          INTEGER, DIMENSION(${dim}$), INTENT(IN) :: n
      66              : 
      67              : #if defined(__FFTW3)
      68              :          TYPE(C_PTR) :: data_ptr
      69       386776 :          data_ptr = fftw_alloc_complex(INT(PRODUCT(n), KIND=C_SIZE_T))
      70       386776 :          CALL C_F_POINTER(data_ptr, array, n)
      71              : #else
      72              : ! Just allocate the array
      73              :          ALLOCATE (array(${dim_extended}$))
      74              : #endif
      75              : 
      76       110382 :       END SUBROUTINE fftw_alloc_complex_${dim}$d
      77              : 
      78       110382 :       SUBROUTINE fftw_dealloc_complex_${dim}$d(array)
      79              :          COMPLEX(C_DOUBLE_COMPLEX), DIMENSION(:${", :"*(dim-1)}$), CONTIGUOUS, POINTER, INTENT(INOUT) :: array
      80              : 
      81              : #if defined(__FFTW3)
      82       110382 :          CALL fftw_free(C_LOC(array))
      83       110382 :          NULLIFY (array)
      84              : #else
      85              : ! Just deallocate the array
      86              :          DEALLOCATE (array)
      87              : #endif
      88              : 
      89       110382 :       END SUBROUTINE fftw_dealloc_complex_${dim}$d
      90              :    #:endfor
      91              : 
      92              : #if defined(__FFTW3)
      93              : ! **************************************************************************************************
      94              : !> \brief A workaround that allows us to compile with -Werror=unused-parameter
      95              : ! **************************************************************************************************
      96            0 :    SUBROUTINE dummy_routine_to_call_mark_used()
      97              :       MARK_USED(FFTW_R2HC)
      98              :       MARK_USED(FFTW_HC2R)
      99              :       MARK_USED(FFTW_DHT)
     100              :       MARK_USED(FFTW_REDFT00)
     101              :       MARK_USED(FFTW_REDFT01)
     102              :       MARK_USED(FFTW_REDFT10)
     103              :       MARK_USED(FFTW_REDFT11)
     104              :       MARK_USED(FFTW_RODFT00)
     105              :       MARK_USED(FFTW_RODFT01)
     106              :       MARK_USED(FFTW_RODFT10)
     107              :       MARK_USED(FFTW_RODFT11)
     108              :       MARK_USED(FFTW_FORWARD)
     109              :       MARK_USED(FFTW_BACKWARD)
     110              :       MARK_USED(FFTW_MEASURE)
     111              :       MARK_USED(FFTW_DESTROY_INPUT)
     112              :       MARK_USED(FFTW_UNALIGNED)
     113              :       MARK_USED(FFTW_CONSERVE_MEMORY)
     114              :       MARK_USED(FFTW_EXHAUSTIVE)
     115              :       MARK_USED(FFTW_PRESERVE_INPUT)
     116              :       MARK_USED(FFTW_PATIENT)
     117              :       MARK_USED(FFTW_ESTIMATE)
     118              :       MARK_USED(FFTW_WISDOM_ONLY)
     119              :       MARK_USED(FFTW_ESTIMATE_PATIENT)
     120              :       MARK_USED(FFTW_BELIEVE_PCOST)
     121              :       MARK_USED(FFTW_NO_DFT_R2HC)
     122              :       MARK_USED(FFTW_NO_NONTHREADED)
     123              :       MARK_USED(FFTW_NO_BUFFERING)
     124              :       MARK_USED(FFTW_NO_INDIRECT_OP)
     125              :       MARK_USED(FFTW_ALLOW_LARGE_GENERIC)
     126              :       MARK_USED(FFTW_NO_RANK_SPLITS)
     127              :       MARK_USED(FFTW_NO_VRANK_SPLITS)
     128              :       MARK_USED(FFTW_NO_VRECURSE)
     129              :       MARK_USED(FFTW_NO_SIMD)
     130              :       MARK_USED(FFTW_NO_SLOW)
     131              :       MARK_USED(FFTW_NO_FIXED_RADIX_LARGE_N)
     132              :       MARK_USED(FFTW_ALLOW_PRUNING)
     133            0 :    END SUBROUTINE dummy_routine_to_call_mark_used
     134              : #endif
     135              : 
     136              : ! **************************************************************************************************
     137              : !> \brief ...
     138              : !> \param wisdom_file ...
     139              : !> \param ionode ...
     140              : ! **************************************************************************************************
     141         9741 :    SUBROUTINE fftw3_do_cleanup(wisdom_file, ionode)
     142              : 
     143              :       CHARACTER(LEN=*), INTENT(IN)             :: wisdom_file
     144              :       LOGICAL                                  :: ionode
     145              : 
     146              : #if defined(__FFTW3)
     147         9741 :       CHARACTER(LEN=1, KIND=C_CHAR), DIMENSION(:), ALLOCATABLE :: wisdom_file_name_c
     148              :       INTEGER                                  :: file_name_length, i, iunit, istat
     149              :       INTEGER(KIND=C_INT)                      :: isuccess
     150              :       ! Write out FFTW3 wisdom to file (if we can)
     151              :       ! only the ionode updates the wisdom
     152         9741 :       IF (ionode) THEN
     153         4974 :          iunit = get_unit_number()
     154              :          ! Check whether the file can be opened in the necessary manner
     155         4974 :          OPEN (UNIT=iunit, FILE=wisdom_file, STATUS="UNKNOWN", FORM="FORMATTED", ACTION="WRITE", IOSTAT=istat)
     156         4974 :          IF (istat == 0) THEN
     157            2 :             CLOSE (iunit)
     158            2 :             file_name_length = LEN_TRIM(wisdom_file)
     159            6 :             ALLOCATE (wisdom_file_name_c(file_name_length + 1))
     160           18 :             DO i = 1, file_name_length
     161           18 :                wisdom_file_name_c(i) = wisdom_file(i:i)
     162              :             END DO
     163            2 :             wisdom_file_name_c(file_name_length + 1) = C_NULL_CHAR
     164            2 :             isuccess = fftw_export_wisdom_to_filename(wisdom_file_name_c)
     165            2 :             IF (isuccess == 0) &
     166              :                CALL cp_warn(__LOCATION__, "Error exporting wisdom to file "//TRIM(wisdom_file)//". "// &
     167            0 :                             "Wisdom was not exported.")
     168              :          END IF
     169              :       END IF
     170              : 
     171         9741 :       CALL fftw_cleanup()
     172              : #else
     173              :       MARK_USED(wisdom_file)
     174              :       MARK_USED(ionode)
     175              : #endif
     176              : 
     177         9741 :    END SUBROUTINE fftw3_do_cleanup
     178              : 
     179              : ! **************************************************************************************************
     180              : !> \brief ...
     181              : !> \param wisdom_file ...
     182              : ! **************************************************************************************************
     183         9951 :    SUBROUTINE fftw3_do_init(wisdom_file)
     184              : 
     185              :       CHARACTER(LEN=*), INTENT(IN)             :: wisdom_file
     186              : 
     187              : #if defined(__FFTW3)
     188         9951 :       CHARACTER(LEN=1, KIND=C_CHAR), DIMENSION(:), ALLOCATABLE :: wisdom_file_name_c
     189              :       INTEGER                                  :: file_name_length, i, istat, iunit
     190              :       INTEGER(KIND=C_INT)                      :: isuccess
     191              :       LOGICAL :: file_exists
     192              : 
     193         9951 :       isuccess = fftw_init_threads()
     194         9951 :       IF (isuccess == 0) &
     195            0 :          CPABORT("Error initializing FFTW with threads")
     196              : 
     197              :       ! Read FFTW wisdom (if available)
     198              :       ! all nodes are opening the file here...
     199         9951 :       INQUIRE (FILE=wisdom_file, exist=file_exists)
     200         9951 :       IF (file_exists) THEN
     201            2 :          iunit = get_unit_number()
     202            2 :          file_name_length = LEN_TRIM(wisdom_file)
     203              :          ! Check whether the file can be opened in the necessary manner
     204              :          OPEN (UNIT=iunit, FILE=wisdom_file, STATUS="OLD", FORM="FORMATTED", POSITION="REWIND", &
     205            2 :                ACTION="READ", IOSTAT=istat)
     206            2 :          IF (istat == 0) THEN
     207            2 :             CLOSE (iunit)
     208            2 :             file_name_length = LEN_TRIM(wisdom_file)
     209            6 :             ALLOCATE (wisdom_file_name_c(file_name_length + 1))
     210           18 :             DO i = 1, file_name_length
     211           18 :                wisdom_file_name_c(i) = wisdom_file(i:i)
     212              :             END DO
     213            2 :             wisdom_file_name_c(file_name_length + 1) = C_NULL_CHAR
     214            2 :             isuccess = fftw_import_wisdom_from_filename(wisdom_file_name_c)
     215            2 :             IF (isuccess == 0) &
     216              :                CALL cp_warn(__LOCATION__, "Error importing wisdom from file "//TRIM(wisdom_file)//". "// &
     217              :                             "Maybe the file was created with a different configuration than CP2K is run with. "// &
     218            0 :                             "CP2K continues without importing wisdom.")
     219              :          END IF
     220              :       END IF
     221              : #else
     222              :       MARK_USED(wisdom_file)
     223              : #endif
     224              : 
     225         9951 :    END SUBROUTINE fftw3_do_init
     226              : 
     227              : ! **************************************************************************************************
     228              : !> \brief ...
     229              : !> \param DATA ...
     230              : !> \param max_length ...
     231              : !> \par History
     232              : !>      JGH 23-Jan-2006 : initial version
     233              : !>      Adapted for new interface
     234              : !>      IAB 09-Jan-2009 : Modified to cache plans in fft_plan_type
     235              : !>                        (c) The Numerical Algorithms Group (NAG) Ltd, 2009 on behalf of the HECToR project
     236              : !>      IAB 09-Oct-2009 : Added OpenMP directives to 1D FFT, and planning routines
     237              : !>                        (c) The Numerical Algorithms Group (NAG) Ltd, 2009 on behalf of the HECToR project
     238              : !>      IAB 11-Sep-2012 : OpenMP parallel 3D FFT (Ruyman Reyes, PRACE)
     239              : !> \author JGH
     240              : ! **************************************************************************************************
     241          528 :    SUBROUTINE fftw3_get_lengths(DATA, max_length)
     242              : 
     243              :       INTEGER, DIMENSION(*)                              :: DATA
     244              :       INTEGER, INTENT(INOUT)                             :: max_length
     245              : 
     246              :       INTEGER                                            :: h, i, j, k, m, maxn, maxn_elevens, &
     247              :                                                             maxn_fives, maxn_sevens, &
     248              :                                                             maxn_thirteens, maxn_threes, &
     249              :                                                             maxn_twos, ndata, nmax, number
     250          528 :       INTEGER, ALLOCATABLE, DIMENSION(:)                 :: dlocal, idx
     251              : 
     252              : !------------------------------------------------------------------------------
     253              : ! compute ndata
     254              : !! FFTW can do arbitrary(?) lengths, maybe you want to limit them to some
     255              : !!    powers of small prime numbers though...
     256              : 
     257          528 :       maxn_twos = 15
     258          528 :       maxn_threes = 3
     259          528 :       maxn_fives = 2
     260          528 :       maxn_sevens = 1
     261          528 :       maxn_elevens = 1
     262          528 :       maxn_thirteens = 0
     263          528 :       maxn = 37748736
     264              : 
     265          528 :       ndata = 0
     266         8976 :       DO h = 0, maxn_twos
     267         8448 :          nmax = HUGE(0)/2**h
     268        42768 :          DO i = 0, maxn_threes
     269       143616 :             DO j = 0, maxn_fives
     270       337920 :                DO k = 0, maxn_sevens
     271       709632 :                   DO m = 0, maxn_elevens
     272       405504 :                      number = (3**i)*(5**j)*(7**k)*(11**m)
     273              : 
     274       405504 :                      IF (number > nmax) CYCLE
     275              : 
     276       405504 :                      number = number*2**h
     277       405504 :                      IF (number >= maxn) CYCLE
     278              : 
     279       608256 :                      ndata = ndata + 1
     280              :                   END DO
     281              :                END DO
     282              :             END DO
     283              :          END DO
     284              :       END DO
     285              : 
     286         2112 :       ALLOCATE (dlocal(ndata), idx(ndata))
     287              : 
     288          528 :       ndata = 0
     289       389136 :       dlocal(:) = 0
     290         8976 :       DO h = 0, maxn_twos
     291         8448 :          nmax = HUGE(0)/2**h
     292        42768 :          DO i = 0, maxn_threes
     293       143616 :             DO j = 0, maxn_fives
     294       337920 :                DO k = 0, maxn_sevens
     295       709632 :                   DO m = 0, maxn_elevens
     296       405504 :                      number = (3**i)*(5**j)*(7**k)*(11**m)
     297              : 
     298       405504 :                      IF (number > nmax) CYCLE
     299              : 
     300       405504 :                      number = number*2**h
     301       405504 :                      IF (number >= maxn) CYCLE
     302              : 
     303       388608 :                      ndata = ndata + 1
     304       608256 :                      dlocal(ndata) = number
     305              :                   END DO
     306              :                END DO
     307              :             END DO
     308              :          END DO
     309              :       END DO
     310              : 
     311          528 :       CALL sortint(dlocal, ndata, idx)
     312          528 :       ndata = MIN(ndata, max_length)
     313       389136 :       DATA(1:ndata) = dlocal(1:ndata)
     314          528 :       max_length = ndata
     315              : 
     316          528 :       DEALLOCATE (dlocal, idx)
     317              : 
     318          528 :    END SUBROUTINE fftw3_get_lengths
     319              : 
     320              : ! **************************************************************************************************
     321              : !> \brief ...
     322              : !> \param iarr ...
     323              : !> \param n ...
     324              : !> \param index ...
     325              : ! **************************************************************************************************
     326          528 :    SUBROUTINE sortint(iarr, n, index)
     327              : 
     328              :       INTEGER, INTENT(IN)                                :: n
     329              :       INTEGER, INTENT(INOUT)                             :: iarr(1:n)
     330              :       INTEGER, INTENT(OUT)                               :: INDEX(1:n)
     331              : 
     332              :       INTEGER, PARAMETER                                 :: m = 7, nstack = 50
     333              : 
     334              :       INTEGER                                            :: a, i, ib, ir, istack(1:nstack), itemp, &
     335              :                                                             j, jstack, k, l, temp
     336              : 
     337              : !------------------------------------------------------------------------------
     338              : 
     339       389136 :       DO i = 1, n
     340       389136 :          INDEX(i) = i
     341              :       END DO
     342              :       jstack = 0
     343              :       l = 1
     344              :       ir = n
     345              :       DO WHILE (.TRUE.)
     346       146256 :       IF (ir - l < m) THEN
     347       315744 :          DO j = l + 1, ir
     348       242352 :             a = iarr(j)
     349       242352 :             ib = INDEX(j)
     350       575520 :             DO i = j - 1, 0, -1
     351       575520 :                IF (i == 0) EXIT
     352       574464 :                IF (iarr(i) <= a) EXIT
     353       333168 :                iarr(i + 1) = iarr(i)
     354       575520 :                INDEX(i + 1) = INDEX(i)
     355              :             END DO
     356       242352 :             iarr(i + 1) = a
     357       315744 :             INDEX(i + 1) = ib
     358              :          END DO
     359        73392 :          IF (jstack == 0) RETURN
     360        72864 :          ir = istack(jstack)
     361        72864 :          l = istack(jstack - 1)
     362        72864 :          jstack = jstack - 2
     363              :       ELSE
     364        72864 :          k = (l + ir)/2
     365        72864 :          temp = iarr(k)
     366        72864 :          iarr(k) = iarr(l + 1)
     367        72864 :          iarr(l + 1) = temp
     368        72864 :          itemp = INDEX(k)
     369        72864 :          INDEX(k) = INDEX(l + 1)
     370        72864 :          INDEX(l + 1) = itemp
     371        72864 :          IF (iarr(l + 1) > iarr(ir)) THEN
     372        34320 :             temp = iarr(l + 1)
     373        34320 :             iarr(l + 1) = iarr(ir)
     374        34320 :             iarr(ir) = temp
     375        34320 :             itemp = INDEX(l + 1)
     376        34320 :             INDEX(l + 1) = INDEX(ir)
     377        34320 :             INDEX(ir) = itemp
     378              :          END IF
     379        72864 :          IF (iarr(l) > iarr(ir)) THEN
     380        27456 :             temp = iarr(l)
     381        27456 :             iarr(l) = iarr(ir)
     382        27456 :             iarr(ir) = temp
     383        27456 :             itemp = INDEX(l)
     384        27456 :             INDEX(l) = INDEX(ir)
     385        27456 :             INDEX(ir) = itemp
     386              :          END IF
     387        72864 :          IF (iarr(l + 1) > iarr(l)) THEN
     388        27456 :             temp = iarr(l + 1)
     389        27456 :             iarr(l + 1) = iarr(l)
     390        27456 :             iarr(l) = temp
     391        27456 :             itemp = INDEX(l + 1)
     392        27456 :             INDEX(l + 1) = INDEX(l)
     393        27456 :             INDEX(l) = itemp
     394              :          END IF
     395        72864 :          i = l + 1
     396        72864 :          j = ir
     397        72864 :          a = iarr(l)
     398        72864 :          ib = INDEX(l)
     399       464112 :          DO WHILE (.TRUE.)
     400       536976 :             i = i + 1
     401      1742928 :             DO WHILE (iarr(i) < a)
     402      1205952 :                i = i + 1
     403              :             END DO
     404       536976 :             j = j - 1
     405      1330560 :             DO WHILE (iarr(j) > a)
     406       793584 :                j = j - 1
     407              :             END DO
     408       536976 :             IF (j < i) EXIT
     409       464112 :             temp = iarr(i)
     410       464112 :             iarr(i) = iarr(j)
     411       464112 :             iarr(j) = temp
     412       464112 :             itemp = INDEX(i)
     413       464112 :             INDEX(i) = INDEX(j)
     414       464112 :             INDEX(j) = itemp
     415              :          END DO
     416        72864 :          iarr(l) = iarr(j)
     417        72864 :          iarr(j) = a
     418        72864 :          INDEX(l) = INDEX(j)
     419        72864 :          INDEX(j) = ib
     420        72864 :          jstack = jstack + 2
     421        72864 :          IF (jstack > nstack) CPABORT("Nstack too small in sortint")
     422        72864 :          IF (ir - i + 1 >= j - l) THEN
     423        37488 :             istack(jstack) = ir
     424        37488 :             istack(jstack - 1) = i
     425        37488 :             ir = j - 1
     426              :          ELSE
     427        35376 :             istack(jstack) = j - 1
     428        35376 :             istack(jstack - 1) = l
     429        35376 :             l = i
     430              :          END IF
     431              :       END IF
     432              : 
     433              :       END DO
     434              : 
     435              :    END SUBROUTINE sortint
     436              : 
     437              : ! **************************************************************************************************
     438              : 
     439              : ! **************************************************************************************************
     440              : !> \brief ...
     441              : !> \param plan ...
     442              : !> \param fft_rank ...
     443              : !> \param dim_n ...
     444              : !> \param dim_istride ...
     445              : !> \param dim_ostride ...
     446              : !> \param hm_rank ...
     447              : !> \param hm_n ...
     448              : !> \param hm_istride ...
     449              : !> \param hm_ostride ...
     450              : !> \param zin ...
     451              : !> \param zout ...
     452              : !> \param fft_direction ...
     453              : !> \param fftw_plan_type ...
     454              : !> \param valid ...
     455              : ! **************************************************************************************************
     456        56556 :    SUBROUTINE fftw3_create_guru_plan(plan, fft_rank, dim_n, &
     457              :                                      dim_istride, dim_ostride, hm_rank, &
     458              :                                      hm_n, hm_istride, hm_ostride, &
     459              :                                      zin, zout, fft_direction, fftw_plan_type, &
     460              :                                      valid)
     461              : 
     462              :       TYPE(C_PTR), INTENT(INOUT)                         :: plan
     463              :       COMPLEX(KIND=dp), DIMENSION(*), INTENT(INOUT)      :: zin, zout
     464              :       INTEGER, INTENT(IN) :: dim_n(2), dim_istride(2), dim_ostride(2), &
     465              :                              hm_n(2), hm_istride(2), hm_ostride(2), fft_rank, &
     466              :                              fft_direction, fftw_plan_type, hm_rank
     467              :       LOGICAL, INTENT(OUT)                               :: valid
     468              : 
     469              : #if defined(__FFTW3)
     470              :       TYPE(fftw_iodim) :: dim(2), hm(2)
     471              :       INTEGER :: i
     472              : 
     473       169668 :       DO i = 1, 2
     474       113112 :          DIM(i) = fftw_iodim(dim_n(i), dim_istride(i), dim_ostride(i))
     475       169668 :          hm(i) = fftw_iodim(hm_n(i), hm_istride(i), hm_ostride(i))
     476              :       END DO
     477              : 
     478              :       plan = fftw_plan_guru_dft(fft_rank, &
     479              :                                 dim, hm_rank, hm, &
     480              :                                 zin, zout, &
     481        56556 :                                 fft_direction, fftw_plan_type)
     482              : 
     483        56556 :       valid = C_ASSOCIATED(plan)
     484              : 
     485              : #else
     486              :       MARK_USED(plan)
     487              :       MARK_USED(fft_rank)
     488              :       MARK_USED(dim_n)
     489              :       MARK_USED(dim_istride)
     490              :       MARK_USED(dim_ostride)
     491              :       MARK_USED(hm_rank)
     492              :       MARK_USED(hm_n)
     493              :       MARK_USED(hm_istride)
     494              :       MARK_USED(hm_ostride)
     495              :       MARK_USED(fft_direction)
     496              :       MARK_USED(fftw_plan_type)
     497              :       !MARK_USED does not work with assumed size arguments
     498              :       IF (.FALSE.) THEN; DO; IF (ABS(zin(1)) > ABS(zout(1))) EXIT; END DO; END IF
     499              :       valid = .FALSE.
     500              : 
     501              : #endif
     502              : 
     503        56556 :    END SUBROUTINE fftw3_create_guru_plan
     504              : 
     505              : ! **************************************************************************************************
     506              : 
     507              : ! **************************************************************************************************
     508              : !> \brief Attempt to create a plan with the guru interface for a 2d sub-space.
     509              : !>        If this fails, fall back to the FFTW3 threaded 3D transform instead
     510              : !>        of the hand-optimised version.
     511              : !> \return ...
     512              : ! **************************************************************************************************
     513        56556 :    FUNCTION fftw3_is_guru_supported() RESULT(guru_supported)
     514              :       LOGICAL :: guru_supported
     515              : #if defined(__FFTW3)
     516              :       INTEGER :: dim_n(2), dim_istride(2), dim_ostride(2), &
     517              :                  howmany_n(2), howmany_istride(2), howmany_ostride(2)
     518              :       TYPE(C_PTR)                          :: test_plan
     519              :       COMPLEX(KIND=dp), DIMENSION(1, 1, 1) :: zin
     520              : 
     521        56556 :       dim_n(1) = 1
     522        56556 :       dim_n(2) = 1
     523        56556 :       dim_istride(1) = 1
     524        56556 :       dim_istride(2) = 1
     525        56556 :       dim_ostride(1) = 1
     526        56556 :       dim_ostride(2) = 1
     527        56556 :       howmany_n(1) = 1
     528        56556 :       howmany_n(2) = 1
     529        56556 :       howmany_istride(1) = 1
     530        56556 :       howmany_istride(2) = 1
     531        56556 :       howmany_ostride(1) = 1
     532        56556 :       howmany_ostride(2) = 1
     533        56556 :       zin = z_zero
     534              :       CALL fftw3_create_guru_plan(test_plan, 1, &
     535              :                                   dim_n, dim_istride, dim_ostride, &
     536              :                                   2, howmany_n, howmany_istride, howmany_ostride, &
     537              :                                   zin, zin, &
     538        56556 :                                   FFTW_FORWARD, FFTW_ESTIMATE, guru_supported)
     539        56556 :       IF (guru_supported) THEN
     540        56556 :          CALL fftw_destroy_plan(test_plan)
     541              :       END IF
     542              : 
     543              : #else
     544              :       guru_supported = .FALSE.
     545              : #endif
     546              : 
     547        56556 :    END FUNCTION fftw3_is_guru_supported
     548              : 
     549              : ! **************************************************************************************************
     550              : 
     551              : ! **************************************************************************************************
     552              : !> \brief ...
     553              : !> \param nrows ...
     554              : !> \param nt ...
     555              : !> \param rows_per_thread ...
     556              : !> \param rows_per_thread_r ...
     557              : !> \param th_planA ...
     558              : !> \param th_planB ...
     559              : ! **************************************************************************************************
     560            0 :    SUBROUTINE fftw3_compute_rows_per_th(nrows, nt, rows_per_thread, rows_per_thread_r, &
     561              :                                         th_planA, th_planB)
     562              : 
     563              :       INTEGER, INTENT(IN)                                :: nrows, nt
     564              :       INTEGER, INTENT(OUT)                               :: rows_per_thread, rows_per_thread_r, &
     565              :                                                             th_planA, th_planB
     566              : 
     567            0 :       IF (MOD(nrows, nt) == 0) THEN
     568            0 :          rows_per_thread = nrows/nt
     569            0 :          rows_per_thread_r = 0
     570            0 :          th_planA = nt
     571            0 :          th_planB = 0
     572              :       ELSE
     573            0 :          rows_per_thread = nrows/nt + 1
     574            0 :          rows_per_thread_r = nrows/nt
     575            0 :          th_planA = MOD(nrows, nt)
     576            0 :          th_planB = nt - th_planA
     577              :       END IF
     578              : 
     579            0 :    END SUBROUTINE fftw3_compute_rows_per_th
     580              : 
     581              : ! **************************************************************************************************
     582              : 
     583              : ! **************************************************************************************************
     584              : !> \brief ...
     585              : !> \param plan ...
     586              : !> \param plan_r ...
     587              : !> \param dim_n ...
     588              : !> \param dim_istride ...
     589              : !> \param dim_ostride ...
     590              : !> \param hm_n ...
     591              : !> \param hm_istride ...
     592              : !> \param hm_ostride ...
     593              : !> \param input ...
     594              : !> \param output ...
     595              : !> \param fft_direction ...
     596              : !> \param fftw_plan_type ...
     597              : !> \param rows_per_th ...
     598              : !> \param rows_per_th_r ...
     599              : ! **************************************************************************************************
     600            0 :    SUBROUTINE fftw3_create_3d_plans(plan, plan_r, dim_n, dim_istride, dim_ostride, &
     601              :                                     hm_n, hm_istride, hm_ostride, &
     602              :                                     input, output, &
     603              :                                     fft_direction, fftw_plan_type, rows_per_th, &
     604              :                                     rows_per_th_r)
     605              : 
     606              :       TYPE(C_PTR), INTENT(INOUT)                         :: plan, plan_r
     607              :       INTEGER, INTENT(INOUT)                             :: dim_n(2), dim_istride(2), &
     608              :                                                             dim_ostride(2), hm_n(2), &
     609              :                                                             hm_istride(2), hm_ostride(2)
     610              :       COMPLEX(KIND=dp), DIMENSION(*), INTENT(INOUT)      :: input, output
     611              :       INTEGER, INTENT(INOUT)                             :: fft_direction, fftw_plan_type
     612              :       INTEGER, INTENT(IN)                                :: rows_per_th, rows_per_th_r
     613              : 
     614              :       LOGICAL                                            :: valid
     615              : 
     616              : ! First plans will have an additional row
     617              : 
     618            0 :       hm_n(2) = rows_per_th
     619              :       CALL fftw3_create_guru_plan(plan, 1, &
     620              :                                   dim_n, dim_istride, dim_ostride, &
     621              :                                   2, hm_n, hm_istride, hm_ostride, &
     622              :                                   input, output, &
     623            0 :                                   fft_direction, fftw_plan_type, valid)
     624              : 
     625            0 :       IF (.NOT. valid) THEN
     626            0 :          CPABORT("fftw3_create_plan")
     627              :       END IF
     628              : 
     629              :       !!!! Remainder
     630            0 :       hm_n(2) = rows_per_th_r
     631              :       CALL fftw3_create_guru_plan(plan_r, 1, &
     632              :                                   dim_n, dim_istride, dim_ostride, &
     633              :                                   2, hm_n, hm_istride, hm_ostride, &
     634              :                                   input, output, &
     635            0 :                                   fft_direction, fftw_plan_type, valid)
     636            0 :       IF (.NOT. valid) THEN
     637            0 :          CPABORT("fftw3_create_plan (remaining)")
     638              :       END IF
     639              : 
     640            0 :    END SUBROUTINE fftw3_create_3d_plans
     641              : 
     642              : ! **************************************************************************************************
     643              : 
     644              : ! **************************************************************************************************
     645              : !> \brief ...
     646              : !> \param plan ...
     647              : !> \param zin ...
     648              : !> \param zout ...
     649              : !> \param plan_style ...
     650              : ! **************************************************************************************************
     651        56556 :    SUBROUTINE fftw3_create_plan_3d(plan, zin, zout, plan_style)
     652              : 
     653              :       TYPE(fft_plan_type), INTENT(INOUT)              :: plan
     654              :       COMPLEX(KIND=dp), DIMENSION(*), INTENT(INOUT)      :: zin
     655              :       COMPLEX(KIND=dp), DIMENSION(*), INTENT(INOUT)      :: zout
     656              :       INTEGER                                            :: plan_style
     657              : #if defined(__FFTW3)
     658              :       INTEGER                                            :: n1, n2, n3
     659              :       INTEGER                                            :: nt
     660              :       INTEGER                                            :: rows_per_th
     661              :       INTEGER                                            :: rows_per_th_r
     662              :       INTEGER                                            :: fft_direction
     663              :       INTEGER                                            :: th_planA, th_planB
     664        56556 :       COMPLEX(KIND=dp), ALLOCATABLE                      :: tmp(:)
     665              : 
     666              :       ! GURU Interface
     667              :       INTEGER :: dim_n(2), dim_istride(2), dim_ostride(2), &
     668              :                  howmany_n(2), howmany_istride(2), howmany_ostride(2)
     669              : 
     670              :       INTEGER :: fftw_plan_type
     671       113072 :       SELECT CASE (plan_style)
     672              :       CASE (1)
     673        56516 :          fftw_plan_type = FFTW_ESTIMATE
     674              :       CASE (2)
     675            8 :          fftw_plan_type = FFTW_MEASURE
     676              :       CASE (3)
     677           24 :          fftw_plan_type = FFTW_PATIENT
     678              :       CASE (4)
     679            8 :          fftw_plan_type = FFTW_EXHAUSTIVE
     680              :       CASE DEFAULT
     681        56556 :          CPABORT("fftw3_create_plan_3d")
     682              :       END SELECT
     683              : 
     684        56556 :       IF (plan%fsign == +1) THEN
     685        28278 :          fft_direction = FFTW_FORWARD
     686              :       ELSE
     687        28278 :          fft_direction = FFTW_BACKWARD
     688              :       END IF
     689              : 
     690        56556 :       n1 = plan%n_3d(1)
     691        56556 :       n2 = plan%n_3d(2)
     692        56556 :       n3 = plan%n_3d(3)
     693              : 
     694        56556 :       nt = 1
     695        56556 : !$OMP PARALLEL DEFAULT(NONE) SHARED(nt)
     696              : !$OMP MASTER
     697              : !$    nt = omp_get_num_threads()
     698              : !$OMP END MASTER
     699              : !$OMP END PARALLEL
     700              : 
     701              :       IF ((.NOT. fftw3_is_guru_supported()) .OR. &
     702        56556 :           (.NOT. plan_style == 1) .OR. &
     703              :           (n1 < 256 .AND. n2 < 256 .AND. n3 < 256 .AND. nt == 1)) THEN
     704              :          ! If the plan type is MEASURE, PATIENT and EXHAUSTIVE or
     705              :          ! the grid size is small (and we are single-threaded) then
     706              :          ! FFTW3 does a better job than handmade optimization
     707              :          ! so plan a single 3D FFT which will execute using all the threads
     708              : 
     709        56556 :          plan%separated_plans = .FALSE.
     710        56556 : !$       CALL fftw_plan_with_nthreads(nt)
     711              : 
     712        56556 :          IF (plan%fft_in_place) THEN
     713        28278 :             plan%fftw_plan = fftw_plan_dft_3d(n3, n2, n1, zin, zin, fft_direction, fftw_plan_type)
     714              :          ELSE
     715        28278 :             plan%fftw_plan = fftw_plan_dft_3d(n3, n2, n1, zin, zout, fft_direction, fftw_plan_type)
     716              :          END IF
     717              :       ELSE
     718            0 :          ALLOCATE (tmp(n1*n2*n3))
     719              :          ! ************************* PLANS WITH TRANSPOSITIONS ****************************
     720              :          !  In the cases described above, we manually thread each stage of the 3D FFT.
     721              :          !
     722              :          !  The following plans replace the 3D FFT call by running 1D FFTW across all
     723              :          !  3 directions of the array.
     724              :          !
     725              :          !  Output of FFTW is transposed to ensure that the next round of FFTW access
     726              :          !  contiguous information.
     727              :          !
     728              :          !  Assuming the input matrix is M(n3,n2,n1), FFTW/Transp are :
     729              :          !  M(n3,n2,n1) -> fftw(x) -> M(n3,n1,n2) -> fftw(y) -> M(n1,n2,n3) -> fftw(z) -> M(n1,n2,n3)
     730              :          !  Notice that last matrix is transposed in the Z axis. A DO-loop in the execute routine
     731              :          !  will perform the final transposition. Performance evaluation showed that using an external
     732              :          !  DO loop to do the final transposition performed better than directly transposing the output.
     733              :          !  However, this might vary depending on the compiler/platform, so a potential tuning spot
     734              :          !  is to perform the final transposition within the fftw library rather than using the external loop
     735              :          !  See comments below in Z-FFT for how to transpose the output to avoid the final DO loop.
     736              :          !
     737              :          !  Doc. for the Guru interface is in http://www.fftw.org/doc/Guru-Interface.html
     738              :          !
     739              :          !  OpenMP : Work is distributed on the Z plane.
     740              :          !           All transpositions are out-of-place to facilitate multi-threading
     741              :          !
     742              :          !!!! Plan for X : M(n3,n2,n1) -> fftw(x) -> M(n3,n1,n2)
     743              :          CALL fftw3_compute_rows_per_th(n3, nt, rows_per_th, rows_per_th_r, &
     744            0 :                                         th_planA, th_planB)
     745              : 
     746            0 :          dim_n(1) = n1
     747            0 :          dim_istride(1) = 1
     748            0 :          dim_ostride(1) = n2
     749            0 :          howmany_n(1) = n2
     750            0 :          howmany_n(2) = rows_per_th
     751            0 :          howmany_istride(1) = n1
     752            0 :          howmany_istride(2) = n1*n2
     753            0 :          howmany_ostride(1) = 1
     754            0 :          howmany_ostride(2) = n1*n2
     755              :          CALL fftw3_create_3d_plans(plan%fftw_plan_nx, plan%fftw_plan_nx_r, &
     756              :                                     dim_n, dim_istride, dim_ostride, howmany_n, &
     757              :                                     howmany_istride, howmany_ostride, &
     758              :                                     zin, tmp, &
     759              :                                     fft_direction, fftw_plan_type, rows_per_th, &
     760            0 :                                     rows_per_th_r)
     761              : 
     762              :          !!!! Plan for Y : M(n3,n1,n2) -> fftw(y) -> M(n1,n2,n3)
     763              :          CALL fftw3_compute_rows_per_th(n3, nt, rows_per_th, rows_per_th_r, &
     764            0 :                                         th_planA, th_planB)
     765            0 :          dim_n(1) = n2
     766              :          dim_istride(1) = 1
     767            0 :          dim_ostride(1) = n3
     768            0 :          howmany_n(1) = n1
     769            0 :          howmany_n(2) = rows_per_th
     770            0 :          howmany_istride(1) = n2
     771              :          howmany_istride(2) = n1*n2
     772              :          !!! transposed Z axis on output
     773            0 :          howmany_ostride(1) = n2*n3
     774            0 :          howmany_ostride(2) = 1
     775              : 
     776              :          CALL fftw3_create_3d_plans(plan%fftw_plan_ny, plan%fftw_plan_ny_r, &
     777              :                                     dim_n, dim_istride, dim_ostride, &
     778              :                                     howmany_n, howmany_istride, howmany_ostride, &
     779              :                                     tmp, zin, &
     780              :                                     fft_direction, fftw_plan_type, rows_per_th, &
     781            0 :                                     rows_per_th_r)
     782              : 
     783              :          !!!! Plan for Z : M(n1,n2,n3) -> fftw(z) -> M(n1,n2,n3)
     784              :          CALL fftw3_compute_rows_per_th(n1, nt, rows_per_th, rows_per_th_r, &
     785            0 :                                         th_planA, th_planB)
     786            0 :          dim_n(1) = n3
     787              :          dim_istride(1) = 1
     788            0 :          dim_ostride(1) = 1          ! To transpose: n2*n1
     789            0 :          howmany_n(1) = n2
     790            0 :          howmany_n(2) = rows_per_th
     791            0 :          howmany_istride(1) = n3
     792            0 :          howmany_istride(2) = n2*n3
     793            0 :          howmany_ostride(1) = n3     ! To transpose: n1
     794            0 :          howmany_ostride(2) = n2*n3  ! To transpose: 1
     795              : 
     796              :          CALL fftw3_create_3d_plans(plan%fftw_plan_nz, plan%fftw_plan_nz_r, &
     797              :                                     dim_n, dim_istride, dim_ostride, &
     798              :                                     howmany_n, howmany_istride, howmany_ostride, &
     799              :                                     zin, tmp, &
     800              :                                     fft_direction, fftw_plan_type, rows_per_th, &
     801            0 :                                     rows_per_th_r)
     802              : 
     803            0 :          plan%separated_plans = .TRUE.
     804              : 
     805            0 :          DEALLOCATE (tmp)
     806              :       END IF
     807              : 
     808              : #else
     809              :       MARK_USED(plan)
     810              :       MARK_USED(plan_style)
     811              :       !MARK_USED does not work with assumed size arguments
     812              :       IF (.FALSE.) THEN; DO; IF (ABS(zin(1)) > ABS(zout(1))) EXIT; END DO; END IF
     813              : #endif
     814              : 
     815        56556 :    END SUBROUTINE fftw3_create_plan_3d
     816              : 
     817              : ! **************************************************************************************************
     818              : 
     819              : ! **************************************************************************************************
     820              : !> \brief ...
     821              : !> \param plan ...
     822              : !> \param plan_r ...
     823              : !> \param split_dim ...
     824              : !> \param nt ...
     825              : !> \param tid ...
     826              : !> \param input ...
     827              : !> \param istride ...
     828              : !> \param output ...
     829              : !> \param ostride ...
     830              : ! **************************************************************************************************
     831            0 :    SUBROUTINE fftw3_workshare_execute_dft(plan, plan_r, split_dim, nt, tid, &
     832              :                                           input, istride, output, ostride)
     833              : 
     834              :       INTEGER, INTENT(IN)                           :: split_dim, nt, tid
     835              :       INTEGER, INTENT(IN)                           :: istride, ostride
     836              :       COMPLEX(KIND=dp), DIMENSION(*), INTENT(INOUT) :: input, output
     837              :       TYPE(C_PTR)                                   :: plan, plan_r
     838              : #if defined(__FFTW3)
     839              :       INTEGER                                     :: i_off, o_off
     840              :       INTEGER                                     :: th_planA, th_planB
     841              :       INTEGER :: rows_per_thread, rows_per_thread_r
     842              : 
     843              :       CALL fftw3_compute_rows_per_th(split_dim, nt, rows_per_thread, &
     844              :                                      rows_per_thread_r, &
     845            0 :                                      th_planA, th_planB)
     846              : 
     847            0 :       IF (th_planB > 0) THEN
     848            0 :          IF (tid < th_planA) THEN
     849            0 :             i_off = (tid)*(istride*(rows_per_thread)) + 1
     850            0 :             o_off = (tid)*(ostride*(rows_per_thread)) + 1
     851            0 :             IF (rows_per_thread > 0) THEN
     852              :                CALL fftw_execute_dft(plan, input(i_off), &
     853            0 :                                      output(o_off))
     854              :             END IF
     855            0 :          ELSE IF ((tid - th_planA) < th_planB) THEN
     856              : 
     857              :             i_off = (th_planA)*istride*(rows_per_thread) + &
     858            0 :                     (tid - th_planA)*istride*(rows_per_thread_r) + 1
     859              :             o_off = (th_planA)*ostride*(rows_per_thread) + &
     860            0 :                     (tid - th_planA)*ostride*(rows_per_thread_r) + 1
     861              : 
     862              :             CALL fftw_execute_dft(plan_r, input(i_off), &
     863            0 :                                   output(o_off))
     864              :          END IF
     865              : 
     866              :       ELSE
     867            0 :          i_off = (tid)*(istride*(rows_per_thread)) + 1
     868            0 :          o_off = (tid)*(ostride*(rows_per_thread)) + 1
     869              : 
     870              :          CALL fftw_execute_dft(plan, input(i_off), &
     871            0 :                                output(o_off))
     872              : 
     873              :       END IF
     874              : #else
     875              :       MARK_USED(plan)
     876              :       MARK_USED(plan_r)
     877              :       MARK_USED(split_dim)
     878              :       MARK_USED(nt)
     879              :       MARK_USED(tid)
     880              :       MARK_USED(istride)
     881              :       MARK_USED(ostride)
     882              :       !MARK_USED does not work with assumed size arguments
     883              :       IF (.FALSE.) THEN; DO; IF (ABS(input(1)) > ABS(output(1))) EXIT; END DO; END IF
     884              : #endif
     885              : 
     886            0 :    END SUBROUTINE fftw3_workshare_execute_dft
     887              : 
     888              : ! **************************************************************************************************
     889              : 
     890              : ! **************************************************************************************************
     891              : !> \brief ...
     892              : !> \param plan ...
     893              : !> \param scale ...
     894              : !> \param zin ...
     895              : !> \param zout ...
     896              : !> \param stat ...
     897              : ! **************************************************************************************************
     898       650290 :    SUBROUTINE fftw33d(plan, scale, zin, zout, stat)
     899              : 
     900              :       TYPE(fft_plan_type), INTENT(IN)                      :: plan
     901              :       REAL(KIND=dp), INTENT(IN)                            :: scale
     902              :       COMPLEX(KIND=dp), DIMENSION(*), INTENT(INOUT), TARGET:: zin
     903              :       COMPLEX(KIND=dp), DIMENSION(*), INTENT(INOUT), TARGET:: zout
     904              :       INTEGER, INTENT(OUT)                                 :: stat
     905              : #if defined(__FFTW3)
     906       650290 :       COMPLEX(KIND=dp), POINTER                            :: xout(:)
     907       650290 :       COMPLEX(KIND=dp), ALLOCATABLE                        :: tmp1(:)
     908              :       INTEGER                                              :: n1, n2, n3
     909              :       INTEGER                                              :: tid, nt
     910              :       INTEGER                                              :: i, j, k
     911              : 
     912       650290 :       n1 = plan%n_3d(1)
     913       650290 :       n2 = plan%n_3d(2)
     914       650290 :       n3 = plan%n_3d(3)
     915              : 
     916       650290 :       stat = 1
     917              : 
     918              :       ! We use a POINTER to the output array to avoid duplicating code
     919       650290 :       IF (plan%fft_in_place) THEN
     920       650286 :          xout => zin(:n1*n2*n3)
     921              :       ELSE
     922            4 :          xout => zout(:n1*n2*n3)
     923              :       END IF
     924              : 
     925              :       ! Either compute the full 3D FFT using a multithreaded plan
     926       650290 :       IF (.NOT. plan%separated_plans) THEN
     927       650290 :          CALL fftw_execute_dft(plan%fftw_plan, zin, xout)
     928              :       ELSE
     929              :          ! Or use the 3 stage FFT scheme described in fftw3_create_plan_3d
     930            0 :          ALLOCATE (tmp1(n1*n2*n3))   ! Temporary vector used for transpositions
     931            0 :          !$OMP PARALLEL DEFAULT(NONE) PRIVATE(tid,nt,i,j,k) SHARED(zin,tmp1,n1,n2,n3,plan,xout)
     932              :          tid = 0
     933              :          nt = 1
     934              : 
     935              : !$       tid = omp_get_thread_num()
     936              : !$       nt = omp_get_num_threads()
     937              :          CALL fftw3_workshare_execute_dft(plan%fftw_plan_nx, plan%fftw_plan_nx_r, &
     938              :                                           n3, nt, tid, &
     939              :                                           zin, n1*n2, tmp1, n1*n2)
     940              : 
     941              :          !$OMP BARRIER
     942              :          CALL fftw3_workshare_execute_dft(plan%fftw_plan_ny, plan%fftw_plan_ny_r, &
     943              :                                           n3, nt, tid, &
     944              :                                           tmp1, n1*n2, xout, 1)
     945              :          !$OMP BARRIER
     946              :          CALL fftw3_workshare_execute_dft(plan%fftw_plan_nz, plan%fftw_plan_nz_r, &
     947              :                                           n1, nt, tid, &
     948              :                                           xout, n2*n3, tmp1, n2*n3)
     949              :          !$OMP BARRIER
     950              : 
     951              :          !$OMP DO COLLAPSE(3)
     952              :          DO i = 1, n1
     953              :             DO j = 1, n2
     954              :                DO k = 1, n3
     955              :                   xout((i - 1) + (j - 1)*n1 + (k - 1)*n1*n2 + 1) = &
     956              :                      tmp1((k - 1) + (j - 1)*n3 + (i - 1)*n3*n2 + 1)
     957              :                END DO
     958              :             END DO
     959              :          END DO
     960              :          !$OMP END DO
     961              : 
     962              :          !$OMP END PARALLEL
     963              :       END IF
     964              : 
     965       650290 :       IF (scale /= 1.0_dp) THEN
     966       308975 :          CALL zdscal(n1*n2*n3, scale, xout, 1)
     967              :       END IF
     968              : 
     969              : #else
     970              :       MARK_USED(plan)
     971              :       MARK_USED(scale)
     972              :       !MARK_USED does not work with assumed size arguments
     973              :       IF (.FALSE.) THEN; DO; IF (ABS(zin(1)) > ABS(zout(1))) EXIT; END DO; END IF
     974              :       stat = 0
     975              : 
     976              : #endif
     977              : 
     978       650290 :    END SUBROUTINE fftw33d
     979              : 
     980              : ! **************************************************************************************************
     981              : 
     982              : ! **************************************************************************************************
     983              : !> \brief ...
     984              : !> \param plan ...
     985              : !> \param zin ...
     986              : !> \param zout ...
     987              : !> \param plan_style ...
     988              : ! **************************************************************************************************
     989       163752 :    SUBROUTINE fftw3_create_plan_1dm(plan, zin, zout, plan_style)
     990              :       TYPE(fft_plan_type), INTENT(INOUT)                 :: plan
     991              :       COMPLEX(KIND=dp), DIMENSION(*), INTENT(IN)         :: zin
     992              :       COMPLEX(KIND=dp), DIMENSION(*), INTENT(IN)         :: zout
     993              :       INTEGER, INTENT(IN)                                :: plan_style
     994              : #if defined(__FFTW3)
     995              :       INTEGER                                            :: istride, idist, ostride, odist, num_threads, num_rows
     996              : 
     997              :       INTEGER :: fftw_plan_type
     998       327288 :       SELECT CASE (plan_style)
     999              :       CASE (1)
    1000       163536 :          fftw_plan_type = FFTW_ESTIMATE
    1001              :       CASE (2)
    1002           48 :          fftw_plan_type = FFTW_MEASURE
    1003              :       CASE (3)
    1004          156 :          fftw_plan_type = FFTW_PATIENT
    1005              :       CASE (4)
    1006           12 :          fftw_plan_type = FFTW_EXHAUSTIVE
    1007              :       CASE DEFAULT
    1008       163752 :          CPABORT("fftw3_create_plan_1dm")
    1009              :       END SELECT
    1010              : 
    1011       163752 :       num_threads = 1
    1012       163752 :       plan%separated_plans = .FALSE.
    1013              : !$OMP PARALLEL DEFAULT(NONE), &
    1014       163752 : !$OMP          SHARED(NUM_THREADS)
    1015              : !$OMP MASTER
    1016              : !$    num_threads = omp_get_num_threads()
    1017              : !$OMP END MASTER
    1018              : !$OMP END PARALLEL
    1019              : 
    1020       163752 :       num_rows = plan%m/num_threads
    1021       163752 : !$    plan%num_threads_needed = num_threads
    1022              : 
    1023              : ! Check for number of rows less than num_threads
    1024       163752 : !$    IF (plan%m < num_threads) THEN
    1025            0 : !$       num_rows = 1
    1026            0 : !$       plan%num_threads_needed = plan%m
    1027              : !$    END IF
    1028              : 
    1029              : ! Check for total number of rows not divisible by num_threads
    1030       163752 : !$    IF (num_rows*plan%num_threads_needed /= plan%m) THEN
    1031            0 : !$       plan%need_alt_plan = .TRUE.
    1032              : !$    END IF
    1033              : 
    1034       163752 : !$    plan%num_rows = num_rows
    1035       163752 :       istride = 1
    1036       163752 :       idist = plan%n
    1037       163752 :       ostride = 1
    1038       163752 :       odist = plan%n
    1039       163752 :       IF (plan%fsign == +1 .AND. plan%trans) THEN
    1040        81864 :          istride = plan%m
    1041        81864 :          idist = 1
    1042        81888 :       ELSEIF (plan%fsign == -1 .AND. plan%trans) THEN
    1043        81864 :          ostride = plan%m
    1044        81864 :          odist = 1
    1045              :       END IF
    1046              : 
    1047       163752 :       IF (plan%fsign == +1) THEN
    1048              :          CALL dfftw_plan_many_dft(plan%fftw_plan, 1, plan%n, num_rows, zin, 0, istride, idist, &
    1049        81876 :                                   zout, 0, ostride, odist, FFTW_FORWARD, fftw_plan_type)
    1050              :       ELSE
    1051              :          CALL dfftw_plan_many_dft(plan%fftw_plan, 1, plan%n, num_rows, zin, 0, istride, idist, &
    1052        81876 :                                   zout, 0, ostride, odist, FFTW_BACKWARD, fftw_plan_type)
    1053              :       END IF
    1054              : 
    1055       163752 : !$    IF (plan%need_alt_plan) THEN
    1056            0 : !$       plan%alt_num_rows = plan%m - (plan%num_threads_needed - 1)*num_rows
    1057            0 : !$       IF (plan%fsign == +1) THEN
    1058              : !$          CALL dfftw_plan_many_dft(plan%alt_fftw_plan, 1, plan%n, plan%alt_num_rows, zin, 0, istride, idist, &
    1059            0 : !$                                   zout, 0, ostride, odist, FFTW_FORWARD, fftw_plan_type)
    1060              : !$       ELSE
    1061              : !$          CALL dfftw_plan_many_dft(plan%alt_fftw_plan, 1, plan%n, plan%alt_num_rows, zin, 0, istride, idist, &
    1062            0 : !$                                   zout, 0, ostride, odist, FFTW_BACKWARD, fftw_plan_type)
    1063              : !$       END IF
    1064              : !$    END IF
    1065              : 
    1066              : #else
    1067              :       MARK_USED(plan)
    1068              :       MARK_USED(plan_style)
    1069              :       !MARK_USED does not work with assumed size arguments
    1070              :       IF (.FALSE.) THEN; DO; IF (ABS(zin(1)) > ABS(zout(1))) EXIT; END DO; END IF
    1071              : #endif
    1072              : 
    1073       163752 :    END SUBROUTINE fftw3_create_plan_1dm
    1074              : 
    1075              : ! **************************************************************************************************
    1076              : !> \brief ...
    1077              : !> \param plan ...
    1078              : ! **************************************************************************************************
    1079       220308 :    SUBROUTINE fftw3_destroy_plan(plan)
    1080              : 
    1081              :       TYPE(fft_plan_type), INTENT(INOUT)   :: plan
    1082              : 
    1083              : #if defined(__FFTW3)
    1084       220308 : !$    IF (plan%need_alt_plan) THEN
    1085            0 : !$       CALL fftw_destroy_plan(plan%alt_fftw_plan)
    1086              : !$    END IF
    1087              : 
    1088       220308 :       IF (.NOT. plan%separated_plans) THEN
    1089       220308 :          CALL fftw_destroy_plan(plan%fftw_plan)
    1090              :       ELSE
    1091              :          ! If it is a separated plan then we have to destroy
    1092              :          ! each dim plan individually
    1093            0 :          CALL fftw_destroy_plan(plan%fftw_plan_nx)
    1094            0 :          CALL fftw_destroy_plan(plan%fftw_plan_ny)
    1095            0 :          CALL fftw_destroy_plan(plan%fftw_plan_nz)
    1096            0 :          CALL fftw_destroy_plan(plan%fftw_plan_nx_r)
    1097            0 :          CALL fftw_destroy_plan(plan%fftw_plan_ny_r)
    1098            0 :          CALL fftw_destroy_plan(plan%fftw_plan_nz_r)
    1099              :       END IF
    1100              : 
    1101              : #else
    1102              :       MARK_USED(plan)
    1103              : #endif
    1104              : 
    1105       220308 :    END SUBROUTINE fftw3_destroy_plan
    1106              : 
    1107              : ! **************************************************************************************************
    1108              : !> \brief ...
    1109              : !> \param plan ...
    1110              : !> \param zin ...
    1111              : !> \param zout ...
    1112              : !> \param scale ...
    1113              : !> \param stat ...
    1114              : ! **************************************************************************************************
    1115      8462778 :    SUBROUTINE fftw31dm(plan, zin, zout, scale, stat)
    1116              :       TYPE(fft_plan_type), INTENT(IN)                    :: plan
    1117              :       COMPLEX(KIND=dp), DIMENSION(*), INTENT(IN), TARGET :: zin
    1118              :       COMPLEX(KIND=dp), DIMENSION(*), INTENT(INOUT), &
    1119              :          TARGET                                          :: zout
    1120              :       REAL(KIND=dp), INTENT(IN)                          :: scale
    1121              :       INTEGER, INTENT(OUT)                               :: stat
    1122              : 
    1123              :       INTEGER                                            :: in_offset, my_id, num_rows, out_offset, &
    1124              :                                                             scal_offset
    1125              :       TYPE(C_PTR)                                        :: fftw_plan
    1126              : 
    1127              : !------------------------------------------------------------------------------
    1128              : 
    1129      8462778 :       my_id = 0
    1130      8462778 :       num_rows = plan%m
    1131              : 
    1132              : !$OMP PARALLEL DEFAULT(NONE), &
    1133              : !$OMP          PRIVATE(my_id,num_rows,in_offset,out_offset,scal_offset,fftw_plan), &
    1134              : !$OMP          SHARED(zin,zout), &
    1135      8462778 : !$OMP          SHARED(plan,scale,stat)
    1136              : !$    my_id = omp_get_thread_num()
    1137              : 
    1138              : !$    if (my_id < plan%num_threads_needed) then
    1139              : 
    1140              :          fftw_plan = plan%fftw_plan
    1141              : 
    1142              :          in_offset = 1
    1143              :          out_offset = 1
    1144              :          scal_offset = 1
    1145              : 
    1146              : !$       in_offset = 1 + plan%num_rows*my_id*plan%n
    1147              : !$       out_offset = 1 + plan%num_rows*my_id*plan%n
    1148              : !$       IF (plan%fsign == +1 .AND. plan%trans) THEN
    1149              : !$          in_offset = 1 + plan%num_rows*my_id
    1150              : !$       ELSEIF (plan%fsign == -1 .AND. plan%trans) THEN
    1151              : !$          out_offset = 1 + plan%num_rows*my_id
    1152              : !$       END IF
    1153              : !$       scal_offset = 1 + plan%n*plan%num_rows*my_id
    1154              : !$       IF (plan%need_alt_plan .AND. my_id == plan%num_threads_needed - 1) THEN
    1155              : !$          num_rows = plan%alt_num_rows
    1156              : !$          fftw_plan = plan%alt_fftw_plan
    1157              : !$       ELSE
    1158              : !$          num_rows = plan%num_rows
    1159              : !$       END IF
    1160              : 
    1161              : #if defined(__FFTW3)
    1162              : !$OMP MASTER
    1163              :          stat = 1
    1164              : !$OMP END MASTER
    1165              :          CALL dfftw_execute_dft(fftw_plan, zin(in_offset:in_offset), zout(out_offset:out_offset))
    1166              : !$    end if
    1167              : ! all threads need to meet at this barrier
    1168              : !$OMP BARRIER
    1169              : !$    if (my_id < plan%num_threads_needed) then
    1170              :          IF (scale /= 1.0_dp) CALL zdscal(plan%n*num_rows, scale, zout(scal_offset:scal_offset), 1)
    1171              : !$    end if
    1172              : 
    1173              : #else
    1174              :       MARK_USED(plan)
    1175              :       MARK_USED(scale)
    1176              :       !MARK_USED does not work with assumed size arguments
    1177              :       IF (.FALSE.) THEN; DO; IF (ABS(zin(1)) > ABS(zout(1))) EXIT; END DO; END IF
    1178              :       stat = 0
    1179              : 
    1180              : !$    else
    1181              : !$    end if
    1182              : 
    1183              : #endif
    1184              : 
    1185              : !$OMP END PARALLEL
    1186              : 
    1187      8462778 :       END SUBROUTINE fftw31dm
    1188              : 
    1189            0 :    END MODULE fftw3_lib
        

Generated by: LCOV version 2.0-1