LCOV - code coverage report
Current view: top level - src/pw/fft - fftw3_lib.F (source / functions) Hit Total Coverage
Test: CP2K Regtests (git:fe90383) Lines: 239 339 70.5 %
Date: 2024-06-15 07:32:52 Functions: 15 25 60.0 %

          Line data    Source code
       1             : !--------------------------------------------------------------------------------------------------!
       2             : !   CP2K: A general program to perform molecular dynamics simulations                              !
       3             : !   Copyright 2000-2024 CP2K developers group <https://cp2k.org>                                   !
       4             : !                                                                                                  !
       5             : !   SPDX-License-Identifier: GPL-2.0-or-later                                                      !
       6             : !--------------------------------------------------------------------------------------------------!
       7             : MODULE fftw3_lib
       8             : 
       9             :    USE ISO_C_BINDING, ONLY: C_ASSOCIATED, &
      10             :                             C_CHAR, &
      11             :                             C_DOUBLE, &
      12             :                             C_DOUBLE_COMPLEX, &
      13             :                             C_INT, &
      14             :                             C_PTR
      15             : #if defined(__FFTW3)
      16             :    USE ISO_C_BINDING, ONLY: &
      17             :       C_FLOAT, &
      18             :       C_FLOAT_COMPLEX, &
      19             :       C_FUNPTR, &
      20             :       C_INT32_T, &
      21             :       C_INTPTR_T, &
      22             :       C_LOC, &
      23             :       C_NULL_CHAR, &
      24             :       C_SIZE_T, C_F_POINTER
      25             : #endif
      26             :    USE cp_files, ONLY: get_unit_number
      27             :    USE fft_kinds, ONLY: dp
      28             :    USE fft_plan, ONLY: fft_plan_type
      29             : 
      30             : !$ USE OMP_LIB, ONLY: omp_get_max_threads, omp_get_thread_num, omp_get_num_threads
      31             : 
      32             : #include "../../base/base_uses.f90"
      33             : 
      34             :    IMPLICIT NONE
      35             :    PRIVATE
      36             : 
      37             :    PUBLIC :: fftw3_do_init, fftw3_do_cleanup, fftw3_get_lengths, fftw33d, fftw31dm
      38             :    PUBLIC :: fftw3_destroy_plan, fftw3_create_plan_1dm, fftw3_create_plan_3d
      39             :    PUBLIC :: fftw_alloc, fftw_dealloc
      40             : 
      41             : #if defined ( __FFTW3 )
      42             : #if defined ( __FFTW3_MKL )
      43             : #include "fftw/fftw3.f03"
      44             : #else
      45             : #include "fftw3.f03"
      46             : #endif
      47             : #endif
      48             : 
      49             :    INTERFACE fftw_alloc
      50             :       MODULE PROCEDURE :: fftw_alloc_complex_1d
      51             :       MODULE PROCEDURE :: fftw_alloc_complex_2d
      52             :       MODULE PROCEDURE :: fftw_alloc_complex_3d
      53             :    END INTERFACE
      54             : 
      55             :    INTERFACE fftw_dealloc
      56             :       MODULE PROCEDURE :: fftw_dealloc_complex_1d
      57             :       MODULE PROCEDURE :: fftw_dealloc_complex_2d
      58             :       MODULE PROCEDURE :: fftw_dealloc_complex_3d
      59             :    END INTERFACE
      60             : 
      61             : CONTAINS
      62             : 
      63             :    #:set maxdim = 3
      64             :    #:for dim in range(1, maxdim+1)
      65             : ! Concatenate the components of the dimensions passed to this function to use it if FFTW3 is not used
      66             :       #:set dim_extended = ", ".join(["n("+str(i)+")" for i in range(1, dim+1)])
      67       92421 :       SUBROUTINE fftw_alloc_complex_${dim}$d(array, n)
      68             :          COMPLEX(C_DOUBLE_COMPLEX), DIMENSION(:${", :"*(dim-1)}$), CONTIGUOUS, POINTER, INTENT(OUT) :: array
      69             :          INTEGER, DIMENSION(${dim}$), INTENT(IN) :: n
      70             : 
      71             : #if defined(__FFTW3)
      72             :          TYPE(C_PTR) :: data_ptr
      73      322002 :          data_ptr = fftw_alloc_complex(INT(PRODUCT(n), KIND=C_SIZE_T))
      74      322002 :          CALL C_F_POINTER(data_ptr, array, n)
      75             : #else
      76             : ! Just allocate the array
      77             :          ALLOCATE (array(${dim_extended}$))
      78             : #endif
      79             : 
      80       92421 :       END SUBROUTINE fftw_alloc_complex_${dim}$d
      81             : 
      82       92421 :       SUBROUTINE fftw_dealloc_complex_${dim}$d(array)
      83             :          COMPLEX(C_DOUBLE_COMPLEX), DIMENSION(:${", :"*(dim-1)}$), CONTIGUOUS, POINTER, INTENT(INOUT) :: array
      84             : 
      85             : #if defined(__FFTW3)
      86       92421 :          CALL fftw_free(C_LOC(array))
      87       92421 :          NULLIFY (array)
      88             : #else
      89             : ! Just deallocate the array
      90             :          DEALLOCATE (array)
      91             : #endif
      92             : 
      93       92421 :       END SUBROUTINE fftw_dealloc_complex_${dim}$d
      94             :    #:endfor
      95             : 
      96             : #if defined ( __FFTW3 )
      97             : ! **************************************************************************************************
      98             : !> \brief A workaround that allows us to compile with -Werror=unused-parameter
      99             : ! **************************************************************************************************
     100           0 :    SUBROUTINE dummy_routine_to_call_mark_used()
     101             :       MARK_USED(FFTW_R2HC)
     102             :       MARK_USED(FFTW_HC2R)
     103             :       MARK_USED(FFTW_DHT)
     104             :       MARK_USED(FFTW_REDFT00)
     105             :       MARK_USED(FFTW_REDFT01)
     106             :       MARK_USED(FFTW_REDFT10)
     107             :       MARK_USED(FFTW_REDFT11)
     108             :       MARK_USED(FFTW_RODFT00)
     109             :       MARK_USED(FFTW_RODFT01)
     110             :       MARK_USED(FFTW_RODFT10)
     111             :       MARK_USED(FFTW_RODFT11)
     112             :       MARK_USED(FFTW_FORWARD)
     113             :       MARK_USED(FFTW_BACKWARD)
     114             :       MARK_USED(FFTW_MEASURE)
     115             :       MARK_USED(FFTW_DESTROY_INPUT)
     116             :       MARK_USED(FFTW_UNALIGNED)
     117             :       MARK_USED(FFTW_CONSERVE_MEMORY)
     118             :       MARK_USED(FFTW_EXHAUSTIVE)
     119             :       MARK_USED(FFTW_PRESERVE_INPUT)
     120             :       MARK_USED(FFTW_PATIENT)
     121             :       MARK_USED(FFTW_ESTIMATE)
     122             :       MARK_USED(FFTW_WISDOM_ONLY)
     123             :       MARK_USED(FFTW_ESTIMATE_PATIENT)
     124             :       MARK_USED(FFTW_BELIEVE_PCOST)
     125             :       MARK_USED(FFTW_NO_DFT_R2HC)
     126             :       MARK_USED(FFTW_NO_NONTHREADED)
     127             :       MARK_USED(FFTW_NO_BUFFERING)
     128             :       MARK_USED(FFTW_NO_INDIRECT_OP)
     129             :       MARK_USED(FFTW_ALLOW_LARGE_GENERIC)
     130             :       MARK_USED(FFTW_NO_RANK_SPLITS)
     131             :       MARK_USED(FFTW_NO_VRANK_SPLITS)
     132             :       MARK_USED(FFTW_NO_VRECURSE)
     133             :       MARK_USED(FFTW_NO_SIMD)
     134             :       MARK_USED(FFTW_NO_SLOW)
     135             :       MARK_USED(FFTW_NO_FIXED_RADIX_LARGE_N)
     136             :       MARK_USED(FFTW_ALLOW_PRUNING)
     137           0 :    END SUBROUTINE
     138             : #endif
     139             : 
     140             : ! **************************************************************************************************
     141             : !> \brief ...
     142             : !> \param wisdom_file ...
     143             : !> \param ionode ...
     144             : ! **************************************************************************************************
     145        8785 :    SUBROUTINE fftw3_do_cleanup(wisdom_file, ionode)
     146             : 
     147             :       CHARACTER(LEN=*), INTENT(IN)             :: wisdom_file
     148             :       LOGICAL                                  :: ionode
     149             : 
     150             : #if defined ( __FFTW3 )
     151             :       INTEGER                                  :: iunit, istat
     152             :       INTEGER(KIND=C_INT)                      :: isuccess
     153             :       ! Write out FFTW3 wisdom to file (if we can)
     154             :       ! only the ionode updates the wisdom
     155        8785 :       IF (ionode) THEN
     156        4496 :          iunit = get_unit_number()
     157        4496 :          OPEN (UNIT=iunit, FILE=wisdom_file, STATUS="UNKNOWN", FORM="FORMATTED", ACTION="WRITE", IOSTAT=istat)
     158        4496 :          IF (istat == 0) THEN
     159           0 :             isuccess = fftw_export_wisdom_to_filename(wisdom_file//C_NULL_CHAR)
     160           0 :             IF (isuccess == 0) &
     161           0 :                CPABORT("Error exporting wisdom to file "//wisdom_file)
     162           0 :             CLOSE (iunit)
     163             :          END IF
     164             :       END IF
     165             : 
     166        8785 :       CALL fftw_cleanup()
     167             : #else
     168             :       MARK_USED(wisdom_file)
     169             :       MARK_USED(ionode)
     170             : #endif
     171             : 
     172        8785 :    END SUBROUTINE
     173             : 
     174             : ! **************************************************************************************************
     175             : !> \brief ...
     176             : !> \param wisdom_file ...
     177             : ! **************************************************************************************************
     178        8995 :    SUBROUTINE fftw3_do_init(wisdom_file)
     179             : 
     180             :       CHARACTER(LEN=*), INTENT(IN)             :: wisdom_file
     181             : 
     182             : #if defined ( __FFTW3 )
     183             :       INTEGER                                  :: istat, iunit
     184             :       INTEGER(KIND=C_INT) :: isuccess
     185             :       LOGICAL                                  :: exist
     186             : 
     187             : #if defined(__MKL)
     188             : !$    LOGICAL                                  :: mkl_is_safe
     189             : #endif
     190             : 
     191             : ! If using the Intel compiler then we need to declare
     192             : ! a C interface to a global variable in MKL that sets
     193             : ! the number of threads which can concurrently execute
     194             : ! an FFT
     195             : ! We need __INTEL_COMPILER so we can be sure that the compiler
     196             : ! understands the !DEC$ version definitions
     197             : #if defined (__INTEL_COMPILER) && defined (__MKL)
     198             : !$    include "mkl.fi"
     199             : !DEC$ IF DEFINED (INTEL_MKL_VERSION)
     200             : !DEC$ IF INTEL_MKL_VERSION .EQ. 110100
     201             : !DIR$ ATTRIBUTES ALIGN : 8 :: fftw3_mkl
     202             : !$    COMMON/fftw3_mkl/ignore(4), mkl_dft_number_of_user_threads, ignore2(7)
     203             : !$    INTEGER*4 :: ignore, mkl_dft_number_of_user_threads, ignore2
     204             : !$    BIND(c) :: /fftw3_mkl/
     205             : !DEC$ ENDIF
     206             : !DEC$ ENDIF
     207             : #elif defined (__MKL)
     208             : ! Preprocessing is enabled by default, and below header is not language specific
     209             : #include <mkl_version.h>
     210             : #endif
     211             : 
     212             :       ! Read FFTW wisdom (if available)
     213             :       ! all nodes are opening the file here...
     214        8995 :       INQUIRE (FILE=wisdom_file, exist=exist)
     215        8995 :       IF (exist) THEN
     216           0 :          iunit = get_unit_number()
     217             :          OPEN (UNIT=iunit, FILE=wisdom_file, STATUS="OLD", FORM="FORMATTED", POSITION="REWIND", &
     218           0 :                ACTION="READ", IOSTAT=istat)
     219           0 :          IF (istat == 0) THEN
     220           0 :             isuccess = fftw_import_wisdom_from_filename(wisdom_file//C_NULL_CHAR)
     221           0 :             IF (isuccess == 0) &
     222           0 :                CPABORT("Error importing wisdom from file "//wisdom_file)
     223             :             ! write(*,*) "FFTW3 import wisdom from file ....",MERGE((/"OK    "/),(/"NOT OK"/),(/isuccess==1/))
     224           0 :             CLOSE (iunit)
     225             :          END IF
     226             :       END IF
     227             : 
     228             : #if defined (__MKL)
     229             :       ! Now check if we have a real FFTW3 library, or are using MKL wrappers
     230             : 
     231             : !$    IF (fftw3_is_mkl_wrapper() .and. omp_get_max_threads() .gt. 1) THEN
     232             : ! If we are not using the Intel compiler, there is no way to tell which
     233             : ! MKL version is in use, so fail safe...
     234             : !$       mkl_is_safe = .FALSE.
     235             : #if defined(INTEL_MKL_VERSION) && (110100 < INTEL_MKL_VERSION)
     236             : !$       mkl_is_safe = .TRUE.
     237             : #elif defined (__INTEL_COMPILER)
     238             : ! If we have an Intel compiler (__INTEL_COMPILER is defined) then check the
     239             : ! MKL version and make the appropriate action
     240             : !DEC$ IF DEFINED (INTEL_MKL_VERSION)
     241             : !DEC$ IF INTEL_MKL_VERSION .EQ. 110100
     242             : !$       mkl_dft_number_of_user_threads = omp_get_max_threads()
     243             : !DEC$ ENDIF
     244             : !DEC$ IF INTEL_MKL_VERSION .GE. 110100
     245             : !$       mkl_is_safe = .TRUE.
     246             : !DEC$ ENDIF
     247             : !DEC$ ENDIF
     248             : #endif
     249             : !$       IF (.NOT. mkl_is_safe) THEN
     250             : !$          STOP "Intel's FFTW3 interface to MKL is not "// &
     251             : !$             "thread-safe prior to MKL 11.1.0!  Please "// &
     252             : !$             "rebuild CP2K, linking against FFTW 3 from "// &
     253             : !$             "www.fftw.org or a newer version of MKL. "// &
     254             : !$             "Now exiting..."
     255             : !$       END IF
     256             : !$    END IF
     257             : #endif
     258             : #else
     259             :       MARK_USED(wisdom_file)
     260             : #endif
     261             : 
     262        8995 :    END SUBROUTINE
     263             : 
     264             : ! **************************************************************************************************
     265             : !> \brief ...
     266             : !> \param DATA ...
     267             : !> \param max_length ...
     268             : !> \par History
     269             : !>      JGH 23-Jan-2006 : initial version
     270             : !>      Adapted for new interface
     271             : !>      IAB 09-Jan-2009 : Modified to cache plans in fft_plan_type
     272             : !>                        (c) The Numerical Algorithms Group (NAG) Ltd, 2009 on behalf of the HECToR project
     273             : !>      IAB 09-Oct-2009 : Added OpenMP directives to 1D FFT, and planning routines
     274             : !>                        (c) The Numerical Algorithms Group (NAG) Ltd, 2009 on behalf of the HECToR project
     275             : !>      IAB 11-Sep-2012 : OpenMP parallel 3D FFT (Ruyman Reyes, PRACE)
     276             : !> \author JGH
     277             : ! **************************************************************************************************
     278         504 :    SUBROUTINE fftw3_get_lengths(DATA, max_length)
     279             : 
     280             :       INTEGER, DIMENSION(*)                              :: DATA
     281             :       INTEGER, INTENT(INOUT)                             :: max_length
     282             : 
     283             :       INTEGER                                            :: h, i, j, k, m, maxn, maxn_elevens, &
     284             :                                                             maxn_fives, maxn_sevens, &
     285             :                                                             maxn_thirteens, maxn_threes, &
     286             :                                                             maxn_twos, ndata, nmax, number
     287         504 :       INTEGER, ALLOCATABLE, DIMENSION(:)                 :: dlocal, idx
     288             : 
     289             : !------------------------------------------------------------------------------
     290             : ! compute ndata
     291             : !! FFTW can do arbitrary(?) lengths, maybe you want to limit them to some
     292             : !!    powers of small prime numbers though...
     293             : 
     294         504 :       maxn_twos = 15
     295         504 :       maxn_threes = 3
     296         504 :       maxn_fives = 2
     297         504 :       maxn_sevens = 1
     298         504 :       maxn_elevens = 1
     299         504 :       maxn_thirteens = 0
     300         504 :       maxn = 37748736
     301             : 
     302         504 :       ndata = 0
     303        8568 :       DO h = 0, maxn_twos
     304        8064 :          nmax = HUGE(0)/2**h
     305       40824 :          DO i = 0, maxn_threes
     306      137088 :             DO j = 0, maxn_fives
     307      322560 :                DO k = 0, maxn_sevens
     308      677376 :                   DO m = 0, maxn_elevens
     309      387072 :                      number = (3**i)*(5**j)*(7**k)*(11**m)
     310             : 
     311      387072 :                      IF (number > nmax) CYCLE
     312             : 
     313      387072 :                      number = number*2**h
     314      387072 :                      IF (number >= maxn) CYCLE
     315             : 
     316      580608 :                      ndata = ndata + 1
     317             :                   END DO
     318             :                END DO
     319             :             END DO
     320             :          END DO
     321             :       END DO
     322             : 
     323        2016 :       ALLOCATE (dlocal(ndata), idx(ndata))
     324             : 
     325         504 :       ndata = 0
     326      371448 :       dlocal(:) = 0
     327        8568 :       DO h = 0, maxn_twos
     328        8064 :          nmax = HUGE(0)/2**h
     329       40824 :          DO i = 0, maxn_threes
     330      137088 :             DO j = 0, maxn_fives
     331      322560 :                DO k = 0, maxn_sevens
     332      677376 :                   DO m = 0, maxn_elevens
     333      387072 :                      number = (3**i)*(5**j)*(7**k)*(11**m)
     334             : 
     335      387072 :                      IF (number > nmax) CYCLE
     336             : 
     337      387072 :                      number = number*2**h
     338      387072 :                      IF (number >= maxn) CYCLE
     339             : 
     340      370944 :                      ndata = ndata + 1
     341      580608 :                      dlocal(ndata) = number
     342             :                   END DO
     343             :                END DO
     344             :             END DO
     345             :          END DO
     346             :       END DO
     347             : 
     348         504 :       CALL sortint(dlocal, ndata, idx)
     349         504 :       ndata = MIN(ndata, max_length)
     350      371448 :       DATA(1:ndata) = dlocal(1:ndata)
     351         504 :       max_length = ndata
     352             : 
     353         504 :       DEALLOCATE (dlocal, idx)
     354             : 
     355         504 :    END SUBROUTINE fftw3_get_lengths
     356             : 
     357             : ! **************************************************************************************************
     358             : !> \brief ...
     359             : !> \param iarr ...
     360             : !> \param n ...
     361             : !> \param index ...
     362             : ! **************************************************************************************************
     363         504 :    SUBROUTINE sortint(iarr, n, index)
     364             : 
     365             :       INTEGER, INTENT(IN)                                :: n
     366             :       INTEGER, INTENT(INOUT)                             :: iarr(1:n)
     367             :       INTEGER, INTENT(OUT)                               :: INDEX(1:n)
     368             : 
     369             :       INTEGER, PARAMETER                                 :: m = 7, nstack = 50
     370             : 
     371             :       INTEGER                                            :: a, i, ib, ir, istack(1:nstack), itemp, &
     372             :                                                             j, jstack, k, l, temp
     373             : 
     374             : !------------------------------------------------------------------------------
     375             : 
     376      371448 :       DO i = 1, n
     377      371448 :          INDEX(i) = i
     378             :       END DO
     379             :       jstack = 0
     380             :       l = 1
     381             :       ir = n
     382             :       DO WHILE (.TRUE.)
     383      139608 :       IF (ir - l < m) THEN
     384      301392 :          DO j = l + 1, ir
     385      231336 :             a = iarr(j)
     386      231336 :             ib = INDEX(j)
     387      549360 :             DO i = j - 1, 0, -1
     388      549360 :                IF (i == 0) EXIT
     389      548352 :                IF (iarr(i) <= a) EXIT
     390      318024 :                iarr(i + 1) = iarr(i)
     391      549360 :                INDEX(i + 1) = INDEX(i)
     392             :             END DO
     393      231336 :             iarr(i + 1) = a
     394      301392 :             INDEX(i + 1) = ib
     395             :          END DO
     396       70056 :          IF (jstack == 0) RETURN
     397       69552 :          ir = istack(jstack)
     398       69552 :          l = istack(jstack - 1)
     399       69552 :          jstack = jstack - 2
     400             :       ELSE
     401       69552 :          k = (l + ir)/2
     402       69552 :          temp = iarr(k)
     403       69552 :          iarr(k) = iarr(l + 1)
     404       69552 :          iarr(l + 1) = temp
     405       69552 :          itemp = INDEX(k)
     406       69552 :          INDEX(k) = INDEX(l + 1)
     407       69552 :          INDEX(l + 1) = itemp
     408       69552 :          IF (iarr(l + 1) > iarr(ir)) THEN
     409       32760 :             temp = iarr(l + 1)
     410       32760 :             iarr(l + 1) = iarr(ir)
     411       32760 :             iarr(ir) = temp
     412       32760 :             itemp = INDEX(l + 1)
     413       32760 :             INDEX(l + 1) = INDEX(ir)
     414       32760 :             INDEX(ir) = itemp
     415             :          END IF
     416       69552 :          IF (iarr(l) > iarr(ir)) THEN
     417       26208 :             temp = iarr(l)
     418       26208 :             iarr(l) = iarr(ir)
     419       26208 :             iarr(ir) = temp
     420       26208 :             itemp = INDEX(l)
     421       26208 :             INDEX(l) = INDEX(ir)
     422       26208 :             INDEX(ir) = itemp
     423             :          END IF
     424       69552 :          IF (iarr(l + 1) > iarr(l)) THEN
     425       26208 :             temp = iarr(l + 1)
     426       26208 :             iarr(l + 1) = iarr(l)
     427       26208 :             iarr(l) = temp
     428       26208 :             itemp = INDEX(l + 1)
     429       26208 :             INDEX(l + 1) = INDEX(l)
     430       26208 :             INDEX(l) = itemp
     431             :          END IF
     432       69552 :          i = l + 1
     433       69552 :          j = ir
     434       69552 :          a = iarr(l)
     435       69552 :          ib = INDEX(l)
     436      443016 :          DO WHILE (.TRUE.)
     437      512568 :             i = i + 1
     438     1663704 :             DO WHILE (iarr(i) < a)
     439     1151136 :                i = i + 1
     440             :             END DO
     441      512568 :             j = j - 1
     442     1270080 :             DO WHILE (iarr(j) > a)
     443      757512 :                j = j - 1
     444             :             END DO
     445      512568 :             IF (j < i) EXIT
     446      443016 :             temp = iarr(i)
     447      443016 :             iarr(i) = iarr(j)
     448      443016 :             iarr(j) = temp
     449      443016 :             itemp = INDEX(i)
     450      443016 :             INDEX(i) = INDEX(j)
     451      443016 :             INDEX(j) = itemp
     452             :          END DO
     453       69552 :          iarr(l) = iarr(j)
     454       69552 :          iarr(j) = a
     455       69552 :          INDEX(l) = INDEX(j)
     456       69552 :          INDEX(j) = ib
     457       69552 :          jstack = jstack + 2
     458       69552 :          IF (jstack > nstack) CPABORT(" Nstack too small in sortr")
     459       69552 :          IF (ir - i + 1 >= j - l) THEN
     460       35784 :             istack(jstack) = ir
     461       35784 :             istack(jstack - 1) = i
     462       35784 :             ir = j - 1
     463             :          ELSE
     464       33768 :             istack(jstack) = j - 1
     465       33768 :             istack(jstack - 1) = l
     466       33768 :             l = i
     467             :          END IF
     468             :       END IF
     469             : 
     470             :       END DO
     471             : 
     472             :    END SUBROUTINE sortint
     473             : 
     474             : ! **************************************************************************************************
     475             : 
     476             : ! **************************************************************************************************
     477             : !> \brief ...
     478             : !> \param plan ...
     479             : !> \param fft_rank ...
     480             : !> \param dim_n ...
     481             : !> \param dim_istride ...
     482             : !> \param dim_ostride ...
     483             : !> \param hm_rank ...
     484             : !> \param hm_n ...
     485             : !> \param hm_istride ...
     486             : !> \param hm_ostride ...
     487             : !> \param zin ...
     488             : !> \param zout ...
     489             : !> \param fft_direction ...
     490             : !> \param fftw_plan_type ...
     491             : !> \param valid ...
     492             : ! **************************************************************************************************
     493       40940 :    SUBROUTINE fftw3_create_guru_plan(plan, fft_rank, dim_n, &
     494             :                                      dim_istride, dim_ostride, hm_rank, &
     495             :                                      hm_n, hm_istride, hm_ostride, &
     496             :                                      zin, zout, fft_direction, fftw_plan_type, &
     497             :                                      valid)
     498             : 
     499             :       IMPLICIT NONE
     500             : 
     501             :       TYPE(C_PTR), INTENT(INOUT)                         :: plan
     502             :       COMPLEX(KIND=dp), DIMENSION(*), INTENT(INOUT)      :: zin, zout
     503             :       INTEGER, INTENT(IN) :: dim_n(2), dim_istride(2), dim_ostride(2), &
     504             :                              hm_n(2), hm_istride(2), hm_ostride(2), fft_rank, &
     505             :                              fft_direction, fftw_plan_type, hm_rank
     506             :       LOGICAL, INTENT(OUT)                               :: valid
     507             : 
     508             : #if defined (__FFTW3)
     509             :       TYPE(fftw_iodim) :: dim(2), hm(2)
     510             :       INTEGER :: i
     511             : 
     512      122820 :       DO i = 1, 2
     513       81880 :          DIM(i) = fftw_iodim(dim_n(i), dim_istride(i), dim_ostride(i))
     514      122820 :          hm(i) = fftw_iodim(hm_n(i), hm_istride(i), hm_ostride(i))
     515             :       END DO
     516             : 
     517             :       plan = fftw_plan_guru_dft(fft_rank, &
     518             :                                 dim, hm_rank, hm, &
     519             :                                 zin, zout, &
     520       40940 :                                 fft_direction, fftw_plan_type)
     521             : 
     522       40940 :       valid = C_ASSOCIATED(plan)
     523             : 
     524             : #else
     525             :       MARK_USED(plan)
     526             :       MARK_USED(fft_rank)
     527             :       MARK_USED(dim_n)
     528             :       MARK_USED(dim_istride)
     529             :       MARK_USED(dim_ostride)
     530             :       MARK_USED(hm_rank)
     531             :       MARK_USED(hm_n)
     532             :       MARK_USED(hm_istride)
     533             :       MARK_USED(hm_ostride)
     534             :       MARK_USED(fft_direction)
     535             :       MARK_USED(fftw_plan_type)
     536             :       !MARK_USED does not work with assumed size arguments
     537             :       IF (.FALSE.) THEN; DO; IF (ABS(zin(1)) > ABS(zout(1))) EXIT; END DO; END IF
     538             :       valid = .FALSE.
     539             : 
     540             : #endif
     541             : 
     542       40940 :    END SUBROUTINE
     543             : 
     544             : ! **************************************************************************************************
     545             : 
     546             : ! **************************************************************************************************
     547             : !> \brief ...
     548             : !> \return ...
     549             : ! **************************************************************************************************
     550       40940 :    FUNCTION fftw3_is_mkl_wrapper() RESULT(is_mkl)
     551             : 
     552             :       IMPLICIT NONE
     553             : 
     554             :       LOGICAL :: is_mkl
     555             : #if defined ( __FFTW3 )
     556             :       LOGICAL :: guru_supported
     557             :       INTEGER :: dim_n(2), dim_istride(2), dim_ostride(2), &
     558             :                  howmany_n(2), howmany_istride(2), howmany_ostride(2)
     559             :       TYPE(C_PTR)                          :: test_plan
     560             :       COMPLEX(KIND=dp), DIMENSION(1, 1, 1) :: zin
     561             : 
     562             :       ! Attempt to create a plan with the guru interface for a 2d sub-space
     563             :       ! If this fails (e.g. for MKL's FFTW3 interface), fall back to the
     564             :       ! FFTW3 threaded 3D transform instead of the hand-optimised version
     565       40940 :       dim_n(1) = 1
     566       40940 :       dim_istride(1) = 1
     567       40940 :       dim_ostride(1) = 1
     568       40940 :       howmany_n(1) = 1
     569       40940 :       howmany_n(2) = 1
     570       40940 :       howmany_istride(1) = 1
     571       40940 :       howmany_istride(2) = 1
     572       40940 :       howmany_ostride(1) = 1
     573       40940 :       howmany_ostride(2) = 1
     574       40940 :       zin = CMPLX(0.0_dp, 0.0_dp, KIND=dp)
     575             :       CALL fftw3_create_guru_plan(test_plan, 1, &
     576             :                                   dim_n, dim_istride, dim_ostride, &
     577             :                                   2, howmany_n, howmany_istride, howmany_ostride, &
     578             :                                   zin, zin, &
     579       40940 :                                   FFTW_FORWARD, FFTW_ESTIMATE, guru_supported)
     580       40940 :       IF (guru_supported) THEN
     581       40940 :          CALL fftw_destroy_plan(test_plan)
     582       40940 :          is_mkl = .FALSE.
     583             :       ELSE
     584             :          is_mkl = .TRUE.
     585             :       END IF
     586             : 
     587             : #else
     588             :       is_mkl = .FALSE.
     589             : #endif
     590             : 
     591       40940 :    END FUNCTION
     592             : 
     593             : ! **************************************************************************************************
     594             : 
     595             : ! **************************************************************************************************
     596             : !> \brief ...
     597             : !> \param nrows ...
     598             : !> \param nt ...
     599             : !> \param rows_per_thread ...
     600             : !> \param rows_per_thread_r ...
     601             : !> \param th_planA ...
     602             : !> \param th_planB ...
     603             : ! **************************************************************************************************
     604           0 :    SUBROUTINE fftw3_compute_rows_per_th(nrows, nt, rows_per_thread, rows_per_thread_r, &
     605             :                                         th_planA, th_planB)
     606             : 
     607             :       INTEGER, INTENT(IN)                                :: nrows, nt
     608             :       INTEGER, INTENT(OUT)                               :: rows_per_thread, rows_per_thread_r, &
     609             :                                                             th_planA, th_planB
     610             : 
     611           0 :       IF (MOD(nrows, nt) .EQ. 0) THEN
     612           0 :          rows_per_thread = nrows/nt
     613           0 :          rows_per_thread_r = 0
     614           0 :          th_planA = nt
     615           0 :          th_planB = 0
     616             :       ELSE
     617           0 :          rows_per_thread = nrows/nt + 1
     618           0 :          rows_per_thread_r = nrows/nt
     619           0 :          th_planA = MOD(nrows, nt)
     620           0 :          th_planB = nt - th_planA
     621             :       END IF
     622             : 
     623           0 :    END SUBROUTINE
     624             : 
     625             : ! **************************************************************************************************
     626             : 
     627             : ! **************************************************************************************************
     628             : !> \brief ...
     629             : !> \param plan ...
     630             : !> \param plan_r ...
     631             : !> \param dim_n ...
     632             : !> \param dim_istride ...
     633             : !> \param dim_ostride ...
     634             : !> \param hm_n ...
     635             : !> \param hm_istride ...
     636             : !> \param hm_ostride ...
     637             : !> \param input ...
     638             : !> \param output ...
     639             : !> \param fft_direction ...
     640             : !> \param fftw_plan_type ...
     641             : !> \param rows_per_th ...
     642             : !> \param rows_per_th_r ...
     643             : ! **************************************************************************************************
     644           0 :    SUBROUTINE fftw3_create_3d_plans(plan, plan_r, dim_n, dim_istride, dim_ostride, &
     645             :                                     hm_n, hm_istride, hm_ostride, &
     646             :                                     input, output, &
     647             :                                     fft_direction, fftw_plan_type, rows_per_th, &
     648             :                                     rows_per_th_r)
     649             : 
     650             :       TYPE(C_PTR), INTENT(INOUT)                         :: plan, plan_r
     651             :       INTEGER, INTENT(INOUT)                             :: dim_n(2), dim_istride(2), &
     652             :                                                             dim_ostride(2), hm_n(2), &
     653             :                                                             hm_istride(2), hm_ostride(2)
     654             :       COMPLEX(KIND=dp), DIMENSION(*), INTENT(INOUT)      :: input, output
     655             :       INTEGER, INTENT(INOUT)                             :: fft_direction, fftw_plan_type
     656             :       INTEGER, INTENT(IN)                                :: rows_per_th, rows_per_th_r
     657             : 
     658             :       LOGICAL                                            :: valid
     659             : 
     660             : ! First plans will have an additional row
     661             : 
     662           0 :       hm_n(2) = rows_per_th
     663             :       CALL fftw3_create_guru_plan(plan, 1, &
     664             :                                   dim_n, dim_istride, dim_ostride, &
     665             :                                   2, hm_n, hm_istride, hm_ostride, &
     666             :                                   input, output, &
     667           0 :                                   fft_direction, fftw_plan_type, valid)
     668             : 
     669           0 :       IF (.NOT. valid) THEN
     670           0 :          CPABORT("fftw3_create_plan")
     671             :       END IF
     672             : 
     673             :       !!!! Remainder
     674           0 :       hm_n(2) = rows_per_th_r
     675             :       CALL fftw3_create_guru_plan(plan_r, 1, &
     676             :                                   dim_n, dim_istride, dim_ostride, &
     677             :                                   2, hm_n, hm_istride, hm_ostride, &
     678             :                                   input, output, &
     679           0 :                                   fft_direction, fftw_plan_type, valid)
     680           0 :       IF (.NOT. valid) THEN
     681           0 :          CPABORT("fftw3_create_plan (remaining)")
     682             :       END IF
     683             : 
     684           0 :    END SUBROUTINE
     685             : 
     686             : ! **************************************************************************************************
     687             : 
     688             : ! **************************************************************************************************
     689             : !> \brief ...
     690             : !> \param plan ...
     691             : !> \param zin ...
     692             : !> \param zout ...
     693             : !> \param plan_style ...
     694             : ! **************************************************************************************************
     695       40940 :    SUBROUTINE fftw3_create_plan_3d(plan, zin, zout, plan_style)
     696             : 
     697             :       TYPE(fft_plan_type), INTENT(INOUT)              :: plan
     698             :       COMPLEX(KIND=dp), DIMENSION(*), INTENT(INOUT)      :: zin
     699             :       COMPLEX(KIND=dp), DIMENSION(*), INTENT(INOUT)      :: zout
     700             :       INTEGER                                            :: plan_style
     701             : #if defined ( __FFTW3 )
     702             :       INTEGER                                            :: n1, n2, n3
     703             :       INTEGER                                            :: nt
     704             :       INTEGER                                            :: rows_per_th
     705             :       INTEGER                                            :: rows_per_th_r
     706             :       INTEGER                                            :: fft_direction
     707             :       INTEGER                                            :: th_planA, th_planB
     708       40940 :       COMPLEX(KIND=dp), ALLOCATABLE                      :: tmp(:)
     709             : 
     710             :       ! GURU Interface
     711             :       INTEGER :: dim_n(2), dim_istride(2), dim_ostride(2), &
     712             :                  howmany_n(2), howmany_istride(2), howmany_ostride(2)
     713             : 
     714             :       INTEGER :: fftw_plan_type
     715       81856 :       SELECT CASE (plan_style)
     716             :       CASE (1)
     717       40916 :          fftw_plan_type = FFTW_ESTIMATE
     718             :       CASE (2)
     719           8 :          fftw_plan_type = FFTW_MEASURE
     720             :       CASE (3)
     721           8 :          fftw_plan_type = FFTW_PATIENT
     722             :       CASE (4)
     723           8 :          fftw_plan_type = FFTW_EXHAUSTIVE
     724             :       CASE DEFAULT
     725       40940 :          CPABORT("fftw3_create_plan_3d")
     726             :       END SELECT
     727             : 
     728             : #if defined (__FFTW3_UNALIGNED)
     729             :       fftw_plan_type = fftw_plan_type + FFTW_UNALIGNED
     730             : #endif
     731             : 
     732       40940 :       IF (plan%fsign == +1) THEN
     733       20470 :          fft_direction = FFTW_FORWARD
     734             :       ELSE
     735       20470 :          fft_direction = FFTW_BACKWARD
     736             :       END IF
     737             : 
     738       40940 :       n1 = plan%n_3d(1)
     739       40940 :       n2 = plan%n_3d(2)
     740       40940 :       n3 = plan%n_3d(3)
     741             : 
     742       40940 :       nt = 1
     743       40940 : !$OMP PARALLEL DEFAULT(NONE) SHARED(nt)
     744             : !$OMP MASTER
     745             : !$    nt = omp_get_num_threads()
     746             : !$OMP END MASTER
     747             : !$OMP END PARALLEL
     748             : 
     749             :       IF ((fftw3_is_mkl_wrapper()) .OR. &
     750       40940 :           (.NOT. plan_style == 1) .OR. &
     751             :           (n1 < 256 .AND. n2 < 256 .AND. n3 < 256 .AND. nt == 1)) THEN
     752             :          ! If the plan type is MEASURE, PATIENT and EXHAUSTIVE or
     753             :          ! the grid size is small (and we are single-threaded) then
     754             :          ! FFTW3 does a better job than handmade optimization
     755             :          ! so plan a single 3D FFT which will execute using all the threads
     756             : 
     757       40940 :          plan%separated_plans = .FALSE.
     758       40940 : !$       CALL fftw_plan_with_nthreads(nt)
     759             : 
     760       40940 :          IF (plan%fft_in_place) THEN
     761       20470 :             plan%fftw_plan = fftw_plan_dft_3d(n3, n2, n1, zin, zin, fft_direction, fftw_plan_type)
     762             :          ELSE
     763       20470 :             plan%fftw_plan = fftw_plan_dft_3d(n3, n2, n1, zin, zout, fft_direction, fftw_plan_type)
     764             :          END IF
     765             :       ELSE
     766           0 :          ALLOCATE (tmp(n1*n2*n3))
     767             :          ! ************************* PLANS WITH TRANSPOSITIONS ****************************
     768             :          !  In the cases described above, we manually thread each stage of the 3D FFT.
     769             :          !
     770             :          !  The following plans replace the 3D FFT call by running 1D FFTW across all
     771             :          !  3 directions of the array.
     772             :          !
     773             :          !  Output of FFTW is transposed to ensure that the next round of FFTW access
     774             :          !  contiguous information.
     775             :          !
     776             :          !  Assuming the input matrix is M(n3,n2,n1), FFTW/Transp are :
     777             :          !  M(n3,n2,n1) -> fftw(x) -> M(n3,n1,n2) -> fftw(y) -> M(n1,n2,n3) -> fftw(z) -> M(n1,n2,n3)
     778             :          !  Notice that last matrix is transposed in the Z axis. A DO-loop in the execute routine
     779             :          !  will perform the final transposition. Performance evaluation showed that using an external
     780             :          !  DO loop to do the final transposition performed better than directly transposing the output.
     781             :          !  However, this might vary depending on the compiler/platform, so a potential tuning spot
     782             :          !  is to perform the final transposition within the fftw library rather than using the external loop
     783             :          !  See comments below in Z-FFT for how to transpose the output to avoid the final DO loop.
     784             :          !
     785             :          !  Doc. for the Guru interface is in http://www.fftw.org/doc/Guru-Interface.html
     786             :          !
     787             :          !  OpenMP : Work is distributed on the Z plane.
     788             :          !           All transpositions are out-of-place to facilitate multi-threading
     789             :          !
     790             :          !!!! Plan for X : M(n3,n2,n1) -> fftw(x) -> M(n3,n1,n2)
     791             :          CALL fftw3_compute_rows_per_th(n3, nt, rows_per_th, rows_per_th_r, &
     792           0 :                                         th_planA, th_planB)
     793             : 
     794           0 :          dim_n(1) = n1
     795           0 :          dim_istride(1) = 1
     796           0 :          dim_ostride(1) = n2
     797           0 :          howmany_n(1) = n2
     798           0 :          howmany_n(2) = rows_per_th
     799           0 :          howmany_istride(1) = n1
     800           0 :          howmany_istride(2) = n1*n2
     801           0 :          howmany_ostride(1) = 1
     802           0 :          howmany_ostride(2) = n1*n2
     803             :          CALL fftw3_create_3d_plans(plan%fftw_plan_nx, plan%fftw_plan_nx_r, &
     804             :                                     dim_n, dim_istride, dim_ostride, howmany_n, &
     805             :                                     howmany_istride, howmany_ostride, &
     806             :                                     zin, tmp, &
     807             :                                     fft_direction, fftw_plan_type, rows_per_th, &
     808           0 :                                     rows_per_th_r)
     809             : 
     810             :          !!!! Plan for Y : M(n3,n1,n2) -> fftw(y) -> M(n1,n2,n3)
     811             :          CALL fftw3_compute_rows_per_th(n3, nt, rows_per_th, rows_per_th_r, &
     812           0 :                                         th_planA, th_planB)
     813           0 :          dim_n(1) = n2
     814             :          dim_istride(1) = 1
     815           0 :          dim_ostride(1) = n3
     816           0 :          howmany_n(1) = n1
     817           0 :          howmany_n(2) = rows_per_th
     818           0 :          howmany_istride(1) = n2
     819             :          howmany_istride(2) = n1*n2
     820             :          !!! transposed Z axis on output
     821           0 :          howmany_ostride(1) = n2*n3
     822           0 :          howmany_ostride(2) = 1
     823             : 
     824             :          CALL fftw3_create_3d_plans(plan%fftw_plan_ny, plan%fftw_plan_ny_r, &
     825             :                                     dim_n, dim_istride, dim_ostride, &
     826             :                                     howmany_n, howmany_istride, howmany_ostride, &
     827             :                                     tmp, zin, &
     828             :                                     fft_direction, fftw_plan_type, rows_per_th, &
     829           0 :                                     rows_per_th_r)
     830             : 
     831             :          !!!! Plan for Z : M(n1,n2,n3) -> fftw(z) -> M(n1,n2,n3)
     832             :          CALL fftw3_compute_rows_per_th(n1, nt, rows_per_th, rows_per_th_r, &
     833           0 :                                         th_planA, th_planB)
     834           0 :          dim_n(1) = n3
     835             :          dim_istride(1) = 1
     836           0 :          dim_ostride(1) = 1          ! To transpose: n2*n1
     837           0 :          howmany_n(1) = n2
     838           0 :          howmany_n(2) = rows_per_th
     839           0 :          howmany_istride(1) = n3
     840           0 :          howmany_istride(2) = n2*n3
     841           0 :          howmany_ostride(1) = n3     ! To transpose: n1
     842           0 :          howmany_ostride(2) = n2*n3  ! To transpose: 1
     843             : 
     844             :          CALL fftw3_create_3d_plans(plan%fftw_plan_nz, plan%fftw_plan_nz_r, &
     845             :                                     dim_n, dim_istride, dim_ostride, &
     846             :                                     howmany_n, howmany_istride, howmany_ostride, &
     847             :                                     zin, tmp, &
     848             :                                     fft_direction, fftw_plan_type, rows_per_th, &
     849           0 :                                     rows_per_th_r)
     850             : 
     851           0 :          plan%separated_plans = .TRUE.
     852             : 
     853           0 :          DEALLOCATE (tmp)
     854             :       END IF
     855             : 
     856             : #else
     857             :       MARK_USED(plan)
     858             :       MARK_USED(plan_style)
     859             :       !MARK_USED does not work with assumed size arguments
     860             :       IF (.FALSE.) THEN; DO; IF (ABS(zin(1)) > ABS(zout(1))) EXIT; END DO; END IF
     861             : #endif
     862             : 
     863       40940 :    END SUBROUTINE fftw3_create_plan_3d
     864             : 
     865             : ! **************************************************************************************************
     866             : 
     867             : ! **************************************************************************************************
     868             : !> \brief ...
     869             : !> \param plan ...
     870             : !> \param plan_r ...
     871             : !> \param split_dim ...
     872             : !> \param nt ...
     873             : !> \param tid ...
     874             : !> \param input ...
     875             : !> \param istride ...
     876             : !> \param output ...
     877             : !> \param ostride ...
     878             : ! **************************************************************************************************
     879           0 :    SUBROUTINE fftw3_workshare_execute_dft(plan, plan_r, split_dim, nt, tid, &
     880             :                                           input, istride, output, ostride)
     881             : 
     882             :       INTEGER, INTENT(IN)                           :: split_dim, nt, tid
     883             :       INTEGER, INTENT(IN)                           :: istride, ostride
     884             :       COMPLEX(KIND=dp), DIMENSION(*), INTENT(INOUT) :: input, output
     885             :       TYPE(C_PTR)                                   :: plan, plan_r
     886             : #if defined (__FFTW3)
     887             :       INTEGER                                     :: i_off, o_off
     888             :       INTEGER                                     :: th_planA, th_planB
     889             :       INTEGER :: rows_per_thread, rows_per_thread_r
     890             : 
     891             :       CALL fftw3_compute_rows_per_th(split_dim, nt, rows_per_thread, &
     892             :                                      rows_per_thread_r, &
     893           0 :                                      th_planA, th_planB)
     894             : 
     895           0 :       IF (th_planB .GT. 0) THEN
     896           0 :          IF (tid .LT. th_planA) THEN
     897           0 :             i_off = (tid)*(istride*(rows_per_thread)) + 1
     898           0 :             o_off = (tid)*(ostride*(rows_per_thread)) + 1
     899           0 :             IF (rows_per_thread .GT. 0) THEN
     900             :                CALL fftw_execute_dft(plan, input(i_off), &
     901           0 :                                      output(o_off))
     902             :             END IF
     903           0 :          ELSE IF ((tid - th_planA) < th_planB) THEN
     904             : 
     905             :             i_off = (th_planA)*istride*(rows_per_thread) + &
     906           0 :                     (tid - th_planA)*istride*(rows_per_thread_r) + 1
     907             :             o_off = (th_planA)*ostride*(rows_per_thread) + &
     908           0 :                     (tid - th_planA)*ostride*(rows_per_thread_r) + 1
     909             : 
     910             :             CALL fftw_execute_dft(plan_r, input(i_off), &
     911           0 :                                   output(o_off))
     912             :          END IF
     913             : 
     914             :       ELSE
     915           0 :          i_off = (tid)*(istride*(rows_per_thread)) + 1
     916           0 :          o_off = (tid)*(ostride*(rows_per_thread)) + 1
     917             : 
     918             :          CALL fftw_execute_dft(plan, input(i_off), &
     919           0 :                                output(o_off))
     920             : 
     921             :       END IF
     922             : #else
     923             :       MARK_USED(plan)
     924             :       MARK_USED(plan_r)
     925             :       MARK_USED(split_dim)
     926             :       MARK_USED(nt)
     927             :       MARK_USED(tid)
     928             :       MARK_USED(istride)
     929             :       MARK_USED(ostride)
     930             :       !MARK_USED does not work with assumed size arguments
     931             :       IF (.FALSE.) THEN; DO; IF (ABS(input(1)) > ABS(output(1))) EXIT; END DO; END IF
     932             : #endif
     933             : 
     934           0 :    END SUBROUTINE
     935             : 
     936             : ! **************************************************************************************************
     937             : 
     938             : ! **************************************************************************************************
     939             : !> \brief ...
     940             : !> \param plan ...
     941             : !> \param scale ...
     942             : !> \param zin ...
     943             : !> \param zout ...
     944             : !> \param stat ...
     945             : ! **************************************************************************************************
     946      620266 :    SUBROUTINE fftw33d(plan, scale, zin, zout, stat)
     947             : 
     948             :       TYPE(fft_plan_type), INTENT(IN)                      :: plan
     949             :       REAL(KIND=dp), INTENT(IN)                            :: scale
     950             :       COMPLEX(KIND=dp), DIMENSION(*), INTENT(INOUT), TARGET:: zin
     951             :       COMPLEX(KIND=dp), DIMENSION(*), INTENT(INOUT), TARGET:: zout
     952             :       INTEGER, INTENT(OUT)                                 :: stat
     953             : #if defined ( __FFTW3 )
     954      620266 :       COMPLEX(KIND=dp), POINTER                            :: xout(:)
     955      620266 :       COMPLEX(KIND=dp), ALLOCATABLE                        :: tmp1(:)
     956             :       INTEGER                                              :: n1, n2, n3
     957             :       INTEGER                                              :: tid, nt
     958             :       INTEGER                                              :: i, j, k
     959             : 
     960      620266 :       n1 = plan%n_3d(1)
     961      620266 :       n2 = plan%n_3d(2)
     962      620266 :       n3 = plan%n_3d(3)
     963             : 
     964      620266 :       stat = 1
     965             : 
     966             :       ! We use a POINTER to the output array to avoid duplicating code
     967      620266 :       IF (plan%fft_in_place) THEN
     968      620262 :          xout => zin(:n1*n2*n3)
     969             :       ELSE
     970           4 :          xout => zout(:n1*n2*n3)
     971             :       END IF
     972             : 
     973             :       ! Either compute the full 3D FFT using a multithreaded plan
     974      620266 :       IF (.NOT. plan%separated_plans) THEN
     975      620266 :          CALL fftw_execute_dft(plan%fftw_plan, zin, xout)
     976             :       ELSE
     977             :          ! Or use the 3 stage FFT scheme described in fftw3_create_plan_3d
     978           0 :          ALLOCATE (tmp1(n1*n2*n3))   ! Temporary vector used for transpositions
     979           0 :          !$OMP PARALLEL DEFAULT(NONE) PRIVATE(tid,nt,i,j,k) SHARED(zin,tmp1,n1,n2,n3,plan,xout)
     980             :          tid = 0
     981             :          nt = 1
     982             : 
     983             : !$       tid = omp_get_thread_num()
     984             : !$       nt = omp_get_num_threads()
     985             :          CALL fftw3_workshare_execute_dft(plan%fftw_plan_nx, plan%fftw_plan_nx_r, &
     986             :                                           n3, nt, tid, &
     987             :                                           zin, n1*n2, tmp1, n1*n2)
     988             : 
     989             :          !$OMP BARRIER
     990             :          CALL fftw3_workshare_execute_dft(plan%fftw_plan_ny, plan%fftw_plan_ny_r, &
     991             :                                           n3, nt, tid, &
     992             :                                           tmp1, n1*n2, xout, 1)
     993             :          !$OMP BARRIER
     994             :          CALL fftw3_workshare_execute_dft(plan%fftw_plan_nz, plan%fftw_plan_nz_r, &
     995             :                                           n1, nt, tid, &
     996             :                                           xout, n2*n3, tmp1, n2*n3)
     997             :          !$OMP BARRIER
     998             : 
     999             :          !$OMP DO COLLAPSE(3)
    1000             :          DO i = 1, n1
    1001             :             DO j = 1, n2
    1002             :                DO k = 1, n3
    1003             :                   xout((i - 1) + (j - 1)*n1 + (k - 1)*n1*n2 + 1) = &
    1004             :                      tmp1((k - 1) + (j - 1)*n3 + (i - 1)*n3*n2 + 1)
    1005             :                END DO
    1006             :             END DO
    1007             :          END DO
    1008             :          !$OMP END DO
    1009             : 
    1010             :          !$OMP END PARALLEL
    1011             :       END IF
    1012             : 
    1013      620266 :       IF (scale /= 1.0_dp) THEN
    1014      293064 :          CALL zdscal(n1*n2*n3, scale, xout, 1)
    1015             :       END IF
    1016             : 
    1017             : #else
    1018             :       MARK_USED(plan)
    1019             :       MARK_USED(scale)
    1020             :       !MARK_USED does not work with assumed size arguments
    1021             :       IF (.FALSE.) THEN; DO; IF (ABS(zin(1)) > ABS(zout(1))) EXIT; END DO; END IF
    1022             :       stat = 0
    1023             : 
    1024             : #endif
    1025             : 
    1026      620266 :    END SUBROUTINE fftw33d
    1027             : 
    1028             : ! **************************************************************************************************
    1029             : 
    1030             : ! **************************************************************************************************
    1031             : !> \brief ...
    1032             : !> \param plan ...
    1033             : !> \param zin ...
    1034             : !> \param zout ...
    1035             : !> \param plan_style ...
    1036             : ! **************************************************************************************************
    1037      143002 :    SUBROUTINE fftw3_create_plan_1dm(plan, zin, zout, plan_style)
    1038             : 
    1039             :       IMPLICIT NONE
    1040             : 
    1041             :       TYPE(fft_plan_type), INTENT(INOUT)              :: plan
    1042             :       COMPLEX(KIND=dp), DIMENSION(*), INTENT(IN)         :: zin
    1043             :       COMPLEX(KIND=dp), DIMENSION(*), INTENT(IN)         :: zout
    1044             :       INTEGER, INTENT(IN)                                :: plan_style
    1045             : #if defined ( __FFTW3 )
    1046             :       INTEGER                                            :: ii, di, io, DO, num_threads, num_rows
    1047             : 
    1048             :       INTEGER :: fftw_plan_type
    1049      285896 :       SELECT CASE (plan_style)
    1050             :       CASE (1)
    1051      142894 :          fftw_plan_type = FFTW_ESTIMATE
    1052             :       CASE (2)
    1053          48 :          fftw_plan_type = FFTW_MEASURE
    1054             :       CASE (3)
    1055          48 :          fftw_plan_type = FFTW_PATIENT
    1056             :       CASE (4)
    1057          12 :          fftw_plan_type = FFTW_EXHAUSTIVE
    1058             :       CASE DEFAULT
    1059      143002 :          CPABORT("fftw3_create_plan_1dm")
    1060             :       END SELECT
    1061             : 
    1062             : #if defined (__FFTW3_UNALIGNED)
    1063             :       fftw_plan_type = fftw_plan_type + FFTW_UNALIGNED
    1064             : #endif
    1065      143002 :       num_threads = 1
    1066      143002 :       plan%separated_plans = .FALSE.
    1067             : !$OMP PARALLEL DEFAULT(NONE), &
    1068      143002 : !$OMP          SHARED(NUM_THREADS)
    1069             : !$OMP MASTER
    1070             : !$    num_threads = omp_get_num_threads()
    1071             : !$OMP END MASTER
    1072             : !$OMP END PARALLEL
    1073             : 
    1074      143002 :       num_rows = plan%m/num_threads
    1075      143002 : !$    plan%num_threads_needed = num_threads
    1076             : 
    1077             : ! Check for number of rows less than num_threads
    1078      143002 : !$    IF (plan%m < num_threads) THEN
    1079           0 : !$       num_rows = 1
    1080           0 : !$       plan%num_threads_needed = plan%m
    1081             : !$    END IF
    1082             : 
    1083             : ! Check for total number of rows not divisible by num_threads
    1084      143002 : !$    IF (num_rows*plan%num_threads_needed .NE. plan%m) THEN
    1085           0 : !$       plan%need_alt_plan = .TRUE.
    1086             : !$    END IF
    1087             : 
    1088      143002 : !$    plan%num_rows = num_rows
    1089      143002 :       ii = 1
    1090      143002 :       di = plan%n
    1091      143002 :       io = 1
    1092      143002 :       DO = plan%n
    1093      143002 :       IF (plan%fsign == +1 .AND. plan%trans) THEN
    1094       71391 :          ii = plan%m
    1095       71391 :          di = 1
    1096       71611 :       ELSEIF (plan%fsign == -1 .AND. plan%trans) THEN
    1097       71391 :          io = plan%m
    1098       71391 :          DO = 1
    1099             :       END IF
    1100             : 
    1101      143002 :       IF (plan%fsign == +1) THEN
    1102             :          CALL dfftw_plan_many_dft(plan%fftw_plan, 1, plan%n, num_rows, zin, 0, ii, di, &
    1103       71611 :                                   zout, 0, io, DO, FFTW_FORWARD, fftw_plan_type)
    1104             :       ELSE
    1105             :          CALL dfftw_plan_many_dft(plan%fftw_plan, 1, plan%n, num_rows, zin, 0, ii, di, &
    1106       71391 :                                   zout, 0, io, DO, FFTW_BACKWARD, fftw_plan_type)
    1107             :       END IF
    1108             : 
    1109      143002 : !$    IF (plan%need_alt_plan) THEN
    1110           0 : !$       plan%alt_num_rows = plan%m - (plan%num_threads_needed - 1)*num_rows
    1111           0 : !$       IF (plan%fsign == +1) THEN
    1112             : !$          CALL dfftw_plan_many_dft(plan%alt_fftw_plan, 1, plan%n, plan%alt_num_rows, zin, 0, ii, di, &
    1113           0 : !$                                   zout, 0, io, DO, FFTW_FORWARD, fftw_plan_type)
    1114             : !$       ELSE
    1115             : !$          CALL dfftw_plan_many_dft(plan%alt_fftw_plan, 1, plan%n, plan%alt_num_rows, zin, 0, ii, di, &
    1116           0 : !$                                   zout, 0, io, DO, FFTW_BACKWARD, fftw_plan_type)
    1117             : !$       END IF
    1118             : !$    END IF
    1119             : 
    1120             : #else
    1121             :       MARK_USED(plan)
    1122             :       MARK_USED(plan_style)
    1123             :       !MARK_USED does not work with assumed size arguments
    1124             :       IF (.FALSE.) THEN; DO; IF (ABS(zin(1)) > ABS(zout(1))) EXIT; END DO; END IF
    1125             : #endif
    1126             : 
    1127      143002 :    END SUBROUTINE fftw3_create_plan_1dm
    1128             : 
    1129             : ! **************************************************************************************************
    1130             : !> \brief ...
    1131             : !> \param plan ...
    1132             : ! **************************************************************************************************
    1133      183942 :    SUBROUTINE fftw3_destroy_plan(plan)
    1134             : 
    1135             :       TYPE(fft_plan_type), INTENT(INOUT)   :: plan
    1136             : 
    1137             : #if defined ( __FFTW3 )
    1138      183942 : !$    IF (plan%need_alt_plan) THEN
    1139           0 : !$       CALL fftw_destroy_plan(plan%alt_fftw_plan)
    1140             : !$    END IF
    1141             : 
    1142      183942 :       IF (.NOT. plan%separated_plans) THEN
    1143      183942 :          CALL fftw_destroy_plan(plan%fftw_plan)
    1144             :       ELSE
    1145             :          ! If it is a separated plan then we have to destroy
    1146             :          ! each dim plan individually
    1147           0 :          CALL fftw_destroy_plan(plan%fftw_plan_nx)
    1148           0 :          CALL fftw_destroy_plan(plan%fftw_plan_ny)
    1149           0 :          CALL fftw_destroy_plan(plan%fftw_plan_nz)
    1150           0 :          CALL fftw_destroy_plan(plan%fftw_plan_nx_r)
    1151           0 :          CALL fftw_destroy_plan(plan%fftw_plan_ny_r)
    1152           0 :          CALL fftw_destroy_plan(plan%fftw_plan_nz_r)
    1153             :       END IF
    1154             : 
    1155             : #else
    1156             :       MARK_USED(plan)
    1157             : #endif
    1158             : 
    1159      183942 :    END SUBROUTINE fftw3_destroy_plan
    1160             : 
    1161             : ! **************************************************************************************************
    1162             : !> \brief ...
    1163             : !> \param plan ...
    1164             : !> \param zin ...
    1165             : !> \param zout ...
    1166             : !> \param scale ...
    1167             : !> \param stat ...
    1168             : ! **************************************************************************************************
    1169     7835086 :    SUBROUTINE fftw31dm(plan, zin, zout, scale, stat)
    1170             :       TYPE(fft_plan_type), INTENT(IN)                    :: plan
    1171             :       COMPLEX(KIND=dp), DIMENSION(*), INTENT(IN), TARGET :: zin
    1172             :       COMPLEX(KIND=dp), DIMENSION(*), INTENT(INOUT), &
    1173             :          TARGET                                          :: zout
    1174             :       REAL(KIND=dp), INTENT(IN)                          :: scale
    1175             :       INTEGER, INTENT(OUT)                               :: stat
    1176             : 
    1177             :       INTEGER                                            :: in_offset, my_id, num_rows, out_offset, &
    1178             :                                                             scal_offset
    1179             :       TYPE(C_PTR)                                        :: fftw_plan
    1180             : 
    1181             : !------------------------------------------------------------------------------
    1182             : 
    1183     7835086 :       my_id = 0
    1184     7835086 :       num_rows = plan%m
    1185             : 
    1186             : !$OMP PARALLEL DEFAULT(NONE), &
    1187             : !$OMP          PRIVATE(my_id,num_rows,in_offset,out_offset,scal_offset,fftw_plan), &
    1188             : !$OMP          SHARED(zin,zout), &
    1189     7835086 : !$OMP          SHARED(plan,scale,stat)
    1190             : !$    my_id = omp_get_thread_num()
    1191             : 
    1192             : !$    if (my_id < plan%num_threads_needed) then
    1193             : 
    1194             :          fftw_plan = plan%fftw_plan
    1195             : 
    1196             :          in_offset = 1
    1197             :          out_offset = 1
    1198             :          scal_offset = 1
    1199             : 
    1200             : !$       in_offset = 1 + plan%num_rows*my_id*plan%n
    1201             : !$       out_offset = 1 + plan%num_rows*my_id*plan%n
    1202             : !$       IF (plan%fsign == +1 .AND. plan%trans) THEN
    1203             : !$          in_offset = 1 + plan%num_rows*my_id
    1204             : !$       ELSEIF (plan%fsign == -1 .AND. plan%trans) THEN
    1205             : !$          out_offset = 1 + plan%num_rows*my_id
    1206             : !$       END IF
    1207             : !$       scal_offset = 1 + plan%n*plan%num_rows*my_id
    1208             : !$       IF (plan%need_alt_plan .AND. my_id .EQ. plan%num_threads_needed - 1) THEN
    1209             : !$          num_rows = plan%alt_num_rows
    1210             : !$          fftw_plan = plan%alt_fftw_plan
    1211             : !$       ELSE
    1212             : !$          num_rows = plan%num_rows
    1213             : !$       END IF
    1214             : 
    1215             : #if defined ( __FFTW3 )
    1216             : !$OMP MASTER
    1217             :          stat = 1
    1218             : !$OMP END MASTER
    1219             :          CALL dfftw_execute_dft(fftw_plan, zin(in_offset:in_offset), zout(out_offset:out_offset))
    1220             : !$    end if
    1221             : ! all theads need to meet at this barrier
    1222             : !$OMP BARRIER
    1223             : !$    if (my_id < plan%num_threads_needed) then
    1224             :          IF (scale /= 1.0_dp) CALL zdscal(plan%n*num_rows, scale, zout(scal_offset:scal_offset), 1)
    1225             : !$    end if
    1226             : 
    1227             : #else
    1228             :       MARK_USED(plan)
    1229             :       MARK_USED(scale)
    1230             :       !MARK_USED does not work with assumed size arguments
    1231             :       IF (.FALSE.) THEN; DO; IF (ABS(zin(1)) > ABS(zout(1))) EXIT; END DO; END IF
    1232             :       stat = 0
    1233             : 
    1234             : !$    else
    1235             : !$    end if
    1236             : 
    1237             : #endif
    1238             : 
    1239             : !$OMP END PARALLEL
    1240             : 
    1241     7835086 :       END SUBROUTINE fftw31dm
    1242             : 
    1243           0 :    END MODULE

Generated by: LCOV version 1.15