LCOV - code coverage report
Current view: top level - src/pw/fft - fftw3_lib.F (source / functions) Hit Total Coverage
Test: CP2K Regtests (git:f515968) Lines: 229 345 66.4 %
Date: 2022-07-03 19:52:34 Functions: 11 18 61.1 %

          Line data    Source code
       1             : !--------------------------------------------------------------------------------------------------!
       2             : !   CP2K: A general program to perform molecular dynamics simulations                              !
       3             : !   Copyright 2000-2022 CP2K developers group <https://cp2k.org>                                   !
       4             : !                                                                                                  !
       5             : !   SPDX-License-Identifier: GPL-2.0-or-later                                                      !
       6             : !--------------------------------------------------------------------------------------------------!
       7             : #define XFFTW_PLAN_WITH_NTHREADS dfftw_plan_with_nthreads
       8             : #define XFFTW_PLAN_DFT_3D dfftw_plan_dft_3d
       9             : #define XFFTW_PLAN_GURU_DFT dfftw_plan_guru_dft
      10             : #define XFFTW_EXECUTE_DFT dfftw_execute_dft
      11             : #define XFFTW_DESTROY_PLAN dfftw_destroy_plan
      12             : 
      13             : MODULE fftw3_lib
      14             : 
      15             :    USE ISO_C_BINDING,                   ONLY: C_ASSOCIATED,&
      16             :                                               C_CHAR,&
      17             :                                               C_INT,&
      18             :                                               C_PTR
      19             :    USE cp_files,                        ONLY: get_unit_number
      20             :    USE fft_kinds,                       ONLY: dp
      21             :    USE fft_plan,                        ONLY: fft_plan_type
      22             : 
      23             : !$ USE OMP_LIB, ONLY: omp_get_max_threads, omp_get_thread_num, omp_get_num_threads
      24             : 
      25             : #include "../../base/base_uses.f90"
      26             : 
      27             :    IMPLICIT NONE
      28             :    PRIVATE
      29             : 
      30             :    PUBLIC :: fftw3_do_init, fftw3_do_cleanup, fftw3_get_lengths, fftw33d, fftw31dm
      31             :    PUBLIC :: fftw3_destroy_plan, fftw3_create_plan_1dm, fftw3_create_plan_3d
      32             : 
      33             : #if defined ( __FFTW3 )
      34             :    INTERFACE
      35             :       SUBROUTINE fftw_cleanup() BIND(C, name="fftw_cleanup")
      36             :       END SUBROUTINE
      37             :    END INTERFACE
      38             : #endif
      39             : 
      40             : CONTAINS
      41             : 
      42             : ! **************************************************************************************************
      43             : !> \brief ...
      44             : !> \param wisdom_file ...
      45             : !> \param ionode ...
      46             : ! **************************************************************************************************
      47        7993 :    SUBROUTINE fftw3_do_cleanup(wisdom_file, ionode)
      48             : 
      49             :       CHARACTER(LEN=*), INTENT(IN)             :: wisdom_file
      50             :       LOGICAL                                  :: ionode
      51             : 
      52             : #if defined ( __FFTW3 )
      53             :       INTEGER                                  :: iunit, istat
      54             :       ! Write out FFTW3 wisdom to file (if we can)
      55             :       ! only the ionode updates the wisdom
      56        7993 :       IF (ionode) THEN
      57        4100 :          iunit = get_unit_number()
      58        4100 :          OPEN (UNIT=iunit, FILE=wisdom_file, STATUS="UNKNOWN", FORM="FORMATTED", ACTION="WRITE", IOSTAT=istat)
      59        4100 :          IF (istat == 0) THEN
      60           0 :             CALL fftw_export_wisdom_to_file(iunit)
      61           0 :             CLOSE (iunit)
      62             :          END IF
      63             :       END IF
      64             : 
      65        7993 :       CALL fftw_cleanup()
      66             : #else
      67             :       MARK_USED(wisdom_file)
      68             :       MARK_USED(ionode)
      69             : #endif
      70             : 
      71        7993 :    END SUBROUTINE
      72             : 
      73             : ! **************************************************************************************************
      74             : !> \brief ...
      75             : !> \param wisdom_file ...
      76             : ! **************************************************************************************************
      77        8203 :    SUBROUTINE fftw3_do_init(wisdom_file)
      78             : 
      79             :       CHARACTER(LEN=*), INTENT(IN)             :: wisdom_file
      80             : 
      81             : #if defined ( __FFTW3 )
      82             :       INTEGER                                  :: istat, isuccess, iunit
      83             :       LOGICAL                                  :: exist
      84             : 
      85             : !$    LOGICAL                                  :: mkl_is_safe
      86             : 
      87             : ! If using the Intel compiler then we need to declare
      88             : ! a C interface to a global variable in MKL that sets
      89             : ! the number of threads which can concurrently execute
      90             : ! an FFT
      91             : ! We need __INTEL_COMPILER so we can be sure that the compiler
      92             : ! understands the !DEC$ version definitions
      93             : #if defined (__INTEL_COMPILER) && defined (__MKL) && defined (__FFTW3)
      94             : !$    include "mkl.fi"
      95             : !DEC$ IF DEFINED (INTEL_MKL_VERSION)
      96             : !DEC$ IF INTEL_MKL_VERSION .EQ. 110100
      97             : !DIR$ ATTRIBUTES ALIGN : 8 :: fftw3_mkl
      98             : !$    COMMON/fftw3_mkl/ignore(4), mkl_dft_number_of_user_threads, ignore2(7)
      99             : !$    INTEGER*4 :: ignore, mkl_dft_number_of_user_threads, ignore2
     100             : !$    BIND(c) :: /fftw3_mkl/
     101             : !DEC$ ENDIF
     102             : !DEC$ ENDIF
     103             : #elif defined (__MKL) && defined (__FFTW3)
     104             : ! Preprocessing is enabled by default, and below header is not language specific
     105             : #include <mkl_version.h>
     106             : #endif
     107             : 
     108             :       ! Read FFTW wisdom (if available)
     109             :       ! all nodes are opening the file here...
     110        8203 :       INQUIRE (FILE=wisdom_file, exist=exist)
     111        8203 :       IF (exist) THEN
     112           0 :          iunit = get_unit_number()
     113             :          OPEN (UNIT=iunit, FILE=wisdom_file, STATUS="OLD", FORM="FORMATTED", POSITION="REWIND", &
     114           0 :                ACTION="READ", IOSTAT=istat)
     115           0 :          IF (istat == 0) THEN
     116           0 :             CALL fftw_import_wisdom_from_file(isuccess, iunit)
     117             :             ! write(*,*) "FFTW3 import wisdom from file ....",MERGE((/"OK    "/),(/"NOT OK"/),(/isuccess==1/))
     118           0 :             CLOSE (iunit)
     119             :          END IF
     120             :       END IF
     121             : 
     122             :       ! Now check if we have a real FFTW3 library, or are using MKL wrappers
     123             : 
     124        8203 : !$    IF (fftw3_is_mkl_wrapper() .and. omp_get_max_threads() .gt. 1) THEN
     125             : ! If we are not using the Intel compiler, there is no way to tell which
     126             : ! MKL version is in use, so fail safe...
     127           0 : !$       mkl_is_safe = .FALSE.
     128             : #if defined (__MKL) && defined (__FFTW3) && defined(INTEL_MKL_VERSION) && (110100 < INTEL_MKL_VERSION)
     129             : !$       mkl_is_safe = .TRUE.
     130             : #elif defined (__INTEL_COMPILER) && defined (__MKL) && defined (__FFTW3)
     131             : ! If we have an Intel compiler (__INTEL_COMPILER is defined) then check the
     132             : ! MKL version and make the appropriate action
     133             : !DEC$ IF DEFINED (INTEL_MKL_VERSION)
     134             : !DEC$ IF INTEL_MKL_VERSION .EQ. 110100
     135             : !$       mkl_dft_number_of_user_threads = omp_get_max_threads()
     136             : !DEC$ ENDIF
     137             : !DEC$ IF INTEL_MKL_VERSION .GE. 110100
     138             : !$       mkl_is_safe = .TRUE.
     139             : !DEC$ ENDIF
     140             : !DEC$ ENDIF
     141             : #endif
     142             : !$       IF (.NOT. mkl_is_safe) THEN
     143             : !$          STOP "Intel's FFTW3 interface to MKL is not "// &
     144             : !$             "thread-safe prior to MKL 11.1.0!  Please "// &
     145             : !$             "rebuild CP2K, linking against FFTW 3 from "// &
     146             : !$             "www.fftw.org or a newer version of MKL. "// &
     147           0 : !$             "Now exiting..."
     148             : !$       END IF
     149             : !$    END IF
     150             : #else
     151             :       MARK_USED(wisdom_file)
     152             : #endif
     153             : 
     154        8203 :    END SUBROUTINE
     155             : 
     156             : ! **************************************************************************************************
     157             : !> \brief ...
     158             : !> \param DATA ...
     159             : !> \param max_length ...
     160             : !> \par History
     161             : !>      JGH 23-Jan-2006 : initial version
     162             : !>      Adapted for new interface
     163             : !>      IAB 09-Jan-2009 : Modified to cache plans in fft_plan_type
     164             : !>                        (c) The Numerical Algorithms Group (NAG) Ltd, 2009 on behalf of the HECToR project
     165             : !>      IAB 09-Oct-2009 : Added OpenMP directives to 1D FFT, and planning routines
     166             : !>                        (c) The Numerical Algorithms Group (NAG) Ltd, 2009 on behalf of the HECToR project
     167             : !>      IAB 11-Sep-2012 : OpenMP parallel 3D FFT (Ruyman Reyes, PRACE)
     168             : !> \author JGH
     169             : ! **************************************************************************************************
     170         504 :    SUBROUTINE fftw3_get_lengths(DATA, max_length)
     171             : 
     172             :       INTEGER, DIMENSION(*)                              :: DATA
     173             :       INTEGER, INTENT(INOUT)                             :: max_length
     174             : 
     175             :       INTEGER                                            :: h, i, j, k, m, maxn, maxn_elevens, &
     176             :                                                             maxn_fives, maxn_sevens, &
     177             :                                                             maxn_thirteens, maxn_threes, &
     178             :                                                             maxn_twos, ndata, nmax, number
     179         504 :       INTEGER, ALLOCATABLE, DIMENSION(:)                 :: dlocal, idx
     180             : 
     181             : !------------------------------------------------------------------------------
     182             : ! compute ndata
     183             : !! FFTW can do arbitrary(?) lengths, maybe you want to limit them to some
     184             : !!    powers of small prime numbers though...
     185             : 
     186         504 :       maxn_twos = 15
     187         504 :       maxn_threes = 3
     188         504 :       maxn_fives = 2
     189         504 :       maxn_sevens = 1
     190         504 :       maxn_elevens = 1
     191         504 :       maxn_thirteens = 0
     192         504 :       maxn = 37748736
     193             : 
     194         504 :       ndata = 0
     195        8568 :       DO h = 0, maxn_twos
     196        8064 :          nmax = HUGE(0)/2**h
     197       40824 :          DO i = 0, maxn_threes
     198      137088 :             DO j = 0, maxn_fives
     199      322560 :                DO k = 0, maxn_sevens
     200      677376 :                   DO m = 0, maxn_elevens
     201      387072 :                      number = (3**i)*(5**j)*(7**k)*(11**m)
     202             : 
     203      387072 :                      IF (number > nmax) CYCLE
     204             : 
     205      387072 :                      number = number*2**h
     206      387072 :                      IF (number >= maxn) CYCLE
     207             : 
     208      580608 :                      ndata = ndata + 1
     209             :                   END DO
     210             :                END DO
     211             :             END DO
     212             :          END DO
     213             :       END DO
     214             : 
     215        2520 :       ALLOCATE (dlocal(ndata), idx(ndata))
     216             : 
     217         504 :       ndata = 0
     218      371448 :       dlocal(:) = 0
     219        8568 :       DO h = 0, maxn_twos
     220        8064 :          nmax = HUGE(0)/2**h
     221       40824 :          DO i = 0, maxn_threes
     222      137088 :             DO j = 0, maxn_fives
     223      322560 :                DO k = 0, maxn_sevens
     224      677376 :                   DO m = 0, maxn_elevens
     225      387072 :                      number = (3**i)*(5**j)*(7**k)*(11**m)
     226             : 
     227      387072 :                      IF (number > nmax) CYCLE
     228             : 
     229      387072 :                      number = number*2**h
     230      387072 :                      IF (number >= maxn) CYCLE
     231             : 
     232      370944 :                      ndata = ndata + 1
     233      580608 :                      dlocal(ndata) = number
     234             :                   END DO
     235             :                END DO
     236             :             END DO
     237             :          END DO
     238             :       END DO
     239             : 
     240         504 :       CALL sortint(dlocal, ndata, idx)
     241         504 :       ndata = MIN(ndata, max_length)
     242      371448 :       DATA(1:ndata) = dlocal(1:ndata)
     243         504 :       max_length = ndata
     244             : 
     245         504 :       DEALLOCATE (dlocal, idx)
     246             : 
     247         504 :    END SUBROUTINE fftw3_get_lengths
     248             : 
     249             : ! **************************************************************************************************
     250             : !> \brief ...
     251             : !> \param iarr ...
     252             : !> \param n ...
     253             : !> \param index ...
     254             : ! **************************************************************************************************
     255         504 :    SUBROUTINE sortint(iarr, n, index)
     256             : 
     257             :       INTEGER, INTENT(IN)                                :: n
     258             :       INTEGER, INTENT(INOUT)                             :: iarr(1:n)
     259             :       INTEGER, INTENT(OUT)                               :: INDEX(1:n)
     260             : 
     261             :       INTEGER, PARAMETER                                 :: m = 7, nstack = 50
     262             : 
     263             :       INTEGER                                            :: a, i, ib, ir, istack(1:nstack), itemp, &
     264             :                                                             j, jstack, k, l, temp
     265             : 
     266             : !------------------------------------------------------------------------------
     267             : 
     268      371448 :       DO i = 1, n
     269      371448 :          INDEX(i) = i
     270             :       END DO
     271             :       jstack = 0
     272             :       l = 1
     273             :       ir = n
     274             :       DO WHILE (.TRUE.)
     275      139608 :       IF (ir - l < m) THEN
     276      301392 :          DO j = l + 1, ir
     277      231336 :             a = iarr(j)
     278      231336 :             ib = INDEX(j)
     279      549360 :             DO i = j - 1, 0, -1
     280      549360 :                IF (i == 0) EXIT
     281      548352 :                IF (iarr(i) <= a) EXIT
     282      318024 :                iarr(i + 1) = iarr(i)
     283      549360 :                INDEX(i + 1) = INDEX(i)
     284             :             END DO
     285      231336 :             iarr(i + 1) = a
     286      301392 :             INDEX(i + 1) = ib
     287             :          END DO
     288       70056 :          IF (jstack == 0) RETURN
     289       69552 :          ir = istack(jstack)
     290       69552 :          l = istack(jstack - 1)
     291       69552 :          jstack = jstack - 2
     292             :       ELSE
     293       69552 :          k = (l + ir)/2
     294       69552 :          temp = iarr(k)
     295       69552 :          iarr(k) = iarr(l + 1)
     296       69552 :          iarr(l + 1) = temp
     297       69552 :          itemp = INDEX(k)
     298       69552 :          INDEX(k) = INDEX(l + 1)
     299       69552 :          INDEX(l + 1) = itemp
     300       69552 :          IF (iarr(l + 1) > iarr(ir)) THEN
     301       32760 :             temp = iarr(l + 1)
     302       32760 :             iarr(l + 1) = iarr(ir)
     303       32760 :             iarr(ir) = temp
     304       32760 :             itemp = INDEX(l + 1)
     305       32760 :             INDEX(l + 1) = INDEX(ir)
     306       32760 :             INDEX(ir) = itemp
     307             :          END IF
     308       69552 :          IF (iarr(l) > iarr(ir)) THEN
     309       26208 :             temp = iarr(l)
     310       26208 :             iarr(l) = iarr(ir)
     311       26208 :             iarr(ir) = temp
     312       26208 :             itemp = INDEX(l)
     313       26208 :             INDEX(l) = INDEX(ir)
     314       26208 :             INDEX(ir) = itemp
     315             :          END IF
     316       69552 :          IF (iarr(l + 1) > iarr(l)) THEN
     317       26208 :             temp = iarr(l + 1)
     318       26208 :             iarr(l + 1) = iarr(l)
     319       26208 :             iarr(l) = temp
     320       26208 :             itemp = INDEX(l + 1)
     321       26208 :             INDEX(l + 1) = INDEX(l)
     322       26208 :             INDEX(l) = itemp
     323             :          END IF
     324       69552 :          i = l + 1
     325       69552 :          j = ir
     326       69552 :          a = iarr(l)
     327       69552 :          ib = INDEX(l)
     328      443016 :          DO WHILE (.TRUE.)
     329      512568 :             i = i + 1
     330     1663704 :             DO WHILE (iarr(i) < a)
     331     1151136 :                i = i + 1
     332             :             END DO
     333      512568 :             j = j - 1
     334     1270080 :             DO WHILE (iarr(j) > a)
     335      757512 :                j = j - 1
     336             :             END DO
     337      512568 :             IF (j < i) EXIT
     338      443016 :             temp = iarr(i)
     339      443016 :             iarr(i) = iarr(j)
     340      443016 :             iarr(j) = temp
     341      443016 :             itemp = INDEX(i)
     342      443016 :             INDEX(i) = INDEX(j)
     343      443016 :             INDEX(j) = itemp
     344             :          END DO
     345       69552 :          iarr(l) = iarr(j)
     346       69552 :          iarr(j) = a
     347       69552 :          INDEX(l) = INDEX(j)
     348       69552 :          INDEX(j) = ib
     349       69552 :          jstack = jstack + 2
     350       69552 :          IF (jstack > nstack) CPABORT(" Nstack too small in sortr")
     351       69552 :          IF (ir - i + 1 >= j - l) THEN
     352       35784 :             istack(jstack) = ir
     353       35784 :             istack(jstack - 1) = i
     354       35784 :             ir = j - 1
     355             :          ELSE
     356       33768 :             istack(jstack) = j - 1
     357       33768 :             istack(jstack - 1) = l
     358       33768 :             l = i
     359             :          END IF
     360             :       END IF
     361             : 
     362             :       END DO
     363             : 
     364             :    END SUBROUTINE sortint
     365             : 
     366             : ! **************************************************************************************************
     367             : 
     368             : ! **************************************************************************************************
     369             : !> \brief ...
     370             : !> \param plan ...
     371             : !> \param fft_rank ...
     372             : !> \param dim_n ...
     373             : !> \param dim_istride ...
     374             : !> \param dim_ostride ...
     375             : !> \param hm_rank ...
     376             : !> \param hm_n ...
     377             : !> \param hm_istride ...
     378             : !> \param hm_ostride ...
     379             : !> \param zin ...
     380             : !> \param zout ...
     381             : !> \param fft_direction ...
     382             : !> \param fftw_plan_type ...
     383             : !> \param valid ...
     384             : ! **************************************************************************************************
     385       45695 :    SUBROUTINE fftw3_create_guru_plan(plan, fft_rank, dim_n, &
     386             :                                      dim_istride, dim_ostride, hm_rank, &
     387             :                                      hm_n, hm_istride, hm_ostride, &
     388             :                                      zin, zout, fft_direction, fftw_plan_type, &
     389             :                                      valid)
     390             : 
     391             :       IMPLICIT NONE
     392             : 
     393             :       TYPE(C_PTR), INTENT(INOUT)                         :: plan
     394             :       COMPLEX(KIND=dp), DIMENSION(*), INTENT(IN)         :: zin
     395             :       COMPLEX(KIND=dp), DIMENSION(*), INTENT(IN)         :: zout
     396             :       INTEGER, INTENT(IN) :: dim_n(2), dim_istride(2), dim_ostride(2), &
     397             :                              hm_n(2), hm_istride(2), hm_ostride(2), fft_rank, &
     398             :                              fft_direction, fftw_plan_type, hm_rank
     399             :       LOGICAL, INTENT(OUT)                               :: valid
     400             : 
     401             : #if defined (__FFTW3)
     402             : 
     403             :       CALL XFFTW_PLAN_GURU_DFT(plan, fft_rank, &
     404             :                                dim_n, dim_istride, dim_ostride, &
     405             :                                hm_rank, hm_n, hm_istride, hm_ostride, &
     406             :                                zin, zout, &
     407       45695 :                                fft_direction, fftw_plan_type)
     408             : 
     409       45695 :       valid = C_ASSOCIATED(plan)
     410             : 
     411             : #else
     412             :       MARK_USED(plan)
     413             :       MARK_USED(fft_rank)
     414             :       MARK_USED(dim_n)
     415             :       MARK_USED(dim_istride)
     416             :       MARK_USED(dim_ostride)
     417             :       MARK_USED(hm_rank)
     418             :       MARK_USED(hm_n)
     419             :       MARK_USED(hm_istride)
     420             :       MARK_USED(hm_ostride)
     421             :       MARK_USED(fft_direction)
     422             :       MARK_USED(fftw_plan_type)
     423             :       !MARK_USED does not work with assumed size arguments
     424             :       IF (.FALSE.) THEN; DO; IF (ABS(zin(1)) > ABS(zout(1))) EXIT; END DO; END IF
     425             :       valid = .FALSE.
     426             : 
     427             : #endif
     428             : 
     429       45695 :    END SUBROUTINE
     430             : 
     431             : ! **************************************************************************************************
     432             : 
     433             : ! **************************************************************************************************
     434             : !> \brief ...
     435             : !> \return ...
     436             : ! **************************************************************************************************
     437       45695 :    FUNCTION fftw3_is_mkl_wrapper() RESULT(is_mkl)
     438             : 
     439             :       IMPLICIT NONE
     440             : 
     441             :       LOGICAL :: is_mkl
     442             : #if defined ( __FFTW3 )
     443             :       LOGICAL :: guru_supported
     444             :       INTEGER :: dim_n(2), dim_istride(2), dim_ostride(2), &
     445             :                  howmany_n(2), howmany_istride(2), howmany_ostride(2)
     446             :       TYPE(C_PTR)                          :: test_plan
     447             :       COMPLEX(KIND=dp), DIMENSION(1, 1, 1) :: zin
     448             : 
     449             : #include "fftw3.f"
     450             : 
     451             :       ! Attempt to create a plan with the guru interface for a 2d sub-space
     452             :       ! If this fails (e.g. for MKL's FFTW3 interface), fall back to the
     453             :       ! FFTW3 threaded 3D transform instead of the hand-optimised version
     454       45695 :       dim_n(1) = 1
     455       45695 :       dim_istride(1) = 1
     456       45695 :       dim_ostride(1) = 1
     457       45695 :       howmany_n(1) = 1
     458       45695 :       howmany_n(2) = 1
     459       45695 :       howmany_istride(1) = 1
     460       45695 :       howmany_istride(2) = 1
     461       45695 :       howmany_ostride(1) = 1
     462       45695 :       howmany_ostride(2) = 1
     463       45695 :       zin = CMPLX(0.0_dp, 0.0_dp, KIND=dp)
     464             :       CALL fftw3_create_guru_plan(test_plan, 1, &
     465             :                                   dim_n, dim_istride, dim_ostride, &
     466             :                                   2, howmany_n, howmany_istride, howmany_ostride, &
     467             :                                   zin, zin, &
     468       45695 :                                   FFTW_FORWARD, FFTW_ESTIMATE, guru_supported)
     469       45695 :       IF (guru_supported) THEN
     470       45695 :          CALL XFFTW_DESTROY_PLAN(test_plan)
     471       45695 :          is_mkl = .FALSE.
     472             :       ELSE
     473             :          is_mkl = .TRUE.
     474             :       END IF
     475             : 
     476             : #else
     477             :       is_mkl = .FALSE.
     478             : #endif
     479             : 
     480       45695 :    END FUNCTION
     481             : 
     482             : ! **************************************************************************************************
     483             : 
     484             : ! **************************************************************************************************
     485             : !> \brief ...
     486             : !> \param nrows ...
     487             : !> \param nt ...
     488             : !> \param rows_per_thread ...
     489             : !> \param rows_per_thread_r ...
     490             : !> \param th_planA ...
     491             : !> \param th_planB ...
     492             : ! **************************************************************************************************
     493           0 :    SUBROUTINE fftw3_compute_rows_per_th(nrows, nt, rows_per_thread, rows_per_thread_r, &
     494             :                                         th_planA, th_planB)
     495             : 
     496             :       INTEGER, INTENT(IN)                                :: nrows, nt
     497             :       INTEGER, INTENT(OUT)                               :: rows_per_thread, rows_per_thread_r, &
     498             :                                                             th_planA, th_planB
     499             : 
     500           0 :       IF (MOD(nrows, nt) .EQ. 0) THEN
     501           0 :          rows_per_thread = nrows/nt
     502           0 :          rows_per_thread_r = 0
     503           0 :          th_planA = nt
     504           0 :          th_planB = 0
     505             :       ELSE
     506           0 :          rows_per_thread = nrows/nt + 1
     507           0 :          rows_per_thread_r = nrows/nt
     508           0 :          th_planA = MOD(nrows, nt)
     509           0 :          th_planB = nt - th_planA
     510             :       END IF
     511             : 
     512           0 :    END SUBROUTINE
     513             : 
     514             : ! **************************************************************************************************
     515             : 
     516             : ! **************************************************************************************************
     517             : !> \brief ...
     518             : !> \param plan ...
     519             : !> \param plan_r ...
     520             : !> \param dim_n ...
     521             : !> \param dim_istride ...
     522             : !> \param dim_ostride ...
     523             : !> \param hm_n ...
     524             : !> \param hm_istride ...
     525             : !> \param hm_ostride ...
     526             : !> \param input ...
     527             : !> \param output ...
     528             : !> \param fft_direction ...
     529             : !> \param fftw_plan_type ...
     530             : !> \param rows_per_th ...
     531             : !> \param rows_per_th_r ...
     532             : ! **************************************************************************************************
     533           0 :    SUBROUTINE fftw3_create_3d_plans(plan, plan_r, dim_n, dim_istride, dim_ostride, &
     534             :                                     hm_n, hm_istride, hm_ostride, &
     535             :                                     input, output, &
     536             :                                     fft_direction, fftw_plan_type, rows_per_th, &
     537             :                                     rows_per_th_r)
     538             : 
     539             :       TYPE(C_PTR), INTENT(INOUT)                         :: plan, plan_r
     540             :       INTEGER, INTENT(INOUT)                             :: dim_n(2), dim_istride(2), &
     541             :                                                             dim_ostride(2), hm_n(2), &
     542             :                                                             hm_istride(2), hm_ostride(2)
     543             :       COMPLEX(KIND=dp), DIMENSION(*), INTENT(INOUT)      :: input, output
     544             :       INTEGER, INTENT(INOUT)                             :: fft_direction, fftw_plan_type
     545             :       INTEGER, INTENT(IN)                                :: rows_per_th, rows_per_th_r
     546             : 
     547             :       LOGICAL                                            :: valid
     548             : 
     549             : ! First plans will have an additional row
     550             : 
     551           0 :       hm_n(2) = rows_per_th
     552             :       CALL fftw3_create_guru_plan(plan, 1, &
     553             :                                   dim_n, dim_istride, dim_ostride, &
     554             :                                   2, hm_n, hm_istride, hm_ostride, &
     555             :                                   input, output, &
     556           0 :                                   fft_direction, fftw_plan_type, valid)
     557             : 
     558           0 :       IF (.NOT. valid) THEN
     559           0 :          CPABORT("fftw3_create_plan")
     560             :       END IF
     561             : 
     562             :       !!!! Remainder
     563           0 :       hm_n(2) = rows_per_th_r
     564             :       CALL fftw3_create_guru_plan(plan_r, 1, &
     565             :                                   dim_n, dim_istride, dim_ostride, &
     566             :                                   2, hm_n, hm_istride, hm_ostride, &
     567             :                                   input, output, &
     568           0 :                                   fft_direction, fftw_plan_type, valid)
     569           0 :       IF (.NOT. valid) THEN
     570           0 :          CPABORT("fftw3_create_plan (remaining)")
     571             :       END IF
     572             : 
     573           0 :    END SUBROUTINE
     574             : 
     575             : ! **************************************************************************************************
     576             : 
     577             : ! **************************************************************************************************
     578             : !> \brief ...
     579             : !> \param plan ...
     580             : !> \param zin ...
     581             : !> \param zout ...
     582             : !> \param plan_style ...
     583             : ! **************************************************************************************************
     584       37492 :    SUBROUTINE fftw3_create_plan_3d(plan, zin, zout, plan_style)
     585             : 
     586             :       TYPE(fft_plan_type), INTENT(INOUT)              :: plan
     587             :       COMPLEX(KIND=dp), DIMENSION(*), INTENT(INOUT)      :: zin
     588             :       COMPLEX(KIND=dp), DIMENSION(*), INTENT(INOUT)      :: zout
     589             :       INTEGER                                            :: plan_style
     590             : #if defined ( __FFTW3 )
     591             :       INTEGER                                            :: n1, n2, n3
     592             :       INTEGER                                            :: nt
     593             :       INTEGER                                            :: rows_per_th
     594             :       INTEGER                                            :: rows_per_th_r
     595             :       INTEGER                                            :: fft_direction
     596             :       INTEGER                                            :: th_planA, th_planB
     597       37492 :       COMPLEX(KIND=dp), ALLOCATABLE                      :: tmp(:)
     598             : 
     599             :       ! GURU Interface
     600             :       INTEGER :: dim_n(2), dim_istride(2), dim_ostride(2), &
     601             :                  howmany_n(2), howmany_istride(2), howmany_ostride(2)
     602             : 
     603             : #include "fftw3.f"
     604             :       INTEGER :: fftw_plan_type
     605       74960 :       SELECT CASE (plan_style)
     606             :       CASE (1)
     607       37468 :          fftw_plan_type = FFTW_ESTIMATE
     608             :       CASE (2)
     609           8 :          fftw_plan_type = FFTW_MEASURE
     610             :       CASE (3)
     611           8 :          fftw_plan_type = FFTW_PATIENT
     612             :       CASE (4)
     613           8 :          fftw_plan_type = FFTW_EXHAUSTIVE
     614             :       CASE DEFAULT
     615       37492 :          CPABORT("fftw3_create_plan_3d")
     616             :       END SELECT
     617             : 
     618             : #if defined (__FFTW3_UNALIGNED)
     619             :       fftw_plan_type = fftw_plan_type + FFTW_UNALIGNED
     620             : #endif
     621             : 
     622       37492 :       IF (plan%fsign == +1) THEN
     623       18746 :          fft_direction = FFTW_FORWARD
     624             :       ELSE
     625       18746 :          fft_direction = FFTW_BACKWARD
     626             :       END IF
     627             : 
     628       37492 :       n1 = plan%n_3d(1)
     629       37492 :       n2 = plan%n_3d(2)
     630       37492 :       n3 = plan%n_3d(3)
     631             : 
     632       37492 :       nt = 1
     633       37492 : !$OMP PARALLEL DEFAULT(NONE) SHARED(nt)
     634             : !$OMP MASTER
     635             : !$    nt = omp_get_num_threads()
     636             : !$OMP END MASTER
     637             : !$OMP END PARALLEL
     638             : 
     639             :       IF ((fftw3_is_mkl_wrapper()) .OR. &
     640       37492 :           (.NOT. plan_style == 1) .OR. &
     641             :           (n1 < 256 .AND. n2 < 256 .AND. n3 < 256 .AND. nt == 1)) THEN
     642             :          ! If the plan type is MEASURE, PATIENT and EXHAUSTIVE or
     643             :          ! the grid size is small (and we are single-threaded) then
     644             :          ! FFTW3 does a better job than handmade optimization
     645             :          ! so plan a single 3D FFT which will execute using all the threads
     646             : 
     647       37492 :          plan%separated_plans = .FALSE.
     648       37492 : !$       CALL XFFTW_PLAN_WITH_NTHREADS(nt)
     649             : 
     650       37492 :          IF (plan%fft_in_place) THEN
     651       18746 :             CALL XFFTW_PLAN_DFT_3D(plan%fftw_plan, n1, n2, n3, zin, zin, fft_direction, fftw_plan_type)
     652             :          ELSE
     653       18746 :             CALL XFFTW_PLAN_DFT_3D(plan%fftw_plan, n1, n2, n3, zin, zout, fft_direction, fftw_plan_type)
     654             :          END IF
     655             :       ELSE
     656           0 :          ALLOCATE (tmp(n1*n2*n3))
     657             :          ! ************************* PLANS WITH TRANSPOSITIONS ****************************
     658             :          !  In the cases described above, we manually thread each stage of the 3D FFT.
     659             :          !
     660             :          !  The following plans replace the 3D FFT call by running 1D FFTW across all
     661             :          !  3 directions of the array.
     662             :          !
     663             :          !  Output of FFTW is transposed to ensure that the next round of FFTW access
     664             :          !  contiguous information.
     665             :          !
     666             :          !  Assuming the input matrix is M(n3,n2,n1), FFTW/Transp are :
     667             :          !  M(n3,n2,n1) -> fftw(x) -> M(n3,n1,n2) -> fftw(y) -> M(n1,n2,n3) -> fftw(z) -> M(n1,n2,n3)
     668             :          !  Notice that last matrix is transposed in the Z axis. A DO-loop in the execute routine
     669             :          !  will perform the final transposition. Performance evaluation showed that using an external
     670             :          !  DO loop to do the final transposition performed better than directly transposing the output.
     671             :          !  However, this might vary depending on the compiler/platform, so a potential tuning spot
     672             :          !  is to perform the final transposition within the fftw library rather than using the external loop
     673             :          !  See comments below in Z-FFT for how to transpose the output to avoid the final DO loop.
     674             :          !
     675             :          !  Doc. for the Guru interface is in http://www.fftw.org/doc/Guru-Interface.html
     676             :          !
     677             :          !  OpenMP : Work is distributed on the Z plane.
     678             :          !           All transpositions are out-of-place to facilitate multi-threading
     679             :          !
     680             :          !!!! Plan for X : M(n3,n2,n1) -> fftw(x) -> M(n3,n1,n2)
     681             :          CALL fftw3_compute_rows_per_th(n3, nt, rows_per_th, rows_per_th_r, &
     682           0 :                                         th_planA, th_planB)
     683             : 
     684           0 :          dim_n(1) = n1
     685           0 :          dim_istride(1) = 1
     686           0 :          dim_ostride(1) = n2
     687           0 :          howmany_n(1) = n2
     688           0 :          howmany_n(2) = rows_per_th
     689           0 :          howmany_istride(1) = n1
     690           0 :          howmany_istride(2) = n1*n2
     691           0 :          howmany_ostride(1) = 1
     692           0 :          howmany_ostride(2) = n1*n2
     693             :          CALL fftw3_create_3d_plans(plan%fftw_plan_nx, plan%fftw_plan_nx_r, &
     694             :                                     dim_n, dim_istride, dim_ostride, howmany_n, &
     695             :                                     howmany_istride, howmany_ostride, &
     696             :                                     zin, tmp, &
     697             :                                     fft_direction, fftw_plan_type, rows_per_th, &
     698           0 :                                     rows_per_th_r)
     699             : 
     700             :          !!!! Plan for Y : M(n3,n1,n2) -> fftw(y) -> M(n1,n2,n3)
     701             :          CALL fftw3_compute_rows_per_th(n3, nt, rows_per_th, rows_per_th_r, &
     702           0 :                                         th_planA, th_planB)
     703           0 :          dim_n(1) = n2
     704             :          dim_istride(1) = 1
     705           0 :          dim_ostride(1) = n3
     706           0 :          howmany_n(1) = n1
     707           0 :          howmany_n(2) = rows_per_th
     708           0 :          howmany_istride(1) = n2
     709             :          howmany_istride(2) = n1*n2
     710             :          !!! transposed Z axis on output
     711           0 :          howmany_ostride(1) = n2*n3
     712           0 :          howmany_ostride(2) = 1
     713             : 
     714             :          CALL fftw3_create_3d_plans(plan%fftw_plan_ny, plan%fftw_plan_ny_r, &
     715             :                                     dim_n, dim_istride, dim_ostride, &
     716             :                                     howmany_n, howmany_istride, howmany_ostride, &
     717             :                                     tmp, zin, &
     718             :                                     fft_direction, fftw_plan_type, rows_per_th, &
     719           0 :                                     rows_per_th_r)
     720             : 
     721             :          !!!! Plan for Z : M(n1,n2,n3) -> fftw(z) -> M(n1,n2,n3)
     722             :          CALL fftw3_compute_rows_per_th(n1, nt, rows_per_th, rows_per_th_r, &
     723           0 :                                         th_planA, th_planB)
     724           0 :          dim_n(1) = n3
     725             :          dim_istride(1) = 1
     726           0 :          dim_ostride(1) = 1          ! To transpose: n2*n1
     727           0 :          howmany_n(1) = n2
     728           0 :          howmany_n(2) = rows_per_th
     729           0 :          howmany_istride(1) = n3
     730           0 :          howmany_istride(2) = n2*n3
     731           0 :          howmany_ostride(1) = n3     ! To transpose: n1
     732           0 :          howmany_ostride(2) = n2*n3  ! To transpose: 1
     733             : 
     734             :          CALL fftw3_create_3d_plans(plan%fftw_plan_nz, plan%fftw_plan_nz_r, &
     735             :                                     dim_n, dim_istride, dim_ostride, &
     736             :                                     howmany_n, howmany_istride, howmany_ostride, &
     737             :                                     zin, tmp, &
     738             :                                     fft_direction, fftw_plan_type, rows_per_th, &
     739           0 :                                     rows_per_th_r)
     740             : 
     741           0 :          plan%separated_plans = .TRUE.
     742             : 
     743           0 :          DEALLOCATE (tmp)
     744             :       END IF
     745             : 
     746             : #else
     747             :       MARK_USED(plan)
     748             :       MARK_USED(plan_style)
     749             :       !MARK_USED does not work with assumed size arguments
     750             :       IF (.FALSE.) THEN; DO; IF (ABS(zin(1)) > ABS(zout(1))) EXIT; END DO; END IF
     751             : #endif
     752             : 
     753       37492 :    END SUBROUTINE fftw3_create_plan_3d
     754             : 
     755             : ! **************************************************************************************************
     756             : 
     757             : ! **************************************************************************************************
     758             : !> \brief ...
     759             : !> \param plan ...
     760             : !> \param plan_r ...
     761             : !> \param split_dim ...
     762             : !> \param nt ...
     763             : !> \param tid ...
     764             : !> \param input ...
     765             : !> \param istride ...
     766             : !> \param output ...
     767             : !> \param ostride ...
     768             : ! **************************************************************************************************
     769           0 :    SUBROUTINE fftw3_workshare_execute_dft(plan, plan_r, split_dim, nt, tid, &
     770             :                                           input, istride, output, ostride)
     771             : 
     772             :       INTEGER, INTENT(IN)                           :: split_dim, nt, tid
     773             :       INTEGER, INTENT(IN)                           :: istride, ostride
     774             :       COMPLEX(KIND=dp), DIMENSION(*), INTENT(INOUT) :: input, output
     775             :       TYPE(C_PTR)                                   :: plan, plan_r
     776             : #if defined (__FFTW3)
     777             :       INTEGER                                     :: i_off, o_off
     778             :       INTEGER                                     :: th_planA, th_planB
     779             :       INTEGER :: rows_per_thread, rows_per_thread_r
     780             : 
     781             :       CALL fftw3_compute_rows_per_th(split_dim, nt, rows_per_thread, &
     782             :                                      rows_per_thread_r, &
     783           0 :                                      th_planA, th_planB)
     784             : 
     785           0 :       IF (th_planB .GT. 0) THEN
     786           0 :          IF (tid .LT. th_planA) THEN
     787           0 :             i_off = (tid)*(istride*(rows_per_thread)) + 1
     788           0 :             o_off = (tid)*(ostride*(rows_per_thread)) + 1
     789           0 :             IF (rows_per_thread .GT. 0) THEN
     790             :                CALL XFFTW_EXECUTE_DFT(plan, input(i_off), &
     791           0 :                                       output(o_off))
     792             :             END IF
     793           0 :          ELSE IF ((tid - th_planA) < th_planB) THEN
     794             : 
     795             :             i_off = (th_planA)*istride*(rows_per_thread) + &
     796           0 :                     (tid - th_planA)*istride*(rows_per_thread_r) + 1
     797             :             o_off = (th_planA)*ostride*(rows_per_thread) + &
     798           0 :                     (tid - th_planA)*ostride*(rows_per_thread_r) + 1
     799             : 
     800             :             CALL XFFTW_EXECUTE_DFT(plan_r, input(i_off), &
     801           0 :                                    output(o_off))
     802             :          END IF
     803             : 
     804             :       ELSE
     805           0 :          i_off = (tid)*(istride*(rows_per_thread)) + 1
     806           0 :          o_off = (tid)*(ostride*(rows_per_thread)) + 1
     807             : 
     808             :          CALL XFFTW_EXECUTE_DFT(plan, input(i_off), &
     809           0 :                                 output(o_off))
     810             : 
     811             :       END IF
     812             : #else
     813             :       MARK_USED(plan)
     814             :       MARK_USED(plan_r)
     815             :       MARK_USED(split_dim)
     816             :       MARK_USED(nt)
     817             :       MARK_USED(tid)
     818             :       MARK_USED(istride)
     819             :       MARK_USED(ostride)
     820             :       !MARK_USED does not work with assumed size arguments
     821             :       IF (.FALSE.) THEN; DO; IF (ABS(input(1)) > ABS(output(1))) EXIT; END DO; END IF
     822             : #endif
     823             : 
     824           0 :    END SUBROUTINE
     825             : 
     826             : ! **************************************************************************************************
     827             : 
     828             : ! **************************************************************************************************
     829             : !> \brief ...
     830             : !> \param plan ...
     831             : !> \param scale ...
     832             : !> \param zin ...
     833             : !> \param zout ...
     834             : !> \param stat ...
     835             : ! **************************************************************************************************
     836      526776 :    SUBROUTINE fftw33d(plan, scale, zin, zout, stat)
     837             : 
     838             :       TYPE(fft_plan_type), INTENT(IN)                      :: plan
     839             :       REAL(KIND=dp), INTENT(IN)                            :: scale
     840             :       COMPLEX(KIND=dp), DIMENSION(*), INTENT(INOUT), TARGET:: zin
     841             :       COMPLEX(KIND=dp), DIMENSION(*), INTENT(INOUT), TARGET:: zout
     842             :       INTEGER, INTENT(OUT)                                 :: stat
     843             : #if defined ( __FFTW3 )
     844      526776 :       COMPLEX(KIND=dp), POINTER                            :: xout(:)
     845      526776 :       COMPLEX(KIND=dp), ALLOCATABLE                        :: tmp1(:)
     846             :       INTEGER                                              :: n1, n2, n3
     847             :       INTEGER                                              :: tid, nt
     848             :       INTEGER                                              :: i, j, k
     849             : 
     850             : !------------------------------------------------------------------------------
     851             : 
     852             : #include "fftw3.f"
     853             : 
     854      526776 :       n1 = plan%n_3d(1)
     855      526776 :       n2 = plan%n_3d(2)
     856      526776 :       n3 = plan%n_3d(3)
     857             : 
     858      526776 :       stat = 1
     859             : 
     860             :       ! We use a POINTER to the output array to avoid duplicating code
     861      526776 :       IF (plan%fft_in_place) THEN
     862      526772 :          xout => zin(:n1*n2*n3)
     863             :       ELSE
     864           4 :          xout => zout(:n1*n2*n3)
     865             :       END IF
     866             : 
     867             :       ! Either compute the full 3D FFT using a multithreaded plan
     868      526776 :       IF (.NOT. plan%separated_plans) THEN
     869      526776 :          CALL XFFTW_EXECUTE_DFT(plan%fftw_plan, zin, xout)
     870             :       ELSE
     871             :          ! Or use the 3 stage FFT scheme described in fftw3_create_plan_3d
     872           0 :          ALLOCATE (tmp1(n1*n2*n3))   ! Temporary vector used for transpositions
     873           0 :          !$OMP PARALLEL DEFAULT(NONE) PRIVATE(tid,nt,i,j,k) SHARED(zin,tmp1,n1,n2,n3,plan,xout)
     874             :          tid = 0
     875             :          nt = 1
     876             : 
     877             : !$       tid = omp_get_thread_num()
     878             : !$       nt = omp_get_num_threads()
     879             :          CALL fftw3_workshare_execute_dft(plan%fftw_plan_nx, plan%fftw_plan_nx_r, &
     880             :                                           n3, nt, tid, &
     881             :                                           zin, n1*n2, tmp1, n1*n2)
     882             : 
     883             :          !$OMP BARRIER
     884             :          CALL fftw3_workshare_execute_dft(plan%fftw_plan_ny, plan%fftw_plan_ny_r, &
     885             :                                           n3, nt, tid, &
     886             :                                           tmp1, n1*n2, xout, 1)
     887             :          !$OMP BARRIER
     888             :          CALL fftw3_workshare_execute_dft(plan%fftw_plan_nz, plan%fftw_plan_nz_r, &
     889             :                                           n1, nt, tid, &
     890             :                                           xout, n2*n3, tmp1, n2*n3)
     891             :          !$OMP BARRIER
     892             : 
     893             :          !$OMP DO COLLAPSE(3)
     894             :          DO i = 1, n1
     895             :             DO j = 1, n2
     896             :                DO k = 1, n3
     897             :                   xout((i - 1) + (j - 1)*n1 + (k - 1)*n1*n2 + 1) = &
     898             :                      tmp1((k - 1) + (j - 1)*n3 + (i - 1)*n3*n2 + 1)
     899             :                END DO
     900             :             END DO
     901             :          END DO
     902             :          !$OMP END DO
     903             : 
     904             :          !$OMP END PARALLEL
     905             :       END IF
     906             : 
     907      526776 :       IF (scale /= 1.0_dp) THEN
     908      238953 :          CALL zdscal(n1*n2*n3, scale, xout, 1)
     909             :       END IF
     910             : 
     911             : #else
     912             :       MARK_USED(plan)
     913             :       MARK_USED(scale)
     914             :       !MARK_USED does not work with assumed size arguments
     915             :       IF (.FALSE.) THEN; DO; IF (ABS(zin(1)) > ABS(zout(1))) EXIT; END DO; END IF
     916             :       stat = 0
     917             : 
     918             : #endif
     919             : 
     920      526776 :    END SUBROUTINE fftw33d
     921             : 
     922             : ! **************************************************************************************************
     923             : 
     924             : ! **************************************************************************************************
     925             : !> \brief ...
     926             : !> \param plan ...
     927             : !> \param zin ...
     928             : !> \param zout ...
     929             : !> \param plan_style ...
     930             : ! **************************************************************************************************
     931      124984 :    SUBROUTINE fftw3_create_plan_1dm(plan, zin, zout, plan_style)
     932             : 
     933             :       IMPLICIT NONE
     934             : 
     935             :       TYPE(fft_plan_type), INTENT(INOUT)              :: plan
     936             :       COMPLEX(KIND=dp), DIMENSION(*), INTENT(IN)         :: zin
     937             :       COMPLEX(KIND=dp), DIMENSION(*), INTENT(IN)         :: zout
     938             :       INTEGER, INTENT(IN)                                :: plan_style
     939             : #if defined ( __FFTW3 )
     940             :       INTEGER                                            :: ii, di, io, DO, num_threads, num_rows
     941             : 
     942             : #include "fftw3.f"
     943             :       INTEGER :: fftw_plan_type
     944      249860 :       SELECT CASE (plan_style)
     945             :       CASE (1)
     946      124876 :          fftw_plan_type = FFTW_ESTIMATE
     947             :       CASE (2)
     948          48 :          fftw_plan_type = FFTW_MEASURE
     949             :       CASE (3)
     950          48 :          fftw_plan_type = FFTW_PATIENT
     951             :       CASE (4)
     952          12 :          fftw_plan_type = FFTW_EXHAUSTIVE
     953             :       CASE DEFAULT
     954      124984 :          CPABORT("fftw3_create_plan_1dm")
     955             :       END SELECT
     956             : 
     957             : #if defined (__FFTW3_UNALIGNED)
     958             :       fftw_plan_type = fftw_plan_type + FFTW_UNALIGNED
     959             : #endif
     960      124984 :       num_threads = 1
     961      124984 :       plan%separated_plans = .FALSE.
     962             : !$OMP PARALLEL DEFAULT(NONE), &
     963      124984 : !$OMP          SHARED(NUM_THREADS)
     964             : !$OMP MASTER
     965             : !$    num_threads = omp_get_num_threads()
     966             : !$OMP END MASTER
     967             : !$OMP END PARALLEL
     968             : 
     969      124984 :       num_rows = plan%m/num_threads
     970      124984 : !$    plan%num_threads_needed = num_threads
     971             : 
     972             : ! Check for number of rows less than num_threads
     973      124984 : !$    IF (plan%m < num_threads) THEN
     974           0 : !$       num_rows = 1
     975           0 : !$       plan%num_threads_needed = plan%m
     976             : !$    END IF
     977             : 
     978             : ! Check for total number of rows not divisible by num_threads
     979      124984 : !$    IF (num_rows*plan%num_threads_needed .NE. plan%m) THEN
     980           0 : !$       plan%need_alt_plan = .TRUE.
     981             : !$    END IF
     982             : 
     983      124984 : !$    plan%num_rows = num_rows
     984      124984 :       ii = 1
     985      124984 :       di = plan%n
     986      124984 :       io = 1
     987      124984 :       DO = plan%n
     988      124984 :       IF (plan%fsign == +1 .AND. plan%trans) THEN
     989       62358 :          ii = plan%m
     990       62358 :          di = 1
     991       62626 :       ELSEIF (plan%fsign == -1 .AND. plan%trans) THEN
     992       62358 :          io = plan%m
     993       62358 :          DO = 1
     994             :       END IF
     995             : 
     996      124984 :       IF (plan%fsign == +1) THEN
     997             :          CALL dfftw_plan_many_dft(plan%fftw_plan, 1, plan%n, num_rows, zin, 0, ii, di, &
     998       62626 :                                   zout, 0, io, DO, FFTW_FORWARD, fftw_plan_type)
     999             :       ELSE
    1000             :          CALL dfftw_plan_many_dft(plan%fftw_plan, 1, plan%n, num_rows, zin, 0, ii, di, &
    1001       62358 :                                   zout, 0, io, DO, FFTW_BACKWARD, fftw_plan_type)
    1002             :       END IF
    1003             : 
    1004      124984 : !$    IF (plan%need_alt_plan) THEN
    1005           0 : !$       plan%alt_num_rows = plan%m - (plan%num_threads_needed - 1)*num_rows
    1006           0 : !$       IF (plan%fsign == +1) THEN
    1007             : !$          CALL dfftw_plan_many_dft(plan%alt_fftw_plan, 1, plan%n, plan%alt_num_rows, zin, 0, ii, di, &
    1008           0 : !$                                   zout, 0, io, DO, FFTW_FORWARD, fftw_plan_type)
    1009             : !$       ELSE
    1010             : !$          CALL dfftw_plan_many_dft(plan%alt_fftw_plan, 1, plan%n, plan%alt_num_rows, zin, 0, ii, di, &
    1011           0 : !$                                   zout, 0, io, DO, FFTW_BACKWARD, fftw_plan_type)
    1012             : !$       END IF
    1013             : !$    END IF
    1014             : 
    1015             : #else
    1016             :       MARK_USED(plan)
    1017             :       MARK_USED(plan_style)
    1018             :       !MARK_USED does not work with assumed size arguments
    1019             :       IF (.FALSE.) THEN; DO; IF (ABS(zin(1)) > ABS(zout(1))) EXIT; END DO; END IF
    1020             : #endif
    1021             : 
    1022      124984 :    END SUBROUTINE fftw3_create_plan_1dm
    1023             : 
    1024             : ! **************************************************************************************************
    1025             : !> \brief ...
    1026             : !> \param plan ...
    1027             : ! **************************************************************************************************
    1028      162476 :    SUBROUTINE fftw3_destroy_plan(plan)
    1029             : 
    1030             :       TYPE(fft_plan_type), INTENT(INOUT)   :: plan
    1031             : 
    1032             : #if defined ( __FFTW3 )
    1033      162476 : !$    IF (plan%need_alt_plan) THEN
    1034           0 : !$       CALL XFFTW_DESTROY_PLAN(plan%alt_fftw_plan)
    1035             : !$    END IF
    1036             : 
    1037      162476 :       IF (.NOT. plan%separated_plans) THEN
    1038      162476 :          CALL XFFTW_DESTROY_PLAN(plan%fftw_plan)
    1039             :       ELSE
    1040             :          ! If it is a separated plan then we have to destroy
    1041             :          ! each dim plan individually
    1042           0 :          CALL XFFTW_DESTROY_PLAN(plan%fftw_plan_nx)
    1043           0 :          CALL XFFTW_DESTROY_PLAN(plan%fftw_plan_ny)
    1044           0 :          CALL XFFTW_DESTROY_PLAN(plan%fftw_plan_nz)
    1045           0 :          CALL XFFTW_DESTROY_PLAN(plan%fftw_plan_nx_r)
    1046           0 :          CALL XFFTW_DESTROY_PLAN(plan%fftw_plan_ny_r)
    1047           0 :          CALL XFFTW_DESTROY_PLAN(plan%fftw_plan_nz_r)
    1048             :       END IF
    1049             : 
    1050             : #else
    1051             :       MARK_USED(plan)
    1052             : #endif
    1053             : 
    1054      162476 :    END SUBROUTINE fftw3_destroy_plan
    1055             : 
    1056             : ! **************************************************************************************************
    1057             : !> \brief ...
    1058             : !> \param plan ...
    1059             : !> \param zin ...
    1060             : !> \param zout ...
    1061             : !> \param scale ...
    1062             : !> \param stat ...
    1063             : ! **************************************************************************************************
    1064     5907394 :    SUBROUTINE fftw31dm(plan, zin, zout, scale, stat)
    1065             :       TYPE(fft_plan_type), INTENT(IN)                    :: plan
    1066             :       COMPLEX(KIND=dp), DIMENSION(*), INTENT(IN), TARGET :: zin
    1067             :       COMPLEX(KIND=dp), DIMENSION(*), INTENT(INOUT), &
    1068             :          TARGET                                          :: zout
    1069             :       REAL(KIND=dp), INTENT(IN)                          :: scale
    1070             :       INTEGER, INTENT(OUT)                               :: stat
    1071             : 
    1072             :       INTEGER                                            :: in_offset, my_id, num_rows, out_offset, &
    1073             :                                                             scal_offset
    1074             :       TYPE(C_PTR)                                        :: fftw_plan
    1075             : 
    1076             : !------------------------------------------------------------------------------
    1077             : 
    1078     5907394 :       my_id = 0
    1079     5907394 :       num_rows = plan%m
    1080             : 
    1081             : !$OMP PARALLEL DEFAULT(NONE), &
    1082             : !$OMP          PRIVATE(my_id,num_rows,in_offset,out_offset,scal_offset,fftw_plan), &
    1083             : !$OMP          SHARED(zin,zout), &
    1084     5907394 : !$OMP          SHARED(plan,scale,stat)
    1085             : !$    my_id = omp_get_thread_num()
    1086             : 
    1087             : !$    if (my_id < plan%num_threads_needed) then
    1088             : 
    1089             :          fftw_plan = plan%fftw_plan
    1090             : 
    1091             :          in_offset = 1
    1092             :          out_offset = 1
    1093             :          scal_offset = 1
    1094             : 
    1095             : !$       in_offset = 1 + plan%num_rows*my_id*plan%n
    1096             : !$       out_offset = 1 + plan%num_rows*my_id*plan%n
    1097             : !$       IF (plan%fsign == +1 .AND. plan%trans) THEN
    1098             : !$          in_offset = 1 + plan%num_rows*my_id
    1099             : !$       ELSEIF (plan%fsign == -1 .AND. plan%trans) THEN
    1100             : !$          out_offset = 1 + plan%num_rows*my_id
    1101             : !$       END IF
    1102             : !$       scal_offset = 1 + plan%n*plan%num_rows*my_id
    1103             : !$       IF (plan%need_alt_plan .AND. my_id .EQ. plan%num_threads_needed - 1) THEN
    1104             : !$          num_rows = plan%alt_num_rows
    1105             : !$          fftw_plan = plan%alt_fftw_plan
    1106             : !$       ELSE
    1107             : !$          num_rows = plan%num_rows
    1108             : !$       END IF
    1109             : 
    1110             : #if defined ( __FFTW3 )
    1111             : !$OMP MASTER
    1112             :          stat = 1
    1113             : !$OMP END MASTER
    1114             :          CALL dfftw_execute_dft(fftw_plan, zin(in_offset:in_offset), zout(out_offset:out_offset))
    1115             : !$    end if
    1116             : ! all theads need to meet at this barrier
    1117             : !$OMP BARRIER
    1118             : !$    if (my_id < plan%num_threads_needed) then
    1119             :          IF (scale /= 1.0_dp) CALL zdscal(plan%n*num_rows, scale, zout(scal_offset:scal_offset), 1)
    1120             : !$    end if
    1121             : 
    1122             : #else
    1123             :       MARK_USED(plan)
    1124             :       MARK_USED(scale)
    1125             :       !MARK_USED does not work with assumed size arguments
    1126             :       IF (.FALSE.) THEN; DO; IF (ABS(zin(1)) > ABS(zout(1))) EXIT; END DO; END IF
    1127             :       stat = 0
    1128             : 
    1129             : !$    else
    1130             : !$    end if
    1131             : 
    1132             : #endif
    1133             : 
    1134             : !$OMP END PARALLEL
    1135             : 
    1136     5907394 :       END SUBROUTINE fftw31dm
    1137             : 
    1138             : !     Copyright (c) 2003, 2006 Matteo Frigo
    1139             : !     Copyright (c) 2003, 2006 Massachusetts Institute of Technology
    1140             : !
    1141             : !     This program is free software; you can redistribute it and/or modify
    1142             : !     it under the terms of the GNU General Public License as published by
    1143             : !     the Free Software Foundation; either version 2 of the License, or
    1144             : !     (at your option) any later version.
    1145             : !
    1146             : !     This program is distributed in the hope that it will be useful,
    1147             : !     but WITHOUT ANY WARRANTY; without even the implied warranty of
    1148             : !     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    1149             : !     GNU General Public License for more details.
    1150             : !
    1151             : !     You should have received a copy of the GNU General Public License
    1152             : !     along with this program; if not, write to the Free Software
    1153             : !     Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
    1154             : !
    1155             : !ccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccc
    1156             : !
    1157             : !     This is an example implementation of Fortran wisdom export/import
    1158             : !     to/from a Fortran unit (file), exploiting the generic
    1159             : !     dfftw_export_wisdom/dfftw_import_wisdom functions.
    1160             : !
    1161             : !     We cannot compile this file into the FFTW library itself, lest all
    1162             : !     FFTW-calling programs be required to link to the Fortran I/O
    1163             : !     libraries.
    1164             : !
    1165             : !     adapted to become more standard Fortran 90 [2007-10] Joost VandeVondele
    1166             : !     and added some namespacing
    1167             : !
    1168             : !ccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccc
    1169             : 
    1170             : ! **************************************************************************************************
    1171             : !> \brief ...
    1172             : !> \param c ...
    1173             : !> \param iunit ...
    1174             : ! **************************************************************************************************
    1175           0 :       SUBROUTINE fftw_write_char(c, iunit) BIND(C, name="fftw_write_char")
    1176             :       CHARACTER(KIND=C_CHAR)                             :: c
    1177             :       INTEGER(KIND=C_INT)                                :: iunit
    1178             : 
    1179           0 :          WRITE (iunit, '(a)', ADVANCE="NO") c
    1180           0 :       END SUBROUTINE
    1181             : 
    1182             : ! **************************************************************************************************
    1183             : !> \brief ...
    1184             : !> \param iunit ...
    1185             : ! **************************************************************************************************
    1186           0 :       SUBROUTINE fftw_export_wisdom_to_file(iunit)
    1187             :          INTEGER                                  :: iunit
    1188             : 
    1189             : #if defined ( __FFTW3 )
    1190           0 :          CALL dfftw_export_wisdom(fftw_write_char, iunit)
    1191             : #else
    1192             :          MARK_USED(iunit)
    1193             : #endif
    1194           0 :       END SUBROUTINE
    1195             : 
    1196             : !     Fortran 77 does not have any portable way to read an arbitrary
    1197             : !     file one character at a time [needs to wait for stream IO of F2003].
    1198             : !     The best alternative seems to be to
    1199             : !     read a whole line into a buffer, since for fftw-exported wisdom we
    1200             : !     can bound the line length.  (If the file contains longer lines,
    1201             : !     then the lines will be truncated and the wisdom import should
    1202             : !     simply fail.)  Ugh (and not thread safe).
    1203             : 
    1204             : ! **************************************************************************************************
    1205             : !> \brief ...
    1206             : !> \param ic ...
    1207             : !> \param iunit ...
    1208             : ! **************************************************************************************************
    1209           0 :       SUBROUTINE fftw_read_char(ic, iunit) BIND(C, name="fftw_read_char")
    1210             :       INTEGER(KIND=C_INT)                                :: ic, iunit
    1211             : 
    1212             :       CHARACTER(LEN=256)                                 :: buf
    1213             : 
    1214             :          SAVE buf
    1215             :          INTEGER ibuf
    1216             :          DATA ibuf/257/
    1217             :          SAVE ibuf
    1218           0 :          IF (ibuf .LT. 257) THEN
    1219           0 :             ic = ICHAR(buf(ibuf:ibuf))
    1220           0 :             ibuf = ibuf + 1
    1221           0 :             RETURN
    1222             :          END IF
    1223           0 :          READ (iunit, 123, END=666) buf
    1224           0 :          ic = ICHAR(buf(1:1))
    1225           0 :          ibuf = 2
    1226           0 :          RETURN
    1227           0 : 666      ic = -1
    1228           0 :          ibuf = 257
    1229             : 123      FORMAT(a256)
    1230             :       END SUBROUTINE
    1231             : 
    1232             : ! **************************************************************************************************
    1233             : !> \brief ...
    1234             : !> \param isuccess ...
    1235             : !> \param iunit ...
    1236             : ! **************************************************************************************************
    1237           0 :       SUBROUTINE fftw_import_wisdom_from_file(isuccess, iunit)
    1238             :       INTEGER                                            :: isuccess, iunit
    1239             : 
    1240           0 :          isuccess = 0
    1241             : #if defined ( __FFTW3 )
    1242           0 :          CALL dfftw_import_wisdom(isuccess, fftw_read_char, iunit)
    1243             : #else
    1244             :          MARK_USED(iunit)
    1245             : #endif
    1246           0 :       END SUBROUTINE
    1247             : 
    1248             :    END MODULE

Generated by: LCOV version 1.15