Line data Source code
1 : !--------------------------------------------------------------------------------------------------!
2 : ! CP2K: A general program to perform molecular dynamics simulations !
3 : ! Copyright 2000-2023 CP2K developers group <https://cp2k.org> !
4 : ! !
5 : ! SPDX-License-Identifier: GPL-2.0-or-later !
6 : !--------------------------------------------------------------------------------------------------!
7 : MODULE fftw3_lib
8 :
9 : USE ISO_C_BINDING, ONLY: C_ASSOCIATED, &
10 : C_CHAR, &
11 : C_DOUBLE, &
12 : C_DOUBLE_COMPLEX, &
13 : C_INT, &
14 : C_PTR
15 : #if defined(__FFTW3)
16 : USE ISO_C_BINDING, ONLY: &
17 : C_FLOAT, &
18 : C_FLOAT_COMPLEX, &
19 : C_FUNPTR, &
20 : C_INT32_T, &
21 : C_INTPTR_T, &
22 : C_LOC, &
23 : C_NULL_CHAR, &
24 : C_SIZE_T, C_F_POINTER
25 : #endif
26 : USE cp_files, ONLY: get_unit_number
27 : USE fft_kinds, ONLY: dp
28 : USE fft_plan, ONLY: fft_plan_type
29 :
30 : !$ USE OMP_LIB, ONLY: omp_get_max_threads, omp_get_thread_num, omp_get_num_threads
31 :
32 : #include "../../base/base_uses.f90"
33 :
34 : IMPLICIT NONE
35 : PRIVATE
36 :
37 : PUBLIC :: fftw3_do_init, fftw3_do_cleanup, fftw3_get_lengths, fftw33d, fftw31dm
38 : PUBLIC :: fftw3_destroy_plan, fftw3_create_plan_1dm, fftw3_create_plan_3d
39 : PUBLIC :: fftw_alloc, fftw_dealloc
40 :
41 : #if defined ( __FFTW3 )
42 : #if defined ( __FFTW3_MKL )
43 : #include "fftw/fftw3.f03"
44 : #else
45 : #include "fftw3.f03"
46 : #endif
47 : #endif
48 :
49 : INTERFACE fftw_alloc
50 : MODULE PROCEDURE :: fftw_alloc_complex_1d
51 : MODULE PROCEDURE :: fftw_alloc_complex_2d
52 : MODULE PROCEDURE :: fftw_alloc_complex_3d
53 : END INTERFACE
54 :
55 : INTERFACE fftw_dealloc
56 : MODULE PROCEDURE :: fftw_dealloc_complex_1d
57 : MODULE PROCEDURE :: fftw_dealloc_complex_2d
58 : MODULE PROCEDURE :: fftw_dealloc_complex_3d
59 : END INTERFACE
60 :
61 : CONTAINS
62 :
63 : #:set maxdim = 3
64 : #:for dim in range(1, maxdim+1)
65 : ! Concatenate the components of the dimensions passed to this function to use it if FFTW3 is not used
66 : #:set dim_extended = ", ".join(["n("+str(i)+")" for i in range(1, dim+1)])
67 95218 : SUBROUTINE fftw_alloc_complex_${dim}$d(array, n)
68 : COMPLEX(C_DOUBLE_COMPLEX), DIMENSION(:${", :"*(dim-1)}$), CONTIGUOUS, POINTER, INTENT(OUT) :: array
69 : INTEGER, DIMENSION(${dim}$), INTENT(IN) :: n
70 :
71 : #if defined(__FFTW3)
72 : TYPE(C_PTR) :: data_ptr
73 331240 : data_ptr = fftw_alloc_complex(INT(PRODUCT(n), KIND=C_SIZE_T))
74 331240 : CALL C_F_POINTER(data_ptr, array, n)
75 : #else
76 : ! Just allocate the array
77 : ALLOCATE (array(${dim_extended}$))
78 : #endif
79 :
80 95218 : END SUBROUTINE fftw_alloc_complex_${dim}$d
81 :
82 95218 : SUBROUTINE fftw_dealloc_complex_${dim}$d(array)
83 : COMPLEX(C_DOUBLE_COMPLEX), DIMENSION(:${", :"*(dim-1)}$), CONTIGUOUS, POINTER, INTENT(INOUT) :: array
84 :
85 : #if defined(__FFTW3)
86 95218 : CALL fftw_free(C_LOC(array))
87 95218 : NULLIFY (array)
88 : #else
89 : ! Just deallocate the array
90 : DEALLOCATE (array)
91 : #endif
92 :
93 95218 : END SUBROUTINE fftw_dealloc_complex_${dim}$d
94 : #:endfor
95 :
96 : #if defined ( __FFTW3 )
97 : ! **************************************************************************************************
98 : !> \brief A workaround that allows us to compile with -Werror=unused-parameter
99 : ! **************************************************************************************************
100 0 : SUBROUTINE dummy_routine_to_call_mark_used()
101 : MARK_USED(FFTW_R2HC)
102 : MARK_USED(FFTW_HC2R)
103 : MARK_USED(FFTW_DHT)
104 : MARK_USED(FFTW_REDFT00)
105 : MARK_USED(FFTW_REDFT01)
106 : MARK_USED(FFTW_REDFT10)
107 : MARK_USED(FFTW_REDFT11)
108 : MARK_USED(FFTW_RODFT00)
109 : MARK_USED(FFTW_RODFT01)
110 : MARK_USED(FFTW_RODFT10)
111 : MARK_USED(FFTW_RODFT11)
112 : MARK_USED(FFTW_FORWARD)
113 : MARK_USED(FFTW_BACKWARD)
114 : MARK_USED(FFTW_MEASURE)
115 : MARK_USED(FFTW_DESTROY_INPUT)
116 : MARK_USED(FFTW_UNALIGNED)
117 : MARK_USED(FFTW_CONSERVE_MEMORY)
118 : MARK_USED(FFTW_EXHAUSTIVE)
119 : MARK_USED(FFTW_PRESERVE_INPUT)
120 : MARK_USED(FFTW_PATIENT)
121 : MARK_USED(FFTW_ESTIMATE)
122 : MARK_USED(FFTW_WISDOM_ONLY)
123 : MARK_USED(FFTW_ESTIMATE_PATIENT)
124 : MARK_USED(FFTW_BELIEVE_PCOST)
125 : MARK_USED(FFTW_NO_DFT_R2HC)
126 : MARK_USED(FFTW_NO_NONTHREADED)
127 : MARK_USED(FFTW_NO_BUFFERING)
128 : MARK_USED(FFTW_NO_INDIRECT_OP)
129 : MARK_USED(FFTW_ALLOW_LARGE_GENERIC)
130 : MARK_USED(FFTW_NO_RANK_SPLITS)
131 : MARK_USED(FFTW_NO_VRANK_SPLITS)
132 : MARK_USED(FFTW_NO_VRECURSE)
133 : MARK_USED(FFTW_NO_SIMD)
134 : MARK_USED(FFTW_NO_SLOW)
135 : MARK_USED(FFTW_NO_FIXED_RADIX_LARGE_N)
136 : MARK_USED(FFTW_ALLOW_PRUNING)
137 0 : END SUBROUTINE
138 : #endif
139 :
140 : ! **************************************************************************************************
141 : !> \brief ...
142 : !> \param wisdom_file ...
143 : !> \param ionode ...
144 : ! **************************************************************************************************
145 8741 : SUBROUTINE fftw3_do_cleanup(wisdom_file, ionode)
146 :
147 : CHARACTER(LEN=*), INTENT(IN) :: wisdom_file
148 : LOGICAL :: ionode
149 :
150 : #if defined ( __FFTW3 )
151 : INTEGER :: iunit, istat
152 : INTEGER(KIND=C_INT) :: isuccess
153 : ! Write out FFTW3 wisdom to file (if we can)
154 : ! only the ionode updates the wisdom
155 8741 : IF (ionode) THEN
156 4474 : iunit = get_unit_number()
157 4474 : OPEN (UNIT=iunit, FILE=wisdom_file, STATUS="UNKNOWN", FORM="FORMATTED", ACTION="WRITE", IOSTAT=istat)
158 4474 : IF (istat == 0) THEN
159 0 : isuccess = fftw_export_wisdom_to_filename(wisdom_file//C_NULL_CHAR)
160 0 : IF (isuccess == 0) &
161 0 : CPABORT("Error exporting wisdom to file "//wisdom_file)
162 0 : CLOSE (iunit)
163 : END IF
164 : END IF
165 :
166 8741 : CALL fftw_cleanup()
167 : #else
168 : MARK_USED(wisdom_file)
169 : MARK_USED(ionode)
170 : #endif
171 :
172 8741 : END SUBROUTINE
173 :
174 : ! **************************************************************************************************
175 : !> \brief ...
176 : !> \param wisdom_file ...
177 : ! **************************************************************************************************
178 8951 : SUBROUTINE fftw3_do_init(wisdom_file)
179 :
180 : CHARACTER(LEN=*), INTENT(IN) :: wisdom_file
181 :
182 : #if defined ( __FFTW3 )
183 : INTEGER :: istat, iunit
184 : INTEGER(KIND=C_INT) :: isuccess
185 : LOGICAL :: exist
186 :
187 : #if defined(__MKL)
188 : !$ LOGICAL :: mkl_is_safe
189 : #endif
190 :
191 : ! If using the Intel compiler then we need to declare
192 : ! a C interface to a global variable in MKL that sets
193 : ! the number of threads which can concurrently execute
194 : ! an FFT
195 : ! We need __INTEL_COMPILER so we can be sure that the compiler
196 : ! understands the !DEC$ version definitions
197 : #if defined (__INTEL_COMPILER) && defined (__MKL)
198 : !$ include "mkl.fi"
199 : !DEC$ IF DEFINED (INTEL_MKL_VERSION)
200 : !DEC$ IF INTEL_MKL_VERSION .EQ. 110100
201 : !DIR$ ATTRIBUTES ALIGN : 8 :: fftw3_mkl
202 : !$ COMMON/fftw3_mkl/ignore(4), mkl_dft_number_of_user_threads, ignore2(7)
203 : !$ INTEGER*4 :: ignore, mkl_dft_number_of_user_threads, ignore2
204 : !$ BIND(c) :: /fftw3_mkl/
205 : !DEC$ ENDIF
206 : !DEC$ ENDIF
207 : #elif defined (__MKL)
208 : ! Preprocessing is enabled by default, and below header is not language specific
209 : #include <mkl_version.h>
210 : #endif
211 :
212 : ! Read FFTW wisdom (if available)
213 : ! all nodes are opening the file here...
214 8951 : INQUIRE (FILE=wisdom_file, exist=exist)
215 8951 : IF (exist) THEN
216 0 : iunit = get_unit_number()
217 : OPEN (UNIT=iunit, FILE=wisdom_file, STATUS="OLD", FORM="FORMATTED", POSITION="REWIND", &
218 0 : ACTION="READ", IOSTAT=istat)
219 0 : IF (istat == 0) THEN
220 0 : isuccess = fftw_import_wisdom_from_filename(wisdom_file//C_NULL_CHAR)
221 0 : IF (isuccess == 0) &
222 0 : CPABORT("Error importing wisdom from file "//wisdom_file)
223 : ! write(*,*) "FFTW3 import wisdom from file ....",MERGE((/"OK "/),(/"NOT OK"/),(/isuccess==1/))
224 0 : CLOSE (iunit)
225 : END IF
226 : END IF
227 :
228 : #if defined (__MKL)
229 : ! Now check if we have a real FFTW3 library, or are using MKL wrappers
230 :
231 : !$ IF (fftw3_is_mkl_wrapper() .and. omp_get_max_threads() .gt. 1) THEN
232 : ! If we are not using the Intel compiler, there is no way to tell which
233 : ! MKL version is in use, so fail safe...
234 : !$ mkl_is_safe = .FALSE.
235 : #if defined(INTEL_MKL_VERSION) && (110100 < INTEL_MKL_VERSION)
236 : !$ mkl_is_safe = .TRUE.
237 : #elif defined (__INTEL_COMPILER)
238 : ! If we have an Intel compiler (__INTEL_COMPILER is defined) then check the
239 : ! MKL version and make the appropriate action
240 : !DEC$ IF DEFINED (INTEL_MKL_VERSION)
241 : !DEC$ IF INTEL_MKL_VERSION .EQ. 110100
242 : !$ mkl_dft_number_of_user_threads = omp_get_max_threads()
243 : !DEC$ ENDIF
244 : !DEC$ IF INTEL_MKL_VERSION .GE. 110100
245 : !$ mkl_is_safe = .TRUE.
246 : !DEC$ ENDIF
247 : !DEC$ ENDIF
248 : #endif
249 : !$ IF (.NOT. mkl_is_safe) THEN
250 : !$ STOP "Intel's FFTW3 interface to MKL is not "// &
251 : !$ "thread-safe prior to MKL 11.1.0! Please "// &
252 : !$ "rebuild CP2K, linking against FFTW 3 from "// &
253 : !$ "www.fftw.org or a newer version of MKL. "// &
254 : !$ "Now exiting..."
255 : !$ END IF
256 : !$ END IF
257 : #endif
258 : #else
259 : MARK_USED(wisdom_file)
260 : #endif
261 :
262 8951 : END SUBROUTINE
263 :
264 : ! **************************************************************************************************
265 : !> \brief ...
266 : !> \param DATA ...
267 : !> \param max_length ...
268 : !> \par History
269 : !> JGH 23-Jan-2006 : initial version
270 : !> Adapted for new interface
271 : !> IAB 09-Jan-2009 : Modified to cache plans in fft_plan_type
272 : !> (c) The Numerical Algorithms Group (NAG) Ltd, 2009 on behalf of the HECToR project
273 : !> IAB 09-Oct-2009 : Added OpenMP directives to 1D FFT, and planning routines
274 : !> (c) The Numerical Algorithms Group (NAG) Ltd, 2009 on behalf of the HECToR project
275 : !> IAB 11-Sep-2012 : OpenMP parallel 3D FFT (Ruyman Reyes, PRACE)
276 : !> \author JGH
277 : ! **************************************************************************************************
278 504 : SUBROUTINE fftw3_get_lengths(DATA, max_length)
279 :
280 : INTEGER, DIMENSION(*) :: DATA
281 : INTEGER, INTENT(INOUT) :: max_length
282 :
283 : INTEGER :: h, i, j, k, m, maxn, maxn_elevens, &
284 : maxn_fives, maxn_sevens, &
285 : maxn_thirteens, maxn_threes, &
286 : maxn_twos, ndata, nmax, number
287 504 : INTEGER, ALLOCATABLE, DIMENSION(:) :: dlocal, idx
288 :
289 : !------------------------------------------------------------------------------
290 : ! compute ndata
291 : !! FFTW can do arbitrary(?) lengths, maybe you want to limit them to some
292 : !! powers of small prime numbers though...
293 :
294 504 : maxn_twos = 15
295 504 : maxn_threes = 3
296 504 : maxn_fives = 2
297 504 : maxn_sevens = 1
298 504 : maxn_elevens = 1
299 504 : maxn_thirteens = 0
300 504 : maxn = 37748736
301 :
302 504 : ndata = 0
303 8568 : DO h = 0, maxn_twos
304 8064 : nmax = HUGE(0)/2**h
305 40824 : DO i = 0, maxn_threes
306 137088 : DO j = 0, maxn_fives
307 322560 : DO k = 0, maxn_sevens
308 677376 : DO m = 0, maxn_elevens
309 387072 : number = (3**i)*(5**j)*(7**k)*(11**m)
310 :
311 387072 : IF (number > nmax) CYCLE
312 :
313 387072 : number = number*2**h
314 387072 : IF (number >= maxn) CYCLE
315 :
316 580608 : ndata = ndata + 1
317 : END DO
318 : END DO
319 : END DO
320 : END DO
321 : END DO
322 :
323 2520 : ALLOCATE (dlocal(ndata), idx(ndata))
324 :
325 504 : ndata = 0
326 371448 : dlocal(:) = 0
327 8568 : DO h = 0, maxn_twos
328 8064 : nmax = HUGE(0)/2**h
329 40824 : DO i = 0, maxn_threes
330 137088 : DO j = 0, maxn_fives
331 322560 : DO k = 0, maxn_sevens
332 677376 : DO m = 0, maxn_elevens
333 387072 : number = (3**i)*(5**j)*(7**k)*(11**m)
334 :
335 387072 : IF (number > nmax) CYCLE
336 :
337 387072 : number = number*2**h
338 387072 : IF (number >= maxn) CYCLE
339 :
340 370944 : ndata = ndata + 1
341 580608 : dlocal(ndata) = number
342 : END DO
343 : END DO
344 : END DO
345 : END DO
346 : END DO
347 :
348 504 : CALL sortint(dlocal, ndata, idx)
349 504 : ndata = MIN(ndata, max_length)
350 371448 : DATA(1:ndata) = dlocal(1:ndata)
351 504 : max_length = ndata
352 :
353 504 : DEALLOCATE (dlocal, idx)
354 :
355 504 : END SUBROUTINE fftw3_get_lengths
356 :
357 : ! **************************************************************************************************
358 : !> \brief ...
359 : !> \param iarr ...
360 : !> \param n ...
361 : !> \param index ...
362 : ! **************************************************************************************************
363 504 : SUBROUTINE sortint(iarr, n, index)
364 :
365 : INTEGER, INTENT(IN) :: n
366 : INTEGER, INTENT(INOUT) :: iarr(1:n)
367 : INTEGER, INTENT(OUT) :: INDEX(1:n)
368 :
369 : INTEGER, PARAMETER :: m = 7, nstack = 50
370 :
371 : INTEGER :: a, i, ib, ir, istack(1:nstack), itemp, &
372 : j, jstack, k, l, temp
373 :
374 : !------------------------------------------------------------------------------
375 :
376 371448 : DO i = 1, n
377 371448 : INDEX(i) = i
378 : END DO
379 : jstack = 0
380 : l = 1
381 : ir = n
382 : DO WHILE (.TRUE.)
383 139608 : IF (ir - l < m) THEN
384 301392 : DO j = l + 1, ir
385 231336 : a = iarr(j)
386 231336 : ib = INDEX(j)
387 549360 : DO i = j - 1, 0, -1
388 549360 : IF (i == 0) EXIT
389 548352 : IF (iarr(i) <= a) EXIT
390 318024 : iarr(i + 1) = iarr(i)
391 549360 : INDEX(i + 1) = INDEX(i)
392 : END DO
393 231336 : iarr(i + 1) = a
394 301392 : INDEX(i + 1) = ib
395 : END DO
396 70056 : IF (jstack == 0) RETURN
397 69552 : ir = istack(jstack)
398 69552 : l = istack(jstack - 1)
399 69552 : jstack = jstack - 2
400 : ELSE
401 69552 : k = (l + ir)/2
402 69552 : temp = iarr(k)
403 69552 : iarr(k) = iarr(l + 1)
404 69552 : iarr(l + 1) = temp
405 69552 : itemp = INDEX(k)
406 69552 : INDEX(k) = INDEX(l + 1)
407 69552 : INDEX(l + 1) = itemp
408 69552 : IF (iarr(l + 1) > iarr(ir)) THEN
409 32760 : temp = iarr(l + 1)
410 32760 : iarr(l + 1) = iarr(ir)
411 32760 : iarr(ir) = temp
412 32760 : itemp = INDEX(l + 1)
413 32760 : INDEX(l + 1) = INDEX(ir)
414 32760 : INDEX(ir) = itemp
415 : END IF
416 69552 : IF (iarr(l) > iarr(ir)) THEN
417 26208 : temp = iarr(l)
418 26208 : iarr(l) = iarr(ir)
419 26208 : iarr(ir) = temp
420 26208 : itemp = INDEX(l)
421 26208 : INDEX(l) = INDEX(ir)
422 26208 : INDEX(ir) = itemp
423 : END IF
424 69552 : IF (iarr(l + 1) > iarr(l)) THEN
425 26208 : temp = iarr(l + 1)
426 26208 : iarr(l + 1) = iarr(l)
427 26208 : iarr(l) = temp
428 26208 : itemp = INDEX(l + 1)
429 26208 : INDEX(l + 1) = INDEX(l)
430 26208 : INDEX(l) = itemp
431 : END IF
432 69552 : i = l + 1
433 69552 : j = ir
434 69552 : a = iarr(l)
435 69552 : ib = INDEX(l)
436 443016 : DO WHILE (.TRUE.)
437 512568 : i = i + 1
438 1663704 : DO WHILE (iarr(i) < a)
439 1151136 : i = i + 1
440 : END DO
441 512568 : j = j - 1
442 1270080 : DO WHILE (iarr(j) > a)
443 757512 : j = j - 1
444 : END DO
445 512568 : IF (j < i) EXIT
446 443016 : temp = iarr(i)
447 443016 : iarr(i) = iarr(j)
448 443016 : iarr(j) = temp
449 443016 : itemp = INDEX(i)
450 443016 : INDEX(i) = INDEX(j)
451 443016 : INDEX(j) = itemp
452 : END DO
453 69552 : iarr(l) = iarr(j)
454 69552 : iarr(j) = a
455 69552 : INDEX(l) = INDEX(j)
456 69552 : INDEX(j) = ib
457 69552 : jstack = jstack + 2
458 69552 : IF (jstack > nstack) CPABORT(" Nstack too small in sortr")
459 69552 : IF (ir - i + 1 >= j - l) THEN
460 35784 : istack(jstack) = ir
461 35784 : istack(jstack - 1) = i
462 35784 : ir = j - 1
463 : ELSE
464 33768 : istack(jstack) = j - 1
465 33768 : istack(jstack - 1) = l
466 33768 : l = i
467 : END IF
468 : END IF
469 :
470 : END DO
471 :
472 : END SUBROUTINE sortint
473 :
474 : ! **************************************************************************************************
475 :
476 : ! **************************************************************************************************
477 : !> \brief ...
478 : !> \param plan ...
479 : !> \param fft_rank ...
480 : !> \param dim_n ...
481 : !> \param dim_istride ...
482 : !> \param dim_ostride ...
483 : !> \param hm_rank ...
484 : !> \param hm_n ...
485 : !> \param hm_istride ...
486 : !> \param hm_ostride ...
487 : !> \param zin ...
488 : !> \param zout ...
489 : !> \param fft_direction ...
490 : !> \param fftw_plan_type ...
491 : !> \param valid ...
492 : ! **************************************************************************************************
493 40684 : SUBROUTINE fftw3_create_guru_plan(plan, fft_rank, dim_n, &
494 : dim_istride, dim_ostride, hm_rank, &
495 : hm_n, hm_istride, hm_ostride, &
496 : zin, zout, fft_direction, fftw_plan_type, &
497 : valid)
498 :
499 : IMPLICIT NONE
500 :
501 : TYPE(C_PTR), INTENT(INOUT) :: plan
502 : COMPLEX(KIND=dp), DIMENSION(*), INTENT(INOUT) :: zin, zout
503 : INTEGER, INTENT(IN) :: dim_n(2), dim_istride(2), dim_ostride(2), &
504 : hm_n(2), hm_istride(2), hm_ostride(2), fft_rank, &
505 : fft_direction, fftw_plan_type, hm_rank
506 : LOGICAL, INTENT(OUT) :: valid
507 :
508 : #if defined (__FFTW3)
509 : TYPE(fftw_iodim) :: dim(2), hm(2)
510 : INTEGER :: i
511 :
512 122052 : DO i = 1, 2
513 81368 : DIM(i) = fftw_iodim(dim_n(i), dim_istride(i), dim_ostride(i))
514 122052 : hm(i) = fftw_iodim(hm_n(i), hm_istride(i), hm_ostride(i))
515 : END DO
516 :
517 : plan = fftw_plan_guru_dft(fft_rank, &
518 : dim, hm_rank, hm, &
519 : zin, zout, &
520 40684 : fft_direction, fftw_plan_type)
521 :
522 40684 : valid = C_ASSOCIATED(plan)
523 :
524 : #else
525 : MARK_USED(plan)
526 : MARK_USED(fft_rank)
527 : MARK_USED(dim_n)
528 : MARK_USED(dim_istride)
529 : MARK_USED(dim_ostride)
530 : MARK_USED(hm_rank)
531 : MARK_USED(hm_n)
532 : MARK_USED(hm_istride)
533 : MARK_USED(hm_ostride)
534 : MARK_USED(fft_direction)
535 : MARK_USED(fftw_plan_type)
536 : !MARK_USED does not work with assumed size arguments
537 : IF (.FALSE.) THEN; DO; IF (ABS(zin(1)) > ABS(zout(1))) EXIT; END DO; END IF
538 : valid = .FALSE.
539 :
540 : #endif
541 :
542 40684 : END SUBROUTINE
543 :
544 : ! **************************************************************************************************
545 :
546 : ! **************************************************************************************************
547 : !> \brief ...
548 : !> \return ...
549 : ! **************************************************************************************************
550 40684 : FUNCTION fftw3_is_mkl_wrapper() RESULT(is_mkl)
551 :
552 : IMPLICIT NONE
553 :
554 : LOGICAL :: is_mkl
555 : #if defined ( __FFTW3 )
556 : LOGICAL :: guru_supported
557 : INTEGER :: dim_n(2), dim_istride(2), dim_ostride(2), &
558 : howmany_n(2), howmany_istride(2), howmany_ostride(2)
559 : TYPE(C_PTR) :: test_plan
560 : COMPLEX(KIND=dp), DIMENSION(1, 1, 1) :: zin
561 :
562 : ! Attempt to create a plan with the guru interface for a 2d sub-space
563 : ! If this fails (e.g. for MKL's FFTW3 interface), fall back to the
564 : ! FFTW3 threaded 3D transform instead of the hand-optimised version
565 40684 : dim_n(1) = 1
566 40684 : dim_istride(1) = 1
567 40684 : dim_ostride(1) = 1
568 40684 : howmany_n(1) = 1
569 40684 : howmany_n(2) = 1
570 40684 : howmany_istride(1) = 1
571 40684 : howmany_istride(2) = 1
572 40684 : howmany_ostride(1) = 1
573 40684 : howmany_ostride(2) = 1
574 40684 : zin = CMPLX(0.0_dp, 0.0_dp, KIND=dp)
575 : CALL fftw3_create_guru_plan(test_plan, 1, &
576 : dim_n, dim_istride, dim_ostride, &
577 : 2, howmany_n, howmany_istride, howmany_ostride, &
578 : zin, zin, &
579 40684 : FFTW_FORWARD, FFTW_ESTIMATE, guru_supported)
580 40684 : IF (guru_supported) THEN
581 40684 : CALL fftw_destroy_plan(test_plan)
582 40684 : is_mkl = .FALSE.
583 : ELSE
584 : is_mkl = .TRUE.
585 : END IF
586 :
587 : #else
588 : is_mkl = .FALSE.
589 : #endif
590 :
591 40684 : END FUNCTION
592 :
593 : ! **************************************************************************************************
594 :
595 : ! **************************************************************************************************
596 : !> \brief ...
597 : !> \param nrows ...
598 : !> \param nt ...
599 : !> \param rows_per_thread ...
600 : !> \param rows_per_thread_r ...
601 : !> \param th_planA ...
602 : !> \param th_planB ...
603 : ! **************************************************************************************************
604 0 : SUBROUTINE fftw3_compute_rows_per_th(nrows, nt, rows_per_thread, rows_per_thread_r, &
605 : th_planA, th_planB)
606 :
607 : INTEGER, INTENT(IN) :: nrows, nt
608 : INTEGER, INTENT(OUT) :: rows_per_thread, rows_per_thread_r, &
609 : th_planA, th_planB
610 :
611 0 : IF (MOD(nrows, nt) .EQ. 0) THEN
612 0 : rows_per_thread = nrows/nt
613 0 : rows_per_thread_r = 0
614 0 : th_planA = nt
615 0 : th_planB = 0
616 : ELSE
617 0 : rows_per_thread = nrows/nt + 1
618 0 : rows_per_thread_r = nrows/nt
619 0 : th_planA = MOD(nrows, nt)
620 0 : th_planB = nt - th_planA
621 : END IF
622 :
623 0 : END SUBROUTINE
624 :
625 : ! **************************************************************************************************
626 :
627 : ! **************************************************************************************************
628 : !> \brief ...
629 : !> \param plan ...
630 : !> \param plan_r ...
631 : !> \param dim_n ...
632 : !> \param dim_istride ...
633 : !> \param dim_ostride ...
634 : !> \param hm_n ...
635 : !> \param hm_istride ...
636 : !> \param hm_ostride ...
637 : !> \param input ...
638 : !> \param output ...
639 : !> \param fft_direction ...
640 : !> \param fftw_plan_type ...
641 : !> \param rows_per_th ...
642 : !> \param rows_per_th_r ...
643 : ! **************************************************************************************************
644 0 : SUBROUTINE fftw3_create_3d_plans(plan, plan_r, dim_n, dim_istride, dim_ostride, &
645 : hm_n, hm_istride, hm_ostride, &
646 : input, output, &
647 : fft_direction, fftw_plan_type, rows_per_th, &
648 : rows_per_th_r)
649 :
650 : TYPE(C_PTR), INTENT(INOUT) :: plan, plan_r
651 : INTEGER, INTENT(INOUT) :: dim_n(2), dim_istride(2), &
652 : dim_ostride(2), hm_n(2), &
653 : hm_istride(2), hm_ostride(2)
654 : COMPLEX(KIND=dp), DIMENSION(*), INTENT(INOUT) :: input, output
655 : INTEGER, INTENT(INOUT) :: fft_direction, fftw_plan_type
656 : INTEGER, INTENT(IN) :: rows_per_th, rows_per_th_r
657 :
658 : LOGICAL :: valid
659 :
660 : ! First plans will have an additional row
661 :
662 0 : hm_n(2) = rows_per_th
663 : CALL fftw3_create_guru_plan(plan, 1, &
664 : dim_n, dim_istride, dim_ostride, &
665 : 2, hm_n, hm_istride, hm_ostride, &
666 : input, output, &
667 0 : fft_direction, fftw_plan_type, valid)
668 :
669 0 : IF (.NOT. valid) THEN
670 0 : CPABORT("fftw3_create_plan")
671 : END IF
672 :
673 : !!!! Remainder
674 0 : hm_n(2) = rows_per_th_r
675 : CALL fftw3_create_guru_plan(plan_r, 1, &
676 : dim_n, dim_istride, dim_ostride, &
677 : 2, hm_n, hm_istride, hm_ostride, &
678 : input, output, &
679 0 : fft_direction, fftw_plan_type, valid)
680 0 : IF (.NOT. valid) THEN
681 0 : CPABORT("fftw3_create_plan (remaining)")
682 : END IF
683 :
684 0 : END SUBROUTINE
685 :
686 : ! **************************************************************************************************
687 :
688 : ! **************************************************************************************************
689 : !> \brief ...
690 : !> \param plan ...
691 : !> \param zin ...
692 : !> \param zout ...
693 : !> \param plan_style ...
694 : ! **************************************************************************************************
695 40684 : SUBROUTINE fftw3_create_plan_3d(plan, zin, zout, plan_style)
696 :
697 : TYPE(fft_plan_type), INTENT(INOUT) :: plan
698 : COMPLEX(KIND=dp), DIMENSION(*), INTENT(INOUT) :: zin
699 : COMPLEX(KIND=dp), DIMENSION(*), INTENT(INOUT) :: zout
700 : INTEGER :: plan_style
701 : #if defined ( __FFTW3 )
702 : INTEGER :: n1, n2, n3
703 : INTEGER :: nt
704 : INTEGER :: rows_per_th
705 : INTEGER :: rows_per_th_r
706 : INTEGER :: fft_direction
707 : INTEGER :: th_planA, th_planB
708 40684 : COMPLEX(KIND=dp), ALLOCATABLE :: tmp(:)
709 :
710 : ! GURU Interface
711 : INTEGER :: dim_n(2), dim_istride(2), dim_ostride(2), &
712 : howmany_n(2), howmany_istride(2), howmany_ostride(2)
713 :
714 : INTEGER :: fftw_plan_type
715 81344 : SELECT CASE (plan_style)
716 : CASE (1)
717 40660 : fftw_plan_type = FFTW_ESTIMATE
718 : CASE (2)
719 8 : fftw_plan_type = FFTW_MEASURE
720 : CASE (3)
721 8 : fftw_plan_type = FFTW_PATIENT
722 : CASE (4)
723 8 : fftw_plan_type = FFTW_EXHAUSTIVE
724 : CASE DEFAULT
725 40684 : CPABORT("fftw3_create_plan_3d")
726 : END SELECT
727 :
728 : #if defined (__FFTW3_UNALIGNED)
729 : fftw_plan_type = fftw_plan_type + FFTW_UNALIGNED
730 : #endif
731 :
732 40684 : IF (plan%fsign == +1) THEN
733 20342 : fft_direction = FFTW_FORWARD
734 : ELSE
735 20342 : fft_direction = FFTW_BACKWARD
736 : END IF
737 :
738 40684 : n1 = plan%n_3d(1)
739 40684 : n2 = plan%n_3d(2)
740 40684 : n3 = plan%n_3d(3)
741 :
742 40684 : nt = 1
743 40684 : !$OMP PARALLEL DEFAULT(NONE) SHARED(nt)
744 : !$OMP MASTER
745 : !$ nt = omp_get_num_threads()
746 : !$OMP END MASTER
747 : !$OMP END PARALLEL
748 :
749 : IF ((fftw3_is_mkl_wrapper()) .OR. &
750 40684 : (.NOT. plan_style == 1) .OR. &
751 : (n1 < 256 .AND. n2 < 256 .AND. n3 < 256 .AND. nt == 1)) THEN
752 : ! If the plan type is MEASURE, PATIENT and EXHAUSTIVE or
753 : ! the grid size is small (and we are single-threaded) then
754 : ! FFTW3 does a better job than handmade optimization
755 : ! so plan a single 3D FFT which will execute using all the threads
756 :
757 40684 : plan%separated_plans = .FALSE.
758 40684 : !$ CALL fftw_plan_with_nthreads(nt)
759 :
760 40684 : IF (plan%fft_in_place) THEN
761 20342 : plan%fftw_plan = fftw_plan_dft_3d(n3, n2, n1, zin, zin, fft_direction, fftw_plan_type)
762 : ELSE
763 20342 : plan%fftw_plan = fftw_plan_dft_3d(n3, n2, n1, zin, zout, fft_direction, fftw_plan_type)
764 : END IF
765 : ELSE
766 0 : ALLOCATE (tmp(n1*n2*n3))
767 : ! ************************* PLANS WITH TRANSPOSITIONS ****************************
768 : ! In the cases described above, we manually thread each stage of the 3D FFT.
769 : !
770 : ! The following plans replace the 3D FFT call by running 1D FFTW across all
771 : ! 3 directions of the array.
772 : !
773 : ! Output of FFTW is transposed to ensure that the next round of FFTW access
774 : ! contiguous information.
775 : !
776 : ! Assuming the input matrix is M(n3,n2,n1), FFTW/Transp are :
777 : ! M(n3,n2,n1) -> fftw(x) -> M(n3,n1,n2) -> fftw(y) -> M(n1,n2,n3) -> fftw(z) -> M(n1,n2,n3)
778 : ! Notice that last matrix is transposed in the Z axis. A DO-loop in the execute routine
779 : ! will perform the final transposition. Performance evaluation showed that using an external
780 : ! DO loop to do the final transposition performed better than directly transposing the output.
781 : ! However, this might vary depending on the compiler/platform, so a potential tuning spot
782 : ! is to perform the final transposition within the fftw library rather than using the external loop
783 : ! See comments below in Z-FFT for how to transpose the output to avoid the final DO loop.
784 : !
785 : ! Doc. for the Guru interface is in http://www.fftw.org/doc/Guru-Interface.html
786 : !
787 : ! OpenMP : Work is distributed on the Z plane.
788 : ! All transpositions are out-of-place to facilitate multi-threading
789 : !
790 : !!!! Plan for X : M(n3,n2,n1) -> fftw(x) -> M(n3,n1,n2)
791 : CALL fftw3_compute_rows_per_th(n3, nt, rows_per_th, rows_per_th_r, &
792 0 : th_planA, th_planB)
793 :
794 0 : dim_n(1) = n1
795 0 : dim_istride(1) = 1
796 0 : dim_ostride(1) = n2
797 0 : howmany_n(1) = n2
798 0 : howmany_n(2) = rows_per_th
799 0 : howmany_istride(1) = n1
800 0 : howmany_istride(2) = n1*n2
801 0 : howmany_ostride(1) = 1
802 0 : howmany_ostride(2) = n1*n2
803 : CALL fftw3_create_3d_plans(plan%fftw_plan_nx, plan%fftw_plan_nx_r, &
804 : dim_n, dim_istride, dim_ostride, howmany_n, &
805 : howmany_istride, howmany_ostride, &
806 : zin, tmp, &
807 : fft_direction, fftw_plan_type, rows_per_th, &
808 0 : rows_per_th_r)
809 :
810 : !!!! Plan for Y : M(n3,n1,n2) -> fftw(y) -> M(n1,n2,n3)
811 : CALL fftw3_compute_rows_per_th(n3, nt, rows_per_th, rows_per_th_r, &
812 0 : th_planA, th_planB)
813 0 : dim_n(1) = n2
814 : dim_istride(1) = 1
815 0 : dim_ostride(1) = n3
816 0 : howmany_n(1) = n1
817 0 : howmany_n(2) = rows_per_th
818 0 : howmany_istride(1) = n2
819 : howmany_istride(2) = n1*n2
820 : !!! transposed Z axis on output
821 0 : howmany_ostride(1) = n2*n3
822 0 : howmany_ostride(2) = 1
823 :
824 : CALL fftw3_create_3d_plans(plan%fftw_plan_ny, plan%fftw_plan_ny_r, &
825 : dim_n, dim_istride, dim_ostride, &
826 : howmany_n, howmany_istride, howmany_ostride, &
827 : tmp, zin, &
828 : fft_direction, fftw_plan_type, rows_per_th, &
829 0 : rows_per_th_r)
830 :
831 : !!!! Plan for Z : M(n1,n2,n3) -> fftw(z) -> M(n1,n2,n3)
832 : CALL fftw3_compute_rows_per_th(n1, nt, rows_per_th, rows_per_th_r, &
833 0 : th_planA, th_planB)
834 0 : dim_n(1) = n3
835 : dim_istride(1) = 1
836 0 : dim_ostride(1) = 1 ! To transpose: n2*n1
837 0 : howmany_n(1) = n2
838 0 : howmany_n(2) = rows_per_th
839 0 : howmany_istride(1) = n3
840 0 : howmany_istride(2) = n2*n3
841 0 : howmany_ostride(1) = n3 ! To transpose: n1
842 0 : howmany_ostride(2) = n2*n3 ! To transpose: 1
843 :
844 : CALL fftw3_create_3d_plans(plan%fftw_plan_nz, plan%fftw_plan_nz_r, &
845 : dim_n, dim_istride, dim_ostride, &
846 : howmany_n, howmany_istride, howmany_ostride, &
847 : zin, tmp, &
848 : fft_direction, fftw_plan_type, rows_per_th, &
849 0 : rows_per_th_r)
850 :
851 0 : plan%separated_plans = .TRUE.
852 :
853 0 : DEALLOCATE (tmp)
854 : END IF
855 :
856 : #else
857 : MARK_USED(plan)
858 : MARK_USED(plan_style)
859 : !MARK_USED does not work with assumed size arguments
860 : IF (.FALSE.) THEN; DO; IF (ABS(zin(1)) > ABS(zout(1))) EXIT; END DO; END IF
861 : #endif
862 :
863 40684 : END SUBROUTINE fftw3_create_plan_3d
864 :
865 : ! **************************************************************************************************
866 :
867 : ! **************************************************************************************************
868 : !> \brief ...
869 : !> \param plan ...
870 : !> \param plan_r ...
871 : !> \param split_dim ...
872 : !> \param nt ...
873 : !> \param tid ...
874 : !> \param input ...
875 : !> \param istride ...
876 : !> \param output ...
877 : !> \param ostride ...
878 : ! **************************************************************************************************
879 0 : SUBROUTINE fftw3_workshare_execute_dft(plan, plan_r, split_dim, nt, tid, &
880 : input, istride, output, ostride)
881 :
882 : INTEGER, INTENT(IN) :: split_dim, nt, tid
883 : INTEGER, INTENT(IN) :: istride, ostride
884 : COMPLEX(KIND=dp), DIMENSION(*), INTENT(INOUT) :: input, output
885 : TYPE(C_PTR) :: plan, plan_r
886 : #if defined (__FFTW3)
887 : INTEGER :: i_off, o_off
888 : INTEGER :: th_planA, th_planB
889 : INTEGER :: rows_per_thread, rows_per_thread_r
890 :
891 : CALL fftw3_compute_rows_per_th(split_dim, nt, rows_per_thread, &
892 : rows_per_thread_r, &
893 0 : th_planA, th_planB)
894 :
895 0 : IF (th_planB .GT. 0) THEN
896 0 : IF (tid .LT. th_planA) THEN
897 0 : i_off = (tid)*(istride*(rows_per_thread)) + 1
898 0 : o_off = (tid)*(ostride*(rows_per_thread)) + 1
899 0 : IF (rows_per_thread .GT. 0) THEN
900 : CALL fftw_execute_dft(plan, input(i_off), &
901 0 : output(o_off))
902 : END IF
903 0 : ELSE IF ((tid - th_planA) < th_planB) THEN
904 :
905 : i_off = (th_planA)*istride*(rows_per_thread) + &
906 0 : (tid - th_planA)*istride*(rows_per_thread_r) + 1
907 : o_off = (th_planA)*ostride*(rows_per_thread) + &
908 0 : (tid - th_planA)*ostride*(rows_per_thread_r) + 1
909 :
910 : CALL fftw_execute_dft(plan_r, input(i_off), &
911 0 : output(o_off))
912 : END IF
913 :
914 : ELSE
915 0 : i_off = (tid)*(istride*(rows_per_thread)) + 1
916 0 : o_off = (tid)*(ostride*(rows_per_thread)) + 1
917 :
918 : CALL fftw_execute_dft(plan, input(i_off), &
919 0 : output(o_off))
920 :
921 : END IF
922 : #else
923 : MARK_USED(plan)
924 : MARK_USED(plan_r)
925 : MARK_USED(split_dim)
926 : MARK_USED(nt)
927 : MARK_USED(tid)
928 : MARK_USED(istride)
929 : MARK_USED(ostride)
930 : !MARK_USED does not work with assumed size arguments
931 : IF (.FALSE.) THEN; DO; IF (ABS(input(1)) > ABS(output(1))) EXIT; END DO; END IF
932 : #endif
933 :
934 0 : END SUBROUTINE
935 :
936 : ! **************************************************************************************************
937 :
938 : ! **************************************************************************************************
939 : !> \brief ...
940 : !> \param plan ...
941 : !> \param scale ...
942 : !> \param zin ...
943 : !> \param zout ...
944 : !> \param stat ...
945 : ! **************************************************************************************************
946 613402 : SUBROUTINE fftw33d(plan, scale, zin, zout, stat)
947 :
948 : TYPE(fft_plan_type), INTENT(IN) :: plan
949 : REAL(KIND=dp), INTENT(IN) :: scale
950 : COMPLEX(KIND=dp), DIMENSION(*), INTENT(INOUT), TARGET:: zin
951 : COMPLEX(KIND=dp), DIMENSION(*), INTENT(INOUT), TARGET:: zout
952 : INTEGER, INTENT(OUT) :: stat
953 : #if defined ( __FFTW3 )
954 613402 : COMPLEX(KIND=dp), POINTER :: xout(:)
955 613402 : COMPLEX(KIND=dp), ALLOCATABLE :: tmp1(:)
956 : INTEGER :: n1, n2, n3
957 : INTEGER :: tid, nt
958 : INTEGER :: i, j, k
959 :
960 613402 : n1 = plan%n_3d(1)
961 613402 : n2 = plan%n_3d(2)
962 613402 : n3 = plan%n_3d(3)
963 :
964 613402 : stat = 1
965 :
966 : ! We use a POINTER to the output array to avoid duplicating code
967 613402 : IF (plan%fft_in_place) THEN
968 613398 : xout => zin(:n1*n2*n3)
969 : ELSE
970 4 : xout => zout(:n1*n2*n3)
971 : END IF
972 :
973 : ! Either compute the full 3D FFT using a multithreaded plan
974 613402 : IF (.NOT. plan%separated_plans) THEN
975 613402 : CALL fftw_execute_dft(plan%fftw_plan, zin, xout)
976 : ELSE
977 : ! Or use the 3 stage FFT scheme described in fftw3_create_plan_3d
978 0 : ALLOCATE (tmp1(n1*n2*n3)) ! Temporary vector used for transpositions
979 0 : !$OMP PARALLEL DEFAULT(NONE) PRIVATE(tid,nt,i,j,k) SHARED(zin,tmp1,n1,n2,n3,plan,xout)
980 : tid = 0
981 : nt = 1
982 :
983 : !$ tid = omp_get_thread_num()
984 : !$ nt = omp_get_num_threads()
985 : CALL fftw3_workshare_execute_dft(plan%fftw_plan_nx, plan%fftw_plan_nx_r, &
986 : n3, nt, tid, &
987 : zin, n1*n2, tmp1, n1*n2)
988 :
989 : !$OMP BARRIER
990 : CALL fftw3_workshare_execute_dft(plan%fftw_plan_ny, plan%fftw_plan_ny_r, &
991 : n3, nt, tid, &
992 : tmp1, n1*n2, xout, 1)
993 : !$OMP BARRIER
994 : CALL fftw3_workshare_execute_dft(plan%fftw_plan_nz, plan%fftw_plan_nz_r, &
995 : n1, nt, tid, &
996 : xout, n2*n3, tmp1, n2*n3)
997 : !$OMP BARRIER
998 :
999 : !$OMP DO COLLAPSE(3)
1000 : DO i = 1, n1
1001 : DO j = 1, n2
1002 : DO k = 1, n3
1003 : xout((i - 1) + (j - 1)*n1 + (k - 1)*n1*n2 + 1) = &
1004 : tmp1((k - 1) + (j - 1)*n3 + (i - 1)*n3*n2 + 1)
1005 : END DO
1006 : END DO
1007 : END DO
1008 : !$OMP END DO
1009 :
1010 : !$OMP END PARALLEL
1011 : END IF
1012 :
1013 613402 : IF (scale /= 1.0_dp) THEN
1014 280445 : CALL zdscal(n1*n2*n3, scale, xout, 1)
1015 : END IF
1016 :
1017 : #else
1018 : MARK_USED(plan)
1019 : MARK_USED(scale)
1020 : !MARK_USED does not work with assumed size arguments
1021 : IF (.FALSE.) THEN; DO; IF (ABS(zin(1)) > ABS(zout(1))) EXIT; END DO; END IF
1022 : stat = 0
1023 :
1024 : #endif
1025 :
1026 613402 : END SUBROUTINE fftw33d
1027 :
1028 : ! **************************************************************************************************
1029 :
1030 : ! **************************************************************************************************
1031 : !> \brief ...
1032 : !> \param plan ...
1033 : !> \param zin ...
1034 : !> \param zout ...
1035 : !> \param plan_style ...
1036 : ! **************************************************************************************************
1037 148852 : SUBROUTINE fftw3_create_plan_1dm(plan, zin, zout, plan_style)
1038 :
1039 : IMPLICIT NONE
1040 :
1041 : TYPE(fft_plan_type), INTENT(INOUT) :: plan
1042 : COMPLEX(KIND=dp), DIMENSION(*), INTENT(IN) :: zin
1043 : COMPLEX(KIND=dp), DIMENSION(*), INTENT(IN) :: zout
1044 : INTEGER, INTENT(IN) :: plan_style
1045 : #if defined ( __FFTW3 )
1046 : INTEGER :: ii, di, io, DO, num_threads, num_rows
1047 :
1048 : INTEGER :: fftw_plan_type
1049 297596 : SELECT CASE (plan_style)
1050 : CASE (1)
1051 148744 : fftw_plan_type = FFTW_ESTIMATE
1052 : CASE (2)
1053 48 : fftw_plan_type = FFTW_MEASURE
1054 : CASE (3)
1055 48 : fftw_plan_type = FFTW_PATIENT
1056 : CASE (4)
1057 12 : fftw_plan_type = FFTW_EXHAUSTIVE
1058 : CASE DEFAULT
1059 148852 : CPABORT("fftw3_create_plan_1dm")
1060 : END SELECT
1061 :
1062 : #if defined (__FFTW3_UNALIGNED)
1063 : fftw_plan_type = fftw_plan_type + FFTW_UNALIGNED
1064 : #endif
1065 148852 : num_threads = 1
1066 148852 : plan%separated_plans = .FALSE.
1067 : !$OMP PARALLEL DEFAULT(NONE), &
1068 148852 : !$OMP SHARED(NUM_THREADS)
1069 : !$OMP MASTER
1070 : !$ num_threads = omp_get_num_threads()
1071 : !$OMP END MASTER
1072 : !$OMP END PARALLEL
1073 :
1074 148852 : num_rows = plan%m/num_threads
1075 148852 : !$ plan%num_threads_needed = num_threads
1076 :
1077 : ! Check for number of rows less than num_threads
1078 148852 : !$ IF (plan%m < num_threads) THEN
1079 0 : !$ num_rows = 1
1080 0 : !$ plan%num_threads_needed = plan%m
1081 : !$ END IF
1082 :
1083 : ! Check for total number of rows not divisible by num_threads
1084 148852 : !$ IF (num_rows*plan%num_threads_needed .NE. plan%m) THEN
1085 0 : !$ plan%need_alt_plan = .TRUE.
1086 : !$ END IF
1087 :
1088 148852 : !$ plan%num_rows = num_rows
1089 148852 : ii = 1
1090 148852 : di = plan%n
1091 148852 : io = 1
1092 148852 : DO = plan%n
1093 148852 : IF (plan%fsign == +1 .AND. plan%trans) THEN
1094 74316 : ii = plan%m
1095 74316 : di = 1
1096 74536 : ELSEIF (plan%fsign == -1 .AND. plan%trans) THEN
1097 74316 : io = plan%m
1098 74316 : DO = 1
1099 : END IF
1100 :
1101 148852 : IF (plan%fsign == +1) THEN
1102 : CALL dfftw_plan_many_dft(plan%fftw_plan, 1, plan%n, num_rows, zin, 0, ii, di, &
1103 74536 : zout, 0, io, DO, FFTW_FORWARD, fftw_plan_type)
1104 : ELSE
1105 : CALL dfftw_plan_many_dft(plan%fftw_plan, 1, plan%n, num_rows, zin, 0, ii, di, &
1106 74316 : zout, 0, io, DO, FFTW_BACKWARD, fftw_plan_type)
1107 : END IF
1108 :
1109 148852 : !$ IF (plan%need_alt_plan) THEN
1110 0 : !$ plan%alt_num_rows = plan%m - (plan%num_threads_needed - 1)*num_rows
1111 0 : !$ IF (plan%fsign == +1) THEN
1112 : !$ CALL dfftw_plan_many_dft(plan%alt_fftw_plan, 1, plan%n, plan%alt_num_rows, zin, 0, ii, di, &
1113 0 : !$ zout, 0, io, DO, FFTW_FORWARD, fftw_plan_type)
1114 : !$ ELSE
1115 : !$ CALL dfftw_plan_many_dft(plan%alt_fftw_plan, 1, plan%n, plan%alt_num_rows, zin, 0, ii, di, &
1116 0 : !$ zout, 0, io, DO, FFTW_BACKWARD, fftw_plan_type)
1117 : !$ END IF
1118 : !$ END IF
1119 :
1120 : #else
1121 : MARK_USED(plan)
1122 : MARK_USED(plan_style)
1123 : !MARK_USED does not work with assumed size arguments
1124 : IF (.FALSE.) THEN; DO; IF (ABS(zin(1)) > ABS(zout(1))) EXIT; END DO; END IF
1125 : #endif
1126 :
1127 148852 : END SUBROUTINE fftw3_create_plan_1dm
1128 :
1129 : ! **************************************************************************************************
1130 : !> \brief ...
1131 : !> \param plan ...
1132 : ! **************************************************************************************************
1133 189536 : SUBROUTINE fftw3_destroy_plan(plan)
1134 :
1135 : TYPE(fft_plan_type), INTENT(INOUT) :: plan
1136 :
1137 : #if defined ( __FFTW3 )
1138 189536 : !$ IF (plan%need_alt_plan) THEN
1139 0 : !$ CALL fftw_destroy_plan(plan%alt_fftw_plan)
1140 : !$ END IF
1141 :
1142 189536 : IF (.NOT. plan%separated_plans) THEN
1143 189536 : CALL fftw_destroy_plan(plan%fftw_plan)
1144 : ELSE
1145 : ! If it is a separated plan then we have to destroy
1146 : ! each dim plan individually
1147 0 : CALL fftw_destroy_plan(plan%fftw_plan_nx)
1148 0 : CALL fftw_destroy_plan(plan%fftw_plan_ny)
1149 0 : CALL fftw_destroy_plan(plan%fftw_plan_nz)
1150 0 : CALL fftw_destroy_plan(plan%fftw_plan_nx_r)
1151 0 : CALL fftw_destroy_plan(plan%fftw_plan_ny_r)
1152 0 : CALL fftw_destroy_plan(plan%fftw_plan_nz_r)
1153 : END IF
1154 :
1155 : #else
1156 : MARK_USED(plan)
1157 : #endif
1158 :
1159 189536 : END SUBROUTINE fftw3_destroy_plan
1160 :
1161 : ! **************************************************************************************************
1162 : !> \brief ...
1163 : !> \param plan ...
1164 : !> \param zin ...
1165 : !> \param zout ...
1166 : !> \param scale ...
1167 : !> \param stat ...
1168 : ! **************************************************************************************************
1169 7831384 : SUBROUTINE fftw31dm(plan, zin, zout, scale, stat)
1170 : TYPE(fft_plan_type), INTENT(IN) :: plan
1171 : COMPLEX(KIND=dp), DIMENSION(*), INTENT(IN), TARGET :: zin
1172 : COMPLEX(KIND=dp), DIMENSION(*), INTENT(INOUT), &
1173 : TARGET :: zout
1174 : REAL(KIND=dp), INTENT(IN) :: scale
1175 : INTEGER, INTENT(OUT) :: stat
1176 :
1177 : INTEGER :: in_offset, my_id, num_rows, out_offset, &
1178 : scal_offset
1179 : TYPE(C_PTR) :: fftw_plan
1180 :
1181 : !------------------------------------------------------------------------------
1182 :
1183 7831384 : my_id = 0
1184 7831384 : num_rows = plan%m
1185 :
1186 : !$OMP PARALLEL DEFAULT(NONE), &
1187 : !$OMP PRIVATE(my_id,num_rows,in_offset,out_offset,scal_offset,fftw_plan), &
1188 : !$OMP SHARED(zin,zout), &
1189 7831384 : !$OMP SHARED(plan,scale,stat)
1190 : !$ my_id = omp_get_thread_num()
1191 :
1192 : !$ if (my_id < plan%num_threads_needed) then
1193 :
1194 : fftw_plan = plan%fftw_plan
1195 :
1196 : in_offset = 1
1197 : out_offset = 1
1198 : scal_offset = 1
1199 :
1200 : !$ in_offset = 1 + plan%num_rows*my_id*plan%n
1201 : !$ out_offset = 1 + plan%num_rows*my_id*plan%n
1202 : !$ IF (plan%fsign == +1 .AND. plan%trans) THEN
1203 : !$ in_offset = 1 + plan%num_rows*my_id
1204 : !$ ELSEIF (plan%fsign == -1 .AND. plan%trans) THEN
1205 : !$ out_offset = 1 + plan%num_rows*my_id
1206 : !$ END IF
1207 : !$ scal_offset = 1 + plan%n*plan%num_rows*my_id
1208 : !$ IF (plan%need_alt_plan .AND. my_id .EQ. plan%num_threads_needed - 1) THEN
1209 : !$ num_rows = plan%alt_num_rows
1210 : !$ fftw_plan = plan%alt_fftw_plan
1211 : !$ ELSE
1212 : !$ num_rows = plan%num_rows
1213 : !$ END IF
1214 :
1215 : #if defined ( __FFTW3 )
1216 : !$OMP MASTER
1217 : stat = 1
1218 : !$OMP END MASTER
1219 : CALL dfftw_execute_dft(fftw_plan, zin(in_offset:in_offset), zout(out_offset:out_offset))
1220 : !$ end if
1221 : ! all theads need to meet at this barrier
1222 : !$OMP BARRIER
1223 : !$ if (my_id < plan%num_threads_needed) then
1224 : IF (scale /= 1.0_dp) CALL zdscal(plan%n*num_rows, scale, zout(scal_offset:scal_offset), 1)
1225 : !$ end if
1226 :
1227 : #else
1228 : MARK_USED(plan)
1229 : MARK_USED(scale)
1230 : !MARK_USED does not work with assumed size arguments
1231 : IF (.FALSE.) THEN; DO; IF (ABS(zin(1)) > ABS(zout(1))) EXIT; END DO; END IF
1232 : stat = 0
1233 :
1234 : !$ else
1235 : !$ end if
1236 :
1237 : #endif
1238 :
1239 : !$OMP END PARALLEL
1240 :
1241 7831384 : END SUBROUTINE fftw31dm
1242 :
1243 0 : END MODULE
|