Line data Source code
1 : !--------------------------------------------------------------------------------------------------!
2 : ! CP2K: A general program to perform molecular dynamics simulations !
3 : ! Copyright 2000-2025 CP2K developers group <https://cp2k.org> !
4 : ! !
5 : ! SPDX-License-Identifier: GPL-2.0-or-later !
6 : !--------------------------------------------------------------------------------------------------!
7 : MODULE fftw3_lib
8 : USE ISO_C_BINDING, ONLY: C_ASSOCIATED, &
9 : C_CHAR, &
10 : C_DOUBLE, &
11 : C_DOUBLE_COMPLEX, &
12 : C_INT, &
13 : C_PTR
14 : #if defined(__FFTW3)
15 : USE ISO_C_BINDING, ONLY: &
16 : C_FLOAT, &
17 : C_FLOAT_COMPLEX, &
18 : C_FUNPTR, &
19 : C_INT32_T, &
20 : C_INTPTR_T, &
21 : C_LOC, &
22 : C_NULL_CHAR, &
23 : C_SIZE_T, C_F_POINTER
24 : USE mathconstants, ONLY: z_zero
25 : #endif
26 : USE cp_files, ONLY: get_unit_number
27 : USE fft_kinds, ONLY: dp
28 : USE fft_plan, ONLY: fft_plan_type
29 :
30 : !$ USE OMP_LIB, ONLY: omp_get_max_threads, omp_get_thread_num, omp_get_num_threads
31 :
32 : #include "../../base/base_uses.f90"
33 :
34 : IMPLICIT NONE
35 : PRIVATE
36 :
37 : PUBLIC :: fftw3_do_init, fftw3_do_cleanup, fftw3_get_lengths, fftw33d, fftw31dm
38 : PUBLIC :: fftw3_destroy_plan, fftw3_create_plan_1dm, fftw3_create_plan_3d
39 : PUBLIC :: fftw_alloc, fftw_dealloc
40 :
41 : #if defined(__FFTW3)
42 : #include "fftw3.f03"
43 : #endif
44 :
45 : INTERFACE fftw_alloc
46 : MODULE PROCEDURE :: fftw_alloc_complex_1d
47 : MODULE PROCEDURE :: fftw_alloc_complex_2d
48 : MODULE PROCEDURE :: fftw_alloc_complex_3d
49 : END INTERFACE fftw_alloc
50 :
51 : INTERFACE fftw_dealloc
52 : MODULE PROCEDURE :: fftw_dealloc_complex_1d
53 : MODULE PROCEDURE :: fftw_dealloc_complex_2d
54 : MODULE PROCEDURE :: fftw_dealloc_complex_3d
55 : END INTERFACE fftw_dealloc
56 :
57 : CONTAINS
58 :
59 : #:set maxdim = 3
60 : #:for dim in range(1, maxdim+1)
61 : ! Concatenate the components of the dimensions passed to this function to use it if FFTW3 is not used
62 : #:set dim_extended = ", ".join(["n("+str(i)+")" for i in range(1, dim+1)])
63 110004 : SUBROUTINE fftw_alloc_complex_${dim}$d(array, n)
64 : COMPLEX(C_DOUBLE_COMPLEX), DIMENSION(:${", :"*(dim-1)}$), CONTIGUOUS, POINTER, INTENT(OUT) :: array
65 : INTEGER, DIMENSION(${dim}$), INTENT(IN) :: n
66 :
67 : #if defined(__FFTW3)
68 : TYPE(C_PTR) :: data_ptr
69 385428 : data_ptr = fftw_alloc_complex(INT(PRODUCT(n), KIND=C_SIZE_T))
70 385428 : CALL C_F_POINTER(data_ptr, array, n)
71 : #else
72 : ! Just allocate the array
73 : ALLOCATE (array(${dim_extended}$))
74 : #endif
75 :
76 110004 : END SUBROUTINE fftw_alloc_complex_${dim}$d
77 :
78 110004 : SUBROUTINE fftw_dealloc_complex_${dim}$d(array)
79 : COMPLEX(C_DOUBLE_COMPLEX), DIMENSION(:${", :"*(dim-1)}$), CONTIGUOUS, POINTER, INTENT(INOUT) :: array
80 :
81 : #if defined(__FFTW3)
82 110004 : CALL fftw_free(C_LOC(array))
83 110004 : NULLIFY (array)
84 : #else
85 : ! Just deallocate the array
86 : DEALLOCATE (array)
87 : #endif
88 :
89 110004 : END SUBROUTINE fftw_dealloc_complex_${dim}$d
90 : #:endfor
91 :
92 : #if defined(__FFTW3)
93 : ! **************************************************************************************************
94 : !> \brief A workaround that allows us to compile with -Werror=unused-parameter
95 : ! **************************************************************************************************
96 0 : SUBROUTINE dummy_routine_to_call_mark_used()
97 : MARK_USED(FFTW_R2HC)
98 : MARK_USED(FFTW_HC2R)
99 : MARK_USED(FFTW_DHT)
100 : MARK_USED(FFTW_REDFT00)
101 : MARK_USED(FFTW_REDFT01)
102 : MARK_USED(FFTW_REDFT10)
103 : MARK_USED(FFTW_REDFT11)
104 : MARK_USED(FFTW_RODFT00)
105 : MARK_USED(FFTW_RODFT01)
106 : MARK_USED(FFTW_RODFT10)
107 : MARK_USED(FFTW_RODFT11)
108 : MARK_USED(FFTW_FORWARD)
109 : MARK_USED(FFTW_BACKWARD)
110 : MARK_USED(FFTW_MEASURE)
111 : MARK_USED(FFTW_DESTROY_INPUT)
112 : MARK_USED(FFTW_UNALIGNED)
113 : MARK_USED(FFTW_CONSERVE_MEMORY)
114 : MARK_USED(FFTW_EXHAUSTIVE)
115 : MARK_USED(FFTW_PRESERVE_INPUT)
116 : MARK_USED(FFTW_PATIENT)
117 : MARK_USED(FFTW_ESTIMATE)
118 : MARK_USED(FFTW_WISDOM_ONLY)
119 : MARK_USED(FFTW_ESTIMATE_PATIENT)
120 : MARK_USED(FFTW_BELIEVE_PCOST)
121 : MARK_USED(FFTW_NO_DFT_R2HC)
122 : MARK_USED(FFTW_NO_NONTHREADED)
123 : MARK_USED(FFTW_NO_BUFFERING)
124 : MARK_USED(FFTW_NO_INDIRECT_OP)
125 : MARK_USED(FFTW_ALLOW_LARGE_GENERIC)
126 : MARK_USED(FFTW_NO_RANK_SPLITS)
127 : MARK_USED(FFTW_NO_VRANK_SPLITS)
128 : MARK_USED(FFTW_NO_VRECURSE)
129 : MARK_USED(FFTW_NO_SIMD)
130 : MARK_USED(FFTW_NO_SLOW)
131 : MARK_USED(FFTW_NO_FIXED_RADIX_LARGE_N)
132 : MARK_USED(FFTW_ALLOW_PRUNING)
133 0 : END SUBROUTINE dummy_routine_to_call_mark_used
134 : #endif
135 :
136 : ! **************************************************************************************************
137 : !> \brief ...
138 : !> \param wisdom_file ...
139 : !> \param ionode ...
140 : ! **************************************************************************************************
141 9675 : SUBROUTINE fftw3_do_cleanup(wisdom_file, ionode)
142 :
143 : CHARACTER(LEN=*), INTENT(IN) :: wisdom_file
144 : LOGICAL :: ionode
145 :
146 : #if defined(__FFTW3)
147 9675 : CHARACTER(LEN=1, KIND=C_CHAR), DIMENSION(:), ALLOCATABLE :: wisdom_file_name_c
148 : INTEGER :: file_name_length, i, iunit, istat
149 : INTEGER(KIND=C_INT) :: isuccess
150 : ! Write out FFTW3 wisdom to file (if we can)
151 : ! only the ionode updates the wisdom
152 9675 : IF (ionode) THEN
153 4941 : iunit = get_unit_number()
154 : ! Check whether the file can be opened in the necessary manner
155 4941 : OPEN (UNIT=iunit, FILE=wisdom_file, STATUS="UNKNOWN", FORM="FORMATTED", ACTION="WRITE", IOSTAT=istat)
156 4941 : IF (istat == 0) THEN
157 2 : CLOSE (iunit)
158 2 : file_name_length = LEN_TRIM(wisdom_file)
159 6 : ALLOCATE (wisdom_file_name_c(file_name_length + 1))
160 18 : DO i = 1, file_name_length
161 18 : wisdom_file_name_c(i) = wisdom_file(i:i)
162 : END DO
163 2 : wisdom_file_name_c(file_name_length + 1) = C_NULL_CHAR
164 2 : isuccess = fftw_export_wisdom_to_filename(wisdom_file_name_c)
165 2 : IF (isuccess == 0) &
166 : CALL cp_warn(__LOCATION__, "Error exporting wisdom to file "//TRIM(wisdom_file)//". "// &
167 0 : "Wisdom was not exported.")
168 : END IF
169 : END IF
170 :
171 9675 : CALL fftw_cleanup()
172 : #else
173 : MARK_USED(wisdom_file)
174 : MARK_USED(ionode)
175 : #endif
176 :
177 9675 : END SUBROUTINE fftw3_do_cleanup
178 :
179 : ! **************************************************************************************************
180 : !> \brief ...
181 : !> \param wisdom_file ...
182 : ! **************************************************************************************************
183 9885 : SUBROUTINE fftw3_do_init(wisdom_file)
184 :
185 : CHARACTER(LEN=*), INTENT(IN) :: wisdom_file
186 :
187 : #if defined(__FFTW3)
188 9885 : CHARACTER(LEN=1, KIND=C_CHAR), DIMENSION(:), ALLOCATABLE :: wisdom_file_name_c
189 : INTEGER :: file_name_length, i, istat, iunit
190 : INTEGER(KIND=C_INT) :: isuccess
191 : LOGICAL :: file_exists
192 :
193 9885 : isuccess = fftw_init_threads()
194 9885 : IF (isuccess == 0) &
195 0 : CPABORT("Error initializing FFTW with threads")
196 :
197 : ! Read FFTW wisdom (if available)
198 : ! all nodes are opening the file here...
199 9885 : INQUIRE (FILE=wisdom_file, exist=file_exists)
200 9885 : IF (file_exists) THEN
201 2 : iunit = get_unit_number()
202 2 : file_name_length = LEN_TRIM(wisdom_file)
203 : ! Check whether the file can be opened in the necessary manner
204 : OPEN (UNIT=iunit, FILE=wisdom_file, STATUS="OLD", FORM="FORMATTED", POSITION="REWIND", &
205 2 : ACTION="READ", IOSTAT=istat)
206 2 : IF (istat == 0) THEN
207 2 : CLOSE (iunit)
208 2 : file_name_length = LEN_TRIM(wisdom_file)
209 6 : ALLOCATE (wisdom_file_name_c(file_name_length + 1))
210 18 : DO i = 1, file_name_length
211 18 : wisdom_file_name_c(i) = wisdom_file(i:i)
212 : END DO
213 2 : wisdom_file_name_c(file_name_length + 1) = C_NULL_CHAR
214 2 : isuccess = fftw_import_wisdom_from_filename(wisdom_file_name_c)
215 2 : IF (isuccess == 0) &
216 : CALL cp_warn(__LOCATION__, "Error importing wisdom from file "//TRIM(wisdom_file)//". "// &
217 : "Maybe the file was created with a different configuration than CP2K is run with. "// &
218 0 : "CP2K continues without importing wisdom.")
219 : END IF
220 : END IF
221 : #else
222 : MARK_USED(wisdom_file)
223 : #endif
224 :
225 9885 : END SUBROUTINE fftw3_do_init
226 :
227 : ! **************************************************************************************************
228 : !> \brief ...
229 : !> \param DATA ...
230 : !> \param max_length ...
231 : !> \par History
232 : !> JGH 23-Jan-2006 : initial version
233 : !> Adapted for new interface
234 : !> IAB 09-Jan-2009 : Modified to cache plans in fft_plan_type
235 : !> (c) The Numerical Algorithms Group (NAG) Ltd, 2009 on behalf of the HECToR project
236 : !> IAB 09-Oct-2009 : Added OpenMP directives to 1D FFT, and planning routines
237 : !> (c) The Numerical Algorithms Group (NAG) Ltd, 2009 on behalf of the HECToR project
238 : !> IAB 11-Sep-2012 : OpenMP parallel 3D FFT (Ruyman Reyes, PRACE)
239 : !> \author JGH
240 : ! **************************************************************************************************
241 528 : SUBROUTINE fftw3_get_lengths(DATA, max_length)
242 :
243 : INTEGER, DIMENSION(*) :: DATA
244 : INTEGER, INTENT(INOUT) :: max_length
245 :
246 : INTEGER :: h, i, j, k, m, maxn, maxn_elevens, &
247 : maxn_fives, maxn_sevens, &
248 : maxn_thirteens, maxn_threes, &
249 : maxn_twos, ndata, nmax, number
250 528 : INTEGER, ALLOCATABLE, DIMENSION(:) :: dlocal, idx
251 :
252 : !------------------------------------------------------------------------------
253 : ! compute ndata
254 : !! FFTW can do arbitrary(?) lengths, maybe you want to limit them to some
255 : !! powers of small prime numbers though...
256 :
257 528 : maxn_twos = 15
258 528 : maxn_threes = 3
259 528 : maxn_fives = 2
260 528 : maxn_sevens = 1
261 528 : maxn_elevens = 1
262 528 : maxn_thirteens = 0
263 528 : maxn = 37748736
264 :
265 528 : ndata = 0
266 8976 : DO h = 0, maxn_twos
267 8448 : nmax = HUGE(0)/2**h
268 42768 : DO i = 0, maxn_threes
269 143616 : DO j = 0, maxn_fives
270 337920 : DO k = 0, maxn_sevens
271 709632 : DO m = 0, maxn_elevens
272 405504 : number = (3**i)*(5**j)*(7**k)*(11**m)
273 :
274 405504 : IF (number > nmax) CYCLE
275 :
276 405504 : number = number*2**h
277 405504 : IF (number >= maxn) CYCLE
278 :
279 608256 : ndata = ndata + 1
280 : END DO
281 : END DO
282 : END DO
283 : END DO
284 : END DO
285 :
286 2112 : ALLOCATE (dlocal(ndata), idx(ndata))
287 :
288 528 : ndata = 0
289 389136 : dlocal(:) = 0
290 8976 : DO h = 0, maxn_twos
291 8448 : nmax = HUGE(0)/2**h
292 42768 : DO i = 0, maxn_threes
293 143616 : DO j = 0, maxn_fives
294 337920 : DO k = 0, maxn_sevens
295 709632 : DO m = 0, maxn_elevens
296 405504 : number = (3**i)*(5**j)*(7**k)*(11**m)
297 :
298 405504 : IF (number > nmax) CYCLE
299 :
300 405504 : number = number*2**h
301 405504 : IF (number >= maxn) CYCLE
302 :
303 388608 : ndata = ndata + 1
304 608256 : dlocal(ndata) = number
305 : END DO
306 : END DO
307 : END DO
308 : END DO
309 : END DO
310 :
311 528 : CALL sortint(dlocal, ndata, idx)
312 528 : ndata = MIN(ndata, max_length)
313 389136 : DATA(1:ndata) = dlocal(1:ndata)
314 528 : max_length = ndata
315 :
316 528 : DEALLOCATE (dlocal, idx)
317 :
318 528 : END SUBROUTINE fftw3_get_lengths
319 :
320 : ! **************************************************************************************************
321 : !> \brief ...
322 : !> \param iarr ...
323 : !> \param n ...
324 : !> \param index ...
325 : ! **************************************************************************************************
326 528 : SUBROUTINE sortint(iarr, n, index)
327 :
328 : INTEGER, INTENT(IN) :: n
329 : INTEGER, INTENT(INOUT) :: iarr(1:n)
330 : INTEGER, INTENT(OUT) :: INDEX(1:n)
331 :
332 : INTEGER, PARAMETER :: m = 7, nstack = 50
333 :
334 : INTEGER :: a, i, ib, ir, istack(1:nstack), itemp, &
335 : j, jstack, k, l, temp
336 :
337 : !------------------------------------------------------------------------------
338 :
339 389136 : DO i = 1, n
340 389136 : INDEX(i) = i
341 : END DO
342 : jstack = 0
343 : l = 1
344 : ir = n
345 : DO WHILE (.TRUE.)
346 146256 : IF (ir - l < m) THEN
347 315744 : DO j = l + 1, ir
348 242352 : a = iarr(j)
349 242352 : ib = INDEX(j)
350 575520 : DO i = j - 1, 0, -1
351 575520 : IF (i == 0) EXIT
352 574464 : IF (iarr(i) <= a) EXIT
353 333168 : iarr(i + 1) = iarr(i)
354 575520 : INDEX(i + 1) = INDEX(i)
355 : END DO
356 242352 : iarr(i + 1) = a
357 315744 : INDEX(i + 1) = ib
358 : END DO
359 73392 : IF (jstack == 0) RETURN
360 72864 : ir = istack(jstack)
361 72864 : l = istack(jstack - 1)
362 72864 : jstack = jstack - 2
363 : ELSE
364 72864 : k = (l + ir)/2
365 72864 : temp = iarr(k)
366 72864 : iarr(k) = iarr(l + 1)
367 72864 : iarr(l + 1) = temp
368 72864 : itemp = INDEX(k)
369 72864 : INDEX(k) = INDEX(l + 1)
370 72864 : INDEX(l + 1) = itemp
371 72864 : IF (iarr(l + 1) > iarr(ir)) THEN
372 34320 : temp = iarr(l + 1)
373 34320 : iarr(l + 1) = iarr(ir)
374 34320 : iarr(ir) = temp
375 34320 : itemp = INDEX(l + 1)
376 34320 : INDEX(l + 1) = INDEX(ir)
377 34320 : INDEX(ir) = itemp
378 : END IF
379 72864 : IF (iarr(l) > iarr(ir)) THEN
380 27456 : temp = iarr(l)
381 27456 : iarr(l) = iarr(ir)
382 27456 : iarr(ir) = temp
383 27456 : itemp = INDEX(l)
384 27456 : INDEX(l) = INDEX(ir)
385 27456 : INDEX(ir) = itemp
386 : END IF
387 72864 : IF (iarr(l + 1) > iarr(l)) THEN
388 27456 : temp = iarr(l + 1)
389 27456 : iarr(l + 1) = iarr(l)
390 27456 : iarr(l) = temp
391 27456 : itemp = INDEX(l + 1)
392 27456 : INDEX(l + 1) = INDEX(l)
393 27456 : INDEX(l) = itemp
394 : END IF
395 72864 : i = l + 1
396 72864 : j = ir
397 72864 : a = iarr(l)
398 72864 : ib = INDEX(l)
399 464112 : DO WHILE (.TRUE.)
400 536976 : i = i + 1
401 1742928 : DO WHILE (iarr(i) < a)
402 1205952 : i = i + 1
403 : END DO
404 536976 : j = j - 1
405 1330560 : DO WHILE (iarr(j) > a)
406 793584 : j = j - 1
407 : END DO
408 536976 : IF (j < i) EXIT
409 464112 : temp = iarr(i)
410 464112 : iarr(i) = iarr(j)
411 464112 : iarr(j) = temp
412 464112 : itemp = INDEX(i)
413 464112 : INDEX(i) = INDEX(j)
414 464112 : INDEX(j) = itemp
415 : END DO
416 72864 : iarr(l) = iarr(j)
417 72864 : iarr(j) = a
418 72864 : INDEX(l) = INDEX(j)
419 72864 : INDEX(j) = ib
420 72864 : jstack = jstack + 2
421 72864 : IF (jstack > nstack) CPABORT("Nstack too small in sortint")
422 72864 : IF (ir - i + 1 >= j - l) THEN
423 37488 : istack(jstack) = ir
424 37488 : istack(jstack - 1) = i
425 37488 : ir = j - 1
426 : ELSE
427 35376 : istack(jstack) = j - 1
428 35376 : istack(jstack - 1) = l
429 35376 : l = i
430 : END IF
431 : END IF
432 :
433 : END DO
434 :
435 : END SUBROUTINE sortint
436 :
437 : ! **************************************************************************************************
438 :
439 : ! **************************************************************************************************
440 : !> \brief ...
441 : !> \param plan ...
442 : !> \param fft_rank ...
443 : !> \param dim_n ...
444 : !> \param dim_istride ...
445 : !> \param dim_ostride ...
446 : !> \param hm_rank ...
447 : !> \param hm_n ...
448 : !> \param hm_istride ...
449 : !> \param hm_ostride ...
450 : !> \param zin ...
451 : !> \param zout ...
452 : !> \param fft_direction ...
453 : !> \param fftw_plan_type ...
454 : !> \param valid ...
455 : ! **************************************************************************************************
456 56292 : SUBROUTINE fftw3_create_guru_plan(plan, fft_rank, dim_n, &
457 : dim_istride, dim_ostride, hm_rank, &
458 : hm_n, hm_istride, hm_ostride, &
459 : zin, zout, fft_direction, fftw_plan_type, &
460 : valid)
461 :
462 : IMPLICIT NONE
463 :
464 : TYPE(C_PTR), INTENT(INOUT) :: plan
465 : COMPLEX(KIND=dp), DIMENSION(*), INTENT(INOUT) :: zin, zout
466 : INTEGER, INTENT(IN) :: dim_n(2), dim_istride(2), dim_ostride(2), &
467 : hm_n(2), hm_istride(2), hm_ostride(2), fft_rank, &
468 : fft_direction, fftw_plan_type, hm_rank
469 : LOGICAL, INTENT(OUT) :: valid
470 :
471 : #if defined(__FFTW3)
472 : TYPE(fftw_iodim) :: dim(2), hm(2)
473 : INTEGER :: i
474 :
475 168876 : DO i = 1, 2
476 112584 : DIM(i) = fftw_iodim(dim_n(i), dim_istride(i), dim_ostride(i))
477 168876 : hm(i) = fftw_iodim(hm_n(i), hm_istride(i), hm_ostride(i))
478 : END DO
479 :
480 : plan = fftw_plan_guru_dft(fft_rank, &
481 : dim, hm_rank, hm, &
482 : zin, zout, &
483 56292 : fft_direction, fftw_plan_type)
484 :
485 56292 : valid = C_ASSOCIATED(plan)
486 :
487 : #else
488 : MARK_USED(plan)
489 : MARK_USED(fft_rank)
490 : MARK_USED(dim_n)
491 : MARK_USED(dim_istride)
492 : MARK_USED(dim_ostride)
493 : MARK_USED(hm_rank)
494 : MARK_USED(hm_n)
495 : MARK_USED(hm_istride)
496 : MARK_USED(hm_ostride)
497 : MARK_USED(fft_direction)
498 : MARK_USED(fftw_plan_type)
499 : !MARK_USED does not work with assumed size arguments
500 : IF (.FALSE.) THEN; DO; IF (ABS(zin(1)) > ABS(zout(1))) EXIT; END DO; END IF
501 : valid = .FALSE.
502 :
503 : #endif
504 :
505 56292 : END SUBROUTINE fftw3_create_guru_plan
506 :
507 : ! **************************************************************************************************
508 :
509 : ! **************************************************************************************************
510 : !> \brief Attempt to create a plan with the guru interface for a 2d sub-space.
511 : !> If this fails, fall back to the FFTW3 threaded 3D transform instead
512 : !> of the hand-optimised version.
513 : !> \return ...
514 : ! **************************************************************************************************
515 56292 : FUNCTION fftw3_is_guru_supported() RESULT(guru_supported)
516 :
517 : IMPLICIT NONE
518 :
519 : LOGICAL :: guru_supported
520 : #if defined(__FFTW3)
521 : INTEGER :: dim_n(2), dim_istride(2), dim_ostride(2), &
522 : howmany_n(2), howmany_istride(2), howmany_ostride(2)
523 : TYPE(C_PTR) :: test_plan
524 : COMPLEX(KIND=dp), DIMENSION(1, 1, 1) :: zin
525 :
526 56292 : dim_n(1) = 1
527 56292 : dim_n(2) = 1
528 56292 : dim_istride(1) = 1
529 56292 : dim_istride(2) = 1
530 56292 : dim_ostride(1) = 1
531 56292 : dim_ostride(2) = 1
532 56292 : howmany_n(1) = 1
533 56292 : howmany_n(2) = 1
534 56292 : howmany_istride(1) = 1
535 56292 : howmany_istride(2) = 1
536 56292 : howmany_ostride(1) = 1
537 56292 : howmany_ostride(2) = 1
538 56292 : zin = z_zero
539 : CALL fftw3_create_guru_plan(test_plan, 1, &
540 : dim_n, dim_istride, dim_ostride, &
541 : 2, howmany_n, howmany_istride, howmany_ostride, &
542 : zin, zin, &
543 56292 : FFTW_FORWARD, FFTW_ESTIMATE, guru_supported)
544 56292 : IF (guru_supported) THEN
545 56292 : CALL fftw_destroy_plan(test_plan)
546 : END IF
547 :
548 : #else
549 : guru_supported = .FALSE.
550 : #endif
551 :
552 56292 : END FUNCTION fftw3_is_guru_supported
553 :
554 : ! **************************************************************************************************
555 :
556 : ! **************************************************************************************************
557 : !> \brief ...
558 : !> \param nrows ...
559 : !> \param nt ...
560 : !> \param rows_per_thread ...
561 : !> \param rows_per_thread_r ...
562 : !> \param th_planA ...
563 : !> \param th_planB ...
564 : ! **************************************************************************************************
565 0 : SUBROUTINE fftw3_compute_rows_per_th(nrows, nt, rows_per_thread, rows_per_thread_r, &
566 : th_planA, th_planB)
567 :
568 : INTEGER, INTENT(IN) :: nrows, nt
569 : INTEGER, INTENT(OUT) :: rows_per_thread, rows_per_thread_r, &
570 : th_planA, th_planB
571 :
572 0 : IF (MOD(nrows, nt) == 0) THEN
573 0 : rows_per_thread = nrows/nt
574 0 : rows_per_thread_r = 0
575 0 : th_planA = nt
576 0 : th_planB = 0
577 : ELSE
578 0 : rows_per_thread = nrows/nt + 1
579 0 : rows_per_thread_r = nrows/nt
580 0 : th_planA = MOD(nrows, nt)
581 0 : th_planB = nt - th_planA
582 : END IF
583 :
584 0 : END SUBROUTINE fftw3_compute_rows_per_th
585 :
586 : ! **************************************************************************************************
587 :
588 : ! **************************************************************************************************
589 : !> \brief ...
590 : !> \param plan ...
591 : !> \param plan_r ...
592 : !> \param dim_n ...
593 : !> \param dim_istride ...
594 : !> \param dim_ostride ...
595 : !> \param hm_n ...
596 : !> \param hm_istride ...
597 : !> \param hm_ostride ...
598 : !> \param input ...
599 : !> \param output ...
600 : !> \param fft_direction ...
601 : !> \param fftw_plan_type ...
602 : !> \param rows_per_th ...
603 : !> \param rows_per_th_r ...
604 : ! **************************************************************************************************
605 0 : SUBROUTINE fftw3_create_3d_plans(plan, plan_r, dim_n, dim_istride, dim_ostride, &
606 : hm_n, hm_istride, hm_ostride, &
607 : input, output, &
608 : fft_direction, fftw_plan_type, rows_per_th, &
609 : rows_per_th_r)
610 :
611 : TYPE(C_PTR), INTENT(INOUT) :: plan, plan_r
612 : INTEGER, INTENT(INOUT) :: dim_n(2), dim_istride(2), &
613 : dim_ostride(2), hm_n(2), &
614 : hm_istride(2), hm_ostride(2)
615 : COMPLEX(KIND=dp), DIMENSION(*), INTENT(INOUT) :: input, output
616 : INTEGER, INTENT(INOUT) :: fft_direction, fftw_plan_type
617 : INTEGER, INTENT(IN) :: rows_per_th, rows_per_th_r
618 :
619 : LOGICAL :: valid
620 :
621 : ! First plans will have an additional row
622 :
623 0 : hm_n(2) = rows_per_th
624 : CALL fftw3_create_guru_plan(plan, 1, &
625 : dim_n, dim_istride, dim_ostride, &
626 : 2, hm_n, hm_istride, hm_ostride, &
627 : input, output, &
628 0 : fft_direction, fftw_plan_type, valid)
629 :
630 0 : IF (.NOT. valid) THEN
631 0 : CPABORT("fftw3_create_plan")
632 : END IF
633 :
634 : !!!! Remainder
635 0 : hm_n(2) = rows_per_th_r
636 : CALL fftw3_create_guru_plan(plan_r, 1, &
637 : dim_n, dim_istride, dim_ostride, &
638 : 2, hm_n, hm_istride, hm_ostride, &
639 : input, output, &
640 0 : fft_direction, fftw_plan_type, valid)
641 0 : IF (.NOT. valid) THEN
642 0 : CPABORT("fftw3_create_plan (remaining)")
643 : END IF
644 :
645 0 : END SUBROUTINE fftw3_create_3d_plans
646 :
647 : ! **************************************************************************************************
648 :
649 : ! **************************************************************************************************
650 : !> \brief ...
651 : !> \param plan ...
652 : !> \param zin ...
653 : !> \param zout ...
654 : !> \param plan_style ...
655 : ! **************************************************************************************************
656 56292 : SUBROUTINE fftw3_create_plan_3d(plan, zin, zout, plan_style)
657 :
658 : TYPE(fft_plan_type), INTENT(INOUT) :: plan
659 : COMPLEX(KIND=dp), DIMENSION(*), INTENT(INOUT) :: zin
660 : COMPLEX(KIND=dp), DIMENSION(*), INTENT(INOUT) :: zout
661 : INTEGER :: plan_style
662 : #if defined(__FFTW3)
663 : INTEGER :: n1, n2, n3
664 : INTEGER :: nt
665 : INTEGER :: rows_per_th
666 : INTEGER :: rows_per_th_r
667 : INTEGER :: fft_direction
668 : INTEGER :: th_planA, th_planB
669 56292 : COMPLEX(KIND=dp), ALLOCATABLE :: tmp(:)
670 :
671 : ! GURU Interface
672 : INTEGER :: dim_n(2), dim_istride(2), dim_ostride(2), &
673 : howmany_n(2), howmany_istride(2), howmany_ostride(2)
674 :
675 : INTEGER :: fftw_plan_type
676 112544 : SELECT CASE (plan_style)
677 : CASE (1)
678 56252 : fftw_plan_type = FFTW_ESTIMATE
679 : CASE (2)
680 8 : fftw_plan_type = FFTW_MEASURE
681 : CASE (3)
682 24 : fftw_plan_type = FFTW_PATIENT
683 : CASE (4)
684 8 : fftw_plan_type = FFTW_EXHAUSTIVE
685 : CASE DEFAULT
686 56292 : CPABORT("fftw3_create_plan_3d")
687 : END SELECT
688 :
689 : #if defined(__FFTW3_UNALIGNED)
690 : fftw_plan_type = fftw_plan_type + FFTW_UNALIGNED
691 : #endif
692 :
693 56292 : IF (plan%fsign == +1) THEN
694 28146 : fft_direction = FFTW_FORWARD
695 : ELSE
696 28146 : fft_direction = FFTW_BACKWARD
697 : END IF
698 :
699 56292 : n1 = plan%n_3d(1)
700 56292 : n2 = plan%n_3d(2)
701 56292 : n3 = plan%n_3d(3)
702 :
703 56292 : nt = 1
704 56292 : !$OMP PARALLEL DEFAULT(NONE) SHARED(nt)
705 : !$OMP MASTER
706 : !$ nt = omp_get_num_threads()
707 : !$OMP END MASTER
708 : !$OMP END PARALLEL
709 :
710 : IF ((.NOT. fftw3_is_guru_supported()) .OR. &
711 56292 : (.NOT. plan_style == 1) .OR. &
712 : (n1 < 256 .AND. n2 < 256 .AND. n3 < 256 .AND. nt == 1)) THEN
713 : ! If the plan type is MEASURE, PATIENT and EXHAUSTIVE or
714 : ! the grid size is small (and we are single-threaded) then
715 : ! FFTW3 does a better job than handmade optimization
716 : ! so plan a single 3D FFT which will execute using all the threads
717 :
718 56292 : plan%separated_plans = .FALSE.
719 56292 : !$ CALL fftw_plan_with_nthreads(nt)
720 :
721 56292 : IF (plan%fft_in_place) THEN
722 28146 : plan%fftw_plan = fftw_plan_dft_3d(n3, n2, n1, zin, zin, fft_direction, fftw_plan_type)
723 : ELSE
724 28146 : plan%fftw_plan = fftw_plan_dft_3d(n3, n2, n1, zin, zout, fft_direction, fftw_plan_type)
725 : END IF
726 : ELSE
727 0 : ALLOCATE (tmp(n1*n2*n3))
728 : ! ************************* PLANS WITH TRANSPOSITIONS ****************************
729 : ! In the cases described above, we manually thread each stage of the 3D FFT.
730 : !
731 : ! The following plans replace the 3D FFT call by running 1D FFTW across all
732 : ! 3 directions of the array.
733 : !
734 : ! Output of FFTW is transposed to ensure that the next round of FFTW access
735 : ! contiguous information.
736 : !
737 : ! Assuming the input matrix is M(n3,n2,n1), FFTW/Transp are :
738 : ! M(n3,n2,n1) -> fftw(x) -> M(n3,n1,n2) -> fftw(y) -> M(n1,n2,n3) -> fftw(z) -> M(n1,n2,n3)
739 : ! Notice that last matrix is transposed in the Z axis. A DO-loop in the execute routine
740 : ! will perform the final transposition. Performance evaluation showed that using an external
741 : ! DO loop to do the final transposition performed better than directly transposing the output.
742 : ! However, this might vary depending on the compiler/platform, so a potential tuning spot
743 : ! is to perform the final transposition within the fftw library rather than using the external loop
744 : ! See comments below in Z-FFT for how to transpose the output to avoid the final DO loop.
745 : !
746 : ! Doc. for the Guru interface is in http://www.fftw.org/doc/Guru-Interface.html
747 : !
748 : ! OpenMP : Work is distributed on the Z plane.
749 : ! All transpositions are out-of-place to facilitate multi-threading
750 : !
751 : !!!! Plan for X : M(n3,n2,n1) -> fftw(x) -> M(n3,n1,n2)
752 : CALL fftw3_compute_rows_per_th(n3, nt, rows_per_th, rows_per_th_r, &
753 0 : th_planA, th_planB)
754 :
755 0 : dim_n(1) = n1
756 0 : dim_istride(1) = 1
757 0 : dim_ostride(1) = n2
758 0 : howmany_n(1) = n2
759 0 : howmany_n(2) = rows_per_th
760 0 : howmany_istride(1) = n1
761 0 : howmany_istride(2) = n1*n2
762 0 : howmany_ostride(1) = 1
763 0 : howmany_ostride(2) = n1*n2
764 : CALL fftw3_create_3d_plans(plan%fftw_plan_nx, plan%fftw_plan_nx_r, &
765 : dim_n, dim_istride, dim_ostride, howmany_n, &
766 : howmany_istride, howmany_ostride, &
767 : zin, tmp, &
768 : fft_direction, fftw_plan_type, rows_per_th, &
769 0 : rows_per_th_r)
770 :
771 : !!!! Plan for Y : M(n3,n1,n2) -> fftw(y) -> M(n1,n2,n3)
772 : CALL fftw3_compute_rows_per_th(n3, nt, rows_per_th, rows_per_th_r, &
773 0 : th_planA, th_planB)
774 0 : dim_n(1) = n2
775 : dim_istride(1) = 1
776 0 : dim_ostride(1) = n3
777 0 : howmany_n(1) = n1
778 0 : howmany_n(2) = rows_per_th
779 0 : howmany_istride(1) = n2
780 : howmany_istride(2) = n1*n2
781 : !!! transposed Z axis on output
782 0 : howmany_ostride(1) = n2*n3
783 0 : howmany_ostride(2) = 1
784 :
785 : CALL fftw3_create_3d_plans(plan%fftw_plan_ny, plan%fftw_plan_ny_r, &
786 : dim_n, dim_istride, dim_ostride, &
787 : howmany_n, howmany_istride, howmany_ostride, &
788 : tmp, zin, &
789 : fft_direction, fftw_plan_type, rows_per_th, &
790 0 : rows_per_th_r)
791 :
792 : !!!! Plan for Z : M(n1,n2,n3) -> fftw(z) -> M(n1,n2,n3)
793 : CALL fftw3_compute_rows_per_th(n1, nt, rows_per_th, rows_per_th_r, &
794 0 : th_planA, th_planB)
795 0 : dim_n(1) = n3
796 : dim_istride(1) = 1
797 0 : dim_ostride(1) = 1 ! To transpose: n2*n1
798 0 : howmany_n(1) = n2
799 0 : howmany_n(2) = rows_per_th
800 0 : howmany_istride(1) = n3
801 0 : howmany_istride(2) = n2*n3
802 0 : howmany_ostride(1) = n3 ! To transpose: n1
803 0 : howmany_ostride(2) = n2*n3 ! To transpose: 1
804 :
805 : CALL fftw3_create_3d_plans(plan%fftw_plan_nz, plan%fftw_plan_nz_r, &
806 : dim_n, dim_istride, dim_ostride, &
807 : howmany_n, howmany_istride, howmany_ostride, &
808 : zin, tmp, &
809 : fft_direction, fftw_plan_type, rows_per_th, &
810 0 : rows_per_th_r)
811 :
812 0 : plan%separated_plans = .TRUE.
813 :
814 0 : DEALLOCATE (tmp)
815 : END IF
816 :
817 : #else
818 : MARK_USED(plan)
819 : MARK_USED(plan_style)
820 : !MARK_USED does not work with assumed size arguments
821 : IF (.FALSE.) THEN; DO; IF (ABS(zin(1)) > ABS(zout(1))) EXIT; END DO; END IF
822 : #endif
823 :
824 56292 : END SUBROUTINE fftw3_create_plan_3d
825 :
826 : ! **************************************************************************************************
827 :
828 : ! **************************************************************************************************
829 : !> \brief ...
830 : !> \param plan ...
831 : !> \param plan_r ...
832 : !> \param split_dim ...
833 : !> \param nt ...
834 : !> \param tid ...
835 : !> \param input ...
836 : !> \param istride ...
837 : !> \param output ...
838 : !> \param ostride ...
839 : ! **************************************************************************************************
840 0 : SUBROUTINE fftw3_workshare_execute_dft(plan, plan_r, split_dim, nt, tid, &
841 : input, istride, output, ostride)
842 :
843 : INTEGER, INTENT(IN) :: split_dim, nt, tid
844 : INTEGER, INTENT(IN) :: istride, ostride
845 : COMPLEX(KIND=dp), DIMENSION(*), INTENT(INOUT) :: input, output
846 : TYPE(C_PTR) :: plan, plan_r
847 : #if defined(__FFTW3)
848 : INTEGER :: i_off, o_off
849 : INTEGER :: th_planA, th_planB
850 : INTEGER :: rows_per_thread, rows_per_thread_r
851 :
852 : CALL fftw3_compute_rows_per_th(split_dim, nt, rows_per_thread, &
853 : rows_per_thread_r, &
854 0 : th_planA, th_planB)
855 :
856 0 : IF (th_planB > 0) THEN
857 0 : IF (tid < th_planA) THEN
858 0 : i_off = (tid)*(istride*(rows_per_thread)) + 1
859 0 : o_off = (tid)*(ostride*(rows_per_thread)) + 1
860 0 : IF (rows_per_thread > 0) THEN
861 : CALL fftw_execute_dft(plan, input(i_off), &
862 0 : output(o_off))
863 : END IF
864 0 : ELSE IF ((tid - th_planA) < th_planB) THEN
865 :
866 : i_off = (th_planA)*istride*(rows_per_thread) + &
867 0 : (tid - th_planA)*istride*(rows_per_thread_r) + 1
868 : o_off = (th_planA)*ostride*(rows_per_thread) + &
869 0 : (tid - th_planA)*ostride*(rows_per_thread_r) + 1
870 :
871 : CALL fftw_execute_dft(plan_r, input(i_off), &
872 0 : output(o_off))
873 : END IF
874 :
875 : ELSE
876 0 : i_off = (tid)*(istride*(rows_per_thread)) + 1
877 0 : o_off = (tid)*(ostride*(rows_per_thread)) + 1
878 :
879 : CALL fftw_execute_dft(plan, input(i_off), &
880 0 : output(o_off))
881 :
882 : END IF
883 : #else
884 : MARK_USED(plan)
885 : MARK_USED(plan_r)
886 : MARK_USED(split_dim)
887 : MARK_USED(nt)
888 : MARK_USED(tid)
889 : MARK_USED(istride)
890 : MARK_USED(ostride)
891 : !MARK_USED does not work with assumed size arguments
892 : IF (.FALSE.) THEN; DO; IF (ABS(input(1)) > ABS(output(1))) EXIT; END DO; END IF
893 : #endif
894 :
895 0 : END SUBROUTINE fftw3_workshare_execute_dft
896 :
897 : ! **************************************************************************************************
898 :
899 : ! **************************************************************************************************
900 : !> \brief ...
901 : !> \param plan ...
902 : !> \param scale ...
903 : !> \param zin ...
904 : !> \param zout ...
905 : !> \param stat ...
906 : ! **************************************************************************************************
907 650224 : SUBROUTINE fftw33d(plan, scale, zin, zout, stat)
908 :
909 : TYPE(fft_plan_type), INTENT(IN) :: plan
910 : REAL(KIND=dp), INTENT(IN) :: scale
911 : COMPLEX(KIND=dp), DIMENSION(*), INTENT(INOUT), TARGET:: zin
912 : COMPLEX(KIND=dp), DIMENSION(*), INTENT(INOUT), TARGET:: zout
913 : INTEGER, INTENT(OUT) :: stat
914 : #if defined(__FFTW3)
915 650224 : COMPLEX(KIND=dp), POINTER :: xout(:)
916 650224 : COMPLEX(KIND=dp), ALLOCATABLE :: tmp1(:)
917 : INTEGER :: n1, n2, n3
918 : INTEGER :: tid, nt
919 : INTEGER :: i, j, k
920 :
921 650224 : n1 = plan%n_3d(1)
922 650224 : n2 = plan%n_3d(2)
923 650224 : n3 = plan%n_3d(3)
924 :
925 650224 : stat = 1
926 :
927 : ! We use a POINTER to the output array to avoid duplicating code
928 650224 : IF (plan%fft_in_place) THEN
929 650220 : xout => zin(:n1*n2*n3)
930 : ELSE
931 4 : xout => zout(:n1*n2*n3)
932 : END IF
933 :
934 : ! Either compute the full 3D FFT using a multithreaded plan
935 650224 : IF (.NOT. plan%separated_plans) THEN
936 650224 : CALL fftw_execute_dft(plan%fftw_plan, zin, xout)
937 : ELSE
938 : ! Or use the 3 stage FFT scheme described in fftw3_create_plan_3d
939 0 : ALLOCATE (tmp1(n1*n2*n3)) ! Temporary vector used for transpositions
940 0 : !$OMP PARALLEL DEFAULT(NONE) PRIVATE(tid,nt,i,j,k) SHARED(zin,tmp1,n1,n2,n3,plan,xout)
941 : tid = 0
942 : nt = 1
943 :
944 : !$ tid = omp_get_thread_num()
945 : !$ nt = omp_get_num_threads()
946 : CALL fftw3_workshare_execute_dft(plan%fftw_plan_nx, plan%fftw_plan_nx_r, &
947 : n3, nt, tid, &
948 : zin, n1*n2, tmp1, n1*n2)
949 :
950 : !$OMP BARRIER
951 : CALL fftw3_workshare_execute_dft(plan%fftw_plan_ny, plan%fftw_plan_ny_r, &
952 : n3, nt, tid, &
953 : tmp1, n1*n2, xout, 1)
954 : !$OMP BARRIER
955 : CALL fftw3_workshare_execute_dft(plan%fftw_plan_nz, plan%fftw_plan_nz_r, &
956 : n1, nt, tid, &
957 : xout, n2*n3, tmp1, n2*n3)
958 : !$OMP BARRIER
959 :
960 : !$OMP DO COLLAPSE(3)
961 : DO i = 1, n1
962 : DO j = 1, n2
963 : DO k = 1, n3
964 : xout((i - 1) + (j - 1)*n1 + (k - 1)*n1*n2 + 1) = &
965 : tmp1((k - 1) + (j - 1)*n3 + (i - 1)*n3*n2 + 1)
966 : END DO
967 : END DO
968 : END DO
969 : !$OMP END DO
970 :
971 : !$OMP END PARALLEL
972 : END IF
973 :
974 650224 : IF (scale /= 1.0_dp) THEN
975 308909 : CALL zdscal(n1*n2*n3, scale, xout, 1)
976 : END IF
977 :
978 : #else
979 : MARK_USED(plan)
980 : MARK_USED(scale)
981 : !MARK_USED does not work with assumed size arguments
982 : IF (.FALSE.) THEN; DO; IF (ABS(zin(1)) > ABS(zout(1))) EXIT; END DO; END IF
983 : stat = 0
984 :
985 : #endif
986 :
987 650224 : END SUBROUTINE fftw33d
988 :
989 : ! **************************************************************************************************
990 :
991 : ! **************************************************************************************************
992 : !> \brief ...
993 : !> \param plan ...
994 : !> \param zin ...
995 : !> \param zout ...
996 : !> \param plan_style ...
997 : ! **************************************************************************************************
998 163260 : SUBROUTINE fftw3_create_plan_1dm(plan, zin, zout, plan_style)
999 :
1000 : IMPLICIT NONE
1001 :
1002 : TYPE(fft_plan_type), INTENT(INOUT) :: plan
1003 : COMPLEX(KIND=dp), DIMENSION(*), INTENT(IN) :: zin
1004 : COMPLEX(KIND=dp), DIMENSION(*), INTENT(IN) :: zout
1005 : INTEGER, INTENT(IN) :: plan_style
1006 : #if defined(__FFTW3)
1007 : INTEGER :: istride, idist, ostride, odist, num_threads, num_rows
1008 :
1009 : INTEGER :: fftw_plan_type
1010 326304 : SELECT CASE (plan_style)
1011 : CASE (1)
1012 163044 : fftw_plan_type = FFTW_ESTIMATE
1013 : CASE (2)
1014 48 : fftw_plan_type = FFTW_MEASURE
1015 : CASE (3)
1016 156 : fftw_plan_type = FFTW_PATIENT
1017 : CASE (4)
1018 12 : fftw_plan_type = FFTW_EXHAUSTIVE
1019 : CASE DEFAULT
1020 163260 : CPABORT("fftw3_create_plan_1dm")
1021 : END SELECT
1022 :
1023 : #if defined(__FFTW3_UNALIGNED)
1024 : fftw_plan_type = fftw_plan_type + FFTW_UNALIGNED
1025 : #endif
1026 163260 : num_threads = 1
1027 163260 : plan%separated_plans = .FALSE.
1028 : !$OMP PARALLEL DEFAULT(NONE), &
1029 163260 : !$OMP SHARED(NUM_THREADS)
1030 : !$OMP MASTER
1031 : !$ num_threads = omp_get_num_threads()
1032 : !$OMP END MASTER
1033 : !$OMP END PARALLEL
1034 :
1035 163260 : num_rows = plan%m/num_threads
1036 163260 : !$ plan%num_threads_needed = num_threads
1037 :
1038 : ! Check for number of rows less than num_threads
1039 163260 : !$ IF (plan%m < num_threads) THEN
1040 0 : !$ num_rows = 1
1041 0 : !$ plan%num_threads_needed = plan%m
1042 : !$ END IF
1043 :
1044 : ! Check for total number of rows not divisible by num_threads
1045 163260 : !$ IF (num_rows*plan%num_threads_needed /= plan%m) THEN
1046 0 : !$ plan%need_alt_plan = .TRUE.
1047 : !$ END IF
1048 :
1049 163260 : !$ plan%num_rows = num_rows
1050 163260 : istride = 1
1051 163260 : idist = plan%n
1052 163260 : ostride = 1
1053 163260 : odist = plan%n
1054 163260 : IF (plan%fsign == +1 .AND. plan%trans) THEN
1055 81618 : istride = plan%m
1056 81618 : idist = 1
1057 81642 : ELSEIF (plan%fsign == -1 .AND. plan%trans) THEN
1058 81618 : ostride = plan%m
1059 81618 : odist = 1
1060 : END IF
1061 :
1062 163260 : IF (plan%fsign == +1) THEN
1063 : CALL dfftw_plan_many_dft(plan%fftw_plan, 1, plan%n, num_rows, zin, 0, istride, idist, &
1064 81630 : zout, 0, ostride, odist, FFTW_FORWARD, fftw_plan_type)
1065 : ELSE
1066 : CALL dfftw_plan_many_dft(plan%fftw_plan, 1, plan%n, num_rows, zin, 0, istride, idist, &
1067 81630 : zout, 0, ostride, odist, FFTW_BACKWARD, fftw_plan_type)
1068 : END IF
1069 :
1070 163260 : !$ IF (plan%need_alt_plan) THEN
1071 0 : !$ plan%alt_num_rows = plan%m - (plan%num_threads_needed - 1)*num_rows
1072 0 : !$ IF (plan%fsign == +1) THEN
1073 : !$ CALL dfftw_plan_many_dft(plan%alt_fftw_plan, 1, plan%n, plan%alt_num_rows, zin, 0, istride, idist, &
1074 0 : !$ zout, 0, ostride, odist, FFTW_FORWARD, fftw_plan_type)
1075 : !$ ELSE
1076 : !$ CALL dfftw_plan_many_dft(plan%alt_fftw_plan, 1, plan%n, plan%alt_num_rows, zin, 0, istride, idist, &
1077 0 : !$ zout, 0, ostride, odist, FFTW_BACKWARD, fftw_plan_type)
1078 : !$ END IF
1079 : !$ END IF
1080 :
1081 : #else
1082 : MARK_USED(plan)
1083 : MARK_USED(plan_style)
1084 : !MARK_USED does not work with assumed size arguments
1085 : IF (.FALSE.) THEN; DO; IF (ABS(zin(1)) > ABS(zout(1))) EXIT; END DO; END IF
1086 : #endif
1087 :
1088 163260 : END SUBROUTINE fftw3_create_plan_1dm
1089 :
1090 : ! **************************************************************************************************
1091 : !> \brief ...
1092 : !> \param plan ...
1093 : ! **************************************************************************************************
1094 219552 : SUBROUTINE fftw3_destroy_plan(plan)
1095 :
1096 : TYPE(fft_plan_type), INTENT(INOUT) :: plan
1097 :
1098 : #if defined(__FFTW3)
1099 219552 : !$ IF (plan%need_alt_plan) THEN
1100 0 : !$ CALL fftw_destroy_plan(plan%alt_fftw_plan)
1101 : !$ END IF
1102 :
1103 219552 : IF (.NOT. plan%separated_plans) THEN
1104 219552 : CALL fftw_destroy_plan(plan%fftw_plan)
1105 : ELSE
1106 : ! If it is a separated plan then we have to destroy
1107 : ! each dim plan individually
1108 0 : CALL fftw_destroy_plan(plan%fftw_plan_nx)
1109 0 : CALL fftw_destroy_plan(plan%fftw_plan_ny)
1110 0 : CALL fftw_destroy_plan(plan%fftw_plan_nz)
1111 0 : CALL fftw_destroy_plan(plan%fftw_plan_nx_r)
1112 0 : CALL fftw_destroy_plan(plan%fftw_plan_ny_r)
1113 0 : CALL fftw_destroy_plan(plan%fftw_plan_nz_r)
1114 : END IF
1115 :
1116 : #else
1117 : MARK_USED(plan)
1118 : #endif
1119 :
1120 219552 : END SUBROUTINE fftw3_destroy_plan
1121 :
1122 : ! **************************************************************************************************
1123 : !> \brief ...
1124 : !> \param plan ...
1125 : !> \param zin ...
1126 : !> \param zout ...
1127 : !> \param scale ...
1128 : !> \param stat ...
1129 : ! **************************************************************************************************
1130 8441592 : SUBROUTINE fftw31dm(plan, zin, zout, scale, stat)
1131 : TYPE(fft_plan_type), INTENT(IN) :: plan
1132 : COMPLEX(KIND=dp), DIMENSION(*), INTENT(IN), TARGET :: zin
1133 : COMPLEX(KIND=dp), DIMENSION(*), INTENT(INOUT), &
1134 : TARGET :: zout
1135 : REAL(KIND=dp), INTENT(IN) :: scale
1136 : INTEGER, INTENT(OUT) :: stat
1137 :
1138 : INTEGER :: in_offset, my_id, num_rows, out_offset, &
1139 : scal_offset
1140 : TYPE(C_PTR) :: fftw_plan
1141 :
1142 : !------------------------------------------------------------------------------
1143 :
1144 8441592 : my_id = 0
1145 8441592 : num_rows = plan%m
1146 :
1147 : !$OMP PARALLEL DEFAULT(NONE), &
1148 : !$OMP PRIVATE(my_id,num_rows,in_offset,out_offset,scal_offset,fftw_plan), &
1149 : !$OMP SHARED(zin,zout), &
1150 8441592 : !$OMP SHARED(plan,scale,stat)
1151 : !$ my_id = omp_get_thread_num()
1152 :
1153 : !$ if (my_id < plan%num_threads_needed) then
1154 :
1155 : fftw_plan = plan%fftw_plan
1156 :
1157 : in_offset = 1
1158 : out_offset = 1
1159 : scal_offset = 1
1160 :
1161 : !$ in_offset = 1 + plan%num_rows*my_id*plan%n
1162 : !$ out_offset = 1 + plan%num_rows*my_id*plan%n
1163 : !$ IF (plan%fsign == +1 .AND. plan%trans) THEN
1164 : !$ in_offset = 1 + plan%num_rows*my_id
1165 : !$ ELSEIF (plan%fsign == -1 .AND. plan%trans) THEN
1166 : !$ out_offset = 1 + plan%num_rows*my_id
1167 : !$ END IF
1168 : !$ scal_offset = 1 + plan%n*plan%num_rows*my_id
1169 : !$ IF (plan%need_alt_plan .AND. my_id == plan%num_threads_needed - 1) THEN
1170 : !$ num_rows = plan%alt_num_rows
1171 : !$ fftw_plan = plan%alt_fftw_plan
1172 : !$ ELSE
1173 : !$ num_rows = plan%num_rows
1174 : !$ END IF
1175 :
1176 : #if defined(__FFTW3)
1177 : !$OMP MASTER
1178 : stat = 1
1179 : !$OMP END MASTER
1180 : CALL dfftw_execute_dft(fftw_plan, zin(in_offset:in_offset), zout(out_offset:out_offset))
1181 : !$ end if
1182 : ! all theads need to meet at this barrier
1183 : !$OMP BARRIER
1184 : !$ if (my_id < plan%num_threads_needed) then
1185 : IF (scale /= 1.0_dp) CALL zdscal(plan%n*num_rows, scale, zout(scal_offset:scal_offset), 1)
1186 : !$ end if
1187 :
1188 : #else
1189 : MARK_USED(plan)
1190 : MARK_USED(scale)
1191 : !MARK_USED does not work with assumed size arguments
1192 : IF (.FALSE.) THEN; DO; IF (ABS(zin(1)) > ABS(zout(1))) EXIT; END DO; END IF
1193 : stat = 0
1194 :
1195 : !$ else
1196 : !$ end if
1197 :
1198 : #endif
1199 :
1200 : !$OMP END PARALLEL
1201 :
1202 8441592 : END SUBROUTINE fftw31dm
1203 :
1204 0 : END MODULE fftw3_lib
|